Patch simply to re-indent all of the basic coll component to 4-space tabs (Jeff takes the blame for originally writing it with 2-space tabs). Changes to fix the PMB coming soon.

This commit was SVN r6793.
This commit is contained in:
parent
a59fa8ac42
Commit
a8fa19c5c1
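The change is purely cosmetic: 2-space indentation becomes 4-space, and long parameter lists are re-wrapped roughly one argument per line. As a before/after sketch, here is the mca_coll_basic_free_reqs helper that appears in the diff below in both forms (the whitespace is reconstructed, since the scrape stripped indentation):

/* before: 2-space indentation, single-line prototype */
static inline void mca_coll_basic_free_reqs(ompi_request_t **reqs, int count)
{
  int i;
  for (i = 0; i < count; ++i)
    ompi_request_free(&reqs[i]);
}

/* after: 4-space indentation, wrapped parameter list */
static inline void mca_coll_basic_free_reqs(ompi_request_t ** reqs,
                                            int count)
{
    int i;
    for (i = 0; i < count; ++i)
        ompi_request_free(&reqs[i]);
}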
@@ -26,15 +26,17 @@
#include "mca/pml/pml.h"

#if defined(c_plusplus) || defined(__cplusplus)
extern "C"
{
#endif

/*
 * Globally exported variable
 */

    OMPI_COMP_EXPORT extern const mca_coll_base_component_1_0_0_t
        mca_coll_basic_component;
    OMPI_COMP_EXPORT extern int mca_coll_basic_priority_param;


/*
@@ -42,215 +44,242 @@ OMPI_COMP_EXPORT extern int mca_coll_basic_priority_param;
 */


/* API functions */

    int mca_coll_basic_init_query(bool enable_progress_threads,
                                  bool enable_mpi_threads);
    const struct mca_coll_base_module_1_0_0_t
        *mca_coll_basic_comm_query(struct ompi_communicator_t *comm,
                                   int *priority,
                                   struct mca_coll_base_comm_t **data);

    const struct mca_coll_base_module_1_0_0_t
        *mca_coll_basic_module_init(struct ompi_communicator_t *comm);
    int mca_coll_basic_module_finalize(struct ompi_communicator_t *comm);

    int mca_coll_basic_allgather_intra(void *sbuf, int scount,
                                       struct ompi_datatype_t *sdtype,
                                       void *rbuf, int rcount,
                                       struct ompi_datatype_t *rdtype,
                                       struct ompi_communicator_t *comm);
    int mca_coll_basic_allgather_inter(void *sbuf, int scount,
                                       struct ompi_datatype_t *sdtype,
                                       void *rbuf, int rcount,
                                       struct ompi_datatype_t *rdtype,
                                       struct ompi_communicator_t *comm);

    int mca_coll_basic_allgatherv_intra(void *sbuf, int scount,
                                        struct ompi_datatype_t *sdtype,
                                        void *rbuf, int *rcounts,
                                        int *disps,
                                        struct ompi_datatype_t *rdtype,
                                        struct ompi_communicator_t *comm);
    int mca_coll_basic_allgatherv_inter(void *sbuf, int scount,
                                        struct ompi_datatype_t *sdtype,
                                        void *rbuf, int *rcounts,
                                        int *disps,
                                        struct ompi_datatype_t *rdtype,
                                        struct ompi_communicator_t *comm);

    int mca_coll_basic_allreduce_intra(void *sbuf, void *rbuf, int count,
                                       struct ompi_datatype_t *dtype,
                                       struct ompi_op_t *op,
                                       struct ompi_communicator_t *comm);
    int mca_coll_basic_allreduce_inter(void *sbuf, void *rbuf, int count,
                                       struct ompi_datatype_t *dtype,
                                       struct ompi_op_t *op,
                                       struct ompi_communicator_t *comm);

    int mca_coll_basic_alltoall_intra(void *sbuf, int scount,
                                      struct ompi_datatype_t *sdtype,
                                      void *rbuf, int rcount,
                                      struct ompi_datatype_t *rdtype,
                                      struct ompi_communicator_t *comm);
    int mca_coll_basic_alltoall_inter(void *sbuf, int scount,
                                      struct ompi_datatype_t *sdtype,
                                      void *rbuf, int rcount,
                                      struct ompi_datatype_t *rdtype,
                                      struct ompi_communicator_t *comm);

    int mca_coll_basic_alltoallv_intra(void *sbuf, int *scounts,
                                       int *sdisps,
                                       struct ompi_datatype_t *sdtype,
                                       void *rbuf, int *rcounts,
                                       int *rdisps,
                                       struct ompi_datatype_t *rdtype,
                                       struct ompi_communicator_t *comm);
    int mca_coll_basic_alltoallv_inter(void *sbuf, int *scounts,
                                       int *sdisps,
                                       struct ompi_datatype_t *sdtype,
                                       void *rbuf, int *rcounts,
                                       int *rdisps,
                                       struct ompi_datatype_t *rdtype,
                                       struct ompi_communicator_t *comm);

    int mca_coll_basic_alltoallw_intra(void *sbuf, int *scounts,
                                       int *sdisps,
                                       struct ompi_datatype_t **sdtypes,
                                       void *rbuf, int *rcounts,
                                       int *rdisps,
                                       struct ompi_datatype_t **rdtypes,
                                       struct ompi_communicator_t *comm);
    int mca_coll_basic_alltoallw_inter(void *sbuf, int *scounts,
                                       int *sdisps,
                                       struct ompi_datatype_t **sdtypes,
                                       void *rbuf, int *rcounts,
                                       int *rdisps,
                                       struct ompi_datatype_t **rdtypes,
                                       struct ompi_communicator_t *comm);

    int mca_coll_basic_barrier_intra_lin(struct ompi_communicator_t *comm);
    int mca_coll_basic_barrier_inter_lin(struct ompi_communicator_t *comm);

    int mca_coll_basic_barrier_intra_log(struct ompi_communicator_t *comm);

    int mca_coll_basic_bcast_lin_intra(void *buff, int count,
                                       struct ompi_datatype_t *datatype,
                                       int root,
                                       struct ompi_communicator_t *comm);
    int mca_coll_basic_bcast_lin_inter(void *buff, int count,
                                       struct ompi_datatype_t *datatype,
                                       int root,
                                       struct ompi_communicator_t *comm);

    int mca_coll_basic_bcast_log_intra(void *buff, int count,
                                       struct ompi_datatype_t *datatype,
                                       int root,
                                       struct ompi_communicator_t *comm);
    int mca_coll_basic_bcast_log_inter(void *buff, int count,
                                       struct ompi_datatype_t *datatype,
                                       int root,
                                       struct ompi_communicator_t *comm);

    int mca_coll_basic_exscan_intra(void *sbuf, void *rbuf, int count,
                                    struct ompi_datatype_t *dtype,
                                    struct ompi_op_t *op,
                                    struct ompi_communicator_t *comm);
    int mca_coll_basic_exscan_inter(void *sbuf, void *rbuf, int count,
                                    struct ompi_datatype_t *dtype,
                                    struct ompi_op_t *op,
                                    struct ompi_communicator_t *comm);

    int mca_coll_basic_gather_intra(void *sbuf, int scount,
                                    struct ompi_datatype_t *sdtype,
                                    void *rbuf, int rcount,
                                    struct ompi_datatype_t *rdtype,
                                    int root,
                                    struct ompi_communicator_t *comm);
    int mca_coll_basic_gather_inter(void *sbuf, int scount,
                                    struct ompi_datatype_t *sdtype,
                                    void *rbuf, int rcount,
                                    struct ompi_datatype_t *rdtype,
                                    int root,
                                    struct ompi_communicator_t *comm);

    int mca_coll_basic_gatherv_intra(void *sbuf, int scount,
                                     struct ompi_datatype_t *sdtype,
                                     void *rbuf, int *rcounts, int *disps,
                                     struct ompi_datatype_t *rdtype,
                                     int root,
                                     struct ompi_communicator_t *comm);
    int mca_coll_basic_gatherv_inter(void *sbuf, int scount,
                                     struct ompi_datatype_t *sdtype,
                                     void *rbuf, int *rcounts, int *disps,
                                     struct ompi_datatype_t *rdtype,
                                     int root,
                                     struct ompi_communicator_t *comm);

    int mca_coll_basic_reduce_lin_intra(void *sbuf, void *rbuf, int count,
                                        struct ompi_datatype_t *dtype,
                                        struct ompi_op_t *op,
                                        int root,
                                        struct ompi_communicator_t *comm);
    int mca_coll_basic_reduce_lin_inter(void *sbuf, void *rbuf, int count,
                                        struct ompi_datatype_t *dtype,
                                        struct ompi_op_t *op,
                                        int root,
                                        struct ompi_communicator_t *comm);

    int mca_coll_basic_reduce_log_intra(void *sbuf, void *rbuf, int count,
                                        struct ompi_datatype_t *dtype,
                                        struct ompi_op_t *op,
                                        int root,
                                        struct ompi_communicator_t *comm);
    int mca_coll_basic_reduce_log_inter(void *sbuf, void *rbuf, int count,
                                        struct ompi_datatype_t *dtype,
                                        struct ompi_op_t *op,
                                        int root,
                                        struct ompi_communicator_t *comm);

    int mca_coll_basic_reduce_scatter_intra(void *sbuf, void *rbuf,
                                            int *rcounts,
                                            struct ompi_datatype_t *dtype,
                                            struct ompi_op_t *op,
                                            struct ompi_communicator_t
                                            *comm);
    int mca_coll_basic_reduce_scatter_inter(void *sbuf, void *rbuf,
                                            int *rcounts,
                                            struct ompi_datatype_t *dtype,
                                            struct ompi_op_t *op,
                                            struct ompi_communicator_t
                                            *comm);

    int mca_coll_basic_scan_intra(void *sbuf, void *rbuf, int count,
                                  struct ompi_datatype_t *dtype,
                                  struct ompi_op_t *op,
                                  struct ompi_communicator_t *comm);
    int mca_coll_basic_scan_inter(void *sbuf, void *rbuf, int count,
                                  struct ompi_datatype_t *dtype,
                                  struct ompi_op_t *op,
                                  struct ompi_communicator_t *comm);

    int mca_coll_basic_scatter_intra(void *sbuf, int scount,
                                     struct ompi_datatype_t *sdtype,
                                     void *rbuf, int rcount,
                                     struct ompi_datatype_t *rdtype,
                                     int root,
                                     struct ompi_communicator_t *comm);
    int mca_coll_basic_scatter_inter(void *sbuf, int scount,
                                     struct ompi_datatype_t *sdtype,
                                     void *rbuf, int rcount,
                                     struct ompi_datatype_t *rdtype,
                                     int root,
                                     struct ompi_communicator_t *comm);

    int mca_coll_basic_scatterv_intra(void *sbuf, int *scounts, int *disps,
                                      struct ompi_datatype_t *sdtype,
                                      void *rbuf, int rcount,
                                      struct ompi_datatype_t *rdtype,
                                      int root,
                                      struct ompi_communicator_t *comm);
    int mca_coll_basic_scatterv_inter(void *sbuf, int *scounts, int *disps,
                                      struct ompi_datatype_t *sdtype,
                                      void *rbuf, int rcount,
                                      struct ompi_datatype_t *rdtype,
                                      int root,
                                      struct ompi_communicator_t *comm);



/* Utility functions */

    static inline void mca_coll_basic_free_reqs(ompi_request_t ** reqs,
                                                int count)
    {
        int i;
        for (i = 0; i < count; ++i)
            ompi_request_free(&reqs[i]);
    }


/*
 * Data structure for hanging data off the communicator
 */
    struct mca_coll_base_comm_t
    {
        ompi_request_t **mccb_reqs;
        int mccb_num_reqs;
    };

#if defined(c_plusplus) || defined(__cplusplus)
}
@@ -34,25 +34,26 @@
 * Accepts: - same as MPI_Allgather()
 * Returns: - MPI_SUCCESS or error code
 */
int
mca_coll_basic_allgather_intra(void *sbuf, int scount,
                               struct ompi_datatype_t *sdtype, void *rbuf,
                               int rcount, struct ompi_datatype_t *rdtype,
                               struct ompi_communicator_t *comm)
{
    int size;
    int err;

    /* Gather and broadcast. */

    size = ompi_comm_size(comm);

    err = comm->c_coll.coll_gather(sbuf, scount, sdtype, rbuf, rcount,
                                   rdtype, 0, comm);
    if (MPI_SUCCESS != err)
        return err;

    err = comm->c_coll.coll_bcast(rbuf, rcount * size, rdtype, 0, comm);
    return err;
}
@@ -63,151 +64,149 @@ int mca_coll_basic_allgather_intra(void *sbuf, int scount,
 * Accepts: - same as MPI_Allgather()
 * Returns: - MPI_SUCCESS or error code
 */
int
mca_coll_basic_allgather_inter(void *sbuf, int scount,
                               struct ompi_datatype_t *sdtype,
                               void *rbuf, int rcount,
                               struct ompi_datatype_t *rdtype,
                               struct ompi_communicator_t *comm)
{
    int rank;
    int root = 0;
    int size, rsize;
    int err;
    int i;
    char *tmpbuf = NULL, *ptmp;
    long rlb, slb, rextent, sextent;
    long incr;
    ompi_request_t *req;
    ompi_request_t **reqs = comm->c_coll_basic_data->mccb_reqs;

    rank = ompi_comm_rank(comm);
    size = ompi_comm_size(comm);
    rsize = ompi_comm_remote_size(comm);

    /* Algorithm:
     * - a gather to the root in remote group (simultaniously executed,
     * thats why we cannot use coll_gather).
     * - exchange the temp-results between two roots
     * - inter-bcast (again simultanious).
     */

    /* Step one: gather operations: */
    if (rank != root) {
        /* send your data to root */
        err = MCA_PML_CALL(send(sbuf, scount, sdtype, root,
                                MCA_COLL_BASE_TAG_ALLGATHER,
                                MCA_PML_BASE_SEND_STANDARD, comm));
        if (OMPI_SUCCESS != err) {
            return err;
        }
    } else {
        /* receive a msg. from all other procs. */
        err = ompi_ddt_get_extent(rdtype, &rlb, &rextent);
        if (OMPI_SUCCESS != err) {
            return err;
        }
        err = ompi_ddt_get_extent(sdtype, &slb, &sextent);
        if (OMPI_SUCCESS != err) {
            return err;
        }

        /* Do a send-recv between the two root procs. to avoid deadlock */
        err = MCA_PML_CALL(isend(sbuf, scount, sdtype, 0,
                                 MCA_COLL_BASE_TAG_ALLGATHER,
                                 MCA_PML_BASE_SEND_STANDARD,
                                 comm, &reqs[rsize]));
        if (OMPI_SUCCESS != err) {
            return err;
        }

        err = MCA_PML_CALL(irecv(rbuf, rcount, rdtype, 0,
                                 MCA_COLL_BASE_TAG_ALLGATHER, comm,
                                 &reqs[0]));
        if (OMPI_SUCCESS != err) {
            return err;
        }

        incr = rextent * rcount;
        ptmp = (char *) rbuf + incr;
        for (i = 1; i < rsize; ++i, ptmp += incr) {
            err = MCA_PML_CALL(irecv(ptmp, rcount, rdtype, i,
                                     MCA_COLL_BASE_TAG_ALLGATHER,
                                     comm, &reqs[i]));
            if (MPI_SUCCESS != err) {
                return err;
            }
        }

        err = ompi_request_wait_all(rsize + 1, reqs, MPI_STATUSES_IGNORE);
        if (OMPI_SUCCESS != err) {
            return err;
        }

        /* Step 2: exchange the resuts between the root processes */
        tmpbuf = (char *) malloc(scount * size * sextent);
        if (NULL == tmpbuf) {
            return err;
        }

        err = MCA_PML_CALL(isend(rbuf, rsize * rcount, rdtype, 0,
                                 MCA_COLL_BASE_TAG_ALLGATHER,
                                 MCA_PML_BASE_SEND_STANDARD, comm, &req));
        if (OMPI_SUCCESS != err) {
            goto exit;
        }

        err = MCA_PML_CALL(recv(tmpbuf, size * scount, sdtype, 0,
                                MCA_COLL_BASE_TAG_ALLGATHER, comm,
                                MPI_STATUS_IGNORE));
        if (OMPI_SUCCESS != err) {
            goto exit;
        }

        err = ompi_request_wait_all(1, &req, MPI_STATUS_IGNORE);
        if (OMPI_SUCCESS != err) {
            goto exit;
        }
    }


    /* Step 3: bcast the data to the remote group. This
     * happens in both groups simultaniously, thus we can
     * not use coll_bcast (this would deadlock).
     */
    if (rank != root) {
        /* post the recv */
        err = MCA_PML_CALL(recv(rbuf, size * rcount, rdtype, 0,
                                MCA_COLL_BASE_TAG_ALLGATHER, comm,
                                MPI_STATUS_IGNORE));
        if (OMPI_SUCCESS != err) {
            goto exit;
        }
    } else {
        /* Send the data to every other process in the remote group
         * except to rank zero. which has it already. */
        for (i = 1; i < rsize; i++) {
            err = MCA_PML_CALL(isend(tmpbuf, size * scount, sdtype, i,
                                     MCA_COLL_BASE_TAG_ALLGATHER,
                                     MCA_PML_BASE_SEND_STANDARD,
                                     comm, &reqs[i - 1]));
            if (OMPI_SUCCESS != err) {
                goto exit;
            }
        }

        err = ompi_request_wait_all(rsize - 1, reqs, MPI_STATUSES_IGNORE);
        if (OMPI_SUCCESS != err) {
            goto exit;
        }
    }

  exit:
    if (NULL != tmpbuf) {
        free(tmpbuf);
    }


    return err;
}
@@ -32,27 +32,28 @@
 * Accepts: - same as MPI_Allgatherv()
 * Returns: - MPI_SUCCESS or error code
 */
int
mca_coll_basic_allgatherv_intra(void *sbuf, int scount,
                                struct ompi_datatype_t *sdtype,
                                void *rbuf, int *rcounts, int *disps,
                                struct ompi_datatype_t *rdtype,
                                struct ompi_communicator_t *comm)
{
    int i, size;
    int err;

    /* Collect all values at each process, one at a time. */

    size = ompi_comm_size(comm);
    for (i = 0; i < size; ++i) {
        err = comm->c_coll.coll_gatherv(sbuf, scount, sdtype, rbuf,
                                        rcounts, disps, rdtype, i, comm);
        if (MPI_SUCCESS != err) {
            return err;
        }
    }

    return MPI_SUCCESS;
}
@@ -63,40 +64,40 @@ int mca_coll_basic_allgatherv_intra(void *sbuf, int scount,
 * Accepts: - same as MPI_Allgatherv()
 * Returns: - MPI_SUCCESS or error code
 */
int
mca_coll_basic_allgatherv_inter(void *sbuf, int scount,
                                struct ompi_datatype_t *sdtype,
                                void *rbuf, int *rcounts, int *disps,
                                struct ompi_datatype_t *rdtype,
                                struct ompi_communicator_t *comm)
{
    int size, rsize;
    int err, i;
    int *scounts = NULL;
    int *sdisps = NULL;

    rsize = ompi_comm_remote_size(comm);
    size = ompi_comm_size(comm);

    scounts = (int *) malloc(rsize * sizeof(int));
    sdisps = (int *) calloc(rsize, sizeof(int));
    if (NULL == scounts || NULL == sdisps) {
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    for (i = 0; i < rsize; i++) {
        scounts[i] = scount;
    }

    err = comm->c_coll.coll_alltoallv(sbuf, scounts, sdisps, sdtype,
                                      rbuf, rcounts, disps, rdtype, comm);

    if (NULL != sdisps) {
        free(sdisps);
    }
    if (NULL != scounts) {
        free(scounts);
    }


    return err;
}
@@ -35,21 +35,22 @@
 * Accepts: - same as MPI_Allreduce()
 * Returns: - MPI_SUCCESS or error code
 */
int
mca_coll_basic_allreduce_intra(void *sbuf, void *rbuf, int count,
                               struct ompi_datatype_t *dtype,
                               struct ompi_op_t *op,
                               struct ompi_communicator_t *comm)
{
    int err;

    /* Reduce to 0 and broadcast. */

    err = comm->c_coll.coll_reduce(sbuf, rbuf, count, dtype, op, 0, comm);
    if (MPI_SUCCESS != err) {
        return err;
    }

    return comm->c_coll.coll_bcast(rbuf, count, dtype, 0, comm);
}
@@ -60,147 +61,148 @@ int mca_coll_basic_allreduce_intra(void *sbuf, void *rbuf, int count,
 * Accepts: - same as MPI_Allreduce()
 * Returns: - MPI_SUCCESS or error code
 */
int
mca_coll_basic_allreduce_inter(void *sbuf, void *rbuf, int count,
                               struct ompi_datatype_t *dtype,
                               struct ompi_op_t *op,
                               struct ompi_communicator_t *comm)
{
    int err, i;
    int rank;
    int root = 0;
    int rsize;
    long lb, extent;
    char *tmpbuf = NULL, *pml_buffer = NULL;
    ompi_request_t *req[2];
    ompi_request_t **reqs = comm->c_coll_basic_data->mccb_reqs;

    rank = ompi_comm_rank(comm);
    rsize = ompi_comm_remote_size(comm);

    /* determine result of the remote group, you cannot
     * use coll_reduce for inter-communicators, since than
     * you would need to determine an order between the
     * two groups (e.g. which group is providing the data
     * and which one enters coll_reduce with providing
     * MPI_PROC_NULL as root argument etc.) Here,
     * we execute the data exchange for both groups
     * simultaniously. */
    /*****************************************************************/
    if (rank == root) {
        err = ompi_ddt_get_extent(dtype, &lb, &extent);
        if (OMPI_SUCCESS != err) {
            return OMPI_ERROR;
        }

        tmpbuf = (char *) malloc(count * extent);
        if (NULL == tmpbuf) {
            return OMPI_ERR_OUT_OF_RESOURCE;
        }
        pml_buffer = tmpbuf - lb;

        /* Do a send-recv between the two root procs. to avoid deadlock */
        err = MCA_PML_CALL(irecv(rbuf, count, dtype, 0,
                                 MCA_COLL_BASE_TAG_ALLREDUCE, comm,
                                 &(req[0])));
        if (OMPI_SUCCESS != err) {
            goto exit;
        }

        err = MCA_PML_CALL(isend(sbuf, count, dtype, 0,
                                 MCA_COLL_BASE_TAG_ALLREDUCE,
                                 MCA_PML_BASE_SEND_STANDARD,
                                 comm, &(req[1])));
        if (OMPI_SUCCESS != err) {
            goto exit;
        }

        err = ompi_request_wait_all(2, req, MPI_STATUSES_IGNORE);
        if (OMPI_SUCCESS != err) {
            goto exit;
        }


        /* Loop receiving and calling reduction function (C or Fortran). */
        for (i = 1; i < rsize; i++) {
            err = MCA_PML_CALL(recv(pml_buffer, count, dtype, i,
                                    MCA_COLL_BASE_TAG_ALLREDUCE, comm,
                                    MPI_STATUS_IGNORE));
            if (MPI_SUCCESS != err) {
                goto exit;
            }

            /* Perform the reduction */
            ompi_op_reduce(op, pml_buffer, rbuf, count, dtype);
        }
    } else {
        /* If not root, send data to the root. */
        err = MCA_PML_CALL(send(sbuf, count, dtype, root,
                                MCA_COLL_BASE_TAG_ALLREDUCE,
                                MCA_PML_BASE_SEND_STANDARD, comm));
        if (OMPI_SUCCESS != err) {
            goto exit;
        }
    }


    /* now we have on one process the result of the remote group. To distribute
     * the data to all processes in the local group, we exchange the data between
     * the two root processes. They then send it to every other process in the
     * remote group. */
    /***************************************************************************/
    if (rank == root) {
        /* sendrecv between the two roots */
        err = MCA_PML_CALL(irecv(pml_buffer, count, dtype, 0,
                                 MCA_COLL_BASE_TAG_ALLREDUCE,
                                 comm, &(req[1])));
        if (OMPI_SUCCESS != err) {
            goto exit;
        }

        err = MCA_PML_CALL(isend(rbuf, count, dtype, 0,
                                 MCA_COLL_BASE_TAG_ALLREDUCE,
                                 MCA_PML_BASE_SEND_STANDARD, comm,
                                 &(req[0])));
        if (OMPI_SUCCESS != err) {
            goto exit;
        }
        err = ompi_request_wait_all(2, req, MPI_STATUSES_IGNORE);
        if (OMPI_SUCCESS != err) {
            goto exit;
        }

        /* distribute the data to other processes in remote group.
         * Note that we start from 1 (not from zero), since zero
         * has already the correct data AND we avoid a potential
         * deadlock here.
         */
        if (rsize > 1) {
            for (i = 1; i < rsize; i++) {
                err = MCA_PML_CALL(isend(pml_buffer, count, dtype, i,
                                         MCA_COLL_BASE_TAG_ALLREDUCE,
                                         MCA_PML_BASE_SEND_STANDARD, comm,
                                         &reqs[i - 1]));
                if (OMPI_SUCCESS != err) {
                    goto exit;
                }
            }

            err =
                ompi_request_wait_all(rsize - 1, reqs,
                                      MPI_STATUSES_IGNORE);
            if (OMPI_SUCCESS != err) {
                goto exit;
            }
        }
    } else {
        err = MCA_PML_CALL(recv(rbuf, count, dtype, root,
                                MCA_COLL_BASE_TAG_ALLREDUCE,
                                comm, MPI_STATUS_IGNORE));
    }

  exit:
    if (NULL != tmpbuf) {
        free(tmpbuf);
    }
@@ -33,11 +33,12 @@
 * Accepts: - same as MPI_Alltoall()
 * Returns: - MPI_SUCCESS or an MPI error code
 */
int
mca_coll_basic_alltoall_intra(void *sbuf, int scount,
                              struct ompi_datatype_t *sdtype,
                              void *rbuf, int rcount,
                              struct ompi_datatype_t *rdtype,
                              struct ompi_communicator_t *comm)
{
    int i;
    int rank;
@@ -61,13 +62,13 @@ int mca_coll_basic_alltoall_intra(void *sbuf, int scount,

    err = ompi_ddt_get_extent(sdtype, &lb, &sndinc);
    if (OMPI_SUCCESS != err) {
        return err;
    }
    sndinc *= scount;

    err = ompi_ddt_get_extent(rdtype, &lb, &rcvinc);
    if (OMPI_SUCCESS != err) {
        return err;
    }
    rcvinc *= rcount;
@@ -76,8 +77,7 @@ int mca_coll_basic_alltoall_intra(void *sbuf, int scount,
    psnd = ((char *) sbuf) + (rank * sndinc);
    prcv = ((char *) rbuf) + (rank * rcvinc);

    err = ompi_ddt_sndrcv(psnd, scount, sdtype, prcv, rcount, rdtype);
    if (MPI_SUCCESS != err) {
        return err;
    }
@@ -94,31 +94,33 @@ int mca_coll_basic_alltoall_intra(void *sbuf, int scount,
    req = rreq = comm->c_coll_basic_data->mccb_reqs;
    sreq = rreq + size - 1;

    prcv = (char *) rbuf;
    psnd = (char *) sbuf;

    /* Post all receives first -- a simple optimization */

    for (i = (rank + 1) % size; i != rank; i = (i + 1) % size, ++rreq) {
        err =
            MCA_PML_CALL(irecv_init
                         (prcv + (i * rcvinc), rcount, rdtype, i,
                          MCA_COLL_BASE_TAG_ALLTOALL, comm, rreq));
        if (MPI_SUCCESS != err) {
            mca_coll_basic_free_reqs(req, rreq - req);
            return err;
        }
    }

    /* Now post all sends */

    for (i = (rank + 1) % size; i != rank; i = (i + 1) % size, ++sreq) {
        err =
            MCA_PML_CALL(isend_init
                         (psnd + (i * sndinc), scount, sdtype, i,
                          MCA_COLL_BASE_TAG_ALLTOALL,
                          MCA_PML_BASE_SEND_STANDARD, comm, sreq));
        if (MPI_SUCCESS != err) {
            mca_coll_basic_free_reqs(req, sreq - req);
            return err;
        }
    }
@@ -127,11 +129,11 @@ int mca_coll_basic_alltoall_intra(void *sbuf, int scount,
    MCA_PML_CALL(start(nreqs, req));

    /* Wait for them all. If there's an error, note that we don't
     * care what the error was -- just that there *was* an error. The
     * PML will finish all requests, even if one or more of them fail.
     * i.e., by the end of this call, all the requests are free-able.
     * So free them anyway -- even if there was an error, and return
     * the error after we free everything. */

    err = ompi_request_wait_all(nreqs, req, MPI_STATUSES_IGNORE);
@@ -152,11 +154,12 @@ int mca_coll_basic_alltoall_intra(void *sbuf, int scount,
 * Accepts: - same as MPI_Alltoall()
 * Returns: - MPI_SUCCESS or an MPI error code
 */
int
mca_coll_basic_alltoall_inter(void *sbuf, int scount,
                              struct ompi_datatype_t *sdtype,
                              void *rbuf, int rcount,
                              struct ompi_datatype_t *rdtype,
                              struct ompi_communicator_t *comm)
{
    int i;
    int rank;
@@ -180,49 +183,49 @@ int mca_coll_basic_alltoall_inter(void *sbuf, int scount,

    err = ompi_ddt_get_extent(sdtype, &lb, &sndinc);
    if (OMPI_SUCCESS != err) {
        return err;
    }
    sndinc *= scount;

    err = ompi_ddt_get_extent(rdtype, &lb, &rcvinc);
    if (OMPI_SUCCESS != err) {
        return err;
    }
    rcvinc *= rcount;

    /* Initiate all send/recv to/from others. */
    nreqs = size * 2;
    req = rreq = comm->c_coll_basic_data->mccb_reqs;
    sreq = rreq + size;

    prcv = (char *) rbuf;
    psnd = (char *) sbuf;

    /* Post all receives first */
    for (i = 0; i < size; i++, ++rreq) {
        err = MCA_PML_CALL(irecv(prcv + (i * rcvinc), rcount, rdtype, i,
                                 MCA_COLL_BASE_TAG_ALLTOALL, comm, rreq));
        if (OMPI_SUCCESS != err) {
            return err;
        }
    }

    /* Now post all sends */
    for (i = 0; i < size; i++, ++sreq) {
        err = MCA_PML_CALL(isend(psnd + (i * sndinc), scount, sdtype, i,
                                 MCA_COLL_BASE_TAG_ALLTOALL,
                                 MCA_PML_BASE_SEND_STANDARD, comm, sreq));
        if (OMPI_SUCCESS != err) {
            return err;
        }
    }

    /* Wait for them all. If there's an error, note that we don't
     * care what the error was -- just that there *was* an error. The
     * PML will finish all requests, even if one or more of them fail.
     * i.e., by the end of this call, all the requests are free-able.
     * So free them anyway -- even if there was an error, and return
     * the error after we free everything. */
    err = ompi_request_wait_all(nreqs, req, MPI_STATUSES_IGNORE);

    /* All done */
|
@ -35,110 +35,114 @@
|
||||
*/
|
||||
int
|
||||
mca_coll_basic_alltoallv_intra(void *sbuf, int *scounts, int *sdisps,
|
||||
struct ompi_datatype_t *sdtype,
|
||||
void *rbuf, int *rcounts, int *rdisps,
|
||||
struct ompi_datatype_t *rdtype,
|
||||
struct ompi_communicator_t *comm)
|
||||
struct ompi_datatype_t *sdtype,
|
||||
void *rbuf, int *rcounts, int *rdisps,
|
||||
struct ompi_datatype_t *rdtype,
|
||||
struct ompi_communicator_t *comm)
|
||||
{
|
||||
int i;
|
||||
int size;
|
||||
int rank;
|
||||
int err;
|
||||
char *psnd;
|
||||
char *prcv;
|
||||
size_t nreqs;
|
||||
MPI_Aint sndextent;
|
||||
MPI_Aint rcvextent;
|
||||
MPI_Request *preq;
|
||||
int i;
|
||||
int size;
|
||||
int rank;
|
||||
int err;
|
||||
char *psnd;
|
||||
char *prcv;
|
||||
size_t nreqs;
|
||||
MPI_Aint sndextent;
|
||||
MPI_Aint rcvextent;
|
||||
MPI_Request *preq;
|
||||
|
||||
/* Initialize. */
|
||||
/* Initialize. */
|
||||
|
||||
size = ompi_comm_size(comm);
|
||||
rank = ompi_comm_rank(comm);
|
||||
size = ompi_comm_size(comm);
|
||||
rank = ompi_comm_rank(comm);
|
||||
|
||||
ompi_ddt_type_extent(sdtype, &sndextent);
|
||||
ompi_ddt_type_extent(rdtype, &rcvextent);
|
||||
|
||||
/* simple optimization */
|
||||
ompi_ddt_type_extent(sdtype, &sndextent);
|
||||
ompi_ddt_type_extent(rdtype, &rcvextent);
|
||||
|
||||
psnd = ((char *) sbuf) + (sdisps[rank] * sndextent);
|
||||
prcv = ((char *) rbuf) + (rdisps[rank] * rcvextent);
|
||||
/* simple optimization */
|
||||
|
||||
if (0 != scounts[rank]) {
|
||||
err = ompi_ddt_sndrcv(psnd, scounts[rank], sdtype,
|
||||
prcv, rcounts[rank], rdtype);
|
||||
if (MPI_SUCCESS != err) {
|
||||
return err;
|
||||
}
|
||||
}
|
||||
psnd = ((char *) sbuf) + (sdisps[rank] * sndextent);
|
||||
prcv = ((char *) rbuf) + (rdisps[rank] * rcvextent);
|
||||
|
||||
/* If only one process, we're done. */
|
||||
|
||||
if (1 == size) {
|
||||
return MPI_SUCCESS;
|
||||
}
|
||||
|
||||
/* Initiate all send/recv to/from others. */
|
||||
|
||||
nreqs = 0;
|
||||
preq = comm->c_coll_basic_data->mccb_reqs;
|
||||
|
||||
/* Post all receives first -- a simple optimization */
|
||||
|
||||
for (i = 0; i < size; ++i) {
|
||||
if (i == rank || 0 == rcounts[i]) {
|
||||
continue;
|
||||
if (0 != scounts[rank]) {
|
||||
err = ompi_ddt_sndrcv(psnd, scounts[rank], sdtype,
|
||||
prcv, rcounts[rank], rdtype);
|
||||
if (MPI_SUCCESS != err) {
|
||||
return err;
|
||||
}
|
||||
}
|
||||
|
||||
prcv = ((char *) rbuf) + (rdisps[i] * rcvextent);
|
||||
err = MCA_PML_CALL(irecv_init(prcv, rcounts[i], rdtype,
|
||||
i, MCA_COLL_BASE_TAG_ALLTOALLV, comm, preq++));
|
||||
++nreqs;
|
||||
if (MPI_SUCCESS != err) {
|
||||
mca_coll_basic_free_reqs(comm->c_coll_basic_data->mccb_reqs, nreqs);
|
||||
return err;
|
||||
}
|
||||
}
|
||||
/* If only one process, we're done. */
|
||||
|
||||
/* Now post all sends */
|
||||
|
||||
for (i = 0; i < size; ++i) {
|
||||
if (i == rank || 0 == scounts[i]) {
|
||||
continue;
|
||||
if (1 == size) {
|
||||
return MPI_SUCCESS;
|
||||
}
|
||||
|
||||
psnd = ((char *) sbuf) + (sdisps[i] * sndextent);
|
||||
err = MCA_PML_CALL(isend_init(psnd, scounts[i], sdtype,
|
||||
i, MCA_COLL_BASE_TAG_ALLTOALLV,
|
||||
MCA_PML_BASE_SEND_STANDARD, comm, preq++));
|
||||
++nreqs;
|
||||
if (MPI_SUCCESS != err) {
|
||||
mca_coll_basic_free_reqs(comm->c_coll_basic_data->mccb_reqs, nreqs);
|
||||
return err;
|
||||
/* Initiate all send/recv to/from others. */
|
||||
|
||||
nreqs = 0;
|
||||
preq = comm->c_coll_basic_data->mccb_reqs;
|
||||
|
||||
/* Post all receives first -- a simple optimization */
|
||||
|
||||
for (i = 0; i < size; ++i) {
|
||||
if (i == rank || 0 == rcounts[i]) {
|
||||
continue;
|
||||
}
|
||||
|
||||
prcv = ((char *) rbuf) + (rdisps[i] * rcvextent);
|
||||
err = MCA_PML_CALL(irecv_init(prcv, rcounts[i], rdtype,
|
||||
i, MCA_COLL_BASE_TAG_ALLTOALLV, comm,
|
||||
preq++));
|
||||
++nreqs;
|
||||
if (MPI_SUCCESS != err) {
|
||||
mca_coll_basic_free_reqs(comm->c_coll_basic_data->mccb_reqs,
|
||||
nreqs);
|
||||
return err;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Start your engines. This will never return an error. */
|
||||
/* Now post all sends */
|
||||
|
||||
MCA_PML_CALL(start(nreqs, comm->c_coll_basic_data->mccb_reqs));
|
||||
for (i = 0; i < size; ++i) {
|
||||
if (i == rank || 0 == scounts[i]) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Wait for them all. If there's an error, note that we don't care
|
||||
what the error was -- just that there *was* an error. The PML
|
||||
will finish all requests, even if one or more of them fail.
|
||||
i.e., by the end of this call, all the requests are free-able.
|
||||
So free them anyway -- even if there was an error, and return the
|
||||
error after we free everything. */
|
||||
psnd = ((char *) sbuf) + (sdisps[i] * sndextent);
|
||||
err = MCA_PML_CALL(isend_init(psnd, scounts[i], sdtype,
|
||||
i, MCA_COLL_BASE_TAG_ALLTOALLV,
|
||||
MCA_PML_BASE_SEND_STANDARD, comm,
|
||||
preq++));
|
||||
++nreqs;
|
||||
if (MPI_SUCCESS != err) {
|
||||
mca_coll_basic_free_reqs(comm->c_coll_basic_data->mccb_reqs,
|
||||
nreqs);
|
||||
return err;
|
||||
}
|
||||
}
|
||||
|
||||
err = ompi_request_wait_all(nreqs, comm->c_coll_basic_data->mccb_reqs,
|
||||
MPI_STATUSES_IGNORE);
|
||||
/* Start your engines. This will never return an error. */
|
||||
|
||||
/* Free the requests. */
|
||||
MCA_PML_CALL(start(nreqs, comm->c_coll_basic_data->mccb_reqs));
|
||||
|
||||
mca_coll_basic_free_reqs(comm->c_coll_basic_data->mccb_reqs, nreqs);
|
||||
/* Wait for them all. If there's an error, note that we don't care
|
||||
* what the error was -- just that there *was* an error. The PML
|
||||
* will finish all requests, even if one or more of them fail.
|
||||
* i.e., by the end of this call, all the requests are free-able.
|
||||
* So free them anyway -- even if there was an error, and return the
|
||||
* error after we free everything. */
|
||||
|
||||
/* All done */
|
||||
err = ompi_request_wait_all(nreqs, comm->c_coll_basic_data->mccb_reqs,
|
||||
MPI_STATUSES_IGNORE);
|
||||
|
||||
return err;
|
||||
/* Free the requests. */
|
||||
|
||||
mca_coll_basic_free_reqs(comm->c_coll_basic_data->mccb_reqs, nreqs);
|
||||
|
||||
/* All done */
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
@@ -151,68 +155,68 @@ mca_coll_basic_alltoallv_intra(void *sbuf, int *scounts, int *sdisps,
 */
int
mca_coll_basic_alltoallv_inter(void *sbuf, int *scounts, int *sdisps,
                               struct ompi_datatype_t *sdtype, void *rbuf,
                               int *rcounts, int *rdisps,
                               struct ompi_datatype_t *rdtype,
                               struct ompi_communicator_t *comm)
{
    int i;
    int rsize;
    int rank;
    int err;
    char *psnd;
    char *prcv;
    size_t nreqs;
    MPI_Aint sndextent;
    MPI_Aint rcvextent;
    ompi_request_t **preq = comm->c_coll_basic_data->mccb_reqs;

    /* Initialize. */

    rsize = ompi_comm_remote_size(comm);
    rank = ompi_comm_rank(comm);

    ompi_ddt_type_extent(sdtype, &sndextent);
    ompi_ddt_type_extent(rdtype, &rcvextent);

    /* Initiate all send/recv to/from others. */
    nreqs = rsize * 2;

    /* Post all receives first */
    /* A simple optimization: do not send and recv msgs of length zero */
    for (i = 0; i < rsize; ++i) {
        prcv = ((char *) rbuf) + (rdisps[i] * rcvextent);
        if (rcounts[i] > 0) {
            err = MCA_PML_CALL(irecv(prcv, rcounts[i], rdtype,
                                     i, MCA_COLL_BASE_TAG_ALLTOALLV, comm,
                                     &preq[i]));
            if (MPI_SUCCESS != err) {
                return err;
            }
        } else {
            preq[i] = MPI_REQUEST_NULL;
        }
    }

    /* Now post all sends */
    for (i = 0; i < rsize; ++i) {
        psnd = ((char *) sbuf) + (sdisps[i] * sndextent);
        if (scounts[i] > 0) {
            err = MCA_PML_CALL(isend(psnd, scounts[i], sdtype,
                                     i, MCA_COLL_BASE_TAG_ALLTOALLV,
                                     MCA_PML_BASE_SEND_STANDARD, comm,
                                     &preq[rsize + i]));
            if (MPI_SUCCESS != err) {
                return err;
            }
        } else {
            preq[rsize + i] = MPI_REQUEST_NULL;
        }
    }

    err = ompi_request_wait_all(nreqs, preq, MPI_STATUSES_IGNORE);

    /* All done */
    return err;
}
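Each peer's slice of sbuf/rbuf above is located by scaling the per-peer displacement by the datatype *extent*, not its packed size, which is what makes user displacements in MPI_Alltoallv work for padded datatypes. A hedged sketch of that address computation in plain MPI (the helper name is hypothetical):

#include <mpi.h>

/* Sketch: compute the starting byte address of peer i's slice the way
 * the loops above do, using the datatype extent. */
static void *slice_for_peer(void *base, const int *disps, int i,
                            MPI_Datatype dtype)
{
    MPI_Aint lb, extent;

    MPI_Type_get_extent(dtype, &lb, &extent);
    return ((char *) base) + ((MPI_Aint) disps[i]) * extent;
}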
@@ -32,104 +32,109 @@
 * Accepts: - same as MPI_Alltoallw()
 * Returns: - MPI_SUCCESS or an MPI error code
 */
int
mca_coll_basic_alltoallw_intra(void *sbuf, int *scounts, int *sdisps,
                               struct ompi_datatype_t **sdtypes,
                               void *rbuf, int *rcounts, int *rdisps,
                               struct ompi_datatype_t **rdtypes,
                               struct ompi_communicator_t *comm)
{
    int i;
    int size;
    int rank;
    int err;
    char *psnd;
    char *prcv;
    size_t nreqs;
    MPI_Request *preq;

    /* Initialize. */

    size = ompi_comm_size(comm);
    rank = ompi_comm_rank(comm);

    /* simple optimization */

    psnd = ((char *) sbuf) + sdisps[rank];
    prcv = ((char *) rbuf) + rdisps[rank];

    if (0 != scounts[rank]) {
        err = ompi_ddt_sndrcv(psnd, scounts[rank], sdtypes[rank],
                              prcv, rcounts[rank], rdtypes[rank]);
        if (MPI_SUCCESS != err) {
            return err;
        }
    }

    /* If only one process, we're done. */

    if (1 == size) {
        return MPI_SUCCESS;
    }

    /* Initiate all send/recv to/from others. */

    nreqs = 0;
    preq = comm->c_coll_basic_data->mccb_reqs;

    /* Post all receives first -- a simple optimization */

    for (i = 0; i < size; ++i) {
        if (i == rank || 0 == rcounts[i])
            continue;

        prcv = ((char *) rbuf) + rdisps[i];
        err = MCA_PML_CALL(irecv_init(prcv, rcounts[i], rdtypes[i],
                                      i, MCA_COLL_BASE_TAG_ALLTOALLW, comm,
                                      preq++));
        ++nreqs;
        if (MPI_SUCCESS != err) {
            mca_coll_basic_free_reqs(comm->c_coll_basic_data->mccb_reqs,
                                     nreqs);
            return err;
        }
    }

    /* Now post all sends */

    for (i = 0; i < size; ++i) {
        if (i == rank || 0 == scounts[i])
            continue;

        psnd = ((char *) sbuf) + sdisps[i];
        err = MCA_PML_CALL(isend_init(psnd, scounts[i], sdtypes[i],
                                      i, MCA_COLL_BASE_TAG_ALLTOALLW,
                                      MCA_PML_BASE_SEND_STANDARD, comm,
                                      preq++));
        ++nreqs;
        if (MPI_SUCCESS != err) {
            mca_coll_basic_free_reqs(comm->c_coll_basic_data->mccb_reqs,
                                     nreqs);
            return err;
        }
    }

    /* Start your engines.  This will never return an error. */

    MCA_PML_CALL(start(nreqs, comm->c_coll_basic_data->mccb_reqs));

    /* Wait for them all.  If there's an error, note that we don't care
     * what the error was -- just that there *was* an error.  The PML
     * will finish all requests, even if one or more of them fail.
     * i.e., by the end of this call, all the requests are free-able.
     * So free them anyway -- even if there was an error, and return the
     * error after we free everything. */

    err = ompi_request_wait_all(nreqs, comm->c_coll_basic_data->mccb_reqs,
                                MPI_STATUSES_IGNORE);

    /* Free the requests. */

    mca_coll_basic_free_reqs(comm->c_coll_basic_data->mccb_reqs, nreqs);

    /* All done */

    return MPI_SUCCESS;
}
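Unlike the alltoallv path, alltoallw is built on persistent requests: irecv_init()/isend_init() only set the transfers up, and nothing moves until the later start() call. A small sketch of the equivalent plain-MPI pattern (ranks, tag, and helper name are illustrative assumptions):

#include <mpi.h>

/* Sketch: persistent-request setup/start/complete/free, exchanging one
 * int with a left and a right neighbor. */
static int ring_exchange(int *sendval, int *recvval, int left, int right,
                         MPI_Comm comm)
{
    MPI_Request reqs[2];
    int err;

    MPI_Recv_init(recvval, 1, MPI_INT, left, 0, comm, &reqs[0]);
    MPI_Send_init(sendval, 1, MPI_INT, right, 0, comm, &reqs[1]);

    MPI_Startall(2, reqs);      /* nothing is transferred until here */
    err = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);

    MPI_Request_free(&reqs[0]);
    MPI_Request_free(&reqs[1]);
    return err;
}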
@@ -140,11 +145,12 @@ int mca_coll_basic_alltoallw_intra(void *sbuf, int *scounts, int *sdisps,
 * Accepts: - same as MPI_Alltoallw()
 * Returns: - MPI_SUCCESS or an MPI error code
 */
int
mca_coll_basic_alltoallw_inter(void *sbuf, int *scounts, int *sdisps,
                               struct ompi_datatype_t **sdtypes,
                               void *rbuf, int *rcounts, int *rdisps,
                               struct ompi_datatype_t **rdtypes,
                               struct ompi_communicator_t *comm)
{
    int i;
    int size;
@@ -154,54 +160,57 @@ int mca_coll_basic_alltoallw_inter(void *sbuf, int *scounts, int *sdisps,
    char *prcv;
    size_t nreqs;
    MPI_Request *preq;

    /* Initialize. */
    size = ompi_comm_remote_size(comm);
    rank = ompi_comm_rank(comm);

    /* Initiate all send/recv to/from others. */
    nreqs = size * 2;
    preq = comm->c_coll_basic_data->mccb_reqs;

    /* Post all receives first -- a simple optimization */
    for (i = 0; i < size; ++i) {
        prcv = ((char *) rbuf) + rdisps[i];
        err = MCA_PML_CALL(irecv_init(prcv, rcounts[i], rdtypes[i],
                                      i, MCA_COLL_BASE_TAG_ALLTOALLW,
                                      comm, preq++));
        if (OMPI_SUCCESS != err) {
            mca_coll_basic_free_reqs(comm->c_coll_basic_data->mccb_reqs,
                                     nreqs);
            return err;
        }
    }

    /* Now post all sends */
    for (i = 0; i < size; ++i) {
        psnd = ((char *) sbuf) + sdisps[i];
        err = MCA_PML_CALL(isend_init(psnd, scounts[i], sdtypes[i],
                                      i, MCA_COLL_BASE_TAG_ALLTOALLW,
                                      MCA_PML_BASE_SEND_STANDARD, comm,
                                      preq++));
        if (OMPI_SUCCESS != err) {
            mca_coll_basic_free_reqs(comm->c_coll_basic_data->mccb_reqs,
                                     nreqs);
            return err;
        }
    }

    /* Start your engines.  This will never return an error. */
    MCA_PML_CALL(start(nreqs, comm->c_coll_basic_data->mccb_reqs));

    /* Wait for them all.  If there's an error, note that we don't care
     * what the error was -- just that there *was* an error.  The PML
     * will finish all requests, even if one or more of them fail.
     * i.e., by the end of this call, all the requests are free-able.
     * So free them anyway -- even if there was an error, and return the
     * error after we free everything. */
    err = ompi_request_wait_all(nreqs, comm->c_coll_basic_data->mccb_reqs,
                                MPI_STATUSES_IGNORE);

    /* Free the requests. */
    mca_coll_basic_free_reqs(comm->c_coll_basic_data->mccb_reqs, nreqs);

    /* All done */
    return err;
}
@@ -34,53 +34,61 @@
 * Accepts: - same as MPI_Barrier()
 * Returns: - MPI_SUCCESS or error code
 */
int
mca_coll_basic_barrier_intra_lin(struct ompi_communicator_t *comm)
{
    int i;
    int err;
    int size = ompi_comm_size(comm);
    int rank = ompi_comm_rank(comm);

    /* All non-root send & receive zero-length message. */

    if (rank > 0) {
        err =
            MCA_PML_CALL(send
                         (NULL, 0, MPI_BYTE, 0, MCA_COLL_BASE_TAG_BARRIER,
                          MCA_PML_BASE_SEND_STANDARD, comm));
        if (MPI_SUCCESS != err) {
            return err;
        }

        err =
            MCA_PML_CALL(recv
                         (NULL, 0, MPI_BYTE, 0, MCA_COLL_BASE_TAG_BARRIER,
                          comm, MPI_STATUS_IGNORE));
        if (MPI_SUCCESS != err) {
            return err;
        }
    }

    /* The root collects and broadcasts the messages. */

    else {
        for (i = 1; i < size; ++i) {
            err = MCA_PML_CALL(recv(NULL, 0, MPI_BYTE, MPI_ANY_SOURCE,
                                    MCA_COLL_BASE_TAG_BARRIER,
                                    comm, MPI_STATUS_IGNORE));
            if (MPI_SUCCESS != err) {
                return err;
            }
        }

        for (i = 1; i < size; ++i) {
            err =
                MCA_PML_CALL(send
                             (NULL, 0, MPI_BYTE, i,
                              MCA_COLL_BASE_TAG_BARRIER,
                              MCA_PML_BASE_SEND_STANDARD, comm));
            if (MPI_SUCCESS != err) {
                return err;
            }
        }
    }

    /* All done */

    return MPI_SUCCESS;
}
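The linear barrier above is a fan-in/fan-out over zero-length messages: every non-root rank checks in with rank 0 and then blocks until rank 0 answers. The same algorithm in plain MPI, for reference (the tag value is an arbitrary illustration):

#include <mpi.h>

/* Sketch: linear fan-in/fan-out barrier. */
static int linear_barrier(MPI_Comm comm)
{
    int i, rank, size;

    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &size);

    if (rank > 0) {
        MPI_Send(NULL, 0, MPI_BYTE, 0, 1000, comm);
        MPI_Recv(NULL, 0, MPI_BYTE, 0, 1000, comm, MPI_STATUS_IGNORE);
    } else {
        for (i = 1; i < size; ++i) {    /* fan-in: collect a check-in from everyone */
            MPI_Recv(NULL, 0, MPI_BYTE, MPI_ANY_SOURCE, 1000, comm,
                     MPI_STATUS_IGNORE);
        }
        for (i = 1; i < size; ++i) {    /* fan-out: release everyone */
            MPI_Send(NULL, 0, MPI_BYTE, i, 1000, comm);
        }
    }
    return MPI_SUCCESS;
}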
@@ -91,70 +99,74 @@ int mca_coll_basic_barrier_intra_lin(struct ompi_communicator_t *comm)
 * Accepts: - same as MPI_Barrier()
 * Returns: - MPI_SUCCESS or error code
 */
int
mca_coll_basic_barrier_intra_log(struct ompi_communicator_t *comm)
{
    int i;
    int err;
    int peer;
    int dim;
    int hibit;
    int mask;
    int size = ompi_comm_size(comm);
    int rank = ompi_comm_rank(comm);

    /* Send null-messages up and down the tree.  Synchronization at the
     * root (rank 0). */

    dim = comm->c_cube_dim;
    hibit = opal_hibit(rank, dim);
    --dim;

    /* Receive from children. */

    for (i = dim, mask = 1 << i; i > hibit; --i, mask >>= 1) {
        peer = rank | mask;
        if (peer < size) {
            err = MCA_PML_CALL(recv(NULL, 0, MPI_BYTE, peer,
                                    MCA_COLL_BASE_TAG_BARRIER,
                                    comm, MPI_STATUS_IGNORE));
            if (MPI_SUCCESS != err) {
                return err;
            }
        }
    }

    /* Send to and receive from parent. */

    if (rank > 0) {
        peer = rank & ~(1 << hibit);
        err =
            MCA_PML_CALL(send
                         (NULL, 0, MPI_BYTE, peer,
                          MCA_COLL_BASE_TAG_BARRIER,
                          MCA_PML_BASE_SEND_STANDARD, comm));
        if (MPI_SUCCESS != err) {
            return err;
        }

        err = MCA_PML_CALL(recv(NULL, 0, MPI_BYTE, peer,
                                MCA_COLL_BASE_TAG_BARRIER,
                                comm, MPI_STATUS_IGNORE));
    }

    /* Send to children. */

    for (i = hibit + 1, mask = 1 << i; i <= dim; ++i, mask <<= 1) {
        peer = rank | mask;
        if (peer < size) {
            err = MCA_PML_CALL(send(NULL, 0, MPI_BYTE, peer,
                                    MCA_COLL_BASE_TAG_BARRIER,
                                    MCA_PML_BASE_SEND_STANDARD, comm));
            if (MPI_SUCCESS != err) {
                return err;
            }
        }
    }

    /* All done */

    return MPI_SUCCESS;
}
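The tree structure above is entirely bit arithmetic: opal_hibit() locates the highest set bit of the rank, clearing that bit names the parent, and setting each higher bit (while staying under size) names a child. A simplified stand-in for that arithmetic, assuming (not verifying) opal_hibit()'s exact semantics:

/* Sketch: index of the highest set bit of rank below position dim,
 * or -1 for rank 0 -- rank 0 is the root of the tree. */
static int highest_bit(int rank, int dim)
{
    int bit;

    for (bit = dim - 1; bit >= 0; --bit) {
        if (rank & (1 << bit)) {
            return bit;
        }
    }
    return -1;
}

/* Parent of a nonzero rank: clear its highest set bit. */
static int tree_parent(int rank, int dim)
{
    return rank & ~(1 << highest_bit(rank, dim));
}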
@@ -165,11 +177,13 @@ int mca_coll_basic_barrier_intra_log(struct ompi_communicator_t *comm)
 * Accepts: - same as MPI_Barrier()
 * Returns: - MPI_SUCCESS or error code
 */
int
mca_coll_basic_barrier_inter_lin(struct ompi_communicator_t *comm)
{
    int rank;
    int result;

    rank = ompi_comm_rank(comm);
    return comm->c_coll.coll_allreduce(&rank, &result, 1, MPI_INT, MPI_MAX,
                                       comm);
}
@@ -35,9 +35,10 @@
 * Accepts: - same arguments as MPI_Bcast()
 * Returns: - MPI_SUCCESS or error code
 */
int
mca_coll_basic_bcast_lin_intra(void *buff, int count,
                               struct ompi_datatype_t *datatype, int root,
                               struct ompi_communicator_t *comm)
{
    int i;
    int size;
@@ -48,29 +49,29 @@ int mca_coll_basic_bcast_lin_intra(void *buff, int count,

    size = ompi_comm_size(comm);
    rank = ompi_comm_rank(comm);

    /* Non-root receive the data. */

    if (rank != root) {
        return MCA_PML_CALL(recv(buff, count, datatype, root,
                                 MCA_COLL_BASE_TAG_BCAST, comm,
                                 MPI_STATUS_IGNORE));
    }

    /* Root sends data to all others. */

    for (i = 0, preq = reqs; i < size; ++i) {
        if (i == rank) {
            continue;
        }

        err = MCA_PML_CALL(isend_init(buff, count, datatype, i,
                                      MCA_COLL_BASE_TAG_BCAST,
                                      MCA_PML_BASE_SEND_STANDARD,
                                      comm, preq++));
        if (MPI_SUCCESS != err) {
            return err;
        }
    }
    --i;
@@ -79,18 +80,18 @@ int mca_coll_basic_bcast_lin_intra(void *buff, int count,
    MCA_PML_CALL(start(i, reqs));

    /* Wait for them all.  If there's an error, note that we don't
     * care what the error was -- just that there *was* an error.  The
     * PML will finish all requests, even if one or more of them fail.
     * i.e., by the end of this call, all the requests are free-able.
     * So free them anyway -- even if there was an error, and return
     * the error after we free everything. */

    err = ompi_request_wait_all(i, reqs, MPI_STATUSES_IGNORE);

    /* Free the reqs */

    mca_coll_basic_free_reqs(reqs, i);

    /* All done */

    return err;
@@ -104,9 +105,10 @@ int mca_coll_basic_bcast_lin_intra(void *buff, int count,
 * Accepts: - same arguments as MPI_Bcast()
 * Returns: - MPI_SUCCESS or error code
 */
int
mca_coll_basic_bcast_log_intra(void *buff, int count,
                               struct ompi_datatype_t *datatype, int root,
                               struct ompi_communicator_t *comm)
{
    int i;
    int size;
@@ -135,8 +137,8 @@ int mca_coll_basic_bcast_log_intra(void *buff, int count,
        peer = ((vrank & ~(1 << hibit)) + root) % size;

        err = MCA_PML_CALL(recv(buff, count, datatype, peer,
                                MCA_COLL_BASE_TAG_BCAST,
                                comm, MPI_STATUS_IGNORE));
        if (MPI_SUCCESS != err) {
            return err;
        }
@@ -154,11 +156,11 @@ int mca_coll_basic_bcast_log_intra(void *buff, int count,
            ++nreqs;

            err = MCA_PML_CALL(isend_init(buff, count, datatype, peer,
                                          MCA_COLL_BASE_TAG_BCAST,
                                          MCA_PML_BASE_SEND_STANDARD,
                                          comm, preq++));
            if (MPI_SUCCESS != err) {
                mca_coll_basic_free_reqs(reqs, preq - reqs);
                return err;
            }
        }
@@ -168,22 +170,22 @@ int mca_coll_basic_bcast_log_intra(void *buff, int count,

    if (nreqs > 0) {

        /* Start your engines.  This will never return an error. */

        MCA_PML_CALL(start(nreqs, reqs));

        /* Wait for them all.  If there's an error, note that we don't
         * care what the error was -- just that there *was* an error.
         * The PML will finish all requests, even if one or more of them
         * fail.  i.e., by the end of this call, all the requests are
         * free-able.  So free them anyway -- even if there was an
         * error, and return the error after we free everything. */

        err = ompi_request_wait_all(nreqs, reqs, MPI_STATUSES_IGNORE);

        /* Free the reqs */

        mca_coll_basic_free_reqs(reqs, nreqs);
    }

    /* All done */
@@ -199,9 +201,10 @@ int mca_coll_basic_bcast_log_intra(void *buff, int count,
 * Accepts: - same arguments as MPI_Bcast()
 * Returns: - MPI_SUCCESS or error code
 */
int
mca_coll_basic_bcast_lin_inter(void *buff, int count,
                               struct ompi_datatype_t *datatype, int root,
                               struct ompi_communicator_t *comm)
{
    int i;
    int rsize;
@@ -210,33 +213,31 @@ int mca_coll_basic_bcast_lin_inter(void *buff, int count,
    ompi_request_t **reqs = comm->c_coll_basic_data->mccb_reqs;

    rsize = ompi_comm_remote_size(comm);
    rank = ompi_comm_rank(comm);

    if (MPI_PROC_NULL == root) {
        /* do nothing */
        err = OMPI_SUCCESS;
    } else if (MPI_ROOT != root) {
        /* Non-root receive the data. */
        err = MCA_PML_CALL(recv(buff, count, datatype, root,
                                MCA_COLL_BASE_TAG_BCAST, comm,
                                MPI_STATUS_IGNORE));
    } else {
        /* root section */
        for (i = 0; i < rsize; i++) {
            err = MCA_PML_CALL(isend(buff, count, datatype, i,
                                     MCA_COLL_BASE_TAG_BCAST,
                                     MCA_PML_BASE_SEND_STANDARD,
                                     comm, &(reqs[i])));
            if (OMPI_SUCCESS != err) {
                return err;
            }
        }
        err = ompi_request_wait_all(rsize, reqs, MPI_STATUSES_IGNORE);
    }

    /* All done */
    return err;
}
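The three branches above mirror how MPI names the root of an intercommunicator broadcast: the actual root passes MPI_ROOT, its local peers pass MPI_PROC_NULL (and do nothing), and every process in the remote group passes the root's rank. A caller-side sketch (the function and parameter names are hypothetical):

#include <mpi.h>

/* Sketch: driving an intercommunicator bcast from both sides. */
static void bcast_across_groups(int *buf, MPI_Comm intercomm,
                                int in_root_group, int root_rank)
{
    int rank;

    MPI_Comm_rank(intercomm, &rank);
    if (in_root_group) {
        /* One process is the real root; its local peers contribute nothing. */
        MPI_Bcast(buf, 1, MPI_INT,
                  (rank == root_rank) ? MPI_ROOT : MPI_PROC_NULL,
                  intercomm);
    } else {
        /* The remote group names the root by its rank in the other group. */
        MPI_Bcast(buf, 1, MPI_INT, root_rank, intercomm);
    }
}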
@@ -249,9 +250,10 @@ int mca_coll_basic_bcast_lin_inter(void *buff, int count,
 * Accepts: - same arguments as MPI_Bcast()
 * Returns: - MPI_SUCCESS or error code
 */
int
mca_coll_basic_bcast_log_inter(void *buff, int count,
                               struct ompi_datatype_t *datatype, int root,
                               struct ompi_communicator_t *comm)
{
    return OMPI_ERR_NOT_IMPLEMENTED;
}
@@ -31,7 +31,7 @@
 * Public string showing the coll ompi_basic component version number
 */
const char *mca_coll_basic_component_version_string =
    "Open MPI basic collective MCA component version " OMPI_VERSION;

/*
 * Global variable
@@ -50,50 +50,49 @@ static int basic_open(void);

const mca_coll_base_component_1_0_0_t mca_coll_basic_component = {

    /* First, the mca_component_t struct containing meta information
     * about the component itself */

    {
     /* Indicate that we are a coll v1.0.0 component (which also implies a
      * specific MCA version) */

     MCA_COLL_BASE_VERSION_1_0_0,

     /* Component name and version */

     "basic",
     OMPI_MAJOR_VERSION,
     OMPI_MINOR_VERSION,
     OMPI_RELEASE_VERSION,

     /* Component open and close functions */

     basic_open,
     NULL},

    /* Next the MCA v1.0.0 component meta data */

    {
     /* Whether the component is checkpointable or not */

     true},

    /* Initialization / querying functions */

    mca_coll_basic_init_query,
    mca_coll_basic_comm_query,
    NULL
};


static int
basic_open(void)
{
    /* Use a low priority, but allow other components to be lower */

    mca_coll_basic_priority_param =
        mca_base_param_register_int("coll", "basic", "priority", NULL, 10);

    return OMPI_SUCCESS;
}
@@ -36,128 +36,134 @@
 * Accepts: - same arguments as MPI_Exscan()
 * Returns: - MPI_SUCCESS or error code
 */
int
mca_coll_basic_exscan_intra(void *sbuf, void *rbuf, int count,
                            struct ompi_datatype_t *dtype,
                            struct ompi_op_t *op,
                            struct ompi_communicator_t *comm)
{
    int size;
    int rank;
    int err;
    long true_lb, true_extent, lb, extent;
    char *free_buffer = NULL;
    char *reduce_buffer = NULL;
    char *source;
    MPI_Request req = MPI_REQUEST_NULL;

    /* Initialize. */

    rank = ompi_comm_rank(comm);
    size = ompi_comm_size(comm);

    /* If we're rank 0, then we send our sbuf to the next rank */

    if (0 == rank) {
        return MCA_PML_CALL(send(sbuf, count, dtype, rank + 1,
                                 MCA_COLL_BASE_TAG_EXSCAN,
                                 MCA_PML_BASE_SEND_STANDARD, comm));
    }

    /* If we're the last rank, then just receive the result from the
     * prior rank */

    else if ((size - 1) == rank) {
        return MCA_PML_CALL(recv(rbuf, count, dtype, rank - 1,
                                 MCA_COLL_BASE_TAG_EXSCAN, comm,
                                 MPI_STATUS_IGNORE));
    }

    /* Otherwise, get the result from the prior rank, combine it with my
     * data, and send it to the next rank */

    /* Start the receive for the prior rank's answer */

    err = MCA_PML_CALL(irecv(rbuf, count, dtype, rank - 1,
                             MCA_COLL_BASE_TAG_EXSCAN, comm, &req));
    if (MPI_SUCCESS != err) {
        goto error;
    }

    /* Get a temporary buffer to perform the reduction into.  Rationale
     * for malloc'ing this size is provided in coll_basic_reduce.c. */

    ompi_ddt_get_extent(dtype, &lb, &extent);
    ompi_ddt_get_true_extent(dtype, &true_lb, &true_extent);

    free_buffer = malloc(true_extent + (count - 1) * extent);
    if (NULL == free_buffer) {
        return OMPI_ERR_OUT_OF_RESOURCE;
    }
    reduce_buffer = free_buffer - lb;

    if (ompi_op_is_commute(op)) {

        /* If we're commutative, we can copy my sbuf into the reduction
         * buffer before the receive completes */

        err =
            ompi_ddt_sndrcv(sbuf, count, dtype, reduce_buffer, count,
                            dtype);
        if (MPI_SUCCESS != err) {
            goto error;
        }

        /* Now setup the reduction */

        source = rbuf;

        /* Finally, wait for the receive to complete (so that we can do
         * the reduction). */

        err = ompi_request_wait(&req, MPI_STATUS_IGNORE);
        if (MPI_SUCCESS != err) {
            goto error;
        }
    } else {

        /* Setup the reduction */

        source = sbuf;

        /* If we're not commutative, we have to wait for the receive to
         * complete and then copy it into the reduce buffer */

        err = ompi_request_wait(&req, MPI_STATUS_IGNORE);
        if (MPI_SUCCESS != err) {
            goto error;
        }

        err =
            ompi_ddt_sndrcv(rbuf, count, dtype, reduce_buffer, count,
                            dtype);
        if (MPI_SUCCESS != err) {
            goto error;
        }
    }

    /* Now reduce the received answer with my source into the answer
     * that we send off to the next rank */

    ompi_op_reduce(op, source, reduce_buffer, count, dtype);

    /* Send my result off to the next rank */

    err = MCA_PML_CALL(send(reduce_buffer, count, dtype, rank + 1,
                            MCA_COLL_BASE_TAG_EXSCAN,
                            MCA_PML_BASE_SEND_STANDARD, comm));

    /* Error */

  error:
    free(free_buffer);
    if (MPI_REQUEST_NULL != req) {
        ompi_request_cancel(req);
        ompi_request_wait(&req, MPI_STATUS_IGNORE);
    }

    /* All done */

    return err;
}
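Structurally, this exscan is a chain: rank 0 only seeds the pipeline, the last rank only consumes, and every rank in between receives the running prefix from rank - 1, folds in its own contribution, and forwards the result to rank + 1. The same algorithm reduced to an integer sum, as a sketch assuming at least two ranks (tag and helper name are illustrative):

#include <mpi.h>

/* Sketch: chain exscan for a sum of single ints.  As with MPI_Exscan,
 * *result is left untouched on rank 0. */
static int chain_exscan_sum(int myval, int *result, MPI_Comm comm)
{
    int rank, size, acc;

    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &size);

    if (0 == rank) {
        return MPI_Send(&myval, 1, MPI_INT, 1, 2000, comm);
    }

    MPI_Recv(result, 1, MPI_INT, rank - 1, 2000, comm, MPI_STATUS_IGNORE);
    if (rank == size - 1) {
        return MPI_SUCCESS;             /* last rank only consumes */
    }

    acc = *result + myval;              /* fold my value into the prefix */
    return MPI_Send(&acc, 1, MPI_INT, rank + 1, 2000, comm);
}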
@@ -168,10 +174,11 @@ error:
 * Accepts: - same arguments as MPI_Exscan()
 * Returns: - MPI_SUCCESS or error code
 */
int
mca_coll_basic_exscan_inter(void *sbuf, void *rbuf, int count,
                            struct ompi_datatype_t *dtype,
                            struct ompi_op_t *op,
                            struct ompi_communicator_t *comm)
{
    return OMPI_ERR_NOT_IMPLEMENTED;
}
@@ -32,11 +32,12 @@
 * Accepts: - same arguments as MPI_Gather()
 * Returns: - MPI_SUCCESS or error code
 */
int
mca_coll_basic_gather_intra(void *sbuf, int scount,
                            struct ompi_datatype_t *sdtype,
                            void *rbuf, int rcount,
                            struct ompi_datatype_t *rdtype,
                            int root, struct ompi_communicator_t *comm)
{
    int i;
    int err;
@@ -53,9 +54,9 @@ int mca_coll_basic_gather_intra(void *sbuf, int scount,
    /* Everyone but root sends data and returns. */

    if (rank != root) {
        return MCA_PML_CALL(send(sbuf, scount, sdtype, root,
                                 MCA_COLL_BASE_TAG_GATHER,
                                 MCA_PML_BASE_SEND_STANDARD, comm));
    }

    /* I am the root, loop receiving the data. */
@@ -71,11 +72,11 @@ int mca_coll_basic_gather_intra(void *sbuf, int scount,

        if (i == rank) {
            err = ompi_ddt_sndrcv(sbuf, scount, sdtype, ptmp,
                                  rcount, rdtype);
        } else {
            err = MCA_PML_CALL(recv(ptmp, rcount, rdtype, i,
                                    MCA_COLL_BASE_TAG_GATHER,
                                    comm, MPI_STATUS_IGNORE));
        }
        if (MPI_SUCCESS != err) {
            return err;
@@ -83,7 +84,7 @@ int mca_coll_basic_gather_intra(void *sbuf, int scount,
    }

    /* All done */

    return MPI_SUCCESS;
}
@@ -95,11 +96,12 @@ int mca_coll_basic_gather_intra(void *sbuf, int scount,
 * Accepts: - same arguments as MPI_Gather()
 * Returns: - MPI_SUCCESS or error code
 */
int
mca_coll_basic_gather_inter(void *sbuf, int scount,
                            struct ompi_datatype_t *sdtype,
                            void *rbuf, int rcount,
                            struct ompi_datatype_t *rdtype,
                            int root, struct ompi_communicator_t *comm)
{
    int i;
    int err;
@@ -114,34 +116,32 @@ int mca_coll_basic_gather_inter(void *sbuf, int scount,
    rank = ompi_comm_rank(comm);

    if (MPI_PROC_NULL == root) {
        /* do nothing */
        err = OMPI_SUCCESS;
    } else if (MPI_ROOT != root) {
        /* Everyone but root sends data and returns. */
        err = MCA_PML_CALL(send(sbuf, scount, sdtype, root,
                                MCA_COLL_BASE_TAG_GATHER,
                                MCA_PML_BASE_SEND_STANDARD, comm));
    } else {
        /* I am the root, loop receiving the data. */
        err = ompi_ddt_get_extent(rdtype, &lb, &extent);
        if (OMPI_SUCCESS != err) {
            return OMPI_ERROR;
        }

        incr = extent * rcount;
        for (i = 0, ptmp = (char *) rbuf; i < size; ++i, ptmp += incr) {
            err = MCA_PML_CALL(recv(ptmp, rcount, rdtype, i,
                                    MCA_COLL_BASE_TAG_GATHER,
                                    comm, MPI_STATUS_IGNORE));
            if (MPI_SUCCESS != err) {
                return err;
            }
        }
    }

    /* All done */
    return err;
}
@@ -32,72 +32,73 @@
 * Accepts: - same arguments as MPI_Gatherv()
 * Returns: - MPI_SUCCESS or error code
 */
int
mca_coll_basic_gatherv_intra(void *sbuf, int scount,
                             struct ompi_datatype_t *sdtype,
                             void *rbuf, int *rcounts, int *disps,
                             struct ompi_datatype_t *rdtype, int root,
                             struct ompi_communicator_t *comm)
{
    int i;
    int rank;
    int size;
    int err;
    char *ptmp;
    long lb;
    long extent;

    size = ompi_comm_size(comm);
    rank = ompi_comm_rank(comm);

    /* Need this test here because this function is invoked by
     * allgatherv in addition to the top-level MPI_Gatherv */

    if (0 == scount && rank != root) {
        return MPI_SUCCESS;
    }

    /* Everyone but root sends data and returns.  Note that we will only
     * get here if scount > 0 or rank == root. */

    if (rank != root) {
        err = MCA_PML_CALL(send(sbuf, scount, sdtype, root,
                                MCA_COLL_BASE_TAG_GATHERV,
                                MCA_PML_BASE_SEND_STANDARD, comm));
        return err;
    }

    /* I am the root, loop receiving data. */

    err = ompi_ddt_get_extent(rdtype, &lb, &extent);
    if (OMPI_SUCCESS != err) {
        return OMPI_ERROR;
    }

    for (i = 0; i < size; ++i) {
        ptmp = ((char *) rbuf) + (extent * disps[i]);
        if (0 == rcounts[i]) {
            continue;
        }

        /* simple optimization */

        if (i == rank) {
            err = ompi_ddt_sndrcv(sbuf, scount, sdtype,
                                  ptmp, rcounts[i], rdtype);
        } else {
            err = MCA_PML_CALL(recv(ptmp, rcounts[i], rdtype, i,
                                    MCA_COLL_BASE_TAG_GATHERV,
                                    comm, MPI_STATUS_IGNORE));
        }

        if (MPI_SUCCESS != err) {
            return err;
        }
    }

    /* All done */

    return MPI_SUCCESS;
}
@@ -108,61 +109,60 @@ int mca_coll_basic_gatherv_intra(void *sbuf, int scount,
 * Accepts: - same arguments as MPI_Gatherv()
 * Returns: - MPI_SUCCESS or error code
 */
int
mca_coll_basic_gatherv_inter(void *sbuf, int scount,
                             struct ompi_datatype_t *sdtype,
                             void *rbuf, int *rcounts, int *disps,
                             struct ompi_datatype_t *rdtype, int root,
                             struct ompi_communicator_t *comm)
{
    int i;
    int rank;
    int size;
    int err;
    char *ptmp;
    long lb;
    long extent;
    ompi_request_t **reqs = comm->c_coll_basic_data->mccb_reqs;

    size = ompi_comm_remote_size(comm);
    rank = ompi_comm_rank(comm);

    /* If not root, receive data.  Note that we will only get here if
     * scount > 0 or rank == root. */

    if (MPI_PROC_NULL == root) {
        /* do nothing */
        err = OMPI_SUCCESS;
    } else if (MPI_ROOT != root) {
        /* Everyone but root sends data and returns. */
        err = MCA_PML_CALL(send(sbuf, scount, sdtype, root,
                                MCA_COLL_BASE_TAG_GATHERV,
                                MCA_PML_BASE_SEND_STANDARD, comm));
    } else {
        /* I am the root, loop receiving data. */
        err = ompi_ddt_get_extent(rdtype, &lb, &extent);
        if (OMPI_SUCCESS != err) {
            return OMPI_ERROR;
        }

        for (i = 0; i < size; ++i) {
            if (0 == rcounts[i]) {
                continue;
            }

            ptmp = ((char *) rbuf) + (extent * disps[i]);
            err = MCA_PML_CALL(irecv(ptmp, rcounts[i], rdtype, i,
                                     MCA_COLL_BASE_TAG_GATHERV,
                                     comm, &reqs[i]));
            if (OMPI_SUCCESS != err) {
                return err;
            }
        }

        err = ompi_request_wait_all(size, reqs, MPI_STATUSES_IGNORE);
    }

    /* All done */
    return err;
}
@@ -32,29 +32,29 @@
 */
static const mca_coll_base_module_1_0_0_t intra_linear = {

    /* Initialization / finalization functions */

    mca_coll_basic_module_init,
    mca_coll_basic_module_finalize,

    /* Collective function pointers */

    mca_coll_basic_allgather_intra,
    mca_coll_basic_allgatherv_intra,
    mca_coll_basic_allreduce_intra,
    mca_coll_basic_alltoall_intra,
    mca_coll_basic_alltoallv_intra,
    mca_coll_basic_alltoallw_intra,
    mca_coll_basic_barrier_intra_lin,
    mca_coll_basic_bcast_lin_intra,
    mca_coll_basic_exscan_intra,
    mca_coll_basic_gather_intra,
    mca_coll_basic_gatherv_intra,
    mca_coll_basic_reduce_lin_intra,
    mca_coll_basic_reduce_scatter_intra,
    mca_coll_basic_scan_intra,
    mca_coll_basic_scatter_intra,
    mca_coll_basic_scatterv_intra
};
@@ -65,29 +65,29 @@ static const mca_coll_base_module_1_0_0_t intra_linear = {
 */
static const mca_coll_base_module_1_0_0_t intra_log = {

    /* Initialization / finalization functions */

    mca_coll_basic_module_init,
    mca_coll_basic_module_finalize,

    /* Collective function pointers */

    mca_coll_basic_allgather_intra,
    mca_coll_basic_allgatherv_intra,
    mca_coll_basic_allreduce_intra,
    mca_coll_basic_alltoall_intra,
    mca_coll_basic_alltoallv_intra,
    mca_coll_basic_alltoallw_intra,
    mca_coll_basic_barrier_intra_log,
    mca_coll_basic_bcast_log_intra,
    mca_coll_basic_exscan_intra,
    mca_coll_basic_gather_intra,
    mca_coll_basic_gatherv_intra,
    mca_coll_basic_reduce_log_intra,
    mca_coll_basic_reduce_scatter_intra,
    mca_coll_basic_scan_intra,
    mca_coll_basic_scatter_intra,
    mca_coll_basic_scatterv_intra
};
@@ -96,29 +96,29 @@ static const mca_coll_base_module_1_0_0_t intra_log = {
 */
static const mca_coll_base_module_1_0_0_t inter_linear = {

    /* Initialization / finalization functions */

    mca_coll_basic_module_init,
    mca_coll_basic_module_finalize,

    /* Collective function pointers */

    mca_coll_basic_allgather_inter,
    mca_coll_basic_allgatherv_inter,
    mca_coll_basic_allreduce_inter,
    mca_coll_basic_alltoall_inter,
    mca_coll_basic_alltoallv_inter,
    mca_coll_basic_alltoallw_inter,
    mca_coll_basic_barrier_inter_lin,
    mca_coll_basic_bcast_lin_inter,
    NULL,
    mca_coll_basic_gather_inter,
    mca_coll_basic_gatherv_inter,
    mca_coll_basic_reduce_lin_inter,
    mca_coll_basic_reduce_scatter_inter,
    NULL,
    mca_coll_basic_scatter_inter,
    mca_coll_basic_scatterv_inter
};
@@ -127,11 +127,12 @@ static const mca_coll_base_module_1_0_0_t inter_linear = {
 * this component to disqualify itself if it doesn't support the
 * required level of thread support.
 */
int
mca_coll_basic_init_query(bool enable_progress_threads,
                          bool enable_mpi_threads)
{
    /* Nothing to do */

    return OMPI_SUCCESS;
}
@@ -143,27 +144,28 @@ int mca_coll_basic_init_query(bool enable_progress_threads,
 */
const mca_coll_base_module_1_0_0_t *
mca_coll_basic_comm_query(struct ompi_communicator_t *comm, int *priority,
                          struct mca_coll_base_comm_t **data)
{
    if (OMPI_SUCCESS !=
        mca_base_param_lookup_int(mca_coll_basic_priority_param,
                                  priority)) {
        return NULL;
    }

    /* Choose whether to use [intra|inter], and [linear|log]-based
     * algorithms. */

    if (OMPI_COMM_IS_INTER(comm)) {
        return &inter_linear;
    } else {
        if (ompi_comm_size(comm) <= mca_coll_base_crossover) {
            return &intra_linear;
        } else {
            return &intra_log;
        }
    }

    /* Never reach here */
}
@ -173,52 +175,53 @@ mca_coll_basic_comm_query(struct ompi_communicator_t *comm, int *priority,
const struct mca_coll_base_module_1_0_0_t *
mca_coll_basic_module_init(struct ompi_communicator_t *comm)
{
    int size;
    struct mca_coll_base_comm_t *data;

    /* Allocate the data that hangs off the communicator */

    if (OMPI_COMM_IS_INTER(comm)) {
        size = ompi_comm_remote_size(comm);
    } else {
        size = ompi_comm_size(comm);
    }
    data = malloc(sizeof(struct mca_coll_base_comm_t) +
                  (sizeof(ompi_request_t *) * size * 2));

    if (NULL == data) {
        return NULL;
    }
    data->mccb_reqs = (ompi_request_t **) (data + 1);
    data->mccb_num_reqs = size * 2;

    /* All done */

    comm->c_coll_basic_data = data;
    return comm->c_coll_basic_module;
}

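module_init uses a single malloc() for the per-communicator data and its request array: the header struct comes first, and (data + 1) points at the first byte after the struct, so one free() later releases both. A self-contained sketch of the idiom with a hypothetical comm_data type:

    #include <stdio.h>
    #include <stdlib.h>

    struct comm_data {
        void **reqs;     /* points into the same allocation */
        int num_reqs;
    };

    int main(void)
    {
        int size = 4;
        struct comm_data *data =
            malloc(sizeof(struct comm_data) + sizeof(void *) * size * 2);
        if (NULL == data) {
            return 1;
        }
        data->reqs = (void **) (data + 1);  /* array right after the struct */
        data->num_reqs = size * 2;
        printf("header %p, array %p\n", (void *) data, (void *) data->reqs);
        free(data);                         /* one free releases both */
        return 0;
    }

One allocation instead of two halves the failure paths and keeps the header and array adjacent in memory.
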
/*
 * Finalize module on the communicator
 */
int
mca_coll_basic_module_finalize(struct ompi_communicator_t *comm)
{
    if (NULL == comm->c_coll_basic_module) {
        return OMPI_SUCCESS;
    }

#if OMPI_ENABLE_DEBUG
    /* Reset the reqs to NULL/0 -- they'll be freed as part of freeing
     * the general c_coll_basic_data */

    comm->c_coll_basic_data->mccb_reqs = NULL;
    comm->c_coll_basic_data->mccb_num_reqs = 0;
#endif

    /* All done */

    free(comm->c_coll_basic_data);
    comm->c_coll_basic_data = NULL;
    return OMPI_SUCCESS;
}

@ -33,216 +33,218 @@
 * Accepts: - same as MPI_Reduce()
 * Returns: - MPI_SUCCESS or error code
 */
int
mca_coll_basic_reduce_lin_intra(void *sbuf, void *rbuf, int count,
                                struct ompi_datatype_t *dtype,
                                struct ompi_op_t *op,
                                int root, struct ompi_communicator_t *comm)
{
    int i;
    int rank;
    int err;
    int size;
    long true_lb, true_extent, lb, extent;
    char *free_buffer = NULL;
    char *pml_buffer = NULL;
    char *inbuf;

    /* Initialize */

    rank = ompi_comm_rank(comm);
    size = ompi_comm_size(comm);

    /* If not root, send data to the root. */

    if (rank != root) {
        err = MCA_PML_CALL(send(sbuf, count, dtype, root,
                                MCA_COLL_BASE_TAG_REDUCE,
                                MCA_PML_BASE_SEND_STANDARD, comm));
        return err;
    }

    /* Root receives and reduces messages.  Allocate buffer to receive
     * messages.  This comment applies to all collectives in this basic
     * module where we allocate a temporary buffer.  For the next few
     * lines of code, it's tremendously complicated how we decided that
     * this was the Right Thing to do.  Sit back and enjoy.  And prepare
     * to have your mind warped. :-)
     *
     * Recall some definitions (I always get these backwards, so I'm
     * going to put them here):
     *
     * extent: the length from the lower bound to the upper bound -- may
     * be considerably larger than the buffer required to hold the data
     * (or smaller!  But it's easiest to think about when it's larger).
     *
     * true extent: the exact number of bytes required to hold the data
     * in the layout pattern in the datatype.
     *
     * For example, consider the following buffer (just talking about
     * LB, extent, and true extent -- extrapolate for UB; i.e., assume
     * the UB equals exactly where the data ends):
     *
     * A                                        B                      C
     * --------------------------------------------------------
     * |                                        |                      |
     * --------------------------------------------------------
     *
     * There are multiple cases:
     *
     * 1. A is what we give to MPI_Send (and friends), and A is where
     * the data starts, and C is where the data ends.  In this case:
     *
     * - extent: C-A
     * - true extent: C-A
     * - LB: 0
     *
     * A                                                               C
     * --------------------------------------------------------
     * |                                                               |
     * --------------------------------------------------------
     * <=======================extent=========================>
     * <======================true extent=====================>
     *
     * 2. A is what we give to MPI_Send (and friends), B is where the
     * data starts, and C is where the data ends.  In this case:
     *
     * - extent: C-A
     * - true extent: C-B
     * - LB: positive
     *
     * A                                        B                      C
     * --------------------------------------------------------
     * |                                        | User buffer          |
     * --------------------------------------------------------
     * <=======================extent=========================>
     *                                          <======true extent====>
     *
     * 3. B is what we give to MPI_Send (and friends), A is where the
     * data starts, and C is where the data ends.  In this case:
     *
     * - extent: C-A
     * - true extent: C-A
     * - LB: negative
     *
     * A                                        B                      C
     * --------------------------------------------------------
     * |                                        | User buffer          |
     * --------------------------------------------------------
     * <=======================extent=========================>
     * <======================true extent=====================>
     *
     * 4. MPI_BOTTOM is what we give to MPI_Send (and friends), B is
     * where the data starts, and C is where the data ends.  In this
     * case:
     *
     * - extent: C-MPI_BOTTOM
     * - true extent: C-B
     * - LB: [potentially very large] positive
     *
     * MPI_BOTTOM                               B                      C
     * --------------------------------------------------------
     * |                                        | User buffer          |
     * --------------------------------------------------------
     * <=======================extent=========================>
     *                                          <======true extent====>
     *
     * So in all cases, for a temporary buffer, all we need to malloc()
     * is a buffer of size true_extent.  We therefore need to know two
     * pointer values: what value to give to MPI_Send (and friends) and
     * what value to give to free(), because they might not be the same.
     *
     * Clearly, what we give to free() is exactly what was returned from
     * malloc().  That part is easy. :-)
     *
     * What we give to MPI_Send (and friends) is a bit more complicated.
     * Let's take the 4 cases from above:
     *
     * 1. If A is what we give to MPI_Send and A is where the data
     * starts, then clearly we give to MPI_Send what we got back from
     * malloc().
     *
     * 2. If B is what we get back from malloc, but we give A to
     * MPI_Send, then the buffer range [A,B) represents "dead space"
     * -- no data will be put there.  So it's safe to give B-LB to
     * MPI_Send.  More specifically, the LB is positive, so B-LB is
     * actually A.
     *
     * 3. If A is what we get back from malloc, and B is what we give to
     * MPI_Send, then the LB is negative, so A-LB will actually equal
     * B.
     *
     * 4. Although this seems like the weirdest case, it's actually
     * quite similar to case #2 -- the pointer we give to MPI_Send is
     * smaller than the pointer we got back from malloc().
     *
     * Hence, in all cases, we give (return_from_malloc - LB) to MPI_Send.
     *
     * This works fine and dandy if we only have (count==1), which we
     * rarely do. ;-) So we really need to allocate (true_extent +
     * ((count - 1) * extent)) to get enough space for the rest.  This may
     * be more than is necessary, but it's ok.
     *
     * Simple, no?  :-)
     *
     */

    if (size > 1) {
        ompi_ddt_get_extent(dtype, &lb, &extent);
        ompi_ddt_get_true_extent(dtype, &true_lb, &true_extent);

        free_buffer = malloc(true_extent + (count - 1) * extent);
        if (NULL == free_buffer) {
            return OMPI_ERR_OUT_OF_RESOURCE;
        }
        pml_buffer = free_buffer - lb;
    }

    /* Initialize the receive buffer. */

    if (rank == (size - 1)) {
        err = ompi_ddt_sndrcv(sbuf, count, dtype, rbuf, count, dtype);
    } else {
        err = MCA_PML_CALL(recv(rbuf, count, dtype, size - 1,
                                MCA_COLL_BASE_TAG_REDUCE, comm,
                                MPI_STATUS_IGNORE));
    }
    if (MPI_SUCCESS != err) {
        if (NULL != free_buffer) {
            free(free_buffer);
        }
        return err;
    }

    /* Loop receiving and calling reduction function (C or Fortran). */

    for (i = size - 2; i >= 0; --i) {
        if (rank == i) {
            inbuf = sbuf;
        } else {
            err = MCA_PML_CALL(recv(pml_buffer, count, dtype, i,
                                    MCA_COLL_BASE_TAG_REDUCE, comm,
                                    MPI_STATUS_IGNORE));
            if (MPI_SUCCESS != err) {
                if (NULL != free_buffer) {
                    free(free_buffer);
                }
                return err;
            }

            inbuf = pml_buffer;
        }

        /* Perform the reduction */

        ompi_op_reduce(op, inbuf, rbuf, count, dtype);
    }

    if (NULL != free_buffer) {
        free(free_buffer);
    }

    /* All done */

    return MPI_SUCCESS;
}

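The sizing rule from the long comment above is easier to see with concrete numbers. The sketch below uses made-up datatype parameters (lb = -8, extent = 16, true_extent = 12, count = 4; not tied to any real MPI datatype) to compute the allocation size and the shifted pointer that would be handed to the communication calls:

    #include <stdio.h>
    #include <stdlib.h>

    int main(void)
    {
        long lb = -8, extent = 16, true_extent = 12;
        int count = 4;

        /* One true_extent for the first element, one full extent for
         * each of the remaining count-1 elements: 12 + 3*16 = 60. */
        size_t bytes = (size_t) (true_extent + (count - 1) * extent);

        char *free_buffer = malloc(bytes);
        if (NULL == free_buffer) {
            return 1;
        }
        /* The pointer given to the communication layer is shifted by
         * -LB, so a negative LB moves it forward, a positive one back. */
        char *pml_buffer = free_buffer - lb;

        printf("malloc %zu bytes; comm pointer offset %+ld\n",
               bytes, (long) (pml_buffer - free_buffer));
        free(free_buffer);   /* always free what malloc() returned */
        return 0;
    }
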
@ -253,191 +255,193 @@ int mca_coll_basic_reduce_lin_intra(void *sbuf, void *rbuf, int count,
 * Accepts: - same as MPI_Reduce()
 * Returns: - MPI_SUCCESS or error code
 */
int
mca_coll_basic_reduce_log_intra(void *sbuf, void *rbuf, int count,
                                struct ompi_datatype_t *dtype,
                                struct ompi_op_t *op,
                                int root, struct ompi_communicator_t *comm)
{
    int i;
    int size;
    int rank;
    int vrank;
    int err;
    int peer;
    int dim;
    int mask;
    long true_lb, true_extent, lb, extent;
    char *free_buffer = NULL;
    char *free_rbuf = NULL;
    char *pml_buffer = NULL;
    char *snd_buffer = sbuf;
    char *rcv_buffer = rbuf;

    /* JMS Codearound for now -- if the operation is not commutative,
     * just call the linear algorithm.  Need to talk to Edgar / George
     * about fixing this algorithm here to work with non-commutative
     * operations. */

    if (!ompi_op_is_commute(op)) {
        return mca_coll_basic_reduce_lin_intra(sbuf, rbuf, count, dtype,
                                               op, root, comm);
    }

    /* Some variables */
    size = ompi_comm_size(comm);
    rank = ompi_comm_rank(comm);
    vrank = ompi_op_is_commute(op) ? (rank - root + size) % size : rank;
    dim = comm->c_cube_dim;

    /* Allocate the incoming and resulting message buffers.  See lengthy
     * rationale above. */

    if (size > 1) {
        ompi_ddt_get_extent(dtype, &lb, &extent);
        ompi_ddt_get_true_extent(dtype, &true_lb, &true_extent);

        free_buffer = malloc(true_extent + (count - 1) * extent);
        if (NULL == free_buffer) {
            return OMPI_ERR_OUT_OF_RESOURCE;
        }

        pml_buffer = free_buffer - lb;
        /* read the comment about commutative operations (a few lines
         * down the page) */
        if (ompi_op_is_commute(op)) {
            rcv_buffer = pml_buffer;
        }

        if (rank != root && 0 == (vrank & 1)) {
            /* root is the only one required to provide a valid rbuf.
             * Assume rbuf is invalid for all other ranks, so fix it up
             * here to be valid on all non-leaf ranks */
            free_rbuf = malloc(true_extent + (count - 1) * extent);
            if (NULL == free_rbuf) {
                free(free_buffer);
                return OMPI_ERR_OUT_OF_RESOURCE;
            }
            rbuf = free_rbuf - lb;
        }
    }

    /* Loop over cube dimensions.  High processes send to low ones in the
     * dimension. */

    for (i = 0, mask = 1; i < dim; ++i, mask <<= 1) {

        /* A high-proc sends to low-proc and stops. */
        if (vrank & mask) {
            peer = vrank & ~mask;
            if (ompi_op_is_commute(op)) {
                peer = (peer + root) % size;
            }

            err = MCA_PML_CALL(send(snd_buffer, count,
                                    dtype, peer, MCA_COLL_BASE_TAG_REDUCE,
                                    MCA_PML_BASE_SEND_STANDARD, comm));
            if (MPI_SUCCESS != err) {
                if (NULL != free_buffer) {
                    free(free_buffer);
                }
                if (NULL != free_rbuf) {
                    free(free_rbuf);
                }
                return err;
            }
            snd_buffer = rbuf;
            break;
        }

        /* A low-proc receives, reduces, and moves to a higher
         * dimension. */

        else {
            peer = vrank | mask;
            if (peer >= size) {
                continue;
            }
            if (ompi_op_is_commute(op)) {
                peer = (peer + root) % size;
            }

            /* Most of the time (all except the first one for commutative
             * operations) we receive in the user provided buffer
             * (rbuf).  The exception is here to allow us to avoid
             * copying from the sbuf to a temporary location.  If the
             * operation is commutative we don't care in which order we
             * apply the operation, so for the first time we can receive
             * the data in the pml_buffer and then apply the operation
             * between this buffer and the user provided data. */

            err = MCA_PML_CALL(recv(rcv_buffer, count, dtype, peer,
                                    MCA_COLL_BASE_TAG_REDUCE, comm,
                                    MPI_STATUS_IGNORE));
            if (MPI_SUCCESS != err) {
                if (NULL != free_buffer) {
                    free(free_buffer);
                }
                if (NULL != free_rbuf) {
                    free(free_rbuf);
                }
                return err;
            }
            /* Perform the operation.  The target is always the user
             * provided buffer.  We do the operation only if we did not
             * receive it in the user buffer. */
            if (snd_buffer != sbuf) {
                /* the target buffer is the locally allocated one */
                ompi_op_reduce(op, rcv_buffer, pml_buffer, count, dtype);
            } else {
                /* If we're commutative, we don't care about the order of
                 * operations and we can just reduce the operations now.
                 * If we are not commutative, we have to copy the send
                 * buffer into a temp buffer (pml_buffer) and then reduce
                 * what we just received against it. */
                if (!ompi_op_is_commute(op)) {
                    ompi_ddt_sndrcv(sbuf, count, dtype, pml_buffer, count,
                                    dtype);
                    ompi_op_reduce(op, rbuf, pml_buffer, count, dtype);
                } else {
                    ompi_op_reduce(op, sbuf, pml_buffer, count, dtype);
                }
                /* now we have to send the buffer containing the computed data */
                snd_buffer = pml_buffer;
                /* starting from now we always receive in the user
                 * provided buffer */
                rcv_buffer = rbuf;
            }
        }
    }

    /* Get the result to the root if needed. */
    err = MPI_SUCCESS;
    if (0 == vrank) {
        if (root == rank) {
            ompi_ddt_sndrcv(snd_buffer, count, dtype, rbuf, count, dtype);
        } else {
            err = MCA_PML_CALL(send(snd_buffer, count,
                                    dtype, root, MCA_COLL_BASE_TAG_REDUCE,
                                    MCA_PML_BASE_SEND_STANDARD, comm));
        }
    } else if (rank == root) {
        err = MCA_PML_CALL(recv(rcv_buffer, count, dtype, 0,
                                MCA_COLL_BASE_TAG_REDUCE,
                                comm, MPI_STATUS_IGNORE));
        if (rcv_buffer != rbuf) {
            ompi_op_reduce(op, rcv_buffer, rbuf, count, dtype);
        }
    }

    if (NULL != free_buffer) {
        free(free_buffer);
    }
    if (NULL != free_rbuf) {
        free(free_rbuf);
    }

    /* All done */

    return err;
}

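The dimension/mask loop above pairs each (virtual) rank with one partner per hypercube dimension: ranks with the mask bit set send to (vrank & ~mask) and stop, the others receive from (vrank | mask) if that peer exists. A standalone trace of the pairing with plain integers and no MPI calls (size 6, so dim = 3 since 1 << 3 >= 6):

    #include <stdio.h>

    int main(void)
    {
        int size = 6;
        int dim = 3;   /* smallest d with (1 << d) >= size */

        for (int vrank = 0; vrank < size; ++vrank) {
            for (int i = 0, mask = 1; i < dim; ++i, mask <<= 1) {
                if (vrank & mask) {
                    printf("step %d: %d sends to %d and stops\n",
                           i, vrank, vrank & ~mask);
                    break;   /* high proc is done after sending */
                } else if ((vrank | mask) < size) {
                    printf("step %d: %d receives from %d\n",
                           i, vrank, vrank | mask);
                }
            }
        }
        return 0;
    }

Running it shows why the loop finishes in ceil(log2(size)) steps: every rank sends exactly once, and rank 0 (the virtual root) only ever receives.
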
@ -448,79 +452,78 @@ int mca_coll_basic_reduce_log_intra(void *sbuf, void *rbuf, int count,
 * Accepts: - same as MPI_Reduce()
 * Returns: - MPI_SUCCESS or error code
 */
int
mca_coll_basic_reduce_lin_inter(void *sbuf, void *rbuf, int count,
                                struct ompi_datatype_t *dtype,
                                struct ompi_op_t *op,
                                int root, struct ompi_communicator_t *comm)
{
    int i;
    int rank;
    int err;
    int size;
    long true_lb, true_extent, lb, extent;
    char *free_buffer = NULL;
    char *pml_buffer = NULL;

    /* Initialize */
    rank = ompi_comm_rank(comm);
    size = ompi_comm_remote_size(comm);

    if (MPI_PROC_NULL == root) {
        /* do nothing */
        err = OMPI_SUCCESS;
    } else if (MPI_ROOT != root) {
        /* If not root, send data to the root. */
        err = MCA_PML_CALL(send(sbuf, count, dtype, root,
                                MCA_COLL_BASE_TAG_REDUCE,
                                MCA_PML_BASE_SEND_STANDARD, comm));
    } else {
        /* Root receives and reduces messages */
        ompi_ddt_get_extent(dtype, &lb, &extent);
        ompi_ddt_get_true_extent(dtype, &true_lb, &true_extent);

        free_buffer = malloc(true_extent + (count - 1) * extent);
        if (NULL == free_buffer) {
            return OMPI_ERR_OUT_OF_RESOURCE;
        }
        pml_buffer = free_buffer - lb;

        /* Initialize the receive buffer. */
        err = MCA_PML_CALL(recv(rbuf, count, dtype, 0,
                                MCA_COLL_BASE_TAG_REDUCE, comm,
                                MPI_STATUS_IGNORE));
        if (MPI_SUCCESS != err) {
            if (NULL != free_buffer) {
                free(free_buffer);
            }
            return err;
        }

        /* Loop receiving and calling reduction function (C or Fortran). */
        for (i = 1; i < size; i++) {
            err = MCA_PML_CALL(recv(pml_buffer, count, dtype, i,
                                    MCA_COLL_BASE_TAG_REDUCE, comm,
                                    MPI_STATUS_IGNORE));
            if (MPI_SUCCESS != err) {
                if (NULL != free_buffer) {
                    free(free_buffer);
                }
                return err;
            }

            /* Perform the reduction */
            ompi_op_reduce(op, pml_buffer, rbuf, count, dtype);
        }

        if (NULL != free_buffer) {
            free(free_buffer);
        }
    }

    /* All done */
    return err;
}

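The three-way branch above follows the MPI intercommunicator root convention: in the root's group exactly one process passes MPI_ROOT, the rest pass MPI_PROC_NULL, and every process in the remote group passes the root's rank. A standalone sketch of the dispatch, using stand-in constants rather than the real MPI values:

    #include <stdio.h>

    #define FAKE_PROC_NULL (-2)   /* stand-in for MPI_PROC_NULL */
    #define FAKE_ROOT      (-4)   /* stand-in for MPI_ROOT */

    static const char *role(int root)
    {
        if (FAKE_PROC_NULL == root) {
            return "does nothing";
        } else if (FAKE_ROOT != root) {
            return "sends its data to the remote root";
        } else {
            return "receives and reduces everything";
        }
    }

    int main(void)
    {
        printf("root=MPI_PROC_NULL:  %s\n", role(FAKE_PROC_NULL));
        printf("root=3 (remote rank): %s\n", role(3));
        printf("root=MPI_ROOT:        %s\n", role(FAKE_ROOT));
        return 0;
    }
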
@ -531,10 +534,11 @@ int mca_coll_basic_reduce_lin_inter(void *sbuf, void *rbuf, int count,
 * Accepts: - same as MPI_Reduce()
 * Returns: - MPI_SUCCESS or error code
 */
int
mca_coll_basic_reduce_log_inter(void *sbuf, void *rbuf, int count,
                                struct ompi_datatype_t *dtype,
                                struct ompi_op_t *op,
                                int root, struct ompi_communicator_t *comm)
{
    return OMPI_ERR_NOT_IMPLEMENTED;
}

@ -35,81 +35,85 @@
 * Accepts: - same as MPI_Reduce_scatter()
 * Returns: - MPI_SUCCESS or error code
 */
int
mca_coll_basic_reduce_scatter_intra(void *sbuf, void *rbuf, int *rcounts,
                                    struct ompi_datatype_t *dtype,
                                    struct ompi_op_t *op,
                                    struct ompi_communicator_t *comm)
{
    int i;
    int err;
    int rank;
    int size;
    int count;
    long true_lb, true_extent, lb, extent;
    int *disps = NULL;
    char *free_buffer = NULL;
    char *pml_buffer = NULL;

    /* Initialize */

    rank = ompi_comm_rank(comm);
    size = ompi_comm_size(comm);

    /* Initialize reduce & scatterv info at the root (rank 0). */

    for (i = 0, count = 0; i < size; ++i) {
        if (rcounts[i] < 0) {
            return EINVAL;
        }
        count += rcounts[i];
    }

    if (0 == rank) {
        disps = malloc((unsigned) size * sizeof(int));
        if (NULL == disps) {
            return OMPI_ERR_OUT_OF_RESOURCE;
        }

        /* There is lengthy rationale about how this malloc works in
         * coll_basic_reduce.c */

        ompi_ddt_get_extent(dtype, &lb, &extent);
        ompi_ddt_get_true_extent(dtype, &true_lb, &true_extent);

        free_buffer = malloc(true_extent + (count - 1) * extent);
        if (NULL == free_buffer) {
            free(disps);
            return OMPI_ERR_OUT_OF_RESOURCE;
        }
        pml_buffer = free_buffer - lb;

        disps[0] = 0;
        for (i = 0; i < (size - 1); ++i) {
            disps[i + 1] = disps[i] + rcounts[i];
        }
    }

    /* reduction */

    err =
        comm->c_coll.coll_reduce(sbuf, pml_buffer, count, dtype, op, 0,
                                 comm);

    /* scatter */

    if (MPI_SUCCESS == err) {
        err = comm->c_coll.coll_scatterv(pml_buffer, rcounts, disps, dtype,
                                         rbuf, rcounts[rank], dtype, 0,
                                         comm);
    }

    /* All done */

    if (NULL != disps) {
        free(disps);
    }
    if (NULL != free_buffer) {
        free(free_buffer);
    }

    return err;
}

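The disps[] array above is the exclusive prefix sum of rcounts[]: segment i of the reduced buffer starts at element disps[i], which is exactly what the scatterv needs. A plain-array sketch of that computation:

    #include <stdio.h>

    int main(void)
    {
        int rcounts[] = { 3, 1, 4, 2 };
        int size = 4;
        int disps[4];

        disps[0] = 0;
        for (int i = 0; i < size - 1; ++i) {
            disps[i + 1] = disps[i] + rcounts[i];
        }
        for (int i = 0; i < size; ++i) {
            printf("rank %d: count %d at offset %d\n",
                   i, rcounts[i], disps[i]);
        }
        return 0;   /* offsets: 0, 3, 4, 8 */
    }
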
@ -120,175 +124,175 @@ int mca_coll_basic_reduce_scatter_intra(void *sbuf, void *rbuf, int *rcounts,
 * Accepts: - same arguments as MPI_Reduce_scatter()
 * Returns: - MPI_SUCCESS or error code
 */
int
mca_coll_basic_reduce_scatter_inter(void *sbuf, void *rbuf, int *rcounts,
                                    struct ompi_datatype_t *dtype,
                                    struct ompi_op_t *op,
                                    struct ompi_communicator_t *comm)
{
    int err, i;
    int rank;
    int root = 0;
    int rsize;
    int totalcounts, tcount;
    long lb, extent;
    char *tmpbuf = NULL, *tmpbuf2 = NULL, *tbuf = NULL;
    ompi_request_t *req;
    ompi_request_t **reqs = comm->c_coll_basic_data->mccb_reqs;

    rank = ompi_comm_rank(comm);
    rsize = ompi_comm_remote_size(comm);

    /* According to MPI-2, the total sum of elements transferred has to
     * be identical in both groups.  Thus, it is enough to calculate
     * that locally.
     */
    for (totalcounts = 0, i = 0; i < rsize; i++) {
        totalcounts += rcounts[i];
    }

    /* determine result of the remote group, you cannot
     * use coll_reduce for inter-communicators, since then
     * you would need to determine an order between the
     * two groups (e.g. which group is providing the data
     * and which one enters coll_reduce with providing
     * MPI_PROC_NULL as root argument etc.)  Here,
     * we execute the data exchange for both groups
     * simultaneously. */
    /*****************************************************************/
    if (rank == root) {
        err = ompi_ddt_get_extent(dtype, &lb, &extent);
        if (OMPI_SUCCESS != err) {
            return OMPI_ERROR;
        }

        tmpbuf = (char *) malloc(totalcounts * extent);
        tmpbuf2 = (char *) malloc(totalcounts * extent);
        if (NULL == tmpbuf || NULL == tmpbuf2) {
            return OMPI_ERR_OUT_OF_RESOURCE;
        }

        /* Do a send-recv between the two root procs. to avoid deadlock */
        err = MCA_PML_CALL(isend(sbuf, totalcounts, dtype, 0,
                                 MCA_COLL_BASE_TAG_REDUCE_SCATTER,
                                 MCA_PML_BASE_SEND_STANDARD, comm, &req));
        if (OMPI_SUCCESS != err) {
            goto exit;
        }

        err = MCA_PML_CALL(recv(tmpbuf2, totalcounts, dtype, 0,
                                MCA_COLL_BASE_TAG_REDUCE_SCATTER, comm,
                                MPI_STATUS_IGNORE));
        if (OMPI_SUCCESS != err) {
            goto exit;
        }

        err = ompi_request_wait_all(1, &req, MPI_STATUS_IGNORE);
        if (OMPI_SUCCESS != err) {
            goto exit;
        }

        /* Loop receiving and calling reduction function (C or Fortran).
         * The result of these reduction operations is then in
         * tmpbuf2.
         */
        for (i = 1; i < rsize; i++) {
            err = MCA_PML_CALL(recv(tmpbuf, totalcounts, dtype, i,
                                    MCA_COLL_BASE_TAG_REDUCE_SCATTER, comm,
                                    MPI_STATUS_IGNORE));
            if (MPI_SUCCESS != err) {
                goto exit;
            }

            /* Perform the reduction */
            ompi_op_reduce(op, tmpbuf, tmpbuf2, totalcounts, dtype);
        }
    } else {
        /* If not root, send data to the root. */
        err = MCA_PML_CALL(send(sbuf, totalcounts, dtype, root,
                                MCA_COLL_BASE_TAG_REDUCE_SCATTER,
                                MCA_PML_BASE_SEND_STANDARD, comm));
        if (OMPI_SUCCESS != err) {
            goto exit;
        }
    }

    /* now we have on one process the result of the remote group. To distribute
     * the data to all processes in the local group, we exchange the data between
     * the two root processes. They then send it to every other process in the
     * remote group.
     */
    /***************************************************************************/
    if (rank == root) {
        /* sendrecv between the two roots */
        err = MCA_PML_CALL(irecv(tmpbuf, totalcounts, dtype, 0,
                                 MCA_COLL_BASE_TAG_REDUCE_SCATTER,
                                 comm, &req));
        if (OMPI_SUCCESS != err) {
            goto exit;
        }

        err = MCA_PML_CALL(send(tmpbuf2, totalcounts, dtype, 0,
                                MCA_COLL_BASE_TAG_REDUCE_SCATTER,
                                MCA_PML_BASE_SEND_STANDARD, comm));
        if (OMPI_SUCCESS != err) {
            goto exit;
        }

        err = ompi_request_wait_all(1, &req, MPI_STATUS_IGNORE);
        if (OMPI_SUCCESS != err) {
            goto exit;
        }

        /* distribute the data to other processes in remote group.
         * Note that we start from 1 (not from zero), since zero
         * has already the correct data AND we avoid a potential
         * deadlock here.
         */
        err = MCA_PML_CALL(irecv(rbuf, rcounts[rank], dtype, root,
                                 MCA_COLL_BASE_TAG_REDUCE_SCATTER,
                                 comm, &req));

        tcount = 0;
        for (i = 0; i < rsize; i++) {
            tbuf = (char *) tmpbuf + tcount * extent;
            err = MCA_PML_CALL(isend(tbuf, rcounts[i], dtype, i,
                                     MCA_COLL_BASE_TAG_REDUCE_SCATTER,
                                     MCA_PML_BASE_SEND_STANDARD, comm,
                                     reqs++));
            if (OMPI_SUCCESS != err) {
                goto exit;
            }
            tcount += rcounts[i];
        }

        err =
            ompi_request_wait_all(rsize,
                                  comm->c_coll_basic_data->mccb_reqs,
                                  MPI_STATUSES_IGNORE);
        if (OMPI_SUCCESS != err) {
            goto exit;
        }

        err = ompi_request_wait_all(1, &req, MPI_STATUS_IGNORE);
        if (OMPI_SUCCESS != err) {
            goto exit;
        }
    } else {
        err = MCA_PML_CALL(recv(rbuf, rcounts[rank], dtype, root,
                                MCA_COLL_BASE_TAG_REDUCE_SCATTER,
                                comm, MPI_STATUS_IGNORE));
    }

  exit:
    if (NULL != tmpbuf) {
        free(tmpbuf);
    }

    if (NULL != tmpbuf2) {
        free(tmpbuf2);
    }

    return err;
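Both phases above hinge on one deadlock-avoidance rule: when the two symmetric roots must exchange data, at least one side of the exchange is posted nonblocking (isend or irecv) before the matching blocking call. A purely illustrative trace of the message pattern, printed rather than communicated:

    #include <stdio.h>

    int main(void)
    {
        /* Phase 1: each group reduces onto the remote group's root.
         * The roots post their own contribution as a nonblocking send,
         * so the two symmetric roots cannot deadlock on each other. */
        printf("root A: isend(data -> root B); recv(from B's group); wait\n");
        printf("root B: isend(data -> root A); recv(from A's group); wait\n");

        /* Phase 2: the roots swap the reduced results (irecv before the
         * blocking send, same rule), then fan segments out locally. */
        printf("root A <-> root B: irecv; send; wait\n");
        printf("roots: isend segment i to local rank i; wait_all\n");
        return 0;
    }
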
@ -34,93 +34,93 @@
 * Accepts: - same arguments as MPI_Scan()
 * Returns: - MPI_SUCCESS or error code
 */
int
mca_coll_basic_scan_intra(void *sbuf, void *rbuf, int count,
                          struct ompi_datatype_t *dtype,
                          struct ompi_op_t *op,
                          struct ompi_communicator_t *comm)
{
    int size;
    int rank;
    int err;
    long true_lb, true_extent, lb, extent;
    char *free_buffer = NULL;
    char *pml_buffer = NULL;

    /* Initialize */

    rank = ompi_comm_rank(comm);
    size = ompi_comm_size(comm);

    /* If I'm rank 0, just copy into the receive buffer */

    if (0 == rank) {
        err = ompi_ddt_sndrcv(sbuf, count, dtype, rbuf, count, dtype);
        if (MPI_SUCCESS != err) {
            return err;
        }
    }

    /* Otherwise receive previous buffer and reduce. */

    else {
        /* Allocate a temporary buffer.  Rationale for this size is
         * listed in coll_basic_reduce.c.  Use this temporary buffer to
         * receive into, later. */

        if (size > 1) {
            ompi_ddt_get_extent(dtype, &lb, &extent);
            ompi_ddt_get_true_extent(dtype, &true_lb, &true_extent);

            free_buffer = malloc(true_extent + (count - 1) * extent);
            if (NULL == free_buffer) {
                return OMPI_ERR_OUT_OF_RESOURCE;
            }
            pml_buffer = free_buffer - lb;
        }

        /* Copy the send buffer into the receive buffer. */

        err = ompi_ddt_sndrcv(sbuf, count, dtype, rbuf, count, dtype);
        if (MPI_SUCCESS != err) {
            if (NULL != free_buffer) {
                free(free_buffer);
            }
            return err;
        }

        /* Receive the prior answer */

        err = MCA_PML_CALL(recv(pml_buffer, count, dtype,
                                rank - 1, MCA_COLL_BASE_TAG_SCAN, comm,
                                MPI_STATUS_IGNORE));
        if (MPI_SUCCESS != err) {
            if (NULL != free_buffer) {
                free(free_buffer);
            }
            return err;
        }

        /* Perform the operation */

        ompi_op_reduce(op, pml_buffer, rbuf, count, dtype);

        /* All done */

        if (NULL != free_buffer) {
            free(free_buffer);
        }
    }

    /* Send result to next process. */

    if (rank < (size - 1)) {
        return MCA_PML_CALL(send(rbuf, count, dtype, rank + 1,
                                 MCA_COLL_BASE_TAG_SCAN,
                                 MCA_PML_BASE_SEND_STANDARD, comm));
    }

    /* All done */

    return MPI_SUCCESS;
}

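The scan above is a pipeline: rank r's result is its own contribution combined with rank r-1's result, so partial results flow down the ranks one hop at a time. A sequential simulation of that recurrence with plain integers and addition as the operation:

    #include <stdio.h>

    int main(void)
    {
        int size = 5;
        int sbuf[] = { 1, 2, 3, 4, 5 };   /* each rank's contribution */
        int rbuf[5];

        for (int rank = 0; rank < size; ++rank) {
            rbuf[rank] = sbuf[rank];      /* copy sbuf into rbuf */
            if (rank > 0) {
                /* "receive the prior answer" and reduce into rbuf */
                rbuf[rank] += rbuf[rank - 1];
            }
            printf("rank %d result: %d\n", rank, rbuf[rank]);
        }
        return 0;   /* prefix sums: 1 3 6 10 15 */
    }

The price of the pipeline is latency linear in the communicator size, which is why this lives in the "basic" component.
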
@ -33,62 +33,64 @@
 * Accepts: - same arguments as MPI_Scatter()
 * Returns: - MPI_SUCCESS or error code
 */
int
mca_coll_basic_scatter_intra(void *sbuf, int scount,
                             struct ompi_datatype_t *sdtype,
                             void *rbuf, int rcount,
                             struct ompi_datatype_t *rdtype,
                             int root, struct ompi_communicator_t *comm)
{
    int i;
    int rank;
    int size;
    int err;
    char *ptmp;
    long lb;
    long incr;

    /* Initialize */

    rank = ompi_comm_rank(comm);
    size = ompi_comm_size(comm);

    /* If not root, receive data. */

    if (rank != root) {
        err = MCA_PML_CALL(recv(rbuf, rcount, rdtype, root,
                                MCA_COLL_BASE_TAG_SCATTER,
                                comm, MPI_STATUS_IGNORE));
        return err;
    }

    /* I am the root, loop sending data. */

    err = ompi_ddt_get_extent(rdtype, &lb, &incr);
    if (OMPI_SUCCESS != err) {
        return OMPI_ERROR;
    }

    incr *= scount;
    for (i = 0, ptmp = (char *) sbuf; i < size; ++i, ptmp += incr) {

        /* simple optimization */

        if (i == rank) {
            err =
                ompi_ddt_sndrcv(ptmp, scount, sdtype, rbuf, rcount,
                                rdtype);
        } else {
            err = MCA_PML_CALL(send(ptmp, scount, sdtype, i,
                                    MCA_COLL_BASE_TAG_SCATTER,
                                    MCA_PML_BASE_SEND_STANDARD, comm));
        }
        if (MPI_SUCCESS != err) {
            return err;
        }
    }

    /* All done */

    return MPI_SUCCESS;
}

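The root's loop above walks sbuf with a byte stride of incr = extent * scount, so rank i's segment starts at offset i * incr. A sketch of that arithmetic with made-up extent and counts, tracking offsets instead of raw pointers:

    #include <stdio.h>

    int main(void)
    {
        long extent = 8;              /* bytes per element, assumed */
        int scount = 3;               /* elements sent to each rank */
        int size = 4;
        long incr = extent * scount;  /* byte stride between segments */

        long offset = 0;
        for (int i = 0; i < size; ++i, offset += incr) {
            printf("rank %d gets bytes [%ld, %ld)\n",
                   i, offset, offset + incr);
        }
        return 0;
    }
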
@ -99,57 +101,57 @@ int mca_coll_basic_scatter_intra(void *sbuf, int scount,
 * Accepts: - same arguments as MPI_Scatter()
 * Returns: - MPI_SUCCESS or error code
 */
int mca_coll_basic_scatter_inter(void *sbuf, int scount,
                                 struct ompi_datatype_t *sdtype,
                                 void *rbuf, int rcount,
                                 struct ompi_datatype_t *rdtype,
                                 int root,
                                 struct ompi_communicator_t *comm)
int
mca_coll_basic_scatter_inter(void *sbuf, int scount,
                             struct ompi_datatype_t *sdtype,
                             void *rbuf, int rcount,
                             struct ompi_datatype_t *rdtype,
                             int root, struct ompi_communicator_t *comm)
{
  int i;
  int rank;
  int size;
  int err;
  char *ptmp;
  long lb;
  long incr;
  ompi_request_t **reqs=comm->c_coll_basic_data->mccb_reqs;
    int i;
    int rank;
    int size;
    int err;
    char *ptmp;
    long lb;
    long incr;
    ompi_request_t **reqs = comm->c_coll_basic_data->mccb_reqs;

  /* Initialize */
    /* Initialize */

  rank = ompi_comm_rank(comm);
  size = ompi_comm_remote_size(comm);
    rank = ompi_comm_rank(comm);
    size = ompi_comm_remote_size(comm);

  if ( MPI_PROC_NULL == root ) {
    /* do nothing */
    err = OMPI_SUCCESS;
  }
  else if ( MPI_ROOT != root ) {
    /* If not root, receive data. */
    err = MCA_PML_CALL(recv(rbuf, rcount, rdtype, root,
                            MCA_COLL_BASE_TAG_SCATTER,
                            comm, MPI_STATUS_IGNORE));
  }
  else{
    /* I am the root, loop sending data. */
    err = ompi_ddt_get_extent(rdtype, &lb, &incr);
    if (OMPI_SUCCESS != err) {
      return OMPI_ERROR;
    }
    if (MPI_PROC_NULL == root) {
        /* do nothing */
        err = OMPI_SUCCESS;
    } else if (MPI_ROOT != root) {
        /* If not root, receive data. */
        err = MCA_PML_CALL(recv(rbuf, rcount, rdtype, root,
                                MCA_COLL_BASE_TAG_SCATTER,
                                comm, MPI_STATUS_IGNORE));
    } else {
        /* I am the root, loop sending data. */
        err = ompi_ddt_get_extent(rdtype, &lb, &incr);
        if (OMPI_SUCCESS != err) {
            return OMPI_ERROR;
        }

    incr *= scount;
    for (i = 0, ptmp = (char *) sbuf; i < size; ++i, ptmp += incr) {
      err = MCA_PML_CALL(isend(ptmp, scount, sdtype, i,
                               MCA_COLL_BASE_TAG_SCATTER,
                               MCA_PML_BASE_SEND_STANDARD, comm, reqs++));
      if (OMPI_SUCCESS != err) {
        return err;
      }
    }

    err = ompi_request_wait_all (size, comm->c_coll_basic_data->mccb_reqs,
                                 MPI_STATUSES_IGNORE);
  }

  return err;
        incr *= scount;
        for (i = 0, ptmp = (char *) sbuf; i < size; ++i, ptmp += incr) {
            err = MCA_PML_CALL(isend(ptmp, scount, sdtype, i,
                                     MCA_COLL_BASE_TAG_SCATTER,
                                     MCA_PML_BASE_SEND_STANDARD, comm,
                                     reqs++));
            if (OMPI_SUCCESS != err) {
                return err;
            }
        }

        err =
            ompi_request_wait_all(size, comm->c_coll_basic_data->mccb_reqs,
                                  MPI_STATUSES_IGNORE);
    }

    return err;
}
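
The inter-communicator variant above keys off the special root values MPI_ROOT and MPI_PROC_NULL instead of comparing ranks. A hedged sketch of how a caller picks that root argument; the helper name and parameters are invented for this illustration and are not from the commit:

/* Hypothetical helper -- illustration only, not part of this commit.
 * For an inter-communicator scatter, the root's group passes MPI_ROOT
 * (the one root) or MPI_PROC_NULL (everyone else); the remote group
 * passes the root's rank within its group. */
#include <mpi.h>

static int pick_scatter_root_arg(MPI_Comm intercomm, int root_rank,
                                 int in_root_group)
{
    int myrank;

    MPI_Comm_rank(intercomm, &myrank);  /* rank within the local group */
    if (!in_root_group) {
        return root_rank;               /* rank of root in remote group */
    }
    return (myrank == root_rank) ? MPI_ROOT : MPI_PROC_NULL;
}
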
@ -33,11 +33,12 @@
 * Accepts: - same arguments as MPI_Scatterv()
 * Returns: - MPI_SUCCESS or error code
 */
int mca_coll_basic_scatterv_intra(void *sbuf, int *scounts,
                                  int *disps, struct ompi_datatype_t *sdtype,
                                  void *rbuf, int rcount,
                                  struct ompi_datatype_t *rdtype, int root,
                                  struct ompi_communicator_t *comm)
int
mca_coll_basic_scatterv_intra(void *sbuf, int *scounts,
                              int *disps, struct ompi_datatype_t *sdtype,
                              void *rbuf, int rcount,
                              struct ompi_datatype_t *rdtype, int root,
                              struct ompi_communicator_t *comm)
{
    int i;
    int rank;
@ -53,40 +54,42 @@ int mca_coll_basic_scatterv_intra(void *sbuf, int *scounts,
    size = ompi_comm_size(comm);

    /* If not root, receive data.  Note that we will only get here if
       rcount > 0 or rank == root. */
     * rcount > 0 or rank == root. */

    if (rank != root) {
      err = MCA_PML_CALL(recv(rbuf, rcount, rdtype,
                              root, MCA_COLL_BASE_TAG_SCATTERV,
                              comm, MPI_STATUS_IGNORE));
      return err;
        err = MCA_PML_CALL(recv(rbuf, rcount, rdtype,
                                root, MCA_COLL_BASE_TAG_SCATTERV,
                                comm, MPI_STATUS_IGNORE));
        return err;
    }

    /* I am the root, loop sending data. */

    err = ompi_ddt_get_extent(rdtype, &lb, &extent);
    if (OMPI_SUCCESS != err) {
      return OMPI_ERROR;
        return OMPI_ERROR;
    }

    for (i = 0; i < size; ++i) {
      ptmp = ((char *) sbuf) + (extent * disps[i]);
        ptmp = ((char *) sbuf) + (extent * disps[i]);

      /* simple optimization */
        /* simple optimization */

      if (i == rank) {
        if( 0 == scounts[i] ) { /* simple optimization or a local operation */
          continue;
        }
        err = ompi_ddt_sndrcv(ptmp, scounts[i], sdtype, rbuf, rcount, rdtype);
      } else {
        err = MCA_PML_CALL(send(ptmp, scounts[i], sdtype, i,
                                MCA_COLL_BASE_TAG_SCATTERV,
                                MCA_PML_BASE_SEND_STANDARD, comm));
      }
      if (MPI_SUCCESS != err) {
        return err;
      }
        if (i == rank) {
            if (0 == scounts[i]) {      /* simple optimization or a local operation */
                continue;
            }
            err =
                ompi_ddt_sndrcv(ptmp, scounts[i], sdtype, rbuf, rcount,
                                rdtype);
        } else {
            err = MCA_PML_CALL(send(ptmp, scounts[i], sdtype, i,
                                    MCA_COLL_BASE_TAG_SCATTERV,
                                    MCA_PML_BASE_SEND_STANDARD, comm));
        }
        if (MPI_SUCCESS != err) {
            return err;
        }
    }

    /* All done */
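
The scatterv path is the same linear loop, but each peer gets its own count and a byte offset computed as extent * disps[i]. A hedged usage sketch, not part of this commit (the counts and values are invented): rank i receives i+1 ints, and each displacement is the prefix sum of the preceding counts.

/* Hedged usage sketch -- not part of this commit.  Rank i receives
 * i+1 ints; disps[] are prefix sums of scounts[]. */
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>

int main(int argc, char *argv[])
{
    int i, rank, size, total = 0;
    int *scounts = NULL, *disps = NULL, *sendbuf = NULL, *recvbuf;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    if (0 == rank) {
        scounts = (int *) malloc(size * sizeof(int));
        disps = (int *) malloc(size * sizeof(int));
        for (i = 0; i < size; ++i) {
            scounts[i] = i + 1;         /* uneven counts */
            disps[i] = total;           /* prefix sum of earlier counts */
            total += scounts[i];
        }
        sendbuf = (int *) malloc(total * sizeof(int));
        for (i = 0; i < total; ++i) {
            sendbuf[i] = i;
        }
    }

    recvbuf = (int *) malloc((rank + 1) * sizeof(int));
    MPI_Scatterv(sendbuf, scounts, disps, MPI_INT,
                 recvbuf, rank + 1, MPI_INT, 0, MPI_COMM_WORLD);
    printf("rank %d: first element %d of %d\n", rank, recvbuf[0], rank + 1);

    free(recvbuf);
    if (0 == rank) {
        free(sendbuf);
        free(disps);
        free(scounts);
    }
    MPI_Finalize();
    return 0;
}
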
@ -102,60 +105,59 @@ int mca_coll_basic_scatterv_intra(void *sbuf, int *scounts,
 * Accepts: - same arguments as MPI_Scatterv()
 * Returns: - MPI_SUCCESS or error code
 */
int mca_coll_basic_scatterv_inter(void *sbuf, int *scounts,
                                  int *disps, struct ompi_datatype_t *sdtype,
                                  void *rbuf, int rcount,
                                  struct ompi_datatype_t *rdtype, int root,
                                  struct ompi_communicator_t *comm)
int
mca_coll_basic_scatterv_inter(void *sbuf, int *scounts,
                              int *disps, struct ompi_datatype_t *sdtype,
                              void *rbuf, int rcount,
                              struct ompi_datatype_t *rdtype, int root,
                              struct ompi_communicator_t *comm)
{
  int i;
  int rank;
  int size;
  int err;
  char *ptmp;
  long lb;
  long extent;
  ompi_request_t **reqs=comm->c_coll_basic_data->mccb_reqs;
    int i;
    int rank;
    int size;
    int err;
    char *ptmp;
    long lb;
    long extent;
    ompi_request_t **reqs = comm->c_coll_basic_data->mccb_reqs;

  /* Initialize */
    /* Initialize */

  rank = ompi_comm_rank(comm);
  size = ompi_comm_remote_size(comm);
    rank = ompi_comm_rank(comm);
    size = ompi_comm_remote_size(comm);

  /* If not root, receive data. Note that we will only get here if
     rcount > 0 or rank == root. */
    /* If not root, receive data. Note that we will only get here if
     * rcount > 0 or rank == root. */

  if ( MPI_PROC_NULL == root ) {
    /* do nothing */
    err = OMPI_SUCCESS;
  }
  else if ( MPI_ROOT != root ) {
    /* If not root, receive data. */
    err = MCA_PML_CALL(recv(rbuf, rcount, rdtype,
                            root, MCA_COLL_BASE_TAG_SCATTERV,
                            comm, MPI_STATUS_IGNORE));
  }
  else {
    /* I am the root, loop sending data. */
    err = ompi_ddt_get_extent(rdtype, &lb, &extent);
    if (OMPI_SUCCESS != err) {
      return OMPI_ERROR;
    }

    for (i = 0; i < size; ++i) {
      ptmp = ((char *) sbuf) + (extent * disps[i]);
      err = MCA_PML_CALL(isend(ptmp, scounts[i], sdtype, i,
                               MCA_COLL_BASE_TAG_SCATTERV,
                               MCA_PML_BASE_SEND_STANDARD, comm,
                               &(reqs[i])));
      if (OMPI_SUCCESS != err) {
        return err;
      }
    }

    err = ompi_request_wait_all (size, reqs, MPI_STATUSES_IGNORE);
  }

  /* All done */
  return err;
    if (MPI_PROC_NULL == root) {
        /* do nothing */
        err = OMPI_SUCCESS;
    } else if (MPI_ROOT != root) {
        /* If not root, receive data. */
        err = MCA_PML_CALL(recv(rbuf, rcount, rdtype,
                                root, MCA_COLL_BASE_TAG_SCATTERV,
                                comm, MPI_STATUS_IGNORE));
    } else {
        /* I am the root, loop sending data. */
        err = ompi_ddt_get_extent(rdtype, &lb, &extent);
        if (OMPI_SUCCESS != err) {
            return OMPI_ERROR;
        }

        for (i = 0; i < size; ++i) {
            ptmp = ((char *) sbuf) + (extent * disps[i]);
            err = MCA_PML_CALL(isend(ptmp, scounts[i], sdtype, i,
                                     MCA_COLL_BASE_TAG_SCATTERV,
                                     MCA_PML_BASE_SEND_STANDARD, comm,
                                     &(reqs[i])));
            if (OMPI_SUCCESS != err) {
                return err;
            }
        }

        err = ompi_request_wait_all(size, reqs, MPI_STATUSES_IGNORE);
    }

    /* All done */
    return err;
}
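
Both inter-communicator roots above post one non-blocking send per remote process and then complete the whole batch with a single wait. The same pattern in plain MPI point-to-point calls, as a hedged sketch not from this commit (the function name and tag are invented for the example):

/* Hedged sketch -- not part of this commit.  Non-blocking fan-out:
 * post one isend per peer, then complete them all at once, mirroring
 * the reqs[] / ompi_request_wait_all() usage above. */
#include <stdlib.h>
#include <mpi.h>

#define FANOUT_TAG 1234             /* invented tag for the example */

static int fan_out(const int *vals, int n, MPI_Comm comm)
{
    int i, err;
    MPI_Request *reqs = (MPI_Request *) malloc(n * sizeof(MPI_Request));

    for (i = 0; i < n; ++i) {
        err = MPI_Isend((void *) &vals[i], 1, MPI_INT, i,
                        FANOUT_TAG, comm, &reqs[i]);
        if (MPI_SUCCESS != err) {
            free(reqs);
            return err;
        }
    }
    /* One completion call for the whole batch, as in the code above. */
    err = MPI_Waitall(n, reqs, MPI_STATUSES_IGNORE);
    free(reqs);
    return err;
}
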