Add simple algorithms to support MPI_IN_PLACE for MPI_Alltoall, MPI_Alltoallv, and MPI_Alltoallw.
Faster in-place algorithms for the tuned component are still being worked on and will follow in a later commit. cmr=v1.7.3:ticket=trac:2965. This commit was SVN r28952. The following Trac tickets were found above: Ticket 2965 --> https://svn.open-mpi.org/trac/ompi/ticket/2965
Этот коммит содержится в:
родитель
99adeb7f6e
Коммит
cb90a4a7fc
@ -1,3 +1,4 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
@ -9,6 +10,8 @@
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -27,6 +30,97 @@
|
||||
#include "ompi/mca/pml/pml.h"
|
||||
|
||||
|
||||
static int
|
||||
mca_coll_basic_alltoall_intra_inplace(void *rbuf, int rcount,
|
||||
struct ompi_datatype_t *rdtype,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module)
|
||||
{
|
||||
mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t*) module;
|
||||
int i, j, size, rank, err;
|
||||
MPI_Request *preq;
|
||||
char *tmp_buffer;
|
||||
size_t max_size;
|
||||
ptrdiff_t ext;
|
||||
|
||||
/* Initialize. */
|
||||
|
||||
size = ompi_comm_size(comm);
|
||||
rank = ompi_comm_rank(comm);
|
||||
|
||||
/* If only one process, we're done. */
|
||||
if (1 == size) {
|
||||
return MPI_SUCCESS;
|
||||
}
|
||||
|
||||
/* Find the largest receive amount */
|
||||
ompi_datatype_type_extent (rdtype, &ext);
|
||||
max_size = ext * rcount;
|
||||
|
||||
/* Allocate a temporary buffer */
|
||||
tmp_buffer = calloc (max_size, 1);
|
||||
if (NULL == tmp_buffer) {
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
/* in-place alltoall slow algorithm (but works) */
|
||||
for (i = 0 ; i < size ; ++i) {
|
||||
for (j = i+1 ; j < size ; ++j) {
|
||||
/* Initiate all send/recv to/from others. */
|
||||
preq = basic_module->mccb_reqs;
|
||||
|
||||
if (i == rank) {
|
||||
/* Copy the data into the temporary buffer */
|
||||
err = ompi_datatype_copy_content_same_ddt (rdtype, rcount, tmp_buffer,
|
||||
(char *) rbuf + j * max_size);
|
||||
if (MPI_SUCCESS != err) { goto error_hndl; }
|
||||
|
||||
/* Exchange data with the peer */
|
||||
err = MCA_PML_CALL(irecv ((char *) rbuf + max_size * j, rcount, rdtype,
|
||||
j, MCA_COLL_BASE_TAG_ALLTOALL, comm, preq++));
|
||||
if (MPI_SUCCESS != err) { goto error_hndl; }
|
||||
|
||||
err = MCA_PML_CALL(isend ((char *) tmp_buffer, rcount, rdtype,
|
||||
j, MCA_COLL_BASE_TAG_ALLTOALL, MCA_PML_BASE_SEND_STANDARD,
|
||||
comm, preq++));
|
||||
if (MPI_SUCCESS != err) { goto error_hndl; }
|
||||
} else if (j == rank) {
|
||||
/* Copy the data into the temporary buffer */
|
||||
err = ompi_datatype_copy_content_same_ddt (rdtype, rcount, tmp_buffer,
|
||||
(char *) rbuf + i * max_size);
|
||||
if (MPI_SUCCESS != err) { goto error_hndl; }
|
||||
|
||||
/* Exchange data with the peer */
|
||||
err = MCA_PML_CALL(irecv ((char *) rbuf + max_size * i, rcount, rdtype,
|
||||
i, MCA_COLL_BASE_TAG_ALLTOALL, comm, preq++));
|
||||
if (MPI_SUCCESS != err) { goto error_hndl; }
|
||||
|
||||
err = MCA_PML_CALL(isend ((char *) tmp_buffer, rcount, rdtype,
|
||||
i, MCA_COLL_BASE_TAG_ALLTOALL, MCA_PML_BASE_SEND_STANDARD,
|
||||
comm, preq++));
|
||||
if (MPI_SUCCESS != err) { goto error_hndl; }
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Wait for the requests to complete */
|
||||
err = ompi_request_wait_all (2, basic_module->mccb_reqs, MPI_STATUS_IGNORE);
|
||||
if (MPI_SUCCESS != err) { goto error_hndl; }
|
||||
|
||||
/* Free the requests. */
|
||||
mca_coll_basic_free_reqs(basic_module->mccb_reqs, 2);
|
||||
}
|
||||
}
|
||||
|
||||
error_hndl:
|
||||
/* Free the temporary buffer */
|
||||
free (tmp_buffer);
|
||||
|
||||
/* All done */
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
|
||||
* alltoall_intra
|
||||
*
|
||||
@ -59,6 +153,10 @@ mca_coll_basic_alltoall_intra(void *sbuf, int scount,
|
||||
mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t*) module;
|
||||
|
||||
/* Initialize. */
|
||||
if (MPI_IN_PLACE == sbuf) {
|
||||
return mca_coll_basic_alltoall_intra_inplace (rbuf, rcount, rdtype,
|
||||
comm, module);
|
||||
}
|
||||
|
||||
size = ompi_comm_size(comm);
|
||||
rank = ompi_comm_rank(comm);
|
||||
|
@ -1,3 +1,4 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
@ -9,6 +10,8 @@
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -27,6 +30,101 @@
|
||||
#include "ompi/mca/pml/pml.h"
|
||||
|
||||
|
||||
static int
|
||||
mca_coll_basic_alltoallv_intra_inplace(void *rbuf, const int *rcounts, const int *rdisps,
|
||||
struct ompi_datatype_t *rdtype,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module)
|
||||
{
|
||||
mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t*) module;
|
||||
int i, j, size, rank, err;
|
||||
MPI_Request *preq;
|
||||
char *tmp_buffer;
|
||||
size_t max_size;
|
||||
ptrdiff_t ext;
|
||||
|
||||
/* Initialize. */
|
||||
|
||||
size = ompi_comm_size(comm);
|
||||
rank = ompi_comm_rank(comm);
|
||||
|
||||
/* If only one process, we're done. */
|
||||
if (1 == size) {
|
||||
return MPI_SUCCESS;
|
||||
}
|
||||
|
||||
/* Find the largest receive amount */
|
||||
ompi_datatype_type_extent (rdtype, &ext);
|
||||
for (i = 0, max_size = 0 ; i < size ; ++i) {
|
||||
size_t size = ext * rcounts[rank];
|
||||
|
||||
max_size = size > max_size ? size : max_size;
|
||||
}
|
||||
|
||||
/* Allocate a temporary buffer */
|
||||
tmp_buffer = calloc (max_size, 1);
|
||||
if (NULL == tmp_buffer) {
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
/* in-place alltoallv slow algorithm (but works) */
|
||||
for (i = 0 ; i < size ; ++i) {
|
||||
for (j = i+1 ; j < size ; ++j) {
|
||||
/* Initiate all send/recv to/from others. */
|
||||
preq = basic_module->mccb_reqs;
|
||||
|
||||
if (i == rank && rcounts[j]) {
|
||||
/* Copy the data into the temporary buffer */
|
||||
err = ompi_datatype_copy_content_same_ddt (rdtype, rcounts[j],
|
||||
tmp_buffer, (char *) rbuf + rdisps[j]);
|
||||
if (MPI_SUCCESS != err) { goto error_hndl; }
|
||||
|
||||
/* Exchange data with the peer */
|
||||
err = MCA_PML_CALL(irecv ((char *) rbuf + rdisps[j], rcounts[j], rdtype,
|
||||
j, MCA_COLL_BASE_TAG_ALLTOALLV, comm, preq++));
|
||||
if (MPI_SUCCESS != err) { goto error_hndl; }
|
||||
|
||||
err = MCA_PML_CALL(isend ((void *) tmp_buffer, rcounts[j], rdtype,
|
||||
j, MCA_COLL_BASE_TAG_ALLTOALLV, MCA_PML_BASE_SEND_STANDARD,
|
||||
comm, preq++));
|
||||
if (MPI_SUCCESS != err) { goto error_hndl; }
|
||||
} else if (j == rank && rcounts[i]) {
|
||||
/* Copy the data into the temporary buffer */
|
||||
err = ompi_datatype_copy_content_same_ddt (rdtype, rcounts[i],
|
||||
tmp_buffer, (char *) rbuf + rdisps[i]);
|
||||
if (MPI_SUCCESS != err) { goto error_hndl; }
|
||||
|
||||
/* Exchange data with the peer */
|
||||
err = MCA_PML_CALL(irecv ((char *) rbuf + rdisps[i], rcounts[i], rdtype,
|
||||
i, MCA_COLL_BASE_TAG_ALLTOALLV, comm, preq++));
|
||||
if (MPI_SUCCESS != err) { goto error_hndl; }
|
||||
|
||||
err = MCA_PML_CALL(isend ((void *) tmp_buffer, rcounts[i], rdtype,
|
||||
i, MCA_COLL_BASE_TAG_ALLTOALLV, MCA_PML_BASE_SEND_STANDARD,
|
||||
comm, preq++));
|
||||
if (MPI_SUCCESS != err) { goto error_hndl; }
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Wait for the requests to complete */
|
||||
err = ompi_request_wait_all (2, basic_module->mccb_reqs, MPI_STATUS_IGNORE);
|
||||
if (MPI_SUCCESS != err) { goto error_hndl; }
|
||||
|
||||
/* Free the requests. */
|
||||
mca_coll_basic_free_reqs(basic_module->mccb_reqs, 2);
|
||||
}
|
||||
}
|
||||
|
||||
error_hndl:
|
||||
/* Free the temporary buffer */
|
||||
free (tmp_buffer);
|
||||
|
||||
/* All done */
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
|
||||
* alltoallv_intra
|
||||
*
|
||||
@ -56,6 +154,10 @@ mca_coll_basic_alltoallv_intra(void *sbuf, int *scounts, int *sdisps,
|
||||
mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t*) module;
|
||||
|
||||
/* Initialize. */
|
||||
if (MPI_IN_PLACE == sbuf) {
|
||||
return mca_coll_basic_alltoallv_intra_inplace (rbuf, rcounts, rdisps,
|
||||
rdtype, comm, module);
|
||||
}
|
||||
|
||||
size = ompi_comm_size(comm);
|
||||
rank = ompi_comm_rank(comm);
|
||||
|
@ -1,3 +1,4 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
@ -10,6 +11,8 @@
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -28,6 +31,103 @@
|
||||
#include "ompi/mca/pml/pml.h"
|
||||
|
||||
|
||||
static int
|
||||
mca_coll_basic_alltoallw_intra_inplace(void *rbuf, int *rcounts, const int *rdisps,
|
||||
struct ompi_datatype_t * const *rdtypes,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module)
|
||||
{
|
||||
mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t*) module;
|
||||
int i, j, size, rank, err, max_size;
|
||||
MPI_Request *preq;
|
||||
char *tmp_buffer;
|
||||
ptrdiff_t ext;
|
||||
|
||||
/* Initialize. */
|
||||
|
||||
size = ompi_comm_size(comm);
|
||||
rank = ompi_comm_rank(comm);
|
||||
|
||||
/* If only one process, we're done. */
|
||||
if (1 == size) {
|
||||
return MPI_SUCCESS;
|
||||
}
|
||||
|
||||
/* Find the largest receive amount */
|
||||
for (i = 0, max_size = 0 ; i < size ; ++i) {
|
||||
ompi_datatype_type_extent (rdtypes[i], &ext);
|
||||
ext *= rcounts[rank];
|
||||
|
||||
max_size = ext > max_size ? ext : max_size;
|
||||
}
|
||||
|
||||
/* Allocate a temporary buffer */
|
||||
tmp_buffer = calloc (max_size, 1);
|
||||
if (NULL == tmp_buffer) {
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
/* in-place alltoallw slow algorithm (but works) */
|
||||
for (i = 0 ; i < size ; ++i) {
|
||||
for (j = i+1 ; j < size ; ++j) {
|
||||
ompi_datatype_type_extent (rdtypes[j], &ext);
|
||||
|
||||
/* Initiate all send/recv to/from others. */
|
||||
preq = basic_module->mccb_reqs;
|
||||
|
||||
if (i == rank && rcounts[j] != 0) {
|
||||
/* Copy the data into the temporary buffer */
|
||||
err = ompi_datatype_copy_content_same_ddt (rdtypes[j], rcounts[j],
|
||||
tmp_buffer, (char *) rbuf + rdisps[j]);
|
||||
if (MPI_SUCCESS != err) { goto error_hndl; }
|
||||
|
||||
/* Exchange data with the peer */
|
||||
err = MCA_PML_CALL(irecv ((char *) rbuf + rdisps[j], rcounts[j], rdtypes[j],
|
||||
j, MCA_COLL_BASE_TAG_ALLTOALLW, comm, preq++));
|
||||
if (MPI_SUCCESS != err) { goto error_hndl; }
|
||||
|
||||
err = MCA_PML_CALL(isend ((void *) tmp_buffer, rcounts[j], rdtypes[j],
|
||||
j, MCA_COLL_BASE_TAG_ALLTOALLW, MCA_PML_BASE_SEND_STANDARD,
|
||||
comm, preq++));
|
||||
if (MPI_SUCCESS != err) { goto error_hndl; }
|
||||
} else if (j == rank && rcounts[i] != 0) {
|
||||
/* Copy the data into the temporary buffer */
|
||||
err = ompi_datatype_copy_content_same_ddt (rdtypes[i], rcounts[i],
|
||||
tmp_buffer, (char *) rbuf + rdisps[i]);
|
||||
if (MPI_SUCCESS != err) { goto error_hndl; }
|
||||
|
||||
/* Exchange data with the peer */
|
||||
err = MCA_PML_CALL(irecv ((char *) rbuf + rdisps[i], rcounts[i], rdtypes[i],
|
||||
i, MCA_COLL_BASE_TAG_ALLTOALLW, comm, preq++));
|
||||
if (MPI_SUCCESS != err) { goto error_hndl; }
|
||||
|
||||
err = MCA_PML_CALL(isend ((void *) tmp_buffer, rcounts[i], rdtypes[i],
|
||||
i, MCA_COLL_BASE_TAG_ALLTOALLW, MCA_PML_BASE_SEND_STANDARD,
|
||||
comm, preq++));
|
||||
if (MPI_SUCCESS != err) { goto error_hndl; }
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Wait for the requests to complete */
|
||||
err = ompi_request_wait_all (2, basic_module->mccb_reqs, MPI_STATUS_IGNORE);
|
||||
if (MPI_SUCCESS != err) { goto error_hndl; }
|
||||
|
||||
/* Free the requests. */
|
||||
mca_coll_basic_free_reqs(basic_module->mccb_reqs, 2);
|
||||
}
|
||||
}
|
||||
|
||||
error_hndl:
|
||||
/* Free the temporary buffer */
|
||||
free (tmp_buffer);
|
||||
|
||||
/* All done */
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* alltoallw_intra
|
||||
*
|
||||
@ -54,6 +154,10 @@ mca_coll_basic_alltoallw_intra(void *sbuf, int *scounts, int *sdisps,
|
||||
mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t*) module;
|
||||
|
||||
/* Initialize. */
|
||||
if (MPI_IN_PLACE == sbuf) {
|
||||
return mca_coll_basic_alltoallw_intra_inplace (rbuf, rcounts, rdisps,
|
||||
rdtypes, comm, module);
|
||||
}
|
||||
|
||||
size = ompi_comm_size(comm);
|
||||
rank = ompi_comm_rank(comm);
|
||||
|
@ -550,10 +550,6 @@ static inline void NBC_SchedCache_dictwipe(hb_tree *dict, int *size) {
|
||||
if(sendbuf == MPI_IN_PLACE) { \
|
||||
sendbuf = recvbuf; \
|
||||
inplace = 1; \
|
||||
} else \
|
||||
if(recvbuf == MPI_IN_PLACE) { \
|
||||
recvbuf = sendbuf; \
|
||||
inplace = 1; \
|
||||
} \
|
||||
}
|
||||
|
||||
|
@ -1,3 +1,4 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
@ -11,6 +12,8 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -424,6 +427,14 @@ struct mca_coll_tuned_module_t {
|
||||
typedef struct mca_coll_tuned_module_t mca_coll_tuned_module_t;
|
||||
OBJ_CLASS_DECLARATION(mca_coll_tuned_module_t);
|
||||
|
||||
/* Hand each of the first `count` slots of `reqs` to ompi_request_free(),
 * lowest index first.  A count of zero or less frees nothing. */
static inline void mca_coll_tuned_free_reqs(ompi_request_t ** reqs,
                                            int count)
{
    int idx = 0;

    while (idx < count) {
        ompi_request_free(&reqs[idx]);
        idx++;
    }
}
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#define COLL_TUNED_UPDATE_BINTREE( OMPI_COMM, TUNED_MODULE, ROOT ) \
|
||||
|
@ -51,6 +51,98 @@ static mca_base_var_enum_value_t alltoall_algorithms[] = {
|
||||
{0, NULL}
|
||||
};
|
||||
|
||||
/* MPI_IN_PLACE all to all algorithm. TODO: implement a better one. */
|
||||
static int
|
||||
mca_coll_tuned_alltoall_intra_basic_inplace(void *rbuf, int rcount,
|
||||
struct ompi_datatype_t *rdtype,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module)
|
||||
{
|
||||
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
|
||||
int i, j, size, rank, err;
|
||||
MPI_Request *preq;
|
||||
char *tmp_buffer;
|
||||
size_t max_size;
|
||||
ptrdiff_t ext;
|
||||
|
||||
/* Initialize. */
|
||||
|
||||
size = ompi_comm_size(comm);
|
||||
rank = ompi_comm_rank(comm);
|
||||
|
||||
/* If only one process, we're done. */
|
||||
if (1 == size) {
|
||||
return MPI_SUCCESS;
|
||||
}
|
||||
|
||||
/* Find the largest receive amount */
|
||||
ompi_datatype_type_extent (rdtype, &ext);
|
||||
max_size = ext * rcount;
|
||||
|
||||
/* Allocate a temporary buffer */
|
||||
tmp_buffer = calloc (max_size, 1);
|
||||
if (NULL == tmp_buffer) {
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
/* in-place alltoall slow algorithm (but works) */
|
||||
for (i = 0 ; i < size ; ++i) {
|
||||
for (j = i+1 ; j < size ; ++j) {
|
||||
/* Initiate all send/recv to/from others. */
|
||||
preq = tuned_module->tuned_data->mcct_reqs;
|
||||
|
||||
if (i == rank) {
|
||||
/* Copy the data into the temporary buffer */
|
||||
err = ompi_datatype_copy_content_same_ddt (rdtype, rcount, tmp_buffer,
|
||||
(char *) rbuf + j * max_size);
|
||||
if (MPI_SUCCESS != err) { goto error_hndl; }
|
||||
|
||||
/* Exchange data with the peer */
|
||||
err = MCA_PML_CALL(irecv ((char *) rbuf + max_size * j, rcount, rdtype,
|
||||
j, MCA_COLL_BASE_TAG_ALLTOALL, comm, preq++));
|
||||
if (MPI_SUCCESS != err) { goto error_hndl; }
|
||||
|
||||
err = MCA_PML_CALL(isend ((char *) tmp_buffer, rcount, rdtype,
|
||||
j, MCA_COLL_BASE_TAG_ALLTOALL, MCA_PML_BASE_SEND_STANDARD,
|
||||
comm, preq++));
|
||||
if (MPI_SUCCESS != err) { goto error_hndl; }
|
||||
} else if (j == rank) {
|
||||
/* Copy the data into the temporary buffer */
|
||||
err = ompi_datatype_copy_content_same_ddt (rdtype, rcount, tmp_buffer,
|
||||
(char *) rbuf + i * max_size);
|
||||
if (MPI_SUCCESS != err) { goto error_hndl; }
|
||||
|
||||
/* Exchange data with the peer */
|
||||
err = MCA_PML_CALL(irecv ((char *) rbuf + max_size * i, rcount, rdtype,
|
||||
i, MCA_COLL_BASE_TAG_ALLTOALL, comm, preq++));
|
||||
if (MPI_SUCCESS != err) { goto error_hndl; }
|
||||
|
||||
err = MCA_PML_CALL(isend ((char *) tmp_buffer, rcount, rdtype,
|
||||
i, MCA_COLL_BASE_TAG_ALLTOALL, MCA_PML_BASE_SEND_STANDARD,
|
||||
comm, preq++));
|
||||
if (MPI_SUCCESS != err) { goto error_hndl; }
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Wait for the requests to complete */
|
||||
err = ompi_request_wait_all (2, tuned_module->tuned_data->mcct_reqs, MPI_STATUS_IGNORE);
|
||||
if (MPI_SUCCESS != err) { goto error_hndl; }
|
||||
|
||||
/* Free the requests. */
|
||||
mca_coll_tuned_free_reqs(tuned_module->tuned_data->mcct_reqs, 2);
|
||||
}
|
||||
}
|
||||
|
||||
error_hndl:
|
||||
/* Free the temporary buffer */
|
||||
free (tmp_buffer);
|
||||
|
||||
/* All done */
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
int ompi_coll_tuned_alltoall_intra_pairwise(void *sbuf, int scount,
|
||||
struct ompi_datatype_t *sdtype,
|
||||
void* rbuf, int rcount,
|
||||
@ -62,6 +154,11 @@ int ompi_coll_tuned_alltoall_intra_pairwise(void *sbuf, int scount,
|
||||
void * tmpsend, *tmprecv;
|
||||
ptrdiff_t lb, sext, rext;
|
||||
|
||||
if (MPI_IN_PLACE == sbuf) {
|
||||
return mca_coll_tuned_alltoall_intra_basic_inplace (rbuf, rcount, rdtype,
|
||||
comm, module);
|
||||
}
|
||||
|
||||
size = ompi_comm_size(comm);
|
||||
rank = ompi_comm_rank(comm);
|
||||
|
||||
@ -121,6 +218,11 @@ int ompi_coll_tuned_alltoall_intra_bruck(void *sbuf, int scount,
|
||||
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
|
||||
#endif
|
||||
|
||||
if (MPI_IN_PLACE == sbuf) {
|
||||
return mca_coll_tuned_alltoall_intra_basic_inplace (rbuf, rcount, rdtype,
|
||||
comm, module);
|
||||
}
|
||||
|
||||
size = ompi_comm_size(comm);
|
||||
rank = ompi_comm_rank(comm);
|
||||
|
||||
@ -279,6 +381,11 @@ int ompi_coll_tuned_alltoall_intra_linear_sync(void *sbuf, int scount,
|
||||
|
||||
ompi_request_t **reqs = NULL;
|
||||
|
||||
if (MPI_IN_PLACE == sbuf) {
|
||||
return mca_coll_tuned_alltoall_intra_basic_inplace (rbuf, rcount, rdtype,
|
||||
comm, module);
|
||||
}
|
||||
|
||||
/* Initialize. */
|
||||
|
||||
size = ompi_comm_size(comm);
|
||||
@ -418,6 +525,11 @@ int ompi_coll_tuned_alltoall_intra_two_procs(void *sbuf, int scount,
|
||||
void * tmpsend, *tmprecv;
|
||||
ptrdiff_t sext, rext, lb;
|
||||
|
||||
if (MPI_IN_PLACE == sbuf) {
|
||||
return mca_coll_tuned_alltoall_intra_basic_inplace (rbuf, rcount, rdtype,
|
||||
comm, module);
|
||||
}
|
||||
|
||||
rank = ompi_comm_rank(comm);
|
||||
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream,
|
||||
@ -481,7 +593,7 @@ int ompi_coll_tuned_alltoall_intra_basic_linear(void *sbuf, int scount,
|
||||
void* rbuf, int rcount,
|
||||
struct ompi_datatype_t *rdtype,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module)
|
||||
mca_coll_base_module_t *module)
|
||||
{
|
||||
int i, rank, size, err, nreqs;
|
||||
char *psnd, *prcv;
|
||||
@ -490,6 +602,11 @@ int ompi_coll_tuned_alltoall_intra_basic_linear(void *sbuf, int scount,
|
||||
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
|
||||
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
|
||||
|
||||
if (MPI_IN_PLACE == sbuf) {
|
||||
return mca_coll_tuned_alltoall_intra_basic_inplace (rbuf, rcount, rdtype,
|
||||
comm, module);
|
||||
}
|
||||
|
||||
/* Initialize. */
|
||||
|
||||
size = ompi_comm_size(comm);
|
||||
|
@ -45,6 +45,101 @@ static mca_base_var_enum_value_t alltoallv_algorithms[] = {
|
||||
{0, NULL}
|
||||
};
|
||||
|
||||
static int
|
||||
mca_coll_tuned_alltoallv_intra_basic_inplace(void *rbuf, const int *rcounts, const int *rdisps,
|
||||
struct ompi_datatype_t *rdtype,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module)
|
||||
{
|
||||
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
|
||||
int i, j, size, rank, err;
|
||||
MPI_Request *preq;
|
||||
char *tmp_buffer;
|
||||
size_t max_size;
|
||||
ptrdiff_t ext;
|
||||
|
||||
/* Initialize. */
|
||||
|
||||
size = ompi_comm_size(comm);
|
||||
rank = ompi_comm_rank(comm);
|
||||
|
||||
/* If only one process, we're done. */
|
||||
if (1 == size) {
|
||||
return MPI_SUCCESS;
|
||||
}
|
||||
|
||||
/* Find the largest receive amount */
|
||||
ompi_datatype_type_extent (rdtype, &ext);
|
||||
for (i = 0, max_size = 0 ; i < size ; ++i) {
|
||||
size_t size = ext * rcounts[rank];
|
||||
|
||||
max_size = size > max_size ? size : max_size;
|
||||
}
|
||||
|
||||
/* Allocate a temporary buffer */
|
||||
tmp_buffer = calloc (max_size, 1);
|
||||
if (NULL == tmp_buffer) {
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
/* in-place alltoallv slow algorithm (but works) */
|
||||
for (i = 0 ; i < size ; ++i) {
|
||||
for (j = i+1 ; j < size ; ++j) {
|
||||
/* Initiate all send/recv to/from others. */
|
||||
preq = tuned_module->tuned_data->mcct_reqs;
|
||||
|
||||
if (i == rank && rcounts[j]) {
|
||||
/* Copy the data into the temporary buffer */
|
||||
err = ompi_datatype_copy_content_same_ddt (rdtype, rcounts[j],
|
||||
tmp_buffer, (char *) rbuf + rdisps[j]);
|
||||
if (MPI_SUCCESS != err) { goto error_hndl; }
|
||||
|
||||
/* Exchange data with the peer */
|
||||
err = MCA_PML_CALL(irecv ((char *) rbuf + rdisps[j], rcounts[j], rdtype,
|
||||
j, MCA_COLL_BASE_TAG_ALLTOALLV, comm, preq++));
|
||||
if (MPI_SUCCESS != err) { goto error_hndl; }
|
||||
|
||||
err = MCA_PML_CALL(isend ((void *) tmp_buffer, rcounts[j], rdtype,
|
||||
j, MCA_COLL_BASE_TAG_ALLTOALLV, MCA_PML_BASE_SEND_STANDARD,
|
||||
comm, preq++));
|
||||
if (MPI_SUCCESS != err) { goto error_hndl; }
|
||||
} else if (j == rank && rcounts[i]) {
|
||||
/* Copy the data into the temporary buffer */
|
||||
err = ompi_datatype_copy_content_same_ddt (rdtype, rcounts[i],
|
||||
tmp_buffer, (char *) rbuf + rdisps[i]);
|
||||
if (MPI_SUCCESS != err) { goto error_hndl; }
|
||||
|
||||
/* Exchange data with the peer */
|
||||
err = MCA_PML_CALL(irecv ((char *) rbuf + rdisps[i], rcounts[i], rdtype,
|
||||
i, MCA_COLL_BASE_TAG_ALLTOALLV, comm, preq++));
|
||||
if (MPI_SUCCESS != err) { goto error_hndl; }
|
||||
|
||||
err = MCA_PML_CALL(isend ((void *) tmp_buffer, rcounts[i], rdtype,
|
||||
i, MCA_COLL_BASE_TAG_ALLTOALLV, MCA_PML_BASE_SEND_STANDARD,
|
||||
comm, preq++));
|
||||
if (MPI_SUCCESS != err) { goto error_hndl; }
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Wait for the requests to complete */
|
||||
err = ompi_request_wait_all (2, tuned_module->tuned_data->mcct_reqs, MPI_STATUS_IGNORE);
|
||||
if (MPI_SUCCESS != err) { goto error_hndl; }
|
||||
|
||||
/* Free the requests. */
|
||||
mca_coll_tuned_free_reqs(tuned_module->tuned_data->mcct_reqs, 2);
|
||||
}
|
||||
}
|
||||
|
||||
error_hndl:
|
||||
/* Free the temporary buffer */
|
||||
free (tmp_buffer);
|
||||
|
||||
/* All done */
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
int
|
||||
ompi_coll_tuned_alltoallv_intra_pairwise(void *sbuf, int *scounts, int *sdisps,
|
||||
struct ompi_datatype_t *sdtype,
|
||||
@ -57,6 +152,11 @@ ompi_coll_tuned_alltoallv_intra_pairwise(void *sbuf, int *scounts, int *sdisps,
|
||||
void *psnd, *prcv;
|
||||
ptrdiff_t sext, rext;
|
||||
|
||||
if (MPI_IN_PLACE == sbuf) {
|
||||
return mca_coll_tuned_alltoallv_intra_basic_inplace (rbuf, rcounts, rdisps,
|
||||
rdtype, comm, module);
|
||||
}
|
||||
|
||||
size = ompi_comm_size(comm);
|
||||
rank = ompi_comm_rank(comm);
|
||||
|
||||
@ -119,6 +219,11 @@ ompi_coll_tuned_alltoallv_intra_basic_linear(void *sbuf, int *scounts, int *sdis
|
||||
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
|
||||
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
|
||||
|
||||
if (MPI_IN_PLACE == sbuf) {
|
||||
return mca_coll_tuned_alltoallv_intra_basic_inplace (rbuf, rcounts, rdisps,
|
||||
rdtype, comm, module);
|
||||
}
|
||||
|
||||
size = ompi_comm_size(comm);
|
||||
rank = ompi_comm_rank(comm);
|
||||
|
||||
|
@ -1,3 +1,4 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
@ -10,6 +11,8 @@
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -57,6 +60,11 @@ int MPI_Alltoall(void *sendbuf, int sendcount, MPI_Datatype sendtype,
|
||||
/* Unrooted operation -- same checks for all ranks on both
|
||||
intracommunicators and intercommunicators */
|
||||
|
||||
if (MPI_IN_PLACE == sendbuf) {
|
||||
sendcount = recvcount;
|
||||
sendtype = recvtype;
|
||||
}
|
||||
|
||||
err = MPI_SUCCESS;
|
||||
OMPI_ERR_INIT_FINALIZE(FUNC_NAME);
|
||||
if (ompi_comm_invalid(comm)) {
|
||||
@ -66,7 +74,7 @@ int MPI_Alltoall(void *sendbuf, int sendcount, MPI_Datatype sendtype,
|
||||
err = MPI_ERR_TYPE;
|
||||
} else if (recvcount < 0) {
|
||||
err = MPI_ERR_COUNT;
|
||||
} else if (MPI_IN_PLACE == sendbuf || MPI_IN_PLACE == recvbuf) {
|
||||
} else if (MPI_IN_PLACE == recvbuf) {
|
||||
err = MPI_ERR_ARG;
|
||||
} else {
|
||||
OMPI_CHECK_DATATYPE_FOR_SEND(err, sendtype, sendcount);
|
||||
|
@ -1,3 +1,4 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
@ -10,7 +11,7 @@
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2012 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
@ -82,9 +83,15 @@ int MPI_Alltoallv(void *sendbuf, int sendcounts[], int sdispls[],
|
||||
FUNC_NAME);
|
||||
}
|
||||
|
||||
if (MPI_IN_PLACE == sendbuf) {
|
||||
sendcounts = recvcounts;
|
||||
sdispls = rdispls;
|
||||
sendtype = recvtype;
|
||||
}
|
||||
|
||||
if ((NULL == sendcounts) || (NULL == sdispls) ||
|
||||
(NULL == recvcounts) || (NULL == rdispls) ||
|
||||
MPI_IN_PLACE == sendbuf || MPI_IN_PLACE == recvbuf) {
|
||||
MPI_IN_PLACE == recvbuf) {
|
||||
return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME);
|
||||
}
|
||||
|
||||
|
@ -1,3 +1,4 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
@ -10,7 +11,7 @@
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2012 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
@ -81,9 +82,15 @@ int MPI_Alltoallw(void *sendbuf, int sendcounts[], int sdispls[],
|
||||
FUNC_NAME);
|
||||
}
|
||||
|
||||
if (MPI_IN_PLACE == sendbuf) {
|
||||
sendcounts = recvcounts;
|
||||
sdispls = rdispls;
|
||||
sendtypes = recvtypes;
|
||||
}
|
||||
|
||||
if ((NULL == sendcounts) || (NULL == sdispls) || (NULL == sendtypes) ||
|
||||
(NULL == recvcounts) || (NULL == rdispls) || (NULL == recvtypes) ||
|
||||
MPI_IN_PLACE == sendbuf || MPI_IN_PLACE == recvbuf) {
|
||||
MPI_IN_PLACE == recvbuf) {
|
||||
return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME);
|
||||
}
|
||||
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user