1
1

Add simple algorithms to support MPI_IN_PLACE for MPI_Alltoall, MPI_Alltoallv, and MPI_Alltoallw.

Faster algorithms for the tuned component are in progress and will come at a later time.

cmr=v1.7.3:ticket=trac:2965

This commit was SVN r28952.

The following Trac tickets were found above:
  Ticket 2965 --> https://svn.open-mpi.org/trac/ompi/ticket/2965
Этот коммит содержится в:
Nathan Hjelm 2013-07-25 19:19:41 +00:00
родитель 99adeb7f6e
Коммит cb90a4a7fc
10 изменённых файлов: 565 добавлений и 10 удалений

Просмотреть файл

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
@ -9,6 +10,8 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -27,6 +30,97 @@
#include "ompi/mca/pml/pml.h"
/*
 *	alltoall_intra_inplace
 *
 *	Function:	- MPI_Alltoall with MPI_IN_PLACE (rbuf is both the send
 *			  and the receive buffer)
 *	Accepts:	- rbuf:   buffer addressed as one block of rcount
 *			          elements of rdtype per rank
 *			- rcount: number of elements in each block
 *			- rdtype: datatype of the elements
 *			- comm:   communicator
 *			- module: basic collective module (provides the
 *			          request array)
 *	Returns:	- MPI_SUCCESS, OMPI_ERR_OUT_OF_RESOURCE if the staging
 *			  buffer cannot be allocated, or the first error
 *			  reported by the copy / PML / wait calls
 *
 *	The algorithm walks every (i, j) pair with i < j.  The two ranks of a
 *	pair stage their outgoing block in a temporary buffer, post a receive
 *	directly into rbuf and a send from the staging buffer, and wait for
 *	both requests before moving on.  It is slow but only needs one block
 *	of extra memory.
 */
static int
mca_coll_basic_alltoall_intra_inplace(void *rbuf, int rcount,
                                      struct ompi_datatype_t *rdtype,
                                      struct ompi_communicator_t *comm,
                                      mca_coll_base_module_t *module)
{
    mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t*) module;
    /* err starts out as success so the function never returns an
       indeterminate value */
    int i, j, peer, size, rank, err = MPI_SUCCESS;
    MPI_Request *preq;
    char *tmp_buffer, *peer_block;
    size_t max_size;
    ptrdiff_t ext;

    /* Initialize. */
    size = ompi_comm_size(comm);
    rank = ompi_comm_rank(comm);

    /* If only one process, we're done. */
    if (1 == size) {
        return MPI_SUCCESS;
    }

    /* All blocks have the same size: extent * rcount bytes */
    ompi_datatype_type_extent (rdtype, &ext);
    max_size = ext * rcount;

    /* Allocate the staging buffer.  calloc (0, 1) is allowed to return
       NULL, which must not be mistaken for an allocation failure, so at
       least one byte is always requested. */
    tmp_buffer = calloc (max_size ? max_size : 1, 1);
    if (NULL == tmp_buffer) {
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    /* in-place alltoall slow algorithm (but works) */
    for (i = 0 ; i < size ; ++i) {
        for (j = i+1 ; j < size ; ++j) {
            /* Only the two members of the pair take part in this step */
            if (i == rank) {
                peer = j;
            } else if (j == rank) {
                peer = i;
            } else {
                continue;
            }

            preq = basic_module->mccb_reqs;
            peer_block = (char *) rbuf + max_size * peer;

            /* Copy the outgoing block into the temporary buffer */
            err = ompi_datatype_copy_content_same_ddt (rdtype, rcount, tmp_buffer,
                                                       peer_block);
            if (MPI_SUCCESS != err) { goto error_hndl; }

            /* Exchange data with the peer */
            err = MCA_PML_CALL(irecv (peer_block, rcount, rdtype,
                                      peer, MCA_COLL_BASE_TAG_ALLTOALL, comm, preq++));
            if (MPI_SUCCESS != err) { goto error_hndl; }

            err = MCA_PML_CALL(isend ((char *) tmp_buffer, rcount, rdtype,
                                      peer, MCA_COLL_BASE_TAG_ALLTOALL, MCA_PML_BASE_SEND_STANDARD,
                                      comm, preq++));
            if (MPI_SUCCESS != err) { goto error_hndl; }

            /* Wait for the requests to complete */
            err = ompi_request_wait_all (2, basic_module->mccb_reqs, MPI_STATUS_IGNORE);
            if (MPI_SUCCESS != err) { goto error_hndl; }

            /* Free the requests. */
            mca_coll_basic_free_reqs(basic_module->mccb_reqs, 2);
        }
    }

 error_hndl:
    /* NOTE(review): requests posted before a failure are not released on
       this path -- confirm whether the caller or the module cleans up. */

    /* Free the temporary buffer */
    free (tmp_buffer);

    /* All done */
    return err;
}
/*
* alltoall_intra
*
@ -59,6 +153,10 @@ mca_coll_basic_alltoall_intra(void *sbuf, int scount,
mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t*) module;
/* Initialize. */
if (MPI_IN_PLACE == sbuf) {
return mca_coll_basic_alltoall_intra_inplace (rbuf, rcount, rdtype,
comm, module);
}
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);

Просмотреть файл

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
@ -9,6 +10,8 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -27,6 +30,101 @@
#include "ompi/mca/pml/pml.h"
/*
 *	alltoallv_intra_inplace
 *
 *	Function:	- MPI_Alltoallv with MPI_IN_PLACE (rbuf is both the send
 *			  and the receive buffer)
 *	Accepts:	- rbuf:    buffer holding the blocks to exchange
 *			- rcounts: per-rank element counts
 *			- rdisps:  per-rank block displacements, in units of
 *			           the extent of rdtype
 *			- rdtype:  datatype of the elements
 *			- comm:    communicator
 *			- module:  basic collective module (provides the
 *			           request array)
 *	Returns:	- MPI_SUCCESS, OMPI_ERR_OUT_OF_RESOURCE if the staging
 *			  buffer cannot be allocated, or the first error
 *			  reported by the copy / PML / wait calls
 *
 *	The algorithm walks every (i, j) pair with i < j.  The two ranks of a
 *	pair stage their outgoing block in a temporary buffer, post a receive
 *	directly into rbuf and a send from the staging buffer, and wait for
 *	both requests before moving on.  Pairs whose local count is zero are
 *	skipped.
 */
static int
mca_coll_basic_alltoallv_intra_inplace(void *rbuf, const int *rcounts, const int *rdisps,
                                       struct ompi_datatype_t *rdtype,
                                       struct ompi_communicator_t *comm,
                                       mca_coll_base_module_t *module)
{
    mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t*) module;
    /* err starts out as success: when every count is zero no call below
       ever assigns it */
    int i, j, peer, size, rank, err = MPI_SUCCESS;
    MPI_Request *preq;
    char *tmp_buffer, *peer_block;
    size_t max_size, block_size;
    ptrdiff_t ext;

    /* Initialize. */
    size = ompi_comm_size(comm);
    rank = ompi_comm_rank(comm);

    /* If only one process, we're done. */
    if (1 == size) {
        return MPI_SUCCESS;
    }

    /* Find the largest receive amount.  Every peer's count has to be
       inspected (rcounts[i], not rcounts[rank]); otherwise the staging
       buffer can be smaller than the block copied into it. */
    ompi_datatype_type_extent (rdtype, &ext);
    for (i = 0, max_size = 0 ; i < size ; ++i) {
        block_size = (size_t) ext * (size_t) rcounts[i];
        if (block_size > max_size) {
            max_size = block_size;
        }
    }

    /* Allocate the staging buffer.  calloc (0, 1) is allowed to return
       NULL, which must not be mistaken for an allocation failure, so at
       least one byte is always requested. */
    tmp_buffer = calloc (max_size ? max_size : 1, 1);
    if (NULL == tmp_buffer) {
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    /* in-place alltoallv slow algorithm (but works) */
    for (i = 0 ; i < size ; ++i) {
        for (j = i+1 ; j < size ; ++j) {
            /* Only the two members of the pair take part in this step */
            if (i == rank) {
                peer = j;
            } else if (j == rank) {
                peer = i;
            } else {
                continue;
            }

            /* Nothing to exchange with this peer */
            if (0 == rcounts[peer]) {
                continue;
            }

            preq = basic_module->mccb_reqs;

            /* Displacements are expressed in elements of rdtype, so they
               are scaled by the extent to obtain a byte offset */
            peer_block = (char *) rbuf + (ptrdiff_t) rdisps[peer] * ext;

            /* Copy the outgoing block into the temporary buffer */
            err = ompi_datatype_copy_content_same_ddt (rdtype, rcounts[peer],
                                                       tmp_buffer, peer_block);
            if (MPI_SUCCESS != err) { goto error_hndl; }

            /* Exchange data with the peer */
            err = MCA_PML_CALL(irecv (peer_block, rcounts[peer], rdtype,
                                      peer, MCA_COLL_BASE_TAG_ALLTOALLV, comm, preq++));
            if (MPI_SUCCESS != err) { goto error_hndl; }

            err = MCA_PML_CALL(isend ((void *) tmp_buffer, rcounts[peer], rdtype,
                                      peer, MCA_COLL_BASE_TAG_ALLTOALLV, MCA_PML_BASE_SEND_STANDARD,
                                      comm, preq++));
            if (MPI_SUCCESS != err) { goto error_hndl; }

            /* Wait for the requests to complete */
            err = ompi_request_wait_all (2, basic_module->mccb_reqs, MPI_STATUS_IGNORE);
            if (MPI_SUCCESS != err) { goto error_hndl; }

            /* Free the requests. */
            mca_coll_basic_free_reqs(basic_module->mccb_reqs, 2);
        }
    }

 error_hndl:
    /* NOTE(review): requests posted before a failure are not released on
       this path -- confirm whether the caller or the module cleans up. */

    /* Free the temporary buffer */
    free (tmp_buffer);

    /* All done */
    return err;
}
/*
* alltoallv_intra
*
@ -56,6 +154,10 @@ mca_coll_basic_alltoallv_intra(void *sbuf, int *scounts, int *sdisps,
mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t*) module;
/* Initialize. */
if (MPI_IN_PLACE == sbuf) {
return mca_coll_basic_alltoallv_intra_inplace (rbuf, rcounts, rdisps,
rdtype, comm, module);
}
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);

Просмотреть файл

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
@ -10,6 +11,8 @@
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -28,6 +31,103 @@
#include "ompi/mca/pml/pml.h"
/*
 *	alltoallw_intra_inplace
 *
 *	Function:	- MPI_Alltoallw with MPI_IN_PLACE (rbuf is both the send
 *			  and the receive buffer)
 *	Accepts:	- rbuf:    buffer holding the blocks to exchange
 *			- rcounts: per-rank element counts
 *			- rdisps:  per-rank block displacements, added to rbuf
 *			           as byte offsets
 *			- rdtypes: per-rank datatypes
 *			- comm:    communicator
 *			- module:  basic collective module (provides the
 *			           request array)
 *	Returns:	- MPI_SUCCESS, OMPI_ERR_OUT_OF_RESOURCE if the staging
 *			  buffer cannot be allocated, or the first error
 *			  reported by the copy / PML / wait calls
 *
 *	The algorithm walks every (i, j) pair with i < j.  The two ranks of a
 *	pair stage their outgoing block in a temporary buffer, post a receive
 *	directly into rbuf and a send from the staging buffer, and wait for
 *	both requests before moving on.  Pairs whose local count is zero are
 *	skipped.
 */
static int
mca_coll_basic_alltoallw_intra_inplace(void *rbuf, int *rcounts, const int *rdisps,
                                       struct ompi_datatype_t * const *rdtypes,
                                       struct ompi_communicator_t *comm,
                                       mca_coll_base_module_t *module)
{
    mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t*) module;
    /* err starts out as success: when every count is zero no call below
       ever assigns it */
    int i, j, peer, size, rank, err = MPI_SUCCESS;
    MPI_Request *preq;
    char *tmp_buffer, *peer_block;
    /* byte counts are kept in size_t so large blocks are not narrowed to int */
    size_t max_size, block_size;
    ptrdiff_t ext;

    /* Initialize. */
    size = ompi_comm_size(comm);
    rank = ompi_comm_rank(comm);

    /* If only one process, we're done. */
    if (1 == size) {
        return MPI_SUCCESS;
    }

    /* Find the largest receive amount.  Each peer has its own datatype
       and count, so both are indexed by i (not by rank); otherwise the
       staging buffer can be smaller than the block copied into it. */
    for (i = 0, max_size = 0 ; i < size ; ++i) {
        ompi_datatype_type_extent (rdtypes[i], &ext);
        block_size = (size_t) ext * (size_t) rcounts[i];
        if (block_size > max_size) {
            max_size = block_size;
        }
    }

    /* Allocate the staging buffer.  calloc (0, 1) is allowed to return
       NULL, which must not be mistaken for an allocation failure, so at
       least one byte is always requested. */
    tmp_buffer = calloc (max_size ? max_size : 1, 1);
    if (NULL == tmp_buffer) {
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    /* in-place alltoallw slow algorithm (but works) */
    for (i = 0 ; i < size ; ++i) {
        for (j = i+1 ; j < size ; ++j) {
            /* Only the two members of the pair take part in this step */
            if (i == rank) {
                peer = j;
            } else if (j == rank) {
                peer = i;
            } else {
                continue;
            }

            /* Nothing to exchange with this peer */
            if (0 == rcounts[peer]) {
                continue;
            }

            preq = basic_module->mccb_reqs;
            peer_block = (char *) rbuf + rdisps[peer];

            /* Copy the outgoing block into the temporary buffer */
            err = ompi_datatype_copy_content_same_ddt (rdtypes[peer], rcounts[peer],
                                                       tmp_buffer, peer_block);
            if (MPI_SUCCESS != err) { goto error_hndl; }

            /* Exchange data with the peer */
            err = MCA_PML_CALL(irecv (peer_block, rcounts[peer], rdtypes[peer],
                                      peer, MCA_COLL_BASE_TAG_ALLTOALLW, comm, preq++));
            if (MPI_SUCCESS != err) { goto error_hndl; }

            err = MCA_PML_CALL(isend ((void *) tmp_buffer, rcounts[peer], rdtypes[peer],
                                      peer, MCA_COLL_BASE_TAG_ALLTOALLW, MCA_PML_BASE_SEND_STANDARD,
                                      comm, preq++));
            if (MPI_SUCCESS != err) { goto error_hndl; }

            /* Wait for the requests to complete */
            err = ompi_request_wait_all (2, basic_module->mccb_reqs, MPI_STATUS_IGNORE);
            if (MPI_SUCCESS != err) { goto error_hndl; }

            /* Free the requests. */
            mca_coll_basic_free_reqs(basic_module->mccb_reqs, 2);
        }
    }

 error_hndl:
    /* NOTE(review): requests posted before a failure are not released on
       this path -- confirm whether the caller or the module cleans up. */

    /* Free the temporary buffer */
    free (tmp_buffer);

    /* All done */
    return err;
}
/*
* alltoallw_intra
*
@ -54,6 +154,10 @@ mca_coll_basic_alltoallw_intra(void *sbuf, int *scounts, int *sdisps,
mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t*) module;
/* Initialize. */
if (MPI_IN_PLACE == sbuf) {
return mca_coll_basic_alltoallw_intra_inplace (rbuf, rcounts, rdisps,
rdtypes, comm, module);
}
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);

Просмотреть файл

@ -550,10 +550,6 @@ static inline void NBC_SchedCache_dictwipe(hb_tree *dict, int *size) {
if(sendbuf == MPI_IN_PLACE) { \
sendbuf = recvbuf; \
inplace = 1; \
} else \
if(recvbuf == MPI_IN_PLACE) { \
recvbuf = sendbuf; \
inplace = 1; \
} \
}

Просмотреть файл

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
@ -11,6 +12,8 @@
* All rights reserved.
* Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -424,6 +427,14 @@ struct mca_coll_tuned_module_t {
typedef struct mca_coll_tuned_module_t mca_coll_tuned_module_t;
OBJ_CLASS_DECLARATION(mca_coll_tuned_module_t);
/* Hand the first `count` entries of `reqs` to ompi_request_free (), one
   slot at a time and in ascending order.  Nothing happens when `count`
   is zero or negative. */
static inline void mca_coll_tuned_free_reqs(ompi_request_t ** reqs,
                                            int count)
{
    ompi_request_t **slot = reqs;
    int remaining;

    for (remaining = count; remaining > 0; --remaining, ++slot) {
        ompi_request_free(slot);
    }
}
END_C_DECLS
#define COLL_TUNED_UPDATE_BINTREE( OMPI_COMM, TUNED_MODULE, ROOT ) \

Просмотреть файл

@ -51,6 +51,98 @@ static mca_base_var_enum_value_t alltoall_algorithms[] = {
{0, NULL}
};
/*
 * MPI_IN_PLACE all to all algorithm. TODO: implement a better one.
 *
 * rbuf is both the send and the receive buffer and is addressed as one
 * block of rcount elements of rdtype per rank.  The requests are taken
 * from the tuned module's request array.
 *
 * The algorithm walks every (i, j) pair with i < j.  The two ranks of a
 * pair stage their outgoing block in a temporary buffer, post a receive
 * directly into rbuf and a send from the staging buffer, and wait for
 * both requests before moving on.
 *
 * Returns MPI_SUCCESS, OMPI_ERR_OUT_OF_RESOURCE if the staging buffer
 * cannot be allocated, or the first error reported by the copy / PML /
 * wait calls.
 */
static int
mca_coll_tuned_alltoall_intra_basic_inplace(void *rbuf, int rcount,
                                            struct ompi_datatype_t *rdtype,
                                            struct ompi_communicator_t *comm,
                                            mca_coll_base_module_t *module)
{
    mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
    /* err starts out as success so the function never returns an
       indeterminate value */
    int i, j, peer, size, rank, err = MPI_SUCCESS;
    MPI_Request *preq;
    char *tmp_buffer, *peer_block;
    size_t max_size;
    ptrdiff_t ext;

    /* Initialize. */
    size = ompi_comm_size(comm);
    rank = ompi_comm_rank(comm);

    /* If only one process, we're done. */
    if (1 == size) {
        return MPI_SUCCESS;
    }

    /* All blocks have the same size: extent * rcount bytes */
    ompi_datatype_type_extent (rdtype, &ext);
    max_size = ext * rcount;

    /* Allocate the staging buffer.  calloc (0, 1) is allowed to return
       NULL, which must not be mistaken for an allocation failure, so at
       least one byte is always requested. */
    tmp_buffer = calloc (max_size ? max_size : 1, 1);
    if (NULL == tmp_buffer) {
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    /* in-place alltoall slow algorithm (but works) */
    for (i = 0 ; i < size ; ++i) {
        for (j = i+1 ; j < size ; ++j) {
            /* Only the two members of the pair take part in this step */
            if (i == rank) {
                peer = j;
            } else if (j == rank) {
                peer = i;
            } else {
                continue;
            }

            preq = tuned_module->tuned_data->mcct_reqs;
            peer_block = (char *) rbuf + max_size * peer;

            /* Copy the outgoing block into the temporary buffer */
            err = ompi_datatype_copy_content_same_ddt (rdtype, rcount, tmp_buffer,
                                                       peer_block);
            if (MPI_SUCCESS != err) { goto error_hndl; }

            /* Exchange data with the peer */
            err = MCA_PML_CALL(irecv (peer_block, rcount, rdtype,
                                      peer, MCA_COLL_BASE_TAG_ALLTOALL, comm, preq++));
            if (MPI_SUCCESS != err) { goto error_hndl; }

            err = MCA_PML_CALL(isend ((char *) tmp_buffer, rcount, rdtype,
                                      peer, MCA_COLL_BASE_TAG_ALLTOALL, MCA_PML_BASE_SEND_STANDARD,
                                      comm, preq++));
            if (MPI_SUCCESS != err) { goto error_hndl; }

            /* Wait for the requests to complete */
            err = ompi_request_wait_all (2, tuned_module->tuned_data->mcct_reqs, MPI_STATUS_IGNORE);
            if (MPI_SUCCESS != err) { goto error_hndl; }

            /* Free the requests. */
            mca_coll_tuned_free_reqs(tuned_module->tuned_data->mcct_reqs, 2);
        }
    }

 error_hndl:
    /* NOTE(review): requests posted before a failure are not released on
       this path -- confirm whether the caller or the module cleans up. */

    /* Free the temporary buffer */
    free (tmp_buffer);

    /* All done */
    return err;
}
int ompi_coll_tuned_alltoall_intra_pairwise(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void* rbuf, int rcount,
@ -62,6 +154,11 @@ int ompi_coll_tuned_alltoall_intra_pairwise(void *sbuf, int scount,
void * tmpsend, *tmprecv;
ptrdiff_t lb, sext, rext;
if (MPI_IN_PLACE == sbuf) {
return mca_coll_tuned_alltoall_intra_basic_inplace (rbuf, rcount, rdtype,
comm, module);
}
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
@ -121,6 +218,11 @@ int ompi_coll_tuned_alltoall_intra_bruck(void *sbuf, int scount,
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
#endif
if (MPI_IN_PLACE == sbuf) {
return mca_coll_tuned_alltoall_intra_basic_inplace (rbuf, rcount, rdtype,
comm, module);
}
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
@ -279,6 +381,11 @@ int ompi_coll_tuned_alltoall_intra_linear_sync(void *sbuf, int scount,
ompi_request_t **reqs = NULL;
if (MPI_IN_PLACE == sbuf) {
return mca_coll_tuned_alltoall_intra_basic_inplace (rbuf, rcount, rdtype,
comm, module);
}
/* Initialize. */
size = ompi_comm_size(comm);
@ -418,6 +525,11 @@ int ompi_coll_tuned_alltoall_intra_two_procs(void *sbuf, int scount,
void * tmpsend, *tmprecv;
ptrdiff_t sext, rext, lb;
if (MPI_IN_PLACE == sbuf) {
return mca_coll_tuned_alltoall_intra_basic_inplace (rbuf, rcount, rdtype,
comm, module);
}
rank = ompi_comm_rank(comm);
OPAL_OUTPUT((ompi_coll_tuned_stream,
@ -481,7 +593,7 @@ int ompi_coll_tuned_alltoall_intra_basic_linear(void *sbuf, int scount,
void* rbuf, int rcount,
struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
mca_coll_base_module_t *module)
{
int i, rank, size, err, nreqs;
char *psnd, *prcv;
@ -490,6 +602,11 @@ int ompi_coll_tuned_alltoall_intra_basic_linear(void *sbuf, int scount,
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
if (MPI_IN_PLACE == sbuf) {
return mca_coll_tuned_alltoall_intra_basic_inplace (rbuf, rcount, rdtype,
comm, module);
}
/* Initialize. */
size = ompi_comm_size(comm);

Просмотреть файл

@ -45,6 +45,101 @@ static mca_base_var_enum_value_t alltoallv_algorithms[] = {
{0, NULL}
};
/*
 * MPI_IN_PLACE alltoallv algorithm for the tuned component.
 *
 * rbuf is both the send and the receive buffer.  rcounts[] holds the
 * per-rank element counts and rdisps[] the per-rank displacements, in
 * units of the extent of rdtype.  The requests are taken from the tuned
 * module's request array.
 *
 * The algorithm walks every (i, j) pair with i < j.  The two ranks of a
 * pair stage their outgoing block in a temporary buffer, post a receive
 * directly into rbuf and a send from the staging buffer, and wait for
 * both requests before moving on.  Pairs whose local count is zero are
 * skipped.
 *
 * Returns MPI_SUCCESS, OMPI_ERR_OUT_OF_RESOURCE if the staging buffer
 * cannot be allocated, or the first error reported by the copy / PML /
 * wait calls.
 */
static int
mca_coll_tuned_alltoallv_intra_basic_inplace(void *rbuf, const int *rcounts, const int *rdisps,
                                             struct ompi_datatype_t *rdtype,
                                             struct ompi_communicator_t *comm,
                                             mca_coll_base_module_t *module)
{
    mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
    /* err starts out as success: when every count is zero no call below
       ever assigns it */
    int i, j, peer, size, rank, err = MPI_SUCCESS;
    MPI_Request *preq;
    char *tmp_buffer, *peer_block;
    size_t max_size, block_size;
    ptrdiff_t ext;

    /* Initialize. */
    size = ompi_comm_size(comm);
    rank = ompi_comm_rank(comm);

    /* If only one process, we're done. */
    if (1 == size) {
        return MPI_SUCCESS;
    }

    /* Find the largest receive amount.  Every peer's count has to be
       inspected (rcounts[i], not rcounts[rank]); otherwise the staging
       buffer can be smaller than the block copied into it. */
    ompi_datatype_type_extent (rdtype, &ext);
    for (i = 0, max_size = 0 ; i < size ; ++i) {
        block_size = (size_t) ext * (size_t) rcounts[i];
        if (block_size > max_size) {
            max_size = block_size;
        }
    }

    /* Allocate the staging buffer.  calloc (0, 1) is allowed to return
       NULL, which must not be mistaken for an allocation failure, so at
       least one byte is always requested. */
    tmp_buffer = calloc (max_size ? max_size : 1, 1);
    if (NULL == tmp_buffer) {
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    /* in-place alltoallv slow algorithm (but works) */
    for (i = 0 ; i < size ; ++i) {
        for (j = i+1 ; j < size ; ++j) {
            /* Only the two members of the pair take part in this step */
            if (i == rank) {
                peer = j;
            } else if (j == rank) {
                peer = i;
            } else {
                continue;
            }

            /* Nothing to exchange with this peer */
            if (0 == rcounts[peer]) {
                continue;
            }

            preq = tuned_module->tuned_data->mcct_reqs;

            /* Displacements are expressed in elements of rdtype, so they
               are scaled by the extent to obtain a byte offset */
            peer_block = (char *) rbuf + (ptrdiff_t) rdisps[peer] * ext;

            /* Copy the outgoing block into the temporary buffer */
            err = ompi_datatype_copy_content_same_ddt (rdtype, rcounts[peer],
                                                       tmp_buffer, peer_block);
            if (MPI_SUCCESS != err) { goto error_hndl; }

            /* Exchange data with the peer */
            err = MCA_PML_CALL(irecv (peer_block, rcounts[peer], rdtype,
                                      peer, MCA_COLL_BASE_TAG_ALLTOALLV, comm, preq++));
            if (MPI_SUCCESS != err) { goto error_hndl; }

            err = MCA_PML_CALL(isend ((void *) tmp_buffer, rcounts[peer], rdtype,
                                      peer, MCA_COLL_BASE_TAG_ALLTOALLV, MCA_PML_BASE_SEND_STANDARD,
                                      comm, preq++));
            if (MPI_SUCCESS != err) { goto error_hndl; }

            /* Wait for the requests to complete */
            err = ompi_request_wait_all (2, tuned_module->tuned_data->mcct_reqs, MPI_STATUS_IGNORE);
            if (MPI_SUCCESS != err) { goto error_hndl; }

            /* Free the requests. */
            mca_coll_tuned_free_reqs(tuned_module->tuned_data->mcct_reqs, 2);
        }
    }

 error_hndl:
    /* NOTE(review): requests posted before a failure are not released on
       this path -- confirm whether the caller or the module cleans up. */

    /* Free the temporary buffer */
    free (tmp_buffer);

    /* All done */
    return err;
}
int
ompi_coll_tuned_alltoallv_intra_pairwise(void *sbuf, int *scounts, int *sdisps,
struct ompi_datatype_t *sdtype,
@ -57,6 +152,11 @@ ompi_coll_tuned_alltoallv_intra_pairwise(void *sbuf, int *scounts, int *sdisps,
void *psnd, *prcv;
ptrdiff_t sext, rext;
if (MPI_IN_PLACE == sbuf) {
return mca_coll_tuned_alltoallv_intra_basic_inplace (rbuf, rcounts, rdisps,
rdtype, comm, module);
}
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
@ -119,6 +219,11 @@ ompi_coll_tuned_alltoallv_intra_basic_linear(void *sbuf, int *scounts, int *sdis
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
if (MPI_IN_PLACE == sbuf) {
return mca_coll_tuned_alltoallv_intra_basic_inplace (rbuf, rcounts, rdisps,
rdtype, comm, module);
}
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);

Просмотреть файл

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
@ -10,6 +11,8 @@
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -57,6 +60,11 @@ int MPI_Alltoall(void *sendbuf, int sendcount, MPI_Datatype sendtype,
/* Unrooted operation -- same checks for all ranks on both
intracommunicators and intercommunicators */
if (MPI_IN_PLACE == sendbuf) {
sendcount = recvcount;
sendtype = recvtype;
}
err = MPI_SUCCESS;
OMPI_ERR_INIT_FINALIZE(FUNC_NAME);
if (ompi_comm_invalid(comm)) {
@ -66,7 +74,7 @@ int MPI_Alltoall(void *sendbuf, int sendcount, MPI_Datatype sendtype,
err = MPI_ERR_TYPE;
} else if (recvcount < 0) {
err = MPI_ERR_COUNT;
} else if (MPI_IN_PLACE == sendbuf || MPI_IN_PLACE == recvbuf) {
} else if (MPI_IN_PLACE == recvbuf) {
err = MPI_ERR_ARG;
} else {
OMPI_CHECK_DATATYPE_FOR_SEND(err, sendtype, sendcount);

Просмотреть файл

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
@ -10,7 +11,7 @@
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2012 Los Alamos National Security, LLC. All rights
* Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
@ -82,9 +83,15 @@ int MPI_Alltoallv(void *sendbuf, int sendcounts[], int sdispls[],
FUNC_NAME);
}
if (MPI_IN_PLACE == sendbuf) {
sendcounts = recvcounts;
sdispls = rdispls;
sendtype = recvtype;
}
if ((NULL == sendcounts) || (NULL == sdispls) ||
(NULL == recvcounts) || (NULL == rdispls) ||
MPI_IN_PLACE == sendbuf || MPI_IN_PLACE == recvbuf) {
MPI_IN_PLACE == recvbuf) {
return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME);
}

Просмотреть файл

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
@ -10,7 +11,7 @@
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2012 Los Alamos National Security, LLC. All rights
* Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
@ -81,9 +82,15 @@ int MPI_Alltoallw(void *sendbuf, int sendcounts[], int sdispls[],
FUNC_NAME);
}
if (MPI_IN_PLACE == sendbuf) {
sendcounts = recvcounts;
sdispls = rdispls;
sendtypes = recvtypes;
}
if ((NULL == sendcounts) || (NULL == sdispls) || (NULL == sendtypes) ||
(NULL == recvcounts) || (NULL == rdispls) || (NULL == recvtypes) ||
MPI_IN_PLACE == sendbuf || MPI_IN_PLACE == recvbuf) {
MPI_IN_PLACE == recvbuf) {
return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME);
}