1
1
openmpi/ompi/mpi/c/gather.c

186 строки
7.1 KiB
C
Исходник Обычный вид История

/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2012 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2008 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006-2007 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2008 University of Houston. All rights reserved.
* Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include <stdio.h>
#include "ompi/mpi/c/bindings.h"
#include "ompi/runtime/params.h"
#include "ompi/communicator/communicator.h"
#include "ompi/errhandler/errhandler.h"
- Split the datatype engine into two parts: an MPI specific part in OMPI and a language agnostic part in OPAL. The convertor is completely moved into OPAL. This offers several benefits as described in RFC http://www.open-mpi.org/community/lists/devel/2009/07/6387.php namely: - Fewer basic types (int* and float* types, boolean and wchar - Fixing naming scheme to ompi-nomenclature. - Usability outside of the ompi-layer. - Due to the fixed nature of simple opal types, their information is completely known at compile time and therefore constified - With fewer datatypes (22), the actual sizes of bit-field types may be reduced from 64 to 32 bits, allowing reorganizing the opal_datatype structure, eliminating holes and keeping data required in convertor (upon send/recv) in one cacheline... This has implications to the convertor-datastructure and other parts of the code. - Several performance tests have been run, the netpipe latency does not change with this patch on Linux/x86-64 on the smoky cluster. - Extensive tests have been done to verify correctness (no new regressions) using: 1. mpi_test_suite on linux/x86-64 using clean ompi-trunk and ompi-ddt: a. running both trunk and ompi-ddt resulted in no differences (except for MPI_SHORT_INT and MPI_TYPE_MIX_LB_UB do now run correctly). b. with --enable-memchecker and running under valgrind (one buglet when run with static found in test-suite, commited) 2. ibm testsuite on linux/x86-64 using clean ompi-trunk and ompi-ddt: all passed (except for the dynamic/ tests failed!! as trunk/MTT) 3. compilation and usage of HDF5 tests on Jaguar using PGI and PathScale compilers. 4. compilation and usage on Scicortex. - Please note, that for the heterogeneous case, (-m32 compiled binaries/ompi), neither ompi-trunk, nor ompi-ddt branch would successfully launch. This commit was SVN r21641.
2009-07-13 08:56:31 +04:00
#include "ompi/datatype/ompi_datatype.h"
#include "ompi/memchecker.h"
#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES
#pragma weak MPI_Gather = PMPI_Gather
#endif
#if OMPI_PROFILING_DEFINES
#include "ompi/mpi/c/profile/defines.h"
#endif
static const char FUNC_NAME[] = "MPI_Gather";
int MPI_Gather(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
void *recvbuf, int recvcount, MPI_Datatype recvtype,
int root, MPI_Comm comm)
{
int err;
MEMCHECKER(
int rank;
ptrdiff_t ext;
rank = ompi_comm_rank(comm);
- Split the datatype engine into two parts: an MPI specific part in OMPI and a language agnostic part in OPAL. The convertor is completely moved into OPAL. This offers several benefits as described in RFC http://www.open-mpi.org/community/lists/devel/2009/07/6387.php namely: - Fewer basic types (int* and float* types, boolean and wchar - Fixing naming scheme to ompi-nomenclature. - Usability outside of the ompi-layer. - Due to the fixed nature of simple opal types, their information is completely known at compile time and therefore constified - With fewer datatypes (22), the actual sizes of bit-field types may be reduced from 64 to 32 bits, allowing reorganizing the opal_datatype structure, eliminating holes and keeping data required in convertor (upon send/recv) in one cacheline... This has implications to the convertor-datastructure and other parts of the code. - Several performance tests have been run, the netpipe latency does not change with this patch on Linux/x86-64 on the smoky cluster. - Extensive tests have been done to verify correctness (no new regressions) using: 1. mpi_test_suite on linux/x86-64 using clean ompi-trunk and ompi-ddt: a. running both trunk and ompi-ddt resulted in no differences (except for MPI_SHORT_INT and MPI_TYPE_MIX_LB_UB do now run correctly). b. with --enable-memchecker and running under valgrind (one buglet when run with static found in test-suite, commited) 2. ibm testsuite on linux/x86-64 using clean ompi-trunk and ompi-ddt: all passed (except for the dynamic/ tests failed!! as trunk/MTT) 3. compilation and usage of HDF5 tests on Jaguar using PGI and PathScale compilers. 4. compilation and usage on Scicortex. - Please note, that for the heterogeneous case, (-m32 compiled binaries/ompi), neither ompi-trunk, nor ompi-ddt branch would successfully launch. This commit was SVN r21641.
2009-07-13 08:56:31 +04:00
ompi_datatype_type_extent(recvtype, &ext);
memchecker_comm(comm);
if(OMPI_COMM_IS_INTRA(comm)) {
if(ompi_comm_rank(comm) == root) {
/* check whether root's send buffer is defined. */
if (MPI_IN_PLACE == sendbuf) {
memchecker_call(&opal_memchecker_base_isdefined,
(char *)(recvbuf)+rank*ext,
recvcount, recvtype);
} else {
memchecker_datatype(sendtype);
memchecker_call(&opal_memchecker_base_isdefined, sendbuf, sendcount, sendtype);
}
memchecker_datatype(recvtype);
/* check whether root's receive buffer is addressable. */
memchecker_call(&opal_memchecker_base_isaddressable, recvbuf, recvcount, recvtype);
} else {
memchecker_datatype(sendtype);
/* check whether send buffer is defined on other processes. */
memchecker_call(&opal_memchecker_base_isdefined, sendbuf, sendcount, sendtype);
}
} else {
if (MPI_ROOT == root) {
memchecker_datatype(recvtype);
/* check whether root's receive buffer is addressable. */
memchecker_call(&opal_memchecker_base_isaddressable, recvbuf, recvcount, recvtype);
} else if (MPI_PROC_NULL != root) {
memchecker_datatype(sendtype);
/* check whether send buffer is defined. */
memchecker_call(&opal_memchecker_base_isdefined, sendbuf, sendcount, sendtype);
}
}
);
if (MPI_PARAM_CHECK) {
err = MPI_SUCCESS;
OMPI_ERR_INIT_FINALIZE(FUNC_NAME);
if (ompi_comm_invalid(comm)) {
return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM,
FUNC_NAME);
} else if ((ompi_comm_rank(comm) != root && MPI_IN_PLACE == sendbuf) ||
(ompi_comm_rank(comm) == root && MPI_IN_PLACE == recvbuf)) {
return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME);
}
/* Errors for intracommunicators */
if (OMPI_COMM_IS_INTRA(comm)) {
/* Errors for all ranks */
if ((root >= ompi_comm_size(comm)) || (root < 0)) {
return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ROOT, FUNC_NAME);
}
if (MPI_IN_PLACE != sendbuf) {
OMPI_CHECK_DATATYPE_FOR_SEND(err, sendtype, sendcount);
}
OMPI_ERRHANDLER_CHECK(err, comm, err, FUNC_NAME);
/* Errors for the root. Some of these could have been
combined into compound if statements above, but since
this whole section can be compiled out (or turned off at
run time) for efficiency, it's more clear to separate
them out into individual tests. */
if (ompi_comm_rank(comm) == root) {
if (MPI_DATATYPE_NULL == recvtype || NULL == recvtype) {
return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_TYPE, FUNC_NAME);
}
if (recvcount < 0) {
return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_COUNT, FUNC_NAME);
}
}
}
/* Errors for intercommunicators */
else {
if (! ((root >= 0 && root < ompi_comm_remote_size(comm)) ||
MPI_ROOT == root || MPI_PROC_NULL == root)) {
return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ROOT, FUNC_NAME);
}
/* Errors for the senders */
if (MPI_ROOT != root && MPI_PROC_NULL != root) {
OMPI_CHECK_DATATYPE_FOR_SEND(err, sendtype, sendcount);
OMPI_ERRHANDLER_CHECK(err, comm, err, FUNC_NAME);
}
/* Errors for the root. Ditto on the comment above -- these
error checks could have been combined above, but let's
make the code easier to read. */
else if (MPI_ROOT == root) {
if (recvcount < 0) {
return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_COUNT, FUNC_NAME);
}
if (MPI_DATATYPE_NULL == recvtype || NULL == recvtype) {
return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_TYPE, FUNC_NAME);
}
}
}
}
/* Do we need to do anything? */
if ((0 == sendcount && MPI_ROOT != root &&
(ompi_comm_rank(comm) != root ||
(ompi_comm_rank(comm) == root && MPI_IN_PLACE != sendbuf))) ||
(ompi_comm_rank(comm) == root && MPI_IN_PLACE == sendbuf &&
0 == recvcount) ||
(0 == recvcount && (MPI_ROOT == root || MPI_PROC_NULL == root))) {
return MPI_SUCCESS;
}
OPAL_CR_ENTER_LIBRARY();
/* Invoke the coll component to perform the back-end operation */
/* XXX -- CONST -- do not cast away const -- update mca/coll */
err = comm->c_coll.coll_gather((void *) sendbuf, sendcount, sendtype, recvbuf,
recvcount, recvtype, root, comm,
comm->c_coll.coll_gather_module);
OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME);
}