1
1

Fix the communication ordering for all Cartesian neighbor collectives.

This work is rooted in the [MPI Forum issue
153](https://github.com/mpi-forum/mpi-issues/issues/153).

Signed-off-by: George Bosilca <bosilca@icl.utk.edu>
(cherry picked from commit 86acdee4606c1ac3b38070d1b7973a00a991f1d6)
Этот коммит содержится в:
George Bosilca 2019-12-11 12:40:38 -05:00 коммит произвёл Jeff Squyres
родитель 21221eb70a
Коммит be58cf7982
6 изменённых файлов: 33 добавлений и 25 удалений

Просмотреть файл

@ -43,6 +43,8 @@
#define MCA_COLL_BASE_TAG_SCATTERV -26
/*
 * Tag ranges below grow towards more negative values and must not overlap.
 * Note that (-1 * INT_MAX/2) parses as ((-1 * INT_MAX) / 2).
 */
/* Nonblocking range: [NONBLOCKING_END, NONBLOCKING_BASE]. */
#define MCA_COLL_BASE_TAG_NONBLOCKING_BASE -27
#define MCA_COLL_BASE_TAG_NONBLOCKING_END ((-1 * INT_MAX/2) + 1)
/*
 * Neighbor collective range: starts right below the nonblocking range and
 * spans 1024 tags below NEIGHBOR_BASE. The cartesian neighbor collectives use
 * NEIGHBOR_BASE - 2 * dim and NEIGHBOR_BASE - 2 * dim - 1, i.e. two tags per
 * dimension.
 */
#define MCA_COLL_BASE_TAG_NEIGHBOR_BASE (MCA_COLL_BASE_TAG_NONBLOCKING_END - 1)
#define MCA_COLL_BASE_TAG_NEIGHBOR_END (MCA_COLL_BASE_TAG_NEIGHBOR_BASE - 1024)
/*
 * HCOLL range: anchored right below the neighbor range. It used to start at
 * (-1 * INT_MAX/2), which is the same value as NEIGHBOR_BASE, so the two
 * ranges overlapped.
 */
#define MCA_COLL_BASE_TAG_HCOLL_BASE (MCA_COLL_BASE_TAG_NEIGHBOR_END - 1)
#define MCA_COLL_BASE_TAG_HCOLL_END (-1 * INT_MAX)
#endif /* MCA_COLL_BASE_TAGS_H */

Просмотреть файл

@ -3,7 +3,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2016 The University of Tennessee and The University
* Copyright (c) 2004-2019 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -70,7 +70,7 @@ mca_coll_basic_neighbor_allgather_cart(const void *sbuf, int scount,
if (MPI_PROC_NULL != srank) {
nreqs++;
rc = MCA_PML_CALL(irecv(rbuf, rcount, rdtype, srank,
MCA_COLL_BASE_TAG_ALLGATHER,
MCA_COLL_BASE_TAG_NEIGHBOR_BASE - 2 * dim,
comm, preqs++));
if (OMPI_SUCCESS != rc) break;
@ -78,7 +78,7 @@ mca_coll_basic_neighbor_allgather_cart(const void *sbuf, int scount,
/* remove cast from const when the pml layer is updated to take
* a const for the send buffer. */
rc = MCA_PML_CALL(isend((void *) sbuf, scount, sdtype, srank,
MCA_COLL_BASE_TAG_ALLGATHER,
MCA_COLL_BASE_TAG_NEIGHBOR_BASE - 2 * dim - 1,
MCA_PML_BASE_SEND_STANDARD,
comm, preqs++));
if (OMPI_SUCCESS != rc) break;
@ -89,13 +89,13 @@ mca_coll_basic_neighbor_allgather_cart(const void *sbuf, int scount,
if (MPI_PROC_NULL != drank) {
nreqs++;
rc = MCA_PML_CALL(irecv(rbuf, rcount, rdtype, drank,
MCA_COLL_BASE_TAG_ALLGATHER,
MCA_COLL_BASE_TAG_NEIGHBOR_BASE - 2 * dim - 1,
comm, preqs++));
if (OMPI_SUCCESS != rc) break;
nreqs++;
rc = MCA_PML_CALL(isend((void *) sbuf, scount, sdtype, drank,
MCA_COLL_BASE_TAG_ALLGATHER,
MCA_COLL_BASE_TAG_NEIGHBOR_BASE - 2 * dim,
MCA_PML_BASE_SEND_STANDARD,
comm, preqs++));
if (OMPI_SUCCESS != rc) break;

Просмотреть файл

@ -3,7 +3,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2016 The University of Tennessee and The University
* Copyright (c) 2004-2019 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -69,13 +69,14 @@ mca_coll_basic_neighbor_allgatherv_cart(const void *sbuf, int scount, struct omp
if (MPI_PROC_NULL != srank) {
nreqs++;
rc = MCA_PML_CALL(irecv((char *) rbuf + disps[i] * extent, rcounts[i], rdtype, srank,
MCA_COLL_BASE_TAG_ALLGATHER, comm, preqs++));
MCA_COLL_BASE_TAG_NEIGHBOR_BASE - 2 * dim, comm, preqs++));
if (OMPI_SUCCESS != rc) break;
/* remove cast from const when the pml layer is updated to take
* a const for the send buffer. */
nreqs++;
rc = MCA_PML_CALL(isend((void *) sbuf, scount, sdtype, srank, MCA_COLL_BASE_TAG_ALLGATHER,
rc = MCA_PML_CALL(isend((void *) sbuf, scount, sdtype, srank,
MCA_COLL_BASE_TAG_NEIGHBOR_BASE - 2 * dim - 1,
MCA_PML_BASE_SEND_STANDARD, comm, preqs++));
if (OMPI_SUCCESS != rc) break;
}
@ -83,11 +84,12 @@ mca_coll_basic_neighbor_allgatherv_cart(const void *sbuf, int scount, struct omp
if (MPI_PROC_NULL != drank) {
nreqs++;
rc = MCA_PML_CALL(irecv((char *) rbuf + disps[i+1] * extent, rcounts[i+1], rdtype, drank,
MCA_COLL_BASE_TAG_ALLGATHER, comm, preqs++));
MCA_COLL_BASE_TAG_NEIGHBOR_BASE - 2 * dim - 1, comm, preqs++));
if (OMPI_SUCCESS != rc) break;
nreqs++;
rc = MCA_PML_CALL(isend((void *) sbuf, scount, sdtype, drank, MCA_COLL_BASE_TAG_ALLGATHER,
rc = MCA_PML_CALL(isend((void *) sbuf, scount, sdtype, drank,
MCA_COLL_BASE_TAG_NEIGHBOR_BASE - 2 * dim,
MCA_PML_BASE_SEND_STANDARD, comm, preqs++));
if (OMPI_SUCCESS != rc) break;
}

Просмотреть файл

@ -3,7 +3,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2016 The University of Tennessee and The University
* Copyright (c) 2004-2019 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -37,6 +37,10 @@
#include "coll_basic.h"
#include "ompi/mca/topo/base/base.h"
/**
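* Only 1024 tags are reserved for the neighbor collectives, and each dimension
* uses two of them (MCA_COLL_BASE_TAG_NEIGHBOR_BASE - 2 * dim and
* MCA_COLL_BASE_TAG_NEIGHBOR_BASE - 2 * dim - 1), so for now we only support
* 512 dimensions.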
*/
static int
mca_coll_basic_neighbor_alltoall_cart(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf,
int rcount, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm,
@ -68,7 +72,7 @@ mca_coll_basic_neighbor_alltoall_cart(const void *sbuf, int scount, struct ompi_
if (MPI_PROC_NULL != srank) {
nreqs++;
rc = MCA_PML_CALL(irecv(rbuf, rcount, rdtype, srank,
MCA_COLL_BASE_TAG_NONBLOCKING_BASE - 2 * dim,
MCA_COLL_BASE_TAG_NEIGHBOR_BASE - 2 * dim,
comm, preqs++));
if (OMPI_SUCCESS != rc) break;
}
@ -78,7 +82,7 @@ mca_coll_basic_neighbor_alltoall_cart(const void *sbuf, int scount, struct ompi_
if (MPI_PROC_NULL != drank) {
nreqs++;
rc = MCA_PML_CALL(irecv(rbuf, rcount, rdtype, drank,
MCA_COLL_BASE_TAG_NONBLOCKING_BASE - 2 * dim - 1,
MCA_COLL_BASE_TAG_NEIGHBOR_BASE - 2 * dim - 1,
comm, preqs++));
if (OMPI_SUCCESS != rc) break;
}
@ -105,7 +109,7 @@ mca_coll_basic_neighbor_alltoall_cart(const void *sbuf, int scount, struct ompi_
* a const for the send buffer. */
nreqs++;
rc = MCA_PML_CALL(isend((void *) sbuf, scount, sdtype, srank,
MCA_COLL_BASE_TAG_NONBLOCKING_BASE - 2 * dim - 1,
MCA_COLL_BASE_TAG_NEIGHBOR_BASE - 2 * dim - 1,
MCA_PML_BASE_SEND_STANDARD,
comm, preqs++));
if (OMPI_SUCCESS != rc) break;
@ -116,7 +120,7 @@ mca_coll_basic_neighbor_alltoall_cart(const void *sbuf, int scount, struct ompi_
if (MPI_PROC_NULL != drank) {
nreqs++;
rc = MCA_PML_CALL(isend((void *) sbuf, scount, sdtype, drank,
MCA_COLL_BASE_TAG_NONBLOCKING_BASE - 2 * dim,
MCA_COLL_BASE_TAG_NEIGHBOR_BASE - 2 * dim,
MCA_PML_BASE_SEND_STANDARD,
comm, preqs++));
if (OMPI_SUCCESS != rc) break;

Просмотреть файл

@ -3,7 +3,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2016 The University of Tennessee and The University
* Copyright (c) 2004-2019 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -68,14 +68,14 @@ mca_coll_basic_neighbor_alltoallv_cart(const void *sbuf, const int scounts[], co
if (MPI_PROC_NULL != srank) {
nreqs++;
rc = MCA_PML_CALL(irecv((char *) rbuf + rdisps[i] * rdextent, rcounts[i], rdtype, srank,
MCA_COLL_BASE_TAG_ALLTOALL, comm, preqs++));
MCA_COLL_BASE_TAG_NEIGHBOR_BASE - 2 * dim, comm, preqs++));
if (OMPI_SUCCESS != rc) break;
}
if (MPI_PROC_NULL != drank) {
nreqs++;
rc = MCA_PML_CALL(irecv((char *) rbuf + rdisps[i+1] * rdextent, rcounts[i+1], rdtype, drank,
MCA_COLL_BASE_TAG_ALLTOALL, comm, preqs++));
MCA_COLL_BASE_TAG_NEIGHBOR_BASE - 2 * dim - 1, comm, preqs++));
if (OMPI_SUCCESS != rc) break;
}
}
@ -98,14 +98,14 @@ mca_coll_basic_neighbor_alltoallv_cart(const void *sbuf, const int scounts[], co
nreqs++;
/* remove cast from const when the pml layer is updated to take a const for the send buffer */
rc = MCA_PML_CALL(isend((char *) sbuf + sdisps[i] * sdextent, scounts[i], sdtype, srank,
MCA_COLL_BASE_TAG_ALLTOALL, MCA_PML_BASE_SEND_STANDARD, comm, preqs++));
MCA_COLL_BASE_TAG_NEIGHBOR_BASE - 2 * dim - 1, MCA_PML_BASE_SEND_STANDARD, comm, preqs++));
if (OMPI_SUCCESS != rc) break;
}
if (MPI_PROC_NULL != drank) {
nreqs++;
rc = MCA_PML_CALL(isend((char *) sbuf + sdisps[i+1] * sdextent, scounts[i+1], sdtype, drank,
MCA_COLL_BASE_TAG_ALLTOALL, MCA_PML_BASE_SEND_STANDARD, comm, preqs++));
MCA_COLL_BASE_TAG_NEIGHBOR_BASE - 2 * dim, MCA_PML_BASE_SEND_STANDARD, comm, preqs++));
if (OMPI_SUCCESS != rc) break;
}
}

Просмотреть файл

@ -3,7 +3,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2016 The University of Tennessee and The University
* Copyright (c) 2004-2019 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -65,14 +65,14 @@ mca_coll_basic_neighbor_alltoallw_cart(const void *sbuf, const int scounts[], co
if (MPI_PROC_NULL != srank) {
nreqs++;
rc = MCA_PML_CALL(irecv((char *) rbuf + rdisps[i], rcounts[i], rdtypes[i], srank,
MCA_COLL_BASE_TAG_ALLTOALL, comm, preqs++));
MCA_COLL_BASE_TAG_NEIGHBOR_BASE - 2 * dim, comm, preqs++));
if (OMPI_SUCCESS != rc) break;
}
if (MPI_PROC_NULL != drank) {
nreqs++;
rc = MCA_PML_CALL(irecv((char *) rbuf + rdisps[i+1], rcounts[i+1], rdtypes[i+1], drank,
MCA_COLL_BASE_TAG_ALLTOALL, comm, preqs++));
MCA_COLL_BASE_TAG_NEIGHBOR_BASE - 2 * dim - 1, comm, preqs++));
if (OMPI_SUCCESS != rc) break;
}
}
@ -95,14 +95,14 @@ mca_coll_basic_neighbor_alltoallw_cart(const void *sbuf, const int scounts[], co
nreqs++;
/* remove cast from const when the pml layer is updated to take a const for the send buffer */
rc = MCA_PML_CALL(isend((char *) sbuf + sdisps[i], scounts[i], sdtypes[i], srank,
MCA_COLL_BASE_TAG_ALLTOALL, MCA_PML_BASE_SEND_STANDARD, comm, preqs++));
MCA_COLL_BASE_TAG_NEIGHBOR_BASE - 2 * dim - 1, MCA_PML_BASE_SEND_STANDARD, comm, preqs++));
if (OMPI_SUCCESS != rc) break;
}
if (MPI_PROC_NULL != drank) {
nreqs++;
rc = MCA_PML_CALL(isend((char *) sbuf + sdisps[i+1], scounts[i+1], sdtypes[i+1], drank,
MCA_COLL_BASE_TAG_ALLTOALL, MCA_PML_BASE_SEND_STANDARD, comm, preqs++));
MCA_COLL_BASE_TAG_NEIGHBOR_BASE - 2 * dim, MCA_PML_BASE_SEND_STANDARD, comm, preqs++));
if (OMPI_SUCCESS != rc) break;
}
}