From 3e559a14a99ac737d501818eca6a2d1acfdbeb42 Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Tue, 5 Jul 2016 13:36:56 +0900 Subject: [PATCH] coll/inter: fix non standard ddt handling - correctly handle non zero lower bound ddt - correctly handle ddt with size > extent Thanks Yuki Matsumoto for the report --- ompi/mca/coll/inter/coll_inter_allgather.c | 22 +++++------- ompi/mca/coll/inter/coll_inter_allgatherv.c | 33 +++++++---------- ompi/mca/coll/inter/coll_inter_allreduce.c | 15 ++++---- ompi/mca/coll/inter/coll_inter_gather.c | 27 ++++++-------- ompi/mca/coll/inter/coll_inter_gatherv.c | 31 ++++++---------- ompi/mca/coll/inter/coll_inter_reduce.c | 20 +++++------ ompi/mca/coll/inter/coll_inter_scatter.c | 24 ++++++------- ompi/mca/coll/inter/coll_inter_scatterv.c | 39 ++++++++------------- 8 files changed, 85 insertions(+), 126 deletions(-) diff --git a/ompi/mca/coll/inter/coll_inter_allgather.c b/ompi/mca/coll/inter/coll_inter_allgather.c index 38b85df494..7d5e22254d 100644 --- a/ompi/mca/coll/inter/coll_inter_allgather.c +++ b/ompi/mca/coll/inter/coll_inter_allgather.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2010 University of Houston. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * @@ -49,8 +49,8 @@ mca_coll_inter_allgather_inter(const void *sbuf, int scount, mca_coll_base_module_t *module) { int rank, root = 0, size, rsize, err; - char *ptmp = NULL; - ptrdiff_t slb, sextent, incr; + char *ptmp_free = NULL, *ptmp; + ptrdiff_t gap, span; ompi_request_t *req[2]; rank = ompi_comm_rank(comm); @@ -58,17 +58,13 @@ mca_coll_inter_allgather_inter(const void *sbuf, int scount, rsize = ompi_comm_remote_size(comm); /* Perform the gather locally at the root */ - err = ompi_datatype_get_extent(sdtype, &slb, &sextent); - if (OMPI_SUCCESS != err) { - return OMPI_ERROR; - } - if ( scount > 0 ) { - incr = sextent * scount; - ptmp = (char*)malloc(size * incr); - if (NULL == ptmp) { + span = opal_datatype_span(&sdtype->super, scount*size, &gap); + ptmp_free = (char*)malloc(span); + if (NULL == ptmp_free) { return OMPI_ERR_OUT_OF_RESOURCE; } + ptmp = ptmp_free - gap; err = comm->c_local_comm->c_coll.coll_gather(sbuf, scount, sdtype, ptmp, scount, sdtype, @@ -112,8 +108,8 @@ mca_coll_inter_allgather_inter(const void *sbuf, int scount, } exit: - if (NULL != ptmp) { - free(ptmp); + if (NULL != ptmp_free) { + free(ptmp_free); } return err; diff --git a/ompi/mca/coll/inter/coll_inter_allgatherv.c b/ompi/mca/coll/inter/coll_inter_allgatherv.c index 933b2fb4fa..a06d18be8f 100644 --- a/ompi/mca/coll/inter/coll_inter_allgatherv.c +++ b/ompi/mca/coll/inter/coll_inter_allgatherv.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2010 University of Houston. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * @@ -49,10 +49,7 @@ mca_coll_inter_allgatherv_inter(const void *sbuf, int scount, { int i, rank, size, size_local, total=0, err; int *count=NULL,*displace=NULL; - char *ptmp=NULL; - MPI_Aint incr; - MPI_Aint extent; - MPI_Aint lb; + char *ptmp_free=NULL, *ptmp; ompi_datatype_t *ndtype = NULL; ompi_request_t *req[2]; @@ -81,22 +78,19 @@ mca_coll_inter_allgatherv_inter(const void *sbuf, int scount, for (i = 1; i < size_local; i++) { displace[i] = displace[i-1] + count[i-1]; } - /* Perform the gatherv locally with the first process as root */ - err = ompi_datatype_get_extent(sdtype, &lb, &extent); - if (OMPI_SUCCESS != err) { - err = OMPI_ERROR; - goto exit; - } - incr = 0; + total = 0; for (i = 0; i < size_local; i++) { - incr = incr + extent*count[i]; + total = total + count[i]; } - if ( incr > 0 ) { - ptmp = (char*)malloc(incr); - if (NULL == ptmp) { + if ( total > 0 ) { + ptrdiff_t gap, span; + span = opal_datatype_span(&sdtype->super, total, &gap); + ptmp_free = (char*)malloc(span); + if (NULL == ptmp_free) { err = OMPI_ERR_OUT_OF_RESOURCE; goto exit; } + ptmp = ptmp_free - gap; } } err = comm->c_local_comm->c_coll.coll_gatherv(sbuf, scount, sdtype, @@ -111,9 +105,6 @@ mca_coll_inter_allgatherv_inter(const void *sbuf, int scount, ompi_datatype_commit(&ndtype); if (0 == rank) { - for (i = 0; i < size_local; i++) { - total = total + count[i]; - } /* Exchange data between roots */ err = MCA_PML_CALL(irecv(rbuf, 1, ndtype, 0, MCA_COLL_BASE_TAG_ALLGATHERV, comm, @@ -144,8 +135,8 @@ mca_coll_inter_allgatherv_inter(const void *sbuf, int scount, if( NULL != ndtype ) { ompi_datatype_destroy(&ndtype); } - if (NULL != ptmp) { - free(ptmp); + if (NULL != ptmp_free) { + free(ptmp_free); } if (NULL != displace) { free(displace); diff --git a/ompi/mca/coll/inter/coll_inter_allreduce.c b/ompi/mca/coll/inter/coll_inter_allreduce.c index 152fd2467c..58665461d1 100644 --- a/ompi/mca/coll/inter/coll_inter_allreduce.c +++ b/ompi/mca/coll/inter/coll_inter_allreduce.c @@ 
-10,8 +10,8 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2007 University of Houston. All rights reserved. - * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -48,23 +48,20 @@ mca_coll_inter_allreduce_inter(const void *sbuf, void *rbuf, int count, mca_coll_base_module_t *module) { int err, rank, root = 0; - ptrdiff_t lb, extent; char *tmpbuf = NULL, *pml_buffer = NULL; ompi_request_t *req[2]; + ptrdiff_t gap, span; rank = ompi_comm_rank(comm); /* Perform the reduction locally */ - err = ompi_datatype_get_extent(dtype, &lb, &extent); - if (OMPI_SUCCESS != err) { - return OMPI_ERROR; - } + span = opal_datatype_span(&dtype->super, count, &gap); - tmpbuf = (char *) malloc(count * extent); + tmpbuf = (char *) malloc(span); if (NULL == tmpbuf) { return OMPI_ERR_OUT_OF_RESOURCE; } - pml_buffer = tmpbuf - lb; + pml_buffer = tmpbuf - gap; err = comm->c_local_comm->c_coll.coll_reduce(sbuf, pml_buffer, count, dtype, op, root, diff --git a/ompi/mca/coll/inter/coll_inter_gather.c b/ompi/mca/coll/inter/coll_inter_gather.c index 9b10c6f556..2921ffd716 100644 --- a/ompi/mca/coll/inter/coll_inter_gather.c +++ b/ompi/mca/coll/inter/coll_inter_gather.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2007 University of Houston. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * @@ -47,11 +47,7 @@ mca_coll_inter_gather_inter(const void *sbuf, int scount, { int err; int rank; - int size,size_local; - char *ptmp = NULL; - MPI_Aint incr; - MPI_Aint extent; - MPI_Aint lb; + int size; size = ompi_comm_remote_size(comm); rank = ompi_comm_rank(comm); @@ -61,17 +57,18 @@ mca_coll_inter_gather_inter(const void *sbuf, int scount, err = OMPI_SUCCESS; } else if (MPI_ROOT != root) { /* Perform the gather locally with the first process as root */ - err = ompi_datatype_get_extent(sdtype, &lb, &extent); - if (OMPI_SUCCESS != err) { - return OMPI_ERROR; - } + char *ptmp_free = NULL, *ptmp; + int size_local; + ptrdiff_t gap, span; - incr = extent * scount; size_local = ompi_comm_size(comm->c_local_comm); - ptmp = (char*)malloc(size_local * incr); - if (NULL == ptmp) { + span = opal_datatype_span(&sdtype->super, scount*size_local, &gap); + + ptmp_free = (char*)malloc(span); + if (NULL == ptmp_free) { return OMPI_ERR_OUT_OF_RESOURCE; } + ptmp = ptmp_free - gap; err = comm->c_local_comm->c_coll.coll_gather(sbuf, scount, sdtype, ptmp, scount, sdtype, @@ -86,9 +83,7 @@ mca_coll_inter_gather_inter(const void *sbuf, int scount, return err; } } - if (NULL != ptmp) { - free(ptmp); - } + free(ptmp_free); } else { /* I am the root, loop receiving the data. */ err = MCA_PML_CALL(recv(rbuf, rcount*size, rdtype, 0, diff --git a/ompi/mca/coll/inter/coll_inter_gatherv.c b/ompi/mca/coll/inter/coll_inter_gatherv.c index cffeb4b4ef..7c69fd9aad 100644 --- a/ompi/mca/coll/inter/coll_inter_gatherv.c +++ b/ompi/mca/coll/inter/coll_inter_gatherv.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2010 University of Houston. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * @@ -46,10 +46,7 @@ mca_coll_inter_gatherv_inter(const void *sbuf, int scount, { int i, rank, size, size_local, total=0, err; int *count=NULL, *displace=NULL; - char *ptmp=NULL; - MPI_Aint incr; - MPI_Aint extent; - MPI_Aint lb; + char *ptmp_free=NULL, *ptmp=NULL; /* ptmp stays NULL unless a staging buffer is allocated below */ ompi_datatype_t *ndtype; if (MPI_PROC_NULL == root) { /* do nothing */ @@ -92,21 +89,18 @@ mca_coll_inter_gatherv_inter(const void *sbuf, int scount, displace[i] = displace[i-1] + count[i-1]; } /* Perform the gatherv locally with the first process as root */ - err = ompi_datatype_get_extent(sdtype, &lb, &extent); - if (OMPI_SUCCESS != err) { - err = OMPI_ERROR; - goto exit; - } - incr = 0; for (i = 0; i < size_local; i++) { - incr = incr + extent*count[i]; + total = total + count[i]; } - if ( incr > 0 ) { - ptmp = (char*)malloc(incr); - if (NULL == ptmp) { + if ( total > 0 ) { + ptrdiff_t gap, span; + span = opal_datatype_span(&sdtype->super, total, &gap); + ptmp_free = (char*)malloc(span); + if (NULL == ptmp_free) { err = OMPI_ERR_OUT_OF_RESOURCE; goto exit; } + ptmp = ptmp_free - gap; } } err = comm->c_local_comm->c_coll.coll_gatherv(sbuf, scount, sdtype, @@ -118,9 +112,6 @@ mca_coll_inter_gatherv_inter(const void *sbuf, int scount, } if (0 == rank) { - for (i = 0; i < size_local; i++) { - total = total + count[i]; - } /* First process sends data to the root */ err = MCA_PML_CALL(send(ptmp, total, sdtype, root, MCA_COLL_BASE_TAG_GATHERV, @@ -128,8 +119,8 @@ mca_coll_inter_gatherv_inter(const void *sbuf, int scount, } exit: - if (NULL != ptmp) { - free(ptmp); + if (NULL != ptmp_free) { + free(ptmp_free); } if (NULL != displace) { free(displace); diff --git a/ompi/mca/coll/inter/coll_inter_reduce.c b/ompi/mca/coll/inter/coll_inter_reduce.c index 14085ec243..a0a2ec9085 100644 --- a/ompi/mca/coll/inter/coll_inter_reduce.c +++ b/ompi/mca/coll/inter/coll_inter_reduce.c @@ -10,8 +10,8 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
* Copyright (c) 2006-2007 University of Houston. All rights reserved. - * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -47,9 +47,6 @@ mca_coll_inter_reduce_inter(const void *sbuf, void *rbuf, int count, mca_coll_base_module_t *module) { int rank, err; - ptrdiff_t true_lb, true_extent, lb, extent; - char *free_buffer = NULL; - char *pml_buffer = NULL; /* Initialize */ rank = ompi_comm_rank(comm); @@ -58,15 +55,18 @@ mca_coll_inter_reduce_inter(const void *sbuf, void *rbuf, int count, /* do nothing */ err = OMPI_SUCCESS; } else if (MPI_ROOT != root) { - /* Perform the reduce locally with the first process as root */ - ompi_datatype_get_extent(dtype, &lb, &extent); - ompi_datatype_get_true_extent(dtype, &true_lb, &true_extent); + ptrdiff_t gap, span; + char *free_buffer = NULL; + char *pml_buffer = NULL; - free_buffer = (char*)malloc(true_extent + (count - 1) * extent); + /* Perform the reduce locally with the first process as root */ + span = opal_datatype_span(&dtype->super, count, &gap); + + free_buffer = (char*)malloc(span); if (NULL == free_buffer) { return OMPI_ERR_OUT_OF_RESOURCE; } - pml_buffer = free_buffer - true_lb; + pml_buffer = free_buffer - gap; err = comm->c_local_comm->c_coll.coll_reduce(sbuf, pml_buffer, count, dtype, op, 0, comm->c_local_comm, diff --git a/ompi/mca/coll/inter/coll_inter_scatter.c b/ompi/mca/coll/inter/coll_inter_scatter.c index 3fd91f3753..6a9de919ae 100644 --- a/ompi/mca/coll/inter/coll_inter_scatter.c +++ b/ompi/mca/coll/inter/coll_inter_scatter.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2008 University of Houston. All rights reserved. 
- * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -44,9 +44,7 @@ mca_coll_inter_scatter_inter(const void *sbuf, int scount, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { - int rank, size, size_local, err; - char *ptmp = NULL; - ptrdiff_t lb, incr; + int rank, size, err; /* Initialize */ @@ -58,18 +56,18 @@ mca_coll_inter_scatter_inter(const void *sbuf, int scount, err = OMPI_SUCCESS; } else if (MPI_ROOT != root) { /* First process receives the data from root */ + char *ptmp_free = NULL, *ptmp; if(0 == rank) { - err = ompi_datatype_get_extent(rdtype, &lb, &incr); - if (OMPI_SUCCESS != err) { - return OMPI_ERROR; - } + int size_local; + ptrdiff_t gap, span; - incr *= rcount; size_local = ompi_comm_size(comm->c_local_comm); - ptmp = (char*)malloc(size_local * incr); - if (NULL == ptmp) { + span = opal_datatype_span(&rdtype->super, rcount*size_local, &gap); + ptmp_free = malloc(span); + if (NULL == ptmp_free) { return OMPI_ERR_OUT_OF_RESOURCE; } + ptmp = ptmp_free - gap; err = MCA_PML_CALL(recv(ptmp, rcount*size_local, rdtype, root, MCA_COLL_BASE_TAG_SCATTER, @@ -83,8 +81,8 @@ mca_coll_inter_scatter_inter(const void *sbuf, int scount, rbuf, rcount, rdtype, 0, comm->c_local_comm, comm->c_local_comm->c_coll.coll_scatter_module); - if (NULL != ptmp) { - free(ptmp); + if (NULL != ptmp_free) { + free(ptmp_free); } } else { /* Root sends data to the first process in the remote group */ diff --git a/ompi/mca/coll/inter/coll_inter_scatterv.c b/ompi/mca/coll/inter/coll_inter_scatterv.c index ba6e5598bd..4d17ee8efc 100644 --- a/ompi/mca/coll/inter/coll_inter_scatterv.c +++ b/ompi/mca/coll/inter/coll_inter_scatterv.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2010 University of Houston. 
All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -45,12 +45,9 @@ mca_coll_inter_scatterv_inter(const void *sbuf, const int *scounts, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { - int i, rank, size, err, total, size_local; + int i, rank, size, err, total=0, size_local; int *counts=NULL,*displace=NULL; - char *ptmp=NULL; - MPI_Aint incr; - MPI_Aint extent; - MPI_Aint lb; + char *ptmp_free=NULL, *ptmp=NULL; /* ptmp stays NULL unless a staging buffer is allocated below */ ompi_datatype_t *ndtype; /* Initialize */ @@ -72,25 +69,19 @@ mca_coll_inter_scatterv_inter(const void *sbuf, const int *scounts, if (OMPI_SUCCESS != err) { return err; } - /* calculate the whole buffer size and recieve it from root */ - err = ompi_datatype_get_extent(rdtype, &lb, &extent); - if (OMPI_SUCCESS != err) { - return OMPI_ERROR; - } - incr = 0; - for (i = 0; i < size_local; i++) { - incr = incr + extent*counts[i]; - } - if ( incr > 0 ) { - ptmp = (char*)malloc(incr); - if (NULL == ptmp) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - } - total = 0; + /* calculate the whole buffer size and receive it from root */ for (i = 0; i < size_local; i++) { total = total + counts[i]; } + if ( total > 0 ) { + ptrdiff_t gap, span; + span = opal_datatype_span(&rdtype->super, total, &gap); + ptmp_free = (char*)malloc(span); + if (NULL == ptmp_free) { + return OMPI_ERR_OUT_OF_RESOURCE; + } + ptmp = ptmp_free - gap; + } err = MCA_PML_CALL(recv(ptmp, total, rdtype, root, MCA_COLL_BASE_TAG_SCATTERV, comm, MPI_STATUS_IGNORE)); @@ -113,8 +104,8 @@ mca_coll_inter_scatterv_inter(const void *sbuf, const int *scounts, return err; } - if (NULL != ptmp) { - free(ptmp); + if (NULL != ptmp_free) { + free(ptmp_free); } if (NULL != displace) { free(displace);