From 76292951e530cfa8b859deeb5a403cb222be32c2 Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Mon, 9 Jul 2018 15:28:02 +0900 Subject: [PATCH] coll/libnbc: fix integer overflow Use internal pack/unpack subroutines that operate on MPI_Aint instead of int and hence solve some integer overflows. Thanks Clyde Stanfield for reporting this issue. Refs open-mpi/ompi#5383 Signed-off-by: Gilles Gouaillardet --- ompi/mca/coll/libnbc/nbc_ialltoall.c | 49 ++++++++++---------- ompi/mca/coll/libnbc/nbc_internal.h | 68 +++++----------------------- 2 files changed, 37 insertions(+), 80 deletions(-) diff --git a/ompi/mca/coll/libnbc/nbc_ialltoall.c b/ompi/mca/coll/libnbc/nbc_ialltoall.c index f51562708e..0b93af0530 100644 --- a/ompi/mca/coll/libnbc/nbc_ialltoall.c +++ b/ompi/mca/coll/libnbc/nbc_ialltoall.c @@ -8,8 +8,8 @@ * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014 NVIDIA Corporation. All rights reserved. - * Copyright (c) 2014-2017 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2018 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. * Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. * $COPYRIGHT$ @@ -58,7 +58,8 @@ static int nbc_alltoall_init(const void* sendbuf, int sendcount, MPI_Datatype se MPI_Datatype recvtype, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_3_0_t *module, bool persistent) { - int rank, p, res, datasize; + int rank, p, res; + MPI_Aint datasize; size_t a2asize, sndsize; NBC_Schedule *schedule; MPI_Aint rcvext, sndext; @@ -118,16 +119,15 @@ static int nbc_alltoall_init(const void* sendbuf, int sendcount, MPI_Datatype se return OMPI_ERR_OUT_OF_RESOURCE; } } else if (alg == NBC_A2A_DISS) { - /* persistent operation is not supported currently for this algorithm; - * we need to replace PMPI_Pack, PMPI_Unpack, and mempcy */ + /* persistent operation is not supported currently for this algorithm */ assert(! persistent); if(NBC_Type_intrinsic(sendtype)) { datasize = sndext * sendcount; } else { - res = PMPI_Pack_size (sendcount, sendtype, comm, &datasize); + res = ompi_datatype_pack_external_size("external32", sendcount, sendtype, &datasize); if (MPI_SUCCESS != res) { - NBC_Error("MPI Error in PMPI_Pack_size() (%i)", res); + NBC_Error("MPI Error in ompi_datatype_pack_external_size() (%i)", res); return res; } } @@ -156,23 +156,23 @@ static int nbc_alltoall_init(const void* sendbuf, int sendcount, MPI_Datatype se memcpy ((char *) tmpbuf + datasize * (p - rank), sendbuf, datasize * rank); } } else { - int pos=0; + MPI_Aint pos=0; /* non-contiguous - pack */ - res = PMPI_Pack ((char *) sendbuf + rank * sendcount * sndext, (p - rank) * sendcount, sendtype, tmpbuf, - (p - rank) * datasize, &pos, comm); + res = ompi_datatype_pack_external ("external32", (char *) sendbuf + (intptr_t)rank * (intptr_t)sendcount * sndext, (intptr_t)(p - rank) * (intptr_t)sendcount, sendtype, tmpbuf, + (intptr_t)(p - rank) * datasize, &pos); if (OPAL_UNLIKELY(MPI_SUCCESS != res)) { - NBC_Error("MPI Error in PMPI_Pack() (%i)", res); + NBC_Error("MPI Error in ompi_datatype_pack_external() (%i)", res); free(tmpbuf); return res; } if (rank != 0) { pos = 0; - res = PMPI_Pack(sendbuf, rank * sendcount, sendtype, (char *) tmpbuf + datasize * (p - rank), - rank * datasize, &pos, comm); + res = ompi_datatype_pack_external("external32", sendbuf, (intptr_t)rank * (intptr_t)sendcount, sendtype, (char *) tmpbuf + datasize * (intptr_t)(p - rank), + rank * datasize, &pos); if (OPAL_UNLIKELY(MPI_SUCCESS != res)) { - NBC_Error("MPI Error in PMPI_Pack() (%i)", res); + NBC_Error("MPI Error in ompi_datatype_pack_external() (%i)", res); free(tmpbuf); return res; } @@ -200,8 +200,8 @@ static int nbc_alltoall_init(const void* sendbuf, int sendcount, MPI_Datatype se if (!inplace) { /* copy my data to receive buffer */ - rbuf = (char *) recvbuf + rank * recvcount * rcvext; - sbuf = (char *) sendbuf + rank * sendcount * sndext; + rbuf = (char *) recvbuf + (MPI_Aint)rank * (MPI_Aint)recvcount * rcvext; + sbuf = (char *) sendbuf + (MPI_Aint)rank * (MPI_Aint)sendcount * sndext; res = NBC_Sched_copy (sbuf, false, sendcount, sendtype, rbuf, false, recvcount, recvtype, schedule, false); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { @@ -424,13 +424,13 @@ static inline int a2a_sched_linear(int rank, int p, MPI_Aint sndext, MPI_Aint rc continue; } - char *rbuf = (char *) recvbuf + r * recvcount * rcvext; + char *rbuf = (char *) recvbuf + (intptr_t)r * (intptr_t)recvcount * rcvext; res = NBC_Sched_recv (rbuf, false, recvcount, recvtype, r, schedule, false); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { return res; } - char *sbuf = (char *) sendbuf + r * sendcount * sndext; + char *sbuf = (char *) sendbuf + (intptr_t)r * (intptr_t)sendcount * sndext; res = NBC_Sched_send (sbuf, false, sendcount, sendtype, r, schedule, false); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { return res; @@ -443,7 +443,8 @@ static inline int a2a_sched_linear(int rank, int p, MPI_Aint sndext, MPI_Aint rc static inline int a2a_sched_diss(int rank, int p, MPI_Aint sndext, MPI_Aint rcvext, NBC_Schedule* schedule, const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm, void* tmpbuf) { - int res, speer, rpeer, datasize, offset, virtp; + int res, speer, rpeer, virtp; + MPI_Aint datasize, offset; char *rbuf, *rtmpbuf, *stmpbuf; if (p < 2) { @@ -453,9 +454,9 @@ static inline int a2a_sched_diss(int rank, int p, MPI_Aint sndext, MPI_Aint rcve if(NBC_Type_intrinsic(sendtype)) { datasize = sndext*sendcount; } else { - res = PMPI_Pack_size(sendcount, sendtype, comm, &datasize); + res = ompi_datatype_pack_external_size("external32", sendcount, sendtype, &datasize); if (MPI_SUCCESS != res) { - NBC_Error("MPI Error in PMPI_Pack_size() (%i)", res); + NBC_Error("MPI Error in ompi_datatype_pack_external_size() (%i)", res); return res; } } @@ -540,8 +541,8 @@ static inline int a2a_sched_inplace(int rank, int p, NBC_Schedule* schedule, voi for (int i = 1 ; i < (p+1)/2 ; i++) { int speer = (rank + i) % p; int rpeer = (rank + p - i) % p; - char *sbuf = (char *) buf + speer * count * ext; - char *rbuf = (char *) buf + rpeer * count * ext; + char *sbuf = (char *) buf + (intptr_t)speer * (intptr_t)count * ext; + char *rbuf = (char *) buf + (intptr_t)rpeer * (intptr_t)count * ext; res = NBC_Sched_copy (rbuf, false, count, type, (void *)(-gap), true, count, type, @@ -570,7 +571,7 @@ static inline int a2a_sched_inplace(int rank, int p, NBC_Schedule* schedule, voi if (0 == (p%2)) { int peer = (rank + p/2) % p; - char *tbuf = (char *) buf + peer * count * ext; + char *tbuf = (char *) buf + (intptr_t)peer * (intptr_t)count * ext; res = NBC_Sched_copy (tbuf, false, count, type, (void *)(-gap), true, count, type, schedule, true); diff --git a/ompi/mca/coll/libnbc/nbc_internal.h b/ompi/mca/coll/libnbc/nbc_internal.h index dc085f0293..da9786dbb6 100644 --- a/ompi/mca/coll/libnbc/nbc_internal.h +++ b/ompi/mca/coll/libnbc/nbc_internal.h @@ -10,8 +10,8 @@ * * Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014 NVIDIA Corporation. All rights reserved. - * Copyright (c) 2015-2017 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2018 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. @@ -500,60 +500,20 @@ static inline int NBC_Type_intrinsic(MPI_Datatype type) { /* let's give a try to inline functions */ static inline int NBC_Copy(const void *src, int srccount, MPI_Datatype srctype, void *tgt, int tgtcount, MPI_Datatype tgttype, MPI_Comm comm) { - int size, pos, res; - void *packbuf; + int res; -#if OPAL_CUDA_SUPPORT - if((srctype == tgttype) && NBC_Type_intrinsic(srctype) && !(opal_cuda_check_bufs((char *)tgt, (char *)src))) { -#else - if((srctype == tgttype) && NBC_Type_intrinsic(srctype)) { -#endif /* OPAL_CUDA_SUPPORT */ - /* if we have the same types and they are contiguous (intrinsic - * types are contiguous), we can just use a single memcpy */ - ptrdiff_t gap, span; - span = opal_datatype_span(&srctype->super, srccount, &gap); - - memcpy(tgt, src, span); - } else { - /* we have to pack and unpack */ - res = PMPI_Pack_size(srccount, srctype, comm, &size); - if (MPI_SUCCESS != res) { - NBC_Error ("MPI Error in PMPI_Pack_size() (%i:%i)", res, size); - return res; - } - - if (0 == size) { - return OMPI_SUCCESS; - } - packbuf = malloc(size); - if (NULL == packbuf) { - NBC_Error("Error in malloc()"); - return res; - } - - pos=0; - res = PMPI_Pack(src, srccount, srctype, packbuf, size, &pos, comm); - - if (MPI_SUCCESS != res) { - NBC_Error ("MPI Error in PMPI_Pack() (%i)", res); - free (packbuf); - return res; - } - - pos=0; - res = PMPI_Unpack(packbuf, size, &pos, tgt, tgtcount, tgttype, comm); - free(packbuf); - if (MPI_SUCCESS != res) { - NBC_Error ("MPI Error in PMPI_Unpack() (%i)", res); - return res; - } + res = ompi_datatype_sndrcv(src, srccount, srctype, tgt, tgtcount, tgttype); + if (OMPI_SUCCESS != res) { + NBC_Error ("MPI Error in ompi_datatype_sndrcv() (%i)", res); + return res; } return OMPI_SUCCESS; } static inline int NBC_Unpack(void *src, int srccount, MPI_Datatype srctype, void *tgt, MPI_Comm comm) { - int size, pos, res; + MPI_Aint size, pos; + int res; ptrdiff_t ext, lb; #if OPAL_CUDA_SUPPORT @@ -563,6 +523,7 @@ static inline int NBC_Unpack(void *src, int srccount, MPI_Datatype srctype, void #endif /* OPAL_CUDA_SUPPORT */ /* if we have the same types and they are contiguous (intrinsic * types are contiguous), we can just use a single memcpy */ + res = ompi_datatype_pack_external_size("external32", srccount, srctype, &size); res = ompi_datatype_get_extent (srctype, &lb, &ext); if (OMPI_SUCCESS != res) { NBC_Error ("MPI Error in MPI_Type_extent() (%i)", res); @@ -573,15 +534,10 @@ static inline int NBC_Unpack(void *src, int srccount, MPI_Datatype srctype, void } else { /* we have to unpack */ - res = PMPI_Pack_size(srccount, srctype, comm, &size); - if (MPI_SUCCESS != res) { - NBC_Error ("MPI Error in PMPI_Pack_size() (%i)", res); - return res; - } pos = 0; - res = PMPI_Unpack(src, size, &pos, tgt, srccount, srctype, comm); + res = ompi_datatype_unpack_external("external32", src, size, &pos, tgt, srccount, srctype); if (MPI_SUCCESS != res) { - NBC_Error ("MPI Error in PMPI_Unpack() (%i)", res); + NBC_Error ("MPI Error in ompi_datatype_unpack_external() (%i)", res); return res; } }