
coll/libnbc: fix integer overflow

Use internal pack/unpack subroutines that operate on MPI_Aint instead of int,
and hence avoid some integer overflows.

Thanks to Clyde Stanfield for reporting this issue.

Refs open-mpi/ompi#5383

Signed-off-by: Gilles Gouaillardet <gilles@rist.or.jp>
Authored by Gilles Gouaillardet on 2018-07-09 15:28:02 +09:00; committed by Nathan Hjelm
parent 22fa5a8a67
commit 76292951e5
2 changed files with 37 additions and 80 deletions
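To see why the casts and the MPI_Aint-based routines in the diff below matter, here is a minimal standalone sketch (illustrative only, not part of the commit; ptrdiff_t stands in for MPI_Aint so it builds without MPI). When rank and count are plain int, the sub-product rank * count is evaluated in 32-bit arithmetic and overflows before it is widened by the 64-bit extent; widening first keeps the whole offset computation in 64 bits.

/* overflow_sketch.c - illustrative only; mirrors the casts added in this commit */
#include <stdio.h>
#include <stddef.h>

int main(void) {
    int rank = 3, count = 1 << 30;   /* 3 * 2^30 does not fit in a 32-bit int */
    ptrdiff_t ext = 4;               /* datatype extent in bytes (stand-in for MPI_Aint) */

    /* old pattern: rank * count is an int * int product and overflows
     * before the result is widened (undefined behaviour; it wraps in practice) */
    ptrdiff_t offset_bad  = rank * count * ext;

    /* fixed pattern: widen first, then multiply, as the (intptr_t)/(MPI_Aint)
     * casts in the diff below do */
    ptrdiff_t offset_good = (ptrdiff_t)rank * (ptrdiff_t)count * ext;

    printf("overflowed offset: %td  correct offset: %td\n", offset_bad, offset_good);
    return 0;
}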

View file

@@ -8,8 +8,8 @@
  * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights
  *                         reserved.
  * Copyright (c) 2014      NVIDIA Corporation. All rights reserved.
- * Copyright (c) 2014-2017 Research Organization for Information Science
- *                         and Technology (RIST). All rights reserved.
+ * Copyright (c) 2014-2018 Research Organization for Information Science
+ *                         and Technology (RIST). All rights reserved.
  * Copyright (c) 2017      IBM Corporation. All rights reserved.
  * Copyright (c) 2018      FUJITSU LIMITED. All rights reserved.
  * $COPYRIGHT$
@@ -58,7 +58,8 @@ static int nbc_alltoall_init(const void* sendbuf, int sendcount, MPI_Datatype se
                              MPI_Datatype recvtype, struct ompi_communicator_t *comm, ompi_request_t ** request,
                              struct mca_coll_base_module_2_3_0_t *module, bool persistent)
 {
-  int rank, p, res, datasize;
+  int rank, p, res;
+  MPI_Aint datasize;
   size_t a2asize, sndsize;
   NBC_Schedule *schedule;
   MPI_Aint rcvext, sndext;
@@ -118,16 +119,15 @@ static int nbc_alltoall_init(const void* sendbuf, int sendcount, MPI_Datatype se
       return OMPI_ERR_OUT_OF_RESOURCE;
     }
   } else if (alg == NBC_A2A_DISS) {
-    /* persistent operation is not supported currently for this algorithm;
-     * we need to replace PMPI_Pack, PMPI_Unpack, and mempcy */
+    /* persistent operation is not supported currently for this algorithm */
     assert(! persistent);
     if(NBC_Type_intrinsic(sendtype)) {
       datasize = sndext * sendcount;
     } else {
-      res = PMPI_Pack_size (sendcount, sendtype, comm, &datasize);
+      res = ompi_datatype_pack_external_size("external32", sendcount, sendtype, &datasize);
       if (MPI_SUCCESS != res) {
-        NBC_Error("MPI Error in PMPI_Pack_size() (%i)", res);
+        NBC_Error("MPI Error in ompi_datatype_pack_external_size() (%i)", res);
         return res;
       }
     }
@@ -156,23 +156,23 @@ static int nbc_alltoall_init(const void* sendbuf, int sendcount, MPI_Datatype se
         memcpy ((char *) tmpbuf + datasize * (p - rank), sendbuf, datasize * rank);
       }
     } else {
-      int pos=0;
+      MPI_Aint pos=0;
       /* non-contiguous - pack */
-      res = PMPI_Pack ((char *) sendbuf + rank * sendcount * sndext, (p - rank) * sendcount, sendtype, tmpbuf,
-                       (p - rank) * datasize, &pos, comm);
+      res = ompi_datatype_pack_external ("external32", (char *) sendbuf + (intptr_t)rank * (intptr_t)sendcount * sndext, (intptr_t)(p - rank) * (intptr_t)sendcount, sendtype, tmpbuf,
+                                         (intptr_t)(p - rank) * datasize, &pos);
       if (OPAL_UNLIKELY(MPI_SUCCESS != res)) {
-        NBC_Error("MPI Error in PMPI_Pack() (%i)", res);
+        NBC_Error("MPI Error in ompi_datatype_pack_external() (%i)", res);
         free(tmpbuf);
         return res;
       }
       if (rank != 0) {
         pos = 0;
-        res = PMPI_Pack(sendbuf, rank * sendcount, sendtype, (char *) tmpbuf + datasize * (p - rank),
-                        rank * datasize, &pos, comm);
+        res = ompi_datatype_pack_external("external32", sendbuf, (intptr_t)rank * (intptr_t)sendcount, sendtype, (char *) tmpbuf + datasize * (intptr_t)(p - rank),
+                                          rank * datasize, &pos);
         if (OPAL_UNLIKELY(MPI_SUCCESS != res)) {
-          NBC_Error("MPI Error in PMPI_Pack() (%i)", res);
+          NBC_Error("MPI Error in ompi_datatype_pack_external() (%i)", res);
           free(tmpbuf);
           return res;
         }
@@ -200,8 +200,8 @@ static int nbc_alltoall_init(const void* sendbuf, int sendcount, MPI_Datatype se
   if (!inplace) {
     /* copy my data to receive buffer */
-    rbuf = (char *) recvbuf + rank * recvcount * rcvext;
-    sbuf = (char *) sendbuf + rank * sendcount * sndext;
+    rbuf = (char *) recvbuf + (MPI_Aint)rank * (MPI_Aint)recvcount * rcvext;
+    sbuf = (char *) sendbuf + (MPI_Aint)rank * (MPI_Aint)sendcount * sndext;
     res = NBC_Sched_copy (sbuf, false, sendcount, sendtype,
                           rbuf, false, recvcount, recvtype, schedule, false);
     if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
@@ -424,13 +424,13 @@ static inline int a2a_sched_linear(int rank, int p, MPI_Aint sndext, MPI_Aint rc
       continue;
     }
-    char *rbuf = (char *) recvbuf + r * recvcount * rcvext;
+    char *rbuf = (char *) recvbuf + (intptr_t)r * (intptr_t)recvcount * rcvext;
     res = NBC_Sched_recv (rbuf, false, recvcount, recvtype, r, schedule, false);
     if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
       return res;
     }
-    char *sbuf = (char *) sendbuf + r * sendcount * sndext;
+    char *sbuf = (char *) sendbuf + (intptr_t)r * (intptr_t)sendcount * sndext;
     res = NBC_Sched_send (sbuf, false, sendcount, sendtype, r, schedule, false);
     if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
       return res;
@@ -443,7 +443,8 @@ static inline int a2a_sched_linear(int rank, int p, MPI_Aint sndext, MPI_Aint rc
 static inline int a2a_sched_diss(int rank, int p, MPI_Aint sndext, MPI_Aint rcvext, NBC_Schedule* schedule,
                                  const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount,
                                  MPI_Datatype recvtype, MPI_Comm comm, void* tmpbuf) {
-  int res, speer, rpeer, datasize, offset, virtp;
+  int res, speer, rpeer, virtp;
+  MPI_Aint datasize, offset;
   char *rbuf, *rtmpbuf, *stmpbuf;
   if (p < 2) {
@@ -453,9 +454,9 @@ static inline int a2a_sched_diss(int rank, int p, MPI_Aint sndext, MPI_Aint rcve
   if(NBC_Type_intrinsic(sendtype)) {
     datasize = sndext*sendcount;
   } else {
-    res = PMPI_Pack_size(sendcount, sendtype, comm, &datasize);
+    res = ompi_datatype_pack_external_size("external32", sendcount, sendtype, &datasize);
     if (MPI_SUCCESS != res) {
-      NBC_Error("MPI Error in PMPI_Pack_size() (%i)", res);
+      NBC_Error("MPI Error in ompi_datatype_pack_external_size() (%i)", res);
       return res;
     }
   }
@@ -540,8 +541,8 @@ static inline int a2a_sched_inplace(int rank, int p, NBC_Schedule* schedule, voi
   for (int i = 1 ; i < (p+1)/2 ; i++) {
     int speer = (rank + i) % p;
     int rpeer = (rank + p - i) % p;
-    char *sbuf = (char *) buf + speer * count * ext;
-    char *rbuf = (char *) buf + rpeer * count * ext;
+    char *sbuf = (char *) buf + (intptr_t)speer * (intptr_t)count * ext;
+    char *rbuf = (char *) buf + (intptr_t)rpeer * (intptr_t)count * ext;
     res = NBC_Sched_copy (rbuf, false, count, type,
                           (void *)(-gap), true, count, type,
@@ -570,7 +571,7 @@ static inline int a2a_sched_inplace(int rank, int p, NBC_Schedule* schedule, voi
   if (0 == (p%2)) {
     int peer = (rank + p/2) % p;
-    char *tbuf = (char *) buf + peer * count * ext;
+    char *tbuf = (char *) buf + (intptr_t)peer * (intptr_t)count * ext;
     res = NBC_Sched_copy (tbuf, false, count, type,
                           (void *)(-gap), true, count, type,
                           schedule, true);

View file

@@ -10,8 +10,8 @@
  *
  * Copyright (c) 2012      Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2014      NVIDIA Corporation. All rights reserved.
- * Copyright (c) 2015-2017 Research Organization for Information Science
- *                         and Technology (RIST). All rights reserved.
+ * Copyright (c) 2015-2018 Research Organization for Information Science
+ *                         and Technology (RIST). All rights reserved.
  * Copyright (c) 2015      Los Alamos National Security, LLC. All rights
  *                         reserved.
  * Copyright (c) 2018      FUJITSU LIMITED. All rights reserved.
@@ -500,60 +500,20 @@ static inline int NBC_Type_intrinsic(MPI_Datatype type) {
 /* let's give a try to inline functions */
 static inline int NBC_Copy(const void *src, int srccount, MPI_Datatype srctype, void *tgt, int tgtcount, MPI_Datatype tgttype, MPI_Comm comm) {
-  int size, pos, res;
-  void *packbuf;
+  int res;
-#if OPAL_CUDA_SUPPORT
-  if((srctype == tgttype) && NBC_Type_intrinsic(srctype) && !(opal_cuda_check_bufs((char *)tgt, (char *)src))) {
-#else
-  if((srctype == tgttype) && NBC_Type_intrinsic(srctype)) {
-#endif /* OPAL_CUDA_SUPPORT */
-    /* if we have the same types and they are contiguous (intrinsic
-     * types are contiguous), we can just use a single memcpy */
-    ptrdiff_t gap, span;
-    span = opal_datatype_span(&srctype->super, srccount, &gap);
-    memcpy(tgt, src, span);
-  } else {
-    /* we have to pack and unpack */
-    res = PMPI_Pack_size(srccount, srctype, comm, &size);
-    if (MPI_SUCCESS != res) {
-      NBC_Error ("MPI Error in PMPI_Pack_size() (%i:%i)", res, size);
-      return res;
-    }
-    if (0 == size) {
-      return OMPI_SUCCESS;
-    }
-    packbuf = malloc(size);
-    if (NULL == packbuf) {
-      NBC_Error("Error in malloc()");
-      return res;
-    }
-    pos=0;
-    res = PMPI_Pack(src, srccount, srctype, packbuf, size, &pos, comm);
-    if (MPI_SUCCESS != res) {
-      NBC_Error ("MPI Error in PMPI_Pack() (%i)", res);
-      free (packbuf);
-      return res;
-    }
-    pos=0;
-    res = PMPI_Unpack(packbuf, size, &pos, tgt, tgtcount, tgttype, comm);
-    free(packbuf);
-    if (MPI_SUCCESS != res) {
-      NBC_Error ("MPI Error in PMPI_Unpack() (%i)", res);
-      return res;
-    }
+  res = ompi_datatype_sndrcv(src, srccount, srctype, tgt, tgtcount, tgttype);
+  if (OMPI_SUCCESS != res) {
+    NBC_Error ("MPI Error in ompi_datatype_sndrcv() (%i)", res);
+    return res;
+  }
   return OMPI_SUCCESS;
 }
 static inline int NBC_Unpack(void *src, int srccount, MPI_Datatype srctype, void *tgt, MPI_Comm comm) {
-  int size, pos, res;
+  MPI_Aint size, pos;
+  int res;
   ptrdiff_t ext, lb;
 #if OPAL_CUDA_SUPPORT
@@ -563,6 +523,7 @@ static inline int NBC_Unpack(void *src, int srccount, MPI_Datatype srctype, void
 #endif /* OPAL_CUDA_SUPPORT */
     /* if we have the same types and they are contiguous (intrinsic
      * types are contiguous), we can just use a single memcpy */
+    res = ompi_datatype_pack_external_size("external32", srccount, srctype, &size);
     res = ompi_datatype_get_extent (srctype, &lb, &ext);
     if (OMPI_SUCCESS != res) {
       NBC_Error ("MPI Error in MPI_Type_extent() (%i)", res);
@@ -573,15 +534,10 @@ static inline int NBC_Unpack(void *src, int srccount, MPI_Datatype srctype, void
   } else {
     /* we have to unpack */
-    res = PMPI_Pack_size(srccount, srctype, comm, &size);
-    if (MPI_SUCCESS != res) {
-      NBC_Error ("MPI Error in PMPI_Pack_size() (%i)", res);
-      return res;
-    }
     pos = 0;
-    res = PMPI_Unpack(src, size, &pos, tgt, srccount, srctype, comm);
+    res = ompi_datatype_unpack_external("external32", src, size, &pos, tgt, srccount, srctype);
     if (MPI_SUCCESS != res) {
-      NBC_Error ("MPI Error in PMPI_Unpack() (%i)", res);
+      NBC_Error ("MPI Error in ompi_datatype_unpack_external() (%i)", res);
       return res;
     }
   }
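The hunks above replace PMPI_Pack/PMPI_Unpack/PMPI_Pack_size, whose packed sizes and positions are plain int (capping a packed stream at INT_MAX bytes), with internal external32 routines that carry both in MPI_Aint. Those internal ompi_datatype_*_external calls mirror the standard MPI_Pack_external family, so the difference is visible with the public API; a minimal sketch using the standard MPI calls (illustrative only, not from the commit):

/* pack_external_sketch.c - illustrative only; compile with mpicc, run on one rank */
#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv) {
    MPI_Init(&argc, &argv);

    int count = 1024;
    double sendbuf[1024], recvbuf[1024];
    for (int i = 0; i < count; i++) sendbuf[i] = (double)i;

    /* size and position are MPI_Aint here; in MPI_Pack_size/MPI_Pack they are
     * plain int, which is the 2 GiB limit this commit works around */
    MPI_Aint packsize, pos = 0;
    MPI_Pack_external_size("external32", count, MPI_DOUBLE, &packsize);

    char packbuf[16384];             /* comfortably larger than packsize
                                      * (an external32 double is 8 bytes) */
    MPI_Pack_external("external32", sendbuf, count, MPI_DOUBLE,
                      packbuf, (MPI_Aint)sizeof(packbuf), &pos);

    pos = 0;
    MPI_Unpack_external("external32", packbuf, packsize, &pos,
                        recvbuf, count, MPI_DOUBLE);

    printf("packed %ld bytes, recvbuf[42] = %g\n", (long)packsize, recvbuf[42]);

    MPI_Finalize();
    return 0;
}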