Small change to allow CUDA-aware support to work with non-reduction nonblocking collectives.
Only used when the CUDA-aware feature is compiled in. This commit was SVN r32750.
Этот коммит содержится в:
родитель
3a437cbdb3
Коммит
8db1f89dd1
@ -6,6 +6,7 @@
|
||||
* rights reserved.
|
||||
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2014 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* Author(s): Torsten Hoefler <htor@cs.indiana.edu>
|
||||
*
|
||||
@ -108,7 +109,11 @@ int ompi_coll_libnbc_ialltoall(void* sendbuf, int sendcount, MPI_Datatype sendty
|
||||
}
|
||||
|
||||
/* phase 1 - rotate n data blocks upwards into the tmpbuffer */
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
if(NBC_Type_intrinsic(srctype) && !(opal_cuda_check_bufs((char *)tgt, (char *)src))) {
|
||||
#else
|
||||
if(NBC_Type_intrinsic(sendtype)) {
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
/* contiguous - just copy (1st copy) */
|
||||
memcpy(handle->tmpbuf, (char*)sendbuf+datasize*rank, datasize*(p-rank));
|
||||
if(rank != 0) memcpy((char*)handle->tmpbuf+datasize*(p-rank), sendbuf, datasize*(rank));
|
||||
|
@ -8,6 +8,7 @@
|
||||
* Author(s): Torsten Hoefler <htor@cs.indiana.edu>
|
||||
*
|
||||
* Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2014 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
*/
|
||||
#ifndef __NBC_INTERNAL_H__
|
||||
@ -20,6 +21,10 @@
|
||||
#include "mpi.h"
|
||||
|
||||
#include "coll_libnbc.h"
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
#include "opal/datatype/opal_convertor.h"
|
||||
#include "opal/datatype/opal_datatype_cuda.h"
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
#include "ompi/include/ompi/constants.h"
|
||||
#include "ompi/request/request.h"
|
||||
#include "ompi/datatype/ompi_datatype.h"
|
||||
@ -483,7 +488,11 @@ static inline int NBC_Copy(void *src, int srccount, MPI_Datatype srctype, void *
|
||||
OPAL_PTRDIFF_TYPE ext, lb;
|
||||
void *packbuf;
|
||||
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
if((srctype == tgttype) && NBC_Type_intrinsic(srctype) && !(opal_cuda_check_bufs((char *)tgt, (char *)src))) {
|
||||
#else
|
||||
if((srctype == tgttype) && NBC_Type_intrinsic(srctype)) {
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
/* if we have the same types and they are contiguous (intrinsic
|
||||
* types are contiguous), we can just use a single memcpy */
|
||||
res = ompi_datatype_get_extent(srctype, &lb, &ext);
|
||||
@ -511,7 +520,11 @@ static inline int NBC_Unpack(void *src, int srccount, MPI_Datatype srctype, void
|
||||
int size, pos, res;
|
||||
OPAL_PTRDIFF_TYPE ext, lb;
|
||||
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
if(NBC_Type_intrinsic(srctype) && !(opal_cuda_check_bufs((char *)tgt, (char *)src))) {
|
||||
#else
|
||||
if(NBC_Type_intrinsic(srctype)) {
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
/* if we have the same types and they are contiguous (intrinsic
|
||||
* types are contiguous), we can just use a single memcpy */
|
||||
res = ompi_datatype_get_extent (srctype, &lb, &ext);
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user