Small change to allow CUDA-aware support to work with non-reduction nonblocking collectives.
Only used when the CUDA-aware feature is compiled in. This commit was SVN r32750.
Этот коммит содержится в:
родитель
3a437cbdb3
Коммит
8db1f89dd1
@ -6,6 +6,7 @@
|
|||||||
* rights reserved.
|
* rights reserved.
|
||||||
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights
|
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights
|
||||||
* reserved.
|
* reserved.
|
||||||
|
* Copyright (c) 2014 NVIDIA Corporation. All rights reserved.
|
||||||
*
|
*
|
||||||
* Author(s): Torsten Hoefler <htor@cs.indiana.edu>
|
* Author(s): Torsten Hoefler <htor@cs.indiana.edu>
|
||||||
*
|
*
|
||||||
@ -108,7 +109,11 @@ int ompi_coll_libnbc_ialltoall(void* sendbuf, int sendcount, MPI_Datatype sendty
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* phase 1 - rotate n data blocks upwards into the tmpbuffer */
|
/* phase 1 - rotate n data blocks upwards into the tmpbuffer */
|
||||||
|
#if OPAL_CUDA_SUPPORT
|
||||||
|
if(NBC_Type_intrinsic(srctype) && !(opal_cuda_check_bufs((char *)tgt, (char *)src))) {
|
||||||
|
#else
|
||||||
if(NBC_Type_intrinsic(sendtype)) {
|
if(NBC_Type_intrinsic(sendtype)) {
|
||||||
|
#endif /* OPAL_CUDA_SUPPORT */
|
||||||
/* contiguous - just copy (1st copy) */
|
/* contiguous - just copy (1st copy) */
|
||||||
memcpy(handle->tmpbuf, (char*)sendbuf+datasize*rank, datasize*(p-rank));
|
memcpy(handle->tmpbuf, (char*)sendbuf+datasize*rank, datasize*(p-rank));
|
||||||
if(rank != 0) memcpy((char*)handle->tmpbuf+datasize*(p-rank), sendbuf, datasize*(rank));
|
if(rank != 0) memcpy((char*)handle->tmpbuf+datasize*(p-rank), sendbuf, datasize*(rank));
|
||||||
|
@ -8,6 +8,7 @@
|
|||||||
* Author(s): Torsten Hoefler <htor@cs.indiana.edu>
|
* Author(s): Torsten Hoefler <htor@cs.indiana.edu>
|
||||||
*
|
*
|
||||||
* Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved.
|
||||||
|
* Copyright (c) 2014 NVIDIA Corporation. All rights reserved.
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
#ifndef __NBC_INTERNAL_H__
|
#ifndef __NBC_INTERNAL_H__
|
||||||
@ -20,6 +21,10 @@
|
|||||||
#include "mpi.h"
|
#include "mpi.h"
|
||||||
|
|
||||||
#include "coll_libnbc.h"
|
#include "coll_libnbc.h"
|
||||||
|
#if OPAL_CUDA_SUPPORT
|
||||||
|
#include "opal/datatype/opal_convertor.h"
|
||||||
|
#include "opal/datatype/opal_datatype_cuda.h"
|
||||||
|
#endif /* OPAL_CUDA_SUPPORT */
|
||||||
#include "ompi/include/ompi/constants.h"
|
#include "ompi/include/ompi/constants.h"
|
||||||
#include "ompi/request/request.h"
|
#include "ompi/request/request.h"
|
||||||
#include "ompi/datatype/ompi_datatype.h"
|
#include "ompi/datatype/ompi_datatype.h"
|
||||||
@ -483,7 +488,11 @@ static inline int NBC_Copy(void *src, int srccount, MPI_Datatype srctype, void *
|
|||||||
OPAL_PTRDIFF_TYPE ext, lb;
|
OPAL_PTRDIFF_TYPE ext, lb;
|
||||||
void *packbuf;
|
void *packbuf;
|
||||||
|
|
||||||
|
#if OPAL_CUDA_SUPPORT
|
||||||
|
if((srctype == tgttype) && NBC_Type_intrinsic(srctype) && !(opal_cuda_check_bufs((char *)tgt, (char *)src))) {
|
||||||
|
#else
|
||||||
if((srctype == tgttype) && NBC_Type_intrinsic(srctype)) {
|
if((srctype == tgttype) && NBC_Type_intrinsic(srctype)) {
|
||||||
|
#endif /* OPAL_CUDA_SUPPORT */
|
||||||
/* if we have the same types and they are contiguous (intrinsic
|
/* if we have the same types and they are contiguous (intrinsic
|
||||||
* types are contiguous), we can just use a single memcpy */
|
* types are contiguous), we can just use a single memcpy */
|
||||||
res = ompi_datatype_get_extent(srctype, &lb, &ext);
|
res = ompi_datatype_get_extent(srctype, &lb, &ext);
|
||||||
@ -511,7 +520,11 @@ static inline int NBC_Unpack(void *src, int srccount, MPI_Datatype srctype, void
|
|||||||
int size, pos, res;
|
int size, pos, res;
|
||||||
OPAL_PTRDIFF_TYPE ext, lb;
|
OPAL_PTRDIFF_TYPE ext, lb;
|
||||||
|
|
||||||
|
#if OPAL_CUDA_SUPPORT
|
||||||
|
if(NBC_Type_intrinsic(srctype) && !(opal_cuda_check_bufs((char *)tgt, (char *)src))) {
|
||||||
|
#else
|
||||||
if(NBC_Type_intrinsic(srctype)) {
|
if(NBC_Type_intrinsic(srctype)) {
|
||||||
|
#endif /* OPAL_CUDA_SUPPORT */
|
||||||
/* if we have the same types and they are contiguous (intrinsic
|
/* if we have the same types and they are contiguous (intrinsic
|
||||||
* types are contiguous), we can just use a single memcpy */
|
* types are contiguous), we can just use a single memcpy */
|
||||||
res = ompi_datatype_get_extent (srctype, &lb, &ext);
|
res = ompi_datatype_get_extent (srctype, &lb, &ext);
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user