1
1

Small change to allow CUDA-aware support to work with non-reduction nonblocking collectives.

Only used when the CUDA-aware feature is compiled in.

This commit was SVN r32750.
Этот коммит содержится в:
Rolf vandeVaart 2014-09-17 16:55:01 +00:00
родитель 3a437cbdb3
Коммит 8db1f89dd1
2 изменённых файлов: 18 добавлений и 0 удалений

Просмотреть файл

@ -6,6 +6,7 @@
* rights reserved.
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2014 NVIDIA Corporation. All rights reserved.
*
* Author(s): Torsten Hoefler <htor@cs.indiana.edu>
*
@ -108,7 +109,11 @@ int ompi_coll_libnbc_ialltoall(void* sendbuf, int sendcount, MPI_Datatype sendty
}
/* phase 1 - rotate n data blocks upwards into the tmpbuffer */
/* Take the plain-memcpy fast path only for intrinsic send types. With CUDA
 * support compiled in, additionally require opal_cuda_check_bufs() to be
 * false for the two buffers the memcpy below touches (handle->tmpbuf as the
 * target, sendbuf as the source) -- presumably it reports CUDA device
 * buffers, which memcpy cannot handle; confirm against its declaration. */
#if OPAL_CUDA_SUPPORT
if(NBC_Type_intrinsic(sendtype) && !(opal_cuda_check_bufs((char *)handle->tmpbuf, (char *)sendbuf))) {
#else
if(NBC_Type_intrinsic(sendtype)) {
#endif /* OPAL_CUDA_SUPPORT */
/* contiguous - just copy (1st copy) */
memcpy(handle->tmpbuf, (char*)sendbuf+datasize*rank, datasize*(p-rank));
if(rank != 0) memcpy((char*)handle->tmpbuf+datasize*(p-rank), sendbuf, datasize*(rank));

Просмотреть файл

@ -8,6 +8,7 @@
* Author(s): Torsten Hoefler <htor@cs.indiana.edu>
*
* Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014 NVIDIA Corporation. All rights reserved.
*
*/
#ifndef __NBC_INTERNAL_H__
@ -20,6 +21,10 @@
#include "mpi.h"
#include "coll_libnbc.h"
#if OPAL_CUDA_SUPPORT
#include "opal/datatype/opal_convertor.h"
#include "opal/datatype/opal_datatype_cuda.h"
#endif /* OPAL_CUDA_SUPPORT */
#include "ompi/include/ompi/constants.h"
#include "ompi/request/request.h"
#include "ompi/datatype/ompi_datatype.h"
@ -483,7 +488,11 @@ static inline int NBC_Copy(void *src, int srccount, MPI_Datatype srctype, void *
OPAL_PTRDIFF_TYPE ext, lb;
void *packbuf;
#if OPAL_CUDA_SUPPORT
if((srctype == tgttype) && NBC_Type_intrinsic(srctype) && !(opal_cuda_check_bufs((char *)tgt, (char *)src))) {
#else
if((srctype == tgttype) && NBC_Type_intrinsic(srctype)) {
#endif /* OPAL_CUDA_SUPPORT */
/* if we have the same types and they are contiguous (intrinsic
* types are contiguous), we can just use a single memcpy */
res = ompi_datatype_get_extent(srctype, &lb, &ext);
@ -511,7 +520,11 @@ static inline int NBC_Unpack(void *src, int srccount, MPI_Datatype srctype, void
int size, pos, res;
OPAL_PTRDIFF_TYPE ext, lb;
#if OPAL_CUDA_SUPPORT
if(NBC_Type_intrinsic(srctype) && !(opal_cuda_check_bufs((char *)tgt, (char *)src))) {
#else
if(NBC_Type_intrinsic(srctype)) {
#endif /* OPAL_CUDA_SUPPORT */
/* if we have the same types and they are contiguous (intrinsic
* types are contiguous), we can just use a single memcpy */
res = ompi_datatype_get_extent (srctype, &lb, &ext);