Add support for CUDA Unified memory. Basically, add a new flag and disable some
optimizations when that flag is detected. Lightly reviewed by bosilca.
This commit is contained in:
rolfv 2014-10-29 06:17:23 -07:00 committed by Rolf vandeVaart
parent 52ed5a9bf8
commit f471b09ae9
4 changed files with 18 additions and 16 deletions
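The mechanism of the change is simple: when a user buffer is detected as CUDA managed (unified) memory, the convertor is tagged with a new flag, and paths that would otherwise take a GPU-specific shortcut treat a flagged convertor as one that must be staged through host buffers. A minimal, hypothetical sketch of that decision follows; only the two flag values are taken from the patch, the helper name is made up:

```c
#include <stdint.h>

/* Flag values from the patch (opal_convertor.h). */
#define CONVERTOR_CUDA         0x00400000
#define CONVERTOR_CUDA_UNIFIED 0x10000000

/* Hypothetical helper illustrating "disable some optimizations when that
 * flag is detected": any convertor carrying the CUDA or CUDA-unified flag
 * is forced through the generic, host-staged path. */
static int needs_host_staging(uint32_t convertor_flags)
{
    return (convertor_flags & (CONVERTOR_CUDA | CONVERTOR_CUDA_UNIFIED)) != 0;
}
```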

View file

@@ -11,6 +11,7 @@
* Copyright (c) 2004-2006 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2009 Oak Ridge National Labs. All rights reserved.
+* Copyright (c) 2014 NVIDIA Corporation. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@@ -45,11 +46,12 @@ BEGIN_C_DECLS
#define CONVERTOR_WITH_CHECKSUM 0x00200000
#define CONVERTOR_CUDA 0x00400000
#define CONVERTOR_CUDA_ASYNC 0x00800000
-#define CONVERTOR_TYPE_MASK 0x00FF0000
+#define CONVERTOR_TYPE_MASK 0x10FF0000
#define CONVERTOR_STATE_START 0x01000000
#define CONVERTOR_STATE_COMPLETE 0x02000000
#define CONVERTOR_STATE_ALLOC 0x04000000
#define CONVERTOR_COMPLETED 0x08000000
+#define CONVERTOR_CUDA_UNIFIED 0x10000000
union dt_elem_desc;
typedef struct opal_convertor_t opal_convertor_t;
@@ -177,7 +179,7 @@ static inline int32_t opal_convertor_need_buffers( const opal_convertor_t* pConv
if (OPAL_UNLIKELY(0 == (pConvertor->flags & CONVERTOR_HOMOGENEOUS))) return 1;
#endif
#if OPAL_CUDA_SUPPORT
-if( pConvertor->flags & CONVERTOR_CUDA ) return 1;
+if( pConvertor->flags & (CONVERTOR_CUDA | CONVERTOR_CUDA_UNIFIED)) return 1;
#endif
if( pConvertor->flags & OPAL_DATATYPE_FLAG_NO_GAPS ) return 0;
if( (pConvertor->count == 1) && (pConvertor->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS) ) return 0;
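Two details in this hunk go together: the new CONVERTOR_CUDA_UNIFIED bit (0x10000000) lies outside the old 0x00FF0000 type mask, so CONVERTOR_TYPE_MASK is widened to 0x10FF0000, presumably so that code clearing or copying the "type" bits also carries the unified flag along. A standalone check of that bit arithmetic, not code from the patch:

```c
#include <assert.h>
#include <stdint.h>

#define CONVERTOR_CUDA          0x00400000
#define CONVERTOR_CUDA_UNIFIED  0x10000000
#define CONVERTOR_TYPE_MASK     0x10FF0000   /* widened from 0x00FF0000 */
#define CONVERTOR_STATE_START   0x01000000

int main(void)
{
    /* The new bit is covered by the widened mask ... */
    assert((CONVERTOR_CUDA_UNIFIED & CONVERTOR_TYPE_MASK) == CONVERTOR_CUDA_UNIFIED);
    /* ... and does not collide with any existing state bit. */
    assert((CONVERTOR_CUDA_UNIFIED & CONVERTOR_STATE_START) == 0);

    /* Clearing the "type" bits therefore also clears the unified flag. */
    uint32_t flags = CONVERTOR_CUDA | CONVERTOR_CUDA_UNIFIED | CONVERTOR_STATE_START;
    flags &= ~(uint32_t)CONVERTOR_TYPE_MASK;
    assert(flags == CONVERTOR_STATE_START);
    return 0;
}
```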

View file

@@ -57,7 +57,7 @@ void mca_cuda_convertor_init(opal_convertor_t* convertor, const void *pUserBuf)
return;
}
-if (ftable.gpu_is_gpu_buffer(pUserBuf)) {
+if (ftable.gpu_is_gpu_buffer(pUserBuf, convertor)) {
convertor->flags |= CONVERTOR_CUDA;
}
}
@@ -78,7 +78,7 @@ bool opal_cuda_check_bufs(char *dest, char *src)
return false;
}
-if (ftable.gpu_is_gpu_buffer(dest) || ftable.gpu_is_gpu_buffer(src)) {
+if (ftable.gpu_is_gpu_buffer(dest, NULL) || ftable.gpu_is_gpu_buffer(src, NULL)) {
return true;
} else {
return false;
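The gpu_is_gpu_buffer() table entry now takes the convertor as a second argument so the detection routine can record the unified-memory property directly on it; call sites that only need a yes/no answer, such as opal_cuda_check_bufs() above, pass NULL. A self-contained sketch of the two calling conventions (the stand-in types and the functions init_like/check_like are illustrative, not Open MPI API):

```c
#include <stdbool.h>
#include <stddef.h>

#define CONVERTOR_CUDA 0x00400000

typedef struct { unsigned int flags; } opal_convertor_t;  /* simplified stand-in */

/* Stand-in for ftable.gpu_is_gpu_buffer() with the new two-argument signature. */
static int gpu_is_gpu_buffer(const void *buf, opal_convertor_t *convertor)
{
    (void)buf; (void)convertor;
    return 0;  /* pretend: not a GPU buffer */
}

/* Convention 1 (as in mca_cuda_convertor_init): pass the convertor so that the
 * detection routine may also tag it with CONVERTOR_CUDA_UNIFIED. */
static void init_like(opal_convertor_t *convertor, const void *user_buf)
{
    if (gpu_is_gpu_buffer(user_buf, convertor)) {
        convertor->flags |= CONVERTOR_CUDA;
    }
}

/* Convention 2 (as in opal_cuda_check_bufs): no convertor to annotate, pass NULL. */
static bool check_like(const char *dest, const char *src)
{
    return gpu_is_gpu_buffer(dest, NULL) || gpu_is_gpu_buffer(src, NULL);
}
```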

View file

@@ -14,7 +14,7 @@
* common cuda code is initialized. This removes any dependency on <cuda.h>
* in the opal cuda datatype code. */
struct opal_common_cuda_function_table {
-int (*gpu_is_gpu_buffer)(const void*);
+int (*gpu_is_gpu_buffer)(const void*, opal_convertor_t*);
int (*gpu_cu_memcpy_async)(void*, const void*, size_t, opal_convertor_t*);
int (*gpu_cu_memcpy)(void*, const void*, size_t);
int (*gpu_memmove)(void*, void*, size_t);
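Because the opal datatype code only sees this function table (keeping <cuda.h> out of opal), the signature change has to be mirrored wherever the table is populated, which is what the common_cuda.c hunks below do. A minimal sketch of filling in that entry under the new signature; everything except the struct and field name is a placeholder:

```c
/* Simplified stand-in; the real opal_convertor_t is defined elsewhere. */
typedef struct opal_convertor_t opal_convertor_t;

/* Mirrors struct opal_common_cuda_function_table from the diff,
 * other fields omitted for brevity. */
struct opal_common_cuda_function_table {
    int (*gpu_is_gpu_buffer)(const void *, opal_convertor_t *);
};

/* Placeholder implementation using the new two-argument signature. */
static int my_is_gpu_buffer(const void *buf, opal_convertor_t *convertor)
{
    (void)buf;
    (void)convertor;   /* a real implementation could tag it with CONVERTOR_CUDA_UNIFIED */
    return 0;
}

static struct opal_common_cuda_function_table ftable = {
    .gpu_is_gpu_buffer = my_is_gpu_buffer,
};
```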

View file

@@ -127,7 +127,7 @@ static opal_mutex_t common_cuda_dtoh_lock;
static opal_mutex_t common_cuda_ipc_lock;
/* Functions called by opal layer - plugged into opal function table */
-static int mca_common_cuda_is_gpu_buffer(const void*);
+static int mca_common_cuda_is_gpu_buffer(const void*, opal_convertor_t*);
static int mca_common_cuda_memmove(void*, void*, size_t);
static int mca_common_cuda_cu_memcpy_async(void*, const void*, size_t, opal_convertor_t*);
static int mca_common_cuda_cu_memcpy(void*, const void*, size_t);
@@ -1700,7 +1700,7 @@ static float mydifftime(opal_timer_t ts_start, opal_timer_t ts_end) {
#endif /* OPAL_CUDA_SUPPORT_41 */
/* Routines that get plugged into the opal datatype code */
-static int mca_common_cuda_is_gpu_buffer(const void *pUserBuf)
+static int mca_common_cuda_is_gpu_buffer(const void *pUserBuf, opal_convertor_t *convertor)
{
int res;
CUmemorytype memType = 0;
@@ -1715,6 +1715,15 @@ static int mca_common_cuda_is_gpu_buffer(const void *pUserBuf)
void *attrdata[] = {(void *)&memType, (void *)&ctx, (void *)&isManaged};
res = cuFunc.cuPointerGetAttributes(3, attributes, attrdata, dbuf);
+/* Mark unified memory buffers with a flag. This will allow all unified
+ * memory to be forced through host buffers. Note that this memory can
+ * be either host or device so we need to set this flag prior to that check. */
+if (1 == isManaged) {
+    if (NULL != convertor) {
+        convertor->flags |= CONVERTOR_CUDA_UNIFIED;
+    }
+}
if (res != CUDA_SUCCESS) {
/* If we cannot determine it is device pointer,
* just assume it is not. */
@@ -1779,15 +1788,6 @@ static int mca_common_cuda_is_gpu_buffer(const void *pUserBuf)
}
}
-#if OPAL_CUDA_GET_ATTRIBUTES
-if (1 == isManaged) {
-    /* Currently cannot support managed memory */
-    opal_output(0, "CUDA: ptr=%p: CUDA-aware Open MPI detected managed memory but there "
-                "is no support for it. Result will be unpredictable.", pUserBuf);
-    return OPAL_ERROR;
-}
-#endif /* OPAL_CUDA_GET_ATTRIBUTES */
/* First access on a device pointer finalizes CUDA support initialization.
* If initialization fails, disable support. */
if (!stage_three_init_complete) {
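For reference, the detection hinges on the CU_POINTER_ATTRIBUTE_IS_MANAGED query made through cuPointerGetAttributes() earlier in this function. A standalone sketch of the same check against the CUDA driver API; it assumes cuInit() has been called and a context is current, and it mirrors the patch's "if in doubt, treat it as host memory" behavior:

```c
#include <cuda.h>
#include <stdint.h>

/* Returns 1 if ptr is CUDA managed (unified) memory, 0 otherwise. */
static int is_managed_memory(const void *ptr)
{
    unsigned int is_managed = 0;
    CUmemorytype mem_type = (CUmemorytype)0;
    CUcontext ctx = NULL;
    CUpointer_attribute attributes[3] = {
        CU_POINTER_ATTRIBUTE_MEMORY_TYPE,
        CU_POINTER_ATTRIBUTE_CONTEXT,
        CU_POINTER_ATTRIBUTE_IS_MANAGED,
    };
    void *attrdata[3] = { &mem_type, &ctx, &is_managed };

    /* One call retrieves all three attributes, as in the patch. */
    CUresult res = cuPointerGetAttributes(3, attributes, attrdata,
                                          (CUdeviceptr)(uintptr_t)ptr);
    if (CUDA_SUCCESS != res) {
        /* Cannot classify the pointer: treat it as ordinary host memory. */
        return 0;
    }
    return (1 == is_managed) ? 1 : 0;
}
```

In the patch, a positive result both sets CONVERTOR_CUDA_UNIFIED on the convertor (when one is supplied) and then falls through to the normal host/device classification.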