From 399dc3db43f4ad2567920333fc5d84abe3b62dbe Mon Sep 17 00:00:00 2001
From: Rolf vandeVaart
Date: Fri, 26 Sep 2014 16:24:45 +0000
Subject: [PATCH] Code to check for managed memory. Configure support also.

This commit was SVN r32801.
---
 config/opal_check_cuda.m4          | 11 ++++++++
 opal/mca/common/cuda/common_cuda.c | 40 +++++++++++++++++++++++++++++-
 2 files changed, 50 insertions(+), 1 deletion(-)

diff --git a/config/opal_check_cuda.m4 b/config/opal_check_cuda.m4
index afb5559676..55f39bda9c 100644
--- a/config/opal_check_cuda.m4
+++ b/config/opal_check_cuda.m4
@@ -93,6 +93,13 @@ AC_COMPILE_IFELSE(
         [CUDA_VERSION_60_OR_GREATER=1],
         [CUDA_VERSION_60_OR_GREATER=0])
 
+# If we have CUDA support, check to see if we have support for cuPointerGetAttributes
+# which was first introduced in CUDA 7.0.
+AS_IF([test "$opal_check_cuda_happy" = "yes"],
+    AC_CHECK_DECL([cuPointerGetAttributes], [CUDA_GET_ATTRIBUTES=1], [CUDA_GET_ATTRIBUTES=0],
+        [#include <$opal_cuda_incdir/cuda.h>]),
+    [])
+
 AC_MSG_CHECKING([if have cuda support])
 if test "$opal_check_cuda_happy" = "yes"; then
     AC_MSG_RESULT([yes (-I$with_cuda)])
@@ -116,6 +123,10 @@ AM_CONDITIONAL([OPAL_cuda_sync_memops], [test "x$CUDA_SYNC_MEMOPS" = "x1"])
 AC_DEFINE_UNQUOTED([OPAL_CUDA_SYNC_MEMOPS],$CUDA_SYNC_MEMOPS,
                    [Whether we have CUDA CU_POINTER_ATTRIBUTE_SYNC_MEMOPS support available])
 
+AM_CONDITIONAL([OPAL_cuda_get_attributes], [test "x$CUDA_GET_ATTRIBUTES" = "x1"])
+AC_DEFINE_UNQUOTED([OPAL_CUDA_GET_ATTRIBUTES],$CUDA_GET_ATTRIBUTES,
+                   [Whether we have CUDA cuPointerGetAttributes function available])
+
 # There is nothing specific we can check for to see if GPU Direct RDMA is available.
 # Therefore, we check to see whether we have CUDA 6.0 or later.
 AM_CONDITIONAL([OPAL_cuda_gdr_support], [test "x$CUDA_VERSION_60_OR_GREATER" = "x1"])
diff --git a/opal/mca/common/cuda/common_cuda.c b/opal/mca/common/cuda/common_cuda.c
index 019793dfa2..5c72f0630b 100644
--- a/opal/mca/common/cuda/common_cuda.c
+++ b/opal/mca/common/cuda/common_cuda.c
@@ -100,6 +100,9 @@ struct cudaFunctionTable {
     int (*cuEventSynchronize)(CUevent);
     int (*cuStreamSynchronize)(CUstream);
     int (*cuStreamDestroy)(CUstream);
+#if OPAL_CUDA_GET_ATTRIBUTES
+    int (*cuPointerGetAttributes)(unsigned int, CUpointer_attribute *, void **, CUdeviceptr);
+#endif /* OPAL_CUDA_GET_ATTRIBUTES */
 } cudaFunctionTable;
 typedef struct cudaFunctionTable cudaFunctionTable_t;
 cudaFunctionTable_t cuFunc;
@@ -494,6 +497,9 @@ int mca_common_cuda_stage_one_init(void)
     OPAL_CUDA_DLSYM(libcuda_handle, cuEventSynchronize);
     OPAL_CUDA_DLSYM(libcuda_handle, cuStreamSynchronize);
     OPAL_CUDA_DLSYM(libcuda_handle, cuStreamDestroy);
+#if OPAL_CUDA_GET_ATTRIBUTES
+    OPAL_CUDA_DLSYM(libcuda_handle, cuPointerGetAttributes);
+#endif /* OPAL_CUDA_GET_ATTRIBUTES */
     return 0;
 }
 
@@ -1715,10 +1721,32 @@ static float mydifftime(struct timespec ts_start, struct timespec ts_end) {
 
 static int mca_common_cuda_is_gpu_buffer(const void *pUserBuf)
 {
     int res;
-    CUmemorytype memType;
+    CUmemorytype memType = 0;
     CUdeviceptr dbuf = (CUdeviceptr)pUserBuf;
     CUcontext ctx = NULL;
+#if OPAL_CUDA_GET_ATTRIBUTES
+    uint32_t isManaged = 0;
+    /* With CUDA 7.0, we can get multiple attributes with a single call */
+    CUpointer_attribute attributes[3] = {CU_POINTER_ATTRIBUTE_MEMORY_TYPE,
+                                         CU_POINTER_ATTRIBUTE_CONTEXT,
+                                         CU_POINTER_ATTRIBUTE_IS_MANAGED};
+    void *attrdata[] = {(void *)&memType, (void *)&ctx, (void *)&isManaged};
+    res = cuFunc.cuPointerGetAttributes(3, attributes, attrdata, dbuf);
+    if (res != CUDA_SUCCESS) {
+        /* If we cannot determine it is a device pointer,
+         * just assume it is not. */
+        return 0;
+    } else if (memType == CU_MEMORYTYPE_HOST) {
+        /* Host memory, nothing to do here */
+        return 0;
+    } else if (memType == 0) {
+        /* This can happen when CUDA is initialized but dbuf is not a valid CUDA pointer */
+        return 0;
+    }
+    /* Must be a device pointer */
+    assert(memType == CU_MEMORYTYPE_DEVICE);
+#else /* OPAL_CUDA_GET_ATTRIBUTES */
     res = cuFunc.cuPointerGetAttribute(&memType,
                                        CU_POINTER_ATTRIBUTE_MEMORY_TYPE, dbuf);
     if (res != CUDA_SUCCESS) {
@@ -1741,6 +1769,7 @@ static int mca_common_cuda_is_gpu_buffer(const void *pUserBuf)
      * and set the current context to that. It is rare that we will not
      * have a context. */
     res = cuFunc.cuCtxGetCurrent(&ctx);
+#endif /* OPAL_CUDA_GET_ATTRIBUTES */
     if (OPAL_UNLIKELY(NULL == ctx)) {
         if (CUDA_SUCCESS == res) {
             res = cuFunc.cuPointerGetAttribute(&ctx,
@@ -1768,6 +1797,15 @@ static int mca_common_cuda_is_gpu_buffer(const void *pUserBuf)
         }
     }
 
+#if OPAL_CUDA_GET_ATTRIBUTES
+    if (1 == isManaged) {
+        /* Currently cannot support managed memory */
+        opal_output(0, "CUDA: ptr=%p: CUDA-aware Open MPI detected managed memory but there "
+                    "is no support for it. Result will be unpredictable.", pUserBuf);
+        return OPAL_ERROR;
+    }
+#endif /* OPAL_CUDA_GET_ATTRIBUTES */
+
     /* First access on a device pointer finalizes CUDA support initialization.
      * If initialization fails, disable support. */
     if (!stage_three_init_complete) {
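
For readers unfamiliar with the surrounding code: Open MPI does not link against libcuda directly; mca_common_cuda_stage_one_init() resolves each driver entry point at runtime through the OPAL_CUDA_DLSYM macro into the cudaFunctionTable, which is why the patch adds the cuPointerGetAttributes slot and its lookup only under the new OPAL_CUDA_GET_ATTRIBUTES guard from configure. The sketch below reproduces that pattern in isolation. It is not Open MPI code: the struct cu_funcs, the function load_cuda_symbols(), the libcuda.so.1 soname, and the build line (cc -DOPAL_CUDA_GET_ATTRIBUTES=1 -I<cuda include dir> sketch.c -ldl) are assumptions for illustration only.

/* Standalone sketch (assumed names) of the runtime-loading pattern the patch
 * extends.  OPAL_CUDA_GET_ATTRIBUTES is expected to come from the configure
 * probe added above; define it by hand for a test build. */
#include <dlfcn.h>
#include <stdio.h>
#include <cuda.h>

static struct {
#if OPAL_CUDA_GET_ATTRIBUTES
    /* Only present when cuda.h declares the batched query (CUDA >= 7.0). */
    CUresult (*cuPointerGetAttributes)(unsigned int, CUpointer_attribute *,
                                       void **, CUdeviceptr);
#endif
    CUresult (*cuPointerGetAttribute)(void *, CUpointer_attribute, CUdeviceptr);
} cu_funcs;

static int load_cuda_symbols(void)
{
    void *handle = dlopen("libcuda.so.1", RTLD_LAZY | RTLD_GLOBAL);
    if (NULL == handle) {
        fprintf(stderr, "dlopen of libcuda failed: %s\n", dlerror());
        return -1;
    }
    /* Always resolve the single-attribute query (available since CUDA 4.0). */
    *(void **)&cu_funcs.cuPointerGetAttribute =
        dlsym(handle, "cuPointerGetAttribute");
#if OPAL_CUDA_GET_ATTRIBUTES
    /* Resolve the batched query only when configure said it is declared. */
    *(void **)&cu_funcs.cuPointerGetAttributes =
        dlsym(handle, "cuPointerGetAttributes");
#endif
    return (NULL == cu_funcs.cuPointerGetAttribute) ? -1 : 0;
}

int main(void)
{
    return load_cuda_symbols();
}

The AC_CHECK_DECL probe only affects compilation: the function-pointer slot and its dlsym() lookup are built solely when cuda.h declares the batched query, so installations with pre-7.0 headers keep using the single-attribute path the patch leaves under #else.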
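
The detection logic itself can be exercised outside Open MPI. The program below is a minimal standalone sketch, assuming a CUDA 7.0 or later toolkit, a driver with managed-memory support, and a build line such as nvcc check_managed.c -o check_managed -lcuda; the function name classify_pointer() is invented for illustration. It mirrors what the patched mca_common_cuda_is_gpu_buffer() does: one cuPointerGetAttributes() call fetches the memory type, the owning context, and the CU_POINTER_ATTRIBUTE_IS_MANAGED flag, so a cudaMallocManaged() buffer can be recognized (and, in the patch, rejected with opal_output/OPAL_ERROR) before any transfer is attempted.

/* Standalone sketch (not Open MPI code) of the batched pointer query. */
#include <stdio.h>
#include <cuda.h>
#include <cuda_runtime.h>

/* Returns 1 for device memory, 0 for host/unknown; *is_managed is filled from
 * CU_POINTER_ATTRIBUTE_IS_MANAGED, which is what the patch keys off. */
static int classify_pointer(const void *buf, unsigned int *is_managed)
{
    CUmemorytype mem_type = (CUmemorytype)0;
    CUcontext ctx = NULL;
    CUpointer_attribute attrs[3] = {CU_POINTER_ATTRIBUTE_MEMORY_TYPE,
                                    CU_POINTER_ATTRIBUTE_CONTEXT,
                                    CU_POINTER_ATTRIBUTE_IS_MANAGED};
    void *data[3] = {&mem_type, &ctx, is_managed};

    CUresult res = cuPointerGetAttributes(3, attrs, data, (CUdeviceptr)buf);
    if (res != CUDA_SUCCESS || mem_type == CU_MEMORYTYPE_HOST || mem_type == 0) {
        return 0;   /* not something we can treat as a device buffer */
    }
    return 1;       /* device memory (possibly managed) */
}

int main(void)
{
    void *managed = NULL, *device = NULL;
    unsigned int m1 = 0, m2 = 0;

    cudaSetDevice(0);   /* runtime call also initializes the driver API */
    cudaMallocManaged(&managed, 1024, cudaMemAttachGlobal);
    cudaMalloc(&device, 1024);

    printf("managed buffer: gpu=%d is_managed=%u\n", classify_pointer(managed, &m1), m1);
    printf("device  buffer: gpu=%d is_managed=%u\n", classify_pointer(device, &m2), m2);

    cudaFree(managed);
    cudaFree(device);
    return 0;
}

Batching the queries is why the new path requires the CUDA 7.0 cuPointerGetAttributes() entry point; the older single-attribute interface kept under #else answers only one attribute per call, so checking the managed flag there would cost additional driver calls.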