From 399dc3db43f4ad2567920333fc5d84abe3b62dbe Mon Sep 17 00:00:00 2001
From: Rolf vandeVaart
Date: Fri, 26 Sep 2014 16:24:45 +0000
Subject: [PATCH] Code to check for managed memory. Configure support also.

This commit was SVN r32801.
---
 config/opal_check_cuda.m4          | 11 ++++++++
 opal/mca/common/cuda/common_cuda.c | 40 +++++++++++++++++++++++++++++-
 2 files changed, 50 insertions(+), 1 deletion(-)

diff --git a/config/opal_check_cuda.m4 b/config/opal_check_cuda.m4
index afb5559676..55f39bda9c 100644
--- a/config/opal_check_cuda.m4
+++ b/config/opal_check_cuda.m4
@@ -93,6 +93,13 @@ AC_COMPILE_IFELSE(
         [CUDA_VERSION_60_OR_GREATER=1],
         [CUDA_VERSION_60_OR_GREATER=0])
 
+# If we have CUDA support, check to see if we have support for cuPointerGetAttributes
+# which was first introduced in CUDA 7.0.
+AS_IF([test "$opal_check_cuda_happy" = "yes"],
+    AC_CHECK_DECL([cuPointerGetAttributes], [CUDA_GET_ATTRIBUTES=1], [CUDA_GET_ATTRIBUTES=0],
+        [#include <$opal_cuda_incdir/cuda.h>]),
+    [])
+
 AC_MSG_CHECKING([if have cuda support])
 if test "$opal_check_cuda_happy" = "yes"; then
     AC_MSG_RESULT([yes (-I$with_cuda)])
@@ -116,6 +123,10 @@ AM_CONDITIONAL([OPAL_cuda_sync_memops], [test "x$CUDA_SYNC_MEMOPS" = "x1"])
 AC_DEFINE_UNQUOTED([OPAL_CUDA_SYNC_MEMOPS],$CUDA_SYNC_MEMOPS,
                    [Whether we have CUDA CU_POINTER_ATTRIBUTE_SYNC_MEMOPS support available])
 
+AM_CONDITIONAL([OPAL_cuda_get_attributes], [test "x$CUDA_GET_ATTRIBUTES" = "x1"])
+AC_DEFINE_UNQUOTED([OPAL_CUDA_GET_ATTRIBUTES],$CUDA_GET_ATTRIBUTES,
+                   [Whether we have CUDA cuPointerGetAttributes function available])
+
 # There is nothing specific we can check for to see if GPU Direct RDMA is available.
 # Therefore, we check to see whether we have CUDA 6.0 or later.
 AM_CONDITIONAL([OPAL_cuda_gdr_support], [test "x$CUDA_VERSION_60_OR_GREATER" = "x1"])
diff --git a/opal/mca/common/cuda/common_cuda.c b/opal/mca/common/cuda/common_cuda.c
index 019793dfa2..5c72f0630b 100644
--- a/opal/mca/common/cuda/common_cuda.c
+++ b/opal/mca/common/cuda/common_cuda.c
@@ -100,6 +100,9 @@ struct cudaFunctionTable {
     int (*cuEventSynchronize)(CUevent);
     int (*cuStreamSynchronize)(CUstream);
     int (*cuStreamDestroy)(CUstream);
+#if OPAL_CUDA_GET_ATTRIBUTES
+    int (*cuPointerGetAttributes)(unsigned int, CUpointer_attribute *, void **, CUdeviceptr);
+#endif /* OPAL_CUDA_GET_ATTRIBUTES */
 } cudaFunctionTable;
 typedef struct cudaFunctionTable cudaFunctionTable_t;
 cudaFunctionTable_t cuFunc;
@@ -494,6 +497,9 @@ int mca_common_cuda_stage_one_init(void)
     OPAL_CUDA_DLSYM(libcuda_handle, cuEventSynchronize);
     OPAL_CUDA_DLSYM(libcuda_handle, cuStreamSynchronize);
     OPAL_CUDA_DLSYM(libcuda_handle, cuStreamDestroy);
+#if OPAL_CUDA_GET_ATTRIBUTES
+    OPAL_CUDA_DLSYM(libcuda_handle, cuPointerGetAttributes);
+#endif /* OPAL_CUDA_GET_ATTRIBUTES */
     return 0;
 }
 
@@ -1715,10 +1721,32 @@ static float mydifftime(struct timespec ts_start, struct timespec ts_end) {
 
 static int mca_common_cuda_is_gpu_buffer(const void *pUserBuf)
 {
     int res;
-    CUmemorytype memType;
+    CUmemorytype memType = 0;
     CUdeviceptr dbuf = (CUdeviceptr)pUserBuf;
     CUcontext ctx = NULL;
+#if OPAL_CUDA_GET_ATTRIBUTES
+    uint32_t isManaged = 0;
+    /* With CUDA 7.0, we can get multiple attributes with a single call */
+    CUpointer_attribute attributes[3] = {CU_POINTER_ATTRIBUTE_MEMORY_TYPE,
+                                         CU_POINTER_ATTRIBUTE_CONTEXT,
+                                         CU_POINTER_ATTRIBUTE_IS_MANAGED};
+    void *attrdata[] = {(void *)&memType, (void *)&ctx, (void *)&isManaged};
+    res = cuFunc.cuPointerGetAttributes(3, attributes, attrdata, dbuf);
+    if (res != CUDA_SUCCESS) {
+        /* If we cannot determine it is a device pointer,
+         * just assume it is not. */
+        return 0;
+    } else if (memType == CU_MEMORYTYPE_HOST) {
+        /* Host memory, nothing to do here */
+        return 0;
+    } else if (memType == 0) {
+        /* This can happen when CUDA is initialized but dbuf is not a valid CUDA pointer */
+        return 0;
+    }
+    /* Must be a device pointer */
+    assert(memType == CU_MEMORYTYPE_DEVICE);
+#else /* OPAL_CUDA_GET_ATTRIBUTES */
     res = cuFunc.cuPointerGetAttribute(&memType,
                                        CU_POINTER_ATTRIBUTE_MEMORY_TYPE, dbuf);
     if (res != CUDA_SUCCESS) {
@@ -1741,6 +1769,7 @@ static int mca_common_cuda_is_gpu_buffer(const void *pUserBuf)
      * and set the current context to that. It is rare that we will not
      * have a context. */
     res = cuFunc.cuCtxGetCurrent(&ctx);
+#endif /* OPAL_CUDA_GET_ATTRIBUTES */
     if (OPAL_UNLIKELY(NULL == ctx)) {
         if (CUDA_SUCCESS == res) {
             res = cuFunc.cuPointerGetAttribute(&ctx,
@@ -1768,6 +1797,15 @@ static int mca_common_cuda_is_gpu_buffer(const void *pUserBuf)
         }
     }
 
+#if OPAL_CUDA_GET_ATTRIBUTES
+    if (1 == isManaged) {
+        /* Currently cannot support managed memory */
+        opal_output(0, "CUDA: ptr=%p: CUDA-aware Open MPI detected managed memory but there "
+                    "is no support for it. Result will be unpredictable.", pUserBuf);
+        return OPAL_ERROR;
+    }
+#endif /* OPAL_CUDA_GET_ATTRIBUTES */
+
     /* First access on a device pointer finalizes CUDA support initialization.
      * If initialization fails, disable support. */
     if (!stage_three_init_complete) {
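
For readers unfamiliar with the surrounding code: Open MPI does not link against libcuda directly; mca_common_cuda_stage_one_init() resolves each driver entry point at runtime through the OPAL_CUDA_DLSYM macro into the cudaFunctionTable, which is why the patch adds the cuPointerGetAttributes slot and its lookup only under the new OPAL_CUDA_GET_ATTRIBUTES guard from configure. The sketch below reproduces that pattern in isolation. It is not Open MPI code: the struct cu_funcs, the function load_cuda_symbols(), the libcuda.so.1 soname, and the build line (cc -DOPAL_CUDA_GET_ATTRIBUTES=1 -I<cuda include dir> sketch.c -ldl) are assumptions for illustration only.

/* Standalone sketch (assumed names) of the runtime-loading pattern the patch
 * extends.  OPAL_CUDA_GET_ATTRIBUTES is expected to come from the configure
 * probe added above; define it by hand for a test build. */
#include <dlfcn.h>
#include <stdio.h>
#include <cuda.h>

static struct {
#if OPAL_CUDA_GET_ATTRIBUTES
    /* Only present when cuda.h declares the batched query (CUDA >= 7.0). */
    CUresult (*cuPointerGetAttributes)(unsigned int, CUpointer_attribute *,
                                       void **, CUdeviceptr);
#endif
    CUresult (*cuPointerGetAttribute)(void *, CUpointer_attribute, CUdeviceptr);
} cu_funcs;

static int load_cuda_symbols(void)
{
    void *handle = dlopen("libcuda.so.1", RTLD_LAZY | RTLD_GLOBAL);
    if (NULL == handle) {
        fprintf(stderr, "dlopen of libcuda failed: %s\n", dlerror());
        return -1;
    }
    /* Always resolve the single-attribute query (available since CUDA 4.0). */
    *(void **)&cu_funcs.cuPointerGetAttribute =
        dlsym(handle, "cuPointerGetAttribute");
#if OPAL_CUDA_GET_ATTRIBUTES
    /* Resolve the batched query only when configure said it is declared. */
    *(void **)&cu_funcs.cuPointerGetAttributes =
        dlsym(handle, "cuPointerGetAttributes");
#endif
    return (NULL == cu_funcs.cuPointerGetAttribute) ? -1 : 0;
}

int main(void)
{
    return load_cuda_symbols();
}

The AC_CHECK_DECL probe only affects compilation: the function-pointer slot and its dlsym() lookup are built solely when cuda.h declares the batched query, so installations with pre-7.0 headers keep using the single-attribute path the patch leaves under #else.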
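
The detection logic itself can be exercised outside Open MPI. The program below is a minimal standalone sketch, assuming a CUDA 7.0 or later toolkit, a driver with managed-memory support, and a build line such as nvcc check_managed.c -o check_managed -lcuda; the function name classify_pointer() is invented for illustration. It mirrors what the patched mca_common_cuda_is_gpu_buffer() does: one cuPointerGetAttributes() call fetches the memory type, the owning context, and the CU_POINTER_ATTRIBUTE_IS_MANAGED flag, so a cudaMallocManaged() buffer can be recognized (and, in the patch, rejected with opal_output/OPAL_ERROR) before any transfer is attempted.

/* Standalone sketch (not Open MPI code) of the batched pointer query. */
#include <stdio.h>
#include <cuda.h>
#include <cuda_runtime.h>

/* Returns 1 for device memory, 0 for host/unknown; *is_managed is filled from
 * CU_POINTER_ATTRIBUTE_IS_MANAGED, which is what the patch keys off. */
static int classify_pointer(const void *buf, unsigned int *is_managed)
{
    CUmemorytype mem_type = (CUmemorytype)0;
    CUcontext ctx = NULL;
    CUpointer_attribute attrs[3] = {CU_POINTER_ATTRIBUTE_MEMORY_TYPE,
                                    CU_POINTER_ATTRIBUTE_CONTEXT,
                                    CU_POINTER_ATTRIBUTE_IS_MANAGED};
    void *data[3] = {&mem_type, &ctx, is_managed};

    CUresult res = cuPointerGetAttributes(3, attrs, data, (CUdeviceptr)buf);
    if (res != CUDA_SUCCESS || mem_type == CU_MEMORYTYPE_HOST || mem_type == 0) {
        return 0;   /* not something we can treat as a device buffer */
    }
    return 1;       /* device memory (possibly managed) */
}

int main(void)
{
    void *managed = NULL, *device = NULL;
    unsigned int m1 = 0, m2 = 0;

    cudaSetDevice(0);   /* runtime call also initializes the driver API */
    cudaMallocManaged(&managed, 1024, cudaMemAttachGlobal);
    cudaMalloc(&device, 1024);

    printf("managed buffer: gpu=%d is_managed=%u\n", classify_pointer(managed, &m1), m1);
    printf("device  buffer: gpu=%d is_managed=%u\n", classify_pointer(device, &m2), m2);

    cudaFree(managed);
    cudaFree(device);
    return 0;
}

Batching the queries is why the new path requires the CUDA 7.0 cuPointerGetAttributes() entry point; the older single-attribute interface kept under #else answers only one attribute per call, so checking the managed flag there would cost additional driver calls.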