Code to check for managed memory. Configure support also.

This commit was SVN r32801.
2014-09-26 16:24:45 +00:00 · 2014-09-26 16:24:45 +00:00 · 399dc3db43
--- a/config/opal_check_cuda.m4
+++ b/config/opal_check_cuda.m4
@ -93,6 +93,13 @@ AC_COMPILE_IFELSE(
        [CUDA_VERSION_60_OR_GREATER=1],
        [CUDA_VERSION_60_OR_GREATER=0])

+# If we have CUDA support, check to see if we have support for cuPointerGetAttributes
+# which was first introduced in CUDA 7.0.
+AS_IF([test "$opal_check_cuda_happy"="yes"],
+    AC_CHECK_DECL([cuPointerGetAttributes], [CUDA_GET_ATTRIBUTES=1], [CUDA_GET_ATTRIBUTES=0],
+        [#include <$opal_cuda_incdir/cuda.h>]),
+    [])
+
 AC_MSG_CHECKING([if have cuda support])
 if test "$opal_check_cuda_happy" = "yes"; then
    AC_MSG_RESULT([yes (-I$with_cuda)])
@ -116,6 +123,10 @@ AM_CONDITIONAL([OPAL_cuda_sync_memops], [test "x$CUDA_SYNC_MEMOPS" = "x1"])
 AC_DEFINE_UNQUOTED([OPAL_CUDA_SYNC_MEMOPS],$CUDA_SYNC_MEMOPS,
                   [Whether we have CUDA CU_POINTER_ATTRIBUTE_SYNC_MEMOPS support available])

+AM_CONDITIONAL([OPAL_cuda_get_attributes], [test "x$CUDA_GET_ATTRIBUTES" = "x1"])
+AC_DEFINE_UNQUOTED([OPAL_CUDA_GET_ATTRIBUTES],$CUDA_GET_ATTRIBUTES,
+                   [Whether we have CUDA cuPointerGetAttributes function available])
+
 # There is nothing specific we can check for to see if GPU Direct RDMA is available.
 # Therefore, we check to see whether we have CUDA 6.0 or later.
 AM_CONDITIONAL([OPAL_cuda_gdr_support], [test "x$CUDA_VERSION_60_OR_GREATER" = "x1"])
--- a/opal/mca/common/cuda/common_cuda.c
+++ b/opal/mca/common/cuda/common_cuda.c
@ -100,6 +100,9 @@ struct cudaFunctionTable {
    int (*cuEventSynchronize)(CUevent);
    int (*cuStreamSynchronize)(CUstream);
    int (*cuStreamDestroy)(CUstream);
+#if OPAL_CUDA_GET_ATTRIBUTES
+    int (*cuPointerGetAttributes)(unsigned int, CUpointer_attribute *, void **, CUdeviceptr);
+#endif /* OPAL_CUDA_GET_ATTRIBUTES */
 } cudaFunctionTable;
 typedef struct cudaFunctionTable cudaFunctionTable_t;
 cudaFunctionTable_t cuFunc;
@ -494,6 +497,9 @@ int mca_common_cuda_stage_one_init(void)
    OPAL_CUDA_DLSYM(libcuda_handle, cuEventSynchronize);
    OPAL_CUDA_DLSYM(libcuda_handle, cuStreamSynchronize);
    OPAL_CUDA_DLSYM(libcuda_handle, cuStreamDestroy);
+#if OPAL_CUDA_GET_ATTRIBUTES
+    OPAL_CUDA_DLSYM(libcuda_handle, cuPointerGetAttributes);
+#endif /* OPAL_CUDA_GET_ATTRIBUTES */
    return 0;
 }

@ -1715,10 +1721,32 @@ static float mydifftime(struct timespec ts_start, struct timespec ts_end) {
 static int mca_common_cuda_is_gpu_buffer(const void *pUserBuf)
 {
    int res;
-    CUmemorytype memType;
+    CUmemorytype memType = 0;
    CUdeviceptr dbuf = (CUdeviceptr)pUserBuf;
    CUcontext ctx = NULL;
+#if OPAL_CUDA_GET_ATTRIBUTES
+    uint32_t isManaged = 0;
+    /* With CUDA 7.0, we can get multiple attributes with a single call */
+    CUpointer_attribute attributes[3] = {CU_POINTER_ATTRIBUTE_MEMORY_TYPE,
+                                         CU_POINTER_ATTRIBUTE_CONTEXT,
+                                         CU_POINTER_ATTRIBUTE_IS_MANAGED};
+    void *attrdata[] = {(void *)&memType, (void *)&ctx, (void *)&isManaged};

+    res = cuFunc.cuPointerGetAttributes(3, attributes, attrdata, dbuf);
+    if (res != CUDA_SUCCESS) {
+        /* If we cannot determine it is device pointer,
+         * just assume it is not. */
+        return 0;
+    } else if (memType == CU_MEMORYTYPE_HOST) {
+        /* Host memory, nothing to do here */
+        return 0;
+    } else if (memType == 0) {
+        /* This can happen when CUDA is initialized but dbuf is not valid CUDA pointer */
+        return 0;
+    }
+    /* Must be a device pointer */
+    assert(memType == CU_MEMORYTYPE_DEVICE);
+#else /* OPAL_CUDA_GET_ATTRIBUTES */
    res = cuFunc.cuPointerGetAttribute(&memType,
                                       CU_POINTER_ATTRIBUTE_MEMORY_TYPE, dbuf);
    if (res != CUDA_SUCCESS) {
@ -1741,6 +1769,7 @@ static int mca_common_cuda_is_gpu_buffer(const void *pUserBuf)
     * and set the current context to that.  It is rare that we will not
     * have a context. */
    res = cuFunc.cuCtxGetCurrent(&ctx);
+#endif /* OPAL_CUDA_GET_ATTRIBUTES */
    if (OPAL_UNLIKELY(NULL == ctx)) {
        if (CUDA_SUCCESS == res) {
            res = cuFunc.cuPointerGetAttribute(&ctx,
@ -1768,6 +1797,15 @@ static int mca_common_cuda_is_gpu_buffer(const void *pUserBuf)
        }
    }

+#if OPAL_CUDA_GET_ATTRIBUTES
+    if (1 == isManaged) {
+        /* Currently cannot support managed memory */
+        opal_output(0, "CUDA: ptr=%p: CUDA-aware Open MPI detected managed memory but there "
+                    "is no support for it.  Result will be unpredictable.", pUserBuf);
+        return OPAL_ERROR;
+    }
+#endif /* OPAL_CUDA_GET_ATTRIBUTES */
+
    /* First access on a device pointer finalizes CUDA support initialization.
     * If initialization fails, disable support. */
    if (!stage_three_init_complete) {