diff --git a/opal/datatype/opal_convertor.h b/opal/datatype/opal_convertor.h
index ea1546393a..878fe724c0 100644
--- a/opal/datatype/opal_convertor.h
+++ b/opal/datatype/opal_convertor.h
@@ -11,6 +11,7 @@
  * Copyright (c) 2004-2006 The Regents of the University of California.
  *                         All rights reserved.
  * Copyright (c) 2009      Oak Ridge National Labs.  All rights reserved.
+ * Copyright (c) 2014      NVIDIA Corporation.  All rights reserved.
  * $COPYRIGHT$
  *
  * Additional copyrights may follow
@@ -45,11 +46,12 @@ BEGIN_C_DECLS
 #define CONVERTOR_WITH_CHECKSUM   0x00200000
 #define CONVERTOR_CUDA            0x00400000
 #define CONVERTOR_CUDA_ASYNC      0x00800000
-#define CONVERTOR_TYPE_MASK       0x00FF0000
+#define CONVERTOR_TYPE_MASK       0x10FF0000
 #define CONVERTOR_STATE_START     0x01000000
 #define CONVERTOR_STATE_COMPLETE  0x02000000
 #define CONVERTOR_STATE_ALLOC     0x04000000
 #define CONVERTOR_COMPLETED       0x08000000
+#define CONVERTOR_CUDA_UNIFIED    0x10000000

 union dt_elem_desc;
 typedef struct opal_convertor_t opal_convertor_t;
@@ -177,7 +179,7 @@ static inline int32_t opal_convertor_need_buffers( const opal_convertor_t* pConv
     if (OPAL_UNLIKELY(0 == (pConvertor->flags & CONVERTOR_HOMOGENEOUS))) return 1;
 #endif
 #if OPAL_CUDA_SUPPORT
-    if( pConvertor->flags & CONVERTOR_CUDA ) return 1;
+    if( pConvertor->flags & (CONVERTOR_CUDA | CONVERTOR_CUDA_UNIFIED)) return 1;
 #endif
     if( pConvertor->flags & OPAL_DATATYPE_FLAG_NO_GAPS ) return 0;
     if( (pConvertor->count == 1) && (pConvertor->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS) ) return 0;
diff --git a/opal/datatype/opal_datatype_cuda.c b/opal/datatype/opal_datatype_cuda.c
index 3d51a2201d..d62af3fa8b 100644
--- a/opal/datatype/opal_datatype_cuda.c
+++ b/opal/datatype/opal_datatype_cuda.c
@@ -57,7 +57,7 @@ void mca_cuda_convertor_init(opal_convertor_t* convertor, const void *pUserBuf)
         return;
     }

-    if (ftable.gpu_is_gpu_buffer(pUserBuf)) {
+    if (ftable.gpu_is_gpu_buffer(pUserBuf, convertor)) {
         convertor->flags |= CONVERTOR_CUDA;
     }
 }
@@ -78,7 +78,7 @@ bool opal_cuda_check_bufs(char *dest, char *src)
         return false;
     }

-    if (ftable.gpu_is_gpu_buffer(dest) || ftable.gpu_is_gpu_buffer(src)) {
+    if (ftable.gpu_is_gpu_buffer(dest, NULL) || ftable.gpu_is_gpu_buffer(src, NULL)) {
         return true;
     } else {
         return false;
diff --git a/opal/datatype/opal_datatype_cuda.h b/opal/datatype/opal_datatype_cuda.h
index acce495962..676af80273 100644
--- a/opal/datatype/opal_datatype_cuda.h
+++ b/opal/datatype/opal_datatype_cuda.h
@@ -14,7 +14,7 @@
  * common cuda code is initialized.  This removes any dependency on <cuda.h>
  * in the opal cuda datatype code. */
 struct opal_common_cuda_function_table {
-    int (*gpu_is_gpu_buffer)(const void*);
+    int (*gpu_is_gpu_buffer)(const void*, opal_convertor_t*);
     int (*gpu_cu_memcpy_async)(void*, const void*, size_t, opal_convertor_t*);
     int (*gpu_cu_memcpy)(void*, const void*, size_t);
     int (*gpu_memmove)(void*, void*, size_t);
diff --git a/opal/mca/common/cuda/common_cuda.c b/opal/mca/common/cuda/common_cuda.c
index 6209c6b21b..a56a9b5c09 100644
--- a/opal/mca/common/cuda/common_cuda.c
+++ b/opal/mca/common/cuda/common_cuda.c
@@ -127,7 +127,7 @@ static opal_mutex_t common_cuda_dtoh_lock;
 static opal_mutex_t common_cuda_ipc_lock;

 /* Functions called by opal layer - plugged into opal function table */
-static int mca_common_cuda_is_gpu_buffer(const void*);
+static int mca_common_cuda_is_gpu_buffer(const void*, opal_convertor_t*);
 static int mca_common_cuda_memmove(void*, void*, size_t);
 static int mca_common_cuda_cu_memcpy_async(void*, const void*, size_t, opal_convertor_t*);
 static int mca_common_cuda_cu_memcpy(void*, const void*, size_t);
@@ -1700,7 +1700,7 @@ static float mydifftime(opal_timer_t ts_start, opal_timer_t ts_end) {
 #endif /* OPAL_CUDA_SUPPORT_41 */

 /* Routines that get plugged into the opal datatype code */
-static int mca_common_cuda_is_gpu_buffer(const void *pUserBuf)
+static int mca_common_cuda_is_gpu_buffer(const void *pUserBuf, opal_convertor_t *convertor)
 {
     int res;
     CUmemorytype memType = 0;
@@ -1715,6 +1715,15 @@ static int mca_common_cuda_is_gpu_buffer(const void *pUserBuf)
     void *attrdata[] = {(void *)&memType, (void *)&ctx, (void *)&isManaged};

     res = cuFunc.cuPointerGetAttributes(3, attributes, attrdata, dbuf);
+
+    /* Mark unified memory buffers with a flag.  This will allow all unified
+     * memory to be forced through host buffers.  Note that this memory can
+     * be either host or device, so we need to set this flag prior to that check. */
+    if (1 == isManaged) {
+        if (NULL != convertor) {
+            convertor->flags |= CONVERTOR_CUDA_UNIFIED;
+        }
+    }
     if (res != CUDA_SUCCESS) {
         /* If we cannot determine it is a device pointer,
          * just assume it is not. */
@@ -1779,15 +1788,6 @@ static int mca_common_cuda_is_gpu_buffer(const void *pUserBuf)
         }
     }

-#if OPAL_CUDA_GET_ATTRIBUTES
-    if (1 == isManaged) {
-        /* Currently cannot support managed memory */
-        opal_output(0, "CUDA: ptr=%p: CUDA-aware Open MPI detected managed memory but there "
-                    "is no support for it.  Result will be unpredictable.", pUserBuf);
-        return OPAL_ERROR;
-    }
-#endif /* OPAL_CUDA_GET_ATTRIBUTES */
-
     /* First access on a device pointer finalizes CUDA support initialization.
      * If initialization fails, disable support. */
     if (!stage_three_init_complete) {
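
Note on the detection pattern (commentary, not part of the patch): the new CONVERTOR_CUDA_UNIFIED flag relies on cuPointerGetAttributes() reporting the memory type, the owning context, and CU_POINTER_ATTRIBUTE_IS_MANAGED in a single call. Because a managed allocation may be resident on either the host or the device at any given moment, the flag is set before the memory-type check, and opal_convertor_need_buffers() then forces such buffers through host staging. The following is a minimal standalone sketch of the same query, assuming a CUDA driver API that provides cuPointerGetAttributes() (CUDA 7.0 or later); the helper name buffer_is_managed and the main() scaffolding are hypothetical, not Open MPI code. Build with e.g. cc detect_managed.c -lcuda.

/* Hypothetical standalone sketch of the managed-memory query used in the
 * patch.  Mirrors the single cuPointerGetAttributes() call in
 * mca_common_cuda_is_gpu_buffer(); everything else is illustrative. */
#include <stdio.h>
#include <stdint.h>
#include <cuda.h>

static int buffer_is_managed(const void *buf)
{
    CUmemorytype mem_type = 0;
    CUcontext ctx = NULL;
    unsigned int is_managed = 0;
    CUpointer_attribute attributes[3] = {
        CU_POINTER_ATTRIBUTE_MEMORY_TYPE,
        CU_POINTER_ATTRIBUTE_CONTEXT,
        CU_POINTER_ATTRIBUTE_IS_MANAGED,
    };
    void *attrdata[] = {(void *)&mem_type, (void *)&ctx, (void *)&is_managed};

    /* One call fills all three outputs.  A failure means CUDA does not
     * recognize the pointer, i.e. it is ordinary host memory. */
    if (CUDA_SUCCESS != cuPointerGetAttributes(3, attributes, attrdata,
                                               (CUdeviceptr)(uintptr_t)buf)) {
        return 0;
    }
    return (1 == is_managed);
}

int main(void)
{
    CUdevice dev;
    CUcontext ctx;
    CUdeviceptr managed;

    /* Bare-bones driver-API setup; real code would check each call. */
    if (CUDA_SUCCESS != cuInit(0) ||
        CUDA_SUCCESS != cuDeviceGet(&dev, 0) ||
        CUDA_SUCCESS != cuCtxCreate(&ctx, 0, dev) ||
        CUDA_SUCCESS != cuMemAllocManaged(&managed, 1024, CU_MEM_ATTACH_GLOBAL)) {
        fprintf(stderr, "CUDA setup failed\n");
        return 1;
    }

    printf("managed: %s\n",
           buffer_is_managed((const void *)(uintptr_t)managed) ? "yes" : "no");

    cuMemFree(managed);
    cuCtxDestroy(ctx);
    return 0;
}

This also shows why opal_cuda_check_bufs() can pass NULL for the convertor: that caller only needs the yes/no answer, so the patch guards the flag update with NULL != convertor.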