diff --git a/ompi/mca/common/cuda/common_cuda.c b/ompi/mca/common/cuda/common_cuda.c
index 7fc9d45c41..63423f4636 100644
--- a/ompi/mca/common/cuda/common_cuda.c
+++ b/ompi/mca/common/cuda/common_cuda.c
@@ -1529,3 +1529,66 @@ int mca_common_cuda_device_can_access_peer(int *access, int dev1, int dev2)
     }
     return 0;
 }
+
+/* Call cuMemGetAddressRange for base; the driver writes through pbase and psize.
+ * Returns 0 on success, or OMPI_ERROR after showing a help message on failure. */
+int mca_common_cuda_get_address_range(void *pbase, size_t *psize, void *base)
+{
+    CUresult result = cuFunc.cuMemGetAddressRange((CUdeviceptr *)pbase, psize, (CUdeviceptr)base);
+    if (CUDA_SUCCESS != result) {
+        opal_show_help("help-mpi-common-cuda.txt", "cuMemGetAddressRange failed",
+                       true, result, base);
+        return OMPI_ERROR;
+    }
+    opal_output_verbose(10, mca_common_cuda_output,
+                        "CUDA: cuMemGetAddressRange passed: addr=%p, pbase=%p, psize=%lu ",
+                        base, *(char **)pbase, (unsigned long)*psize); /* match %lu */
+    return 0;
+}
+
+#if OMPI_CUDA_SUPPORT_60 && OMPI_GDR_SUPPORT
+/* Check a cached registration for staleness: query the current BUFFER_ID of
+ * reg->base and compare it with the value saved in reg->gpu_bufID.
+ * Returns 1 if the registration must be treated as stale, 0 if the IDs match. */
+int mca_common_cuda_previously_freed_memory(mca_mpool_base_registration_t *reg)
+{
+    int res;
+    unsigned long long bufID = 0;
+    unsigned char *dbuf = reg->base;
+
+    res = cuFunc.cuPointerGetAttribute(&bufID, CU_POINTER_ATTRIBUTE_BUFFER_ID,
+                                       (CUdeviceptr)dbuf);
+    if (res != CUDA_SUCCESS) {
+        opal_show_help("help-mpi-common-cuda.txt", "bufferID failed", true, res);
+        return 1;  /* cannot prove the registration is current: treat it as stale */
+    }
+
+    opal_output_verbose(50, mca_common_cuda_output,
+                        "CUDA: base=%p, bufID=%llu, reg->gpu_bufID=%llu, %s", dbuf, bufID, reg->gpu_bufID,
+                        (reg->gpu_bufID == bufID ? "BUFFER_ID match":"BUFFER_ID do not match"));
+    return (bufID != reg->gpu_bufID) ? 1 : 0;
+}
+
+/*
+ * Query the BUFFER_ID of the registration's base address and cache it in
+ * reg->gpu_bufID, so that a cached registration can later be checked for
+ * staleness.  If the query fails an error is shown; the local ID starts
+ * at 0, so 0 is what gets stored unless the driver wrote to it anyway.
+ */
+void mca_common_cuda_get_buffer_id(mca_mpool_base_registration_t *reg)
+{
+    unsigned long long id = 0;
+    unsigned char *gpu_addr = reg->base;
+    int rc = cuFunc.cuPointerGetAttribute(&id, CU_POINTER_ATTRIBUTE_BUFFER_ID,
+                                          (CUdeviceptr)gpu_addr);
+
+    if (CUDA_SUCCESS != rc) {
+        opal_show_help("help-mpi-common-cuda.txt", "bufferID failed", true, rc);
+    }
+
+    /* Stored unconditionally: the failure path above only reports the error
+     * and does not return early. */
+    reg->gpu_bufID = id;
+}
+#endif /* OMPI_CUDA_SUPPORT_60 && OMPI_GDR_SUPPORT */
+
diff --git a/ompi/mca/common/cuda/common_cuda.h b/ompi/mca/common/cuda/common_cuda.h
index 6e01fe6adf..5e17246c0d 100644
--- a/ompi/mca/common/cuda/common_cuda.h
+++ b/ompi/mca/common/cuda/common_cuda.h
@@ -20,6 +20,7 @@
 #ifndef OMPI_MCA_COMMON_CUDA_H
 #define OMPI_MCA_COMMON_CUDA_H
 #include "ompi/mca/btl/btl.h"
+#include "opal/datatype/opal_convertor.h"
 
 #define MEMHANDLE_SIZE 8
 #define EVTHANDLE_SIZE 8
@@ -31,6 +32,7 @@ struct mca_mpool_common_cuda_reg_t {
 };
 typedef struct mca_mpool_common_cuda_reg_t mca_mpool_common_cuda_reg_t;
 extern bool mca_common_cuda_enabled;
+#define OMPI_GDR_SUPPORT 0
 
 OMPI_DECLSPEC int mca_common_cuda_register_mca_variables(void);
 
@@ -72,6 +74,27 @@ OMPI_DECLSPEC int cuda_closememhandle(void *reg_data, mca_mpool_base_registratio
 OMPI_DECLSPEC int mca_common_cuda_get_device(int *devicenum);
 OMPI_DECLSPEC int mca_common_cuda_device_can_access_peer(int *access, int dev1, int dev2);
 OMPI_DECLSPEC int mca_common_cuda_stage_one_init(void);
-
+OMPI_DECLSPEC int mca_common_cuda_get_address_range(void *pbase, size_t *psize, void *base);
+#if OMPI_CUDA_SUPPORT_60 && OMPI_GDR_SUPPORT
+OMPI_DECLSPEC int mca_common_cuda_previously_freed_memory(mca_mpool_base_registration_t *reg);
+OMPI_DECLSPEC void mca_common_cuda_get_buffer_id(mca_mpool_base_registration_t *reg);
+#endif /* OMPI_CUDA_SUPPORT_60 && OMPI_GDR_SUPPORT */
+/**
+ * Same question as opal_convertor_need_buffers(), asked with the
+ * CONVERTOR_CUDA flag masked out for the call and put back afterwards.
+ *
+ * @return 0 when the contiguous user buffer can be handed to the upper
+ *         layer as is; 1 when the data has to be packed (heterogeneous
+ *         peers, i.e. source arch != dest arch, or a non contiguous layout).
+ */
+static inline int32_t opal_convertor_cuda_need_buffers( opal_convertor_t* pConvertor )
+{
+    const uint32_t saved_cuda_bit = pConvertor->flags & CONVERTOR_CUDA;
+    int32_t need_pack;
+    pConvertor->flags &= ~CONVERTOR_CUDA;   /* hide the CUDA flag from the query */
+    need_pack = opal_convertor_need_buffers(pConvertor);
+    pConvertor->flags |= saved_cuda_bit;    /* no-op when the flag was not set */
+    return need_pack;
+}
 
 #endif /* OMPI_MCA_COMMON_CUDA_H */
diff --git a/ompi/mca/common/cuda/help-mpi-common-cuda.txt b/ompi/mca/common/cuda/help-mpi-common-cuda.txt
index 1e6e713637..d834ea5705 100644
--- a/ompi/mca/common/cuda/help-mpi-common-cuda.txt
+++ b/ompi/mca/common/cuda/help-mpi-common-cuda.txt
@@ -165,3 +165,10 @@ An error occurred while trying to map in the address of a function.
   Function Name: %s
   Error string:  %s
 CUDA-aware support is disabled.
+#
+[bufferID failed]
+An error occurred while trying to get the BUFFER_ID of a GPU memory region.  This
+could cause incorrect results.  Turn off GPU Direct RDMA support by running with
+--mca btl_openib_cuda_want_gdr_support 0.
+  cuPointerGetAttribute return value:   %d
+Check the cuda.h file for what the return value means.