diff --git a/ompi/mca/pml/ob1/pml_ob1_sendreq.c b/ompi/mca/pml/ob1/pml_ob1_sendreq.c
index b964901822..f1f2744b2e 100644
--- a/ompi/mca/pml/ob1/pml_ob1_sendreq.c
+++ b/ompi/mca/pml/ob1/pml_ob1_sendreq.c
@@ -965,9 +965,17 @@ cannot_pack:
 
         /* makes sure that we don't exceed BTL max send size */
         if(bml_btl->btl->btl_max_send_size != 0) {
+#if OPAL_CUDA_SUPPORT
+            size_t max_send_size;
+            if ((sendreq->req_send.req_base.req_convertor.flags & CONVERTOR_CUDA) && (bml_btl->btl->btl_cuda_max_send_size != 0)) {
+                max_send_size = bml_btl->btl->btl_cuda_max_send_size - sizeof(mca_pml_ob1_frag_hdr_t);
+            } else {
+                max_send_size = bml_btl->btl->btl_max_send_size - sizeof(mca_pml_ob1_frag_hdr_t);
+            }
+#else /* OPAL_CUDA_SUPPORT */
             size_t max_send_size = bml_btl->btl->btl_max_send_size -
                 sizeof(mca_pml_ob1_frag_hdr_t);
-
+#endif /* OPAL_CUDA_SUPPORT */
             if (size > max_send_size) {
                 size = max_send_size;
             }
diff --git a/opal/mca/btl/base/btl_base_mca.c b/opal/mca/btl/base/btl_base_mca.c
index 0e46d591fd..3d8a2e54c6 100644
--- a/opal/mca/btl/base/btl_base_mca.c
+++ b/opal/mca/btl/base/btl_base_mca.c
@@ -135,6 +135,14 @@ int mca_btl_base_param_register(mca_base_component_t *version,
                                            MCA_BASE_VAR_SCOPE_READONLY,
                                            &module->btl_cuda_rdma_limit);
 #endif /* OPAL_CUDA_GDR_SUPPORT */
+#if OPAL_CUDA_SUPPORT
+    module->btl_cuda_max_send_size = 0;
+    (void) mca_base_component_var_register(version, "cuda_max_send_size", "Maximum size (in bytes) of a single GPU \"phase 2\" fragment of a long message when using the pipeline protocol (must be >= 1) (only valid on smcuda btl)",
+                                           MCA_BASE_VAR_TYPE_SIZE_T, NULL, 0, 0,
+                                           OPAL_INFO_LVL_4,
+                                           MCA_BASE_VAR_SCOPE_READONLY,
+                                           &module->btl_cuda_max_send_size);
+#endif /* OPAL_CUDA_SUPPORT */
 
     (void) mca_base_component_var_register(version, "max_send_size", "Maximum size (in bytes) of a single \"phase 2\" fragment of a long message when using the pipeline protocol (must be >= 1)",
                                            MCA_BASE_VAR_TYPE_SIZE_T, NULL, 0, 0,
diff --git a/opal/mca/btl/btl.h b/opal/mca/btl/btl.h
index b41e54b353..885a6fc0f4 100644
--- a/opal/mca/btl/btl.h
+++ b/opal/mca/btl/btl.h
@@ -1170,6 +1170,9 @@ struct mca_btl_base_module_t {
     size_t      btl_cuda_eager_limit;  /**< switch from eager to RDMA */
     size_t      btl_cuda_rdma_limit;   /**< switch from RDMA to rndv pipeline */
 #endif /* OPAL_CUDA_GDR_SUPPORT */
+#if OPAL_CUDA_SUPPORT
+    size_t      btl_cuda_max_send_size;   /**< set if CUDA max send_size is different from host max send size */
+#endif /* OPAL_CUDA_SUPPORT */
 };
 typedef struct mca_btl_base_module_t mca_btl_base_module_t;
 
diff --git a/opal/mca/btl/openib/btl_openib_mca.c b/opal/mca/btl/openib/btl_openib_mca.c
index 19fa22c338..1c3edc5a7e 100644
--- a/opal/mca/btl/openib/btl_openib_mca.c
+++ b/opal/mca/btl/openib/btl_openib_mca.c
@@ -815,6 +815,11 @@ int btl_openib_verify_mca_params (void)
         }
     }
 #endif /* Workaround */
+    if (0 != mca_btl_openib_module.super.btl_cuda_max_send_size) {
+        opal_show_help("help-mpi-btl-openib.txt", "do_not_set_openib_value",
+                       true, opal_process_info.nodename);
+        mca_btl_openib_module.super.btl_cuda_max_send_size = 0;
+    }
 #endif
 
 #if BTL_OPENIB_MALLOC_HOOKS_ENABLED
diff --git a/opal/mca/btl/openib/help-mpi-btl-openib.txt b/opal/mca/btl/openib/help-mpi-btl-openib.txt
index 94dcc7b8f5..7266893b6e 100644
--- a/opal/mca/btl/openib/help-mpi-btl-openib.txt
+++ b/opal/mca/btl/openib/help-mpi-btl-openib.txt
@@ -700,3 +700,9 @@ with CUDA GPU Direct RDMA. Either disable GPU Direct RDMA support or
 enable "leave pinned" support. Deactivating the openib BTL.
 
   Local host:              %s
+#
+[do_not_set_openib_value]
+Open MPI has detected that you have attempted to set the btl_openib_cuda_max_send_size
+value. This is not supported. Setting back to default value of 0.
+
+  Local host:              %s
diff --git a/opal/mca/btl/smcuda/btl_smcuda_component.c b/opal/mca/btl/smcuda/btl_smcuda_component.c
index f9639f13d3..f18e95e079 100644
--- a/opal/mca/btl/smcuda/btl_smcuda_component.c
+++ b/opal/mca/btl/smcuda/btl_smcuda_component.c
@@ -173,7 +173,7 @@ static int smcuda_register(void)
 #endif /* OPAL_CUDA_SUPPORT */
     mca_btl_smcuda.super.btl_eager_limit = 4*1024;
     mca_btl_smcuda.super.btl_rndv_eager_limit = 4*1024;
-    mca_btl_smcuda.super.btl_max_send_size = 128*1024;
+    mca_btl_smcuda.super.btl_max_send_size = 32*1024;
     mca_btl_smcuda.super.btl_rdma_pipeline_send_length = 64*1024;
     mca_btl_smcuda.super.btl_rdma_pipeline_frag_size = 64*1024;
     mca_btl_smcuda.super.btl_min_rdma_pipeline_size = 64*1024;
@@ -185,7 +185,12 @@ static int smcuda_register(void)
     /* Call the BTL based to register its MCA params */
     mca_btl_base_param_register(&mca_btl_smcuda_component.super.btl_version,
                                 &mca_btl_smcuda.super);
-
+#if OPAL_CUDA_SUPPORT
+    /* If the user has not set the value, then set it to the default */
+    if (0 == mca_btl_smcuda.super.btl_cuda_max_send_size) {
+        mca_btl_smcuda.super.btl_cuda_max_send_size = 128*1024;
+    }
+#endif /* OPAL_CUDA_SUPPORT */
     return mca_btl_smcuda_component_verify();
 }
 
@@ -214,6 +219,17 @@ static int mca_btl_smcuda_component_open(void)
     mca_btl_smcuda_component.max_frag_size = mca_btl_smcuda.super.btl_max_send_size;
     mca_btl_smcuda_component.eager_limit = mca_btl_smcuda.super.btl_eager_limit;
 
+#if OPAL_CUDA_SUPPORT
+    /* Possibly adjust max_frag_size if the cuda size is bigger */
+    if (mca_btl_smcuda.super.btl_cuda_max_send_size > mca_btl_smcuda.super.btl_max_send_size) {
+        mca_btl_smcuda_component.max_frag_size = mca_btl_smcuda.super.btl_cuda_max_send_size;
+    }
+    opal_output_verbose(10, opal_btl_base_framework.framework_output,
+                        "btl: smcuda: cuda_max_send_size=%d, max_send_size=%d, max_frag_size=%d",
+                        (int)mca_btl_smcuda.super.btl_cuda_max_send_size, (int)mca_btl_smcuda.super.btl_max_send_size,
+                        (int)mca_btl_smcuda_component.max_frag_size);
+#endif /* OPAL_CUDA_SUPPORT */
+
     /* initialize objects */
     OBJ_CONSTRUCT(&mca_btl_smcuda_component.sm_lock, opal_mutex_t);
     OBJ_CONSTRUCT(&mca_btl_smcuda_component.sm_frags_eager, opal_free_list_t);