diff --git a/ompi/mca/btl/openib/btl_openib_component.c b/ompi/mca/btl/openib/btl_openib_component.c
index 9915308629..6b3c5de07f 100644
--- a/ompi/mca/btl/openib/btl_openib_component.c
+++ b/ompi/mca/btl/openib/btl_openib_component.c
@@ -2676,17 +2676,6 @@ btl_openib_component_init(int *num_btl_modules,
         }
     }
 
-    index = mca_base_param_find("btl", "openib", "flags");
-    if (index >= 0) {
-        if (OPAL_SUCCESS == mca_base_param_lookup_int(index, &value)) {
-            if (value & MCA_BTL_FLAGS_GET) {
-                /* Until GET flow is fixed - we do not support GET
-                   in openib btl. */
-                BTL_ERROR(("openib btl does not support GET flag"));
-            }
-        }
-    }
-
     OBJ_CONSTRUCT(&mca_btl_openib_component.send_free_coalesced, ompi_free_list_t);
     OBJ_CONSTRUCT(&mca_btl_openib_component.send_user_free, ompi_free_list_t);
     OBJ_CONSTRUCT(&mca_btl_openib_component.recv_user_free, ompi_free_list_t);
diff --git a/ompi/mca/btl/openib/btl_openib_mca.c b/ompi/mca/btl/openib/btl_openib_mca.c
index 6ab26d2c50..df54b4b5cb 100644
--- a/ompi/mca/btl/openib/btl_openib_mca.c
+++ b/ompi/mca/btl/openib/btl_openib_mca.c
@@ -542,7 +542,7 @@ int btl_openib_register_mca_params(void)
     mca_btl_openib_module.super.btl_rdma_pipeline_send_length = 1024 * 1024;
     mca_btl_openib_module.super.btl_rdma_pipeline_frag_size = 1024 * 1024;
     mca_btl_openib_module.super.btl_min_rdma_pipeline_size = 256 * 1024;
-    mca_btl_openib_module.super.btl_flags = MCA_BTL_FLAGS_PUT |
+    mca_btl_openib_module.super.btl_flags = MCA_BTL_FLAGS_RDMA |
         MCA_BTL_FLAGS_NEED_ACK | MCA_BTL_FLAGS_NEED_CSUM | MCA_BTL_FLAGS_HETEROGENEOUS_RDMA;
 #if BTL_OPENIB_FAILOVER_ENABLED
     mca_btl_openib_module.super.btl_flags |= MCA_BTL_FLAGS_FAILOVER_SUPPORT;
diff --git a/ompi/mca/pml/ob1/pml_ob1_recvreq.c b/ompi/mca/pml/ob1/pml_ob1_recvreq.c
index 9008b8ddc9..175688bd79 100644
--- a/ompi/mca/pml/ob1/pml_ob1_recvreq.c
+++ b/ompi/mca/pml/ob1/pml_ob1_recvreq.c
@@ -330,13 +330,15 @@ static void mca_pml_ob1_rget_completion( mca_btl_base_module_t* btl,
         orte_errmgr.abort(-1, NULL);
     }
 
-    mca_pml_ob1_send_fin(recvreq->req_recv.req_base.req_proc,
-                         bml_btl,
-                         frag->rdma_hdr.hdr_rget.hdr_des,
-                         des->order, 0);
-
     /* is receive request complete */
    OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, frag->rdma_length);
+    if (recvreq->req_bytes_expected <= recvreq->req_bytes_received) {
+        mca_pml_ob1_send_fin(recvreq->req_recv.req_base.req_proc,
+                             bml_btl,
+                             frag->rdma_hdr.hdr_rget.hdr_des,
+                             des->order, 0);
+    }
+
     recv_request_pml_complete_check(recvreq);
 
     MCA_PML_OB1_RDMA_FRAG_RETURN(frag);
@@ -540,7 +542,11 @@ void mca_pml_ob1_recv_request_progress_rget( mca_pml_ob1_recv_request_t* recvreq
     mca_pml_ob1_rdma_frag_t* frag;
     size_t i, size = 0;
     int rc;
+    size_t bytes_remaining, prev_sent, offset;
+    mca_btl_base_segment_t *r_segments;
 
+    prev_sent = offset = 0;
+    bytes_remaining = hdr->hdr_rndv.hdr_msg_length;
     recvreq->req_recv.req_bytes_packed = hdr->hdr_rndv.hdr_msg_length;
     MCA_PML_OB1_RECV_REQUEST_MATCHED(recvreq, &hdr->hdr_rndv.hdr_match);
 
@@ -549,7 +555,7 @@ void mca_pml_ob1_recv_request_progress_rget( mca_pml_ob1_recv_request_t* recvreq
      * fall back to copy in/out protocol. It is a pity because buffer on the
      * sender side is already registered. We need to be smarter here, perhaps
      * do couple of RDMA reads */
-    if(opal_convertor_need_buffers(&recvreq->req_recv.req_base.req_convertor) == true) {
+    if (opal_convertor_need_buffers(&recvreq->req_recv.req_base.req_convertor) == true) {
 #if OMPI_CUDA_SUPPORT
         if (mca_pml_ob1_cuda_need_buffers(recvreq, btl)) {
             mca_pml_ob1_recv_request_ack(recvreq, &hdr->hdr_rndv, 0);
@@ -561,68 +567,91 @@ void mca_pml_ob1_recv_request_progress_rget( mca_pml_ob1_recv_request_t* recvreq
 #endif /* OMPI_CUDA_SUPPORT */
     }
 
-    MCA_PML_OB1_RDMA_FRAG_ALLOC(frag,rc);
-    if( OPAL_UNLIKELY(NULL == frag) ) {
-        /* GLB - FIX */
-        ORTE_ERROR_LOG(rc);
-        orte_errmgr.abort(-1, NULL);
-    }
-
-    /* lookup bml datastructures */
-    bml_endpoint = (mca_bml_base_endpoint_t*)recvreq->req_recv.req_base.req_proc->proc_bml;
+    /* The while loop adds a fragmentation mechanism. The variable bytes_remaining
+     * holds the number of bytes left to be sent. In each iteration we send the
+     * maximum number of bytes supported by the HCA. The field frag->rdma_length
+     * holds the actual number of bytes that were sent in each iteration. We
+     * subtract this number from bytes_remaining and continue to the next
+     * iteration with the updated size. Also, in each iteration we update the
+     * location in the buffer to write the message to and the location to read
+     * from, using the offset variable, which accumulates the bytes sent so far. */
+    while (bytes_remaining > 0) {
+        MCA_PML_OB1_RDMA_FRAG_ALLOC(frag,rc);
+        if (OPAL_UNLIKELY(NULL == frag)) {
+            /* GLB - FIX */
+            ORTE_ERROR_LOG(rc);
+            orte_errmgr.abort(-1, NULL);
+        }
 
-    assert (btl->btl_seg_size * hdr->hdr_seg_cnt <= sizeof (frag->rdma_segs));
+        /* lookup bml datastructures */
+        bml_endpoint = (mca_bml_base_endpoint_t*)recvreq->req_recv.req_base.req_proc->proc_bml;
 
-    /* allocate/initialize a fragment */
-    memcpy (frag->rdma_segs, hdr + 1, btl->btl_seg_size * hdr->hdr_seg_cnt);
+        assert (btl->btl_seg_size * hdr->hdr_seg_cnt <= sizeof (frag->rdma_segs));
 
-    for(i = 0; i < hdr->hdr_seg_cnt; i++) {
-        mca_btl_base_segment_t *seg = (mca_btl_base_segment_t *)(frag->rdma_segs + i * btl->btl_seg_size);
+        /* allocate/initialize a fragment */
+        memcpy (frag->rdma_segs, hdr + 1, btl->btl_seg_size * hdr->hdr_seg_cnt);
+
+        /* updating the read location */
+        r_segments = (mca_btl_base_segment_t *) frag->rdma_segs;
+        r_segments->seg_addr.lval += offset;
+
+        /* updating the write location */
+        OPAL_THREAD_LOCK(&recvreq->lock);
+        opal_convertor_set_position(&recvreq->req_recv.req_base.req_convertor, &offset);
+        OPAL_THREAD_UNLOCK(&recvreq->lock);
+
+        for (i = 0; i < hdr->hdr_seg_cnt; i++) {
+            mca_btl_base_segment_t *seg = (mca_btl_base_segment_t *)(frag->rdma_segs + i * btl->btl_seg_size);
 #if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
-        if ((recvreq->req_recv.req_base.req_proc->proc_arch & OPAL_ARCH_ISBIGENDIAN) !=
-            (ompi_proc_local()->proc_arch & OPAL_ARCH_ISBIGENDIAN)) {
-            size += opal_swap_bytes4(seg->seg_len);
-        } else
+            if ((recvreq->req_recv.req_base.req_proc->proc_arch & OPAL_ARCH_ISBIGENDIAN) !=
+                (ompi_proc_local()->proc_arch & OPAL_ARCH_ISBIGENDIAN)) {
+                size += opal_swap_bytes4(seg->seg_len);
+            } else
 #endif
-        {
-            size += seg->seg_len;
+            {
+                size += seg->seg_len;
+            }
         }
-    }
 
-    frag->rdma_bml = mca_bml_base_btl_array_find(&bml_endpoint->btl_rdma, btl);
+        frag->rdma_bml = mca_bml_base_btl_array_find(&bml_endpoint->btl_rdma, btl);
 #if OMPI_CUDA_SUPPORT
-    if( OPAL_UNLIKELY(NULL == frag->rdma_bml) ) {
-        if (recvreq->req_recv.req_base.req_convertor.flags & CONVERTOR_CUDA) {
-            /* Check to see if this is a CUDA get */
-            if (btl->btl_flags & MCA_BTL_FLAGS_CUDA_GET) {
-                frag->rdma_bml = mca_bml_base_btl_array_find(&bml_endpoint->btl_send, btl);
+        if (OPAL_UNLIKELY(NULL == frag->rdma_bml)) {
+            if (recvreq->req_recv.req_base.req_convertor.flags & CONVERTOR_CUDA) {
+                /* Check to see if this is a CUDA get */
+                if (btl->btl_flags & MCA_BTL_FLAGS_CUDA_GET) {
+                    frag->rdma_bml = mca_bml_base_btl_array_find(&bml_endpoint->btl_send, btl);
+                }
+                if (OPAL_UNLIKELY(NULL == frag->rdma_bml)) {
+                    opal_output(0, "[%s:%d] invalid bml for rdma get", __FILE__, __LINE__);
+                    orte_errmgr.abort(-1, NULL);
+                }
+            } else {
+                /* Just default back to send and receive. Must be mix of GPU and HOST memory. */
+                mca_pml_ob1_recv_request_ack(recvreq, &hdr->hdr_rndv, 0);
+                return;
+            }
             }
-            if( OPAL_UNLIKELY(NULL == frag->rdma_bml) ) {
-                opal_output(0, "[%s:%d] invalid bml for rdma get", __FILE__, __LINE__);
-                orte_errmgr.abort(-1, NULL);
-            }
-        } else {
-            /* Just default back to send and receive. Must be mix of GPU and HOST memory. */
-            mca_pml_ob1_recv_request_ack(recvreq, &hdr->hdr_rndv, 0);
-            return;
-        }
-    }
 #else /* OMPI_CUDA_SUPPORT */
-    if( OPAL_UNLIKELY(NULL == frag->rdma_bml) ) {
-        opal_output(0, "[%s:%d] invalid bml for rdma get", __FILE__, __LINE__);
-        orte_errmgr.abort(-1, NULL);
-    }
+        if (OPAL_UNLIKELY(NULL == frag->rdma_bml)) {
+            opal_output(0, "[%s:%d] invalid bml for rdma get", __FILE__, __LINE__);
+            orte_errmgr.abort(-1, NULL);
+        }
 #endif /* OMPI_CUDA_SUPPORT */
 
-    frag->rdma_hdr.hdr_rget = *hdr;
-    frag->retries = 0;
-    frag->rdma_req = recvreq;
-    frag->rdma_ep = bml_endpoint;
-    frag->rdma_length = size;
-    frag->rdma_state = MCA_PML_OB1_RDMA_GET;
-    frag->reg = NULL;
-
-    mca_pml_ob1_recv_request_get_frag(frag);
+        frag->rdma_hdr.hdr_rget = *hdr;
+        frag->retries = 0;
+        frag->rdma_req = recvreq;
+        frag->rdma_ep = bml_endpoint;
+        frag->rdma_length = size;
+        frag->rdma_state = MCA_PML_OB1_RDMA_GET;
+        frag->reg = NULL;
+        frag->rdma_length = bytes_remaining;
+
+        mca_pml_ob1_recv_request_get_frag(frag);
+        prev_sent = frag->rdma_length;
+        bytes_remaining -= prev_sent;
+        offset += prev_sent;
+    }
     return;
 }
 
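Note: the core of the pml/ob1 change is the offset/bytes_remaining accounting that drives the new while loop in mca_pml_ob1_recv_request_progress_rget(). The standalone sketch below is illustrative only, not OMPI code: MAX_FRAG_LEN and post_rdma_get() are hypothetical stand-ins for the HCA's maximum transfer size and for mca_pml_ob1_recv_request_get_frag(), which leaves the number of bytes actually posted in frag->rdma_length. It demonstrates the invariant the loop maintains: offset + bytes_remaining always equals the message length, so the read (sender) and write (receiver) positions advance in lockstep.

/* frag_sketch.c - minimal model of the fragmentation loop above.
 * All names here are illustrative; nothing below is an OMPI API. */
#include <stddef.h>
#include <stdio.h>

#define MAX_FRAG_LEN (1024 * 1024)  /* stand-in for the HCA's max RDMA READ size */

/* Model of posting one RDMA GET: request 'len' bytes at 'offset'; the
 * transport clamps the transfer and returns the number of bytes it accepted
 * (this plays the role of frag->rdma_length after get_frag). */
static size_t post_rdma_get(size_t offset, size_t len)
{
    size_t posted = (len < MAX_FRAG_LEN) ? len : MAX_FRAG_LEN;
    printf("GET %zu bytes at offset %zu\n", posted, offset);
    return posted;
}

int main(void)
{
    size_t msg_len = 5 * 1024 * 1024 / 2;  /* 2.5 MiB message */
    size_t bytes_remaining = msg_len;      /* bytes not yet requested */
    size_t offset = 0;                     /* read and write position so far */

    while (bytes_remaining > 0) {
        /* Ask for everything that is left; the transport may post less. */
        size_t prev_sent = post_rdma_get(offset, bytes_remaining);
        bytes_remaining -= prev_sent;      /* continue with the updated size */
        offset += prev_sent;               /* both sides skip what was sent */
    }
    printf("done: %zu of %zu bytes transferred\n", offset, msg_len);
    return 0;
}

Under these assumptions the loop always terminates, since post_rdma_get() posts at least one byte whenever bytes_remaining > 0; the same reasoning applies to the patch as long as each GET makes progress.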