diff --git a/ompi/mca/osc/rdma/osc_rdma_accumulate.c b/ompi/mca/osc/rdma/osc_rdma_accumulate.c
index 53fdeb889d..31c3fc29be 100644
--- a/ompi/mca/osc/rdma/osc_rdma_accumulate.c
+++ b/ompi/mca/osc/rdma/osc_rdma_accumulate.c
@@ -892,7 +892,7 @@ int ompi_osc_rdma_rget_accumulate_internal (ompi_osc_rdma_sync_t *sync, const vo
     /* if the datatype is small enough (and the count is 1) then try to directly use the hardware to execute
      * the atomic operation. this should be safe in all cases as either 1) the user has assured us they will
      * never use atomics with count > 1, 2) we have the accumulate lock, or 3) we have an exclusive lock */
-    if (origin_extent <= 8 && 1 == origin_count) {
+    if (origin_extent <= 8 && 1 == origin_count && !ompi_osc_rdma_peer_local_base (peer)) {
         if (module->acc_use_amo && ompi_datatype_is_predefined (origin_datatype)) {
             if (NULL == result_addr) {
                 ret = ompi_osc_rdma_acc_single_atomic (sync, origin_addr, origin_datatype, origin_extent, peer, target_address,
diff --git a/ompi/mca/osc/rdma/osc_rdma_component.c b/ompi/mca/osc/rdma/osc_rdma_component.c
index 522f953a2f..bf6c1a84bb 100644
--- a/ompi/mca/osc/rdma/osc_rdma_component.c
+++ b/ompi/mca/osc/rdma/osc_rdma_component.c
@@ -759,9 +759,14 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
             ex_peer->size = temp[i].size;
         }
 
-        if (module->use_cpu_atomics && MPI_WIN_FLAVOR_ALLOCATE == module->flavor) {
+        if (module->use_cpu_atomics && (MPI_WIN_FLAVOR_ALLOCATE == module->flavor || peer_rank == my_rank)) {
             /* base is local and cpu atomics are available */
-            ex_peer->super.base = (uintptr_t) module->segment_base + offset;
+            if (MPI_WIN_FLAVOR_ALLOCATE == module->flavor) {
+                ex_peer->super.base = (uintptr_t) module->segment_base + offset;
+            } else {
+                ex_peer->super.base = (uintptr_t) *base;
+            }
+            peer->flags |= OMPI_OSC_RDMA_PEER_LOCAL_BASE;
 
             offset += temp[i].size;
         } else {
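
Note (not part of the patch): the new guard in osc_rdma_accumulate.c keys off the OMPI_OSC_RDMA_PEER_LOCAL_BASE flag that the component-side hunk now also sets for a process's own rank, so that accumulate operations on a locally addressable base skip the network AMO fast path and take the CPU/locked path instead. A minimal sketch of the helper the guard calls, assuming the usual inline accessor shape in osc_rdma_peer.h:

    /* sketch only: presumed shape of the accessor used by the new guard above.
     * It tests the flag that allocate_state_shared() sets once the peer's
     * window base is directly addressable in this process. */
    static inline bool ompi_osc_rdma_peer_local_base (ompi_osc_rdma_peer_t *peer)
    {
        return !!(peer->flags & OMPI_OSC_RDMA_PEER_LOCAL_BASE);
    }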