From cd67642183954d5dad560d907ded422d47ee50d4 Mon Sep 17 00:00:00 2001
From: Alex Mikheev <alexm@mellanox.com>
Date: Tue, 9 Sep 2014 14:34:03 +0300
Subject: [PATCH] OSHMEM: sshmem verbs: workaround shared_mr procfs bug

Deregister shared_mr before deregistering its mr.
---
 oshmem/mca/sshmem/verbs/sshmem_verbs_component.c | 9 +++++----
 oshmem/mca/sshmem/verbs/sshmem_verbs_module.c    | 9 +++++----
 2 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/oshmem/mca/sshmem/verbs/sshmem_verbs_component.c b/oshmem/mca/sshmem/verbs/sshmem_verbs_component.c
index b961b4c11e..da40c67721 100644
--- a/oshmem/mca/sshmem/verbs/sshmem_verbs_component.c
+++ b/oshmem/mca/sshmem/verbs/sshmem_verbs_component.c
@@ -221,14 +221,15 @@ verbs_runtime_query(mca_base_module_t **module,
 
 out:
     if (device) {
-        if (opal_value_array_get_size(&device->ib_mr_array)) {
+        if (0 < (i = opal_value_array_get_size(&device->ib_mr_array))) {
             struct ibv_mr** array;
             struct ibv_mr* ib_mr = NULL;
             array = OPAL_VALUE_ARRAY_GET_BASE(&device->ib_mr_array, struct ibv_mr *);
-            while (opal_value_array_get_size(&device->ib_mr_array) > 0) {
-                ib_mr = array[0];
+            /* deregister shared_mr first in order to avoid the procfs race */
+            for (i--;i >= 0; i--) {
+                ib_mr = array[i];
                 ibv_dereg_mr(ib_mr);
-                opal_value_array_remove_item(&device->ib_mr_array, 0);
+                opal_value_array_remove_item(&device->ib_mr_array, i);
             }
 
             if (device->ib_mr_shared) {
diff --git a/oshmem/mca/sshmem/verbs/sshmem_verbs_module.c b/oshmem/mca/sshmem/verbs/sshmem_verbs_module.c
index 4b699da943..6bed6a0584 100644
--- a/oshmem/mca/sshmem/verbs/sshmem_verbs_module.c
+++ b/oshmem/mca/sshmem/verbs/sshmem_verbs_module.c
@@ -416,6 +416,7 @@ segment_detach(map_segment_t *ds_buf, sshmem_mkey_t *mkey)
 {
     int rc = OSHMEM_SUCCESS;
     openib_device_t *device = &memheap_device;
+    int i;
 
     assert(ds_buf);
 
@@ -429,12 +430,12 @@ segment_detach(map_segment_t *ds_buf, sshmem_mkey_t *mkey)
     );
 
     if (device) {
-        if (opal_value_array_get_size(&device->ib_mr_array)) {
+        if (0 < (i = opal_value_array_get_size(&device->ib_mr_array))) {
             struct ibv_mr** array;
             struct ibv_mr* ib_mr = NULL;
             array = OPAL_VALUE_ARRAY_GET_BASE(&device->ib_mr_array, struct ibv_mr *);
-            while (opal_value_array_get_size(&device->ib_mr_array) > 0) {
-                ib_mr = array[0];
+            for (i--;i >= 0; i--) {
+                ib_mr = array[i];
                 if(ibv_dereg_mr(ib_mr)) {
                     OPAL_OUTPUT_VERBOSE(
                         (5, oshmem_sshmem_base_framework.framework_output,
@@ -443,7 +444,7 @@ segment_detach(map_segment_t *ds_buf, sshmem_mkey_t *mkey)
                         );
                     rc = OSHMEM_ERROR;
                 }
-                opal_value_array_remove_item(&device->ib_mr_array, 0);
+                opal_value_array_remove_item(&device->ib_mr_array, i);
             }
 
             if (!rc && device->ib_mr_shared) {