From cd67642183954d5dad560d907ded422d47ee50d4 Mon Sep 17 00:00:00 2001 From: Alex Mikheev <alexm@mellanox.com> Date: Tue, 9 Sep 2014 14:34:03 +0300 Subject: [PATCH] OSHMEM: sshmem verbs: work around shared_mr procfs bug: dereg shared_mr before doing dereg on its mr. --- oshmem/mca/sshmem/verbs/sshmem_verbs_component.c | 9 +++++---- oshmem/mca/sshmem/verbs/sshmem_verbs_module.c | 9 +++++---- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/oshmem/mca/sshmem/verbs/sshmem_verbs_component.c b/oshmem/mca/sshmem/verbs/sshmem_verbs_component.c index b961b4c11e..da40c67721 100644 --- a/oshmem/mca/sshmem/verbs/sshmem_verbs_component.c +++ b/oshmem/mca/sshmem/verbs/sshmem_verbs_component.c @@ -221,14 +221,15 @@ verbs_runtime_query(mca_base_module_t **module, out: if (device) { - if (opal_value_array_get_size(&device->ib_mr_array)) { + if (0 < (i = opal_value_array_get_size(&device->ib_mr_array))) { struct ibv_mr** array; struct ibv_mr* ib_mr = NULL; array = OPAL_VALUE_ARRAY_GET_BASE(&device->ib_mr_array, struct ibv_mr *); - while (opal_value_array_get_size(&device->ib_mr_array) > 0) { - ib_mr = array[0]; + /* destruct shared_mr first in order to avoid proc fs race */ + for (i--;i >= 0; i--) { + ib_mr = array[i]; ibv_dereg_mr(ib_mr); - opal_value_array_remove_item(&device->ib_mr_array, 0); + opal_value_array_remove_item(&device->ib_mr_array, i); } if (device->ib_mr_shared) { diff --git a/oshmem/mca/sshmem/verbs/sshmem_verbs_module.c b/oshmem/mca/sshmem/verbs/sshmem_verbs_module.c index 4b699da943..6bed6a0584 100644 --- a/oshmem/mca/sshmem/verbs/sshmem_verbs_module.c +++ b/oshmem/mca/sshmem/verbs/sshmem_verbs_module.c @@ -416,6 +416,7 @@ segment_detach(map_segment_t *ds_buf, sshmem_mkey_t *mkey) { int rc = OSHMEM_SUCCESS; openib_device_t *device = &memheap_device; + int i; assert(ds_buf); @@ -429,12 +430,12 @@ segment_detach(map_segment_t *ds_buf, sshmem_mkey_t *mkey) ); if (device) { - if (opal_value_array_get_size(&device->ib_mr_array)) { + if (0 < (i = 
opal_value_array_get_size(&device->ib_mr_array))) { struct ibv_mr** array; struct ibv_mr* ib_mr = NULL; array = OPAL_VALUE_ARRAY_GET_BASE(&device->ib_mr_array, struct ibv_mr *); - while (opal_value_array_get_size(&device->ib_mr_array) > 0) { - ib_mr = array[0]; + for (i--;i >= 0; i--) { + ib_mr = array[i]; if(ibv_dereg_mr(ib_mr)) { OPAL_OUTPUT_VERBOSE( (5, oshmem_sshmem_base_framework.framework_output, @@ -443,7 +444,7 @@ segment_detach(map_segment_t *ds_buf, sshmem_mkey_t *mkey) ); rc = OSHMEM_ERROR; } - opal_value_array_remove_item(&device->ib_mr_array, 0); + opal_value_array_remove_item(&device->ib_mr_array, i); } if (!rc && device->ib_mr_shared) {