From 7aece0a7fd7ad198b29bff0f47c6008c915a0abc Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Tue, 8 Apr 2014 21:55:00 +0000 Subject: [PATCH] osc/sm: fix bugs in both the passive and active target paths While testing one-sided on LANL systems I found a couple more OSC bugs that were not caught during the initial testing: - In the passive target code we read the read lock count as a char instead of the intended uint32_t. This causes lock to lock up when using shared locks after 127 iterations. - The post code used the wrong group when trying to increment post counters. This causes a segmentation fault. - Both the post and complete code used the wrong check in the inner loop leading to an infinite loop. cmr=v1.8.1:reviewer=jsquyres This commit was SVN r31354. --- ompi/mca/osc/sm/osc_sm_active_target.c | 17 ++++++++++------- ompi/mca/osc/sm/osc_sm_passive_target.c | 2 +- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/ompi/mca/osc/sm/osc_sm_active_target.c b/ompi/mca/osc/sm/osc_sm_active_target.c index 17a78da4e0..7e50d853e5 100644 --- a/ompi/mca/osc/sm/osc_sm_active_target.c +++ b/ompi/mca/osc/sm/osc_sm_active_target.c @@ -1,5 +1,8 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2012 Sandia National Laboratories. All rights reserved. + * Copyright (c) 2014 Los Alamos National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -82,7 +85,7 @@ ompi_osc_sm_complete(struct ompi_win_t *win) { ompi_osc_sm_module_t *module = (ompi_osc_sm_module_t*) win->w_osc_module; - int i, j, gsize, csize; + int gsize, csize; /* ensure all memory operations have completed */ opal_atomic_mb(); @@ -93,8 +96,8 @@ ompi_osc_sm_complete(struct ompi_win_t *win) gsize = ompi_group_size(module->start_group); csize = ompi_comm_size(module->comm); - for (i = 0 ; i < gsize ; ++i) { - for (j = 0 ; i < csize ; ++j) { + for (int i = 0 ; i < gsize ; ++i) { + for (int j = 0 ; j < csize ; ++j) { if (ompi_group_peer_lookup(module->start_group, i) == ompi_comm_peer_lookup(module->comm, j)) { opal_atomic_add_32(&module->node_states[j].complete_count, 1); @@ -118,7 +121,7 @@ ompi_osc_sm_post(struct ompi_group_t *group, { ompi_osc_sm_module_t *module = (ompi_osc_sm_module_t*) win->w_osc_module; - int i, j, gsize, csize; + int gsize, csize; if (0 == (assert & MPI_MODE_NOCHECK)) { OBJ_RETAIN(group); @@ -129,9 +132,9 @@ ompi_osc_sm_post(struct ompi_group_t *group, gsize = ompi_group_size(module->post_group); csize = ompi_comm_size(module->comm); - for (i = 0 ; i < gsize ; ++i) { - for (j = 0 ; i < csize ; ++j) { - if (ompi_group_peer_lookup(module->start_group, i) == + for (int i = 0 ; i < gsize ; ++i) { + for (int j = 0 ; j < csize ; ++j) { + if (ompi_group_peer_lookup(module->post_group, i) == ompi_comm_peer_lookup(module->comm, j)) { opal_atomic_add_32(&module->node_states[j].post_count, 1); } diff --git a/ompi/mca/osc/sm/osc_sm_passive_target.c b/ompi/mca/osc/sm/osc_sm_passive_target.c index c9dfcfd8f0..a9a8fe90bc 100644 --- a/ompi/mca/osc/sm/osc_sm_passive_target.c +++ b/ompi/mca/osc/sm/osc_sm_passive_target.c @@ -49,7 +49,7 @@ lk_fetch32(ompi_osc_sm_module_t *module, size_t offset) { opal_atomic_mb (); - return (uint32_t) *((char*) &module->node_states[target].lock + offset); + return *((uint32_t *)((char*) &module->node_states[target].lock + offset)); }