1
1

osc/sm: fix bugs in both the passive and active target paths

While testing one-sided on LANL systems I found a couple more OSC
bugs that were not caught during the initial testing:

 - In the passive target code we read the read lock count as a
   char instead of the intended uint32_t. This causes lock to
   lock up when using shared locks after 127 iterations.

 - The post code used the wrong group when trying to increment post
   counters. This causes a segmentation fault.

 - Both the post and complete code used the wrong check in the inner
   loop, leading to an infinite loop.

cmr=v1.8.1:reviewer=jsquyres

This commit was SVN r31354.
Этот коммит содержится в:
Nathan Hjelm 2014-04-08 21:55:00 +00:00
родитель a31bfbeb2c
Коммит 7aece0a7fd
2 изменённых файлов: 11 добавлений и 8 удалений

Просмотреть файл

@ -1,5 +1,8 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2012 Sandia National Laboratories. All rights reserved.
* Copyright (c) 2014 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -82,7 +85,7 @@ ompi_osc_sm_complete(struct ompi_win_t *win)
{
ompi_osc_sm_module_t *module =
(ompi_osc_sm_module_t*) win->w_osc_module;
int i, j, gsize, csize;
int gsize, csize;
/* ensure all memory operations have completed */
opal_atomic_mb();
@ -93,8 +96,8 @@ ompi_osc_sm_complete(struct ompi_win_t *win)
gsize = ompi_group_size(module->start_group);
csize = ompi_comm_size(module->comm);
for (i = 0 ; i < gsize ; ++i) {
for (j = 0 ; i < csize ; ++j) {
for (int i = 0 ; i < gsize ; ++i) {
for (int j = 0 ; j < csize ; ++j) {
if (ompi_group_peer_lookup(module->start_group, i) ==
ompi_comm_peer_lookup(module->comm, j)) {
opal_atomic_add_32(&module->node_states[j].complete_count, 1);
@ -118,7 +121,7 @@ ompi_osc_sm_post(struct ompi_group_t *group,
{
ompi_osc_sm_module_t *module =
(ompi_osc_sm_module_t*) win->w_osc_module;
int i, j, gsize, csize;
int gsize, csize;
if (0 == (assert & MPI_MODE_NOCHECK)) {
OBJ_RETAIN(group);
@ -129,9 +132,9 @@ ompi_osc_sm_post(struct ompi_group_t *group,
gsize = ompi_group_size(module->post_group);
csize = ompi_comm_size(module->comm);
for (i = 0 ; i < gsize ; ++i) {
for (j = 0 ; i < csize ; ++j) {
if (ompi_group_peer_lookup(module->start_group, i) ==
for (int i = 0 ; i < gsize ; ++i) {
for (int j = 0 ; j < csize ; ++j) {
if (ompi_group_peer_lookup(module->post_group, i) ==
ompi_comm_peer_lookup(module->comm, j)) {
opal_atomic_add_32(&module->node_states[j].post_count, 1);
}

Просмотреть файл

@ -49,7 +49,7 @@ lk_fetch32(ompi_osc_sm_module_t *module,
size_t offset)
{
opal_atomic_mb ();
return (uint32_t) *((char*) &module->node_states[target].lock + offset);
return *((uint32_t *)((char*) &module->node_states[target].lock + offset));
}