osc/sm: fix bugs in both the passive and active target paths
While testing one-sided communication on LANL systems I found a few more OSC bugs that were not caught during the initial testing:
- In the passive target code we read the read lock count as a char instead of the intended uint32_t. This causes the lock call to hang after 127 iterations when using shared locks.
- The post code used the wrong group when trying to increment post counters. This causes a segmentation fault.
- Both the post and wait code used the wrong check in the inner loop, leading to an infinite loop.
cmr=v1.8.1:reviewer=jsquyres
This commit was SVN r31354.
Этот коммит содержится в:
родитель
a31bfbeb2c
Коммит
7aece0a7fd
@ -1,5 +1,8 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2012 Sandia National Laboratories. All rights reserved.
|
||||
* Copyright (c) 2014 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -82,7 +85,7 @@ ompi_osc_sm_complete(struct ompi_win_t *win)
|
||||
{
|
||||
ompi_osc_sm_module_t *module =
|
||||
(ompi_osc_sm_module_t*) win->w_osc_module;
|
||||
int i, j, gsize, csize;
|
||||
int gsize, csize;
|
||||
|
||||
/* ensure all memory operations have completed */
|
||||
opal_atomic_mb();
|
||||
@ -93,8 +96,8 @@ ompi_osc_sm_complete(struct ompi_win_t *win)
|
||||
|
||||
gsize = ompi_group_size(module->start_group);
|
||||
csize = ompi_comm_size(module->comm);
|
||||
for (i = 0 ; i < gsize ; ++i) {
|
||||
for (j = 0 ; i < csize ; ++j) {
|
||||
for (int i = 0 ; i < gsize ; ++i) {
|
||||
for (int j = 0 ; j < csize ; ++j) {
|
||||
if (ompi_group_peer_lookup(module->start_group, i) ==
|
||||
ompi_comm_peer_lookup(module->comm, j)) {
|
||||
opal_atomic_add_32(&module->node_states[j].complete_count, 1);
|
||||
@ -118,7 +121,7 @@ ompi_osc_sm_post(struct ompi_group_t *group,
|
||||
{
|
||||
ompi_osc_sm_module_t *module =
|
||||
(ompi_osc_sm_module_t*) win->w_osc_module;
|
||||
int i, j, gsize, csize;
|
||||
int gsize, csize;
|
||||
|
||||
if (0 == (assert & MPI_MODE_NOCHECK)) {
|
||||
OBJ_RETAIN(group);
|
||||
@ -129,9 +132,9 @@ ompi_osc_sm_post(struct ompi_group_t *group,
|
||||
|
||||
gsize = ompi_group_size(module->post_group);
|
||||
csize = ompi_comm_size(module->comm);
|
||||
for (i = 0 ; i < gsize ; ++i) {
|
||||
for (j = 0 ; i < csize ; ++j) {
|
||||
if (ompi_group_peer_lookup(module->start_group, i) ==
|
||||
for (int i = 0 ; i < gsize ; ++i) {
|
||||
for (int j = 0 ; j < csize ; ++j) {
|
||||
if (ompi_group_peer_lookup(module->post_group, i) ==
|
||||
ompi_comm_peer_lookup(module->comm, j)) {
|
||||
opal_atomic_add_32(&module->node_states[j].post_count, 1);
|
||||
}
|
||||
|
@ -49,7 +49,7 @@ lk_fetch32(ompi_osc_sm_module_t *module,
|
||||
size_t offset)
|
||||
{
|
||||
opal_atomic_mb ();
|
||||
return (uint32_t) *((char*) &module->node_states[target].lock + offset);
|
||||
return *((uint32_t *)((char*) &module->node_states[target].lock + offset));
|
||||
}
|
||||
|
||||
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user