
osc/rdma: fix bugs when running more than one process per node

A previous commit updated the one-sided code to register the state
region only once. This created an issue when using the scratch lock
with fetching atomics: on any rank that is not local rank 0,
module->state_handle is NULL. This commit fixes the issue by
removing the scratch lock and using a fragment pointer instead.

Fixes open-mpi/ompi#1290

Signed-off-by: Nathan Hjelm <hjelmn@lanl.gov>
This commit is contained in:
Nathan Hjelm 2015-12-15 11:21:35 -07:00
parent 7a82174747
commit 0de9445fc7
3 changed files with 48 additions and 18 deletions
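
For context before the per-file hunks: the fix lands the unused result of the fetch-and-op in a freshly allocated, registered fragment (frag->handle) instead of the per-window scratch_lock, so no module->state_handle is required on ranks that never registered the state region. The condensed sketch below illustrates that pattern in one place; the helper name and the simplified error handling are illustrative assumptions rather than code from the commit, while ompi_osc_rdma_frag_alloc, btl_atomic_fop, ompi_osc_rdma_progress, and ompi_osc_rdma_frag_complete are used as in the hunks that follow.

    /* Illustrative sketch only: assumes the osc/rdma internal declarations
     * (ompi_osc_rdma_module_t, ompi_osc_rdma_peer_t, the frag API); the helper
     * name is hypothetical and error paths are simplified. */
    static inline int osc_rdma_add_discard_result (ompi_osc_rdma_module_t *module, ompi_osc_rdma_peer_t *peer,
                                                   uint64_t target, int64_t value)
    {
        volatile bool atomic_complete = false;
        ompi_osc_rdma_frag_t *frag = NULL;
        char *scratch = NULL;
        int ret;

        /* the fragment comes from an already registered pool, so frag->handle is
         * valid on every local rank, unlike module->state_handle */
        ret = ompi_osc_rdma_frag_alloc (module, 8, &frag, &scratch);
        if (OPAL_UNLIKELY(OPAL_SUCCESS != ret)) {
            return ret;
        }

        /* fetch-and-add whose fetched value is discarded into the scratch fragment */
        ret = module->selected_btl->btl_atomic_fop (module->selected_btl, peer->state_endpoint, scratch, target,
                                                    frag->handle, peer->state_handle, MCA_BTL_ATOMIC_ADD, value, 0,
                                                    MCA_BTL_NO_ORDER, ompi_osc_rdma_atomic_complete,
                                                    (void *) &atomic_complete, NULL);
        if (OPAL_SUCCESS == ret) {
            /* spin until the BTL completion callback fires */
            while (!atomic_complete) {
                ompi_osc_rdma_progress (module);
            }
        } else if (1 == ret) {
            /* the BTL completed the operation immediately */
            ret = OMPI_SUCCESS;
        }

        /* return the scratch fragment to the pool in every case */
        ompi_osc_rdma_frag_complete (frag);

        return ret;
    }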

View file

@@ -419,9 +419,11 @@ int ompi_osc_rdma_complete_atomic (ompi_win_t *win)
{
ompi_osc_rdma_module_t *module = GET_MODULE(win);
ompi_osc_rdma_sync_t *sync = &module->all_sync;
ompi_osc_rdma_frag_t *frag = NULL;
ompi_osc_rdma_peer_t **peers;
void *scratch_lock = NULL;
ompi_group_t *group;
int group_size;
int group_size, ret;
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "complete: %s", win->w_name);
@@ -454,11 +456,17 @@ int ompi_osc_rdma_complete_atomic (ompi_win_t *win)
ompi_osc_rdma_sync_rdma_complete (sync);
if (!(MCA_BTL_FLAGS_ATOMIC_OPS & module->selected_btl->btl_flags)) {
ret = ompi_osc_rdma_frag_alloc (module, 8, &frag, (char **) &scratch_lock);
if (OPAL_UNLIKELY(OPAL_SUCCESS != ret)) {
return ret;
}
}
/* for each process in the group increment their number of complete messages */
for (int i = 0 ; i < group_size ; ++i) {
ompi_osc_rdma_peer_t *peer = peers[i];
intptr_t target = (intptr_t) peer->state + offsetof (ompi_osc_rdma_state_t, num_complete_msgs);
int ret;
if (!ompi_osc_rdma_peer_local_state (peer)) {
do {
@@ -468,8 +476,8 @@ int ompi_osc_rdma_complete_atomic (ompi_win_t *win)
ompi_osc_rdma_atomic_complete, NULL, NULL);
} else {
/* don't care about the read value so use the scratch lock */
ret = module->selected_btl->btl_atomic_fop (module->selected_btl, peer->state_endpoint, &module->state->scratch_lock,
target, module->state_handle, peer->state_handle, MCA_BTL_ATOMIC_ADD, 1,
ret = module->selected_btl->btl_atomic_fop (module->selected_btl, peer->state_endpoint, scratch_lock,
target, frag->handle, peer->state_handle, MCA_BTL_ATOMIC_ADD, 1,
0, MCA_BTL_NO_ORDER, ompi_osc_rdma_atomic_complete, NULL, NULL);
}
@@ -482,6 +490,10 @@ int ompi_osc_rdma_complete_atomic (ompi_win_t *win)
}
}
if (frag) {
ompi_osc_rdma_frag_complete (frag);
}
/* release our reference to peers in this group */
ompi_osc_rdma_release_peers (peers, group_size);

View file

@@ -49,8 +49,8 @@ static inline int ompi_osc_rdma_lock_release_shared (ompi_osc_rdma_module_t *mod
ompi_osc_rdma_lock_t value, ptrdiff_t offset)
{
uint64_t lock = (uint64_t) (intptr_t) peer->state + offset;
void *temp = &module->state->scratch_lock;
volatile bool atomic_complete = false;
void *temp;
int ret;
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "releasing shared lock %" PRIx64 " on peer %d. value 0x%lx", lock,
@@ -58,24 +58,36 @@ static inline int ompi_osc_rdma_lock_release_shared (ompi_osc_rdma_module_t *mod
/* spin until the lock has been acquired */
if (!ompi_osc_rdma_peer_local_state (peer)) {
ompi_osc_rdma_frag_t *frag = NULL;
if (module->selected_btl->btl_flags & MCA_BTL_FLAGS_ATOMIC_OPS) {
ret = module->selected_btl->btl_atomic_op (module->selected_btl, peer->state_endpoint, (intptr_t) lock, peer->state_handle,
MCA_BTL_ATOMIC_ADD, value, 0, MCA_BTL_NO_ORDER, ompi_osc_rdma_atomic_complete,
(void *) &atomic_complete, NULL);
} else {
ret = module->selected_btl->btl_atomic_fop (module->selected_btl, peer->state_endpoint, temp, (intptr_t) lock, module->state_handle,
peer->state_handle, MCA_BTL_ATOMIC_ADD, value, 0, MCA_BTL_NO_ORDER,
ompi_osc_rdma_atomic_complete, (void *) &atomic_complete, NULL);
ret = ompi_osc_rdma_frag_alloc (module, 8, &frag, (char **) &temp);
if (OPAL_UNLIKELY(OPAL_SUCCESS != ret)) {
return ret;
}
ret = module->selected_btl->btl_atomic_fop (module->selected_btl, peer->state_endpoint, temp, (intptr_t) lock,
frag->handle, peer->state_handle, MCA_BTL_ATOMIC_ADD, value, 0,
MCA_BTL_NO_ORDER, ompi_osc_rdma_atomic_complete, (void *) &atomic_complete,
NULL);
}
if (OPAL_SUCCESS == ret) {
while (!atomic_complete) {
ompi_osc_rdma_progress (module);
}
} else if (1 == OPAL_SUCCESS) {
} else if (1 == ret) {
ret = OMPI_SUCCESS;
}
if (frag) {
ompi_osc_rdma_frag_complete (frag);
}
return ret;
} else {
(void) ompi_osc_rdma_lock_add ((volatile ompi_osc_rdma_lock_t *) lock, value);
@@ -279,32 +291,40 @@ static inline int ompi_osc_rdma_lock_release_exclusive (ompi_osc_rdma_module_t *
ptrdiff_t offset)
{
uint64_t lock = (uint64_t) (intptr_t) peer->state + offset;
void *temp = &module->state->scratch_lock;
volatile bool atomic_complete = false;
void *temp;
int ret;
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "releasing exclusive lock %" PRIx64 " on peer %d", lock, peer->rank);
if (!ompi_osc_rdma_peer_local_state (peer)) {
ompi_osc_rdma_frag_t *frag = NULL;
if (module->selected_btl->btl_flags & MCA_BTL_FLAGS_ATOMIC_OPS) {
ret = module->selected_btl->btl_atomic_op (module->selected_btl, peer->state_endpoint, lock, peer->state_handle, MCA_BTL_ATOMIC_ADD,
-OMPI_OSC_RDMA_LOCK_EXCLUSIVE, 0, MCA_BTL_NO_ORDER, ompi_osc_rdma_atomic_complete,
(void *) &atomic_complete, NULL);
} else {
ret = module->selected_btl->btl_atomic_fop (module->selected_btl, peer->state_endpoint, temp, lock, module->state_handle,
ret = ompi_osc_rdma_frag_alloc (module, 8, &frag, (char **) &temp);
if (OPAL_UNLIKELY(OPAL_SUCCESS != ret)) {
return ret;
}
ret = module->selected_btl->btl_atomic_fop (module->selected_btl, peer->state_endpoint, temp, lock, frag->handle,
peer->state_handle, MCA_BTL_ATOMIC_ADD, -OMPI_OSC_RDMA_LOCK_EXCLUSIVE, 0,
MCA_BTL_NO_ORDER, ompi_osc_rdma_atomic_complete, (void *) &atomic_complete, NULL);
}
if (OPAL_UNLIKELY(OMPI_SUCCESS > ret)) {
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "error releasing exclusive lock");
return ret;
}
if (OPAL_SUCCESS == ret) {
while (!atomic_complete) {
ompi_osc_rdma_progress (module);
}
} else if (1 == ret) {
ret = OMPI_SUCCESS;
}
if (frag) {
ompi_osc_rdma_frag_complete (frag);
}
} else {
ompi_osc_rdma_unlock_local ((volatile ompi_osc_rdma_lock_t *)(intptr_t) lock);

View file

@@ -151,8 +151,6 @@ struct ompi_osc_rdma_state_t {
ompi_osc_rdma_lock_t local_lock;
/** lock for the accumulate state to ensure ordering and consistency */
ompi_osc_rdma_lock_t accumulate_lock;
/** persistent scratch space for fetch and op/cswap when the result is not needed */
ompi_osc_rdma_lock_t scratch_lock;
/** current index to post to. compare-and-swap must be used to ensure
* the index is free */
osc_rdma_counter_t post_index;