Merge pull request #1224 from hjelmn/osc_fixes
osc/rdma: fix bugs when running more than one process per node
Этот коммит содержится в:
Коммит
4992c22f4a
@@ -419,9 +419,11 @@ int ompi_osc_rdma_complete_atomic (ompi_win_t *win)
|
||||
{
|
||||
ompi_osc_rdma_module_t *module = GET_MODULE(win);
|
||||
ompi_osc_rdma_sync_t *sync = &module->all_sync;
|
||||
ompi_osc_rdma_frag_t *frag = NULL;
|
||||
ompi_osc_rdma_peer_t **peers;
|
||||
void *scratch_lock = NULL;
|
||||
ompi_group_t *group;
|
||||
int group_size;
|
||||
int group_size, ret;
|
||||
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "complete: %s", win->w_name);
|
||||
|
||||
@@ -454,11 +456,17 @@ int ompi_osc_rdma_complete_atomic (ompi_win_t *win)
|
||||
|
||||
ompi_osc_rdma_sync_rdma_complete (sync);
|
||||
|
||||
if (MCA_BTL_FLAGS_ATOMIC_OPS & module->selected_btl->btl_flags) {
|
||||
ret = ompi_osc_rdma_frag_alloc (module, 8, &frag, (char **) &scratch_lock);
|
||||
if (OPAL_UNLIKELY(OPAL_SUCCESS != ret)) {
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
/* for each process in the group increment their number of complete messages */
|
||||
for (int i = 0 ; i < group_size ; ++i) {
|
||||
ompi_osc_rdma_peer_t *peer = peers[i];
|
||||
intptr_t target = (intptr_t) peer->state + offsetof (ompi_osc_rdma_state_t, num_complete_msgs);
|
||||
int ret;
|
||||
|
||||
if (!ompi_osc_rdma_peer_local_state (peer)) {
|
||||
do {
|
||||
@@ -468,8 +476,8 @@ int ompi_osc_rdma_complete_atomic (ompi_win_t *win)
|
||||
ompi_osc_rdma_atomic_complete, NULL, NULL);
|
||||
} else {
|
||||
/* don't care about the read value so use the scratch lock */
|
||||
ret = module->selected_btl->btl_atomic_fop (module->selected_btl, peer->state_endpoint, &module->state->scratch_lock,
|
||||
target, module->state_handle, peer->state_handle, MCA_BTL_ATOMIC_ADD, 1,
|
||||
ret = module->selected_btl->btl_atomic_fop (module->selected_btl, peer->state_endpoint, scratch_lock,
|
||||
target, frag->handle, peer->state_handle, MCA_BTL_ATOMIC_ADD, 1,
|
||||
0, MCA_BTL_NO_ORDER, ompi_osc_rdma_atomic_complete, NULL, NULL);
|
||||
}
|
||||
|
||||
@@ -482,6 +490,10 @@ int ompi_osc_rdma_complete_atomic (ompi_win_t *win)
|
||||
}
|
||||
}
|
||||
|
||||
if (frag) {
|
||||
ompi_osc_rdma_frag_complete (frag);
|
||||
}
|
||||
|
||||
/* release our reference to peers in this group */
|
||||
ompi_osc_rdma_release_peers (peers, group_size);
|
||||
|
||||
|
@@ -49,8 +49,8 @@ static inline int ompi_osc_rdma_lock_release_shared (ompi_osc_rdma_module_t *mod
|
||||
ompi_osc_rdma_lock_t value, ptrdiff_t offset)
|
||||
{
|
||||
uint64_t lock = (uint64_t) (intptr_t) peer->state + offset;
|
||||
void *temp = &module->state->scratch_lock;
|
||||
volatile bool atomic_complete = false;
|
||||
void *temp;
|
||||
int ret;
|
||||
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "releasing shared lock %" PRIx64 " on peer %d. value 0x%lx", lock,
|
||||
@@ -58,24 +58,36 @@ static inline int ompi_osc_rdma_lock_release_shared (ompi_osc_rdma_module_t *mod
|
||||
|
||||
/* spin until the lock has been acquired */
|
||||
if (!ompi_osc_rdma_peer_local_state (peer)) {
|
||||
ompi_osc_rdma_frag_t *frag = NULL;
|
||||
|
||||
if (module->selected_btl->btl_flags & MCA_BTL_FLAGS_ATOMIC_OPS) {
|
||||
ret = module->selected_btl->btl_atomic_op (module->selected_btl, peer->state_endpoint, (intptr_t) lock, peer->state_handle,
|
||||
MCA_BTL_ATOMIC_ADD, value, 0, MCA_BTL_NO_ORDER, ompi_osc_rdma_atomic_complete,
|
||||
(void *) &atomic_complete, NULL);
|
||||
} else {
|
||||
ret = module->selected_btl->btl_atomic_fop (module->selected_btl, peer->state_endpoint, temp, (intptr_t) lock, module->state_handle,
|
||||
peer->state_handle, MCA_BTL_ATOMIC_ADD, value, 0, MCA_BTL_NO_ORDER,
|
||||
ompi_osc_rdma_atomic_complete, (void *) &atomic_complete, NULL);
|
||||
ret = ompi_osc_rdma_frag_alloc (module, 8, &frag, (char **) &temp);
|
||||
if (OPAL_UNLIKELY(OPAL_SUCCESS != ret)) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = module->selected_btl->btl_atomic_fop (module->selected_btl, peer->state_endpoint, temp, (intptr_t) lock,
|
||||
frag->handle, peer->state_handle, MCA_BTL_ATOMIC_ADD, value, 0,
|
||||
MCA_BTL_NO_ORDER, ompi_osc_rdma_atomic_complete, (void *) &atomic_complete,
|
||||
NULL);
|
||||
}
|
||||
|
||||
if (OPAL_SUCCESS == ret) {
|
||||
while (!atomic_complete) {
|
||||
ompi_osc_rdma_progress (module);
|
||||
}
|
||||
} else if (1 == OPAL_SUCCESS) {
|
||||
} else if (1 == ret) {
|
||||
ret = OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
if (frag) {
|
||||
ompi_osc_rdma_frag_complete (frag);
|
||||
}
|
||||
|
||||
return ret;
|
||||
} else {
|
||||
(void) ompi_osc_rdma_lock_add ((volatile ompi_osc_rdma_lock_t *) lock, value);
|
||||
@@ -279,32 +291,40 @@ static inline int ompi_osc_rdma_lock_release_exclusive (ompi_osc_rdma_module_t *
|
||||
ptrdiff_t offset)
|
||||
{
|
||||
uint64_t lock = (uint64_t) (intptr_t) peer->state + offset;
|
||||
void *temp = &module->state->scratch_lock;
|
||||
volatile bool atomic_complete = false;
|
||||
void *temp;
|
||||
int ret;
|
||||
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "releasing exclusive lock %" PRIx64 " on peer %d", lock, peer->rank);
|
||||
|
||||
if (!ompi_osc_rdma_peer_local_state (peer)) {
|
||||
ompi_osc_rdma_frag_t *frag = NULL;
|
||||
|
||||
if (module->selected_btl->btl_flags & MCA_BTL_FLAGS_ATOMIC_OPS) {
|
||||
ret = module->selected_btl->btl_atomic_op (module->selected_btl, peer->state_endpoint, lock, peer->state_handle, MCA_BTL_ATOMIC_ADD,
|
||||
-OMPI_OSC_RDMA_LOCK_EXCLUSIVE, 0, MCA_BTL_NO_ORDER, ompi_osc_rdma_atomic_complete,
|
||||
(void *) &atomic_complete, NULL);
|
||||
} else {
|
||||
ret = module->selected_btl->btl_atomic_fop (module->selected_btl, peer->state_endpoint, temp, lock, module->state_handle,
|
||||
ret = ompi_osc_rdma_frag_alloc (module, 8, &frag, (char **) &temp);
|
||||
if (OPAL_UNLIKELY(OPAL_SUCCESS != ret)) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = module->selected_btl->btl_atomic_fop (module->selected_btl, peer->state_endpoint, temp, lock, frag->handle,
|
||||
peer->state_handle, MCA_BTL_ATOMIC_ADD, -OMPI_OSC_RDMA_LOCK_EXCLUSIVE, 0,
|
||||
MCA_BTL_NO_ORDER, ompi_osc_rdma_atomic_complete, (void *) &atomic_complete, NULL);
|
||||
}
|
||||
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS > ret)) {
|
||||
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "error releasing exclusive lock");
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (OPAL_SUCCESS == ret) {
|
||||
while (!atomic_complete) {
|
||||
ompi_osc_rdma_progress (module);
|
||||
}
|
||||
} else if (1 == ret) {
|
||||
ret = OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
if (frag) {
|
||||
ompi_osc_rdma_frag_complete (frag);
|
||||
}
|
||||
} else {
|
||||
ompi_osc_rdma_unlock_local ((volatile ompi_osc_rdma_lock_t *)(intptr_t) lock);
|
||||
|
@@ -151,8 +151,6 @@ struct ompi_osc_rdma_state_t {
|
||||
ompi_osc_rdma_lock_t local_lock;
|
||||
/** lock for the accumulate state to ensure ordering and consistency */
|
||||
ompi_osc_rdma_lock_t accumulate_lock;
|
||||
/** persistent scratch space for fetch and op/cswap when the result is not needed */
|
||||
ompi_osc_rdma_lock_t scratch_lock;
|
||||
/** current index to post to. compare-and-swap must be used to ensure
|
||||
* the index is free */
|
||||
osc_rdma_counter_t post_index;
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user