Merge pull request #6687 from yosefe/topic/osc-ucx-fix-ud-self-deadlock
OSC/UCX: Fix deadlock with atomic lock
Этот коммит содержится в:
Коммит
0c1da0fcab
@@ -272,6 +272,7 @@ int ompi_osc_ucx_post(struct ompi_group_t *group, int assert, struct ompi_win_t
|
|||||||
ompi_osc_ucx_handle_incoming_post(module, &(module->state.post_state[j]), NULL, 0);
|
ompi_osc_ucx_handle_incoming_post(module, &(module->state.post_state[j]), NULL, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ucp_worker_progress(mca_osc_ucx_component.wpool->dflt_worker);
|
||||||
usleep(100);
|
usleep(100);
|
||||||
} while (1);
|
} while (1);
|
||||||
}
|
}
|
||||||
|
@@ -240,7 +240,7 @@ static inline int start_atomicity(ompi_osc_ucx_module_t *module, int target) {
|
|||||||
uint64_t remote_addr = (module->state_addrs)[target] + OSC_UCX_STATE_ACC_LOCK_OFFSET;
|
uint64_t remote_addr = (module->state_addrs)[target] + OSC_UCX_STATE_ACC_LOCK_OFFSET;
|
||||||
int ret = OMPI_SUCCESS;
|
int ret = OMPI_SUCCESS;
|
||||||
|
|
||||||
while (result_value != TARGET_LOCK_UNLOCKED) {
|
for (;;) {
|
||||||
ret = opal_common_ucx_wpmem_cmpswp(module->state_mem,
|
ret = opal_common_ucx_wpmem_cmpswp(module->state_mem,
|
||||||
TARGET_LOCK_UNLOCKED, TARGET_LOCK_EXCLUSIVE,
|
TARGET_LOCK_UNLOCKED, TARGET_LOCK_EXCLUSIVE,
|
||||||
target, &result_value, sizeof(result_value),
|
target, &result_value, sizeof(result_value),
|
||||||
@@ -249,9 +249,12 @@ static inline int start_atomicity(ompi_osc_ucx_module_t *module, int target) {
|
|||||||
OSC_UCX_VERBOSE(1, "opal_common_ucx_mem_cmpswp failed: %d", ret);
|
OSC_UCX_VERBOSE(1, "opal_common_ucx_mem_cmpswp failed: %d", ret);
|
||||||
return OMPI_ERROR;
|
return OMPI_ERROR;
|
||||||
}
|
}
|
||||||
}
|
if (result_value == TARGET_LOCK_UNLOCKED) {
|
||||||
|
return OMPI_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
return ret;
|
ucp_worker_progress(mca_osc_ucx_component.wpool->dflt_worker);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline int end_atomicity(ompi_osc_ucx_module_t *module, int target) {
|
static inline int end_atomicity(ompi_osc_ucx_module_t *module, int target) {
|
||||||
|
@@ -42,6 +42,7 @@ static inline int start_shared(ompi_osc_ucx_module_t *module, int target) {
|
|||||||
} else {
|
} else {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
ucp_worker_progress(mca_osc_ucx_component.wpool->dflt_worker);
|
||||||
}
|
}
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
@@ -58,7 +59,7 @@ static inline int start_exclusive(ompi_osc_ucx_module_t *module, int target) {
|
|||||||
uint64_t remote_addr = (module->state_addrs)[target] + OSC_UCX_STATE_LOCK_OFFSET;
|
uint64_t remote_addr = (module->state_addrs)[target] + OSC_UCX_STATE_LOCK_OFFSET;
|
||||||
int ret = OMPI_SUCCESS;
|
int ret = OMPI_SUCCESS;
|
||||||
|
|
||||||
while (result_value != TARGET_LOCK_UNLOCKED) {
|
for (;;) {
|
||||||
ret = opal_common_ucx_wpmem_cmpswp(module->state_mem,
|
ret = opal_common_ucx_wpmem_cmpswp(module->state_mem,
|
||||||
TARGET_LOCK_UNLOCKED, TARGET_LOCK_EXCLUSIVE,
|
TARGET_LOCK_UNLOCKED, TARGET_LOCK_EXCLUSIVE,
|
||||||
target, &result_value, sizeof(result_value),
|
target, &result_value, sizeof(result_value),
|
||||||
@@ -66,9 +67,12 @@ static inline int start_exclusive(ompi_osc_ucx_module_t *module, int target) {
|
|||||||
if (OMPI_SUCCESS != ret) {
|
if (OMPI_SUCCESS != ret) {
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
}
|
if (result_value == TARGET_LOCK_UNLOCKED) {
|
||||||
|
return OMPI_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
return ret;
|
ucp_worker_progress(mca_osc_ucx_component.wpool->dflt_worker);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline int end_exclusive(ompi_osc_ucx_module_t *module, int target) {
|
static inline int end_exclusive(ompi_osc_ucx_module_t *module, int target) {
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user