osc/rdma: fix some threading bugs
There were two bugs in osc/rdma when using threads:

- Deadlock in ompi_osc_rdma_start_atomic. This occurs because ompi_osc_rdma_frag_alloc is called with the module lock held. To fix the issue, the module lock is now recursive. In the future I will add a new lock to protect just the current rdma fragment.

- Do not drop the lock in ompi_osc_rdma_frag_alloc when calling ompi_osc_rdma_frag_complete. Not only is it not needed, but dropping the lock at this point can cause a competing thread to mess up the state.

Signed-off-by: Nathan Hjelm <hjelmn@lanl.gov>
Этот коммит содержится в:
родитель
84eb21d6bf
Коммит
9ef0821856
@ -998,7 +998,7 @@ static int ompi_osc_rdma_component_select (struct ompi_win_t *win, void **base,
|
||||
}
|
||||
|
||||
/* initialize the objects, so that always free in cleanup */
|
||||
OBJ_CONSTRUCT(&module->lock, opal_mutex_t);
|
||||
OBJ_CONSTRUCT(&module->lock, opal_recursive_mutex_t);
|
||||
OBJ_CONSTRUCT(&module->outstanding_locks, opal_hash_table_t);
|
||||
OBJ_CONSTRUCT(&module->pending_posts, opal_list_t);
|
||||
OBJ_CONSTRUCT(&module->peer_lock, opal_mutex_t);
|
||||
|
@ -73,9 +73,7 @@ static inline int ompi_osc_rdma_frag_alloc (ompi_osc_rdma_module_t *module, size
|
||||
module->rdma_frag = NULL;
|
||||
|
||||
if (curr) {
|
||||
OPAL_THREAD_UNLOCK(&module->lock);
|
||||
ompi_osc_rdma_frag_complete (curr);
|
||||
OPAL_THREAD_LOCK(&module->lock);
|
||||
}
|
||||
|
||||
item = opal_free_list_get (&mca_osc_rdma_component.frags);
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user