Defer destroying an rcache vma while the rcache lock is held,
as this can result in a low-level free of memory, which can require the rcache lock, resulting in a deadlock. This fixes trac:2107 cmr:v1.4. This commit was SVN r22679. The following Trac tickets were found above: Ticket 2107 --> https://svn.open-mpi.org/trac/ompi/ticket/2107
Этот коммит содержится в:
родитель
11500e3267
Коммит
a0b8f061a6
@ -287,6 +287,9 @@ int mca_mpool_rdma_register(mca_mpool_base_module_t *mpool, void *addr,
|
||||
*reg = rdma_reg;
|
||||
(*reg)->ref_count++;
|
||||
OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
|
||||
|
||||
/* Cleanup any vmas that we have deferred deletion on */
|
||||
mpool->rcache->rcache_clean(mpool->rcache);
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
@ -386,6 +389,9 @@ int mca_mpool_rdma_deregister(struct mca_mpool_base_module_t *mpool,
|
||||
}
|
||||
OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
|
||||
|
||||
/* Cleanup any vmas that we have deferred deletion on */
|
||||
mpool->rcache->rcache_clean(mpool->rcache);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
@ -476,6 +482,10 @@ void mca_mpool_rdma_finalize(struct mca_mpool_base_module_t *mpool)
|
||||
OBJ_DESTRUCT(&mpool_rdma->gc_list);
|
||||
OBJ_DESTRUCT(&mpool_rdma->reg_list);
|
||||
OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
|
||||
|
||||
/* Cleanup any vmas that we have deferred deletion on */
|
||||
mpool->rcache->rcache_clean(mpool->rcache);
|
||||
|
||||
}
|
||||
|
||||
int mca_mpool_rdma_ft_event(int state) {
|
||||
|
@ -49,6 +49,11 @@ typedef int (*mca_rcache_base_module_delete_fn_t)(
|
||||
struct mca_rcache_base_module_t* rcache,
|
||||
mca_mpool_base_registration_t* registration);
|
||||
|
||||
/* Do not call the clean function with the rcache lock held */
|
||||
typedef int (*mca_rcache_base_module_clean_fn_t)(
|
||||
struct mca_rcache_base_module_t* rcache);
|
||||
|
||||
|
||||
/**
|
||||
* finalize
|
||||
*/
|
||||
@ -80,6 +85,7 @@ struct mca_rcache_base_module_t {
|
||||
mca_rcache_base_module_find_all_fn_t rcache_find_all;
|
||||
mca_rcache_base_module_insert_fn_t rcache_insert;
|
||||
mca_rcache_base_module_delete_fn_t rcache_delete;
|
||||
mca_rcache_base_module_clean_fn_t rcache_clean;
|
||||
mca_rcache_base_module_finalize_fn_t rcache_finalize;
|
||||
opal_mutex_t lock;
|
||||
};
|
||||
|
@ -36,6 +36,7 @@ void mca_rcache_vma_module_init( mca_rcache_vma_module_t* rcache ) {
|
||||
rcache->base.rcache_find_all = mca_rcache_vma_find_all;
|
||||
rcache->base.rcache_insert = mca_rcache_vma_insert;
|
||||
rcache->base.rcache_delete = mca_rcache_vma_delete;
|
||||
rcache->base.rcache_clean = mca_rcache_vma_clean;
|
||||
rcache->base.rcache_finalize = mca_rcache_vma_finalize;
|
||||
OBJ_CONSTRUCT(&rcache->base.lock, opal_mutex_t);
|
||||
mca_rcache_vma_tree_init(rcache);
|
||||
@ -100,6 +101,30 @@ int mca_rcache_vma_delete(struct mca_rcache_base_module_t* rcache,
|
||||
return mca_rcache_vma_tree_delete(vma_rcache, reg);
|
||||
}
|
||||
|
||||
void mca_rcache_vma_clean(struct mca_rcache_base_module_t* rcache)
|
||||
{
|
||||
mca_rcache_vma_module_t *vma_rcache = (mca_rcache_vma_module_t*)rcache;
|
||||
mca_rcache_vma_t *vma;
|
||||
opal_list_item_t *i;
|
||||
|
||||
do {
|
||||
OPAL_THREAD_LOCK(&rcache->lock);
|
||||
i = opal_list_get_first(&vma_rcache->vma_delete_list);
|
||||
if(opal_list_get_end(&vma_rcache->vma_delete_list) == i) {
|
||||
vma = NULL;
|
||||
OPAL_THREAD_UNLOCK(&rcache->lock);
|
||||
} else {
|
||||
vma = (mca_rcache_vma_t *)i;
|
||||
opal_list_remove_item(&vma_rcache->vma_delete_list, &vma->super);
|
||||
|
||||
/* Need to drop the rcache lock before destroying the vma */
|
||||
OPAL_THREAD_UNLOCK(&rcache->lock);
|
||||
|
||||
mca_rcache_vma_destroy(vma);
|
||||
}
|
||||
} while (NULL != vma);
|
||||
}
|
||||
|
||||
/**
|
||||
* finalize
|
||||
*/
|
||||
|
@ -35,6 +35,7 @@ struct mca_rcache_vma_module_t {
|
||||
mca_rcache_base_module_t base;
|
||||
ompi_rb_tree_t rb_tree;
|
||||
opal_list_t vma_list;
|
||||
opal_list_t vma_delete_list;
|
||||
size_t reg_cur_cache_size;
|
||||
};
|
||||
typedef struct mca_rcache_vma_module_t mca_rcache_vma_module_t;
|
||||
@ -63,6 +64,12 @@ int mca_rcache_vma_insert(struct mca_rcache_base_module_t* rcache,
|
||||
int mca_rcache_vma_delete(struct mca_rcache_base_module_t* rcache,
|
||||
mca_mpool_base_registration_t* registration);
|
||||
|
||||
/* It is not safe to call mca_rcache_vma_clean with the rcache lock held */
|
||||
void mca_rcache_vma_clean(struct mca_rcache_base_module_t* rcache);
|
||||
/* Destroy vma objects which are on the deferred delete list. These were placed
|
||||
on the list earlier when the rcache lock was held and it was not safe to
|
||||
destroy them. They should not be linked into any other structure anymore except
|
||||
the vma_delete_list */
|
||||
|
||||
/**
|
||||
* init/finalize
|
||||
|
@ -120,7 +120,7 @@ static inline mca_rcache_vma_t *mca_rcache_vma_new(
|
||||
return vma;
|
||||
}
|
||||
|
||||
static inline void mca_rcache_vma_destroy(mca_rcache_vma_t *vma)
|
||||
void mca_rcache_vma_destroy(mca_rcache_vma_t *vma)
|
||||
{
|
||||
opal_list_item_t *item;
|
||||
|
||||
@ -254,6 +254,7 @@ int mca_rcache_vma_tree_init(mca_rcache_vma_module_t* rcache)
|
||||
{
|
||||
OBJ_CONSTRUCT(&rcache->rb_tree, ompi_rb_tree_t);
|
||||
OBJ_CONSTRUCT(&rcache->vma_list, opal_list_t);
|
||||
OBJ_CONSTRUCT(&rcache->vma_delete_list, opal_list_t);
|
||||
rcache->reg_cur_cache_size = 0;
|
||||
return ompi_rb_tree_init(&rcache->rb_tree,
|
||||
mca_rcache_vma_tree_node_compare);
|
||||
@ -487,7 +488,7 @@ int mca_rcache_vma_tree_delete(mca_rcache_vma_module_t* vma_rcache,
|
||||
mca_rcache_vma_update_byte_count(vma_rcache,
|
||||
vma->start - vma->end - 1);
|
||||
opal_list_remove_item(&vma_rcache->vma_list, &vma->super);
|
||||
mca_rcache_vma_destroy(vma);
|
||||
opal_list_append(&vma_rcache->vma_delete_list, &vma->super);
|
||||
vma = next;
|
||||
} else {
|
||||
int merged;
|
||||
@ -504,7 +505,7 @@ int mca_rcache_vma_tree_delete(mca_rcache_vma_module_t* vma_rcache,
|
||||
prev->end = vma->end;
|
||||
opal_list_remove_item(&vma_rcache->vma_list, &vma->super);
|
||||
ompi_rb_tree_delete(&vma_rcache->rb_tree, vma);
|
||||
mca_rcache_vma_destroy(vma);
|
||||
opal_list_append(&vma_rcache->vma_delete_list, &vma->super);
|
||||
vma = prev;
|
||||
merged = 1;
|
||||
}
|
||||
@ -517,7 +518,7 @@ int mca_rcache_vma_tree_delete(mca_rcache_vma_module_t* vma_rcache,
|
||||
vma->end = next->end;
|
||||
opal_list_remove_item(&vma_rcache->vma_list, &next->super);
|
||||
ompi_rb_tree_delete(&vma_rcache->rb_tree, next);
|
||||
mca_rcache_vma_destroy(next);
|
||||
opal_list_append(&vma_rcache->vma_delete_list, &next->super);
|
||||
merged = 1;
|
||||
}
|
||||
} while(merged);
|
||||
|
@ -91,6 +91,11 @@ int mca_rcache_vma_tree_delete(
|
||||
mca_mpool_base_registration_t* reg
|
||||
);
|
||||
|
||||
/*
|
||||
* Destroy a vma
|
||||
* Do not call this function with rcache lock as it can deadlock
|
||||
*/
|
||||
void mca_rcache_vma_destroy(mca_rcache_vma_t *vma);
|
||||
|
||||
#endif /* MCA_RCACHE_VMA_TREE_H */
|
||||
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user