Avoid destroying an rcache vma while the rcache lock is held,
as this can result in a low-level free of memory, which can require the rcache lock, resulting in a deadlock. This fixes trac:2107 cmr:v1.4 This commit was SVN r22679. The following Trac tickets were found above: Ticket 2107 --> https://svn.open-mpi.org/trac/ompi/ticket/2107
Этот коммит содержится в:
родитель
11500e3267
Коммит
a0b8f061a6
@ -287,6 +287,9 @@ int mca_mpool_rdma_register(mca_mpool_base_module_t *mpool, void *addr,
|
|||||||
*reg = rdma_reg;
|
*reg = rdma_reg;
|
||||||
(*reg)->ref_count++;
|
(*reg)->ref_count++;
|
||||||
OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
|
OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
|
||||||
|
|
||||||
|
/* Cleanup any vmas that we have deferred deletion on */
|
||||||
|
mpool->rcache->rcache_clean(mpool->rcache);
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -386,6 +389,9 @@ int mca_mpool_rdma_deregister(struct mca_mpool_base_module_t *mpool,
|
|||||||
}
|
}
|
||||||
OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
|
OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
|
||||||
|
|
||||||
|
/* Cleanup any vmas that we have deferred deletion on */
|
||||||
|
mpool->rcache->rcache_clean(mpool->rcache);
|
||||||
|
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -476,6 +482,10 @@ void mca_mpool_rdma_finalize(struct mca_mpool_base_module_t *mpool)
|
|||||||
OBJ_DESTRUCT(&mpool_rdma->gc_list);
|
OBJ_DESTRUCT(&mpool_rdma->gc_list);
|
||||||
OBJ_DESTRUCT(&mpool_rdma->reg_list);
|
OBJ_DESTRUCT(&mpool_rdma->reg_list);
|
||||||
OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
|
OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
|
||||||
|
|
||||||
|
/* Cleanup any vmas that we have deferred deletion on */
|
||||||
|
mpool->rcache->rcache_clean(mpool->rcache);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int mca_mpool_rdma_ft_event(int state) {
|
int mca_mpool_rdma_ft_event(int state) {
|
||||||
|
@ -49,6 +49,11 @@ typedef int (*mca_rcache_base_module_delete_fn_t)(
|
|||||||
struct mca_rcache_base_module_t* rcache,
|
struct mca_rcache_base_module_t* rcache,
|
||||||
mca_mpool_base_registration_t* registration);
|
mca_mpool_base_registration_t* registration);
|
||||||
|
|
||||||
|
/* Clean hook of an rcache module. It returns nothing: the implementation assigned to this slot (mca_rcache_vma_clean) is declared void and callers discard the result, so the pointer type must be void as well. Do not call the clean function with the rcache lock held */
|
||||||
|
typedef void (*mca_rcache_base_module_clean_fn_t)(
|
||||||
|
struct mca_rcache_base_module_t* rcache);
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* finalize
|
* finalize
|
||||||
*/
|
*/
|
||||||
@ -80,6 +85,7 @@ struct mca_rcache_base_module_t {
|
|||||||
mca_rcache_base_module_find_all_fn_t rcache_find_all;
|
mca_rcache_base_module_find_all_fn_t rcache_find_all;
|
||||||
mca_rcache_base_module_insert_fn_t rcache_insert;
|
mca_rcache_base_module_insert_fn_t rcache_insert;
|
||||||
mca_rcache_base_module_delete_fn_t rcache_delete;
|
mca_rcache_base_module_delete_fn_t rcache_delete;
|
||||||
|
mca_rcache_base_module_clean_fn_t rcache_clean;
|
||||||
mca_rcache_base_module_finalize_fn_t rcache_finalize;
|
mca_rcache_base_module_finalize_fn_t rcache_finalize;
|
||||||
opal_mutex_t lock;
|
opal_mutex_t lock;
|
||||||
};
|
};
|
||||||
|
@ -36,6 +36,7 @@ void mca_rcache_vma_module_init( mca_rcache_vma_module_t* rcache ) {
|
|||||||
rcache->base.rcache_find_all = mca_rcache_vma_find_all;
|
rcache->base.rcache_find_all = mca_rcache_vma_find_all;
|
||||||
rcache->base.rcache_insert = mca_rcache_vma_insert;
|
rcache->base.rcache_insert = mca_rcache_vma_insert;
|
||||||
rcache->base.rcache_delete = mca_rcache_vma_delete;
|
rcache->base.rcache_delete = mca_rcache_vma_delete;
|
||||||
|
rcache->base.rcache_clean = mca_rcache_vma_clean;
|
||||||
rcache->base.rcache_finalize = mca_rcache_vma_finalize;
|
rcache->base.rcache_finalize = mca_rcache_vma_finalize;
|
||||||
OBJ_CONSTRUCT(&rcache->base.lock, opal_mutex_t);
|
OBJ_CONSTRUCT(&rcache->base.lock, opal_mutex_t);
|
||||||
mca_rcache_vma_tree_init(rcache);
|
mca_rcache_vma_tree_init(rcache);
|
||||||
@ -100,6 +101,30 @@ int mca_rcache_vma_delete(struct mca_rcache_base_module_t* rcache,
|
|||||||
return mca_rcache_vma_tree_delete(vma_rcache, reg);
|
return mca_rcache_vma_tree_delete(vma_rcache, reg);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Destroy every vma queued on vma_rcache->vma_delete_list.
 *
 * Each pass takes the rcache lock, unlinks the first queued vma, releases
 * the lock and only then calls mca_rcache_vma_destroy() on it. The loop
 * ends when the list is found empty.
 *
 * rcache: the base module; it is cast to mca_rcache_vma_module_t.
 *
 * This function acquires rcache->lock itself, so it must not be called
 * with that lock already held.
 */
void mca_rcache_vma_clean(struct mca_rcache_base_module_t* rcache)
|
||||||
|
{
|
||||||
|
mca_rcache_vma_module_t *vma_rcache = (mca_rcache_vma_module_t*)rcache;
|
||||||
|
mca_rcache_vma_t *vma;
|
||||||
|
opal_list_item_t *i;
|
||||||
|
|
||||||
|
do {
|
||||||
|
OPAL_THREAD_LOCK(&rcache->lock); /* list is only inspected and modified under the lock */
|
||||||
|
i = opal_list_get_first(&vma_rcache->vma_delete_list);
|
||||||
|
if(opal_list_get_end(&vma_rcache->vma_delete_list) == i) {
|
||||||
|
vma = NULL; /* first == end: the delete list is empty, so the loop terminates */
|
||||||
|
OPAL_THREAD_UNLOCK(&rcache->lock);
|
||||||
|
} else {
|
||||||
|
vma = (mca_rcache_vma_t *)i;
|
||||||
|
opal_list_remove_item(&vma_rcache->vma_delete_list, &vma->super); /* unlink while still locked */
|
||||||
|
|
||||||
|
/* Need to drop the rcache lock before destroying the vma */
|
||||||
|
OPAL_THREAD_UNLOCK(&rcache->lock);
|
||||||
|
|
||||||
|
mca_rcache_vma_destroy(vma); /* runs without the lock held */
|
||||||
|
}
|
||||||
|
} while (NULL != vma);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* finalize
|
* finalize
|
||||||
*/
|
*/
|
||||||
|
@ -35,6 +35,7 @@ struct mca_rcache_vma_module_t {
|
|||||||
mca_rcache_base_module_t base;
|
mca_rcache_base_module_t base;
|
||||||
ompi_rb_tree_t rb_tree;
|
ompi_rb_tree_t rb_tree;
|
||||||
opal_list_t vma_list;
|
opal_list_t vma_list;
|
||||||
|
opal_list_t vma_delete_list;
|
||||||
size_t reg_cur_cache_size;
|
size_t reg_cur_cache_size;
|
||||||
};
|
};
|
||||||
typedef struct mca_rcache_vma_module_t mca_rcache_vma_module_t;
|
typedef struct mca_rcache_vma_module_t mca_rcache_vma_module_t;
|
||||||
@ -63,6 +64,12 @@ int mca_rcache_vma_insert(struct mca_rcache_base_module_t* rcache,
|
|||||||
int mca_rcache_vma_delete(struct mca_rcache_base_module_t* rcache,
|
int mca_rcache_vma_delete(struct mca_rcache_base_module_t* rcache,
|
||||||
mca_mpool_base_registration_t* registration);
|
mca_mpool_base_registration_t* registration);
|
||||||
|
|
||||||
|
/* It is not safe to call mca_rcache_vma_clean with the rcache lock held */
|
||||||
|
void mca_rcache_vma_clean(struct mca_rcache_base_module_t* rcache);
|
||||||
|
/* Destroy vma objects which are on the deferred delete list. These were placed
|
||||||
|
on the list earlier when the rcache lock was held and it was not safe to
|
||||||
|
destroy them. They should not be linked into any other structure anymore except
|
||||||
|
the vma_delete_list list */
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* init/finalize
|
* init/finalize
|
||||||
|
@ -120,7 +120,7 @@ static inline mca_rcache_vma_t *mca_rcache_vma_new(
|
|||||||
return vma;
|
return vma;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void mca_rcache_vma_destroy(mca_rcache_vma_t *vma)
|
void mca_rcache_vma_destroy(mca_rcache_vma_t *vma)
|
||||||
{
|
{
|
||||||
opal_list_item_t *item;
|
opal_list_item_t *item;
|
||||||
|
|
||||||
@ -254,6 +254,7 @@ int mca_rcache_vma_tree_init(mca_rcache_vma_module_t* rcache)
|
|||||||
{
|
{
|
||||||
OBJ_CONSTRUCT(&rcache->rb_tree, ompi_rb_tree_t);
|
OBJ_CONSTRUCT(&rcache->rb_tree, ompi_rb_tree_t);
|
||||||
OBJ_CONSTRUCT(&rcache->vma_list, opal_list_t);
|
OBJ_CONSTRUCT(&rcache->vma_list, opal_list_t);
|
||||||
|
OBJ_CONSTRUCT(&rcache->vma_delete_list, opal_list_t);
|
||||||
rcache->reg_cur_cache_size = 0;
|
rcache->reg_cur_cache_size = 0;
|
||||||
return ompi_rb_tree_init(&rcache->rb_tree,
|
return ompi_rb_tree_init(&rcache->rb_tree,
|
||||||
mca_rcache_vma_tree_node_compare);
|
mca_rcache_vma_tree_node_compare);
|
||||||
@ -487,7 +488,7 @@ int mca_rcache_vma_tree_delete(mca_rcache_vma_module_t* vma_rcache,
|
|||||||
mca_rcache_vma_update_byte_count(vma_rcache,
|
mca_rcache_vma_update_byte_count(vma_rcache,
|
||||||
vma->start - vma->end - 1);
|
vma->start - vma->end - 1);
|
||||||
opal_list_remove_item(&vma_rcache->vma_list, &vma->super);
|
opal_list_remove_item(&vma_rcache->vma_list, &vma->super);
|
||||||
mca_rcache_vma_destroy(vma);
|
opal_list_append(&vma_rcache->vma_delete_list, &vma->super);
|
||||||
vma = next;
|
vma = next;
|
||||||
} else {
|
} else {
|
||||||
int merged;
|
int merged;
|
||||||
@ -504,7 +505,7 @@ int mca_rcache_vma_tree_delete(mca_rcache_vma_module_t* vma_rcache,
|
|||||||
prev->end = vma->end;
|
prev->end = vma->end;
|
||||||
opal_list_remove_item(&vma_rcache->vma_list, &vma->super);
|
opal_list_remove_item(&vma_rcache->vma_list, &vma->super);
|
||||||
ompi_rb_tree_delete(&vma_rcache->rb_tree, vma);
|
ompi_rb_tree_delete(&vma_rcache->rb_tree, vma);
|
||||||
mca_rcache_vma_destroy(vma);
|
opal_list_append(&vma_rcache->vma_delete_list, &vma->super);
|
||||||
vma = prev;
|
vma = prev;
|
||||||
merged = 1;
|
merged = 1;
|
||||||
}
|
}
|
||||||
@ -517,7 +518,7 @@ int mca_rcache_vma_tree_delete(mca_rcache_vma_module_t* vma_rcache,
|
|||||||
vma->end = next->end;
|
vma->end = next->end;
|
||||||
opal_list_remove_item(&vma_rcache->vma_list, &next->super);
|
opal_list_remove_item(&vma_rcache->vma_list, &next->super);
|
||||||
ompi_rb_tree_delete(&vma_rcache->rb_tree, next);
|
ompi_rb_tree_delete(&vma_rcache->rb_tree, next);
|
||||||
mca_rcache_vma_destroy(next);
|
opal_list_append(&vma_rcache->vma_delete_list, &next->super);
|
||||||
merged = 1;
|
merged = 1;
|
||||||
}
|
}
|
||||||
} while(merged);
|
} while(merged);
|
||||||
|
@ -91,6 +91,11 @@ int mca_rcache_vma_tree_delete(
|
|||||||
mca_mpool_base_registration_t* reg
|
mca_mpool_base_registration_t* reg
|
||||||
);
|
);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Destroy a vma
|
||||||
|
* Do not call this function with the rcache lock held, as it can deadlock
|
||||||
|
*/
|
||||||
|
void mca_rcache_vma_destroy(mca_rcache_vma_t *vma);
|
||||||
|
|
||||||
#endif /* MCA_RCACHE_VMA_TREE_H */
|
#endif /* MCA_RCACHE_VMA_TREE_H */
|
||||||
|
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user