decrement ref count on removal of registration from mru and tree.
Add misc asserts to check for proper reference counting. Ugly hack 1 -- use mallopt to never release memory à la sbrk; this is commented out in mca_btl_mvapi_component_init. Ugly hack 2 -- test registrations coming out of the tree via rcache_find: for an unknown reason the tree is returning registrations where the address is not within the base and bound of the registration. If this happens, we return NULL. Comment out the code that enables mem hooks if leave_pinned is set; note that we can do this via an mca param and will default it to leave_pinned with mem_hooks when we iron out these issues. I am adding a unit test for the rcache. Note that we have a unit test for the rb tree, but its compare function is significantly different from the one used for registrations. After we have tracked down the issues with rcache_rb we will remove the above hacks. This commit was SVN r7499.
Этот коммит содержится в:
родитель
50dc5499b4
Коммит
9fe5844071
@ -305,6 +305,12 @@ mca_btl_base_descriptor_t* mca_btl_mvapi_prepare_src(
|
||||
frag->vapi_reg = vapi_reg;
|
||||
|
||||
btl->btl_mpool->mpool_retain(btl->btl_mpool, (mca_mpool_base_registration_t*) vapi_reg);
|
||||
if(vapi_reg->base_reg.flags & MCA_MPOOL_FLAGS_CACHE) {
|
||||
assert(vapi_reg->base_reg.ref_count >= 4);
|
||||
} else {
|
||||
assert(vapi_reg->base_reg.ref_count >= 2);
|
||||
}
|
||||
|
||||
return &frag->base;
|
||||
|
||||
} else if( max_data > btl->btl_max_send_size &&
|
||||
@ -458,6 +464,11 @@ mca_btl_base_descriptor_t* mca_btl_mvapi_prepare_dst(
|
||||
if(NULL!= vapi_reg){
|
||||
/* the memory is already pinned- use it*/
|
||||
btl->btl_mpool->mpool_retain(btl->btl_mpool, (mca_mpool_base_registration_t*) vapi_reg);
|
||||
if(vapi_reg->base_reg.flags & MCA_MPOOL_FLAGS_CACHE) {
|
||||
assert(vapi_reg->base_reg.ref_count >= 4);
|
||||
} else {
|
||||
assert(vapi_reg->base_reg.ref_count >= 2);
|
||||
}
|
||||
} else {
|
||||
/* we didn't get a memory registration passed in, so we have to register the region
|
||||
* ourselves
|
||||
|
@ -38,6 +38,7 @@
|
||||
#include "mca/mpool/mvapi/mpool_mvapi.h"
|
||||
#include "btl_mvapi_endpoint.h"
|
||||
#include "mca/pml/base/pml_base_module_exchange.h"
|
||||
#include <malloc.h>
|
||||
|
||||
mca_btl_mvapi_component_t mca_btl_mvapi_component = {
|
||||
{
|
||||
@ -293,6 +294,10 @@ mca_btl_base_module_t** mca_btl_mvapi_component_init(int *num_btl_modules,
|
||||
mca_btl_mvapi_module_t * mvapi_btl;
|
||||
mca_btl_base_selected_module_t* ib_selected;
|
||||
opal_list_item_t* item;
|
||||
|
||||
/* ugly HACK!! */
|
||||
/* mallopt(M_TRIM_THRESHOLD, -1); */
|
||||
/* mallopt(M_MMAP_MAX, 0); */
|
||||
/* initialization */
|
||||
*num_btl_modules = 0;
|
||||
|
||||
|
@ -50,6 +50,7 @@ void mca_mpool_base_mem_cb(void* base, size_t size, void* cbdata)
|
||||
}
|
||||
for(i = 0; i < cnt; i++) {
|
||||
reg = (mca_mpool_base_registration_t*)ompi_pointer_array_get_item(&regs, i);
|
||||
assert(reg->base == base);
|
||||
current->mpool_module->mpool_deregister(current->mpool_module, reg);
|
||||
}
|
||||
}
|
||||
|
@ -78,12 +78,12 @@ int mca_mpool_base_open(void)
|
||||
0,
|
||||
&mca_mpool_base_use_mem_hooks);
|
||||
|
||||
if(0 == mca_mpool_base_use_mem_hooks) {
|
||||
int param;
|
||||
mca_base_param_register_int("mpi", NULL, "leave_pinned", "leave_pinned", 0);
|
||||
param = mca_base_param_find("mpi", NULL, "leave_pinned");
|
||||
mca_base_param_lookup_int(param, &mca_mpool_base_use_mem_hooks);
|
||||
}
|
||||
/* if(0 == mca_mpool_base_use_mem_hooks) { */
|
||||
/* int param; */
|
||||
/* mca_base_param_register_int("mpi", NULL, "leave_pinned", "leave_pinned", 0); */
|
||||
/* param = mca_base_param_find("mpi", NULL, "leave_pinned"); */
|
||||
/* mca_base_param_lookup_int(param, &mca_mpool_base_use_mem_hooks); */
|
||||
/* } */
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
@ -88,6 +88,7 @@ int mca_mpool_mvapi_register(
|
||||
|
||||
VAPI_ret_t ret;
|
||||
|
||||
assert(size > 0);
|
||||
memset(&mr_in, 0, sizeof(VAPI_mrw_t));
|
||||
memset(&mr_out, 0, sizeof(VAPI_mrw_t));
|
||||
|
||||
@ -95,9 +96,9 @@ int mca_mpool_mvapi_register(
|
||||
vapi_reg->base_reg.mpool = mpool;
|
||||
vapi_reg->base_reg.flags = flags;
|
||||
vapi_reg->hndl = VAPI_INVAL_HNDL;
|
||||
OPAL_THREAD_ADD32(&vapi_reg->base_reg.ref_count, 1);
|
||||
*registration = &vapi_reg->base_reg;
|
||||
|
||||
|
||||
*registration = &vapi_reg->base_reg;
|
||||
mr_in.acl = VAPI_EN_LOCAL_WRITE | VAPI_EN_REMOTE_WRITE | VAPI_EN_REMOTE_READ;
|
||||
mr_in.l_key = 0;
|
||||
mr_in.r_key = 0;
|
||||
@ -114,7 +115,7 @@ int mca_mpool_mvapi_register(
|
||||
);
|
||||
|
||||
if(VAPI_OK != ret){
|
||||
opal_output(0, "error registering memory: %s ", VAPI_strerror(ret));
|
||||
opal_output(0, "error registering memory of size %d: %s ", size, VAPI_strerror(ret));
|
||||
OBJ_RELEASE(vapi_reg);
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
@ -123,11 +124,13 @@ int mca_mpool_mvapi_register(
|
||||
vapi_reg->r_key = mr_out.r_key;
|
||||
vapi_reg->base_reg.base = addr;
|
||||
vapi_reg->base_reg.bound = (void*) ((char*) addr + size - 1);
|
||||
assert(vapi_reg->base_reg.bound - vapi_reg->base_reg.base > 0);
|
||||
if(flags & (MCA_MPOOL_FLAGS_CACHE | MCA_MPOOL_FLAGS_PERSIST)) {
|
||||
mpool->rcache->rcache_insert(mpool->rcache,
|
||||
(mca_mpool_base_registration_t*) vapi_reg,
|
||||
flags);
|
||||
}
|
||||
OPAL_THREAD_ADD32(&vapi_reg->base_reg.ref_count, 1);
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
@ -60,7 +60,7 @@ void* mca_mpool_openib_alloc(
|
||||
void* addr_malloc = (void*)memalign(mca_mpool_openib_component.page_size, size);
|
||||
void* addr = addr_malloc;
|
||||
|
||||
if(OMPI_SUCCESS != mpool->mpool_register(mpool, addr, size, 0, registration)) {
|
||||
if(OMPI_SUCCESS != mpool->mpool_register(mpool, addr, size, flags, registration)) {
|
||||
free(addr_malloc);
|
||||
return NULL;
|
||||
}
|
||||
|
@ -78,6 +78,9 @@ size_t mca_pml_ob1_rdma_btls(
|
||||
if(reg_cnt == 1) {
|
||||
mca_mpool_base_registration_t* reg = ompi_pointer_array_get_item(&regs, 0);
|
||||
size_t reg_len = reg->bound - base + 1;
|
||||
if(reg->flags & MCA_MPOOL_FLAGS_CACHE) {
|
||||
assert(reg->ref_count >= 3);
|
||||
}
|
||||
|
||||
/* is the existing registration the required size */
|
||||
if(reg->base <= base && reg_len >= size) {
|
||||
@ -90,6 +93,7 @@ size_t mca_pml_ob1_rdma_btls(
|
||||
} else if(mca_pml_ob1.leave_pinned) {
|
||||
unsigned char* new_base = reg->base;
|
||||
size_t new_len = (base - reg->base) + size;
|
||||
assert(new_len >= size);
|
||||
btl_mpool->mpool_deregister(btl_mpool, reg);
|
||||
btl_mpool->mpool_register(btl_mpool,
|
||||
new_base,
|
||||
@ -114,6 +118,10 @@ size_t mca_pml_ob1_rdma_btls(
|
||||
for(r = 0; r < reg_cnt; r++) {
|
||||
mca_mpool_base_registration_t* reg = ompi_pointer_array_get_item(&regs, r);
|
||||
size_t reg_len = reg->bound - base + 1;
|
||||
|
||||
if(reg->flags & MCA_MPOOL_FLAGS_CACHE) {
|
||||
assert(reg->ref_count >= 3);
|
||||
}
|
||||
if(reg->base <= base && reg_len >= size) {
|
||||
fit = reg;
|
||||
break;
|
||||
@ -138,6 +146,7 @@ size_t mca_pml_ob1_rdma_btls(
|
||||
size,
|
||||
MCA_MPOOL_FLAGS_CACHE,
|
||||
&fit);
|
||||
assert(fit->ref_count == 3);
|
||||
if(NULL == fit) {
|
||||
opal_output(0, "[%s:%d] unable to register memory\n", __FILE__, __LINE__);
|
||||
continue;
|
||||
@ -151,11 +160,13 @@ size_t mca_pml_ob1_rdma_btls(
|
||||
/* simplify cleanup - bump reference count as we decrement again below */
|
||||
btl_mpool->mpool_retain(btl_mpool,largest);
|
||||
btl_mpool->mpool_deregister(btl_mpool, largest);
|
||||
assert(new_len >= size);
|
||||
btl_mpool->mpool_register(btl_mpool,
|
||||
new_base,
|
||||
new_len,
|
||||
MCA_MPOOL_FLAGS_CACHE,
|
||||
&fit);
|
||||
assert(fit->ref_count == 3);
|
||||
}
|
||||
}
|
||||
|
||||
@ -213,6 +224,10 @@ mca_mpool_base_registration_t* mca_pml_ob1_rdma_registration(
|
||||
for(r = 0; r < reg_cnt; r++) {
|
||||
mca_mpool_base_registration_t* reg = ompi_pointer_array_get_item(&regs, r);
|
||||
size_t reg_len = reg->bound - base + 1;
|
||||
if(reg->flags & MCA_MPOOL_FLAGS_CACHE) {
|
||||
assert(reg->ref_count >= 3);
|
||||
}
|
||||
|
||||
if(reg->base <= base && reg_len >= size) {
|
||||
fit = reg;
|
||||
break;
|
||||
@ -237,17 +252,19 @@ mca_mpool_base_registration_t* mca_pml_ob1_rdma_registration(
|
||||
size,
|
||||
MCA_MPOOL_FLAGS_CACHE,
|
||||
&fit);
|
||||
|
||||
assert(fit->ref_count >= 3);
|
||||
/* a registration exists but is not large enough */
|
||||
} else {
|
||||
unsigned char* new_base = largest->base;
|
||||
size_t new_len = (base - largest->base) + size;
|
||||
btl_mpool->mpool_deregister(btl_mpool, largest);
|
||||
assert(new_len >= size);
|
||||
btl_mpool->mpool_register(btl_mpool,
|
||||
new_base,
|
||||
new_len,
|
||||
MCA_MPOOL_FLAGS_CACHE,
|
||||
&fit);
|
||||
assert(fit->ref_count >= 3);
|
||||
}
|
||||
}
|
||||
|
||||
@ -261,4 +278,3 @@ mca_mpool_base_registration_t* mca_pml_ob1_rdma_registration(
|
||||
OBJ_DESTRUCT(&regs);
|
||||
return fit;
|
||||
}
|
||||
|
||||
|
@ -59,14 +59,18 @@ int mca_rcache_rb_find (
|
||||
return rc;
|
||||
}
|
||||
|
||||
if( !(tree_item->reg->flags & MCA_MPOOL_FLAGS_PERSIST) ) {
|
||||
rc = mca_rcache_rb_mru_touch((mca_rcache_rb_module_t*)rcache, tree_item->reg);
|
||||
if( tree_item->reg->flags & MCA_MPOOL_FLAGS_CACHE ) {
|
||||
rc = mca_rcache_rb_mru_touch((mca_rcache_rb_module_t*)rcache,
|
||||
tree_item->reg);
|
||||
}
|
||||
OPAL_THREAD_ADD32((int32_t*) &tree_item->reg->ref_count, 1);
|
||||
OPAL_THREAD_UNLOCK(&rcache->lock);
|
||||
if(rc == OMPI_SUCCESS) {
|
||||
*cnt = 1;
|
||||
}
|
||||
assert(tree_item->reg->bound - tree_item->reg->base > 0);
|
||||
assert(tree_item->reg->base <= addr);
|
||||
assert(tree_item->reg->bound >= addr);
|
||||
return rc;
|
||||
}
|
||||
|
||||
@ -77,16 +81,15 @@ int mca_rcache_rb_insert (
|
||||
) {
|
||||
int rc = OMPI_SUCCESS;
|
||||
OPAL_THREAD_LOCK(&rcache->lock);
|
||||
if(!(flags & MCA_MPOOL_FLAGS_PERSIST)) {
|
||||
if(flags & MCA_MPOOL_FLAGS_CACHE) {
|
||||
rc = mca_rcache_rb_mru_insert( (mca_rcache_rb_module_t*) rcache, reg);
|
||||
if(OMPI_SUCCESS != rc) {
|
||||
OPAL_THREAD_UNLOCK(&rcache->lock);
|
||||
return rc;
|
||||
} else {
|
||||
}
|
||||
OPAL_THREAD_ADD32((int32_t*)&reg->ref_count, 1);
|
||||
}
|
||||
}
|
||||
rc = mca_rcache_rb_tree_insert((mca_rcache_rb_module_t*)rcache, reg );
|
||||
rc = mca_rcache_rb_tree_insert((mca_rcache_rb_module_t*)rcache, reg);
|
||||
OPAL_THREAD_ADD32((int32_t*) &reg->ref_count, 1);
|
||||
OPAL_THREAD_UNLOCK(&rcache->lock);
|
||||
return rc;
|
||||
@ -98,8 +101,11 @@ int mca_rcache_rb_delete (
|
||||
uint32_t flags
|
||||
) {
|
||||
int rc = OMPI_SUCCESS;
|
||||
assert(reg->ref_count >= 1);
|
||||
OPAL_THREAD_LOCK(&rcache->lock);
|
||||
if(!(flags & MCA_MPOOL_FLAGS_PERSIST)) {
|
||||
if(flags & MCA_MPOOL_FLAGS_CACHE) {
|
||||
assert(reg->ref_count >= 2);
|
||||
OPAL_THREAD_ADD32((int32_t*)&reg->ref_count, -1);
|
||||
rc = mca_rcache_rb_mru_delete( (mca_rcache_rb_module_t*) rcache, reg);
|
||||
}
|
||||
if(OMPI_SUCCESS != rc) {
|
||||
@ -107,6 +113,7 @@ int mca_rcache_rb_delete (
|
||||
return rc;
|
||||
}
|
||||
reg->flags = 0;
|
||||
OPAL_THREAD_ADD32((int32_t*)&reg->ref_count, -1);
|
||||
rc = mca_rcache_rb_tree_delete((mca_rcache_rb_module_t*)rcache, reg );
|
||||
OPAL_THREAD_UNLOCK(&rcache->lock);
|
||||
return rc;
|
||||
|
@ -47,6 +47,7 @@ int mca_rcache_rb_mru_insert(
|
||||
*/
|
||||
old_reg = (mca_mpool_base_registration_t*)
|
||||
opal_list_get_first(&rcache->mru_list);
|
||||
old_reg->mpool->mpool_retain(old_reg->mpool, old_reg);
|
||||
old_reg->mpool->mpool_deregister(old_reg->mpool, old_reg);
|
||||
}
|
||||
opal_list_append(&rcache->mru_list,(opal_list_item_t*) reg);
|
||||
|
@ -71,7 +71,7 @@ struct mca_rcache_rb_tree_item_t * mca_rcache_rb_tree_find(
|
||||
void * base
|
||||
)
|
||||
{
|
||||
mca_rcache_rb_tree_item_t* found;
|
||||
mca_rcache_rb_tree_item_t* found = NULL;
|
||||
mca_rcache_rb_tree_key_t key;
|
||||
|
||||
|
||||
@ -80,6 +80,11 @@ struct mca_rcache_rb_tree_item_t * mca_rcache_rb_tree_find(
|
||||
found = (mca_rcache_rb_tree_item_t *)
|
||||
ompi_rb_tree_find(&rcache->rb_tree, &key);
|
||||
|
||||
if(NULL != found ) {
|
||||
if(found->reg->base <= base || found->reg->bound >= base){
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
return found;
|
||||
}
|
||||
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user