diff --git a/ompi/mca/btl/mvapi/btl_mvapi_component.c b/ompi/mca/btl/mvapi/btl_mvapi_component.c index 46c7609bed..682cfef561 100644 --- a/ompi/mca/btl/mvapi/btl_mvapi_component.c +++ b/ompi/mca/btl/mvapi/btl_mvapi_component.c @@ -296,7 +296,7 @@ mca_btl_base_module_t** mca_btl_mvapi_component_init(int *num_btl_modules, opal_list_item_t* item; /* ugly HACK!! */ - /* mallopt(M_TRIM_THRESHOLD, -1); */ + /* mallopt(M_TRIM_THRESHOLD, -1); */ /* mallopt(M_MMAP_MAX, 0); */ /* initialization */ *num_btl_modules = 0; diff --git a/ompi/mca/mpool/base/base.h b/ompi/mca/mpool/base/base.h index 9e9df155ac..77679ba7e6 100644 --- a/ompi/mca/mpool/base/base.h +++ b/ompi/mca/mpool/base/base.h @@ -31,6 +31,22 @@ extern "C" { #endif +static inline unsigned long my_log2(unsigned long val) { + unsigned long count = -1; + while(val > 0) { + val = val >> 1; + count++; + } + return count; +} +static inline void *down_align_addr(void* addr, unsigned int shift) { + return (void*) (((unsigned long) addr) & (~(unsigned long) 0) << shift); +} + +static inline void *up_align_addr(void*addr, unsigned int shift) { + return (void*) ((((unsigned long) addr) | ~((~(unsigned long) 0) << shift)) + 1); +} + struct mca_mpool_base_selected_module_t { opal_list_item_t super; mca_mpool_base_component_t *mpool_component; diff --git a/ompi/mca/mpool/base/mpool_base_mem_cb.c b/ompi/mca/mpool/base/mpool_base_mem_cb.c index e7908fdf47..0c670e3e48 100644 --- a/ompi/mca/mpool/base/mpool_base_mem_cb.c +++ b/ompi/mca/mpool/base/mpool_base_mem_cb.c @@ -19,6 +19,10 @@ #include "mpool_base_mem_cb.h" #include "base.h" + +extern uint32_t mca_mpool_base_page_size; +extern uint32_t mca_mpool_base_page_size_log; + /* * memory hook callback, called when memory is free'd out from under us */ @@ -30,30 +34,42 @@ void mca_mpool_base_mem_cb(void* base, size_t size, void* cbdata) mca_mpool_base_selected_module_t* current; int rc; opal_list_item_t* item; - + void* base_addr; + void* bound_addr; + base_addr = down_align_addr( 
base, mca_mpool_base_page_size_log); + bound_addr = up_align_addr((void*) ((unsigned long) base + size - 1), mca_mpool_base_page_size_log); OBJ_CONSTRUCT(&regs, ompi_pointer_array_t); for(item = opal_list_get_first(&mca_mpool_base_modules); item != opal_list_get_end(&mca_mpool_base_modules); item = opal_list_get_next(item)) { current = (mca_mpool_base_selected_module_t*) item; - if(NULL != current->mpool_module->mpool_find) { - rc = current->mpool_module->mpool_find( - current->mpool_module, - base, - size, - &regs, - &cnt - ); - if(OMPI_SUCCESS != rc) { - continue; + + for( ; base_addr <= bound_addr; + base_addr =(void*) ((unsigned long) base_addr + mca_mpool_base_page_size)) { + + if(NULL != current->mpool_module->mpool_find) { + rc = current->mpool_module->mpool_find( + current->mpool_module, + base_addr, + size, + &regs, + &cnt + ); + if(OMPI_SUCCESS != rc) { + continue; + } + for(i = 0; i < cnt; i++) { + + reg = (mca_mpool_base_registration_t*)ompi_pointer_array_get_item(&regs, i); + if(base_addr < reg->bound) { + base_addr = down_align_addr( reg->bound, mca_mpool_base_page_size_log ); + } + current->mpool_module->mpool_deregister(current->mpool_module, reg); + } } - for(i = 0; i < cnt; i++) { - reg = (mca_mpool_base_registration_t*)ompi_pointer_array_get_item(&regs, i); - assert(reg->base == base); - current->mpool_module->mpool_deregister(current->mpool_module, reg); - } - } + } + } OBJ_DESTRUCT(&regs); } diff --git a/ompi/mca/mpool/base/mpool_base_open.c b/ompi/mca/mpool/base/mpool_base_open.c index af99ddcee7..c4dc391b54 100644 --- a/ompi/mca/mpool/base/mpool_base_open.c +++ b/ompi/mca/mpool/base/mpool_base_open.c @@ -23,6 +23,8 @@ #include "mca/mpool/mpool.h" #include "mca/mpool/base/base.h" #include "ompi/include/constants.h" +#include + /* * The following file was created by configure. 
It contains extern @@ -42,6 +44,9 @@ */ int mca_mpool_base_output = -1; int mca_mpool_base_use_mem_hooks = 0; +uint32_t mca_mpool_base_page_size; +uint32_t mca_mpool_base_page_size_log; + opal_list_t mca_mpool_base_components; opal_list_t mca_mpool_base_modules; @@ -84,6 +89,11 @@ int mca_mpool_base_open(void) /* param = mca_base_param_find("mpi", NULL, "leave_pinned"); */ /* mca_base_param_lookup_int(param, &mca_mpool_base_use_mem_hooks); */ /* } */ + + /* get the page size for this architecture*/ + mca_mpool_base_page_size = sysconf(_SC_PAGESIZE); + mca_mpool_base_page_size_log = my_log2(mca_mpool_base_page_size); + return OMPI_SUCCESS; } diff --git a/ompi/mca/mpool/gm/mpool_gm_module.c b/ompi/mca/mpool/gm/mpool_gm_module.c index 69d884c720..60adafab97 100644 --- a/ompi/mca/mpool/gm/mpool_gm_module.c +++ b/ompi/mca/mpool/gm/mpool_gm_module.c @@ -18,8 +18,12 @@ #include #include "opal/util/output.h" #include "mpool_gm.h" -#include "mca/rcache/rcache.h" -#include "mca/rcache/base/base.h" +#include "ompi/mca/rcache/rcache.h" +#include "ompi/mca/rcache/base/base.h" +#include "ompi/mca/mpool/base/base.h" + +extern uint32_t mca_mpool_base_page_size; +extern uint32_t mca_mpool_base_page_size_log; /* @@ -91,6 +95,10 @@ int mca_mpool_gm_register( reg->base = addr; reg->flags = flags; reg->bound = reg->base + size - 1; + reg->base_align = down_align_addr(addr, mca_mpool_base_page_size_log); + reg->bound_align = up_align_addr(reg->bound + , mca_mpool_base_page_size_log); + OPAL_THREAD_ADD32(&reg->ref_count,1); if(flags & (MCA_MPOOL_FLAGS_CACHE | MCA_MPOOL_FLAGS_PERSIST)) { diff --git a/ompi/mca/mpool/mpool.h b/ompi/mca/mpool/mpool.h index 37827cb06c..b09e2cb73c 100644 --- a/ompi/mca/mpool/mpool.h +++ b/ompi/mca/mpool/mpool.h @@ -35,7 +35,9 @@ struct mca_mpool_base_registration_t { struct mca_mpool_base_module_t *mpool; unsigned char* base; unsigned char* bound; - unsigned char* alloc_base; + unsigned char* alloc_base; + unsigned char* base_align; + unsigned char* bound_align; 
void* user_data; uint32_t ref_count; uint32_t flags; diff --git a/ompi/mca/mpool/mvapi/mpool_mvapi.h b/ompi/mca/mpool/mvapi/mpool_mvapi.h index fdb32ec1a9..cdee7f76ca 100644 --- a/ompi/mca/mpool/mvapi/mpool_mvapi.h +++ b/ompi/mca/mpool/mvapi/mpool_mvapi.h @@ -30,20 +30,9 @@ extern "C" { #endif -static inline void * DOWN_ALIGN_ADDR(void * addr, uint32_t cnt) { - return (void*)((MT_virt_addr_t)(addr) & (~((MT_virt_addr_t)0) << (cnt))); -} - -static inline void* ALIGN_ADDR(void* addr, uint32_t cnt ) { - DOWN_ALIGN_ADDR(((addr) + ~(~((MT_virt_addr_t)0) << (cnt))), (cnt)); - return addr; -} - - + struct mca_mpool_mvapi_component_t { mca_mpool_base_component_t super; - long page_size; - long page_size_log; char* rcache_name; }; diff --git a/ompi/mca/mpool/mvapi/mpool_mvapi_component.c b/ompi/mca/mpool/mvapi/mpool_mvapi_component.c index 74adf5eee5..651e111f14 100644 --- a/ompi/mca/mpool/mvapi/mpool_mvapi_component.c +++ b/ompi/mca/mpool/mvapi/mpool_mvapi_component.c @@ -21,7 +21,6 @@ #include "mpool_mvapi.h" #include "util/proc_info.h" #include "util/sys_info.h" -#include /* * Local functions @@ -66,6 +65,8 @@ static void mca_mpool_mvapi_registration_constructor( mca_mpool_mvapi_registrati { registration->base_reg.base = NULL; registration->base_reg.bound = NULL; + registration->base_reg.base_align = NULL; + registration->base_reg.bound_align = NULL; registration->base_reg.flags = 0; } @@ -96,9 +97,8 @@ static int mca_mpool_mvapi_open(void) { /* register VAPI component parameters */ - /* get the page size for this architecture*/ - mca_mpool_mvapi_component.page_size = sysconf(_SC_PAGESIZE); - + + return OMPI_SUCCESS; } @@ -106,7 +106,6 @@ static mca_mpool_base_module_t* mca_mpool_mvapi_init( struct mca_mpool_base_resources_t* resources) { mca_mpool_mvapi_module_t* mpool_module; - long page_size = mca_mpool_mvapi_component.page_size; mca_base_param_reg_string(&mca_mpool_mvapi_component.super.mpool_version, "rcache_name", "The name of the registration cache the mpool 
should use", @@ -115,12 +114,6 @@ static mca_mpool_base_module_t* mca_mpool_mvapi_init( "rb", &(mca_mpool_mvapi_component.rcache_name)); - mca_mpool_mvapi_component.page_size_log = 0; - while(page_size > 1){ - page_size = page_size >> 1; - mca_mpool_mvapi_component.page_size_log++; - } - mpool_module = (mca_mpool_mvapi_module_t*)malloc(sizeof(mca_mpool_mvapi_module_t)); mca_mpool_mvapi_module_init(mpool_module); diff --git a/ompi/mca/mpool/mvapi/mpool_mvapi_module.c b/ompi/mca/mpool/mvapi/mpool_mvapi_module.c index 257ce82317..6a9bb1ed96 100644 --- a/ompi/mca/mpool/mvapi/mpool_mvapi_module.c +++ b/ompi/mca/mpool/mvapi/mpool_mvapi_module.c @@ -24,7 +24,10 @@ #include #include "mca/rcache/rcache.h" #include "mca/rcache/base/base.h" +#include "ompi/mca/mpool/base/base.h" +extern uint32_t mca_mpool_base_page_size; +extern uint32_t mca_mpool_base_page_size_log; /* @@ -60,8 +63,8 @@ void* mca_mpool_mvapi_alloc( mca_mpool_base_registration_t** registration) { - void* addr_malloc = (void*)malloc(size + mca_mpool_mvapi_component.page_size); - void* addr = (void*) ALIGN_ADDR(addr_malloc, mca_mpool_mvapi_component.page_size_log); + void* addr_malloc = (void*)malloc(size + mca_mpool_base_page_size); + void* addr = (void*) up_align_addr(addr_malloc, mca_mpool_base_page_size_log); if(OMPI_SUCCESS != mpool->mpool_register(mpool, addr, size, flags, registration)) { free(addr_malloc); return NULL; @@ -85,9 +88,10 @@ int mca_mpool_mvapi_register( mca_mpool_mvapi_module_t * mpool_module = (mca_mpool_mvapi_module_t*) mpool; mca_mpool_mvapi_registration_t * vapi_reg; VAPI_mrw_t mr_in, mr_out; - VAPI_ret_t ret; + + assert(size > 0); memset(&mr_in, 0, sizeof(VAPI_mrw_t)); memset(&mr_out, 0, sizeof(VAPI_mrw_t)); @@ -103,7 +107,7 @@ int mca_mpool_mvapi_register( mr_in.l_key = 0; mr_in.r_key = 0; mr_in.pd_hndl = mpool_module->hca_pd.pd_tag; - mr_in.size = size; + mr_in.size = size; mr_in.start = (VAPI_virt_addr_t) (MT_virt_addr_t) addr; mr_in.type = VAPI_MR; @@ -123,8 +127,13 @@ int 
mca_mpool_mvapi_register( vapi_reg->l_key = mr_out.l_key; vapi_reg->r_key = mr_out.r_key; vapi_reg->base_reg.base = addr; - vapi_reg->base_reg.bound = (void*) ((char*) addr + size - 1); + vapi_reg->base_reg.bound = (unsigned char*) (unsigned long) addr + size - 1; + vapi_reg->base_reg.base_align = down_align_addr(addr, mca_mpool_base_page_size_log); + vapi_reg->base_reg.bound_align = up_align_addr(vapi_reg->base_reg.bound + , mca_mpool_base_page_size_log); + assert(vapi_reg->base_reg.bound - vapi_reg->base_reg.base > 0); + if(flags & (MCA_MPOOL_FLAGS_CACHE | MCA_MPOOL_FLAGS_PERSIST)) { mpool->rcache->rcache_insert(mpool->rcache, (mca_mpool_base_registration_t*) vapi_reg, diff --git a/ompi/mca/mpool/openib/mpool_openib.h b/ompi/mca/mpool/openib/mpool_openib.h index 0add664133..02f144defc 100644 --- a/ompi/mca/mpool/openib/mpool_openib.h +++ b/ompi/mca/mpool/openib/mpool_openib.h @@ -41,8 +41,6 @@ static inline void* ALIGN_ADDR(void* addr, uint32_t cnt ) { struct mca_mpool_openib_component_t { mca_mpool_base_component_t super; - long page_size; - long page_size_log; char* rcache_name; }; diff --git a/ompi/mca/mpool/openib/mpool_openib_component.c b/ompi/mca/mpool/openib/mpool_openib_component.c index 787714ca5b..eb9502e0ec 100644 --- a/ompi/mca/mpool/openib/mpool_openib_component.c +++ b/ompi/mca/mpool/openib/mpool_openib_component.c @@ -95,9 +95,7 @@ OBJ_CLASS_INSTANCE( */ static int mca_mpool_openib_open(void) { - /* get the page size for this architecture*/ - mca_mpool_openib_component.page_size = sysconf(_SC_PAGESIZE); - + return OMPI_SUCCESS; } @@ -105,7 +103,6 @@ static mca_mpool_base_module_t* mca_mpool_openib_init( struct mca_mpool_base_resources_t* resources) { mca_mpool_openib_module_t* mpool_module; - long page_size = mca_mpool_openib_component.page_size; mca_base_param_reg_string(&mca_mpool_openib_component.super.mpool_version, "rcache_name", "The name of the registration cache the mpool should use", @@ -115,12 +112,6 @@ static mca_mpool_base_module_t* 
mca_mpool_openib_init( &(mca_mpool_openib_component.rcache_name)); - mca_mpool_openib_component.page_size_log = 0; - while(page_size > 1){ - page_size = page_size >> 1; - mca_mpool_openib_component.page_size_log++; - } - mpool_module = (mca_mpool_openib_module_t*)malloc(sizeof(mca_mpool_openib_module_t)); mca_mpool_openib_module_init(mpool_module); diff --git a/ompi/mca/mpool/openib/mpool_openib_module.c b/ompi/mca/mpool/openib/mpool_openib_module.c index e5a6cd4839..eb4389de6d 100644 --- a/ompi/mca/mpool/openib/mpool_openib_module.c +++ b/ompi/mca/mpool/openib/mpool_openib_module.c @@ -23,6 +23,10 @@ #include #include "mca/rcache/rcache.h" #include "mca/rcache/base/base.h" +#include "ompi/mca/mpool/base/base.h" + +extern uint32_t mca_mpool_base_page_size; +extern uint32_t mca_mpool_base_page_size_log; /* * Initializes the mpool module. @@ -57,7 +61,7 @@ void* mca_mpool_openib_alloc( mca_mpool_base_registration_t** registration) { - void* addr_malloc = (void*)memalign(mca_mpool_openib_component.page_size, size); + void* addr_malloc = (void*)memalign(mca_mpool_base_page_size, size); void* addr = addr_malloc; if(OMPI_SUCCESS != mpool->mpool_register(mpool, addr, size, flags, registration)) { @@ -101,7 +105,11 @@ int mca_mpool_openib_register(mca_mpool_base_module_t* mpool, vapi_reg->base_reg.base = addr; vapi_reg->base_reg.bound = (void*) ((char*) addr + size - 1); - + vapi_reg->base_reg.base_align = down_align_addr(addr, mca_mpool_base_page_size_log); + vapi_reg->base_reg.bound_align = up_align_addr(vapi_reg->base_reg.bound + , mca_mpool_base_page_size_log); + + if(flags & (MCA_MPOOL_FLAGS_CACHE | MCA_MPOOL_FLAGS_PERSIST)) { mpool->rcache->rcache_insert(mpool->rcache, (mca_mpool_base_registration_t*) vapi_reg, diff --git a/ompi/mca/pml/ob1/pml_ob1_rdma.c b/ompi/mca/pml/ob1/pml_ob1_rdma.c index a20cacda05..3074696a99 100644 --- a/ompi/mca/pml/ob1/pml_ob1_rdma.c +++ b/ompi/mca/pml/ob1/pml_ob1_rdma.c @@ -93,6 +93,7 @@ size_t mca_pml_ob1_rdma_btls( } else 
if(mca_pml_ob1.leave_pinned) { unsigned char* new_base = reg->base; size_t new_len = (base - reg->base) + size; + assert(new_len >= size); btl_mpool->mpool_deregister(btl_mpool, reg); btl_mpool->mpool_register(btl_mpool, @@ -111,7 +112,6 @@ size_t mca_pml_ob1_rdma_btls( } continue; } - /* * find the best fit when there are multiple registrations */ @@ -257,6 +257,7 @@ mca_mpool_base_registration_t* mca_pml_ob1_rdma_registration( } else { unsigned char* new_base = largest->base; size_t new_len = (base - largest->base) + size; + btl_mpool->mpool_deregister(btl_mpool, largest); assert(new_len >= size); btl_mpool->mpool_register(btl_mpool, diff --git a/ompi/mca/rcache/rb/rcache_rb.c b/ompi/mca/rcache/rb/rcache_rb.c index 2f9f8f174c..c286ce3141 100644 --- a/ompi/mca/rcache/rb/rcache_rb.c +++ b/ompi/mca/rcache/rb/rcache_rb.c @@ -14,11 +14,11 @@ * $HEADER$ */ -#include "mca/rcache/rcache.h" +#include "ompi/mca/rcache/rcache.h" #include "rcache_rb.h" #include "rcache_rb_tree.h" #include "rcache_rb_mru.h" - +#include "opal/util/output.h" /** * Initialize the rcache @@ -42,7 +42,7 @@ int mca_rcache_rb_find ( uint32_t *cnt ){ - int rc = OMPI_SUCCESS; + int pos, rc = OMPI_SUCCESS; mca_rcache_rb_tree_item_t* tree_item; OPAL_THREAD_LOCK(&rcache->lock); *cnt = 0; @@ -55,7 +55,11 @@ int mca_rcache_rb_find ( OBJ_DESTRUCT(regs); OBJ_CONSTRUCT(regs, ompi_pointer_array_t); - rc = ompi_pointer_array_set_item(regs, 0, (void*) tree_item->reg); + pos = ompi_pointer_array_add(regs, (void*) tree_item->reg); + if(0 != pos) { + opal_output(0, "error inserting registration in 1st position"); + return OMPI_ERROR; + } if(OMPI_SUCCESS != rc) { OPAL_THREAD_UNLOCK(&rcache->lock); @@ -72,8 +76,8 @@ int mca_rcache_rb_find ( *cnt = 1; } assert(tree_item->reg->bound - tree_item->reg->base > 0); - assert(((void*) tree_item->reg->base) <= addr); - assert(((void*) tree_item->reg->bound) >= addr); + assert(((void*) tree_item->reg->base_align) <= addr); + assert(((void*) tree_item->reg->bound_align) >= 
addr); return rc; } diff --git a/ompi/mca/rcache/rb/rcache_rb_tree.c b/ompi/mca/rcache/rb/rcache_rb_tree.c index acb629ef9d..0fd622d65d 100644 --- a/ompi/mca/rcache/rb/rcache_rb_tree.c +++ b/ompi/mca/rcache/rb/rcache_rb_tree.c @@ -35,27 +35,6 @@ int mca_rcache_rb_tree_init(mca_rcache_rb_module_t* rcache) { return ompi_rb_tree_init(&rcache->rb_tree, mca_rcache_rb_tree_node_compare); } -/** - * Searches the rcache to see if it has allocated the memory that is passed in. - * If so it returns an array of rcaches the memory is registered with. - * - * @param base pointer to the memory to lookup - * - * @retval NULL if the memory is not in any rcache - * @retval pointer to an array of type mca_rcache_base_reg_rcache_t - */ -static inline struct mca_rcache_rb_tree_item_t * - mca_rcache_rb_tree_find_nl( - mca_rcache_rb_module_t* rcache, - void* base - ) -{ - mca_rcache_rb_tree_key_t key; - key.base = base; - key.bound = base; - return (mca_rcache_rb_tree_item_t *) - ompi_rb_tree_find(&rcache->rb_tree, &key); -} /** * Searches the mpool to see if it has allocated the memory that is passed in. @@ -81,8 +60,8 @@ struct mca_rcache_rb_tree_item_t * mca_rcache_rb_tree_find( ompi_rb_tree_find(&rcache->rb_tree, &key); if(NULL != found ) { - if((void*) found->reg->base > base || - (void*) found->reg->bound < base){ + if((void*) found->reg->base_align > base || + (void*) found->reg->bound_align < base){ assert(0); } } @@ -137,8 +116,8 @@ int mca_rcache_rb_tree_insert( return rc; rb_tree_item = (mca_rcache_rb_tree_item_t*) item; - rb_tree_item->key.base = reg->base; - rb_tree_item->key.bound = reg->bound; + rb_tree_item->key.base = reg->base_align; + rb_tree_item->key.bound = reg->bound_align; rb_tree_item->reg = reg; rc = ompi_rb_tree_insert(&rb_module->rb_tree,