From 3c97b3f7226524e843ec65d3a45511664f721ee3 Mon Sep 17 00:00:00 2001 From: Galen Shipman Date: Tue, 27 Sep 2005 02:01:21 +0000 Subject: [PATCH] Modified the registration to include a base_align and bound_align for searching the tree. Modified the memory callback to search the tree at each page boundary for registrations. This is necessary as an application may malloc memory and send out of any portion of that memory, even discontiguous regions. This commit was SVN r7510. --- ompi/mca/btl/mvapi/btl_mvapi_component.c | 2 +- ompi/mca/mpool/base/base.h | 16 ++++++ ompi/mca/mpool/base/mpool_base_mem_cb.c | 50 ++++++++++++------- ompi/mca/mpool/base/mpool_base_open.c | 10 ++++ ompi/mca/mpool/gm/mpool_gm_module.c | 12 ++++- ompi/mca/mpool/mpool.h | 4 +- ompi/mca/mpool/mvapi/mpool_mvapi.h | 13 +---- ompi/mca/mpool/mvapi/mpool_mvapi_component.c | 15 ++---- ompi/mca/mpool/mvapi/mpool_mvapi_module.c | 19 +++++-- ompi/mca/mpool/openib/mpool_openib.h | 2 - .../mca/mpool/openib/mpool_openib_component.c | 11 +--- ompi/mca/mpool/openib/mpool_openib_module.c | 12 ++++- ompi/mca/pml/ob1/pml_ob1_rdma.c | 3 +- ompi/mca/rcache/rb/rcache_rb.c | 16 +++--- ompi/mca/rcache/rb/rcache_rb_tree.c | 29 ++--------- 15 files changed, 119 insertions(+), 95 deletions(-) diff --git a/ompi/mca/btl/mvapi/btl_mvapi_component.c b/ompi/mca/btl/mvapi/btl_mvapi_component.c index 46c7609bed..682cfef561 100644 --- a/ompi/mca/btl/mvapi/btl_mvapi_component.c +++ b/ompi/mca/btl/mvapi/btl_mvapi_component.c @@ -296,7 +296,7 @@ mca_btl_base_module_t** mca_btl_mvapi_component_init(int *num_btl_modules, opal_list_item_t* item; /* ugly HACK!! 
*/ - /* mallopt(M_TRIM_THRESHOLD, -1); */ + /* mallopt(M_TRIM_THRESHOLD, -1); */ /* mallopt(M_MMAP_MAX, 0); */ /* initialization */ *num_btl_modules = 0; diff --git a/ompi/mca/mpool/base/base.h b/ompi/mca/mpool/base/base.h index 9e9df155ac..77679ba7e6 100644 --- a/ompi/mca/mpool/base/base.h +++ b/ompi/mca/mpool/base/base.h @@ -31,6 +31,22 @@ extern "C" { #endif +static inline unsigned long my_log2(unsigned long val) { + unsigned long count = -1; + while(val > 0) { + val = val >> 1; + count++; + } + return count; +} +static inline void *down_align_addr(void* addr, unsigned int shift) { + return (void*) (((unsigned long) addr) & (~(unsigned long) 0) << shift); +} + +static inline void *up_align_addr(void*addr, unsigned int shift) { + return (void*) ((((unsigned long) addr) | ~((~(unsigned long) 0) << shift)) + 1); +} + struct mca_mpool_base_selected_module_t { opal_list_item_t super; mca_mpool_base_component_t *mpool_component; diff --git a/ompi/mca/mpool/base/mpool_base_mem_cb.c b/ompi/mca/mpool/base/mpool_base_mem_cb.c index e7908fdf47..0c670e3e48 100644 --- a/ompi/mca/mpool/base/mpool_base_mem_cb.c +++ b/ompi/mca/mpool/base/mpool_base_mem_cb.c @@ -19,6 +19,10 @@ #include "mpool_base_mem_cb.h" #include "base.h" + +extern uint32_t mca_mpool_base_page_size; +extern uint32_t mca_mpool_base_page_size_log; + /* * memory hook callback, called when memory is free'd out from under us */ @@ -30,30 +34,42 @@ void mca_mpool_base_mem_cb(void* base, size_t size, void* cbdata) mca_mpool_base_selected_module_t* current; int rc; opal_list_item_t* item; - + void* base_addr; + void* bound_addr; + base_addr = down_align_addr( base, mca_mpool_base_page_size_log); + bound_addr = up_align_addr((void*) ((unsigned long) base + size - 1), mca_mpool_base_page_size_log); OBJ_CONSTRUCT(&regs, ompi_pointer_array_t); for(item = opal_list_get_first(&mca_mpool_base_modules); item != opal_list_get_end(&mca_mpool_base_modules); item = opal_list_get_next(item)) { current = 
(mca_mpool_base_selected_module_t*) item; - if(NULL != current->mpool_module->mpool_find) { - rc = current->mpool_module->mpool_find( - current->mpool_module, - base, - size, - &regs, - &cnt - ); - if(OMPI_SUCCESS != rc) { - continue; + + for( ; base_addr <= bound_addr; + base_addr =(void*) ((unsigned long) base_addr + mca_mpool_base_page_size)) { + + if(NULL != current->mpool_module->mpool_find) { + rc = current->mpool_module->mpool_find( + current->mpool_module, + base_addr, + size, + &regs, + &cnt + ); + if(OMPI_SUCCESS != rc) { + continue; + } + for(i = 0; i < cnt; i++) { + + reg = (mca_mpool_base_registration_t*)ompi_pointer_array_get_item(&regs, i); + if(base_addr < reg->bound) { + base_addr = down_align_addr( reg->bound, mca_mpool_base_page_size_log ); + } + current->mpool_module->mpool_deregister(current->mpool_module, reg); + } } - for(i = 0; i < cnt; i++) { - reg = (mca_mpool_base_registration_t*)ompi_pointer_array_get_item(&regs, i); - assert(reg->base == base); - current->mpool_module->mpool_deregister(current->mpool_module, reg); - } - } + } + } OBJ_DESTRUCT(&regs); } diff --git a/ompi/mca/mpool/base/mpool_base_open.c b/ompi/mca/mpool/base/mpool_base_open.c index af99ddcee7..c4dc391b54 100644 --- a/ompi/mca/mpool/base/mpool_base_open.c +++ b/ompi/mca/mpool/base/mpool_base_open.c @@ -23,6 +23,8 @@ #include "mca/mpool/mpool.h" #include "mca/mpool/base/base.h" #include "ompi/include/constants.h" +#include <unistd.h> + /* * The following file was created by configure. 
It contains extern @@ -42,6 +44,9 @@ */ int mca_mpool_base_output = -1; int mca_mpool_base_use_mem_hooks = 0; +uint32_t mca_mpool_base_page_size; +uint32_t mca_mpool_base_page_size_log; + opal_list_t mca_mpool_base_components; opal_list_t mca_mpool_base_modules; @@ -84,6 +89,11 @@ int mca_mpool_base_open(void) /* param = mca_base_param_find("mpi", NULL, "leave_pinned"); */ /* mca_base_param_lookup_int(param, &mca_mpool_base_use_mem_hooks); */ /* } */ + + /* get the page size for this architecture*/ + mca_mpool_base_page_size = sysconf(_SC_PAGESIZE); + mca_mpool_base_page_size_log = my_log2(mca_mpool_base_page_size); + return OMPI_SUCCESS; } diff --git a/ompi/mca/mpool/gm/mpool_gm_module.c b/ompi/mca/mpool/gm/mpool_gm_module.c index 69d884c720..60adafab97 100644 --- a/ompi/mca/mpool/gm/mpool_gm_module.c +++ b/ompi/mca/mpool/gm/mpool_gm_module.c @@ -18,8 +18,12 @@ #include #include "opal/util/output.h" #include "mpool_gm.h" -#include "mca/rcache/rcache.h" -#include "mca/rcache/base/base.h" +#include "ompi/mca/rcache/rcache.h" +#include "ompi/mca/rcache/base/base.h" +#include "ompi/mca/mpool/base/base.h" + +extern uint32_t mca_mpool_base_page_size; +extern uint32_t mca_mpool_base_page_size_log; /* @@ -91,6 +95,10 @@ int mca_mpool_gm_register( reg->base = addr; reg->flags = flags; reg->bound = reg->base + size - 1; + reg->base_align = down_align_addr(addr, mca_mpool_base_page_size_log); + reg->bound_align = up_align_addr(reg->bound + , mca_mpool_base_page_size_log); + OPAL_THREAD_ADD32(&reg->ref_count,1); if(flags & (MCA_MPOOL_FLAGS_CACHE | MCA_MPOOL_FLAGS_PERSIST)) { diff --git a/ompi/mca/mpool/mpool.h b/ompi/mca/mpool/mpool.h index 37827cb06c..b09e2cb73c 100644 --- a/ompi/mca/mpool/mpool.h +++ b/ompi/mca/mpool/mpool.h @@ -35,7 +35,9 @@ struct mca_mpool_base_registration_t { struct mca_mpool_base_module_t *mpool; unsigned char* base; unsigned char* bound; - unsigned char* alloc_base; + unsigned char* alloc_base; + unsigned char* base_align; + unsigned char* bound_align; 
void* user_data; uint32_t ref_count; uint32_t flags; diff --git a/ompi/mca/mpool/mvapi/mpool_mvapi.h b/ompi/mca/mpool/mvapi/mpool_mvapi.h index fdb32ec1a9..cdee7f76ca 100644 --- a/ompi/mca/mpool/mvapi/mpool_mvapi.h +++ b/ompi/mca/mpool/mvapi/mpool_mvapi.h @@ -30,20 +30,9 @@ extern "C" { #endif -static inline void * DOWN_ALIGN_ADDR(void * addr, uint32_t cnt) { - return (void*)((MT_virt_addr_t)(addr) & (~((MT_virt_addr_t)0) << (cnt))); -} - -static inline void* ALIGN_ADDR(void* addr, uint32_t cnt ) { - DOWN_ALIGN_ADDR(((addr) + ~(~((MT_virt_addr_t)0) << (cnt))), (cnt)); - return addr; -} - - + struct mca_mpool_mvapi_component_t { mca_mpool_base_component_t super; - long page_size; - long page_size_log; char* rcache_name; }; diff --git a/ompi/mca/mpool/mvapi/mpool_mvapi_component.c b/ompi/mca/mpool/mvapi/mpool_mvapi_component.c index 74adf5eee5..651e111f14 100644 --- a/ompi/mca/mpool/mvapi/mpool_mvapi_component.c +++ b/ompi/mca/mpool/mvapi/mpool_mvapi_component.c @@ -21,7 +21,6 @@ #include "mpool_mvapi.h" #include "util/proc_info.h" #include "util/sys_info.h" -#include /* * Local functions @@ -66,6 +65,8 @@ static void mca_mpool_mvapi_registration_constructor( mca_mpool_mvapi_registrati { registration->base_reg.base = NULL; registration->base_reg.bound = NULL; + registration->base_reg.base_align = NULL; + registration->base_reg.bound_align = NULL; registration->base_reg.flags = 0; } @@ -96,9 +97,8 @@ static int mca_mpool_mvapi_open(void) { /* register VAPI component parameters */ - /* get the page size for this architecture*/ - mca_mpool_mvapi_component.page_size = sysconf(_SC_PAGESIZE); - + + return OMPI_SUCCESS; } @@ -106,7 +106,6 @@ static mca_mpool_base_module_t* mca_mpool_mvapi_init( struct mca_mpool_base_resources_t* resources) { mca_mpool_mvapi_module_t* mpool_module; - long page_size = mca_mpool_mvapi_component.page_size; mca_base_param_reg_string(&mca_mpool_mvapi_component.super.mpool_version, "rcache_name", "The name of the registration cache the mpool 
should use", @@ -115,12 +114,6 @@ static mca_mpool_base_module_t* mca_mpool_mvapi_init( "rb", &(mca_mpool_mvapi_component.rcache_name)); - mca_mpool_mvapi_component.page_size_log = 0; - while(page_size > 1){ - page_size = page_size >> 1; - mca_mpool_mvapi_component.page_size_log++; - } - mpool_module = (mca_mpool_mvapi_module_t*)malloc(sizeof(mca_mpool_mvapi_module_t)); mca_mpool_mvapi_module_init(mpool_module); diff --git a/ompi/mca/mpool/mvapi/mpool_mvapi_module.c b/ompi/mca/mpool/mvapi/mpool_mvapi_module.c index 257ce82317..6a9bb1ed96 100644 --- a/ompi/mca/mpool/mvapi/mpool_mvapi_module.c +++ b/ompi/mca/mpool/mvapi/mpool_mvapi_module.c @@ -24,7 +24,10 @@ #include #include "mca/rcache/rcache.h" #include "mca/rcache/base/base.h" +#include "ompi/mca/mpool/base/base.h" +extern uint32_t mca_mpool_base_page_size; +extern uint32_t mca_mpool_base_page_size_log; /* @@ -60,8 +63,8 @@ void* mca_mpool_mvapi_alloc( mca_mpool_base_registration_t** registration) { - void* addr_malloc = (void*)malloc(size + mca_mpool_mvapi_component.page_size); - void* addr = (void*) ALIGN_ADDR(addr_malloc, mca_mpool_mvapi_component.page_size_log); + void* addr_malloc = (void*)malloc(size + mca_mpool_base_page_size); + void* addr = (void*) up_align_addr(addr_malloc, mca_mpool_base_page_size_log); if(OMPI_SUCCESS != mpool->mpool_register(mpool, addr, size, flags, registration)) { free(addr_malloc); return NULL; @@ -85,9 +88,10 @@ int mca_mpool_mvapi_register( mca_mpool_mvapi_module_t * mpool_module = (mca_mpool_mvapi_module_t*) mpool; mca_mpool_mvapi_registration_t * vapi_reg; VAPI_mrw_t mr_in, mr_out; - VAPI_ret_t ret; + + assert(size > 0); memset(&mr_in, 0, sizeof(VAPI_mrw_t)); memset(&mr_out, 0, sizeof(VAPI_mrw_t)); @@ -103,7 +107,7 @@ int mca_mpool_mvapi_register( mr_in.l_key = 0; mr_in.r_key = 0; mr_in.pd_hndl = mpool_module->hca_pd.pd_tag; - mr_in.size = size; + mr_in.size = size; mr_in.start = (VAPI_virt_addr_t) (MT_virt_addr_t) addr; mr_in.type = VAPI_MR; @@ -123,8 +127,13 @@ int 
mca_mpool_mvapi_register( vapi_reg->l_key = mr_out.l_key; vapi_reg->r_key = mr_out.r_key; vapi_reg->base_reg.base = addr; - vapi_reg->base_reg.bound = (void*) ((char*) addr + size - 1); + vapi_reg->base_reg.bound = (unsigned char*) (unsigned long) addr + size - 1; + vapi_reg->base_reg.base_align = down_align_addr(addr, mca_mpool_base_page_size_log); + vapi_reg->base_reg.bound_align = up_align_addr(vapi_reg->base_reg.bound + , mca_mpool_base_page_size_log); + assert(vapi_reg->base_reg.bound - vapi_reg->base_reg.base > 0); + if(flags & (MCA_MPOOL_FLAGS_CACHE | MCA_MPOOL_FLAGS_PERSIST)) { mpool->rcache->rcache_insert(mpool->rcache, (mca_mpool_base_registration_t*) vapi_reg, diff --git a/ompi/mca/mpool/openib/mpool_openib.h b/ompi/mca/mpool/openib/mpool_openib.h index 0add664133..02f144defc 100644 --- a/ompi/mca/mpool/openib/mpool_openib.h +++ b/ompi/mca/mpool/openib/mpool_openib.h @@ -41,8 +41,6 @@ static inline void* ALIGN_ADDR(void* addr, uint32_t cnt ) { struct mca_mpool_openib_component_t { mca_mpool_base_component_t super; - long page_size; - long page_size_log; char* rcache_name; }; diff --git a/ompi/mca/mpool/openib/mpool_openib_component.c b/ompi/mca/mpool/openib/mpool_openib_component.c index 787714ca5b..eb9502e0ec 100644 --- a/ompi/mca/mpool/openib/mpool_openib_component.c +++ b/ompi/mca/mpool/openib/mpool_openib_component.c @@ -95,9 +95,7 @@ OBJ_CLASS_INSTANCE( */ static int mca_mpool_openib_open(void) { - /* get the page size for this architecture*/ - mca_mpool_openib_component.page_size = sysconf(_SC_PAGESIZE); - + return OMPI_SUCCESS; } @@ -105,7 +103,6 @@ static mca_mpool_base_module_t* mca_mpool_openib_init( struct mca_mpool_base_resources_t* resources) { mca_mpool_openib_module_t* mpool_module; - long page_size = mca_mpool_openib_component.page_size; mca_base_param_reg_string(&mca_mpool_openib_component.super.mpool_version, "rcache_name", "The name of the registration cache the mpool should use", @@ -115,12 +112,6 @@ static mca_mpool_base_module_t* 
mca_mpool_openib_init( &(mca_mpool_openib_component.rcache_name)); - mca_mpool_openib_component.page_size_log = 0; - while(page_size > 1){ - page_size = page_size >> 1; - mca_mpool_openib_component.page_size_log++; - } - mpool_module = (mca_mpool_openib_module_t*)malloc(sizeof(mca_mpool_openib_module_t)); mca_mpool_openib_module_init(mpool_module); diff --git a/ompi/mca/mpool/openib/mpool_openib_module.c b/ompi/mca/mpool/openib/mpool_openib_module.c index e5a6cd4839..eb4389de6d 100644 --- a/ompi/mca/mpool/openib/mpool_openib_module.c +++ b/ompi/mca/mpool/openib/mpool_openib_module.c @@ -23,6 +23,10 @@ #include #include "mca/rcache/rcache.h" #include "mca/rcache/base/base.h" +#include "ompi/mca/mpool/base/base.h" + +extern uint32_t mca_mpool_base_page_size; +extern uint32_t mca_mpool_base_page_size_log; /* * Initializes the mpool module. @@ -57,7 +61,7 @@ void* mca_mpool_openib_alloc( mca_mpool_base_registration_t** registration) { - void* addr_malloc = (void*)memalign(mca_mpool_openib_component.page_size, size); + void* addr_malloc = (void*)memalign(mca_mpool_base_page_size, size); void* addr = addr_malloc; if(OMPI_SUCCESS != mpool->mpool_register(mpool, addr, size, flags, registration)) { @@ -101,7 +105,11 @@ int mca_mpool_openib_register(mca_mpool_base_module_t* mpool, vapi_reg->base_reg.base = addr; vapi_reg->base_reg.bound = (void*) ((char*) addr + size - 1); - + vapi_reg->base_reg.base_align = down_align_addr(addr, mca_mpool_base_page_size_log); + vapi_reg->base_reg.bound_align = up_align_addr(vapi_reg->base_reg.bound + , mca_mpool_base_page_size_log); + + if(flags & (MCA_MPOOL_FLAGS_CACHE | MCA_MPOOL_FLAGS_PERSIST)) { mpool->rcache->rcache_insert(mpool->rcache, (mca_mpool_base_registration_t*) vapi_reg, diff --git a/ompi/mca/pml/ob1/pml_ob1_rdma.c b/ompi/mca/pml/ob1/pml_ob1_rdma.c index a20cacda05..3074696a99 100644 --- a/ompi/mca/pml/ob1/pml_ob1_rdma.c +++ b/ompi/mca/pml/ob1/pml_ob1_rdma.c @@ -93,6 +93,7 @@ size_t mca_pml_ob1_rdma_btls( } else 
if(mca_pml_ob1.leave_pinned) { unsigned char* new_base = reg->base; size_t new_len = (base - reg->base) + size; + assert(new_len >= size); btl_mpool->mpool_deregister(btl_mpool, reg); btl_mpool->mpool_register(btl_mpool, @@ -111,7 +112,6 @@ size_t mca_pml_ob1_rdma_btls( } continue; } - /* * find the best fit when there are multiple registrations */ @@ -257,6 +257,7 @@ mca_mpool_base_registration_t* mca_pml_ob1_rdma_registration( } else { unsigned char* new_base = largest->base; size_t new_len = (base - largest->base) + size; + btl_mpool->mpool_deregister(btl_mpool, largest); assert(new_len >= size); btl_mpool->mpool_register(btl_mpool, diff --git a/ompi/mca/rcache/rb/rcache_rb.c b/ompi/mca/rcache/rb/rcache_rb.c index 2f9f8f174c..c286ce3141 100644 --- a/ompi/mca/rcache/rb/rcache_rb.c +++ b/ompi/mca/rcache/rb/rcache_rb.c @@ -14,11 +14,11 @@ * $HEADER$ */ -#include "mca/rcache/rcache.h" +#include "ompi/mca/rcache/rcache.h" #include "rcache_rb.h" #include "rcache_rb_tree.h" #include "rcache_rb_mru.h" - +#include "opal/util/output.h" /** * Initialize the rcache @@ -42,7 +42,7 @@ int mca_rcache_rb_find ( uint32_t *cnt ){ - int rc = OMPI_SUCCESS; + int pos, rc = OMPI_SUCCESS; mca_rcache_rb_tree_item_t* tree_item; OPAL_THREAD_LOCK(&rcache->lock); *cnt = 0; @@ -55,7 +55,11 @@ int mca_rcache_rb_find ( OBJ_DESTRUCT(regs); OBJ_CONSTRUCT(regs, ompi_pointer_array_t); - rc = ompi_pointer_array_set_item(regs, 0, (void*) tree_item->reg); + pos = ompi_pointer_array_add(regs, (void*) tree_item->reg); + if(0 != pos) { + opal_output(0, "error inserting registration in 1st position"); + return OMPI_ERROR; + } if(OMPI_SUCCESS != rc) { OPAL_THREAD_UNLOCK(&rcache->lock); @@ -72,8 +76,8 @@ int mca_rcache_rb_find ( *cnt = 1; } assert(tree_item->reg->bound - tree_item->reg->base > 0); - assert(((void*) tree_item->reg->base) <= addr); - assert(((void*) tree_item->reg->bound) >= addr); + assert(((void*) tree_item->reg->base_align) <= addr); + assert(((void*) tree_item->reg->bound_align) >= 
addr); return rc; } diff --git a/ompi/mca/rcache/rb/rcache_rb_tree.c b/ompi/mca/rcache/rb/rcache_rb_tree.c index acb629ef9d..0fd622d65d 100644 --- a/ompi/mca/rcache/rb/rcache_rb_tree.c +++ b/ompi/mca/rcache/rb/rcache_rb_tree.c @@ -35,27 +35,6 @@ int mca_rcache_rb_tree_init(mca_rcache_rb_module_t* rcache) { return ompi_rb_tree_init(&rcache->rb_tree, mca_rcache_rb_tree_node_compare); } -/** - * Searches the rcache to see if it has allocated the memory that is passed in. - * If so it returns an array of rcaches the memory is registered with. - * - * @param base pointer to the memory to lookup - * - * @retval NULL if the memory is not in any rcache - * @retval pointer to an array of type mca_rcache_base_reg_rcache_t - */ -static inline struct mca_rcache_rb_tree_item_t * - mca_rcache_rb_tree_find_nl( - mca_rcache_rb_module_t* rcache, - void* base - ) -{ - mca_rcache_rb_tree_key_t key; - key.base = base; - key.bound = base; - return (mca_rcache_rb_tree_item_t *) - ompi_rb_tree_find(&rcache->rb_tree, &key); -} /** * Searches the mpool to see if it has allocated the memory that is passed in. @@ -81,8 +60,8 @@ struct mca_rcache_rb_tree_item_t * mca_rcache_rb_tree_find( ompi_rb_tree_find(&rcache->rb_tree, &key); if(NULL != found ) { - if((void*) found->reg->base > base || - (void*) found->reg->bound < base){ + if((void*) found->reg->base_align > base || + (void*) found->reg->bound_align < base){ assert(0); } } @@ -137,8 +116,8 @@ int mca_rcache_rb_tree_insert( return rc; rb_tree_item = (mca_rcache_rb_tree_item_t*) item; - rb_tree_item->key.base = reg->base; - rb_tree_item->key.bound = reg->bound; + rb_tree_item->key.base = reg->base_align; + rb_tree_item->key.bound = reg->bound_align; rb_tree_item->reg = reg; rc = ompi_rb_tree_insert(&rb_module->rb_tree,