diff --git a/opal/mca/btl/vader/btl_vader.h b/opal/mca/btl/vader/btl_vader.h index 14e3b2dc11..5290a7faa7 100644 --- a/opal/mca/btl/vader/btl_vader.h +++ b/opal/mca/btl/vader/btl_vader.h @@ -48,6 +48,7 @@ #include "opal/sys/atomic.h" #include "opal/mca/btl/btl.h" #include "opal/mca/rcache/rcache.h" +#include "opal/mca/rcache/base/rcache_base_vma.h" #include "opal/mca/btl/base/base.h" #include "opal/mca/rcache/rcache.h" #include "opal/mca/rcache/base/base.h" @@ -103,6 +104,7 @@ struct mca_btl_vader_component_t { int vader_free_list_inc; /**< number of elements to alloc when growing free lists */ #if OPAL_BTL_VADER_HAVE_XPMEM xpmem_segid_t my_seg_id; /**< this rank's xpmem segment id */ + mca_rcache_base_vma_module_t *vma_module; /**< registration cache for xpmem segments */ #endif opal_shmem_ds_t seg_ds; /**< this rank's shared memory segment (when not using xpmem) */ diff --git a/opal/mca/btl/vader/btl_vader_endpoint.h b/opal/mca/btl/vader/btl_vader_endpoint.h index 3de9d6e477..d3a39e08f2 100644 --- a/opal/mca/btl/vader/btl_vader_endpoint.h +++ b/opal/mca/btl/vader/btl_vader_endpoint.h @@ -28,7 +28,6 @@ #include "opal_config.h" #include "btl_vader_xpmem.h" -#include "opal/mca/rcache/base/rcache_base_vma.h" #define MCA_BTL_VADER_FBOX_ALIGNMENT 32 #define MCA_BTL_VADER_FBOX_ALIGNMENT_MASK (MCA_BTL_VADER_FBOX_ALIGNMENT - 1) @@ -75,7 +74,6 @@ typedef struct mca_btl_base_endpoint_t { union { #if OPAL_BTL_VADER_HAVE_XPMEM struct { - mca_rcache_base_vma_module_t *vma_module; xpmem_apid_t apid; /**< xpmem apid for remote peer */ } xpmem; #endif diff --git a/opal/mca/btl/vader/btl_vader_module.c b/opal/mca/btl/vader/btl_vader_module.c index f54b4079b9..5c9c084947 100644 --- a/opal/mca/btl/vader/btl_vader_module.c +++ b/opal/mca/btl/vader/btl_vader_module.c @@ -145,6 +145,12 @@ static int vader_btl_first_time_init(mca_btl_vader_t *vader_btl, int n) /* set flag indicating btl has been inited */ vader_btl->btl_inited = true; +#if OPAL_BTL_VADER_HAVE_XPMEM + if 
(MCA_BTL_VADER_XPMEM == mca_btl_vader_component.single_copy_mechanism) { + mca_btl_vader_component.vma_module = mca_rcache_base_vma_module_alloc (); + } +#endif + return OPAL_SUCCESS; } @@ -171,7 +177,6 @@ static int init_vader_endpoint (struct mca_btl_base_endpoint_t *ep, struct opal_ if (MCA_BTL_VADER_XPMEM == mca_btl_vader_component.single_copy_mechanism) { /* always use xpmem if it is available */ ep->segment_data.xpmem.apid = xpmem_get (modex->xpmem.seg_id, XPMEM_RDWR, XPMEM_PERMIT_MODE, (void *) 0666); - ep->segment_data.xpmem.vma_module = mca_rcache_base_vma_module_alloc (); (void) vader_get_registation (ep, modex->xpmem.segment_base, mca_btl_vader_component.segment_size, MCA_RCACHE_FLAGS_PERSIST, (void **) &ep->segment_base); } else { @@ -354,6 +359,12 @@ static int vader_finalize(struct mca_btl_base_module_t *btl) opal_shmem_segment_detach (&mca_btl_vader_component.seg_ds); } +#if OPAL_BTL_VADER_HAVE_XPMEM + if (NULL != mca_btl_vader_component.vma_module) { + OBJ_RELEASE(mca_btl_vader_component.vma_module); + } +#endif + return OPAL_SUCCESS; } @@ -540,14 +551,6 @@ static void mca_btl_vader_endpoint_constructor (mca_btl_vader_endpoint_t *ep) } #if OPAL_BTL_VADER_HAVE_XPMEM -static int mca_btl_vader_endpoint_rcache_cleanup (mca_rcache_base_registration_t *reg, void *ctx) -{ - mca_rcache_base_vma_module_t *vma_module = (mca_rcache_base_vma_module_t *) ctx; - /* otherwise dereg will fail on assert */ - reg->ref_count = 0; - (void) mca_rcache_base_vma_delete (vma_module, reg); - return OPAL_SUCCESS; -} #endif static void mca_btl_vader_endpoint_destructor (mca_btl_vader_endpoint_t *ep) @@ -557,19 +560,7 @@ static void mca_btl_vader_endpoint_destructor (mca_btl_vader_endpoint_t *ep) #if OPAL_BTL_VADER_HAVE_XPMEM if (MCA_BTL_VADER_XPMEM == mca_btl_vader_component.single_copy_mechanism) { - if (ep->segment_data.xpmem.vma_module) { - /* clean out the registration cache */ - (void) mca_rcache_base_vma_iterate (ep->segment_data.xpmem.vma_module, - NULL, (size_t) -1, - 
mca_btl_vader_endpoint_rcache_cleanup, - (void *) ep->segment_data.xpmem.vma_module); - OBJ_RELEASE(ep->segment_data.xpmem.vma_module); - } - - if (ep->segment_base) { - xpmem_release (ep->segment_data.xpmem.apid); - ep->segment_data.xpmem.apid = 0; - } + mca_btl_vader_xpmem_cleanup_endpoint (ep); } else #endif if (ep->segment_data.other.seg_ds) { diff --git a/opal/mca/btl/vader/btl_vader_xpmem.c b/opal/mca/btl/vader/btl_vader_xpmem.c index f1fcf8b8fb..09203c9202 100644 --- a/opal/mca/btl/vader/btl_vader_xpmem.c +++ b/opal/mca/btl/vader/btl_vader_xpmem.c @@ -32,113 +32,121 @@ int mca_btl_vader_xpmem_init (void) return OPAL_SUCCESS; } +struct vader_check_reg_ctx_t { + mca_rcache_base_vma_module_t *vma_module; + mca_btl_base_endpoint_t *ep; + mca_rcache_base_registration_t **reg; + uintptr_t base; + uintptr_t bound; +}; +typedef struct vader_check_reg_ctx_t vader_check_reg_ctx_t; + +static int vader_check_reg (mca_rcache_base_registration_t *reg, void *ctx) +{ + vader_check_reg_ctx_t *vader_ctx = (vader_check_reg_ctx_t *) ctx; + + if ((intptr_t) reg->alloc_base != vader_ctx->ep->peer_smp_rank || + (reg->flags & MCA_RCACHE_FLAGS_PERSIST)) { + /* ignore this registration */ + return OPAL_SUCCESS; + } + + vader_ctx->reg[0] = reg; + + if (vader_ctx->bound <= (uintptr_t) reg->bound && vader_ctx->base >= (uintptr_t) reg->base) { + (void)opal_atomic_add (&reg->ref_count, 1); + return 1; + } + + /* remove this pointer from the rcache and decrement its reference count + (so it is detached later) */ + mca_rcache_base_vma_delete (vader_ctx->vma_module, reg); + + return 2; +} + /* look up the remote pointer in the peer rcache and attach if * necessary */ mca_rcache_base_registration_t *vader_get_registation (struct mca_btl_base_endpoint_t *ep, void *rem_ptr, size_t size, int flags, void **local_ptr) { - mca_rcache_base_vma_module_t *vma_module = ep->segment_data.xpmem.vma_module; - mca_rcache_base_registration_t *regs[10], *reg = NULL; + mca_rcache_base_vma_module_t *vma_module = 
mca_btl_vader_component.vma_module; + uint64_t attach_align = 1 << mca_btl_vader_component.log_attach_align; + mca_rcache_base_registration_t *reg = NULL; + vader_check_reg_ctx_t check_ctx = {.ep = ep, .reg = &reg, .vma_module = vma_module}; xpmem_addr_t xpmem_addr; uintptr_t base, bound; - uint64_t attach_align = 1 << mca_btl_vader_component.log_attach_align; int rc, i; - /* protect rcache access */ - OPAL_THREAD_LOCK(&ep->lock); - - /* use btl/self for self communication */ - assert (ep->peer_smp_rank != MCA_BTL_VADER_LOCAL_RANK); - base = OPAL_DOWN_ALIGN((uintptr_t) rem_ptr, attach_align, uintptr_t); bound = OPAL_ALIGN((uintptr_t) rem_ptr + size - 1, attach_align, uintptr_t) + 1; if (OPAL_UNLIKELY(bound > VADER_MAX_ADDRESS)) { bound = VADER_MAX_ADDRESS; } + check_ctx.base = base; + check_ctx.bound = bound; + /* several segments may match the base pointer */ - rc = mca_rcache_base_vma_find_all (vma_module, (void *) base, bound - base, regs, 10); - for (i = 0 ; i < rc ; ++i) { - if (bound <= (uintptr_t)regs[i]->bound && base >= (uintptr_t)regs[i]->base) { - (void)opal_atomic_add (&regs[i]->ref_count, 1); - reg = regs[i]; - goto reg_found; - } - - if (regs[i]->flags & MCA_RCACHE_FLAGS_PERSIST) { - continue; - } - - /* remove this pointer from the rcache and decrement its reference count - (so it is detached later) */ - rc = mca_rcache_base_vma_delete (vma_module, regs[i]); - if (OPAL_UNLIKELY(0 != rc)) { - /* someone beat us to it? */ - break; - } - + rc = mca_rcache_base_vma_iterate (vma_module, (void *) base, bound - base, vader_check_reg, &check_ctx); + if (2 == rc) { /* start the new segment from the lower of the two bases */ - base = (uintptr_t) regs[i]->base < base ? (uintptr_t) regs[i]->base : base; + base = (uintptr_t) reg->base < base ? 
(uintptr_t) reg->base : base; - (void)opal_atomic_add (&regs[i]->ref_count, -1); - - if (OPAL_LIKELY(0 == regs[i]->ref_count)) { + if (OPAL_LIKELY(0 == opal_atomic_add_32 (&reg->ref_count, -1))) { /* this pointer is not in use */ - (void) xpmem_detach (regs[i]->rcache_context); - OBJ_RELEASE(regs[i]); + (void) xpmem_detach (reg->rcache_context); + OBJ_RELEASE(reg); } - break; + reg = NULL; } - reg = OBJ_NEW(mca_rcache_base_registration_t); - if (OPAL_LIKELY(NULL != reg)) { - /* stick around for awhile */ - reg->ref_count = 2; - reg->base = (unsigned char *) base; - reg->bound = (unsigned char *) bound; - reg->flags = flags; + if (NULL == reg) { + reg = OBJ_NEW(mca_rcache_base_registration_t); + if (OPAL_LIKELY(NULL != reg)) { + /* stick around for awhile */ + reg->ref_count = 2; + reg->base = (unsigned char *) base; + reg->bound = (unsigned char *) bound; + reg->flags = flags; + reg->alloc_base = (void *) (intptr_t) ep->peer_smp_rank; #if defined(HAVE_SN_XPMEM_H) - xpmem_addr.id = ep->segment_data.xpmem.apid; + xpmem_addr.id = ep->segment_data.xpmem.apid; #else - xpmem_addr.apid = ep->segment_data.xpmem.apid; + xpmem_addr.apid = ep->segment_data.xpmem.apid; #endif - xpmem_addr.offset = base; + xpmem_addr.offset = base; - reg->rcache_context = xpmem_attach (xpmem_addr, bound - base, NULL); - if (OPAL_UNLIKELY((void *)-1 == reg->rcache_context)) { - OPAL_THREAD_UNLOCK(&ep->lock); - OBJ_RELEASE(reg); - return NULL; + reg->rcache_context = xpmem_attach (xpmem_addr, bound - base, NULL); + if (OPAL_UNLIKELY((void *)-1 == reg->rcache_context)) { + OBJ_RELEASE(reg); + return NULL; + } + + opal_memchecker_base_mem_defined (reg->rcache_context, bound - base); + + mca_rcache_base_vma_insert (vma_module, reg, 0); } - - opal_memchecker_base_mem_defined (reg->rcache_context, bound - base); - - mca_rcache_base_vma_insert (vma_module, reg, 0); } -reg_found: opal_atomic_wmb (); *local_ptr = (void *) ((uintptr_t) reg->rcache_context + (ptrdiff_t)((uintptr_t) rem_ptr - (uintptr_t) 
reg->base)); - OPAL_THREAD_UNLOCK(&ep->lock); - return reg; } void vader_return_registration (mca_rcache_base_registration_t *reg, struct mca_btl_base_endpoint_t *ep) { - mca_rcache_base_vma_module_t *vma_module = ep->segment_data.xpmem.vma_module; + mca_rcache_base_vma_module_t *vma_module = mca_btl_vader_component.vma_module; int32_t ref_count; ref_count = opal_atomic_add_32 (&reg->ref_count, -1); if (OPAL_UNLIKELY(0 == ref_count && !(reg->flags & MCA_RCACHE_FLAGS_PERSIST))) { /* protect rcache access */ - OPAL_THREAD_LOCK(&ep->lock); mca_rcache_base_vma_delete (vma_module, reg); - OPAL_THREAD_UNLOCK(&ep->lock); opal_memchecker_base_mem_noaccess (reg->rcache_context, (uintptr_t)(reg->bound - reg->base)); (void)xpmem_detach (reg->rcache_context); @@ -146,4 +154,31 @@ void vader_return_registration (mca_rcache_base_registration_t *reg, struct mca_ } } +static int mca_btl_vader_endpoint_xpmem_rcache_cleanup (mca_rcache_base_registration_t *reg, void *ctx) +{ + mca_rcache_base_vma_module_t *vma_module = mca_btl_vader_component.vma_module; + mca_btl_vader_endpoint_t *ep = (mca_btl_vader_endpoint_t *) ctx; + if ((intptr_t) reg->alloc_base == ep->peer_smp_rank) { + /* otherwise dereg will fail on assert */ + reg->ref_count = 0; + (void) mca_rcache_base_vma_delete (vma_module, reg); + OBJ_RELEASE(reg); + } + + return OPAL_SUCCESS; +} + +void mca_btl_vader_xpmem_cleanup_endpoint (struct mca_btl_base_endpoint_t *ep) +{ + /* clean out the registration cache */ + (void) mca_rcache_base_vma_iterate (mca_btl_vader_component.vma_module, + NULL, (size_t) -1, + mca_btl_vader_endpoint_xpmem_rcache_cleanup, + (void *) ep); + if (ep->segment_base) { + xpmem_release (ep->segment_data.xpmem.apid); + ep->segment_data.xpmem.apid = 0; + } +} + #endif /* OPAL_BTL_VADER_HAVE_XPMEM */ diff --git a/opal/mca/btl/vader/btl_vader_xpmem.h b/opal/mca/btl/vader/btl_vader_xpmem.h index 3d0b24ff5f..fa47773697 100644 --- a/opal/mca/btl/vader/btl_vader_xpmem.h +++ b/opal/mca/btl/vader/btl_vader_xpmem.h 
@@ -39,6 +39,7 @@ #define VADER_MAX_ADDRESS XPMEM_MAXADDR_SIZE #endif +struct mca_btl_base_endpoint_t; int mca_btl_vader_xpmem_init (void); @@ -46,6 +47,7 @@ mca_rcache_base_registration_t *vader_get_registation (struct mca_btl_base_endpo size_t size, int flags, void **local_ptr); void vader_return_registration (mca_rcache_base_registration_t *reg, struct mca_btl_base_endpoint_t *endpoint); +void mca_btl_vader_xpmem_cleanup_endpoint (struct mca_btl_base_endpoint_t *ep); #else