2013-03-27 22:10:02 +00:00
|
|
|
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
 * Copyright (c) 2011-2018 Los Alamos National Security, LLC. All rights
 *                         reserved.
 * Copyright (c) 2014      The University of Tennessee and The University
 *                         of Tennessee Research Foundation.  All rights
 *                         reserved.
 * Copyright (c) 2020      Google, LLC. All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */
|
|
|
|
|
2020-04-29 13:17:41 -06:00
|
|
|
#include "btl_sm.h"
|
2014-09-23 18:11:22 +00:00
|
|
|
|
2013-03-27 22:10:02 +00:00
|
|
|
#include "opal/include/opal/align.h"
|
2013-10-22 15:33:32 +00:00
|
|
|
#include "opal/mca/memchecker/base/base.h"
|
2013-03-27 22:10:02 +00:00
|
|
|
|
2020-04-29 13:17:41 -06:00
|
|
|
#if OPAL_BTL_SM_HAVE_XPMEM
|
2014-01-06 19:51:44 +00:00
|
|
|
|
2020-04-29 13:17:41 -06:00
|
|
|
int mca_btl_sm_xpmem_init (void)
|
2014-12-12 09:09:01 -07:00
|
|
|
{
|
2020-01-14 11:49:32 -07:00
|
|
|
/* Any attachment that goes past the Linux TASK_SIZE will always fail. To prevent this we need to
|
2020-04-29 13:17:41 -06:00
|
|
|
* determine the value of TASK_SIZE. On x86_64 the value was hard-coded in sm to be
|
2020-01-14 11:49:32 -07:00
|
|
|
* 0x7ffffffffffful but this approach does not work with AARCH64 (and possibly other architectures).
|
|
|
|
* Since there is really no way to directly determine the value we can (in all cases?) look through
|
|
|
|
* the mapping for this process to determine what the largest address is. This should be the top
|
|
|
|
* of the stack. No heap allocations should be larger than this value. Since the largest address
|
|
|
|
* may differ between processes the value must be shared as part of the modex and stored in the
|
|
|
|
* endpoint. */
|
|
|
|
FILE *fh = fopen("/proc/self/maps", "r");
|
|
|
|
if (NULL == fh) {
|
|
|
|
BTL_ERROR(("could not open /proc/self/maps for reading. disabling XPMEM"));
|
|
|
|
return OPAL_ERR_NOT_AVAILABLE;
|
|
|
|
}
|
|
|
|
|
|
|
|
char buffer[1024];
|
|
|
|
uintptr_t address_max = 0;
|
|
|
|
while (fgets(buffer, sizeof(buffer), fh)) {
|
|
|
|
uintptr_t low, high;
|
|
|
|
char *tmp;
|
|
|
|
/* each line of /proc/self/maps starts with low-high in hexidecimal (without a 0x) */
|
|
|
|
low = strtoul(buffer, &tmp, 16);
|
|
|
|
high = strtoul(tmp+1, NULL, 16);
|
|
|
|
if (address_max < high) {
|
|
|
|
address_max = high;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fclose (fh);
|
|
|
|
|
|
|
|
if (0 == address_max) {
|
|
|
|
BTL_ERROR(("could not determine the address max"));
|
|
|
|
return OPAL_ERR_NOT_AVAILABLE;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* save the calcuated maximum */
|
2020-04-29 13:17:41 -06:00
|
|
|
mca_btl_sm_component.my_address_max = address_max - 1;
|
2020-01-14 11:49:32 -07:00
|
|
|
|
|
|
|
/* it is safe to use XPMEM_MAXADDR_SIZE here (which is always (size_t)-1 even though
|
|
|
|
* it is not safe for attach */
|
2020-04-29 13:17:41 -06:00
|
|
|
mca_btl_sm_component.my_seg_id = xpmem_make (0, XPMEM_MAXADDR_SIZE, XPMEM_PERMIT_MODE,
|
2020-01-14 11:49:32 -07:00
|
|
|
(void *)0666);
|
2020-04-29 13:17:41 -06:00
|
|
|
if (-1 == mca_btl_sm_component.my_seg_id) {
|
2014-12-12 09:09:01 -07:00
|
|
|
return OPAL_ERR_NOT_AVAILABLE;
|
|
|
|
}
|
|
|
|
|
2020-04-29 13:17:41 -06:00
|
|
|
mca_btl_sm.super.btl_get = mca_btl_sm_get_xpmem;
|
|
|
|
mca_btl_sm.super.btl_put = mca_btl_sm_put_xpmem;
|
2014-12-12 09:09:01 -07:00
|
|
|
|
|
|
|
return OPAL_SUCCESS;
|
|
|
|
}
|
|
|
|
|
2020-04-29 13:17:41 -06:00
|
|
|
struct sm_check_reg_ctx_t {
|
2016-10-27 10:09:43 -06:00
|
|
|
mca_btl_base_endpoint_t *ep;
|
|
|
|
mca_rcache_base_registration_t **reg;
|
|
|
|
uintptr_t base;
|
|
|
|
uintptr_t bound;
|
|
|
|
};
|
2020-04-29 13:17:41 -06:00
|
|
|
typedef struct sm_check_reg_ctx_t sm_check_reg_ctx_t;
|
2016-10-27 10:09:43 -06:00
|
|
|
|
2020-04-29 13:17:41 -06:00
|
|
|
/**
 * VMA-tree iteration callback used by sm_get_registation() to find a usable registration.
 *
 * @param[in]     reg  registration currently visited by the iterator
 * @param[in,out] ctx  pointer to a sm_check_reg_ctx_t describing the requested interval;
 *                     ctx->reg[0] is updated with the selected registration or NULL
 *
 * @returns OPAL_SUCCESS when this registration is to be ignored (belongs to another peer,
 *          is being deleted, or was already invalidated by another thread)
 * @returns 1 when the registration fully covers [base, bound] and a reference was taken
 * @returns 2 when the registration only overlaps the interval; it has been flagged
 *          MCA_RCACHE_FLAGS_INVALID and the caller is expected to coalesce and release it
 */
static int sm_check_reg (mca_rcache_base_registration_t *reg, void *ctx)
{
    sm_check_reg_ctx_t *sm_ctx = (sm_check_reg_ctx_t *) ctx;

    /* alloc_base is used to store the peer's smp rank (see sm_get_registation) */
    if ((intptr_t) reg->alloc_base != sm_ctx->ep->peer_smp_rank) {
        /* ignore this registration */
        return OPAL_SUCCESS;
    }

    sm_ctx->reg[0] = reg;

    if (sm_ctx->bound <= (uintptr_t) reg->bound && sm_ctx->base >= (uintptr_t) reg->base) {
        if (0 == opal_atomic_fetch_add_32 (&reg->ref_count, 1)) {
            /* registration is being deleted by a thread in sm_return_registration. the
             * VMA tree implementation will block in mca_rcache_delete until we finish
             * iterating over the VMA tree so it is safe to just ignore this registration
             * and continue. */
            sm_ctx->reg[0] = NULL;
            return OPAL_SUCCESS;
        }

        return 1;
    }

    if (MCA_RCACHE_FLAGS_INVALID & opal_atomic_fetch_or_32(&reg->flags, MCA_RCACHE_FLAGS_INVALID)) {
        /* another thread has already marked this registration as invalid. ignore and continue. */
        sm_ctx->reg[0] = NULL;
        return OPAL_SUCCESS;
    }

    /* let the caller know we found an overlapping registration that can be coalesced into
     * the requested interval. the caller will remove the last reference and delete the
     * registration. */
    return 2;
}
|
|
|
|
|
2020-04-29 13:17:41 -06:00
|
|
|
/**
 * Drop one reference on an XPMEM registration and tear it down when it was the last.
 *
 * When the reference count reaches zero and the registration is not flagged
 * MCA_RCACHE_FLAGS_PERSIST, the registration is removed from the component's VMA
 * tree, its attached range is marked inaccessible for the memchecker, the XPMEM
 * attachment is detached and the registration object is released.
 *
 * @param[in] reg  registration to return
 * @param[in] ep   endpoint the registration belongs to (not read by this function)
 */
void sm_return_registration (mca_rcache_base_registration_t *reg, struct mca_btl_base_endpoint_t *ep)
{
    mca_rcache_base_vma_module_t *vma_module = mca_btl_sm_component.vma_module;
    int32_t ref_count;

    ref_count = opal_atomic_add_fetch_32 (&reg->ref_count, -1);
    if (OPAL_UNLIKELY(0 == ref_count && !(reg->flags & MCA_RCACHE_FLAGS_PERSIST))) {
#if OPAL_DEBUG
        int ret = mca_rcache_base_vma_delete (vma_module, reg);
        assert (OPAL_SUCCESS == ret);
#else
        (void) mca_rcache_base_vma_delete (vma_module, reg);
#endif
        opal_memchecker_base_mem_noaccess (reg->rcache_context, (uintptr_t)(reg->bound - reg->base));
        (void)xpmem_detach (reg->rcache_context);
        OBJ_RELEASE (reg);
    }
}
|
|
|
|
|
2013-03-27 22:10:02 +00:00
|
|
|
/* look up the remote pointer in the peer rcache and attach if
|
|
|
|
* necessary */
|
2020-04-29 13:17:41 -06:00
|
|
|
mca_rcache_base_registration_t *sm_get_registation (struct mca_btl_base_endpoint_t *ep, void *rem_ptr,
|
2015-11-02 12:07:08 -07:00
|
|
|
size_t size, int flags, void **local_ptr)
|
2013-03-27 22:10:02 +00:00
|
|
|
{
|
2020-04-29 13:17:41 -06:00
|
|
|
mca_rcache_base_vma_module_t *vma_module = mca_btl_sm_component.vma_module;
|
|
|
|
uint64_t attach_align = 1 << mca_btl_sm_component.log_attach_align;
|
2016-10-27 10:09:43 -06:00
|
|
|
mca_rcache_base_registration_t *reg = NULL;
|
2020-04-29 13:17:41 -06:00
|
|
|
sm_check_reg_ctx_t check_ctx = {.ep = ep, .reg = ®};
|
2014-01-29 18:35:47 +00:00
|
|
|
xpmem_addr_t xpmem_addr;
|
2013-03-27 22:10:02 +00:00
|
|
|
uintptr_t base, bound;
|
2016-11-21 14:58:34 -06:00
|
|
|
int rc;
|
2013-03-27 22:10:02 +00:00
|
|
|
|
2015-11-02 12:07:08 -07:00
|
|
|
base = OPAL_DOWN_ALIGN((uintptr_t) rem_ptr, attach_align, uintptr_t);
|
|
|
|
bound = OPAL_ALIGN((uintptr_t) rem_ptr + size - 1, attach_align, uintptr_t) + 1;
|
2020-01-14 11:49:32 -07:00
|
|
|
if (OPAL_UNLIKELY(bound > ep->segment_data.xpmem.address_max)) {
|
|
|
|
bound = ep->segment_data.xpmem.address_max;
|
2013-03-27 22:10:02 +00:00
|
|
|
}
|
|
|
|
|
2016-10-27 10:09:43 -06:00
|
|
|
check_ctx.base = base;
|
|
|
|
check_ctx.bound = bound;
|
2013-03-27 22:10:02 +00:00
|
|
|
|
2016-10-27 10:09:43 -06:00
|
|
|
/* several segments may match the base pointer */
|
2020-04-29 13:17:41 -06:00
|
|
|
rc = mca_rcache_base_vma_iterate (vma_module, (void *) base, bound - base, true, sm_check_reg, &check_ctx);
|
2016-10-27 10:09:43 -06:00
|
|
|
if (2 == rc) {
|
2020-01-07 21:48:01 -07:00
|
|
|
bound = bound < (uintptr_t) reg->bound ? (uintptr_t) reg->bound : bound;
|
|
|
|
base = base > (uintptr_t) reg->base ? (uintptr_t) reg->base : base;
|
2020-04-29 13:17:41 -06:00
|
|
|
sm_return_registration(reg, ep);
|
2016-10-27 10:09:43 -06:00
|
|
|
reg = NULL;
|
2013-03-27 22:10:02 +00:00
|
|
|
}
|
|
|
|
|
2016-10-27 10:09:43 -06:00
|
|
|
if (NULL == reg) {
|
|
|
|
reg = OBJ_NEW(mca_rcache_base_registration_t);
|
|
|
|
if (OPAL_LIKELY(NULL != reg)) {
|
|
|
|
/* stick around for awhile */
|
|
|
|
reg->ref_count = 2;
|
|
|
|
reg->base = (unsigned char *) base;
|
|
|
|
reg->bound = (unsigned char *) bound;
|
|
|
|
reg->flags = flags;
|
|
|
|
reg->alloc_base = (void *) (intptr_t) ep->peer_smp_rank;
|
2014-01-29 18:35:47 +00:00
|
|
|
|
|
|
|
#if defined(HAVE_SN_XPMEM_H)
|
2016-10-27 10:09:43 -06:00
|
|
|
xpmem_addr.id = ep->segment_data.xpmem.apid;
|
2014-01-29 18:35:47 +00:00
|
|
|
#else
|
2016-10-27 10:09:43 -06:00
|
|
|
xpmem_addr.apid = ep->segment_data.xpmem.apid;
|
2014-01-29 18:35:47 +00:00
|
|
|
#endif
|
2016-10-27 10:09:43 -06:00
|
|
|
xpmem_addr.offset = base;
|
2013-03-27 22:10:02 +00:00
|
|
|
|
2016-10-27 10:09:43 -06:00
|
|
|
reg->rcache_context = xpmem_attach (xpmem_addr, bound - base, NULL);
|
|
|
|
if (OPAL_UNLIKELY((void *)-1 == reg->rcache_context)) {
|
|
|
|
OBJ_RELEASE(reg);
|
|
|
|
return NULL;
|
|
|
|
}
|
2013-07-11 20:54:12 +00:00
|
|
|
|
2016-10-27 10:09:43 -06:00
|
|
|
opal_memchecker_base_mem_defined (reg->rcache_context, bound - base);
|
2013-10-22 15:33:32 +00:00
|
|
|
|
2018-02-16 12:13:17 -07:00
|
|
|
if (!(flags & MCA_RCACHE_FLAGS_PERSIST)) {
|
|
|
|
mca_rcache_base_vma_insert (vma_module, reg, 0);
|
|
|
|
}
|
2016-10-27 10:09:43 -06:00
|
|
|
}
|
2013-03-27 22:10:02 +00:00
|
|
|
}
|
|
|
|
|
2013-07-11 20:54:12 +00:00
|
|
|
opal_atomic_wmb ();
|
2015-11-02 12:07:08 -07:00
|
|
|
*local_ptr = (void *) ((uintptr_t) reg->rcache_context +
|
2013-07-11 20:54:12 +00:00
|
|
|
(ptrdiff_t)((uintptr_t) rem_ptr - (uintptr_t) reg->base));
|
|
|
|
|
2013-03-27 22:10:02 +00:00
|
|
|
return reg;
|
|
|
|
}
|
|
|
|
|
2020-04-29 13:17:41 -06:00
|
|
|
struct sm_cleanup_reg_ctx {
|
|
|
|
mca_btl_sm_endpoint_t *ep;
|
2020-01-07 21:48:01 -07:00
|
|
|
opal_list_t *registrations;
|
|
|
|
};
|
|
|
|
|
2020-04-29 13:17:41 -06:00
|
|
|
/**
 * VMA-tree iteration callback that collects every registration belonging to an endpoint.
 *
 * @param[in]     reg  registration currently visited by the iterator
 * @param[in,out] ctx  pointer to a struct sm_cleanup_reg_ctx; matching registrations are
 *                     appended to ctx->registrations
 *
 * @returns OPAL_SUCCESS always
 */
static int mca_btl_sm_endpoint_xpmem_rcache_cleanup (mca_rcache_base_registration_t *reg, void *ctx)
{
    struct sm_cleanup_reg_ctx *cleanup_ctx = (struct sm_cleanup_reg_ctx *) ctx;

    /* alloc_base holds the smp rank of the peer the registration was attached from */
    if ((intptr_t) reg->alloc_base == cleanup_ctx->ep->peer_smp_rank) {
        opal_list_append(cleanup_ctx->registrations, &reg->super.super);
    }

    return OPAL_SUCCESS;
}
|
|
|
|
|
2020-04-29 13:17:41 -06:00
|
|
|
void mca_btl_sm_xpmem_cleanup_endpoint (struct mca_btl_base_endpoint_t *ep)
|
2016-10-27 10:09:43 -06:00
|
|
|
{
|
2020-01-07 21:48:01 -07:00
|
|
|
mca_rcache_base_registration_t *reg;
|
|
|
|
opal_list_t registrations;
|
2020-04-29 13:17:41 -06:00
|
|
|
struct sm_cleanup_reg_ctx cleanup_ctx = {.ep = ep, .registrations = ®istrations};
|
2020-01-07 21:48:01 -07:00
|
|
|
|
|
|
|
OBJ_CONSTRUCT(®istrations, opal_list_t);
|
|
|
|
|
2016-10-27 10:09:43 -06:00
|
|
|
/* clean out the registration cache */
|
2020-04-29 13:17:41 -06:00
|
|
|
(void) mca_rcache_base_vma_iterate (mca_btl_sm_component.vma_module,
|
2018-02-16 12:13:17 -07:00
|
|
|
NULL, (size_t) -1, true,
|
2020-04-29 13:17:41 -06:00
|
|
|
mca_btl_sm_endpoint_xpmem_rcache_cleanup,
|
2020-01-07 21:48:01 -07:00
|
|
|
(void *) &cleanup_ctx);
|
|
|
|
while (NULL != (reg = (mca_rcache_base_registration_t *) opal_list_remove_first(®istrations))) {
|
2020-04-29 13:17:41 -06:00
|
|
|
sm_return_registration (reg, ep);
|
2020-01-07 21:48:01 -07:00
|
|
|
}
|
|
|
|
OBJ_DESTRUCT(®istrations);
|
|
|
|
|
2016-10-27 10:09:43 -06:00
|
|
|
if (ep->segment_base) {
|
|
|
|
xpmem_release (ep->segment_data.xpmem.apid);
|
|
|
|
ep->segment_data.xpmem.apid = 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-04-29 13:17:41 -06:00
|
|
|
#endif /* OPAL_BTL_SM_HAVE_XPMEM */
|