diff --git a/opal/mca/btl/vader/btl_vader.h b/opal/mca/btl/vader/btl_vader.h index eab5f5a87d..e3921429d5 100644 --- a/opal/mca/btl/vader/btl_vader.h +++ b/opal/mca/btl/vader/btl_vader.h @@ -17,6 +17,7 @@ * Copyright (c) 2015 Mellanox Technologies. All rights reserved. * Copyright (c) 2018 Triad National Security, LLC. All rights * reserved. + * Copyright (c) 2020 Google, LLC. All rights reserved. * * $COPYRIGHT$ * @@ -82,6 +83,7 @@ union vader_modex_t { struct vader_modex_xpmem_t { xpmem_segid_t seg_id; void *segment_base; + uintptr_t address_max; } xpmem; #endif struct vader_modex_other_t { @@ -113,6 +115,7 @@ struct mca_btl_vader_component_t { int vader_free_list_inc; /**< number of elements to alloc when growing free lists */ #if OPAL_BTL_VADER_HAVE_XPMEM xpmem_segid_t my_seg_id; /**< this rank's xpmem segment id */ + uintptr_t my_address_max; /**< largest address */ mca_rcache_base_vma_module_t *vma_module; /**< registration cache for xpmem segments */ #endif opal_shmem_ds_t seg_ds; /**< this rank's shared memory segment (when not using xpmem) */ diff --git a/opal/mca/btl/vader/btl_vader_component.c b/opal/mca/btl/vader/btl_vader_component.c index 1eab736848..f8e25db1f4 100644 --- a/opal/mca/btl/vader/btl_vader_component.c +++ b/opal/mca/btl/vader/btl_vader_component.c @@ -21,7 +21,7 @@ * Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved. * Copyright (c) 2018 Triad National Security, LLC. All rights * reserved. - * Copyright (c) 2019 Google, Inc. All rights reserved. + * Copyright (c) 2019-2020 Google, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -383,6 +383,7 @@ static int mca_btl_base_vader_modex_send (void) if (MCA_BTL_VADER_XPMEM == mca_btl_vader_component.single_copy_mechanism) { modex.xpmem.seg_id = mca_btl_vader_component.my_seg_id; modex.xpmem.segment_base = mca_btl_vader_component.my_segment; + modex.xpmem.address_max = mca_btl_vader_component.my_address_max; modex_size = sizeof (modex.xpmem); } else { diff --git a/opal/mca/btl/vader/btl_vader_endpoint.h b/opal/mca/btl/vader/btl_vader_endpoint.h index 5c5b2478b8..4d066a446f 100644 --- a/opal/mca/btl/vader/btl_vader_endpoint.h +++ b/opal/mca/btl/vader/btl_vader_endpoint.h @@ -78,6 +78,7 @@ typedef struct mca_btl_base_endpoint_t { #if OPAL_BTL_VADER_HAVE_XPMEM struct { xpmem_apid_t apid; /**< xpmem apid for remote peer */ + uintptr_t address_max; /**< largest address that can be attached */ } xpmem; #endif struct { diff --git a/opal/mca/btl/vader/btl_vader_module.c b/opal/mca/btl/vader/btl_vader_module.c index e54c02b569..1a54bbfcab 100644 --- a/opal/mca/btl/vader/btl_vader_module.c +++ b/opal/mca/btl/vader/btl_vader_module.c @@ -19,6 +19,7 @@ * and Technology (RIST). All rights reserved. * Copyright (c) 2018-2019 Triad National Security, LLC. All rights * reserved. + * Copyright (c) 2020 Google, LLC. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -216,6 +217,7 @@ static int init_vader_endpoint (struct mca_btl_base_endpoint_t *ep, struct opal_ if (MCA_BTL_VADER_XPMEM == mca_btl_vader_component.single_copy_mechanism) { /* always use xpmem if it is available */ ep->segment_data.xpmem.apid = xpmem_get (modex->xpmem.seg_id, XPMEM_RDWR, XPMEM_PERMIT_MODE, (void *) 0666); + ep->segment_data.xpmem.address_max = modex->xpmem.address_max; (void) vader_get_registation (ep, modex->xpmem.segment_base, mca_btl_vader_component.segment_size, MCA_RCACHE_FLAGS_PERSIST, (void **) &ep->segment_base); } else { diff --git a/opal/mca/btl/vader/btl_vader_xpmem.c b/opal/mca/btl/vader/btl_vader_xpmem.c index 5bfe8e1a64..17abc17270 100644 --- a/opal/mca/btl/vader/btl_vader_xpmem.c +++ b/opal/mca/btl/vader/btl_vader_xpmem.c @@ -22,7 +22,47 @@ int mca_btl_vader_xpmem_init (void) { - mca_btl_vader_component.my_seg_id = xpmem_make (0, VADER_MAX_ADDRESS, XPMEM_PERMIT_MODE, (void *)0666); + /* Any attachment that goes past the Linux TASK_SIZE will always fail. To prevent this we need to + * determine the value of TASK_SIZE. On x86_64 the value was hard-coded in vader to be + * 0x7ffffffffffful but this approach does not work with AARCH64 (and possibly other architectures). + * Since there is really no way to directly determine the value we can (in all cases?) look through + * the mapping for this process to determine what the largest address is. This should be the top + * of the stack. No heap allocations should be larger than this value. Since the largest address + * may differ between processes the value must be shared as part of the modex and stored in the + * endpoint. */ + FILE *fh = fopen("/proc/self/maps", "r"); + if (NULL == fh) { + BTL_ERROR(("could not open /proc/self/maps for reading. 
disabling XPMEM")); + return OPAL_ERR_NOT_AVAILABLE; + } + + char buffer[1024]; + uintptr_t address_max = 0; + while (fgets(buffer, sizeof(buffer), fh)) { + uintptr_t low, high; + char *tmp; + /* each line of /proc/self/maps starts with low-high in hexadecimal (without a 0x) */ + low = strtoul(buffer, &tmp, 16); + high = strtoul(tmp+1, NULL, 16); + if (address_max < high) { + address_max = high; + } + } + + fclose (fh); + + if (0 == address_max) { + BTL_ERROR(("could not determine the address max")); + return OPAL_ERR_NOT_AVAILABLE; + } + + /* save the calculated maximum */ + mca_btl_vader_component.my_address_max = address_max - 1; + + /* it is safe to use XPMEM_MAXADDR_SIZE here (which is always (size_t)-1) even though + * it is not safe for attach */ + mca_btl_vader_component.my_seg_id = xpmem_make (0, XPMEM_MAXADDR_SIZE, XPMEM_PERMIT_MODE, + (void *)0666); if (-1 == mca_btl_vader_component.my_seg_id) { return OPAL_ERR_NOT_AVAILABLE; } @@ -110,8 +150,8 @@ mca_rcache_base_registration_t *vader_get_registation (struct mca_btl_base_endpo base = OPAL_DOWN_ALIGN((uintptr_t) rem_ptr, attach_align, uintptr_t); bound = OPAL_ALIGN((uintptr_t) rem_ptr + size - 1, attach_align, uintptr_t) + 1; - if (OPAL_UNLIKELY(bound > VADER_MAX_ADDRESS)) { - bound = VADER_MAX_ADDRESS; + if (OPAL_UNLIKELY(bound > ep->segment_data.xpmem.address_max)) { + bound = ep->segment_data.xpmem.address_max; } check_ctx.base = base; diff --git a/opal/mca/btl/vader/btl_vader_xpmem.h b/opal/mca/btl/vader/btl_vader_xpmem.h index fa47773697..5b6e7f01fb 100644 --- a/opal/mca/btl/vader/btl_vader_xpmem.h +++ b/opal/mca/btl/vader/btl_vader_xpmem.h @@ -3,6 +3,7 @@ * Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2016 ARM, Inc. All rights reserved. + * Copyright (c) 2020 Google, LLC. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -32,13 +33,6 @@ /* look up the remote pointer in the peer rcache and attach if * necessary */ -/* largest address we can attach to using xpmem */ -#if defined(__x86_64__) -#define VADER_MAX_ADDRESS ((uintptr_t)0x7ffffffff000ul) -#else -#define VADER_MAX_ADDRESS XPMEM_MAXADDR_SIZE -#endif - struct mca_btl_base_endpoint_t; int mca_btl_vader_xpmem_init (void);