btl/vader: modify how the max attachment address is determined
This PR removes the constant defining the max attachment address and replaces it with the largest address that shows up in /proc/self/maps. This should address issues found on AARCH64, where the max address may differ based on the configuration. Since the calculated max address may differ between processes, the max address is sent as part of the modex and stored in the endpoint data.

Signed-off-by: Nathan Hjelm <hjelmn@google.com>
Этот коммит содержится в:
родитель
f86f805be1
Коммит
728d51f9f3
@ -17,6 +17,7 @@
|
||||
* Copyright (c) 2015 Mellanox Technologies. All rights reserved.
|
||||
* Copyright (c) 2018 Triad National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2020 Google, LLC. All rights reserved.
|
||||
*
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
@ -82,6 +83,7 @@ union vader_modex_t {
|
||||
struct vader_modex_xpmem_t {
|
||||
xpmem_segid_t seg_id;
|
||||
void *segment_base;
|
||||
uintptr_t address_max;
|
||||
} xpmem;
|
||||
#endif
|
||||
struct vader_modex_other_t {
|
||||
@ -113,6 +115,7 @@ struct mca_btl_vader_component_t {
|
||||
int vader_free_list_inc; /**< number of elements to alloc when growing free lists */
|
||||
#if OPAL_BTL_VADER_HAVE_XPMEM
|
||||
xpmem_segid_t my_seg_id; /**< this rank's xpmem segment id */
|
||||
uintptr_t my_address_max; /**< largest address */
|
||||
mca_rcache_base_vma_module_t *vma_module; /**< registration cache for xpmem segments */
|
||||
#endif
|
||||
opal_shmem_ds_t seg_ds; /**< this rank's shared memory segment (when not using xpmem) */
|
||||
|
@ -21,7 +21,7 @@
|
||||
* Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved.
|
||||
* Copyright (c) 2018 Triad National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2019 Google, Inc. All rights reserved.
|
||||
* Copyright (c) 2019-2020 Google, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -383,6 +383,7 @@ static int mca_btl_base_vader_modex_send (void)
|
||||
if (MCA_BTL_VADER_XPMEM == mca_btl_vader_component.single_copy_mechanism) {
|
||||
modex.xpmem.seg_id = mca_btl_vader_component.my_seg_id;
|
||||
modex.xpmem.segment_base = mca_btl_vader_component.my_segment;
|
||||
modex.xpmem.address_max = mca_btl_vader_component.my_address_max;
|
||||
|
||||
modex_size = sizeof (modex.xpmem);
|
||||
} else {
|
||||
|
@ -78,6 +78,7 @@ typedef struct mca_btl_base_endpoint_t {
|
||||
#if OPAL_BTL_VADER_HAVE_XPMEM
|
||||
struct {
|
||||
xpmem_apid_t apid; /**< xpmem apid for remote peer */
|
||||
uintptr_t address_max; /**< largest address that can be attached */
|
||||
} xpmem;
|
||||
#endif
|
||||
struct {
|
||||
|
@ -19,6 +19,7 @@
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2018-2019 Triad National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2020 Google, LLC. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -216,6 +217,7 @@ static int init_vader_endpoint (struct mca_btl_base_endpoint_t *ep, struct opal_
|
||||
if (MCA_BTL_VADER_XPMEM == mca_btl_vader_component.single_copy_mechanism) {
|
||||
/* always use xpmem if it is available */
|
||||
ep->segment_data.xpmem.apid = xpmem_get (modex->xpmem.seg_id, XPMEM_RDWR, XPMEM_PERMIT_MODE, (void *) 0666);
|
||||
ep->segment_data.xpmem.address_max = modex->xpmem.address_max;
|
||||
(void) vader_get_registation (ep, modex->xpmem.segment_base, mca_btl_vader_component.segment_size,
|
||||
MCA_RCACHE_FLAGS_PERSIST, (void **) &ep->segment_base);
|
||||
} else {
|
||||
|
@ -22,7 +22,47 @@
|
||||
|
||||
int mca_btl_vader_xpmem_init (void)
|
||||
{
|
||||
mca_btl_vader_component.my_seg_id = xpmem_make (0, VADER_MAX_ADDRESS, XPMEM_PERMIT_MODE, (void *)0666);
|
||||
/* Any attachment that goes past the Linux TASK_SIZE will always fail. To prevent this we need to
|
||||
* determine the value of TASK_SIZE. On x86_64 the value was hard-coded in vader to be
|
||||
* 0x7ffffffff000ul but this approach does not work with AARCH64 (and possibly other architectures).
|
||||
* Since there is really no way to directly determine the value we can (in all cases?) look through
|
||||
* the mapping for this process to determine what the largest address is. This should be the top
|
||||
* of the stack. No heap allocations should be larger than this value. Since the largest address
|
||||
* may differ between processes the value must be shared as part of the modex and stored in the
|
||||
* endpoint. */
|
||||
FILE *fh = fopen("/proc/self/maps", "r");
|
||||
if (NULL == fh) {
|
||||
BTL_ERROR(("could not open /proc/self/maps for reading. disabling XPMEM"));
|
||||
return OPAL_ERR_NOT_AVAILABLE;
|
||||
}
|
||||
|
||||
char buffer[1024];
|
||||
uintptr_t address_max = 0;
|
||||
while (fgets(buffer, sizeof(buffer), fh)) {
|
||||
uintptr_t low, high;
|
||||
char *tmp;
|
||||
/* each line of /proc/self/maps starts with low-high in hexadecimal (without a 0x) */
|
||||
low = strtoul(buffer, &tmp, 16);
|
||||
high = strtoul(tmp+1, NULL, 16);
|
||||
if (address_max < high) {
|
||||
address_max = high;
|
||||
}
|
||||
}
|
||||
|
||||
fclose (fh);
|
||||
|
||||
if (0 == address_max) {
|
||||
BTL_ERROR(("could not determine the address max"));
|
||||
return OPAL_ERR_NOT_AVAILABLE;
|
||||
}
|
||||
|
||||
/* save the calculated maximum */
|
||||
mca_btl_vader_component.my_address_max = address_max - 1;
|
||||
|
||||
/* it is safe to use XPMEM_MAXADDR_SIZE here (which is always (size_t)-1) even though
|
||||
* it is not safe for attach */
|
||||
mca_btl_vader_component.my_seg_id = xpmem_make (0, XPMEM_MAXADDR_SIZE, XPMEM_PERMIT_MODE,
|
||||
(void *)0666);
|
||||
if (-1 == mca_btl_vader_component.my_seg_id) {
|
||||
return OPAL_ERR_NOT_AVAILABLE;
|
||||
}
|
||||
@ -110,8 +150,8 @@ mca_rcache_base_registration_t *vader_get_registation (struct mca_btl_base_endpo
|
||||
|
||||
base = OPAL_DOWN_ALIGN((uintptr_t) rem_ptr, attach_align, uintptr_t);
|
||||
bound = OPAL_ALIGN((uintptr_t) rem_ptr + size - 1, attach_align, uintptr_t) + 1;
|
||||
if (OPAL_UNLIKELY(bound > VADER_MAX_ADDRESS)) {
|
||||
bound = VADER_MAX_ADDRESS;
|
||||
if (OPAL_UNLIKELY(bound > ep->segment_data.xpmem.address_max)) {
|
||||
bound = ep->segment_data.xpmem.address_max;
|
||||
}
|
||||
|
||||
check_ctx.base = base;
|
||||
|
@ -3,6 +3,7 @@
|
||||
* Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2016 ARM, Inc. All rights reserved.
|
||||
* Copyright (c) 2020 Google, LLC. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -32,13 +33,6 @@
|
||||
/* look up the remote pointer in the peer rcache and attach if
|
||||
* necessary */
|
||||
|
||||
/* largest address we can attach to using xpmem */
|
||||
#if defined(__x86_64__)
|
||||
#define VADER_MAX_ADDRESS ((uintptr_t)0x7ffffffff000ul)
|
||||
#else
|
||||
#define VADER_MAX_ADDRESS XPMEM_MAXADDR_SIZE
|
||||
#endif
|
||||
|
||||
struct mca_btl_base_endpoint_t;
|
||||
|
||||
int mca_btl_vader_xpmem_init (void);
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user