From 655a06f559a6cbb1b47143d513a99cfb80aa088d Mon Sep 17 00:00:00 2001 From: Mark Allen Date: Wed, 5 Apr 2017 16:39:36 -0400 Subject: [PATCH] IB fork The key change was in btl_openib_connect_udcm.c where a buffer was being pinned with size 65664 (whether openib was being used or not). The start of the buffer was page aligned, but because of the size the end wasn't. That makes it too easy for a forked child to accidentally touch pinned memory on the same page as the end of that buffer. So this change increases the size of the allocated buffer to use the rest of the page. I inspected the rest of the ibv_reg_mr() calls and changed one other place to page align its buffer too, although I think the above is the one that really matters. Signed-off-by: Mark Allen --- .../openib/connect/btl_openib_connect_base.c | 22 ++++++++++++++++++- .../openib/connect/btl_openib_connect_udcm.c | 8 +++++-- opal/util/sys_limits.c | 15 +++++++++---- 3 files changed, 38 insertions(+), 7 deletions(-) diff --git a/opal/mca/btl/openib/connect/btl_openib_connect_base.c b/opal/mca/btl/openib/connect/btl_openib_connect_base.c index ca67d0f363..7f1f75c6d9 100644 --- a/opal/mca/btl/openib/connect/btl_openib_connect_base.c +++ b/opal/mca/btl/openib/connect/btl_openib_connect_base.c @@ -32,6 +32,9 @@ #include "opal/util/proc.h" #include "opal/util/show_help.h" +#include "opal/util/sys_limits.h" +#include "opal/align.h" + /* * Array of all possible connection functions */ @@ -421,10 +424,27 @@ int opal_btl_openib_connect_base_alloc_cts(mca_btl_base_endpoint_t *endpoint) sizeof(mca_btl_openib_footer_t) + mca_btl_openib_component.qp_infos[mca_btl_openib_component.credits_qp].size; + int align_it = 0; + int page_size; + + page_size = opal_getpagesize(); + if (length >= page_size / 2) { align_it = 1; } + if (align_it) { +// I think this is only active for ~64k+ buffers anyway, but I'm not +// positive, so I'm only increasing the buffer size and alignment if +// it's not too small. That way we'd avoid wasting excessive memory +// in case this code was active for tiny buffers. + length = OPAL_ALIGN(length, page_size, int); + } + /* Explicitly don't use the mpool registration */ fli = &(endpoint->endpoint_cts_frag.super.super.base.super); fli->registration = NULL; - fli->ptr = malloc(length); + if (!align_it) { + fli->ptr = malloc(length); + } else { + posix_memalign((void**)&(fli->ptr), page_size, length); + } if (NULL == fli->ptr) { BTL_ERROR(("malloc failed")); return OPAL_ERR_OUT_OF_RESOURCE; diff --git a/opal/mca/btl/openib/connect/btl_openib_connect_udcm.c b/opal/mca/btl/openib/connect/btl_openib_connect_udcm.c index 29b7de3554..25d141ffce 100644 --- a/opal/mca/btl/openib/connect/btl_openib_connect_udcm.c +++ b/opal/mca/btl/openib/connect/btl_openib_connect_udcm.c @@ -75,6 +75,7 @@ #include "connect/connect.h" #include "opal/util/sys_limits.h" +#include "opal/align.h" #if (ENABLE_DYNAMIC_SL) #include "connect/btl_openib_connect_sl.h" @@ -1030,7 +1031,7 @@ static void udcm_module_destroy_listen_qp (udcm_module_t *m) static int udcm_module_allocate_buffers (udcm_module_t *m) { - size_t total_size; + size_t total_size, page_size; m->msg_length = sizeof (udcm_msg_hdr_t) + mca_btl_openib_component.num_qps * sizeof (udcm_qp_t); @@ -1038,8 +1039,11 @@ static int udcm_module_allocate_buffers (udcm_module_t *m) total_size = (udcm_recv_count + 1) * (m->msg_length + UDCM_GRH_SIZE); + page_size = opal_getpagesize(); + total_size = OPAL_ALIGN(total_size, page_size, size_t); + m->cm_buffer = NULL; - posix_memalign ((void **)&m->cm_buffer, (size_t)opal_getpagesize(), + posix_memalign ((void **)&m->cm_buffer, (size_t)page_size, total_size); if (NULL == m->cm_buffer) { BTL_ERROR(("malloc failed! errno = %d", errno)); diff --git a/opal/util/sys_limits.c b/opal/util/sys_limits.c index 9be0a6120f..16d11cdb78 100644 --- a/opal/util/sys_limits.c +++ b/opal/util/sys_limits.c @@ -235,13 +235,20 @@ out: int opal_getpagesize(void) { + static int page_size = -1; + + if (page_size != -1) { +// testing in a loop showed sysconf() took ~5 usec vs ~0.3 usec with it cached + return page_size; + } + #ifdef HAVE_GETPAGESIZE - return getpagesize(); + return page_size = getpagesize(); #elif defined(_SC_PAGESIZE ) - return sysconf(_SC_PAGESIZE); + return page_size = sysconf(_SC_PAGESIZE); #elif defined(_SC_PAGE_SIZE) - return sysconf(_SC_PAGE_SIZE); + return page_size = sysconf(_SC_PAGE_SIZE); #else - return 65536; /* safer to overestimate than under */ + return page_size = 65536; /* safer to overestimate than under */ #endif }