diff --git a/opal/mca/btl/openib/connect/btl_openib_connect_base.c b/opal/mca/btl/openib/connect/btl_openib_connect_base.c index ca67d0f363..7f1f75c6d9 100644 --- a/opal/mca/btl/openib/connect/btl_openib_connect_base.c +++ b/opal/mca/btl/openib/connect/btl_openib_connect_base.c @@ -32,6 +32,9 @@ #include "opal/util/proc.h" #include "opal/util/show_help.h" +#include "opal/util/sys_limits.h" +#include "opal/align.h" + /* * Array of all possible connection functions */ @@ -421,10 +424,27 @@ int opal_btl_openib_connect_base_alloc_cts(mca_btl_base_endpoint_t *endpoint) sizeof(mca_btl_openib_footer_t) + mca_btl_openib_component.qp_infos[mca_btl_openib_component.credits_qp].size; + int align_it = 0; + int page_size; + + page_size = opal_getpagesize(); + if (length >= page_size / 2) { align_it = 1; } + if (align_it) { +// I think this is only active for ~64k+ buffers anyway, but I'm not +// positive, so I'm only increasing the buffer size and alignment if +// it's not too small. That way we'd avoid wasting excessive memory +// in case this code was active for tiny buffers. + length = OPAL_ALIGN(length, page_size, int); + } + /* Explicitly don't use the mpool registration */ fli = &(endpoint->endpoint_cts_frag.super.super.base.super); fli->registration = NULL; - fli->ptr = malloc(length); + if (!align_it) { + fli->ptr = malloc(length); + } else { + posix_memalign((void**)&(fli->ptr), page_size, length); + } if (NULL == fli->ptr) { BTL_ERROR(("malloc failed")); return OPAL_ERR_OUT_OF_RESOURCE; diff --git a/opal/mca/btl/openib/connect/btl_openib_connect_udcm.c b/opal/mca/btl/openib/connect/btl_openib_connect_udcm.c index f411f48f34..ee5678120a 100644 --- a/opal/mca/btl/openib/connect/btl_openib_connect_udcm.c +++ b/opal/mca/btl/openib/connect/btl_openib_connect_udcm.c @@ -75,6 +75,7 @@ #include "connect/connect.h" #include "opal/util/sys_limits.h" +#include "opal/align.h" #if (ENABLE_DYNAMIC_SL) #include "connect/btl_openib_connect_sl.h" @@ -1040,7 +1041,7 @@ static void udcm_module_destroy_listen_qp (udcm_module_t *m) static int udcm_module_allocate_buffers (udcm_module_t *m) { - size_t total_size; + size_t total_size, page_size; m->msg_length = sizeof (udcm_msg_hdr_t) + mca_btl_openib_component.num_qps * sizeof (udcm_qp_t); @@ -1048,8 +1049,11 @@ static int udcm_module_allocate_buffers (udcm_module_t *m) total_size = (udcm_recv_count + 1) * (m->msg_length + UDCM_GRH_SIZE); + page_size = opal_getpagesize(); + total_size = OPAL_ALIGN(total_size, page_size, size_t); + m->cm_buffer = NULL; - posix_memalign ((void **)&m->cm_buffer, (size_t)opal_getpagesize(), + posix_memalign ((void **)&m->cm_buffer, (size_t)page_size, total_size); if (NULL == m->cm_buffer) { BTL_ERROR(("malloc failed! errno = %d", errno)); diff --git a/opal/util/sys_limits.c b/opal/util/sys_limits.c index 9be0a6120f..16d11cdb78 100644 --- a/opal/util/sys_limits.c +++ b/opal/util/sys_limits.c @@ -235,13 +235,20 @@ out: int opal_getpagesize(void) { + static int page_size = -1; + + if (page_size != -1) { +// testing in a loop showed sysconf() took ~5 usec vs ~0.3 usec with it cached + return page_size; + } + #ifdef HAVE_GETPAGESIZE - return getpagesize(); + return page_size = getpagesize(); #elif defined(_SC_PAGESIZE ) - return sysconf(_SC_PAGESIZE); + return page_size = sysconf(_SC_PAGESIZE); #elif defined(_SC_PAGE_SIZE) - return sysconf(_SC_PAGE_SIZE); + return page_size = sysconf(_SC_PAGE_SIZE); #else - return 65536; /* safer to overestimate than under */ + return page_size = 65536; /* safer to overestimate than under */ #endif }