From ce4ee2aa8d960eb72292f5bb64108e5a85796e1d Mon Sep 17 00:00:00 2001 From: Nadezhda Kogteva Date: Mon, 13 Oct 2014 10:40:48 +0300 Subject: [PATCH] oshmem mmap: new mca parameters were introduced - sshmem_mmap_anonymous, sshmem_mmap_fixed and sshmem_base_backing_file_dir - for runtime mmap management. (cherry picked up from Mellanox-v1.8 repo commit 4c391a) --- oshmem/mca/memheap/base/memheap_base_alloc.c | 10 ++- oshmem/mca/sshmem/base/base.h | 25 +++++- oshmem/mca/sshmem/base/sshmem_base_open.c | 16 ++++ .../sshmem/mmap/help-oshmem-sshmem-mmap.txt | 23 +++++ oshmem/mca/sshmem/mmap/sshmem_mmap.h | 2 + .../mca/sshmem/mmap/sshmem_mmap_component.c | 17 ++++ oshmem/mca/sshmem/mmap/sshmem_mmap_module.c | 90 ++++++++++++++----- 7 files changed, 157 insertions(+), 26 deletions(-) diff --git a/oshmem/mca/memheap/base/memheap_base_alloc.c b/oshmem/mca/memheap/base/memheap_base_alloc.c index f5fc5ecf96..6081722768 100644 --- a/oshmem/mca/memheap/base/memheap_base_alloc.c +++ b/oshmem/mca/memheap/base/memheap_base_alloc.c @@ -4,9 +4,9 @@ * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2014 Intel, Inc. 
All rights reserved * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -22,12 +22,14 @@ int mca_memheap_base_alloc_init(mca_memheap_map_t *map, size_t size) { int ret = OSHMEM_SUCCESS; + char * seg_filename = NULL; assert(map); assert(HEAP_SEG_INDEX == map->n_segments); map_segment_t *s = &map->mem_segs[map->n_segments]; - ret = mca_sshmem_segment_create(s, "", size); + seg_filename = oshmem_get_unique_file_name(oshmem_my_proc_id()); + ret = mca_sshmem_segment_create(s, seg_filename, size); if (OSHMEM_SUCCESS == ret) { map->n_segments++; @@ -36,6 +38,8 @@ int mca_memheap_base_alloc_init(mca_memheap_map_t *map, size_t size) (unsigned long long)size, map->n_segments, s->type); } + free(seg_filename); + return ret; } diff --git a/oshmem/mca/sshmem/base/base.h b/oshmem/mca/sshmem/base/base.h index 82a3fb02fb..795ebce4ad 100644 --- a/oshmem/mca/sshmem/base/base.h +++ b/oshmem/mca/sshmem/base/base.h @@ -12,15 +12,17 @@ #define MCA_SSHMEM_BASE_H #include "oshmem_config.h" +#include "oshmem/mca/sshmem/sshmem.h" +#include "oshmem/proc/proc.h" #include "opal/mca/base/mca_base_framework.h" -#include "oshmem/mca/sshmem/sshmem.h" +#include "orte/runtime/orte_globals.h" BEGIN_C_DECLS extern void* mca_sshmem_base_start_address; - +extern char* mca_sshmem_base_backing_file_dir; /* ////////////////////////////////////////////////////////////////////////// */ /* Public API for the sshmem framework */ @@ -144,6 +146,25 @@ OSHMEM_DECLSPEC extern mca_base_framework_t oshmem_sshmem_base_framework; oshmem_output_verbose(0, oshmem_sshmem_base_framework.framework_output, \ "Warning %s:%d - %s()", __SSHMEM_FILE__, __LINE__, __FUNCTION__, __VA_ARGS__) + +/* + * Get unique file name + */ +static inline char * oshmem_get_unique_file_name(uint64_t pe) +{ + char *file_name = NULL; + + assert(mca_sshmem_base_backing_file_dir); + + if (NULL == (file_name = calloc(OPAL_PATH_MAX, sizeof(char)))) { + return NULL; + } + + snprintf(file_name, OPAL_PATH_MAX, 
"%s/shmem_job_%u_pe_%llu", mca_sshmem_base_backing_file_dir, ORTE_PROC_MY_NAME->jobid, pe); + + return file_name; +} + END_C_DECLS #endif /* MCA_SSHMEM_BASE_H */ diff --git a/oshmem/mca/sshmem/base/sshmem_base_open.c b/oshmem/mca/sshmem/base/sshmem_base_open.c index 979eba0a26..10f3510301 100644 --- a/oshmem/mca/sshmem/base/sshmem_base_open.c +++ b/oshmem/mca/sshmem/base/sshmem_base_open.c @@ -40,6 +40,8 @@ void *mca_sshmem_base_start_address = (void*)0; void* mca_sshmem_base_start_address = (void*)0xFF000000; #endif +char * mca_sshmem_base_backing_file_dir = NULL; + /* ////////////////////////////////////////////////////////////////////////// */ /** * Register some sshmem-wide MCA params @@ -65,6 +67,20 @@ mca_sshmem_base_register (mca_base_register_flag_t flags) "start_address", MCA_BASE_VAR_SYN_FLAG_DEPRECATED); + mca_sshmem_base_backing_file_dir = "/dev/shm"; + index = mca_base_var_register("oshmem", + "sshmem", + "base", + "backing_file_dir", + "Specifies where backing files will be created when " + "mmap is used and shmem_mmap_anonymous set to 0.", + MCA_BASE_VAR_TYPE_STRING, + NULL, + 0, + MCA_BASE_VAR_FLAG_SETTABLE, + OPAL_INFO_LVL_9, + MCA_BASE_VAR_SCOPE_READONLY, + &mca_sshmem_base_backing_file_dir); return OSHMEM_SUCCESS; } diff --git a/oshmem/mca/sshmem/mmap/help-oshmem-sshmem-mmap.txt b/oshmem/mca/sshmem/mmap/help-oshmem-sshmem-mmap.txt index 9ae3bae259..f979dfa3eb 100644 --- a/oshmem/mca/sshmem/mmap/help-oshmem-sshmem-mmap.txt +++ b/oshmem/mca/sshmem/mmap/help-oshmem-sshmem-mmap.txt @@ -23,3 +23,26 @@ This issue could also be related to CONFIG_STRICT_DEVMEM kernel option which if enabled prevents access to physical memory via "mmap". In this case you could try using other sshmem components instead. + +[mmap:file open failure] + +You can try the following: + +1. Switch to anonymous mapping using "-mca sshmem_mmap_anonymous 0". + The anonymous mapping is not backed by any file. +2. 
Set path to backing files explicitly using "-mca sshmem_base_backing_file_dir <path>" + + File: %s + Error: %s + +[mmap:file truncate failure] + +File can not be truncated to specified length. + +You can try to switch to anonymous mapping using "-mca sshmem_mmap_anonymous 1". +The anonymous mapping is not backed by any file. + + File: %s + Size: %llu + Error: %s + diff --git a/oshmem/mca/sshmem/mmap/sshmem_mmap.h b/oshmem/mca/sshmem/mmap/sshmem_mmap.h index 0d65eb5481..1335af55a2 100644 --- a/oshmem/mca/sshmem/mmap/sshmem_mmap.h +++ b/oshmem/mca/sshmem/mmap/sshmem_mmap.h @@ -25,6 +25,8 @@ typedef struct mca_sshmem_mmap_component_t { mca_sshmem_base_component_t super; /* priority for mmap component */ int priority; + int is_anonymous; + int is_start_addr_fixed; } mca_sshmem_mmap_component_t; OSHMEM_MODULE_DECLSPEC extern mca_sshmem_mmap_component_t diff --git a/oshmem/mca/sshmem/mmap/sshmem_mmap_component.c b/oshmem/mca/sshmem/mmap/sshmem_mmap_component.c index c3d60a7020..dbb46e0f23 100644 --- a/oshmem/mca/sshmem/mmap/sshmem_mmap_component.c +++ b/oshmem/mca/sshmem/mmap/sshmem_mmap_component.c @@ -105,6 +105,23 @@ mmap_register(void) MCA_BASE_VAR_SCOPE_ALL_EQ, &mca_sshmem_mmap_component.priority); + mca_sshmem_mmap_component.is_anonymous = 1; + mca_base_component_var_register (&mca_sshmem_mmap_component.super.base_version, + "anonymous", "Select whether anonymous sshmem is used for mmap " + "component (default: 1)", MCA_BASE_VAR_TYPE_INT, + NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, + OPAL_INFO_LVL_3, + MCA_BASE_VAR_SCOPE_ALL_EQ, + &mca_sshmem_mmap_component.is_anonymous); + + mca_sshmem_mmap_component.is_start_addr_fixed = 1; + mca_base_component_var_register (&mca_sshmem_mmap_component.super.base_version, + "fixed", "Select whether fixed start address is used for shmem " + "(default: 1)", MCA_BASE_VAR_TYPE_INT, + NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, + OPAL_INFO_LVL_3, + MCA_BASE_VAR_SCOPE_ALL_EQ, + &mca_sshmem_mmap_component.is_start_addr_fixed); return OSHMEM_SUCCESS; } 
diff --git a/oshmem/mca/sshmem/mmap/sshmem_mmap_module.c b/oshmem/mca/sshmem/mmap/sshmem_mmap_module.c index 6120bb0bac..8fe87d3912 100644 --- a/oshmem/mca/sshmem/mmap/sshmem_mmap_module.c +++ b/oshmem/mca/sshmem/mmap/sshmem_mmap_module.c @@ -45,13 +45,10 @@ #include "oshmem/proc/proc.h" #include "oshmem/mca/sshmem/sshmem.h" #include "oshmem/mca/sshmem/base/base.h" +#include "oshmem/util/oshmem_util.h" #include "sshmem_mmap.h" -#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON) -# define MAP_ANONYMOUS MAP_ANON -#endif /* MAP_ANONYMOUS and MAP_ANON */ - #if !defined(MAP_FAILED) # define MAP_FAILED ((char*)-1) #endif /* MAP_FAILED */ @@ -125,6 +122,7 @@ shmem_ds_reset(map_segment_t *ds_buf) ds_buf->end = 0; ds_buf->seg_size = 0; ds_buf->type = MAP_SEGMENT_UNKNOWN; + unlink(ds_buf->seg_name); memset(ds_buf->seg_name, '\0', sizeof(ds_buf->seg_name)); } @@ -167,8 +165,8 @@ ds_copy(const map_segment_t *from, return OSHMEM_SUCCESS; } - /* ////////////////////////////////////////////////////////////////////////// */ + static int segment_create(map_segment_t *ds_buf, const char *file_name, @@ -207,7 +205,19 @@ segment_create(map_segment_t *ds_buf, } ds_buf->type = MAP_SEGMENT_ALLOC_MMAP; - ds_buf->seg_id = MAP_SEGMENT_SHM_INVALID; + if (mca_sshmem_mmap_component.is_anonymous) { + /* + * Segment attach is not called for anonymous mmap + */ + ds_buf->seg_id = MAP_SEGMENT_SHM_INVALID; + } else { + /* + * Warning: implied that input file name has a fixed format + * and pe which is stored as segment identifier is used in file name + * generation during segment attachment + */ + ds_buf->seg_id = oshmem_my_proc_id(); + } ds_buf->seg_base_addr = addr; ds_buf->seg_size = size; ds_buf->end = (void*)((uintptr_t)ds_buf->seg_base_addr + ds_buf->seg_size); @@ -241,22 +251,60 @@ segment_attach(map_segment_t *ds_buf, sshmem_mkey_t *mkey) return (mkey->va_base); } - addr = mmap((void *)mca_sshmem_base_start_address, - ds_buf->seg_size, - PROT_READ | PROT_WRITE, - MAP_SHARED | -#if 
defined(MAP_ANONYMOUS) - MAP_ANONYMOUS | -#endif - MAP_FIXED, - -1, - 0); + if (mca_sshmem_mmap_component.is_anonymous) { + /* + * Note: segment attach for anonymous mmap + * is not called due to invalid segment id + */ + addr = mmap((void *)mca_sshmem_base_start_address, + ds_buf->seg_size, + PROT_READ | PROT_WRITE, + MAP_SHARED | + MAP_ANONYMOUS | + MAP_FIXED, + -1, + 0); + } else { + char *file_name = NULL; + if (NULL == (file_name = oshmem_get_unique_file_name(mkey->u.key))) { + OPAL_OUTPUT( + (oshmem_sshmem_base_framework.framework_output, + "Can't get file name") + ); + return NULL; + } + + int fd; + if (-1 == (fd = open(file_name, O_CREAT | O_RDWR, 0600))) { + OPAL_OUTPUT( + (oshmem_sshmem_base_framework.framework_output, + "file open failed: %s", strerror(errno)) + ); + free(file_name); + return NULL; + } + free(file_name); + + addr = mmap((void *)NULL, + ds_buf->seg_size, + PROT_READ | PROT_WRITE, + MAP_SHARED, + fd, + 0); + + if (0 != close(fd)) { + OPAL_OUTPUT( + (oshmem_sshmem_base_framework.framework_output, + "file close failed: %s", strerror(errno)) + ); + } + } if (MAP_FAILED == addr) { - OPAL_OUTPUT_VERBOSE( - (5, oshmem_sshmem_base_framework.framework_output, - "Failed to mmap() %llu bytes (errno=%d)", - (unsigned long long)ds_buf->seg_size, errno) + OPAL_OUTPUT( + (oshmem_sshmem_base_framework.framework_output, + "Failed to mmap() %llu bytes (errno=%d)", + (unsigned long long)ds_buf->seg_size, errno) ); return NULL; } @@ -271,7 +319,7 @@ segment_attach(map_segment_t *ds_buf, sshmem_mkey_t *mkey) mca_sshmem_mmap_component.super.base_version.mca_component_name, ds_buf->seg_id, ds_buf->seg_base_addr, (unsigned long)ds_buf->seg_size, ds_buf->seg_name, mkey->va_base, mkey->len, (unsigned long long)mkey->u.key) - ); + ); /* update returned base pointer with an offset that hides our stuff */ return (mkey->va_base);