/* * Copyright (c) 2014 Mellanox Technologies, Inc. * All rights reserved. * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ #include "oshmem_config.h" #include #ifdef HAVE_FCNTL_H #include #endif /* HAVE_FCNTL_H */ #ifdef HAVE_SYS_MMAN_H #include #endif /* HAVE_SYS_MMAN_H */ #ifdef HAVE_UNISTD_H #include #endif /* HAVE_UNISTD_H */ #ifdef HAVE_SYS_TYPES_H #include #endif /* HAVE_SYS_TYPES_H */ #ifdef HAVE_SYS_IPC_H #include #endif /* HAVE_SYS_IPC_H */ #if HAVE_SYS_SHM_H #include #endif /* HAVE_SYS_SHM_H */ #if HAVE_SYS_STAT_H #include #endif /* HAVE_SYS_STAT_H */ #include #ifdef HAVE_NETDB_H #include #endif /* HAVE_NETDB_H */ #include "opal/constants.h" #include "opal_stdint.h" #include "opal/util/output.h" #include "opal/util/path.h" #include "opal/util/show_help.h" #include "orte/util/show_help.h" #include "oshmem/proc/proc.h" #include "oshmem/mca/sshmem/sshmem.h" #include "oshmem/mca/sshmem/base/base.h" #include "sshmem_sysv.h" /* ////////////////////////////////////////////////////////////////////////// */ /* local functions */ static int module_init(void); static int segment_create(map_segment_t *ds_buf, const char *file_name, size_t size); static void * segment_attach(map_segment_t *ds_buf, sshmem_mkey_t *mkey); static int segment_detach(map_segment_t *ds_buf, sshmem_mkey_t *mkey); static int segment_unlink(map_segment_t *ds_buf); static int module_finalize(void); /* sysv shmem module */ mca_sshmem_sysv_module_t mca_sshmem_sysv_module = { /* super */ { module_init, segment_create, segment_attach, segment_detach, segment_unlink, module_finalize } }; /* ////////////////////////////////////////////////////////////////////////// */ static int module_init(void) { /* nothing to do */ return OSHMEM_SUCCESS; } /* ////////////////////////////////////////////////////////////////////////// */ static int module_finalize(void) { /* nothing to do */ return OSHMEM_SUCCESS; } /* ////////////////////////////////////////////////////////////////////////// */ static int segment_create(map_segment_t *ds_buf, const char *file_name, size_t size) { int rc = OSHMEM_SUCCESS; void *addr = NULL; int shmid = MAP_SEGMENT_SHM_INVALID; int flags; int try_hp; assert(ds_buf); /* init the contents of map_segment_t */ shmem_ds_reset(ds_buf); /* for sysv shared memory we don't have to worry about the backing store * being located on a network file system... so no check is needed here. */ /* create a new shared memory segment and save the shmid. note the use of * real_size here */ flags = IPC_CREAT | IPC_EXCL | S_IRUSR | S_IWUSR; try_hp = mca_sshmem_sysv_component.use_hp; #if defined (SHM_HUGETLB) flags |= ((0 != try_hp) ? SHM_HUGETLB : 0); size = ((size + sshmem_sysv_gethugepagesize() - 1) / sshmem_sysv_gethugepagesize()) * sshmem_sysv_gethugepagesize(); #endif /* Create a new shared memory segment and save the shmid. */ retry_alloc: shmid = shmget(IPC_PRIVATE, size, flags); if (shmid == MAP_SEGMENT_SHM_INVALID) { /* hugepage alloc was set to auto. Hopefully it failed because there are no * enough hugepages on the system. Turn it off and retry. */ if (-1 == try_hp) { OPAL_OUTPUT_VERBOSE( (10, oshmem_sshmem_base_framework.framework_output, "failed to allocate %llu bytes with huge pages. " "Using regular pages", (unsigned long long)size)); flags = IPC_CREAT | IPC_EXCL | S_IRUSR | S_IWUSR; try_hp = 0; goto retry_alloc; } opal_show_help("help-oshmem-sshmem.txt", "create segment failure", true, "sysv", orte_process_info.nodename, (unsigned long long) size, strerror(errno), errno); opal_show_help("help-oshmem-sshmem-sysv.txt", "sysv:create segment failure", true); return OSHMEM_ERROR; } /* Attach to the segment */ addr = shmat(shmid, (void *) mca_sshmem_base_start_address, 0); if (addr == (void *) -1L) { opal_show_help("help-oshmem-sshmem.txt", "create segment failure", true, "sysv", orte_process_info.nodename, (unsigned long long) size, strerror(errno), errno); opal_show_help("help-oshmem-sshmem-sysv.txt", "sysv:create segment failure", true); shmctl(shmid, IPC_RMID, NULL); return OSHMEM_ERR_OUT_OF_RESOURCE; } shmctl(shmid, IPC_RMID, NULL ); ds_buf->type = MAP_SEGMENT_ALLOC_SHM; ds_buf->seg_id = shmid; ds_buf->super.va_base = addr; ds_buf->seg_size = size; ds_buf->super.va_end = (void*)((uintptr_t)ds_buf->super.va_base + ds_buf->seg_size); OPAL_OUTPUT_VERBOSE( (70, oshmem_sshmem_base_framework.framework_output, "%s: %s: create %s " "(id: %d, addr: %p size: %lu)\n", mca_sshmem_sysv_component.super.base_version.mca_type_name, mca_sshmem_sysv_component.super.base_version.mca_component_name, (rc ? "failure" : "successful"), ds_buf->seg_id, ds_buf->super.va_base, (unsigned long)ds_buf->seg_size) ); return rc; } /* ////////////////////////////////////////////////////////////////////////// */ /** * segment_attach can only be called after a successful call to segment_create */ static void * segment_attach(map_segment_t *ds_buf, sshmem_mkey_t *mkey) { assert(ds_buf); assert(mkey->va_base == 0); if (MAP_SEGMENT_SHM_INVALID == (int)(mkey->u.key)) { return (mkey->va_base); } mkey->va_base = shmat((int)(mkey->u.key), 0, 0); OPAL_OUTPUT_VERBOSE( (70, oshmem_sshmem_base_framework.framework_output, "%s: %s: attach successful " "(id: %d, addr: %p size: %lu | va_base: 0x%p len: %d key %llx)\n", mca_sshmem_sysv_component.super.base_version.mca_type_name, mca_sshmem_sysv_component.super.base_version.mca_component_name, ds_buf->seg_id, ds_buf->super.va_base, (unsigned long)ds_buf->seg_size, mkey->va_base, mkey->len, (unsigned long long)mkey->u.key) ); /* update returned base pointer with an offset that hides our stuff */ return (mkey->va_base); } /* ////////////////////////////////////////////////////////////////////////// */ static int segment_detach(map_segment_t *ds_buf, sshmem_mkey_t *mkey) { int rc = OSHMEM_SUCCESS; assert(ds_buf); OPAL_OUTPUT_VERBOSE( (70, oshmem_sshmem_base_framework.framework_output, "%s: %s: detaching " "(id: %d, addr: %p size: %lu)\n", mca_sshmem_sysv_component.super.base_version.mca_type_name, mca_sshmem_sysv_component.super.base_version.mca_component_name, ds_buf->seg_id, ds_buf->super.va_base, (unsigned long)ds_buf->seg_size) ); if (ds_buf->seg_id != MAP_SEGMENT_SHM_INVALID) { shmctl(ds_buf->seg_id, IPC_RMID, NULL ); } if (mca_sshmem_sysv_component.use_hp != 0) { /** * Workaround kernel panic when detaching huge pages from user space simultanously from several processes * dont detach here instead let kernel do it during process cleanup */ /* shmdt((void *)ds_buf->seg_base_addr); */ } /* reset the contents of the map_segment_t associated with this * shared memory segment. */ shmem_ds_reset(ds_buf); return rc; } /* ////////////////////////////////////////////////////////////////////////// */ static int segment_unlink(map_segment_t *ds_buf) { /* not much unlink work needed for sysv */ OPAL_OUTPUT_VERBOSE( (70, oshmem_sshmem_base_framework.framework_output, "%s: %s: unlinking " "(id: %d, size: %lu)\n", mca_sshmem_sysv_component.super.base_version.mca_type_name, mca_sshmem_sysv_component.super.base_version.mca_component_name, ds_buf->seg_id, (unsigned long)ds_buf->seg_size) ); /* don't completely reset. in particular, only reset * the id and flip the invalid bit. size and name values will remain valid * across unlinks. other information stored in flags will remain untouched. */ ds_buf->seg_id = MAP_SEGMENT_SHM_INVALID; /* note: this is only changing the valid bit to 0. */ MAP_SEGMENT_INVALIDATE(ds_buf); return OSHMEM_SUCCESS; } /* * Get current huge page size * */ size_t sshmem_sysv_gethugepagesize(void) { static size_t huge_page_size = 0; char buf[256]; int size_kb; FILE *f; /* Cache the huge page size value */ if (huge_page_size == 0) { f = fopen("/proc/meminfo", "r"); if (f != NULL) { while (fgets(buf, sizeof(buf), f)) { if (sscanf(buf, "Hugepagesize: %d kB", &size_kb) == 1) { huge_page_size = size_kb * 1024L; break; } } fclose(f); } if (huge_page_size == 0) { huge_page_size = 2 * 1024L *1024L; } } return huge_page_size; }