1
1
openmpi/oshmem/mca/sshmem/sysv/sshmem_sysv_module.c
Gilles Gouaillardet 10cb9f6f9e oshmem: remove unnecessary dependencies to ORTE
either use OPAL or OMPI layers, since ORTE layer
is not present when PMIx RTE is used

Signed-off-by: Gilles Gouaillardet <gilles@rist.or.jp>
2019-02-20 13:55:55 +09:00

331 lines
9.6 KiB
C

/*
* Copyright (c) 2014 Mellanox Technologies, Inc.
* All rights reserved.
* Copyright (c) 2014 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2019 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "oshmem_config.h"
#include <errno.h>
#ifdef HAVE_FCNTL_H
#include <fcntl.h>
#endif /* HAVE_FCNTL_H */
#ifdef HAVE_SYS_MMAN_H
#include <sys/mman.h>
#endif /* HAVE_SYS_MMAN_H */
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif /* HAVE_UNISTD_H */
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif /* HAVE_SYS_TYPES_H */
#ifdef HAVE_SYS_IPC_H
#include <sys/ipc.h>
#endif /* HAVE_SYS_IPC_H */
#if HAVE_SYS_SHM_H
#include <sys/shm.h>
#endif /* HAVE_SYS_SHM_H */
#if HAVE_SYS_STAT_H
#include <sys/stat.h>
#endif /* HAVE_SYS_STAT_H */
#include <string.h>
#ifdef HAVE_NETDB_H
#include <netdb.h>
#endif /* HAVE_NETDB_H */
#include "opal/constants.h"
#include "opal_stdint.h"
#include "opal/util/output.h"
#include "opal/util/path.h"
#include "opal/util/show_help.h"
#include "oshmem/proc/proc.h"
#include "oshmem/mca/sshmem/sshmem.h"
#include "oshmem/mca/sshmem/base/base.h"
#include "sshmem_sysv.h"
/* ////////////////////////////////////////////////////////////////////////// */
/*
 * Forward declarations of the file-local entry points that are plugged into
 * the module structure defined in this file.
 */

/* module life cycle */
static int module_init(void);
static int module_finalize(void);

/* segment operations */
static int segment_create(map_segment_t *ds_buf, const char *file_name, size_t size);
static void *segment_attach(map_segment_t *ds_buf, sshmem_mkey_t *mkey);
static int segment_detach(map_segment_t *ds_buf, sshmem_mkey_t *mkey);
static int segment_unlink(map_segment_t *ds_buf);
/*
 * sysv shmem module instance.
 *
 * The nested initializer fills the embedded base ("super") member
 * positionally with this file's static entry points, so the order of the
 * entries below must match the member order of the base module structure
 * (declared outside this file).
 */
mca_sshmem_sysv_module_t mca_sshmem_sysv_module = {
/* super */
{
module_init,
segment_create,
segment_attach,
segment_detach,
segment_unlink,
module_finalize
}
};
/* ////////////////////////////////////////////////////////////////////////// */
/**
 * Module initialization entry point.
 *
 * The sysv module performs no set-up work of its own.
 *
 * @return OSHMEM_SUCCESS, unconditionally.
 */
static int module_init(void)
{
    return OSHMEM_SUCCESS;
}
/* ////////////////////////////////////////////////////////////////////////// */
/**
 * Module finalization entry point.
 *
 * The sysv module performs no tear-down work of its own.
 *
 * @return OSHMEM_SUCCESS, unconditionally.
 */
static int module_finalize(void)
{
    return OSHMEM_SUCCESS;
}
/* ////////////////////////////////////////////////////////////////////////// */
static int
segment_create(map_segment_t *ds_buf,
const char *file_name,
size_t size)
{
int rc = OSHMEM_SUCCESS;
void *addr = NULL;
int shmid = MAP_SEGMENT_SHM_INVALID;
int flags;
int try_hp;
assert(ds_buf);
/* init the contents of map_segment_t */
shmem_ds_reset(ds_buf);
/* for sysv shared memory we don't have to worry about the backing store
* being located on a network file system... so no check is needed here.
*/
/* create a new shared memory segment and save the shmid. note the use of
* real_size here
*/
flags = IPC_CREAT | IPC_EXCL | S_IRUSR | S_IWUSR;
try_hp = mca_sshmem_sysv_component.use_hp;
#if defined (SHM_HUGETLB)
flags |= ((0 != try_hp) ? SHM_HUGETLB : 0);
size = ((size + sshmem_sysv_gethugepagesize() - 1) / sshmem_sysv_gethugepagesize()) * sshmem_sysv_gethugepagesize();
#endif
/* Create a new shared memory segment and save the shmid. */
retry_alloc:
shmid = shmget(IPC_PRIVATE, size, flags);
if (shmid == MAP_SEGMENT_SHM_INVALID) {
/* hugepage alloc was set to auto. Hopefully it failed because there are no
* enough hugepages on the system. Turn it off and retry.
*/
if (-1 == try_hp) {
OPAL_OUTPUT_VERBOSE(
(10, oshmem_sshmem_base_framework.framework_output,
"failed to allocate %llu bytes with huge pages. "
"Using regular pages", (unsigned long long)size));
flags = IPC_CREAT | IPC_EXCL | S_IRUSR | S_IWUSR;
try_hp = 0;
goto retry_alloc;
}
opal_show_help("help-oshmem-sshmem.txt",
"create segment failure",
true,
"sysv",
ompi_process_info.nodename, (unsigned long long) size,
strerror(errno), errno);
opal_show_help("help-oshmem-sshmem-sysv.txt",
"sysv:create segment failure",
true);
return OSHMEM_ERROR;
}
/* Attach to the segment */
addr = shmat(shmid, (void *) mca_sshmem_base_start_address, 0);
if (addr == (void *) -1L) {
opal_show_help("help-oshmem-sshmem.txt",
"create segment failure",
true,
"sysv",
ompi_process_info.nodename, (unsigned long long) size,
strerror(errno), errno);
opal_show_help("help-oshmem-sshmem-sysv.txt",
"sysv:create segment failure",
true);
shmctl(shmid, IPC_RMID, NULL);
return OSHMEM_ERR_OUT_OF_RESOURCE;
}
shmctl(shmid, IPC_RMID, NULL );
ds_buf->type = MAP_SEGMENT_ALLOC_SHM;
ds_buf->seg_id = shmid;
ds_buf->super.va_base = addr;
ds_buf->seg_size = size;
ds_buf->super.va_end = (void*)((uintptr_t)ds_buf->super.va_base + ds_buf->seg_size);
OPAL_OUTPUT_VERBOSE(
(70, oshmem_sshmem_base_framework.framework_output,
"%s: %s: create %s "
"(id: %d, addr: %p size: %lu)\n",
mca_sshmem_sysv_component.super.base_version.mca_type_name,
mca_sshmem_sysv_component.super.base_version.mca_component_name,
(rc ? "failure" : "successful"),
ds_buf->seg_id, ds_buf->super.va_base, (unsigned long)ds_buf->seg_size)
);
return rc;
}
/* ////////////////////////////////////////////////////////////////////////// */
/**
 * Attach the System V segment identified by mkey->u.key.
 *
 * If the key (converted to int) equals MAP_SEGMENT_SHM_INVALID nothing is
 * attached and mkey->va_base, which is asserted to be 0 on entry, is returned
 * untouched.  Otherwise the result of shmat() is stored in mkey->va_base and
 * returned.
 *
 * A failed shmat() is NOT translated: (void *) -1 is stored and returned
 * exactly as before, so callers have to compare the result against
 * (void *) -1 (NOTE(review): callers are outside this file - confirm they do).
 * The only difference on failure is that the verbose log no longer claims the
 * attach succeeded.
 *
 * @param ds_buf  segment descriptor; only used for sanity checking and logging
 * @param mkey    key to attach; va_base is updated with the shmat() result
 *
 * @return the attached address, 0 for an invalid key, (void *) -1 on failure.
 */
static void *
segment_attach(map_segment_t *ds_buf, sshmem_mkey_t *mkey)
{
    assert(ds_buf);
    assert(mkey);
    assert(mkey->va_base == 0);

    if (MAP_SEGMENT_SHM_INVALID == (int)(mkey->u.key)) {
        return (mkey->va_base);
    }

    /* let the kernel choose the address */
    mkey->va_base = shmat((int)(mkey->u.key), 0, 0);

    if ((void *) -1L == (void *) mkey->va_base) {
        /* errno is read directly inside the macro arguments so that no
         * otherwise-unused local is left behind if the macro expands to
         * nothing
         */
        OPAL_OUTPUT_VERBOSE(
            (70, oshmem_sshmem_base_framework.framework_output,
            "%s: %s: attach failed "
            "(key %llx: %s, errno %d)\n",
            mca_sshmem_sysv_component.super.base_version.mca_type_name,
            mca_sshmem_sysv_component.super.base_version.mca_component_name,
            (unsigned long long)mkey->u.key, strerror(errno), errno)
        );
        return (mkey->va_base);
    }

    OPAL_OUTPUT_VERBOSE(
        (70, oshmem_sshmem_base_framework.framework_output,
        "%s: %s: attach successful "
        "(id: %d, addr: %p size: %lu | va_base: 0x%p len: %d key %llx)\n",
        mca_sshmem_sysv_component.super.base_version.mca_type_name,
        mca_sshmem_sysv_component.super.base_version.mca_component_name,
        ds_buf->seg_id, ds_buf->super.va_base, (unsigned long)ds_buf->seg_size,
        mkey->va_base, mkey->len, (unsigned long long)mkey->u.key)
    );

    return (mkey->va_base);
}
/* ////////////////////////////////////////////////////////////////////////// */
/**
 * Release a sysv segment descriptor.
 *
 * If the descriptor still carries a valid segment id, the segment is marked
 * for removal with IPC_RMID.  The descriptor is then reset.
 *
 * @param ds_buf  segment descriptor to release (must not be NULL)
 * @param mkey    not used by this implementation
 *
 * @return OSHMEM_SUCCESS, unconditionally.
 */
static int
segment_detach(map_segment_t *ds_buf, sshmem_mkey_t *mkey)
{
    assert(ds_buf);

    OPAL_OUTPUT_VERBOSE(
        (70, oshmem_sshmem_base_framework.framework_output,
        "%s: %s: detaching "
        "(id: %d, addr: %p size: %lu)\n",
        mca_sshmem_sysv_component.super.base_version.mca_type_name,
        mca_sshmem_sysv_component.super.base_version.mca_component_name,
        ds_buf->seg_id, ds_buf->super.va_base, (unsigned long)ds_buf->seg_size)
    );

    if (MAP_SEGMENT_SHM_INVALID != ds_buf->seg_id) {
        shmctl(ds_buf->seg_id, IPC_RMID, NULL);
    }

    /* No shmdt() is issued here, in any huge page mode.  This is a workaround
     * for a kernel panic seen when huge pages are detached from user space
     * simultaneously by several processes: the mapping is left in place and
     * the kernel releases it during process cleanup.
     */

    /* wipe the descriptor associated with this shared memory segment */
    shmem_ds_reset(ds_buf);

    return OSHMEM_SUCCESS;
}
/* ////////////////////////////////////////////////////////////////////////// */
/**
 * Unlink a sysv segment descriptor.
 *
 * No system call is made: the stored segment id is replaced by
 * MAP_SEGMENT_SHM_INVALID and the descriptor is passed to
 * MAP_SEGMENT_INVALIDATE().  The remaining descriptor fields are not written
 * by this function.
 *
 * @param ds_buf  segment descriptor to invalidate
 *
 * @return OSHMEM_SUCCESS, unconditionally.
 */
static int
segment_unlink(map_segment_t *ds_buf)
{
    OPAL_OUTPUT_VERBOSE(
        (70, oshmem_sshmem_base_framework.framework_output,
        "%s: %s: unlinking "
        "(id: %d, size: %lu)\n",
        mca_sshmem_sysv_component.super.base_version.mca_type_name,
        mca_sshmem_sysv_component.super.base_version.mca_component_name,
        ds_buf->seg_id, (unsigned long)ds_buf->seg_size)
    );

    /* Partial reset only: forget the id and clear the valid bit.  Size and
     * name values, and the other information kept in flags, stay as they are
     * across unlinks.
     */
    ds_buf->seg_id = MAP_SEGMENT_SHM_INVALID;
    MAP_SEGMENT_INVALIDATE(ds_buf);

    return OSHMEM_SUCCESS;
}
/**
 * Get the current huge page size in bytes.
 *
 * On the first call the "Hugepagesize:" line of /proc/meminfo is parsed and
 * the result is cached in a function-local static; later calls return the
 * cached value.  If the file cannot be opened, the line is missing, or the
 * reported value is zero or too large to be expressed in bytes as a size_t,
 * 2 MiB is used instead.
 *
 * @return the huge page size in bytes; never 0.
 */
size_t sshmem_sysv_gethugepagesize(void)
{
    static size_t huge_page_size = 0;
    const size_t fallback_size = (size_t) 2 * 1024 * 1024;
    /* largest kB count whose byte equivalent still fits in a size_t */
    const size_t max_size_kb = ((size_t) -1) / 1024;
    char buf[256];
    unsigned long long size_kb;
    FILE *f;

    /* already determined by an earlier call */
    if (0 != huge_page_size) {
        return huge_page_size;
    }

    f = fopen("/proc/meminfo", "r");
    if (NULL != f) {
        while (NULL != fgets(buf, sizeof(buf), f)) {
            if (1 != sscanf(buf, "Hugepagesize: %llu kB", &size_kb)) {
                continue;
            }
            /* Parsed as unsigned and range-checked before the multiply: a
             * negative or oversized value is rejected instead of wrapping
             * into a bogus size.
             */
            if (0 != size_kb && size_kb <= max_size_kb) {
                huge_page_size = (size_t) size_kb * 1024;
            }
            break;
        }
        fclose(f);
    }

    if (0 == huge_page_size) {
        huge_page_size = fallback_size;
    }

    return huge_page_size;
}