454 строки
15 KiB
C
454 строки
15 KiB
C
|
/*
 * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana
 *                         University Research and Technology
 *                         Corporation.  All rights reserved.
 * Copyright (c) 2004-2005 The University of Tennessee and The University
 *                         of Tennessee Research Foundation.  All rights
 *                         reserved.
 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
 *                         University of Stuttgart.  All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 *                         All rights reserved.
 * Copyright (c) 2007-2010 Cisco Systems, Inc.  All rights reserved.
 * Copyright (c) 2008      Sun Microsystems, Inc.  All rights reserved.
 * Copyright (c) 2010-2011 Los Alamos National Security, LLC.
 *                         All rights reserved.
 *
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */
|
||
|
|
||
|
#include "opal_config.h"
|
||
|
|
||
|
#include <errno.h>
|
||
|
#ifdef HAVE_FCNTL_H
|
||
|
#include <fcntl.h>
|
||
|
#endif /* HAVE_FCNTL_H */
|
||
|
#ifdef HAVE_SYS_MMAN_H
|
||
|
#include <sys/mman.h>
|
||
|
#endif /* HAVE_SYS_MMAN_H */
|
||
|
#ifdef HAVE_UNISTD_H
|
||
|
#include <unistd.h>
|
||
|
#endif /* HAVE_UNISTD_H */
|
||
|
#ifdef HAVE_SYS_TYPES_H
|
||
|
#include <sys/types.h>
|
||
|
#endif /* HAVE_SYS_TYPES_H */
|
||
|
#ifdef HAVE_STRING_H
|
||
|
#include <string.h>
|
||
|
#endif /* HAVE_STRING_H */
|
||
|
|
||
|
#include "opal/constants.h"
|
||
|
#include "opal_stdint.h"
|
||
|
#include "opal/util/output.h"
|
||
|
#include "opal/util/path.h"
|
||
|
#include "opal/util/show_help.h"
|
||
|
#include "opal/mca/base/mca_base_param.h"
|
||
|
#include "opal/mca/shmem/shmem.h"
|
||
|
#include "opal/mca/shmem/base/base.h"
|
||
|
|
||
|
#include "shmem_mmap.h"
|
||
|
|
||
|
/* for tons of debug output: -mca shmem_base_verbose 70 */
|
||
|
|
||
|
/* ////////////////////////////////////////////////////////////////////////// */
/* local functions */
|
||
|
static int
|
||
|
module_init(void);
|
||
|
|
||
|
static int
|
||
|
segment_create(opal_shmem_ds_t *ds_buf,
|
||
|
const char *file_name,
|
||
|
size_t size);
|
||
|
|
||
|
static int
|
||
|
ds_copy(const opal_shmem_ds_t *from,
|
||
|
opal_shmem_ds_t *to);
|
||
|
|
||
|
static void *
|
||
|
segment_attach(opal_shmem_ds_t *ds_buf);
|
||
|
|
||
|
static int
|
||
|
segment_detach(opal_shmem_ds_t *ds_buf);
|
||
|
|
||
|
static int
|
||
|
segment_unlink(opal_shmem_ds_t *ds_buf);
|
||
|
|
||
|
static int
|
||
|
module_finalize(void);
|
||
|
|
||
|
/*
|
||
|
* mmap shmem module
|
||
|
*/
|
||
|
opal_shmem_mmap_module_t opal_shmem_mmap_module = {
|
||
|
/* super */
|
||
|
{
|
||
|
module_init,
|
||
|
segment_create,
|
||
|
ds_copy,
|
||
|
segment_attach,
|
||
|
segment_detach,
|
||
|
segment_unlink,
|
||
|
module_finalize
|
||
|
}
|
||
|
};
|
||
|
|
||
|
/* ////////////////////////////////////////////////////////////////////////// */
/* private utility functions */
/* ////////////////////////////////////////////////////////////////////////// */

/* ////////////////////////////////////////////////////////////////////////// */
|
||
|
/**
|
||
|
* completely resets the contents of *ds_buf
|
||
|
*/
|
||
|
static inline void
|
||
|
shmem_ds_reset(opal_shmem_ds_t *ds_buf)
|
||
|
{
|
||
|
OPAL_OUTPUT_VERBOSE(
|
||
|
(70, opal_shmem_base_output,
|
||
|
"%s: %s: shmem_ds_resetting "
|
||
|
"(opid: %lu id: %d, size: %"PRIsize_t", name: %s)\n",
|
||
|
mca_shmem_mmap_component.super.base_version.mca_type_name,
|
||
|
mca_shmem_mmap_component.super.base_version.mca_component_name,
|
||
|
(unsigned long)ds_buf->opid, ds_buf->seg_id, ds_buf->seg_size,
|
||
|
ds_buf->seg_name)
|
||
|
);
|
||
|
|
||
|
ds_buf->opid = 0;
|
||
|
ds_buf->seg_cpid = 0;
|
||
|
OPAL_SHMEM_DS_RESET_FLAGS(ds_buf);
|
||
|
ds_buf->seg_id = OPAL_SHMEM_DS_ID_INVALID;
|
||
|
ds_buf->seg_size = 0;
|
||
|
memset(ds_buf->seg_name, '\0', OPAL_PATH_MAX);
|
||
|
}
|
||
|
|
||
|
/* ////////////////////////////////////////////////////////////////////////// */
|
||
|
static int
|
||
|
module_init(void)
|
||
|
{
|
||
|
/* nothing to do */
|
||
|
return OPAL_SUCCESS;
|
||
|
}
|
||
|
|
||
|
/* ////////////////////////////////////////////////////////////////////////// */
|
||
|
static int
|
||
|
module_finalize(void)
|
||
|
{
|
||
|
/* nothing to do */
|
||
|
return OPAL_SUCCESS;
|
||
|
}
|
||
|
|
||
|
/* ////////////////////////////////////////////////////////////////////////// */
|
||
|
static int
|
||
|
ds_copy(const opal_shmem_ds_t *from,
|
||
|
opal_shmem_ds_t *to)
|
||
|
{
|
||
|
pid_t my_pid = getpid();
|
||
|
|
||
|
/* inter-process copy - exclude process-specific data */
|
||
|
if (from->opid != my_pid) {
|
||
|
/* mask out internal flags */
|
||
|
to->flags = (from->flags & OPAL_SHMEM_DS_FLAGS_INTERNAL_MASK);
|
||
|
to->seg_base_addr = NULL;
|
||
|
}
|
||
|
/* i am the owner process, so i can safely copy all the information */
|
||
|
else {
|
||
|
to->flags = from->flags;
|
||
|
to->seg_base_addr = from->seg_base_addr;
|
||
|
}
|
||
|
|
||
|
to->opid = my_pid;
|
||
|
to->seg_id = from->seg_id;
|
||
|
to->seg_size = from->seg_size;
|
||
|
to->seg_cpid = from->seg_cpid;
|
||
|
memcpy(to->seg_name, from->seg_name, OPAL_PATH_MAX);
|
||
|
|
||
|
OPAL_OUTPUT_VERBOSE(
|
||
|
(70, opal_shmem_base_output,
|
||
|
"%s: %s: ds_copy complete "
|
||
|
"from: (opid: %lu, id: %d, size: %"PRIsize_t", "
|
||
|
"name: %s flags: 0x%02x) "
|
||
|
"to: (opid: %lu, id: %d, size: %"PRIsize_t", "
|
||
|
"name: %s flags: 0x%02x)\n",
|
||
|
mca_shmem_mmap_component.super.base_version.mca_type_name,
|
||
|
mca_shmem_mmap_component.super.base_version.mca_component_name,
|
||
|
(unsigned long)from->opid, from->seg_id, from->seg_size,
|
||
|
from->seg_name, from->flags, (unsigned long)to->opid, to->seg_id,
|
||
|
to->seg_size, to->seg_name, to->flags)
|
||
|
);
|
||
|
|
||
|
return OPAL_SUCCESS;
|
||
|
}
|
||
|
|
||
|
/* ////////////////////////////////////////////////////////////////////////// */
|
||
|
static int
|
||
|
segment_create(opal_shmem_ds_t *ds_buf,
|
||
|
const char *file_name,
|
||
|
size_t size)
|
||
|
{
|
||
|
int rc = OPAL_SUCCESS;
|
||
|
char *tmp_fn = NULL;
|
||
|
pid_t my_pid = getpid();
|
||
|
/* the real size of the shared memory segment. this includes enough space
|
||
|
* to store our segment header.
|
||
|
*/
|
||
|
size_t real_size = size + sizeof(opal_shmem_seg_hdr_t);
|
||
|
opal_shmem_seg_hdr_t *seg_hdrp = MAP_FAILED;
|
||
|
|
||
|
/* init the contents of opal_shmem_ds_t */
|
||
|
shmem_ds_reset(ds_buf);
|
||
|
|
||
|
/* determine whether the specified filename is on a network file system.
|
||
|
* this is an important check because if the backing store is located on
|
||
|
* a network filesystem, the user will see a shared memory performance hit.
|
||
|
*
|
||
|
* strduping file_name because opal_path_nfs doesn't take a const char *
|
||
|
*/
|
||
|
if (NULL == (tmp_fn = strdup(file_name))) {
|
||
|
/* out of resources */
|
||
|
return OPAL_ERROR;
|
||
|
}
|
||
|
else if (opal_path_nfs(tmp_fn)) {
|
||
|
char hn[MAXHOSTNAMELEN];
|
||
|
gethostname(hn, MAXHOSTNAMELEN - 1);
|
||
|
hn[MAXHOSTNAMELEN - 1] = '\0';
|
||
|
opal_show_help("help-opal-shmem-mmap.txt", "mmap on nfs", 1, hn,
|
||
|
tmp_fn);
|
||
|
}
|
||
|
free(tmp_fn);
|
||
|
|
||
|
if (-1 == (ds_buf->seg_id = open(file_name, O_CREAT | O_RDWR, 0600))) {
|
||
|
int err = errno;
|
||
|
char hn[MAXHOSTNAMELEN];
|
||
|
gethostname(hn, MAXHOSTNAMELEN - 1);
|
||
|
hn[MAXHOSTNAMELEN - 1] = '\0';
|
||
|
opal_show_help("help-opal-shmem-mmap.txt", "sys call fail", 1, hn,
|
||
|
"open(2)", "", strerror(err), err);
|
||
|
rc = OPAL_ERROR;
|
||
|
goto out;
|
||
|
}
|
||
|
/* size backing file - note the use of real_size here */
|
||
|
else if (0 != ftruncate(ds_buf->seg_id, real_size)) {
|
||
|
int err = errno;
|
||
|
char hn[MAXHOSTNAMELEN];
|
||
|
gethostname(hn, MAXHOSTNAMELEN - 1);
|
||
|
hn[MAXHOSTNAMELEN - 1] = '\0';
|
||
|
opal_show_help("help-opal-shmem-mmap.txt", "sys call fail", 1, hn,
|
||
|
"ftruncate(2)", "", strerror(err), err);
|
||
|
rc = OPAL_ERROR;
|
||
|
goto out;
|
||
|
}
|
||
|
else if (MAP_FAILED == (seg_hdrp = mmap(NULL, real_size,
|
||
|
PROT_READ | PROT_WRITE, MAP_SHARED,
|
||
|
ds_buf->seg_id, 0))) {
|
||
|
int err = errno;
|
||
|
char hn[MAXHOSTNAMELEN];
|
||
|
gethostname(hn, MAXHOSTNAMELEN - 1);
|
||
|
hn[MAXHOSTNAMELEN - 1] = '\0';
|
||
|
opal_show_help("help-opal-shmem-mmap.txt", "sys call fail", 1, hn,
|
||
|
"mmap(2)", "", strerror(err), err);
|
||
|
rc = OPAL_ERROR;
|
||
|
goto out;
|
||
|
}
|
||
|
/* all is well */
|
||
|
else {
|
||
|
/* -- initialize the shared memory segment -- */
|
||
|
opal_atomic_rmb();
|
||
|
|
||
|
/* init segment lock */
|
||
|
opal_atomic_init(&seg_hdrp->lock, OPAL_ATOMIC_UNLOCKED);
|
||
|
/* i was the creator of this segment, so note that fact */
|
||
|
seg_hdrp->cpid = my_pid;
|
||
|
|
||
|
opal_atomic_wmb();
|
||
|
|
||
|
/* -- initialize the contents of opal_shmem_ds_t -- */
|
||
|
ds_buf->opid = my_pid;
|
||
|
ds_buf->seg_cpid = my_pid;
|
||
|
ds_buf->seg_size = real_size;
|
||
|
ds_buf->seg_base_addr = (unsigned char *)seg_hdrp;
|
||
|
strncpy(ds_buf->seg_name, file_name, OPAL_PATH_MAX - 1);
|
||
|
|
||
|
/* set "valid" bit because setment creation was successful */
|
||
|
OPAL_SHMEM_DS_SET_VALID(ds_buf);
|
||
|
|
||
|
OPAL_OUTPUT_VERBOSE(
|
||
|
(70, opal_shmem_base_output,
|
||
|
"%s: %s: create successful "
|
||
|
"(opid: %lu id: %d, size: %"PRIsize_t", name: %s)\n",
|
||
|
mca_shmem_mmap_component.super.base_version.mca_type_name,
|
||
|
mca_shmem_mmap_component.super.base_version.mca_component_name,
|
||
|
(unsigned long)ds_buf->opid, ds_buf->seg_id, ds_buf->seg_size,
|
||
|
ds_buf->seg_name)
|
||
|
);
|
||
|
}
|
||
|
|
||
|
out:
|
||
|
/* in this component, the id is the file descriptor returned by open. this
|
||
|
* check is here to see if it is safe to call close on the file descriptor.
|
||
|
* that is, we are making sure that our call to open was successful and
|
||
|
* we are not not in an error path.
|
||
|
*/
|
||
|
if (-1 != ds_buf->seg_id) {
|
||
|
if (0 != close(ds_buf->seg_id)) {
|
||
|
int err = errno;
|
||
|
char hn[MAXHOSTNAMELEN];
|
||
|
gethostname(hn, MAXHOSTNAMELEN - 1);
|
||
|
hn[MAXHOSTNAMELEN - 1] = '\0';
|
||
|
opal_show_help("help-opal-shmem-mmap.txt", "sys call fail", 1, hn,
|
||
|
"close(2)", "", strerror(err), err);
|
||
|
rc = OPAL_ERROR;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/* an error occured, so invalidate the shmem object and munmap if needed */
|
||
|
if (OPAL_SUCCESS != rc) {
|
||
|
if (MAP_FAILED != seg_hdrp) {
|
||
|
munmap(seg_hdrp, real_size);
|
||
|
}
|
||
|
shmem_ds_reset(ds_buf);
|
||
|
}
|
||
|
return rc;
|
||
|
}
|
||
|
|
||
|
/* ////////////////////////////////////////////////////////////////////////// */
|
||
|
/**
|
||
|
* segment_attach can only be called after a successful call to segment_create
|
||
|
*/
|
||
|
static void *
|
||
|
segment_attach(opal_shmem_ds_t *ds_buf)
|
||
|
{
|
||
|
pid_t my_pid = getpid();
|
||
|
|
||
|
if (my_pid != ds_buf->seg_cpid) {
|
||
|
if (-1 == (ds_buf->seg_id = open(ds_buf->seg_name, O_CREAT | O_RDWR,
|
||
|
0600))) {
|
||
|
int err = errno;
|
||
|
char hn[MAXHOSTNAMELEN];
|
||
|
gethostname(hn, MAXHOSTNAMELEN - 1);
|
||
|
hn[MAXHOSTNAMELEN - 1] = '\0';
|
||
|
opal_show_help("help-opal-shmem-mmap.txt", "sys call fail", 1, hn,
|
||
|
"open(2)", "", strerror(err), err);
|
||
|
return NULL;
|
||
|
}
|
||
|
else if (MAP_FAILED == (ds_buf->seg_base_addr =
|
||
|
mmap(NULL, ds_buf->seg_size,
|
||
|
PROT_READ | PROT_WRITE, MAP_SHARED,
|
||
|
ds_buf->seg_id, 0))) {
|
||
|
int err = errno;
|
||
|
char hn[MAXHOSTNAMELEN];
|
||
|
gethostname(hn, MAXHOSTNAMELEN - 1);
|
||
|
hn[MAXHOSTNAMELEN - 1] = '\0';
|
||
|
opal_show_help("help-opal-shmem-mmap.txt", "sys call fail", 1, hn,
|
||
|
"mmap(2)", "", strerror(err), err);
|
||
|
/* mmap failed, so close the file and return NULL - no error check
|
||
|
* here because we are already in an error path...
|
||
|
*/
|
||
|
close(ds_buf->seg_id);
|
||
|
return NULL;
|
||
|
}
|
||
|
/* all is well */
|
||
|
else {
|
||
|
/* if close fails here, that's okay. just let the user know and
|
||
|
* continue. if we got this far, open and mmap were successful...
|
||
|
*/
|
||
|
if (0 != close(ds_buf->seg_id)) {
|
||
|
int err = errno;
|
||
|
char hn[MAXHOSTNAMELEN];
|
||
|
gethostname(hn, MAXHOSTNAMELEN - 1);
|
||
|
hn[MAXHOSTNAMELEN - 1] = '\0';
|
||
|
opal_show_help("help-opal-shmem-mmap.txt", "sys call fail", 1,
|
||
|
hn, "close(2)", "", strerror(err), err);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
/* else i was the segment creator. nothing to do here because all the hard
|
||
|
* work was done in segment_create :-).
|
||
|
*/
|
||
|
|
||
|
OPAL_OUTPUT_VERBOSE(
|
||
|
(70, opal_shmem_base_output,
|
||
|
"%s: %s: attach successful "
|
||
|
"(opid: %lu id: %d, size: %"PRIsize_t", name: %s)\n",
|
||
|
mca_shmem_mmap_component.super.base_version.mca_type_name,
|
||
|
mca_shmem_mmap_component.super.base_version.mca_component_name,
|
||
|
(unsigned long)ds_buf->opid, ds_buf->seg_id, ds_buf->seg_size,
|
||
|
ds_buf->seg_name)
|
||
|
);
|
||
|
|
||
|
/* update returned base pointer with an offset that hides our stuff */
|
||
|
return (ds_buf->seg_base_addr + sizeof(opal_shmem_seg_hdr_t));
|
||
|
}
|
||
|
|
||
|
/* ////////////////////////////////////////////////////////////////////////// */
|
||
|
static int
|
||
|
segment_detach(opal_shmem_ds_t *ds_buf)
|
||
|
{
|
||
|
int rc = OPAL_SUCCESS;
|
||
|
|
||
|
OPAL_OUTPUT_VERBOSE(
|
||
|
(70, opal_shmem_base_output,
|
||
|
"%s: %s: detaching "
|
||
|
"(opid: %lu id: %d, size: %"PRIsize_t", name: %s)\n",
|
||
|
mca_shmem_mmap_component.super.base_version.mca_type_name,
|
||
|
mca_shmem_mmap_component.super.base_version.mca_component_name,
|
||
|
(unsigned long)ds_buf->opid, ds_buf->seg_id, ds_buf->seg_size,
|
||
|
ds_buf->seg_name)
|
||
|
);
|
||
|
|
||
|
if (0 != munmap(ds_buf->seg_base_addr, ds_buf->seg_size)) {
|
||
|
int err = errno;
|
||
|
char hn[MAXHOSTNAMELEN];
|
||
|
gethostname(hn, MAXHOSTNAMELEN - 1);
|
||
|
hn[MAXHOSTNAMELEN - 1] = '\0';
|
||
|
opal_show_help("help-opal-shmem-mmap.txt", "sys call fail", 1, hn,
|
||
|
"munmap(2)", "", strerror(err), err);
|
||
|
rc = OPAL_ERROR;
|
||
|
}
|
||
|
/* reset the contents of the opal_shmem_ds_t associated with this
|
||
|
* shared memory segment.
|
||
|
*/
|
||
|
shmem_ds_reset(ds_buf);
|
||
|
return rc;
|
||
|
}
|
||
|
|
||
|
/* ////////////////////////////////////////////////////////////////////////// */
|
||
|
static int
|
||
|
segment_unlink(opal_shmem_ds_t *ds_buf)
|
||
|
{
|
||
|
OPAL_OUTPUT_VERBOSE(
|
||
|
(70, opal_shmem_base_output,
|
||
|
"%s: %s: unlinking "
|
||
|
"(opid: %lu id: %d, size: %"PRIsize_t", name: %s)\n",
|
||
|
mca_shmem_mmap_component.super.base_version.mca_type_name,
|
||
|
mca_shmem_mmap_component.super.base_version.mca_component_name,
|
||
|
(unsigned long)ds_buf->opid, ds_buf->seg_id, ds_buf->seg_size,
|
||
|
ds_buf->seg_name)
|
||
|
);
|
||
|
|
||
|
if (-1 == unlink(ds_buf->seg_name)) {
|
||
|
int err = errno;
|
||
|
char hn[MAXHOSTNAMELEN];
|
||
|
gethostname(hn, MAXHOSTNAMELEN - 1);
|
||
|
hn[MAXHOSTNAMELEN - 1] = '\0';
|
||
|
opal_show_help("help-opal-shmem-mmap.txt", "sys call fail", 1, hn,
|
||
|
"unlink(2)", ds_buf->seg_name, strerror(err), err);
|
||
|
return OPAL_ERROR;
|
||
|
}
|
||
|
|
||
|
/* don't completely reset the opal_shmem_ds_t. in particular, only reset
|
||
|
* the id and flip the invalid bit. size and name values will remain valid
|
||
|
* across unlinks. other information stored in flags will remain untouched.
|
||
|
*/
|
||
|
ds_buf->seg_id = OPAL_SHMEM_DS_ID_INVALID;
|
||
|
/* note: this is only chaning the valid bit to 0. this is not the same
|
||
|
* as calling invalidate(ds_buf).
|
||
|
*/
|
||
|
OPAL_SHMEM_DS_INVALIDATE(ds_buf);
|
||
|
return OPAL_SUCCESS;
|
||
|
}
|
||
|
|