1
1

OSHMEM: extract memheap allocate methods into separate framework

- Similar to opal/shmem.
- The next step is some refactoring and a merge into opal/shmem.
Developed by Igor; reviewed by AlexM and MikeD.

This commit fixes trac:4261.

This commit was SVN r30855.

The following Trac tickets were found above:
  Ticket 4261 --> https://svn.open-mpi.org/trac/ompi/ticket/4261
Этот коммит содержится в:
Mike Dubman 2014-02-26 16:32:23 +00:00
родитель dfe4a504e4
Коммит 323e4418b9
52 изменённых файлов: 3206 добавлений и 922 удалений

Просмотреть файл

@ -13,7 +13,7 @@
if OSHMEM_PROFILING
c_pshmem_lib = shmem/c/profile/liboshmem_c_pshmem.la
else
c_pshmem_lib =
c_pshmem_lib =
endif
# Do we have the Fortran bindings?

Просмотреть файл

@ -1,9 +1,9 @@
# Copyright (c) 2013 Mellanox Technologies, Inc.
# All rights reserved.
# $COPYRIGHT$
#
#
# Additional copyrights may follow
#
#
# $HEADER$
#
@ -13,7 +13,7 @@ nodist_headers = \
oshmem_config.h
# Install these in $(includedir)
include_HEADERS =
include_HEADERS =
# Install these in $(includedir)
mppincludedir = $(includedir)/mpp

Просмотреть файл

@ -9,7 +9,7 @@
headers += \
oshmem/constants.h \
oshmem/frameworks.h \
oshmem/frameworks.h \
oshmem/types.h
nodist_headers += \

Просмотреть файл

@ -90,10 +90,10 @@ enum {
SHMEM_ULLONG,
SHMEM_FLOAT,
SHMEM_DOUBLE,
SHMEM_LDOUBLE,
SHMEM_LDOUBLE,
SHMEM_FINT,
SHMEM_FINT4,
SHMEM_FINT,
SHMEM_FINT4,
SHMEM_FINT8
};

Просмотреть файл

@ -11,8 +11,8 @@
#define SHMEM_TYPES_H
/*
* Predefine some internal types so we dont need all the include
/*
* Predefine some internal types so we dont need all the include
* dependencies.
*/

Просмотреть файл

@ -2,9 +2,9 @@
* Copyright (c) 2013 Mellanox Technologies, Inc.
* All rights reserved.
* $COPYRIGHT$
*
*
* Additional copyrights may follow
*
*
* $HEADER$
*
* This file should be included by any file that needs full

Просмотреть файл

@ -38,60 +38,15 @@ OSHMEM_DECLSPEC extern struct mca_memheap_base_module_t* mca_memheap_base_module
#define MEMHEAP_BASE_PRIVATE_SIZE (1ULL << MEMHEAP_BASE_PAGE_ORDER) /* should be at least the same as a huge page size */
#define MEMHEAP_BASE_MIN_SIZE (1ULL << MEMHEAP_BASE_PAGE_ORDER) /* must fit into at least one huge page */
extern void* mca_memheap_base_start_address;
extern char* mca_memheap_base_include;
extern char* mca_memheap_base_exclude;
extern int mca_memheap_base_already_opened;
extern int mca_memheap_base_alloc_type;
extern int mca_memheap_base_key_exchange;
extern int mca_memheap_base_mr_interleave_factor;
#define MCA_MEMHEAP_MAX_SEGMENTS 256
#define HEAP_SEG_INDEX 0
#define SYMB_SEG_INDEX 1
#define MEMHEAP_SHM_INVALID (-1)
#define MEMHEAP_SHM_CODE( type, id ) ((((uint64_t)(type)) << 32) | ((uint32_t)(id)))
#define MEMHEAP_SHM_GET_TYPE( x ) (((uint32_t)((x) >> 32)) & 0xFFFFFFFF)
#define MEMHEAP_SHM_GET_ID( x ) ((uint32_t)((x) & 0xFFFFFFFF))
/*
 * Kind of memory backing a map_segment_t. The value is stored in
 * map_segment_t.type and selects the matching detach routine.
 */
typedef enum {
MAP_SEGMENT_STATIC = 0, /* segment registered by mca_memheap_base_static_init() */
MAP_SEGMENT_ALLOC_MMAP, /* segment obtained with mmap() */
MAP_SEGMENT_ALLOC_SHM, /* segment obtained with shmget()/shmat() */
MAP_SEGMENT_ALLOC_IBV, /* segment obtained through ibv_reg_mr()/ibv_reg_shared_mr() */
MAP_SEGMENT_UNKNOWN /* initial value before any allocation method succeeded */
} segment_type_t;
#if defined(MPAGE_ENABLE) && (MPAGE_ENABLE > 0)
#include <infiniband/verbs.h>
/*
 * State of the InfiniBand device used to allocate the symmetric heap through
 * shared memory registration. A pointer to this structure is stored in
 * map_segment_t.context for MAP_SEGMENT_ALLOC_IBV segments.
 */
typedef struct openib_device_t {
struct ibv_device **ib_devs; /* list returned by ibv_get_device_list() */
struct ibv_device *ib_dev; /* device selected from ib_devs */
struct ibv_context *ib_dev_context; /* context returned by ibv_open_device() */
struct ibv_device_attr ib_dev_attr; /* attributes filled in by ibv_query_device() */
struct ibv_pd *ib_pd; /* protection domain returned by ibv_alloc_pd() */
opal_value_array_t ib_mr_array; /* array of (struct ibv_mr *) registered so far */
struct ibv_mr *ib_mr_shared; /* region returned by the initial ibv_reg_mr() call */
} openib_device_t;
#endif /* MPAGE_ENABLE */
/*
 * Descriptor of one memory segment tracked in the memheap map.
 */
typedef struct map_segment_t {
mca_spml_mkey_t **mkeys_cache; /* includes remote segment bases in va_base */
mca_spml_mkey_t *mkeys; /* includes local segment bases in va_base */
int is_active; /* enable/disable flag */
int shmid; /* shmget() id or shared MR handle; MEMHEAP_SHM_INVALID when there is none */
void* start; /* base address of the segment */
void* end; /* final address of the segment */
size_t size; /* length of the segment */
segment_type_t type; /* type of the segment */
void *context; /* additional data related the segment */
} map_segment_t;
typedef struct mca_memheap_map {
map_segment_t mem_segs[MCA_MEMHEAP_MAX_SEGMENTS]; /* TODO: change into pointer array */
int n_segments;
@ -114,9 +69,9 @@ OSHMEM_DECLSPEC uint64_t mca_memheap_base_find_offset(int pe,
void* va,
void* rva);
OSHMEM_DECLSPEC int mca_memheap_base_is_symmetric_addr(const void* va);
OSHMEM_DECLSPEC mca_spml_mkey_t *mca_memheap_base_get_mkey(void* va,
OSHMEM_DECLSPEC sshmem_mkey_t *mca_memheap_base_get_mkey(void* va,
int tr_id);
OSHMEM_DECLSPEC mca_spml_mkey_t * mca_memheap_base_get_cached_mkey(int pe,
OSHMEM_DECLSPEC sshmem_mkey_t * mca_memheap_base_get_cached_mkey(int pe,
void* va,
int btl_id,
void** rva);

Просмотреть файл

@ -13,139 +13,21 @@
#include "oshmem_config.h"
#include "oshmem/util/oshmem_util.h"
#include "oshmem/mca/sshmem/sshmem.h"
#include "oshmem/mca/sshmem/base/base.h"
#include "oshmem/mca/memheap/memheap.h"
#include "oshmem/mca/memheap/base/base.h"
#ifdef HAVE_SYS_MMAN_H
#include <sys/mman.h>
#endif
#include <sys/ipc.h>
#include <sys/shm.h>
#include <sys/stat.h>
#if defined(MPAGE_ENABLE) && (MPAGE_ENABLE > 0)
#include <infiniband/verbs.h>
#endif /* MPAGE_ENABLE */
#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
# define MAP_ANONYMOUS MAP_ANON
#endif /* MAP_ANONYMOUS and MAP_ANON */
#if !defined(MAP_FAILED)
# define MAP_FAILED ((char*)-1)
#endif /* MAP_FAILED */
extern char* mca_memheap_base_param_hca_name;
static int _shm_attach(map_segment_t *, size_t, int, int);
static void _shm_detach(map_segment_t *);
static int _mmap_attach(map_segment_t *, size_t);
static void _mmap_detach(map_segment_t *);
#if defined(MPAGE_ENABLE) && (MPAGE_ENABLE > 0)
static int _ibv_attach(map_segment_t *, size_t);
static void _ibv_detach(map_segment_t *);
#endif /* MPAGE_ENABLE */
static int _adaptive_attach(map_segment_t *, size_t);
int mca_memheap_base_alloc_init(mca_memheap_map_t *map, size_t size)
{
int ret = OSHMEM_SUCCESS;
int value = mca_memheap_base_alloc_type;
assert(map);
assert(HEAP_SEG_INDEX == map->n_segments);
MEMHEAP_VERBOSE(5,
"memheap method : %d",
mca_memheap_base_alloc_type);
map_segment_t *s = &map->mem_segs[map->n_segments];
memset(s, 0, sizeof(*s));
s->is_active = 0;
s->shmid = MEMHEAP_SHM_INVALID;
s->start = 0;
s->end = 0;
s->size = 0;
s->type = MAP_SEGMENT_UNKNOWN;
s->context = NULL;
switch (value) {
case 0:
/* use sysv alloc without hugepages */
ret = _shm_attach(s, size, 0, 1);
break;
case 1:
ret = _shm_attach(s, size, 1, 1);
if (OSHMEM_SUCCESS != ret)
ret = _shm_attach(s, size, 0, 1);
break;
case 2:
/* huge pages only */
ret = _shm_attach(s, size, 1, 1);
if (OSHMEM_SUCCESS != ret)
MEMHEAP_ERROR("FAILED to allocated symmetric heap using hugepages fallback is disabled, errno=%d",
errno);
break;
case 3:
/* huge pages only + cleanup shmid */
ret = _shm_attach(s, size, 1, 0);
if (OSHMEM_SUCCESS != ret)
MEMHEAP_ERROR("FAILED to allocated symmetric heap using hugepages fallback is disabled, errno=%d",
errno);
break;
case 4:
/* use sysv alloc without hugepages */
ret = _shm_attach(s, size, 0, 0);
break;
#if defined(MPAGE_ENABLE) && (MPAGE_ENABLE > 0)
case 5:
/* use shared memory registration (mpages) */
ret = _ibv_attach(s, size);
if (OSHMEM_SUCCESS != ret)
ret = _shm_attach(s, size, 0, 1);
break;
#endif /* MPAGE_ENABLE */
case 100:
/* use mmap. It will severaly impact performance of intra node communication */
ret = _mmap_attach(s, size);
MEMHEAP_VERBOSE(1,
"mmap() memheap allocation will severely impact performance of intra node communication");
break;
case 101:
ret = _shm_attach(s, size, 1, 1);
if (OSHMEM_SUCCESS != ret) {
MEMHEAP_ERROR("Failed to allocate hugepages. Falling back on regular allocation");
ret = _mmap_attach(s, size);
} else {
s->shmid = MEMHEAP_SHM_INVALID;
}
MEMHEAP_VERBOSE(1, "SM BTL will be always used for intranode comm\n");
break;
case 102:
ret = _shm_attach(s, size, 1, 1);
if (OSHMEM_SUCCESS != ret) {
MEMHEAP_ERROR("FAILED to allocated symmetric heap using hugepages fallback is disabled, errno=%d",
errno);
} else {
s->shmid = MEMHEAP_SHM_INVALID;
}
break;
default:
ret = _adaptive_attach(s, size);
}
ret = mca_sshmem_segment_create(s, "", size);
if (OSHMEM_SUCCESS == ret) {
map->n_segments++;
@ -164,397 +46,7 @@ void mca_memheap_base_alloc_exit(mca_memheap_map_t *map)
assert(s);
switch (s->type) {
case MAP_SEGMENT_ALLOC_SHM:
_shm_detach(s);
break;
case MAP_SEGMENT_ALLOC_MMAP:
_mmap_detach(s);
break;
#if defined(MPAGE_ENABLE) && (MPAGE_ENABLE > 0)
case MAP_SEGMENT_ALLOC_IBV:
_ibv_detach(s);
break;
#endif /* MPAGE_ENABLE */
default:
MEMHEAP_ERROR("Unknown segment type: %d", (int)s->type);
}
mca_sshmem_segment_detach(s, NULL);
mca_sshmem_unlink(s);
}
}
/*
 * Try the available allocation methods one after another until one succeeds:
 *   1. shared IB memory registration (only when built with MPAGE_ENABLE),
 *   2. SysV shm with huge pages, segment id removed right after attach,
 *   3. SysV shm with regular pages, segment id removed right after attach,
 *   4. SysV shm with regular pages, segment id kept,
 *   5. mmap().
 *
 * s    - segment descriptor filled in by the method that succeeds
 * size - number of bytes to allocate
 *
 * Returns OSHMEM_SUCCESS as soon as one method succeeds, otherwise the error
 * code of the last method tried.
 */
static int _adaptive_attach(map_segment_t *s, size_t size)
{
    /*
     * Start from a failure code. With OSHMEM_SUCCESS as the initial value a
     * build without MPAGE_ENABLE skipped every fallback below and reported
     * success without allocating anything.
     */
    int rc = OSHMEM_ERROR;

#if defined(MPAGE_ENABLE) && (MPAGE_ENABLE > 0)
    rc = _ibv_attach(s, size);
#endif /* MPAGE_ENABLE */

    if (OSHMEM_SUCCESS != rc) {
        rc = _shm_attach(s, size, 1, 1);
    }

    if (OSHMEM_SUCCESS != rc) {
        rc = _shm_attach(s, size, 0, 1);
    }

    if (OSHMEM_SUCCESS != rc) {
        rc = _shm_attach(s, size, 0, 0);
    }

    if (OSHMEM_SUCCESS != rc) {
        rc = _mmap_attach(s, size);
    }

    return rc;
}
/*
 * Allocate a SysV shared memory segment and attach it at
 * mca_memheap_base_start_address.
 *
 * s       - segment descriptor to fill in on success
 * size    - number of bytes to allocate
 * use_hp  - non-zero requests SHM_HUGETLB (when the platform defines it)
 * do_rmid - non-zero marks the segment for removal right after attaching
 *
 * Returns OSHMEM_SUCCESS, OSHMEM_ERROR when shmget() fails, or
 * OSHMEM_ERR_OUT_OF_RESOURCE when shmat() fails.
 */
static int _shm_attach(map_segment_t *s, size_t size, int use_hp, int do_rmid)
{
    /* Remembers the last huge page request; s->context points at it. */
    static int hp_requested = 0;
    int shm_flags = IPC_CREAT | IPC_EXCL | S_IRUSR | S_IWUSR;
    int seg_id;
    void *base;

    assert(s);

    hp_requested = use_hp;

#if defined (SHM_HUGETLB)
    if (use_hp) {
        shm_flags |= SHM_HUGETLB;
    }
#endif

    /* Create a new shared memory segment and save the shmid. */
    seg_id = shmget(IPC_PRIVATE, size, shm_flags);
    if (MEMHEAP_SHM_INVALID == seg_id) {
        MEMHEAP_VERBOSE(1, "Failed to get shm segment (errno=%d)", errno);
        return OSHMEM_ERROR;
    }

    /* Attach to the segment */
    base = shmat(seg_id, (void *) mca_memheap_base_start_address, 0);
    if ((void *) -1L == base) {
        MEMHEAP_VERBOSE(1, "Failed to attach to shm segment (errno=%d)", errno);
        shmctl(seg_id, IPC_RMID, NULL);
        return OSHMEM_ERR_OUT_OF_RESOURCE;
    }

    MEMHEAP_VERBOSE(5, "got shmid %d", seg_id);

    if (do_rmid) {
        shmctl(seg_id, IPC_RMID, NULL);
    }

    s->type = MAP_SEGMENT_ALLOC_SHM;
    s->shmid = seg_id;
    s->start = base;
    s->size = size;
    s->end = (void*)((uintptr_t)base + size);
    s->context = &hp_requested;

    return OSHMEM_SUCCESS;
}
/*
 * Release a segment created by _shm_attach(): mark its shmid for removal
 * when the segment still carries one.
 */
static void _shm_detach(map_segment_t *s)
{
    assert(s);

    if (MEMHEAP_SHM_INVALID != s->shmid) {
        shmctl(s->shmid, IPC_RMID, NULL);
    }

    /*
     * The segment is intentionally never detached with shmdt() here.
     * For huge page segments (context flag > 0) this works around a kernel
     * panic seen when several processes detach huge pages simultaneously;
     * the kernel detaches the memory during process cleanup instead.
     */
}
/*
 * Allocate the segment with an anonymous shared mmap() placed at
 * mca_memheap_base_start_address (MAP_FIXED).
 *
 * s    - segment descriptor to fill in on success
 * size - number of bytes to map
 *
 * Returns OSHMEM_SUCCESS, or OSHMEM_ERR_OUT_OF_RESOURCE when mmap() fails.
 */
static int _mmap_attach(map_segment_t *s, size_t size)
{
    void *addr = NULL;
    int flags = MAP_SHARED | MAP_FIXED;

    assert(s);

#if defined(MAP_ANONYMOUS)
    flags |= MAP_ANONYMOUS;
#endif

    /*
     * Pass -1 as the file descriptor: some implementations require it for
     * anonymous mappings, and it guarantees that stdin (fd 0) is never
     * mapped by accident on a platform lacking MAP_ANONYMOUS.
     */
    addr = mmap((void *) mca_memheap_base_start_address,
                size,
                PROT_READ | PROT_WRITE,
                flags,
                -1,
                0);
    if (MAP_FAILED == addr) {
        MEMHEAP_ERROR("Failed to mmap() %llu bytes (errno=%d)",
                      (unsigned long long)size, errno);
        return OSHMEM_ERR_OUT_OF_RESOURCE;
    }

    s->type = MAP_SEGMENT_ALLOC_MMAP;
    s->shmid = MEMHEAP_SHM_INVALID;
    s->start = addr;
    s->size = size;
    s->end = (void*)((uintptr_t)s->start + s->size);
    s->context = NULL;

    return OSHMEM_SUCCESS;
}
/*
 * Unmap a segment created by _mmap_attach(). A failing munmap() is reported
 * instead of being silently ignored.
 */
static void _mmap_detach(map_segment_t *s)
{
    assert(s);

    if (0 != munmap((void *) s->start, s->size)) {
        MEMHEAP_ERROR("Failed to munmap() %llu bytes at %p (errno=%d)",
                      (unsigned long long)s->size, (void *) s->start, errno);
    }
}
#if defined(MPAGE_ENABLE) && (MPAGE_ENABLE > 0)
/*
 * Allocate the segment through shared InfiniBand memory registration.
 *
 * Steps: list the IB devices, open the first one (or the one named by
 * mca_memheap_base_param_hca_name), query its attributes, allocate a
 * protection domain, let ibv_reg_mr() allocate the memory, then map that
 * region at mca_memheap_base_start_address with ibv_reg_shared_mr().
 *
 * s    - segment descriptor to fill in on success
 * size - number of bytes to allocate
 *
 * Returns OSHMEM_SUCCESS, OSHMEM_ERR_NOT_SUPPORTED when no usable device is
 * found, OSHMEM_ERR_RESOURCE_BUSY when a device cannot be opened, queried or
 * given a protection domain, or OSHMEM_ERR_OUT_OF_RESOURCE when a memory
 * registration fails. On failure every resource acquired here is released.
 */
static int _ibv_attach(map_segment_t *s, size_t size)
{
    int rc = OSHMEM_SUCCESS;
    static openib_device_t memheap_device;
    openib_device_t *device = &memheap_device;
    const char *wanted = mca_memheap_base_param_hca_name;
    struct ibv_mr *ib_mr = NULL;
    int num_devs = 0;
    int mr_array_ready = 0;

    assert(s);

    memset(device, 0, sizeof(*device));

#ifdef HAVE_IBV_GET_DEVICE_LIST
    device->ib_devs = ibv_get_device_list(&num_devs);
#else
#error unsupported ibv_get_device_list in infiniband/verbs.h
#endif

    if (num_devs == 0 || !device->ib_devs) {
        rc = OSHMEM_ERR_NOT_SUPPORTED;
    }

    /* Open device */
    if (OSHMEM_SUCCESS == rc) {
        int i;

        if (num_devs > 1) {
            if (NULL == wanted) {
                MEMHEAP_VERBOSE(5, "found %d HCAs, choosing the first", num_devs);
            } else {
                MEMHEAP_VERBOSE(5, "found %d HCAs, searching for %s", num_devs, wanted);
            }
        }

        /* Stays in effect when every device opens but none matches the
         * requested name; previously the last opened device was used. */
        rc = OSHMEM_ERR_NOT_SUPPORTED;

        for (i = 0; i < num_devs; i++) {
            struct ibv_device *dev = device->ib_devs[i];
            struct ibv_context *ctx = ibv_open_device(dev);

            if (NULL == ctx) {
                MEMHEAP_ERROR("error obtaining device context for %s errno says %d: %s",
                              ibv_get_device_name(dev), errno, strerror(errno));
                rc = OSHMEM_ERR_RESOURCE_BUSY;
                continue;
            }

            if (NULL != wanted && 0 != strcmp(wanted, ibv_get_device_name(dev))) {
                /* Not the requested HCA: close it instead of leaking the context. */
                (void) ibv_close_device(ctx);
                continue;
            }

            /* Never hand a NULL pointer to %s. */
            MEMHEAP_VERBOSE(5, "mca_memheap_base_param_hca_name = %s, selected %s as %d of %d",
                            (NULL != wanted) ? wanted : "(null)",
                            ibv_get_device_name(dev), i, num_devs);
            device->ib_dev = dev;
            device->ib_dev_context = ctx;
            rc = OSHMEM_SUCCESS;
            break;
        }
    }

    /* Obtain device attributes */
    if (OSHMEM_SUCCESS == rc) {
        if (ibv_query_device(device->ib_dev_context, &device->ib_dev_attr)) {
            MEMHEAP_ERROR("error obtaining device attributes for %s errno says %d: %s",
                          ibv_get_device_name(device->ib_dev), errno, strerror(errno));
            rc = OSHMEM_ERR_RESOURCE_BUSY;
        } else {
            MEMHEAP_VERBOSE(5, "ibv device %s",
                            ibv_get_device_name(device->ib_dev));
        }
    }

    /* Allocate the protection domain for the device */
    if (OSHMEM_SUCCESS == rc) {
        device->ib_pd = ibv_alloc_pd(device->ib_dev_context);
        if (NULL == device->ib_pd) {
            MEMHEAP_ERROR("error allocating protection domain for %s errno says %d: %s",
                          ibv_get_device_name(device->ib_dev), errno, strerror(errno));
            rc = OSHMEM_ERR_RESOURCE_BUSY;
        }
    }

    /* Allocate memory */
    if (OSHMEM_SUCCESS == rc) {
        void *addr = NULL;
        /* This function is only compiled with MPAGE_ENABLE, so the
         * allocate/shared-MR flags are always part of the request. */
        int access_flag = IBV_ACCESS_LOCAL_WRITE |
                          IBV_ACCESS_REMOTE_WRITE |
                          IBV_ACCESS_REMOTE_READ |
                          IBV_ACCESS_ALLOCATE_MR |
                          IBV_ACCESS_SHARED_MR_USER_READ |
                          IBV_ACCESS_SHARED_MR_USER_WRITE;

        OBJ_CONSTRUCT(&device->ib_mr_array, opal_value_array_t);
        opal_value_array_init(&device->ib_mr_array, sizeof(struct ibv_mr *));
        mr_array_ready = 1;

        /* addr is NULL: the verbs layer allocates the memory itself. */
        ib_mr = ibv_reg_mr(device->ib_pd, addr, size, access_flag);
        if (NULL == ib_mr) {
            MEMHEAP_ERROR("error to ibv_reg_mr() %llu bytes errno says %d: %s",
                          (unsigned long long)size, errno, strerror(errno));
            rc = OSHMEM_ERR_OUT_OF_RESOURCE;
        } else {
            device->ib_mr_shared = ib_mr;
            opal_value_array_append_item(&device->ib_mr_array, &ib_mr);
        }

        /* Map the freshly allocated region at the requested base address. */
        if (OSHMEM_SUCCESS == rc) {
            access_flag = IBV_ACCESS_LOCAL_WRITE |
                          IBV_ACCESS_REMOTE_WRITE |
                          IBV_ACCESS_REMOTE_READ |
                          IBV_ACCESS_NO_RDMA;

            addr = (void *)mca_memheap_base_start_address;
            ib_mr = ibv_reg_shared_mr(device->ib_mr_shared->handle,
                                      device->ib_pd, addr, access_flag);
            if (NULL == ib_mr) {
                MEMHEAP_ERROR("error to ibv_reg_shared_mr() %llu bytes errno says %d: %s",
                              (unsigned long long)size, errno, strerror(errno));
                rc = OSHMEM_ERR_OUT_OF_RESOURCE;
            } else {
                opal_value_array_append_item(&device->ib_mr_array, &ib_mr);
            }
        }

        if (OSHMEM_SUCCESS == rc) {
            assert(size == device->ib_mr_shared->length);

            s->type = MAP_SEGMENT_ALLOC_IBV;
            s->shmid = device->ib_mr_shared->handle;
            s->start = ib_mr->addr;
            s->size = size;
            s->end = (void*)((uintptr_t)s->start + s->size);
            s->context = &memheap_device;
        }
    }

    /*
     * On failure s->context is never set, so _ibv_detach() will not run for
     * this attempt: release everything acquired above right here.
     */
    if (OSHMEM_SUCCESS != rc) {
        if (mr_array_ready) {
            struct ibv_mr **mrs = OPAL_VALUE_ARRAY_GET_BASE(&device->ib_mr_array, struct ibv_mr *);
            size_t k = opal_value_array_get_size(&device->ib_mr_array);

            /* Deregister in reverse order of registration. */
            while (k > 0) {
                --k;
                (void) ibv_dereg_mr(mrs[k]);
            }
            OBJ_DESTRUCT(&device->ib_mr_array);
        }
        if (NULL != device->ib_pd) {
            (void) ibv_dealloc_pd(device->ib_pd);
        }
        if (NULL != device->ib_dev_context) {
            (void) ibv_close_device(device->ib_dev_context);
        }
        if (NULL != device->ib_devs) {
            ibv_free_device_list(device->ib_devs);
        }
        memset(device, 0, sizeof(*device));
    }

    return rc;
}
/*
 * Tear down the IB resources referenced by s->context: deregister all
 * memory regions, then release the protection domain, the device context
 * and the device list. Once a step fails, the later steps are skipped.
 */
static void _ibv_detach(map_segment_t *s)
{
    openib_device_t *dev_state;
    int status = OSHMEM_SUCCESS;

    assert(s);

    dev_state = (openib_device_t *)s->context;
    if (NULL == dev_state) {
        return;
    }

    /* Memory regions: always drop the head item until the array is empty. */
    if (!status && opal_value_array_get_size(&dev_state->ib_mr_array)) {
        struct ibv_mr **regions =
            OPAL_VALUE_ARRAY_GET_BASE(&dev_state->ib_mr_array, struct ibv_mr *);

        while (opal_value_array_get_size(&dev_state->ib_mr_array) > 0) {
            struct ibv_mr *head = regions[0];

            if (ibv_dereg_mr(head)) {
                MEMHEAP_ERROR("error ibv_dereg_mr(): %d: %s", errno, strerror(errno));
                status = OSHMEM_ERROR;
            }
            opal_value_array_remove_item(&dev_state->ib_mr_array, 0);
        }

        if (!status) {
            dev_state->ib_mr_shared = NULL;
        }
        OBJ_DESTRUCT(&dev_state->ib_mr_array);
    }

    /* Protection domain */
    if (!status && dev_state->ib_pd) {
        if (0 == ibv_dealloc_pd(dev_state->ib_pd)) {
            dev_state->ib_pd = NULL;
        } else {
            MEMHEAP_ERROR("error ibv_dealloc_pd(): %d: %s", errno, strerror(errno));
            status = OSHMEM_ERROR;
        }
    }

    /* Device context */
    if (!status && dev_state->ib_dev_context) {
        if (0 == ibv_close_device(dev_state->ib_dev_context)) {
            dev_state->ib_dev_context = NULL;
        } else {
            MEMHEAP_ERROR("error ibv_close_device(): %d: %s", errno, strerror(errno));
            status = OSHMEM_ERROR;
        }
    }

    /* Device list */
    if (!status && dev_state->ib_devs) {
        ibv_free_device_list(dev_state->ib_devs);
        dev_state->ib_devs = NULL;
    }
}
#endif /* MPAGE_ENABLE */

Просмотреть файл

@ -27,19 +27,11 @@
#include "oshmem/mca/memheap/base/static-components.h"
#if defined(MPAGE_ENABLE) && (MPAGE_ENABLE > 0)
int mca_memheap_base_alloc_type = 5;
#else
int mca_memheap_base_alloc_type = 1;
#endif /* MPAGE_ENABLE */
void* mca_memheap_base_start_address = (void*)0xFF000000;
int mca_memheap_base_output = -1;
int mca_memheap_base_key_exchange = 1;
int mca_memheap_base_mr_interleave_factor = 2;
char* mca_memheap_base_include = NULL;
char* mca_memheap_base_exclude = NULL;
char* mca_memheap_base_param_hca_name = NULL;
opal_list_t mca_memheap_base_components_opened;
struct mca_memheap_base_module_t* mca_memheap_base_module_initialized = NULL;
int mca_memheap_base_already_opened = 0;
@ -48,47 +40,6 @@ mca_memheap_map_t mca_memheap_base_map;
static int mca_memheap_base_register(mca_base_register_flag_t flags)
{
#if defined(MPAGE_ENABLE) && (MPAGE_ENABLE > 0)
(void) mca_base_var_register("oshmem",
"memheap",
"base",
"alloc_type",
"0|1|2|5 - disabled huge pages, enabled huge pages with fallback to mmap(), do not fallback to mmap(), enabled mpages(default)",
MCA_BASE_VAR_TYPE_INT,
NULL,
0,
MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_memheap_base_alloc_type);
#else
(void) mca_base_var_register("oshmem",
"memheap",
"base",
"alloc_type",
"0|1|2 - disabled huge pages, enabled huge pages(default) with fallback to mmap(), do not fallback to mmap()",
MCA_BASE_VAR_TYPE_INT,
NULL,
0,
MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_memheap_base_alloc_type);
#endif /* MPAGE_ENABLE */
(void) mca_base_var_register("oshmem",
"memheap",
"base",
"start_address",
"Specify base address for shared memory region",
MCA_BASE_VAR_TYPE_UNSIGNED_LONG_LONG,
NULL,
0,
MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_memheap_base_start_address);
(void) mca_base_var_register("oshmem",
"memheap",
"base",
@ -102,19 +53,6 @@ static int mca_memheap_base_register(mca_base_register_flag_t flags)
MCA_BASE_VAR_SCOPE_READONLY,
&mca_memheap_base_key_exchange);
(void) mca_base_var_register("oshmem",
"memheap",
"base",
"mr_interleave_factor",
"2 - default, try to give at least N Gbytes spaces between mapped memheaps of other pes that are local to me",
MCA_BASE_VAR_TYPE_INT,
NULL,
0,
MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_memheap_base_mr_interleave_factor);
(void) mca_base_var_register("oshmem",
"memheap",
"base",
@ -149,19 +87,6 @@ static int mca_memheap_base_register(mca_base_register_flag_t flags)
MCA_BASE_VAR_SCOPE_READONLY,
&mca_memheap_base_exclude);
(void) mca_base_var_register("oshmem",
"memheap",
"base",
"hca_name",
"Specify excluded memheap implementations",
MCA_BASE_VAR_TYPE_STRING,
NULL,
0,
MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_memheap_base_param_hca_name);
return OSHMEM_SUCCESS;
}

Просмотреть файл

@ -18,20 +18,12 @@
#include "oshmem/proc/proc.h"
#include "oshmem/util/oshmem_util.h"
#include "oshmem/runtime/runtime.h"
#include "oshmem/mca/sshmem/sshmem.h"
#include "oshmem/mca/sshmem/base/base.h"
#include "oshmem/mca/memheap/memheap.h"
#include "oshmem/mca/memheap/base/base.h"
#include "oshmem/mca/spml/spml.h"
#ifdef HAVE_SYS_MMAN_H
#include <sys/mman.h>
#endif
#include <sys/ipc.h>
#include <sys/shm.h>
#if defined(MPAGE_ENABLE) && (MPAGE_ENABLE > 0)
#include <infiniband/verbs.h>
#endif /* MPAGE_ENABLE */
/* Turn ON/OFF debug output from build (default 0) */
#ifndef MEMHEAP_BASE_DEBUG
@ -47,7 +39,7 @@
struct oob_comm {
opal_mutex_t lck;
opal_condition_t cond;
mca_spml_mkey_t *mkeys;
sshmem_mkey_t *mkeys;
int mkeys_rcvd;
MPI_Request recv_req;
char buf[MEMHEAP_MKEY_MAXSIZE];
@ -66,11 +58,11 @@ static int oshmem_mkey_recv_cb(void);
/* pickup list of rkeys and remote va */
static int memheap_oob_get_mkeys(int pe,
uint32_t va_seg_num,
mca_spml_mkey_t *mkey);
sshmem_mkey_t *mkey);
static inline void* __seg2base_va(int seg)
{
return memheap_map->mem_segs[seg].start;
return memheap_map->mem_segs[seg].seg_base_addr;
}
static int _seg_cmp(const void *k, const void *v)
@ -78,7 +70,7 @@ static int _seg_cmp(const void *k, const void *v)
uintptr_t va = (uintptr_t) k;
map_segment_t *s = (map_segment_t *) v;
if (va < (uintptr_t)s->start)
if (va < (uintptr_t)s->seg_base_addr)
return -1;
if (va >= (uintptr_t)s->end)
return 1;
@ -90,7 +82,7 @@ static inline map_segment_t *__find_va(const void* va)
{
map_segment_t *s;
if (OPAL_LIKELY((uintptr_t)va >= (uintptr_t)memheap_map->mem_segs[HEAP_SEG_INDEX].start &&
if (OPAL_LIKELY((uintptr_t)va >= (uintptr_t)memheap_map->mem_segs[HEAP_SEG_INDEX].seg_base_addr &&
(uintptr_t)va < (uintptr_t)memheap_map->mem_segs[HEAP_SEG_INDEX].end)) {
s = &memheap_map->mem_segs[HEAP_SEG_INDEX];
} else {
@ -105,9 +97,9 @@ static inline map_segment_t *__find_va(const void* va)
if (s) {
MEMHEAP_VERBOSE(5, "match seg#%02ld: 0x%llX - 0x%llX %llu bytes va=%p",
s - memheap_map->mem_segs,
(long long)s->start,
(long long)s->seg_base_addr,
(long long)s->end,
(long long)(s->end - s->start),
(long long)(s->end - s->seg_base_addr),
(void *)va);
}
#endif
@ -123,7 +115,7 @@ static int pack_local_mkeys(opal_buffer_t *msg, int pe, int seg, int all_trs)
{
oshmem_proc_t *proc;
int i, n, tr_id;
mca_spml_mkey_t *mkey;
sshmem_mkey_t *mkey;
/* go over all transports to remote pe and pack mkeys */
if (!all_trs) {
@ -167,72 +159,21 @@ static int pack_local_mkeys(opal_buffer_t *msg, int pe, int seg, int all_trs)
return OSHMEM_SUCCESS;
}
static void memheap_attach_segment(mca_spml_mkey_t *mkey, int tr_id)
static void memheap_attach_segment(sshmem_mkey_t *mkey, int tr_id)
{
/* process special case when va was got using shmget(IPC_PRIVATE)
* this case is notable for:
* - key is set as (type|shmid);
* - key is set as (type|seg_id);
* - va_base is set as 0;
*/
assert(mkey->va_base == 0);
if (MEMHEAP_SHM_INVALID == (int) MEMHEAP_SHM_GET_ID(mkey->u.key)) {
return;
}
MEMHEAP_VERBOSE(5,
"shared memory usage tr_id: %d key %llx base_va %p shmid 0x%X|0x%X",
"shared memory usage tr_id: %d va_base: 0x%p len: %d key %llx",
tr_id,
(unsigned long long)mkey->u.key,
mkey->va_base,
MEMHEAP_SHM_GET_TYPE(mkey->u.key),
MEMHEAP_SHM_GET_ID(mkey->u.key));
mkey->va_base, mkey->len, (unsigned long long)mkey->u.key);
if (MAP_SEGMENT_ALLOC_SHM == MEMHEAP_SHM_GET_TYPE(mkey->u.key)) {
mkey->va_base = shmat(MEMHEAP_SHM_GET_ID(mkey->u.key),
0,
0);
} else if (MAP_SEGMENT_ALLOC_IBV == MEMHEAP_SHM_GET_TYPE(mkey->u.key)) {
#if defined(MPAGE_ENABLE) && (MPAGE_ENABLE > 0)
openib_device_t *device = NULL;
struct ibv_mr *ib_mr;
void *addr;
static int mr_count;
int access_flag = IBV_ACCESS_LOCAL_WRITE |
IBV_ACCESS_REMOTE_WRITE |
IBV_ACCESS_REMOTE_READ |
IBV_ACCESS_NO_RDMA;
device = (openib_device_t *)memheap_map->mem_segs[HEAP_SEG_INDEX].context;
assert(device);
/* workaround mtt problem - request aligned addresses */
++mr_count;
addr = (void *)((uintptr_t)mca_memheap_base_start_address + mca_memheap_base_mr_interleave_factor*1024ULL*1024ULL*1024ULL*mr_count);
ib_mr = ibv_reg_shared_mr(MEMHEAP_SHM_GET_ID(mkey->u.key),
device->ib_pd, addr, access_flag);
if (NULL == ib_mr) {
mkey->va_base = (void*)-1;
MEMHEAP_ERROR("error to ibv_reg_shared_mr() errno says %d: %s",
errno, strerror(errno));
} else {
if (ib_mr->addr != addr) {
MEMHEAP_WARN("Failed to map shared region to address %p got addr %p. Try to increase 'memheap_mr_interleave_factor' from %d", addr, ib_mr->addr, mca_memheap_base_mr_interleave_factor);
}
opal_value_array_append_item(&device->ib_mr_array, &ib_mr);
mkey->va_base = ib_mr->addr;
}
#endif /* MPAGE_ENABLE */
} else {
MEMHEAP_ERROR("tr_id: %d key %llx attach failed: incorrect shmid 0x%X|0x%X",
tr_id,
(unsigned long long)mkey->u.key,
MEMHEAP_SHM_GET_TYPE(mkey->u.key),
MEMHEAP_SHM_GET_ID(mkey->u.key));
oshmem_shmem_abort(-1);
}
mca_sshmem_segment_attach(&(memheap_map->mem_segs[HEAP_SEG_INDEX]), mkey);
if ((void *) -1 == (void *) mkey->va_base) {
MEMHEAP_ERROR("tr_id: %d key %llx attach failed: errno = %d",
@ -512,7 +453,7 @@ static int send_buffer(int pe, opal_buffer_t *msg)
return rc;
}
static int memheap_oob_get_mkeys(int pe, uint32_t seg, mca_spml_mkey_t *mkeys)
static int memheap_oob_get_mkeys(int pe, uint32_t seg, sshmem_mkey_t *mkeys)
{
opal_buffer_t *msg;
uint8_t cmd;
@ -632,8 +573,8 @@ void mca_memheap_modex_recv_all(void)
if (NULL != s->mkeys_cache[i]) {
MEMHEAP_VERBOSE(10, "PE%d: segment%d already exists, mkey will be replaced", i, j);
} else {
s->mkeys_cache[i] = (mca_spml_mkey_t *) calloc(memheap_map->num_transports,
sizeof(mca_spml_mkey_t));
s->mkeys_cache[i] = (sshmem_mkey_t *) calloc(memheap_map->num_transports,
sizeof(sshmem_mkey_t));
if (NULL == s->mkeys_cache[i]) {
MEMHEAP_ERROR("PE%d: segment%d: Failed to allocate mkeys cache entry", i, j);
oshmem_shmem_abort(-1);
@ -649,22 +590,6 @@ void mca_memheap_modex_recv_all(void)
free(send_buffer);
free(rcv_buffer);
OBJ_RELEASE(msg);
if (3 == mca_memheap_base_alloc_type || 4 == mca_memheap_base_alloc_type) {
/* unfortunately we must do barrier here to assure that everyone are attached to our segment
* good thing that this code path only invoked on older linuxes (-mca memheap_base_alloc_type 3|4)
* that does not support IPC_RMID op on attached segments.
*/
shmem_barrier_all();
/* keys exchanged, segments attached, now we can safely cleanup */
if (memheap_map->mem_segs[HEAP_SEG_INDEX].type
== MAP_SEGMENT_ALLOC_SHM) {
shmctl(memheap_map->mem_segs[HEAP_SEG_INDEX].shmid,
IPC_RMID,
NULL );
}
}
}
static inline void* va2rva(void* va,
@ -676,21 +601,21 @@ static inline void* va2rva(void* va,
(uintptr_t)va - ((uintptr_t)local_base - (uintptr_t)remote_base));
}
mca_spml_mkey_t * mca_memheap_base_get_cached_mkey(int pe,
sshmem_mkey_t * mca_memheap_base_get_cached_mkey(int pe,
void* va,
int btl_id,
void** rva)
{
map_segment_t *s;
int rc;
mca_spml_mkey_t *mkey;
sshmem_mkey_t *mkey;
MEMHEAP_VERBOSE_FASTPATH(10, "rkey: pe=%d va=%p", pe, va);
s = __find_va(va);
if (NULL == s)
return NULL ;
if (!s->is_active)
if (!MAP_SEGMENT_IS_VALID(s))
return NULL ;
if (pe == oshmem_my_proc_id()) {
@ -702,13 +627,13 @@ mca_spml_mkey_t * mca_memheap_base_get_cached_mkey(int pe,
if (OPAL_LIKELY(s->mkeys_cache[pe])) {
mkey = &s->mkeys_cache[pe][btl_id];
*rva = va2rva(va, s->start, mkey->va_base);
*rva = va2rva(va, s->seg_base_addr, mkey->va_base);
MEMHEAP_VERBOSE_FASTPATH(10, "rkey: pe=%d va=%p -> (cached) %lx %p", pe, (void *)va, mkey->u.key, (void *)*rva);
return mkey;
}
s->mkeys_cache[pe] = (mca_spml_mkey_t *) calloc(memheap_map->num_transports,
sizeof(mca_spml_mkey_t));
s->mkeys_cache[pe] = (sshmem_mkey_t *) calloc(memheap_map->num_transports,
sizeof(sshmem_mkey_t));
if (!s->mkeys_cache[pe])
return NULL ;
@ -719,19 +644,19 @@ mca_spml_mkey_t * mca_memheap_base_get_cached_mkey(int pe,
return NULL ;
mkey = &s->mkeys_cache[pe][btl_id];
*rva = va2rva(va, s->start, mkey->va_base);
*rva = va2rva(va, s->seg_base_addr, mkey->va_base);
MEMHEAP_VERBOSE_FASTPATH(5, "rkey: pe=%d va=%p -> (remote lookup) %lx %p", pe, (void *)va, mkey->u.key, (void *)*rva);
return mkey;
}
mca_spml_mkey_t *mca_memheap_base_get_mkey(void* va, int tr_id)
sshmem_mkey_t *mca_memheap_base_get_mkey(void* va, int tr_id)
{
map_segment_t *s;
s = __find_va(va);
return ((s && s->is_active) ? &s->mkeys[tr_id] : NULL );
return ((s && MAP_SEGMENT_IS_VALID(s)) ? &s->mkeys[tr_id] : NULL );
}
uint64_t mca_memheap_base_find_offset(int pe,
@ -743,7 +668,7 @@ uint64_t mca_memheap_base_find_offset(int pe,
s = __find_va(va);
return ((s && s->is_active) ? ((uintptr_t)rva - (uintptr_t)(s->mkeys_cache[pe][tr_id].va_base)) : 0);
return ((s && MAP_SEGMENT_IS_VALID(s)) ? ((uintptr_t)rva - (uintptr_t)(s->mkeys_cache[pe][tr_id].va_base)) : 0);
}
int mca_memheap_base_is_symmetric_addr(const void* va)
@ -761,11 +686,11 @@ int mca_memheap_base_detect_addr_type(void* va)
if (s) {
if (s->type == MAP_SEGMENT_STATIC) {
addr_type = ADDR_STATIC;
} else if ((uintptr_t)va >= (uintptr_t) s->start
&& (uintptr_t)va < (uintptr_t) ((uintptr_t)s->start + mca_memheap.memheap_size)) {
} else if ((uintptr_t)va >= (uintptr_t) s->seg_base_addr
&& (uintptr_t)va < (uintptr_t) ((uintptr_t)s->seg_base_addr + mca_memheap.memheap_size)) {
addr_type = ADDR_USER;
} else {
assert( (uintptr_t)va >= (uintptr_t) ((uintptr_t)s->start + mca_memheap.memheap_size) && (uintptr_t)va < (uintptr_t)s->end);
assert( (uintptr_t)va >= (uintptr_t) ((uintptr_t)s->seg_base_addr + mca_memheap.memheap_size) && (uintptr_t)va < (uintptr_t)s->end);
addr_type = ADDR_PRIVATE;
}
}

Просмотреть файл

@ -30,11 +30,11 @@ int mca_memheap_base_reg(mca_memheap_map_t *memheap_map)
MEMHEAP_VERBOSE(5,
"register seg#%02d: 0x%p - 0x%p %llu bytes type=0x%X id=0x%X",
i,
s->start,
s->seg_base_addr,
s->end,
(long long)((uintptr_t)s->end - (uintptr_t)s->start),
(long long)((uintptr_t)s->end - (uintptr_t)s->seg_base_addr),
s->type,
s->shmid);
s->seg_id);
ret = _reg_segment(s, &memheap_map->num_transports);
}
@ -49,15 +49,15 @@ int mca_memheap_base_dereg(mca_memheap_map_t *memheap_map)
for (i = 0; i < memheap_map->n_segments; i++) {
map_segment_t *s = &memheap_map->mem_segs[i];
if (!s->is_active)
if (!MAP_SEGMENT_IS_VALID(s))
continue;
MEMHEAP_VERBOSE(5,
"deregistering segment#%d: %p - %p %llu bytes",
i,
s->start,
s->seg_base_addr,
s->end,
(long long)((uintptr_t)s->end - (uintptr_t)s->start));
(long long)((uintptr_t)s->end - (uintptr_t)s->seg_base_addr));
ret = _dereg_segment(s);
}
@ -92,7 +92,7 @@ static int _dereg_segment(map_segment_t *s)
s->mkeys_cache = NULL;
}
s->is_active = 0;
MAP_SEGMENT_INVALIDATE(s);
return rc;
}
@ -106,17 +106,17 @@ static int _reg_segment(map_segment_t *s, int *num_btl)
nprocs = oshmem_num_procs();
my_pe = oshmem_my_proc_id();
s->mkeys_cache = (mca_spml_mkey_t **) calloc(nprocs,
sizeof(mca_spml_mkey_t *));
s->mkeys_cache = (sshmem_mkey_t **) calloc(nprocs,
sizeof(sshmem_mkey_t *));
if (NULL == s->mkeys_cache) {
MEMHEAP_ERROR("Failed to allocate memory for remote segments");
rc = OSHMEM_ERROR;
}
if (!rc) {
s->mkeys = MCA_SPML_CALL(register((void *)(unsigned long)s->start,
(uintptr_t)s->end - (uintptr_t)s->start,
MEMHEAP_SHM_CODE(s->type, s->shmid),
s->mkeys = MCA_SPML_CALL(register((void *)(unsigned long)s->seg_base_addr,
(uintptr_t)s->end - (uintptr_t)s->seg_base_addr,
s->seg_id,
num_btl));
if (NULL == s->mkeys) {
free(s->mkeys_cache);
@ -129,7 +129,7 @@ static int _reg_segment(map_segment_t *s, int *num_btl)
if (OSHMEM_SUCCESS == rc) {
s->mkeys_cache[my_pe] = s->mkeys;
s->is_active = 1;
MAP_SEGMENT_SET_VALID(s);
}
return rc;

Просмотреть файл

@ -170,34 +170,41 @@ int mca_memheap_base_select()
return OSHMEM_SUCCESS;
}
static size_t memheap_size(void)
static size_t _memheap_size(void)
{
char *p;
unsigned long long factor;
long long factor = 1;
int idx;
unsigned long long size;
long long size = 0;
p = getenv(SHMEM_HEAP_SIZE);
if (!p)
return SIZE_IN_MEGA_BYTES(DEFAULT_SYMMETRIC_HEAP_SIZE);
idx = strlen(p) - 1;
if (p[idx] == 'k' || p[idx] == 'K') {
factor = 1024;
} else if (p[idx] == 'm' || p[idx] == 'M') {
factor = 1024 * 1024;
} else if (p[idx] == 'g' || p[idx] == 'G') {
factor = 1024 * 1024 * 1024;
} else if (p[idx] == 't' || p[idx] == 'T') {
factor = 1024UL * 1024UL * 1024UL * 1024UL;
} else
factor = 1;
if (1 == sscanf(p, "%lld%n", &size, &idx)) {
if (p[idx] != '\0') {
if (p[idx + 1] == '\0') {
if (p[idx] == 'k' || p[idx] == 'K') {
factor = 1024;
} else if (p[idx] == 'm' || p[idx] == 'M') {
factor = 1024 * 1024;
} else if (p[idx] == 'g' || p[idx] == 'G') {
factor = 1024 * 1024 * 1024;
} else if (p[idx] == 't' || p[idx] == 'T') {
factor = 1024UL * 1024UL * 1024UL * 1024UL;
} else {
size = 0;
}
} else {
size = 0;
}
}
}
size = atoll(p);
if (size == 0) {
MEMHEAP_ERROR("Incorrect symmetric heap size %s. Using default heap size %d M\n",
if (size <= 0) {
MEMHEAP_ERROR("Set incorrect symmetric heap size %s.\n",
p, DEFAULT_SYMMETRIC_HEAP_SIZE);
return SIZE_IN_MEGA_BYTES(DEFAULT_SYMMETRIC_HEAP_SIZE);
return 0;
}
return (size_t) memheap_align(size * factor);
}
@ -208,7 +215,7 @@ static memheap_context_t* _memheap_create(void)
static memheap_context_t context;
size_t user_size;
user_size = memheap_size();
user_size = _memheap_size();
if (user_size < MEMHEAP_BASE_MIN_SIZE) {
MEMHEAP_ERROR("Requested memheap size is less than minimal meamheap size (%llu < %llu)",
(unsigned long long)user_size, MEMHEAP_BASE_MIN_SIZE);
@ -239,10 +246,10 @@ static memheap_context_t* _memheap_create(void)
context.user_size = user_size;
context.private_size = MEMHEAP_BASE_PRIVATE_SIZE;
context.user_base_addr =
(void*) ((unsigned char*) mca_memheap_base_map.mem_segs[HEAP_SEG_INDEX].start
(void*) ((unsigned char*) mca_memheap_base_map.mem_segs[HEAP_SEG_INDEX].seg_base_addr
+ 0);
context.private_base_addr =
(void*) ((unsigned char*) mca_memheap_base_map.mem_segs[HEAP_SEG_INDEX].start
(void*) ((unsigned char*) mca_memheap_base_map.mem_segs[HEAP_SEG_INDEX].seg_base_addr
+ context.user_size);
}

Просмотреть файл

@ -59,16 +59,15 @@ int mca_memheap_base_static_init(mca_memheap_map_t *map)
map_segment_t *s = &map->mem_segs[map->n_segments];
memset(s, 0, sizeof(*s));
s->is_active = 0;
s->shmid = MEMHEAP_SHM_INVALID;
s->start = memheap_context.mem_segs[i].start;
MAP_SEGMENT_RESET_FLAGS(s);
s->seg_id = MAP_SEGMENT_SHM_INVALID;
s->seg_base_addr = memheap_context.mem_segs[i].start;
s->end = memheap_context.mem_segs[i].end;
s->size = ((uintptr_t)s->end - (uintptr_t)s->start);
s->seg_size = ((uintptr_t)s->end - (uintptr_t)s->seg_base_addr);
s->type = MAP_SEGMENT_STATIC;
s->context = NULL;
map->n_segments++;
total_mem += ((uintptr_t)s->end - (uintptr_t)s->start);
total_mem += ((uintptr_t)s->end - (uintptr_t)s->seg_base_addr);
}
MEMHEAP_VERBOSE(1,
"Memheap static memory: %llu byte(s), %d segments",

Просмотреть файл

@ -1,62 +0,0 @@
# -*- shell-script -*-
#
# Copyright (c) 2013 Mellanox Technologies, Inc.
# All rights reserved.
#
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
AC_DEFUN([MCA_oshmem_memheap_CONFIG],[
# configure all the components
MCA_CONFIGURE_FRAMEWORK($1, $2, 1)
# this is a direct callable component, so set that up.
MCA_SETUP_DIRECT_CALL($1, $2)
OMPI_CHECK_OPENFABRICS([oshmem_verbs],
[oshmem_verbs_happy="yes"],
[oshmem_verbs_happy="no"])
# substitute in the things needed to build MEMHEAP BASE
AC_SUBST([oshmem_verbs_CFLAGS])
AC_SUBST([oshmem_verbs_CPPFLAGS])
AC_SUBST([oshmem_verbs_LDFLAGS])
AC_SUBST([oshmem_verbs_LIBS])
# If we have the oshmem_verbs stuff available, find out what we've got
AS_IF(
[test "$oshmem_verbs_happy" = "yes"],
[
OSHMEM_LIBSHMEM_EXTRA_LDFLAGS="$OSHMEM_LIBSHMEM_EXTRA_LDFLAGS $oshmem_verbs_LDFLAGS"
OSHMEM_LIBSHMEM_EXTRA_LIBS="$OSHMEM_LIBSHMEM_EXTRA_LIBS $oshmem_verbs_LIBS"
# ibv_reg_shared_mr was added in MOFED 1.8
oshmem_have_mpage=0
oshmem_verbs_save_CPPFLAGS="$CPPFLAGS"
oshmem_verbs_save_LDFLAGS="$LDFLAGS"
oshmem_verbs_save_LIBS="$LIBS"
CPPFLAGS="$CPPFLAGS $oshmem_verbs_CPPFLAGS"
LDFLAGS="$LDFLAGS $oshmem_verbs_LDFLAGS"
LIBS="$LIBS $oshmem_verbs_LIBS"
AC_CHECK_DECLS([IBV_ACCESS_ALLOCATE_MR,IBV_ACCESS_SHARED_MR_USER_READ],
[oshmem_have_mpage=2], [],
[#include <infiniband/verbs.h>])
CPPFLAGS="$oshmem_verbs_save_CPPFLAGS"
LDFLAGS="$oshmem_verbs_save_LDFLAGS"
LIBS="$oshmem_verbs_save_LIBS"
AC_DEFINE_UNQUOTED(MPAGE_ENABLE, $oshmem_have_mpage,
[Whether we can use M-PAGE supported since MOFED 1.8])
])
])

Просмотреть файл

@ -13,6 +13,8 @@
#include "opal/mca/mca.h"
#include "oshmem/constants.h"
#include "oshmem/proc/proc.h"
#include "oshmem/mca/sshmem/sshmem.h"
#include "oshmem/mca/spml/spml.h"
#define DEFAULT_SYMMETRIC_HEAP_SIZE 256
@ -65,11 +67,11 @@ typedef uint64_t (*mca_memheap_base_module_find_offset_fn_t)(int pe,
* @return mkey suitable to access pe via given transport id. rva is set to virtual address mapping of (va)
* on remote pe.
*/
typedef mca_spml_mkey_t * (*mca_memheap_base_module_get_cached_mkey_fn_t)(int pe,
typedef sshmem_mkey_t * (*mca_memheap_base_module_get_cached_mkey_fn_t)(int pe,
void* va,
int transport_id,
void** rva);
typedef mca_spml_mkey_t * (*mca_memheap_base_module_get_local_mkey_fn_t)(void* va,
typedef sshmem_mkey_t * (*mca_memheap_base_module_get_local_mkey_fn_t)(void* va,
int transport_id);
/*

Просмотреть файл

@ -66,7 +66,7 @@ OSHMEM_DECLSPEC int mca_spml_base_wait(void* addr,
OSHMEM_DECLSPEC int mca_spml_base_wait_nb(void* handle);
OSHMEM_DECLSPEC int mca_spml_base_oob_get_mkeys(int pe,
uint32_t seg,
mca_spml_mkey_t *mkeys);
sshmem_mkey_t *mkeys);
/*
* MCA framework

Просмотреть файл

@ -137,7 +137,7 @@ int mca_spml_base_wait_nb(void* handle)
return OSHMEM_SUCCESS;
}
int mca_spml_base_oob_get_mkeys(int pe, uint32_t seg, mca_spml_mkey_t *mkeys)
int mca_spml_base_oob_get_mkeys(int pe, uint32_t seg, sshmem_mkey_t *mkeys)
{
return OSHMEM_ERROR;
}

Просмотреть файл

@ -74,7 +74,7 @@ static int spml_ikrit_get_ep_address(spml_ikrit_mxm_ep_conn_info_t *ep_info,
return OSHMEM_SUCCESS;
}
#else
static inline mxm_mem_key_t *to_mxm_mkey(mca_spml_mkey_t *mkey) {
static inline mxm_mem_key_t *to_mxm_mkey(sshmem_mkey_t *mkey) {
if (0 == mkey->len) {
return &mxm_empty_mem_key;
@ -519,20 +519,20 @@ int mca_spml_ikrit_add_procs(oshmem_proc_t** procs, size_t nprocs)
}
mca_spml_mkey_t *mca_spml_ikrit_register(void* addr,
sshmem_mkey_t *mca_spml_ikrit_register(void* addr,
size_t size,
uint64_t shmid,
int *count)
{
int i;
mca_spml_mkey_t *mkeys;
sshmem_mkey_t *mkeys;
#if MXM_API >= MXM_VERSION(2,0)
mxm_error_t err;
mxm_mem_key_t *m_key;
#endif
*count = 0;
mkeys = (mca_spml_mkey_t *) calloc(1, MXM_PTL_LAST * sizeof(*mkeys));
mkeys = (sshmem_mkey_t *) calloc(1, MXM_PTL_LAST * sizeof(*mkeys));
if (!mkeys) {
return NULL ;
}
@ -540,7 +540,7 @@ mca_spml_mkey_t *mca_spml_ikrit_register(void* addr,
for (i = 0; i < MXM_PTL_LAST; i++) {
switch (i) {
case MXM_PTL_SHM:
if ((int) MEMHEAP_SHM_GET_ID(shmid) != MEMHEAP_SHM_INVALID) {
if ((int)shmid != MAP_SEGMENT_SHM_INVALID) {
mkeys[i].u.key = shmid;
mkeys[i].va_base = 0;
} else {
@ -610,7 +610,7 @@ error_out:
return NULL ;
}
int mca_spml_ikrit_deregister(mca_spml_mkey_t *mkeys)
int mca_spml_ikrit_deregister(sshmem_mkey_t *mkeys)
{
int i;
@ -658,7 +658,7 @@ static inline int get_ptl_id(int dst)
return proc->transport_ids[0];
}
int mca_spml_ikrit_oob_get_mkeys(int pe, uint32_t seg, mca_spml_mkey_t *mkeys)
int mca_spml_ikrit_oob_get_mkeys(int pe, uint32_t seg, sshmem_mkey_t *mkeys)
{
int ptl;
ptl = get_ptl_id(pe);
@ -695,7 +695,7 @@ static int mca_spml_ikrit_get_helper(mxm_send_req_t *sreq,
/* shmem spec states that get() operations are blocking. So it is enough
to have single mxm request. Also we count on mxm doing copy */
void *rva;
mca_spml_mkey_t *r_mkey;
sshmem_mkey_t *r_mkey;
int ptl_id;
ptl_id = get_ptl_id(src);
@ -747,7 +747,7 @@ static inline int mca_spml_ikrit_get_shm(void *src_addr,
{
int ptl_id;
void *rva;
mca_spml_mkey_t *r_mkey;
sshmem_mkey_t *r_mkey;
ptl_id = get_ptl_id(src);
/**
@ -984,7 +984,7 @@ static inline int mca_spml_ikrit_put_internal(void* dst_addr,
void *rva;
mca_spml_ikrit_put_request_t *put_req;
int ptl_id;
mca_spml_mkey_t *r_mkey;
sshmem_mkey_t *r_mkey;
static int count;
int need_progress = 0;
@ -1131,7 +1131,7 @@ int mca_spml_ikrit_put_simple(void* dst_addr,
mxm_send_req_t mxm_req;
mxm_wait_t wait;
int ptl_id;
mca_spml_mkey_t *r_mkey;
sshmem_mkey_t *r_mkey;
static int count;
ptl_id = get_ptl_id(dst);

Просмотреть файл

@ -141,14 +141,14 @@ extern int mca_spml_ikrit_send(void* buf,
int dst,
mca_spml_base_put_mode_t mode);
extern mca_spml_mkey_t *mca_spml_ikrit_register(void* addr,
extern sshmem_mkey_t *mca_spml_ikrit_register(void* addr,
size_t size,
uint64_t shmid,
int *count);
extern int mca_spml_ikrit_deregister(mca_spml_mkey_t *mkeys);
extern int mca_spml_ikrit_deregister(sshmem_mkey_t *mkeys);
extern int mca_spml_ikrit_oob_get_mkeys(int pe,
uint32_t seg,
mca_spml_mkey_t *mkeys);
sshmem_mkey_t *mkeys);
extern int mca_spml_ikrit_add_procs(oshmem_proc_t** procs, size_t nprocs);
extern int mca_spml_ikrit_del_procs(oshmem_proc_t** procs, size_t nprocs);

Просмотреть файл

@ -21,6 +21,8 @@
#include "oshmem/proc/proc.h"
#include "ompi/mca/btl/btl.h"
#include "oshmem/mca/sshmem/sshmem.h"
BEGIN_C_DECLS
/*
@ -71,25 +73,8 @@ typedef mca_spml_base_component_2_0_0_t mca_spml_base_component_t;
/**
* MCA management functions.
*/
/**
* memory key
* We have two kinds of keys:
* - shared memory type of keys. Memory segment must be attached before access
* such keys use va_base = 0 and key
* - ib type of key. Key is passed with each put/get op.
* use va_base = <remote vaddr>, key is stored in mkey struct
*/
typedef struct mca_spml_mkey {
void* va_base;
uint16_t len;
union {
void *data;
uint64_t key;
} u;
void *spml_context; /* spml module can attach internal structures here */
} mca_spml_mkey_t;
static inline char *mca_spml_base_mkey2str(mca_spml_mkey_t *mkey)
static inline char *mca_spml_base_mkey2str(sshmem_mkey_t *mkey)
{
static char buf[64];
@ -128,7 +113,7 @@ typedef int (*mca_spml_base_module_wait_fn_t)(void* addr,
* @return array of mkeys (one mkey per "btl") or NULL on failure
*
*/
typedef mca_spml_mkey_t * (*mca_spml_base_module_register_fn_t)(void *addr,
typedef sshmem_mkey_t * (*mca_spml_base_module_register_fn_t)(void *addr,
size_t size,
uint64_t shmid,
int *count);
@ -136,7 +121,7 @@ typedef mca_spml_mkey_t * (*mca_spml_base_module_register_fn_t)(void *addr,
/**
* deregister memory pinned by register()
*/
typedef int (*mca_spml_base_module_deregister_fn_t)(mca_spml_mkey_t *mkeys);
typedef int (*mca_spml_base_module_deregister_fn_t)(sshmem_mkey_t *mkeys);
/**
* try to fill up mkeys that can be used to reach remote pe.
@ -148,7 +133,7 @@ typedef int (*mca_spml_base_module_deregister_fn_t)(mca_spml_mkey_t *mkeys);
*/
typedef int (*mca_spml_base_module_oob_get_mkeys_fn_t)(int pe,
uint32_t seg,
mca_spml_mkey_t *mkeys);
sshmem_mkey_t *mkeys);
/**
* For each proc setup a datastructure that indicates the BTLs

Просмотреть файл

@ -312,14 +312,14 @@ static void mca_yoda_get_response_callback(mca_btl_base_module_t* btl,
/**
* note: we have to reg memory directly with btl because no proc will have a full btl list in proc_bml
*/
int mca_spml_yoda_deregister(mca_spml_mkey_t *mkeys)
int mca_spml_yoda_deregister(sshmem_mkey_t *mkeys)
{
int i;
struct yoda_btl *ybtl;
mca_spml_yoda_context_t* yoda_context;
MCA_SPML_CALL(fence());
mca_spml_yoda_wait_gets();
MCA_SPML_CALL(fence());
mca_spml_yoda_wait_gets();
if (!mkeys) {
return OSHMEM_SUCCESS;
@ -346,7 +346,7 @@ int mca_spml_yoda_deregister(mca_spml_mkey_t *mkeys)
return OSHMEM_SUCCESS;
}
mca_spml_mkey_t *mca_spml_yoda_register(void* addr,
sshmem_mkey_t *mca_spml_yoda_register(void* addr,
size_t size,
uint64_t shmid,
int *count)
@ -355,7 +355,7 @@ mca_spml_mkey_t *mca_spml_yoda_register(void* addr,
mca_btl_base_descriptor_t* des = NULL;
const opal_datatype_t *datatype = &opal_datatype_wchar;
opal_convertor_t convertor;
mca_spml_mkey_t *mkeys;
sshmem_mkey_t *mkeys;
struct yoda_btl *ybtl;
oshmem_proc_t *proc_self;
mca_spml_yoda_context_t* yoda_context;
@ -366,7 +366,7 @@ mca_spml_mkey_t *mca_spml_yoda_register(void* addr,
SPML_VERBOSE(10, "address %p len %llu", addr, (unsigned long long)size);
*count = 0;
/* make sure everything is initialized to 0 */
mkeys = (mca_spml_mkey_t *) calloc(1,
mkeys = (sshmem_mkey_t *) calloc(1,
mca_spml_yoda.n_btls * sizeof(*mkeys));
if (!mkeys) {
return NULL ;
@ -400,7 +400,7 @@ mca_spml_mkey_t *mca_spml_yoda_register(void* addr,
/* If we have shared memory just save its id*/
if (YODA_BTL_SM == ybtl->btl_type
&& MEMHEAP_SHM_INVALID != (int) MEMHEAP_SHM_GET_ID(shmid)) {
&& MAP_SEGMENT_SHM_INVALID != (int)shmid) {
mkeys[i].u.key = shmid;
mkeys[i].va_base = 0;
continue;
@ -455,9 +455,9 @@ mca_spml_mkey_t *mca_spml_yoda_register(void* addr,
}
SPML_VERBOSE(5,
"rank %d btl %s address 0x%p len %llu shmid 0x%X|0x%X",
"rank %d btl %s va_base: 0x%p len: %d key %llx size %llu",
oshmem_proc_local_proc->proc_name.vpid, btl_type2str(ybtl->btl_type),
mkeys[i].va_base, (unsigned long long)size, MEMHEAP_SHM_GET_TYPE(shmid), MEMHEAP_SHM_GET_ID(shmid));
mkeys[i].va_base, mkeys[i].len, (unsigned long long)mkeys[i].u.key, (unsigned long long)size);
}
OBJ_DESTRUCT(&convertor);
*count = mca_spml_yoda.n_btls;
@ -750,7 +750,7 @@ static inline int mca_spml_yoda_put_internal(void *dst_addr,
unsigned int frag_size = 0;
char *p_src, *p_dst;
void* rva;
mca_spml_mkey_t *r_mkey;
sshmem_mkey_t *r_mkey;
int btl_id = 0;
struct yoda_btl *ybtl;
int put_via_send;
@ -991,7 +991,7 @@ int mca_spml_yoda_enable(bool enable)
int mca_spml_yoda_get(void* src_addr, size_t size, void* dst_addr, int src)
{
int rc = OSHMEM_SUCCESS;
mca_spml_mkey_t *r_mkey, *l_mkey;
sshmem_mkey_t *r_mkey, *l_mkey;
void* rva;
unsigned ncopied = 0;
unsigned int frag_size = 0;

Просмотреть файл

@ -110,11 +110,11 @@ extern int mca_spml_yoda_send(void* buf,
size_t size,
int dst,
mca_spml_base_put_mode_t mode);
extern mca_spml_mkey_t *mca_spml_yoda_register(void* addr,
extern sshmem_mkey_t *mca_spml_yoda_register(void* addr,
size_t size,
uint64_t shmid,
int *count);
extern int mca_spml_yoda_deregister(mca_spml_mkey_t *mkeys);
extern int mca_spml_yoda_deregister(sshmem_mkey_t *mkeys);
extern int mca_spml_yoda_add_procs(oshmem_proc_t** procs,
size_t nprocs);
extern int mca_spml_yoda_del_procs(oshmem_proc_t** procs,

29
oshmem/mca/sshmem/Makefile.am Обычный файл
Просмотреть файл

@ -0,0 +1,29 @@
#
# Copyright (c) 2014 Mellanox Technologies, Inc.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# main library setup
noinst_LTLIBRARIES = libmca_sshmem.la
libmca_sshmem_la_SOURCES =
# local files
headers = sshmem.h sshmem_types.h
libmca_sshmem_la_SOURCES += $(headers)
# Conditionally install the header files
if WANT_INSTALL_HEADERS
oshmemdir = $(ompiincludedir)/$(subdir)
nobase_oshmem_HEADERS = $(headers)
nobase_nodist_oshmem_HEADERS = $(nodist_headers)
endif
include base/Makefile.am
distclean-local:
rm -f base/static-components.h

17
oshmem/mca/sshmem/base/Makefile.am Обычный файл
Просмотреть файл

@ -0,0 +1,17 @@
# Copyright (c) 2014 Mellanox Technologies, Inc.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
headers += \
base/base.h
libmca_sshmem_la_SOURCES += \
base/sshmem_base_close.c \
base/sshmem_base_select.c \
base/sshmem_base_open.c \
base/sshmem_base_wrappers.c

149
oshmem/mca/sshmem/base/base.h Обычный файл
Просмотреть файл

@ -0,0 +1,149 @@
/*
* Copyright (c) 2014 Mellanox Technologies, Inc.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_SSHMEM_BASE_H
#define MCA_SSHMEM_BASE_H
#include "oshmem_config.h"
#include "opal/mca/base/mca_base_framework.h"
#include "oshmem/mca/sshmem/sshmem.h"
BEGIN_C_DECLS
extern void* mca_sshmem_base_start_address;
/* ////////////////////////////////////////////////////////////////////////// */
/* Public API for the sshmem framework */
/* ////////////////////////////////////////////////////////////////////////// */
OSHMEM_DECLSPEC int
mca_sshmem_segment_create(map_segment_t *ds_buf,
const char *file_name,
size_t size);
OSHMEM_DECLSPEC int
mca_sshmem_ds_copy(const map_segment_t *from,
map_segment_t *to);
OSHMEM_DECLSPEC void *
mca_sshmem_segment_attach(map_segment_t *ds_buf, sshmem_mkey_t *mkey);
OSHMEM_DECLSPEC int
mca_sshmem_segment_detach(map_segment_t *ds_buf, sshmem_mkey_t *mkey);
OSHMEM_DECLSPEC int
mca_sshmem_unlink(map_segment_t *ds_buf);
/* ////////////////////////////////////////////////////////////////////////// */
/* End Public API for the sshmem framework */
/* ////////////////////////////////////////////////////////////////////////// */
/*
* Global functions for MCA overall sshmem open and close
*/
/**
* Select an available component.
*
* @return OSHMEM_SUCCESS Upon success.
* @return OSHMEM_NOT_FOUND If no component can be selected.
* @return OSHMEM_ERROR Upon other failure.
*
* This function invokes the selection process for shmem components,
* which works as follows:
*
* - If the \em sshmem MCA parameter is not specified, the
* selection set is all available shmem components.
* - If the \em sshmem MCA parameter is specified, the
* selection set is just that component.
* - All components in the selection set are queried to see if
* they want to run. All components that want to run are ranked
* by their priority and the highest priority component is
* selected. All non-selected components have their "close"
* function invoked to let them know that they were not selected.
* - The selected component will have its "init" function invoked to
* let it know that it was selected.
*
* If we fall through this entire process and no component is
* selected, then return OSHMEM_NOT_FOUND (this is not a fatal
* error).
*
* At the end of this process, we'll either have a single
* component that is selected and initialized, or no component was
* selected. If no component was selected, subsequent invocation
* of the shmem wrapper functions will return an error.
*/
OSHMEM_DECLSPEC int
mca_sshmem_base_select(void);
/**
* Shut down the sshmem MCA framework.
*
* @retval OSHMEM_SUCCESS Always
*
* This function shuts down everything in the sshmem MCA
* framework, and is called during opal_finalize().
*
* It must be the last function invoked on the sshmem MCA
* framework.
*/
OSHMEM_DECLSPEC int
mca_sshmem_base_close(void);
/**
* Indication of whether a component was successfully selected or
* not
*/
OSHMEM_DECLSPEC extern bool mca_sshmem_base_selected;
/**
* Global component struct for the selected component
*/
OSHMEM_DECLSPEC extern const mca_sshmem_base_component_2_0_0_t
*mca_sshmem_base_component;
/**
* Global module struct for the selected module
*/
OSHMEM_DECLSPEC extern const mca_sshmem_base_module_2_0_0_t
*mca_sshmem_base_module;
/**
* Framework structure declaration
*/
OSHMEM_DECLSPEC extern mca_base_framework_t oshmem_sshmem_base_framework;
/* ******************************************************************** */
/*
 * Logging helpers for the sshmem framework.
 *
 * Each macro forwards to oshmem_output_verbose() on the framework's output
 * stream and prefixes the message with the source file, line and function
 * of the call site.
 */
#ifdef __BASE_FILE__
#define __SSHMEM_FILE__ __BASE_FILE__
#else
#define __SSHMEM_FILE__ __FILE__
#endif

/*
 * Verbose output is meant for debug builds only.  The *value* of
 * OPAL_ENABLE_DEBUG is tested rather than whether it is defined, so a build
 * that defines it to 0 compiles these calls out.
 * NOTE(review): this presumes configure defines OPAL_ENABLE_DEBUG to 0 or 1
 * in every build -- confirm against opal_config.h.
 */
#if defined(OPAL_ENABLE_DEBUG) && OPAL_ENABLE_DEBUG
#define SSHMEM_VERBOSE(level, ...) \
    oshmem_output_verbose(level, oshmem_sshmem_base_framework.framework_output, \
        "%s:%d - %s()", __SSHMEM_FILE__, __LINE__, __FUNCTION__, __VA_ARGS__)
#else
#define SSHMEM_VERBOSE(level, ...)
#endif

/* Unconditional error message (verbosity level 0). */
#define SSHMEM_ERROR(...) \
    oshmem_output_verbose(0, oshmem_sshmem_base_framework.framework_output, \
        "Error %s:%d - %s()", __SSHMEM_FILE__, __LINE__, __FUNCTION__, __VA_ARGS__)

/* Unconditional warning message (verbosity level 0); tagged "Warning" so it
 * can be told apart from SSHMEM_ERROR output. */
#define SSHMEM_WARN(...) \
    oshmem_output_verbose(0, oshmem_sshmem_base_framework.framework_output, \
        "Warning %s:%d - %s()", __SSHMEM_FILE__, __LINE__, __FUNCTION__, __VA_ARGS__)
END_C_DECLS
#endif /* MCA_SSHMEM_BASE_H */

33
oshmem/mca/sshmem/base/sshmem_base_close.c Обычный файл
Просмотреть файл

@ -0,0 +1,33 @@
/*
* Copyright (c) 2014 Mellanox Technologies, Inc.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "oshmem_config.h"
#include "opal/constants.h"
#include "opal/mca/mca.h"
#include "opal/mca/base/base.h"
#include "oshmem/mca/sshmem/sshmem.h"
#include "oshmem/mca/sshmem/base/base.h"
/* ////////////////////////////////////////////////////////////////////////// */
/*
 * Framework close hook.
 *
 * Runs the selected module's finalize function, when a module is selected
 * and provides one, and then closes the framework's components (no
 * component is excluded: the second argument is NULL).
 *
 * Returns whatever mca_base_framework_components_close() returns.
 */
int
mca_sshmem_base_close(void)
{
    /* let the selected module, if there is one, clean up first */
    if (NULL != mca_sshmem_base_module) {
        if (NULL != mca_sshmem_base_module->module_finalize) {
            mca_sshmem_base_module->module_finalize();
        }
    }

    return mca_base_framework_components_close(&oshmem_sshmem_base_framework,
                                               NULL);
}

80
oshmem/mca/sshmem/base/sshmem_base_open.c Обычный файл
Просмотреть файл

@ -0,0 +1,80 @@
/*
* Copyright (c) 2014 Mellanox Technologies, Inc.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "oshmem_config.h"
#include "opal/constants.h"
#include "opal/util/output.h"
#include "opal/mca/mca.h"
#include "opal/mca/base/base.h"
#include "opal/mca/base/mca_base_var.h"
#include "oshmem/mca/sshmem/sshmem.h"
#include "oshmem/mca/sshmem/base/base.h"
/*
* The following file was created by configure. It contains extern
* statements and the definition of an array of pointers to each
* component's public mca_base_component_t struct.
*/
#include "oshmem/mca/sshmem/base/static-components.h"
/**
* globals
*/
void* mca_sshmem_base_start_address = (void*)0xFF000000;
/* ////////////////////////////////////////////////////////////////////////// */
/**
 * Register the sshmem-wide MCA parameters.
 *
 * Registers "start_address" under oshmem/sshmem/base, backed by
 * mca_sshmem_base_start_address, and registers oshmem/memheap/base
 * "start_address" as a deprecated synonym of it.
 *
 * @param flags  registration flags handed in by the MCA base (not used here)
 *
 * @return OSHMEM_SUCCESS in all cases; the results of the two registration
 *         calls are not examined.
 */
static int
mca_sshmem_base_register (mca_base_register_flag_t flags)
{
    /* NOTE(review): the variable type is unsigned long long while the
     * storage is a void*; presumably this relies on 64-bit pointers --
     * confirm for 32-bit targets. */
    int var_index =
        mca_base_var_register("oshmem", "sshmem", "base", "start_address",
                              "Specify base address for shared memory region",
                              MCA_BASE_VAR_TYPE_UNSIGNED_LONG_LONG,
                              NULL, 0,
                              MCA_BASE_VAR_FLAG_SETTABLE,
                              OPAL_INFO_LVL_9,
                              MCA_BASE_VAR_SCOPE_READONLY,
                              &mca_sshmem_base_start_address);

    (void) mca_base_var_register_synonym(var_index,
                                         "oshmem", "memheap", "base",
                                         "start_address",
                                         MCA_BASE_VAR_SYN_FLAG_DEPRECATED);

    return OSHMEM_SUCCESS;
}
/**
 * Framework open hook: open all available sshmem components.
 *
 * @param flags  open flags forwarded to the MCA base
 *
 * @return OSHMEM_SUCCESS when the components were opened, OSHMEM_ERROR
 *         otherwise.
 */
static int mca_sshmem_base_open(mca_base_open_flag_t flags)
{
    int rc = mca_base_framework_components_open(&oshmem_sshmem_base_framework,
                                                flags);

    return (OPAL_SUCCESS == rc) ? OSHMEM_SUCCESS : OSHMEM_ERROR;
}
/* Declare the sshmem framework together with its register, open and close hooks */
MCA_BASE_FRAMEWORK_DECLARE(oshmem, sshmem,
"OSHMEM SSHMEM",
mca_sshmem_base_register,
mca_sshmem_base_open,
mca_sshmem_base_close,
mca_sshmem_base_static_components,
MCA_BASE_FRAMEWORK_FLAG_DEFAULT);

156
oshmem/mca/sshmem/base/sshmem_base_select.c Обычный файл
Просмотреть файл

@ -0,0 +1,156 @@
/*
* Copyright (c) 2014 Mellanox Technologies, Inc.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "oshmem_config.h"
#ifdef HAVE_STRING_H
#include <string.h>
#endif /* HAVE_STRING_H */
#include "opal/constants.h"
#include "opal/util/output.h"
#include "opal/mca/mca.h"
#include "opal/mca/base/base.h"
#include "oshmem/mca/sshmem/sshmem.h"
#include "oshmem/mca/sshmem/base/base.h"
/*
* globals
*/
bool mca_sshmem_base_selected = false;
const mca_sshmem_base_component_2_0_0_t *mca_sshmem_base_component = NULL;
const mca_sshmem_base_module_2_0_0_t *mca_sshmem_base_module = NULL;
/* ////////////////////////////////////////////////////////////////////////// */
/**
 * Query every available sshmem component at run time and pick the one that
 * reports the highest priority.
 *
 * Components without a run-time query function, and components whose query
 * does not hand back a module, are skipped.  When a winner is found, all
 * other components of the framework are closed.
 *
 * @param best_module     (OUT) module of the selected component, NULL if none
 * @param best_component  (OUT) the selected component, NULL if none
 *
 * @return OSHMEM_SUCCESS if a component was selected,
 *         OSHMEM_ERR_NOT_FOUND otherwise.
 */
static int
mca_sshmem_base_runtime_query(mca_base_module_t **best_module,
                              mca_base_component_t **best_component)
{
    mca_base_component_list_item_t *cli = NULL;
    mca_base_component_t *component = NULL;
    mca_sshmem_base_component_2_0_0_t *sshmem_component = NULL;
    mca_base_module_t *module = NULL;
    int priority = 0, best_priority = INT32_MIN;

    *best_module = NULL;
    *best_component = NULL;

    opal_output_verbose(10, oshmem_sshmem_base_framework.framework_output,
                        "sshmem: base: runtime_query: "
                        "Auto-selecting sshmem components");

    /* traverse the list of available components.
     * for each call their 'run-time query' functions to determine relative
     * priority.
     */
    OPAL_LIST_FOREACH(cli, &oshmem_sshmem_base_framework.framework_components, mca_base_component_list_item_t) {
        component = (mca_base_component_t *)cli->cli_component;
        sshmem_component = (mca_sshmem_base_component_2_0_0_t *)component;

        /* if there is a run-time query function then use it. otherwise, skip
         * the component.
         */
        if (NULL == sshmem_component->runtime_query) {
            opal_output_verbose(5, oshmem_sshmem_base_framework.framework_output,
                                "sshmem: base: runtime_query: "
                                "(sshmem) Skipping component [%s]. It does not "
                                "implement a run-time query function",
                                component->mca_component_name);
            continue;
        }

        /* query this component for the module and priority */
        opal_output_verbose(5, oshmem_sshmem_base_framework.framework_output,
                            "sshmem: base: runtime_query: "
                            "(shmem) Querying component (run-time) [%s]",
                            component->mca_component_name);

        /* start each query from a clean slate: a component that declines
         * without writing its outputs must not inherit the module or the
         * priority reported by the component queried before it.
         */
        module = NULL;
        priority = 0;
        sshmem_component->runtime_query(&module, &priority, NULL);

        /* if no module was returned, then skip component.
         * this probably means that the run-time test deemed the shared memory
         * backing facility unusable or unsafe.
         */
        if (NULL == module) {
            opal_output_verbose(5, oshmem_sshmem_base_framework.framework_output,
                                "sshmem: base: runtime_query: "
                                "(sshmem) Skipping component [%s]. Run-time "
                                "Query failed to return a module",
                                component->mca_component_name);
            continue;
        }

        /* determine if this is the best module we have seen by looking the
         * priority
         */
        opal_output_verbose(5, oshmem_sshmem_base_framework.framework_output,
                            "sshmem: base: runtime_query: "
                            "(%5s) Query of component [%s] set priority to %d",
                            "shmem", component->mca_component_name, priority);
        if (priority > best_priority) {
            best_priority = priority;
            *best_module = module;
            *best_component = component;
        }
    }

    /* finished querying all components.
     * make sure we found something in the process.
     */
    if (NULL == *best_component) {
        opal_output_verbose(5, oshmem_sshmem_base_framework.framework_output,
                            "sshmem: base: runtime_query: "
                            "(%5s) No component selected!", "shmem");
        return OSHMEM_ERR_NOT_FOUND;
    }

    opal_output_verbose(5, oshmem_sshmem_base_framework.framework_output,
                        "sshmem: base: runtime_query: "
                        "(%5s) Selected component [%s]", "shmem",
                        (*best_component)->mca_component_name);

    /* close the non-selected components */
    (void) mca_base_framework_components_close (&oshmem_sshmem_base_framework,
                                                (mca_base_component_t *)(*best_component));

    return OSHMEM_SUCCESS;
}
/* ////////////////////////////////////////////////////////////////////////// */
/*
 * Select and initialize the sshmem component to use.
 *
 * Runs the run-time query over all components, records the winning
 * component and module in the framework globals, marks the framework as
 * selected and finally calls the winner's module_init().
 *
 * Returns OSHMEM_ERROR when no component could be selected or when the
 * recorded module is NULL; otherwise the result of module_init().
 */
int
mca_sshmem_base_select(void)
{
    mca_sshmem_base_component_2_0_0_t *winner_component = NULL;
    mca_sshmem_base_module_2_0_0_t *winner_module = NULL;
    int rc;

    /* at least one shared memory backing facility is required, so failing
     * to find a component is reported as an error
     */
    rc = mca_sshmem_base_runtime_query((mca_base_module_t **)&winner_module,
                                       (mca_base_component_t **)&winner_component);
    if (OSHMEM_SUCCESS != rc) {
        return OSHMEM_ERROR;
    }

    /* remember the winner */
    mca_sshmem_base_component = winner_component;
    mca_sshmem_base_module = winner_module;
    mca_sshmem_base_selected = true;

    /* and initialize it */
    if (NULL == mca_sshmem_base_module) {
        return OSHMEM_ERROR;
    }
    return mca_sshmem_base_module->module_init();
}

Просмотреть файл

@ -0,0 +1,75 @@
/*
* Copyright (c) 2014 Mellanox Technologies, Inc.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "oshmem_config.h"
#include "opal/constants.h"
#include "oshmem/mca/sshmem/sshmem.h"
#include "oshmem/mca/sshmem/base/base.h"
/* ////////////////////////////////////////////////////////////////////////// */
/*
 * Forward a segment-create request to the selected sshmem module.
 *
 * Returns OSHMEM_ERROR when no component has been selected, otherwise the
 * result of the module's segment_create().
 */
int
mca_sshmem_segment_create(map_segment_t *ds_buf,
                          const char *file_name,
                          size_t size)
{
    int rc = OSHMEM_ERROR;

    if (mca_sshmem_base_selected) {
        rc = mca_sshmem_base_module->segment_create(ds_buf, file_name, size);
    }

    return rc;
}
/* ////////////////////////////////////////////////////////////////////////// */
/*
 * Forward a segment-descriptor copy to the selected sshmem module.
 *
 * Returns OSHMEM_ERROR when no component has been selected, otherwise the
 * result of the module's ds_copy().
 */
int
mca_sshmem_ds_copy(const map_segment_t *from,
                   map_segment_t *to)
{
    int rc = OSHMEM_ERROR;

    if (mca_sshmem_base_selected) {
        rc = mca_sshmem_base_module->ds_copy(from, to);
    }

    return rc;
}
/* ////////////////////////////////////////////////////////////////////////// */
/*
 * Forward a segment-attach request to the selected sshmem module.
 *
 * Returns NULL when no component has been selected, otherwise the pointer
 * returned by the module's segment_attach().
 */
void *
mca_sshmem_segment_attach(map_segment_t *ds_buf, sshmem_mkey_t *mkey)
{
    void *attached = NULL;

    if (mca_sshmem_base_selected) {
        attached = mca_sshmem_base_module->segment_attach(ds_buf, mkey);
    }

    return attached;
}
/* ////////////////////////////////////////////////////////////////////////// */
/*
 * Forward a segment-detach request to the selected sshmem module.
 *
 * Returns OSHMEM_ERROR when no component has been selected, otherwise the
 * result of the module's segment_detach().
 */
int
mca_sshmem_segment_detach(map_segment_t *ds_buf, sshmem_mkey_t *mkey)
{
    int rc = OSHMEM_ERROR;

    if (mca_sshmem_base_selected) {
        rc = mca_sshmem_base_module->segment_detach(ds_buf, mkey);
    }

    return rc;
}
/* ////////////////////////////////////////////////////////////////////////// */
/*
 * Forward an unlink request to the selected sshmem module.
 *
 * Returns OSHMEM_ERROR when no component has been selected, otherwise the
 * result of the module's unlink().
 */
int
mca_sshmem_unlink(map_segment_t *ds_buf)
{
    int rc = OSHMEM_ERROR;

    if (mca_sshmem_base_selected) {
        rc = mca_sshmem_base_module->unlink(ds_buf);
    }

    return rc;
}

34
oshmem/mca/sshmem/mmap/Makefile.am Обычный файл
Просмотреть файл

@ -0,0 +1,34 @@
# Copyright (c) 2014 Mellanox Technologies, Inc.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
sources = \
sshmem_mmap.h \
sshmem_mmap_component.c \
sshmem_mmap_module.c
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
if MCA_BUILD_oshmem_sshmem_mmap_DSO
component_noinst =
component_install = mca_sshmem_mmap.la
else
component_noinst = libmca_sshmem_mmap.la
component_install =
endif
mcacomponentdir = $(ompilibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_sshmem_mmap_la_SOURCES = $(sources)
mca_sshmem_mmap_la_LDFLAGS = -module -avoid-version
noinst_LTLIBRARIES = $(component_noinst)
libmca_sshmem_mmap_la_SOURCES =$(sources)
libmca_sshmem_mmap_la_LDFLAGS = -module -avoid-version

40
oshmem/mca/sshmem/mmap/configure.m4 Обычный файл
Просмотреть файл

@ -0,0 +1,40 @@
# -*- shell-script -*-
#
# Copyright (c) 2014 Mellanox Technologies, Inc.
# All rights reserved.
#
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# MCA_oshmem_sshmem_mmap_CONFIG(action-if-can-compile,
# [action-if-cant-compile])
# ------------------------------------------------
AC_DEFUN([MCA_oshmem_sshmem_mmap_CONFIG],[
AC_CONFIG_FILES([oshmem/mca/sshmem/mmap/Makefile])
# do we have the mmap shm stuff?
AC_MSG_CHECKING([if want mmap shared memory support])
AC_ARG_ENABLE(mmap-sshmem,
AC_HELP_STRING([--disable-mmap-sshmem],
[disable mmap shared memory support (default: enabled)]))
AS_IF([test "$enable_mmap_sshmem" = "no"],
[AC_MSG_RESULT([no])
oshmem_mmap_sm_build_mmap=0],
[AC_MSG_RESULT([yes])
AC_SEARCH_LIBS([mmap], [c],
[oshmem_mmap_sm_build_mmap=1],
[oshmem_mmap_sm_build_mmap=0])])
AS_IF([test "$enable_mmap_sshmem" = "yes" -a "$oshmem_mmap_sm_build_mmap" = "0"],
[AC_MSG_WARN([MMAP shared memory support requested but not found])
AC_MSG_ERROR([Cannot continue])])
AS_IF([test "$oshmem_mmap_sm_build_mmap" = "1"], [$1], [$2])
AC_DEFINE_UNQUOTED([OSHMEM_SSHMEM_MMAP],
[$oshmem_mmap_sm_build_mmap],
[Whether we have shared memory support for mmap or not])
])dnl

40
oshmem/mca/sshmem/mmap/sshmem_mmap.h Обычный файл
Просмотреть файл

@ -0,0 +1,40 @@
/*
* Copyright (c) 2014 Mellanox Technologies, Inc.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_SSHMEM_MMAP_EXPORT_H
#define MCA_SSHMEM_MMAP_EXPORT_H
#include "oshmem_config.h"
#include "oshmem/mca/sshmem/sshmem.h"
BEGIN_C_DECLS
/**
* globally exported variable to hold the mmap component.
*/
typedef struct mca_sshmem_mmap_component_t {
/* base component struct */
mca_sshmem_base_component_t super;
/* priority for mmap component */
int priority;
} mca_sshmem_mmap_component_t;
OSHMEM_MODULE_DECLSPEC extern mca_sshmem_mmap_component_t
mca_sshmem_mmap_component;
typedef struct mca_sshmem_mmap_module_t {
mca_sshmem_base_module_t super;
} mca_sshmem_mmap_module_t;
extern mca_sshmem_mmap_module_t mca_sshmem_mmap_module;
END_C_DECLS
#endif /* MCA_SSHMEM_MMAP_EXPORT_H */

Просмотреть файл

@ -0,0 +1,135 @@
/*
* Copyright (c) 2014 Mellanox Technologies, Inc.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "oshmem_config.h"
#include "opal/constants.h"
#include "oshmem/mca/sshmem/sshmem.h"
#include "oshmem/mca/sshmem/base/base.h"
#include "sshmem_mmap.h"
/**
* public string showing the shmem ompi_mmap component version number
*/
const char *mca_sshmem_mmap_component_version_string =
"OSHMEM mmap sshmem MCA component version " OSHMEM_VERSION;
int mca_sshmem_mmap_relocate_backing_file = 0;
char *mca_sshmem_mmap_backing_file_base_dir = NULL;
bool mca_sshmem_mmap_nfs_warning = true;
/**
* local functions: the MCA component callbacks wired into
* mca_sshmem_mmap_component below (register, open, close, query) plus the
* sshmem-specific run-time query.
*/
static int mmap_register(void);
static int mmap_open(void);
static int mmap_close(void);
static int mmap_query(mca_base_module_t **module, int *priority);
static int mmap_runtime_query(mca_base_module_t **module,
int *priority,
const char *hint);
/**
* instantiate the public struct with all of our public information
* and pointers to our public functions in it.
*
* The initializer is positional, so the entries must stay in the order of the
* underlying struct fields. The trailing "priority" member is not listed
* here; mmap_register() assigns it.
*/
mca_sshmem_mmap_component_t mca_sshmem_mmap_component = {
/* ////////////////////////////////////////////////////////////////////// */
/* super */
/* ////////////////////////////////////////////////////////////////////// */
{
/**
* common MCA component data
*/
{
MCA_SSHMEM_BASE_VERSION_2_0_0,
/* component name and version */
"mmap",
OSHMEM_MAJOR_VERSION,
OSHMEM_MINOR_VERSION,
OSHMEM_RELEASE_VERSION,
/* component open */
mmap_open,
/* component close */
mmap_close,
/* component query */
mmap_query,
/* component register */
mmap_register
},
/* MCA v2.0.0 component meta data */
{
/* the component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
},
/* component run-time query */
mmap_runtime_query,
},
};
/* ////////////////////////////////////////////////////////////////////////// */
/**
 * Run-time query callback of the mmap component.
 *
 * @param module    (OUT) receives a pointer to the mmap module.
 * @param priority  (OUT) receives the component's current priority.
 * @param hint      not used by this component.
 *
 * @return OSHMEM_SUCCESS always; the status code is from the OSHMEM family,
 *         like every other callback of this component.
 */
static int
mmap_runtime_query(mca_base_module_t **module,
                   int *priority,
                   const char *hint)
{
    /* no run-time query needed for mmap, so this is easy */
    *priority = mca_sshmem_mmap_component.priority;
    *module = (mca_base_module_t *)&mca_sshmem_mmap_module.super;
    return OSHMEM_SUCCESS;
}
/* ////////////////////////////////////////////////////////////////////////// */
/**
 * Component register callback: sets the default priority and exposes it as
 * the "priority" MCA variable.
 *
 * @return OSHMEM_SUCCESS always. The result of the variable registration is
 *         deliberately discarded, as the sysv sshmem component does.
 */
static int
mmap_register(void)
{
    /* ////////////////////////////////////////////////////////////////////// */
    /* (default) priority - 20; note that the sysv sshmem component registers
     * a default of 30 */
    mca_sshmem_mmap_component.priority = 20;
    (void) mca_base_component_var_register(&mca_sshmem_mmap_component.super.base_version,
                                           "priority", "Priority for sshmem mmap "
                                           "component (default: 20)", MCA_BASE_VAR_TYPE_INT,
                                           NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
                                           OPAL_INFO_LVL_3,
                                           MCA_BASE_VAR_SCOPE_ALL_EQ,
                                           &mca_sshmem_mmap_component.priority);
    return OSHMEM_SUCCESS;
}
/* ////////////////////////////////////////////////////////////////////////// */
/* component open callback: nothing to set up, always reports success */
static int
mmap_open(void)
{
return OSHMEM_SUCCESS;
}
/* ////////////////////////////////////////////////////////////////////////// */
/**
 * Component query callback.
 *
 * Hands back the mmap module together with the component's current priority.
 * Always returns OSHMEM_SUCCESS.
 */
static int
mmap_query(mca_base_module_t **module, int *priority)
{
    *module = (mca_base_module_t *)&mca_sshmem_mmap_module.super;
    *priority = mca_sshmem_mmap_component.priority;

    return OSHMEM_SUCCESS;
}
/* ////////////////////////////////////////////////////////////////////////// */
/* component close callback: nothing to release, always reports success */
static int
mmap_close(void)
{
return OSHMEM_SUCCESS;
}

326
oshmem/mca/sshmem/mmap/sshmem_mmap_module.c Обычный файл
Просмотреть файл

@ -0,0 +1,326 @@
/*
* Copyright (c) 2014 Mellanox Technologies, Inc.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "oshmem_config.h"
#include <errno.h>
#ifdef HAVE_FCNTL_H
#include <fcntl.h>
#endif /* HAVE_FCNTL_H */
#ifdef HAVE_SYS_MMAN_H
#include <sys/mman.h>
#endif /* HAVE_SYS_MMAN_H */
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif /* HAVE_UNISTD_H */
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif /* HAVE_SYS_TYPES_H */
#ifdef HAVE_STRING_H
#include <string.h>
#endif /* HAVE_STRING_H */
#ifdef HAVE_NETDB_H
#include <netdb.h>
#endif /* HAVE_NETDB_H */
#ifdef HAVE_TIME_H
#include <time.h>
#endif /* HAVE_TIME_H */
#ifdef HAVE_SYS_STAT_H
#include <sys/stat.h>
#endif /* HAVE_SYS_STAT_H */
#include "opal/constants.h"
#include "opal/util/output.h"
#include "opal/util/path.h"
#include "opal/util/show_help.h"
#include "oshmem/mca/sshmem/sshmem.h"
#include "oshmem/mca/sshmem/base/base.h"
#include "sshmem_mmap.h"
/* fall back to the MAP_ANON spelling when only that one is provided */
#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
# define MAP_ANONYMOUS MAP_ANON
#endif /* MAP_ANONYMOUS and MAP_ANON */
/* provide the mmap() failure sentinel when <sys/mman.h> does not define it */
#if !defined(MAP_FAILED)
# define MAP_FAILED ((char*)-1)
#endif /* MAP_FAILED */
/* ////////////////////////////////////////////////////////////////////////// */
/* local functions: the entry points placed in mca_sshmem_mmap_module below */
static int
module_init(void);
static int
segment_create(map_segment_t *ds_buf,
const char *file_name,
size_t size);
static int
ds_copy(const map_segment_t *from,
map_segment_t *to);
static void *
segment_attach(map_segment_t *ds_buf, sshmem_mkey_t *mkey);
static int
segment_detach(map_segment_t *ds_buf, sshmem_mkey_t *mkey);
static int
segment_unlink(map_segment_t *ds_buf);
static int
module_finalize(void);
/*
* mmap shmem module
*
* Positional initializer: the entries follow the field order of
* mca_sshmem_base_module_t (module_init, segment_create, ds_copy,
* segment_attach, segment_detach, unlink, module_finalize).
*/
mca_sshmem_mmap_module_t mca_sshmem_mmap_module = {
/* super */
{
module_init,
segment_create,
ds_copy,
segment_attach,
segment_detach,
segment_unlink,
module_finalize
}
};
/* ////////////////////////////////////////////////////////////////////////// */
/* private utility functions */
/* ////////////////////////////////////////////////////////////////////////// */

/* ////////////////////////////////////////////////////////////////////////// */
/**
 * Puts *ds_buf back into its "empty" state: flags cleared, invalid id,
 * no address range, zero size, unknown type and an all-zero name.
 * The mkeys and mkeys_cache members are left untouched.
 *
 * The previous id, size and name are traced (verbosity 70) before they are
 * overwritten.
 */
static inline void
shmem_ds_reset(map_segment_t *ds_buf)
{
    OPAL_OUTPUT_VERBOSE(
        (70, oshmem_sshmem_base_framework.framework_output,
         "%s: %s: shmem_ds_resetting "
         "(id: %d, size: %lu, name: %s)\n",
         mca_sshmem_mmap_component.super.base_version.mca_type_name,
         mca_sshmem_mmap_component.super.base_version.mca_component_name,
         ds_buf->seg_id, (unsigned long)ds_buf->seg_size, ds_buf->seg_name)
    );

    ds_buf->type = MAP_SEGMENT_UNKNOWN;
    ds_buf->seg_id = MAP_SEGMENT_SHM_INVALID;
    ds_buf->seg_size = 0;
    ds_buf->seg_base_addr = NULL;
    ds_buf->end = NULL;
    memset(ds_buf->seg_name, 0, sizeof ds_buf->seg_name);
    MAP_SEGMENT_RESET_FLAGS(ds_buf);
}
/* ////////////////////////////////////////////////////////////////////////// */
/* module initialization entry point; the mmap module keeps no state */
static int
module_init(void)
{
/* nothing to do */
return OSHMEM_SUCCESS;
}
/* ////////////////////////////////////////////////////////////////////////// */
/* module finalize entry point; the mmap module has nothing to release */
static int
module_finalize(void)
{
/* nothing to do */
return OSHMEM_SUCCESS;
}
/* ////////////////////////////////////////////////////////////////////////// */
/**
 * Byte-wise copy of one segment descriptor into another.
 *
 * @param from  descriptor to read (IN).
 * @param to    descriptor to overwrite (OUT).
 *
 * @return OSHMEM_SUCCESS always.
 */
static int
ds_copy(const map_segment_t *from,
        map_segment_t *to)
{
    memcpy(to, from, sizeof(*to));

    /* trace both sides after the copy */
    OPAL_OUTPUT_VERBOSE(
        (70, oshmem_sshmem_base_framework.framework_output,
         "%s: %s: ds_copy complete "
         "from: (id: %d, size: %lu, "
         "name: %s flags: 0x%02x) "
         "to: (id: %d, size: %lu, "
         "name: %s flags: 0x%02x)\n",
         mca_sshmem_mmap_component.super.base_version.mca_type_name,
         mca_sshmem_mmap_component.super.base_version.mca_component_name,
         from->seg_id, (unsigned long)from->seg_size, from->seg_name,
         from->flags,
         to->seg_id, (unsigned long)to->seg_size, to->seg_name,
         to->flags)
    );

    return OSHMEM_SUCCESS;
}
/* ////////////////////////////////////////////////////////////////////////// */
/**
 * Creates the segment as a shared anonymous mapping placed (MAP_FIXED) at
 * mca_sshmem_base_start_address and records it in *ds_buf.
 *
 * @param ds_buf     descriptor to fill in; it is reset first (OUT).
 * @param file_name  not used by the mmap module.
 * @param size       number of bytes to map.
 *
 * @return OSHMEM_SUCCESS on success, OSHMEM_ERR_OUT_OF_RESOURCE when mmap()
 *         fails (ds_buf is then left in its reset state).
 */
static int
segment_create(map_segment_t *ds_buf,
               const char *file_name,
               size_t size)
{
    int rc = OSHMEM_SUCCESS;
    void *addr = NULL;
    int map_flags = MAP_SHARED | MAP_FIXED;

    assert(ds_buf);

#if defined(MAP_ANONYMOUS)
    map_flags |= MAP_ANONYMOUS;
#endif

    /* init the contents of map_segment_t */
    shmem_ds_reset(ds_buf);

    /* The mapping is not backed by a file. Pass -1 as the descriptor: some
     * implementations require it for anonymous mappings, and it never
     * refers to an open file (0 is normally stdin).
     */
    addr = mmap((void *)mca_sshmem_base_start_address,
                size,
                PROT_READ | PROT_WRITE,
                map_flags,
                -1,
                0);
    if (MAP_FAILED == addr) {
        OPAL_OUTPUT_VERBOSE(
            (5, oshmem_sshmem_base_framework.framework_output,
             "Failed to mmap() %llu bytes (errno=%d)",
             (unsigned long long)size, errno)
        );
        return OSHMEM_ERR_OUT_OF_RESOURCE;
    }

    ds_buf->type = MAP_SEGMENT_ALLOC_MMAP;
    /* an mmap segment has no shared-memory id */
    ds_buf->seg_id = MAP_SEGMENT_SHM_INVALID;
    ds_buf->seg_base_addr = addr;
    ds_buf->seg_size = size;
    ds_buf->end = (void*)((uintptr_t)ds_buf->seg_base_addr + ds_buf->seg_size);

    OPAL_OUTPUT_VERBOSE(
        (70, oshmem_sshmem_base_framework.framework_output,
         "%s: %s: create %s "
         "(id: %d, addr: %p size: %lu, name: %s)\n",
         mca_sshmem_mmap_component.super.base_version.mca_type_name,
         mca_sshmem_mmap_component.super.base_version.mca_component_name,
         (rc ? "failure" : "successful"),
         ds_buf->seg_id, ds_buf->seg_base_addr, (unsigned long)ds_buf->seg_size, ds_buf->seg_name)
    );

    return rc;
}
/* ////////////////////////////////////////////////////////////////////////// */
/**
 * segment_attach can only be called after a successful call to segment_create
 *
 * @param ds_buf  segment descriptor; only its size and, for tracing, its other
 *                fields are read (IN).
 * @param mkey    memory key; u.key is inspected and, after a successful
 *                mapping, va_base receives the mapped address (IN/OUT).
 *
 * @return mkey->va_base unchanged when the key carries no shared-memory id,
 *         the mapped address after a successful mmap(), NULL when mmap()
 *         fails.
 */
static void *
segment_attach(map_segment_t *ds_buf, sshmem_mkey_t *mkey)
{
    void *addr = NULL;
    int map_flags = MAP_SHARED | MAP_FIXED;

    assert(ds_buf);
    assert(mkey->va_base == 0);

    /* a key without a shared-memory id has nothing to attach */
    if (MAP_SEGMENT_SHM_INVALID == (int)(mkey->u.key)) {
        return (mkey->va_base);
    }

#if defined(MAP_ANONYMOUS)
    map_flags |= MAP_ANONYMOUS;
#endif

    /* anonymous mapping: pass -1 as the descriptor, the portable value for
     * mappings that are not backed by a file
     */
    addr = mmap((void *)mca_sshmem_base_start_address,
                ds_buf->seg_size,
                PROT_READ | PROT_WRITE,
                map_flags,
                -1,
                0);
    if (MAP_FAILED == addr) {
        OPAL_OUTPUT_VERBOSE(
            (5, oshmem_sshmem_base_framework.framework_output,
             "Failed to mmap() %llu bytes (errno=%d)",
             (unsigned long long)ds_buf->seg_size, errno)
        );
        return NULL;
    }

    mkey->va_base = addr;

    OPAL_OUTPUT_VERBOSE(
        (70, oshmem_sshmem_base_framework.framework_output,
         "%s: %s: attach successful "
         "(id: %d, addr: %p size: %lu, name: %s | va_base: 0x%p len: %d key %llx)\n",
         mca_sshmem_mmap_component.super.base_version.mca_type_name,
         mca_sshmem_mmap_component.super.base_version.mca_component_name,
         ds_buf->seg_id, ds_buf->seg_base_addr, (unsigned long)ds_buf->seg_size, ds_buf->seg_name,
         mkey->va_base, mkey->len, (unsigned long long)mkey->u.key)
    );

    /* the mapped address is returned as is; no offset is applied */
    return (mkey->va_base);
}
/* ////////////////////////////////////////////////////////////////////////// */
/**
 * Unmaps the range recorded in *ds_buf and resets the descriptor.
 *
 * @param ds_buf  descriptor of the mapping to drop; reset on return (IN/OUT).
 * @param mkey    not used by the mmap module.
 *
 * @return OSHMEM_SUCCESS always. A munmap() failure is reported through the
 *         framework's verbose output but does not change the return value,
 *         so existing callers keep seeing the same result.
 */
static int
segment_detach(map_segment_t *ds_buf, sshmem_mkey_t *mkey)
{
    int rc = OSHMEM_SUCCESS;

    assert(ds_buf);

    OPAL_OUTPUT_VERBOSE(
        (70, oshmem_sshmem_base_framework.framework_output,
         "%s: %s: detaching "
         "(id: %d, addr: %p size: %lu, name: %s)\n",
         mca_sshmem_mmap_component.super.base_version.mca_type_name,
         mca_sshmem_mmap_component.super.base_version.mca_component_name,
         ds_buf->seg_id, ds_buf->seg_base_addr, (unsigned long)ds_buf->seg_size, ds_buf->seg_name)
    );

    /* do not swallow a failed unmap silently: leave a trace of it */
    if (0 != munmap((void *)ds_buf->seg_base_addr, ds_buf->seg_size)) {
        OPAL_OUTPUT_VERBOSE(
            (5, oshmem_sshmem_base_framework.framework_output,
             "Failed to munmap() %llu bytes (errno=%d)",
             (unsigned long long)ds_buf->seg_size, errno)
        );
    }

    /* reset the contents of the map_segment_t associated with this
     * shared memory segment.
     */
    shmem_ds_reset(ds_buf);

    return rc;
}
/* ////////////////////////////////////////////////////////////////////////// */
/**
 * Marks the descriptor as no longer valid.
 *
 * Nothing is removed from the system here: only the valid bit is cleared
 * and the id is set to MAP_SEGMENT_SHM_INVALID. Address, size, name and all
 * other flag bits are left as they are.
 *
 * @return OSHMEM_SUCCESS always.
 */
static int
segment_unlink(map_segment_t *ds_buf)
{
    /* trace the descriptor as it looks before it is invalidated */
    OPAL_OUTPUT_VERBOSE(
        (70, oshmem_sshmem_base_framework.framework_output,
         "%s: %s: unlinking "
         "(id: %d, addr: %p size: %lu, name: %s)\n",
         mca_sshmem_mmap_component.super.base_version.mca_type_name,
         mca_sshmem_mmap_component.super.base_version.mca_component_name,
         ds_buf->seg_id, ds_buf->seg_base_addr, (unsigned long)ds_buf->seg_size, ds_buf->seg_name)
    );

    /* clears the valid bit only */
    MAP_SEGMENT_INVALIDATE(ds_buf);
    ds_buf->seg_id = MAP_SEGMENT_SHM_INVALID;

    return OSHMEM_SUCCESS;
}

176
oshmem/mca/sshmem/sshmem.h Обычный файл
Просмотреть файл

@ -0,0 +1,176 @@
/*
* Copyright (c) 2014 Mellanox Technologies, Inc.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*
* sshmem (shared memory backing facility) framework component interface
* definitions.
*
* The module has the following functions:
*
* - module_init
* - segment_create
* - ds_copy
* - segment_attach
* - segment_detach
* - unlink
* - module_finalize
*/
#ifndef MCA_SSHMEM_H
#define MCA_SSHMEM_H
#include "oshmem_config.h"
#include "oshmem/types.h"
#include "oshmem/constants.h"
#include "opal/mca/mca.h"
#include "opal/mca/base/base.h"
#include "oshmem/mca/sshmem/sshmem_types.h"
BEGIN_C_DECLS
/* ////////////////////////////////////////////////////////////////////////// */
/**
* component run-time query function.
*
* @param module (OUT) receives the component's module; the sysv component
* stores NULL here when its run-time probe fails.
*
* @param priority (OUT) receives the component's priority; the sysv component
* stores 0 here when its run-time probe fails.
*
* @param hint (IN) not interpreted by the mmap and sysv components; its
* meaning is defined by the caller - TODO confirm.
*
* @return status code; the mmap and sysv components always report success.
*/
typedef int
(*mca_sshmem_base_component_runtime_query_fn_t)(mca_base_module_t **module,
int *priority,
const char *hint);
/* structure for sshmem components.
* Components initialize it positionally, so the member order must not change.
*/
struct mca_sshmem_base_component_2_0_0_t {
/* base MCA component */
mca_base_component_t base_version;
/* base MCA data */
mca_base_component_data_t base_data;
/* component runtime query */
mca_sshmem_base_component_runtime_query_fn_t runtime_query;
};
/* convenience typedefs: versioned name and the unversioned alias used by
* component code */
typedef struct mca_sshmem_base_component_2_0_0_t
mca_sshmem_base_component_2_0_0_t;
typedef struct mca_sshmem_base_component_2_0_0_t mca_sshmem_base_component_t;
/* ////////////////////////////////////////////////////////////////////////// */
/* sshmem API function pointers */
/**
* module initialization function.
* Takes no arguments.
* @return OSHMEM_SUCCESS on success.
*/
typedef int
(*mca_sshmem_base_module_init_fn_t)(void);
/**
* copy the segment descriptor (map_segment_t) pointed to by from into the
* structure pointed to by to.
*
* @param from source pointer (IN).
*
* @param to destination pointer (OUT).
*
* @return OSHMEM_SUCCESS on success.
*/
typedef int
(*mca_sshmem_base_ds_copy_fn_t)(const map_segment_t *from,
map_segment_t *to);
/**
* create a new shared memory segment and initialize members in structure
* pointed to by ds_buf.
*
* @param ds_buf pointer to map_segment_t typedef'd structure
* defined in sshmem_types.h (OUT).
*
* @param file_name file_name unique string identifier that must be a valid,
* writable path (IN). The mmap and sysv modules do not use it.
*
* @param size size of the shared memory segment.
*
* @return OSHMEM_SUCCESS on success.
*/
typedef int
(*mca_sshmem_base_module_segment_create_fn_t)(map_segment_t *ds_buf,
const char *file_name,
size_t size);
/**
* attach to an existing shared memory segment initialized by segment_create.
*
* @param ds_buf pointer to initialized map_segment_t typedef'd
* structure (IN/OUT).
*
* @param mkey memory key of the segment; the mmap and sysv modules read
* mkey->u.key and store the attached address in mkey->va_base (IN/OUT).
*
* @return base address of shared memory segment on success. returns
* NULL otherwise.
* NOTE(review): the sysv module returns the result of shmat() unchanged, so
* a failed attach may show up as (void *)-1 instead of NULL - confirm what
* callers test for.
*/
typedef void *
(*mca_sshmem_base_module_segment_attach_fn_t)(map_segment_t *ds_buf, sshmem_mkey_t *mkey);
/**
* detach from an existing shared memory segment.
*
* @param ds_buf pointer to initialized map_segment_t typedef'd structure
* (IN/OUT).
*
* @param mkey memory key of the segment; not used by the mmap and sysv
* modules.
*
* @return OSHMEM_SUCCESS on success.
*/
typedef int
(*mca_sshmem_base_module_segment_detach_fn_t)(map_segment_t *ds_buf, sshmem_mkey_t *mkey);
/**
* unlink an existing shared memory segment.
* The mmap and sysv modules only invalidate the descriptor: they clear its
* valid flag and set its id to MAP_SEGMENT_SHM_INVALID.
*
* @param ds_buf pointer to initialized map_segment_t typedef'd structure
* (IN/OUT).
*
* @return OSHMEM_SUCCESS on success.
*/
typedef int
(*mca_sshmem_base_module_unlink_fn_t)(map_segment_t *ds_buf);
/**
* module finalize function. invoked by the base on the selected
* module when the sshmem framework is being shut down.
*
* @return OSHMEM_SUCCESS on success.
*/
typedef int (*mca_sshmem_base_module_finalize_fn_t)(void);
/**
* structure for sshmem modules: the table of entry points a module provides.
* Modules initialize it positionally, so the member order must not change.
*/
struct mca_sshmem_base_module_2_0_0_t {
mca_sshmem_base_module_init_fn_t module_init;
mca_sshmem_base_module_segment_create_fn_t segment_create;
mca_sshmem_base_ds_copy_fn_t ds_copy;
mca_sshmem_base_module_segment_attach_fn_t segment_attach;
mca_sshmem_base_module_segment_detach_fn_t segment_detach;
mca_sshmem_base_module_unlink_fn_t unlink;
mca_sshmem_base_module_finalize_fn_t module_finalize;
};
/**
* convenience typedefs: versioned name and the unversioned alias used by
* module code
*/
typedef struct mca_sshmem_base_module_2_0_0_t mca_sshmem_base_module_2_0_0_t;
typedef struct mca_sshmem_base_module_2_0_0_t mca_sshmem_base_module_t;
/**
* macro for use in components that are of type sshmem
* see: opal/mca/mca.h for more information
*
* Expands to the leading entries of a component's base_version initializer:
* the MCA base version followed by the framework name "sshmem" and the
* framework version 2.0.0.
*/
#define MCA_SSHMEM_BASE_VERSION_2_0_0 \
MCA_BASE_VERSION_2_0_0, \
"sshmem", 2, 0, 0
END_C_DECLS
#endif /* MCA_SSHMEM_H */

109
oshmem/mca/sshmem/sshmem_types.h Обычный файл
Просмотреть файл

@ -0,0 +1,109 @@
/*
* Copyright (c) 2014 Mellanox Technologies, Inc.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*
* shmem (shared memory backing facility) framework types, convenience macros,
* etc.
*/
#ifndef MCA_SSHMEM_TYPES_H
#define MCA_SSHMEM_TYPES_H
#include "oshmem_config.h"
BEGIN_C_DECLS
/**
 * bit in map_segment_t::flags that records whether the descriptor is valid
 * 0x0* - reserved for non-internal flags
 */
#define MAP_SEGMENT_FLAGS_VALID 0x01

/**
 * id value meaning "no shared memory id"
 */
#define MAP_SEGMENT_SHM_INVALID (-1)

/**
 * clears every bit in flags
 */
#define MAP_SEGMENT_RESET_FLAGS(ds_buf) \
    do { (ds_buf)->flags = 0; } while (0)

/**
 * turns the valid bit on; other bits are kept
 */
#define MAP_SEGMENT_SET_VALID(ds_buf) \
    do { (ds_buf)->flags |= MAP_SEGMENT_FLAGS_VALID; } while (0)

/**
 * turns the valid bit off; other bits are kept
 */
#define MAP_SEGMENT_INVALIDATE(ds_buf) \
    do { (ds_buf)->flags &= ~MAP_SEGMENT_FLAGS_VALID; } while (0)

/**
 * 1 when the valid bit is on, 0 when it is off
 */
#define MAP_SEGMENT_IS_VALID(ds_buf) \
    ( ((ds_buf)->flags & MAP_SEGMENT_FLAGS_VALID) != 0 )

/* storage type of the flags member */
typedef uint8_t segment_flag_t;
/*
* kind of a segment. The mmap module tags the segments it creates with
* MAP_SEGMENT_ALLOC_MMAP, the sysv module with MAP_SEGMENT_ALLOC_SHM;
* a reset descriptor carries MAP_SEGMENT_UNKNOWN.
*/
typedef enum {
MAP_SEGMENT_STATIC = 0,
MAP_SEGMENT_ALLOC_MMAP,
MAP_SEGMENT_ALLOC_SHM,
MAP_SEGMENT_ALLOC_IBV,
MAP_SEGMENT_UNKNOWN
} segment_type_t;
/**
* memory key
* We have two kinds of keys:
* - shared memory type of keys. The memory segment must be attached before
* access; such keys use va_base = 0 and key
* - ib type of key. Key is passed with each put/get op.
* use va_base = <remote vaddr>, key is stored in mkey struct
*/
typedef struct sshmem_mkey {
/* 0 for a not yet attached shared memory key; set by segment_attach */
void* va_base;
uint16_t len;
/* key payload; the mmap and sysv modules read u.key as a segment id */
union {
void *data;
uint64_t key;
} u;
void *spml_context; /* spml module can attach internal structures here */
} sshmem_mkey_t;
/*
* descriptor of one memory segment. The sshmem modules copy it with a plain
* memcpy (ds_copy) and reset every member except the two mkey pointers
* (shmem_ds_reset).
*/
typedef struct map_segment_t {
sshmem_mkey_t **mkeys_cache; /* includes remote segment bases in va_base */
sshmem_mkey_t *mkeys; /* includes local segment bases in va_base */
segment_flag_t flags; /* enable/disable flag */
int seg_id; /* segment id, MAP_SEGMENT_SHM_INVALID when there is none */
void* seg_base_addr; /* base address of the segment */
void* end; /* final address of the segment: seg_base_addr + seg_size */
char seg_name[OPAL_PATH_MAX];
size_t seg_size; /* length of the segment */
segment_type_t type; /* type of the segment */
} map_segment_t;
END_C_DECLS
#endif /* MCA_SSHMEM_TYPES_H */

34
oshmem/mca/sshmem/sysv/Makefile.am Обычный файл
Просмотреть файл

@ -0,0 +1,34 @@
# Copyright (c) 2014 Mellanox Technologies, Inc.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Files that make up the sysv sshmem component.
sources = \
sshmem_sysv.h \
sshmem_sysv_component.c \
sshmem_sysv_module.c
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
if MCA_BUILD_oshmem_sshmem_sysv_DSO
component_noinst =
component_install = mca_sshmem_sysv.la
else
component_noinst = libmca_sshmem_sysv.la
component_install =
endif
# DSO build: the component library is installed into $(ompilibdir).
mcacomponentdir = $(ompilibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_sshmem_sysv_la_SOURCES = $(sources)
mca_sshmem_sysv_la_LDFLAGS = -module -avoid-version
# Static build: the component library is built but not installed.
noinst_LTLIBRARIES = $(component_noinst)
libmca_sshmem_sysv_la_SOURCES = $(sources)
libmca_sshmem_sysv_la_LDFLAGS = -module -avoid-version

40
oshmem/mca/sshmem/sysv/configure.m4 Обычный файл
Просмотреть файл

@ -0,0 +1,40 @@
# -*- shell-script -*-
#
# Copyright (c) 2014 Mellanox Technologies, Inc.
# All rights reserved.
#
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# MCA_oshmem_sshmem_sysv_CONFIG(action-if-can-compile,
# [action-if-cant-compile])
# ------------------------------------------------
# Decides whether the sysv sshmem component is built. The component is
# enabled unless --disable-sysv-sshmem is given and is built only when
# shmget is available. The result is also exported as OSHMEM_SSHMEM_SYSV.
AC_DEFUN([MCA_oshmem_sshmem_sysv_CONFIG],[
AC_CONFIG_FILES([oshmem/mca/sshmem/sysv/Makefile])
# do we have the sysv shm stuff?
AC_MSG_CHECKING([if want SYSV shared memory support])
AC_ARG_ENABLE(sysv-sshmem,
AC_HELP_STRING([--disable-sysv-sshmem],
[disable sysv shared memory support (default: enabled)]))
AS_IF([test "$enable_sysv_sshmem" = "no"],
[AC_MSG_RESULT([no])
oshmem_sysv_sm_build_sysv=0],
[AC_MSG_RESULT([yes])
AC_CHECK_FUNC(shmget,
[oshmem_sysv_sm_build_sysv=1],
[oshmem_sysv_sm_build_sysv=0])])
AS_IF([test "$enable_sysv_sshmem" = "yes" -a "$oshmem_sysv_sm_build_sysv" = "0"],
[AC_MSG_WARN([SYSV shared memory support requested but not found])
AC_MSG_ERROR([Cannot continue])])
AS_IF([test "$oshmem_sysv_sm_build_sysv" = "1"], [$1], [$2])
AC_DEFINE_UNQUOTED([OSHMEM_SSHMEM_SYSV],
[$oshmem_sysv_sm_build_sysv],
[Whether we have shared memory support for SYSV or not])
])dnl

41
oshmem/mca/sshmem/sysv/sshmem_sysv.h Обычный файл
Просмотреть файл

@ -0,0 +1,41 @@
/*
* Copyright (c) 2014 Mellanox Technologies, Inc.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_SSHMEM_SYSV_EXPORT_H
#define MCA_SSHMEM_SYSV_EXPORT_H
#include "oshmem_config.h"
#include "oshmem/mca/sshmem/sshmem.h"
BEGIN_C_DECLS
/**
* sysv sshmem component type and the globally exported variable that
* holds the sysv component.
*/
typedef struct mca_sshmem_sysv_component_t {
/* base component struct */
mca_sshmem_base_component_t super;
/* priority for sysv component */
int priority;
/* non-zero when segments are requested with SHM_HUGETLB; cleared by the
* register callback and decided by the component's run-time query */
int use_hp;
} mca_sshmem_sysv_component_t;
/* the one component instance; defined in the sysv component source file */
OSHMEM_MODULE_DECLSPEC extern mca_sshmem_sysv_component_t
mca_sshmem_sysv_component;
/*
* sysv sshmem module type: wraps the generic sshmem module function table
* and adds no sysv-specific state of its own.
*/
typedef struct mca_sshmem_sysv_module_t {
mca_sshmem_base_module_t super;
} mca_sshmem_sysv_module_t;
/* module instance returned by the sysv component's query callbacks */
extern mca_sshmem_sysv_module_t mca_sshmem_sysv_module;
END_C_DECLS
#endif /* MCA_SSHMEM_SYSV_EXPORT_H */

Просмотреть файл

@ -0,0 +1,189 @@
/*
* Copyright (c) 2014 Mellanox Technologies, Inc.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "oshmem_config.h"
#ifdef HAVE_SYS_MMAN_H
#include <sys/mman.h>
#endif /* HAVE_SYS_MMAN_H */
#ifdef HAVE_STRING_H
#include <string.h>
#endif /* HAVE_STRING_H */
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif /* HAVE_UNISTD_H */
#ifdef HAVE_SYS_IPC_H
#include <sys/ipc.h>
#endif /* HAVE_SYS_IPC_H */
#if HAVE_SYS_SHM_H
#include <sys/shm.h>
#endif /* HAVE_SYS_SHM_H */
#if HAVE_SYS_STAT_H
#include <sys/stat.h>
#endif /* HAVE_SYS_STAT_H */
#include "opal/constants.h"
#include "opal/util/show_help.h"
#include "opal/util/output.h"
#include "oshmem/mca/sshmem/sshmem.h"
#include "oshmem/mca/sshmem/base/base.h"
#include "sshmem_sysv.h"
/* public string showing the sshmem sysv component version number */
const char *mca_sshmem_sysv_component_version_string =
"OSHMEM sysv sshmem MCA component version " OSHMEM_VERSION;
/* local functions: the MCA component callbacks wired into
* mca_sshmem_sysv_component below (register, open, query) plus the
* sshmem-specific run-time query. There is no close callback.
*/
static int sysv_register (void);
static int sysv_open(void);
static int sysv_query(mca_base_module_t **module, int *priority);
static int sysv_runtime_query(mca_base_module_t **module,
int *priority,
const char *hint);
/* instantiate the public struct with all of our public information
* and pointers to our public functions in it.
*
* The initializer is positional, so the entries must stay in the order of the
* underlying struct fields. The trailing priority and use_hp members are not
* listed here; sysv_register() assigns both.
*/
mca_sshmem_sysv_component_t mca_sshmem_sysv_component = {
/* ////////////////////////////////////////////////////////////////////// */
/* super */
/* ////////////////////////////////////////////////////////////////////// */
{
/* common MCA component data */
{
MCA_SSHMEM_BASE_VERSION_2_0_0,
/* component name and version */
"sysv",
OSHMEM_MAJOR_VERSION,
OSHMEM_MINOR_VERSION,
OSHMEM_RELEASE_VERSION,
/* component open */
sysv_open,
/* component close */
NULL,
/* component query */
sysv_query,
/* component register */
sysv_register
},
/* MCA v2.0.0 component meta data */
{
/* the component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
},
/* component run-time query */
sysv_runtime_query,
},
/* ////////////////////////////////////////////////////////////////////// */
/* sysv component-specific information */
/* see: sshmem_sysv.h for more information */
};
/* ////////////////////////////////////////////////////////////////////////// */
/**
 * Run-time probe of the sysv component.
 *
 * Creates a one-page System V segment (first with SHM_HUGETLB when that flag
 * exists, then without it), attaches it, writes one byte, marks the segment
 * for removal and checks that it can still be queried while attached. Only
 * when every step works is the module reported as usable.
 *
 * As a side effect mca_sshmem_sysv_component.use_hp records whether the
 * huge-page probe could create and attach its segment.
 *
 * @param module    (OUT) the sysv module when the probe passes, NULL otherwise.
 * @param priority  (OUT) the component priority when the probe passes,
 *                  0 otherwise.
 * @param hint      not used by this component.
 *
 * @return OSHMEM_SUCCESS always; a failed probe is signalled through
 *         *module == NULL.
 */
static int
sysv_runtime_query(mca_base_module_t **module,
                   int *priority,
                   const char *hint)
{
    char c = 'j';
    int shmid = -1;
    char *addr = NULL;
    struct shmid_ds tmp_buff;
    int flags;

    *priority = 0;
    *module = NULL;

    /* if we are here, then let the run-time test games begin */

#if defined (SHM_HUGETLB)
    mca_sshmem_sysv_component.use_hp = 1;
    flags = IPC_CREAT | IPC_EXCL | S_IRUSR | S_IWUSR | SHM_HUGETLB;
    if (-1 == (shmid = shmget(IPC_PRIVATE, (size_t)(getpagesize()), flags))) {
        mca_sshmem_sysv_component.use_hp = 0;
    }
    else if ((void *)-1 == (addr = shmat(shmid, NULL, 0))) {
        /* the segment exists but cannot be attached: remove it, otherwise
         * its id is overwritten by the fallback probe and the segment
         * stays allocated in the system
         */
        shmctl(shmid, IPC_RMID, NULL);
        shmid = -1;
        addr = NULL;
        mca_sshmem_sysv_component.use_hp = 0;
    }
#endif

    if (0 == mca_sshmem_sysv_component.use_hp) {
        flags = IPC_CREAT | IPC_EXCL | S_IRUSR | S_IWUSR;
        if (-1 == (shmid = shmget(IPC_PRIVATE, (size_t)(getpagesize()), flags))) {
            goto out;
        }
        else if ((void *)-1 == (addr = shmat(shmid, NULL, 0))) {
            /* same as above: do not leave the probe segment behind */
            shmctl(shmid, IPC_RMID, NULL);
            addr = NULL;
            goto out;
        }
    }

    /* protect against lazy establishment - may not be needed, but can't hurt */
    *addr = c;

    if (-1 == shmctl(shmid, IPC_RMID, NULL)) {
        goto out;
    }
    else if (-1 == shmctl(shmid, IPC_STAT, &tmp_buff)) {
        /* the segment must remain usable after it was marked for removal */
        goto out;
    }
    /* all is well - rainbows and butterflies */
    else {
        *priority = mca_sshmem_sysv_component.priority;
        *module = (mca_base_module_t *)&mca_sshmem_sysv_module.super;
    }

out:
    /* addr is non-NULL only when an attach succeeded */
    if (NULL != addr) {
        shmdt(addr);
    }
    return OSHMEM_SUCCESS;
}
/* ////////////////////////////////////////////////////////////////////////// */
/**
 * Component register callback.
 *
 * Starts with huge pages switched off, sets the default priority to 30
 * (the mmap sshmem component registers 20) and exposes the priority as the
 * "priority" MCA variable. The registration result is not examined.
 *
 * @return OSHMEM_SUCCESS always.
 */
static int
sysv_register(void)
{
    mca_sshmem_sysv_component.use_hp = 0;
    mca_sshmem_sysv_component.priority = 30;

    (void) mca_base_component_var_register(&mca_sshmem_sysv_component.super.base_version,
                                           "priority", "Priority for the sshmem sysv "
                                           "component (default: 30)", MCA_BASE_VAR_TYPE_INT,
                                           NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
                                           OPAL_INFO_LVL_3,
                                           MCA_BASE_VAR_SCOPE_ALL_EQ,
                                           &mca_sshmem_sysv_component.priority);

    return OSHMEM_SUCCESS;
}
/* ////////////////////////////////////////////////////////////////////////// */
/* component open callback: nothing to set up, always reports success */
static int
sysv_open(void)
{
return OSHMEM_SUCCESS;
}
/* ////////////////////////////////////////////////////////////////////////// */
/**
 * Component query callback.
 *
 * Hands back the sysv module together with the component's current priority,
 * without running any probe. Always returns OSHMEM_SUCCESS.
 */
static int
sysv_query(mca_base_module_t **module, int *priority)
{
    *module = (mca_base_module_t *)&mca_sshmem_sysv_module.super;
    *priority = mca_sshmem_sysv_component.priority;

    return OSHMEM_SUCCESS;
}

329
oshmem/mca/sshmem/sysv/sshmem_sysv_module.c Обычный файл
Просмотреть файл

@ -0,0 +1,329 @@
/*
* Copyright (c) 2014 Mellanox Technologies, Inc.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "oshmem_config.h"
#include <errno.h>
#ifdef HAVE_FCNTL_H
#include <fcntl.h>
#endif /* HAVE_FCNTL_H */
#ifdef HAVE_SYS_MMAN_H
#include <sys/mman.h>
#endif /* HAVE_SYS_MMAN_H */
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif /* HAVE_UNISTD_H */
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif /* HAVE_SYS_TYPES_H */
#ifdef HAVE_SYS_IPC_H
#include <sys/ipc.h>
#endif /* HAVE_SYS_IPC_H */
#if HAVE_SYS_SHM_H
#include <sys/shm.h>
#endif /* HAVE_SYS_SHM_H */
#if HAVE_SYS_STAT_H
#include <sys/stat.h>
#endif /* HAVE_SYS_STAT_H */
#ifdef HAVE_STRING_H
#include <string.h>
#endif /* HAVE_STRING_H */
#ifdef HAVE_NETDB_H
#include <netdb.h>
#endif /* HAVE_NETDB_H */
#include "opal/constants.h"
#include "opal_stdint.h"
#include "opal/util/output.h"
#include "opal/util/path.h"
#include "opal/util/show_help.h"
#include "oshmem/mca/sshmem/sshmem.h"
#include "oshmem/mca/sshmem/base/base.h"
#include "sshmem_sysv.h"
/* ////////////////////////////////////////////////////////////////////////// */
/* local functions: the entry points placed in mca_sshmem_sysv_module below */
static int
module_init(void);
static int
segment_create(map_segment_t *ds_buf,
const char *file_name,
size_t size);
static int
ds_copy(const map_segment_t *from,
map_segment_t *to);
static void *
segment_attach(map_segment_t *ds_buf, sshmem_mkey_t *mkey);
static int
segment_detach(map_segment_t *ds_buf, sshmem_mkey_t *mkey);
static int
segment_unlink(map_segment_t *ds_buf);
static int
module_finalize(void);
/* sysv sshmem module
*
* Positional initializer: the entries follow the field order of
* mca_sshmem_base_module_t (module_init, segment_create, ds_copy,
* segment_attach, segment_detach, unlink, module_finalize).
*/
mca_sshmem_sysv_module_t mca_sshmem_sysv_module = {
/* super */
{
module_init,
segment_create,
ds_copy,
segment_attach,
segment_detach,
segment_unlink,
module_finalize
}
};
/* ////////////////////////////////////////////////////////////////////////// */
/* private utility functions */
/* ////////////////////////////////////////////////////////////////////////// */

/* ////////////////////////////////////////////////////////////////////////// */
/**
 * Puts *ds_buf back into its "empty" state: flags cleared, invalid id,
 * no address range, zero size, unknown type and an all-zero name.
 * The mkeys and mkeys_cache members are left untouched.
 *
 * The previous id, size and name are traced (verbosity 70) before they are
 * overwritten.
 */
static inline void
shmem_ds_reset(map_segment_t *ds_buf)
{
    OPAL_OUTPUT_VERBOSE(
        (70, oshmem_sshmem_base_framework.framework_output,
         "%s: %s: shmem_ds_resetting "
         "(id: %d, size: %lu, name: %s)\n",
         mca_sshmem_sysv_component.super.base_version.mca_type_name,
         mca_sshmem_sysv_component.super.base_version.mca_component_name,
         ds_buf->seg_id, (unsigned long)ds_buf->seg_size, ds_buf->seg_name)
    );

    ds_buf->type = MAP_SEGMENT_UNKNOWN;
    ds_buf->seg_id = MAP_SEGMENT_SHM_INVALID;
    ds_buf->seg_size = 0;
    ds_buf->seg_base_addr = NULL;
    ds_buf->end = NULL;
    memset(ds_buf->seg_name, 0, sizeof ds_buf->seg_name);
    MAP_SEGMENT_RESET_FLAGS(ds_buf);
}
/* ////////////////////////////////////////////////////////////////////////// */
/* module initialization entry point; the sysv module keeps no state */
static int
module_init(void)
{
/* nothing to do */
return OSHMEM_SUCCESS;
}
/* ////////////////////////////////////////////////////////////////////////// */
/* module finalize entry point; the sysv module has nothing to release */
static int
module_finalize(void)
{
/* nothing to do */
return OSHMEM_SUCCESS;
}
/* ////////////////////////////////////////////////////////////////////////// */
/**
 * Byte-wise copy of one segment descriptor into another.
 *
 * @param from  descriptor to read (IN).
 * @param to    descriptor to overwrite (OUT).
 *
 * @return OSHMEM_SUCCESS always.
 */
static int
ds_copy(const map_segment_t *from,
        map_segment_t *to)
{
    memcpy(to, from, sizeof(*to));

    /* trace both sides after the copy */
    OPAL_OUTPUT_VERBOSE(
        (70, oshmem_sshmem_base_framework.framework_output,
         "%s: %s: ds_copy complete "
         "from: (id: %d, size: %lu, "
         "name: %s flags: 0x%02x) "
         "to: (id: %d, size: %lu, "
         "name: %s flags: 0x%02x)\n",
         mca_sshmem_sysv_component.super.base_version.mca_type_name,
         mca_sshmem_sysv_component.super.base_version.mca_component_name,
         from->seg_id, (unsigned long)from->seg_size, from->seg_name,
         from->flags,
         to->seg_id, (unsigned long)to->seg_size, to->seg_name,
         to->flags)
    );

    return OSHMEM_SUCCESS;
}
/* ////////////////////////////////////////////////////////////////////////// */
/**
 * Creates a private System V segment of the requested size, attaches it at
 * mca_sshmem_base_start_address and records it in *ds_buf.
 *
 * The segment is marked for removal (IPC_RMID) right after the attach
 * attempt, whether or not the attach worked.
 *
 * @param ds_buf     descriptor to fill in; it is reset first (OUT).
 * @param file_name  not used by the sysv module.
 * @param size       number of bytes to allocate.
 *
 * @return OSHMEM_SUCCESS on success, OSHMEM_ERROR when shmget() fails,
 *         OSHMEM_ERR_OUT_OF_RESOURCE when shmat() fails.
 */
static int
segment_create(map_segment_t *ds_buf,
               const char *file_name,
               size_t size)
{
    int rc = OSHMEM_SUCCESS;
    void *base = NULL;
    int id = MAP_SEGMENT_SHM_INVALID;
    int shm_flags = IPC_CREAT | IPC_EXCL | S_IRUSR | S_IWUSR;

    assert(ds_buf);

    /* start from a clean descriptor */
    shmem_ds_reset(ds_buf);

    /* sysv shared memory has no backing file, so there is no network
     * file system check to make here.
     */
#if defined (SHM_HUGETLB)
    if (mca_sshmem_sysv_component.use_hp) {
        shm_flags |= SHM_HUGETLB;
    }
#endif

    /* allocate the segment */
    id = shmget(IPC_PRIVATE, size, shm_flags);
    if (MAP_SEGMENT_SHM_INVALID == id) {
        OPAL_OUTPUT_VERBOSE(
            (5, oshmem_sshmem_base_framework.framework_output,
             "Failed to shmget() %llu bytes (errno=%d)",
             (unsigned long long)size, errno)
        );
        return OSHMEM_ERROR;
    }

    /* attach to the segment at the framework's base address */
    base = shmat(id, (void *) mca_sshmem_base_start_address, 0);
    if ((void *) -1L == base) {
        OPAL_OUTPUT_VERBOSE(
            (5, oshmem_sshmem_base_framework.framework_output,
             "Failed to shmat() %llu bytes (errno=%d)",
             (unsigned long long)size, errno)
        );
        shmctl(id, IPC_RMID, NULL );
        return OSHMEM_ERR_OUT_OF_RESOURCE;
    }

    shmctl(id, IPC_RMID, NULL );

    ds_buf->seg_base_addr = base;
    ds_buf->seg_size = size;
    ds_buf->end = (void*)((uintptr_t)base + size);
    ds_buf->seg_id = id;
    ds_buf->type = MAP_SEGMENT_ALLOC_SHM;

    OPAL_OUTPUT_VERBOSE(
        (70, oshmem_sshmem_base_framework.framework_output,
         "%s: %s: create %s "
         "(id: %d, addr: %p size: %lu, name: %s)\n",
         mca_sshmem_sysv_component.super.base_version.mca_type_name,
         mca_sshmem_sysv_component.super.base_version.mca_component_name,
         (rc ? "failure" : "successful"),
         ds_buf->seg_id, ds_buf->seg_base_addr, (unsigned long)ds_buf->seg_size, ds_buf->seg_name)
    );

    return rc;
}
/* ////////////////////////////////////////////////////////////////////////// */
/**
 * segment_attach can only be called after a successful call to segment_create
 *
 * Attaches the System V segment whose id is carried in mkey->u.key and stores
 * the resulting address in mkey->va_base.
 *
 * @param ds_buf  segment descriptor; only read for tracing (IN).
 * @param mkey    memory key; u.key is the segment id, va_base receives the
 *                result of shmat() (IN/OUT).
 *
 * @return mkey->va_base. When the key carries no segment id this is the
 *         untouched input value. When shmat() fails, its (void *)-1 result is
 *         stored and returned unchanged, as before; the failure is now
 *         reported in the verbose output instead of being traced as a
 *         successful attach.
 *         NOTE(review): callers outside this file may test va_base against
 *         (void *)-1, so the failure value is deliberately not converted to
 *         NULL - confirm before changing it.
 */
static void *
segment_attach(map_segment_t *ds_buf, sshmem_mkey_t *mkey)
{
    assert(ds_buf);
    assert(mkey->va_base == 0);

    /* a key without a shared-memory id has nothing to attach */
    if (MAP_SEGMENT_SHM_INVALID == (int)(mkey->u.key)) {
        return (mkey->va_base);
    }

    mkey->va_base = shmat((int)(mkey->u.key), 0, 0);
    if ((void *) -1 == mkey->va_base) {
        OPAL_OUTPUT_VERBOSE(
            (5, oshmem_sshmem_base_framework.framework_output,
             "Failed to shmat() segment (key %llx, errno=%d)",
             (unsigned long long)mkey->u.key, errno)
        );
        return (mkey->va_base);
    }

    OPAL_OUTPUT_VERBOSE(
        (70, oshmem_sshmem_base_framework.framework_output,
         "%s: %s: attach successful "
         "(id: %d, addr: %p size: %lu, name: %s | va_base: 0x%p len: %d key %llx)\n",
         mca_sshmem_sysv_component.super.base_version.mca_type_name,
         mca_sshmem_sysv_component.super.base_version.mca_component_name,
         ds_buf->seg_id, ds_buf->seg_base_addr, (unsigned long)ds_buf->seg_size, ds_buf->seg_name,
         mkey->va_base, mkey->len, (unsigned long long)mkey->u.key)
    );

    /* the attached address is returned as is; no offset is applied */
    return (mkey->va_base);
}
/* ////////////////////////////////////////////////////////////////////////// */
/**
 * Release the System V segment described by ds_buf and reset the descriptor
 * through shmem_ds_reset().
 *
 * @param ds_buf  segment descriptor to release
 * @param mkey    not used by this component
 *
 * @return OSHMEM_SUCCESS always.
 */
static int
segment_detach(map_segment_t *ds_buf, sshmem_mkey_t *mkey)
{
    assert(ds_buf);

    OPAL_OUTPUT_VERBOSE(
        (70, oshmem_sshmem_base_framework.framework_output,
         "%s: %s: detaching "
         "(id: %d, addr: %p size: %lu, name: %s)\n",
         mca_sshmem_sysv_component.super.base_version.mca_type_name,
         mca_sshmem_sysv_component.super.base_version.mca_component_name,
         ds_buf->seg_id, ds_buf->seg_base_addr, (unsigned long)ds_buf->seg_size, ds_buf->seg_name)
        );

    /* mark the segment for removal while its id is still valid */
    if (MAP_SEGMENT_SHM_INVALID != ds_buf->seg_id) {
        shmctl(ds_buf->seg_id, IPC_RMID, NULL);
    }

    /*
     * No shmdt() is issued here.  This works around a kernel panic seen when
     * huge pages are detached from user space simultaneously by several
     * processes; the mapping is left for the kernel to drop during process
     * cleanup.  Regular (non huge page) mappings are not detached either.
     */

    /* forget everything we knew about this shared memory segment */
    shmem_ds_reset(ds_buf);

    return OSHMEM_SUCCESS;
}
/* ////////////////////////////////////////////////////////////////////////// */
/**
 * Invalidate the segment id held in ds_buf.
 *
 * @param ds_buf  segment descriptor to invalidate
 *
 * @return OSHMEM_SUCCESS always.
 */
static int
segment_unlink(map_segment_t *ds_buf)
{
    /* a SysV segment needs no real unlink step; only bookkeeping is done */
    OPAL_OUTPUT_VERBOSE(
        (70, oshmem_sshmem_base_framework.framework_output,
         "%s: %s: unlinking "
         "(id: %d, size: %lu, name: %s)\n",
         mca_sshmem_sysv_component.super.base_version.mca_type_name,
         mca_sshmem_sysv_component.super.base_version.mca_component_name,
         ds_buf->seg_id, (unsigned long)ds_buf->seg_size, ds_buf->seg_name)
        );

    /*
     * This is a partial reset on purpose: the id is dropped and the valid
     * bit is cleared, while the size, the name and the remaining flag bits
     * stay as they are across unlinks.
     */
    ds_buf->seg_id = MAP_SEGMENT_SHM_INVALID;
    MAP_SEGMENT_INVALIDATE(ds_buf);

    return OSHMEM_SUCCESS;
}

38
oshmem/mca/sshmem/verbs/Makefile.am Обычный файл
Просмотреть файл

@ -0,0 +1,38 @@
# Copyright (c) 2014 Mellanox Technologies, Inc.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Build rules for the "verbs" component of the sshmem framework.
# Preprocessor flags needed to find the verbs headers.
AM_CPPFLAGS = $(oshmem_verbs_CPPFLAGS)
# Files that make up the component; shared by the DSO and the static variant.
sources = \
sshmem_verbs.h \
sshmem_verbs_component.c \
sshmem_verbs_module.c
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
if MCA_BUILD_oshmem_sshmem_verbs_DSO
component_noinst =
component_install = mca_sshmem_verbs.la
else
component_noinst = libmca_sshmem_verbs.la
component_install =
endif
# DSO variant: installed into the component directory.
mcacomponentdir = $(ompilibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_sshmem_verbs_la_SOURCES = $(sources)
mca_sshmem_verbs_la_LDFLAGS = -module -avoid-version $(oshmem_verbs_LDFLAGS)
mca_sshmem_verbs_la_LIBADD = $(oshmem_verbs_LIBS)
# Static variant: convenience library that is not installed on its own.
noinst_LTLIBRARIES = $(component_noinst)
libmca_sshmem_verbs_la_SOURCES =$(sources)
libmca_sshmem_verbs_la_LDFLAGS = -module -avoid-version $(oshmem_verbs_LDFLAGS)
libmca_sshmem_verbs_la_LIBADD = $(oshmem_verbs_LIBS)

85
oshmem/mca/sshmem/verbs/configure.m4 Обычный файл
Просмотреть файл

@ -0,0 +1,85 @@
# -*- shell-script -*-
#
# Copyright (c) 2014 Mellanox Technologies, Inc.
# All rights reserved.
#
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# MCA_oshmem_sshmem_verbs_CONFIG(action-if-can-compile,
# [action-if-cant-compile])
# ------------------------------------------------
# Decide whether the sshmem verbs component can be built.  It is built
# only when it was not disabled on the configure command line, the
# OpenFabrics check succeeds, and the verbs header declares the shared
# memory region access flags.
AC_DEFUN([MCA_oshmem_sshmem_verbs_CONFIG],[
AC_CONFIG_FILES([oshmem/mca/sshmem/verbs/Makefile])
# do we have the verbs shm stuff?
AC_MSG_CHECKING([if want verbs shared memory support])
AC_ARG_ENABLE(verbs-sshmem,
AC_HELP_STRING([--disable-verbs-sshmem],
[disable verbs shared memory support (default: enabled)]))
AS_IF([test "$enable_verbs_sshmem" = "no"],
[AC_MSG_RESULT([no])
oshmem_verbs_sm_build_verbs=0],
[AC_MSG_RESULT([yes])
OMPI_CHECK_OPENFABRICS([oshmem_verbs],
[oshmem_verbs_sm_build_verbs=1],
[oshmem_verbs_sm_build_verbs=0])])
# substitute in the things needed to build
AC_SUBST([oshmem_verbs_CFLAGS])
AC_SUBST([oshmem_verbs_CPPFLAGS])
AC_SUBST([oshmem_verbs_LDFLAGS])
AC_SUBST([oshmem_verbs_LIBS])
# If we have the oshmem_verbs stuff available, find out what we've got
AS_IF(
[test "$oshmem_verbs_sm_build_verbs" = "1"],
[
OSHMEM_LIBSHMEM_EXTRA_LDFLAGS="$OSHMEM_LIBSHMEM_EXTRA_LDFLAGS $oshmem_verbs_LDFLAGS"
OSHMEM_LIBSHMEM_EXTRA_LIBS="$OSHMEM_LIBSHMEM_EXTRA_LIBS $oshmem_verbs_LIBS"
# ibv_reg_shared_mr was added in MOFED 1.8
# oshmem_have_mpage records which flag naming the verbs header declares:
# 0 means none, 2 means the IBV_ACCESS_ names, 3 means the IBV_EXP_ACCESS_ names.
oshmem_have_mpage=0
# probe with the verbs flags in effect, then restore the saved values
oshmem_verbs_save_CPPFLAGS="$CPPFLAGS"
oshmem_verbs_save_LDFLAGS="$LDFLAGS"
oshmem_verbs_save_LIBS="$LIBS"
CPPFLAGS="$CPPFLAGS $oshmem_verbs_CPPFLAGS"
LDFLAGS="$LDFLAGS $oshmem_verbs_LDFLAGS"
LIBS="$LIBS $oshmem_verbs_LIBS"
AC_CHECK_DECLS([IBV_ACCESS_ALLOCATE_MR,IBV_ACCESS_SHARED_MR_USER_READ],
[oshmem_have_mpage=2], [],
[#include <infiniband/verbs.h>])
AC_CHECK_DECLS([IBV_EXP_ACCESS_ALLOCATE_MR,IBV_EXP_ACCESS_SHARED_MR_USER_READ],
[oshmem_have_mpage=3], [],
[#include <infiniband/verbs.h>])
CPPFLAGS="$oshmem_verbs_save_CPPFLAGS"
LDFLAGS="$oshmem_verbs_save_LDFLAGS"
LIBS="$oshmem_verbs_save_LIBS"
AC_DEFINE_UNQUOTED(MPAGE_ENABLE, $oshmem_have_mpage,
[Whether we can use M-PAGE supported since MOFED 1.8])
# without either set of flags the component cannot be built
if test "x$oshmem_have_mpage" = "x0"; then
oshmem_verbs_sm_build_verbs=0
fi
])
# an explicit request that cannot be satisfied is a hard error
AS_IF([test "$enable_verbs_sshmem" = "yes" -a "$oshmem_verbs_sm_build_verbs" = "0"],
[AC_MSG_WARN([VERBS shared memory support requested but not found])
AC_MSG_ERROR([Cannot continue])])
AS_IF([test "$oshmem_verbs_sm_build_verbs" = "1"], [$1], [$2])
AC_DEFINE_UNQUOTED([OSHMEM_SSHMEM_VERBS],
[$oshmem_verbs_sm_build_verbs],
[Whether we have shared memory support for verbs or not])
])dnl

85
oshmem/mca/sshmem/verbs/sshmem_verbs.h Обычный файл
Просмотреть файл

@ -0,0 +1,85 @@
/*
* Copyright (c) 2014 Mellanox Technologies, Inc.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_SSHMEM_VERBS_EXPORT_H
#define MCA_SSHMEM_VERBS_EXPORT_H
#include "oshmem_config.h"
#include "oshmem/mca/sshmem/sshmem.h"
BEGIN_C_DECLS
#include <infiniband/verbs.h>
#include "opal/class/opal_list.h"
#include "opal/class/opal_value_array.h"
/**
 * State of the verbs device used by the sshmem verbs component.
 */
typedef struct openib_device_t {
/* device list obtained from ibv_get_device_list() */
struct ibv_device **ib_devs;
/* device selected from ib_devs */
struct ibv_device *ib_dev;
/* context obtained from ibv_open_device() for ib_dev */
struct ibv_context *ib_dev_context;
/* attributes filled in by ibv_query_device() */
struct ibv_device_attr ib_dev_attr;
/* protection domain allocated on ib_dev_context */
struct ibv_pd *ib_pd;
/* registered memory regions, stored as struct ibv_mr * items */
opal_value_array_t ib_mr_array;
/* first region registered by the component; its handle is used as the segment id */
struct ibv_mr *ib_mr_shared;
} openib_device_t;
/*
 * Compatibility layer for verbs headers that only provide the non
 * "experimental" names (MPAGE_ENABLE below 3): the IBV_EXP_* flags and the
 * ibv_exp_* shared registration names used by this component are mapped
 * onto their IBV_* / ibv_* counterparts.
 */
#if defined(MPAGE_ENABLE) && (MPAGE_ENABLE > 0)
# if MPAGE_ENABLE < 3
# define IBV_EXP_ACCESS_ALLOCATE_MR IBV_ACCESS_ALLOCATE_MR
# define IBV_EXP_ACCESS_SHARED_MR_USER_READ IBV_ACCESS_SHARED_MR_USER_READ
# define IBV_EXP_ACCESS_SHARED_MR_USER_WRITE IBV_ACCESS_SHARED_MR_USER_WRITE
# define IBV_EXP_ACCESS_NO_RDMA IBV_ACCESS_NO_RDMA
# define ibv_exp_reg_shared_mr ibv_reg_shared_mr_ex
# define ibv_exp_reg_shared_mr_in ibv_reg_shared_mr_in
/*
 * Local stand-in for the experimental registration argument block.  Callers
 * in this component fill it with a positional initializer, so the field
 * order must not change.
 */
struct ibv_exp_reg_mr_in {
struct ibv_pd *pd;
void *addr;
size_t length;
int access;
int exp_access;
uint32_t comp_mask;
};
/*
 * Emulates ibv_exp_reg_mr() on top of ibv_reg_mr(): both access masks are
 * OR-ed together and comp_mask is ignored.
 */
static inline struct ibv_mr *ibv_exp_reg_mr(struct ibv_exp_reg_mr_in *in)
{
return ibv_reg_mr(in->pd, in->addr, in->length, in->access | in->exp_access);
}
# endif
#endif /* MPAGE_ENABLE */
/**
 * Type of the verbs component, and the globally exported variable that
 * holds it.
 */
typedef struct mca_sshmem_verbs_component_t {
/* base component struct */
mca_sshmem_base_component_t super;
/* priority for verbs component */
int priority;
/* name of the preferred HCA; NULL selects the first device in the list */
char* hca_name;
/* multiplier, in GBytes, for the spacing between the addresses requested
 * for successive segment_attach() mappings */
int mr_interleave_factor;
} mca_sshmem_verbs_component_t;
OSHMEM_MODULE_DECLSPEC extern mca_sshmem_verbs_component_t
mca_sshmem_verbs_component;
/**
 * Type of the verbs module: the base sshmem module with no additional
 * state.  The single instance is defined in sshmem_verbs_module.c.
 */
typedef struct mca_sshmem_verbs_module_t {
mca_sshmem_base_module_t super;
} mca_sshmem_verbs_module_t;
extern mca_sshmem_verbs_module_t mca_sshmem_verbs_module;
END_C_DECLS
#endif /* MCA_SSHMEM_VERBS_EXPORT_H */

Просмотреть файл

@ -0,0 +1,305 @@
/*
* Copyright (c) 2014 Mellanox Technologies, Inc.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "oshmem_config.h"
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif /* HAVE_UNISTD_H */
#include "opal/constants.h"
#include "oshmem/mca/sshmem/sshmem.h"
#include "oshmem/mca/sshmem/base/base.h"
#include "sshmem_verbs.h"
/**
 * public string showing the sshmem verbs component version number
 */
const char *mca_sshmem_verbs_component_version_string =
    "OSHMEM verbs sshmem MCA component version " OSHMEM_VERSION;

/*
 * NOTE(review): the three variables below look like leftovers from the mmap
 * component; nothing in this file reads them.  They have external linkage,
 * so confirm there are no other users before removing them.
 */
int mca_sshmem_verbs_relocate_backing_file = 0;
char *mca_sshmem_verbs_backing_file_base_dir = NULL;
bool mca_sshmem_verbs_nfs_warning = true;
/**
 * local functions: the component entry points referenced from the
 * mca_sshmem_verbs_component initializer below
 */
/* registers the component's MCA variables */
static int verbs_register(void);
static int verbs_open(void);
static int verbs_close(void);
/* reports the module and the configured priority without probing hardware */
static int verbs_query(mca_base_module_t **module, int *priority);
/* probes the verbs device before reporting the module and priority */
static int verbs_runtime_query(mca_base_module_t **module,
int *priority,
const char *hint);
/**
 * instantiate the public struct with all of our public information
 * and pointers to our public functions in it
 *
 * Only the base component part is initialized here; priority, hca_name and
 * mr_interleave_factor are assigned in verbs_register().
 */
mca_sshmem_verbs_component_t mca_sshmem_verbs_component = {
/* ////////////////////////////////////////////////////////////////////// */
/* super */
/* ////////////////////////////////////////////////////////////////////// */
{
/**
* common MCA component data
*/
{
MCA_SSHMEM_BASE_VERSION_2_0_0,
/* component name and version */
"verbs",
OSHMEM_MAJOR_VERSION,
OSHMEM_MINOR_VERSION,
OSHMEM_RELEASE_VERSION,
/* component open */
verbs_open,
/* component close */
verbs_close,
/* component query */
verbs_query,
/* component register */
verbs_register
},
/* MCA v2.0.0 component meta data */
{
/* the component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
},
/* run-time query, which probes the verbs device */
verbs_runtime_query,
},
};
/* ////////////////////////////////////////////////////////////////////////// */
/**
 * Probe whether shared memory regions can be created through verbs.
 *
 * The probe opens the selected device (the one named by the hca_name
 * variable, or the first one in the list), allocates a protection domain,
 * registers a one page memory region and, when MPAGE_ENABLE is set, a shared
 * region on top of it.  Every resource acquired for the probe is released
 * again before returning.
 *
 * @param module    set to the verbs module on success, NULL otherwise
 * @param priority  set to the component priority on success, 0 otherwise
 * @param hint      not used
 *
 * @return OSHMEM_SUCCESS when the probe succeeded,
 *         OSHMEM_ERR_NOT_SUPPORTED when no device list is available,
 *         OSHMEM_ERR_NOT_FOUND when the requested HCA is not present,
 *         OSHMEM_ERR_RESOURCE_BUSY when the device cannot be opened or
 *         queried or no protection domain can be allocated,
 *         OSHMEM_ERR_OUT_OF_RESOURCE when a registration fails.
 */
static int
verbs_runtime_query(mca_base_module_t **module,
                    int *priority,
                    const char *hint)
{
    int rc = OSHMEM_SUCCESS;
    openib_device_t my_device;
    openib_device_t *device = &my_device;
    bool mr_array_ready = false;  /* true once ib_mr_array is constructed */
    int num_devs = 0;
    int i = 0;

    *priority = 0;
    *module = NULL;

    memset(device, 0, sizeof(*device));

#ifdef HAVE_IBV_GET_DEVICE_LIST
    device->ib_devs = ibv_get_device_list(&num_devs);
#else
#error unsupported ibv_get_device_list in infiniband/verbs.h
#endif

    if (num_devs == 0 || !device->ib_devs) {
        /* go through the cleanup path: an empty list still has to be freed */
        rc = OSHMEM_ERR_NOT_SUPPORTED;
        goto out;
    }

    /* Open device */
    if (NULL != mca_sshmem_verbs_component.hca_name) {
        for (i = 0; i < num_devs; i++) {
            /* compare against the i-th device, not always the first one */
            if (0 == strcmp(mca_sshmem_verbs_component.hca_name,
                            ibv_get_device_name(device->ib_devs[i]))) {
                device->ib_dev = device->ib_devs[i];
                break;
            }
        }
    } else {
        device->ib_dev = device->ib_devs[0];
    }

    if (NULL == device->ib_dev) {
        rc = OSHMEM_ERR_NOT_FOUND;
        goto out;
    }

    if (NULL == (device->ib_dev_context = ibv_open_device(device->ib_dev))) {
        rc = OSHMEM_ERR_RESOURCE_BUSY;
        goto out;
    }

    /* Obtain device attributes */
    if (ibv_query_device(device->ib_dev_context, &device->ib_dev_attr)) {
        rc = OSHMEM_ERR_RESOURCE_BUSY;
        goto out;
    }

    /* Allocate the protection domain for the device */
    device->ib_pd = ibv_alloc_pd(device->ib_dev_context);
    if (NULL == device->ib_pd) {
        rc = OSHMEM_ERR_RESOURCE_BUSY;
        goto out;
    }

    /* Allocate memory */
    {
        void *addr = NULL;
        size_t size = getpagesize();
        struct ibv_mr *ib_mr = NULL;
        int access_flag = IBV_ACCESS_LOCAL_WRITE |
                          IBV_ACCESS_REMOTE_WRITE |
                          IBV_ACCESS_REMOTE_READ;
        int exp_access_flag = 0;

        OBJ_CONSTRUCT(&device->ib_mr_array, opal_value_array_t);
        opal_value_array_init(&device->ib_mr_array, sizeof(struct ibv_mr *));
        mr_array_ready = true;

#if defined(MPAGE_ENABLE) && (MPAGE_ENABLE > 0)
        exp_access_flag = IBV_EXP_ACCESS_ALLOCATE_MR |
                          IBV_EXP_ACCESS_SHARED_MR_USER_READ |
                          IBV_EXP_ACCESS_SHARED_MR_USER_WRITE;
#endif /* MPAGE_ENABLE */

        struct ibv_exp_reg_mr_in in = {device->ib_pd, addr, size, access_flag, exp_access_flag, 0};
        ib_mr = ibv_exp_reg_mr(&in);
        if (NULL == ib_mr) {
            rc = OSHMEM_ERR_OUT_OF_RESOURCE;
        } else {
            device->ib_mr_shared = ib_mr;
            opal_value_array_append_item(&device->ib_mr_array, &ib_mr);
        }

#if defined(MPAGE_ENABLE) && (MPAGE_ENABLE > 0)
        if (!rc) {
            access_flag = IBV_ACCESS_LOCAL_WRITE |
                          IBV_ACCESS_REMOTE_WRITE |
                          IBV_ACCESS_REMOTE_READ|
                          IBV_EXP_ACCESS_NO_RDMA;

            addr = (void *)mca_sshmem_base_start_address;
            struct ibv_exp_reg_shared_mr_in shared_in = {0, device->ib_mr_shared->handle, device->ib_pd, addr, access_flag};
            ib_mr = ibv_exp_reg_shared_mr(&shared_in);
            if (NULL == ib_mr) {
                rc = OSHMEM_ERR_OUT_OF_RESOURCE;
            } else {
                opal_value_array_append_item(&device->ib_mr_array, &ib_mr);
            }
        }
#endif /* MPAGE_ENABLE */
    }

    /* all is well - rainbows and butterflies */
    if (!rc) {
        *priority = mca_sshmem_verbs_component.priority;
        *module = (mca_base_module_t *)&mca_sshmem_verbs_module.super;
    }

out:
    /* release whatever the probe acquired, in reverse order of acquisition */
    if (mr_array_ready) {
        struct ibv_mr **array =
            OPAL_VALUE_ARRAY_GET_BASE(&device->ib_mr_array, struct ibv_mr *);

        while (opal_value_array_get_size(&device->ib_mr_array) > 0) {
            ibv_dereg_mr(array[0]);
            opal_value_array_remove_item(&device->ib_mr_array, 0);
        }
        device->ib_mr_shared = NULL;

        /* destruct even when no region was ever appended */
        OBJ_DESTRUCT(&device->ib_mr_array);
    }

    if (device->ib_pd) {
        ibv_dealloc_pd(device->ib_pd);
        device->ib_pd = NULL;
    }

    if (device->ib_dev_context) {
        ibv_close_device(device->ib_dev_context);
        device->ib_dev_context = NULL;
    }

    if (device->ib_devs) {
        ibv_free_device_list(device->ib_devs);
        device->ib_devs = NULL;
    }

    return rc;
}
/* ////////////////////////////////////////////////////////////////////////// */
/**
 * Register the MCA variables of the verbs component and set their defaults:
 * priority (40), hca_name (NULL) and mr_interleave_factor (2).  For the last
 * two a deprecated synonym under oshmem/memheap/base is registered as well.
 *
 * @return OSHMEM_SUCCESS always; registration failures are not reported.
 */
static int
verbs_register(void)
{
    int index;

    /* ////////////////////////////////////////////////////////////////////// */
    /* (default) priority - set high to make verbs the default */
    mca_sshmem_verbs_component.priority = 40;
    (void) mca_base_component_var_register (&mca_sshmem_verbs_component.super.base_version,
                                            "priority", "Priority for sshmem verbs "
                                            "component (default: 40)", MCA_BASE_VAR_TYPE_INT,
                                            NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
                                            OPAL_INFO_LVL_3,
                                            MCA_BASE_VAR_SCOPE_ALL_EQ,
                                            &mca_sshmem_verbs_component.priority);

    mca_sshmem_verbs_component.hca_name = NULL;
    index = mca_base_component_var_register (&mca_sshmem_verbs_component.super.base_version,
                                             "hca_name", "Preferred hca (default: the first)", MCA_BASE_VAR_TYPE_STRING,
                                             NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
                                             OPAL_INFO_LVL_3,
                                             MCA_BASE_VAR_SCOPE_READONLY,
                                             &mca_sshmem_verbs_component.hca_name);
    /* a negative value is an error code, while 0 is a valid variable index */
    if (index >= 0) {
        (void) mca_base_var_register_synonym(index, "oshmem", "memheap", "base",
                                             "hca_name",
                                             MCA_BASE_VAR_SYN_FLAG_DEPRECATED);
    }

    mca_sshmem_verbs_component.mr_interleave_factor = 2;
    index = mca_base_component_var_register (&mca_sshmem_verbs_component.super.base_version,
                                             "mr_interleave_factor", "try to give at least N Gbytes spaces between mapped memheaps "
                                             "of other PEs that are local to me (default: 2)", MCA_BASE_VAR_TYPE_INT,
                                             NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
                                             OPAL_INFO_LVL_3,
                                             MCA_BASE_VAR_SCOPE_READONLY,
                                             &mca_sshmem_verbs_component.mr_interleave_factor);
    if (index >= 0) {
        (void) mca_base_var_register_synonym(index, "oshmem", "memheap", "base",
                                             "mr_interleave_factor",
                                             MCA_BASE_VAR_SYN_FLAG_DEPRECATED);
    }

    return OSHMEM_SUCCESS;
}
/* ////////////////////////////////////////////////////////////////////////// */
/**
 * Component open hook.  The verbs component has nothing to set up here.
 *
 * @return OSHMEM_SUCCESS always.
 */
static int verbs_open(void)
{
    return OSHMEM_SUCCESS;
}
/* ////////////////////////////////////////////////////////////////////////// */
/**
 * Component query hook: hand out the verbs module together with the
 * configured priority, without probing any hardware.
 *
 * @param module    receives a pointer to the verbs module
 * @param priority  receives the component priority
 *
 * @return OSHMEM_SUCCESS always.
 */
static int verbs_query(mca_base_module_t **module, int *priority)
{
    *module = (mca_base_module_t *)&mca_sshmem_verbs_module.super;
    *priority = mca_sshmem_verbs_component.priority;

    return OSHMEM_SUCCESS;
}
/* ////////////////////////////////////////////////////////////////////////// */
/**
 * Component close hook.  The verbs component has nothing to tear down here.
 *
 * @return OSHMEM_SUCCESS always.
 */
static int verbs_close(void)
{
    return OSHMEM_SUCCESS;
}

503
oshmem/mca/sshmem/verbs/sshmem_verbs_module.c Обычный файл
Просмотреть файл

@ -0,0 +1,503 @@
/*
* Copyright (c) 2014 Mellanox Technologies, Inc.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "oshmem_config.h"
#include <errno.h>
#ifdef HAVE_FCNTL_H
#include <fcntl.h>
#endif /* HAVE_FCNTL_H */
#ifdef HAVE_SYS_MMAN_H
#include <sys/mman.h>
#endif /* HAVE_SYS_MMAN_H */
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif /* HAVE_UNISTD_H */
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif /* HAVE_SYS_TYPES_H */
#ifdef HAVE_STRING_H
#include <string.h>
#endif /* HAVE_STRING_H */
#ifdef HAVE_NETDB_H
#include <netdb.h>
#endif /* HAVE_NETDB_H */
#ifdef HAVE_TIME_H
#include <time.h>
#endif /* HAVE_NETDB_H */
#ifdef HAVE_SYS_STAT_H
#include <sys/stat.h>
#endif /* HAVE_SYS_STAT_H */
#include "opal/constants.h"
#include "opal/util/output.h"
#include "opal/util/path.h"
#include "opal/util/show_help.h"
#include "oshmem/mca/sshmem/sshmem.h"
#include "oshmem/mca/sshmem/base/base.h"
#include "sshmem_verbs.h"
/* device state set up by segment_create() and released by segment_detach() */
static openib_device_t memheap_device;
/* ////////////////////////////////////////////////////////////////////////// */
/* local functions: the entry points stored in mca_sshmem_verbs_module below */
static int
module_init(void);
static int
segment_create(map_segment_t *ds_buf,
const char *file_name,
size_t size);
static int
ds_copy(const map_segment_t *from,
map_segment_t *to);
static void *
segment_attach(map_segment_t *ds_buf, sshmem_mkey_t *mkey);
static int
segment_detach(map_segment_t *ds_buf, sshmem_mkey_t *mkey);
static int
segment_unlink(map_segment_t *ds_buf);
static int
module_finalize(void);
/*
 * verbs sshmem module: the table of entry points implemented in this file
 */
mca_sshmem_verbs_module_t mca_sshmem_verbs_module = {
/* super */
{
module_init,
segment_create,
ds_copy,
segment_attach,
segment_detach,
segment_unlink,
module_finalize
}
};
/* ////////////////////////////////////////////////////////////////////////// */
/* private utility functions */
/* ////////////////////////////////////////////////////////////////////////// */
/* ////////////////////////////////////////////////////////////////////////// */
/**
 * Bring *ds_buf back to its empty state: flags reset, invalid id, no
 * addresses, zero size, unknown type and an all-zero name.
 *
 * @param ds_buf  segment descriptor to wipe
 */
static inline void
shmem_ds_reset(map_segment_t *ds_buf)
{
    /* trace the old contents before they are wiped */
    OPAL_OUTPUT_VERBOSE(
        (70, oshmem_sshmem_base_framework.framework_output,
         "%s: %s: shmem_ds_resetting "
         "(id: %d, size: %lu, name: %s)\n",
         mca_sshmem_verbs_component.super.base_version.mca_type_name,
         mca_sshmem_verbs_component.super.base_version.mca_component_name,
         ds_buf->seg_id, (unsigned long)ds_buf->seg_size, ds_buf->seg_name)
        );

    MAP_SEGMENT_RESET_FLAGS(ds_buf);

    ds_buf->type = MAP_SEGMENT_UNKNOWN;
    ds_buf->seg_id = MAP_SEGMENT_SHM_INVALID;
    ds_buf->seg_size = 0;
    ds_buf->seg_base_addr = 0;
    ds_buf->end = 0;
    memset(ds_buf->seg_name, '\0', sizeof(ds_buf->seg_name));
}
/* ////////////////////////////////////////////////////////////////////////// */
/**
 * Module initialization hook; the verbs module needs no set-up.
 *
 * @return OSHMEM_SUCCESS always.
 */
static int module_init(void)
{
    return OSHMEM_SUCCESS;
}
/* ////////////////////////////////////////////////////////////////////////// */
/**
 * Module finalization hook; the verbs module needs no tear-down.
 *
 * @return OSHMEM_SUCCESS always.
 */
static int module_finalize(void)
{
    return OSHMEM_SUCCESS;
}
/* ////////////////////////////////////////////////////////////////////////// */
/**
 * Copy one segment descriptor over another, byte for byte.
 *
 * @param from  descriptor to copy from
 * @param to    descriptor that receives the copy
 *
 * @return OSHMEM_SUCCESS always.
 */
static int
ds_copy(const map_segment_t *from,
        map_segment_t *to)
{
    memcpy(to, from, sizeof(*to));

    /* after the copy both sides print the same values */
    OPAL_OUTPUT_VERBOSE(
        (70, oshmem_sshmem_base_framework.framework_output,
         "%s: %s: ds_copy complete "
         "from: (id: %d, size: %lu, "
         "name: %s flags: 0x%02x) "
         "to: (id: %d, size: %lu, "
         "name: %s flags: 0x%02x)\n",
         mca_sshmem_verbs_component.super.base_version.mca_type_name,
         mca_sshmem_verbs_component.super.base_version.mca_component_name,
         from->seg_id, (unsigned long)from->seg_size, from->seg_name,
         from->flags, to->seg_id, (unsigned long)to->seg_size, to->seg_name,
         to->flags)
        );

    return OSHMEM_SUCCESS;
}
/* ////////////////////////////////////////////////////////////////////////// */
/**
 * Create the segment by registering a memory region of @p size bytes on the
 * verbs device and, when MPAGE_ENABLE is set, a shared region mapped at
 * mca_sshmem_base_start_address on top of it.
 *
 * The device state is kept in memheap_device and is released by
 * segment_detach().
 *
 * @param ds_buf     segment descriptor; reset first, filled in on success
 * @param file_name  not used by this component
 * @param size       requested segment size in bytes
 *
 * @return OSHMEM_SUCCESS on success,
 *         OSHMEM_ERR_NOT_SUPPORTED when no device list is available,
 *         OSHMEM_ERR_NOT_FOUND when the requested HCA is not present,
 *         OSHMEM_ERR_RESOURCE_BUSY when the device cannot be opened or
 *         queried or no protection domain can be allocated,
 *         OSHMEM_ERR_OUT_OF_RESOURCE when a registration fails.
 *
 * NOTE(review): the error paths return without releasing what was already
 * acquired in memheap_device - confirm that callers run segment_detach()
 * after a failed create.
 */
static int
segment_create(map_segment_t *ds_buf,
               const char *file_name,
               size_t size)
{
    int rc = OSHMEM_SUCCESS;
    openib_device_t *device = &memheap_device;
    int num_devs = 0;
    int i = 0;

    assert(ds_buf);

    /* init the contents of map_segment_t */
    shmem_ds_reset(ds_buf);

    memset(device, 0, sizeof(*device));

#ifdef HAVE_IBV_GET_DEVICE_LIST
    device->ib_devs = ibv_get_device_list(&num_devs);
#else
#error unsupported ibv_get_device_list in infiniband/verbs.h
#endif

    if (num_devs == 0 || !device->ib_devs) {
        return OSHMEM_ERR_NOT_SUPPORTED;
    }

    /* Open device */
    if (NULL != mca_sshmem_verbs_component.hca_name) {
        for (i = 0; i < num_devs; i++) {
            /* compare against the i-th device, not always the first one */
            if (0 == strcmp(mca_sshmem_verbs_component.hca_name,
                            ibv_get_device_name(device->ib_devs[i]))) {
                device->ib_dev = device->ib_devs[i];
                break;
            }
        }
    } else {
        device->ib_dev = device->ib_devs[0];
    }

    if (NULL == device->ib_dev) {
        OPAL_OUTPUT_VERBOSE(
            (5, oshmem_sshmem_base_framework.framework_output,
             "error getting device says %d: %s",
             errno, strerror(errno))
            );
        return OSHMEM_ERR_NOT_FOUND;
    }

    if (NULL == (device->ib_dev_context = ibv_open_device(device->ib_dev))) {
        OPAL_OUTPUT_VERBOSE(
            (5, oshmem_sshmem_base_framework.framework_output,
             "error obtaining device context for %s errno says %d: %s",
             ibv_get_device_name(device->ib_dev), errno, strerror(errno))
            );
        return OSHMEM_ERR_RESOURCE_BUSY;
    }

    /* Obtain device attributes */
    if (ibv_query_device(device->ib_dev_context, &device->ib_dev_attr)) {
        OPAL_OUTPUT_VERBOSE(
            (5, oshmem_sshmem_base_framework.framework_output,
             "error obtaining device attributes for %s errno says %d: %s",
             ibv_get_device_name(device->ib_dev), errno, strerror(errno))
            );
        return OSHMEM_ERR_RESOURCE_BUSY;
    }

    /* Allocate the protection domain for the device */
    device->ib_pd = ibv_alloc_pd(device->ib_dev_context);
    if (NULL == device->ib_pd) {
        OPAL_OUTPUT_VERBOSE(
            (5, oshmem_sshmem_base_framework.framework_output,
             "error allocating protection domain for %s errno says %d: %s",
             ibv_get_device_name(device->ib_dev), errno, strerror(errno))
            );
        return OSHMEM_ERR_RESOURCE_BUSY;
    }

    /* Allocate memory */
    {
        void *addr = NULL;
        struct ibv_mr *ib_mr = NULL;
        int access_flag = IBV_ACCESS_LOCAL_WRITE |
                          IBV_ACCESS_REMOTE_WRITE |
                          IBV_ACCESS_REMOTE_READ;
        int exp_access_flag = 0;

        OBJ_CONSTRUCT(&device->ib_mr_array, opal_value_array_t);
        opal_value_array_init(&device->ib_mr_array, sizeof(struct ibv_mr *));

#if defined(MPAGE_ENABLE) && (MPAGE_ENABLE > 0)
        exp_access_flag = IBV_EXP_ACCESS_ALLOCATE_MR |
                          IBV_EXP_ACCESS_SHARED_MR_USER_READ |
                          IBV_EXP_ACCESS_SHARED_MR_USER_WRITE;
#endif /* MPAGE_ENABLE */

        struct ibv_exp_reg_mr_in in = {device->ib_pd, addr, size, access_flag, exp_access_flag, 0};
        ib_mr = ibv_exp_reg_mr(&in);
        if (NULL == ib_mr) {
            OPAL_OUTPUT_VERBOSE(
                (5, oshmem_sshmem_base_framework.framework_output,
                 "error to ibv_exp_reg_mr() %llu bytes errno says %d: %s",
                 (unsigned long long)size, errno, strerror(errno))
                );
            rc = OSHMEM_ERR_OUT_OF_RESOURCE;
        } else {
            device->ib_mr_shared = ib_mr;
            opal_value_array_append_item(&device->ib_mr_array, &ib_mr);
        }

#if defined(MPAGE_ENABLE) && (MPAGE_ENABLE > 0)
        if (!rc) {
            access_flag = IBV_ACCESS_LOCAL_WRITE |
                          IBV_ACCESS_REMOTE_WRITE |
                          IBV_ACCESS_REMOTE_READ|
                          IBV_EXP_ACCESS_NO_RDMA;

            addr = (void *)mca_sshmem_base_start_address;
            struct ibv_exp_reg_shared_mr_in shared_in = {0,device->ib_mr_shared->handle,device->ib_pd, addr, access_flag};
            ib_mr = ibv_exp_reg_shared_mr(&shared_in);
            if (NULL == ib_mr) {
                OPAL_OUTPUT_VERBOSE(
                    (5, oshmem_sshmem_base_framework.framework_output,
                     "error to ibv_reg_shared_mr() %llu bytes errno says %d: %s",
                     (unsigned long long)size, errno, strerror(errno))
                    );
                rc = OSHMEM_ERR_OUT_OF_RESOURCE;
            } else {
                opal_value_array_append_item(&device->ib_mr_array, &ib_mr);
            }
        }
#endif /* MPAGE_ENABLE */

        if (!rc) {
            assert(size == device->ib_mr_shared->length);

            OPAL_OUTPUT_VERBOSE(
                (70, oshmem_sshmem_base_framework.framework_output,
                 "ibv device %s",
                 ibv_get_device_name(device->ib_dev))
                );

            /* the segment id is the handle of the first registered region;
             * the base address is that of the last region registered above */
            ds_buf->type = MAP_SEGMENT_ALLOC_IBV;
            ds_buf->seg_id = device->ib_mr_shared->handle;
            ds_buf->seg_base_addr = ib_mr->addr;
            ds_buf->seg_size = size;
            ds_buf->end = (void*)((uintptr_t)ds_buf->seg_base_addr + ds_buf->seg_size);
        }
    }

    OPAL_OUTPUT_VERBOSE(
        (70, oshmem_sshmem_base_framework.framework_output,
         "%s: %s: create %s "
         "(id: %d, addr: %p size: %lu, name: %s)\n",
         mca_sshmem_verbs_component.super.base_version.mca_type_name,
         mca_sshmem_verbs_component.super.base_version.mca_component_name,
         (rc ? "failure" : "successful"),
         ds_buf->seg_id, ds_buf->seg_base_addr, (unsigned long)ds_buf->seg_size, ds_buf->seg_name)
        );

    return rc;
}
/* ////////////////////////////////////////////////////////////////////////// */
/**
 * Map the shared memory region identified by mkey->u.key into this process.
 *
 * segment_attach can only be called after a successful call to segment_create.
 *
 * Each call requests a different address: mca_sshmem_base_start_address
 * plus mr_interleave_factor GBytes times the number of calls that got this
 * far.
 *
 * @param ds_buf  local segment descriptor; only read here, for diagnostics
 * @param mkey    key of the region to attach: u.key carries the handle of
 *                the shared region and va_base (which must be 0 on entry)
 *                receives the result
 *
 * @return mkey->va_base: left untouched when the key is
 *         MAP_SEGMENT_SHM_INVALID, (void *) -1 when the registration fails,
 *         otherwise the address of the new mapping.
 */
static void *
segment_attach(map_segment_t *ds_buf, sshmem_mkey_t *mkey)
{
    openib_device_t *device = &memheap_device;
    static int mr_count = 0;
    struct ibv_mr *ib_mr = NULL;
    void *addr = NULL;
    int access_flag = IBV_ACCESS_LOCAL_WRITE |
                      IBV_ACCESS_REMOTE_WRITE |
                      IBV_ACCESS_REMOTE_READ |
                      IBV_EXP_ACCESS_NO_RDMA;

    assert(ds_buf);
    assert(mkey->va_base == 0);

    if (MAP_SEGMENT_SHM_INVALID == (int)(mkey->u.key)) {
        return (mkey->va_base);
    }

    /* workaround mtt problem - request aligned addresses */
    ++mr_count;
    addr = (void *)((uintptr_t)mca_sshmem_base_start_address +
        mca_sshmem_verbs_component.mr_interleave_factor * 1024ULL * 1024ULL * 1024ULL * mr_count);

    struct ibv_exp_reg_shared_mr_in in = {
        0, mkey->u.key, device->ib_pd, addr, access_flag};
    ib_mr = ibv_exp_reg_shared_mr(&in);
    if (NULL == ib_mr) {
        mkey->va_base = (void *)-1;
        OPAL_OUTPUT_VERBOSE(
            (5, oshmem_sshmem_base_framework.framework_output,
             "error to ibv_reg_shared_mr() %llu bytes errno says %d: %s",
             (unsigned long long)ds_buf->seg_size, errno, strerror(errno))
            );
        /* do not fall through to the "attach successful" trace */
        return (mkey->va_base);
    }

    /* a mapping at another address is still used, but worth a message */
    if (ib_mr->addr != addr) {
        OPAL_OUTPUT_VERBOSE(
            (5, oshmem_sshmem_base_framework.framework_output,
             "Failed to map shared region to address %p got addr %p. Try to increase 'memheap_mr_interleave_factor' from %d",
             addr, ib_mr->addr, mca_sshmem_verbs_component.mr_interleave_factor)
            );
    }

    /* remember the region so that segment_detach() can deregister it */
    opal_value_array_append_item(&device->ib_mr_array, &ib_mr);
    mkey->va_base = ib_mr->addr;

    OPAL_OUTPUT_VERBOSE(
        (70, oshmem_sshmem_base_framework.framework_output,
         "%s: %s: attach successful "
         "(id: %d, addr: %p size: %lu, name: %s | va_base: 0x%p len: %d key %llx)\n",
         mca_sshmem_verbs_component.super.base_version.mca_type_name,
         mca_sshmem_verbs_component.super.base_version.mca_component_name,
         ds_buf->seg_id, ds_buf->seg_base_addr, (unsigned long)ds_buf->seg_size, ds_buf->seg_name,
         mkey->va_base, mkey->len, (unsigned long long)mkey->u.key)
        );

    return (mkey->va_base);
}
/* ////////////////////////////////////////////////////////////////////////// */
/**
 * Release everything held in memheap_device - registered regions,
 * protection domain, device context and device list - and reset ds_buf.
 *
 * Once a step fails the remaining device resources are left untouched;
 * ds_buf is reset in every case.
 *
 * @param ds_buf  segment descriptor to reset
 * @param mkey    not used
 *
 * @return OSHMEM_SUCCESS, or OSHMEM_ERROR when a verbs call failed.
 */
static int
segment_detach(map_segment_t *ds_buf, sshmem_mkey_t *mkey)
{
    openib_device_t *dev = &memheap_device;
    int status = OSHMEM_SUCCESS;

    assert(ds_buf);

    OPAL_OUTPUT_VERBOSE(
        (70, oshmem_sshmem_base_framework.framework_output,
         "%s: %s: detaching "
         "(id: %d, addr: %p size: %lu, name: %s)\n",
         mca_sshmem_verbs_component.super.base_version.mca_type_name,
         mca_sshmem_verbs_component.super.base_version.mca_component_name,
         ds_buf->seg_id, ds_buf->seg_base_addr, (unsigned long)ds_buf->seg_size, ds_buf->seg_name)
        );

    /* deregister the regions front to back, dropping each from the array */
    if (opal_value_array_get_size(&dev->ib_mr_array)) {
        struct ibv_mr **mrs =
            OPAL_VALUE_ARRAY_GET_BASE(&dev->ib_mr_array, struct ibv_mr *);

        while (opal_value_array_get_size(&dev->ib_mr_array) > 0) {
            struct ibv_mr *head = mrs[0];

            if (ibv_dereg_mr(head)) {
                OPAL_OUTPUT_VERBOSE(
                    (5, oshmem_sshmem_base_framework.framework_output,
                     "error ibv_dereg_mr(): %d: %s",
                     errno, strerror(errno))
                    );
                status = OSHMEM_ERROR;
            }
            opal_value_array_remove_item(&dev->ib_mr_array, 0);
        }

        if (!status) {
            dev->ib_mr_shared = NULL;
        }
        OBJ_DESTRUCT(&dev->ib_mr_array);
    }

    /* protection domain */
    if (!status && dev->ib_pd) {
        if (0 == ibv_dealloc_pd(dev->ib_pd)) {
            dev->ib_pd = NULL;
        } else {
            OPAL_OUTPUT_VERBOSE(
                (5, oshmem_sshmem_base_framework.framework_output,
                 "error ibv_dealloc_pd(): %d: %s",
                 errno, strerror(errno))
                );
            status = OSHMEM_ERROR;
        }
    }

    /* device context */
    if (!status && dev->ib_dev_context) {
        if (0 == ibv_close_device(dev->ib_dev_context)) {
            dev->ib_dev_context = NULL;
        } else {
            OPAL_OUTPUT_VERBOSE(
                (5, oshmem_sshmem_base_framework.framework_output,
                 "error ibv_close_device(): %d: %s",
                 errno, strerror(errno))
                );
            status = OSHMEM_ERROR;
        }
    }

    /* device list */
    if (!status && dev->ib_devs) {
        ibv_free_device_list(dev->ib_devs);
        dev->ib_devs = NULL;
    }

    /* wipe the descriptor associated with this shared memory segment */
    shmem_ds_reset(ds_buf);

    return status;
}
/* ////////////////////////////////////////////////////////////////////////// */
/**
 * Invalidate the segment id held in ds_buf.
 *
 * @param ds_buf  segment descriptor to invalidate
 *
 * @return OSHMEM_SUCCESS always.
 */
static int
segment_unlink(map_segment_t *ds_buf)
{
    /* nothing is released here; only the descriptor bookkeeping changes */
    OPAL_OUTPUT_VERBOSE(
        (70, oshmem_sshmem_base_framework.framework_output,
         "%s: %s: unlinking "
         "(id: %d, addr: %p size: %lu, name: %s)\n",
         mca_sshmem_verbs_component.super.base_version.mca_type_name,
         mca_sshmem_verbs_component.super.base_version.mca_component_name,
         ds_buf->seg_id, ds_buf->seg_base_addr, (unsigned long)ds_buf->seg_size, ds_buf->seg_name)
        );

    /*
     * This is a partial reset on purpose: the id is dropped and the valid
     * bit is cleared, while the size, the name and the remaining flag bits
     * stay as they are across unlinks.
     */
    ds_buf->seg_id = MAP_SEGMENT_SHM_INVALID;
    MAP_SEGMENT_INVALIDATE(ds_buf);

    return OSHMEM_SUCCESS;
}

Просмотреть файл

@ -26,5 +26,3 @@ liboshmem_la_SOURCES += \
runtime/oshmem_shmem_exchange.c \
runtime/oshmem_info_support.c

Просмотреть файл

@ -123,4 +123,3 @@ int oshmem_shmem_abort(int errcode)
return OSHMEM_SUCCESS;
}

Просмотреть файл

@ -43,14 +43,12 @@
#include "oshmem/constants.h"
#include "oshmem/runtime/runtime.h"
#include "oshmem/mca/spml/spml.h"
#include "oshmem/mca/spml/base/base.h"
#include "oshmem/mca/scoll/scoll.h"
#include "oshmem/mca/scoll/base/base.h"
#include "oshmem/mca/atomic/atomic.h"
#include "oshmem/mca/atomic/base/base.h"
#include "oshmem/runtime/params.h"
#include "oshmem/mca/spml/base/base.h"
#include "oshmem/mca/scoll/base/base.h"
#include "oshmem/mca/atomic/base/base.h"
#include "oshmem/mca/memheap/base/base.h"
#include "oshmem/mca/sshmem/base/base.h"
#include "oshmem/proc/proc.h"
#include "oshmem/proc/proc_group_cache.h"
#include "oshmem/op/op.h"
@ -124,6 +122,10 @@ static int _shmem_finalize(void)
return ret;
}
if (OSHMEM_SUCCESS != (ret = mca_base_framework_close(&oshmem_sshmem_base_framework) ) ) {
return ret;
}
if (OSHMEM_SUCCESS
!= (ret =
MCA_SPML_CALL(del_procs(oshmem_group_all->proc_array, oshmem_group_all->proc_count)))) {

Просмотреть файл

@ -56,14 +56,11 @@
#include "oshmem/runtime/runtime.h"
#include "oshmem/runtime/params.h"
#include "oshmem/runtime/oshmem_shmem_preconnect.h"
#include "oshmem/mca/spml/spml.h"
#include "oshmem/mca/spml/base/base.h"
#include "oshmem/mca/scoll/scoll.h"
#include "oshmem/mca/scoll/base/base.h"
#include "oshmem/mca/atomic/atomic.h"
#include "oshmem/mca/atomic/base/base.h"
#include "oshmem/mca/memheap/memheap.h"
#include "oshmem/mca/memheap/base/base.h"
#include "oshmem/mca/sshmem/base/base.h"
#include "oshmem/proc/proc.h"
#include "oshmem/proc/proc_group_cache.h"
#include "oshmem/op/op.h"
@ -407,13 +404,23 @@ static int _shmem_init(int argc, char **argv, int requested, int *provided)
goto error;
}
if (OSHMEM_SUCCESS != (ret = mca_base_framework_open(&oshmem_sshmem_base_framework, MCA_BASE_OPEN_DEFAULT))) {
error = "mca_sshmem_base_open() failed";
goto error;
}
if (OSHMEM_SUCCESS != (ret = mca_sshmem_base_select())) {
error = "mca_sshmem_base_select() failed";
goto error;
}
if (OSHMEM_SUCCESS != (ret = mca_base_framework_open(&oshmem_memheap_base_framework, MCA_BASE_OPEN_DEFAULT))) {
error = "mca_memheap_base_open() failed";
goto error;
}
if (OSHMEM_SUCCESS != (ret = mca_memheap_base_select())) {
error = "mca_select_base_select() failed";
error = "mca_memheap_base_select() failed";
goto error;
}

Просмотреть файл

@ -18,7 +18,7 @@
int shmem_addr_accessible(void *addr, int pe)
{
void* rva;
mca_spml_mkey_t *mkey;
sshmem_mkey_t *mkey;
RUNTIME_CHECK_INIT();

Просмотреть файл

@ -11,6 +11,8 @@
#ifndef OSHMEM_UTIL_H
#define OSHMEM_UTIL_H
#include "oshmem_config.h"
void oshmem_output_verbose(int level, int output_id, const char* prefix, const char* file, int line, const char* function, const char* format, ...);
#endif /* OSHMEM_UTIL_H */