1
1
openmpi/oshmem/mca/sshmem/verbs/sshmem_verbs_module.c
Nathan Hjelm 4d92c9989e more c99 updates
This commit does two things. It removes checks for C99 required
headers (stdlib.h, string.h, signal.h, etc). Additionally it removes
definitions for required C99 types (intptr_t, int64_t, int32_t, etc).

Signed-off-by: Nathan Hjelm <hjelmn@me.com>
2015-06-25 10:14:13 -06:00

518 строки
17 KiB
C

/*
* Copyright (c) 2014 Mellanox Technologies, Inc.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "oshmem_config.h"
#include <errno.h>
#ifdef HAVE_FCNTL_H
#include <fcntl.h>
#endif /* HAVE_FCNTL_H */
#ifdef HAVE_SYS_MMAN_H
#include <sys/mman.h>
#endif /* HAVE_SYS_MMAN_H */
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif /* HAVE_UNISTD_H */
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif /* HAVE_SYS_TYPES_H */
#include <string.h>
#ifdef HAVE_NETDB_H
#include <netdb.h>
#endif /* HAVE_NETDB_H */
#include <time.h>
#ifdef HAVE_SYS_STAT_H
#include <sys/stat.h>
#endif /* HAVE_SYS_STAT_H */
#include "opal/constants.h"
#include "opal/util/output.h"
#include "opal/util/path.h"
#include "opal/util/show_help.h"
#include "oshmem/mca/sshmem/sshmem.h"
#include "oshmem/mca/sshmem/base/base.h"
#include "sshmem_verbs.h"
/* Verbs device state used by segment_create, segment_attach and
 * segment_detach in this file. */
static openib_device_t memheap_device;

/* ////////////////////////////////////////////////////////////////////////// */
/* local functions */
static int module_init(void);
static int segment_create(map_segment_t *ds_buf, const char *file_name, size_t size);
static int ds_copy(const map_segment_t *from, map_segment_t *to);
static void *segment_attach(map_segment_t *ds_buf, sshmem_mkey_t *mkey);
static int segment_detach(map_segment_t *ds_buf, sshmem_mkey_t *mkey);
static int segment_unlink(map_segment_t *ds_buf);
static int module_finalize(void);
/*
 * verbs shmem module
 *
 * Positional initializer: the seven function pointers below fill the
 * embedded base-module struct ("super") in the order they are listed, so
 * that order has to match the struct's field order (its definition is not
 * visible in this file).
 */
mca_sshmem_verbs_module_t mca_sshmem_verbs_module = {
/* super */
{
module_init,
segment_create,
ds_copy,
segment_attach,
segment_detach,
segment_unlink,
module_finalize
}
};
/* ////////////////////////////////////////////////////////////////////////// */
/* private utility functions */
/* ////////////////////////////////////////////////////////////////////////// */
/* ////////////////////////////////////////////////////////////////////////// */
/**
 * Put *ds_buf back into its blank, "no segment" state.
 *
 * The id, size and name held on entry are traced (verbosity 70) before
 * anything is overwritten, so the log shows what is being discarded.
 */
static inline void
shmem_ds_reset(map_segment_t *ds_buf)
{
    OPAL_OUTPUT_VERBOSE(
        (70, oshmem_sshmem_base_framework.framework_output,
         "%s: %s: shmem_ds_resetting "
         "(id: %d, size: %lu, name: %s)\n",
         mca_sshmem_verbs_component.super.base_version.mca_type_name,
         mca_sshmem_verbs_component.super.base_version.mca_component_name,
         ds_buf->seg_id, (unsigned long)ds_buf->seg_size, ds_buf->seg_name)
    );

    MAP_SEGMENT_RESET_FLAGS(ds_buf);

    /* clear the name buffer, then the plain bookkeeping fields */
    memset(ds_buf->seg_name, 0, sizeof ds_buf->seg_name);
    ds_buf->type          = MAP_SEGMENT_UNKNOWN;
    ds_buf->seg_size      = 0;
    ds_buf->seg_base_addr = NULL;
    ds_buf->end           = NULL;
    ds_buf->seg_id        = MAP_SEGMENT_SHM_INVALID;
}
/* ////////////////////////////////////////////////////////////////////////// */
/**
 * Module start-up hook. No work is done here; the verbs device is opened
 * in segment_create().
 *
 * @return OSHMEM_SUCCESS, unconditionally.
 */
static int module_init(void)
{
    return OSHMEM_SUCCESS;
}
/* ////////////////////////////////////////////////////////////////////////// */
/**
 * Module shut-down hook. No work is done here; device resources are
 * released in segment_detach().
 *
 * @return OSHMEM_SUCCESS, unconditionally.
 */
static int module_finalize(void)
{
    return OSHMEM_SUCCESS;
}
/* ////////////////////////////////////////////////////////////////////////// */
/**
 * Byte-wise copy of one segment descriptor into another, followed by a
 * trace (verbosity 70) of both descriptors.
 *
 * @param from  descriptor to read
 * @param to    descriptor to overwrite
 *
 * @return OSHMEM_SUCCESS, unconditionally.
 */
static int
ds_copy(const map_segment_t *from, map_segment_t *to)
{
    memcpy(to, from, sizeof(*to));

    OPAL_OUTPUT_VERBOSE(
        (70, oshmem_sshmem_base_framework.framework_output,
         "%s: %s: ds_copy complete "
         "from: (id: %d, size: %lu, "
         "name: %s flags: 0x%02x) "
         "to: (id: %d, size: %lu, "
         "name: %s flags: 0x%02x)\n",
         mca_sshmem_verbs_component.super.base_version.mca_type_name,
         mca_sshmem_verbs_component.super.base_version.mca_component_name,
         from->seg_id, (unsigned long)from->seg_size, from->seg_name,
         from->flags,
         to->seg_id, (unsigned long)to->seg_size, to->seg_name,
         to->flags)
    );

    return OSHMEM_SUCCESS;
}
/* ////////////////////////////////////////////////////////////////////////// */
static int
segment_create(map_segment_t *ds_buf,
const char *file_name,
size_t size)
{
int rc = OSHMEM_SUCCESS;
void *addr = NULL;
openib_device_t *device = &memheap_device;
int num_devs = 0;
int i = 0;
assert(ds_buf);
/* init the contents of map_segment_t */
shmem_ds_reset(ds_buf);
memset(device, 0, sizeof(*device));
#ifdef HAVE_IBV_GET_DEVICE_LIST
device->ib_devs = ibv_get_device_list(&num_devs);
#else
#error unsupported ibv_get_device_list in infiniband/verbs.h
#endif
if (num_devs == 0 || !device->ib_devs) {
return OSHMEM_ERR_NOT_SUPPORTED;
}
/* Open device */
if (NULL != mca_sshmem_verbs_component.hca_name) {
for (i = 0; i < num_devs; i++) {
if (0 == strcmp(mca_sshmem_verbs_component.hca_name, ibv_get_device_name(device->ib_devs[i]))) {
device->ib_dev = device->ib_devs[i];
break;
}
}
} else {
device->ib_dev = device->ib_devs[0];
}
if (NULL == device->ib_dev) {
OPAL_OUTPUT_VERBOSE(
(5, oshmem_sshmem_base_framework.framework_output,
"error getting device says %d: %s",
errno, strerror(errno))
);
return OSHMEM_ERR_NOT_FOUND;
}
if (NULL == (device->ib_dev_context = ibv_open_device(device->ib_dev))) {
OPAL_OUTPUT_VERBOSE(
(5, oshmem_sshmem_base_framework.framework_output,
"error obtaining device context for %s errno says %d: %s",
ibv_get_device_name(device->ib_dev), errno, strerror(errno))
);
return OSHMEM_ERR_RESOURCE_BUSY;
}
/* Obtain device attributes */
if (ibv_query_device(device->ib_dev_context, &device->ib_dev_attr)) {
OPAL_OUTPUT_VERBOSE(
(5, oshmem_sshmem_base_framework.framework_output,
"error obtaining device attributes for %s errno says %d: %s",
ibv_get_device_name(device->ib_dev), errno, strerror(errno))
);
return OSHMEM_ERR_RESOURCE_BUSY;
}
/* Allocate the protection domain for the device */
device->ib_pd = ibv_alloc_pd(device->ib_dev_context);
if (NULL == device->ib_pd) {
OPAL_OUTPUT_VERBOSE(
(5, oshmem_sshmem_base_framework.framework_output,
"error allocating protection domain for %s errno says %d: %s",
ibv_get_device_name(device->ib_dev), errno, strerror(errno))
);
return OSHMEM_ERR_RESOURCE_BUSY;
}
/* Allocate memory */
if (!rc) {
void *addr = NULL;
struct ibv_mr *ib_mr = NULL;
uint64_t access_flag = IBV_ACCESS_LOCAL_WRITE |
IBV_ACCESS_REMOTE_WRITE |
IBV_ACCESS_REMOTE_READ;
uint64_t exp_access_flag = 0;
OBJ_CONSTRUCT(&device->ib_mr_array, opal_value_array_t);
opal_value_array_init(&device->ib_mr_array, sizeof(struct ibv_mr *));
#if (MPAGE_ENABLE > 0)
exp_access_flag = IBV_EXP_ACCESS_ALLOCATE_MR |
IBV_EXP_ACCESS_SHARED_MR_USER_READ |
IBV_EXP_ACCESS_SHARED_MR_USER_WRITE;
#endif /* MPAGE_ENABLE */
struct ibv_exp_reg_mr_in in = {device->ib_pd, addr, size, access_flag|exp_access_flag, 0};
#if MPAGE_HAVE_IBV_EXP_REG_MR_CREATE_FLAGS
if (0 == mca_sshmem_verbs_component.has_shared_mr) {
in.addr = (void *)mca_sshmem_base_start_address;
in.comp_mask = IBV_EXP_REG_MR_CREATE_FLAGS;
in.create_flags = IBV_EXP_REG_MR_CREATE_CONTIG;
in.exp_access = access_flag;
}
#endif
ib_mr = ibv_exp_reg_mr(&in);
if (NULL == ib_mr) {
OPAL_OUTPUT_VERBOSE(
(5, oshmem_sshmem_base_framework.framework_output,
"error to ibv_exp_reg_mr() %llu bytes errno says %d: %s",
(unsigned long long)size, errno, strerror(errno))
);
rc = OSHMEM_ERR_OUT_OF_RESOURCE;
} else {
device->ib_mr_shared = ib_mr;
opal_value_array_append_item(&device->ib_mr_array, &ib_mr);
}
#if (MPAGE_ENABLE > 0)
if (!rc && mca_sshmem_verbs_component.has_shared_mr) {
access_flag = IBV_ACCESS_LOCAL_WRITE |
IBV_ACCESS_REMOTE_WRITE |
IBV_ACCESS_REMOTE_READ|
IBV_EXP_ACCESS_NO_RDMA;
addr = (void *)mca_sshmem_base_start_address;
struct ibv_exp_reg_shared_mr_in in;
mca_sshmem_verbs_fill_shared_mr(&in, device->ib_pd, device->ib_mr_shared->handle, addr, access_flag);
ib_mr = ibv_exp_reg_shared_mr(&in);
if (NULL == ib_mr) {
OPAL_OUTPUT_VERBOSE(
(5, oshmem_sshmem_base_framework.framework_output,
"error to ibv_reg_shared_mr() %llu bytes errno says %d: %s has_shared_mr: %d",
(unsigned long long)size, errno, strerror(errno),
mca_sshmem_verbs_component.has_shared_mr
)
);
rc = OSHMEM_ERR_OUT_OF_RESOURCE;
} else {
opal_value_array_append_item(&device->ib_mr_array, &ib_mr);
}
}
#endif /* MPAGE_ENABLE */
if (!rc) {
OPAL_OUTPUT_VERBOSE(
(70, oshmem_sshmem_base_framework.framework_output,
"ibv device %s shared_mr: %d",
ibv_get_device_name(device->ib_dev),
mca_sshmem_verbs_component.has_shared_mr)
);
if (mca_sshmem_verbs_component.has_shared_mr) {
assert(size == device->ib_mr_shared->length);
ds_buf->type = MAP_SEGMENT_ALLOC_IBV;
ds_buf->seg_id = device->ib_mr_shared->handle;
} else {
ds_buf->type = MAP_SEGMENT_ALLOC_IBV_NOSHMR;
ds_buf->seg_id = MAP_SEGMENT_SHM_INVALID;
}
ds_buf->seg_base_addr = ib_mr->addr;
ds_buf->seg_size = size;
ds_buf->end = (void*)((uintptr_t)ds_buf->seg_base_addr + ds_buf->seg_size);
}
}
OPAL_OUTPUT_VERBOSE(
(70, oshmem_sshmem_base_framework.framework_output,
"%s: %s: create %s "
"(id: %d, addr: %p size: %lu, name: %s)\n",
mca_sshmem_verbs_component.super.base_version.mca_type_name,
mca_sshmem_verbs_component.super.base_version.mca_component_name,
(rc ? "failure" : "successful"),
ds_buf->seg_id, ds_buf->seg_base_addr, (unsigned long)ds_buf->seg_size, ds_buf->seg_name)
);
return rc;
}
/* ////////////////////////////////////////////////////////////////////////// */
/**
 * Register the shared memory region identified by mkey->u.key in this
 * process. segment_attach can only be called after a successful call to
 * segment_create, because it uses the protection domain stored in the
 * file-level device state.
 *
 * Each call asks for a different target address: the base start address
 * plus mr_interleave_factor GiB times a per-process call counter.
 *
 * @param ds_buf  segment descriptor; only read for tracing. Must not be NULL.
 * @param mkey    key of the region to attach. mkey->va_base must be 0 on
 *                entry and is overwritten with the result. Must not be NULL.
 *
 * @return mkey->va_base, which is
 *         - left untouched (0) when the key equals MAP_SEGMENT_SHM_INVALID,
 *         - (void *)-1 when ibv_exp_reg_shared_mr() fails,
 *         - otherwise the address reported by the new memory region (a
 *           mismatch with the requested address is logged, not treated as
 *           an error).
 */
static void *
segment_attach(map_segment_t *ds_buf, sshmem_mkey_t *mkey)
{
    openib_device_t *device = &memheap_device;
    static int mr_count = 0;
    void *addr = NULL;
    struct ibv_mr *ib_mr = NULL;
    uint64_t access_flag = IBV_ACCESS_LOCAL_WRITE |
                           IBV_ACCESS_REMOTE_WRITE |
                           IBV_ACCESS_REMOTE_READ |
                           IBV_EXP_ACCESS_NO_RDMA;
    struct ibv_exp_reg_shared_mr_in in;

    assert(ds_buf);
    assert(mkey);
    assert(mkey->va_base == 0);

    if (MAP_SEGMENT_SHM_INVALID == (int)(mkey->u.key)) {
        return (mkey->va_base);
    }

    /* workaround mtt problem - request aligned addresses */
    ++mr_count;
    addr = (void *)((uintptr_t)mca_sshmem_base_start_address +
                    mca_sshmem_verbs_component.mr_interleave_factor * 1024ULL * 1024ULL * 1024ULL * mr_count);

    mca_sshmem_verbs_fill_shared_mr(&in, device->ib_pd, mkey->u.key, addr, access_flag);
    ib_mr = ibv_exp_reg_shared_mr(&in);
    if (NULL == ib_mr) {
        mkey->va_base = (void *)-1;
        OPAL_OUTPUT_VERBOSE(
            (5, oshmem_sshmem_base_framework.framework_output,
             "error to ibv_reg_shared_mr() %llu bytes errno says %d: %s",
             (unsigned long long)ds_buf->seg_size, errno, strerror(errno))
        );
    } else {
        if (ib_mr->addr != addr) {
            OPAL_OUTPUT_VERBOSE(
                (5, oshmem_sshmem_base_framework.framework_output,
                 "Failed to map shared region to address %p got addr %p. Try to increase 'memheap_mr_interleave_factor' from %d",
                 addr, ib_mr->addr, mca_sshmem_verbs_component.mr_interleave_factor)
            );
        }
        /* remember the MR so segment_detach() can deregister it */
        opal_value_array_append_item(&device->ib_mr_array, &ib_mr);
        mkey->va_base = ib_mr->addr;
    }

    /* report the real outcome, in the same wording segment_create uses */
    OPAL_OUTPUT_VERBOSE(
        (70, oshmem_sshmem_base_framework.framework_output,
         "%s: %s: attach %s "
         "(id: %d, addr: %p size: %lu, name: %s | va_base: 0x%p len: %d key %llx)\n",
         mca_sshmem_verbs_component.super.base_version.mca_type_name,
         mca_sshmem_verbs_component.super.base_version.mca_component_name,
         ((NULL == ib_mr) ? "failure" : "successful"),
         ds_buf->seg_id, ds_buf->seg_base_addr, (unsigned long)ds_buf->seg_size, ds_buf->seg_name,
         mkey->va_base, mkey->len, (unsigned long long)mkey->u.key)
    );

    return (mkey->va_base);
}
/* ////////////////////////////////////////////////////////////////////////// */
/**
 * Tear down the verbs resources held in the file-level device state and
 * blank the segment descriptor.
 *
 * Order: deregister every recorded memory region (last one first), then
 * release the protection domain, the device context and the device list.
 * Each later step runs only while no earlier step has failed. *ds_buf is
 * reset regardless of the outcome.
 *
 * @param ds_buf  segment descriptor to reset; must not be NULL
 * @param mkey    not referenced by this implementation
 *
 * @return OSHMEM_SUCCESS, or OSHMEM_ERROR if any verbs release call failed.
 */
static int
segment_detach(map_segment_t *ds_buf, sshmem_mkey_t *mkey)
{
    openib_device_t *dev = &memheap_device;
    int status = OSHMEM_SUCCESS;
    int idx;

    assert(ds_buf);

    OPAL_OUTPUT_VERBOSE(
        (70, oshmem_sshmem_base_framework.framework_output,
         "%s: %s: detaching "
         "(id: %d, addr: %p size: %lu, name: %s)\n",
         mca_sshmem_verbs_component.super.base_version.mca_type_name,
         mca_sshmem_verbs_component.super.base_version.mca_component_name,
         ds_buf->seg_id, ds_buf->seg_base_addr, (unsigned long)ds_buf->seg_size, ds_buf->seg_name)
    );

    /* memory regions: walk the array from its tail, dropping each entry
     * whether or not its deregistration succeeded */
    idx = opal_value_array_get_size(&dev->ib_mr_array);
    if (idx > 0) {
        struct ibv_mr **mrs =
            OPAL_VALUE_ARRAY_GET_BASE(&dev->ib_mr_array, struct ibv_mr *);

        while (--idx >= 0) {
            if (0 != ibv_dereg_mr(mrs[idx])) {
                OPAL_OUTPUT_VERBOSE(
                    (5, oshmem_sshmem_base_framework.framework_output,
                     "error ibv_dereg_mr(): %d: %s",
                     errno, strerror(errno))
                );
                status = OSHMEM_ERROR;
            }
            opal_value_array_remove_item(&dev->ib_mr_array, idx);
        }

        if (0 == status) {
            dev->ib_mr_shared = NULL;
        }
        OBJ_DESTRUCT(&dev->ib_mr_array);
    }

    /* protection domain */
    if (0 == status && NULL != dev->ib_pd) {
        if (0 == ibv_dealloc_pd(dev->ib_pd)) {
            dev->ib_pd = NULL;
        } else {
            OPAL_OUTPUT_VERBOSE(
                (5, oshmem_sshmem_base_framework.framework_output,
                 "error ibv_dealloc_pd(): %d: %s",
                 errno, strerror(errno))
            );
            status = OSHMEM_ERROR;
        }
    }

    /* device context */
    if (0 == status && NULL != dev->ib_dev_context) {
        if (0 == ibv_close_device(dev->ib_dev_context)) {
            dev->ib_dev_context = NULL;
        } else {
            OPAL_OUTPUT_VERBOSE(
                (5, oshmem_sshmem_base_framework.framework_output,
                 "error ibv_close_device(): %d: %s",
                 errno, strerror(errno))
            );
            status = OSHMEM_ERROR;
        }
    }

    /* device list */
    if (0 == status && NULL != dev->ib_devs) {
        ibv_free_device_list(dev->ib_devs);
        dev->ib_devs = NULL;
    }

    /* the descriptor is blanked even when a release step failed */
    shmem_ds_reset(ds_buf);

    return status;
}
/* ////////////////////////////////////////////////////////////////////////// */
/**
 * Mark the segment descriptor as no longer referring to a live segment.
 *
 * Nothing is removed from the system here: the function traces the
 * descriptor, invalidates its id and clears its valid flag. Size, name and
 * the remaining flag bits are deliberately left as they are.
 *
 * @param ds_buf  segment descriptor to invalidate
 *
 * @return OSHMEM_SUCCESS, unconditionally.
 */
static int
segment_unlink(map_segment_t *ds_buf)
{
    OPAL_OUTPUT_VERBOSE(
        (70, oshmem_sshmem_base_framework.framework_output,
         "%s: %s: unlinking "
         "(id: %d, addr: %p size: %lu, name: %s)\n",
         mca_sshmem_verbs_component.super.base_version.mca_type_name,
         mca_sshmem_verbs_component.super.base_version.mca_component_name,
         ds_buf->seg_id, ds_buf->seg_base_addr, (unsigned long)ds_buf->seg_size, ds_buf->seg_name)
    );

    /* partial reset only: drop the id ... */
    ds_buf->seg_id = MAP_SEGMENT_SHM_INVALID;

    /* ... and flip the valid bit to 0; other flags stay untouched */
    MAP_SEGMENT_INVALIDATE(ds_buf);

    return OSHMEM_SUCCESS;
}