1
1
openmpi/oshmem/mca/sshmem/ucx/sshmem_ucx_module.c
Tomislav Janjusic cb5ff55b27 oshmem/ucx: fixed a build issue
Co-authored with: Artem Polyakov <artemp@mellanox.com>

Signed-off-by: Tomislav Janjusic <tomislavj@mellanox.com>
2019-12-19 21:14:54 +02:00

412 строки
12 KiB
C

/*
* Copyright (c) 2017 Mellanox Technologies, Inc.
* All rights reserved.
* Copyright (c) 2019 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "oshmem_config.h"
#include "opal/constants.h"
#include "opal/util/output.h"
#include "opal/util/path.h"
#include "opal/util/show_help.h"
#include "oshmem/proc/proc.h"
#include "oshmem/mca/sshmem/sshmem.h"
#include "oshmem/include/shmemx.h"
#include "oshmem/mca/sshmem/base/base.h"
#include "oshmem/util/oshmem_util.h"
#include "oshmem/mca/spml/ucx/spml_ucx.h"
#include "sshmem_ucx.h"
//#include <ucs/sys/math.h>
#if HAVE_UCX_DEVICE_MEM
#include <ucp/core/ucp_resource.h>
#include <uct/ib/base/ib_alloc.h>
#endif
/* Size in bytes of one allocation element; the memheap helpers in this file
 * convert between segment addresses and element indices in these units. */
#define ALLOC_ELEM_SIZE sizeof(uint64_t)
/* NOTE: min()/max() evaluate each argument more than once, so do not pass
 * expressions with side effects. */
#define min(a,b) ((a) < (b) ? (a) : (b))
#define max(a,b) ((a) > (b) ? (a) : (b))
/* ////////////////////////////////////////////////////////////////////////// */
/* local functions */
static int
module_init(void);
static int
segment_create(map_segment_t *ds_buf,
const char *file_name,
size_t size, long hint);
static void *
segment_attach(map_segment_t *ds_buf, sshmem_mkey_t *mkey);
static int
segment_detach(map_segment_t *ds_buf, sshmem_mkey_t *mkey);
static int
segment_unlink(map_segment_t *ds_buf);
static int
module_finalize(void);
/* segment allocator callbacks (referenced by sshmem_ucx_allocator) */
static int sshmem_ucx_memheap_realloc(map_segment_t *s, size_t size,
void* old_ptr, void** new_ptr);
static int sshmem_ucx_memheap_free(map_segment_t *s, void* ptr);
/*
 * ucx shmem module
 *
 * The entry points are listed positionally, so their order has to match the
 * member order of the embedded base module structure (its declaration is not
 * part of this file).
 */
mca_sshmem_ucx_module_t mca_sshmem_ucx_module = {
/* super */
{
module_init,
segment_create,
segment_attach,
segment_detach,
segment_unlink,
module_finalize
}
};
/*
 * Module initialization entry point.
 *
 * This module has no state to set up, so the call only reports success.
 */
static int module_init(void)
{
    return OSHMEM_SUCCESS;
}
/* ////////////////////////////////////////////////////////////////////////// */
/*
 * Module finalization entry point.
 *
 * There is nothing to tear down here, so the call only reports success.
 */
static int module_finalize(void)
{
    return OSHMEM_SUCCESS;
}
/* ////////////////////////////////////////////////////////////////////////// */
/* Allocator callbacks that segment_create_internal() installs into
 * map_segment_t::allocator when the segment is created with a non-zero hint. */
static segment_allocator_t sshmem_ucx_allocator = {
.realloc = sshmem_ucx_memheap_realloc,
.free = sshmem_ucx_memheap_free
};
/*
 * Map a memory region through UCP and describe it in 'ds_buf'.
 *
 * ds_buf   segment descriptor to fill in; it is reset before anything else
 * address  address passed to ucp_mem_map()
 * size     length of the region in bytes
 * flags    UCP_MEM_MAP_* flags passed to ucp_mem_map(); when
 *          UCP_MEM_MAP_FIXED is not set, the base address is obtained with
 *          ucp_mem_query() after mapping
 * hint     stored in the descriptor; a non-zero value additionally installs
 *          sshmem_ucx_allocator as the segment allocator
 * dev_mem  device memory handle stored in the segment context (may be NULL)
 *
 * Returns OSHMEM_SUCCESS, OSHMEM_ERROR when mapping or querying the memory
 * fails, or OSHMEM_ERR_OUT_OF_RESOURCE when the segment context cannot be
 * allocated. On the two late failures the memory handle is unmapped again.
 */
static int
segment_create_internal(map_segment_t *ds_buf, void *address, size_t size,
                        unsigned flags, long hint, void *dev_mem)
{
    mca_spml_ucx_t *spml = (mca_spml_ucx_t*)mca_spml.self;
    mca_sshmem_ucx_segment_context_t *seg_ctx;
    ucp_mem_map_params_t map_params;
    ucp_mem_h memh;
    ucs_status_t ucs_rc;
    int rc = OSHMEM_SUCCESS;

    assert(ds_buf);

    /* start from a clean descriptor */
    shmem_ds_reset(ds_buf);

    map_params.field_mask = UCP_MEM_MAP_PARAM_FIELD_ADDRESS |
                            UCP_MEM_MAP_PARAM_FIELD_LENGTH |
                            UCP_MEM_MAP_PARAM_FIELD_FLAGS;
    map_params.address    = address;
    map_params.length     = size;
    map_params.flags      = flags;

    ucs_rc = ucp_mem_map(spml->ucp_context, &map_params, &memh);
    if (UCS_OK != ucs_rc) {
        SSHMEM_ERROR("ucp_mem_map() failed: %s\n", ucs_status_string(ucs_rc));
        rc = OSHMEM_ERROR;
        goto out;
    }

    if (flags & UCP_MEM_MAP_FIXED) {
        /* the caller dictated the address */
        ds_buf->super.va_base = map_params.address;
    } else {
        /* the address was not fixed by the caller; ask UCP where it is */
        ucp_mem_attr_t attr;

        attr.field_mask = UCP_MEM_ATTR_FIELD_ADDRESS;
        ucs_rc = ucp_mem_query(memh, &attr);
        if (UCS_OK != ucs_rc) {
            SSHMEM_ERROR("ucp_mem_query() failed: %s\n", ucs_status_string(ucs_rc));
            rc = OSHMEM_ERROR;
            goto err_unmap;
        }
        ds_buf->super.va_base = attr.address;
    }

    seg_ctx = calloc(1, sizeof(*seg_ctx));
    if (NULL == seg_ctx) {
        rc = OSHMEM_ERR_OUT_OF_RESOURCE;
        goto err_unmap;
    }

    seg_ctx->ucp_memh = memh;
    seg_ctx->dev_mem  = dev_mem;

    ds_buf->seg_size     = size;
    ds_buf->super.va_end = (void*)((uintptr_t)ds_buf->super.va_base + ds_buf->seg_size);
    ds_buf->context      = seg_ctx;
    ds_buf->type         = MAP_SEGMENT_ALLOC_UCX;
    ds_buf->alloc_hints  = hint;
    if (hint) {
        ds_buf->allocator = &sshmem_ucx_allocator;
    }
    goto out;

err_unmap:
    /* undo the mapping made above before reporting the failure */
    ucp_mem_unmap(spml->ucp_context, memh);

out:
    OPAL_OUTPUT_VERBOSE(
        (70, oshmem_sshmem_base_framework.framework_output,
         "%s: %s: create %s "
         "(id: %d, addr: %p size: %lu)\n",
         mca_sshmem_ucx_component.super.base_version.mca_type_name,
         mca_sshmem_ucx_component.super.base_version.mca_component_name,
         (rc ? "failure" : "successful"),
         ds_buf->seg_id, ds_buf->super.va_base, (unsigned long)ds_buf->seg_size)
    );
    return rc;
}
#if HAVE_UCX_DEVICE_MEM
/*
 * Try to allocate 'size' bytes of device memory on the "mlx5" memory domain
 * of the SPML's UCP context.
 *
 * On success the device memory handle is returned and the allocated address
 * is stored in *address_p. If no matching memory domain is found or the
 * allocation fails, NULL is returned and *address_p is left untouched.
 */
static uct_ib_device_mem_h alloc_device_mem(mca_spml_ucx_t *spml, size_t size,
                                            void **address_p)
{
    uct_ib_device_mem_h dm_handle = NULL;
    void *dm_address = NULL;
    size_t dm_length = size;
    ucs_status_t ucs_rc;
    uct_md_h md;

    md = ucp_context_find_tl_md(spml->ucp_context, "mlx5");
    if (NULL == md) {
        SSHMEM_VERBOSE(1, "ucp_context_find_tl_md() returned NULL\n");
        return NULL;
    }

    /* A matching memory domain exists; allocate device memory on it */
    ucs_rc = uct_ib_md_alloc_device_mem(md, &dm_length, &dm_address,
                                        UCT_MD_MEM_ACCESS_ALL, "sshmem_seg",
                                        &dm_handle);
    if (UCS_OK != ucs_rc) {
        /* Device memory could not be allocated. The caller falls back to a
         * regular allocation (some PEs in the job may succeed while others
         * fail). */
        SSHMEM_VERBOSE(1, "uct_ib_md_alloc_dm() failed: %s\n",
                       ucs_status_string(ucs_rc));
        return NULL;
    }

    SSHMEM_VERBOSE(3, "uct_ib_md_alloc_dm() returned address %p\n", dm_address);
    *address_p = dm_address;
    return dm_handle;
}
#endif
/*
 * Create a symmetric memory segment.
 *
 * ds_buf     segment descriptor to fill in
 * file_name  not used by this implementation
 * size       requested segment size in bytes
 * hint       allocation hints; zero requests a mapping fixed at
 *            mca_sshmem_base_start_address, non-zero lets UCP choose the
 *            address
 *
 * When built with device memory support and SHMEM_HINT_DEVICE_NIC_MEM is set,
 * device memory is tried first (sizes above UINT_MAX are rejected with
 * OSHMEM_ERR_BAD_PARAM); if that does not work out, the regular allocation
 * path below is used instead.
 *
 * Returns the status of segment_create_internal(), or OSHMEM_ERR_BAD_PARAM.
 */
static int
segment_create(map_segment_t *ds_buf,
               const char *file_name,
               size_t size, long hint)
{
    mca_spml_ucx_t *spml = (mca_spml_ucx_t*)mca_spml.self;
    unsigned map_flags;

#if HAVE_UCX_DEVICE_MEM
    if (hint & SHMEM_HINT_DEVICE_NIC_MEM) {
        uct_ib_device_mem_h dm_handle;
        void *dm_address;

        if (size > UINT_MAX) {
            return OSHMEM_ERR_BAD_PARAM;
        }

        dm_handle = alloc_device_mem(spml, size, &dm_address);
        if (NULL != dm_handle) {
            if (OSHMEM_SUCCESS == segment_create_internal(ds_buf, dm_address,
                                                          size, 0, hint,
                                                          dm_handle)) {
                return OSHMEM_SUCCESS;
            }

            /* give the device memory back and fall back to the regular
             * allocation */
            uct_ib_md_release_device_mem(dm_handle);
        }
    }
#endif

    map_flags = UCP_MEM_MAP_ALLOCATE;
    if (spml->heap_reg_nb) {
        map_flags |= UCP_MEM_MAP_NONBLOCK;
    }

    if (!hint) {
        /* no hints: map at the fixed base start address */
        return segment_create_internal(ds_buf, mca_sshmem_base_start_address,
                                       size, map_flags | UCP_MEM_MAP_FIXED,
                                       hint, NULL);
    }

    return segment_create_internal(ds_buf, NULL, size, map_flags, hint, NULL);
}
/*
 * Attaching is not supported for ucx segments: after the sanity checks the
 * call logs a message and aborts the job through oshmem_shmem_abort(-1).
 * The trailing NULL return follows the abort call.
 */
static void *
segment_attach(map_segment_t *ds_buf, sshmem_mkey_t *mkey)
{
    assert(ds_buf);
    assert(mkey->va_base == 0);

    OPAL_OUTPUT((oshmem_sshmem_base_framework.framework_output,
                 "can not attach to ucx segment"));

    oshmem_shmem_abort(-1);

    return NULL;
}
/*
 * Detach from a segment: log the segment being detached and reset its
 * descriptor with shmem_ds_reset(). 'mkey' is not used, and no memory is
 * unmapped here.
 *
 * Always returns OSHMEM_SUCCESS.
 */
static int
segment_detach(map_segment_t *ds_buf, sshmem_mkey_t *mkey)
{
    OPAL_OUTPUT_VERBOSE(
        (70, oshmem_sshmem_base_framework.framework_output,
         "%s: %s: detaching "
         "(id: %d, addr: %p size: %lu)\n",
         mca_sshmem_ucx_component.super.base_version.mca_type_name,
         mca_sshmem_ucx_component.super.base_version.mca_component_name,
         ds_buf->seg_id, ds_buf->super.va_base, (unsigned long)ds_buf->seg_size)
    );

    /* wipe the descriptor that belonged to this shared memory segment */
    shmem_ds_reset(ds_buf);

    return OSHMEM_SUCCESS;
}
/*
 * Release the resources behind a segment and invalidate its descriptor.
 *
 * If the descriptor carries a segment context, this destroys the shadow
 * allocator (if one was created), unmaps the UCP memory handle, releases the
 * device memory (when built with device memory support and one is attached)
 * and frees the context. The descriptor is then marked invalid.
 *
 * The call is safe on a descriptor without a context: this function itself
 * clears ds_buf->context, so a repeated unlink would otherwise dereference
 * NULL.
 *
 * Always returns OSHMEM_SUCCESS.
 */
static int
segment_unlink(map_segment_t *ds_buf)
{
    mca_spml_ucx_t *spml = (mca_spml_ucx_t *)mca_spml.self;
    mca_sshmem_ucx_segment_context_t *ctx = ds_buf->context;

    if (NULL != ctx) {
        if (ctx->shadow_allocator) {
            sshmem_ucx_shadow_destroy(ctx->shadow_allocator);
        }

        ucp_mem_unmap(spml->ucp_context, ctx->ucp_memh);

#if HAVE_UCX_DEVICE_MEM
        if (ctx->dev_mem) {
            uct_ib_md_release_device_mem(ctx->dev_mem);
        }
#endif

        /* detach the context from the descriptor before freeing it */
        ds_buf->context = NULL;
        free(ctx);
    }

    OPAL_OUTPUT_VERBOSE(
        (70, oshmem_sshmem_base_framework.framework_output,
         "%s: %s: unlinking "
         "(id: %d, addr: %p size: %lu)\n",
         mca_sshmem_ucx_component.super.base_version.mca_type_name,
         mca_sshmem_ucx_component.super.base_version.mca_component_name,
         ds_buf->seg_id, ds_buf->super.va_base, (unsigned long)ds_buf->seg_size)
    );

    ds_buf->seg_id = MAP_SEGMENT_SHM_INVALID;
    MAP_SEGMENT_INVALIDATE(ds_buf);

    return OSHMEM_SUCCESS;
}
/*
 * Translate an element index into an address inside segment 's': the segment
 * base plus 'index' elements of ALLOC_ELEM_SIZE bytes each.
 */
static void *sshmem_ucx_memheap_index2ptr(map_segment_t *s, unsigned index)
{
    char *seg_base = (char*)s->super.va_base;

    return seg_base + (index * ALLOC_ELEM_SIZE);
}
/*
 * Translate an address inside segment 's' into an element index: the byte
 * offset of 'ptr' from the segment base divided by ALLOC_ELEM_SIZE. The
 * result is returned as 'unsigned'.
 */
static unsigned sshmem_ucx_memheap_ptr2index(map_segment_t *s, void *ptr)
{
    char *seg_base = (char*)s->super.va_base;
    char *addr     = (char*)ptr;

    return (addr - seg_base) / ALLOC_ELEM_SIZE;
}
/*
 * Copy 'size' bytes from 'src' to 'dst' in 64-bit words, then issue a write
 * memory barrier.
 *
 * The byte count is rounded up to a whole number of words, so up to 7 bytes
 * past 'size' are read and written. Both pointers are accessed as uint64_t.
 */
static void sshmem_ucx_memheap_wordcopy(void *dst, void *src, size_t size)
{
    const size_t nwords = (size + sizeof(uint64_t) - 1) / sizeof(uint64_t);
    uint64_t *out = (uint64_t*)dst;
    uint64_t *in  = (uint64_t*)src;
    size_t w;

    for (w = 0; w < nwords; ++w) {
        out[w] = in[w];
    }

    opal_atomic_wmb();
}
/*
 * Allocate or resize a block inside segment 's'.
 *
 * s        segment whose context holds the shadow allocator (created here on
 *          first use)
 * size     requested size in bytes; requests larger than the segment are
 *          rejected
 * old_ptr  NULL for a fresh allocation, otherwise the block to resize
 * new_ptr  receives the address of the resulting block on success
 *
 * When the shadow allocator reports that the block was not resized in place,
 * the old contents are copied to the new block and the old block is freed.
 *
 * Returns OSHMEM_SUCCESS, OSHMEM_ERR_OUT_OF_RESOURCE, or the error returned
 * by the shadow allocator.
 */
static int sshmem_ucx_memheap_realloc(map_segment_t *s, size_t size,
                                      void* old_ptr, void** new_ptr)
{
    mca_sshmem_ucx_segment_context_t *ctx = s->context;
    unsigned nelems, new_idx, prev_idx, prev_nelems;
    size_t wanted;
    int resized_in_place;
    int rc;

    if (size > s->seg_size) {
        return OSHMEM_ERR_OUT_OF_RESOURCE;
    }

    /* the shadow allocator is created lazily */
    if (NULL == ctx->shadow_allocator) {
        ctx->shadow_allocator = sshmem_ucx_shadow_create(s->seg_size);
        if (NULL == ctx->shadow_allocator) {
            return OSHMEM_ERR_OUT_OF_RESOURCE;
        }
    }

    /* Round the request up to whole elements. A zero-size request still has
     * to yield a unique pointer, so it takes one element. */
    wanted = (size + ALLOC_ELEM_SIZE - 1) / ALLOC_ELEM_SIZE;
    nelems = (wanted > 1) ? wanted : 1;

    if (NULL == old_ptr) {
        /* fresh allocation: nothing to copy or release */
        rc = sshmem_ucx_shadow_alloc(ctx->shadow_allocator, nelems, &new_idx);
        if (OSHMEM_SUCCESS != rc) {
            return rc;
        }

        *new_ptr = sshmem_ucx_memheap_index2ptr(s, new_idx);
        return OSHMEM_SUCCESS;
    }

    prev_idx = sshmem_ucx_memheap_ptr2index(s, old_ptr);
    rc = sshmem_ucx_shadow_realloc(ctx->shadow_allocator, nelems, prev_idx,
                                   &new_idx, &resized_in_place);
    if (OSHMEM_SUCCESS != rc) {
        return rc;
    }

    *new_ptr = sshmem_ucx_memheap_index2ptr(s, new_idx);

    if (!resized_in_place) {
        /* the block moved: carry the data over, then release the old block */
        prev_nelems = sshmem_ucx_shadow_size(ctx->shadow_allocator, prev_idx);
        sshmem_ucx_memheap_wordcopy(*new_ptr, old_ptr,
                                    min(size, prev_nelems * ALLOC_ELEM_SIZE));
        sshmem_ucx_shadow_free(ctx->shadow_allocator, prev_idx);
    }

    return OSHMEM_SUCCESS;
}
/*
 * Release a block previously obtained from segment 's'.
 *
 * A NULL 'ptr' is accepted and reported as success. Otherwise the address is
 * converted to an element index and handed to the shadow allocator, whose
 * status is returned.
 */
static int sshmem_ucx_memheap_free(map_segment_t *s, void* ptr)
{
    mca_sshmem_ucx_segment_context_t *ctx = s->context;
    unsigned idx;

    if (NULL == ptr) {
        return OSHMEM_SUCCESS;
    }

    idx = sshmem_ucx_memheap_ptr2index(s, ptr);
    return sshmem_ucx_shadow_free(ctx->shadow_allocator, idx);
}