1
1

OSHMEM: Add support for shmemx_malloc_with_hint()

- added multiple segments processing
- added shmemx_malloc_with_hint call + set of hints

(picked from master 94b5e91)

Signed-off-by: Sergey Oblomov <sergeyo@mellanox.com>
Signed-off-by: Yossi Itigin <yosefe@mellanox.com>
Этот коммит содержится в:
Yossi Itigin 2019-05-07 22:14:41 +03:00
родитель 5d4c9b444a
Коммит fc41c16134
27 изменённых файлов: 634 добавлений и 85 удалений

Просмотреть файл

@ -16,6 +16,11 @@
extern "C" {
#endif
/*
* Symmetric heap routines
*/
OSHMEM_DECLSPEC void* pshmemx_malloc_with_hint(size_t size, long hint);
/*
* Legacy API

Просмотреть файл

@ -18,11 +18,29 @@
extern "C" {
#endif
enum {
SHMEM_HINT_NONE = 0,
SHMEM_HINT_LOW_LAT_MEM = 1 << 0,
SHMEM_HINT_HIGH_BW_MEM = 1 << 1,
SHMEM_HINT_NEAR_NIC_MEM = 1 << 2,
SHMEM_HINT_DEVICE_GPU_MEM = 1 << 3,
SHMEM_HINT_DEVICE_NIC_MEM = 1 << 4,
SHMEM_HINT_PSYNC = 1 << 16,
SHMEM_HINT_PWORK = 1 << 17,
SHMEM_HINT_ATOMICS = 1 << 18
};
/*
* All OpenSHMEM extension APIs that are not part of this specification must be defined in the shmemx.h include
* file. These extensions shall use the shmemx_ prefix for all routine, variable, and constant names.
*/
/*
* Symmetric heap routines
*/
OSHMEM_DECLSPEC void* shmemx_malloc_with_hint(size_t size, long hint);
/*
* Elemental put routines
*/

Просмотреть файл

@ -41,14 +41,17 @@ OSHMEM_DECLSPEC int mca_memheap_base_select(void);
extern int mca_memheap_base_already_opened;
extern int mca_memheap_base_key_exchange;
#define MCA_MEMHEAP_MAX_SEGMENTS 4
#define HEAP_SEG_INDEX 0
#define SYMB_SEG_INDEX 1
#define MCA_MEMHEAP_SEG_COUNT (SYMB_SEG_INDEX+1)
#define MCA_MEMHEAP_MAX_SEGMENTS 8
#define HEAP_SEG_INDEX 0
#define MEMHEAP_SEG_INVALID 0xFFFF
typedef struct mca_memheap_base_config {
long device_nic_mem_seg_size; /* Used for SHMEM_HINT_DEVICE_NIC_MEM */
} mca_memheap_base_config_t;
typedef struct mca_memheap_map {
map_segment_t mem_segs[MCA_MEMHEAP_MAX_SEGMENTS]; /* TODO: change into pointer array */
int n_segments;
@ -56,8 +59,9 @@ typedef struct mca_memheap_map {
} mca_memheap_map_t;
extern mca_memheap_map_t mca_memheap_base_map;
extern mca_memheap_base_config_t mca_memheap_base_config;
int mca_memheap_base_alloc_init(mca_memheap_map_t *, size_t);
int mca_memheap_base_alloc_init(mca_memheap_map_t *, size_t, long);
void mca_memheap_base_alloc_exit(mca_memheap_map_t *);
int mca_memheap_base_static_init(mca_memheap_map_t *);
void mca_memheap_base_static_exit(mca_memheap_map_t *);
@ -173,10 +177,12 @@ static inline int memheap_is_va_in_segment(void *va, int segno)
static inline int memheap_find_segnum(void *va)
{
if (OPAL_LIKELY(memheap_is_va_in_segment(va, SYMB_SEG_INDEX))) {
return SYMB_SEG_INDEX;
} else if (memheap_is_va_in_segment(va, HEAP_SEG_INDEX)) {
return HEAP_SEG_INDEX;
int i;
for (i = 0; i < mca_memheap_base_map.n_segments; i++) {
if (memheap_is_va_in_segment(va, i)) {
return i;
}
}
return MEMHEAP_SEG_INVALID;
}
@ -193,18 +199,17 @@ static inline void *map_segment_va2rva(mkey_segment_t *seg, void *va)
return memheap_va2rva(va, seg->super.va_base, seg->rva_base);
}
static inline map_base_segment_t *map_segment_find_va(map_base_segment_t *segs, size_t elem_size, void *va)
static inline map_base_segment_t *map_segment_find_va(map_base_segment_t *segs,
size_t elem_size, void *va)
{
map_base_segment_t *rseg;
int i;
rseg = (map_base_segment_t *)((char *)segs + elem_size * HEAP_SEG_INDEX);
if (OPAL_LIKELY(map_segment_is_va_in(rseg, va))) {
return rseg;
}
rseg = (map_base_segment_t *)((char *)segs + elem_size * SYMB_SEG_INDEX);
if (OPAL_LIKELY(map_segment_is_va_in(rseg, va))) {
return rseg;
for (i = 0; i < MCA_MEMHEAP_MAX_SEGMENTS; i++) {
rseg = (map_base_segment_t *)((char *)segs + elem_size * i);
if (OPAL_LIKELY(map_segment_is_va_in(rseg, va))) {
return rseg;
}
}
return NULL;
@ -214,21 +219,14 @@ void mkey_segment_init(mkey_segment_t *seg, sshmem_mkey_t *mkey, uint32_t segno)
static inline map_segment_t *memheap_find_va(void* va)
{
map_segment_t *s;
map_segment_t *s = NULL;
int i;
/* most probably there will be only two segments: heap and global data */
if (OPAL_LIKELY(memheap_is_va_in_segment(va, SYMB_SEG_INDEX))) {
s = &memheap_map->mem_segs[SYMB_SEG_INDEX];
} else if (memheap_is_va_in_segment(va, HEAP_SEG_INDEX)) {
s = &memheap_map->mem_segs[HEAP_SEG_INDEX];
} else if (memheap_map->n_segments - 2 > 0) {
s = bsearch(va,
&memheap_map->mem_segs[SYMB_SEG_INDEX+1],
memheap_map->n_segments - 2,
sizeof(*s),
mca_memheap_seg_cmp);
} else {
s = NULL;
for (i = 0; i < memheap_map->n_segments; i++) {
if (memheap_is_va_in_segment(va, i)) {
s = &memheap_map->mem_segs[i];
break;
}
}
#if MEMHEAP_BASE_DEBUG == 1

Просмотреть файл

@ -19,17 +19,21 @@
#include "oshmem/mca/memheap/base/base.h"
int mca_memheap_base_alloc_init(mca_memheap_map_t *map, size_t size)
int mca_memheap_base_alloc_init(mca_memheap_map_t *map, size_t size, long hint)
{
int ret = OSHMEM_SUCCESS;
char * seg_filename = NULL;
assert(map);
assert(HEAP_SEG_INDEX == map->n_segments);
if (hint == 0) {
assert(HEAP_SEG_INDEX == map->n_segments);
} else {
assert(HEAP_SEG_INDEX < map->n_segments);
}
map_segment_t *s = &map->mem_segs[map->n_segments];
seg_filename = oshmem_get_unique_file_name(oshmem_my_proc_id());
ret = mca_sshmem_segment_create(s, seg_filename, size);
ret = mca_sshmem_segment_create(s, seg_filename, size, hint);
if (OSHMEM_SUCCESS == ret) {
map->n_segments++;
@ -45,12 +49,34 @@ int mca_memheap_base_alloc_init(mca_memheap_map_t *map, size_t size)
void mca_memheap_base_alloc_exit(mca_memheap_map_t *map)
{
if (map) {
map_segment_t *s = &map->mem_segs[HEAP_SEG_INDEX];
int i;
assert(s);
if (!map) {
return;
}
mca_sshmem_segment_detach(s, NULL);
mca_sshmem_unlink(s);
for (i = 0; i < map->n_segments; ++i) {
map_segment_t *s = &map->mem_segs[i];
if (s->type != MAP_SEGMENT_STATIC) {
mca_sshmem_segment_detach(s, NULL);
mca_sshmem_unlink(s);
}
}
}
int mca_memheap_alloc_with_hint(size_t size, long hint, void** ptr)
{
int i;
for (i = 0; i < mca_memheap_base_map.n_segments; i++) {
map_segment_t *s = &mca_memheap_base_map.mem_segs[i];
if (s->allocator && (hint && s->alloc_hints)) {
/* Do not fall back to default allocator since it will break the
* symmetry between PEs
*/
return s->allocator->realloc(s, size, NULL, ptr);
}
}
return MCA_MEMHEAP_CALL(alloc(size, ptr));
}

Просмотреть файл

@ -52,6 +52,12 @@ static int mca_memheap_base_register(mca_base_register_flag_t flags)
MCA_BASE_VAR_SCOPE_READONLY,
&mca_memheap_base_key_exchange);
mca_base_var_register("oshmem", "memheap", "base", "device_nic_mem_seg_size",
"Size of memory block used for allocations with hint SHMEM_HINT_DEVICE_NIC_MEM",
MCA_BASE_VAR_TYPE_LONG, NULL, 0,
MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3,
MCA_BASE_VAR_SCOPE_LOCAL,
&mca_memheap_base_config.device_nic_mem_seg_size);
return OSHMEM_SUCCESS;
}

Просмотреть файл

@ -749,7 +749,7 @@ void mkey_segment_init(mkey_segment_t *seg, sshmem_mkey_t *mkey, uint32_t segno)
{
map_segment_t *s;
if (segno >= MCA_MEMHEAP_SEG_COUNT) {
if (segno >= MCA_MEMHEAP_MAX_SEGMENTS) {
return;
}

Просмотреть файл

@ -22,6 +22,13 @@
#include "oshmem/mca/memheap/memheap.h"
#include "oshmem/mca/memheap/base/base.h"
#include "orte/mca/errmgr/errmgr.h"
#include "oshmem/include/shmemx.h"
#include "oshmem/mca/sshmem/base/base.h"
mca_memheap_base_config_t mca_memheap_base_config = {
.device_nic_mem_seg_size = 0
};
mca_memheap_base_module_t mca_memheap = {0};
@ -95,7 +102,7 @@ static memheap_context_t* _memheap_create(void)
{
int rc = OSHMEM_SUCCESS;
static memheap_context_t context;
size_t user_size;
size_t user_size, size;
user_size = _memheap_size();
if (user_size < MEMHEAP_BASE_MIN_SIZE) {
@ -106,7 +113,18 @@ static memheap_context_t* _memheap_create(void)
/* Inititialize symmetric area */
if (OSHMEM_SUCCESS == rc) {
rc = mca_memheap_base_alloc_init(&mca_memheap_base_map,
user_size + MEMHEAP_BASE_PRIVATE_SIZE);
user_size + MEMHEAP_BASE_PRIVATE_SIZE, 0);
}
/* Initialize atomic symmetric area */
size = mca_memheap_base_config.device_nic_mem_seg_size;
if ((OSHMEM_SUCCESS == rc) && (size > 0)) {
rc = mca_memheap_base_alloc_init(&mca_memheap_base_map, size,
SHMEM_HINT_DEVICE_NIC_MEM);
if (rc == OSHMEM_ERR_NOT_IMPLEMENTED) {
/* do not treat NOT_IMPLEMENTED as error */
rc = OSHMEM_SUCCESS;
}
}
/* Inititialize static/global variables area */

Просмотреть файл

@ -49,7 +49,7 @@ int mca_memheap_base_static_init(mca_memheap_map_t *map)
int ret = OSHMEM_SUCCESS;
assert(map);
assert(SYMB_SEG_INDEX <= map->n_segments);
assert(HEAP_SEG_INDEX < map->n_segments);
ret = _load_segments();

Просмотреть файл

@ -138,6 +138,8 @@ typedef struct mca_memheap_base_module_t mca_memheap_base_module_t;
OSHMEM_DECLSPEC extern mca_memheap_base_module_t mca_memheap;
int mca_memheap_alloc_with_hint(size_t size, long hint, void**);
static inline int mca_memheap_base_mkey_is_shm(sshmem_mkey_t *mkey)
{
return (0 == mkey->len) && (MAP_SEGMENT_SHM_INVALID != (int)mkey->u.key);

Просмотреть файл

@ -39,6 +39,7 @@
#include "orte/util/show_help.h"
#include "oshmem/mca/spml/ucx/spml_ucx_component.h"
#include "oshmem/mca/sshmem/ucx/sshmem_ucx.h"
/* Turn ON/OFF debug output from build (default 0) */
#ifndef SPML_UCX_PUT_DEBUG
@ -270,7 +271,7 @@ int mca_spml_ucx_add_procs(ompi_proc_t** procs, size_t nprocs)
OSHMEM_PROC_DATA(procs[i])->num_transports = 1;
OSHMEM_PROC_DATA(procs[i])->transport_ids = spml_ucx_transport_ids;
for (j = 0; j < MCA_MEMHEAP_SEG_COUNT; j++) {
for (j = 0; j < MCA_MEMHEAP_MAX_SEGMENTS; j++) {
mca_spml_ucx_ctx_default.ucp_peers[i].mkeys[j].key.rkey = NULL;
}
@ -441,7 +442,8 @@ sshmem_mkey_t *mca_spml_ucx_register(void* addr,
}
} else {
ucx_mkey->mem_h = (ucp_mem_h)mem_seg->context;
mca_sshmem_ucx_segment_context_t *ctx = mem_seg->context;
ucx_mkey->mem_h = ctx->ucp_memh;
}
status = ucp_rkey_pack(mca_spml_ucx.ucp_context, ucx_mkey->mem_h,
@ -592,17 +594,19 @@ static int mca_spml_ucx_ctx_create_common(long options, mca_spml_ucx_ctx_t **ucx
goto error2;
}
for (j = 0; j < MCA_MEMHEAP_SEG_COUNT; j++) {
for (j = 0; j < memheap_map->n_segments; j++) {
mkey = &memheap_map->mem_segs[j].mkeys_cache[i][0];
ucx_mkey = &ucx_ctx->ucp_peers[i].mkeys[j].key;
err = ucp_ep_rkey_unpack(ucx_ctx->ucp_peers[i].ucp_conn,
mkey->u.data,
&ucx_mkey->rkey);
if (UCS_OK != err) {
SPML_UCX_ERROR("failed to unpack rkey");
goto error2;
if (mkey->u.data) {
err = ucp_ep_rkey_unpack(ucx_ctx->ucp_peers[i].ucp_conn,
mkey->u.data,
&ucx_mkey->rkey);
if (UCS_OK != err) {
SPML_UCX_ERROR("failed to unpack rkey");
goto error2;
}
mca_spml_ucx_cache_mkey(ucx_ctx, mkey, j, i);
}
mca_spml_ucx_cache_mkey(ucx_ctx, mkey, j, i);
}
}
@ -750,6 +754,8 @@ int mca_spml_ucx_fence(shmem_ctx_t ctx)
ucs_status_t err;
mca_spml_ucx_ctx_t *ucx_ctx = (mca_spml_ucx_ctx_t *)ctx;
opal_atomic_wmb();
err = ucp_worker_fence(ucx_ctx->ucp_worker);
if (UCS_OK != err) {
SPML_UCX_ERROR("fence failed: %s", ucs_status_string(err));
@ -764,6 +770,8 @@ int mca_spml_ucx_quiet(shmem_ctx_t ctx)
int ret;
mca_spml_ucx_ctx_t *ucx_ctx = (mca_spml_ucx_ctx_t *)ctx;
opal_atomic_wmb();
ret = opal_common_ucx_worker_flush(ucx_ctx->ucp_worker);
if (OMPI_SUCCESS != ret) {
oshmem_shmem_abort(-1);

Просмотреть файл

@ -62,7 +62,7 @@ typedef struct spml_ucx_cached_mkey spml_ucx_cached_mkey_t;
struct ucp_peer {
ucp_ep_h ucp_conn;
spml_ucx_cached_mkey_t mkeys[MCA_MEMHEAP_SEG_COUNT];
spml_ucx_cached_mkey_t mkeys[MCA_MEMHEAP_MAX_SEGMENTS];
};
typedef struct ucp_peer ucp_peer_t;

Просмотреть файл

@ -314,7 +314,7 @@ static void _ctx_cleanup(mca_spml_ucx_ctx_t *ctx)
del_procs = malloc(sizeof(*del_procs) * nprocs);
for (i = 0; i < nprocs; ++i) {
for (j = 0; j < MCA_MEMHEAP_SEG_COUNT; j++) {
for (j = 0; j < memheap_map->n_segments; j++) {
if (ctx->ucp_peers[i].mkeys[j].key.rkey != NULL) {
ucp_rkey_destroy(ctx->ucp_peers[i].mkeys[j].key.rkey);
}

Просмотреть файл

@ -31,7 +31,7 @@ extern char* mca_sshmem_base_backing_file_dir;
OSHMEM_DECLSPEC int
mca_sshmem_segment_create(map_segment_t *ds_buf,
const char *file_name,
size_t size);
size_t size, long hint);
OSHMEM_DECLSPEC void *
mca_sshmem_segment_attach(map_segment_t *ds_buf, sshmem_mkey_t *mkey);

Просмотреть файл

@ -18,13 +18,13 @@
int
mca_sshmem_segment_create(map_segment_t *ds_buf,
const char *file_name,
size_t size)
size_t size, long hint)
{
if (!mca_sshmem_base_selected) {
return OSHMEM_ERROR;
}
return mca_sshmem_base_module->segment_create(ds_buf, file_name, size);
return mca_sshmem_base_module->segment_create(ds_buf, file_name, size, hint);
}
void *

Просмотреть файл

@ -62,7 +62,7 @@ module_init(void);
static int
segment_create(map_segment_t *ds_buf,
const char *file_name,
size_t size);
size_t size, long hint);
static void *
segment_attach(map_segment_t *ds_buf, sshmem_mkey_t *mkey);
@ -111,7 +111,7 @@ module_finalize(void)
static int
segment_create(map_segment_t *ds_buf,
const char *file_name,
size_t size)
size_t size, long hint)
{
int rc = OSHMEM_SUCCESS;
void *addr = NULL;

Просмотреть файл

@ -83,14 +83,19 @@ typedef int
* @param file_name file_name unique string identifier that must be a valid,
* writable path (IN).
*
* @param address address to attach the segment at, or 0 allocate
* any available address in the process.
*
* @param size size of the shared memory segment.
*
* @param hint hint of the shared memory segment.
*
* @return OSHMEM_SUCCESS on success.
*/
typedef int
(*mca_sshmem_base_module_segment_create_fn_t)(map_segment_t *ds_buf,
const char *file_name,
size_t size);
size_t size, long hint);
/**
* attach to an existing shared memory segment initialized by segment_create.

Просмотреть файл

@ -107,6 +107,8 @@ typedef struct mkey_segment {
void *rva_base; /* base va on remote pe */
} mkey_segment_t;
typedef struct segment_allocator segment_allocator_t;
typedef struct map_segment {
map_base_segment_t super;
sshmem_mkey_t **mkeys_cache; /* includes remote segment bases in va_base */
@ -115,10 +117,17 @@ typedef struct map_segment {
int seg_id;
size_t seg_size; /* length of the segment */
segment_type_t type; /* type of the segment */
long alloc_hints; /* allocation hints this segment supports */
void *context; /* allocator can use this field to store
its own private data */
segment_allocator_t *allocator; /* segment-specific allocator */
} map_segment_t;
struct segment_allocator {
int (*realloc)(map_segment_t*, size_t newsize, void *, void **);
int (*free)(map_segment_t*, void*);
};
END_C_DECLS
#endif /* MCA_SSHMEM_TYPES_H */

Просмотреть файл

@ -60,7 +60,7 @@ module_init(void);
static int
segment_create(map_segment_t *ds_buf,
const char *file_name,
size_t size);
size_t size, long hint);
static void *
segment_attach(map_segment_t *ds_buf, sshmem_mkey_t *mkey);
@ -109,7 +109,7 @@ module_finalize(void)
static int
segment_create(map_segment_t *ds_buf,
const char *file_name,
size_t size)
size_t size, long hint)
{
int rc = OSHMEM_SUCCESS;
void *addr = NULL;

Просмотреть файл

@ -15,7 +15,8 @@ AM_CPPFLAGS = $(sshmem_ucx_CPPFLAGS)
sources = \
sshmem_ucx.h \
sshmem_ucx_component.c \
sshmem_ucx_module.c
sshmem_ucx_module.c \
sshmem_ucx_shadow.c
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la

Просмотреть файл

@ -22,6 +22,40 @@ AC_DEFUN([MCA_oshmem_sshmem_ucx_CONFIG],[
[$1],
[$2])
# Check for UCX device memory allocation support
save_LDFLAGS="$LDFLAGS"
save_LIBS="$LIBS"
save_CPPFLAGS="$CPPFLAGS"
alloc_dm_LDFLAGS=" -L$ompi_check_ucx_libdir/ucx"
alloc_dm_LIBS=" -luct_ib"
CPPFLAGS+=" $sshmem_ucx_CPPFLAGS"
LDFLAGS+=" $sshmem_ucx_LDFLAGS $alloc_dm_LDFLAGS"
LIBS+=" $sshmem_ucx_LIBS $alloc_dm_LIBS"
AC_LANG_PUSH([C])
AC_LINK_IFELSE([AC_LANG_PROGRAM(
[[
#include <ucp/core/ucp_resource.h>
#include <uct/ib/base/ib_alloc.h>
]],
[[
uct_md_h md = ucp_context_find_tl_md((ucp_context_h)NULL, "");
(void)uct_ib_md_alloc_device_mem(md, NULL, NULL, 0, "", NULL);
uct_ib_md_release_device_mem(NULL);
]])],
[
AC_MSG_NOTICE([UCX device memory allocation is supported])
AC_DEFINE([HAVE_UCX_DEVICE_MEM], [1], [Support for device memory allocation])
sshmem_ucx_LIBS+=" $alloc_dm_LIBS"
sshmem_ucx_LDFLAGS+=" $alloc_dm_LDFLAGS"
],
[AC_MSG_NOTICE([UCX device memory allocation is not supported])])
AC_LANG_POP([C])
CPPFLAGS="$save_CPPFLAGS"
LDFLAGS="$save_LDFLAGS"
LIBS="$save_LIBS"
# substitute in the things needed to build ucx
AC_SUBST([sshmem_ucx_CFLAGS])

Просмотреть файл

@ -15,8 +15,12 @@
#include "oshmem/mca/sshmem/sshmem.h"
#include <ucp/api/ucp.h>
BEGIN_C_DECLS
typedef struct sshmem_ucx_shadow_allocator sshmem_ucx_shadow_allocator_t;
/**
* globally exported variable to hold the ucx component.
*/
@ -30,11 +34,26 @@ typedef struct mca_sshmem_ucx_component_t {
OSHMEM_MODULE_DECLSPEC extern mca_sshmem_ucx_component_t
mca_sshmem_ucx_component;
typedef struct mca_sshmem_ucx_segment_context {
void *dev_mem;
sshmem_ucx_shadow_allocator_t *shadow_allocator;
ucp_mem_h ucp_memh;
} mca_sshmem_ucx_segment_context_t;
typedef struct mca_sshmem_ucx_module_t {
mca_sshmem_base_module_t super;
} mca_sshmem_ucx_module_t;
extern mca_sshmem_ucx_module_t mca_sshmem_ucx_module;
sshmem_ucx_shadow_allocator_t *sshmem_ucx_shadow_create(unsigned count);
void sshmem_ucx_shadow_destroy(sshmem_ucx_shadow_allocator_t *allocator);
int sshmem_ucx_shadow_alloc(sshmem_ucx_shadow_allocator_t *allocator,
unsigned count, unsigned *index);
int sshmem_ucx_shadow_free(sshmem_ucx_shadow_allocator_t *allocator,
unsigned index);
size_t sshmem_ucx_shadow_size(sshmem_ucx_shadow_allocator_t *allocator,
unsigned index);
END_C_DECLS
#endif /* MCA_SHMEM_UCX_EXPORT_H */

Просмотреть файл

@ -18,12 +18,24 @@
#include "oshmem/proc/proc.h"
#include "oshmem/mca/sshmem/sshmem.h"
#include "oshmem/include/shmemx.h"
#include "oshmem/mca/sshmem/base/base.h"
#include "oshmem/util/oshmem_util.h"
#include "oshmem/mca/spml/ucx/spml_ucx.h"
#include "sshmem_ucx.h"
//#include <ucs/sys/math.h>
#if HAVE_UCX_DEVICE_MEM
#include <ucp/core/ucp_resource.h>
#include <uct/ib/base/ib_alloc.h>
#endif
#define ALLOC_ELEM_SIZE sizeof(uint64_t)
#define min(a,b) ((a) < (b) ? (a) : (b))
#define max(a,b) ((a) > (b) ? (a) : (b))
/* ////////////////////////////////////////////////////////////////////////// */
/*local functions */
/* local functions */
@ -33,7 +45,7 @@ module_init(void);
static int
segment_create(map_segment_t *ds_buf,
const char *file_name,
size_t size);
size_t size, long hint);
static void *
segment_attach(map_segment_t *ds_buf, sshmem_mkey_t *mkey);
@ -47,6 +59,11 @@ segment_unlink(map_segment_t *ds_buf);
static int
module_finalize(void);
static int sshmem_ucx_memheap_realloc(map_segment_t *s, size_t size,
void* old_ptr, void** new_ptr);
static int sshmem_ucx_memheap_free(map_segment_t *s, void* ptr);
/*
* ucx shmem module
*/
@ -79,13 +96,18 @@ module_finalize(void)
/* ////////////////////////////////////////////////////////////////////////// */
static segment_allocator_t sshmem_ucx_allocator = {
.realloc = sshmem_ucx_memheap_realloc,
.free = sshmem_ucx_memheap_free
};
static int
segment_create(map_segment_t *ds_buf,
const char *file_name,
size_t size)
segment_create_internal(map_segment_t *ds_buf, void *address, size_t size,
unsigned flags, long hint, void *dev_mem)
{
mca_sshmem_ucx_segment_context_t *ctx;
int rc = OSHMEM_SUCCESS;
mca_spml_ucx_t *spml = (mca_spml_ucx_t *)mca_spml.self;
mca_spml_ucx_t *spml = (mca_spml_ucx_t*)mca_spml.self;
ucp_mem_map_params_t mem_map_params;
ucp_mem_h mem_h;
ucs_status_t status;
@ -99,25 +121,51 @@ segment_create(map_segment_t *ds_buf,
UCP_MEM_MAP_PARAM_FIELD_LENGTH |
UCP_MEM_MAP_PARAM_FIELD_FLAGS;
mem_map_params.address = (void *)mca_sshmem_base_start_address;
mem_map_params.address = address;
mem_map_params.length = size;
mem_map_params.flags = UCP_MEM_MAP_ALLOCATE|UCP_MEM_MAP_FIXED;
if (spml->heap_reg_nb) {
mem_map_params.flags |= UCP_MEM_MAP_NONBLOCK;
}
mem_map_params.flags = flags;
status = ucp_mem_map(spml->ucp_context, &mem_map_params, &mem_h);
if (UCS_OK != status) {
SSHMEM_ERROR("ucp_mem_map() failed: %s\n", ucs_status_string(status));
rc = OSHMEM_ERROR;
goto out;
}
ds_buf->super.va_base = mem_map_params.address;
if (!(flags & UCP_MEM_MAP_FIXED)) {
/* Memory was allocated at an arbitrary address; obtain it */
ucp_mem_attr_t mem_attr;
mem_attr.field_mask = UCP_MEM_ATTR_FIELD_ADDRESS;
status = ucp_mem_query(mem_h, &mem_attr);
if (status != UCS_OK) {
SSHMEM_ERROR("ucp_mem_query() failed: %s\n", ucs_status_string(status));
ucp_mem_unmap(spml->ucp_context, mem_h);
rc = OSHMEM_ERROR;
goto out;
}
ds_buf->super.va_base = mem_attr.address;
} else {
ds_buf->super.va_base = mem_map_params.address;
}
ctx = calloc(1, sizeof(*ctx));
if (!ctx) {
ucp_mem_unmap(spml->ucp_context, mem_h);
rc = OSHMEM_ERR_OUT_OF_RESOURCE;
goto out;
}
ds_buf->seg_size = size;
ds_buf->super.va_end = (void*)((uintptr_t)ds_buf->super.va_base + ds_buf->seg_size);
ds_buf->context = mem_h;
ds_buf->context = ctx;
ds_buf->type = MAP_SEGMENT_ALLOC_UCX;
ds_buf->alloc_hints = hint;
ctx->ucp_memh = mem_h;
ctx->dev_mem = dev_mem;
if (hint) {
ds_buf->allocator = &sshmem_ucx_allocator;
}
out:
OPAL_OUTPUT_VERBOSE(
@ -132,6 +180,84 @@ out:
return rc;
}
#if HAVE_UCX_DEVICE_MEM
static uct_ib_device_mem_h alloc_device_mem(mca_spml_ucx_t *spml, size_t size,
void **address_p)
{
uct_ib_device_mem_h dev_mem = NULL;
ucs_status_t status;
uct_md_h uct_md;
void *address;
size_t length;
int ret;
uct_md = ucp_context_find_tl_md(spml->ucp_context, "mlx5");
if (uct_md == NULL) {
SSHMEM_VERBOSE(1, "ucp_context_find_tl_md() returned NULL\n");
return NULL;
}
/* If found a matching memory domain, allocate device memory on it */
length = size;
address = NULL;
status = uct_ib_md_alloc_device_mem(uct_md, &length, &address,
UCT_MD_MEM_ACCESS_ALL, "sshmem_seg",
&dev_mem);
if (status != UCS_OK) {
/* If could not allocate device memory - fallback to mmap (since some
* PEs in the job may succeed and while others failed */
SSHMEM_VERBOSE(1, "uct_ib_md_alloc_dm() failed: %s\n",
ucs_status_string(status));
return NULL;
}
SSHMEM_VERBOSE(3, "uct_ib_md_alloc_dm() returned address %p\n", address);
*address_p = address;
return dev_mem;
}
#endif
static int
segment_create(map_segment_t *ds_buf,
const char *file_name,
size_t size, long hint)
{
mca_spml_ucx_t *spml = (mca_spml_ucx_t*)mca_spml.self;
unsigned flags;
int ret;
#if HAVE_UCX_DEVICE_MEM
if (hint & SHMEM_HINT_DEVICE_NIC_MEM) {
if (size > UINT_MAX) {
return OSHMEM_ERR_BAD_PARAM;
}
void *dev_mem_address;
uct_ib_device_mem_h dev_mem = alloc_device_mem(spml, size,
&dev_mem_address);
if (dev_mem != NULL) {
ret = segment_create_internal(ds_buf, dev_mem_address, size, 0,
hint, dev_mem);
if (ret == OSHMEM_SUCCESS) {
return OSHMEM_SUCCESS;
} else if (dev_mem != NULL) {
uct_ib_md_release_device_mem(dev_mem);
/* fallback to regular allocation */
}
}
}
#endif
flags = UCP_MEM_MAP_ALLOCATE | (spml->heap_reg_nb ? UCP_MEM_MAP_NONBLOCK : 0);
if (hint) {
return segment_create_internal(ds_buf, NULL, size, flags, hint, NULL);
} else {
return segment_create_internal(ds_buf, mca_sshmem_base_start_address,
size, flags | UCP_MEM_MAP_FIXED, hint,
NULL);
}
}
static void *
segment_attach(map_segment_t *ds_buf, sshmem_mkey_t *mkey)
{
@ -168,10 +294,22 @@ static int
segment_unlink(map_segment_t *ds_buf)
{
mca_spml_ucx_t *spml = (mca_spml_ucx_t *)mca_spml.self;
mca_sshmem_ucx_segment_context_t *ctx = ds_buf->context;
assert(ds_buf);
if (ctx->shadow_allocator) {
sshmem_ucx_shadow_destroy(ctx->shadow_allocator);
}
ucp_mem_unmap(spml->ucp_context, (ucp_mem_h)ds_buf->context);
ucp_mem_unmap(spml->ucp_context, ctx->ucp_memh);
#if HAVE_UCX_DEVICE_MEM
if (ctx->dev_mem) {
uct_ib_md_release_device_mem(ctx->dev_mem);
}
#endif
ds_buf->context = NULL;
free(ctx);
OPAL_OUTPUT_VERBOSE(
(70, oshmem_sshmem_base_framework.framework_output,
@ -188,3 +326,79 @@ segment_unlink(map_segment_t *ds_buf)
return OSHMEM_SUCCESS;
}
static void *sshmem_ucx_memheap_index2ptr(map_segment_t *s, unsigned index)
{
return (char*)s->super.va_base + (index * ALLOC_ELEM_SIZE);
}
static unsigned sshmem_ucx_memheap_ptr2index(map_segment_t *s, void *ptr)
{
return ((char*)ptr - (char*)s->super.va_base) / ALLOC_ELEM_SIZE;
}
void sshmem_ucx_memheap_wordcopy(void *dst, void *src, size_t size)
{
const size_t count = (size + sizeof(uint64_t) - 1) / sizeof(uint64_t);
uint64_t *dst64 = (uint64_t*)dst;
uint64_t *src64 = (uint64_t*)src;
size_t i;
for (i = 0; i < count; ++i) {
*(dst64++) = *(src64++);
}
opal_atomic_wmb();
}
static int sshmem_ucx_memheap_realloc(map_segment_t *s, size_t size,
void* old_ptr, void** new_ptr)
{
mca_sshmem_ucx_segment_context_t *ctx = s->context;
unsigned alloc_count, index;
int res;
if (size > s->seg_size) {
return OSHMEM_ERR_OUT_OF_RESOURCE;
}
/* create allocator on demand */
if (!ctx->shadow_allocator) {
ctx->shadow_allocator = sshmem_ucx_shadow_create(s->seg_size);
if (!ctx->shadow_allocator) {
return OSHMEM_ERR_OUT_OF_RESOURCE;
}
}
/* Allocate new element. Zero-size allocation should still return a unique
* pointer, so allocate 1 byte */
alloc_count = max((size + ALLOC_ELEM_SIZE - 1) / ALLOC_ELEM_SIZE, 1);
res = sshmem_ucx_shadow_alloc(ctx->shadow_allocator, alloc_count, &index);
if (res != OSHMEM_SUCCESS) {
return res;
}
*new_ptr = sshmem_ucx_memheap_index2ptr(s, index);
/* Copy to new segment and release old*/
if (old_ptr) {
unsigned old_index = sshmem_ucx_memheap_ptr2index(s, old_ptr);
unsigned old_alloc_count = sshmem_ucx_shadow_size(ctx->shadow_allocator,
old_index);
sshmem_ucx_memheap_wordcopy(*new_ptr, old_ptr,
min(size, old_alloc_count * ALLOC_ELEM_SIZE));
sshmem_ucx_shadow_free(ctx->shadow_allocator, old_index);
}
return OSHMEM_SUCCESS;
}
static int sshmem_ucx_memheap_free(map_segment_t *s, void* ptr)
{
mca_sshmem_ucx_segment_context_t *ctx = s->context;
if (!ptr) {
return OSHMEM_SUCCESS;
}
return sshmem_ucx_shadow_free(ctx->shadow_allocator,
sshmem_ucx_memheap_ptr2index(s, ptr));
}

127
oshmem/mca/sshmem/ucx/sshmem_ucx_shadow.c Обычный файл
Просмотреть файл

@ -0,0 +1,127 @@
/*
* Copyright (c) 2019 Mellanox Technologies, Inc.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "oshmem_config.h"
#include "oshmem/mca/sshmem/sshmem.h"
#include "oshmem/include/shmemx.h"
#include "oshmem/mca/sshmem/base/base.h"
#include "sshmem_ucx.h"
#define SSHMEM_UCX_SHADOW_ELEM_FLAG_FREE 0x1
typedef struct sshmem_ucx_shadow_alloc_elem {
unsigned flags;
unsigned block_size;
} sshmem_ucx_shadow_alloc_elem_t;
struct sshmem_ucx_shadow_allocator {
size_t num_elems;
sshmem_ucx_shadow_alloc_elem_t elems[];
};
static int sshmem_ucx_shadow_is_free(sshmem_ucx_shadow_alloc_elem_t *elem)
{
return elem->flags & SSHMEM_UCX_SHADOW_ELEM_FLAG_FREE;
}
static void sshmem_ucx_shadow_set_elem(sshmem_ucx_shadow_alloc_elem_t *elem,
unsigned flags, unsigned block_size)
{
elem->flags = flags;
elem->block_size = block_size;
}
sshmem_ucx_shadow_allocator_t *sshmem_ucx_shadow_create(unsigned count)
{
sshmem_ucx_shadow_allocator_t *allocator;
allocator = calloc(1, sizeof(*allocator) +
count * sizeof(*allocator->elems));
if (allocator) {
/* initialization: set initial element to the whole buffer */
sshmem_ucx_shadow_set_elem(&allocator->elems[0],
SSHMEM_UCX_SHADOW_ELEM_FLAG_FREE, count);
allocator->num_elems = count;
}
return allocator;
}
void sshmem_ucx_shadow_destroy(sshmem_ucx_shadow_allocator_t *allocator)
{
free(allocator); /* no leak check. TODO add leak warnings/debug */
}
int sshmem_ucx_shadow_alloc(sshmem_ucx_shadow_allocator_t *allocator,
unsigned count, unsigned *index)
{
sshmem_ucx_shadow_alloc_elem_t *end = &allocator->elems[allocator->num_elems];
sshmem_ucx_shadow_alloc_elem_t *elem;
assert(count > 0);
for (elem = &allocator->elems[0]; elem < end; elem += elem->block_size) {
if (sshmem_ucx_shadow_is_free(elem) && (elem->block_size >= count)) {
/* found suitable free element */
if (elem->block_size > count) {
/* create new 'free' element for tail of current buffer */
sshmem_ucx_shadow_set_elem(elem + count,
SSHMEM_UCX_SHADOW_ELEM_FLAG_FREE,
elem->block_size - count);
}
/* set the size and flags of the allocated element */
sshmem_ucx_shadow_set_elem(elem, 0, count);
*index = elem - &allocator->elems[0];
return OSHMEM_SUCCESS;
}
}
return OSHMEM_ERR_OUT_OF_RESOURCE;
}
static void sshmem_ucx_shadow_merge_blocks(sshmem_ucx_shadow_allocator_t *allocator)
{
sshmem_ucx_shadow_alloc_elem_t *elem = &allocator->elems[0];
sshmem_ucx_shadow_alloc_elem_t *end = &allocator->elems[allocator->num_elems];
sshmem_ucx_shadow_alloc_elem_t *next_elem;
while ( (next_elem = (elem + elem->block_size)) < end) {
if (sshmem_ucx_shadow_is_free(elem) && sshmem_ucx_shadow_is_free(next_elem)) {
/* current & next elements are free, should be merged */
elem->block_size += next_elem->block_size;
/* clean element which is merged */
sshmem_ucx_shadow_set_elem(next_elem, 0, 0);
} else {
elem = next_elem;
}
}
}
int sshmem_ucx_shadow_free(sshmem_ucx_shadow_allocator_t *allocator,
unsigned index)
{
sshmem_ucx_shadow_alloc_elem_t *elem = &allocator->elems[index];
elem->flags |= SSHMEM_UCX_SHADOW_ELEM_FLAG_FREE;
sshmem_ucx_shadow_merge_blocks(allocator);
return OSHMEM_SUCCESS;
}
size_t sshmem_ucx_shadow_size(sshmem_ucx_shadow_allocator_t *allocator,
unsigned index)
{
sshmem_ucx_shadow_alloc_elem_t *elem = &allocator->elems[index];
assert(!sshmem_ucx_shadow_is_free(elem));
return elem->block_size;
}

Просмотреть файл

@ -58,6 +58,8 @@
#define shrealloc pshrealloc /* shmem-compat.h */
#define shfree pshfree /* shmem-compat.h */
#define shmemx_malloc_with_hint pshmemx_malloc_with_hint
/*
* Remote pointer operations
*/

Просмотреть файл

@ -11,6 +11,7 @@
#include "oshmem/constants.h"
#include "oshmem/include/shmem.h"
#include "oshmem/include/shmemx.h"
#include "oshmem/shmem/shmem_api_logger.h"
@ -19,9 +20,11 @@
#if OSHMEM_PROFILING
#include "oshmem/include/pshmem.h"
#pragma weak shmem_malloc = pshmem_malloc
#pragma weak shmem_calloc = pshmem_calloc
#pragma weak shmalloc = pshmalloc
#include "oshmem/include/pshmemx.h"
#pragma weak shmem_malloc = pshmem_malloc
#pragma weak shmem_calloc = pshmem_calloc
#pragma weak shmalloc = pshmalloc
#pragma weak shmemx_malloc_with_hint = pshmemx_malloc_with_hint
#include "oshmem/shmem/c/profile/defines.h"
#endif
@ -72,3 +75,33 @@ static inline void* _shmalloc(size_t size)
#endif
return pBuff;
}
void* shmemx_malloc_with_hint(size_t size, long hint)
{
int rc;
void* pBuff = NULL;
if (!hint) {
return _shmalloc(size);
}
RUNTIME_CHECK_INIT();
RUNTIME_CHECK_WITH_MEMHEAP_SIZE(size);
SHMEM_MUTEX_LOCK(shmem_internal_mutex_alloc);
rc = mca_memheap_alloc_with_hint(size, hint, &pBuff);
SHMEM_MUTEX_UNLOCK(shmem_internal_mutex_alloc);
if (OSHMEM_SUCCESS != rc) {
SHMEM_API_VERBOSE(10,
"Allocation with shmalloc(size=%lu) failed.",
(unsigned long)size);
return NULL ;
}
#if OSHMEM_SPEC_COMPAT == 1
shmem_barrier_all();
#endif
return pBuff;
}

Просмотреть файл

@ -18,6 +18,7 @@
#include "oshmem/runtime/runtime.h"
#include "oshmem/mca/memheap/memheap.h"
#include "oshmem/mca/memheap/base/base.h"
#if OSHMEM_PROFILING
#include "oshmem/include/pshmem.h"
@ -41,6 +42,7 @@ void shfree(void* ptr)
static inline void _shfree(void* ptr)
{
int rc;
map_segment_t *s;
RUNTIME_CHECK_INIT();
if (NULL == ptr) {
@ -55,7 +57,17 @@ static inline void _shfree(void* ptr)
SHMEM_MUTEX_LOCK(shmem_internal_mutex_alloc);
rc = MCA_MEMHEAP_CALL(free(ptr));
if (ptr) {
s = memheap_find_va(ptr);
} else {
s = NULL;
}
if (s && s->allocator) {
rc = s->allocator->free(s, ptr);
} else {
rc = MCA_MEMHEAP_CALL(free(ptr));
}
SHMEM_MUTEX_UNLOCK(shmem_internal_mutex_alloc);

Просмотреть файл

@ -18,6 +18,7 @@
#include "oshmem/shmem/shmem_api_logger.h"
#include "oshmem/mca/memheap/memheap.h"
#include "oshmem/mca/memheap/base/base.h"
#if OSHMEM_PROFILING
#include "oshmem/include/pshmem.h"
@ -42,12 +43,23 @@ static inline void* _shrealloc(void *ptr, size_t size)
{
int rc;
void* pBuff = NULL;
map_segment_t *s;
RUNTIME_CHECK_INIT();
SHMEM_MUTEX_LOCK(shmem_internal_mutex_alloc);
rc = MCA_MEMHEAP_CALL(realloc(size, ptr, &pBuff));
if (ptr) {
s = memheap_find_va(ptr);
} else {
s = NULL;
}
if (s && s->allocator) {
rc = s->allocator->realloc(s, size, ptr, &pBuff);
} else {
rc = MCA_MEMHEAP_CALL(realloc(size, ptr, &pBuff));
}
SHMEM_MUTEX_UNLOCK(shmem_internal_mutex_alloc);