From 277c2a9e5c7711098be826e6c154253747fdad9a Mon Sep 17 00:00:00 2001 From: Sergey Oblomov Date: Wed, 15 May 2019 19:33:36 +0300 Subject: [PATCH 1/3] ALLOC_WITH_HINT: added inplace realloc - in some cases realloc operation may be completed without allocation of new buffer (and without additional data copy) - added logic to reallocate buffer inplace if possible Signed-off-by: Sergey Oblomov --- oshmem/mca/spml/ucx/spml_ucx.c | 3 +- oshmem/mca/sshmem/ucx/sshmem_ucx.h | 13 ++++- oshmem/mca/sshmem/ucx/sshmem_ucx_module.c | 20 ++++--- oshmem/mca/sshmem/ucx/sshmem_ucx_shadow.c | 64 ++++++++++++++++++++++- 4 files changed, 88 insertions(+), 12 deletions(-) diff --git a/oshmem/mca/spml/ucx/spml_ucx.c b/oshmem/mca/spml/ucx/spml_ucx.c index bd87222c71..78d04abecc 100644 --- a/oshmem/mca/spml/ucx/spml_ucx.c +++ b/oshmem/mca/spml/ucx/spml_ucx.c @@ -551,7 +551,8 @@ static int mca_spml_ucx_ctx_create_common(long options, mca_spml_ucx_ctx_t **ucx { ucp_worker_params_t params; ucp_ep_params_t ep_params; - size_t i, j, nprocs = oshmem_num_procs(); + size_t i, nprocs = oshmem_num_procs(); + int j; ucs_status_t err; spml_ucx_mkey_t *ucx_mkey; sshmem_mkey_t *mkey; diff --git a/oshmem/mca/sshmem/ucx/sshmem_ucx.h b/oshmem/mca/sshmem/ucx/sshmem_ucx.h index f171fe641b..3d6bba7018 100644 --- a/oshmem/mca/sshmem/ucx/sshmem_ucx.h +++ b/oshmem/mca/sshmem/ucx/sshmem_ucx.h @@ -49,10 +49,19 @@ sshmem_ucx_shadow_allocator_t *sshmem_ucx_shadow_create(unsigned count); void sshmem_ucx_shadow_destroy(sshmem_ucx_shadow_allocator_t *allocator); int sshmem_ucx_shadow_alloc(sshmem_ucx_shadow_allocator_t *allocator, unsigned count, unsigned *index); + +/* reallocate existing allocated buffer. if possible - used inplace + * reallocation. 
+ * parameter 'inplace' - out, in case if zero - new buffer was allocated + * (inplace is not possible), user should remove original buffer after data + * is copied, else (if inplace != 0) - no additional action required */ +int sshmem_ucx_shadow_realloc(sshmem_ucx_shadow_allocator_t *allocator, + unsigned count, unsigned old_index, unsigned *index, + int *inplace); int sshmem_ucx_shadow_free(sshmem_ucx_shadow_allocator_t *allocator, unsigned index); -size_t sshmem_ucx_shadow_size(sshmem_ucx_shadow_allocator_t *allocator, - unsigned index); +unsigned sshmem_ucx_shadow_size(sshmem_ucx_shadow_allocator_t *allocator, + unsigned index); END_C_DECLS diff --git a/oshmem/mca/sshmem/ucx/sshmem_ucx_module.c b/oshmem/mca/sshmem/ucx/sshmem_ucx_module.c index 244eb7a169..46ea60a495 100644 --- a/oshmem/mca/sshmem/ucx/sshmem_ucx_module.c +++ b/oshmem/mca/sshmem/ucx/sshmem_ucx_module.c @@ -190,7 +190,6 @@ static uct_ib_device_mem_h alloc_device_mem(mca_spml_ucx_t *spml, size_t size, uct_md_h uct_md; void *address; size_t length; - int ret; uct_md = ucp_context_find_tl_md(spml->ucp_context, "mlx5"); if (uct_md == NULL) { @@ -354,8 +353,9 @@ static int sshmem_ucx_memheap_realloc(map_segment_t *s, size_t size, void* old_ptr, void** new_ptr) { mca_sshmem_ucx_segment_context_t *ctx = s->context; - unsigned alloc_count, index; + unsigned alloc_count, index, old_index, old_alloc_count; int res; + int inplace; if (size > s->seg_size) { return OSHMEM_ERR_OUT_OF_RESOURCE; @@ -372,7 +372,15 @@ static int sshmem_ucx_memheap_realloc(map_segment_t *s, size_t size, /* Allocate new element. 
Zero-size allocation should still return a unique * pointer, so allocate 1 byte */ alloc_count = max((size + ALLOC_ELEM_SIZE - 1) / ALLOC_ELEM_SIZE, 1); - res = sshmem_ucx_shadow_alloc(ctx->shadow_allocator, alloc_count, &index); + + if (!old_ptr) { + res = sshmem_ucx_shadow_alloc(ctx->shadow_allocator, alloc_count, &index); + } else { + old_index = sshmem_ucx_memheap_ptr2index(s, old_ptr); + res = sshmem_ucx_shadow_realloc(ctx->shadow_allocator, alloc_count, + old_index, &index, &inplace); + } + if (res != OSHMEM_SUCCESS) { return res; } @@ -380,10 +388,8 @@ static int sshmem_ucx_memheap_realloc(map_segment_t *s, size_t size, *new_ptr = sshmem_ucx_memheap_index2ptr(s, index); /* Copy to new segment and release old*/ - if (old_ptr) { - unsigned old_index = sshmem_ucx_memheap_ptr2index(s, old_ptr); - unsigned old_alloc_count = sshmem_ucx_shadow_size(ctx->shadow_allocator, - old_index); + if (old_ptr && !inplace) { + old_alloc_count = sshmem_ucx_shadow_size(ctx->shadow_allocator, old_index); sshmem_ucx_memheap_wordcopy(*new_ptr, old_ptr, min(size, old_alloc_count * ALLOC_ELEM_SIZE)); sshmem_ucx_shadow_free(ctx->shadow_allocator, old_index); diff --git a/oshmem/mca/sshmem/ucx/sshmem_ucx_shadow.c b/oshmem/mca/sshmem/ucx/sshmem_ucx_shadow.c index 92fa2bb0cf..d5a25eaf15 100644 --- a/oshmem/mca/sshmem/ucx/sshmem_ucx_shadow.c +++ b/oshmem/mca/sshmem/ucx/sshmem_ucx_shadow.c @@ -107,6 +107,66 @@ static void sshmem_ucx_shadow_merge_blocks(sshmem_ucx_shadow_allocator_t *alloca } } + + +int sshmem_ucx_shadow_realloc(sshmem_ucx_shadow_allocator_t *allocator, + unsigned count, unsigned old_index, unsigned *index, + int *inplace) +{ + sshmem_ucx_shadow_alloc_elem_t *end = &allocator->elems[allocator->num_elems]; + sshmem_ucx_shadow_alloc_elem_t *elem = &allocator->elems[old_index]; + sshmem_ucx_shadow_alloc_elem_t *next = &elem[elem->block_size]; + unsigned old_count = elem->block_size; + + assert(count > 0); + assert(!sshmem_ucx_shadow_is_free(elem)); + + *inplace = 1; + + if 
(count == old_count) { + *index = old_index; + return OSHMEM_SUCCESS; + } + + if (count < elem->block_size) { + /* requested block is shorter than allocated block + * then just cut current buffer */ + sshmem_ucx_shadow_set_elem(elem + count, + SSHMEM_UCX_SHADOW_ELEM_FLAG_FREE, + elem->block_size - count); + elem->block_size = count; + *index = old_index; + sshmem_ucx_shadow_merge_blocks(allocator); + return OSHMEM_SUCCESS; + } + + assert(count > old_count); + + /* try to check if next element is free & has enough length */ + if ((next < end) && /* non-last element? */ + sshmem_ucx_shadow_is_free(next) && /* next is free */ + (old_count + next->block_size >= count)) + { + assert(elem < next); + assert(elem + count > next); + assert(elem + count <= end); + assert(next + next->block_size <= end); + + if (old_count + next->block_size > count) { + sshmem_ucx_shadow_set_elem(elem + count, SSHMEM_UCX_SHADOW_ELEM_FLAG_FREE, + old_count + next->block_size - count); + } + + sshmem_ucx_shadow_set_elem(next, 0, 0); + elem->block_size = count; + *index = old_index; + return OSHMEM_SUCCESS; + } + + *inplace = 0; + return sshmem_ucx_shadow_alloc(allocator, count, index); +} + int sshmem_ucx_shadow_free(sshmem_ucx_shadow_allocator_t *allocator, unsigned index) { @@ -117,8 +177,8 @@ int sshmem_ucx_shadow_free(sshmem_ucx_shadow_allocator_t *allocator, return OSHMEM_SUCCESS; } -size_t sshmem_ucx_shadow_size(sshmem_ucx_shadow_allocator_t *allocator, - unsigned index) +unsigned sshmem_ucx_shadow_size(sshmem_ucx_shadow_allocator_t *allocator, + unsigned index) { sshmem_ucx_shadow_alloc_elem_t *elem = &allocator->elems[index]; From a51badd627c5cdd3212cd6bedd3daa236cd6c8db Mon Sep 17 00:00:00 2001 From: Sergey Oblomov Date: Thu, 16 May 2019 09:38:01 +0300 Subject: [PATCH 2/3] SHADOW ALLOCATOR: minor code optimization Signed-off-by: Sergey Oblomov --- oshmem/mca/sshmem/ucx/sshmem_ucx.h | 4 ++-- oshmem/mca/sshmem/ucx/sshmem_ucx_module.c | 2 +- oshmem/mca/sshmem/ucx/sshmem_ucx_shadow.c | 2 
+- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/oshmem/mca/sshmem/ucx/sshmem_ucx.h b/oshmem/mca/sshmem/ucx/sshmem_ucx.h index 3d6bba7018..fa264b40f4 100644 --- a/oshmem/mca/sshmem/ucx/sshmem_ucx.h +++ b/oshmem/mca/sshmem/ucx/sshmem_ucx.h @@ -50,9 +50,9 @@ void sshmem_ucx_shadow_destroy(sshmem_ucx_shadow_allocator_t *allocator); int sshmem_ucx_shadow_alloc(sshmem_ucx_shadow_allocator_t *allocator, unsigned count, unsigned *index); -/* reallocate existing allocated buffer. if possible - used inplace +/* Reallocate existing allocated buffer. If possible - used inplace * reallocation. - * parameter 'inplace' - out, in case if zero - new buffer was allocated + * Parameter 'inplace' - out, in case if zero - new buffer was allocated * (inplace is not possible), user should remove original buffer after data * is copied, else (if inplace != 0) - no additional action required */ int sshmem_ucx_shadow_realloc(sshmem_ucx_shadow_allocator_t *allocator, diff --git a/oshmem/mca/sshmem/ucx/sshmem_ucx_module.c b/oshmem/mca/sshmem/ucx/sshmem_ucx_module.c index 46ea60a495..1208d42834 100644 --- a/oshmem/mca/sshmem/ucx/sshmem_ucx_module.c +++ b/oshmem/mca/sshmem/ucx/sshmem_ucx_module.c @@ -336,7 +336,7 @@ static unsigned sshmem_ucx_memheap_ptr2index(map_segment_t *s, void *ptr) return ((char*)ptr - (char*)s->super.va_base) / ALLOC_ELEM_SIZE; } -void sshmem_ucx_memheap_wordcopy(void *dst, void *src, size_t size) +static void sshmem_ucx_memheap_wordcopy(void *dst, void *src, size_t size) { const size_t count = (size + sizeof(uint64_t) - 1) / sizeof(uint64_t); uint64_t *dst64 = (uint64_t*)dst; diff --git a/oshmem/mca/sshmem/ucx/sshmem_ucx_shadow.c b/oshmem/mca/sshmem/ucx/sshmem_ucx_shadow.c index d5a25eaf15..9aaf77772f 100644 --- a/oshmem/mca/sshmem/ucx/sshmem_ucx_shadow.c +++ b/oshmem/mca/sshmem/ucx/sshmem_ucx_shadow.c @@ -128,7 +128,7 @@ int sshmem_ucx_shadow_realloc(sshmem_ucx_shadow_allocator_t *allocator, return OSHMEM_SUCCESS; } - if (count < elem->block_size) { + 
if (count < old_count) { /* requested block is shorter than allocated block * then just cut current buffer */ sshmem_ucx_shadow_set_elem(elem + count, From d6a09120244be36d870e791146b5baed93659754 Mon Sep 17 00:00:00 2001 From: Sergey Oblomov Date: Fri, 24 May 2019 09:16:56 +0300 Subject: [PATCH 3/3] OSHMEM: minor optimization of realloc in shadow allocator Signed-off-by: Sergey Oblomov --- oshmem/mca/sshmem/ucx/sshmem_ucx_shadow.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/oshmem/mca/sshmem/ucx/sshmem_ucx_shadow.c b/oshmem/mca/sshmem/ucx/sshmem_ucx_shadow.c index 9aaf77772f..06922c3e1b 100644 --- a/oshmem/mca/sshmem/ucx/sshmem_ucx_shadow.c +++ b/oshmem/mca/sshmem/ucx/sshmem_ucx_shadow.c @@ -113,10 +113,10 @@ int sshmem_ucx_shadow_realloc(sshmem_ucx_shadow_allocator_t *allocator, unsigned count, unsigned old_index, unsigned *index, int *inplace) { - sshmem_ucx_shadow_alloc_elem_t *end = &allocator->elems[allocator->num_elems]; sshmem_ucx_shadow_alloc_elem_t *elem = &allocator->elems[old_index]; - sshmem_ucx_shadow_alloc_elem_t *next = &elem[elem->block_size]; unsigned old_count = elem->block_size; + sshmem_ucx_shadow_alloc_elem_t *end; + sshmem_ucx_shadow_alloc_elem_t *next; assert(count > 0); assert(!sshmem_ucx_shadow_is_free(elem)); @@ -142,8 +142,10 @@ int sshmem_ucx_shadow_realloc(sshmem_ucx_shadow_allocator_t *allocator, assert(count > old_count); + end = &allocator->elems[allocator->num_elems]; + next = &elem[old_count]; /* try to check if next element is free & has enough length */ - if ((next < end) && /* non-last element? */ + if ((next < end) && /* non-last element? */ sshmem_ucx_shadow_is_free(next) && /* next is free */ (old_count + next->block_size >= count)) {