diff --git a/oshmem/mca/sshmem/ucx/sshmem_ucx.h b/oshmem/mca/sshmem/ucx/sshmem_ucx.h index f171fe641b..fa264b40f4 100644 --- a/oshmem/mca/sshmem/ucx/sshmem_ucx.h +++ b/oshmem/mca/sshmem/ucx/sshmem_ucx.h @@ -49,10 +49,19 @@ sshmem_ucx_shadow_allocator_t *sshmem_ucx_shadow_create(unsigned count); void sshmem_ucx_shadow_destroy(sshmem_ucx_shadow_allocator_t *allocator); int sshmem_ucx_shadow_alloc(sshmem_ucx_shadow_allocator_t *allocator, unsigned count, unsigned *index); + +/* Reallocate an existing allocated buffer, resizing it in place when + * possible. + * Parameter 'inplace' is an output: when it is set to zero, a new buffer was + * allocated (in-place resize was not possible) and the caller should release + * the original buffer after copying the data; otherwise (inplace != 0) no additional action is required */ +int sshmem_ucx_shadow_realloc(sshmem_ucx_shadow_allocator_t *allocator, + unsigned count, unsigned old_index, unsigned *index, + int *inplace); int sshmem_ucx_shadow_free(sshmem_ucx_shadow_allocator_t *allocator, unsigned index); -size_t sshmem_ucx_shadow_size(sshmem_ucx_shadow_allocator_t *allocator, - unsigned index); +unsigned sshmem_ucx_shadow_size(sshmem_ucx_shadow_allocator_t *allocator, + unsigned index); END_C_DECLS diff --git a/oshmem/mca/sshmem/ucx/sshmem_ucx_module.c b/oshmem/mca/sshmem/ucx/sshmem_ucx_module.c index 53eab101a8..113d471374 100644 --- a/oshmem/mca/sshmem/ucx/sshmem_ucx_module.c +++ b/oshmem/mca/sshmem/ucx/sshmem_ucx_module.c @@ -190,7 +190,6 @@ static uct_ib_device_mem_h alloc_device_mem(mca_spml_ucx_t *spml, size_t size, uct_md_h uct_md; void *address; size_t length; - int ret; uct_md = ucp_context_find_tl_md(spml->ucp_context, "mlx5"); if (uct_md == NULL) { @@ -353,8 +352,9 @@ static int sshmem_ucx_memheap_realloc(map_segment_t *s, size_t size, void* old_ptr, void** new_ptr) { mca_sshmem_ucx_segment_context_t *ctx = s->context; - unsigned alloc_count, index; + unsigned alloc_count, index, old_index, old_alloc_count; int res; + int inplace; /* written by sshmem_ucx_shadow_realloc; only read when old_ptr != NULL */ if (size > s->seg_size) { 
return OSHMEM_ERR_OUT_OF_RESOURCE; @@ -371,7 +371,15 @@ static int sshmem_ucx_memheap_realloc(map_segment_t *s, size_t size, /* Allocate new element. Zero-size allocation should still return a unique * pointer, so allocate 1 byte */ alloc_count = max((size + ALLOC_ELEM_SIZE - 1) / ALLOC_ELEM_SIZE, 1); - res = sshmem_ucx_shadow_alloc(ctx->shadow_allocator, alloc_count, &index); + + if (!old_ptr) { /* no previous buffer: plain allocation */ + res = sshmem_ucx_shadow_alloc(ctx->shadow_allocator, alloc_count, &index); + } else { /* resize the existing block, in place when possible */ + old_index = sshmem_ucx_memheap_ptr2index(s, old_ptr); + res = sshmem_ucx_shadow_realloc(ctx->shadow_allocator, alloc_count, + old_index, &index, &inplace); + } + if (res != OSHMEM_SUCCESS) { return res; } @@ -379,10 +387,8 @@ static int sshmem_ucx_memheap_realloc(map_segment_t *s, size_t size, *new_ptr = sshmem_ucx_memheap_index2ptr(s, index); /* Copy to new segment and release old*/ - if (old_ptr) { - unsigned old_index = sshmem_ucx_memheap_ptr2index(s, old_ptr); - unsigned old_alloc_count = sshmem_ucx_shadow_size(ctx->shadow_allocator, - old_index); + if (old_ptr && !inplace) { /* inplace and old_index are only set, and only read, when old_ptr != NULL */ + old_alloc_count = sshmem_ucx_shadow_size(ctx->shadow_allocator, old_index); sshmem_ucx_memheap_wordcopy(*new_ptr, old_ptr, min(size, old_alloc_count * ALLOC_ELEM_SIZE)); sshmem_ucx_shadow_free(ctx->shadow_allocator, old_index); diff --git a/oshmem/mca/sshmem/ucx/sshmem_ucx_shadow.c b/oshmem/mca/sshmem/ucx/sshmem_ucx_shadow.c index 92fa2bb0cf..06922c3e1b 100644 --- a/oshmem/mca/sshmem/ucx/sshmem_ucx_shadow.c +++ b/oshmem/mca/sshmem/ucx/sshmem_ucx_shadow.c @@ -107,6 +107,68 @@ static void sshmem_ucx_shadow_merge_blocks(sshmem_ucx_shadow_allocator_t *alloca } } + +/* Resize the block at old_index to 'count' elements: keep, shrink or grow it in place when possible (*inplace = 1, *index = old_index), otherwise allocate a new block (*inplace = 0). */ +int sshmem_ucx_shadow_realloc(sshmem_ucx_shadow_allocator_t *allocator, + unsigned count, unsigned old_index, unsigned *index, + int *inplace) +{ + sshmem_ucx_shadow_alloc_elem_t *elem = &allocator->elems[old_index]; + unsigned old_count = elem->block_size; + sshmem_ucx_shadow_alloc_elem_t *end; + sshmem_ucx_shadow_alloc_elem_t *next; + 
assert(count > 0); + assert(!sshmem_ucx_shadow_is_free(elem)); + + *inplace = 1; /* cleared only on the fallback path at the end */ + + if (count == old_count) { /* same size: nothing to change */ + *index = old_index; + return OSHMEM_SUCCESS; + } + + if (count < old_count) { + /* Requested size is smaller than the current block: shrink it in + * place and mark the cut-off tail as a new free block */ + sshmem_ucx_shadow_set_elem(elem + count, + SSHMEM_UCX_SHADOW_ELEM_FLAG_FREE, + elem->block_size - count); + elem->block_size = count; + *index = old_index; + sshmem_ucx_shadow_merge_blocks(allocator); /* presumably coalesces the new free tail with adjacent free blocks - confirm in merge_blocks */ + return OSHMEM_SUCCESS; + } + + assert(count > old_count); + + end = &allocator->elems[allocator->num_elems]; + next = &elem[old_count]; /* element right after the current block */ + /* Grow in place if the adjacent block is free and large enough */ + if ((next < end) && /* current block is not the last one */ + sshmem_ucx_shadow_is_free(next) && /* next is free */ + (old_count + next->block_size >= count)) + { + assert(elem < next); + assert(elem + count > next); + assert(elem + count <= end); + assert(next + next->block_size <= end); + + if (old_count + next->block_size > count) { /* leftover beyond the new end stays free */ + sshmem_ucx_shadow_set_elem(elem + count, SSHMEM_UCX_SHADOW_ELEM_FLAG_FREE, + old_count + next->block_size - count); + } + + sshmem_ucx_shadow_set_elem(next, 0, 0); /* reset the absorbed block's header (flags 0, size 0); done after its block_size was read above */ + elem->block_size = count; + *index = old_index; + return OSHMEM_SUCCESS; + } + + *inplace = 0; /* cannot resize in place: hand out a new block; the caller copies the data and frees the old block */ + return sshmem_ucx_shadow_alloc(allocator, count, index); +} + int sshmem_ucx_shadow_free(sshmem_ucx_shadow_allocator_t *allocator, unsigned index) { @@ -117,8 +179,8 @@ int sshmem_ucx_shadow_free(sshmem_ucx_shadow_allocator_t *allocator, return OSHMEM_SUCCESS; } -size_t sshmem_ucx_shadow_size(sshmem_ucx_shadow_allocator_t *allocator, - unsigned index) +unsigned sshmem_ucx_shadow_size(sshmem_ucx_shadow_allocator_t *allocator, + unsigned index) { sshmem_ucx_shadow_alloc_elem_t *elem = &allocator->elems[index];