diff --git a/opal/mca/btl/vader/btl_vader.h b/opal/mca/btl/vader/btl_vader.h index 7e1afad24c..028c7a3816 100644 --- a/opal/mca/btl/vader/btl_vader.h +++ b/opal/mca/btl/vader/btl_vader.h @@ -15,6 +15,8 @@ * Copyright (c) 2010-2018 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2015 Mellanox Technologies. All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. * * $COPYRIGHT$ * @@ -53,6 +55,7 @@ #include "opal/mca/rcache/rcache.h" #include "opal/mca/rcache/base/base.h" #include "opal/mca/btl/base/btl_base_error.h" +#include "opal/mca/mpool/base/base.h" #include "opal/util/proc.h" #include "btl_vader_endpoint.h" @@ -112,16 +115,15 @@ struct mca_btl_vader_component_t { opal_mutex_t lock; /**< lock to protect concurrent updates to this structure's members */ char *my_segment; /**< this rank's base pointer */ size_t segment_size; /**< size of my_segment */ - size_t segment_offset; /**< start of unused portion of my_segment */ int32_t num_smp_procs; /**< current number of smp procs on this host */ opal_free_list_t vader_frags_eager; /**< free list of vader send frags */ opal_free_list_t vader_frags_max_send; /**< free list of vader max send frags (large fragments) */ opal_free_list_t vader_frags_user; /**< free list of small inline frags */ + opal_free_list_t vader_fboxes; /**< free list of available fast-boxes */ unsigned int fbox_threshold; /**< number of sends required before we setup a send fast box for a peer */ unsigned int fbox_max; /**< maximum number of send fast boxes to allocate */ unsigned int fbox_size; /**< size of each peer fast box allocation */ - unsigned int fbox_count; /**< number of send fast boxes allocated */ int single_copy_mechanism; /**< single copy mechanism to use */ @@ -143,6 +145,7 @@ struct mca_btl_vader_component_t { #if OPAL_BTL_VADER_HAVE_KNEM unsigned int knem_dma_min; /**< minimum size to enable DMA for knem transfers (0 disables) */ #endif + mca_mpool_base_module_t 
*mpool; }; typedef struct mca_btl_vader_component_t mca_btl_vader_component_t; OPAL_MODULE_DECLSPEC extern mca_btl_vader_component_t mca_btl_vader_component; diff --git a/opal/mca/btl/vader/btl_vader_component.c b/opal/mca/btl/vader/btl_vader_component.c index 3fc35c6a46..1119725997 100644 --- a/opal/mca/btl/vader/btl_vader_component.c +++ b/opal/mca/btl/vader/btl_vader_component.c @@ -18,6 +18,9 @@ * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. * Copyright (c) 2014-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -301,6 +304,7 @@ static int mca_btl_vader_component_open(void) OBJ_CONSTRUCT(&mca_btl_vader_component.vader_frags_eager, opal_free_list_t); OBJ_CONSTRUCT(&mca_btl_vader_component.vader_frags_user, opal_free_list_t); OBJ_CONSTRUCT(&mca_btl_vader_component.vader_frags_max_send, opal_free_list_t); + OBJ_CONSTRUCT(&mca_btl_vader_component.vader_fboxes, opal_free_list_t); OBJ_CONSTRUCT(&mca_btl_vader_component.lock, opal_mutex_t); OBJ_CONSTRUCT(&mca_btl_vader_component.pending_endpoints, opal_list_t); OBJ_CONSTRUCT(&mca_btl_vader_component.pending_fragments, opal_list_t); @@ -321,6 +325,7 @@ static int mca_btl_vader_component_close(void) OBJ_DESTRUCT(&mca_btl_vader_component.vader_frags_eager); OBJ_DESTRUCT(&mca_btl_vader_component.vader_frags_user); OBJ_DESTRUCT(&mca_btl_vader_component.vader_frags_max_send); + OBJ_DESTRUCT(&mca_btl_vader_component.vader_fboxes); OBJ_DESTRUCT(&mca_btl_vader_component.lock); OBJ_DESTRUCT(&mca_btl_vader_component.pending_endpoints); OBJ_DESTRUCT(&mca_btl_vader_component.pending_fragments); @@ -517,7 +522,6 @@ static mca_btl_base_module_t **mca_btl_vader_component_init (int *num_btls, /* no fast boxes allocated initially */ component->num_fbox_in_endpoints = 0; 
- component->fbox_count = 0; mca_btl_vader_check_single_copy (); @@ -559,8 +563,6 @@ static mca_btl_base_module_t **mca_btl_vader_component_init (int *num_btls, } } - component->segment_offset = 0; - /* initialize my fifo */ vader_fifo_init ((struct vader_fifo_t *) component->my_segment); diff --git a/opal/mca/btl/vader/btl_vader_endpoint.h b/opal/mca/btl/vader/btl_vader_endpoint.h index d3a39e08f2..e9409b90c1 100644 --- a/opal/mca/btl/vader/btl_vader_endpoint.h +++ b/opal/mca/btl/vader/btl_vader_endpoint.h @@ -13,6 +13,8 @@ * Copyright (c) 2006-2007 Voltaire. All rights reserved. * Copyright (c) 2012-2016 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -58,6 +60,7 @@ typedef struct mca_btl_base_endpoint_t { uint32_t *startp; /**< pointer to location storing start offset */ unsigned int start, end; uint16_t seq; + opal_free_list_item_t *fbox; /**< fast-box free list item */ } fbox_out; int32_t peer_smp_rank; /**< my peer's SMP process rank. 
Used for accessing @@ -101,13 +104,16 @@ static inline void mca_btl_vader_endpoint_setup_fbox_recv (struct mca_btl_base_e endpoint->fbox_in.buffer = base; } -static inline void mca_btl_vader_endpoint_setup_fbox_send (struct mca_btl_base_endpoint_t *endpoint, void *base) +static inline void mca_btl_vader_endpoint_setup_fbox_send (struct mca_btl_base_endpoint_t *endpoint, opal_free_list_item_t *fbox) { + void *base = fbox->ptr; + endpoint->fbox_out.start = MCA_BTL_VADER_FBOX_ALIGNMENT; endpoint->fbox_out.end = MCA_BTL_VADER_FBOX_ALIGNMENT; endpoint->fbox_out.startp = (uint32_t *) base; endpoint->fbox_out.startp[0] = MCA_BTL_VADER_FBOX_ALIGNMENT; endpoint->fbox_out.seq = 0; + endpoint->fbox_out.fbox = fbox; /* zero out the first header in the fast box */ memset ((char *) base + MCA_BTL_VADER_FBOX_ALIGNMENT, 0, MCA_BTL_VADER_FBOX_ALIGNMENT); diff --git a/opal/mca/btl/vader/btl_vader_fbox.h b/opal/mca/btl/vader/btl_vader_fbox.h index 25f99c2c98..3762c62010 100644 --- a/opal/mca/btl/vader/btl_vader_fbox.h +++ b/opal/mca/btl/vader/btl_vader_fbox.h @@ -2,6 +2,8 @@ /* * Copyright (c) 2011-2018 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -259,20 +261,17 @@ static inline void mca_btl_vader_try_fbox_setup (mca_btl_base_endpoint_t *ep, mc /* protect access to mca_btl_vader_component.segment_offset */ OPAL_THREAD_LOCK(&mca_btl_vader_component.lock); - if (mca_btl_vader_component.segment_size >= mca_btl_vader_component.segment_offset + mca_btl_vader_component.fbox_size && - mca_btl_vader_component.fbox_max > mca_btl_vader_component.fbox_count) { - /* verify the remote side will accept another fbox */ - if (0 <= opal_atomic_add_fetch_32 (&ep->fifo->fbox_available, -1)) { - void *fbox_base = mca_btl_vader_component.my_segment + mca_btl_vader_component.segment_offset; - mca_btl_vader_component.segment_offset += mca_btl_vader_component.fbox_size; + /* verify the remote side will accept another fbox */ + if (0 <= opal_atomic_add_fetch_32 (&ep->fifo->fbox_available, -1)) { + opal_free_list_item_t *fbox = opal_free_list_get (&mca_btl_vader_component.vader_fboxes); + if (NULL != fbox) { /* zero out the fast box */ - memset (fbox_base, 0, mca_btl_vader_component.fbox_size); - mca_btl_vader_endpoint_setup_fbox_send (ep, fbox_base); + memset (fbox->ptr, 0, mca_btl_vader_component.fbox_size); + mca_btl_vader_endpoint_setup_fbox_send (ep, fbox); hdr->flags |= MCA_BTL_VADER_FLAG_SETUP_FBOX; hdr->fbox_base = virtual2relative((char *) ep->fbox_out.buffer); - ++mca_btl_vader_component.fbox_count; } else { opal_atomic_add_fetch_32 (&ep->fifo->fbox_available, 1); } diff --git a/opal/mca/btl/vader/btl_vader_frag.c b/opal/mca/btl/vader/btl_vader_frag.c index a132ea3d72..3635af99a4 100644 --- a/opal/mca/btl/vader/btl_vader_frag.c +++ b/opal/mca/btl/vader/btl_vader_frag.c @@ -13,6 +13,8 @@ * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -41,38 +43,9 @@ static inline void mca_btl_vader_frag_constructor (mca_btl_vader_frag_t *frag) int mca_btl_vader_frag_init (opal_free_list_item_t *item, void *ctx) { mca_btl_vader_frag_t *frag = (mca_btl_vader_frag_t *) item; - unsigned int data_size = (unsigned int)(uintptr_t) ctx; - unsigned int frag_size = data_size + sizeof (mca_btl_vader_hdr_t); - /* ensure next fragment is aligned on a cache line */ - frag_size = (frag_size + 63) & ~63; - - OPAL_THREAD_LOCK(&mca_btl_vader_component.lock); - - if (data_size && mca_btl_vader_component.segment_size < mca_btl_vader_component.segment_offset + frag_size) { - OPAL_THREAD_UNLOCK(&mca_btl_vader_component.lock); - return OPAL_ERR_OUT_OF_RESOURCE; - } - - /* Set the list element here so we don't have to set it on the critical path. This only - * works if each free list has its own unique fragment size and ALL free lists are initialized - * with opal_free_list_init. */ - if (mca_btl_vader_component.max_inline_send == data_size) { - frag->my_list = &mca_btl_vader_component.vader_frags_user; - } else if (mca_btl_vader.super.btl_eager_limit == data_size) { - frag->my_list = &mca_btl_vader_component.vader_frags_eager; - } else if (mca_btl_vader.super.btl_max_send_size == data_size) { - frag->my_list = &mca_btl_vader_component.vader_frags_max_send; - } - - if (data_size) { - item->ptr = mca_btl_vader_component.my_segment + mca_btl_vader_component.segment_offset; - mca_btl_vader_component.segment_offset += frag_size; - } - - OPAL_THREAD_UNLOCK(&mca_btl_vader_component.lock); - - mca_btl_vader_frag_constructor ((mca_btl_vader_frag_t *) item); + /* Set the list element here so we don't have to set it on the critical path */ + frag->my_list = (opal_free_list_t *) ctx; return OPAL_SUCCESS; } diff --git a/opal/mca/btl/vader/btl_vader_module.c b/opal/mca/btl/vader/btl_vader_module.c index bcc25f0f00..36ec1f0061 100644 --- a/opal/mca/btl/vader/btl_vader_module.c +++ 
b/opal/mca/btl/vader/btl_vader_module.c @@ -17,7 +17,7 @@ * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2018 Triad National Security, LLC. All rights + * Copyright (c) 2018-2019 Triad National Security, LLC. All rights * reserved. * $COPYRIGHT$ * @@ -97,19 +97,32 @@ static int vader_btl_first_time_init(mca_btl_vader_t *vader_btl, int n) return OPAL_ERR_OUT_OF_RESOURCE; } - component->segment_offset = MCA_BTL_VADER_FIFO_SIZE; + component->mpool = mca_mpool_basic_create ((void *) (component->my_segment + MCA_BTL_VADER_FIFO_SIZE), + (unsigned long) (mca_btl_vader_component.segment_size - MCA_BTL_VADER_FIFO_SIZE), 64); + if (NULL == component->mpool) { + free (component->endpoints); + return OPAL_ERR_OUT_OF_RESOURCE; + } + + rc = opal_free_list_init (&component->vader_fboxes, sizeof (opal_free_list_item_t), 8, + OBJ_CLASS(opal_free_list_item_t), mca_btl_vader_component.fbox_size, + opal_cache_line_size, 0, mca_btl_vader_component.fbox_max, 4, + component->mpool, 0, NULL, NULL, NULL); + if (OPAL_SUCCESS != rc) { + return rc; + } /* initialize fragment descriptor free lists */ /* initialize free list for small send and inline fragments */ rc = opal_free_list_init (&component->vader_frags_user, sizeof(mca_btl_vader_frag_t), opal_cache_line_size, OBJ_CLASS(mca_btl_vader_frag_t), - 0, opal_cache_line_size, - component->vader_free_list_num, + mca_btl_vader_component.max_inline_send + sizeof (mca_btl_vader_frag_t), + opal_cache_line_size, component->vader_free_list_num, component->vader_free_list_max, component->vader_free_list_inc, - NULL, 0, NULL, mca_btl_vader_frag_init, - (void *)(intptr_t) mca_btl_vader_component.max_inline_send); + component->mpool, 0, NULL, mca_btl_vader_frag_init, + &component->vader_frags_user); if (OPAL_SUCCESS != rc) { return rc; } @@ -118,12 +131,12 @@ static int 
vader_btl_first_time_init(mca_btl_vader_t *vader_btl, int n) rc = opal_free_list_init (&component->vader_frags_eager, sizeof (mca_btl_vader_frag_t), opal_cache_line_size, OBJ_CLASS(mca_btl_vader_frag_t), - 0, opal_cache_line_size, - component->vader_free_list_num, + mca_btl_vader.super.btl_eager_limit + sizeof (mca_btl_vader_frag_t), + opal_cache_line_size, component->vader_free_list_num, component->vader_free_list_max, component->vader_free_list_inc, - NULL, 0, NULL, mca_btl_vader_frag_init, - (void *)(intptr_t) mca_btl_vader.super.btl_eager_limit); + component->mpool, 0, NULL, mca_btl_vader_frag_init, + &component->vader_frags_eager); if (OPAL_SUCCESS != rc) { return rc; } @@ -133,12 +146,12 @@ static int vader_btl_first_time_init(mca_btl_vader_t *vader_btl, int n) rc = opal_free_list_init (&component->vader_frags_max_send, sizeof (mca_btl_vader_frag_t), opal_cache_line_size, OBJ_CLASS(mca_btl_vader_frag_t), - 0, opal_cache_line_size, - component->vader_free_list_num, + mca_btl_vader.super.btl_max_send_size + sizeof (mca_btl_vader_frag_t), + opal_cache_line_size, component->vader_free_list_num, component->vader_free_list_max, component->vader_free_list_inc, - NULL, 0, NULL, mca_btl_vader_frag_init, - (void *)(intptr_t) mca_btl_vader.super.btl_max_send_size); + component->mpool, 0, NULL, mca_btl_vader_frag_init, + &component->vader_frags_max_send); if (OPAL_SUCCESS != rc) { return rc; } @@ -367,6 +380,11 @@ static int vader_finalize(struct mca_btl_base_module_t *btl) } #endif + if (component->mpool) { + component->mpool->mpool_finalize (component->mpool); + component->mpool = NULL; + } + return OPAL_SUCCESS; } @@ -536,6 +554,7 @@ static void mca_btl_vader_endpoint_constructor (mca_btl_vader_endpoint_t *ep) OBJ_CONSTRUCT(&ep->pending_frags, opal_list_t); OBJ_CONSTRUCT(&ep->pending_frags_lock, opal_mutex_t); ep->fifo = NULL; + ep->fbox_out.fbox = NULL; } #if OPAL_BTL_VADER_HAVE_XPMEM @@ -564,8 +583,12 @@ static void mca_btl_vader_endpoint_destructor 
(mca_btl_vader_endpoint_t *ep) /* disconnect from the peer's segment */ opal_shmem_segment_detach (&seg_ds); } + if (ep->fbox_out.fbox) { + opal_free_list_return (&mca_btl_vader_component.vader_fboxes, ep->fbox_out.fbox); + } ep->fbox_in.buffer = ep->fbox_out.buffer = NULL; + ep->fbox_out.fbox = NULL; ep->segment_base = NULL; ep->fifo = NULL; } diff --git a/opal/mca/mpool/base/Makefile.am b/opal/mca/mpool/base/Makefile.am index 646444e231..dd85a97fb6 100644 --- a/opal/mca/mpool/base/Makefile.am +++ b/opal/mca/mpool/base/Makefile.am @@ -28,7 +28,8 @@ libmca_mpool_la_SOURCES += \ base/mpool_base_lookup.c \ base/mpool_base_alloc.c \ base/mpool_base_tree.c \ - base/mpool_base_default.c + base/mpool_base_default.c \ + base/mpool_base_basic.c dist_opaldata_DATA += \ base/help-mpool-base.txt diff --git a/opal/mca/mpool/base/base.h b/opal/mca/mpool/base/base.h index 88a99cad01..6d95665bff 100644 --- a/opal/mca/mpool/base/base.h +++ b/opal/mca/mpool/base/base.h @@ -53,6 +53,8 @@ OPAL_DECLSPEC OBJ_CLASS_DECLARATION(mca_mpool_base_selected_module_t); OPAL_DECLSPEC mca_mpool_base_component_t* mca_mpool_base_component_lookup(const char* name); OPAL_DECLSPEC mca_mpool_base_module_t* mca_mpool_base_module_lookup(const char* name); +OPAL_DECLSPEC mca_mpool_base_module_t *mca_mpool_basic_create (void *base, size_t size, unsigned min_align); + /* * Globals */ diff --git a/opal/mca/mpool/base/mpool_base_basic.c b/opal/mca/mpool/base/mpool_base_basic.c new file mode 100644 index 0000000000..fba7e6fed7 --- /dev/null +++ b/opal/mca/mpool/base/mpool_base_basic.c @@ -0,0 +1,109 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2016 Intel, Inc. All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved.
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "opal_config.h" +#include "opal/align.h" + +#include +#include +#ifdef HAVE_UNISTD_H +#include +#endif /* HAVE_UNISTD_H */ + +#include "opal/mca/mca.h" +#include "opal/mca/base/base.h" +#include "opal/mca/mpool/base/base.h" +#include "opal/constants.h" +#include "opal/util/sys_limits.h" +/* Basic mpool state: super is the generic module and must stay first because the code casts between the two types; lock guards ptr and avail; ptr is the next unallocated address; size is the total region size recorded at creation; avail is the number of bytes still unallocated; min_align is the smallest alignment applied to any allocation. */ +struct mca_mpool_base_basic_module_t { + mca_mpool_base_module_t super; + opal_mutex_t lock; + uintptr_t ptr; + size_t size; + size_t avail; + unsigned min_align; +}; +typedef struct mca_mpool_base_basic_module_t mca_mpool_base_basic_module_t; +/* mpool_alloc callback: carve size bytes out of the region handed to mca_mpool_basic_create. Returns the aligned address, or NULL when the remaining space is too small. The flags argument is not used. */ +static void *mca_mpool_base_basic_alloc (mca_mpool_base_module_t *mpool, size_t size, + size_t align, uint32_t flags) +{ + mca_mpool_base_basic_module_t *basic_module = (mca_mpool_base_basic_module_t *) mpool; + uintptr_t next_ptr; + void *ptr; +/* serialize updates to ptr and avail */ + opal_mutex_lock (&basic_module->lock); +/* never align less strictly than the minimum chosen at creation */ + align = align > basic_module->min_align ? align : basic_module->min_align; +/* first address at or after ptr with the requested alignment (assumes OPAL_ALIGN rounds up; see opal/align.h) */ + next_ptr = OPAL_ALIGN(basic_module->ptr, align, uintptr_t); +/* charge the request, padded to a multiple of 8 bytes, plus the bytes skipped to reach next_ptr */ + size = OPAL_ALIGN(size, 8, size_t) + next_ptr - basic_module->ptr; +/* not enough space left: fail without changing the pool state */ + if (size > basic_module->avail) { + opal_mutex_unlock (&basic_module->lock); + return NULL; + } +/* commit: hand out next_ptr and advance past the padded request */ + ptr = (void *) next_ptr; + basic_module->avail -= size; + basic_module->ptr += size; + + opal_mutex_unlock (&basic_module->lock); + return ptr; +} + +/** + * free function + */ +static void mca_mpool_base_basic_free (mca_mpool_base_module_t *mpool, void *addr) +{ + /* nothing to do for now */ +} +/* mpool_finalize callback: destroy the lock and free the module allocated by mca_mpool_basic_create. The managed region itself is not released here. */ +static void mca_mpool_base_basic_finalize (struct mca_mpool_base_module_t *mpool) +{ + mca_mpool_base_basic_module_t *basic_module = (mca_mpool_base_basic_module_t *) mpool; + + OBJ_DESTRUCT(&basic_module->lock); + free (mpool); +} +/* Function table and flags copied into every module by mca_mpool_basic_create. */ +static mca_mpool_base_module_t mca_mpool_basic_template = { + .mpool_alloc = mca_mpool_base_basic_alloc, + .mpool_free = mca_mpool_base_basic_free, + .mpool_finalize = mca_mpool_base_basic_finalize, + .flags =
MCA_MPOOL_FLAGS_MPI_ALLOC_MEM, +}; +/* Create a basic mpool over the caller-supplied region [base, base + size). Allocations are aligned to at least min_align. Returns the new module, or NULL if the module itself cannot be allocated. */ +mca_mpool_base_module_t *mca_mpool_basic_create (void *base, size_t size, unsigned min_align) +{ + mca_mpool_base_basic_module_t *basic_module = calloc (1, sizeof (*basic_module)); + + if (OPAL_UNLIKELY(NULL == basic_module)) { + return NULL; + } +/* start from the shared template of callbacks and flags */ + memcpy (&basic_module->super, &mca_mpool_basic_template, sizeof (mca_mpool_basic_template)); + + OBJ_CONSTRUCT(&basic_module->lock, opal_mutex_t); +/* the whole region starts out unallocated */ + basic_module->super.mpool_base = base; + basic_module->ptr = (uintptr_t) base; + basic_module->size = basic_module->avail = size; + basic_module->min_align = min_align; + + return &basic_module->super; +}