From 20017d345e49b644316af476ec0345ac3f68316a Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Mon, 14 Jan 2019 15:54:16 -0700 Subject: [PATCH] btl/vader: use basic mpool type to handle frag/fbox allocation This commit updates btl/vader to use an mpool for handling all shared memory allocations (frags, fboxes). Signed-off-by: Nathan Hjelm --- opal/mca/btl/vader/btl_vader.h | 7 ++-- opal/mca/btl/vader/btl_vader_component.c | 8 +++-- opal/mca/btl/vader/btl_vader_endpoint.h | 8 ++++- opal/mca/btl/vader/btl_vader_fbox.h | 17 +++++----- opal/mca/btl/vader/btl_vader_frag.c | 35 +++---------------- opal/mca/btl/vader/btl_vader_module.c | 43 ++++++++++++++++++------ 6 files changed, 62 insertions(+), 56 deletions(-) diff --git a/opal/mca/btl/vader/btl_vader.h b/opal/mca/btl/vader/btl_vader.h index 7e1afad24c..028c7a3816 100644 --- a/opal/mca/btl/vader/btl_vader.h +++ b/opal/mca/btl/vader/btl_vader.h @@ -15,6 +15,8 @@ * Copyright (c) 2010-2018 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2015 Mellanox Technologies. All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. * * $COPYRIGHT$ * @@ -53,6 +55,7 @@ #include "opal/mca/rcache/rcache.h" #include "opal/mca/rcache/base/base.h" #include "opal/mca/btl/base/btl_base_error.h" +#include "opal/mca/mpool/base/base.h" #include "opal/util/proc.h" #include "btl_vader_endpoint.h" @@ -112,16 +115,15 @@ struct mca_btl_vader_component_t { opal_mutex_t lock; /**< lock to protect concurrent updates to this structure's members */ char *my_segment; /**< this rank's base pointer */ size_t segment_size; /**< size of my_segment */ - size_t segment_offset; /**< start of unused portion of my_segment */ int32_t num_smp_procs; /**< current number of smp procs on this host */ opal_free_list_t vader_frags_eager; /**< free list of vader send frags */ opal_free_list_t vader_frags_max_send; /**< free list of vader max send frags (large fragments) */ opal_free_list_t vader_frags_user; /**< free list of small inline frags */ + opal_free_list_t vader_fboxes; /**< free list of available fast-boxes */ unsigned int fbox_threshold; /**< number of sends required before we setup a send fast box for a peer */ unsigned int fbox_max; /**< maximum number of send fast boxes to allocate */ unsigned int fbox_size; /**< size of each peer fast box allocation */ - unsigned int fbox_count; /**< number of send fast boxes allocated */ int single_copy_mechanism; /**< single copy mechanism to use */ @@ -143,6 +145,7 @@ struct mca_btl_vader_component_t { #if OPAL_BTL_VADER_HAVE_KNEM unsigned int knem_dma_min; /**< minimum size to enable DMA for knem transfers (0 disables) */ #endif + mca_mpool_base_module_t *mpool; }; typedef struct mca_btl_vader_component_t mca_btl_vader_component_t; OPAL_MODULE_DECLSPEC extern mca_btl_vader_component_t mca_btl_vader_component; diff --git a/opal/mca/btl/vader/btl_vader_component.c b/opal/mca/btl/vader/btl_vader_component.c index 3fc35c6a46..1119725997 100644 --- a/opal/mca/btl/vader/btl_vader_component.c +++ b/opal/mca/btl/vader/btl_vader_component.c @@ -18,6 +18,9 @@ * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. * Copyright (c) 2014-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -301,6 +304,7 @@ static int mca_btl_vader_component_open(void) OBJ_CONSTRUCT(&mca_btl_vader_component.vader_frags_eager, opal_free_list_t); OBJ_CONSTRUCT(&mca_btl_vader_component.vader_frags_user, opal_free_list_t); OBJ_CONSTRUCT(&mca_btl_vader_component.vader_frags_max_send, opal_free_list_t); + OBJ_CONSTRUCT(&mca_btl_vader_component.vader_fboxes, opal_free_list_t); OBJ_CONSTRUCT(&mca_btl_vader_component.lock, opal_mutex_t); OBJ_CONSTRUCT(&mca_btl_vader_component.pending_endpoints, opal_list_t); OBJ_CONSTRUCT(&mca_btl_vader_component.pending_fragments, opal_list_t); @@ -321,6 +325,7 @@ static int mca_btl_vader_component_close(void) OBJ_DESTRUCT(&mca_btl_vader_component.vader_frags_eager); OBJ_DESTRUCT(&mca_btl_vader_component.vader_frags_user); OBJ_DESTRUCT(&mca_btl_vader_component.vader_frags_max_send); + OBJ_DESTRUCT(&mca_btl_vader_component.vader_fboxes); OBJ_DESTRUCT(&mca_btl_vader_component.lock); OBJ_DESTRUCT(&mca_btl_vader_component.pending_endpoints); OBJ_DESTRUCT(&mca_btl_vader_component.pending_fragments); @@ -517,7 +522,6 @@ static mca_btl_base_module_t **mca_btl_vader_component_init (int *num_btls, /* no fast boxes allocated initially */ component->num_fbox_in_endpoints = 0; - component->fbox_count = 0; mca_btl_vader_check_single_copy (); @@ -559,8 +563,6 @@ static mca_btl_base_module_t **mca_btl_vader_component_init (int *num_btls, } } - component->segment_offset = 0; - /* initialize my fifo */ vader_fifo_init ((struct vader_fifo_t *) component->my_segment); diff --git a/opal/mca/btl/vader/btl_vader_endpoint.h b/opal/mca/btl/vader/btl_vader_endpoint.h index d3a39e08f2..e9409b90c1 100644 --- a/opal/mca/btl/vader/btl_vader_endpoint.h +++ b/opal/mca/btl/vader/btl_vader_endpoint.h @@ -13,6 +13,8 @@ * Copyright (c) 2006-2007 Voltaire. All rights reserved. * Copyright (c) 2012-2016 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -58,6 +60,7 @@ typedef struct mca_btl_base_endpoint_t { uint32_t *startp; /**< pointer to location storing start offset */ unsigned int start, end; uint16_t seq; + opal_free_list_item_t *fbox; /**< fast-box free list item */ } fbox_out; int32_t peer_smp_rank; /**< my peer's SMP process rank. Used for accessing @@ -101,13 +104,16 @@ static inline void mca_btl_vader_endpoint_setup_fbox_recv (struct mca_btl_base_e endpoint->fbox_in.buffer = base; } -static inline void mca_btl_vader_endpoint_setup_fbox_send (struct mca_btl_base_endpoint_t *endpoint, void *base) +static inline void mca_btl_vader_endpoint_setup_fbox_send (struct mca_btl_base_endpoint_t *endpoint, opal_free_list_item_t *fbox) { + void *base = fbox->ptr; + endpoint->fbox_out.start = MCA_BTL_VADER_FBOX_ALIGNMENT; endpoint->fbox_out.end = MCA_BTL_VADER_FBOX_ALIGNMENT; endpoint->fbox_out.startp = (uint32_t *) base; endpoint->fbox_out.startp[0] = MCA_BTL_VADER_FBOX_ALIGNMENT; endpoint->fbox_out.seq = 0; + endpoint->fbox_out.fbox = fbox; /* zero out the first header in the fast box */ memset ((char *) base + MCA_BTL_VADER_FBOX_ALIGNMENT, 0, MCA_BTL_VADER_FBOX_ALIGNMENT); diff --git a/opal/mca/btl/vader/btl_vader_fbox.h b/opal/mca/btl/vader/btl_vader_fbox.h index 25f99c2c98..3762c62010 100644 --- a/opal/mca/btl/vader/btl_vader_fbox.h +++ b/opal/mca/btl/vader/btl_vader_fbox.h @@ -2,6 +2,8 @@ /* * Copyright (c) 2011-2018 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -259,20 +261,17 @@ static inline void mca_btl_vader_try_fbox_setup (mca_btl_base_endpoint_t *ep, mc /* protect access to mca_btl_vader_component.segment_offset */ OPAL_THREAD_LOCK(&mca_btl_vader_component.lock); - if (mca_btl_vader_component.segment_size >= mca_btl_vader_component.segment_offset + mca_btl_vader_component.fbox_size && - mca_btl_vader_component.fbox_max > mca_btl_vader_component.fbox_count) { - /* verify the remote side will accept another fbox */ - if (0 <= opal_atomic_add_fetch_32 (&ep->fifo->fbox_available, -1)) { - void *fbox_base = mca_btl_vader_component.my_segment + mca_btl_vader_component.segment_offset; - mca_btl_vader_component.segment_offset += mca_btl_vader_component.fbox_size; + /* verify the remote side will accept another fbox */ + if (0 <= opal_atomic_add_fetch_32 (&ep->fifo->fbox_available, -1)) { + opal_free_list_item_t *fbox = opal_free_list_get (&mca_btl_vader_component.vader_fboxes); + if (NULL != fbox) { /* zero out the fast box */ - memset (fbox_base, 0, mca_btl_vader_component.fbox_size); - mca_btl_vader_endpoint_setup_fbox_send (ep, fbox_base); + memset (fbox->ptr, 0, mca_btl_vader_component.fbox_size); + mca_btl_vader_endpoint_setup_fbox_send (ep, fbox); hdr->flags |= MCA_BTL_VADER_FLAG_SETUP_FBOX; hdr->fbox_base = virtual2relative((char *) ep->fbox_out.buffer); - ++mca_btl_vader_component.fbox_count; } else { opal_atomic_add_fetch_32 (&ep->fifo->fbox_available, 1); } diff --git a/opal/mca/btl/vader/btl_vader_frag.c b/opal/mca/btl/vader/btl_vader_frag.c index a132ea3d72..3635af99a4 100644 --- a/opal/mca/btl/vader/btl_vader_frag.c +++ b/opal/mca/btl/vader/btl_vader_frag.c @@ -13,6 +13,8 @@ * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -41,38 +43,9 @@ static inline void mca_btl_vader_frag_constructor (mca_btl_vader_frag_t *frag) int mca_btl_vader_frag_init (opal_free_list_item_t *item, void *ctx) { mca_btl_vader_frag_t *frag = (mca_btl_vader_frag_t *) item; - unsigned int data_size = (unsigned int)(uintptr_t) ctx; - unsigned int frag_size = data_size + sizeof (mca_btl_vader_hdr_t); - /* ensure next fragment is aligned on a cache line */ - frag_size = (frag_size + 63) & ~63; - - OPAL_THREAD_LOCK(&mca_btl_vader_component.lock); - - if (data_size && mca_btl_vader_component.segment_size < mca_btl_vader_component.segment_offset + frag_size) { - OPAL_THREAD_UNLOCK(&mca_btl_vader_component.lock); - return OPAL_ERR_OUT_OF_RESOURCE; - } - - /* Set the list element here so we don't have to set it on the critical path. This only - * works if each free list has its own unique fragment size and ALL free lists are initialized - * with opal_free_list_init. */ - if (mca_btl_vader_component.max_inline_send == data_size) { - frag->my_list = &mca_btl_vader_component.vader_frags_user; - } else if (mca_btl_vader.super.btl_eager_limit == data_size) { - frag->my_list = &mca_btl_vader_component.vader_frags_eager; - } else if (mca_btl_vader.super.btl_max_send_size == data_size) { - frag->my_list = &mca_btl_vader_component.vader_frags_max_send; - } - - if (data_size) { - item->ptr = mca_btl_vader_component.my_segment + mca_btl_vader_component.segment_offset; - mca_btl_vader_component.segment_offset += frag_size; - } - - OPAL_THREAD_UNLOCK(&mca_btl_vader_component.lock); - - mca_btl_vader_frag_constructor ((mca_btl_vader_frag_t *) item); + /* Set the list element here so we don't have to set it on the critical path */ + frag->my_list = (opal_free_list_t *) ctx; return OPAL_SUCCESS; } diff --git a/opal/mca/btl/vader/btl_vader_module.c b/opal/mca/btl/vader/btl_vader_module.c index bcc25f0f00..1dbd2defba 100644 --- a/opal/mca/btl/vader/btl_vader_module.c +++ b/opal/mca/btl/vader/btl_vader_module.c @@ -97,19 +97,32 @@ static int vader_btl_first_time_init(mca_btl_vader_t *vader_btl, int n) return OPAL_ERR_OUT_OF_RESOURCE; } - component->segment_offset = MCA_BTL_VADER_FIFO_SIZE; + component->mpool = mca_mpool_basic_create ((void *) (component->my_segment + MCA_BTL_VADER_FIFO_SIZE), + (unsigned long) (mca_btl_vader_component.segment_size - MCA_BTL_VADER_FIFO_SIZE), 64); + if (NULL == component->mpool) { + free (component->endpoints); + return OPAL_ERR_OUT_OF_RESOURCE; + } + + rc = opal_free_list_init (&component->vader_fboxes, sizeof (opal_free_list_item_t), 8, + OBJ_CLASS(opal_free_list_item_t), mca_btl_vader_component.fbox_size, + opal_cache_line_size, 0, mca_btl_vader_component.fbox_max, 4, + component->mpool, 0, NULL, NULL, NULL); + if (OPAL_SUCCESS != rc) { + return rc; + } /* initialize fragment descriptor free lists */ /* initialize free list for small send and inline fragments */ rc = opal_free_list_init (&component->vader_frags_user, sizeof(mca_btl_vader_frag_t), opal_cache_line_size, OBJ_CLASS(mca_btl_vader_frag_t), - 0, opal_cache_line_size, + mca_btl_vader_component.max_inline_send, opal_cache_line_size, component->vader_free_list_num, component->vader_free_list_max, component->vader_free_list_inc, - NULL, 0, NULL, mca_btl_vader_frag_init, - (void *)(intptr_t) mca_btl_vader_component.max_inline_send); + component->mpool, 0, NULL, mca_btl_vader_frag_init, + &component->vader_frags_user); if (OPAL_SUCCESS != rc) { return rc; } @@ -118,12 +131,12 @@ static int vader_btl_first_time_init(mca_btl_vader_t *vader_btl, int n) rc = opal_free_list_init (&component->vader_frags_eager, sizeof (mca_btl_vader_frag_t), opal_cache_line_size, OBJ_CLASS(mca_btl_vader_frag_t), - 0, opal_cache_line_size, + mca_btl_vader.super.btl_eager_limit, opal_cache_line_size, component->vader_free_list_num, component->vader_free_list_max, component->vader_free_list_inc, - NULL, 0, NULL, mca_btl_vader_frag_init, - (void *)(intptr_t) mca_btl_vader.super.btl_eager_limit); + component->mpool, 0, NULL, mca_btl_vader_frag_init, + &component->vader_frags_eager); if (OPAL_SUCCESS != rc) { return rc; } @@ -133,12 +146,12 @@ static int vader_btl_first_time_init(mca_btl_vader_t *vader_btl, int n) rc = opal_free_list_init (&component->vader_frags_max_send, sizeof (mca_btl_vader_frag_t), opal_cache_line_size, OBJ_CLASS(mca_btl_vader_frag_t), - 0, opal_cache_line_size, + mca_btl_vader.super.btl_max_send_size, opal_cache_line_size, component->vader_free_list_num, component->vader_free_list_max, component->vader_free_list_inc, - NULL, 0, NULL, mca_btl_vader_frag_init, - (void *)(intptr_t) mca_btl_vader.super.btl_max_send_size); + component->mpool, 0, NULL, mca_btl_vader_frag_init, + &component->vader_frags_max_send); if (OPAL_SUCCESS != rc) { return rc; } @@ -367,6 +380,11 @@ static int vader_finalize(struct mca_btl_base_module_t *btl) } #endif + if (component->mpool) { + component->mpool->mpool_finalize (component->mpool); + component->mpool = NULL; + } + return OPAL_SUCCESS; } @@ -536,6 +554,7 @@ static void mca_btl_vader_endpoint_constructor (mca_btl_vader_endpoint_t *ep) OBJ_CONSTRUCT(&ep->pending_frags, opal_list_t); OBJ_CONSTRUCT(&ep->pending_frags_lock, opal_mutex_t); ep->fifo = NULL; + ep->fbox_out.fbox = NULL; } #if OPAL_BTL_VADER_HAVE_XPMEM @@ -564,8 +583,12 @@ static void mca_btl_vader_endpoint_destructor (mca_btl_vader_endpoint_t *ep) /* disconnect from the peer's segment */ opal_shmem_segment_detach (&seg_ds); } + if (ep->fbox_out.fbox) { + opal_free_list_return (&mca_btl_vader_component.vader_fboxes, ep->fbox_out.fbox); + } ep->fbox_in.buffer = ep->fbox_out.buffer = NULL; + ep->fbox_out.fbox = NULL; ep->segment_base = NULL; ep->fifo = NULL; }