1
1

btl/vader: use basic mpool type to handle frag/fbox allocation

This commit updates btl/vader to use an mpool for handling all shared
memory allocations (frags, fboxes).

Signed-off-by: Nathan Hjelm <hjelmn@lanl.gov>
Этот коммит содержится в:
Nathan Hjelm 2019-01-14 15:54:16 -07:00
родитель 6ffc7cc96c
Коммит f62d26ddbc
6 изменённых файлов: 61 добавлений и 56 удалений

Просмотреть файл

@ -15,6 +15,8 @@
* Copyright (c) 2010-2018 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2015 Mellanox Technologies. All rights reserved.
* Copyright (c) 2018 Triad National Security, LLC. All rights
* reserved.
*
* $COPYRIGHT$
*
@ -53,6 +55,7 @@
#include "opal/mca/rcache/rcache.h"
#include "opal/mca/rcache/base/base.h"
#include "opal/mca/btl/base/btl_base_error.h"
#include "opal/mca/mpool/base/base.h"
#include "opal/util/proc.h"
#include "btl_vader_endpoint.h"
@ -112,16 +115,15 @@ struct mca_btl_vader_component_t {
opal_mutex_t lock; /**< lock to protect concurrent updates to this structure's members */
char *my_segment; /**< this rank's base pointer */
size_t segment_size; /**< size of my_segment */
size_t segment_offset; /**< start of unused portion of my_segment */
int32_t num_smp_procs; /**< current number of smp procs on this host */
opal_free_list_t vader_frags_eager; /**< free list of vader send frags */
opal_free_list_t vader_frags_max_send; /**< free list of vader max send frags (large fragments) */
opal_free_list_t vader_frags_user; /**< free list of small inline frags */
opal_free_list_t vader_fboxes; /**< free list of available fast-boxes */
unsigned int fbox_threshold; /**< number of sends required before we setup a send fast box for a peer */
unsigned int fbox_max; /**< maximum number of send fast boxes to allocate */
unsigned int fbox_size; /**< size of each peer fast box allocation */
unsigned int fbox_count; /**< number of send fast boxes allocated */
int single_copy_mechanism; /**< single copy mechanism to use */
@ -143,6 +145,7 @@ struct mca_btl_vader_component_t {
#if OPAL_BTL_VADER_HAVE_KNEM
unsigned int knem_dma_min; /**< minimum size to enable DMA for knem transfers (0 disables) */
#endif
mca_mpool_base_module_t *mpool;
};
typedef struct mca_btl_vader_component_t mca_btl_vader_component_t;
OPAL_MODULE_DECLSPEC extern mca_btl_vader_component_t mca_btl_vader_component;

Просмотреть файл

@ -19,6 +19,8 @@
* Copyright (c) 2014-2018 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved.
* Copyright (c) 2018 Triad National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -303,6 +305,7 @@ static int mca_btl_vader_component_open(void)
OBJ_CONSTRUCT(&mca_btl_vader_component.vader_frags_eager, opal_free_list_t);
OBJ_CONSTRUCT(&mca_btl_vader_component.vader_frags_user, opal_free_list_t);
OBJ_CONSTRUCT(&mca_btl_vader_component.vader_frags_max_send, opal_free_list_t);
OBJ_CONSTRUCT(&mca_btl_vader_component.vader_fboxes, opal_free_list_t);
OBJ_CONSTRUCT(&mca_btl_vader_component.lock, opal_mutex_t);
OBJ_CONSTRUCT(&mca_btl_vader_component.pending_endpoints, opal_list_t);
OBJ_CONSTRUCT(&mca_btl_vader_component.pending_fragments, opal_list_t);
@ -323,6 +326,7 @@ static int mca_btl_vader_component_close(void)
OBJ_DESTRUCT(&mca_btl_vader_component.vader_frags_eager);
OBJ_DESTRUCT(&mca_btl_vader_component.vader_frags_user);
OBJ_DESTRUCT(&mca_btl_vader_component.vader_frags_max_send);
OBJ_DESTRUCT(&mca_btl_vader_component.vader_fboxes);
OBJ_DESTRUCT(&mca_btl_vader_component.lock);
OBJ_DESTRUCT(&mca_btl_vader_component.pending_endpoints);
OBJ_DESTRUCT(&mca_btl_vader_component.pending_fragments);
@ -523,7 +527,6 @@ static mca_btl_base_module_t **mca_btl_vader_component_init (int *num_btls,
/* no fast boxes allocated initially */
component->num_fbox_in_endpoints = 0;
component->fbox_count = 0;
mca_btl_vader_check_single_copy ();
@ -564,8 +567,6 @@ static mca_btl_base_module_t **mca_btl_vader_component_init (int *num_btls,
}
}
component->segment_offset = 0;
/* initialize my fifo */
vader_fifo_init ((struct vader_fifo_t *) component->my_segment);

Просмотреть файл

@ -13,6 +13,8 @@
* Copyright (c) 2006-2007 Voltaire. All rights reserved.
* Copyright (c) 2012-2016 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2018 Triad National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -58,6 +60,7 @@ typedef struct mca_btl_base_endpoint_t {
uint32_t *startp; /**< pointer to location storing start offset */
unsigned int start, end;
uint16_t seq;
opal_free_list_item_t *fbox; /**< fast-box free list item */
} fbox_out;
int32_t peer_smp_rank; /**< my peer's SMP process rank. Used for accessing
@ -101,13 +104,16 @@ static inline void mca_btl_vader_endpoint_setup_fbox_recv (struct mca_btl_base_e
endpoint->fbox_in.buffer = base;
}
static inline void mca_btl_vader_endpoint_setup_fbox_send (struct mca_btl_base_endpoint_t *endpoint, void *base)
static inline void mca_btl_vader_endpoint_setup_fbox_send (struct mca_btl_base_endpoint_t *endpoint, opal_free_list_item_t *fbox)
{
void *base = fbox->ptr;
endpoint->fbox_out.start = MCA_BTL_VADER_FBOX_ALIGNMENT;
endpoint->fbox_out.end = MCA_BTL_VADER_FBOX_ALIGNMENT;
endpoint->fbox_out.startp = (uint32_t *) base;
endpoint->fbox_out.startp[0] = MCA_BTL_VADER_FBOX_ALIGNMENT;
endpoint->fbox_out.seq = 0;
endpoint->fbox_out.fbox = fbox;
/* zero out the first header in the fast box */
memset ((char *) base + MCA_BTL_VADER_FBOX_ALIGNMENT, 0, MCA_BTL_VADER_FBOX_ALIGNMENT);

Просмотреть файл

@ -2,6 +2,8 @@
/*
* Copyright (c) 2011-2018 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2018 Triad National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -259,20 +261,17 @@ static inline void mca_btl_vader_try_fbox_setup (mca_btl_base_endpoint_t *ep, mc
/* protect access to mca_btl_vader_component.segment_offset */
OPAL_THREAD_LOCK(&mca_btl_vader_component.lock);
if (mca_btl_vader_component.segment_size >= mca_btl_vader_component.segment_offset + mca_btl_vader_component.fbox_size &&
mca_btl_vader_component.fbox_max > mca_btl_vader_component.fbox_count) {
/* verify the remote side will accept another fbox */
if (0 <= opal_atomic_add_fetch_32 (&ep->fifo->fbox_available, -1)) {
void *fbox_base = mca_btl_vader_component.my_segment + mca_btl_vader_component.segment_offset;
mca_btl_vader_component.segment_offset += mca_btl_vader_component.fbox_size;
/* verify the remote side will accept another fbox */
if (0 <= opal_atomic_add_fetch_32 (&ep->fifo->fbox_available, -1)) {
opal_free_list_item_t *fbox = opal_free_list_get (&mca_btl_vader_component.vader_fboxes);
if (NULL != fbox) {
/* zero out the fast box */
memset (fbox_base, 0, mca_btl_vader_component.fbox_size);
mca_btl_vader_endpoint_setup_fbox_send (ep, fbox_base);
memset (fbox->ptr, 0, mca_btl_vader_component.fbox_size);
mca_btl_vader_endpoint_setup_fbox_send (ep, fbox);
hdr->flags |= MCA_BTL_VADER_FLAG_SETUP_FBOX;
hdr->fbox_base = virtual2relative((char *) ep->fbox_out.buffer);
++mca_btl_vader_component.fbox_count;
} else {
opal_atomic_add_fetch_32 (&ep->fifo->fbox_available, 1);
}

Просмотреть файл

@ -13,6 +13,8 @@
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2018 Triad National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -41,38 +43,9 @@ static inline void mca_btl_vader_frag_constructor (mca_btl_vader_frag_t *frag)
/*
 * Free-list item initializer for vader fragments.
 *
 * item: free-list item being initialized; it is accessed as a
 *       mca_btl_vader_frag_t.
 * ctx:  opaque value handed to this callback by the free list.
 *
 * Returns OPAL_SUCCESS, or OPAL_ERR_OUT_OF_RESOURCE when a non-zero data size
 * does not fit in the space left in the component's segment.
 *
 * NOTE(review): this listing appears to interleave two revisions of the body
 * (the enclosing hunk header reads -41,38 +43,9). ctx is interpreted both as
 * an integer data size and as an opal_free_list_t pointer below; confirm
 * against the real file which statements are live before relying on it.
 */
int mca_btl_vader_frag_init (opal_free_list_item_t *item, void *ctx)
{
mca_btl_vader_frag_t *frag = (mca_btl_vader_frag_t *) item;
/* here ctx carries the payload size of the fragment, not a pointer */
unsigned int data_size = (unsigned int)(uintptr_t) ctx;
/* total footprint in the segment: payload plus the vader header */
unsigned int frag_size = data_size + sizeof (mca_btl_vader_hdr_t);
/* ensure next fragment is aligned on a cache line */
frag_size = (frag_size + 63) & ~63;
/* segment_offset is read and advanced below, so hold the component lock */
OPAL_THREAD_LOCK(&mca_btl_vader_component.lock);
/* a zero data_size skips this check: such a fragment takes no segment space */
if (data_size && mca_btl_vader_component.segment_size < mca_btl_vader_component.segment_offset + frag_size) {
OPAL_THREAD_UNLOCK(&mca_btl_vader_component.lock);
return OPAL_ERR_OUT_OF_RESOURCE;
}
/* Set the list element here so we don't have to set it on the critical path. This only
* works if each free list has its own unique fragment size and ALL free lists are initialized
* with opal_free_list_init. */
if (mca_btl_vader_component.max_inline_send == data_size) {
frag->my_list = &mca_btl_vader_component.vader_frags_user;
} else if (mca_btl_vader.super.btl_eager_limit == data_size) {
frag->my_list = &mca_btl_vader_component.vader_frags_eager;
} else if (mca_btl_vader.super.btl_max_send_size == data_size) {
frag->my_list = &mca_btl_vader_component.vader_frags_max_send;
}
/* carve the fragment's storage out of the segment and advance the offset */
if (data_size) {
item->ptr = mca_btl_vader_component.my_segment + mca_btl_vader_component.segment_offset;
mca_btl_vader_component.segment_offset += frag_size;
}
OPAL_THREAD_UNLOCK(&mca_btl_vader_component.lock);
mca_btl_vader_frag_constructor ((mca_btl_vader_frag_t *) item);
/* Set the list element here so we don't have to set it on the critical path */
/* here ctx is taken to be the owning free list itself */
frag->my_list = (opal_free_list_t *) ctx;
return OPAL_SUCCESS;
}

Просмотреть файл

@ -97,19 +97,32 @@ static int vader_btl_first_time_init(mca_btl_vader_t *vader_btl, int n)
return OPAL_ERR_OUT_OF_RESOURCE;
}
component->segment_offset = MCA_BTL_VADER_FIFO_SIZE;
component->mpool = mca_mpool_basic_create ((void *) (component->my_segment + MCA_BTL_VADER_FIFO_SIZE),
(unsigned long) (mca_btl_vader_component.segment_size - MCA_BTL_VADER_FIFO_SIZE), 64);
if (NULL == component->mpool) {
free (component->endpoints);
return OPAL_ERR_OUT_OF_RESOURCE;
}
rc = opal_free_list_init (&component->vader_fboxes, sizeof (opal_free_list_item_t), 8,
OBJ_CLASS(opal_free_list_item_t), mca_btl_vader_component.fbox_size,
opal_cache_line_size, 0, mca_btl_vader_component.fbox_max, 4,
component->mpool, 0, NULL, NULL, NULL);
if (OPAL_SUCCESS != rc) {
return rc;
}
/* initialize fragment descriptor free lists */
/* initialize free list for small send and inline fragments */
rc = opal_free_list_init (&component->vader_frags_user,
sizeof(mca_btl_vader_frag_t),
opal_cache_line_size, OBJ_CLASS(mca_btl_vader_frag_t),
0, opal_cache_line_size,
mca_btl_vader_component.max_inline_send, opal_cache_line_size,
component->vader_free_list_num,
component->vader_free_list_max,
component->vader_free_list_inc,
NULL, 0, NULL, mca_btl_vader_frag_init,
(void *)(intptr_t) mca_btl_vader_component.max_inline_send);
component->mpool, 0, NULL, mca_btl_vader_frag_init,
&component->vader_frags_user);
if (OPAL_SUCCESS != rc) {
return rc;
}
@ -118,12 +131,12 @@ static int vader_btl_first_time_init(mca_btl_vader_t *vader_btl, int n)
rc = opal_free_list_init (&component->vader_frags_eager,
sizeof (mca_btl_vader_frag_t),
opal_cache_line_size, OBJ_CLASS(mca_btl_vader_frag_t),
0, opal_cache_line_size,
mca_btl_vader.super.btl_eager_limit, opal_cache_line_size,
component->vader_free_list_num,
component->vader_free_list_max,
component->vader_free_list_inc,
NULL, 0, NULL, mca_btl_vader_frag_init,
(void *)(intptr_t) mca_btl_vader.super.btl_eager_limit);
component->mpool, 0, NULL, mca_btl_vader_frag_init,
&component->vader_frags_eager);
if (OPAL_SUCCESS != rc) {
return rc;
}
@ -133,12 +146,12 @@ static int vader_btl_first_time_init(mca_btl_vader_t *vader_btl, int n)
rc = opal_free_list_init (&component->vader_frags_max_send,
sizeof (mca_btl_vader_frag_t),
opal_cache_line_size, OBJ_CLASS(mca_btl_vader_frag_t),
0, opal_cache_line_size,
mca_btl_vader.super.btl_max_send_size, opal_cache_line_size,
component->vader_free_list_num,
component->vader_free_list_max,
component->vader_free_list_inc,
NULL, 0, NULL, mca_btl_vader_frag_init,
(void *)(intptr_t) mca_btl_vader.super.btl_max_send_size);
component->mpool, 0, NULL, mca_btl_vader_frag_init,
&component->vader_frags_max_send);
if (OPAL_SUCCESS != rc) {
return rc;
}
@ -367,6 +380,11 @@ static int vader_finalize(struct mca_btl_base_module_t *btl)
}
#endif
if (component->mpool) {
component->mpool->mpool_finalize (component->mpool);
component->mpool = NULL;
}
return OPAL_SUCCESS;
}
@ -536,6 +554,7 @@ static void mca_btl_vader_endpoint_constructor (mca_btl_vader_endpoint_t *ep)
OBJ_CONSTRUCT(&ep->pending_frags, opal_list_t);
OBJ_CONSTRUCT(&ep->pending_frags_lock, opal_mutex_t);
ep->fifo = NULL;
ep->fbox_out.fbox = NULL;
}
#if OPAL_BTL_VADER_HAVE_XPMEM
@ -564,8 +583,12 @@ static void mca_btl_vader_endpoint_destructor (mca_btl_vader_endpoint_t *ep)
/* disconnect from the peer's segment */
opal_shmem_segment_detach (&seg_ds);
}
if (ep->fbox_out.fbox) {
opal_free_list_return (&mca_btl_vader_component.vader_fboxes, ep->fbox_out.fbox);
}
ep->fbox_in.buffer = ep->fbox_out.buffer = NULL;
ep->fbox_out.fbox = NULL;
ep->segment_base = NULL;
ep->fifo = NULL;
}