rcache/base: update VMA tree to use opal_interval_tree_t

This commit replaces the current VMA tree implementation with one that
uses the new opal_interval_tree_t class. Since the VMA tree lock is no
longer used, this commit also updates rcache/grdma and btl/vader to be
more careful when searching for existing registrations.

Signed-off-by: Nathan Hjelm <hjelmn@lanl.gov>
This commit is contained in:
Nathan Hjelm 2018-02-16 12:13:17 -07:00 committed by Nathan Hjelm
parent 7163fc98a0
commit 38d9b10db8
9 changed files with 231 additions and 793 deletions
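
For orientation before the per-file diffs: the rewritten VMA tree keeps the mca_rcache_base_vma_iterate entry point but adds a partial_ok argument that controls whether registrations that only partially overlap the requested range are visited. Below is a minimal caller-side sketch of the updated signature, assuming the usual Open MPI include paths; the helper and callback names (find_covering_reg, covering_reg_cb) are hypothetical and not part of this commit.

#include <stdint.h>
#include "opal/mca/rcache/rcache.h"
#include "opal/mca/rcache/base/rcache_base_vma.h"

/* hypothetical callback context: remember the first registration that fully
 * covers the range [base, bound] */
struct covering_reg_ctx_t {
    uintptr_t base;
    uintptr_t bound;
    mca_rcache_base_registration_t *reg;
};

static int covering_reg_cb (mca_rcache_base_registration_t *reg, void *ctx)
{
    struct covering_reg_ctx_t *args = (struct covering_reg_ctx_t *) ctx;

    if ((uintptr_t) reg->base <= args->base && (uintptr_t) reg->bound >= args->bound) {
        args->reg = reg;
        /* a return value other than OPAL_SUCCESS stops the iteration and is
         * passed back to the caller of mca_rcache_base_vma_iterate() */
        return 1;
    }

    return OPAL_SUCCESS;
}

static mca_rcache_base_registration_t *find_covering_reg (mca_rcache_base_vma_module_t *vma_module,
                                                          unsigned char *base, size_t size)
{
    struct covering_reg_ctx_t args = {.base = (uintptr_t) base,
                                      .bound = (uintptr_t) base + size - 1, .reg = NULL};

    /* partial_ok = true: also visit registrations that only partially overlap
     * the requested range (the new argument introduced by this commit) */
    (void) mca_rcache_base_vma_iterate (vma_module, base, size, true,
                                        covering_reg_cb, &args);

    return args.reg;
}

The vader_check_reg/vader_get_registation changes in the first file below follow this same pattern, with additional handling for registrations that must be detached and re-attached.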

View file

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2011-2014 Los Alamos National Security, LLC. All rights
* Copyright (c) 2011-2018 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2014 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
@ -33,7 +33,6 @@ int mca_btl_vader_xpmem_init (void)
}
struct vader_check_reg_ctx_t {
mca_rcache_base_vma_module_t *vma_module;
mca_btl_base_endpoint_t *ep;
mca_rcache_base_registration_t **reg;
uintptr_t base;
@ -58,13 +57,24 @@ static int vader_check_reg (mca_rcache_base_registration_t *reg, void *ctx)
return 1;
}
/* remove this pointer from the rcache and decrement its reference count
(so it is detached later) */
mca_rcache_base_vma_delete (vader_ctx->vma_module, reg);
return 2;
}
void vader_return_registration (mca_rcache_base_registration_t *reg, struct mca_btl_base_endpoint_t *ep)
{
mca_rcache_base_vma_module_t *vma_module = mca_btl_vader_component.vma_module;
int32_t ref_count;
ref_count = opal_atomic_add_fetch_32 (&reg->ref_count, -1);
if (OPAL_UNLIKELY(0 == ref_count && !(reg->flags & MCA_RCACHE_FLAGS_PERSIST))) {
mca_rcache_base_vma_delete (vma_module, reg);
opal_memchecker_base_mem_noaccess (reg->rcache_context, (uintptr_t)(reg->bound - reg->base));
(void)xpmem_detach (reg->rcache_context);
OBJ_RELEASE (reg);
}
}
/* look up the remote pointer in the peer rcache and attach if
* necessary */
mca_rcache_base_registration_t *vader_get_registation (struct mca_btl_base_endpoint_t *ep, void *rem_ptr,
@ -73,7 +83,7 @@ mca_rcache_base_registration_t *vader_get_registation (struct mca_btl_base_endpo
mca_rcache_base_vma_module_t *vma_module = mca_btl_vader_component.vma_module;
uint64_t attach_align = 1 << mca_btl_vader_component.log_attach_align;
mca_rcache_base_registration_t *reg = NULL;
vader_check_reg_ctx_t check_ctx = {.ep = ep, .reg = &reg, .vma_module = vma_module};
vader_check_reg_ctx_t check_ctx = {.ep = ep, .reg = &reg};
xpmem_addr_t xpmem_addr;
uintptr_t base, bound;
int rc;
@ -88,16 +98,17 @@ mca_rcache_base_registration_t *vader_get_registation (struct mca_btl_base_endpo
check_ctx.bound = bound;
/* several segments may match the base pointer */
rc = mca_rcache_base_vma_iterate (vma_module, (void *) base, bound - base, vader_check_reg, &check_ctx);
rc = mca_rcache_base_vma_iterate (vma_module, (void *) base, bound - base, true, vader_check_reg, &check_ctx);
if (2 == rc) {
/* remove this pointer from the rcache and decrement its reference count
(so it is detached later) */
mca_rcache_base_vma_delete (vma_module, reg);
/* start the new segment from the lower of the two bases */
base = (uintptr_t) reg->base < base ? (uintptr_t) reg->base : base;
if (OPAL_LIKELY(0 == opal_atomic_add_fetch_32 (&reg->ref_count, -1))) {
/* this pointer is not in use */
(void) xpmem_detach (reg->rcache_context);
OBJ_RELEASE(reg);
}
/* remove the last reference to this registration */
vader_return_registration (reg, ep);
reg = NULL;
}
@ -127,7 +138,9 @@ mca_rcache_base_registration_t *vader_get_registation (struct mca_btl_base_endpo
opal_memchecker_base_mem_defined (reg->rcache_context, bound - base);
mca_rcache_base_vma_insert (vma_module, reg, 0);
if (!(flags & MCA_RCACHE_FLAGS_PERSIST)) {
mca_rcache_base_vma_insert (vma_module, reg, 0);
}
}
}
@ -138,22 +151,6 @@ mca_rcache_base_registration_t *vader_get_registation (struct mca_btl_base_endpo
return reg;
}
void vader_return_registration (mca_rcache_base_registration_t *reg, struct mca_btl_base_endpoint_t *ep)
{
mca_rcache_base_vma_module_t *vma_module = mca_btl_vader_component.vma_module;
int32_t ref_count;
ref_count = opal_atomic_add_fetch_32 (&reg->ref_count, -1);
if (OPAL_UNLIKELY(0 == ref_count && !(reg->flags & MCA_RCACHE_FLAGS_PERSIST))) {
/* protect rcache access */
mca_rcache_base_vma_delete (vma_module, reg);
opal_memchecker_base_mem_noaccess (reg->rcache_context, (uintptr_t)(reg->bound - reg->base));
(void)xpmem_detach (reg->rcache_context);
OBJ_RELEASE (reg);
}
}
static int mca_btl_vader_endpoint_xpmem_rcache_cleanup (mca_rcache_base_registration_t *reg, void *ctx)
{
mca_rcache_base_vma_module_t *vma_module = mca_btl_vader_component.vma_module;
@ -161,7 +158,6 @@ static int mca_btl_vader_endpoint_xpmem_rcache_cleanup (mca_rcache_base_registra
if ((intptr_t) reg->alloc_base == ep->peer_smp_rank) {
/* otherwise dereg will fail on assert */
reg->ref_count = 0;
(void) mca_rcache_base_vma_delete (vma_module, reg);
OBJ_RELEASE(reg);
}
@ -172,7 +168,7 @@ void mca_btl_vader_xpmem_cleanup_endpoint (struct mca_btl_base_endpoint_t *ep)
{
/* clean out the registration cache */
(void) mca_rcache_base_vma_iterate (mca_btl_vader_component.vma_module,
NULL, (size_t) -1,
NULL, (size_t) -1, true,
mca_btl_vader_endpoint_xpmem_rcache_cleanup,
(void *) ep);
if (ep->segment_base) {

View file

@ -10,7 +10,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2012-2016 Los Alamos National Security, LLC.
* Copyright (c) 2012-2018 Los Alamos National Security, LLC.
* All rights reserved
* Copyright (c) 2015-2016 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
@ -73,11 +73,6 @@ OBJ_CLASS_INSTANCE(mca_rcache_base_registration_t, opal_free_list_item_t,
* Global variables
*/
opal_list_t mca_rcache_base_modules = {{0}};
opal_free_list_t mca_rcache_base_vma_tree_items = {{{0}}};
bool mca_rcache_base_vma_tree_items_inited = false;
unsigned int mca_rcache_base_vma_tree_items_min = TREE_ITEMS_MIN;
int mca_rcache_base_vma_tree_items_max = TREE_ITEMS_MAX;
unsigned int mca_rcache_base_vma_tree_items_inc = TREE_ITEMS_INC;
OBJ_CLASS_INSTANCE(mca_rcache_base_selected_module_t, opal_list_item_t, NULL, NULL);
@ -114,9 +109,6 @@ static int mca_rcache_base_close(void)
(void) mca_base_framework_close (&opal_memory_base_framework);
}
OBJ_DESTRUCT(&mca_rcache_base_vma_tree_items);
mca_rcache_base_vma_tree_items_inited = false;
/* All done */
/* Close all remaining available components */
return mca_base_framework_components_close(&opal_rcache_base_framework, NULL);
@ -133,37 +125,12 @@ static int mca_rcache_base_open(mca_base_open_flag_t flags)
OBJ_CONSTRUCT(&mca_rcache_base_modules, opal_list_t);
/* the free list is only initialized when a VMA tree is created */
OBJ_CONSTRUCT(&mca_rcache_base_vma_tree_items, opal_free_list_t);
/* Open up all available components */
return mca_base_framework_components_open(&opal_rcache_base_framework, flags);
}
static int mca_rcache_base_register_mca_variables (mca_base_register_flag_t flags)
{
mca_rcache_base_vma_tree_items_min = TREE_ITEMS_MIN;
(void) mca_base_framework_var_register (&opal_rcache_base_framework, "vma_tree_items_min",
"Minimum number of VMA tree items to allocate (default: "
STRINGIFY(TREE_ITEMS_MIN) ")", MCA_BASE_VAR_TYPE_UNSIGNED_INT,
NULL, MCA_BASE_VAR_BIND_NO_OBJECT, 0, OPAL_INFO_LVL_6,
MCA_BASE_VAR_SCOPE_READONLY, &mca_rcache_base_vma_tree_items_min);
mca_rcache_base_vma_tree_items_max = TREE_ITEMS_MAX;
(void) mca_base_framework_var_register (&opal_rcache_base_framework, "vma_tree_items_max",
"Maximum number of VMA tree items to allocate (default: "
STRINGIFY(TREE_ITEMS_MAX) ", -1: unlimited)", MCA_BASE_VAR_TYPE_INT,
NULL, MCA_BASE_VAR_BIND_NO_OBJECT, 0, OPAL_INFO_LVL_6,
MCA_BASE_VAR_SCOPE_READONLY, &mca_rcache_base_vma_tree_items_max);
mca_rcache_base_vma_tree_items_inc = TREE_ITEMS_INC;
(void) mca_base_framework_var_register (&opal_rcache_base_framework, "vma_tree_items_inc",
"Number of VMA tree items to allocate at a time (default: "
STRINGIFY(TREE_ITEMS_INC) ")", MCA_BASE_VAR_TYPE_UNSIGNED_INT,
NULL, MCA_BASE_VAR_BIND_NO_OBJECT, 0, OPAL_INFO_LVL_6,
MCA_BASE_VAR_SCOPE_READONLY, &mca_rcache_base_vma_tree_items_inc);
return OPAL_SUCCESS;
}

View file

@ -14,7 +14,7 @@
* Copyright (c) 2009-2013 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2009 IBM Corporation. All rights reserved.
* Copyright (c) 2013 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2015-2017 Los Alamos National Security, LLC. All rights
* Copyright (c) 2015-2018 Los Alamos National Security, LLC. All rights
* reserved.
*
* $COPYRIGHT$
@ -53,14 +53,6 @@ OBJ_CLASS_INSTANCE(mca_rcache_base_vma_module_t, opal_object_t,
mca_rcache_base_vma_module_t *mca_rcache_base_vma_module_alloc (void)
{
if (!mca_rcache_base_vma_tree_items_inited) {
opal_free_list_init (&mca_rcache_base_vma_tree_items, sizeof (mca_rcache_base_vma_item_t),
8, OBJ_CLASS(mca_rcache_base_vma_item_t), 0, 8,
mca_rcache_base_vma_tree_items_min, mca_rcache_base_vma_tree_items_max,
mca_rcache_base_vma_tree_items_inc, NULL, 0, NULL, NULL, NULL);
mca_rcache_base_vma_tree_items_inited = true;
}
return OBJ_NEW(mca_rcache_base_vma_module_t);
}
@ -154,11 +146,11 @@ int mca_rcache_base_vma_delete (mca_rcache_base_vma_module_t *vma_module,
}
int mca_rcache_base_vma_iterate (mca_rcache_base_vma_module_t *vma_module,
unsigned char *base, size_t size,
unsigned char *base, size_t size, bool partial_ok,
int (*callback_fn) (struct mca_rcache_base_registration_t *, void *),
void *ctx)
{
return mca_rcache_base_vma_tree_iterate (vma_module, base, size, callback_fn, ctx);
return mca_rcache_base_vma_tree_iterate (vma_module, base, size, partial_ok, callback_fn, ctx);
}
void mca_rcache_base_vma_dump_range (mca_rcache_base_vma_module_t *vma_module,
@ -166,3 +158,8 @@ void mca_rcache_base_vma_dump_range (mca_rcache_base_vma_module_t *vma_module,
{
mca_rcache_base_vma_tree_dump_range (vma_module, base, size, msg);
}
size_t mca_rcache_base_vma_size (mca_rcache_base_vma_module_t *vma_module)
{
return mca_rcache_base_vma_tree_size (vma_module);
}

View file

@ -13,7 +13,7 @@
*
* Copyright (c) 2006 Voltaire. All rights reserved.
* Copyright (c) 2009 IBM Corporation. All rights reserved.
* Copyright (c) 2015-2017 Los Alamos National Security, LLC. All rights
* Copyright (c) 2015-2018 Los Alamos National Security, LLC. All rights
* reserved.
*
* $COPYRIGHT$
@ -33,7 +33,7 @@
#include "opal_config.h"
#include "opal/class/opal_list.h"
#include "opal/class/opal_rb_tree.h"
#include "opal/class/opal_interval_tree.h"
#include "opal/class/opal_lifo.h"
BEGIN_C_DECLS
@ -42,7 +42,7 @@ struct mca_rcache_base_registration_t;
struct mca_rcache_base_vma_module_t {
opal_object_t super;
opal_rb_tree_t rb_tree;
opal_interval_tree_t tree;
opal_list_t vma_list;
opal_lifo_t vma_gc_lifo;
size_t reg_cur_cache_size;
@ -77,6 +77,7 @@ void mca_rcache_base_vma_dump_range (mca_rcache_base_vma_module_t *vma_module,
* @param[in] vma_module vma tree
* @param[in] base base address of region
* @param[in] size size of region
* @param[in] partial_ok partial overlap of range is ok
* @param[in] callback_fn function to call for each matching registration handle
* @param[in] ctx callback context
*
@ -87,10 +88,12 @@ void mca_rcache_base_vma_dump_range (mca_rcache_base_vma_module_t *vma_module,
* other than OPAL_SUCCESS.
*/
int mca_rcache_base_vma_iterate (mca_rcache_base_vma_module_t *vma_module,
unsigned char *base, size_t size,
unsigned char *base, size_t size, bool partial_ok,
int (*callback_fn) (struct mca_rcache_base_registration_t *, void *),
void *ctx);
size_t mca_rcache_base_vma_size (mca_rcache_base_vma_module_t *vma_module);
END_C_DECLS
#endif /* MCA_RCACHE_BASE_VMA_H */

View file

@ -16,7 +16,7 @@
* Copyright (c) 2009 IBM Corporation. All rights reserved.
* Copyright (c) 2013 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2013 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2015-2017 Los Alamos National Security, LLC. All rights
* Copyright (c) 2015-2018 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
@ -31,332 +31,56 @@
#include "rcache_base_vma_tree.h"
#include "opal/mca/rcache/base/base.h"
OBJ_CLASS_INSTANCE(mca_rcache_base_vma_reg_list_item_t, opal_list_item_t, NULL, NULL);
static void mca_rcache_base_vma_item_construct (mca_rcache_base_vma_item_t *vma_item)
{
OBJ_CONSTRUCT(&vma_item->reg_list, opal_list_t);
vma_item->in_use = false;
}
static void mca_rcache_base_vma_item_destruct (mca_rcache_base_vma_item_t *vma_item)
{
OPAL_LIST_DESTRUCT(&vma_item->reg_list);
}
OBJ_CLASS_INSTANCE(mca_rcache_base_vma_item_t, opal_free_list_item_t,
mca_rcache_base_vma_item_construct,
mca_rcache_base_vma_item_destruct);
/**
* Function for the red black tree to compare 2 keys
*
* @param key1 a pointer to the 1st key
* @param key2 a pointer to the second key
*
* @retval -1 if key1 is below key2
* @retval 1 if key 1 is above key2
* @retval 0 if the keys are the same
*/
static int mca_rcache_base_vma_tree_node_compare(void *key1, void *key2)
{
mca_rcache_base_vma_item_t *vma1 = (mca_rcache_base_vma_item_t *) key1,
*vma2 = (mca_rcache_base_vma_item_t *) key2;
if (vma1->start < vma2->start) {
return -1;
}
if (vma1->start > vma2->start) {
return 1;
}
return 0;
}
static int mca_rcache_base_vma_tree_node_compare_search(void *key1, void *key2)
{
mca_rcache_base_vma_item_t *vma = (mca_rcache_base_vma_item_t *) key2;
uintptr_t addr = (uintptr_t) key1;
if (vma->end < addr) {
return 1;
}
if (vma->start <= addr) {
return 0;
}
return -1;
}
static int mca_rcache_base_vma_tree_node_compare_closest(void *key1, void *key2)
{
mca_rcache_base_vma_item_t *vma = (mca_rcache_base_vma_item_t *) key2, *prev_vma;
uintptr_t addr = (uintptr_t) key1;
if (vma->end < addr) {
return 1;
}
if (vma->start <= addr) {
return 0;
}
prev_vma = (mca_rcache_base_vma_item_t *) opal_list_get_prev (&vma->super);
if (prev_vma == (mca_rcache_base_vma_item_t *) opal_list_get_end (&vma->vma_module->vma_list)
|| prev_vma->end < addr) {
return 0;
}
return -1;
}
static inline
mca_rcache_base_vma_item_t *mca_rcache_base_vma_new (mca_rcache_base_vma_module_t *vma_module,
uintptr_t start, uintptr_t end)
{
mca_rcache_base_vma_item_t *vma_item = (mca_rcache_base_vma_item_t *) opal_free_list_get (&mca_rcache_base_vma_tree_items);
if (NULL == vma_item) {
return NULL;
}
vma_item->start = start;
vma_item->end = end;
vma_item->vma_module = vma_module;
(void) opal_rb_tree_insert (&vma_module->rb_tree, vma_item, vma_item);
return vma_item;
}
static void mca_rcache_base_vma_return (mca_rcache_base_vma_module_t *vma_module, mca_rcache_base_vma_item_t *vma_item)
{
opal_list_item_t *item;
while (NULL != (item = opal_list_remove_first (&vma_item->reg_list))) {
OBJ_RELEASE(item);
}
opal_free_list_return (&mca_rcache_base_vma_tree_items, &vma_item->super);
}
static inline int mca_rcache_base_vma_compare_regs (mca_rcache_base_registration_t *reg1,
mca_rcache_base_registration_t *reg2)
{
/* persistent registrations are on top */
if ((reg1->flags & MCA_RCACHE_FLAGS_PERSIST) &&
!(reg2->flags & MCA_RCACHE_FLAGS_PERSIST)) {
return 1;
}
if (!(reg1->flags & MCA_RCACHE_FLAGS_PERSIST) &&
(reg2->flags & MCA_RCACHE_FLAGS_PERSIST)) {
return -1;
}
if (reg1->bound != reg2->bound) {
return (int)(reg1->bound - reg2->bound);
}
/* tie breaker */
return (int)((intptr_t)reg1 - (intptr_t)reg2);
}
static inline int mca_rcache_base_vma_add_reg (mca_rcache_base_vma_item_t *vma_item,
struct mca_rcache_base_registration_t *reg)
{
mca_rcache_base_vma_reg_list_item_t *item, *entry;
entry = OBJ_NEW(mca_rcache_base_vma_reg_list_item_t);
if (!entry) {
return -1;
}
entry->reg = reg;
OPAL_LIST_FOREACH(item, &vma_item->reg_list, mca_rcache_base_vma_reg_list_item_t) {
if (mca_rcache_base_vma_compare_regs(item->reg, reg) > 0) {
continue;
}
opal_list_insert_pos (&vma_item->reg_list, &item->super, &entry->super);
return 0;
}
opal_list_append (&vma_item->reg_list, &entry->super);
return 0;
}
static inline void mca_rcache_base_vma_remove_reg (mca_rcache_base_vma_item_t *vma_item,
struct mca_rcache_base_registration_t *reg)
{
mca_rcache_base_vma_reg_list_item_t *item;
OPAL_LIST_FOREACH(item, &vma_item->reg_list, mca_rcache_base_vma_reg_list_item_t) {
if (item->reg == reg) {
opal_list_remove_item(&vma_item->reg_list, &item->super);
OBJ_RELEASE(item);
break;
}
}
}
static inline int mca_rcache_base_vma_copy_reg_list (mca_rcache_base_vma_item_t *to,
mca_rcache_base_vma_item_t *from)
{
mca_rcache_base_vma_reg_list_item_t *item_f, *item_t;
OPAL_LIST_FOREACH(item_f, &from->reg_list, mca_rcache_base_vma_reg_list_item_t) {
item_t = OBJ_NEW(mca_rcache_base_vma_reg_list_item_t);
if (NULL == item_t) {
return OPAL_ERR_OUT_OF_RESOURCE;
}
item_t->reg = item_f->reg;
opal_list_append (&to->reg_list, &item_t->super);
}
return OPAL_SUCCESS;
}
/* returns 1 iff two lists contain the same entries */
static inline int mca_rcache_base_vma_compare_reg_lists (mca_rcache_base_vma_item_t *vma1,
mca_rcache_base_vma_item_t *vma2)
{
mca_rcache_base_vma_reg_list_item_t *i1, *i2;
if (!vma1 || !vma2 || opal_list_get_size (&vma1->reg_list) != opal_list_get_size (&vma2->reg_list)) {
return 0;
}
i2 = (mca_rcache_base_vma_reg_list_item_t *) opal_list_get_first(&vma2->reg_list);
OPAL_LIST_FOREACH(i1, &vma1->reg_list, mca_rcache_base_vma_reg_list_item_t) {
if ((void *) i2 == (void *) opal_list_get_end (&vma2->reg_list) || i1->reg != i2->reg) {
return 0;
}
i2 = (mca_rcache_base_vma_reg_list_item_t *) opal_list_get_next (&i2->super);
}
return 1;
}
int mca_rcache_base_vma_tree_init (mca_rcache_base_vma_module_t *vma_module)
{
OBJ_CONSTRUCT(&vma_module->rb_tree, opal_rb_tree_t);
OBJ_CONSTRUCT(&vma_module->vma_list, opal_list_t);
OBJ_CONSTRUCT(&vma_module->vma_gc_lifo, opal_lifo_t);
OBJ_CONSTRUCT(&vma_module->tree, opal_interval_tree_t);
vma_module->reg_cur_cache_size = 0;
return opal_rb_tree_init (&vma_module->rb_tree, mca_rcache_base_vma_tree_node_compare);
return opal_interval_tree_init (&vma_module->tree);
}
void mca_rcache_base_vma_tree_finalize (mca_rcache_base_vma_module_t *vma_module)
{
opal_rb_tree_init(&vma_module->rb_tree, mca_rcache_base_vma_tree_node_compare);
OBJ_DESTRUCT(&vma_module->vma_list);
OBJ_DESTRUCT(&vma_module->vma_gc_lifo);
OBJ_DESTRUCT(&vma_module->rb_tree);
OBJ_DESTRUCT(&vma_module->tree);
}
mca_rcache_base_registration_t *mca_rcache_base_vma_tree_find (mca_rcache_base_vma_module_t *vma_module,
unsigned char *base, unsigned char *bound)
unsigned char *base, unsigned char *bound)
{
mca_rcache_base_vma_item_t *vma;
mca_rcache_base_vma_reg_list_item_t *item;
opal_mutex_lock (&vma_module->vma_lock);
vma = (mca_rcache_base_vma_item_t *) opal_rb_tree_find_with (&vma_module->rb_tree, base,
mca_rcache_base_vma_tree_node_compare_search);
if (!vma) {
opal_mutex_unlock (&vma_module->vma_lock);
return NULL;
}
OPAL_LIST_FOREACH(item, &vma->reg_list, mca_rcache_base_vma_reg_list_item_t) {
if(item->reg->flags & MCA_RCACHE_FLAGS_INVALID) {
continue;
}
if(item->reg->bound >= bound) {
opal_mutex_unlock (&vma_module->vma_lock);
return item->reg;
}
if(!(item->reg->flags & MCA_RCACHE_FLAGS_PERSIST)) {
break;
}
}
opal_mutex_unlock (&vma_module->vma_lock);
return NULL;
return (mca_rcache_base_registration_t *) opal_interval_tree_find_overlapping (&vma_module->tree, (uintptr_t) base,
((uintptr_t) bound) + 1);
}
static inline bool is_reg_in_array (mca_rcache_base_registration_t **regs,
int cnt, mca_rcache_base_registration_t *p)
struct mca_rcache_base_vma_tree_find_all_helper_args_t {
mca_rcache_base_registration_t **regs;
int reg_cnt;
int reg_max;
};
typedef struct mca_rcache_base_vma_tree_find_all_helper_args_t mca_rcache_base_vma_tree_find_all_helper_args_t;
static int mca_rcache_base_vma_tree_find_all_helper (uint64_t low, uint64_t high, void *data, void *ctx)
{
for (int i = 0 ; i < cnt ; ++i) {
if (regs[i] == p) {
return true;
}
mca_rcache_base_vma_tree_find_all_helper_args_t *args = (mca_rcache_base_vma_tree_find_all_helper_args_t *) ctx;
mca_rcache_base_registration_t *reg = (mca_rcache_base_registration_t *) data;
if (args->reg_cnt == args->reg_max) {
return args->reg_max;
}
return false;
args->regs[args->reg_cnt++] = reg;
return OPAL_SUCCESS;
}
int mca_rcache_base_vma_tree_find_all (mca_rcache_base_vma_module_t *vma_module, unsigned char *base,
unsigned char *bound, mca_rcache_base_registration_t **regs,
int reg_cnt)
{
int cnt = 0;
mca_rcache_base_vma_tree_find_all_helper_args_t args = {.regs = regs, .reg_max = reg_cnt, .reg_cnt = 0};
if(opal_list_get_size(&vma_module->vma_list) == 0)
return cnt;
opal_mutex_lock (&vma_module->vma_lock);
do {
mca_rcache_base_vma_item_t *vma;
mca_rcache_base_vma_reg_list_item_t *vma_item;
vma = (mca_rcache_base_vma_item_t *) opal_rb_tree_find_with (&vma_module->rb_tree, base,
mca_rcache_base_vma_tree_node_compare_closest);
if (NULL == vma) {
/* base is bigger than any registered memory */
break;
}
if (base < (unsigned char *) vma->start) {
base = (unsigned char *) vma->start;
continue;
}
OPAL_LIST_FOREACH(vma_item, &vma->reg_list, mca_rcache_base_vma_reg_list_item_t) {
if (vma_item->reg->flags & MCA_RCACHE_FLAGS_INVALID ||
is_reg_in_array (regs, cnt, vma_item->reg)) {
continue;
}
regs[cnt++] = vma_item->reg;
if (cnt == reg_cnt) {
opal_mutex_unlock (&vma_module->vma_lock);
return cnt; /* no space left in the provided array */
}
}
base = (unsigned char *)vma->end + 1;
} while (bound >= base);
opal_mutex_unlock (&vma_module->vma_lock);
return cnt;
(void) opal_interval_tree_traverse (&vma_module->tree, (uint64_t) (uintptr_t) base, ((uint64_t) (uintptr_t) bound) + 1,
true, mca_rcache_base_vma_tree_find_all_helper, &args);
return args.reg_cnt;
}
static inline void mca_rcache_base_vma_update_byte_count (mca_rcache_base_vma_module_t *vma_module,
@ -365,59 +89,28 @@ static inline void mca_rcache_base_vma_update_byte_count (mca_rcache_base_vma_mo
vma_module->reg_cur_cache_size += nbytes;
}
int mca_rcache_base_vma_tree_iterate (mca_rcache_base_vma_module_t *vma_module, unsigned char *base,
size_t size, int (*callback_fn) (struct mca_rcache_base_registration_t *, void *),
struct mca_rcache_base_vma_tree_iterate_helper_args_t {
int (*callback_fn) (struct mca_rcache_base_registration_t *, void *);
void *ctx;
};
typedef struct mca_rcache_base_vma_tree_iterate_helper_args_t mca_rcache_base_vma_tree_iterate_helper_args_t;
static int mca_rcache_base_vma_tree_iterate_helper (uint64_t low, uint64_t high, void *data, void *ctx)
{
mca_rcache_base_vma_tree_iterate_helper_args_t *args = (mca_rcache_base_vma_tree_iterate_helper_args_t *) ctx;
return args->callback_fn ((mca_rcache_base_registration_t *) data, args->ctx);
}
int mca_rcache_base_vma_tree_iterate (mca_rcache_base_vma_module_t *vma_module, unsigned char *base, size_t size,
bool partial_ok, int (*callback_fn) (struct mca_rcache_base_registration_t *, void *),
void *ctx)
{
unsigned char *bound = base + size - 1;
mca_rcache_base_vma_item_t *vma;
int rc = OPAL_SUCCESS;
mca_rcache_base_vma_tree_iterate_helper_args_t args = {.callback_fn = callback_fn, .ctx = ctx};
uintptr_t bound = (uintptr_t) base + size;
int rc;
if (opal_list_get_size(&vma_module->vma_list) == 0) {
/* nothing to do */
return OPAL_SUCCESS;
}
opal_mutex_lock (&vma_module->vma_lock);
do {
mca_rcache_base_vma_reg_list_item_t *vma_item, *next;
vma = (mca_rcache_base_vma_item_t *) opal_rb_tree_find_with (&vma_module->rb_tree, base,
mca_rcache_base_vma_tree_node_compare_closest);
if (NULL == vma) {
/* base is bigger than any registered memory */
break;
}
if (base < (unsigned char *) vma->start) {
base = (unsigned char *) vma->start;
continue;
}
base = (unsigned char *)vma->end + 1;
/* all the registrations in the vma may be deleted by the callback so keep a
* reference until we are done with it. */
vma->in_use = true;
OPAL_LIST_FOREACH_SAFE(vma_item, next, &vma->reg_list, mca_rcache_base_vma_reg_list_item_t) {
rc = callback_fn (vma_item->reg, ctx);
if (OPAL_SUCCESS != rc) {
break;
}
}
vma->in_use = false;
if (OPAL_SUCCESS != rc) {
break;
}
} while (bound >= base);
opal_mutex_unlock (&vma_module->vma_lock);
return rc;
return opal_interval_tree_traverse (&vma_module->tree, (uint64_t) (intptr_t) base, bound, partial_ok,
mca_rcache_base_vma_tree_iterate_helper, &args);
}
static inline int mca_rcache_base_vma_can_insert (mca_rcache_base_vma_module_t *vma_module, size_t nbytes, size_t limit)
@ -425,139 +118,10 @@ static inline int mca_rcache_base_vma_can_insert (mca_rcache_base_vma_module_t *
return (0 == limit || vma_module->reg_cur_cache_size + nbytes <= limit);
}
/**
* Free deleted vmas. This can not be done when they are deleted without running
* into deadlock problems with some libc versions. The caller MUST hold the vma_lock
* when calling this function.
*/
static void mca_rcache_base_vma_cleanup (mca_rcache_base_vma_module_t *vma_module, int depth)
{
mca_rcache_base_vma_item_t *item;
while (NULL != (item = (mca_rcache_base_vma_item_t *) opal_lifo_pop_atomic (&vma_module->vma_gc_lifo))) {
if (OPAL_UNLIKELY(item->in_use)) {
/* another thread is currently iterating on this vma and its registrations */
if (depth < 8) {
/* try to clean up additional vmas before returning */
mca_rcache_base_vma_cleanup (vma_module, depth + 1);
}
if (item->in_use) {
/* will clean it up later */
opal_lifo_push_atomic (&vma_module->vma_gc_lifo, &item->super.super);
return;
}
}
mca_rcache_base_vma_return (vma_module, (mca_rcache_base_vma_item_t *) item);
}
}
int mca_rcache_base_vma_tree_insert (mca_rcache_base_vma_module_t *vma_module,
mca_rcache_base_registration_t *reg, size_t limit)
{
mca_rcache_base_vma_item_t *i;
uintptr_t begin = (uintptr_t)reg->base, end = (uintptr_t)reg->bound;
mca_rcache_base_vma_cleanup (vma_module, 0);
opal_mutex_lock (&vma_module->vma_lock);
i = (mca_rcache_base_vma_item_t *) opal_rb_tree_find_with (&vma_module->rb_tree,
(void *) begin, mca_rcache_base_vma_tree_node_compare_closest);
if (!i) {
i = (mca_rcache_base_vma_item_t *) opal_list_get_end (&vma_module->vma_list);
}
while (begin <= end) {
mca_rcache_base_vma_item_t *vma = NULL;
if (opal_list_get_end (&vma_module->vma_list) == &i->super.super) {
if (mca_rcache_base_vma_can_insert (vma_module, end - begin + 1, limit)) {
vma = mca_rcache_base_vma_new(vma_module, begin, end);
}
if (!vma) {
goto remove;
}
mca_rcache_base_vma_update_byte_count (vma_module, end - begin + 1);
opal_list_append(&vma_module->vma_list, &vma->super.super);
begin = vma->end + 1;
mca_rcache_base_vma_add_reg (vma, reg);
opal_mutex_unlock (&vma_module->vma_lock);
return OPAL_SUCCESS;
}
if (i->start > begin) {
uintptr_t tend = (i->start <= end) ? (i->start - 1) : end;
if (mca_rcache_base_vma_can_insert(vma_module, tend - begin + 1, limit)) {
vma = mca_rcache_base_vma_new(vma_module, begin, tend);
}
if (!vma) {
goto remove;
}
mca_rcache_base_vma_update_byte_count (vma_module, tend - begin + 1);
/* insert before */
opal_list_insert_pos(&vma_module->vma_list, &i->super.super, &vma->super.super);
i = vma;
begin = vma->end + 1;
mca_rcache_base_vma_add_reg (vma, reg);
} else if(i->start == begin) {
if (i->end > end) {
vma = mca_rcache_base_vma_new (vma_module, end + 1, i->end);
if (!vma) {
goto remove;
}
i->end = end;
mca_rcache_base_vma_copy_reg_list (vma, i);
/* add after */
opal_list_insert_pos (&vma_module->vma_list,
opal_list_get_next (&i->super),
&vma->super.super);
mca_rcache_base_vma_add_reg (i, reg);
begin = end + 1;
} else {
mca_rcache_base_vma_add_reg(i, reg);
begin = i->end + 1;
}
} else {
vma = mca_rcache_base_vma_new (vma_module, begin, i->end);
if (!vma) {
goto remove;
}
i->end = begin - 1;
mca_rcache_base_vma_copy_reg_list (vma, i);
/* add after */
opal_list_insert_pos (&vma_module->vma_list,
opal_list_get_next (&i->super.super),
&vma->super.super);
}
i = (mca_rcache_base_vma_item_t *) opal_list_get_next (&i->super);
}
opal_mutex_unlock (&vma_module->vma_lock);
return OPAL_SUCCESS;
remove:
mca_rcache_base_vma_tree_delete (vma_module, reg);
opal_mutex_unlock (&vma_module->vma_lock);
return OPAL_ERR_TEMP_OUT_OF_RESOURCE;
return opal_interval_tree_insert (&vma_module->tree, reg, (uintptr_t) reg->base, (uintptr_t) reg->bound + 1);
}
/**
@ -571,119 +135,36 @@ remove:
int mca_rcache_base_vma_tree_delete (mca_rcache_base_vma_module_t *vma_module,
mca_rcache_base_registration_t *reg)
{
mca_rcache_base_vma_item_t *vma;
return opal_interval_tree_delete (&vma_module->tree, (uintptr_t) reg->base, (uintptr_t) reg->bound + 1, reg);
}
opal_mutex_lock (&vma_module->vma_lock);
static int mca_rcache_base_tree_dump_range_helper (uint64_t low, uint64_t high, void *data, void *ctx)
{
mca_rcache_base_registration_t *reg = ( mca_rcache_base_registration_t *) data;
vma = (mca_rcache_base_vma_item_t *)
opal_rb_tree_find_with (&vma_module->rb_tree, reg->base,
mca_rcache_base_vma_tree_node_compare_search);
opal_output(0, " reg: base=%p, bound=%p, ref_count=%d, flags=0x%x",
(void *) reg->base, (void *) reg->bound, reg->ref_count, reg->flags);
if (!vma) {
opal_mutex_unlock (&vma_module->vma_lock);
return OPAL_ERROR;
}
while (vma != (mca_rcache_base_vma_item_t *) opal_list_get_end (&vma_module->vma_list)
&& vma->start <= (uintptr_t) reg->bound) {
mca_rcache_base_vma_remove_reg(vma, reg);
if(opal_list_is_empty(&vma->reg_list)) {
mca_rcache_base_vma_item_t *next =
(mca_rcache_base_vma_item_t *) opal_list_get_next (&vma->super);
opal_rb_tree_delete (&vma_module->rb_tree, vma);
mca_rcache_base_vma_update_byte_count (vma_module,
vma->start - vma->end - 1);
opal_list_remove_item (&vma_module->vma_list, &vma->super.super);
opal_lifo_push_atomic (&vma_module->vma_gc_lifo, &vma->super.super);
vma = next;
} else {
int merged;
do {
mca_rcache_base_vma_item_t *prev = NULL, *next = NULL;
if (opal_list_get_first (&vma_module->vma_list) != &vma->super.super) {
prev = (mca_rcache_base_vma_item_t *) opal_list_get_prev(vma);
}
merged = 0;
if (prev && vma->start == prev->end + 1 &&
mca_rcache_base_vma_compare_reg_lists(vma, prev)) {
prev->end = vma->end;
opal_list_remove_item(&vma_module->vma_list, &vma->super.super);
opal_rb_tree_delete(&vma_module->rb_tree, vma);
opal_lifo_push_atomic (&vma_module->vma_gc_lifo, &vma->super.super);
vma = prev;
merged = 1;
}
if (opal_list_get_last (&vma_module->vma_list) != &vma->super.super) {
next = (mca_rcache_base_vma_item_t *) opal_list_get_next (vma);
}
if (next && vma->end + 1 == next->start &&
mca_rcache_base_vma_compare_reg_lists (vma, next)) {
vma->end = next->end;
opal_list_remove_item(&vma_module->vma_list, &next->super.super);
opal_rb_tree_delete(&vma_module->rb_tree, next);
opal_lifo_push_atomic (&vma_module->vma_gc_lifo, &next->super.super);
merged = 1;
}
} while (merged);
vma = (mca_rcache_base_vma_item_t *) opal_list_get_next (vma);
}
}
opal_mutex_unlock (&vma_module->vma_lock);
return 0;
return OPAL_SUCCESS;
}
/* Dump out rcache entries within a range of memory. Useful for debugging. */
void mca_rcache_base_vma_tree_dump_range (mca_rcache_base_vma_module_t *vma_module,
unsigned char *base, size_t size, char *msg)
{
unsigned char * bound = base + size -1;
mca_rcache_base_registration_t *reg;
uintptr_t bound = (uintptr_t) base + size;
if (NULL == msg) {
msg = "";
}
opal_output(0, "Dumping rcache entries: %s", msg ? msg : "");
opal_output(0, "Dumping rcache entries: %s", msg);
if(opal_list_is_empty(&vma_module->vma_list)) {
if (opal_interval_tree_size (&vma_module->tree)) {
(void) opal_interval_tree_traverse (&vma_module->tree, (uintptr_t) base, bound, false,
mca_rcache_base_tree_dump_range_helper, NULL);
} else {
opal_output(0, " rcache is empty");
return;
}
do {
mca_rcache_base_vma_item_t *vma;
mca_rcache_base_vma_reg_list_item_t *vma_item;
vma = (mca_rcache_base_vma_item_t *)
opal_rb_tree_find_with (&vma_module->rb_tree, base,
mca_rcache_base_vma_tree_node_compare_closest);
if (NULL == vma) {
/* base is bigger than any registered memory */
break;
}
if (base < (unsigned char *) vma->start) {
base = (unsigned char *) vma->start;
continue;
}
opal_output(0, " vma: base=%p, bound=%p, size=%lu, number of registrations=%d",
(void *)vma->start, (void *)vma->end, vma->end - vma->start + 1,
(int) opal_list_get_size(&vma->reg_list));
OPAL_LIST_FOREACH(vma_item, &vma->reg_list, mca_rcache_base_vma_reg_list_item_t) {
reg = vma_item->reg;
opal_output(0, " reg: base=%p, bound=%p, ref_count=%d, flags=0x%x",
(void *) reg->base, (void *) reg->bound, reg->ref_count, reg->flags);
}
base = (unsigned char *)vma->end + 1;
} while (bound >= base);
}
size_t mca_rcache_base_vma_tree_size (mca_rcache_base_vma_module_t *vma_module)
{
return opal_interval_tree_size (&vma_module->tree);
}
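
For orientation: the file above drops the red-black tree and per-VMA registration lists in favor of direct calls into opal_interval_tree_t. A condensed sketch of that class's API as it is used here, with signatures inferred from the calls in this diff and an illustrative callback and function name (count_cb, interval_tree_sketch):

#include <stdint.h>
#include "opal/constants.h"
#include "opal/class/opal_interval_tree.h"

static int count_cb (uint64_t low, uint64_t high, void *data, void *ctx)
{
    (void) low; (void) high; (void) data;  /* data is the pointer stored with the interval */
    ++(*(size_t *) ctx);
    return OPAL_SUCCESS;                   /* any other value ends the traversal */
}

static size_t interval_tree_sketch (void *reg, uintptr_t base, uintptr_t bound)
{
    opal_interval_tree_t tree;
    size_t visited = 0;

    OBJ_CONSTRUCT(&tree, opal_interval_tree_t);
    (void) opal_interval_tree_init (&tree);

    /* store reg keyed by its address range; the code above passes bound + 1 as
     * the exclusive upper end of the interval */
    (void) opal_interval_tree_insert (&tree, reg, base, bound + 1);

    /* return any one stored value whose interval overlaps the range */
    void *match = opal_interval_tree_find_overlapping (&tree, base, bound + 1);
    (void) match;

    /* visit every stored value overlapping the range (partial_ok = true) */
    (void) opal_interval_tree_traverse (&tree, base, bound + 1, true, count_cb, &visited);

    /* remove the interval again and report how many remain */
    (void) opal_interval_tree_delete (&tree, base, bound + 1, reg);
    size_t remaining = opal_interval_tree_size (&tree);

    OBJ_DESTRUCT(&tree);
    return remaining;
}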

View file

@ -15,7 +15,7 @@
* Copyright (c) 2009 IBM Corporation. All rights reserved.
*
* Copyright (c) 2013 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2015-2017 Los Alamos National Security, LLC. All rights
* Copyright (c) 2015-2018 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
@ -35,35 +35,6 @@
#include "opal/mca/rcache/rcache.h"
#include "rcache_base_vma.h"
/*
* Data structures for the tree of allocated memory
*/
struct mca_rcache_base_vma_reg_list_item_t
{
opal_list_item_t super;
mca_rcache_base_registration_t *reg;
};
typedef struct mca_rcache_base_vma_reg_list_item_t mca_rcache_base_vma_reg_list_item_t;
OBJ_CLASS_DECLARATION(mca_rcache_base_vma_reg_list_item_t);
/**
* The item in the vma_tree itself
*/
struct mca_rcache_base_vma_item_t
{
opal_free_list_item_t super; /**< the parent class */
uintptr_t start; /**< the base of the memory range */
uintptr_t end; /**< the bound of the memory range */
opal_list_t reg_list; /**< list of regs on this vma */
bool in_use; /**< vma is in use in iterate */
mca_rcache_base_vma_module_t *vma_module; /**< pointer to rcache vma belongs to */
};
typedef struct mca_rcache_base_vma_item_t mca_rcache_base_vma_item_t;
OBJ_CLASS_DECLARATION(mca_rcache_base_vma_item_t);
/*
* initialize the vma tree
*/
@ -111,8 +82,17 @@ void mca_rcache_base_vma_tree_dump_range (mca_rcache_base_vma_module_t *vma_modu
* Iterate over matching registration handles in the tree.
*/
int mca_rcache_base_vma_tree_iterate (mca_rcache_base_vma_module_t *vma_module,
unsigned char *base, size_t size,
unsigned char *base, size_t size, bool partial_ok,
int (*callback_fn) (struct mca_rcache_base_registration_t *, void *),
void *ctx);
/**
* @brief Get the current size of the vma tree
*
* @param[in] vma_module rcache vma tree module
*
* @returns the current number of vma regions in the tree
*/
size_t mca_rcache_base_vma_tree_size (mca_rcache_base_vma_module_t *vma_module);
#endif /* MCA_RCACHE_BASE_VMA_TREE_H */

View file

@ -11,7 +11,7 @@
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006 Voltaire. All rights reserved.
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
* Copyright (c) 2011-2018 Los Alamos National Security, LLC. All rights
* reserved.
*
* $COPYRIGHT$
@ -34,6 +34,8 @@
#include <sys/mman.h>
#endif
#define MCA_RCACHE_GRDMA_REG_FLAG_IN_LRU MCA_RCACHE_FLAGS_MOD_RESV0
BEGIN_C_DECLS
struct mca_rcache_grdma_cache_t {

View file

@ -14,7 +14,7 @@
* Copyright (c) 2006 Voltaire. All rights reserved.
* Copyright (c) 2007 Mellanox Technologies. All rights reserved.
* Copyright (c) 2010 IBM Corporation. All rights reserved.
* Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights
* Copyright (c) 2011-2018 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2013 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2016 Research Organization for Information Science
@ -59,16 +59,22 @@ static int mca_rcache_grdma_invalidate_range (mca_rcache_base_module_t *rcache,
size_t size);
static void mca_rcache_grdma_finalize (mca_rcache_base_module_t *rcache);
static bool mca_rcache_grdma_evict (mca_rcache_base_module_t *rcache);
static int mca_rcache_grdma_add_to_gc (mca_rcache_base_registration_t *grdma_reg);
static inline bool registration_is_cacheable(mca_rcache_base_registration_t *reg)
static inline bool registration_flags_cacheable (uint32_t flags)
{
return (mca_rcache_grdma_component.leave_pinned &&
!(reg->flags &
!(flags &
(MCA_RCACHE_FLAGS_CACHE_BYPASS |
MCA_RCACHE_FLAGS_PERSIST |
MCA_RCACHE_FLAGS_INVALID)));
}
static inline bool registration_is_cacheable(mca_rcache_base_registration_t *reg)
{
return registration_flags_cacheable (reg->flags);
}
#if OPAL_CUDA_GDR_SUPPORT
static int check_for_cuda_freed_memory(mca_rcache_base_module_t *rcache, void *addr, size_t size);
#endif /* OPAL_CUDA_GDR_SUPPORT */
@ -132,7 +138,9 @@ static inline int dereg_mem(mca_rcache_base_registration_t *reg)
mca_rcache_grdma_module_t *rcache_grdma = (mca_rcache_grdma_module_t *) reg->rcache;
int rc;
if(!(reg->flags & MCA_RCACHE_FLAGS_CACHE_BYPASS)) {
reg->ref_count = 0;
if (!(reg->flags & MCA_RCACHE_FLAGS_CACHE_BYPASS)) {
mca_rcache_base_vma_delete (rcache_grdma->cache->vma_module, reg);
}
@ -167,14 +175,15 @@ static inline bool mca_rcache_grdma_evict_lru_local (mca_rcache_grdma_cache_t *c
opal_mutex_lock (&cache->vma_module->vma_lock);
old_reg = (mca_rcache_base_registration_t *)
opal_list_remove_first (&cache->lru_list);
opal_mutex_unlock (&cache->vma_module->vma_lock);
if (NULL == old_reg) {
opal_mutex_unlock (&cache->vma_module->vma_lock);
return false;
}
rcache_grdma = (mca_rcache_grdma_module_t *) old_reg->rcache;
(void) dereg_mem (old_reg);
opal_mutex_unlock (&cache->vma_module->vma_lock);
rcache_grdma->stat_evicted++;
@ -196,6 +205,39 @@ struct mca_rcache_base_find_args_t {
typedef struct mca_rcache_base_find_args_t mca_rcache_base_find_args_t;
static inline void mca_rcache_grdma_add_to_lru (mca_rcache_grdma_module_t *rcache_grdma, mca_rcache_base_registration_t *grdma_reg)
{
opal_mutex_lock (&rcache_grdma->cache->vma_module->vma_lock);
opal_list_append(&rcache_grdma->cache->lru_list, (opal_list_item_t *) grdma_reg);
/* ensure the append is complete before setting the flag */
opal_atomic_wmb ();
/* mark this registration as being in the LRU */
opal_atomic_fetch_or_32 ((volatile int32_t *) &grdma_reg->flags, MCA_RCACHE_GRDMA_REG_FLAG_IN_LRU);
opal_mutex_unlock (&rcache_grdma->cache->vma_module->vma_lock);
}
static inline void mca_rcache_grdma_remove_from_lru (mca_rcache_grdma_module_t *rcache_grdma, mca_rcache_base_registration_t *grdma_reg)
{
/* if the reference count was observed to be 0 (which must be the case for this
* function to be called), then some thread deregistered the region. it may be the
* case that the deregistration is still ongoing so wait until the deregistration
* thread has marked this registration as being in the lru before continuing */
while (!(grdma_reg->flags & MCA_RCACHE_GRDMA_REG_FLAG_IN_LRU));
/* opal lists are not thread safe at this time so we must lock :'( */
opal_mutex_lock (&rcache_grdma->cache->vma_module->vma_lock);
opal_list_remove_item (&rcache_grdma->cache->lru_list, (opal_list_item_t *) grdma_reg);
/* clear the LRU flag */
grdma_reg->flags &= ~MCA_RCACHE_GRDMA_REG_FLAG_IN_LRU;
opal_mutex_unlock (&rcache_grdma->cache->vma_module->vma_lock);
}
static int mca_rcache_grdma_check_cached (mca_rcache_base_registration_t *grdma_reg, void *ctx)
{
mca_rcache_base_find_args_t *args = (mca_rcache_base_find_args_t *) ctx;
@ -209,39 +251,22 @@ static int mca_rcache_grdma_check_cached (mca_rcache_base_registration_t *grdma_
if (OPAL_UNLIKELY((args->access_flags & grdma_reg->access_flags) != args->access_flags)) {
args->access_flags |= grdma_reg->access_flags;
if (0 != grdma_reg->ref_count) {
if (!(grdma_reg->flags & MCA_RCACHE_FLAGS_CACHE_BYPASS)) {
mca_rcache_base_vma_delete (rcache_grdma->cache->vma_module, grdma_reg);
}
/* mark the registration to go away when it is deregistered */
grdma_reg->flags |= MCA_RCACHE_FLAGS_INVALID | MCA_RCACHE_FLAGS_CACHE_BYPASS;
} else {
if (registration_is_cacheable(grdma_reg)) {
opal_list_remove_item (&rcache_grdma->cache->lru_list, (opal_list_item_t *) grdma_reg);
}
dereg_mem (grdma_reg);
}
} else {
if (0 == grdma_reg->ref_count) {
/* Leave pinned must be set for this to still be in the rcache. */
opal_list_remove_item(&rcache_grdma->cache->lru_list,
(opal_list_item_t *) grdma_reg);
}
/* This segment fits fully within an existing segment. */
rcache_grdma->stat_cache_hit++;
int32_t ref_cnt = opal_atomic_add_fetch_32 (&grdma_reg->ref_count, 1);
OPAL_OUTPUT_VERBOSE((MCA_BASE_VERBOSE_TRACE, opal_rcache_base_framework.framework_output,
"returning existing registration %p. references %d", (void *) grdma_reg, ref_cnt));
(void)ref_cnt;
args->reg = grdma_reg;
return 1;
/* can't use this registration */
return mca_rcache_grdma_add_to_gc (grdma_reg);
}
/* can't use this registration */
return 0;
int32_t ref_cnt = opal_atomic_fetch_add_32 (&grdma_reg->ref_count, 1);
args->reg = grdma_reg;
if (0 == ref_cnt) {
mca_rcache_grdma_remove_from_lru (rcache_grdma, grdma_reg);
}
/* This segment fits fully within an existing segment. */
(void) opal_atomic_fetch_add_32 ((volatile int32_t *) &rcache_grdma->stat_cache_hit, 1);
OPAL_OUTPUT_VERBOSE((MCA_BASE_VERBOSE_TRACE, opal_rcache_base_framework.framework_output,
"returning existing registration %p. references %d", (void *) grdma_reg, ref_cnt));
return 1;
}
/*
@ -286,7 +311,7 @@ static int mca_rcache_grdma_register (mca_rcache_base_module_t *rcache, void *ad
.base = base, .bound = bound,
.access_flags = access_flags};
/* check to see if memory is registered */
rc = mca_rcache_base_vma_iterate (rcache_grdma->cache->vma_module, base, size,
rc = mca_rcache_base_vma_iterate (rcache_grdma->cache->vma_module, base, size, false,
mca_rcache_grdma_check_cached, (void *) &find_args);
if (1 == rc) {
*reg = find_args.reg;
@ -338,7 +363,6 @@ static int mca_rcache_grdma_register (mca_rcache_base_module_t *rcache, void *ad
* use in multiple simultaneous transactions. We used to set bypass_cache
* here if !mca_rcache_grdma_component.leave_pinned. */
rc = mca_rcache_base_vma_insert (rcache_grdma->cache->vma_module, grdma_reg, 0);
if (OPAL_UNLIKELY(rc != OPAL_SUCCESS)) {
rcache_grdma->resources.deregister_mem (rcache_grdma->resources.reg_data, grdma_reg);
opal_free_list_return_mt (&rcache_grdma->reg_list, item);
@ -395,9 +419,7 @@ static int mca_rcache_grdma_deregister (mca_rcache_base_module_t *rcache,
{
mca_rcache_grdma_module_t *rcache_grdma = (mca_rcache_grdma_module_t *) rcache;
int32_t ref_count;
int rc = OPAL_SUCCESS;
opal_mutex_lock (&rcache_grdma->cache->vma_module->vma_lock);
ref_count = opal_atomic_add_fetch_32 (&reg->ref_count, -1);
OPAL_OUTPUT_VERBOSE((MCA_BASE_VERBOSE_TRACE, opal_rcache_base_framework.framework_output,
@ -405,25 +427,15 @@ static int mca_rcache_grdma_deregister (mca_rcache_base_module_t *rcache,
assert (ref_count >= 0);
if (ref_count > 0) {
opal_mutex_unlock (&rcache_grdma->cache->vma_module->vma_lock);
return OPAL_SUCCESS;
}
if (registration_is_cacheable(reg)) {
opal_list_append(&rcache_grdma->cache->lru_list, (opal_list_item_t *) reg);
opal_mutex_unlock (&rcache_grdma->cache->vma_module->vma_lock);
mca_rcache_grdma_add_to_lru (rcache_grdma, reg);
return OPAL_SUCCESS;
}
if (!(reg->flags & MCA_RCACHE_FLAGS_INVALID)) {
/* only call dereg mem if this registration is not in the GC lifo */
rc = dereg_mem (reg);
}
opal_mutex_unlock (&rcache_grdma->cache->vma_module->vma_lock);
return rc;
return dereg_mem (reg);
}
struct gc_add_args_t {
@ -432,9 +444,30 @@ struct gc_add_args_t {
};
typedef struct gc_add_args_t gc_add_args_t;
static int gc_add (mca_rcache_base_registration_t *grdma_reg, void *ctx)
static int mca_rcache_grdma_add_to_gc (mca_rcache_base_registration_t *grdma_reg)
{
mca_rcache_grdma_module_t *rcache_grdma = (mca_rcache_grdma_module_t *) grdma_reg->rcache;
uint32_t flags = opal_atomic_fetch_or_32 ((volatile int32_t *) &grdma_reg->flags, MCA_RCACHE_FLAGS_INVALID);
if ((flags & MCA_RCACHE_FLAGS_INVALID) || 0 != grdma_reg->ref_count) {
/* nothing to do */
return OPAL_SUCCESS;
}
/* This may be called from free() so avoid recursively calling into free by just
* shifting this registration into the garbage collection list. The cleanup will
* be done on the next registration attempt. */
if (registration_flags_cacheable (flags)) {
mca_rcache_grdma_remove_from_lru (rcache_grdma, grdma_reg);
}
opal_lifo_push_atomic (&rcache_grdma->cache->gc_lifo, (opal_list_item_t *) grdma_reg);
return OPAL_SUCCESS;
}
static int gc_add (mca_rcache_base_registration_t *grdma_reg, void *ctx)
{
gc_add_args_t *args = (gc_add_args_t *) ctx;
if (grdma_reg->flags & MCA_RCACHE_FLAGS_INVALID) {
@ -452,18 +485,7 @@ static int gc_add (mca_rcache_base_registration_t *grdma_reg, void *ctx)
return OPAL_ERROR;
}
/* This may be called from free() so avoid recursively calling into free by just
* shifting this registration into the garbage collection list. The cleanup will
* be done on the next registration attempt. */
if (registration_is_cacheable (grdma_reg) && !grdma_reg->ref_count) {
opal_list_remove_item (&rcache_grdma->cache->lru_list, (opal_list_item_t *) grdma_reg);
}
grdma_reg->flags |= MCA_RCACHE_FLAGS_INVALID;
opal_lifo_push_atomic (&rcache_grdma->cache->gc_lifo, (opal_list_item_t *) grdma_reg);
return OPAL_SUCCESS;
return mca_rcache_grdma_add_to_gc (grdma_reg);
}
static int mca_rcache_grdma_invalidate_range (mca_rcache_base_module_t *rcache,
@ -471,7 +493,7 @@ static int mca_rcache_grdma_invalidate_range (mca_rcache_base_module_t *rcache,
{
mca_rcache_grdma_module_t *rcache_grdma = (mca_rcache_grdma_module_t *) rcache;
gc_add_args_t args = {.base = base, .size = size};
return mca_rcache_base_vma_iterate (rcache_grdma->cache->vma_module, base, size, gc_add, &args);
return mca_rcache_base_vma_iterate (rcache_grdma->cache->vma_module, base, size, true, gc_add, &args);
}
/* Make sure this registration request is not stale. In other words, ensure
@ -498,29 +520,10 @@ static int check_for_cuda_freed_memory (mca_rcache_base_module_t *rcache, void *
/* This memory has been freed. Find all registrations and delete. Ensure they are deregistered
* now by passing dereg_mem as the delete function. This is safe because the vma lock is
* recursive and this is only called from register. */
return mca_rcache_base_vma_iterate (rcache_grdma->cache->vma_module, addr, size, gc_add, NULL);
return mca_rcache_base_vma_iterate (rcache_grdma->cache->vma_module, addr, size, true, gc_add, NULL);
}
#endif /* OPAL_CUDA_GDR_SUPPORT */
static int iterate_dereg_finalize (mca_rcache_base_registration_t *grdma_reg, void *ctx)
{
mca_rcache_grdma_module_t *rcache_grdma = (mca_rcache_grdma_module_t *) ctx;
if ((mca_rcache_base_module_t *) rcache_grdma != grdma_reg->rcache) {
return 0;
}
if (registration_is_cacheable (grdma_reg)) {
opal_list_remove_item (&rcache_grdma->cache->lru_list, (opal_list_item_t *) grdma_reg);
}
/* set the reference count to 0 otherwise dereg will fail on assert */
grdma_reg->ref_count = 0;
return dereg_mem (grdma_reg);
}
static void mca_rcache_grdma_finalize (mca_rcache_base_module_t *rcache)
{
mca_rcache_grdma_module_t *rcache_grdma = (mca_rcache_grdma_module_t*)rcache;
@ -528,17 +531,18 @@ static void mca_rcache_grdma_finalize (mca_rcache_base_module_t *rcache)
/* Statistic */
if (true == mca_rcache_grdma_component.print_stats) {
opal_output(0, "%s grdma: stats "
"(hit/miss/found/not found/evicted): %d/%d/%d/%d/%d\n",
"(hit/miss/found/not found/evicted/tree size): %d/%d/%d/%d/%d/%ld\n",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
rcache_grdma->stat_cache_hit, rcache_grdma->stat_cache_miss,
rcache_grdma->stat_cache_found, rcache_grdma->stat_cache_notfound,
rcache_grdma->stat_evicted);
rcache_grdma->stat_evicted, (long) mca_rcache_base_vma_size (rcache_grdma->cache->vma_module));
}
do_unregistration_gc (&rcache_grdma->super);
do_unregistration_gc (&rcache_grdma->super);
(void) mca_rcache_base_vma_iterate (rcache_grdma->cache->vma_module, NULL, (size_t) -1,
iterate_dereg_finalize, (void *) rcache);
(void) mca_rcache_base_vma_iterate (rcache_grdma->cache->vma_module, NULL, (size_t) -1, true,
gc_add, (void *) rcache);
do_unregistration_gc (rcache);
OBJ_RELEASE(rcache_grdma->cache);

View file

@ -11,7 +11,7 @@
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2009 IBM Corporation. All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* Copyright (c) 2015-2018 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
@ -47,6 +47,14 @@ enum {
MCA_RCACHE_FLAGS_CUDA_REGISTER_MEM = 0x0010,
/** invalid registration (not valid for passing to rcache register) */
MCA_RCACHE_FLAGS_INVALID = 0x0080,
/** reserved for rcache module */
MCA_RCACHE_FLAGS_MOD_RESV0 = 0x0100,
/** reserved for rcache module */
MCA_RCACHE_FLAGS_MOD_RESV1 = 0x0200,
/** reserved for rcache module */
MCA_RCACHE_FLAGS_MOD_RESV2 = 0x0400,
/** reserved for rcache module */
MCA_RCACHE_FLAGS_MOD_RESV3 = 0x0800,
/** reserved for register function */
MCA_RCACHE_FLAGS_RESV0 = 0x1000,
/** reserved for register function */
@ -84,9 +92,9 @@ struct mca_rcache_base_registration_t {
/** artifact of old mpool/rcache architecture. used by cuda code */
unsigned char *alloc_base;
/** number of outstanding references */
int32_t ref_count;
volatile int32_t ref_count;
/** registration flags */
uint32_t flags;
volatile uint32_t flags;
/** internal rcache context */
void *rcache_context;
#if OPAL_CUDA_GDR_SUPPORT