Merge pull request #1673 from hjelmn/fix_rcache_deadlock
rcache: fix deadlock in multi-threaded environments
Этот коммит содержится в:
Коммит
9371a6a52d
@ -539,6 +539,17 @@ static void mca_btl_vader_endpoint_constructor (mca_btl_vader_endpoint_t *ep)
|
|||||||
ep->fifo = NULL;
|
ep->fifo = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if OPAL_BTL_VADER_HAVE_XPMEM
|
||||||
|
static int mca_btl_vader_endpoint_rcache_cleanup (mca_rcache_base_registration_t *reg, void *ctx)
|
||||||
|
{
|
||||||
|
mca_rcache_base_vma_module_t *vma_module = (mca_rcache_base_vma_module_t *) ctx;
|
||||||
|
/* otherwise dereg will fail on assert */
|
||||||
|
reg->ref_count = 0;
|
||||||
|
(void) mca_rcache_base_vma_delete (vma_module, reg);
|
||||||
|
return OPAL_SUCCESS;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
static void mca_btl_vader_endpoint_destructor (mca_btl_vader_endpoint_t *ep)
|
static void mca_btl_vader_endpoint_destructor (mca_btl_vader_endpoint_t *ep)
|
||||||
{
|
{
|
||||||
OBJ_DESTRUCT(&ep->pending_frags);
|
OBJ_DESTRUCT(&ep->pending_frags);
|
||||||
@ -548,21 +559,11 @@ static void mca_btl_vader_endpoint_destructor (mca_btl_vader_endpoint_t *ep)
|
|||||||
if (MCA_BTL_VADER_XPMEM == mca_btl_vader_component.single_copy_mechanism) {
|
if (MCA_BTL_VADER_XPMEM == mca_btl_vader_component.single_copy_mechanism) {
|
||||||
if (ep->segment_data.xpmem.vma_module) {
|
if (ep->segment_data.xpmem.vma_module) {
|
||||||
/* clean out the registration cache */
|
/* clean out the registration cache */
|
||||||
const int nregs = 100;
|
(void) mca_rcache_base_vma_iterate (ep->segment_data.xpmem.vma_module,
|
||||||
mca_rcache_base_registration_t *regs[nregs];
|
NULL, (size_t) -1,
|
||||||
int reg_cnt;
|
mca_btl_vader_endpoint_rcache_cleanup,
|
||||||
|
(void *) ep->segment_data.xpmem.vma_module);
|
||||||
do {
|
OBJ_RELEASE(ep->segment_data.xpmem.vma_module);
|
||||||
reg_cnt = mca_rcache_base_vma_find_all (ep->segment_data.xpmem.vma_module,
|
|
||||||
0, (size_t) -1, regs, nregs);
|
|
||||||
for (int i = 0 ; i < reg_cnt ; ++i) {
|
|
||||||
/* otherwise dereg will fail on assert */
|
|
||||||
regs[i]->ref_count = 0;
|
|
||||||
OBJ_RELEASE(regs[i]);
|
|
||||||
}
|
|
||||||
} while (reg_cnt == nregs);
|
|
||||||
|
|
||||||
ep->segment_data.xpmem.vma_module = NULL;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ep->segment_base) {
|
if (ep->segment_base) {
|
||||||
|
@ -14,7 +14,7 @@
|
|||||||
* Copyright (c) 2009-2013 Cisco Systems, Inc. All rights reserved.
|
* Copyright (c) 2009-2013 Cisco Systems, Inc. All rights reserved.
|
||||||
* Copyright (c) 2009 IBM Corporation. All rights reserved.
|
* Copyright (c) 2009 IBM Corporation. All rights reserved.
|
||||||
* Copyright (c) 2013 NVIDIA Corporation. All rights reserved.
|
* Copyright (c) 2013 NVIDIA Corporation. All rights reserved.
|
||||||
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
|
* Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights
|
||||||
* reserved.
|
* reserved.
|
||||||
*
|
*
|
||||||
* $COPYRIGHT$
|
* $COPYRIGHT$
|
||||||
@ -144,6 +144,14 @@ int mca_rcache_base_vma_delete (mca_rcache_base_vma_module_t *vma_module,
|
|||||||
return mca_rcache_base_vma_tree_delete (vma_module, reg);
|
return mca_rcache_base_vma_tree_delete (vma_module, reg);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int mca_rcache_base_vma_iterate (mca_rcache_base_vma_module_t *vma_module,
|
||||||
|
unsigned char *base, size_t size,
|
||||||
|
int (*callback_fn) (struct mca_rcache_base_registration_t *, void *),
|
||||||
|
void *ctx)
|
||||||
|
{
|
||||||
|
return mca_rcache_base_vma_tree_iterate (vma_module, base, size, callback_fn, ctx);
|
||||||
|
}
|
||||||
|
|
||||||
void mca_rcache_base_vma_dump_range (mca_rcache_base_vma_module_t *vma_module,
|
void mca_rcache_base_vma_dump_range (mca_rcache_base_vma_module_t *vma_module,
|
||||||
unsigned char *base, size_t size, char *msg)
|
unsigned char *base, size_t size, char *msg)
|
||||||
{
|
{
|
||||||
|
@ -13,7 +13,7 @@
|
|||||||
*
|
*
|
||||||
* Copyright (c) 2006 Voltaire. All rights reserved.
|
* Copyright (c) 2006 Voltaire. All rights reserved.
|
||||||
* Copyright (c) 2009 IBM Corporation. All rights reserved.
|
* Copyright (c) 2009 IBM Corporation. All rights reserved.
|
||||||
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
|
* Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights
|
||||||
* reserved.
|
* reserved.
|
||||||
*
|
*
|
||||||
* $COPYRIGHT$
|
* $COPYRIGHT$
|
||||||
@ -34,6 +34,7 @@
|
|||||||
#include "opal_config.h"
|
#include "opal_config.h"
|
||||||
#include "opal/class/opal_list.h"
|
#include "opal/class/opal_list.h"
|
||||||
#include "opal/class/opal_rb_tree.h"
|
#include "opal/class/opal_rb_tree.h"
|
||||||
|
#include "opal/class/opal_lifo.h"
|
||||||
|
|
||||||
BEGIN_C_DECLS
|
BEGIN_C_DECLS
|
||||||
|
|
||||||
@ -69,6 +70,26 @@ int mca_rcache_base_vma_delete (mca_rcache_base_vma_module_t *vma_module,
|
|||||||
void mca_rcache_base_vma_dump_range (mca_rcache_base_vma_module_t *vma_module,
|
void mca_rcache_base_vma_dump_range (mca_rcache_base_vma_module_t *vma_module,
|
||||||
unsigned char *base, size_t size, char *msg);
|
unsigned char *base, size_t size, char *msg);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Iterate over registrations in the specified range.
|
||||||
|
*
|
||||||
|
* @param[in] vma_module vma tree
|
||||||
|
* @param[in] base base address of region
|
||||||
|
* @param[in] size size of region
|
||||||
|
* @param[in] callback_fn function to call for each matching registration handle
|
||||||
|
* @param[in] ctx callback context
|
||||||
|
*
|
||||||
|
* The callback will be made with the vma lock held. This is a recursive lock so
|
||||||
|
* it is still safe to call any vma functions on this vma_module. Keep in mind it
|
||||||
|
* is only safe to call mca_rcache_base_vma_delete() on the supplied registration
|
||||||
|
* from the callback. The iteration will terminate if the callback returns anything
|
||||||
|
* other than OPAL_SUCCESS.
|
||||||
|
*/
|
||||||
|
int mca_rcache_base_vma_iterate (mca_rcache_base_vma_module_t *vma_module,
|
||||||
|
unsigned char *base, size_t size,
|
||||||
|
int (*callback_fn) (struct mca_rcache_base_registration_t *, void *),
|
||||||
|
void *ctx);
|
||||||
|
|
||||||
END_C_DECLS
|
END_C_DECLS
|
||||||
|
|
||||||
#endif /* MCA_RCACHE_BASE_VMA_H */
|
#endif /* MCA_RCACHE_BASE_VMA_H */
|
||||||
|
@ -258,9 +258,12 @@ mca_rcache_base_registration_t *mca_rcache_base_vma_tree_find (mca_rcache_base_v
|
|||||||
mca_rcache_base_vma_item_t *vma;
|
mca_rcache_base_vma_item_t *vma;
|
||||||
mca_rcache_base_vma_reg_list_item_t *item;
|
mca_rcache_base_vma_reg_list_item_t *item;
|
||||||
|
|
||||||
|
opal_mutex_lock (&vma_module->vma_lock);
|
||||||
|
|
||||||
vma = (mca_rcache_base_vma_item_t *) opal_rb_tree_find_with (&vma_module->rb_tree, base,
|
vma = (mca_rcache_base_vma_item_t *) opal_rb_tree_find_with (&vma_module->rb_tree, base,
|
||||||
mca_rcache_base_vma_tree_node_compare_search);
|
mca_rcache_base_vma_tree_node_compare_search);
|
||||||
if (!vma) {
|
if (!vma) {
|
||||||
|
opal_mutex_unlock (&vma_module->vma_lock);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -269,12 +272,18 @@ mca_rcache_base_registration_t *mca_rcache_base_vma_tree_find (mca_rcache_base_v
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if(item->reg->bound >= bound)
|
if(item->reg->bound >= bound) {
|
||||||
|
opal_mutex_unlock (&vma_module->vma_lock);
|
||||||
return item->reg;
|
return item->reg;
|
||||||
if(!(item->reg->flags & MCA_RCACHE_FLAGS_PERSIST))
|
}
|
||||||
|
|
||||||
|
if(!(item->reg->flags & MCA_RCACHE_FLAGS_PERSIST)) {
|
||||||
break;
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
opal_mutex_unlock (&vma_module->vma_lock);
|
||||||
|
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -299,6 +308,8 @@ int mca_rcache_base_vma_tree_find_all (mca_rcache_base_vma_module_t *vma_module,
|
|||||||
if(opal_list_get_size(&vma_module->vma_list) == 0)
|
if(opal_list_get_size(&vma_module->vma_list) == 0)
|
||||||
return cnt;
|
return cnt;
|
||||||
|
|
||||||
|
opal_mutex_lock (&vma_module->vma_lock);
|
||||||
|
|
||||||
do {
|
do {
|
||||||
mca_rcache_base_vma_item_t *vma;
|
mca_rcache_base_vma_item_t *vma;
|
||||||
mca_rcache_base_vma_reg_list_item_t *vma_item;
|
mca_rcache_base_vma_reg_list_item_t *vma_item;
|
||||||
@ -316,39 +327,99 @@ int mca_rcache_base_vma_tree_find_all (mca_rcache_base_vma_module_t *vma_module,
|
|||||||
}
|
}
|
||||||
|
|
||||||
OPAL_LIST_FOREACH(vma_item, &vma->reg_list, mca_rcache_base_vma_reg_list_item_t) {
|
OPAL_LIST_FOREACH(vma_item, &vma->reg_list, mca_rcache_base_vma_reg_list_item_t) {
|
||||||
if ((vma_item->reg->flags & MCA_RCACHE_FLAGS_INVALID) ||
|
if (vma_item->reg->flags & MCA_RCACHE_FLAGS_INVALID ||
|
||||||
is_reg_in_array (regs, cnt, vma_item->reg)) {
|
is_reg_in_array (regs, cnt, vma_item->reg)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
regs[cnt++] = vma_item->reg;
|
regs[cnt++] = vma_item->reg;
|
||||||
if (cnt == reg_cnt) {
|
if (cnt == reg_cnt) {
|
||||||
|
opal_mutex_unlock (&vma_module->vma_lock);
|
||||||
return cnt; /* no space left in the provided array */
|
return cnt; /* no space left in the provided array */
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
base = (unsigned char *)vma->end + 1;
|
base = (unsigned char *)vma->end + 1;
|
||||||
} while(bound >= base);
|
} while (bound >= base);
|
||||||
|
|
||||||
|
opal_mutex_unlock (&vma_module->vma_lock);
|
||||||
|
|
||||||
return cnt;
|
return cnt;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline int mca_rcache_base_vma_can_insert (mca_rcache_base_vma_module_t *vma_module, size_t nbytes, size_t limit)
|
|
||||||
{
|
|
||||||
return (0 == limit || vma_module->reg_cur_cache_size + nbytes <= limit);
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline void mca_rcache_base_vma_update_byte_count (mca_rcache_base_vma_module_t *vma_module,
|
static inline void mca_rcache_base_vma_update_byte_count (mca_rcache_base_vma_module_t *vma_module,
|
||||||
size_t nbytes)
|
size_t nbytes)
|
||||||
{
|
{
|
||||||
vma_module->reg_cur_cache_size += nbytes;
|
vma_module->reg_cur_cache_size += nbytes;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int mca_rcache_base_vma_tree_iterate (mca_rcache_base_vma_module_t *vma_module, unsigned char *base,
|
||||||
|
size_t size, int (*callback_fn) (struct mca_rcache_base_registration_t *, void *),
|
||||||
|
void *ctx)
|
||||||
|
{
|
||||||
|
unsigned char *bound = base + size - 1;
|
||||||
|
mca_rcache_base_vma_item_t *vma;
|
||||||
|
int rc = OPAL_SUCCESS;
|
||||||
|
|
||||||
|
if (opal_list_get_size(&vma_module->vma_list) == 0) {
|
||||||
|
/* nothing to do */
|
||||||
|
return OPAL_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
opal_mutex_lock (&vma_module->vma_lock);
|
||||||
|
|
||||||
|
do {
|
||||||
|
mca_rcache_base_vma_reg_list_item_t *vma_item, *next;
|
||||||
|
vma = (mca_rcache_base_vma_item_t *) opal_rb_tree_find_with (&vma_module->rb_tree, base,
|
||||||
|
mca_rcache_base_vma_tree_node_compare_closest);
|
||||||
|
|
||||||
|
if (NULL == vma) {
|
||||||
|
/* base is bigger than any registered memory */
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (base < (unsigned char *) vma->start) {
|
||||||
|
base = (unsigned char *) vma->start;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
base = (unsigned char *)vma->end + 1;
|
||||||
|
|
||||||
|
/* all the registrations in the vma may be deleted by the callback so keep a
|
||||||
|
* reference until we are done with it. */
|
||||||
|
OBJ_RETAIN(vma);
|
||||||
|
|
||||||
|
OPAL_LIST_FOREACH_SAFE(vma_item, next, &vma->reg_list, mca_rcache_base_vma_reg_list_item_t) {
|
||||||
|
rc = callback_fn (vma_item->reg, ctx);
|
||||||
|
if (OPAL_SUCCESS != rc) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
OBJ_RELEASE(vma);
|
||||||
|
|
||||||
|
if (OPAL_SUCCESS != rc) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
} while (bound >= base);
|
||||||
|
|
||||||
|
opal_mutex_unlock (&vma_module->vma_lock);
|
||||||
|
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int mca_rcache_base_vma_can_insert (mca_rcache_base_vma_module_t *vma_module, size_t nbytes, size_t limit)
|
||||||
|
{
|
||||||
|
return (0 == limit || vma_module->reg_cur_cache_size + nbytes <= limit);
|
||||||
|
}
|
||||||
|
|
||||||
int mca_rcache_base_vma_tree_insert (mca_rcache_base_vma_module_t *vma_module,
|
int mca_rcache_base_vma_tree_insert (mca_rcache_base_vma_module_t *vma_module,
|
||||||
mca_rcache_base_registration_t *reg, size_t limit)
|
mca_rcache_base_registration_t *reg, size_t limit)
|
||||||
{
|
{
|
||||||
mca_rcache_base_vma_item_t *i;
|
mca_rcache_base_vma_item_t *i;
|
||||||
uintptr_t begin = (uintptr_t)reg->base, end = (uintptr_t)reg->bound;
|
uintptr_t begin = (uintptr_t)reg->base, end = (uintptr_t)reg->bound;
|
||||||
|
|
||||||
|
opal_mutex_lock (&vma_module->vma_lock);
|
||||||
|
|
||||||
i = (mca_rcache_base_vma_item_t *) opal_rb_tree_find_with (&vma_module->rb_tree,
|
i = (mca_rcache_base_vma_item_t *) opal_rb_tree_find_with (&vma_module->rb_tree,
|
||||||
(void *) begin, mca_rcache_base_vma_tree_node_compare_closest);
|
(void *) begin, mca_rcache_base_vma_tree_node_compare_closest);
|
||||||
|
|
||||||
@ -373,6 +444,7 @@ int mca_rcache_base_vma_tree_insert (mca_rcache_base_vma_module_t *vma_module,
|
|||||||
opal_list_append(&vma_module->vma_list, &vma->super);
|
opal_list_append(&vma_module->vma_list, &vma->super);
|
||||||
begin = vma->end + 1;
|
begin = vma->end + 1;
|
||||||
mca_rcache_base_vma_add_reg (vma, reg);
|
mca_rcache_base_vma_add_reg (vma, reg);
|
||||||
|
opal_mutex_unlock (&vma_module->vma_lock);
|
||||||
return OPAL_SUCCESS;
|
return OPAL_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -434,10 +506,14 @@ int mca_rcache_base_vma_tree_insert (mca_rcache_base_vma_module_t *vma_module,
|
|||||||
i = (mca_rcache_base_vma_item_t *) opal_list_get_next (&i->super);
|
i = (mca_rcache_base_vma_item_t *) opal_list_get_next (&i->super);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
opal_mutex_unlock (&vma_module->vma_lock);
|
||||||
|
|
||||||
return OPAL_SUCCESS;
|
return OPAL_SUCCESS;
|
||||||
|
|
||||||
remove:
|
remove:
|
||||||
mca_rcache_base_vma_tree_delete (vma_module, reg);
|
mca_rcache_base_vma_tree_delete (vma_module, reg);
|
||||||
|
opal_mutex_unlock (&vma_module->vma_lock);
|
||||||
|
|
||||||
return OPAL_ERR_TEMP_OUT_OF_RESOURCE;
|
return OPAL_ERR_TEMP_OUT_OF_RESOURCE;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -453,17 +529,23 @@ int mca_rcache_base_vma_tree_delete (mca_rcache_base_vma_module_t *vma_module,
|
|||||||
mca_rcache_base_registration_t *reg)
|
mca_rcache_base_registration_t *reg)
|
||||||
{
|
{
|
||||||
mca_rcache_base_vma_item_t *vma;
|
mca_rcache_base_vma_item_t *vma;
|
||||||
|
opal_list_t deleted_vmas;
|
||||||
|
|
||||||
|
opal_mutex_lock (&vma_module->vma_lock);
|
||||||
|
|
||||||
vma = (mca_rcache_base_vma_item_t *)
|
vma = (mca_rcache_base_vma_item_t *)
|
||||||
opal_rb_tree_find_with (&vma_module->rb_tree, reg->base,
|
opal_rb_tree_find_with (&vma_module->rb_tree, reg->base,
|
||||||
mca_rcache_base_vma_tree_node_compare_search);
|
mca_rcache_base_vma_tree_node_compare_search);
|
||||||
|
|
||||||
if (!vma) {
|
if (!vma) {
|
||||||
|
opal_mutex_unlock (&vma_module->vma_lock);
|
||||||
return OPAL_ERROR;
|
return OPAL_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
OBJ_CONSTRUCT(&deleted_vmas, opal_list_t);
|
||||||
|
|
||||||
while (vma != (mca_rcache_base_vma_item_t *) opal_list_get_end (&vma_module->vma_list)
|
while (vma != (mca_rcache_base_vma_item_t *) opal_list_get_end (&vma_module->vma_list)
|
||||||
&& vma->start <= (uintptr_t) reg->bound) {
|
&& vma->start <= (uintptr_t) reg->bound) {
|
||||||
mca_rcache_base_vma_remove_reg(vma, reg);
|
mca_rcache_base_vma_remove_reg(vma, reg);
|
||||||
|
|
||||||
if(opal_list_is_empty(&vma->reg_list)) {
|
if(opal_list_is_empty(&vma->reg_list)) {
|
||||||
@ -473,7 +555,7 @@ int mca_rcache_base_vma_tree_delete (mca_rcache_base_vma_module_t *vma_module,
|
|||||||
mca_rcache_base_vma_update_byte_count (vma_module,
|
mca_rcache_base_vma_update_byte_count (vma_module,
|
||||||
vma->start - vma->end - 1);
|
vma->start - vma->end - 1);
|
||||||
opal_list_remove_item (&vma_module->vma_list, &vma->super);
|
opal_list_remove_item (&vma_module->vma_list, &vma->super);
|
||||||
OBJ_RELEASE(vma);
|
opal_list_append (&deleted_vmas, &vma->super);
|
||||||
vma = next;
|
vma = next;
|
||||||
} else {
|
} else {
|
||||||
int merged;
|
int merged;
|
||||||
@ -491,7 +573,7 @@ int mca_rcache_base_vma_tree_delete (mca_rcache_base_vma_module_t *vma_module,
|
|||||||
prev->end = vma->end;
|
prev->end = vma->end;
|
||||||
opal_list_remove_item(&vma_module->vma_list, &vma->super);
|
opal_list_remove_item(&vma_module->vma_list, &vma->super);
|
||||||
opal_rb_tree_delete(&vma_module->rb_tree, vma);
|
opal_rb_tree_delete(&vma_module->rb_tree, vma);
|
||||||
OBJ_RELEASE(vma);
|
opal_list_append (&deleted_vmas, &vma->super);
|
||||||
vma = prev;
|
vma = prev;
|
||||||
merged = 1;
|
merged = 1;
|
||||||
}
|
}
|
||||||
@ -505,7 +587,7 @@ int mca_rcache_base_vma_tree_delete (mca_rcache_base_vma_module_t *vma_module,
|
|||||||
vma->end = next->end;
|
vma->end = next->end;
|
||||||
opal_list_remove_item(&vma_module->vma_list, &next->super);
|
opal_list_remove_item(&vma_module->vma_list, &next->super);
|
||||||
opal_rb_tree_delete(&vma_module->rb_tree, next);
|
opal_rb_tree_delete(&vma_module->rb_tree, next);
|
||||||
OBJ_RELEASE(next);
|
opal_list_append (&deleted_vmas, &next->super);
|
||||||
merged = 1;
|
merged = 1;
|
||||||
}
|
}
|
||||||
} while (merged);
|
} while (merged);
|
||||||
@ -514,6 +596,11 @@ int mca_rcache_base_vma_tree_delete (mca_rcache_base_vma_module_t *vma_module,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
opal_mutex_unlock (&vma_module->vma_lock);
|
||||||
|
|
||||||
|
/* actually free vmas now that the lock has been dropped */
|
||||||
|
OPAL_LIST_DESTRUCT(&deleted_vmas);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -558,7 +645,7 @@ void mca_rcache_base_vma_tree_dump_range (mca_rcache_base_vma_module_t *vma_modu
|
|||||||
OPAL_LIST_FOREACH(vma_item, &vma->reg_list, mca_rcache_base_vma_reg_list_item_t) {
|
OPAL_LIST_FOREACH(vma_item, &vma->reg_list, mca_rcache_base_vma_reg_list_item_t) {
|
||||||
reg = vma_item->reg;
|
reg = vma_item->reg;
|
||||||
opal_output(0, " reg: base=%p, bound=%p, ref_count=%d, flags=0x%x",
|
opal_output(0, " reg: base=%p, bound=%p, ref_count=%d, flags=0x%x",
|
||||||
reg->base, reg->bound, reg->ref_count, reg->flags);
|
(void *) reg->base, (void *) reg->bound, reg->ref_count, reg->flags);
|
||||||
}
|
}
|
||||||
base = (unsigned char *)vma->end + 1;
|
base = (unsigned char *)vma->end + 1;
|
||||||
} while (bound >= base);
|
} while (bound >= base);
|
||||||
|
@ -15,7 +15,7 @@
|
|||||||
* Copyright (c) 2009 IBM Corporation. All rights reserved.
|
* Copyright (c) 2009 IBM Corporation. All rights reserved.
|
||||||
*
|
*
|
||||||
* Copyright (c) 2013 Cisco Systems, Inc. All rights reserved.
|
* Copyright (c) 2013 Cisco Systems, Inc. All rights reserved.
|
||||||
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
|
* Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights
|
||||||
* reserved.
|
* reserved.
|
||||||
* $COPYRIGHT$
|
* $COPYRIGHT$
|
||||||
*
|
*
|
||||||
@ -106,4 +106,12 @@ void mca_rcache_base_vma_tree_dump_range (mca_rcache_base_vma_module_t *vma_modu
|
|||||||
unsigned char *base, size_t size, char *msg);
|
unsigned char *base, size_t size, char *msg);
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Iterate over matching registration handles in the tree.
|
||||||
|
*/
|
||||||
|
int mca_rcache_base_vma_tree_iterate (mca_rcache_base_vma_module_t *vma_module,
|
||||||
|
unsigned char *base, size_t size,
|
||||||
|
int (*callback_fn) (struct mca_rcache_base_registration_t *, void *),
|
||||||
|
void *ctx);
|
||||||
|
|
||||||
#endif /* MCA_RCACHE_BASE_VMA_TREE_H */
|
#endif /* MCA_RCACHE_BASE_VMA_TREE_H */
|
||||||
|
@ -40,7 +40,7 @@ struct mca_rcache_grdma_cache_t {
|
|||||||
opal_list_item_t super;
|
opal_list_item_t super;
|
||||||
char *cache_name;
|
char *cache_name;
|
||||||
opal_list_t lru_list;
|
opal_list_t lru_list;
|
||||||
opal_list_t gc_list;
|
opal_lifo_t gc_lifo;
|
||||||
mca_rcache_base_vma_module_t *vma_module;
|
mca_rcache_base_vma_module_t *vma_module;
|
||||||
};
|
};
|
||||||
typedef struct mca_rcache_grdma_cache_t mca_rcache_grdma_cache_t;
|
typedef struct mca_rcache_grdma_cache_t mca_rcache_grdma_cache_t;
|
||||||
|
@ -14,7 +14,7 @@
|
|||||||
* Copyright (c) 2006 Voltaire. All rights reserved.
|
* Copyright (c) 2006 Voltaire. All rights reserved.
|
||||||
* Copyright (c) 2007 Mellanox Technologies. All rights reserved.
|
* Copyright (c) 2007 Mellanox Technologies. All rights reserved.
|
||||||
* Copyright (c) 2010 IBM Corporation. All rights reserved.
|
* Copyright (c) 2010 IBM Corporation. All rights reserved.
|
||||||
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
|
* Copyright (c) 2011-2016 Los Alamos National Security, LLC. All rights
|
||||||
* reserved.
|
* reserved.
|
||||||
* Copyright (c) 2013 NVIDIA Corporation. All rights reserved.
|
* Copyright (c) 2013 NVIDIA Corporation. All rights reserved.
|
||||||
*
|
*
|
||||||
@ -75,15 +75,18 @@ static void mca_rcache_grdma_cache_contructor (mca_rcache_grdma_cache_t *cache)
|
|||||||
memset ((void *)((uintptr_t)cache + sizeof (cache->super)), 0, sizeof (*cache) - sizeof (cache->super));
|
memset ((void *)((uintptr_t)cache + sizeof (cache->super)), 0, sizeof (*cache) - sizeof (cache->super));
|
||||||
|
|
||||||
OBJ_CONSTRUCT(&cache->lru_list, opal_list_t);
|
OBJ_CONSTRUCT(&cache->lru_list, opal_list_t);
|
||||||
OBJ_CONSTRUCT(&cache->gc_list, opal_list_t);
|
OBJ_CONSTRUCT(&cache->gc_lifo, opal_lifo_t);
|
||||||
|
|
||||||
cache->vma_module = mca_rcache_base_vma_module_alloc ();
|
cache->vma_module = mca_rcache_base_vma_module_alloc ();
|
||||||
}
|
}
|
||||||
|
|
||||||
static void mca_rcache_grdma_cache_destructor (mca_rcache_grdma_cache_t *cache)
|
static void mca_rcache_grdma_cache_destructor (mca_rcache_grdma_cache_t *cache)
|
||||||
{
|
{
|
||||||
|
/* clear the lru before releasing the list */
|
||||||
|
while (NULL != opal_list_remove_first (&cache->lru_list));
|
||||||
|
|
||||||
OBJ_DESTRUCT(&cache->lru_list);
|
OBJ_DESTRUCT(&cache->lru_list);
|
||||||
OBJ_DESTRUCT(&cache->gc_list);
|
OBJ_DESTRUCT(&cache->gc_lifo);
|
||||||
if (cache->vma_module) {
|
if (cache->vma_module) {
|
||||||
OBJ_RELEASE(cache->vma_module);
|
OBJ_RELEASE(cache->vma_module);
|
||||||
}
|
}
|
||||||
@ -133,34 +136,36 @@ static inline int dereg_mem(mca_rcache_base_registration_t *reg)
|
|||||||
|
|
||||||
rc = rcache_grdma->resources.deregister_mem (rcache_grdma->resources.reg_data, reg);
|
rc = rcache_grdma->resources.deregister_mem (rcache_grdma->resources.reg_data, reg);
|
||||||
if (OPAL_LIKELY(OPAL_SUCCESS == rc)) {
|
if (OPAL_LIKELY(OPAL_SUCCESS == rc)) {
|
||||||
opal_free_list_return (&rcache_grdma->reg_list,
|
opal_free_list_return_mt (&rcache_grdma->reg_list,
|
||||||
(opal_free_list_item_t *) reg);
|
(opal_free_list_item_t *) reg);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
OPAL_OUTPUT_VERBOSE((MCA_BASE_VERBOSE_TRACE, opal_rcache_base_framework.framework_output,
|
||||||
|
"registration %p destroyed", (void *) reg));
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* This function must be called with the rcache lock held */
|
|
||||||
static inline void do_unregistration_gc (mca_rcache_base_module_t *rcache)
|
static inline void do_unregistration_gc (mca_rcache_base_module_t *rcache)
|
||||||
{
|
{
|
||||||
mca_rcache_grdma_module_t *rcache_grdma = (mca_rcache_grdma_module_t *) rcache;
|
mca_rcache_grdma_module_t *rcache_grdma = (mca_rcache_grdma_module_t *) rcache;
|
||||||
opal_list_item_t *item;
|
opal_list_item_t *item;
|
||||||
|
|
||||||
/* Remove registration from garbage collection list
|
/* Remove registration from garbage collection list before deregistering it */
|
||||||
before deregistering it */
|
while (NULL != (item = opal_lifo_pop_atomic (&rcache_grdma->cache->gc_lifo))) {
|
||||||
while (NULL !=
|
OPAL_OUTPUT_VERBOSE((MCA_BASE_VERBOSE_TRACE, opal_rcache_base_framework.framework_output,
|
||||||
(item = opal_list_remove_first(&rcache_grdma->cache->gc_list))) {
|
"deleting stale registration %p", (void *) item));
|
||||||
dereg_mem((mca_rcache_base_registration_t *) item);
|
dereg_mem ((mca_rcache_base_registration_t *) item);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline bool mca_rcache_grdma_evict_lru_local (mca_rcache_grdma_cache_t *cache)
|
static inline bool mca_rcache_grdma_evict_lru_local (mca_rcache_grdma_cache_t *cache)
|
||||||
{
|
{
|
||||||
mca_rcache_grdma_module_t *rcache_grdma;
|
mca_rcache_grdma_module_t *rcache_grdma;
|
||||||
mca_rcache_base_registration_t *old_reg;
|
mca_rcache_base_registration_t *old_reg;
|
||||||
|
|
||||||
|
opal_mutex_lock (&cache->vma_module->vma_lock);
|
||||||
old_reg = (mca_rcache_base_registration_t *)
|
old_reg = (mca_rcache_base_registration_t *)
|
||||||
opal_list_remove_first (&cache->lru_list);
|
opal_list_remove_first (&cache->lru_list);
|
||||||
|
opal_mutex_unlock (&cache->vma_module->vma_lock);
|
||||||
if (NULL == old_reg) {
|
if (NULL == old_reg) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -179,6 +184,63 @@ static bool mca_rcache_grdma_evict (mca_rcache_base_module_t *rcache)
|
|||||||
return mca_rcache_grdma_evict_lru_local (((mca_rcache_grdma_module_t *) rcache)->cache);
|
return mca_rcache_grdma_evict_lru_local (((mca_rcache_grdma_module_t *) rcache)->cache);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct mca_rcache_base_find_args_t {
|
||||||
|
mca_rcache_base_registration_t *reg;
|
||||||
|
mca_rcache_grdma_module_t *rcache_grdma;
|
||||||
|
unsigned char *base;
|
||||||
|
unsigned char *bound;
|
||||||
|
int access_flags;
|
||||||
|
};
|
||||||
|
|
||||||
|
typedef struct mca_rcache_base_find_args_t mca_rcache_base_find_args_t;
|
||||||
|
|
||||||
|
static int mca_rcache_grdma_check_cached (mca_rcache_base_registration_t *grdma_reg, void *ctx)
|
||||||
|
{
|
||||||
|
mca_rcache_base_find_args_t *args = (mca_rcache_base_find_args_t *) ctx;
|
||||||
|
mca_rcache_grdma_module_t *rcache_grdma = args->rcache_grdma;
|
||||||
|
|
||||||
|
if ((grdma_reg->flags & MCA_RCACHE_FLAGS_INVALID) || &rcache_grdma->super != grdma_reg->rcache ||
|
||||||
|
grdma_reg->base > args->base || grdma_reg->bound < args->bound) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (OPAL_UNLIKELY((args->access_flags & grdma_reg->access_flags) != args->access_flags)) {
|
||||||
|
args->access_flags |= grdma_reg->access_flags;
|
||||||
|
|
||||||
|
if (0 != grdma_reg->ref_count) {
|
||||||
|
if (!(grdma_reg->flags & MCA_RCACHE_FLAGS_CACHE_BYPASS)) {
|
||||||
|
mca_rcache_base_vma_delete (rcache_grdma->cache->vma_module, grdma_reg);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* mark the registration to go away when it is deregistered */
|
||||||
|
grdma_reg->flags |= MCA_RCACHE_FLAGS_INVALID | MCA_RCACHE_FLAGS_CACHE_BYPASS;
|
||||||
|
} else {
|
||||||
|
if (registration_is_cacheable(grdma_reg)) {
|
||||||
|
opal_list_remove_item (&rcache_grdma->cache->lru_list, (opal_list_item_t *) grdma_reg);
|
||||||
|
}
|
||||||
|
|
||||||
|
dereg_mem (grdma_reg);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (0 == grdma_reg->ref_count) {
|
||||||
|
/* Leave pinned must be set for this to still be in the rcache. */
|
||||||
|
opal_list_remove_item(&rcache_grdma->cache->lru_list,
|
||||||
|
(opal_list_item_t *) grdma_reg);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* This segment fits fully within an existing segment. */
|
||||||
|
rcache_grdma->stat_cache_hit++;
|
||||||
|
int32_t ref_cnt = opal_atomic_add_32 (&grdma_reg->ref_count, 1);
|
||||||
|
OPAL_OUTPUT_VERBOSE((MCA_BASE_VERBOSE_TRACE, opal_rcache_base_framework.framework_output,
|
||||||
|
"returning existing registration %p. references %d", (void *) grdma_reg, ref_cnt));
|
||||||
|
args->reg = grdma_reg;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* can't use this registration */
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* register memory
|
* register memory
|
||||||
*/
|
*/
|
||||||
@ -195,15 +257,11 @@ static int mca_rcache_grdma_register (mca_rcache_base_module_t *rcache, void *ad
|
|||||||
unsigned int page_size = opal_getpagesize ();
|
unsigned int page_size = opal_getpagesize ();
|
||||||
int rc;
|
int rc;
|
||||||
|
|
||||||
OPAL_THREAD_LOCK(&rcache_grdma->cache->vma_module->vma_lock);
|
|
||||||
|
|
||||||
*reg = NULL;
|
*reg = NULL;
|
||||||
|
|
||||||
/* if cache bypass is requested don't use the cache */
|
/* if cache bypass is requested don't use the cache */
|
||||||
base = OPAL_DOWN_ALIGN_PTR(addr, page_size, unsigned char *);
|
base = OPAL_DOWN_ALIGN_PTR(addr, page_size, unsigned char *);
|
||||||
bound = OPAL_ALIGN_PTR((intptr_t) addr + size, page_size, unsigned char *) - 1;
|
bound = OPAL_ALIGN_PTR((intptr_t) addr + size, page_size, unsigned char *) - 1;
|
||||||
if (!opal_list_is_empty (&rcache_grdma->cache->gc_list))
|
|
||||||
do_unregistration_gc(rcache);
|
|
||||||
|
|
||||||
#if OPAL_CUDA_GDR_SUPPORT
|
#if OPAL_CUDA_GDR_SUPPORT
|
||||||
if (flags & MCA_RCACHE_FLAGS_CUDA_GPU_MEM) {
|
if (flags & MCA_RCACHE_FLAGS_CUDA_GPU_MEM) {
|
||||||
@ -216,58 +274,30 @@ static int mca_rcache_grdma_register (mca_rcache_base_module_t *rcache, void *ad
|
|||||||
}
|
}
|
||||||
#endif /* OPAL_CUDA_GDR_SUPPORT */
|
#endif /* OPAL_CUDA_GDR_SUPPORT */
|
||||||
|
|
||||||
|
do_unregistration_gc (rcache);
|
||||||
|
|
||||||
/* look through existing regs if not persistent registration requested.
|
/* look through existing regs if not persistent registration requested.
|
||||||
* Persistent registration are always registered and placed in the cache */
|
* Persistent registration are always registered and placed in the cache */
|
||||||
if(!(bypass_cache || persist)) {
|
if (!(bypass_cache || persist)) {
|
||||||
|
mca_rcache_base_find_args_t find_args = {.reg = NULL, .rcache_grdma = rcache_grdma,
|
||||||
|
.base = base, .bound = bound,
|
||||||
|
.access_flags = access_flags};
|
||||||
/* check to see if memory is registered */
|
/* check to see if memory is registered */
|
||||||
mca_rcache_base_vma_find (rcache_grdma->cache->vma_module, base, bound - base + 1, &grdma_reg);
|
rc = mca_rcache_base_vma_iterate (rcache_grdma->cache->vma_module, base, size,
|
||||||
if (grdma_reg && !(flags & MCA_RCACHE_FLAGS_INVALID)) {
|
mca_rcache_grdma_check_cached, (void *) &find_args);
|
||||||
if (OPAL_UNLIKELY((access_flags & grdma_reg->access_flags) != access_flags)) {
|
if (1 == rc) {
|
||||||
access_flags |= grdma_reg->access_flags;
|
*reg = find_args.reg;
|
||||||
|
return OPAL_SUCCESS;
|
||||||
if (0 != grdma_reg->ref_count) {
|
|
||||||
if (!(grdma_reg->flags & MCA_RCACHE_FLAGS_CACHE_BYPASS)) {
|
|
||||||
mca_rcache_base_vma_delete (rcache_grdma->cache->vma_module, grdma_reg);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* mark the registration to go away when it is deregistered */
|
|
||||||
grdma_reg->flags |= MCA_RCACHE_FLAGS_INVALID | MCA_RCACHE_FLAGS_CACHE_BYPASS;
|
|
||||||
} else {
|
|
||||||
if (registration_is_cacheable (grdma_reg)) {
|
|
||||||
/* pull the item out of the lru */
|
|
||||||
opal_list_remove_item (&rcache_grdma->cache->lru_list, (opal_list_item_t *) grdma_reg);
|
|
||||||
}
|
|
||||||
|
|
||||||
(void) dereg_mem (grdma_reg);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
*reg = grdma_reg;
|
|
||||||
if (0 == grdma_reg->ref_count) {
|
|
||||||
/* Leave pinned must be set for this to still be in the rcache. */
|
|
||||||
opal_list_remove_item(&rcache_grdma->cache->lru_list,
|
|
||||||
(opal_list_item_t *) grdma_reg);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* This segment fits fully within an existing segment. */
|
|
||||||
rcache_grdma->stat_cache_hit++;
|
|
||||||
grdma_reg->ref_count++;
|
|
||||||
OPAL_THREAD_UNLOCK(&rcache_grdma->cache->vma_module->vma_lock);
|
|
||||||
return OPAL_SUCCESS;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
rcache_grdma->stat_cache_miss++;
|
/* get updated access flags */
|
||||||
|
access_flags = find_args.access_flags;
|
||||||
|
|
||||||
/* Unless explicitly requested by the caller always store the
|
OPAL_THREAD_ADD32((volatile int32_t *) &rcache_grdma->stat_cache_miss, 1);
|
||||||
* registration in the rcache. This will speed up the case where
|
|
||||||
* no leave pinned protocol is in use but the same segment is in
|
|
||||||
* use in multiple simultaneous transactions. We used to set bypass_cache
|
|
||||||
* here is !mca_rcache_grdma_component.leave_pinned. */
|
|
||||||
}
|
}
|
||||||
|
|
||||||
item = opal_free_list_get (&rcache_grdma->reg_list);
|
item = opal_free_list_get_mt (&rcache_grdma->reg_list);
|
||||||
if(NULL == item) {
|
if(NULL == item) {
|
||||||
OPAL_THREAD_UNLOCK(&rcache_grdma->cache->vma_module->vma_lock);
|
|
||||||
return OPAL_ERR_OUT_OF_RESOURCE;
|
return OPAL_ERR_OUT_OF_RESOURCE;
|
||||||
}
|
}
|
||||||
grdma_reg = (mca_rcache_base_registration_t*)item;
|
grdma_reg = (mca_rcache_base_registration_t*)item;
|
||||||
@ -277,22 +307,13 @@ static int mca_rcache_grdma_register (mca_rcache_base_module_t *rcache, void *ad
|
|||||||
grdma_reg->bound = bound;
|
grdma_reg->bound = bound;
|
||||||
grdma_reg->flags = flags;
|
grdma_reg->flags = flags;
|
||||||
grdma_reg->access_flags = access_flags;
|
grdma_reg->access_flags = access_flags;
|
||||||
|
grdma_reg->ref_count = 1;
|
||||||
#if OPAL_CUDA_GDR_SUPPORT
|
#if OPAL_CUDA_GDR_SUPPORT
|
||||||
if (flags & MCA_RCACHE_FLAGS_CUDA_GPU_MEM) {
|
if (flags & MCA_RCACHE_FLAGS_CUDA_GPU_MEM) {
|
||||||
mca_common_cuda_get_buffer_id(grdma_reg);
|
mca_common_cuda_get_buffer_id(grdma_reg);
|
||||||
}
|
}
|
||||||
#endif /* OPAL_CUDA_GDR_SUPPORT */
|
#endif /* OPAL_CUDA_GDR_SUPPORT */
|
||||||
|
|
||||||
if (false == bypass_cache) {
|
|
||||||
rc = mca_rcache_base_vma_insert (rcache_grdma->cache->vma_module, grdma_reg, 0);
|
|
||||||
|
|
||||||
if (OPAL_UNLIKELY(rc != OPAL_SUCCESS)) {
|
|
||||||
OPAL_THREAD_UNLOCK(&rcache_grdma->cache->vma_module->vma_lock);
|
|
||||||
opal_free_list_return (&rcache_grdma->reg_list, item);
|
|
||||||
return rc;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
while (OPAL_ERR_OUT_OF_RESOURCE ==
|
while (OPAL_ERR_OUT_OF_RESOURCE ==
|
||||||
(rc = rcache_grdma->resources.register_mem(rcache_grdma->resources.reg_data,
|
(rc = rcache_grdma->resources.register_mem(rcache_grdma->resources.reg_data,
|
||||||
base, bound - base + 1, grdma_reg))) {
|
base, bound - base + 1, grdma_reg))) {
|
||||||
@ -303,17 +324,30 @@ static int mca_rcache_grdma_register (mca_rcache_base_module_t *rcache, void *ad
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (OPAL_UNLIKELY(rc != OPAL_SUCCESS)) {
|
if (OPAL_UNLIKELY(rc != OPAL_SUCCESS)) {
|
||||||
if (false == bypass_cache) {
|
opal_free_list_return_mt (&rcache_grdma->reg_list, item);
|
||||||
mca_rcache_base_vma_delete (rcache_grdma->cache->vma_module, grdma_reg);
|
|
||||||
}
|
|
||||||
OPAL_THREAD_UNLOCK(&rcache_grdma->cache->vma_module->vma_lock);
|
|
||||||
opal_free_list_return (&rcache_grdma->reg_list, item);
|
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (false == bypass_cache) {
|
||||||
|
/* Unless explicitly requested by the caller always store the
|
||||||
|
* registration in the rcache. This will speed up the case where
|
||||||
|
* no leave pinned protocol is in use but the same segment is in
|
||||||
|
* use in multiple simultaneous transactions. We used to set bypass_cache
|
||||||
|
* here is !mca_rcache_grdma_component.leave_pinned. */
|
||||||
|
rc = mca_rcache_base_vma_insert (rcache_grdma->cache->vma_module, grdma_reg, 0);
|
||||||
|
|
||||||
|
if (OPAL_UNLIKELY(rc != OPAL_SUCCESS)) {
|
||||||
|
rcache_grdma->resources.deregister_mem (rcache_grdma->resources.reg_data, grdma_reg);
|
||||||
|
opal_free_list_return_mt (&rcache_grdma->reg_list, item);
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
OPAL_OUTPUT_VERBOSE((MCA_BASE_VERBOSE_TRACE, opal_rcache_base_framework.framework_output,
|
||||||
|
"created new registration %p for region {%p, %p} with flags 0x%x",
|
||||||
|
(void *) grdma_reg, base, bound, grdma_reg->flags));
|
||||||
|
|
||||||
*reg = grdma_reg;
|
*reg = grdma_reg;
|
||||||
(*reg)->ref_count++;
|
|
||||||
OPAL_THREAD_UNLOCK(&rcache_grdma->cache->vma_module->vma_lock);
|
|
||||||
|
|
||||||
return OPAL_SUCCESS;
|
return OPAL_SUCCESS;
|
||||||
}
|
}
|
||||||
@ -329,7 +363,7 @@ static int mca_rcache_grdma_find (mca_rcache_base_module_t *rcache, void *addr,
|
|||||||
base = OPAL_DOWN_ALIGN_PTR(addr, page_size, unsigned char *);
|
base = OPAL_DOWN_ALIGN_PTR(addr, page_size, unsigned char *);
|
||||||
bound = OPAL_ALIGN_PTR((intptr_t) addr + size - 1, page_size, unsigned char *);
|
bound = OPAL_ALIGN_PTR((intptr_t) addr + size - 1, page_size, unsigned char *);
|
||||||
|
|
||||||
OPAL_THREAD_LOCK(&rcache_grdma->cache->vma_module->vma_lock);
|
opal_mutex_lock (&rcache_grdma->cache->vma_module->vma_lock);
|
||||||
|
|
||||||
rc = mca_rcache_base_vma_find (rcache_grdma->cache->vma_module, base, bound - base + 1, reg);
|
rc = mca_rcache_base_vma_find (rcache_grdma->cache->vma_module, base, bound - base + 1, reg);
|
||||||
if(NULL != *reg &&
|
if(NULL != *reg &&
|
||||||
@ -343,12 +377,12 @@ static int mca_rcache_grdma_find (mca_rcache_base_module_t *rcache, void *addr,
|
|||||||
(opal_list_item_t*)(*reg));
|
(opal_list_item_t*)(*reg));
|
||||||
}
|
}
|
||||||
rcache_grdma->stat_cache_found++;
|
rcache_grdma->stat_cache_found++;
|
||||||
(*reg)->ref_count++;
|
opal_atomic_add_32 (&(*reg)->ref_count, 1);
|
||||||
} else {
|
} else {
|
||||||
rcache_grdma->stat_cache_notfound++;
|
rcache_grdma->stat_cache_notfound++;
|
||||||
}
|
}
|
||||||
|
|
||||||
OPAL_THREAD_UNLOCK(&rcache_grdma->cache->vma_module->vma_lock);
|
opal_mutex_unlock (&rcache_grdma->cache->vma_module->vma_lock);
|
||||||
|
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
@ -357,59 +391,70 @@ static int mca_rcache_grdma_deregister (mca_rcache_base_module_t *rcache,
|
|||||||
mca_rcache_base_registration_t *reg)
|
mca_rcache_base_registration_t *reg)
|
||||||
{
|
{
|
||||||
mca_rcache_grdma_module_t *rcache_grdma = (mca_rcache_grdma_module_t *) rcache;
|
mca_rcache_grdma_module_t *rcache_grdma = (mca_rcache_grdma_module_t *) rcache;
|
||||||
int rc = OPAL_SUCCESS;
|
int32_t ref_count;
|
||||||
assert(reg->ref_count > 0);
|
int rc;
|
||||||
|
|
||||||
OPAL_THREAD_LOCK(&rcache_grdma->cache->vma_module->vma_lock);
|
opal_mutex_lock (&rcache_grdma->cache->vma_module->vma_lock);
|
||||||
reg->ref_count--;
|
ref_count = opal_atomic_add_32 (&reg->ref_count, -1);
|
||||||
if(reg->ref_count > 0) {
|
|
||||||
OPAL_THREAD_UNLOCK(&rcache_grdma->cache->vma_module->vma_lock);
|
OPAL_OUTPUT_VERBOSE((MCA_BASE_VERBOSE_TRACE, opal_rcache_base_framework.framework_output,
|
||||||
|
"returning registration %p, remaining references %d", (void *) reg, ref_count));
|
||||||
|
|
||||||
|
assert (ref_count >= 0);
|
||||||
|
if (ref_count > 0) {
|
||||||
|
opal_mutex_unlock (&rcache_grdma->cache->vma_module->vma_lock);
|
||||||
return OPAL_SUCCESS;
|
return OPAL_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (registration_is_cacheable(reg)) {
|
if (registration_is_cacheable(reg)) {
|
||||||
opal_list_append(&rcache_grdma->cache->lru_list, (opal_list_item_t *) reg);
|
opal_list_append(&rcache_grdma->cache->lru_list, (opal_list_item_t *) reg);
|
||||||
} else {
|
opal_mutex_unlock (&rcache_grdma->cache->vma_module->vma_lock);
|
||||||
rc = dereg_mem (reg);
|
|
||||||
|
return OPAL_SUCCESS;
|
||||||
}
|
}
|
||||||
OPAL_THREAD_UNLOCK(&rcache_grdma->cache->vma_module->vma_lock);
|
|
||||||
|
rc = dereg_mem (reg);
|
||||||
|
opal_mutex_unlock (&rcache_grdma->cache->vma_module->vma_lock);
|
||||||
|
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
#define GRDMA_RCACHE_NREGS 100
|
static int gc_add (mca_rcache_base_registration_t *grdma_reg, void *ctx)
|
||||||
|
{
|
||||||
|
mca_rcache_grdma_module_t *rcache_grdma = (mca_rcache_grdma_module_t *) grdma_reg->rcache;
|
||||||
|
|
||||||
|
/* unused */
|
||||||
|
(void) ctx;
|
||||||
|
|
||||||
|
if (grdma_reg->flags & MCA_RCACHE_FLAGS_INVALID) {
|
||||||
|
/* nothing more to do */
|
||||||
|
return OPAL_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (grdma_reg->ref_count) {
|
||||||
|
/* attempted to remove an active registration */
|
||||||
|
return OPAL_ERROR;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* This may be called from free() so avoid recursively calling into free by just
|
||||||
|
* shifting this registration into the garbage collection list. The cleanup will
|
||||||
|
* be done on the next registration attempt. */
|
||||||
|
if (registration_is_cacheable (grdma_reg)) {
|
||||||
|
opal_list_remove_item (&rcache_grdma->cache->lru_list, (opal_list_item_t *) grdma_reg);
|
||||||
|
}
|
||||||
|
|
||||||
|
grdma_reg->flags |= MCA_RCACHE_FLAGS_INVALID;
|
||||||
|
|
||||||
|
opal_lifo_push_atomic (&rcache_grdma->cache->gc_lifo, (opal_list_item_t *) grdma_reg);
|
||||||
|
|
||||||
|
return OPAL_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
static int mca_rcache_grdma_invalidate_range (mca_rcache_base_module_t *rcache,
|
static int mca_rcache_grdma_invalidate_range (mca_rcache_base_module_t *rcache,
|
||||||
void *base, size_t size)
|
void *base, size_t size)
|
||||||
{
|
{
|
||||||
mca_rcache_grdma_module_t *rcache_grdma = (mca_rcache_grdma_module_t *) rcache;
|
mca_rcache_grdma_module_t *rcache_grdma = (mca_rcache_grdma_module_t *) rcache;
|
||||||
mca_rcache_base_registration_t *regs[GRDMA_RCACHE_NREGS];
|
return mca_rcache_base_vma_iterate (rcache_grdma->cache->vma_module, base, size, gc_add, NULL);
|
||||||
int reg_cnt, i, rc = OPAL_SUCCESS;
|
|
||||||
|
|
||||||
OPAL_THREAD_LOCK(&rcache_grdma->cache->vma_module->vma_lock);
|
|
||||||
do {
|
|
||||||
reg_cnt = mca_rcache_base_vma_find_all (rcache_grdma->cache->vma_module, base,
|
|
||||||
size, regs, GRDMA_RCACHE_NREGS);
|
|
||||||
|
|
||||||
for(i = 0 ; i < reg_cnt ; ++i) {
|
|
||||||
regs[i]->flags |= MCA_RCACHE_FLAGS_INVALID;
|
|
||||||
if (regs[i]->ref_count) {
|
|
||||||
/* memory is being freed, but there are registration in use that
|
|
||||||
* covers the memory. This can happen even in a correct program,
|
|
||||||
* but may also be an user error. We can't tell. Mark the
|
|
||||||
* registration as invalid. It will not be used any more and
|
|
||||||
* will be unregistered when ref_count will become zero */
|
|
||||||
rc = OPAL_ERROR; /* tell caller that something was wrong */
|
|
||||||
} else {
|
|
||||||
opal_list_remove_item(&rcache_grdma->cache->lru_list,(opal_list_item_t *) regs[i]);
|
|
||||||
opal_list_append(&rcache_grdma->cache->gc_list, (opal_list_item_t *) regs[i]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} while (reg_cnt == GRDMA_RCACHE_NREGS);
|
|
||||||
|
|
||||||
OPAL_THREAD_UNLOCK(&rcache_grdma->cache->vma_module->vma_lock);
|
|
||||||
|
|
||||||
return rc;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Make sure this registration request is not stale. In other words, ensure
|
/* Make sure this registration request is not stale. In other words, ensure
|
||||||
@ -417,11 +462,10 @@ static int mca_rcache_grdma_invalidate_range (mca_rcache_base_module_t *rcache,
|
|||||||
* kick out the regisrations and deregister. This function needs to be called
|
* kick out the regisrations and deregister. This function needs to be called
|
||||||
* with the rcache->vma_module->vma_lock held. */
|
* with the rcache->vma_module->vma_lock held. */
|
||||||
#if OPAL_CUDA_GDR_SUPPORT
|
#if OPAL_CUDA_GDR_SUPPORT
|
||||||
|
|
||||||
static int check_for_cuda_freed_memory (mca_rcache_base_module_t *rcache, void *addr, size_t size)
|
static int check_for_cuda_freed_memory (mca_rcache_base_module_t *rcache, void *addr, size_t size)
|
||||||
{
|
{
|
||||||
mca_rcache_grdma_module_t *rcache_grdma = (mca_rcache_grdma_module_t *) rcache;
|
mca_rcache_grdma_module_t *rcache_grdma = (mca_rcache_grdma_module_t *) rcache;
|
||||||
mca_rcache_base_registration_t *regs[GRDMA_RCACHE_NREGS];
|
|
||||||
int reg_cnt, i, rc = OPAL_SUCCESS;
|
|
||||||
mca_rcache_base_registration_t *reg;
|
mca_rcache_base_registration_t *reg;
|
||||||
|
|
||||||
mca_rcache_base_vma_find (rcache_grdma->cache->vma_module, addr, size, &reg);
|
mca_rcache_base_vma_find (rcache_grdma->cache->vma_module, addr, size, &reg);
|
||||||
@ -434,45 +478,35 @@ static int check_for_cuda_freed_memory (mca_rcache_base_module_t *rcache, void *
|
|||||||
return OPAL_SUCCESS;
|
return OPAL_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* rcache->vma_module->rcache_dump_range(rcache->rcache, 0, (size_t)-1, "Before free"); */
|
/* This memory has been freed. Find all registrations and delete. Ensure they are deregistered
|
||||||
|
* now by passing dereg_mem as the delete function. This is safe because the vma lock is
|
||||||
/* This memory has been freed. Find all registrations and delete */
|
* recursive and this is only called from register. */
|
||||||
do {
|
return mca_rcache_base_vma_iterate (rcache_grdma->cache->vma_module, base, size, gc_add, NULL);
|
||||||
reg_cnt = mca_rcache_base_vma_find_all (rcache_grdma->cache->vma_module, reg->base,
|
|
||||||
reg->bound - reg->base + 1, regs,
|
|
||||||
GRDMA_RCACHE_NREGS);
|
|
||||||
for(i = 0 ; i < reg_cnt ; ++i) {
|
|
||||||
regs[i]->flags |= MCA_RCACHE_FLAGS_INVALID;
|
|
||||||
if (regs[i]->ref_count) {
|
|
||||||
opal_output(0, "Release FAILED: ref_count=%d, base=%p, bound=%p, size=%d",
|
|
||||||
regs[i]->ref_count, regs[i]->base, regs[i]->bound,
|
|
||||||
(int) (regs[i]->bound - regs[i]->base + 1));
|
|
||||||
/* memory is being freed, but there are registration in use that
|
|
||||||
* covers the memory. This can happen even in a correct program,
|
|
||||||
* but may also be an user error. We can't tell. Mark the
|
|
||||||
* registration as invalid. It will not be used any more and
|
|
||||||
* will be unregistered when ref_count will become zero */
|
|
||||||
rc = OPAL_ERROR; /* tell caller that something was wrong */
|
|
||||||
} else {
|
|
||||||
opal_list_remove_item(&rcache_grdma->cache->lru_list,(opal_list_item_t *) regs[i]);
|
|
||||||
/* Now deregister. Do not use gc_list as we need to kick this out now. */
|
|
||||||
dereg_mem(regs[i]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} while(reg_cnt == GRDMA_RCACHE_NREGS);
|
|
||||||
|
|
||||||
OPAL_THREAD_UNLOCK(&rcache_grdma->cache->vma_module->vma_lock);
|
|
||||||
/* rcache->rcache->rcache_dump_range(rcache->rcache, 0, (size_t)-1, "After free");*/
|
|
||||||
|
|
||||||
return rc;
|
|
||||||
}
|
}
|
||||||
#endif /* OPAL_CUDA_GDR_SUPPORT */
|
#endif /* OPAL_CUDA_GDR_SUPPORT */
|
||||||
|
|
||||||
|
static int iterate_dereg_finalize (mca_rcache_base_registration_t *grdma_reg, void *ctx)
|
||||||
|
{
|
||||||
|
mca_rcache_grdma_module_t *rcache_grdma = (mca_rcache_grdma_module_t *) ctx;
|
||||||
|
|
||||||
|
if ((mca_rcache_base_module_t *) rcache_grdma != grdma_reg->rcache) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (registration_is_cacheable (grdma_reg)) {
|
||||||
|
opal_list_remove_item (&rcache_grdma->cache->lru_list, (opal_list_item_t *) grdma_reg);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* set the reference count to 0 otherwise dereg will fail on assert */
|
||||||
|
grdma_reg->ref_count = 0;
|
||||||
|
|
||||||
|
return dereg_mem (grdma_reg);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
static void mca_rcache_grdma_finalize (mca_rcache_base_module_t *rcache)
|
static void mca_rcache_grdma_finalize (mca_rcache_base_module_t *rcache)
|
||||||
{
|
{
|
||||||
mca_rcache_grdma_module_t *rcache_grdma = (mca_rcache_grdma_module_t*)rcache;
|
mca_rcache_grdma_module_t *rcache_grdma = (mca_rcache_grdma_module_t*)rcache;
|
||||||
mca_rcache_base_registration_t *regs[GRDMA_RCACHE_NREGS];
|
|
||||||
int reg_cnt, i;
|
|
||||||
|
|
||||||
/* Statistic */
|
/* Statistic */
|
||||||
if (true == mca_rcache_grdma_component.print_stats) {
|
if (true == mca_rcache_grdma_component.print_stats) {
|
||||||
@ -484,30 +518,14 @@ static void mca_rcache_grdma_finalize (mca_rcache_base_module_t *rcache)
|
|||||||
rcache_grdma->stat_evicted);
|
rcache_grdma->stat_evicted);
|
||||||
}
|
}
|
||||||
|
|
||||||
OPAL_THREAD_LOCK(&rcache_grdma->cache->vma_module->vma_lock);
|
do_unregistration_gc (rcache_grdma);
|
||||||
|
|
||||||
do_unregistration_gc(rcache);
|
(void) mca_rcache_base_vma_iterate (rcache_grdma->cache->vma_module, NULL, (size_t) -1,
|
||||||
|
iterate_dereg_finalize, (void *) rcache);
|
||||||
do {
|
|
||||||
reg_cnt = mca_rcache_base_vma_find_all (rcache_grdma->cache->vma_module, 0, (size_t)-1,
|
|
||||||
regs, GRDMA_RCACHE_NREGS);
|
|
||||||
|
|
||||||
for (i = 0 ; i < reg_cnt ; ++i) {
|
|
||||||
if (regs[i]->ref_count) {
|
|
||||||
regs[i]->ref_count = 0; /* otherwise dereg will fail on assert */
|
|
||||||
} else if (mca_rcache_grdma_component.leave_pinned) {
|
|
||||||
opal_list_remove_item(&rcache_grdma->cache->lru_list,
|
|
||||||
(opal_list_item_t *) regs[i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
(void) dereg_mem(regs[i]);
|
|
||||||
}
|
|
||||||
} while (reg_cnt == GRDMA_RCACHE_NREGS);
|
|
||||||
|
|
||||||
OBJ_RELEASE(rcache_grdma->cache);
|
OBJ_RELEASE(rcache_grdma->cache);
|
||||||
|
|
||||||
OBJ_DESTRUCT(&rcache_grdma->reg_list);
|
OBJ_DESTRUCT(&rcache_grdma->reg_list);
|
||||||
OPAL_THREAD_UNLOCK(&rcache_grdma->cache->vma_module->vma_lock);
|
|
||||||
|
|
||||||
/* this rcache was allocated by grdma_init in rcache_grdma_component.c */
|
/* this rcache was allocated by grdma_init in rcache_grdma_component.c */
|
||||||
free(rcache);
|
free(rcache);
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user