2012-04-04 03:03:03 +04:00
|
|
|
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
|
|
|
/*
|
|
|
|
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
|
|
|
* University Research and Technology
|
|
|
|
* Corporation. All rights reserved.
|
2013-07-04 12:34:37 +04:00
|
|
|
* Copyright (c) 2004-2013 The University of Tennessee and The University
|
2012-04-04 03:03:03 +04:00
|
|
|
* of Tennessee Research Foundation. All rights
|
|
|
|
* reserved.
|
|
|
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
|
|
* University of Stuttgart. All rights reserved.
|
|
|
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
|
|
|
* All rights reserved.
|
|
|
|
* Copyright (c) 2006-2009 Cisco Systems, Inc. All rights reserved.
|
|
|
|
* Copyright (c) 2006 Voltaire. All rights reserved.
|
2013-02-12 21:45:27 +04:00
|
|
|
* Copyright (c) 2007 Mellanox Technologies. All rights reserved.
|
2012-04-04 03:03:03 +04:00
|
|
|
* Copyright (c) 2010 IBM Corporation. All rights reserved.
|
|
|
|
* Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
|
|
|
|
* reserved.
|
2013-09-24 21:23:50 +04:00
|
|
|
* Copyright (c) 2013 NVIDIA Corporation. All rights reserved.
|
2012-06-27 05:28:28 +04:00
|
|
|
*
|
2012-04-04 03:03:03 +04:00
|
|
|
* $COPYRIGHT$
|
|
|
|
*
|
|
|
|
* Additional copyrights may follow
|
|
|
|
*
|
|
|
|
* $HEADER$
|
|
|
|
*/
|
|
|
|
|
|
|
|
#define OPAL_DISABLE_ENABLE_MEM_DEBUG 1
|
|
|
|
#include "ompi_config.h"
|
2013-01-28 03:25:10 +04:00
|
|
|
|
2012-04-04 03:03:03 +04:00
|
|
|
#include <errno.h>
|
|
|
|
#include <string.h>
|
|
|
|
#ifdef HAVE_MALLOC_H
|
|
|
|
#include <malloc.h>
|
|
|
|
#endif
|
2013-01-28 03:25:10 +04:00
|
|
|
|
|
|
|
#include "opal/align.h"
|
|
|
|
|
2013-11-13 17:22:39 +04:00
|
|
|
#if OPAL_CUDA_SUPPORT_60
|
|
|
|
#include "ompi/mca/common/cuda/common_cuda.h"
|
|
|
|
#endif /* OPAL_CUDA_SUPPORT_60 */
|
2012-04-04 03:03:03 +04:00
|
|
|
#include "ompi/mca/rcache/rcache.h"
|
|
|
|
#include "ompi/mca/rcache/base/base.h"
|
2013-01-28 03:25:10 +04:00
|
|
|
#include "ompi/mca/rte/rte.h"
|
2012-04-04 03:03:03 +04:00
|
|
|
#include "ompi/runtime/params.h"
|
|
|
|
|
2013-01-28 03:25:10 +04:00
|
|
|
#include "ompi/mca/mpool/base/base.h"
|
|
|
|
#include "mpool_grdma.h"
|
|
|
|
|
2013-11-13 17:22:39 +04:00
|
|
|
#if OPAL_CUDA_SUPPORT_60
|
|
|
|
static int check_for_cuda_freed_memory(mca_mpool_base_module_t *mpool, void *addr, size_t size);
|
|
|
|
#endif /* OPAL_CUDA_SUPPORT_60 */
|
2012-04-04 03:03:03 +04:00
|
|
|
/**
 * Constructor for the grdma pool object.
 *
 * Zeroes every field located after the embedded parent object, constructs
 * the LRU and garbage-collection lists, and creates the registration cache
 * named by the component's rcache_name setting.
 *
 * @param pool  pool object being constructed
 */
static void mca_mpool_grdma_pool_constructor (mca_mpool_grdma_pool_t *pool)
{
    /* clear only the grdma-specific tail; pool->super is left untouched */
    memset ((void *)((uintptr_t)pool + sizeof (pool->super)), 0, sizeof (*pool) - sizeof (pool->super));

    OBJ_CONSTRUCT(&pool->lru_list, opal_list_t);
    OBJ_CONSTRUCT(&pool->gc_list, opal_list_t);

    pool->rcache = mca_rcache_base_module_create(mca_mpool_grdma_component.rcache_name);
}

/**
 * Destructor for the grdma pool object.
 *
 * Destructs the two lists built by the constructor and frees the pool name
 * string (free(NULL) is harmless if no name was ever set).
 *
 * @param pool  pool object being destroyed
 */
static void mca_mpool_grdma_pool_destructor (mca_mpool_grdma_pool_t *pool)
{
    OBJ_DESTRUCT(&pool->lru_list);
    OBJ_DESTRUCT(&pool->gc_list);

    free (pool->pool_name);
}

/* Class definition: mca_mpool_grdma_pool_t derives from opal_list_item_t */
OBJ_CLASS_INSTANCE(mca_mpool_grdma_pool_t, opal_list_item_t,
                   mca_mpool_grdma_pool_constructor,
                   mca_mpool_grdma_pool_destructor);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Initializes the mpool module.
|
|
|
|
*/
|
|
|
|
void mca_mpool_grdma_module_init(mca_mpool_grdma_module_t* mpool, mca_mpool_grdma_pool_t *pool)
|
|
|
|
{
|
|
|
|
OBJ_RETAIN(pool);
|
|
|
|
mpool->pool = pool;
|
|
|
|
|
|
|
|
mpool->super.mpool_component = &mca_mpool_grdma_component.super;
|
|
|
|
mpool->super.mpool_base = NULL; /* no base .. */
|
|
|
|
mpool->super.mpool_alloc = mca_mpool_grdma_alloc;
|
|
|
|
mpool->super.mpool_realloc = mca_mpool_grdma_realloc;
|
|
|
|
mpool->super.mpool_free = mca_mpool_grdma_free;
|
|
|
|
mpool->super.mpool_register = mca_mpool_grdma_register;
|
|
|
|
mpool->super.mpool_find = mca_mpool_grdma_find;
|
|
|
|
mpool->super.mpool_deregister = mca_mpool_grdma_deregister;
|
|
|
|
mpool->super.mpool_release_memory = mca_mpool_grdma_release_memory;
|
|
|
|
mpool->super.mpool_finalize = mca_mpool_grdma_finalize;
|
|
|
|
mpool->super.mpool_ft_event = mca_mpool_grdma_ft_event;
|
|
|
|
mpool->super.flags = MCA_MPOOL_FLAGS_MPI_ALLOC_MEM;
|
2012-06-21 03:00:25 +04:00
|
|
|
mpool->super.rcache = pool->rcache;
|
2012-04-04 03:03:03 +04:00
|
|
|
|
|
|
|
mpool->stat_cache_hit = mpool->stat_cache_miss = mpool->stat_evicted = 0;
|
|
|
|
mpool->stat_cache_found = mpool->stat_cache_notfound = 0;
|
|
|
|
|
|
|
|
OBJ_CONSTRUCT(&mpool->reg_list, ompi_free_list_t);
|
|
|
|
ompi_free_list_init_new(&mpool->reg_list, mpool->resources.sizeof_reg,
|
|
|
|
opal_cache_line_size,
|
|
|
|
OBJ_CLASS(mca_mpool_base_registration_t),
|
|
|
|
0, opal_cache_line_size, 0, -1, 32, NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
 * Deregister one registration and recycle its descriptor.
 *
 * The registration is removed from the rcache (unless it was created with
 * MCA_MPOOL_FLAGS_CACHE_BYPASS), handed to the resource-specific
 * deregister_mem callback and, on success, returned to the module's
 * registration free list.
 *
 * The rcache lock is expected to be held on entry: it is released around the
 * deregister_mem callback and re-acquired before returning.
 *
 * @param reg  registration to tear down
 * @return the value returned by the deregister_mem callback
 */
static inline int dereg_mem(mca_mpool_base_registration_t *reg)
{
    mca_mpool_grdma_module_t *mpool_grdma = (mca_mpool_grdma_module_t *) reg->mpool;
    int rc;

    if (!(reg->flags & MCA_MPOOL_FLAGS_CACHE_BYPASS)) {
        reg->mpool->rcache->rcache_delete(reg->mpool->rcache, reg);
    }

    /* Drop the rcache lock before deregistering the memory */
    OPAL_THREAD_UNLOCK(&reg->mpool->rcache->lock);
    rc = mpool_grdma->resources.deregister_mem(mpool_grdma->resources.reg_data,
                                               reg);
    OPAL_THREAD_LOCK(&reg->mpool->rcache->lock);

    /* the descriptor is only recycled when the deregistration succeeded */
    if (OPAL_LIKELY(OMPI_SUCCESS == rc)) {
        OMPI_FREE_LIST_RETURN_MT(&mpool_grdma->reg_list,
                                 (ompi_free_list_item_t *) reg);
    }

    return rc;
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* allocate function
|
|
|
|
*/
|
|
|
|
void* mca_mpool_grdma_alloc(mca_mpool_base_module_t *mpool, size_t size,
|
|
|
|
size_t align, uint32_t flags, mca_mpool_base_registration_t **reg)
|
|
|
|
{
|
|
|
|
void *base_addr, *addr;
|
|
|
|
|
|
|
|
if(0 == align)
|
|
|
|
align = mca_mpool_base_page_size;
|
|
|
|
|
2013-11-01 16:19:40 +04:00
|
|
|
#if OPAL_CUDA_SUPPORT
|
2012-04-04 03:03:03 +04:00
|
|
|
/* CUDA cannot handle registering overlapping regions, so make
|
|
|
|
* sure each region is page sized and page aligned. */
|
|
|
|
align = mca_mpool_base_page_size;
|
|
|
|
size = OPAL_ALIGN(size, mca_mpool_base_page_size, size_t);
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifdef HAVE_POSIX_MEMALIGN
|
|
|
|
if((errno = posix_memalign(&base_addr, align, size)) != 0)
|
|
|
|
return NULL;
|
2013-02-12 21:45:27 +04:00
|
|
|
|
2012-04-04 03:03:03 +04:00
|
|
|
addr = base_addr;
|
|
|
|
#else
|
|
|
|
base_addr = malloc(size + align);
|
|
|
|
if(NULL == base_addr)
|
|
|
|
return NULL;
|
2013-02-12 21:45:27 +04:00
|
|
|
|
2012-04-04 03:03:03 +04:00
|
|
|
addr = (void*)OPAL_ALIGN((uintptr_t)base_addr, align, uintptr_t);
|
|
|
|
#endif
|
|
|
|
|
|
|
|
if(OMPI_SUCCESS != mca_mpool_grdma_register(mpool, addr, size, flags, reg)) {
|
|
|
|
free(base_addr);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
(*reg)->alloc_base = (unsigned char *) base_addr;
|
|
|
|
|
|
|
|
return addr;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* This function must be called with the rcache lock held */
|
|
|
|
static inline void do_unregistration_gc(struct mca_mpool_base_module_t *mpool)
|
|
|
|
{
|
|
|
|
mca_mpool_grdma_module_t *mpool_grdma = (mca_mpool_grdma_module_t*)mpool;
|
|
|
|
opal_list_item_t *item;
|
|
|
|
|
|
|
|
/* Remove registration from garbage collection list
|
|
|
|
before deregistering it */
|
|
|
|
while (NULL !=
|
|
|
|
(item = opal_list_remove_first(&mpool_grdma->pool->gc_list))) {
|
|
|
|
dereg_mem((mca_mpool_base_registration_t *) item);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Evict the entry at the head of the pool's LRU list, if there is one.
 * Returns true when an entry was taken off the list (the result of the
 * deregistration itself is ignored), false when the list was empty. */
static inline bool mca_mpool_grdma_evict_lru_local (mca_mpool_grdma_pool_t *pool)
{
    mca_mpool_base_registration_t *victim =
        (mca_mpool_base_registration_t *) opal_list_remove_first (&pool->lru_list);
    mca_mpool_grdma_module_t *owner;

    if (NULL != victim) {
        /* fetch the owning module before the registration is handed to
         * dereg_mem */
        owner = (mca_mpool_grdma_module_t *) victim->mpool;

        (void) dereg_mem (victim);

        owner->stat_evicted++;

        return true;
    }

    return false;
}
|
|
|
|
|
|
|
|
/* Message state codes. None of them is referenced in this file. */
enum {
    MCA_MPOOL_GRDMA_MSG_EMPTY = 0,
    MCA_MPOOL_GRDMA_MSG_NEED_DEREG, /* 1 */
    MCA_MPOOL_GRDMA_MSG_BUSY,       /* 2 */
    MCA_MPOOL_GRDMA_MSG_COMPLETE    /* 3 */
};
|
|
|
|
|
|
|
|
bool mca_mpool_grdma_evict (struct mca_mpool_base_module_t *mpool)
|
|
|
|
{
|
2012-06-21 03:00:25 +04:00
|
|
|
return mca_mpool_grdma_evict_lru_local (((mca_mpool_grdma_module_t *) mpool)->pool);
|
2012-04-04 03:03:03 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* register memory
|
|
|
|
*/
|
|
|
|
int mca_mpool_grdma_register(mca_mpool_base_module_t *mpool, void *addr,
|
|
|
|
size_t size, uint32_t flags,
|
|
|
|
mca_mpool_base_registration_t **reg)
|
|
|
|
{
|
|
|
|
mca_mpool_grdma_module_t *mpool_grdma = (mca_mpool_grdma_module_t*)mpool;
|
2013-08-01 03:50:41 +04:00
|
|
|
const bool bypass_cache = !!(flags & MCA_MPOOL_FLAGS_CACHE_BYPASS);
|
|
|
|
const bool persist = !!(flags & MCA_MPOOL_FLAGS_PERSIST);
|
2012-04-04 03:03:03 +04:00
|
|
|
mca_mpool_base_registration_t *grdma_reg;
|
|
|
|
ompi_free_list_item_t *item;
|
|
|
|
unsigned char *base, *bound;
|
|
|
|
int rc;
|
|
|
|
|
|
|
|
OPAL_THREAD_LOCK(&mpool->rcache->lock);
|
|
|
|
|
|
|
|
/* if cache bypass is requested don't use the cache */
|
|
|
|
base = (unsigned char *) down_align_addr(addr, mca_mpool_base_page_size_log);
|
|
|
|
bound = (unsigned char *) up_align_addr((void*)((char*) addr + size - 1),
|
|
|
|
mca_mpool_base_page_size_log);
|
|
|
|
if (!opal_list_is_empty (&mpool_grdma->pool->gc_list))
|
|
|
|
do_unregistration_gc(mpool);
|
|
|
|
|
2013-11-13 17:22:39 +04:00
|
|
|
#if OPAL_CUDA_SUPPORT_60
|
|
|
|
if (flags & MCA_MPOOL_FLAGS_CUDA_GPU_MEM) {
|
|
|
|
size_t psize;
|
|
|
|
mca_common_cuda_get_address_range(&base, &psize, addr);
|
|
|
|
bound = base + psize - 1;
|
|
|
|
/* Check to see if this memory is in the cache and if it has been freed. If so,
|
|
|
|
* this call will boot it out of the cache. */
|
|
|
|
check_for_cuda_freed_memory(mpool, base, psize);
|
|
|
|
}
|
|
|
|
#endif /* OPAL_CUDA_SUPPORT_60 */
|
|
|
|
|
2012-04-04 03:03:03 +04:00
|
|
|
/* look through existing regs if not persistent registration requested.
|
|
|
|
* Persistent registration are always registered and placed in the cache */
|
2013-08-01 03:50:41 +04:00
|
|
|
if(!(bypass_cache || persist)) {
|
2012-04-04 03:03:03 +04:00
|
|
|
/* check to see if memory is registered */
|
2013-09-24 21:23:50 +04:00
|
|
|
mpool->rcache->rcache_find(mpool->rcache, base, bound - base + 1, reg);
|
2013-08-01 03:50:41 +04:00
|
|
|
if (*reg && !(flags & MCA_MPOOL_FLAGS_INVALID)) {
|
|
|
|
if (0 == (*reg)->ref_count) {
|
|
|
|
/* Leave pinned must be set for this to still be in the rcache. */
|
2012-04-04 03:03:03 +04:00
|
|
|
opal_list_remove_item(&mpool_grdma->pool->lru_list,
|
2013-08-01 03:50:41 +04:00
|
|
|
(opal_list_item_t *)(*reg));
|
2012-04-04 03:03:03 +04:00
|
|
|
}
|
2013-08-01 03:50:41 +04:00
|
|
|
|
|
|
|
/* This segment fits fully within an existing segment. */
|
2012-04-04 03:03:03 +04:00
|
|
|
mpool_grdma->stat_cache_hit++;
|
|
|
|
(*reg)->ref_count++;
|
|
|
|
OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
|
|
|
|
return OMPI_SUCCESS;
|
|
|
|
}
|
|
|
|
|
|
|
|
mpool_grdma->stat_cache_miss++;
|
|
|
|
*reg = NULL; /* in case previous find found something */
|
|
|
|
|
2013-08-01 03:50:41 +04:00
|
|
|
/* Unless explicitly requested by the caller always store the
|
|
|
|
* registration in the rcache. This will speed up the case where
|
|
|
|
* no leave pinned protocol is in use but the same segment is in
|
|
|
|
* use in multiple simultaneous transactions. We used to set bypass_cache
|
|
|
|
* here is !mca_mpool_grdma_component.leave_pinned. */
|
2012-04-04 03:03:03 +04:00
|
|
|
}
|
|
|
|
|
2013-07-09 02:07:52 +04:00
|
|
|
OMPI_FREE_LIST_GET_MT(&mpool_grdma->reg_list, item);
|
2013-07-04 12:34:37 +04:00
|
|
|
if(NULL == item) {
|
2012-04-04 03:03:03 +04:00
|
|
|
OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
|
2013-07-04 12:34:37 +04:00
|
|
|
return OMPI_ERR_OUT_OF_RESOURCE;
|
2012-04-04 03:03:03 +04:00
|
|
|
}
|
|
|
|
grdma_reg = (mca_mpool_base_registration_t*)item;
|
|
|
|
|
|
|
|
grdma_reg->mpool = mpool;
|
|
|
|
grdma_reg->base = base;
|
|
|
|
grdma_reg->bound = bound;
|
|
|
|
grdma_reg->flags = flags;
|
2013-11-13 17:22:39 +04:00
|
|
|
#if OPAL_CUDA_SUPPORT_60
|
|
|
|
if (flags & MCA_MPOOL_FLAGS_CUDA_GPU_MEM) {
|
|
|
|
mca_common_cuda_get_buffer_id(grdma_reg);
|
|
|
|
}
|
|
|
|
#endif /* OPAL_CUDA_SUPPORT_60 */
|
2012-04-04 03:03:03 +04:00
|
|
|
|
|
|
|
if (false == bypass_cache) {
|
|
|
|
rc = mpool->rcache->rcache_insert(mpool->rcache, grdma_reg, 0);
|
|
|
|
|
|
|
|
if (OPAL_UNLIKELY(rc != OMPI_SUCCESS)) {
|
|
|
|
OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
|
2013-07-09 02:07:52 +04:00
|
|
|
OMPI_FREE_LIST_RETURN_MT(&mpool_grdma->reg_list, item);
|
2012-04-04 03:03:03 +04:00
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
while (OMPI_ERR_OUT_OF_RESOURCE ==
|
|
|
|
(rc = mpool_grdma->resources.register_mem(mpool_grdma->resources.reg_data,
|
|
|
|
base, bound - base + 1, grdma_reg))) {
|
|
|
|
/* try to remove one unused reg and retry */
|
|
|
|
if (!mca_mpool_grdma_evict (mpool)) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (OPAL_UNLIKELY(rc != OMPI_SUCCESS)) {
|
|
|
|
if (false == bypass_cache) {
|
|
|
|
mpool->rcache->rcache_delete(mpool->rcache, grdma_reg);
|
|
|
|
}
|
|
|
|
OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
|
2013-07-09 02:07:52 +04:00
|
|
|
OMPI_FREE_LIST_RETURN_MT(&mpool_grdma->reg_list, item);
|
2012-04-04 03:03:03 +04:00
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
|
|
|
*reg = grdma_reg;
|
|
|
|
(*reg)->ref_count++;
|
|
|
|
OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
|
|
|
|
|
|
|
|
/* Cleanup any vmas that we have deferred deletion on */
|
|
|
|
mpool->rcache->rcache_clean(mpool->rcache);
|
|
|
|
return OMPI_SUCCESS;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
* realloc function
|
|
|
|
*/
|
|
|
|
void* mca_mpool_grdma_realloc(mca_mpool_base_module_t *mpool, void *addr,
|
|
|
|
size_t size, mca_mpool_base_registration_t **reg)
|
|
|
|
{
|
|
|
|
mca_mpool_base_registration_t *old_reg = *reg;
|
|
|
|
void *new_mem = mca_mpool_grdma_alloc(mpool, size, 0, old_reg->flags, reg);
|
|
|
|
memcpy(new_mem, addr, old_reg->bound - old_reg->base + 1);
|
|
|
|
mca_mpool_grdma_free(mpool, addr, old_reg);
|
|
|
|
|
|
|
|
return new_mem;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* free function
|
|
|
|
*/
|
|
|
|
void mca_mpool_grdma_free(mca_mpool_base_module_t *mpool, void *addr,
|
|
|
|
mca_mpool_base_registration_t *registration)
|
|
|
|
{
|
|
|
|
void *alloc_base = registration->alloc_base;
|
|
|
|
mca_mpool_grdma_deregister(mpool, registration);
|
|
|
|
free(alloc_base);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Look up an existing registration covering the page-aligned region around
 * [addr, addr + size).
 *
 * A registration found in the rcache is accepted when leave_pinned is on,
 * when it is persistent, or when it matches the aligned region exactly; an
 * accepted registration gets its reference count incremented. The found /
 * not-found statistics are updated accordingly.
 *
 * Returns whatever the rcache_find call returned; *reg is as left by that
 * call.
 */
int mca_mpool_grdma_find(struct mca_mpool_base_module_t *mpool, void *addr,
                         size_t size, mca_mpool_base_registration_t **reg)
{
    mca_mpool_grdma_module_t *grdma_module = (mca_mpool_grdma_module_t*)mpool;
    unsigned char *base, *bound;
    bool usable;
    int rc;

    base = (unsigned char *) down_align_addr(addr, mca_mpool_base_page_size_log);
    bound = (unsigned char *) up_align_addr((void*)((char*) addr + size - 1),
                                            mca_mpool_base_page_size_log);

    OPAL_THREAD_LOCK(&mpool->rcache->lock);

    rc = mpool->rcache->rcache_find(mpool->rcache, base, bound - base + 1, reg);

    usable = NULL != *reg &&
             (mca_mpool_grdma_component.leave_pinned ||
              ((*reg)->flags & MCA_MPOOL_FLAGS_PERSIST) ||
              ((*reg)->base == base && (*reg)->bound == bound));

    if (!usable) {
        grdma_module->stat_cache_notfound++;
    } else {
        assert(((void*)(*reg)->bound) >= addr);

        /* an idle registration sits on the LRU list while leave_pinned is
         * in effect; take it off before handing it out again */
        if (0 == (*reg)->ref_count &&
            mca_mpool_grdma_component.leave_pinned) {
            opal_list_remove_item(&grdma_module->pool->lru_list,
                                  (opal_list_item_t*)(*reg));
        }

        grdma_module->stat_cache_found++;
        (*reg)->ref_count++;
    }

    OPAL_THREAD_UNLOCK(&mpool->rcache->lock);

    return rc;
}
|
|
|
|
|
2013-08-01 03:50:41 +04:00
|
|
|
/* Decide whether an idle registration may be parked on the LRU list:
 * leave_pinned must be enabled and the registration must be neither a
 * cache-bypass, nor a persistent, nor an invalidated one. */
static inline bool registration_is_cacheable(mca_mpool_base_registration_t *reg)
{
    if (!mca_mpool_grdma_component.leave_pinned) {
        return false;
    }

    return 0 == (reg->flags & (MCA_MPOOL_FLAGS_CACHE_BYPASS |
                               MCA_MPOOL_FLAGS_PERSIST |
                               MCA_MPOOL_FLAGS_INVALID));
}
|
|
|
|
|
|
|
|
int mca_mpool_grdma_deregister(struct mca_mpool_base_module_t *mpool,
|
|
|
|
mca_mpool_base_registration_t *reg)
|
|
|
|
{
|
|
|
|
mca_mpool_grdma_module_t *mpool_grdma = (mca_mpool_grdma_module_t *) mpool;
|
|
|
|
int rc = OMPI_SUCCESS;
|
|
|
|
assert(reg->ref_count > 0);
|
|
|
|
|
|
|
|
OPAL_THREAD_LOCK(&mpool->rcache->lock);
|
|
|
|
reg->ref_count--;
|
|
|
|
if(reg->ref_count > 0) {
|
|
|
|
OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
|
|
|
|
return OMPI_SUCCESS;
|
|
|
|
}
|
|
|
|
|
2013-08-01 03:50:41 +04:00
|
|
|
if(registration_is_cacheable(reg)) {
|
2012-04-04 03:03:03 +04:00
|
|
|
opal_list_append(&mpool_grdma->pool->lru_list, (opal_list_item_t *) reg);
|
|
|
|
} else {
|
|
|
|
rc = dereg_mem (reg);
|
|
|
|
}
|
|
|
|
OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
|
|
|
|
|
|
|
|
/* Cleanup any vmas that we have deferred deletion on */
|
|
|
|
mpool->rcache->rcache_clean(mpool->rcache);
|
|
|
|
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
|
|
|
#define GRDMA_MPOOL_NREGS 100
|
|
|
|
|
|
|
|
int mca_mpool_grdma_release_memory(struct mca_mpool_base_module_t *mpool,
|
|
|
|
void *base, size_t size)
|
|
|
|
{
|
|
|
|
mca_mpool_grdma_module_t *mpool_grdma = (mca_mpool_grdma_module_t *) mpool;
|
|
|
|
mca_mpool_base_registration_t *regs[GRDMA_MPOOL_NREGS];
|
|
|
|
int reg_cnt, i, rc = OMPI_SUCCESS;
|
|
|
|
|
|
|
|
OPAL_THREAD_LOCK(&mpool->rcache->lock);
|
|
|
|
do {
|
|
|
|
reg_cnt = mpool->rcache->rcache_find_all(mpool->rcache, base, size,
|
2012-06-21 03:00:25 +04:00
|
|
|
regs, GRDMA_MPOOL_NREGS);
|
2012-04-04 03:03:03 +04:00
|
|
|
|
|
|
|
for(i = 0 ; i < reg_cnt ; ++i) {
|
|
|
|
regs[i]->flags |= MCA_MPOOL_FLAGS_INVALID;
|
|
|
|
if (regs[i]->ref_count) {
|
|
|
|
/* memory is being freed, but there are registration in use that
|
|
|
|
* covers the memory. This can happen even in a correct program,
|
|
|
|
* but may also be an user error. We can't tell. Mark the
|
|
|
|
* registration as invalid. It will not be used any more and
|
|
|
|
* will be unregistered when ref_count will become zero */
|
|
|
|
rc = OMPI_ERROR; /* tell caller that something was wrong */
|
|
|
|
} else {
|
|
|
|
opal_list_remove_item(&mpool_grdma->pool->lru_list,(opal_list_item_t *) regs[i]);
|
|
|
|
opal_list_append(&mpool_grdma->pool->gc_list, (opal_list_item_t *) regs[i]);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} while(reg_cnt == GRDMA_MPOOL_NREGS);
|
|
|
|
|
|
|
|
OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
|
|
|
|
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
2013-11-13 17:22:39 +04:00
|
|
|
/* Make sure this registration request is not stale. In other words, ensure
 * that we do not have a cuMemAlloc, cuMemFree, cuMemAlloc state. If we do
 * kick out the registrations and deregister. This function needs to be called
 * with the mpool->rcache->lock held, and the lock is still held when it
 * returns on every path (dereg_mem releases it temporarily and re-acquires
 * it).
 *
 * Returns 0 when nothing is cached for the range or the cached memory was
 * not previously freed, OMPI_SUCCESS when all stale registrations were
 * kicked out, and OMPI_ERROR when a stale registration was still in use. */
#if OPAL_CUDA_SUPPORT_60
static int check_for_cuda_freed_memory(mca_mpool_base_module_t *mpool, void *addr, size_t size)
{
    mca_mpool_grdma_module_t *mpool_grdma = (mca_mpool_grdma_module_t *) mpool;
    mca_mpool_base_registration_t *regs[GRDMA_MPOOL_NREGS];
    int reg_cnt, i, rc = OMPI_SUCCESS;
    mca_mpool_base_registration_t *reg = NULL;

    mpool->rcache->rcache_find(mpool->rcache, addr, size, &reg);
    if (NULL == reg) {
        return 0;
    }

    /* If not previously freed memory, just return 0 */
    if (!(mca_common_cuda_previously_freed_memory(reg))) {
        return 0;
    }

    /* This memory has been freed.  Find all registrations and delete */
    do {
        reg_cnt = mpool->rcache->rcache_find_all(mpool->rcache, reg->base, reg->bound - reg->base + 1,
                                                 regs, GRDMA_MPOOL_NREGS);
        for(i = 0 ; i < reg_cnt ; ++i) {
            regs[i]->flags |= MCA_MPOOL_FLAGS_INVALID;
            if (regs[i]->ref_count) {
                opal_output(0, "Release FAILED: ref_count=%d, base=%p, bound=%p, size=%d",
                            regs[i]->ref_count, regs[i]->base, regs[i]->bound,
                            (int) (regs[i]->bound - regs[i]->base + 1));
                /* memory is being freed, but there are registration in use that
                 * covers the memory. This can happen even in a correct program,
                 * but may also be an user error. We can't tell. Mark the
                 * registration as invalid. It will not be used any more and
                 * will be unregistered when ref_count will become zero */
                rc = OMPI_ERROR; /* tell caller that something was wrong */
            } else {
                opal_list_remove_item(&mpool_grdma->pool->lru_list,(opal_list_item_t *) regs[i]);
                /* Now deregister.  Do not use gc_list as we need to kick this out now. */
                dereg_mem(regs[i]);
            }
        }
    } while(reg_cnt == GRDMA_MPOOL_NREGS);

    /* The rcache lock is deliberately NOT released here: the caller took it
     * and releases it itself, and the early returns above leave it held as
     * well. Unlocking on this path only would leave the caller running
     * unprotected and then unlocking a lock it no longer owns. */

    return rc;
}
#endif /* OPAL_CUDA_SUPPORT_60 */
|
|
|
|
|
2012-04-04 03:03:03 +04:00
|
|
|
void mca_mpool_grdma_finalize(struct mca_mpool_base_module_t *mpool)
|
|
|
|
{
|
|
|
|
mca_mpool_grdma_module_t *mpool_grdma = (mca_mpool_grdma_module_t*)mpool;
|
|
|
|
mca_mpool_base_registration_t *regs[GRDMA_MPOOL_NREGS];
|
|
|
|
int reg_cnt, i;
|
|
|
|
|
|
|
|
/* Statistic */
|
|
|
|
if (true == mca_mpool_grdma_component.print_stats) {
|
|
|
|
opal_output(0, "%s grdma: stats "
|
|
|
|
"(hit/miss/found/not found/evicted): %d/%d/%d/%d/%d\n",
|
2013-01-28 03:25:10 +04:00
|
|
|
OMPI_NAME_PRINT(OMPI_PROC_MY_NAME),
|
2012-04-04 03:03:03 +04:00
|
|
|
mpool_grdma->stat_cache_hit, mpool_grdma->stat_cache_miss,
|
|
|
|
mpool_grdma->stat_cache_found, mpool_grdma->stat_cache_notfound,
|
|
|
|
mpool_grdma->stat_evicted);
|
|
|
|
}
|
|
|
|
|
|
|
|
OPAL_THREAD_LOCK(&mpool->rcache->lock);
|
|
|
|
|
|
|
|
do_unregistration_gc(mpool);
|
|
|
|
|
|
|
|
do {
|
|
|
|
reg_cnt = mpool->rcache->rcache_find_all(mpool->rcache, 0, (size_t)-1,
|
|
|
|
regs, GRDMA_MPOOL_NREGS);
|
|
|
|
|
|
|
|
for (i = 0 ; i < reg_cnt ; ++i) {
|
|
|
|
if (regs[i]->ref_count) {
|
|
|
|
regs[i]->ref_count = 0; /* otherwise dereg will fail on assert */
|
|
|
|
} else if (mca_mpool_grdma_component.leave_pinned) {
|
|
|
|
opal_list_remove_item(&mpool_grdma->pool->lru_list,
|
|
|
|
(opal_list_item_t *) regs[i]);
|
|
|
|
}
|
|
|
|
|
|
|
|
(void) dereg_mem(regs[i]);
|
|
|
|
}
|
|
|
|
} while (reg_cnt == GRDMA_MPOOL_NREGS);
|
|
|
|
|
|
|
|
OBJ_RELEASE(mpool_grdma->pool);
|
|
|
|
|
|
|
|
OBJ_DESTRUCT(&mpool_grdma->reg_list);
|
|
|
|
OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
|
|
|
|
|
|
|
|
/* Cleanup any vmas that we have deferred deletion on */
|
|
|
|
mpool->rcache->rcache_clean(mpool->rcache);
|
2013-10-23 19:51:55 +04:00
|
|
|
|
|
|
|
/* this mpool was allocated by grdma_init in mpool_grdma_component.c */
|
|
|
|
free(mpool);
|
2012-04-04 03:03:03 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
int mca_mpool_grdma_ft_event(int state) {
|
|
|
|
return OMPI_SUCCESS;
|
|
|
|
}
|