opal/mpool: add support for passing access flags to register
This commit adds an access_flags argument to the mpool registration function. This flag indicates what kind of access is being requested: local write, remote read, remote write, and remote atomic. The values of the registration access flags in the btl are tied to the new flags in the mpool. All mpools have been updated to include the new argument but only the grdma and udreg mpools have been updated to make use of the access flags. In both mpools existing registrations are checked for sufficient access before being returned. If a registration does not contain sufficient access it is marked as invalid and a new registration is generated. Signed-off-by: Nathan Hjelm <hjelmn@lanl.gov>
Этот коммит содержится в:
родитель
260905b9f4
Коммит
59aa93e1b6
@ -250,28 +250,29 @@ typedef uint8_t mca_btl_base_tag_t;
|
||||
#define MCA_BTL_ERROR_FLAGS_NONFATAL 0x2
|
||||
#define MCA_BTL_ERROR_FLAGS_ADD_CUDA_IPC 0x4
|
||||
|
||||
/** registration flags */
|
||||
/** registration flags. the access flags are a 1-1 mapping with the mpool
|
||||
* access flags. */
|
||||
enum {
|
||||
/** Allow local write on the registered region. If a region is registered
|
||||
* with this flag the registration can be used as the local handle for a
|
||||
* btl_get operation. */
|
||||
MCA_BTL_REG_FLAG_LOCAL_WRITE = 0x00000001,
|
||||
MCA_BTL_REG_FLAG_LOCAL_WRITE = MCA_MPOOL_ACCESS_LOCAL_WRITE,
|
||||
/** Allow remote read on the registered region. If a region is registered
|
||||
* with this flag the registration can be used as the remote handle for a
|
||||
* btl_get operation. */
|
||||
MCA_BTL_REG_FLAG_REMOTE_READ = 0x00000002,
|
||||
MCA_BTL_REG_FLAG_REMOTE_READ = MCA_MPOOL_ACCESS_REMOTE_READ,
|
||||
/** Allow remote write on the registered region. If a region is registered
|
||||
* with this flag the registration can be used as the remote handle for a
|
||||
* btl_put operation. */
|
||||
MCA_BTL_REG_FLAG_REMOTE_WRITE = 0x00000004,
|
||||
MCA_BTL_REG_FLAG_REMOTE_WRITE = MCA_MPOOL_ACCESS_REMOTE_WRITE,
|
||||
/** Allow remote atomic operations on the registered region. If a region is
|
||||
* registered with this flag the registration can be used as the remote
|
||||
* handle for a btl_atomic_op or btl_atomic_fop operation. */
|
||||
MCA_BTL_REG_FLAG_REMOTE_ATOMIC = 0x00000008,
|
||||
MCA_BTL_REG_FLAG_REMOTE_ATOMIC = MCA_MPOOL_ACCESS_REMOTE_ATOMIC,
|
||||
/** Allow any btl operation on the registered region. If a region is registered
|
||||
* with this flag the registration can be used as the local or remote handle for
|
||||
* any btl operation. */
|
||||
MCA_BTL_REG_FLAG_ACCESS_ANY = 0x0000000f,
|
||||
MCA_BTL_REG_FLAG_ACCESS_ANY = MCA_MPOOL_ACCESS_ANY,
|
||||
#if OPAL_CUDA_GDR_SUPPORT
|
||||
/** Region is in GPU memory */
|
||||
MCA_BTL_REG_FLAG_CUDA_GPU_MEM = 0x00010000,
|
||||
|
@ -1753,6 +1753,7 @@ static mca_btl_base_registration_handle_t *mca_btl_openib_register_mem (mca_btl_
|
||||
{
|
||||
mca_btl_openib_reg_t *reg;
|
||||
uint32_t mflags = 0;
|
||||
int access_flags = flags & MCA_BTL_REG_FLAG_ACCESS_ANY;
|
||||
int rc;
|
||||
|
||||
#if OPAL_CUDA_GDR_SUPPORT
|
||||
@ -1761,7 +1762,7 @@ static mca_btl_base_registration_handle_t *mca_btl_openib_register_mem (mca_btl_
|
||||
}
|
||||
#endif /* OPAL_CUDA_GDR_SUPPORT */
|
||||
|
||||
rc = btl->btl_mpool->mpool_register (btl->btl_mpool, base, size, mflags,
|
||||
rc = btl->btl_mpool->mpool_register (btl->btl_mpool, base, size, mflags, access_flags,
|
||||
(mca_mpool_base_registration_t **) &reg);
|
||||
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc || NULL == reg)) {
|
||||
return NULL;
|
||||
|
@ -586,11 +586,24 @@ static int openib_reg_mr(void *reg_data, void *base, size_t size,
|
||||
{
|
||||
mca_btl_openib_device_t *device = (mca_btl_openib_device_t*)reg_data;
|
||||
mca_btl_openib_reg_t *openib_reg = (mca_btl_openib_reg_t*)reg;
|
||||
enum ibv_access_flags access_flag = (enum ibv_access_flags) (IBV_ACCESS_LOCAL_WRITE |
|
||||
IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ);
|
||||
enum ibv_access_flags access_flag = 0;
|
||||
|
||||
if (reg->access_flags & MCA_MPOOL_ACCESS_REMOTE_READ) {
|
||||
access_flag |= IBV_ACCESS_REMOTE_READ;
|
||||
}
|
||||
|
||||
if (reg->access_flags & MCA_MPOOL_ACCESS_REMOTE_WRITE) {
|
||||
access_flag |= IBV_ACCESS_REMOTE_WRITE;
|
||||
}
|
||||
|
||||
if (reg->access_flags & MCA_MPOOL_ACCESS_LOCAL_WRITE) {
|
||||
access_flag |= IBV_ACCESS_LOCAL_WRITE;
|
||||
}
|
||||
|
||||
#if HAVE_DECL_IBV_ATOMIC_HCA
|
||||
if (reg->access_flags & MCA_MPOOL_ACCESS_REMOTE_ATOMIC) {
|
||||
access_flag |= IBV_ACCESS_REMOTE_ATOMIC;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (device->mem_reg_max &&
|
||||
|
@ -181,6 +181,7 @@ static mca_btl_base_registration_handle_t *mca_btl_scif_register_mem (struct mca
|
||||
void *base, size_t size, uint32_t flags)
|
||||
{
|
||||
mca_btl_scif_reg_t *scif_reg;
|
||||
int access_flags = flags & MCA_BTL_REG_FLAG_ACCESS_ANY;
|
||||
int rc;
|
||||
|
||||
if (MCA_BTL_ENDPOINT_ANY == endpoint) {
|
||||
@ -199,7 +200,7 @@ static mca_btl_base_registration_handle_t *mca_btl_scif_register_mem (struct mca
|
||||
}
|
||||
}
|
||||
|
||||
rc = btl->btl_mpool->mpool_register(btl->btl_mpool, base, size, 0,
|
||||
rc = btl->btl_mpool->mpool_register(btl->btl_mpool, base, size, 0, access_flags,
|
||||
(mca_mpool_base_registration_t **) &scif_reg);
|
||||
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
|
||||
return NULL;
|
||||
|
@ -1010,6 +1010,7 @@ static struct mca_btl_base_registration_handle_t *mca_btl_smcuda_register_mem (
|
||||
size_t size, uint32_t flags)
|
||||
{
|
||||
mca_mpool_common_cuda_reg_t *reg;
|
||||
int access_flags = flags & MCA_BTL_REG_FLAG_ACCESS_ANY;
|
||||
int mpool_flags = 0;
|
||||
|
||||
if (MCA_BTL_REG_FLAG_CUDA_GPU_MEM & flags) {
|
||||
@ -1017,7 +1018,7 @@ static struct mca_btl_base_registration_handle_t *mca_btl_smcuda_register_mem (
|
||||
}
|
||||
|
||||
btl->btl_mpool->mpool_register (btl->btl_mpool, base, size, mpool_flags,
|
||||
(mca_mpool_base_registration_t **) &reg);
|
||||
access_flags, (mca_mpool_base_registration_t **) &reg);
|
||||
if (OPAL_UNLIKELY(NULL == reg)) {
|
||||
return NULL;
|
||||
}
|
||||
@ -1088,6 +1089,7 @@ int mca_btl_smcuda_get_cuda (struct mca_btl_base_module_t *btl,
|
||||
* support. */
|
||||
rc = ep->mpool->mpool_register(ep->mpool, remote_handle->reg_data.memh_seg_addr.pval,
|
||||
remote_handle->reg_data.memh_seg_len, ep->peer_smp_rank,
|
||||
MCA_MPOOL_ACCESS_LOCAL_WRITE,
|
||||
(mca_mpool_base_registration_t **)&reg_ptr);
|
||||
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
|
@ -272,15 +272,24 @@ static int ugni_reg_rdma_mem (void *reg_data, void *base, size_t size,
|
||||
mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) reg_data;
|
||||
mca_btl_ugni_reg_t *ugni_reg = (mca_btl_ugni_reg_t *) reg;
|
||||
gni_return_t rc;
|
||||
int flags;
|
||||
|
||||
if (ugni_module->reg_count >= ugni_module->reg_max) {
|
||||
return OPAL_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
if (reg->access_flags & (MCA_MPOOL_ACCESS_REMOTE_WRITE | MCA_MPOOL_ACCESS_LOCAL_WRITE |
|
||||
MCA_MPOOL_ACCESS_REMOTE_ATOMIC)) {
|
||||
flags = GNI_MEM_READWRITE;
|
||||
} else {
|
||||
flags = GNI_MEM_READ_ONLY;
|
||||
}
|
||||
|
||||
flags |= GNI_MEM_RELAXED_PI_ORDERING;
|
||||
|
||||
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);
|
||||
rc = GNI_MemRegister (ugni_module->device->dev_handle, (uint64_t) base,
|
||||
size, NULL, GNI_MEM_READWRITE | GNI_MEM_RELAXED_PI_ORDERING,
|
||||
-1, &(ugni_reg->handle.gni_handle));
|
||||
size, NULL, flags, -1, &(ugni_reg->handle.gni_handle));
|
||||
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
|
||||
|
||||
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
|
||||
|
@ -304,9 +304,10 @@ mca_btl_ugni_register_mem (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *
|
||||
size_t size, uint32_t flags)
|
||||
{
|
||||
mca_btl_ugni_reg_t *reg;
|
||||
int access_flags = flags & MCA_BTL_REG_FLAG_ACCESS_ANY;
|
||||
int rc;
|
||||
|
||||
rc = btl->btl_mpool->mpool_register(btl->btl_mpool, base, size, 0,
|
||||
rc = btl->btl_mpool->mpool_register(btl->btl_mpool, base, size, 0, access_flags,
|
||||
(mca_mpool_base_registration_t **) &reg);
|
||||
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
|
||||
return NULL;
|
||||
|
@ -75,6 +75,7 @@ mca_btl_ugni_prepare_src_send_inplace (struct mca_btl_base_module_t *btl,
|
||||
|
||||
if (OPAL_UNLIKELY(true == use_eager_get)) {
|
||||
rc = btl->btl_mpool->mpool_register(btl->btl_mpool, data_ptr, *size, 0,
|
||||
MCA_MPOOL_ACCESS_REMOTE_READ,
|
||||
(mca_mpool_base_registration_t **)&registration);
|
||||
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
|
||||
mca_btl_ugni_frag_return (frag);
|
||||
|
@ -35,8 +35,15 @@ static int mca_btl_vader_knem_reg (void *reg_data, void *base, size_t size,
|
||||
|
||||
knem_cr.iovec_array = (uintptr_t) &knem_iov;
|
||||
knem_cr.iovec_nr = 1;
|
||||
/* TODO -- set proper access flags when the protection is passed down */
|
||||
knem_cr.protection = PROT_READ | PROT_WRITE;
|
||||
knem_cr.protection = 0;
|
||||
|
||||
if (reg->access_flags & (MCA_MPOOL_ACCESS_LOCAL_WRITE | MCA_MPOOL_ACCESS_REMOTE_WRITE)) {
|
||||
knem_cr.protection |= PROT_WRITE;
|
||||
}
|
||||
|
||||
if (reg->access_flags & MCA_MPOOL_ACCESS_REMOTE_READ) {
|
||||
knem_cr.protection |= PROT_READ;
|
||||
}
|
||||
|
||||
/* Vader will explicitly destroy this cookie */
|
||||
knem_cr.flags = 0;
|
||||
@ -66,9 +73,10 @@ mca_btl_vader_register_mem_knem (struct mca_btl_base_module_t* btl,
|
||||
void *base, size_t size, uint32_t flags)
|
||||
{
|
||||
mca_btl_vader_registration_handle_t *reg = NULL;
|
||||
int access_flags = flags & MCA_BTL_REG_FLAG_ACCESS_ANY;
|
||||
int rc;
|
||||
|
||||
rc = btl->btl_mpool->mpool_register (btl->btl_mpool, base, size, 0,
|
||||
rc = btl->btl_mpool->mpool_register (btl->btl_mpool, base, size, 0, access_flags,
|
||||
(mca_mpool_base_registration_t **) &reg);
|
||||
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
|
||||
return NULL;
|
||||
|
@ -253,7 +253,7 @@ void *mca_mpool_base_alloc(size_t size, opal_info_t *info)
|
||||
mpool_tree_item->regs[mpool_tree_item->count++] = registration;
|
||||
} else {
|
||||
if(mpool->mpool_register(mpool, mem, size, MCA_MPOOL_FLAGS_PERSIST,
|
||||
&registration) != OPAL_SUCCESS) {
|
||||
MCA_MPOOL_ACCESS_ANY, &registration) != OPAL_SUCCESS) {
|
||||
if(mpool_requested) {
|
||||
unregister_tree_item(mpool_tree_item);
|
||||
goto out;
|
||||
|
@ -75,7 +75,7 @@ void mca_mpool_gpusm_module_init(mca_mpool_gpusm_module_t *mpool);
|
||||
* register block of memory
|
||||
*/
|
||||
int mca_mpool_gpusm_register(mca_mpool_base_module_t* mpool, void *addr,
|
||||
size_t size, uint32_t flags, mca_mpool_base_registration_t **reg);
|
||||
size_t size, uint32_t flags, int32_t access_flags, mca_mpool_base_registration_t **reg);
|
||||
|
||||
/**
|
||||
* deregister memory
|
||||
|
@ -109,7 +109,7 @@ int mca_mpool_gpusm_find(mca_mpool_base_module_t *mpool, void *addr,
|
||||
size_t size,
|
||||
mca_mpool_base_registration_t **reg)
|
||||
{
|
||||
return mca_mpool_gpusm_register(mpool, addr, size, 0, reg);
|
||||
return mca_mpool_gpusm_register(mpool, addr, size, 0, 0, reg);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -119,7 +119,7 @@ int mca_mpool_gpusm_find(mca_mpool_base_module_t *mpool, void *addr,
|
||||
* deregister function is a no-op.
|
||||
*/
|
||||
int mca_mpool_gpusm_register(mca_mpool_base_module_t *mpool, void *addr,
|
||||
size_t size, uint32_t flags,
|
||||
size_t size, uint32_t flags, int32_t access_flags,
|
||||
mca_mpool_base_registration_t **reg)
|
||||
{
|
||||
mca_mpool_gpusm_module_t *mpool_gpusm = (mca_mpool_gpusm_module_t*)mpool;
|
||||
@ -147,6 +147,7 @@ int mca_mpool_gpusm_register(mca_mpool_base_module_t *mpool, void *addr,
|
||||
gpusm_reg->base = base;
|
||||
gpusm_reg->bound = bound;
|
||||
gpusm_reg->flags = flags;
|
||||
gpusm_reg->access_flags = access_flags;
|
||||
|
||||
rc = mpool_gpusm->resources.register_mem(base, size, gpusm_reg, NULL);
|
||||
|
||||
|
@ -112,7 +112,7 @@ void* mca_mpool_grdma_realloc( mca_mpool_base_module_t *mpool, void* addr,
|
||||
* register block of memory
|
||||
*/
|
||||
int mca_mpool_grdma_register(mca_mpool_base_module_t* mpool, void *addr,
|
||||
size_t size, uint32_t flags, mca_mpool_base_registration_t **reg);
|
||||
size_t size, uint32_t flags, int32_t access_flags, mca_mpool_base_registration_t **reg);
|
||||
|
||||
/**
|
||||
* deregister memory
|
||||
|
@ -44,6 +44,15 @@
|
||||
#include "opal/mca/mpool/base/base.h"
|
||||
#include "mpool_grdma.h"
|
||||
|
||||
static inline bool registration_is_cacheable(mca_mpool_base_registration_t *reg)
|
||||
{
|
||||
return (mca_mpool_grdma_component.leave_pinned &&
|
||||
!(reg->flags &
|
||||
(MCA_MPOOL_FLAGS_CACHE_BYPASS |
|
||||
MCA_MPOOL_FLAGS_PERSIST |
|
||||
MCA_MPOOL_FLAGS_INVALID)));
|
||||
}
|
||||
|
||||
#if OPAL_CUDA_GDR_SUPPORT
|
||||
static int check_for_cuda_freed_memory(mca_mpool_base_module_t *mpool, void *addr, size_t size);
|
||||
#endif /* OPAL_CUDA_GDR_SUPPORT */
|
||||
@ -155,7 +164,8 @@ void* mca_mpool_grdma_alloc(mca_mpool_base_module_t *mpool, size_t size,
|
||||
addr = (void*)OPAL_ALIGN((uintptr_t)base_addr, align, uintptr_t);
|
||||
#endif
|
||||
|
||||
if(OPAL_SUCCESS != mca_mpool_grdma_register(mpool, addr, size, flags, reg)) {
|
||||
if(OPAL_SUCCESS != mca_mpool_grdma_register(mpool, addr, size, flags,
|
||||
MCA_MPOOL_ACCESS_ANY, reg)) {
|
||||
free(base_addr);
|
||||
return NULL;
|
||||
}
|
||||
@ -214,7 +224,7 @@ bool mca_mpool_grdma_evict (struct mca_mpool_base_module_t *mpool)
|
||||
* register memory
|
||||
*/
|
||||
int mca_mpool_grdma_register (mca_mpool_base_module_t *mpool, void *addr,
|
||||
size_t size, uint32_t flags,
|
||||
size_t size, uint32_t flags, int32_t access_flags,
|
||||
mca_mpool_base_registration_t **reg)
|
||||
{
|
||||
mca_mpool_grdma_module_t *mpool_grdma = (mca_mpool_grdma_module_t*)mpool;
|
||||
@ -227,6 +237,8 @@ int mca_mpool_grdma_register(mca_mpool_base_module_t *mpool, void *addr,
|
||||
|
||||
OPAL_THREAD_LOCK(&mpool->rcache->lock);
|
||||
|
||||
*reg = NULL;
|
||||
|
||||
/* if cache bypass is requested don't use the cache */
|
||||
base = (unsigned char *) down_align_addr(addr, mca_mpool_base_page_size_log);
|
||||
bound = (unsigned char *) up_align_addr((void*)((char*) addr + size - 1),
|
||||
@ -249,23 +261,43 @@ int mca_mpool_grdma_register(mca_mpool_base_module_t *mpool, void *addr,
|
||||
* Persistent registration are always registered and placed in the cache */
|
||||
if(!(bypass_cache || persist)) {
|
||||
/* check to see if memory is registered */
|
||||
mpool->rcache->rcache_find(mpool->rcache, base, bound - base + 1, reg);
|
||||
if (*reg && !(flags & MCA_MPOOL_FLAGS_INVALID)) {
|
||||
if (0 == (*reg)->ref_count) {
|
||||
mpool->rcache->rcache_find(mpool->rcache, base, bound - base + 1, &grdma_reg);
|
||||
if (grdma_reg && !(flags & MCA_MPOOL_FLAGS_INVALID)) {
|
||||
if (OPAL_UNLIKELY((access_flags & grdma_reg->access_flags) != access_flags)) {
|
||||
access_flags |= grdma_reg->access_flags;
|
||||
|
||||
if (0 != grdma_reg->ref_count) {
|
||||
if (!(grdma_reg->flags & MCA_MPOOL_FLAGS_CACHE_BYPASS)) {
|
||||
grdma_reg->mpool->rcache->rcache_delete(grdma_reg->mpool->rcache, grdma_reg);
|
||||
}
|
||||
|
||||
/* mark the registration to go away when it is deregistered */
|
||||
grdma_reg->flags |= MCA_MPOOL_FLAGS_INVALID | MCA_MPOOL_FLAGS_CACHE_BYPASS;
|
||||
} else {
|
||||
if (registration_is_cacheable (grdma_reg)) {
|
||||
/* pull the item out of the lru */
|
||||
opal_list_remove_item (&mpool_grdma->pool->lru_list, (opal_list_item_t *) grdma_reg);
|
||||
}
|
||||
|
||||
(void) dereg_mem (grdma_reg);
|
||||
}
|
||||
} else {
|
||||
*reg = grdma_reg;
|
||||
if (0 == grdma_reg->ref_count) {
|
||||
/* Leave pinned must be set for this to still be in the rcache. */
|
||||
opal_list_remove_item(&mpool_grdma->pool->lru_list,
|
||||
(opal_list_item_t *)(*reg));
|
||||
(opal_list_item_t *) grdma_reg);
|
||||
}
|
||||
|
||||
/* This segment fits fully within an existing segment. */
|
||||
mpool_grdma->stat_cache_hit++;
|
||||
(*reg)->ref_count++;
|
||||
grdma_reg->ref_count++;
|
||||
OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
}
|
||||
|
||||
mpool_grdma->stat_cache_miss++;
|
||||
*reg = NULL; /* in case previous find found something */
|
||||
|
||||
/* Unless explicitly requested by the caller always store the
|
||||
* registration in the rcache. This will speed up the case where
|
||||
@ -285,6 +317,7 @@ int mca_mpool_grdma_register(mca_mpool_base_module_t *mpool, void *addr,
|
||||
grdma_reg->base = base;
|
||||
grdma_reg->bound = bound;
|
||||
grdma_reg->flags = flags;
|
||||
grdma_reg->access_flags = access_flags;
|
||||
#if OPAL_CUDA_GDR_SUPPORT
|
||||
if (flags & MCA_MPOOL_FLAGS_CUDA_GPU_MEM) {
|
||||
mca_common_cuda_get_buffer_id(grdma_reg);
|
||||
@ -389,15 +422,6 @@ int mca_mpool_grdma_find(struct mca_mpool_base_module_t *mpool, void *addr,
|
||||
return rc;
|
||||
}
|
||||
|
||||
static inline bool registration_is_cacheable(mca_mpool_base_registration_t *reg)
|
||||
{
|
||||
return (mca_mpool_grdma_component.leave_pinned &&
|
||||
!(reg->flags &
|
||||
(MCA_MPOOL_FLAGS_CACHE_BYPASS |
|
||||
MCA_MPOOL_FLAGS_PERSIST |
|
||||
MCA_MPOOL_FLAGS_INVALID)));
|
||||
}
|
||||
|
||||
int mca_mpool_grdma_deregister(struct mca_mpool_base_module_t *mpool,
|
||||
mca_mpool_base_registration_t *reg)
|
||||
{
|
||||
|
@ -48,6 +48,14 @@ struct opal_info_t;
|
||||
* hooks (ptmalloc2, etc) are required. */
|
||||
#define MCA_MPOOL_FLAGS_NO_HOOKS 0x80
|
||||
|
||||
/* access flags */
|
||||
enum {
|
||||
MCA_MPOOL_ACCESS_LOCAL_WRITE = 0x01,
|
||||
MCA_MPOOL_ACCESS_REMOTE_READ = 0x02,
|
||||
MCA_MPOOL_ACCESS_REMOTE_WRITE = 0x04,
|
||||
MCA_MPOOL_ACCESS_REMOTE_ATOMIC = 0x08,
|
||||
MCA_MPOOL_ACCESS_ANY = 0x0f,
|
||||
};
|
||||
|
||||
struct mca_mpool_base_resources_t;
|
||||
|
||||
@ -63,6 +71,7 @@ struct mca_mpool_base_registration_t {
|
||||
#if OPAL_CUDA_GDR_SUPPORT
|
||||
unsigned long long gpu_bufID;
|
||||
#endif /* OPAL_CUDA_GDR_SUPPORT */
|
||||
int32_t access_flags;
|
||||
};
|
||||
|
||||
typedef struct mca_mpool_base_registration_t mca_mpool_base_registration_t;
|
||||
@ -110,6 +119,7 @@ typedef int (*mca_mpool_base_module_register_fn_t)(
|
||||
void * addr,
|
||||
size_t size,
|
||||
uint32_t flags,
|
||||
int32_t access_flags,
|
||||
mca_mpool_base_registration_t** registration);
|
||||
|
||||
/**
|
||||
|
@ -79,7 +79,7 @@ void mca_mpool_rgpusm_module_init(mca_mpool_rgpusm_module_t *mpool);
|
||||
* register block of memory
|
||||
*/
|
||||
int mca_mpool_rgpusm_register(mca_mpool_base_module_t* mpool, void *addr,
|
||||
size_t size, uint32_t flags, mca_mpool_base_registration_t **reg);
|
||||
size_t size, uint32_t flags, int32_t access_flags, mca_mpool_base_registration_t **reg);
|
||||
|
||||
/**
|
||||
* deregister memory
|
||||
|
@ -181,7 +181,7 @@ void mca_mpool_rgpusm_module_init(mca_mpool_rgpusm_module_t* mpool)
|
||||
* memory for caching the registration.
|
||||
*/
|
||||
int mca_mpool_rgpusm_register (mca_mpool_base_module_t *mpool, void *addr,
|
||||
size_t size, uint32_t flags,
|
||||
size_t size, uint32_t flags, int32_t access_flags,
|
||||
mca_mpool_base_registration_t **reg)
|
||||
{
|
||||
mca_mpool_rgpusm_module_t *mpool_rgpusm = (mca_mpool_rgpusm_module_t*)mpool;
|
||||
|
@ -98,6 +98,9 @@ struct mca_mpool_udreg_module_t {
|
||||
mca_mpool_udreg_hugepage_t *huge_page;
|
||||
opal_mutex_t lock;
|
||||
void *udreg_handle;
|
||||
/** used to communicate the access flags to the underlying registration
|
||||
* function */
|
||||
int requested_access_flags;
|
||||
};
|
||||
typedef struct mca_mpool_udreg_module_t mca_mpool_udreg_module_t;
|
||||
|
||||
@ -129,7 +132,7 @@ void* mca_mpool_udreg_realloc( mca_mpool_base_module_t *mpool, void* addr,
|
||||
* register block of memory
|
||||
*/
|
||||
int mca_mpool_udreg_register(mca_mpool_base_module_t* mpool, void *addr,
|
||||
size_t size, uint32_t flags, mca_mpool_base_registration_t **reg);
|
||||
size_t size, uint32_t flags, int32_t access_flags, mca_mpool_base_registration_t **reg);
|
||||
|
||||
/**
|
||||
* deregister memory
|
||||
|
@ -204,6 +204,8 @@ static void *mca_mpool_udreg_reg_func (void *addr, uint64_t len, void *reg_conte
|
||||
udreg_reg->mpool = reg_context;
|
||||
udreg_reg->base = addr;
|
||||
udreg_reg->bound = (void *)((uintptr_t) addr + len);
|
||||
/* pull the access flags out of the mpool module */
|
||||
udreg_reg->access_flags = mpool_udreg->requested_access_flags;
|
||||
|
||||
rc = mpool_udreg->resources.register_mem(mpool_udreg->resources.reg_data,
|
||||
addr, len, udreg_reg);
|
||||
@ -221,6 +223,11 @@ static uint32_t mca_mpool_udreg_dereg_func (void *device_data, void *dreg_contex
|
||||
mca_mpool_base_registration_t *udreg_reg = (mca_mpool_base_registration_t *) device_data;
|
||||
int rc;
|
||||
|
||||
if (udreg_reg->ref_count) {
|
||||
/* there are still users of this registration. leave it alone */
|
||||
return 0;
|
||||
}
|
||||
|
||||
rc = mpool_udreg->resources.deregister_mem(mpool_udreg->resources.reg_data, udreg_reg);
|
||||
|
||||
if (OPAL_LIKELY(OPAL_SUCCESS == rc)) {
|
||||
@ -327,7 +334,7 @@ void* mca_mpool_udreg_alloc(mca_mpool_base_module_t *mpool, size_t size,
|
||||
#endif
|
||||
}
|
||||
|
||||
if (OPAL_SUCCESS != mca_mpool_udreg_register(mpool, addr, size, flags, reg)) {
|
||||
if (OPAL_SUCCESS != mca_mpool_udreg_register(mpool, addr, size, flags, MCA_MPOOL_ACCESS_ANY, reg)) {
|
||||
if (udreg_module->huge_page) {
|
||||
mca_mpool_udreg_free_huge ((mca_mpool_udreg_hugepage_alloc_t *) base_addr);
|
||||
} else {
|
||||
@ -355,47 +362,87 @@ bool mca_mpool_udreg_evict (struct mca_mpool_base_module_t *mpool)
|
||||
* register memory
|
||||
*/
|
||||
int mca_mpool_udreg_register(mca_mpool_base_module_t *mpool, void *addr,
|
||||
size_t size, uint32_t flags,
|
||||
size_t size, uint32_t flags, int32_t access_flags,
|
||||
mca_mpool_base_registration_t **reg)
|
||||
{
|
||||
mca_mpool_udreg_module_t *mpool_udreg = (mca_mpool_udreg_module_t *) mpool;
|
||||
mca_mpool_base_registration_t *udreg_reg;
|
||||
mca_mpool_base_registration_t *udreg_reg, *old_reg;
|
||||
bool bypass_cache = !!(flags & MCA_MPOOL_FLAGS_CACHE_BYPASS);
|
||||
udreg_entry_t *udreg_entry;
|
||||
udreg_return_t urc;
|
||||
|
||||
*reg = NULL;
|
||||
|
||||
OPAL_THREAD_LOCK(&mpool_udreg->lock);
|
||||
|
||||
/* we hold the lock so no other thread can modify these flags until the registration is complete */
|
||||
mpool_udreg->requested_access_flags = access_flags;
|
||||
|
||||
if (false == bypass_cache) {
|
||||
/* Get a udreg entry for this region */
|
||||
OPAL_THREAD_LOCK(&mpool_udreg->lock);
|
||||
do {
|
||||
while (UDREG_RC_SUCCESS !=
|
||||
(urc = UDREG_Register (mpool_udreg->udreg_handle, addr, size, &udreg_entry))) {
|
||||
/* try to remove one unused reg and retry */
|
||||
if (!mca_mpool_udreg_evict (mpool)) {
|
||||
*reg = NULL;
|
||||
OPAL_THREAD_UNLOCK(&mpool_udreg->lock);
|
||||
return OPAL_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
}
|
||||
OPAL_THREAD_UNLOCK(&mpool_udreg->lock);
|
||||
|
||||
udreg_reg = (mca_mpool_base_registration_t *) udreg_entry->device_data;
|
||||
|
||||
if ((udreg_reg->access_flags & access_flags) == access_flags) {
|
||||
/* sufficient access */
|
||||
break;
|
||||
}
|
||||
|
||||
old_reg = udreg_reg;
|
||||
|
||||
/* to not confuse udreg make sure the new registration covers the same address
|
||||
* range as the old one. */
|
||||
addr = old_reg->base;
|
||||
size = (size_t)((intptr_t) old_reg->bound - (intptr_t) old_reg->base);
|
||||
|
||||
/* make the new access flags more permissive */
|
||||
mpool_udreg->requested_access_flags = access_flags | old_reg->access_flags;
|
||||
|
||||
/* get a new registration */
|
||||
udreg_reg = mca_mpool_udreg_reg_func (addr, size, mpool);
|
||||
if (NULL == udreg_reg) {
|
||||
OPAL_THREAD_UNLOCK(&mpool_udreg->lock);
|
||||
return OPAL_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
/* update the device data with the new registration */
|
||||
udreg_entry->device_data = udreg_reg;
|
||||
|
||||
/* ensure that mca_mpool_udreg_deregister does not call into udreg since
|
||||
* we are forcefully evicting the registration here */
|
||||
old_reg->flags |= MCA_MPOOL_FLAGS_CACHE_BYPASS | MCA_MPOOL_FLAGS_INVALID;
|
||||
|
||||
mca_mpool_udreg_dereg_func (old_reg, mpool);
|
||||
} while (0);
|
||||
|
||||
udreg_reg->mpool_context = udreg_entry;
|
||||
} else {
|
||||
/* if cache bypass is requested don't use the udreg cache */
|
||||
while (NULL == (udreg_reg = mca_mpool_udreg_reg_func (addr, size, mpool))) {
|
||||
/* try to remove one unused reg and retry */
|
||||
if (!mca_mpool_udreg_evict (mpool)) {
|
||||
*reg = NULL;
|
||||
OPAL_THREAD_UNLOCK(&mpool_udreg->lock);
|
||||
return OPAL_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
}
|
||||
udreg_reg->mpool_context = NULL;
|
||||
}
|
||||
|
||||
OPAL_THREAD_UNLOCK(&mpool_udreg->lock);
|
||||
|
||||
udreg_reg->flags = flags;
|
||||
|
||||
*reg = udreg_reg;
|
||||
(*reg)->ref_count++;
|
||||
udreg_reg->ref_count++;
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
@ -445,14 +492,14 @@ int mca_mpool_udreg_deregister(struct mca_mpool_base_module_t *mpool,
|
||||
|
||||
assert(reg->ref_count > 0);
|
||||
|
||||
reg->ref_count--;
|
||||
--reg->ref_count;
|
||||
|
||||
if (0 == reg->ref_count && reg->flags & MCA_MPOOL_FLAGS_CACHE_BYPASS) {
|
||||
mca_mpool_udreg_dereg_func (reg, mpool);
|
||||
} else if (!(reg->flags & MCA_MPOOL_FLAGS_CACHE_BYPASS)) {
|
||||
if (!(reg->flags & MCA_MPOOL_FLAGS_CACHE_BYPASS)) {
|
||||
OPAL_THREAD_LOCK(&mpool_udreg->lock);
|
||||
UDREG_DecrRefcount (mpool_udreg->udreg_handle, reg->mpool_context);
|
||||
OPAL_THREAD_UNLOCK(&mpool_udreg->lock);
|
||||
} else {
|
||||
mca_mpool_udreg_dereg_func (reg, mpool);
|
||||
}
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user