1
1

oshmem/memheap: optimized mkey lookup.

The fast-path lookup is done in an inline function.
Этот коммит содержится в:
Alex Mikheev 2015-07-06 16:46:12 +03:00 коммит произвёл yosefe
родитель bd3f4c8cc7
Коммит b020b628fc
12 изменённых файлов: 114 добавлений и 133 удалений

Просмотреть файл

@ -17,6 +17,7 @@
#include "oshmem/mca/atomic/atomic.h"
#include "oshmem/mca/atomic/base/base.h"
#include "oshmem/mca/memheap/memheap.h"
#include "oshmem/mca/memheap/base/base.h"
#include "oshmem/runtime/runtime.h"
#include "atomic_mxm.h"
@ -75,10 +76,7 @@ int mca_atomic_mxm_cswap(void *target,
if (MXM_PTL_SHM == ptl_id) {
ptl_id = MXM_PTL_RDMA;
}
r_mkey = mca_memheap.memheap_get_cached_mkey(pe,
target,
ptl_id,
&remote_addr);
r_mkey = mca_memheap_base_get_cached_mkey(pe, target, ptl_id, &remote_addr);
if (!r_mkey) {
ATOMIC_ERROR("[#%d] %p is not address of symmetric variable",
my_pe, target);

Просмотреть файл

@ -18,6 +18,7 @@
#include "oshmem/mca/atomic/atomic.h"
#include "oshmem/mca/atomic/base/base.h"
#include "oshmem/mca/memheap/memheap.h"
#include "oshmem/mca/memheap/base/base.h"
#include "oshmem/runtime/runtime.h"
#include "atomic_mxm.h"
@ -77,10 +78,7 @@ int mca_atomic_mxm_fadd(void *target,
if (MXM_PTL_SHM == ptl_id) {
ptl_id = MXM_PTL_RDMA;
}
r_mkey = mca_memheap.memheap_get_cached_mkey(pe,
target,
ptl_id,
&remote_addr);
r_mkey = mca_memheap_base_get_cached_mkey(pe, target, ptl_id, &remote_addr);
if (!r_mkey) {
ATOMIC_ERROR("[#%d] %p is not address of symmetric variable",
my_pe, target);

Просмотреть файл

@ -72,10 +72,11 @@ OSHMEM_DECLSPEC uint64_t mca_memheap_base_find_offset(int pe,
OSHMEM_DECLSPEC int mca_memheap_base_is_symmetric_addr(const void* va);
OSHMEM_DECLSPEC sshmem_mkey_t *mca_memheap_base_get_mkey(void* va,
int tr_id);
OSHMEM_DECLSPEC sshmem_mkey_t * mca_memheap_base_get_cached_mkey(int pe,
void* va,
int btl_id,
void** rva);
OSHMEM_DECLSPEC sshmem_mkey_t * mca_memheap_base_get_cached_mkey_slow(map_segment_t *s,
int pe,
void* va,
int btl_id,
void** rva);
OSHMEM_DECLSPEC void mca_memheap_modex_recv_all(void);
/* This function is for internal usage only
@ -147,6 +148,84 @@ OSHMEM_DECLSPEC extern mca_base_framework_t oshmem_memheap_base_framework;
oshmem_output_verbose(0, oshmem_memheap_base_framework.framework_output, \
"Warning %s:%d - %s()", __SPML_FILE__, __LINE__, __func__, __VA_ARGS__)
extern int mca_memheap_seg_cmp(const void *k, const void *v);
/* Turn ON/OFF debug output from build (default 0) */
#ifndef MEMHEAP_BASE_DEBUG
#define MEMHEAP_BASE_DEBUG 0
#endif
#define MEMHEAP_VERBOSE_FASTPATH(...)
extern mca_memheap_map_t* memheap_map;
static inline map_segment_t *memheap_find_va(const void* va)
{
map_segment_t *s;
if (OPAL_LIKELY((uintptr_t)va >= (uintptr_t)memheap_map->mem_segs[HEAP_SEG_INDEX].seg_base_addr &&
(uintptr_t)va < (uintptr_t)memheap_map->mem_segs[HEAP_SEG_INDEX].end)) {
s = &memheap_map->mem_segs[HEAP_SEG_INDEX];
} else {
s = bsearch(va,
&memheap_map->mem_segs[SYMB_SEG_INDEX],
memheap_map->n_segments - 1,
sizeof(*s),
mca_memheap_seg_cmp);
}
#if MEMHEAP_BASE_DEBUG == 1
if (s) {
MEMHEAP_VERBOSE(5, "match seg#%02ld: 0x%llX - 0x%llX %llu bytes va=%p",
s - memheap_map->mem_segs,
(long long)s->seg_base_addr,
(long long)s->end,
(long long)(s->end - s->seg_base_addr),
(void *)va);
}
#endif
return s;
}
/**
 * Translate a local virtual address into the corresponding remote address.
 *
 * The result is va shifted by the distance between the two segment bases,
 * i.e. va + (remote_base - local_base).
 *
 * All arithmetic is done on uintptr_t values.  Unsigned arithmetic wraps, so
 * a single expression yields the same result whichever base is larger.  This
 * avoids a branch and avoids a relational comparison of pointers to unrelated
 * objects, which ISO C leaves undefined.
 *
 * @param va           address inside the local segment
 * @param local_base   base address of the segment on this side
 * @param remote_base  base address of the same segment on the remote side
 * @return va rebased from local_base onto remote_base
 */
static inline void* memheap_va2rva(void* va, void* local_base, void* remote_base)
{
    return (void*) ((uintptr_t)va +
                    ((uintptr_t)remote_base - (uintptr_t)local_base));
}
/**
 * Fast-path lookup of the memory key for accessing va on pe via btl_id.
 *
 * Steps, in order:
 *  - find the segment containing va; return NULL if there is none or the
 *    segment is not valid;
 *  - if pe is the calling process, set *rva to va and return the segment's
 *    own key for btl_id;
 *  - if a key array is already cached for pe, translate va with
 *    memheap_va2rva() and return the cached key for btl_id;
 *  - otherwise defer to mca_memheap_base_get_cached_mkey_slow().
 *
 * @param pe      target processing element
 * @param va      local address to translate
 * @param btl_id  transport index into the key arrays
 * @param rva     out: address to use on the target side
 * @return the key entry, NULL when va is not in a valid segment, or whatever
 *         the slow path returns
 */
static inline sshmem_mkey_t *mca_memheap_base_get_cached_mkey(int pe,
                                                              void* va,
                                                              int btl_id,
                                                              void** rva)
{
    map_segment_t *seg;
    sshmem_mkey_t *cached;

    MEMHEAP_VERBOSE_FASTPATH(10, "rkey: pe=%d va=%p", pe, va);

    seg = memheap_find_va(va);
    if (OPAL_UNLIKELY(NULL == seg || !MAP_SEGMENT_IS_VALID(seg))) {
        return NULL;
    }

    /* Local access: no address translation needed. */
    if (OPAL_UNLIKELY(pe == oshmem_my_proc_id())) {
        *rva = va;
        MEMHEAP_VERBOSE_FASTPATH(10, "rkey: pe=%d va=%p -> (local) %lx %p", pe, va,
                                 seg->mkeys[btl_id].u.key, *rva);
        return &seg->mkeys[btl_id];
    }

    /* Nothing cached for this pe yet: take the out-of-line path. */
    if (OPAL_UNLIKELY(NULL == seg->mkeys_cache[pe])) {
        return mca_memheap_base_get_cached_mkey_slow(seg, pe, va, btl_id, rva);
    }

    cached = &seg->mkeys_cache[pe][btl_id];
    *rva = memheap_va2rva(va, seg->seg_base_addr, cached->va_base);
    MEMHEAP_VERBOSE_FASTPATH(10, "rkey: pe=%d va=%p -> (cached) %lx %p", pe, (void *)va, cached->u.key, (void *)*rva);
    return cached;
}
END_C_DECLS
#endif /* MCA_MEMHEAP_BASE_H */

Просмотреть файл

@ -55,9 +55,7 @@ struct oob_comm {
opal_list_t req_list;
};
#define MEMHEAP_VERBOSE_FASTPATH(...)
static mca_memheap_map_t* memheap_map = NULL;
mca_memheap_map_t* memheap_map = NULL;
struct oob_comm memheap_oob = {{{0}}};
@ -70,12 +68,12 @@ static int memheap_oob_get_mkeys(int pe,
uint32_t va_seg_num,
sshmem_mkey_t *mkey);
static inline void* __seg2base_va(int seg)
static inline void* mca_memheap_seg2base_va(int seg)
{
return memheap_map->mem_segs[seg].seg_base_addr;
}
static int _seg_cmp(const void *k, const void *v)
int mca_memheap_seg_cmp(const void *k, const void *v)
{
uintptr_t va = (uintptr_t) k;
map_segment_t *s = (map_segment_t *) v;
@ -88,34 +86,6 @@ static int _seg_cmp(const void *k, const void *v)
return 0;
}
/*
 * Return the map segment whose address range contains va.
 * The heap segment is checked directly; other addresses are searched with
 * bsearch(), so the result is NULL when no segment matches.
 */
static inline map_segment_t *__find_va(const void* va)
{
map_segment_t *s;
/* Expected case: va lies in [seg_base_addr, end) of the heap segment. */
if (OPAL_LIKELY((uintptr_t)va >= (uintptr_t)memheap_map->mem_segs[HEAP_SEG_INDEX].seg_base_addr &&
(uintptr_t)va < (uintptr_t)memheap_map->mem_segs[HEAP_SEG_INDEX].end)) {
s = &memheap_map->mem_segs[HEAP_SEG_INDEX];
} else {
/* Search the n_segments - 1 entries that start at SYMB_SEG_INDEX. */
s = bsearch(va,
&memheap_map->mem_segs[SYMB_SEG_INDEX],
memheap_map->n_segments - 1,
sizeof(*s),
_seg_cmp);
}
#if MEMHEAP_BASE_DEBUG == 1
/* Debug builds only: log the matched segment index, bounds and size. */
if (s) {
MEMHEAP_VERBOSE(5, "match seg#%02ld: 0x%llX - 0x%llX %llu bytes va=%p",
s - memheap_map->mem_segs,
(long long)s->seg_base_addr,
(long long)s->end,
(long long)(s->end - s->seg_base_addr),
(void *)va);
}
#endif
return s;
}
/**
* @param all_trs
* 0 - pack mkeys for transports to given pe
@ -146,7 +116,7 @@ static int pack_local_mkeys(opal_buffer_t *msg, int pe, int seg, int all_trs)
else {
tr_id = i;
}
mkey = mca_memheap_base_get_mkey(__seg2base_va(seg), tr_id);
mkey = mca_memheap_base_get_mkey(mca_memheap_seg2base_va(seg), tr_id);
if (!mkey) {
MEMHEAP_ERROR("seg#%d tr_id: %d failed to find local mkey",
seg, tr_id);
@ -511,7 +481,7 @@ static int memheap_oob_get_mkeys(int pe, uint32_t seg, sshmem_mkey_t *mkeys)
if (OSHMEM_SUCCESS == MCA_SPML_CALL(oob_get_mkeys(pe, seg, mkeys))) {
for (i = 0; i < memheap_map->num_transports; i++) {
mkeys[i].va_base = __seg2base_va(seg);
mkeys[i].va_base = mca_memheap_seg2base_va(seg);
MEMHEAP_VERBOSE(5,
"MKEY CALCULATED BY LOCAL SPML: pe: %d tr_id: %d %s",
pe,
@ -721,46 +691,15 @@ exit_fatal:
}
}
static inline void* va2rva(void* va,
void* local_base,
void* remote_base)
sshmem_mkey_t * mca_memheap_base_get_cached_mkey_slow(map_segment_t *s,
int pe,
void* va,
int btl_id,
void** rva)
{
return (void*) (remote_base > local_base ?
(uintptr_t)va + ((uintptr_t)remote_base - (uintptr_t)local_base) :
(uintptr_t)va - ((uintptr_t)local_base - (uintptr_t)remote_base));
}
sshmem_mkey_t * mca_memheap_base_get_cached_mkey(int pe,
void* va,
int btl_id,
void** rva)
{
map_segment_t *s;
int rc;
sshmem_mkey_t *mkey;
MEMHEAP_VERBOSE_FASTPATH(10, "rkey: pe=%d va=%p", pe, va);
s = __find_va(va);
if (NULL == s)
return NULL ;
if (!MAP_SEGMENT_IS_VALID(s))
return NULL ;
if (pe == oshmem_my_proc_id()) {
*rva = va;
MEMHEAP_VERBOSE_FASTPATH(10, "rkey: pe=%d va=%p -> (local) %lx %p", pe, va,
s->mkeys[btl_id].u.key, *rva);
return &s->mkeys[btl_id];
}
if (OPAL_LIKELY(s->mkeys_cache[pe])) {
mkey = &s->mkeys_cache[pe][btl_id];
*rva = va2rva(va, s->seg_base_addr, mkey->va_base);
MEMHEAP_VERBOSE_FASTPATH(10, "rkey: pe=%d va=%p -> (cached) %lx %p", pe, (void *)va, mkey->u.key, (void *)*rva);
return mkey;
}
s->mkeys_cache[pe] = (sshmem_mkey_t *) calloc(memheap_map->num_transports,
sizeof(sshmem_mkey_t));
if (!s->mkeys_cache[pe])
@ -773,7 +712,7 @@ sshmem_mkey_t * mca_memheap_base_get_cached_mkey(int pe,
return NULL ;
mkey = &s->mkeys_cache[pe][btl_id];
*rva = va2rva(va, s->seg_base_addr, mkey->va_base);
*rva = memheap_va2rva(va, s->seg_base_addr, mkey->va_base);
MEMHEAP_VERBOSE_FASTPATH(5, "rkey: pe=%d va=%p -> (remote lookup) %lx %p", pe, (void *)va, mkey->u.key, (void *)*rva);
return mkey;
@ -783,7 +722,7 @@ sshmem_mkey_t *mca_memheap_base_get_mkey(void* va, int tr_id)
{
map_segment_t *s;
s = __find_va(va);
s = memheap_find_va(va);
return ((s && MAP_SEGMENT_IS_VALID(s)) ? &s->mkeys[tr_id] : NULL );
}
@ -796,7 +735,7 @@ uint64_t mca_memheap_base_find_offset(int pe,
map_segment_t *s;
int my_pe = oshmem_my_proc_id();
s = __find_va(va);
s = memheap_find_va(va);
if (my_pe == pe) {
return (uintptr_t)va - (uintptr_t)s->seg_base_addr;
@ -808,7 +747,7 @@ uint64_t mca_memheap_base_find_offset(int pe,
int mca_memheap_base_is_symmetric_addr(const void* va)
{
return (__find_va(va) ? 1 : 0);
return (memheap_find_va(va) ? 1 : 0);
}
int mca_memheap_base_detect_addr_type(void* va)
@ -816,7 +755,7 @@ int mca_memheap_base_detect_addr_type(void* va)
int addr_type = ADDR_INVALID;
map_segment_t *s;
s = __find_va(va);
s = memheap_find_va(va);
if (s) {
if (s->type == MAP_SEGMENT_STATIC) {

Просмотреть файл

@ -33,7 +33,6 @@ mca_memheap_buddy_module_t memheap_buddy = {
mca_memheap_buddy_private_alloc,
mca_memheap_buddy_private_free,
mca_memheap_base_get_cached_mkey,
mca_memheap_base_get_mkey,
mca_memheap_base_find_offset,
mca_memheap_base_is_symmetric_addr,

Просмотреть файл

@ -66,14 +66,6 @@ typedef uint64_t (*mca_memheap_base_module_find_offset_fn_t)(int pe,
void* va,
void* rva);
/**
* @return mkey suitable to access pe via given transport id. rva is set to virtual address mapping of (va)
* on remote pe.
*/
typedef sshmem_mkey_t * (*mca_memheap_base_module_get_cached_mkey_fn_t)(int pe,
void* va,
int transport_id,
void** rva);
typedef sshmem_mkey_t * (*mca_memheap_base_module_get_local_mkey_fn_t)(void* va,
int transport_id);
@ -118,7 +110,6 @@ struct mca_memheap_base_module_t {
mca_memheap_base_module_alloc_fn_t memheap_private_alloc;
mca_memheap_base_module_free_fn_t memheap_private_free;
mca_memheap_base_module_get_cached_mkey_fn_t memheap_get_cached_mkey;
mca_memheap_base_module_get_local_mkey_fn_t memheap_get_local_mkey;
mca_memheap_base_module_find_offset_fn_t memheap_find_offset;
mca_memheap_base_is_memheap_addr_fn_t memheap_is_symmetric_addr;

Просмотреть файл

@ -31,7 +31,6 @@ mca_memheap_ptmalloc_module_t memheap_ptmalloc = {
mca_memheap_ptmalloc_alloc,
mca_memheap_ptmalloc_free,
mca_memheap_base_get_cached_mkey,
mca_memheap_base_get_mkey,
mca_memheap_base_find_offset,
mca_memheap_base_is_symmetric_addr,

Просмотреть файл

@ -69,7 +69,7 @@ OSHMEM_DECLSPEC int mca_spml_base_oob_get_mkeys(int pe,
uint32_t seg,
sshmem_mkey_t *mkeys);
OSHMEM_DECLSPEC void mca_spml_base_rmkey_unpack(sshmem_mkey_t *mkey);
OSHMEM_DECLSPEC void mca_spml_base_rmkey_unpack(sshmem_mkey_t *mkey, int pe);
OSHMEM_DECLSPEC void mca_spml_base_rmkey_free(sshmem_mkey_t *mkey);
/*

Просмотреть файл

@ -158,7 +158,7 @@ int mca_spml_base_oob_get_mkeys(int pe, uint32_t seg, sshmem_mkey_t *mkeys)
return OSHMEM_ERROR;
}
void mca_spml_base_rmkey_unpack(sshmem_mkey_t *mkey)
void mca_spml_base_rmkey_unpack(sshmem_mkey_t *mkey, int pe)
{
}

Просмотреть файл

@ -779,10 +779,7 @@ static int mca_spml_ikrit_get_helper(mxm_send_req_t *sreq,
/**
* Get the address to the remote rkey.
**/
r_mkey = mca_memheap.memheap_get_cached_mkey(src,
src_addr,
ptl_id,
&rva);
r_mkey = mca_memheap_base_get_cached_mkey(src, src_addr, ptl_id, &rva);
if (!r_mkey) {
SPML_ERROR("pe=%d: %p is not address of shared variable",
src, src_addr);
@ -829,10 +826,7 @@ static inline int mca_spml_ikrit_get_shm(void *src_addr,
if (ptl_id != MXM_PTL_SHM)
return OSHMEM_ERROR;
r_mkey = mca_memheap.memheap_get_cached_mkey(src,
src_addr,
ptl_id,
&rva);
r_mkey = mca_memheap_base_get_cached_mkey(src, src_addr, ptl_id, &rva);
if (!r_mkey) {
SPML_ERROR("pe=%d: %p is not address of shared variable",
src, src_addr);
@ -1067,10 +1061,7 @@ static inline int mca_spml_ikrit_put_internal(void* dst_addr,
ptl_id = get_ptl_id(dst);
/* Get rkey of remote PE (dst proc) which must be on memheap */
r_mkey = mca_memheap.memheap_get_cached_mkey(dst,
dst_addr,
ptl_id,
&rva);
r_mkey = mca_memheap_base_get_cached_mkey(dst, dst_addr, ptl_id, &rva);
if (!r_mkey) {
SPML_ERROR("pe=%d: %p is not address of shared variable",
dst, dst_addr);
@ -1094,10 +1085,7 @@ static inline int mca_spml_ikrit_put_internal(void* dst_addr,
}
/* segment not mapped - fall back to RDMA */
ptl_id = MXM_PTL_RDMA;
r_mkey = mca_memheap.memheap_get_cached_mkey(dst,
dst_addr,
ptl_id,
&rva);
r_mkey = mca_memheap_base_get_cached_mkey(dst, dst_addr, ptl_id, &rva);
if (!r_mkey) {
SPML_ERROR("pe=%d: %p is not address of shared variable",
dst, dst_addr);
@ -1209,11 +1197,7 @@ int mca_spml_ikrit_put_simple(void* dst_addr,
ptl_id = get_ptl_id(dst);
/* Get rkey of remote PE (dst proc) which must be on memheap */
r_mkey = mca_memheap.memheap_get_cached_mkey(dst,
//(unsigned long) dst_addr,
dst_addr,
ptl_id,
&rva);
r_mkey = mca_memheap_base_get_cached_mkey(dst, dst_addr, ptl_id, &rva);
if (!r_mkey) {
SPML_ERROR("pe=%d: %p is not address of shared variable",
dst, dst_addr);
@ -1236,7 +1220,7 @@ int mca_spml_ikrit_put_simple(void* dst_addr,
}
/* segment not mapped - fall back to RDMA */
ptl_id = MXM_PTL_RDMA;
r_mkey = mca_memheap.memheap_get_cached_mkey(dst,
r_mkey = mca_memheap_base_get_cached_mkey(dst,
//(unsigned long) dst_addr,
dst_addr,
ptl_id,

Просмотреть файл

@ -761,10 +761,7 @@ static inline int mca_spml_yoda_put_internal(void *dst_addr,
put_via_send = !(bml_btl->btl->btl_flags & MCA_BTL_FLAGS_PUT);
/* Get rkey of remote PE (dst proc) which must be on memheap*/
r_mkey = mca_memheap.memheap_get_cached_mkey(dst,
dst_addr,
btl_id,
&rva);
r_mkey = mca_memheap_base_get_cached_mkey(dst, dst_addr, btl_id, &rva);
if (!r_mkey) {
SPML_ERROR("pe=%d: %p is not address of shared variable",
dst, dst_addr);
@ -1035,10 +1032,7 @@ int mca_spml_yoda_get(void* src_addr, size_t size, void* dst_addr, int src)
(bml_btl->btl->btl_flags & (MCA_BTL_FLAGS_PUT)) );
/* Get rkey of remote PE (src proc) which must be on memheap*/
r_mkey = mca_memheap.memheap_get_cached_mkey(src,
src_addr,
btl_id,
&rva);
r_mkey = mca_memheap_base_get_cached_mkey(src, src_addr, btl_id, &rva);
if (!r_mkey) {
SPML_ERROR("pe=%d: %p is not address of shared variable",
src, src_addr);

Просмотреть файл

@ -14,6 +14,7 @@
#include "oshmem/runtime/runtime.h"
#include "oshmem/mca/memheap/memheap.h"
#include "oshmem/mca/memheap/base/base.h"
#if OSHMEM_PROFILING
#include "oshmem/include/pshmem.h"
@ -28,8 +29,7 @@ int shmem_addr_accessible(void *addr, int pe)
RUNTIME_CHECK_INIT();
mkey = MCA_MEMHEAP_CALL(get_cached_mkey(pe, addr,
oshmem_get_transport_id(pe), &rva));
mkey = mca_memheap_base_get_cached_mkey(pe, addr, oshmem_get_transport_id(pe), &rva);
return mkey ? 1 : 0;
}