1
1

Merge pull request #2354 from alex-mikheev/topic/oshmem_mkey_cache

ikrit spml cleanup, mkey cache and assorted bug fixes
Этот коммит содержится в:
Joshua Ladd 2016-11-14 17:22:13 -05:00 коммит произвёл GitHub
родитель bcc8230501 864904e8ab
Коммит 9a79da729f
24 изменённых файлов: 640 добавлений и 987 удалений

Просмотреть файл

@ -19,6 +19,7 @@
/* This component uses SPML:IKRIT */ /* This component uses SPML:IKRIT */
#include "oshmem/mca/spml/ikrit/spml_ikrit.h" #include "oshmem/mca/spml/ikrit/spml_ikrit.h"
#include "oshmem/runtime/runtime.h"
BEGIN_C_DECLS BEGIN_C_DECLS
@ -60,16 +61,76 @@ struct mca_atomic_mxm_module_t {
typedef struct mca_atomic_mxm_module_t mca_atomic_mxm_module_t; typedef struct mca_atomic_mxm_module_t mca_atomic_mxm_module_t;
OBJ_CLASS_DECLARATION(mca_atomic_mxm_module_t); OBJ_CLASS_DECLARATION(mca_atomic_mxm_module_t);
/**
 * Translate an atomic operand size in bytes into the log2 "order" value
 * that is stored in the MXM atomic request.
 *
 * @param nlong  operand size in bytes; 1, 2, 4 and 8 are accepted.
 * @return 0, 1, 2 or 3 for sizes 1, 2, 4 or 8 respectively.
 *
 * Any other size is reported through ATOMIC_ERROR and followed by
 * oshmem_shmem_abort(-1).
 */
static inline uint8_t mca_atomic_mxm_order(size_t nlong)
{
    /* 8- and 4-byte operands are tested first and hinted as the likely cases */
    if (OPAL_LIKELY(8 == nlong)) {
        return 3;
    }
    if (OPAL_LIKELY(4 == nlong)) {
        return 2;
    }

    switch (nlong) {
    case 2:
        return 1;
    case 1:
        return 0;
    default:
        break;
    }

    ATOMIC_ERROR("Type size must be 1/2/4 or 8 bytes.");
    oshmem_shmem_abort(-1);
    /* NOTE(review): this error code is narrowed to uint8_t on return;
     * presumably unreachable because the abort above does not return - confirm. */
    return OSHMEM_ERR_BAD_PARAM;
}
/**
 * Fill in the parts of an MXM send request that are set up here for an
 * atomic operation on a remote symmetric address.
 *
 * @param sreq    request to initialize.
 * @param pe      index of the target peer in mxm_peers[].
 * @param target  local address used to look up the remote key and address.
 * @param nlong   operand size in bytes (validated by mca_atomic_mxm_order()).
 *
 * The local data buffer pointer, the opcode and the atomic operand value
 * are not touched here.
 */
static inline void mca_atomic_mxm_req_init(mxm_send_req_t *sreq, int pe, void *target, size_t nlong)
{
    void *rva;
    /* size validation happens before the key lookup, as both may abort */
    const uint8_t order = mca_atomic_mxm_order(nlong);
    mxm_mem_key_t *rkey = mca_spml_ikrit_get_mkey(pe, target, MXM_PTL_RDMA, &rva);

    /* generic request header */
    sreq->flags             = 0;
    sreq->base.state        = MXM_REQ_NEW;
    sreq->base.completed_cb = NULL;
    sreq->base.mq           = mca_atomic_mxm_spml_self->mxm_mq;
    sreq->base.conn         = mca_atomic_mxm_spml_self->mxm_peers[pe].mxm_hw_rdma_conn;

    /* local data buffer description (pointer is left unset) */
    sreq->base.data_type          = MXM_REQ_DATA_BUFFER;
    sreq->base.data.buffer.length = nlong;
    sreq->base.data.buffer.memh   = MXM_INVALID_MEM_HANDLE;

    /* remote side of the atomic */
    sreq->op.atomic.order        = order;
    sreq->op.atomic.remote_mkey  = rkey;
    sreq->op.atomic.remote_vaddr = (uintptr_t) rva;
}
/**
 * Submit an initialized MXM send request and wait for it.
 *
 * @param sreq  request to send; its base.error field is inspected after
 *              mxm_req_wait() returns.
 *
 * A failure from mxm_req_send() or a non-MXM_OK request error is logged
 * with ATOMIC_ERROR and followed by oshmem_shmem_abort(-1).
 */
static inline void mca_atomic_mxm_post(mxm_send_req_t *sreq)
{
    const mxm_error_t send_status = mxm_req_send(sreq);

    if (OPAL_UNLIKELY(MXM_OK != send_status)) {
        ATOMIC_ERROR("mxm_req_send failed, mxm_error = %d",
                     send_status);
        oshmem_shmem_abort(-1);
    }

    mxm_req_wait(&sreq->base);

    if (OPAL_LIKELY(MXM_OK == sreq->base.error)) {
        return;
    }

    ATOMIC_ERROR("mxm_req_wait got non MXM_OK error: %d",
                 sreq->base.error);
    oshmem_shmem_abort(-1);
}
END_C_DECLS END_C_DECLS
#if MXM_API >= MXM_VERSION(2,0)
/**
 * View an sshmem memory key as an MXM memory key.
 *
 * @param mkey  key to convert.
 * @return address of mxm_empty_mem_key when mkey->len is 0, otherwise
 *         mkey->u.data reinterpreted as an mxm_mem_key_t pointer.
 */
static inline mxm_mem_key_t *to_mxm_mkey(sshmem_mkey_t *mkey) {
    return (0 == mkey->len) ? &mxm_empty_mem_key
                            : (mxm_mem_key_t *)mkey->u.data;
}
#endif
#endif /* MCA_ATOMIC_MXM_H */ #endif /* MCA_ATOMIC_MXM_H */

Просмотреть файл

@ -31,96 +31,20 @@ int mca_atomic_mxm_cswap(void *target,
size_t nlong, size_t nlong,
int pe) int pe)
{ {
unsigned my_pe;
uint8_t nlong_order;
void *remote_addr;
int ptl_id;
mxm_send_req_t sreq; mxm_send_req_t sreq;
mxm_error_t mxm_err;
sshmem_mkey_t *r_mkey;
my_pe = oshmem_my_proc_id(); mca_atomic_mxm_req_init(&sreq, pe, target, nlong);
ptl_id = -1;
mxm_err = MXM_OK;
switch (nlong) {
case 1:
nlong_order = 0;
break;
case 2:
nlong_order = 1;
break;
case 4:
nlong_order = 2;
break;
case 8:
nlong_order = 3;
break;
default:
ATOMIC_ERROR("[#%d] Type size must be 1/2/4 or 8 bytes.", my_pe);
oshmem_shmem_abort(-1);
return OSHMEM_ERR_BAD_PARAM;
}
ptl_id = OSHMEM_PROC_DATA(oshmem_proc_group_all(pe))->transport_ids[0];
if (MXM_PTL_SHM == ptl_id) {
ptl_id = MXM_PTL_RDMA;
}
r_mkey = mca_memheap_base_get_cached_mkey(pe, target, ptl_id, &remote_addr);
if (!r_mkey) {
ATOMIC_ERROR("[#%d] %p is not address of symmetric variable",
my_pe, target);
oshmem_shmem_abort(-1);
return OSHMEM_ERR_BAD_PARAM;
}
/* mxm request init */
sreq.base.state = MXM_REQ_NEW;
sreq.base.mq = mca_atomic_mxm_spml_self->mxm_mq;
sreq.base.conn = mca_atomic_mxm_spml_self->mxm_peers[pe]->mxm_hw_rdma_conn;
sreq.base.completed_cb = NULL;
sreq.base.data_type = MXM_REQ_DATA_BUFFER;
/* set data */
sreq.base.data.buffer.ptr = (void *) value; sreq.base.data.buffer.ptr = (void *) value;
sreq.base.data.buffer.length = nlong;
sreq.base.data.buffer.memh = MXM_INVALID_MEM_HANDLE;
sreq.op.atomic.remote_vaddr = (uintptr_t) remote_addr;
#if MXM_API < MXM_VERSION(2,0)
sreq.base.flags = 0;
sreq.op.atomic.remote_memh = MXM_INVALID_MEM_HANDLE;
#else
sreq.flags = 0;
sreq.op.atomic.remote_mkey = to_mxm_mkey(r_mkey);
#endif
sreq.op.atomic.order = nlong_order;
if (NULL == cond) { if (NULL == cond) {
sreq.opcode = MXM_REQ_OP_ATOMIC_SWAP; sreq.opcode = MXM_REQ_OP_ATOMIC_SWAP;
} else { } else {
#if MXM_API < MXM_VERSION(2,0)
memcpy(&sreq.op.atomic.value8, cond, nlong);
#else
memcpy(&sreq.op.atomic.value, cond, nlong); memcpy(&sreq.op.atomic.value, cond, nlong);
#endif
sreq.opcode = MXM_REQ_OP_ATOMIC_CSWAP; sreq.opcode = MXM_REQ_OP_ATOMIC_CSWAP;
} }
if (MXM_OK != (mxm_err = mxm_req_send(&sreq))) { mca_atomic_mxm_post(&sreq);
ATOMIC_ERROR("[#%d] mxm_req_send failed, mxm_error = %d",
my_pe, mxm_err);
oshmem_shmem_abort(-1);
return OSHMEM_ERROR;
}
mxm_req_wait(&sreq.base);
if (MXM_OK != sreq.base.error) {
ATOMIC_ERROR("[#%d] mxm_req_wait got non MXM_OK error: %d",
my_pe, sreq.base.error);
oshmem_shmem_abort(-1);
return OSHMEM_ERROR;
}
memcpy(prev, value, nlong); memcpy(prev, value, nlong);
return OSHMEM_SUCCESS; return OSHMEM_SUCCESS;

Просмотреть файл

@ -32,106 +32,20 @@ int mca_atomic_mxm_fadd(void *target,
int pe, int pe,
struct oshmem_op_t *op) struct oshmem_op_t *op)
{ {
unsigned my_pe;
uint8_t nlong_order;
void *remote_addr;
int ptl_id;
mxm_send_req_t sreq; mxm_send_req_t sreq;
mxm_error_t mxm_err;
sshmem_mkey_t *r_mkey;
static char dummy_buf[8]; static char dummy_buf[8];
my_pe = oshmem_my_proc_id(); mca_atomic_mxm_req_init(&sreq, pe, target, nlong);
ptl_id = -1;
mxm_err = MXM_OK;
switch (nlong) {
case 1:
nlong_order = 0;
break;
case 2:
nlong_order = 1;
break;
case 4:
nlong_order = 2;
break;
case 8:
nlong_order = 3;
break;
default:
ATOMIC_ERROR("[#%d] Type size must be 1/2/4 or 8 bytes.", my_pe);
oshmem_shmem_abort(-1);
return OSHMEM_ERR_BAD_PARAM;
}
ptl_id = OSHMEM_PROC_DATA(oshmem_proc_group_all(pe))->transport_ids[0];
if (MXM_PTL_SHM == ptl_id) {
ptl_id = MXM_PTL_RDMA;
}
r_mkey = mca_memheap_base_get_cached_mkey(pe, target, ptl_id, &remote_addr);
if (!r_mkey) {
ATOMIC_ERROR("[#%d] %p is not address of symmetric variable",
my_pe, target);
oshmem_shmem_abort(-1);
return OSHMEM_ERR_BAD_PARAM;
}
/* mxm request init */
sreq.base.state = MXM_REQ_NEW;
sreq.base.mq = mca_atomic_mxm_spml_self->mxm_mq;
sreq.base.conn = mca_atomic_mxm_spml_self->mxm_peers[pe]->mxm_hw_rdma_conn;
sreq.base.completed_cb = NULL;
sreq.base.data_type = MXM_REQ_DATA_BUFFER;
sreq.op.atomic.remote_vaddr = (uintptr_t) remote_addr;
#if MXM_API < MXM_VERSION(2,0)
sreq.op.atomic.remote_memh = MXM_INVALID_MEM_HANDLE;
memcpy(&sreq.op.atomic.value8, value, nlong);
#else
sreq.op.atomic.remote_mkey = to_mxm_mkey(r_mkey);
memcpy(&sreq.op.atomic.value, value, nlong); memcpy(&sreq.op.atomic.value, value, nlong);
#endif sreq.opcode = MXM_REQ_OP_ATOMIC_FADD;
sreq.op.atomic.order = nlong_order;
/* Do we need atomic 'add' or atomic 'fetch and add'? */
if (NULL == prev) { if (NULL == prev) {
sreq.base.data.buffer.ptr = dummy_buf; sreq.base.data.buffer.ptr = dummy_buf;
sreq.base.data.buffer.length = nlong;
sreq.base.data.buffer.memh = MXM_INVALID_MEM_HANDLE;
#if MXM_API < MXM_VERSION(2,0)
sreq.base.flags = MXM_REQ_FLAG_SEND_SYNC;
sreq.opcode = MXM_REQ_OP_ATOMIC_ADD;
#else
sreq.flags = 0;
sreq.opcode = MXM_REQ_OP_ATOMIC_FADD;
#endif
} else { } else {
sreq.base.data.buffer.ptr = prev; sreq.base.data.buffer.ptr = prev;
sreq.base.data.buffer.length = nlong;
sreq.base.data.buffer.memh = MXM_INVALID_MEM_HANDLE;
#if MXM_API < MXM_VERSION(2,0)
sreq.base.flags = 0;
#else
sreq.flags = 0;
#endif
sreq.opcode = MXM_REQ_OP_ATOMIC_FADD;
} }
if (MXM_OK != (mxm_err = mxm_req_send(&sreq))) { mca_atomic_mxm_post(&sreq);
ATOMIC_ERROR("[#%d] mxm_req_send failed, mxm_error = %d",
my_pe, mxm_err);
oshmem_shmem_abort(-1);
return OSHMEM_ERROR;
}
mxm_req_wait(&sreq.base);
if (MXM_OK != sreq.base.error) {
ATOMIC_ERROR("[#%d] mxm_req_wait got non MXM_OK error: %d",
my_pe, sreq.base.error);
oshmem_shmem_abort(-1);
return OSHMEM_ERROR;
}
return OSHMEM_SUCCESS; return OSHMEM_SUCCESS;
} }

Просмотреть файл

@ -44,9 +44,13 @@ extern char* mca_memheap_base_exclude;
extern int mca_memheap_base_already_opened; extern int mca_memheap_base_already_opened;
extern int mca_memheap_base_key_exchange; extern int mca_memheap_base_key_exchange;
#define MCA_MEMHEAP_MAX_SEGMENTS 256 #define MCA_MEMHEAP_MAX_SEGMENTS 4
#define HEAP_SEG_INDEX 0 #define HEAP_SEG_INDEX 0
#define SYMB_SEG_INDEX 1 #define SYMB_SEG_INDEX 1
#define MCA_MEMHEAP_SEG_COUNT (SYMB_SEG_INDEX+1)
#define MEMHEAP_SEG_INVALID 0xFFFF
typedef struct mca_memheap_map { typedef struct mca_memheap_map {
map_segment_t mem_segs[MCA_MEMHEAP_MAX_SEGMENTS]; /* TODO: change into pointer array */ map_segment_t mem_segs[MCA_MEMHEAP_MAX_SEGMENTS]; /* TODO: change into pointer array */
@ -158,32 +162,29 @@ extern int mca_memheap_seg_cmp(const void *k, const void *v);
extern mca_memheap_map_t* memheap_map; extern mca_memheap_map_t* memheap_map;
static inline map_segment_t *memheap_find_va(const void* va) static inline int map_segment_is_va_in(map_base_segment_t *s, void *va)
{ {
map_segment_t *s; return (va >= s->va_base && va < s->va_end);
}
if (OPAL_LIKELY((uintptr_t)va >= (uintptr_t)memheap_map->mem_segs[HEAP_SEG_INDEX].seg_base_addr && static inline map_segment_t *memheap_find_seg(int segno)
(uintptr_t)va < (uintptr_t)memheap_map->mem_segs[HEAP_SEG_INDEX].end)) { {
s = &memheap_map->mem_segs[HEAP_SEG_INDEX]; return &mca_memheap_base_map.mem_segs[segno];
} else { }
s = bsearch(va,
&memheap_map->mem_segs[SYMB_SEG_INDEX],
memheap_map->n_segments - 1,
sizeof(*s),
mca_memheap_seg_cmp);
}
#if MEMHEAP_BASE_DEBUG == 1 static inline int memheap_is_va_in_segment(void *va, int segno)
if (s) { {
MEMHEAP_VERBOSE(5, "match seg#%02ld: 0x%llX - 0x%llX %llu bytes va=%p", return map_segment_is_va_in(&memheap_find_seg(segno)->super, va);
s - memheap_map->mem_segs, }
(long long)s->seg_base_addr,
(long long)s->end, static inline int memheap_find_segnum(void *va)
(long long)(s->end - s->seg_base_addr), {
(void *)va); if (OPAL_LIKELY(memheap_is_va_in_segment(va, SYMB_SEG_INDEX))) {
return SYMB_SEG_INDEX;
} else if (memheap_is_va_in_segment(va, HEAP_SEG_INDEX)) {
return HEAP_SEG_INDEX;
} }
#endif return MEMHEAP_SEG_INVALID;
return s;
} }
static inline void* memheap_va2rva(void* va, void* local_base, void* remote_base) static inline void* memheap_va2rva(void* va, void* local_base, void* remote_base)
@ -193,6 +194,62 @@ static inline void* memheap_va2rva(void* va, void* local_base, void* remote_base
(uintptr_t)va - ((uintptr_t)local_base - (uintptr_t)remote_base)); (uintptr_t)va - ((uintptr_t)local_base - (uintptr_t)remote_base));
} }
/**
 * Translate a local virtual address into the matching remote address
 * using the bases recorded in a cached key segment.
 *
 * @param seg  segment supplying the local base (super.va_base) and the
 *             remote base (rva_base).
 * @param va   local address to translate.
 * @return result of memheap_va2rva() for those bases.
 */
static inline void *map_segment_va2rva(mkey_segment_t *seg, void *va)
{
    map_base_segment_t *local = &seg->super;

    return memheap_va2rva(va, local->va_base, seg->rva_base);
}
/**
 * Find which of the two well-known segments (heap, then symbol/static)
 * contains an address.
 *
 * @param segs       first element of an array of records that each start
 *                   with a map_base_segment_t.
 * @param elem_size  distance in bytes between consecutive records.
 * @param va         address to look up.
 * @return the record at HEAP_SEG_INDEX or SYMB_SEG_INDEX whose range holds
 *         va, or NULL when neither does.
 */
static inline map_base_segment_t *map_segment_find_va(map_base_segment_t *segs, size_t elem_size, void *va)
{
    char *const first = (char *)segs;
    map_base_segment_t *const heap_seg =
        (map_base_segment_t *)(first + elem_size * HEAP_SEG_INDEX);
    map_base_segment_t *const symb_seg =
        (map_base_segment_t *)(first + elem_size * SYMB_SEG_INDEX);

    /* the heap record is probed before the symbol record */
    if (OPAL_LIKELY(map_segment_is_va_in(heap_seg, va))) {
        return heap_seg;
    }
    if (OPAL_LIKELY(map_segment_is_va_in(symb_seg, va))) {
        return symb_seg;
    }
    return NULL;
}
void mkey_segment_init(mkey_segment_t *seg, sshmem_mkey_t *mkey, uint32_t segno);
/**
 * Locate the memheap segment that contains an address.
 *
 * The symbol segment is probed first, then the heap segment; only when
 * more than two segments exist are the remaining ones searched with
 * bsearch() and mca_memheap_seg_cmp().
 *
 * @param va  address to look up.
 * @return the containing segment, or NULL if no segment matches.
 */
static inline map_segment_t *memheap_find_va(void* va)
{
    map_segment_t *match = NULL;

    if (OPAL_LIKELY(memheap_is_va_in_segment(va, SYMB_SEG_INDEX))) {
        match = &memheap_map->mem_segs[SYMB_SEG_INDEX];
    } else if (memheap_is_va_in_segment(va, HEAP_SEG_INDEX)) {
        match = &memheap_map->mem_segs[HEAP_SEG_INDEX];
    } else if (memheap_map->n_segments - 2 > 0) {
        /* search the segments that follow the two well-known ones */
        match = bsearch(va,
                        &memheap_map->mem_segs[SYMB_SEG_INDEX+1],
                        memheap_map->n_segments - 2,
                        sizeof(*match),
                        mca_memheap_seg_cmp);
    }

#if MEMHEAP_BASE_DEBUG == 1
    if (NULL != match) {
        MEMHEAP_VERBOSE(5, "match seg#%02ld: 0x%llX - 0x%llX %llu bytes va=%p",
                        match - memheap_map->mem_segs,
                        (long long)match->super.va_base,
                        (long long)match->super.va_end,
                        (long long)(match->super.va_end - match->super.va_base),
                        (void *)va);
    }
#endif
    return match;
}
static inline sshmem_mkey_t *mca_memheap_base_get_cached_mkey(int pe, static inline sshmem_mkey_t *mca_memheap_base_get_cached_mkey(int pe,
void* va, void* va,
int btl_id, int btl_id,
@ -218,7 +275,7 @@ static inline sshmem_mkey_t *mca_memheap_base_get_cached_mkey(int pe,
if (OPAL_LIKELY(s->mkeys_cache[pe])) { if (OPAL_LIKELY(s->mkeys_cache[pe])) {
mkey = &s->mkeys_cache[pe][btl_id]; mkey = &s->mkeys_cache[pe][btl_id];
*rva = memheap_va2rva(va, s->seg_base_addr, mkey->va_base); *rva = memheap_va2rva(va, s->super.va_base, mkey->va_base);
MEMHEAP_VERBOSE_FASTPATH(10, "rkey: pe=%d va=%p -> (cached) %lx %p", pe, (void *)va, mkey->u.key, (void *)*rva); MEMHEAP_VERBOSE_FASTPATH(10, "rkey: pe=%d va=%p -> (cached) %lx %p", pe, (void *)va, mkey->u.key, (void *)*rva);
return mkey; return mkey;
} }
@ -226,6 +283,16 @@ static inline sshmem_mkey_t *mca_memheap_base_get_cached_mkey(int pe,
return mca_memheap_base_get_cached_mkey_slow(s, pe, va, btl_id, rva); return mca_memheap_base_get_cached_mkey_slow(s, pe, va, btl_id, rva);
} }
/**
 * @return the num_transports value stored in the global memheap map.
 */
static inline int mca_memheap_base_num_transports(void)
{
    const int count = memheap_map->num_transports;

    return count;
}
static inline void* mca_memheap_seg2base_va(int seg)
{
return memheap_map->mem_segs[seg].super.va_base;
}
END_C_DECLS END_C_DECLS
#endif /* MCA_MEMHEAP_BASE_H */ #endif /* MCA_MEMHEAP_BASE_H */

Просмотреть файл

@ -38,7 +38,7 @@ char* mca_memheap_base_exclude = NULL;
opal_list_t mca_memheap_base_components_opened = {{0}}; opal_list_t mca_memheap_base_components_opened = {{0}};
struct mca_memheap_base_module_t* mca_memheap_base_module_initialized = NULL; struct mca_memheap_base_module_t* mca_memheap_base_module_initialized = NULL;
int mca_memheap_base_already_opened = 0; int mca_memheap_base_already_opened = 0;
mca_memheap_map_t mca_memheap_base_map = {{{0}}}; mca_memheap_map_t mca_memheap_base_map;
static int mca_memheap_base_register(mca_base_register_flag_t flags) static int mca_memheap_base_register(mca_base_register_flag_t flags)
{ {

Просмотреть файл

@ -49,10 +49,12 @@ typedef struct oob_comm_request {
struct oob_comm { struct oob_comm {
opal_mutex_t lck; opal_mutex_t lck;
opal_condition_t cond; opal_condition_t cond;
uint32_t segno;
sshmem_mkey_t *mkeys; sshmem_mkey_t *mkeys;
int mkeys_rcvd; int mkeys_rcvd;
oob_comm_request_t req_pool[MEMHEAP_RECV_REQS_MAX]; oob_comm_request_t req_pool[MEMHEAP_RECV_REQS_MAX];
opal_list_t req_list; opal_list_t req_list;
int is_inited;
}; };
mca_memheap_map_t* memheap_map = NULL; mca_memheap_map_t* memheap_map = NULL;
@ -68,61 +70,36 @@ static int memheap_oob_get_mkeys(int pe,
uint32_t va_seg_num, uint32_t va_seg_num,
sshmem_mkey_t *mkey); sshmem_mkey_t *mkey);
static inline void* mca_memheap_seg2base_va(int seg)
{
return memheap_map->mem_segs[seg].seg_base_addr;
}
int mca_memheap_seg_cmp(const void *k, const void *v) int mca_memheap_seg_cmp(const void *k, const void *v)
{ {
uintptr_t va = (uintptr_t) k; uintptr_t va = (uintptr_t) k;
map_segment_t *s = (map_segment_t *) v; map_segment_t *s = (map_segment_t *) v;
if (va < (uintptr_t)s->seg_base_addr) if (va < (uintptr_t)s->super.va_base)
return -1; return -1;
if (va >= (uintptr_t)s->end) if (va >= (uintptr_t)s->super.va_end)
return 1; return 1;
return 0; return 0;
} }
/** static int pack_local_mkeys(opal_buffer_t *msg, int pe, int seg)
* @param all_trs
* 0 - pack mkeys for transports to given pe
* 1 - pack mkeys for ALL possible transports. value of pe is ignored
*/
static int pack_local_mkeys(opal_buffer_t *msg, int pe, int seg, int all_trs)
{ {
ompi_proc_t *proc; int i, n;
int i, n, tr_id;
sshmem_mkey_t *mkey; sshmem_mkey_t *mkey;
/* go over all transports to remote pe and pack mkeys */ /* go over all transports and pack mkeys */
if (!all_trs) { n = memheap_map->num_transports;
n = oshmem_get_transport_count(pe);
proc = oshmem_proc_group_find(oshmem_group_all, pe);
}
else {
proc = NULL;
n = memheap_map->num_transports;
}
opal_dss.pack(msg, &n, 1, OPAL_UINT32); opal_dss.pack(msg, &n, 1, OPAL_UINT32);
MEMHEAP_VERBOSE(5, "found %d transports to %d", n, pe); MEMHEAP_VERBOSE(5, "found %d transports to %d", n, pe);
for (i = 0; i < n; i++) { for (i = 0; i < n; i++) {
if (!all_trs) { mkey = mca_memheap_base_get_mkey(mca_memheap_seg2base_va(seg), i);
tr_id = OSHMEM_PROC_DATA(proc)->transport_ids[i];
}
else {
tr_id = i;
}
mkey = mca_memheap_base_get_mkey(mca_memheap_seg2base_va(seg), tr_id);
if (!mkey) { if (!mkey) {
MEMHEAP_ERROR("seg#%d tr_id: %d failed to find local mkey", MEMHEAP_ERROR("seg#%d tr_id: %d failed to find local mkey",
seg, tr_id); seg, i);
return OSHMEM_ERROR; return OSHMEM_ERROR;
} }
opal_dss.pack(msg, &tr_id, 1, OPAL_UINT32); opal_dss.pack(msg, &i, 1, OPAL_UINT32);
opal_dss.pack(msg, &mkey->va_base, 1, OPAL_UINT64); opal_dss.pack(msg, &mkey->va_base, 1, OPAL_UINT64);
if (0 == mkey->va_base) { if (0 == mkey->va_base) {
opal_dss.pack(msg, &mkey->u.key, 1, OPAL_UINT64); opal_dss.pack(msg, &mkey->u.key, 1, OPAL_UINT64);
@ -134,7 +111,7 @@ static int pack_local_mkeys(opal_buffer_t *msg, int pe, int seg, int all_trs)
} }
MEMHEAP_VERBOSE(5, MEMHEAP_VERBOSE(5,
"seg#%d tr_id: %d %s", "seg#%d tr_id: %d %s",
seg, tr_id, mca_spml_base_mkey2str(mkey)); seg, i, mca_spml_base_mkey2str(mkey));
} }
return OSHMEM_SUCCESS; return OSHMEM_SUCCESS;
} }
@ -202,10 +179,10 @@ static void unpack_remote_mkeys(opal_buffer_t *msg, int remote_pe)
} }
cnt = memheap_oob.mkeys[tr_id].len; cnt = memheap_oob.mkeys[tr_id].len;
opal_dss.unpack(msg, memheap_oob.mkeys[tr_id].u.data, &cnt, OPAL_BYTE); opal_dss.unpack(msg, memheap_oob.mkeys[tr_id].u.data, &cnt, OPAL_BYTE);
MCA_SPML_CALL(rmkey_unpack(&memheap_oob.mkeys[tr_id], remote_pe));
} else { } else {
memheap_oob.mkeys[tr_id].u.key = MAP_SEGMENT_SHM_INVALID; memheap_oob.mkeys[tr_id].u.key = MAP_SEGMENT_SHM_INVALID;
} }
MCA_SPML_CALL(rmkey_unpack(&memheap_oob.mkeys[tr_id], memheap_oob.segno, remote_pe, tr_id));
} }
MEMHEAP_VERBOSE(5, MEMHEAP_VERBOSE(5,
@ -249,7 +226,7 @@ static void do_recv(int source_pe, opal_buffer_t* buffer)
msg_type = MEMHEAP_RKEY_RESP; msg_type = MEMHEAP_RKEY_RESP;
opal_dss.pack(msg, &msg_type, 1, OPAL_UINT8); opal_dss.pack(msg, &msg_type, 1, OPAL_UINT8);
if (OSHMEM_SUCCESS != pack_local_mkeys(msg, source_pe, seg, 0)) { if (OSHMEM_SUCCESS != pack_local_mkeys(msg, source_pe, seg)) {
OBJ_RELEASE(msg); OBJ_RELEASE(msg);
goto send_fail; goto send_fail;
} }
@ -435,6 +412,7 @@ int memheap_oob_init(mca_memheap_map_t *map)
} }
opal_progress_register(oshmem_mkey_recv_cb); opal_progress_register(oshmem_mkey_recv_cb);
memheap_oob.is_inited = 1;
return rc; return rc;
} }
@ -444,6 +422,10 @@ void memheap_oob_destruct(void)
int i; int i;
oob_comm_request_t *r; oob_comm_request_t *r;
if (!memheap_oob.is_inited) {
return;
}
opal_progress_unregister(oshmem_mkey_recv_cb); opal_progress_unregister(oshmem_mkey_recv_cb);
for (i = 0; i < MEMHEAP_RECV_REQS_MAX; i++) { for (i = 0; i < MEMHEAP_RECV_REQS_MAX; i++) {
@ -455,6 +437,7 @@ void memheap_oob_destruct(void)
OBJ_DESTRUCT(&memheap_oob.req_list); OBJ_DESTRUCT(&memheap_oob.req_list);
OBJ_DESTRUCT(&memheap_oob.lck); OBJ_DESTRUCT(&memheap_oob.lck);
OBJ_DESTRUCT(&memheap_oob.cond); OBJ_DESTRUCT(&memheap_oob.cond);
memheap_oob.is_inited = 0;
} }
static int send_buffer(int pe, opal_buffer_t *msg) static int send_buffer(int pe, opal_buffer_t *msg)
@ -481,7 +464,6 @@ static int memheap_oob_get_mkeys(int pe, uint32_t seg, sshmem_mkey_t *mkeys)
if (OSHMEM_SUCCESS == MCA_SPML_CALL(oob_get_mkeys(pe, seg, mkeys))) { if (OSHMEM_SUCCESS == MCA_SPML_CALL(oob_get_mkeys(pe, seg, mkeys))) {
for (i = 0; i < memheap_map->num_transports; i++) { for (i = 0; i < memheap_map->num_transports; i++) {
mkeys[i].va_base = mca_memheap_seg2base_va(seg);
MEMHEAP_VERBOSE(5, MEMHEAP_VERBOSE(5,
"MKEY CALCULATED BY LOCAL SPML: pe: %d tr_id: %d %s", "MKEY CALCULATED BY LOCAL SPML: pe: %d tr_id: %d %s",
pe, pe,
@ -494,6 +476,7 @@ static int memheap_oob_get_mkeys(int pe, uint32_t seg, sshmem_mkey_t *mkeys)
OPAL_THREAD_LOCK(&memheap_oob.lck); OPAL_THREAD_LOCK(&memheap_oob.lck);
memheap_oob.mkeys = mkeys; memheap_oob.mkeys = mkeys;
memheap_oob.segno = seg;
memheap_oob.mkeys_rcvd = 0; memheap_oob.mkeys_rcvd = 0;
msg = OBJ_NEW(opal_buffer_t); msg = OBJ_NEW(opal_buffer_t);
@ -585,7 +568,7 @@ void mca_memheap_modex_recv_all(void)
} }
for (j = 0; j < memheap_map->n_segments; j++) { for (j = 0; j < memheap_map->n_segments; j++) {
pack_local_mkeys(msg, 0, j, 1); pack_local_mkeys(msg, 0, j);
} }
/* we assume here that int32_t returned by opal_dss.unload /* we assume here that int32_t returned by opal_dss.unload
@ -661,6 +644,7 @@ void mca_memheap_modex_recv_all(void)
} }
} }
memheap_oob.mkeys = s->mkeys_cache[i]; memheap_oob.mkeys = s->mkeys_cache[i];
memheap_oob.segno = j;
unpack_remote_mkeys(msg, i); unpack_remote_mkeys(msg, i);
} }
} }
@ -699,6 +683,10 @@ sshmem_mkey_t * mca_memheap_base_get_cached_mkey_slow(map_segment_t *s,
int rc; int rc;
sshmem_mkey_t *mkey; sshmem_mkey_t *mkey;
if (!memheap_oob.is_inited) {
return NULL;
}
s->mkeys_cache[pe] = (sshmem_mkey_t *) calloc(memheap_map->num_transports, s->mkeys_cache[pe] = (sshmem_mkey_t *) calloc(memheap_map->num_transports,
sizeof(sshmem_mkey_t)); sizeof(sshmem_mkey_t));
if (!s->mkeys_cache[pe]) if (!s->mkeys_cache[pe])
@ -711,7 +699,7 @@ sshmem_mkey_t * mca_memheap_base_get_cached_mkey_slow(map_segment_t *s,
return NULL ; return NULL ;
mkey = &s->mkeys_cache[pe][btl_id]; mkey = &s->mkeys_cache[pe][btl_id];
*rva = memheap_va2rva(va, s->seg_base_addr, mkey->va_base); *rva = memheap_va2rva(va, s->super.va_base, mkey->va_base);
MEMHEAP_VERBOSE_FASTPATH(5, "rkey: pe=%d va=%p -> (remote lookup) %lx %p", pe, (void *)va, mkey->u.key, (void *)*rva); MEMHEAP_VERBOSE_FASTPATH(5, "rkey: pe=%d va=%p -> (remote lookup) %lx %p", pe, (void *)va, mkey->u.key, (void *)*rva);
return mkey; return mkey;
@ -737,7 +725,7 @@ uint64_t mca_memheap_base_find_offset(int pe,
s = memheap_find_va(va); s = memheap_find_va(va);
if (my_pe == pe) { if (my_pe == pe) {
return (uintptr_t)va - (uintptr_t)s->seg_base_addr; return (uintptr_t)va - (uintptr_t)s->super.va_base;
} }
else { else {
return ((s && MAP_SEGMENT_IS_VALID(s)) ? ((uintptr_t)rva - (uintptr_t)(s->mkeys_cache[pe][tr_id].va_base)) : 0); return ((s && MAP_SEGMENT_IS_VALID(s)) ? ((uintptr_t)rva - (uintptr_t)(s->mkeys_cache[pe][tr_id].va_base)) : 0);
@ -746,7 +734,7 @@ uint64_t mca_memheap_base_find_offset(int pe,
int mca_memheap_base_is_symmetric_addr(const void* va) int mca_memheap_base_is_symmetric_addr(const void* va)
{ {
return (memheap_find_va(va) ? 1 : 0); return (memheap_find_va((void *)va) ? 1 : 0);
} }
int mca_memheap_base_detect_addr_type(void* va) int mca_memheap_base_detect_addr_type(void* va)
@ -759,14 +747,31 @@ int mca_memheap_base_detect_addr_type(void* va)
if (s) { if (s) {
if (s->type == MAP_SEGMENT_STATIC) { if (s->type == MAP_SEGMENT_STATIC) {
addr_type = ADDR_STATIC; addr_type = ADDR_STATIC;
} else if ((uintptr_t)va >= (uintptr_t) s->seg_base_addr } else if ((uintptr_t)va >= (uintptr_t) s->super.va_base
&& (uintptr_t)va < (uintptr_t) ((uintptr_t)s->seg_base_addr + mca_memheap.memheap_size)) { && (uintptr_t)va < (uintptr_t) ((uintptr_t)s->super.va_base + mca_memheap.memheap_size)) {
addr_type = ADDR_USER; addr_type = ADDR_USER;
} else { } else {
assert( (uintptr_t)va >= (uintptr_t) ((uintptr_t)s->seg_base_addr + mca_memheap.memheap_size) && (uintptr_t)va < (uintptr_t)s->end); assert( (uintptr_t)va >= (uintptr_t) ((uintptr_t)s->super.va_base + mca_memheap.memheap_size) && (uintptr_t)va < (uintptr_t)s->super.va_end);
addr_type = ADDR_PRIVATE; addr_type = ADDR_PRIVATE;
} }
} }
return addr_type; return addr_type;
} }
/**
 * Populate a cached key segment from a local segment and a remote key.
 *
 * @param seg    segment record to fill in.
 * @param mkey   remote key whose va_base becomes seg->rva_base.
 * @param segno  local segment number; values at or above
 *               MCA_MEMHEAP_SEG_COUNT leave seg untouched.
 */
void mkey_segment_init(mkey_segment_t *seg, sshmem_mkey_t *mkey, uint32_t segno)
{
    if (segno < MCA_MEMHEAP_SEG_COUNT) {
        map_segment_t *local = memheap_find_seg(segno);

        assert(NULL != local);

        /* local address range comes from the local segment ... */
        seg->super.va_base = local->super.va_base;
        seg->super.va_end  = local->super.va_end;
        /* ... and the remote base from the key */
        seg->rva_base      = mkey->va_base;
    }
}

Просмотреть файл

@ -32,9 +32,9 @@ int mca_memheap_base_reg(mca_memheap_map_t *memheap_map)
MEMHEAP_VERBOSE(5, MEMHEAP_VERBOSE(5,
"register seg#%02d: 0x%p - 0x%p %llu bytes type=0x%X id=0x%X", "register seg#%02d: 0x%p - 0x%p %llu bytes type=0x%X id=0x%X",
i, i,
s->seg_base_addr, s->super.va_base,
s->end, s->super.va_end,
(long long)((uintptr_t)s->end - (uintptr_t)s->seg_base_addr), (long long)((uintptr_t)s->super.va_end - (uintptr_t)s->super.va_base),
s->type, s->type,
s->seg_id); s->seg_id);
ret = _reg_segment(s, &memheap_map->num_transports); ret = _reg_segment(s, &memheap_map->num_transports);
@ -60,9 +60,9 @@ int mca_memheap_base_dereg(mca_memheap_map_t *memheap_map)
MEMHEAP_VERBOSE(5, MEMHEAP_VERBOSE(5,
"deregistering segment#%d: %p - %p %llu bytes", "deregistering segment#%d: %p - %p %llu bytes",
i, i,
s->seg_base_addr, s->super.va_base,
s->end, s->super.va_end,
(long long)((uintptr_t)s->end - (uintptr_t)s->seg_base_addr)); (long long)((uintptr_t)s->super.va_end - (uintptr_t)s->super.va_base));
(void)_dereg_segment(s); (void)_dereg_segment(s);
} }
@ -120,8 +120,8 @@ static int _reg_segment(map_segment_t *s, int *num_btl)
} }
if (!rc) { if (!rc) {
s->mkeys = MCA_SPML_CALL(register((void *)(unsigned long)s->seg_base_addr, s->mkeys = MCA_SPML_CALL(register((void *)(unsigned long)s->super.va_base,
(uintptr_t)s->end - (uintptr_t)s->seg_base_addr, (uintptr_t)s->super.va_end - (uintptr_t)s->super.va_base,
s->seg_id, s->seg_id,
num_btl)); num_btl));
if (NULL == s->mkeys) { if (NULL == s->mkeys) {

Просмотреть файл

@ -218,10 +218,10 @@ static memheap_context_t* _memheap_create(void)
context.user_size = user_size; context.user_size = user_size;
context.private_size = MEMHEAP_BASE_PRIVATE_SIZE; context.private_size = MEMHEAP_BASE_PRIVATE_SIZE;
context.user_base_addr = context.user_base_addr =
(void*) ((unsigned char*) mca_memheap_base_map.mem_segs[HEAP_SEG_INDEX].seg_base_addr (void*) ((unsigned char*) mca_memheap_base_map.mem_segs[HEAP_SEG_INDEX].super.va_base
+ 0); + 0);
context.private_base_addr = context.private_base_addr =
(void*) ((unsigned char*) mca_memheap_base_map.mem_segs[HEAP_SEG_INDEX].seg_base_addr (void*) ((unsigned char*) mca_memheap_base_map.mem_segs[HEAP_SEG_INDEX].super.va_base
+ context.user_size); + context.user_size);
} }

Просмотреть файл

@ -63,13 +63,13 @@ int mca_memheap_base_static_init(mca_memheap_map_t *map)
memset(s, 0, sizeof(*s)); memset(s, 0, sizeof(*s));
MAP_SEGMENT_RESET_FLAGS(s); MAP_SEGMENT_RESET_FLAGS(s);
s->seg_id = MAP_SEGMENT_SHM_INVALID; s->seg_id = MAP_SEGMENT_SHM_INVALID;
s->seg_base_addr = memheap_context.mem_segs[i].start; s->super.va_base = memheap_context.mem_segs[i].start;
s->end = memheap_context.mem_segs[i].end; s->super.va_end = memheap_context.mem_segs[i].end;
s->seg_size = ((uintptr_t)s->end - (uintptr_t)s->seg_base_addr); s->seg_size = ((uintptr_t)s->super.va_end - (uintptr_t)s->super.va_base);
s->type = MAP_SEGMENT_STATIC; s->type = MAP_SEGMENT_STATIC;
map->n_segments++; map->n_segments++;
total_mem += ((uintptr_t)s->end - (uintptr_t)s->seg_base_addr); total_mem += ((uintptr_t)s->super.va_end - (uintptr_t)s->super.va_base);
} }
MEMHEAP_VERBOSE(1, MEMHEAP_VERBOSE(1,
"Memheap static memory: %llu byte(s), %d segments", "Memheap static memory: %llu byte(s), %d segments",

Просмотреть файл

@ -71,7 +71,7 @@ OSHMEM_DECLSPEC int mca_spml_base_oob_get_mkeys(int pe,
uint32_t seg, uint32_t seg,
sshmem_mkey_t *mkeys); sshmem_mkey_t *mkeys);
OSHMEM_DECLSPEC void mca_spml_base_rmkey_unpack(sshmem_mkey_t *mkey, int pe); OSHMEM_DECLSPEC void mca_spml_base_rmkey_unpack(sshmem_mkey_t *mkey, uint32_t seg, int pe, int tr_id);
OSHMEM_DECLSPEC void mca_spml_base_rmkey_free(sshmem_mkey_t *mkey); OSHMEM_DECLSPEC void mca_spml_base_rmkey_free(sshmem_mkey_t *mkey);
OSHMEM_DECLSPEC int mca_spml_base_put_nb(void *dst_addr, OSHMEM_DECLSPEC int mca_spml_base_put_nb(void *dst_addr,
size_t size, size_t size,
@ -104,6 +104,8 @@ OSHMEM_DECLSPEC extern mca_base_framework_t oshmem_spml_base_framework;
#define SPML_VERBOSE(level, ...) #define SPML_VERBOSE(level, ...)
#endif #endif
#define SPML_VERBOSE_FASTPATH(level, ...)
#define SPML_ERROR(...) \ #define SPML_ERROR(...) \
oshmem_output(oshmem_spml_base_framework.framework_output, \ oshmem_output(oshmem_spml_base_framework.framework_output, \
"Error %s:%d - %s()", __SPML_FILE__, __LINE__, __func__, __VA_ARGS__) "Error %s:%d - %s()", __SPML_FILE__, __LINE__, __func__, __VA_ARGS__)

Просмотреть файл

@ -153,12 +153,12 @@ int mca_spml_base_wait_nb(void* handle)
return OSHMEM_SUCCESS; return OSHMEM_SUCCESS;
} }
int mca_spml_base_oob_get_mkeys(int pe, uint32_t seg, sshmem_mkey_t *mkeys) int mca_spml_base_oob_get_mkeys(int pe, uint32_t segno, sshmem_mkey_t *mkeys)
{ {
return OSHMEM_ERROR; return OSHMEM_ERROR;
} }
void mca_spml_base_rmkey_unpack(sshmem_mkey_t *mkey, int pe) void mca_spml_base_rmkey_unpack(sshmem_mkey_t *mkey, uint32_t segno, int pe, int tr_id)
{ {
} }

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -33,6 +33,7 @@
#include "opal/class/opal_list.h" #include "opal/class/opal_list.h"
#include "orte/runtime/orte_globals.h" #include "orte/runtime/orte_globals.h"
#include "oshmem/mca/memheap/base/base.h"
#include <mxm/api/mxm_api.h> #include <mxm/api/mxm_api.h>
@ -40,11 +41,6 @@
#define MXM_VERSION(major, minor) (((major)<<MXM_MAJOR_BIT)|((minor)<<MXM_MINOR_BIT)) #define MXM_VERSION(major, minor) (((major)<<MXM_MAJOR_BIT)|((minor)<<MXM_MINOR_BIT))
#endif #endif
#if MXM_API < MXM_VERSION(2,0)
#include <mxm/api/mxm_addr.h>
#include <mxm/api/mxm_stats.h>
#endif
#define MXM_SHMEM_MQ_ID 0x7119 #define MXM_SHMEM_MQ_ID 0x7119
/* start request explicit ack once our buffer pool is less than watermark */ /* start request explicit ack once our buffer pool is less than watermark */
@ -52,22 +48,38 @@
/* request explicit ack (SYNC) per every X put requests per connection */ /* request explicit ack (SYNC) per every X put requests per connection */
#define SPML_IKRIT_PACKETS_PER_SYNC 64 #define SPML_IKRIT_PACKETS_PER_SYNC 64
/* Given a pointer to the field `member` inside an object of type `type`,
 * compute a pointer to that enclosing object by subtracting the field's
 * offsetof() from the pointer. */
#define spml_ikrit_container_of(ptr, type, member) ( \
(type *)( ((char *)(ptr)) - offsetof(type,member) ))
#define MXM_MAX_ADDR_LEN 512
#define MXM_PTL_RDMA 0
#define MXM_PTL_SHM 1
#define MXM_PTL_LAST 2
BEGIN_C_DECLS BEGIN_C_DECLS
/** /**
* UD MXM SPML module * MXM SPML module
*/ */
/* TODO: move va_xx to base struct */
/* Per-segment key entry kept for a peer: struct mxm_peer holds an array of
 * MCA_MEMHEAP_SEG_COUNT of these. */
struct spml_ikrit_mkey {
/* address-range information for the segment; mca_spml_ikrit_get_mkey()
 * casts a pointer to the base segment inside this member back to
 * spml_ikrit_mkey_t, so it has to stay the first member */
mkey_segment_t super;
/* MXM memory key stored alongside the segment information */
mxm_mem_key_t key;
};
typedef struct spml_ikrit_mkey spml_ikrit_mkey_t;
struct mxm_peer { struct mxm_peer {
opal_list_item_t super;
mxm_conn_h mxm_conn; mxm_conn_h mxm_conn;
mxm_conn_h mxm_hw_rdma_conn; mxm_conn_h mxm_hw_rdma_conn;
int pe; uint8_t ptl_id;
uint8_t need_fence;
int32_t n_active_puts; int32_t n_active_puts;
int need_fence; opal_list_item_t link;
spml_ikrit_mkey_t mkeys[MCA_MEMHEAP_SEG_COUNT];
}; };
typedef struct mxm_peer mxm_peer_t; typedef struct mxm_peer mxm_peer_t;
OBJ_CLASS_DECLARATION(mxm_peer_t);
struct mca_spml_ikrit_t { struct mca_spml_ikrit_t {
mca_spml_base_module_t super; mca_spml_base_module_t super;
@ -79,7 +91,7 @@ struct mca_spml_ikrit_t {
mxm_ep_h mxm_ep; mxm_ep_h mxm_ep;
mxm_ep_h mxm_hw_rdma_ep; mxm_ep_h mxm_hw_rdma_ep;
mxm_mq_h mxm_mq; mxm_mq_h mxm_mq;
mxm_peer_t **mxm_peers; mxm_peer_t *mxm_peers;
int32_t n_active_puts; int32_t n_active_puts;
int32_t n_active_gets; int32_t n_active_gets;
@ -103,22 +115,13 @@ struct mca_spml_ikrit_t {
int hw_rdma_channel; /* true if we provide separate channel that int hw_rdma_channel; /* true if we provide separate channel that
has true one sided capability */ has true one sided capability */
int np; int np;
#if MXM_API >= MXM_VERSION(2,0)
int unsync_conn_max; int unsync_conn_max;
#endif
size_t put_zcopy_threshold; /* enable zcopy in put if message size is size_t put_zcopy_threshold; /* enable zcopy in put if message size is
greater than the threshold */ greater than the threshold */
}; };
typedef struct mca_spml_ikrit_t mca_spml_ikrit_t; typedef struct mca_spml_ikrit_t mca_spml_ikrit_t;
#define MXM_MAX_ADDR_LEN 512
#if MXM_API >= MXM_VERSION(2,0)
#define MXM_PTL_SHM 0
#define MXM_PTL_RDMA 1
#define MXM_PTL_LAST 2
#endif
typedef struct spml_ikrit_mxm_ep_conn_info_t { typedef struct spml_ikrit_mxm_ep_conn_info_t {
union { union {
@ -139,11 +142,6 @@ extern int mca_spml_ikrit_get_nb(void* src_addr,
void* dst_addr, void* dst_addr,
int src, int src,
void **handle); void **handle);
/* extension. used 4 fence implementation b4 fence was added to mxm */
extern int mca_spml_ikrit_get_async(void *src_addr,
size_t size,
void *dst_addr,
int src);
extern int mca_spml_ikrit_put(void* dst_addr, extern int mca_spml_ikrit_put(void* dst_addr,
size_t size, size_t size,
@ -167,7 +165,7 @@ extern sshmem_mkey_t *mca_spml_ikrit_register(void* addr,
int *count); int *count);
extern int mca_spml_ikrit_deregister(sshmem_mkey_t *mkeys); extern int mca_spml_ikrit_deregister(sshmem_mkey_t *mkeys);
extern int mca_spml_ikrit_oob_get_mkeys(int pe, extern int mca_spml_ikrit_oob_get_mkeys(int pe,
uint32_t seg, uint32_t segno,
sshmem_mkey_t *mkeys); sshmem_mkey_t *mkeys);
extern int mca_spml_ikrit_add_procs(ompi_proc_t** procs, size_t nprocs); extern int mca_spml_ikrit_add_procs(ompi_proc_t** procs, size_t nprocs);
@ -175,6 +173,30 @@ extern int mca_spml_ikrit_del_procs(ompi_proc_t** procs, size_t nprocs);
extern int mca_spml_ikrit_fence(void); extern int mca_spml_ikrit_fence(void);
extern int spml_ikrit_progress(void); extern int spml_ikrit_progress(void);
mxm_mem_key_t *mca_spml_ikrit_get_mkey_slow(int pe, void *va, int ptl_id, void **rva);
/**
 * Look up the mxm memory key that covers a symmetric address on a peer.
 *
 * Fast path: for MXM_PTL_RDMA the per-peer mkey cache is searched directly.
 * On a hit, *rva receives the translated remote address and a pointer to the
 * cached key is returned. Every other case (non-RDMA ptl or cache miss) is
 * delegated to mca_spml_ikrit_get_mkey_slow().
 *
 * The function returns NULL if data can be directly copied via shared memory,
 * else it returns the mxm mem key.
 *
 * The function will abort() if va is not a symmetric variable address.
 * NOTE(review): the NULL return and the abort are presumably produced by the
 * slow path, which is not visible here -- confirm against its definition.
 *
 * @param pe      remote pe
 * @param va      local virtual address of the symmetric variable
 * @param ptl_id  transport id (MXM_PTL_RDMA selects the cached fast path)
 * @param rva     [out] address of the variable on the remote pe
 */
static inline mxm_mem_key_t *mca_spml_ikrit_get_mkey(int pe, void *va, int ptl_id, void **rva)
{
    spml_ikrit_mkey_t *cached;

    if (OPAL_LIKELY(MXM_PTL_RDMA == ptl_id)) {
        /* search the peer's per-segment cache for the segment containing va */
        cached = (spml_ikrit_mkey_t *)map_segment_find_va(
                     &mca_spml_ikrit.mxm_peers[pe].mkeys[0].super.super,
                     sizeof(*cached), va);
        if (OPAL_LIKELY(NULL != cached)) {
            *rva = map_segment_va2rva(&cached->super, va);
            return &cached->key;
        }
    }

    /* non-RDMA transport or cache miss */
    return mca_spml_ikrit_get_mkey_slow(pe, va, ptl_id, rva);
}
END_C_DECLS END_C_DECLS
#endif #endif

Просмотреть файл

@ -233,17 +233,11 @@ static int mca_spml_ikrit_component_register(void)
&mca_spml_ikrit.mxm_tls); &mca_spml_ikrit.mxm_tls);
mca_spml_ikrit_param_register_int("np", mca_spml_ikrit_param_register_int("np",
#if MXM_API <= MXM_VERSION(2,0) 0,
128, "[integer] Minimal allowed job's NP to activate ikrit", &mca_spml_ikrit.np);
#else
0,
#endif
"[integer] Minimal allowed job's NP to activate ikrit", &mca_spml_ikrit.np);
#if MXM_API >= MXM_VERSION(2,0)
mca_spml_ikrit_param_register_int("unsync_conn_max", 8, mca_spml_ikrit_param_register_int("unsync_conn_max", 8,
"[integer] Max number of connections that do not require notification of PUT operation remote completion. Increasing this number improves efficiency of p2p communication but increases overhead of shmem_fence/shmem_quiet/shmem_barrier", "[integer] Max number of connections that do not require notification of PUT operation remote completion. Increasing this number improves efficiency of p2p communication but increases overhead of shmem_fence/shmem_quiet/shmem_barrier",
&mca_spml_ikrit.unsync_conn_max); &mca_spml_ikrit.unsync_conn_max);
#endif
mca_spml_ikrit_param_register_size_t("put_zcopy_threshold", 16384ULL, mca_spml_ikrit_param_register_size_t("put_zcopy_threshold", 16384ULL,
"[size_t] Use zero copy put if message size is greater than the threshold", "[size_t] Use zero copy put if message size is greater than the threshold",
@ -312,10 +306,6 @@ static int mca_spml_ikrit_component_open(void)
return OSHMEM_ERROR; return OSHMEM_ERROR;
} }
#if MXM_API < MXM_VERSION(2,0)
mca_spml_ikrit.ud_only = 1;
mca_spml_ikrit.mxm_ctx_opts->ptl_bitmap = (MXM_BIT(MXM_PTL_SELF) | MXM_BIT(MXM_PTL_RDMA));
#endif
SPML_VERBOSE(5, "UD only mode is %s", SPML_VERBOSE(5, "UD only mode is %s",
mca_spml_ikrit.ud_only ? "enabled" : "disabled"); mca_spml_ikrit.ud_only ? "enabled" : "disabled");
@ -354,15 +344,10 @@ static int mca_spml_ikrit_component_close(void)
} }
if (mca_spml_ikrit.mxm_context) { if (mca_spml_ikrit.mxm_context) {
mxm_cleanup(mca_spml_ikrit.mxm_context); mxm_cleanup(mca_spml_ikrit.mxm_context);
#if MXM_API < MXM_VERSION(2,0)
mxm_config_free(mca_spml_ikrit.mxm_ep_opts);
mxm_config_free(mca_spml_ikrit.mxm_ctx_opts);
#else
mxm_config_free_ep_opts(mca_spml_ikrit.mxm_ep_opts); mxm_config_free_ep_opts(mca_spml_ikrit.mxm_ep_opts);
mxm_config_free_context_opts(mca_spml_ikrit.mxm_ctx_opts); mxm_config_free_context_opts(mca_spml_ikrit.mxm_ctx_opts);
if (mca_spml_ikrit.hw_rdma_channel) if (mca_spml_ikrit.hw_rdma_channel)
mxm_config_free_ep_opts(mca_spml_ikrit.mxm_ep_hw_rdma_opts); mxm_config_free_ep_opts(mca_spml_ikrit.mxm_ep_hw_rdma_opts);
#endif
} }
mca_spml_ikrit.mxm_mq = NULL; mca_spml_ikrit.mxm_mq = NULL;
mca_spml_ikrit.mxm_context = NULL; mca_spml_ikrit.mxm_context = NULL;
@ -373,14 +358,6 @@ static int spml_ikrit_mxm_init(void)
{ {
mxm_error_t err; mxm_error_t err;
#if MXM_API < MXM_VERSION(2,0)
/* Only relevant for SHM PTL - ignore */
mca_spml_ikrit.mxm_ep_opts->job_id = 0;
mca_spml_ikrit.mxm_ep_opts->local_rank = 0;
mca_spml_ikrit.mxm_ep_opts->num_local_procs = 0;
mca_spml_ikrit.mxm_ep_opts->rdma.drain_cq = 1;
#endif
/* Open MXM endpoint */ /* Open MXM endpoint */
err = mxm_ep_create(mca_spml_ikrit.mxm_context, err = mxm_ep_create(mca_spml_ikrit.mxm_context,
mca_spml_ikrit.mxm_ep_opts, mca_spml_ikrit.mxm_ep_opts,

Просмотреть файл

@ -118,7 +118,7 @@ typedef int (*mca_spml_base_module_wait_fn_t)(void* addr,
* *
* @param mkey remote mkey * @param mkey remote mkey
*/ */
typedef void (*mca_spml_base_module_mkey_unpack_fn_t)(sshmem_mkey_t *, int remote_pe); typedef void (*mca_spml_base_module_mkey_unpack_fn_t)(sshmem_mkey_t *, uint32_t segno, int remote_pe, int tr_id);
/** /**
* free resources used by deserialized remote mkey * free resources used by deserialized remote mkey
@ -149,9 +149,9 @@ typedef int (*mca_spml_base_module_deregister_fn_t)(sshmem_mkey_t *mkeys);
/** /**
* try to fill up mkeys that can be used to reach remote pe. * try to fill up mkeys that can be used to reach remote pe.
* @param pe remote pe * @param pe remote pe
* @param seg 0 - symmetric heap, 1 - static data, everything else are static data in .so * @param seg 0 - symmetric heap, 1 - static data, everything else are static data in .so
* @param mkeys mkeys array * @param mkeys mkeys array
* *
* @return OSHMEM_SUCCESS if keys are found * @return OSHMEM_SUCCESS if keys are found
*/ */

Просмотреть файл

@ -115,7 +115,6 @@ int mca_spml_ucx_del_procs(ompi_proc_t** procs, size_t nprocs)
int my_rank = oshmem_my_proc_id(); int my_rank = oshmem_my_proc_id();
size_t num_reqs, max_reqs; size_t num_reqs, max_reqs;
void *dreq, **dreqs; void *dreq, **dreqs;
ompi_proc_t *proc;
ucp_ep_h ep; ucp_ep_h ep;
size_t i, n; size_t i, n;
@ -157,7 +156,7 @@ int mca_spml_ucx_del_procs(ompi_proc_t** procs, size_t nprocs)
mca_spml_ucx.ucp_peers[n].ucp_conn = NULL; mca_spml_ucx.ucp_peers[n].ucp_conn = NULL;
if (num_reqs >= mca_spml_ucx.num_disconnect) { if ((int)num_reqs >= mca_spml_ucx.num_disconnect) {
mca_spml_ucx_waitall(dreqs, &num_reqs); mca_spml_ucx_waitall(dreqs, &num_reqs);
} }
} }
@ -322,6 +321,21 @@ error:
} }
/**
 * Slow-path mkey lookup, used when the per-peer mkey cache has no entry
 * covering va.
 *
 * Queries the memheap mkey cache (transport id 0). If no key is found the
 * address is reported as non-symmetric and the job is aborted.
 *
 * @param pe   remote pe
 * @param va   local virtual address of the symmetric variable
 * @param rva  [out] passed through to mca_memheap_base_get_cached_mkey()
 *
 * @return the ucx mkey stored in the memheap key's spml_context
 */
spml_ucx_mkey_t * mca_spml_ucx_get_mkey_slow(int pe, void *va, void **rva)
{
    sshmem_mkey_t *found = mca_memheap_base_get_cached_mkey(pe, va, 0, rva);

    if (OPAL_UNLIKELY(NULL == found)) {
        SPML_ERROR("pe=%d: %p is not address of symmetric variable",
                   pe, va);
        oshmem_shmem_abort(-1);
        /* only reached if oshmem_shmem_abort() returns */
        return NULL;
    }

    return (spml_ucx_mkey_t *)(found->spml_context);
}
void mca_spml_ucx_rmkey_free(sshmem_mkey_t *mkey) void mca_spml_ucx_rmkey_free(sshmem_mkey_t *mkey)
{ {
spml_ucx_mkey_t *ucx_mkey; spml_ucx_mkey_t *ucx_mkey;
@ -331,19 +345,22 @@ void mca_spml_ucx_rmkey_free(sshmem_mkey_t *mkey)
} }
ucx_mkey = (spml_ucx_mkey_t *)(mkey->spml_context); ucx_mkey = (spml_ucx_mkey_t *)(mkey->spml_context);
ucp_rkey_destroy(ucx_mkey->rkey); ucp_rkey_destroy(ucx_mkey->rkey);
free(ucx_mkey);
} }
void mca_spml_ucx_rmkey_unpack(sshmem_mkey_t *mkey, int pe) static void mca_spml_ucx_cache_mkey(sshmem_mkey_t *mkey, uint32_t segno, int dst_pe)
{
ucp_peer_t *peer;
peer = &mca_spml_ucx.ucp_peers[dst_pe];
mkey_segment_init(&peer->mkeys[segno].super, mkey, segno);
}
void mca_spml_ucx_rmkey_unpack(sshmem_mkey_t *mkey, uint32_t segno, int pe, int tr_id)
{ {
spml_ucx_mkey_t *ucx_mkey; spml_ucx_mkey_t *ucx_mkey;
ucs_status_t err; ucs_status_t err;
ucx_mkey = (spml_ucx_mkey_t *)malloc(sizeof(*ucx_mkey)); ucx_mkey = &mca_spml_ucx.ucp_peers[pe].mkeys[segno].key;
if (!ucx_mkey) {
SPML_ERROR("not enough memory to allocate mkey");
goto error_fatal;
}
err = ucp_ep_rkey_unpack(mca_spml_ucx.ucp_peers[pe].ucp_conn, err = ucp_ep_rkey_unpack(mca_spml_ucx.ucp_peers[pe].ucp_conn,
mkey->u.data, mkey->u.data,
@ -354,6 +371,7 @@ void mca_spml_ucx_rmkey_unpack(sshmem_mkey_t *mkey, int pe)
} }
mkey->spml_context = ucx_mkey; mkey->spml_context = ucx_mkey;
mca_spml_ucx_cache_mkey(mkey, segno, pe);
return; return;
error_fatal: error_fatal:
@ -370,23 +388,23 @@ sshmem_mkey_t *mca_spml_ucx_register(void* addr,
ucs_status_t err; ucs_status_t err;
spml_ucx_mkey_t *ucx_mkey; spml_ucx_mkey_t *ucx_mkey;
size_t len; size_t len;
int my_pe = oshmem_my_proc_id();
int seg;
*count = 0; *count = 0;
mkeys = (sshmem_mkey_t *) calloc(1, sizeof(*mkeys)); mkeys = (sshmem_mkey_t *) calloc(1, sizeof(*mkeys));
if (!mkeys) { if (!mkeys) {
return NULL ; return NULL;
} }
ucx_mkey = (spml_ucx_mkey_t *)malloc(sizeof(*ucx_mkey)); seg = memheap_find_segnum(addr);
if (!ucx_mkey) {
goto error_out;
}
ucx_mkey = &mca_spml_ucx.ucp_peers[my_pe].mkeys[seg].key;
mkeys[0].spml_context = ucx_mkey; mkeys[0].spml_context = ucx_mkey;
err = ucp_mem_map(mca_spml_ucx.ucp_context,
&addr, size, 0, &ucx_mkey->mem_h); err = ucp_mem_map(mca_spml_ucx.ucp_context, &addr, size, 0, &ucx_mkey->mem_h);
if (UCS_OK != err) { if (UCS_OK != err) {
goto error_out1; goto error_out;
} }
err = ucp_rkey_pack(mca_spml_ucx.ucp_context, ucx_mkey->mem_h, err = ucp_rkey_pack(mca_spml_ucx.ucp_context, ucx_mkey->mem_h,
@ -412,12 +430,11 @@ sshmem_mkey_t *mca_spml_ucx_register(void* addr,
mkeys[0].len = len; mkeys[0].len = len;
mkeys[0].va_base = addr; mkeys[0].va_base = addr;
*count = 1; *count = 1;
mca_spml_ucx_cache_mkey(&mkeys[0], seg, my_pe);
return mkeys; return mkeys;
error_unmap: error_unmap:
ucp_mem_unmap(mca_spml_ucx.ucp_context, ucx_mkey->mem_h); ucp_mem_unmap(mca_spml_ucx.ucp_context, ucx_mkey->mem_h);
error_out1:
free(ucx_mkey);
error_out: error_out:
free(mkeys); free(mkeys);
@ -442,7 +459,6 @@ int mca_spml_ucx_deregister(sshmem_mkey_t *mkeys)
ucp_rkey_buffer_release(mkeys[0].u.data); ucp_rkey_buffer_release(mkeys[0].u.data);
} }
free(ucx_mkey);
return OSHMEM_SUCCESS; return OSHMEM_SUCCESS;
} }

Просмотреть файл

@ -40,10 +40,22 @@ BEGIN_C_DECLS
/** /**
* UCX SPML module * UCX SPML module
*/ */
struct ucp_peer { struct spml_ucx_mkey {
ucp_ep_h ucp_conn; ucp_rkey_h rkey;
ucp_mem_h mem_h;
}; };
typedef struct spml_ucx_mkey spml_ucx_mkey_t;
struct spml_ucx_cached_mkey {
mkey_segment_t super;
spml_ucx_mkey_t key;
};
typedef struct spml_ucx_cached_mkey spml_ucx_cached_mkey_t;
struct ucp_peer {
ucp_ep_h ucp_conn;
spml_ucx_cached_mkey_t mkeys[MCA_MEMHEAP_SEG_COUNT];
};
typedef struct ucp_peer ucp_peer_t; typedef struct ucp_peer ucp_peer_t;
struct mca_spml_ucx { struct mca_spml_ucx {
@ -56,16 +68,8 @@ struct mca_spml_ucx {
int priority; /* component priority */ int priority; /* component priority */
bool enabled; bool enabled;
}; };
typedef struct mca_spml_ucx mca_spml_ucx_t; typedef struct mca_spml_ucx mca_spml_ucx_t;
struct spml_ucx_mkey {
ucp_rkey_h rkey;
ucp_mem_h mem_h;
};
typedef struct spml_ucx_mkey spml_ucx_mkey_t;
extern mca_spml_ucx_t mca_spml_ucx; extern mca_spml_ucx_t mca_spml_ucx;
@ -103,7 +107,7 @@ extern sshmem_mkey_t *mca_spml_ucx_register(void* addr,
int *count); int *count);
extern int mca_spml_ucx_deregister(sshmem_mkey_t *mkeys); extern int mca_spml_ucx_deregister(sshmem_mkey_t *mkeys);
extern void mca_spml_ucx_rmkey_unpack(sshmem_mkey_t *mkey, int pe); extern void mca_spml_ucx_rmkey_unpack(sshmem_mkey_t *mkey, uint32_t segno, int pe, int tr_id);
extern void mca_spml_ucx_rmkey_free(sshmem_mkey_t *mkey); extern void mca_spml_ucx_rmkey_free(sshmem_mkey_t *mkey);
extern int mca_spml_ucx_add_procs(ompi_proc_t** procs, size_t nprocs); extern int mca_spml_ucx_add_procs(ompi_proc_t** procs, size_t nprocs);
@ -113,30 +117,38 @@ extern int mca_spml_ucx_quiet(void);
extern int spml_ucx_progress(void); extern int spml_ucx_progress(void);
spml_ucx_mkey_t * mca_spml_ucx_get_mkey_slow(int pe, void *va, void **rva);
static inline spml_ucx_mkey_t * static inline spml_ucx_mkey_t *
mca_spml_ucx_get_mkey(int pe, void *va, void **rva) mca_spml_ucx_get_mkey(int pe, void *va, void **rva)
{ {
sshmem_mkey_t *r_mkey; spml_ucx_cached_mkey_t *mkey;
r_mkey = mca_memheap_base_get_cached_mkey(pe, va, 0, rva); mkey = mca_spml_ucx.ucp_peers[pe].mkeys;
if (OPAL_UNLIKELY(!r_mkey)) { mkey = (spml_ucx_cached_mkey_t *)map_segment_find_va(&mkey->super.super, sizeof(*mkey), va);
SPML_ERROR("pe=%d: %p is not address of symmetric variable", if (OPAL_UNLIKELY(NULL == mkey)) {
pe, va); return mca_spml_ucx_get_mkey_slow(pe, va, rva);
oshmem_shmem_abort(-1);
return NULL;
} }
return (spml_ucx_mkey_t *)(r_mkey->spml_context); *rva = map_segment_va2rva(&mkey->super, va);
return &mkey->key;
} }
static inline int ucx_status_to_oshmem(ucs_status_t status) static inline int ucx_status_to_oshmem(ucs_status_t status)
{ {
#if OSHMEM_PARAM_CHECK == 1
return OPAL_LIKELY(UCS_OK == status) ? OSHMEM_SUCCESS : OSHMEM_ERROR; return OPAL_LIKELY(UCS_OK == status) ? OSHMEM_SUCCESS : OSHMEM_ERROR;
#else
return OSHMEM_SUCCESS;
#endif
} }
static inline int ucx_status_to_oshmem_nb(ucs_status_t status) static inline int ucx_status_to_oshmem_nb(ucs_status_t status)
{ {
#if OSHMEM_PARAM_CHECK == 1
return OPAL_LIKELY(status >= 0) ? OSHMEM_SUCCESS : OSHMEM_ERROR; return OPAL_LIKELY(status >= 0) ? OSHMEM_SUCCESS : OSHMEM_ERROR;
#else
return OSHMEM_SUCCESS;
#endif
} }
END_C_DECLS END_C_DECLS

Просмотреть файл

@ -118,8 +118,8 @@ shmem_ds_reset(map_segment_t *ds_buf)
MAP_SEGMENT_RESET_FLAGS(ds_buf); MAP_SEGMENT_RESET_FLAGS(ds_buf);
ds_buf->seg_id = MAP_SEGMENT_SHM_INVALID; ds_buf->seg_id = MAP_SEGMENT_SHM_INVALID;
ds_buf->seg_base_addr = 0; ds_buf->super.va_base = 0;
ds_buf->end = 0; ds_buf->super.va_end = 0;
ds_buf->seg_size = 0; ds_buf->seg_size = 0;
ds_buf->type = MAP_SEGMENT_UNKNOWN; ds_buf->type = MAP_SEGMENT_UNKNOWN;
unlink(ds_buf->seg_name); unlink(ds_buf->seg_name);
@ -218,9 +218,9 @@ segment_create(map_segment_t *ds_buf,
*/ */
ds_buf->seg_id = oshmem_my_proc_id(); ds_buf->seg_id = oshmem_my_proc_id();
} }
ds_buf->seg_base_addr = addr; ds_buf->super.va_base = addr;
ds_buf->seg_size = size; ds_buf->seg_size = size;
ds_buf->end = (void*)((uintptr_t)ds_buf->seg_base_addr + ds_buf->seg_size); ds_buf->super.va_end = (void*)((uintptr_t)ds_buf->super.va_base + ds_buf->seg_size);
OPAL_OUTPUT_VERBOSE( OPAL_OUTPUT_VERBOSE(
(70, oshmem_sshmem_base_framework.framework_output, (70, oshmem_sshmem_base_framework.framework_output,
@ -229,7 +229,7 @@ segment_create(map_segment_t *ds_buf,
mca_sshmem_mmap_component.super.base_version.mca_type_name, mca_sshmem_mmap_component.super.base_version.mca_type_name,
mca_sshmem_mmap_component.super.base_version.mca_component_name, mca_sshmem_mmap_component.super.base_version.mca_component_name,
(rc ? "failure" : "successful"), (rc ? "failure" : "successful"),
ds_buf->seg_id, ds_buf->seg_base_addr, (unsigned long)ds_buf->seg_size, ds_buf->seg_name) ds_buf->seg_id, ds_buf->super.va_base, (unsigned long)ds_buf->seg_size, ds_buf->seg_name)
); );
return rc; return rc;
@ -319,7 +319,7 @@ segment_attach(map_segment_t *ds_buf, sshmem_mkey_t *mkey)
"(id: %d, addr: %p size: %lu, name: %s | va_base: 0x%p len: %d key %llx)\n", "(id: %d, addr: %p size: %lu, name: %s | va_base: 0x%p len: %d key %llx)\n",
mca_sshmem_mmap_component.super.base_version.mca_type_name, mca_sshmem_mmap_component.super.base_version.mca_type_name,
mca_sshmem_mmap_component.super.base_version.mca_component_name, mca_sshmem_mmap_component.super.base_version.mca_component_name,
ds_buf->seg_id, ds_buf->seg_base_addr, (unsigned long)ds_buf->seg_size, ds_buf->seg_name, ds_buf->seg_id, ds_buf->super.va_base, (unsigned long)ds_buf->seg_size, ds_buf->seg_name,
mkey->va_base, mkey->len, (unsigned long long)mkey->u.key) mkey->va_base, mkey->len, (unsigned long long)mkey->u.key)
); );
@ -341,10 +341,10 @@ segment_detach(map_segment_t *ds_buf, sshmem_mkey_t *mkey)
"(id: %d, addr: %p size: %lu, name: %s)\n", "(id: %d, addr: %p size: %lu, name: %s)\n",
mca_sshmem_mmap_component.super.base_version.mca_type_name, mca_sshmem_mmap_component.super.base_version.mca_type_name,
mca_sshmem_mmap_component.super.base_version.mca_component_name, mca_sshmem_mmap_component.super.base_version.mca_component_name,
ds_buf->seg_id, ds_buf->seg_base_addr, (unsigned long)ds_buf->seg_size, ds_buf->seg_name) ds_buf->seg_id, ds_buf->super.va_base, (unsigned long)ds_buf->seg_size, ds_buf->seg_name)
); );
munmap((void *)ds_buf->seg_base_addr, ds_buf->seg_size); munmap((void *)ds_buf->super.va_base, ds_buf->seg_size);
/* reset the contents of the map_segment_t associated with this /* reset the contents of the map_segment_t associated with this
* shared memory segment. * shared memory segment.
@ -366,7 +366,7 @@ segment_unlink(map_segment_t *ds_buf)
"(id: %d, addr: %p size: %lu, name: %s)\n", "(id: %d, addr: %p size: %lu, name: %s)\n",
mca_sshmem_mmap_component.super.base_version.mca_type_name, mca_sshmem_mmap_component.super.base_version.mca_type_name,
mca_sshmem_mmap_component.super.base_version.mca_component_name, mca_sshmem_mmap_component.super.base_version.mca_component_name,
ds_buf->seg_id, ds_buf->seg_base_addr, (unsigned long)ds_buf->seg_size, ds_buf->seg_name) ds_buf->seg_id, ds_buf->super.va_base, (unsigned long)ds_buf->seg_size, ds_buf->seg_name)
); );
/* don't completely reset. in particular, only reset /* don't completely reset. in particular, only reset

Просмотреть файл

@ -96,16 +96,25 @@ typedef struct sshmem_mkey {
void *spml_context; /* spml module can attach internal structures here */ void *spml_context; /* spml module can attach internal structures here */
} sshmem_mkey_t; } sshmem_mkey_t;
typedef struct map_segment_t { typedef struct map_base_segment {
sshmem_mkey_t **mkeys_cache; /* includes remote segment bases in va_base */ void *va_base; /* base address of the segment */
sshmem_mkey_t *mkeys; /* includes local segment bases in va_base */ void *va_end; /* final address of the segment */
segment_flag_t flags; /* enable/disable flag */ } map_base_segment_t;
int seg_id;
void* seg_base_addr; /* base address of the segment */ typedef struct mkey_segment {
void* end; /* final address of the segment */ map_base_segment_t super;
char seg_name[OPAL_PATH_MAX]; void *rva_base; /* base va on remote pe */
size_t seg_size; /* length of the segment */ } mkey_segment_t;
segment_type_t type; /* type of the segment */
typedef struct map_segment {
map_base_segment_t super;
sshmem_mkey_t **mkeys_cache; /* includes remote segment bases in va_base */
sshmem_mkey_t *mkeys; /* includes local segment bases in va_base */
segment_flag_t flags; /* enable/disable flag */
int seg_id;
char seg_name[OPAL_PATH_MAX];
size_t seg_size; /* length of the segment */
segment_type_t type; /* type of the segment */
} map_segment_t; } map_segment_t;
END_C_DECLS END_C_DECLS

Просмотреть файл

@ -114,8 +114,8 @@ shmem_ds_reset(map_segment_t *ds_buf)
MAP_SEGMENT_RESET_FLAGS(ds_buf); MAP_SEGMENT_RESET_FLAGS(ds_buf);
ds_buf->seg_id = MAP_SEGMENT_SHM_INVALID; ds_buf->seg_id = MAP_SEGMENT_SHM_INVALID;
ds_buf->seg_base_addr = 0; ds_buf->super.va_base = 0;
ds_buf->end = 0; ds_buf->super.va_end = 0;
ds_buf->seg_size = 0; ds_buf->seg_size = 0;
ds_buf->type = MAP_SEGMENT_UNKNOWN; ds_buf->type = MAP_SEGMENT_UNKNOWN;
memset(ds_buf->seg_name, '\0', sizeof(ds_buf->seg_name)); memset(ds_buf->seg_name, '\0', sizeof(ds_buf->seg_name));
@ -225,9 +225,9 @@ segment_create(map_segment_t *ds_buf,
ds_buf->type = MAP_SEGMENT_ALLOC_SHM; ds_buf->type = MAP_SEGMENT_ALLOC_SHM;
ds_buf->seg_id = shmid; ds_buf->seg_id = shmid;
ds_buf->seg_base_addr = addr; ds_buf->super.va_base = addr;
ds_buf->seg_size = size; ds_buf->seg_size = size;
ds_buf->end = (void*)((uintptr_t)ds_buf->seg_base_addr + ds_buf->seg_size); ds_buf->super.va_end = (void*)((uintptr_t)ds_buf->super.va_base + ds_buf->seg_size);
OPAL_OUTPUT_VERBOSE( OPAL_OUTPUT_VERBOSE(
(70, oshmem_sshmem_base_framework.framework_output, (70, oshmem_sshmem_base_framework.framework_output,
@ -236,7 +236,7 @@ segment_create(map_segment_t *ds_buf,
mca_sshmem_sysv_component.super.base_version.mca_type_name, mca_sshmem_sysv_component.super.base_version.mca_type_name,
mca_sshmem_sysv_component.super.base_version.mca_component_name, mca_sshmem_sysv_component.super.base_version.mca_component_name,
(rc ? "failure" : "successful"), (rc ? "failure" : "successful"),
ds_buf->seg_id, ds_buf->seg_base_addr, (unsigned long)ds_buf->seg_size, ds_buf->seg_name) ds_buf->seg_id, ds_buf->super.va_base, (unsigned long)ds_buf->seg_size, ds_buf->seg_name)
); );
return rc; return rc;
@ -264,7 +264,7 @@ segment_attach(map_segment_t *ds_buf, sshmem_mkey_t *mkey)
"(id: %d, addr: %p size: %lu, name: %s | va_base: 0x%p len: %d key %llx)\n", "(id: %d, addr: %p size: %lu, name: %s | va_base: 0x%p len: %d key %llx)\n",
mca_sshmem_sysv_component.super.base_version.mca_type_name, mca_sshmem_sysv_component.super.base_version.mca_type_name,
mca_sshmem_sysv_component.super.base_version.mca_component_name, mca_sshmem_sysv_component.super.base_version.mca_component_name,
ds_buf->seg_id, ds_buf->seg_base_addr, (unsigned long)ds_buf->seg_size, ds_buf->seg_name, ds_buf->seg_id, ds_buf->super.va_base, (unsigned long)ds_buf->seg_size, ds_buf->seg_name,
mkey->va_base, mkey->len, (unsigned long long)mkey->u.key) mkey->va_base, mkey->len, (unsigned long long)mkey->u.key)
); );
@ -286,7 +286,7 @@ segment_detach(map_segment_t *ds_buf, sshmem_mkey_t *mkey)
"(id: %d, addr: %p size: %lu, name: %s)\n", "(id: %d, addr: %p size: %lu, name: %s)\n",
mca_sshmem_sysv_component.super.base_version.mca_type_name, mca_sshmem_sysv_component.super.base_version.mca_type_name,
mca_sshmem_sysv_component.super.base_version.mca_component_name, mca_sshmem_sysv_component.super.base_version.mca_component_name,
ds_buf->seg_id, ds_buf->seg_base_addr, (unsigned long)ds_buf->seg_size, ds_buf->seg_name) ds_buf->seg_id, ds_buf->super.va_base, (unsigned long)ds_buf->seg_size, ds_buf->seg_name)
); );
if (ds_buf->seg_id != MAP_SEGMENT_SHM_INVALID) { if (ds_buf->seg_id != MAP_SEGMENT_SHM_INVALID) {

Просмотреть файл

@ -110,8 +110,8 @@ shmem_ds_reset(map_segment_t *ds_buf)
MAP_SEGMENT_RESET_FLAGS(ds_buf); MAP_SEGMENT_RESET_FLAGS(ds_buf);
ds_buf->seg_id = MAP_SEGMENT_SHM_INVALID; ds_buf->seg_id = MAP_SEGMENT_SHM_INVALID;
ds_buf->seg_base_addr = 0; ds_buf->super.va_base = 0;
ds_buf->end = 0; ds_buf->super.va_end = 0;
ds_buf->seg_size = 0; ds_buf->seg_size = 0;
ds_buf->type = MAP_SEGMENT_UNKNOWN; ds_buf->type = MAP_SEGMENT_UNKNOWN;
memset(ds_buf->seg_name, '\0', sizeof(ds_buf->seg_name)); memset(ds_buf->seg_name, '\0', sizeof(ds_buf->seg_name));
@ -320,9 +320,9 @@ segment_create(map_segment_t *ds_buf,
ds_buf->type = MAP_SEGMENT_ALLOC_IBV_NOSHMR; ds_buf->type = MAP_SEGMENT_ALLOC_IBV_NOSHMR;
ds_buf->seg_id = MAP_SEGMENT_SHM_INVALID; ds_buf->seg_id = MAP_SEGMENT_SHM_INVALID;
} }
ds_buf->seg_base_addr = ib_mr->addr; ds_buf->super.va_base = ib_mr->addr;
ds_buf->seg_size = size; ds_buf->seg_size = size;
ds_buf->end = (void*)((uintptr_t)ds_buf->seg_base_addr + ds_buf->seg_size); ds_buf->super.va_end = (void*)((uintptr_t)ds_buf->super.va_base + ds_buf->seg_size);
} }
} }
@ -333,7 +333,7 @@ segment_create(map_segment_t *ds_buf,
mca_sshmem_verbs_component.super.base_version.mca_type_name, mca_sshmem_verbs_component.super.base_version.mca_type_name,
mca_sshmem_verbs_component.super.base_version.mca_component_name, mca_sshmem_verbs_component.super.base_version.mca_component_name,
(rc ? "failure" : "successful"), (rc ? "failure" : "successful"),
ds_buf->seg_id, ds_buf->seg_base_addr, (unsigned long)ds_buf->seg_size, ds_buf->seg_name) ds_buf->seg_id, ds_buf->super.va_base, (unsigned long)ds_buf->seg_size, ds_buf->seg_name)
); );
return rc; return rc;
@ -398,7 +398,7 @@ segment_attach(map_segment_t *ds_buf, sshmem_mkey_t *mkey)
"(id: %d, addr: %p size: %lu, name: %s | va_base: 0x%p len: %d key %llx)\n", "(id: %d, addr: %p size: %lu, name: %s | va_base: 0x%p len: %d key %llx)\n",
mca_sshmem_verbs_component.super.base_version.mca_type_name, mca_sshmem_verbs_component.super.base_version.mca_type_name,
mca_sshmem_verbs_component.super.base_version.mca_component_name, mca_sshmem_verbs_component.super.base_version.mca_component_name,
ds_buf->seg_id, ds_buf->seg_base_addr, (unsigned long)ds_buf->seg_size, ds_buf->seg_name, ds_buf->seg_id, ds_buf->super.va_base, (unsigned long)ds_buf->seg_size, ds_buf->seg_name,
mkey->va_base, mkey->len, (unsigned long long)mkey->u.key) mkey->va_base, mkey->len, (unsigned long long)mkey->u.key)
); );
@ -422,7 +422,7 @@ segment_detach(map_segment_t *ds_buf, sshmem_mkey_t *mkey)
"(id: %d, addr: %p size: %lu, name: %s)\n", "(id: %d, addr: %p size: %lu, name: %s)\n",
mca_sshmem_verbs_component.super.base_version.mca_type_name, mca_sshmem_verbs_component.super.base_version.mca_type_name,
mca_sshmem_verbs_component.super.base_version.mca_component_name, mca_sshmem_verbs_component.super.base_version.mca_component_name,
ds_buf->seg_id, ds_buf->seg_base_addr, (unsigned long)ds_buf->seg_size, ds_buf->seg_name) ds_buf->seg_id, ds_buf->super.va_base, (unsigned long)ds_buf->seg_size, ds_buf->seg_name)
); );
if (device) { if (device) {
@ -501,7 +501,7 @@ segment_unlink(map_segment_t *ds_buf)
"(id: %d, addr: %p size: %lu, name: %s)\n", "(id: %d, addr: %p size: %lu, name: %s)\n",
mca_sshmem_verbs_component.super.base_version.mca_type_name, mca_sshmem_verbs_component.super.base_version.mca_type_name,
mca_sshmem_verbs_component.super.base_version.mca_component_name, mca_sshmem_verbs_component.super.base_version.mca_component_name,
ds_buf->seg_id, ds_buf->seg_base_addr, (unsigned long)ds_buf->seg_size, ds_buf->seg_name) ds_buf->seg_id, ds_buf->super.va_base, (unsigned long)ds_buf->seg_size, ds_buf->seg_name)
); );
/* don't completely reset. in particular, only reset /* don't completely reset. in particular, only reset

Просмотреть файл

@ -26,10 +26,16 @@ int shmem_addr_accessible(const void *addr, int pe)
{ {
void* rva; void* rva;
sshmem_mkey_t *mkey; sshmem_mkey_t *mkey;
int i;
RUNTIME_CHECK_INIT(); RUNTIME_CHECK_INIT();
mkey = mca_memheap_base_get_cached_mkey(pe, (void *)addr, oshmem_get_transport_id(pe), &rva); for (i = 0; i < mca_memheap_base_num_transports(); i++) {
mkey = mca_memheap_base_get_cached_mkey(pe, (void *)addr, i, &rva);
if (mkey) {
return 1;
}
}
return mkey ? 1 : 0; return 0;
} }

Просмотреть файл

@ -270,7 +270,23 @@ static uint64_t shmem_lock_cswap(void *target,
prev_value = prev_value_32; prev_value = prev_value_32;
} }
return prev_value;
}
/* This function is used to busy-wait for a value.
 * It calls opal_progress() so that ompi will not deadlock
 * (for example, it may need to respond to rkey requests).
 */
static uint64_t shmem_lock_cswap_poll(void *target,
int target_size,
uint64_t cond,
uint64_t value,
int pe)
{
uint64_t prev_value;
prev_value = shmem_lock_cswap(target, target_size, cond, value, pe);
opal_progress();
return prev_value; return prev_value;
} }
@ -316,11 +332,11 @@ static int pack_first_word(void *lock,
extract_second_word(&lock_value, lock_size, &two); extract_second_word(&lock_value, lock_size, &two);
pack_2_words(&new_long_value, lock_size, one, &two); pack_2_words(&new_long_value, lock_size, one, &two);
while (lock_value while (lock_value
!= (temp = shmem_lock_cswap(lock, != (temp = shmem_lock_cswap_poll(lock,
lock_size, lock_size,
lock_value, lock_value,
new_long_value, new_long_value,
my_pe))) { my_pe))) {
lock_value = temp; lock_value = temp;
extract_second_word(&lock_value, lock_size, &two); extract_second_word(&lock_value, lock_size, &two);
pack_2_words(&new_long_value, lock_size, one, &two); pack_2_words(&new_long_value, lock_size, one, &two);
@ -367,11 +383,11 @@ static int pack_second_word(void *lock,
extract_first_word(&lock_value, lock_size, &one); extract_first_word(&lock_value, lock_size, &one);
pack_2_words(&new_long_value, lock_size, &one, two); pack_2_words(&new_long_value, lock_size, &one, two);
while (lock_value while (lock_value
!= (temp = shmem_lock_cswap(lock, != (temp = shmem_lock_cswap_poll(lock,
lock_size, lock_size,
lock_value, lock_value,
new_long_value, new_long_value,
my_pe))) { my_pe))) {
lock_value = temp; lock_value = temp;
extract_first_word(&lock_value, lock_size, &one); extract_first_word(&lock_value, lock_size, &one);
pack_2_words(&new_long_value, lock_size, &one, two); pack_2_words(&new_long_value, lock_size, &one, two);
@ -691,11 +707,11 @@ static int shmem_lock_wait_for_ticket(void *lock,
new_server_lock = server_lock = temp; new_server_lock = server_lock = temp;
lock_pack_pe_last(&new_server_lock, lock_size, &my_pe, 0); lock_pack_pe_last(&new_server_lock, lock_size, &my_pe, 0);
} while (server_lock } while (server_lock
!= (temp = shmem_lock_cswap(lock, != (temp = shmem_lock_cswap_poll(lock,
lock_size, lock_size,
server_lock, server_lock,
new_server_lock, new_server_lock,
server_pe))); server_pe)));
lock_extract_pe_last(&server_lock, lock_size, pe_last); lock_extract_pe_last(&server_lock, lock_size, pe_last);
if (*pe_last == -1) { if (*pe_last == -1) {
/* we are first in queue for the lock */ /* we are first in queue for the lock */
@ -751,11 +767,11 @@ static int shmem_lock_subscribe_for_informing(void *lock,
prev_remote_value += my_pe + 1; prev_remote_value += my_pe + 1;
while (prev_remote_value while (prev_remote_value
!= (temp_value = shmem_lock_cswap(lock, != (temp_value = shmem_lock_cswap_poll(lock,
lock_size, lock_size,
prev_remote_value, prev_remote_value,
new_remote_value, new_remote_value,
pe_last))) { pe_last))) {
prev_remote_value = temp_value; prev_remote_value = temp_value;
lock_extract_counter(&prev_remote_value, lock_extract_counter(&prev_remote_value,
lock_size, lock_size,
@ -849,11 +865,11 @@ static int shmem_lock_inform_next(void *lock, int lock_size, int pe_next)
| (((uint64_t) 1) << (lock_bitwise_size - 1)); | (((uint64_t) 1) << (lock_bitwise_size - 1));
while (remote_value while (remote_value
!= (temp_value = shmem_lock_cswap(lock, != (temp_value = shmem_lock_cswap_poll(lock,
lock_size, lock_size,
remote_value, remote_value,
new_remote_value, new_remote_value,
pe_next))) { pe_next))) {
remote_value = temp_value; remote_value = temp_value;
new_remote_value = remote_value new_remote_value = remote_value
| (((uint64_t) 1) << (lock_bitwise_size - 1)); | (((uint64_t) 1) << (lock_bitwise_size - 1));
@ -938,7 +954,7 @@ static int shmem_lock_try_inform_server(void *lock, int lock_size)
&incorrect_pe, &incorrect_pe,
&my_pe); &my_pe);
return !(remote_value return !(remote_value
== shmem_lock_cswap(lock, lock_size, remote_value, zero, server_pe)); == shmem_lock_cswap_poll(lock, lock_size, remote_value, zero, server_pe));
} }
/***************************************************************************/ /***************************************************************************/

Просмотреть файл

@ -23,7 +23,7 @@ void oshmem_output_verbose(int level, int output_id, const char* prefix,
char *buff, *str; char *buff, *str;
int ret = 0; int ret = 0;
if (level < opal_output_get_verbosity(output_id)) { if (level <= opal_output_get_verbosity(output_id)) {
UNREFERENCED_PARAMETER(ret); UNREFERENCED_PARAMETER(ret);
va_start(args, format); va_start(args, format);