1
1

OSHMEM: port 6 patches from git mirror to svn

Subject: [PATCH 1/6] OSHMEM: mkey refactoring
An mkey can be either a shared-memory-style id or an
arbitrary byte string.
Removed the hack that used spml_context to store generic keys.
Coding style fixes.

Subject: [PATCH 2/6] OSHMEM: added support of MXM 2.0 rc transport
coding style fixed, typos, check error condition

Subject: [PATCH 3/6] OSHMEM: mxm2.0: remove PTL_SELF
There is no need for a special case for the 'self'
connection in MXM 2.0. Removing it also solves the problem
of passing an incorrect mkey when doing put/get to
self.

Subject: [PATCH 4/6] OSHMEM: fixes mxm fadd
Pass a dummy buffer when doing an atomic add (no fetch).

Subject: [PATCH 5/6] OSHMEM: mxm2.0: do not use MXM_REQ_FLAG_SEND_LAZY
Subject: [PATCH 6/6] OSHMEM: remove unused include, causes compilation fail on ubuntu

Refs trac:3763

This commit was SVN r30129.

The following Trac tickets were found above:
  Ticket 3763 --> https://svn.open-mpi.org/trac/ompi/ticket/3763
Этот коммит содержится в:
Mike Dubman 2014-01-07 11:56:36 +00:00
родитель 8bf4ad9030
Коммит 6fb0dbdab5
10 изменённых файлов: 196 добавлений и 227 удалений

Просмотреть файл

@ -36,6 +36,7 @@ int mca_atomic_mxm_fadd(void *target,
int ptl_id;
mxm_send_req_t sreq;
mxm_error_t mxm_err;
static char dummy_buf[8];
my_pe = oshmem_my_proc_id();
ptl_id = -1;
@ -106,8 +107,8 @@ int mca_atomic_mxm_fadd(void *target,
/* Do we need atomic 'add' or atomic 'fetch and add'? */
if (NULL == prev) {
sreq.base.data.buffer.ptr = NULL;
sreq.base.data.buffer.length = 0;
sreq.base.data.buffer.ptr = dummy_buf;
sreq.base.data.buffer.length = nlong;
sreq.base.data.buffer.memh = MXM_INVALID_MEM_HANDLE;
#if MXM_API < MXM_VERSION(2,0)
sreq.base.flags = MXM_REQ_FLAG_SEND_SYNC;

Просмотреть файл

@ -157,26 +157,18 @@ static int pack_local_mkeys(opal_buffer_t *msg, int pe, int seg, int all_trs)
return OSHMEM_ERROR;
}
opal_dss.pack(msg, &tr_id, 1, OPAL_UINT32);
opal_dss.pack(msg, &mkey->handle.key, 1, OPAL_UINT64);
opal_dss.pack(msg, &mkey->va_base, 1, OPAL_UINT64);
if (NULL != MCA_SPML_CALL(get_remote_context_size)) {
uint32_t context_size =
(mkey->spml_context == NULL ) ?
0 :
(uint32_t) MCA_SPML_CALL(get_remote_context_size(mkey->spml_context));
opal_dss.pack(msg, &context_size, 1, OPAL_UINT32);
if (0 != context_size) {
opal_dss.pack(msg,
MCA_SPML_CALL(get_remote_context(mkey->spml_context)),
context_size,
OPAL_BYTE);
if (0 == mkey->va_base) {
opal_dss.pack(msg, &mkey->u.key, 1, OPAL_UINT64);
} else {
opal_dss.pack(msg, &mkey->len, 1, OPAL_UINT16);
if (0 < mkey->len) {
opal_dss.pack(msg, mkey->u.data, mkey->len, OPAL_BYTE);
}
}
MEMHEAP_VERBOSE(5,
"seg#%d tr_id: %d key %llx base_va %p",
seg, tr_id, (unsigned long long)mkey->handle.key, mkey->va_base);
"seg#%d tr_id: %d %s",
seg, tr_id, mca_spml_base_mkey2str(mkey));
}
return OSHMEM_SUCCESS;
}
@ -188,70 +180,70 @@ static void memheap_attach_segment(mca_spml_mkey_t *mkey, int tr_id)
* - key is set as (type|shmid);
* - va_base is set as 0;
*/
if (!mkey->va_base
&& ((int) MEMHEAP_SHM_GET_ID(mkey->handle.key) != MEMHEAP_SHM_INVALID)) {
MEMHEAP_VERBOSE(5,
"shared memory usage tr_id: %d key %llx base_va %p shmid 0x%X|0x%X",
tr_id,
(unsigned long long)mkey->handle.key,
mkey->va_base,
MEMHEAP_SHM_GET_TYPE(mkey->handle.key),
MEMHEAP_SHM_GET_ID(mkey->handle.key));
assert(mkey->va_base == 0);
if (MEMHEAP_SHM_GET_TYPE(mkey->handle.key) == MAP_SEGMENT_ALLOC_SHM) {
mkey->va_base = shmat(MEMHEAP_SHM_GET_ID(mkey->handle.key),
0,
0);
} else if (MEMHEAP_SHM_GET_TYPE(mkey->handle.key) == MAP_SEGMENT_ALLOC_IBV) {
if (MEMHEAP_SHM_INVALID == (int) MEMHEAP_SHM_GET_ID(mkey->u.key)) {
return;
}
MEMHEAP_VERBOSE(5,
"shared memory usage tr_id: %d key %llx base_va %p shmid 0x%X|0x%X",
tr_id,
(unsigned long long)mkey->u.key,
mkey->va_base,
MEMHEAP_SHM_GET_TYPE(mkey->u.key),
MEMHEAP_SHM_GET_ID(mkey->u.key));
if (MAP_SEGMENT_ALLOC_SHM == MEMHEAP_SHM_GET_TYPE(mkey->u.key)) {
mkey->va_base = shmat(MEMHEAP_SHM_GET_ID(mkey->u.key),
0,
0);
} else if (MAP_SEGMENT_ALLOC_IBV == MEMHEAP_SHM_GET_TYPE(mkey->u.key)) {
#if defined(MPAGE_ENABLE) && (MPAGE_ENABLE > 0)
openib_device_t *device = NULL;
struct ibv_mr *ib_mr;
void *addr;
static int mr_count;
openib_device_t *device = NULL;
struct ibv_mr *ib_mr;
void *addr;
static int mr_count;
int access_flag = IBV_ACCESS_LOCAL_WRITE |
int access_flag = IBV_ACCESS_LOCAL_WRITE |
IBV_ACCESS_REMOTE_WRITE |
IBV_ACCESS_REMOTE_READ |
IBV_ACCESS_NO_RDMA;
device = (openib_device_t *)memheap_map->mem_segs[HEAP_SEG_INDEX].context;
assert(device);
device = (openib_device_t *)memheap_map->mem_segs[HEAP_SEG_INDEX].context;
assert(device);
/* workaround mtt problem - request aligned addresses */
++mr_count;
addr = (void *)((uintptr_t)mca_memheap_base_start_address + mca_memheap_base_mr_interleave_factor*1024ULL*1024ULL*1024ULL*mr_count);
ib_mr = ibv_reg_shared_mr(MEMHEAP_SHM_GET_ID(mkey->handle.key),
device->ib_pd, addr, access_flag);
if (NULL == ib_mr)
{
mkey->va_base = (void*)-1;
MEMHEAP_ERROR("error to ibv_reg_shared_mr() errno says %d: %s",
errno, strerror(errno));
}
else
{
if (ib_mr->addr != addr) {
MEMHEAP_WARN("Failed to map shared region to address %p got addr %p. Try to increase 'memheap_mr_interleave_factor' from %d", addr, ib_mr->addr, mca_memheap_base_mr_interleave_factor);
}
opal_value_array_append_item(&device->ib_mr_array, &ib_mr);
mkey->va_base = ib_mr->addr;
}
#endif /* MPAGE_ENABLE */
/* workaround mtt problem - request aligned addresses */
++mr_count;
addr = (void *)((uintptr_t)mca_memheap_base_start_address + mca_memheap_base_mr_interleave_factor*1024ULL*1024ULL*1024ULL*mr_count);
ib_mr = ibv_reg_shared_mr(MEMHEAP_SHM_GET_ID(mkey->u.key),
device->ib_pd, addr, access_flag);
if (NULL == ib_mr) {
mkey->va_base = (void*)-1;
MEMHEAP_ERROR("error to ibv_reg_shared_mr() errno says %d: %s",
errno, strerror(errno));
} else {
MEMHEAP_ERROR("tr_id: %d key %llx attach failed: incorrect shmid 0x%X|0x%X",
tr_id,
(unsigned long long)mkey->handle.key,
MEMHEAP_SHM_GET_TYPE(mkey->handle.key),
MEMHEAP_SHM_GET_ID(mkey->handle.key));
oshmem_shmem_abort(-1);
}
if (ib_mr->addr != addr) {
MEMHEAP_WARN("Failed to map shared region to address %p got addr %p. Try to increase 'memheap_mr_interleave_factor' from %d", addr, ib_mr->addr, mca_memheap_base_mr_interleave_factor);
}
if ((void *) -1 == (void *) mkey->va_base) {
MEMHEAP_ERROR("tr_id: %d key %llx attach failed: errno = %d",
tr_id, (unsigned long long)mkey->handle.key, errno);
oshmem_shmem_abort(-1);
opal_value_array_append_item(&device->ib_mr_array, &ib_mr);
mkey->va_base = ib_mr->addr;
}
#endif /* MPAGE_ENABLE */
} else {
MEMHEAP_ERROR("tr_id: %d key %llx attach failed: incorrect shmid 0x%X|0x%X",
tr_id,
(unsigned long long)mkey->u.key,
MEMHEAP_SHM_GET_TYPE(mkey->u.key),
MEMHEAP_SHM_GET_ID(mkey->u.key));
oshmem_shmem_abort(-1);
}
if ((void *) -1 == (void *) mkey->va_base) {
MEMHEAP_ERROR("tr_id: %d key %llx attach failed: errno = %d",
tr_id, (unsigned long long)mkey->u.key, errno);
oshmem_shmem_abort(-1);
}
}
@ -268,32 +260,36 @@ static void unpack_remote_mkeys(opal_buffer_t *msg, int remote_pe)
cnt = 1;
opal_dss.unpack(msg, &n, &cnt, OPAL_UINT32);
for (i = 0; i < n; i++) {
cnt = 1;
opal_dss.unpack(msg, &tr_id, &cnt, OPAL_UINT32);
opal_dss.unpack(msg, &memheap_oob.mkeys[tr_id].handle.key, &cnt, OPAL_UINT64);
cnt = 1;
opal_dss.unpack(msg,
&memheap_oob.mkeys[tr_id].va_base,
&cnt,
OPAL_UINT64);
if (NULL != MCA_SPML_CALL(set_remote_context_size)) {
int32_t context_size;
opal_dss.unpack(msg, &context_size, &cnt, OPAL_UINT32);
if (0 != context_size) {
MCA_SPML_CALL(set_remote_context_size(&(memheap_oob.mkeys[tr_id].spml_context), context_size));
void* context;
context = calloc(1, context_size);
opal_dss.unpack(msg, context, &context_size, OPAL_BYTE);
MCA_SPML_CALL(set_remote_context(&(memheap_oob.mkeys[tr_id].spml_context),context));
if (0 == memheap_oob.mkeys[tr_id].va_base) {
cnt = 1;
opal_dss.unpack(msg, &memheap_oob.mkeys[tr_id].u.key, &cnt, OPAL_UINT64);
if (OPAL_PROC_ON_LOCAL_NODE(proc->proc_flags))
memheap_attach_segment(&memheap_oob.mkeys[tr_id], tr_id);
} else {
cnt = 1;
opal_dss.unpack(msg, &memheap_oob.mkeys[tr_id].len, &cnt, OPAL_UINT16);
if (0 < memheap_oob.mkeys[tr_id].len) {
memheap_oob.mkeys[tr_id].u.data = malloc(memheap_oob.mkeys[tr_id].len);
if (NULL == memheap_oob.mkeys[tr_id].u.data) {
MEMHEAP_ERROR("Failed allocate %d bytes", memheap_oob.mkeys[tr_id].len);
oshmem_shmem_abort(-1);
}
cnt = memheap_oob.mkeys[tr_id].len;
opal_dss.unpack(msg, memheap_oob.mkeys[tr_id].u.data, &cnt, OPAL_BYTE);
}
}
if (OPAL_PROC_ON_LOCAL_NODE(proc->proc_flags))
memheap_attach_segment(&memheap_oob.mkeys[tr_id], tr_id);
MEMHEAP_VERBOSE(5,
"tr_id: %d key %llx base_va %p",
tr_id, (unsigned long long)memheap_oob.mkeys[tr_id].handle.key, memheap_oob.mkeys[tr_id].va_base);
"tr_id: %d %s",
tr_id, mca_spml_base_mkey2str(&memheap_oob.mkeys[tr_id]));
}
}
@ -533,11 +529,10 @@ static int memheap_oob_get_mkeys(int pe, uint32_t seg, mca_spml_mkey_t *mkeys)
for (i = 0; i < memheap_map->num_transports; i++) {
mkeys[i].va_base = __seg2base_va(seg);
MEMHEAP_VERBOSE(5,
"MKEY CALCULATED BY LOCAL SPML: pe: %d tr_id: %d key %llx base_va %p",
"MKEY CALCULATED BY LOCAL SPML: pe: %d tr_id: %d %s",
pe,
i,
(unsigned long long)mkeys[i].handle.key,
mkeys[i].va_base);
mca_spml_base_mkey2str(&mkeys[i]));
}
return OSHMEM_SUCCESS;
}
@ -707,14 +702,14 @@ mca_spml_mkey_t * mca_memheap_base_get_cached_mkey(int pe,
if (pe == oshmem_my_proc_id()) {
*rva = va;
MEMHEAP_VERBOSE_FASTPATH(10, "rkey: pe=%d va=%p -> (local) %lx %p", pe, va,
s->mkeys[btl_id].handle.key, *rva);
s->mkeys[btl_id].u.key, *rva);
return &s->mkeys[btl_id];
}
if (OPAL_LIKELY(s->mkeys_cache[pe])) {
mkey = &s->mkeys_cache[pe][btl_id];
*rva = va2rva(va, s->start, mkey->va_base);
MEMHEAP_VERBOSE_FASTPATH(10, "rkey: pe=%d va=%p -> (cached) %lx %p", pe, (void *)va, mkey->handle.key, (void *)*rva);
MEMHEAP_VERBOSE_FASTPATH(10, "rkey: pe=%d va=%p -> (cached) %lx %p", pe, (void *)va, mkey->u.key, (void *)*rva);
return mkey;
}
@ -732,7 +727,7 @@ mca_spml_mkey_t * mca_memheap_base_get_cached_mkey(int pe,
mkey = &s->mkeys_cache[pe][btl_id];
*rva = va2rva(va, s->start, mkey->va_base);
MEMHEAP_VERBOSE_FASTPATH(5, "rkey: pe=%d va=%p -> (remote lookup) %lx %p", pe, (void *)va, mkey->handle.key, (void *)*rva);
MEMHEAP_VERBOSE_FASTPATH(5, "rkey: pe=%d va=%p -> (remote lookup) %lx %p", pe, (void *)va, mkey->u.key, (void *)*rva);
return mkey;
}

Просмотреть файл

@ -79,6 +79,10 @@ static int _dereg_segment(map_segment_t *s)
if (j == my_pe)
continue;
if (s->mkeys_cache[j]) {
if (s->mkeys_cache[j]->len) {
free(s->mkeys_cache[j]->u.data);
s->mkeys_cache[j]->len = 0;
}
free(s->mkeys_cache[j]);
s->mkeys_cache[j] = NULL;
}

Просмотреть файл

@ -73,6 +73,11 @@ static int spml_ikrit_get_ep_address(spml_ikrit_mxm_ep_conn_info_t *ep_info,
return OSHMEM_SUCCESS;
}
#else
/*
 * Return the MXM memory key carried by an mkey.
 *
 * The key is the mkey's byte-string payload (u.data) reinterpreted as an
 * mxm_mem_key_t; no check is made that the payload is present.
 */
static inline mxm_mem_key_t *to_mxm_mkey(mca_spml_mkey_t *mkey)
{
    void *payload = mkey->u.data;

    return (mxm_mem_key_t *) payload;
}
#endif
static inline void mca_spml_irkit_req_wait(mxm_req_base_t *req)
@ -193,11 +198,6 @@ mca_spml_ikrit_t mca_spml_ikrit = {
mca_spml_base_wait,
mca_spml_base_wait_nb,
mca_spml_ikrit_fence,
NULL,
NULL,
NULL,
NULL,
(void*)&mca_spml_ikrit
}
};
@ -301,9 +301,11 @@ static int create_ptl_idx(int dst_pe)
return OSHMEM_ERROR;
proc->num_transports = 1;
#if MXM_API < MXM_VERSION(2,0)
if (oshmem_my_proc_id() == dst_pe)
proc->transport_ids[0] = MXM_PTL_SELF;
else
#endif
proc->transport_ids[0] = MXM_PTL_RDMA;
return OSHMEM_SUCCESS;
}
@ -531,6 +533,7 @@ mca_spml_mkey_t *mca_spml_ikrit_register(void* addr,
mca_spml_mkey_t *mkeys;
#if MXM_API >= MXM_VERSION(2,0)
mxm_error_t err;
mxm_mem_key_t *m_key;
#endif
*count = 0;
@ -543,32 +546,47 @@ mca_spml_mkey_t *mca_spml_ikrit_register(void* addr,
switch (i) {
case MXM_PTL_SHM:
if ((int) MEMHEAP_SHM_GET_ID(shmid) != MEMHEAP_SHM_INVALID) {
mkeys[i].handle.key = shmid;
mkeys[i].u.key = shmid;
mkeys[i].va_base = 0;
} else {
mkeys[i].handle.key = 0;
mkeys[i].len = 0;
mkeys[i].va_base = addr;
}
mkeys[i].spml_context = 0;
break;
#if MXM_API < MXM_VERSION(2,0)
case MXM_PTL_SELF:
mkeys[i].handle.key = 0;
mkeys[i].len = 0;
mkeys[i].spml_context = 0;
mkeys[i].va_base = addr;
break;
#endif
case MXM_PTL_RDMA:
mkeys[i].va_base = addr;
mkeys[i].spml_context = 0;
#if MXM_API < MXM_VERSION(2,0)
mkeys[i].handle.ib.lkey = mkeys[i].handle.ib.rkey = 0;
mkeys[i].len = 0;
#else
mkeys[i].handle.ib.lkey = mkeys[i].handle.ib.rkey = 0;
err = mxm_mem_map(mca_spml_ikrit.mxm_context, &addr, &size, 0, 0, 0);
if (MXM_OK != err) {
SPML_VERBOSE(1, "failed to register memory: %s", mxm_error_string(err));
SPML_ERROR("Failed to register memory: %s", mxm_error_string(err));
goto error_out;
}
mkeys[i].spml_context = (void *)(unsigned long)size;
m_key = malloc(sizeof(*m_key));
if (NULL == m_key) {
SPML_ERROR("Failed to allocate m_key memory");
goto error_out;
}
mkeys[i].len = sizeof(*m_key);
mkeys[i].u.data = m_key;
err = mxm_mem_get_key(mca_spml_ikrit.mxm_context, addr, m_key);
if (MXM_OK != err) {
SPML_ERROR("Failed to get memory key: %s", mxm_error_string(err));
goto error_out;
}
#endif
break;
@ -577,15 +595,16 @@ mca_spml_mkey_t *mca_spml_ikrit_register(void* addr,
goto error_out;
}
SPML_VERBOSE(5,
"rank %d ptl %d rkey %x lkey %x key %llx address 0x%llX len %llu shmid 0x%X|0x%X",
oshmem_proc_local_proc->proc_name.vpid, i, mkeys[i].handle.ib.rkey, mkeys[i].handle.ib.lkey, (unsigned long long)mkeys[i].handle.key, (unsigned long long)mkeys[i].va_base, (unsigned long long)size, MEMHEAP_SHM_GET_TYPE(shmid), MEMHEAP_SHM_GET_ID(shmid));
"rank %d ptl %d addr %p size %llu %s",
oshmem_proc_local_proc->proc_name.vpid, i, addr, (unsigned long long)size,
mca_spml_base_mkey2str(&mkeys[i]));
}
*count = MXM_PTL_LAST;
return mkeys;
error_out:
error_out:
mca_spml_ikrit_deregister(mkeys);
return NULL ;
@ -600,7 +619,9 @@ int mca_spml_ikrit_deregister(mca_spml_mkey_t *mkeys)
for (i = 0; i < MXM_PTL_LAST; i++) {
switch (i) {
#if MXM_API < MXM_VERSION(2,0)
case MXM_PTL_SELF:
#endif
case MXM_PTL_SHM:
break;
case MXM_PTL_RDMA:
@ -612,6 +633,9 @@ int mca_spml_ikrit_deregister(mca_spml_mkey_t *mkeys)
(void *)mkeys[i].va_base,
(unsigned long)mkeys[i].spml_context,
0);
if (0 < mkeys[i].len) {
free(mkeys[i].u.data);
}
#endif
break;
}
@ -636,8 +660,8 @@ static inline int get_ptl_id(int dst)
int mca_spml_ikrit_oob_get_mkeys(int pe, uint32_t seg, mca_spml_mkey_t *mkeys)
{
#if MXM_API < MXM_VERSION(2,0)
int ptl;
ptl = get_ptl_id(pe);
if (ptl < 0)
return OSHMEM_ERROR;
@ -649,6 +673,12 @@ int mca_spml_ikrit_oob_get_mkeys(int pe, uint32_t seg, mca_spml_mkey_t *mkeys)
return OSHMEM_ERROR;
return OSHMEM_SUCCESS;
#else
/* we are actually registering memory in 2.0 and later.
* So can not really skip mkey exchange
*/
return OSHMEM_ERROR;
#endif
}
static int mca_spml_ikrit_get_helper(mxm_send_req_t *sreq,
@ -683,8 +713,8 @@ static int mca_spml_ikrit_get_helper(mxm_send_req_t *sreq,
}
SPML_VERBOSE(100,
"get: pe:%d ptl=%d src=%p -> dst: %p sz=%d. src_rva=%p, src_rkey=0x%lx",
src, ptl_id, src_addr, dst_addr, (int)size, (void *)rva, r_mkey->handle.key);
"get: pe:%d ptl=%d src=%p -> dst: %p sz=%d. src_rva=%p, %s",
src, ptl_id, src_addr, dst_addr, (int)size, (void *)rva, mca_spml_base_mkey2str(r_mkey));
/* mxm does not really cares for get lkey */
sreq->base.mq = mca_spml_ikrit.mxm_mq;
@ -696,7 +726,7 @@ static int mca_spml_ikrit_get_helper(mxm_send_req_t *sreq,
sreq->base.data.buffer.memh = NULL;
sreq->op.mem.remote_memh = NULL;
#else
sreq->op.mem.remote_mkey = &mxm_empty_mem_key;
sreq->op.mem.remote_mkey = to_mxm_mkey(r_mkey);
#endif
sreq->opcode = MXM_REQ_OP_GET;
sreq->op.mem.remote_vaddr = (intptr_t) rva;
@ -736,8 +766,8 @@ static inline int mca_spml_ikrit_get_shm(void *src_addr,
return OSHMEM_ERROR;
SPML_VERBOSE(100,
"shm get: pe:%d src=%p -> dst: %p sz=%d. src_rva=%p, src_rkey=0x%lx",
src, src_addr, dst_addr, (int)size, (void *)rva, r_mkey->handle.key);
"shm get: pe:%d src=%p -> dst: %p sz=%d. src_rva=%p, %s",
src, src_addr, dst_addr, (int)size, (void *)rva, mca_spml_base_mkey2str(r_mkey));
memcpy(dst_addr, (void *) (unsigned long) rva, size);
opal_progress();
return OSHMEM_SUCCESS;
@ -972,8 +1002,8 @@ static inline int mca_spml_ikrit_put_internal(void* dst_addr,
#if SPML_IKRIT_PUT_DEBUG == 1
SPML_VERBOSE(100, "put: pe:%d ptl=%d dst=%p <- src: %p sz=%d. dst_rva=%p, dst_rkey=0x%lx",
dst, ptl_id, dst_addr, src_addr, (int)size, (void *)rva, r_mkey->handle.key);
SPML_VERBOSE(100, "put: pe:%d ptl=%d dst=%p <- src: %p sz=%d. dst_rva=%p, %s",
dst, ptl_id, dst_addr, src_addr, (int)size, (void *)rva, mca_spml_base_mkey2str(r_mkey));
#endif
if (ptl_id == MXM_PTL_SHM) {
@ -999,8 +1029,8 @@ static inline int mca_spml_ikrit_put_internal(void* dst_addr,
}
#if SPML_IKRIT_PUT_DEBUG == 1
SPML_VERBOSE(100, "put: pe:%d ptl=%d dst=%p <- src: %p sz=%d. dst_rva=%p, dst_rkey=0x%lx",
dst, ptl_id, dst_addr, src_addr, (int)size, (void *)rva, r_mkey->handle.key);
SPML_VERBOSE(100, "put: pe:%d ptl=%d dst=%p <- src: %p sz=%d. dst_rva=%p, %s",
dst, ptl_id, dst_addr, src_addr, (int)size, (void *)rva, mca_spml_base_mkey2str(r_mkey));
#endif
put_req = alloc_put_req();
@ -1026,13 +1056,13 @@ static inline int mca_spml_ikrit_put_internal(void* dst_addr,
put_req->mxm_req.base.flags = MXM_REQ_FLAG_SEND_LAZY|MXM_REQ_FLAG_SEND_SYNC;
}
#else
put_req->mxm_req.opcode = MXM_REQ_OP_PUT_SYNC;
if (mca_spml_ikrit.free_list_max - mca_spml_ikrit.n_active_puts <= SPML_IKRIT_PUT_LOW_WATER ||
(mca_spml_ikrit.mxm_peers[dst]->n_active_puts + 1) % SPML_IKRIT_PACKETS_PER_SYNC == 0) {
put_req->mxm_req.flags = 0;
need_progress = 1;
put_req->mxm_req.opcode = MXM_REQ_OP_PUT_SYNC;
} else {
put_req->mxm_req.flags = MXM_REQ_SEND_FLAG_LAZY;
put_req->mxm_req.opcode = MXM_REQ_OP_PUT;
}
if (!zcopy) {
put_req->mxm_req.flags |= MXM_REQ_SEND_FLAG_BLOCKING;
@ -1045,7 +1075,6 @@ static inline int mca_spml_ikrit_put_internal(void* dst_addr,
put_req->mxm_req.base.data.buffer.length = size;
put_req->mxm_req.base.completed_cb = put_completion_cb;
put_req->mxm_req.base.context = put_req;
put_req->mxm_req.opcode = MXM_REQ_OP_PUT;
put_req->mxm_req.op.mem.remote_vaddr = (intptr_t) rva;
put_req->mxm_req.base.state = MXM_REQ_NEW;
put_req->pe = dst;
@ -1054,7 +1083,7 @@ static inline int mca_spml_ikrit_put_internal(void* dst_addr,
put_req->mxm_req.base.data.buffer.memh = NULL;
put_req->mxm_req.op.mem.remote_memh = NULL;
#else
put_req->mxm_req.op.mem.remote_mkey = &mxm_empty_mem_key;
put_req->mxm_req.op.mem.remote_mkey = to_mxm_mkey(r_mkey);
#endif
if (mca_spml_ikrit.mxm_peers[dst]->pe_relay >= 0
@ -1140,8 +1169,8 @@ int mca_spml_ikrit_put_simple(void* dst_addr,
}
#if SPML_IKRIT_PUT_DEBUG == 1
SPML_VERBOSE(100, "put: pe:%d ptl=%d dst=%p <- src: %p sz=%d. dst_rva=%p, dst_rkey=0x%lx",
dst, ptl_id, dst_addr, src_addr, (int)size, (void *)rva, r_mkey->handle.key);
SPML_VERBOSE(100, "put: pe:%d ptl=%d dst=%p <- src: %p sz=%d. dst_rva=%p, %s",
dst, ptl_id, dst_addr, src_addr, (int)size, (void *)rva, mca_spml_base_mkey2str(r_mkey));
#endif
if (ptl_id == MXM_PTL_SHM) {
@ -1168,8 +1197,8 @@ int mca_spml_ikrit_put_simple(void* dst_addr,
}
#if SPML_IKRIT_PUT_DEBUG == 1
SPML_VERBOSE(100, "put: pe:%d ptl=%d dst=%p <- src: %p sz=%d. dst_rva=%p, dst_rkey=0x%lx",
dst, ptl_id, dst_addr, src_addr, (int)size, (void *)rva, r_mkey->handle.key);
SPML_VERBOSE(100, "put: pe:%d ptl=%d dst=%p <- src: %p sz=%d. dst_rva=%p, %s",
dst, ptl_id, dst_addr, src_addr, (int)size, (void *)rva, mca_spml_base_mkey2str(r_mkey));
#endif
/* fill out request */
@ -1194,7 +1223,7 @@ int mca_spml_ikrit_put_simple(void* dst_addr,
mxm_req.base.data.buffer.memh = NULL;
mxm_req.op.mem.remote_memh = NULL;
#else
mxm_req.op.mem.remote_mkey = &mxm_empty_mem_key;
mxm_req.op.mem.remote_mkey = to_mxm_mkey(r_mkey);
#endif
if (mca_spml_ikrit.mxm_peers[dst]->need_fence == 0) {

Просмотреть файл

@ -101,9 +101,8 @@ typedef struct mca_spml_ikrit_t mca_spml_ikrit_t;
#if MXM_API >= MXM_VERSION(2,0)
#define MXM_PTL_SHM 0
#define MXM_PTL_SELF 1
#define MXM_PTL_RDMA 2
#define MXM_PTL_LAST 3
#define MXM_PTL_RDMA 1
#define MXM_PTL_LAST 2
#endif
typedef struct spml_ikrit_mxm_ep_conn_info_t {

Просмотреть файл

@ -109,7 +109,7 @@ static int mca_spml_ikrit_component_register(void)
"[integer] ikrit priority");
mca_spml_ikrit_param_register_string("mxm_tls",
"ud,self",
"rc,ud,self",
"[string] TL channels for MXM",
&mca_spml_ikrit.mxm_tls);

Просмотреть файл

@ -71,19 +71,30 @@ typedef mca_spml_base_component_2_0_0_t mca_spml_base_component_t;
*/
/**
* memory key
* We have two kinds of keys:
* - shared memory type of keys. Memory segment must be attached before access
* such keys use va_base = 0 and key
* - ib type of key. Key is passed with each put/get op.
* use va_base = <remote vaddr>, key is stored in mkey struct
*/
typedef struct mca_spml_mkey {
    union {
        struct {
            uint32_t rkey;
            uint32_t lkey;
        } ib;
        uint64_t key;
    } handle;
    void* va_base;      /* 0 for shared-memory keys, remote vaddr otherwise */
    uint16_t len;       /* length in bytes of u.data */
    union {
        void *data;     /* arbitrary byte-string key */
        uint64_t key;   /* shared-memory style id */
    } u;
    void *spml_context; /* spml module can attach internal structures here */
} mca_spml_mkey_t;

/**
 * Render an mkey as a short human-readable string for log messages.
 *
 * The key is printed as a full 64-bit hexadecimal value. For byte-string
 * mkeys the "key" field shows the raw contents of the u union, since
 * u.key and u.data share storage.
 *
 * @param mkey  mkey to describe; must not be NULL.
 * @return pointer to a function-local static buffer. The buffer is
 *         overwritten by the next call, so the result must be consumed
 *         before calling again and must not be freed.
 */
static inline char *mca_spml_base_mkey2str(mca_spml_mkey_t *mkey)
{
    static char buf[64];

    /* u.key is uint64_t: cast and use %llX so the whole key is printed
     * and the conversion specifier matches the argument type. */
    snprintf(buf, sizeof(buf), "mkey: base=%p len=%d key=%llX",
             mkey->va_base, mkey->len, (unsigned long long) mkey->u.key);
    return buf;
}
/**
* Downcall from MCA layer to enable the PML/BTLs.
*
@ -237,14 +248,6 @@ typedef int (*mca_spml_base_module_fence_fn_t)(void);
*/
typedef int (*mca_spml_base_module_wait_nb_fn_t)(void*);
typedef void* (*mca_spml_base_module_get_remote_context_fn_t)(void*);
typedef void (*mca_spml_base_module_set_remote_context_fn_t)(void**, void*);
typedef int (*mca_spml_base_module_get_remote_context_size_fn_t)(void*);
typedef void (*mca_spml_base_module_set_remote_context_size_fn_t)(void**, int);
/**
* SPML instance.
*/
@ -268,10 +271,6 @@ struct mca_spml_base_module_1_0_0_t {
mca_spml_base_module_wait_fn_t spml_wait;
mca_spml_base_module_wait_nb_fn_t spml_wait_nb;
mca_spml_base_module_fence_fn_t spml_fence;
mca_spml_base_module_get_remote_context_fn_t spml_get_remote_context;
mca_spml_base_module_set_remote_context_fn_t spml_set_remote_context;
mca_spml_base_module_get_remote_context_size_fn_t spml_get_remote_context_size;
mca_spml_base_module_set_remote_context_size_fn_t spml_set_remote_context_size;
void *self;
};

Просмотреть файл

@ -57,10 +57,6 @@ mca_spml_yoda_module_t mca_spml_yoda = {
mca_spml_base_wait,
mca_spml_base_wait_nb,
mca_spml_yoda_fence,
mca_spml_yoda_get_remote_context,
mca_spml_yoda_set_remote_context,
mca_spml_yoda_get_remote_context_size,
mca_spml_yoda_set_remote_context_size,
(void *)&mca_spml_yoda
}
@ -302,12 +298,6 @@ int mca_spml_yoda_deregister(mca_spml_mkey_t *mkeys)
ybtl->btl->btl_free(ybtl->btl, yoda_context->btl_src_descriptor);
yoda_context->btl_src_descriptor = NULL;
}
if (yoda_context->btl_src_segment) {
free(yoda_context->btl_src_segment);
yoda_context->btl_src_segment = NULL;
}
yoda_context->btl_src_segment_size = 0;
if (yoda_context->registration) {
ybtl->btl->btl_mpool->mpool_deregister(ybtl->btl->btl_mpool,
yoda_context->registration);
@ -372,15 +362,15 @@ mca_spml_mkey_t *mca_spml_yoda_register(void* addr,
}
/* If we have shared memory just save its id*/
if ((YODA_BTL_SM == ybtl->btl_type)
&& ((int) MEMHEAP_SHM_GET_ID(shmid) != MEMHEAP_SHM_INVALID)) {
mkeys[i].handle.key = shmid;
if (YODA_BTL_SM == ybtl->btl_type
&& MEMHEAP_SHM_INVALID != (int) MEMHEAP_SHM_GET_ID(shmid)) {
mkeys[i].u.key = shmid;
mkeys[i].va_base = 0;
continue;
}
yoda_context = calloc(1, sizeof(*yoda_context));
mkeys[i].spml_context = (void*) yoda_context;
mkeys[i].spml_context = yoda_context;
yoda_context->registration = NULL;
if (NULL != ybtl->btl->btl_prepare_src) {
@ -421,21 +411,16 @@ mca_spml_mkey_t *mca_spml_yoda_register(void* addr,
SPML_ERROR("%s: failed to register source memory. ",
btl_type2str(ybtl->btl_type));
}
/* copy source descriptor to local structures*/
yoda_context->btl_src_descriptor = des;
yoda_context->btl_src_segment_size = ybtl->btl->btl_seg_size;
if (0 != yoda_context->btl_src_segment_size) {
yoda_context->btl_src_segment =
malloc(yoda_context->btl_src_segment_size);
memcpy(yoda_context->btl_src_segment,
des->des_src,
yoda_context->btl_src_segment_size);
}
mkeys[i].u.data = des->des_src;
mkeys[i].len = ybtl->btl->btl_seg_size;
}
SPML_VERBOSE(5,
"rank %d btl %s rkey %x lkey %x key %llx address 0x%p len %llu shmid 0x%X|0x%X",
oshmem_proc_local_proc->proc_name.vpid, btl_type2str(ybtl->btl_type), mkeys[i].handle.ib.rkey, mkeys[i].handle.ib.lkey, (unsigned long long)mkeys[i].handle.key, mkeys[i].va_base, (unsigned long long)size, MEMHEAP_SHM_GET_TYPE(shmid), MEMHEAP_SHM_GET_ID(shmid));
"rank %d btl %s address 0x%p len %llu shmid 0x%X|0x%X",
oshmem_proc_local_proc->proc_name.vpid, btl_type2str(ybtl->btl_type),
mkeys[i].va_base, (unsigned long long)size, MEMHEAP_SHM_GET_TYPE(shmid), MEMHEAP_SHM_GET_ID(shmid));
}
OBJ_DESTRUCT(&convertor);
*count = mca_spml_yoda.n_btls;
@ -735,7 +720,6 @@ static inline int mca_spml_yoda_put_internal(void *dst_addr,
unsigned ncopied = 0;
unsigned int frag_size = 0;
char *p_src, *p_dst;
mca_spml_yoda_context_t* yoda_context;
void* rva;
mca_spml_mkey_t *r_mkey;
int btl_id = 0;
@ -768,8 +752,8 @@ static inline int mca_spml_yoda_put_internal(void *dst_addr,
}
#if SPML_YODA_DEBUG == 1
SPML_VERBOSE(100, "put: pe:%d dst=%p <- src: %p sz=%d. dst_rva=%p, dst_rkey=0x%lx",
dst, dst_addr, src_addr, (int)size, (void *)rva, r_mkey->handle.key);
SPML_VERBOSE(100, "put: pe:%d dst=%p <- src: %p sz=%d. dst_rva=%p, %s",
dst, dst_addr, src_addr, (int)size, (void *)rva, mca_spml_base_mkey2str(r_mkey));
#endif
ybtl = &mca_spml_yoda.btl_type_map[btl_id];
@ -818,12 +802,11 @@ static inline int mca_spml_yoda_put_internal(void *dst_addr,
/* Preparing destination buffer */
yoda_context = (mca_spml_yoda_context_t*) r_mkey->spml_context;
assert( (NULL != yoda_context) && (0 != yoda_context->btl_src_segment_size));
assert( NULL != r_mkey->u.data && 0 != r_mkey->len);
memcpy(&frag->rdma_segs[0].base_seg,
yoda_context->btl_src_segment,
yoda_context->btl_src_segment_size);
r_mkey->u.data,
r_mkey->len);
frag->rdma_segs[0].base_seg.seg_addr.lval = (uintptr_t) p_dst;
frag->rdma_segs[0].base_seg.seg_len = (put_via_send ?
@ -903,42 +886,6 @@ int mca_spml_yoda_wait_gets(void)
return OSHMEM_SUCCESS;
}
void* mca_spml_yoda_get_remote_context(void* spml_context)
{
return ((mca_spml_yoda_context_t*) spml_context)->btl_src_segment;
}
/*
 * Attach a remote BTL segment to the yoda context referenced by
 * *spml_context, creating the context first when *spml_context is NULL.
 *
 * A newly created context is zero-initialised so that its other fields
 * (descriptor, registration, segment size) start out as NULL/0 instead of
 * holding indeterminate values.
 *
 * The context stores spml_remote_context as given; no copy is made.
 *
 * NOTE(review): allocation failure is not handled here, matching
 * mca_spml_yoda_set_remote_context_size().
 */
void mca_spml_yoda_set_remote_context(void** spml_context,
                                      void* spml_remote_context)
{
    mca_spml_yoda_context_t *yoda_context = *spml_context;

    if (NULL == yoda_context) {
        yoda_context = calloc(1, sizeof(*yoda_context));
    }
    yoda_context->btl_src_segment =
            (mca_btl_base_segment_t*) spml_remote_context;
    *spml_context = yoda_context;
}
int mca_spml_yoda_get_remote_context_size(void* spml_context)
{
return ((mca_spml_yoda_context_t*) spml_context)->btl_src_segment_size;
}
/*
 * Record the remote segment size in the yoda context referenced by
 * *spml_context. When *spml_context is NULL a zero-initialised context is
 * allocated and stored back through spml_context.
 */
void mca_spml_yoda_set_remote_context_size(void** spml_context,
                                           int spml_remote_context_size)
{
    mca_spml_yoda_context_t *ctx;

    ctx = (NULL != *spml_context) ? *spml_context
                                  : calloc(1, sizeof(*ctx));
    ctx->btl_src_segment_size = spml_remote_context_size;
    *spml_context = ctx;
}
int mca_spml_yoda_enable(bool enable)
{
@ -1024,7 +971,6 @@ int mca_spml_yoda_get(void* src_addr, size_t size, void* dst_addr, int src)
struct mca_spml_yoda_getreq_parent get_holder;
struct yoda_btl *ybtl;
int btl_id = 0;
mca_spml_yoda_context_t* yoda_context;
int get_via_send;
const opal_datatype_t *datatype = &opal_datatype_wchar;
opal_convertor_t convertor;
@ -1059,8 +1005,8 @@ int mca_spml_yoda_get(void* src_addr, size_t size, void* dst_addr, int src)
oshmem_shmem_abort(-1);
}
#if SPML_YODA_DEBUG == 1
SPML_VERBOSE(100, "get: pe:%d src=%p -> dst: %p sz=%d. src_rva=%p, src_rkey=0x%lx",
src, src_addr, dst_addr, (int)size, (void *)rva, r_mkey->handle.key);
SPML_VERBOSE(100, "get: pe:%d src=%p -> dst: %p sz=%d. src_rva=%p, %s",
src, src_addr, dst_addr, (int)size, (void *)rva, mca_spml_base_mkey2str(r_mkey));
#endif
ybtl = &mca_spml_yoda.btl_type_map[btl_id];
@ -1111,11 +1057,10 @@ int mca_spml_yoda_get(void* src_addr, size_t size, void* dst_addr, int src)
ncopied = i < nfrags - 1 ? frag_size :(unsigned) ((char *) dst_addr + size - p_dst);
frag->allocated = 0;
/* Prepare destination descriptor*/
yoda_context = r_mkey->spml_context;
assert(0 != yoda_context->btl_src_segment_size);
assert(0 != r_mkey->len);
memcpy(&frag->rdma_segs[0].base_seg,
yoda_context->btl_src_segment,
yoda_context->btl_src_segment_size);
r_mkey->u.data,
r_mkey->len);
frag->rdma_segs[0].base_seg.seg_len = (get_via_send ? ncopied + SPML_YODA_SEND_CONTEXT_SIZE : ncopied);
if (get_via_send) {

Просмотреть файл

@ -82,8 +82,6 @@ typedef struct mca_spml_yoda_t mca_spml_yoda_module_t;
/* Per-mkey state the yoda SPML stores in mca_spml_mkey_t::spml_context. */
struct mca_spml_yoda_context_t {
/* source descriptor saved by register; released with btl_free() in deregister */
mca_btl_base_descriptor_t* btl_src_descriptor;
/* size in bytes of btl_src_segment (register takes it from the BTL's btl_seg_size) */
int btl_src_segment_size;
/* heap copy of the descriptor's des_src segment; freed in deregister */
mca_btl_base_segment_t* btl_src_segment;
/* mpool registration, or NULL; released with mpool_deregister() in deregister */
mca_mpool_base_registration_t* registration;
};
typedef struct mca_spml_yoda_context_t mca_spml_yoda_context_t;

Просмотреть файл

@ -22,7 +22,6 @@
#include "orte/types.h"
#include "orte/runtime/orte_globals.h"
#include "ompi/mca/bml/bml.h"
BEGIN_C_DECLS