From 6fb0dbdab5dc57d2aee5beecd8e4e2c6d84a81dc Mon Sep 17 00:00:00 2001 From: Mike Dubman Date: Tue, 7 Jan 2014 11:56:36 +0000 Subject: [PATCH] OSHMEM: port 6 patches from git mirror to svn Subject: [PATCH 1/6] OSHMEM: mkey refactoring mkey can be either shared memory style id or it can be arbitrary byte string removed hack that used spml_context to store generic keys coding style fixes Subject: [PATCH 2/6] OSHMEM: added support of MXM 2.0 rc transport coding style fixed, typos, check error condition Subject: [PATCH 3/6] OSHMEM: mxm2.0: remove PTL_SELF There is no need to have special case for 'self' connection in mxm 2.0. It also solves the problem of passing incorrect mkey when doing put/get to self Subject: [PATCH 4/6] OSHMEM: fixes mxm fadd give a dummy buffer if doing atomic add Subject: [PATCH 5/6] OSHMEM: mxm2.0: do not use MXM_REQ_FLAG_SEND_LAZY Subject: [PATCH 6/6] OSHMEM: remove unused include, causes compilation fail on ubuntu Refs trac:3763 This commit was SVN r30129. The following Trac tickets were found above: Ticket 3763 --> https://svn.open-mpi.org/trac/ompi/ticket/3763 --- oshmem/mca/atomic/mxm/atomic_mxm_fadd.c | 5 +- oshmem/mca/memheap/base/memheap_base_mkey.c | 177 +++++++++--------- .../mca/memheap/base/memheap_base_register.c | 4 + oshmem/mca/spml/ikrit/spml_ikrit.c | 95 ++++++---- oshmem/mca/spml/ikrit/spml_ikrit.h | 5 +- oshmem/mca/spml/ikrit/spml_ikrit_component.c | 2 +- oshmem/mca/spml/spml.h | 37 ++-- oshmem/mca/spml/yoda/spml_yoda.c | 95 ++-------- oshmem/mca/spml/yoda/spml_yoda.h | 2 - oshmem/proc/proc.h | 1 - 10 files changed, 196 insertions(+), 227 deletions(-) diff --git a/oshmem/mca/atomic/mxm/atomic_mxm_fadd.c b/oshmem/mca/atomic/mxm/atomic_mxm_fadd.c index c5a494e5c7..2a56f2c4e8 100644 --- a/oshmem/mca/atomic/mxm/atomic_mxm_fadd.c +++ b/oshmem/mca/atomic/mxm/atomic_mxm_fadd.c @@ -36,6 +36,7 @@ int mca_atomic_mxm_fadd(void *target, int ptl_id; mxm_send_req_t sreq; mxm_error_t mxm_err; + static char dummy_buf[8]; my_pe = 
oshmem_my_proc_id(); ptl_id = -1; @@ -106,8 +107,8 @@ int mca_atomic_mxm_fadd(void *target, /* Do we need atomic 'add' or atomic 'fetch and add'? */ if (NULL == prev) { - sreq.base.data.buffer.ptr = NULL; - sreq.base.data.buffer.length = 0; + sreq.base.data.buffer.ptr = dummy_buf; + sreq.base.data.buffer.length = nlong; sreq.base.data.buffer.memh = MXM_INVALID_MEM_HANDLE; #if MXM_API < MXM_VERSION(2,0) sreq.base.flags = MXM_REQ_FLAG_SEND_SYNC; diff --git a/oshmem/mca/memheap/base/memheap_base_mkey.c b/oshmem/mca/memheap/base/memheap_base_mkey.c index 4d4b842363..4a303ff807 100644 --- a/oshmem/mca/memheap/base/memheap_base_mkey.c +++ b/oshmem/mca/memheap/base/memheap_base_mkey.c @@ -157,26 +157,18 @@ static int pack_local_mkeys(opal_buffer_t *msg, int pe, int seg, int all_trs) return OSHMEM_ERROR; } opal_dss.pack(msg, &tr_id, 1, OPAL_UINT32); - opal_dss.pack(msg, &mkey->handle.key, 1, OPAL_UINT64); opal_dss.pack(msg, &mkey->va_base, 1, OPAL_UINT64); - - if (NULL != MCA_SPML_CALL(get_remote_context_size)) { - uint32_t context_size = - (mkey->spml_context == NULL ) ? 
- 0 : - (uint32_t) MCA_SPML_CALL(get_remote_context_size(mkey->spml_context)); - opal_dss.pack(msg, &context_size, 1, OPAL_UINT32); - if (0 != context_size) { - opal_dss.pack(msg, - MCA_SPML_CALL(get_remote_context(mkey->spml_context)), - context_size, - OPAL_BYTE); + if (0 == mkey->va_base) { + opal_dss.pack(msg, &mkey->u.key, 1, OPAL_UINT64); + } else { + opal_dss.pack(msg, &mkey->len, 1, OPAL_UINT16); + if (0 < mkey->len) { + opal_dss.pack(msg, mkey->u.data, mkey->len, OPAL_BYTE); } } - MEMHEAP_VERBOSE(5, - "seg#%d tr_id: %d key %llx base_va %p", - seg, tr_id, (unsigned long long)mkey->handle.key, mkey->va_base); + "seg#%d tr_id: %d %s", + seg, tr_id, mca_spml_base_mkey2str(mkey)); } return OSHMEM_SUCCESS; } @@ -188,70 +180,70 @@ static void memheap_attach_segment(mca_spml_mkey_t *mkey, int tr_id) * - key is set as (type|shmid); * - va_base is set as 0; */ - if (!mkey->va_base - && ((int) MEMHEAP_SHM_GET_ID(mkey->handle.key) != MEMHEAP_SHM_INVALID)) { - MEMHEAP_VERBOSE(5, - "shared memory usage tr_id: %d key %llx base_va %p shmid 0x%X|0x%X", - tr_id, - (unsigned long long)mkey->handle.key, - mkey->va_base, - MEMHEAP_SHM_GET_TYPE(mkey->handle.key), - MEMHEAP_SHM_GET_ID(mkey->handle.key)); + assert(mkey->va_base == 0); - if (MEMHEAP_SHM_GET_TYPE(mkey->handle.key) == MAP_SEGMENT_ALLOC_SHM) { - mkey->va_base = shmat(MEMHEAP_SHM_GET_ID(mkey->handle.key), - 0, - 0); - } else if (MEMHEAP_SHM_GET_TYPE(mkey->handle.key) == MAP_SEGMENT_ALLOC_IBV) { + if (MEMHEAP_SHM_INVALID == (int) MEMHEAP_SHM_GET_ID(mkey->u.key)) { + return; + } + + MEMHEAP_VERBOSE(5, + "shared memory usage tr_id: %d key %llx base_va %p shmid 0x%X|0x%X", + tr_id, + (unsigned long long)mkey->u.key, + mkey->va_base, + MEMHEAP_SHM_GET_TYPE(mkey->u.key), + MEMHEAP_SHM_GET_ID(mkey->u.key)); + + if (MAP_SEGMENT_ALLOC_SHM == MEMHEAP_SHM_GET_TYPE(mkey->u.key)) { + mkey->va_base = shmat(MEMHEAP_SHM_GET_ID(mkey->u.key), + 0, + 0); + } else if (MAP_SEGMENT_ALLOC_IBV == MEMHEAP_SHM_GET_TYPE(mkey->u.key)) { #if 
defined(MPAGE_ENABLE) && (MPAGE_ENABLE > 0) - openib_device_t *device = NULL; - struct ibv_mr *ib_mr; - void *addr; - static int mr_count; + openib_device_t *device = NULL; + struct ibv_mr *ib_mr; + void *addr; + static int mr_count; - int access_flag = IBV_ACCESS_LOCAL_WRITE | + int access_flag = IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_NO_RDMA; - device = (openib_device_t *)memheap_map->mem_segs[HEAP_SEG_INDEX].context; - assert(device); + device = (openib_device_t *)memheap_map->mem_segs[HEAP_SEG_INDEX].context; + assert(device); - /* workaround mtt problem - request aligned addresses */ - ++mr_count; - addr = (void *)((uintptr_t)mca_memheap_base_start_address + mca_memheap_base_mr_interleave_factor*1024ULL*1024ULL*1024ULL*mr_count); - ib_mr = ibv_reg_shared_mr(MEMHEAP_SHM_GET_ID(mkey->handle.key), - device->ib_pd, addr, access_flag); - if (NULL == ib_mr) - { - mkey->va_base = (void*)-1; - MEMHEAP_ERROR("error to ibv_reg_shared_mr() errno says %d: %s", - errno, strerror(errno)); - } - else - { - if (ib_mr->addr != addr) { - MEMHEAP_WARN("Failed to map shared region to address %p got addr %p. 
Try to increase 'memheap_mr_interleave_factor' from %d", addr, ib_mr->addr, mca_memheap_base_mr_interleave_factor); - } - - opal_value_array_append_item(&device->ib_mr_array, &ib_mr); - mkey->va_base = ib_mr->addr; - } -#endif /* MPAGE_ENABLE */ + /* workaround mtt problem - request aligned addresses */ + ++mr_count; + addr = (void *)((uintptr_t)mca_memheap_base_start_address + mca_memheap_base_mr_interleave_factor*1024ULL*1024ULL*1024ULL*mr_count); + ib_mr = ibv_reg_shared_mr(MEMHEAP_SHM_GET_ID(mkey->u.key), + device->ib_pd, addr, access_flag); + if (NULL == ib_mr) { + mkey->va_base = (void*)-1; + MEMHEAP_ERROR("error to ibv_reg_shared_mr() errno says %d: %s", + errno, strerror(errno)); } else { - MEMHEAP_ERROR("tr_id: %d key %llx attach failed: incorrect shmid 0x%X|0x%X", - tr_id, - (unsigned long long)mkey->handle.key, - MEMHEAP_SHM_GET_TYPE(mkey->handle.key), - MEMHEAP_SHM_GET_ID(mkey->handle.key)); - oshmem_shmem_abort(-1); - } + if (ib_mr->addr != addr) { + MEMHEAP_WARN("Failed to map shared region to address %p got addr %p. 
Try to increase 'memheap_mr_interleave_factor' from %d", addr, ib_mr->addr, mca_memheap_base_mr_interleave_factor); + } - if ((void *) -1 == (void *) mkey->va_base) { - MEMHEAP_ERROR("tr_id: %d key %llx attach failed: errno = %d", - tr_id, (unsigned long long)mkey->handle.key, errno); - oshmem_shmem_abort(-1); + opal_value_array_append_item(&device->ib_mr_array, &ib_mr); + mkey->va_base = ib_mr->addr; } +#endif /* MPAGE_ENABLE */ + } else { + MEMHEAP_ERROR("tr_id: %d key %llx attach failed: incorrect shmid 0x%X|0x%X", + tr_id, + (unsigned long long)mkey->u.key, + MEMHEAP_SHM_GET_TYPE(mkey->u.key), + MEMHEAP_SHM_GET_ID(mkey->u.key)); + oshmem_shmem_abort(-1); + } + + if ((void *) -1 == (void *) mkey->va_base) { + MEMHEAP_ERROR("tr_id: %d key %llx attach failed: errno = %d", + tr_id, (unsigned long long)mkey->u.key, errno); + oshmem_shmem_abort(-1); } } @@ -268,32 +260,36 @@ static void unpack_remote_mkeys(opal_buffer_t *msg, int remote_pe) cnt = 1; opal_dss.unpack(msg, &n, &cnt, OPAL_UINT32); for (i = 0; i < n; i++) { + cnt = 1; opal_dss.unpack(msg, &tr_id, &cnt, OPAL_UINT32); - - opal_dss.unpack(msg, &memheap_oob.mkeys[tr_id].handle.key, &cnt, OPAL_UINT64); + cnt = 1; opal_dss.unpack(msg, &memheap_oob.mkeys[tr_id].va_base, &cnt, OPAL_UINT64); - if (NULL != MCA_SPML_CALL(set_remote_context_size)) { - int32_t context_size; - opal_dss.unpack(msg, &context_size, &cnt, OPAL_UINT32); - if (0 != context_size) { - MCA_SPML_CALL(set_remote_context_size(&(memheap_oob.mkeys[tr_id].spml_context), context_size)); - void* context; - context = calloc(1, context_size); - opal_dss.unpack(msg, context, &context_size, OPAL_BYTE); - MCA_SPML_CALL(set_remote_context(&(memheap_oob.mkeys[tr_id].spml_context),context)); + if (0 == memheap_oob.mkeys[tr_id].va_base) { + cnt = 1; + opal_dss.unpack(msg, &memheap_oob.mkeys[tr_id].u.key, &cnt, OPAL_UINT64); + if (OPAL_PROC_ON_LOCAL_NODE(proc->proc_flags)) + memheap_attach_segment(&memheap_oob.mkeys[tr_id], tr_id); + } else { + cnt = 1; + 
opal_dss.unpack(msg, &memheap_oob.mkeys[tr_id].len, &cnt, OPAL_UINT16); + if (0 < memheap_oob.mkeys[tr_id].len) { + memheap_oob.mkeys[tr_id].u.data = malloc(memheap_oob.mkeys[tr_id].len); + if (NULL == memheap_oob.mkeys[tr_id].u.data) { + MEMHEAP_ERROR("Failed allocate %d bytes", memheap_oob.mkeys[tr_id].len); + oshmem_shmem_abort(-1); + } + cnt = memheap_oob.mkeys[tr_id].len; + opal_dss.unpack(msg, memheap_oob.mkeys[tr_id].u.data, &cnt, OPAL_BYTE); } } - if (OPAL_PROC_ON_LOCAL_NODE(proc->proc_flags)) - memheap_attach_segment(&memheap_oob.mkeys[tr_id], tr_id); - MEMHEAP_VERBOSE(5, - "tr_id: %d key %llx base_va %p", - tr_id, (unsigned long long)memheap_oob.mkeys[tr_id].handle.key, memheap_oob.mkeys[tr_id].va_base); + "tr_id: %d %s", + tr_id, mca_spml_base_mkey2str(&memheap_oob.mkeys[tr_id])); } } @@ -533,11 +529,10 @@ static int memheap_oob_get_mkeys(int pe, uint32_t seg, mca_spml_mkey_t *mkeys) for (i = 0; i < memheap_map->num_transports; i++) { mkeys[i].va_base = __seg2base_va(seg); MEMHEAP_VERBOSE(5, - "MKEY CALCULATED BY LOCAL SPML: pe: %d tr_id: %d key %llx base_va %p", + "MKEY CALCULATED BY LOCAL SPML: pe: %d tr_id: %d %s", pe, i, - (unsigned long long)mkeys[i].handle.key, - mkeys[i].va_base); + mca_spml_base_mkey2str(&mkeys[i])); } return OSHMEM_SUCCESS; } @@ -707,14 +702,14 @@ mca_spml_mkey_t * mca_memheap_base_get_cached_mkey(int pe, if (pe == oshmem_my_proc_id()) { *rva = va; MEMHEAP_VERBOSE_FASTPATH(10, "rkey: pe=%d va=%p -> (local) %lx %p", pe, va, - s->mkeys[btl_id].handle.key, *rva); + s->mkeys[btl_id].u.key, *rva); return &s->mkeys[btl_id]; } if (OPAL_LIKELY(s->mkeys_cache[pe])) { mkey = &s->mkeys_cache[pe][btl_id]; *rva = va2rva(va, s->start, mkey->va_base); - MEMHEAP_VERBOSE_FASTPATH(10, "rkey: pe=%d va=%p -> (cached) %lx %p", pe, (void *)va, mkey->handle.key, (void *)*rva); + MEMHEAP_VERBOSE_FASTPATH(10, "rkey: pe=%d va=%p -> (cached) %lx %p", pe, (void *)va, mkey->u.key, (void *)*rva); return mkey; } @@ -732,7 +727,7 @@ mca_spml_mkey_t * 
mca_memheap_base_get_cached_mkey(int pe, mkey = &s->mkeys_cache[pe][btl_id]; *rva = va2rva(va, s->start, mkey->va_base); - MEMHEAP_VERBOSE_FASTPATH(5, "rkey: pe=%d va=%p -> (remote lookup) %lx %p", pe, (void *)va, mkey->handle.key, (void *)*rva); + MEMHEAP_VERBOSE_FASTPATH(5, "rkey: pe=%d va=%p -> (remote lookup) %lx %p", pe, (void *)va, mkey->u.key, (void *)*rva); return mkey; } diff --git a/oshmem/mca/memheap/base/memheap_base_register.c b/oshmem/mca/memheap/base/memheap_base_register.c index 3f50c718de..0e7756c027 100644 --- a/oshmem/mca/memheap/base/memheap_base_register.c +++ b/oshmem/mca/memheap/base/memheap_base_register.c @@ -79,6 +79,10 @@ static int _dereg_segment(map_segment_t *s) if (j == my_pe) continue; if (s->mkeys_cache[j]) { + if (s->mkeys_cache[j]->len) { + free(s->mkeys_cache[j]->u.data); + s->mkeys_cache[j]->len = 0; + } free(s->mkeys_cache[j]); s->mkeys_cache[j] = NULL; } diff --git a/oshmem/mca/spml/ikrit/spml_ikrit.c b/oshmem/mca/spml/ikrit/spml_ikrit.c index 67378ee03f..cb57a40071 100644 --- a/oshmem/mca/spml/ikrit/spml_ikrit.c +++ b/oshmem/mca/spml/ikrit/spml_ikrit.c @@ -73,6 +73,11 @@ static int spml_ikrit_get_ep_address(spml_ikrit_mxm_ep_conn_info_t *ep_info, return OSHMEM_SUCCESS; } +#else +static inline mxm_mem_key_t *to_mxm_mkey(mca_spml_mkey_t *mkey) { + + return (mxm_mem_key_t *)mkey->u.data; +} #endif static inline void mca_spml_irkit_req_wait(mxm_req_base_t *req) @@ -193,11 +198,6 @@ mca_spml_ikrit_t mca_spml_ikrit = { mca_spml_base_wait, mca_spml_base_wait_nb, mca_spml_ikrit_fence, - NULL, - NULL, - NULL, - NULL, - (void*)&mca_spml_ikrit } }; @@ -301,9 +301,11 @@ static int create_ptl_idx(int dst_pe) return OSHMEM_ERROR; proc->num_transports = 1; +#if MXM_API < MXM_VERSION(2,0) if (oshmem_my_proc_id() == dst_pe) proc->transport_ids[0] = MXM_PTL_SELF; else +#endif proc->transport_ids[0] = MXM_PTL_RDMA; return OSHMEM_SUCCESS; } @@ -531,6 +533,7 @@ mca_spml_mkey_t *mca_spml_ikrit_register(void* addr, mca_spml_mkey_t *mkeys; #if 
MXM_API >= MXM_VERSION(2,0) mxm_error_t err; + mxm_mem_key_t *m_key; #endif *count = 0; @@ -543,32 +546,47 @@ mca_spml_mkey_t *mca_spml_ikrit_register(void* addr, switch (i) { case MXM_PTL_SHM: if ((int) MEMHEAP_SHM_GET_ID(shmid) != MEMHEAP_SHM_INVALID) { - mkeys[i].handle.key = shmid; + mkeys[i].u.key = shmid; mkeys[i].va_base = 0; } else { - mkeys[i].handle.key = 0; + mkeys[i].len = 0; mkeys[i].va_base = addr; } mkeys[i].spml_context = 0; break; +#if MXM_API < MXM_VERSION(2,0) case MXM_PTL_SELF: - mkeys[i].handle.key = 0; + mkeys[i].len = 0; mkeys[i].spml_context = 0; mkeys[i].va_base = addr; break; +#endif case MXM_PTL_RDMA: mkeys[i].va_base = addr; mkeys[i].spml_context = 0; #if MXM_API < MXM_VERSION(2,0) - mkeys[i].handle.ib.lkey = mkeys[i].handle.ib.rkey = 0; + mkeys[i].len = 0; #else - mkeys[i].handle.ib.lkey = mkeys[i].handle.ib.rkey = 0; err = mxm_mem_map(mca_spml_ikrit.mxm_context, &addr, &size, 0, 0, 0); if (MXM_OK != err) { - SPML_VERBOSE(1, "failed to register memory: %s", mxm_error_string(err)); + SPML_ERROR("Failed to register memory: %s", mxm_error_string(err)); goto error_out; } mkeys[i].spml_context = (void *)(unsigned long)size; + + m_key = malloc(sizeof(*m_key)); + if (NULL == m_key) { + SPML_ERROR("Failed to allocate m_key memory"); + goto error_out; + } + mkeys[i].len = sizeof(*m_key); + mkeys[i].u.data = m_key; + + err = mxm_mem_get_key(mca_spml_ikrit.mxm_context, addr, m_key); + if (MXM_OK != err) { + SPML_ERROR("Failed to get memory key: %s", mxm_error_string(err)); + goto error_out; + } #endif break; @@ -577,15 +595,16 @@ mca_spml_mkey_t *mca_spml_ikrit_register(void* addr, goto error_out; } SPML_VERBOSE(5, - "rank %d ptl %d rkey %x lkey %x key %llx address 0x%llX len %llu shmid 0x%X|0x%X", - oshmem_proc_local_proc->proc_name.vpid, i, mkeys[i].handle.ib.rkey, mkeys[i].handle.ib.lkey, (unsigned long long)mkeys[i].handle.key, (unsigned long long)mkeys[i].va_base, (unsigned long long)size, MEMHEAP_SHM_GET_TYPE(shmid), 
MEMHEAP_SHM_GET_ID(shmid)); + "rank %d ptl %d addr %p size %llu %s", + oshmem_proc_local_proc->proc_name.vpid, i, addr, (unsigned long long)size, + mca_spml_base_mkey2str(&mkeys[i])); } *count = MXM_PTL_LAST; return mkeys; - error_out: +error_out: mca_spml_ikrit_deregister(mkeys); return NULL ; @@ -600,7 +619,9 @@ int mca_spml_ikrit_deregister(mca_spml_mkey_t *mkeys) for (i = 0; i < MXM_PTL_LAST; i++) { switch (i) { +#if MXM_API < MXM_VERSION(2,0) case MXM_PTL_SELF: +#endif case MXM_PTL_SHM: break; case MXM_PTL_RDMA: @@ -612,6 +633,9 @@ int mca_spml_ikrit_deregister(mca_spml_mkey_t *mkeys) (void *)mkeys[i].va_base, (unsigned long)mkeys[i].spml_context, 0); + if (0 < mkeys[i].len) { + free(mkeys[i].u.data); + } #endif break; } @@ -636,8 +660,8 @@ static inline int get_ptl_id(int dst) int mca_spml_ikrit_oob_get_mkeys(int pe, uint32_t seg, mca_spml_mkey_t *mkeys) { +#if MXM_API < MXM_VERSION(2,0) int ptl; - ptl = get_ptl_id(pe); if (ptl < 0) return OSHMEM_ERROR; @@ -649,6 +673,12 @@ int mca_spml_ikrit_oob_get_mkeys(int pe, uint32_t seg, mca_spml_mkey_t *mkeys) return OSHMEM_ERROR; return OSHMEM_SUCCESS; +#else + /* we are actually registering memory in 2.0 and later. + * So can not really skip mkey exchange + */ + return OSHMEM_ERROR; +#endif } static int mca_spml_ikrit_get_helper(mxm_send_req_t *sreq, @@ -683,8 +713,8 @@ static int mca_spml_ikrit_get_helper(mxm_send_req_t *sreq, } SPML_VERBOSE(100, - "get: pe:%d ptl=%d src=%p -> dst: %p sz=%d. src_rva=%p, src_rkey=0x%lx", - src, ptl_id, src_addr, dst_addr, (int)size, (void *)rva, r_mkey->handle.key); + "get: pe:%d ptl=%d src=%p -> dst: %p sz=%d. 
src_rva=%p, %s", + src, ptl_id, src_addr, dst_addr, (int)size, (void *)rva, mca_spml_base_mkey2str(r_mkey)); /* mxm does not really cares for get lkey */ sreq->base.mq = mca_spml_ikrit.mxm_mq; @@ -696,7 +726,7 @@ static int mca_spml_ikrit_get_helper(mxm_send_req_t *sreq, sreq->base.data.buffer.memh = NULL; sreq->op.mem.remote_memh = NULL; #else - sreq->op.mem.remote_mkey = &mxm_empty_mem_key; + sreq->op.mem.remote_mkey = to_mxm_mkey(r_mkey); #endif sreq->opcode = MXM_REQ_OP_GET; sreq->op.mem.remote_vaddr = (intptr_t) rva; @@ -736,8 +766,8 @@ static inline int mca_spml_ikrit_get_shm(void *src_addr, return OSHMEM_ERROR; SPML_VERBOSE(100, - "shm get: pe:%d src=%p -> dst: %p sz=%d. src_rva=%p, src_rkey=0x%lx", - src, src_addr, dst_addr, (int)size, (void *)rva, r_mkey->handle.key); + "shm get: pe:%d src=%p -> dst: %p sz=%d. src_rva=%p, %s", + src, src_addr, dst_addr, (int)size, (void *)rva, mca_spml_base_mkey2str(r_mkey)); memcpy(dst_addr, (void *) (unsigned long) rva, size); opal_progress(); return OSHMEM_SUCCESS; @@ -972,8 +1002,8 @@ static inline int mca_spml_ikrit_put_internal(void* dst_addr, #if SPML_IKRIT_PUT_DEBUG == 1 - SPML_VERBOSE(100, "put: pe:%d ptl=%d dst=%p <- src: %p sz=%d. dst_rva=%p, dst_rkey=0x%lx", - dst, ptl_id, dst_addr, src_addr, (int)size, (void *)rva, r_mkey->handle.key); + SPML_VERBOSE(100, "put: pe:%d ptl=%d dst=%p <- src: %p sz=%d. dst_rva=%p, %s", + dst, ptl_id, dst_addr, src_addr, (int)size, (void *)rva, mca_spml_base_mkey2str(r_mkey)); #endif if (ptl_id == MXM_PTL_SHM) { @@ -999,8 +1029,8 @@ static inline int mca_spml_ikrit_put_internal(void* dst_addr, } #if SPML_IKRIT_PUT_DEBUG == 1 - SPML_VERBOSE(100, "put: pe:%d ptl=%d dst=%p <- src: %p sz=%d. dst_rva=%p, dst_rkey=0x%lx", - dst, ptl_id, dst_addr, src_addr, (int)size, (void *)rva, r_mkey->handle.key); + SPML_VERBOSE(100, "put: pe:%d ptl=%d dst=%p <- src: %p sz=%d. 
dst_rva=%p, %s", + dst, ptl_id, dst_addr, src_addr, (int)size, (void *)rva, mca_spml_base_mkey2str(r_mkey)); #endif put_req = alloc_put_req(); @@ -1026,13 +1056,13 @@ static inline int mca_spml_ikrit_put_internal(void* dst_addr, put_req->mxm_req.base.flags = MXM_REQ_FLAG_SEND_LAZY|MXM_REQ_FLAG_SEND_SYNC; } #else - put_req->mxm_req.opcode = MXM_REQ_OP_PUT_SYNC; if (mca_spml_ikrit.free_list_max - mca_spml_ikrit.n_active_puts <= SPML_IKRIT_PUT_LOW_WATER || (mca_spml_ikrit.mxm_peers[dst]->n_active_puts + 1) % SPML_IKRIT_PACKETS_PER_SYNC == 0) { put_req->mxm_req.flags = 0; need_progress = 1; + put_req->mxm_req.opcode = MXM_REQ_OP_PUT_SYNC; } else { - put_req->mxm_req.flags = MXM_REQ_SEND_FLAG_LAZY; + put_req->mxm_req.opcode = MXM_REQ_OP_PUT; } if (!zcopy) { put_req->mxm_req.flags |= MXM_REQ_SEND_FLAG_BLOCKING; @@ -1045,7 +1075,6 @@ static inline int mca_spml_ikrit_put_internal(void* dst_addr, put_req->mxm_req.base.data.buffer.length = size; put_req->mxm_req.base.completed_cb = put_completion_cb; put_req->mxm_req.base.context = put_req; - put_req->mxm_req.opcode = MXM_REQ_OP_PUT; put_req->mxm_req.op.mem.remote_vaddr = (intptr_t) rva; put_req->mxm_req.base.state = MXM_REQ_NEW; put_req->pe = dst; @@ -1054,7 +1083,7 @@ static inline int mca_spml_ikrit_put_internal(void* dst_addr, put_req->mxm_req.base.data.buffer.memh = NULL; put_req->mxm_req.op.mem.remote_memh = NULL; #else - put_req->mxm_req.op.mem.remote_mkey = &mxm_empty_mem_key; + put_req->mxm_req.op.mem.remote_mkey = to_mxm_mkey(r_mkey); #endif if (mca_spml_ikrit.mxm_peers[dst]->pe_relay >= 0 @@ -1140,8 +1169,8 @@ int mca_spml_ikrit_put_simple(void* dst_addr, } #if SPML_IKRIT_PUT_DEBUG == 1 - SPML_VERBOSE(100, "put: pe:%d ptl=%d dst=%p <- src: %p sz=%d. dst_rva=%p, dst_rkey=0x%lx", - dst, ptl_id, dst_addr, src_addr, (int)size, (void *)rva, r_mkey->handle.key); + SPML_VERBOSE(100, "put: pe:%d ptl=%d dst=%p <- src: %p sz=%d. 
dst_rva=%p, %s", + dst, ptl_id, dst_addr, src_addr, (int)size, (void *)rva, mca_spml_base_mkey2str(r_mkey)); #endif if (ptl_id == MXM_PTL_SHM) { @@ -1168,8 +1197,8 @@ int mca_spml_ikrit_put_simple(void* dst_addr, } #if SPML_IKRIT_PUT_DEBUG == 1 - SPML_VERBOSE(100, "put: pe:%d ptl=%d dst=%p <- src: %p sz=%d. dst_rva=%p, dst_rkey=0x%lx", - dst, ptl_id, dst_addr, src_addr, (int)size, (void *)rva, r_mkey->handle.key); + SPML_VERBOSE(100, "put: pe:%d ptl=%d dst=%p <- src: %p sz=%d. dst_rva=%p, %s", + dst, ptl_id, dst_addr, src_addr, (int)size, (void *)rva, mca_spml_base_mkey2str(r_mkey)); #endif /* fill out request */ @@ -1194,7 +1223,7 @@ int mca_spml_ikrit_put_simple(void* dst_addr, mxm_req.base.data.buffer.memh = NULL; mxm_req.op.mem.remote_memh = NULL; #else - mxm_req.op.mem.remote_mkey = &mxm_empty_mem_key; + mxm_req.op.mem.remote_mkey = to_mxm_mkey(r_mkey); #endif if (mca_spml_ikrit.mxm_peers[dst]->need_fence == 0) { diff --git a/oshmem/mca/spml/ikrit/spml_ikrit.h b/oshmem/mca/spml/ikrit/spml_ikrit.h index f09ba0b3b7..6045718b42 100644 --- a/oshmem/mca/spml/ikrit/spml_ikrit.h +++ b/oshmem/mca/spml/ikrit/spml_ikrit.h @@ -101,9 +101,8 @@ typedef struct mca_spml_ikrit_t mca_spml_ikrit_t; #if MXM_API >= MXM_VERSION(2,0) #define MXM_PTL_SHM 0 -#define MXM_PTL_SELF 1 -#define MXM_PTL_RDMA 2 -#define MXM_PTL_LAST 3 +#define MXM_PTL_RDMA 1 +#define MXM_PTL_LAST 2 #endif typedef struct spml_ikrit_mxm_ep_conn_info_t { diff --git a/oshmem/mca/spml/ikrit/spml_ikrit_component.c b/oshmem/mca/spml/ikrit/spml_ikrit_component.c index 9a52d4d484..383e0ef3fd 100644 --- a/oshmem/mca/spml/ikrit/spml_ikrit_component.c +++ b/oshmem/mca/spml/ikrit/spml_ikrit_component.c @@ -109,7 +109,7 @@ static int mca_spml_ikrit_component_register(void) "[integer] ikrit priority"); mca_spml_ikrit_param_register_string("mxm_tls", - "ud,self", + "rc,ud,self", "[string] TL channels for MXM", &mca_spml_ikrit.mxm_tls); diff --git a/oshmem/mca/spml/spml.h b/oshmem/mca/spml/spml.h index 
b1331ffeed..adeb450f8e 100644 --- a/oshmem/mca/spml/spml.h +++ b/oshmem/mca/spml/spml.h @@ -71,19 +71,30 @@ typedef mca_spml_base_component_2_0_0_t mca_spml_base_component_t; */ /** * memory key + * We have two kinds of keys: + * - shared memory type of keys. Memory segment must be attached before access + * such keys use va_base = 0 and u.key + * - ib type of key. Key is passed with each put/get op. + * use va_base = segment base address on the owning PE, key bytes are stored in u.data (len bytes) */ typedef struct mca_spml_mkey { - union { - struct { - uint32_t rkey; - uint32_t lkey; - } ib; - uint64_t key; - } handle; void* va_base; + uint16_t len; + union { + void *data; + uint64_t key; + } u; void *spml_context; /* spml module can attach internal structures here */ } mca_spml_mkey_t; +static inline char *mca_spml_base_mkey2str(mca_spml_mkey_t *mkey) +{ + static char buf[64]; + + snprintf(buf, sizeof(buf), "mkey: base=%p len=%d key=%llx", mkey->va_base, mkey->len, (unsigned long long)mkey->u.key); + return buf; +} + /** * Downcall from MCA layer to enable the PML/BTLs. * @@ -237,14 +248,6 @@ typedef int (*mca_spml_base_module_fence_fn_t)(void); */ typedef int (*mca_spml_base_module_wait_nb_fn_t)(void*); -typedef void* (*mca_spml_base_module_get_remote_context_fn_t)(void*); - -typedef void (*mca_spml_base_module_set_remote_context_fn_t)(void**, void*); - -typedef int (*mca_spml_base_module_get_remote_context_size_fn_t)(void*); - -typedef void (*mca_spml_base_module_set_remote_context_size_fn_t)(void**, int); - /** * SPML instance. 
*/ @@ -268,10 +271,6 @@ struct mca_spml_base_module_1_0_0_t { mca_spml_base_module_wait_fn_t spml_wait; mca_spml_base_module_wait_nb_fn_t spml_wait_nb; mca_spml_base_module_fence_fn_t spml_fence; - mca_spml_base_module_get_remote_context_fn_t spml_get_remote_context; - mca_spml_base_module_set_remote_context_fn_t spml_set_remote_context; - mca_spml_base_module_get_remote_context_size_fn_t spml_get_remote_context_size; - mca_spml_base_module_set_remote_context_size_fn_t spml_set_remote_context_size; void *self; }; diff --git a/oshmem/mca/spml/yoda/spml_yoda.c b/oshmem/mca/spml/yoda/spml_yoda.c index e9611f099b..93bcf33c22 100644 --- a/oshmem/mca/spml/yoda/spml_yoda.c +++ b/oshmem/mca/spml/yoda/spml_yoda.c @@ -57,10 +57,6 @@ mca_spml_yoda_module_t mca_spml_yoda = { mca_spml_base_wait, mca_spml_base_wait_nb, mca_spml_yoda_fence, - mca_spml_yoda_get_remote_context, - mca_spml_yoda_set_remote_context, - mca_spml_yoda_get_remote_context_size, - mca_spml_yoda_set_remote_context_size, (void *)&mca_spml_yoda } @@ -302,12 +298,6 @@ int mca_spml_yoda_deregister(mca_spml_mkey_t *mkeys) ybtl->btl->btl_free(ybtl->btl, yoda_context->btl_src_descriptor); yoda_context->btl_src_descriptor = NULL; } - if (yoda_context->btl_src_segment) { - free(yoda_context->btl_src_segment); - yoda_context->btl_src_segment = NULL; - } - yoda_context->btl_src_segment_size = 0; - if (yoda_context->registration) { ybtl->btl->btl_mpool->mpool_deregister(ybtl->btl->btl_mpool, yoda_context->registration); @@ -372,15 +362,15 @@ mca_spml_mkey_t *mca_spml_yoda_register(void* addr, } /* If we have shared memory just save its id*/ - if ((YODA_BTL_SM == ybtl->btl_type) - && ((int) MEMHEAP_SHM_GET_ID(shmid) != MEMHEAP_SHM_INVALID)) { - mkeys[i].handle.key = shmid; + if (YODA_BTL_SM == ybtl->btl_type + && MEMHEAP_SHM_INVALID != (int) MEMHEAP_SHM_GET_ID(shmid)) { + mkeys[i].u.key = shmid; mkeys[i].va_base = 0; continue; } yoda_context = calloc(1, sizeof(*yoda_context)); - mkeys[i].spml_context = (void*) 
yoda_context; + mkeys[i].spml_context = yoda_context; yoda_context->registration = NULL; if (NULL != ybtl->btl->btl_prepare_src) { @@ -421,21 +411,16 @@ mca_spml_mkey_t *mca_spml_yoda_register(void* addr, SPML_ERROR("%s: failed to register source memory. ", btl_type2str(ybtl->btl_type)); } - /* copy source descriptor to local structures*/ + yoda_context->btl_src_descriptor = des; - yoda_context->btl_src_segment_size = ybtl->btl->btl_seg_size; - if (0 != yoda_context->btl_src_segment_size) { - yoda_context->btl_src_segment = - malloc(yoda_context->btl_src_segment_size); - memcpy(yoda_context->btl_src_segment, - des->des_src, - yoda_context->btl_src_segment_size); - } + mkeys[i].u.data = des->des_src; + mkeys[i].len = ybtl->btl->btl_seg_size; } SPML_VERBOSE(5, - "rank %d btl %s rkey %x lkey %x key %llx address 0x%p len %llu shmid 0x%X|0x%X", - oshmem_proc_local_proc->proc_name.vpid, btl_type2str(ybtl->btl_type), mkeys[i].handle.ib.rkey, mkeys[i].handle.ib.lkey, (unsigned long long)mkeys[i].handle.key, mkeys[i].va_base, (unsigned long long)size, MEMHEAP_SHM_GET_TYPE(shmid), MEMHEAP_SHM_GET_ID(shmid)); + "rank %d btl %s address 0x%p len %llu shmid 0x%X|0x%X", + oshmem_proc_local_proc->proc_name.vpid, btl_type2str(ybtl->btl_type), + mkeys[i].va_base, (unsigned long long)size, MEMHEAP_SHM_GET_TYPE(shmid), MEMHEAP_SHM_GET_ID(shmid)); } OBJ_DESTRUCT(&convertor); *count = mca_spml_yoda.n_btls; @@ -735,7 +720,6 @@ static inline int mca_spml_yoda_put_internal(void *dst_addr, unsigned ncopied = 0; unsigned int frag_size = 0; char *p_src, *p_dst; - mca_spml_yoda_context_t* yoda_context; void* rva; mca_spml_mkey_t *r_mkey; int btl_id = 0; @@ -768,8 +752,8 @@ static inline int mca_spml_yoda_put_internal(void *dst_addr, } #if SPML_YODA_DEBUG == 1 - SPML_VERBOSE(100, "put: pe:%d dst=%p <- src: %p sz=%d. dst_rva=%p, dst_rkey=0x%lx", - dst, dst_addr, src_addr, (int)size, (void *)rva, r_mkey->handle.key); + SPML_VERBOSE(100, "put: pe:%d dst=%p <- src: %p sz=%d. 
dst_rva=%p, %s", + dst, dst_addr, src_addr, (int)size, (void *)rva, mca_spml_base_mkey2str(r_mkey)); #endif ybtl = &mca_spml_yoda.btl_type_map[btl_id]; @@ -818,12 +802,11 @@ static inline int mca_spml_yoda_put_internal(void *dst_addr, /* Preparing destination buffer */ - yoda_context = (mca_spml_yoda_context_t*) r_mkey->spml_context; - assert( (NULL != yoda_context) && (0 != yoda_context->btl_src_segment_size)); + assert( NULL != r_mkey->u.data && 0 != r_mkey->len); memcpy(&frag->rdma_segs[0].base_seg, - yoda_context->btl_src_segment, - yoda_context->btl_src_segment_size); + r_mkey->u.data, + r_mkey->len); frag->rdma_segs[0].base_seg.seg_addr.lval = (uintptr_t) p_dst; frag->rdma_segs[0].base_seg.seg_len = (put_via_send ? @@ -903,42 +886,6 @@ int mca_spml_yoda_wait_gets(void) return OSHMEM_SUCCESS; } -void* mca_spml_yoda_get_remote_context(void* spml_context) -{ - return ((mca_spml_yoda_context_t*) spml_context)->btl_src_segment; -} - -void mca_spml_yoda_set_remote_context(void** spml_context, - void* spml_remote_context) -{ - mca_spml_yoda_context_t * yoda_context; - yoda_context = *(spml_context); - - if (NULL == yoda_context) { - yoda_context = (mca_spml_yoda_context_t*) malloc(sizeof(*yoda_context)); - } - yoda_context->btl_src_segment = - (mca_btl_base_segment_t*) spml_remote_context; - *(spml_context) = yoda_context; -} - -int mca_spml_yoda_get_remote_context_size(void* spml_context) -{ - return ((mca_spml_yoda_context_t*) spml_context)->btl_src_segment_size; -} - -void mca_spml_yoda_set_remote_context_size(void** spml_context, - int spml_remote_context_size) -{ - mca_spml_yoda_context_t *yoda_context; - yoda_context = *(spml_context); - - if (NULL == yoda_context) { - yoda_context = calloc(1, sizeof(*yoda_context)); - } - yoda_context->btl_src_segment_size = spml_remote_context_size; - *(spml_context) = yoda_context; -} int mca_spml_yoda_enable(bool enable) { @@ -1024,7 +971,6 @@ int mca_spml_yoda_get(void* src_addr, size_t size, void* dst_addr, int src) 
struct mca_spml_yoda_getreq_parent get_holder; struct yoda_btl *ybtl; int btl_id = 0; - mca_spml_yoda_context_t* yoda_context; int get_via_send; const opal_datatype_t *datatype = &opal_datatype_wchar; opal_convertor_t convertor; @@ -1059,8 +1005,8 @@ int mca_spml_yoda_get(void* src_addr, size_t size, void* dst_addr, int src) oshmem_shmem_abort(-1); } #if SPML_YODA_DEBUG == 1 - SPML_VERBOSE(100, "get: pe:%d src=%p -> dst: %p sz=%d. src_rva=%p, src_rkey=0x%lx", - src, src_addr, dst_addr, (int)size, (void *)rva, r_mkey->handle.key); + SPML_VERBOSE(100, "get: pe:%d src=%p -> dst: %p sz=%d. src_rva=%p, %s", + src, src_addr, dst_addr, (int)size, (void *)rva, mca_spml_base_mkey2str(r_mkey)); #endif ybtl = &mca_spml_yoda.btl_type_map[btl_id]; @@ -1111,11 +1057,10 @@ int mca_spml_yoda_get(void* src_addr, size_t size, void* dst_addr, int src) ncopied = i < nfrags - 1 ? frag_size :(unsigned) ((char *) dst_addr + size - p_dst); frag->allocated = 0; /* Prepare destination descriptor*/ - yoda_context = r_mkey->spml_context; - assert(0 != yoda_context->btl_src_segment_size); + assert(0 != r_mkey->len); memcpy(&frag->rdma_segs[0].base_seg, - yoda_context->btl_src_segment, - yoda_context->btl_src_segment_size); + r_mkey->u.data, + r_mkey->len); frag->rdma_segs[0].base_seg.seg_len = (get_via_send ? 
ncopied + SPML_YODA_SEND_CONTEXT_SIZE : ncopied); if (get_via_send) { diff --git a/oshmem/mca/spml/yoda/spml_yoda.h b/oshmem/mca/spml/yoda/spml_yoda.h index 44ca96391f..195a0a1526 100644 --- a/oshmem/mca/spml/yoda/spml_yoda.h +++ b/oshmem/mca/spml/yoda/spml_yoda.h @@ -82,8 +82,6 @@ typedef struct mca_spml_yoda_t mca_spml_yoda_module_t; struct mca_spml_yoda_context_t { mca_btl_base_descriptor_t* btl_src_descriptor; - int btl_src_segment_size; - mca_btl_base_segment_t* btl_src_segment; mca_mpool_base_registration_t* registration; }; typedef struct mca_spml_yoda_context_t mca_spml_yoda_context_t; diff --git a/oshmem/proc/proc.h b/oshmem/proc/proc.h index 263f8a16d9..08111b2685 100644 --- a/oshmem/proc/proc.h +++ b/oshmem/proc/proc.h @@ -22,7 +22,6 @@ #include "orte/types.h" #include "orte/runtime/orte_globals.h" -#include "ompi/mca/bml/bml.h" BEGIN_C_DECLS