1
1

OSHMEM: mxm versions less than 2.0 are no longer supported

Signed-off-by: Alex Mikheev <alexm@mellanox.com>
Этот коммит содержится в:
Alex Mikheev 2016-10-25 18:07:16 +03:00
родитель f11b0c7edf
Коммит 2f91ce7281
6 изменённых файлов: 10 добавлений и 266 удалений

Просмотреть файл

@ -62,7 +62,7 @@ OBJ_CLASS_DECLARATION(mca_atomic_mxm_module_t);
END_C_DECLS END_C_DECLS
#if MXM_API >= MXM_VERSION(2,0) /* move to spml/ikrit */
static inline mxm_mem_key_t *to_mxm_mkey(sshmem_mkey_t *mkey) { static inline mxm_mem_key_t *to_mxm_mkey(sshmem_mkey_t *mkey) {
if (0 == mkey->len) { if (0 == mkey->len) {
@ -70,6 +70,5 @@ static inline mxm_mem_key_t *to_mxm_mkey(sshmem_mkey_t *mkey) {
} }
return (mxm_mem_key_t *)mkey->u.data; return (mxm_mem_key_t *)mkey->u.data;
} }
#endif
#endif /* MCA_ATOMIC_MXM_H */ #endif /* MCA_ATOMIC_MXM_H */

Просмотреть файл

@ -87,23 +87,14 @@ int mca_atomic_mxm_cswap(void *target,
sreq.base.data.buffer.memh = MXM_INVALID_MEM_HANDLE; sreq.base.data.buffer.memh = MXM_INVALID_MEM_HANDLE;
sreq.op.atomic.remote_vaddr = (uintptr_t) remote_addr; sreq.op.atomic.remote_vaddr = (uintptr_t) remote_addr;
#if MXM_API < MXM_VERSION(2,0)
sreq.base.flags = 0;
sreq.op.atomic.remote_memh = MXM_INVALID_MEM_HANDLE;
#else
sreq.flags = 0; sreq.flags = 0;
sreq.op.atomic.remote_mkey = to_mxm_mkey(r_mkey); sreq.op.atomic.remote_mkey = to_mxm_mkey(r_mkey);
#endif
sreq.op.atomic.order = nlong_order; sreq.op.atomic.order = nlong_order;
if (NULL == cond) { if (NULL == cond) {
sreq.opcode = MXM_REQ_OP_ATOMIC_SWAP; sreq.opcode = MXM_REQ_OP_ATOMIC_SWAP;
} else { } else {
#if MXM_API < MXM_VERSION(2,0)
memcpy(&sreq.op.atomic.value8, cond, nlong);
#else
memcpy(&sreq.op.atomic.value, cond, nlong); memcpy(&sreq.op.atomic.value, cond, nlong);
#endif
sreq.opcode = MXM_REQ_OP_ATOMIC_CSWAP; sreq.opcode = MXM_REQ_OP_ATOMIC_CSWAP;
} }

Просмотреть файл

@ -84,13 +84,8 @@ int mca_atomic_mxm_fadd(void *target,
sreq.base.data_type = MXM_REQ_DATA_BUFFER; sreq.base.data_type = MXM_REQ_DATA_BUFFER;
sreq.op.atomic.remote_vaddr = (uintptr_t) remote_addr; sreq.op.atomic.remote_vaddr = (uintptr_t) remote_addr;
#if MXM_API < MXM_VERSION(2,0)
sreq.op.atomic.remote_memh = MXM_INVALID_MEM_HANDLE;
memcpy(&sreq.op.atomic.value8, value, nlong);
#else
sreq.op.atomic.remote_mkey = to_mxm_mkey(r_mkey); sreq.op.atomic.remote_mkey = to_mxm_mkey(r_mkey);
memcpy(&sreq.op.atomic.value, value, nlong); memcpy(&sreq.op.atomic.value, value, nlong);
#endif
sreq.op.atomic.order = nlong_order; sreq.op.atomic.order = nlong_order;
/* Do we need atomic 'add' or atomic 'fetch and add'? */ /* Do we need atomic 'add' or atomic 'fetch and add'? */
@ -98,22 +93,13 @@ int mca_atomic_mxm_fadd(void *target,
sreq.base.data.buffer.ptr = dummy_buf; sreq.base.data.buffer.ptr = dummy_buf;
sreq.base.data.buffer.length = nlong; sreq.base.data.buffer.length = nlong;
sreq.base.data.buffer.memh = MXM_INVALID_MEM_HANDLE; sreq.base.data.buffer.memh = MXM_INVALID_MEM_HANDLE;
#if MXM_API < MXM_VERSION(2,0)
sreq.base.flags = MXM_REQ_FLAG_SEND_SYNC;
sreq.opcode = MXM_REQ_OP_ATOMIC_ADD;
#else
sreq.flags = 0; sreq.flags = 0;
sreq.opcode = MXM_REQ_OP_ATOMIC_FADD; sreq.opcode = MXM_REQ_OP_ATOMIC_FADD;
#endif
} else { } else {
sreq.base.data.buffer.ptr = prev; sreq.base.data.buffer.ptr = prev;
sreq.base.data.buffer.length = nlong; sreq.base.data.buffer.length = nlong;
sreq.base.data.buffer.memh = MXM_INVALID_MEM_HANDLE; sreq.base.data.buffer.memh = MXM_INVALID_MEM_HANDLE;
#if MXM_API < MXM_VERSION(2,0)
sreq.base.flags = 0;
#else
sreq.flags = 0; sreq.flags = 0;
#endif
sreq.opcode = MXM_REQ_OP_ATOMIC_FADD; sreq.opcode = MXM_REQ_OP_ATOMIC_FADD;
} }

Просмотреть файл

@ -70,29 +70,6 @@ struct mca_spml_ikrit_put_request {
typedef struct mca_spml_ikrit_put_request mca_spml_ikrit_put_request_t; typedef struct mca_spml_ikrit_put_request mca_spml_ikrit_put_request_t;
OBJ_CLASS_DECLARATION(mca_spml_ikrit_put_request_t); OBJ_CLASS_DECLARATION(mca_spml_ikrit_put_request_t);
#if MXM_API < MXM_VERSION(2,0)
/*
 * Ask MXM for the address of endpoint mca_spml_ikrit.mxm_ep on transport
 * `ptlid` and store it in ep_info->addr.ptl_addr[ptlid].
 *
 * Returns OSHMEM_SUCCESS when mxm_ep_address() reports MXM_OK.  On any other
 * status the "unable to get endpoint address" help text is shown together
 * with the MXM error string, and OSHMEM_ERROR is returned.
 */
static int spml_ikrit_get_ep_address(spml_ikrit_mxm_ep_conn_info_t *ep_info,
                                     mxm_ptl_id_t ptlid)
{
    /* In/out length argument: starts as the capacity of the address slot. */
    size_t slot_len = sizeof(ep_info->addr.ptl_addr[ptlid]);
    mxm_error_t status = mxm_ep_address(mca_spml_ikrit.mxm_ep,
                                        ptlid,
                                        (struct sockaddr *) &ep_info->addr.ptl_addr[ptlid],
                                        &slot_len);

    if (MXM_OK == status) {
        return OSHMEM_SUCCESS;
    }

    orte_show_help("help-oshmem-spml-ikrit.txt",
                   "unable to get endpoint address",
                   true,
                   mxm_error_string(status));
    return OSHMEM_ERROR;
}
#else
static inline mxm_mem_key_t *to_mxm_mkey(sshmem_mkey_t *mkey) { static inline mxm_mem_key_t *to_mxm_mkey(sshmem_mkey_t *mkey) {
if (0 == mkey->len) { if (0 == mkey->len) {
@ -100,8 +77,6 @@ static inline mxm_mem_key_t *to_mxm_mkey(sshmem_mkey_t *mkey) {
} }
return (mxm_mem_key_t *)mkey->u.data; return (mxm_mem_key_t *)mkey->u.data;
} }
#endif
static inline void mca_spml_irkit_req_wait(mxm_req_base_t *req) static inline void mca_spml_irkit_req_wait(mxm_req_base_t *req)
{ {
@ -234,32 +209,6 @@ mca_spml_ikrit_t mca_spml_ikrit = {
} }
}; };
#if MXM_API < MXM_VERSION(2,0)
void mca_spml_ikrit_dump_stats(void);

/*
 * Debug helper: print the MXM connection state of every PE to stdout.
 *
 * For each PE in [0, oshmem_num_procs()) the state reported by
 * mxm_print_conn_state() is captured into a local text buffer through an
 * in-memory stdio stream and then printed together with the PE number and
 * the connection handle.  Output longer than the buffer is truncated.
 *
 * If the in-memory stream cannot be created, the error is reported with
 * perror() and the dump stops.
 */
void mca_spml_ikrit_dump_stats(void)
{
    char sbuf[1024];
    int num_procs;
    int i;

    /* The stream below is only handed sizeof(sbuf) - 1 bytes, so this last
     * byte is never overwritten and sbuf is always a valid C string, even
     * when the state text is truncated. */
    sbuf[sizeof(sbuf) - 1] = '\0';

    num_procs = oshmem_num_procs();
    for (i = 0; i < num_procs; i++) {
        /* A fresh write-mode stream per peer: "w" starts from an empty
         * buffer, so text left over from a longer previous peer can never
         * leak into this peer's output.  ("rw" is not a valid stdio mode and
         * is treated as read-only by glibc, which made every write fail.) */
        FILE *fp = fmemopen(sbuf, sizeof(sbuf) - 1, "w");

        if (NULL == fp) {
            perror("mca_spml_ikrit_dump_stats: fmemopen");
            return;
        }

        /* presumably writes the textual connection state to fp -- the helper
         * is declared in the MXM headers, not visible here */
        mxm_print_conn_state(mca_spml_ikrit.mxm_peers[i]->mxm_conn,
                             MXM_STATE_DETAIL_LEVEL_DATA,
                             "",
                             fp);

        /* Closing flushes stdio's own buffer into sbuf and NUL-terminates
         * the written text; sbuf must not be read before this point. */
        fclose(fp);

        printf("=========== pe:%d conn:%p stats:\n %s==================\n",
               i,
               (void *) mca_spml_ikrit.mxm_peers[i]->mxm_conn,
               sbuf);
    }
}
#endif
static inline mca_spml_ikrit_put_request_t *alloc_put_req(void) static inline mca_spml_ikrit_put_request_t *alloc_put_req(void)
{ {
mca_spml_ikrit_put_request_t *req; mca_spml_ikrit_put_request_t *req;
@ -341,12 +290,7 @@ static int create_ptl_idx(int dst_pe)
return OSHMEM_ERROR; return OSHMEM_ERROR;
OSHMEM_PROC_DATA(proc)->num_transports = 1; OSHMEM_PROC_DATA(proc)->num_transports = 1;
#if MXM_API < MXM_VERSION(2,0) OSHMEM_PROC_DATA(proc)->transport_ids[0] = MXM_PTL_RDMA;
if (oshmem_my_proc_id() == dst_pe)
OSHMEM_PROC_DATA(proc)->transport_ids[0] = MXM_PTL_SELF;
else
#endif
OSHMEM_PROC_DATA(proc)->transport_ids[0] = MXM_PTL_RDMA;
return OSHMEM_SUCCESS; return OSHMEM_SUCCESS;
} }
@ -382,11 +326,9 @@ int mca_spml_ikrit_del_procs(ompi_proc_t** procs, size_t nprocs)
int my_rank = oshmem_my_proc_id(); int my_rank = oshmem_my_proc_id();
oshmem_shmem_barrier(); oshmem_shmem_barrier();
#if MXM_API >= MXM_VERSION(2,0)
if (mca_spml_ikrit.bulk_disconnect) { if (mca_spml_ikrit.bulk_disconnect) {
mxm_ep_powerdown(mca_spml_ikrit.mxm_ep); mxm_ep_powerdown(mca_spml_ikrit.mxm_ep);
} }
#endif
while (NULL != opal_list_remove_first(&mca_spml_ikrit.active_peers)) { while (NULL != opal_list_remove_first(&mca_spml_ikrit.active_peers)) {
}; };
@ -412,12 +354,7 @@ int mca_spml_ikrit_add_procs(ompi_proc_t** procs, size_t nprocs)
spml_ikrit_mxm_ep_conn_info_t *ep_info = NULL; spml_ikrit_mxm_ep_conn_info_t *ep_info = NULL;
spml_ikrit_mxm_ep_conn_info_t *ep_hw_rdma_info = NULL; spml_ikrit_mxm_ep_conn_info_t *ep_hw_rdma_info = NULL;
spml_ikrit_mxm_ep_conn_info_t my_ep_info = {{0}}; spml_ikrit_mxm_ep_conn_info_t my_ep_info = {{0}};
#if MXM_API < MXM_VERSION(2,0)
mxm_conn_req_t *conn_reqs;
int timeout;
#else
size_t mxm_addr_len = MXM_MAX_ADDR_LEN; size_t mxm_addr_len = MXM_MAX_ADDR_LEN;
#endif
mxm_error_t err; mxm_error_t err;
size_t i, n; size_t i, n;
int rc = OSHMEM_ERROR; int rc = OSHMEM_ERROR;
@ -426,14 +363,6 @@ int mca_spml_ikrit_add_procs(ompi_proc_t** procs, size_t nprocs)
OBJ_CONSTRUCT(&mca_spml_ikrit.active_peers, opal_list_t); OBJ_CONSTRUCT(&mca_spml_ikrit.active_peers, opal_list_t);
/* Allocate connection requests */ /* Allocate connection requests */
#if MXM_API < MXM_VERSION(2,0)
conn_reqs = malloc(nprocs * sizeof(mxm_conn_req_t));
if (NULL == conn_reqs) {
rc = OSHMEM_ERR_OUT_OF_RESOURCE;
goto bail;
}
memset(conn_reqs, 0x0, sizeof(mxm_conn_req_t));
#endif
ep_info = calloc(sizeof(spml_ikrit_mxm_ep_conn_info_t), nprocs); ep_info = calloc(sizeof(spml_ikrit_mxm_ep_conn_info_t), nprocs);
if (NULL == ep_info) { if (NULL == ep_info) {
rc = OSHMEM_ERR_OUT_OF_RESOURCE; rc = OSHMEM_ERR_OUT_OF_RESOURCE;
@ -455,18 +384,6 @@ int mca_spml_ikrit_add_procs(ompi_proc_t** procs, size_t nprocs)
goto bail; goto bail;
} }
#if MXM_API < MXM_VERSION(2,0)
if (OSHMEM_SUCCESS
!= spml_ikrit_get_ep_address(&my_ep_info, MXM_PTL_SELF)) {
rc = OSHMEM_ERROR;
goto bail;
}
if (OSHMEM_SUCCESS
!= spml_ikrit_get_ep_address(&my_ep_info, MXM_PTL_RDMA)) {
rc = OSHMEM_ERROR;
goto bail;
}
#else
if (mca_spml_ikrit.hw_rdma_channel) { if (mca_spml_ikrit.hw_rdma_channel) {
err = mxm_ep_get_address(mca_spml_ikrit.mxm_hw_rdma_ep, &my_ep_info.addr.ep_addr, &mxm_addr_len); err = mxm_ep_get_address(mca_spml_ikrit.mxm_hw_rdma_ep, &my_ep_info.addr.ep_addr, &mxm_addr_len);
if (MXM_OK != err) { if (MXM_OK != err) {
@ -485,7 +402,7 @@ int mca_spml_ikrit_add_procs(ompi_proc_t** procs, size_t nprocs)
rc = OSHMEM_ERROR; rc = OSHMEM_ERROR;
goto bail; goto bail;
} }
#endif
oshmem_shmem_allgather(&my_ep_info, ep_info, oshmem_shmem_allgather(&my_ep_info, ep_info,
sizeof(spml_ikrit_mxm_ep_conn_info_t)); sizeof(spml_ikrit_mxm_ep_conn_info_t));
@ -504,13 +421,6 @@ int mca_spml_ikrit_add_procs(ompi_proc_t** procs, size_t nprocs)
} }
mca_spml_ikrit.mxm_peers[i]->pe = i; mca_spml_ikrit.mxm_peers[i]->pe = i;
#if MXM_API < MXM_VERSION(2,0)
conn_reqs[i].ptl_addr[MXM_PTL_SELF] =
(struct sockaddr *) &ep_info[i].addr.ptl_addr[MXM_PTL_SELF];
conn_reqs[i].ptl_addr[MXM_PTL_SHM] = NULL;
conn_reqs[i].ptl_addr[MXM_PTL_RDMA] =
(struct sockaddr *) &ep_info[i].addr.ptl_addr[MXM_PTL_RDMA];
#else
err = mxm_ep_connect(mca_spml_ikrit.mxm_ep, ep_info[i].addr.ep_addr, &mca_spml_ikrit.mxm_peers[i]->mxm_conn); err = mxm_ep_connect(mca_spml_ikrit.mxm_ep, ep_info[i].addr.ep_addr, &mca_spml_ikrit.mxm_peers[i]->mxm_conn);
if (MXM_OK != err) { if (MXM_OK != err) {
SPML_ERROR("MXM returned connect error: %s\n", mxm_error_string(err)); SPML_ERROR("MXM returned connect error: %s\n", mxm_error_string(err));
@ -528,55 +438,18 @@ int mca_spml_ikrit_add_procs(ompi_proc_t** procs, size_t nprocs)
} else { } else {
mca_spml_ikrit.mxm_peers[i]->mxm_hw_rdma_conn = mca_spml_ikrit.mxm_peers[i]->mxm_conn; mca_spml_ikrit.mxm_peers[i]->mxm_hw_rdma_conn = mca_spml_ikrit.mxm_peers[i]->mxm_conn;
} }
#endif
} }
#if MXM_API < MXM_VERSION(2,0)
/* Connect to remote peers */
if (mxm_get_version() < MXM_VERSION(1,5)) {
timeout = 1000;
} else {
timeout = -1;
}
err = mxm_ep_connect(mca_spml_ikrit.mxm_ep, conn_reqs, nprocs, timeout);
if (MXM_OK != err) {
SPML_ERROR("MXM returned connect error: %s\n", mxm_error_string(err));
for (i = 0; i < nprocs; ++i) {
if (MXM_OK != conn_reqs[i].error) {
SPML_ERROR("MXM EP connect to %s error: %s\n",
procs[i]->proc_hostname, mxm_error_string(conn_reqs[i].error));
}
}
rc = OSHMEM_ERR_CONNECTION_FAILED;
goto bail;
}
/* Save returned connections */
for (i = 0; i < nprocs; ++i) {
mca_spml_ikrit.mxm_peers[i]->mxm_conn = conn_reqs[i].conn;
if (OSHMEM_SUCCESS != create_ptl_idx(i)) {
rc = OSHMEM_ERR_CONNECTION_FAILED;
goto bail;
}
mxm_conn_ctx_set(conn_reqs[i].conn, mca_spml_ikrit.mxm_peers[i]);
}
if (conn_reqs)
free(conn_reqs);
#endif
if (ep_info) if (ep_info)
free(ep_info); free(ep_info);
if (ep_hw_rdma_info) if (ep_hw_rdma_info)
free(ep_hw_rdma_info); free(ep_hw_rdma_info);
#if MXM_API >= MXM_VERSION(2,0)
if (mca_spml_ikrit.bulk_connect) { if (mca_spml_ikrit.bulk_connect) {
/* Need a barrier to ensure remote peers already created connection */ /* Need a barrier to ensure remote peers already created connection */
oshmem_shmem_barrier(); oshmem_shmem_barrier();
mxm_ep_wireup(mca_spml_ikrit.mxm_ep); mxm_ep_wireup(mca_spml_ikrit.mxm_ep);
} }
#endif
proc_self = oshmem_proc_group_find(oshmem_group_all, my_rank); proc_self = oshmem_proc_group_find(oshmem_group_all, my_rank);
/* identify local processes and change transport to SHM */ /* identify local processes and change transport to SHM */
@ -598,10 +471,6 @@ int mca_spml_ikrit_add_procs(ompi_proc_t** procs, size_t nprocs)
return OSHMEM_SUCCESS; return OSHMEM_SUCCESS;
bail: bail:
#if MXM_API < MXM_VERSION(2,0)
if (conn_reqs)
free(conn_reqs);
#endif
if (ep_info) if (ep_info)
free(ep_info); free(ep_info);
if (ep_hw_rdma_info) if (ep_hw_rdma_info)
@ -619,10 +488,8 @@ sshmem_mkey_t *mca_spml_ikrit_register(void* addr,
{ {
int i; int i;
sshmem_mkey_t *mkeys; sshmem_mkey_t *mkeys;
#if MXM_API >= MXM_VERSION(2,0)
mxm_error_t err; mxm_error_t err;
mxm_mem_key_t *m_key; mxm_mem_key_t *m_key;
#endif
*count = 0; *count = 0;
mkeys = (sshmem_mkey_t *) calloc(1, MXM_PTL_LAST * sizeof(*mkeys)); mkeys = (sshmem_mkey_t *) calloc(1, MXM_PTL_LAST * sizeof(*mkeys));
@ -643,19 +510,10 @@ sshmem_mkey_t *mca_spml_ikrit_register(void* addr,
} }
mkeys[i].spml_context = 0; mkeys[i].spml_context = 0;
break; break;
#if MXM_API < MXM_VERSION(2,0)
case MXM_PTL_SELF:
mkeys[i].len = 0;
mkeys[i].spml_context = 0;
mkeys[i].va_base = addr;
break;
#endif
case MXM_PTL_RDMA: case MXM_PTL_RDMA:
mkeys[i].va_base = addr; mkeys[i].va_base = addr;
mkeys[i].spml_context = 0; mkeys[i].spml_context = 0;
#if MXM_API < MXM_VERSION(2,0)
mkeys[i].len = 0;
#else
if (mca_spml_ikrit.ud_only) { if (mca_spml_ikrit.ud_only) {
mkeys[i].len = 0; mkeys[i].len = 0;
break; break;
@ -681,7 +539,6 @@ sshmem_mkey_t *mca_spml_ikrit_register(void* addr,
SPML_ERROR("Failed to get memory key: %s", mxm_error_string(err)); SPML_ERROR("Failed to get memory key: %s", mxm_error_string(err));
goto error_out; goto error_out;
} }
#endif
break; break;
default: default:
@ -714,16 +571,12 @@ int mca_spml_ikrit_deregister(sshmem_mkey_t *mkeys)
for (i = 0; i < MXM_PTL_LAST; i++) { for (i = 0; i < MXM_PTL_LAST; i++) {
switch (i) { switch (i) {
#if MXM_API < MXM_VERSION(2,0)
case MXM_PTL_SELF:
#endif
case MXM_PTL_SHM: case MXM_PTL_SHM:
break; break;
case MXM_PTL_RDMA: case MXM_PTL_RDMA:
/* dereg memory */ /* dereg memory */
if (!mkeys[i].spml_context) if (!mkeys[i].spml_context)
break; break;
#if MXM_API >= MXM_VERSION(2,0)
mxm_mem_unmap(mca_spml_ikrit.mxm_context, mxm_mem_unmap(mca_spml_ikrit.mxm_context,
(void *)mkeys[i].va_base, (void *)mkeys[i].va_base,
(unsigned long)mkeys[i].spml_context, (unsigned long)mkeys[i].spml_context,
@ -731,7 +584,6 @@ int mca_spml_ikrit_deregister(sshmem_mkey_t *mkeys)
if (0 < mkeys[i].len) { if (0 < mkeys[i].len) {
free(mkeys[i].u.data); free(mkeys[i].u.data);
} }
#endif
break; break;
} }
} }
@ -765,14 +617,6 @@ int mca_spml_ikrit_oob_get_mkeys(int pe, uint32_t seg, sshmem_mkey_t *mkeys)
if (ptl != MXM_PTL_RDMA) if (ptl != MXM_PTL_RDMA)
return OSHMEM_ERROR; return OSHMEM_ERROR;
#if MXM_API < MXM_VERSION(2,0)
if (seg > 1)
return OSHMEM_ERROR;
mkeys[ptl].len = 0;
mkeys[ptl].u.key = MAP_SEGMENT_SHM_INVALID;
return OSHMEM_SUCCESS;
#else
/* we are actually registering memory in 2.0 and later. /* we are actually registering memory in 2.0 and later.
* So can only skip mkey exchange when ud is the only transport * So can only skip mkey exchange when ud is the only transport
*/ */
@ -783,7 +627,6 @@ int mca_spml_ikrit_oob_get_mkeys(int pe, uint32_t seg, sshmem_mkey_t *mkeys)
} }
return OSHMEM_ERROR; return OSHMEM_ERROR;
#endif
} }
static int mca_spml_ikrit_get_helper(mxm_send_req_t *sreq, static int mca_spml_ikrit_get_helper(mxm_send_req_t *sreq,
@ -824,12 +667,7 @@ static int mca_spml_ikrit_get_helper(mxm_send_req_t *sreq,
sreq->base.data_type = MXM_REQ_DATA_BUFFER; sreq->base.data_type = MXM_REQ_DATA_BUFFER;
sreq->base.data.buffer.ptr = dst_addr; sreq->base.data.buffer.ptr = dst_addr;
sreq->base.data.buffer.length = size; sreq->base.data.buffer.length = size;
#if MXM_API < MXM_VERSION(2,0)
sreq->base.data.buffer.memh = NULL;
sreq->op.mem.remote_memh = NULL;
#else
sreq->op.mem.remote_mkey = to_mxm_mkey(r_mkey); sreq->op.mem.remote_mkey = to_mxm_mkey(r_mkey);
#endif
sreq->opcode = MXM_REQ_OP_GET; sreq->opcode = MXM_REQ_OP_GET;
sreq->op.mem.remote_vaddr = (intptr_t) rva; sreq->op.mem.remote_vaddr = (intptr_t) rva;
sreq->base.state = MXM_REQ_NEW; sreq->base.state = MXM_REQ_NEW;
@ -957,11 +795,7 @@ int mca_spml_ikrit_get_async(void *src_addr,
return OSHMEM_ERROR; return OSHMEM_ERROR;
} }
#if MXM_API < MXM_VERSION(2,0)
get_req->mxm_req.base.flags = 0;
#else
get_req->mxm_req.flags = 0; get_req->mxm_req.flags = 0;
#endif
get_req->mxm_req.base.completed_cb = get_completion_cb; get_req->mxm_req.base.completed_cb = get_completion_cb;
get_req->mxm_req.base.context = get_req; get_req->mxm_req.base.context = get_req;
OPAL_THREAD_ADD32(&mca_spml_ikrit.n_active_gets, 1); OPAL_THREAD_ADD32(&mca_spml_ikrit.n_active_gets, 1);
@ -997,10 +831,6 @@ static int mca_spml_ikrit_mxm_fence(int dst)
fence_req->mxm_req.base.mq = mca_spml_ikrit.mxm_mq; fence_req->mxm_req.base.mq = mca_spml_ikrit.mxm_mq;
fence_req->mxm_req.base.conn = mca_spml_ikrit.mxm_peers[dst]->mxm_conn; fence_req->mxm_req.base.conn = mca_spml_ikrit.mxm_peers[dst]->mxm_conn;
#if MXM_API < MXM_VERSION(2,0)
fence_req->mxm_req.opcode = MXM_REQ_OP_FENCE;
fence_req->mxm_req.base.flags = MXM_REQ_FLAG_SEND_SYNC;
#else
fence_req->mxm_req.opcode = MXM_REQ_OP_PUT_SYNC; fence_req->mxm_req.opcode = MXM_REQ_OP_PUT_SYNC;
fence_req->mxm_req.flags = MXM_REQ_SEND_FLAG_FENCE; fence_req->mxm_req.flags = MXM_REQ_SEND_FLAG_FENCE;
fence_req->mxm_req.op.mem.remote_vaddr = 0; fence_req->mxm_req.op.mem.remote_vaddr = 0;
@ -1008,7 +838,6 @@ static int mca_spml_ikrit_mxm_fence(int dst)
fence_req->mxm_req.base.data_type = MXM_REQ_DATA_BUFFER; fence_req->mxm_req.base.data_type = MXM_REQ_DATA_BUFFER;
fence_req->mxm_req.base.data.buffer.ptr = 0; fence_req->mxm_req.base.data.buffer.ptr = 0;
fence_req->mxm_req.base.data.buffer.length = 0; fence_req->mxm_req.base.data.buffer.length = 0;
#endif
fence_req->mxm_req.base.state = MXM_REQ_NEW; fence_req->mxm_req.base.state = MXM_REQ_NEW;
fence_req->mxm_req.base.completed_cb = fence_completion_cb; fence_req->mxm_req.base.completed_cb = fence_completion_cb;
fence_req->mxm_req.base.context = fence_req; fence_req->mxm_req.base.context = fence_req;
@ -1041,19 +870,11 @@ static inline void put_completion_cb(void *ctx)
if (0 < peer->n_active_puts) { if (0 < peer->n_active_puts) {
peer->n_active_puts--; peer->n_active_puts--;
#if MXM_API < MXM_VERSION(2,0)
if (0 == peer->n_active_puts &&
(put_req->mxm_req.base.flags & MXM_REQ_FLAG_SEND_SYNC)) {
opal_list_remove_item(&mca_spml_ikrit.active_peers, &peer->super);
peer->need_fence = 0;
}
#else
if (0 == peer->n_active_puts && if (0 == peer->n_active_puts &&
(put_req->mxm_req.opcode == MXM_REQ_OP_PUT_SYNC)) { (put_req->mxm_req.opcode == MXM_REQ_OP_PUT_SYNC)) {
opal_list_remove_item(&mca_spml_ikrit.active_peers, &peer->super); opal_list_remove_item(&mca_spml_ikrit.active_peers, &peer->super);
peer->need_fence = 0; peer->need_fence = 0;
} }
#endif
} }
put_req->req_put.req_base.req_spml_complete = true; put_req->req_put.req_base.req_spml_complete = true;
@ -1137,7 +958,7 @@ static inline int mca_spml_ikrit_put_internal(void* dst_addr,
put_req->mxm_req.base.mq = mca_spml_ikrit.mxm_mq; put_req->mxm_req.base.mq = mca_spml_ikrit.mxm_mq;
/* request immediate response if we are getting low on send buffers. We only get response from remote on ack timeout. /* request immediate response if we are getting low on send buffers. We only get response from remote on ack timeout.
* Also request explicit ack once in a while */ * Also request explicit ack once in a while */
#if MXM_API < MXM_VERSION(2,0) #if 0
put_req->mxm_req.opcode = MXM_REQ_OP_PUT; put_req->mxm_req.opcode = MXM_REQ_OP_PUT;
if (mca_spml_ikrit.free_list_max - mca_spml_ikrit.n_active_puts <= SPML_IKRIT_PUT_LOW_WATER || if (mca_spml_ikrit.free_list_max - mca_spml_ikrit.n_active_puts <= SPML_IKRIT_PUT_LOW_WATER ||
(mca_spml_ikrit.mxm_peers[dst]->n_active_puts + 1) % SPML_IKRIT_PACKETS_PER_SYNC == 0) { (mca_spml_ikrit.mxm_peers[dst]->n_active_puts + 1) % SPML_IKRIT_PACKETS_PER_SYNC == 0) {
@ -1146,7 +967,7 @@ static inline int mca_spml_ikrit_put_internal(void* dst_addr,
} else { } else {
put_req->mxm_req.base.flags = MXM_REQ_FLAG_SEND_LAZY|MXM_REQ_FLAG_SEND_SYNC; put_req->mxm_req.base.flags = MXM_REQ_FLAG_SEND_LAZY|MXM_REQ_FLAG_SEND_SYNC;
} }
#else #endif
put_req->mxm_req.flags = 0; put_req->mxm_req.flags = 0;
if (mca_spml_ikrit.free_list_max - mca_spml_ikrit.n_active_puts <= SPML_IKRIT_PUT_LOW_WATER || if (mca_spml_ikrit.free_list_max - mca_spml_ikrit.n_active_puts <= SPML_IKRIT_PUT_LOW_WATER ||
(int)opal_list_get_size(&mca_spml_ikrit.active_peers) > mca_spml_ikrit.unsync_conn_max || (int)opal_list_get_size(&mca_spml_ikrit.active_peers) > mca_spml_ikrit.unsync_conn_max ||
@ -1163,7 +984,6 @@ static inline int mca_spml_ikrit_put_internal(void* dst_addr,
put_req->mxm_req.opcode = MXM_REQ_OP_PUT_SYNC; put_req->mxm_req.opcode = MXM_REQ_OP_PUT_SYNC;
} }
} }
#endif
put_req->mxm_req.base.conn = mca_spml_ikrit.mxm_peers[dst]->mxm_conn; put_req->mxm_req.base.conn = mca_spml_ikrit.mxm_peers[dst]->mxm_conn;
put_req->mxm_req.base.data_type = MXM_REQ_DATA_BUFFER; put_req->mxm_req.base.data_type = MXM_REQ_DATA_BUFFER;
@ -1175,12 +995,7 @@ static inline int mca_spml_ikrit_put_internal(void* dst_addr,
put_req->mxm_req.base.state = MXM_REQ_NEW; put_req->mxm_req.base.state = MXM_REQ_NEW;
put_req->pe = dst; put_req->pe = dst;
#if MXM_API < MXM_VERSION(2,0)
put_req->mxm_req.base.data.buffer.memh = NULL;
put_req->mxm_req.op.mem.remote_memh = NULL;
#else
put_req->mxm_req.op.mem.remote_mkey = to_mxm_mkey(r_mkey); put_req->mxm_req.op.mem.remote_mkey = to_mxm_mkey(r_mkey);
#endif
OPAL_THREAD_ADD32(&mca_spml_ikrit.n_active_puts, 1); OPAL_THREAD_ADD32(&mca_spml_ikrit.n_active_puts, 1);
if (mca_spml_ikrit.mxm_peers[dst]->need_fence == 0) { if (mca_spml_ikrit.mxm_peers[dst]->need_fence == 0) {
@ -1262,11 +1077,7 @@ int mca_spml_ikrit_put_simple(void* dst_addr,
/* fill out request */ /* fill out request */
mxm_req.base.mq = mca_spml_ikrit.mxm_mq; mxm_req.base.mq = mca_spml_ikrit.mxm_mq;
#if MXM_API < MXM_VERSION(2,0)
mxm_req.base.flags = MXM_REQ_FLAG_BLOCKING;
#else
mxm_req.flags = MXM_REQ_SEND_FLAG_BLOCKING; mxm_req.flags = MXM_REQ_SEND_FLAG_BLOCKING;
#endif
mxm_req.base.conn = mca_spml_ikrit.mxm_peers[dst]->mxm_conn; mxm_req.base.conn = mca_spml_ikrit.mxm_peers[dst]->mxm_conn;
mxm_req.base.data_type = MXM_REQ_DATA_BUFFER; mxm_req.base.data_type = MXM_REQ_DATA_BUFFER;
mxm_req.base.data.buffer.ptr = src_addr; mxm_req.base.data.buffer.ptr = src_addr;
@ -1278,12 +1089,7 @@ int mca_spml_ikrit_put_simple(void* dst_addr,
mxm_req.base.state = MXM_REQ_NEW; mxm_req.base.state = MXM_REQ_NEW;
mxm_req.base.error = MXM_OK; mxm_req.base.error = MXM_OK;
#if MXM_API < MXM_VERSION(2, 0)
mxm_req.base.data.buffer.memh = NULL;
mxm_req.op.mem.remote_memh = NULL;
#else
mxm_req.op.mem.remote_mkey = to_mxm_mkey(r_mkey); mxm_req.op.mem.remote_mkey = to_mxm_mkey(r_mkey);
#endif
if (mca_spml_ikrit.mxm_peers[dst]->need_fence == 0) { if (mca_spml_ikrit.mxm_peers[dst]->need_fence == 0) {
opal_list_append(&mca_spml_ikrit.active_peers, opal_list_append(&mca_spml_ikrit.active_peers,
@ -1371,6 +1177,7 @@ int mca_spml_ikrit_fence(void)
oshmem_request_wait_any_completion(); oshmem_request_wait_any_completion();
} }
SPML_VERBOSE(20, "fence completed"); SPML_VERBOSE(20, "fence completed");
return OSHMEM_SUCCESS; return OSHMEM_SUCCESS;
} }
@ -1392,9 +1199,6 @@ int mca_spml_ikrit_recv(void* buf, size_t size, int src)
req.base.state = MXM_REQ_NEW; req.base.state = MXM_REQ_NEW;
req.base.mq = mca_spml_ikrit.mxm_mq; req.base.mq = mca_spml_ikrit.mxm_mq;
req.base.conn = NULL; req.base.conn = NULL;
#if MXM_API < MXM_VERSION(2,0)
req.base.flags = MXM_REQ_FLAG_BLOCKING;
#endif
req.base.completed_cb = NULL; req.base.completed_cb = NULL;
req.base.data_type = MXM_REQ_DATA_BUFFER; req.base.data_type = MXM_REQ_DATA_BUFFER;
@ -1436,11 +1240,7 @@ int mca_spml_ikrit_send(void* buf,
req.base.state = MXM_REQ_NEW; req.base.state = MXM_REQ_NEW;
req.base.mq = mca_spml_ikrit.mxm_mq; req.base.mq = mca_spml_ikrit.mxm_mq;
req.base.conn = mca_spml_ikrit.mxm_peers[dst]->mxm_conn; req.base.conn = mca_spml_ikrit.mxm_peers[dst]->mxm_conn;
#if MXM_API < MXM_VERSION(2,0) req.flags = MXM_REQ_SEND_FLAG_BLOCKING;
req.base.flags = MXM_REQ_FLAG_BLOCKING;
#else
req.flags = MXM_REQ_SEND_FLAG_BLOCKING;
#endif
req.base.completed_cb = NULL; req.base.completed_cb = NULL;
req.base.data_type = MXM_REQ_DATA_BUFFER; req.base.data_type = MXM_REQ_DATA_BUFFER;

Просмотреть файл

@ -40,11 +40,6 @@
#define MXM_VERSION(major, minor) (((major)<<MXM_MAJOR_BIT)|((minor)<<MXM_MINOR_BIT)) #define MXM_VERSION(major, minor) (((major)<<MXM_MAJOR_BIT)|((minor)<<MXM_MINOR_BIT))
#endif #endif
#if MXM_API < MXM_VERSION(2,0)
#include <mxm/api/mxm_addr.h>
#include <mxm/api/mxm_stats.h>
#endif
#define MXM_SHMEM_MQ_ID 0x7119 #define MXM_SHMEM_MQ_ID 0x7119
/* start request explicit ack once our buffer pool is less than watermark */ /* start request explicit ack once our buffer pool is less than watermark */
@ -103,9 +98,7 @@ struct mca_spml_ikrit_t {
int hw_rdma_channel; /* true if we provide separate channel that int hw_rdma_channel; /* true if we provide separate channel that
has true one sided capability */ has true one sided capability */
int np; int np;
#if MXM_API >= MXM_VERSION(2,0)
int unsync_conn_max; int unsync_conn_max;
#endif
size_t put_zcopy_threshold; /* enable zcopy in put if message size is size_t put_zcopy_threshold; /* enable zcopy in put if message size is
greater than the threshold */ greater than the threshold */
}; };
@ -114,11 +107,9 @@ typedef struct mca_spml_ikrit_t mca_spml_ikrit_t;
#define MXM_MAX_ADDR_LEN 512 #define MXM_MAX_ADDR_LEN 512
#if MXM_API >= MXM_VERSION(2,0)
#define MXM_PTL_SHM 0 #define MXM_PTL_SHM 0
#define MXM_PTL_RDMA 1 #define MXM_PTL_RDMA 1
#define MXM_PTL_LAST 2 #define MXM_PTL_LAST 2
#endif
typedef struct spml_ikrit_mxm_ep_conn_info_t { typedef struct spml_ikrit_mxm_ep_conn_info_t {
union { union {

Просмотреть файл

@ -233,17 +233,11 @@ static int mca_spml_ikrit_component_register(void)
&mca_spml_ikrit.mxm_tls); &mca_spml_ikrit.mxm_tls);
mca_spml_ikrit_param_register_int("np", mca_spml_ikrit_param_register_int("np",
#if MXM_API <= MXM_VERSION(2,0) 0,
128, "[integer] Minimal allowed job's NP to activate ikrit", &mca_spml_ikrit.np);
#else
0,
#endif
"[integer] Minimal allowed job's NP to activate ikrit", &mca_spml_ikrit.np);
#if MXM_API >= MXM_VERSION(2,0)
mca_spml_ikrit_param_register_int("unsync_conn_max", 8, mca_spml_ikrit_param_register_int("unsync_conn_max", 8,
"[integer] Max number of connections that do not require notification of PUT operation remote completion. Increasing this number improves efficiency of p2p communication but increases overhead of shmem_fence/shmem_quiet/shmem_barrier", "[integer] Max number of connections that do not require notification of PUT operation remote completion. Increasing this number improves efficiency of p2p communication but increases overhead of shmem_fence/shmem_quiet/shmem_barrier",
&mca_spml_ikrit.unsync_conn_max); &mca_spml_ikrit.unsync_conn_max);
#endif
mca_spml_ikrit_param_register_size_t("put_zcopy_threshold", 16384ULL, mca_spml_ikrit_param_register_size_t("put_zcopy_threshold", 16384ULL,
"[size_t] Use zero copy put if message size is greater than the threshold", "[size_t] Use zero copy put if message size is greater than the threshold",
@ -312,10 +306,6 @@ static int mca_spml_ikrit_component_open(void)
return OSHMEM_ERROR; return OSHMEM_ERROR;
} }
#if MXM_API < MXM_VERSION(2,0)
mca_spml_ikrit.ud_only = 1;
mca_spml_ikrit.mxm_ctx_opts->ptl_bitmap = (MXM_BIT(MXM_PTL_SELF) | MXM_BIT(MXM_PTL_RDMA));
#endif
SPML_VERBOSE(5, "UD only mode is %s", SPML_VERBOSE(5, "UD only mode is %s",
mca_spml_ikrit.ud_only ? "enabled" : "disabled"); mca_spml_ikrit.ud_only ? "enabled" : "disabled");
@ -354,15 +344,10 @@ static int mca_spml_ikrit_component_close(void)
} }
if (mca_spml_ikrit.mxm_context) { if (mca_spml_ikrit.mxm_context) {
mxm_cleanup(mca_spml_ikrit.mxm_context); mxm_cleanup(mca_spml_ikrit.mxm_context);
#if MXM_API < MXM_VERSION(2,0)
mxm_config_free(mca_spml_ikrit.mxm_ep_opts);
mxm_config_free(mca_spml_ikrit.mxm_ctx_opts);
#else
mxm_config_free_ep_opts(mca_spml_ikrit.mxm_ep_opts); mxm_config_free_ep_opts(mca_spml_ikrit.mxm_ep_opts);
mxm_config_free_context_opts(mca_spml_ikrit.mxm_ctx_opts); mxm_config_free_context_opts(mca_spml_ikrit.mxm_ctx_opts);
if (mca_spml_ikrit.hw_rdma_channel) if (mca_spml_ikrit.hw_rdma_channel)
mxm_config_free_ep_opts(mca_spml_ikrit.mxm_ep_hw_rdma_opts); mxm_config_free_ep_opts(mca_spml_ikrit.mxm_ep_hw_rdma_opts);
#endif
} }
mca_spml_ikrit.mxm_mq = NULL; mca_spml_ikrit.mxm_mq = NULL;
mca_spml_ikrit.mxm_context = NULL; mca_spml_ikrit.mxm_context = NULL;
@ -373,14 +358,6 @@ static int spml_ikrit_mxm_init(void)
{ {
mxm_error_t err; mxm_error_t err;
#if MXM_API < MXM_VERSION(2,0)
/* Only relevant for SHM PTL - ignore */
mca_spml_ikrit.mxm_ep_opts->job_id = 0;
mca_spml_ikrit.mxm_ep_opts->local_rank = 0;
mca_spml_ikrit.mxm_ep_opts->num_local_procs = 0;
mca_spml_ikrit.mxm_ep_opts->rdma.drain_cq = 1;
#endif
/* Open MXM endpoint */ /* Open MXM endpoint */
err = mxm_ep_create(mca_spml_ikrit.mxm_context, err = mxm_ep_create(mca_spml_ikrit.mxm_context,
mca_spml_ikrit.mxm_ep_opts, mca_spml_ikrit.mxm_ep_opts,