1
1

ATOMIC/UCX: optimization for cswap

- use a uint64_t output datatype for cswap to avoid size-dependent
  branches in the implementations

Signed-off-by: Sergey Oblomov <sergeyo@mellanox.com>
Этот коммит содержится в:
Sergey Oblomov 2018-06-27 16:31:04 +03:00
родитель f574c14e3a
Коммит 5eb8c99cd7
9 изменённых файлов: 30 добавлений и 52 удалений

Просмотреть файл

@ -101,7 +101,8 @@ struct mca_atomic_base_module_1_0_0_t {
size_t size,
int pe);
int (*atomic_cswap)(void *target,
void *prev,
uint64_t *prev, /* prev is used internally by wrapper, we may
always use 64-bit value */
uint64_t cond,
uint64_t value,
size_t size,

Просмотреть файл

@ -37,7 +37,7 @@ mca_atomic_base_module_t*
mca_atomic_basic_query(int *priority);
int mca_atomic_basic_cswap(void *target,
void *prev,
uint64_t *prev,
uint64_t cond,
uint64_t value,
size_t size,

Просмотреть файл

@ -19,7 +19,7 @@
#include "atomic_basic.h"
int mca_atomic_basic_cswap(void *target,
void *prev,
uint64_t *prev,
uint64_t cond,
uint64_t value,
size_t nlong,

Просмотреть файл

@ -57,7 +57,7 @@ int mca_atomic_mxm_swap(void *target,
size_t nlong,
int pe);
int mca_atomic_mxm_cswap(void *target,
void *prev,
uint64_t *prev,
uint64_t cond,
uint64_t value,
size_t nlong,

Просмотреть файл

@ -44,7 +44,7 @@ int mca_atomic_mxm_swap(void *target,
}
int mca_atomic_mxm_cswap(void *target,
void *prev,
uint64_t *prev,
uint64_t cond,
uint64_t value,
size_t nlong,
@ -53,8 +53,8 @@ int mca_atomic_mxm_cswap(void *target,
mxm_send_req_t sreq;
mca_atomic_mxm_req_init(&sreq, pe, target, nlong);
memcpy(prev, &value, nlong);
*prev = value;
sreq.op.atomic.value = value;
sreq.base.data.buffer.ptr = prev;
sreq.opcode = MXM_REQ_OP_ATOMIC_CSWAP;

Просмотреть файл

@ -43,7 +43,7 @@ mca_atomic_base_module_t*
mca_atomic_ucx_query(int *priority);
int mca_atomic_ucx_cswap(void *target,
void *prev,
uint64_t *prev,
uint64_t cond,
uint64_t value,
size_t size,

Просмотреть файл

@ -19,53 +19,30 @@
#include "atomic_ucx.h"
/*
 * Compare-and-swap helper: issues a UCP_ATOMIC_FETCH_OP_CSWAP request for
 * `target` on PE `pe`, waits for the request to finish and, on success,
 * stores the resulting value through `prev`.
 *
 * The size argument should be a compile-time constant at each call site so
 * that the compiler can resolve the size-dependent branch below at compile
 * time.
 *
 * target - address handed to mca_spml_ucx_get_mkey(), which fills in `rva`
 *          (presumably the remote virtual address - TODO confirm)
 * prev   - receives the result: written as uint32_t when size equals
 *          sizeof(uint32_t), as uint64_t otherwise; asserted non-NULL
 * cond   - compare operand passed to ucp_atomic_fetch_nb()
 * value  - swap operand; copied into a local whose address is passed to
 *          ucp_atomic_fetch_nb()
 * size   - operand size in bytes, forwarded to ucp_atomic_fetch_nb()
 * pe     - index into mca_spml_self->ucp_peers
 *
 * Returns the status produced by opal_common_ucx_wait_request_opal_status().
 */
static inline
int mca_atomic_ucx_cswap_inner(void *target,
void *prev,
uint64_t cond,
uint64_t value,
size_t size,
int pe)
{
int status;
ucs_status_ptr_t status_ptr;
spml_ucx_mkey_t *ucx_mkey;
uint64_t rva;
uint64_t val;
/* val is the request's data buffer: it is seeded with the swap value,
 * its address is passed to the request, and its content is copied to
 * *prev once the request has succeeded. */
val = value;
ucx_mkey = mca_spml_ucx_get_mkey(pe, target, (void *)&rva);
status_ptr = ucp_atomic_fetch_nb(mca_spml_self->ucp_peers[pe].ucp_conn,
UCP_ATOMIC_FETCH_OP_CSWAP, cond, &val, size,
rva, ucx_mkey->rkey,
opal_common_ucx_empty_complete_cb);
/* Wait for the request and convert its outcome to an OPAL status code. */
status = opal_common_ucx_wait_request_opal_status(status_ptr, mca_spml_self->ucp_worker);
if (OPAL_SUCCESS == status) {
/* NOTE: prev is only checked here, after the operation has completed. */
assert(NULL != prev);
/* Store the result at the caller's operand width; every size other
 * than sizeof(uint32_t) takes the 64-bit store. */
if (sizeof(uint32_t) == size) {
*(uint32_t*)prev = val;
} else {
*(uint64_t*)prev = val;
}
}
return status;
}
int mca_atomic_ucx_cswap(void *target,
void *prev,
uint64_t *prev,
uint64_t cond,
uint64_t value,
size_t size,
int pe)
{
if (sizeof(uint64_t) == size) {
return mca_atomic_ucx_cswap_inner(target, prev, cond, value, sizeof(uint64_t), pe);
} else if (sizeof(uint32_t) == size) {
return mca_atomic_ucx_cswap_inner(target, prev, cond, value, sizeof(uint32_t), pe);
} else {
int status;
ucs_status_ptr_t status_ptr;
spml_ucx_mkey_t *ucx_mkey;
uint64_t rva;
if ((8 != size) && (4 != size)) {
ATOMIC_ERROR("[#%d] Type size must be 4 or 8 bytes.", my_pe);
return OSHMEM_ERROR;
}
assert(NULL != prev);
*prev = value;
ucx_mkey = mca_spml_ucx_get_mkey(pe, target, (void *)&rva);
status_ptr = ucp_atomic_fetch_nb(mca_spml_self->ucp_peers[pe].ucp_conn,
UCP_ATOMIC_FETCH_OP_CSWAP, cond, prev, size,
rva, ucx_mkey->rkey,
opal_common_ucx_empty_complete_cb);
return opal_common_ucx_wait_request_opal_status(status_ptr, mca_spml_self->ucp_worker);
}

Просмотреть файл

@ -29,13 +29,13 @@
{ \
int rc = OSHMEM_SUCCESS; \
size_t size = 0; \
type out_value; \
uint64_t out_value; \
\
RUNTIME_CHECK_INIT(); \
RUNTIME_CHECK_PE(pe); \
RUNTIME_CHECK_ADDR(target); \
\
size = sizeof(out_value); \
size = sizeof(value); \
rc = MCA_ATOMIC_CALL(cswap( \
(void*)target, \
(void*)&out_value, \

Просмотреть файл

@ -36,12 +36,12 @@ SHMEM_GENERATE_FORTRAN_BINDINGS_FUNCTION (ompi_fortran_integer4_t,
ompi_fortran_integer4_t shmem_int4_cswap_f(FORTRAN_POINTER_T target, MPI_Fint *cond, FORTRAN_POINTER_T value, MPI_Fint *pe)
{
ompi_fortran_integer4_t out_value = 0;
ompi_fortran_integer8_t out_value = 0;
MCA_ATOMIC_CALL(cswap(FPTR_2_VOID_PTR(target),
(void *)&out_value,
FPTR_2_INT(cond, sizeof(out_value)),
FPTR_2_INT(value, sizeof(out_value)),
FPTR_2_INT(cond, sizeof(ompi_fortran_integer4_t)),
FPTR_2_INT(value, sizeof(ompi_fortran_integer4_t)),
sizeof(out_value),
OMPI_FINT_2_INT(*pe)));