ATOMIC/UCX: optimization for cswap
- Use a uint64_t output datatype for cswap to avoid size-dependent branches in the implementations.
Signed-off-by: Sergey Oblomov <sergeyo@mellanox.com>
Этот коммит содержится в:
родитель
f574c14e3a
Коммит
5eb8c99cd7
@ -101,7 +101,8 @@ struct mca_atomic_base_module_1_0_0_t {
|
||||
size_t size,
|
||||
int pe);
|
||||
int (*atomic_cswap)(void *target,
|
||||
void *prev,
|
||||
uint64_t *prev, /* prev is used internally by wrapper, we may
|
||||
always use 64-bit value */
|
||||
uint64_t cond,
|
||||
uint64_t value,
|
||||
size_t size,
|
||||
|
@ -37,7 +37,7 @@ mca_atomic_base_module_t*
|
||||
mca_atomic_basic_query(int *priority);
|
||||
|
||||
int mca_atomic_basic_cswap(void *target,
|
||||
void *prev,
|
||||
uint64_t *prev,
|
||||
uint64_t cond,
|
||||
uint64_t value,
|
||||
size_t size,
|
||||
|
@ -19,7 +19,7 @@
|
||||
#include "atomic_basic.h"
|
||||
|
||||
int mca_atomic_basic_cswap(void *target,
|
||||
void *prev,
|
||||
uint64_t *prev,
|
||||
uint64_t cond,
|
||||
uint64_t value,
|
||||
size_t nlong,
|
||||
|
@ -57,7 +57,7 @@ int mca_atomic_mxm_swap(void *target,
|
||||
size_t nlong,
|
||||
int pe);
|
||||
int mca_atomic_mxm_cswap(void *target,
|
||||
void *prev,
|
||||
uint64_t *prev,
|
||||
uint64_t cond,
|
||||
uint64_t value,
|
||||
size_t nlong,
|
||||
|
@ -44,7 +44,7 @@ int mca_atomic_mxm_swap(void *target,
|
||||
}
|
||||
|
||||
int mca_atomic_mxm_cswap(void *target,
|
||||
void *prev,
|
||||
uint64_t *prev,
|
||||
uint64_t cond,
|
||||
uint64_t value,
|
||||
size_t nlong,
|
||||
@ -53,8 +53,8 @@ int mca_atomic_mxm_cswap(void *target,
|
||||
mxm_send_req_t sreq;
|
||||
|
||||
mca_atomic_mxm_req_init(&sreq, pe, target, nlong);
|
||||
memcpy(prev, &value, nlong);
|
||||
|
||||
*prev = value;
|
||||
sreq.op.atomic.value = value;
|
||||
sreq.base.data.buffer.ptr = prev;
|
||||
sreq.opcode = MXM_REQ_OP_ATOMIC_CSWAP;
|
||||
|
@ -43,7 +43,7 @@ mca_atomic_base_module_t*
|
||||
mca_atomic_ucx_query(int *priority);
|
||||
|
||||
int mca_atomic_ucx_cswap(void *target,
|
||||
void *prev,
|
||||
uint64_t *prev,
|
||||
uint64_t cond,
|
||||
uint64_t value,
|
||||
size_t size,
|
||||
|
@ -19,53 +19,30 @@
|
||||
|
||||
#include "atomic_ucx.h"
|
||||
|
||||
/* size argument should be constant to hint compiler
|
||||
* to calculate size relative branches in compile time */
|
||||
static inline
|
||||
int mca_atomic_ucx_cswap_inner(void *target,
|
||||
void *prev,
|
||||
uint64_t cond,
|
||||
uint64_t value,
|
||||
size_t size,
|
||||
int pe)
|
||||
{
|
||||
int status;
|
||||
ucs_status_ptr_t status_ptr;
|
||||
spml_ucx_mkey_t *ucx_mkey;
|
||||
uint64_t rva;
|
||||
uint64_t val;
|
||||
|
||||
val = value;
|
||||
ucx_mkey = mca_spml_ucx_get_mkey(pe, target, (void *)&rva);
|
||||
status_ptr = ucp_atomic_fetch_nb(mca_spml_self->ucp_peers[pe].ucp_conn,
|
||||
UCP_ATOMIC_FETCH_OP_CSWAP, cond, &val, size,
|
||||
rva, ucx_mkey->rkey,
|
||||
opal_common_ucx_empty_complete_cb);
|
||||
status = opal_common_ucx_wait_request_opal_status(status_ptr, mca_spml_self->ucp_worker);
|
||||
if (OPAL_SUCCESS == status) {
|
||||
assert(NULL != prev);
|
||||
if (sizeof(uint32_t) == size) {
|
||||
*(uint32_t*)prev = val;
|
||||
} else {
|
||||
*(uint64_t*)prev = val;
|
||||
}
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
||||
int mca_atomic_ucx_cswap(void *target,
|
||||
void *prev,
|
||||
uint64_t *prev,
|
||||
uint64_t cond,
|
||||
uint64_t value,
|
||||
size_t size,
|
||||
int pe)
|
||||
{
|
||||
if (sizeof(uint64_t) == size) {
|
||||
return mca_atomic_ucx_cswap_inner(target, prev, cond, value, sizeof(uint64_t), pe);
|
||||
} else if (sizeof(uint32_t) == size) {
|
||||
return mca_atomic_ucx_cswap_inner(target, prev, cond, value, sizeof(uint32_t), pe);
|
||||
} else {
|
||||
int status;
|
||||
ucs_status_ptr_t status_ptr;
|
||||
spml_ucx_mkey_t *ucx_mkey;
|
||||
uint64_t rva;
|
||||
|
||||
if ((8 != size) && (4 != size)) {
|
||||
ATOMIC_ERROR("[#%d] Type size must be 4 or 8 bytes.", my_pe);
|
||||
return OSHMEM_ERROR;
|
||||
}
|
||||
|
||||
assert(NULL != prev);
|
||||
|
||||
*prev = value;
|
||||
ucx_mkey = mca_spml_ucx_get_mkey(pe, target, (void *)&rva);
|
||||
status_ptr = ucp_atomic_fetch_nb(mca_spml_self->ucp_peers[pe].ucp_conn,
|
||||
UCP_ATOMIC_FETCH_OP_CSWAP, cond, prev, size,
|
||||
rva, ucx_mkey->rkey,
|
||||
opal_common_ucx_empty_complete_cb);
|
||||
return opal_common_ucx_wait_request_opal_status(status_ptr, mca_spml_self->ucp_worker);
|
||||
}
|
||||
|
@ -29,13 +29,13 @@
|
||||
{ \
|
||||
int rc = OSHMEM_SUCCESS; \
|
||||
size_t size = 0; \
|
||||
type out_value; \
|
||||
uint64_t out_value; \
|
||||
\
|
||||
RUNTIME_CHECK_INIT(); \
|
||||
RUNTIME_CHECK_PE(pe); \
|
||||
RUNTIME_CHECK_ADDR(target); \
|
||||
\
|
||||
size = sizeof(out_value); \
|
||||
size = sizeof(value); \
|
||||
rc = MCA_ATOMIC_CALL(cswap( \
|
||||
(void*)target, \
|
||||
(void*)&out_value, \
|
||||
|
@ -36,12 +36,12 @@ SHMEM_GENERATE_FORTRAN_BINDINGS_FUNCTION (ompi_fortran_integer4_t,
|
||||
|
||||
ompi_fortran_integer4_t shmem_int4_cswap_f(FORTRAN_POINTER_T target, MPI_Fint *cond, FORTRAN_POINTER_T value, MPI_Fint *pe)
|
||||
{
|
||||
ompi_fortran_integer4_t out_value = 0;
|
||||
ompi_fortran_integer8_t out_value = 0;
|
||||
|
||||
MCA_ATOMIC_CALL(cswap(FPTR_2_VOID_PTR(target),
|
||||
(void *)&out_value,
|
||||
FPTR_2_INT(cond, sizeof(out_value)),
|
||||
FPTR_2_INT(value, sizeof(out_value)),
|
||||
FPTR_2_INT(cond, sizeof(ompi_fortran_integer4_t)),
|
||||
FPTR_2_INT(value, sizeof(ompi_fortran_integer4_t)),
|
||||
sizeof(out_value),
|
||||
OMPI_FINT_2_INT(*pe)));
|
||||
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user