diff --git a/config/ompi_check_ucx.m4 b/config/ompi_check_ucx.m4 index e5a5ccf047..8b5332faac 100644 --- a/config/ompi_check_ucx.m4 +++ b/config/ompi_check_ucx.m4 @@ -112,6 +112,14 @@ AC_DEFUN([OMPI_CHECK_UCX],[ ucp_request_check_status, ucp_put_nb, ucp_get_nb], [], [], [#include <ucp/api/ucp.h>]) + AC_CHECK_DECLS([UCP_ATOMIC_POST_OP_AND, + UCP_ATOMIC_POST_OP_OR, + UCP_ATOMIC_POST_OP_XOR, + UCP_ATOMIC_FETCH_OP_FAND, + UCP_ATOMIC_FETCH_OP_FOR, + UCP_ATOMIC_FETCH_OP_FXOR], + [], [], + [#include <ucp/api/ucp.h>]) CPPFLAGS=$old_CPPFLAGS OPAL_SUMMARY_ADD([[Transports]],[[Open UCX]],[$1],[$ompi_check_ucx_happy])])]) diff --git a/oshmem/include/pshmem.h b/oshmem/include/pshmem.h index 8ab2cda818..f01201b403 100644 --- a/oshmem/include/pshmem.h +++ b/oshmem/include/pshmem.h @@ -378,6 +378,42 @@ OSHMEM_DECLSPEC long long pshmem_longlong_fadd(long long *target, long long valu long long*: pshmem_longlong_fadd)(dst, val, pe) #endif +/* Atomic Fetch&And */ +OSHMEM_DECLSPEC int pshmem_int_atomic_fand(int *target, int value, int pe); +OSHMEM_DECLSPEC long pshmem_long_atomic_fand(long *target, long value, int pe); +OSHMEM_DECLSPEC long long pshmem_longlong_atomic_fand(long long *target, long long value, int pe); +#if OSHMEMP_HAVE_C11 +#define pshmem_atomic_fand(dst, val, pe) \ + _Generic(&*(dst), \ + int*: pshmem_int_atomic_fand, \ + long*: pshmem_long_atomic_fand, \ + long long*: pshmem_longlong_atomic_fand)(dst, val, pe) +#endif + +/* Atomic Fetch&Or */ +OSHMEM_DECLSPEC int pshmem_int_atomic_for(int *target, int value, int pe); +OSHMEM_DECLSPEC long pshmem_long_atomic_for(long *target, long value, int pe); +OSHMEM_DECLSPEC long long pshmem_longlong_atomic_for(long long *target, long long value, int pe); +#if OSHMEMP_HAVE_C11 +#define pshmem_atomic_for(dst, val, pe) \ + _Generic(&*(dst), \ + int*: pshmem_int_atomic_for, \ + long*: pshmem_long_atomic_for, \ + long long*: pshmem_longlong_atomic_for)(dst, val, pe) +#endif + +/* Atomic Fetch&Xor */ +OSHMEM_DECLSPEC int pshmem_int_atomic_fxor(int *target, int
value, int pe); +OSHMEM_DECLSPEC long pshmem_long_atomic_fxor(long *target, long value, int pe); +OSHMEM_DECLSPEC long long pshmem_longlong_atomic_fxor(long long *target, long long value, int pe); +#if OSHMEMP_HAVE_C11 +#define pshmem_atomic_fxor(dst, val, pe) \ + _Generic(&*(dst), \ + int*: pshmem_int_atomic_fxor, \ + long*: pshmem_long_atomic_fxor, \ + long long*: pshmem_longlong_atomic_fxor)(dst, val, pe) +#endif + /* Atomic Fetch */ OSHMEM_DECLSPEC int pshmem_int_fetch(const int *target, int pe); OSHMEM_DECLSPEC long pshmem_long_fetch(const long *target, int pe); @@ -406,7 +442,7 @@ OSHMEM_DECLSPEC long long pshmem_longlong_finc(long long *target, int pe); long long*: pshmem_longlong_finc)(dst, val, pe) #endif -/* Atomic Add*/ +/* Atomic Add */ OSHMEM_DECLSPEC void pshmem_int_add(int *target, int value, int pe); OSHMEM_DECLSPEC void pshmem_long_add(long *target, long value, int pe); OSHMEM_DECLSPEC void pshmem_longlong_add(long long *target, long long value, int pe); @@ -418,6 +454,42 @@ OSHMEM_DECLSPEC void pshmem_longlong_add(long long *target, long long value, int long long*: pshmem_longlong_add)(dst, val, pe) #endif +/* Atomic And */ +OSHMEM_DECLSPEC void pshmem_int_atomic_and(int *target, int value, int pe); +OSHMEM_DECLSPEC void pshmem_long_atomic_and(long *target, long value, int pe); +OSHMEM_DECLSPEC void pshmem_longlong_atomic_and(long long *target, long long value, int pe); +#if OSHMEMP_HAVE_C11 +#define pshmem_atomic_and(dst, val, pe) \ + _Generic(&*(dst), \ + int*: pshmem_int_atomic_and, \ + long*: pshmem_long_atomic_and, \ + long long*: pshmem_longlong_atomic_and)(dst, val, pe) +#endif + +/* Atomic Or */ +OSHMEM_DECLSPEC void pshmem_int_atomic_or(int *target, int value, int pe); +OSHMEM_DECLSPEC void pshmem_long_atomic_or(long *target, long value, int pe); +OSHMEM_DECLSPEC void pshmem_longlong_atomic_or(long long *target, long long value, int pe); +#if OSHMEMP_HAVE_C11 +#define pshmem_atomic_or(dst, val, pe) \ + _Generic(&*(dst), \ + int*: 
pshmem_int_atomic_or, \ + long*: pshmem_long_atomic_or, \ + long long*: pshmem_longlong_atomic_or)(dst, val, pe) +#endif + +/* Atomic Xor */ +OSHMEM_DECLSPEC void pshmem_int_atomic_xor(int *target, int value, int pe); +OSHMEM_DECLSPEC void pshmem_long_atomic_xor(long *target, long value, int pe); +OSHMEM_DECLSPEC void pshmem_longlong_atomic_xor(long long *target, long long value, int pe); +#if OSHMEMP_HAVE_C11 +#define pshmem_atomic_xor(dst, val, pe) \ + _Generic(&*(dst), \ + int*: pshmem_int_atomic_xor, \ + long*: pshmem_long_atomic_xor, \ + long long*: pshmem_longlong_atomic_xor)(dst, val, pe) +#endif + /* Atomic Inc */ OSHMEM_DECLSPEC void pshmem_int_inc(int *target, int pe); OSHMEM_DECLSPEC void pshmem_long_inc(long *target, int pe); diff --git a/oshmem/include/pshmemx.h b/oshmem/include/pshmemx.h index 81e929cd55..f5472f6221 100644 --- a/oshmem/include/pshmemx.h +++ b/oshmem/include/pshmemx.h @@ -89,6 +89,18 @@ OSHMEM_DECLSPEC int64_t pshmemx_int64_cswap(int64_t *target, int64_t cond, int64 OSHMEM_DECLSPEC int32_t pshmemx_int32_fadd(int32_t *target, int32_t value, int pe); OSHMEM_DECLSPEC int64_t pshmemx_int64_fadd(int64_t *target, int64_t value, int pe); +/* Atomic Fetch&And */ +OSHMEM_DECLSPEC int32_t pshmemx_int32_atomic_fand(int32_t *target, int32_t value, int pe); +OSHMEM_DECLSPEC int64_t pshmemx_int64_atomic_fand(int64_t *target, int64_t value, int pe); + +/* Atomic Fetch&Or */ +OSHMEM_DECLSPEC int32_t pshmemx_int32_atomic_for(int32_t *target, int32_t value, int pe); +OSHMEM_DECLSPEC int64_t pshmemx_int64_atomic_for(int64_t *target, int64_t value, int pe); + +/* Atomic Fetch&Xor */ +OSHMEM_DECLSPEC int32_t pshmemx_int32_atomic_fxor(int32_t *target, int32_t value, int pe); +OSHMEM_DECLSPEC int64_t pshmemx_int64_atomic_fxor(int64_t *target, int64_t value, int pe); + /* Atomic Fetch */ OSHMEM_DECLSPEC int32_t pshmemx_int32_fetch(const int32_t *target, int pe); OSHMEM_DECLSPEC int64_t pshmemx_int64_fetch(const int64_t *target, int pe); @@ -97,10 +109,22 @@ 
OSHMEM_DECLSPEC int64_t pshmemx_int64_fetch(const int64_t *target, int pe); OSHMEM_DECLSPEC int32_t pshmemx_int32_finc(int32_t *target, int pe); OSHMEM_DECLSPEC int64_t pshmemx_int64_finc(int64_t *target, int pe); -/* Atomic Add*/ +/* Atomic Add */ OSHMEM_DECLSPEC void pshmemx_int32_add(int32_t *target, int32_t value, int pe); OSHMEM_DECLSPEC void pshmemx_int64_add(int64_t *target, int64_t value, int pe); +/* Atomic And */ +OSHMEM_DECLSPEC void pshmemx_int32_atomic_and(int32_t *target, int32_t value, int pe); +OSHMEM_DECLSPEC void pshmemx_int64_atomic_and(int64_t *target, int64_t value, int pe); + +/* Atomic Or */ +OSHMEM_DECLSPEC void pshmemx_int32_atomic_or(int32_t *target, int32_t value, int pe); +OSHMEM_DECLSPEC void pshmemx_int64_atomic_or(int64_t *target, int64_t value, int pe); + +/* Atomic Xor */ +OSHMEM_DECLSPEC void pshmemx_int32_atomic_xor(int32_t *target, int32_t value, int pe); +OSHMEM_DECLSPEC void pshmemx_int64_atomic_xor(int64_t *target, int64_t value, int pe); + /* Atomic Inc */ OSHMEM_DECLSPEC void pshmemx_int32_inc(int32_t *target, int pe); OSHMEM_DECLSPEC void pshmemx_int64_inc(int64_t *target, int pe); diff --git a/oshmem/include/shmem.h.in b/oshmem/include/shmem.h.in index a81e890cdc..c58099f1e1 100644 --- a/oshmem/include/shmem.h.in +++ b/oshmem/include/shmem.h.in @@ -463,6 +463,42 @@ OSHMEM_DECLSPEC long long shmem_longlong_fadd(long long *target, long long value long long*: shmem_longlong_fadd)(dst, val, pe) #endif +/* Atomic Fetch&And */ +OSHMEM_DECLSPEC int shmem_int_atomic_fand(int *target, int value, int pe); +OSHMEM_DECLSPEC long shmem_long_atomic_fand(long *target, long value, int pe); +OSHMEM_DECLSPEC long long shmem_longlong_atomic_fand(long long *target, long long value, int pe); +#if OSHMEM_HAVE_C11 +#define shmem_atomic_fand(dst, val, pe) \ + _Generic(&*(dst), \ + int*: shmem_int_atomic_fand, \ + long*: shmem_long_atomic_fand, \ + long long*: shmem_longlong_atomic_fand)(dst, val, pe) +#endif + +/* Atomic Fetch&Or */ 
+OSHMEM_DECLSPEC int shmem_int_atomic_for(int *target, int value, int pe); +OSHMEM_DECLSPEC long shmem_long_atomic_for(long *target, long value, int pe); +OSHMEM_DECLSPEC long long shmem_longlong_atomic_for(long long *target, long long value, int pe); +#if OSHMEM_HAVE_C11 +#define shmem_atomic_for(dst, val, pe) \ + _Generic(&*(dst), \ + int*: shmem_int_atomic_for, \ + long*: shmem_long_atomic_for, \ + long long*: shmem_longlong_atomic_for)(dst, val, pe) +#endif + +/* Atomic Fetch&Xor */ +OSHMEM_DECLSPEC int shmem_int_atomic_fxor(int *target, int value, int pe); +OSHMEM_DECLSPEC long shmem_long_atomic_fxor(long *target, long value, int pe); +OSHMEM_DECLSPEC long long shmem_longlong_atomic_fxor(long long *target, long long value, int pe); +#if OSHMEM_HAVE_C11 +#define shmem_atomic_fxor(dst, val, pe) \ + _Generic(&*(dst), \ + int*: shmem_int_atomic_fxor, \ + long*: shmem_long_atomic_fxor, \ + long long*: shmem_longlong_atomic_fxor)(dst, val, pe) +#endif + /* Atomic Fetch */ OSHMEM_DECLSPEC int shmem_int_fetch(const int *target, int pe); OSHMEM_DECLSPEC long shmem_long_fetch(const long *target, int pe); @@ -491,7 +527,7 @@ OSHMEM_DECLSPEC long long shmem_longlong_finc(long long *target, int pe); long long*: shmem_longlong_finc)(dst, pe) #endif -/* Atomic Add*/ +/* Atomic Add */ OSHMEM_DECLSPEC void shmem_int_add(int *target, int value, int pe); OSHMEM_DECLSPEC void shmem_long_add(long *target, long value, int pe); OSHMEM_DECLSPEC void shmem_longlong_add(long long *target, long long value, int pe); @@ -503,6 +539,42 @@ OSHMEM_DECLSPEC void shmem_longlong_add(long long *target, long long value, int long long*: shmem_longlong_add)(dst, val, pe) #endif +/* Atomic And */ +OSHMEM_DECLSPEC void shmem_int_atomic_and(int *target, int value, int pe); +OSHMEM_DECLSPEC void shmem_long_atomic_and(long *target, long value, int pe); +OSHMEM_DECLSPEC void shmem_longlong_atomic_and(long long *target, long long value, int pe); +#if OSHMEM_HAVE_C11 +#define shmem_atomic_and(dst, val, pe) 
\ + _Generic(&*(dst), \ + int*: shmem_int_atomic_and, \ + long*: shmem_long_atomic_and, \ + long long*: shmem_longlong_atomic_and)(dst, val, pe) +#endif + +/* Atomic Or */ +OSHMEM_DECLSPEC void shmem_int_atomic_or(int *target, int value, int pe); +OSHMEM_DECLSPEC void shmem_long_atomic_or(long *target, long value, int pe); +OSHMEM_DECLSPEC void shmem_longlong_atomic_or(long long *target, long long value, int pe); +#if OSHMEM_HAVE_C11 +#define shmem_atomic_or(dst, val, pe) \ + _Generic(&*(dst), \ + int*: shmem_int_atomic_or, \ + long*: shmem_long_atomic_or, \ + long long*: shmem_longlong_atomic_or)(dst, val, pe) +#endif + +/* Atomic Xor */ +OSHMEM_DECLSPEC void shmem_int_atomic_xor(int *target, int value, int pe); +OSHMEM_DECLSPEC void shmem_long_atomic_xor(long *target, long value, int pe); +OSHMEM_DECLSPEC void shmem_longlong_atomic_xor(long long *target, long long value, int pe); +#if OSHMEM_HAVE_C11 +#define shmem_atomic_xor(dst, val, pe) \ + _Generic(&*(dst), \ + int*: shmem_int_atomic_xor, \ + long*: shmem_long_atomic_xor, \ + long long*: shmem_longlong_atomic_xor)(dst, val, pe) +#endif + /* Atomic Inc */ OSHMEM_DECLSPEC void shmem_int_inc(int *target, int pe); OSHMEM_DECLSPEC void shmem_long_inc(long *target, int pe); diff --git a/oshmem/include/shmemx.h b/oshmem/include/shmemx.h index 4186cc04c6..22aac84c8d 100644 --- a/oshmem/include/shmemx.h +++ b/oshmem/include/shmemx.h @@ -76,6 +76,18 @@ OSHMEM_DECLSPEC int64_t shmemx_int64_cswap(int64_t *target, int64_t cond, int64_ OSHMEM_DECLSPEC int32_t shmemx_int32_fadd(int32_t *target, int32_t value, int pe); OSHMEM_DECLSPEC int64_t shmemx_int64_fadd(int64_t *target, int64_t value, int pe); +/* Atomic Fetch&And */ +OSHMEM_DECLSPEC int32_t shmemx_int32_atomic_fand(int32_t *target, int32_t value, int pe); +OSHMEM_DECLSPEC int64_t shmemx_int64_atomic_fand(int64_t *target, int64_t value, int pe); + +/* Atomic Fetch&Or */ +OSHMEM_DECLSPEC int32_t shmemx_int32_atomic_for(int32_t *target, int32_t value, int pe); 
+OSHMEM_DECLSPEC int64_t shmemx_int64_atomic_for(int64_t *target, int64_t value, int pe); + +/* Atomic Fetch&Xor */ +OSHMEM_DECLSPEC int32_t shmemx_int32_atomic_fxor(int32_t *target, int32_t value, int pe); +OSHMEM_DECLSPEC int64_t shmemx_int64_atomic_fxor(int64_t *target, int64_t value, int pe); + /* Atomic Fetch */ OSHMEM_DECLSPEC int32_t shmemx_int32_fetch(const int32_t *target, int pe); OSHMEM_DECLSPEC int64_t shmemx_int64_fetch(const int64_t *target, int pe); @@ -84,10 +96,22 @@ OSHMEM_DECLSPEC int64_t shmemx_int64_fetch(const int64_t *target, int pe); OSHMEM_DECLSPEC int32_t shmemx_int32_finc(int32_t *target, int pe); OSHMEM_DECLSPEC int64_t shmemx_int64_finc(int64_t *target, int pe); -/* Atomic Add*/ +/* Atomic Add */ OSHMEM_DECLSPEC void shmemx_int32_add(int32_t *target, int32_t value, int pe); OSHMEM_DECLSPEC void shmemx_int64_add(int64_t *target, int64_t value, int pe); +/* Atomic And */ +OSHMEM_DECLSPEC void shmemx_int32_atomic_and(int32_t *target, int32_t value, int pe); +OSHMEM_DECLSPEC void shmemx_int64_atomic_and(int64_t *target, int64_t value, int pe); + +/* Atomic Or */ +OSHMEM_DECLSPEC void shmemx_int32_atomic_or(int32_t *target, int32_t value, int pe); +OSHMEM_DECLSPEC void shmemx_int64_atomic_or(int64_t *target, int64_t value, int pe); + +/* Atomic Xor */ +OSHMEM_DECLSPEC void shmemx_int32_atomic_xor(int32_t *target, int32_t value, int pe); +OSHMEM_DECLSPEC void shmemx_int64_atomic_xor(int64_t *target, int64_t value, int pe); + /* Atomic Inc */ OSHMEM_DECLSPEC void shmemx_int32_inc(int32_t *target, int pe); OSHMEM_DECLSPEC void shmemx_int64_inc(int64_t *target, int pe); diff --git a/oshmem/mca/atomic/atomic.h b/oshmem/mca/atomic/atomic.h index b6675be5b2..017c526420 100644 --- a/oshmem/mca/atomic/atomic.h +++ b/oshmem/mca/atomic/atomic.h @@ -35,6 +35,49 @@ BEGIN_C_DECLS #define OSHMEM_ATOMIC_PTR_2_INT(ptr, size) ((size) == 8 ? 
*(uint64_t*)(ptr) : *(uint32_t*)(ptr)) +#define OSHMEM_TYPE_OP(type_name, type, prefix, op) /* emits the non-fetching API entry point: runs the runtime checks, then MCA_ATOMIC_CALL(op) */ \ + void prefix##type_name##_atomic_##op(type *target, type value, int pe) \ + { \ + int rc = OSHMEM_SUCCESS; \ + size_t size = 0; \ + \ + RUNTIME_CHECK_INIT(); \ + RUNTIME_CHECK_PE(pe); \ + RUNTIME_CHECK_ADDR(target); \ + \ + size = sizeof(value); \ + rc = MCA_ATOMIC_CALL(op( \ + (void*)target, \ + value, \ + size, \ + pe)); \ + RUNTIME_CHECK_RC(rc); \ + \ + return ; \ + } + +#define OSHMEM_TYPE_FOP(type_name, type, prefix, op) /* emits the fetching API entry point: same checks, returns the value the op stores in out_value */ \ + type prefix##type_name##_atomic_##op(type *target, type value, int pe) \ + { \ + int rc = OSHMEM_SUCCESS; \ + size_t size = 0; \ + type out_value = 0; /* never return an indeterminate value if the op does not fill it */ \ + \ + RUNTIME_CHECK_INIT(); \ + RUNTIME_CHECK_PE(pe); \ + RUNTIME_CHECK_ADDR(target); \ + \ + size = sizeof(out_value); \ + rc = MCA_ATOMIC_CALL(op( \ + (void*)target, \ + (void*)&out_value, \ + value, \ + size, \ + pe)); \ + RUNTIME_CHECK_RC(rc); \ + \ + return out_value; \ + } /* ******************************************************************** */ struct oshmem_op_t; @@ -92,11 +135,38 @@ struct mca_atomic_base_module_1_0_0_t { uint64_t value, size_t size, int pe); + int (*atomic_and)(void *target, + uint64_t value, + size_t size, + int pe); + int (*atomic_or)(void *target, + uint64_t value, + size_t size, + int pe); + int (*atomic_xor)(void *target, + uint64_t value, + size_t size, + int pe); int (*atomic_fadd)(void *target, void *prev, uint64_t value, size_t size, int pe); + int (*atomic_fand)(void *target, + void *prev, + uint64_t value, + size_t size, + int pe); + int (*atomic_for)(void *target, + void *prev, + uint64_t value, + size_t size, + int pe); + int (*atomic_fxor)(void *target, + void *prev, + uint64_t value, + size_t size, + int pe); int (*atomic_swap)(void *target, void *prev, uint64_t value, diff --git a/oshmem/mca/atomic/base/atomic_base_select.c b/oshmem/mca/atomic/base/atomic_base_select.c index 6ea9e8e7a0..5d3fe50d8c 100644 --- a/oshmem/mca/atomic/base/atomic_base_select.c +++
b/oshmem/mca/atomic/base/atomic_base_select.c @@ -98,8 +98,11 @@ int mca_atomic_base_select(void) OBJ_RELEASE(avail->ac_module); OBJ_RELEASE(avail); /* check correctness */ - if (!(mca_atomic.atomic_fadd) || !(mca_atomic.atomic_cswap) || - !(mca_atomic.atomic_add) || !(mca_atomic.atomic_swap)) { + if (!(mca_atomic.atomic_fadd) || !(mca_atomic.atomic_add) || + !(mca_atomic.atomic_fand) || !(mca_atomic.atomic_and) || + !(mca_atomic.atomic_for) || !(mca_atomic.atomic_or) || + !(mca_atomic.atomic_fxor) || !(mca_atomic.atomic_xor) || + !(mca_atomic.atomic_cswap) || !(mca_atomic.atomic_swap)) { return OSHMEM_ERR_NOT_FOUND; } } diff --git a/oshmem/mca/atomic/basic/atomic_basic_module.c b/oshmem/mca/atomic/basic/atomic_basic_module.c index 86b1529873..f13e28f680 100644 --- a/oshmem/mca/atomic/basic/atomic_basic_module.c +++ b/oshmem/mca/atomic/basic/atomic_basic_module.c @@ -142,6 +142,27 @@ static int mca_atomic_basic_add(void *target, uint64_t value, MCA_BASIC_OP(size, oshmem_op_sum_int32, oshmem_op_sum_int64)); } +static int mca_atomic_basic_and(void *target, uint64_t value, + size_t size, int pe) +{ + return mca_atomic_basic_op(target, value, size, pe, + MCA_BASIC_OP(size, oshmem_op_and_int32, oshmem_op_and_int64)); /* 32-bit operands need the 'and' reducer too, not sum */ +} + +static int mca_atomic_basic_or(void *target, uint64_t value, + size_t size, int pe) +{ + return mca_atomic_basic_op(target, value, size, pe, + MCA_BASIC_OP(size, oshmem_op_or_int32, oshmem_op_or_int64)); /* 32-bit operands need the 'or' reducer too, not sum */ +} + +static int mca_atomic_basic_xor(void *target, uint64_t value, + size_t size, int pe) +{ + return mca_atomic_basic_op(target, value, size, pe, + MCA_BASIC_OP(size, oshmem_op_xor_int32, oshmem_op_xor_int64)); /* 32-bit operands need the 'xor' reducer too, not sum */ +} + static int mca_atomic_basic_fadd(void *target, void *prev, uint64_t value, size_t size, int pe) { @@ -149,6 +170,27 @@ static int mca_atomic_basic_fadd(void *target, void *prev, uint64_t value, MCA_BASIC_OP(size, oshmem_op_sum_int32, oshmem_op_sum_int64)); } +static int mca_atomic_basic_fand(void *target, void *prev, uint64_t value, +
size_t size, int pe) +{ + return mca_atomic_basic_fop(target, prev, value, size, pe, + MCA_BASIC_OP(size, oshmem_op_and_int32, oshmem_op_and_int64)); /* 32-bit operands need the 'and' reducer too, not sum */ +} + +static int mca_atomic_basic_for(void *target, void *prev, uint64_t value, + size_t size, int pe) +{ + return mca_atomic_basic_fop(target, prev, value, size, pe, + MCA_BASIC_OP(size, oshmem_op_or_int32, oshmem_op_or_int64)); /* 32-bit operands need the 'or' reducer too, not sum */ +} + +static int mca_atomic_basic_fxor(void *target, void *prev, uint64_t value, + size_t size, int pe) +{ + return mca_atomic_basic_fop(target, prev, value, size, pe, + MCA_BASIC_OP(size, oshmem_op_xor_int32, oshmem_op_xor_int64)); /* 32-bit operands need the 'xor' reducer too, not sum */ +} + static int mca_atomic_basic_swap(void *target, void *prev, uint64_t value, size_t size, int pe) { @@ -166,7 +208,13 @@ mca_atomic_basic_query(int *priority) module = OBJ_NEW(mca_atomic_basic_module_t); if (module) { module->super.atomic_add = mca_atomic_basic_add; + module->super.atomic_and = mca_atomic_basic_and; + module->super.atomic_or = mca_atomic_basic_or; + module->super.atomic_xor = mca_atomic_basic_xor; module->super.atomic_fadd = mca_atomic_basic_fadd; + module->super.atomic_fand = mca_atomic_basic_fand; + module->super.atomic_for = mca_atomic_basic_for; + module->super.atomic_fxor = mca_atomic_basic_fxor; module->super.atomic_swap = mca_atomic_basic_swap; module->super.atomic_cswap = mca_atomic_basic_cswap; return &(module->super); diff --git a/oshmem/mca/atomic/mxm/atomic_mxm_module.c b/oshmem/mca/atomic/mxm/atomic_mxm_module.c index d2db654604..5a3087c9fe 100644 --- a/oshmem/mca/atomic/mxm/atomic_mxm_module.c +++ b/oshmem/mca/atomic/mxm/atomic_mxm_module.c @@ -32,6 +32,23 @@ int mca_atomic_mxm_finalize(void) return OSHMEM_SUCCESS; } +static int mca_atomic_mxm_op_not_implemented(void *target, + uint64_t value, + size_t size, + int pe) +{ + return OSHMEM_ERR_NOT_IMPLEMENTED; +} + +static int mca_atomic_mxm_fop_not_implemented(void *target, + void *prev, + uint64_t value, + size_t size, + int pe) +{ + return OSHMEM_ERR_NOT_IMPLEMENTED; +} +
mca_atomic_base_module_t * mca_atomic_mxm_query(int *priority) { @@ -42,7 +59,13 @@ mca_atomic_mxm_query(int *priority) module = OBJ_NEW(mca_atomic_mxm_module_t); if (module) { module->super.atomic_add = mca_atomic_mxm_add; + module->super.atomic_and = mca_atomic_mxm_op_not_implemented; /* and/or/xor and their fetching forms are wired to stubs that return OSHMEM_ERR_NOT_IMPLEMENTED */ + module->super.atomic_or = mca_atomic_mxm_op_not_implemented; + module->super.atomic_xor = mca_atomic_mxm_op_not_implemented; module->super.atomic_fadd = mca_atomic_mxm_fadd; + module->super.atomic_fand = mca_atomic_mxm_fop_not_implemented; + module->super.atomic_for = mca_atomic_mxm_fop_not_implemented; + module->super.atomic_fxor = mca_atomic_mxm_fop_not_implemented; module->super.atomic_swap = mca_atomic_mxm_swap; module->super.atomic_cswap = mca_atomic_mxm_cswap; return &(module->super); diff --git a/oshmem/mca/atomic/ucx/atomic_ucx_module.c b/oshmem/mca/atomic/ucx/atomic_ucx_module.c index f82e1ff932..870578a85e 100644 --- a/oshmem/mca/atomic/ucx/atomic_ucx_module.c +++ b/oshmem/mca/atomic/ucx/atomic_ucx_module.c @@ -83,6 +83,42 @@ static int mca_atomic_ucx_add(void *target, return mca_atomic_ucx_op(target, value, size, pe, UCP_ATOMIC_POST_OP_ADD); } +static int mca_atomic_ucx_and(void *target, + uint64_t value, + size_t size, + int pe) +{ +#if HAVE_DECL_UCP_ATOMIC_POST_OP_AND + return mca_atomic_ucx_op(target, value, size, pe, UCP_ATOMIC_POST_OP_AND); +#else /* configure did not find a UCP_ATOMIC_POST_OP_AND declaration */ + return OSHMEM_ERR_NOT_IMPLEMENTED; +#endif /* HAVE_DECL_UCP_ATOMIC_POST_OP_AND */ +} + +static int mca_atomic_ucx_or(void *target, + uint64_t value, + size_t size, + int pe) +{ +#if HAVE_DECL_UCP_ATOMIC_POST_OP_OR + return mca_atomic_ucx_op(target, value, size, pe, UCP_ATOMIC_POST_OP_OR); +#else /* configure did not find a UCP_ATOMIC_POST_OP_OR declaration */ + return OSHMEM_ERR_NOT_IMPLEMENTED; +#endif /* HAVE_DECL_UCP_ATOMIC_POST_OP_OR */ +} + +static int mca_atomic_ucx_xor(void *target, + uint64_t value, + size_t size, + int pe) +{ +#if HAVE_DECL_UCP_ATOMIC_POST_OP_XOR + return mca_atomic_ucx_op(target, value, size, pe, UCP_ATOMIC_POST_OP_XOR); +#else /* configure did not find a UCP_ATOMIC_POST_OP_XOR declaration */ + return OSHMEM_ERR_NOT_IMPLEMENTED; +#endif /* HAVE_DECL_UCP_ATOMIC_POST_OP_XOR */ +} + static int mca_atomic_ucx_fadd(void *target, void *prev,
uint64_t value, @@ -92,6 +128,45 @@ static int mca_atomic_ucx_fadd(void *target, return mca_atomic_ucx_fop(target, prev, value, size, pe, UCP_ATOMIC_FETCH_OP_FADD); } +static int mca_atomic_ucx_fand(void *target, + void *prev, + uint64_t value, + size_t size, + int pe) +{ +#if HAVE_DECL_UCP_ATOMIC_FETCH_OP_FAND + return mca_atomic_ucx_fop(target, prev, value, size, pe, UCP_ATOMIC_FETCH_OP_FAND); +#else /* configure did not find a UCP_ATOMIC_FETCH_OP_FAND declaration */ + return OSHMEM_ERR_NOT_IMPLEMENTED; +#endif /* HAVE_DECL_UCP_ATOMIC_FETCH_OP_FAND */ +} + +static int mca_atomic_ucx_for(void *target, + void *prev, + uint64_t value, + size_t size, + int pe) +{ +#if HAVE_DECL_UCP_ATOMIC_FETCH_OP_FOR + return mca_atomic_ucx_fop(target, prev, value, size, pe, UCP_ATOMIC_FETCH_OP_FOR); +#else /* configure did not find a UCP_ATOMIC_FETCH_OP_FOR declaration */ + return OSHMEM_ERR_NOT_IMPLEMENTED; +#endif /* HAVE_DECL_UCP_ATOMIC_FETCH_OP_FOR */ +} + +static int mca_atomic_ucx_fxor(void *target, + void *prev, + uint64_t value, + size_t size, + int pe) +{ +#if HAVE_DECL_UCP_ATOMIC_FETCH_OP_FXOR + return mca_atomic_ucx_fop(target, prev, value, size, pe, UCP_ATOMIC_FETCH_OP_FXOR); +#else /* configure did not find a UCP_ATOMIC_FETCH_OP_FXOR declaration */ + return OSHMEM_ERR_NOT_IMPLEMENTED; +#endif /* HAVE_DECL_UCP_ATOMIC_FETCH_OP_FXOR */ +} + static int mca_atomic_ucx_swap(void *target, void *prev, uint64_t value, @@ -112,7 +187,13 @@ mca_atomic_ucx_query(int *priority) module = OBJ_NEW(mca_atomic_ucx_module_t); if (module) { module->super.atomic_add = mca_atomic_ucx_add; + module->super.atomic_and = mca_atomic_ucx_and; + module->super.atomic_or = mca_atomic_ucx_or; + module->super.atomic_xor = mca_atomic_ucx_xor; module->super.atomic_fadd = mca_atomic_ucx_fadd; + module->super.atomic_fand = mca_atomic_ucx_fand; + module->super.atomic_for = mca_atomic_ucx_for; + module->super.atomic_fxor = mca_atomic_ucx_fxor; module->super.atomic_swap = mca_atomic_ucx_swap; module->super.atomic_cswap = mca_atomic_ucx_cswap; return &(module->super); diff --git a/oshmem/shmem/c/Makefile.am b/oshmem/shmem/c/Makefile.am index c60ba29bcc..1b7a64a189 100644 --- a/oshmem/shmem/c/Makefile.am +++ b/oshmem/shmem/c/Makefile.am @@ -54,9 +54,15 @@ OSHMEM_API_SOURCES = \ shmem_set.c \ shmem_cswap.c \ shmem_fadd.c \ +
shmem_fand.c \ + shmem_for.c \ + shmem_fxor.c \ shmem_fetch.c \ shmem_finc.c \ shmem_add.c \ + shmem_and.c \ + shmem_or.c \ + shmem_xor.c \ shmem_inc.c \ shmem_clear_lock.c \ shmem_set_lock.c \ diff --git a/oshmem/shmem/c/profile/Makefile.am b/oshmem/shmem/c/profile/Makefile.am index d7fdb187f0..5694e99de1 100644 --- a/oshmem/shmem/c/profile/Makefile.am +++ b/oshmem/shmem/c/profile/Makefile.am @@ -66,9 +66,15 @@ OSHMEM_API_SOURCES = \ pshmem_set.c \ pshmem_cswap.c \ pshmem_fadd.c \ + pshmem_fand.c \ + pshmem_for.c \ + pshmem_fxor.c \ pshmem_fetch.c \ pshmem_finc.c \ pshmem_add.c \ + pshmem_and.c \ + pshmem_or.c \ + pshmem_xor.c \ pshmem_inc.c \ pshmem_clear_lock.c \ pshmem_set_lock.c \ diff --git a/oshmem/shmem/c/profile/defines.h b/oshmem/shmem/c/profile/defines.h index 7f61bc2738..61ee740392 100644 --- a/oshmem/shmem/c/profile/defines.h +++ b/oshmem/shmem/c/profile/defines.h @@ -236,6 +236,27 @@ #define shmemx_int32_fadd pshmemx_int32_fadd #define shmemx_int64_fadd pshmemx_int64_fadd +/* Atomic Fetch&And */ +#define shmem_int_atomic_fand pshmem_int_atomic_fand +#define shmem_long_atomic_fand pshmem_long_atomic_fand +#define shmem_longlong_atomic_fand pshmem_longlong_atomic_fand +#define shmemx_int32_atomic_fand pshmemx_int32_atomic_fand +#define shmemx_int64_atomic_fand pshmemx_int64_atomic_fand + +/* Atomic Fetch&Or */ +#define shmem_int_atomic_for pshmem_int_atomic_for +#define shmem_long_atomic_for pshmem_long_atomic_for +#define shmem_longlong_atomic_for pshmem_longlong_atomic_for +#define shmemx_int32_atomic_for pshmemx_int32_atomic_for +#define shmemx_int64_atomic_for pshmemx_int64_atomic_for + +/* Atomic Fetch&Xor */ +#define shmem_int_atomic_fxor pshmem_int_atomic_fxor +#define shmem_long_atomic_fxor pshmem_long_atomic_fxor +#define shmem_longlong_atomic_fxor pshmem_longlong_atomic_fxor +#define shmemx_int32_atomic_fxor pshmemx_int32_atomic_fxor +#define shmemx_int64_atomic_fxor pshmemx_int64_atomic_fxor + /* Atomic Fetch */ #define shmem_double_fetch 
pshmem_double_fetch #define shmem_float_fetch pshmem_float_fetch @@ -252,13 +273,34 @@ #define shmemx_int32_finc pshmemx_int32_finc #define shmemx_int64_finc pshmemx_int64_finc -/* Atomic Add*/ +/* Atomic Add */ #define shmem_int_add pshmem_int_add #define shmem_long_add pshmem_long_add #define shmem_longlong_add pshmem_longlong_add #define shmemx_int32_add pshmemx_int32_add #define shmemx_int64_add pshmemx_int64_add +/* Atomic And */ +#define shmem_int_atomic_and pshmem_int_atomic_and +#define shmem_long_atomic_and pshmem_long_atomic_and +#define shmem_longlong_atomic_and pshmem_longlong_atomic_and +#define shmemx_int32_atomic_and pshmemx_int32_atomic_and +#define shmemx_int64_atomic_and pshmemx_int64_atomic_and + +/* Atomic Or */ +#define shmem_int_atomic_or pshmem_int_atomic_or +#define shmem_long_atomic_or pshmem_long_atomic_or +#define shmem_longlong_atomic_or pshmem_longlong_atomic_or +#define shmemx_int32_atomic_or pshmemx_int32_atomic_or +#define shmemx_int64_atomic_or pshmemx_int64_atomic_or + +/* Atomic Xor */ +#define shmem_int_atomic_xor pshmem_int_atomic_xor +#define shmem_long_atomic_xor pshmem_long_atomic_xor +#define shmem_longlong_atomic_xor pshmem_longlong_atomic_xor +#define shmemx_int32_atomic_xor pshmemx_int32_atomic_xor +#define shmemx_int64_atomic_xor pshmemx_int64_atomic_xor + /* Atomic Inc */ #define shmem_int_inc pshmem_int_inc #define shmem_long_inc pshmem_long_inc diff --git a/oshmem/shmem/c/shmem_and.c b/oshmem/shmem/c/shmem_and.c new file mode 100644 index 0000000000..f90231c927 --- /dev/null +++ b/oshmem/shmem/c/shmem_and.c @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2018 Mellanox Technologies, Inc. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +#include "oshmem_config.h" + +#include "oshmem/constants.h" +#include "oshmem/include/shmem.h" +#include "oshmem/include/shmemx.h" + +#include "oshmem/runtime/runtime.h" +#include "oshmem/op/op.h" +#include "oshmem/mca/atomic/atomic.h" + +/* + * These routines perform an atomic 'and' operation. + * The atomic 'and' routines apply an 'and' of value to the data at address target on PE pe. + * The operation must be completed without the possibility of another process updating + * target between the time of the fetch and the update. + */ +#if OSHMEM_PROFILING +#include "oshmem/include/pshmem.h" +#pragma weak shmem_int_atomic_and = pshmem_int_atomic_and +#pragma weak shmem_long_atomic_and = pshmem_long_atomic_and +#pragma weak shmem_longlong_atomic_and = pshmem_longlong_atomic_and +#pragma weak shmemx_int32_atomic_and = pshmemx_int32_atomic_and +#pragma weak shmemx_int64_atomic_and = pshmemx_int64_atomic_and +#include "oshmem/shmem/c/profile/defines.h" +#endif + +OSHMEM_TYPE_OP(_int, int, shmem, and) +OSHMEM_TYPE_OP(_long, long, shmem, and) +OSHMEM_TYPE_OP(_longlong, long long, shmem, and) +OSHMEM_TYPE_OP(_int32, int32_t, shmemx, and) +OSHMEM_TYPE_OP(_int64, int64_t, shmemx, and) diff --git a/oshmem/shmem/c/shmem_fand.c b/oshmem/shmem/c/shmem_fand.c new file mode 100644 index 0000000000..a26babb0f3 --- /dev/null +++ b/oshmem/shmem/c/shmem_fand.c @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2018 Mellanox Technologies, Inc. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +#include "oshmem_config.h" + +#include "oshmem/constants.h" +#include "oshmem/include/shmem.h" +#include "oshmem/include/shmemx.h" + +#include "oshmem/runtime/runtime.h" + +#include "oshmem/op/op.h" +#include "oshmem/mca/atomic/atomic.h" + +/* + * These routines perform an atomic fetch-and-and operation.
+ * The fetch-and-and routines retrieve the value at address target on PE pe, and update + * target with the result of an 'and' of value and the retrieved value. The operation + * must be completed without the possibility of another process updating target between + * the time of the fetch and the update. + */ +#if OSHMEM_PROFILING +#include "oshmem/include/pshmem.h" +#pragma weak shmem_int_atomic_fand = pshmem_int_atomic_fand +#pragma weak shmem_long_atomic_fand = pshmem_long_atomic_fand +#pragma weak shmem_longlong_atomic_fand = pshmem_longlong_atomic_fand +#pragma weak shmemx_int32_atomic_fand = pshmemx_int32_atomic_fand +#pragma weak shmemx_int64_atomic_fand = pshmemx_int64_atomic_fand +#include "oshmem/shmem/c/profile/defines.h" +#endif + +OSHMEM_TYPE_FOP(_int, int, shmem, fand) +OSHMEM_TYPE_FOP(_long, long, shmem, fand) +OSHMEM_TYPE_FOP(_longlong, long long, shmem, fand) +OSHMEM_TYPE_FOP(_int32, int32_t, shmemx, fand) +OSHMEM_TYPE_FOP(_int64, int64_t, shmemx, fand) diff --git a/oshmem/shmem/c/shmem_for.c b/oshmem/shmem/c/shmem_for.c new file mode 100644 index 0000000000..a487320bfc --- /dev/null +++ b/oshmem/shmem/c/shmem_for.c @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2018 Mellanox Technologies, Inc. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +#include "oshmem_config.h" + +#include "oshmem/constants.h" +#include "oshmem/include/shmem.h" +#include "oshmem/include/shmemx.h" + +#include "oshmem/runtime/runtime.h" + +#include "oshmem/op/op.h" +#include "oshmem/mca/atomic/atomic.h" + +/* + * These routines perform an atomic fetch-and-or operation. + * The fetch-and-or routines retrieve the value at address target on PE pe, and update + * target with the result of an 'or' of value and the retrieved value. The operation + * must be completed without the possibility of another process updating target between + * the time of the fetch and the update.
+ */ +#if OSHMEM_PROFILING +#include "oshmem/include/pshmem.h" +#pragma weak shmem_int_atomic_for = pshmem_int_atomic_for +#pragma weak shmem_long_atomic_for = pshmem_long_atomic_for +#pragma weak shmem_longlong_atomic_for = pshmem_longlong_atomic_for +#pragma weak shmemx_int32_atomic_for = pshmemx_int32_atomic_for +#pragma weak shmemx_int64_atomic_for = pshmemx_int64_atomic_for +#include "oshmem/shmem/c/profile/defines.h" +#endif + +OSHMEM_TYPE_FOP(_int, int, shmem, for) +OSHMEM_TYPE_FOP(_long, long, shmem, for) +OSHMEM_TYPE_FOP(_longlong, long long, shmem, for) +OSHMEM_TYPE_FOP(_int32, int32_t, shmemx, for) +OSHMEM_TYPE_FOP(_int64, int64_t, shmemx, for) diff --git a/oshmem/shmem/c/shmem_fxor.c b/oshmem/shmem/c/shmem_fxor.c new file mode 100644 index 0000000000..ba43ad3ef8 --- /dev/null +++ b/oshmem/shmem/c/shmem_fxor.c @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2018 Mellanox Technologies, Inc. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +#include "oshmem_config.h" + +#include "oshmem/constants.h" +#include "oshmem/include/shmem.h" +#include "oshmem/include/shmemx.h" + +#include "oshmem/runtime/runtime.h" + +#include "oshmem/op/op.h" +#include "oshmem/mca/atomic/atomic.h" + +/* + * These routines perform an atomic fetch-and-xor operation. + * The fetch-and-xor routines retrieve the value at address target on PE pe, and update + * target with the result of an 'xor' of value and the retrieved value. The operation + * must be completed without the possibility of another process updating target between + * the time of the fetch and the update.
+ */ +#if OSHMEM_PROFILING +#include "oshmem/include/pshmem.h" +#pragma weak shmem_int_atomic_fxor = pshmem_int_atomic_fxor +#pragma weak shmem_long_atomic_fxor = pshmem_long_atomic_fxor +#pragma weak shmem_longlong_atomic_fxor = pshmem_longlong_atomic_fxor +#pragma weak shmemx_int32_atomic_fxor = pshmemx_int32_atomic_fxor +#pragma weak shmemx_int64_atomic_fxor = pshmemx_int64_atomic_fxor +#include "oshmem/shmem/c/profile/defines.h" +#endif + +OSHMEM_TYPE_FOP(_int, int, shmem, fxor) +OSHMEM_TYPE_FOP(_long, long, shmem, fxor) +OSHMEM_TYPE_FOP(_longlong, long long, shmem, fxor) +OSHMEM_TYPE_FOP(_int32, int32_t, shmemx, fxor) +OSHMEM_TYPE_FOP(_int64, int64_t, shmemx, fxor) diff --git a/oshmem/shmem/c/shmem_or.c b/oshmem/shmem/c/shmem_or.c new file mode 100644 index 0000000000..c973c4d539 --- /dev/null +++ b/oshmem/shmem/c/shmem_or.c @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2018 Mellanox Technologies, Inc. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +#include "oshmem_config.h" + +#include "oshmem/constants.h" +#include "oshmem/include/shmem.h" +#include "oshmem/include/shmemx.h" + +#include "oshmem/runtime/runtime.h" +#include "oshmem/op/op.h" +#include "oshmem/mca/atomic/atomic.h" + +/* + * These routines perform an atomic 'or' operation. + * The atomic 'or' routines atomically apply the 'or' operation with value to the data at address target on PE pe. + * The operation must be completed without the possibility of another process updating + * target between the time of the fetch and the update.
+ */ +#if OSHMEM_PROFILING +#include "oshmem/include/pshmem.h" +#pragma weak shmem_int_atomic_or = pshmem_int_atomic_or +#pragma weak shmem_long_atomic_or = pshmem_long_atomic_or +#pragma weak shmem_longlong_atomic_or = pshmem_longlong_atomic_or +#pragma weak shmemx_int32_atomic_or = pshmemx_int32_atomic_or +#pragma weak shmemx_int64_atomic_or = pshmemx_int64_atomic_or +#include "oshmem/shmem/c/profile/defines.h" +#endif + +OSHMEM_TYPE_OP(_int, int, shmem, or) +OSHMEM_TYPE_OP(_long, long, shmem, or) +OSHMEM_TYPE_OP(_longlong, long long, shmem, or) +OSHMEM_TYPE_OP(_int32, int32_t, shmemx, or) +OSHMEM_TYPE_OP(_int64, int64_t, shmemx, or) diff --git a/oshmem/shmem/c/shmem_xor.c b/oshmem/shmem/c/shmem_xor.c new file mode 100644 index 0000000000..fc2832c29c --- /dev/null +++ b/oshmem/shmem/c/shmem_xor.c @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2018 Mellanox Technologies, Inc. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +#include "oshmem_config.h" + +#include "oshmem/constants.h" +#include "oshmem/include/shmem.h" +#include "oshmem/include/shmemx.h" + +#include "oshmem/runtime/runtime.h" +#include "oshmem/op/op.h" +#include "oshmem/mca/atomic/atomic.h" + +/* + * These routines perform an atomic 'xor' operation. + * The atomic 'xor' routines atomically apply the 'xor' operation with value to the data at address target on PE pe. + * The operation must be completed without the possibility of another process updating + * target between the time of the fetch and the update.
+ */ +#if OSHMEM_PROFILING +#include "oshmem/include/pshmem.h" +#pragma weak shmem_int_atomic_xor = pshmem_int_atomic_xor +#pragma weak shmem_long_atomic_xor = pshmem_long_atomic_xor +#pragma weak shmem_longlong_atomic_xor = pshmem_longlong_atomic_xor +#pragma weak shmemx_int32_atomic_xor = pshmemx_int32_atomic_xor +#pragma weak shmemx_int64_atomic_xor = pshmemx_int64_atomic_xor +#include "oshmem/shmem/c/profile/defines.h" +#endif + +OSHMEM_TYPE_OP(_int, int, shmem, xor) +OSHMEM_TYPE_OP(_long, long, shmem, xor) +OSHMEM_TYPE_OP(_longlong, long long, shmem, xor) +OSHMEM_TYPE_OP(_int32, int32_t, shmemx, xor) +OSHMEM_TYPE_OP(_int64, int64_t, shmemx, xor)