From c429900cd9389d33623f0dd93688148a8c6128f6 Mon Sep 17 00:00:00 2001 From: Xin Zhao Date: Fri, 13 Jul 2018 10:08:54 -0700 Subject: [PATCH] OMPI/OSHMEM: add new functionality of OpenSHMEM v1.4. Signed-off-by: Xin Zhao --- oshmem/include/oshmem/constants.h | 5 + oshmem/include/pshmem.h | 517 +++++++++++++++++- oshmem/include/shmem.h.in | 517 +++++++++++++++++- oshmem/mca/atomic/atomic.h | 69 ++- oshmem/mca/atomic/basic/atomic_basic.h | 7 +- oshmem/mca/atomic/basic/atomic_basic_cswap.c | 11 +- oshmem/mca/atomic/basic/atomic_basic_module.c | 87 +-- oshmem/mca/atomic/mxm/atomic_mxm.h | 12 +- oshmem/mca/atomic/mxm/atomic_mxm_cswap.c | 6 +- oshmem/mca/atomic/mxm/atomic_mxm_fadd.c | 6 +- oshmem/mca/atomic/mxm/atomic_mxm_module.c | 6 +- oshmem/mca/atomic/ucx/atomic_ucx.h | 3 +- oshmem/mca/atomic/ucx/atomic_ucx_cswap.c | 10 +- oshmem/mca/atomic/ucx/atomic_ucx_module.c | 63 ++- oshmem/mca/scoll/basic/scoll_basic_alltoall.c | 6 +- oshmem/mca/scoll/basic/scoll_basic_barrier.c | 24 +- .../mca/scoll/basic/scoll_basic_broadcast.c | 12 +- oshmem/mca/scoll/basic/scoll_basic_collect.c | 42 +- oshmem/mca/scoll/basic/scoll_basic_reduce.c | 34 +- oshmem/mca/scoll/fca/scoll_fca_module.c | 8 +- oshmem/mca/spml/base/base.h | 5 + oshmem/mca/spml/base/spml_base.c | 95 +++- oshmem/mca/spml/ikrit/spml_ikrit.c | 32 +- oshmem/mca/spml/ikrit/spml_ikrit.h | 25 +- oshmem/mca/spml/ikrit/spml_ikrit_component.c | 2 + oshmem/mca/spml/spml.h | 58 +- oshmem/mca/spml/ucx/spml_ucx.c | 227 ++++++-- oshmem/mca/spml/ucx/spml_ucx.h | 50 +- oshmem/mca/spml/ucx/spml_ucx_component.c | 83 ++- oshmem/runtime/oshmem_shmem_finalize.c | 2 + oshmem/runtime/oshmem_shmem_init.c | 13 + oshmem/runtime/runtime.h | 30 + oshmem/shmem/c/Makefile.am | 2 + oshmem/shmem/c/profile/Makefile.am | 1 + oshmem/shmem/c/profile/defines.h | 198 ++++++- oshmem/shmem/c/shmem_add.c | 42 +- oshmem/shmem/c/shmem_align.c | 4 + oshmem/shmem/c/shmem_alloc.c | 15 + oshmem/shmem/c/shmem_and.c | 6 + oshmem/shmem/c/shmem_context.c | 41 ++ 
oshmem/shmem/c/shmem_cswap.c | 49 +- oshmem/shmem/c/shmem_fadd.c | 47 +- oshmem/shmem/c/shmem_fand.c | 6 + oshmem/shmem/c/shmem_fence.c | 9 +- oshmem/shmem/c/shmem_fetch.c | 54 +- oshmem/shmem/c/shmem_finc.c | 47 +- oshmem/shmem/c/shmem_for.c | 8 +- oshmem/shmem/c/shmem_free.c | 5 + oshmem/shmem/c/shmem_fxor.c | 6 + oshmem/shmem/c/shmem_g.c | 38 +- oshmem/shmem/c/shmem_get.c | 68 ++- oshmem/shmem/c/shmem_get_nb.c | 68 ++- oshmem/shmem/c/shmem_iget.c | 67 ++- oshmem/shmem/c/shmem_inc.c | 41 +- oshmem/shmem/c/shmem_init.c | 21 +- oshmem/shmem/c/shmem_iput.c | 72 ++- oshmem/shmem/c/shmem_lock.c | 4 +- oshmem/shmem/c/shmem_or.c | 6 + oshmem/shmem/c/shmem_p.c | 36 +- oshmem/shmem/c/shmem_put.c | 69 ++- oshmem/shmem/c/shmem_put_nb.c | 69 ++- oshmem/shmem/c/shmem_query.c | 6 + oshmem/shmem/c/shmem_quiet.c | 9 +- oshmem/shmem/c/shmem_realloc.c | 4 + oshmem/shmem/c/shmem_set.c | 47 +- oshmem/shmem/c/shmem_swap.c | 54 +- oshmem/shmem/c/shmem_sync.c | 66 +++ oshmem/shmem/c/shmem_wait.c | 29 +- oshmem/shmem/c/shmem_xor.c | 6 + oshmem/shmem/fortran/shmem_character_get_f.c | 2 +- oshmem/shmem/fortran/shmem_character_put_f.c | 2 +- oshmem/shmem/fortran/shmem_complex_get_f.c | 2 +- oshmem/shmem/fortran/shmem_complex_iget_f.c | 2 +- oshmem/shmem/fortran/shmem_complex_iput_f.c | 2 +- oshmem/shmem/fortran/shmem_complex_put_f.c | 2 +- oshmem/shmem/fortran/shmem_double_get_f.c | 2 +- oshmem/shmem/fortran/shmem_double_iget_f.c | 2 +- oshmem/shmem/fortran/shmem_double_iput_f.c | 2 +- oshmem/shmem/fortran/shmem_double_put_f.c | 2 +- oshmem/shmem/fortran/shmem_fence_f.c | 2 +- oshmem/shmem/fortran/shmem_get128_f.c | 2 +- oshmem/shmem/fortran/shmem_get32_f.c | 2 +- oshmem/shmem/fortran/shmem_get4_f.c | 2 +- oshmem/shmem/fortran/shmem_get64_f.c | 2 +- oshmem/shmem/fortran/shmem_get8_f.c | 2 +- oshmem/shmem/fortran/shmem_get_nb_f.c | 24 +- oshmem/shmem/fortran/shmem_getmem_f.c | 2 +- oshmem/shmem/fortran/shmem_iget128_f.c | 2 +- oshmem/shmem/fortran/shmem_iget32_f.c | 2 +- 
oshmem/shmem/fortran/shmem_iget4_f.c | 2 +- oshmem/shmem/fortran/shmem_iget64_f.c | 2 +- oshmem/shmem/fortran/shmem_iget8_f.c | 2 +- oshmem/shmem/fortran/shmem_int4_add_f.c | 2 +- oshmem/shmem/fortran/shmem_int4_cswap_f.c | 2 +- oshmem/shmem/fortran/shmem_int4_fadd_f.c | 2 +- oshmem/shmem/fortran/shmem_int4_fetch_f.c | 2 +- oshmem/shmem/fortran/shmem_int4_finc_f.c | 2 +- oshmem/shmem/fortran/shmem_int4_inc_f.c | 2 +- oshmem/shmem/fortran/shmem_int4_set_f.c | 2 +- oshmem/shmem/fortran/shmem_int4_swap_f.c | 2 +- oshmem/shmem/fortran/shmem_int8_add_f.c | 2 +- oshmem/shmem/fortran/shmem_int8_cswap_f.c | 2 +- oshmem/shmem/fortran/shmem_int8_fadd_f.c | 2 +- oshmem/shmem/fortran/shmem_int8_fetch_f.c | 2 +- oshmem/shmem/fortran/shmem_int8_finc_f.c | 2 +- oshmem/shmem/fortran/shmem_int8_inc_f.c | 2 +- oshmem/shmem/fortran/shmem_int8_set_f.c | 2 +- oshmem/shmem/fortran/shmem_int8_swap_f.c | 2 +- oshmem/shmem/fortran/shmem_integer_get_f.c | 2 +- oshmem/shmem/fortran/shmem_integer_iget_f.c | 2 +- oshmem/shmem/fortran/shmem_integer_iput_f.c | 2 +- oshmem/shmem/fortran/shmem_integer_put_f.c | 2 +- oshmem/shmem/fortran/shmem_iput128_f.c | 2 +- oshmem/shmem/fortran/shmem_iput32_f.c | 2 +- oshmem/shmem/fortran/shmem_iput4_f.c | 2 +- oshmem/shmem/fortran/shmem_iput64_f.c | 2 +- oshmem/shmem/fortran/shmem_iput8_f.c | 2 +- oshmem/shmem/fortran/shmem_logical_get_f.c | 2 +- oshmem/shmem/fortran/shmem_logical_iget_f.c | 2 +- oshmem/shmem/fortran/shmem_logical_iput_f.c | 2 +- oshmem/shmem/fortran/shmem_logical_put_f.c | 2 +- oshmem/shmem/fortran/shmem_put128_f.c | 2 +- oshmem/shmem/fortran/shmem_put32_f.c | 2 +- oshmem/shmem/fortran/shmem_put4_f.c | 2 +- oshmem/shmem/fortran/shmem_put64_f.c | 2 +- oshmem/shmem/fortran/shmem_put8_f.c | 2 +- oshmem/shmem/fortran/shmem_put_nb_f.c | 24 +- oshmem/shmem/fortran/shmem_putmem_f.c | 2 +- oshmem/shmem/fortran/shmem_quiet_f.c | 2 +- oshmem/shmem/fortran/shmem_real4_fetch_f.c | 2 +- oshmem/shmem/fortran/shmem_real4_set_f.c | 2 +- 
oshmem/shmem/fortran/shmem_real4_swap_f.c | 2 +- oshmem/shmem/fortran/shmem_real8_fetch_f.c | 2 +- oshmem/shmem/fortran/shmem_real8_set_f.c | 2 +- oshmem/shmem/fortran/shmem_real8_swap_f.c | 2 +- oshmem/shmem/fortran/shmem_real_get_f.c | 2 +- oshmem/shmem/fortran/shmem_real_iget_f.c | 2 +- oshmem/shmem/fortran/shmem_real_iput_f.c | 2 +- oshmem/shmem/fortran/shmem_real_put_f.c | 2 +- oshmem/shmem/fortran/shmem_swap_f.c | 2 +- 140 files changed, 3091 insertions(+), 482 deletions(-) create mode 100644 oshmem/shmem/c/shmem_context.c create mode 100644 oshmem/shmem/c/shmem_sync.c diff --git a/oshmem/include/oshmem/constants.h b/oshmem/include/oshmem/constants.h index be8d4b2032..17a560692d 100644 --- a/oshmem/include/oshmem/constants.h +++ b/oshmem/include/oshmem/constants.h @@ -105,6 +105,11 @@ enum { #define SHMEM_UNDEFINED -32766 /* undefined stuff */ +#define SHMEM_CTX_PRIVATE (1<<0) +#define SHMEM_CTX_SERIALIZED (1<<1) +#define SHMEM_CTX_NOSTORE (1<<2) + + #ifndef UNREFERENCED_PARAMETER #define UNREFERENCED_PARAMETER(P) ((void)P) #endif diff --git a/oshmem/include/pshmem.h b/oshmem/include/pshmem.h index 8ca0696360..0064c5e0ff 100644 --- a/oshmem/include/pshmem.h +++ b/oshmem/include/pshmem.h @@ -54,6 +54,7 @@ extern "C" { * Initialization routines */ OSHMEM_DECLSPEC void pshmem_init(void); +OSHMEM_DECLSPEC int pshmem_init_thread(int requested, int *provided); OSHMEM_DECLSPEC void pshmem_global_exit(int status); /* @@ -66,7 +67,7 @@ OSHMEM_DECLSPEC void pshmem_finalize(void); */ OSHMEM_DECLSPEC int pshmem_n_pes(void); OSHMEM_DECLSPEC int pshmem_my_pe(void); - +OSHMEM_DECLSPEC void pshmem_query_thread(int *provided); /* * Accessability routines @@ -78,6 +79,7 @@ OSHMEM_DECLSPEC int pshmem_addr_accessible(const void *addr, int pe); * Symmetric heap routines */ OSHMEM_DECLSPEC void* pshmem_malloc(size_t size); +OSHMEM_DECLSPEC void* pshmem_calloc(size_t count, size_t size); OSHMEM_DECLSPEC void* pshmem_align(size_t align, size_t size); OSHMEM_DECLSPEC void* 
pshmem_realloc(void *ptr, size_t size); OSHMEM_DECLSPEC void pshmem_free(void* ptr); @@ -87,9 +89,36 @@ OSHMEM_DECLSPEC void pshmem_free(void* ptr); */ OSHMEM_DECLSPEC void *pshmem_ptr(const void *ptr, int pe); +/* + * Communication context operations + */ +OSHMEM_DECLSPEC int pshmem_ctx_create(long options, shmem_ctx_t *ctx); +OSHMEM_DECLSPEC void pshmem_ctx_destroy(shmem_ctx_t ctx); + /* * Elemental put routines */ +OSHMEM_DECLSPEC void pshmem_ctx_char_p(shmem_ctx_t ctx, char* addr, char value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_short_p(shmem_ctx_t ctx, short* addr, short value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_int_p(shmem_ctx_t ctx, int* addr, int value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_long_p(shmem_ctx_t ctx, long* addr, long value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_float_p(shmem_ctx_t ctx, float* addr, float value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_double_p(shmem_ctx_t ctx, double* addr, double value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_longlong_p(shmem_ctx_t ctx, long long* addr, long long value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_longdouble_p(shmem_ctx_t ctx, long double* addr, long double value, int pe); +#if OSHMEM_HAVE_C11 +#define pshmem_p(ctx, dst, val, pe) \ + _Generic(&*(dst), \ + char*: pshmem_ctx_char_p, \ + short*: pshmem_ctx_short_p, \ + int*: pshmem_ctx_int_p, \ + long*: pshmem_ctx_long_p, \ + long long*: pshmem_ctx_longlong_p, \ + float*: pshmem_ctx_float_p, \ + double*: pshmem_ctx_double_p, \ + long double*: pshmem_ctx_longdouble_p)(ctx, dst, val, pe) +#endif + OSHMEM_DECLSPEC void pshmem_char_p(char* addr, char value, int pe); OSHMEM_DECLSPEC void pshmem_short_p(short* addr, short value, int pe); OSHMEM_DECLSPEC void pshmem_int_p(int* addr, int value, int pe); @@ -114,6 +143,27 @@ OSHMEM_DECLSPEC void pshmem_longdouble_p(long double* addr, long double value, /* * Block data put routines */ +OSHMEM_DECLSPEC void pshmem_ctx_char_put(shmem_ctx_t ctx, char *target, const char *source, size_t 
len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_short_put(shmem_ctx_t ctx, short *target, const short *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_int_put(shmem_ctx_t ctx, int* target, const int* source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_long_put(shmem_ctx_t ctx, long *target, const long *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_float_put(shmem_ctx_t ctx, float *target, const float *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_double_put(shmem_ctx_t ctx, double *target, const double *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_longlong_put(shmem_ctx_t ctx, long long *target, const long long *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_longdouble_put(shmem_ctx_t ctx, long double *target, const long double *source, size_t len, int pe); +#if OSHMEM_HAVE_C11 +#define pshmem_put(ctx, dst, src, len, pe) \ + _Generic(&*(dst), \ + char*: pshmem_ctx_char_put, \ + short*: pshmem_ctx_short_put, \ + int*: pshmem_ctx_int_put, \ + long*: pshmem_ctx_long_put, \ + long long*: pshmem_ctx_longlong_put, \ + float*: pshmem_ctx_float_put, \ + double*: pshmem_ctx_double_put, \ + long double*: pshmem_ctx_longdouble_put)(ctx, dst, src, len, pe) +#endif + OSHMEM_DECLSPEC void pshmem_char_put(char *target, const char *source, size_t len, int pe); OSHMEM_DECLSPEC void pshmem_short_put(short *target, const short *source, size_t len, int pe); OSHMEM_DECLSPEC void pshmem_int_put(int* target, const int* source, size_t len, int pe); @@ -135,6 +185,13 @@ OSHMEM_DECLSPEC void pshmem_longdouble_put(long double *target, const long doub long double*: pshmem_longdouble_put)(dst, src, len, pe) #endif +OSHMEM_DECLSPEC void pshmem_ctx_put8(shmem_ctx_t ctx, void *target, const void *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_put16(shmem_ctx_t ctx, void *target, const void *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_put32(shmem_ctx_t ctx, void *target, const 
void *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_put64(shmem_ctx_t ctx, void *target, const void *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_put128(shmem_ctx_t ctx, void *target, const void *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_putmem(shmem_ctx_t ctx, void *target, const void *source, size_t len, int pe); + OSHMEM_DECLSPEC void pshmem_put8(void *target, const void *source, size_t len, int pe); OSHMEM_DECLSPEC void pshmem_put16(void *target, const void *source, size_t len, int pe); OSHMEM_DECLSPEC void pshmem_put32(void *target, const void *source, size_t len, int pe); @@ -145,6 +202,27 @@ OSHMEM_DECLSPEC void pshmem_putmem(void *target, const void *source, size_t len /* * Strided put routines */ +OSHMEM_DECLSPEC void pshmem_ctx_char_iput(shmem_ctx_t ctx, char* target, const char* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_short_iput(shmem_ctx_t ctx, short* target, const short* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_int_iput(shmem_ctx_t ctx, int* target, const int* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_long_iput(shmem_ctx_t ctx, long* target, const long* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_float_iput(shmem_ctx_t ctx, float* target, const float* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_double_iput(shmem_ctx_t ctx, double* target, const double* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_longlong_iput(shmem_ctx_t ctx, long long* target, const long long* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_longdouble_iput(shmem_ctx_t ctx, long double* target, const long double* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +#if OSHMEM_HAVE_C11 +#define 
pshmem_iput(ctx, dst, src, tst, sst, len, pe) \ + _Generic(&*(dst), \ + char*: pshmem_ctx_char_iput, \ + short*: pshmem_ctx_short_iput, \ + int*: pshmem_ctx_int_iput, \ + long*: pshmem_ctx_long_iput, \ + long long*: pshmem_ctx_longlong_iput, \ + float*: pshmem_ctx_float_iput, \ + double*: pshmem_ctx_double_iput, \ + long double*: pshmem_ctx_longdouble_iput)(ctx, dst, src, tst, sst, len, pe) +#endif + OSHMEM_DECLSPEC void pshmem_char_iput(char* target, const char* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); OSHMEM_DECLSPEC void pshmem_short_iput(short* target, const short* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); OSHMEM_DECLSPEC void pshmem_int_iput(int* target, const int* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); @@ -166,6 +244,12 @@ OSHMEM_DECLSPEC void pshmem_longdouble_iput(long double* target, const long doub long double*: pshmem_longdouble_iput)(dst, src, tst, sst, len, pe) #endif +OSHMEM_DECLSPEC void pshmem_ctx_iput8(shmem_ctx_t ctx, void* target, const void* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_iput16(shmem_ctx_t ctx, void* target, const void* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_iput32(shmem_ctx_t ctx, void* target, const void* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_iput64(shmem_ctx_t ctx, void* target, const void* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_iput128(shmem_ctx_t ctx, void* target, const void* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); + OSHMEM_DECLSPEC void pshmem_iput8(void* target, const void* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); OSHMEM_DECLSPEC void pshmem_iput16(void* target, const void* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); OSHMEM_DECLSPEC void pshmem_iput32(void* target, const void* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, 
int pe); @@ -175,6 +259,27 @@ OSHMEM_DECLSPEC void pshmem_iput128(void* target, const void* source, ptrdiff_t /* * Nonblocking put routines */ +OSHMEM_DECLSPEC void pshmem_ctx_char_put_nbi(shmem_ctx_t ctx, char *target, const char *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_short_put_nbi(shmem_ctx_t ctx, short *target, const short *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_int_put_nbi(shmem_ctx_t ctx, int* target, const int* source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_long_put_nbi(shmem_ctx_t ctx, long *target, const long *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_longlong_put_nbi(shmem_ctx_t ctx, long long *target, const long long *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_float_put_nbi(shmem_ctx_t ctx, float *target, const float *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_double_put_nbi(shmem_ctx_t ctx, double *target, const double *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_longdouble_put_nbi(shmem_ctx_t ctx, long double *target, const long double *source, size_t len, int pe); +#if OSHMEM_HAVE_C11 +#define pshmem_put_nbi(ctx, dst, src, len, pe) \ + _Generic(&*(dst), \ + char*: pshmem_ctx_char_put_nbi, \ + short*: pshmem_ctx_short_put_nbi, \ + int*: pshmem_ctx_int_put_nbi, \ + long*: pshmem_ctx_long_put_nbi, \ + long long*: pshmem_ctx_longlong_put_nbi, \ + float*: pshmem_ctx_float_put_nbi, \ + double*: pshmem_ctx_double_put_nbi, \ + long double*: pshmem_ctx_longdouble_put_nbi)(ctx, dst, src, len, pe) +#endif + OSHMEM_DECLSPEC void pshmem_putmem_nbi(void *target, const void *source, size_t len, int pe); OSHMEM_DECLSPEC void pshmem_char_put_nbi(char *target, const char *source, size_t len, int pe); OSHMEM_DECLSPEC void pshmem_short_put_nbi(short *target, const short *source, size_t len, int pe); @@ -197,6 +302,13 @@ OSHMEM_DECLSPEC void pshmem_longdouble_put_nbi(long double *target, const long long double*: 
pshmem_longdouble_put_nbi)(dst, src, len, pe) #endif +OSHMEM_DECLSPEC void pshmem_ctx_put8_nbi(shmem_ctx_t ctx, void *target, const void *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_put16_nbi(shmem_ctx_t ctx, void *target, const void *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_put32_nbi(shmem_ctx_t ctx, void *target, const void *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_put64_nbi(shmem_ctx_t ctx, void *target, const void *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_put128_nbi(shmem_ctx_t ctx, void *target, const void *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_putmem_nbi(shmem_ctx_t ctx, void *target, const void *source, size_t len, int pe); + OSHMEM_DECLSPEC void pshmem_put8_nbi(void *target, const void *source, size_t len, int pe); OSHMEM_DECLSPEC void pshmem_put16_nbi(void *target, const void *source, size_t len, int pe); OSHMEM_DECLSPEC void pshmem_put32_nbi(void *target, const void *source, size_t len, int pe); @@ -206,6 +318,27 @@ OSHMEM_DECLSPEC void pshmem_put128_nbi(void *target, const void *source, size_t /* * Elemental get routines */ +OSHMEM_DECLSPEC char pshmem_ctx_char_g(shmem_ctx_t ctx, const char* addr, int pe); +OSHMEM_DECLSPEC short pshmem_ctx_short_g(shmem_ctx_t ctx, const short* addr, int pe); +OSHMEM_DECLSPEC int pshmem_ctx_int_g(shmem_ctx_t ctx, const int* addr, int pe); +OSHMEM_DECLSPEC long pshmem_ctx_long_g(shmem_ctx_t ctx, const long* addr, int pe); +OSHMEM_DECLSPEC float pshmem_ctx_float_g(shmem_ctx_t ctx, const float* addr, int pe); +OSHMEM_DECLSPEC double pshmem_ctx_double_g(shmem_ctx_t ctx, const double* addr, int pe); +OSHMEM_DECLSPEC long long pshmem_ctx_longlong_g(shmem_ctx_t ctx, const long long* addr, int pe); +OSHMEM_DECLSPEC long double pshmem_ctx_longdouble_g(shmem_ctx_t ctx, const long double* addr, int pe); +#if OSHMEM_HAVE_C11 +#define pshmem_g(ctx, addr, pe) \ + _Generic(&*(addr), \ + char*: pshmem_ctx_char_g, \ + short*: 
pshmem_ctx_short_g, \ + int*: pshmem_ctx_int_g, \ + long*: pshmem_ctx_long_g, \ + long long*: pshmem_ctx_longlong_g, \ + float*: pshmem_ctx_float_g, \ + double*: pshmem_ctx_double_g, \ + long double*: pshmem_ctx_longdouble_g)(ctx, addr, pe) +#endif + OSHMEM_DECLSPEC char pshmem_char_g(const char* addr, int pe); OSHMEM_DECLSPEC short pshmem_short_g(const short* addr, int pe); OSHMEM_DECLSPEC int pshmem_int_g(const int* addr, int pe); @@ -230,6 +363,27 @@ OSHMEM_DECLSPEC long double pshmem_longdouble_g(const long double* addr, int pe /* * Block data get routines */ +OSHMEM_DECLSPEC void pshmem_ctx_char_get(shmem_ctx_t ctx, char *target, const char *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_short_get(shmem_ctx_t ctx, short *target, const short *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_int_get(shmem_ctx_t ctx, int *target, const int *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_long_get(shmem_ctx_t ctx, long *target, const long *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_float_get(shmem_ctx_t ctx, float *target, const float *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_double_get(shmem_ctx_t ctx, double *target, const double *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_longlong_get(shmem_ctx_t ctx, long long *target, const long long *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_longdouble_get(shmem_ctx_t ctx, long double *target, const long double *source, size_t len, int pe); +#if OSHMEM_HAVE_C11 +#define pshmem_get(ctx, dst, src, len, pe) \ + _Generic(&*(dst), \ + char*: pshmem_ctx_char_get, \ + short*: pshmem_ctx_short_get, \ + int*: pshmem_ctx_int_get, \ + long*: pshmem_ctx_long_get, \ + long long*: pshmem_ctx_longlong_get, \ + float*: pshmem_ctx_float_get, \ + double*: pshmem_ctx_double_get, \ + long double*: pshmem_ctx_longdouble_get)(ctx, dst, src, len, pe) +#endif + OSHMEM_DECLSPEC void pshmem_char_get(char *target, const char 
*source, size_t len, int pe); OSHMEM_DECLSPEC void pshmem_short_get(short *target, const short *source, size_t len, int pe); OSHMEM_DECLSPEC void pshmem_int_get(int *target, const int *source, size_t len, int pe); @@ -251,6 +405,13 @@ OSHMEM_DECLSPEC void pshmem_longdouble_get(long double *target, const long doub long double*: pshmem_longdouble_get)(dst, src, len, pe) #endif +OSHMEM_DECLSPEC void pshmem_ctx_get8(shmem_ctx_t ctx, void *target, const void *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_get16(shmem_ctx_t ctx, void *target, const void *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_get32(shmem_ctx_t ctx, void *target, const void *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_get64(shmem_ctx_t ctx, void *target, const void *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_get128(shmem_ctx_t ctx, void *target, const void *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_getmem(shmem_ctx_t ctx, void *target, const void *source, size_t len, int pe); + OSHMEM_DECLSPEC void pshmem_get8(void *target, const void *source, size_t len, int pe); OSHMEM_DECLSPEC void pshmem_get16(void *target, const void *source, size_t len, int pe); OSHMEM_DECLSPEC void pshmem_get32(void *target, const void *source, size_t len, int pe); @@ -261,6 +422,27 @@ OSHMEM_DECLSPEC void pshmem_getmem(void *target, const void *source, size_t len /* * Strided get routines */ +OSHMEM_DECLSPEC void pshmem_ctx_char_iget(shmem_ctx_t ctx, char* target, const char* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_short_iget(shmem_ctx_t ctx, short* target, const short* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_int_iget(shmem_ctx_t ctx, int* target, const int* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_float_iget(shmem_ctx_t ctx, float* target, const float* source, ptrdiff_t tst, ptrdiff_t 
sst,size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_double_iget(shmem_ctx_t ctx, double* target, const double* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_longlong_iget(shmem_ctx_t ctx, long long* target, const long long* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_longdouble_iget(shmem_ctx_t ctx, long double* target, const long double* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_long_iget(shmem_ctx_t ctx, long* target, const long* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +#if OSHMEM_HAVE_C11 +#define pshmem_iget(ctx, dst, src, tst, sst, len, pe) \ + _Generic(&*(dst), \ + char*: pshmem_ctx_char_iget, \ + short*: pshmem_ctx_short_iget, \ + int*: pshmem_ctx_int_iget, \ + long*: pshmem_ctx_long_iget, \ + long long*: pshmem_ctx_longlong_iget, \ + float*: pshmem_ctx_float_iget, \ + double*: pshmem_ctx_double_iget, \ + long double*: pshmem_ctx_longdouble_iget)(ctx, dst, src, tst, sst, len, pe) +#endif + OSHMEM_DECLSPEC void pshmem_char_iget(char* target, const char* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); OSHMEM_DECLSPEC void pshmem_short_iget(short* target, const short* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); OSHMEM_DECLSPEC void pshmem_int_iget(int* target, const int* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); @@ -282,6 +464,12 @@ OSHMEM_DECLSPEC void pshmem_long_iget(long* target, const long* source, ptrdiff_ long double*: pshmem_longdouble_iget)(dst, src, tst, sst, len, pe) #endif +OSHMEM_DECLSPEC void pshmem_ctx_iget8(shmem_ctx_t ctx, void* target, const void* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_iget16(shmem_ctx_t ctx, void* target, const void* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_iget32(shmem_ctx_t ctx, void* target, const void* source, ptrdiff_t 
tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_iget64(shmem_ctx_t ctx, void* target, const void* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_iget128(shmem_ctx_t ctx, void* target, const void* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); + OSHMEM_DECLSPEC void pshmem_iget8(void* target, const void* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); OSHMEM_DECLSPEC void pshmem_iget16(void* target, const void* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); OSHMEM_DECLSPEC void pshmem_iget32(void* target, const void* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); @@ -291,6 +479,27 @@ OSHMEM_DECLSPEC void pshmem_iget128(void* target, const void* source, ptrdiff_t /* * Nonblocking data get routines */ +OSHMEM_DECLSPEC void pshmem_ctx_char_get_nbi(shmem_ctx_t ctx, char *target, const char *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_short_get_nbi(shmem_ctx_t ctx, short *target, const short *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_int_get_nbi(shmem_ctx_t ctx, int *target, const int *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_long_get_nbi(shmem_ctx_t ctx, long *target, const long *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_longlong_get_nbi(shmem_ctx_t ctx, long long *target, const long long *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_float_get_nbi(shmem_ctx_t ctx, float *target, const float *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_double_get_nbi(shmem_ctx_t ctx, double *target, const double *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_longdouble_get_nbi(shmem_ctx_t ctx, long double *target, const long double *source, size_t len, int pe); +#if OSHMEM_HAVE_C11 +#define pshmem_get_nbi(ctx, dst, src, len, pe) \ + _Generic(&*(dst), \ + char*: pshmem_ctx_char_get_nbi, \ + short*: pshmem_ctx_short_get_nbi, \ + int*: 
pshmem_ctx_int_get_nbi, \ + long*: pshmem_ctx_long_get_nbi, \ + long long*: pshmem_ctx_longlong_get_nbi, \ + float*: pshmem_ctx_float_get_nbi, \ + double*: pshmem_ctx_double_get_nbi, \ + long double*: pshmem_ctx_longdouble_get_nbi)(ctx, dst, src, len, pe) +#endif + OSHMEM_DECLSPEC void pshmem_getmem_nbi(void *target, const void *source, size_t len, int pe); OSHMEM_DECLSPEC void pshmem_char_get_nbi(char *target, const char *source, size_t len, int pe); OSHMEM_DECLSPEC void pshmem_short_get_nbi(short *target, const short *source, size_t len, int pe); @@ -313,6 +522,13 @@ OSHMEM_DECLSPEC void pshmem_longdouble_get_nbi(long double *target, const long long double*: pshmem_longdouble_get_nbi)(dst, src, len, pe) #endif +OSHMEM_DECLSPEC void pshmem_ctx_get8_nbi(shmem_ctx_t ctx, void *target, const void *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_get16_nbi(shmem_ctx_t ctx, void *target, const void *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_get32_nbi(shmem_ctx_t ctx, void *target, const void *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_get64_nbi(shmem_ctx_t ctx, void *target, const void *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_get128_nbi(shmem_ctx_t ctx, void *target, const void *source, size_t len, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_getmem_nbi(shmem_ctx_t ctx, void *target, const void *source, size_t len, int pe); + OSHMEM_DECLSPEC void pshmem_get8_nbi(void *target, const void *source, size_t len, int pe); OSHMEM_DECLSPEC void pshmem_get16_nbi(void *target, const void *source, size_t len, int pe); OSHMEM_DECLSPEC void pshmem_get32_nbi(void *target, const void *source, size_t len, int pe); @@ -323,6 +539,36 @@ OSHMEM_DECLSPEC void pshmem_get128_nbi(void *target, const void *source, size_t * Atomic operations */ /* Atomic swap */ +OSHMEM_DECLSPEC int pshmem_ctx_int_atomic_swap(shmem_ctx_t ctx, int *target, int value, int pe); +OSHMEM_DECLSPEC long pshmem_ctx_long_atomic_swap(shmem_ctx_t ctx, 
long *target, long value, int pe); +OSHMEM_DECLSPEC long long pshmem_ctx_longlong_atomic_swap(shmem_ctx_t ctx, long long*target, long long value, int pe); +OSHMEM_DECLSPEC float pshmem_ctx_float_atomic_swap(shmem_ctx_t ctx, float *target, float value, int pe); +OSHMEM_DECLSPEC double pshmem_ctx_double_atomic_swap(shmem_ctx_t ctx, double *target, double value, int pe); +#if OSHMEM_HAVE_C11 +#define pshmem_atomic_swap(ctx, dst, val, pe) \ + _Generic(&*(dst), \ + int*: pshmem_ctx_int_atomic_swap, \ + long*: pshmem_ctx_long_atomic_swap, \ + long long*: pshmem_ctx_longlong_atomic_swap, \ + float*: pshmem_ctx_float_atomic_swap, \ + double*: pshmem_ctx_double_atomic_swap)(ctx, dst, val, pe) +#endif + +OSHMEM_DECLSPEC int pshmem_int_atomic_swap(int *target, int value, int pe); +OSHMEM_DECLSPEC long pshmem_long_atomic_swap(long *target, long value, int pe); +OSHMEM_DECLSPEC long long pshmem_longlong_atomic_swap(long long*target, long long value, int pe); +OSHMEM_DECLSPEC float pshmem_float_atomic_swap(float *target, float value, int pe); +OSHMEM_DECLSPEC double pshmem_double_atomic_swap(double *target, double value, int pe); +#if OSHMEM_HAVE_C11 +#define pshmem_atomic_swap(dst, val, pe) \ + _Generic(&*(dst), \ + int*: pshmem_int_atomic_swap, \ + long*: pshmem_long_atomic_swap, \ + long long*: pshmem_longlong_atomic_swap, \ + float*: pshmem_float_atomic_swap, \ + double*: pshmem_double_atomic_swap)(dst, val, pe) +#endif + OSHMEM_DECLSPEC int pshmem_int_swap(int *target, int value, int pe); OSHMEM_DECLSPEC long pshmem_long_swap(long *target, long value, int pe); OSHMEM_DECLSPEC long long pshmem_longlong_swap(long long*target, long long value, int pe); @@ -339,6 +585,36 @@ OSHMEM_DECLSPEC double pshmem_double_swap(double *target, double value, int pe); #endif /* Atomic set */ +OSHMEM_DECLSPEC void pshmem_ctx_int_atomic_set(shmem_ctx_t ctx, int *target, int value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_long_atomic_set(shmem_ctx_t ctx, long *target, long value, int pe); 
+OSHMEM_DECLSPEC void pshmem_ctx_longlong_atomic_set(shmem_ctx_t ctx, long long*target, long long value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_float_atomic_set(shmem_ctx_t ctx, float *target, float value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_double_atomic_set(shmem_ctx_t ctx, double *target, double value, int pe); +#if OSHMEM_HAVE_C11 +#define pshmem_atomic_set(ctx, dst, val, pe) \ + _Generic(&*(dst), \ + int*: pshmem_ctx_int_atomic_set, \ + long*: pshmem_ctx_long_atomic_set, \ + long long*: pshmem_ctx_longlong_atomic_set, \ + float*: pshmem_ctx_float_atomic_set, \ + double*: pshmem_ctx_double_atomic_set)(ctx, dst, val, pe) +#endif + +OSHMEM_DECLSPEC void pshmem_int_atomic_set(int *target, int value, int pe); +OSHMEM_DECLSPEC void pshmem_long_atomic_set(long *target, long value, int pe); +OSHMEM_DECLSPEC void pshmem_longlong_atomic_set(long long*target, long long value, int pe); +OSHMEM_DECLSPEC void pshmem_float_atomic_set(float *target, float value, int pe); +OSHMEM_DECLSPEC void pshmem_double_atomic_set(double *target, double value, int pe); +#if OSHMEM_HAVE_C11 +#define pshmem_atomic_set(dst, val, pe) \ + _Generic(&*(dst), \ + int*: pshmem_int_atomic_set, \ + long*: pshmem_long_atomic_set, \ + long long*: pshmem_longlong_atomic_set, \ + float*: pshmem_float_atomic_set, \ + double*: pshmem_double_atomic_set)(dst, val, pe) +#endif + OSHMEM_DECLSPEC void pshmem_int_set(int *target, int value, int pe); OSHMEM_DECLSPEC void pshmem_long_set(long *target, long value, int pe); OSHMEM_DECLSPEC void pshmem_longlong_set(long long*target, long long value, int pe); @@ -355,6 +631,30 @@ OSHMEM_DECLSPEC void pshmem_double_set(double *target, double value, int pe); #endif /* Atomic conditional swap */ +OSHMEM_DECLSPEC int pshmem_ctx_int_atomic_compare_swap(shmem_ctx_t ctx, int *target, int cond, int value, int pe); +OSHMEM_DECLSPEC long pshmem_ctx_long_atomic_compare_swap(shmem_ctx_t ctx, long *target, long cond, long value, int pe); +OSHMEM_DECLSPEC long long 
pshmem_ctx_longlong_atomic_compare_swap(shmem_ctx_t ctx, long long *target, long long cond, long long value, int pe); + +#if OSHMEM_HAVE_C11 +#define pshmem_atomic_compare_swap(ctx, dst, cond, val, pe) \ + _Generic(&*(dst), \ + int*: pshmem_ctx_int_atomic_compare_swap, \ + long*: pshmem_ctx_long_atomic_compare_swap, \ + long long*: pshmem_ctx_longlong_atomic_compare_swap)(ctx, dst, cond, val, pe) +#endif + +OSHMEM_DECLSPEC int pshmem_int_atomic_compare_swap(int *target, int cond, int value, int pe); +OSHMEM_DECLSPEC long pshmem_long_atomic_compare_swap(long *target, long cond, long value, int pe); +OSHMEM_DECLSPEC long long pshmem_longlong_atomic_compare_swap(long long *target, long long cond, long long value, int pe); + +#if OSHMEM_HAVE_C11 +#define pshmem_atomic_compare_swap(dst, cond, val, pe) \ + _Generic(&*(dst), \ + int*: pshmem_int_atomic_compare_swap, \ + long*: pshmem_long_atomic_compare_swap, \ + long long*: pshmem_longlong_atomic_compare_swap)(dst, cond, val, pe) +#endif + OSHMEM_DECLSPEC int pshmem_int_cswap(int *target, int cond, int value, int pe); OSHMEM_DECLSPEC long pshmem_long_cswap(long *target, long cond, long value, int pe); OSHMEM_DECLSPEC long long pshmem_longlong_cswap(long long *target, long long cond, long long value, int pe); @@ -367,6 +667,28 @@ OSHMEM_DECLSPEC long long pshmem_longlong_cswap(long long *target, long long con #endif /* Atomic Fetch&Add */ +OSHMEM_DECLSPEC int pshmem_ctx_int_atomic_fetch_add(shmem_ctx_t ctx, int *target, int value, int pe); +OSHMEM_DECLSPEC long pshmem_ctx_long_atomic_fetch_add(shmem_ctx_t ctx, long *target, long value, int pe); +OSHMEM_DECLSPEC long long pshmem_ctx_longlong_atomic_fetch_add(shmem_ctx_t ctx, long long *target, long long value, int pe); +#if OSHMEM_HAVE_C11 +#define pshmem_atomic_fetch_add(ctx, dst, val, pe) \ + _Generic(&*(dst), \ + int*: pshmem_ctx_int_atomic_fetch_add, \ + long*: pshmem_ctx_long_atomic_fetch_add, \ + long long*: pshmem_ctx_longlong_atomic_fetch_add)(ctx, dst, val, pe) 
+#endif + +OSHMEM_DECLSPEC int pshmem_int_atomic_fetch_add(int *target, int value, int pe); +OSHMEM_DECLSPEC long pshmem_long_atomic_fetch_add(long *target, long value, int pe); +OSHMEM_DECLSPEC long long pshmem_longlong_atomic_fetch_add(long long *target, long long value, int pe); +#if OSHMEM_HAVE_C11 +#define pshmem_atomic_fetch_add(dst, val, pe) \ + _Generic(&*(dst), \ + int*: pshmem_int_atomic_fetch_add, \ + long*: pshmem_long_atomic_fetch_add, \ + long long*: pshmem_longlong_atomic_fetch_add)(dst, val, pe) +#endif + OSHMEM_DECLSPEC int pshmem_int_fadd(int *target, int value, int pe); OSHMEM_DECLSPEC long pshmem_long_fadd(long *target, long value, int pe); OSHMEM_DECLSPEC long long pshmem_longlong_fadd(long long *target, long long value, int pe); @@ -379,6 +701,17 @@ OSHMEM_DECLSPEC long long pshmem_longlong_fadd(long long *target, long long valu #endif /* Atomic Fetch&And */ +OSHMEM_DECLSPEC unsigned int pshmem_ctx_uint_atomic_fetch_and(shmem_ctx_t ctx, unsigned int *target, unsigned int value, int pe); +OSHMEM_DECLSPEC unsigned long pshmem_ctx_ulong_atomic_fetch_and(shmem_ctx_t ctx, unsigned long *target, unsigned long value, int pe); +OSHMEM_DECLSPEC unsigned long long pshmem_ctx_ulonglong_atomic_fetch_and(shmem_ctx_t ctx, unsigned long long *target, unsigned long long value, int pe); +#if OSHMEM_HAVE_C11 +#define pshmem_atomic_fetch_and(ctx, dst, val, pe) \ + _Generic(&*(dst), \ + unsigned int*: pshmem_ctx_uint_atomic_fetch_and, \ + unsigned long*: pshmem_ctx_ulong_atomic_fetch_and, \ + unsigned long long*: pshmem_ctx_ulonglong_atomic_fetch_and)(ctx, dst, val, pe) +#endif + OSHMEM_DECLSPEC unsigned int pshmem_uint_atomic_fetch_and(unsigned int *target, unsigned int value, int pe); OSHMEM_DECLSPEC unsigned long pshmem_ulong_atomic_fetch_and(unsigned long *target, unsigned long value, int pe); OSHMEM_DECLSPEC unsigned long long pshmem_ulonglong_atomic_fetch_and(unsigned long long *target, unsigned long long value, int pe); @@ -391,6 +724,17 @@ OSHMEM_DECLSPEC unsigned long long
pshmem_ulonglong_atomic_fetch_and(unsigned lo #endif /* Atomic Fetch&Or */ +OSHMEM_DECLSPEC unsigned int pshmem_ctx_uint_atomic_fetch_or(shmem_ctx_t ctx, unsigned int *target, unsigned int value, int pe); +OSHMEM_DECLSPEC unsigned long pshmem_ctx_ulong_atomic_fetch_or(shmem_ctx_t ctx, unsigned long *target, unsigned long value, int pe); +OSHMEM_DECLSPEC unsigned long long pshmem_ctx_ulonglong_atomic_fetch_or(shmem_ctx_t ctx, unsigned long long *target, unsigned long long value, int pe); +#if OSHMEM_HAVE_C11 +#define pshmem_atomic_fetch_or(ctx, dst, val, pe) \ + _Generic(&*(dst), \ + unsigned int*: pshmem_ctx_uint_atomic_fetch_or, \ + unsigned long*: pshmem_ctx_ulong_atomic_fetch_or, \ + unsigned long long*: pshmem_ctx_ulonglong_atomic_fetch_or)(ctx, dst, val, pe) +#endif + OSHMEM_DECLSPEC unsigned int pshmem_uint_atomic_fetch_or(unsigned int *target, unsigned int value, int pe); OSHMEM_DECLSPEC unsigned long pshmem_ulong_atomic_fetch_or(unsigned long *target, unsigned long value, int pe); OSHMEM_DECLSPEC unsigned long long pshmem_ulonglong_atomic_fetch_or(unsigned long long *target, unsigned long long value, int pe); @@ -403,6 +747,17 @@ OSHMEM_DECLSPEC unsigned long long pshmem_ulonglong_atomic_fetch_or(unsigned lon #endif /* Atomic Fetch&Xor */ +OSHMEM_DECLSPEC unsigned int pshmem_ctx_uint_atomic_fetch_xor(shmem_ctx_t ctx, unsigned int *target, unsigned int value, int pe); +OSHMEM_DECLSPEC unsigned long pshmem_ctx_ulong_atomic_fetch_xor(shmem_ctx_t ctx, unsigned long *target, unsigned long value, int pe); +OSHMEM_DECLSPEC unsigned long long pshmem_ctx_ulonglong_atomic_fetch_xor(shmem_ctx_t ctx, unsigned long long *target, unsigned long long value, int pe); +#if OSHMEM_HAVE_C11 +#define pshmem_atomic_fetch_xor(ctx, dst, val, pe) \ + _Generic(&*(dst), \ + unsigned int*: pshmem_ctx_uint_atomic_fetch_xor, \ + unsigned long*: pshmem_ctx_ulong_atomic_fetch_xor, \ + unsigned long long*: pshmem_ctx_ulonglong_atomic_fetch_xor)(ctx, dst, val, pe) +#endif + OSHMEM_DECLSPEC 
unsigned int pshmem_uint_atomic_fetch_xor(unsigned int *target, unsigned int value, int pe); OSHMEM_DECLSPEC unsigned long pshmem_ulong_atomic_fetch_xor(unsigned long *target, unsigned long value, int pe); OSHMEM_DECLSPEC unsigned long long pshmem_ulonglong_atomic_fetch_xor(unsigned long long *target, unsigned long long value, int pe); @@ -415,6 +770,36 @@ OSHMEM_DECLSPEC unsigned long long pshmem_ulonglong_atomic_fetch_xor(unsigned lo #endif /* Atomic Fetch */ +OSHMEM_DECLSPEC int pshmem_ctx_int_atomic_fetch(shmem_ctx_t ctx, const int *target, int pe); +OSHMEM_DECLSPEC long pshmem_ctx_long_atomic_fetch(shmem_ctx_t ctx, const long *target, int pe); +OSHMEM_DECLSPEC long long pshmem_ctx_longlong_atomic_fetch(shmem_ctx_t ctx, const long long *target, int pe); +OSHMEM_DECLSPEC float pshmem_ctx_float_atomic_fetch(shmem_ctx_t ctx, const float *target, int pe); +OSHMEM_DECLSPEC double pshmem_ctx_double_atomic_fetch(shmem_ctx_t ctx, const double *target, int pe); +#if OSHMEM_HAVE_C11 +#define pshmem_atomic_fetch(ctx, dst, pe) \ + _Generic(&*(dst), \ + int*: pshmem_ctx_int_atomic_fetch, \ + long*: pshmem_ctx_long_atomic_fetch, \ + long long*: pshmem_ctx_longlong_atomic_fetch,\ + float*: pshmem_ctx_float_atomic_fetch, \ + double*: pshmem_ctx_double_atomic_fetch)(ctx, dst, pe) +#endif + +OSHMEM_DECLSPEC int pshmem_int_atomic_fetch(const int *target, int pe); +OSHMEM_DECLSPEC long pshmem_long_atomic_fetch(const long *target, int pe); +OSHMEM_DECLSPEC long long pshmem_longlong_atomic_fetch(const long long *target, int pe); +OSHMEM_DECLSPEC float pshmem_float_atomic_fetch(const float *target, int pe); +OSHMEM_DECLSPEC double pshmem_double_atomic_fetch(const double *target, int pe); +#if OSHMEM_HAVE_C11 +#define pshmem_atomic_fetch(dst, pe) \ + _Generic(&*(dst), \ + int*: pshmem_int_atomic_fetch, \ + long*: pshmem_long_atomic_fetch, \ + long long*: pshmem_longlong_atomic_fetch, \ + float*: pshmem_float_atomic_fetch, \ + double*: pshmem_double_atomic_fetch)(dst, pe) +#endif + 
OSHMEM_DECLSPEC int pshmem_int_fetch(const int *target, int pe); OSHMEM_DECLSPEC long pshmem_long_fetch(const long *target, int pe); OSHMEM_DECLSPEC long long pshmem_longlong_fetch(const long long *target, int pe); @@ -431,18 +816,62 @@ OSHMEM_DECLSPEC double pshmem_double_fetch(const double *target, int pe); #endif /* Atomic Fetch&Inc */ +OSHMEM_DECLSPEC int pshmem_ctx_int_atomic_fetch_inc(shmem_ctx_t ctx, int *target, int pe); +OSHMEM_DECLSPEC long pshmem_ctx_long_atomic_fetch_inc(shmem_ctx_t ctx, long *target, int pe); +OSHMEM_DECLSPEC long long pshmem_ctx_longlong_atomic_fetch_inc(shmem_ctx_t ctx, long long *target, int pe); +#if OSHMEM_HAVE_C11 +#define pshmem_atomic_fetch_inc(ctx, dst, pe) \ + _Generic(&*(dst), \ + int*: pshmem_ctx_int_atomic_fetch_inc, \ + long*: pshmem_ctx_long_atomic_fetch_inc, \ + long long*: pshmem_ctx_longlong_atomic_fetch_inc)(ctx, dst, pe) +#endif + +OSHMEM_DECLSPEC int pshmem_int_atomic_fetch_inc(int *target, int pe); +OSHMEM_DECLSPEC long pshmem_long_atomic_fetch_inc(long *target, int pe); +OSHMEM_DECLSPEC long long pshmem_longlong_atomic_fetch_inc(long long *target, int pe); +#if OSHMEM_HAVE_C11 +#define pshmem_atomic_fetch_inc(dst, pe) \ + _Generic(&*(dst), \ + int*: pshmem_int_atomic_fetch_inc, \ + long*: pshmem_long_atomic_fetch_inc, \ + long long*: pshmem_longlong_atomic_fetch_inc)(dst, pe) +#endif + OSHMEM_DECLSPEC int pshmem_int_finc(int *target, int pe); OSHMEM_DECLSPEC long pshmem_long_finc(long *target, int pe); OSHMEM_DECLSPEC long long pshmem_longlong_finc(long long *target, int pe); #if OSHMEMP_HAVE_C11 -#define pshmem_finc(dst, val, pe) \ +#define pshmem_finc(dst, pe) \ _Generic(&*(dst), \ int*: pshmem_int_finc, \ long*: pshmem_long_finc, \ - long long*: pshmem_longlong_finc)(dst, val, pe) + long long*: pshmem_longlong_finc)(dst, pe) #endif /* Atomic Add */ +OSHMEM_DECLSPEC void pshmem_ctx_int_atomic_add(shmem_ctx_t ctx, int *target, int value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_long_atomic_add(shmem_ctx_t ctx, 
long *target, long value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_longlong_atomic_add(shmem_ctx_t ctx, long long *target, long long value, int pe); +#if OSHMEM_HAVE_C11 +#define pshmem_atomic_add(ctx, dst, val, pe) \ + _Generic(&*(dst), \ + int*: pshmem_ctx_int_atomic_add, \ + long*: pshmem_ctx_long_atomic_add, \ + long long*: pshmem_ctx_longlong_atomic_add)(ctx, dst, val, pe) +#endif + +OSHMEM_DECLSPEC void pshmem_int_atomic_add(int *target, int value, int pe); +OSHMEM_DECLSPEC void pshmem_long_atomic_add(long *target, long value, int pe); +OSHMEM_DECLSPEC void pshmem_longlong_atomic_add(long long *target, long long value, int pe); +#if OSHMEM_HAVE_C11 +#define pshmem_atomic_add(dst, val, pe) \ + _Generic(&*(dst), \ + int*: pshmem_int_atomic_add, \ + long*: pshmem_long_atomic_add, \ + long long*: pshmem_longlong_atomic_add)(dst, val, pe) +#endif + OSHMEM_DECLSPEC void pshmem_int_add(int *target, int value, int pe); OSHMEM_DECLSPEC void pshmem_long_add(long *target, long value, int pe); OSHMEM_DECLSPEC void pshmem_longlong_add(long long *target, long long value, int pe); @@ -455,6 +884,17 @@ OSHMEM_DECLSPEC void pshmem_longlong_add(long long *target, long long value, int #endif /* Atomic And */ +OSHMEM_DECLSPEC void pshmem_ctx_uint_atomic_and(shmem_ctx_t ctx, unsigned int *target, unsigned int value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_ulong_atomic_and(shmem_ctx_t ctx, unsigned long *target, unsigned long value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_ulonglong_atomic_and(shmem_ctx_t ctx, unsigned long long *target, unsigned long long value, int pe); +#if OSHMEM_HAVE_C11 +#define pshmem_atomic_and(ctx, dst, val, pe) \ + _Generic(&*(dst), \ + unsigned int*: pshmem_ctx_uint_atomic_and, \ + unsigned long*: pshmem_ctx_ulong_atomic_and, \ + unsigned long long*: pshmem_ctx_ulonglong_atomic_and)(ctx, dst, val, pe) +#endif + OSHMEM_DECLSPEC void pshmem_uint_atomic_and(unsigned int *target, unsigned int value, int pe); OSHMEM_DECLSPEC void 
pshmem_ulong_atomic_and(unsigned long *target, unsigned long value, int pe); OSHMEM_DECLSPEC void pshmem_ulonglong_atomic_and(unsigned long long *target, unsigned long long value, int pe); @@ -467,6 +907,17 @@ OSHMEM_DECLSPEC void pshmem_ulonglong_atomic_and(unsigned long long *target, uns #endif /* Atomic Or */ +OSHMEM_DECLSPEC void pshmem_ctx_uint_atomic_or(shmem_ctx_t ctx, unsigned int *target, unsigned int value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_ulong_atomic_or(shmem_ctx_t ctx, unsigned long *target, unsigned long value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_ulonglong_atomic_or(shmem_ctx_t ctx, unsigned long long *target, unsigned long long value, int pe); +#if OSHMEM_HAVE_C11 +#define pshmem_atomic_or(ctx, dst, val, pe) \ + _Generic(&*(dst), \ + unsigned int*: pshmem_ctx_uint_atomic_or, \ + unsigned long*: pshmem_ctx_ulong_atomic_or, \ + unsigned long long*: pshmem_ctx_ulonglong_atomic_or)(ctx, dst, val, pe) +#endif + OSHMEM_DECLSPEC void pshmem_uint_atomic_or(unsigned int *target, unsigned int value, int pe); OSHMEM_DECLSPEC void pshmem_ulong_atomic_or(unsigned long *target, unsigned long value, int pe); OSHMEM_DECLSPEC void pshmem_ulonglong_atomic_or(unsigned long long *target, unsigned long long value, int pe); @@ -479,6 +930,17 @@ OSHMEM_DECLSPEC void pshmem_ulonglong_atomic_or(unsigned long long *target, unsi #endif /* Atomic Xor */ +OSHMEM_DECLSPEC void pshmem_ctx_uint_atomic_xor(shmem_ctx_t ctx, unsigned int *target, unsigned int value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_ulong_atomic_xor(shmem_ctx_t ctx, unsigned long *target, unsigned long value, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_ulonglong_atomic_xor(shmem_ctx_t ctx, unsigned long long *target, unsigned long long value, int pe); +#if OSHMEM_HAVE_C11 +#define pshmem_atomic_xor(ctx, dst, val, pe) \ + _Generic(&*(dst), \ + unsigned int*: pshmem_ctx_uint_atomic_xor, \ + unsigned long*: pshmem_ctx_ulong_atomic_xor, \ + unsigned long long*: pshmem_ctx_ulonglong_atomic_xor)(ctx, 
dst, val, pe) +#endif + OSHMEM_DECLSPEC void pshmem_uint_atomic_xor(unsigned int *target, unsigned int value, int pe); OSHMEM_DECLSPEC void pshmem_ulong_atomic_xor(unsigned long *target, unsigned long value, int pe); OSHMEM_DECLSPEC void pshmem_ulonglong_atomic_xor(unsigned long long *target, unsigned long long value, int pe); @@ -491,6 +953,28 @@ OSHMEM_DECLSPEC void pshmem_ulonglong_atomic_xor(unsigned long long *target, uns #endif /* Atomic Inc */ +OSHMEM_DECLSPEC void pshmem_ctx_int_atomic_inc(shmem_ctx_t ctx, int *target, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_long_atomic_inc(shmem_ctx_t ctx, long *target, int pe); +OSHMEM_DECLSPEC void pshmem_ctx_longlong_atomic_inc(shmem_ctx_t ctx, long long *target, int pe); +#if OSHMEM_HAVE_C11 +#define pshmem_atomic_inc(ctx, dst, pe) \ + _Generic(&*(dst), \ + int*: pshmem_ctx_int_atomic_inc, \ + long*: pshmem_ctx_long_atomic_inc, \ + long long*: pshmem_ctx_longlong_atomic_inc)(ctx, dst, pe) +#endif + +OSHMEM_DECLSPEC void pshmem_int_atomic_inc(int *target, int pe); +OSHMEM_DECLSPEC void pshmem_long_atomic_inc(long *target, int pe); +OSHMEM_DECLSPEC void pshmem_longlong_atomic_inc(long long *target, int pe); +#if OSHMEM_HAVE_C11 +#define pshmem_atomic_inc(dst, pe) \ + _Generic(&*(dst), \ + int*: pshmem_int_atomic_inc, \ + long*: pshmem_long_atomic_inc, \ + long long*: pshmem_longlong_atomic_inc)(dst, pe) +#endif + OSHMEM_DECLSPEC void pshmem_int_inc(int *target, int pe); OSHMEM_DECLSPEC void pshmem_long_inc(long *target, int pe); OSHMEM_DECLSPEC void pshmem_longlong_inc(long long *target, int pe); @@ -501,6 +985,7 @@ OSHMEM_DECLSPEC void pshmem_longlong_inc(long long *target, int pe); long*: pshmem_long_inc, \ long long*: pshmem_longlong_inc)(dst, pe) #endif + /* * Lock functions */ @@ -521,15 +1006,39 @@ OSHMEM_DECLSPEC void pshmem_short_wait_until(volatile short *addr, int cmp, sho OSHMEM_DECLSPEC void pshmem_int_wait_until(volatile int *addr, int cmp, int value); OSHMEM_DECLSPEC void pshmem_long_wait_until(volatile 
long *addr, int cmp, long value); OSHMEM_DECLSPEC void pshmem_longlong_wait_until(volatile long long *addr, int cmp, long long value); -OSHMEM_DECLSPEC void pshmem_wait_until(volatile long *addr, int cmp, long value); +#if OSHMEM_HAVE_C11 +#define pshmem_wait_until(addr, cmp, value) \ + _Generic(&*(addr), \ + short*: pshmem_short_wait_until, \ + int*: pshmem_int_wait_until, \ + long*: pshmem_long_wait_until, \ + long long*: pshmem_longlong_wait_until)(addr, cmp, value) +#endif + +OSHMEM_DECLSPEC int pshmem_short_test(volatile short *addr, int cmp, short value); +OSHMEM_DECLSPEC int pshmem_int_test(volatile int *addr, int cmp, int value); +OSHMEM_DECLSPEC int pshmem_long_test(volatile long *addr, int cmp, long value); +OSHMEM_DECLSPEC int pshmem_longlong_test(volatile long long *addr, int cmp, long long value); +#if OSHMEM_HAVE_C11 +#define pshmem_test(addr, cmp, value) \ + _Generic(&*(addr), \ + short*: pshmem_short_test, \ + int*: pshmem_int_test, \ + long*: pshmem_long_test, \ + long long*: pshmem_longlong_test)(addr, cmp, value) +#endif /* * Barrier sync routines */ OSHMEM_DECLSPEC void pshmem_barrier(int PE_start, int logPE_stride, int PE_size, long *pSync); OSHMEM_DECLSPEC void pshmem_barrier_all(void); +OSHMEM_DECLSPEC void pshmem_sync(int PE_start, int logPE_stride, int PE_size, long *pSync); +OSHMEM_DECLSPEC void pshmem_sync_all(void); OSHMEM_DECLSPEC void pshmem_fence(void); +OSHMEM_DECLSPEC void pshmem_ctx_fence(shmem_ctx_t ctx); OSHMEM_DECLSPEC void pshmem_quiet(void); +OSHMEM_DECLSPEC void pshmem_ctx_quiet(shmem_ctx_t ctx); /* * Collective routines diff --git a/oshmem/include/shmem.h.in b/oshmem/include/shmem.h.in index cf663a8ced..816f216693 100644 --- a/oshmem/include/shmem.h.in +++ b/oshmem/include/shmem.h.in @@ -136,6 +136,7 @@ enum shmem_wait_ops { * Initialization routines */ OSHMEM_DECLSPEC void shmem_init(void); +OSHMEM_DECLSPEC int shmem_init_thread(int requested, int *provided); OSHMEM_DECLSPEC void shmem_finalize(void); OSHMEM_DECLSPEC void
shmem_global_exit(int status); @@ -144,6 +145,7 @@ OSHMEM_DECLSPEC void shmem_global_exit(int status); */ OSHMEM_DECLSPEC int shmem_n_pes(void); OSHMEM_DECLSPEC int shmem_my_pe(void); +OSHMEM_DECLSPEC void shmem_query_thread(int *provided); /* * Info routines @@ -161,6 +163,7 @@ OSHMEM_DECLSPEC int shmem_addr_accessible(const void *addr, int pe); * Symmetric heap routines */ OSHMEM_DECLSPEC void* shmem_malloc(size_t size); +OSHMEM_DECLSPEC void* shmem_calloc(size_t count, size_t size); OSHMEM_DECLSPEC void* shmem_align(size_t align, size_t size); OSHMEM_DECLSPEC void* shmem_realloc(void *ptr, size_t size); OSHMEM_DECLSPEC void shmem_free(void* ptr); @@ -170,9 +173,41 @@ OSHMEM_DECLSPEC void shmem_free(void* ptr); */ OSHMEM_DECLSPEC void *shmem_ptr(const void *ptr, int pe); +/* + * Communication context operations + */ + +typedef struct { int dummy; } * shmem_ctx_t; + +#define SHMEM_CTX_DEFAULT oshmem_ctx_default + +OSHMEM_DECLSPEC int shmem_ctx_create(long options, shmem_ctx_t *ctx); +OSHMEM_DECLSPEC void shmem_ctx_destroy(shmem_ctx_t ctx); + /* * Elemental put routines */ +OSHMEM_DECLSPEC void shmem_ctx_char_p(shmem_ctx_t ctx, char* addr, char value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_short_p(shmem_ctx_t ctx, short* addr, short value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_int_p(shmem_ctx_t ctx, int* addr, int value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_long_p(shmem_ctx_t ctx, long* addr, long value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_float_p(shmem_ctx_t ctx, float* addr, float value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_double_p(shmem_ctx_t ctx, double* addr, double value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_longlong_p(shmem_ctx_t ctx, long long* addr, long long value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_longdouble_p(shmem_ctx_t ctx, long double* addr, long double value, int pe); +#if OSHMEM_HAVE_C11 +#define shmem_p(ctx, dst, val, pe) \ + _Generic(&*(dst), \ + char*: shmem_ctx_char_p, \ + short*: shmem_ctx_short_p, \ + int*:
shmem_ctx_int_p, \ + long*: shmem_ctx_long_p, \ + long long*: shmem_ctx_longlong_p, \ + float*: shmem_ctx_float_p, \ + double*: shmem_ctx_double_p, \ + long double*: shmem_ctx_longdouble_p)(ctx, dst, val, pe) +#endif + OSHMEM_DECLSPEC void shmem_char_p(char* addr, char value, int pe); OSHMEM_DECLSPEC void shmem_short_p(short* addr, short value, int pe); OSHMEM_DECLSPEC void shmem_int_p(int* addr, int value, int pe); @@ -197,6 +232,27 @@ OSHMEM_DECLSPEC void shmem_longdouble_p(long double* addr, long double value, i /* * Block data put routines */ +OSHMEM_DECLSPEC void shmem_ctx_char_put(shmem_ctx_t ctx, char *target, const char *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_short_put(shmem_ctx_t ctx, short *target, const short *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_int_put(shmem_ctx_t ctx, int* target, const int* source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_long_put(shmem_ctx_t ctx, long *target, const long *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_float_put(shmem_ctx_t ctx, float *target, const float *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_double_put(shmem_ctx_t ctx, double *target, const double *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_longlong_put(shmem_ctx_t ctx, long long *target, const long long *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_longdouble_put(shmem_ctx_t ctx, long double *target, const long double *source, size_t len, int pe); +#if OSHMEM_HAVE_C11 +#define shmem_put(ctx, dst, src, len, pe) \ + _Generic(&*(dst), \ + char*: shmem_ctx_char_put, \ + short*: shmem_ctx_short_put, \ + int*: shmem_ctx_int_put, \ + long*: shmem_ctx_long_put, \ + long long*: shmem_ctx_longlong_put, \ + float*: shmem_ctx_float_put, \ + double*: shmem_ctx_double_put, \ + long double*: shmem_ctx_longdouble_put)(ctx, dst, src, len, pe) +#endif + OSHMEM_DECLSPEC void shmem_char_put(char *target, const char *source, size_t len, int pe); 
OSHMEM_DECLSPEC void shmem_short_put(short *target, const short *source, size_t len, int pe); OSHMEM_DECLSPEC void shmem_int_put(int* target, const int* source, size_t len, int pe); @@ -218,6 +274,13 @@ OSHMEM_DECLSPEC void shmem_longdouble_put(long double *target, const long doubl long double*: shmem_longdouble_put)(dst, src, len, pe) #endif +OSHMEM_DECLSPEC void shmem_ctx_put8(shmem_ctx_t ctx, void *target, const void *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_put16(shmem_ctx_t ctx, void *target, const void *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_put32(shmem_ctx_t ctx, void *target, const void *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_put64(shmem_ctx_t ctx, void *target, const void *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_put128(shmem_ctx_t ctx, void *target, const void *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_putmem(shmem_ctx_t ctx, void *target, const void *source, size_t len, int pe); + OSHMEM_DECLSPEC void shmem_put8(void *target, const void *source, size_t len, int pe); OSHMEM_DECLSPEC void shmem_put16(void *target, const void *source, size_t len, int pe); OSHMEM_DECLSPEC void shmem_put32(void *target, const void *source, size_t len, int pe); @@ -229,6 +292,27 @@ OSHMEM_DECLSPEC void shmem_putmem(void *target, const void *source, size_t len, /* * Strided put routines */ +OSHMEM_DECLSPEC void shmem_ctx_char_iput(shmem_ctx_t ctx, char* target, const char* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_short_iput(shmem_ctx_t ctx, short* target, const short* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_int_iput(shmem_ctx_t ctx, int* target, const int* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_long_iput(shmem_ctx_t ctx, long* target, const long* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void 
shmem_ctx_float_iput(shmem_ctx_t ctx, float* target, const float* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_double_iput(shmem_ctx_t ctx, double* target, const double* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_longlong_iput(shmem_ctx_t ctx, long long* target, const long long* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_longdouble_iput(shmem_ctx_t ctx, long double* target, const long double* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +#if OSHMEM_HAVE_C11 +#define shmem_iput(ctx, dst, src, tst, sst, len, pe) \ + _Generic(&*(dst), \ + char*: shmem_ctx_char_iput, \ + short*: shmem_ctx_short_iput, \ + int*: shmem_ctx_int_iput, \ + long*: shmem_ctx_long_iput, \ + long long*: shmem_ctx_longlong_iput, \ + float*: shmem_ctx_float_iput, \ + double*: shmem_ctx_double_iput, \ + long double*: shmem_ctx_longdouble_iput)(ctx, dst, src, tst, sst, len, pe) +#endif + OSHMEM_DECLSPEC void shmem_char_iput(char* target, const char* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); OSHMEM_DECLSPEC void shmem_short_iput(short* target, const short* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); OSHMEM_DECLSPEC void shmem_int_iput(int* target, const int* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); @@ -250,6 +334,12 @@ OSHMEM_DECLSPEC void shmem_longdouble_iput(long double* target, const long doubl long double*: shmem_longdouble_iput)(dst, src, tst, sst, len, pe) #endif +OSHMEM_DECLSPEC void shmem_ctx_iput8(shmem_ctx_t ctx, void* target, const void* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_iput16(shmem_ctx_t ctx, void* target, const void* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_iput32(shmem_ctx_t ctx, void* target, const void* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void 
shmem_ctx_iput64(shmem_ctx_t ctx, void* target, const void* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_iput128(shmem_ctx_t ctx, void* target, const void* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); + OSHMEM_DECLSPEC void shmem_iput8(void* target, const void* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); OSHMEM_DECLSPEC void shmem_iput16(void* target, const void* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); OSHMEM_DECLSPEC void shmem_iput32(void* target, const void* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); @@ -259,6 +349,27 @@ OSHMEM_DECLSPEC void shmem_iput128(void* target, const void* source, ptrdiff_t t /* * Nonblocking put routines */ +OSHMEM_DECLSPEC void shmem_ctx_char_put_nbi(shmem_ctx_t ctx, char *target, const char *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_short_put_nbi(shmem_ctx_t ctx, short *target, const short *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_int_put_nbi(shmem_ctx_t ctx, int* target, const int* source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_long_put_nbi(shmem_ctx_t ctx, long *target, const long *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_longlong_put_nbi(shmem_ctx_t ctx, long long *target, const long long *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_float_put_nbi(shmem_ctx_t ctx, float *target, const float *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_double_put_nbi(shmem_ctx_t ctx, double *target, const double *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_longdouble_put_nbi(shmem_ctx_t ctx, long double *target, const long double *source, size_t len, int pe); +#if OSHMEM_HAVE_C11 +#define shmem_put_nbi(ctx, dst, src, len, pe) \ + _Generic(&*(dst), \ + char*: shmem_ctx_char_put_nbi, \ + short*: shmem_ctx_short_put_nbi, \ + int*: shmem_ctx_int_put_nbi, \ + long*: shmem_ctx_long_put_nbi, \ + long long*: 
shmem_ctx_longlong_put_nbi, \ + float*: shmem_ctx_float_put_nbi, \ + double*: shmem_ctx_double_put_nbi, \ + long double*: shmem_ctx_longdouble_put_nbi)(ctx, dst, src, len, pe) +#endif + OSHMEM_DECLSPEC void shmem_char_put_nbi(char *target, const char *source, size_t len, int pe); OSHMEM_DECLSPEC void shmem_short_put_nbi(short *target, const short *source, size_t len, int pe); OSHMEM_DECLSPEC void shmem_int_put_nbi(int* target, const int* source, size_t len, int pe); @@ -280,6 +391,13 @@ OSHMEM_DECLSPEC void shmem_longdouble_put_nbi(long double *target, const long d long double*: shmem_longdouble_put_nbi)(dst, src, len, pe) #endif +OSHMEM_DECLSPEC void shmem_ctx_put8_nbi(shmem_ctx_t ctx, void *target, const void *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_put16_nbi(shmem_ctx_t ctx, void *target, const void *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_put32_nbi(shmem_ctx_t ctx, void *target, const void *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_put64_nbi(shmem_ctx_t ctx, void *target, const void *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_put128_nbi(shmem_ctx_t ctx, void *target, const void *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_putmem_nbi(shmem_ctx_t ctx, void *target, const void *source, size_t len, int pe); + OSHMEM_DECLSPEC void shmem_put8_nbi(void *target, const void *source, size_t len, int pe); OSHMEM_DECLSPEC void shmem_put16_nbi(void *target, const void *source, size_t len, int pe); OSHMEM_DECLSPEC void shmem_put32_nbi(void *target, const void *source, size_t len, int pe); @@ -290,6 +408,27 @@ OSHMEM_DECLSPEC void shmem_putmem_nbi(void *target, const void *source, size_t /* * Elemental get routines */ +OSHMEM_DECLSPEC char shmem_ctx_char_g(shmem_ctx_t ctx, const char* addr, int pe); +OSHMEM_DECLSPEC short shmem_ctx_short_g(shmem_ctx_t ctx, const short* addr, int pe); +OSHMEM_DECLSPEC int shmem_ctx_int_g(shmem_ctx_t ctx, const int* addr, int pe); +OSHMEM_DECLSPEC 
long shmem_ctx_long_g(shmem_ctx_t ctx, const long* addr, int pe); +OSHMEM_DECLSPEC float shmem_ctx_float_g(shmem_ctx_t ctx, const float* addr, int pe); +OSHMEM_DECLSPEC double shmem_ctx_double_g(shmem_ctx_t ctx, const double* addr, int pe); +OSHMEM_DECLSPEC long long shmem_ctx_longlong_g(shmem_ctx_t ctx, const long long* addr, int pe); +OSHMEM_DECLSPEC long double shmem_ctx_longdouble_g(shmem_ctx_t ctx, const long double* addr, int pe); +#if OSHMEM_HAVE_C11 +#define shmem_g(ctx, addr, pe) \ + _Generic(&*(addr), \ + char*: shmem_ctx_char_g, \ + short*: shmem_ctx_short_g, \ + int*: shmem_ctx_int_g, \ + long*: shmem_ctx_long_g, \ + long long*: shmem_ctx_longlong_g, \ + float*: shmem_ctx_float_g, \ + double*: shmem_ctx_double_g, \ + long double*: shmem_ctx_longdouble_g)(ctx, addr, pe) +#endif + OSHMEM_DECLSPEC char shmem_char_g(const char* addr, int pe); OSHMEM_DECLSPEC short shmem_short_g(const short* addr, int pe); OSHMEM_DECLSPEC int shmem_int_g(const int* addr, int pe); @@ -314,6 +453,27 @@ OSHMEM_DECLSPEC long double shmem_longdouble_g(const long double* addr, int pe) /* * Block data get routines */ +OSHMEM_DECLSPEC void shmem_ctx_char_get(shmem_ctx_t ctx, char *target, const char *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_short_get(shmem_ctx_t ctx, short *target, const short *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_int_get(shmem_ctx_t ctx, int *target, const int *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_long_get(shmem_ctx_t ctx, long *target, const long *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_float_get(shmem_ctx_t ctx, float *target, const float *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_double_get(shmem_ctx_t ctx, double *target, const double *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_longlong_get(shmem_ctx_t ctx, long long *target, const long long *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_longdouble_get(shmem_ctx_t ctx, 
long double *target, const long double *source, size_t len, int pe); +#if OSHMEM_HAVE_C11 +#define shmem_get(ctx, dst, src, len, pe) \ + _Generic(&*(dst), \ + char*: shmem_ctx_char_get, \ + short*: shmem_ctx_short_get, \ + int*: shmem_ctx_int_get, \ + long*: shmem_ctx_long_get, \ + long long*: shmem_ctx_longlong_get, \ + float*: shmem_ctx_float_get, \ + double*: shmem_ctx_double_get, \ + long double*: shmem_ctx_longdouble_get)(ctx, dst, src, len, pe) +#endif + OSHMEM_DECLSPEC void shmem_char_get(char *target, const char *source, size_t len, int pe); OSHMEM_DECLSPEC void shmem_short_get(short *target, const short *source, size_t len, int pe); OSHMEM_DECLSPEC void shmem_int_get(int *target, const int *source, size_t len, int pe); @@ -335,6 +495,13 @@ OSHMEM_DECLSPEC void shmem_longdouble_get(long double *target, const long doubl long double*: shmem_longdouble_get)(dst, src, len, pe) #endif +OSHMEM_DECLSPEC void shmem_ctx_get8(shmem_ctx_t ctx, void *target, const void *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_get16(shmem_ctx_t ctx, void *target, const void *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_get32(shmem_ctx_t ctx, void *target, const void *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_get64(shmem_ctx_t ctx, void *target, const void *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_get128(shmem_ctx_t ctx, void *target, const void *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_getmem(shmem_ctx_t ctx, void *target, const void *source, size_t len, int pe); + OSHMEM_DECLSPEC void shmem_get8(void *target, const void *source, size_t len, int pe); OSHMEM_DECLSPEC void shmem_get16(void *target, const void *source, size_t len, int pe); OSHMEM_DECLSPEC void shmem_get32(void *target, const void *source, size_t len, int pe); @@ -345,6 +512,27 @@ OSHMEM_DECLSPEC void shmem_getmem(void *target, const void *source, size_t len, /* * Strided get routines */ +OSHMEM_DECLSPEC void 
shmem_ctx_char_iget(shmem_ctx_t ctx, char* target, const char* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_short_iget(shmem_ctx_t ctx, short* target, const short* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_int_iget(shmem_ctx_t ctx, int* target, const int* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_float_iget(shmem_ctx_t ctx, float* target, const float* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_double_iget(shmem_ctx_t ctx, double* target, const double* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_longlong_iget(shmem_ctx_t ctx, long long* target, const long long* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_longdouble_iget(shmem_ctx_t ctx, long double* target, const long double* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_long_iget(shmem_ctx_t ctx, long* target, const long* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +#if OSHMEM_HAVE_C11 +#define shmem_iget(ctx, dst, src, tst, sst, len, pe)\ + _Generic(&*(dst), \ + char*: shmem_ctx_char_iget, \ + short*: shmem_ctx_short_iget, \ + int*: shmem_ctx_int_iget, \ + long*: shmem_ctx_long_iget, \ + long long*: shmem_ctx_longlong_iget, \ + float*: shmem_ctx_float_iget, \ + double*: shmem_ctx_double_iget, \ + long double*: shmem_ctx_longdouble_iget)(ctx, dst, src, tst, sst, len, pe) +#endif + OSHMEM_DECLSPEC void shmem_char_iget(char* target, const char* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); OSHMEM_DECLSPEC void shmem_short_iget(short* target, const short* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); OSHMEM_DECLSPEC void shmem_int_iget(int* target, const int* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); @@ -366,6 +554,12 @@ OSHMEM_DECLSPEC void 
shmem_long_iget(long* target, const long* source, ptrdiff_t long double*: shmem_longdouble_iget)(dst, src, tst, sst, len, pe) #endif +OSHMEM_DECLSPEC void shmem_ctx_iget8(shmem_ctx_t ctx, void* target, const void* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_iget16(shmem_ctx_t ctx, void* target, const void* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_iget32(shmem_ctx_t ctx, void* target, const void* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_iget64(shmem_ctx_t ctx, void* target, const void* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_iget128(shmem_ctx_t ctx, void* target, const void* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); + OSHMEM_DECLSPEC void shmem_iget8(void* target, const void* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); OSHMEM_DECLSPEC void shmem_iget16(void* target, const void* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); OSHMEM_DECLSPEC void shmem_iget32(void* target, const void* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); @@ -375,6 +569,27 @@ OSHMEM_DECLSPEC void shmem_iget128(void* target, const void* source, ptrdiff_t t /* * Nonblocking data get routines */ +OSHMEM_DECLSPEC void shmem_ctx_char_get_nbi(shmem_ctx_t ctx, char *target, const char *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_short_get_nbi(shmem_ctx_t ctx, short *target, const short *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_int_get_nbi(shmem_ctx_t ctx, int *target, const int *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_long_get_nbi(shmem_ctx_t ctx, long *target, const long *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_longlong_get_nbi(shmem_ctx_t ctx, long long *target, const long long *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_float_get_nbi(shmem_ctx_t ctx, float 
*target, const float *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_double_get_nbi(shmem_ctx_t ctx, double *target, const double *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_longdouble_get_nbi(shmem_ctx_t ctx, long double *target, const long double *source, size_t len, int pe); +#if OSHMEM_HAVE_C11 +#define shmem_get_nbi(ctx, dst, src, len, pe) \ + _Generic(&*(dst), \ + char*: shmem_ctx_char_get_nbi, \ + short*: shmem_ctx_short_get_nbi, \ + int*: shmem_ctx_int_get_nbi, \ + long*: shmem_ctx_long_get_nbi, \ + long long*: shmem_ctx_longlong_get_nbi, \ + float*: shmem_ctx_float_get_nbi, \ + double*: shmem_ctx_double_get_nbi, \ + long double*: shmem_ctx_longdouble_get_nbi)(ctx, dst, src, len, pe) +#endif + OSHMEM_DECLSPEC void shmem_getmem_nbi(void *target, const void *source, size_t len, int pe); OSHMEM_DECLSPEC void shmem_char_get_nbi(char *target, const char *source, size_t len, int pe); OSHMEM_DECLSPEC void shmem_short_get_nbi(short *target, const short *source, size_t len, int pe); @@ -397,6 +612,13 @@ OSHMEM_DECLSPEC void shmem_longdouble_get_nbi(long double *target, const long d long double*: shmem_longdouble_get_nbi)(dst, src, len, pe) #endif +OSHMEM_DECLSPEC void shmem_ctx_get8_nbi(shmem_ctx_t ctx, void *target, const void *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_get16_nbi(shmem_ctx_t ctx, void *target, const void *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_get32_nbi(shmem_ctx_t ctx, void *target, const void *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_get64_nbi(shmem_ctx_t ctx, void *target, const void *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_get128_nbi(shmem_ctx_t ctx, void *target, const void *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_ctx_getmem_nbi(shmem_ctx_t ctx, void *target, const void *source, size_t len, int pe); + OSHMEM_DECLSPEC void shmem_get8_nbi(void *target, const void *source, size_t len, int pe); OSHMEM_DECLSPEC void 
shmem_get16_nbi(void *target, const void *source, size_t len, int pe); OSHMEM_DECLSPEC void shmem_get32_nbi(void *target, const void *source, size_t len, int pe); @@ -407,6 +629,36 @@ OSHMEM_DECLSPEC void shmem_get128_nbi(void *target, const void *source, size_t * Atomic operations */ /* Atomic swap */ +OSHMEM_DECLSPEC int shmem_ctx_int_atomic_swap(shmem_ctx_t ctx, int *target, int value, int pe); +OSHMEM_DECLSPEC long shmem_ctx_long_atomic_swap(shmem_ctx_t ctx, long *target, long value, int pe); +OSHMEM_DECLSPEC long long shmem_ctx_longlong_atomic_swap(shmem_ctx_t ctx, long long*target, long long value, int pe); +OSHMEM_DECLSPEC float shmem_ctx_float_atomic_swap(shmem_ctx_t ctx, float *target, float value, int pe); +OSHMEM_DECLSPEC double shmem_ctx_double_atomic_swap(shmem_ctx_t ctx, double *target, double value, int pe); +#if OSHMEM_HAVE_C11 +#define shmem_atomic_swap(ctx, dst, val, pe) \ + _Generic(&*(dst), \ + int*: shmem_ctx_int_atomic_swap, \ + long*: shmem_ctx_long_atomic_swap, \ + long long*: shmem_ctx_longlong_atomic_swap, \ + float*: shmem_ctx_float_atomic_swap, \ + double*: shmem_ctx_double_atomic_swap)(ctx, dst, val, pe) +#endif + +OSHMEM_DECLSPEC int shmem_int_atomic_swap(int *target, int value, int pe); +OSHMEM_DECLSPEC long shmem_long_atomic_swap(long *target, long value, int pe); +OSHMEM_DECLSPEC long long shmem_longlong_atomic_swap(long long*target, long long value, int pe); +OSHMEM_DECLSPEC float shmem_float_atomic_swap(float *target, float value, int pe); +OSHMEM_DECLSPEC double shmem_double_atomic_swap(double *target, double value, int pe); +#if OSHMEM_HAVE_C11 +#define shmem_atomic_swap(dst, val, pe) \ + _Generic(&*(dst), \ + int*: shmem_int_atomic_swap, \ + long*: shmem_long_atomic_swap, \ + long long*: shmem_longlong_atomic_swap, \ + float*: shmem_float_atomic_swap, \ + double*: shmem_double_atomic_swap)(dst, val, pe) +#endif + OSHMEM_DECLSPEC int shmem_int_swap(int *target, int value, int pe); OSHMEM_DECLSPEC long shmem_long_swap(long 
*target, long value, int pe); OSHMEM_DECLSPEC long long shmem_longlong_swap(long long*target, long long value, int pe); @@ -423,6 +675,36 @@ OSHMEM_DECLSPEC double shmem_double_swap(double *target, double value, int pe); #endif /* Atomic set */ +OSHMEM_DECLSPEC void shmem_ctx_int_atomic_set(shmem_ctx_t ctx, int *target, int value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_long_atomic_set(shmem_ctx_t ctx, long *target, long value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_longlong_atomic_set(shmem_ctx_t ctx, long long*target, long long value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_float_atomic_set(shmem_ctx_t ctx, float *target, float value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_double_atomic_set(shmem_ctx_t ctx, double *target, double value, int pe); +#if OSHMEM_HAVE_C11 +#define shmem_atomic_set(ctx, dst, val, pe) \ + _Generic(&*(dst), \ + int*: shmem_ctx_int_atomic_set, \ + long*: shmem_ctx_long_atomic_set, \ + long long*: shmem_ctx_longlong_atomic_set, \ + float*: shmem_ctx_float_atomic_set, \ + double*: shmem_ctx_double_atomic_set)(ctx, dst, val, pe) +#endif + +OSHMEM_DECLSPEC void shmem_int_atomic_set(int *target, int value, int pe); +OSHMEM_DECLSPEC void shmem_long_atomic_set(long *target, long value, int pe); +OSHMEM_DECLSPEC void shmem_longlong_atomic_set(long long*target, long long value, int pe); +OSHMEM_DECLSPEC void shmem_float_atomic_set(float *target, float value, int pe); +OSHMEM_DECLSPEC void shmem_double_atomic_set(double *target, double value, int pe); +#if OSHMEM_HAVE_C11 +#define shmem_atomic_set(dst, val, pe) \ + _Generic(&*(dst), \ + int*: shmem_int_atomic_set, \ + long*: shmem_long_atomic_set, \ + long long*: shmem_longlong_atomic_set, \ + float*: shmem_float_atomic_set, \ + double*: shmem_double_atomic_set)(dst, val, pe) +#endif + OSHMEM_DECLSPEC void shmem_int_set(int *target, int value, int pe); OSHMEM_DECLSPEC void shmem_long_set(long *target, long value, int pe); OSHMEM_DECLSPEC void shmem_longlong_set(long long*target, long long 
value, int pe); @@ -439,6 +721,30 @@ OSHMEM_DECLSPEC void shmem_double_set(double *target, double value, int pe); #endif /* Atomic conditional swap */ +OSHMEM_DECLSPEC int shmem_ctx_int_atomic_compare_swap(shmem_ctx_t ctx, int *target, int cond, int value, int pe); +OSHMEM_DECLSPEC long shmem_ctx_long_atomic_compare_swap(shmem_ctx_t ctx, long *target, long cond, long value, int pe); +OSHMEM_DECLSPEC long long shmem_ctx_longlong_atomic_compare_swap(shmem_ctx_t ctx, long long *target, long long cond, long long value, int pe); + +#if OSHMEM_HAVE_C11 +#define shmem_atomic_compare_swap(ctx, dst, cond, val, pe) \ + _Generic(&*(dst), \ + int*: shmem_ctx_int_atomic_compare_swap, \ + long*: shmem_ctx_long_atomic_compare_swap, \ + long long*: shmem_ctx_longlong_atomic_compare_swap)(ctx, dst, cond, val, pe) +#endif + +OSHMEM_DECLSPEC int shmem_int_atomic_compare_swap(int *target, int cond, int value, int pe); +OSHMEM_DECLSPEC long shmem_long_atomic_compare_swap(long *target, long cond, long value, int pe); +OSHMEM_DECLSPEC long long shmem_longlong_atomic_compare_swap(long long *target, long long cond, long long value, int pe); + +#if OSHMEM_HAVE_C11 +#define shmem_atomic_compare_swap(dst, cond, val, pe) \ + _Generic(&*(dst), \ + int*: shmem_int_atomic_compare_swap, \ + long*: shmem_long_atomic_compare_swap, \ + long long*: shmem_longlong_atomic_compare_swap)(dst, cond, val, pe) +#endif + OSHMEM_DECLSPEC int shmem_int_cswap(int *target, int cond, int value, int pe); OSHMEM_DECLSPEC long shmem_long_cswap(long *target, long cond, long value, int pe); OSHMEM_DECLSPEC long long shmem_longlong_cswap(long long *target, long long cond, long long value, int pe); @@ -452,6 +758,28 @@ OSHMEM_DECLSPEC long long shmem_longlong_cswap(long long *target, long long cond #endif /* Atomic Fetch&Add */ +OSHMEM_DECLSPEC int shmem_ctx_int_atomic_fetch_add(shmem_ctx_t ctx, int *target, int value, int pe); +OSHMEM_DECLSPEC long shmem_ctx_long_atomic_fetch_add(shmem_ctx_t ctx, long *target, long 
value, int pe); +OSHMEM_DECLSPEC long long shmem_ctx_longlong_atomic_fetch_add(shmem_ctx_t ctx, long long *target, long long value, int pe); +#if OSHMEM_HAVE_C11 +#define shmem_atomic_fetch_add(ctx, dst, val, pe) \ + _Generic(&*(dst), \ + int*: shmem_ctx_int_atomic_fetch_add, \ + long*: shmem_ctx_long_atomic_fetch_add, \ + long long*: shmem_ctx_longlong_atomic_fetch_add)(ctx, dst, val, pe) +#endif + +OSHMEM_DECLSPEC int shmem_int_atomic_fetch_add(int *target, int value, int pe); +OSHMEM_DECLSPEC long shmem_long_atomic_fetch_add(long *target, long value, int pe); +OSHMEM_DECLSPEC long long shmem_longlong_atomic_fetch_add(long long *target, long long value, int pe); +#if OSHMEM_HAVE_C11 +#define shmem_atomic_fetch_add(dst, val, pe) \ + _Generic(&*(dst), \ + int*: shmem_int_atomic_fetch_add, \ + long*: shmem_long_atomic_fetch_add, \ + long long*: shmem_longlong_atomic_fetch_add)(dst, val, pe) +#endif + OSHMEM_DECLSPEC int shmem_int_fadd(int *target, int value, int pe); OSHMEM_DECLSPEC long shmem_long_fadd(long *target, long value, int pe); OSHMEM_DECLSPEC long long shmem_longlong_fadd(long long *target, long long value, int pe); @@ -464,6 +792,17 @@ OSHMEM_DECLSPEC long long shmem_longlong_fadd(long long *target, long long value #endif /* Atomic Fetch&And */ +OSHMEM_DECLSPEC unsigned int shmem_ctx_uint_atomic_fetch_and(shmem_ctx_t ctx, unsigned int *target, unsigned int value, int pe); +OSHMEM_DECLSPEC unsigned long shmem_ctx_ulong_atomic_fetch_and(shmem_ctx_t ctx, unsigned long *target, unsigned long value, int pe); +OSHMEM_DECLSPEC unsigned long long shmem_ctx_ulonglong_atomic_fetch_and(shmem_ctx_t ctx, unsigned long long *target, unsigned long long value, int pe); +#if OSHMEM_HAVE_C11 /* type-generic front end: selects the ctx fetch-and routine from the unsigned pointer type of dst */ +#define shmem_atomic_fetch_and(ctx, dst, val, pe) \ + _Generic(&*(dst), \ + unsigned int*: shmem_ctx_uint_atomic_fetch_and, \ + unsigned long*: shmem_ctx_ulong_atomic_fetch_and, \ + unsigned long long*: shmem_ctx_ulonglong_atomic_fetch_and)(ctx, dst, val, pe) +#endif + OSHMEM_DECLSPEC unsigned int
shmem_uint_atomic_fetch_and(unsigned int *target, unsigned int value, int pe); OSHMEM_DECLSPEC unsigned long shmem_ulong_atomic_fetch_and(unsigned long *target, unsigned long value, int pe); OSHMEM_DECLSPEC unsigned long long shmem_ulonglong_atomic_fetch_and(unsigned long long *target, unsigned long long value, int pe); @@ -476,6 +815,17 @@ OSHMEM_DECLSPEC unsigned long long shmem_ulonglong_atomic_fetch_and(unsigned lon #endif /* Atomic Fetch&Or */ +OSHMEM_DECLSPEC unsigned int shmem_ctx_uint_atomic_fetch_or(shmem_ctx_t ctx, unsigned int *target, unsigned int value, int pe); +OSHMEM_DECLSPEC unsigned long shmem_ctx_ulong_atomic_fetch_or(shmem_ctx_t ctx, unsigned long *target, unsigned long value, int pe); +OSHMEM_DECLSPEC unsigned long long shmem_ctx_ulonglong_atomic_fetch_or(shmem_ctx_t ctx, unsigned long long *target, unsigned long long value, int pe); +#if OSHMEM_HAVE_C11 +#define shmem_atomic_fetch_or(ctx, dst, val, pe) \ + _Generic(&*(dst), \ + unsigned int*: shmem_ctx_uint_atomic_fetch_or, \ + unsigned long*: shmem_ctx_ulong_atomic_fetch_or, \ + unsigned long long*: shmem_ctx_ulonglong_atomic_fetch_or)(ctx, dst, val, pe) +#endif + OSHMEM_DECLSPEC unsigned int shmem_uint_atomic_fetch_or(unsigned int *target, unsigned int value, int pe); OSHMEM_DECLSPEC unsigned long shmem_ulong_atomic_fetch_or(unsigned long *target, unsigned long value, int pe); OSHMEM_DECLSPEC unsigned long long shmem_ulonglong_atomic_fetch_or(unsigned long long *target, unsigned long long value, int pe); @@ -488,6 +838,17 @@ OSHMEM_DECLSPEC unsigned long long shmem_ulonglong_atomic_fetch_or(unsigned long #endif /* Atomic Fetch&Xor */ +OSHMEM_DECLSPEC unsigned int shmem_ctx_uint_atomic_fetch_xor(shmem_ctx_t ctx, unsigned int *target, unsigned int value, int pe); +OSHMEM_DECLSPEC unsigned long shmem_ctx_ulong_atomic_fetch_xor(shmem_ctx_t ctx, unsigned long *target, unsigned long value, int pe); +OSHMEM_DECLSPEC unsigned long long shmem_ctx_ulonglong_atomic_fetch_xor(shmem_ctx_t ctx, unsigned 
long long *target, unsigned long long value, int pe); +#if OSHMEM_HAVE_C11 +#define shmem_atomic_fetch_xor(ctx, dst, val, pe) \ + _Generic(&*(dst), \ + unsigned int*: shmem_ctx_uint_atomic_fetch_xor, \ + unsigned long*: shmem_ctx_ulong_atomic_fetch_xor, \ + unsigned long long*: shmem_ctx_ulonglong_atomic_fetch_xor)(ctx, dst, val, pe) +#endif + OSHMEM_DECLSPEC unsigned int shmem_uint_atomic_fetch_xor(unsigned int *target, unsigned int value, int pe); OSHMEM_DECLSPEC unsigned long shmem_ulong_atomic_fetch_xor(unsigned long *target, unsigned long value, int pe); OSHMEM_DECLSPEC unsigned long long shmem_ulonglong_atomic_fetch_xor(unsigned long long *target, unsigned long long value, int pe); @@ -500,6 +861,36 @@ OSHMEM_DECLSPEC unsigned long long shmem_ulonglong_atomic_fetch_xor(unsigned lon #endif /* Atomic Fetch */ +OSHMEM_DECLSPEC int shmem_ctx_int_atomic_fetch(shmem_ctx_t ctx, const int *target, int pe); +OSHMEM_DECLSPEC long shmem_ctx_long_atomic_fetch(shmem_ctx_t ctx, const long *target, int pe); +OSHMEM_DECLSPEC long long shmem_ctx_longlong_atomic_fetch(shmem_ctx_t ctx, const long long *target, int pe); +OSHMEM_DECLSPEC float shmem_ctx_float_atomic_fetch(shmem_ctx_t ctx, const float *target, int pe); +OSHMEM_DECLSPEC double shmem_ctx_double_atomic_fetch(shmem_ctx_t ctx, const double *target, int pe); +#if OSHMEM_HAVE_C11 +#define shmem_atomic_fetch(ctx, dst, pe) \ + _Generic(&*(dst), \ + int*: shmem_ctx_int_atomic_fetch, \ + long*: shmem_ctx_long_atomic_fetch, \ + long long*: shmem_ctx_longlong_atomic_fetch,\ + float*: shmem_ctx_float_atomic_fetch, \ + double*: shmem_ctx_double_atomic_fetch)(ctx, dst, pe) +#endif + +OSHMEM_DECLSPEC int shmem_int_atomic_fetch(const int *target, int pe); +OSHMEM_DECLSPEC long shmem_long_atomic_fetch(const long *target, int pe); +OSHMEM_DECLSPEC long long shmem_longlong_atomic_fetch(const long long *target, int pe); +OSHMEM_DECLSPEC float shmem_float_atomic_fetch(const float *target, int pe); +OSHMEM_DECLSPEC double 
shmem_double_atomic_fetch(const double *target, int pe); +#if OSHMEM_HAVE_C11 +#define shmem_atomic_fetch(dst, pe) \ + _Generic(&*(dst), \ + int*: shmem_int_atomic_fetch, \ + long*: shmem_long_atomic_fetch, \ + long long*: shmem_longlong_atomic_fetch, \ + float*: shmem_float_atomic_fetch, \ + double*: shmem_double_atomic_fetch)(dst, pe) +#endif + OSHMEM_DECLSPEC int shmem_int_fetch(const int *target, int pe); OSHMEM_DECLSPEC long shmem_long_fetch(const long *target, int pe); OSHMEM_DECLSPEC long long shmem_longlong_fetch(const long long *target, int pe); @@ -516,6 +907,28 @@ OSHMEM_DECLSPEC double shmem_double_fetch(const double *target, int pe); #endif /* Atomic Fetch&Inc */ +OSHMEM_DECLSPEC int shmem_ctx_int_atomic_fetch_inc(shmem_ctx_t ctx, int *target, int pe); +OSHMEM_DECLSPEC long shmem_ctx_long_atomic_fetch_inc(shmem_ctx_t ctx, long *target, int pe); +OSHMEM_DECLSPEC long long shmem_ctx_longlong_atomic_fetch_inc(shmem_ctx_t ctx, long long *target, int pe); +#if OSHMEM_HAVE_C11 +#define shmem_atomic_fetch_inc(ctx, dst, pe) \ + _Generic(&*(dst), \ + int*: shmem_ctx_int_atomic_fetch_inc,\ + long*: shmem_ctx_long_atomic_fetch_inc,\ + long long*: shmem_ctx_longlong_atomic_fetch_inc)(ctx, dst, pe) +#endif + +OSHMEM_DECLSPEC int shmem_int_atomic_fetch_inc(int *target, int pe); +OSHMEM_DECLSPEC long shmem_long_atomic_fetch_inc(long *target, int pe); +OSHMEM_DECLSPEC long long shmem_longlong_atomic_fetch_inc(long long *target, int pe); +#if OSHMEM_HAVE_C11 +#define shmem_atomic_fetch_inc(dst, pe) \ + _Generic(&*(dst), \ + int*: shmem_int_atomic_fetch_inc, \ + long*: shmem_long_atomic_fetch_inc, \ + long long*: shmem_longlong_atomic_fetch_inc)(dst, pe) +#endif + OSHMEM_DECLSPEC int shmem_int_finc(int *target, int pe); OSHMEM_DECLSPEC long shmem_long_finc(long *target, int pe); OSHMEM_DECLSPEC long long shmem_longlong_finc(long long *target, int pe); @@ -528,6 +941,28 @@ OSHMEM_DECLSPEC long long shmem_longlong_finc(long long *target, int pe); #endif /* Atomic Add */ 
+OSHMEM_DECLSPEC void shmem_ctx_int_atomic_add(shmem_ctx_t ctx, int *target, int value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_long_atomic_add(shmem_ctx_t ctx, long *target, long value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_longlong_atomic_add(shmem_ctx_t ctx, long long *target, long long value, int pe); +#if OSHMEM_HAVE_C11 +#define shmem_atomic_add(ctx, dst, val, pe) \ + _Generic(&*(dst), \ + int*: shmem_ctx_int_atomic_add, \ + long*: shmem_ctx_long_atomic_add, \ + long long*: shmem_ctx_longlong_atomic_add)(ctx, dst, val, pe) +#endif + +OSHMEM_DECLSPEC void shmem_int_atomic_add(int *target, int value, int pe); +OSHMEM_DECLSPEC void shmem_long_atomic_add(long *target, long value, int pe); +OSHMEM_DECLSPEC void shmem_longlong_atomic_add(long long *target, long long value, int pe); +#if OSHMEM_HAVE_C11 +#define shmem_atomic_add(dst, val, pe) \ + _Generic(&*(dst), \ + int*: shmem_int_atomic_add, \ + long*: shmem_long_atomic_add, \ + long long*: shmem_longlong_atomic_add)(dst, val, pe) +#endif + OSHMEM_DECLSPEC void shmem_int_add(int *target, int value, int pe); OSHMEM_DECLSPEC void shmem_long_add(long *target, long value, int pe); OSHMEM_DECLSPEC void shmem_longlong_add(long long *target, long long value, int pe); @@ -540,6 +975,17 @@ OSHMEM_DECLSPEC void shmem_longlong_add(long long *target, long long value, int #endif /* Atomic And */ +OSHMEM_DECLSPEC void shmem_ctx_uint_atomic_and(shmem_ctx_t ctx, unsigned int *target, unsigned int value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_ulong_atomic_and(shmem_ctx_t ctx, unsigned long *target, unsigned long value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_ulonglong_atomic_and(shmem_ctx_t ctx, unsigned long long *target, unsigned long long value, int pe); +#if OSHMEM_HAVE_C11 +#define shmem_atomic_and(ctx, dst, val, pe) \ + _Generic(&*(dst), \ + unsigned int*: shmem_ctx_uint_atomic_and, \ + unsigned long*: shmem_ctx_ulong_atomic_and, \ + unsigned long long*: shmem_ctx_ulonglong_atomic_and)(ctx, dst, val, pe) +#endif + 
OSHMEM_DECLSPEC void shmem_uint_atomic_and(unsigned int *target, unsigned int value, int pe); OSHMEM_DECLSPEC void shmem_ulong_atomic_and(unsigned long *target, unsigned long value, int pe); OSHMEM_DECLSPEC void shmem_ulonglong_atomic_and(unsigned long long *target, unsigned long long value, int pe); @@ -552,6 +998,17 @@ OSHMEM_DECLSPEC void shmem_ulonglong_atomic_and(unsigned long long *target, unsi #endif /* Atomic Or */ +OSHMEM_DECLSPEC void shmem_ctx_uint_atomic_or(shmem_ctx_t ctx, unsigned int *target, unsigned int value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_ulong_atomic_or(shmem_ctx_t ctx, unsigned long *target, unsigned long value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_ulonglong_atomic_or(shmem_ctx_t ctx, unsigned long long *target, unsigned long long value, int pe); +#if OSHMEM_HAVE_C11 +#define shmem_atomic_or(ctx, dst, val, pe) \ + _Generic(&*(dst), \ + unsigned int*: shmem_ctx_uint_atomic_or, \ + unsigned long*: shmem_ctx_ulong_atomic_or, \ + unsigned long long*: shmem_ctx_ulonglong_atomic_or)(ctx, dst, val, pe) +#endif + OSHMEM_DECLSPEC void shmem_uint_atomic_or(unsigned int *target, unsigned int value, int pe); OSHMEM_DECLSPEC void shmem_ulong_atomic_or(unsigned long *target, unsigned long value, int pe); OSHMEM_DECLSPEC void shmem_ulonglong_atomic_or(unsigned long long *target, unsigned long long value, int pe); @@ -564,6 +1021,17 @@ OSHMEM_DECLSPEC void shmem_ulonglong_atomic_or(unsigned long long *target, unsig #endif /* Atomic Xor */ +OSHMEM_DECLSPEC void shmem_ctx_uint_atomic_xor(shmem_ctx_t ctx, unsigned int *target, unsigned int value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_ulong_atomic_xor(shmem_ctx_t ctx, unsigned long *target, unsigned long value, int pe); +OSHMEM_DECLSPEC void shmem_ctx_ulonglong_atomic_xor(shmem_ctx_t ctx, unsigned long long *target, unsigned long long value, int pe); +#if OSHMEM_HAVE_C11 +#define shmem_atomic_xor(ctx, dst, val, pe) \ + _Generic(&*(dst), \ + unsigned int*: shmem_ctx_uint_atomic_xor, \ + unsigned 
long*: shmem_ctx_ulong_atomic_xor, \ + unsigned long long*: shmem_ctx_ulonglong_atomic_xor)(ctx, dst, val, pe) +#endif + OSHMEM_DECLSPEC void shmem_uint_atomic_xor(unsigned int *target, unsigned int value, int pe); OSHMEM_DECLSPEC void shmem_ulong_atomic_xor(unsigned long *target, unsigned long value, int pe); OSHMEM_DECLSPEC void shmem_ulonglong_atomic_xor(unsigned long long *target, unsigned long long value, int pe); @@ -576,6 +1044,28 @@ OSHMEM_DECLSPEC void shmem_ulonglong_atomic_xor(unsigned long long *target, unsi #endif /* Atomic Inc */ +OSHMEM_DECLSPEC void shmem_ctx_int_atomic_inc(shmem_ctx_t ctx, int *target, int pe); +OSHMEM_DECLSPEC void shmem_ctx_long_atomic_inc(shmem_ctx_t ctx, long *target, int pe); +OSHMEM_DECLSPEC void shmem_ctx_longlong_atomic_inc(shmem_ctx_t ctx, long long *target, int pe); +#if OSHMEM_HAVE_C11 +#define shmem_atomic_inc(ctx, dst, pe) \ + _Generic(&*(dst), \ + int*: shmem_ctx_int_atomic_inc, \ + long*: shmem_ctx_long_atomic_inc, \ + long long*: shmem_ctx_longlong_atomic_inc)(ctx, dst, pe) +#endif + +OSHMEM_DECLSPEC void shmem_int_atomic_inc(int *target, int pe); +OSHMEM_DECLSPEC void shmem_long_atomic_inc(long *target, int pe); +OSHMEM_DECLSPEC void shmem_longlong_atomic_inc(long long *target, int pe); +#if OSHMEM_HAVE_C11 +#define shmem_atomic_inc(dst, pe) \ + _Generic(&*(dst), \ + int*: shmem_int_atomic_inc, \ + long*: shmem_long_atomic_inc, \ + long long*: shmem_longlong_atomic_inc)(dst, pe) +#endif + OSHMEM_DECLSPEC void shmem_int_inc(int *target, int pe); OSHMEM_DECLSPEC void shmem_long_inc(long *target, int pe); OSHMEM_DECLSPEC void shmem_longlong_inc(long long *target, int pe); @@ -587,7 +1077,6 @@ OSHMEM_DECLSPEC void shmem_longlong_inc(long long *target, int pe); long long*: shmem_longlong_inc)(dst, pe) #endif - /* * Lock functions */ @@ -608,15 +1097,39 @@ OSHMEM_DECLSPEC void shmem_short_wait_until(volatile short *addr, int cmp, shor OSHMEM_DECLSPEC void shmem_int_wait_until(volatile int *addr, int cmp, int value); 
OSHMEM_DECLSPEC void shmem_long_wait_until(volatile long *addr, int cmp, long value); OSHMEM_DECLSPEC void shmem_longlong_wait_until(volatile long long *addr, int cmp, long long value); -OSHMEM_DECLSPEC void shmem_wait_until(volatile long *addr, int cmp, long value); +#if OSHMEM_HAVE_C11 /* type-generic shmem_wait_until: _Generic selects the typed routine from addr's pointer type, then it is called with (addr, cmp, value) */ +#define shmem_wait_until(addr, cmp, value) \ + _Generic(&*(addr), \ + short*: shmem_short_wait_until, \ + int*: shmem_int_wait_until, \ + long*: shmem_long_wait_until, \ + long long*: shmem_longlong_wait_until)(addr, cmp, value) +#endif + +OSHMEM_DECLSPEC int shmem_short_test(volatile short *addr, int cmp, short value); +OSHMEM_DECLSPEC int shmem_int_test(volatile int *addr, int cmp, int value); +OSHMEM_DECLSPEC int shmem_long_test(volatile long *addr, int cmp, long value); +OSHMEM_DECLSPEC int shmem_longlong_test(volatile long long *addr, int cmp, long long value); +#if OSHMEM_HAVE_C11 /* type-generic shmem_test: same selection scheme as shmem_wait_until, returning the int result of the typed routine */ +#define shmem_test(addr, cmp, value) \ + _Generic(&*(addr), \ + short*: shmem_short_test, \ + int*: shmem_int_test, \ + long*: shmem_long_test, \ + long long*: shmem_longlong_test)(addr, cmp, value) +#endif /* * Barrier sync routines */ OSHMEM_DECLSPEC void shmem_barrier(int PE_start, int logPE_stride, int PE_size, long *pSync); OSHMEM_DECLSPEC void shmem_barrier_all(void); +OSHMEM_DECLSPEC void shmem_sync(int PE_start, int logPE_stride, int PE_size, long *pSync); +OSHMEM_DECLSPEC void shmem_sync_all(void); OSHMEM_DECLSPEC void shmem_fence(void); +OSHMEM_DECLSPEC void shmem_ctx_fence(shmem_ctx_t ctx); OSHMEM_DECLSPEC void shmem_quiet(void); +OSHMEM_DECLSPEC void shmem_ctx_quiet(shmem_ctx_t ctx); /* * Collective routines diff --git a/oshmem/mca/atomic/atomic.h b/oshmem/mca/atomic/atomic.h index ddda726726..68e44b5840 100644 --- a/oshmem/mca/atomic/atomic.h +++ b/oshmem/mca/atomic/atomic.h @@ -35,9 +35,7 @@ BEGIN_C_DECLS #define OSHMEM_ATOMIC_PTR_2_INT(ptr, size) ((size) == 8 ?
*(uint64_t*)(ptr) : *(uint32_t*)(ptr)) -#define OSHMEM_TYPE_OP(type_name, type, prefix, op) \ - void prefix##_##type_name##_atomic_##op(type *target, type value, int pe) \ - { \ +#define DO_SHMEM_TYPE_OP(ctx, type_name, type, op, target, value, pe) do { \ int rc = OSHMEM_SUCCESS; \ size_t size = 0; \ \ @@ -47,6 +45,7 @@ BEGIN_C_DECLS \ size = sizeof(value); \ rc = MCA_ATOMIC_CALL(op( \ + ctx, \ (void*)target, \ value, \ size, \ @@ -54,11 +53,23 @@ BEGIN_C_DECLS RUNTIME_CHECK_RC(rc); \ \ return; \ + } while (0) + +#define OSHMEM_TYPE_OP(type_name, type, prefix, op) \ + void prefix##_##type_name##_atomic_##op(type *target, type value, int pe) \ + { \ + DO_SHMEM_TYPE_OP(oshmem_ctx_default, type_name, type, op, \ + target, value, pe); \ } -#define OSHMEM_TYPE_FOP(type_name, type, prefix, op) \ - type prefix##_##type_name##_atomic_fetch_##op(type *target, type value, int pe) \ - { \ +#define OSHMEM_CTX_TYPE_OP(type_name, type, prefix, op) \ + void prefix##_ctx_##type_name##_atomic_##op(shmem_ctx_t ctx, type *target, type value, int pe) \ + { \ + DO_SHMEM_TYPE_OP(ctx, type_name, type, op, \ + target, value, pe); \ + } + +#define DO_OSHMEM_TYPE_FOP(ctx, type_name, type, op, target, value, pe) do { \ int rc = OSHMEM_SUCCESS; \ size_t size = 0; \ type out_value; \ @@ -69,6 +80,7 @@ BEGIN_C_DECLS \ size = sizeof(out_value); \ rc = MCA_ATOMIC_CALL(f##op( \ + ctx, \ (void*)target, \ (void*)&out_value, \ value, \ @@ -77,7 +89,22 @@ BEGIN_C_DECLS RUNTIME_CHECK_RC(rc); \ \ return out_value; \ + } while (0) + +#define OSHMEM_TYPE_FOP(type_name, type, prefix, op) \ + type prefix##_##type_name##_atomic_fetch_##op(type *target, type value, int pe) \ + { \ + DO_OSHMEM_TYPE_FOP(oshmem_ctx_default, type_name, type, op, \ + target, value, pe); \ } + +#define OSHMEM_CTX_TYPE_FOP(type_name, type, prefix, op) \ + type prefix##_ctx_##type_name##_atomic_fetch_##op(shmem_ctx_t ctx, type *target, type value, int pe) \ + { \ + DO_OSHMEM_TYPE_FOP(ctx, type_name, type, op, \ + target, value, pe); 
\ + } + /* ******************************************************************** */ struct oshmem_op_t; @@ -131,48 +158,58 @@ struct mca_atomic_base_module_1_0_0_t { opal_object_t super; /* Collective function pointers */ - int (*atomic_add)(void *target, + int (*atomic_add)(shmem_ctx_t ctx, + void *target, uint64_t value, size_t size, int pe); - int (*atomic_and)(void *target, + int (*atomic_and)(shmem_ctx_t ctx, + void *target, uint64_t value, size_t size, int pe); - int (*atomic_or)(void *target, + int (*atomic_or)(shmem_ctx_t ctx, + void *target, uint64_t value, size_t size, int pe); - int (*atomic_xor)(void *target, + int (*atomic_xor)(shmem_ctx_t ctx, + void *target, uint64_t value, size_t size, int pe); - int (*atomic_fadd)(void *target, + int (*atomic_fadd)(shmem_ctx_t ctx, + void *target, void *prev, uint64_t value, size_t size, int pe); - int (*atomic_fand)(void *target, + int (*atomic_fand)(shmem_ctx_t ctx, + void *target, void *prev, uint64_t value, size_t size, int pe); - int (*atomic_for)(void *target, + int (*atomic_for)(shmem_ctx_t ctx, + void *target, void *prev, uint64_t value, size_t size, int pe); - int (*atomic_fxor)(void *target, + int (*atomic_fxor)(shmem_ctx_t ctx, + void *target, void *prev, uint64_t value, size_t size, int pe); - int (*atomic_swap)(void *target, + int (*atomic_swap)(shmem_ctx_t ctx, + void *target, void *prev, uint64_t value, size_t size, int pe); - int (*atomic_cswap)(void *target, + int (*atomic_cswap)(shmem_ctx_t ctx, + void *target, uint64_t *prev, /* prev is used internally by wrapper, we may always use 64-bit value */ uint64_t cond, diff --git a/oshmem/mca/atomic/basic/atomic_basic.h b/oshmem/mca/atomic/basic/atomic_basic.h index 97d2343352..ee1bd0df01 100644 --- a/oshmem/mca/atomic/basic/atomic_basic.h +++ b/oshmem/mca/atomic/basic/atomic_basic.h @@ -26,8 +26,8 @@ BEGIN_C_DECLS OSHMEM_MODULE_DECLSPEC extern mca_atomic_base_component_1_0_0_t mca_atomic_basic_component; -OSHMEM_DECLSPEC void atomic_basic_lock(int pe); 
-OSHMEM_DECLSPEC void atomic_basic_unlock(int pe); +OSHMEM_DECLSPEC void atomic_basic_lock(shmem_ctx_t ctx, int pe); +OSHMEM_DECLSPEC void atomic_basic_unlock(shmem_ctx_t ctx, int pe); /* API functions */ @@ -36,7 +36,8 @@ int mca_atomic_basic_finalize(void); mca_atomic_base_module_t* mca_atomic_basic_query(int *priority); -int mca_atomic_basic_cswap(void *target, +int mca_atomic_basic_cswap(shmem_ctx_t ctx, + void *target, uint64_t *prev, uint64_t cond, uint64_t value, diff --git a/oshmem/mca/atomic/basic/atomic_basic_cswap.c b/oshmem/mca/atomic/basic/atomic_basic_cswap.c index 58fbbc7163..b30c946e89 100644 --- a/oshmem/mca/atomic/basic/atomic_basic_cswap.c +++ b/oshmem/mca/atomic/basic/atomic_basic_cswap.c @@ -18,7 +18,8 @@ #include "oshmem/mca/atomic/base/base.h" #include "atomic_basic.h" -int mca_atomic_basic_cswap(void *target, +int mca_atomic_basic_cswap(shmem_ctx_t ctx, + void *target, uint64_t *prev, uint64_t cond, uint64_t value, @@ -32,16 +33,16 @@ int mca_atomic_basic_cswap(void *target, } if (rc == OSHMEM_SUCCESS) { - atomic_basic_lock(pe); + atomic_basic_lock(ctx, pe); - rc = MCA_SPML_CALL(get(target, nlong, prev, pe)); + rc = MCA_SPML_CALL(get(ctx, target, nlong, prev, pe)); if ((rc == OSHMEM_SUCCESS) && (!cond || !memcmp(prev, &cond, nlong))) { - rc = MCA_SPML_CALL(put(target, nlong, (void*)&value, pe)); + rc = MCA_SPML_CALL(put(ctx, target, nlong, (void*)&value, pe)); shmem_quiet(); } - atomic_basic_unlock(pe); + atomic_basic_unlock(ctx, pe); } return rc; diff --git a/oshmem/mca/atomic/basic/atomic_basic_module.c b/oshmem/mca/atomic/basic/atomic_basic_module.c index f13e28f680..a6d35d4698 100644 --- a/oshmem/mca/atomic/basic/atomic_basic_module.c +++ b/oshmem/mca/atomic/basic/atomic_basic_module.c @@ -93,7 +93,8 @@ int mca_atomic_basic_finalize(void) } static inline -int mca_atomic_basic_fop(void *target, +int mca_atomic_basic_fop(shmem_ctx_t ctx, + void *target, void *prev, uint64_t value, size_t size, @@ -103,9 +104,9 @@ int 
mca_atomic_basic_fop(void *target, int rc = OSHMEM_SUCCESS; long long temp_value = 0; - atomic_basic_lock(pe); + atomic_basic_lock(ctx, pe); - rc = MCA_SPML_CALL(get(target, size, (void*)&temp_value, pe)); + rc = MCA_SPML_CALL(get(ctx, target, size, (void*)&temp_value, pe)); memcpy(prev, (void*) &temp_value, size); @@ -114,17 +115,18 @@ int mca_atomic_basic_fop(void *target, size / op->dt_size); if (rc == OSHMEM_SUCCESS) { - rc = MCA_SPML_CALL(put(target, size, (void*)&temp_value, pe)); + rc = MCA_SPML_CALL(put(ctx, target, size, (void*)&temp_value, pe)); shmem_quiet(); } - atomic_basic_unlock(pe); + atomic_basic_unlock(ctx, pe); return rc; } static inline -int mca_atomic_basic_op(void *target, +int mca_atomic_basic_op(shmem_ctx_t ctx, + void *target, uint64_t value, size_t size, int pe, @@ -132,69 +134,72 @@ int mca_atomic_basic_op(void *target, { long long prev; - return mca_atomic_basic_fop(target, &prev, value, size, pe, op); + return mca_atomic_basic_fop(ctx, target, &prev, value, size, pe, op); } -static int mca_atomic_basic_add(void *target, uint64_t value, +static int mca_atomic_basic_add(shmem_ctx_t ctx, void *target, uint64_t value, size_t size, int pe) { - return mca_atomic_basic_op(target, value, size, pe, + return mca_atomic_basic_op(ctx, target, value, size, pe, MCA_BASIC_OP(size, oshmem_op_sum_int32, oshmem_op_sum_int64)); } -static int mca_atomic_basic_and(void *target, uint64_t value, +static int mca_atomic_basic_and(shmem_ctx_t ctx, + void *target, uint64_t value, size_t size, int pe) { - return mca_atomic_basic_op(target, value, size, pe, + return mca_atomic_basic_op(ctx, target, value, size, pe, MCA_BASIC_OP(size, oshmem_op_sum_int32, oshmem_op_and_int64)); } -static int mca_atomic_basic_or(void *target, uint64_t value, +static int mca_atomic_basic_or(shmem_ctx_t ctx, void *target, uint64_t value, size_t size, int pe) { - return mca_atomic_basic_op(target, value, size, pe, + return mca_atomic_basic_op(ctx, target, value, size, pe, 
MCA_BASIC_OP(size, oshmem_op_sum_int32, oshmem_op_or_int64)); } -static int mca_atomic_basic_xor(void *target, uint64_t value, +static int mca_atomic_basic_xor(shmem_ctx_t ctx, + void *target, uint64_t value, size_t size, int pe) { - return mca_atomic_basic_op(target, value, size, pe, + return mca_atomic_basic_op(ctx, target, value, size, pe, MCA_BASIC_OP(size, oshmem_op_sum_int32, oshmem_op_xor_int64)); } -static int mca_atomic_basic_fadd(void *target, void *prev, uint64_t value, +static int mca_atomic_basic_fadd(shmem_ctx_t ctx, void *target, void *prev, uint64_t value, size_t size, int pe) { - return mca_atomic_basic_fop(target, prev, value, size, pe, + return mca_atomic_basic_fop(ctx, target, prev, value, size, pe, MCA_BASIC_OP(size, oshmem_op_sum_int32, oshmem_op_sum_int64)); } -static int mca_atomic_basic_fand(void *target, void *prev, uint64_t value, +static int mca_atomic_basic_fand(shmem_ctx_t ctx, + void *target, void *prev, uint64_t value, size_t size, int pe) { - return mca_atomic_basic_fop(target, prev, value, size, pe, + return mca_atomic_basic_fop(ctx, target, prev, value, size, pe, MCA_BASIC_OP(size, oshmem_op_sum_int32, oshmem_op_and_int64)); } -static int mca_atomic_basic_for(void *target, void *prev, uint64_t value, +static int mca_atomic_basic_for(shmem_ctx_t ctx, void *target, void *prev, uint64_t value, size_t size, int pe) { - return mca_atomic_basic_fop(target, prev, value, size, pe, + return mca_atomic_basic_fop(ctx, target, prev, value, size, pe, MCA_BASIC_OP(size, oshmem_op_sum_int32, oshmem_op_or_int64)); } -static int mca_atomic_basic_fxor(void *target, void *prev, uint64_t value, +static int mca_atomic_basic_fxor(shmem_ctx_t ctx, void *target, void *prev, uint64_t value, size_t size, int pe) { - return mca_atomic_basic_fop(target, prev, value, size, pe, + return mca_atomic_basic_fop(ctx, target, prev, value, size, pe, MCA_BASIC_OP(size, oshmem_op_sum_int32, oshmem_op_xor_int64)); } -static int mca_atomic_basic_swap(void *target, void 
*prev, uint64_t value, +static int mca_atomic_basic_swap(shmem_ctx_t ctx, void *target, void *prev, uint64_t value, size_t size, int pe) { - return mca_atomic_basic_fop(target, prev, value, size, pe, + return mca_atomic_basic_fop(ctx, target, prev, value, size, pe, MCA_BASIC_OP(size, oshmem_op_swap_int32, oshmem_op_swap_int64)); } @@ -223,7 +228,7 @@ mca_atomic_basic_query(int *priority) return NULL ; } -void atomic_basic_lock(int pe) +void atomic_basic_lock(shmem_ctx_t ctx, int pe) { int index = -1; int me = oshmem_my_proc_id(); @@ -235,15 +240,15 @@ void atomic_basic_lock(int pe) do { /* announce that we need the resource */ do { - MCA_SPML_CALL(put((void*)(atomic_lock_sync + me), sizeof(lock_required), (void*)&lock_required, root_pe)); - MCA_SPML_CALL(get((void*)atomic_lock_sync, num_pe * sizeof(*atomic_lock_sync), (void*)local_lock_sync, root_pe)); + MCA_SPML_CALL(put(ctx, (void*)(atomic_lock_sync + me), sizeof(lock_required), (void*)&lock_required, root_pe)); + MCA_SPML_CALL(get(ctx, (void*)atomic_lock_sync, num_pe * sizeof(*atomic_lock_sync), (void*)local_lock_sync, root_pe)); } while (local_lock_sync[me] != lock_required); - MCA_SPML_CALL(get((void*)atomic_lock_turn, sizeof(index), (void*)&index, root_pe)); + MCA_SPML_CALL(get(ctx, (void*)atomic_lock_turn, sizeof(index), (void*)&index, root_pe)); while (index != me) { if (local_lock_sync[index] != ATOMIC_LOCK_IDLE) { - MCA_SPML_CALL(get((void*)atomic_lock_turn, sizeof(index), (void*)&index, root_pe)); - MCA_SPML_CALL(get((void*)atomic_lock_sync, num_pe * sizeof(*atomic_lock_sync), (void*)local_lock_sync, root_pe)); + MCA_SPML_CALL(get(ctx, (void*)atomic_lock_turn, sizeof(index), (void*)&index, root_pe)); + MCA_SPML_CALL(get(ctx, (void*)atomic_lock_sync, num_pe * sizeof(*atomic_lock_sync), (void*)local_lock_sync, root_pe)); } else { index = (index + 1) % num_pe; } @@ -251,8 +256,8 @@ void atomic_basic_lock(int pe) /* now tentatively claim the resource */ do { - MCA_SPML_CALL(put((void*)(atomic_lock_sync + 
me), sizeof(lock_active), (void*)&lock_active, root_pe)); - MCA_SPML_CALL(get((void*)atomic_lock_sync, num_pe * sizeof(*atomic_lock_sync), (void*)local_lock_sync, root_pe)); + MCA_SPML_CALL(put(ctx, (void*)(atomic_lock_sync + me), sizeof(lock_active), (void*)&lock_active, root_pe)); + MCA_SPML_CALL(get(ctx, (void*)atomic_lock_sync, num_pe * sizeof(*atomic_lock_sync), (void*)local_lock_sync, root_pe)); } while (local_lock_sync[me] != lock_active); index = 0; @@ -262,15 +267,15 @@ void atomic_basic_lock(int pe) index = index + 1; } - MCA_SPML_CALL(get((void*)atomic_lock_turn, sizeof(*atomic_lock_turn), (void*)local_lock_turn, root_pe)); + MCA_SPML_CALL(get(ctx, (void*)atomic_lock_turn, sizeof(*atomic_lock_turn), (void*)local_lock_turn, root_pe)); } while (!((index >= num_pe) && ((*local_lock_turn == me) || (local_lock_sync[*local_lock_turn] == ATOMIC_LOCK_IDLE)))); - MCA_SPML_CALL(put((void*)atomic_lock_turn, sizeof(me), (void*)&me, root_pe)); + MCA_SPML_CALL(put(ctx, (void*)atomic_lock_turn, sizeof(me), (void*)&me, root_pe)); } -void atomic_basic_unlock(int pe) +void atomic_basic_unlock(shmem_ctx_t ctx, int pe) { int index = -1; int me = oshmem_my_proc_id(); @@ -278,17 +283,17 @@ void atomic_basic_unlock(int pe) char lock_idle = ATOMIC_LOCK_IDLE; int root_pe = pe; - MCA_SPML_CALL(get((void*)atomic_lock_sync, num_pe * sizeof(*atomic_lock_sync), (void*)local_lock_sync, root_pe)); - MCA_SPML_CALL(get((void*)atomic_lock_turn, sizeof(index), (void*)&index, root_pe)); + MCA_SPML_CALL(get(ctx, (void*)atomic_lock_sync, num_pe * sizeof(*atomic_lock_sync), (void*)local_lock_sync, root_pe)); + MCA_SPML_CALL(get(ctx, (void*)atomic_lock_turn, sizeof(index), (void*)&index, root_pe)); do { index = (index + 1) % num_pe; } while (local_lock_sync[index] == ATOMIC_LOCK_IDLE); - MCA_SPML_CALL(put((void*)atomic_lock_turn, sizeof(index), (void*)&index, root_pe)); + MCA_SPML_CALL(put(ctx, (void*)atomic_lock_turn, sizeof(index), (void*)&index, root_pe)); do { - 
MCA_SPML_CALL(put((void*)(atomic_lock_sync + me), sizeof(lock_idle), (void*)&lock_idle, root_pe)); - MCA_SPML_CALL(get((void*)atomic_lock_sync, num_pe * sizeof(*atomic_lock_sync), (void*)local_lock_sync, root_pe)); + MCA_SPML_CALL(put(ctx, (void*)(atomic_lock_sync + me), sizeof(lock_idle), (void*)&lock_idle, root_pe)); + MCA_SPML_CALL(get(ctx, (void*)atomic_lock_sync, num_pe * sizeof(*atomic_lock_sync), (void*)local_lock_sync, root_pe)); } while (local_lock_sync[me] != lock_idle); } diff --git a/oshmem/mca/atomic/mxm/atomic_mxm.h b/oshmem/mca/atomic/mxm/atomic_mxm.h index b04da83133..39efdfc466 100644 --- a/oshmem/mca/atomic/mxm/atomic_mxm.h +++ b/oshmem/mca/atomic/mxm/atomic_mxm.h @@ -42,21 +42,25 @@ int mca_atomic_mxm_finalize(void); mca_atomic_base_module_t* mca_atomic_mxm_query(int *priority); -int mca_atomic_mxm_add(void *target, +int mca_atomic_mxm_add(shmem_ctx_t ctx, + void *target, uint64_t value, size_t nlong, int pe); -int mca_atomic_mxm_fadd(void *target, +int mca_atomic_mxm_fadd(shmem_ctx_t ctx, + void *target, void *prev, uint64_t value, size_t nlong, int pe); -int mca_atomic_mxm_swap(void *target, +int mca_atomic_mxm_swap(shmem_ctx_t ctx, + void *target, void *prev, uint64_t value, size_t nlong, int pe); -int mca_atomic_mxm_cswap(void *target, +int mca_atomic_mxm_cswap(shmem_ctx_t ctx, + void *target, uint64_t *prev, uint64_t cond, uint64_t value, diff --git a/oshmem/mca/atomic/mxm/atomic_mxm_cswap.c b/oshmem/mca/atomic/mxm/atomic_mxm_cswap.c index 7c0368dd2d..357b2dc616 100644 --- a/oshmem/mca/atomic/mxm/atomic_mxm_cswap.c +++ b/oshmem/mca/atomic/mxm/atomic_mxm_cswap.c @@ -24,7 +24,8 @@ #include "atomic_mxm.h" -int mca_atomic_mxm_swap(void *target, +int mca_atomic_mxm_swap(shmem_ctx_t ctx, + void *target, void *prev, uint64_t value, size_t nlong, @@ -43,7 +44,8 @@ int mca_atomic_mxm_swap(void *target, return OSHMEM_SUCCESS; } -int mca_atomic_mxm_cswap(void *target, +int mca_atomic_mxm_cswap(shmem_ctx_t ctx, + void *target, uint64_t *prev, uint64_t 
cond, uint64_t value, diff --git a/oshmem/mca/atomic/mxm/atomic_mxm_fadd.c b/oshmem/mca/atomic/mxm/atomic_mxm_fadd.c index 72a5b7b0dc..148f4a6156 100644 --- a/oshmem/mca/atomic/mxm/atomic_mxm_fadd.c +++ b/oshmem/mca/atomic/mxm/atomic_mxm_fadd.c @@ -25,7 +25,8 @@ #include "atomic_mxm.h" -int mca_atomic_mxm_add(void *target, +int mca_atomic_mxm_add(shmem_ctx_t ctx, + void *target, uint64_t value, size_t size, int pe) @@ -44,7 +45,8 @@ int mca_atomic_mxm_add(void *target, return OSHMEM_SUCCESS; } -int mca_atomic_mxm_fadd(void *target, +int mca_atomic_mxm_fadd(shmem_ctx_t ctx, + void *target, void *prev, uint64_t value, size_t size, diff --git a/oshmem/mca/atomic/mxm/atomic_mxm_module.c b/oshmem/mca/atomic/mxm/atomic_mxm_module.c index 5a3087c9fe..c081651707 100644 --- a/oshmem/mca/atomic/mxm/atomic_mxm_module.c +++ b/oshmem/mca/atomic/mxm/atomic_mxm_module.c @@ -32,7 +32,8 @@ int mca_atomic_mxm_finalize(void) return OSHMEM_SUCCESS; } -static int mca_atomic_mxm_op_not_implemented(void *target, +static int mca_atomic_mxm_op_not_implemented(shmem_ctx_t ctx, + void *target, uint64_t value, size_t size, int pe) @@ -40,7 +41,8 @@ static int mca_atomic_mxm_op_not_implemented(void *target, return OSHMEM_ERR_NOT_IMPLEMENTED; } -static int mca_atomic_mxm_fop_not_implemented(void *target, +static int mca_atomic_mxm_fop_not_implemented(shmem_ctx_t ctx, + void *target, void *prev, uint64_t value, size_t size, diff --git a/oshmem/mca/atomic/ucx/atomic_ucx.h b/oshmem/mca/atomic/ucx/atomic_ucx.h index b8511b29a9..a679713019 100644 --- a/oshmem/mca/atomic/ucx/atomic_ucx.h +++ b/oshmem/mca/atomic/ucx/atomic_ucx.h @@ -42,7 +42,8 @@ int mca_atomic_ucx_finalize(void); mca_atomic_base_module_t* mca_atomic_ucx_query(int *priority); -int mca_atomic_ucx_cswap(void *target, +int mca_atomic_ucx_cswap(shmem_ctx_t ctx, + void *target, uint64_t *prev, uint64_t cond, uint64_t value, diff --git a/oshmem/mca/atomic/ucx/atomic_ucx_cswap.c b/oshmem/mca/atomic/ucx/atomic_ucx_cswap.c index 
9c68150902..25fe992688 100644 --- a/oshmem/mca/atomic/ucx/atomic_ucx_cswap.c +++ b/oshmem/mca/atomic/ucx/atomic_ucx_cswap.c @@ -19,7 +19,8 @@ #include "atomic_ucx.h" -int mca_atomic_ucx_cswap(void *target, +int mca_atomic_ucx_cswap(shmem_ctx_t ctx, + void *target, uint64_t *prev, uint64_t cond, uint64_t value, @@ -29,6 +30,7 @@ int mca_atomic_ucx_cswap(void *target, ucs_status_ptr_t status_ptr; spml_ucx_mkey_t *ucx_mkey; uint64_t rva; + mca_spml_ucx_ctx_t *ucx_ctx = (mca_spml_ucx_ctx_t *)ctx; if ((8 != size) && (4 != size)) { ATOMIC_ERROR("[#%d] Type size must be 4 or 8 bytes.", my_pe); @@ -38,11 +40,11 @@ int mca_atomic_ucx_cswap(void *target, assert(NULL != prev); *prev = value; - ucx_mkey = mca_spml_ucx_get_mkey(pe, target, (void *)&rva, mca_spml_self); - status_ptr = ucp_atomic_fetch_nb(mca_spml_self->ucp_peers[pe].ucp_conn, + ucx_mkey = mca_spml_ucx_get_mkey(ucx_ctx, pe, target, (void *)&rva, mca_spml_self); + status_ptr = ucp_atomic_fetch_nb(ucx_ctx->ucp_peers[pe].ucp_conn, UCP_ATOMIC_FETCH_OP_CSWAP, cond, prev, size, rva, ucx_mkey->rkey, opal_common_ucx_empty_complete_cb); - return opal_common_ucx_wait_request(status_ptr, mca_spml_self->ucp_worker, + return opal_common_ucx_wait_request(status_ptr, ucx_ctx->ucp_worker, "ucp_atomic_fetch_nb"); } diff --git a/oshmem/mca/atomic/ucx/atomic_ucx_module.c b/oshmem/mca/atomic/ucx/atomic_ucx_module.c index 870578a85e..53a00773cb 100644 --- a/oshmem/mca/atomic/ucx/atomic_ucx_module.c +++ b/oshmem/mca/atomic/ucx/atomic_ucx_module.c @@ -33,7 +33,8 @@ int mca_atomic_ucx_finalize(void) } static inline -int mca_atomic_ucx_op(void *target, +int mca_atomic_ucx_op(shmem_ctx_t ctx, + void *target, uint64_t value, size_t size, int pe, @@ -42,18 +43,20 @@ int mca_atomic_ucx_op(void *target, ucs_status_t status; spml_ucx_mkey_t *ucx_mkey; uint64_t rva; + mca_spml_ucx_ctx_t *ucx_ctx = (mca_spml_ucx_ctx_t *)ctx; assert((8 == size) || (4 == size)); - ucx_mkey = mca_spml_ucx_get_mkey(pe, target, (void *)&rva, mca_spml_self); - status 
= ucp_atomic_post(mca_spml_self->ucp_peers[pe].ucp_conn, + ucx_mkey = mca_spml_ucx_get_mkey(ucx_ctx, pe, target, (void *)&rva, mca_spml_self); + status = ucp_atomic_post(ucx_ctx->ucp_peers[pe].ucp_conn, op, value, size, rva, ucx_mkey->rkey); return ucx_status_to_oshmem(status); } static inline -int mca_atomic_ucx_fop(void *target, +int mca_atomic_ucx_fop(shmem_ctx_t ctx, + void *target, void *prev, uint64_t value, size_t size, @@ -63,117 +66,127 @@ int mca_atomic_ucx_fop(void *target, ucs_status_ptr_t status_ptr; spml_ucx_mkey_t *ucx_mkey; uint64_t rva; + mca_spml_ucx_ctx_t *ucx_ctx = (mca_spml_ucx_ctx_t *)ctx; assert((8 == size) || (4 == size)); - ucx_mkey = mca_spml_ucx_get_mkey(pe, target, (void *)&rva, mca_spml_self); - status_ptr = ucp_atomic_fetch_nb(mca_spml_self->ucp_peers[pe].ucp_conn, + ucx_mkey = mca_spml_ucx_get_mkey(ucx_ctx, pe, target, (void *)&rva, mca_spml_self); + status_ptr = ucp_atomic_fetch_nb(ucx_ctx->ucp_peers[pe].ucp_conn, op, value, prev, size, rva, ucx_mkey->rkey, opal_common_ucx_empty_complete_cb); - return opal_common_ucx_wait_request(status_ptr, mca_spml_self->ucp_worker, + return opal_common_ucx_wait_request(status_ptr, ucx_ctx->ucp_worker, "ucp_atomic_fetch_nb"); } -static int mca_atomic_ucx_add(void *target, +static int mca_atomic_ucx_add(shmem_ctx_t ctx, + void *target, uint64_t value, size_t size, int pe) { - return mca_atomic_ucx_op(target, value, size, pe, UCP_ATOMIC_POST_OP_ADD); + return mca_atomic_ucx_op(ctx, target, value, size, pe, UCP_ATOMIC_POST_OP_ADD); } -static int mca_atomic_ucx_and(void *target, +static int mca_atomic_ucx_and(shmem_ctx_t ctx, + void *target, uint64_t value, size_t size, int pe) { #if HAVE_DECL_UCP_ATOMIC_POST_OP_AND - return mca_atomic_ucx_op(target, value, size, pe, UCP_ATOMIC_POST_OP_AND); + return mca_atomic_ucx_op(ctx, target, value, size, pe, UCP_ATOMIC_POST_OP_AND); #else return OSHMEM_ERR_NOT_IMPLEMENTED; #endif } -static int mca_atomic_ucx_or(void *target, +static int 
mca_atomic_ucx_or(shmem_ctx_t ctx, + void *target, uint64_t value, size_t size, int pe) { #if HAVE_DECL_UCP_ATOMIC_POST_OP_OR - return mca_atomic_ucx_op(target, value, size, pe, UCP_ATOMIC_POST_OP_OR); + return mca_atomic_ucx_op(ctx, target, value, size, pe, UCP_ATOMIC_POST_OP_OR); #else return OSHMEM_ERR_NOT_IMPLEMENTED; #endif } -static int mca_atomic_ucx_xor(void *target, +static int mca_atomic_ucx_xor(shmem_ctx_t ctx, + void *target, uint64_t value, size_t size, int pe) { #if HAVE_DECL_UCP_ATOMIC_POST_OP_XOR - return mca_atomic_ucx_op(target, value, size, pe, UCP_ATOMIC_POST_OP_XOR); + return mca_atomic_ucx_op(ctx, target, value, size, pe, UCP_ATOMIC_POST_OP_XOR); #else return OSHMEM_ERR_NOT_IMPLEMENTED; #endif } -static int mca_atomic_ucx_fadd(void *target, +static int mca_atomic_ucx_fadd(shmem_ctx_t ctx, + void *target, void *prev, uint64_t value, size_t size, int pe) { - return mca_atomic_ucx_fop(target, prev, value, size, pe, UCP_ATOMIC_FETCH_OP_FADD); + return mca_atomic_ucx_fop(ctx, target, prev, value, size, pe, UCP_ATOMIC_FETCH_OP_FADD); } -static int mca_atomic_ucx_fand(void *target, +static int mca_atomic_ucx_fand(shmem_ctx_t ctx, + void *target, void *prev, uint64_t value, size_t size, int pe) { #if HAVE_DECL_UCP_ATOMIC_FETCH_OP_FAND - return mca_atomic_ucx_fop(target, prev, value, size, pe, UCP_ATOMIC_FETCH_OP_FAND); + return mca_atomic_ucx_fop(ctx, target, prev, value, size, pe, UCP_ATOMIC_FETCH_OP_FAND); #else return OSHMEM_ERR_NOT_IMPLEMENTED; #endif } -static int mca_atomic_ucx_for(void *target, +static int mca_atomic_ucx_for(shmem_ctx_t ctx, + void *target, void *prev, uint64_t value, size_t size, int pe) { #if HAVE_DECL_UCP_ATOMIC_FETCH_OP_FOR - return mca_atomic_ucx_fop(target, prev, value, size, pe, UCP_ATOMIC_FETCH_OP_FOR); + return mca_atomic_ucx_fop(ctx, target, prev, value, size, pe, UCP_ATOMIC_FETCH_OP_FOR); #else return OSHMEM_ERR_NOT_IMPLEMENTED; #endif } -static int mca_atomic_ucx_fxor(void *target, +static int 
mca_atomic_ucx_fxor(shmem_ctx_t ctx, + void *target, void *prev, uint64_t value, size_t size, int pe) { #if HAVE_DECL_UCP_ATOMIC_FETCH_OP_FXOR - return mca_atomic_ucx_fop(target, prev, value, size, pe, UCP_ATOMIC_FETCH_OP_FXOR); + return mca_atomic_ucx_fop(ctx, target, prev, value, size, pe, UCP_ATOMIC_FETCH_OP_FXOR); #else return OSHMEM_ERR_NOT_IMPLEMENTED; #endif } -static int mca_atomic_ucx_swap(void *target, +static int mca_atomic_ucx_swap(shmem_ctx_t ctx, + void *target, void *prev, uint64_t value, size_t size, int pe) { - return mca_atomic_ucx_fop(target, prev, value, size, pe, UCP_ATOMIC_FETCH_OP_SWAP); + return mca_atomic_ucx_fop(ctx, target, prev, value, size, pe, UCP_ATOMIC_FETCH_OP_SWAP); } diff --git a/oshmem/mca/scoll/basic/scoll_basic_alltoall.c b/oshmem/mca/scoll/basic/scoll_basic_alltoall.c index 479e118ec4..9843d985e7 100644 --- a/oshmem/mca/scoll/basic/scoll_basic_alltoall.c +++ b/oshmem/mca/scoll/basic/scoll_basic_alltoall.c @@ -75,7 +75,7 @@ int mca_scoll_basic_alltoall(struct oshmem_group_t *group, /* quiet is needed because scoll level barrier does not * guarantee put completion */ - MCA_SPML_CALL(quiet()); + MCA_SPML_CALL(quiet(oshmem_ctx_default)); /* Wait for operation completion */ SCOLL_VERBOSE(14, "[#%d] Wait for operation completion", group->my_pe); @@ -138,7 +138,7 @@ static int a2as_alg_simple(struct oshmem_group_t *group, dst_pe = get_dst_pe(group, src_blk_idx, dst_blk_idx, &dst_pe_idx); for (elem_idx = 0; elem_idx < nelems; elem_idx++) { - rc = MCA_SPML_CALL(put( + rc = MCA_SPML_CALL(put(oshmem_ctx_default, get_stride_elem(target, tst, nelems, element_size, dst_blk_idx, elem_idx), element_size, @@ -178,7 +178,7 @@ static int a2a_alg_simple(struct oshmem_group_t *group, for (src_blk_idx = 0; src_blk_idx < group->proc_count; src_blk_idx++) { dst_pe = get_dst_pe(group, src_blk_idx, dst_blk_idx, &dst_pe_idx); - rc = MCA_SPML_CALL(put(dst_blk, + rc = MCA_SPML_CALL(put(oshmem_ctx_default, dst_blk, nelems * element_size, 
get_stride_elem(source, 1, nelems, element_size, dst_pe_idx, 0), diff --git a/oshmem/mca/scoll/basic/scoll_basic_barrier.c b/oshmem/mca/scoll/basic/scoll_basic_barrier.c index e7f87a544f..05b976ad41 100644 --- a/oshmem/mca/scoll/basic/scoll_basic_barrier.c +++ b/oshmem/mca/scoll/basic/scoll_basic_barrier.c @@ -134,7 +134,7 @@ static int _algorithm_central_counter(struct oshmem_group_t *group, i++) { pe_cur = wait_pe_array[i]; if (pe_cur != OSHMEM_PE_INVALID) { - rc = MCA_SPML_CALL(get((void*)pSync, sizeof(value), (void*)&value, pe_cur)); + rc = MCA_SPML_CALL(get(oshmem_ctx_default, (void*)pSync, sizeof(value), (void*)&value, pe_cur)); if ((rc == OSHMEM_SUCCESS) && (value == SHMEM_SYNC_WAIT)) { wait_pe_array[i] = OSHMEM_PE_INVALID; @@ -153,7 +153,7 @@ static int _algorithm_central_counter(struct oshmem_group_t *group, i++) { pe_cur = oshmem_proc_pe(group->proc_array[i]); if (pe_cur != PE_root) { - rc = MCA_SPML_CALL(put((void*)pSync, sizeof(value), (void*)&value, pe_cur)); + rc = MCA_SPML_CALL(put(oshmem_ctx_default, (void*)pSync, sizeof(value), (void*)&value, pe_cur)); } } @@ -167,7 +167,7 @@ static int _algorithm_central_counter(struct oshmem_group_t *group, The root could leave the first barrier and in the second barrier it could get SHMEM_SYNC_WAIT value on remote node before the remote node receives its SHMEM_SYNC_RUN value in the first barrier */ - MCA_SPML_CALL(quiet()); + MCA_SPML_CALL(quiet(oshmem_ctx_default)); } /* Wait for RUN signal */ else { @@ -245,14 +245,14 @@ static int _algorithm_tournament(struct oshmem_group_t *group, long *pSync) it is expected that shmem_long_cswap() will make it faster. 
*/ do { - MCA_SPML_CALL(get((void*)pSync, sizeof(value), (void*)&value, peer_pe)); + MCA_SPML_CALL(get(oshmem_ctx_default, (void*)pSync, sizeof(value), (void*)&value, peer_pe)); } while (value != my_id); SCOLL_VERBOSE(14, "[#%d] round = %d signals to #%d", group->my_pe, round, peer_pe); value = peer_id; - rc = MCA_SPML_CALL(put((void*)pSync, sizeof(value), (void*)&value, peer_pe)); + rc = MCA_SPML_CALL(put(oshmem_ctx_default, (void*)pSync, sizeof(value), (void*)&value, peer_pe)); #else SCOLL_VERBOSE(14, "[#%d] round = %d signals to #%d", group->my_pe, round, peer_pe); do @@ -285,7 +285,7 @@ static int _algorithm_tournament(struct oshmem_group_t *group, long *pSync) (peer_id < group->proc_count) && (rc == OSHMEM_SUCCESS); peer_id++) { peer_pe = oshmem_proc_pe(group->proc_array[peer_id]); - rc = MCA_SPML_CALL(put((void*)pSync, sizeof(value), (void*)&value, peer_pe)); + rc = MCA_SPML_CALL(put(oshmem_ctx_default, (void*)pSync, sizeof(value), (void*)&value, peer_pe)); } } @@ -339,7 +339,7 @@ static int _algorithm_recursive_doubling(struct oshmem_group_t *group, "[#%d] is extra and signal to #%d", group->my_pe, peer_pe); value = SHMEM_SYNC_WAIT; - rc = MCA_SPML_CALL(put((void*)pSync, sizeof(value), (void*)&value, peer_pe)); + rc = MCA_SPML_CALL(put(oshmem_ctx_default, (void*)pSync, sizeof(value), (void*)&value, peer_pe)); SCOLL_VERBOSE(14, "[#%d] wait", group->my_pe); value = SHMEM_SYNC_RUN; @@ -384,14 +384,14 @@ static int _algorithm_recursive_doubling(struct oshmem_group_t *group, it is expected that shmem_long_cswap() will make it faster. 
*/ do { - MCA_SPML_CALL(get((void*)pSync, sizeof(value), (void*)&value, peer_pe)); + MCA_SPML_CALL(get(oshmem_ctx_default, (void*)pSync, sizeof(value), (void*)&value, peer_pe)); } while (value != (round - 1)); SCOLL_VERBOSE(14, "[#%d] round = %d signals to #%d", group->my_pe, round, peer_pe); value = round; - rc = MCA_SPML_CALL(put((void*)pSync, sizeof(value), (void*)&value, peer_pe)); + rc = MCA_SPML_CALL(put(oshmem_ctx_default, (void*)pSync, sizeof(value), (void*)&value, peer_pe)); #else SCOLL_VERBOSE(14, "[#%d] round = %d signals to #%d", group->my_pe, round, peer_pe); { @@ -424,7 +424,7 @@ static int _algorithm_recursive_doubling(struct oshmem_group_t *group, SCOLL_VERBOSE(14, "[#%d] signals to #%d", group->my_pe, peer_pe); value = SHMEM_SYNC_RUN; - rc = MCA_SPML_CALL(put((void*)pSync, sizeof(value), (void*)&value, peer_pe)); + rc = MCA_SPML_CALL(put(oshmem_ctx_default, (void*)pSync, sizeof(value), (void*)&value, peer_pe)); } } @@ -470,14 +470,14 @@ static int _algorithm_dissemination(struct oshmem_group_t *group, long *pSync) it is expected that shmem_long_cswap() will make it faster. 
*/ do { - MCA_SPML_CALL(get((void*)pSync, sizeof(value), (void*)&value, peer_pe)); + MCA_SPML_CALL(get(oshmem_ctx_default, (void*)pSync, sizeof(value), (void*)&value, peer_pe)); } while (value != round); SCOLL_VERBOSE(14, "[#%d] round = %d signals to #%d", group->my_pe, round, peer_pe); value = round + 1; - rc = MCA_SPML_CALL(put((void*)pSync, sizeof(value), (void*)&value, peer_pe)); + rc = MCA_SPML_CALL(put(oshmem_ctx_default, (void*)pSync, sizeof(value), (void*)&value, peer_pe)); #endif SCOLL_VERBOSE(14, "[#%d] round = %d wait", group->my_pe, round); diff --git a/oshmem/mca/scoll/basic/scoll_basic_broadcast.c b/oshmem/mca/scoll/basic/scoll_basic_broadcast.c index bccf48c9c9..ef9bf1869b 100644 --- a/oshmem/mca/scoll/basic/scoll_basic_broadcast.c +++ b/oshmem/mca/scoll/basic/scoll_basic_broadcast.c @@ -143,13 +143,13 @@ static int _algorithm_central_counter(struct oshmem_group_t *group, SCOLL_VERBOSE(15, "[#%d] send data to #%d", group->my_pe, pe_cur); - rc = MCA_SPML_CALL(put(target, nlong, (void *)source, pe_cur)); + rc = MCA_SPML_CALL(put(oshmem_ctx_default, target, nlong, (void *)source, pe_cur)); } } /* quiet is needed because scoll level barrier does not * guarantee put completion */ - MCA_SPML_CALL(quiet()); + MCA_SPML_CALL(quiet(oshmem_ctx_default)); } if (rc == OSHMEM_SUCCESS) { @@ -233,17 +233,17 @@ static int _algorithm_binomial_tree(struct oshmem_group_t *group, "[#%d] check remote pe is ready to receive #%d", group->my_pe, peer_pe); do { - rc = MCA_SPML_CALL(get((void*)pSync, sizeof(long), (void*)pSync, peer_pe)); + rc = MCA_SPML_CALL(get(oshmem_ctx_default, (void*)pSync, sizeof(long), (void*)pSync, peer_pe)); } while ((OSHMEM_SUCCESS == rc) && (pSync[0] != SHMEM_SYNC_READY)); SCOLL_VERBOSE(14, "[#%d] send data to #%d", group->my_pe, peer_pe); - rc = MCA_SPML_CALL(put(target, nlong, (my_id == root_id ? (void *)source : target), peer_pe)); + rc = MCA_SPML_CALL(put(oshmem_ctx_default, target, nlong, (my_id == root_id ? 
(void *)source : target), peer_pe)); - MCA_SPML_CALL(fence()); + MCA_SPML_CALL(fence(oshmem_ctx_default)); SCOLL_VERBOSE(14, "[#%d] signals to #%d", group->my_pe, peer_pe); value = nlong; - rc = MCA_SPML_CALL(put((void*)pSync, sizeof(value), (void*)&value, peer_pe)); + rc = MCA_SPML_CALL(put(oshmem_ctx_default, (void*)pSync, sizeof(value), (void*)&value, peer_pe)); if (OSHMEM_SUCCESS != rc) { break; } diff --git a/oshmem/mca/scoll/basic/scoll_basic_collect.c b/oshmem/mca/scoll/basic/scoll_basic_collect.c index d592a7eff7..eda5f93406 100644 --- a/oshmem/mca/scoll/basic/scoll_basic_collect.c +++ b/oshmem/mca/scoll/basic/scoll_basic_collect.c @@ -178,7 +178,7 @@ static int _algorithm_f_central_counter(struct oshmem_group_t *group, group->my_pe, (int)nlong, pe_cur); /* Get data from the current peer */ - rc = MCA_SPML_CALL(get((void *)source, nlong, (void*)((unsigned char*)target + i * nlong), pe_cur)); + rc = MCA_SPML_CALL(get(oshmem_ctx_default, (void *)source, nlong, (void*)((unsigned char*)target + i * nlong), pe_cur)); } } @@ -255,21 +255,21 @@ static int _algorithm_f_tournament(struct oshmem_group_t *group, it is expected that shmem_long_cswap() will make it faster. 
*/ do { - MCA_SPML_CALL(get((void*)pSync, sizeof(value), (void*)&value, peer_pe)); + MCA_SPML_CALL(get(oshmem_ctx_default, (void*)pSync, sizeof(value), (void*)&value, peer_pe)); } while (value != my_id); SCOLL_VERBOSE(14, "[#%d] round = %d send data to #%d", group->my_pe, round, peer_pe); - rc = MCA_SPML_CALL(put((void*)((unsigned char*)target + my_id * nlong), (1 << (round - 1)) * nlong, (void*)((unsigned char*)target + my_id * nlong), peer_pe)); + rc = MCA_SPML_CALL(put(oshmem_ctx_default, (void*)((unsigned char*)target + my_id * nlong), (1 << (round - 1)) * nlong, (void*)((unsigned char*)target + my_id * nlong), peer_pe)); - MCA_SPML_CALL(fence()); + MCA_SPML_CALL(fence(oshmem_ctx_default)); SCOLL_VERBOSE(14, "[#%d] round = %d signals to #%d", group->my_pe, round, peer_pe); value = peer_id; - rc = MCA_SPML_CALL(put((void*)pSync, sizeof(value), (void*)&value, peer_pe)); + rc = MCA_SPML_CALL(put(oshmem_ctx_default, (void*)pSync, sizeof(value), (void*)&value, peer_pe)); #endif SCOLL_VERBOSE(14, "[#%d] round = %d wait", group->my_pe, round); value = SHMEM_SYNC_RUN; @@ -288,7 +288,7 @@ static int _algorithm_f_tournament(struct oshmem_group_t *group, (peer_id < group->proc_count) && (rc == OSHMEM_SUCCESS); peer_id++) { peer_pe = oshmem_proc_pe(group->proc_array[peer_id]); - rc = MCA_SPML_CALL(put((void*)pSync, sizeof(value), (void*)&value, peer_pe)); + rc = MCA_SPML_CALL(put(oshmem_ctx_default, (void*)pSync, sizeof(value), (void*)&value, peer_pe)); } } @@ -341,15 +341,15 @@ static int _algorithm_f_ring(struct oshmem_group_t *group, SCOLL_VERBOSE(14, "[#%d] round = %d send data to #%d by index = %d", group->my_pe, i, peer_pe, data_index); - rc = MCA_SPML_CALL(put((void*)((unsigned char*)target + data_index * nlong), nlong, (void*)((unsigned char*)target + data_index * nlong), peer_pe)); + rc = MCA_SPML_CALL(put(oshmem_ctx_default, (void*)((unsigned char*)target + data_index * nlong), nlong, (void*)((unsigned char*)target + data_index * nlong), peer_pe)); - 
MCA_SPML_CALL(fence()); + MCA_SPML_CALL(fence(oshmem_ctx_default)); SCOLL_VERBOSE(14, "[#%d] round = %d signals to #%d", group->my_pe, i, peer_pe); value = i; - rc = MCA_SPML_CALL(put((void*)pSync, sizeof(value), (void*)&value, peer_pe)); + rc = MCA_SPML_CALL(put(oshmem_ctx_default, (void*)pSync, sizeof(value), (void*)&value, peer_pe)); data_index = (data_index ? (data_index - 1) : (group->proc_count - 1)); @@ -423,16 +423,16 @@ static int _algorithm_f_recursive_doubling(struct oshmem_group_t *group, SCOLL_VERBOSE(14, "[#%d] is extra send data to #%d", group->my_pe, pe_cur); - rc = MCA_SPML_CALL(put((void*)((unsigned char*)target + data_index * nlong), nlong, (void *)source, pe_cur)); + rc = MCA_SPML_CALL(put(oshmem_ctx_default, (void*)((unsigned char*)target + data_index * nlong), nlong, (void *)source, pe_cur)); } - MCA_SPML_CALL(fence()); + MCA_SPML_CALL(fence(oshmem_ctx_default)); SCOLL_VERBOSE(14, "[#%d] is extra and signal to #%d", group->my_pe, peer_pe); value = SHMEM_SYNC_RUN; - rc = MCA_SPML_CALL(put((void*)pSync, sizeof(value), (void*)&value, peer_pe)); + rc = MCA_SPML_CALL(put(oshmem_ctx_default, (void*)pSync, sizeof(value), (void*)&value, peer_pe)); SCOLL_VERBOSE(14, "[#%d] wait", group->my_pe); value = SHMEM_SYNC_RUN; @@ -469,15 +469,15 @@ static int _algorithm_f_recursive_doubling(struct oshmem_group_t *group, it is expected that shmem_long_cswap() will make it faster. 
*/ do { - MCA_SPML_CALL(get((void*)pSync, sizeof(value), (void*)&value, peer_pe)); + MCA_SPML_CALL(get(oshmem_ctx_default, (void*)pSync, sizeof(value), (void*)&value, peer_pe)); } while (value != (round - 1)); SCOLL_VERBOSE(14, "[#%d] round = %d send data to #%d by index = %d", group->my_pe, round, peer_pe, data_index); - rc = MCA_SPML_CALL(put((void*)((unsigned char*)target + data_index * nlong), (1 << (round - 1)) * nlong, (void*)((unsigned char*)target + data_index * nlong), peer_pe)); + rc = MCA_SPML_CALL(put(oshmem_ctx_default, (void*)((unsigned char*)target + data_index * nlong), (1 << (round - 1)) * nlong, (void*)((unsigned char*)target + data_index * nlong), peer_pe)); - MCA_SPML_CALL(fence()); + MCA_SPML_CALL(fence(oshmem_ctx_default)); data_index = (my_id / (1 << round)) * (1 << round); @@ -485,7 +485,7 @@ static int _algorithm_f_recursive_doubling(struct oshmem_group_t *group, "[#%d] round = %d signals to #%d", group->my_pe, round, peer_pe); value = SHMEM_SYNC_RUN; - rc = MCA_SPML_CALL(put((void*)pSync, sizeof(value), (void*)&value, peer_pe)); + rc = MCA_SPML_CALL(put(oshmem_ctx_default, (void*)pSync, sizeof(value), (void*)&value, peer_pe)); #endif SCOLL_VERBOSE(14, "[#%d] round = %d wait", group->my_pe, round); @@ -504,13 +504,13 @@ static int _algorithm_f_recursive_doubling(struct oshmem_group_t *group, SCOLL_VERBOSE(14, "[#%d] is extra send data to #%d", group->my_pe, peer_pe); - rc = MCA_SPML_CALL(put(target, group->proc_count * nlong, target, peer_pe)); + rc = MCA_SPML_CALL(put(oshmem_ctx_default, target, group->proc_count * nlong, target, peer_pe)); - MCA_SPML_CALL(fence()); + MCA_SPML_CALL(fence(oshmem_ctx_default)); SCOLL_VERBOSE(14, "[#%d] signals to #%d", group->my_pe, peer_pe); value = SHMEM_SYNC_RUN; - rc = MCA_SPML_CALL(put((void*)pSync, sizeof(value), (void*)&value, peer_pe)); + rc = MCA_SPML_CALL(put(oshmem_ctx_default, (void*)pSync, sizeof(value), (void*)&value, peer_pe)); } } @@ -567,7 +567,7 @@ static int 
_algorithm_central_collector(struct oshmem_group_t *group, if (wait_pe_array[i] == 0) { pe_cur = oshmem_proc_pe(group->proc_array[i]); value = 0; - rc = MCA_SPML_CALL(get((void*)pSync, sizeof(value), (void*)&value, pe_cur)); + rc = MCA_SPML_CALL(get(oshmem_ctx_default, (void*)pSync, sizeof(value), (void*)&value, pe_cur)); if ((rc == OSHMEM_SUCCESS) && (value != _SHMEM_SYNC_VALUE)) { wait_pe_array[i] = value; @@ -597,7 +597,7 @@ static int _algorithm_central_collector(struct oshmem_group_t *group, pe_cur = oshmem_proc_pe(group->proc_array[i]); /* Get data from the current peer */ - rc = MCA_SPML_CALL(get((void *)source, (size_t)wait_pe_array[i], (void*)((unsigned char*)target + offset), pe_cur)); + rc = MCA_SPML_CALL(get(oshmem_ctx_default, (void *)source, (size_t)wait_pe_array[i], (void*)((unsigned char*)target + offset), pe_cur)); SCOLL_VERBOSE(14, "Got %d bytes of data from #%d (offset: %d)", diff --git a/oshmem/mca/scoll/basic/scoll_basic_reduce.c b/oshmem/mca/scoll/basic/scoll_basic_reduce.c index 3661255f4d..9d6db9eed4 100644 --- a/oshmem/mca/scoll/basic/scoll_basic_reduce.c +++ b/oshmem/mca/scoll/basic/scoll_basic_reduce.c @@ -212,7 +212,7 @@ static int _algorithm_central_counter(struct oshmem_group_t *group, memset(target_cur, 0, nlong); /* Get data from the current peer */ - rc = MCA_SPML_CALL(get((void *)source, nlong, target_cur, pe_cur)); + rc = MCA_SPML_CALL(get(oshmem_ctx_default, (void *)source, nlong, target_cur, pe_cur)); /* Do reduction operation */ if (rc == OSHMEM_SUCCESS) { @@ -305,21 +305,21 @@ static int _algorithm_tournament(struct oshmem_group_t *group, it is expected that shmem_long_cswap() will make it faster. 
*/ do { - MCA_SPML_CALL(get((void*)pSync, sizeof(value), (void*)&value, peer_pe)); + MCA_SPML_CALL(get(oshmem_ctx_default, (void*)pSync, sizeof(value), (void*)&value, peer_pe)); } while (value != my_id); SCOLL_VERBOSE(14, "[#%d] round = %d send data to #%d", group->my_pe, round, peer_pe); - rc = MCA_SPML_CALL(put(target, nlong, target_cur, peer_pe)); + rc = MCA_SPML_CALL(put(oshmem_ctx_default, target, nlong, target_cur, peer_pe)); - MCA_SPML_CALL(fence()); + MCA_SPML_CALL(fence(oshmem_ctx_default)); SCOLL_VERBOSE(14, "[#%d] round = %d signals to #%d", group->my_pe, round, peer_pe); value = peer_id; - rc = MCA_SPML_CALL(put((void*)pSync, sizeof(value), (void*)&value, peer_pe)); + rc = MCA_SPML_CALL(put(oshmem_ctx_default, (void*)pSync, sizeof(value), (void*)&value, peer_pe)); #endif SCOLL_VERBOSE(14, "[#%d] round = %d wait", group->my_pe, round); value = SHMEM_SYNC_RUN; @@ -340,7 +340,7 @@ static int _algorithm_tournament(struct oshmem_group_t *group, (peer_id < group->proc_count) && (rc == OSHMEM_SUCCESS); peer_id++) { peer_pe = oshmem_proc_pe(group->proc_array[peer_id]); - rc = MCA_SPML_CALL(put((void*)pSync, sizeof(value), (void*)&value, peer_pe)); + rc = MCA_SPML_CALL(put(oshmem_ctx_default, (void*)pSync, sizeof(value), (void*)&value, peer_pe)); } } @@ -423,15 +423,15 @@ static int _algorithm_recursive_doubling(struct oshmem_group_t *group, SCOLL_VERBOSE(14, "[#%d] is extra send data to #%d", group->my_pe, peer_pe); - rc = MCA_SPML_CALL(put(target, nlong, target_cur, peer_pe)); + rc = MCA_SPML_CALL(put(oshmem_ctx_default, target, nlong, target_cur, peer_pe)); - MCA_SPML_CALL(fence()); + MCA_SPML_CALL(fence(oshmem_ctx_default)); SCOLL_VERBOSE(14, "[#%d] is extra and signal to #%d", group->my_pe, peer_pe); value = SHMEM_SYNC_RUN; - rc = MCA_SPML_CALL(put((void*)pSync, sizeof(value), (void*)&value, peer_pe)); + rc = MCA_SPML_CALL(put(oshmem_ctx_default, (void*)pSync, sizeof(value), (void*)&value, peer_pe)); SCOLL_VERBOSE(14, "[#%d] wait", group->my_pe); value = 
SHMEM_SYNC_RUN; @@ -449,7 +449,7 @@ static int _algorithm_recursive_doubling(struct oshmem_group_t *group, "[#%d] signal to #%d that I am ready", group->my_pe, peer_pe); value = SHMEM_SYNC_WAIT; - rc = MCA_SPML_CALL(put((void*)pSync, sizeof(value), (void*)&value, peer_pe)); + rc = MCA_SPML_CALL(put(oshmem_ctx_default, (void*)pSync, sizeof(value), (void*)&value, peer_pe)); } SCOLL_VERBOSE(14, @@ -482,21 +482,21 @@ static int _algorithm_recursive_doubling(struct oshmem_group_t *group, it is expected that shmem_long_cswap() will make it faster. */ do { - MCA_SPML_CALL(get((void*)pSync, sizeof(value), (void*)&value, peer_pe)); + MCA_SPML_CALL(get(oshmem_ctx_default, (void*)pSync, sizeof(value), (void*)&value, peer_pe)); } while (value != (round - 1)); SCOLL_VERBOSE(14, "[#%d] round = %d send data to #%d", group->my_pe, round, peer_pe); - rc = MCA_SPML_CALL(put(target, nlong, target_cur, peer_pe)); + rc = MCA_SPML_CALL(put(oshmem_ctx_default, target, nlong, target_cur, peer_pe)); - MCA_SPML_CALL(fence()); + MCA_SPML_CALL(fence(oshmem_ctx_default)); SCOLL_VERBOSE(14, "[#%d] round = %d signals to #%d", group->my_pe, round, peer_pe); value = SHMEM_SYNC_RUN; - rc = MCA_SPML_CALL(put((void*)pSync, sizeof(value), (void*)&value, peer_pe)); + rc = MCA_SPML_CALL(put(oshmem_ctx_default, (void*)pSync, sizeof(value), (void*)&value, peer_pe)); #endif SCOLL_VERBOSE(14, "[#%d] round = %d wait", group->my_pe, round); @@ -522,13 +522,13 @@ static int _algorithm_recursive_doubling(struct oshmem_group_t *group, SCOLL_VERBOSE(14, "[#%d] is extra send data to #%d", group->my_pe, peer_pe); - rc = MCA_SPML_CALL(put(target, nlong, target_cur, peer_pe)); + rc = MCA_SPML_CALL(put(oshmem_ctx_default, target, nlong, target_cur, peer_pe)); - MCA_SPML_CALL(fence()); + MCA_SPML_CALL(fence(oshmem_ctx_default)); SCOLL_VERBOSE(14, "[#%d] signals to #%d", group->my_pe, peer_pe); value = SHMEM_SYNC_RUN; - rc = MCA_SPML_CALL(put((void*)pSync, sizeof(value), (void*)&value, peer_pe)); + rc = 
MCA_SPML_CALL(put(oshmem_ctx_default, (void*)pSync, sizeof(value), (void*)&value, peer_pe)); } } diff --git a/oshmem/mca/scoll/fca/scoll_fca_module.c b/oshmem/mca/scoll/fca/scoll_fca_module.c index cb7299d3b3..c4fd8fbbf7 100644 --- a/oshmem/mca/scoll/fca/scoll_fca_module.c +++ b/oshmem/mca/scoll/fca/scoll_fca_module.c @@ -198,7 +198,7 @@ static int _fca_comm_new(mca_scoll_fca_module_t *fca_module) mca_scoll_fca_component.rcounts[i] = -1; } _internal_barrier(fca_module); - MCA_SPML_CALL(put((void *)&mca_scoll_fca_component.rcounts[my_id], (size_t)sizeof(info_size), (void *)&info_size, root_pe)); + MCA_SPML_CALL(put(oshmem_ctx_default, (void *)&mca_scoll_fca_component.rcounts[my_id], (size_t)sizeof(info_size), (void *)&info_size, root_pe)); if (root_pe == comm->my_pe) { int value = -1; @@ -233,7 +233,7 @@ static int _fca_comm_new(mca_scoll_fca_module_t *fca_module) if (root_pe == comm->my_pe) { for (i = 0; i < comm->proc_count; i++) { if (mca_scoll_fca_component.rcounts[i] > 0) { - MCA_SPML_CALL(get((void *)mca_scoll_fca_component.my_info_exchangeable, mca_scoll_fca_component.rcounts[i], (void*)(((char*)all_info)+disps[i]),comm->proc_array[i]->super.proc_name.vpid)); + MCA_SPML_CALL(get(oshmem_ctx_default, (void *)mca_scoll_fca_component.my_info_exchangeable, mca_scoll_fca_component.rcounts[i], (void*)(((char*)all_info)+disps[i]),comm->proc_array[i]->super.proc_name.vpid)); } } } @@ -269,7 +269,7 @@ static int _fca_comm_new(mca_scoll_fca_module_t *fca_module) _internal_barrier(fca_module); if (root_pe != comm->my_pe) { - MCA_SPML_CALL(get((void *)mca_scoll_fca_component.ret,sizeof(int), (void *)mca_scoll_fca_component.ret, root_pe)); + MCA_SPML_CALL(get(oshmem_ctx_default, (void *)mca_scoll_fca_component.ret,sizeof(int), (void *)mca_scoll_fca_component.ret, root_pe)); } /* Examine comm_new return value */ @@ -294,7 +294,7 @@ static int _fca_comm_new(mca_scoll_fca_module_t *fca_module) _internal_barrier(fca_module); if (root_pe != comm->my_pe) { - 
MCA_SPML_CALL(get((void *)mca_scoll_fca_component.fca_comm_desc_exchangeable, sizeof(fca_module->fca_comm_desc), (void *)&fca_module->fca_comm_desc, root_pe)); + MCA_SPML_CALL(get(oshmem_ctx_default, (void *)mca_scoll_fca_component.fca_comm_desc_exchangeable, sizeof(fca_module->fca_comm_desc), (void *)&fca_module->fca_comm_desc, root_pe)); } _internal_barrier(fca_module); diff --git a/oshmem/mca/spml/base/base.h b/oshmem/mca/spml/base/base.h index 58025561ca..4aeff7d760 100644 --- a/oshmem/mca/spml/base/base.h +++ b/oshmem/mca/spml/base/base.h @@ -67,6 +67,11 @@ OSHMEM_DECLSPEC int mca_spml_base_wait(void* addr, void* value, int datatype); OSHMEM_DECLSPEC int mca_spml_base_wait_nb(void* handle); +OSHMEM_DECLSPEC int mca_spml_base_test(void* addr, + int cmp, + void* value, + int datatype, + int *out_value); OSHMEM_DECLSPEC int mca_spml_base_oob_get_mkeys(int pe, uint32_t seg, sshmem_mkey_t *mkeys); diff --git a/oshmem/mca/spml/base/spml_base.c b/oshmem/mca/spml/base/spml_base.c index 3b950988fa..86d544e88e 100644 --- a/oshmem/mca/spml/base/spml_base.c +++ b/oshmem/mca/spml/base/spml_base.c @@ -49,9 +49,97 @@ } while (_res == 0); /** - * Wait for data delivery. - * Pool on a variable given in addr until it is not equal to value. + * Check on a variable given in addr to see it is not equal to value. 
*/ +int mca_spml_base_test(void* addr, int cmp, void* value, int datatype, int *out_value) +{ + volatile int *int_addr; + volatile long *long_addr; + volatile short *short_addr; + volatile long long *longlong_addr; + volatile int32_t *int32_addr; + volatile int64_t *int64_addr; + + int int_value; + long long_value; + short short_value; + long long longlong_value; + int32_t int32_value; + int64_t int64_value; + + ompi_fortran_integer_t *fint_addr, fint_value; + ompi_fortran_integer4_t *fint4_addr, fint4_value; + ompi_fortran_integer8_t *fint8_addr, fint8_value; + + switch (datatype) { + + /* Int */ + case SHMEM_INT: + int_value = *(int*) value; + int_addr = (int*) addr; + SPML_BASE_DO_CMP((*out_value), int_addr, cmp, int_value); + break; + + /* Short */ + case SHMEM_SHORT: + short_value = *(short*) value; + short_addr = (short*) addr; + SPML_BASE_DO_CMP((*out_value), short_addr, cmp, short_value); + break; + + /* Long */ + case SHMEM_LONG: + long_value = *(long*) value; + long_addr = (long*) addr; + SPML_BASE_DO_CMP((*out_value), long_addr, cmp, long_value); + break; + + /* Long-Long */ + case SHMEM_LLONG: + longlong_value = *(long long*) value; + longlong_addr = (long long*) addr; + SPML_BASE_DO_CMP((*out_value), longlong_addr, cmp, longlong_value); + break; + + /* Int32_t */ + case SHMEM_INT32_T: + int32_value = *(int32_t*) value; + int32_addr = (int32_t*) addr; + SPML_BASE_DO_CMP((*out_value), int32_addr, cmp, int32_value); + break; + + /* Int64_t */ + case SHMEM_INT64_T: + int64_value = *(int64_t*) value; + int64_addr = (int64_t*) addr; + SPML_BASE_DO_CMP((*out_value), int64_addr, cmp, int64_value); + break; + + /*C equivalent of Fortran integer type */ + case SHMEM_FINT: + fint_value = *(ompi_fortran_integer_t *) value; + fint_addr = (ompi_fortran_integer_t *) addr; + SPML_BASE_DO_CMP((*out_value), fint_addr, cmp, fint_value); + break; + + /*C equivalent of Fortran int4 type*/ + case SHMEM_FINT4: + fint4_value = *(ompi_fortran_integer4_t *) value; + fint4_addr 
= (ompi_fortran_integer4_t *) addr; + SPML_BASE_DO_CMP((*out_value), fint4_addr, cmp, fint4_value); + break; + + /*C equivalent of Fortran int8 type*/ + case SHMEM_FINT8: + fint8_value = *(ompi_fortran_integer8_t *) value; + fint8_addr = (ompi_fortran_integer8_t *) addr; + SPML_BASE_DO_CMP((*out_value), fint8_addr, cmp, fint8_value); + break; + } + + return OSHMEM_SUCCESS; +} + int mca_spml_base_wait(void* addr, int cmp, void* value, int datatype) { volatile int *int_addr; @@ -143,6 +231,7 @@ int mca_spml_base_wait(void* addr, int cmp, void* value, int datatype) return OSHMEM_SUCCESS; } + /** * Waits for completion of a non-blocking put or get issued by the calling PE. * This function waits for completion of a single non-blocking transfer issued by @@ -153,7 +242,7 @@ int mca_spml_base_wait(void* addr, int cmp, void* value, int datatype) */ int mca_spml_base_wait_nb(void* handle) { - MCA_SPML_CALL(quiet()); + MCA_SPML_CALL(quiet(oshmem_ctx_default)); return OSHMEM_SUCCESS; } diff --git a/oshmem/mca/spml/ikrit/spml_ikrit.c b/oshmem/mca/spml/ikrit/spml_ikrit.c index 9bdd04fe21..523baf7763 100644 --- a/oshmem/mca/spml/ikrit/spml_ikrit.c +++ b/oshmem/mca/spml/ikrit/spml_ikrit.c @@ -61,6 +61,8 @@ static int mca_spml_ikrit_get_async(void *src_addr, void *dst_addr, int src); +mca_spml_ikrit_ctx_t mca_spml_ikrit_ctx_default = { 0 }; + struct mca_spml_ikrit_put_request { opal_free_list_item_t link; /* must be a first member */ mxm_send_req_t mxm_req; @@ -162,6 +164,8 @@ mca_spml_ikrit_t mca_spml_ikrit = { mca_spml_ikrit_register, mca_spml_ikrit_deregister, mca_spml_ikrit_oob_get_mkeys, + mca_spml_ikrit_ctx_create, + mca_spml_ikrit_ctx_destroy, mca_spml_ikrit_put, mca_spml_ikrit_put_nb, mca_spml_ikrit_get, @@ -170,6 +174,7 @@ mca_spml_ikrit_t mca_spml_ikrit = { mca_spml_ikrit_send, mca_spml_base_wait, mca_spml_base_wait_nb, + mca_spml_base_test, mca_spml_ikrit_fence, /* fence is implemented as quiet */ mca_spml_ikrit_fence, mca_spml_ikrit_cache_mkeys, @@ -517,7 +522,7 @@ int 
mca_spml_ikrit_deregister(sshmem_mkey_t *mkeys) { int i; - MCA_SPML_CALL(fence()); + MCA_SPML_CALL(fence(oshmem_ctx_default)); if (!mkeys) return OSHMEM_SUCCESS; @@ -571,6 +576,19 @@ int mca_spml_ikrit_oob_get_mkeys(int pe, uint32_t seg, sshmem_mkey_t *mkeys) return OSHMEM_ERROR; } +int mca_spml_ikrit_ctx_create(long options, shmem_ctx_t *ctx) +{ + int rc = OSHMEM_SUCCESS; + mca_spml_ikrit_ctx_t *ctxp = malloc(sizeof(mca_spml_ikrit_ctx_t)); + *ctx = (shmem_ctx_t)ctxp; + return rc; +} + +void mca_spml_ikrit_ctx_destroy(shmem_ctx_t ctx) +{ + free(ctx); +} + static inline int mca_spml_ikrit_get_helper(mxm_send_req_t *sreq, void *src_addr, size_t size, @@ -629,7 +647,8 @@ static inline int mca_spml_ikrit_get_shm(void *src_addr, return OSHMEM_SUCCESS; } -int mca_spml_ikrit_get_nb(void* src_addr, +int mca_spml_ikrit_get_nb(shmem_ctx_t ctx, + void* src_addr, size_t size, void* dst_addr, int src, @@ -638,7 +657,7 @@ int mca_spml_ikrit_get_nb(void* src_addr, return mca_spml_ikrit_get_async(src_addr, size, dst_addr, src); } -int mca_spml_ikrit_get(void *src_addr, size_t size, void *dst_addr, int src) +int mca_spml_ikrit_get(shmem_ctx_t ctx, void *src_addr, size_t size, void *dst_addr, int src) { mxm_send_req_t sreq; @@ -938,7 +957,8 @@ int mca_spml_ikrit_put_simple(void* dst_addr, return OSHMEM_SUCCESS; } -int mca_spml_ikrit_put_nb(void* dst_addr, +int mca_spml_ikrit_put_nb(shmem_ctx_t ctx, + void* dst_addr, size_t size, void* src_addr, int dst, @@ -954,7 +974,7 @@ int mca_spml_ikrit_put_nb(void* dst_addr, return OSHMEM_SUCCESS; } -int mca_spml_ikrit_put(void* dst_addr, size_t size, void* src_addr, int dst) +int mca_spml_ikrit_put(shmem_ctx_t ctx, void* dst_addr, size_t size, void* src_addr, int dst) { int err; mca_spml_ikrit_put_request_t *put_req; @@ -985,7 +1005,7 @@ int mca_spml_ikrit_put(void* dst_addr, size_t size, void* src_addr, int dst) } -int mca_spml_ikrit_fence(void) +int mca_spml_ikrit_fence(shmem_ctx_t ctx) { mxm_peer_t *peer; opal_list_item_t *item; diff --git 
a/oshmem/mca/spml/ikrit/spml_ikrit.h b/oshmem/mca/spml/ikrit/spml_ikrit.h index 5cf182ecc1..e275c3bf59 100644 --- a/oshmem/mca/spml/ikrit/spml_ikrit.h +++ b/oshmem/mca/spml/ikrit/spml_ikrit.h @@ -83,6 +83,14 @@ typedef struct mxm_peer mxm_peer_t; typedef mxm_mem_key_t *(*mca_spml_ikrit_get_mkey_slow_fn_t)(int pe, void *va, int ptl_id, void **rva); +struct mca_spml_ikrit_ctx { + int temp; +}; + +typedef struct mca_spml_ikrit_ctx mca_spml_ikrit_ctx_t; + +extern mca_spml_ikrit_ctx_t mca_spml_ikrit_ctx_default; + struct mca_spml_ikrit_t { mca_spml_base_module_t super; @@ -137,21 +145,28 @@ typedef struct spml_ikrit_mxm_ep_conn_info_t { extern mca_spml_ikrit_t mca_spml_ikrit; extern int mca_spml_ikrit_enable(bool enable); -extern int mca_spml_ikrit_get(void* dst_addr, +extern int mca_spml_ikrit_ctx_create(long options, + shmem_ctx_t *ctx); +extern void mca_spml_ikrit_ctx_destroy(shmem_ctx_t ctx); +extern int mca_spml_ikrit_get(shmem_ctx_t ctx, + void* dst_addr, size_t size, void* src_addr, int src); -extern int mca_spml_ikrit_get_nb(void* src_addr, +extern int mca_spml_ikrit_get_nb(shmem_ctx_t ctx, + void* src_addr, size_t size, void* dst_addr, int src, void **handle); -extern int mca_spml_ikrit_put(void* dst_addr, +extern int mca_spml_ikrit_put(shmem_ctx_t ctx, + void* dst_addr, size_t size, void* src_addr, int dst); -extern int mca_spml_ikrit_put_nb(void* dst_addr, +extern int mca_spml_ikrit_put_nb(shmem_ctx_t ctx, + void* dst_addr, size_t size, void* src_addr, int dst, @@ -174,7 +189,7 @@ extern int mca_spml_ikrit_oob_get_mkeys(int pe, extern int mca_spml_ikrit_add_procs(ompi_proc_t** procs, size_t nprocs); extern int mca_spml_ikrit_del_procs(ompi_proc_t** procs, size_t nprocs); -extern int mca_spml_ikrit_fence(void); +extern int mca_spml_ikrit_fence(shmem_ctx_t ctx); extern int spml_ikrit_progress(void); /* the functionreturns NULL if data can be directly copied via shared memory diff --git a/oshmem/mca/spml/ikrit/spml_ikrit_component.c 
b/oshmem/mca/spml/ikrit/spml_ikrit_component.c index 45cba8eb4b..846d749490 100644 --- a/oshmem/mca/spml/ikrit/spml_ikrit_component.c +++ b/oshmem/mca/spml/ikrit/spml_ikrit_component.c @@ -384,6 +384,8 @@ static int spml_ikrit_mxm_init(void) mca_spml_ikrit.mxm_hw_rdma_ep = mca_spml_ikrit.mxm_ep; } + oshmem_ctx_default = (shmem_ctx_t) &mca_spml_ikrit_ctx_default; + return OSHMEM_SUCCESS; } diff --git a/oshmem/mca/spml/spml.h b/oshmem/mca/spml/spml.h index f320e83e47..c78ed6cbdd 100644 --- a/oshmem/mca/spml/spml.h +++ b/oshmem/mca/spml/spml.h @@ -113,6 +113,20 @@ typedef int (*mca_spml_base_module_wait_fn_t)(void* addr, void* value, int datatype); +/** + * Test for an int variable to change on the local PE. + * + * @param addr Address of the variable to poll on. + * @param value The value to poll on. Poll until the value held in addr is different than value. + * @param out_value Return value to indicate whether the variable is equal to the given cmp value. + * @return OSHMEM_SUCCESS or failure status. + */ +typedef int (*mca_spml_base_module_test_fn_t)(void* addr, + int cmp, + void* value, + int datatype, + int *out_value); + /** * deserialize remote mkey * @@ -184,16 +198,36 @@ typedef int (*mca_spml_base_module_add_procs_fn_t)(ompi_proc_t** procs, typedef int (*mca_spml_base_module_del_procs_fn_t)(ompi_proc_t** procs, size_t nprocs); + +/** + * Create a communication context. + * + * @param options The set of options requested for the given context. + * @param ctx A handle to the newly created context. + * @return OSHMEM_SUCCESS or failure status. + */ +typedef int (*mca_spml_base_module_ctx_create_fn_t)(long options, shmem_ctx_t *ctx); + + +/** + * Destroy a communication context. + * + * @param ctx Handle to the context that will be destroyed. + */ +typedef void (*mca_spml_base_module_ctx_destroy_fn_t)(shmem_ctx_t ctx); + /** * Transfer data to a remote pe. * + * @param ctx The context object this routine is working on. 
* @param dst_addr The address in the remote PE of the object being written. * @param size The number of bytes to be written. * @param src_addr An address on the local PE holdng the value to be written. * @param dst The remote PE to be written to. * @return OSHMEM_SUCCESS or failure status. */ -typedef int (*mca_spml_base_module_put_fn_t)(void *dst_addr, +typedef int (*mca_spml_base_module_put_fn_t)(shmem_ctx_t ctx, + void *dst_addr, size_t size, void *src_addr, int dst); @@ -203,6 +237,7 @@ typedef int (*mca_spml_base_module_put_fn_t)(void *dst_addr, * blocking the caller. These routines return before the data has been delivered to the * remote PE. * + * @param ctx The context object this routine is working on. * @param dst_addr The address in the remote PE of the object being written. * @param size The number of bytes to be written. * @param src_addr An address on the local PE holdng the value to be written. @@ -211,7 +246,8 @@ typedef int (*mca_spml_base_module_put_fn_t)(void *dst_addr, * shmem_test_nb() to wait or poll for the completion of the transfer. * @return OSHMEM_SUCCESS or failure status. */ -typedef int (*mca_spml_base_module_put_nb_fn_t)(void *dst_addr, +typedef int (*mca_spml_base_module_put_nb_fn_t)(shmem_ctx_t ctx, + void *dst_addr, size_t size, void *src_addr, int dst, @@ -221,13 +257,15 @@ typedef int (*mca_spml_base_module_put_nb_fn_t)(void *dst_addr, * Blocking data transfer from remote PE. * Read data from remote PE. * + * @param ctx The context object this routine is working on. * @param dst_addr The address on the local PE, to write the result of the get operation to. * @param size The number of bytes to be read. * @param src_addr The address on the remote PE, to read from. * @param src The ID of the remote PE. * @return OSHMEM_SUCCESS or failure status. 
*/ -typedef int (*mca_spml_base_module_get_fn_t)(void *dst_addr, +typedef int (*mca_spml_base_module_get_fn_t)(shmem_ctx_t ctx, + void *dst_addr, size_t size, void *src_addr, int src); @@ -236,6 +274,7 @@ typedef int (*mca_spml_base_module_get_fn_t)(void *dst_addr, * Non-blocking data transfer from remote PE. * Read data from remote PE. * + * @param ctx The context object this routine is working on. * @param dst_addr The address on the local PE, to write the result of the get operation to. * @param size The number of bytes to be read. * @param src_addr The address on the remote PE, to read from. @@ -244,7 +283,8 @@ typedef int (*mca_spml_base_module_get_fn_t)(void *dst_addr, * shmem_test_nb() to wait or poll for the completion of the transfer. * @return - OSHMEM_SUCCESS or failure status. */ -typedef int (*mca_spml_base_module_get_nb_fn_t)(void *dst_addr, +typedef int (*mca_spml_base_module_get_nb_fn_t)(shmem_ctx_t ctx, + void *dst_addr, size_t size, void *src_addr, int src, @@ -277,16 +317,18 @@ typedef int (*mca_spml_base_module_send_fn_t)(void *buf, /** * Assures ordering of delivery of put() requests * + * @param ctx - The context object this routine is working on. * @return - OSHMEM_SUCCESS or failure status. */ -typedef int (*mca_spml_base_module_fence_fn_t)(void); +typedef int (*mca_spml_base_module_fence_fn_t)(shmem_ctx_t ctx); /** * Wait for completion of all outstanding put() requests * + * @param ctx - The context object this routine is working on. * @return - OSHMEM_SUCCESS or failure status. */ -typedef int (*mca_spml_base_module_quiet_fn_t)(void); +typedef int (*mca_spml_base_module_quiet_fn_t)(shmem_ctx_t ctx); /** * Waits for completion of a non-blocking put or get issued by the calling PE. 
@@ -317,6 +359,9 @@ struct mca_spml_base_module_1_0_0_t { mca_spml_base_module_deregister_fn_t spml_deregister; mca_spml_base_module_oob_get_mkeys_fn_t spml_oob_get_mkeys; + mca_spml_base_module_ctx_create_fn_t spml_ctx_create; + mca_spml_base_module_ctx_destroy_fn_t spml_ctx_destroy; + mca_spml_base_module_put_fn_t spml_put; mca_spml_base_module_put_nb_fn_t spml_put_nb; mca_spml_base_module_get_fn_t spml_get; @@ -327,6 +372,7 @@ struct mca_spml_base_module_1_0_0_t { mca_spml_base_module_wait_fn_t spml_wait; mca_spml_base_module_wait_nb_fn_t spml_wait_nb; + mca_spml_base_module_test_fn_t spml_test; mca_spml_base_module_fence_fn_t spml_fence; mca_spml_base_module_quiet_fn_t spml_quiet; diff --git a/oshmem/mca/spml/ucx/spml_ucx.c b/oshmem/mca/spml/ucx/spml_ucx.c index 4ab429f296..277910b3ca 100644 --- a/oshmem/mca/spml/ucx/spml_ucx.c +++ b/oshmem/mca/spml/ucx/spml_ucx.c @@ -56,6 +56,8 @@ mca_spml_ucx_t mca_spml_ucx = { mca_spml_ucx_register, mca_spml_ucx_deregister, mca_spml_base_oob_get_mkeys, + mca_spml_ucx_ctx_create, + mca_spml_ucx_ctx_destroy, mca_spml_ucx_put, mca_spml_ucx_put_nb, mca_spml_ucx_get, @@ -64,6 +66,7 @@ mca_spml_ucx_t mca_spml_ucx = { mca_spml_ucx_send, mca_spml_base_wait, mca_spml_base_wait_nb, + mca_spml_base_test, mca_spml_ucx_fence, mca_spml_ucx_quiet, mca_spml_ucx_rmkey_unpack, @@ -74,15 +77,20 @@ mca_spml_ucx_t mca_spml_ucx = { }, NULL, /* ucp_context */ - NULL, /* ucp_worker */ - NULL, /* ucp_peers */ - 0, /* using_mem_hooks */ 1, /* num_disconnect */ 0, /* heap_reg_nb */ 0, /* enabled */ mca_spml_ucx_get_mkey_slow }; +OBJ_CLASS_INSTANCE(mca_spml_ucx_ctx_list_item_t, opal_list_item_t, NULL, NULL); + +mca_spml_ucx_ctx_t mca_spml_ucx_ctx_default = { + NULL, /* ucp_worker */ + NULL, /* ucp_peers */ + 0 /* options */ +}; + int mca_spml_ucx_enable(bool enable) { SPML_UCX_VERBOSE(50, "*** ucx ENABLED ****"); @@ -102,7 +110,7 @@ static void mca_spml_ucx_waitall(void **reqs, int *count_p) SPML_UCX_VERBOSE(10, "waiting for %d disconnect requests", 
*count_p); for (i = 0; i < *count_p; ++i) { - opal_common_ucx_wait_request(reqs[i], mca_spml_ucx.ucp_worker, "ucp_disconnect_nb"); + opal_common_ucx_wait_request(reqs[i], mca_spml_ucx_ctx_default.ucp_worker, "ucp_disconnect_nb"); reqs[i] = NULL; } @@ -120,7 +128,7 @@ int mca_spml_ucx_del_procs(ompi_proc_t** procs, size_t nprocs) oshmem_shmem_barrier(); - if (!mca_spml_ucx.ucp_peers) { + if (!mca_spml_ucx_ctx_default.ucp_peers) { return OSHMEM_SUCCESS; } @@ -138,12 +146,12 @@ int mca_spml_ucx_del_procs(ompi_proc_t** procs, size_t nprocs) for (i = 0; i < nprocs; ++i) { n = (i + my_rank) % nprocs; - ep = mca_spml_ucx.ucp_peers[n].ucp_conn; + ep = mca_spml_ucx_ctx_default.ucp_peers[n].ucp_conn; if (ep == NULL) { continue; } - mca_spml_ucx.ucp_peers[n].ucp_conn = NULL; + mca_spml_ucx_ctx_default.ucp_peers[n].ucp_conn = NULL; SPML_UCX_VERBOSE(10, "disconnecting from peer %zu", n); dreq = ucp_disconnect_nb(ep); @@ -165,10 +173,11 @@ int mca_spml_ucx_del_procs(ompi_proc_t** procs, size_t nprocs) /* coverity[uninit_use_in_call] */ mca_spml_ucx_waitall(dreqs, &num_reqs); free(dreqs); + free(mca_spml_ucx.remote_addrs_tbl); - opal_common_ucx_mca_pmix_fence(mca_spml_ucx.ucp_worker); - free(mca_spml_ucx.ucp_peers); - mca_spml_ucx.ucp_peers = NULL; + opal_common_ucx_mca_pmix_fence(mca_spml_ucx_ctx_default.ucp_worker); + free(mca_spml_ucx_ctx_default.ucp_peers); + mca_spml_ucx_ctx_default.ucp_peers = NULL; return OSHMEM_SUCCESS; } @@ -261,12 +270,12 @@ int mca_spml_ucx_add_procs(ompi_proc_t** procs, size_t nprocs) ucp_ep_params_t ep_params; - mca_spml_ucx.ucp_peers = (ucp_peer_t *) calloc(nprocs, sizeof(*(mca_spml_ucx.ucp_peers))); - if (NULL == mca_spml_ucx.ucp_peers) { + mca_spml_ucx_ctx_default.ucp_peers = (ucp_peer_t *) calloc(nprocs, sizeof(*(mca_spml_ucx_ctx_default.ucp_peers))); + if (NULL == mca_spml_ucx_ctx_default.ucp_peers) { goto error; } - err = ucp_worker_get_address(mca_spml_ucx.ucp_worker, &wk_local_addr, &wk_addr_len); + err = 
ucp_worker_get_address(mca_spml_ucx_ctx_default.ucp_worker, &wk_local_addr, &wk_addr_len); if (err != UCS_OK) { goto error; } @@ -280,6 +289,9 @@ int mca_spml_ucx_add_procs(ompi_proc_t** procs, size_t nprocs) opal_progress_register(spml_ucx_progress); + mca_spml_ucx.remote_addrs_tbl = (char **)calloc(nprocs, sizeof(char *)); + memset(mca_spml_ucx.remote_addrs_tbl, 0, nprocs * sizeof(char *)); + /* Get the EP connection requests for all the processes from modex */ for (n = 0; n < nprocs; ++n) { i = (my_rank + n) % nprocs; @@ -288,8 +300,8 @@ int mca_spml_ucx_add_procs(ompi_proc_t** procs, size_t nprocs) ep_params.field_mask = UCP_EP_PARAM_FIELD_REMOTE_ADDRESS; ep_params.address = (ucp_address_t *)(wk_raddrs + wk_roffs[i]); - err = ucp_ep_create(mca_spml_ucx.ucp_worker, &ep_params, - &mca_spml_ucx.ucp_peers[i].ucp_conn); + err = ucp_ep_create(mca_spml_ucx_ctx_default.ucp_worker, &ep_params, + &mca_spml_ucx_ctx_default.ucp_peers[i].ucp_conn); if (UCS_OK != err) { SPML_UCX_ERROR("ucp_ep_create(proc=%zu/%zu) failed: %s", n, nprocs, ucs_status_string(err)); @@ -298,9 +310,13 @@ int mca_spml_ucx_add_procs(ompi_proc_t** procs, size_t nprocs) OSHMEM_PROC_DATA(procs[i])->num_transports = 1; OSHMEM_PROC_DATA(procs[i])->transport_ids = spml_ucx_transport_ids; + + mca_spml_ucx.remote_addrs_tbl[i] = (char *)malloc(wk_rsizes[i]); + memcpy(mca_spml_ucx.remote_addrs_tbl[i], (char *)(wk_raddrs + wk_roffs[i]), + wk_rsizes[i]); } - ucp_worker_release_address(mca_spml_ucx.ucp_worker, wk_local_addr); + ucp_worker_release_address(mca_spml_ucx_ctx_default.ucp_worker, wk_local_addr); free(wk_raddrs); free(wk_rsizes); free(wk_roffs); @@ -310,12 +326,17 @@ int mca_spml_ucx_add_procs(ompi_proc_t** procs, size_t nprocs) error2: for (i = 0; i < nprocs; ++i) { - if (mca_spml_ucx.ucp_peers[i].ucp_conn) { - ucp_ep_destroy(mca_spml_ucx.ucp_peers[i].ucp_conn); + if (mca_spml_ucx_ctx_default.ucp_peers[i].ucp_conn) { + ucp_ep_destroy(mca_spml_ucx_ctx_default.ucp_peers[i].ucp_conn); + } + if 
(mca_spml_ucx.remote_addrs_tbl[i]) { + free(mca_spml_ucx.remote_addrs_tbl[i]); } } - if (mca_spml_ucx.ucp_peers) - free(mca_spml_ucx.ucp_peers); + if (mca_spml_ucx_ctx_default.ucp_peers) + free(mca_spml_ucx_ctx_default.ucp_peers); + if (mca_spml_ucx.remote_addrs_tbl) + free(mca_spml_ucx.remote_addrs_tbl); free(wk_raddrs); free(wk_rsizes); free(wk_roffs); @@ -370,11 +391,11 @@ void *mca_spml_ucx_rmkey_ptr(const void *dst_addr, sshmem_mkey_t *mkey, int pe) #endif } -static void mca_spml_ucx_cache_mkey(sshmem_mkey_t *mkey, uint32_t segno, int dst_pe) +static void mca_spml_ucx_cache_mkey(mca_spml_ucx_ctx_t *ucx_ctx, sshmem_mkey_t *mkey, uint32_t segno, int dst_pe) { ucp_peer_t *peer; - peer = &mca_spml_ucx.ucp_peers[dst_pe]; + peer = &(ucx_ctx->ucp_peers[dst_pe]); mkey_segment_init(&peer->mkeys[segno].super, mkey, segno); } @@ -383,9 +404,9 @@ void mca_spml_ucx_rmkey_unpack(sshmem_mkey_t *mkey, uint32_t segno, int pe, int spml_ucx_mkey_t *ucx_mkey; ucs_status_t err; - ucx_mkey = &mca_spml_ucx.ucp_peers[pe].mkeys[segno].key; + ucx_mkey = &mca_spml_ucx_ctx_default.ucp_peers[pe].mkeys[segno].key; - err = ucp_ep_rkey_unpack(mca_spml_ucx.ucp_peers[pe].ucp_conn, + err = ucp_ep_rkey_unpack(mca_spml_ucx_ctx_default.ucp_peers[pe].ucp_conn, mkey->u.data, &ucx_mkey->rkey); if (UCS_OK != err) { @@ -394,7 +415,7 @@ void mca_spml_ucx_rmkey_unpack(sshmem_mkey_t *mkey, uint32_t segno, int pe, int } mkey->spml_context = ucx_mkey; - mca_spml_ucx_cache_mkey(mkey, segno, pe); + mca_spml_ucx_cache_mkey(&mca_spml_ucx_ctx_default, mkey, segno, pe); return; error_fatal: @@ -414,7 +435,7 @@ void mca_spml_ucx_memuse_hook(void *addr, size_t length) } my_pe = oshmem_my_proc_id(); - ucx_mkey = &mca_spml_ucx.ucp_peers[my_pe].mkeys[HEAP_SEG_INDEX].key; + ucx_mkey = &mca_spml_ucx_ctx_default.ucp_peers[my_pe].mkeys[HEAP_SEG_INDEX].key; params.field_mask = UCP_MEM_ADVISE_PARAM_FIELD_ADDRESS | UCP_MEM_ADVISE_PARAM_FIELD_LENGTH | @@ -455,7 +476,7 @@ sshmem_mkey_t *mca_spml_ucx_register(void* addr, segno 
= memheap_find_segnum(addr); mem_seg = memheap_find_seg(segno); - ucx_mkey = &mca_spml_ucx.ucp_peers[my_pe].mkeys[segno].key; + ucx_mkey = &mca_spml_ucx_ctx_default.ucp_peers[my_pe].mkeys[segno].key; mkeys[0].spml_context = ucx_mkey; /* if possible use mem handle already created by ucx allocator */ @@ -493,7 +514,7 @@ sshmem_mkey_t *mca_spml_ucx_register(void* addr, oshmem_shmem_abort(-1); } - status = ucp_ep_rkey_unpack(mca_spml_ucx.ucp_peers[oshmem_group_self->my_pe].ucp_conn, + status = ucp_ep_rkey_unpack(mca_spml_ucx_ctx_default.ucp_peers[oshmem_group_self->my_pe].ucp_conn, mkeys[0].u.data, &ucx_mkey->rkey); if (UCS_OK != status) { @@ -504,7 +525,7 @@ sshmem_mkey_t *mca_spml_ucx_register(void* addr, mkeys[0].len = len; mkeys[0].va_base = addr; *count = 1; - mca_spml_ucx_cache_mkey(&mkeys[0], segno, my_pe); + mca_spml_ucx_cache_mkey(&mca_spml_ucx_ctx_default, &mkeys[0], segno, my_pe); return mkeys; error_unmap: @@ -520,7 +541,7 @@ int mca_spml_ucx_deregister(sshmem_mkey_t *mkeys) spml_ucx_mkey_t *ucx_mkey; map_segment_t *mem_seg; - MCA_SPML_CALL(quiet()); + MCA_SPML_CALL(quiet(oshmem_ctx_default)); if (!mkeys) return OSHMEM_SUCCESS; @@ -546,81 +567,186 @@ int mca_spml_ucx_deregister(sshmem_mkey_t *mkeys) return OSHMEM_SUCCESS; } -int mca_spml_ucx_get(void *src_addr, size_t size, void *dst_addr, int src) +int mca_spml_ucx_ctx_create(long options, shmem_ctx_t *ctx) +{ + mca_spml_ucx_ctx_list_item_t *ctx_item; + ucp_worker_params_t params; + ucp_ep_params_t ep_params; + size_t i, nprocs = oshmem_num_procs(); + ucs_status_t err; + int rc = OSHMEM_ERROR; + + ctx_item = OBJ_NEW(mca_spml_ucx_ctx_list_item_t); + ctx_item->ctx.options = options; + + params.field_mask = UCP_WORKER_PARAM_FIELD_THREAD_MODE; + if (oshmem_mpi_thread_provided == SHMEM_THREAD_SINGLE || options & SHMEM_CTX_PRIVATE || options & SHMEM_CTX_SERIALIZED) { + params.thread_mode = UCS_THREAD_MODE_SINGLE; + } else { + params.thread_mode = UCS_THREAD_MODE_MULTI; + } + + err = 
ucp_worker_create(mca_spml_ucx.ucp_context, &params, + &ctx_item->ctx.ucp_worker); + if (UCS_OK != err) { + OBJ_RELEASE(ctx_item); + return OSHMEM_ERROR; + } + + ctx_item->ctx.ucp_peers = (ucp_peer_t *) calloc(nprocs, sizeof(*(ctx_item->ctx.ucp_peers))); + if (NULL == ctx_item->ctx.ucp_peers) { + goto error; + } + + for (i = 0; i < nprocs; i++) { + ep_params.field_mask = UCP_EP_PARAM_FIELD_REMOTE_ADDRESS; + ep_params.address = (ucp_address_t *)(mca_spml_ucx.remote_addrs_tbl[i]); + err = ucp_ep_create(ctx_item->ctx.ucp_worker, &ep_params, + &ctx_item->ctx.ucp_peers[i].ucp_conn); + if (UCS_OK != err) { + SPML_ERROR("ucp_ep_create(proc=%d/%d) failed: %s", i, nprocs, + ucs_status_string(err)); + goto error2; + } + } + + SHMEM_MUTEX_LOCK(mca_spml_ucx.internal_mutex); + + opal_list_append(&(mca_spml_ucx.ctx_list), &ctx_item->super); + + SHMEM_MUTEX_UNLOCK(mca_spml_ucx.internal_mutex); + + (*ctx) = (shmem_ctx_t)(&ctx_item->ctx); + + return OSHMEM_SUCCESS; + + error2: + for (i = 0; i < nprocs; i++) { + if (ctx_item->ctx.ucp_peers[i].ucp_conn) { + ucp_ep_destroy(ctx_item->ctx.ucp_peers[i].ucp_conn); + } + } + + if (ctx_item->ctx.ucp_peers) + free(ctx_item->ctx.ucp_peers); + + error: + ucp_worker_destroy(ctx_item->ctx.ucp_worker); + OBJ_RELEASE(ctx_item); + rc = OSHMEM_ERR_OUT_OF_RESOURCE; + SPML_ERROR("ctx create FAILED rc=%d", rc); + return rc; +} + +void mca_spml_ucx_ctx_destroy(shmem_ctx_t ctx) +{ + mca_spml_ucx_ctx_list_item_t *ctx_item, *next; + size_t i, nprocs = oshmem_num_procs(); + + MCA_SPML_CALL(quiet(ctx)); + + SHMEM_MUTEX_LOCK(mca_spml_ucx.internal_mutex); + + /* delete context object from list */ + OPAL_LIST_FOREACH_SAFE(ctx_item, next, &(mca_spml_ucx.ctx_list), + mca_spml_ucx_ctx_list_item_t) { + if ((shmem_ctx_t)(&ctx_item->ctx) == ctx) { + opal_list_remove_item(&(mca_spml_ucx.ctx_list), &ctx_item->super); + for (i = 0; i < nprocs; i++) { + ucp_ep_destroy(ctx_item->ctx.ucp_peers[i].ucp_conn); + } + free(ctx_item->ctx.ucp_peers); +
ucp_worker_destroy(ctx_item->ctx.ucp_worker); + OBJ_RELEASE(ctx_item); + break; + } + } + + SHMEM_MUTEX_UNLOCK(mca_spml_ucx.internal_mutex); +} + +int mca_spml_ucx_get(shmem_ctx_t ctx, void *src_addr, size_t size, void *dst_addr, int src) { void *rva; spml_ucx_mkey_t *ucx_mkey; + mca_spml_ucx_ctx_t *ucx_ctx = (mca_spml_ucx_ctx_t *)ctx; #if HAVE_DECL_UCP_GET_NB ucs_status_ptr_t request; #else ucs_status_t status; #endif - ucx_mkey = mca_spml_ucx_get_mkey(src, src_addr, &rva, &mca_spml_ucx); + ucx_mkey = mca_spml_ucx_get_mkey(ucx_ctx, src, src_addr, &rva, &mca_spml_ucx); #if HAVE_DECL_UCP_GET_NB - request = ucp_get_nb(mca_spml_ucx.ucp_peers[src].ucp_conn, dst_addr, size, + request = ucp_get_nb(ucx_ctx->ucp_peers[src].ucp_conn, dst_addr, size, (uint64_t)rva, ucx_mkey->rkey, opal_common_ucx_empty_complete_cb); - return opal_common_ucx_wait_request(request, mca_spml_ucx.ucp_worker, "ucp_get_nb"); + return opal_common_ucx_wait_request(request, ucx_ctx->ucp_worker, "ucp_get_nb"); #else - status = ucp_get(mca_spml_ucx.ucp_peers[src].ucp_conn, dst_addr, size, + status = ucp_get(ucx_ctx->ucp_peers[src].ucp_conn, dst_addr, size, (uint64_t)rva, ucx_mkey->rkey); return ucx_status_to_oshmem(status); #endif } -int mca_spml_ucx_get_nb(void *src_addr, size_t size, void *dst_addr, int src, void **handle) +int mca_spml_ucx_get_nb(shmem_ctx_t ctx, void *src_addr, size_t size, void *dst_addr, int src, void **handle) { void *rva; ucs_status_t status; spml_ucx_mkey_t *ucx_mkey; + mca_spml_ucx_ctx_t *ucx_ctx = (mca_spml_ucx_ctx_t *)ctx; - ucx_mkey = mca_spml_ucx_get_mkey(src, src_addr, &rva, &mca_spml_ucx); - status = ucp_get_nbi(mca_spml_ucx.ucp_peers[src].ucp_conn, dst_addr, size, + ucx_mkey = mca_spml_ucx_get_mkey(ucx_ctx, src, src_addr, &rva, &mca_spml_ucx); + status = ucp_get_nbi(ucx_ctx->ucp_peers[src].ucp_conn, dst_addr, size, (uint64_t)rva, ucx_mkey->rkey); return ucx_status_to_oshmem_nb(status); } -int mca_spml_ucx_put(void* dst_addr, size_t size, void* src_addr, int dst) +int 
mca_spml_ucx_put(shmem_ctx_t ctx, void* dst_addr, size_t size, void* src_addr, int dst) { void *rva; spml_ucx_mkey_t *ucx_mkey; + mca_spml_ucx_ctx_t *ucx_ctx = (mca_spml_ucx_ctx_t *)ctx; #if HAVE_DECL_UCP_PUT_NB ucs_status_ptr_t request; #else ucs_status_t status; #endif - ucx_mkey = mca_spml_ucx_get_mkey(dst, dst_addr, &rva, &mca_spml_ucx); + ucx_mkey = mca_spml_ucx_get_mkey(ucx_ctx, dst, dst_addr, &rva, &mca_spml_ucx); #if HAVE_DECL_UCP_PUT_NB - request = ucp_put_nb(mca_spml_ucx.ucp_peers[dst].ucp_conn, src_addr, size, + request = ucp_put_nb(ucx_ctx->ucp_peers[dst].ucp_conn, src_addr, size, (uint64_t)rva, ucx_mkey->rkey, opal_common_ucx_empty_complete_cb); - return opal_common_ucx_wait_request(request, mca_spml_ucx.ucp_worker, "ucp_put_nb"); + return opal_common_ucx_wait_request(request, ucx_ctx->ucp_worker, "ucp_put_nb"); #else - status = ucp_put(mca_spml_ucx.ucp_peers[dst].ucp_conn, src_addr, size, + status = ucp_put(ucx_ctx->ucp_peers[dst].ucp_conn, src_addr, size, (uint64_t)rva, ucx_mkey->rkey); return ucx_status_to_oshmem(status); #endif } -int mca_spml_ucx_put_nb(void* dst_addr, size_t size, void* src_addr, int dst, void **handle) +int mca_spml_ucx_put_nb(shmem_ctx_t ctx, void* dst_addr, size_t size, void* src_addr, int dst, void **handle) { void *rva; ucs_status_t status; spml_ucx_mkey_t *ucx_mkey; + mca_spml_ucx_ctx_t *ucx_ctx = (mca_spml_ucx_ctx_t *)ctx; - ucx_mkey = mca_spml_ucx_get_mkey(dst, dst_addr, &rva, &mca_spml_ucx); - status = ucp_put_nbi(mca_spml_ucx.ucp_peers[dst].ucp_conn, src_addr, size, + ucx_mkey = mca_spml_ucx_get_mkey(ucx_ctx, dst, dst_addr, &rva, &mca_spml_ucx); + status = ucp_put_nbi(ucx_ctx->ucp_peers[dst].ucp_conn, src_addr, size, (uint64_t)rva, ucx_mkey->rkey); return ucx_status_to_oshmem_nb(status); } -int mca_spml_ucx_fence(void) + + +int mca_spml_ucx_fence(shmem_ctx_t ctx) { ucs_status_t err; + mca_spml_ucx_ctx_t *ucx_ctx = (mca_spml_ucx_ctx_t *)ctx; - err = ucp_worker_fence(mca_spml_ucx.ucp_worker); + err = 
ucp_worker_fence(ucx_ctx->ucp_worker); if (UCS_OK != err) { SPML_UCX_ERROR("fence failed: %s", ucs_status_string(err)); oshmem_shmem_abort(-1); @@ -629,11 +755,12 @@ int mca_spml_ucx_fence(void) return OSHMEM_SUCCESS; } -int mca_spml_ucx_quiet(void) +int mca_spml_ucx_quiet(shmem_ctx_t ctx) { int ret; + mca_spml_ucx_ctx_t *ucx_ctx = (mca_spml_ucx_ctx_t *)ctx; - ret = opal_common_ucx_worker_flush(mca_spml_ucx.ucp_worker); + ret = opal_common_ucx_worker_flush(ucx_ctx->ucp_worker); if (OMPI_SUCCESS != ret) { oshmem_shmem_abort(-1); return ret; diff --git a/oshmem/mca/spml/ucx/spml_ucx.h b/oshmem/mca/spml/ucx/spml_ucx.h index b3809d1ff0..1b2f0b58d8 100644 --- a/oshmem/mca/spml/ucx/spml_ucx.h +++ b/oshmem/mca/spml/ucx/spml_ucx.h @@ -31,6 +31,9 @@ #include "oshmem/mca/memheap/memheap.h" #include "oshmem/mca/memheap/base/base.h" +#include "opal/class/opal_free_list.h" +#include "opal/class/opal_list.h" + #include "orte/runtime/orte_globals.h" #include "opal/mca/common/ucx/common_ucx.h" @@ -62,21 +65,35 @@ struct ucp_peer { spml_ucx_cached_mkey_t mkeys[MCA_MEMHEAP_SEG_COUNT]; }; typedef struct ucp_peer ucp_peer_t; + +struct mca_spml_ucx_ctx { + ucp_worker_h ucp_worker; + ucp_peer_t *ucp_peers; + long options; +}; +typedef struct mca_spml_ucx_ctx mca_spml_ucx_ctx_t; + +extern mca_spml_ucx_ctx_t mca_spml_ucx_ctx_default; + +struct mca_spml_ucx_ctx_list_item { + opal_list_item_t super; + mca_spml_ucx_ctx_t ctx; +}; +typedef struct mca_spml_ucx_ctx_list_item mca_spml_ucx_ctx_list_item_t; typedef spml_ucx_mkey_t * (*mca_spml_ucx_get_mkey_slow_fn_t)(int pe, void *va, void **rva); struct mca_spml_ucx { mca_spml_base_module_t super; ucp_context_h ucp_context; - ucp_worker_h ucp_worker; - ucp_peer_t *ucp_peers; int num_disconnect; int heap_reg_nb; - - int priority; /* component priority */ bool enabled; - mca_spml_ucx_get_mkey_slow_fn_t get_mkey_slow; + char **remote_addrs_tbl; + opal_list_t ctx_list; + int priority; /* component priority */ + shmem_internal_mutex_t internal_mutex; 
}; typedef struct mca_spml_ucx mca_spml_ucx_t; @@ -84,22 +101,29 @@ typedef struct mca_spml_ucx mca_spml_ucx_t; extern mca_spml_ucx_t mca_spml_ucx; extern int mca_spml_ucx_enable(bool enable); -extern int mca_spml_ucx_get(void* dst_addr, +extern int mca_spml_ucx_ctx_create(long options, + shmem_ctx_t *ctx); +extern void mca_spml_ucx_ctx_destroy(shmem_ctx_t ctx); +extern int mca_spml_ucx_get(shmem_ctx_t ctx, + void* dst_addr, size_t size, void* src_addr, int src); -extern int mca_spml_ucx_get_nb(void* dst_addr, +extern int mca_spml_ucx_get_nb(shmem_ctx_t ctx, + void* dst_addr, size_t size, void* src_addr, int src, void **handle); -extern int mca_spml_ucx_put(void* dst_addr, +extern int mca_spml_ucx_put(shmem_ctx_t ctx, + void* dst_addr, size_t size, void* src_addr, int dst); -extern int mca_spml_ucx_put_nb(void* dst_addr, +extern int mca_spml_ucx_put_nb(shmem_ctx_t ctx, + void* dst_addr, size_t size, void* src_addr, int dst, @@ -125,17 +149,17 @@ extern void *mca_spml_ucx_rmkey_ptr(const void *dst_addr, sshmem_mkey_t *, int p extern int mca_spml_ucx_add_procs(ompi_proc_t** procs, size_t nprocs); extern int mca_spml_ucx_del_procs(ompi_proc_t** procs, size_t nprocs); -extern int mca_spml_ucx_fence(void); -extern int mca_spml_ucx_quiet(void); +extern int mca_spml_ucx_fence(shmem_ctx_t ctx); +extern int mca_spml_ucx_quiet(shmem_ctx_t ctx); extern int spml_ucx_progress(void); static inline spml_ucx_mkey_t * -mca_spml_ucx_get_mkey(int pe, void *va, void **rva, mca_spml_ucx_t* module) +mca_spml_ucx_get_mkey(mca_spml_ucx_ctx_t *ucx_ctx, int pe, void *va, void **rva, mca_spml_ucx_t* module) { spml_ucx_cached_mkey_t *mkey; - mkey = module->ucp_peers[pe].mkeys; + mkey = ucx_ctx->ucp_peers[pe].mkeys; mkey = (spml_ucx_cached_mkey_t *)map_segment_find_va(&mkey->super.super, sizeof(*mkey), va); if (OPAL_UNLIKELY(NULL == mkey)) { assert(module->get_mkey_slow); diff --git a/oshmem/mca/spml/ucx/spml_ucx_component.c b/oshmem/mca/spml/ucx/spml_ucx_component.c index 
6066acf391..a4a4a26385 100644 --- a/oshmem/mca/spml/ucx/spml_ucx_component.c +++ b/oshmem/mca/spml/ucx/spml_ucx_component.c @@ -110,15 +110,28 @@ static int mca_spml_ucx_component_register(void) int spml_ucx_progress(void) { - ucp_worker_progress(mca_spml_ucx.ucp_worker); + ucp_worker_progress(mca_spml_ucx_ctx_default.ucp_worker); return 1; } static int mca_spml_ucx_component_open(void) +{ + return OSHMEM_SUCCESS; +} + +static int mca_spml_ucx_component_close(void) +{ + return OSHMEM_SUCCESS; +} + +static int spml_ucx_init(void) { ucs_status_t err; ucp_config_t *ucp_config; ucp_params_t params; + ucp_context_attr_t attr; + ucp_worker_params_t wkr_params; + ucp_worker_attr_t wkr_attr; err = ucp_config_read("OSHMEM", NULL, &ucp_config); if (UCS_OK != err) { @@ -128,9 +141,14 @@ static int mca_spml_ucx_component_open(void) opal_common_ucx_mca_register(); memset(&params, 0, sizeof(params)); - params.field_mask = UCP_PARAM_FIELD_FEATURES|UCP_PARAM_FIELD_ESTIMATED_NUM_EPS; + params.field_mask = UCP_PARAM_FIELD_FEATURES|UCP_PARAM_FIELD_ESTIMATED_NUM_EPS|UCP_PARAM_FIELD_MT_WORKERS_SHARED; params.features = UCP_FEATURE_RMA|UCP_FEATURE_AMO32|UCP_FEATURE_AMO64; params.estimated_num_eps = ompi_proc_world_size(); + if (oshmem_mpi_thread_requested == SHMEM_THREAD_MULTIPLE) { + params.mt_workers_shared = 1; + } else { + params.mt_workers_shared = 0; + } err = ucp_init(&params, ucp_config, &mca_spml_ucx.ucp_context); ucp_config_release(ucp_config); @@ -138,33 +156,43 @@ static int mca_spml_ucx_component_open(void) return OSHMEM_ERROR; } - return OSHMEM_SUCCESS; -} - -static int mca_spml_ucx_component_close(void) -{ - if (mca_spml_ucx.ucp_context) { - ucp_cleanup(mca_spml_ucx.ucp_context); - mca_spml_ucx.ucp_context = NULL; + attr.field_mask = UCP_ATTR_FIELD_THREAD_MODE; + err = ucp_context_query(mca_spml_ucx.ucp_context, &attr); + if (err != UCS_OK) { + return OSHMEM_ERROR; } - opal_common_ucx_mca_deregister(); - return OSHMEM_SUCCESS; -} -static int spml_ucx_init(void) -{ -
ucp_worker_params_t params; - ucs_status_t err; + if (oshmem_mpi_thread_requested == SHMEM_THREAD_MULTIPLE && + attr.thread_mode != UCS_THREAD_MODE_MULTI) { + oshmem_mpi_thread_provided = SHMEM_THREAD_SINGLE; + } - params.field_mask = UCP_WORKER_PARAM_FIELD_THREAD_MODE; - params.thread_mode = UCS_THREAD_MODE_SINGLE; + OBJ_CONSTRUCT(&(mca_spml_ucx.ctx_list), opal_list_t); + SHMEM_MUTEX_INIT(mca_spml_ucx.internal_mutex); - err = ucp_worker_create(mca_spml_ucx.ucp_context, &params, - &mca_spml_ucx.ucp_worker); + wkr_params.field_mask = UCP_WORKER_PARAM_FIELD_THREAD_MODE; + if (oshmem_mpi_thread_requested == SHMEM_THREAD_MULTIPLE) { + wkr_params.thread_mode = UCS_THREAD_MODE_MULTI; + } else { + wkr_params.thread_mode = UCS_THREAD_MODE_SINGLE; + } + + err = ucp_worker_create(mca_spml_ucx.ucp_context, &wkr_params, + &mca_spml_ucx_ctx_default.ucp_worker); if (UCS_OK != err) { return OSHMEM_ERROR; } + wkr_attr.field_mask = UCP_WORKER_ATTR_FIELD_THREAD_MODE; + err = ucp_worker_query(mca_spml_ucx_ctx_default.ucp_worker, &wkr_attr); + + if (oshmem_mpi_thread_requested == SHMEM_THREAD_MULTIPLE && + wkr_attr.thread_mode != UCS_THREAD_MODE_MULTI) { + oshmem_mpi_thread_provided = SHMEM_THREAD_SINGLE; + } + + oshmem_ctx_default = (shmem_ctx_t) &mca_spml_ucx_ctx_default; + return OSHMEM_SUCCESS; } @@ -192,13 +220,22 @@ static int mca_spml_ucx_component_fini(void) { opal_progress_unregister(spml_ucx_progress); - if (mca_spml_ucx.ucp_worker) { - ucp_worker_destroy(mca_spml_ucx.ucp_worker); + if (mca_spml_ucx_ctx_default.ucp_worker) { + ucp_worker_destroy(mca_spml_ucx_ctx_default.ucp_worker); } if(!mca_spml_ucx.enabled) return OSHMEM_SUCCESS; /* never selected.. return success..
*/ mca_spml_ucx.enabled = false; /* not anymore */ + + OBJ_DESTRUCT(&(mca_spml_ucx.ctx_list)); + SHMEM_MUTEX_DESTROY(mca_spml_ucx.internal_mutex); + + if (mca_spml_ucx.ucp_context) { + ucp_cleanup(mca_spml_ucx.ucp_context); + mca_spml_ucx.ucp_context = NULL; + } + return OSHMEM_SUCCESS; } diff --git a/oshmem/runtime/oshmem_shmem_finalize.c b/oshmem/runtime/oshmem_shmem_finalize.c index b2354844c1..d97842e4a9 100644 --- a/oshmem/runtime/oshmem_shmem_finalize.c +++ b/oshmem/runtime/oshmem_shmem_finalize.c @@ -74,6 +74,8 @@ int oshmem_shmem_finalize(void) if (OSHMEM_SUCCESS == ret) { oshmem_shmem_initialized = false; } + + SHMEM_MUTEX_DESTROY(shmem_internal_mutex_alloc); } /* Note: ompi_mpi_state is set atomically in ompi_mpi_init() and diff --git a/oshmem/runtime/oshmem_shmem_init.c b/oshmem/runtime/oshmem_shmem_init.c index afd09df1c5..d1a187bf4d 100644 --- a/oshmem/runtime/oshmem_shmem_init.c +++ b/oshmem/runtime/oshmem_shmem_init.c @@ -107,6 +107,10 @@ MPI_Comm oshmem_comm_world = {0}; opal_thread_t *oshmem_mpi_main_thread = NULL; +shmem_internal_mutex_t shmem_internal_mutex_alloc = {0}; + +shmem_ctx_t oshmem_ctx_default = NULL; + static int _shmem_init(int argc, char **argv, int requested, int *provided); #if OSHMEM_OPAL_THREAD_ENABLE @@ -157,6 +161,8 @@ int oshmem_shmem_init(int argc, char **argv, int requested, int *provided) PMPI_Comm_dup(MPI_COMM_WORLD, &oshmem_comm_world); OMPI_TIMING_NEXT("PMPI_Comm_dup"); + SHMEM_MUTEX_INIT(shmem_internal_mutex_alloc); + ret = _shmem_init(argc, argv, requested, provided); OMPI_TIMING_NEXT("_shmem_init"); OMPI_TIMING_IMPORT_OPAL("mca_scoll_mpi_comm_query"); @@ -250,6 +256,9 @@ static int _shmem_init(int argc, char **argv, int requested, int *provided) int ret = OSHMEM_SUCCESS; char *error = NULL; + oshmem_mpi_thread_requested = requested; + oshmem_mpi_thread_provided = requested; + /* Register the OSHMEM layer's MCA parameters */ if (OSHMEM_SUCCESS != (ret = oshmem_shmem_register_params())) { error = "oshmem_info_register: 
oshmem_register_params failed"; @@ -361,6 +370,10 @@ static int _shmem_init(int argc, char **argv, int requested, int *provided) goto error; } + (*provided) = oshmem_mpi_thread_provided; + + oshmem_mpi_thread_multiple = (oshmem_mpi_thread_provided == SHMEM_THREAD_MULTIPLE) ? true : false; + error: if (ret != OSHMEM_SUCCESS) { const char *err_msg = opal_strerror(ret); orte_show_help("help-shmem-runtime.txt", diff --git a/oshmem/runtime/runtime.h b/oshmem/runtime/runtime.h index c5e5204309..737def7e25 100644 --- a/oshmem/runtime/runtime.h +++ b/oshmem/runtime/runtime.h @@ -18,12 +18,14 @@ #define OSHMEM_SHMEM_RUNTIME_H #include "oshmem_config.h" +#include "shmem.h" #include "opal/class/opal_list.h" #include "opal/class/opal_hash_table.h" #include "orte/runtime/orte_globals.h" #include "ompi/include/mpi.h" +#include <pthread.h> BEGIN_C_DECLS @@ -44,6 +46,34 @@ OSHMEM_DECLSPEC extern int oshmem_mpi_thread_provided; OSHMEM_DECLSPEC extern struct opal_thread_t *oshmem_mpi_main_thread; OSHMEM_DECLSPEC extern MPI_Comm oshmem_comm_world; + +typedef pthread_mutex_t shmem_internal_mutex_t; +OSHMEM_DECLSPEC extern shmem_internal_mutex_t shmem_internal_mutex_alloc; + +OSHMEM_DECLSPEC extern shmem_ctx_t oshmem_ctx_default; + +# define SHMEM_MUTEX_INIT(_mutex) \ + do { \ + if (oshmem_mpi_thread_provided == SHMEM_THREAD_MULTIPLE) \ + pthread_mutex_init(&_mutex, NULL); \ + } while (0) +# define SHMEM_MUTEX_DESTROY(_mutex) \ + do { \ + if (oshmem_mpi_thread_provided == SHMEM_THREAD_MULTIPLE) \ + pthread_mutex_destroy(&_mutex); \ + } while (0) +# define SHMEM_MUTEX_LOCK(_mutex) \ + do { \ + if (oshmem_mpi_thread_provided == SHMEM_THREAD_MULTIPLE) \ + pthread_mutex_lock(&_mutex); \ + } while (0) +# define SHMEM_MUTEX_UNLOCK(_mutex) \ + do { \ + if (oshmem_mpi_thread_provided == SHMEM_THREAD_MULTIPLE) \ + pthread_mutex_unlock(&_mutex); \ + } while (0) + + /* * SHMEM_Init_thread constants */ diff --git a/oshmem/shmem/c/Makefile.am b/oshmem/shmem/c/Makefile.am index 1b7a64a189..3acd7e400d 100644 ---
a/oshmem/shmem/c/Makefile.am +++ b/oshmem/shmem/c/Makefile.am @@ -26,6 +26,7 @@ OSHMEM_API_SOURCES = \ shmem_align.c \ shmem_query.c \ shmem_p.c \ + shmem_context.c \ shmem_put.c \ shmem_g.c \ shmem_get.c \ @@ -36,6 +37,7 @@ OSHMEM_API_SOURCES = \ shmem_pe_accessible.c \ shmem_addr_accessible.c \ shmem_barrier.c \ + shmem_sync.c \ shmem_fence.c \ shmem_quiet.c \ shmem_wait.c \ diff --git a/oshmem/shmem/c/profile/Makefile.am b/oshmem/shmem/c/profile/Makefile.am index 5694e99de1..4764c3810d 100644 --- a/oshmem/shmem/c/profile/Makefile.am +++ b/oshmem/shmem/c/profile/Makefile.am @@ -48,6 +48,7 @@ OSHMEM_API_SOURCES = \ pshmem_pe_accessible.c \ pshmem_addr_accessible.c \ pshmem_barrier.c \ + pshmem_sync.c \ pshmem_fence.c \ pshmem_quiet.c \ pshmem_wait.c \ diff --git a/oshmem/shmem/c/profile/defines.h b/oshmem/shmem/c/profile/defines.h index bc43d415b1..ce87e2ee2f 100644 --- a/oshmem/shmem/c/profile/defines.h +++ b/oshmem/shmem/c/profile/defines.h @@ -21,6 +21,7 @@ * Initialization routines */ #define shmem_init pshmem_init +#define shmem_init_thread pshmem_init_thread #define start_pes pstart_pes /* shmem-compat.h */ /* @@ -33,6 +34,7 @@ * Query routines */ #define shmem_n_pes pshmem_n_pes +#define shmem_query_thread pshmem_query_thread #define shmem_my_pe pshmem_my_pe #define _num_pes p_num_pes /* shmem-compat.h */ #define _my_pe p_my_pe /* shmem-compat.h */ @@ -47,6 +49,7 @@ * Symmetric heap routines */ #define shmem_malloc pshmem_malloc +#define shmem_calloc pshmem_calloc #define shmem_align pshmem_align #define shmem_realloc pshmem_realloc #define shmem_free pshmem_free @@ -60,9 +63,24 @@ */ #define shmem_ptr pshmem_ptr +/* + * Communication context operations + */ +#define shmem_ctx_create pshmem_ctx_create +#define shmem_ctx_destroy pshmem_ctx_destroy + /* * Elemental put routines */ + +#define shmem_ctx_char_p pshmem_ctx_char_p +#define shmem_ctx_short_p pshmem_ctx_short_p +#define shmem_ctx_int_p pshmem_ctx_int_p +#define shmem_ctx_long_p pshmem_ctx_long_p 
+#define shmem_ctx_float_p pshmem_ctx_float_p +#define shmem_ctx_double_p pshmem_ctx_double_p +#define shmem_ctx_longlong_p pshmem_ctx_longlong_p +#define shmem_ctx_longdouble_p pshmem_ctx_longdouble_p #define shmem_char_p pshmem_char_p #define shmem_short_p pshmem_short_p #define shmem_int_p pshmem_int_p @@ -78,6 +96,14 @@ /* * Block data put routines */ +#define shmem_ctx_char_put pshmem_ctx_char_put +#define shmem_ctx_short_put pshmem_ctx_short_put +#define shmem_ctx_int_put pshmem_ctx_int_put +#define shmem_ctx_long_put pshmem_ctx_long_put +#define shmem_ctx_float_put pshmem_ctx_float_put +#define shmem_ctx_double_put pshmem_ctx_double_put +#define shmem_ctx_longlong_put pshmem_ctx_longlong_put +#define shmem_ctx_longdouble_put pshmem_ctx_longdouble_put #define shmem_char_put pshmem_char_put /* shmem-compat.h */ #define shmem_short_put pshmem_short_put #define shmem_int_put pshmem_int_put @@ -87,6 +113,12 @@ #define shmem_longlong_put pshmem_longlong_put #define shmem_longdouble_put pshmem_longdouble_put +#define shmem_ctx_put8 pshmem_ctx_put8 +#define shmem_ctx_put16 pshmem_ctx_put16 +#define shmem_ctx_put32 pshmem_ctx_put32 +#define shmem_ctx_put64 pshmem_ctx_put64 +#define shmem_ctx_put128 pshmem_ctx_put128 +#define shmem_ctx_putmem pshmem_ctx_putmem #define shmem_put8 pshmem_put8 #define shmem_put16 pshmem_put16 #define shmem_put32 pshmem_put32 @@ -97,6 +129,14 @@ /* * Strided put routines */ +#define shmem_ctx_char_iput pshmem_ctx_char_iput +#define shmem_ctx_short_iput pshmem_ctx_short_iput +#define shmem_ctx_int_iput pshmem_ctx_int_iput +#define shmem_ctx_float_iput pshmem_ctx_float_iput +#define shmem_ctx_double_iput pshmem_ctx_double_iput +#define shmem_ctx_longlong_iput pshmem_ctx_longlong_iput +#define shmem_ctx_longdouble_iput pshmem_ctx_longdouble_iput +#define shmem_ctx_long_iput pshmem_ctx_long_iput #define shmem_char_iput pshmem_char_iput #define shmem_short_iput pshmem_short_iput #define shmem_int_iput pshmem_int_iput @@ -106,6 +146,11 @@ 
#define shmem_longdouble_iput pshmem_longdouble_iput #define shmem_long_iput pshmem_long_iput +#define shmem_ctx_iput8 pshmem_ctx_iput8 +#define shmem_ctx_iput16 pshmem_ctx_iput16 +#define shmem_ctx_iput32 pshmem_ctx_iput32 +#define shmem_ctx_iput64 pshmem_ctx_iput64 +#define shmem_ctx_iput128 pshmem_ctx_iput128 #define shmem_iput8 pshmem_iput8 #define shmem_iput16 pshmem_iput16 #define shmem_iput32 pshmem_iput32 @@ -115,6 +160,14 @@ /* * Non-block data put routines */ +#define shmem_ctx_char_put_nbi pshmem_ctx_char_put_nbi +#define shmem_ctx_short_put_nbi pshmem_ctx_short_put_nbi +#define shmem_ctx_int_put_nbi pshmem_ctx_int_put_nbi +#define shmem_ctx_long_put_nbi pshmem_ctx_long_put_nbi +#define shmem_ctx_float_put_nbi pshmem_ctx_float_put_nbi +#define shmem_ctx_double_put_nbi pshmem_ctx_double_put_nbi +#define shmem_ctx_longlong_put_nbi pshmem_ctx_longlong_put_nbi +#define shmem_ctx_longdouble_put_nbi pshmem_ctx_longdouble_put_nbi #define shmem_char_put_nbi pshmem_char_put_nbi #define shmem_short_put_nbi pshmem_short_put_nbi #define shmem_int_put_nbi pshmem_int_put_nbi @@ -123,6 +176,13 @@ #define shmem_double_put_nbi pshmem_double_put_nbi #define shmem_longlong_put_nbi pshmem_longlong_put_nbi #define shmem_longdouble_put_nbi pshmem_longdouble_put_nbi + +#define shmem_ctx_put8_nbi pshmem_ctx_put8_nbi +#define shmem_ctx_put16_nbi pshmem_ctx_put16_nbi +#define shmem_ctx_put32_nbi pshmem_ctx_put32_nbi +#define shmem_ctx_put64_nbi pshmem_ctx_put64_nbi +#define shmem_ctx_put128_nbi pshmem_ctx_put128_nbi +#define shmem_ctx_putmem_nbi pshmem_ctx_putmem_nbi #define shmem_put8_nbi pshmem_put8_nbi #define shmem_put16_nbi pshmem_put16_nbi #define shmem_put32_nbi pshmem_put32_nbi @@ -133,6 +193,14 @@ /* * Elemental get routines */ +#define shmem_ctx_char_g pshmem_ctx_char_g +#define shmem_ctx_short_g pshmem_ctx_short_g +#define shmem_ctx_int_g pshmem_ctx_int_g +#define shmem_ctx_long_g pshmem_ctx_long_g +#define shmem_ctx_float_g pshmem_ctx_float_g +#define 
shmem_ctx_double_g pshmem_ctx_double_g +#define shmem_ctx_longlong_g pshmem_ctx_longlong_g +#define shmem_ctx_longdouble_g pshmem_ctx_longdouble_g #define shmem_char_g pshmem_char_g #define shmem_short_g pshmem_short_g #define shmem_int_g pshmem_int_g @@ -148,6 +216,14 @@ /* * Block data get routines */ +#define shmem_ctx_char_get pshmem_ctx_char_get +#define shmem_ctx_short_get pshmem_ctx_short_get +#define shmem_ctx_int_get pshmem_ctx_int_get +#define shmem_ctx_long_get pshmem_ctx_long_get +#define shmem_ctx_float_get pshmem_ctx_float_get +#define shmem_ctx_double_get pshmem_ctx_double_get +#define shmem_ctx_longlong_get pshmem_ctx_longlong_get +#define shmem_ctx_longdouble_get pshmem_ctx_longdouble_get #define shmem_char_get pshmem_char_get /* shmem-compat.h */ #define shmem_short_get pshmem_short_get #define shmem_int_get pshmem_int_get @@ -157,6 +233,12 @@ #define shmem_longlong_get pshmem_longlong_get #define shmem_longdouble_get pshmem_longdouble_get +#define shmem_ctx_get8 pshmem_ctx_get8 +#define shmem_ctx_get16 pshmem_ctx_get16 +#define shmem_ctx_get32 pshmem_ctx_get32 +#define shmem_ctx_get64 pshmem_ctx_get64 +#define shmem_ctx_get128 pshmem_ctx_get128 +#define shmem_ctx_getmem pshmem_ctx_getmem #define shmem_get8 pshmem_get8 #define shmem_get16 pshmem_get16 #define shmem_get32 pshmem_get32 @@ -167,6 +249,14 @@ /* * Strided get routines */ +#define shmem_ctx_char_iget pshmem_ctx_char_iget +#define shmem_ctx_short_iget pshmem_ctx_short_iget +#define shmem_ctx_int_iget pshmem_ctx_int_iget +#define shmem_ctx_float_iget pshmem_ctx_float_iget +#define shmem_ctx_double_iget pshmem_ctx_double_iget +#define shmem_ctx_longlong_iget pshmem_ctx_longlong_iget +#define shmem_ctx_longdouble_iget pshmem_ctx_longdouble_iget +#define shmem_ctx_long_iget pshmem_ctx_long_iget #define shmem_char_iget pshmem_char_iget #define shmem_short_iget pshmem_short_iget #define shmem_int_iget pshmem_int_iget @@ -176,6 +266,11 @@ #define shmem_longdouble_iget pshmem_longdouble_iget 
#define shmem_long_iget pshmem_long_iget +#define shmem_ctx_iget8 pshmem_ctx_iget8 +#define shmem_ctx_iget16 pshmem_ctx_iget16 +#define shmem_ctx_iget32 pshmem_ctx_iget32 +#define shmem_ctx_iget64 pshmem_ctx_iget64 +#define shmem_ctx_iget128 pshmem_ctx_iget128 #define shmem_iget8 pshmem_iget8 #define shmem_iget16 pshmem_iget16 #define shmem_iget32 pshmem_iget32 @@ -185,6 +280,14 @@ /* * Non-block data get routines */ +#define shmem_ctx_char_get_nbi pshmem_ctx_char_get_nbi +#define shmem_ctx_short_get_nbi pshmem_ctx_short_get_nbi +#define shmem_ctx_int_get_nbi pshmem_ctx_int_get_nbi +#define shmem_ctx_long_get_nbi pshmem_ctx_long_get_nbi +#define shmem_ctx_float_get_nbi pshmem_ctx_float_get_nbi +#define shmem_ctx_double_get_nbi pshmem_ctx_double_get_nbi +#define shmem_ctx_longlong_get_nbi pshmem_ctx_longlong_get_nbi +#define shmem_ctx_longdouble_get_nbi pshmem_ctx_longdouble_get_nbi #define shmem_char_get_nbi pshmem_char_get_nbi #define shmem_short_get_nbi pshmem_short_get_nbi #define shmem_int_get_nbi pshmem_int_get_nbi @@ -193,6 +296,13 @@ #define shmem_double_get_nbi pshmem_double_get_nbi #define shmem_longlong_get_nbi pshmem_longlong_get_nbi #define shmem_longdouble_get_nbi pshmem_longdouble_get_nbi + +#define shmem_ctx_get8_nbi pshmem_ctx_get8_nbi +#define shmem_ctx_get16_nbi pshmem_ctx_get16_nbi +#define shmem_ctx_get32_nbi pshmem_ctx_get32_nbi +#define shmem_ctx_get64_nbi pshmem_ctx_get64_nbi +#define shmem_ctx_get128_nbi pshmem_ctx_get128_nbi +#define shmem_ctx_getmem_nbi pshmem_ctx_getmem_nbi #define shmem_get8_nbi pshmem_get8_nbi #define shmem_get16_nbi pshmem_get16_nbi #define shmem_get32_nbi pshmem_get32_nbi @@ -204,6 +314,16 @@ * Atomic operations */ /* Atomic swap */ +#define shmem_ctx_double_atomic_swap pshmem_ctx_double_atomic_swap +#define shmem_ctx_float_atomic_swap pshmem_ctx_float_atomic_swap +#define shmem_ctx_int_atomic_swap pshmem_ctx_int_atomic_swap +#define shmem_ctx_long_atomic_swap pshmem_ctx_long_atomic_swap +#define 
shmem_ctx_longlong_atomic_swap pshmem_ctx_longlong_atomic_swap +#define shmem_double_atomic_swap pshmem_double_atomic_swap +#define shmem_float_atomic_swap pshmem_float_atomic_swap +#define shmem_int_atomic_swap pshmem_int_atomic_swap +#define shmem_long_atomic_swap pshmem_long_atomic_swap +#define shmem_longlong_atomic_swap pshmem_longlong_atomic_swap #define shmem_double_swap pshmem_double_swap #define shmem_float_swap pshmem_float_swap #define shmem_int_swap pshmem_int_swap @@ -213,6 +333,16 @@ #define shmemx_int64_swap pshmemx_int64_swap /* Atomic set */ +#define shmem_ctx_double_atomic_set pshmem_ctx_double_atomic_set +#define shmem_ctx_float_atomic_set pshmem_ctx_float_atomic_set +#define shmem_ctx_int_atomic_set pshmem_ctx_int_atomic_set +#define shmem_ctx_long_atomic_set pshmem_ctx_long_atomic_set +#define shmem_ctx_longlong_atomic_set pshmem_ctx_longlong_atomic_set +#define shmem_double_atomic_set pshmem_double_atomic_set +#define shmem_float_atomic_set pshmem_float_atomic_set +#define shmem_int_atomic_set pshmem_int_atomic_set +#define shmem_long_atomic_set pshmem_long_atomic_set +#define shmem_longlong_atomic_set pshmem_longlong_atomic_set #define shmem_double_set pshmem_double_set #define shmem_float_set pshmem_float_set #define shmem_int_set pshmem_int_set @@ -222,14 +352,25 @@ #define shmemx_int64_set pshmemx_int64_set /* Atomic conditional swap */ +#define shmem_ctx_int_atomic_compare_swap pshmem_ctx_int_atomic_compare_swap +#define shmem_ctx_long_atomic_compare_swap pshmem_ctx_long_atomic_compare_swap +#define shmem_ctx_longlong_atomic_compare_swap pshmem_ctx_longlong_atomic_compare_swap +#define shmem_int_atomic_compare_swap pshmem_int_atomic_compare_swap +#define shmem_long_atomic_compare_swap pshmem_long_atomic_compare_swap +#define shmem_longlong_atomic_compare_swap pshmem_longlong_atomic_compare_swap #define shmem_int_cswap pshmem_int_cswap #define shmem_long_cswap pshmem_long_cswap #define shmem_longlong_cswap pshmem_longlong_cswap #define 
shmemx_int32_cswap pshmemx_int32_cswap #define shmemx_int64_cswap pshmemx_int64_cswap - /* Atomic Fetch&Add */ +#define shmem_ctx_int_atomic_fetch_add pshmem_ctx_int_atomic_fetch_add +#define shmem_ctx_long_atomic_fetch_add pshmem_ctx_long_atomic_fetch_add +#define shmem_ctx_longlong_atomic_fetch_add pshmem_ctx_longlong_atomic_fetch_add +#define shmem_int_atomic_fetch_add pshmem_int_atomic_fetch_add +#define shmem_long_atomic_fetch_add pshmem_long_atomic_fetch_add +#define shmem_longlong_atomic_fetch_add pshmem_longlong_atomic_fetch_add #define shmem_int_fadd pshmem_int_fadd #define shmem_long_fadd pshmem_long_fadd #define shmem_longlong_fadd pshmem_longlong_fadd @@ -240,6 +381,9 @@ #define shmem_uint_atomic_fetch_and pshmem_uint_atomic_fetch_and #define shmem_ulong_atomic_fetch_and pshmem_ulong_atomic_fetch_and #define shmem_ulonglong_atomic_fetch_and pshmem_ulonglong_atomic_fetch_and +#define shmem_ctx_uint_atomic_fetch_and pshmem_ctx_uint_atomic_fetch_and +#define shmem_ctx_ulong_atomic_fetch_and pshmem_ctx_ulong_atomic_fetch_and +#define shmem_ctx_ulonglong_atomic_fetch_and pshmem_ctx_ulonglong_atomic_fetch_and #define shmemx_int32_atomic_fetch_and pshmemx_int32_atomic_fetch_and #define shmemx_int64_atomic_fetch_and pshmemx_int64_atomic_fetch_and #define shmemx_uint32_atomic_fetch_and pshmemx_uint32_atomic_fetch_and @@ -249,6 +393,9 @@ #define shmem_uint_atomic_fetch_or pshmem_uint_atomic_fetch_or #define shmem_ulong_atomic_fetch_or pshmem_ulong_atomic_fetch_or #define shmem_ulonglong_atomic_fetch_or pshmem_ulonglong_atomic_fetch_or +#define shmem_ctx_uint_atomic_fetch_or pshmem_ctx_uint_atomic_fetch_or +#define shmem_ctx_ulong_atomic_fetch_or pshmem_ctx_ulong_atomic_fetch_or +#define shmem_ctx_ulonglong_atomic_fetch_or pshmem_ctx_ulonglong_atomic_fetch_or #define shmemx_int32_atomic_fetch_or pshmemx_int32_atomic_fetch_or #define shmemx_int64_atomic_fetch_or pshmemx_int64_atomic_fetch_or #define shmemx_uint32_atomic_fetch_or pshmemx_uint32_atomic_fetch_or @@ 
-258,12 +405,25 @@ #define shmem_uint_atomic_fetch_xor pshmem_uint_atomic_fetch_xor #define shmem_ulong_atomic_fetch_xor pshmem_ulong_atomic_fetch_xor #define shmem_ulonglong_atomic_fetch_xor pshmem_ulonglong_atomic_fetch_xor +#define shmem_ctx_uint_atomic_fetch_xor pshmem_ctx_uint_atomic_fetch_xor +#define shmem_ctx_ulong_atomic_fetch_xor pshmem_ctx_ulong_atomic_fetch_xor +#define shmem_ctx_ulonglong_atomic_fetch_xor pshmem_ctx_ulonglong_atomic_fetch_xor #define shmemx_int32_atomic_fetch_xor pshmemx_int32_atomic_fetch_xor #define shmemx_int64_atomic_fetch_xor pshmemx_int64_atomic_fetch_xor #define shmemx_uint32_atomic_fetch_xor pshmemx_uint32_atomic_fetch_xor #define shmemx_uint64_atomic_fetch_xor pshmemx_uint64_atomic_fetch_xor /* Atomic Fetch */ +#define shmem_ctx_double_atomic_fetch pshmem_ctx_double_atomic_fetch +#define shmem_ctx_float_atomic_fetch pshmem_ctx_float_atomic_fetch +#define shmem_ctx_int_atomic_fetch pshmem_ctx_int_atomic_fetch +#define shmem_ctx_long_atomic_fetch pshmem_ctx_long_atomic_fetch +#define shmem_ctx_longlong_atomic_fetch pshmem_ctx_longlong_atomic_fetch +#define shmem_double_atomic_fetch pshmem_double_atomic_fetch +#define shmem_float_atomic_fetch pshmem_float_atomic_fetch +#define shmem_int_atomic_fetch pshmem_int_atomic_fetch +#define shmem_long_atomic_fetch pshmem_long_atomic_fetch +#define shmem_longlong_atomic_fetch pshmem_longlong_atomic_fetch #define shmem_double_fetch pshmem_double_fetch #define shmem_float_fetch pshmem_float_fetch #define shmem_int_fetch pshmem_int_fetch @@ -273,6 +433,12 @@ #define shmemx_int64_fetch pshmemx_int64_fetch /* Atomic Fetch&Inc */ +#define shmem_ctx_int_atomic_fetch_inc pshmem_ctx_int_atomic_fetch_inc +#define shmem_ctx_long_atomic_fetch_inc pshmem_ctx_long_atomic_fetch_inc +#define shmem_ctx_longlong_atomic_fetch_inc pshmem_ctx_longlong_atomic_fetch_inc +#define shmem_int_atomic_fetch_inc pshmem_int_atomic_fetch_inc +#define shmem_long_atomic_fetch_inc pshmem_long_atomic_fetch_inc +#define 
shmem_longlong_atomic_fetch_inc pshmem_longlong_atomic_fetch_inc #define shmem_int_finc pshmem_int_finc #define shmem_long_finc pshmem_long_finc #define shmem_longlong_finc pshmem_longlong_finc @@ -280,6 +446,12 @@ #define shmemx_int64_finc pshmemx_int64_finc /* Atomic Add */ +#define shmem_ctx_int_atomic_add pshmem_ctx_int_atomic_add +#define shmem_ctx_long_atomic_add pshmem_ctx_long_atomic_add +#define shmem_ctx_longlong_atomic_add pshmem_ctx_longlong_atomic_add +#define shmem_int_atomic_add pshmem_int_atomic_add +#define shmem_long_atomic_add pshmem_long_atomic_add +#define shmem_longlong_atomic_add pshmem_longlong_atomic_add #define shmem_int_add pshmem_int_add #define shmem_long_add pshmem_long_add #define shmem_longlong_add pshmem_longlong_add @@ -290,6 +462,9 @@ #define shmem_uint_atomic_and pshmem_uint_atomic_and #define shmem_ulong_atomic_and pshmem_ulong_atomic_and #define shmem_ulonglong_atomic_and pshmem_ulonglong_atomic_and +#define shmem_ctx_uint_atomic_and pshmem_ctx_uint_atomic_and +#define shmem_ctx_ulong_atomic_and pshmem_ctx_ulong_atomic_and +#define shmem_ctx_ulonglong_atomic_and pshmem_ctx_ulonglong_atomic_and #define shmemx_int32_atomic_and pshmemx_int32_atomic_and #define shmemx_int64_atomic_and pshmemx_int64_atomic_and #define shmemx_uint32_atomic_and pshmemx_uint32_atomic_and @@ -299,6 +474,9 @@ #define shmem_uint_atomic_or pshmem_uint_atomic_or #define shmem_ulong_atomic_or pshmem_ulong_atomic_or #define shmem_ulonglong_atomic_or pshmem_ulonglong_atomic_or +#define shmem_ctx_uint_atomic_or pshmem_ctx_uint_atomic_or +#define shmem_ctx_ulong_atomic_or pshmem_ctx_ulong_atomic_or +#define shmem_ctx_ulonglong_atomic_or pshmem_ctx_ulonglong_atomic_or #define shmemx_int32_atomic_or pshmemx_int32_atomic_or #define shmemx_int64_atomic_or pshmemx_int64_atomic_or #define shmemx_uint32_atomic_or pshmemx_uint32_atomic_or @@ -308,12 +486,21 @@ #define shmem_uint_atomic_xor pshmem_uint_atomic_xor #define shmem_ulong_atomic_xor pshmem_ulong_atomic_xor 
#define shmem_ulonglong_atomic_xor pshmem_ulonglong_atomic_xor +#define shmem_ctx_uint_atomic_xor pshmem_ctx_uint_atomic_xor +#define shmem_ctx_ulong_atomic_xor pshmem_ctx_ulong_atomic_xor +#define shmem_ctx_ulonglong_atomic_xor pshmem_ctx_ulonglong_atomic_xor #define shmemx_int32_atomic_xor pshmemx_int32_atomic_xor #define shmemx_int64_atomic_xor pshmemx_int64_atomic_xor #define shmemx_uint32_atomic_xor pshmemx_uint32_atomic_xor #define shmemx_uint64_atomic_xor pshmemx_uint64_atomic_xor /* Atomic Inc */ +#define shmem_ctx_int_atomic_inc pshmem_ctx_int_atomic_inc +#define shmem_ctx_long_atomic_inc pshmem_ctx_long_atomic_inc +#define shmem_ctx_longlong_atomic_inc pshmem_ctx_longlong_atomic_inc +#define shmem_int_atomic_inc pshmem_int_atomic_inc +#define shmem_long_atomic_inc pshmem_long_atomic_inc +#define shmem_longlong_atomic_inc pshmem_longlong_atomic_inc #define shmem_int_inc pshmem_int_inc #define shmem_long_inc pshmem_long_inc #define shmem_longlong_inc pshmem_longlong_inc @@ -346,13 +533,22 @@ #define shmemx_int32_wait_until pshmemx_int32_wait_until #define shmemx_int64_wait_until pshmemx_int64_wait_until +#define shmem_short_test pshmem_short_test +#define shmem_int_test pshmem_int_test +#define shmem_long_test pshmem_long_test +#define shmem_longlong_test pshmem_longlong_test + /* * Barrier sync routines */ #define shmem_barrier pshmem_barrier #define shmem_barrier_all pshmem_barrier_all +#define shmem_sync pshmem_sync +#define shmem_sync_all pshmem_sync_all #define shmem_fence pshmem_fence +#define shmem_ctx_fence pshmem_ctx_fence #define shmem_quiet pshmem_quiet +#define shmem_ctx_quiet pshmem_ctx_quiet /* * Collective routines diff --git a/oshmem/shmem/c/shmem_add.c b/oshmem/shmem/c/shmem_add.c index 5dbcc65b36..9ca5c62c77 100644 --- a/oshmem/shmem/c/shmem_add.c +++ b/oshmem/shmem/c/shmem_add.c @@ -23,9 +23,7 @@ * must be completed without the possibility of another process updating target between the * time of the fetch and the update. 
*/ -#define SHMEM_TYPE_ADD(type_name, type, prefix) \ - void prefix##type_name##_add(type *target, type value, int pe) \ - { \ +#define DO_SHMEM_TYPE_ATOMIC_ADD(ctx, type_name, type, target, value, pe) do { \ int rc = OSHMEM_SUCCESS; \ size_t size = 0; \ \ @@ -35,17 +33,39 @@ \ size = sizeof(value); \ rc = MCA_ATOMIC_CALL(add( \ + ctx, \ (void*)target, \ value, \ size, \ pe)); \ RUNTIME_CHECK_RC(rc); \ + } while (0) + +#define SHMEM_CTX_TYPE_ATOMIC_ADD(type_name, type, prefix) \ + void prefix##_ctx##type_name##_atomic_add(shmem_ctx_t ctx, type *target, type value, int pe) \ + { \ + DO_SHMEM_TYPE_ATOMIC_ADD(ctx, type_name, type, target, \ + value, pe); \ + return ; \ + } + +#define SHMEM_TYPE_ATOMIC_ADD(type_name, type, prefix) \ + void prefix##type_name##_atomic_add(type *target, type value, int pe) \ + { \ + DO_SHMEM_TYPE_ATOMIC_ADD(oshmem_ctx_default, type_name, \ + type, target, value, pe); \ \ return ; \ } #if OSHMEM_PROFILING #include "oshmem/include/pshmem.h" +#pragma weak shmem_ctx_int_atomic_add = pshmem_ctx_int_atomic_add +#pragma weak shmem_ctx_long_atomic_add = pshmem_ctx_long_atomic_add +#pragma weak shmem_ctx_longlong_atomic_add = pshmem_ctx_longlong_atomic_add +#pragma weak shmem_int_atomic_add = pshmem_int_atomic_add +#pragma weak shmem_long_atomic_add = pshmem_long_atomic_add +#pragma weak shmem_longlong_atomic_add = pshmem_longlong_atomic_add #pragma weak shmem_int_add = pshmem_int_add #pragma weak shmem_long_add = pshmem_long_add #pragma weak shmem_longlong_add = pshmem_longlong_add @@ -54,6 +74,22 @@ #include "oshmem/shmem/c/profile/defines.h" #endif +SHMEM_CTX_TYPE_ATOMIC_ADD(_int, int, shmem) +SHMEM_CTX_TYPE_ATOMIC_ADD(_long, long, shmem) +SHMEM_CTX_TYPE_ATOMIC_ADD(_longlong, long long, shmem) +SHMEM_TYPE_ATOMIC_ADD(_int, int, shmem) +SHMEM_TYPE_ATOMIC_ADD(_long, long, shmem) +SHMEM_TYPE_ATOMIC_ADD(_longlong, long long, shmem) + +/* deprecated APIs */ +#define SHMEM_TYPE_ADD(type_name, type, prefix) \ + void prefix##type_name##_add(type *target, 
type value, int pe) \ + { \ + DO_SHMEM_TYPE_ATOMIC_ADD(oshmem_ctx_default, type_name, \ + type, target, value, pe); \ + return ; \ + } + SHMEM_TYPE_ADD(_int, int, shmem) SHMEM_TYPE_ADD(_long, long, shmem) SHMEM_TYPE_ADD(_longlong, long long, shmem) diff --git a/oshmem/shmem/c/shmem_align.c b/oshmem/shmem/c/shmem_align.c index 72e16f3d15..52c7d62539 100644 --- a/oshmem/shmem/c/shmem_align.c +++ b/oshmem/shmem/c/shmem_align.c @@ -43,8 +43,12 @@ static inline void* _shmemalign(size_t align, size_t size) RUNTIME_CHECK_INIT(); + SHMEM_MUTEX_LOCK(shmem_internal_mutex_alloc); + rc = MCA_MEMHEAP_CALL(memalign(align, size, &pBuff)); + SHMEM_MUTEX_UNLOCK(shmem_internal_mutex_alloc); + if (OSHMEM_SUCCESS != rc) { SHMEM_API_VERBOSE(1, "Allocation with shmemalign(align=%lu, size=%lu) failed.", diff --git a/oshmem/shmem/c/shmem_alloc.c b/oshmem/shmem/c/shmem_alloc.c index c7439a2927..3f7a579a20 100644 --- a/oshmem/shmem/c/shmem_alloc.c +++ b/oshmem/shmem/c/shmem_alloc.c @@ -20,6 +20,7 @@ #if OSHMEM_PROFILING #include "oshmem/include/pshmem.h" #pragma weak shmem_malloc = pshmem_malloc +#pragma weak shmem_calloc = pshmem_calloc #pragma weak shmalloc = pshmalloc #include "oshmem/shmem/c/profile/defines.h" #endif @@ -31,6 +32,29 @@ void* shmem_malloc(size_t size) return _shmalloc(size); } +/* + * Zero-initialized symmetric allocation of count elements of size bytes. + * Returns NULL when count * size would overflow size_t; otherwise returns + * the _shmalloc() result, zero-filled if it is non-NULL. + */ +void* shmem_calloc(size_t count, size_t size) +{ + size_t req_sz; + void *ptr; + + /* reject requests whose byte count would wrap around */ + if (0 != size && count > ((size_t)-1) / size) { + return NULL; + } + + req_sz = count * size; + ptr = _shmalloc(req_sz); + if (ptr) { + memset(ptr, 0, req_sz); + } + return ptr; +} + void* shmalloc(size_t size) { return _shmalloc(size); @@ -44,8 +68,12 @@ static inline void* _shmalloc(size_t size) RUNTIME_CHECK_INIT(); RUNTIME_CHECK_WITH_MEMHEAP_SIZE(size); + SHMEM_MUTEX_LOCK(shmem_internal_mutex_alloc); + rc = MCA_MEMHEAP_CALL(alloc(size, &pBuff)); + SHMEM_MUTEX_UNLOCK(shmem_internal_mutex_alloc); + if (OSHMEM_SUCCESS != rc) { SHMEM_API_VERBOSE(10, "Allocation with shmalloc(size=%lu) failed.", diff --git a/oshmem/shmem/c/shmem_and.c b/oshmem/shmem/c/shmem_and.c index 4f013f2e35..2402a6c0f5 100644 --- 
a/oshmem/shmem/c/shmem_and.c +++ b/oshmem/shmem/c/shmem_and.c @@ -28,6 +28,9 @@ #pragma weak shmem_uint_atomic_and = pshmem_uint_atomic_and #pragma weak shmem_ulong_atomic_and = pshmem_ulong_atomic_and #pragma weak shmem_ulonglong_atomic_and = pshmem_ulonglong_atomic_and +#pragma weak shmem_ctx_uint_atomic_and = pshmem_ctx_uint_atomic_and +#pragma weak shmem_ctx_ulong_atomic_and = pshmem_ctx_ulong_atomic_and +#pragma weak shmem_ctx_ulonglong_atomic_and = pshmem_ctx_ulonglong_atomic_and #pragma weak shmemx_int32_atomic_and = pshmemx_int32_atomic_and #pragma weak shmemx_int64_atomic_and = pshmemx_int64_atomic_and #pragma weak shmemx_uint32_atomic_and = pshmemx_uint32_atomic_and @@ -38,6 +41,9 @@ OSHMEM_TYPE_OP(uint, unsigned int, shmem, and) OSHMEM_TYPE_OP(ulong, unsigned long, shmem, and) OSHMEM_TYPE_OP(ulonglong, unsigned long long, shmem, and) +OSHMEM_CTX_TYPE_OP(uint, unsigned int, shmem, and) +OSHMEM_CTX_TYPE_OP(ulong, unsigned long, shmem, and) +OSHMEM_CTX_TYPE_OP(ulonglong, unsigned long long, shmem, and) OSHMEM_TYPE_OP(int32, int32_t, shmemx, and) OSHMEM_TYPE_OP(int64, int64_t, shmemx, and) OSHMEM_TYPE_OP(uint32, uint32_t, shmemx, and) diff --git a/oshmem/shmem/c/shmem_context.c b/oshmem/shmem/c/shmem_context.c new file mode 100644 index 0000000000..44367bb6b0 --- /dev/null +++ b/oshmem/shmem/c/shmem_context.c @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2013-2018 Mellanox Technologies, Inc. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "oshmem_config.h" + +#include <stdlib.h> + +#include "orte/util/show_help.h" + +#include "opal/runtime/opal_cr.h" +#include "opal/util/output.h" + +#include "oshmem/constants.h" +#include "oshmem/include/shmem.h" +#include "oshmem/runtime/params.h" +#include "oshmem/runtime/runtime.h" +#include "oshmem/shmem/shmem_api_logger.h" + +#if OSHMEM_PROFILING +#include "oshmem/include/pshmem.h" +#pragma weak shmem_ctx_create = pshmem_ctx_create +#pragma weak shmem_ctx_destroy = pshmem_ctx_destroy +#include "oshmem/shmem/c/profile/defines.h" +#endif + +int shmem_ctx_create(long options, shmem_ctx_t *ctx) +{ + return MCA_SPML_CALL(ctx_create(options, ctx)); +} + +void shmem_ctx_destroy(shmem_ctx_t ctx) +{ + MCA_SPML_CALL(ctx_destroy(ctx)); +} diff --git a/oshmem/shmem/c/shmem_cswap.c b/oshmem/shmem/c/shmem_cswap.c index 2868e5a6bb..12ef8e4a04 100644 --- a/oshmem/shmem/c/shmem_cswap.c +++ b/oshmem/shmem/c/shmem_cswap.c @@ -25,12 +25,9 @@ * of target. The operation must be completed without the possibility of another process updating * target between the time of the fetch and the update. 
*/ -#define SHMEM_TYPE_CSWAP(type_name, type, prefix) \ - type prefix##type_name##_cswap(type *target, type cond, type value, int pe) \ - { \ +#define DO_SHMEM_TYPE_ATOMIC_COMPARE_SWAP(ctx, type, target, cond, value, pe, out_value) do { \ int rc = OSHMEM_SUCCESS; \ size_t size = 0; \ - uint64_t out_value; \ \ RUNTIME_CHECK_INIT(); \ RUNTIME_CHECK_PE(pe); \ @@ -38,19 +35,44 @@ \ + uint64_t prev_value = 0; /* cswap takes a uint64_t* result slot; "type" may be narrower */ \ size = sizeof(value); \ rc = MCA_ATOMIC_CALL(cswap( \ + ctx, \ (void*)target, \ - &out_value, \ + &prev_value, \ OSHMEM_ATOMIC_PTR_2_INT(&cond, sizeof(cond)), \ OSHMEM_ATOMIC_PTR_2_INT(&value, sizeof(value)), \ size, \ pe)); \ RUNTIME_CHECK_RC(rc); \ + out_value = (type)prev_value; \ - \ + } while (0) + +#define SHMEM_CTX_TYPE_ATOMIC_COMPARE_SWAP(type_name, type, prefix) \ + type prefix##_ctx##type_name##_atomic_compare_swap(shmem_ctx_t ctx, type *target, type cond, type value, int pe) \ + { \ + type out_value; \ + DO_SHMEM_TYPE_ATOMIC_COMPARE_SWAP(ctx, type, target, cond, value, \ + pe, out_value); \ + return out_value; \ + } + +#define SHMEM_TYPE_ATOMIC_COMPARE_SWAP(type_name, type, prefix) \ + type prefix##type_name##_atomic_compare_swap(type *target, type cond, type value, int pe) \ + { \ + type out_value; \ + DO_SHMEM_TYPE_ATOMIC_COMPARE_SWAP(oshmem_ctx_default, type, target, \ + cond, value, pe, out_value); \ return out_value; \ } #if OSHMEM_PROFILING #include "oshmem/include/pshmem.h" +#pragma weak shmem_ctx_int_atomic_compare_swap = pshmem_ctx_int_atomic_compare_swap +#pragma weak shmem_ctx_long_atomic_compare_swap = pshmem_ctx_long_atomic_compare_swap +#pragma weak shmem_ctx_longlong_atomic_compare_swap = pshmem_ctx_longlong_atomic_compare_swap +#pragma weak shmem_int_atomic_compare_swap = pshmem_int_atomic_compare_swap +#pragma weak shmem_long_atomic_compare_swap = pshmem_long_atomic_compare_swap +#pragma weak shmem_longlong_atomic_compare_swap = pshmem_longlong_atomic_compare_swap #pragma weak shmem_int_cswap = pshmem_int_cswap #pragma weak shmem_long_cswap = pshmem_long_cswap #pragma weak 
shmem_longlong_cswap = pshmem_longlong_cswap @@ -59,6 +81,23 @@ #include "oshmem/shmem/c/profile/defines.h" #endif +SHMEM_CTX_TYPE_ATOMIC_COMPARE_SWAP(_int, int, shmem) +SHMEM_CTX_TYPE_ATOMIC_COMPARE_SWAP(_long, long, shmem) +SHMEM_CTX_TYPE_ATOMIC_COMPARE_SWAP(_longlong, long long, shmem) +SHMEM_TYPE_ATOMIC_COMPARE_SWAP(_int, int, shmem) +SHMEM_TYPE_ATOMIC_COMPARE_SWAP(_long, long, shmem) +SHMEM_TYPE_ATOMIC_COMPARE_SWAP(_longlong, long long, shmem) + +/* deprecated APIs */ +#define SHMEM_TYPE_CSWAP(type_name, type, prefix) \ + type prefix##type_name##_cswap(type *target, type cond, type value, int pe) \ + { \ + type out_value; \ + DO_SHMEM_TYPE_ATOMIC_COMPARE_SWAP(oshmem_ctx_default, type, target, \ + cond, value, pe, out_value); \ + return out_value; \ + } + SHMEM_TYPE_CSWAP(_int, int, shmem) SHMEM_TYPE_CSWAP(_long, long, shmem) SHMEM_TYPE_CSWAP(_longlong, long long, shmem) diff --git a/oshmem/shmem/c/shmem_fadd.c b/oshmem/shmem/c/shmem_fadd.c index 4feaa08772..16c59a4ada 100644 --- a/oshmem/shmem/c/shmem_fadd.c +++ b/oshmem/shmem/c/shmem_fadd.c @@ -25,12 +25,9 @@ * without the possibility of another process updating target between the time of the * fetch and the update. 
*/ -#define SHMEM_TYPE_FADD(type_name, type, prefix) \ - type prefix##type_name##_fadd(type *target, type value, int pe) \ - { \ +#define DO_SHMEM_TYPE_ATOMIC_FETCH_ADD(ctx, type_name, type, target, value, pe, out_value) do { \ int rc = OSHMEM_SUCCESS; \ size_t size = 0; \ - type out_value; \ \ RUNTIME_CHECK_INIT(); \ RUNTIME_CHECK_PE(pe); \ @@ -38,18 +35,41 @@ \ size = sizeof(out_value); \ rc = MCA_ATOMIC_CALL(fadd( \ + ctx, \ (void*)target, \ (void*)&out_value, \ value, \ size, \ pe)); \ RUNTIME_CHECK_RC(rc); \ - \ + } while (0) + +#define SHMEM_CTX_TYPE_ATOMIC_FETCH_ADD(type_name, type, prefix) \ + type prefix##_ctx##type_name##_atomic_fetch_add(shmem_ctx_t ctx, type *target, type value, int pe) \ + { \ + type out_value; \ + DO_SHMEM_TYPE_ATOMIC_FETCH_ADD(ctx, type_name, type, target, \ + value, pe, out_value); \ + return out_value; \ + } + +#define SHMEM_TYPE_ATOMIC_FETCH_ADD(type_name, type, prefix) \ + type prefix##type_name##_atomic_fetch_add(type *target, type value, int pe)\ + { \ + type out_value; \ + DO_SHMEM_TYPE_ATOMIC_FETCH_ADD(oshmem_ctx_default, type_name, \ + type, target, value, pe, out_value); \ return out_value; \ } #if OSHMEM_PROFILING #include "oshmem/include/pshmem.h" +#pragma weak shmem_ctx_int_atomic_fetch_add = pshmem_ctx_int_atomic_fetch_add +#pragma weak shmem_ctx_long_atomic_fetch_add = pshmem_ctx_long_atomic_fetch_add +#pragma weak shmem_ctx_longlong_atomic_fetch_add = pshmem_ctx_longlong_atomic_fetch_add +#pragma weak shmem_int_atomic_fetch_add = pshmem_int_atomic_fetch_add +#pragma weak shmem_long_atomic_fetch_add = pshmem_long_atomic_fetch_add +#pragma weak shmem_longlong_atomic_fetch_add = pshmem_longlong_atomic_fetch_add #pragma weak shmem_int_fadd = pshmem_int_fadd #pragma weak shmem_long_fadd = pshmem_long_fadd #pragma weak shmem_longlong_fadd = pshmem_longlong_fadd @@ -58,6 +78,23 @@ #include "oshmem/shmem/c/profile/defines.h" #endif +SHMEM_CTX_TYPE_ATOMIC_FETCH_ADD(_int, int, shmem) +SHMEM_CTX_TYPE_ATOMIC_FETCH_ADD(_long, 
long, shmem) +SHMEM_CTX_TYPE_ATOMIC_FETCH_ADD(_longlong, long long, shmem) +SHMEM_TYPE_ATOMIC_FETCH_ADD(_int, int, shmem) +SHMEM_TYPE_ATOMIC_FETCH_ADD(_long, long, shmem) +SHMEM_TYPE_ATOMIC_FETCH_ADD(_longlong, long long, shmem) + +/* deprecated APIs */ +#define SHMEM_TYPE_FADD(type_name, type, prefix) \ + type prefix##type_name##_fadd(type *target, type value, int pe) \ + { \ + type out_value; \ + DO_SHMEM_TYPE_ATOMIC_FETCH_ADD(oshmem_ctx_default, type_name, \ + type, target, value, pe, out_value); \ + return out_value; \ + } + SHMEM_TYPE_FADD(_int, int, shmem) SHMEM_TYPE_FADD(_long, long, shmem) SHMEM_TYPE_FADD(_longlong, long long, shmem) diff --git a/oshmem/shmem/c/shmem_fand.c b/oshmem/shmem/c/shmem_fand.c index 357f902c3b..2b452a4052 100644 --- a/oshmem/shmem/c/shmem_fand.c +++ b/oshmem/shmem/c/shmem_fand.c @@ -30,6 +30,9 @@ #pragma weak shmem_uint_atomic_fetch_and = pshmem_uint_atomic_fetch_and #pragma weak shmem_ulong_atomic_fetch_and = pshmem_ulong_atomic_fetch_and #pragma weak shmem_ulonglong_atomic_fetch_and = pshmem_ulonglong_atomic_fetch_and +#pragma weak shmem_ctx_uint_atomic_fetch_and = pshmem_ctx_uint_atomic_fetch_and +#pragma weak shmem_ctx_ulong_atomic_fetch_and = pshmem_ctx_ulong_atomic_fetch_and +#pragma weak shmem_ctx_ulonglong_atomic_fetch_and = pshmem_ctx_ulonglong_atomic_fetch_and #pragma weak shmemx_int32_atomic_fetch_and = pshmemx_int32_atomic_fetch_and #pragma weak shmemx_int64_atomic_fetch_and = pshmemx_int64_atomic_fetch_and #pragma weak shmemx_uint32_atomic_fetch_and = pshmemx_uint32_atomic_fetch_and @@ -42,6 +45,9 @@ OSHMEM_TYPE_FOP(uint, unsigned int, shmem, and) OSHMEM_TYPE_FOP(ulong, unsigned long, shmem, and) OSHMEM_TYPE_FOP(ulonglong, unsigned long long, shmem, and) +OSHMEM_CTX_TYPE_FOP(uint, unsigned int, shmem, and) +OSHMEM_CTX_TYPE_FOP(ulong, unsigned long, shmem, and) +OSHMEM_CTX_TYPE_FOP(ulonglong, unsigned long long, shmem, and) OSHMEM_TYPE_FOP(int32, int32_t, shmemx, and) OSHMEM_TYPE_FOP(int64, int64_t, shmemx, and) 
OSHMEM_TYPE_FOP(uint32, uint32_t, shmemx, and) diff --git a/oshmem/shmem/c/shmem_fence.c b/oshmem/shmem/c/shmem_fence.c index 6c8f6c189c..0a049bee57 100644 --- a/oshmem/shmem/c/shmem_fence.c +++ b/oshmem/shmem/c/shmem_fence.c @@ -17,11 +17,18 @@ #if OSHMEM_PROFILING #include "oshmem/include/pshmem.h" #pragma weak shmem_fence = pshmem_fence +#pragma weak shmem_ctx_fence = pshmem_ctx_fence #include "oshmem/shmem/c/profile/defines.h" #endif void shmem_fence(void) { - MCA_SPML_CALL(fence()); + MCA_SPML_CALL(fence(oshmem_ctx_default)); +} + +void shmem_ctx_fence(shmem_ctx_t ctx) +{ + + MCA_SPML_CALL(fence(ctx)); } diff --git a/oshmem/shmem/c/shmem_fetch.c b/oshmem/shmem/c/shmem_fetch.c index 1e08d399ae..95c688ea02 100644 --- a/oshmem/shmem/c/shmem_fetch.c +++ b/oshmem/shmem/c/shmem_fetch.c @@ -24,12 +24,9 @@ * The operation must be completed without the possibility of another process * updating target during the fetch. */ -#define SHMEM_TYPE_FETCH(type_name, type, prefix) \ - type prefix##type_name##_fetch(const type *target, int pe) \ - { \ +#define DO_SHMEM_TYPE_ATOMIC_FETCH(ctx, type_name, type, target, pe, out_value) do { \ int rc = OSHMEM_SUCCESS; \ size_t size = 0; \ - type out_value; \ type value = 0; \ \ RUNTIME_CHECK_INIT(); \ @@ -38,18 +35,46 @@ \ size = sizeof(out_value); \ rc = MCA_ATOMIC_CALL(fadd( \ + ctx, \ (void*)target, \ (void*)&out_value, \ value, \ size, \ pe)); \ RUNTIME_CHECK_RC(rc); \ + } while (0) + +#define SHMEM_CTX_TYPE_ATOMIC_FETCH(type_name, type, prefix) \ + type prefix##_ctx##type_name##_atomic_fetch(shmem_ctx_t ctx, const type *target, int pe) \ + { \ + type out_value; \ + DO_SHMEM_TYPE_ATOMIC_FETCH(ctx, type_name, type, target, \ + pe, out_value); \ + return out_value; \ + } + +#define SHMEM_TYPE_ATOMIC_FETCH(type_name, type, prefix) \ + type prefix##type_name##_atomic_fetch(const type *target, int pe) \ + { \ + type out_value; \ + DO_SHMEM_TYPE_ATOMIC_FETCH(oshmem_ctx_default, type_name, \ + type, target, pe, out_value); \ \ return 
out_value; \ } #if OSHMEM_PROFILING #include "oshmem/include/pshmem.h" +#pragma weak shmem_ctx_int_atomic_fetch = pshmem_ctx_int_atomic_fetch +#pragma weak shmem_ctx_long_atomic_fetch = pshmem_ctx_long_atomic_fetch +#pragma weak shmem_ctx_longlong_atomic_fetch = pshmem_ctx_longlong_atomic_fetch +#pragma weak shmem_ctx_double_atomic_fetch = pshmem_ctx_double_atomic_fetch +#pragma weak shmem_ctx_float_atomic_fetch = pshmem_ctx_float_atomic_fetch +#pragma weak shmem_int_atomic_fetch = pshmem_int_atomic_fetch +#pragma weak shmem_long_atomic_fetch = pshmem_long_atomic_fetch +#pragma weak shmem_longlong_atomic_fetch = pshmem_longlong_atomic_fetch +#pragma weak shmem_double_atomic_fetch = pshmem_double_atomic_fetch +#pragma weak shmem_float_atomic_fetch = pshmem_float_atomic_fetch #pragma weak shmem_int_fetch = pshmem_int_fetch #pragma weak shmem_long_fetch = pshmem_long_fetch #pragma weak shmem_longlong_fetch = pshmem_longlong_fetch @@ -60,6 +85,27 @@ #include "oshmem/shmem/c/profile/defines.h" #endif +SHMEM_CTX_TYPE_ATOMIC_FETCH(_int, int, shmem) +SHMEM_CTX_TYPE_ATOMIC_FETCH(_long, long, shmem) +SHMEM_CTX_TYPE_ATOMIC_FETCH(_longlong, long long, shmem) +SHMEM_CTX_TYPE_ATOMIC_FETCH(_double, double, shmem) +SHMEM_CTX_TYPE_ATOMIC_FETCH(_float, float, shmem) +SHMEM_TYPE_ATOMIC_FETCH(_int, int, shmem) +SHMEM_TYPE_ATOMIC_FETCH(_long, long, shmem) +SHMEM_TYPE_ATOMIC_FETCH(_longlong, long long, shmem) +SHMEM_TYPE_ATOMIC_FETCH(_double, double, shmem) +SHMEM_TYPE_ATOMIC_FETCH(_float, float, shmem) + +/* deprecated APIs */ +#define SHMEM_TYPE_FETCH(type_name, type, prefix) \ + type prefix##type_name##_fetch(const type *target, int pe) \ + { \ + type out_value; \ + DO_SHMEM_TYPE_ATOMIC_FETCH(oshmem_ctx_default, type_name, \ + type, target, pe, out_value); \ + return out_value; \ + } + SHMEM_TYPE_FETCH(_int, int, shmem) SHMEM_TYPE_FETCH(_long, long, shmem) SHMEM_TYPE_FETCH(_longlong, long long, shmem) diff --git a/oshmem/shmem/c/shmem_finc.c b/oshmem/shmem/c/shmem_finc.c index 
ba40e50655..dc507797e2 100644 --- a/oshmem/shmem/c/shmem_finc.c +++ b/oshmem/shmem/c/shmem_finc.c @@ -25,13 +25,10 @@ * completed without the possibility of another process updating target between the time of * the fetch and the update. */ -#define SHMEM_TYPE_FINC(type_name, type, prefix) \ - type prefix##type_name##_finc(type *target, int pe) \ - { \ +#define DO_SHMEM_TYPE_ATOMIC_FETCH_INC(ctx, type_name, type, target, pe, out_value) do { \ int rc = OSHMEM_SUCCESS; \ size_t size; \ type value = 1; \ - type out_value; \ \ RUNTIME_CHECK_INIT(); \ RUNTIME_CHECK_PE(pe); \ @@ -39,18 +36,41 @@ \ size = sizeof(out_value); \ rc = MCA_ATOMIC_CALL(fadd( \ + ctx, \ (void*)target, \ (void*)&out_value, \ value, \ size, \ pe)); \ RUNTIME_CHECK_RC(rc); \ - \ + } while (0) + +#define SHMEM_CTX_TYPE_ATOMIC_FETCH_INC(type_name, type, prefix) \ + type prefix##_ctx##type_name##_atomic_fetch_inc(shmem_ctx_t ctx, type *target, int pe) \ + { \ + type out_value; \ + DO_SHMEM_TYPE_ATOMIC_FETCH_INC(ctx, type_name, type, target,\ + pe, out_value); \ + return out_value; \ + } + +#define SHMEM_TYPE_ATOMIC_FETCH_INC(type_name, type, prefix) \ + type prefix##type_name##_atomic_fetch_inc(type *target, int pe) \ + { \ + type out_value; \ + DO_SHMEM_TYPE_ATOMIC_FETCH_INC(oshmem_ctx_default, type_name,\ + type, target, pe, out_value); \ return out_value; \ } #if OSHMEM_PROFILING #include "oshmem/include/pshmem.h" +#pragma weak shmem_ctx_int_atomic_fetch_inc = pshmem_ctx_int_atomic_fetch_inc +#pragma weak shmem_ctx_long_atomic_fetch_inc = pshmem_ctx_long_atomic_fetch_inc +#pragma weak shmem_ctx_longlong_atomic_fetch_inc = pshmem_ctx_longlong_atomic_fetch_inc +#pragma weak shmem_int_atomic_fetch_inc = pshmem_int_atomic_fetch_inc +#pragma weak shmem_long_atomic_fetch_inc = pshmem_long_atomic_fetch_inc +#pragma weak shmem_longlong_atomic_fetch_inc = pshmem_longlong_atomic_fetch_inc #pragma weak shmem_int_finc = pshmem_int_finc #pragma weak shmem_long_finc = pshmem_long_finc #pragma weak 
shmem_longlong_finc = pshmem_longlong_finc @@ -59,6 +79,23 @@ #include "oshmem/shmem/c/profile/defines.h" #endif +SHMEM_CTX_TYPE_ATOMIC_FETCH_INC(_int, int, shmem) +SHMEM_CTX_TYPE_ATOMIC_FETCH_INC(_long, long, shmem) +SHMEM_CTX_TYPE_ATOMIC_FETCH_INC(_longlong, long long, shmem) +SHMEM_TYPE_ATOMIC_FETCH_INC(_int, int, shmem) +SHMEM_TYPE_ATOMIC_FETCH_INC(_long, long, shmem) +SHMEM_TYPE_ATOMIC_FETCH_INC(_longlong, long long, shmem) + +/* deprecated APIs */ +#define SHMEM_TYPE_FINC(type_name, type, prefix) \ + type prefix##type_name##_finc(type *target, int pe) \ + { \ + type out_value; \ + DO_SHMEM_TYPE_ATOMIC_FETCH_INC(oshmem_ctx_default, type_name, \ + type, target, pe, out_value); \ + return out_value; \ + } + SHMEM_TYPE_FINC(_int, int, shmem) SHMEM_TYPE_FINC(_long, long, shmem) SHMEM_TYPE_FINC(_longlong, long long, shmem) diff --git a/oshmem/shmem/c/shmem_for.c b/oshmem/shmem/c/shmem_for.c index c09fa43842..c30bef03b1 100644 --- a/oshmem/shmem/c/shmem_for.c +++ b/oshmem/shmem/c/shmem_for.c @@ -19,7 +19,7 @@ #include "oshmem/mca/atomic/atomic.h" /* - * These routines perform an atomic fetch-and-or operation. + * These routines perform an atomic fetch-and-or operation. * The fetch and or routines retrieve the value at address target on PE pe, and update * target with the result of 'or' operation value to the retrieved value. 
The operation * must be completed without the possibility of another process updating target between @@ -30,6 +30,9 @@ #pragma weak shmem_uint_atomic_fetch_or = pshmem_uint_atomic_fetch_or #pragma weak shmem_ulong_atomic_fetch_or = pshmem_ulong_atomic_fetch_or #pragma weak shmem_ulonglong_atomic_fetch_or = pshmem_ulonglong_atomic_fetch_or +#pragma weak shmem_ctx_uint_atomic_fetch_or = pshmem_ctx_uint_atomic_fetch_or +#pragma weak shmem_ctx_ulong_atomic_fetch_or = pshmem_ctx_ulong_atomic_fetch_or +#pragma weak shmem_ctx_ulonglong_atomic_fetch_or = pshmem_ctx_ulonglong_atomic_fetch_or #pragma weak shmemx_int32_atomic_fetch_or = pshmemx_int32_atomic_fetch_or #pragma weak shmemx_int64_atomic_fetch_or = pshmemx_int64_atomic_fetch_or #pragma weak shmemx_uint32_atomic_fetch_or = pshmemx_uint32_atomic_fetch_or @@ -40,6 +43,9 @@ OSHMEM_TYPE_FOP(uint, unsigned int, shmem, or) OSHMEM_TYPE_FOP(ulong, unsigned long, shmem, or) OSHMEM_TYPE_FOP(ulonglong, unsigned long long, shmem, or) +OSHMEM_CTX_TYPE_FOP(uint, unsigned int, shmem, or) +OSHMEM_CTX_TYPE_FOP(ulong, unsigned long, shmem, or) +OSHMEM_CTX_TYPE_FOP(ulonglong, unsigned long long, shmem, or) OSHMEM_TYPE_FOP(int32, int32_t, shmemx, or) OSHMEM_TYPE_FOP(int64, int64_t, shmemx, or) OSHMEM_TYPE_FOP(uint32, uint32_t, shmemx, or) diff --git a/oshmem/shmem/c/shmem_free.c b/oshmem/shmem/c/shmem_free.c index b0e706b009..f5c5ce0cae 100644 --- a/oshmem/shmem/c/shmem_free.c +++ b/oshmem/shmem/c/shmem_free.c @@ -53,7 +53,12 @@ static inline void _shfree(void* ptr) shmem_barrier_all(); #endif + SHMEM_MUTEX_LOCK(shmem_internal_mutex_alloc); + rc = MCA_MEMHEAP_CALL(free(ptr)); + + SHMEM_MUTEX_UNLOCK(shmem_internal_mutex_alloc); + if (OSHMEM_SUCCESS != rc) { SHMEM_API_VERBOSE(10, "shfree failure."); } diff --git a/oshmem/shmem/c/shmem_fxor.c b/oshmem/shmem/c/shmem_fxor.c index a95fd88e73..413daca45c 100644 --- a/oshmem/shmem/c/shmem_fxor.c +++ b/oshmem/shmem/c/shmem_fxor.c @@ -30,6 +30,9 @@ #pragma weak shmem_uint_atomic_fetch_xor = 
pshmem_uint_atomic_fetch_xor #pragma weak shmem_ulong_atomic_fetch_xor = pshmem_ulong_atomic_fetch_xor #pragma weak shmem_ulonglong_atomic_fetch_xor = pshmem_ulonglong_atomic_fetch_xor +#pragma weak shmem_ctx_uint_atomic_fetch_xor = pshmem_ctx_uint_atomic_fetch_xor +#pragma weak shmem_ctx_ulong_atomic_fetch_xor = pshmem_ctx_ulong_atomic_fetch_xor +#pragma weak shmem_ctx_ulonglong_atomic_fetch_xor = pshmem_ctx_ulonglong_atomic_fetch_xor #pragma weak shmemx_int32_atomic_fetch_xor = pshmemx_int32_atomic_fetch_xor #pragma weak shmemx_int64_atomic_fetch_xor = pshmemx_int64_atomic_fetch_xor #pragma weak shmemx_uint32_atomic_fetch_xor = pshmemx_uint32_atomic_fetch_xor @@ -40,6 +43,9 @@ OSHMEM_TYPE_FOP(uint, unsigned int, shmem, xor) OSHMEM_TYPE_FOP(ulong, unsigned long, shmem, xor) OSHMEM_TYPE_FOP(ulonglong, unsigned long long, shmem, xor) +OSHMEM_CTX_TYPE_FOP(uint, unsigned int, shmem, xor) +OSHMEM_CTX_TYPE_FOP(ulong, unsigned long, shmem, xor) +OSHMEM_CTX_TYPE_FOP(ulonglong, unsigned long long, shmem, xor) OSHMEM_TYPE_FOP(int32, int32_t, shmemx, xor) OSHMEM_TYPE_FOP(int64, int64_t, shmemx, xor) OSHMEM_TYPE_FOP(uint32, uint32_t, shmemx, xor) diff --git a/oshmem/shmem/c/shmem_g.c b/oshmem/shmem/c/shmem_g.c index 506a288c5f..7ab1391363 100644 --- a/oshmem/shmem/c/shmem_g.c +++ b/oshmem/shmem/c/shmem_g.c @@ -22,12 +22,9 @@ * double, long) from symmetric data objects on remote PEs. * Retrieves the value at the symmetric address addr of the remote PE pe. 
*/ -#define SHMEM_TYPE_G(type_name, type, prefix) \ - type prefix##type_name##_g(const type *addr, int pe) \ - { \ +#define DO_SHMEM_TYPE_G(ctx, type, addr, pe, out_value) do { \ int rc = OSHMEM_SUCCESS; \ size_t size = 0; \ - type out_value; \ \ RUNTIME_CHECK_INIT(); \ RUNTIME_CHECK_PE(pe); \ @@ -35,17 +32,40 @@ \ size = sizeof(out_value); \ rc = MCA_SPML_CALL(get( \ + ctx, \ (void*)addr, \ size, \ (void*)&out_value, \ pe)); \ RUNTIME_CHECK_RC(rc); \ - \ + } while (0) + +#define SHMEM_CTX_TYPE_G(type_name, type, prefix) \ + type prefix##_ctx##type_name##_g(shmem_ctx_t ctx, const type *addr, int pe) \ + { \ + type out_value; \ + DO_SHMEM_TYPE_G(ctx, type, addr, pe, out_value); \ + return out_value; \ + } + +#define SHMEM_TYPE_G(type_name, type, prefix) \ + type prefix##type_name##_g(const type *addr, int pe) \ + { \ + type out_value; \ + DO_SHMEM_TYPE_G(oshmem_ctx_default, type, addr, pe, out_value); \ return out_value; \ } #if OSHMEM_PROFILING #include "oshmem/include/pshmem.h" +#pragma weak shmem_ctx_char_g = pshmem_ctx_char_g +#pragma weak shmem_ctx_short_g = pshmem_ctx_short_g +#pragma weak shmem_ctx_int_g = pshmem_ctx_int_g +#pragma weak shmem_ctx_long_g = pshmem_ctx_long_g +#pragma weak shmem_ctx_longlong_g = pshmem_ctx_longlong_g +#pragma weak shmem_ctx_float_g = pshmem_ctx_float_g +#pragma weak shmem_ctx_double_g = pshmem_ctx_double_g +#pragma weak shmem_ctx_longdouble_g = pshmem_ctx_longdouble_g #pragma weak shmem_char_g = pshmem_char_g #pragma weak shmem_short_g = pshmem_short_g #pragma weak shmem_int_g = pshmem_int_g @@ -60,6 +80,14 @@ #include "oshmem/shmem/c/profile/defines.h" #endif +SHMEM_CTX_TYPE_G(_char, char, shmem) +SHMEM_CTX_TYPE_G(_short, short, shmem) +SHMEM_CTX_TYPE_G(_int, int, shmem) +SHMEM_CTX_TYPE_G(_long, long, shmem) +SHMEM_CTX_TYPE_G(_longlong, long long, shmem) +SHMEM_CTX_TYPE_G(_float, float, shmem) +SHMEM_CTX_TYPE_G(_double, double, shmem) +SHMEM_CTX_TYPE_G(_longdouble, long double, shmem) SHMEM_TYPE_G(_char, char, shmem) 
SHMEM_TYPE_G(_short, short, shmem) SHMEM_TYPE_G(_int, int, shmem) diff --git a/oshmem/shmem/c/shmem_get.c b/oshmem/shmem/c/shmem_get.c index 8ad8189045..9537030138 100644 --- a/oshmem/shmem/c/shmem_get.c +++ b/oshmem/shmem/c/shmem_get.c @@ -22,9 +22,7 @@ * on the remote PE (pe), to the data object at address target on the local PE. These routines * return after the data has been copied to address target on the local pe. */ -#define SHMEM_TYPE_GET(type_name, type) \ - void shmem##type_name##_get(type *target, const type *source, size_t nelems, int pe) \ - { \ +#define DO_SHMEM_TYPE_GET(ctx, type, target, source, nelems, pe) do { \ int rc = OSHMEM_SUCCESS; \ size_t size = 0; \ \ @@ -34,17 +32,39 @@ \ size = nelems * sizeof(type); \ rc = MCA_SPML_CALL(get( \ + ctx, \ (void*)source, \ size, \ (void*)target, \ pe)); \ RUNTIME_CHECK_RC(rc); \ - \ + } while (0) + +#define SHMEM_CTX_TYPE_GET(type_name, type) \ + void shmem_ctx##type_name##_get(shmem_ctx_t ctx, type *target, const type *source, size_t nelems, int pe) \ + { \ + DO_SHMEM_TYPE_GET(ctx, type, target, source, nelems, pe); \ + return ; \ + } + +#define SHMEM_TYPE_GET(type_name, type) \ + void shmem##type_name##_get(type *target, const type *source, size_t nelems, int pe) \ + { \ + DO_SHMEM_TYPE_GET(oshmem_ctx_default, type, target, source, \ + nelems, pe); \ return ; \ } #if OSHMEM_PROFILING #include "oshmem/include/pshmem.h" +#pragma weak shmem_ctx_char_get = pshmem_ctx_char_get +#pragma weak shmem_ctx_short_get = pshmem_ctx_short_get +#pragma weak shmem_ctx_int_get = pshmem_ctx_int_get +#pragma weak shmem_ctx_long_get = pshmem_ctx_long_get +#pragma weak shmem_ctx_longlong_get = pshmem_ctx_longlong_get +#pragma weak shmem_ctx_float_get = pshmem_ctx_float_get +#pragma weak shmem_ctx_double_get = pshmem_ctx_double_get +#pragma weak shmem_ctx_longdouble_get = pshmem_ctx_longdouble_get #pragma weak shmem_char_get = pshmem_char_get #pragma weak shmem_short_get = pshmem_short_get #pragma weak shmem_int_get = 
pshmem_int_get @@ -53,6 +73,12 @@ #pragma weak shmem_float_get = pshmem_float_get #pragma weak shmem_double_get = pshmem_double_get #pragma weak shmem_longdouble_get = pshmem_longdouble_get +#pragma weak shmem_ctx_getmem = pshmem_ctx_getmem +#pragma weak shmem_ctx_get8 = pshmem_ctx_get8 +#pragma weak shmem_ctx_get16 = pshmem_ctx_get16 +#pragma weak shmem_ctx_get32 = pshmem_ctx_get32 +#pragma weak shmem_ctx_get64 = pshmem_ctx_get64 +#pragma weak shmem_ctx_get128 = pshmem_ctx_get128 #pragma weak shmem_getmem = pshmem_getmem #pragma weak shmem_get8 = pshmem_get8 #pragma weak shmem_get16 = pshmem_get16 @@ -62,6 +88,14 @@ #include "oshmem/shmem/c/profile/defines.h" #endif +SHMEM_CTX_TYPE_GET(_char, char) +SHMEM_CTX_TYPE_GET(_short, short) +SHMEM_CTX_TYPE_GET(_int, int) +SHMEM_CTX_TYPE_GET(_long, long) +SHMEM_CTX_TYPE_GET(_longlong, long long) +SHMEM_CTX_TYPE_GET(_float, float) +SHMEM_CTX_TYPE_GET(_double, double) +SHMEM_CTX_TYPE_GET(_longdouble, long double) SHMEM_TYPE_GET(_char, char) SHMEM_TYPE_GET(_short, short) SHMEM_TYPE_GET(_int, int) @@ -71,9 +105,7 @@ SHMEM_TYPE_GET(_float, float) SHMEM_TYPE_GET(_double, double) SHMEM_TYPE_GET(_longdouble, long double) -#define SHMEM_TYPE_GETMEM(name, element_size, prefix) \ - void prefix##name(void *target, const void *source, size_t nelems, int pe) \ - { \ +#define DO_SHMEM_GETMEM(ctx, target, source, element_size, nelems, pe) do { \ int rc = OSHMEM_SUCCESS; \ size_t size = 0; \ \ @@ -83,15 +115,35 @@ SHMEM_TYPE_GET(_longdouble, long double) \ size = nelems * element_size; \ rc = MCA_SPML_CALL(get( \ + ctx, \ (void*)source, \ size, \ (void*)target, \ pe)); \ RUNTIME_CHECK_RC(rc); \ - \ + } while (0) + +#define SHMEM_CTX_TYPE_GETMEM(name, element_size, prefix) \ + void prefix##_ctx##name(shmem_ctx_t ctx, void *target, const void *source, size_t nelems, int pe) \ + { \ + DO_SHMEM_GETMEM(ctx, target, source, element_size, nelems, pe); \ return ; \ } +#define SHMEM_TYPE_GETMEM(name, element_size, prefix) \ + void prefix##name(void 
*target, const void *source, size_t nelems, int pe) \ + { \ + DO_SHMEM_GETMEM(oshmem_ctx_default, target, source, \ + element_size, nelems, pe); \ + return ; \ + } + +SHMEM_CTX_TYPE_GETMEM(_getmem, 1, shmem) +SHMEM_CTX_TYPE_GETMEM(_get8, 1, shmem) +SHMEM_CTX_TYPE_GETMEM(_get16, 2, shmem) +SHMEM_CTX_TYPE_GETMEM(_get32, 4, shmem) +SHMEM_CTX_TYPE_GETMEM(_get64, 8, shmem) +SHMEM_CTX_TYPE_GETMEM(_get128, 16, shmem) SHMEM_TYPE_GETMEM(_getmem, 1, shmem) SHMEM_TYPE_GETMEM(_get8, 1, shmem) SHMEM_TYPE_GETMEM(_get16, 2, shmem) diff --git a/oshmem/shmem/c/shmem_get_nb.c b/oshmem/shmem/c/shmem_get_nb.c index ba6e401b19..971223f91f 100644 --- a/oshmem/shmem/c/shmem_get_nb.c +++ b/oshmem/shmem/c/shmem_get_nb.c @@ -22,9 +22,7 @@ * on the remote PE (pe), to the data object at address target on the local PE. These routines * return after the data has been copied to address target on the local pe. */ -#define SHMEM_TYPE_GET_NB(type_name, type) \ - void shmem##type_name##_get_nbi(type *target, const type *source, size_t nelems, int pe) \ - { \ +#define DO_SHMEM_TYPE_GET_NB(ctx, type, target, source, nelems, pe) do { \ int rc = OSHMEM_SUCCESS; \ size_t size = 0; \ \ @@ -34,17 +32,39 @@ \ size = nelems * sizeof(type); \ rc = MCA_SPML_CALL(get_nb( \ + ctx, \ (void *)source, \ size, \ (void *)target, \ pe, NULL)); \ RUNTIME_CHECK_RC(rc); \ - \ + } while (0) + +#define SHMEM_CTX_TYPE_GET_NB(type_name, type) \ + void shmem_ctx##type_name##_get_nbi(shmem_ctx_t ctx, type *target, const type *source, size_t nelems, int pe) \ + { \ + DO_SHMEM_TYPE_GET_NB(ctx, type, target, source, nelems, pe); \ + return ; \ + } + +#define SHMEM_TYPE_GET_NB(type_name, type) \ + void shmem##type_name##_get_nbi(type *target, const type *source, size_t nelems, int pe) \ + { \ + DO_SHMEM_TYPE_GET_NB(oshmem_ctx_default, type, target, \ + source, nelems, pe); \ return ; \ } #if OSHMEM_PROFILING #include "oshmem/include/pshmem.h" +#pragma weak shmem_ctx_char_get_nbi = pshmem_ctx_char_get_nbi +#pragma weak 
shmem_ctx_short_get_nbi = pshmem_ctx_short_get_nbi +#pragma weak shmem_ctx_int_get_nbi = pshmem_ctx_int_get_nbi +#pragma weak shmem_ctx_long_get_nbi = pshmem_ctx_long_get_nbi +#pragma weak shmem_ctx_longlong_get_nbi = pshmem_ctx_longlong_get_nbi +#pragma weak shmem_ctx_float_get_nbi = pshmem_ctx_float_get_nbi +#pragma weak shmem_ctx_double_get_nbi = pshmem_ctx_double_get_nbi +#pragma weak shmem_ctx_longdouble_get_nbi = pshmem_ctx_longdouble_get_nbi #pragma weak shmem_char_get_nbi = pshmem_char_get_nbi #pragma weak shmem_short_get_nbi = pshmem_short_get_nbi #pragma weak shmem_int_get_nbi = pshmem_int_get_nbi @@ -53,6 +73,12 @@ #pragma weak shmem_float_get_nbi = pshmem_float_get_nbi #pragma weak shmem_double_get_nbi = pshmem_double_get_nbi #pragma weak shmem_longdouble_get_nbi = pshmem_longdouble_get_nbi +#pragma weak shmem_ctx_get8_nbi = pshmem_ctx_get8_nbi +#pragma weak shmem_ctx_get16_nbi = pshmem_ctx_get16_nbi +#pragma weak shmem_ctx_get32_nbi = pshmem_ctx_get32_nbi +#pragma weak shmem_ctx_get64_nbi = pshmem_ctx_get64_nbi +#pragma weak shmem_ctx_get128_nbi = pshmem_ctx_get128_nbi +#pragma weak shmem_ctx_getmem_nbi = pshmem_ctx_getmem_nbi #pragma weak shmem_get8_nbi = pshmem_get8_nbi #pragma weak shmem_get16_nbi = pshmem_get16_nbi #pragma weak shmem_get32_nbi = pshmem_get32_nbi @@ -62,6 +88,14 @@ #include "oshmem/shmem/c/profile/defines.h" #endif +SHMEM_CTX_TYPE_GET_NB(_char, char) +SHMEM_CTX_TYPE_GET_NB(_short, short) +SHMEM_CTX_TYPE_GET_NB(_int, int) +SHMEM_CTX_TYPE_GET_NB(_long, long) +SHMEM_CTX_TYPE_GET_NB(_longlong, long long) +SHMEM_CTX_TYPE_GET_NB(_float, float) +SHMEM_CTX_TYPE_GET_NB(_double, double) +SHMEM_CTX_TYPE_GET_NB(_longdouble, long double) SHMEM_TYPE_GET_NB(_char, char) SHMEM_TYPE_GET_NB(_short, short) SHMEM_TYPE_GET_NB(_int, int) @@ -71,9 +105,7 @@ SHMEM_TYPE_GET_NB(_float, float) SHMEM_TYPE_GET_NB(_double, double) SHMEM_TYPE_GET_NB(_longdouble, long double) -#define SHMEM_TYPE_GETMEM_NB(name, element_size, prefix) \ - void 
prefix##name##_nbi(void *target, const void *source, size_t nelems, int pe) \ - { \ +#define DO_SHMEM_GETMEM_NB(ctx, target, source, element_size, nelems, pe) do { \ int rc = OSHMEM_SUCCESS; \ size_t size = 0; \ \ @@ -83,15 +115,35 @@ SHMEM_TYPE_GET_NB(_longdouble, long double) \ size = nelems * element_size; \ rc = MCA_SPML_CALL(get_nb( \ + ctx, \ (void *)source, \ size, \ (void *)target, \ pe, NULL)); \ RUNTIME_CHECK_RC(rc); \ - \ + } while (0) + +#define SHMEM_CTX_TYPE_GETMEM_NB(name, element_size, prefix) \ + void prefix##_ctx##name##_nbi(shmem_ctx_t ctx, void *target, const void *source, size_t nelems, int pe) \ + { \ + DO_SHMEM_GETMEM_NB(ctx, target, source, element_size, nelems, pe); \ return ; \ } +#define SHMEM_TYPE_GETMEM_NB(name, element_size, prefix) \ + void prefix##name##_nbi(void *target, const void *source, size_t nelems, int pe) \ + { \ + DO_SHMEM_GETMEM_NB(oshmem_ctx_default, target, source, \ + element_size, nelems, pe); \ + return ; \ + } + +SHMEM_CTX_TYPE_GETMEM_NB(_get8, 1, shmem) +SHMEM_CTX_TYPE_GETMEM_NB(_get16, 2, shmem) +SHMEM_CTX_TYPE_GETMEM_NB(_get32, 4, shmem) +SHMEM_CTX_TYPE_GETMEM_NB(_get64, 8, shmem) +SHMEM_CTX_TYPE_GETMEM_NB(_get128, 16, shmem) +SHMEM_CTX_TYPE_GETMEM_NB(_getmem, 1, shmem) SHMEM_TYPE_GETMEM_NB(_get8, 1, shmem) SHMEM_TYPE_GETMEM_NB(_get16, 2, shmem) SHMEM_TYPE_GETMEM_NB(_get32, 4, shmem) diff --git a/oshmem/shmem/c/shmem_iget.c b/oshmem/shmem/c/shmem_iget.c index 4e8e7154f6..300d3c310e 100644 --- a/oshmem/shmem/c/shmem_iget.c +++ b/oshmem/shmem/c/shmem_iget.c @@ -23,9 +23,7 @@ * it is stored at the local memory address target, separated by stride tst. The routines return * when the data has been copied into the local target array. 
*/ -#define SHMEM_TYPE_IGET(type_name, type) \ - void shmem##type_name##_iget(type *target, const type *source, ptrdiff_t tst, ptrdiff_t sst, size_t nelems, int pe) \ - { \ +#define DO_SHMEM_TYPE_IGET(ctx, type, target, source, tst, sst, nelems, pe) do { \ int rc = OSHMEM_SUCCESS; \ size_t element_size = 0; \ size_t i = 0; \ @@ -38,18 +36,40 @@ for (i = 0; i < nelems; i++) \ { \ rc = MCA_SPML_CALL(get( \ + ctx, \ (void*)(source + i * sst), \ element_size, \ (void*)(target + i * tst), \ pe)); \ } \ RUNTIME_CHECK_RC(rc); \ - \ + } while (0) + +#define SHMEM_CTX_TYPE_IGET(type_name, type) \ + void shmem_ctx##type_name##_iget(shmem_ctx_t ctx, type *target, const type *source, ptrdiff_t tst, ptrdiff_t sst, size_t nelems, int pe) \ + { \ + DO_SHMEM_TYPE_IGET(ctx, type, target, source, tst, sst, nelems, pe); \ + return ; \ + } + +#define SHMEM_TYPE_IGET(type_name, type) \ + void shmem##type_name##_iget(type *target, const type *source, ptrdiff_t tst, ptrdiff_t sst, size_t nelems, int pe) \ + { \ + DO_SHMEM_TYPE_IGET(oshmem_ctx_default, type, target, source, \ + tst, sst, nelems, pe); \ return ; \ } #if OSHMEM_PROFILING #include "oshmem/include/pshmem.h" +#pragma weak shmem_ctx_char_iget = pshmem_ctx_char_iget +#pragma weak shmem_ctx_short_iget = pshmem_ctx_short_iget +#pragma weak shmem_ctx_int_iget = pshmem_ctx_int_iget +#pragma weak shmem_ctx_long_iget = pshmem_ctx_long_iget +#pragma weak shmem_ctx_longlong_iget = pshmem_ctx_longlong_iget +#pragma weak shmem_ctx_float_iget = pshmem_ctx_float_iget +#pragma weak shmem_ctx_double_iget = pshmem_ctx_double_iget +#pragma weak shmem_ctx_longdouble_iget = pshmem_ctx_longdouble_iget #pragma weak shmem_char_iget = pshmem_char_iget #pragma weak shmem_short_iget = pshmem_short_iget #pragma weak shmem_int_iget = pshmem_int_iget @@ -58,6 +78,11 @@ #pragma weak shmem_float_iget = pshmem_float_iget #pragma weak shmem_double_iget = pshmem_double_iget #pragma weak shmem_longdouble_iget = pshmem_longdouble_iget +#pragma weak 
shmem_ctx_iget8 = pshmem_ctx_iget8 +#pragma weak shmem_ctx_iget16 = pshmem_ctx_iget16 +#pragma weak shmem_ctx_iget32 = pshmem_ctx_iget32 +#pragma weak shmem_ctx_iget64 = pshmem_ctx_iget64 +#pragma weak shmem_ctx_iget128 = pshmem_ctx_iget128 #pragma weak shmem_iget8 = pshmem_iget8 #pragma weak shmem_iget16 = pshmem_iget16 #pragma weak shmem_iget32 = pshmem_iget32 @@ -66,6 +91,14 @@ #include "oshmem/shmem/c/profile/defines.h" #endif +SHMEM_CTX_TYPE_IGET(_char, char) +SHMEM_CTX_TYPE_IGET(_short, short) +SHMEM_CTX_TYPE_IGET(_int, int) +SHMEM_CTX_TYPE_IGET(_long, long) +SHMEM_CTX_TYPE_IGET(_longlong, long long) +SHMEM_CTX_TYPE_IGET(_float, float) +SHMEM_CTX_TYPE_IGET(_double, double) +SHMEM_CTX_TYPE_IGET(_longdouble, long double) SHMEM_TYPE_IGET(_char, char) SHMEM_TYPE_IGET(_short, short) SHMEM_TYPE_IGET(_int, int) @@ -75,9 +108,7 @@ SHMEM_TYPE_IGET(_float, float) SHMEM_TYPE_IGET(_double, double) SHMEM_TYPE_IGET(_longdouble, long double) -#define SHMEM_TYPE_IGETMEM(name, element_size, prefix) \ - void prefix##name(void *target, const void *source, ptrdiff_t tst, ptrdiff_t sst, size_t nelems, int pe) \ - { \ +#define DO_SHMEM_IGETMEM(ctx, target, source, tst, sst, element_size, nelems, pe) do { \ int rc = OSHMEM_SUCCESS; \ size_t i = 0; \ \ @@ -88,16 +119,36 @@ SHMEM_TYPE_IGET(_longdouble, long double) for (i = 0; i < nelems; i++) \ { \ rc = MCA_SPML_CALL(get( \ + ctx, \ (void*)((char*)source + i * sst * element_size), \ element_size, \ (void*)((char*)target + i * tst * element_size), \ pe)); \ } \ RUNTIME_CHECK_RC(rc); \ - \ + } while (0) + +#define SHMEM_CTX_TYPE_IGETMEM(name, element_size, prefix) \ + void prefix##_ctx##name(shmem_ctx_t ctx, void *target, const void *source, ptrdiff_t tst, ptrdiff_t sst, size_t nelems, int pe) \ + { \ + DO_SHMEM_IGETMEM(ctx, target, source, tst, sst, \ + element_size, nelems, pe); \ return ; \ } +#define SHMEM_TYPE_IGETMEM(name, element_size, prefix) \ + void prefix##name(void *target, const void *source, ptrdiff_t tst, ptrdiff_t sst, 
size_t nelems, int pe) \ + { \ + DO_SHMEM_IGETMEM(oshmem_ctx_default, target, source, tst, sst, \ + element_size, nelems, pe); \ + return ; \ + } + +SHMEM_CTX_TYPE_IGETMEM(_iget8, 1, shmem) +SHMEM_CTX_TYPE_IGETMEM(_iget16, 2, shmem) +SHMEM_CTX_TYPE_IGETMEM(_iget32, 4, shmem) +SHMEM_CTX_TYPE_IGETMEM(_iget64, 8, shmem) +SHMEM_CTX_TYPE_IGETMEM(_iget128, 16, shmem) SHMEM_TYPE_IGETMEM(_iget8, 1, shmem) SHMEM_TYPE_IGETMEM(_iget16, 2, shmem) SHMEM_TYPE_IGETMEM(_iget32, 4, shmem) diff --git a/oshmem/shmem/c/shmem_inc.c b/oshmem/shmem/c/shmem_inc.c index 6813b05bc2..f3e022d3b6 100644 --- a/oshmem/shmem/c/shmem_inc.c +++ b/oshmem/shmem/c/shmem_inc.c @@ -24,9 +24,7 @@ * one. The operation must be completed without the possibility of another process updating * target between the time of the fetch and the update. */ -#define SHMEM_TYPE_INC(type_name, type, prefix) \ - void prefix##type_name##_inc(type *target, int pe) \ - { \ +#define DO_SHMEM_TYPE_ATOMIC_INC(ctx, type_name, type, target, pe) do { \ int rc = OSHMEM_SUCCESS; \ size_t size = 0; \ type value = 1; \ @@ -37,17 +35,37 @@ \ size = sizeof(value); \ rc = MCA_ATOMIC_CALL(add( \ + ctx, \ (void*)target, \ value, \ size, \ pe)); \ RUNTIME_CHECK_RC(rc); \ - \ + } while (0) + +#define SHMEM_CTX_TYPE_ATOMIC_INC(type_name, type, prefix) \ + void prefix##_ctx##type_name##_atomic_inc(shmem_ctx_t ctx, type *target, int pe) \ + { \ + DO_SHMEM_TYPE_ATOMIC_INC(ctx, type_name, type, target, pe); \ + return ; \ + } + +#define SHMEM_TYPE_ATOMIC_INC(type_name, type, prefix) \ + void prefix##type_name##_atomic_inc(type *target, int pe) \ + { \ + DO_SHMEM_TYPE_ATOMIC_INC(oshmem_ctx_default, type_name, \ + type, target, pe); \ return ; \ } #if OSHMEM_PROFILING #include "oshmem/include/pshmem.h" +#pragma weak shmem_ctx_int_atomic_inc = pshmem_ctx_int_atomic_inc +#pragma weak shmem_ctx_long_atomic_inc = pshmem_ctx_long_atomic_inc +#pragma weak shmem_ctx_longlong_atomic_inc = pshmem_ctx_longlong_atomic_inc +#pragma weak shmem_int_atomic_inc = 
pshmem_int_atomic_inc +#pragma weak shmem_long_atomic_inc = pshmem_long_atomic_inc +#pragma weak shmem_longlong_atomic_inc = pshmem_longlong_atomic_inc #pragma weak shmem_int_inc = pshmem_int_inc #pragma weak shmem_long_inc = pshmem_long_inc #pragma weak shmem_longlong_inc = pshmem_longlong_inc @@ -56,6 +74,21 @@ #include "oshmem/shmem/c/profile/defines.h" #endif +SHMEM_CTX_TYPE_ATOMIC_INC(_int, int, shmem) +SHMEM_CTX_TYPE_ATOMIC_INC(_long, long, shmem) +SHMEM_CTX_TYPE_ATOMIC_INC(_longlong, long long, shmem) +SHMEM_TYPE_ATOMIC_INC(_int, int, shmem) +SHMEM_TYPE_ATOMIC_INC(_long, long, shmem) +SHMEM_TYPE_ATOMIC_INC(_longlong, long long, shmem) + +#define SHMEM_TYPE_INC(type_name, type, prefix) \ + void prefix##type_name##_inc(type *target, int pe) \ + { \ + DO_SHMEM_TYPE_ATOMIC_INC(oshmem_ctx_default, type_name, \ + type, target, pe); \ + return ; \ + } + SHMEM_TYPE_INC(_int, int, shmem) SHMEM_TYPE_INC(_long, long, shmem) SHMEM_TYPE_INC(_longlong, long long, shmem) diff --git a/oshmem/shmem/c/shmem_init.c b/oshmem/shmem/c/shmem_init.c index 06e7af7568..934d2b6023 100644 --- a/oshmem/shmem/c/shmem_init.c +++ b/oshmem/shmem/c/shmem_init.c @@ -28,24 +28,33 @@ #if OSHMEM_PROFILING #include "oshmem/include/pshmem.h" #pragma weak shmem_init = pshmem_init +#pragma weak shmem_init_thread = pshmem_init_thread #pragma weak start_pes = pstart_pes #include "oshmem/shmem/c/profile/defines.h" #endif extern int oshmem_shmem_globalexit_status; -static inline void _shmem_init(void); +static inline void _shmem_init(int required, int *provided); void shmem_init(void) { + int provided; /* spec says that npes are ignored for now */ - _shmem_init(); + _shmem_init(SHMEM_THREAD_SINGLE, &provided); +} + +int shmem_init_thread(int requested, int *provided) +{ + _shmem_init(requested, provided); + return 0; } void start_pes(int npes) { + int provided; /* spec says that npes are ignored for now */ - _shmem_init(); + _shmem_init(SHMEM_THREAD_SINGLE, &provided); } static void shmem_onexit(int 
exitcode, void *arg) @@ -54,11 +63,9 @@ static void shmem_onexit(int exitcode, void *arg) shmem_finalize(); } -static inline void _shmem_init(void) +static inline void _shmem_init(int required, int *provided) { int err = OSHMEM_SUCCESS; - int provided; - int required = SHMEM_THREAD_SINGLE; if (oshmem_shmem_initialized) { /* @@ -67,7 +74,7 @@ static inline void _shmem_init(void) return; } - err = oshmem_shmem_init(0, NULL, required, &provided); + err = oshmem_shmem_init(0, NULL, required, provided); if (OSHMEM_SUCCESS != err) { /* since spec does not propagete error to user we can only abort */ SHMEM_API_ERROR("SHMEM failed to initialize - aborting"); diff --git a/oshmem/shmem/c/shmem_iput.c b/oshmem/shmem/c/shmem_iput.c index 9067963a6f..9b0137b198 100644 --- a/oshmem/shmem/c/shmem_iput.c +++ b/oshmem/shmem/c/shmem_iput.c @@ -23,9 +23,7 @@ * been copied out of the source array on the local PE but not necessarily before the data has * been delivered to the remote data object. */ -#define SHMEM_TYPE_IPUT(type_name, type) \ - void shmem##type_name##_iput(type *target, const type *source, ptrdiff_t tst, ptrdiff_t sst, size_t nelems, int pe) \ - { \ +#define DO_SHMEM_TYPE_IPUT(ctx, type, target, source, tst, sst, nelems, pe) do { \ int rc = OSHMEM_SUCCESS; \ size_t element_size = 0; \ size_t i = 0; \ @@ -38,18 +36,41 @@ for (i = 0; i < nelems; i++) \ { \ rc = MCA_SPML_CALL(put( \ + ctx, \ (void*)(target + i * tst), \ element_size, \ (void*)(source + i * sst), \ pe)); \ } \ RUNTIME_CHECK_RC(rc); \ - \ + } while (0) + +#define SHMEM_CTX_TYPE_IPUT(type_name, type) \ + void shmem_ctx##type_name##_iput(shmem_ctx_t ctx, type *target, const type *source, ptrdiff_t tst, ptrdiff_t sst, size_t nelems, int pe) \ + { \ + DO_SHMEM_TYPE_IPUT(ctx, type, target, source, tst, sst, \ + nelems, pe); \ + return ; \ + } + +#define SHMEM_TYPE_IPUT(type_name, type) \ + void shmem##type_name##_iput(type *target, const type *source, ptrdiff_t tst, ptrdiff_t sst, size_t nelems, int pe) \ + { \
+ DO_SHMEM_TYPE_IPUT(oshmem_ctx_default, type, target, source, \ + tst, sst, nelems, pe); \ return ; \ } #if OSHMEM_PROFILING #include "oshmem/include/pshmem.h" +#pragma weak shmem_ctx_char_iput = pshmem_ctx_char_iput +#pragma weak shmem_ctx_short_iput = pshmem_ctx_short_iput +#pragma weak shmem_ctx_int_iput = pshmem_ctx_int_iput +#pragma weak shmem_ctx_long_iput = pshmem_ctx_long_iput +#pragma weak shmem_ctx_longlong_iput = pshmem_ctx_longlong_iput +#pragma weak shmem_ctx_float_iput = pshmem_ctx_float_iput +#pragma weak shmem_ctx_double_iput = pshmem_ctx_double_iput +#pragma weak shmem_ctx_longdouble_iput = pshmem_ctx_longdouble_iput #pragma weak shmem_char_iput = pshmem_char_iput #pragma weak shmem_short_iput = pshmem_short_iput #pragma weak shmem_int_iput = pshmem_int_iput @@ -58,14 +79,27 @@ #pragma weak shmem_float_iput = pshmem_float_iput #pragma weak shmem_double_iput = pshmem_double_iput #pragma weak shmem_longdouble_iput = pshmem_longdouble_iput -#pragma weak shmemx_iput8 = pshmem_iput8 -#pragma weak shmemx_iput16 = pshmem_iput16 +#pragma weak shmem_ctx_iput8 = pshmem_ctx_iput8 +#pragma weak shmem_ctx_iput16 = pshmem_ctx_iput16 +#pragma weak shmem_ctx_iput32 = pshmem_ctx_iput32 +#pragma weak shmem_ctx_iput64 = pshmem_ctx_iput64 +#pragma weak shmem_ctx_iput128 = pshmem_ctx_iput128 +#pragma weak shmem_iput8 = pshmem_iput8 +#pragma weak shmem_iput16 = pshmem_iput16 #pragma weak shmem_iput32 = pshmem_iput32 #pragma weak shmem_iput64 = pshmem_iput64 #pragma weak shmem_iput128 = pshmem_iput128 #include "oshmem/shmem/c/profile/defines.h" #endif +SHMEM_CTX_TYPE_IPUT(_char, char) +SHMEM_CTX_TYPE_IPUT(_short, short) +SHMEM_CTX_TYPE_IPUT(_int, int) +SHMEM_CTX_TYPE_IPUT(_long, long) +SHMEM_CTX_TYPE_IPUT(_longlong, long long) +SHMEM_CTX_TYPE_IPUT(_float, float) +SHMEM_CTX_TYPE_IPUT(_double, double) +SHMEM_CTX_TYPE_IPUT(_longdouble, long double) SHMEM_TYPE_IPUT(_char, char) SHMEM_TYPE_IPUT(_short, short) SHMEM_TYPE_IPUT(_int, int) @@ -75,9 +109,7 @@ 
SHMEM_TYPE_IPUT(_float, float) SHMEM_TYPE_IPUT(_double, double) SHMEM_TYPE_IPUT(_longdouble, long double) -#define SHMEM_TYPE_IPUTMEM(name, element_size, prefix) \ - void prefix##name(void *target, const void *source, ptrdiff_t tst, ptrdiff_t sst, size_t nelems, int pe) \ - { \ +#define DO_SHMEM_IPUTMEM(ctx, target, source, tst, sst, element_size, nelems, pe) do { \ int rc = OSHMEM_SUCCESS; \ size_t i = 0; \ \ @@ -88,16 +120,36 @@ SHMEM_TYPE_IPUT(_longdouble, long double) for (i = 0; i < nelems; i++) \ { \ rc = MCA_SPML_CALL(put( \ + ctx, \ (void*)((char*)target + i * tst * element_size), \ element_size, \ (void*)((char*)source + i * sst * element_size), \ pe)); \ } \ RUNTIME_CHECK_RC(rc); \ - \ + } while (0) + +#define SHMEM_CTX_TYPE_IPUTMEM(name, element_size, prefix) \ + void prefix##_ctx##name(shmem_ctx_t ctx, void *target, const void *source, ptrdiff_t tst, ptrdiff_t sst, size_t nelems, int pe) \ + { \ + DO_SHMEM_IPUTMEM(ctx, target, source, tst, sst, \ + element_size, nelems, pe); \ return ; \ } +#define SHMEM_TYPE_IPUTMEM(name, element_size, prefix) \ + void prefix##name(void *target, const void *source, ptrdiff_t tst, ptrdiff_t sst, size_t nelems, int pe) \ + { \ + DO_SHMEM_IPUTMEM(oshmem_ctx_default, target, source, tst, \ + sst, element_size, nelems, pe); \ + return ; \ + } + +SHMEM_CTX_TYPE_IPUTMEM(_iput8, 1, shmem) +SHMEM_CTX_TYPE_IPUTMEM(_iput16, 2, shmem) +SHMEM_CTX_TYPE_IPUTMEM(_iput32, 4, shmem) +SHMEM_CTX_TYPE_IPUTMEM(_iput64, 8, shmem) +SHMEM_CTX_TYPE_IPUTMEM(_iput128, 16, shmem) SHMEM_TYPE_IPUTMEM(_iput8, 1, shmem) SHMEM_TYPE_IPUTMEM(_iput16, 2, shmem) SHMEM_TYPE_IPUTMEM(_iput32, 4, shmem) diff --git a/oshmem/shmem/c/shmem_lock.c b/oshmem/shmem/c/shmem_lock.c index 8424e15468..4bd524f188 100644 --- a/oshmem/shmem/c/shmem_lock.c +++ b/oshmem/shmem/c/shmem_lock.c @@ -269,13 +269,13 @@ static uint64_t shmem_lock_cswap(void *target, uint64_t prev_value = 0; if (target_size == 8) { - MCA_ATOMIC_CALL(cswap( target, (void*)&prev_value, cond, value, 
target_size, pe)); + MCA_ATOMIC_CALL(cswap(oshmem_ctx_default, target, (void*)&prev_value, cond, value, target_size, pe)); } else if (target_size == 4) { uint32_t prev_value_32 = 0; uint32_t cond32 = (uint32_t) cond; uint32_t value32 = (uint32_t) value; - MCA_ATOMIC_CALL(cswap( target, (void*)&prev_value_32, cond32, value32, target_size, pe)); + MCA_ATOMIC_CALL(cswap(oshmem_ctx_default, target, (void*)&prev_value_32, cond32, value32, target_size, pe)); prev_value = prev_value_32; } diff --git a/oshmem/shmem/c/shmem_or.c b/oshmem/shmem/c/shmem_or.c index ba9460f151..1ae67efbc6 100644 --- a/oshmem/shmem/c/shmem_or.c +++ b/oshmem/shmem/c/shmem_or.c @@ -28,6 +28,9 @@ #pragma weak shmem_uint_atomic_or = pshmem_uint_atomic_or #pragma weak shmem_ulong_atomic_or = pshmem_ulong_atomic_or #pragma weak shmem_ulonglong_atomic_or = pshmem_ulonglong_atomic_or +#pragma weak shmem_ctx_uint_atomic_or = pshmem_ctx_uint_atomic_or +#pragma weak shmem_ctx_ulong_atomic_or = pshmem_ctx_ulong_atomic_or +#pragma weak shmem_ctx_ulonglong_atomic_or = pshmem_ctx_ulonglong_atomic_or #pragma weak shmemx_int32_atomic_or = pshmemx_int32_atomic_or #pragma weak shmemx_int64_atomic_or = pshmemx_int64_atomic_or #pragma weak shmemx_uint32_atomic_or = pshmemx_uint32_atomic_or @@ -38,6 +41,9 @@ OSHMEM_TYPE_OP(uint, unsigned int, shmem, or) OSHMEM_TYPE_OP(ulong, unsigned long, shmem, or) OSHMEM_TYPE_OP(ulonglong, unsigned long long, shmem, or) +OSHMEM_CTX_TYPE_OP(uint, unsigned int, shmem, or) +OSHMEM_CTX_TYPE_OP(ulong, unsigned long, shmem, or) +OSHMEM_CTX_TYPE_OP(ulonglong, unsigned long long, shmem, or) OSHMEM_TYPE_OP(int32, int32_t, shmemx, or) OSHMEM_TYPE_OP(int64, int64_t, shmemx, or) OSHMEM_TYPE_OP(uint32, uint32_t, shmemx, or) diff --git a/oshmem/shmem/c/shmem_p.c b/oshmem/shmem/c/shmem_p.c index 5e71c65667..2f38b32cb3 100644 --- a/oshmem/shmem/c/shmem_p.c +++ b/oshmem/shmem/c/shmem_p.c @@ -24,9 +24,8 @@ * data object of the remote PE indicated by the parameter pe. 
These routines start the remote * transfer and may return before the data is delivered to the remote PE. */ -#define SHMEM_TYPE_P(type_name, type, prefix) \ - void prefix##type_name##_p(type *addr, type value, int pe) \ - { \ + +#define DO_SHMEM_TYPE_P(ctx, type, addr, value, pe) do { \ int rc = OSHMEM_SUCCESS; \ size_t size = 0; \ \ @@ -36,17 +35,38 @@ \ size = sizeof(type); \ rc = MCA_SPML_CALL(put( \ + ctx, \ (void*)addr, \ size, \ (void*)&value, \ pe)); \ RUNTIME_CHECK_RC(rc); \ - \ + } while(0) + +#define SHMEM_CTX_TYPE_P(type_name, type, prefix) \ + void prefix##_ctx##type_name##_p(shmem_ctx_t ctx, type *addr, type value, int pe) \ + { \ + DO_SHMEM_TYPE_P(ctx, type, addr, value, pe); \ + return ; \ + } + +#define SHMEM_TYPE_P(type_name, type, prefix) \ + void prefix##type_name##_p(type *addr, type value, int pe) \ + { \ + DO_SHMEM_TYPE_P(oshmem_ctx_default, type, addr, value, pe); \ return ; \ } #if OSHMEM_PROFILING #include "oshmem/include/pshmem.h" +#pragma weak shmem_ctx_char_p = pshmem_ctx_char_p +#pragma weak shmem_ctx_short_p = pshmem_ctx_short_p +#pragma weak shmem_ctx_int_p = pshmem_ctx_int_p +#pragma weak shmem_ctx_long_p = pshmem_ctx_long_p +#pragma weak shmem_ctx_longlong_p = pshmem_ctx_longlong_p +#pragma weak shmem_ctx_float_p = pshmem_ctx_float_p +#pragma weak shmem_ctx_double_p = pshmem_ctx_double_p +#pragma weak shmem_ctx_longdouble_p = pshmem_ctx_longdouble_p #pragma weak shmem_char_p = pshmem_char_p #pragma weak shmem_short_p = pshmem_short_p #pragma weak shmem_int_p = pshmem_int_p @@ -61,6 +81,14 @@ #include "oshmem/shmem/c/profile/defines.h" #endif +SHMEM_CTX_TYPE_P(_char, char, shmem) +SHMEM_CTX_TYPE_P(_short, short, shmem) +SHMEM_CTX_TYPE_P(_int, int, shmem) +SHMEM_CTX_TYPE_P(_long, long, shmem) +SHMEM_CTX_TYPE_P(_longlong, long long, shmem) +SHMEM_CTX_TYPE_P(_float, float, shmem) +SHMEM_CTX_TYPE_P(_double, double, shmem) +SHMEM_CTX_TYPE_P(_longdouble, long double, shmem) SHMEM_TYPE_P(_char, char, shmem) SHMEM_TYPE_P(_short, short, 
shmem) SHMEM_TYPE_P(_int, int, shmem) diff --git a/oshmem/shmem/c/shmem_put.c b/oshmem/shmem/c/shmem_put.c index 7332256245..c734409ea7 100644 --- a/oshmem/shmem/c/shmem_put.c +++ b/oshmem/shmem/c/shmem_put.c @@ -25,9 +25,7 @@ * order. Because of this, two successive put operations may deliver data out of order unless a * call to shmem_fence() is introduced between the two calls. */ -#define SHMEM_TYPE_PUT(type_name, type) \ - void shmem##type_name##_put(type *target, const type *source, size_t len, int pe) \ - { \ +#define DO_SHMEM_TYPE_PUT(ctx, type, target, source, len, pe) do { \ int rc = OSHMEM_SUCCESS; \ size_t size = 0; \ \ @@ -37,17 +35,39 @@ \ size = len * sizeof(type); \ rc = MCA_SPML_CALL(put( \ + ctx, \ (void*)target, \ size, \ (void*)source, \ pe)); \ RUNTIME_CHECK_RC(rc); \ - \ + } while (0) + +#define SHMEM_CTX_TYPE_PUT(type_name, type) \ + void shmem_ctx##type_name##_put(shmem_ctx_t ctx, type *target, const type *source, size_t len, int pe)\ + { \ + DO_SHMEM_TYPE_PUT(ctx, type, target, source, len, pe); \ + return ; \ + } + +#define SHMEM_TYPE_PUT(type_name, type) \ + void shmem##type_name##_put(type *target, const type *source, size_t len, int pe)\ + { \ + DO_SHMEM_TYPE_PUT(oshmem_ctx_default, type, target, \ + source, len, pe); \ return ; \ } #if OSHMEM_PROFILING #include "oshmem/include/pshmem.h" +#pragma weak shmem_ctx_char_put = pshmem_ctx_char_put +#pragma weak shmem_ctx_short_put = pshmem_ctx_short_put +#pragma weak shmem_ctx_int_put = pshmem_ctx_int_put +#pragma weak shmem_ctx_long_put = pshmem_ctx_long_put +#pragma weak shmem_ctx_longlong_put = pshmem_ctx_longlong_put +#pragma weak shmem_ctx_float_put = pshmem_ctx_float_put +#pragma weak shmem_ctx_double_put = pshmem_ctx_double_put +#pragma weak shmem_ctx_longdouble_put = pshmem_ctx_longdouble_put #pragma weak shmem_char_put = pshmem_char_put #pragma weak shmem_short_put = pshmem_short_put #pragma weak shmem_int_put = pshmem_int_put @@ -56,6 +76,12 @@ #pragma weak shmem_float_put = 
pshmem_float_put #pragma weak shmem_double_put = pshmem_double_put #pragma weak shmem_longdouble_put = pshmem_longdouble_put +#pragma weak shmem_ctx_putmem = pshmem_ctx_putmem +#pragma weak shmem_ctx_put8 = pshmem_ctx_put8 +#pragma weak shmem_ctx_put16 = pshmem_ctx_put16 +#pragma weak shmem_ctx_put32 = pshmem_ctx_put32 +#pragma weak shmem_ctx_put64 = pshmem_ctx_put64 +#pragma weak shmem_ctx_put128 = pshmem_ctx_put128 #pragma weak shmem_putmem = pshmem_putmem #pragma weak shmem_put8 = pshmem_put8 #pragma weak shmem_put16 = pshmem_put16 @@ -65,6 +91,14 @@ #include "oshmem/shmem/c/profile/defines.h" #endif +SHMEM_CTX_TYPE_PUT(_char, char) +SHMEM_CTX_TYPE_PUT(_short, short) +SHMEM_CTX_TYPE_PUT(_int, int) +SHMEM_CTX_TYPE_PUT(_long, long) +SHMEM_CTX_TYPE_PUT(_longlong, long long) +SHMEM_CTX_TYPE_PUT(_float, float) +SHMEM_CTX_TYPE_PUT(_double, double) +SHMEM_CTX_TYPE_PUT(_longdouble, long double) SHMEM_TYPE_PUT(_char, char) SHMEM_TYPE_PUT(_short, short) SHMEM_TYPE_PUT(_int, int) @@ -74,9 +108,7 @@ SHMEM_TYPE_PUT(_float, float) SHMEM_TYPE_PUT(_double, double) SHMEM_TYPE_PUT(_longdouble, long double) -#define SHMEM_TYPE_PUTMEM(name, element_size, prefix) \ - void prefix##name(void *target, const void *source, size_t nelems, int pe) \ - { \ +#define DO_SHMEM_PUTMEM(ctx, target, source, element_size, nelems, pe) do { \ int rc = OSHMEM_SUCCESS; \ size_t size = 0; \ \ @@ -86,15 +118,36 @@ SHMEM_TYPE_PUT(_longdouble, long double) \ size = nelems * element_size; \ rc = MCA_SPML_CALL(put( \ + ctx, \ (void*)target, \ size, \ (void*)source, \ pe)); \ RUNTIME_CHECK_RC(rc); \ - \ + } while (0) + +#define SHMEM_CTX_TYPE_PUTMEM(name, element_size, prefix) \ + void prefix##_ctx##name(shmem_ctx_t ctx, void *target, const void *source, size_t nelems, int pe) \ + { \ + DO_SHMEM_PUTMEM(ctx, target, source, \ + element_size, nelems, pe); \ return ; \ } +#define SHMEM_TYPE_PUTMEM(name, element_size, prefix) \ + void prefix##name(void *target, const void *source, size_t nelems, int pe) \ + { \ 
+ DO_SHMEM_PUTMEM(oshmem_ctx_default, target, \ + source, element_size, nelems, pe); \ + return ; \ + } + +SHMEM_CTX_TYPE_PUTMEM(_putmem, 1, shmem) +SHMEM_CTX_TYPE_PUTMEM(_put8, 1, shmem) +SHMEM_CTX_TYPE_PUTMEM(_put16, 2, shmem) +SHMEM_CTX_TYPE_PUTMEM(_put32, 4, shmem) +SHMEM_CTX_TYPE_PUTMEM(_put64, 8, shmem) +SHMEM_CTX_TYPE_PUTMEM(_put128, 16, shmem) SHMEM_TYPE_PUTMEM(_putmem, 1, shmem) SHMEM_TYPE_PUTMEM(_put8, 1, shmem) SHMEM_TYPE_PUTMEM(_put16, 2, shmem) diff --git a/oshmem/shmem/c/shmem_put_nb.c b/oshmem/shmem/c/shmem_put_nb.c index 925e26e33f..eb422ad676 100644 --- a/oshmem/shmem/c/shmem_put_nb.c +++ b/oshmem/shmem/c/shmem_put_nb.c @@ -29,9 +29,7 @@ * subsequent call to shmem_quiet. At the completion of shmem_quiet, the data has been copied * into the dest array on the destination PE. */ -#define SHMEM_TYPE_PUT_NB(type_name, type) \ - void shmem##type_name##_put_nbi(type *target, const type *source, size_t len, int pe) \ - { \ +#define DO_SHMEM_TYPE_PUT_NB(ctx, type, target, source, len, pe) do { \ int rc = OSHMEM_SUCCESS; \ size_t size = 0; \ \ @@ -41,12 +39,26 @@ \ size = len * sizeof(type); \ rc = MCA_SPML_CALL(put_nb( \ + ctx, \ (void *)target, \ size, \ (void *)source, \ pe, NULL)); \ RUNTIME_CHECK_RC(rc); \ - \ + } while (0) + +#define SHMEM_CTX_TYPE_PUT_NB(type_name, type) \ + void shmem_ctx##type_name##_put_nbi(shmem_ctx_t ctx, type *target, const type *source, size_t len, int pe) \ + { \ + DO_SHMEM_TYPE_PUT_NB(ctx, type, target, source, len, pe); \ + return ; \ + } + +#define SHMEM_TYPE_PUT_NB(type_name, type) \ + void shmem##type_name##_put_nbi(type *target, const type *source, size_t len, int pe) \ + { \ + DO_SHMEM_TYPE_PUT_NB(oshmem_ctx_default, type, target, \ + source, len, pe); \ return ; \ } @@ -66,9 +78,31 @@ #pragma weak shmem_put64_nbi = pshmem_put64_nbi #pragma weak shmem_put128_nbi = pshmem_put128_nbi #pragma weak shmem_putmem_nbi = pshmem_putmem_nbi +#pragma weak shmem_ctx_char_put_nbi = pshmem_ctx_char_put_nbi +#pragma weak 
shmem_ctx_short_put_nbi = pshmem_ctx_short_put_nbi +#pragma weak shmem_ctx_int_put_nbi = pshmem_ctx_int_put_nbi +#pragma weak shmem_ctx_long_put_nbi = pshmem_ctx_long_put_nbi +#pragma weak shmem_ctx_longlong_put_nbi = pshmem_ctx_longlong_put_nbi +#pragma weak shmem_ctx_float_put_nbi = pshmem_ctx_float_put_nbi +#pragma weak shmem_ctx_double_put_nbi = pshmem_ctx_double_put_nbi +#pragma weak shmem_ctx_longdouble_put_nbi = pshmem_ctx_longdouble_put_nbi +#pragma weak shmem_ctx_put8_nbi = pshmem_ctx_put8_nbi +#pragma weak shmem_ctx_put16_nbi = pshmem_ctx_put16_nbi +#pragma weak shmem_ctx_put32_nbi = pshmem_ctx_put32_nbi +#pragma weak shmem_ctx_put64_nbi = pshmem_ctx_put64_nbi +#pragma weak shmem_ctx_put128_nbi = pshmem_ctx_put128_nbi +#pragma weak shmem_ctx_putmem_nbi = pshmem_ctx_putmem_nbi #include "oshmem/shmem/c/profile/defines.h" #endif +SHMEM_CTX_TYPE_PUT_NB(_char, char) +SHMEM_CTX_TYPE_PUT_NB(_short, short) +SHMEM_CTX_TYPE_PUT_NB(_int, int) +SHMEM_CTX_TYPE_PUT_NB(_long, long) +SHMEM_CTX_TYPE_PUT_NB(_longlong, long long) +SHMEM_CTX_TYPE_PUT_NB(_float, float) +SHMEM_CTX_TYPE_PUT_NB(_double, double) +SHMEM_CTX_TYPE_PUT_NB(_longdouble, long double) SHMEM_TYPE_PUT_NB(_char, char) SHMEM_TYPE_PUT_NB(_short, short) SHMEM_TYPE_PUT_NB(_int, int) @@ -78,9 +112,7 @@ SHMEM_TYPE_PUT_NB(_float, float) SHMEM_TYPE_PUT_NB(_double, double) SHMEM_TYPE_PUT_NB(_longdouble, long double) -#define SHMEM_TYPE_PUTMEM_NB(name, element_size, prefix) \ - void prefix##name##_nbi(void *target, const void *source, size_t nelems, int pe) \ - { \ +#define DO_SHMEM_PUTMEM_NB(ctx, target, source, element_size, nelems, pe) do { \ int rc = OSHMEM_SUCCESS; \ size_t size = 0; \ \ @@ -90,15 +122,36 @@ SHMEM_TYPE_PUT_NB(_longdouble, long double) \ size = nelems * element_size; \ rc = MCA_SPML_CALL(put_nb( \ + ctx, \ (void *)target, \ size, \ (void *)source, \ pe, NULL)); \ RUNTIME_CHECK_RC(rc); \ - \ + } while (0) + +#define SHMEM_CTX_TYPE_PUTMEM_NB(name, element_size, prefix) \ + void 
prefix##_ctx##name##_nbi(shmem_ctx_t ctx, void *target, const void *source, size_t nelems, int pe) \ + { \ + DO_SHMEM_PUTMEM_NB(ctx, target, source, element_size, \ + nelems, pe); \ return ; \ } +#define SHMEM_TYPE_PUTMEM_NB(name, element_size, prefix) \ + void prefix##name##_nbi(void *target, const void *source, size_t nelems, int pe) \ + { \ + DO_SHMEM_PUTMEM_NB(oshmem_ctx_default, target, source, \ + element_size, nelems, pe); \ + return ; \ + } + +SHMEM_CTX_TYPE_PUTMEM_NB(_put8, 1, shmem) +SHMEM_CTX_TYPE_PUTMEM_NB(_put16, 2, shmem) +SHMEM_CTX_TYPE_PUTMEM_NB(_put32, 4, shmem) +SHMEM_CTX_TYPE_PUTMEM_NB(_put64, 8, shmem) +SHMEM_CTX_TYPE_PUTMEM_NB(_put128, 16, shmem) +SHMEM_CTX_TYPE_PUTMEM_NB(_putmem, 1, shmem) SHMEM_TYPE_PUTMEM_NB(_put8, 1, shmem) SHMEM_TYPE_PUTMEM_NB(_put16, 2, shmem) SHMEM_TYPE_PUTMEM_NB(_put32, 4, shmem) diff --git a/oshmem/shmem/c/shmem_query.c b/oshmem/shmem/c/shmem_query.c index ec4412d69a..fd3f1771d9 100644 --- a/oshmem/shmem/c/shmem_query.c +++ b/oshmem/shmem/c/shmem_query.c @@ -20,6 +20,7 @@ #include "oshmem/include/pshmem.h" #pragma weak shmem_n_pes = pshmem_n_pes #pragma weak shmem_my_pe = pshmem_my_pe +#pragma weak shmem_query_thread = pshmem_query_thread #pragma weak _num_pes = p_num_pes #pragma weak _my_pe = p_my_pe #include "oshmem/shmem/c/profile/defines.h" @@ -60,3 +61,8 @@ int my_pe(void) RUNTIME_CHECK_INIT(); return oshmem_my_proc_id(); } +void shmem_query_thread(int *provided) +{ + RUNTIME_CHECK_INIT(); + (*provided) = oshmem_mpi_thread_provided; +} diff --git a/oshmem/shmem/c/shmem_quiet.c b/oshmem/shmem/c/shmem_quiet.c index 834e6c7fe1..e27b273d30 100644 --- a/oshmem/shmem/c/shmem_quiet.c +++ b/oshmem/shmem/c/shmem_quiet.c @@ -17,11 +17,18 @@ #if OSHMEM_PROFILING #include "oshmem/include/pshmem.h" #pragma weak shmem_quiet = pshmem_quiet +#pragma weak shmem_ctx_quiet = pshmem_ctx_quiet #include "oshmem/shmem/c/profile/defines.h" #endif void shmem_quiet(void) { - MCA_SPML_CALL(quiet()); + 
MCA_SPML_CALL(quiet(oshmem_ctx_default)); +} + +void shmem_ctx_quiet(shmem_ctx_t ctx) +{ + + MCA_SPML_CALL(quiet(ctx)); } diff --git a/oshmem/shmem/c/shmem_realloc.c b/oshmem/shmem/c/shmem_realloc.c index 88e2d94164..0a45cf9fe3 100644 --- a/oshmem/shmem/c/shmem_realloc.c +++ b/oshmem/shmem/c/shmem_realloc.c @@ -45,8 +45,12 @@ static inline void* _shrealloc(void *ptr, size_t size) RUNTIME_CHECK_INIT(); + SHMEM_MUTEX_LOCK(shmem_internal_mutex_alloc); + rc = MCA_MEMHEAP_CALL(realloc(size, ptr, &pBuff)); + SHMEM_MUTEX_UNLOCK(shmem_internal_mutex_alloc); + if (OSHMEM_SUCCESS != rc) { SHMEM_API_VERBOSE(1, "Allocation with shrealloc(ptr=%p, size=%lu) failed.", diff --git a/oshmem/shmem/c/shmem_set.c b/oshmem/shmem/c/shmem_set.c index fd129baf40..7c9df841f3 100644 --- a/oshmem/shmem/c/shmem_set.c +++ b/oshmem/shmem/c/shmem_set.c @@ -23,9 +23,7 @@ * The operation must be completed without the possibility of another * process updating the target during the set. */ -#define SHMEM_TYPE_SET(type_name, type, prefix) \ - void prefix##type_name##_set(type *target, type value, int pe) \ - { \ +#define DO_SHMEM_TYPE_ATOMIC_SET(ctx, type, target, value, pe) do { \ int rc = OSHMEM_SUCCESS; \ size_t size = 0; \ type out_value; \ @@ -36,16 +34,41 @@ \ size = sizeof(out_value); \ rc = MCA_ATOMIC_CALL(swap( \ + ctx, \ (void*)target, \ (void*)&out_value, \ value, \ size, \ pe)); \ RUNTIME_CHECK_RC(rc); \ + } while (0) + +#define SHMEM_CTX_TYPE_ATOMIC_SET(type_name, type, prefix) \ + void prefix##_ctx##type_name##_atomic_set(shmem_ctx_t ctx, type *target, type value, int pe) \ + { \ + DO_SHMEM_TYPE_ATOMIC_SET(ctx, type, target, value, pe); \ + return; \ + } + +#define SHMEM_TYPE_ATOMIC_SET(type_name, type, prefix) \ + void prefix##type_name##_atomic_set(type *target, type value, int pe) \ + { \ + DO_SHMEM_TYPE_ATOMIC_SET(oshmem_ctx_default, type, target, value, pe); \ + return; \ } #if OSHMEM_PROFILING #include "oshmem/include/pshmem.h" +#pragma weak shmem_ctx_int_atomic_set = 
pshmem_ctx_int_atomic_set +#pragma weak shmem_ctx_long_atomic_set = pshmem_ctx_long_atomic_set +#pragma weak shmem_ctx_longlong_atomic_set = pshmem_ctx_longlong_atomic_set +#pragma weak shmem_ctx_float_atomic_set = pshmem_ctx_float_atomic_set +#pragma weak shmem_ctx_double_atomic_set = pshmem_ctx_double_atomic_set +#pragma weak shmem_int_atomic_set = pshmem_int_atomic_set +#pragma weak shmem_long_atomic_set = pshmem_long_atomic_set +#pragma weak shmem_longlong_atomic_set = pshmem_longlong_atomic_set +#pragma weak shmem_float_atomic_set = pshmem_float_atomic_set +#pragma weak shmem_double_atomic_set = pshmem_double_atomic_set #pragma weak shmem_int_set = pshmem_int_set #pragma weak shmem_long_set = pshmem_long_set #pragma weak shmem_longlong_set = pshmem_longlong_set @@ -56,6 +79,24 @@ #include "oshmem/shmem/c/profile/defines.h" #endif +SHMEM_CTX_TYPE_ATOMIC_SET(_int, int, shmem) +SHMEM_CTX_TYPE_ATOMIC_SET(_long, long, shmem) +SHMEM_CTX_TYPE_ATOMIC_SET(_longlong, long long, shmem) +SHMEM_CTX_TYPE_ATOMIC_SET(_float, float, shmem) +SHMEM_CTX_TYPE_ATOMIC_SET(_double, double, shmem) +SHMEM_TYPE_ATOMIC_SET(_int, int, shmem) +SHMEM_TYPE_ATOMIC_SET(_long, long, shmem) +SHMEM_TYPE_ATOMIC_SET(_longlong, long long, shmem) +SHMEM_TYPE_ATOMIC_SET(_float, float, shmem) +SHMEM_TYPE_ATOMIC_SET(_double, double, shmem) + +/* deprecated APIs */ +#define SHMEM_TYPE_SET(type_name, type, prefix) \ + void prefix##type_name##_set(type *target, type value, int pe) \ + { \ + DO_SHMEM_TYPE_ATOMIC_SET(oshmem_ctx_default, type, target, value, pe); \ + return; \ + } SHMEM_TYPE_SET(_int, int, shmem) SHMEM_TYPE_SET(_long, long, shmem) SHMEM_TYPE_SET(_longlong, long long, shmem) diff --git a/oshmem/shmem/c/shmem_swap.c b/oshmem/shmem/c/shmem_swap.c index 74aa99dd99..5ee0e1e949 100644 --- a/oshmem/shmem/c/shmem_swap.c +++ b/oshmem/shmem/c/shmem_swap.c @@ -23,12 +23,9 @@ * contents of target. 
The operation must be completed without the possibility of another * process updating target between the time of the fetch and the update. */ -#define SHMEM_TYPE_SWAP(type_name, type, prefix) \ - type prefix##type_name##_swap(type *target, type value, int pe) \ - { \ +#define DO_SHMEM_TYPE_ATOMIC_SWAP(ctx, type, target, value, pe, out_value) do { \ int rc = OSHMEM_SUCCESS; \ size_t size = 0; \ - type out_value; \ \ RUNTIME_CHECK_INIT(); \ RUNTIME_CHECK_PE(pe); \ @@ -36,18 +33,45 @@ \ size = sizeof(out_value); \ rc = MCA_ATOMIC_CALL(swap( \ + ctx, \ (void*)target, \ (void*)&out_value, \ OSHMEM_ATOMIC_PTR_2_INT(&value, sizeof(value)), \ size, \ pe)); \ RUNTIME_CHECK_RC(rc); \ - \ + } while (0) + +#define SHMEM_CTX_TYPE_ATOMIC_SWAP(type_name, type, prefix) \ + type prefix##_ctx##type_name##_atomic_swap(shmem_ctx_t ctx, type *target, type value, int pe) \ + { \ + type out_value; \ + DO_SHMEM_TYPE_ATOMIC_SWAP(ctx, type, target, value, pe, \ + out_value); \ + return out_value; \ + } + +#define SHMEM_TYPE_ATOMIC_SWAP(type_name, type, prefix) \ + type prefix##type_name##_atomic_swap(type *target, type value, int pe)\ + { \ + type out_value; \ + DO_SHMEM_TYPE_ATOMIC_SWAP(oshmem_ctx_default, type, target, \ + value, pe, out_value); \ return out_value; \ } #if OSHMEM_PROFILING #include "oshmem/include/pshmem.h" +#pragma weak shmem_ctx_int_atomic_swap = pshmem_ctx_int_atomic_swap +#pragma weak shmem_ctx_long_atomic_swap = pshmem_ctx_long_atomic_swap +#pragma weak shmem_ctx_longlong_atomic_swap = pshmem_ctx_longlong_atomic_swap +#pragma weak shmem_ctx_float_atomic_swap = pshmem_ctx_float_atomic_swap +#pragma weak shmem_ctx_double_atomic_swap = pshmem_ctx_double_atomic_swap +#pragma weak shmem_int_atomic_swap = pshmem_int_atomic_swap +#pragma weak shmem_long_atomic_swap = pshmem_long_atomic_swap +#pragma weak shmem_longlong_atomic_swap = pshmem_longlong_atomic_swap +#pragma weak shmem_float_atomic_swap = pshmem_float_atomic_swap +#pragma weak shmem_double_atomic_swap = 
pshmem_double_atomic_swap #pragma weak shmem_int_swap = pshmem_int_swap #pragma weak shmem_long_swap = pshmem_long_swap #pragma weak shmem_longlong_swap = pshmem_longlong_swap @@ -58,6 +82,26 @@ #include "oshmem/shmem/c/profile/defines.h" #endif +SHMEM_CTX_TYPE_ATOMIC_SWAP(_int, int, shmem) +SHMEM_CTX_TYPE_ATOMIC_SWAP(_long, long, shmem) +SHMEM_CTX_TYPE_ATOMIC_SWAP(_longlong, long long, shmem) +SHMEM_CTX_TYPE_ATOMIC_SWAP(_float, float, shmem) +SHMEM_CTX_TYPE_ATOMIC_SWAP(_double, double, shmem) +SHMEM_TYPE_ATOMIC_SWAP(_int, int, shmem) +SHMEM_TYPE_ATOMIC_SWAP(_long, long, shmem) +SHMEM_TYPE_ATOMIC_SWAP(_longlong, long long, shmem) +SHMEM_TYPE_ATOMIC_SWAP(_float, float, shmem) +SHMEM_TYPE_ATOMIC_SWAP(_double, double, shmem) + +/* deprecated APIs */ +#define SHMEM_TYPE_SWAP(type_name, type, prefix) \ + type prefix##type_name##_swap(type *target, type value, int pe) \ + { \ + type out_value; \ + DO_SHMEM_TYPE_ATOMIC_SWAP(oshmem_ctx_default, type, target, \ + value, pe, out_value); \ + return out_value; \ + } SHMEM_TYPE_SWAP(_int, int, shmem) SHMEM_TYPE_SWAP(_long, long, shmem) SHMEM_TYPE_SWAP(_longlong, long long, shmem) diff --git a/oshmem/shmem/c/shmem_sync.c b/oshmem/shmem/c/shmem_sync.c new file mode 100644 index 0000000000..9f2b983aa1 --- /dev/null +++ b/oshmem/shmem/c/shmem_sync.c @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2013-2018 Mellanox Technologies, Inc. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +#include "oshmem_config.h" + +#include "oshmem/constants.h" +#include "oshmem/include/shmem.h" + +#include "oshmem/runtime/runtime.h" + +#include "oshmem/mca/scoll/scoll.h" +#include "oshmem/mca/scoll/base/base.h" + +#include "oshmem/proc/proc.h" + + +#if OSHMEM_PROFILING +#include "oshmem/include/pshmem.h" +#pragma weak shmem_sync = pshmem_sync +#pragma weak shmem_sync_all = pshmem_sync_all +#include "oshmem/shmem/c/profile/defines.h" +#endif + +void shmem_sync(int PE_start, int logPE_stride, int PE_size, long *pSync) +{ + int rc; + oshmem_group_t* group; + + RUNTIME_CHECK_INIT(); + +#if OSHMEM_SPEC_COMPAT == 1 + /* all outstanding puts must be completed */ + shmem_quiet(); +#endif + + /* Create group basing PE_start, logPE_stride and PE_size */ + group = oshmem_proc_group_create_nofail(PE_start, 1<<logPE_stride, PE_size); + /* Call barrier operation */ + rc = group->g_scoll.scoll_barrier(group, pSync, SCOLL_DEFAULT_ALG); + + oshmem_proc_group_destroy(group); + RUNTIME_CHECK_RC(rc); +} + +void shmem_sync_all(void) +{ + int rc = OSHMEM_SUCCESS; + +#if OSHMEM_SPEC_COMPAT == 1 + /* all outstanding puts must be completed */ + shmem_quiet(); +#endif + + if (mca_scoll_sync_array) { + rc = oshmem_group_all->g_scoll.scoll_barrier(oshmem_group_all, + mca_scoll_sync_array, + SCOLL_DEFAULT_ALG); + } + RUNTIME_CHECK_RC(rc); +} diff --git a/oshmem/shmem/c/shmem_wait.c b/oshmem/shmem/c/shmem_wait.c index 22caa07797..521f788bc4 100644 --- a/oshmem/shmem/c/shmem_wait.c +++ b/oshmem/shmem/c/shmem_wait.c @@ -48,13 +48,16 @@ #pragma weak shmem_longlong_wait = pshmem_longlong_wait #pragma weak shmemx_int32_wait = pshmemx_int32_wait #pragma weak shmemx_int64_wait = pshmemx_int64_wait -#pragma weak shmem_wait_until = pshmem_wait_until #pragma weak shmem_short_wait_until = pshmem_short_wait_until #pragma weak shmem_int_wait_until = pshmem_int_wait_until #pragma weak shmem_long_wait_until = pshmem_long_wait_until #pragma weak shmem_longlong_wait_until =
pshmem_longlong_wait_until #pragma weak shmemx_int32_wait_until = pshmemx_int32_wait_until #pragma weak shmemx_int64_wait_until = pshmemx_int64_wait_until +#pragma weak shmem_short_test = pshmem_short_test +#pragma weak shmem_int_test = pshmem_int_test +#pragma weak shmem_long_test = pshmem_long_test +#pragma weak shmem_longlong_test = pshmem_longlong_test #include "oshmem/shmem/c/profile/defines.h" #endif @@ -83,10 +86,32 @@ SHMEM_TYPE_WAIT(_int64, int64_t, SHMEM_INT64_T, shmemx) return ; \ } -SHMEM_TYPE_WAIT_UNTIL(, volatile long, SHMEM_LONG, shmem) SHMEM_TYPE_WAIT_UNTIL(_short, volatile short, SHMEM_SHORT, shmem) SHMEM_TYPE_WAIT_UNTIL(_int, volatile int, SHMEM_INT, shmem) SHMEM_TYPE_WAIT_UNTIL(_long, volatile long, SHMEM_LONG, shmem) SHMEM_TYPE_WAIT_UNTIL(_longlong, volatile long long, SHMEM_LLONG, shmem) SHMEM_TYPE_WAIT_UNTIL(_int32, int32_t, SHMEM_INT32_T, shmemx) SHMEM_TYPE_WAIT_UNTIL(_int64, int64_t, SHMEM_INT64_T, shmemx) + +#define SHMEM_TYPE_TEST(type_name, type, code, prefix) \ + int prefix##type_name##_test(type *addr, int cmp, type value) \ + { \ + int rc = OSHMEM_SUCCESS; \ + int out_value; \ + \ + RUNTIME_CHECK_INIT(); \ + \ + rc = MCA_SPML_CALL(test( \ + (void*)addr, \ + cmp, \ + (void*)&value, \ + code, &out_value)); \ + RUNTIME_CHECK_RC(rc); \ + \ + return out_value; \ + } + +SHMEM_TYPE_TEST(_short, volatile short, SHMEM_SHORT, shmem) +SHMEM_TYPE_TEST(_int, volatile int, SHMEM_INT, shmem) +SHMEM_TYPE_TEST(_long, volatile long, SHMEM_LONG, shmem) +SHMEM_TYPE_TEST(_longlong, volatile long long, SHMEM_LLONG, shmem) diff --git a/oshmem/shmem/c/shmem_xor.c b/oshmem/shmem/c/shmem_xor.c index 149fba3366..55538798bd 100644 --- a/oshmem/shmem/c/shmem_xor.c +++ b/oshmem/shmem/c/shmem_xor.c @@ -28,6 +28,9 @@ #pragma weak shmem_uint_atomic_xor = pshmem_uint_atomic_xor #pragma weak shmem_ulong_atomic_xor = pshmem_ulong_atomic_xor #pragma weak shmem_ulonglong_atomic_xor = pshmem_ulonglong_atomic_xor +#pragma weak shmem_ctx_uint_atomic_xor = 
pshmem_ctx_uint_atomic_xor +#pragma weak shmem_ctx_ulong_atomic_xor = pshmem_ctx_ulong_atomic_xor +#pragma weak shmem_ctx_ulonglong_atomic_xor = pshmem_ctx_ulonglong_atomic_xor #pragma weak shmemx_int32_atomic_xor = pshmemx_int32_atomic_xor #pragma weak shmemx_int64_atomic_xor = pshmemx_int64_atomic_xor #pragma weak shmemx_uint32_atomic_xor = pshmemx_uint32_atomic_xor @@ -38,6 +41,9 @@ OSHMEM_TYPE_OP(uint, unsigned int, shmem, xor) OSHMEM_TYPE_OP(ulong, unsigned long, shmem, xor) OSHMEM_TYPE_OP(ulonglong, unsigned long long, shmem, xor) +OSHMEM_CTX_TYPE_OP(uint, unsigned int, shmem, xor) +OSHMEM_CTX_TYPE_OP(ulong, unsigned long, shmem, xor) +OSHMEM_CTX_TYPE_OP(ulonglong, unsigned long long, shmem, xor) OSHMEM_TYPE_OP(int32, int32_t, shmemx, xor) OSHMEM_TYPE_OP(int64, int64_t, shmemx, xor) OSHMEM_TYPE_OP(uint32, uint32_t, shmemx, xor) diff --git a/oshmem/shmem/fortran/shmem_character_get_f.c b/oshmem/shmem/fortran/shmem_character_get_f.c index 3b83f2d2f1..7932e66af5 100644 --- a/oshmem/shmem/fortran/shmem_character_get_f.c +++ b/oshmem/shmem/fortran/shmem_character_get_f.c @@ -37,7 +37,7 @@ void shmem_character_get_f(FORTRAN_POINTER_T target, FORTRAN_POINTER_T source, M size_t character_type_size = 0; ompi_datatype_type_size(&ompi_mpi_character.dt, &character_type_size); - MCA_SPML_CALL(get(FPTR_2_VOID_PTR(source), + MCA_SPML_CALL(get(oshmem_ctx_default, FPTR_2_VOID_PTR(source), OMPI_FINT_2_INT(*len) * character_type_size, FPTR_2_VOID_PTR(target), OMPI_FINT_2_INT(*pe))); diff --git a/oshmem/shmem/fortran/shmem_character_put_f.c b/oshmem/shmem/fortran/shmem_character_put_f.c index 0c378342c5..babada9c49 100644 --- a/oshmem/shmem/fortran/shmem_character_put_f.c +++ b/oshmem/shmem/fortran/shmem_character_put_f.c @@ -37,7 +37,7 @@ void shmem_character_put_f(FORTRAN_POINTER_T target, FORTRAN_POINTER_T source, M size_t character_type_size = 0; ompi_datatype_type_size(&ompi_mpi_character.dt, &character_type_size); - MCA_SPML_CALL(put(FPTR_2_VOID_PTR(target), + 
MCA_SPML_CALL(put(oshmem_ctx_default, FPTR_2_VOID_PTR(target), OMPI_FINT_2_INT(*length) * character_type_size, FPTR_2_VOID_PTR(source), OMPI_FINT_2_INT(*pe))); diff --git a/oshmem/shmem/fortran/shmem_complex_get_f.c b/oshmem/shmem/fortran/shmem_complex_get_f.c index 22fd7cba64..21465b5293 100644 --- a/oshmem/shmem/fortran/shmem_complex_get_f.c +++ b/oshmem/shmem/fortran/shmem_complex_get_f.c @@ -37,7 +37,7 @@ void shmem_complex_get_f(FORTRAN_POINTER_T target, FORTRAN_POINTER_T source, MPI size_t complex_type_size = 0; ompi_datatype_type_size(&ompi_mpi_cplex.dt, &complex_type_size); - MCA_SPML_CALL(get(FPTR_2_VOID_PTR(source), + MCA_SPML_CALL(get(oshmem_ctx_default, FPTR_2_VOID_PTR(source), OMPI_FINT_2_INT(*len) * complex_type_size, FPTR_2_VOID_PTR(target), OMPI_FINT_2_INT(*pe))); diff --git a/oshmem/shmem/fortran/shmem_complex_iget_f.c b/oshmem/shmem/fortran/shmem_complex_iget_f.c index d543f62db2..51e5a59dd2 100644 --- a/oshmem/shmem/fortran/shmem_complex_iget_f.c +++ b/oshmem/shmem/fortran/shmem_complex_iget_f.c @@ -44,7 +44,7 @@ void shmem_complex_iget_f(FORTRAN_POINTER_T target, FORTRAN_POINTER_T source, MP for (i=0; i