From a2caf839c0ab61d0626ee355299025472ef34299 Mon Sep 17 00:00:00 2001 From: Brian Barrett Date: Mon, 14 Feb 2005 22:07:08 +0000 Subject: [PATCH] * Initial try at making OMPI atomic operations work properly on Solaris - SPARC32 has spinlocks and "pseudo atomic" math - SPARC64 has spinlocks, compare & set, atomic math (still in s/w) This commit was SVN r4433. --- config/ompi_config_asm.m4 | 48 ++++++---- src/asm/asm-data.txt | 6 +- src/asm/asm.c | 18 ++-- src/asm/base/SPARC32.asm | 80 +++++++++++++++++ src/asm/base/SPARC64.asm | 116 +++++++++++++++++++++++++ src/asm/test/atomic_cmpset_32_serial.c | 4 + src/asm/test/atomic_spinlock.c | 3 + src/include/sys/atomic.h | 38 ++++---- src/include/sys/sparc/atomic.h | 24 +++-- src/include/sys/sparc/update.sh | 3 +- src/include/sys/sparc64/atomic.h | 2 +- src/include/sys/sparc64/update.sh | 2 +- 12 files changed, 289 insertions(+), 55 deletions(-) create mode 100644 src/asm/base/SPARC32.asm create mode 100644 src/asm/base/SPARC64.asm diff --git a/config/ompi_config_asm.m4 b/config/ompi_config_asm.m4 index eac12c7d1e..d1f035b61b 100644 --- a/config/ompi_config_asm.m4 +++ b/config/ompi_config_asm.m4 @@ -290,20 +290,30 @@ AC_DEFUN([OMPI_CHECK_ASM_TYPE],[ AC_MSG_CHECKING([prefix for function in .type]) ompi_cv_asm_type="" - for type in @ \# % ; do - asm_result=0 - echo "configure: trying $type" >& AC_FD_CC - OMPI_TRY_ASSEMBLE([ .type mysym, ${type}function], - [# ok, we succeeded at assembling. see if there was - # a warning in the output. - if test "`cat conftest.out`" = "" ; then - ompi_cv_asm_type="${type}" - asm_result=1 - fi]) - if test "$asm_result" = "1" ; then - break - fi - done + case "${host}" in + *-sun-solaris*) + # GCC on solaris seems to accept just about anything, not + # that what it defines actually works... So just hardwire + # to the right answer + ompi_cv_asm_type="#" + ;; + *) + for type in @ \# % ; do + asm_result=0 + echo "configure: trying $type" >& AC_FD_CC + OMPI_TRY_ASSEMBLE([ .type mysym, ${type}function], + [# ok, we succeeded at assembling. see if there was + # a warning in the output. + if test "`cat conftest.out`" = "" ; then + ompi_cv_asm_type="${type}" + asm_result=1 + fi]) + if test "$asm_result" = "1" ; then + break + fi + done + ;; + esac rm -f conftest.out AC_MSG_RESULT([$ompi_cv_asm_type]) @@ -618,7 +628,7 @@ case "${host}" in ompi_cv_asm_arch="POWERPC32" # Note that on some platforms (Apple G5), even if we are - # compiling in 32 bit more (and therefore should assume + # compiling in 32 bit mode (and therefore should assume # sizeof(long) == 4), we can use the 64 bit test and set # operations. OMPI_CHECK_POWERPC_64BIT(OMPI_ASM_SUPPORT_64BIT=1) @@ -633,14 +643,15 @@ case "${host}" in sparc-*) if test "$ac_cv_sizeof_long" = "4" ; then + OMPI_ASM_SUPPORT_64BIT=0 ompi_cv_asm_arch="SPARC32" elif test "$ac_cv_sizeof_long" = "8" ; then + OMPI_ASM_SUPPORT_64BIT=1 ompi_cv_asm_arch="SPARC64" else AC_MSG_ERROR([Could not determine Sparc word size: $ac_cv_sizeof_long]) fi - OMPI_ASM_SUPPORT_64BIT=1 - OMPI_GCC_INLINE_ASSIGN='"mov 0,%0" : : "=&r"(ret)' + OMPI_GCC_INLINE_ASSIGN='"mov 0,%0" : "=&r"(ret)' ;; *) @@ -716,12 +727,13 @@ dnl do all the evil mojo to provide a working assembly file dnl dnl ################################################################# AC_DEFUN([OMPI_ASM_FIND_FILE], [ + AC_REQUIRE([AC_PROG_FGREP]) AC_CHECK_PROG([PERL], [perl], [perl]) # see if we have a pre-built one already AC_MSG_CHECKING([for pre-built assembly file]) ompi_cv_asm_file="" - if grep "$ompi_cv_asm_arch" "${top_ompi_srcdir}/src/asm/asm-data.txt" | grep -F "$ompi_cv_asm_format" >conftest.out 2>&1 ; then + if grep "$ompi_cv_asm_arch" "${top_ompi_srcdir}/src/asm/asm-data.txt" | $FGREP "$ompi_cv_asm_format" >conftest.out 2>&1 ; then ompi_cv_asm_file="`cut -f3 conftest.out`" if test ! "$ompi_cv_asm_file" = "" ; then ompi_cv_asm_file="atomic-${ompi_cv_asm_file}.s" diff --git a/src/asm/asm-data.txt b/src/asm/asm-data.txt index 3f0b798f88..d3911ae2b1 100644 --- a/src/asm/asm-data.txt +++ b/src/asm/asm-data.txt @@ -22,11 +22,13 @@ # Assembly Format field: # config_file-text-global-label_suffix-gsym-lsym-type-size-align_log-ppc_r_reg-64_bit -AMD64 default-.text-.globl-:--.L-@-1-0-1-0 amd64-linux -IA32 default-.text-.globl-:--.L-@-1-0-1-0 ia32-linux +AMD64 default-.text-.globl-:--.L-@-1-0-1-1 amd64-linux +IA32 default-.text-.globl-:--.L-@-1-0-1-1 ia32-linux POWERPC32 default-.text-.globl-:-_-L--0-1-1-0 powerpc32-osx POWERPC32 default-.text-.globl-:-_-L--0-1-1-1 powerpc32-64-osx POWERPC32 default-.text-.globl-:--.L-@-1-1-0-0 powerpc32-linux POWERPC32 aix-.csect .text[PR]-.globl-:-.-L--0-1-0-0 powerpc32-aix POWERPC64 default-.text-.globl-:-_-L--0-1-1-1 powerpc64-osx POWERPC64 aix-.csect .text[PR]-.globl-:-.-L--0-1-0-1 powerpc64-aix +SPARC32 default-.text-.globl-:--.L-#-1-0-1-0 sparc32-solaris +SPARC64 default-.text-.globl-:--.L-#-1-0-1-1 sparc64-solaris diff --git a/src/asm/asm.c b/src/asm/asm.c index a15c824728..3e3865e09c 100644 --- a/src/asm/asm.c +++ b/src/asm/asm.c @@ -28,14 +28,14 @@ /* have to fix if you change LOCKS_TABLE_SIZE */ static ompi_lock_t locks_table[LOCKS_TABLE_SIZE] = { - OMPI_ATOMIC_UNLOCKED, - OMPI_ATOMIC_UNLOCKED, - OMPI_ATOMIC_UNLOCKED, - OMPI_ATOMIC_UNLOCKED, - OMPI_ATOMIC_UNLOCKED, - OMPI_ATOMIC_UNLOCKED, - OMPI_ATOMIC_UNLOCKED, - OMPI_ATOMIC_UNLOCKED + { { OMPI_ATOMIC_UNLOCKED } }, + { { OMPI_ATOMIC_UNLOCKED } }, + { { OMPI_ATOMIC_UNLOCKED } }, + { { OMPI_ATOMIC_UNLOCKED } }, + { { OMPI_ATOMIC_UNLOCKED } }, + { { OMPI_ATOMIC_UNLOCKED } }, + { { OMPI_ATOMIC_UNLOCKED } }, + { { OMPI_ATOMIC_UNLOCKED } } }; # else /* OMPI_WANT_SMP_LOCKS */ @@ -49,7 +49,7 @@ static ompi_lock_t locks_table[1] = { OMPI_ATOMIC_UNLOCKED }; int32_t -ompi_atomic_sub_32(volatile int32_t *addr, int delta) +ompi_atomic_add_32(volatile int32_t *addr, int delta) { int32_t ret; diff --git a/src/asm/base/SPARC32.asm b/src/asm/base/SPARC32.asm new file mode 100644 index 0000000000..bd24b157f7 --- /dev/null +++ b/src/asm/base/SPARC32.asm @@ -0,0 +1,80 @@ +START_FILE + TEXT + + ALIGN(4) + +START_FUNC(ompi_atomic_mb) + !#PROLOGUE# 0 + !#PROLOGUE# 1 + retl + nop +END_FUNC(ompi_atomic_mb) + + +START_FUNC(ompi_atomic_rmb) + !#PROLOGUE# 0 + !#PROLOGUE# 1 + retl + nop +END_FUNC(ompi_atomic_rmb) + + +START_FUNC(ompi_atomic_wmb) + !#PROLOGUE# 0 + !#PROLOGUE# 1 + retl + nop +END_FUNC(ompi_atomic_wmb) + + +START_FUNC(ompi_atomic_init) + !#PROLOGUE# 0 + !#PROLOGUE# 1 + stb %o1, [%o0] + retl + nop +END_FUNC(ompi_atomic_init) + + +START_FUNC(ompi_atomic_trylock) + !#PROLOGUE# 0 + !#PROLOGUE# 1 + ldstub [%o0], %o0 + and %o0, 0xff, %o0 + subcc %g0, %o0, %g0 + retl + subx %g0, -1, %o0 +END_FUNC(ompi_atomic_trylock) + + +START_FUNC(ompi_atomic_lock) + !#PROLOGUE# 0 + save %sp, -112, %sp + !#PROLOGUE# 1 +LSYM(retry) + ldstub [%i0], %l0 + tst %l0 + be REFLSYM(out) + nop +LSYM(loop) + ldub [%i0], %l0 + tst %l0 + bne REFLSYM(loop) + nop + ba,a REFLSYM(retry) +LSYM(out) + nop + ret + restore +END_FUNC(ompi_atomic_lock) + + +START_FUNC(ompi_atomic_unlock) + !#PROLOGUE# 0 + !#PROLOGUE# 1 + stbar + stb %g0, [%o0] + + retl + nop +END_FUNC(ompi_atomic_unlock) diff --git a/src/asm/base/SPARC64.asm b/src/asm/base/SPARC64.asm new file mode 100644 index 0000000000..d69f2e2349 --- /dev/null +++ b/src/asm/base/SPARC64.asm @@ -0,0 +1,116 @@ +START_FILE + TEXT + + ALIGN(4) + + +START_FUNC(ompi_atomic_mb) + !#PROLOGUE# 0 + !#PROLOGUE# 1 + membar #LoadLoad | #LoadStore | #StoreStore | #StoreLoad + retl + nop +END_FUNC(ompi_atomic_mb) + + +START_FUNC(ompi_atomic_rmb) + !#PROLOGUE# 0 + !#PROLOGUE# 1 + membar #LoadLoad + retl + nop +END_FUNC(ompi_atomic_rmb) + + +START_FUNC(ompi_atomic_wmb) + !#PROLOGUE# 0 + !#PROLOGUE# 1 + membar #StoreStore + retl + nop +END_FUNC(ompi_atomic_wmb) + + +START_FUNC(ompi_atomic_cmpset_32) + !#PROLOGUE# 0 + !#PROLOGUE# 1 + mov %o1, %g1 + casa [%o0] 0x80, %o2, %g1 + xor %g1, %o1, %g1 + subcc %g0, %g1, %g0 + retl + subx %g0, -1, %o0 +END_FUNC(ompi_atomic_cmpset_32) + + +START_FUNC(ompi_atomic_cmpset_acq_32) + !#PROLOGUE# 0 + save %sp, -192, %sp + !#PROLOGUE# 1 + mov %i0, %o0 + mov %i1, %o1 + call ompi_atomic_cmpset_32, 0 + mov %i2, %o2 + call ompi_atomic_rmb, 0 + mov %o0, %i0 + return %i7+8 + sra %o0, 0, %o0 +END_FUNC(ompi_atomic_cmpset_acq_32) + + +START_FUNC(ompi_atomic_cmpset_rel_32) + !#PROLOGUE# 0 + save %sp, -192, %sp + !#PROLOGUE# 1 + call ompi_atomic_wmb, 0 + sra %i1, 0, %i1 + sra %i2, 0, %i2 + mov %i0, %o0 + mov %i1, %o1 + call ompi_atomic_cmpset_32, 0 + mov %i2, %o2 + ret + restore %g0, %o0, %o0 +END_FUNC(ompi_atomic_cmpset_rel_32) + + +START_FUNC(ompi_atomic_cmpset_64) + !#PROLOGUE# 0 + !#PROLOGUE# 1 + mov %o1, %g1 + casxa [%o0] 0x80, %o2, %g1 + xor %g1, %o1, %g1 + mov 0, %o0 + retl + movre %g1, 1, %o0 +END_FUNC(ompi_atomic_cmpset_64) + + +START_FUNC(ompi_atomic_cmpset_acq_64) + !#PROLOGUE# 0 + save %sp, -192, %sp + !#PROLOGUE# 1 + mov %i0, %o0 + mov %i1, %o1 + call ompi_atomic_cmpset_64, 0 + mov %i2, %o2 + call ompi_atomic_rmb, 0 + mov %o0, %i0 + return %i7+8 + sra %o0, 0, %o0 +END_FUNC(ompi_atomic_cmpset_acq_64) + + +START_FUNC(ompi_atomic_cmpset_rel_64) + !#PROLOGUE# 0 + save %sp, -192, %sp + !#PROLOGUE# 1 + call ompi_atomic_wmb, 0 + nop + mov %i0, %o0 + mov %i1, %o1 + call ompi_atomic_cmpset_64, 0 + mov %i2, %o2 + ret + restore %g0, %o0, %o0 +END_FUNC(ompi_atomic_cmpset_rel_64) diff --git a/src/asm/test/atomic_cmpset_32_serial.c b/src/asm/test/atomic_cmpset_32_serial.c index 7ff4dfb33a..73d8b7d011 100644 --- a/src/asm/test/atomic_cmpset_32_serial.c +++ b/src/asm/test/atomic_cmpset_32_serial.c @@ -27,6 +27,9 @@ int32_t new32; int main(int argc, char *argv[]) { +#if ! OMPI_HAVE_ATOMIC_CMPSET_32 + return 77; +#else vol32 = 42, old32 = 42, new32 = 50; assert(ompi_atomic_cmpset_32(&vol32, old32, new32) == 1); @@ -53,4 +56,5 @@ main(int argc, char *argv[]) assert(vol32 == 42); return 0; +#endif } diff --git a/src/asm/test/atomic_spinlock.c b/src/asm/test/atomic_spinlock.c index 6b04e7b71e..354fa6e7b2 100644 --- a/src/asm/test/atomic_spinlock.c +++ b/src/asm/test/atomic_spinlock.c @@ -33,6 +33,8 @@ struct start_info { ompi_lock_t *lock; }; + +#if OMPI_HAVE_POSIX_THREADS static void* atomic_spinlock_start(void* arg) { struct start_info *data = (struct start_info*) arg; @@ -40,6 +42,7 @@ static void* atomic_spinlock_start(void* arg) return (void*) (unsigned long) atomic_spinlock_test(data->lock, data->count, data->tid); } +#endif int diff --git a/src/include/sys/atomic.h b/src/include/sys/atomic.h index 553645a372..d469c932b8 100644 --- a/src/include/sys/atomic.h +++ b/src/include/sys/atomic.h @@ -62,6 +62,28 @@ #if defined(c_plusplus) || defined(__cplusplus) extern "C" { #endif +/********************************************************************** + * + * Data structures for atomic ops + * + *********************************************************************/ +/** + * Volatile lock object (with optional padding). + * + * \note The internals of the lock are included here, but should be + * considered private. The implementation currently in use may choose + * to use an int or unsigned char as the lock value - the user is not + * informed either way. + */ +struct ompi_lock_t { + union { + volatile int lock; /**< The lock address (an integer) */ + volatile unsigned char sparc_lock; /**< The lock address on sparc */ + char padding[sizeof(int)]; /**< Array for optional padding */ + } u; +}; +typedef struct ompi_lock_t ompi_lock_t; + /********************************************************************** * @@ -161,22 +183,6 @@ void ompi_atomic_wmb(void); * Atomic spinlocks - always inlined, if have atomic cmpset * *********************************************************************/ -/** - * Volatile lock object (with optional padding). - * - * \note The internals of the lock are included here, but should be - * considered private. The implementation currently in use may choose - * to use an int or unsigned char as the lock value - the user is not - * informed either way. - */ -struct ompi_lock_t { - union { - volatile int lock; /**< The lock address (an integer) */ - volatile unsigned char sparc_lock; /**< The lock address on sparc */ - char padding[sizeof(int)]; /**< Array for optional padding */ - } u; -}; -typedef struct ompi_lock_t ompi_lock_t; #if !defined(OMPI_HAVE_ATOMIC_SPINLOCKS) && !defined(DOXYGEN) /* 0 is more like "pending" - we'll fix up at the end after all diff --git a/src/include/sys/sparc/atomic.h b/src/include/sys/sparc/atomic.h index b4a1bfb9ff..ebb16f7ded 100644 --- a/src/include/sys/sparc/atomic.h +++ b/src/include/sys/sparc/atomic.h @@ -22,13 +22,23 @@ #define MB() #endif +#ifdef OMPI_GENERATE_ASM_FILE +struct ompi_lock_t { + union { + volatile int lock; /**< The lock address (an integer) */ + volatile unsigned char sparc_lock; /**< The lock address on sparc */ + char padding[sizeof(int)]; /**< Array for optional padding */ + } u; +}; +typedef struct ompi_lock_t ompi_lock_t; +#endif /********************************************************************** * * Define constants for UltraSparc 64 * *********************************************************************/ -#define OMPI_HAVE_MEM_BARRIER 1 +#define OMPI_HAVE_ATOMIC_MEM_BARRIER 1 #define OMPI_HAVE_ATOMIC_CMPSET_32 0 #define OMPI_HAVE_ATOMIC_CMPSET_64 0 @@ -79,7 +89,7 @@ static inline void ompi_atomic_wmb(void) static inline void ompi_atomic_init(ompi_lock_t* lock, int value) { - lock->sparc_lock = (unsigned char) value; + lock->u.sparc_lock = (unsigned char) value; } @@ -94,7 +104,7 @@ static inline int ompi_atomic_trylock(ompi_lock_t *lock) __asm__ __volatile__ ("\t" "ldstub [%1], %0" : "=r"(result) - : "r"(&(lock->sparc_lock)) + : "r"(&(lock->u.sparc_lock)) : "memory"); return (result == 0); } @@ -107,10 +117,10 @@ static inline void ompi_atomic_lock(ompi_lock_t *lock) "retry: \n\t" "ldstub [%0], %%l0 \n\t" "tst %%l0 \n\t" - "bw out \n\t" + "be out \n\t" "nop \n" "loop: \n\t" - "ldb [%0], %%l0 \n\t" + "ldub [%0], %%l0 \n\t" "tst %%l0 \n\t" "bne loop \n\t" "nop \n\t" @@ -118,7 +128,7 @@ static inline void ompi_atomic_lock(ompi_lock_t *lock) "out: \n\t" "nop" : - : "r"(&(lock->sparc_lock)) + : "r"(&(lock->u.sparc_lock)) : "%l0", "memory"); } @@ -130,7 +140,7 @@ static inline void ompi_atomic_unlock(ompi_lock_t *lock) "stbar \n\t" "stb %%g0, [%0] \n\t" : - : "r"(&(lock->sparc_lock)) + : "r"(&(lock->u.sparc_lock)) : "memory"); } diff --git a/src/include/sys/sparc/update.sh b/src/include/sys/sparc/update.sh index 83737fcbdc..396844cb84 100644 --- a/src/include/sys/sparc/update.sh +++ b/src/include/sys/sparc/update.sh @@ -26,7 +26,8 @@ cat > $CFILE< $CFILE<