From 0c484996617bbb59e9800279b9543bed2f969570 Mon Sep 17 00:00:00 2001
From: Brian Barrett <brbarret@open-mpi.org>
Date: Sun, 30 Jan 2005 04:56:38 +0000
Subject: [PATCH] * change OMPI_POWERPC_SUPPORT_64BIT to
 OMPI_ASM_SUPPORT_64BIT, since it   looks like we need to do the same thing
 for UltraSparc / MIPS machines * The atomic_cmpset_64 bit code made some
 assumptions about calling   convention that wouldn't be true if the function
 was inlined.  Fix   those assumptions, so we should work fine whether GCC
 inlines   the function or not.

This commit was SVN r4249.
---
 config/ompi_config_asm.m4         | 23 ++++++++++-------
 src/include/sys/powerpc/atomic.h  | 43 +++++++++++++++++++------------
 src/include/sys/powerpc/update.sh |  5 ++--
 3 files changed, 43 insertions(+), 28 deletions(-)

diff --git a/config/ompi_config_asm.m4 b/config/ompi_config_asm.m4
index 62f931b1e4..d101dccd58 100644
--- a/config/ompi_config_asm.m4
+++ b/config/ompi_config_asm.m4
@@ -394,7 +394,7 @@ AC_DEFUN([OMPI_CHECK_POWERPC_64BIT],[
     if test "$ompi_cv_asm_powerpc_r_reg" = "1" ; then
         ldarx_asm="        ldarx r1,r1,r1";
     else
-        ldarx_asm="        ldarx1,1,1";
+        ldarx_asm="        ldarx 1,1,1";
     fi
     OMPI_TRY_ASSEMBLE([$ompi_cv_asm_text
         $ldarx_asm],
@@ -582,7 +582,7 @@ AC_DEFINE_UNQUOTED([OMPI_WANT_SMP_LOCKS], [$want_smp_locks],
 # find our architecture for purposes of assembly stuff
 ompi_cv_asm_arch="UNSUPPORTED"
 OMPI_GCC_INLINE_ASSIGN=""
-OMPI_POWERPC_SUPPORT_64BIT=0
+OMPI_ASM_SUPPORT_64BIT=0
 case "${host}" in
     *-winnt*)
         ompi_cv_asm_arch="WINDOWS"
@@ -590,21 +590,25 @@ case "${host}" in
 
     i?86-*)
         ompi_cv_asm_arch="IA32"
+        OMPI_ASM_SUPPORT_64BIT=1
         OMPI_GCC_INLINE_ASSIGN='"movl [$]0, %0" : "=&r"(ret)'
     ;;
 
     x86_64*)
         ompi_cv_asm_arch="AMD64"
+        OMPI_ASM_SUPPORT_64BIT=1
         OMPI_GCC_INLINE_ASSIGN='"movl [$]0, %0" : "=&r"(ret)'
     ;;
 
     ia64-*)
         ompi_cv_asm_arch="IA64"
+        OMPI_ASM_SUPPORT_64BIT=1
         OMPI_GCC_INLINE_ASSIGN='"mov %0=r0\n;;\n" : "=&r"(ret)'
     ;;
 
     alpha-*)
         ompi_cv_asm_arch="ALPHA"
+        OMPI_ASM_SUPPORT_64BIT=1
         OMPI_GCC_INLINE_ASSIGN='"bis zero,zero,%0" : "=&r"(ret)'
     ;;
 
@@ -617,9 +621,9 @@ case "${host}" in
             # compiling in 32 bit more (and therefore should assume
             # sizeof(long) == 4), we can use the 64 bit test and set
             # operations.
-            OMPI_CHECK_POWERPC_64BIT(OMPI_POWERPC_SUPPORT_64BIT=1)
+            OMPI_CHECK_POWERPC_64BIT(OMPI_ASM_SUPPORT_64BIT=1)
         elif test "$ac_cv_sizeof_long" = "8" ; then
-            OMPI_POWERPC_SUPPORT_64BIT=1
+            OMPI_ASM_SUPPORT_64BIT=1
             ompi_cv_asm_arch="POWERPC64"
         else
             AC_MSG_ERROR([Could not determine PowerPC word size: $ac_cv_sizeof_long])
@@ -635,6 +639,7 @@ case "${host}" in
         else
           AC_MSG_ERROR([Could not determine Sparc word size: $ac_cv_sizeof_long])
         fi
+        OMPI_ASM_SUPPORT_64BIT=1
         OMPI_GCC_INLINE_ASSIGN='"mov 0,%0" : : "=&r"(ret)'
     ;;
 
@@ -643,10 +648,10 @@ case "${host}" in
     ;;
 esac
 
-AC_DEFINE_UNQUOTED([OMPI_POWERPC_SUPPORT_64BIT],
-                   [$OMPI_POWERPC_SUPPORT_64BIT],
-                   [Non-zero if safe to call PPC64 ops, even in PPC32 code])
-AC_SUBST([OMPI_POWERPC_SUPPORT_64BIT])
+AC_DEFINE_UNQUOTED([OMPI_ASM_SUPPORT_64BIT],
+                   [$OMPI_ASM_SUPPORT_64BIT],
+                   [Whether we can do 64bit assembly operations or not.  Should not be used outside of the assembly header files])
+AC_SUBST([OMPI_ASM_SUPPORT_64BIT])
 
 # now that we know our architecture, try to inline assemble
 OMPI_CHECK_INLINE_GCC([$OMPI_GCC_INLINE_ASSIGN])
@@ -666,7 +671,7 @@ if test "$ompi_cv_asm_arch" = "POWERPC32" -o "$ompi_cv_asm_arch" = "POWERPC64" ;
 else
     asm_format="${asm_format}-1"
 fi
-ompi_cv_asm_format="${asm_format}-${OMPI_POWERPC_SUPPORT_64BIT}"
+ompi_cv_asm_format="${asm_format}-${OMPI_ASM_SUPPORT_64BIT}"
 OMPI_ASSEMBLY_FORMAT="$ompi_cv_asm_format"
 
 AC_MSG_CHECKING([for assembly format])
diff --git a/src/include/sys/powerpc/atomic.h b/src/include/sys/powerpc/atomic.h
index d2d4095759..419143d5f7 100644
--- a/src/include/sys/powerpc/atomic.h
+++ b/src/include/sys/powerpc/atomic.h
@@ -51,7 +51,8 @@
 #define OMPI_HAVE_ATOMIC_ADD_32 1
 #define OMPI_HAVE_ATOMIC_SUB_32 1
 
-#if (OMPI_ASSEMBLY_ARCH == OMPI_POWERPC64) || OMPI_POWERPC_SUPPORT_64BIT
+
+#if (OMPI_ASSEMBLY_ARCH == OMPI_POWERPC64) || OMPI_ASM_SUPPORT_64BIT
 #define OMPI_HAVE_ATOMIC_CMPSET_64 1
 #endif
 
@@ -229,7 +230,7 @@ int ompi_atomic_cmpset_rel_64(volatile int64_t *addr,
 
 #endif /* OMPI_GCC_INLINE_ASSEMBLY */
 
-#elif (OMPI_ASSEMBLY_ARCH == OMPI_POWERPC32) && OMPI_POWERPC_SUPPORT_64BIT
+#elif (OMPI_ASSEMBLY_ARCH == OMPI_POWERPC32) && OMPI_ASM_SUPPORT_64BIT
 
 #ifndef ll_low /* GLIBC provides these somewhere, so protect */
 #define ll_low(x)       *(((unsigned int*)&(x))+0)
@@ -237,29 +238,39 @@ int ompi_atomic_cmpset_rel_64(volatile int64_t *addr,
 #endif
 
 #if  OMPI_GCC_INLINE_ASSEMBLY
+
 static inline int ompi_atomic_cmpset_64(volatile int64_t *addr,
                                         int64_t oldval, int64_t newval)
 {
     int ret;
 
+    /*
+     * We force oldval and newval into memory because PPC doesn't
+     * appear to have a way to do a move register with offset.  Since
+     * this is 32-bit code, a 64-bit integer will be loaded into two
+     * registers (assuming no inlining, addr will be in r3, oldval
+     * will be in r4 and r5, and newval will be in r6 and r7).  We
+     * need to load the whole thing into one register, so we have the
+     * compiler push the values into memory and then load each double
+     * word into a single register.  We use r4,r5 so that the main
+     * block of code is very similar to the pure 64 bit version.
+     */
    __asm__ __volatile__ (
-                         "stw r4, -32(r1)       \n\t"
-                         "stw r5, -28(r1)       \n\t"
-                         "stw r6, -24(r1)       \n\t"
-                         "stw r7, -20(r1)       \n\t"
-                         "ld r5,-32(r1)         \n\t"
-                         "ld r6,-24(r1)         \n\t"
-                         "1: ldarx   r9, 0, r3  \n\t"
-                         "   cmpd    0, r9, r5  \n\t"
+                         "ld r4,%2         \n\t"
+                         "ld r5,%3        \n\t"
+                         "1: ldarx   r9, 0, %1  \n\t"
+                         "   cmpd    0, r9, r4  \n\t"
                          "   bne-    2f         \n\t"
-                         "   stdcx.  r7, 0, r3  \n\t"
+                         "   stdcx.  r5, 0, %1  \n\t"
                          "   bne-    1b         \n\t"
                          "2:                    \n\t"
-                         "xor r3,r5,r9          \n\t"
-                         "subfic r2,r3,0        \n\t"
-                         "adde %0,r2,r3         \n\t"
+                         "xor r5,r4,r9          \n\t"
+                         "subfic r9,r5,0        \n\t"
+                         "adde %0,r9,r5         \n\t"
                          : "=&r" (ret)
-                         : : "r2", "r9", "cc", "memory");
+                         : "r"(addr), 
+                           "m"(oldval), "m"(newval)
+                         : "r4", "r5", "r9", "cc", "memory");
     
      return ret;
 }
@@ -299,7 +310,7 @@ int ompi_atomic_cmpset_rel_64(volatile int64_t *addr,
 
 #endif /* OMPI_GCC_INLINE_ASSEMBLY */
 
-#endif /* OMPI_ASM_ARCHITECTURE == PPC64 || OMPI_POWERPC_SUPPORT_64BIT */
+#endif /* OMPI_ASM_SUPPORT_64BIT */
 
 
 #if OMPI_GCC_INLINE_ASSEMBLY
diff --git a/src/include/sys/powerpc/update.sh b/src/include/sys/powerpc/update.sh
index ebbdac0c51..a8cf4c2aff 100644
--- a/src/include/sys/powerpc/update.sh
+++ b/src/include/sys/powerpc/update.sh
@@ -30,6 +30,5 @@ cat > $CFILE<<EOF
 #include "atomic.h"
 EOF
 
-gcc -O1 -mcpu=970 -DOMPI_POWERPC_SUPPORT_64BIT=1 -I. -S $CFILE -o atomic-32-64.s
-gcc -O1 -DOMPI_POWERPC_SUPPORT_64BIT=0 -I. -S $CFILE -o atomic-32.s
-# gcc -m64 -DOMPI_POWERPC_SUPPORT_64BIT=1 -I. -S $CFILE -o atomic-64.s
+gcc -O1 -mpowerpc64 -mcpu=970 -DOMPI_ASM_SUPPORT_64BIT=1 -I. -S $CFILE -o atomic-32-64.s
+gcc -O1 -DOMPI_ASM_SUPPORT_64BIT=0 -I. -S $CFILE -o atomic-32.s