
* Fix bug 1127.  Update powerpc 32 code to do a 64-bit compare-and-set when
  running on hardware with such support.  Since the calling convention is
  to use the registers as 32-bit values (even if they are 64-bit registers),
  we need to pack things up properly before doing the compare-and-set.

This commit was SVN r4226.
This commit is contained in:
Brian Barrett 2005-01-28 23:32:44 +00:00
parent f822ffc5ff
commit 113fba12db
3 changed files with 118 additions and 35 deletions
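Background on the change: in the 32-bit PowerPC calling convention, each int64_t argument arrives split across a pair of 32-bit registers (here oldval in r4:r5 and newval in r6:r7), so the new routines first spill the four words to the stack and reload them as two doublewords before entering the ldarx/stdcx. loop. A minimal C sketch of that packing, assuming a big-endian 32-bit target (the pack64 helper is illustrative and not part of the commit):

    #include <stdint.h>

    /* Mirrors what the stw/ld pairs below do on the stack: on big-endian
       PowerPC the word at the lower address is the most-significant half
       of the doubleword. */
    typedef union {
        int64_t  dword;
        uint32_t word[2];
    } pair64;

    static int64_t pack64(uint32_t high, uint32_t low)
    {
        pair64 p;
        p.word[0] = high;   /* like: stw r4,-32(r1) */
        p.word[1] = low;    /* like: stw r5,-28(r1) */
        return p.dword;     /* like: ld  r5,-32(r1) */
    }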


@@ -67,55 +67,68 @@ END_FUNC(ompi_atomic_cmpset_rel_32)
 #START_64BIT
 START_FUNC(ompi_atomic_cmpset_64)
+	stw r4,-32(r1)
+	stw r5,-28(r1)
+	stw r6,-24(r1)
+	stw r7,-20(r1)
+	ld r5,-32(r1)
+	ld r7,-24(r1)
 	1: ldarx r9, 0, r3
-	   cmpd 0, r9, r4
+	   cmpd 0, r9, r5
 	   bne- 2f
-	   stdcx. r6, 0, r3
-	   bne- 1b
+	   stdcx. r7, 0, r3
+	   bne- 1b
 	2:
-	li r3,0
-	cmpw cr7,r9,r4
-	bnelr+ cr7
-	cmpw cr7,r10,r5
-	bnelr+ cr7
-	li r3,1
+	xor r3,r5,r9
+	subfic r2,r3,0
+	adde r3,r2,r3
 	blr
 END_FUNC(ompi_atomic_cmpset_64)
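The new epilogue computes the 0/1 return value without a branch: xor leaves zero in r3 exactly when the loaded value matched oldval, subfic computes 0 - r3 and sets the carry bit only in that case, and adde then sums r3 + (0 - r3) + carry, leaving just the carry. A C rendering of the same trick (a sketch, not part of the commit; it is the pattern a compiler emits for !(a ^ b)):

    #include <stdint.h>

    static int eq64(int64_t a, int64_t b)
    {
        uint64_t x     = (uint64_t)(a ^ b);  /* xor    r3,r5,r9                 */
        uint64_t neg   = (uint64_t)0 - x;    /* subfic r2,r3,0                  */
        uint64_t carry = (x == 0);           /* CA: 0 - x borrows unless x == 0 */
        return (int)(x + neg + carry);       /* adde   r3,r2,r3  -> 1 or 0      */
    }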
 START_FUNC(ompi_atomic_cmpset_acq_64)
+	stw r4,-32(r1)
+	stw r5,-28(r1)
+	stw r6,-24(r1)
+	stw r7,-20(r1)
+	ld r5,-32(r1)
+	ld r7,-24(r1)
 	1: ldarx r9, 0, r3
-	   cmpd 0, r9, r4
+	   cmpd 0, r9, r5
 	   bne- 2f
-	   stdcx. r6, 0, r3
+	   stdcx. r7, 0, r3
 	   bne- 1b
 	2:
-	cmpw cr0,r9,r4
-	li r3,0
-	bne+ cr0,L15
-	cmpw cr0,r10,r5
-	bne+ cr0,L15
-	li r3,1
-L15:
+	xor r3,r5,r9
+	subfic r2,r3,0
+	adde r3,r2,r3
+	blr
 	lwsync
 	blr
 END_FUNC(ompi_atomic_cmpset_acq_64)
 START_FUNC(ompi_atomic_cmpset_rel_64)
+	stw r4,-32(r1)
+	stw r5,-28(r1)
+	stw r6,-24(r1)
+	stw r7,-20(r1)
+	ld r5,-32(r1)
+	ld r7,-24(r1)
 	eieio
 	1: ldarx r9, 0, r3
-	   cmpd 0, r9, r4
+	   cmpd 0, r9, r5
 	   bne- 2f
-	   stdcx. r6, 0, r3
+	   stdcx. r7, 0, r3
 	   bne- 1b
 	2:
-	cmpw cr0,r9,r4
-	li r3,0
-	bnelr+ cr0
-	cmpw cr0,r10,r5
-	bnelr+ cr0
-	li r3,1
+	xor r3,r5,r9
+	subfic r2,r3,0
+	adde r3,r2,r3
+	blr
 	lwsync
 	blr
 END_FUNC(ompi_atomic_cmpset_rel_64)
 #END_64BIT
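Barrier placement note: the acquire variant issues lwsync after the reservation loop so that later loads and stores cannot be reordered ahead of a successful compare-and-set, while the release variant issues eieio before the loop so that earlier stores are ordered before the conditional store. The C fall-back versions later in the header express the same composition with ompi_atomic_rmb() and ompi_atomic_wmb().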


@@ -51,7 +51,7 @@
 #define OMPI_HAVE_ATOMIC_ADD_32 1
 #define OMPI_HAVE_ATOMIC_SUB_32 1
-#if (OMPI_ASSEMBLY_ARCH == OMPI_POWERPC64) || (OMPI_POWERPC_SUPPORT_64BIT && OMPI_GCC_INLINE_ASSEMBLY)
+#if (OMPI_ASSEMBLY_ARCH == OMPI_POWERPC64) || OMPI_POWERPC_SUPPORT_64BIT
 #define OMPI_HAVE_ATOMIC_CMPSET_64 1
 #endif
@@ -172,7 +172,7 @@ int ompi_atomic_cmpset_rel_32(volatile int32_t *addr,
 #endif /* OMPI_GCC_INLINE_ASSEMBLY */
-#if OMPI_POWERPC_SUPPORT_64BIT
+#if (OMPI_ASSEMBLY_ARCH == OMPI_POWERPC64)
 #if OMPI_GCC_INLINE_ASSEMBLY
 static inline int ompi_atomic_cmpset_64(volatile int64_t *addr,
@@ -194,7 +194,6 @@ static inline int ompi_atomic_cmpset_64(volatile int64_t *addr,
 	return (ret == oldval);
 }
-
 /* these two functions aren't inlined in the non-gcc case because then
    there would be two function calls (since neither cmpset_64 nor
    atomic_?mb can be inlined). Instead, we "inline" them by hand in
@@ -219,8 +218,7 @@ static inline int ompi_atomic_cmpset_rel_64(volatile int64_t *addr,
 	return ompi_atomic_cmpset_64(addr, oldval, newval);
 }
-#elif OMPI_ASSEMBLY_ARCH == OMPI_POWERPC64
-/* currently, don't have 64 bit apps for non-inline assembly */
+#else /* OMPI_GCC_INLINE_ASSEMBLY */
 int ompi_atomic_cmpset_64(volatile int64_t *addr,
                           int64_t oldval, int64_t newval);
@@ -231,7 +229,77 @@ int ompi_atomic_cmpset_rel_64(volatile int64_t *addr,
 #endif /* OMPI_GCC_INLINE_ASSEMBLY */
-#endif /* OMPI_POWERPC_SUPPORT_64BIT */
+#elif (OMPI_ASSEMBLY_ARCH == OMPI_POWERPC32) && OMPI_POWERPC_SUPPORT_64BIT
+
+#ifndef ll_low /* GLIBC provides these somewhere, so protect */
+#define ll_low(x) *(((unsigned int*)&(x))+0)
+#define ll_high(x) *(((unsigned int*)&(x))+1)
+#endif
+
+#if OMPI_GCC_INLINE_ASSEMBLY
+
+static inline int ompi_atomic_cmpset_64(volatile int64_t *addr,
+                                        int64_t oldval, int64_t newval)
+{
+    int ret;
+
+    __asm__ __volatile__ (
+        "stw r4, -32(r1) \n\t"
+        "stw r5, -28(r1) \n\t"
+        "stw r6, -24(r1) \n\t"
+        "stw r7, -20(r1) \n\t"
+        "ld r5,-32(r1) \n\t"
+        "ld r6,-24(r1) \n\t"
+        "1: ldarx   r9, 0, r3 \n\t"
+        "   cmpd    0, r9, r5 \n\t"
+        "   bne-    2f \n\t"
+        "   stdcx.  r7, 0, r3 \n\t"
+        "   bne-    1b \n\t"
+        "2: \n\t"
+        "xor r3,r5,r9 \n\t"
+        "subfic r2,r3,0 \n\t"
+        "adde %0,r2,r3 \n\t"
+        : "=&r" (ret)
+        : : "r2", "r9", "cc", "memory");
+
+    return ret;
+}
+
+/* these two functions aren't inlined in the non-gcc case because then
+   there would be two function calls (since neither cmpset_64 nor
+   atomic_?mb can be inlined). Instead, we "inline" them by hand in
+   the assembly, meaning there is one function call overhead instead
+   of two */
+static inline int ompi_atomic_cmpset_acq_64(volatile int64_t *addr,
+                                            int64_t oldval, int64_t newval)
+{
+    int rc;
+
+    rc = ompi_atomic_cmpset_64(addr, oldval, newval);
+    ompi_atomic_rmb();
+
+    return rc;
+}
+
+static inline int ompi_atomic_cmpset_rel_64(volatile int64_t *addr,
+                                            int64_t oldval, int64_t newval)
+{
+    ompi_atomic_wmb();
+    return ompi_atomic_cmpset_64(addr, oldval, newval);
+}
+
+#else /* OMPI_GCC_INLINE_ASSEMBLY */
+
+int ompi_atomic_cmpset_64(volatile int64_t *addr,
+                          int64_t oldval, int64_t newval);
+int ompi_atomic_cmpset_acq_64(volatile int64_t *addr,
+                              int64_t oldval, int64_t newval);
+int ompi_atomic_cmpset_rel_64(volatile int64_t *addr,
+                              int64_t oldval, int64_t newval);
+
+#endif /* OMPI_GCC_INLINE_ASSEMBLY */
+
+#endif /* OMPI_ASM_ARCHITECTURE == PPC64 || OMPI_POWERPC_SUPPORT_64BIT */
+
 #if OMPI_GCC_INLINE_ASSEMBLY
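For context, callers typically wrap this primitive in a retry loop. A hypothetical usage sketch (only ompi_atomic_cmpset_64 and its 1-on-success return convention come from the diff; the atomic_add64 helper is illustrative):

    #include <stdint.h>

    int ompi_atomic_cmpset_64(volatile int64_t *addr,
                              int64_t oldval, int64_t newval);

    /* Atomically add delta to a 64-bit counter. */
    static void atomic_add64(volatile int64_t *addr, int64_t delta)
    {
        int64_t old;
        do {
            old = *addr;   /* snapshot, then race to install the update */
        } while (!ompi_atomic_cmpset_64(addr, old, old + delta));
    }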


@@ -25,9 +25,11 @@ cat > $CFILE<<EOF
 #define static
 #define inline
 #define OMPI_GCC_INLINE_ASSEMBLY 1
+#define OMPI_ASSEMBLY_ARCH OMPI_POWERPC32
 #include "../architecture.h"
 #include "atomic.h"
 EOF
-gcc -mcpu=970 -DOMPI_POWERPC_SUPPORT_64BIT=1 -I. -S $CFILE -o atomic-32-64.s
-gcc -DOMPI_POWERPC_SUPPORT_64BIT=0 -I. -S $CFILE -o atomic-32.s
-gcc -m64 -DOMPI_POWERPC_SUPPORT_64BIT=1 -I. -S $CFILE -o atomic-64.s
+gcc -O1 -mcpu=970 -DOMPI_POWERPC_SUPPORT_64BIT=1 -I. -S $CFILE -o atomic-32-64.s
+gcc -O1 -DOMPI_POWERPC_SUPPORT_64BIT=0 -I. -S $CFILE -o atomic-32.s
+# gcc -m64 -DOMPI_POWERPC_SUPPORT_64BIT=1 -I. -S $CFILE -o atomic-64.s
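Two details of the generator are worth noting. The -mcpu=970 flag lets a 32-bit compile use the 64-bit ldarx/stdcx. instructions, since the PPC970 (G5) executes them while running 32-bit code, and -O1 is presumably what makes gcc emit the compact branch-free sequences seen above rather than unoptimized spill-heavy code. The -m64 invocation is commented out, so this script no longer regenerates a 64-bit .s file.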