* change OMPI_POWERPC_SUPPORT_64BIT to OMPI_ASM_SUPPORT_64BIT, since it

looks like we need to do the same thing for UltraSPARC / MIPS machines
* The atomic_cmpset_64 code made some assumptions about the calling convention that wouldn't hold if the function were inlined. Fix those assumptions, so it should work fine whether or not GCC inlines the function.

This commit was SVN r4249.
2005-01-30 04:56:38 +00:00 · 2005-01-30 04:56:38 +00:00 · 0c48499661
--- a/config/ompi_config_asm.m4
+++ b/config/ompi_config_asm.m4
@ -394,7 +394,7 @@ AC_DEFUN([OMPI_CHECK_POWERPC_64BIT],[
    if test "$ompi_cv_asm_powerpc_r_reg" = "1" ; then
        ldarx_asm="        ldarx r1,r1,r1";
    else
-        ldarx_asm="        ldarx1,1,1";
+        ldarx_asm="        ldarx 1,1,1";
    fi
    OMPI_TRY_ASSEMBLE([$ompi_cv_asm_text
        $ldarx_asm],
@ -582,7 +582,7 @@ AC_DEFINE_UNQUOTED([OMPI_WANT_SMP_LOCKS], [$want_smp_locks],
 # find our architecture for purposes of assembly stuff
 ompi_cv_asm_arch="UNSUPPORTED"
 OMPI_GCC_INLINE_ASSIGN=""
-OMPI_POWERPC_SUPPORT_64BIT=0
+OMPI_ASM_SUPPORT_64BIT=0
 case "${host}" in
    *-winnt*)
        ompi_cv_asm_arch="WINDOWS"
@ -590,21 +590,25 @@ case "${host}" in

    i?86-*)
        ompi_cv_asm_arch="IA32"
+        OMPI_ASM_SUPPORT_64BIT=1
        OMPI_GCC_INLINE_ASSIGN='"movl [$]0, %0" : "=&r"(ret)'
    ;;

    x86_64*)
        ompi_cv_asm_arch="AMD64"
+        OMPI_ASM_SUPPORT_64BIT=1
        OMPI_GCC_INLINE_ASSIGN='"movl [$]0, %0" : "=&r"(ret)'
    ;;

    ia64-*)
        ompi_cv_asm_arch="IA64"
+        OMPI_ASM_SUPPORT_64BIT=1
        OMPI_GCC_INLINE_ASSIGN='"mov %0=r0\n;;\n" : "=&r"(ret)'
    ;;

    alpha-*)
        ompi_cv_asm_arch="ALPHA"
+        OMPI_ASM_SUPPORT_64BIT=1
        OMPI_GCC_INLINE_ASSIGN='"bis zero,zero,%0" : "=&r"(ret)'
    ;;

@ -617,9 +621,9 @@ case "${host}" in
            # compiling in 32 bit more (and therefore should assume
            # sizeof(long) == 4), we can use the 64 bit test and set
            # operations.
-            OMPI_CHECK_POWERPC_64BIT(OMPI_POWERPC_SUPPORT_64BIT=1)
+            OMPI_CHECK_POWERPC_64BIT(OMPI_ASM_SUPPORT_64BIT=1)
        elif test "$ac_cv_sizeof_long" = "8" ; then
-            OMPI_POWERPC_SUPPORT_64BIT=1
+            OMPI_ASM_SUPPORT_64BIT=1
            ompi_cv_asm_arch="POWERPC64"
        else
            AC_MSG_ERROR([Could not determine PowerPC word size: $ac_cv_sizeof_long])
@ -635,6 +639,7 @@ case "${host}" in
        else
          AC_MSG_ERROR([Could not determine Sparc word size: $ac_cv_sizeof_long])
        fi
+        OMPI_ASM_SUPPORT_64BIT=1
        OMPI_GCC_INLINE_ASSIGN='"mov 0,%0" : : "=&r"(ret)'
    ;;

@ -643,10 +648,10 @@ case "${host}" in
    ;;
 esac

-AC_DEFINE_UNQUOTED([OMPI_POWERPC_SUPPORT_64BIT],
-                   [$OMPI_POWERPC_SUPPORT_64BIT],
-                   [Non-zero if safe to call PPC64 ops, even in PPC32 code])
-AC_SUBST([OMPI_POWERPC_SUPPORT_64BIT])
+AC_DEFINE_UNQUOTED([OMPI_ASM_SUPPORT_64BIT],
+                   [$OMPI_ASM_SUPPORT_64BIT],
+                   [Whether we can do 64bit assembly operations or not.  Should not be used outside of the assembly header files])
+AC_SUBST([OMPI_ASM_SUPPORT_64BIT])

 # now that we know our architecture, try to inline assemble
 OMPI_CHECK_INLINE_GCC([$OMPI_GCC_INLINE_ASSIGN])
@ -666,7 +671,7 @@ if test "$ompi_cv_asm_arch" = "POWERPC32" -o "$ompi_cv_asm_arch" = "POWERPC64" ;
 else
    asm_format="${asm_format}-1"
 fi
-ompi_cv_asm_format="${asm_format}-${OMPI_POWERPC_SUPPORT_64BIT}"
+ompi_cv_asm_format="${asm_format}-${OMPI_ASM_SUPPORT_64BIT}"
 OMPI_ASSEMBLY_FORMAT="$ompi_cv_asm_format"

 AC_MSG_CHECKING([for assembly format])
--- a/src/include/sys/powerpc/atomic.h
+++ b/src/include/sys/powerpc/atomic.h
@ -51,7 +51,8 @@
 #define OMPI_HAVE_ATOMIC_ADD_32 1
 #define OMPI_HAVE_ATOMIC_SUB_32 1

-#if (OMPI_ASSEMBLY_ARCH == OMPI_POWERPC64) || OMPI_POWERPC_SUPPORT_64BIT
+
+#if (OMPI_ASSEMBLY_ARCH == OMPI_POWERPC64) || OMPI_ASM_SUPPORT_64BIT
 #define OMPI_HAVE_ATOMIC_CMPSET_64 1
 #endif

@ -229,7 +230,7 @@ int ompi_atomic_cmpset_rel_64(volatile int64_t *addr,

 #endif /* OMPI_GCC_INLINE_ASSEMBLY */

-#elif (OMPI_ASSEMBLY_ARCH == OMPI_POWERPC32) && OMPI_POWERPC_SUPPORT_64BIT
+#elif (OMPI_ASSEMBLY_ARCH == OMPI_POWERPC32) && OMPI_ASM_SUPPORT_64BIT

 #ifndef ll_low /* GLIBC provides these somewhere, so protect */
 #define ll_low(x)       *(((unsigned int*)&(x))+0)
@ -237,29 +238,39 @@ int ompi_atomic_cmpset_rel_64(volatile int64_t *addr,
 #endif

 #if  OMPI_GCC_INLINE_ASSEMBLY
+
 static inline int ompi_atomic_cmpset_64(volatile int64_t *addr,
                                        int64_t oldval, int64_t newval)
 {
    int ret;

+    /*
+     * We force oldval and newval into memory because PPC doesn't
+     * appear to have a way to do a move register with offset.  Since
+     * this is 32-bit code, a 64 bit integer will be loaded into two
+     * registers (assuming no inlining, addr will be in r3, oldval
+     * will be in r4 and r5, and newval will be in r6 and r7).  We need
+     * to load the whole thing into one register.  So we have the
+     * compiler push the values into memory and load the double word
+     * into registers.  We use r4,r5 so that the main block of code
+     * is very similar to the pure 64 bit version.
+     */
   __asm__ __volatile__ (
-                         "stw r4, -32(r1)       \n\t"
-                         "stw r5, -28(r1)       \n\t"
-                         "stw r6, -24(r1)       \n\t"
-                         "stw r7, -20(r1)       \n\t"
-                         "ld r5,-32(r1)         \n\t"
-                         "ld r6,-24(r1)         \n\t"
-                         "1: ldarx   r9, 0, r3  \n\t"
-                         "   cmpd    0, r9, r5  \n\t"
+                         "ld r4,%2         \n\t"
+                         "ld r5,%3        \n\t"
+                         "1: ldarx   r9, 0, %1  \n\t"
+                         "   cmpd    0, r9, r4  \n\t"
                         "   bne-    2f         \n\t"
-                         "   stdcx.  r7, 0, r3  \n\t"
+                         "   stdcx.  r5, 0, %1  \n\t"
                         "   bne-    1b         \n\t"
                         "2:                    \n\t"
-                         "xor r3,r5,r9          \n\t"
-                         "subfic r2,r3,0        \n\t"
-                         "adde %0,r2,r3         \n\t"
+                         "xor r5,r4,r9          \n\t"
+                         "subfic r9,r5,0        \n\t"
+                         "adde %0,r9,r5         \n\t"
                         : "=&r" (ret)
-                         : : "r2", "r9", "cc", "memory");
+                         : "r"(addr), 
+                           "m"(oldval), "m"(newval)
+                         : "r4", "r5", "r9", "cc", "memory");
    
     return ret;
 }
@ -299,7 +310,7 @@ int ompi_atomic_cmpset_rel_64(volatile int64_t *addr,

 #endif /* OMPI_GCC_INLINE_ASSEMBLY */

-#endif /* OMPI_ASM_ARCHITECTURE == PPC64 || OMPI_POWERPC_SUPPORT_64BIT */
+#endif /* OMPI_ASM_SUPPORT_64BIT */


 #if OMPI_GCC_INLINE_ASSEMBLY
--- a/src/include/sys/powerpc/update.sh
+++ b/src/include/sys/powerpc/update.sh
@ -30,6 +30,5 @@ cat > $CFILE<<EOF
 #include "atomic.h"
 EOF

-gcc -O1 -mcpu=970 -DOMPI_POWERPC_SUPPORT_64BIT=1 -I. -S $CFILE -o atomic-32-64.s
-gcc -O1 -DOMPI_POWERPC_SUPPORT_64BIT=0 -I. -S $CFILE -o atomic-32.s
-# gcc -m64 -DOMPI_POWERPC_SUPPORT_64BIT=1 -I. -S $CFILE -o atomic-64.s
+gcc -O1 -mpowerpc64 -mcpu=970 -DOMPI_ASM_SUPPORT_64BIT=1 -I. -S $CFILE -o atomic-32-64.s
+gcc -O1 -DOMPI_ASM_SUPPORT_64BIT=0 -I. -S $CFILE -o atomic-32.s