1
1

atomic/ppc: add atomics for load-link, store-conditional, and swap

This commit adds implementations of opal_atomic_ll_32/64 and
opal_atomic_sc_32/64. These atomics can be used to implement more
efficient lifo/fifo operations on supported platforms. The only
supported platform with this commit is powerpc/power.

This commit also adds an implementation of opal_atomic_swap_32/64 for
powerpc.

Tested with Power8.

Signed-off-by: Nathan Hjelm <hjelmn@lanl.gov>
This commit is contained in:
Nathan Hjelm 2015-07-14 12:09:39 -06:00
parent f87dbca042
commit 6a19a10fbb
3 changed files with 121 additions and 3 deletions

View file

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
@ -11,7 +12,7 @@
* All rights reserved.
* Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2011 Sandia National Laboratories. All rights reserved.
* Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
@ -38,7 +39,7 @@
* - \c OPAL_HAVE_ATOMIC_MEM_BARRIER atomic memory barriers
* - \c OPAL_HAVE_ATOMIC_SPINLOCKS atomic spinlocks
* - \c OPAL_HAVE_ATOMIC_MATH_32 if 32 bit add/sub/cmpset can be done "atomicly"
* - \c OPAL_HAVE_ATOMIC_MATH_64 if 32 bit add/sub/cmpset can be done "atomicly"
* - \c OPAL_HAVE_ATOMIC_MATH_64 if 64 bit add/sub/cmpset can be done "atomicly"
*
* Note that for the Atomic math, atomic add/sub may be implemented as
* C code using opal_atomic_cmpset. The appearance of atomic
@ -177,6 +178,12 @@ typedef struct opal_atomic_lock_t opal_atomic_lock_t;
#ifndef OPAL_HAVE_ATOMIC_CMPSET_128
#define OPAL_HAVE_ATOMIC_CMPSET_128 0
#endif
#ifndef OPAL_HAVE_ATOMIC_LLSC_32
#define OPAL_HAVE_ATOMIC_LLSC_32 0
#endif
#ifndef OPAL_HAVE_ATOMIC_LLSC_64
#define OPAL_HAVE_ATOMIC_LLSC_64 0
#endif
#endif /* DOXYGEN */
/**********************************************************************

View file

@ -11,7 +11,7 @@
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2010-2014 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2012-2014 Los Alamos National Security, LLC. All rights
* Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
@ -274,6 +274,30 @@ static inline int opal_atomic_cmpset_rel_ptr(volatile void* addr,
#endif /* (OPAL_HAVE_ATOMIC_SWAP_32 || OPAL_HAVE_ATOMIC_SWAP_64) */
#if (OPAL_HAVE_ATOMIC_LLSC_32 || OPAL_HAVE_ATOMIC_LLSC_64)
#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_LLSC_32
#define opal_atomic_ll_ptr(addr) (void *) opal_atomic_ll_32((int32_t *) addr)
#define opal_atomic_sc_ptr(addr, newval) opal_atomic_sc_32((int32_t *) addr, (int32_t) newval)
#define OPAL_HAVE_ATOMIC_LLSC_PTR 1
#elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_LLSC_64
#define opal_atomic_ll_ptr(addr) (void *) opal_atomic_ll_64((int64_t *) addr)
#define opal_atomic_sc_ptr(addr, newval) opal_atomic_sc_64((int64_t *) addr, (int64_t) newval)
#define OPAL_HAVE_ATOMIC_LLSC_PTR 1
#endif
#endif /* (OPAL_HAVE_ATOMIC_LLSC_32 || OPAL_HAVE_ATOMIC_LLSC_64)*/
#if !defined(OPAL_HAVE_ATOMIC_LLSC_PTR)
#define OPAL_HAVE_ATOMIC_LLSC_PTR 0
#endif
#if OPAL_HAVE_ATOMIC_MATH_32 || OPAL_HAVE_ATOMIC_MATH_64

View file

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
@ -10,6 +11,8 @@
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2010 IBM Corporation. All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -40,6 +43,8 @@
#define OPAL_HAVE_ATOMIC_MEM_BARRIER 1
#define OPAL_HAVE_ATOMIC_CMPSET_32 1
#define OPAL_HAVE_ATOMIC_SWAP_32 1
#define OPAL_HAVE_ATOMIC_LLSC_32 1
#define OPAL_HAVE_ATOMIC_MATH_32 1
#define OPAL_HAVE_ATOMIC_ADD_32 1
@ -48,6 +53,8 @@
#if (OPAL_ASSEMBLY_ARCH == OPAL_POWERPC64) || OPAL_ASM_SUPPORT_64BIT
#define OPAL_HAVE_ATOMIC_CMPSET_64 1
#define OPAL_HAVE_ATOMIC_SWAP_64 1
#define OPAL_HAVE_ATOMIC_LLSC_64 1
#endif
@ -140,6 +147,32 @@ static inline int opal_atomic_cmpset_32(volatile int32_t *addr,
return (ret == oldval);
}
/* Load-linked for 32-bit values: lwarx loads *addr and establishes a
 * reservation on it, to be consumed by a later stwcx. (opal_atomic_sc_32).
 * NOTE(review): the asm has an empty clobber list — no "memory" clobber and
 * no barrier — so the compiler may cache/reorder other memory accesses
 * around the lwarx; presumably the paired sc supplies the ordering, but
 * confirm intended usage. Also note: any memory access the compiler emits
 * between this ll and the matching sc can silently cancel the reservation. */
static inline int32_t opal_atomic_ll_32 (volatile int32_t *addr)
{
int32_t ret;
/* lwarx: load word and reserve indexed (EA = 0 + addr) */
__asm__ __volatile__ ("lwarx %0, 0, %1 \n\t"
: "=&r" (ret)
: "r" (addr)
:);
return ret;
}
/* Store-conditional for 32-bit values: stwcx. stores newval to *addr only if
 * the reservation set by a prior opal_atomic_ll_32 on the same address is
 * still held. Returns 1 on successful store, 0 if the reservation was lost
 * (some other processor/mechanism touched the reservation granule).
 * NOTE(review): "=r" (ret) is not earlyclobber ("=&r") even though %0 is
 * written (li) while the asm still logically spans the input operands —
 * safe only because both inputs are consumed by the first instruction;
 * verify against later upstream fixes. `foo` is an unused dummy output. */
static inline int opal_atomic_sc_32 (volatile int32_t *addr, int32_t newval)
{
int32_t ret, foo;
/* stwcx. sets CR0; li does not touch CR0, so the bne- below still
 * tests the store-conditional's outcome. Failure path: ret stays 0. */
__asm__ __volatile__ (" stwcx. %4, 0, %3 \n\t"
" li %0,0 \n\t"
" bne- 1f \n\t"
" ori %0,%0,1 \n\t"
"1:"
: "=r" (ret), "=m" (*addr), "=r" (foo)
: "r" (addr), "r" (newval)
: "cc", "memory");
return ret;
}
/* these two functions aren't inlined in the non-gcc case because then
there would be two function calls (since neither cmpset_32 nor
atomic_?mb can be inlined). Instead, we "inline" them by hand in
@ -164,6 +197,20 @@ static inline int opal_atomic_cmpset_rel_32(volatile int32_t *addr,
return opal_atomic_cmpset_32(addr, oldval, newval);
}
/* Atomically replace *addr with newval and return the previous value.
 * Classic PowerPC lwarx/stwcx. retry loop: reload and retry (bne- 1b)
 * until the store-conditional succeeds. "cc" and "memory" clobbers keep
 * the compiler from caching *addr or reordering accesses across the swap.
 * NOTE(review): no explicit sync/isync, so this is a relaxed swap with no
 * acquire/release ordering — presumably callers add barriers as needed. */
static inline int32_t opal_atomic_swap_32(volatile int32_t *addr, int32_t newval)
{
int32_t ret;
__asm__ __volatile__ ("1: lwarx %0, 0, %2 \n\t"
" stwcx. %3, 0, %2 \n\t"
" bne- 1b \n\t"
: "=&r" (ret), "=m" (*addr)
: "r" (addr), "r" (newval)
: "cc", "memory");
return ret;
}
#endif /* OPAL_GCC_INLINE_ASSEMBLY */
@ -189,6 +236,32 @@ static inline int opal_atomic_cmpset_64(volatile int64_t *addr,
return (ret == oldval);
}
/* 64-bit load-linked: ldarx loads *addr and establishes a reservation for a
 * later stdcx. (opal_atomic_sc_64). Mirrors opal_atomic_ll_32.
 * NOTE(review): empty clobber list — no "memory" clobber/barrier, so the
 * compiler may reorder or cache memory accesses around the ldarx; confirm
 * the paired sc provides the required ordering in all intended uses. */
static inline int64_t opal_atomic_ll_64(volatile int64_t *addr)
{
int64_t ret;
/* ldarx: load doubleword and reserve indexed (EA = 0 + addr) */
__asm__ __volatile__ ("ldarx %0, 0, %1 \n\t"
: "=&r" (ret)
: "r" (addr)
:);
return ret;
}
/* 64-bit store-conditional: stdcx. stores newval to *addr only if the
 * reservation from a prior opal_atomic_ll_64 is still held. Returns 1 on
 * success, 0 if the reservation was lost. `ret`/`foo` are int32_t on
 * purpose — the boolean result and dummy output need not be 64-bit.
 * NOTE(review): as in the 32-bit variant, "=r" (ret) is not earlyclobber;
 * safe only because both inputs are consumed by the first instruction. */
static inline int opal_atomic_sc_64(volatile int64_t *addr, int64_t newval)
{
int32_t ret, foo;
/* stdcx. sets CR0; li leaves CR0 alone, so bne- tests the store result.
 * Failure: ret stays 0. Success: ori sets ret = 1. */
__asm__ __volatile__ (" stdcx. %4, 0, %3 \n\t"
" li %0,0 \n\t"
" bne- 1f \n\t"
" ori %0,%0,1 \n\t"
"1:"
: "=r" (ret), "=m" (*addr), "=r" (foo)
: "r" (addr), "r" (newval)
: "cc", "memory");
return ret;
}
/* these two functions aren't inlined in the non-gcc case because then
there would be two function calls (since neither cmpset_64 nor
atomic_?mb can be inlined). Instead, we "inline" them by hand in
@ -213,6 +286,20 @@ static inline int opal_atomic_cmpset_rel_64(volatile int64_t *addr,
return opal_atomic_cmpset_64(addr, oldval, newval);
}
/* Atomically replace the 64-bit *addr with newval and return the previous
 * value. ldarx/stdcx. retry loop, mirroring opal_atomic_swap_32: repeat
 * (bne- 1b) until the store-conditional succeeds. "cc"/"memory" clobbers
 * prevent the compiler from reordering accesses across the swap.
 * NOTE(review): no sync/isync — relaxed ordering; callers supply barriers. */
static inline int64_t opal_atomic_swap_64(volatile int64_t *addr, int64_t newval)
{
int64_t ret;
__asm__ __volatile__ ("1: ldarx %0, 0, %2 \n\t"
" stdcx. %3, 0, %2 \n\t"
" bne- 1b \n\t"
: "=&r" (ret), "=m" (*addr)
: "r" (addr), "r" (newval)
: "cc", "memory");
return ret;
}
#endif /* OPAL_GCC_INLINE_ASSEMBLY */
#elif (OPAL_ASSEMBLY_ARCH == OPAL_POWERPC32) && OPAL_ASM_SUPPORT_64BIT