From a36bdfe69f2356727df6e85e6d4c1307f2ae8f56 Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Fri, 2 Sep 2016 23:47:47 -0600 Subject: [PATCH] opal/asm: updates to powerpc assembly This commit contains the following changes: - There is a bug in the PGI 16.x betas for ppc64 that causes them to emit the incorrect instruction for loading 64-bit operands. If not cast to void *, the operands are loaded with lwz (load word and zero) instead of ld. This does not affect optimized mode. The workaround is to cast to void *; it was implemented similarly to a workaround for an xlc bug. - Actually implement 64-bit add/sub. These functions were missing and fell back to the less efficient compare-and-swap implementations. Thanks to @PHHargrove for helping to track this down. With this update, the GCC inline assembly works as expected with PGI and ppc64. Signed-off-by: Nathan Hjelm --- opal/include/opal/sys/powerpc/atomic.h | 61 ++++++++++++++++++++++---- 1 file changed, 53 insertions(+), 8 deletions(-) diff --git a/opal/include/opal/sys/powerpc/atomic.h b/opal/include/opal/sys/powerpc/atomic.h index d10437948d..745248aa0e 100644 --- a/opal/include/opal/sys/powerpc/atomic.h +++ b/opal/include/opal/sys/powerpc/atomic.h @@ -11,7 +11,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2010 IBM Corporation. All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ * @@ -55,6 +55,9 @@ #define OPAL_HAVE_ATOMIC_CMPSET_64 1 #define OPAL_HAVE_ATOMIC_SWAP_64 1 #define OPAL_HAVE_ATOMIC_LLSC_64 1 +#define OPAL_HAVE_ATOMIC_MATH_64 1 +#define OPAL_HAVE_ATOMIC_ADD_64 1 +#define OPAL_HAVE_ATOMIC_SUB_64 1 #endif @@ -128,6 +131,16 @@ void opal_atomic_isync(void) #define OPAL_ASM_ADDR(a) (a) #endif +#if defined(__PGI) +/* work-around for bug in PGI 16.5-16.7 where the compiler fails to + * correctly emit load instructions for 64-bit operands. without this + * it will emit lwz instead of ld to load the 64-bit operand. */ +#define OPAL_ASM_VALUE64(x) (void *)(intptr_t) (x) +#else +#define OPAL_ASM_VALUE64(x) x +#endif + + static inline int opal_atomic_cmpset_32(volatile int32_t *addr, int32_t oldval, int32_t newval) { @@ -217,6 +230,38 @@ static inline int32_t opal_atomic_swap_32(volatile int32_t *addr, int32_t newval #if (OPAL_ASSEMBLY_ARCH == OPAL_POWERPC64) #if OPAL_GCC_INLINE_ASSEMBLY +static inline int64_t opal_atomic_add_64 (volatile int64_t* v, int64_t inc) +{ + int64_t t; + + __asm__ __volatile__("1: ldarx %0, 0, %3 \n\t" + " add %0, %2, %0 \n\t" + " stdcx. %0, 0, %3 \n\t" + " bne- 1b \n\t" + : "=&r" (t), "+m" (*v) + : "r" (OPAL_ASM_VALUE64(inc)), "r" OPAL_ASM_ADDR(v) + : "cc"); + + return t; +} + + +static inline int64_t opal_atomic_sub_64 (volatile int64_t* v, int64_t dec) +{ + int64_t t; + + __asm__ __volatile__( + "1: ldarx %0,0,%3 \n\t" + " subf %0,%2,%0 \n\t" + " stdcx. %0,0,%3 \n\t" + " bne- 1b \n\t" + : "=&r" (t), "+m" (*v) + : "r" (OPAL_ASM_VALUE64(dec)), "r" OPAL_ASM_ADDR(v) + : "cc"); + + return t; +} + static inline int opal_atomic_cmpset_64(volatile int64_t *addr, int64_t oldval, int64_t newval) { @@ -229,8 +274,8 @@ static inline int opal_atomic_cmpset_64(volatile int64_t *addr, " stdcx. 
%4, 0, %2 \n\t" " bne- 1b \n\t" "2:" - : "=&r" (ret), "=m" (*addr) - : "r" (addr), "r" (oldval), "r" (newval), "m" (*addr) + : "=&r" (ret), "+m" (*addr) + : "r" (addr), "r" (OPAL_ASM_VALUE64(oldval)), "r" (OPAL_ASM_VALUE64(newval)) : "cc", "memory"); return (ret == oldval); @@ -249,15 +294,15 @@ static inline int64_t opal_atomic_ll_64(volatile int64_t *addr) static inline int opal_atomic_sc_64(volatile int64_t *addr, int64_t newval) { - int32_t ret, foo; + int32_t ret; - __asm__ __volatile__ (" stdcx. %4, 0, %3 \n\t" + __asm__ __volatile__ (" stdcx. %2, 0, %1 \n\t" " li %0,0 \n\t" " bne- 1f \n\t" " ori %0,%0,1 \n\t" "1:" - : "=r" (ret), "=m" (*addr), "=r" (foo) - : "r" (addr), "r" (newval) + : "=r" (ret) + : "r" (addr), "r" (OPAL_ASM_VALUE64(newval)) : "cc", "memory"); return ret; } @@ -294,7 +339,7 @@ static inline int64_t opal_atomic_swap_64(volatile int64_t *addr, int64_t newval " stdcx. %3, 0, %2 \n\t" " bne- 1b \n\t" : "=&r" (ret), "=m" (*addr) - : "r" (addr), "r" (newval) + : "r" (addr), "r" (OPAL_ASM_VALUE64(newval)) : "cc", "memory"); return ret;