/* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ #ifndef OMPI_SYS_ARCH_ATOMIC_H #define OMPI_SYS_ARCH_ATOMIC_H 1 /* * On powerpc ... */ #if OMPI_WANT_SMP_LOCKS #define MB() __asm__ __volatile__ ("sync" : : : "memory") #define RMB() __asm__ __volatile__ ("lwsync" : : : "memory") #define WMB() __asm__ __volatile__ ("eieio" : : : "memory") #define SMP_SYNC "sync \n\t" #define SMP_ISYNC "\n\tisync" #else #define MB() #define RMB() #define WMB() #define SMP_SYNC "" #define SMP_ISYNC #endif /********************************************************************** * * Define constants for PowerPC 32 * *********************************************************************/ #define OPAL_HAVE_ATOMIC_MEM_BARRIER 1 #define OPAL_HAVE_ATOMIC_CMPSET_32 1 #define OPAL_HAVE_ATOMIC_MATH_32 1 #define OPAL_HAVE_ATOMIC_ADD_32 1 #define OPAL_HAVE_ATOMIC_SUB_32 1 #if (OMPI_ASSEMBLY_ARCH == OMPI_POWERPC64) || OMPI_ASM_SUPPORT_64BIT #define OPAL_HAVE_ATOMIC_CMPSET_64 1 #endif /********************************************************************** * * Memory Barriers * *********************************************************************/ #if OMPI_GCC_INLINE_ASSEMBLY static inline void opal_atomic_mb(void) { MB(); } static inline void opal_atomic_rmb(void) { RMB(); } static inline void opal_atomic_wmb(void) { WMB(); } #elif OMPI_XLC_INLINE_ASSEMBLY /* end OMPI_GCC_INLINE_ASSEMBLY */ /* Yeah, I don't know who thought this was a reasonable syntax for * inline assembly. Do these because they are used so often and they * are fairly simple (aka: there is a tech pub on IBM's web site * containing the right hex for the instructions). */ void opal_atomic_mb(void); #pragma mc_func opal_atomic_mb { "7c0004ac" } /* sync */ #pragma reg_killed_by opal_atomic_mb /* none */ void opal_atomic_rmb(void); #pragma mc_func opal_atomic_rmb { "7c2004ac" } /* lwsync */ #pragma reg_killed_by opal_atomic_rmb /* none */ void opal_atomic_wmb(void); #pragma mc_func opal_atomic_wmb { "7c0006ac" } /* eieio */ #pragma reg_killed_by opal_atomic_wmb /* none */ #else /* end OMPI_XLC_INLINE_ASSEMBLY */ void opal_atomic_mb(void); void opal_atomic_rmb(void); void opal_atomic_wmb(void); #endif /********************************************************************** * * Atomic math operations * *********************************************************************/ #if OMPI_GCC_INLINE_ASSEMBLY static inline int opal_atomic_cmpset_32(volatile int32_t *addr, int32_t oldval, int32_t newval) { int32_t ret; __asm__ __volatile__ ( "1: lwarx %0, 0, %2 \n\t" " cmpw 0, %0, %3 \n\t" " bne- 2f \n\t" " stwcx. %4, 0, %2 \n\t" " bne- 1b \n\t" "2:" : "=&r" (ret), "=m" (*addr) : "r" (addr), "r" (oldval), "r" (newval), "m" (*addr) : "cc", "memory"); return (ret == oldval); } /* these two functions aren't inlined in the non-gcc case because then there would be two function calls (since neither cmpset_32 nor atomic_?mb can be inlined). Instead, we "inline" them by hand in the assembly, meaning there is one function call overhead instead of two */ static inline int opal_atomic_cmpset_acq_32(volatile int32_t *addr, int32_t oldval, int32_t newval) { int rc; rc = opal_atomic_cmpset_32(addr, oldval, newval); opal_atomic_rmb(); return rc; } static inline int opal_atomic_cmpset_rel_32(volatile int32_t *addr, int32_t oldval, int32_t newval) { opal_atomic_wmb(); return opal_atomic_cmpset_32(addr, oldval, newval); } #else int opal_atomic_cmpset_32(volatile int32_t *addr, int32_t oldval, int32_t newval); int opal_atomic_cmpset_acq_32(volatile int32_t *addr, int32_t oldval, int32_t newval); int opal_atomic_cmpset_rel_32(volatile int32_t *addr, int32_t oldval, int32_t newval); #endif /* OMPI_GCC_INLINE_ASSEMBLY */ #if (OMPI_ASSEMBLY_ARCH == OMPI_POWERPC64) #if OMPI_GCC_INLINE_ASSEMBLY static inline int opal_atomic_cmpset_64(volatile int64_t *addr, int64_t oldval, int64_t newval) { int64_t ret; __asm__ __volatile__ ( "1: ldarx %0, 0, %2 \n\t" " cmpd 0, %0, %3 \n\t" " bne- 2f \n\t" " stdcx. %4, 0, %2 \n\t" " bne- 1b \n\t" "2:" : "=&r" (ret), "=m" (*addr) : "r" (addr), "r" (oldval), "r" (newval), "m" (*addr) : "cc", "memory"); return (ret == oldval); } /* these two functions aren't inlined in the non-gcc case because then there would be two function calls (since neither cmpset_64 nor atomic_?mb can be inlined). Instead, we "inline" them by hand in the assembly, meaning there is one function call overhead instead of two */ static inline int opal_atomic_cmpset_acq_64(volatile int64_t *addr, int64_t oldval, int64_t newval) { int rc; rc = opal_atomic_cmpset_64(addr, oldval, newval); opal_atomic_rmb(); return rc; } static inline int opal_atomic_cmpset_rel_64(volatile int64_t *addr, int64_t oldval, int64_t newval) { opal_atomic_wmb(); return opal_atomic_cmpset_64(addr, oldval, newval); } #else /* OMPI_GCC_INLINE_ASSEMBLY */ int opal_atomic_cmpset_64(volatile int64_t *addr, int64_t oldval, int64_t newval); int opal_atomic_cmpset_acq_64(volatile int64_t *addr, int64_t oldval, int64_t newval); int opal_atomic_cmpset_rel_64(volatile int64_t *addr, int64_t oldval, int64_t newval); #endif /* OMPI_GCC_INLINE_ASSEMBLY */ #elif (OMPI_ASSEMBLY_ARCH == OMPI_POWERPC32) && OMPI_ASM_SUPPORT_64BIT #ifndef ll_low /* GLIBC provides these somewhere, so protect */ #define ll_low(x) *(((unsigned int*)&(x))+0) #define ll_high(x) *(((unsigned int*)&(x))+1) #endif #if OMPI_GCC_INLINE_ASSEMBLY static inline int opal_atomic_cmpset_64(volatile int64_t *addr, int64_t oldval, int64_t newval) { int ret; /* * We force oldval and newval into memory because PPC doesn't * appear to have a way to do a move register with offset. Since * this is 32-bit code, a 64 bit integer will be loaded into two * registers (assuming no inlining, addr will be in r3, oldval * will be in r4 and r5, and newval will be r6 and r7. We need * to load the whole thing into one register. So we have the * compiler push the values into memory and load the double word * into registers. We use r4,r5 so that the main block of code * is very similar to the pure 64 bit version. */ __asm__ __volatile__ ( "ld r4,%2 \n\t" "ld r5,%3 \n\t" "1: ldarx r9, 0, %1 \n\t" " cmpd 0, r9, r4 \n\t" " bne- 2f \n\t" " stdcx. r5, 0, %1 \n\t" " bne- 1b \n\t" "2: \n\t" "xor r5,r4,r9 \n\t" "subfic r9,r5,0 \n\t" "adde %0,r9,r5 \n\t" : "=&r" (ret) : "r"(addr), "m"(oldval), "m"(newval) : "r4", "r5", "r9", "cc", "memory"); return ret; } /* these two functions aren't inlined in the non-gcc case because then there would be two function calls (since neither cmpset_64 nor atomic_?mb can be inlined). Instead, we "inline" them by hand in the assembly, meaning there is one function call overhead instead of two */ static inline int opal_atomic_cmpset_acq_64(volatile int64_t *addr, int64_t oldval, int64_t newval) { int rc; rc = opal_atomic_cmpset_64(addr, oldval, newval); opal_atomic_rmb(); return rc; } static inline int opal_atomic_cmpset_rel_64(volatile int64_t *addr, int64_t oldval, int64_t newval) { opal_atomic_wmb(); return opal_atomic_cmpset_64(addr, oldval, newval); } #else /* OMPI_GCC_INLINE_ASSEMBLY */ int opal_atomic_cmpset_64(volatile int64_t *addr, int64_t oldval, int64_t newval); int opal_atomic_cmpset_acq_64(volatile int64_t *addr, int64_t oldval, int64_t newval); int opal_atomic_cmpset_rel_64(volatile int64_t *addr, int64_t oldval, int64_t newval); #endif /* OMPI_GCC_INLINE_ASSEMBLY */ #endif /* OMPI_ASM_SUPPORT_64BIT */ #if OMPI_GCC_INLINE_ASSEMBLY static inline int32_t opal_atomic_add_32(volatile int32_t* v, int inc) { int32_t t; __asm__ __volatile__( "1: lwarx %0, 0, %3 \n\t" " add %0, %2, %0 \n\t" " stwcx. %0, 0, %3 \n\t" " bne- 1b \n\t" : "=&r" (t), "=m" (*v) : "r" (inc), "r" (v), "m" (*v) : "cc"); return *v; } static inline int32_t opal_atomic_sub_32(volatile int32_t* v, int dec) { int32_t t; __asm__ __volatile__( "1: lwarx %0,0,%3\n\t" " subf %0,%2,%0 \n\t" " stwcx. %0,0,%3 \n\t" " bne- 1b \n\t" : "=&r" (t), "=m" (*v) : "r" (dec), "r" (v), "m" (*v) : "cc"); return *v; } #endif /* OMPI_GCC_INLINE_ASSEMBLY */ #endif /* ! OMPI_SYS_ARCH_ATOMIC_H */