From b2b58b31a28bf72a646eb253231ecab437567ec1 Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Tue, 2 Dec 2014 10:25:46 -0700 Subject: [PATCH 1/5] Add support for 128-bit compare and swap on x86_64 when available. A 128-bit compare-and-swap will enable a better atomic lifo implementation that uses the pointer + counter method to avoid ABA issues. This commit adds configury to check for the instruction (cmpxchg16b) and adds an implementation that uses the __int128 type, a compiler extension provided by GCC and compatible compilers (it is not part of C99). --- config/opal_config_asm.m4 | 20 ++++++++++++++++++++ configure.ac | 1 + opal/include/opal/sys/amd64/atomic.h | 28 ++++++++++++++++++++++++++++ opal/include/opal/sys/atomic.h | 3 +++ opal/include/opal/sys/atomic_impl.h | 7 +++++-- 5 files changed, 57 insertions(+), 2 deletions(-) diff --git a/config/opal_config_asm.m4 b/config/opal_config_asm.m4 index 5ec73d913f..6cca0f5c06 100644 --- a/config/opal_config_asm.m4 +++ b/config/opal_config_asm.m4 @@ -558,6 +558,25 @@ AC_DEFUN([OPAL_CHECK_SPARCV8PLUS],[ unset sparc_result ])dnl +dnl ################################################################# +dnl +dnl OPAL_CHECK_CMPXCHG16B +dnl +dnl ################################################################# +AC_DEFUN([OPAL_CHECK_CMPXCHG16B],[ + AC_MSG_CHECKING([if have x86_64 16-byte compare-and-exchange]) + OPAL_VAR_SCOPE_PUSH([cmpxchg16b_result]) + OPAL_TRY_ASSEMBLE([$opal_cv_asm_text + cmpxchg16b 0], + [AC_MSG_RESULT([yes]) + cmpxchg16b_result=1], + [AC_MSG_RESULT([no]) + cmpxchg16b_result=0]) + AC_DEFINE_UNQUOTED([OPAL_HAVE_CMPXCHG16B], [$cmpxchg16b_result], + [Whether the processor supports the cmpxchg16b instruction]) + OPAL_VAR_SCOPE_POP +])dnl + dnl ################################################################# dnl @@ -801,6 +820,7 @@ AC_DEFUN([OPAL_CONFIG_ASM],[ fi OPAL_ASM_SUPPORT_64BIT=1 OPAL_GCC_INLINE_ASSIGN='"xaddl %1,%0" : "=m"(ret), "+r"(negone) : "m"(ret)' + OPAL_CHECK_CMPXCHG16B ;; ia64-*) diff --git a/configure.ac b/configure.ac index 850e94f100..99d51e7b27 100644 
--- a/configure.ac +++ b/configure.ac @@ -366,6 +366,7 @@ AC_CHECK_TYPES(uint32_t) AC_CHECK_TYPES(int64_t) AC_CHECK_TYPES(uint64_t) AC_CHECK_TYPES(int128_t) +AC_CHECK_TYPES(__int128) AC_CHECK_TYPES(uint128_t) AC_CHECK_TYPES(long long) diff --git a/opal/include/opal/sys/amd64/atomic.h b/opal/include/opal/sys/amd64/atomic.h index ddb6213c29..3fe6b00649 100644 --- a/opal/include/opal/sys/amd64/atomic.h +++ b/opal/include/opal/sys/amd64/atomic.h @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology @@ -10,6 +11,8 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 Sun Microsystems, Inc. All rights reserverd. + * Copyright (c) 2012-2014 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -119,6 +122,31 @@ static inline int opal_atomic_cmpset_64( volatile int64_t *addr, #define opal_atomic_cmpset_acq_64 opal_atomic_cmpset_64 #define opal_atomic_cmpset_rel_64 opal_atomic_cmpset_64 +#if OPAL_GCC_INLINE_ASSEMBLY && OPAL_HAVE_CMPXCHG16B && HAVE___INT128 + +static inline __int128 opal_atomic_cmpset_128 (volatile __int128 *addr, __int128 oldval, + __int128 newval) +{ + unsigned char ret; + + /* cmpxchg16b compares the value at the address with rax:rdx (low:high). if the values are + * the same the contents of rbx:rcx are stored at the address. in all cases the value stored + * at the address is returned in rax:rdx. 
*/ + __asm__ __volatile__ (SMPLOCK "cmpxchg16b (%%rsi) \n\t" + "sete %0 \n\t" + : "=qm" (ret) + : "S" (addr), "b" (((int64_t *)&newval)[0]), "c" (((int64_t *)&newval)[1]), + "a" (((int64_t *)&oldval)[0]), "d" (((int64_t *)&oldval)[1]) + : "memory", "cc", "eax", "edx"); + + return ret; +} + +#define OPAL_HAVE_ATOMIC_CMPSET_128 1 + +#endif /* OPAL_GCC_INLINE_ASSEMBLY */ + + #if OPAL_GCC_INLINE_ASSEMBLY #define OPAL_HAVE_ATOMIC_SWAP_32 1 diff --git a/opal/include/opal/sys/atomic.h b/opal/include/opal/sys/atomic.h index 220ded71a3..952a118ec2 100644 --- a/opal/include/opal/sys/atomic.h +++ b/opal/include/opal/sys/atomic.h @@ -177,6 +177,9 @@ typedef struct opal_atomic_lock_t opal_atomic_lock_t; #ifndef OPAL_HAVE_ATOMIC_CMPSET_64 #define OPAL_HAVE_ATOMIC_CMPSET_64 0 #endif +#ifndef OPAL_HAVE_ATOMIC_CMPSET_128 +#define OPAL_HAVE_ATOMIC_CMPSET_128 0 +#endif #endif /* DOXYGEN */ /********************************************************************** diff --git a/opal/include/opal/sys/atomic_impl.h b/opal/include/opal/sys/atomic_impl.h index 8b70eeae3f..496fecdcc4 100644 --- a/opal/include/opal/sys/atomic_impl.h +++ b/opal/include/opal/sys/atomic_impl.h @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology @@ -10,6 +11,8 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2010-2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2012-2014 Los Alamos National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -266,9 +269,9 @@ static inline int opal_atomic_cmpset_rel_ptr(volatile void* addr, #if (OPAL_HAVE_ATOMIC_SWAP_32 || OPAL_HAVE_ATOMIC_SWAP_64) #if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_SWAP_32 -#define opal_atomic_swap_ptr(addr, value) opal_atomic_swap_32((int32_t *) addr, value) +#define opal_atomic_swap_ptr(addr, value) (void *) opal_atomic_swap_32((int32_t *) addr, (int32_t) value) #elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_SWAP_64 -#define opal_atomic_swap_ptr(addr, value) opal_atomic_swap_64((int64_t *) addr, value) +#define opal_atomic_swap_ptr(addr, value) (void *) opal_atomic_swap_64((int64_t *) addr, (int64_t) value) #endif #endif /* (OPAL_HAVE_ATOMIC_SWAP_32 || OPAL_HAVE_ATOMIC_SWAP_64) */ From b1632dfb3c7141350c6e9f97829800e455d6f7d4 Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Wed, 3 Dec 2014 16:46:22 -0700 Subject: [PATCH 2/5] Define opal_int128_t type if a 128-bit integer is available. There currently is no standard support for 128-bit integer types. Any use of the __int128 and int128_t types can lead to warnings from the compiler when using -Wpedantic. Additionally, some compilers may support __int128 and others may support int128_t. This commit addresses both issues by defining opal_int128_t if there is a supported 128-bit type. In the case of GCC a pragma has been added to suppress warnings about __int128 not being a standard C type. 
--- opal/include/opal/sys/amd64/atomic.h | 6 +++--- opal/include/opal/sys/atomic.h | 5 +---- opal/include/opal_stdint.h | 27 +++++++++++++++++++++++++++ 3 files changed, 31 insertions(+), 7 deletions(-) diff --git a/opal/include/opal/sys/amd64/atomic.h b/opal/include/opal/sys/amd64/atomic.h index 3fe6b00649..a84556f3d4 100644 --- a/opal/include/opal/sys/amd64/atomic.h +++ b/opal/include/opal/sys/amd64/atomic.h @@ -122,10 +122,10 @@ static inline int opal_atomic_cmpset_64( volatile int64_t *addr, #define opal_atomic_cmpset_acq_64 opal_atomic_cmpset_64 #define opal_atomic_cmpset_rel_64 opal_atomic_cmpset_64 -#if OPAL_GCC_INLINE_ASSEMBLY && OPAL_HAVE_CMPXCHG16B && HAVE___INT128 +#if OPAL_GCC_INLINE_ASSEMBLY && OPAL_HAVE_CMPXCHG16B && HAVE_OPAL_INT128_T -static inline __int128 opal_atomic_cmpset_128 (volatile __int128 *addr, __int128 oldval, - __int128 newval) +static inline opal_int128_t opal_atomic_cmpset_128 (volatile opal_int128_t *addr, opal_int128_t oldval, + opal_int128_t newval) { unsigned char ret; diff --git a/opal/include/opal/sys/atomic.h b/opal/include/opal/sys/atomic.h index 952a118ec2..cae28a49b1 100644 --- a/opal/include/opal/sys/atomic.h +++ b/opal/include/opal/sys/atomic.h @@ -49,10 +49,7 @@ #include "opal_config.h" #include "opal/sys/architecture.h" - -#ifdef HAVE_SYS_TYPES_H -#include -#endif +#include "opal_stdint.h" /* do some quick #define cleanup in cases where we are doing testing... 
*/ diff --git a/opal/include/opal_stdint.h b/opal/include/opal_stdint.h index be5f799d73..713478a1d6 100644 --- a/opal/include/opal_stdint.h +++ b/opal/include/opal_stdint.h @@ -134,6 +134,33 @@ typedef unsigned long long uint64_t; #endif +/* 128-bit */ + +#ifdef HAVE_INT128_T + +typedef int128_t opal_int128_t; +typedef uint128_t opal_uint128_t; + +#define HAVE_OPAL_INT128_T 1 + +#elif HAVE___INT128 + +/* suppress warning about __int128 type */ +#pragma GCC diagnostic ignored "-Wpedantic" +typedef __int128 opal_int128_t; + +/* suppress warning about __int128 type */ +#pragma GCC diagnostic ignored "-Wpedantic" +typedef unsigned __int128 opal_uint128_t; + +#define HAVE_OPAL_INT128_T 1 + +#else + +#define HAVE_OPAL_INT128_T 0 + +#endif + /* Pointers */ #if SIZEOF_VOID_P == SIZEOF_INT From 250f74960252c19596277a28109e3c28a3f27232 Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Wed, 3 Dec 2014 17:12:22 -0700 Subject: [PATCH 3/5] Fix return type of opal_atomic_cmpset_128. The return type will be opal_int128_t after the fetching atomics changes but for now it is int. 
--- opal/include/opal/sys/amd64/atomic.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/opal/include/opal/sys/amd64/atomic.h b/opal/include/opal/sys/amd64/atomic.h index a84556f3d4..83bd76723a 100644 --- a/opal/include/opal/sys/amd64/atomic.h +++ b/opal/include/opal/sys/amd64/atomic.h @@ -124,8 +124,8 @@ static inline int opal_atomic_cmpset_64( volatile int64_t *addr, #if OPAL_GCC_INLINE_ASSEMBLY && OPAL_HAVE_CMPXCHG16B && HAVE_OPAL_INT128_T -static inline opal_int128_t opal_atomic_cmpset_128 (volatile opal_int128_t *addr, opal_int128_t oldval, - opal_int128_t newval) +static inline int opal_atomic_cmpset_128 (volatile opal_int128_t *addr, opal_int128_t oldval, + opal_int128_t newval) { unsigned char ret; @@ -139,7 +139,7 @@ static inline opal_int128_t opal_atomic_cmpset_128 (volatile opal_int128_t *addr "a" (((int64_t *)&oldval)[0]), "d" (((int64_t *)&oldval)[1]) : "memory", "cc", "eax", "edx"); - return ret; + return (int) ret; } #define OPAL_HAVE_ATOMIC_CMPSET_128 1 From fe787512d8a6e0069cd38a4f01d8edb4106021bf Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Wed, 3 Dec 2014 18:40:34 -0700 Subject: [PATCH 4/5] Add support for __sync builtin compare and swap on 128-bit values --- config/opal_config_asm.m4 | 19 ++++++++++++++++++- opal/include/opal/sys/sync_builtin/atomic.h | 14 ++++++++++++++ 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/config/opal_config_asm.m4 b/config/opal_config_asm.m4 index 6cca0f5c06..f3be9801dd 100644 --- a/config/opal_config_asm.m4 +++ b/config/opal_config_asm.m4 @@ -19,6 +19,23 @@ dnl $HEADER$ dnl +AC_DEFUN([OPAL_CHECK_SYNC_BUILTIN_CSWAP_INT128], [ + AC_MSG_CHECKING([for __sync builtin atomic compare-and-swap on 128-bit values]) + + OPAL_VAR_SCOPE_PUSH([sync_bool_compare_and_swap_128_result]) + + AC_TRY_COMPILE([], [__int128 x = 0; __sync_bool_compare_and_swap (&x, 0, 1);], + [AC_MSG_RESULT([yes]) + sync_bool_compare_and_swap_128_result=1], + [AC_MSG_RESULT([no]) + 
sync_bool_compare_and_swap_128_result=0]) + + AC_DEFINE_UNQUOTED([OPAL_HAVE_SYNC_BUILTIN_CSWAP_INT128], [$sync_bool_compare_and_swap_128_result], + [Whether the __sync builtin atomic compare and swap supports 128-bit values]) + + OPAL_VAR_SCOPE_POP +]) + AC_DEFUN([OPAL_CHECK_SYNC_BUILTINS], [ AC_MSG_CHECKING([for __sync builtin atomics]) @@ -577,7 +594,6 @@ AC_DEFUN([OPAL_CHECK_CMPXCHG16B],[ OPAL_VAR_SCOPE_POP ])dnl - dnl ################################################################# dnl dnl OPAL_CHECK_INLINE_GCC @@ -789,6 +805,7 @@ AC_DEFUN([OPAL_CONFIG_ASM],[ [AC_MSG_ERROR([__sync builtin atomics requested but not found.])]) AC_DEFINE([OPAL_C_GCC_INLINE_ASSEMBLY], [1], [Whether C compiler supports GCC style inline assembly]) + OPAL_CHECK_SYNC_BUILTIN_CSWAP_INT128 elif test "$enable_osx_builtin_atomics" = "yes" ; then AC_CHECK_HEADER([libkern/OSAtomic.h],[opal_cv_asm_builtin="BUILTIN_OSX"], [AC_MSG_ERROR([OSX builtin atomics requested but not found.])]) diff --git a/opal/include/opal/sys/sync_builtin/atomic.h b/opal/include/opal/sys/sync_builtin/atomic.h index cc8991a646..d526a16008 100644 --- a/opal/include/opal/sys/sync_builtin/atomic.h +++ b/opal/include/opal/sys/sync_builtin/atomic.h @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology @@ -10,6 +11,8 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011 Sandia National Laboratories. All rights reserved. + * Copyright (c) 2014 Los Alamos National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -106,6 +109,17 @@ static inline int opal_atomic_cmpset_64( volatile int64_t *addr, return __sync_bool_compare_and_swap(addr, oldval, newval); } +#if OPAL_HAVE_SYNC_BUILTIN_CSWAP_INT128 +static inline int opal_atomic_cmpset_128 (volatile opal_int128_t *addr, + opal_int128_t oldval, opal_int128_t newval) +{ + return __sync_bool_compare_and_swap(addr, oldval, newval); +} + +#define OPAL_HAVE_ATOMIC_CMPSET_128 1 + +#endif + #define OPAL_HAVE_ATOMIC_MATH_64 1 #define OPAL_HAVE_ATOMIC_ADD_64 1 static inline int64_t opal_atomic_add_64(volatile int64_t *addr, int64_t delta) From 0efe6baf649e9d6ee5facb61c2b0570b30143685 Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Thu, 4 Dec 2014 14:25:53 -0700 Subject: [PATCH 5/5] Add check for -mcx16 flag for 128-bit compare and swap Some versions of gcc require this flag to be set before the __sync builtin atomic compare and swap will support 128-bit values. If the flag is required this check adds the flag to the CFLAGS. 
--- config/opal_config_asm.m4 | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/config/opal_config_asm.m4 b/config/opal_config_asm.m4 index f3be9801dd..41672d65d7 100644 --- a/config/opal_config_asm.m4 +++ b/config/opal_config_asm.m4 @@ -20,16 +20,30 @@ dnl AC_DEFUN([OPAL_CHECK_SYNC_BUILTIN_CSWAP_INT128], [ + + OPAL_VAR_SCOPE_PUSH([sync_bool_compare_and_swap_128_result CFLAGS_save]) + AC_MSG_CHECKING([for __sync builtin atomic compare-and-swap on 128-bit values]) - - OPAL_VAR_SCOPE_PUSH([sync_bool_compare_and_swap_128_result]) - AC_TRY_COMPILE([], [__int128 x = 0; __sync_bool_compare_and_swap (&x, 0, 1);], [AC_MSG_RESULT([yes]) sync_bool_compare_and_swap_128_result=1], [AC_MSG_RESULT([no]) sync_bool_compare_and_swap_128_result=0]) + if test $sync_bool_compare_and_swap_128_result = 0 ; then + CFLAGS_save=$CFLAGS + CFLAGS="$CFLAGS -mcx16" + + AC_MSG_CHECKING([for __sync builtin atomic compare-and-swap on 128-bit values with -mcx16 flag]) + AC_TRY_COMPILE([], [__int128 x = 0; __sync_bool_compare_and_swap (&x, 0, 1);], + [AC_MSG_RESULT([yes]) + sync_bool_compare_and_swap_128_result=1 + CFLAGS_save="$CFLAGS"], + [AC_MSG_RESULT([no])]) + + CFLAGS=$CFLAGS_save + fi + AC_DEFINE_UNQUOTED([OPAL_HAVE_SYNC_BUILTIN_CSWAP_INT128], [$sync_bool_compare_and_swap_128_result], [Whether the __sync builtin atomic compare and swap supports 128-bit values])