opal: add armv8 support

This commit adds assembly support for aarch64. Signed-off-by: Nathan Hjelm <hjelmn@me.com>
2016-05-22 00:13:45 -06:00 · 2016-05-22 00:13:45 -06:00 · 0084ad0d1b
--- a/config/opal_config_asm.m4
+++ b/config/opal_config_asm.m4
@ -934,6 +934,14 @@ AC_DEFUN([OPAL_CONFIG_ASM],[
            OPAL_ASM_SUPPORT_64BIT=1
            OPAL_GCC_INLINE_ASSIGN='"mov %0=r0\n;;\n" : "=&r"(ret)'
            ;;
+	aarch64*)
+            opal_cv_asm_arch="ARM64"
+            OPAL_ASM_SUPPORT_64BIT=1
+            OPAL_ASM_ARM_VERSION=8
+            AC_DEFINE_UNQUOTED([OPAL_ASM_ARM_VERSION], [$OPAL_ASM_ARM_VERSION],
+                               [What ARM assembly version to use])
+            OPAL_GCC_INLINE_ASSIGN='"mov %0, #0" : "=&r"(ret)'
+            ;;

        armv7*)
            opal_cv_asm_arch="ARM"
--- a/opal/include/opal/sys/Makefile.am
+++ b/opal/include/opal/sys/Makefile.am
@ -11,6 +11,8 @@
 #                         All rights reserved.
 # Copyright (c) 2010      Cisco Systems, Inc.  All rights reserved.
 # Copyright (c) 2011      Sandia National Laboratories. All rights reserved.
+# Copyright (c) 2016      Los Alamos National Security, LLC. All rights
+#                         reserved.
 # $COPYRIGHT$
 #
 # Additional copyrights may follow
@ -29,6 +31,7 @@ headers += \

 include opal/sys/amd64/Makefile.am
 include opal/sys/arm/Makefile.am
+include opal/sys/arm64/Makefile.am
 include opal/sys/ia32/Makefile.am
 include opal/sys/ia64/Makefile.am
 include opal/sys/mips/Makefile.am
--- a/opal/include/opal/sys/architecture.h
+++ b/opal/include/opal/sys/architecture.h
@ -11,6 +11,8 @@
 *                         All rights reserved.
 * Copyright (c) 2011      Sandia National Laboratories. All rights reserved.
 * Copyright (c) 2014      Intel, Inc. All rights reserved
+ * Copyright (c) 2016      Los Alamos National Security, LLC. All rights
+ *                         reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
@ -37,6 +39,7 @@
 #define OPAL_SPARCV9_64     0062
 #define OPAL_MIPS           0070
 #define OPAL_ARM            0100
+#define OPAL_ARM64          0101
 #define OPAL_BUILTIN_SYNC   0200
 #define OPAL_BUILTIN_OSX    0201
 #define OPAL_BUILTIN_NO     0202
--- a/opal/include/opal/sys/arm64/Makefile.am
+++ b/opal/include/opal/sys/arm64/Makefile.am
@ -0,0 +1,24 @@
+#
+# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+#                         University Research and Technology
+#                         Corporation.  All rights reserved.
+# Copyright (c) 2004-2008 The University of Tennessee and The University
+#                         of Tennessee Research Foundation.  All rights
+#                         reserved.
+# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+#                         University of Stuttgart.  All rights reserved.
+# Copyright (c) 2004-2005 The Regents of the University of California.
+#                         All rights reserved.
+# $COPYRIGHT$
+#
+# Additional copyrights may follow
+#
+# $HEADER$
+#
+
+# This makefile.am does not stand on its own - it is included from opal/include/Makefile.am
+
+headers += \
+       opal/sys/arm64/atomic.h \
+       opal/sys/arm64/timer.h
+
--- a/opal/include/opal/sys/arm64/atomic.h
+++ b/opal/include/opal/sys/arm64/atomic.h
@ -0,0 +1,270 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation.  All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation.  All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart.  All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2010      IBM Corporation.  All rights reserved.
+ * Copyright (c) 2010      ARM ltd.  All rights reserved.
+ * Copyright (c) 2016      Los Alamos National Security, LLC. All rights
+ *                         reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#if !defined(OPAL_SYS_ARCH_ATOMIC_H)
+
+#define OPAL_SYS_ARCH_ATOMIC_H 1
+
+#if OPAL_GCC_INLINE_ASSEMBLY
+
+#define OPAL_HAVE_ATOMIC_MEM_BARRIER 1  /* opal_atomic_mb/rmb/wmb are defined in this header */
+#define OPAL_HAVE_ATOMIC_LLSC_32 1      /* opal_atomic_ll_32 / opal_atomic_sc_32 */
+#define OPAL_HAVE_ATOMIC_CMPSET_32 1    /* opal_atomic_cmpset_32 and its _acq / _rel variants */
+#define OPAL_HAVE_ATOMIC_MATH_32 1      /* NOTE(review): no MATH_64 counterpart is set here -- confirm that is intended */
+#define OPAL_HAVE_ATOMIC_CMPSET_64 1    /* opal_atomic_cmpset_64 and its _acq / _rel variants */
+#define OPAL_HAVE_ATOMIC_LLSC_64 1      /* opal_atomic_ll_64 / opal_atomic_sc_64 */
+#define OPAL_HAVE_ATOMIC_ADD_32 1       /* opal_atomic_add_32, generated by OPAL_ASM_MAKE_ATOMIC */
+#define OPAL_HAVE_ATOMIC_SUB_32 1       /* opal_atomic_sub_32, generated by OPAL_ASM_MAKE_ATOMIC */
+#define OPAL_HAVE_ATOMIC_ADD_64 1       /* opal_atomic_add_64, generated by OPAL_ASM_MAKE_ATOMIC */
+#define OPAL_HAVE_ATOMIC_SUB_64 1       /* opal_atomic_sub_64, generated by OPAL_ASM_MAKE_ATOMIC */
+
+#define MB()  __asm__ __volatile__ ("dmb sy" : : : "memory")  /* "dmb sy" data memory barrier; used by opal_atomic_mb. The "memory" clobber also stops compiler reordering */
+#define RMB() __asm__ __volatile__ ("dmb ld" : : : "memory")  /* "dmb ld" (load) form of the barrier; used by opal_atomic_rmb */
+#define WMB() __asm__ __volatile__ ("dmb st" : : : "memory")  /* "dmb st" (store) form of the barrier; used by opal_atomic_wmb */
+
+/**********************************************************************
+ *
+ * Memory Barriers
+ *
+ *********************************************************************/
+
+/**
+ * Full memory barrier.
+ *
+ * Emits the same statement as the MB() macro: a "dmb sy" instruction with a
+ * "memory" clobber so the compiler does not move memory accesses across it.
+ */
+static inline void opal_atomic_mb (void)
+{
+    __asm__ __volatile__ ("dmb sy" : : : "memory");
+}
+
+/**
+ * Read memory barrier.
+ *
+ * Emits the same statement as the RMB() macro: a "dmb ld" instruction with a
+ * "memory" clobber so the compiler does not move memory accesses across it.
+ */
+static inline void opal_atomic_rmb (void)
+{
+    __asm__ __volatile__ ("dmb ld" : : : "memory");
+}
+
+/**
+ * Write memory barrier.
+ *
+ * Emits the same statement as the WMB() macro: a "dmb st" instruction with a
+ * "memory" clobber so the compiler does not move memory accesses across it.
+ */
+static inline void opal_atomic_wmb (void)
+{
+    __asm__ __volatile__ ("dmb st" : : : "memory");
+}
+
+/**
+ * Instruction synchronization barrier.
+ *
+ * Issues an "isb" instruction.  The "memory" clobber makes the statement a
+ * compiler-level barrier as well, consistent with the MB()/RMB()/WMB()
+ * macros in this header; without it the compiler is free to move ordinary
+ * memory accesses across the asm statement even though it is volatile.
+ */
+static inline void opal_atomic_isync (void)
+{
+    __asm__ __volatile__ ("isb" : : : "memory");
+}
+
+/**********************************************************************
+ *
+ * Atomic compare-and-set, load-linked/store-conditional and math operations
+ *
+ *********************************************************************/
+
+static inline int opal_atomic_cmpset_32(volatile int32_t *addr,
+                                        int32_t oldval, int32_t newval)
+{ /* Compare-and-set on a 32-bit value: if *addr equals oldval, store
+   * newval to *addr.  Returns nonzero when the value read from *addr
+   * matched oldval, zero otherwise.
+   * NOTE(review): this sequence is identical to
+   * opal_atomic_cmpset_acq_32 (it also loads with ldaxr); confirm the
+   * plain variant is meant to carry acquire ordering. */
+    int32_t ret, tmp; /* ret: value loaded from *addr; tmp: stxr status */
+
+    __asm__ __volatile__ ("1:  ldaxr    %w0, [%2]      \n" /* load-acquire exclusive of *addr into ret */
+                          "    cmp     %w0, %w3        \n" /* compare loaded value with oldval */
+                          "    bne     2f              \n" /* mismatch: leave without storing */
+                          "    stxr    %w1, %w4, [%2]  \n" /* try to store newval; status goes to tmp */
+                          "    cbnz    %w1, 1b         \n" /* nonzero status: retry from the load */
+                          "2:                          \n"
+                          : "=&r" (ret), "=&r" (tmp)
+                          : "r" (addr), "r" (oldval), "r" (newval)
+                          : "cc", "memory");
+
+    return (ret == oldval);
+}
+
+/* the acquire and release variants below are not built from cmpset_32
+   plus a separate atomic_?mb call.  Instead, the ordering is "inlined"
+   by hand in the assembly (ldaxr for acquire, stlxr for release), so
+   each variant is one self-contained sequence.  This header is only
+   used when OPAL_GCC_INLINE_ASSEMBLY is set. */
+static inline int opal_atomic_cmpset_acq_32(volatile int32_t *addr,
+                                            int32_t oldval, int32_t newval)
+{ /* Acquire variant of the 32-bit compare-and-set: the exclusive load
+   * is a load-acquire (ldaxr), the exclusive store is a plain stxr.
+   * Returns nonzero when the value read from *addr matched oldval,
+   * zero otherwise. */
+    int32_t ret, tmp; /* ret: value loaded from *addr; tmp: stxr status */
+
+    __asm__ __volatile__ ("1:  ldaxr   %w0, [%2]       \n" /* load-acquire exclusive of *addr into ret */
+                          "    cmp     %w0, %w3        \n" /* compare loaded value with oldval */
+                          "    bne     2f              \n" /* mismatch: leave without storing */
+                          "    stxr    %w1, %w4, [%2]  \n" /* try to store newval; status goes to tmp */
+                          "    cbnz    %w1, 1b         \n" /* nonzero status: retry from the load */
+                          "2:                          \n"
+                          : "=&r" (ret), "=&r" (tmp)
+                          : "r" (addr), "r" (oldval), "r" (newval)
+                          : "cc", "memory");
+
+    return (ret == oldval);
+}
+
+
+static inline int opal_atomic_cmpset_rel_32(volatile int32_t *addr,
+                                            int32_t oldval, int32_t newval)
+{ /* Release variant of the 32-bit compare-and-set: the exclusive load
+   * is a plain ldxr, the exclusive store is a store-release (stlxr).
+   * Returns nonzero when the value read from *addr matched oldval,
+   * zero otherwise. */
+    int32_t ret, tmp; /* ret: value loaded from *addr; tmp: stlxr status */
+
+    __asm__ __volatile__ ("1:  ldxr    %w0, [%2]       \n" /* exclusive load of *addr into ret */
+                          "    cmp     %w0, %w3        \n" /* compare loaded value with oldval */
+                          "    bne     2f              \n" /* mismatch: leave without storing */
+                          "    stlxr   %w1, %w4, [%2]  \n" /* try store-release of newval; status goes to tmp */
+                          "    cbnz    %w1, 1b         \n" /* nonzero status: retry from the load */
+                          "2:                          \n"
+                          : "=&r" (ret), "=&r" (tmp)
+                          : "r" (addr), "r" (oldval), "r" (newval)
+                          : "cc", "memory");
+
+    return (ret == oldval);
+}
+
+/**
+ * Load-linked half of a 32-bit LL/SC pair.
+ *
+ * Reads *addr with a load-acquire exclusive (ldaxr); pair it with
+ * opal_atomic_sc_32() to attempt the conditional store.
+ *
+ * @param addr  Location to read.
+ * @return      The 32-bit value loaded from *addr.
+ */
+static inline int32_t opal_atomic_ll_32 (volatile int32_t *addr)
+{
+    int32_t ret;
+
+    /* The asm reads memory through addr, which the compiler cannot see
+     * from the operand list alone.  The "memory" clobber (already used by
+     * every other atomic in this header) keeps the compiler from caching
+     * or moving ordinary memory accesses across the load-acquire. */
+    __asm__ __volatile__ ("ldaxr    %w0, [%1]          \n"
+                          : "=&r" (ret)
+                          : "r" (addr)
+                          : "memory");
+
+    return ret;
+}
+
+static inline int opal_atomic_sc_32 (volatile int32_t *addr, int32_t newval)
+{ /* Store-conditional half of a 32-bit LL/SC pair: attempt to store
+   * newval to *addr with a store-release exclusive (stlxr).  Returns 1
+   * when the status written by stlxr is zero, 0 otherwise. */
+    int ret; /* status written by stlxr */
+
+    __asm__ __volatile__ ("stlxr    %w0, %w2, [%1]     \n"
+                          : "=&r" (ret)
+                          : "r" (addr), "r" (newval)
+                          : "cc", "memory");
+
+    return ret == 0;
+}
+
+static inline int opal_atomic_cmpset_64(volatile int64_t *addr,
+                                        int64_t oldval, int64_t newval)
+{ /* Compare-and-set on a 64-bit value: if *addr equals oldval, store
+   * newval to *addr.  Returns nonzero when the value read from *addr
+   * matched oldval, zero otherwise.
+   * NOTE(review): this sequence is identical to
+   * opal_atomic_cmpset_acq_64 (it also loads with ldaxr); confirm the
+   * plain variant is meant to carry acquire ordering. */
+    int64_t ret; /* value loaded from *addr */
+    int tmp;     /* stxr status, accessed as a 32-bit register (%w1) */
+
+    __asm__ __volatile__ ("1:  ldaxr    %0, [%2]       \n" /* load-acquire exclusive of *addr into ret */
+                          "    cmp     %0, %3          \n" /* compare loaded value with oldval */
+                          "    bne     2f              \n" /* mismatch: leave without storing */
+                          "    stxr    %w1, %4, [%2]   \n" /* try to store newval; status goes to tmp */
+                          "    cbnz    %w1, 1b         \n" /* nonzero status: retry from the load */
+                          "2:                          \n"
+                          : "=&r" (ret), "=&r" (tmp)
+                          : "r" (addr), "r" (oldval), "r" (newval)
+                          : "cc", "memory");
+
+    return (ret == oldval);
+}
+
+/* the acquire and release variants below are not built from cmpset_64
+   plus a separate atomic_?mb call.  Instead, the ordering is "inlined"
+   by hand in the assembly (ldaxr for acquire, stlxr for release), so
+   each variant is one self-contained sequence.  This header is only
+   used when OPAL_GCC_INLINE_ASSEMBLY is set. */
+static inline int opal_atomic_cmpset_acq_64(volatile int64_t *addr,
+                                            int64_t oldval, int64_t newval)
+{ /* Acquire variant of the 64-bit compare-and-set: the exclusive load
+   * is a load-acquire (ldaxr), the exclusive store is a plain stxr.
+   * Returns nonzero when the value read from *addr matched oldval,
+   * zero otherwise. */
+    int64_t ret; /* value loaded from *addr */
+    int tmp;     /* stxr status, accessed as a 32-bit register (%w1) */
+
+    __asm__ __volatile__ ("1:  ldaxr   %0, [%2]        \n" /* load-acquire exclusive of *addr into ret */
+                          "    cmp     %0, %3          \n" /* compare loaded value with oldval */
+                          "    bne     2f              \n" /* mismatch: leave without storing */
+                          "    stxr    %w1, %4, [%2]   \n" /* try to store newval; status goes to tmp */
+                          "    cbnz    %w1, 1b         \n" /* nonzero status: retry from the load */
+                          "2:                          \n"
+                          : "=&r" (ret), "=&r" (tmp)
+                          : "r" (addr), "r" (oldval), "r" (newval)
+                          : "cc", "memory");
+
+    return (ret == oldval);
+}
+
+
+static inline int opal_atomic_cmpset_rel_64(volatile int64_t *addr,
+                                            int64_t oldval, int64_t newval)
+{ /* Release variant of the 64-bit compare-and-set: the exclusive load
+   * is a plain ldxr, the exclusive store is a store-release (stlxr).
+   * Returns nonzero when the value read from *addr matched oldval,
+   * zero otherwise. */
+    int64_t ret; /* value loaded from *addr */
+    int tmp;     /* stlxr status, accessed as a 32-bit register (%w1) */
+
+    __asm__ __volatile__ ("1:  ldxr    %0, [%2]        \n" /* exclusive load of *addr into ret */
+                          "    cmp     %0, %3          \n" /* compare loaded value with oldval */
+                          "    bne     2f              \n" /* mismatch: leave without storing */
+                          "    stlxr   %w1, %4, [%2]   \n" /* try store-release of newval; status goes to tmp */
+                          "    cbnz    %w1, 1b         \n" /* nonzero status: retry from the load */
+                          "2:                          \n"
+                          : "=&r" (ret), "=&r" (tmp)
+                          : "r" (addr), "r" (oldval), "r" (newval)
+                          : "cc", "memory");
+
+    return (ret == oldval);
+}
+
+/**
+ * Load-linked half of a 64-bit LL/SC pair.
+ *
+ * Reads *addr with a load-acquire exclusive (ldaxr); pair it with
+ * opal_atomic_sc_64() to attempt the conditional store.
+ *
+ * @param addr  Location to read.
+ * @return      The 64-bit value loaded from *addr.
+ */
+static inline int64_t opal_atomic_ll_64 (volatile int64_t *addr)
+{
+    int64_t ret;
+
+    /* The asm reads memory through addr, which the compiler cannot see
+     * from the operand list alone.  The "memory" clobber (already used by
+     * every other atomic in this header) keeps the compiler from caching
+     * or moving ordinary memory accesses across the load-acquire. */
+    __asm__ __volatile__ ("ldaxr    %0, [%1]        \n"
+                          : "=&r" (ret)
+                          : "r" (addr)
+                          : "memory");
+
+    return ret;
+}
+
+static inline int opal_atomic_sc_64 (volatile int64_t *addr, int64_t newval)
+{ /* Store-conditional half of a 64-bit LL/SC pair: attempt to store
+   * newval to *addr with a store-release exclusive (stlxr).  Returns 1
+   * when the status written by stlxr is zero, 0 otherwise. */
+    int ret; /* status written by stlxr, accessed as a 32-bit register (%w0) */
+
+    __asm__ __volatile__ ("stlxr    %w0, %2, [%1]    \n"
+                          : "=&r" (ret)
+                          : "r" (addr), "r" (newval)
+                          : "cc", "memory");
+
+    return ret == 0;
+}
+
+#define OPAL_ASM_MAKE_ATOMIC(type, bits, name, inst, reg) /* reg: operand width modifier, "w" or "" */ \
+    static inline type opal_atomic_ ## name ## _ ## bits (volatile type *addr, type value) \
+    { /* generated opal_atomic_<name>_<bits>: apply inst to *addr and value, return the updated value */ \
+        type newval; /* loaded value, then the result that is stored and returned */ \
+        int32_t tmp; /* stxr status; nonzero makes the loop retry */    \
+                                                                        \
+        __asm__ __volatile__("1:  ldxr   %" reg "0, [%2]        \n" /* exclusive load of *addr */ \
+                             "    " inst "   %" reg "0, %" reg "0, %" reg "3 \n" /* newval = newval <inst> value */ \
+                             "    stxr   %w1, %" reg "0, [%2]   \n" /* try to store the result */ \
+                             "    cbnz   %w1, 1b         \n" /* retry from the load on nonzero status */ \
+                             : "=&r" (newval), "=&r" (tmp)              \
+                             : "r" (addr), "r" (value)                  \
+                             : "cc", "memory");                         \
+                                                                        \
+        return newval;                                                  \
+    }
+
+OPAL_ASM_MAKE_ATOMIC(int32_t, 32, add, "add", "w") /* defines opal_atomic_add_32 */
+OPAL_ASM_MAKE_ATOMIC(int32_t, 32, sub, "sub", "w") /* defines opal_atomic_sub_32 */
+OPAL_ASM_MAKE_ATOMIC(int64_t, 64, add, "add", "") /* defines opal_atomic_add_64 */
+OPAL_ASM_MAKE_ATOMIC(int64_t, 64, sub, "sub", "") /* defines opal_atomic_sub_64 */
+
+#endif /* OPAL_GCC_INLINE_ASSEMBLY */
+
+#endif /* ! OPAL_SYS_ARCH_ATOMIC_H */
--- a/opal/include/opal/sys/arm64/timer.h
+++ b/opal/include/opal/sys/arm64/timer.h
@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2008      The University of Tennessee and The University
+ *                         of Tennessee Research Foundation.  All rights
+ *                         reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#ifndef OPAL_SYS_ARCH_TIMER_H
+#define OPAL_SYS_ARCH_TIMER_H 1
+
+#include <sys/times.h>
+
+typedef uint64_t opal_timer_t;
+
+/**
+ * Return the CPU time consumed so far, as reported by times().
+ *
+ * Despite the name, this does not read a hardware cycle counter: the
+ * result is the sum of the tms_utime and tms_stime fields filled in by
+ * times(), converted to opal_timer_t.
+ *
+ * @return tms_utime + tms_stime.
+ */
+static inline opal_timer_t
+opal_sys_timer_get_cycles(void)
+{
+    struct tms cpu_usage;
+
+    times(&cpu_usage);
+
+    return (opal_timer_t) (cpu_usage.tms_utime + cpu_usage.tms_stime);
+}
+
+#define OPAL_HAVE_SYS_TIMER_GET_CYCLES 1
+
+#endif /* ! OPAL_SYS_ARCH_TIMER_H */
--- a/opal/include/opal/sys/arm64/update.sh
+++ b/opal/include/opal/sys/arm64/update.sh
@ -0,0 +1,36 @@
+#!/bin/sh
+#
+# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+#                         University Research and Technology
+#                         Corporation.  All rights reserved.
+# Copyright (c) 2004-2005 The University of Tennessee and The University
+#                         of Tennessee Research Foundation.  All rights
+#                         reserved.
+# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+#                         University of Stuttgart.  All rights reserved.
+# Copyright (c) 2004-2005 The Regents of the University of California.
+#                         All rights reserved.
+# $COPYRIGHT$
+#
+# Additional copyrights may follow
+#
+# $HEADER$
+#
+
+# Regenerate atomic.s from atomic.h: write a small C wrapper into a private
+# scratch directory and compile it to assembly with gcc.
+
+# Use a freshly created private directory rather than a predictable file
+# name directly under /tmp.
+WORKDIR=`mktemp -d "${TMPDIR:-/tmp}/opal_atomic.XXXXXX"` || exit 1
+CFILE="$WORKDIR/opal_atomic.c"
+
+# Always clean up on exit, but do not overwrite the exit status: a gcc
+# failure must be visible to the caller.  Signals abort with a failure code.
+trap '/bin/rm -rf "$WORKDIR"' 0
+trap 'exit 1' 1 2 15
+
+echo Updating atomic.s from atomic.h using gcc
+
+# "static" and "inline" are defined away so that gcc emits every function
+# from atomic.h into the generated assembly.
+cat > "$CFILE" <<EOF
+#include <stdlib.h>
+#include <inttypes.h>
+#define static
+#define inline
+#define OPAL_GCC_INLINE_ASSEMBLY 1
+#include "../architecture.h"
+#include "atomic.h"
+EOF
+
+gcc -O1 -I. -S "$CFILE" -o atomic.s
--- a/opal/include/opal/sys/atomic.h
+++ b/opal/include/opal/sys/atomic.h
@ -147,6 +147,8 @@ typedef struct opal_atomic_lock_t opal_atomic_lock_t;
 #include "opal/sys/amd64/atomic.h"
 #elif OPAL_ASSEMBLY_ARCH == OPAL_ARM
 #include "opal/sys/arm/atomic.h"
+#elif OPAL_ASSEMBLY_ARCH == OPAL_ARM64
+#include "opal/sys/arm64/atomic.h"
 #elif OPAL_ASSEMBLY_ARCH == OPAL_IA32
 #include "opal/sys/ia32/atomic.h"
 #elif OPAL_ASSEMBLY_ARCH == OPAL_IA64
--- a/opal/include/opal/sys/cma.h
+++ b/opal/include/opal/sys/cma.h
@ -52,6 +52,13 @@
 #define __NR_process_vm_readv 376
 #define __NR_process_vm_writev 377

+#elif OPAL_ASSEMBLY_ARCH == OPAL_ARM64
+
+/* ARM64 uses the asm-generic syscall numbers */
+
+#define __NR_process_vm_readv 270
+#define __NR_process_vm_writev 271
+
 #elif OPAL_ASSEMBLY_ARCH == OPAL_MIPS

 #if _MIPS_SIM == _MIPS_SIM_ABI32