1
1

Merge pull request #1634 from hjelmn/cma

cma: add support for MIPS and ARM
Этот коммит содержится в:
Nathan Hjelm 2016-06-11 09:20:28 -06:00 коммит произвёл GitHub
родитель d58da99dbc 4a2bd83302
Коммит 109389dce2
10 изменённых файлов: 488 добавлений и 4 удалений

Просмотреть файл

@ -19,19 +19,89 @@
# check if cma support is wanted.
AC_DEFUN([OPAL_CHECK_CMA],[
if test -z "$ompi_check_cma_happy" ; then
OPAL_VAR_SCOPE_PUSH([ompi_check_cma_need_defs ompi_check_cma_kernel_version])
OPAL_VAR_SCOPE_PUSH([ompi_check_cma_need_defs ompi_check_cma_kernel_version ompi_check_cma_CFLAGS])
ompi_check_cma_happy="no"
AC_ARG_WITH([cma],
[AC_HELP_STRING([--with-cma],
[Build Cross Memory Attach support (default: no)])])
[Build Cross Memory Attach support (default: autodetect)])])
# Enable CMA support by default if process_vm_readv is defined in glibc
AC_CHECK_FUNC(process_vm_readv, [ompi_check_cma_need_defs=0],
[ompi_check_cma_need_defs=1])
if test $ompi_check_cma_need_defs = 1 ; then
ompi_check_cma_CFLAGS="$CFLAGS"
# Need some extra include paths to locate the appropriate headers
CFLAGS="$CFLAGS -I${srcdir} -I${srcdir}/opal/include"
AC_MSG_CHECKING([if internal syscall numbers for Linux CMA work])
AC_RUN_IFELSE([AC_LANG_PROGRAM([[
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sys/uio.h>
#include <sys/wait.h>
#include <sys/syscall.h>
#include "opal/include/opal/sys/cma.h"
/*
 * Exercise both CMA system calls against the address space of process pid.
 *
 * First reads sizeof (check) bytes from out with process_vm_readv and
 * expects to see the values 0, 1, 2, 3.  Then writes 0, 2, 4, 6 to in with
 * process_vm_writev.  A short transfer or an unexpected value terminates
 * the program with exit status 1.
 *
 * On success the function returns instead of exiting, so that the caller
 * can inspect in and confirm that the write really landed.
 */
static void do_check (pid_t pid, int *in, int *out)
{
    int check[4] = {0, 0, 0, 0}, i;
    struct iovec rem_iov = {out, sizeof (check)};
    struct iovec loc_iov = {check, sizeof (check)};
    ssize_t rc;

    /* pull the remote buffer into the local scratch array */
    rc = process_vm_readv (pid, &loc_iov, 1, &rem_iov, 1, 0);
    if (sizeof (check) != rc) {
        exit (1);
    }

    for (i = 0 ; i < 4 ; ++i) {
        if (check[i] != i) {
            exit (1);
        }

        /* prepare the pattern the caller expects to find in in */
        check[i] = i * 2;
    }

    /* push the modified scratch array to the other remote buffer */
    rem_iov.iov_base = in;
    rc = process_vm_writev (pid, &loc_iov, 1, &rem_iov, 1, 0);
    if (sizeof (check) != rc) {
        exit (1);
    }
}
]],[[
int i, in[4] = {-1, -1, -1, -1}, out[4] = {0, 1, 2, 3};
do_check (getpid (), in, out);
for (i = 0 ; i < 4 ; ++i) {
if (in[i] != 2 * i) {
return 1;
}
}
/* all good */
return 0;
]])],
[AC_MSG_RESULT([yes])
ompi_check_cma_happy="yes"],
[AC_MSG_RESULT([no])
ompi_check_cma_happy="no"],
[AC_MSG_RESULT([no (cross-compiling)])
ompi_check_cma_happy="no"])
CFLAGS="$ompi_check_cma_CFLAGS"
else
ompi_check_cma_happy="yes"
fi
# If the user specifically requests CMA go ahead and enable it even
# if the glibc version does not support process_vm_readv
if test $ompi_check_cma_need_defs = 0 || test "x$with_cma" = "xyes" ; then
if test "x$with_cma" = "xyes" || test "$ompi_check_cma_happy" = "yes" ; then
ompi_check_cma_happy="yes"
AC_DEFINE_UNQUOTED([OPAL_CMA_NEED_SYSCALL_DEFS],
[$ompi_check_cma_need_defs],

Просмотреть файл

@ -1025,6 +1025,14 @@ AC_DEFUN([OPAL_CONFIG_ASM],[
OPAL_ASM_SUPPORT_64BIT=1
OPAL_GCC_INLINE_ASSIGN='"mov %0=r0\n;;\n" : "=&r"(ret)'
;;
aarch64*)
opal_cv_asm_arch="ARM64"
OPAL_ASM_SUPPORT_64BIT=1
OPAL_ASM_ARM_VERSION=8
AC_DEFINE_UNQUOTED([OPAL_ASM_ARM_VERSION], [$OPAL_ASM_ARM_VERSION],
[What ARM assembly version to use])
OPAL_GCC_INLINE_ASSIGN='"mov %0, #0" : "=&r"(ret)'
;;
armv7*)
opal_cv_asm_arch="ARM"

Просмотреть файл

@ -11,6 +11,8 @@
# All rights reserved.
# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2011 Sandia National Laboratories. All rights reserved.
# Copyright (c) 2016 Los Alamos National Security, LLC. All rights
# reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
@ -29,6 +31,7 @@ headers += \
include opal/sys/amd64/Makefile.am
include opal/sys/arm/Makefile.am
include opal/sys/arm64/Makefile.am
include opal/sys/ia32/Makefile.am
include opal/sys/ia64/Makefile.am
include opal/sys/mips/Makefile.am

Просмотреть файл

@ -11,6 +11,8 @@
* All rights reserved.
* Copyright (c) 2011 Sandia National Laboratories. All rights reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved
* Copyright (c) 2016 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -37,6 +39,7 @@
#define OPAL_SPARCV9_64 0062
#define OPAL_MIPS 0070
#define OPAL_ARM 0100
#define OPAL_ARM64 0101
#define OPAL_BUILTIN_SYNC 0200
#define OPAL_BUILTIN_OSX 0201
#define OPAL_BUILTIN_GCC 0202

24
opal/include/opal/sys/arm64/Makefile.am Обычный файл
Просмотреть файл

@ -0,0 +1,24 @@
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2008 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# This Makefile.am fragment does not stand on its own; it is included from opal/include/Makefile.am
headers += \
opal/sys/arm64/atomic.h \
opal/sys/arm64/timer.h

270
opal/include/opal/sys/arm64/atomic.h Обычный файл
Просмотреть файл

@ -0,0 +1,270 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2010 IBM Corporation. All rights reserved.
* Copyright (c) 2010 ARM ltd. All rights reserved.
* Copyright (c) 2016 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#if !defined(OPAL_SYS_ARCH_ATOMIC_H)
#define OPAL_SYS_ARCH_ATOMIC_H 1
#if OPAL_GCC_INLINE_ASSEMBLY
#define OPAL_HAVE_ATOMIC_MEM_BARRIER 1
#define OPAL_HAVE_ATOMIC_LLSC_32 1
#define OPAL_HAVE_ATOMIC_CMPSET_32 1
#define OPAL_HAVE_ATOMIC_MATH_32 1
#define OPAL_HAVE_ATOMIC_CMPSET_64 1
#define OPAL_HAVE_ATOMIC_LLSC_64 1
#define OPAL_HAVE_ATOMIC_ADD_32 1
#define OPAL_HAVE_ATOMIC_SUB_32 1
#define OPAL_HAVE_ATOMIC_ADD_64 1
#define OPAL_HAVE_ATOMIC_SUB_64 1
#define MB() __asm__ __volatile__ ("dmb sy" : : : "memory")
#define RMB() __asm__ __volatile__ ("dmb ld" : : : "memory")
#define WMB() __asm__ __volatile__ ("dmb st" : : : "memory")
/**********************************************************************
*
* Memory Barriers
*
*********************************************************************/
static inline void opal_atomic_mb (void)
{
MB();
}
static inline void opal_atomic_rmb (void)
{
RMB();
}
static inline void opal_atomic_wmb (void)
{
WMB();
}
static inline void opal_atomic_isync (void)
{
__asm__ __volatile__ ("isb");
}
/**********************************************************************
*
* Atomic math operations
*
*********************************************************************/
static inline int opal_atomic_cmpset_32(volatile int32_t *addr,
int32_t oldval, int32_t newval)
{
int32_t ret, tmp;
__asm__ __volatile__ ("1: ldaxr %w0, [%2] \n"
" cmp %w0, %w3 \n"
" bne 2f \n"
" stxr %w1, %w4, [%2] \n"
" cbnz %w1, 1b \n"
"2: \n"
: "=&r" (ret), "=&r" (tmp)
: "r" (addr), "r" (oldval), "r" (newval)
: "cc", "memory");
return (ret == oldval);
}
/* The acquire and release variants below are not built from cmpset_32
   plus a separate opal_atomic_?mb call.  The ordering is instead folded
   by hand into the exclusive access itself: the _acq variant loads with
   ldaxr and the _rel variant stores with stlxr.  (This whole file is
   compiled only when OPAL_GCC_INLINE_ASSEMBLY is set.) */
static inline int opal_atomic_cmpset_acq_32(volatile int32_t *addr,
int32_t oldval, int32_t newval)
{
int32_t ret, tmp;
__asm__ __volatile__ ("1: ldaxr %w0, [%2] \n"
" cmp %w0, %w3 \n"
" bne 2f \n"
" stxr %w1, %w4, [%2] \n"
" cbnz %w1, 1b \n"
"2: \n"
: "=&r" (ret), "=&r" (tmp)
: "r" (addr), "r" (oldval), "r" (newval)
: "cc", "memory");
return (ret == oldval);
}
static inline int opal_atomic_cmpset_rel_32(volatile int32_t *addr,
int32_t oldval, int32_t newval)
{
int32_t ret, tmp;
__asm__ __volatile__ ("1: ldxr %w0, [%2] \n"
" cmp %w0, %w3 \n"
" bne 2f \n"
" stlxr %w1, %w4, [%2] \n"
" cbnz %w1, 1b \n"
"2: \n"
: "=&r" (ret), "=&r" (tmp)
: "r" (addr), "r" (oldval), "r" (newval)
: "cc", "memory");
return (ret == oldval);
}
static inline int32_t opal_atomic_ll_32 (volatile int32_t *addr)
{
int32_t ret;
__asm__ __volatile__ ("ldaxr %w0, [%1] \n"
: "=&r" (ret)
: "r" (addr));
return ret;
}
static inline int opal_atomic_sc_32 (volatile int32_t *addr, int32_t newval)
{
int ret;
__asm__ __volatile__ ("stlxr %w0, %w2, [%1] \n"
: "=&r" (ret)
: "r" (addr), "r" (newval)
: "cc", "memory");
return ret == 0;
}
static inline int opal_atomic_cmpset_64(volatile int64_t *addr,
int64_t oldval, int64_t newval)
{
int64_t ret;
int tmp;
__asm__ __volatile__ ("1: ldaxr %0, [%2] \n"
" cmp %0, %3 \n"
" bne 2f \n"
" stxr %w1, %4, [%2] \n"
" cbnz %w1, 1b \n"
"2: \n"
: "=&r" (ret), "=&r" (tmp)
: "r" (addr), "r" (oldval), "r" (newval)
: "cc", "memory");
return (ret == oldval);
}
/* The acquire and release variants below are not built from cmpset_64
   plus a separate opal_atomic_?mb call.  The ordering is instead folded
   by hand into the exclusive access itself: the _acq variant loads with
   ldaxr and the _rel variant stores with stlxr.  (This whole file is
   compiled only when OPAL_GCC_INLINE_ASSEMBLY is set.) */
static inline int opal_atomic_cmpset_acq_64(volatile int64_t *addr,
int64_t oldval, int64_t newval)
{
int64_t ret;
int tmp;
__asm__ __volatile__ ("1: ldaxr %0, [%2] \n"
" cmp %0, %3 \n"
" bne 2f \n"
" stxr %w1, %4, [%2] \n"
" cbnz %w1, 1b \n"
"2: \n"
: "=&r" (ret), "=&r" (tmp)
: "r" (addr), "r" (oldval), "r" (newval)
: "cc", "memory");
return (ret == oldval);
}
static inline int opal_atomic_cmpset_rel_64(volatile int64_t *addr,
int64_t oldval, int64_t newval)
{
int64_t ret;
int tmp;
__asm__ __volatile__ ("1: ldxr %0, [%2] \n"
" cmp %0, %3 \n"
" bne 2f \n"
" stlxr %w1, %4, [%2] \n"
" cbnz %w1, 1b \n"
"2: \n"
: "=&r" (ret), "=&r" (tmp)
: "r" (addr), "r" (oldval), "r" (newval)
: "cc", "memory");
return (ret == oldval);
}
static inline int64_t opal_atomic_ll_64 (volatile int64_t *addr)
{
int64_t ret;
__asm__ __volatile__ ("ldaxr %0, [%1] \n"
: "=&r" (ret)
: "r" (addr));
return ret;
}
static inline int opal_atomic_sc_64 (volatile int64_t *addr, int64_t newval)
{
int ret;
__asm__ __volatile__ ("stlxr %w0, %2, [%1] \n"
: "=&r" (ret)
: "r" (addr), "r" (newval)
: "cc", "memory");
return ret == 0;
}
#define OPAL_ASM_MAKE_ATOMIC(type, bits, name, inst, reg) \
static inline type opal_atomic_ ## name ## _ ## bits (volatile type *addr, type value) \
{ \
type newval; \
int32_t tmp; \
\
__asm__ __volatile__("1: ldxr %" reg "0, [%2] \n" \
" " inst " %" reg "0, %" reg "0, %" reg "3 \n" \
" stxr %w1, %" reg "0, [%2] \n" \
" cbnz %w1, 1b \n" \
: "=&r" (newval), "=&r" (tmp) \
: "r" (addr), "r" (value) \
: "cc", "memory"); \
\
return newval; \
}
OPAL_ASM_MAKE_ATOMIC(int32_t, 32, add, "add", "w")
OPAL_ASM_MAKE_ATOMIC(int32_t, 32, sub, "sub", "w")
OPAL_ASM_MAKE_ATOMIC(int64_t, 64, add, "add", "")
OPAL_ASM_MAKE_ATOMIC(int64_t, 64, sub, "sub", "")
#endif /* OPAL_GCC_INLINE_ASSEMBLY */
#endif /* ! OPAL_SYS_ARCH_ATOMIC_H */

33
opal/include/opal/sys/arm64/timer.h Обычный файл
Просмотреть файл

@ -0,0 +1,33 @@
/*
* Copyright (c) 2008 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef OPAL_SYS_ARCH_TIMER_H
#define OPAL_SYS_ARCH_TIMER_H 1
#include <sys/times.h>
typedef uint64_t opal_timer_t;

/*
 * Return the CPU time consumed so far by the calling process, as reported
 * by times(): the sum of the user (tms_utime) and system (tms_stime)
 * fields.  Despite the function name, the value is in the units times()
 * uses, not in hardware cycle counts.
 */
static inline opal_timer_t
opal_sys_timer_get_cycles(void)
{
    struct tms cpu_times;

    times(&cpu_times);

    return (opal_timer_t) (cpu_times.tms_utime + cpu_times.tms_stime);
}
#define OPAL_HAVE_SYS_TIMER_GET_CYCLES 1
#endif /* ! OPAL_SYS_ARCH_TIMER_H */

36
opal/include/opal/sys/arm64/update.sh Обычный файл
Просмотреть файл

@ -0,0 +1,36 @@
#!/bin/sh
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Regenerate atomic.s from atomic.h: write a small C file that includes
# atomic.h with "static" and "inline" defined away, then compile it to
# assembly with gcc.

# Scratch C source.  mktemp creates the file with an unpredictable name, so
# another user cannot pre-create it the way they could the old, guessable
# /tmp/opal_atomic_<pid>.c path.
CFILE=`mktemp "${TMPDIR:-/tmp}/opal_atomic_XXXXXX"` || exit 1

# Remove the scratch file on every exit path.  The EXIT trap deliberately
# does not call exit itself, so the script returns the status of its last
# command (gcc); an "exit 0" inside the trap would hide compiler failures.
trap '/bin/rm -f "$CFILE"' 0
trap 'exit 1' 1 2 15

echo Updating atomic.s from atomic.h using gcc

cat > "$CFILE"<<EOF
#include <stdlib.h>
#include <inttypes.h>
#define static
#define inline
#define OPAL_GCC_INLINE_ASSEMBLY 1
#include "../architecture.h"
#include "atomic.h"
EOF

# The scratch file has no .c suffix, so name the language explicitly.
gcc -O1 -I. -x c -S "$CFILE" -o atomic.s

Просмотреть файл

@ -157,6 +157,8 @@ enum {
#include "opal/sys/amd64/atomic.h"
#elif OPAL_ASSEMBLY_ARCH == OPAL_ARM
#include "opal/sys/arm/atomic.h"
#elif OPAL_ASSEMBLY_ARCH == OPAL_ARM64
#include "opal/sys/arm64/atomic.h"
#elif OPAL_ASSEMBLY_ARCH == OPAL_IA32
#include "opal/sys/ia32/atomic.h"
#elif OPAL_ASSEMBLY_ARCH == OPAL_IA64

Просмотреть файл

@ -1,5 +1,7 @@
/*
* Copyright (c) 2011-2012 IBM Corporation. All rights reserved.
* Copyright (c) 2016 Los Alamos National Security, LLC. All rights
* reserved.
*
*/
@ -14,7 +16,10 @@
#ifndef OPAL_SYS_CMA_H
#define OPAL_SYS_CMA_H 1
#if !defined(OPAL_ASSEMBLY_ARCH)
/* need opal_config.h for the assembly architecture */
#include "opal_config.h"
#endif
#include "opal/sys/architecture.h"
@ -45,6 +50,36 @@
#elif OPAL_ASSEMBLY_ARCH == OPAL_POWERPC64
#define __NR_process_vm_readv 351
#define __NR_process_vm_writev 352
#elif OPAL_ASSEMBLY_ARCH == OPAL_ARM
#define __NR_process_vm_readv 376
#define __NR_process_vm_writev 377
#elif OPAL_ASSEMBLY_ARCH == OPAL_ARM64
/* ARM64 uses the asm-generic syscall numbers */
#define __NR_process_vm_readv 270
#define __NR_process_vm_writev 271
#elif OPAL_ASSEMBLY_ARCH == OPAL_MIPS
#if _MIPS_SIM == _MIPS_SIM_ABI64
#define __NR_process_vm_readv 5304
#define __NR_process_vm_writev 5305
#elif _MIPS_SIM == _MIPS_SIM_NABI32
#define __NR_process_vm_readv 6309
#define __NR_process_vm_writev 6310
#else
#error "Unsupported MIPS architecture for process_vm_readv and process_vm_writev syscalls"
#endif
#else
#error "Unsupported architecture for process_vm_readv and process_vm_writev syscalls"
#endif