1
1

Fixes trac:2680: Add ARM support.

This commit was SVN r24308.

The following Trac tickets were found above:
  Ticket 2680 --> https://svn.open-mpi.org/trac/ompi/ticket/2680
Этот коммит содержится в:
Jeff Squyres 2011-01-26 17:22:44 +00:00
родитель 81fd41f811
Коммит 511f87665b
13 изменённых файлов: 498 добавлений и 1 удалений

Просмотреть файл

@ -40,6 +40,7 @@ Copyright (c) 2006-2010 QLogic Corporation. All rights reserved.
Copyright (c) 2008-2010 Oak Ridge National Labs. All rights reserved.
Copyright (c) 2006-2010 Oracle and/or its affiliates. All rights reserved.
Copyright (c) 2009 Bull SAS. All rights reserved.
Copyright (c) 2010 ARM ltd. All rights reserved.
$COPYRIGHT$

1
NEWS
Просмотреть файл

@ -62,6 +62,7 @@ Trunk (not on release branches yet)
OPAL levels - intended for use when configuring without MPI support
- Modified paffinity system to provide warning when bindings result in
being "bound to all", which is equivalent to "not bound"
- Added ARM support.
1.5.2

Просмотреть файл

@ -46,6 +46,15 @@ AMD64 default-.text-.globl-:--.L-@-1-0-1-1-1 amd64-linux
AMD64 default-.text-.globl-:--.L-@-1-0-1-1-0 amd64-linux-nongas
######################################################################
#
# ARM (ARMv7 and later)
#
######################################################################
ARM default-.text-.globl-:--.L-#-1-1-1-1-1 arm-linux
######################################################################
#
# Intel Pentium Class

150
opal/asm/base/ARM.asm Обычный файл
Просмотреть файл

@ -0,0 +1,150 @@
@ Preamble of the ARM atomics template. The upper-case names used on these
@ lines and throughout the file are placeholders for the build's assembly
@ generator; presumably they expand to the file header, the code section and
@ an alignment directive -- confirm against the generator script.
START_FILE
TEXT
ALIGN(4)
@ opal_atomic_mb: full memory barrier.
@ Takes no arguments; issues one dmb and returns through lr.
START_FUNC(opal_atomic_mb)
dmb
bx lr
END_FUNC(opal_atomic_mb)
@ opal_atomic_rmb: read memory barrier entry point.
@ Implemented with the same single dmb as the full barrier, then returns.
START_FUNC(opal_atomic_rmb)
dmb
bx lr
END_FUNC(opal_atomic_rmb)
@ opal_atomic_wmb: write memory barrier entry point.
@ Implemented with the same single dmb as the full barrier, then returns.
START_FUNC(opal_atomic_wmb)
dmb
bx lr
END_FUNC(opal_atomic_wmb)
@ opal_atomic_cmpset_32: 32-bit compare-and-set built on an ldrex/strex loop.
@ In:      r0 = address, r1 = expected old value, r2 = new value
@ Out:     r0 = 1 if the new value was stored, 0 if the loaded value differed
@ Scratch: r3 (loaded value), r12 (strex status), condition flags
@ This variant issues no memory barrier.
START_FUNC(opal_atomic_cmpset_32)
LSYM(1)
@ exclusive load of the current contents
ldrex r3, [r0]
cmp r1, r3
@ mismatch: skip the store, leaving the flags at NE
bne REFLSYM(2)
@ attempt the exclusive store; r12 becomes 0 when it succeeded
strex r12, r2, [r0]
cmp r12, #0
@ the store lost its reservation: start over from the load
bne REFLSYM(1)
mov r0, #1
LSYM(2)
@ runs only on the mismatch path; on fall-through the flags are EQ
movne r0, #0
bx lr
END_FUNC(opal_atomic_cmpset_32)
@ opal_atomic_cmpset_acq_32: 32-bit compare-and-set followed by a dmb.
@ In:      r0 = address, r1 = expected old value, r2 = new value
@ Out:     r0 = 1 if the new value was stored, 0 if the loaded value differed
@ Scratch: r3 (loaded value), r12 (strex status), condition flags
@ The dmb is executed only after a successful store; the mismatch path
@ returns without a barrier.
START_FUNC(opal_atomic_cmpset_acq_32)
LSYM(3)
ldrex r3, [r0]
cmp r1, r3
@ mismatch: skip store and barrier, flags stay NE
bne REFLSYM(4)
strex r12, r2, [r0]
cmp r12, #0
@ exclusive store failed: retry from the load
bne REFLSYM(3)
@ barrier after the successful store (dmb leaves the flags untouched)
dmb
mov r0, #1
LSYM(4)
@ runs only on the mismatch path; on fall-through the flags are EQ
movne r0, #0
bx lr
END_FUNC(opal_atomic_cmpset_acq_32)
@ opal_atomic_cmpset_rel_32: 32-bit compare-and-set with a dmb issued ahead
@ of the exclusive store.
@ In:      r0 = address, r1 = expected old value, r2 = new value
@ Out:     r0 = 1 if the new value was stored, 0 if the loaded value differed
@ Scratch: r3 (loaded value), r12 (strex status), condition flags
START_FUNC(opal_atomic_cmpset_rel_32)
LSYM(5)
        @ exclusive load of the current contents
        ldrex   r3, [r0]
        cmp     r1, r3
        @ mismatch: skip the store, leaving the flags at NE
        bne     REFLSYM(6)
        @ barrier before the store becomes visible
        dmb
        @ attempt the exclusive store; r12 becomes 0 when it succeeded
        strex   r12, r2, [r0]
        cmp     r12, #0
        @ A failed exclusive store must retry this function's own loop. The
        @ branch used to target local label 4, which lives in the acquire
        @ variant and made the call return 0 without ever retrying.
        bne     REFLSYM(5)
        mov     r0, #1
LSYM(6)
        @ runs only on the mismatch path; on fall-through the flags are EQ
        movne   r0, #0
        bx      lr
END_FUNC(opal_atomic_cmpset_rel_32)
#START_64BIT
@ opal_atomic_cmpset_64: 64-bit compare-and-set built on ldrexd/strexd.
@ In:      r0 = address, r2:r3 = expected old value,
@          new value = the two words at [sp] on entry, read from [sp, #16]
@          after the four-register push
@ Out:     r0 = 1 if the new value was stored, 0 if the loaded value differed
@ Scratch: r1 (strexd status), flags; r4-r7 are saved and restored
@ This variant issues no memory barrier.
START_FUNC(opal_atomic_cmpset_64)
push {r4-r7}
@ r6:r7 = new value fetched from the caller's stack area
ldrd r6, r7, [sp, #16]
LSYM(7)
@ r4:r5 = current contents, loaded exclusively
ldrexd r4, r5, [r0]
cmp r4, r2
@ second word is compared only when the first one matched
cmpeq r5, r3
@ mismatch: skip the store, leaving the flags at NE
bne REFLSYM(8)
strexd r1, r6, r7, [r0]
cmp r1, #0
@ exclusive store failed: retry from the load
bne REFLSYM(7)
mov r0, #1
LSYM(8)
@ runs only on the mismatch path; on fall-through the flags are EQ
movne r0, #0
pop {r4-r7}
bx lr
END_FUNC(opal_atomic_cmpset_64)
@ opal_atomic_cmpset_acq_64: 64-bit compare-and-set followed by a dmb.
@ In:      r0 = address, r2:r3 = expected old value,
@          new value = the two words at [sp] on entry, read from [sp, #16]
@          after the four-register push
@ Out:     r0 = 1 if the new value was stored, 0 if the loaded value differed
@ Scratch: r1 (strexd status), flags; r4-r7 are saved and restored
@ The dmb is executed only after a successful store.
START_FUNC(opal_atomic_cmpset_acq_64)
push {r4-r7}
@ r6:r7 = new value fetched from the caller's stack area
ldrd r6, r7, [sp, #16]
LSYM(9)
@ r4:r5 = current contents, loaded exclusively
ldrexd r4, r5, [r0]
cmp r4, r2
@ second word is compared only when the first one matched
cmpeq r5, r3
@ mismatch: skip store and barrier, flags stay NE
bne REFLSYM(10)
strexd r1, r6, r7, [r0]
cmp r1, #0
@ exclusive store failed: retry from the load
bne REFLSYM(9)
@ barrier after the successful store (dmb leaves the flags untouched)
dmb
mov r0, #1
LSYM(10)
@ runs only on the mismatch path; on fall-through the flags are EQ
movne r0, #0
pop {r4-r7}
bx lr
END_FUNC(opal_atomic_cmpset_acq_64)
@ opal_atomic_cmpset_rel_64: 64-bit compare-and-set with a dmb issued ahead
@ of the exclusive store.
@ In:      r0 = address, r2:r3 = expected old value,
@          new value = the two words at [sp] on entry, read from [sp, #16]
@          after the four-register push
@ Out:     r0 = 1 if the new value was stored, 0 if the loaded value differed
@ Scratch: r1 (strexd status), flags; r4-r7 are saved and restored
START_FUNC(opal_atomic_cmpset_rel_64)
push {r4-r7}
@ r6:r7 = new value fetched from the caller's stack area
ldrd r6, r7, [sp, #16]
LSYM(11)
@ r4:r5 = current contents, loaded exclusively
ldrexd r4, r5, [r0]
cmp r4, r2
@ second word is compared only when the first one matched
cmpeq r5, r3
@ mismatch: skip the store, leaving the flags at NE
bne REFLSYM(12)
@ barrier before the store becomes visible
dmb
strexd r1, r6, r7, [r0]
cmp r1, #0
@ exclusive store failed: retry from the load
bne REFLSYM(11)
mov r0, #1
LSYM(12)
@ runs only on the mismatch path; on fall-through the flags are EQ
movne r0, #0
pop {r4-r7}
bx lr
END_FUNC(opal_atomic_cmpset_rel_64)
#END_64BIT
@ opal_atomic_add_32: atomic 32-bit addition built on an ldrex/strex loop.
@ In:      r0 = address, r1 = amount to add
@ Out:     r0 = the value that was stored, i.e. the result of the addition
@ Scratch: r2 (working value), r3 (strex status), condition flags
@ No memory barrier is issued.
START_FUNC(opal_atomic_add_32)
LSYM(13)
ldrex r2, [r0]
add r2, r2, r1
@ attempt the exclusive store; r3 becomes 0 when it succeeded
strex r3, r2, [r0]
cmp r3, #0
@ exclusive store failed: reload and recompute
bne REFLSYM(13)
mov r0, r2
bx lr
END_FUNC(opal_atomic_add_32)
@ opal_atomic_sub_32: atomic 32-bit subtraction built on an ldrex/strex loop.
@ In:      r0 = address, r1 = amount to subtract
@ Out:     r0 = the value that was stored, i.e. the result of the subtraction
@ Scratch: r2 (working value), r3 (strex status), condition flags
@ No memory barrier is issued.
START_FUNC(opal_atomic_sub_32)
LSYM(14)
ldrex r2, [r0]
sub r2, r2, r1
@ attempt the exclusive store; r3 becomes 0 when it succeeded
strex r3, r2, [r0]
cmp r3, #0
@ exclusive store failed: reload and recompute
bne REFLSYM(14)
mov r0, r2
bx lr
END_FUNC(opal_atomic_sub_32)

Просмотреть файл

@ -103,7 +103,11 @@ while (<INPUT>) {
}
if ($GNU_STACK == 1) {
if ($asmarch eq "ARM") {
print OUTPUT "\n\t.section\t.note.GNU-stack,\"\",\%progbits\n";
} else {
print OUTPUT "\n\t.section\t.note.GNU-stack,\"\",\@progbits\n";
}
}
close(INPUT);

Просмотреть файл

@ -900,6 +900,12 @@ AC_DEFUN([OPAL_CONFIG_ASM],[
OMPI_GCC_INLINE_ASSIGN='"bis [$]31,[$]31,%0" : "=&r"(ret)'
;;
armv7*)
ompi_cv_asm_arch="ARM"
OPAL_ASM_SUPPORT_64BIT=1
OMPI_GCC_INLINE_ASSIGN='"mov %0, #0" : "=&r"(ret)'
;;
mips-*|mips64*)
# Should really find some way to make sure that we are on
# a MIPS III machine (r4000 and later)

Просмотреть файл

@ -36,6 +36,7 @@
#define OMPI_SPARCV9_32 0061
#define OMPI_SPARCV9_64 0062
#define OMPI_MIPS 0070
#define OMPI_ARM 0100
/* Formats */
#define OMPI_DEFAULT 1000 /* standard for given architecture */

24
opal/include/opal/sys/arm/Makefile.am Обычный файл
Просмотреть файл

@ -0,0 +1,24 @@
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2008 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# This makefile.am does not stand on its own - it is included from opal/include/Makefile.am
headers += \
opal/sys/arm/atomic.h \
opal/sys/arm/timer.h

227
opal/include/opal/sys/arm/atomic.h Обычный файл
Просмотреть файл

@ -0,0 +1,227 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2010 IBM Corporation. All rights reserved.
* Copyright (c) 2010 ARM ltd. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef OMPI_SYS_ARCH_ATOMIC_H
#define OMPI_SYS_ARCH_ATOMIC_H 1
/*
 * Barrier primitives. When OPAL_WANT_SMP_LOCKS is non-zero each macro emits a
 * "dmb" instruction with a "memory" clobber; the full, read and write
 * barriers all use the same instruction. Otherwise all three expand to
 * nothing.
 */
#if OPAL_WANT_SMP_LOCKS
#define MB() __asm__ __volatile__ ("dmb" : : : "memory")
#define RMB() __asm__ __volatile__ ("dmb" : : : "memory")
#define WMB() __asm__ __volatile__ ("dmb" : : : "memory")
#else
#define MB()
#define RMB()
#define WMB()
#endif
/**********************************************************************
*
* Define constants for ARMv7
*
*********************************************************************/
#define OPAL_HAVE_ATOMIC_MEM_BARRIER 1
#define OPAL_HAVE_ATOMIC_CMPSET_32 1
#define OPAL_HAVE_ATOMIC_CMPSET_64 1
#define OPAL_HAVE_ATOMIC_MATH_32 1
#define OPAL_HAVE_ATOMIC_ADD_32 1
#define OPAL_HAVE_ATOMIC_SUB_32 1
/**********************************************************************
*
* Memory Barriers
*
*********************************************************************/
#if OMPI_GCC_INLINE_ASSEMBLY
/* Full memory barrier: the body is exactly the MB() macro. */
static inline
void opal_atomic_mb(void)
{
MB();
}
/* Read memory barrier: the body is exactly the RMB() macro. */
static inline
void opal_atomic_rmb(void)
{
RMB();
}
/* Write memory barrier: the body is exactly the WMB() macro. */
static inline
void opal_atomic_wmb(void)
{
WMB();
}
/**********************************************************************
*
* Atomic math operations
*
*********************************************************************/
/*
 * Atomically compare *addr with oldval and, when they are equal, store
 * newval. Implemented as an ldrex/strex retry loop; no barrier instruction
 * is issued here.
 *
 * addr    word to operate on
 * oldval  value expected in *addr
 * newval  value stored when *addr equals oldval
 *
 * Returns non-zero when the value loaded from *addr equaled oldval (the store
 * was performed), and 0 otherwise.
 */
static inline int opal_atomic_cmpset_32(volatile int32_t *addr,
int32_t oldval, int32_t newval)
{
/* ret receives the loaded value, tmp the strex status (0 = stored) */
int32_t ret, tmp;
__asm__ __volatile__ (
"1: ldrex %0, [%2] \n"
" cmp %0, %3 \n"
" bne 2f \n"
" strex %1, %4, [%2] \n"
" cmp %1, #0 \n"
" bne 1b \n"
"2: \n"
/* early-clobber outputs: written before all inputs have been consumed */
: "=&r" (ret), "=&r" (tmp)
: "r" (addr), "r" (oldval), "r" (newval)
: "cc", "memory");
return (ret == oldval);
}
/* these two functions aren't inlined in the non-gcc case because then
there would be two function calls (since neither cmpset_32 nor
atomic_?mb can be inlined). Instead, we "inline" them by hand in
the assembly, meaning there is one function call overhead instead
of two */
/*
 * 32-bit compare-and-set followed by a read barrier.
 *
 * Arguments and return value are those of opal_atomic_cmpset_32(); the
 * barrier is issued whether or not the swap took place.
 */
static inline int opal_atomic_cmpset_acq_32(volatile int32_t *addr,
                                            int32_t oldval, int32_t newval)
{
    const int swapped = opal_atomic_cmpset_32(addr, oldval, newval);

    opal_atomic_rmb();
    return swapped;
}
/*
 * 32-bit compare-and-set preceded by a write barrier.
 *
 * Arguments and return value are those of opal_atomic_cmpset_32().
 */
static inline int opal_atomic_cmpset_rel_32(volatile int32_t *addr,
                                            int32_t oldval, int32_t newval)
{
    int swapped;

    opal_atomic_wmb();
    swapped = opal_atomic_cmpset_32(addr, oldval, newval);
    return swapped;
}
static inline int opal_atomic_cmpset_64(volatile int64_t *addr,
int64_t oldval, int64_t newval)
{
int64_t ret;
int tmp;
__asm__ __volatile__ (
"1: ldrexd %0, %H0, [%2] \n"
" cmp %0, %3 \n"
" cmpeq %H0, %H3 \n"
" bne 2f \n"
" strexd %1, %4, %H4, [%2] \n"
" cmp %1, #0 \n"
" bne 1b \n"
"2: \n"
: "=&r" (ret), "=&r" (tmp)
: "r" (addr), "r" (oldval), "r" (newval)
: "cc", "memory");
return (ret == oldval);
}
/* these two functions aren't inlined in the non-gcc case because then
there would be two function calls (since neither cmpset_64 nor
atomic_?mb can be inlined). Instead, we "inline" them by hand in
the assembly, meaning there is one function call overhead instead
of two */
/*
 * 64-bit compare-and-set followed by a read barrier.
 *
 * Arguments and return value are those of opal_atomic_cmpset_64(); the
 * barrier is issued whether or not the swap took place.
 */
static inline int opal_atomic_cmpset_acq_64(volatile int64_t *addr,
                                            int64_t oldval, int64_t newval)
{
    const int swapped = opal_atomic_cmpset_64(addr, oldval, newval);

    opal_atomic_rmb();
    return swapped;
}
/*
 * 64-bit compare-and-set preceded by a write barrier.
 *
 * Arguments and return value are those of opal_atomic_cmpset_64().
 */
static inline int opal_atomic_cmpset_rel_64(volatile int64_t *addr,
                                            int64_t oldval, int64_t newval)
{
    int swapped;

    opal_atomic_wmb();
    swapped = opal_atomic_cmpset_64(addr, oldval, newval);
    return swapped;
}
/*
 * Atomically add inc to *v with an ldrex/strex retry loop. No barrier
 * instruction is issued.
 *
 * v    word to update
 * inc  amount to add
 *
 * Returns the value that was stored, i.e. the result of the addition.
 */
static inline int32_t opal_atomic_add_32(volatile int32_t* v, int inc)
{
/* t holds the loaded value and then the sum; tmp is the strex status */
int32_t t;
int tmp;
__asm__ __volatile__(
"1: ldrex %0, [%2] \n"
" add %0, %0, %3 \n"
" strex %1, %0, [%2] \n"
" cmp %1, #0 \n"
" bne 1b \n"
: "=&r" (t), "=&r" (tmp)
: "r" (v), "r" (inc)
: "cc", "memory");
return t;
}
/*
 * Atomically subtract dec from *v with an ldrex/strex retry loop. No barrier
 * instruction is issued.
 *
 * v    word to update
 * dec  amount to subtract
 *
 * Returns the value that was stored, i.e. the result of the subtraction.
 */
static inline int32_t opal_atomic_sub_32(volatile int32_t* v, int dec)
{
/* t holds the loaded value and then the difference; tmp is the strex status */
int32_t t;
int tmp;
__asm__ __volatile__(
"1: ldrex %0, [%2] \n"
" sub %0, %0, %3 \n"
" strex %1, %0, [%2] \n"
" cmp %1, #0 \n"
" bne 1b \n"
: "=&r" (t), "=&r" (tmp)
: "r" (v), "r" (dec)
: "cc", "memory");
return t;
}
#endif /* OMPI_GCC_INLINE_ASSEMBLY */
#endif /* ! OMPI_SYS_ARCH_ATOMIC_H */

33
opal/include/opal/sys/arm/timer.h Обычный файл
Просмотреть файл

@ -0,0 +1,33 @@
/*
* Copyright (c) 2008 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef OMPI_SYS_ARCH_TIMER_H
#define OMPI_SYS_ARCH_TIMER_H 1
#include <sys/times.h>
/* Timer value type for this platform. */
typedef uint64_t opal_timer_t;

/*
 * Return the sum of the user and system CPU times that times() reports for
 * the calling process. Despite the name, the value is in the units used by
 * times(), not in hardware cycles. The return value of times() is not
 * checked.
 */
static inline opal_timer_t
opal_sys_timer_get_cycles(void)
{
    struct tms cpu_usage;

    times(&cpu_usage);
    return (opal_timer_t) (cpu_usage.tms_utime + cpu_usage.tms_stime);
}
#define OPAL_HAVE_SYS_TIMER_GET_CYCLES 1
#endif /* ! OMPI_SYS_ARCH_TIMER_H */

37
opal/include/opal/sys/arm/update.sh Обычный файл
Просмотреть файл

@ -0,0 +1,37 @@
#!/bin/sh
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Regenerate atomic.s from atomic.h: write a scratch C file that includes the
# header with "static" and "inline" defined away, then have gcc emit assembly
# for it.
CFILE=/tmp/opal_atomic_$$.c

# Always remove the scratch file, but leave the script's exit status alone.
# The previous single trap ended in "exit 0", which reported success even
# when gcc failed.
trap '/bin/rm -f "$CFILE"' 0
# On HUP/INT/TERM leave with a failure status; the exit trap above still runs.
trap 'exit 1' 1 2 15

echo Updating atomic.s from atomic.h using gcc

cat > "$CFILE" <<EOF
#include <stdlib.h>
#include <inttypes.h>
#define static
#define inline
#define OMPI_GCC_INLINE_ASSEMBLY 1
#define OPAL_WANT_SMP_LOCKS 1
#include "../architecture.h"
#include "atomic.h"
EOF

gcc -O1 -I. -S "$CFILE" -o atomic.s

Просмотреть файл

@ -146,6 +146,8 @@ typedef struct opal_atomic_lock_t opal_atomic_lock_t;
#include "opal/sys/alpha/atomic.h"
#elif OPAL_ASSEMBLY_ARCH == OMPI_AMD64
#include "opal/sys/amd64/atomic.h"
#elif OPAL_ASSEMBLY_ARCH == OMPI_ARM
#include "opal/sys/arm/atomic.h"
#elif OPAL_ASSEMBLY_ARCH == OMPI_IA32
#include "opal/sys/ia32/atomic.h"
#elif OPAL_ASSEMBLY_ARCH == OMPI_IA64

Просмотреть файл

@ -79,6 +79,8 @@ BEGIN_C_DECLS
/* don't include system-level gorp when generating doxygen files */
#elif OPAL_ASSEMBLY_ARCH == OMPI_AMD64
#include "opal/sys/amd64/timer.h"
#elif OPAL_ASSEMBLY_ARCH == OMPI_ARM
#include "opal/sys/arm/timer.h"
#elif OPAL_ASSEMBLY_ARCH == OMPI_IA32
#include "opal/sys/ia32/timer.h"
#elif OPAL_ASSEMBLY_ARCH == OMPI_IA64