1
1

* Update build system to support non-GCC inline assembly, including detection
  of assembler format
* Fix minor bugs in AMD64, PPC, and IA32 assembly for atomic operations
* Clean up the #defines to look for when examining level of atomic operation
  support

This commit was SVN r4183.
Этот коммит содержится в:
Brian Barrett 2005-01-27 01:39:55 +00:00
родитель b240395d9a
Коммит 9c1a277804
39 изменённых файлов: 2875 добавлений и 840 удалений

Просмотреть файл

@ -892,7 +892,7 @@ INCLUDE_FILE_PATTERNS =
# or name=definition (no spaces). If the definition and the = are
# omitted =1 is assumed.
PREDEFINED =
PREDEFINED = DOXYGEN
# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
# this tag can be used to specify a list of macro names that should be expanded.

Просмотреть файл

@ -36,6 +36,9 @@ sinclude(config/f90_check_type.m4)
sinclude(config/f90_get_alignment.m4)
sinclude(config/f90_get_sizeof.m4)
sinclude(config/ompi_try_assemble.m4)
sinclude(config/ompi_config_asm.m4)
sinclude(config/ompi_case_sensitive_fs_setup.m4)
sinclude(config/ompi_check_optflags.m4)
sinclude(config/ompi_config_subdir.m4)

763
config/ompi_config_asm.m4 Обычный файл
Просмотреть файл

@ -0,0 +1,763 @@
dnl
dnl Copyright (c) 2004-2005 The Trustees of Indiana University.
dnl All rights reserved.
dnl Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
dnl All rights reserved.
dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
dnl University of Stuttgart. All rights reserved.
dnl $COPYRIGHT$
dnl
dnl Additional copyrights may follow
dnl
dnl $HEADER$
dnl
dnl #################################################################
dnl
dnl OMPI_CHECK_ASM_TEXT
dnl
dnl Pick the assembler directive that switches to the text (code)
dnl section.  AIX hosts get the csect form; every other host gets
dnl plain .text.  The value is published three ways: the shell
dnl variable ompi_cv_asm_text, the config header define
dnl OMPI_ASM_TEXT, and the substitution OMPI_ASM_TEXT.
dnl
dnl #################################################################
AC_DEFUN([OMPI_CHECK_ASM_TEXT],[
    AC_MSG_CHECKING([directive for setting text section])

    # start from the common answer and override for the odd hosts
    ompi_cv_asm_text=".text"
    case $host in
        *-aix*) ompi_cv_asm_text=[".csect .text[PR]"] ;;
    esac

    AC_MSG_RESULT([$ompi_cv_asm_text])

    OMPI_ASM_TEXT="$ompi_cv_asm_text"
    AC_SUBST(OMPI_ASM_TEXT)
    AC_DEFINE_UNQUOTED([OMPI_ASM_TEXT], ["$ompi_cv_asm_text"],
                       [Assembly directive for setting text section])
])dnl
dnl #################################################################
dnl
dnl OMPI_CHECK_ASM_GLOBAL
dnl
dnl Sets OMPI_ASM_GLOBAL to the directive used to export a symbol.
dnl
dnl Every host currently gets .globl; the case statement is kept so
dnl a platform that needs something else has an obvious place to be
dnl added.  The value is stored in ompi_cv_asm_global, defined in the
dnl config header as OMPI_ASM_GLOBAL and substituted as
dnl OMPI_ASM_GLOBAL.
dnl
dnl #################################################################
AC_DEFUN([OMPI_CHECK_ASM_GLOBAL],[
    AC_MSG_CHECKING([directive for exporting symbols])
    ompi_cv_asm_global=""
    case $host in
        *)
            ompi_cv_asm_global=".globl"
        ;;
    esac
    AC_MSG_RESULT([$ompi_cv_asm_global])
    AC_DEFINE_UNQUOTED([OMPI_ASM_GLOBAL], ["$ompi_cv_asm_global"],
                       [Assembly directive for exporting symbols])
    OMPI_ASM_GLOBAL="$ompi_cv_asm_global"
    # substitute the variable that was just assigned
    AC_SUBST(OMPI_ASM_GLOBAL)
])dnl
dnl #################################################################
dnl
dnl OMPI_CHECK_ASM_LSYM
dnl
dnl Sets OMPI_ASM_LSYM to the prefix value on a symbol to make it
dnl an internal label (jump target and whatnot)
dnl
dnl We look for L .L $ L$ (in that order) for something that both
dnl assembles and does not leave a label in the output of nm.  Fall
dnl back to L if nothing else seems to work :/
dnl
dnl The loop stops at the first prefix whose label is absent from
dnl the nm output.  A prefix that only passes the weaker second
dnl check is remembered but the search continues.
dnl
dnl #################################################################
AC_DEFUN([OMPI_CHECK_ASM_LSYM],[
    AC_REQUIRE([OMPI_CHECK_ASM_LABEL_SUFFIX])
    AC_REQUIRE([AC_PROG_NM])
    AC_MSG_CHECKING([prefix for lsym labels])
    ompi_cv_asm_lsym="L"
    for sym in L .L $ L$ ; do
        asm_result=0
        echo "configure: trying $sym" >& AC_FD_CC
        OMPI_TRY_ASSEMBLE([foobar$ompi_cv_asm_label_suffix
${sym}mytestlabel$ompi_cv_asm_label_suffix],
            [# ok, we succeeded at assembling.  see if we can nm,
             # throwing the results in a file
             if $NM conftest.$OBJEXT > conftest.out 2>&AC_FD_CC ; then
                 if test "`grep mytestlabel conftest.out`" = "" ; then
                     # there was no symbol...  looks promising to me
                     ompi_cv_asm_lsym="$sym"
                     asm_result=1
                 elif test ["`grep ' [Nt] .*mytestlabel' conftest.out`"] = "" ; then
                     # see if we have a non-global-ish symbol
                     # but we should see if we can do better.
                     ompi_cv_asm_lsym="$sym"
                 fi
             else
                 # not so much on the NM goodness :/
                 echo "$NM failed.  Output from NM was:" >& AC_FD_CC
                 # send the nm output to the log descriptor and not
                 # to a regular file named after the descriptor number
                 cat conftest.out >&AC_FD_CC
                 AC_MSG_WARN([$NM could not read object file])
             fi
            ])
        if test "$asm_result" = "1" ; then
            break
        fi
    done
    rm -f conftest.out
    AC_MSG_RESULT([$ompi_cv_asm_lsym])
    AC_DEFINE_UNQUOTED([OMPI_ASM_LSYM], ["$ompi_cv_asm_lsym"],
                       [Assembly prefix for lsym labels])
    OMPI_ASM_LSYM="$ompi_cv_asm_lsym"
    AC_SUBST(OMPI_ASM_LSYM)
    unset asm_result sym
])dnl
dnl #################################################################
dnl
dnl OMPI_CHECK_ASM_GSYM
dnl
dnl Sets OMPI_ASM_GSYM to the prefix value on a symbol to make it
dnl a global linkable from C.  Basically, an _ or not.
dnl
dnl For each candidate prefix an assembly file exporting
dnl gsym_test_func is assembled, a small C program calling that
dnl function is compiled, and the two are linked.  The first prefix
dnl that links wins.  Configure aborts if neither candidate works.
dnl
dnl #################################################################
AC_DEFUN([OMPI_CHECK_ASM_GSYM],[
    AC_REQUIRE([OMPI_CHECK_ASM_TEXT])
    AC_REQUIRE([OMPI_CHECK_ASM_GLOBAL])
    AC_REQUIRE([OMPI_CHECK_ASM_LABEL_SUFFIX])
    AC_MSG_CHECKING([prefix for global symbol labels])
    ompi_cv_asm_gsym="none"
    for sym in "_" "" ; do
        asm_result=0
        echo "configure: trying $sym" >& AC_FD_CC
cat > conftest_c.c <<EOF
#ifdef __cplusplus
extern "C" {
#endif
void gsym_test_func(void);
#ifdef __cplusplus
}
#endif
int
main(int argc, char *argv[[]])
{
gsym_test_func();
return 0;
}
EOF
        OMPI_TRY_ASSEMBLE([
$ompi_cv_asm_text
$ompi_cv_asm_global ${sym}gsym_test_func
${sym}gsym_test_func${ompi_cv_asm_label_suffix}],
            [ompi_compile="$CC $CFLAGS -I. conftest_c.c -c > conftest.cmpl 2>&1"
             if AC_TRY_EVAL(ompi_compile) ; then
                 # save the warnings
                 cat conftest.cmpl >&AC_FD_CC
                 ompi_link="$CC $CFLAGS conftest_c.$OBJEXT conftest.$OBJEXT -o conftest > conftest.link 2>&1"
                 if AC_TRY_EVAL(ompi_link) ; then
                     # save the warnings
                     cat conftest.link >&AC_FD_CC
                     asm_result=1
                 else
                     cat conftest.link >&AC_FD_CC
                     echo "configure: failed C program was: " >&AC_FD_CC
                     cat conftest_c.c >&AC_FD_CC
                     echo "configure: failed ASM program was: " >&AC_FD_CC
                     cat conftest.s >&AC_FD_CC
                     asm_result=0
                 fi
             else
                 # save output and failed program
                 cat conftest.cmpl >&AC_FD_CC
                 echo "configure: failed C program was: " >&AC_FD_CC
                 # the C test program lives in conftest_c.c
                 cat conftest_c.c >&AC_FD_CC
                 asm_result=0
             fi],
            [asm_result=0])
        if test "$asm_result" = "1" ; then
            ompi_cv_asm_gsym="$sym"
            break
        fi
    done
    rm -f conftest.*
    AC_MSG_RESULT([$ompi_cv_asm_gsym])
    if test "$ompi_cv_asm_gsym" = "none" ; then
        AC_MSG_ERROR([Could not determine global symbol label prefix])
    fi
    AC_DEFINE_UNQUOTED([OMPI_ASM_GSYM], ["$ompi_cv_asm_gsym"],
                       [Assembly prefix for gsym labels])
    OMPI_ASM_GSYM="$ompi_cv_asm_gsym"
    AC_SUBST(OMPI_ASM_GSYM)
])dnl
dnl #################################################################
dnl
dnl OMPI_CHECK_ASM_LABEL_SUFFIX
dnl
dnl Sets OMPI_ASM_LABEL_SUFFIX to the text that follows a label name.
dnl
dnl Every host currently gets a colon; the case statement is kept so
dnl a platform that needs something else has an obvious place to be
dnl added.  The value is stored in ompi_cv_asm_label_suffix, defined
dnl in the config header and substituted as OMPI_ASM_LABEL_SUFFIX.
dnl
dnl #################################################################
AC_DEFUN([OMPI_CHECK_ASM_LABEL_SUFFIX],[
    AC_MSG_CHECKING([suffix for labels])
    ompi_cv_asm_label_suffix=""
    case $host in
        *)
            ompi_cv_asm_label_suffix=":"
        ;;
    esac
    AC_MSG_RESULT([$ompi_cv_asm_label_suffix])
    AC_DEFINE_UNQUOTED([OMPI_ASM_LABEL_SUFFIX], ["$ompi_cv_asm_label_suffix"],
                       [Assembly suffix for labels])
    OMPI_ASM_LABEL_SUFFIX="$ompi_cv_asm_label_suffix"
    # substitute the variable that was just assigned
    AC_SUBST(OMPI_ASM_LABEL_SUFFIX)
])dnl
dnl #################################################################
dnl
dnl OMPI_CHECK_ASM_ALIGN_LOG
dnl
dnl Sets OMPI_ASM_ALIGN_LOG to 1 if align is specified
dnl logarithmically, 0 otherwise
dnl
dnl The probe places one byte, aligns with the operand 4, then
dnl defines the symbol foo.  The address of foo is read back with nm:
dnl an offset of 16 (printed as 16 or as hex 10) means the operand
dnl was taken as a power of two.
dnl
dnl #################################################################
AC_DEFUN([OMPI_CHECK_ASM_ALIGN_LOG],[
    AC_REQUIRE([OMPI_CHECK_ASM_TEXT])
    AC_REQUIRE([OMPI_CHECK_ASM_GLOBAL])
    AC_REQUIRE([OMPI_CHECK_ASM_LABEL_SUFFIX])
    AC_REQUIRE([AC_PROG_NM])
    ompi_cv_asm_align_log=0
    asm_result="no"
    AC_MSG_CHECKING([if .align directive takes logarithmic value])
    OMPI_TRY_ASSEMBLE([ $ompi_cv_asm_text
.align 4
$ompi_cv_asm_global foo
.byte 1
.align 4
foo$ompi_cv_asm_label_suffix
.byte 2],
        [ompi_asm_addr=[`$NM conftest.$OBJEXT | sed -e 's/.*\([0-9a-fA-F][0-9a-fA-F]\).*foo.*/\1/'`]],
        [ompi_asm_addr=""])
    # test for both 16 and 10 (decimal and hex notations)
    echo "configure: .align test address offset is $ompi_asm_addr" >& AC_FD_CC
    if test "$ompi_asm_addr" = "16" -o "$ompi_asm_addr" = "10" ; then
        ompi_cv_asm_align_log=1
        asm_result="yes"
    fi
    AC_MSG_RESULT([$asm_result])
    AC_DEFINE_UNQUOTED([OMPI_ASM_ALIGN_LOG],
                       [$ompi_cv_asm_align_log],
                       [Assembly align directive expects logarithmic value])
    # clean up the scratch variable that was actually used above
    unset ompi_asm_addr asm_result
])dnl
dnl #################################################################
dnl
dnl OMPI_CHECK_ASM_TYPE
dnl
dnl Find the character that introduces the word function in a
dnl .type directive (needed on ELF for shared libraries) and publish
dnl it as OMPI_ASM_TYPE.  If none of the candidates assembles without
dnl output, OMPI_ASM_TYPE is left as an empty string.
dnl
dnl Candidates, tried in order: @ \# %
dnl
dnl #################################################################
AC_DEFUN([OMPI_CHECK_ASM_TYPE],[
    AC_MSG_CHECKING([prefix for function in .type])
    ompi_cv_asm_type=""
    for type in @ \# % ; do
        asm_result=0
        echo "configure: trying $type" >& AC_FD_CC
        OMPI_TRY_ASSEMBLE([ .type mysym, ${type}function],
            [# assembling worked; accept the candidate only when
             # the assembler stayed completely silent
             if test -z "`cat conftest.out`" ; then
                 asm_result=1
                 ompi_cv_asm_type="${type}"
             fi])
        test "$asm_result" = "1" && break
    done
    rm -f conftest.out

    AC_MSG_RESULT([$ompi_cv_asm_type])
    OMPI_ASM_TYPE="$ompi_cv_asm_type"
    AC_SUBST(OMPI_ASM_TYPE)
    AC_DEFINE_UNQUOTED([OMPI_ASM_TYPE], ["$ompi_cv_asm_type"],
                       [How to set function type in .type directive])
    unset asm_result type
])dnl
dnl #################################################################
dnl
dnl OMPI_CHECK_ASM_SIZE
dnl
dnl Sets OMPI_ASM_SIZE to 1 if we should set .size directives for
dnl each function, 0 otherwise.
dnl
dnl The answer is 1 only when a .size directive assembles and the
dnl assembler prints nothing at all while doing so.
dnl
dnl #################################################################
AC_DEFUN([OMPI_CHECK_ASM_SIZE],[
    AC_MSG_CHECKING([if .size is needed])
    ompi_cv_asm_size=0
    asm_result="no"
    OMPI_TRY_ASSEMBLE([ .size mysym, 1],
        [# ok, we succeeded at assembling.  see if there was
         # a warning in the output.
         if test "`cat conftest.out`" = "" ; then
             ompi_cv_asm_size=1
             asm_result="yes"
         fi])
    rm -f conftest.out
    AC_MSG_RESULT([$asm_result])
    AC_DEFINE_UNQUOTED([OMPI_ASM_SIZE], ["$ompi_cv_asm_size"],
                       [Do we need to give a .size directive?])
    OMPI_ASM_SIZE="$ompi_cv_asm_size"
    # substitute the size variable assigned above
    AC_SUBST(OMPI_ASM_SIZE)
    unset asm_result
])dnl
dnl #################################################################
dnl
dnl OMPI_CHECK_POWERPC_REG
dnl
dnl See if the notation for specifying registers is X (most everyone)
dnl or rX (OS X)
dnl
dnl Sets ompi_cv_asm_powerpc_r_reg to 0 when bare register numbers
dnl assemble and to 1 when the r prefix is required.  Configure
dnl aborts when neither form assembles.  The result is defined in the
dnl config header as OMPI_POWERPC_R_REGISTERS.
dnl
dnl The fallback probe and the error are passed as quoted arguments
dnl so that they are expanded only inside the failure branch.
dnl
dnl #################################################################
AC_DEFUN([OMPI_CHECK_POWERPC_REG],[
    AC_REQUIRE([OMPI_CHECK_ASM_TEXT])
    AC_MSG_CHECKING([if PowerPC registers have r prefix])
    OMPI_TRY_ASSEMBLE([$ompi_cv_asm_text
addi 1,1,0],
        [ompi_cv_asm_powerpc_r_reg=0],
        [OMPI_TRY_ASSEMBLE([$ompi_cv_asm_text
addi r1,r1,0],
            [ompi_cv_asm_powerpc_r_reg=1],
            [AC_MSG_ERROR([Can not determine how to use PPC registers])])])
    if test "$ompi_cv_asm_powerpc_r_reg" = "1" ; then
        AC_MSG_RESULT([yes])
    else
        AC_MSG_RESULT([no])
    fi
    AC_DEFINE_UNQUOTED([OMPI_POWERPC_R_REGISTERS],
                       [$ompi_cv_asm_powerpc_r_reg],
                       [Whether r notation is used for ppc registers])
])dnl
dnl #################################################################
dnl
dnl OMPI_CHECK_POWERPC_64BIT
dnl
dnl On some powerpc chips (the PPC970 or G5), the OS usually runs in
dnl 32 bit mode, even though the hardware can do 64bit things.  If
dnl the compiler will let us, emit code for 64bit test and set type
dnl operations (on a long long).
dnl
dnl Usage: OMPI_CHECK_POWERPC_64BIT([action-if-yes], [action-if-no])
dnl
dnl The probe assembles a single ldarx instruction, written with or
dnl without the r register prefix according to
dnl ompi_cv_asm_powerpc_r_reg, which the caller must have set.
dnl
dnl #################################################################
AC_DEFUN([OMPI_CHECK_POWERPC_64BIT],[
    AC_REQUIRE([OMPI_CHECK_ASM_TEXT])
    AC_MSG_CHECKING([for 64-bit PowerPC assembly support])
    ppc64_result=0
    if test "$ompi_cv_asm_powerpc_r_reg" = "1" ; then
        ldarx_asm=" ldarx r1,r1,r1";
    else
        # the mnemonic and its operands must be separated by a space
        ldarx_asm=" ldarx 1,1,1";
    fi
    OMPI_TRY_ASSEMBLE([$ompi_cv_asm_text
$ldarx_asm],
        [ppc64_result=1],
        [ppc64_result=0])
    if test "$ppc64_result" = "1" ; then
        AC_MSG_RESULT([yes])
        ifelse([$1],,:,[$1])
    else
        AC_MSG_RESULT([no])
        ifelse([$2],,:,[$2])
    fi
    unset ppc64_result ldarx_asm
])dnl
dnl #################################################################
dnl
dnl OMPI_CHECK_INLINE_GCC
dnl
dnl Check if the compiler is capable of doing GCC-style inline
dnl assembly.  Some compilers emit a warning and ignore the inline
dnl assembly (xlc on OS X) and compile without error.  Therefore,
dnl the test attempts to run the emitted code to check that the
dnl assembly is actually run.  To run this test, one argument to
dnl the macro must be an assembly instruction in gcc format to move
dnl the value 0 into the register containing the variable ret.
dnl For PowerPC, this would be:
dnl
dnl   "li %0,0" : "=&r"(ret)
dnl
dnl When no instruction is supplied the answer is no and no test
dnl program is built.  When cross compiling, a successful link is
dnl taken as good enough.
dnl
dnl DEFINE OMPI_GCC_INLINE_ASSEMBLY to 0 or 1 depending on GCC
dnl support
dnl
dnl #################################################################
AC_DEFUN([OMPI_CHECK_INLINE_GCC],[
    assembly="$1"
    asm_result="unknown"
    AC_MSG_CHECKING([if $CC supports GCC inline assembly])
    if test ! "$assembly" = "" ; then
        AC_RUN_IFELSE(AC_LANG_PROGRAM([[
AC_INCLUDES_DEFAULT]],
[[int ret = 1;
__asm__ __volatile__ ($assembly);
return ret;]]),
            [asm_result="yes"], [asm_result="no"],
            [asm_result="unknown"])
    else
        # nothing to test with: settle on no right away so that the
        # link fallback below is not fed the placeholder text
        assembly="test skipped - assuming no"
        asm_result="no"
    fi
    # if we're cross compiling, just try to compile and figure good enough
    if test "$asm_result" = "unknown" ; then
        AC_LINK_IFELSE(AC_LANG_PROGRAM([[
AC_INCLUDES_DEFAULT]],
[[int ret = 1;
__asm__ __volatile__ ($assembly);
return ret;]]),
            [asm_result="yes"], [asm_result="no"])
    fi
    AC_MSG_RESULT([$asm_result])
    if test "$asm_result" = "yes" ; then
        OMPI_GCC_INLINE_ASSEMBLY=1
    else
        OMPI_GCC_INLINE_ASSEMBLY=0
    fi
    AC_DEFINE_UNQUOTED([OMPI_GCC_INLINE_ASSEMBLY],
                       [$OMPI_GCC_INLINE_ASSEMBLY],
                       [Whether compiler supports GCC style inline assembly])
    unset OMPI_GCC_INLINE_ASSEMBLY assembly asm_result
])dnl
dnl #################################################################
dnl
dnl OMPI_CHECK_INLINE_DEC
dnl
dnl DEFINE OMPI_DEC_INLINE_ASSEMBLY to 1 when a program that includes
dnl c_asm.h and calls asm with an empty string links, 0 otherwise.
dnl
dnl #################################################################
AC_DEFUN([OMPI_CHECK_INLINE_DEC],[
    AC_MSG_CHECKING([if $CC supports DEC inline assembly])
    AC_LINK_IFELSE(AC_LANG_PROGRAM([[
AC_INCLUDES_DEFAULT
#include <c_asm.h>]],
[[asm("");
return 0;]]),
        [asm_result="yes"], [asm_result="no"])
    AC_MSG_RESULT([$asm_result])

    # assume no support, then flip on a positive answer
    OMPI_DEC_INLINE_ASSEMBLY=0
    if test "$asm_result" = "yes" ; then
        OMPI_DEC_INLINE_ASSEMBLY=1
    fi

    AC_DEFINE_UNQUOTED([OMPI_DEC_INLINE_ASSEMBLY],
                       [$OMPI_DEC_INLINE_ASSEMBLY],
                       [Whether compiler supports DEC style inline assembly])
    unset OMPI_DEC_INLINE_ASSEMBLY asm_result
])dnl
dnl #################################################################
dnl
dnl OMPI_CHECK_INLINE_XLC
dnl
dnl DEFINE OMPI_XLC_INLINE_ASSEMBLY to 1 when the C compiler is named
dnl exactly xlc and the C++ compiler is named xlC or xlc++, and to 0
dnl otherwise.  Only the compiler names are examined; nothing is
dnl compiled.
dnl
dnl #################################################################
AC_DEFUN([OMPI_CHECK_INLINE_XLC],[
    AC_MSG_CHECKING([if $CC supports XLC inline assembly])

    asm_result="no"
    OMPI_XLC_INLINE_ASSEMBLY=0
    # both compilers have to be the IBM ones
    if test "$CC" = "xlc" && test "$CXX" = "xlC" -o "$CXX" = "xlc++" ; then
        asm_result="yes"
        OMPI_XLC_INLINE_ASSEMBLY=1
    fi

    AC_MSG_RESULT([$asm_result])
    AC_DEFINE_UNQUOTED([OMPI_XLC_INLINE_ASSEMBLY],
                       [$OMPI_XLC_INLINE_ASSEMBLY],
                       [Whether compiler supports XLC style inline assembly])
    unset OMPI_XLC_INLINE_ASSEMBLY
])dnl
dnl #################################################################
dnl
dnl OMPI_CONFIG_ASM
dnl
dnl Top-level assembly configuration.  Runs the individual assembler
dnl format probes, handles the smp-locks option, selects the
dnl architecture from the host triplet, probes the inline assembly
dnl flavors and finally looks for an assembly file to build.
dnl
dnl DEFINE OMPI_WANT_SMP_LOCKS to 1 unless smp locks were disabled
dnl        on the configure command line, in which case 0
dnl DEFINE OMPI_ASSEMBLY_ARCH to something in sys/architecture.h
dnl DEFINE OMPI_ASSEMBLY_FORMAT to string containing correct
dnl        format for assembly (not user friendly)
dnl SUBST OMPI_ASSEMBLY_FORMAT to string containing correct
dnl        format for assembly (not user friendly)
dnl
dnl #################################################################
AC_DEFUN([OMPI_CONFIG_ASM],[
    AC_REQUIRE([OMPI_SETUP_CC])
    AC_REQUIRE([OMPI_SETUP_CXX])
    AC_REQUIRE([AM_PROG_AS])
    AC_REQUIRE([OMPI_CHECK_ASM_TEXT])
    AC_REQUIRE([OMPI_CHECK_ASM_GLOBAL])
    AC_REQUIRE([OMPI_CHECK_ASM_GSYM])
    AC_REQUIRE([OMPI_CHECK_ASM_LSYM])
    AC_REQUIRE([OMPI_CHECK_ASM_TYPE])
    AC_REQUIRE([OMPI_CHECK_ASM_SIZE])
    AC_REQUIRE([OMPI_CHECK_ASM_LABEL_SUFFIX])
    AC_REQUIRE([OMPI_CHECK_ASM_ALIGN_LOG])

    AC_MSG_CHECKING([whether to enable smp locks])
    AC_ARG_ENABLE(smp-locks,
        AC_HELP_STRING([--enable-smp-locks],
            [disable smp locks in atomic ops (default: enabled)]))
    if test "$enable_smp_locks" != "no"; then
        AC_MSG_RESULT([yes])
        want_smp_locks=1
    else
        AC_MSG_RESULT([no])
        # the user asked for no locks, so really turn them off
        want_smp_locks=0
    fi
    AC_DEFINE_UNQUOTED([OMPI_WANT_SMP_LOCKS], [$want_smp_locks],
                       [whether we want to have smp locks in atomic ops or not])

    # find our architecture for purposes of assembly stuff
    ompi_cv_asm_arch="UNSUPPORTED"
    OMPI_GCC_INLINE_ASSIGN=""
    OMPI_POWERPC_SUPPORT_64BIT=0
    case "${host}" in
        *-winnt*)
            ompi_cv_asm_arch="WINDOWS"
        ;;
        i?86-*)
            ompi_cv_asm_arch="IA32"
            OMPI_GCC_INLINE_ASSIGN='"movl [$]0, %0" : "=&r"(ret)'
        ;;
        x86_64*)
            ompi_cv_asm_arch="AMD64"
            OMPI_GCC_INLINE_ASSIGN='"movl [$]0, %0" : "=&r"(ret)'
        ;;
        ia64-*)
            ompi_cv_asm_arch="IA64"
            OMPI_GCC_INLINE_ASSIGN='"mov %0=r0\n;;\n" : "=&r"(ret)'
        ;;
        alpha-*)
            ompi_cv_asm_arch="ALPHA"
            OMPI_GCC_INLINE_ASSIGN='"bis zero,zero,%0" : "=&r"(ret)'
        ;;
        powerpc-*)
            OMPI_CHECK_POWERPC_REG
            if test "$ac_cv_sizeof_long" = "4" ; then
                ompi_cv_asm_arch="POWERPC32"
                # Note that on some platforms (Apple G5), even if we are
                # compiling in 32 bit more (and therefore should assume
                # sizeof(long) == 4), we can use the 64 bit test and set
                # operations.
                OMPI_CHECK_POWERPC_64BIT(OMPI_POWERPC_SUPPORT_64BIT=1)
            elif test "$ac_cv_sizeof_long" = "8" ; then
                OMPI_POWERPC_SUPPORT_64BIT=1
                ompi_cv_asm_arch="POWERPC64"
            else
                AC_MSG_ERROR([Could not determine PowerPC word size: $ac_cv_sizeof_long])
            fi
            OMPI_GCC_INLINE_ASSIGN='"li %0,0" : "=&r"(ret)'
        ;;
        sparc-*)
            if test "$ac_cv_sizeof_long" = "4" ; then
                ompi_cv_asm_arch="SPARC32"
            elif test "$ac_cv_sizeof_long" = "8" ; then
                ompi_cv_asm_arch="SPARC64"
            else
                AC_MSG_ERROR([Could not determine Sparc word size: $ac_cv_sizeof_long])
            fi
            # ret is an output operand, so it belongs right after the
            # first colon like on every other architecture above
            OMPI_GCC_INLINE_ASSIGN='"mov 0,%0" : "=&r"(ret)'
        ;;
        *)
            AC_MSG_ERROR([No atomic primitives available for $host])
        ;;
    esac
    AC_DEFINE_UNQUOTED([OMPI_POWERPC_SUPPORT_64BIT],
                       [$OMPI_POWERPC_SUPPORT_64BIT],
                       [Non-zero if safe to call PPC64 ops, even in PPC32 code])
    AC_SUBST([OMPI_POWERPC_SUPPORT_64BIT])

    # now that we know our architecture, try to inline assemble
    OMPI_CHECK_INLINE_GCC([$OMPI_GCC_INLINE_ASSIGN])
    OMPI_CHECK_INLINE_DEC
    OMPI_CHECK_INLINE_XLC

    # format:
    # text-global-label_suffix-gsym-lsym-type-size-align_log-ppc_r_reg-64_bit
    asm_format="${ompi_cv_asm_text}-${ompi_cv_asm_global}"
    asm_format="${asm_format}-${ompi_cv_asm_label_suffix}-${ompi_cv_asm_gsym}"
    asm_format="${asm_format}-${ompi_cv_asm_lsym}"
    asm_format="${asm_format}-${ompi_cv_asm_type}-${ompi_cv_asm_size}"
    asm_format="${asm_format}-${ompi_cv_asm_align_log}"
    if test "$ompi_cv_asm_arch" = "POWERPC32" -o "$ompi_cv_asm_arch" = "POWERPC64" ; then
        asm_format="${asm_format}-${ompi_cv_asm_powerpc_r_reg}"
    else
        # the register field is fixed at 1 for every other architecture
        asm_format="${asm_format}-1"
    fi
    ompi_cv_asm_format="${asm_format}-${OMPI_POWERPC_SUPPORT_64BIT}"
    OMPI_ASSEMBLY_FORMAT="$ompi_cv_asm_format"
    AC_MSG_CHECKING([for assembly format])
    AC_MSG_RESULT([$OMPI_ASSEMBLY_FORMAT])
    AC_DEFINE_UNQUOTED([OMPI_ASSEMBLY_FORMAT], ["$OMPI_ASSEMBLY_FORMAT"],
                       [Format of assembly file])
    AC_SUBST([OMPI_ASSEMBLY_FORMAT])

    result="OMPI_$ompi_cv_asm_arch"
    OMPI_ASSEMBLY_ARCH="$ompi_cv_asm_arch"
    AC_MSG_CHECKING([for asssembly architecture])
    AC_MSG_RESULT([$ompi_cv_asm_arch])
    AC_DEFINE_UNQUOTED([OMPI_ASSEMBLY_ARCH], [$result],
                       [Architecture type of assembly to use for atomic operations])
    AC_SUBST([OMPI_ASSEMBLY_ARCH])

    OMPI_ASM_FIND_FILE
    unset result asm_format
])dnl
dnl #################################################################
dnl
dnl OMPI_ASM_FIND_FILE
dnl
dnl Provide a working assembly file for the detected architecture
dnl and assembly format.
dnl
dnl First the asm-data.txt table in the source tree is searched for
dnl a line matching the architecture and format; the third
dnl tab-separated field of the match names a pre-built file under
dnl src/asm/generated.  If no usable pre-built file is found and
dnl perl is available, generate-asm.pl is run to produce
dnl atomic-local.s in the build tree.
dnl
dnl DEFINE      OMPI_HAVE_ASM_FILE to 1 if a file is available, else 0
dnl CONDITIONAL OMPI_HAVE_ASM_FILE likewise
dnl SUBST       OMPI_ASM_FILE to the file name, empty if none
dnl
dnl #################################################################
AC_DEFUN([OMPI_ASM_FIND_FILE], [
    AC_CHECK_PROG([PERL], [perl], [perl])

    # see if we have a pre-built one already
    AC_MSG_CHECKING([for pre-built assembly file])
    ompi_cv_asm_file=""
    if grep "$ompi_cv_asm_arch.*$ompi_cv_asm_format" "${top_ompi_srcdir}/src/asm/asm-data.txt" >conftest.out 2>&1 ; then
        ompi_cv_asm_file="`cut -f3 conftest.out`"
        if test ! "$ompi_cv_asm_file" = "" ; then
            ompi_cv_asm_file="atomic-${ompi_cv_asm_file}.s"
            if test -f "${top_ompi_srcdir}/src/asm/generated/${ompi_cv_asm_file}" ; then
                AC_MSG_RESULT([yes ($ompi_cv_asm_file)])
            else
                AC_MSG_RESULT([no ($ompi_cv_asm_file not found)])
                ompi_cv_asm_file=""
            fi
        else
            # a line matched but carried no file name; still finish
            # the checking message so the output stays aligned
            AC_MSG_RESULT([no (no file name in asm-data)])
        fi
    else
        AC_MSG_RESULT([no (not in asm-data)])
    fi
    rm -f conftest.*

    if test "$ompi_cv_asm_file" = "" ; then
        if test ! "$PERL" = "" ; then
            # we have perl...  Can we generate a file?
            AC_MSG_CHECKING([whether possible to generate assembly file])
            ompi_cv_asm_file="atomic-local.s"
            ompi_try="$PERL \"$top_ompi_srcdir/src/asm/generate-asm.pl\" \"$ompi_cv_asm_arch\" \"$ompi_cv_asm_format\" \"$top_ompi_srcdir/src/asm/base\" \"$top_ompi_builddir/src/asm/generated/$ompi_cv_asm_file\" >conftest.out 2>&1"
            if AC_TRY_EVAL(ompi_try) ; then
                # save the warnings
                cat conftest.out >&AC_FD_CC
                AC_MSG_RESULT([yes])
            else
                # save output
                cat conftest.out >&AC_FD_CC
                ompi_cv_asm_file=""
                AC_MSG_RESULT([failed])
                AC_MSG_WARN([Could not build atomic operations assembly file.])
                AC_MSG_WARN([There will be no atomic operations for this build.])
            fi
        else
            AC_MSG_WARN([Could not find prebuilt atomic operations file and could not])
            AC_MSG_WARN([find perl to attempt to generate a custom assembly file.])
            AC_MSG_WARN([There will be no atomic operations for this build.])
        fi
    fi
    rm -f conftest.*

    AC_MSG_CHECKING([for atomic assembly filename])
    if test "$ompi_cv_asm_file" = "" ; then
        AC_MSG_RESULT([none])
        result=0
    else
        AC_MSG_RESULT([$ompi_cv_asm_file])
        result=1
    fi
    AC_DEFINE_UNQUOTED([OMPI_HAVE_ASM_FILE], [$result],
                       [Whether there is an atomic assembly file available])
    AM_CONDITIONAL([OMPI_HAVE_ASM_FILE], [test "$result" = "1"])
    OMPI_ASM_FILE=$ompi_cv_asm_file
    AC_SUBST(OMPI_ASM_FILE)
])dnl

43
config/ompi_try_assemble.m4 Обычный файл
Просмотреть файл

@ -0,0 +1,43 @@
dnl
dnl Copyright (c) 2004-2005 The Trustees of Indiana University.
dnl All rights reserved.
dnl Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
dnl All rights reserved.
dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
dnl University of Stuttgart. All rights reserved.
dnl $COPYRIGHT$
dnl
dnl Additional copyrights may follow
dnl
dnl $HEADER$
dnl
dnl OMPI_TRY_ASSEMBLE(asm-code, [action-if-success], [action-if-fail])
dnl
dnl Attempt to assemble asm-code. If success, run action-if-success.
dnl Otherwise, run action-if-fail. Neither action-if-success nor
dnl action-if-fail are required.
dnl
dnl asm-code is written to conftest.s through an unquoted here
dnl document, so shell variables in it are expanded. The file is
dnl assembled with $CCAS $CFLAGS -c and all assembler output is
dnl captured in conftest.out, which is still present while either
dnl action runs. Every conftest* file is removed afterwards.
dnl
dnl No preprocessing is guaranteed to be done on asm-code. Some
dnl compilers do not run the preprocessor on assembly files.
dnl
dnl The assembler output is always copied to the configure log
dnl (AC_FD_CC). On failure, conftest.s is copied there as well.
AC_DEFUN([OMPI_TRY_ASSEMBLE],
[cat >conftest.s <<EOF
[$1]
EOF
ompi_assemble="$CCAS $CFLAGS -c conftest.s >conftest.out 2>&1"
if AC_TRY_EVAL(ompi_assemble); then
# save the warnings
cat conftest.out >&AC_FD_CC
ifelse([$2],,:,[$2])
else
# save compiler output and failed program
cat conftest.out >&AC_FD_CC
echo "configure: failed program was:" >&AC_FD_CC
cat conftest.s >&AC_FD_CC
ifelse([$3],,:,[$3])
fi
rm -f conftest*
unset ompi_assemble
])dnl

Просмотреть файл

@ -315,6 +315,16 @@ AC_DEFINE_UNQUOTED(OMPI_WANT_CXX_BINDINGS, $WANT_MPI_CXX_SUPPORT,
[Whether we want MPI cxx support or not])
##################################
# Assembler Configuration
##################################
ompi_show_subtitle "Assembler"
AM_PROG_AS
OMPI_CONFIG_ASM
##################################
# Fortran
##################################
@ -1382,6 +1392,7 @@ AC_CONFIG_FILES([
src/event/compat/sys/Makefile
src/attribute/Makefile
src/asm/Makefile
src/communicator/Makefile
src/datatype/Makefile
src/errhandler/Makefile

Просмотреть файл

@ -66,6 +66,7 @@ endif
SUBDIRS = \
include \
$(LIBLTDL_SUBDIR) \
asm \
attribute \
communicator \
datatype \
@ -94,6 +95,7 @@ lib_LTLIBRARIES = libmpi.la
libmpi_la_SOURCES =
libmpi_la_LIBADD = \
$(LIBLTDL_LTLIB) \
asm/libasm.la \
attribute/libattribute.la \
class/liblfc.la \
communicator/libcommunicator.la \

80
src/asm/Makefile.am Обычный файл
Просмотреть файл

@ -0,0 +1,80 @@
#
# Copyright (c) 2004-2005 The Trustees of Indiana University.
# All rights reserved.
# Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
# All rights reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
include $(top_srcdir)/config/Makefile.options

######################################################################
#
# This is a bit complicated.  If there is anything in the library,
# it will always be atomic-asm.s.  We just symlink atomic-asm.s to
# the best atomic operations available (as determined at configure
# time)
#
######################################################################

# Regenerate the selected assembly file from its architecture template.
generated/@OMPI_ASM_FILE@: base/@OMPI_ASSEMBLY_ARCH@.asm
	$(PERL) "$(top_srcdir)/src/asm/generate-asm.pl" "@OMPI_ASSEMBLY_ARCH@" "@OMPI_ASSEMBLY_FORMAT@" "$(top_srcdir)/src/asm/base" "$(top_builddir)/src/asm/generated/@OMPI_ASM_FILE@"

# Point atomic-asm.s at the selected file, preferring a copy in the
# source tree over one generated in the build tree.
atomic-asm.s: generated/@OMPI_ASM_FILE@
	rm -f atomic-asm.s
	@ if test -f $(top_srcdir)/src/asm/generated/@OMPI_ASM_FILE@ ; then \
	  cmd="ln -s $(top_srcdir)/src/asm/generated/@OMPI_ASM_FILE@ atomic-asm.s" ; \
	  echo "$$cmd" ; \
	  $$cmd ; \
	else \
	  cmd="ln -s $(top_builddir)/src/asm/generated/@OMPI_ASM_FILE@ atomic-asm.s" ; \
	  echo "$$cmd" ; \
	  $$cmd ; \
	fi

# Only depend on the generated file when configure actually selected
# one; with an empty OMPI_ASM_FILE the dependency would be the bare
# generated/ directory.
if OMPI_HAVE_ASM_FILE
libasm_la_SOURCES = atomic-asm.s
libasm_la_DEPENDENCIES = generated/@OMPI_ASM_FILE@
else
libasm_la_SOURCES =
libasm_la_DEPENDENCIES =
endif

lib_LTLIBRARIES = libasm.la

EXTRA_DIST = \
	asm-data.txt \
	generate-asm.pl \
	generate-all-asm.sh \
	base/AMD64.asm \
	base/IA32.asm \
	base/POWERPC32.asm \
	base/POWERPC64.asm

######################################################################

TESTS = atomic-test
check_PROGRAMS = atomic-test
atomic_test_SOURCES = atomic-test.c
atomic_test_LDADD = libasm.la

######################################################################

clean-local:
	rm -f atomic-asm.s

maintainer-clean-local:
	rm -f generated/atomic-local.s

######################################################################
#
# Copy over all the generated files
#
# The generator script is looked up in the source directory so that
# this also works when building outside the source tree, and an
# already existing generated directory is tolerated.
#
dist-hook:
	test -d "$(distdir)/generated" || mkdir "$(distdir)/generated"
	sh "$(srcdir)/generate-all-asm.sh" "$(PERL)" "$(srcdir)" "$(distdir)"

27
src/asm/asm-data.txt Обычный файл
Просмотреть файл

@ -0,0 +1,27 @@
#
# Copyright (c) 2004-2005 The Trustees of Indiana University.
# All rights reserved.
# Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
# All rights reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
#
# Database for mapping architecture and assembly format to prebuilt
# assembly files.
#
# FORMAT:
# ARCHITECTURE ASSEMBLY FORMAT BASE FILENAME
#
AMD64 .text-.globl-:--.L-@-1-0-1-0 amd64-linux
IA32 .text-.globl-:--.L-@-1-0-1-0 ia32-linux
POWERPC32 .text-.globl-:-_-L--0-1-1-0 powerpc32-osx
POWERPC32 .text-.globl-:-_-L--0-1-1-1 powerpc32-64-osx
POWERPC64 .text-.globl-:-_-L--0-1-1-1 powerpc64-osx

457
src/asm/atomic-test.c Обычный файл
Просмотреть файл

@ -0,0 +1,457 @@
#undef OMPI_BUILDING
#include "ompi_config.h"
#include <assert.h>
#include <getopt.h>
#ifdef HAVE_PTHREAD_H
#include <pthread.h>
#endif
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "include/sys/atomic.h"
/**
* A testing support library to provide uniform reporting output
*
* The four variables below are the shared state of that library.
*/
/* number of checks recorded so far (successes plus failures) */
static int ompi_n_tests;
/* number of checks recorded through test_success() */
static int ompi_n_success;
/* number of checks recorded through test_failure() */
static int ompi_n_failures;
/* heap copy of the test name, set by test_init() and used in reports */
static char *ompi_description;
/**
 * Start a test run: remember a private copy of the description and
 * reset all three counters to zero.
 *
 * @param a  NUL-terminated description of the test being run
 */
static void test_init(char *a)
{
    /* room for the text plus its terminating NUL */
    size_t nbytes = strlen(a) + 1;

    ompi_description = (char *) malloc(nbytes);
    assert(ompi_description);
    memcpy(ompi_description, a, nbytes);

    ompi_n_tests = ompi_n_success = ompi_n_failures = 0;
}
/** Record one passing check. */
static void test_success(void)
{
    ompi_n_success += 1;
    ompi_n_tests += 1;
}
/**
 * Record one failing check and report it on stderr.
 *
 * @param a  message describing the failure; printed verbatim
 */
static void test_failure(char *a)
{
    ompi_n_tests++;
    ompi_n_failures++;
    fprintf(stderr, " Failure : ");
    /* print the message as data; it must never act as a format string */
    fprintf(stderr, "%s", a);
    fprintf(stderr, "\n");
    fflush(stderr);
}
/**
 * Compare two integers and record the outcome.
 *
 * @param expected_result  value the check should produce
 * @param test_result      value the check actually produced
 * @return 1 when both values are equal, 0 otherwise
 */
static int test_verify_int(int expected_result, int test_result)
{
    if (expected_result == test_result) {
        test_success();
        return 1;
    }

    /* mismatch: count the failure and show both values */
    test_failure("Comparison failure");
    fprintf(stderr, " Expected result: %d\n", expected_result);
    fprintf(stderr, " Test result: %d\n", test_result);
    fflush(stderr);
    return 0;
}
/**
 * Print the summary line for the whole run on stderr.
 *
 * @return 1 when every recorded check passed, 0 otherwise
 */
static int test_finalize(void)
{
    int all_passed = (ompi_n_tests == ompi_n_success);

    if (all_passed) {
        fprintf(stderr, "SUPPORT: OMPI Test Passed: %s: (%d tests)\n",
                ompi_description, ompi_n_tests);
    } else {
        fprintf(stderr,
                "SUPPORT: OMPI Test failed: %s (%d of %d failed)\n",
                ompi_description, ompi_n_failures, ompi_n_tests);
    }
    fflush(stderr);

    return all_passed ? 1 : 0;
}
/*
 * Emit an extra remark, prefixed with the test description.  Unlike the
 * rest of the reporting helpers this one writes to STDOUT, not STDERR.
 */
static void test_comment (char* userstr)
{
    printf("%s:%s\n", ompi_description, userstr);
}
/* default options */
/* iterations of the add loop run by each thread (flag -r) */
int nreps = 100;
/* thread count requested on the command line (flag -t) */
int nthreads = 2;
/* non-zero makes verbose() print to stderr (flag -v) */
int enable_verbose = 0;
/* set by flag -l; only honored when ENABLE_64_BIT is defined */
int enable_64_bit_tests = 0;
/* 32-bit operands: vol32/old32/new32 feed the cmpset checks in main,
   val32 is the target of the atomic add in thread_main */
volatile int32_t vol32;
int32_t val32;
int32_t old32;
int32_t new32;
#ifdef ENABLE_64_BIT
/* 64-bit counterparts, compiled only when ENABLE_64_BIT is defined */
volatile int64_t vol64;
int64_t val64;
int64_t old64;
int64_t new64;
#endif
/* plain int operands for the unsuffixed atomic calls */
volatile int volint;
int valint;
int oldint;
int newint;
/* pointer operands for the pointer cmpset checks in main */
volatile void *volptr;
void *oldptr;
void *newptr;
/**
 * Print the full usage text on stdout and exit successfully.
 *
 * The -l line is shown only when ENABLE_64_BIT is defined; otherwise a
 * note says that the 64-bit tests are not part of this build.
 * Does not return.
 */
static void help(void)
{
    printf("Usage: threadtest [flags]\n"
           "\n"
           " Flags may be any of\n"
#ifdef ENABLE_64_BIT
           " -l do 64-bit tests\n"
#endif
           " -r NREPS number of repetitions\n"
           " -t NTHREADS number of threads\n"
           " -v verbose output\n"
           " -h print this info\n" "\n"
           " Numbers may be postfixed with 'k' or 'm'\n\n");
#ifndef ENABLE_64_BIT
    printf(" 64-bit tests are not enabled in this build of the tests\n\n");
#endif
    exit(EXIT_SUCCESS);
}
/**
 * Print the short usage reminder on stderr and exit with a failure
 * status.  Does not return.
 */
static void usage(void)
{
    fputs("Usage: threadtest [flags]\n" " threadtest -h\n", stderr);
    exit(EXIT_FAILURE);
}
/**
 * printf-style diagnostic output on stderr, emitted only when verbose
 * mode is switched on.
 *
 * @param fmt  printf-style format string, followed by its arguments
 */
static void verbose(const char *fmt, ...)
{
    va_list args;

    if (!enable_verbose) {
        return;
    }

    va_start(args, fmt);
    vfprintf(stderr, fmt, args);
    va_end(args);
}
/**
 * Parse a decimal count with an optional one-letter multiplier.
 *
 * A trailing k or K multiplies by 1024, m or M by 1024 * 1024.
 *
 * @param str  text to parse
 * @return the parsed value, or -1 when no number could be read
 */
static int str2size(char *str)
{
    int value;
    char suffix[32];
    int nfields = sscanf(str, "%d%1[mMkK]", &value, suffix);

    /* a bare number */
    if (nfields == 1) {
        return (value);
    }
    /* anything other than number plus suffix is a parse failure */
    if (nfields != 2) {
        return (-1);
    }

    if (suffix[0] == 'm' || suffix[0] == 'M') {
        return (value << 20);
    }
    if (suffix[0] == 'k' || suffix[0] == 'K') {
        return (value << 10);
    }
    return (value);
}
/**
 * Thread body for the multi-threaded part of the test.
 *
 * Performs nreps rounds of atomic add-5 on the shared counters: always
 * the 32-bit and the int counter, and the 64-bit counter as well when
 * the 64-bit tests are compiled in and enabled.
 *
 * @param arg  the thread rank, passed as an integer stored in the pointer
 * @return the rank plus 1000, stored in the returned pointer
 */
static void *thread_main(void *arg)
{
    /* go through long so the pointer is not narrowed in a single cast
       on platforms where pointers are wider than int */
    int rank = (int) (long) arg;
    int i;

    verbose("thread-%d: Hello\n", rank);

    /* thread tests */
    for (i = 0; i < nreps; i++) {
        ompi_atomic_add_32(&val32, 5);
#ifdef ENABLE_64_BIT
        if (enable_64_bit_tests) {
            ompi_atomic_add_64(&val64, 5);
        }
#endif
        ompi_atomic_add(&valint, 5);
    }

    /* widen to long first, mirroring the cast used for the argument */
    return (void *) (long) (rank + 1000);
}
/**
 * Test driver for the atomic operations.
 *
 * Parses the command line (-h, -l, -r <reps>, -t <threads>, -v), runs the
 * single-threaded compare-and-set and add checks, then starts nthreads
 * workers that each add 5 to the shared counters nreps times and verifies
 * the final totals.
 */
int main(int argc, char *argv[])
{
    int c;
    int tid;
    pthread_t *th;

    /* option processing */
    test_init("atomic operations");
    while ((c = getopt(argc, argv, "hlr:t:v")) != -1) {
        switch (c) {
        case 'h':
            help();
            break;
        case 'l':
#ifdef ENABLE_64_BIT
            enable_64_bit_tests = 1;
#else
            usage();
#endif
            break;
        case 'r':
            if ((nreps = str2size(optarg)) <= 0) {
                usage();
            }
            break;
        case 't':
            if ((nthreads = str2size(optarg)) <= 0) {
                usage();
            }
            break;
        case 'v':
            enable_verbose = 1;
            break;
        default:
            usage();
        }
    }
    if (optind != argc) {
        usage();
    }

    verbose("main: %s\n", argv[0]);
    verbose("main: nthreads = %d\n", nthreads);
    verbose("main: nreps = %d\n", nreps);

    /* first test single-threaded functionality */

    /* -- cmpset 32-bit tests -- */
    vol32 = 42, old32 = 42, new32 = 50;
    test_verify_int(ompi_atomic_cmpset_32(&vol32, old32, new32), 1);
    test_verify_int(vol32, new32);

    vol32 = 42, old32 = 420, new32 = 50;
    test_verify_int(ompi_atomic_cmpset_32(&vol32, old32, new32), 0);
    test_verify_int(vol32, 42);

    vol32 = 42, old32 = 42, new32 = 50;
    test_verify_int(ompi_atomic_cmpset_acq_32(&vol32, old32, new32), 1);
    test_verify_int(vol32, new32);

    vol32 = 42, old32 = 420, new32 = 50;
    test_verify_int(ompi_atomic_cmpset_acq_32(&vol32, old32, new32), 0);
    test_verify_int(vol32, 42);

    vol32 = 42, old32 = 42, new32 = 50;
    test_verify_int(ompi_atomic_cmpset_rel_32(&vol32, old32, new32), 1);
    test_verify_int(vol32, new32);

    vol32 = 42, old32 = 420, new32 = 50;
    test_verify_int(ompi_atomic_cmpset_rel_32(&vol32, old32, new32), 0);
    test_verify_int(vol32, 42);

    /* -- cmpset 64-bit tests -- */
#ifdef ENABLE_64_BIT
    if (enable_64_bit_tests) {
        verbose("64 bit serial tests\n");
        vol64 = 42, old64 = 42, new64 = 50;
        test_verify_int(1, ompi_atomic_cmpset_64(&vol64, old64, new64));
        test_verify_int(new64, vol64);

        verbose("64 bit serial test 2\n");
        vol64 = 42, old64 = 420, new64 = 50;
        test_verify_int(ompi_atomic_cmpset_64(&vol64, old64, new64), 0);
        test_verify_int(vol64, 42);

        vol64 = 42, old64 = 42, new64 = 50;
        test_verify_int(ompi_atomic_cmpset_acq_64(&vol64, old64, new64), 1);
        test_verify_int(vol64, new64);

        vol64 = 42, old64 = 420, new64 = 50;
        test_verify_int(ompi_atomic_cmpset_acq_64(&vol64, old64, new64), 0);
        test_verify_int(vol64, 42);

        vol64 = 42, old64 = 42, new64 = 50;
        test_verify_int(ompi_atomic_cmpset_rel_64(&vol64, old64, new64), 1);
        test_verify_int(vol64, new64);

        vol64 = 42, old64 = 420, new64 = 50;
        test_verify_int(ompi_atomic_cmpset_rel_64(&vol64, old64, new64), 0);
        test_verify_int(vol64, 42);
    }
#endif

    /* -- cmpset int tests -- */
    volint = 42, oldint = 42, newint = 50;
    test_verify_int(ompi_atomic_cmpset(&volint, oldint, newint), 1);
    test_verify_int(volint, newint);

    volint = 42, oldint = 420, newint = 50;
    test_verify_int(ompi_atomic_cmpset(&volint, oldint, newint), 0);
    test_verify_int(volint, 42);

    volint = 42, oldint = 42, newint = 50;
    test_verify_int(ompi_atomic_cmpset_acq(&volint, oldint, newint), 1);
    test_verify_int(volint, newint);

    volint = 42, oldint = 420, newint = 50;
    test_verify_int(ompi_atomic_cmpset_acq(&volint, oldint, newint), 0);
    test_verify_int(volint, 42);

    volint = 42, oldint = 42, newint = 50;
    test_verify_int(ompi_atomic_cmpset_rel(&volint, oldint, newint), 1);
    test_verify_int(volint, newint);

    volint = 42, oldint = 420, newint = 50;
    test_verify_int(ompi_atomic_cmpset_rel(&volint, oldint, newint), 0);
    test_verify_int(volint, 42);

    /* -- cmpset ptr tests -- */
    volptr = (void *) 42, oldptr = (void *) 42, newptr = (void *) 50;
    test_verify_int(ompi_atomic_cmpset(&volptr, oldptr, newptr), 1);
    test_verify_int(volptr, newptr);

    volptr = (void *) 42, oldptr = (void *) 420, newptr = (void *) 50;
    test_verify_int(ompi_atomic_cmpset(&volptr, oldptr, newptr), 0);
    test_verify_int(volptr, (void *) 42);

    volptr = (void *) 42, oldptr = (void *) 42, newptr = (void *) 50;
    test_verify_int(ompi_atomic_cmpset_acq(&volptr, oldptr, newptr), 1);
    test_verify_int(volptr, newptr);

    volptr = (void *) 42, oldptr = (void *) 420, newptr = (void *) 50;
    test_verify_int(ompi_atomic_cmpset_acq(&volptr, oldptr, newptr), 0);
    test_verify_int(volptr, (void *) 42);

    volptr = (void *) 42, oldptr = (void *) 42, newptr = (void *) 50;
    test_verify_int(ompi_atomic_cmpset_rel(&volptr, oldptr, newptr), 1);
    test_verify_int(volptr, newptr);

    volptr = (void *) 42, oldptr = (void *) 420, newptr = (void *) 50;
    test_verify_int(ompi_atomic_cmpset_rel(&volptr, oldptr, newptr), 0);
    test_verify_int(volptr, (void *) 42);

    /* -- add_32 tests -- */
    val32 = 42;
    test_verify_int(ompi_atomic_add_32(&val32, 5), (42 + 5));
    test_verify_int((42 + 5), val32);

    /* -- add_64 tests -- */
#ifdef ENABLE_64_BIT
    if (enable_64_bit_tests) {
        val64 = 42;
        test_verify_int(ompi_atomic_add_64(&val64, 5), (42 + 5));
        test_verify_int((42 + 5), val64);
    }
#endif

    /* -- add_int tests -- */
    valint = 42;
    ompi_atomic_add(&valint, 5);
    test_verify_int((42 + 5), valint);

    /* threaded tests: reset the shared counters first */
    val32 = 0;
#ifdef ENABLE_64_BIT
    val64 = 0ul;
#endif
    valint = 0;

    /* -- create the thread set -- */
    th = (pthread_t *) malloc(nthreads * sizeof(pthread_t));
    if (!th) {
        perror("malloc");
        exit(EXIT_FAILURE);
    }
    for (tid = 0; tid < nthreads; tid++) {
        /* The rank travels by value inside the pointer argument; widen
           through long so the int -> pointer conversion is between
           equally wide types on LP64 as well as ILP32. */
        if (pthread_create(&th[tid], NULL, thread_main,
                           (void *) (long) tid) != 0) {
            perror("pthread_create");
            exit(EXIT_FAILURE);
        }
    }

    /* -- wait for the thread set to finish -- */
    for (tid = 0; tid < nthreads; tid++) {
        void *thread_return;

        if (pthread_join(th[tid], &thread_return) != 0) {
            perror("pthread_join");
            exit(EXIT_FAILURE);
        }
        /* Same by-value convention on the way back: narrow via long. */
        verbose("main: thread %d returned %d\n", tid,
                (int) (long) thread_return);
    }
    free(th);

    /* every thread added 5 to each counter nreps times */
    test_verify_int((5 * nthreads * nreps), val32);
#ifdef ENABLE_64_BIT
    if (enable_64_bit_tests) {
        test_verify_int((5 * nthreads * nreps), val64);
    }
#endif
    test_verify_int((5 * nthreads * nreps), valint);

    test_finalize();
    return 0;
}

73
src/asm/base/AMD64.asm Обычный файл
Просмотреть файл

@ -0,0 +1,73 @@
TEXT
# Full memory barrier entry point. The body only sets up and tears down
# a stack frame; no fence instruction is issued.
START_FUNC(ompi_atomic_mb)
pushq %rbp
movq %rsp, %rbp
leave
ret
END_FUNC(ompi_atomic_mb)
# Read memory barrier entry point; same empty-frame body as above.
START_FUNC(ompi_atomic_rmb)
pushq %rbp
movq %rsp, %rbp
leave
ret
END_FUNC(ompi_atomic_rmb)
# Write memory barrier entry point; same empty-frame body as above.
START_FUNC(ompi_atomic_wmb)
pushq %rbp
movq %rsp, %rbp
leave
ret
END_FUNC(ompi_atomic_wmb)
# Atomic 32-bit compare-and-set.
#   in:  %rdi = address, %esi = expected old value, %edx = new value
#   out: %eax = 1 if the word matched and was replaced, 0 otherwise
# The lock prefix is required: without it cmpxchg is not atomic with
# respect to other processors.
START_FUNC(ompi_atomic_cmpset_32)
        movl    %esi, %eax
#APP
        lock cmpxchgl %edx,(%rdi)
        sete    %dl
#NO_APP
        movzbl  %dl, %eax
        ret
END_FUNC(ompi_atomic_cmpset_32)
# Atomic 64-bit compare-and-set.
#   in:  %rdi = address, %rsi = expected old value, %rdx = new value
#   out: %eax = 1 if the quadword matched and was replaced, 0 otherwise
# The lock prefix is required: without it cmpxchg is not atomic with
# respect to other processors.
START_FUNC(ompi_atomic_cmpset_64)
        movq    %rsi, %rax
#APP
        lock cmpxchgq %rdx,(%rdi)
        sete    %dl
#NO_APP
        movzbl  %dl, %eax
        ret
END_FUNC(ompi_atomic_cmpset_64)

108
src/asm/base/IA32.asm Обычный файл
Просмотреть файл

@ -0,0 +1,108 @@
TEXT
# Full memory barrier entry point. The body only sets up and tears down
# a stack frame; no fence or locked instruction is issued.
START_FUNC(ompi_atomic_mb)
pushl %ebp
movl %esp, %ebp
leave
ret
END_FUNC(ompi_atomic_mb)
# Read memory barrier entry point; same empty-frame body as above.
START_FUNC(ompi_atomic_rmb)
pushl %ebp
movl %esp, %ebp
leave
ret
END_FUNC(ompi_atomic_rmb)
# Write memory barrier entry point; same empty-frame body as above.
START_FUNC(ompi_atomic_wmb)
pushl %ebp
movl %esp, %ebp
leave
ret
END_FUNC(ompi_atomic_wmb)
# Atomic 32-bit compare-and-set.
# Stack arguments: 8(%ebp) = address, 12(%ebp) = expected old value,
# 16(%ebp) = new value. Returns 1 in %eax when the swap happened, else 0.
START_FUNC(ompi_atomic_cmpset_32)
pushl %ebp
movl %esp, %ebp
movl 8(%ebp), %edx
movl 16(%ebp), %ecx
movl 12(%ebp), %eax
#APP
# cmpxchg compares %eax with the memory word and stores %ecx on a match;
# sete captures the resulting zero flag as the success indicator.
lock cmpxchgl %ecx,(%edx)
sete %dl
#NO_APP
movzbl %dl, %eax
leave
ret
END_FUNC(ompi_atomic_cmpset_32)
# Atomic 64-bit compare-and-set built on cmpxchg8b.
# Stack arguments: 8(%ebp) = address, 12/16(%ebp) = low/high half of the
# expected old value, 20/24(%ebp) = low/high half of the new value.
# Returns 1 in %eax when the swap happened, else 0.
START_FUNC(ompi_atomic_cmpset_64)
pushl %ebp
movl %esp, %ebp
subl $32, %esp
# preserve the registers this routine overwrites
movl %ebx, -12(%ebp)
movl %esi, -8(%ebp)
movl %edi, -4(%ebp)
movl 8(%ebp), %edi
# copy the old value into locals at -24/-20 and the new value at -32/-28
movl 12(%ebp), %eax
movl 16(%ebp), %edx
movl %eax, -24(%ebp)
movl %edx, -20(%ebp)
movl 20(%ebp), %eax
movl 24(%ebp), %edx
movl %eax, -32(%ebp)
movl %edx, -28(%ebp)
# %edx:%eax = expected old value (low half goes via %ebx),
# %ecx:%esi = new value
movl -24(%ebp), %ebx
movl -20(%ebp), %edx
movl -32(%ebp), %esi
movl -28(%ebp), %ecx
movl %ebx, %eax
#APP
# cmpxchg8b wants the new low half in %ebx, so %ebx is saved around it
push %ebx
movl %esi, %ebx
lock cmpxchg8b (%edi)
sete %dl
pop %ebx
#NO_APP
movzbl %dl, %eax
movl -12(%ebp), %ebx
movl -8(%ebp), %esi
movl -4(%ebp), %edi
movl %ebp, %esp
popl %ebp
ret
END_FUNC(ompi_atomic_cmpset_64)
# Atomically add delta to a 32-bit word and return the resulting value.
# Stack arguments: 8(%ebp) = address, 12(%ebp) = delta.
# The result is computed from the old value that xadd hands back, not
# re-read from memory, so it cannot include another thread's update.
START_FUNC(ompi_atomic_add_32)
        pushl   %ebp
        movl    %esp, %ebp
        movl    8(%ebp), %ecx
        movl    12(%ebp), %edx
        movl    %edx, %eax
#APP
        lock xaddl %eax,(%ecx)
#NO_APP
        addl    %edx, %eax
        leave
        ret
END_FUNC(ompi_atomic_add_32)
# Atomically subtract delta from a 32-bit word and return the result.
# Stack arguments: 8(%ebp) = address, 12(%ebp) = delta.
# Implemented as an atomic exchange-and-add of the negated delta; the
# result is derived from the old value xadd hands back, not re-read from
# memory, so it cannot include another thread's update.
START_FUNC(ompi_atomic_sub_32)
        pushl   %ebp
        movl    %esp, %ebp
        movl    8(%ebp), %ecx
        movl    12(%ebp), %edx
        movl    %edx, %eax
        negl    %eax
#APP
        lock xaddl %eax,(%ecx)
#NO_APP
        subl    %edx, %eax
        leave
        ret
END_FUNC(ompi_atomic_sub_32)

143
src/asm/base/POWERPC32.asm Обычный файл
Просмотреть файл

@ -0,0 +1,143 @@
TEXT
ALIGN(4)
# Full memory barrier: issues sync.
START_FUNC(ompi_atomic_mb)
sync
blr
END_FUNC(ompi_atomic_mb)
# Read memory barrier: issues lwsync.
START_FUNC(ompi_atomic_rmb)
lwsync
blr
END_FUNC(ompi_atomic_rmb)
# Write memory barrier: issues eieio.
START_FUNC(ompi_atomic_wmb)
eieio
blr
END_FUNC(ompi_atomic_wmb)
# Atomic 32-bit compare-and-set (load-reserve / store-conditional loop).
#   in:  GPR 3 = address, GPR 4 = expected old value, GPR 5 = new value
#   out: GPR 3 = 1 if the word matched and was replaced, 0 otherwise
START_FUNC(ompi_atomic_cmpset_32)
1:      lwarx   r0, 0, r3
        cmpw    0, r0, r4
        bne-    2f
        stwcx.  r5, 0, r3
        bne-    1b
        sync
2:
# Turn "loaded == expected" into 0/1. GPR 5 is used as the scratch
# register because the new value is no longer needed here; GPR 2 must
# not be clobbered since several ABIs reserve it.
        xor     r3,r0,r4
        subfic  r5,r3,0
        adde    r3,r5,r3
        blr
END_FUNC(ompi_atomic_cmpset_32)
# Atomic 32-bit compare-and-set followed by lwsync (acquire variant).
#   in:  GPR 3 = address, GPR 4 = expected old value, GPR 5 = new value
#   out: GPR 3 = 1 if the word matched and was replaced, 0 otherwise
START_FUNC(ompi_atomic_cmpset_acq_32)
1:      lwarx   r0, 0, r3
        cmpw    0, r0, r4
        bne-    2f
        stwcx.  r5, 0, r3
        bne-    1b
        sync
2:
# Turn "loaded == expected" into 0/1. GPR 5 is used as the scratch
# register because the new value is no longer needed here; GPR 2 must
# not be clobbered since several ABIs reserve it.
        xor     r3,r0,r4
        subfic  r5,r3,0
        adde    r3,r5,r3
        lwsync
        blr
END_FUNC(ompi_atomic_cmpset_acq_32)
# Atomic 32-bit compare-and-set preceded by eieio (release variant).
#   in:  GPR 3 = address, GPR 4 = expected old value, GPR 5 = new value
#   out: GPR 3 = 1 if the word matched and was replaced, 0 otherwise
START_FUNC(ompi_atomic_cmpset_rel_32)
        eieio
1:      lwarx   r0, 0, r3
        cmpw    0, r0, r4
        bne-    2f
        stwcx.  r5, 0, r3
        bne-    1b
        sync
2:
# Turn "loaded == expected" into 0/1. GPR 5 is used as the scratch
# register because the new value is no longer needed here; GPR 2 must
# not be clobbered since several ABIs reserve it.
        xor     r3,r0,r4
        subfic  r5,r3,0
        adde    r3,r5,r3
        blr
END_FUNC(ompi_atomic_cmpset_rel_32)
#START_64BIT
# 64-bit compare-and-set; this routine sits inside the 64-bit-only
# section of the template.
# NOTE(review): the reservation loop compares against GPR 4 and stores
# GPR 6, while the result code compares GPRs 9/4 and 10/5, and GPR 10 is
# never loaded in this routine. The intended argument register layout is
# not visible from here - confirm against the target ABI.
START_FUNC(ompi_atomic_cmpset_64)
1: ldarx r9, 0, r3
cmpd 0, r9, r4
bne- 2f
stdcx. r6, 0, r3
bne- 1b
2:
# result: 0 unless both word comparisons below report equality
li r3,0
cmpw cr7,r9,r4
bnelr+ cr7
cmpw cr7,r10,r5
bnelr+ cr7
li r3,1
blr
END_FUNC(ompi_atomic_cmpset_64)
# Same compare-and-set, followed by lwsync on every exit path.
# NOTE(review): same register-layout question as the plain variant.
START_FUNC(ompi_atomic_cmpset_acq_64)
1: ldarx r9, 0, r3
cmpd 0, r9, r4
bne- 2f
stdcx. r6, 0, r3
bne- 1b
2:
cmpw cr0,r9,r4
li r3,0
bne+ cr0,L15
cmpw cr0,r10,r5
bne+ cr0,L15
li r3,1
# common exit: both the failure and success paths run the lwsync
L15:
lwsync
blr
END_FUNC(ompi_atomic_cmpset_acq_64)
# Same compare-and-set, preceded by eieio.
# NOTE(review): same register-layout question as the plain variant.
START_FUNC(ompi_atomic_cmpset_rel_64)
eieio
1: ldarx r9, 0, r3
cmpd 0, r9, r4
bne- 2f
stdcx. r6, 0, r3
bne- 1b
2:
cmpw cr0,r9,r4
li r3,0
bnelr+ cr0
cmpw cr0,r10,r5
bnelr+ cr0
li r3,1
blr
END_FUNC(ompi_atomic_cmpset_rel_64)
#END_64BIT
# Atomically add a delta to a 32-bit word and return the new value.
#   in:  GPR 3 = address, GPR 4 = delta
#   out: GPR 3 = the value this call stored
# The stored value is returned straight from the register; re-reading
# memory after the loop could pick up another thread's update.
START_FUNC(ompi_atomic_add_32)
1:      lwarx   r0, 0, r3
        add     r0, r4, r0
        stwcx.  r0, 0, r3
        bne-    1b
        mr      r3,r0
        blr
END_FUNC(ompi_atomic_add_32)
# Atomically subtract a delta from a 32-bit word and return the new value.
#   in:  GPR 3 = address, GPR 4 = delta
#   out: GPR 3 = the value this call stored
# The stored value is returned straight from the register; re-reading
# memory after the loop could pick up another thread's update.
START_FUNC(ompi_atomic_sub_32)
1:      lwarx   r0,0,r3
        subf    r0,r4,r0
        stwcx.  r0,0,r3
        bne-    1b
        mr      r3,r0
        blr
END_FUNC(ompi_atomic_sub_32)

156
src/asm/base/POWERPC64.asm Обычный файл
Просмотреть файл

@ -0,0 +1,156 @@
TEXT
ALIGN(4)
# Full memory barrier: issues sync.
START_FUNC(ompi_atomic_mb)
sync
blr
END_FUNC(ompi_atomic_mb)
# Read memory barrier: issues lwsync.
START_FUNC(ompi_atomic_rmb)
lwsync
blr
END_FUNC(ompi_atomic_rmb)
# Write memory barrier: issues eieio.
START_FUNC(ompi_atomic_wmb)
eieio
blr
END_FUNC(ompi_atomic_wmb)
# Atomic 32-bit compare-and-set (load-reserve / store-conditional loop).
# Compares the word at the address in GPR 3 with GPR 4 and, on a match,
# stores GPR 5 there.
START_FUNC(ompi_atomic_cmpset_32)
1: lwarx r0, 0, r3
cmpw 0, r0, r4
bne- 2f
stwcx. r5, 0, r3
bne- 1b
sync
2:
# Redo the comparison into condition field 7, copy the condition
# register out and rotate/mask it down to a single bit as the result.
cmpw cr7,r0,r4
mfcr r3
rlwinm r3,r3,31,1
blr
END_FUNC(ompi_atomic_cmpset_32)
# Acquire variant: calls the plain 32-bit compare-and-set, then the read
# barrier, and returns the compare-and-set result.
# NOTE(review): the call targets are spelled with a hard-coded leading
# underscore; presumably this only resolves on symbol formats that
# prefix globals that way - confirm for the other output formats.
START_FUNC(ompi_atomic_cmpset_acq_32)
mflr r0
std r29,-24(r1)
std r0,16(r1)
stdu r1,-144(r1)
bl _ompi_atomic_cmpset_32
# keep the result in a saved register across the barrier call
mr r29,r3
bl _ompi_atomic_rmb
mr r3,r29
addi r1,r1,144
ld r0,16(r1)
mtlr r0
ld r29,-24(r1)
blr
END_FUNC(ompi_atomic_cmpset_acq_32)
# Release variant: calls the write barrier first, then the plain 32-bit
# compare-and-set, whose result is returned unchanged.
# NOTE(review): same hard-coded underscore call targets as above.
START_FUNC(ompi_atomic_cmpset_rel_32)
mflr r0
std r27,-40(r1)
std r28,-32(r1)
std r29,-24(r1)
std r0,16(r1)
stdu r1,-160(r1)
# stash the three arguments in saved registers across the barrier call
mr r29,r3
mr r28,r4
mr r27,r5
bl _ompi_atomic_wmb
mr r3,r29
mr r4,r28
mr r5,r27
bl _ompi_atomic_cmpset_32
addi r1,r1,160
ld r0,16(r1)
mtlr r0
ld r27,-40(r1)
ld r28,-32(r1)
ld r29,-24(r1)
blr
END_FUNC(ompi_atomic_cmpset_rel_32)
# Atomic 64-bit compare-and-set (load-reserve / store-conditional loop).
#   in:  GPR 3 = address, GPR 4 = expected old value, GPR 5 = new value
#   out: GPR 3 = 1 if the doubleword matched and was replaced, 0 otherwise
START_FUNC(ompi_atomic_cmpset_64)
1:      ldarx   r0, 0, r3
        cmpd    0, r0, r4
        bne-    2f
        stdcx.  r5, 0, r3
        bne-    1b
2:
# Turn "loaded == expected" into 0/1. GPR 5 is used as the scratch
# register because the new value is no longer needed here; GPR 2 must
# not be clobbered since 64-bit ABIs commonly use it as the TOC pointer.
        xor     r3,r4,r0
        subfic  r5,r3,0
        adde    r3,r5,r3
        blr
END_FUNC(ompi_atomic_cmpset_64)
# Acquire variant: calls the plain 64-bit compare-and-set, then the read
# barrier, and returns the compare-and-set result.
# NOTE(review): the call targets are spelled with a hard-coded leading
# underscore; presumably this only resolves on symbol formats that
# prefix globals that way - confirm for the other output formats.
START_FUNC(ompi_atomic_cmpset_acq_64)
mflr r0
std r29,-24(r1)
std r0,16(r1)
stdu r1,-144(r1)
bl _ompi_atomic_cmpset_64
# keep the result in a saved register across the barrier call
mr r29,r3
bl _ompi_atomic_rmb
mr r3,r29
addi r1,r1,144
ld r0,16(r1)
mtlr r0
ld r29,-24(r1)
blr
END_FUNC(ompi_atomic_cmpset_acq_64)
# Release variant: calls the write barrier first, then the plain 64-bit
# compare-and-set, whose result is returned unchanged.
# NOTE(review): same hard-coded underscore call targets as above.
START_FUNC(ompi_atomic_cmpset_rel_64)
mflr r0
std r27,-40(r1)
std r28,-32(r1)
std r29,-24(r1)
std r0,16(r1)
stdu r1,-160(r1)
# stash the three arguments in saved registers across the barrier call
mr r29,r3
mr r28,r4
mr r27,r5
bl _ompi_atomic_wmb
mr r3,r29
mr r4,r28
mr r5,r27
bl _ompi_atomic_cmpset_64
addi r1,r1,160
ld r0,16(r1)
mtlr r0
ld r27,-40(r1)
ld r28,-32(r1)
ld r29,-24(r1)
blr
END_FUNC(ompi_atomic_cmpset_rel_64)
# Atomically add a delta to a 32-bit word and return the new value,
# sign-extended to 64 bits.
#   in:  GPR 3 = address, GPR 4 = delta
#   out: GPR 3 = the value this call stored
# The stored value is taken straight from the register; re-reading
# memory after the loop could pick up another thread's update.
START_FUNC(ompi_atomic_add_32)
1:      lwarx   r0, 0, r3
        add     r0, r4, r0
        stwcx.  r0, 0, r3
        bne-    1b
        extsw   r3,r0
        blr
END_FUNC(ompi_atomic_add_32)
# Atomically subtract a delta from a 32-bit word and return the new
# value, sign-extended to 64 bits.
#   in:  GPR 3 = address, GPR 4 = delta
#   out: GPR 3 = the value this call stored
# The stored value is taken straight from the register; re-reading
# memory after the loop could pick up another thread's update.
START_FUNC(ompi_atomic_sub_32)
1:      lwarx   r0,0,r3
        subf    r0,r4,r0
        stwcx.  r0,0,r3
        bne-    1b
        extsw   r3,r0
        blr
END_FUNC(ompi_atomic_sub_32)

31
src/asm/generate-all-asm.sh Обычный файл
Просмотреть файл

@ -0,0 +1,31 @@
#!/bin/sh
#
# Generate every assembly file described in asm-data.txt.
#
# usage: generate-all-asm.sh PERL SRCDIR DESTDIR
#   PERL     perl interpreter used to run generate-asm.pl
#   SRCDIR   directory holding asm-data.txt, generate-asm.pl and base/
#   DESTDIR  directory whose generated/ subdirectory receives the output
#
# asm-data.txt is read as tab-separated lines: architecture, format
# string, output name. Lines starting with '#' are ignored.
#
# Exit status is 1 on bad arguments or if any generation step failed.

perl="$1"
srcdir="$2"
destdir="$3"
ret=0

if test -z "$perl" || test -z "$srcdir" || test -z "$destdir" ; then
    echo "ERROR: invalid argument to generate-all-asm.sh"
    echo "usage: generate-all-asm.sh [PERL] [SRCDIR] [DESTDIR]"
    exit 1
fi

datafile="$srcdir/asm-data.txt"

# sort -u: an architecture listed on several lines (one per format) must
# be visited once, otherwise every format is regenerated once per line.
for asmarch in `grep -v '^#' "$datafile" | cut -f1 | sort -u | xargs` ; do
    if test ! -f "${srcdir}/base/${asmarch}.asm" ; then
        echo "WARNING: Skipping missing assembly arch ${asmarch}"
        continue
    fi

    # Comment lines are filtered out before matching so that a comment
    # mentioning the architecture cannot be mistaken for a data line.
    for asmformat in `grep -v '^#' "$datafile" | grep "$asmarch" | cut -f2 | xargs` ; do
        echo "--> Generating assembly for $asmarch $asmformat"
        output="`grep -v '^#' \"$datafile\" | grep \"$asmarch.*$asmformat\" | cut -f3`"
        # Run the generator from SRCDIR so the script also works when it
        # is invoked from a different working directory.
        $perl "$srcdir/generate-asm.pl" "$asmarch" "$asmformat" "$srcdir/base" "$destdir/generated/atomic-$output.s"
        if test "$?" != "0" ; then
            echo "WARNING: Failed to generate assembly for $asmarch $asmformat"
            ret=1
        fi
    done
done

exit $ret

92
src/asm/generate-asm.pl Обычный файл
Просмотреть файл

@ -0,0 +1,92 @@
#!/usr/bin/perl -w
#
# generate-asm.pl - expand an architecture template (<BASEDIR>/<ASMARCH>.asm)
# into assembler source for one output format.
#
# usage: generate-asm.pl ASMARCH ASMFORMAT BASEDIR OUTPUT
#
# ASMFORMAT is ten dash-separated fields:
#   text directive - global directive - label suffix - global symbol prefix -
#   local symbol prefix - type prefix - emit .size flag - log2 alignment flag -
#   r-register flag - 64-bit flag

my $asmarch = shift;
my $asmformat = shift;
my $basedir = shift;
my $output = shift;

if ( ! $asmarch || ! defined($asmformat) || ! defined($basedir)
     || ! defined($output)) {
    print "usage: generate-asm.pl [ASMARCH] [ASMFORMAT] [BASEDIR] [OUTPUT NAME]\n";
    exit(1);
}

my ($TEXT, $GLOBAL, $SUFFIX, $GSYM, $LSYM, $TYPE, $SIZE, $ALIGN_LOG,
    $DEL_R_REG, $IS64BIT) =
    ($asmformat =~ /(.*)\-(.*)\-(.*)\-(.*)\-(.*)\-(.*)\-(.*)\-(.*)\-(.*)\-(.*)/);

# A format string without all ten fields leaves every variable undefined
# and would silently produce garbage; fail before touching the output.
if ( ! defined($IS64BIT)) {
    die "Could not parse assembly format \"$asmformat\"\n";
}

open(INPUT, "<", "$basedir/$asmarch.asm") ||
    die "Could not open $basedir/$asmarch.asm: $!\n";
# $! (not $1) carries the reason the open failed.
open(OUTPUT, ">", $output) || die "Could not open $output: $!\n";

my $current_func = "";
my $delete = 0;

while (<INPUT>) {
    s/TEXT/$TEXT/g;
    s/GLOBAL/$GLOBAL/g;
    s/GSYM\((.*)\)/$GSYM$1$SUFFIX/g;
    s/LSYM\((.*)\)/$LSYM$1$SUFFIX/g;

    # Strip the "r" from register names when the format flag is 0.
    # NOTE(review): the pattern is not anchored, so it also rewrites
    # names such as "cr7" to "c7" - confirm whether that is intended.
    if ($DEL_R_REG == 0) {
        s/r([0-9][0-9]?)/$1/g;
    }

    # Function prologue: global directive, optional .type line, label.
    if (/START_FUNC\((.*)\)/) {
        $current_func = $1;
        $_ = "\t$GLOBAL $GSYM$current_func\n";
        # Emitted for any type prefix other than "" or "0", which is
        # what the previous "! $TYPE eq \"\"" test amounted to.
        if ($TYPE) {
            $_ .= "\t.type $current_func, $TYPE" . "function\n";
        }
        $_ .= "$GSYM$current_func$SUFFIX\n";
    }

    # Function epilogue: a .size line, or nothing at all.
    if (/END_FUNC\((.*)\)/) {
        s/END_FUNC\((.*)\)//g;
        if ($SIZE != 0) {
            $_ = "\t.size $current_func, .-$current_func\n";
        } else {
            chomp;
        }
    }

    if ($ALIGN_LOG == 0) {
        s/ALIGN\((\d*)\)/.align $1/g;
    } else {
        # The assembler wants log2 of the byte alignment.
        if (m/ALIGN\((\d*)\)/) {
            my $val = $1;
            my $result = 0;
            while ($val > 1) { $val /= 2; $result++ }
            s/ALIGN\((\d*)\)/.align $result/;
        }
    }

    # Lines between the 64-bit markers are dropped for 32-bit formats;
    # the marker lines themselves are never emitted.
    if (/^\#START_64BIT/) {
        $_ = "";
        if ($IS64BIT == 0) {
            $delete = 1;
        }
    }
    if (/^\#END_64BIT/) {
        $_ = "";
        $delete = 0;
    }

    if ($delete == 0) {
        print OUTPUT $_;
    }
}

close(INPUT);
# A failed close means buffered output was lost (e.g. disk full).
close(OUTPUT) || die "Could not write $output: $!\n";

Просмотреть файл

@ -19,7 +19,7 @@
* On alpha, everything is load-locked, store-conditional...
*/
#ifdef HAVE_SMP
#if OMPI_WANT_SMP_LOCKS
#define MB() __asm__ __volatile__ ("mb");
#define RMB() __asm__ __volatile__ ("mb");
@ -34,6 +34,25 @@
#endif
/**********************************************************************
*
* Define constants for Alpha
*
*********************************************************************/
#define OMPI_HAVE_ATOMIC_MEM_BARRIER 1
#define OMPI_HAVE_ATOMIC_CMPSET_32 1
#define OMPI_HAVE_ATOMIC_CMPSET_64 1
/**********************************************************************
*
* Memory Barriers
*
*********************************************************************/
#if OMPI_GCC_INLINE_ASSEMBLY
static inline void ompi_atomic_mb(void)
{
MB();
@ -51,7 +70,16 @@ static inline void ompi_atomic_wmb(void)
WMB();
}
#define OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_32
#endif /* OMPI_GCC_INLINE_ASSEMBLY */
/**********************************************************************
*
* Atomic math operations
*
*********************************************************************/
#if OMPI_GCC_INLINE_ASSEMBLY
static inline int ompi_atomic_cmpset_32( volatile int32_t *addr,
int32_t oldval, int32_t newval)
{
@ -96,7 +124,7 @@ static inline int ompi_atomic_cmpset_rel_32(volatile int32_t *addr,
return ompi_atomic_cmpset_32(addr, oldval, newval);
}
#define OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_64
static inline int ompi_atomic_cmpset_64( volatile int64_t *addr,
int64_t oldval, int64_t newval)
{
@ -141,4 +169,8 @@ static inline int ompi_atomic_cmpset_rel_64(volatile int64_t *addr,
return ompi_atomic_cmpset_64(addr, oldval, newval);
}
#endif /* OMPI_GCC_INLINE_ASSEMBLY */
#endif /* ! OMPI_SYS_ARCH_ATOMIC_H */

Просмотреть файл

@ -24,6 +24,7 @@ cat > $CFILE<<EOF
#include <inttypes.h>
#define static
#define inline
#define OMPI_GCC_INLINE_ASSEMBLY 1
#include "atomic.h"
EOF

Просмотреть файл

@ -19,7 +19,7 @@
*/
#ifdef HAVE_SMP
#if OMPI_WANT_SMP_LOCKS
#define SMPLOCK "lock; "
#define MB() __asm__ __volatile__("": : :"memory")
#else
@ -28,6 +28,25 @@
#endif
/**********************************************************************
*
* Define constants for AMD64 / x86_64 / EM64T / ...
*
*********************************************************************/
#define OMPI_HAVE_ATOMIC_MEM_BARRIER 1
#define OMPI_HAVE_ATOMIC_CMPSET_32 1
#define OMPI_HAVE_ATOMIC_CMPSET_64 1
/**********************************************************************
*
* Memory Barriers
*
*********************************************************************/
#if OMPI_GCC_INLINE_ASSEMBLY
static inline void ompi_atomic_mb(void)
{
MB();
@ -45,7 +64,16 @@ static inline void ompi_atomic_wmb(void)
MB();
}
#define OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_32
#endif /* OMPI_GCC_INLINE_ASSEMBLY */
/**********************************************************************
*
* Atomic math operations
*
*********************************************************************/
#if OMPI_GCC_INLINE_ASSEMBLY
static inline int ompi_atomic_cmpset_32( volatile int32_t *addr,
int32_t oldval, int32_t newval)
{
@ -57,10 +85,13 @@ static inline int ompi_atomic_cmpset_32( volatile int32_t *addr,
return ((int32_t)prev == oldval);
}
#endif /* OMPI_GCC_INLINE_ASSEMBLY */
#define ompi_atomic_cmpset_acq_32 ompi_atomic_cmpset_32
#define ompi_atomic_cmpset_rel_32 ompi_atomic_cmpset_32
#define OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_64
#if OMPI_GCC_INLINE_ASSEMBLY
static inline int ompi_atomic_cmpset_64( volatile int64_t *addr,
int64_t oldval, int64_t newval)
{
@ -75,6 +106,8 @@ static inline int ompi_atomic_cmpset_64( volatile int64_t *addr,
return (prev == oldval);
}
#endif /* OMPI_GCC_INLINE_ASSEMBLY */
#define ompi_atomic_cmpset_acq_64 ompi_atomic_cmpset_64
#define ompi_atomic_cmpset_rel_64 ompi_atomic_cmpset_64

Просмотреть файл

@ -24,6 +24,7 @@ cat > $CFILE<<EOF
#include <inttypes.h>
#define static
#define inline
#define OMPI_GCC_INLINE_ASSEMBLY 1
#include "atomic.h"
EOF

40
src/include/sys/architecture.h Обычный файл
Просмотреть файл

@ -0,0 +1,40 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/*
* List of supported architectures
*/
#ifndef OMPI_SYS_ARCHITECTURE_H
#define OMPI_SYS_ARCHITECTURE_H
/* Architectures */
/* NOTE: the leading zero makes each of these a C octal literal (0010 is
   decimal 8, 0051 is decimal 41). The values are still pairwise
   distinct and stay below the format constants further down. */
#define OMPI_UNSUPPORTED 0000
#define OMPI_WINDOWS 0001
#define OMPI_IA32 0010
#define OMPI_IA64 0020
#define OMPI_AMD64 0030
#define OMPI_ALPHA 0040
#define OMPI_POWERPC32 0050
#define OMPI_POWERPC64 0051
#define OMPI_SPARC32 0060
#define OMPI_SPARC64 0061
/* Formats */
/* These have no leading zero, so they are ordinary decimal literals. */
#define OMPI_DEFAULT 1000 /* standard for given architecture */
#define OMPI_DARWIN 1001 /* Darwin / OS X on PowerPC */
#define OMPI_PPC_LINUX 1002 /* Linux on PowerPC */
#define OMPI_AIX 1003 /* AIX on Power / PowerPC */
#endif /* #ifndef OMPI_SYS_ARCHITECTURE_H */

Просмотреть файл

@ -23,6 +23,18 @@
* http://www.freebsd.org/cgi/man.cgi?query=atomic&sektion=9
*
* Only the necessary subset of functions are implemented here.
*
* The following #defines will be true / false based on
* assembly support:
*
* \c OMPI_HAVE_ATOMIC_MEM_BARRIER atomic memory barriers
* \c OMPI_HAVE_ATOMIC_SPINLOCKS atomic spinlocks
* \c OMPI_HAVE_ATOMIC_MATH_32 if 32 bit add/sub/cmpset can be done "atomically"
* \c OMPI_HAVE_ATOMIC_MATH_64 if 64 bit add/sub/cmpset can be done "atomically"
*
* Note that for the Atomic math, atomic add/sub may be implemented as
* C code using ompi_atomic_cmpset. The appearance of atomic
* operation will be upheld in these cases.
*/
#ifndef OMPI_SYS_ATOMIC_H
@ -30,11 +42,91 @@
#include "ompi_config.h"
#include "include/sys/architecture.h"
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
/**********************************************************************
*
* Load the appropriate architecture files and set some reasonable
* default values for our support
*
*********************************************************************/
#if defined(DOXYGEN)
/* don't include system-level gorp when generating doxygen files */
#elif OMPI_ASSEMBLY_ARCH == OMPI_WINDOWS || defined(win32)
/* windows first, as they have API-level primitives for this stuff */
#include "include/sys/win32/atomic.h"
#elif OMPI_ASSEMBLY_ARCH == OMPI_ALPHA
#include "include/sys/alpha/atomic.h"
#elif OMPI_ASSEMBLY_ARCH == OMPI_AMD64
#include "include/sys/amd64/atomic.h"
#elif OMPI_ASSEMBLY_ARCH == OMPI_IA32
#include "include/sys/ia32/atomic.h"
#elif OMPI_ASSEMBLY_ARCH == OMPI_IA64
#include "include/sys/ia64/atomic.h"
#elif OMPI_ASSEMBLY_ARCH == OMPI_POWERPC32
#include "include/sys/powerpc/atomic.h"
#elif OMPI_ASSEMBLY_ARCH == OMPI_POWERPC64
#include "include/sys/powerpc/atomic.h"
#elif OMPI_ASSEMBLY_ARCH == OMPI_SPARC32
#error "32 bit Sparc support not implemented yet"
#elif OMPI_ASSEMBLY_ARCH == OMPI_SPARC64
#include "include/sys/sparc64/atomic.h"
#endif
/* compare and set operations can't really be emulated from software,
so if these defines aren't already set, they should be set to 0
now */
#ifndef OMPI_HAVE_ATOMIC_CMPSET_32
#define OMPI_HAVE_ATOMIC_CMPSET_32 0
#endif
#ifndef OMPI_HAVE_ATOMIC_CMPSET_64
#define OMPI_HAVE_ATOMIC_CMPSET_64 0
#endif
/**********************************************************************
*
* Memory Barriers - defined here if running doxygen or have barriers
* but can't inline
*
*********************************************************************/
#ifndef OMPI_HAVE_ATOMIC_MEM_BARRIER
/* no way to emulate in C code */
#define OMPI_HAVE_ATOMIC_MEM_BARRIER 0
#endif
#if defined(DOXYGEN) || OMPI_HAVE_ATOMIC_MEM_BARRIER
/**
* Memory barrier
*/
void ompi_atomic_mb(void);
/**
* Read memory barrier
*/
void ompi_atomic_rmb(void);
/**
* Write memory barrier.
*/
void ompi_atomic_wmb(void);
#endif /* defined(DOXYGEN) || OMPI_HAVE_ATOMIC_MEM_BARRIER */
/**********************************************************************
*
* Atomic spinlocks - always inlined, if have atomic cmpset
*
*********************************************************************/
/**
* Volatile lock object (with optional padding).
*/
@ -44,38 +136,33 @@ struct ompi_lock_t {
char padding[sizeof(int)]; /**< Array for optional padding */
} u;
};
typedef struct ompi_lock_t ompi_lock_t;
/**
* Memory barrier
*/
static inline void ompi_atomic_mb(void);
/**
* Read memory barrier
*/
static inline void ompi_atomic_rmb(void);
/**
* Write memory barrier.
*/
static inline void ompi_atomic_wmb(void);
#if 0
/**
* Atomically add to an integer.
*
* @param addr Address of integer.
* @param newval Value to set.
* @return Old value of integer.
*/
static inline int ompi_atomic_fetch_and_set_int(volatile void *addr, int newval);
#ifndef OMPI_HAVE_ATOMIC_SPINLOCKS
#define OMPI_HAVE_ATOMIC_SPINLOCKS (OMPI_HAVE_ATOMIC_CMPSET_32 || OMPI_HAVE_ATOMIC_CMPSET_64)
#endif
#if defined(DOXYGEN) || OMPI_HAVE_ATOMIC_SPINLOCKS
/**
* Enumeration of lock states
*/
enum {
OMPI_ATOMIC_UNLOCKED = 0,
OMPI_ATOMIC_LOCKED = 1
};
/**
* Initialize a lock to value
*
* @param lock Address of the lock
* @param value Initial value to set lock to
*/
static inline void ompi_atomic_init(ompi_lock_t* lock, int value);
/**
* Try to acquire a lock.
*
@ -100,144 +187,96 @@ static inline void ompi_atomic_lock(ompi_lock_t *lock);
*/
static inline void ompi_atomic_unlock(ompi_lock_t *lock);
#endif /* OMPI_HAVE_ATOMIC_SPINLOCKS */
/*
* Include system specific inline asm definitions. Otherwise
* the definitions are in system specific .s files in src/util.
*/
/* Include win32/atomic.h if we are in windows platform. Else, we
can go through other compilers and options. */
#ifdef WIN32
#define OMPI_HAVE_ATOMIC_WIN32 1
#include "include/sys/win32/atomic.h"
#else /* only now go through this stuff */
#define OMPI_HAVE_ATOMIC_WIN32 0
#if defined(__alpha__)
# define OMPI_HAVE_ATOMIC 1
# ifdef __GNUC__
# include "alpha/atomic.h"
# endif
#elif defined(__amd64__) || defined(__x86_64__)
# define OMPI_HAVE_ATOMIC 1
# ifdef __GNUC__
# include "amd64/atomic.h"
# endif
#elif defined(__i386__)
# define OMPI_HAVE_ATOMIC 1
# ifdef __GNUC__
# include "ia32/atomic.h"
# endif
#elif defined(__ia64__)
# define OMPI_HAVE_ATOMIC 1
# ifdef __GNUC__
# include "ia64/atomic.h"
# endif
#elif defined(__POWERPC__)
# define OMPI_HAVE_ATOMIC 1
# ifdef __GNUC__
# include "powerpc/atomic.h"
# endif
#elif defined(__sparc__) || defined(__sparc)
# define OMPI_HAVE_ATOMIC 1
# ifdef __GNUC__
# include "sparc64/atomic.h"
# endif
#else
#error No atomic operations defined yet
/**********************************************************************
*
* Atomic math operations
*
*********************************************************************/
#ifndef OMPI_HAVE_ATOMIC_CMPSET_32
#define OMPI_HAVE_ATOMIC_CMPSET_32 0
#endif
#endif /* ifdef WIN32*/
#ifndef OMPI_HAVE_ATOMIC
#define OMPI_HAVE_ATOMIC 0
#if defined(DOXYGEN) || OMPI_HAVE_ATOMIC_CMPSET_32
int ompi_atomic_cmpset_32(volatile int32_t *addr, int32_t oldval,
int32_t newval);
int ompi_atomic_cmpset_acq_32(volatile int32_t *addr, int32_t oldval,
int32_t newval);
int ompi_atomic_cmpset_rel_32(volatile int32_t *addr, int32_t oldval,
int32_t newval);
#endif
/* All the architectures provide compare_and_set atomic operations. If
* they don't provide atomic additions and/or subtractions then we can
* define these operations using the atomic compare_and_set.
*/
#if !defined(OMPI_ARCHITECTURE_DEFINE_ATOMIC_ADD_32)
#if defined(OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_32)
static inline int32_t ompi_atomic_add_32(volatile int32_t *addr, int delta)
{
int32_t oldval;
do {
oldval = *addr;
} while (0 == ompi_atomic_cmpset_32(addr, oldval, oldval + delta));
return (oldval + delta);
}
#endif /* OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_32 */
#endif /* OMPI_ARCHITECTURE_DEFINE_ATOMIC_ADD_32 */
#ifndef OMPI_HAVE_ATOMIC_CMPSET_64
#define OMPI_HAVE_ATOMIC_CMPSET_64 0
#endif
#if defined(DOXYGEN) || OMPI_HAVE_ATOMIC_CMPSET_64
int ompi_atomic_cmpset_64(volatile int64_t *addr, int64_t oldval,
int64_t newval);
int ompi_atomic_cmpset_acq_64(volatile int64_t *addr, int64_t oldval,
int64_t newval);
int ompi_atomic_cmpset_rel_64(volatile int64_t *addr, int64_t oldval,
int64_t newval);
#endif
#if !defined(OMPI_ARCHITECTURE_DEFINE_ATOMIC_SUB_32)
#if defined(OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_32)
static inline int32_t ompi_atomic_sub_32(volatile int32_t *addr, int delta)
{
int32_t oldval;
do {
oldval = *addr;
} while (0 == ompi_atomic_cmpset_32(addr, oldval, oldval - delta));
return (oldval - delta);
}
#endif /* OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_32 */
#endif /* OMPI_ARCHITECTURE_DEFINE_ATOMIC_SUB_32 */
#ifndef OMPI_HAVE_ATOMIC_MATH_32
/* define to 0 for these tests. Will fix up later. */
#define OMPI_HAVE_ATOMIC_MATH_32 0
#endif
#if defined(DOXYGEN) || OMPI_HAVE_ATOMIC_MATH_32 || OMPI_HAVE_ATOMIC_CMPSET_32
#if ! OMPI_HAVE_ATOMIC_MATH_32
static inline
#endif
int32_t ompi_atomic_add_32(volatile int32_t *addr, int delta);
#if ! OMPI_HAVE_ATOMIC_MATH_32
static inline
#endif
int32_t ompi_atomic_sub_32(volatile int32_t *addr, int delta);
#endif /* OMPI_HAVE_ATOMIC_MATH_32 */
#if ! OMPI_HAVE_ATOMIC_MATH_32
/* fix up the value of ompi_have_atomic_math_32 to allow for C versions */
#undef OMPI_HAVE_ATOMIC_MATH_32
#define OMPI_HAVE_ATOMIC_MATH_32 OMPI_HAVE_ATOMIC_CMPSET_32
#endif
/* Some architectures do not provide support for 64-bit
* atomic operations. Until we find a better solution let's just
* undefine all those functions.
*/
#if defined(OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_64)
#if !defined(OMPI_ARCHITECTURE_DEFINE_ATOMIC_ADD_64)
static inline int64_t ompi_atomic_add_64(volatile int64_t *addr, int64_t delta)
{
int64_t oldval;
do {
oldval = *addr;
} while (0 == ompi_atomic_cmpset_64(addr, oldval, oldval + delta));
return (oldval + delta);
}
#endif /* OMPI_ARCHITECTURE_DEFINE_ATOMIC_ADD_64 */
#ifndef OMPI_HAVE_ATOMIC_MATH_64
/* define to 0 for these tests. Will fix up later. */
#define OMPI_HAVE_ATOMIC_MATH_64 0
#endif
#if defined(DOXYGEN) || OMPI_HAVE_ATOMIC_MATH_64 || OMPI_HAVE_ATOMIC_CMPSET_64
#if OMPI_HAVE_ATOMIC_CMPSET_64
static inline
#endif
int64_t ompi_atomic_add_64(volatile int64_t *addr, int64_t delta);
#if OMPI_HAVE_ATOMIC_CMPSET_64
static inline
#endif
int64_t ompi_atomic_sub_64(volatile int64_t *addr, int64_t delta);
#endif /* OMPI_HAVE_ATOMIC_MATH_64 */
#if ! OMPI_HAVE_ATOMIC_MATH_64
/* fix up the value of ompi_have_atomic_math_64 to allow for C versions */
#undef OMPI_HAVE_ATOMIC_MATH_64
#define OMPI_HAVE_ATOMIC_MATH_64 OMPI_HAVE_ATOMIC_CMPSET_64
#endif
#if !defined(OMPI_ARCHITECTURE_DEFINE_ATOMIC_SUB_64)
static inline int64_t ompi_atomic_sub_64(volatile int64_t *addr, int64_t delta)
{
int64_t oldval;
do {
oldval = *addr;
} while (0 == ompi_atomic_cmpset_64(addr, oldval, oldval - delta));
return (oldval - delta);
}
#endif /* OMPI_ARCHITECTURE_DEFINE_ATOMIC_SUB_64 */
#endif /* OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_64 */
#if defined(DOXYGEN) || (OMPI_HAVE_ATOMIC_CMPSET_32 || OMPI_HAVE_ATOMIC_CMPSET_64)
/* these are always done with inline functions, so always mark as
static inline */
static inline int ompi_atomic_cmpset_xx(volatile void* addr, int64_t oldval,
int64_t newval, size_t length);
static inline int ompi_atomic_cmpset_acq_xx(volatile void* addr,
int64_t oldval, int64_t newval,
size_t length);
static inline int ompi_atomic_cmpset_rel_xx(volatile void* addr,
int64_t oldval, int64_t newval,
size_t length);
static inline void ompi_atomic_add_xx(volatile void* addr,
int32_t value, size_t length);
static inline void ompi_atomic_sub_xx(volatile void* addr,
int32_t value, size_t length);
static inline int ompi_atomic_cmpset_xx( volatile void* addr, int64_t oldval,
int64_t newval, size_t length )
{
switch( length ) {
#if defined(OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_32)
case 4:
return ompi_atomic_cmpset_32( (volatile int32_t*)addr,
(int32_t)oldval, (int32_t)newval );
#endif /* OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_32 */
#if defined(OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_64)
case 8:
return ompi_atomic_cmpset_64( (volatile int64_t*)addr,
(int64_t)oldval, (int64_t)newval );
#endif /* OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_64 */
default:
/* This should never happen, so deliberately cause a seg fault
for corefile analysis */
*(int*)(0) = 0;
}
return 0; /* always fail */
}
/**
* Atomic compare and set of pointer with relaxed semantics. This
@ -251,30 +290,9 @@ static inline int ompi_atomic_cmpset_xx( volatile void* addr, int64_t oldval,
* See ompi_atomic_cmpset_* for pseudo-code.
*/
#define ompi_atomic_cmpset( ADDR, OLDVAL, NEWVAL ) \
ompi_atomic_cmpset_xx( (volatile void*)(ADDR), (int64_t)(OLDVAL), (int64_t)(NEWVAL), sizeof(*(ADDR)) )
ompi_atomic_cmpset_xx( (volatile void*)(ADDR), (int64_t)(OLDVAL), \
(int64_t)(NEWVAL), sizeof(*(ADDR)) )
static inline int ompi_atomic_cmpset_acq_xx( volatile void* addr, int64_t oldval,
int64_t newval, size_t length )
{
switch( length ) {
#if defined(OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_32)
case 4:
return ompi_atomic_cmpset_acq_32( (volatile int32_t*)addr,
(int32_t)oldval, (int32_t)newval );
#endif /* OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_32 */
#if defined(OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_64)
case 8:
return ompi_atomic_cmpset_acq_64( (volatile int64_t*)addr,
(int64_t)oldval, (int64_t)newval );
#endif /* OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_64 */
default:
/* This should never happen, so deliberately cause a seg fault
for corefile analysis */
*(int*)(0) = 0;
}
return 0; /* always fail */
}
/**
* Atomic compare and set of pointer with acquire semantics. This
@ -288,30 +306,9 @@ static inline int ompi_atomic_cmpset_acq_xx( volatile void* addr, int64_t oldval
* See ompi_atomic_cmpset_acq_* for pseudo-code.
*/
#define ompi_atomic_cmpset_acq( ADDR, OLDVAL, NEWVAL ) \
ompi_atomic_cmpset_acq_xx( (volatile void*)(ADDR), (int64_t)(OLDVAL), (int64_t)(NEWVAL), sizeof(*(ADDR)) )
ompi_atomic_cmpset_acq_xx( (volatile void*)(ADDR), (int64_t)(OLDVAL), \
(int64_t)(NEWVAL), sizeof(*(ADDR)) )
/**
 * Size-dispatching compare-and-set with release semantics.
 *
 * Forwards to ompi_atomic_cmpset_rel_32 or ompi_atomic_cmpset_rel_64
 * according to length, narrowing oldval/newval to the operand width.
 *
 * @param addr    Address of the value to operate on.
 * @param oldval  Comparison value handed to the width-specific routine.
 * @param newval  Replacement value handed to the width-specific routine.
 * @param length  Size in bytes of the object at addr (4 or 8).
 *
 * @return Result of the width-specific compare-and-set.
 */
static inline int ompi_atomic_cmpset_rel_xx( volatile void* addr, int64_t oldval,
                                             int64_t newval, size_t length )
{
    switch( length ) {
#if defined(OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_32)
    case 4:
        return ompi_atomic_cmpset_rel_32( (volatile int32_t*)addr,
                                          (int32_t)oldval, (int32_t)newval );
#endif  /* OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_32 */
#if defined(OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_64)
    case 8:
        return ompi_atomic_cmpset_rel_64( (volatile int64_t*)addr,
                                          (int64_t)oldval, (int64_t)newval );
#endif  /* OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_64 */
    default:
        /* This should never happen, so deliberately cause a seg fault
           for corefile analysis.  The pointer is volatile-qualified
           because a store through a plain null pointer is undefined
           behaviour that an optimizer may simply delete, which would
           turn the trap into a silent "return 0". */
        *(volatile int*)(0) = 0;
    }
    return 0;  /* always fail */
}
/**
* Atomic compare and set of pointer with release semantics. This
@ -325,28 +322,9 @@ static inline int ompi_atomic_cmpset_rel_xx( volatile void* addr, int64_t oldval
* See ompi_atomic_cmpset_rel_* for pseudo-code.
*/
#define ompi_atomic_cmpset_rel( ADDR, OLDVAL, NEWVAL ) \
ompi_atomic_cmpset_rel_xx( (volatile void*)(ADDR), (int64_t)(OLDVAL), (int64_t)(NEWVAL), sizeof(*(ADDR)) )
ompi_atomic_cmpset_rel_xx( (volatile void*)(ADDR), (int64_t)(OLDVAL), \
(int64_t)(NEWVAL), sizeof(*(ADDR)) )
/**
 * Size-dispatching atomic addition.
 *
 * Forwards to ompi_atomic_add_32 or ompi_atomic_add_64 according to
 * length.  The increment is carried as an int32_t, so the 64 bit path
 * receives it sign-extended.
 *
 * @param addr    Address of the value to modify.
 * @param value   Amount to add.
 * @param length  Size in bytes of the object at addr (4 or 8).
 */
static inline void ompi_atomic_add_xx( volatile void* addr, int32_t value, size_t length )
{
    switch( length ) {
#if defined(OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_32)
    case 4:
        ompi_atomic_add_32( (volatile int32_t*)addr, (int32_t)value );
        break;
#endif  /* OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_32 */
#if defined(OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_64)
    case 8:
        ompi_atomic_add_64( (volatile int64_t*)addr, (int64_t)value );
        break;
#endif  /* OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_64 */
    default:
        /* This should never happen, so deliberately cause a seg fault
           for corefile analysis.  The pointer is volatile-qualified
           because a store through a plain null pointer is undefined
           behaviour that an optimizer may simply delete. */
        *(volatile int*)(0) = 0;
    }
}
/**
* Atomically increment the content depending on the type. This
@ -357,28 +335,8 @@ static inline void ompi_atomic_add_xx( volatile void* addr, int32_t value, size_
* @param delta Value to add (converted to <TYPE>).
*/
#define ompi_atomic_add( ADDR, VALUE ) \
ompi_atomic_add_xx( (volatile void*)(ADDR), (int32_t)(VALUE), sizeof(*(ADDR)) )
/**
 * Size-dispatching atomic subtraction.
 *
 * Forwards to ompi_atomic_sub_32 or ompi_atomic_sub_64 according to
 * length.  The decrement is carried as an int32_t, so the 64 bit path
 * receives it sign-extended.
 *
 * @param addr    Address of the value to modify.
 * @param value   Amount to subtract.
 * @param length  Size in bytes of the object at addr (4 or 8).
 */
static inline void ompi_atomic_sub_xx( volatile void* addr, int32_t value, size_t length )
{
    switch( length ) {
#if defined(OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_32)
    case 4:
        ompi_atomic_sub_32( (volatile int32_t*)addr, (int32_t)value );
        break;
#endif  /* OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_32 */
#if defined(OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_64)
    case 8:
        ompi_atomic_sub_64( (volatile int64_t*)addr, (int64_t)value );
        break;
#endif  /* OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_64 */
    default:
        /* This should never happen, so deliberately cause a seg fault
           for corefile analysis.  The pointer is volatile-qualified
           because a store through a plain null pointer is undefined
           behaviour that an optimizer may simply delete. */
        *(volatile int*)(0) = 0;
    }
}
ompi_atomic_add_xx( (volatile void*)(ADDR), (int32_t)(VALUE), \
sizeof(*(ADDR)) )
/**
* Atomically decrement the content depending on the type. This
@ -389,55 +347,23 @@ static inline void ompi_atomic_sub_xx( volatile void* addr, int32_t value, size_
* @param delta Value to subtract (converted to <TYPE>).
*/
#define ompi_atomic_sub( ADDR, VALUE ) \
ompi_atomic_sub_xx( (volatile void*)(ADDR), (int32_t)(VALUE), sizeof(*(ADDR)) )
ompi_atomic_sub_xx( (volatile void*)(ADDR), (int32_t)(VALUE), \
sizeof(*(ADDR)) )
#if OMPI_HAVE_ATOMIC || OMPI_HAVE_ATOMIC_WIN32
/*
* Atomic locks
*/
/**
* Enumeration of lock states
*/
enum {
OMPI_ATOMIC_UNLOCKED = 0,
OMPI_ATOMIC_LOCKED = 1
};
#endif /* OMPI_HAVE_ATOMIC_MATH_32 || OMPI_HAVE_ATOMIC_MATH_64 */
static inline int ompi_atomic_trylock(ompi_lock_t *lock)
{
return ompi_atomic_cmpset_acq( &(lock->u.lock),
OMPI_ATOMIC_UNLOCKED, OMPI_ATOMIC_LOCKED);
/**********************************************************************
*
* Include system specific inline asm definitions. Otherwise
* the definitions are in system specific .s files in src/util.
*
*********************************************************************/
#include "include/sys/atomic_impl.h"
#if defined(c_plusplus) || defined(__cplusplus)
}
/**
 * Acquire the lock, spinning until it becomes available.
 *
 * Each round attempts an acquire-flavoured compare-and-set that swaps
 * OMPI_ATOMIC_UNLOCKED for OMPI_ATOMIC_LOCKED.  When the swap fails the
 * lock word is polled with plain reads until it no longer reads as
 * locked, and only then is the compare-and-set retried.
 *
 * @param lock  Lock to acquire.
 */
static inline void ompi_atomic_lock(ompi_lock_t *lock)
{
    for (;;) {
        if( ompi_atomic_cmpset_acq( &(lock->u.lock),
                                    OMPI_ATOMIC_UNLOCKED, OMPI_ATOMIC_LOCKED) ) {
            return;
        }
        /* somebody else holds it: wait on reads before trying again */
        while (OMPI_ATOMIC_LOCKED == lock->u.lock) {
            continue;
        }
    }
}
/**
 * Release the lock.
 *
 * The lock word is reset with a plain store.  A write barrier is issued
 * first so that stores performed while the lock was held are ordered
 * ahead of the store that marks the lock free; without it another
 * processor could take the lock and still observe stale data.
 * Relies on ompi_atomic_wmb() provided by the architecture header.
 *
 * @param lock  Lock to release.
 */
static inline void ompi_atomic_unlock(ompi_lock_t *lock)
{
    ompi_atomic_wmb();
    lock->u.lock = OMPI_ATOMIC_UNLOCKED;
}
/**
 * Lock initialization function.  Stores value into the lock word, so
 * the lock starts out in whatever state the caller passes (for example
 * OMPI_ATOMIC_UNLOCKED).
 *
 * @param lock   Lock to initialize.
 * @param value  Initial content of lock->u.lock.
 */
static inline void ompi_atomic_init( ompi_lock_t* lock, int value )
{
lock->u.lock = value;
}
#endif /* OMPI_HAVE_ATOMIC || OMPI_HAVE_ATOMIC_WIN32 */
#endif
#endif /* OMPI_SYS_ATOMIC_H */

270
src/include/sys/atomic_impl.h Обычный файл
Просмотреть файл

@ -0,0 +1,270 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/* Inline C implementation of the functions defined in atomic.h */
/**********************************************************************
*
* Atomic math operations
*
* All the architectures provide an atomic compare_and_set operation. If
* they don't provide atomic additions and/or subtractions then we can
* define these operations using the atomic compare_and_set.
*
* Some architectures do not provide support for the 64 bit
* atomic operations. Until we find a better solution let's just
* leave all those functions undefined if there is no 64 bit cmpset.
*
*********************************************************************/
#if OMPI_HAVE_ATOMIC_CMPSET_32
#if !defined(OMPI_HAVE_ATOMIC_ADD_32)
#define OMPI_HAVE_ATOMIC_ADD_32 1
/**
 * Generic 32 bit atomic add built on compare-and-set.
 *
 * Reads the current value and tries to replace it with value + delta,
 * starting over whenever ompi_atomic_cmpset_32 reports failure (0).
 *
 * @param addr   Address of the 32 bit value to modify.
 * @param delta  Amount to add.
 * @return The value that was stored (value read plus delta).
 */
static inline int32_t
ompi_atomic_add_32(volatile int32_t *addr, int delta)
{
    int32_t seen;

    for (;;) {
        seen = *addr;
        if (0 != ompi_atomic_cmpset_32(addr, seen, seen + delta)) {
            break;
        }
    }
    return (seen + delta);
}
#endif  /* OMPI_HAVE_ATOMIC_ADD_32 */
#if !defined(OMPI_HAVE_ATOMIC_SUB_32)
#define OMPI_HAVE_ATOMIC_SUB_32 1
/**
 * Generic 32 bit atomic subtract built on compare-and-set.
 *
 * Reads the current value and tries to replace it with value - delta,
 * starting over whenever ompi_atomic_cmpset_32 reports failure (0).
 *
 * @param addr   Address of the 32 bit value to modify.
 * @param delta  Amount to subtract.
 * @return The value that was stored (value read minus delta).
 */
static inline int32_t
ompi_atomic_sub_32(volatile int32_t *addr, int delta)
{
    int32_t seen;

    for (;;) {
        seen = *addr;
        if (0 != ompi_atomic_cmpset_32(addr, seen, seen - delta)) {
            break;
        }
    }
    return (seen - delta);
}
#endif  /* OMPI_HAVE_ATOMIC_SUB_32 */
#endif /* OMPI_HAVE_ATOMIC_CMPSET_32 */
#if OMPI_HAVE_ATOMIC_CMPSET_64
#if !defined(OMPI_HAVE_ATOMIC_ADD_64)
#define OMPI_HAVE_ATOMIC_ADD_64 1
/**
 * Generic 64 bit atomic add built on compare-and-set.
 *
 * Reads the current value and tries to replace it with value + delta,
 * starting over whenever ompi_atomic_cmpset_64 reports failure (0).
 *
 * @param addr   Address of the 64 bit value to modify.
 * @param delta  Amount to add.
 * @return The value that was stored (value read plus delta).
 */
static inline int64_t
ompi_atomic_add_64(volatile int64_t *addr, int64_t delta)
{
    int64_t seen;

    for (;;) {
        seen = *addr;
        if (0 != ompi_atomic_cmpset_64(addr, seen, seen + delta)) {
            break;
        }
    }
    return (seen + delta);
}
#endif  /* OMPI_HAVE_ATOMIC_ADD_64 */
#if !defined(OMPI_HAVE_ATOMIC_SUB_64)
#define OMPI_HAVE_ATOMIC_SUB_64 1
/**
 * Generic 64 bit atomic subtract built on compare-and-set.
 *
 * Reads the current value and tries to replace it with value - delta,
 * starting over whenever ompi_atomic_cmpset_64 reports failure (0).
 *
 * @param addr   Address of the 64 bit value to modify.
 * @param delta  Amount to subtract.
 * @return The value that was stored (value read minus delta).
 */
static inline int64_t
ompi_atomic_sub_64(volatile int64_t *addr, int64_t delta)
{
    int64_t seen;

    for (;;) {
        seen = *addr;
        if (0 != ompi_atomic_cmpset_64(addr, seen, seen - delta)) {
            break;
        }
    }
    return (seen - delta);
}
#endif  /* OMPI_HAVE_ATOMIC_SUB_64 */
#endif /* OMPI_HAVE_ATOMIC_CMPSET_64 */
#if (OMPI_HAVE_ATOMIC_CMPSET_32 || OMPI_HAVE_ATOMIC_CMPSET_64)
/**
 * Size-dispatching compare-and-set (relaxed flavour).
 *
 * Forwards to ompi_atomic_cmpset_32 or ompi_atomic_cmpset_64 according
 * to length, narrowing oldval/newval to the operand width.
 *
 * @param addr    Address of the value to operate on.
 * @param oldval  Comparison value handed to the width-specific routine.
 * @param newval  Replacement value handed to the width-specific routine.
 * @param length  Size in bytes of the object at addr (4 or 8).
 *
 * @return Result of the width-specific compare-and-set.
 */
static inline int
ompi_atomic_cmpset_xx(volatile void* addr, int64_t oldval,
                      int64_t newval, size_t length)
{
    switch( length ) {
#if OMPI_HAVE_ATOMIC_CMPSET_32
    case 4:
        return ompi_atomic_cmpset_32( (volatile int32_t*)addr,
                                      (int32_t)oldval, (int32_t)newval );
#endif  /* OMPI_HAVE_ATOMIC_CMPSET_32 */
#if OMPI_HAVE_ATOMIC_CMPSET_64
    case 8:
        return ompi_atomic_cmpset_64( (volatile int64_t*)addr,
                                      (int64_t)oldval, (int64_t)newval );
#endif  /* OMPI_HAVE_ATOMIC_CMPSET_64 */
    default:
        /* This should never happen, so deliberately cause a seg fault
           for corefile analysis.  The pointer is volatile-qualified
           because a store through a plain null pointer is undefined
           behaviour that an optimizer may simply delete, which would
           turn the trap into a silent "return 0". */
        *(volatile int*)(0) = 0;
    }
    return 0;  /* always fail */
}
/**
 * Size-dispatching compare-and-set with acquire semantics.
 *
 * Forwards to ompi_atomic_cmpset_acq_32 or ompi_atomic_cmpset_acq_64
 * according to length, narrowing oldval/newval to the operand width.
 *
 * @param addr    Address of the value to operate on.
 * @param oldval  Comparison value handed to the width-specific routine.
 * @param newval  Replacement value handed to the width-specific routine.
 * @param length  Size in bytes of the object at addr (4 or 8).
 *
 * @return Result of the width-specific compare-and-set.
 */
static inline int
ompi_atomic_cmpset_acq_xx(volatile void* addr, int64_t oldval,
                          int64_t newval, size_t length)
{
    switch( length ) {
#if OMPI_HAVE_ATOMIC_CMPSET_32
    case 4:
        return ompi_atomic_cmpset_acq_32( (volatile int32_t*)addr,
                                          (int32_t)oldval, (int32_t)newval );
#endif  /* OMPI_HAVE_ATOMIC_CMPSET_32 */
#if OMPI_HAVE_ATOMIC_CMPSET_64
    case 8:
        return ompi_atomic_cmpset_acq_64( (volatile int64_t*)addr,
                                          (int64_t)oldval, (int64_t)newval );
#endif  /* OMPI_HAVE_ATOMIC_CMPSET_64 */
    default:
        /* This should never happen, so deliberately cause a seg fault
           for corefile analysis.  The pointer is volatile-qualified
           because a store through a plain null pointer is undefined
           behaviour that an optimizer may simply delete, which would
           turn the trap into a silent "return 0". */
        *(volatile int*)(0) = 0;
    }
    return 0;  /* always fail */
}
/**
 * Size-dispatching compare-and-set with release semantics.
 *
 * Forwards to ompi_atomic_cmpset_rel_32 or ompi_atomic_cmpset_rel_64
 * according to length, narrowing oldval/newval to the operand width.
 *
 * @param addr    Address of the value to operate on.
 * @param oldval  Comparison value handed to the width-specific routine.
 * @param newval  Replacement value handed to the width-specific routine.
 * @param length  Size in bytes of the object at addr (4 or 8).
 *
 * @return Result of the width-specific compare-and-set.
 */
static inline int
ompi_atomic_cmpset_rel_xx(volatile void* addr, int64_t oldval,
                          int64_t newval, size_t length)
{
    switch( length ) {
#if OMPI_HAVE_ATOMIC_CMPSET_32
    case 4:
        return ompi_atomic_cmpset_rel_32( (volatile int32_t*)addr,
                                          (int32_t)oldval, (int32_t)newval );
#endif  /* OMPI_HAVE_ATOMIC_CMPSET_32 */
#if OMPI_HAVE_ATOMIC_CMPSET_64
    case 8:
        return ompi_atomic_cmpset_rel_64( (volatile int64_t*)addr,
                                          (int64_t)oldval, (int64_t)newval );
#endif  /* OMPI_HAVE_ATOMIC_CMPSET_64 */
    default:
        /* This should never happen, so deliberately cause a seg fault
           for corefile analysis.  The pointer is volatile-qualified
           because a store through a plain null pointer is undefined
           behaviour that an optimizer may simply delete, which would
           turn the trap into a silent "return 0". */
        *(volatile int*)(0) = 0;
    }
    return 0;  /* always fail */
}
/**
 * Size-dispatching atomic addition.
 *
 * Forwards to ompi_atomic_add_32 or ompi_atomic_add_64 according to
 * length.  The increment is carried as an int32_t, so the 64 bit path
 * receives it sign-extended.
 *
 * @param addr    Address of the value to modify.
 * @param value   Amount to add.
 * @param length  Size in bytes of the object at addr (4 or 8).
 */
static inline void
ompi_atomic_add_xx(volatile void* addr, int32_t value, size_t length)
{
    switch( length ) {
#if OMPI_HAVE_ATOMIC_CMPSET_32
    case 4:
        ompi_atomic_add_32( (volatile int32_t*)addr, (int32_t)value );
        break;
#endif  /* OMPI_HAVE_ATOMIC_CMPSET_32 */
#if OMPI_HAVE_ATOMIC_CMPSET_64
    case 8:
        ompi_atomic_add_64( (volatile int64_t*)addr, (int64_t)value );
        break;
#endif  /* OMPI_HAVE_ATOMIC_CMPSET_64 */
    default:
        /* This should never happen, so deliberately cause a seg fault
           for corefile analysis.  The pointer is volatile-qualified
           because a store through a plain null pointer is undefined
           behaviour that an optimizer may simply delete. */
        *(volatile int*)(0) = 0;
    }
}
/**
 * Size-dispatching atomic subtraction.
 *
 * Forwards to ompi_atomic_sub_32 or ompi_atomic_sub_64 according to
 * length.  The decrement is carried as an int32_t, so the 64 bit path
 * receives it sign-extended.
 *
 * @param addr    Address of the value to modify.
 * @param value   Amount to subtract.
 * @param length  Size in bytes of the object at addr (4 or 8).
 */
static inline void
ompi_atomic_sub_xx(volatile void* addr, int32_t value, size_t length)
{
    switch( length ) {
#if OMPI_HAVE_ATOMIC_CMPSET_32
    case 4:
        ompi_atomic_sub_32( (volatile int32_t*)addr, (int32_t)value );
        break;
#endif  /* OMPI_HAVE_ATOMIC_CMPSET_32 */
#if OMPI_HAVE_ATOMIC_CMPSET_64
    case 8:
        ompi_atomic_sub_64( (volatile int64_t*)addr, (int64_t)value );
        break;
#endif  /* OMPI_HAVE_ATOMIC_CMPSET_64 */
    default:
        /* This should never happen, so deliberately cause a seg fault
           for corefile analysis.  The pointer is volatile-qualified
           because a store through a plain null pointer is undefined
           behaviour that an optimizer may simply delete. */
        *(volatile int*)(0) = 0;
    }
}
#endif /* (OMPI_HAVE_ATOMIC_CMPSET_32 || OMPI_HAVE_ATOMIC_CMPSET_64) */
/**********************************************************************
*
* Atomic spinlocks
*
*********************************************************************/
#if OMPI_HAVE_ATOMIC_SPINLOCKS
/*
* Lock initialization function.  Stores value into the lock word, so
* the lock starts out in whatever state the caller passes (for example
* OMPI_ATOMIC_UNLOCKED).
*
* lock:  lock to initialize
* value: initial content of lock->u.lock
*/
static inline void
ompi_atomic_init( ompi_lock_t* lock, int value )
{
lock->u.lock = value;
}
/*
 * Attempt to take the lock without spinning.
 *
 * Performs one acquire-flavoured compare-and-set that swaps
 * OMPI_ATOMIC_UNLOCKED for OMPI_ATOMIC_LOCKED in the lock word and
 * returns its result (non-zero when the swap was made).
 */
static inline int
ompi_atomic_trylock(ompi_lock_t *lock)
{
    int swapped;

    swapped = ompi_atomic_cmpset_acq( &(lock->u.lock),
                                      OMPI_ATOMIC_UNLOCKED,
                                      OMPI_ATOMIC_LOCKED );
    return swapped;
}
/*
 * Acquire the lock, spinning until it becomes available.
 *
 * Each round attempts an acquire-flavoured compare-and-set that swaps
 * OMPI_ATOMIC_UNLOCKED for OMPI_ATOMIC_LOCKED.  When the swap fails the
 * lock word is polled with plain reads until it no longer reads as
 * locked, and only then is the compare-and-set retried.
 */
static inline void
ompi_atomic_lock(ompi_lock_t *lock)
{
    for (;;) {
        if( ompi_atomic_cmpset_acq( &(lock->u.lock),
                                    OMPI_ATOMIC_UNLOCKED, OMPI_ATOMIC_LOCKED) ) {
            return;
        }
        /* somebody else holds it: wait on reads before trying again */
        while (OMPI_ATOMIC_LOCKED == lock->u.lock) {
            continue;
        }
    }
}
/*
 * Release the lock.
 *
 * The lock word is reset with a plain store.  A write barrier is issued
 * first so that stores performed while the lock was held are ordered
 * ahead of the store that marks the lock free; without it another
 * processor could take the lock and still observe stale data.
 * Relies on ompi_atomic_wmb() provided by the architecture header.
 */
static inline void
ompi_atomic_unlock(ompi_lock_t *lock)
{
    ompi_atomic_wmb();
    lock->u.lock = OMPI_ATOMIC_UNLOCKED;
}
#endif /* OMPI_HAVE_ATOMIC_SPINLOCKS */

Просмотреть файл

@ -14,7 +14,7 @@
include $(top_srcdir)/config/Makefile.options
noinst_HEADERS = atomic.h atomic.s
noinst_HEADERS = atomic.h
# Conditionally install the header files

Просмотреть файл

@ -19,8 +19,7 @@
* On ia32, we use cmpxchg.
*/
#ifdef HAVE_SMP
#if OMPI_WANT_SMP_LOCKS
#define SMPLOCK "lock; "
#define MB() __asm__ __volatile__("": : :"memory")
#else
@ -29,6 +28,29 @@
#endif
/**********************************************************************
*
* Define constants for IA32
*
*********************************************************************/
#define OMPI_HAVE_ATOMIC_MEM_BARRIER 1
#define OMPI_HAVE_ATOMIC_CMPSET_32 1
#define OMPI_HAVE_ATOMIC_MATH_32 1
#define OMPI_HAVE_ATOMIC_ADD_32 1
#define OMPI_HAVE_ATOMIC_SUB_32 1
#define OMPI_HAVE_ATOMIC_CMPSET_64 1
/**********************************************************************
*
* Memory Barriers
*
*********************************************************************/
#if OMPI_GCC_INLINE_ASSEMBLY
static inline void ompi_atomic_mb(void)
{
MB();
@ -46,7 +68,16 @@ static inline void ompi_atomic_wmb(void)
MB();
}
#define OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_32
#endif /* OMPI_GCC_INLINE_ASSEMBLY */
/**********************************************************************
*
* Atomic math operations
*
*********************************************************************/
#if OMPI_GCC_INLINE_ASSEMBLY
static inline int ompi_atomic_cmpset_32(volatile int32_t *addr,
int32_t oldval,
int32_t newval)
@ -62,14 +93,17 @@ static inline int ompi_atomic_cmpset_32(volatile int32_t *addr,
return (int)ret;
}
#endif /* OMPI_GCC_INLINE_ASSEMBLY */
#define ompi_atomic_cmpset_acq_32 ompi_atomic_cmpset_32
#define ompi_atomic_cmpset_rel_32 ompi_atomic_cmpset_32
#define OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_64
typedef struct {
uint32_t lo;
uint32_t hi;
} lwords_t;
#if OMPI_GCC_INLINE_ASSEMBLY
#ifndef ll_low /* GLIBC provides these somewhere, so protect */
/* Access the first / second 32 bit word of a 64 bit object x as an
 * unsigned int lvalue.  Which word is the numerically low half depends
 * on byte order; on ia32 (little endian) word 0 is the low half.
 * The whole expansion is parenthesised so that operators applied to the
 * macro result (postfix ++, member access, ...) bind to the dereferenced
 * word rather than to the pointer expression inside. */
#define ll_low(x)  (*(((unsigned int*)&(x))+0))
#define ll_high(x) (*(((unsigned int*)&(x))+1))
#endif
/* On Linux the EBX register is used by the shared libraries
* to keep the global offset. In same time this register is
@ -85,27 +119,28 @@ static inline int ompi_atomic_cmpset_64(volatile int64_t *addr,
* Compare EDX:EAX with m64. If equal, set ZF and load ECX:EBX into
* m64. Else, clear ZF and load m64 into EDX:EAX.
*/
lwords_t *pold = (lwords_t*)&oldval;
lwords_t *pnew = (lwords_t*)&newval;
unsigned char realized;
unsigned char ret;
__asm__ __volatile(
"push %%ebx \n\t"
"movl %4, %%ebx \n\t"
SMPLOCK "cmpxchg8b %1 \n\t"
"sete %0 \n\t"
"pop %%ebx \n\t"
: "=qm" (realized)
: "m"(*((volatile long*)addr)), "a"(pold->lo), "d"(pold->hi),
"r"(pnew->lo), "c"(pnew->hi)
: "cc", "memory" );
return realized;
__asm__ __volatile__(
"push %%ebx \n\t"
"movl %4, %%ebx \n\t"
SMPLOCK "cmpxchg8b (%1) \n\t"
"sete %0 \n\t"
"pop %%ebx \n\t"
: "=qm"(ret)
: "D"(addr), "a"(ll_low(oldval)), "d"(ll_high(oldval)),
"r"(ll_low(newval)), "c"(ll_high(newval))
: "cc", "memory");
return (int) ret;
}
#endif /* OMPI_GCC_INLINE_ASSEMBLY */
#define ompi_atomic_cmpset_acq_64 ompi_atomic_cmpset_64
#define ompi_atomic_cmpset_rel_64 ompi_atomic_cmpset_64
#define OMPI_ARCHITECTURE_DEFINE_ATOMIC_ADD_32
#if OMPI_GCC_INLINE_ASSEMBLY
/**
* atomic_add - add integer to atomic variable
* @i: integer value to add
@ -122,7 +157,7 @@ static inline int ompi_atomic_add_32(volatile int32_t* v, int i)
return (*v); /* should be an atomic operation */
}
#define OMPI_ARCHITECTURE_DEFINE_ATOMIC_SUB_32
/**
* atomic_sub - subtract the atomic variable
* @i: integer value to subtract
@ -139,5 +174,6 @@ static inline int ompi_atomic_sub_32(volatile int32_t* v, int i)
return (*v); /* should be an atomic operation */
}
#endif /* OMPI_GCC_INLINE_ASSEMBLY */
#endif /* ! OMPI_SYS_ARCH_ATOMIC_H */

Просмотреть файл

@ -1,193 +0,0 @@
;;
;; Copyright (c) 2004-2005 The Trustees of Indiana University.
;; All rights reserved.
;; Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
;; All rights reserved.
;; Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
;; University of Stuttgart. All rights reserved.
;; $COPYRIGHT$
;;
;; Additional copyrights may follow
;;
;; $HEADER$
;;
.file "atomic.c"
.text
.globl ompi_atomic_mb
.type ompi_atomic_mb,@function
ompi_atomic_mb:
pushl %ebp
movl %esp, %ebp
leave
ret
.Lfe1:
.size ompi_atomic_mb,.Lfe1-ompi_atomic_mb
.globl ompi_atomic_rmb
.type ompi_atomic_rmb,@function
ompi_atomic_rmb:
pushl %ebp
movl %esp, %ebp
leave
ret
.Lfe2:
.size ompi_atomic_rmb,.Lfe2-ompi_atomic_rmb
.globl ompi_atomic_wmb
.type ompi_atomic_wmb,@function
ompi_atomic_wmb:
pushl %ebp
movl %esp, %ebp
leave
ret
.Lfe3:
.size ompi_atomic_wmb,.Lfe3-ompi_atomic_wmb
.globl ompi_atomic_cmpset_32
.type ompi_atomic_cmpset_32,@function
ompi_atomic_cmpset_32:
pushl %ebp
movl %esp, %ebp
pushl %ebx
subl $4, %esp
movl 12(%ebp), %eax
movl %eax, -8(%ebp)
movl -8(%ebp), %edx
movl 16(%ebp), %ecx
movl %edx, %eax
movl 8(%ebp), %ebx
#APP
cmpxchgl %ecx,(%ebx)
setz %al
movzbl %al,%eax
#NO_APP
movl %eax, %edx
movl %edx, -8(%ebp)
movl -8(%ebp), %eax
cmpl 12(%ebp), %eax
sete %al
movzbl %al, %eax
addl $4, %esp
popl %ebx
leave
ret
.Lfe4:
.size ompi_atomic_cmpset_32,.Lfe4-ompi_atomic_cmpset_32
.globl ompi_atomic_cmpset_acq_32
.type ompi_atomic_cmpset_acq_32,@function
ompi_atomic_cmpset_acq_32:
pushl %ebp
movl %esp, %ebp
subl $8, %esp
subl $4, %esp
pushl 16(%ebp)
pushl 12(%ebp)
pushl 8(%ebp)
call ompi_atomic_cmpset_32
addl $16, %esp
leave
ret
.Lfe5:
.size ompi_atomic_cmpset_acq_32,.Lfe5-ompi_atomic_cmpset_acq_32
.globl ompi_atomic_cmpset_rel_32
.type ompi_atomic_cmpset_rel_32,@function
ompi_atomic_cmpset_rel_32:
pushl %ebp
movl %esp, %ebp
subl $8, %esp
subl $4, %esp
pushl 16(%ebp)
pushl 12(%ebp)
pushl 8(%ebp)
call ompi_atomic_cmpset_32
addl $16, %esp
leave
ret
.Lfe6:
.size ompi_atomic_cmpset_rel_32,.Lfe6-ompi_atomic_cmpset_rel_32
.globl ompi_atomic_cmpset_64
.type ompi_atomic_cmpset_64,@function
ompi_atomic_cmpset_64:
pushl %ebp
movl %esp, %ebp
pushl %ebx
subl $28, %esp
movl 12(%ebp), %eax
movl 16(%ebp), %edx
movl %eax, -16(%ebp)
movl %edx, -12(%ebp)
movl 20(%ebp), %eax
movl 24(%ebp), %edx
movl %eax, -24(%ebp)
movl %edx, -20(%ebp)
movl -16(%ebp), %eax
movl -12(%ebp), %edx
movl %eax, -32(%ebp)
movl %edx, -28(%ebp)
movl -16(%ebp), %ecx
movl -12(%ebp), %ebx
movl -32(%ebp), %eax
xorl %ecx, %eax
movl -28(%ebp), %edx
xorl %ebx, %edx
orl %edx, %eax
testl %eax, %eax
sete %al
movzbl %al, %eax
addl $28, %esp
popl %ebx
leave
ret
.Lfe7:
.size ompi_atomic_cmpset_64,.Lfe7-ompi_atomic_cmpset_64
.globl ompi_atomic_cmpset_acq_64
.type ompi_atomic_cmpset_acq_64,@function
ompi_atomic_cmpset_acq_64:
pushl %ebp
movl %esp, %ebp
subl $24, %esp
movl 12(%ebp), %eax
movl 16(%ebp), %edx
movl %eax, -8(%ebp)
movl %edx, -4(%ebp)
movl 20(%ebp), %eax
movl 24(%ebp), %edx
movl %eax, -16(%ebp)
movl %edx, -12(%ebp)
subl $12, %esp
pushl -12(%ebp)
pushl -16(%ebp)
pushl -4(%ebp)
pushl -8(%ebp)
pushl 8(%ebp)
call ompi_atomic_cmpset_64
addl $32, %esp
leave
ret
.Lfe8:
.size ompi_atomic_cmpset_acq_64,.Lfe8-ompi_atomic_cmpset_acq_64
.globl ompi_atomic_cmpset_rel_64
.type ompi_atomic_cmpset_rel_64,@function
ompi_atomic_cmpset_rel_64:
pushl %ebp
movl %esp, %ebp
subl $24, %esp
movl 12(%ebp), %eax
movl 16(%ebp), %edx
movl %eax, -8(%ebp)
movl %edx, -4(%ebp)
movl 20(%ebp), %eax
movl 24(%ebp), %edx
movl %eax, -16(%ebp)
movl %edx, -12(%ebp)
subl $12, %esp
pushl -12(%ebp)
pushl -16(%ebp)
pushl -4(%ebp)
pushl -8(%ebp)
pushl 8(%ebp)
call ompi_atomic_cmpset_64
addl $32, %esp
leave
ret
.Lfe9:
.size ompi_atomic_cmpset_rel_64,.Lfe9-ompi_atomic_cmpset_rel_64
.ident "GCC: (GNU) 3.2.2 20030222 (Red Hat Linux 3.2.2-5)"

Просмотреть файл

@ -24,7 +24,9 @@ cat > $CFILE<<EOF
#include <inttypes.h>
#define static
#define inline
#define OMPI_GCC_INLINE_ASSEMBLY 1
#define OMPI_WANT_SMP_LOCKS 1
#include "atomic.h"
EOF
gcc -I. -S $CFILE -o atomic.s
gcc -O1 -I. -S $CFILE -o atomic.s

Просмотреть файл

@ -20,13 +20,30 @@
*/
#ifdef HAVE_SMP
#if OMPI_WANT_SMP_LOCKS
#define MB() __asm__ __volatile__("": : :"memory")
#else
#define MB()
#endif
/**********************************************************************
*
* Define constants for IA64
*
*********************************************************************/
/* Feature flags are tested with "#if FLAG", so each one must expand to
 * a value: an empty definition makes "#if FLAG" a preprocessor error.
 * The barrier flag uses the OMPI_HAVE_ATOMIC_ prefix like the other
 * architecture headers. */
#define OMPI_HAVE_ATOMIC_MEM_BARRIER 1
#define OMPI_HAVE_ATOMIC_CMPSET_32 1
#define OMPI_HAVE_ATOMIC_CMPSET_64 1
/**********************************************************************
*
* Memory Barriers
*
*********************************************************************/
#if OMPI_GCC_INLINE_ASSEMBLY
static inline void ompi_atomic_mb(void)
{
MB();
@ -44,13 +61,23 @@ static inline void ompi_atomic_wmb(void)
MB();
}
#endif /* OMPI_GCC_INLINE_ASSEMBLY */
/**********************************************************************
*
* Atomic math operations
*
*********************************************************************/
#if OMPI_GCC_INLINE_ASSEMBLY
#define ia64_cmpxchg4_acq(ptr, new, old) \
({ \
__u64 ia64_intri_res; \
ia64_intri_res; \
})
#define OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_32
static inline int ompi_atomic_cmpset_acq_32( volatile int32_t *addr,
int32_t oldval, int32_t newval)
{
@ -76,10 +103,13 @@ static inline int ompi_atomic_cmpset_rel_32( volatile int32_t *addr,
return ((int32_t)ret == oldval);
}
#endif /* OMPI_GCC_INLINE_ASSEMBLY */
#define ompi_atomic_cmpset_32 ompi_atomic_cmpset_acq_32
#define OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_64
#if OMPI_GCC_INLINE_ASSEMBLY
static inline int ompi_atomic_cmpset_acq_64( volatile int64_t *addr,
int64_t oldval, int64_t newval)
{
@ -105,6 +135,7 @@ static inline int ompi_atomic_cmpset_rel_64( volatile int64_t *addr,
return ((int32_t)ret == oldval);
}
#endif /* OMPI_GCC_INLINE_ASSEMBLY */
#define ompi_atomic_cmpset_64 ompi_atomic_cmpset_acq_64

Просмотреть файл

@ -24,6 +24,7 @@ cat > $CFILE<<EOF
#include <inttypes.h>
#define static
#define inline
#define OMPI_GCC_INLINE_ASSEMBLY 1
#include "atomic.h"
EOF

Просмотреть файл

@ -14,7 +14,7 @@
include $(top_srcdir)/config/Makefile.options
noinst_HEADERS = atomic.h atomic.s
noinst_HEADERS = atomic.h
# Conditionally install the header files

Просмотреть файл

@ -38,26 +38,88 @@
#endif
static inline void ompi_atomic_mb(void)
/**********************************************************************
*
* Define constants for PowerPC 32
*
*********************************************************************/
#define OMPI_HAVE_ATOMIC_MEM_BARRIER 1
#define OMPI_HAVE_ATOMIC_CMPSET_32 1
#define OMPI_HAVE_ATOMIC_MATH_32 1
#define OMPI_HAVE_ATOMIC_ADD_32 1
#define OMPI_HAVE_ATOMIC_SUB_32 1
#if (OMPI_ASSEMBLY_ARCH == OMPI_POWERPC64) || (OMPI_POWERPC_SUPPORT_64BIT && OMPI_GCC_INLINE_ASSEMBLY)
#define OMPI_HAVE_ATOMIC_CMPSET_64 1
#endif
/**********************************************************************
*
* Memory Barriers
*
*********************************************************************/
#if OMPI_GCC_INLINE_ASSEMBLY
static inline
void ompi_atomic_mb(void)
{
MB();
}
static inline void ompi_atomic_rmb(void)
static inline
void ompi_atomic_rmb(void)
{
RMB();
}
static inline void ompi_atomic_wmb(void)
static inline
void ompi_atomic_wmb(void)
{
WMB();
}
#define OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_32
static inline int ompi_atomic_cmpset_32( volatile int32_t *addr,
int32_t oldval, int32_t newval)
#elif OMPI_XLC_INLINE_ASSEMBLY /* end OMPI_GCC_INLINE_ASSEMBLY */
/* Yeah, I don't know who thought this was a reasonable syntax for
* inline assembly. Do these because they are used so often and they
* are fairly simple (aka: there is a tech pub on IBM's web site
* containing the right hex for the instructions).
*/
void ompi_atomic_mb(void);
#pragma mc_func ompi_atomic_mb { "7c0004ac" } /* sync */
#pragma reg_killed_by ompi_atomic_mb /* none */
void ompi_atomic_rmb(void);
#pragma mc_func ompi_atomic_rmb { "7c2004ac" } /* lwsync */
#pragma reg_killed_by ompi_atomic_rmb /* none */
void ompi_atomic_wmb(void);
#pragma mc_func ompi_atomic_wmb { "7c0006ac" } /* eieio */
#pragma reg_killed_by ompi_atomic_wmb /* none */
#else /* end OMPI_XLC_INLINE_ASSEMBLY */
void ompi_atomic_mb(void);
void ompi_atomic_rmb(void);
void ompi_atomic_wmb(void);
#endif
/**********************************************************************
*
* Atomic math operations
*
*********************************************************************/
#if OMPI_GCC_INLINE_ASSEMBLY
static inline int ompi_atomic_cmpset_32(volatile int32_t *addr,
int32_t oldval, int32_t newval)
{
int32_t ret;
@ -76,9 +138,13 @@ static inline int ompi_atomic_cmpset_32( volatile int32_t *addr,
return (ret == oldval);
}
static inline int ompi_atomic_cmpset_acq_32( volatile int32_t *addr,
int32_t oldval, int32_t newval)
/* these two functions aren't inlined in the non-gcc case because then
there would be two function calls (since neither cmpset_32 nor
atomic_?mb can be inlined). Instead, we "inline" them by hand in
the assembly, meaning there is one function call overhead instead
of two */
static inline int ompi_atomic_cmpset_acq_32(volatile int32_t *addr,
int32_t oldval, int32_t newval)
{
int rc;
@ -89,17 +155,33 @@ static inline int ompi_atomic_cmpset_acq_32( volatile int32_t *addr,
}
static inline int ompi_atomic_cmpset_rel_32( volatile int32_t *addr,
int32_t oldval, int32_t newval)
static inline int ompi_atomic_cmpset_rel_32(volatile int32_t *addr,
int32_t oldval, int32_t newval)
{
ompi_atomic_wmb();
return ompi_atomic_cmpset_32(addr, oldval, newval);
}
#if defined(HOW_TO_DECIDE_IF_THE_ARCHI_SUPPORT_64_BITS_ATOMICS)
#define OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_64
static inline int ompi_atomic_cmpset_64( volatile int64_t *addr,
int64_t oldval, int64_t newval)
#else
int ompi_atomic_cmpset_32(volatile int32_t *addr,
int32_t oldval, int32_t newval);
int ompi_atomic_cmpset_acq_32(volatile int32_t *addr,
int32_t oldval, int32_t newval);
int ompi_atomic_cmpset_rel_32(volatile int32_t *addr,
int32_t oldval, int32_t newval);
#endif /* OMPI_GCC_INLINE_ASSEMBLY */
#if OMPI_POWERPC_SUPPORT_64BIT
#if OMPI_GCC_INLINE_ASSEMBLY
/* these two functions aren't inlined in the non-gcc case because then
there would be two function calls (since neither cmpset_64 nor
atomic_?mb can be inlined). Instead, we "inline" them by hand in
the assembly, meaning there is one function call overhead instead
of two */
static inline int ompi_atomic_cmpset_64(volatile int64_t *addr,
int64_t oldval, int64_t newval)
{
int64_t ret;
@ -117,9 +199,8 @@ static inline int ompi_atomic_cmpset_64( volatile int64_t *addr,
return (ret == oldval);
}
static inline int ompi_atomic_cmpset_acq_64( volatile int64_t *addr,
int64_t oldval, int64_t newval)
static inline int ompi_atomic_cmpset_acq_64(volatile int64_t *addr,
int64_t oldval, int64_t newval)
{
int rc;
@ -130,15 +211,30 @@ static inline int ompi_atomic_cmpset_acq_64( volatile int64_t *addr,
}
static inline int ompi_atomic_cmpset_rel_64( volatile int64_t *addr,
int64_t oldval, int64_t newval)
static inline int ompi_atomic_cmpset_rel_64(volatile int64_t *addr,
int64_t oldval, int64_t newval)
{
ompi_atomic_wmb();
return ompi_atomic_cmpset_64(addr, oldval, newval);
}
#endif /* HOW_TO_DECIDE_IF_THE_ARCHI_SUPPORT_64_BITS_ATOMICS */
#define OMPI_ARCHITECTURE_DEFINE_ATOMIC_ADD_32
#elif OMPI_ASSEMBLY_ARCH == OMPI_POWERPC64
/* currently, don't have 64 bit apps for non-inline assembly */
int ompi_atomic_cmpset_64(volatile int64_t *addr,
int64_t oldval, int64_t newval);
int ompi_atomic_cmpset_acq_64(volatile int64_t *addr,
int64_t oldval, int64_t newval);
int ompi_atomic_cmpset_rel_64(volatile int64_t *addr,
int64_t oldval, int64_t newval);
#endif /* OMPI_GCC_INLINE_ASSEMBLY */
#endif /* OMPI_POWERPC_SUPPORT_64BIT */
#if OMPI_GCC_INLINE_ASSEMBLY
static inline int32_t ompi_atomic_add_32(volatile int32_t* v, int inc)
{
int32_t t;
@ -155,7 +251,7 @@ static inline int32_t ompi_atomic_add_32(volatile int32_t* v, int inc)
return *v;
}
#define OMPI_ARCHITECTURE_DEFINE_ATOMIC_SUB_32
static inline int32_t ompi_atomic_sub_32(volatile int32_t* v, int dec)
{
int32_t t;
@ -172,4 +268,7 @@ static inline int32_t ompi_atomic_sub_32(volatile int32_t* v, int dec)
return *v;
}
#endif /* OMPI_GCC_INLINE_ASSEMBLY */
#endif /* ! OMPI_SYS_ARCH_ATOMIC_H */

Просмотреть файл

@ -1,245 +0,0 @@
;;
;; Copyright (c) 2004-2005 The Trustees of Indiana University.
;; All rights reserved.
;; Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
;; All rights reserved.
;; Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
;; University of Stuttgart. All rights reserved.
;; $COPYRIGHT$
;;
;; Additional copyrights may follow
;;
;; $HEADER$
;;
.section __TEXT,__text,regular,pure_instructions
.section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32
.section __TEXT,__text,regular,pure_instructions
.align 2
.align 2
.globl _ompi_atomic_mb
.section __TEXT,__text,regular,pure_instructions
.align 2
_ompi_atomic_mb:
stmw r30,-8(r1)
stwu r1,-48(r1)
mr r30,r1
lwz r1,0(r1)
lmw r30,-8(r1)
blr
.align 2
.globl _ompi_atomic_rmb
.section __TEXT,__text,regular,pure_instructions
.align 2
_ompi_atomic_rmb:
stmw r30,-8(r1)
stwu r1,-48(r1)
mr r30,r1
lwz r1,0(r1)
lmw r30,-8(r1)
blr
.align 2
.globl _ompi_atomic_wmb
.section __TEXT,__text,regular,pure_instructions
.align 2
_ompi_atomic_wmb:
stmw r30,-8(r1)
stwu r1,-48(r1)
mr r30,r1
lwz r1,0(r1)
lmw r30,-8(r1)
blr
.align 2
.globl _ompi_atomic_cmpset_32
.section __TEXT,__text,regular,pure_instructions
.align 2
_ompi_atomic_cmpset_32:
stmw r30,-8(r1)
stwu r1,-64(r1)
mr r30,r1
stw r3,88(r30)
stw r4,92(r30)
stw r5,96(r30)
lwz r10,88(r30)
lwz r11,88(r30)
lwz r9,92(r30)
lwz r0,96(r30)
lwz r2,88(r30)
1: lwarx r8, 0, r11
cmpw 0, r8, r9
bne- 2f
stwcx. r0, 0, r11
bne- 1b
2:
mr r0,r8
stw r0,32(r30)
lwz r2,32(r30)
lwz r0,92(r30)
cmpw cr7,r2,r0
mfcr r0
rlwinm r0,r0,31,1
mr r3,r0
lwz r1,0(r1)
lmw r30,-8(r1)
blr
.align 2
.globl _ompi_atomic_cmpset_acq_32
.section __TEXT,__text,regular,pure_instructions
.align 2
_ompi_atomic_cmpset_acq_32:
mflr r0
stmw r30,-8(r1)
stw r0,8(r1)
stwu r1,-96(r1)
mr r30,r1
stw r3,120(r30)
stw r4,124(r30)
stw r5,128(r30)
lwz r3,120(r30)
lwz r4,124(r30)
lwz r5,128(r30)
bl _ompi_atomic_cmpset_32
mr r0,r3
stw r0,64(r30)
bl _ompi_atomic_rmb
lwz r0,64(r30)
mr r3,r0
lwz r1,0(r1)
lwz r0,8(r1)
mtlr r0
lmw r30,-8(r1)
blr
.align 2
.globl _ompi_atomic_cmpset_rel_32
.section __TEXT,__text,regular,pure_instructions
.align 2
_ompi_atomic_cmpset_rel_32:
mflr r0
stmw r30,-8(r1)
stw r0,8(r1)
stwu r1,-80(r1)
mr r30,r1
stw r3,104(r30)
stw r4,108(r30)
stw r5,112(r30)
bl _ompi_atomic_wmb
lwz r3,104(r30)
lwz r4,108(r30)
lwz r5,112(r30)
bl _ompi_atomic_cmpset_32
mr r0,r3
mr r3,r0
lwz r1,0(r1)
lwz r0,8(r1)
mtlr r0
lmw r30,-8(r1)
blr
.align 2
.globl _ompi_atomic_cmpset_64
.section __TEXT,__text,regular,pure_instructions
.align 2
_ompi_atomic_cmpset_64:
stmw r30,-8(r1)
stwu r1,-96(r1)
mr r30,r1
stw r3,120(r30)
stw r4,32(r30)
stw r5,36(r30)
stw r6,40(r30)
stw r7,44(r30)
lwz r10,120(r30)
lwz r0,120(r30)
lwz r11,32(r30)
lwz r12,36(r30)
lwz r2,40(r30)
lwz r3,44(r30)
lwz r9,120(r30)
1: ldarx r7, 0, r0
cmpd 0, r7, r11
bne- 2f
stdcx. r2, 0, r0
bne- 1b
2:
mr r2,r7
mr r3,r8
stw r2,64(r30)
stw r3,68(r30)
lfd f0,64(r30)
stfd f0,48(r30)
li r8,0
stw r8,56(r30)
lwz r2,48(r30)
lwz r0,32(r30)
cmpw cr7,r2,r0
bne cr7,L8
lwz r0,52(r30)
lwz r2,36(r30)
cmpw cr7,r0,r2
bne cr7,L8
li r0,1
stw r0,56(r30)
L8:
lwz r0,56(r30)
mr r3,r0
lwz r1,0(r1)
lmw r30,-8(r1)
blr
.align 2
.globl _ompi_atomic_cmpset_acq_64
.section __TEXT,__text,regular,pure_instructions
.align 2
_ompi_atomic_cmpset_acq_64:
mflr r0
stmw r30,-8(r1)
stw r0,8(r1)
stwu r1,-112(r1)
mr r30,r1
stw r3,136(r30)
stw r4,64(r30)
stw r5,68(r30)
stw r6,72(r30)
stw r7,76(r30)
lwz r3,136(r30)
lwz r4,64(r30)
lwz r5,68(r30)
lwz r6,72(r30)
lwz r7,76(r30)
bl _ompi_atomic_cmpset_64
mr r0,r3
stw r0,80(r30)
bl _ompi_atomic_rmb
lwz r0,80(r30)
mr r3,r0
lwz r1,0(r1)
lwz r0,8(r1)
mtlr r0
lmw r30,-8(r1)
blr
.align 2
.globl _ompi_atomic_cmpset_rel_64
.section __TEXT,__text,regular,pure_instructions
.align 2
_ompi_atomic_cmpset_rel_64:
mflr r0
stmw r30,-8(r1)
stw r0,8(r1)
stwu r1,-96(r1)
mr r30,r1
stw r3,120(r30)
stw r4,64(r30)
stw r5,68(r30)
stw r6,72(r30)
stw r7,76(r30)
bl _ompi_atomic_wmb
lwz r3,120(r30)
lwz r4,64(r30)
lwz r5,68(r30)
lwz r6,72(r30)
lwz r7,76(r30)
bl _ompi_atomic_cmpset_64
mr r0,r3
mr r3,r0
lwz r1,0(r1)
lwz r0,8(r1)
mtlr r0
lmw r30,-8(r1)
blr

Просмотреть файл

@ -24,7 +24,9 @@ cat > $CFILE<<EOF
#include <inttypes.h>
#define static
#define inline
#define OMPI_GCC_INLINE_ASSEMBLY 1
#define OMPI_POWERPC_SUPPORT_64BIT 0
#include "atomic.h"
EOF
gcc -I. -S $CFILE -o atomic.s
gcc -DHAVE_SMP -I. -S $CFILE -o atomic.s

Просмотреть файл

@ -21,13 +21,32 @@
#define ASI_P "0x80"
#ifdef HAVE_SMP
#if OMPI_WANT_SMP_LOCKS
#define MEMBAR(type) __asm__ __volatile__ ("membar" type : : : "memory")
#else
#define MEMBAR(type)
#endif
/**********************************************************************
*
* Define constants for UltraSparc 64
*
*********************************************************************/
/* The barrier flag uses the OMPI_HAVE_ATOMIC_ prefix like the other
 * architecture headers, so code testing OMPI_HAVE_ATOMIC_MEM_BARRIER
 * sees the barriers this header provides. */
#define OMPI_HAVE_ATOMIC_MEM_BARRIER 1
#define OMPI_HAVE_ATOMIC_CMPSET_32 1
#define OMPI_HAVE_ATOMIC_CMPSET_64 1
/**********************************************************************
*
* Memory Barriers
*
*********************************************************************/
#if OMPI_GCC_INLINE_ASSEMBLY
static inline void ompi_atomic_mb(void)
{
MEMBAR("#LoadLoad | #LoadStore | #StoreStore | #StoreLoad");
@ -45,7 +64,16 @@ static inline void ompi_atomic_wmb(void)
MEMBAR("#StoreStore");
}
#define OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_32
#endif /* OMPI_GCC_INLINE_ASSEMBLY */
/**********************************************************************
*
* Atomic math operations
*
*********************************************************************/
#if OMPI_GCC_INLINE_ASSEMBLY
static inline int ompi_atomic_cmpset_32( volatile int32_t *addr,
int32_t oldval, int32_t newval)
{
@ -77,7 +105,7 @@ static inline int ompi_atomic_cmpset_rel_32( volatile int32_t *addr,
return ompi_atomic_cmpset_32(addr, oldval, newval);
}
#define OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_64
static inline int ompi_atomic_cmpset_64( volatile int64_t *addr,
int64_t oldval, int64_t newval)
{
@ -101,6 +129,7 @@ static inline int ompi_atomic_cmpset_acq_64( volatile int64_t *addr,
return rc;
}
static inline int ompi_atomic_cmpset_rel_64( volatile int64_t *addr,
int64_t oldval, int64_t newval)
{
@ -108,5 +137,7 @@ static inline int ompi_atomic_cmpset_rel_64( volatile int64_t *addr,
return ompi_atomic_cmpset_64(addr, oldval, newval);
}
#endif /* OMPI_GCC_INLINE_ASSEMBLY */
#endif /* ! OMPI_SYS_ARCH_ATOMIC_H */

Просмотреть файл

@ -24,6 +24,7 @@ cat > $CFILE<<EOF
#include <inttypes.h>
#define static
#define inline
#define OMPI_GCC_INLINE_ASSEMBLY 1
#include "atomic.h"
EOF

Просмотреть файл

@ -15,9 +15,12 @@
#ifndef OMPI_SYS_ARCH_ATOMIC_H
#define OMPI_SYS_ARCH_ATOMIC_H 1
/*
* On ia64, we use cmpxchg, which supports acquire/release semantics natively.
*/
/**********************************************************************
*
* Memory Barriers
*
*********************************************************************/
#define OMPI_HAVE_ATOMIC_MEM_BARRIER 1
static inline void ompi_atomic_mb(void)
{
@ -42,7 +45,14 @@ static inline void ompi_atomic_wmb(void)
#endif
}
#define OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_32
/**********************************************************************
*
* Atomic math operations
*
*********************************************************************/
#define OMPI_HAVE_ATOMIC_CMPSET_32
static inline int ompi_atomic_cmpset_acq_32( volatile int32_t *addr,
int32_t oldval, int32_t newval)
{
@ -81,7 +91,7 @@ static inline int ompi_atomic_cmpset_32( volatile int32_t *addr,
#endif
}
#define OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_64
#define OMPI_HAVE_ATOMIC_CMPSET_64
static inline int ompi_atomic_cmpset_acq_64( volatile int64_t *addr,
int64_t oldval, int64_t newval)
{
@ -119,7 +129,9 @@ static inline int ompi_atomic_cmpset_64( volatile int64_t *addr,
#endif
}
#define OMPI_ARCHITECTURE_DEFINE_ATOMIC_ADD_32
#define OMPI_HAVE_ATOMIC_MATH_32
#define OMPI_HAVE_ATOMIC_ADD_32
static inline int32_t ompi_atomic_add_32(volatile int32_t *addr, int32_t delta)
{
return InterlockedExchangeAdd ((LONG volatile *) addr,
@ -127,7 +139,9 @@ static inline int32_t ompi_atomic_add_32(volatile int32_t *addr, int32_t delta)
}
#define OMPI_ARCHITECTURE_DEFINE_ATOMIC_ADD_64
#define OMPI_HAVE_ATOMIC_MATH_64
#define OMPI_HAVE_ATOMIC_ADD_64
static inline int64_t ompi_atomic_add_64(volatile int64_t *addr, int64_t delta)
{
#if 0
@ -139,7 +153,7 @@ static inline int64_t ompi_atomic_add_64(volatile int64_t *addr, int64_t delta)
}
#define OMPI_ARCHITECTURE_DEFINE_ATOMIC_SUB_32
#define OMPI_HAVE_ATOMIC_SUB_32
static inline int32_t ompi_atomic_sub_32(volatile int32_t *addr, int32_t delta)
{
return InterlockedExchangeAdd( (LONG volatile *) addr,
@ -147,7 +161,7 @@ static inline int32_t ompi_atomic_sub_32(volatile int32_t *addr, int32_t delta)
}
#define OMPI_ARCHITECTURE_DEFINE_ATOMIC_SUB_64
#define OMPI_HAVE_ATOMIC_SUB_64
static inline int64_t ompi_atomic_sub_64(volatile int64_t *addr, int64_t delta)
{
#if 0

Просмотреть файл

@ -42,7 +42,7 @@ static void ompi_mutex_construct(ompi_mutex_t *m)
#if OMPI_HAVE_POSIX_THREADS
pthread_mutex_init(&m->m_lock_pthread, 0);
#endif
#if OMPI_HAVE_ATOMIC
#if OMPI_HAVE_ATOMIC_SPINLOCKS
ompi_atomic_init( &m->m_lock_atomic, OMPI_ATOMIC_UNLOCKED );
#endif
}

Просмотреть файл

@ -50,7 +50,7 @@ struct ompi_mutex_t {
OMPI_DECLSPEC OBJ_CLASS_DECLARATION(ompi_mutex_t);
#if OMPI_HAVE_ATOMIC && OMPI_HAVE_POSIX_THREADS
#if OMPI_HAVE_ATOMIC_SPINLOCKS && OMPI_HAVE_POSIX_THREADS
/*
* ompi_mutex_* implemented using pthreads
@ -127,7 +127,7 @@ static inline void ompi_mutex_atomic_unlock(ompi_mutex_t *m)
}
#elif OMPI_HAVE_ATOMIC
#elif OMPI_HAVE_ATOMIC_SPINLOCKS
/*
* ompi_mutex_* and ompi_mutex_atomic_* implemented using atomic

Просмотреть файл

@ -51,7 +51,6 @@ headers = \
libutil_la_SOURCES = \
$(headers) \
assembly.s \
argv.c \
cmd_line.c \
few.c \

Просмотреть файл

@ -1,35 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/*
* File to instantiate assembly level code for non-GNU C compilers.
*/
#ifndef __GNUC__
#if defined(__alpha__)
# include "include/sys/alpha/atomic.s"
#elif defined(__amd64__) || defined(__x86_64__)
# include "include/sys/amd64/atomic.s"
#elif defined(__i386__)
# include "include/sys/ia32/atomic.s"
#elif defined(__ia64__)
# include "include/sys/ia64/atomic.s"
#elif defined(__POWERPC__)
# include "include/sys/powerpc/atomic.s"
#elif defined(__sparc__) || defined(__sparc)
# include "include/sys/sparc64/atomic.s"
#endif
#endif