From 9c1a2778041eed1a595d3d911e0d57f6a915cf8c Mon Sep 17 00:00:00 2001 From: Brian Barrett Date: Thu, 27 Jan 2005 01:39:55 +0000 Subject: [PATCH] * Update build system to support non-GCC inline assembly, including detection of assembler format * Fix minor bugs in AMD64, PPC, and IA32 assembly for atomic operations * Clean up the #defines to look for when examining level of atomic operation support This commit was SVN r4183. --- Doxyfile | 2 +- acinclude.m4 | 3 + config/ompi_config_asm.m4 | 763 ++++++++++++++++++++++++++++ config/ompi_try_assemble.m4 | 43 ++ configure.ac | 11 + src/Makefile.am | 2 + src/asm/Makefile.am | 80 +++ src/asm/asm-data.txt | 27 + src/asm/atomic-test.c | 457 +++++++++++++++++ src/asm/base/AMD64.asm | 73 +++ src/asm/base/IA32.asm | 108 ++++ src/asm/base/POWERPC32.asm | 143 ++++++ src/asm/base/POWERPC64.asm | 156 ++++++ src/asm/generate-all-asm.sh | 31 ++ src/asm/generate-asm.pl | 92 ++++ src/include/sys/alpha/atomic.h | 38 +- src/include/sys/alpha/update.sh | 1 + src/include/sys/amd64/atomic.h | 39 +- src/include/sys/amd64/update.sh | 1 + src/include/sys/architecture.h | 40 ++ src/include/sys/atomic.h | 506 ++++++++---------- src/include/sys/atomic_impl.h | 270 ++++++++++ src/include/sys/ia32/Makefile.am | 2 +- src/include/sys/ia32/atomic.h | 84 ++- src/include/sys/ia32/atomic.s | 193 ------- src/include/sys/ia32/update.sh | 4 +- src/include/sys/ia64/atomic.h | 37 +- src/include/sys/ia64/update.sh | 1 + src/include/sys/powerpc/Makefile.am | 2 +- src/include/sys/powerpc/atomic.h | 145 +++++- src/include/sys/powerpc/atomic.s | 245 --------- src/include/sys/powerpc/update.sh | 4 +- src/include/sys/sparc64/atomic.h | 37 +- src/include/sys/sparc64/update.sh | 1 + src/include/sys/win32/atomic.h | 32 +- src/threads/mutex.c | 2 +- src/threads/mutex_unix.h | 4 +- src/util/Makefile.am | 1 - src/util/assembly.s | 35 -- 39 files changed, 2875 insertions(+), 840 deletions(-) create mode 100644 config/ompi_config_asm.m4 create mode 100644 
config/ompi_try_assemble.m4 create mode 100644 src/asm/Makefile.am create mode 100644 src/asm/asm-data.txt create mode 100644 src/asm/atomic-test.c create mode 100644 src/asm/base/AMD64.asm create mode 100644 src/asm/base/IA32.asm create mode 100644 src/asm/base/POWERPC32.asm create mode 100644 src/asm/base/POWERPC64.asm create mode 100644 src/asm/generate-all-asm.sh create mode 100644 src/asm/generate-asm.pl create mode 100644 src/include/sys/architecture.h create mode 100644 src/include/sys/atomic_impl.h delete mode 100644 src/include/sys/ia32/atomic.s delete mode 100644 src/include/sys/powerpc/atomic.s delete mode 100644 src/util/assembly.s diff --git a/Doxyfile b/Doxyfile index 7934d4d905..27c25456c3 100644 --- a/Doxyfile +++ b/Doxyfile @@ -892,7 +892,7 @@ INCLUDE_FILE_PATTERNS = # or name=definition (no spaces). If the definition and the = are # omitted =1 is assumed. -PREDEFINED = +PREDEFINED = DOXYGEN # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then # this tag can be used to specify a list of macro names that should be expanded. diff --git a/acinclude.m4 b/acinclude.m4 index 7bb09eaa78..1f1aad57cb 100644 --- a/acinclude.m4 +++ b/acinclude.m4 @@ -36,6 +36,9 @@ sinclude(config/f90_check_type.m4) sinclude(config/f90_get_alignment.m4) sinclude(config/f90_get_sizeof.m4) +sinclude(config/ompi_try_assemble.m4) +sinclude(config/ompi_config_asm.m4) + sinclude(config/ompi_case_sensitive_fs_setup.m4) sinclude(config/ompi_check_optflags.m4) sinclude(config/ompi_config_subdir.m4) diff --git a/config/ompi_config_asm.m4 b/config/ompi_config_asm.m4 new file mode 100644 index 0000000000..3d36a0d0a6 --- /dev/null +++ b/config/ompi_config_asm.m4 @@ -0,0 +1,763 @@ +dnl +dnl Copyright (c) 2004-2005 The Trustees of Indiana University. +dnl All rights reserved. +dnl Copyright (c) 2004-2005 The Trustees of the University of Tennessee. +dnl All rights reserved. 
+dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +dnl University of Stuttgart. All rights reserved. +dnl $COPYRIGHT$ +dnl +dnl Additional copyrights may follow +dnl +dnl $HEADER$ +dnl + + +dnl ################################################################# +dnl +dnl OMPI_CHECK_ASM_TEXT +dnl +dnl Determine how to set current mode as text. +dnl +dnl ################################################################# +AC_DEFUN([OMPI_CHECK_ASM_TEXT],[ + AC_MSG_CHECKING([directive for setting text section]) + ompi_cv_asm_text="" + case $host in + *-aix*) + ompi_cv_asm_text=[".csect .text[PR]"] + ;; + *) + ompi_cv_asm_text=".text" + ;; + esac + AC_MSG_RESULT([$ompi_cv_asm_text]) + AC_DEFINE_UNQUOTED([OMPI_ASM_TEXT], ["$ompi_cv_asm_text"], + [Assembly directive for setting text section]) + OMPI_ASM_TEXT="$ompi_cv_asm_text" + AC_SUBST(OMPI_ASM_TEXT) +])dnl + + +dnl ################################################################# +dnl +dnl OMPI_CHECK_ASM_GLOBAL +dnl +dnl Sets OMPI_ASM_GLOBAL to the value to prefix global values +dnl +dnl I'm sure if I don't have a test for this, there will be some +dnl dumb platform that uses something else +dnl +dnl ################################################################# +AC_DEFUN([OMPI_CHECK_ASM_GLOBAL],[ + AC_MSG_CHECKING([directive for exporting symbols]) + ompi_cv_asm_global="" + case $host in + *) + ompi_cv_asm_global=".globl" + ;; + esac + AC_MSG_RESULT([$ompi_cv_asm_global]) + AC_DEFINE_UNQUOTED([OMPI_ASM_GLOBAL], ["$ompi_cv_asm_global"], + [Assembly directive for exporting symbols]) + OMPI_ASM_GLOBAL="$ompi_cv_asm_global" + AC_SUBST(OMPI_ASM_GLOBAL) +])dnl + + +dnl ################################################################# +dnl +dnl OMPI_CHECK_ASM_LSYM +dnl +dnl Sets OMPI_ASM_LSYM to the prefix value on a symbol to make it +dnl an internal label (jump target and whatnot) +dnl +dnl We look for L .L $ L$ (in that order) for something that both +dnl assembles and does not leave a label in the 
output of nm. Fall +dnl back to L if nothing else seems to work :/ +dnl +dnl ################################################################# +AC_DEFUN([OMPI_CHECK_ASM_LSYM],[ + AC_REQUIRE([OMPI_CHECK_ASM_LABEL_SUFFIX]) + AC_REQUIRE([AC_PROG_NM]) + + AC_MSG_CHECKING([prefix for lsym labels]) + ompi_cv_asm_lsym="L" + + for sym in L .L $ L$ ; do + asm_result=0 + echo "configure: trying $sym" >& AC_FD_CC + OMPI_TRY_ASSEMBLE([foobar$ompi_cv_asm_label_suffix +${sym}mytestlabel$ompi_cv_asm_label_suffix], + [# ok, we succeeded at assembling. see if we can nm, + # throwing the results in a file + if $NM conftest.$OBJEXT > conftest.out 2>&AC_FD_CC ; then + if test "`grep mytestlabel conftest.out`" = "" ; then + # there was no symbol... looks promising to me + ompi_cv_asm_lsym="$sym" + asm_result=1 + elif test ["`grep ' [Nt] .*mytestlabel' conftest.out`"] = "" ; then + # see if we have a non-global-ish symbol + # but we should see if we can do better. + ompi_cv_asm_lsym="$sym" + fi + else + # not so much on the NM goodness :/ + echo "$NM failed. Output from NM was:" >& AC_FD_CC + cat conftest.out >&AC_FD_CC + AC_MSG_WARN([$NM could not read object file]) + fi + ]) + if test "$asm_result" = "1" ; then + break + fi + done + rm -f conftest.out + + AC_MSG_RESULT([$ompi_cv_asm_lsym]) + AC_DEFINE_UNQUOTED([OMPI_ASM_LSYM], ["$ompi_cv_asm_lsym"], + [Assembly prefix for lsym labels]) + OMPI_ASM_LSYM="$ompi_cv_asm_lsym" + AC_SUBST(OMPI_ASM_LSYM) + unset asm_result sym +])dnl + + +dnl ################################################################# +dnl +dnl OMPI_CHECK_ASM_GSYM +dnl +dnl Sets OMPI_ASM_GSYM to the prefix value on a symbol to make it +dnl a global linkable from C. Basically, an _ or not. 
+dnl +dnl ################################################################# +AC_DEFUN([OMPI_CHECK_ASM_GSYM],[ + AC_REQUIRE([OMPI_CHECK_ASM_TEXT]) + AC_REQUIRE([OMPI_CHECK_ASM_GLOBAL]) + AC_REQUIRE([OMPI_CHECK_ASM_LABEL_SUFFIX]) + + AC_MSG_CHECKING([prefix for global symbol labels]) + ompi_cv_asm_gsym="none" + + for sym in "_" "" ; do + asm_result=0 + echo "configure: trying $sym" >& AC_FD_CC +cat > conftest_c.c <&AC_FD_CC + ompi_link="$CC $CFLAGS conftest_c.$OBJEXT conftest.$OBJEXT -o conftest > conftest.link 2>&1" + if AC_TRY_EVAL(ompi_link) ; then + # save the warnings + cat conftest.link >&AC_FD_CC + asm_result=1 + else + cat conftest.link >&AC_FD_CC + echo "configure: failed C program was: " >&AC_FD_CC + cat conftest_c.c >&AC_FD_CC + echo "configure: failed ASM program was: " >&AC_FD_CC + cat conftest.s >&AC_FD_CC + asm_result=0 + fi + else + # save output and failed program + cat conftest.cmpl >&AC_FD_CC + echo "configure: failed C program was: " >&AC_FD_CC + cat conftest.c >&AC_FD_CC + asm_result=0 + fi], + [asm_result=0]) + if test "$asm_result" = "1" ; then + ompi_cv_asm_gsym="$sym" + break + fi + done + rm -f conftest.* + + AC_MSG_RESULT([$ompi_cv_asm_gsym]) + + if test "$ompi_cv_asm_gsym" = "none" ; then + AC_MSG_ERROR([Could not determine global symbol label prefix]) + fi + + AC_DEFINE_UNQUOTED([OMPI_ASM_GSYM], ["$ompi_cv_asm_gsym"], + [Assembly prefix for lsym labels]) + OMPI_ASM_GSYM="$ompi_cv_asm_gsym" + AC_SUBST(OMPI_ASM_GSYM) +])dnl + + +dnl ################################################################# +dnl +dnl OMPI_CHECK_ASM_LABEL_SUFFIX +dnl +dnl Sets OMPI_ASM_LABEL_SUFFIX to the value to suffix for labels +dnl +dnl I'm sure if I don't have a test for this, there will be some +dnl dumb platform that uses something else +dnl +dnl ################################################################# +AC_DEFUN([OMPI_CHECK_ASM_LABEL_SUFFIX],[ + AC_MSG_CHECKING([suffix for labels]) + ompi_cv_asm_label_suffix="" + case $host in + *) + 
ompi_cv_asm_label_suffix=":" + ;; + esac + AC_MSG_RESULT([$ompi_cv_asm_label_suffix]) + AC_DEFINE_UNQUOTED([OMPI_ASM_LABEL_SUFFIX], ["$ompi_cv_asm_label_suffix"], + [Assembly suffix for labels]) + OMPI_ASM_LABEL_SUFFIX="$ompi_cv_asm_label_suffix" + AC_SUBST(OMPI_ASM_LABEL_SUFFIX) +])dnl + + +dnl ################################################################# +dnl +dnl OMPI_CHECK_ASM_ALIGN_LOG +dnl +dnl Sets OMPI_ASM_ALIGN_LOG to 1 if align is specified +dnl logarithmically, 0 otherwise +dnl +dnl ################################################################# +AC_DEFUN([OMPI_CHECK_ASM_ALIGN_LOG],[ + AC_REQUIRE([OMPI_CHECK_ASM_TEXT]) + AC_REQUIRE([OMPI_CHECK_ASM_GLOBAL]) + AC_REQUIRE([OMPI_CHECK_ASM_LABEL_SUFFIX]) + AC_REQUIRE([AC_PROG_NM]) + + ompi_cv_asm_align_log=0 + asm_result="no" + AC_MSG_CHECKING([if .align directive takes logarithmic value]) + OMPI_TRY_ASSEMBLE([ $ompi_cv_asm_text + .align 4 + $ompi_cv_asm_global foo + .byte 1 + .align 4 +foo$ompi_cv_asm_label_suffix + .byte 2], + [ompi_asm_addr=[`$NM conftest.$OBJEXT | sed -e 's/.*\([0-9a-fA-F][0-9a-fA-F]\).*foo.*/\1/'`]], + [ompi_asm_addr=""]) + # test for both 16 and 10 (decimal and hex notations) + echo "configure: .align test address offset is $ompi_asm_addr" >& AC_FD_CC + if test "$ompi_asm_addr" = "16" -o "$ompi_asm_addr" = "10" ; then + ompi_cv_asm_align_log=1 + asm_result="yes" + fi + AC_MSG_RESULT([$asm_result]) + + AC_DEFINE_UNQUOTED([OMPI_ASM_ALIGN_LOG], + [$ompi_cv_asm_align_log], + [Assembly align directive expects logarithmic value]) + + unset ompi_asm_addr asm_result +])dnl + + +dnl ################################################################# +dnl +dnl OMPI_CHECK_ASM_TYPE +dnl +dnl Sets OMPI_ASM_TYPE to the prefix for the function type to +dnl set a symbol's type as function (needed on ELF for shared +dnl libraries). 
If no .type directive is needed, sets OMPI_ASM_TYPE +dnl to an empty string +dnl +dnl We look for @ \# % +dnl +dnl ################################################################# +AC_DEFUN([OMPI_CHECK_ASM_TYPE],[ + AC_MSG_CHECKING([prefix for function in .type]) + ompi_cv_asm_type="" + + for type in @ \# % ; do + asm_result=0 + echo "configure: trying $type" >& AC_FD_CC + OMPI_TRY_ASSEMBLE([ .type mysym, ${type}function], + [# ok, we succeeded at assembling. see if there was + # a warning in the output. + if test "`cat conftest.out`" = "" ; then + ompi_cv_asm_type="${type}" + asm_result=1 + fi]) + if test "$asm_result" = "1" ; then + break + fi + done + rm -f conftest.out + + AC_MSG_RESULT([$ompi_cv_asm_type]) + AC_DEFINE_UNQUOTED([OMPI_ASM_TYPE], ["$ompi_cv_asm_type"], + [How to set function type in .type directive]) + OMPI_ASM_TYPE="$ompi_cv_asm_type" + AC_SUBST(OMPI_ASM_TYPE) + unset asm_result type +])dnl + + +dnl ################################################################# +dnl +dnl OMPI_CHECK_ASM_SIZE +dnl +dnl Sets OMPI_ASM_SIZE to 1 if we should set .size directives for +dnl each function, 0 otherwise. +dnl +dnl ################################################################# +AC_DEFUN([OMPI_CHECK_ASM_SIZE],[ + AC_MSG_CHECKING([if .size is needed]) + ompi_cv_asm_size=0 + asm_result="no" + + OMPI_TRY_ASSEMBLE([ .size mysym, 1], + [# ok, we succeeded at assembling. see if there was + # a warning in the output. 
+ if test "`cat conftest.out`" = "" ; then + ompi_cv_asm_size=1 + asm_result="yes" + fi]) + rm -f conftest.out + + AC_MSG_RESULT([$asm_result]) + AC_DEFINE_UNQUOTED([OMPI_ASM_SIZE], ["$ompi_cv_asm_size"], + [Do we need to give a .size directive?]) + OMPI_ASM_SIZE="$ompi_cv_asm_size" + AC_SUBST(OMPI_ASM_SIZE) + unset asm_result +])dnl + + +dnl ################################################################# +dnl +dnl OMPI_CHECK_POWERPC_REG +dnl +dnl See if the notation for specifying registers is X (most everyone) +dnl or rX (OS X) +dnl +dnl ################################################################# +AC_DEFUN([OMPI_CHECK_POWERPC_REG],[ + AC_REQUIRE([OMPI_CHECK_ASM_TEXT]) + AC_MSG_CHECKING([if PowerPC registers have r prefix]) + OMPI_TRY_ASSEMBLE([$ompi_cv_asm_text + addi 1,1,0], + [ompi_cv_asm_powerpc_r_reg=0], + OMPI_TRY_ASSEMBLE([$ompi_cv_asm_text + addi r1,r1,0], + [ompi_cv_asm_powerpc_r_reg=1], + AC_MSG_ERROR([Can not determine how to use PPC registers]))) + if test "$ompi_cv_asm_powerpc_r_reg" = "1" ; then + AC_MSG_RESULT([yes]) + else + AC_MSG_RESULT([no]) + fi + + AC_DEFINE_UNQUOTED([OMPI_POWERPC_R_REGISTERS], + [$ompi_cv_asm_powerpc_r_reg], + [Whether r notation is used for ppc registers]) +])dnl + + +dnl ################################################################# +dnl +dnl OMPI_CHECK_POWERPC_64BIT +dnl +dnl On some powerpc chips (the PPC970 or G5), the OS usually runs in +dnl 32 bit mode, even though the hardware can do 64bit things. If +dnl the compiler will let us, emit code for 64bit test and set type +dnl operations (on a long long). 
+dnl +dnl ################################################################# +AC_DEFUN([OMPI_CHECK_POWERPC_64BIT],[ + AC_REQUIRE([OMPI_CHECK_ASM_TEXT]) + + AC_MSG_CHECKING([for 64-bit PowerPC assembly support]) + ppc64_result=0 + if test "$ompi_cv_asm_powerpc_r_reg" = "1" ; then + ldarx_asm=" ldarx r1,r1,r1"; + else + ldarx_asm=" ldarx 1,1,1"; + fi + OMPI_TRY_ASSEMBLE([$ompi_cv_asm_text + $ldarx_asm], + [ppc64_result=1], + [ppc64_result=0]) + if test "$ppc64_result" = "1" ; then + AC_MSG_RESULT([yes]) + ifelse([$1],,:,[$1]) + else + AC_MSG_RESULT([no]) + ifelse([$2],,:,[$2]) + fi + + unset ppc64_result ldarx_asm +])dnl + + +dnl ################################################################# +dnl +dnl OMPI_CHECK_INLINE_GCC +dnl +dnl Check if the compiler is capable of doing GCC-style inline +dnl assembly. Some compilers emit a warning and ignore the inline +dnl assembly (xlc on OS X) and compile without error. Therefore, +dnl the test attempts to run the emitted code to check that the +dnl assembly is actually run. To run this test, one argument to +dnl the macro must be an assembly instruction in gcc format to move +dnl the value 0 into the register containing the variable ret. +dnl For PowerPC, this would be: +dnl +dnl "li %0,0" : "=&r"(ret) +dnl +dnl DEFINE OMPI_GCC_INLINE_ASSEMBLY to 0 or 1 depending on GCC +dnl support +dnl +dnl ################################################################# +AC_DEFUN([OMPI_CHECK_INLINE_GCC],[ + assembly="$1" + asm_result="unknown" + + AC_MSG_CHECKING([if $CC supports GCC inline assembly]) + + if test ! 
"$assembly" = "" ; then + AC_RUN_IFELSE(AC_LANG_PROGRAM([[ +AC_INCLUDES_DEFAULT]], +[[int ret = 1; +__asm__ __volatile__ ($assembly); +return ret;]]), + [asm_result="yes"], [asm_result="no"], + [asm_result="unknown"]) + else + assembly="test skipped - assuming no" + fi + + # if we're cross compiling, just try to compile and figure good enough + if test "$asm_result" = "unknown" ; then + AC_LINK_IFELSE(AC_LANG_PROGRAM([[ +AC_INCLUDES_DEFAULT]], +[[int ret = 1; +__asm__ __volatile__ ($assembly); +return ret;]]), + [asm_result="yes"], [asm_result="no"]) + fi + + AC_MSG_RESULT([$asm_result]) + + if test "$asm_result" = "yes" ; then + OMPI_GCC_INLINE_ASSEMBLY=1 + else + OMPI_GCC_INLINE_ASSEMBLY=0 + fi + + AC_DEFINE_UNQUOTED([OMPI_GCC_INLINE_ASSEMBLY], + [$OMPI_GCC_INLINE_ASSEMBLY], + [Whether compiler supports GCC style inline assembly]) + + unset OMPI_GCC_INLINE_ASSEMBLY assembly asm_result +])dnl + + +dnl ################################################################# +dnl +dnl OMPI_CHECK_INLINE_DEC +dnl +dnl DEFINE OMPI_DEC to 0 or 1 depending on DEC +dnl support +dnl +dnl ################################################################# +AC_DEFUN([OMPI_CHECK_INLINE_DEC],[ + + AC_MSG_CHECKING([if $CC supports DEC inline assembly]) + + AC_LINK_IFELSE(AC_LANG_PROGRAM([[ +AC_INCLUDES_DEFAULT +#include ]], +[[asm(""); +return 0;]]), + [asm_result="yes"], [asm_result="no"]) + + AC_MSG_RESULT([$asm_result]) + + if test "$asm_result" = "yes" ; then + OMPI_DEC_INLINE_ASSEMBLY=1 + else + OMPI_DEC_INLINE_ASSEMBLY=0 + fi + + AC_DEFINE_UNQUOTED([OMPI_DEC_INLINE_ASSEMBLY], + [$OMPI_DEC_INLINE_ASSEMBLY], + [Whether compiler supports DEC style inline assembly]) + + unset OMPI_DEC_INLINE_ASSEMBLY asm_result +])dnl + + +dnl ################################################################# +dnl +dnl OMPI_CHECK_INLINE_XLC +dnl +dnl DEFINE OMPI_XLC to 0 or 1 depending on XLC +dnl support +dnl +dnl ################################################################# 
+AC_DEFUN([OMPI_CHECK_INLINE_XLC],[ + + AC_MSG_CHECKING([if $CC supports XLC inline assembly]) + + OMPI_XLC_INLINE_ASSEMBLY=0 + asm_result="no" + if test "$CC" = "xlc" ; then + if test "$CXX" = "xlC" -o "$CXX" = "xlc++" ; then + OMPI_XLC_INLINE_ASSEMBLY=1 + asm_result="yes" + fi + fi + + AC_MSG_RESULT([$asm_result]) + AC_DEFINE_UNQUOTED([OMPI_XLC_INLINE_ASSEMBLY], + [$OMPI_XLC_INLINE_ASSEMBLY], + [Whether compiler supports XLC style inline assembly]) + + unset OMPI_XLC_INLINE_ASSEMBLY +])dnl + + +dnl ################################################################# +dnl +dnl OMPI_CONFIG_ASM +dnl +dnl DEFINE OMPI_ASSEMBLY_ARCH to something in sys/architecture.h +dnl DEFINE OMPI_ASSEMBLY_FORMAT to string containing correct +dnl format for assembly (not user friendly) +dnl SUBST OMPI_ASSEMBLY_FORMAT to string containing correct +dnl format for assembly (not user friendly) +dnl +dnl ################################################################# +AC_DEFUN([OMPI_CONFIG_ASM],[ + AC_REQUIRE([OMPI_SETUP_CC]) + AC_REQUIRE([OMPI_SETUP_CXX]) + AC_REQUIRE([AM_PROG_AS]) + AC_REQUIRE([OMPI_CHECK_ASM_TEXT]) + AC_REQUIRE([OMPI_CHECK_ASM_GLOBAL]) + AC_REQUIRE([OMPI_CHECK_ASM_GSYM]) + AC_REQUIRE([OMPI_CHECK_ASM_LSYM]) + AC_REQUIRE([OMPI_CHECK_ASM_TYPE]) + AC_REQUIRE([OMPI_CHECK_ASM_SIZE]) + AC_REQUIRE([OMPI_CHECK_ASM_LABEL_SUFFIX]) + AC_REQUIRE([OMPI_CHECK_ASM_ALIGN_LOG]) + +AC_MSG_CHECKING([whether to enable smp locks]) +AC_ARG_ENABLE(smp-locks, + AC_HELP_STRING([--enable-smp-locks], + [disable smp locks in atomic ops (default: enabled)])) +if test "$enable_smp_locks" != "no"; then + AC_MSG_RESULT([yes]) + want_smp_locks=1 +else + AC_MSG_RESULT([no]) + want_smp_locks=0 +fi +AC_DEFINE_UNQUOTED([OMPI_WANT_SMP_LOCKS], [$want_smp_locks], + [whether we want to have smp locks in atomic ops or not]) + + +# find our architecture for purposes of assembly stuff +ompi_cv_asm_arch="UNSUPPORTED" +OMPI_GCC_INLINE_ASSIGN="" +OMPI_POWERPC_SUPPORT_64BIT=0 +case "${host}" in + *-winnt*) + 
ompi_cv_asm_arch="WINDOWS" + ;; + + i?86-*) + ompi_cv_asm_arch="IA32" + OMPI_GCC_INLINE_ASSIGN='"movl [$]0, %0" : "=&r"(ret)' + ;; + + x86_64*) + ompi_cv_asm_arch="AMD64" + OMPI_GCC_INLINE_ASSIGN='"movl [$]0, %0" : "=&r"(ret)' + ;; + + ia64-*) + ompi_cv_asm_arch="IA64" + OMPI_GCC_INLINE_ASSIGN='"mov %0=r0\n;;\n" : "=&r"(ret)' + ;; + + alpha-*) + ompi_cv_asm_arch="ALPHA" + OMPI_GCC_INLINE_ASSIGN='"bis zero,zero,%0" : "=&r"(ret)' + ;; + + powerpc-*) + OMPI_CHECK_POWERPC_REG + if test "$ac_cv_sizeof_long" = "4" ; then + ompi_cv_asm_arch="POWERPC32" + + # Note that on some platforms (Apple G5), even if we are + # compiling in 32 bit mode (and therefore should assume + # sizeof(long) == 4), we can use the 64 bit test and set + # operations. + OMPI_CHECK_POWERPC_64BIT(OMPI_POWERPC_SUPPORT_64BIT=1) + elif test "$ac_cv_sizeof_long" = "8" ; then + OMPI_POWERPC_SUPPORT_64BIT=1 + ompi_cv_asm_arch="POWERPC64" + else + AC_MSG_ERROR([Could not determine PowerPC word size: $ac_cv_sizeof_long]) + fi + OMPI_GCC_INLINE_ASSIGN='"li %0,0" : "=&r"(ret)' + ;; + + sparc-*) + if test "$ac_cv_sizeof_long" = "4" ; then + ompi_cv_asm_arch="SPARC32" + elif test "$ac_cv_sizeof_long" = "8" ; then + ompi_cv_asm_arch="SPARC64" + else + AC_MSG_ERROR([Could not determine Sparc word size: $ac_cv_sizeof_long]) + fi + OMPI_GCC_INLINE_ASSIGN='"mov 0,%0" : "=&r"(ret)' + ;; + + *) + AC_MSG_ERROR([No atomic primitives available for $host]) + ;; +esac + +AC_DEFINE_UNQUOTED([OMPI_POWERPC_SUPPORT_64BIT], + [$OMPI_POWERPC_SUPPORT_64BIT], + [Non-zero if safe to call PPC64 ops, even in PPC32 code]) +AC_SUBST([OMPI_POWERPC_SUPPORT_64BIT]) + +# now that we know our architecture, try to inline assemble +OMPI_CHECK_INLINE_GCC([$OMPI_GCC_INLINE_ASSIGN]) +OMPI_CHECK_INLINE_DEC +OMPI_CHECK_INLINE_XLC + +# format: +# text-global-label_suffix-gsym-lsym-type-size-align_log-ppc_r_reg-64_bit + +asm_format="${ompi_cv_asm_text}-${ompi_cv_asm_global}" 
+asm_format="${asm_format}-${ompi_cv_asm_label_suffix}-${ompi_cv_asm_gsym}" +asm_format="${asm_format}-${ompi_cv_asm_lsym}" +asm_format="${asm_format}-${ompi_cv_asm_type}-${ompi_cv_asm_size}" +asm_format="${asm_format}-${ompi_cv_asm_align_log}" +if test "$ompi_cv_asm_arch" = "POWERPC32" -o "$ompi_cv_asm_arch" = "POWERPC64" ; then + asm_format="${asm_format}-${ompi_cv_asm_powerpc_r_reg}" +else + asm_format="${asm_format}-1" +fi +ompi_cv_asm_format="${asm_format}-${OMPI_POWERPC_SUPPORT_64BIT}" +OMPI_ASSEMBLY_FORMAT="$ompi_cv_asm_format" + +AC_MSG_CHECKING([for assembly format]) +AC_MSG_RESULT([$OMPI_ASSEMBLY_FORMAT]) +AC_DEFINE_UNQUOTED([OMPI_ASSEMBLY_FORMAT], ["$OMPI_ASSEMBLY_FORMAT"], + [Format of assembly file]) +AC_SUBST([OMPI_ASSEMBLY_FORMAT]) + +result="OMPI_$ompi_cv_asm_arch" +OMPI_ASSEMBLY_ARCH="$ompi_cv_asm_arch" +AC_MSG_CHECKING([for asssembly architecture]) +AC_MSG_RESULT([$ompi_cv_asm_arch]) +AC_DEFINE_UNQUOTED([OMPI_ASSEMBLY_ARCH], [$result], + [Architecture type of assembly to use for atomic operations]) +AC_SUBST([OMPI_ASSEMBLY_ARCH]) + +OMPI_ASM_FIND_FILE + +unset result asm_format +])dnl + + +dnl ################################################################# +dnl +dnl OMPI_ASM_FIND_FILE +dnl +dnl +dnl do all the evil mojo to provide a working assembly file +dnl +dnl ################################################################# +AC_DEFUN([OMPI_ASM_FIND_FILE], [ + AC_CHECK_PROG([PERL], [perl], [perl]) + + # see if we have a pre-built one already + AC_MSG_CHECKING([for pre-built assembly file]) + ompi_cv_asm_file="" + if grep "$ompi_cv_asm_arch.*$ompi_cv_asm_format" "${top_ompi_srcdir}/src/asm/asm-data.txt" >conftest.out 2>&1 ; then + ompi_cv_asm_file="`cut -f3 conftest.out`" + if test ! 
"$ompi_cv_asm_file" = "" ; then + ompi_cv_asm_file="atomic-${ompi_cv_asm_file}.s" + if test -f "${top_ompi_srcdir}/src/asm/generated/${ompi_cv_asm_file}" ; then + AC_MSG_RESULT([yes ($ompi_cv_asm_file)]) + else + AC_MSG_RESULT([no ($ompi_cv_asm_file not found)]) + ompi_cv_asm_file="" + fi + fi + else + AC_MSG_RESULT([no (not in asm-data)]) + fi + rm -f conftest.* + + if test "$ompi_cv_asm_file" = "" ; then + if test ! "$PERL" = "" ; then + # we have perl... Can we generate a file? + AC_MSG_CHECKING([whether possible to generate assembly file]) + ompi_cv_asm_file="atomic-local.s" + ompi_try="$PERL \"$top_ompi_srcdir/src/asm/generate-asm.pl\" \"$ompi_cv_asm_arch\" \"$ompi_cv_asm_format\" \"$top_ompi_srcdir/src/asm/base\" \"$top_ompi_builddir/src/asm/generated/$ompi_cv_asm_file\" >conftest.out 2>&1" + if AC_TRY_EVAL(ompi_try) ; then + # save the warnings + cat conftest.out >&AC_FD_CC + AC_MSG_RESULT([yes]) + else + # save output + cat conftest.out >&AC_FD_CC + ompi_cv_asm_file="" + AC_MSG_RESULT([failed]) + AC_MSG_WARN([Could not build atomic operations assembly file.]) + AC_MSG_WARN([There will be no atomic operations for this build.]) + fi + else + AC_MSG_WARN([Could not find prebuilt atomic operations file and could not]) + AC_MSG_WARN([find perl to attempt to generate a custom assembly file.]) + AC_MSG_WARN([There will be no atomic operations for this build.]) + fi + fi + rm -f conftest.* + + AC_MSG_CHECKING([for atomic assembly filename]) + if test "$ompi_cv_asm_file" = "" ; then + AC_MSG_RESULT([none]) + result=0 + else + AC_MSG_RESULT([$ompi_cv_asm_file]) + result=1 + fi + + AC_DEFINE_UNQUOTED([OMPI_HAVE_ASM_FILE], [$result], + [Whether there is an atomic assembly file available]) + AM_CONDITIONAL([OMPI_HAVE_ASM_FILE], [test "$result" = "1"]) + + OMPI_ASM_FILE=$ompi_cv_asm_file + AC_SUBST(OMPI_ASM_FILE) +])dnl diff --git a/config/ompi_try_assemble.m4 b/config/ompi_try_assemble.m4 new file mode 100644 index 0000000000..1bb494066b --- /dev/null +++ 
b/config/ompi_try_assemble.m4 @@ -0,0 +1,43 @@ +dnl +dnl Copyright (c) 2004-2005 The Trustees of Indiana University. +dnl All rights reserved. +dnl Copyright (c) 2004-2005 The Trustees of the University of Tennessee. +dnl All rights reserved. +dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +dnl University of Stuttgart. All rights reserved. +dnl $COPYRIGHT$ +dnl +dnl Additional copyrights may follow +dnl +dnl $HEADER$ +dnl + +dnl OMPI_TRY_ASSEMBLE(asm-code, [action-if-success], [action-if-fail]) +dnl +dnl Attempt to assemble asm-code. If success, run action-if-success. +dnl Otherwise, run action-if-fail. Neither action-if-success nor +dnl action-if-fail are required. +dnl +dnl No preprocessing is guaranteed to be done on asm-code. Some +dnl compilers do not run the preprocessor on assembly files. +dnl +dnl On failure, asm-test.s will be included in config.out +AC_DEFUN([OMPI_TRY_ASSEMBLE], +[cat >conftest.s <&AC_FD_CC + ifelse([$2],,:,[$2]) +else + # save compiler output and failed program + cat conftest.out >&AC_FD_CC + echo "configure: failed program was:" >&AC_FD_CC + cat conftest.s >&AC_FD_CC + ifelse([$3],,:,[$3]) +fi +rm -f conftest* +unset ompi_assemble +])dnl diff --git a/configure.ac b/configure.ac index 0d8de5cefb..08422d5935 100644 --- a/configure.ac +++ b/configure.ac @@ -315,6 +315,16 @@ AC_DEFINE_UNQUOTED(OMPI_WANT_CXX_BINDINGS, $WANT_MPI_CXX_SUPPORT, [Whether we want MPI cxx support or not]) +################################## +# Assembler Configuration +################################## + +ompi_show_subtitle "Assembler" + +AM_PROG_AS +OMPI_CONFIG_ASM + + ################################## # Fortran ################################## @@ -1382,6 +1392,7 @@ AC_CONFIG_FILES([ src/event/compat/sys/Makefile src/attribute/Makefile + src/asm/Makefile src/communicator/Makefile src/datatype/Makefile src/errhandler/Makefile diff --git a/src/Makefile.am b/src/Makefile.am index 85d4eaecbe..ee899a497c 100644 --- a/src/Makefile.am +++ 
b/src/Makefile.am @@ -66,6 +66,7 @@ endif SUBDIRS = \ include \ $(LIBLTDL_SUBDIR) \ + asm \ attribute \ communicator \ datatype \ @@ -94,6 +95,7 @@ lib_LTLIBRARIES = libmpi.la libmpi_la_SOURCES = libmpi_la_LIBADD = \ $(LIBLTDL_LTLIB) \ + asm/libasm.la \ attribute/libattribute.la \ class/liblfc.la \ communicator/libcommunicator.la \ diff --git a/src/asm/Makefile.am b/src/asm/Makefile.am new file mode 100644 index 0000000000..df7d07cbf0 --- /dev/null +++ b/src/asm/Makefile.am @@ -0,0 +1,80 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University. +# All rights reserved. +# Copyright (c) 2004-2005 The Trustees of the University of Tennessee. +# All rights reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +include $(top_srcdir)/config/Makefile.options + +###################################################################### +# +# This is a bit complicated. If there is anything in the library, +# it will always be atomic-asm.s. 
We just symlink atomic-asm.s to +# the best atomic operations available (as determined at configure +# time) +# +###################################################################### +generated/@OMPI_ASM_FILE@: base/@OMPI_ASSEMBLY_ARCH@.asm + $(PERL) "$(top_srcdir)/src/asm/generate-asm.pl" "@OMPI_ASSEMBLY_ARCH@" "@OMPI_ASSEMBLY_FORMAT@" "$(top_srcdir)/src/asm/base" "$(top_builddir)/src/asm/generated/@OMPI_ASM_FILE@" + +atomic-asm.s: generated/@OMPI_ASM_FILE@ + rm -f atomic-asm.s + @ if test -f $(top_srcdir)/src/asm/generated/@OMPI_ASM_FILE@ ; then \ + cmd="ln -s $(top_srcdir)/src/asm/generated/@OMPI_ASM_FILE@ atomic-asm.s" ; \ + echo "$$cmd" ; \ + $$cmd ; \ + else \ + cmd="ln -s $(top_builddir)/src/asm/generated/@OMPI_ASM_FILE@ atomic-asm.s" ; \ + echo "$$cmd" ; \ + $$cmd ; \ + fi + +if OMPI_HAVE_ASM_FILE +libasm_la_SOURCES = atomic-asm.s +else +libasm_la_SOURCES = +endif + +libasm_la_DEPENDENCIES = generated/@OMPI_ASM_FILE@ +lib_LTLIBRARIES = libasm.la + +EXTRA_DIST = \ + asm-data.txt \ + generate-asm.pl \ + generate-all-asm.sh \ + base/AMD64.asm \ + base/IA32.asm \ + base/POWERPC32.asm \ + base/POWERPC64.asm + +###################################################################### + +TESTS = atomic-test +check_PROGRAMS = atomic-test +atomic_test_SOURCES = atomic-test.c +atomic_test_LDADD = libasm.la + +###################################################################### + +clean-local: + rm -f atomic-asm.s + +maintainer-clean-local: + rm -f generated/atomic-local.s + +###################################################################### + +# +# Copy over all the generated files +# +dist-hook: + mkdir ${distdir}/generated + sh generate-all-asm.sh "$(PERL)" "$(srcdir)" "$(distdir)" diff --git a/src/asm/asm-data.txt b/src/asm/asm-data.txt new file mode 100644 index 0000000000..a63c53445c --- /dev/null +++ b/src/asm/asm-data.txt @@ -0,0 +1,27 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University. +# All rights reserved. 
+# Copyright (c) 2004-2005 The Trustees of the University of Tennessee. +# All rights reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# +# Database for mapping architecture and assembly format to prebuilt +# assembly files. +# +# FORMAT: +# ARCHITECTURE ASSEMBLY FORMAT BASE FILENAME +# + +AMD64 .text-.globl-:--.L-@-1-0-1-0 amd64-linux +IA32 .text-.globl-:--.L-@-1-0-1-0 ia32-linux +POWERPC32 .text-.globl-:-_-L--0-1-1-0 powerpc32-osx +POWERPC32 .text-.globl-:-_-L--0-1-1-1 powerpc32-64-osx +POWERPC64 .text-.globl-:-_-L--0-1-1-1 powerpc64-osx diff --git a/src/asm/atomic-test.c b/src/asm/atomic-test.c new file mode 100644 index 0000000000..3824409cd8 --- /dev/null +++ b/src/asm/atomic-test.c @@ -0,0 +1,457 @@ +#undef OMPI_BUILDING + +#include "ompi_config.h" +#include +#include +#ifdef HAVE_PTHREAD_H +#include +#endif +#include +#include +#include +#include + +#include "include/sys/atomic.h" + +/** + * A testing support library to provide uniform reporting output + */ + +static int ompi_n_tests; +static int ompi_n_success; +static int ompi_n_failures; +static char *ompi_description; + +static void test_init(char *a) +{ + /* local variables */ + size_t len; + + /* save the descriptive string */ + len = strlen(a); + ompi_description = (char *) malloc(len + 1); + assert(ompi_description); + + strcpy(ompi_description, a); + + /* initialize counters */ + ompi_n_tests = 0; + ompi_n_success = 0; + ompi_n_failures = 0; + + return; + +} + + +static void test_success(void) +{ + ompi_n_tests++; + ompi_n_success++; +} + + +static void test_failure(char *a) +{ + ompi_n_tests++; + ompi_n_failures++; + + fprintf(stderr, " Failure : "); + fprintf(stderr, "%s", a); + fprintf(stderr, "\n"); + fflush(stderr); +} + + +static int test_verify_int(int expected_result, int test_result) +{ + int return_value; + + return_value = 1; + if 
(expected_result != test_result) { + test_failure("Comparison failure"); + fprintf(stderr, " Expected result: %d\n", expected_result); + fprintf(stderr, " Test result: %d\n", test_result); + fflush(stderr); + return_value = 0; + } else { + test_success(); + } + + return return_value; +} + + +static int test_finalize(void) +{ + int return_value; + + return_value = 1; + + if (ompi_n_tests == ompi_n_success) { + fprintf(stderr, "SUPPORT: OMPI Test Passed: %s: (%d tests)\n", + ompi_description, ompi_n_tests); + fflush(stderr); + } else { + fprintf(stderr, + "SUPPORT: OMPI Test failed: %s (%d of %d failed)\n", + ompi_description, ompi_n_failures, ompi_n_tests); + fflush(stderr); + return_value = 0; + } + + return return_value; +} + + +/* note this is for additional output that does NOT go to STDERR but STDOUT */ +static void test_comment (char* userstr) +{ + fprintf(stdout, "%s:%s\n", ompi_description, userstr); +} + +/* default options */ + +int nreps = 100; +int nthreads = 2; +int enable_verbose = 0; +int enable_64_bit_tests = 0; + +volatile int32_t vol32; +int32_t val32; +int32_t old32; +int32_t new32; + +#ifdef ENABLE_64_BIT +volatile int64_t vol64; +int64_t val64; +int64_t old64; +int64_t new64; +#endif + +volatile int volint; +int valint; +int oldint; +int newint; + +volatile void *volptr; +void *oldptr; +void *newptr; + + +static void help(void) +{ + printf("Usage: threadtest [flags]\n" + "\n" + " Flags may be any of\n" +#ifdef ENABLE_64_BIT + " -l do 64-bit tests\n" +#endif + " -r NREPS number of repetitions\n" + " -t NTHREADS number of threads\n" + " -v verbose output\n" + " -h print this info\n" "\n" + " Numbers may be postfixed with 'k' or 'm'\n\n"); + +#ifndef ENABLE_64_BIT + printf(" 64-bit tests are not enabled in this build of the tests\n\n"); +#endif + + exit(EXIT_SUCCESS); +} + + +static void usage(void) +{ + fprintf(stderr, + "Usage: threadtest [flags]\n" " threadtest -h\n"); + exit(EXIT_FAILURE); +} + + +static void verbose(const char *fmt, ...) 
+{ + if (enable_verbose) { + va_list ap; + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + } +} + + +static int str2size(char *str) +{ + int size; + char mod[32]; + + switch (sscanf(str, "%d%1[mMkK]", &size, mod)) { + case 1: + return (size); + case 2: + switch (*mod) { + case 'm': + case 'M': + return (size << 20); + case 'k': + case 'K': + return (size << 10); + default: + return (size); + } + default: + return (-1); + } +} + + +static void *thread_main(void *arg) +{ + int rank = (int) arg; + int i; + + verbose("thread-%d: Hello\n", rank); + + /* thread tests */ + + for (i = 0; i < nreps; i++) { + ompi_atomic_add_32(&val32, 5); +#ifdef ENABLE_64_BIT + if (enable_64_bit_tests) { + ompi_atomic_add_64(&val64, 5); + } +#endif + ompi_atomic_add(&valint, 5); + } + + return (void *) (rank + 1000); +} + + +int main(int argc, char *argv[]) +{ + int c; + int tid; + pthread_t *th; + + /* option processing */ + + test_init("atomic operations"); + + while ((c = getopt(argc, argv, "hlr:t:v")) != -1) { + switch (c) { + case 'h': + help(); + break; + case 'l': +#ifdef ENABLE_64_BIT + enable_64_bit_tests = 1; +#else + usage(); +#endif + break; + case 'r': + if ((nreps = str2size(optarg)) <= 0) { + usage(); + } + break; + case 't': + if ((nthreads = str2size(optarg)) <= 0) { + usage(); + } + break; + case 'v': + enable_verbose = 1; + break; + default: + usage(); + } + } + if (optind != argc) { + usage(); + } + + verbose("main: %s\n", argv[0]); + verbose("main: nthreads = %d\n", nthreads); + verbose("main: nreps = %d\n", nreps); + + /* first test single-threaded functionality */ + + /* -- cmpset 32-bit tests -- */ + + vol32 = 42, old32 = 42, new32 = 50; + test_verify_int(ompi_atomic_cmpset_32(&vol32, old32, new32), 1); + test_verify_int(vol32, new32); + + vol32 = 42, old32 = 420, new32 = 50; + test_verify_int(ompi_atomic_cmpset_32(&vol32, old32, new32), 0); + test_verify_int(vol32, 42); + + vol32 = 42, old32 = 42, new32 = 50; + 
test_verify_int(ompi_atomic_cmpset_acq_32(&vol32, old32, new32), 1); + test_verify_int(vol32, new32); + + vol32 = 42, old32 = 420, new32 = 50; + test_verify_int(ompi_atomic_cmpset_acq_32(&vol32, old32, new32), 0); + test_verify_int(vol32, 42); + + vol32 = 42, old32 = 42, new32 = 50; + test_verify_int(ompi_atomic_cmpset_rel_32(&vol32, old32, new32), 1); + test_verify_int(vol32, new32); + + vol32 = 42, old32 = 420, new32 = 50; + test_verify_int(ompi_atomic_cmpset_rel_32(&vol32, old32, new32), 0); + test_verify_int(vol32, 42); + + /* -- cmpset 64-bit tests -- */ + +#ifdef ENABLE_64_BIT + if (enable_64_bit_tests) { + verbose("64 bit serial tests\n"); + vol64 = 42, old64 = 42, new64 = 50; + test_verify_int(1, ompi_atomic_cmpset_64(&vol64, old64, new64)); + test_verify_int(new64, vol64); + + verbose("64 bit serial test 2\n"); + vol64 = 42, old64 = 420, new64 = 50; + test_verify_int(ompi_atomic_cmpset_64(&vol64, old64, new64), 0); + test_verify_int(vol64, 42); + + vol64 = 42, old64 = 42, new64 = 50; + test_verify_int(ompi_atomic_cmpset_acq_64(&vol64, old64, new64), 1); + test_verify_int(vol64, new64); + + vol64 = 42, old64 = 420, new64 = 50; + test_verify_int(ompi_atomic_cmpset_acq_64(&vol64, old64, new64), 0); + test_verify_int(vol64, 42); + + vol64 = 42, old64 = 42, new64 = 50; + test_verify_int(ompi_atomic_cmpset_rel_64(&vol64, old64, new64), 1); + test_verify_int(vol64, new64); + + vol64 = 42, old64 = 420, new64 = 50; + test_verify_int(ompi_atomic_cmpset_rel_64(&vol64, old64, new64), 0); + test_verify_int(vol64, 42); + } +#endif + /* -- cmpset int tests -- */ + + volint = 42, oldint = 42, newint = 50; + test_verify_int(ompi_atomic_cmpset(&volint, oldint, newint), 1); + test_verify_int(volint, newint); + + volint = 42, oldint = 420, newint = 50; + test_verify_int(ompi_atomic_cmpset(&volint, oldint, newint), 0); + test_verify_int(volint, 42); + + volint = 42, oldint = 42, newint = 50; + test_verify_int(ompi_atomic_cmpset_acq(&volint, oldint, newint), 1); + 
test_verify_int(volint, newint); + + volint = 42, oldint = 420, newint = 50; + test_verify_int(ompi_atomic_cmpset_acq(&volint, oldint, newint), 0); + test_verify_int(volint, 42); + + volint = 42, oldint = 42, newint = 50; + test_verify_int(ompi_atomic_cmpset_rel(&volint, oldint, newint), 1); + test_verify_int(volint, newint); + + volint = 42, oldint = 420, newint = 50; + test_verify_int(ompi_atomic_cmpset_rel(&volint, oldint, newint), 0); + test_verify_int(volint, 42); + + + /* -- cmpset ptr tests -- */ + + volptr = (void *) 42, oldptr = (void *) 42, newptr = (void *) 50; + test_verify_int(ompi_atomic_cmpset(&volptr, oldptr, newptr), 1); + test_verify_int(volptr, newptr); + + volptr = (void *) 42, oldptr = (void *) 420, newptr = (void *) 50; + test_verify_int(ompi_atomic_cmpset(&volptr, oldptr, newptr), 0); + test_verify_int(volptr, (void *) 42); + + volptr = (void *) 42, oldptr = (void *) 42, newptr = (void *) 50; + test_verify_int(ompi_atomic_cmpset_acq(&volptr, oldptr, newptr), 1); + test_verify_int(volptr, newptr); + + volptr = (void *) 42, oldptr = (void *) 420, newptr = (void *) 50; + test_verify_int(ompi_atomic_cmpset_acq(&volptr, oldptr, newptr), 0); + test_verify_int(volptr, (void *) 42); + + volptr = (void *) 42, oldptr = (void *) 42, newptr = (void *) 50; + test_verify_int(ompi_atomic_cmpset_rel(&volptr, oldptr, newptr), 1); + test_verify_int(volptr, newptr); + + volptr = (void *) 42, oldptr = (void *) 420, newptr = (void *) 50; + test_verify_int(ompi_atomic_cmpset_rel(&volptr, oldptr, newptr), 0); + test_verify_int(volptr, (void *) 42); + + /* -- add_32 tests -- */ + + val32 = 42; + test_verify_int(ompi_atomic_add_32(&val32, 5), (42 + 5)); + test_verify_int((42 + 5), val32); + + /* -- add_64 tests -- */ +#ifdef ENABLE_64_BIT + if (enable_64_bit_tests) { + val64 = 42; + test_verify_int(ompi_atomic_add_64(&val64, 5), (42 + 5)); + test_verify_int((42 + 5), val64); + } +#endif + /* -- add_int tests -- */ + + valint = 42; + ompi_atomic_add(&valint, 5); + 
test_verify_int((42 + 5), valint); + + + /* threaded tests */ + + val32 = 0; +#ifdef ENABLE_64_BIT + val64 = 0ul; +#endif + valint = 0; + + /* -- create the thread set -- */ + + th = (pthread_t *) malloc(nthreads * sizeof(pthread_t)); + if (!th) { + perror("malloc"); + exit(EXIT_FAILURE); + } + for (tid = 0; tid < nthreads; tid++) { + if (pthread_create(&th[tid], NULL, thread_main, (void *) tid) != 0) { + perror("pthread_create"); + exit(EXIT_FAILURE); + } + } + + /* -- wait for the thread set to finish -- */ + + for (tid = 0; tid < nthreads; tid++) { + void *thread_return; + + if (pthread_join(th[tid], &thread_return) != 0) { + perror("pthread_join"); + exit(EXIT_FAILURE); + } + verbose("main: thread %d returned %d\n", tid, (int) thread_return); + } + free(th); + + test_verify_int((5 * nthreads * nreps), val32); +#ifdef ENABLE_64_BIT + if (enable_64_bit_tests) { + test_verify_int((5 * nthreads * nreps), val64); + } +#endif + test_verify_int((5 * nthreads * nreps), valint); + + test_finalize(); + + return 0; +} diff --git a/src/asm/base/AMD64.asm b/src/asm/base/AMD64.asm new file mode 100644 index 0000000000..ff26fa5367 --- /dev/null +++ b/src/asm/base/AMD64.asm @@ -0,0 +1,73 @@ + TEXT + +START_FUNC(ompi_atomic_mb) + pushq %rbp + movq %rsp, %rbp + leave + ret +END_FUNC(ompi_atomic_mb) + + +START_FUNC(ompi_atomic_rmb) + pushq %rbp + movq %rsp, %rbp + leave + ret +END_FUNC(ompi_atomic_rmb) + + +START_FUNC(ompi_atomic_wmb) + pushq %rbp + movq %rsp, %rbp + leave + ret +END_FUNC(ompi_atomic_wmb) + + +START_FUNC(ompi_atomic_cmpset_32) + pushq %rbp + movq %rsp, %rbp + movq %rdi, -8(%rbp) + movl %esi, -12(%rbp) + movl %edx, -16(%rbp) + movl -16(%rbp), %ecx + movq -8(%rbp), %rdx + movl -12(%rbp), %eax +#APP + cmpxchgl %ecx,(%rdx) +#NO_APP + movq %rax, -24(%rbp) + movl -24(%rbp), %eax + movl %eax, -28(%rbp) + movl -28(%rbp), %eax + cmpl -12(%rbp), %eax + sete %al + movzbl %al, %eax + movl %eax, -28(%rbp) + movl -28(%rbp), %eax + leave + ret +END_FUNC(ompi_atomic_cmpset_32) + 
+ +START_FUNC(ompi_atomic_cmpset_64) + pushq %rbp + movq %rsp, %rbp + movq %rdi, -8(%rbp) + movq %rsi, -16(%rbp) + movq %rdx, -24(%rbp) + movq -24(%rbp), %rcx + movq -8(%rbp), %rdx + movq -16(%rbp), %rax +#APP + cmpxchgq %rcx,(%rdx) + +#NO_APP + movq %rax, -32(%rbp) + movq -32(%rbp), %rax + cmpq -16(%rbp), %rax + sete %al + movzbl %al, %eax + leave + ret +END_FUNC(ompi_atomic_cmpset_64) diff --git a/src/asm/base/IA32.asm b/src/asm/base/IA32.asm new file mode 100644 index 0000000000..187fb5fb0e --- /dev/null +++ b/src/asm/base/IA32.asm @@ -0,0 +1,108 @@ + TEXT + +START_FUNC(ompi_atomic_mb) + pushl %ebp + movl %esp, %ebp + leave + ret +END_FUNC(ompi_atomic_mb) + + +START_FUNC(ompi_atomic_rmb) + pushl %ebp + movl %esp, %ebp + leave + ret +END_FUNC(ompi_atomic_rmb) + + +START_FUNC(ompi_atomic_wmb) + pushl %ebp + movl %esp, %ebp + leave + ret +END_FUNC(ompi_atomic_wmb) + + +START_FUNC(ompi_atomic_cmpset_32) + pushl %ebp + movl %esp, %ebp + movl 8(%ebp), %edx + movl 16(%ebp), %ecx + movl 12(%ebp), %eax +#APP + lock cmpxchgl %ecx,(%edx) + sete %dl + +#NO_APP + movzbl %dl, %eax + leave + ret +END_FUNC(ompi_atomic_cmpset_32) + + +START_FUNC(ompi_atomic_cmpset_64) + pushl %ebp + movl %esp, %ebp + subl $32, %esp + movl %ebx, -12(%ebp) + movl %esi, -8(%ebp) + movl %edi, -4(%ebp) + movl 8(%ebp), %edi + movl 12(%ebp), %eax + movl 16(%ebp), %edx + movl %eax, -24(%ebp) + movl %edx, -20(%ebp) + movl 20(%ebp), %eax + movl 24(%ebp), %edx + movl %eax, -32(%ebp) + movl %edx, -28(%ebp) + movl -24(%ebp), %ebx + movl -20(%ebp), %edx + movl -32(%ebp), %esi + movl -28(%ebp), %ecx + movl %ebx, %eax +#APP + push %ebx + movl %esi, %ebx + lock cmpxchg8b (%edi) + sete %dl + pop %ebx + +#NO_APP + movzbl %dl, %eax + movl -12(%ebp), %ebx + movl -8(%ebp), %esi + movl -4(%ebp), %edi + movl %ebp, %esp + popl %ebp + ret +END_FUNC(ompi_atomic_cmpset_64) + + +START_FUNC(ompi_atomic_add_32) + pushl %ebp + movl %esp, %ebp + movl 8(%ebp), %eax + movl 12(%ebp), %edx +#APP + lock addl %edx,(%eax) +#NO_APP + 
movl (%eax), %eax + leave + ret +END_FUNC(ompi_atomic_add_32) + + +START_FUNC(ompi_atomic_sub_32) + pushl %ebp + movl %esp, %ebp + movl 8(%ebp), %eax + movl 12(%ebp), %edx +#APP + lock subl %edx,(%eax) +#NO_APP + movl (%eax), %eax + leave + ret +END_FUNC(ompi_atomic_sub_32) diff --git a/src/asm/base/POWERPC32.asm b/src/asm/base/POWERPC32.asm new file mode 100644 index 0000000000..423bb4a72f --- /dev/null +++ b/src/asm/base/POWERPC32.asm @@ -0,0 +1,143 @@ + TEXT + + ALIGN(4) +START_FUNC(ompi_atomic_mb) + sync + blr +END_FUNC(ompi_atomic_mb) + + +START_FUNC(ompi_atomic_rmb) + lwsync + blr +END_FUNC(ompi_atomic_rmb) + + +START_FUNC(ompi_atomic_wmb) + eieio + blr +END_FUNC(ompi_atomic_wmb) + + +START_FUNC(ompi_atomic_cmpset_32) + 1: lwarx r0, 0, r3 + cmpw 0, r0, r4 + bne- 2f + stwcx. r5, 0, r3 + bne- 1b + sync + 2: + xor r3,r0,r4 + subfic r2,r3,0 + adde r3,r2,r3 + blr +END_FUNC(ompi_atomic_cmpset_32) + + +START_FUNC(ompi_atomic_cmpset_acq_32) + 1: lwarx r0, 0, r3 + cmpw 0, r0, r4 + bne- 2f + stwcx. r5, 0, r3 + bne- 1b + sync + 2: + xor r3,r0,r4 + subfic r2,r3,0 + adde r3,r2,r3 + lwsync + blr +END_FUNC(ompi_atomic_cmpset_acq_32) + + +START_FUNC(ompi_atomic_cmpset_rel_32) + eieio + 1: lwarx r0, 0, r3 + cmpw 0, r0, r4 + bne- 2f + stwcx. r5, 0, r3 + bne- 1b + sync + 2: + xor r3,r0,r4 + subfic r2,r3,0 + adde r3,r2,r3 + blr +END_FUNC(ompi_atomic_cmpset_rel_32) + +#START_64BIT +START_FUNC(ompi_atomic_cmpset_64) + 1: ldarx r9, 0, r3 + cmpd 0, r9, r4 + bne- 2f + stdcx. r6, 0, r3 + bne- 1b + 2: + li r3,0 + cmpw cr7,r9,r4 + bnelr+ cr7 + cmpw cr7,r10,r5 + bnelr+ cr7 + li r3,1 + blr +END_FUNC(ompi_atomic_cmpset_64) + + +START_FUNC(ompi_atomic_cmpset_acq_64) + 1: ldarx r9, 0, r3 + cmpd 0, r9, r4 + bne- 2f + stdcx. 
r6, 0, r3 + bne- 1b + 2: + cmpw cr0,r9,r4 + li r3,0 + bne+ cr0,L15 + cmpw cr0,r10,r5 + bne+ cr0,L15 + li r3,1 +L15: + lwsync + blr +END_FUNC(ompi_atomic_cmpset_acq_64) + + +START_FUNC(ompi_atomic_cmpset_rel_64) + eieio + 1: ldarx r9, 0, r3 + cmpd 0, r9, r4 + bne- 2f + stdcx. r6, 0, r3 + bne- 1b + 2: + cmpw cr0,r9,r4 + li r3,0 + bnelr+ cr0 + cmpw cr0,r10,r5 + bnelr+ cr0 + li r3,1 + blr +END_FUNC(ompi_atomic_cmpset_rel_64) +#END_64BIT + + +START_FUNC(ompi_atomic_add_32) + 1: lwarx r0, 0, r3 + add r0, r4, r0 + stwcx. r0, 0, r3 + bne- 1b + + lwz r3,0(r3) + blr +END_FUNC(ompi_atomic_add_32) + + +START_FUNC(ompi_atomic_sub_32) + 1: lwarx r0,0,r3 + subf r0,r4,r0 + stwcx. r0,0,r3 + bne- 1b + + lwz r3,0(r3) + blr +END_FUNC(ompi_atomic_sub_32) diff --git a/src/asm/base/POWERPC64.asm b/src/asm/base/POWERPC64.asm new file mode 100644 index 0000000000..bbf866a76b --- /dev/null +++ b/src/asm/base/POWERPC64.asm @@ -0,0 +1,156 @@ + TEXT + + ALIGN(4) +START_FUNC(ompi_atomic_mb) + sync + blr +END_FUNC(ompi_atomic_mb) + + +START_FUNC(ompi_atomic_rmb) + lwsync + blr +END_FUNC(ompi_atomic_rmb) + + +START_FUNC(ompi_atomic_wmb) + eieio + blr +END_FUNC(ompi_atomic_wmb) + + +START_FUNC(ompi_atomic_cmpset_32) + 1: lwarx r0, 0, r3 + cmpw 0, r0, r4 + bne- 2f + stwcx. 
r5, 0, r3 + bne- 1b + sync + 2: + cmpw cr7,r0,r4 + mfcr r3 + rlwinm r3,r3,31,1 + blr +END_FUNC(ompi_atomic_cmpset_32) + + +START_FUNC(ompi_atomic_cmpset_acq_32) + mflr r0 + std r29,-24(r1) + std r0,16(r1) + stdu r1,-144(r1) + bl _ompi_atomic_cmpset_32 + mr r29,r3 + bl _ompi_atomic_rmb + mr r3,r29 + addi r1,r1,144 + ld r0,16(r1) + mtlr r0 + ld r29,-24(r1) + blr +END_FUNC(ompi_atomic_cmpset_acq_32) + + +START_FUNC(ompi_atomic_cmpset_rel_32) + mflr r0 + std r27,-40(r1) + std r28,-32(r1) + std r29,-24(r1) + std r0,16(r1) + stdu r1,-160(r1) + mr r29,r3 + mr r28,r4 + mr r27,r5 + bl _ompi_atomic_wmb + mr r3,r29 + mr r4,r28 + mr r5,r27 + bl _ompi_atomic_cmpset_32 + addi r1,r1,160 + ld r0,16(r1) + mtlr r0 + ld r27,-40(r1) + ld r28,-32(r1) + ld r29,-24(r1) + blr +END_FUNC(ompi_atomic_cmpset_rel_32) + + +START_FUNC(ompi_atomic_cmpset_64) + 1: ldarx r0, 0, r3 + cmpd 0, r0, r4 + bne- 2f + stdcx. r5, 0, r3 + bne- 1b + 2: + xor r3,r4,r0 + subfic r2,r3,0 + adde r3,r2,r3 + blr +END_FUNC(ompi_atomic_cmpset_64) + + +START_FUNC(ompi_atomic_cmpset_acq_64) + mflr r0 + std r29,-24(r1) + std r0,16(r1) + stdu r1,-144(r1) + bl _ompi_atomic_cmpset_64 + mr r29,r3 + bl _ompi_atomic_rmb + mr r3,r29 + addi r1,r1,144 + ld r0,16(r1) + mtlr r0 + ld r29,-24(r1) + blr +END_FUNC(ompi_atomic_cmpset_acq_64) + + +START_FUNC(ompi_atomic_cmpset_rel_64) + mflr r0 + std r27,-40(r1) + std r28,-32(r1) + std r29,-24(r1) + std r0,16(r1) + stdu r1,-160(r1) + mr r29,r3 + mr r28,r4 + mr r27,r5 + bl _ompi_atomic_wmb + mr r3,r29 + mr r4,r28 + mr r5,r27 + bl _ompi_atomic_cmpset_64 + addi r1,r1,160 + ld r0,16(r1) + mtlr r0 + ld r27,-40(r1) + ld r28,-32(r1) + ld r29,-24(r1) + blr +END_FUNC(ompi_atomic_cmpset_rel_64) + + +START_FUNC(ompi_atomic_add_32) + 1: lwarx r0, 0, r3 + add r0, r4, r0 + stwcx. r0, 0, r3 + bne- 1b + + lwz r3,0(r3) + extsw r3,r3 + blr +END_FUNC(ompi_atomic_add_32) + + +START_FUNC(ompi_atomic_sub_32) + 1: lwarx r0,0,r3 + subf r0,r4,r0 + stwcx. 
r0,0,r3 + bne- 1b + + lwz r3,0(r3) + extsw r3,r3 + blr +END_FUNC(ompi_atomic_sub_32) diff --git a/src/asm/generate-all-asm.sh b/src/asm/generate-all-asm.sh new file mode 100644 index 0000000000..6eba4aa3ad --- /dev/null +++ b/src/asm/generate-all-asm.sh @@ -0,0 +1,31 @@ +#!/bin/sh + +perl="$1" +srcdir="$2" +destdir="$3" +ret=0 + +if test "$perl" = "" -o "$srcdir" = "" -o "$destdir" = "" ; then + echo "ERROR: invalid argument to generate-all-asm.sh" + echo "usage: generate-all-asm.sh [PERL] [SRCDIR] [DESTDIR]" + exit 1 +fi + +for asmarch in `grep -v '^#' "$srcdir/asm-data.txt" | cut -f1 | xargs` ; do + if test ! -f "${srcdir}/base/${asmarch}.asm" ; then + echo "WARNING: Skipping missing assembly arch ${asmarch}" + continue + fi + + for asmformat in `grep $asmarch "$srcdir/asm-data.txt" | cut -f2 | xargs` ; do + echo "--> Generating assembly for $asmarch $asmformat" + output="`grep \"$asmarch.*$asmformat\" $srcdir/asm-data.txt | cut -f3`" + $perl generate-asm.pl "$asmarch" "$asmformat" "$srcdir/base" "$destdir/generated/atomic-$output.s" + if test "$?" != "0" ; then + echo "WARNING: Failed to generate assembly for $asmarch $asmformat" + ret=1 + fi + done +done + +exit $ret diff --git a/src/asm/generate-asm.pl b/src/asm/generate-asm.pl new file mode 100644 index 0000000000..a53e9b60e6 --- /dev/null +++ b/src/asm/generate-asm.pl @@ -0,0 +1,92 @@ +#!/usr/bin/perl -w + + +my $asmarch = shift; +my $asmformat = shift; +my $basedir = shift; +my $output = shift; + +if ( ! 
$asmarch) { + print "usage: generate-asm.pl [ASMARCH] [ASMFORMAT] [BASEDIR] [OUTPUT NAME]\n"; + exit(1); +} + +open(INPUT, "$basedir/$asmarch.asm") || + die "Could not open $basedir/$asmarch.asm: $!\n"; +open(OUTPUT, ">$output") || die "Could not open $output: $!\n"; + +my $TEXT = ""; +my $GLOBAL = ""; +my $SUFFIX = ""; +my $GSYM = ""; +my $LSYM = ""; +my $TYPE = ""; +my $SIZE = 0; +my $ALIGN_LOG = 0; +my $DEL_R_REG = 0; +my $IS64BIT = 0; + +($TEXT, $GLOBAL, $SUFFIX, $GSYM, $LSYM, $TYPE, $SIZE, $ALIGN_LOG, $DEL_R_REG, $IS64BIT) = ( + $asmformat =~ /(.*)\-(.*)\-(.*)\-(.*)\-(.*)\-(.*)\-(.*)\-(.*)\-(.*)\-(.*)/); + +my $current_func = ""; +my $delete = 0; + +while (<INPUT>) { + s/TEXT/$TEXT/g; + s/GLOBAL/$GLOBAL/g; + s/GSYM\((.*)\)/$GSYM$1$SUFFIX/g; + s/LSYM\((.*)\)/$LSYM$1$SUFFIX/g; + if ($DEL_R_REG == 0) { + s/r([0-9][0-9]?)/$1/g; + } + + if (/START_FUNC\((.*)\)/) { + $current_func = $1; + $_ = "\t$GLOBAL $GSYM$current_func\n"; + if (! $TYPE eq "") { + $_ .= "\t.type $current_func, $TYPE" . "function\n"; + } + $_ .= "$GSYM$current_func$SUFFIX\n"; + } + + if (/END_FUNC\((.*)\)/) { + s/END_FUNC\((.*)\)//g; + if ($SIZE != 0) { + $_ = "\t.size $current_func, .-$current_func\n"; + } else { + chomp; + } + } + + if ($ALIGN_LOG == 0) { + s/ALIGN\((\d*)\)/.align $1/g; + } else { + # Ugh... + if (m/ALIGN\((\d*)\)/) { + $val = $1; + $result = 0; + while ($val > 1) { $val /= 2; $result++ } + s/ALIGN\((\d*)\)/.align $result/; + } + } + + if (/^\#START_64BIT/) { + $_ = ""; + if ($IS64BIT == 0) { + $delete = 1; + } + } + if (/^\#END_64BIT/) { + $_ = ""; + $delete = 0; + } + + if ($delete == 0) { + print OUTPUT $_; + } + +} + +close(INPUT); +close(OUTPUT); diff --git a/src/include/sys/alpha/atomic.h b/src/include/sys/alpha/atomic.h index b387a721e0..f9b41e481e 100644 --- a/src/include/sys/alpha/atomic.h +++ b/src/include/sys/alpha/atomic.h @@ -19,7 +19,7 @@ * On alpha, everything is load-locked, store-conditional... 
*/ -#ifdef HAVE_SMP +#if OMPI_WANT_SMP_LOCKS #define MB() __asm__ __volatile__ ("mb"); #define RMB() __asm__ __volatile__ ("mb"); @@ -34,6 +34,25 @@ #endif +/********************************************************************** + * + * Define constants for Alpha + * + *********************************************************************/ +#define OMPI_HAVE_ATOMIC_MEM_BARRIER 1 + +#define OMPI_HAVE_ATOMIC_CMPSET_32 1 + +#define OMPI_HAVE_ATOMIC_CMPSET_64 1 + + +/********************************************************************** + * + * Memory Barriers + * + *********************************************************************/ +#if OMPI_GCC_INLINE_ASSEMBLY + static inline void ompi_atomic_mb(void) { MB(); @@ -51,7 +70,16 @@ static inline void ompi_atomic_wmb(void) WMB(); } -#define OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_32 +#endif /* OMPI_GCC_INLINE_ASSEMBLY */ + + +/********************************************************************** + * + * Atomic math operations + * + *********************************************************************/ +#if OMPI_GCC_INLINE_ASSEMBLY + static inline int ompi_atomic_cmpset_32( volatile int32_t *addr, int32_t oldval, int32_t newval) { @@ -96,7 +124,7 @@ static inline int ompi_atomic_cmpset_rel_32(volatile int32_t *addr, return ompi_atomic_cmpset_32(addr, oldval, newval); } -#define OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_64 + static inline int ompi_atomic_cmpset_64( volatile int64_t *addr, int64_t oldval, int64_t newval) { @@ -141,4 +169,8 @@ static inline int ompi_atomic_cmpset_rel_64(volatile int64_t *addr, return ompi_atomic_cmpset_64(addr, oldval, newval); } + +#endif /* OMPI_GCC_INLINE_ASSEMBLY */ + + #endif /* ! 
OMPI_SYS_ARCH_ATOMIC_H */ diff --git a/src/include/sys/alpha/update.sh b/src/include/sys/alpha/update.sh index dad665e039..9487684052 100644 --- a/src/include/sys/alpha/update.sh +++ b/src/include/sys/alpha/update.sh @@ -24,6 +24,7 @@ cat > $CFILE< #define static #define inline +#define OMPI_GCC_INLINE_ASSEMBLY 1 #include "atomic.h" EOF diff --git a/src/include/sys/amd64/atomic.h b/src/include/sys/amd64/atomic.h index 78e99a35bc..78e1406021 100644 --- a/src/include/sys/amd64/atomic.h +++ b/src/include/sys/amd64/atomic.h @@ -19,7 +19,7 @@ */ -#ifdef HAVE_SMP +#if OMPI_WANT_SMP_LOCKS #define SMPLOCK "lock; " #define MB() __asm__ __volatile__("": : :"memory") #else @@ -28,6 +28,25 @@ #endif +/********************************************************************** + * + * Define constants for AMD64 / x86_64 / EM64T / ... + * + *********************************************************************/ +#define OMPI_HAVE_ATOMIC_MEM_BARRIER 1 + +#define OMPI_HAVE_ATOMIC_CMPSET_32 1 + +#define OMPI_HAVE_ATOMIC_CMPSET_64 1 + + +/********************************************************************** + * + * Memory Barriers + * + *********************************************************************/ +#if OMPI_GCC_INLINE_ASSEMBLY + static inline void ompi_atomic_mb(void) { MB(); @@ -45,7 +64,16 @@ static inline void ompi_atomic_wmb(void) MB(); } -#define OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_32 +#endif /* OMPI_GCC_INLINE_ASSEMBLY */ + + +/********************************************************************** + * + * Atomic math operations + * + *********************************************************************/ +#if OMPI_GCC_INLINE_ASSEMBLY + static inline int ompi_atomic_cmpset_32( volatile int32_t *addr, int32_t oldval, int32_t newval) { @@ -57,10 +85,13 @@ static inline int ompi_atomic_cmpset_32( volatile int32_t *addr, return ((int32_t)prev == oldval); } +#endif /* OMPI_GCC_INLINE_ASSEMBLY */ + #define ompi_atomic_cmpset_acq_32 ompi_atomic_cmpset_32 #define 
ompi_atomic_cmpset_rel_32 ompi_atomic_cmpset_32 -#define OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_64 +#if OMPI_GCC_INLINE_ASSEMBLY + static inline int ompi_atomic_cmpset_64( volatile int64_t *addr, int64_t oldval, int64_t newval) { @@ -75,6 +106,8 @@ static inline int ompi_atomic_cmpset_64( volatile int64_t *addr, return (prev == oldval); } +#endif /* OMPI_GCC_INLINE_ASSEMBLY */ + #define ompi_atomic_cmpset_acq_64 ompi_atomic_cmpset_64 #define ompi_atomic_cmpset_rel_64 ompi_atomic_cmpset_64 diff --git a/src/include/sys/amd64/update.sh b/src/include/sys/amd64/update.sh index dad665e039..9487684052 100644 --- a/src/include/sys/amd64/update.sh +++ b/src/include/sys/amd64/update.sh @@ -24,6 +24,7 @@ cat > $CFILE< #define static #define inline +#define OMPI_GCC_INLINE_ASSEMBLY 1 #include "atomic.h" EOF diff --git a/src/include/sys/architecture.h b/src/include/sys/architecture.h new file mode 100644 index 0000000000..34aeb5f62f --- /dev/null +++ b/src/include/sys/architecture.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University. + * All rights reserved. + * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. + * All rights reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +/* + * List of supported architectures + */ + +#ifndef OMPI_SYS_ARCHITECTURE_H +#define OMPI_SYS_ARCHITECTURE_H + +/* Architectures */ +#define OMPI_UNSUPPORTED 0000 +#define OMPI_WINDOWS 0001 +#define OMPI_IA32 0010 +#define OMPI_IA64 0020 +#define OMPI_AMD64 0030 +#define OMPI_ALPHA 0040 +#define OMPI_POWERPC32 0050 +#define OMPI_POWERPC64 0051 +#define OMPI_SPARC32 0060 +#define OMPI_SPARC64 0061 + +/* Formats */ +#define OMPI_DEFAULT 1000 /* standard for given architecture */ +#define OMPI_DARWIN 1001 /* Darwin / OS X on PowerPC */ +#define OMPI_PPC_LINUX 1002 /* Linux on PowerPC */ +#define OMPI_AIX 1003 /* AIX on Power / PowerPC */ + +#endif /* #ifndef OMPI_SYS_ARCHITECTURE_H */ diff --git a/src/include/sys/atomic.h b/src/include/sys/atomic.h index 862d82e52e..b93c160aa4 100644 --- a/src/include/sys/atomic.h +++ b/src/include/sys/atomic.h @@ -23,6 +23,18 @@ * http://www.freebsd.org/cgi/man.cgi?query=atomic&sektion=9 * * Only the necessary subset of functions are implemented here. + * + * The following #defines will be true / false based on + * assembly support: + * + * \c OMPI_HAVE_ATOMIC_MEM_BARRIER atomic memory barriers + * \c OMPI_HAVE_ATOMIC_SPINLOCKS atomic spinlocks + * \c OMPI_HAVE_ATOMIC_MATH_32 if 32 bit add/sub/cmpset can be done "atomically" + * \c OMPI_HAVE_ATOMIC_MATH_64 if 64 bit add/sub/cmpset can be done "atomically" + * + * Note that for the Atomic math, atomic add/sub may be implemented as + * C code using ompi_atomic_cmpset. The appearance of atomic + * operation will be upheld in these cases. 
*/ #ifndef OMPI_SYS_ATOMIC_H @@ -30,11 +42,91 @@ #include "ompi_config.h" +#include "include/sys/architecture.h" + #ifdef HAVE_SYS_TYPES_H #include #endif +#if defined(c_plusplus) || defined(__cplusplus) +extern "C" { +#endif + +/********************************************************************** + * + * Load the appropriate architecture files and set some reasonable + * default values for our support + * + *********************************************************************/ +#if defined(DOXYGEN) +/* don't include system-level gorp when generating doxygen files */ +#elif OMPI_ASSEMBLY_ARCH == OMPI_WINDOWS || defined(win32) +/* windows first, as they have API-level primitives for this stuff */ +#include "include/sys/win32/atomic.h" +#elif OMPI_ASSEMBLY_ARCH == OMPI_ALPHA +#include "include/sys/alpha/atomic.h" +#elif OMPI_ASSEMBLY_ARCH == OMPI_AMD64 +#include "include/sys/amd64/atomic.h" +#elif OMPI_ASSEMBLY_ARCH == OMPI_IA32 +#include "include/sys/ia32/atomic.h" +#elif OMPI_ASSEMBLY_ARCH == OMPI_IA64 +#include "include/sys/ia64/atomic.h" +#elif OMPI_ASSEMBLY_ARCH == OMPI_POWERPC32 +#include "include/sys/powerpc/atomic.h" +#elif OMPI_ASSEMBLY_ARCH == OMPI_POWERPC64 +#include "include/sys/powerpc/atomic.h" +#elif OMPI_ASSEMBLY_ARCH == OMPI_SPARC32 +#error "32 bit Sparc support not implemented yet" +#elif OMPI_ASSEMBLY_ARCH == OMPI_SPARC64 +#include "include/sys/sparc64/atomic.h" +#endif + +/* compare and set operations can't really be emulated from software, + so if these defines aren't already set, they should be set to 0 + now */ +#ifndef OMPI_HAVE_ATOMIC_CMPSET_32 +#define OMPI_HAVE_ATOMIC_CMPSET_32 0 +#endif +#ifndef OMPI_HAVE_ATOMIC_CMPSET_64 +#define OMPI_HAVE_ATOMIC_CMPSET_64 0 +#endif + +/********************************************************************** + * + * Memory Barriers - defined here if running doxygen or have barriers + * but can't inline + * + *********************************************************************/ +#ifndef 
OMPI_HAVE_ATOMIC_MEM_BARRIER +/* no way to emulate in C code */ +#define OMPI_HAVE_ATOMIC_MEM_BARRIER 0 +#endif + +#if defined(DOXYGEN) || OMPI_HAVE_ATOMIC_MEM_BARRIER +/** + * Memory barrier + */ +void ompi_atomic_mb(void); + +/** + * Read memory barrier + */ +void ompi_atomic_rmb(void); + +/** + * Write memory barrier. + */ +void ompi_atomic_wmb(void); + +#endif /* defined(DOXYGEN) || OMPI_HAVE_MEM_BARRIER */ + + +/********************************************************************** + * + * Atomic spinlocks - always inlined, if have atomic cmpset + * + *********************************************************************/ /** * Volatile lock object (with optional padding). */ @@ -44,38 +136,33 @@ struct ompi_lock_t { char padding[sizeof(int)]; /**< Array for optional padding */ } u; }; - typedef struct ompi_lock_t ompi_lock_t; - -/** - * Memory barrier - */ -static inline void ompi_atomic_mb(void); - - -/** - * Read memory barrier - */ -static inline void ompi_atomic_rmb(void); - - -/** - * Write memory barrier. - */ -static inline void ompi_atomic_wmb(void); - -#if 0 -/** - * Atomically add to an integer. - * - * @param addr Address of integer. - * @param newval Value to set. - * @return Old value of integer. - */ -static inline int ompi_atomic_fetch_and_set_int(volatile void *addr, int newval); +#ifndef OMPI_HAVE_ATOMIC_SPINLOCKS +#define OMPI_HAVE_ATOMIC_SPINLOCKS (OMPI_HAVE_ATOMIC_CMPSET_32 || OMPI_HAVE_ATOMIC_CMPSET_64) #endif +#if defined(DOXYGEN) || OMPI_HAVE_ATOMIC_SPINLOCKS + + +/** + * Enumeration of lock states + */ +enum { + OMPI_ATOMIC_UNLOCKED = 0, + OMPI_ATOMIC_LOCKED = 1 +}; + + +/** + * Initialize a lock to value + * + * @param lock Address of the lock + * @param value Initial value to set lock to + */ +static inline void ompi_atomic_init(ompi_lock_t* lock, int value); + + /** * Try to acquire a lock. 
* @@ -100,144 +187,96 @@ static inline void ompi_atomic_lock(ompi_lock_t *lock); */ static inline void ompi_atomic_unlock(ompi_lock_t *lock); +#endif /* OMPI_HAVE_ATOMIC_SPINLOCKS */ -/* - * Include system specific inline asm definitions. Otherwise - * the definitions are in system specific .s files in src/util. - */ -/* Include win32/atomic.h if we are in windows platform. Else, we - can go through other compilers and options. */ -#ifdef WIN32 -#define OMPI_HAVE_ATOMIC_WIN32 1 -#include "include/sys/win32/atomic.h" -#else /* only now go through this stuff */ -#define OMPI_HAVE_ATOMIC_WIN32 0 - -#if defined(__alpha__) -# define OMPI_HAVE_ATOMIC 1 -# ifdef __GNUC__ -# include "alpha/atomic.h" -# endif -#elif defined(__amd64__) || defined(__x86_64__) -# define OMPI_HAVE_ATOMIC 1 -# ifdef __GNUC__ -# include "amd64/atomic.h" -# endif -#elif defined(__i386__) -# define OMPI_HAVE_ATOMIC 1 -# ifdef __GNUC__ -# include "ia32/atomic.h" -# endif -#elif defined(__ia64__) -# define OMPI_HAVE_ATOMIC 1 -# ifdef __GNUC__ -# include "ia64/atomic.h" -# endif -#elif defined(__POWERPC__) -# define OMPI_HAVE_ATOMIC 1 -# ifdef __GNUC__ -# include "powerpc/atomic.h" -# endif -#elif defined(__sparc__) || defined(__sparc) -# define OMPI_HAVE_ATOMIC 1 -# ifdef __GNUC__ -# include "sparc64/atomic.h" -# endif -#else -#error No atomic operations defined yet +/********************************************************************** + * + * Atomic math operations + * + *********************************************************************/ +#ifndef OMPI_HAVE_ATOMIC_CMPSET_32 +#define OMPI_HAVE_ATOMIC_CMPSET_32 0 #endif -#endif /* ifdef WIN32*/ - -#ifndef OMPI_HAVE_ATOMIC -#define OMPI_HAVE_ATOMIC 0 +#if defined(DOXYGEN) || OMPI_HAVE_ATOMIC_CMPSET_32 +int ompi_atomic_cmpset_32(volatile int32_t *addr, int32_t oldval, + int32_t newval); +int ompi_atomic_cmpset_acq_32(volatile int32_t *addr, int32_t oldval, + int32_t newval); +int ompi_atomic_cmpset_rel_32(volatile int32_t *addr, int32_t oldval, + 
int32_t newval); #endif -/* All the architectures provide a compare_and_set atomic operations. If - * they dont provide atomic additions and/or substractions then we can - * define these operations using the atomic compare_and_set. - */ -#if !defined(OMPI_ARCHITECTURE_DEFINE_ATOMIC_ADD_32) -#if defined(OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_32) -static inline int32_t ompi_atomic_add_32(volatile int32_t *addr, int delta) -{ - int32_t oldval; - - do { - oldval = *addr; - } while (0 == ompi_atomic_cmpset_32(addr, oldval, oldval + delta)); - return (oldval + delta); -} -#endif /* OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_32 */ -#endif /* OMPI_ARCHITECTURE_DEFINE_ATOMIC_ADD_32 */ +#ifndef OMPI_HAVE_ATOMIC_CMPSET_64 +#define OMPI_HAVE_ATOMIC_CMPSET_64 0 +#endif +#if defined(DOXYGEN) || OMPI_HAVE_ATOMIC_CMPSET_64 +int ompi_atomic_cmpset_64(volatile int64_t *addr, int64_t oldval, + int64_t newval); +int ompi_atomic_cmpset_acq_64(volatile int64_t *addr, int64_t oldval, + int64_t newval); +int ompi_atomic_cmpset_rel_64(volatile int64_t *addr, int64_t oldval, + int64_t newval); +#endif -#if !defined(OMPI_ARCHITECTURE_DEFINE_ATOMIC_SUB_32) -#if defined(OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_32) -static inline int32_t ompi_atomic_sub_32(volatile int32_t *addr, int delta) -{ - int32_t oldval; - - do { - oldval = *addr; - } while (0 == ompi_atomic_cmpset_32(addr, oldval, oldval - delta)); - return (oldval - delta); -} -#endif /* OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_32 */ -#endif /* OMPI_ARCHITECTURE_DEFINE_ATOMIC_SUB_32 */ +#ifndef OMPI_HAVE_ATOMIC_MATH_32 +/* define to 0 for these tests. WIll fix up later. */ +#define OMPI_HAVE_ATOMIC_MATH_32 0 +#endif +#if defined(DOXYGEN) || OMPI_HAVE_ATOMIC_MATH_32 || OMPI_HAVE_ATOMIC_CMPSET_32 +#if ! OMPI_HAVE_ATOMIC_MATH_32 +static inline +#endif +int32_t ompi_atomic_add_32(volatile int32_t *addr, int delta); +#if ! 
OMPI_HAVE_ATOMIC_MATH_32 +static inline +#endif +int32_t ompi_atomic_sub_32(volatile int32_t *addr, int delta); +#endif /* OMPI_HAVE_ATOMIC_MATH_32 */ +#if ! OMPI_HAVE_ATOMIC_MATH_32 +/* fix up the value of ompi_have_atomic_math_32 to allow for C versions */ +#undef OMPI_HAVE_ATOMIC_MATH_32 +#define OMPI_HAVE_ATOMIC_MATH_32 OMPI_HAVE_ATOMIC_CMPSET_32 +#endif -/* Some architectures does not provide support for the 64 bits - * atomic operations. Until we find a better solution let's just - * undefine all those functions. - */ -#if defined(OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_64) -#if !defined(OMPI_ARCHITECTURE_DEFINE_ATOMIC_ADD_64) -static inline int64_t ompi_atomic_add_64(volatile int64_t *addr, int64_t delta) -{ - int64_t oldval; - - do { - oldval = *addr; - } while (0 == ompi_atomic_cmpset_64(addr, oldval, oldval + delta)); - return (oldval + delta); -} -#endif /* OMPI_ARCHITECTURE_DEFINE_ATOMIC_ADD_64 */ +#ifndef OMPI_HAVE_ATOMIC_MATH_64 +/* define to 0 for these tests. WIll fix up later. */ +#define OMPI_HAVE_ATOMIC_MATH_64 0 +#endif +#if defined(DOXYGEN) || OMPI_HAVE_ATOMIC_MATH_64 || OMPI_HAVE_ATOMIC_CMPSET_64 +#if OMPI_HAVE_ATOMIC_CMPSET_64 +static inline +#endif +int64_t ompi_atomic_add_64(volatile int64_t *addr, int64_t delta); +#if OMPI_HAVE_ATOMIC_CMPSET_64 +static inline +#endif +int64_t ompi_atomic_sub_64(volatile int64_t *addr, int64_t delta); +#endif /* OMPI_HAVE_ATOMIC_MATH_32 */ +#if ! 
OMPI_HAVE_ATOMIC_MATH_64 +/* fix up the value of ompi_have_atomic_math_64 to allow for C versions */ +#undef OMPI_HAVE_ATOMIC_MATH_64 +#define OMPI_HAVE_ATOMIC_MATH_64 OMPI_HAVE_ATOMIC_CMPSET_64 +#endif -#if !defined(OMPI_ARCHITECTURE_DEFINE_ATOMIC_SUB_64) -static inline int64_t ompi_atomic_sub_64(volatile int64_t *addr, int64_t delta) -{ - int64_t oldval; - do { - oldval = *addr; - } while (0 == ompi_atomic_cmpset_64(addr, oldval, oldval - delta)); - return (oldval - delta); -} -#endif /* OMPI_ARCHITECTURE_DEFINE_ATOMIC_SUB_64 */ -#endif /* OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_64 */ +#if defined(DOXYGEN) || (OMPI_HAVE_ATOMIC_CMPSET_32 || OMPI_HAVE_ATOMIC_CMPSET_64) +/* these are always done with inline functions, so always mark as + static inline */ +static inline int ompi_atomic_cmpset_xx(volatile void* addr, int64_t oldval, + int64_t newval, size_t length); +static inline int ompi_atomic_cmpset_acq_xx(volatile void* addr, + int64_t oldval, int64_t newval, + size_t length); +static inline int ompi_atomic_cmpset_rel_xx(volatile void* addr, + int64_t oldval, int64_t newval, + size_t length); +static inline void ompi_atomic_add_xx(volatile void* addr, + int32_t value, size_t length); +static inline void ompi_atomic_sub_xx(volatile void* addr, + int32_t value, size_t length); -static inline int ompi_atomic_cmpset_xx( volatile void* addr, int64_t oldval, - int64_t newval, size_t length ) -{ - switch( length ) { -#if defined(OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_32) - case 4: - return ompi_atomic_cmpset_32( (volatile int32_t*)addr, - (int32_t)oldval, (int32_t)newval ); -#endif /* OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_32 */ - -#if defined(OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_64) - case 8: - return ompi_atomic_cmpset_64( (volatile int64_t*)addr, - (int64_t)oldval, (int64_t)newval ); -#endif /* OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_64 */ - default: - /* This should never happen, so deliberately cause a seg fault - for corefile analysis */ - *(int*)(0) = 0; - } - 
return 0; /* always fail */ -} /** * Atomic compare and set of pointer with relaxed semantics. This @@ -251,30 +290,9 @@ static inline int ompi_atomic_cmpset_xx( volatile void* addr, int64_t oldval, * See ompi_atomic_cmpset_* for pseudo-code. */ #define ompi_atomic_cmpset( ADDR, OLDVAL, NEWVAL ) \ - ompi_atomic_cmpset_xx( (volatile void*)(ADDR), (int64_t)(OLDVAL), (int64_t)(NEWVAL), sizeof(*(ADDR)) ) + ompi_atomic_cmpset_xx( (volatile void*)(ADDR), (int64_t)(OLDVAL), \ + (int64_t)(NEWVAL), sizeof(*(ADDR)) ) -static inline int ompi_atomic_cmpset_acq_xx( volatile void* addr, int64_t oldval, - int64_t newval, size_t length ) -{ - switch( length ) { -#if defined(OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_32) - case 4: - return ompi_atomic_cmpset_acq_32( (volatile int32_t*)addr, - (int32_t)oldval, (int32_t)newval ); -#endif /* OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_32 */ - -#if defined(OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_64) - case 8: - return ompi_atomic_cmpset_acq_64( (volatile int64_t*)addr, - (int64_t)oldval, (int64_t)newval ); -#endif /* OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_64 */ - default: - /* This should never happen, so deliberately cause a seg fault - for corefile analysis */ - *(int*)(0) = 0; - } - return 0; /* always fail */ -} /** * Atomic compare and set of pointer with acquire semantics. This @@ -288,30 +306,9 @@ static inline int ompi_atomic_cmpset_acq_xx( volatile void* addr, int64_t oldval * See ompi_atomic_cmpset_acq_* for pseudo-code. 
*/ #define ompi_atomic_cmpset_acq( ADDR, OLDVAL, NEWVAL ) \ - ompi_atomic_cmpset_acq_xx( (volatile void*)(ADDR), (int64_t)(OLDVAL), (int64_t)(NEWVAL), sizeof(*(ADDR)) ) + ompi_atomic_cmpset_acq_xx( (volatile void*)(ADDR), (int64_t)(OLDVAL), \ + (int64_t)(NEWVAL), sizeof(*(ADDR)) ) -static inline int ompi_atomic_cmpset_rel_xx( volatile void* addr, int64_t oldval, - int64_t newval, size_t length ) -{ - switch( length ) { -#if defined(OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_32) - case 4: - return ompi_atomic_cmpset_rel_32( (volatile int32_t*)addr, - (int32_t)oldval, (int32_t)newval ); -#endif /* OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_32 */ - -#if defined(OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_64) - case 8: - return ompi_atomic_cmpset_rel_64( (volatile int64_t*)addr, - (int64_t)oldval, (int64_t)newval ); -#endif /* OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_64 */ - default: - /* This should never happen, so deliberately cause a seg fault - for corefile analysis */ - *(int*)(0) = 0; - } - return 0; /* always fail */ -} /** * Atomic compare and set of pointer with release semantics. This @@ -325,28 +322,9 @@ static inline int ompi_atomic_cmpset_rel_xx( volatile void* addr, int64_t oldval * See ompi_atomic_cmpsetrel_* for pseudo-code. 
*/ #define ompi_atomic_cmpset_rel( ADDR, OLDVAL, NEWVAL ) \ - ompi_atomic_cmpset_rel_xx( (volatile void*)(ADDR), (int64_t)(OLDVAL), (int64_t)(NEWVAL), sizeof(*(ADDR)) ) + ompi_atomic_cmpset_rel_xx( (volatile void*)(ADDR), (int64_t)(OLDVAL), \ + (int64_t)(NEWVAL), sizeof(*(ADDR)) ) -static inline void ompi_atomic_add_xx( volatile void* addr, int32_t value, size_t length ) -{ - switch( length ) { -#if defined(OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_32) - case 4: - ompi_atomic_add_32( (volatile int32_t*)addr, (int32_t)value ); - break; -#endif /* OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_32 */ - -#if defined(OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_64) - case 8: - ompi_atomic_add_64( (volatile int64_t*)addr, (int64_t)value ); - break; -#endif /* OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_64 */ - default: - /* This should never happen, so deliberately cause a seg fault - for corefile analysis */ - *(int*)(0) = 0; - } -} /** * Atomically increment the content depending on the type. This @@ -357,28 +335,8 @@ static inline void ompi_atomic_add_xx( volatile void* addr, int32_t value, size_ * @param delta Value to add (converted to ). 
*/ #define ompi_atomic_add( ADDR, VALUE ) \ - ompi_atomic_add_xx( (volatile void*)(ADDR), (int32_t)(VALUE), sizeof(*(ADDR)) ) - -static inline void ompi_atomic_sub_xx( volatile void* addr, int32_t value, size_t length ) -{ - switch( length ) { -#if defined(OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_32) - case 4: - ompi_atomic_sub_32( (volatile int32_t*)addr, (int32_t)value ); - break; -#endif /* OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_32 */ - -#if defined(OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_64) - case 8: - ompi_atomic_sub_64( (volatile int64_t*)addr, (int64_t)value ); - break; -#endif /* OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_64 */ - default: - /* This should never happen, so deliberately cause a seg fault - for corefile analysis */ - *(int*)(0) = 0; - } -} + ompi_atomic_add_xx( (volatile void*)(ADDR), (int32_t)(VALUE), \ + sizeof(*(ADDR)) ) /** * Atomically decrement the content depending on the type. This @@ -389,55 +347,23 @@ static inline void ompi_atomic_sub_xx( volatile void* addr, int32_t value, size_ * @param delta Value to substract (converted to ). */ #define ompi_atomic_sub( ADDR, VALUE ) \ - ompi_atomic_sub_xx( (volatile void*)(ADDR), (int32_t)(VALUE), sizeof(*(ADDR)) ) + ompi_atomic_sub_xx( (volatile void*)(ADDR), (int32_t)(VALUE), \ + sizeof(*(ADDR)) ) -#if OMPI_HAVE_ATOMIC || OMPI_HAVE_ATOMIC_WIN32 - -/* - * Atomic locks - */ - -/** - * Enumeration of lock states - */ -enum { - OMPI_ATOMIC_UNLOCKED = 0, - OMPI_ATOMIC_LOCKED = 1 -}; +#endif /* OMPI_HAVE_ATOMIC_MATH_32 || OMPI_HAVE_ATOMIC_MATH_64 */ -static inline int ompi_atomic_trylock(ompi_lock_t *lock) -{ - return ompi_atomic_cmpset_acq( &(lock->u.lock), - OMPI_ATOMIC_UNLOCKED, OMPI_ATOMIC_LOCKED); +/********************************************************************** + * + * Include system specific inline asm definitions. Otherwise + * the definitions are in system specific .s files in src/util. 
+ * + *********************************************************************/ +#include "include/sys/atomic_impl.h" + + +#if defined(c_plusplus) || defined(__cplusplus) } - -static inline void ompi_atomic_lock(ompi_lock_t *lock) -{ - while( !ompi_atomic_cmpset_acq( &(lock->u.lock), - OMPI_ATOMIC_UNLOCKED, OMPI_ATOMIC_LOCKED) ) { - while (lock->u.lock == OMPI_ATOMIC_LOCKED) { - /* spin */ ; - } - } -} - -static inline void ompi_atomic_unlock(ompi_lock_t *lock) -{ - /* - ompi_atomic_cmpset_rel( &(lock->u.lock), - OMPI_ATOMIC_LOCKED, OMPI_ATOMIC_UNLOCKED); - */ - lock->u.lock=OMPI_ATOMIC_UNLOCKED; -} - -/* Lock initialization function. It set the lock to UNLOCKED. - */ -static inline void ompi_atomic_init( ompi_lock_t* lock, int value ) -{ - lock->u.lock = value; -} - -#endif /* OMPI_HAVE_ATOMIC || OMPI_HAVE_ATOMIC_WIN32 */ +#endif #endif /* OMPI_SYS_ATOMIC_H */ diff --git a/src/include/sys/atomic_impl.h b/src/include/sys/atomic_impl.h new file mode 100644 index 0000000000..af28f3a420 --- /dev/null +++ b/src/include/sys/atomic_impl.h @@ -0,0 +1,270 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University. + * All rights reserved. + * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. + * All rights reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +/* Inline C implementation of the functions defined in atomic.h */ + + +/********************************************************************** + * + * Atomic math operations + * + * All the architectures provide a compare_and_set atomic operations. If + * they dont provide atomic additions and/or substractions then we can + * define these operations using the atomic compare_and_set. + * + * Some architectures does not provide support for the 64 bits + * atomic operations. 
Until we find a better solution let's just + * undefine all those functions if there is no 64 bit cmpset + * + *********************************************************************/ +#if OMPI_HAVE_ATOMIC_CMPSET_32 + +#if !defined(OMPI_HAVE_ATOMIC_ADD_32) +#define OMPI_HAVE_ATOMIC_ADD_32 1 +static inline int32_t +ompi_atomic_add_32(volatile int32_t *addr, int delta) +{ + int32_t oldval; + + do { + oldval = *addr; + } while (0 == ompi_atomic_cmpset_32(addr, oldval, oldval + delta)); + return (oldval + delta); +} +#endif /* OMPI_HAVE_ATOMIC_CMPSET_32 */ + + +#if !defined(OMPI_HAVE_ATOMIC_SUB_32) +#define OMPI_HAVE_ATOMIC_SUB_32 1 +static inline int32_t +ompi_atomic_sub_32(volatile int32_t *addr, int delta) +{ + int32_t oldval; + + do { + oldval = *addr; + } while (0 == ompi_atomic_cmpset_32(addr, oldval, oldval - delta)); + return (oldval - delta); +} +#endif /* OMPI_HAVE_ATOMIC_SUB_32 */ + +#endif /* OMPI_HAVE_ATOMIC_CMPSET_32 */ + + +#if OMPI_HAVE_ATOMIC_CMPSET_64 + +#if !defined(OMPI_HAVE_ATOMIC_ADD_64) +#define OMPI_HAVE_ATOMIC_ADD_64 1 +static inline int64_t +ompi_atomic_add_64(volatile int64_t *addr, int64_t delta) +{ + int64_t oldval; + + do { + oldval = *addr; + } while (0 == ompi_atomic_cmpset_64(addr, oldval, oldval + delta)); + return (oldval + delta); +} +#endif /* OMPI_HAVE_ATOMIC_ADD_64 */ + + +#if !defined(OMPI_HAVE_ATOMIC_SUB_64) +#define OMPI_HAVE_ATOMIC_SUB_64 1 +static inline int64_t +ompi_atomic_sub_64(volatile int64_t *addr, int64_t delta) +{ + int64_t oldval; + + do { + oldval = *addr; + } while (0 == ompi_atomic_cmpset_64(addr, oldval, oldval - delta)); + return (oldval - delta); +} +#endif /* OMPI_HAVE_ATOMIC_SUB_64 */ + +#endif /* OMPI_HAVE_ATOMIC_CMPSET_64 */ + + +#if (OMPI_HAVE_ATOMIC_CMPSET_32 || OMPI_HAVE_ATOMIC_CMPSET_64) + +static inline int +ompi_atomic_cmpset_xx(volatile void* addr, int64_t oldval, + int64_t newval, size_t length) +{ + switch( length ) { +#if OMPI_HAVE_ATOMIC_CMPSET_32 + case 4: + return ompi_atomic_cmpset_32( 
(volatile int32_t*)addr, + (int32_t)oldval, (int32_t)newval ); +#endif /* OMPI_HAVE_ATOMIC_CMPSET_32 */ + +#if OMPI_HAVE_ATOMIC_CMPSET_64 + case 8: + return ompi_atomic_cmpset_64( (volatile int64_t*)addr, + (int64_t)oldval, (int64_t)newval ); +#endif /* OMPI_HAVE_ATOMIC_CMPSET_64 */ + default: + /* This should never happen, so deliberately cause a seg fault + for corefile analysis */ + *(int*)(0) = 0; + } + return 0; /* always fail */ +} + + +static inline int +ompi_atomic_cmpset_acq_xx(volatile void* addr, int64_t oldval, + int64_t newval, size_t length) +{ + switch( length ) { +#if OMPI_HAVE_ATOMIC_CMPSET_32 + case 4: + return ompi_atomic_cmpset_acq_32( (volatile int32_t*)addr, + (int32_t)oldval, (int32_t)newval ); +#endif /* OMPI_HAVE_ATOMIC_CMPSET_32 */ + +#if OMPI_HAVE_ATOMIC_CMPSET_64 + case 8: + return ompi_atomic_cmpset_acq_64( (volatile int64_t*)addr, + (int64_t)oldval, (int64_t)newval ); +#endif /* OMPI_HAVE_ATOMIC_CMPSET_64 */ + default: + /* This should never happen, so deliberately cause a seg fault + for corefile analysis */ + *(int*)(0) = 0; + } + return 0; /* always fail */ +} + + +static inline int +ompi_atomic_cmpset_rel_xx(volatile void* addr, int64_t oldval, + int64_t newval, size_t length) +{ + switch( length ) { +#if OMPI_HAVE_ATOMIC_CMPSET_32 + case 4: + return ompi_atomic_cmpset_rel_32( (volatile int32_t*)addr, + (int32_t)oldval, (int32_t)newval ); +#endif /* OMPI_HAVE_ATOMIC_CMPSET_32 */ + +#if OMPI_HAVE_ATOMIC_CMPSET_64 + case 8: + return ompi_atomic_cmpset_rel_64( (volatile int64_t*)addr, + (int64_t)oldval, (int64_t)newval ); +#endif /* OMPI_HAVE_ATOMIC_CMPSET_64 */ + default: + /* This should never happen, so deliberately cause a seg fault + for corefile analysis */ + *(int*)(0) = 0; + } + return 0; /* always fail */ +} + + +static inline void +ompi_atomic_add_xx(volatile void* addr, int32_t value, size_t length) +{ + switch( length ) { +#if OMPI_HAVE_ATOMIC_CMPSET_32 + case 4: + ompi_atomic_add_32( (volatile int32_t*)addr, 
(int32_t)value ); + break; +#endif /* OMPI_HAVE_ATOMIC_CMPSET_32 */ + +#if OMPI_HAVE_ATOMIC_CMPSET_64 + case 8: + ompi_atomic_add_64( (volatile int64_t*)addr, (int64_t)value ); + break; +#endif /* OMPI_HAVE_ATOMIC_CMPSET_64 */ + default: + /* This should never happen, so deliberately cause a seg fault + for corefile analysis */ + *(int*)(0) = 0; + } +} + + +static inline void +ompi_atomic_sub_xx(volatile void* addr, int32_t value, size_t length) +{ + switch( length ) { +#if OMPI_HAVE_ATOMIC_CMPSET_32 + case 4: + ompi_atomic_sub_32( (volatile int32_t*)addr, (int32_t)value ); + break; +#endif /* OMPI_HAVE_ATOMIC_CMPSET_32 */ + +#if OMPI_HAVE_ATOMIC_CMPSET_64 + case 8: + ompi_atomic_sub_64( (volatile int64_t*)addr, (int64_t)value ); + break; +#endif /* OMPI_HAVE_ATOMIC_CMPSET_64 */ + default: + /* This should never happen, so deliberately cause a seg fault + for corefile analysis */ + *(int*)(0) = 0; + } +} + +#endif /* (OMPI_HAVE_ATOMIC_CMPSET_32 || OMPI_HAVE_ATOMIC_CMPSET_64) */ + + +/********************************************************************** + * + * Atomic spinlocks + * + *********************************************************************/ +#if OMPI_HAVE_ATOMIC_SPINLOCKS +/* + * Lock initialization function. It set the lock to UNLOCKED. 
+ */ +static inline void +ompi_atomic_init( ompi_lock_t* lock, int value ) +{ + lock->u.lock = value; +} + + +static inline int +ompi_atomic_trylock(ompi_lock_t *lock) +{ + return ompi_atomic_cmpset_acq( &(lock->u.lock), + OMPI_ATOMIC_UNLOCKED, OMPI_ATOMIC_LOCKED); +} + + +static inline void +ompi_atomic_lock(ompi_lock_t *lock) +{ + while( !ompi_atomic_cmpset_acq( &(lock->u.lock), + OMPI_ATOMIC_UNLOCKED, OMPI_ATOMIC_LOCKED) ) { + while (lock->u.lock == OMPI_ATOMIC_LOCKED) { + /* spin */ ; + } + } +} + + +static inline void +ompi_atomic_unlock(ompi_lock_t *lock) +{ + /* + ompi_atomic_cmpset_rel( &(lock->u.lock), + OMPI_ATOMIC_LOCKED, OMPI_ATOMIC_UNLOCKED); + */ + lock->u.lock=OMPI_ATOMIC_UNLOCKED; +} + +#endif /* OMPI_HAVE_ATOMIC_SPINLOCKS */ diff --git a/src/include/sys/ia32/Makefile.am b/src/include/sys/ia32/Makefile.am index 7452a41040..505c034c08 100644 --- a/src/include/sys/ia32/Makefile.am +++ b/src/include/sys/ia32/Makefile.am @@ -14,7 +14,7 @@ include $(top_srcdir)/config/Makefile.options -noinst_HEADERS = atomic.h atomic.s +noinst_HEADERS = atomic.h # Conditionally install the header files diff --git a/src/include/sys/ia32/atomic.h b/src/include/sys/ia32/atomic.h index b1648d0c23..c83d279832 100644 --- a/src/include/sys/ia32/atomic.h +++ b/src/include/sys/ia32/atomic.h @@ -19,8 +19,7 @@ * On ia32, we use cmpxchg. 
*/ - -#ifdef HAVE_SMP +#if OMPI_WANT_SMP_LOCKS #define SMPLOCK "lock; " #define MB() __asm__ __volatile__("": : :"memory") #else @@ -29,6 +28,29 @@ #endif +/********************************************************************** + * + * Define constants for IA32 + * + *********************************************************************/ +#define OMPI_HAVE_ATOMIC_MEM_BARRIER 1 + +#define OMPI_HAVE_ATOMIC_CMPSET_32 1 + +#define OMPI_HAVE_ATOMIC_MATH_32 1 +#define OMPI_HAVE_ATOMIC_ADD_32 1 +#define OMPI_HAVE_ATOMIC_SUB_32 1 + +#define OMPI_HAVE_ATOMIC_CMPSET_64 1 + + +/********************************************************************** + * + * Memory Barriers + * + *********************************************************************/ +#if OMPI_GCC_INLINE_ASSEMBLY + static inline void ompi_atomic_mb(void) { MB(); @@ -46,7 +68,16 @@ static inline void ompi_atomic_wmb(void) MB(); } -#define OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_32 +#endif /* OMPI_GCC_INLINE_ASSEMBLY */ + + +/********************************************************************** + * + * Atomic math operations + * + *********************************************************************/ +#if OMPI_GCC_INLINE_ASSEMBLY + static inline int ompi_atomic_cmpset_32(volatile int32_t *addr, int32_t oldval, int32_t newval) @@ -62,14 +93,17 @@ static inline int ompi_atomic_cmpset_32(volatile int32_t *addr, return (int)ret; } +#endif /* OMPI_GCC_INLINE_ASSEMBLY */ + #define ompi_atomic_cmpset_acq_32 ompi_atomic_cmpset_32 #define ompi_atomic_cmpset_rel_32 ompi_atomic_cmpset_32 -#define OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_64 -typedef struct { - uint32_t lo; - uint32_t hi; -} lwords_t; +#if OMPI_GCC_INLINE_ASSEMBLY + +#ifndef ll_low /* GLIBC provides these somewhere, so protect */ +#define ll_low(x) *(((unsigned int*)&(x))+0) +#define ll_high(x) *(((unsigned int*)&(x))+1) +#endif /* On Linux the EBX register is used by the shared libraries * to keep the global offset. 
In same time this register is @@ -85,27 +119,28 @@ static inline int ompi_atomic_cmpset_64(volatile int64_t *addr, * Compare EDX:EAX with m64. If equal, set ZF and load ECX:EBX into * m64. Else, clear ZF and load m64 into EDX:EAX. */ - lwords_t *pold = (lwords_t*)&oldval; - lwords_t *pnew = (lwords_t*)&newval; - unsigned char realized; + unsigned char ret; - __asm__ __volatile( - "push %%ebx \n\t" - "movl %4, %%ebx \n\t" - SMPLOCK "cmpxchg8b %1 \n\t" - "sete %0 \n\t" - "pop %%ebx \n\t" - : "=qm" (realized) - : "m"(*((volatile long*)addr)), "a"(pold->lo), "d"(pold->hi), - "r"(pnew->lo), "c"(pnew->hi) - : "cc", "memory" ); - return realized; + __asm__ __volatile__( + "push %%ebx \n\t" + "movl %4, %%ebx \n\t" + SMPLOCK "cmpxchg8b (%1) \n\t" + "sete %0 \n\t" + "pop %%ebx \n\t" + : "=qm"(ret) + : "D"(addr), "a"(ll_low(oldval)), "d"(ll_high(oldval)), + "r"(ll_low(newval)), "c"(ll_high(newval)) + : "cc", "memory"); + return (int) ret; } +#endif /* OMPI_GCC_INLINE_ASSEMBLY */ + #define ompi_atomic_cmpset_acq_64 ompi_atomic_cmpset_64 #define ompi_atomic_cmpset_rel_64 ompi_atomic_cmpset_64 -#define OMPI_ARCHITECTURE_DEFINE_ATOMIC_ADD_32 +#if OMPI_GCC_INLINE_ASSEMBLY + /** * atomic_add - add integer to atomic variable * @i: integer value to add @@ -122,7 +157,7 @@ static inline int ompi_atomic_add_32(volatile int32_t* v, int i) return (*v); /* should be an atomic operation */ } -#define OMPI_ARCHITECTURE_DEFINE_ATOMIC_SUB_32 + /** * atomic_sub - subtract the atomic variable * @i: integer value to subtract @@ -139,5 +174,6 @@ static inline int ompi_atomic_sub_32(volatile int32_t* v, int i) return (*v); /* should be an atomic operation */ } +#endif /* OMPI_GCC_INLINE_ASSEMBLY */ #endif /* ! OMPI_SYS_ARCH_ATOMIC_H */ diff --git a/src/include/sys/ia32/atomic.s b/src/include/sys/ia32/atomic.s deleted file mode 100644 index 908d36d88a..0000000000 --- a/src/include/sys/ia32/atomic.s +++ /dev/null @@ -1,193 +0,0 @@ -;; -;; Copyright (c) 2004-2005 The Trustees of Indiana University. 
-;; All rights reserved. -;; Copyright (c) 2004-2005 The Trustees of the University of Tennessee. -;; All rights reserved. -;; Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -;; University of Stuttgart. All rights reserved. -;; $COPYRIGHT$ -;; -;; Additional copyrights may follow -;; -;; $HEADER$ -;; - .file "atomic.c" - .text -.globl ompi_atomic_mb - .type ompi_atomic_mb,@function -ompi_atomic_mb: - pushl %ebp - movl %esp, %ebp - leave - ret -.Lfe1: - .size ompi_atomic_mb,.Lfe1-ompi_atomic_mb -.globl ompi_atomic_rmb - .type ompi_atomic_rmb,@function -ompi_atomic_rmb: - pushl %ebp - movl %esp, %ebp - leave - ret -.Lfe2: - .size ompi_atomic_rmb,.Lfe2-ompi_atomic_rmb -.globl ompi_atomic_wmb - .type ompi_atomic_wmb,@function -ompi_atomic_wmb: - pushl %ebp - movl %esp, %ebp - leave - ret -.Lfe3: - .size ompi_atomic_wmb,.Lfe3-ompi_atomic_wmb -.globl ompi_atomic_cmpset_32 - .type ompi_atomic_cmpset_32,@function -ompi_atomic_cmpset_32: - pushl %ebp - movl %esp, %ebp - pushl %ebx - subl $4, %esp - movl 12(%ebp), %eax - movl %eax, -8(%ebp) - movl -8(%ebp), %edx - movl 16(%ebp), %ecx - movl %edx, %eax - movl 8(%ebp), %ebx -#APP - cmpxchgl %ecx,(%ebx) - setz %al - movzbl %al,%eax - -#NO_APP - movl %eax, %edx - movl %edx, -8(%ebp) - movl -8(%ebp), %eax - cmpl 12(%ebp), %eax - sete %al - movzbl %al, %eax - addl $4, %esp - popl %ebx - leave - ret -.Lfe4: - .size ompi_atomic_cmpset_32,.Lfe4-ompi_atomic_cmpset_32 -.globl ompi_atomic_cmpset_acq_32 - .type ompi_atomic_cmpset_acq_32,@function -ompi_atomic_cmpset_acq_32: - pushl %ebp - movl %esp, %ebp - subl $8, %esp - subl $4, %esp - pushl 16(%ebp) - pushl 12(%ebp) - pushl 8(%ebp) - call ompi_atomic_cmpset_32 - addl $16, %esp - leave - ret -.Lfe5: - .size ompi_atomic_cmpset_acq_32,.Lfe5-ompi_atomic_cmpset_acq_32 -.globl ompi_atomic_cmpset_rel_32 - .type ompi_atomic_cmpset_rel_32,@function -ompi_atomic_cmpset_rel_32: - pushl %ebp - movl %esp, %ebp - subl $8, %esp - subl $4, %esp - pushl 16(%ebp) - pushl 12(%ebp) 
- pushl 8(%ebp) - call ompi_atomic_cmpset_32 - addl $16, %esp - leave - ret -.Lfe6: - .size ompi_atomic_cmpset_rel_32,.Lfe6-ompi_atomic_cmpset_rel_32 -.globl ompi_atomic_cmpset_64 - .type ompi_atomic_cmpset_64,@function -ompi_atomic_cmpset_64: - pushl %ebp - movl %esp, %ebp - pushl %ebx - subl $28, %esp - movl 12(%ebp), %eax - movl 16(%ebp), %edx - movl %eax, -16(%ebp) - movl %edx, -12(%ebp) - movl 20(%ebp), %eax - movl 24(%ebp), %edx - movl %eax, -24(%ebp) - movl %edx, -20(%ebp) - movl -16(%ebp), %eax - movl -12(%ebp), %edx - movl %eax, -32(%ebp) - movl %edx, -28(%ebp) - movl -16(%ebp), %ecx - movl -12(%ebp), %ebx - movl -32(%ebp), %eax - xorl %ecx, %eax - movl -28(%ebp), %edx - xorl %ebx, %edx - orl %edx, %eax - testl %eax, %eax - sete %al - movzbl %al, %eax - addl $28, %esp - popl %ebx - leave - ret -.Lfe7: - .size ompi_atomic_cmpset_64,.Lfe7-ompi_atomic_cmpset_64 -.globl ompi_atomic_cmpset_acq_64 - .type ompi_atomic_cmpset_acq_64,@function -ompi_atomic_cmpset_acq_64: - pushl %ebp - movl %esp, %ebp - subl $24, %esp - movl 12(%ebp), %eax - movl 16(%ebp), %edx - movl %eax, -8(%ebp) - movl %edx, -4(%ebp) - movl 20(%ebp), %eax - movl 24(%ebp), %edx - movl %eax, -16(%ebp) - movl %edx, -12(%ebp) - subl $12, %esp - pushl -12(%ebp) - pushl -16(%ebp) - pushl -4(%ebp) - pushl -8(%ebp) - pushl 8(%ebp) - call ompi_atomic_cmpset_64 - addl $32, %esp - leave - ret -.Lfe8: - .size ompi_atomic_cmpset_acq_64,.Lfe8-ompi_atomic_cmpset_acq_64 -.globl ompi_atomic_cmpset_rel_64 - .type ompi_atomic_cmpset_rel_64,@function -ompi_atomic_cmpset_rel_64: - pushl %ebp - movl %esp, %ebp - subl $24, %esp - movl 12(%ebp), %eax - movl 16(%ebp), %edx - movl %eax, -8(%ebp) - movl %edx, -4(%ebp) - movl 20(%ebp), %eax - movl 24(%ebp), %edx - movl %eax, -16(%ebp) - movl %edx, -12(%ebp) - subl $12, %esp - pushl -12(%ebp) - pushl -16(%ebp) - pushl -4(%ebp) - pushl -8(%ebp) - pushl 8(%ebp) - call ompi_atomic_cmpset_64 - addl $32, %esp - leave - ret -.Lfe9: - .size 
ompi_atomic_cmpset_rel_64,.Lfe9-ompi_atomic_cmpset_rel_64 - .ident "GCC: (GNU) 3.2.2 20030222 (Red Hat Linux 3.2.2-5)" diff --git a/src/include/sys/ia32/update.sh b/src/include/sys/ia32/update.sh index dad665e039..0c3a2f3f8f 100644 --- a/src/include/sys/ia32/update.sh +++ b/src/include/sys/ia32/update.sh @@ -24,7 +24,9 @@ cat > $CFILE< #define static #define inline +#define OMPI_GCC_INLINE_ASSEMBLY 1 +#define OMPI_WANT_SMP_LOCKS 1 #include "atomic.h" EOF -gcc -I. -S $CFILE -o atomic.s +gcc -O1 -I. -S $CFILE -o atomic.s diff --git a/src/include/sys/ia64/atomic.h b/src/include/sys/ia64/atomic.h index 7e698980cc..3a29029f9b 100644 --- a/src/include/sys/ia64/atomic.h +++ b/src/include/sys/ia64/atomic.h @@ -20,13 +20,30 @@ */ -#ifdef HAVE_SMP +#if OMPI_WANT_SMP_LOCKS #define MB() __asm__ __volatile__("": : :"memory") #else #define MB() #endif +/********************************************************************** + * + * Define constants for IA64 + * + *********************************************************************/ +#define OMPI_HAVE_MEM_BARRIER 1 + +#define OMPI_HAVE_ATOMIC_CMPSET_32 +#define OMPI_HAVE_ATOMIC_CMPSET_64 + +/********************************************************************** + * + * Memory Barriers + * + *********************************************************************/ +#if OMPI_GCC_INLINE_ASSEMBLY + static inline void ompi_atomic_mb(void) { MB(); @@ -44,13 +61,23 @@ static inline void ompi_atomic_wmb(void) MB(); } + +#endif /* OMPI_GCC_INLINE_ASSEMBLY */ + + +/********************************************************************** + * + * Atomic math operations + * + *********************************************************************/ +#if OMPI_GCC_INLINE_ASSEMBLY + #define ia64_cmpxchg4_acq(ptr, new, old) \ ({ \ __u64 ia64_intri_res; \ ia64_intri_res; \ }) -#define OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_32 static inline int ompi_atomic_cmpset_acq_32( volatile int32_t *addr, int32_t oldval, int32_t newval) { @@ -76,10 +103,13 @@ static 
inline int ompi_atomic_cmpset_rel_32( volatile int32_t *addr, return ((int32_t)ret == oldval); } +#endif /* OMPI_GCC_INLINE_ASSEMBLY */ + #define ompi_atomic_cmpset_32 ompi_atomic_cmpset_acq_32 -#define OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_64 +#if OMPI_GCC_INLINE_ASSEMBLY + static inline int ompi_atomic_cmpset_acq_64( volatile int64_t *addr, int64_t oldval, int64_t newval) { @@ -105,6 +135,7 @@ static inline int ompi_atomic_cmpset_rel_64( volatile int64_t *addr, return ((int32_t)ret == oldval); } +#endif /* OMPI_GCC_INLINE_ASSEMBLY */ #define ompi_atomic_cmpset_64 ompi_atomic_cmpset_acq_64 diff --git a/src/include/sys/ia64/update.sh b/src/include/sys/ia64/update.sh index dad665e039..9487684052 100644 --- a/src/include/sys/ia64/update.sh +++ b/src/include/sys/ia64/update.sh @@ -24,6 +24,7 @@ cat > $CFILE< #define static #define inline +#define OMPI_GCC_INLINE_ASSEMBLY 1 #include "atomic.h" EOF diff --git a/src/include/sys/powerpc/Makefile.am b/src/include/sys/powerpc/Makefile.am index d7564459aa..7f1d28deb7 100644 --- a/src/include/sys/powerpc/Makefile.am +++ b/src/include/sys/powerpc/Makefile.am @@ -14,7 +14,7 @@ include $(top_srcdir)/config/Makefile.options -noinst_HEADERS = atomic.h atomic.s +noinst_HEADERS = atomic.h # Conditionally install the header files diff --git a/src/include/sys/powerpc/atomic.h b/src/include/sys/powerpc/atomic.h index 53e9390f68..11083d7624 100644 --- a/src/include/sys/powerpc/atomic.h +++ b/src/include/sys/powerpc/atomic.h @@ -38,26 +38,88 @@ #endif -static inline void ompi_atomic_mb(void) +/********************************************************************** + * + * Define constants for PowerPC 32 + * + *********************************************************************/ +#define OMPI_HAVE_ATOMIC_MEM_BARRIER 1 + +#define OMPI_HAVE_ATOMIC_CMPSET_32 1 + +#define OMPI_HAVE_ATOMIC_MATH_32 1 +#define OMPI_HAVE_ATOMIC_ADD_32 1 +#define OMPI_HAVE_ATOMIC_SUB_32 1 + +#if (OMPI_ASSEMBLY_ARCH == OMPI_POWERPC64) || 
(OMPI_POWERPC_SUPPORT_64BIT && OMPI_GCC_INLINE_ASSEMBLY) +#define OMPI_HAVE_ATOMIC_CMPSET_64 1 +#endif + + +/********************************************************************** + * + * Memory Barriers + * + *********************************************************************/ +#if OMPI_GCC_INLINE_ASSEMBLY + +static inline +void ompi_atomic_mb(void) { MB(); } -static inline void ompi_atomic_rmb(void) +static inline +void ompi_atomic_rmb(void) { RMB(); } -static inline void ompi_atomic_wmb(void) +static inline +void ompi_atomic_wmb(void) { WMB(); } -#define OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_32 -static inline int ompi_atomic_cmpset_32( volatile int32_t *addr, - int32_t oldval, int32_t newval) +#elif OMPI_XLC_INLINE_ASSEMBLY /* end OMPI_GCC_INLINE_ASSEMBLY */ + +/* Yeah, I don't know who thought this was a reasonable syntax for + * inline assembly. Do these because they are used so often and they + * are fairly simple (aka: there is a tech pub on IBM's web site + * containing the right hex for the instructions). 
+ */ + +void ompi_atomic_mb(void); +#pragma mc_func ompi_atomic_mb { "7c0004ac" } /* sync */ +#pragma reg_killed_by ompi_atomic_mb /* none */ + +void ompi_atomic_rmb(void); +#pragma mc_func ompi_atomic_rmb { "7c2004ac" } /* lwsync */ +#pragma reg_killed_by ompi_atomic_rmb /* none */ + +void ompi_atomic_wmb(void); +#pragma mc_func ompi_atomic_wmb { "7c0006ac" } /* eieio */ +#pragma reg_killed_by ompi_atomic_wmb /* none */ + +#else /* end OMPI_XLC_INLINE_ASSEMBLY */ + +void ompi_atomic_mb(void); +void ompi_atomic_rmb(void); +void ompi_atomic_wmb(void); + +#endif + +/********************************************************************** + * + * Atomic math operations + * + *********************************************************************/ +#if OMPI_GCC_INLINE_ASSEMBLY + +static inline int ompi_atomic_cmpset_32(volatile int32_t *addr, + int32_t oldval, int32_t newval) { int32_t ret; @@ -76,9 +138,13 @@ static inline int ompi_atomic_cmpset_32( volatile int32_t *addr, return (ret == oldval); } - -static inline int ompi_atomic_cmpset_acq_32( volatile int32_t *addr, - int32_t oldval, int32_t newval) +/* these two functions aren't inlined in the non-gcc case because then + there would be two function calls (since neither cmpset_32 nor + atomic_?mb can be inlined). 
Instead, we "inline" them by hand in + the assembly, meaning there is one function call overhead instead + of two */ +static inline int ompi_atomic_cmpset_acq_32(volatile int32_t *addr, + int32_t oldval, int32_t newval) { int rc; @@ -89,17 +155,33 @@ static inline int ompi_atomic_cmpset_acq_32( volatile int32_t *addr, } -static inline int ompi_atomic_cmpset_rel_32( volatile int32_t *addr, - int32_t oldval, int32_t newval) +static inline int ompi_atomic_cmpset_rel_32(volatile int32_t *addr, + int32_t oldval, int32_t newval) { ompi_atomic_wmb(); return ompi_atomic_cmpset_32(addr, oldval, newval); } -#if defined(HOW_TO_DECIDE_IF_THE_ARCHI_SUPPORT_64_BITS_ATOMICS) -#define OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_64 -static inline int ompi_atomic_cmpset_64( volatile int64_t *addr, - int64_t oldval, int64_t newval) +#else +int ompi_atomic_cmpset_32(volatile int32_t *addr, + int32_t oldval, int32_t newval); +int ompi_atomic_cmpset_acq_32(volatile int32_t *addr, + int32_t oldval, int32_t newval); +int ompi_atomic_cmpset_rel_32(volatile int32_t *addr, + int32_t oldval, int32_t newval); +#endif /* OMPI_GCC_INLINE_ASSEMBLY */ + + +#if OMPI_POWERPC_SUPPORT_64BIT + +#if OMPI_GCC_INLINE_ASSEMBLY +/* these two functions aren't inlined in the non-gcc case because then + there would be two function calls (since neither cmpset_64 nor + atomic_?mb can be inlined). 
Instead, we "inline" them by hand in + the assembly, meaning there is one function call overhead instead + of two */ +static inline int ompi_atomic_cmpset_64(volatile int64_t *addr, + int64_t oldval, int64_t newval) { int64_t ret; @@ -117,9 +199,8 @@ static inline int ompi_atomic_cmpset_64( volatile int64_t *addr, return (ret == oldval); } - -static inline int ompi_atomic_cmpset_acq_64( volatile int64_t *addr, - int64_t oldval, int64_t newval) +static inline int ompi_atomic_cmpset_acq_64(volatile int64_t *addr, + int64_t oldval, int64_t newval) { int rc; @@ -130,15 +211,30 @@ static inline int ompi_atomic_cmpset_acq_64( volatile int64_t *addr, } -static inline int ompi_atomic_cmpset_rel_64( volatile int64_t *addr, - int64_t oldval, int64_t newval) +static inline int ompi_atomic_cmpset_rel_64(volatile int64_t *addr, + int64_t oldval, int64_t newval) { ompi_atomic_wmb(); return ompi_atomic_cmpset_64(addr, oldval, newval); } -#endif /* HOW_TO_DECIDE_IF_THE_ARCHI_SUPPORT_64_BITS_ATOMICS */ -#define OMPI_ARCHITECTURE_DEFINE_ATOMIC_ADD_32 +#elif OMPI_ASSEMBLY_ARCH == OMPI_POWERPC64 +/* currently, don't have 64 bit apps for non-inline assembly */ + +int ompi_atomic_cmpset_64(volatile int64_t *addr, + int64_t oldval, int64_t newval); +int ompi_atomic_cmpset_acq_64(volatile int64_t *addr, + int64_t oldval, int64_t newval); +int ompi_atomic_cmpset_rel_64(volatile int64_t *addr, + int64_t oldval, int64_t newval); + +#endif /* OMPI_GCC_INLINE_ASSEMBLY */ + +#endif /* OMPI_POWERPC_SUPPORT_64BIT */ + + +#if OMPI_GCC_INLINE_ASSEMBLY + static inline int32_t ompi_atomic_add_32(volatile int32_t* v, int inc) { int32_t t; @@ -155,7 +251,7 @@ static inline int32_t ompi_atomic_add_32(volatile int32_t* v, int inc) return *v; } -#define OMPI_ARCHITECTURE_DEFINE_ATOMIC_SUB_32 + static inline int32_t ompi_atomic_sub_32(volatile int32_t* v, int dec) { int32_t t; @@ -172,4 +268,7 @@ static inline int32_t ompi_atomic_sub_32(volatile int32_t* v, int dec) return *v; } + +#endif /* 
OMPI_GCC_INLINE_ASSEMBLY */ + #endif /* ! OMPI_SYS_ARCH_ATOMIC_H */ diff --git a/src/include/sys/powerpc/atomic.s b/src/include/sys/powerpc/atomic.s deleted file mode 100644 index fbffec427a..0000000000 --- a/src/include/sys/powerpc/atomic.s +++ /dev/null @@ -1,245 +0,0 @@ -;; -;; Copyright (c) 2004-2005 The Trustees of Indiana University. -;; All rights reserved. -;; Copyright (c) 2004-2005 The Trustees of the University of Tennessee. -;; All rights reserved. -;; Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -;; University of Stuttgart. All rights reserved. -;; $COPYRIGHT$ -;; -;; Additional copyrights may follow -;; -;; $HEADER$ -;; - .section __TEXT,__text,regular,pure_instructions - .section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32 -.section __TEXT,__text,regular,pure_instructions - .align 2 - .align 2 - .globl _ompi_atomic_mb -.section __TEXT,__text,regular,pure_instructions - .align 2 -_ompi_atomic_mb: - stmw r30,-8(r1) - stwu r1,-48(r1) - mr r30,r1 - lwz r1,0(r1) - lmw r30,-8(r1) - blr - .align 2 - .globl _ompi_atomic_rmb -.section __TEXT,__text,regular,pure_instructions - .align 2 -_ompi_atomic_rmb: - stmw r30,-8(r1) - stwu r1,-48(r1) - mr r30,r1 - lwz r1,0(r1) - lmw r30,-8(r1) - blr - .align 2 - .globl _ompi_atomic_wmb -.section __TEXT,__text,regular,pure_instructions - .align 2 -_ompi_atomic_wmb: - stmw r30,-8(r1) - stwu r1,-48(r1) - mr r30,r1 - lwz r1,0(r1) - lmw r30,-8(r1) - blr - .align 2 - .globl _ompi_atomic_cmpset_32 -.section __TEXT,__text,regular,pure_instructions - .align 2 -_ompi_atomic_cmpset_32: - stmw r30,-8(r1) - stwu r1,-64(r1) - mr r30,r1 - stw r3,88(r30) - stw r4,92(r30) - stw r5,96(r30) - lwz r10,88(r30) - lwz r11,88(r30) - lwz r9,92(r30) - lwz r0,96(r30) - lwz r2,88(r30) - 1: lwarx r8, 0, r11 - cmpw 0, r8, r9 - bne- 2f - stwcx. 
r0, 0, r11 - bne- 1b -2: - mr r0,r8 - stw r0,32(r30) - lwz r2,32(r30) - lwz r0,92(r30) - cmpw cr7,r2,r0 - mfcr r0 - rlwinm r0,r0,31,1 - mr r3,r0 - lwz r1,0(r1) - lmw r30,-8(r1) - blr - .align 2 - .globl _ompi_atomic_cmpset_acq_32 -.section __TEXT,__text,regular,pure_instructions - .align 2 -_ompi_atomic_cmpset_acq_32: - mflr r0 - stmw r30,-8(r1) - stw r0,8(r1) - stwu r1,-96(r1) - mr r30,r1 - stw r3,120(r30) - stw r4,124(r30) - stw r5,128(r30) - lwz r3,120(r30) - lwz r4,124(r30) - lwz r5,128(r30) - bl _ompi_atomic_cmpset_32 - mr r0,r3 - stw r0,64(r30) - bl _ompi_atomic_rmb - lwz r0,64(r30) - mr r3,r0 - lwz r1,0(r1) - lwz r0,8(r1) - mtlr r0 - lmw r30,-8(r1) - blr - .align 2 - .globl _ompi_atomic_cmpset_rel_32 -.section __TEXT,__text,regular,pure_instructions - .align 2 -_ompi_atomic_cmpset_rel_32: - mflr r0 - stmw r30,-8(r1) - stw r0,8(r1) - stwu r1,-80(r1) - mr r30,r1 - stw r3,104(r30) - stw r4,108(r30) - stw r5,112(r30) - bl _ompi_atomic_wmb - lwz r3,104(r30) - lwz r4,108(r30) - lwz r5,112(r30) - bl _ompi_atomic_cmpset_32 - mr r0,r3 - mr r3,r0 - lwz r1,0(r1) - lwz r0,8(r1) - mtlr r0 - lmw r30,-8(r1) - blr - .align 2 - .globl _ompi_atomic_cmpset_64 -.section __TEXT,__text,regular,pure_instructions - .align 2 -_ompi_atomic_cmpset_64: - stmw r30,-8(r1) - stwu r1,-96(r1) - mr r30,r1 - stw r3,120(r30) - stw r4,32(r30) - stw r5,36(r30) - stw r6,40(r30) - stw r7,44(r30) - lwz r10,120(r30) - lwz r0,120(r30) - lwz r11,32(r30) - lwz r12,36(r30) - lwz r2,40(r30) - lwz r3,44(r30) - lwz r9,120(r30) - 1: ldarx r7, 0, r0 - cmpd 0, r7, r11 - bne- 2f - stdcx. 
r2, 0, r0 - bne- 1b -2: - mr r2,r7 - mr r3,r8 - stw r2,64(r30) - stw r3,68(r30) - lfd f0,64(r30) - stfd f0,48(r30) - li r8,0 - stw r8,56(r30) - lwz r2,48(r30) - lwz r0,32(r30) - cmpw cr7,r2,r0 - bne cr7,L8 - lwz r0,52(r30) - lwz r2,36(r30) - cmpw cr7,r0,r2 - bne cr7,L8 - li r0,1 - stw r0,56(r30) -L8: - lwz r0,56(r30) - mr r3,r0 - lwz r1,0(r1) - lmw r30,-8(r1) - blr - .align 2 - .globl _ompi_atomic_cmpset_acq_64 -.section __TEXT,__text,regular,pure_instructions - .align 2 -_ompi_atomic_cmpset_acq_64: - mflr r0 - stmw r30,-8(r1) - stw r0,8(r1) - stwu r1,-112(r1) - mr r30,r1 - stw r3,136(r30) - stw r4,64(r30) - stw r5,68(r30) - stw r6,72(r30) - stw r7,76(r30) - lwz r3,136(r30) - lwz r4,64(r30) - lwz r5,68(r30) - lwz r6,72(r30) - lwz r7,76(r30) - bl _ompi_atomic_cmpset_64 - mr r0,r3 - stw r0,80(r30) - bl _ompi_atomic_rmb - lwz r0,80(r30) - mr r3,r0 - lwz r1,0(r1) - lwz r0,8(r1) - mtlr r0 - lmw r30,-8(r1) - blr - .align 2 - .globl _ompi_atomic_cmpset_rel_64 -.section __TEXT,__text,regular,pure_instructions - .align 2 -_ompi_atomic_cmpset_rel_64: - mflr r0 - stmw r30,-8(r1) - stw r0,8(r1) - stwu r1,-96(r1) - mr r30,r1 - stw r3,120(r30) - stw r4,64(r30) - stw r5,68(r30) - stw r6,72(r30) - stw r7,76(r30) - bl _ompi_atomic_wmb - lwz r3,120(r30) - lwz r4,64(r30) - lwz r5,68(r30) - lwz r6,72(r30) - lwz r7,76(r30) - bl _ompi_atomic_cmpset_64 - mr r0,r3 - mr r3,r0 - lwz r1,0(r1) - lwz r0,8(r1) - mtlr r0 - lmw r30,-8(r1) - blr diff --git a/src/include/sys/powerpc/update.sh b/src/include/sys/powerpc/update.sh index dad665e039..33c66298d2 100644 --- a/src/include/sys/powerpc/update.sh +++ b/src/include/sys/powerpc/update.sh @@ -24,7 +24,9 @@ cat > $CFILE< #define static #define inline +#define OMPI_GCC_INLINE_ASSEMBLY 1 +#define OMPI_POWERPC_SUPPORT_64BIT 0 #include "atomic.h" EOF -gcc -I. -S $CFILE -o atomic.s +gcc -DHAVE_SMP -I. 
-S $CFILE -o atomic.s diff --git a/src/include/sys/sparc64/atomic.h b/src/include/sys/sparc64/atomic.h index 4ba421e383..4ea10c41f3 100644 --- a/src/include/sys/sparc64/atomic.h +++ b/src/include/sys/sparc64/atomic.h @@ -21,13 +21,32 @@ #define ASI_P "0x80" -#ifdef HAVE_SMP +#if OMPI_WANT_SMP_LOCKS #define MEMBAR(type) __asm__ __volatile__ ("membar" type : : : "memory") #else #define MEMBAR(type) #endif +/********************************************************************** + * + * Define constants for UltraSparc 64 + * + *********************************************************************/ +#define OMPI_HAVE_ATOMIC_MEM_BARRIER 1 + +#define OMPI_HAVE_ATOMIC_CMPSET_32 1 + +#define OMPI_HAVE_ATOMIC_CMPSET_64 1 + + +/********************************************************************** + * + * Memory Barriers + * + *********************************************************************/ +#if OMPI_GCC_INLINE_ASSEMBLY + static inline void ompi_atomic_mb(void) { MEMBAR("#LoadLoad | #LoadStore | #StoreStore | #StoreLoad"); @@ -45,7 +64,16 @@ static inline void ompi_atomic_wmb(void) MEMBAR("#StoreStore"); } -#define OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_32 +#endif /* OMPI_GCC_INLINE_ASSEMBLY */ + + +/********************************************************************** + * + * Atomic math operations + * + *********************************************************************/ +#if OMPI_GCC_INLINE_ASSEMBLY + static inline int ompi_atomic_cmpset_32( volatile int32_t *addr, int32_t oldval, int32_t newval) { @@ -77,7 +105,7 @@ static inline int ompi_atomic_cmpset_rel_32( volatile int32_t *addr, return ompi_atomic_cmpset_32(addr, oldval, newval); } -#define OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_64 + static inline int ompi_atomic_cmpset_64( volatile int64_t *addr, int64_t oldval, int64_t newval) { @@ -101,6 +129,7 @@ static inline int ompi_atomic_cmpset_acq_64( volatile int64_t *addr, return rc; } + static inline int ompi_atomic_cmpset_rel_64( volatile int64_t *addr, int64_t oldval,
int64_t newval) { @@ -108,5 +137,7 @@ static inline int ompi_atomic_cmpset_rel_64( volatile int64_t *addr, return ompi_atomic_cmpset_64(addr, oldval, newval); } +#endif /* OMPI_GCC_INLINE_ASSEMBLY */ + #endif /* ! OMPI_SYS_ARCH_ATOMIC_H */ diff --git a/src/include/sys/sparc64/update.sh b/src/include/sys/sparc64/update.sh index dad665e039..9487684052 100644 --- a/src/include/sys/sparc64/update.sh +++ b/src/include/sys/sparc64/update.sh @@ -24,6 +24,7 @@ cat > $CFILE< #define static #define inline +#define OMPI_GCC_INLINE_ASSEMBLY 1 #include "atomic.h" EOF diff --git a/src/include/sys/win32/atomic.h b/src/include/sys/win32/atomic.h index 0c8824a934..cd2be135be 100644 --- a/src/include/sys/win32/atomic.h +++ b/src/include/sys/win32/atomic.h @@ -15,9 +15,12 @@ #ifndef OMPI_SYS_ARCH_ATOMIC_H #define OMPI_SYS_ARCH_ATOMIC_H 1 -/* - * On ia64, we use cmpxchg, which supports acquire/release semantics natively. - */ +/********************************************************************** + * + * Memory Barriers + * + *********************************************************************/ +#define OMPI_HAVE_ATOMIC_MEM_BARRIER 1 static inline void ompi_atomic_mb(void) { @@ -42,7 +45,14 @@ static inline void ompi_atomic_wmb(void) #endif } -#define OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_32 + +/********************************************************************** + * + * Atomic math operations + * + *********************************************************************/ + +#define OMPI_HAVE_ATOMIC_CMPSET_32 1 static inline int ompi_atomic_cmpset_acq_32( volatile int32_t *addr, int32_t oldval, int32_t newval) { @@ -81,7 +91,7 @@ static inline int ompi_atomic_cmpset_32( volatile int32_t *addr, #endif } -#define OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_64 +#define OMPI_HAVE_ATOMIC_CMPSET_64 1 static inline int ompi_atomic_cmpset_acq_64( volatile int64_t *addr, int64_t oldval, int64_t newval) { @@ -119,7 +129,9 @@ static inline int ompi_atomic_cmpset_64( volatile int64_t *addr, #endif }
-#define OMPI_ARCHITECTURE_DEFINE_ATOMIC_ADD_32 +#define OMPI_HAVE_ATOMIC_MATH_32 1 + +#define OMPI_HAVE_ATOMIC_ADD_32 1 static inline int32_t ompi_atomic_add_32(volatile int32_t *addr, int32_t delta) { return InterlockedExchangeAdd ((LONG volatile *) addr, @@ -127,7 +139,9 @@ static inline int32_t ompi_atomic_add_32(volatile int32_t *addr, int32_t delta) } -#define OMPI_ARCHITECTURE_DEFINE_ATOMIC_ADD_64 +#define OMPI_HAVE_ATOMIC_MATH_64 1 + +#define OMPI_HAVE_ATOMIC_ADD_64 1 static inline int64_t ompi_atomic_add_64(volatile int64_t *addr, int64_t delta) { #if 0 @@ -139,7 +153,7 @@ static inline int64_t ompi_atomic_add_64(volatile int64_t *addr, int64_t delta) } -#define OMPI_ARCHITECTURE_DEFINE_ATOMIC_SUB_32 +#define OMPI_HAVE_ATOMIC_SUB_32 1 static inline int32_t ompi_atomic_sub_32(volatile int32_t *addr, int32_t delta) { return InterlockedExchangeAdd( (LONG volatile *) addr, @@ -147,7 +161,7 @@ static inline int32_t ompi_atomic_sub_32(volatile int32_t *addr, int32_t delta) } -#define OMPI_ARCHITECTURE_DEFINE_ATOMIC_SUB_64 +#define OMPI_HAVE_ATOMIC_SUB_64 1 static inline int64_t ompi_atomic_sub_64(volatile int64_t *addr, int64_t delta) { #if 0 diff --git a/src/threads/mutex.c b/src/threads/mutex.c index 01961f3667..b41d23525f 100644 --- a/src/threads/mutex.c +++ b/src/threads/mutex.c @@ -42,7 +42,7 @@ static void ompi_mutex_construct(ompi_mutex_t *m) #if OMPI_HAVE_POSIX_THREADS pthread_mutex_init(&m->m_lock_pthread, 0); #endif -#if OMPI_HAVE_ATOMIC +#if OMPI_HAVE_ATOMIC_SPINLOCKS ompi_atomic_init( &m->m_lock_atomic, OMPI_ATOMIC_UNLOCKED ); #endif } diff --git a/src/threads/mutex_unix.h b/src/threads/mutex_unix.h index a140b24ccc..46b94c63b2 100644 --- a/src/threads/mutex_unix.h +++ b/src/threads/mutex_unix.h @@ -50,7 +50,7 @@ struct ompi_mutex_t { OMPI_DECLSPEC OBJ_CLASS_DECLARATION(ompi_mutex_t); -#if OMPI_HAVE_ATOMIC && OMPI_HAVE_POSIX_THREADS +#if OMPI_HAVE_ATOMIC_SPINLOCKS && OMPI_HAVE_POSIX_THREADS /* * ompi_mutex_* implemented using pthreads @@ -127,7 +127,7 @@ static
inline void ompi_mutex_atomic_unlock(ompi_mutex_t *m) } -#elif OMPI_HAVE_ATOMIC +#elif OMPI_HAVE_ATOMIC_SPINLOCKS /* * ompi_mutex_* and ompi_mutex_atomic_* implemented using atomic diff --git a/src/util/Makefile.am b/src/util/Makefile.am index aeb456e48d..ecfb9bad52 100644 --- a/src/util/Makefile.am +++ b/src/util/Makefile.am @@ -51,7 +51,6 @@ headers = \ libutil_la_SOURCES = \ $(headers) \ - assembly.s \ argv.c \ cmd_line.c \ few.c \ diff --git a/src/util/assembly.s b/src/util/assembly.s deleted file mode 100644 index 620c14fee1..0000000000 --- a/src/util/assembly.s +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. - * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/* - * File to instantiate assembly level code for non-GNU C compilers. - */ - -#ifndef __GNUC__ - -#if defined(__alpha__) -# include "include/sys/alpha/atomic.s" -#elif defined(__amd64__) || defined(__x86_64__) -# include "include/sys/amd64/atomic.s" -#elif defined(__i386__) -# include "include/sys/ia32/atomic.s" -#elif defined(__ia64__) -# include "include/sys/ia64/atomic.s" -#elif defined(__POWERPC__) -# include "include/sys/powerpc/atomic.s" -#elif defined(__sparc__) || defined(__sparc) -# include "include/sys/sparc64/atomic.s" -#endif - -#endif