* Update build system to support non-GCC inline assembly, including detection
of assembler format * Fix minor bugs in AMD64, PPC, and IA32 assembly for atomic operations * Clean up the #defines to look for when examining level of atomic operation support This commit was SVN r4183.
Этот коммит содержится в:
родитель
b240395d9a
Коммит
9c1a277804
2
Doxyfile
2
Doxyfile
@ -892,7 +892,7 @@ INCLUDE_FILE_PATTERNS =
|
||||
# or name=definition (no spaces). If the definition and the = are
|
||||
# omitted =1 is assumed.
|
||||
|
||||
PREDEFINED =
|
||||
PREDEFINED = DOXYGEN
|
||||
|
||||
# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
|
||||
# this tag can be used to specify a list of macro names that should be expanded.
|
||||
|
@ -36,6 +36,9 @@ sinclude(config/f90_check_type.m4)
|
||||
sinclude(config/f90_get_alignment.m4)
|
||||
sinclude(config/f90_get_sizeof.m4)
|
||||
|
||||
sinclude(config/ompi_try_assemble.m4)
|
||||
sinclude(config/ompi_config_asm.m4)
|
||||
|
||||
sinclude(config/ompi_case_sensitive_fs_setup.m4)
|
||||
sinclude(config/ompi_check_optflags.m4)
|
||||
sinclude(config/ompi_config_subdir.m4)
|
||||
|
763
config/ompi_config_asm.m4
Обычный файл
763
config/ompi_config_asm.m4
Обычный файл
@ -0,0 +1,763 @@
|
||||
dnl
|
||||
dnl Copyright (c) 2004-2005 The Trustees of Indiana University.
|
||||
dnl All rights reserved.
|
||||
dnl Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
|
||||
dnl All rights reserved.
|
||||
dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
dnl University of Stuttgart. All rights reserved.
|
||||
dnl $COPYRIGHT$
|
||||
dnl
|
||||
dnl Additional copyrights may follow
|
||||
dnl
|
||||
dnl $HEADER$
|
||||
dnl
|
||||
|
||||
|
||||
dnl #################################################################
dnl
dnl OMPI_CHECK_ASM_TEXT
dnl
dnl Choose the assembler directive that switches to the text
dnl section: a .csect form for AIX hosts, plain .text elsewhere.
dnl
dnl Results: shell variable ompi_cv_asm_text, a config-header
dnl string define OMPI_ASM_TEXT and a substitution of the same name.
dnl
dnl #################################################################
AC_DEFUN([OMPI_CHECK_ASM_TEXT],[
    AC_MSG_CHECKING([directive for setting text section])

    # start from the common answer, then apply host-specific overrides
    ompi_cv_asm_text=".text"
    case $host in
        *-aix*)
            ompi_cv_asm_text=[".csect .text[PR]"]
        ;;
    esac
    OMPI_ASM_TEXT="$ompi_cv_asm_text"

    AC_MSG_RESULT([$ompi_cv_asm_text])
    AC_DEFINE_UNQUOTED([OMPI_ASM_TEXT], ["$ompi_cv_asm_text"],
                       [Assembly directive for setting text section])
    AC_SUBST(OMPI_ASM_TEXT)
])dnl
|
||||
|
||||
|
||||
dnl #################################################################
dnl
dnl OMPI_CHECK_ASM_GLOBAL
dnl
dnl Sets OMPI_ASM_GLOBAL to the directive used to export a symbol
dnl (currently .globl for every host).
dnl
dnl I'm sure if I don't have a test for this, there will be some
dnl dumb platform that uses something else
dnl
dnl Results: shell variable ompi_cv_asm_global, a config-header
dnl string define OMPI_ASM_GLOBAL and a substitution of the same name.
dnl
dnl #################################################################
AC_DEFUN([OMPI_CHECK_ASM_GLOBAL],[
    AC_MSG_CHECKING([directive for exporting symbols])
    ompi_cv_asm_global=""
    case $host in
        *)
            ompi_cv_asm_global=".globl"
        ;;
    esac
    AC_MSG_RESULT([$ompi_cv_asm_global])
    AC_DEFINE_UNQUOTED([OMPI_ASM_GLOBAL], ["$ompi_cv_asm_global"],
                       [Assembly directive for exporting symbols])
    OMPI_ASM_GLOBAL="$ompi_cv_asm_global"
    dnl the substituted name must match the variable assigned above
    AC_SUBST(OMPI_ASM_GLOBAL)
])dnl
|
||||
|
||||
|
||||
dnl #################################################################
dnl
dnl OMPI_CHECK_ASM_LSYM
dnl
dnl Sets OMPI_ASM_LSYM to the prefix value on a symbol to make it
dnl an internal label (jump target and whatnot)
dnl
dnl We look for L .L $ L$ (in that order) for something that both
dnl assembles and does not leave a label in the output of nm.  Fall
dnl back to L if nothing else seems to work :/
dnl
dnl Results: shell variable ompi_cv_asm_lsym, a config-header string
dnl define OMPI_ASM_LSYM and a substitution of the same name.
dnl
dnl #################################################################
AC_DEFUN([OMPI_CHECK_ASM_LSYM],[
    AC_REQUIRE([OMPI_CHECK_ASM_LABEL_SUFFIX])
    AC_REQUIRE([AC_PROG_NM])

    AC_MSG_CHECKING([prefix for lsym labels])
    # fallback answer when no candidate proves itself below
    ompi_cv_asm_lsym="L"

    for sym in L .L $ L$ ; do
        asm_result=0
        echo "configure: trying $sym" >& AC_FD_CC
        OMPI_TRY_ASSEMBLE([foobar$ompi_cv_asm_label_suffix
${sym}mytestlabel$ompi_cv_asm_label_suffix],
            [# ok, we succeeded at assembling.  see if we can nm,
             # throwing the results in a file
             if $NM conftest.$OBJEXT > conftest.out 2>&AC_FD_CC ; then
                 if test "`grep mytestlabel conftest.out`" = "" ; then
                     # there was no symbol... looks promising to me
                     ompi_cv_asm_lsym="$sym"
                     asm_result=1
                 elif test ["`grep ' [Nt] .*mytestlabel' conftest.out`"] = "" ; then
                     # see if we have a non-global-ish symbol
                     # but we should see if we can do better.
                     # (candidate is remembered, the loop keeps going)
                     ompi_cv_asm_lsym="$sym"
                 fi
             else
                 # not so much on the NM goodness :/
                 echo "$NM failed. Output from NM was:" >& AC_FD_CC
                 # append to the configure log descriptor; a plain
                 # redirect would create a stray file instead
                 cat conftest.out >&AC_FD_CC
                 AC_MSG_WARN([$NM could not read object file])
             fi
            ])
        # stop at the first prefix that left no trace in the nm output
        if test "$asm_result" = "1" ; then
            break
        fi
    done
    rm -f conftest.out

    AC_MSG_RESULT([$ompi_cv_asm_lsym])
    AC_DEFINE_UNQUOTED([OMPI_ASM_LSYM], ["$ompi_cv_asm_lsym"],
                       [Assembly prefix for lsym labels])
    OMPI_ASM_LSYM="$ompi_cv_asm_lsym"
    AC_SUBST(OMPI_ASM_LSYM)
    unset asm_result sym
])dnl
|
||||
|
||||
|
||||
dnl #################################################################
dnl
dnl OMPI_CHECK_ASM_GSYM
dnl
dnl Sets OMPI_ASM_GSYM to the prefix value on a symbol to make it
dnl a global linkable from C.  Basically, an _ or not.
dnl
dnl Each candidate prefix is tried by assembling a file that exports
dnl <prefix>gsym_test_func and linking it against a small C program
dnl that calls gsym_test_func().  The first prefix that links wins;
dnl configure aborts if neither does.
dnl
dnl Results: shell variable ompi_cv_asm_gsym, a config-header string
dnl define OMPI_ASM_GSYM and a substitution of the same name.
dnl
dnl #################################################################
AC_DEFUN([OMPI_CHECK_ASM_GSYM],[
    AC_REQUIRE([OMPI_CHECK_ASM_TEXT])
    AC_REQUIRE([OMPI_CHECK_ASM_GLOBAL])
    AC_REQUIRE([OMPI_CHECK_ASM_LABEL_SUFFIX])

    AC_MSG_CHECKING([prefix for global symbol labels])
    ompi_cv_asm_gsym="none"

    for sym in "_" "" ; do
        asm_result=0
        echo "configure: trying $sym" >& AC_FD_CC
        # C side of the link test; rewritten on every pass because the
        # assemble helper removes all conftest files when it finishes
cat > conftest_c.c <<EOF
#ifdef __cplusplus
extern "C" {
#endif
void gsym_test_func(void);
#ifdef __cplusplus
}
#endif
int
main(int argc, char *argv[[]])
{
    gsym_test_func();
    return 0;
}
EOF
        OMPI_TRY_ASSEMBLE([
        $ompi_cv_asm_text
        $ompi_cv_asm_global ${sym}gsym_test_func
${sym}gsym_test_func${ompi_cv_asm_label_suffix}],
            [ompi_compile="$CC $CFLAGS -I. conftest_c.c -c > conftest.cmpl 2>&1"
             if AC_TRY_EVAL(ompi_compile) ; then
                 # save the warnings
                 cat conftest.cmpl >&AC_FD_CC
                 ompi_link="$CC $CFLAGS conftest_c.$OBJEXT conftest.$OBJEXT -o conftest > conftest.link 2>&1"
                 if AC_TRY_EVAL(ompi_link) ; then
                     # save the warnings
                     cat conftest.link >&AC_FD_CC
                     asm_result=1
                 else
                     cat conftest.link >&AC_FD_CC
                     echo "configure: failed C program was: " >&AC_FD_CC
                     cat conftest_c.c >&AC_FD_CC
                     echo "configure: failed ASM program was: " >&AC_FD_CC
                     cat conftest.s >&AC_FD_CC
                     asm_result=0
                 fi
             else
                 # save output and failed program
                 cat conftest.cmpl >&AC_FD_CC
                 echo "configure: failed C program was: " >&AC_FD_CC
                 # the C test program lives in conftest_c.c
                 cat conftest_c.c >&AC_FD_CC
                 asm_result=0
             fi],
            [asm_result=0])
        if test "$asm_result" = "1" ; then
            ompi_cv_asm_gsym="$sym"
            break
        fi
    done
    rm -f conftest.*

    AC_MSG_RESULT([$ompi_cv_asm_gsym])

    if test "$ompi_cv_asm_gsym" = "none" ; then
        AC_MSG_ERROR([Could not determine global symbol label prefix])
    fi

    AC_DEFINE_UNQUOTED([OMPI_ASM_GSYM], ["$ompi_cv_asm_gsym"],
                       [Assembly prefix for global symbol labels])
    OMPI_ASM_GSYM="$ompi_cv_asm_gsym"
    AC_SUBST(OMPI_ASM_GSYM)
])dnl
|
||||
|
||||
|
||||
dnl #################################################################
dnl
dnl OMPI_CHECK_ASM_LABEL_SUFFIX
dnl
dnl Sets OMPI_ASM_LABEL_SUFFIX to the value to suffix for labels
dnl (currently a colon for every host).
dnl
dnl I'm sure if I don't have a test for this, there will be some
dnl dumb platform that uses something else
dnl
dnl Results: shell variable ompi_cv_asm_label_suffix, a config-header
dnl string define OMPI_ASM_LABEL_SUFFIX and a substitution of the
dnl same name.
dnl
dnl #################################################################
AC_DEFUN([OMPI_CHECK_ASM_LABEL_SUFFIX],[
    AC_MSG_CHECKING([suffix for labels])
    ompi_cv_asm_label_suffix=""
    case $host in
        *)
            ompi_cv_asm_label_suffix=":"
        ;;
    esac
    AC_MSG_RESULT([$ompi_cv_asm_label_suffix])
    AC_DEFINE_UNQUOTED([OMPI_ASM_LABEL_SUFFIX], ["$ompi_cv_asm_label_suffix"],
                       [Assembly suffix for labels])
    OMPI_ASM_LABEL_SUFFIX="$ompi_cv_asm_label_suffix"
    dnl the substituted name must match the variable assigned above
    AC_SUBST(OMPI_ASM_LABEL_SUFFIX)
])dnl
|
||||
|
||||
|
||||
dnl #################################################################
dnl
dnl OMPI_CHECK_ASM_ALIGN_LOG
dnl
dnl Sets OMPI_ASM_ALIGN_LOG to 1 if align is specified
dnl logarithmically, 0 otherwise
dnl
dnl The probe emits one byte, then places the label foo after an
dnl ".align 4", and reads the address of foo back with nm.  An address
dnl ending in 16 or 10 (decimal or hex notation) means the argument
dnl was taken as a power of two.
dnl
dnl #################################################################
AC_DEFUN([OMPI_CHECK_ASM_ALIGN_LOG],[
    AC_REQUIRE([OMPI_CHECK_ASM_TEXT])
    AC_REQUIRE([OMPI_CHECK_ASM_GLOBAL])
    AC_REQUIRE([OMPI_CHECK_ASM_LABEL_SUFFIX])
    AC_REQUIRE([AC_PROG_NM])

    ompi_cv_asm_align_log=0
    asm_result="no"
    AC_MSG_CHECKING([if .align directive takes logarithmic value])
    OMPI_TRY_ASSEMBLE([        $ompi_cv_asm_text
        .align 4
        $ompi_cv_asm_global foo
        .byte 1
        .align 4
foo$ompi_cv_asm_label_suffix
        .byte 2],
        [ompi_asm_addr=[`$NM conftest.$OBJEXT | sed -e 's/.*\([0-9a-fA-F][0-9a-fA-F]\).*foo.*/\1/'`]],
        [ompi_asm_addr=""])
    # test for both 16 and 10 (decimal and hex notations)
    echo "configure: .align test address offset is $ompi_asm_addr" >& AC_FD_CC
    if test "$ompi_asm_addr" = "16" -o "$ompi_asm_addr" = "10" ; then
        ompi_cv_asm_align_log=1
        asm_result="yes"
    fi
    AC_MSG_RESULT([$asm_result])

    AC_DEFINE_UNQUOTED([OMPI_ASM_ALIGN_LOG],
                       [$ompi_cv_asm_align_log],
                       [Assembly align directive expects logarithmic value])

    # drop the scratch variables used by this check
    unset ompi_asm_addr asm_result
])dnl
|
||||
|
||||
|
||||
dnl #################################################################
dnl
dnl OMPI_CHECK_ASM_TYPE
dnl
dnl Sets OMPI_ASM_TYPE to the character that introduces "function"
dnl in a .type directive (needed on ELF for shared libraries).  When
dnl no candidate assembles without diagnostics, OMPI_ASM_TYPE is the
dnl empty string.
dnl
dnl Candidates, in order: @ \# %
dnl
dnl Results: shell variable ompi_cv_asm_type, a config-header string
dnl define OMPI_ASM_TYPE and a substitution of the same name.
dnl
dnl #################################################################
AC_DEFUN([OMPI_CHECK_ASM_TYPE],[
    AC_MSG_CHECKING([prefix for function in .type])
    ompi_cv_asm_type=""

    for type in @ \# % ; do
        asm_result=0
        echo "configure: trying $type" >& AC_FD_CC
        OMPI_TRY_ASSEMBLE([     .type mysym, ${type}function],
            [# assembling worked; only accept the candidate when the
             # assembler printed nothing at all (no warnings)
             if test -z "`cat conftest.out`" ; then
                 asm_result=1
                 ompi_cv_asm_type="${type}"
             fi])
        # first clean candidate wins
        test "$asm_result" = "1" && break
    done
    rm -f conftest.out

    OMPI_ASM_TYPE="$ompi_cv_asm_type"
    AC_MSG_RESULT([$ompi_cv_asm_type])
    AC_DEFINE_UNQUOTED([OMPI_ASM_TYPE], ["$ompi_cv_asm_type"],
                       [How to set function type in .type directive])
    AC_SUBST(OMPI_ASM_TYPE)
    unset asm_result type
])dnl
|
||||
|
||||
|
||||
dnl #################################################################
dnl
dnl OMPI_CHECK_ASM_SIZE
dnl
dnl Sets OMPI_ASM_SIZE to 1 if we should set .size directives for
dnl each function, 0 otherwise.
dnl
dnl The directive counts as usable when a file containing it
dnl assembles and the assembler prints nothing.
dnl
dnl Results: shell variable ompi_cv_asm_size, a config-header define
dnl OMPI_ASM_SIZE (emitted as a quoted string) and a substitution of
dnl the same name.
dnl
dnl #################################################################
AC_DEFUN([OMPI_CHECK_ASM_SIZE],[
    AC_MSG_CHECKING([if .size is needed])
    ompi_cv_asm_size=0
    asm_result="no"

    OMPI_TRY_ASSEMBLE([     .size mysym, 1],
        [# ok, we succeeded at assembling.  see if there was
         # a warning in the output.
         if test "`cat conftest.out`" = "" ; then
             ompi_cv_asm_size=1
             asm_result="yes"
         fi])
    rm -f conftest.out

    AC_MSG_RESULT([$asm_result])
    AC_DEFINE_UNQUOTED([OMPI_ASM_SIZE], ["$ompi_cv_asm_size"],
                       [Do we need to give a .size directive?])
    OMPI_ASM_SIZE="$ompi_cv_asm_size"
    dnl substitute the variable assigned above, not the .type result
    AC_SUBST(OMPI_ASM_SIZE)
    unset asm_result
])dnl
|
||||
|
||||
|
||||
dnl #################################################################
dnl
dnl OMPI_CHECK_POWERPC_REG
dnl
dnl Find out whether registers are written as bare numbers (most
dnl everyone) or with an r prefix (OS X).  The bare form is tried
dnl first; if neither form assembles, configure aborts.
dnl
dnl Results: shell variable ompi_cv_asm_powerpc_r_reg (0 or 1) and
dnl the config-header define OMPI_POWERPC_R_REGISTERS.
dnl
dnl #################################################################
AC_DEFUN([OMPI_CHECK_POWERPC_REG],[
    AC_REQUIRE([OMPI_CHECK_ASM_TEXT])
    AC_MSG_CHECKING([if PowerPC registers have r prefix])
    OMPI_TRY_ASSEMBLE([$ompi_cv_asm_text
        addi 1,1,0],
        [ompi_cv_asm_powerpc_r_reg=0],
        OMPI_TRY_ASSEMBLE([$ompi_cv_asm_text
        addi r1,r1,0],
            [ompi_cv_asm_powerpc_r_reg=1],
            AC_MSG_ERROR([Can not determine how to use PPC registers])))

    # report the outcome in yes/no form
    case "$ompi_cv_asm_powerpc_r_reg" in
        1)
            AC_MSG_RESULT([yes])
        ;;
        *)
            AC_MSG_RESULT([no])
        ;;
    esac

    AC_DEFINE_UNQUOTED([OMPI_POWERPC_R_REGISTERS],
                       [$ompi_cv_asm_powerpc_r_reg],
                       [Whether r notation is used for ppc registers])
])dnl
|
||||
|
||||
|
||||
dnl #################################################################
dnl
dnl OMPI_CHECK_POWERPC_64BIT([action-if-supported],
dnl                          [action-if-not-supported])
dnl
dnl On some powerpc chips (the PPC970 or G5), the OS usually runs in
dnl 32 bit mode, even though the hardware can do 64bit things.  If
dnl the compiler will let us, emit code for 64bit test and set type
dnl operations (on a long long).
dnl
dnl The probe assembles a single ldarx instruction, written in the
dnl register notation recorded in ompi_cv_asm_powerpc_r_reg, so
dnl OMPI_CHECK_POWERPC_REG is expected to have run first.
dnl
dnl #################################################################
AC_DEFUN([OMPI_CHECK_POWERPC_64BIT],[
    AC_REQUIRE([OMPI_CHECK_ASM_TEXT])

    AC_MSG_CHECKING([for 64-bit PowerPC assembly support])
    ppc64_result=0
    if test "$ompi_cv_asm_powerpc_r_reg" = "1" ; then
        ldarx_asm="        ldarx r1,r1,r1"
    else
        # the mnemonic and its operands need a separating blank
        ldarx_asm="        ldarx 1,1,1"
    fi
    OMPI_TRY_ASSEMBLE([$ompi_cv_asm_text
        $ldarx_asm],
        [ppc64_result=1],
        [ppc64_result=0])
    if test "$ppc64_result" = "1" ; then
        AC_MSG_RESULT([yes])
        ifelse([$1],,:,[$1])
    else
        AC_MSG_RESULT([no])
        ifelse([$2],,:,[$2])
    fi

    unset ppc64_result ldarx_asm
])dnl
|
||||
|
||||
|
||||
dnl #################################################################
dnl
dnl OMPI_CHECK_INLINE_GCC(assembly)
dnl
dnl Check if the compiler is capable of doing GCC-style inline
dnl assembly.  Some compilers emit a warning and ignore the inline
dnl assembly (xlc on OS X) and compile without error.  Therefore,
dnl the test attempts to run the emitted code to check that the
dnl assembly is actually run.  To run this test, one argument to
dnl the macro must be an assembly instruction in gcc format to move
dnl the value 0 into the register containing the variable ret.
dnl For PowerPC, this would be:
dnl
dnl   "li %0,0" : "=&r"(ret)
dnl
dnl When no assembly is supplied the answer is "no" without running
dnl any compiler test.  When cross compiling, a successful link is
dnl taken as good enough.
dnl
dnl DEFINE OMPI_GCC_INLINE_ASSEMBLY to 0 or 1 depending on GCC
dnl                support
dnl
dnl #################################################################
AC_DEFUN([OMPI_CHECK_INLINE_GCC],[
    assembly="$1"
    asm_result="unknown"

    AC_MSG_CHECKING([if $CC supports GCC inline assembly])

    if test ! "$assembly" = "" ; then
        # the program exits 0 only if the instruction really ran and
        # cleared ret
        AC_RUN_IFELSE(AC_LANG_PROGRAM([[
AC_INCLUDES_DEFAULT]],
[[int ret = 1;
__asm__ __volatile__ ($assembly);
return ret;]]),
            [asm_result="yes"], [asm_result="no"],
            [asm_result="unknown"])
    else
        # nothing to test with: answer no directly instead of feeding
        # a placeholder string to the compiler and waiting for it to fail
        asm_result="no"
    fi

    # if we're cross compiling, just try to compile and figure good enough
    if test "$asm_result" = "unknown" ; then
        AC_LINK_IFELSE(AC_LANG_PROGRAM([[
AC_INCLUDES_DEFAULT]],
[[int ret = 1;
__asm__ __volatile__ ($assembly);
return ret;]]),
            [asm_result="yes"], [asm_result="no"])
    fi

    AC_MSG_RESULT([$asm_result])

    if test "$asm_result" = "yes" ; then
        OMPI_GCC_INLINE_ASSEMBLY=1
    else
        OMPI_GCC_INLINE_ASSEMBLY=0
    fi

    AC_DEFINE_UNQUOTED([OMPI_GCC_INLINE_ASSEMBLY],
                       [$OMPI_GCC_INLINE_ASSEMBLY],
                       [Whether compiler supports GCC style inline assembly])

    unset OMPI_GCC_INLINE_ASSEMBLY assembly asm_result
])dnl
|
||||
|
||||
|
||||
dnl #################################################################
dnl
dnl OMPI_CHECK_INLINE_DEC
dnl
dnl DEFINE OMPI_DEC_INLINE_ASSEMBLY to 0 or 1 depending on whether a
dnl program that includes <c_asm.h> and calls asm("") can be linked.
dnl
dnl #################################################################
AC_DEFUN([OMPI_CHECK_INLINE_DEC],[

    AC_MSG_CHECKING([if $CC supports DEC inline assembly])

    AC_LINK_IFELSE(AC_LANG_PROGRAM([[
AC_INCLUDES_DEFAULT
#include <c_asm.h>]],
[[asm("");
return 0;]]),
        [asm_result="yes"], [asm_result="no"])

    # translate the yes/no answer into the numeric define value
    case "$asm_result" in
        yes) OMPI_DEC_INLINE_ASSEMBLY=1 ;;
        *)   OMPI_DEC_INLINE_ASSEMBLY=0 ;;
    esac

    AC_MSG_RESULT([$asm_result])

    AC_DEFINE_UNQUOTED([OMPI_DEC_INLINE_ASSEMBLY],
                       [$OMPI_DEC_INLINE_ASSEMBLY],
                       [Whether compiler supports DEC style inline assembly])

    unset OMPI_DEC_INLINE_ASSEMBLY asm_result
])dnl
|
||||
|
||||
|
||||
dnl #################################################################
dnl
dnl OMPI_CHECK_INLINE_XLC
dnl
dnl DEFINE OMPI_XLC_INLINE_ASSEMBLY to 0 or 1.  No compiler test is
dnl run: the answer is 1 only when CC is exactly "xlc" and CXX is
dnl exactly "xlC" or "xlc++".
dnl
dnl #################################################################
AC_DEFUN([OMPI_CHECK_INLINE_XLC],[

    AC_MSG_CHECKING([if $CC supports XLC inline assembly])

    # pessimistic defaults, flipped only for the xlc / xlC pairing
    asm_result="no"
    OMPI_XLC_INLINE_ASSEMBLY=0
    if test "$CC" = "xlc" && test "$CXX" = "xlC" -o "$CXX" = "xlc++" ; then
        asm_result="yes"
        OMPI_XLC_INLINE_ASSEMBLY=1
    fi

    AC_MSG_RESULT([$asm_result])
    AC_DEFINE_UNQUOTED([OMPI_XLC_INLINE_ASSEMBLY],
                       [$OMPI_XLC_INLINE_ASSEMBLY],
                       [Whether compiler supports XLC style inline assembly])

    unset OMPI_XLC_INLINE_ASSEMBLY
])dnl
|
||||
|
||||
|
||||
dnl #################################################################
dnl
dnl OMPI_CONFIG_ASM
dnl
dnl Top-level assembly configuration.  Runs the individual assembler
dnl syntax checks, handles --enable-smp-locks, maps the host triplet
dnl to an architecture name, probes the inline assembly styles and
dnl finally locates or generates the atomic operations file.
dnl
dnl DEFINE OMPI_WANT_SMP_LOCKS to 1 unless --disable-smp-locks given
dnl DEFINE OMPI_POWERPC_SUPPORT_64BIT to 0 or 1
dnl DEFINE OMPI_ASSEMBLY_ARCH to something in sys/architecture.h
dnl DEFINE OMPI_ASSEMBLY_FORMAT to string containing correct
dnl                             format for assembly (not user friendly)
dnl SUBST OMPI_ASSEMBLY_FORMAT to string containing correct
dnl                             format for assembly (not user friendly)
dnl
dnl #################################################################
AC_DEFUN([OMPI_CONFIG_ASM],[
    AC_REQUIRE([OMPI_SETUP_CC])
    AC_REQUIRE([OMPI_SETUP_CXX])
    AC_REQUIRE([AM_PROG_AS])
    AC_REQUIRE([OMPI_CHECK_ASM_TEXT])
    AC_REQUIRE([OMPI_CHECK_ASM_GLOBAL])
    AC_REQUIRE([OMPI_CHECK_ASM_GSYM])
    AC_REQUIRE([OMPI_CHECK_ASM_LSYM])
    AC_REQUIRE([OMPI_CHECK_ASM_TYPE])
    AC_REQUIRE([OMPI_CHECK_ASM_SIZE])
    AC_REQUIRE([OMPI_CHECK_ASM_LABEL_SUFFIX])
    AC_REQUIRE([OMPI_CHECK_ASM_ALIGN_LOG])

    AC_MSG_CHECKING([whether to enable smp locks])
    AC_ARG_ENABLE(smp-locks,
        AC_HELP_STRING([--enable-smp-locks],
                       [enable smp locks in atomic ops (default: enabled)]))
    if test "$enable_smp_locks" != "no"; then
        AC_MSG_RESULT([yes])
        want_smp_locks=1
    else
        # --disable-smp-locks has to actually turn the locks off
        AC_MSG_RESULT([no])
        want_smp_locks=0
    fi
    AC_DEFINE_UNQUOTED([OMPI_WANT_SMP_LOCKS], [$want_smp_locks],
                       [whether we want to have smp locks in atomic ops or not])


    # find our architecture for purposes of assembly stuff
    ompi_cv_asm_arch="UNSUPPORTED"
    OMPI_GCC_INLINE_ASSIGN=""
    OMPI_POWERPC_SUPPORT_64BIT=0
    case "${host}" in
    *-winnt*)
        ompi_cv_asm_arch="WINDOWS"
        ;;

    i?86-*)
        ompi_cv_asm_arch="IA32"
        OMPI_GCC_INLINE_ASSIGN='"movl [$]0, %0" : "=&r"(ret)'
        ;;

    x86_64*)
        ompi_cv_asm_arch="AMD64"
        OMPI_GCC_INLINE_ASSIGN='"movl [$]0, %0" : "=&r"(ret)'
        ;;

    ia64-*)
        ompi_cv_asm_arch="IA64"
        OMPI_GCC_INLINE_ASSIGN='"mov %0=r0\n;;\n" : "=&r"(ret)'
        ;;

    alpha-*)
        ompi_cv_asm_arch="ALPHA"
        OMPI_GCC_INLINE_ASSIGN='"bis zero,zero,%0" : "=&r"(ret)'
        ;;

    powerpc-*)
        OMPI_CHECK_POWERPC_REG
        if test "$ac_cv_sizeof_long" = "4" ; then
            ompi_cv_asm_arch="POWERPC32"

            # Note that on some platforms (Apple G5), even if we are
            # compiling in 32 bit mode (and therefore should assume
            # sizeof(long) == 4), we can use the 64 bit test and set
            # operations.
            OMPI_CHECK_POWERPC_64BIT(OMPI_POWERPC_SUPPORT_64BIT=1)
        elif test "$ac_cv_sizeof_long" = "8" ; then
            OMPI_POWERPC_SUPPORT_64BIT=1
            ompi_cv_asm_arch="POWERPC64"
        else
            AC_MSG_ERROR([Could not determine PowerPC word size: $ac_cv_sizeof_long])
        fi
        OMPI_GCC_INLINE_ASSIGN='"li %0,0" : "=&r"(ret)'
        ;;

    sparc-*)
        if test "$ac_cv_sizeof_long" = "4" ; then
            ompi_cv_asm_arch="SPARC32"
        elif test "$ac_cv_sizeof_long" = "8" ; then
            ompi_cv_asm_arch="SPARC64"
        else
            AC_MSG_ERROR([Could not determine Sparc word size: $ac_cv_sizeof_long])
        fi
        # ret is an output operand, so it goes right after the first
        # colon like in every other architecture above
        OMPI_GCC_INLINE_ASSIGN='"mov 0,%0" : "=&r"(ret)'
        ;;

    *)
        AC_MSG_ERROR([No atomic primitives available for $host])
        ;;
    esac

    AC_DEFINE_UNQUOTED([OMPI_POWERPC_SUPPORT_64BIT],
                       [$OMPI_POWERPC_SUPPORT_64BIT],
                       [Non-zero if safe to call PPC64 ops, even in PPC32 code])
    AC_SUBST([OMPI_POWERPC_SUPPORT_64BIT])

    # now that we know our architecture, try to inline assemble
    OMPI_CHECK_INLINE_GCC([$OMPI_GCC_INLINE_ASSIGN])
    OMPI_CHECK_INLINE_DEC
    OMPI_CHECK_INLINE_XLC

    # format:
    #   text-global-label_suffix-gsym-lsym-type-size-align_log-ppc_r_reg-64_bit

    asm_format="${ompi_cv_asm_text}-${ompi_cv_asm_global}"
    asm_format="${asm_format}-${ompi_cv_asm_label_suffix}-${ompi_cv_asm_gsym}"
    asm_format="${asm_format}-${ompi_cv_asm_lsym}"
    asm_format="${asm_format}-${ompi_cv_asm_type}-${ompi_cv_asm_size}"
    asm_format="${asm_format}-${ompi_cv_asm_align_log}"
    # the register-notation field is only probed on PowerPC; every
    # other architecture gets a fixed 1 in that slot
    if test "$ompi_cv_asm_arch" = "POWERPC32" -o "$ompi_cv_asm_arch" = "POWERPC64" ; then
        asm_format="${asm_format}-${ompi_cv_asm_powerpc_r_reg}"
    else
        asm_format="${asm_format}-1"
    fi
    ompi_cv_asm_format="${asm_format}-${OMPI_POWERPC_SUPPORT_64BIT}"
    OMPI_ASSEMBLY_FORMAT="$ompi_cv_asm_format"

    AC_MSG_CHECKING([for assembly format])
    AC_MSG_RESULT([$OMPI_ASSEMBLY_FORMAT])
    AC_DEFINE_UNQUOTED([OMPI_ASSEMBLY_FORMAT], ["$OMPI_ASSEMBLY_FORMAT"],
                       [Format of assembly file])
    AC_SUBST([OMPI_ASSEMBLY_FORMAT])

    result="OMPI_$ompi_cv_asm_arch"
    OMPI_ASSEMBLY_ARCH="$ompi_cv_asm_arch"
    AC_MSG_CHECKING([for assembly architecture])
    AC_MSG_RESULT([$ompi_cv_asm_arch])
    AC_DEFINE_UNQUOTED([OMPI_ASSEMBLY_ARCH], [$result],
                       [Architecture type of assembly to use for atomic operations])
    AC_SUBST([OMPI_ASSEMBLY_ARCH])

    OMPI_ASM_FIND_FILE

    unset result asm_format
])dnl
|
||||
|
||||
|
||||
dnl #################################################################
dnl
dnl OMPI_ASM_FIND_FILE
dnl
dnl
dnl do all the evil mojo to provide a working assembly file
dnl
dnl Step 1: look in src/asm/asm-data.txt for a line matching the
dnl detected architecture and format; field 3 of that line names a
dnl pre-built file under src/asm/generated.
dnl Step 2: if that fails and perl is available, run generate-asm.pl
dnl to create atomic-local.s in the build tree.
dnl
dnl DEFINE and automake conditional OMPI_HAVE_ASM_FILE (0 or 1);
dnl SUBST OMPI_ASM_FILE to the file name (empty when none).
dnl
dnl #################################################################
AC_DEFUN([OMPI_ASM_FIND_FILE], [
    AC_CHECK_PROG([PERL], [perl], [perl])

    # see if we have a pre-built one already
    AC_MSG_CHECKING([for pre-built assembly file])
    ompi_cv_asm_file=""
    if grep "$ompi_cv_asm_arch.*$ompi_cv_asm_format" "${top_ompi_srcdir}/src/asm/asm-data.txt" >conftest.out 2>&1 ; then
        ompi_cv_asm_file="`cut -f3 conftest.out`"
        if test ! "$ompi_cv_asm_file" = "" ; then
            ompi_cv_asm_file="atomic-${ompi_cv_asm_file}.s"
            if test -f "${top_ompi_srcdir}/src/asm/generated/${ompi_cv_asm_file}" ; then
                AC_MSG_RESULT([yes ($ompi_cv_asm_file)])
            else
                AC_MSG_RESULT([no ($ompi_cv_asm_file not found)])
                ompi_cv_asm_file=""
            fi
        else
            # a line matched but carried no file name field; finish
            # the pending "checking" message before moving on
            AC_MSG_RESULT([no (malformed asm-data entry)])
        fi
    else
        AC_MSG_RESULT([no (not in asm-data)])
    fi
    rm -f conftest.*

    if test "$ompi_cv_asm_file" = "" ; then
        if test ! "$PERL" = "" ; then
            # we have perl...  Can we generate a file?
            AC_MSG_CHECKING([whether possible to generate assembly file])
            ompi_cv_asm_file="atomic-local.s"
            ompi_try="$PERL \"$top_ompi_srcdir/src/asm/generate-asm.pl\" \"$ompi_cv_asm_arch\" \"$ompi_cv_asm_format\" \"$top_ompi_srcdir/src/asm/base\" \"$top_ompi_builddir/src/asm/generated/$ompi_cv_asm_file\" >conftest.out 2>&1"
            if AC_TRY_EVAL(ompi_try) ; then
                # save the warnings
                cat conftest.out >&AC_FD_CC
                AC_MSG_RESULT([yes])
            else
                # save output
                cat conftest.out >&AC_FD_CC
                ompi_cv_asm_file=""
                AC_MSG_RESULT([failed])
                AC_MSG_WARN([Could not build atomic operations assembly file.])
                AC_MSG_WARN([There will be no atomic operations for this build.])
            fi
        else
            AC_MSG_WARN([Could not find prebuilt atomic operations file and could not])
            AC_MSG_WARN([find perl to attempt to generate a custom assembly file.])
            AC_MSG_WARN([There will be no atomic operations for this build.])
        fi
    fi
    rm -f conftest.*

    AC_MSG_CHECKING([for atomic assembly filename])
    if test "$ompi_cv_asm_file" = "" ; then
        AC_MSG_RESULT([none])
        result=0
    else
        AC_MSG_RESULT([$ompi_cv_asm_file])
        result=1
    fi

    AC_DEFINE_UNQUOTED([OMPI_HAVE_ASM_FILE], [$result],
                       [Whether there is an atomic assembly file available])
    AM_CONDITIONAL([OMPI_HAVE_ASM_FILE], [test "$result" = "1"])

    OMPI_ASM_FILE=$ompi_cv_asm_file
    AC_SUBST(OMPI_ASM_FILE)
])dnl
|
43
config/ompi_try_assemble.m4
Обычный файл
43
config/ompi_try_assemble.m4
Обычный файл
@ -0,0 +1,43 @@
|
||||
dnl
|
||||
dnl Copyright (c) 2004-2005 The Trustees of Indiana University.
|
||||
dnl All rights reserved.
|
||||
dnl Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
|
||||
dnl All rights reserved.
|
||||
dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
dnl University of Stuttgart. All rights reserved.
|
||||
dnl $COPYRIGHT$
|
||||
dnl
|
||||
dnl Additional copyrights may follow
|
||||
dnl
|
||||
dnl $HEADER$
|
||||
dnl
|
||||
|
||||
dnl OMPI_TRY_ASSEMBLE(asm-code, [action-if-success], [action-if-fail])
dnl
dnl Write asm-code to conftest.s and run it through $CCAS $CFLAGS -c.
dnl On success run action-if-success, otherwise action-if-fail; both
dnl actions are optional.  The assembler output is kept in
dnl conftest.out while the chosen action runs, and every conftest*
dnl file is removed afterwards.
dnl
dnl No preprocessing is guaranteed to be done on asm-code.  Some
dnl compilers do not run the preprocessor on assembly files.
dnl
dnl The assembler output is always copied to the configure log
dnl descriptor; on failure conftest.s is copied there as well.
AC_DEFUN([OMPI_TRY_ASSEMBLE],
[cat >conftest.s <<EOF
[$1]
EOF
ompi_assemble="$CCAS $CFLAGS -c conftest.s >conftest.out 2>&1"
if AC_TRY_EVAL(ompi_assemble); then
    # keep whatever the assembler printed (warnings) in the log
    cat conftest.out >&AC_FD_CC
    ifelse([$2],,:,[$2])
else
    # log the assembler output followed by the rejected source
    cat conftest.out >&AC_FD_CC
    echo "configure: failed program was:" >&AC_FD_CC
    cat conftest.s >&AC_FD_CC
    ifelse([$3],,:,[$3])
fi
rm -f conftest*
unset ompi_assemble
])dnl
|
11
configure.ac
11
configure.ac
@ -315,6 +315,16 @@ AC_DEFINE_UNQUOTED(OMPI_WANT_CXX_BINDINGS, $WANT_MPI_CXX_SUPPORT,
|
||||
[Whether we want MPI cxx support or not])
|
||||
|
||||
|
||||
##################################
|
||||
# Assembler Configuration
|
||||
##################################
|
||||
|
||||
ompi_show_subtitle "Assembler"
|
||||
|
||||
AM_PROG_AS
|
||||
OMPI_CONFIG_ASM
|
||||
|
||||
|
||||
##################################
|
||||
# Fortran
|
||||
##################################
|
||||
@ -1382,6 +1392,7 @@ AC_CONFIG_FILES([
|
||||
src/event/compat/sys/Makefile
|
||||
|
||||
src/attribute/Makefile
|
||||
src/asm/Makefile
|
||||
src/communicator/Makefile
|
||||
src/datatype/Makefile
|
||||
src/errhandler/Makefile
|
||||
|
@ -66,6 +66,7 @@ endif
|
||||
SUBDIRS = \
|
||||
include \
|
||||
$(LIBLTDL_SUBDIR) \
|
||||
asm \
|
||||
attribute \
|
||||
communicator \
|
||||
datatype \
|
||||
@ -94,6 +95,7 @@ lib_LTLIBRARIES = libmpi.la
|
||||
libmpi_la_SOURCES =
|
||||
libmpi_la_LIBADD = \
|
||||
$(LIBLTDL_LTLIB) \
|
||||
asm/libasm.la \
|
||||
attribute/libattribute.la \
|
||||
class/liblfc.la \
|
||||
communicator/libcommunicator.la \
|
||||
|
80
src/asm/Makefile.am
Обычный файл
80
src/asm/Makefile.am
Обычный файл
@ -0,0 +1,80 @@
|
||||
#
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
include $(top_srcdir)/config/Makefile.options
|
||||
|
||||
######################################################################
|
||||
#
|
||||
# This is a bit complicated. If there is anything in the library,
|
||||
# it will always be atomic-asm.s. We just symlink atomic-asm.s to
|
||||
# the best atomic operations available (as determined at configure
|
||||
# time)
|
||||
#
|
||||
######################################################################
|
||||
generated/@OMPI_ASM_FILE@: base/@OMPI_ASSEMBLY_ARCH@.asm
|
||||
$(PERL) "$(top_srcdir)/src/asm/generate-asm.pl" "@OMPI_ASSEMBLY_ARCH@" "@OMPI_ASSEMBLY_FORMAT@" "$(top_srcdir)/src/asm/base" "$(top_builddir)/src/asm/generated/@OMPI_ASM_FILE@"
|
||||
|
||||
atomic-asm.s: generated/@OMPI_ASM_FILE@
|
||||
rm -f atomic-asm.s
|
||||
@ if test -f $(top_srcdir)/src/asm/generated/@OMPI_ASM_FILE@ ; then \
|
||||
cmd="ln -s $(top_srcdir)/src/asm/generated/@OMPI_ASM_FILE@ atomic-asm.s" ; \
|
||||
echo "$$cmd" ; \
|
||||
$$cmd ; \
|
||||
else \
|
||||
cmd="ln -s $(top_builddir)/src/asm/generated/@OMPI_ASM_FILE@ atomic-asm.s" ; \
|
||||
echo "$$cmd" ; \
|
||||
$$cmd ; \
|
||||
fi
|
||||
|
||||
if OMPI_HAVE_ASM_FILE
|
||||
libasm_la_SOURCES = atomic-asm.s
|
||||
else
|
||||
libasm_la_SOURCES =
|
||||
endif
|
||||
|
||||
libasm_la_DEPENDENCIES = generated/@OMPI_ASM_FILE@
|
||||
lib_LTLIBRARIES = libasm.la
|
||||
|
||||
EXTRA_DIST = \
|
||||
asm-data.txt \
|
||||
generate-asm.pl \
|
||||
generate-all-asm.sh \
|
||||
base/AMD64.asm \
|
||||
base/IA32.asm \
|
||||
base/POWERPC32.asm \
|
||||
base/POWERPC64.asm
|
||||
|
||||
######################################################################
|
||||
|
||||
TESTS = atomic-test
|
||||
check_PROGRAMS = atomic-test
|
||||
atomic_test_SOURCES = atomic-test.c
|
||||
atomic_test_LDADD = libasm.la
|
||||
|
||||
######################################################################
|
||||
|
||||
clean-local:
|
||||
rm -f atomic-asm.s
|
||||
|
||||
maintainer-clean-local:
|
||||
rm -f generated/atomic-local.s
|
||||
|
||||
######################################################################
|
||||
|
||||
#
|
||||
# Copy over all the generated files
|
||||
#
|
||||
# Populate $(distdir)/generated with every pre-built assembly file.
# The helper script lives in the source tree, so it is addressed through
# $(srcdir); this keeps "make dist" working from a separate (VPATH) build
# directory.  The directory is only created when it does not exist yet so
# that a re-run of the hook does not fail.
dist-hook:
	test -d "$(distdir)/generated" || mkdir "$(distdir)/generated"
	sh "$(srcdir)/generate-all-asm.sh" "$(PERL)" "$(srcdir)" "$(distdir)"
|
27
src/asm/asm-data.txt
Обычный файл
27
src/asm/asm-data.txt
Обычный файл
@ -0,0 +1,27 @@
|
||||
#
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
#
|
||||
# Database for mapping architecture and assembly format to prebuilt
|
||||
# assembly files.
|
||||
#
|
||||
# FORMAT:
|
||||
# ARCHITECTURE ASSEMBLY FORMAT BASE FILENAME
|
||||
#
|
||||
|
||||
AMD64 .text-.globl-:--.L-@-1-0-1-0 amd64-linux
|
||||
IA32 .text-.globl-:--.L-@-1-0-1-0 ia32-linux
|
||||
POWERPC32 .text-.globl-:-_-L--0-1-1-0 powerpc32-osx
|
||||
POWERPC32 .text-.globl-:-_-L--0-1-1-1 powerpc32-64-osx
|
||||
POWERPC64 .text-.globl-:-_-L--0-1-1-1 powerpc64-osx
|
457
src/asm/atomic-test.c
Обычный файл
457
src/asm/atomic-test.c
Обычный файл
@ -0,0 +1,457 @@
|
||||
#undef OMPI_BUILDING
|
||||
|
||||
#include "ompi_config.h"
|
||||
#include <assert.h>
|
||||
#include <getopt.h>
|
||||
#ifdef HAVE_PTHREAD_H
|
||||
#include <pthread.h>
|
||||
#endif
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "include/sys/atomic.h"
|
||||
|
||||
/**
|
||||
* A testing support library to provide uniform reporting output
|
||||
*/
|
||||
|
||||
static int ompi_n_tests;
|
||||
static int ompi_n_success;
|
||||
static int ompi_n_failures;
|
||||
static char *ompi_description;
|
||||
|
||||
static void test_init(char *a)
|
||||
{
|
||||
/* local variables */
|
||||
size_t len;
|
||||
|
||||
/* save the descriptive string */
|
||||
len = strlen(a);
|
||||
ompi_description = (char *) malloc(len + 1);
|
||||
assert(ompi_description);
|
||||
|
||||
strcpy(ompi_description, a);
|
||||
|
||||
/* initialize counters */
|
||||
ompi_n_tests = 0;
|
||||
ompi_n_success = 0;
|
||||
ompi_n_failures = 0;
|
||||
|
||||
return;
|
||||
|
||||
}
|
||||
|
||||
|
||||
static void test_success(void)
|
||||
{
|
||||
ompi_n_tests++;
|
||||
ompi_n_success++;
|
||||
}
|
||||
|
||||
|
||||
static void test_failure(char *a)
|
||||
{
|
||||
ompi_n_tests++;
|
||||
ompi_n_failures++;
|
||||
|
||||
fprintf(stderr, " Failure : ");
|
||||
fprintf(stderr, a);
|
||||
fprintf(stderr, "\n");
|
||||
fflush(stderr);
|
||||
}
|
||||
|
||||
|
||||
/*
 * Compare two integers and record the outcome.
 *
 * Returns 1 (and counts a success) when the values match; otherwise
 * counts a failure, prints both values on stderr and returns 0.
 */
static int test_verify_int(int expected_result, int test_result)
{
    if (expected_result == test_result) {
        test_success();
        return 1;
    }

    test_failure("Comparison failure");
    fprintf(stderr, " Expected result: %d\n", expected_result);
    fprintf(stderr, " Test result: %d\n", test_result);
    fflush(stderr);

    return 0;
}
|
||||
|
||||
|
||||
static int test_finalize(void)
|
||||
{
|
||||
int return_value;
|
||||
|
||||
return_value = 1;
|
||||
|
||||
if (ompi_n_tests == ompi_n_success) {
|
||||
fprintf(stderr, "SUPPORT: OMPI Test Passed: %s: (%d tests)\n",
|
||||
ompi_description, ompi_n_tests);
|
||||
fflush(stderr);
|
||||
} else {
|
||||
fprintf(stderr,
|
||||
"SUPPORT: OMPI Test failed: %s (%d of %d failed)\n",
|
||||
ompi_description, ompi_n_failures, ompi_n_tests);
|
||||
fflush(stderr);
|
||||
return_value = 0;
|
||||
}
|
||||
|
||||
return return_value;
|
||||
}
|
||||
|
||||
|
||||
/* note this is for additional output that does NOT go to STDERR but STDOUT */
|
||||
static void test_comment (char* userstr)
|
||||
{
|
||||
fprintf(stdout, "%s:%s\n", ompi_description, userstr);
|
||||
}
|
||||
|
||||
/* default options */
|
||||
|
||||
int nreps = 100;
|
||||
int nthreads = 2;
|
||||
int enable_verbose = 0;
|
||||
int enable_64_bit_tests = 0;
|
||||
|
||||
volatile int32_t vol32;
|
||||
int32_t val32;
|
||||
int32_t old32;
|
||||
int32_t new32;
|
||||
|
||||
#ifdef ENABLE_64_BIT
|
||||
volatile int64_t vol64;
|
||||
int64_t val64;
|
||||
int64_t old64;
|
||||
int64_t new64;
|
||||
#endif
|
||||
|
||||
volatile int volint;
|
||||
int valint;
|
||||
int oldint;
|
||||
int newint;
|
||||
|
||||
volatile void *volptr;
|
||||
void *oldptr;
|
||||
void *newptr;
|
||||
|
||||
|
||||
/*
 * Print the full option summary on stdout and terminate the process
 * with EXIT_SUCCESS.  The "-l" line is only listed when the program was
 * compiled with ENABLE_64_BIT; otherwise a note explains that the 64-bit
 * tests are unavailable.
 */
static void help(void)
{
    printf("Usage: threadtest [flags]\n"
           "\n"
           " Flags may be any of\n"
#ifdef ENABLE_64_BIT
           " -l do 64-bit tests\n"
#endif
           " -r NREPS number of repetitions\n"
           " -t NTHREADS number of threads\n"
           " -v verbose output\n"
           " -h print this info\n" "\n"
           " Numbers may be postfixed with 'k' or 'm'\n\n");

#ifndef ENABLE_64_BIT
    printf(" 64-bit tests are not enabled in this build of the tests\n\n");
#endif

    exit(EXIT_SUCCESS);
}
|
||||
|
||||
|
||||
/* Print the short usage text on stderr and exit with EXIT_FAILURE. */
static void usage(void)
{
    fputs("Usage: threadtest [flags]\n" " threadtest -h\n", stderr);
    exit(EXIT_FAILURE);
}
|
||||
|
||||
|
||||
static void verbose(const char *fmt, ...)
|
||||
{
|
||||
if (enable_verbose) {
|
||||
va_list ap;
|
||||
va_start(ap, fmt);
|
||||
vfprintf(stderr, fmt, ap);
|
||||
va_end(ap);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
 * Parse a decimal count with an optional one-letter multiplier.
 *
 *   "<n>"          -> n
 *   "<n>k" / "<n>K" -> n << 10
 *   "<n>m" / "<n>M" -> n << 20
 *
 * Returns -1 when the string does not start with a number.
 */
static int str2size(char *str)
{
    int size;
    char mod[32];
    int nfields;

    nfields = sscanf(str, "%d%1[mMkK]", &size, mod);

    /* plain number, no suffix */
    if (1 == nfields) {
        return (size);
    }
    /* neither "number" nor "number + suffix": reject */
    if (2 != nfields) {
        return (-1);
    }

    if ('m' == mod[0] || 'M' == mod[0]) {
        return (size << 20);
    }
    if ('k' == mod[0] || 'K' == mod[0]) {
        return (size << 10);
    }
    return (size);
}
|
||||
|
||||
|
||||
static void *thread_main(void *arg)
|
||||
{
|
||||
int rank = (int) arg;
|
||||
int i;
|
||||
|
||||
verbose("thread-%d: Hello\n", rank);
|
||||
|
||||
/* thread tests */
|
||||
|
||||
for (i = 0; i < nreps; i++) {
|
||||
ompi_atomic_add_32(&val32, 5);
|
||||
#ifdef ENABLE_64_BIT
|
||||
if (enable_64_bit_tests) {
|
||||
ompi_atomic_add_64(&val64, 5);
|
||||
}
|
||||
#endif
|
||||
ompi_atomic_add(&valint, 5);
|
||||
}
|
||||
|
||||
return (void *) (rank + 1000);
|
||||
}
|
||||
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int c;
|
||||
int tid;
|
||||
pthread_t *th;
|
||||
|
||||
/* option processing */
|
||||
|
||||
test_init("atomic operations");
|
||||
|
||||
while ((c = getopt(argc, argv, "hlr:t:v")) != -1) {
|
||||
switch (c) {
|
||||
case 'h':
|
||||
help();
|
||||
break;
|
||||
case 'l':
|
||||
#ifdef ENABLE_64_BIT
|
||||
enable_64_bit_tests = 1;
|
||||
#else
|
||||
usage();
|
||||
#endif
|
||||
break;
|
||||
case 'r':
|
||||
if ((nreps = str2size(optarg)) <= 0) {
|
||||
usage();
|
||||
}
|
||||
break;
|
||||
case 't':
|
||||
if ((nthreads = str2size(optarg)) <= 0) {
|
||||
usage();
|
||||
}
|
||||
break;
|
||||
case 'v':
|
||||
enable_verbose = 1;
|
||||
break;
|
||||
default:
|
||||
usage();
|
||||
}
|
||||
}
|
||||
if (optind != argc) {
|
||||
usage();
|
||||
}
|
||||
|
||||
verbose("main: %s\n", argv[0]);
|
||||
verbose("main: nthreads = %d\n", nthreads);
|
||||
verbose("main: nreps = %d\n", nreps);
|
||||
|
||||
/* first test single-threaded functionality */
|
||||
|
||||
/* -- cmpset 32-bit tests -- */
|
||||
|
||||
vol32 = 42, old32 = 42, new32 = 50;
|
||||
test_verify_int(ompi_atomic_cmpset_32(&vol32, old32, new32), 1);
|
||||
test_verify_int(vol32, new32);
|
||||
|
||||
vol32 = 42, old32 = 420, new32 = 50;
|
||||
test_verify_int(ompi_atomic_cmpset_32(&vol32, old32, new32), 0);
|
||||
test_verify_int(vol32, 42);
|
||||
|
||||
vol32 = 42, old32 = 42, new32 = 50;
|
||||
test_verify_int(ompi_atomic_cmpset_acq_32(&vol32, old32, new32), 1);
|
||||
test_verify_int(vol32, new32);
|
||||
|
||||
vol32 = 42, old32 = 420, new32 = 50;
|
||||
test_verify_int(ompi_atomic_cmpset_acq_32(&vol32, old32, new32), 0);
|
||||
test_verify_int(vol32, 42);
|
||||
|
||||
vol32 = 42, old32 = 42, new32 = 50;
|
||||
test_verify_int(ompi_atomic_cmpset_rel_32(&vol32, old32, new32), 1);
|
||||
test_verify_int(vol32, new32);
|
||||
|
||||
vol32 = 42, old32 = 420, new32 = 50;
|
||||
test_verify_int(ompi_atomic_cmpset_rel_32(&vol32, old32, new32), 0);
|
||||
test_verify_int(vol32, 42);
|
||||
|
||||
/* -- cmpset 64-bit tests -- */
|
||||
|
||||
#ifdef ENABLE_64_BIT
|
||||
if (enable_64_bit_tests) {
|
||||
verbose("64 bit serial tests\n");
|
||||
vol64 = 42, old64 = 42, new64 = 50;
|
||||
test_verify_int(1, ompi_atomic_cmpset_64(&vol64, old64, new64));
|
||||
test_verify_int(new64, vol64);
|
||||
|
||||
verbose("64 bit serial test 2\n");
|
||||
vol64 = 42, old64 = 420, new64 = 50;
|
||||
test_verify_int(ompi_atomic_cmpset_64(&vol64, old64, new64), 0);
|
||||
test_verify_int(vol64, 42);
|
||||
|
||||
vol64 = 42, old64 = 42, new64 = 50;
|
||||
test_verify_int(ompi_atomic_cmpset_acq_64(&vol64, old64, new64), 1);
|
||||
test_verify_int(vol64, new64);
|
||||
|
||||
vol64 = 42, old64 = 420, new64 = 50;
|
||||
test_verify_int(ompi_atomic_cmpset_acq_64(&vol64, old64, new64), 0);
|
||||
test_verify_int(vol64, 42);
|
||||
|
||||
vol64 = 42, old64 = 42, new64 = 50;
|
||||
test_verify_int(ompi_atomic_cmpset_rel_64(&vol64, old64, new64), 1);
|
||||
test_verify_int(vol64, new64);
|
||||
|
||||
vol64 = 42, old64 = 420, new64 = 50;
|
||||
test_verify_int(ompi_atomic_cmpset_rel_64(&vol64, old64, new64), 0);
|
||||
test_verify_int(vol64, 42);
|
||||
}
|
||||
#endif
|
||||
/* -- cmpset int tests -- */
|
||||
|
||||
volint = 42, oldint = 42, newint = 50;
|
||||
test_verify_int(ompi_atomic_cmpset(&volint, oldint, newint), 1);
|
||||
test_verify_int(volint, newint);
|
||||
|
||||
volint = 42, oldint = 420, newint = 50;
|
||||
test_verify_int(ompi_atomic_cmpset(&volint, oldint, newint), 0);
|
||||
test_verify_int(volint, 42);
|
||||
|
||||
volint = 42, oldint = 42, newint = 50;
|
||||
test_verify_int(ompi_atomic_cmpset_acq(&volint, oldint, newint), 1);
|
||||
test_verify_int(volint, newint);
|
||||
|
||||
volint = 42, oldint = 420, newint = 50;
|
||||
test_verify_int(ompi_atomic_cmpset_acq(&volint, oldint, newint), 0);
|
||||
test_verify_int(volint, 42);
|
||||
|
||||
volint = 42, oldint = 42, newint = 50;
|
||||
test_verify_int(ompi_atomic_cmpset_rel(&volint, oldint, newint), 1);
|
||||
test_verify_int(volint, newint);
|
||||
|
||||
volint = 42, oldint = 420, newint = 50;
|
||||
test_verify_int(ompi_atomic_cmpset_rel(&volint, oldint, newint), 0);
|
||||
test_verify_int(volint, 42);
|
||||
|
||||
|
||||
/* -- cmpset ptr tests -- */
|
||||
|
||||
volptr = (void *) 42, oldptr = (void *) 42, newptr = (void *) 50;
|
||||
test_verify_int(ompi_atomic_cmpset(&volptr, oldptr, newptr), 1);
|
||||
test_verify_int(volptr, newptr);
|
||||
|
||||
volptr = (void *) 42, oldptr = (void *) 420, newptr = (void *) 50;
|
||||
test_verify_int(ompi_atomic_cmpset(&volptr, oldptr, newptr), 0);
|
||||
test_verify_int(volptr, (void *) 42);
|
||||
|
||||
volptr = (void *) 42, oldptr = (void *) 42, newptr = (void *) 50;
|
||||
test_verify_int(ompi_atomic_cmpset_acq(&volptr, oldptr, newptr), 1);
|
||||
test_verify_int(volptr, newptr);
|
||||
|
||||
volptr = (void *) 42, oldptr = (void *) 420, newptr = (void *) 50;
|
||||
test_verify_int(ompi_atomic_cmpset_acq(&volptr, oldptr, newptr), 0);
|
||||
test_verify_int(volptr, (void *) 42);
|
||||
|
||||
volptr = (void *) 42, oldptr = (void *) 42, newptr = (void *) 50;
|
||||
test_verify_int(ompi_atomic_cmpset_rel(&volptr, oldptr, newptr), 1);
|
||||
test_verify_int(volptr, newptr);
|
||||
|
||||
volptr = (void *) 42, oldptr = (void *) 420, newptr = (void *) 50;
|
||||
test_verify_int(ompi_atomic_cmpset_rel(&volptr, oldptr, newptr), 0);
|
||||
test_verify_int(volptr, (void *) 42);
|
||||
|
||||
/* -- add_32 tests -- */
|
||||
|
||||
val32 = 42;
|
||||
test_verify_int(ompi_atomic_add_32(&val32, 5), (42 + 5));
|
||||
test_verify_int((42 + 5), val32);
|
||||
|
||||
/* -- add_64 tests -- */
|
||||
#ifdef ENABLE_64_BIT
|
||||
if (enable_64_bit_tests) {
|
||||
val64 = 42;
|
||||
test_verify_int(ompi_atomic_add_64(&val64, 5), (42 + 5));
|
||||
test_verify_int((42 + 5), val64);
|
||||
}
|
||||
#endif
|
||||
/* -- add_int tests -- */
|
||||
|
||||
valint = 42;
|
||||
ompi_atomic_add(&valint, 5);
|
||||
test_verify_int((42 + 5), valint);
|
||||
|
||||
|
||||
/* threaded tests */
|
||||
|
||||
val32 = 0;
|
||||
#ifdef ENABLE_64_BIT
|
||||
val64 = 0ul;
|
||||
#endif
|
||||
valint = 0;
|
||||
|
||||
/* -- create the thread set -- */
|
||||
|
||||
th = (pthread_t *) malloc(nthreads * sizeof(pthread_t));
|
||||
if (!th) {
|
||||
perror("malloc");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
for (tid = 0; tid < nthreads; tid++) {
|
||||
if (pthread_create(&th[tid], NULL, thread_main, (void *) tid) != 0) {
|
||||
perror("pthread_create");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
|
||||
/* -- wait for the thread set to finish -- */
|
||||
|
||||
for (tid = 0; tid < nthreads; tid++) {
|
||||
void *thread_return;
|
||||
|
||||
if (pthread_join(th[tid], &thread_return) != 0) {
|
||||
perror("pthread_join");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
verbose("main: thread %d returned %d\n", tid, (int) thread_return);
|
||||
}
|
||||
free(th);
|
||||
|
||||
test_verify_int((5 * nthreads * nreps), val32);
|
||||
#ifdef ENABLE_64_BIT
|
||||
if (enable_64_bit_tests) {
|
||||
test_verify_int((5 * nthreads * nreps), val64);
|
||||
}
|
||||
#endif
|
||||
test_verify_int((5 * nthreads * nreps), valint);
|
||||
|
||||
test_finalize();
|
||||
|
||||
return 0;
|
||||
}
|
73
src/asm/base/AMD64.asm
Обычный файл
73
src/asm/base/AMD64.asm
Обычный файл
@ -0,0 +1,73 @@
|
||||
TEXT
|
||||
|
||||
START_FUNC(ompi_atomic_mb)
|
||||
pushq %rbp
|
||||
movq %rsp, %rbp
|
||||
leave
|
||||
ret
|
||||
END_FUNC(ompi_atomic_mb)
|
||||
|
||||
|
||||
START_FUNC(ompi_atomic_rmb)
|
||||
pushq %rbp
|
||||
movq %rsp, %rbp
|
||||
leave
|
||||
ret
|
||||
END_FUNC(ompi_atomic_rmb)
|
||||
|
||||
|
||||
START_FUNC(ompi_atomic_wmb)
|
||||
pushq %rbp
|
||||
movq %rsp, %rbp
|
||||
leave
|
||||
ret
|
||||
END_FUNC(ompi_atomic_wmb)
|
||||
|
||||
|
||||
# 32-bit compare-and-set.
#   in:  %rdi = address, %esi = expected old value, %edx = new value
#   out: %eax = 1 when the stored value equalled the expected value
#              (and was replaced by the new value), 0 otherwise
# The cmpxchg carries a lock prefix so the read-modify-write is atomic
# with respect to other processors, matching the IA32 template.
START_FUNC(ompi_atomic_cmpset_32)
        pushq   %rbp
        movq    %rsp, %rbp
        movq    %rdi, -8(%rbp)
        movl    %esi, -12(%rbp)
        movl    %edx, -16(%rbp)
        movl    -16(%rbp), %ecx
        movq    -8(%rbp), %rdx
        movl    -12(%rbp), %eax
#APP
        lock cmpxchgl %ecx,(%rdx)
#NO_APP
        movq    %rax, -24(%rbp)
        movl    -24(%rbp), %eax
        movl    %eax, -28(%rbp)
        movl    -28(%rbp), %eax
        cmpl    -12(%rbp), %eax
        sete    %al
        movzbl  %al, %eax
        movl    %eax, -28(%rbp)
        movl    -28(%rbp), %eax
        leave
        ret
END_FUNC(ompi_atomic_cmpset_32)
|
||||
|
||||
|
||||
# 64-bit compare-and-set.
#   in:  %rdi = address, %rsi = expected old value, %rdx = new value
#   out: %eax = 1 when the stored value equalled the expected value
#              (and was replaced by the new value), 0 otherwise
# The cmpxchg carries a lock prefix so the read-modify-write is atomic
# with respect to other processors, matching the IA32 template.
START_FUNC(ompi_atomic_cmpset_64)
        pushq   %rbp
        movq    %rsp, %rbp
        movq    %rdi, -8(%rbp)
        movq    %rsi, -16(%rbp)
        movq    %rdx, -24(%rbp)
        movq    -24(%rbp), %rcx
        movq    -8(%rbp), %rdx
        movq    -16(%rbp), %rax
#APP
        lock cmpxchgq %rcx,(%rdx)

#NO_APP
        movq    %rax, -32(%rbp)
        movq    -32(%rbp), %rax
        cmpq    -16(%rbp), %rax
        sete    %al
        movzbl  %al, %eax
        leave
        ret
END_FUNC(ompi_atomic_cmpset_64)
|
108
src/asm/base/IA32.asm
Обычный файл
108
src/asm/base/IA32.asm
Обычный файл
@ -0,0 +1,108 @@
|
||||
TEXT
|
||||
|
||||
START_FUNC(ompi_atomic_mb)
|
||||
pushl %ebp
|
||||
movl %esp, %ebp
|
||||
leave
|
||||
ret
|
||||
END_FUNC(ompi_atomic_mb)
|
||||
|
||||
|
||||
START_FUNC(ompi_atomic_rmb)
|
||||
pushl %ebp
|
||||
movl %esp, %ebp
|
||||
leave
|
||||
ret
|
||||
END_FUNC(ompi_atomic_rmb)
|
||||
|
||||
|
||||
START_FUNC(ompi_atomic_wmb)
|
||||
pushl %ebp
|
||||
movl %esp, %ebp
|
||||
leave
|
||||
ret
|
||||
END_FUNC(ompi_atomic_wmb)
|
||||
|
||||
|
||||
START_FUNC(ompi_atomic_cmpset_32)
|
||||
pushl %ebp
|
||||
movl %esp, %ebp
|
||||
movl 8(%ebp), %edx
|
||||
movl 16(%ebp), %ecx
|
||||
movl 12(%ebp), %eax
|
||||
#APP
|
||||
lock cmpxchgl %ecx,(%edx)
|
||||
sete %dl
|
||||
|
||||
#NO_APP
|
||||
movzbl %dl, %eax
|
||||
leave
|
||||
ret
|
||||
END_FUNC(ompi_atomic_cmpset_32)
|
||||
|
||||
|
||||
START_FUNC(ompi_atomic_cmpset_64)
|
||||
pushl %ebp
|
||||
movl %esp, %ebp
|
||||
subl $32, %esp
|
||||
movl %ebx, -12(%ebp)
|
||||
movl %esi, -8(%ebp)
|
||||
movl %edi, -4(%ebp)
|
||||
movl 8(%ebp), %edi
|
||||
movl 12(%ebp), %eax
|
||||
movl 16(%ebp), %edx
|
||||
movl %eax, -24(%ebp)
|
||||
movl %edx, -20(%ebp)
|
||||
movl 20(%ebp), %eax
|
||||
movl 24(%ebp), %edx
|
||||
movl %eax, -32(%ebp)
|
||||
movl %edx, -28(%ebp)
|
||||
movl -24(%ebp), %ebx
|
||||
movl -20(%ebp), %edx
|
||||
movl -32(%ebp), %esi
|
||||
movl -28(%ebp), %ecx
|
||||
movl %ebx, %eax
|
||||
#APP
|
||||
push %ebx
|
||||
movl %esi, %ebx
|
||||
lock cmpxchg8b (%edi)
|
||||
sete %dl
|
||||
pop %ebx
|
||||
|
||||
#NO_APP
|
||||
movzbl %dl, %eax
|
||||
movl -12(%ebp), %ebx
|
||||
movl -8(%ebp), %esi
|
||||
movl -4(%ebp), %edi
|
||||
movl %ebp, %esp
|
||||
popl %ebp
|
||||
ret
|
||||
END_FUNC(ompi_atomic_cmpset_64)
|
||||
|
||||
|
||||
START_FUNC(ompi_atomic_add_32)
|
||||
pushl %ebp
|
||||
movl %esp, %ebp
|
||||
movl 8(%ebp), %eax
|
||||
movl 12(%ebp), %edx
|
||||
#APP
|
||||
lock addl %edx,(%eax)
|
||||
#NO_APP
|
||||
movl (%eax), %eax
|
||||
leave
|
||||
ret
|
||||
END_FUNC(ompi_atomic_add_32)
|
||||
|
||||
|
||||
START_FUNC(ompi_atomic_sub_32)
|
||||
pushl %ebp
|
||||
movl %esp, %ebp
|
||||
movl 8(%ebp), %eax
|
||||
movl 12(%ebp), %edx
|
||||
#APP
|
||||
lock subl %edx,(%eax)
|
||||
#NO_APP
|
||||
movl (%eax), %eax
|
||||
leave
|
||||
ret
|
||||
END_FUNC(ompi_atomic_sub_32)
|
143
src/asm/base/POWERPC32.asm
Обычный файл
143
src/asm/base/POWERPC32.asm
Обычный файл
@ -0,0 +1,143 @@
|
||||
TEXT
|
||||
|
||||
ALIGN(4)
|
||||
START_FUNC(ompi_atomic_mb)
|
||||
sync
|
||||
blr
|
||||
END_FUNC(ompi_atomic_mb)
|
||||
|
||||
|
||||
START_FUNC(ompi_atomic_rmb)
|
||||
lwsync
|
||||
blr
|
||||
END_FUNC(ompi_atomic_rmb)
|
||||
|
||||
|
||||
START_FUNC(ompi_atomic_wmb)
|
||||
eieio
|
||||
blr
|
||||
END_FUNC(ompi_atomic_wmb)
|
||||
|
||||
|
||||
START_FUNC(ompi_atomic_cmpset_32)
|
||||
1: lwarx r0, 0, r3
|
||||
cmpw 0, r0, r4
|
||||
bne- 2f
|
||||
stwcx. r5, 0, r3
|
||||
bne- 1b
|
||||
sync
|
||||
2:
|
||||
xor r3,r0,r4
|
||||
subfic r2,r3,0
|
||||
adde r3,r2,r3
|
||||
blr
|
||||
END_FUNC(ompi_atomic_cmpset_32)
|
||||
|
||||
|
||||
START_FUNC(ompi_atomic_cmpset_acq_32)
|
||||
1: lwarx r0, 0, r3
|
||||
cmpw 0, r0, r4
|
||||
bne- 2f
|
||||
stwcx. r5, 0, r3
|
||||
bne- 1b
|
||||
sync
|
||||
2:
|
||||
xor r3,r0,r4
|
||||
subfic r2,r3,0
|
||||
adde r3,r2,r3
|
||||
lwsync
|
||||
blr
|
||||
END_FUNC(ompi_atomic_cmpset_acq_32)
|
||||
|
||||
|
||||
START_FUNC(ompi_atomic_cmpset_rel_32)
|
||||
eieio
|
||||
1: lwarx r0, 0, r3
|
||||
cmpw 0, r0, r4
|
||||
bne- 2f
|
||||
stwcx. r5, 0, r3
|
||||
bne- 1b
|
||||
sync
|
||||
2:
|
||||
xor r3,r0,r4
|
||||
subfic r2,r3,0
|
||||
adde r3,r2,r3
|
||||
blr
|
||||
END_FUNC(ompi_atomic_cmpset_rel_32)
|
||||
|
||||
#START_64BIT
|
||||
START_FUNC(ompi_atomic_cmpset_64)
|
||||
1: ldarx r9, 0, r3
|
||||
cmpd 0, r9, r4
|
||||
bne- 2f
|
||||
stdcx. r6, 0, r3
|
||||
bne- 1b
|
||||
2:
|
||||
li r3,0
|
||||
cmpw cr7,r9,r4
|
||||
bnelr+ cr7
|
||||
cmpw cr7,r10,r5
|
||||
bnelr+ cr7
|
||||
li r3,1
|
||||
blr
|
||||
END_FUNC(ompi_atomic_cmpset_64)
|
||||
|
||||
|
||||
START_FUNC(ompi_atomic_cmpset_acq_64)
|
||||
1: ldarx r9, 0, r3
|
||||
cmpd 0, r9, r4
|
||||
bne- 2f
|
||||
stdcx. r6, 0, r3
|
||||
bne- 1b
|
||||
2:
|
||||
cmpw cr0,r9,r4
|
||||
li r3,0
|
||||
bne+ cr0,L15
|
||||
cmpw cr0,r10,r5
|
||||
bne+ cr0,L15
|
||||
li r3,1
|
||||
L15:
|
||||
lwsync
|
||||
blr
|
||||
END_FUNC(ompi_atomic_cmpset_acq_64)
|
||||
|
||||
|
||||
START_FUNC(ompi_atomic_cmpset_rel_64)
|
||||
eieio
|
||||
1: ldarx r9, 0, r3
|
||||
cmpd 0, r9, r4
|
||||
bne- 2f
|
||||
stdcx. r6, 0, r3
|
||||
bne- 1b
|
||||
2:
|
||||
cmpw cr0,r9,r4
|
||||
li r3,0
|
||||
bnelr+ cr0
|
||||
cmpw cr0,r10,r5
|
||||
bnelr+ cr0
|
||||
li r3,1
|
||||
blr
|
||||
END_FUNC(ompi_atomic_cmpset_rel_64)
|
||||
#END_64BIT
|
||||
|
||||
|
||||
START_FUNC(ompi_atomic_add_32)
|
||||
1: lwarx r0, 0, r3
|
||||
add r0, r4, r0
|
||||
stwcx. r0, 0, r3
|
||||
bne- 1b
|
||||
|
||||
lwz r3,0(r3)
|
||||
blr
|
||||
END_FUNC(ompi_atomic_add_32)
|
||||
|
||||
|
||||
START_FUNC(ompi_atomic_sub_32)
|
||||
1: lwarx r0,0,r3
|
||||
subf r0,r4,r0
|
||||
stwcx. r0,0,r3
|
||||
bne- 1b
|
||||
|
||||
lwz r3,0(r3)
|
||||
blr
|
||||
END_FUNC(ompi_atomic_sub_32)
|
156
src/asm/base/POWERPC64.asm
Обычный файл
156
src/asm/base/POWERPC64.asm
Обычный файл
@ -0,0 +1,156 @@
|
||||
TEXT
|
||||
|
||||
ALIGN(4)
|
||||
START_FUNC(ompi_atomic_mb)
|
||||
sync
|
||||
blr
|
||||
END_FUNC(ompi_atomic_mb)
|
||||
|
||||
|
||||
START_FUNC(ompi_atomic_rmb)
|
||||
lwsync
|
||||
blr
|
||||
END_FUNC(ompi_atomic_rmb)
|
||||
|
||||
|
||||
START_FUNC(ompi_atomic_wmb)
|
||||
eieio
|
||||
blr
|
||||
END_FUNC(ompi_atomic_wmb)
|
||||
|
||||
|
||||
START_FUNC(ompi_atomic_cmpset_32)
|
||||
1: lwarx r0, 0, r3
|
||||
cmpw 0, r0, r4
|
||||
bne- 2f
|
||||
stwcx. r5, 0, r3
|
||||
bne- 1b
|
||||
sync
|
||||
2:
|
||||
cmpw cr7,r0,r4
|
||||
mfcr r3
|
||||
rlwinm r3,r3,31,1
|
||||
blr
|
||||
END_FUNC(ompi_atomic_cmpset_32)
|
||||
|
||||
|
||||
START_FUNC(ompi_atomic_cmpset_acq_32)
|
||||
mflr r0
|
||||
std r29,-24(r1)
|
||||
std r0,16(r1)
|
||||
stdu r1,-144(r1)
|
||||
bl _ompi_atomic_cmpset_32
|
||||
mr r29,r3
|
||||
bl _ompi_atomic_rmb
|
||||
mr r3,r29
|
||||
addi r1,r1,144
|
||||
ld r0,16(r1)
|
||||
mtlr r0
|
||||
ld r29,-24(r1)
|
||||
blr
|
||||
END_FUNC(ompi_atomic_cmpset_acq_32)
|
||||
|
||||
|
||||
START_FUNC(ompi_atomic_cmpset_rel_32)
|
||||
mflr r0
|
||||
std r27,-40(r1)
|
||||
std r28,-32(r1)
|
||||
std r29,-24(r1)
|
||||
std r0,16(r1)
|
||||
stdu r1,-160(r1)
|
||||
mr r29,r3
|
||||
mr r28,r4
|
||||
mr r27,r5
|
||||
bl _ompi_atomic_wmb
|
||||
mr r3,r29
|
||||
mr r4,r28
|
||||
mr r5,r27
|
||||
bl _ompi_atomic_cmpset_32
|
||||
addi r1,r1,160
|
||||
ld r0,16(r1)
|
||||
mtlr r0
|
||||
ld r27,-40(r1)
|
||||
ld r28,-32(r1)
|
||||
ld r29,-24(r1)
|
||||
blr
|
||||
END_FUNC(ompi_atomic_cmpset_rel_32)
|
||||
|
||||
|
||||
START_FUNC(ompi_atomic_cmpset_64)
|
||||
1: ldarx r0, 0, r3
|
||||
cmpd 0, r0, r4
|
||||
bne- 2f
|
||||
stdcx. r5, 0, r3
|
||||
bne- 1b
|
||||
2:
|
||||
xor r3,r4,r0
|
||||
subfic r2,r3,0
|
||||
adde r3,r2,r3
|
||||
blr
|
||||
END_FUNC(ompi_atomic_cmpset_64)
|
||||
|
||||
|
||||
START_FUNC(ompi_atomic_cmpset_acq_64)
|
||||
mflr r0
|
||||
std r29,-24(r1)
|
||||
std r0,16(r1)
|
||||
stdu r1,-144(r1)
|
||||
bl _ompi_atomic_cmpset_64
|
||||
mr r29,r3
|
||||
bl _ompi_atomic_rmb
|
||||
mr r3,r29
|
||||
addi r1,r1,144
|
||||
ld r0,16(r1)
|
||||
mtlr r0
|
||||
ld r29,-24(r1)
|
||||
blr
|
||||
END_FUNC(ompi_atomic_cmpset_acq_64)
|
||||
|
||||
|
||||
START_FUNC(ompi_atomic_cmpset_rel_64)
|
||||
mflr r0
|
||||
std r27,-40(r1)
|
||||
std r28,-32(r1)
|
||||
std r29,-24(r1)
|
||||
std r0,16(r1)
|
||||
stdu r1,-160(r1)
|
||||
mr r29,r3
|
||||
mr r28,r4
|
||||
mr r27,r5
|
||||
bl _ompi_atomic_wmb
|
||||
mr r3,r29
|
||||
mr r4,r28
|
||||
mr r5,r27
|
||||
bl _ompi_atomic_cmpset_64
|
||||
addi r1,r1,160
|
||||
ld r0,16(r1)
|
||||
mtlr r0
|
||||
ld r27,-40(r1)
|
||||
ld r28,-32(r1)
|
||||
ld r29,-24(r1)
|
||||
blr
|
||||
END_FUNC(ompi_atomic_cmpset_rel_64)
|
||||
|
||||
|
||||
START_FUNC(ompi_atomic_add_32)
|
||||
1: lwarx r0, 0, r3
|
||||
add r0, r4, r0
|
||||
stwcx. r0, 0, r3
|
||||
bne- 1b
|
||||
|
||||
lwz r3,0(r3)
|
||||
extsw r3,r3
|
||||
blr
|
||||
END_FUNC(ompi_atomic_add_32)
|
||||
|
||||
|
||||
START_FUNC(ompi_atomic_sub_32)
|
||||
1: lwarx r0,0,r3
|
||||
subf r0,r4,r0
|
||||
stwcx. r0,0,r3
|
||||
bne- 1b
|
||||
|
||||
lwz r3,0(r3)
|
||||
extsw r3,r3
|
||||
blr
|
||||
END_FUNC(ompi_atomic_sub_32)
|
31
src/asm/generate-all-asm.sh
Обычный файл
31
src/asm/generate-all-asm.sh
Обычный файл
@ -0,0 +1,31 @@
|
||||
#!/bin/sh
#
# Generate every pre-built assembly file described in asm-data.txt.
#
# usage: generate-all-asm.sh PERL SRCDIR DESTDIR
#
#   PERL     perl interpreter used to run generate-asm.pl
#   SRCDIR   directory holding asm-data.txt, generate-asm.pl and base/
#   DESTDIR  directory whose generated/ subdirectory receives the output
#
# Exit status is 0 when all files were generated and 1 when the
# arguments are incomplete or at least one generation step failed.

perl="$1"
srcdir="$2"
destdir="$3"
ret=0

if test -z "$perl" || test -z "$srcdir" || test -z "$destdir" ; then
    echo "ERROR: invalid argument to generate-all-asm.sh"
    echo "usage: generate-all-asm.sh [PERL] [SRCDIR] [DESTDIR]"
    exit 1
fi

datafile="$srcdir/asm-data.txt"

# An architecture may be listed several times (once per assembly format);
# sort -u makes sure each one is only walked once.
for asmarch in `grep -v '^#' "$datafile" | cut -f1 | sort -u | xargs` ; do
    if test ! -f "${srcdir}/base/${asmarch}.asm" ; then
        echo "WARNING: Skipping missing assembly arch ${asmarch}"
        continue
    fi

    # Anchor the match at the start of the line so only data rows of
    # this architecture are selected.
    for asmformat in `grep "^$asmarch" "$datafile" | cut -f2 | xargs` ; do
        echo "--> Generating assembly for $asmarch $asmformat"
        output="`grep \"^$asmarch.*$asmformat\" \"$datafile\" | cut -f3`"
        # The generator lives next to this script's data in $srcdir, not
        # necessarily in the current directory (VPATH builds).
        $perl "$srcdir/generate-asm.pl" "$asmarch" "$asmformat" "$srcdir/base" "$destdir/generated/atomic-$output.s"
        if test "$?" != "0" ; then
            echo "WARNING: Failed to generate assembly for $asmarch $asmformat"
            ret=1
        fi
    done
done

exit $ret
|
92
src/asm/generate-asm.pl
Обычный файл
92
src/asm/generate-asm.pl
Обычный файл
@ -0,0 +1,92 @@
|
||||
#!/usr/bin/perl -w
#
# Expand an architecture template (BASEDIR/ASMARCH.asm) into a concrete
# assembly file for one assembler dialect.
#
# usage: generate-asm.pl ASMARCH ASMFORMAT BASEDIR OUTPUT
#
# ASMFORMAT is ten '-' separated fields:
#   TEXT       directive that replaces the TEXT token
#   GLOBAL     directive that replaces the GLOBAL token
#   SUFFIX     text appended after a label definition
#   GSYM       prefix of global symbols
#   LSYM       prefix of local symbols
#   TYPE       character placed before "function" in a .type directive
#              (empty: no .type directive is emitted)
#   SIZE       non-zero: emit a .size directive at END_FUNC
#   ALIGN_LOG  non-zero: ALIGN(n) is emitted as .align log2(n)
#   DEL_R_REG  zero: the "r" / "cr" prefix is stripped from register names
#   IS64BIT    zero: text between #START_64BIT and #END_64BIT is dropped
#

use strict;

my $asmarch = shift;
my $asmformat = shift;
my $basedir = shift;
my $output = shift;

if (! $asmarch || ! defined($asmformat) || ! defined($basedir) ||
    ! defined($output)) {
    print "usage: generate-asm.pl [ASMARCH] [ASMFORMAT] [BASEDIR] [OUTPUT NAME]\n";
    exit(1);
}

my $TEXT = "";
my $GLOBAL = "";
my $SUFFIX = "";
my $GSYM = "";
my $LSYM = "";
my $TYPE = "";
my $SIZE = 0;
my $ALIGN_LOG = 0;
my $DEL_R_REG = 0;
my $IS64BIT = 0;

# Validate the format before touching any file so that a bad argument
# never leaves a truncated output file behind.
my @fields = (
    $asmformat =~ /(.*)\-(.*)\-(.*)\-(.*)\-(.*)\-(.*)\-(.*)\-(.*)\-(.*)\-(.*)/);
if (scalar(@fields) != 10) {
    die "Could not parse assembly format \"$asmformat\"\n";
}
($TEXT, $GLOBAL, $SUFFIX, $GSYM, $LSYM, $TYPE, $SIZE, $ALIGN_LOG, $DEL_R_REG, $IS64BIT) = @fields;

open(INPUT, "<", "$basedir/$asmarch.asm") ||
    die "Could not open $basedir/$asmarch.asm: $!\n";
open(OUTPUT, ">", $output) || die "Could not open $output: $!\n";

my $current_func = "";
my $delete = 0;

while (<INPUT>) {
    s/TEXT/$TEXT/g;
    s/GLOBAL/$GLOBAL/g;
    s/GSYM\((.*)\)/$GSYM$1$SUFFIX/g;
    s/LSYM\((.*)\)/$LSYM$1$SUFFIX/g;
    if ($DEL_R_REG == 0) {
        # Condition-register fields first: otherwise "cr7" would be
        # turned into "c7" by the general-register rule below.
        s/cr([0-9][0-9]?)/$1/g;
        s/r([0-9][0-9]?)/$1/g;
    }

    if (/START_FUNC\((.*)\)/) {
        $current_func = $1;
        $_ = "\t$GLOBAL $GSYM$current_func\n";
        if ($TYPE ne "") {
            $_ .= "\t.type $current_func, $TYPE" . "function\n";
        }
        $_ .= "$GSYM$current_func$SUFFIX\n";
    }

    if (/END_FUNC\((.*)\)/) {
        s/END_FUNC\((.*)\)//g;
        if ($SIZE != 0) {
            $_ = "\t.size $current_func, .-$current_func\n";
        } else {
            chomp;
        }
    }

    if ($ALIGN_LOG == 0) {
        s/ALIGN\((\d*)\)/.align $1/g;
    } else {
        # This assembler wants the exponent, not the byte count:
        # halve the value until it reaches 1 and count the steps.
        if (m/ALIGN\((\d*)\)/) {
            my $val = $1;
            my $result = 0;
            while ($val > 1) { $val /= 2; $result++ }
            s/ALIGN\((\d*)\)/.align $result/;
        }
    }

    if (/^\#START_64BIT/) {
        $_ = "";
        if ($IS64BIT == 0) {
            $delete = 1;
        }
    }
    if (/^\#END_64BIT/) {
        $_ = "";
        $delete = 0;
    }

    if ($delete == 0) {
        print OUTPUT $_;
    }

}

close(INPUT);
close(OUTPUT) || die "Could not write $output: $!\n";
|
@ -19,7 +19,7 @@
|
||||
* On alpha, everything is load-locked, store-conditional...
|
||||
*/
|
||||
|
||||
#ifdef HAVE_SMP
|
||||
#if OMPI_WANT_SMP_LOCKS
|
||||
|
||||
#define MB() __asm__ __volatile__ ("mb");
|
||||
#define RMB() __asm__ __volatile__ ("mb");
|
||||
@ -34,6 +34,25 @@
|
||||
#endif
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
*
|
||||
* Define constants for Alpha
|
||||
*
|
||||
*********************************************************************/
|
||||
#define OMPI_HAVE_ATOMIC_MEM_BARRIER 1
|
||||
|
||||
#define OMPI_HAVE_ATOMIC_CMPSET_32 1
|
||||
|
||||
#define OMPI_HAVE_ATOMIC_CMPSET_64 1
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
*
|
||||
* Memory Barriers
|
||||
*
|
||||
*********************************************************************/
|
||||
#if OMPI_GCC_INLINE_ASSEMBLY
|
||||
|
||||
static inline void ompi_atomic_mb(void)
|
||||
{
|
||||
MB();
|
||||
@ -51,7 +70,16 @@ static inline void ompi_atomic_wmb(void)
|
||||
WMB();
|
||||
}
|
||||
|
||||
#define OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_32
|
||||
#endif /* OMPI_GCC_INLINE_ASSEMBLY */
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
*
|
||||
* Atomic math operations
|
||||
*
|
||||
*********************************************************************/
|
||||
#if OMPI_GCC_INLINE_ASSEMBLY
|
||||
|
||||
static inline int ompi_atomic_cmpset_32( volatile int32_t *addr,
|
||||
int32_t oldval, int32_t newval)
|
||||
{
|
||||
@ -96,7 +124,7 @@ static inline int ompi_atomic_cmpset_rel_32(volatile int32_t *addr,
|
||||
return ompi_atomic_cmpset_32(addr, oldval, newval);
|
||||
}
|
||||
|
||||
#define OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_64
|
||||
|
||||
static inline int ompi_atomic_cmpset_64( volatile int64_t *addr,
|
||||
int64_t oldval, int64_t newval)
|
||||
{
|
||||
@ -141,4 +169,8 @@ static inline int ompi_atomic_cmpset_rel_64(volatile int64_t *addr,
|
||||
return ompi_atomic_cmpset_64(addr, oldval, newval);
|
||||
}
|
||||
|
||||
|
||||
#endif /* OMPI_GCC_INLINE_ASSEMBLY */
|
||||
|
||||
|
||||
#endif /* ! OMPI_SYS_ARCH_ATOMIC_H */
|
||||
|
@ -24,6 +24,7 @@ cat > $CFILE<<EOF
|
||||
#include <inttypes.h>
|
||||
#define static
|
||||
#define inline
|
||||
#define OMPI_GCC_INLINE_ASSEMBLY 1
|
||||
#include "atomic.h"
|
||||
EOF
|
||||
|
||||
|
@ -19,7 +19,7 @@
|
||||
*/
|
||||
|
||||
|
||||
#ifdef HAVE_SMP
|
||||
#if OMPI_WANT_SMP_LOCKS
|
||||
#define SMPLOCK "lock; "
|
||||
#define MB() __asm__ __volatile__("": : :"memory")
|
||||
#else
|
||||
@ -28,6 +28,25 @@
|
||||
#endif
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
*
|
||||
* Define constants for AMD64 / x86_64 / EM64T / ...
|
||||
*
|
||||
*********************************************************************/
|
||||
#define OMPI_HAVE_ATOMIC_MEM_BARRIER 1
|
||||
|
||||
#define OMPI_HAVE_ATOMIC_CMPSET_32 1
|
||||
|
||||
#define OMPI_HAVE_ATOMIC_CMPSET_64 1
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
*
|
||||
* Memory Barriers
|
||||
*
|
||||
*********************************************************************/
|
||||
#if OMPI_GCC_INLINE_ASSEMBLY
|
||||
|
||||
static inline void ompi_atomic_mb(void)
|
||||
{
|
||||
MB();
|
||||
@ -45,7 +64,16 @@ static inline void ompi_atomic_wmb(void)
|
||||
MB();
|
||||
}
|
||||
|
||||
#define OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_32
|
||||
#endif /* OMPI_GCC_INLINE_ASSEMBLY */
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
*
|
||||
* Atomic math operations
|
||||
*
|
||||
*********************************************************************/
|
||||
#if OMPI_GCC_INLINE_ASSEMBLY
|
||||
|
||||
static inline int ompi_atomic_cmpset_32( volatile int32_t *addr,
|
||||
int32_t oldval, int32_t newval)
|
||||
{
|
||||
@ -57,10 +85,13 @@ static inline int ompi_atomic_cmpset_32( volatile int32_t *addr,
|
||||
return ((int32_t)prev == oldval);
|
||||
}
|
||||
|
||||
#endif /* OMPI_GCC_INLINE_ASSEMBLY */
|
||||
|
||||
#define ompi_atomic_cmpset_acq_32 ompi_atomic_cmpset_32
|
||||
#define ompi_atomic_cmpset_rel_32 ompi_atomic_cmpset_32
|
||||
|
||||
#define OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_64
|
||||
#if OMPI_GCC_INLINE_ASSEMBLY
|
||||
|
||||
static inline int ompi_atomic_cmpset_64( volatile int64_t *addr,
|
||||
int64_t oldval, int64_t newval)
|
||||
{
|
||||
@ -75,6 +106,8 @@ static inline int ompi_atomic_cmpset_64( volatile int64_t *addr,
|
||||
return (prev == oldval);
|
||||
}
|
||||
|
||||
#endif /* OMPI_GCC_INLINE_ASSEMBLY */
|
||||
|
||||
#define ompi_atomic_cmpset_acq_64 ompi_atomic_cmpset_64
|
||||
#define ompi_atomic_cmpset_rel_64 ompi_atomic_cmpset_64
|
||||
|
||||
|
@ -24,6 +24,7 @@ cat > $CFILE<<EOF
|
||||
#include <inttypes.h>
|
||||
#define static
|
||||
#define inline
|
||||
#define OMPI_GCC_INLINE_ASSEMBLY 1
|
||||
#include "atomic.h"
|
||||
EOF
|
||||
|
||||
|
40
src/include/sys/architecture.h
Обычный файл
40
src/include/sys/architecture.h
Обычный файл
@ -0,0 +1,40 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
/*
|
||||
* List of supported architectures
|
||||
*/
|
||||
|
||||
#ifndef OMPI_SYS_ARCHITECTURE_H
|
||||
#define OMPI_SYS_ARCHITECTURE_H
|
||||
|
||||
/* Architectures */
|
||||
#define OMPI_UNSUPPORTED 0000
|
||||
#define OMPI_WINDOWS 0001
|
||||
#define OMPI_IA32 0010
|
||||
#define OMPI_IA64 0020
|
||||
#define OMPI_AMD64 0030
|
||||
#define OMPI_ALPHA 0040
|
||||
#define OMPI_POWERPC32 0050
|
||||
#define OMPI_POWERPC64 0051
|
||||
#define OMPI_SPARC32 0060
|
||||
#define OMPI_SPARC64 0061
|
||||
|
||||
/* Formats */
|
||||
#define OMPI_DEFAULT 1000 /* standard for given architecture */
|
||||
#define OMPI_DARWIN 1001 /* Darwin / OS X on PowerPC */
|
||||
#define OMPI_PPC_LINUX 1002 /* Linux on PowerPC */
|
||||
#define OMPI_AIX 1003 /* AIX on Power / PowerPC */
|
||||
|
||||
#endif /* #ifndef OMPI_SYS_ARCHITECTURE_H */
|
@ -23,6 +23,18 @@
|
||||
* http://www.freebsd.org/cgi/man.cgi?query=atomic&sektion=9
|
||||
*
|
||||
* Only the necessary subset of functions are implemented here.
|
||||
*
|
||||
* The following #defines will be true / false based on
|
||||
* assembly support:
|
||||
*
|
||||
* \c OMPI_HAVE_ATOMIC_MEM_BARRIER atomic memory barriers
|
||||
* \c OMPI_HAVE_ATOMIC_SPINLOCKS atomic spinlocks
|
||||
* \c OMPI_HAVE_ATOMIC_MATH_32 if 32 bit add/sub/cmpset can be done "atomically"
|
||||
* \c OMPI_HAVE_ATOMIC_MATH_64 if 64 bit add/sub/cmpset can be done "atomically"
|
||||
*
|
||||
* Note that for the Atomic math, atomic add/sub may be implemented as
|
||||
* C code using ompi_atomic_cmpset. The appearance of atomic
|
||||
* operation will be upheld in these cases.
|
||||
*/
|
||||
|
||||
#ifndef OMPI_SYS_ATOMIC_H
|
||||
@ -30,11 +42,91 @@
|
||||
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include "include/sys/architecture.h"
|
||||
|
||||
#ifdef HAVE_SYS_TYPES_H
|
||||
#include <sys/types.h>
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/**********************************************************************
|
||||
*
|
||||
* Load the appropriate architecture files and set some reasonable
|
||||
* default values for our support
|
||||
*
|
||||
*********************************************************************/
|
||||
#if defined(DOXYGEN)
|
||||
/* don't include system-level gorp when generating doxygen files */
|
||||
#elif OMPI_ASSEMBLY_ARCH == OMPI_WINDOWS || defined(win32)
|
||||
/* windows first, as they have API-level primitives for this stuff */
|
||||
#include "include/sys/win32/atomic.h"
|
||||
#elif OMPI_ASSEMBLY_ARCH == OMPI_ALPHA
|
||||
#include "include/sys/alpha/atomic.h"
|
||||
#elif OMPI_ASSEMBLY_ARCH == OMPI_AMD64
|
||||
#include "include/sys/amd64/atomic.h"
|
||||
#elif OMPI_ASSEMBLY_ARCH == OMPI_IA32
|
||||
#include "include/sys/ia32/atomic.h"
|
||||
#elif OMPI_ASSEMBLY_ARCH == OMPI_IA64
|
||||
#include "include/sys/ia64/atomic.h"
|
||||
#elif OMPI_ASSEMBLY_ARCH == OMPI_POWERPC32
|
||||
#include "include/sys/powerpc/atomic.h"
|
||||
#elif OMPI_ASSEMBLY_ARCH == OMPI_POWERPC64
|
||||
#include "include/sys/powerpc/atomic.h"
|
||||
#elif OMPI_ASSEMBLY_ARCH == OMPI_SPARC32
|
||||
#error "32 bit Sparc support not implemented yet"
|
||||
#elif OMPI_ASSEMBLY_ARCH == OMPI_SPARC64
|
||||
#include "include/sys/sparc64/atomic.h"
|
||||
#endif
|
||||
|
||||
/* compare and set operations can't really be emulated from software,
|
||||
so if these defines aren't already set, they should be set to 0
|
||||
now */
|
||||
#ifndef OMPI_HAVE_ATOMIC_CMPSET_32
|
||||
#define OMPI_HAVE_ATOMIC_CMPSET_32 0
|
||||
#endif
|
||||
#ifndef OMPI_HAVE_ATOMIC_CMPSET_64
|
||||
#define OMPI_HAVE_ATOMIC_CMPSET_64 0
|
||||
#endif
|
||||
|
||||
/**********************************************************************
|
||||
*
|
||||
* Memory Barriers - defined here if running doxygen or have barriers
|
||||
* but can't inline
|
||||
*
|
||||
*********************************************************************/
|
||||
#ifndef OMPI_HAVE_ATOMIC_MEM_BARRIER
|
||||
/* no way to emulate in C code */
|
||||
#define OMPI_HAVE_ATOMIC_MEM_BARRIER 0
|
||||
#endif
|
||||
|
||||
#if defined(DOXYGEN) || OMPI_HAVE_ATOMIC_MEM_BARRIER
|
||||
/**
|
||||
* Memory barrier
|
||||
*/
|
||||
void ompi_atomic_mb(void);
|
||||
|
||||
/**
|
||||
* Read memory barrier
|
||||
*/
|
||||
void ompi_atomic_rmb(void);
|
||||
|
||||
/**
|
||||
* Write memory barrier.
|
||||
*/
|
||||
void ompi_atomic_wmb(void);
|
||||
|
||||
#endif /* defined(DOXYGEN) || OMPI_HAVE_ATOMIC_MEM_BARRIER */
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
*
|
||||
* Atomic spinlocks - always inlined, if have atomic cmpset
|
||||
*
|
||||
*********************************************************************/
|
||||
/**
|
||||
* Volatile lock object (with optional padding).
|
||||
*/
|
||||
@ -44,38 +136,33 @@ struct ompi_lock_t {
|
||||
char padding[sizeof(int)]; /**< Array for optional padding */
|
||||
} u;
|
||||
};
|
||||
|
||||
typedef struct ompi_lock_t ompi_lock_t;
|
||||
|
||||
|
||||
/**
|
||||
* Memory barrier
|
||||
*/
|
||||
static inline void ompi_atomic_mb(void);
|
||||
|
||||
|
||||
/**
|
||||
* Read memory barrier
|
||||
*/
|
||||
static inline void ompi_atomic_rmb(void);
|
||||
|
||||
|
||||
/**
|
||||
* Write memory barrier.
|
||||
*/
|
||||
static inline void ompi_atomic_wmb(void);
|
||||
|
||||
#if 0
|
||||
/**
|
||||
* Atomically add to an integer.
|
||||
*
|
||||
* @param addr Address of integer.
|
||||
* @param newval Value to set.
|
||||
* @return Old value of integer.
|
||||
*/
|
||||
static inline int ompi_atomic_fetch_and_set_int(volatile void *addr, int newval);
|
||||
#ifndef OMPI_HAVE_ATOMIC_SPINLOCKS
|
||||
#define OMPI_HAVE_ATOMIC_SPINLOCKS (OMPI_HAVE_ATOMIC_CMPSET_32 || OMPI_HAVE_ATOMIC_CMPSET_64)
|
||||
#endif
|
||||
|
||||
#if defined(DOXYGEN) || OMPI_HAVE_ATOMIC_SPINLOCKS
|
||||
|
||||
|
||||
/**
|
||||
* Enumeration of lock states
|
||||
*/
|
||||
enum {
|
||||
OMPI_ATOMIC_UNLOCKED = 0,
|
||||
OMPI_ATOMIC_LOCKED = 1
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* Initialize a lock to value
|
||||
*
|
||||
* @param lock Address of the lock
|
||||
* @param value Initial value to set lock to
|
||||
*/
|
||||
static inline void ompi_atomic_init(ompi_lock_t* lock, int value);
|
||||
|
||||
|
||||
/**
|
||||
* Try to acquire a lock.
|
||||
*
|
||||
@ -100,144 +187,96 @@ static inline void ompi_atomic_lock(ompi_lock_t *lock);
|
||||
*/
|
||||
static inline void ompi_atomic_unlock(ompi_lock_t *lock);
|
||||
|
||||
#endif /* OMPI_HAVE_ATOMIC_SPINLOCKS */
|
||||
|
||||
/*
|
||||
* Include system specific inline asm definitions. Otherwise
|
||||
* the definitions are in system specific .s files in src/util.
|
||||
*/
|
||||
|
||||
/* Include win32/atomic.h if we are in windows platform. Else, we
|
||||
can go through other compilers and options. */
|
||||
#ifdef WIN32
|
||||
#define OMPI_HAVE_ATOMIC_WIN32 1
|
||||
#include "include/sys/win32/atomic.h"
|
||||
#else /* only now go through this stuff */
|
||||
#define OMPI_HAVE_ATOMIC_WIN32 0
|
||||
|
||||
#if defined(__alpha__)
|
||||
# define OMPI_HAVE_ATOMIC 1
|
||||
# ifdef __GNUC__
|
||||
# include "alpha/atomic.h"
|
||||
# endif
|
||||
#elif defined(__amd64__) || defined(__x86_64__)
|
||||
# define OMPI_HAVE_ATOMIC 1
|
||||
# ifdef __GNUC__
|
||||
# include "amd64/atomic.h"
|
||||
# endif
|
||||
#elif defined(__i386__)
|
||||
# define OMPI_HAVE_ATOMIC 1
|
||||
# ifdef __GNUC__
|
||||
# include "ia32/atomic.h"
|
||||
# endif
|
||||
#elif defined(__ia64__)
|
||||
# define OMPI_HAVE_ATOMIC 1
|
||||
# ifdef __GNUC__
|
||||
# include "ia64/atomic.h"
|
||||
# endif
|
||||
#elif defined(__POWERPC__)
|
||||
# define OMPI_HAVE_ATOMIC 1
|
||||
# ifdef __GNUC__
|
||||
# include "powerpc/atomic.h"
|
||||
# endif
|
||||
#elif defined(__sparc__) || defined(__sparc)
|
||||
# define OMPI_HAVE_ATOMIC 1
|
||||
# ifdef __GNUC__
|
||||
# include "sparc64/atomic.h"
|
||||
# endif
|
||||
#else
|
||||
#error No atomic operations defined yet
|
||||
/**********************************************************************
|
||||
*
|
||||
* Atomic math operations
|
||||
*
|
||||
*********************************************************************/
|
||||
#ifndef OMPI_HAVE_ATOMIC_CMPSET_32
|
||||
#define OMPI_HAVE_ATOMIC_CMPSET_32 0
|
||||
#endif
|
||||
#endif /* ifdef WIN32*/
|
||||
|
||||
#ifndef OMPI_HAVE_ATOMIC
|
||||
#define OMPI_HAVE_ATOMIC 0
|
||||
#if defined(DOXYGEN) || OMPI_HAVE_ATOMIC_CMPSET_32
|
||||
int ompi_atomic_cmpset_32(volatile int32_t *addr, int32_t oldval,
|
||||
int32_t newval);
|
||||
int ompi_atomic_cmpset_acq_32(volatile int32_t *addr, int32_t oldval,
|
||||
int32_t newval);
|
||||
int ompi_atomic_cmpset_rel_32(volatile int32_t *addr, int32_t oldval,
|
||||
int32_t newval);
|
||||
#endif
|
||||
|
||||
/* All the architectures provide a compare_and_set atomic operations. If
|
||||
* they dont provide atomic additions and/or substractions then we can
|
||||
* define these operations using the atomic compare_and_set.
|
||||
*/
|
||||
|
||||
#if !defined(OMPI_ARCHITECTURE_DEFINE_ATOMIC_ADD_32)
|
||||
#if defined(OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_32)
|
||||
static inline int32_t ompi_atomic_add_32(volatile int32_t *addr, int delta)
|
||||
{
|
||||
int32_t oldval;
|
||||
|
||||
do {
|
||||
oldval = *addr;
|
||||
} while (0 == ompi_atomic_cmpset_32(addr, oldval, oldval + delta));
|
||||
return (oldval + delta);
|
||||
}
|
||||
#endif /* OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_32 */
|
||||
#endif /* OMPI_ARCHITECTURE_DEFINE_ATOMIC_ADD_32 */
|
||||
#ifndef OMPI_HAVE_ATOMIC_CMPSET_64
|
||||
#define OMPI_HAVE_ATOMIC_CMPSET_64 0
|
||||
#endif
|
||||
#if defined(DOXYGEN) || OMPI_HAVE_ATOMIC_CMPSET_64
|
||||
int ompi_atomic_cmpset_64(volatile int64_t *addr, int64_t oldval,
|
||||
int64_t newval);
|
||||
int ompi_atomic_cmpset_acq_64(volatile int64_t *addr, int64_t oldval,
|
||||
int64_t newval);
|
||||
int ompi_atomic_cmpset_rel_64(volatile int64_t *addr, int64_t oldval,
|
||||
int64_t newval);
|
||||
#endif
|
||||
|
||||
#if !defined(OMPI_ARCHITECTURE_DEFINE_ATOMIC_SUB_32)
|
||||
#if defined(OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_32)
|
||||
static inline int32_t ompi_atomic_sub_32(volatile int32_t *addr, int delta)
|
||||
{
|
||||
int32_t oldval;
|
||||
|
||||
do {
|
||||
oldval = *addr;
|
||||
} while (0 == ompi_atomic_cmpset_32(addr, oldval, oldval - delta));
|
||||
return (oldval - delta);
|
||||
}
|
||||
#endif /* OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_32 */
|
||||
#endif /* OMPI_ARCHITECTURE_DEFINE_ATOMIC_SUB_32 */
|
||||
#ifndef OMPI_HAVE_ATOMIC_MATH_32
|
||||
/* define to 0 for these tests. Will fix up later. */
|
||||
#define OMPI_HAVE_ATOMIC_MATH_32 0
|
||||
#endif
|
||||
#if defined(DOXYGEN) || OMPI_HAVE_ATOMIC_MATH_32 || OMPI_HAVE_ATOMIC_CMPSET_32
|
||||
#if ! OMPI_HAVE_ATOMIC_MATH_32
|
||||
static inline
|
||||
#endif
|
||||
int32_t ompi_atomic_add_32(volatile int32_t *addr, int delta);
|
||||
#if ! OMPI_HAVE_ATOMIC_MATH_32
|
||||
static inline
|
||||
#endif
|
||||
int32_t ompi_atomic_sub_32(volatile int32_t *addr, int delta);
|
||||
#endif /* OMPI_HAVE_ATOMIC_MATH_32 */
|
||||
#if ! OMPI_HAVE_ATOMIC_MATH_32
|
||||
/* fix up the value of ompi_have_atomic_math_32 to allow for C versions */
|
||||
#undef OMPI_HAVE_ATOMIC_MATH_32
|
||||
#define OMPI_HAVE_ATOMIC_MATH_32 OMPI_HAVE_ATOMIC_CMPSET_32
|
||||
#endif
|
||||
|
||||
/* Some architectures does not provide support for the 64 bits
|
||||
* atomic operations. Until we find a better solution let's just
|
||||
* undefine all those functions.
|
||||
*/
|
||||
#if defined(OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_64)
|
||||
#if !defined(OMPI_ARCHITECTURE_DEFINE_ATOMIC_ADD_64)
|
||||
static inline int64_t ompi_atomic_add_64(volatile int64_t *addr, int64_t delta)
|
||||
{
|
||||
int64_t oldval;
|
||||
|
||||
do {
|
||||
oldval = *addr;
|
||||
} while (0 == ompi_atomic_cmpset_64(addr, oldval, oldval + delta));
|
||||
return (oldval + delta);
|
||||
}
|
||||
#endif /* OMPI_ARCHITECTURE_DEFINE_ATOMIC_ADD_64 */
|
||||
#ifndef OMPI_HAVE_ATOMIC_MATH_64
|
||||
/* define to 0 for these tests. Will fix up later. */
|
||||
#define OMPI_HAVE_ATOMIC_MATH_64 0
|
||||
#endif
|
||||
#if defined(DOXYGEN) || OMPI_HAVE_ATOMIC_MATH_64 || OMPI_HAVE_ATOMIC_CMPSET_64
|
||||
#if OMPI_HAVE_ATOMIC_CMPSET_64
|
||||
static inline
|
||||
#endif
|
||||
int64_t ompi_atomic_add_64(volatile int64_t *addr, int64_t delta);
|
||||
#if OMPI_HAVE_ATOMIC_CMPSET_64
|
||||
static inline
|
||||
#endif
|
||||
int64_t ompi_atomic_sub_64(volatile int64_t *addr, int64_t delta);
|
||||
#endif /* OMPI_HAVE_ATOMIC_MATH_64 */
|
||||
#if ! OMPI_HAVE_ATOMIC_MATH_64
|
||||
/* fix up the value of ompi_have_atomic_math_64 to allow for C versions */
|
||||
#undef OMPI_HAVE_ATOMIC_MATH_64
|
||||
#define OMPI_HAVE_ATOMIC_MATH_64 OMPI_HAVE_ATOMIC_CMPSET_64
|
||||
#endif
|
||||
|
||||
#if !defined(OMPI_ARCHITECTURE_DEFINE_ATOMIC_SUB_64)
|
||||
static inline int64_t ompi_atomic_sub_64(volatile int64_t *addr, int64_t delta)
|
||||
{
|
||||
int64_t oldval;
|
||||
|
||||
do {
|
||||
oldval = *addr;
|
||||
} while (0 == ompi_atomic_cmpset_64(addr, oldval, oldval - delta));
|
||||
return (oldval - delta);
|
||||
}
|
||||
#endif /* OMPI_ARCHITECTURE_DEFINE_ATOMIC_SUB_64 */
|
||||
#endif /* OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_64 */
|
||||
#if defined(DOXYGEN) || (OMPI_HAVE_ATOMIC_CMPSET_32 || OMPI_HAVE_ATOMIC_CMPSET_64)
|
||||
/* these are always done with inline functions, so always mark as
|
||||
static inline */
|
||||
static inline int ompi_atomic_cmpset_xx(volatile void* addr, int64_t oldval,
|
||||
int64_t newval, size_t length);
|
||||
static inline int ompi_atomic_cmpset_acq_xx(volatile void* addr,
|
||||
int64_t oldval, int64_t newval,
|
||||
size_t length);
|
||||
static inline int ompi_atomic_cmpset_rel_xx(volatile void* addr,
|
||||
int64_t oldval, int64_t newval,
|
||||
size_t length);
|
||||
static inline void ompi_atomic_add_xx(volatile void* addr,
|
||||
int32_t value, size_t length);
|
||||
static inline void ompi_atomic_sub_xx(volatile void* addr,
|
||||
int32_t value, size_t length);
|
||||
|
||||
static inline int ompi_atomic_cmpset_xx( volatile void* addr, int64_t oldval,
|
||||
int64_t newval, size_t length )
|
||||
{
|
||||
switch( length ) {
|
||||
#if defined(OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_32)
|
||||
case 4:
|
||||
return ompi_atomic_cmpset_32( (volatile int32_t*)addr,
|
||||
(int32_t)oldval, (int32_t)newval );
|
||||
#endif /* OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_32 */
|
||||
|
||||
#if defined(OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_64)
|
||||
case 8:
|
||||
return ompi_atomic_cmpset_64( (volatile int64_t*)addr,
|
||||
(int64_t)oldval, (int64_t)newval );
|
||||
#endif /* OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_64 */
|
||||
default:
|
||||
/* This should never happen, so deliberately cause a seg fault
|
||||
for corefile analysis */
|
||||
*(int*)(0) = 0;
|
||||
}
|
||||
return 0; /* always fail */
|
||||
}
|
||||
|
||||
/**
|
||||
* Atomic compare and set of pointer with relaxed semantics. This
|
||||
@ -251,30 +290,9 @@ static inline int ompi_atomic_cmpset_xx( volatile void* addr, int64_t oldval,
|
||||
* See ompi_atomic_cmpset_* for pseudo-code.
|
||||
*/
|
||||
#define ompi_atomic_cmpset( ADDR, OLDVAL, NEWVAL ) \
|
||||
ompi_atomic_cmpset_xx( (volatile void*)(ADDR), (int64_t)(OLDVAL), (int64_t)(NEWVAL), sizeof(*(ADDR)) )
|
||||
ompi_atomic_cmpset_xx( (volatile void*)(ADDR), (int64_t)(OLDVAL), \
|
||||
(int64_t)(NEWVAL), sizeof(*(ADDR)) )
|
||||
|
||||
static inline int ompi_atomic_cmpset_acq_xx( volatile void* addr, int64_t oldval,
|
||||
int64_t newval, size_t length )
|
||||
{
|
||||
switch( length ) {
|
||||
#if defined(OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_32)
|
||||
case 4:
|
||||
return ompi_atomic_cmpset_acq_32( (volatile int32_t*)addr,
|
||||
(int32_t)oldval, (int32_t)newval );
|
||||
#endif /* OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_32 */
|
||||
|
||||
#if defined(OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_64)
|
||||
case 8:
|
||||
return ompi_atomic_cmpset_acq_64( (volatile int64_t*)addr,
|
||||
(int64_t)oldval, (int64_t)newval );
|
||||
#endif /* OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_64 */
|
||||
default:
|
||||
/* This should never happen, so deliberately cause a seg fault
|
||||
for corefile analysis */
|
||||
*(int*)(0) = 0;
|
||||
}
|
||||
return 0; /* always fail */
|
||||
}
|
||||
|
||||
/**
|
||||
* Atomic compare and set of pointer with acquire semantics. This
|
||||
@ -288,30 +306,9 @@ static inline int ompi_atomic_cmpset_acq_xx( volatile void* addr, int64_t oldval
|
||||
* See ompi_atomic_cmpset_acq_* for pseudo-code.
|
||||
*/
|
||||
#define ompi_atomic_cmpset_acq( ADDR, OLDVAL, NEWVAL ) \
|
||||
ompi_atomic_cmpset_acq_xx( (volatile void*)(ADDR), (int64_t)(OLDVAL), (int64_t)(NEWVAL), sizeof(*(ADDR)) )
|
||||
ompi_atomic_cmpset_acq_xx( (volatile void*)(ADDR), (int64_t)(OLDVAL), \
|
||||
(int64_t)(NEWVAL), sizeof(*(ADDR)) )
|
||||
|
||||
static inline int ompi_atomic_cmpset_rel_xx( volatile void* addr, int64_t oldval,
|
||||
int64_t newval, size_t length )
|
||||
{
|
||||
switch( length ) {
|
||||
#if defined(OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_32)
|
||||
case 4:
|
||||
return ompi_atomic_cmpset_rel_32( (volatile int32_t*)addr,
|
||||
(int32_t)oldval, (int32_t)newval );
|
||||
#endif /* OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_32 */
|
||||
|
||||
#if defined(OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_64)
|
||||
case 8:
|
||||
return ompi_atomic_cmpset_rel_64( (volatile int64_t*)addr,
|
||||
(int64_t)oldval, (int64_t)newval );
|
||||
#endif /* OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_64 */
|
||||
default:
|
||||
/* This should never happen, so deliberately cause a seg fault
|
||||
for corefile analysis */
|
||||
*(int*)(0) = 0;
|
||||
}
|
||||
return 0; /* always fail */
|
||||
}
|
||||
|
||||
/**
|
||||
* Atomic compare and set of pointer with release semantics. This
|
||||
@ -325,28 +322,9 @@ static inline int ompi_atomic_cmpset_rel_xx( volatile void* addr, int64_t oldval
|
||||
* See ompi_atomic_cmpset_rel_* for pseudo-code.
|
||||
*/
|
||||
#define ompi_atomic_cmpset_rel( ADDR, OLDVAL, NEWVAL ) \
|
||||
ompi_atomic_cmpset_rel_xx( (volatile void*)(ADDR), (int64_t)(OLDVAL), (int64_t)(NEWVAL), sizeof(*(ADDR)) )
|
||||
ompi_atomic_cmpset_rel_xx( (volatile void*)(ADDR), (int64_t)(OLDVAL), \
|
||||
(int64_t)(NEWVAL), sizeof(*(ADDR)) )
|
||||
|
||||
static inline void ompi_atomic_add_xx( volatile void* addr, int32_t value, size_t length )
|
||||
{
|
||||
switch( length ) {
|
||||
#if defined(OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_32)
|
||||
case 4:
|
||||
ompi_atomic_add_32( (volatile int32_t*)addr, (int32_t)value );
|
||||
break;
|
||||
#endif /* OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_32 */
|
||||
|
||||
#if defined(OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_64)
|
||||
case 8:
|
||||
ompi_atomic_add_64( (volatile int64_t*)addr, (int64_t)value );
|
||||
break;
|
||||
#endif /* OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_64 */
|
||||
default:
|
||||
/* This should never happen, so deliberately cause a seg fault
|
||||
for corefile analysis */
|
||||
*(int*)(0) = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Atomically increment the content depending on the type. This
|
||||
@ -357,28 +335,8 @@ static inline void ompi_atomic_add_xx( volatile void* addr, int32_t value, size_
|
||||
* @param delta Value to add (converted to <TYPE>).
|
||||
*/
|
||||
#define ompi_atomic_add( ADDR, VALUE ) \
|
||||
ompi_atomic_add_xx( (volatile void*)(ADDR), (int32_t)(VALUE), sizeof(*(ADDR)) )
|
||||
|
||||
static inline void ompi_atomic_sub_xx( volatile void* addr, int32_t value, size_t length )
|
||||
{
|
||||
switch( length ) {
|
||||
#if defined(OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_32)
|
||||
case 4:
|
||||
ompi_atomic_sub_32( (volatile int32_t*)addr, (int32_t)value );
|
||||
break;
|
||||
#endif /* OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_32 */
|
||||
|
||||
#if defined(OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_64)
|
||||
case 8:
|
||||
ompi_atomic_sub_64( (volatile int64_t*)addr, (int64_t)value );
|
||||
break;
|
||||
#endif /* OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_64 */
|
||||
default:
|
||||
/* This should never happen, so deliberately cause a seg fault
|
||||
for corefile analysis */
|
||||
*(int*)(0) = 0;
|
||||
}
|
||||
}
|
||||
ompi_atomic_add_xx( (volatile void*)(ADDR), (int32_t)(VALUE), \
|
||||
sizeof(*(ADDR)) )
|
||||
|
||||
/**
|
||||
* Atomically decrement the content depending on the type. This
|
||||
@ -389,55 +347,23 @@ static inline void ompi_atomic_sub_xx( volatile void* addr, int32_t value, size_
|
||||
* @param delta Value to subtract (converted to <TYPE>).
|
||||
*/
|
||||
#define ompi_atomic_sub( ADDR, VALUE ) \
|
||||
ompi_atomic_sub_xx( (volatile void*)(ADDR), (int32_t)(VALUE), sizeof(*(ADDR)) )
|
||||
ompi_atomic_sub_xx( (volatile void*)(ADDR), (int32_t)(VALUE), \
|
||||
sizeof(*(ADDR)) )
|
||||
|
||||
#if OMPI_HAVE_ATOMIC || OMPI_HAVE_ATOMIC_WIN32
|
||||
|
||||
/*
|
||||
* Atomic locks
|
||||
*/
|
||||
|
||||
/**
|
||||
* Enumeration of lock states
|
||||
*/
|
||||
enum {
|
||||
OMPI_ATOMIC_UNLOCKED = 0,
|
||||
OMPI_ATOMIC_LOCKED = 1
|
||||
};
|
||||
#endif /* defined(DOXYGEN) || (OMPI_HAVE_ATOMIC_CMPSET_32 || OMPI_HAVE_ATOMIC_CMPSET_64) */
|
||||
|
||||
|
||||
static inline int ompi_atomic_trylock(ompi_lock_t *lock)
|
||||
{
|
||||
return ompi_atomic_cmpset_acq( &(lock->u.lock),
|
||||
OMPI_ATOMIC_UNLOCKED, OMPI_ATOMIC_LOCKED);
|
||||
/**********************************************************************
|
||||
*
|
||||
* Include system specific inline asm definitions. Otherwise
|
||||
* the definitions are in system specific .s files in src/util.
|
||||
*
|
||||
*********************************************************************/
|
||||
#include "include/sys/atomic_impl.h"
|
||||
|
||||
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
}
|
||||
|
||||
static inline void ompi_atomic_lock(ompi_lock_t *lock)
|
||||
{
|
||||
while( !ompi_atomic_cmpset_acq( &(lock->u.lock),
|
||||
OMPI_ATOMIC_UNLOCKED, OMPI_ATOMIC_LOCKED) ) {
|
||||
while (lock->u.lock == OMPI_ATOMIC_LOCKED) {
|
||||
/* spin */ ;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static inline void ompi_atomic_unlock(ompi_lock_t *lock)
|
||||
{
|
||||
/*
|
||||
ompi_atomic_cmpset_rel( &(lock->u.lock),
|
||||
OMPI_ATOMIC_LOCKED, OMPI_ATOMIC_UNLOCKED);
|
||||
*/
|
||||
lock->u.lock=OMPI_ATOMIC_UNLOCKED;
|
||||
}
|
||||
|
||||
/* Lock initialization function. It set the lock to UNLOCKED.
|
||||
*/
|
||||
static inline void ompi_atomic_init( ompi_lock_t* lock, int value )
|
||||
{
|
||||
lock->u.lock = value;
|
||||
}
|
||||
|
||||
#endif /* OMPI_HAVE_ATOMIC || OMPI_HAVE_ATOMIC_WIN32 */
|
||||
#endif
|
||||
|
||||
#endif /* OMPI_SYS_ATOMIC_H */
|
||||
|
270
src/include/sys/atomic_impl.h
Обычный файл
270
src/include/sys/atomic_impl.h
Обычный файл
@ -0,0 +1,270 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
/* Inline C implementation of the functions defined in atomic.h */
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
*
|
||||
* Atomic math operations
|
||||
*
|
||||
* All the architectures provide a compare_and_set atomic operation. If
|
||||
* they don't provide atomic additions and/or subtractions then we can
|
||||
* define these operations using the atomic compare_and_set.
|
||||
*
|
||||
* Some architectures do not provide support for the 64 bit
|
||||
* atomic operations. Until we find a better solution let's just
|
||||
* undefine all those functions if there is no 64 bit cmpset
|
||||
*
|
||||
*********************************************************************/
|
||||
#if OMPI_HAVE_ATOMIC_CMPSET_32
|
||||
|
||||
#if !defined(OMPI_HAVE_ATOMIC_ADD_32)
|
||||
#define OMPI_HAVE_ATOMIC_ADD_32 1
|
||||
static inline int32_t
|
||||
ompi_atomic_add_32(volatile int32_t *addr, int delta)
|
||||
{
|
||||
int32_t oldval;
|
||||
|
||||
do {
|
||||
oldval = *addr;
|
||||
} while (0 == ompi_atomic_cmpset_32(addr, oldval, oldval + delta));
|
||||
return (oldval + delta);
|
||||
}
|
||||
#endif /* OMPI_HAVE_ATOMIC_ADD_32 */
|
||||
|
||||
|
||||
#if !defined(OMPI_HAVE_ATOMIC_SUB_32)
|
||||
#define OMPI_HAVE_ATOMIC_SUB_32 1
|
||||
static inline int32_t
|
||||
ompi_atomic_sub_32(volatile int32_t *addr, int delta)
|
||||
{
|
||||
int32_t oldval;
|
||||
|
||||
do {
|
||||
oldval = *addr;
|
||||
} while (0 == ompi_atomic_cmpset_32(addr, oldval, oldval - delta));
|
||||
return (oldval - delta);
|
||||
}
|
||||
#endif /* OMPI_HAVE_ATOMIC_SUB_32 */
|
||||
|
||||
#endif /* OMPI_HAVE_ATOMIC_CMPSET_32 */
|
||||
|
||||
|
||||
#if OMPI_HAVE_ATOMIC_CMPSET_64
|
||||
|
||||
#if !defined(OMPI_HAVE_ATOMIC_ADD_64)
|
||||
#define OMPI_HAVE_ATOMIC_ADD_64 1
|
||||
static inline int64_t
|
||||
ompi_atomic_add_64(volatile int64_t *addr, int64_t delta)
|
||||
{
|
||||
int64_t oldval;
|
||||
|
||||
do {
|
||||
oldval = *addr;
|
||||
} while (0 == ompi_atomic_cmpset_64(addr, oldval, oldval + delta));
|
||||
return (oldval + delta);
|
||||
}
|
||||
#endif /* OMPI_HAVE_ATOMIC_ADD_64 */
|
||||
|
||||
|
||||
#if !defined(OMPI_HAVE_ATOMIC_SUB_64)
|
||||
#define OMPI_HAVE_ATOMIC_SUB_64 1
|
||||
static inline int64_t
|
||||
ompi_atomic_sub_64(volatile int64_t *addr, int64_t delta)
|
||||
{
|
||||
int64_t oldval;
|
||||
|
||||
do {
|
||||
oldval = *addr;
|
||||
} while (0 == ompi_atomic_cmpset_64(addr, oldval, oldval - delta));
|
||||
return (oldval - delta);
|
||||
}
|
||||
#endif /* OMPI_HAVE_ATOMIC_SUB_64 */
|
||||
|
||||
#endif /* OMPI_HAVE_ATOMIC_CMPSET_64 */
|
||||
|
||||
|
||||
#if (OMPI_HAVE_ATOMIC_CMPSET_32 || OMPI_HAVE_ATOMIC_CMPSET_64)
|
||||
|
||||
static inline int
|
||||
ompi_atomic_cmpset_xx(volatile void* addr, int64_t oldval,
|
||||
int64_t newval, size_t length)
|
||||
{
|
||||
switch( length ) {
|
||||
#if OMPI_HAVE_ATOMIC_CMPSET_32
|
||||
case 4:
|
||||
return ompi_atomic_cmpset_32( (volatile int32_t*)addr,
|
||||
(int32_t)oldval, (int32_t)newval );
|
||||
#endif /* OMPI_HAVE_ATOMIC_CMPSET_32 */
|
||||
|
||||
#if OMPI_HAVE_ATOMIC_CMPSET_64
|
||||
case 8:
|
||||
return ompi_atomic_cmpset_64( (volatile int64_t*)addr,
|
||||
(int64_t)oldval, (int64_t)newval );
|
||||
#endif /* OMPI_HAVE_ATOMIC_CMPSET_64 */
|
||||
default:
|
||||
/* This should never happen, so deliberately cause a seg fault
|
||||
for corefile analysis */
|
||||
*(int*)(0) = 0;
|
||||
}
|
||||
return 0; /* always fail */
|
||||
}
|
||||
|
||||
|
||||
static inline int
|
||||
ompi_atomic_cmpset_acq_xx(volatile void* addr, int64_t oldval,
|
||||
int64_t newval, size_t length)
|
||||
{
|
||||
switch( length ) {
|
||||
#if OMPI_HAVE_ATOMIC_CMPSET_32
|
||||
case 4:
|
||||
return ompi_atomic_cmpset_acq_32( (volatile int32_t*)addr,
|
||||
(int32_t)oldval, (int32_t)newval );
|
||||
#endif /* OMPI_HAVE_ATOMIC_CMPSET_32 */
|
||||
|
||||
#if OMPI_HAVE_ATOMIC_CMPSET_64
|
||||
case 8:
|
||||
return ompi_atomic_cmpset_acq_64( (volatile int64_t*)addr,
|
||||
(int64_t)oldval, (int64_t)newval );
|
||||
#endif /* OMPI_HAVE_ATOMIC_CMPSET_64 */
|
||||
default:
|
||||
/* This should never happen, so deliberately cause a seg fault
|
||||
for corefile analysis */
|
||||
*(int*)(0) = 0;
|
||||
}
|
||||
return 0; /* always fail */
|
||||
}
|
||||
|
||||
|
||||
static inline int
|
||||
ompi_atomic_cmpset_rel_xx(volatile void* addr, int64_t oldval,
|
||||
int64_t newval, size_t length)
|
||||
{
|
||||
switch( length ) {
|
||||
#if OMPI_HAVE_ATOMIC_CMPSET_32
|
||||
case 4:
|
||||
return ompi_atomic_cmpset_rel_32( (volatile int32_t*)addr,
|
||||
(int32_t)oldval, (int32_t)newval );
|
||||
#endif /* OMPI_HAVE_ATOMIC_CMPSET_32 */
|
||||
|
||||
#if OMPI_HAVE_ATOMIC_CMPSET_64
|
||||
case 8:
|
||||
return ompi_atomic_cmpset_rel_64( (volatile int64_t*)addr,
|
||||
(int64_t)oldval, (int64_t)newval );
|
||||
#endif /* OMPI_HAVE_ATOMIC_CMPSET_64 */
|
||||
default:
|
||||
/* This should never happen, so deliberately cause a seg fault
|
||||
for corefile analysis */
|
||||
*(int*)(0) = 0;
|
||||
}
|
||||
return 0; /* always fail */
|
||||
}
|
||||
|
||||
|
||||
static inline void
|
||||
ompi_atomic_add_xx(volatile void* addr, int32_t value, size_t length)
|
||||
{
|
||||
switch( length ) {
|
||||
#if OMPI_HAVE_ATOMIC_CMPSET_32
|
||||
case 4:
|
||||
ompi_atomic_add_32( (volatile int32_t*)addr, (int32_t)value );
|
||||
break;
|
||||
#endif /* OMPI_HAVE_ATOMIC_CMPSET_32 */
|
||||
|
||||
#if OMPI_HAVE_ATOMIC_CMPSET_64
|
||||
case 8:
|
||||
ompi_atomic_add_64( (volatile int64_t*)addr, (int64_t)value );
|
||||
break;
|
||||
#endif /* OMPI_HAVE_ATOMIC_CMPSET_64 */
|
||||
default:
|
||||
/* This should never happen, so deliberately cause a seg fault
|
||||
for corefile analysis */
|
||||
*(int*)(0) = 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
 * Size-dispatched atomic subtract.
 *
 * Forwards to ompi_atomic_sub_32() or ompi_atomic_sub_64() depending on
 * @a length.  The result of the width-specific routine is discarded.
 *
 * @param addr    Address of the value to modify.
 * @param value   Amount to subtract.  It is an int32_t even for the
 *                8-byte case, where it is widened to int64_t before the
 *                call.
 * @param length  Operand size in bytes.  4 and 8 are handled, each only
 *                when the matching OMPI_HAVE_ATOMIC_CMPSET_* is enabled.
 *                Any other value deliberately faults (see below).
 */
static inline void
ompi_atomic_sub_xx(volatile void* addr, int32_t value, size_t length)
{
   switch( length ) {
#if OMPI_HAVE_ATOMIC_CMPSET_32
   case 4:
      ompi_atomic_sub_32( (volatile int32_t*)addr, (int32_t)value );
      break;
#endif /* OMPI_HAVE_ATOMIC_CMPSET_32 */

#if OMPI_HAVE_ATOMIC_CMPSET_64
   case 8:
      ompi_atomic_sub_64( (volatile int64_t*)addr, (int64_t)value );
      break;
#endif /* OMPI_HAVE_ATOMIC_CMPSET_64 */
   default:
      /* This should never happen, so deliberately cause a seg fault
         for corefile analysis.  The store goes through a volatile
         lvalue: a plain null store is undefined behavior that an
         optimizer is allowed to delete, which would make an
         unsupported length a silent no-op. */
      *(volatile int*)(0) = 0;
   }
}
|
||||
|
||||
#endif /* (OMPI_HAVE_ATOMIC_CMPSET_32 || OMPI_HAVE_ATOMIC_CMPSET_64) */
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
*
|
||||
* Atomic spinlocks
|
||||
*
|
||||
*********************************************************************/
|
||||
#if OMPI_HAVE_ATOMIC_SPINLOCKS
|
||||
/*
|
||||
 * Lock initialization function. It sets the lock to the given initial value.
|
||||
*/
|
||||
static inline void
|
||||
ompi_atomic_init( ompi_lock_t* lock, int value )
|
||||
{
|
||||
lock->u.lock = value;
|
||||
}
|
||||
|
||||
|
||||
static inline int
|
||||
ompi_atomic_trylock(ompi_lock_t *lock)
|
||||
{
|
||||
return ompi_atomic_cmpset_acq( &(lock->u.lock),
|
||||
OMPI_ATOMIC_UNLOCKED, OMPI_ATOMIC_LOCKED);
|
||||
}
|
||||
|
||||
|
||||
static inline void
|
||||
ompi_atomic_lock(ompi_lock_t *lock)
|
||||
{
|
||||
while( !ompi_atomic_cmpset_acq( &(lock->u.lock),
|
||||
OMPI_ATOMIC_UNLOCKED, OMPI_ATOMIC_LOCKED) ) {
|
||||
while (lock->u.lock == OMPI_ATOMIC_LOCKED) {
|
||||
/* spin */ ;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static inline void
|
||||
ompi_atomic_unlock(ompi_lock_t *lock)
|
||||
{
|
||||
/*
|
||||
ompi_atomic_cmpset_rel( &(lock->u.lock),
|
||||
OMPI_ATOMIC_LOCKED, OMPI_ATOMIC_UNLOCKED);
|
||||
*/
|
||||
lock->u.lock=OMPI_ATOMIC_UNLOCKED;
|
||||
}
|
||||
|
||||
#endif /* OMPI_HAVE_ATOMIC_SPINLOCKS */
|
@ -14,7 +14,7 @@
|
||||
|
||||
include $(top_srcdir)/config/Makefile.options
|
||||
|
||||
noinst_HEADERS = atomic.h atomic.s
|
||||
noinst_HEADERS = atomic.h
|
||||
|
||||
# Conditionally install the header files
|
||||
|
||||
|
@ -19,8 +19,7 @@
|
||||
* On ia32, we use cmpxchg.
|
||||
*/
|
||||
|
||||
|
||||
#ifdef HAVE_SMP
|
||||
#if OMPI_WANT_SMP_LOCKS
|
||||
#define SMPLOCK "lock; "
|
||||
#define MB() __asm__ __volatile__("": : :"memory")
|
||||
#else
|
||||
@ -29,6 +28,29 @@
|
||||
#endif
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
*
|
||||
* Define constants for IA32
|
||||
*
|
||||
*********************************************************************/
|
||||
#define OMPI_HAVE_ATOMIC_MEM_BARRIER 1
|
||||
|
||||
#define OMPI_HAVE_ATOMIC_CMPSET_32 1
|
||||
|
||||
#define OMPI_HAVE_ATOMIC_MATH_32 1
|
||||
#define OMPI_HAVE_ATOMIC_ADD_32 1
|
||||
#define OMPI_HAVE_ATOMIC_SUB_32 1
|
||||
|
||||
#define OMPI_HAVE_ATOMIC_CMPSET_64 1
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
*
|
||||
* Memory Barriers
|
||||
*
|
||||
*********************************************************************/
|
||||
#if OMPI_GCC_INLINE_ASSEMBLY
|
||||
|
||||
static inline void ompi_atomic_mb(void)
|
||||
{
|
||||
MB();
|
||||
@ -46,7 +68,16 @@ static inline void ompi_atomic_wmb(void)
|
||||
MB();
|
||||
}
|
||||
|
||||
#define OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_32
|
||||
#endif /* OMPI_GCC_INLINE_ASSEMBLY */
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
*
|
||||
* Atomic math operations
|
||||
*
|
||||
*********************************************************************/
|
||||
#if OMPI_GCC_INLINE_ASSEMBLY
|
||||
|
||||
static inline int ompi_atomic_cmpset_32(volatile int32_t *addr,
|
||||
int32_t oldval,
|
||||
int32_t newval)
|
||||
@ -62,14 +93,17 @@ static inline int ompi_atomic_cmpset_32(volatile int32_t *addr,
|
||||
return (int)ret;
|
||||
}
|
||||
|
||||
#endif /* OMPI_GCC_INLINE_ASSEMBLY */
|
||||
|
||||
#define ompi_atomic_cmpset_acq_32 ompi_atomic_cmpset_32
|
||||
#define ompi_atomic_cmpset_rel_32 ompi_atomic_cmpset_32
|
||||
|
||||
#define OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_64
|
||||
typedef struct {
|
||||
uint32_t lo;
|
||||
uint32_t hi;
|
||||
} lwords_t;
|
||||
#if OMPI_GCC_INLINE_ASSEMBLY
|
||||
|
||||
#ifndef ll_low /* GLIBC provides these somewhere, so protect */
|
||||
#define ll_low(x) *(((unsigned int*)&(x))+0)
|
||||
#define ll_high(x) *(((unsigned int*)&(x))+1)
|
||||
#endif
|
||||
|
||||
/* On Linux the EBX register is used by the shared libraries
|
||||
 * to keep the global offset. At the same time this register is
|
||||
@ -85,27 +119,28 @@ static inline int ompi_atomic_cmpset_64(volatile int64_t *addr,
|
||||
* Compare EDX:EAX with m64. If equal, set ZF and load ECX:EBX into
|
||||
* m64. Else, clear ZF and load m64 into EDX:EAX.
|
||||
*/
|
||||
lwords_t *pold = (lwords_t*)&oldval;
|
||||
lwords_t *pnew = (lwords_t*)&newval;
|
||||
unsigned char realized;
|
||||
unsigned char ret;
|
||||
|
||||
__asm__ __volatile(
|
||||
"push %%ebx \n\t"
|
||||
"movl %4, %%ebx \n\t"
|
||||
SMPLOCK "cmpxchg8b %1 \n\t"
|
||||
"sete %0 \n\t"
|
||||
"pop %%ebx \n\t"
|
||||
: "=qm" (realized)
|
||||
: "m"(*((volatile long*)addr)), "a"(pold->lo), "d"(pold->hi),
|
||||
"r"(pnew->lo), "c"(pnew->hi)
|
||||
: "cc", "memory" );
|
||||
return realized;
|
||||
__asm__ __volatile__(
|
||||
"push %%ebx \n\t"
|
||||
"movl %4, %%ebx \n\t"
|
||||
SMPLOCK "cmpxchg8b (%1) \n\t"
|
||||
"sete %0 \n\t"
|
||||
"pop %%ebx \n\t"
|
||||
: "=qm"(ret)
|
||||
: "D"(addr), "a"(ll_low(oldval)), "d"(ll_high(oldval)),
|
||||
"r"(ll_low(newval)), "c"(ll_high(newval))
|
||||
: "cc", "memory");
|
||||
return (int) ret;
|
||||
}
|
||||
|
||||
#endif /* OMPI_GCC_INLINE_ASSEMBLY */
|
||||
|
||||
#define ompi_atomic_cmpset_acq_64 ompi_atomic_cmpset_64
|
||||
#define ompi_atomic_cmpset_rel_64 ompi_atomic_cmpset_64
|
||||
|
||||
#define OMPI_ARCHITECTURE_DEFINE_ATOMIC_ADD_32
|
||||
#if OMPI_GCC_INLINE_ASSEMBLY
|
||||
|
||||
/**
|
||||
* atomic_add - add integer to atomic variable
|
||||
* @i: integer value to add
|
||||
@ -122,7 +157,7 @@ static inline int ompi_atomic_add_32(volatile int32_t* v, int i)
|
||||
return (*v); /* should be an atomic operation */
|
||||
}
|
||||
|
||||
#define OMPI_ARCHITECTURE_DEFINE_ATOMIC_SUB_32
|
||||
|
||||
/**
|
||||
* atomic_sub - subtract the atomic variable
|
||||
* @i: integer value to subtract
|
||||
@ -139,5 +174,6 @@ static inline int ompi_atomic_sub_32(volatile int32_t* v, int i)
|
||||
return (*v); /* should be an atomic operation */
|
||||
}
|
||||
|
||||
#endif /* OMPI_GCC_INLINE_ASSEMBLY */
|
||||
|
||||
#endif /* ! OMPI_SYS_ARCH_ATOMIC_H */
|
||||
|
@ -1,193 +0,0 @@
|
||||
;;
|
||||
;; Copyright (c) 2004-2005 The Trustees of Indiana University.
|
||||
;; All rights reserved.
|
||||
;; Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
|
||||
;; All rights reserved.
|
||||
;; Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
;; University of Stuttgart. All rights reserved.
|
||||
;; $COPYRIGHT$
|
||||
;;
|
||||
;; Additional copyrights may follow
|
||||
;;
|
||||
;; $HEADER$
|
||||
;;
|
||||
.file "atomic.c"
|
||||
.text
|
||||
.globl ompi_atomic_mb
|
||||
.type ompi_atomic_mb,@function
|
||||
ompi_atomic_mb:
|
||||
pushl %ebp
|
||||
movl %esp, %ebp
|
||||
leave
|
||||
ret
|
||||
.Lfe1:
|
||||
.size ompi_atomic_mb,.Lfe1-ompi_atomic_mb
|
||||
.globl ompi_atomic_rmb
|
||||
.type ompi_atomic_rmb,@function
|
||||
ompi_atomic_rmb:
|
||||
pushl %ebp
|
||||
movl %esp, %ebp
|
||||
leave
|
||||
ret
|
||||
.Lfe2:
|
||||
.size ompi_atomic_rmb,.Lfe2-ompi_atomic_rmb
|
||||
.globl ompi_atomic_wmb
|
||||
.type ompi_atomic_wmb,@function
|
||||
ompi_atomic_wmb:
|
||||
pushl %ebp
|
||||
movl %esp, %ebp
|
||||
leave
|
||||
ret
|
||||
.Lfe3:
|
||||
.size ompi_atomic_wmb,.Lfe3-ompi_atomic_wmb
|
||||
.globl ompi_atomic_cmpset_32
|
||||
.type ompi_atomic_cmpset_32,@function
|
||||
ompi_atomic_cmpset_32:
|
||||
pushl %ebp
|
||||
movl %esp, %ebp
|
||||
pushl %ebx
|
||||
subl $4, %esp
|
||||
movl 12(%ebp), %eax
|
||||
movl %eax, -8(%ebp)
|
||||
movl -8(%ebp), %edx
|
||||
movl 16(%ebp), %ecx
|
||||
movl %edx, %eax
|
||||
movl 8(%ebp), %ebx
|
||||
#APP
|
||||
cmpxchgl %ecx,(%ebx)
|
||||
setz %al
|
||||
movzbl %al,%eax
|
||||
|
||||
#NO_APP
|
||||
movl %eax, %edx
|
||||
movl %edx, -8(%ebp)
|
||||
movl -8(%ebp), %eax
|
||||
cmpl 12(%ebp), %eax
|
||||
sete %al
|
||||
movzbl %al, %eax
|
||||
addl $4, %esp
|
||||
popl %ebx
|
||||
leave
|
||||
ret
|
||||
.Lfe4:
|
||||
.size ompi_atomic_cmpset_32,.Lfe4-ompi_atomic_cmpset_32
|
||||
.globl ompi_atomic_cmpset_acq_32
|
||||
.type ompi_atomic_cmpset_acq_32,@function
|
||||
ompi_atomic_cmpset_acq_32:
|
||||
pushl %ebp
|
||||
movl %esp, %ebp
|
||||
subl $8, %esp
|
||||
subl $4, %esp
|
||||
pushl 16(%ebp)
|
||||
pushl 12(%ebp)
|
||||
pushl 8(%ebp)
|
||||
call ompi_atomic_cmpset_32
|
||||
addl $16, %esp
|
||||
leave
|
||||
ret
|
||||
.Lfe5:
|
||||
.size ompi_atomic_cmpset_acq_32,.Lfe5-ompi_atomic_cmpset_acq_32
|
||||
.globl ompi_atomic_cmpset_rel_32
|
||||
.type ompi_atomic_cmpset_rel_32,@function
|
||||
ompi_atomic_cmpset_rel_32:
|
||||
pushl %ebp
|
||||
movl %esp, %ebp
|
||||
subl $8, %esp
|
||||
subl $4, %esp
|
||||
pushl 16(%ebp)
|
||||
pushl 12(%ebp)
|
||||
pushl 8(%ebp)
|
||||
call ompi_atomic_cmpset_32
|
||||
addl $16, %esp
|
||||
leave
|
||||
ret
|
||||
.Lfe6:
|
||||
.size ompi_atomic_cmpset_rel_32,.Lfe6-ompi_atomic_cmpset_rel_32
|
||||
.globl ompi_atomic_cmpset_64
|
||||
.type ompi_atomic_cmpset_64,@function
|
||||
ompi_atomic_cmpset_64:
|
||||
pushl %ebp
|
||||
movl %esp, %ebp
|
||||
pushl %ebx
|
||||
subl $28, %esp
|
||||
movl 12(%ebp), %eax
|
||||
movl 16(%ebp), %edx
|
||||
movl %eax, -16(%ebp)
|
||||
movl %edx, -12(%ebp)
|
||||
movl 20(%ebp), %eax
|
||||
movl 24(%ebp), %edx
|
||||
movl %eax, -24(%ebp)
|
||||
movl %edx, -20(%ebp)
|
||||
movl -16(%ebp), %eax
|
||||
movl -12(%ebp), %edx
|
||||
movl %eax, -32(%ebp)
|
||||
movl %edx, -28(%ebp)
|
||||
movl -16(%ebp), %ecx
|
||||
movl -12(%ebp), %ebx
|
||||
movl -32(%ebp), %eax
|
||||
xorl %ecx, %eax
|
||||
movl -28(%ebp), %edx
|
||||
xorl %ebx, %edx
|
||||
orl %edx, %eax
|
||||
testl %eax, %eax
|
||||
sete %al
|
||||
movzbl %al, %eax
|
||||
addl $28, %esp
|
||||
popl %ebx
|
||||
leave
|
||||
ret
|
||||
.Lfe7:
|
||||
.size ompi_atomic_cmpset_64,.Lfe7-ompi_atomic_cmpset_64
|
||||
.globl ompi_atomic_cmpset_acq_64
|
||||
.type ompi_atomic_cmpset_acq_64,@function
|
||||
ompi_atomic_cmpset_acq_64:
|
||||
pushl %ebp
|
||||
movl %esp, %ebp
|
||||
subl $24, %esp
|
||||
movl 12(%ebp), %eax
|
||||
movl 16(%ebp), %edx
|
||||
movl %eax, -8(%ebp)
|
||||
movl %edx, -4(%ebp)
|
||||
movl 20(%ebp), %eax
|
||||
movl 24(%ebp), %edx
|
||||
movl %eax, -16(%ebp)
|
||||
movl %edx, -12(%ebp)
|
||||
subl $12, %esp
|
||||
pushl -12(%ebp)
|
||||
pushl -16(%ebp)
|
||||
pushl -4(%ebp)
|
||||
pushl -8(%ebp)
|
||||
pushl 8(%ebp)
|
||||
call ompi_atomic_cmpset_64
|
||||
addl $32, %esp
|
||||
leave
|
||||
ret
|
||||
.Lfe8:
|
||||
.size ompi_atomic_cmpset_acq_64,.Lfe8-ompi_atomic_cmpset_acq_64
|
||||
.globl ompi_atomic_cmpset_rel_64
|
||||
.type ompi_atomic_cmpset_rel_64,@function
|
||||
ompi_atomic_cmpset_rel_64:
|
||||
pushl %ebp
|
||||
movl %esp, %ebp
|
||||
subl $24, %esp
|
||||
movl 12(%ebp), %eax
|
||||
movl 16(%ebp), %edx
|
||||
movl %eax, -8(%ebp)
|
||||
movl %edx, -4(%ebp)
|
||||
movl 20(%ebp), %eax
|
||||
movl 24(%ebp), %edx
|
||||
movl %eax, -16(%ebp)
|
||||
movl %edx, -12(%ebp)
|
||||
subl $12, %esp
|
||||
pushl -12(%ebp)
|
||||
pushl -16(%ebp)
|
||||
pushl -4(%ebp)
|
||||
pushl -8(%ebp)
|
||||
pushl 8(%ebp)
|
||||
call ompi_atomic_cmpset_64
|
||||
addl $32, %esp
|
||||
leave
|
||||
ret
|
||||
.Lfe9:
|
||||
.size ompi_atomic_cmpset_rel_64,.Lfe9-ompi_atomic_cmpset_rel_64
|
||||
.ident "GCC: (GNU) 3.2.2 20030222 (Red Hat Linux 3.2.2-5)"
|
@ -24,7 +24,9 @@ cat > $CFILE<<EOF
|
||||
#include <inttypes.h>
|
||||
#define static
|
||||
#define inline
|
||||
#define OMPI_GCC_INLINE_ASSEMBLY 1
|
||||
#define OMPI_WANT_SMP_LOCKS 1
|
||||
#include "atomic.h"
|
||||
EOF
|
||||
|
||||
gcc -I. -S $CFILE -o atomic.s
|
||||
gcc -O1 -I. -S $CFILE -o atomic.s
|
||||
|
@ -20,13 +20,30 @@
|
||||
*/
|
||||
|
||||
|
||||
#ifdef HAVE_SMP
|
||||
#if OMPI_WANT_SMP_LOCKS
|
||||
#define MB() __asm__ __volatile__("": : :"memory")
|
||||
#else
|
||||
#define MB()
|
||||
#endif
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
*
|
||||
* Define constants for IA64
|
||||
*
|
||||
*********************************************************************/
|
||||
/* Capability flags for IA64.  Each flag expands to 1 so it can be used
 * directly in "#if FLAG" / "#if A || B" tests; a flag defined with no
 * value makes such a test a preprocessor error. */
#define OMPI_HAVE_ATOMIC_MEM_BARRIER 1
/* previous spelling, kept so any existing check of it still sees 1 */
#define OMPI_HAVE_MEM_BARRIER 1

#define OMPI_HAVE_ATOMIC_CMPSET_32 1
#define OMPI_HAVE_ATOMIC_CMPSET_64 1
|
||||
|
||||
/**********************************************************************
|
||||
*
|
||||
* Memory Barriers
|
||||
*
|
||||
*********************************************************************/
|
||||
#if OMPI_GCC_INLINE_ASSEMBLY
|
||||
|
||||
static inline void ompi_atomic_mb(void)
|
||||
{
|
||||
MB();
|
||||
@ -44,13 +61,23 @@ static inline void ompi_atomic_wmb(void)
|
||||
MB();
|
||||
}
|
||||
|
||||
|
||||
#endif /* OMPI_GCC_INLINE_ASSEMBLY */
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
*
|
||||
* Atomic math operations
|
||||
*
|
||||
*********************************************************************/
|
||||
#if OMPI_GCC_INLINE_ASSEMBLY
|
||||
|
||||
#define ia64_cmpxchg4_acq(ptr, new, old) \
|
||||
({ \
|
||||
__u64 ia64_intri_res; \
|
||||
ia64_intri_res; \
|
||||
})
|
||||
|
||||
#define OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_32
|
||||
static inline int ompi_atomic_cmpset_acq_32( volatile int32_t *addr,
|
||||
int32_t oldval, int32_t newval)
|
||||
{
|
||||
@ -76,10 +103,13 @@ static inline int ompi_atomic_cmpset_rel_32( volatile int32_t *addr,
|
||||
return ((int32_t)ret == oldval);
|
||||
}
|
||||
|
||||
#endif /* OMPI_GCC_INLINE_ASSEMBLY */
|
||||
|
||||
|
||||
#define ompi_atomic_cmpset_32 ompi_atomic_cmpset_acq_32
|
||||
|
||||
#define OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_64
|
||||
#if OMPI_GCC_INLINE_ASSEMBLY
|
||||
|
||||
static inline int ompi_atomic_cmpset_acq_64( volatile int64_t *addr,
|
||||
int64_t oldval, int64_t newval)
|
||||
{
|
||||
@ -105,6 +135,7 @@ static inline int ompi_atomic_cmpset_rel_64( volatile int64_t *addr,
|
||||
return ((int32_t)ret == oldval);
|
||||
}
|
||||
|
||||
#endif /* OMPI_GCC_INLINE_ASSEMBLY */
|
||||
|
||||
#define ompi_atomic_cmpset_64 ompi_atomic_cmpset_acq_64
|
||||
|
||||
|
@ -24,6 +24,7 @@ cat > $CFILE<<EOF
|
||||
#include <inttypes.h>
|
||||
#define static
|
||||
#define inline
|
||||
#define OMPI_GCC_INLINE_ASSEMBLY 1
|
||||
#include "atomic.h"
|
||||
EOF
|
||||
|
||||
|
@ -14,7 +14,7 @@
|
||||
|
||||
include $(top_srcdir)/config/Makefile.options
|
||||
|
||||
noinst_HEADERS = atomic.h atomic.s
|
||||
noinst_HEADERS = atomic.h
|
||||
|
||||
# Conditionally install the header files
|
||||
|
||||
|
@ -38,26 +38,88 @@
|
||||
#endif
|
||||
|
||||
|
||||
static inline void ompi_atomic_mb(void)
|
||||
/**********************************************************************
|
||||
*
|
||||
* Define constants for PowerPC 32
|
||||
*
|
||||
*********************************************************************/
|
||||
#define OMPI_HAVE_ATOMIC_MEM_BARRIER 1
|
||||
|
||||
#define OMPI_HAVE_ATOMIC_CMPSET_32 1
|
||||
|
||||
#define OMPI_HAVE_ATOMIC_MATH_32 1
|
||||
#define OMPI_HAVE_ATOMIC_ADD_32 1
|
||||
#define OMPI_HAVE_ATOMIC_SUB_32 1
|
||||
|
||||
#if (OMPI_ASSEMBLY_ARCH == OMPI_POWERPC64) || (OMPI_POWERPC_SUPPORT_64BIT && OMPI_GCC_INLINE_ASSEMBLY)
|
||||
#define OMPI_HAVE_ATOMIC_CMPSET_64 1
|
||||
#endif
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
*
|
||||
* Memory Barriers
|
||||
*
|
||||
*********************************************************************/
|
||||
#if OMPI_GCC_INLINE_ASSEMBLY
|
||||
|
||||
static inline
|
||||
void ompi_atomic_mb(void)
|
||||
{
|
||||
MB();
|
||||
}
|
||||
|
||||
|
||||
static inline void ompi_atomic_rmb(void)
|
||||
static inline
|
||||
void ompi_atomic_rmb(void)
|
||||
{
|
||||
RMB();
|
||||
}
|
||||
|
||||
|
||||
static inline void ompi_atomic_wmb(void)
|
||||
static inline
|
||||
void ompi_atomic_wmb(void)
|
||||
{
|
||||
WMB();
|
||||
}
|
||||
|
||||
#define OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_32
|
||||
static inline int ompi_atomic_cmpset_32( volatile int32_t *addr,
|
||||
int32_t oldval, int32_t newval)
|
||||
#elif OMPI_XLC_INLINE_ASSEMBLY /* end OMPI_GCC_INLINE_ASSEMBLY */
|
||||
|
||||
/* Yeah, I don't know who thought this was a reasonable syntax for
|
||||
* inline assembly. Do these because they are used so often and they
|
||||
* are fairly simple (aka: there is a tech pub on IBM's web site
|
||||
* containing the right hex for the instructions).
|
||||
*/
|
||||
|
||||
void ompi_atomic_mb(void);
|
||||
#pragma mc_func ompi_atomic_mb { "7c0004ac" } /* sync */
|
||||
#pragma reg_killed_by ompi_atomic_mb /* none */
|
||||
|
||||
void ompi_atomic_rmb(void);
|
||||
#pragma mc_func ompi_atomic_rmb { "7c2004ac" } /* lwsync */
|
||||
#pragma reg_killed_by ompi_atomic_rmb /* none */
|
||||
|
||||
void ompi_atomic_wmb(void);
|
||||
#pragma mc_func ompi_atomic_wmb { "7c0006ac" } /* eieio */
|
||||
#pragma reg_killed_by ompi_atomic_wmb /* none */
|
||||
|
||||
#else /* end OMPI_XLC_INLINE_ASSEMBLY */
|
||||
|
||||
void ompi_atomic_mb(void);
|
||||
void ompi_atomic_rmb(void);
|
||||
void ompi_atomic_wmb(void);
|
||||
|
||||
#endif
|
||||
|
||||
/**********************************************************************
|
||||
*
|
||||
* Atomic math operations
|
||||
*
|
||||
*********************************************************************/
|
||||
#if OMPI_GCC_INLINE_ASSEMBLY
|
||||
|
||||
static inline int ompi_atomic_cmpset_32(volatile int32_t *addr,
|
||||
int32_t oldval, int32_t newval)
|
||||
{
|
||||
int32_t ret;
|
||||
|
||||
@ -76,9 +138,13 @@ static inline int ompi_atomic_cmpset_32( volatile int32_t *addr,
|
||||
return (ret == oldval);
|
||||
}
|
||||
|
||||
|
||||
static inline int ompi_atomic_cmpset_acq_32( volatile int32_t *addr,
|
||||
int32_t oldval, int32_t newval)
|
||||
/* these two functions aren't inlined in the non-gcc case because then
|
||||
there would be two function calls (since neither cmpset_32 nor
|
||||
atomic_?mb can be inlined). Instead, we "inline" them by hand in
|
||||
the assembly, meaning there is one function call overhead instead
|
||||
of two */
|
||||
static inline int ompi_atomic_cmpset_acq_32(volatile int32_t *addr,
|
||||
int32_t oldval, int32_t newval)
|
||||
{
|
||||
int rc;
|
||||
|
||||
@ -89,17 +155,33 @@ static inline int ompi_atomic_cmpset_acq_32( volatile int32_t *addr,
|
||||
}
|
||||
|
||||
|
||||
static inline int ompi_atomic_cmpset_rel_32( volatile int32_t *addr,
|
||||
int32_t oldval, int32_t newval)
|
||||
static inline int ompi_atomic_cmpset_rel_32(volatile int32_t *addr,
|
||||
int32_t oldval, int32_t newval)
|
||||
{
|
||||
ompi_atomic_wmb();
|
||||
return ompi_atomic_cmpset_32(addr, oldval, newval);
|
||||
}
|
||||
|
||||
#if defined(HOW_TO_DECIDE_IF_THE_ARCHI_SUPPORT_64_BITS_ATOMICS)
|
||||
#define OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_64
|
||||
static inline int ompi_atomic_cmpset_64( volatile int64_t *addr,
|
||||
int64_t oldval, int64_t newval)
|
||||
#else
|
||||
int ompi_atomic_cmpset_32(volatile int32_t *addr,
|
||||
int32_t oldval, int32_t newval);
|
||||
int ompi_atomic_cmpset_acq_32(volatile int32_t *addr,
|
||||
int32_t oldval, int32_t newval);
|
||||
int ompi_atomic_cmpset_rel_32(volatile int32_t *addr,
|
||||
int32_t oldval, int32_t newval);
|
||||
#endif /* OMPI_GCC_INLINE_ASSEMBLY */
|
||||
|
||||
|
||||
#if OMPI_POWERPC_SUPPORT_64BIT
|
||||
|
||||
#if OMPI_GCC_INLINE_ASSEMBLY
|
||||
/* these two functions aren't inlined in the non-gcc case because then
|
||||
there would be two function calls (since neither cmpset_64 nor
|
||||
atomic_?mb can be inlined). Instead, we "inline" them by hand in
|
||||
the assembly, meaning there is one function call overhead instead
|
||||
of two */
|
||||
static inline int ompi_atomic_cmpset_64(volatile int64_t *addr,
|
||||
int64_t oldval, int64_t newval)
|
||||
{
|
||||
int64_t ret;
|
||||
|
||||
@ -117,9 +199,8 @@ static inline int ompi_atomic_cmpset_64( volatile int64_t *addr,
|
||||
return (ret == oldval);
|
||||
}
|
||||
|
||||
|
||||
static inline int ompi_atomic_cmpset_acq_64( volatile int64_t *addr,
|
||||
int64_t oldval, int64_t newval)
|
||||
static inline int ompi_atomic_cmpset_acq_64(volatile int64_t *addr,
|
||||
int64_t oldval, int64_t newval)
|
||||
{
|
||||
int rc;
|
||||
|
||||
@ -130,15 +211,30 @@ static inline int ompi_atomic_cmpset_acq_64( volatile int64_t *addr,
|
||||
}
|
||||
|
||||
|
||||
static inline int ompi_atomic_cmpset_rel_64( volatile int64_t *addr,
|
||||
int64_t oldval, int64_t newval)
|
||||
static inline int ompi_atomic_cmpset_rel_64(volatile int64_t *addr,
|
||||
int64_t oldval, int64_t newval)
|
||||
{
|
||||
ompi_atomic_wmb();
|
||||
return ompi_atomic_cmpset_64(addr, oldval, newval);
|
||||
}
|
||||
#endif /* HOW_TO_DECIDE_IF_THE_ARCHI_SUPPORT_64_BITS_ATOMICS */
|
||||
|
||||
#define OMPI_ARCHITECTURE_DEFINE_ATOMIC_ADD_32
|
||||
#elif OMPI_ASSEMBLY_ARCH == OMPI_POWERPC64
|
||||
/* currently, don't have 64 bit apps for non-inline assembly */
|
||||
|
||||
int ompi_atomic_cmpset_64(volatile int64_t *addr,
|
||||
int64_t oldval, int64_t newval);
|
||||
int ompi_atomic_cmpset_acq_64(volatile int64_t *addr,
|
||||
int64_t oldval, int64_t newval);
|
||||
int ompi_atomic_cmpset_rel_64(volatile int64_t *addr,
|
||||
int64_t oldval, int64_t newval);
|
||||
|
||||
#endif /* OMPI_GCC_INLINE_ASSEMBLY */
|
||||
|
||||
#endif /* OMPI_POWERPC_SUPPORT_64BIT */
|
||||
|
||||
|
||||
#if OMPI_GCC_INLINE_ASSEMBLY
|
||||
|
||||
static inline int32_t ompi_atomic_add_32(volatile int32_t* v, int inc)
|
||||
{
|
||||
int32_t t;
|
||||
@ -155,7 +251,7 @@ static inline int32_t ompi_atomic_add_32(volatile int32_t* v, int inc)
|
||||
return *v;
|
||||
}
|
||||
|
||||
#define OMPI_ARCHITECTURE_DEFINE_ATOMIC_SUB_32
|
||||
|
||||
static inline int32_t ompi_atomic_sub_32(volatile int32_t* v, int dec)
|
||||
{
|
||||
int32_t t;
|
||||
@ -172,4 +268,7 @@ static inline int32_t ompi_atomic_sub_32(volatile int32_t* v, int dec)
|
||||
return *v;
|
||||
}
|
||||
|
||||
|
||||
#endif /* OMPI_GCC_INLINE_ASSEMBLY */
|
||||
|
||||
#endif /* ! OMPI_SYS_ARCH_ATOMIC_H */
|
||||
|
@ -1,245 +0,0 @@
|
||||
;;
|
||||
;; Copyright (c) 2004-2005 The Trustees of Indiana University.
|
||||
;; All rights reserved.
|
||||
;; Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
|
||||
;; All rights reserved.
|
||||
;; Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
;; University of Stuttgart. All rights reserved.
|
||||
;; $COPYRIGHT$
|
||||
;;
|
||||
;; Additional copyrights may follow
|
||||
;;
|
||||
;; $HEADER$
|
||||
;;
|
||||
.section __TEXT,__text,regular,pure_instructions
|
||||
.section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32
|
||||
.section __TEXT,__text,regular,pure_instructions
|
||||
.align 2
|
||||
.align 2
|
||||
.globl _ompi_atomic_mb
|
||||
.section __TEXT,__text,regular,pure_instructions
|
||||
.align 2
|
||||
_ompi_atomic_mb:
|
||||
stmw r30,-8(r1)
|
||||
stwu r1,-48(r1)
|
||||
mr r30,r1
|
||||
lwz r1,0(r1)
|
||||
lmw r30,-8(r1)
|
||||
blr
|
||||
.align 2
|
||||
.globl _ompi_atomic_rmb
|
||||
.section __TEXT,__text,regular,pure_instructions
|
||||
.align 2
|
||||
_ompi_atomic_rmb:
|
||||
stmw r30,-8(r1)
|
||||
stwu r1,-48(r1)
|
||||
mr r30,r1
|
||||
lwz r1,0(r1)
|
||||
lmw r30,-8(r1)
|
||||
blr
|
||||
.align 2
|
||||
.globl _ompi_atomic_wmb
|
||||
.section __TEXT,__text,regular,pure_instructions
|
||||
.align 2
|
||||
_ompi_atomic_wmb:
|
||||
stmw r30,-8(r1)
|
||||
stwu r1,-48(r1)
|
||||
mr r30,r1
|
||||
lwz r1,0(r1)
|
||||
lmw r30,-8(r1)
|
||||
blr
|
||||
.align 2
|
||||
.globl _ompi_atomic_cmpset_32
|
||||
.section __TEXT,__text,regular,pure_instructions
|
||||
.align 2
|
||||
_ompi_atomic_cmpset_32:
|
||||
stmw r30,-8(r1)
|
||||
stwu r1,-64(r1)
|
||||
mr r30,r1
|
||||
stw r3,88(r30)
|
||||
stw r4,92(r30)
|
||||
stw r5,96(r30)
|
||||
lwz r10,88(r30)
|
||||
lwz r11,88(r30)
|
||||
lwz r9,92(r30)
|
||||
lwz r0,96(r30)
|
||||
lwz r2,88(r30)
|
||||
1: lwarx r8, 0, r11
|
||||
cmpw 0, r8, r9
|
||||
bne- 2f
|
||||
stwcx. r0, 0, r11
|
||||
bne- 1b
|
||||
2:
|
||||
mr r0,r8
|
||||
stw r0,32(r30)
|
||||
lwz r2,32(r30)
|
||||
lwz r0,92(r30)
|
||||
cmpw cr7,r2,r0
|
||||
mfcr r0
|
||||
rlwinm r0,r0,31,1
|
||||
mr r3,r0
|
||||
lwz r1,0(r1)
|
||||
lmw r30,-8(r1)
|
||||
blr
|
||||
.align 2
|
||||
.globl _ompi_atomic_cmpset_acq_32
|
||||
.section __TEXT,__text,regular,pure_instructions
|
||||
.align 2
|
||||
_ompi_atomic_cmpset_acq_32:
|
||||
mflr r0
|
||||
stmw r30,-8(r1)
|
||||
stw r0,8(r1)
|
||||
stwu r1,-96(r1)
|
||||
mr r30,r1
|
||||
stw r3,120(r30)
|
||||
stw r4,124(r30)
|
||||
stw r5,128(r30)
|
||||
lwz r3,120(r30)
|
||||
lwz r4,124(r30)
|
||||
lwz r5,128(r30)
|
||||
bl _ompi_atomic_cmpset_32
|
||||
mr r0,r3
|
||||
stw r0,64(r30)
|
||||
bl _ompi_atomic_rmb
|
||||
lwz r0,64(r30)
|
||||
mr r3,r0
|
||||
lwz r1,0(r1)
|
||||
lwz r0,8(r1)
|
||||
mtlr r0
|
||||
lmw r30,-8(r1)
|
||||
blr
|
||||
.align 2
|
||||
.globl _ompi_atomic_cmpset_rel_32
|
||||
.section __TEXT,__text,regular,pure_instructions
|
||||
.align 2
|
||||
_ompi_atomic_cmpset_rel_32:
|
||||
mflr r0
|
||||
stmw r30,-8(r1)
|
||||
stw r0,8(r1)
|
||||
stwu r1,-80(r1)
|
||||
mr r30,r1
|
||||
stw r3,104(r30)
|
||||
stw r4,108(r30)
|
||||
stw r5,112(r30)
|
||||
bl _ompi_atomic_wmb
|
||||
lwz r3,104(r30)
|
||||
lwz r4,108(r30)
|
||||
lwz r5,112(r30)
|
||||
bl _ompi_atomic_cmpset_32
|
||||
mr r0,r3
|
||||
mr r3,r0
|
||||
lwz r1,0(r1)
|
||||
lwz r0,8(r1)
|
||||
mtlr r0
|
||||
lmw r30,-8(r1)
|
||||
blr
|
||||
.align 2
|
||||
.globl _ompi_atomic_cmpset_64
|
||||
.section __TEXT,__text,regular,pure_instructions
|
||||
.align 2
|
||||
_ompi_atomic_cmpset_64:
|
||||
stmw r30,-8(r1)
|
||||
stwu r1,-96(r1)
|
||||
mr r30,r1
|
||||
stw r3,120(r30)
|
||||
stw r4,32(r30)
|
||||
stw r5,36(r30)
|
||||
stw r6,40(r30)
|
||||
stw r7,44(r30)
|
||||
lwz r10,120(r30)
|
||||
lwz r0,120(r30)
|
||||
lwz r11,32(r30)
|
||||
lwz r12,36(r30)
|
||||
lwz r2,40(r30)
|
||||
lwz r3,44(r30)
|
||||
lwz r9,120(r30)
|
||||
1: ldarx r7, 0, r0
|
||||
cmpd 0, r7, r11
|
||||
bne- 2f
|
||||
stdcx. r2, 0, r0
|
||||
bne- 1b
|
||||
2:
|
||||
mr r2,r7
|
||||
mr r3,r8
|
||||
stw r2,64(r30)
|
||||
stw r3,68(r30)
|
||||
lfd f0,64(r30)
|
||||
stfd f0,48(r30)
|
||||
li r8,0
|
||||
stw r8,56(r30)
|
||||
lwz r2,48(r30)
|
||||
lwz r0,32(r30)
|
||||
cmpw cr7,r2,r0
|
||||
bne cr7,L8
|
||||
lwz r0,52(r30)
|
||||
lwz r2,36(r30)
|
||||
cmpw cr7,r0,r2
|
||||
bne cr7,L8
|
||||
li r0,1
|
||||
stw r0,56(r30)
|
||||
L8:
|
||||
lwz r0,56(r30)
|
||||
mr r3,r0
|
||||
lwz r1,0(r1)
|
||||
lmw r30,-8(r1)
|
||||
blr
|
||||
.align 2
|
||||
.globl _ompi_atomic_cmpset_acq_64
|
||||
.section __TEXT,__text,regular,pure_instructions
|
||||
.align 2
|
||||
_ompi_atomic_cmpset_acq_64:
|
||||
mflr r0
|
||||
stmw r30,-8(r1)
|
||||
stw r0,8(r1)
|
||||
stwu r1,-112(r1)
|
||||
mr r30,r1
|
||||
stw r3,136(r30)
|
||||
stw r4,64(r30)
|
||||
stw r5,68(r30)
|
||||
stw r6,72(r30)
|
||||
stw r7,76(r30)
|
||||
lwz r3,136(r30)
|
||||
lwz r4,64(r30)
|
||||
lwz r5,68(r30)
|
||||
lwz r6,72(r30)
|
||||
lwz r7,76(r30)
|
||||
bl _ompi_atomic_cmpset_64
|
||||
mr r0,r3
|
||||
stw r0,80(r30)
|
||||
bl _ompi_atomic_rmb
|
||||
lwz r0,80(r30)
|
||||
mr r3,r0
|
||||
lwz r1,0(r1)
|
||||
lwz r0,8(r1)
|
||||
mtlr r0
|
||||
lmw r30,-8(r1)
|
||||
blr
|
||||
.align 2
|
||||
.globl _ompi_atomic_cmpset_rel_64
|
||||
.section __TEXT,__text,regular,pure_instructions
|
||||
.align 2
|
||||
_ompi_atomic_cmpset_rel_64:
|
||||
mflr r0
|
||||
stmw r30,-8(r1)
|
||||
stw r0,8(r1)
|
||||
stwu r1,-96(r1)
|
||||
mr r30,r1
|
||||
stw r3,120(r30)
|
||||
stw r4,64(r30)
|
||||
stw r5,68(r30)
|
||||
stw r6,72(r30)
|
||||
stw r7,76(r30)
|
||||
bl _ompi_atomic_wmb
|
||||
lwz r3,120(r30)
|
||||
lwz r4,64(r30)
|
||||
lwz r5,68(r30)
|
||||
lwz r6,72(r30)
|
||||
lwz r7,76(r30)
|
||||
bl _ompi_atomic_cmpset_64
|
||||
mr r0,r3
|
||||
mr r3,r0
|
||||
lwz r1,0(r1)
|
||||
lwz r0,8(r1)
|
||||
mtlr r0
|
||||
lmw r30,-8(r1)
|
||||
blr
|
@ -24,7 +24,9 @@ cat > $CFILE<<EOF
|
||||
#include <inttypes.h>
|
||||
#define static
|
||||
#define inline
|
||||
#define OMPI_GCC_INLINE_ASSEMBLY 1
|
||||
#define OMPI_POWERPC_SUPPORT_64BIT 0
|
||||
#include "atomic.h"
|
||||
EOF
|
||||
|
||||
gcc -I. -S $CFILE -o atomic.s
|
||||
gcc -DHAVE_SMP -I. -S $CFILE -o atomic.s
|
||||
|
@ -21,13 +21,32 @@
|
||||
|
||||
#define ASI_P "0x80"
|
||||
|
||||
#ifdef HAVE_SMP
|
||||
#if OMPI_WANT_SMP_LOCKS
|
||||
#define MEMBAR(type) __asm__ __volatile__ ("membar" type : : : "memory")
|
||||
#else
|
||||
#define MEMBAR(type)
|
||||
#endif
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
*
|
||||
* Define constants for UltraSparc 64
|
||||
*
|
||||
*********************************************************************/
|
||||
/* Same flag name as the other architectures use for the barrier
 * capability; the previous spelling is kept so existing checks of it
 * still see 1. */
#define OMPI_HAVE_ATOMIC_MEM_BARRIER 1
#define OMPI_HAVE_MEM_BARRIER 1
|
||||
|
||||
#define OMPI_HAVE_ATOMIC_CMPSET_32 1
|
||||
|
||||
#define OMPI_HAVE_ATOMIC_CMPSET_64 1
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
*
|
||||
* Memory Barriers
|
||||
*
|
||||
*********************************************************************/
|
||||
#if OMPI_GCC_INLINE_ASSEMBLY
|
||||
|
||||
static inline void ompi_atomic_mb(void)
|
||||
{
|
||||
MEMBAR("#LoadLoad | #LoadStore | #StoreStore | #StoreLoad");
|
||||
@ -45,7 +64,16 @@ static inline void ompi_atomic_wmb(void)
|
||||
MEMBAR("#StoreStore");
|
||||
}
|
||||
|
||||
#define OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_32
|
||||
#endif /* OMPI_GCC_INLINE_ASSEMBLY */
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
*
|
||||
* Atomic math operations
|
||||
*
|
||||
*********************************************************************/
|
||||
#if OMPI_GCC_INLINE_ASSEMBLY
|
||||
|
||||
static inline int ompi_atomic_cmpset_32( volatile int32_t *addr,
|
||||
int32_t oldval, int32_t newval)
|
||||
{
|
||||
@ -77,7 +105,7 @@ static inline int ompi_atomic_cmpset_rel_32( volatile int32_t *addr,
|
||||
return ompi_atomic_cmpset_32(addr, oldval, newval);
|
||||
}
|
||||
|
||||
#define OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_64
|
||||
|
||||
static inline int ompi_atomic_cmpset_64( volatile int64_t *addr,
|
||||
int64_t oldval, int64_t newval)
|
||||
{
|
||||
@ -101,6 +129,7 @@ static inline int ompi_atomic_cmpset_acq_64( volatile int64_t *addr,
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
||||
static inline int ompi_atomic_cmpset_rel_64( volatile int64_t *addr,
|
||||
int64_t oldval, int64_t newval)
|
||||
{
|
||||
@ -108,5 +137,7 @@ static inline int ompi_atomic_cmpset_rel_64( volatile int64_t *addr,
|
||||
return ompi_atomic_cmpset_64(addr, oldval, newval);
|
||||
}
|
||||
|
||||
#endif /* OMPI_GCC_INLINE_ASSEMBLY */
|
||||
|
||||
|
||||
#endif /* ! OMPI_SYS_ARCH_ATOMIC_H */
|
||||
|
@ -24,6 +24,7 @@ cat > $CFILE<<EOF
|
||||
#include <inttypes.h>
|
||||
#define static
|
||||
#define inline
|
||||
#define OMPI_GCC_INLINE_ASSEMBLY 1
|
||||
#include "atomic.h"
|
||||
EOF
|
||||
|
||||
|
@ -15,9 +15,12 @@
|
||||
#ifndef OMPI_SYS_ARCH_ATOMIC_H
|
||||
#define OMPI_SYS_ARCH_ATOMIC_H 1
|
||||
|
||||
/*
|
||||
* On ia64, we use cmpxchg, which supports acquire/release semantics natively.
|
||||
*/
|
||||
/**********************************************************************
|
||||
*
|
||||
* Memory Barriers
|
||||
*
|
||||
*********************************************************************/
|
||||
#define OMPI_HAVE_ATOMIC_MEM_BARRIER 1
|
||||
|
||||
static inline void ompi_atomic_mb(void)
|
||||
{
|
||||
@ -42,7 +45,14 @@ static inline void ompi_atomic_wmb(void)
|
||||
#endif
|
||||
}
|
||||
|
||||
#define OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_32
|
||||
|
||||
/**********************************************************************
|
||||
*
|
||||
* Atomic math operations
|
||||
*
|
||||
*********************************************************************/
|
||||
|
||||
#define OMPI_HAVE_ATOMIC_CMPSET_32
|
||||
static inline int ompi_atomic_cmpset_acq_32( volatile int32_t *addr,
|
||||
int32_t oldval, int32_t newval)
|
||||
{
|
||||
@ -81,7 +91,7 @@ static inline int ompi_atomic_cmpset_32( volatile int32_t *addr,
|
||||
#endif
|
||||
}
|
||||
|
||||
#define OMPI_ARCHITECTURE_DEFINE_ATOMIC_CMPSET_64
|
||||
#define OMPI_HAVE_ATOMIC_CMPSET_64
|
||||
static inline int ompi_atomic_cmpset_acq_64( volatile int64_t *addr,
|
||||
int64_t oldval, int64_t newval)
|
||||
{
|
||||
@ -119,7 +129,9 @@ static inline int ompi_atomic_cmpset_64( volatile int64_t *addr,
|
||||
#endif
|
||||
}
|
||||
|
||||
#define OMPI_ARCHITECTURE_DEFINE_ATOMIC_ADD_32
|
||||
#define OMPI_HAVE_ATOMIC_MATH_32
|
||||
|
||||
#define OMPI_HAVE_ATOMIC_ADD_32
|
||||
static inline int32_t ompi_atomic_add_32(volatile int32_t *addr, int32_t delta)
|
||||
{
|
||||
return InterlockedExchangeAdd ((LONG volatile *) addr,
|
||||
@ -127,7 +139,9 @@ static inline int32_t ompi_atomic_add_32(volatile int32_t *addr, int32_t delta)
|
||||
|
||||
}
|
||||
|
||||
#define OMPI_ARCHITECTURE_DEFINE_ATOMIC_ADD_64
|
||||
#define OMPI_HAVE_ATOMIC_MATH_64
|
||||
|
||||
#define OMPI_HAVE_ATOMIC_ADD_64
|
||||
static inline int64_t ompi_atomic_add_64(volatile int64_t *addr, int64_t delta)
|
||||
{
|
||||
#if 0
|
||||
@ -139,7 +153,7 @@ static inline int64_t ompi_atomic_add_64(volatile int64_t *addr, int64_t delta)
|
||||
|
||||
}
|
||||
|
||||
#define OMPI_ARCHITECTURE_DEFINE_ATOMIC_SUB_32
|
||||
#define OMPI_HAVE_ATOMIC_SUB_32
|
||||
static inline int32_t ompi_atomic_sub_32(volatile int32_t *addr, int32_t delta)
|
||||
{
|
||||
return InterlockedExchangeAdd( (LONG volatile *) addr,
|
||||
@ -147,7 +161,7 @@ static inline int32_t ompi_atomic_sub_32(volatile int32_t *addr, int32_t delta)
|
||||
|
||||
}
|
||||
|
||||
#define OMPI_ARCHITECTURE_DEFINE_ATOMIC_SUB_64
|
||||
#define OMPI_HAVE_ATOMIC_SUB_64
|
||||
static inline int64_t ompi_atomic_sub_64(volatile int64_t *addr, int64_t delta)
|
||||
{
|
||||
#if 0
|
||||
|
@ -42,7 +42,7 @@ static void ompi_mutex_construct(ompi_mutex_t *m)
|
||||
#if OMPI_HAVE_POSIX_THREADS
|
||||
pthread_mutex_init(&m->m_lock_pthread, 0);
|
||||
#endif
|
||||
#if OMPI_HAVE_ATOMIC
|
||||
#if OMPI_HAVE_ATOMIC_SPINLOCKS
|
||||
ompi_atomic_init( &m->m_lock_atomic, OMPI_ATOMIC_UNLOCKED );
|
||||
#endif
|
||||
}
|
||||
|
@ -50,7 +50,7 @@ struct ompi_mutex_t {
|
||||
OMPI_DECLSPEC OBJ_CLASS_DECLARATION(ompi_mutex_t);
|
||||
|
||||
|
||||
#if OMPI_HAVE_ATOMIC && OMPI_HAVE_POSIX_THREADS
|
||||
#if OMPI_HAVE_ATOMIC_SPINLOCKS && OMPI_HAVE_POSIX_THREADS
|
||||
|
||||
/*
|
||||
* ompi_mutex_* implemented using pthreads
|
||||
@ -127,7 +127,7 @@ static inline void ompi_mutex_atomic_unlock(ompi_mutex_t *m)
|
||||
}
|
||||
|
||||
|
||||
#elif OMPI_HAVE_ATOMIC
|
||||
#elif OMPI_HAVE_ATOMIC_SPINLOCKS
|
||||
|
||||
/*
|
||||
* ompi_mutex_* and ompi_mutex_atomic_* implemented using atomic
|
||||
|
@ -51,7 +51,6 @@ headers = \
|
||||
|
||||
libutil_la_SOURCES = \
|
||||
$(headers) \
|
||||
assembly.s \
|
||||
argv.c \
|
||||
cmd_line.c \
|
||||
few.c \
|
||||
|
@ -1,35 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
/*
|
||||
* File to instantiate assembly level code for non-GNU C compilers.
|
||||
*/
|
||||
|
||||
#ifndef __GNUC__
|
||||
|
||||
#if defined(__alpha__)
|
||||
# include "include/sys/alpha/atomic.s"
|
||||
#elif defined(__amd64__) || defined(__x86_64__)
|
||||
# include "include/sys/amd64/atomic.s"
|
||||
#elif defined(__i386__)
|
||||
# include "include/sys/ia32/atomic.s"
|
||||
#elif defined(__ia64__)
|
||||
# include "include/sys/ia64/atomic.s"
|
||||
#elif defined(__POWERPC__)
|
||||
# include "include/sys/powerpc/atomic.s"
|
||||
#elif defined(__sparc__) || defined(__sparc)
|
||||
# include "include/sys/sparc64/atomic.s"
|
||||
#endif
|
||||
|
||||
#endif
|
Загрузка…
Ссылка в новой задаче
Block a user