Major update to the AVX* detection and support
1. Consistent march flag order between configure and make. 2. op/avx: give the option to skip some tests it is possible to skip some intrinsic tests by setting some environment variables to "no" before invoking configure: - ompi_cv_op_avx_check_avx512 - ompi_cv_op_avx_check_avx2 - ompi_cv_op_avx_check_avx - ompi_cv_op_avx_check_sse41 - ompi_cv_op_avx_check_sse3 3. op/avx: update AVX512 flags try -mavx512f -mavx512bw -mavx512vl -mavx512dq instead of -march=skylake-avx512 since the former is less likely to conflict with user provided CFLAGS (e.g. -march=...) Thanks Bart Oldeman for pointing this. 4. op/avx: have the op/avx library depend on libmpi.so Refs. open-mpi/ompi#8323 Signed-off-by: Gilles Gouaillardet <gilles@rist.or.jp> Signed-off-by: George Bosilca <bosilca@icl.utk.edu>
Этот коммит содержится в:
родитель
cd49049ab9
Коммит
31068e063b
@ -2,7 +2,7 @@
|
||||
# Copyright (c) 2019-2020 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2020 Research Organization for Information Science
|
||||
# Copyright (c) 2020-2021 Research Organization for Information Science
|
||||
# and Technology (RIST). All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
@ -86,7 +86,7 @@ mcacomponentdir = $(ompilibdir)
|
||||
mcacomponent_LTLIBRARIES = $(component_install)
|
||||
mca_op_avx_la_SOURCES = $(sources)
|
||||
mca_op_avx_la_LIBADD = $(specialized_op_libs)
|
||||
mca_op_avx_la_LDFLAGS = -module -avoid-version
|
||||
mca_op_avx_la_LDFLAGS = -module -avoid-version $(top_builddir)/ompi/lib@OMPI_LIBMPI_NAME@.la
|
||||
|
||||
|
||||
# Specific information for static builds.
|
||||
|
@ -29,6 +29,13 @@ AC_DEFUN([MCA_ompi_op_avx_CONFIG],[
|
||||
op_avx_support=0
|
||||
op_avx2_support=0
|
||||
op_avx512_support=0
|
||||
|
||||
AS_VAR_PUSHDEF([op_avx_check_sse3], [ompi_cv_op_avx_check_sse3])
|
||||
AS_VAR_PUSHDEF([op_avx_check_sse41], [ompi_cv_op_avx_check_sse41])
|
||||
AS_VAR_PUSHDEF([op_avx_check_avx], [ompi_cv_op_avx_check_avx])
|
||||
AS_VAR_PUSHDEF([op_avx_check_avx2], [ompi_cv_op_avx_check_avx2])
|
||||
AS_VAR_PUSHDEF([op_avx_check_avx512], [ompi_cv_op_avx_check_avx512])
|
||||
|
||||
OPAL_VAR_SCOPE_PUSH([op_avx_cflags_save])
|
||||
|
||||
AS_IF([test "$opal_cv_asm_arch" = "X86_64"],
|
||||
@ -37,21 +44,9 @@ AC_DEFUN([MCA_ompi_op_avx_CONFIG],[
|
||||
#
|
||||
# Check for AVX512 support
|
||||
#
|
||||
AC_MSG_CHECKING([for AVX512 support (no additional flags)])
|
||||
AC_LINK_IFELSE(
|
||||
[AC_LANG_PROGRAM([[#include <immintrin.h>]],
|
||||
[[
|
||||
__m512 vA, vB;
|
||||
_mm512_add_ps(vA, vB)
|
||||
]])],
|
||||
[op_avx512_support=1
|
||||
AC_MSG_RESULT([yes])],
|
||||
[AC_MSG_RESULT([no])])
|
||||
|
||||
AS_IF([test $op_avx512_support -eq 0],
|
||||
[AC_MSG_CHECKING([for AVX512 support (with -march=skylake-avx512)])
|
||||
op_avx_cflags_save="$CFLAGS"
|
||||
CFLAGS="$CFLAGS -march=skylake-avx512"
|
||||
AC_CACHE_CHECK([if we are checking for AVX512 support], op_avx_check_avx512, AS_VAR_SET(op_avx_check_avx512, yes))
|
||||
AS_IF([test "$op_avx_check_avx512" = "yes"],
|
||||
[AC_MSG_CHECKING([for AVX512 support (no additional flags)])
|
||||
AC_LINK_IFELSE(
|
||||
[AC_LANG_PROGRAM([[#include <immintrin.h>]],
|
||||
[[
|
||||
@ -59,99 +54,115 @@ AC_DEFUN([MCA_ompi_op_avx_CONFIG],[
|
||||
_mm512_add_ps(vA, vB)
|
||||
]])],
|
||||
[op_avx512_support=1
|
||||
MCA_BUILD_OP_AVX512_FLAGS="-march=skylake-avx512"
|
||||
AC_MSG_RESULT([yes])],
|
||||
[AC_MSG_RESULT([no])])
|
||||
CFLAGS="$op_avx_cflags_save"
|
||||
])
|
||||
#
|
||||
# Some combination of gcc and older as would not correctly build the code generated by
|
||||
# _mm256_loadu_si256. Screen them out.
|
||||
#
|
||||
AS_IF([test $op_avx512_support -eq 1],
|
||||
[AC_MSG_CHECKING([if _mm512_loadu_si512 generates code that can be compiled])
|
||||
op_avx_cflags_save="$CFLAGS"
|
||||
CFLAGS="$CFLAGS_WITHOUT_OPTFLAGS -O0 $MCA_BUILD_OP_AVX512_FLAGS"
|
||||
AC_LINK_IFELSE(
|
||||
[AC_LANG_PROGRAM([[#include <immintrin.h>]],
|
||||
[[
|
||||
|
||||
AS_IF([test $op_avx512_support -eq 0],
|
||||
[AC_MSG_CHECKING([for AVX512 support (with -mavx512f -mavx512bw -mavx512vl -mavx512dq)])
|
||||
op_avx_cflags_save="$CFLAGS"
|
||||
CFLAGS="-mavx512f -mavx512bw -mavx512vl -mavx512dq $CFLAGS"
|
||||
AC_LINK_IFELSE(
|
||||
[AC_LANG_PROGRAM([[#include <immintrin.h>]],
|
||||
[[
|
||||
__m512 vA, vB;
|
||||
_mm512_add_ps(vA, vB)
|
||||
]])],
|
||||
[op_avx512_support=1
|
||||
MCA_BUILD_OP_AVX512_FLAGS="-mavx512f -mavx512bw -mavx512vl -mavx512dq"
|
||||
AC_MSG_RESULT([yes])],
|
||||
[AC_MSG_RESULT([no])])
|
||||
CFLAGS="$op_avx_cflags_save"
|
||||
])
|
||||
#
|
||||
# Some combination of gcc and older as would not correctly build the code generated by
|
||||
# _mm256_loadu_si256. Screen them out.
|
||||
#
|
||||
AS_IF([test $op_avx512_support -eq 1],
|
||||
[AC_MSG_CHECKING([if _mm512_loadu_si512 generates code that can be compiled])
|
||||
op_avx_cflags_save="$CFLAGS"
|
||||
CFLAGS="$CFLAGS_WITHOUT_OPTFLAGS -O0 $MCA_BUILD_OP_AVX512_FLAGS"
|
||||
AC_LINK_IFELSE(
|
||||
[AC_LANG_PROGRAM([[#include <immintrin.h>]],
|
||||
[[
|
||||
int A[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
|
||||
__m512i vA = _mm512_loadu_si512((__m512i*)&(A[1]))
|
||||
]])],
|
||||
[AC_MSG_RESULT([yes])],
|
||||
[op_avx512_support=0
|
||||
MCA_BUILD_OP_AVX512_FLAGS=""
|
||||
AC_MSG_RESULT([no])])
|
||||
CFLAGS="$op_avx_cflags_save"
|
||||
])
|
||||
#
|
||||
# Some PGI compilers do not define _mm512_mullo_epi64. Screen them out.
|
||||
#
|
||||
AS_IF([test $op_avx512_support -eq 1],
|
||||
[AC_MSG_CHECKING([if _mm512_mullo_epi64 generates code that can be compiled])
|
||||
op_avx_cflags_save="$CFLAGS"
|
||||
CFLAGS="$CFLAGS_WITHOUT_OPTFLAGS -O0 $MCA_BUILD_OP_AVX512_FLAGS"
|
||||
AC_LINK_IFELSE(
|
||||
[AC_LANG_PROGRAM([[#include <immintrin.h>]],
|
||||
[[
|
||||
]])],
|
||||
[AC_MSG_RESULT([yes])],
|
||||
[op_avx512_support=0
|
||||
MCA_BUILD_OP_AVX512_FLAGS=""
|
||||
AC_MSG_RESULT([no])])
|
||||
CFLAGS="$op_avx_cflags_save"
|
||||
])
|
||||
#
|
||||
# Some PGI compilers do not define _mm512_mullo_epi64. Screen them out.
|
||||
#
|
||||
AS_IF([test $op_avx512_support -eq 1],
|
||||
[AC_MSG_CHECKING([if _mm512_mullo_epi64 generates code that can be compiled])
|
||||
op_avx_cflags_save="$CFLAGS"
|
||||
CFLAGS="$CFLAGS_WITHOUT_OPTFLAGS -O0 $MCA_BUILD_OP_AVX512_FLAGS"
|
||||
AC_LINK_IFELSE(
|
||||
[AC_LANG_PROGRAM([[#include <immintrin.h>]],
|
||||
[[
|
||||
__m512i vA, vB;
|
||||
_mm512_mullo_epi64(vA, vB)
|
||||
]])],
|
||||
[AC_MSG_RESULT([yes])],
|
||||
[op_avx512_support=0
|
||||
MCA_BUILD_OP_AVX512_FLAGS=""
|
||||
AC_MSG_RESULT([no])])
|
||||
CFLAGS="$op_avx_cflags_save"
|
||||
])
|
||||
]])],
|
||||
[AC_MSG_RESULT([yes])],
|
||||
[op_avx512_support=0
|
||||
MCA_BUILD_OP_AVX512_FLAGS=""
|
||||
AC_MSG_RESULT([no])])
|
||||
CFLAGS="$op_avx_cflags_save"
|
||||
])])
|
||||
#
|
||||
# Check support for AVX2
|
||||
#
|
||||
AC_MSG_CHECKING([for AVX2 support (no additional flags)])
|
||||
AC_LINK_IFELSE(
|
||||
[AC_LANG_PROGRAM([[#include <immintrin.h>]],
|
||||
[[
|
||||
__m256 vA, vB;
|
||||
_mm256_add_ps(vA, vB)
|
||||
]])],
|
||||
[op_avx2_support=1
|
||||
AC_MSG_RESULT([yes])],
|
||||
[AC_MSG_RESULT([no])])
|
||||
AS_IF([test $op_avx2_support -eq 0],
|
||||
[AC_MSG_CHECKING([for AVX2 support (with -mavx2)])
|
||||
op_avx_cflags_save="$CFLAGS"
|
||||
CFLAGS="$CFLAGS -mavx2"
|
||||
AC_LINK_IFELSE(
|
||||
[AC_LANG_PROGRAM([[#include <immintrin.h>]],
|
||||
[[
|
||||
__m256 vA, vB;
|
||||
_mm256_add_ps(vA, vB)
|
||||
]])],
|
||||
[op_avx2_support=1
|
||||
MCA_BUILD_OP_AVX2_FLAGS="-mavx2"
|
||||
AC_MSG_RESULT([yes])],
|
||||
[AC_MSG_RESULT([no])])
|
||||
CFLAGS="$op_avx_cflags_save"
|
||||
])
|
||||
#
|
||||
# Some combination of gcc and older as would not correctly build the code generated by
|
||||
# _mm256_loadu_si256. Screen them out.
|
||||
#
|
||||
AS_IF([test $op_avx2_support -eq 1],
|
||||
[AC_MSG_CHECKING([if _mm256_loadu_si256 generates code that can be compiled])
|
||||
op_avx_cflags_save="$CFLAGS"
|
||||
CFLAGS="$CFLAGS_WITHOUT_OPTFLAGS -O0 $MCA_BUILD_OP_AVX2_FLAGS"
|
||||
AC_CACHE_CHECK([if we are checking for AVX2 support], op_avx_check_avx2, AS_VAR_SET(op_avx_check_avx2, yes))
|
||||
AS_IF([test "$op_avx_check_avx2" = "yes"],
|
||||
[AC_MSG_CHECKING([for AVX2 support (no additional flags)])
|
||||
AC_LINK_IFELSE(
|
||||
[AC_LANG_PROGRAM([[#include <immintrin.h>]],
|
||||
[[
|
||||
[[
|
||||
__m256 vA, vB;
|
||||
_mm256_add_ps(vA, vB)
|
||||
]])],
|
||||
[op_avx2_support=1
|
||||
AC_MSG_RESULT([yes])],
|
||||
[AC_MSG_RESULT([no])])
|
||||
AS_IF([test $op_avx2_support -eq 0],
|
||||
[AC_MSG_CHECKING([for AVX2 support (with -mavx2)])
|
||||
op_avx_cflags_save="$CFLAGS"
|
||||
CFLAGS="-mavx2 $CFLAGS"
|
||||
AC_LINK_IFELSE(
|
||||
[AC_LANG_PROGRAM([[#include <immintrin.h>]],
|
||||
[[
|
||||
__m256 vA, vB;
|
||||
_mm256_add_ps(vA, vB)
|
||||
]])],
|
||||
[op_avx2_support=1
|
||||
MCA_BUILD_OP_AVX2_FLAGS="-mavx2"
|
||||
AC_MSG_RESULT([yes])],
|
||||
[AC_MSG_RESULT([no])])
|
||||
CFLAGS="$op_avx_cflags_save"
|
||||
])
|
||||
#
|
||||
# Some combination of gcc and older as would not correctly build the code generated by
|
||||
# _mm256_loadu_si256. Screen them out.
|
||||
#
|
||||
AS_IF([test $op_avx2_support -eq 1],
|
||||
[AC_MSG_CHECKING([if _mm256_loadu_si256 generates code that can be compiled])
|
||||
op_avx_cflags_save="$CFLAGS"
|
||||
CFLAGS="$CFLAGS_WITHOUT_OPTFLAGS -O0 $MCA_BUILD_OP_AVX2_FLAGS"
|
||||
AC_LINK_IFELSE(
|
||||
[AC_LANG_PROGRAM([[#include <immintrin.h>]],
|
||||
[[
|
||||
int A[8] = {0, 1, 2, 3, 4, 5, 6, 7};
|
||||
__m256i vA = _mm256_loadu_si256((__m256i*)&A)
|
||||
]])],
|
||||
[AC_MSG_RESULT([yes])],
|
||||
[op_avx2_support=0
|
||||
MCA_BUILD_OP_AVX2_FLAGS=""
|
||||
AC_MSG_RESULT([no])])
|
||||
CFLAGS="$op_avx_cflags_save"
|
||||
])
|
||||
]])],
|
||||
[AC_MSG_RESULT([yes])],
|
||||
[op_avx2_support=0
|
||||
MCA_BUILD_OP_AVX2_FLAGS=""
|
||||
AC_MSG_RESULT([no])])
|
||||
CFLAGS="$op_avx_cflags_save"
|
||||
])])
|
||||
#
|
||||
# What about early AVX support. The rest of the logic is slightly different as
|
||||
# we need to include some of the SSE4.1 and SSE3 instructions. So, we first check
|
||||
@ -160,90 +171,92 @@ AC_DEFUN([MCA_ompi_op_avx_CONFIG],[
|
||||
# the AVX flag, and then recheck if we have support for the SSE4.1 and SSE3
|
||||
# instructions.
|
||||
#
|
||||
AC_MSG_CHECKING([for AVX support (no additional flags)])
|
||||
AC_LINK_IFELSE(
|
||||
[AC_LANG_PROGRAM([[#include <immintrin.h>]],
|
||||
[[
|
||||
AC_CACHE_CHECK([if we are checking for AVX support], op_avx_check_avx, AS_VAR_SET(op_avx_check_avx, yes))
|
||||
AS_IF([test "$op_avx_check_avx" = "yes"],
|
||||
[AC_MSG_CHECKING([for AVX support (no additional flags)])
|
||||
AC_LINK_IFELSE(
|
||||
[AC_LANG_PROGRAM([[#include <immintrin.h>]],
|
||||
[[
|
||||
__m128 vA, vB;
|
||||
_mm_add_ps(vA, vB)
|
||||
]])],
|
||||
[op_avx_support=1
|
||||
AC_MSG_RESULT([yes])],
|
||||
[AC_MSG_RESULT([no])])
|
||||
]])],
|
||||
[op_avx_support=1
|
||||
AC_MSG_RESULT([yes])],
|
||||
[AC_MSG_RESULT([no])])])
|
||||
#
|
||||
# Check for SSE4.1 support
|
||||
#
|
||||
AS_IF([test $op_avx_support -eq 1],
|
||||
[AC_MSG_CHECKING([for SSE4.1 support])
|
||||
AC_LINK_IFELSE(
|
||||
[AC_LANG_PROGRAM([[#include <immintrin.h>]],
|
||||
[[
|
||||
AC_CACHE_CHECK([if we are checking for SSE4.1 support], op_avx_check_sse41, AS_VAR_SET(op_avx_check_sse41, yes))
|
||||
AS_IF([test $op_avx_support -eq 1 && test "$op_avx_check_sse41" = "yes"],
|
||||
[AC_MSG_CHECKING([for SSE4.1 support])
|
||||
AC_LINK_IFELSE(
|
||||
[AC_LANG_PROGRAM([[#include <immintrin.h>]],
|
||||
[[
|
||||
__m128i vA, vB;
|
||||
(void)_mm_max_epi8(vA, vB)
|
||||
]])],
|
||||
[op_sse41_support=1
|
||||
AC_MSG_RESULT([yes])],
|
||||
[AC_MSG_RESULT([no])])
|
||||
])
|
||||
]])],
|
||||
[op_sse41_support=1
|
||||
AC_MSG_RESULT([yes])],
|
||||
[AC_MSG_RESULT([no])])
|
||||
])
|
||||
#
|
||||
# Check for SSE3 support
|
||||
#
|
||||
AS_IF([test $op_avx_support -eq 1],
|
||||
[AC_MSG_CHECKING([for SSE3 support])
|
||||
AC_LINK_IFELSE(
|
||||
[AC_LANG_PROGRAM([[#include <immintrin.h>]],
|
||||
[[
|
||||
AC_CACHE_CHECK([if we are checking for SSE3 support], op_avx_check_sse3, AS_VAR_SET(op_avx_check_sse3, yes))
|
||||
AS_IF([test $op_avx_support -eq 1 && test "$op_avx_check_sse3" = "yes"],
|
||||
[AC_MSG_CHECKING([for SSE3 support])
|
||||
AC_LINK_IFELSE(
|
||||
[AC_LANG_PROGRAM([[#include <immintrin.h>]],
|
||||
[[
|
||||
int A[4] = {0, 1, 2, 3};
|
||||
__m128i vA = _mm_lddqu_si128((__m128i*)&A)
|
||||
]])],
|
||||
[op_sse3_support=1
|
||||
AC_MSG_RESULT([yes])],
|
||||
[AC_MSG_RESULT([no])])
|
||||
])
|
||||
]])],
|
||||
[op_sse3_support=1
|
||||
AC_MSG_RESULT([yes])],
|
||||
[AC_MSG_RESULT([no])])
|
||||
])
|
||||
# Second pass, do we need to add the AVX flag ?
|
||||
AS_IF([test $op_avx_support -eq 0 || test $op_sse41_support -eq 0 || test $op_sse3_support -eq 0],
|
||||
[AC_MSG_CHECKING([for AVX support (with -mavx)])
|
||||
op_avx_cflags_save="$CFLAGS"
|
||||
CFLAGS="$CFLAGS -mavx"
|
||||
AC_LINK_IFELSE(
|
||||
[AC_LANG_PROGRAM([[#include <immintrin.h>]],
|
||||
[[
|
||||
[AS_IF([test "$op_avx_check_avx" = "yes"],
|
||||
[AC_MSG_CHECKING([for AVX support (with -mavx)])
|
||||
op_avx_cflags_save="$CFLAGS"
|
||||
CFLAGS="-mavx $CFLAGS"
|
||||
AC_LINK_IFELSE(
|
||||
[AC_LANG_PROGRAM([[#include <immintrin.h>]],
|
||||
[[
|
||||
__m128 vA, vB;
|
||||
_mm_add_ps(vA, vB)
|
||||
]])],
|
||||
[op_avx_support=1
|
||||
MCA_BUILD_OP_AVX_FLAGS="-mavx"
|
||||
op_sse41_support=0
|
||||
op_sse3_support=0
|
||||
AC_MSG_RESULT([yes])],
|
||||
[AC_MSG_RESULT([no])])
|
||||
[op_avx_support=1
|
||||
MCA_BUILD_OP_AVX_FLAGS="-mavx"
|
||||
op_sse41_support=0
|
||||
op_sse3_support=0
|
||||
AC_MSG_RESULT([yes])],
|
||||
[AC_MSG_RESULT([no])])])
|
||||
|
||||
AS_IF([test $op_sse41_support -eq 0],
|
||||
[AC_MSG_CHECKING([for SSE4.1 support])
|
||||
AC_LINK_IFELSE(
|
||||
[AC_LANG_PROGRAM([[#include <immintrin.h>]],
|
||||
[[
|
||||
AS_IF([test "$op_avx_check_sse41" = "yes" && test $op_sse41_support -eq 0],
|
||||
[AC_MSG_CHECKING([for SSE4.1 support])
|
||||
AC_LINK_IFELSE(
|
||||
[AC_LANG_PROGRAM([[#include <immintrin.h>]],
|
||||
[[
|
||||
__m128i vA, vB;
|
||||
(void)_mm_max_epi8(vA, vB)
|
||||
]])],
|
||||
[op_sse41_support=1
|
||||
AC_MSG_RESULT([yes])],
|
||||
[AC_MSG_RESULT([no])])
|
||||
])
|
||||
AS_IF([test $op_sse3_support -eq 0],
|
||||
[AC_MSG_CHECKING([for SSE3 support])
|
||||
AC_LINK_IFELSE(
|
||||
[AC_LANG_PROGRAM([[#include <immintrin.h>]],
|
||||
]])],
|
||||
[op_sse41_support=1
|
||||
AC_MSG_RESULT([yes])],
|
||||
[AC_MSG_RESULT([no])])])
|
||||
AS_IF([test "$op_avx_check_sse3" = "yes" && test $op_sse3_support -eq 0],
|
||||
[AC_MSG_CHECKING([for SSE3 support])
|
||||
AC_LINK_IFELSE(
|
||||
[AC_LANG_PROGRAM([[#include <immintrin.h>]],
|
||||
[[
|
||||
int A[4] = {0, 1, 2, 3};
|
||||
__m128i vA = _mm_lddqu_si128((__m128i*)&A)
|
||||
]])],
|
||||
[op_sse3_support=1
|
||||
AC_MSG_RESULT([yes])],
|
||||
[AC_MSG_RESULT([no])])
|
||||
])
|
||||
CFLAGS="$op_avx_cflags_save"
|
||||
])
|
||||
[op_sse3_support=1
|
||||
AC_MSG_RESULT([yes])],
|
||||
[AC_MSG_RESULT([no])])])
|
||||
CFLAGS="$op_avx_cflags_save"])
|
||||
|
||||
AC_LANG_POP([C])
|
||||
])
|
||||
@ -276,6 +289,12 @@ AC_DEFUN([MCA_ompi_op_avx_CONFIG],[
|
||||
AC_SUBST(MCA_BUILD_OP_AVX2_FLAGS)
|
||||
AC_SUBST(MCA_BUILD_OP_AVX_FLAGS)
|
||||
|
||||
AS_VAR_POPDEF([op_avx_check_avx512])
|
||||
AS_VAR_POPDEF([op_avx_check_avx2])
|
||||
AS_VAR_POPDEF([op_avx_check_avx])
|
||||
AS_VAR_POPDEF([op_avx_check_sse41])
|
||||
AS_VAR_POPDEF([op_avx_check_sse3])
|
||||
|
||||
OPAL_VAR_SCOPE_POP
|
||||
# Enable this component iff we have at least the most basic form of support
|
||||
# for vectorial ISA
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user