# -*- shell-script -*-
#
# Copyright (c) 2019-2020 The University of Tennessee and The University
#                         of Tennessee Research Foundation.  All rights
#                         reserved.
# Copyright (c) 2020      Cisco Systems, Inc.  All rights reserved.
# Copyright (c) 2020      Research Organization for Information Science
#                         and Technology (RIST).  All rights reserved.
#
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#

# MCA_ompi_op_avx_CONFIG([action-if-can-compile],
#                        [action-if-cant-compile])
# ------------------------------------------------
# Probe the C compiler for x86 vector ISA support (AVX512, AVX2, AVX, plus
# the SSE4.1 and SSE3 intrinsics the AVX code path relies on).  The probes
# only run when $opal_cv_asm_arch is "X86_64"; otherwise every level is
# reported as unsupported.
#
# Results:
#   - AC_DEFINE:      OMPI_MCA_OP_HAVE_{AVX512,AVX2,AVX,SSE41,SSE3}  (0 or 1)
#   - AM_CONDITIONAL: MCA_BUILD_ompi_op_has_{avx512,avx2,avx,sse41,sse3}_support
#   - AC_SUBST:       MCA_BUILD_OP_{AVX512,AVX2,AVX}_FLAGS, the extra compiler
#                     flag needed for each level (empty when none is needed
#                     or the level is unsupported)
#
# action-if-can-compile is run when at least one of AVX, AVX2 or AVX512 is
# usable; action-if-cant-compile is run otherwise.
AC_DEFUN([MCA_ompi_op_avx_CONFIG],[
    AC_CONFIG_FILES([ompi/mca/op/avx/Makefile])

    MCA_BUILD_OP_AVX_FLAGS=""
    MCA_BUILD_OP_AVX2_FLAGS=""
    MCA_BUILD_OP_AVX512_FLAGS=""
    op_sse3_support=0
    op_sse41_support=0
    op_avx_support=0
    op_avx2_support=0
    op_avx512_support=0
    OPAL_VAR_SCOPE_PUSH([op_avx_cflags_save])

    AS_IF([test "$opal_cv_asm_arch" = "X86_64"],
          [AC_LANG_PUSH([C])

           #
           # Check for AVX512 support
           #
           AC_MSG_CHECKING([for AVX512 support (no additional flags)])
           AC_LINK_IFELSE(
               [AC_LANG_PROGRAM([[#include <immintrin.h>]],
                                [[
    __m512 vA, vB;
    _mm512_add_ps(vA, vB)
                                ]])],
               [op_avx512_support=1
                AC_MSG_RESULT([yes])],
               [AC_MSG_RESULT([no])])

           AS_IF([test $op_avx512_support -eq 0],
                 [AC_MSG_CHECKING([for AVX512 support (with -march=skylake-avx512)])
                  op_avx_cflags_save="$CFLAGS"
                  CFLAGS="$CFLAGS -march=skylake-avx512"
                  AC_LINK_IFELSE(
                      [AC_LANG_PROGRAM([[#include <immintrin.h>]],
                                       [[
    __m512 vA, vB;
    _mm512_add_ps(vA, vB)
                                       ]])],
                      [op_avx512_support=1
                       MCA_BUILD_OP_AVX512_FLAGS="-march=skylake-avx512"
                       AC_MSG_RESULT([yes])],
                      [AC_MSG_RESULT([no])])
                  CFLAGS="$op_avx_cflags_save"
                 ])
           #
           # Some combination of gcc and older as would not correctly build
           # the code generated by _mm512_loadu_si512. Screen them out.
           # The probe is built at -O0 on top of CFLAGS_WITHOUT_OPTFLAGS, with
           # whatever AVX512 flag was selected above.
           #
           AS_IF([test $op_avx512_support -eq 1],
                 [AC_MSG_CHECKING([if _mm512_loadu_si512 generates code that can be compiled])
                  op_avx_cflags_save="$CFLAGS"
                  CFLAGS="$CFLAGS_WITHOUT_OPTFLAGS -O0 $MCA_BUILD_OP_AVX512_FLAGS"
                  AC_LINK_IFELSE(
                      [AC_LANG_PROGRAM([[#include <immintrin.h>]],
                                       [[
    int A[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
    __m512i vA = _mm512_loadu_si512((__m512i*)&(A[1]))
                                       ]])],
                      [AC_MSG_RESULT([yes])],
                      [op_avx512_support=0
                       MCA_BUILD_OP_AVX512_FLAGS=""
                       AC_MSG_RESULT([no])])
                  CFLAGS="$op_avx_cflags_save"
                 ])
           #
           # Some PGI compilers do not define _mm512_mullo_epi64. Screen them out.
           #
           AS_IF([test $op_avx512_support -eq 1],
                 [AC_MSG_CHECKING([if _mm512_mullo_epi64 generates code that can be compiled])
                  op_avx_cflags_save="$CFLAGS"
                  CFLAGS="$CFLAGS_WITHOUT_OPTFLAGS -O0 $MCA_BUILD_OP_AVX512_FLAGS"
                  AC_LINK_IFELSE(
                      [AC_LANG_PROGRAM([[#include <immintrin.h>]],
                                       [[
    __m512i vA, vB;
    _mm512_mullo_epi64(vA, vB)
                                       ]])],
                      [AC_MSG_RESULT([yes])],
                      [op_avx512_support=0
                       MCA_BUILD_OP_AVX512_FLAGS=""
                       AC_MSG_RESULT([no])])
                  CFLAGS="$op_avx_cflags_save"
                 ])

           #
           # Check support for AVX2
           #
           # NOTE(review): _mm256_add_ps appears to be an AVX-level intrinsic
           # rather than an AVX2 one, so this probe may also succeed on
           # AVX-only toolchains. Confirm against the Intel Intrinsics Guide
           # before relying on it to prove AVX2.
           #
           AC_MSG_CHECKING([for AVX2 support (no additional flags)])
           AC_LINK_IFELSE(
               [AC_LANG_PROGRAM([[#include <immintrin.h>]],
                                [[
    __m256 vA, vB;
    _mm256_add_ps(vA, vB)
                                ]])],
               [op_avx2_support=1
                AC_MSG_RESULT([yes])],
               [AC_MSG_RESULT([no])])

           AS_IF([test $op_avx2_support -eq 0],
                 [AC_MSG_CHECKING([for AVX2 support (with -mavx2)])
                  op_avx_cflags_save="$CFLAGS"
                  CFLAGS="$CFLAGS -mavx2"
                  AC_LINK_IFELSE(
                      [AC_LANG_PROGRAM([[#include <immintrin.h>]],
                                       [[
    __m256 vA, vB;
    _mm256_add_ps(vA, vB)
                                       ]])],
                      [op_avx2_support=1
                       MCA_BUILD_OP_AVX2_FLAGS="-mavx2"
                       AC_MSG_RESULT([yes])],
                      [AC_MSG_RESULT([no])])
                  CFLAGS="$op_avx_cflags_save"
                 ])
           #
           # Some combination of gcc and older as would not correctly build
           # the code generated by _mm256_loadu_si256. Screen them out.
           #
           AS_IF([test $op_avx2_support -eq 1],
                 [AC_MSG_CHECKING([if _mm256_loadu_si256 generates code that can be compiled])
                  op_avx_cflags_save="$CFLAGS"
                  CFLAGS="$CFLAGS_WITHOUT_OPTFLAGS -O0 $MCA_BUILD_OP_AVX2_FLAGS"
                  AC_LINK_IFELSE(
                      [AC_LANG_PROGRAM([[#include <immintrin.h>]],
                                       [[
    int A[8] = {0, 1, 2, 3, 4, 5, 6, 7};
    __m256i vA = _mm256_loadu_si256((__m256i*)&A)
                                       ]])],
                      [AC_MSG_RESULT([yes])],
                      [op_avx2_support=0
                       MCA_BUILD_OP_AVX2_FLAGS=""
                       AC_MSG_RESULT([no])])
                  CFLAGS="$op_avx_cflags_save"
                 ])

           #
           # What about early AVX support. The rest of the logic is slightly
           # different as we need to include some of the SSE4.1 and SSE3
           # instructions. So, we first check if we can compile AVX code
           # without a flag, then we validate that we have support for the
           # SSE4.1 and SSE3 instructions we need. If not, we check for the
           # usage of the AVX flag, and then recheck if we have support for
           # the SSE4.1 and SSE3 instructions.
           #
           # NOTE(review): _mm_add_ps on __m128 appears to be an SSE-level
           # intrinsic, so this probe may not actually prove AVX support.
           # Confirm against the Intel Intrinsics Guide.
           #
           AC_MSG_CHECKING([for AVX support (no additional flags)])
           AC_LINK_IFELSE(
               [AC_LANG_PROGRAM([[#include <immintrin.h>]],
                                [[
    __m128 vA, vB;
    _mm_add_ps(vA, vB)
                                ]])],
               [op_avx_support=1
                AC_MSG_RESULT([yes])],
               [AC_MSG_RESULT([no])])
           #
           # Check for SSE4.1 support
           #
           AS_IF([test $op_avx_support -eq 1],
                 [AC_MSG_CHECKING([for SSE4.1 support])
                  AC_LINK_IFELSE(
                      [AC_LANG_PROGRAM([[#include <immintrin.h>]],
                                       [[
    __m128i vA, vB;
    (void)_mm_max_epi8(vA, vB)
                                       ]])],
                      [op_sse41_support=1
                       AC_MSG_RESULT([yes])],
                      [AC_MSG_RESULT([no])])
                 ])
           #
           # Check for SSE3 support
           #
           AS_IF([test $op_avx_support -eq 1],
                 [AC_MSG_CHECKING([for SSE3 support])
                  AC_LINK_IFELSE(
                      [AC_LANG_PROGRAM([[#include <immintrin.h>]],
                                       [[
    int A[4] = {0, 1, 2, 3};
    __m128i vA = _mm_lddqu_si128((__m128i*)&A)
                                       ]])],
                      [op_sse3_support=1
                       AC_MSG_RESULT([yes])],
                      [AC_MSG_RESULT([no])])
                 ])

           # Second pass, do we need to add the AVX flag ?
           # CFLAGS keeps -mavx for the whole pass, so the SSE4.1 and SSE3
           # rechecks below are compiled with that flag as well.
           AS_IF([test $op_avx_support -eq 0 || test $op_sse41_support -eq 0 || test $op_sse3_support -eq 0],
                 [AC_MSG_CHECKING([for AVX support (with -mavx)])
                  op_avx_cflags_save="$CFLAGS"
                  CFLAGS="$CFLAGS -mavx"
                  AC_LINK_IFELSE(
                      [AC_LANG_PROGRAM([[#include <immintrin.h>]],
                                       [[
    __m128 vA, vB;
    _mm_add_ps(vA, vB)
                                       ]])],
                      [op_avx_support=1
                       MCA_BUILD_OP_AVX_FLAGS="-mavx"
                       op_sse41_support=0
                       op_sse3_support=0
                       AC_MSG_RESULT([yes])],
                      [AC_MSG_RESULT([no])])

                  AS_IF([test $op_sse41_support -eq 0],
                        [AC_MSG_CHECKING([for SSE4.1 support])
                         AC_LINK_IFELSE(
                             [AC_LANG_PROGRAM([[#include <immintrin.h>]],
                                              [[
    __m128i vA, vB;
    (void)_mm_max_epi8(vA, vB)
                                              ]])],
                             [op_sse41_support=1
                              AC_MSG_RESULT([yes])],
                             [AC_MSG_RESULT([no])])
                        ])
                  AS_IF([test $op_sse3_support -eq 0],
                        [AC_MSG_CHECKING([for SSE3 support])
                         AC_LINK_IFELSE(
                             [AC_LANG_PROGRAM([[#include <immintrin.h>]],
                                              [[
    int A[4] = {0, 1, 2, 3};
    __m128i vA = _mm_lddqu_si128((__m128i*)&A)
                                              ]])],
                             [op_sse3_support=1
                              AC_MSG_RESULT([yes])],
                             [AC_MSG_RESULT([no])])
                        ])
                  CFLAGS="$op_avx_cflags_save"
                 ])

           AC_LANG_POP([C])
          ])

    AC_DEFINE_UNQUOTED([OMPI_MCA_OP_HAVE_AVX512],
                       [$op_avx512_support],
                       [AVX512 supported in the current build])
    AC_DEFINE_UNQUOTED([OMPI_MCA_OP_HAVE_AVX2],
                       [$op_avx2_support],
                       [AVX2 supported in the current build])
    AC_DEFINE_UNQUOTED([OMPI_MCA_OP_HAVE_AVX],
                       [$op_avx_support],
                       [AVX supported in the current build])
    AC_DEFINE_UNQUOTED([OMPI_MCA_OP_HAVE_SSE41],
                       [$op_sse41_support],
                       [SSE4.1 supported in the current build])
    AC_DEFINE_UNQUOTED([OMPI_MCA_OP_HAVE_SSE3],
                       [$op_sse3_support],
                       [SSE3 supported in the current build])

    # Use the POSIX "=" string comparison here: "==" is not a portable
    # operator for test and is rejected by some shells.
    AM_CONDITIONAL([MCA_BUILD_ompi_op_has_avx512_support],
                   [test "$op_avx512_support" = "1"])
    AM_CONDITIONAL([MCA_BUILD_ompi_op_has_avx2_support],
                   [test "$op_avx2_support" = "1"])
    AM_CONDITIONAL([MCA_BUILD_ompi_op_has_avx_support],
                   [test "$op_avx_support" = "1"])
    AM_CONDITIONAL([MCA_BUILD_ompi_op_has_sse41_support],
                   [test "$op_sse41_support" = "1"])
    AM_CONDITIONAL([MCA_BUILD_ompi_op_has_sse3_support],
                   [test "$op_sse3_support" = "1"])

    AC_SUBST(MCA_BUILD_OP_AVX512_FLAGS)
    AC_SUBST(MCA_BUILD_OP_AVX2_FLAGS)
    AC_SUBST(MCA_BUILD_OP_AVX_FLAGS)

    OPAL_VAR_SCOPE_POP

    # Enable this component iff we have at least the most basic form of support
    # for vectorial ISA
    AS_IF([test $op_avx_support -eq 1 || test $op_avx2_support -eq 1 || test $op_avx512_support -eq 1],
          [$1],
          [$2])

])dnl