1
1
openmpi/ompi/mca/op/avx/configure.m4
Gilles Gouaillardet 26e42f9a0c op/avx: check for _mm512_mullo_epi64() AVX512 intrinsic
The PGI (20.4) compiler does not define this intrinsic, so only build
AVX512 support if the _mm512_mullo_epi64() intrinsic is defined.

Signed-off-by: Gilles Gouaillardet <gilles@rist.or.jp>
2020-11-04 14:45:03 +09:00

287 строки
12 KiB
Bash

# -*- shell-script -*-
#
# Copyright (c) 2019-2020 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2020 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2020 Research Organization for Information Science
# and Technology (RIST). All rights reserved.
#
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# MCA_ompi_op_avx_CONFIG([action-if-can-compile],
#                        [action-if-cant-compile])
# ------------------------------------------------
# Probe the C compiler for the x86 vector intrinsics used by the op/avx
# component.  The probes only run when $opal_cv_asm_arch is "X86_64";
# on any other architecture every support level stays at 0.
#
# For each of AVX512, AVX2 and AVX the intrinsics are first link-tested
# with the current CFLAGS, then again with an ISA-specific flag
# (-march=skylake-avx512, -mavx2, -mavx).  When the flag was required it
# is recorded in the matching MCA_BUILD_OP_*_FLAGS substitution.
#
# Outputs:
#   - config header defines OMPI_MCA_OP_HAVE_AVX512, OMPI_MCA_OP_HAVE_AVX2,
#     OMPI_MCA_OP_HAVE_AVX, OMPI_MCA_OP_HAVE_SSE41, OMPI_MCA_OP_HAVE_SSE3
#     (each 0 or 1);
#   - automake conditionals MCA_BUILD_ompi_op_has_*_support;
#   - substituted variables MCA_BUILD_OP_AVX512_FLAGS,
#     MCA_BUILD_OP_AVX2_FLAGS and MCA_BUILD_OP_AVX_FLAGS.
#
# The first action is run when at least one of AVX, AVX2 or AVX512 was
# detected; otherwise the second action is run.
AC_DEFUN([MCA_ompi_op_avx_CONFIG],[
    AC_CONFIG_FILES([ompi/mca/op/avx/Makefile])

    # Extra compiler flags required per ISA level.  They stay empty when
    # the intrinsics already build with the default flags.
    MCA_BUILD_OP_AVX_FLAGS=""
    MCA_BUILD_OP_AVX2_FLAGS=""
    MCA_BUILD_OP_AVX512_FLAGS=""

    # Detection results: 0 means unsupported and 1 means supported.
    op_sse3_support=0
    op_sse41_support=0
    op_avx_support=0
    op_avx2_support=0
    op_avx512_support=0

    OPAL_VAR_SCOPE_PUSH([op_avx_cflags_save])

    AS_IF([test "$opal_cv_asm_arch" = "X86_64"],
          [AC_LANG_PUSH([C])

           #
           # Check for AVX512 support
           #
           AC_MSG_CHECKING([for AVX512 support (no additional flags)])
           AC_LINK_IFELSE(
               [AC_LANG_PROGRAM([[#include <immintrin.h>]],
                       [[
    __m512 vA, vB;
    _mm512_add_ps(vA, vB)
                       ]])],
               [op_avx512_support=1
                AC_MSG_RESULT([yes])],
               [AC_MSG_RESULT([no])])

           # Retry with an explicit architecture flag and remember the flag
           # when it makes the probe succeed.
           AS_IF([test $op_avx512_support -eq 0],
                 [AC_MSG_CHECKING([for AVX512 support (with -march=skylake-avx512)])
                  op_avx_cflags_save="$CFLAGS"
                  CFLAGS="$CFLAGS -march=skylake-avx512"
                  AC_LINK_IFELSE(
                      [AC_LANG_PROGRAM([[#include <immintrin.h>]],
                              [[
    __m512 vA, vB;
    _mm512_add_ps(vA, vB)
                              ]])],
                      [op_avx512_support=1
                       MCA_BUILD_OP_AVX512_FLAGS="-march=skylake-avx512"
                       AC_MSG_RESULT([yes])],
                      [AC_MSG_RESULT([no])])
                  CFLAGS="$op_avx_cflags_save"
                 ])
           #
           # Some combination of gcc and older as would not correctly build the code generated by
           # _mm512_loadu_si512. Screen them out.  The probe is built at -O0 and
           # loads from the second array element.
           #
           AS_IF([test $op_avx512_support -eq 1],
                 [AC_MSG_CHECKING([if _mm512_loadu_si512 generates code that can be compiled])
                  op_avx_cflags_save="$CFLAGS"
                  CFLAGS="$CFLAGS_WITHOUT_OPTFLAGS -O0 $MCA_BUILD_OP_AVX512_FLAGS"
                  AC_LINK_IFELSE(
                      [AC_LANG_PROGRAM([[#include <immintrin.h>]],
                              [[
    int A[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
    __m512i vA = _mm512_loadu_si512((__m512i*)&(A[1]))
                              ]])],
                      [AC_MSG_RESULT([yes])],
                      [op_avx512_support=0
                       MCA_BUILD_OP_AVX512_FLAGS=""
                       AC_MSG_RESULT([no])])
                  CFLAGS="$op_avx_cflags_save"
                 ])
           #
           # Some PGI compilers do not define _mm512_mullo_epi64. Screen them out.
           #
           AS_IF([test $op_avx512_support -eq 1],
                 [AC_MSG_CHECKING([if _mm512_mullo_epi64 generates code that can be compiled])
                  op_avx_cflags_save="$CFLAGS"
                  CFLAGS="$CFLAGS_WITHOUT_OPTFLAGS -O0 $MCA_BUILD_OP_AVX512_FLAGS"
                  AC_LINK_IFELSE(
                      [AC_LANG_PROGRAM([[#include <immintrin.h>]],
                              [[
    __m512i vA, vB;
    _mm512_mullo_epi64(vA, vB)
                              ]])],
                      [AC_MSG_RESULT([yes])],
                      [op_avx512_support=0
                       MCA_BUILD_OP_AVX512_FLAGS=""
                       AC_MSG_RESULT([no])])
                  CFLAGS="$op_avx_cflags_save"
                 ])
           #
           # Check support for AVX2
           #
           # NOTE(review): the probe below uses _mm256_add_ps which looks like
           # an AVX rather than an AVX2 intrinsic - confirm that this is the
           # intended AVX2 test before relying on it.
           #
           AC_MSG_CHECKING([for AVX2 support (no additional flags)])
           AC_LINK_IFELSE(
               [AC_LANG_PROGRAM([[#include <immintrin.h>]],
                       [[
    __m256 vA, vB;
    _mm256_add_ps(vA, vB)
                       ]])],
               [op_avx2_support=1
                AC_MSG_RESULT([yes])],
               [AC_MSG_RESULT([no])])

           # Retry with -mavx2 and remember the flag when it is needed.
           AS_IF([test $op_avx2_support -eq 0],
                 [AC_MSG_CHECKING([for AVX2 support (with -mavx2)])
                  op_avx_cflags_save="$CFLAGS"
                  CFLAGS="$CFLAGS -mavx2"
                  AC_LINK_IFELSE(
                      [AC_LANG_PROGRAM([[#include <immintrin.h>]],
                              [[
    __m256 vA, vB;
    _mm256_add_ps(vA, vB)
                              ]])],
                      [op_avx2_support=1
                       MCA_BUILD_OP_AVX2_FLAGS="-mavx2"
                       AC_MSG_RESULT([yes])],
                      [AC_MSG_RESULT([no])])
                  CFLAGS="$op_avx_cflags_save"
                 ])
           #
           # Some combination of gcc and older as would not correctly build the code generated by
           # _mm256_loadu_si256. Screen them out.
           #
           AS_IF([test $op_avx2_support -eq 1],
                 [AC_MSG_CHECKING([if _mm256_loadu_si256 generates code that can be compiled])
                  op_avx_cflags_save="$CFLAGS"
                  CFLAGS="$CFLAGS_WITHOUT_OPTFLAGS -O0 $MCA_BUILD_OP_AVX2_FLAGS"
                  AC_LINK_IFELSE(
                      [AC_LANG_PROGRAM([[#include <immintrin.h>]],
                              [[
    int A[8] = {0, 1, 2, 3, 4, 5, 6, 7};
    __m256i vA = _mm256_loadu_si256((__m256i*)&A)
                              ]])],
                      [AC_MSG_RESULT([yes])],
                      [op_avx2_support=0
                       MCA_BUILD_OP_AVX2_FLAGS=""
                       AC_MSG_RESULT([no])])
                  CFLAGS="$op_avx_cflags_save"
                 ])
           #
           # What about early AVX support. The rest of the logic is slightly different as
           # we need to include some of the SSE4.1 and SSE3 instructions. So, we first check
           # if we can compile AVX code without a flag, then we validate that we have support
           # for the SSE4.1 and SSE3 instructions we need. If not, we check for the usage of
           # the AVX flag, and then recheck if we have support for the SSE4.1 and SSE3
           # instructions.
           #
           # NOTE(review): the probe below uses _mm_add_ps which looks like an
           # SSE intrinsic - the flag-less pass may therefore succeed on
           # compilers without AVX enabled.  Confirm against the intrinsics
           # reference.
           #
           AC_MSG_CHECKING([for AVX support (no additional flags)])
           AC_LINK_IFELSE(
               [AC_LANG_PROGRAM([[#include <immintrin.h>]],
                       [[
    __m128 vA, vB;
    _mm_add_ps(vA, vB)
                       ]])],
               [op_avx_support=1
                AC_MSG_RESULT([yes])],
               [AC_MSG_RESULT([no])])
           #
           # Check for SSE4.1 support
           #
           AS_IF([test $op_avx_support -eq 1],
                 [AC_MSG_CHECKING([for SSE4.1 support])
                  AC_LINK_IFELSE(
                      [AC_LANG_PROGRAM([[#include <immintrin.h>]],
                              [[
    __m128i vA, vB;
    (void)_mm_max_epi8(vA, vB)
                              ]])],
                      [op_sse41_support=1
                       AC_MSG_RESULT([yes])],
                      [AC_MSG_RESULT([no])])
                 ])
           #
           # Check for SSE3 support
           #
           AS_IF([test $op_avx_support -eq 1],
                 [AC_MSG_CHECKING([for SSE3 support])
                  AC_LINK_IFELSE(
                      [AC_LANG_PROGRAM([[#include <immintrin.h>]],
                              [[
    int A[4] = {0, 1, 2, 3};
    __m128i vA = _mm_lddqu_si128((__m128i*)&A)
                              ]])],
                      [op_sse3_support=1
                       AC_MSG_RESULT([yes])],
                      [AC_MSG_RESULT([no])])
                 ])

           # Second pass, do we need to add the AVX flag ?
           # It runs whenever any of the three flag-less probes failed.  All
           # probes of this pass are built with -mavx appended to CFLAGS.
           AS_IF([test $op_avx_support -eq 0 || test $op_sse41_support -eq 0 || test $op_sse3_support -eq 0],
                 [AC_MSG_CHECKING([for AVX support (with -mavx)])
                  op_avx_cflags_save="$CFLAGS"
                  CFLAGS="$CFLAGS -mavx"
                  # On success both SSE results are reset so that they are
                  # probed again below with -mavx in effect.
                  AC_LINK_IFELSE(
                      [AC_LANG_PROGRAM([[#include <immintrin.h>]],
                              [[
    __m128 vA, vB;
    _mm_add_ps(vA, vB)
                              ]])],
                      [op_avx_support=1
                       MCA_BUILD_OP_AVX_FLAGS="-mavx"
                       op_sse41_support=0
                       op_sse3_support=0
                       AC_MSG_RESULT([yes])],
                      [AC_MSG_RESULT([no])])

                  AS_IF([test $op_sse41_support -eq 0],
                        [AC_MSG_CHECKING([for SSE4.1 support])
                         AC_LINK_IFELSE(
                             [AC_LANG_PROGRAM([[#include <immintrin.h>]],
                                     [[
    __m128i vA, vB;
    (void)_mm_max_epi8(vA, vB)
                                     ]])],
                             [op_sse41_support=1
                              AC_MSG_RESULT([yes])],
                             [AC_MSG_RESULT([no])])
                        ])
                  AS_IF([test $op_sse3_support -eq 0],
                        [AC_MSG_CHECKING([for SSE3 support])
                         AC_LINK_IFELSE(
                             [AC_LANG_PROGRAM([[#include <immintrin.h>]],
                                     [[
    int A[4] = {0, 1, 2, 3};
    __m128i vA = _mm_lddqu_si128((__m128i*)&A)
                                     ]])],
                             [op_sse3_support=1
                              AC_MSG_RESULT([yes])],
                             [AC_MSG_RESULT([no])])
                        ])
                  CFLAGS="$op_avx_cflags_save"
                 ])

           AC_LANG_POP([C])
          ])

    # Publish the detection results to the config header.
    AC_DEFINE_UNQUOTED([OMPI_MCA_OP_HAVE_AVX512],
                       [$op_avx512_support],
                       [AVX512 supported in the current build])
    AC_DEFINE_UNQUOTED([OMPI_MCA_OP_HAVE_AVX2],
                       [$op_avx2_support],
                       [AVX2 supported in the current build])
    AC_DEFINE_UNQUOTED([OMPI_MCA_OP_HAVE_AVX],
                       [$op_avx_support],
                       [AVX supported in the current build])
    AC_DEFINE_UNQUOTED([OMPI_MCA_OP_HAVE_SSE41],
                       [$op_sse41_support],
                       [SSE4.1 supported in the current build])
    AC_DEFINE_UNQUOTED([OMPI_MCA_OP_HAVE_SSE3],
                       [$op_sse3_support],
                       [SSE3 supported in the current build])

    # Publish the same results as automake conditionals.  The comparison
    # uses the POSIX "=" operator: "==" is a bash extension that other
    # implementations of test reject.
    AM_CONDITIONAL([MCA_BUILD_ompi_op_has_avx512_support],
                   [test "$op_avx512_support" = "1"])
    AM_CONDITIONAL([MCA_BUILD_ompi_op_has_avx2_support],
                   [test "$op_avx2_support" = "1"])
    AM_CONDITIONAL([MCA_BUILD_ompi_op_has_avx_support],
                   [test "$op_avx_support" = "1"])
    AM_CONDITIONAL([MCA_BUILD_ompi_op_has_sse41_support],
                   [test "$op_sse41_support" = "1"])
    AM_CONDITIONAL([MCA_BUILD_ompi_op_has_sse3_support],
                   [test "$op_sse3_support" = "1"])

    # Per-ISA compiler flags made available to the component Makefile.
    AC_SUBST([MCA_BUILD_OP_AVX512_FLAGS])
    AC_SUBST([MCA_BUILD_OP_AVX2_FLAGS])
    AC_SUBST([MCA_BUILD_OP_AVX_FLAGS])

    OPAL_VAR_SCOPE_POP

    # Enable this component iff we have at least the most basic form of support
    # for vectorial ISA
    AS_IF([test $op_avx_support -eq 1 || test $op_avx2_support -eq 1 || test $op_avx512_support -eq 1],
          [$1],
          [$2])
])dnl