1
1

Major update to the AVX* detection and support

1. Consistent march flag order between configure and make.

2. op/avx: give the option to skip some tests

It is possible to skip some of the intrinsic tests by setting the following environment variables to "no" before invoking configure:
 - ompi_cv_op_avx_check_avx512
 - ompi_cv_op_avx_check_avx2
 - ompi_cv_op_avx_check_avx
 - ompi_cv_op_avx_check_sse41
 - ompi_cv_op_avx_check_sse3

3. op/avx: update AVX512 flags

Try
-mavx512f -mavx512bw -mavx512vl -mavx512dq
instead of
-march=skylake-avx512

since the former is less likely to conflict with user-provided CFLAGS
(e.g. -march=...).

Thanks to Bart Oldeman for pointing this out.

4. op/avx: have the op/avx library depend on libmpi.so

Refs. open-mpi/ompi#8323

Signed-off-by: Gilles Gouaillardet <gilles@rist.or.jp>
Signed-off-by: George Bosilca <bosilca@icl.utk.edu>
Этот коммит содержится в:
George Bosilca 2020-12-28 15:36:05 -05:00
родитель cd49049ab9
Коммит 31068e063b
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 09C926752C9F09B1
2 изменённых файлов: 176 добавлений и 157 удалений

Просмотреть файл

@ -2,7 +2,7 @@
# Copyright (c) 2019-2020 The University of Tennessee and The University # Copyright (c) 2019-2020 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights # of Tennessee Research Foundation. All rights
# reserved. # reserved.
# Copyright (c) 2020 Research Organization for Information Science # Copyright (c) 2020-2021 Research Organization for Information Science
# and Technology (RIST). All rights reserved. # and Technology (RIST). All rights reserved.
# $COPYRIGHT$ # $COPYRIGHT$
# #
@ -86,7 +86,7 @@ mcacomponentdir = $(ompilibdir)
mcacomponent_LTLIBRARIES = $(component_install) mcacomponent_LTLIBRARIES = $(component_install)
mca_op_avx_la_SOURCES = $(sources) mca_op_avx_la_SOURCES = $(sources)
mca_op_avx_la_LIBADD = $(specialized_op_libs) mca_op_avx_la_LIBADD = $(specialized_op_libs)
mca_op_avx_la_LDFLAGS = -module -avoid-version mca_op_avx_la_LDFLAGS = -module -avoid-version $(top_builddir)/ompi/lib@OMPI_LIBMPI_NAME@.la
# Specific information for static builds. # Specific information for static builds.

Просмотреть файл

@ -29,6 +29,13 @@ AC_DEFUN([MCA_ompi_op_avx_CONFIG],[
op_avx_support=0 op_avx_support=0
op_avx2_support=0 op_avx2_support=0
op_avx512_support=0 op_avx512_support=0
AS_VAR_PUSHDEF([op_avx_check_sse3], [ompi_cv_op_avx_check_sse3])
AS_VAR_PUSHDEF([op_avx_check_sse41], [ompi_cv_op_avx_check_sse41])
AS_VAR_PUSHDEF([op_avx_check_avx], [ompi_cv_op_avx_check_avx])
AS_VAR_PUSHDEF([op_avx_check_avx2], [ompi_cv_op_avx_check_avx2])
AS_VAR_PUSHDEF([op_avx_check_avx512], [ompi_cv_op_avx_check_avx512])
OPAL_VAR_SCOPE_PUSH([op_avx_cflags_save]) OPAL_VAR_SCOPE_PUSH([op_avx_cflags_save])
AS_IF([test "$opal_cv_asm_arch" = "X86_64"], AS_IF([test "$opal_cv_asm_arch" = "X86_64"],
@ -37,7 +44,9 @@ AC_DEFUN([MCA_ompi_op_avx_CONFIG],[
# #
# Check for AVX512 support # Check for AVX512 support
# #
AC_MSG_CHECKING([for AVX512 support (no additional flags)]) AC_CACHE_CHECK([if we are checking for AVX512 support], op_avx_check_avx512, AS_VAR_SET(op_avx_check_avx512, yes))
AS_IF([test "$op_avx_check_avx512" = "yes"],
[AC_MSG_CHECKING([for AVX512 support (no additional flags)])
AC_LINK_IFELSE( AC_LINK_IFELSE(
[AC_LANG_PROGRAM([[#include <immintrin.h>]], [AC_LANG_PROGRAM([[#include <immintrin.h>]],
[[ [[
@ -49,9 +58,9 @@ AC_DEFUN([MCA_ompi_op_avx_CONFIG],[
[AC_MSG_RESULT([no])]) [AC_MSG_RESULT([no])])
AS_IF([test $op_avx512_support -eq 0], AS_IF([test $op_avx512_support -eq 0],
[AC_MSG_CHECKING([for AVX512 support (with -march=skylake-avx512)]) [AC_MSG_CHECKING([for AVX512 support (with -mavx512f -mavx512bw -mavx512vl -mavx512dq)])
op_avx_cflags_save="$CFLAGS" op_avx_cflags_save="$CFLAGS"
CFLAGS="$CFLAGS -march=skylake-avx512" CFLAGS="-mavx512f -mavx512bw -mavx512vl -mavx512dq $CFLAGS"
AC_LINK_IFELSE( AC_LINK_IFELSE(
[AC_LANG_PROGRAM([[#include <immintrin.h>]], [AC_LANG_PROGRAM([[#include <immintrin.h>]],
[[ [[
@ -59,7 +68,7 @@ AC_DEFUN([MCA_ompi_op_avx_CONFIG],[
_mm512_add_ps(vA, vB) _mm512_add_ps(vA, vB)
]])], ]])],
[op_avx512_support=1 [op_avx512_support=1
MCA_BUILD_OP_AVX512_FLAGS="-march=skylake-avx512" MCA_BUILD_OP_AVX512_FLAGS="-mavx512f -mavx512bw -mavx512vl -mavx512dq"
AC_MSG_RESULT([yes])], AC_MSG_RESULT([yes])],
[AC_MSG_RESULT([no])]) [AC_MSG_RESULT([no])])
CFLAGS="$op_avx_cflags_save" CFLAGS="$op_avx_cflags_save"
@ -102,11 +111,13 @@ AC_DEFUN([MCA_ompi_op_avx_CONFIG],[
MCA_BUILD_OP_AVX512_FLAGS="" MCA_BUILD_OP_AVX512_FLAGS=""
AC_MSG_RESULT([no])]) AC_MSG_RESULT([no])])
CFLAGS="$op_avx_cflags_save" CFLAGS="$op_avx_cflags_save"
]) ])])
# #
# Check support for AVX2 # Check support for AVX2
# #
AC_MSG_CHECKING([for AVX2 support (no additional flags)]) AC_CACHE_CHECK([if we are checking for AVX2 support], op_avx_check_avx2, AS_VAR_SET(op_avx_check_avx2, yes))
AS_IF([test "$op_avx_check_avx2" = "yes"],
[AC_MSG_CHECKING([for AVX2 support (no additional flags)])
AC_LINK_IFELSE( AC_LINK_IFELSE(
[AC_LANG_PROGRAM([[#include <immintrin.h>]], [AC_LANG_PROGRAM([[#include <immintrin.h>]],
[[ [[
@ -119,7 +130,7 @@ AC_DEFUN([MCA_ompi_op_avx_CONFIG],[
AS_IF([test $op_avx2_support -eq 0], AS_IF([test $op_avx2_support -eq 0],
[AC_MSG_CHECKING([for AVX2 support (with -mavx2)]) [AC_MSG_CHECKING([for AVX2 support (with -mavx2)])
op_avx_cflags_save="$CFLAGS" op_avx_cflags_save="$CFLAGS"
CFLAGS="$CFLAGS -mavx2" CFLAGS="-mavx2 $CFLAGS"
AC_LINK_IFELSE( AC_LINK_IFELSE(
[AC_LANG_PROGRAM([[#include <immintrin.h>]], [AC_LANG_PROGRAM([[#include <immintrin.h>]],
[[ [[
@ -151,7 +162,7 @@ AC_DEFUN([MCA_ompi_op_avx_CONFIG],[
MCA_BUILD_OP_AVX2_FLAGS="" MCA_BUILD_OP_AVX2_FLAGS=""
AC_MSG_RESULT([no])]) AC_MSG_RESULT([no])])
CFLAGS="$op_avx_cflags_save" CFLAGS="$op_avx_cflags_save"
]) ])])
# #
# What about early AVX support. The rest of the logic is slightly different as # What about early AVX support. The rest of the logic is slightly different as
# we need to include some of the SSE4.1 and SSE3 instructions. So, we first check # we need to include some of the SSE4.1 and SSE3 instructions. So, we first check
@ -160,7 +171,9 @@ AC_DEFUN([MCA_ompi_op_avx_CONFIG],[
# the AVX flag, and then recheck if we have support for the SSE4.1 and SSE3 # the AVX flag, and then recheck if we have support for the SSE4.1 and SSE3
# instructions. # instructions.
# #
AC_MSG_CHECKING([for AVX support (no additional flags)]) AC_CACHE_CHECK([if we are checking for AVX support], op_avx_check_avx, AS_VAR_SET(op_avx_check_avx, yes))
AS_IF([test "$op_avx_check_avx" = "yes"],
[AC_MSG_CHECKING([for AVX support (no additional flags)])
AC_LINK_IFELSE( AC_LINK_IFELSE(
[AC_LANG_PROGRAM([[#include <immintrin.h>]], [AC_LANG_PROGRAM([[#include <immintrin.h>]],
[[ [[
@ -169,11 +182,12 @@ AC_DEFUN([MCA_ompi_op_avx_CONFIG],[
]])], ]])],
[op_avx_support=1 [op_avx_support=1
AC_MSG_RESULT([yes])], AC_MSG_RESULT([yes])],
[AC_MSG_RESULT([no])]) [AC_MSG_RESULT([no])])])
# #
# Check for SSE4.1 support # Check for SSE4.1 support
# #
AS_IF([test $op_avx_support -eq 1], AC_CACHE_CHECK([if we are checking for SSE4.1 support], op_avx_check_sse41, AS_VAR_SET(op_avx_check_sse41, yes))
AS_IF([test $op_avx_support -eq 1 && test "$op_avx_check_sse41" = "yes"],
[AC_MSG_CHECKING([for SSE4.1 support]) [AC_MSG_CHECKING([for SSE4.1 support])
AC_LINK_IFELSE( AC_LINK_IFELSE(
[AC_LANG_PROGRAM([[#include <immintrin.h>]], [AC_LANG_PROGRAM([[#include <immintrin.h>]],
@ -188,7 +202,8 @@ AC_DEFUN([MCA_ompi_op_avx_CONFIG],[
# #
# Check for SSE3 support # Check for SSE3 support
# #
AS_IF([test $op_avx_support -eq 1], AC_CACHE_CHECK([if we are checking for SSE3 support], op_avx_check_sse3, AS_VAR_SET(op_avx_check_sse3, yes))
AS_IF([test $op_avx_support -eq 1 && test "$op_avx_check_sse3" = "yes"],
[AC_MSG_CHECKING([for SSE3 support]) [AC_MSG_CHECKING([for SSE3 support])
AC_LINK_IFELSE( AC_LINK_IFELSE(
[AC_LANG_PROGRAM([[#include <immintrin.h>]], [AC_LANG_PROGRAM([[#include <immintrin.h>]],
@ -202,9 +217,10 @@ AC_DEFUN([MCA_ompi_op_avx_CONFIG],[
]) ])
# Second pass, do we need to add the AVX flag ? # Second pass, do we need to add the AVX flag ?
AS_IF([test $op_avx_support -eq 0 || test $op_sse41_support -eq 0 || test $op_sse3_support -eq 0], AS_IF([test $op_avx_support -eq 0 || test $op_sse41_support -eq 0 || test $op_sse3_support -eq 0],
[AS_IF([test "$op_avx_check_avx" = "yes"],
[AC_MSG_CHECKING([for AVX support (with -mavx)]) [AC_MSG_CHECKING([for AVX support (with -mavx)])
op_avx_cflags_save="$CFLAGS" op_avx_cflags_save="$CFLAGS"
CFLAGS="$CFLAGS -mavx" CFLAGS="-mavx $CFLAGS"
AC_LINK_IFELSE( AC_LINK_IFELSE(
[AC_LANG_PROGRAM([[#include <immintrin.h>]], [AC_LANG_PROGRAM([[#include <immintrin.h>]],
[[ [[
@ -216,9 +232,9 @@ AC_DEFUN([MCA_ompi_op_avx_CONFIG],[
op_sse41_support=0 op_sse41_support=0
op_sse3_support=0 op_sse3_support=0
AC_MSG_RESULT([yes])], AC_MSG_RESULT([yes])],
[AC_MSG_RESULT([no])]) [AC_MSG_RESULT([no])])])
AS_IF([test $op_sse41_support -eq 0], AS_IF([test "$op_avx_check_sse41" = "yes" && test $op_sse41_support -eq 0],
[AC_MSG_CHECKING([for SSE4.1 support]) [AC_MSG_CHECKING([for SSE4.1 support])
AC_LINK_IFELSE( AC_LINK_IFELSE(
[AC_LANG_PROGRAM([[#include <immintrin.h>]], [AC_LANG_PROGRAM([[#include <immintrin.h>]],
@ -228,9 +244,8 @@ AC_DEFUN([MCA_ompi_op_avx_CONFIG],[
]])], ]])],
[op_sse41_support=1 [op_sse41_support=1
AC_MSG_RESULT([yes])], AC_MSG_RESULT([yes])],
[AC_MSG_RESULT([no])]) [AC_MSG_RESULT([no])])])
]) AS_IF([test "$op_avx_check_sse3" = "yes" && test $op_sse3_support -eq 0],
AS_IF([test $op_sse3_support -eq 0],
[AC_MSG_CHECKING([for SSE3 support]) [AC_MSG_CHECKING([for SSE3 support])
AC_LINK_IFELSE( AC_LINK_IFELSE(
[AC_LANG_PROGRAM([[#include <immintrin.h>]], [AC_LANG_PROGRAM([[#include <immintrin.h>]],
@ -240,10 +255,8 @@ AC_DEFUN([MCA_ompi_op_avx_CONFIG],[
]])], ]])],
[op_sse3_support=1 [op_sse3_support=1
AC_MSG_RESULT([yes])], AC_MSG_RESULT([yes])],
[AC_MSG_RESULT([no])]) [AC_MSG_RESULT([no])])])
]) CFLAGS="$op_avx_cflags_save"])
CFLAGS="$op_avx_cflags_save"
])
AC_LANG_POP([C]) AC_LANG_POP([C])
]) ])
@ -276,6 +289,12 @@ AC_DEFUN([MCA_ompi_op_avx_CONFIG],[
AC_SUBST(MCA_BUILD_OP_AVX2_FLAGS) AC_SUBST(MCA_BUILD_OP_AVX2_FLAGS)
AC_SUBST(MCA_BUILD_OP_AVX_FLAGS) AC_SUBST(MCA_BUILD_OP_AVX_FLAGS)
AS_VAR_POPDEF([op_avx_check_avx512])
AS_VAR_POPDEF([op_avx_check_avx2])
AS_VAR_POPDEF([op_avx_check_avx])
AS_VAR_POPDEF([op_avx_check_sse41])
AS_VAR_POPDEF([op_avx_check_sse3])
OPAL_VAR_SCOPE_POP OPAL_VAR_SCOPE_POP
# Enable this component iff we have at least the most basic form of support # Enable this component iff we have at least the most basic form of support
# for vectorial ISA # for vectorial ISA