From 7b6d0ff1f6afdb7f6137c92a76d20e7497e2fa2a Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 7 Dec 2018 15:14:50 +0100 Subject: Enable FMA with MSVC (through /arch:AVX2). To make this possible, I also has to turn the #warning regarding AVX512-FMA to a #error. --- CMakeLists.txt | 15 ++++++++++++++- Eigen/src/Core/arch/AVX/PacketMath.h | 2 +- Eigen/src/Core/arch/SSE/PacketMath.h | 4 ++-- Eigen/src/Core/util/ConfigureVectorization.h | 8 +++++--- 4 files changed, 22 insertions(+), 7 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 45cbc75ee..e18428e50 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -250,7 +250,7 @@ if(NOT MSVC) option(EIGEN_TEST_AVX512 "Enable/Disable AVX512 in tests/examples" OFF) if(EIGEN_TEST_AVX512) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx512f -DEIGEN_ENABLE_AVX512") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx512f -mfma -DEIGEN_ENABLE_AVX512") if (NOT "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fabi-version=6") endif() @@ -350,6 +350,19 @@ else(NOT MSVC) endif(NOT CMAKE_CL_64) message(STATUS "Enabling SSE2 in tests/examples") endif(EIGEN_TEST_SSE2) + + option(EIGEN_TEST_AVX "Enable/Disable AVX in tests/examples" OFF) + if(EIGEN_TEST_AVX) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX") + message(STATUS "Enabling AVX in tests/examples") + endif() + + option(EIGEN_TEST_FMA "Enable/Disable FMA/AVX2 in tests/examples" OFF) + if(EIGEN_TEST_FMA AND NOT EIGEN_TEST_NEON) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX2") + message(STATUS "Enabling FMA/AVX2 in tests/examples") + endif() + endif(NOT MSVC) option(EIGEN_TEST_NO_EXPLICIT_VECTORIZATION "Disable explicit vectorization in tests/examples" OFF) diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h index 35c821472..9bc986575 100644 --- a/Eigen/src/Core/arch/AVX/PacketMath.h +++ b/Eigen/src/Core/arch/AVX/PacketMath.h @@ -181,7 +181,7 @@ template<> EIGEN_STRONG_INLINE Packet8i pdiv(const Packet8i& /*a*/, co return pset1(0); } -#ifdef __FMA__ +#ifdef EIGEN_VECTORIZE_FMA template<> EIGEN_STRONG_INLINE Packet8f pmadd(const Packet8f& a, const Packet8f& b, const Packet8f& c) { #if ( (EIGEN_COMP_GNUC_STRICT && EIGEN_COMP_GNUC<80) || (EIGEN_COMP_CLANG) ) // Clang stupidly generates a vfmadd213ps instruction plus some vmovaps on registers, diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index 971a31a4d..b80b7f46a 100755 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -266,7 +266,7 @@ template<> EIGEN_STRONG_INLINE Packet2d pdiv(const Packet2d& a, const // for some weird raisons, it has to be overloaded for packet of integers template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return padd(pmul(a,b), c); } -#ifdef __FMA__ +#ifdef EIGEN_VECTORIZE_FMA template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return _mm_fmadd_ps(a,b,c); } template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return _mm_fmadd_pd(a,b,c); } #endif @@ -1013,7 +1013,7 @@ template<> EIGEN_STRONG_INLINE Packet2d pinsertlast(const Packet2d& a, double b) } // Scalar path for pmadd with FMA to ensure consistency with vectorized path. -#ifdef __FMA__ +#ifdef EIGEN_VECTORIZE_FMA template<> EIGEN_STRONG_INLINE float pmadd(const float& a, const float& b, const float& c) { return ::fmaf(a,b,c); } diff --git a/Eigen/src/Core/util/ConfigureVectorization.h b/Eigen/src/Core/util/ConfigureVectorization.h index 2584717b5..121476394 100644 --- a/Eigen/src/Core/util/ConfigureVectorization.h +++ b/Eigen/src/Core/util/ConfigureVectorization.h @@ -250,15 +250,17 @@ #define EIGEN_VECTORIZE_SSE4_1 #define EIGEN_VECTORIZE_SSE4_2 #endif - #ifdef __FMA__ + #if defined(__FMA__) || (EIGEN_COMP_MSVC && defined(__AVX2__)) + // MSVC does not expose a switch dedicated for FMA + // For MSVC, AVX2 => FMA #define EIGEN_VECTORIZE_FMA #endif #if defined(__AVX512F__) #ifndef __FMA__ #if EIGEN_COMP_GNUC - #warning Please add -mfma to your compiler flags: compiling with -mavx512f alone without SSE/AVX FMA is not supported (bug 1638). + #error Please add -mfma to your compiler flags: compiling with -mavx512f alone without SSE/AVX FMA is not supported (bug 1638). #else - #error Please enable FMA in your compiler flags (e.g. -mfma): compiling with AVX512 alone without SSE/AVX FMA is not supported (bug 1638). + #error Please enable FMA in your compiler flags (e.g. -mfma): compiling with AVX512 alone without SSE/AVX FMA is not supported (bug 1638). #endif #endif #define EIGEN_VECTORIZE_AVX512 -- cgit v1.2.3