From 8bfe739cd226882b57cf7bf9bff8c202df088bfc Mon Sep 17 00:00:00 2001
From: Benoit Steiner
Date: Mon, 11 Apr 2016 18:40:16 -0700
Subject: Updated the AVX512 PacketMath to properly leverage the AVX512DQ instructions

---
 CMakeLists.txt                          |  2 +-
 Eigen/src/Core/arch/AVX512/PacketMath.h | 11 ++++++++---
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 05686ea64..003d00c06 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -223,7 +223,7 @@ if(NOT MSVC)

   option(EIGEN_TEST_AVX512 "Enable/Disable AVX512 in tests/examples" OFF)
   if(EIGEN_TEST_AVX512)
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx512f")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx512f -mavx512dq")
     message(STATUS "Enabling AVX512 in tests/examples")
   endif()

diff --git a/Eigen/src/Core/arch/AVX512/PacketMath.h b/Eigen/src/Core/arch/AVX512/PacketMath.h
index 1cc8a7653..671b6f30a 100644
--- a/Eigen/src/Core/arch/AVX512/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX512/PacketMath.h
@@ -472,7 +472,10 @@ EIGEN_STRONG_INLINE Packet16f ploaddup(const float* from) {
   lane1 = _mm256_permute_ps(lane1, _MM_SHUFFLE(3, 3, 2, 2));

 #ifdef EIGEN_VECTORIZE_AVX512DQ
-  return _mm512_insertf32x8(lane0, lane1, 1);
+  Packet16f res;
+  return _mm512_insertf32x8(res, lane0, 0);
+  return _mm512_insertf32x8(res, lane1, 1);
+  return res;
 #else
   Packet16f res;
   res = _mm512_insertf32x4(res, _mm256_extractf128_ps(lane0, 0), 0);
@@ -654,7 +657,8 @@ template<> EIGEN_STRONG_INLINE Packet8d preduxp(const Packet8d* vecs)

 template <>
 EIGEN_STRONG_INLINE float predux(const Packet16f& a) {
-#ifdef EIGEN_VECTORIZE_AVX512DQ
+  //#ifdef EIGEN_VECTORIZE_AVX512DQ
+#if 0
   Packet8f lane0 = _mm512_extractf32x8_ps(a, 0);
   Packet8f lane1 = _mm512_extractf32x8_ps(a, 1);
   Packet8f sum = padd(lane0, lane1);
@@ -707,7 +711,8 @@ EIGEN_STRONG_INLINE Packet4d predux_half(const Packet8d& a) {

 template <>
 EIGEN_STRONG_INLINE float predux_mul(const Packet16f& a) {
-#ifdef EIGEN_VECTORIZE_AVX512DQ
+//#ifdef EIGEN_VECTORIZE_AVX512DQ
+#if 0
   Packet8f lane0 = _mm512_extractf32x8_ps(a, 0);
   Packet8f lane1 = _mm512_extractf32x8_ps(a, 1);
   Packet8f res = pmul(lane0, lane1);
--
cgit v1.2.3