aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen
diff options
context:
space:
mode:
author Benoit Steiner <benoit.steiner.goog@gmail.com> 2016-12-21 20:55:07 -0800
committer Benoit Steiner <benoit.steiner.goog@gmail.com> 2016-12-21 20:55:07 -0800
commit 354baa0fb1bb6c2d137288a61339e8cf0e702ad7 (patch)
tree 43ad35f004b018db6a8b55d46083a8093a7503dc /Eigen
parent d7825b6707367c92f5d2cba742373059779c4e0f (diff)
Avoid using horizontal adds since they're not very efficient.
Diffstat (limited to 'Eigen')
-rw-r--r-- Eigen/src/Core/arch/AVX512/PacketMath.h 10
1 files changed, 4 insertions, 6 deletions
diff --git a/Eigen/src/Core/arch/AVX512/PacketMath.h b/Eigen/src/Core/arch/AVX512/PacketMath.h
index 14ef93c55..e46a60472 100644
--- a/Eigen/src/Core/arch/AVX512/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX512/PacketMath.h
@@ -628,8 +628,8 @@ EIGEN_STRONG_INLINE Packet8d pabs(const Packet8d& a) {
#ifdef EIGEN_VECTORIZE_AVX512DQ
// AVX512F does not define _mm512_extractf32x8_ps to extract _m256 from _m512
#define EIGEN_EXTRACT_8f_FROM_16f(INPUT, OUTPUT) \
- __m256 OUTPUT##_0 = _mm512_extractf32x8_ps(INPUT, 0) __m256 OUTPUT##_1 = \
- _mm512_extractf32x8_ps(INPUT, 1)
+ __m256 OUTPUT##_0 = _mm512_extractf32x8_ps(INPUT, 0); \
+ __m256 OUTPUT##_1 = _mm512_extractf32x8_ps(INPUT, 1)
#else
#define EIGEN_EXTRACT_8f_FROM_16f(INPUT, OUTPUT) \
__m256 OUTPUT##_0 = _mm256_insertf128_ps( \
@@ -847,10 +847,8 @@ EIGEN_STRONG_INLINE float predux<Packet16f>(const Packet16f& a) {
#ifdef EIGEN_VECTORIZE_AVX512DQ
__m256 lane0 = _mm512_extractf32x8_ps(a, 0);
__m256 lane1 = _mm512_extractf32x8_ps(a, 1);
- __m256 sum = _mm256_add_ps(lane0, lane1);
- __m256 tmp0 = _mm256_hadd_ps(sum, _mm256_permute2f128_ps(a, a, 1));
- tmp0 = _mm256_hadd_ps(tmp0, tmp0);
- return _mm_cvtss_f32(_mm256_castps256_ps128(_mm256_hadd_ps(tmp0, tmp0)));
+ Packet8f x = _mm256_add_ps(lane0, lane1);
+ return predux<Packet8f>(x);
#else
__m128 lane0 = _mm512_extractf32x4_ps(a, 0);
__m128 lane1 = _mm512_extractf32x4_ps(a, 1);