about | summary | refs | log | tree | commit | diff | homepage
path: root/Eigen
diff options
context:
space:
mode:
author: Antonio Sanchez <cantonios@google.com> 2021-04-14 13:54:11 -0700
committer: Rasmus Munk Larsen <rmlarsen@google.com> 2021-04-20 16:25:22 +0000
commit: 1d79c68ba0507574d893780e60b982f07d210261 (patch)
tree: b30b5fd0a1ee37d3006d17ee68abc812e4fbfedb /Eigen
parent: 3e819d83bf52abda16bb53565f6801df40d071f1 (diff)
Fix ldexp for AVX512 (#2215)
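The wrong shuffle was used: `_mm256_shuffle_epi32` only shuffles within each 128-bit lane, so it cannot interleave the low and high halves of the 256-bit vector. We need to interleave the low/high halves with a cross-lane `permute` instruction (`_mm256_permutevar8x32_epi32`) instead. Fixes #2215.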
Diffstat (limited to 'Eigen')
-rw-r--r-- Eigen/src/Core/arch/AVX512/PacketMath.h 5
1 files changed, 3 insertions, 2 deletions
diff --git a/Eigen/src/Core/arch/AVX512/PacketMath.h b/Eigen/src/Core/arch/AVX512/PacketMath.h
index f8741372d..9307c6763 100644
--- a/Eigen/src/Core/arch/AVX512/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX512/PacketMath.h
@@ -929,7 +929,8 @@ template<> EIGEN_STRONG_INLINE Packet8d pldexp<Packet8d>(const Packet8d& a, cons
Packet8i b = parithmetic_shift_right<2>(e); // floor(e/4)
// 2^b
- Packet8i hi = _mm256_shuffle_epi32(padd(b, bias), _MM_SHUFFLE(3, 1, 2, 0));
+ const Packet8i permute_idx = _mm256_setr_epi32(0, 4, 1, 5, 2, 6, 3, 7);
+ Packet8i hi = _mm256_permutevar8x32_epi32(padd(b, bias), permute_idx);
Packet8i lo = _mm256_slli_epi64(hi, 52);
hi = _mm256_slli_epi64(_mm256_srli_epi64(hi, 32), 52);
Packet8d c = _mm512_castsi512_pd(_mm512_inserti64x4(_mm512_castsi256_si512(lo), hi, 1));
@@ -937,7 +938,7 @@ template<> EIGEN_STRONG_INLINE Packet8d pldexp<Packet8d>(const Packet8d& a, cons
// 2^(e - 3b)
b = psub(psub(psub(e, b), b), b); // e - 3b
- hi = _mm256_shuffle_epi32(padd(b, bias), _MM_SHUFFLE(3, 1, 2, 0));
+ hi = _mm256_permutevar8x32_epi32(padd(b, bias), permute_idx);
lo = _mm256_slli_epi64(hi, 52);
hi = _mm256_slli_epi64(_mm256_srli_epi64(hi, 32), 52);
c = _mm512_castsi512_pd(_mm512_inserti64x4(_mm512_castsi256_si512(lo), hi, 1));