diff options
Diffstat (limited to 'Eigen/src/Core/arch/AVX/PacketMath.h')
-rw-r--r-- | Eigen/src/Core/arch/AVX/PacketMath.h | 14 |
1 files changed, 14 insertions, 0 deletions
diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h index 770646b91..a415f2f1b 100644 --- a/Eigen/src/Core/arch/AVX/PacketMath.h +++ b/Eigen/src/Core/arch/AVX/PacketMath.h @@ -405,6 +405,20 @@ template<> EIGEN_STRONG_INLINE Packet8f pldexp<Packet8f>(const Packet8f& a, cons return pldexp_float(a,exponent); } +template<> EIGEN_STRONG_INLINE Packet4d pldexp<Packet4d>(const Packet4d& a, const Packet4d& exponent) { + // Build e=2^n by constructing the exponents in a 128-bit vector and + // shifting them to where they belong in double-precision values. + Packet4i cst_1023 = pset1<Packet4i>(1023); + __m128i emm0 = _mm256_cvtpd_epi32(exponent); + emm0 = _mm_add_epi32(emm0, cst_1023); + emm0 = _mm_shuffle_epi32(emm0, _MM_SHUFFLE(3, 1, 2, 0)); + __m128i lo = _mm_slli_epi64(emm0, 52); + __m128i hi = _mm_slli_epi64(_mm_srli_epi64(emm0, 32), 52); + __m256i e = _mm256_insertf128_si256(_mm256_setzero_si256(), lo, 0); + e = _mm256_insertf128_si256(e, hi, 1); + return pmul(a,_mm256_castsi256_pd(e)); +} + // preduxp should be ok // FIXME: why is this ok? why isn't the simply implementation working as expected? template<> EIGEN_STRONG_INLINE Packet8f preduxp<Packet8f>(const Packet8f* vecs) |