From 69a4f709562604d7323355ac54403ad5cbf2e99c Mon Sep 17 00:00:00 2001 From: Christoph Hertzberg Date: Tue, 23 Mar 2021 18:52:19 +0000 Subject: Revert "Uses _mm512_abs_pd for Packet8d pabs" This reverts commit f019b97aca82071f35726b1aaebf1c598770f0f5 --- Eigen/src/Core/arch/AVX512/PacketMath.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/Eigen/src/Core/arch/AVX512/PacketMath.h b/Eigen/src/Core/arch/AVX512/PacketMath.h index a964b932c..f8741372d 100644 --- a/Eigen/src/Core/arch/AVX512/PacketMath.h +++ b/Eigen/src/Core/arch/AVX512/PacketMath.h @@ -139,7 +139,6 @@ template<> struct packet_traits : default_packet_traits size = 8, HasHalfPacket = 1, #if EIGEN_GNUC_AT_LEAST(5, 3) || (!EIGEN_COMP_GNUC_STRICT) - HasAbs = 1, HasLog = 1, HasSqrt = EIGEN_FAST_MATH, HasRsqrt = EIGEN_FAST_MATH, @@ -889,7 +888,9 @@ template<> EIGEN_STRONG_INLINE Packet16f pabs(const Packet16f& a) } template <> EIGEN_STRONG_INLINE Packet8d pabs(const Packet8d& a) { - return _mm512_abs_pd(a); + // _mm512_abs_ps intrinsic not found, so hack around it + return _mm512_castsi512_pd(_mm512_and_si512(_mm512_castpd_si512(a), + _mm512_set1_epi64(0x7fffffffffffffff))); } template<> @@ -899,7 +900,7 @@ EIGEN_STRONG_INLINE Packet16f pfrexp(const Packet16f& a, Packet16f& e // Extract exponent without existence of Packet8l. template<> -EIGEN_STRONG_INLINE +EIGEN_STRONG_INLINE Packet8d pfrexp_generic_get_biased_exponent(const Packet8d& a) { const Packet8d cst_exp_mask = pset1frombits(static_cast(0x7ff0000000000000ull)); #ifdef EIGEN_VECTORIZE_AVX512DQ @@ -922,18 +923,18 @@ template<> EIGEN_STRONG_INLINE Packet8d pldexp(const Packet8d& a, cons // Clamp exponent to [-2099, 2099] const Packet8d max_exponent = pset1(2099.0); const Packet8i e = _mm512_cvtpd_epi32(pmin(pmax(exponent, pnegate(max_exponent)), max_exponent)); - + // Split 2^e into four factors and multiply. const Packet8i bias = pset1(1023); Packet8i b = parithmetic_shift_right<2>(e); // floor(e/4) - + // 2^b Packet8i hi = _mm256_shuffle_epi32(padd(b, bias), _MM_SHUFFLE(3, 1, 2, 0)); Packet8i lo = _mm256_slli_epi64(hi, 52); hi = _mm256_slli_epi64(_mm256_srli_epi64(hi, 32), 52); Packet8d c = _mm512_castsi512_pd(_mm512_inserti64x4(_mm512_castsi256_si512(lo), hi, 1)); Packet8d out = pmul(pmul(pmul(a, c), c), c); // a * 2^(3b) - + // 2^(e - 3b) b = psub(psub(psub(e, b), b), b); // e - 3b hi = _mm256_shuffle_epi32(padd(b, bias), _MM_SHUFFLE(3, 1, 2, 0)); -- cgit v1.2.3