From 7ff0b7a980ceffe7d0e72ebac924f514f7874e9b Mon Sep 17 00:00:00 2001 From: Antonio Sanchez Date: Fri, 12 Feb 2021 11:32:29 -0800 Subject: Updated pfrexp implementation. The original implementation fails for 0, denormals, inf, and NaN. See #2150 --- Eigen/src/Core/arch/SSE/PacketMath.h | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) (limited to 'Eigen/src/Core/arch/SSE') diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index 422b0fce7..401a497d2 100755 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -887,21 +887,24 @@ template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a) } template<> EIGEN_STRONG_INLINE Packet4f pfrexp(const Packet4f& a, Packet4f& exponent) { - return pfrexp_float(a,exponent); + return pfrexp_generic(a,exponent); } -template<> EIGEN_STRONG_INLINE Packet2d pfrexp(const Packet2d& a, Packet2d& exponent) { - const Packet2d cst_1022d = pset1(1022.0); - const Packet2d cst_half = pset1(0.5); +// Extract exponent without existence of Packet2l. +template<> +EIGEN_STRONG_INLINE +Packet2d pfrexp_generic_get_biased_exponent(const Packet2d& a) { const Packet2d cst_exp_mask = pset1frombits(static_cast(0x7ff0000000000000ull)); __m128i a_expo = _mm_srli_epi64(_mm_castpd_si128(pand(a, cst_exp_mask)), 52); - exponent = psub(_mm_cvtepi32_pd(vec4i_swizzle1(a_expo, 0, 2, 1, 3)), cst_1022d); - const Packet2d cst_mant_mask = pset1frombits(static_cast(~0x7ff0000000000000ull)); - return por(pand(a, cst_mant_mask), cst_half); + return _mm_cvtepi32_pd(vec4i_swizzle1(a_expo, 0, 2, 1, 3)); +} + +template<> EIGEN_STRONG_INLINE Packet2d pfrexp(const Packet2d& a, Packet2d& exponent) { + return pfrexp_generic(a, exponent); } template<> EIGEN_STRONG_INLINE Packet4f pldexp(const Packet4f& a, const Packet4f& exponent) { - return pldexp_float(a,exponent); + return pldexp_generic(a,exponent); } // We specialize pldexp here, since the generic implementation uses Packet2l, which is not well -- cgit v1.2.3