From 7ff0b7a980ceffe7d0e72ebac924f514f7874e9b Mon Sep 17 00:00:00 2001 From: Antonio Sanchez Date: Fri, 12 Feb 2021 11:32:29 -0800 Subject: Updated pfrexp implementation. The original implementation fails for 0, denormals, inf, and NaN. See #2150 --- Eigen/src/Core/arch/AltiVec/PacketMath.h | 62 +++++++++++++++++++++----------- 1 file changed, 42 insertions(+), 20 deletions(-) (limited to 'Eigen/src/Core/arch/AltiVec') diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h index f29b59590..df04b8e0f 100755 --- a/Eigen/src/Core/arch/AltiVec/PacketMath.h +++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h @@ -1160,43 +1160,48 @@ template<> EIGEN_STRONG_INLINE Packet8bf pabs(const Packet8bf& a) { return pand(p8us_abs_mask, a); } -template EIGEN_STRONG_INLINE Packet4i parithmetic_shift_right(Packet4i a) +template EIGEN_STRONG_INLINE Packet4i parithmetic_shift_right(const Packet4i& a) { return vec_sra(a,reinterpret_cast(pset1(N))); } -template EIGEN_STRONG_INLINE Packet4i plogical_shift_right(Packet4i a) +template EIGEN_STRONG_INLINE Packet4i plogical_shift_right(const Packet4i& a) { return vec_sr(a,reinterpret_cast(pset1(N))); } -template EIGEN_STRONG_INLINE Packet4i plogical_shift_left(Packet4i a) +template EIGEN_STRONG_INLINE Packet4i plogical_shift_left(const Packet4i& a) { return vec_sl(a,reinterpret_cast(pset1(N))); } -template EIGEN_STRONG_INLINE Packet4f plogical_shift_left(Packet4f a) +template EIGEN_STRONG_INLINE Packet4f plogical_shift_left(const Packet4f& a) { const _EIGEN_DECLARE_CONST_FAST_Packet4ui(mask, N); Packet4ui r = vec_sl(reinterpret_cast(a), p4ui_mask); return reinterpret_cast(r); } -template EIGEN_STRONG_INLINE Packet4f plogical_shift_right(Packet4f a) +template EIGEN_STRONG_INLINE Packet4f plogical_shift_right(const Packet4f& a) { const _EIGEN_DECLARE_CONST_FAST_Packet4ui(mask, N); Packet4ui r = vec_sr(reinterpret_cast(a), p4ui_mask); return reinterpret_cast(r); } -template EIGEN_STRONG_INLINE Packet4ui plogical_shift_right(Packet4ui a) +template EIGEN_STRONG_INLINE Packet4ui plogical_shift_right(const Packet4ui& a) { const _EIGEN_DECLARE_CONST_FAST_Packet4ui(mask, N); return vec_sr(a, p4ui_mask); } -template EIGEN_STRONG_INLINE Packet4ui plogical_shift_left(Packet4ui a) +template EIGEN_STRONG_INLINE Packet4ui plogical_shift_left(const Packet4ui& a) { const _EIGEN_DECLARE_CONST_FAST_Packet4ui(mask, N); return vec_sl(a, p4ui_mask); } -template EIGEN_STRONG_INLINE Packet8us plogical_shift_left(Packet8us a) +template EIGEN_STRONG_INLINE Packet8us plogical_shift_left(const Packet8us& a) { const _EIGEN_DECLARE_CONST_FAST_Packet8us(mask, N); return vec_sl(a, p8us_mask); } +template EIGEN_STRONG_INLINE Packet8us plogical_shift_right(const Packet8us& a) +{ + const _EIGEN_DECLARE_CONST_FAST_Packet8us(mask, N); + return vec_sr(a, p8us_mask); +} EIGEN_STRONG_INLINE Packet4f Bf16ToF32Even(const Packet8bf& bf){ return plogical_shift_left<16>(reinterpret_cast(bf.m_val)); @@ -1323,14 +1328,14 @@ template<> EIGEN_STRONG_INLINE Packet8bf pexp (const Packet8bf& a){ } template<> EIGEN_STRONG_INLINE Packet4f pldexp(const Packet4f& a, const Packet4f& exponent) { - return pldexp_float(a,exponent); + return pldexp_generic(a,exponent); } template<> EIGEN_STRONG_INLINE Packet8bf pldexp (const Packet8bf& a, const Packet8bf& exponent){ - BF16_TO_F32_BINARY_OP_WRAPPER(pldexp_float, a, exponent); + BF16_TO_F32_BINARY_OP_WRAPPER(pldexp, a, exponent); } template<> EIGEN_STRONG_INLINE Packet4f pfrexp(const Packet4f& a, Packet4f& exponent) { - return pfrexp_float(a,exponent); + return pfrexp_generic(a,exponent); } template<> EIGEN_STRONG_INLINE Packet8bf pfrexp (const Packet8bf& a, Packet8bf& e){ Packet4f a_even = Bf16ToF32Even(a); @@ -2324,6 +2329,11 @@ template<> EIGEN_STRONG_INLINE Packet2d pset1(const double& from) { return v; } +template<> EIGEN_STRONG_INLINE Packet2d pset1frombits(unsigned long from) { + Packet2l v = {static_cast(from), static_cast(from)}; + return reinterpret_cast(v); +} + template<> EIGEN_STRONG_INLINE void pbroadcast4(const double *a, Packet2d& a0, Packet2d& a1, Packet2d& a2, Packet2d& a3) @@ -2439,7 +2449,8 @@ template<> EIGEN_STRONG_INLINE Packet2d pabs(const Packet2d& a) { return vec_abs // a slow version that works with older compilers. // Update: apparently vec_cts/vec_ctf intrinsics for 64-bit doubles // are buggy, https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70963 -static inline Packet2l ConvertToPacket2l(const Packet2d& x) { +template<> +inline Packet2l pcast(const Packet2d& x) { #if EIGEN_GNUC_AT_LEAST(5, 4) || \ (EIGEN_GNUC_AT(6, 1) && __GNUC_PATCHLEVEL__ >= 1) return vec_cts(x, 0); // TODO: check clang version. @@ -2452,6 +2463,15 @@ static inline Packet2l ConvertToPacket2l(const Packet2d& x) { #endif } +template<> +inline Packet2d pcast(const Packet2l& x) { + unsigned long long tmp[2]; + memcpy(tmp, &x, sizeof(tmp)); + Packet2d d = { static_cast(tmp[0]), + static_cast(tmp[1]) }; + return d; +} + // Packet2l shifts. // For POWER8 we simply use vec_sr/l. @@ -2569,7 +2589,7 @@ EIGEN_STRONG_INLINE Packet2l plogical_shift_right(const Packet2l& a) { template<> EIGEN_STRONG_INLINE Packet2d pldexp(const Packet2d& a, const Packet2d& exponent) { // Clamp exponent to [-2099, 2099] const Packet2d max_exponent = pset1(2099.0); - const Packet2l e = ConvertToPacket2l(pmin(pmax(exponent, pnegate(max_exponent)), max_exponent)); + const Packet2l e = pcast(pmin(pmax(exponent, pnegate(max_exponent)), max_exponent)); // Split 2^e into four factors and multiply: const Packet2l bias = { 1023, 1023 }; @@ -2582,14 +2602,16 @@ template<> EIGEN_STRONG_INLINE Packet2d pldexp(const Packet2d& a, cons return out; } + +// Extract exponent without existence of Packet2l. +template<> +EIGEN_STRONG_INLINE +Packet2d pfrexp_generic_get_biased_exponent(const Packet2d& a) { + return pcast(plogical_shift_right<52>(reinterpret_cast(pabs(a)))); +} + template<> EIGEN_STRONG_INLINE Packet2d pfrexp (const Packet2d& a, Packet2d& exponent) { - double exp[2] = { exponent[0], exponent[1] }; - Packet2d ret = { pfrexp(a[0], exp[0]), pfrexp(a[1], exp[1]) }; - exponent[0] = exp[0]; - exponent[1] = exp[1]; - return ret; -// This doesn't currently work (no integer_packet for Packet2d - but adding it causes other problems) -// return pfrexp_double(a, exponent); + return pfrexp_generic(a, exponent); } template<> EIGEN_STRONG_INLINE double predux(const Packet2d& a) -- cgit v1.2.3