From e6b217b8ddf533de9bacc46aae2db6de78581056 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 9 Jan 2019 15:25:17 +0100 Subject: bug #1652: implements a much more accurate version of vectorized sin/cos. This new version achieve same speed for SSE/AVX, and is slightly faster with FMA. Guarantees are as follows: - no FMA: 1ULP up to 3pi, 2ULP up to sin(25966) and cos(18838), fallback to std::sin/cos for larger inputs - FMA: 1ULP up to sin(117435.992) and cos(71476.0625), fallback to std::sin/cos for larger inputs --- Eigen/src/Core/arch/SSE/PacketMath.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'Eigen/src/Core/arch/SSE') diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index 3e7a75bc0..0003be43b 100755 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -812,6 +812,17 @@ template<> EIGEN_STRONG_INLINE int predux_max(const Packet4i& a) #endif // EIGEN_VECTORIZE_SSE4_1 } +// not needed yet +// template<> EIGEN_STRONG_INLINE bool predux_all(const Packet4f& x) +// { +// return _mm_movemask_ps(x) == 0xF; +// } + +template<> EIGEN_STRONG_INLINE bool predux_any(const Packet4f& x) +{ + return _mm_movemask_ps(x) != 0x0; +} + #if EIGEN_COMP_GNUC // template <> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) // { -- cgit v1.2.3