diff options
author | Gael Guennebaud <g.gael@free.fr> | 2019-01-09 15:25:17 +0100 |
---|---|---|
committer | Gael Guennebaud <g.gael@free.fr> | 2019-01-09 15:25:17 +0100 |
commit | e6b217b8ddf533de9bacc46aae2db6de78581056 (patch) | |
tree | ac2ef320056bf2698ea021412198ff6609137a0a /Eigen/src/Core/arch/AVX/PacketMath.h | |
parent | e70ffef9678f86ef465e93b89351e812ab47311d (diff) |
bug #1652: implements a much more accurate version of vectorized sin/cos. This new version achieves the same speed for SSE/AVX, and is slightly faster with FMA. Guarantees are as follows:
- no FMA: 1ULP up to 3pi, 2ULP up to sin(25966) and cos(18838), fallback to std::sin/cos for larger inputs
- FMA: 1ULP up to sin(117435.992) and cos(71476.0625), fallback to std::sin/cos for larger inputs
Diffstat (limited to 'Eigen/src/Core/arch/AVX/PacketMath.h')
-rw-r--r-- | Eigen/src/Core/arch/AVX/PacketMath.h | 10 |
1 files changed, 10 insertions, 0 deletions
diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h index e5aeb6375..ebea63757 100644 --- a/Eigen/src/Core/arch/AVX/PacketMath.h +++ b/Eigen/src/Core/arch/AVX/PacketMath.h @@ -575,6 +575,16 @@ template<> EIGEN_STRONG_INLINE double predux_max<Packet4d>(const Packet4d& a) return pfirst(_mm256_max_pd(tmp, _mm256_shuffle_pd(tmp, tmp, 1))); } +// not needed yet +// template<> EIGEN_STRONG_INLINE bool predux_all(const Packet8f& x) +// { +// return _mm256_movemask_ps(x)==0xFF; +// } + +template<> EIGEN_STRONG_INLINE bool predux_any(const Packet8f& x) +{ + return _mm256_movemask_ps(x)!=0; +} template<int Offset> struct palign_impl<Offset,Packet8f> |