aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src/Core/arch/AVX/PacketMath.h
diff options
context:
space:
mode:
authorGravatar Gael Guennebaud <g.gael@free.fr>2019-01-09 15:25:17 +0100
committerGravatar Gael Guennebaud <g.gael@free.fr>2019-01-09 15:25:17 +0100
commite6b217b8ddf533de9bacc46aae2db6de78581056 (patch)
treeac2ef320056bf2698ea021412198ff6609137a0a /Eigen/src/Core/arch/AVX/PacketMath.h
parente70ffef9678f86ef465e93b89351e812ab47311d (diff)
bug #1652: implement a much more accurate version of vectorized sin/cos. This new version achieves the same speed for SSE/AVX, and is slightly faster with FMA. Guarantees are as follows:
- no FMA: 1ULP up to 3pi, 2ULP up to sin(25966) and cos(18838), fallback to std::sin/cos for larger inputs
- FMA: 1ULP up to sin(117435.992) and cos(71476.0625), fallback to std::sin/cos for larger inputs
Diffstat (limited to 'Eigen/src/Core/arch/AVX/PacketMath.h')
-rw-r--r--Eigen/src/Core/arch/AVX/PacketMath.h10
1 files changed, 10 insertions, 0 deletions
diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h
index e5aeb6375..ebea63757 100644
--- a/Eigen/src/Core/arch/AVX/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX/PacketMath.h
@@ -575,6 +575,16 @@ template<> EIGEN_STRONG_INLINE double predux_max<Packet4d>(const Packet4d& a)
return pfirst(_mm256_max_pd(tmp, _mm256_shuffle_pd(tmp, tmp, 1)));
}
+// not needed yet
+// template<> EIGEN_STRONG_INLINE bool predux_all(const Packet8f& x)
+// {
+// return _mm256_movemask_ps(x)==0xFF;
+// }
+
+// Horizontal "any" reduction over a Packet8f: yields true when at least one
+// of the eight float lanes has its sign (most significant) bit set.
+// NOTE(review): presumably x is a comparison mask whose lanes are all-ones or
+// all-zeros -- confirm against callers.
+template<> EIGEN_STRONG_INLINE bool predux_any(const Packet8f& x)
+{
+  // _mm256_movemask_ps packs the sign bit of every lane into the low bits of an int.
+  const int lane_sign_bits = _mm256_movemask_ps(x);
+  return lane_sign_bits != 0;
+}
template<int Offset>
struct palign_impl<Offset,Packet8f>