aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src/Core/arch/NEON/PacketMath.h
diff options
context:
space:
mode:
authorGravatar Zachary Garrett <zach.garrett@gmail.com>2020-08-04 16:28:23 +0000
committerGravatar Rasmus Munk Larsen <rmlarsen@google.com>2020-08-04 16:28:23 +0000
commit21122498ecfaa394aeef9d6ca8d8659550be97fa (patch)
tree2db5f3ba4e22eb6ab4c7cab9879315a88015ea96 /Eigen/src/Core/arch/NEON/PacketMath.h
parent23b7f0572b6fbff74d8834d2ec34eec95b454c7a (diff)
Temporarily turn off the NEON implementation of pfloor as it does not work for large values.
The NEON implementation mimics the SSE implementation, but did not mention the caveat that, due to the use of signed integer conversions, not all values representable in the original floating-point type are supported.
Diffstat (limited to 'Eigen/src/Core/arch/NEON/PacketMath.h')
-rw-r--r-- Eigen/src/Core/arch/NEON/PacketMath.h | 10
1 files changed, 8 insertions, 2 deletions
diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h
index 2c4b5bfff..c2fdcbade 100644
--- a/Eigen/src/Core/arch/NEON/PacketMath.h
+++ b/Eigen/src/Core/arch/NEON/PacketMath.h
@@ -137,7 +137,7 @@ struct packet_traits<float> : default_packet_traits
HasBlend = 0,
HasDiv = 1,
- HasFloor = 1,
+ HasFloor = 0,
HasSin = EIGEN_FAST_MATH,
HasCos = EIGEN_FAST_MATH,
@@ -1397,6 +1397,9 @@ template<> EIGEN_STRONG_INLINE Packet2f pcmp_lt_or_nan<Packet2f>(const Packet2f&
template<> EIGEN_STRONG_INLINE Packet4f pcmp_lt_or_nan<Packet4f>(const Packet4f& a, const Packet4f& b)
{ return vreinterpretq_f32_u32(vmvnq_u32(vcgeq_f32(a,b))); }
+// WARNING: this pfloor implementation makes sense for inputs that fit in
+// signed int32 integers (up to ~2.14e9), hence this is currently only used
+// by pexp and not exposed through HasFloor.
template<> EIGEN_STRONG_INLINE Packet2f pfloor<Packet2f>(const Packet2f& a)
{
const Packet2f cst_1 = pset1<Packet2f>(1.0f);
@@ -3269,7 +3272,7 @@ template<> struct packet_traits<double> : default_packet_traits
HasBlend = 0,
HasDiv = 1,
- HasFloor = 1,
+ HasFloor = 0,
HasSin = 0,
HasCos = 0,
@@ -3329,6 +3332,9 @@ template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const
template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) { return vmaxq_f64(a,b); }
+// WARNING: this pfloor implementation makes sense for inputs that fit in
+// signed int64 integers (up to ~9.22e18), hence this is currently only used
+// by pexp and not exposed through HasFloor.
template<> EIGEN_STRONG_INLINE Packet2d pfloor<Packet2d>(const Packet2d& a)
{
const Packet2d cst_1 = pset1<Packet2d>(1.0);