about | summary | refs | log | tree | commit | diff | homepage
path: root/Eigen/src/Core/arch/NEON/PacketMath.h
diff options
context:
space:
mode:
author Antonio Sánchez <cantonios@google.com> 2021-03-03 18:51:51 +0000
committer Antonio Sánchez <cantonios@google.com> 2021-03-03 18:51:51 +0000
commit 9a663973b499a53be3e30eb7521545af820fdfc4 (patch)
tree d7547ca70c4e28c461b05294894ded5730f7a791 /Eigen/src/Core/arch/NEON/PacketMath.h
parent e72dfeb8b9fa5662831b5d0bb9d132521f9173dd (diff)
Revert "Fix rint for SSE/NEON."
Diffstat (limited to 'Eigen/src/Core/arch/NEON/PacketMath.h')
-rw-r--r-- Eigen/src/Core/arch/NEON/PacketMath.h 34
1 file changed, 10 insertions, 24 deletions
diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h
index 51cebaf2b..ec6ea90c5 100644
--- a/Eigen/src/Core/arch/NEON/PacketMath.h
+++ b/Eigen/src/Core/arch/NEON/PacketMath.h
@@ -3207,34 +3207,20 @@ template<> EIGEN_STRONG_INLINE Packet4f pceil<Packet4f>(const Packet4f& a)
template<> EIGEN_STRONG_INLINE Packet4f print(const Packet4f& a) {
// Adds and subtracts signum(a) * 2^23 to force rounding.
- const Packet4f limit = pset1<Packet4f>(static_cast<float>(1<<23));
- const Packet4f abs_a = pabs(a);
- // Inline asm to prevent the compiler from optimizing away the
- // addition and subtraction.
- // Packet4f r = psub(padd(abs_a, limit), limit);
- Packet4f r = abs_a;
- __asm__ ("vadd.f32 %[r], %[r], %[limit]\n\t"
- "vsub.f32 %[r], %[r], %[limit]" : [r] "+x" (r) : [limit] "x" (limit));
- // If greater than limit, simply return a. Otherwise, account for sign.
- r = pselect(pcmp_lt(abs_a, limit),
- pselect(pcmp_lt(a, pzero(a)), pnegate(r), r), a);
- return r;
+ const Packet4f offset =
+ pselect(pcmp_lt(a, pzero(a)),
+ pset1<Packet4f>(-static_cast<float>(1<<23)),
+ pset1<Packet4f>(+static_cast<float>(1<<23)));
+ return psub(padd(a, offset), offset);
}
template<> EIGEN_STRONG_INLINE Packet2f print(const Packet2f& a) {
// Adds and subtracts signum(a) * 2^23 to force rounding.
- const Packet2f limit = pset1<Packet2f>(static_cast<float>(1<<23));
- const Packet2f abs_a = pabs(a);
- // Inline asm to prevent the compiler from optimizing away the
- // addition and subtraction.
- // Packet4f r = psub(padd(abs_a, limit), limit);
- Packet2f r = abs_a;
- __asm__ ("vadd.f32 %[r], %[r], %[limit]\n\t"
- "vsub.f32 %[r], %[r], %[limit]" : [r] "+x" (r) : [limit] "x" (limit));
- // If greater than limit, simply return a. Otherwise, account for sign.
- r = pselect(pcmp_lt(abs_a, limit),
- pselect(pcmp_lt(a, pzero(a)), pnegate(r), r), a);
- return r;
+ const Packet2f offset =
+ pselect(pcmp_lt(a, pzero(a)),
+ pset1<Packet2f>(-static_cast<float>(1<<23)),
+ pset1<Packet2f>(+static_cast<float>(1<<23)));
+ return psub(padd(a, offset), offset);
}
template<> EIGEN_STRONG_INLINE Packet4f pfloor<Packet4f>(const Packet4f& a)