diff options
author | Antonio Sanchez <cantonios@google.com> | 2021-03-03 19:22:15 -0800 |
---|---|---|
committer | Antonio Sanchez <cantonios@google.com> | 2021-03-05 08:54:12 -0800 |
commit | 82d61af3a490154ad1c0ae2fe00c561095854897 (patch) | |
tree | 9137169da76e43ef4908ab87dc5990d801c48eda /Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h | |
parent | 5f0b4a4010af4cbf6161a0d1a03a747addc44a5d (diff) |
Fix rint SSE/NEON again, using optimization barrier.
This is a new version of !423, which failed for MSVC.
Defined `EIGEN_OPTIMIZATION_BARRIER(X)` that uses inline assembly to
prevent operations involving `X` from crossing that barrier. Should
work on most `GNUC`-compatible compilers (MSVC doesn't seem to need
this). This is a modified version adapted from what was used in
`psincos_float` and tested on more platforms
(see #1674, https://godbolt.org/z/73ezTG).
Modified `rint` to use the barrier to prevent the add/subtract rounding
trick from being optimized away.
Also fixed an edge case for large inputs that get bumped up a power of two
and end up rounding away more than just the fractional part. If we are
over `2^digits` then just return the input. This edge case was missed in
the test since the test was comparing approximate equality, which was still
satisfied. Adding a strict equality option catches it.
Diffstat (limited to 'Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h')
-rw-r--r-- | Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h | 16 |
1 files changed, 3 insertions, 13 deletions
diff --git a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h index b1d4be32d..411640ee8 100644 --- a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +++ b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h @@ -630,14 +630,6 @@ __attribute__((optimize("-fno-unsafe-math-optimizations"))) #endif Packet psincos_float(const Packet& _x) { -// Workaround -ffast-math aggressive optimizations -// See bug 1674 -#if EIGEN_COMP_CLANG && defined(EIGEN_VECTORIZE_SSE) -#define EIGEN_SINCOS_DONT_OPT(X) __asm__ ("" : "+x" (X)); -#else -#define EIGEN_SINCOS_DONT_OPT(X) -#endif - typedef typename unpacket_traits<Packet>::integer_packet PacketI; const Packet cst_2oPI = pset1<Packet>(0.636619746685028076171875f); // 2/PI @@ -652,7 +644,7 @@ Packet psincos_float(const Packet& _x) // Rounding trick: Packet y_round = padd(y, cst_rounding_magic); - EIGEN_SINCOS_DONT_OPT(y_round) + EIGEN_OPTIMIZATION_BARRIER(y_round) PacketI y_int = preinterpret<PacketI>(y_round); // last 23 digits represent integer (if abs(x)<2^24) y = psub(y_round, cst_rounding_magic); // nearest integer to x*4/pi @@ -674,9 +666,9 @@ Packet psincos_float(const Packet& _x) // and 2 ULP up to: const float huge_th = ComputeSine ? 25966.f : 18838.f; x = pmadd(y, pset1<Packet>(-1.5703125), x); // = 0xbfc90000 - EIGEN_SINCOS_DONT_OPT(x) + EIGEN_OPTIMIZATION_BARRIER(x) x = pmadd(y, pset1<Packet>(-0.000483989715576171875), x); // = 0xb9fdc000 - EIGEN_SINCOS_DONT_OPT(x) + EIGEN_OPTIMIZATION_BARRIER(x) x = pmadd(y, pset1<Packet>(1.62865035235881805419921875e-07), x); // = 0x342ee000 x = pmadd(y, pset1<Packet>(5.5644315544167710640977020375430583953857421875e-11), x); // = 0x2e74b9ee @@ -753,8 +745,6 @@ Packet psincos_float(const Packet& _x) // Update the sign and filter huge inputs return pxor(y, sign_bit); - -#undef EIGEN_SINCOS_DONT_OPT } template<typename Packet> |