Fix rint SSE/NEON again, using optimization barrier.

This is a new version of !423, which failed for MSVC. It defines `EIGEN_OPTIMIZATION_BARRIER(X)`, which uses inline assembly to prevent operations involving `X` from crossing that barrier. It should work on most `GNUC`-compatible compilers (MSVC doesn't seem to need this). The barrier is a modified version of what was used in `psincos_float`, and it has been tested on more platforms (see #1674, https://godbolt.org/z/73ezTG). `rint` was modified to use the barrier to prevent the add/subtract rounding trick from being optimized away. This change also fixes an edge case for large inputs that get bumped up a power of two and end up rounding away more than just the fractional part: if the input is over `2^digits`, then just return the input. This edge case was missed by the test because the test compared for approximate equality, which was still satisfied. Adding a strict equality option catches it.
author: Antonio Sanchez <cantonios@google.com> 2021-03-03 19:22:15 -0800
committer: Antonio Sanchez <cantonios@google.com> 2021-03-05 08:54:12 -0800
commit: 82d61af3a490154ad1c0ae2fe00c561095854897 (patch)
tree: 9137169da76e43ef4908ab87dc5990d801c48eda /Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
parent: 5f0b4a4010af4cbf6161a0d1a03a747addc44a5d (diff)
1 files changed, 3 insertions, 13 deletions
diff --git a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
index b1d4be32d..411640ee8 100644
--- a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
+++ b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
@@ -630,14 +630,6 @@ __attribute__((optimize("-fno-unsafe-math-optimizations")))
 #endif
 Packet psincos_float(const Packet& _x)
 {
-// Workaround -ffast-math aggressive optimizations
-// See bug 1674
-#if EIGEN_COMP_CLANG && defined(EIGEN_VECTORIZE_SSE)
-#define EIGEN_SINCOS_DONT_OPT(X) __asm__  ("" : "+x" (X));
-#else
-#define EIGEN_SINCOS_DONT_OPT(X)
-#endif
-
   typedef typename unpacket_traits<Packet>::integer_packet PacketI;
 
   const Packet  cst_2oPI            = pset1<Packet>(0.636619746685028076171875f); // 2/PI
@@ -652,7 +644,7 @@ Packet psincos_float(const Packet& _x)
 
   // Rounding trick:
   Packet y_round = padd(y, cst_rounding_magic);
-  EIGEN_SINCOS_DONT_OPT(y_round)
+  EIGEN_OPTIMIZATION_BARRIER(y_round)
   PacketI y_int = preinterpret<PacketI>(y_round); // last 23 digits represent integer (if abs(x)<2^24)
   y = psub(y_round, cst_rounding_magic); // nearest integer to x*4/pi
 
@@ -674,9 +666,9 @@ Packet psincos_float(const Packet& _x)
   // and 2 ULP up to:
   const float huge_th = ComputeSine ? 25966.f : 18838.f;
   x = pmadd(y, pset1<Packet>(-1.5703125), x); // = 0xbfc90000
-  EIGEN_SINCOS_DONT_OPT(x)
+  EIGEN_OPTIMIZATION_BARRIER(x)
   x = pmadd(y, pset1<Packet>(-0.000483989715576171875), x); // = 0xb9fdc000
-  EIGEN_SINCOS_DONT_OPT(x)
+  EIGEN_OPTIMIZATION_BARRIER(x)
   x = pmadd(y, pset1<Packet>(1.62865035235881805419921875e-07), x); // = 0x342ee000
   x = pmadd(y, pset1<Packet>(5.5644315544167710640977020375430583953857421875e-11), x); // = 0x2e74b9ee
 
@@ -753,8 +745,6 @@ Packet psincos_float(const Packet& _x)
 
   // Update the sign and filter huge inputs
   return pxor(y, sign_bit);
-
-#undef EIGEN_SINCOS_DONT_OPT
 }
 
 template<typename Packet>
author	Antonio Sanchez <cantonios@google.com>	2021-03-03 19:22:15 -0800
committer	Antonio Sanchez <cantonios@google.com>	2021-03-05 08:54:12 -0800
commit	82d61af3a490154ad1c0ae2fe00c561095854897 (patch)
tree	9137169da76e43ef4908ab87dc5990d801c48eda /Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
parent	5f0b4a4010af4cbf6161a0d1a03a747addc44a5d (diff)