aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
diff options
context:
space:
mode:
authorGravatar Antonio Sanchez <cantonios@google.com>2021-03-03 19:22:15 -0800
committerGravatar Antonio Sanchez <cantonios@google.com>2021-03-05 08:54:12 -0800
commit82d61af3a490154ad1c0ae2fe00c561095854897 (patch)
tree9137169da76e43ef4908ab87dc5990d801c48eda /Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
parent5f0b4a4010af4cbf6161a0d1a03a747addc44a5d (diff)
Fix rint SSE/NEON again, using optimization barrier.
This is a new version of !423, which failed for MSVC. Defined `EIGEN_OPTIMIZATION_BARRIER(X)` that uses inline assembly to prevent operations involving `X` from crossing that barrier. Should work on most `GNUC` compatible compilers (MSVC doesn't seem to need this). This is a modified version adapted from what was used in `psincos_float` and tested on more platforms (see #1674, https://godbolt.org/z/73ezTG). Modified `rint` to use the barrier to prevent the add/subtract rounding trick from being optimized away. Also fixed an edge case for large inputs that get bumped up a power of two and ends up rounding away more than just the fractional part. If we are over `2^digits` then just return the input. This edge case was missed in the test since the test was comparing approximate equality, which was still satisfied. Adding a strict equality option catches it.
Diffstat (limited to 'Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h')
-rw-r--r--Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h16
1 files changed, 3 insertions, 13 deletions
diff --git a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
index b1d4be32d..411640ee8 100644
--- a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
+++ b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
@@ -630,14 +630,6 @@ __attribute__((optimize("-fno-unsafe-math-optimizations")))
#endif
Packet psincos_float(const Packet& _x)
{
-// Workaround -ffast-math aggressive optimizations
-// See bug 1674
-#if EIGEN_COMP_CLANG && defined(EIGEN_VECTORIZE_SSE)
-#define EIGEN_SINCOS_DONT_OPT(X) __asm__ ("" : "+x" (X));
-#else
-#define EIGEN_SINCOS_DONT_OPT(X)
-#endif
-
typedef typename unpacket_traits<Packet>::integer_packet PacketI;
const Packet cst_2oPI = pset1<Packet>(0.636619746685028076171875f); // 2/PI
@@ -652,7 +644,7 @@ Packet psincos_float(const Packet& _x)
// Rounding trick:
Packet y_round = padd(y, cst_rounding_magic);
- EIGEN_SINCOS_DONT_OPT(y_round)
+ EIGEN_OPTIMIZATION_BARRIER(y_round)
PacketI y_int = preinterpret<PacketI>(y_round); // last 23 digits represent integer (if abs(x)<2^24)
y = psub(y_round, cst_rounding_magic); // nearest integer to x*4/pi
@@ -674,9 +666,9 @@ Packet psincos_float(const Packet& _x)
// and 2 ULP up to:
const float huge_th = ComputeSine ? 25966.f : 18838.f;
x = pmadd(y, pset1<Packet>(-1.5703125), x); // = 0xbfc90000
- EIGEN_SINCOS_DONT_OPT(x)
+ EIGEN_OPTIMIZATION_BARRIER(x)
x = pmadd(y, pset1<Packet>(-0.000483989715576171875), x); // = 0xb9fdc000
- EIGEN_SINCOS_DONT_OPT(x)
+ EIGEN_OPTIMIZATION_BARRIER(x)
x = pmadd(y, pset1<Packet>(1.62865035235881805419921875e-07), x); // = 0x342ee000
x = pmadd(y, pset1<Packet>(5.5644315544167710640977020375430583953857421875e-11), x); // = 0x2e74b9ee
@@ -753,8 +745,6 @@ Packet psincos_float(const Packet& _x)
// Update the sign and filter huge inputs
return pxor(y, sign_bit);
-
-#undef EIGEN_SINCOS_DONT_OPT
}
template<typename Packet>