aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src/Core/util/Macros.h
diff options
context:
space:
mode:
authorGravatar Antonio Sanchez <cantonios@google.com>2021-03-03 19:22:15 -0800
committerGravatar Antonio Sanchez <cantonios@google.com>2021-03-05 08:54:12 -0800
commit82d61af3a490154ad1c0ae2fe00c561095854897 (patch)
tree9137169da76e43ef4908ab87dc5990d801c48eda /Eigen/src/Core/util/Macros.h
parent5f0b4a4010af4cbf6161a0d1a03a747addc44a5d (diff)
Fix rint SSE/NEON again, using optimization barrier.
This is a new version of !423, which failed for MSVC. Defined `EIGEN_OPTIMIZATION_BARRIER(X)` that uses inline assembly to prevent operations involving `X` from crossing that barrier. Should work on most `GNUC`-compatible compilers (MSVC doesn't seem to need this). This is a modified version adapted from what was used in `psincos_float` and tested on more platforms (see #1674, https://godbolt.org/z/73ezTG). Modified `rint` to use the barrier to prevent the add/subtract rounding trick from being optimized away. Also fixed an edge case for large inputs that get bumped up a power of two and end up rounding away more than just the fractional part. If we are over `2^digits` then just return the input. This edge case was missed in the test since the test was comparing approximate equality, which was still satisfied. Adding a strict equality option catches it.
Diffstat (limited to 'Eigen/src/Core/util/Macros.h')
-rw-r--r--Eigen/src/Core/util/Macros.h64
1 files changed, 63 insertions, 1 deletions
diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h
index ac514cbb4..43890eab1 100644
--- a/Eigen/src/Core/util/Macros.h
+++ b/Eigen/src/Core/util/Macros.h
@@ -51,7 +51,11 @@
#ifndef EIGEN_STACK_ALLOCATION_LIMIT
// Previous default: 131072 == 128 KB. New defaults: 0 when __AVX512F__ is defined, otherwise 16384 == 16 KB.
-#define EIGEN_STACK_ALLOCATION_LIMIT 131072
+#if defined(__AVX512F__)
+ #define EIGEN_STACK_ALLOCATION_LIMIT 0
+#else
+ #define EIGEN_STACK_ALLOCATION_LIMIT 16384
+#endif
#endif
//------------------------------------------------------------------------------------------
@@ -1063,6 +1067,64 @@ namespace Eigen {
#endif
+// Optimization barrier: keeps the compiler from moving or simplifying operations on `X` across this point.
+// This matters, for example, in the fast rounding trick where a magic constant is
+// added then subtracted, which is otherwise compiled away with -ffast-math.
+// Note: every non-empty expansion below already ends in ';', and `X` is a read-write ("+") asm operand.
+// See bug 1674
+#if !defined(EIGEN_OPTIMIZATION_BARRIER)
+ #if EIGEN_COMP_GNUC
+ // According to https://gcc.gnu.org/onlinedocs/gcc/Constraints.html:
+ // X: Any operand whatsoever.
+ // r: A register operand is allowed provided that it is in a general
+ // register.
+ // g: Any register, memory or immediate integer operand is allowed, except
+ // for registers that are not general registers.
+ // w: (AArch32/AArch64) Floating point register, Advanced SIMD vector
+ // register or SVE vector register.
+ // x: (SSE) Any SSE register.
+ // (AArch64) Like w, but restricted to registers 0 to 15 inclusive.
+ // v: (PowerPC) An Altivec vector register.
+ // wa:(PowerPC) A VSX register.
+ //
+ // "X" (uppercase) should work for all cases, though this seems to fail for
+ // some versions of GCC for arm/aarch64 with
+ // "error: inconsistent operand constraints in an 'asm'"
+ // Clang x86_64/arm/aarch64 seems to require "g" to support both scalars and
+ // vectors, otherwise
+ // "error: non-trivial scalar-to-vector conversion, possible invalid
+ // constraint for vector type"
+ //
+ // GCC for ppc64le generates an internal compiler error with x/X/g.
+ // GCC for AVX generates an internal compiler error with X.
+ //
+ // Tested on icc/gcc/clang for sse, avx, avx2, avx512dq
+ // gcc for arm, aarch64,
+ // gcc for ppc64le,
+ // both vectors and scalars.
+ //
+ // Note that this is restricted to plain types - this will not work
+ // directly for std::complex<T>, Eigen::half, Eigen::bfloat16. For these,
+ // you will need to apply to the underlying POD type.
+ #if EIGEN_ARCH_PPC
+ // Constraint alternatives: general register (r), Altivec (v), VSX (wa).
+ #define EIGEN_OPTIMIZATION_BARRIER(X) __asm__ ("" : "+r,v,wa" (X));
+ #elif EIGEN_ARCH_ARM_OR_ARM64
+ // Constraint alternatives: general operand (g), NEON/floating-point register (w).
+ #define EIGEN_OPTIMIZATION_BARRIER(X) __asm__ ("" : "+g,w" (X));
+ #elif EIGEN_ARCH_i386_OR_x86_64
+ // Constraint alternatives: general operand (g), SSE register (x).
+ #define EIGEN_OPTIMIZATION_BARRIER(X) __asm__ ("" : "+g,x" (X));
+ #else
+ // Not implemented for other architectures: expands to nothing (no barrier).
+ #define EIGEN_OPTIMIZATION_BARRIER(X)
+ #endif
+ #else
+ // Not implemented for other compilers: expands to nothing (no barrier).
+ #define EIGEN_OPTIMIZATION_BARRIER(X)
+ #endif
+#endif
+
#if EIGEN_COMP_MSVC
// NOTE MSVC often gives C4127 warnings with compiletime if statements. See bug 1362.
// This workaround is ugly, but it does the job.