about | summary | refs | log | tree | commit | diff | homepage
path: root/Eigen/src/Core/arch
diff options
context:
space:
mode:
Diffstat (limited to 'Eigen/src/Core/arch')
-rw-r--r-- Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h 16
-rw-r--r-- Eigen/src/Core/arch/NEON/PacketMath.h 30
-rwxr-xr-x Eigen/src/Core/arch/SSE/PacketMath.h 30
3 files changed, 43 insertions, 33 deletions
diff --git a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
index b1d4be32d..411640ee8 100644
--- a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
+++ b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
@@ -630,14 +630,6 @@ __attribute__((optimize("-fno-unsafe-math-optimizations")))
#endif
Packet psincos_float(const Packet& _x)
{
-// Workaround -ffast-math aggressive optimizations
-// See bug 1674
-#if EIGEN_COMP_CLANG && defined(EIGEN_VECTORIZE_SSE)
-#define EIGEN_SINCOS_DONT_OPT(X) __asm__ ("" : "+x" (X));
-#else
-#define EIGEN_SINCOS_DONT_OPT(X)
-#endif
-
typedef typename unpacket_traits<Packet>::integer_packet PacketI;
const Packet cst_2oPI = pset1<Packet>(0.636619746685028076171875f); // 2/PI
@@ -652,7 +644,7 @@ Packet psincos_float(const Packet& _x)
// Rounding trick:
Packet y_round = padd(y, cst_rounding_magic);
- EIGEN_SINCOS_DONT_OPT(y_round)
+ EIGEN_OPTIMIZATION_BARRIER(y_round)
PacketI y_int = preinterpret<PacketI>(y_round); // last 23 digits represent integer (if abs(x)<2^24)
y = psub(y_round, cst_rounding_magic); // nearest integer to x*4/pi
@@ -674,9 +666,9 @@ Packet psincos_float(const Packet& _x)
// and 2 ULP up to:
const float huge_th = ComputeSine ? 25966.f : 18838.f;
x = pmadd(y, pset1<Packet>(-1.5703125), x); // = 0xbfc90000
- EIGEN_SINCOS_DONT_OPT(x)
+ EIGEN_OPTIMIZATION_BARRIER(x)
x = pmadd(y, pset1<Packet>(-0.000483989715576171875), x); // = 0xb9fdc000
- EIGEN_SINCOS_DONT_OPT(x)
+ EIGEN_OPTIMIZATION_BARRIER(x)
x = pmadd(y, pset1<Packet>(1.62865035235881805419921875e-07), x); // = 0x342ee000
x = pmadd(y, pset1<Packet>(5.5644315544167710640977020375430583953857421875e-11), x); // = 0x2e74b9ee
@@ -753,8 +745,6 @@ Packet psincos_float(const Packet& _x)
// Update the sign and filter huge inputs
return pxor(y, sign_bit);
-
-#undef EIGEN_SINCOS_DONT_OPT
}
template<typename Packet>
diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h
index ec6ea90c5..7d69de6dc 100644
--- a/Eigen/src/Core/arch/NEON/PacketMath.h
+++ b/Eigen/src/Core/arch/NEON/PacketMath.h
@@ -3207,20 +3207,30 @@ template<> EIGEN_STRONG_INLINE Packet4f pceil<Packet4f>(const Packet4f& a)
template<> EIGEN_STRONG_INLINE Packet4f print(const Packet4f& a) {
// Adds and subtracts signum(a) * 2^23 to force rounding.
- const Packet4f offset =
- pselect(pcmp_lt(a, pzero(a)),
- pset1<Packet4f>(-static_cast<float>(1<<23)),
- pset1<Packet4f>(+static_cast<float>(1<<23)));
- return psub(padd(a, offset), offset);
+ const Packet4f limit = pset1<Packet4f>(static_cast<float>(1<<23));
+ const Packet4f abs_a = pabs(a);
+ Packet4f r = padd(abs_a, limit);
+ // Don't compile-away addition and subtraction.
+ EIGEN_OPTIMIZATION_BARRIER(r);
+ r = psub(r, limit);
+ // If greater than limit, simply return a. Otherwise, account for sign.
+ r = pselect(pcmp_lt(abs_a, limit),
+ pselect(pcmp_lt(a, pzero(a)), pnegate(r), r), a);
+ return r;
}
template<> EIGEN_STRONG_INLINE Packet2f print(const Packet2f& a) {
// Adds and subtracts signum(a) * 2^23 to force rounding.
- const Packet2f offset =
- pselect(pcmp_lt(a, pzero(a)),
- pset1<Packet2f>(-static_cast<float>(1<<23)),
- pset1<Packet2f>(+static_cast<float>(1<<23)));
- return psub(padd(a, offset), offset);
+ const Packet2f limit = pset1<Packet2f>(static_cast<float>(1<<23));
+ const Packet2f abs_a = pabs(a);
+ Packet2f r = padd(abs_a, limit);
+ // Don't compile-away addition and subtraction.
+ EIGEN_OPTIMIZATION_BARRIER(r);
+ r = psub(r, limit);
+ // If greater than limit, simply return a. Otherwise, account for sign.
+ r = pselect(pcmp_lt(abs_a, limit),
+ pselect(pcmp_lt(a, pzero(a)), pnegate(r), r), a);
+ return r;
}
template<> EIGEN_STRONG_INLINE Packet4f pfloor<Packet4f>(const Packet4f& a)
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index b9821ad80..d7b8bc8ac 100755
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -646,20 +646,30 @@ template<> EIGEN_STRONG_INLINE Packet2d pfloor<Packet2d>(const Packet2d& a) { re
#else
template<> EIGEN_STRONG_INLINE Packet4f print(const Packet4f& a) {
// Adds and subtracts signum(a) * 2^23 to force rounding.
- const Packet4f offset =
- pselect(pcmp_lt(a, pzero(a)),
- pset1<Packet4f>(-static_cast<float>(1<<23)),
- pset1<Packet4f>(+static_cast<float>(1<<23)));
- return psub(padd(a, offset), offset);
+ const Packet4f limit = pset1<Packet4f>(static_cast<float>(1<<23));
+ const Packet4f abs_a = pabs(a);
+ Packet4f r = padd(abs_a, limit);
+ // Don't compile-away addition and subtraction.
+ EIGEN_OPTIMIZATION_BARRIER(r);
+ r = psub(r, limit);
+ // If greater than limit, simply return a. Otherwise, account for sign.
+ r = pselect(pcmp_lt(abs_a, limit),
+ pselect(pcmp_lt(a, pzero(a)), pnegate(r), r), a);
+ return r;
}
template<> EIGEN_STRONG_INLINE Packet2d print(const Packet2d& a) {
// Adds and subtracts signum(a) * 2^52 to force rounding.
- const Packet2d offset =
- pselect(pcmp_lt(a, pzero(a)),
- pset1<Packet2d>(-static_cast<double>(1ull<<52)),
- pset1<Packet2d>(+static_cast<double>(1ull<<52)));
- return psub(padd(a, offset), offset);
+ const Packet2d limit = pset1<Packet2d>(static_cast<double>(1ull<<52));
+ const Packet2d abs_a = pabs(a);
+ Packet2d r = padd(abs_a, limit);
+ // Don't compile-away addition and subtraction.
+ EIGEN_OPTIMIZATION_BARRIER(r);
+ r = psub(r, limit);
+ // If greater than limit, simply return a. Otherwise, account for sign.
+ r = pselect(pcmp_lt(abs_a, limit),
+ pselect(pcmp_lt(a, pzero(a)), pnegate(r), r), a);
+ return r;
}
template<> EIGEN_STRONG_INLINE Packet4f pfloor<Packet4f>(const Packet4f& a)