From 1108b4f21836d52b50e4ec10a6e0eec027eda04d Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 4 Apr 2016 11:09:25 -0700 Subject: Fixed the signature of numext::abs to make it compatible with complex numbers --- Eigen/src/Core/MathFunctions.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Eigen') diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h index 000cafee7..e6c7dfa08 100644 --- a/Eigen/src/Core/MathFunctions.h +++ b/Eigen/src/Core/MathFunctions.h @@ -1034,7 +1034,7 @@ double tan(const double &x) { return ::tan(x); } template<typename T> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE -T abs(const T &x) { +typename NumTraits<T>::Real abs(const T &x) { EIGEN_USING_STD_MATH(abs); return abs(x); } -- cgit v1.2.3 From c4179dd470f72520b9ffba5b78d4dd1261ccc609 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 4 Apr 2016 11:11:51 -0700 Subject: Updated the scalar_abs_op struct to make it compatible with cuda devices. --- Eigen/src/Core/functors/UnaryFunctors.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Eigen') diff --git a/Eigen/src/Core/functors/UnaryFunctors.h b/Eigen/src/Core/functors/UnaryFunctors.h index 531beead6..46622f804 100644 --- a/Eigen/src/Core/functors/UnaryFunctors.h +++ b/Eigen/src/Core/functors/UnaryFunctors.h @@ -41,7 +41,7 @@ struct functor_traits<scalar_opposite_op<Scalar> > template<typename Scalar> struct scalar_abs_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_abs_op) typedef typename NumTraits<Scalar>::Real result_type; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { using std::abs; return abs(a); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { return numext::abs(a); } template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const { return internal::pabs(a); } -- cgit v1.2.3 From 03f2997a119578b894ee717aff23e2641ab78f37 Mon Sep 17 00:00:00 2001 From: Benoit Jacob Date: Mon, 4 Apr 2016 16:41:47 -0400 Subject: bug #1191 - 
Prevent Clang/ARM from rewriting VMLA into VMUL+VADD --- Eigen/src/Core/arch/NEON/PacketMath.h | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) (limited to 'Eigen') diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h index fead02916..10ef1d2b3 100644 --- a/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/Eigen/src/Core/arch/NEON/PacketMath.h @@ -186,7 +186,25 @@ template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& /*a*/, co // MLA: 10 GFlop/s ; FMA: 12 GFlops/s. template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vfmaq_f32(c,a,b); } #else -template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vmlaq_f32(c,a,b); } +template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { +#if EIGEN_COMP_CLANG && EIGEN_ARCH_ARM + // Clang/ARM will replace VMLA by VMUL+VADD at least for some values of -mcpu, + // at least -mcpu=cortex-a8 and -mcpu=cortex-a7. Since the former is the default on + // -march=armv7-a, that is a very common case. + // See e.g. this thread: + // http://lists.llvm.org/pipermail/llvm-dev/2013-December/068806.html + Packet4f r = c; + asm volatile( + "vmla.f32 %q[r], %q[a], %q[b]" + : [r] "+w" (r) + : [a] "w" (a), + [b] "w" (b) + : ); + return r; +#else + return vmlaq_f32(c,a,b); +#endif +} #endif // No FMA instruction for int, so use MLA unconditionally. 
-- cgit v1.2.3 From 158fea0f5e15e4611c36ce73f582c484deeace1a Mon Sep 17 00:00:00 2001 From: Benoit Jacob Date: Mon, 4 Apr 2016 16:42:40 -0400 Subject: bug #1190 - Don't trust __ARM_FEATURE_FMA on Clang/ARM --- Eigen/src/Core/arch/NEON/PacketMath.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'Eigen') diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h index 10ef1d2b3..63a2d9f52 100644 --- a/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/Eigen/src/Core/arch/NEON/PacketMath.h @@ -177,7 +177,9 @@ template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& /*a*/, co return pset1<Packet4i>(0); } -#ifdef __ARM_FEATURE_FMA +// Clang/ARM wrongly advertises __ARM_FEATURE_FMA even when it's not available, +// then implements a slow software scalar fallback calling fmaf()! +#if (defined __ARM_FEATURE_FMA) && !(EIGEN_COMP_CLANG && EIGEN_ARCH_ARM) // See bug 936. // FMA is available on VFPv4 i.e. when compiling with -mfpu=neon-vfpv4. // FMA is a true fused multiply-add i.e. only 1 rounding at the end, no intermediate rounding. -- cgit v1.2.3