diff options
author | 2016-04-05 18:21:05 +0100 | |
---|---|---|
committer | 2016-04-05 18:21:05 +0100 | |
commit | 726bd5f077342615404807fe986cd8ccc1177d62 (patch) | |
tree | 58eacde7b2df88d15ce1fcd94b09e027084385b9 /Eigen | |
parent | a350c25a396aa4fdef4878d165bb3dbaedf0a4bb (diff) | |
parent | 4d7e230d2f8a55c45c1191fe08aa19d41e869a65 (diff) |
Merged eigen/eigen into default
Diffstat (limited to 'Eigen')
-rw-r--r-- | Eigen/src/Core/MathFunctions.h | 2 | ||||
-rw-r--r-- | Eigen/src/Core/arch/NEON/PacketMath.h | 24 | ||||
-rw-r--r-- | Eigen/src/Core/functors/UnaryFunctors.h | 2 |
3 files changed, 24 insertions, 4 deletions
diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h index 000cafee7..e6c7dfa08 100644 --- a/Eigen/src/Core/MathFunctions.h +++ b/Eigen/src/Core/MathFunctions.h @@ -1034,7 +1034,7 @@ double tan(const double &x) { return ::tan(x); } template<typename T> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE -T abs(const T &x) { +typename NumTraits<T>::Real abs(const T &x) { EIGEN_USING_STD_MATH(abs); return abs(x); } diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h index fead02916..63a2d9f52 100644 --- a/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/Eigen/src/Core/arch/NEON/PacketMath.h @@ -177,7 +177,9 @@ template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& /*a*/, co return pset1<Packet4i>(0); } -#ifdef __ARM_FEATURE_FMA +// Clang/ARM wrongly advertises __ARM_FEATURE_FMA even when it's not available, +// then implements a slow software scalar fallback calling fmaf()! +#if (defined __ARM_FEATURE_FMA) && !(EIGEN_COMP_CLANG && EIGEN_ARCH_ARM) // See bug 936. // FMA is available on VFPv4 i.e. when compiling with -mfpu=neon-vfpv4. // FMA is a true fused multiply-add i.e. only 1 rounding at the end, no intermediate rounding. @@ -186,7 +188,25 @@ template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& /*a*/, co // MLA: 10 GFlop/s ; FMA: 12 GFlops/s. template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vfmaq_f32(c,a,b); } #else -template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vmlaq_f32(c,a,b); } +template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { +#if EIGEN_COMP_CLANG && EIGEN_ARCH_ARM + // Clang/ARM will replace VMLA by VMUL+VADD at least for some values of -mcpu, + // at least -mcpu=cortex-a8 and -mcpu=cortex-a7. Since the former is the default on + // -march=armv7-a, that is a very common case. + // See e.g. this thread: + // http://lists.llvm.org/pipermail/llvm-dev/2013-December/068806.html + Packet4f r = c; + asm volatile( + "vmla.f32 %q[r], %q[a], %q[b]" + : [r] "+w" (r) + : [a] "w" (a), + [b] "w" (b) + : ); + return r; +#else + return vmlaq_f32(c,a,b); +#endif +} #endif // No FMA instruction for int, so use MLA unconditionally. diff --git a/Eigen/src/Core/functors/UnaryFunctors.h b/Eigen/src/Core/functors/UnaryFunctors.h index 826d84f69..26c02e4a7 100644 --- a/Eigen/src/Core/functors/UnaryFunctors.h +++ b/Eigen/src/Core/functors/UnaryFunctors.h @@ -41,7 +41,7 @@ struct functor_traits<scalar_opposite_op<Scalar> > template<typename Scalar> struct scalar_abs_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_abs_op) typedef typename NumTraits<Scalar>::Real result_type; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { using std::abs; return abs(a); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { return numext::abs(a); } template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const { return internal::pabs(a); } |