diff options
author | 2014-04-24 23:17:18 +0200 | |
---|---|---|
committer | 2014-04-24 23:17:18 +0200 | |
commit | b0e19db1cf462a07e25429d4f04f7d8e858f670f (patch) | |
tree | cd2fd92e39c9bf3f8266baa0fd6f307c36a8413c /Eigen | |
parent | 8d85ce88e129d794d0700dd2c8eec2713449e54d (diff) |
Enable fused madd for AltiVec
Diffstat (limited to 'Eigen')
-rw-r--r-- | Eigen/src/Core/arch/AVX/PacketMath.h | 6 | ||||
-rwxr-xr-x | Eigen/src/Core/arch/AltiVec/PacketMath.h | 4 | ||||
-rwxr-xr-x | Eigen/src/Core/arch/SSE/PacketMath.h | 6 | ||||
-rw-r--r-- | Eigen/src/Core/products/GeneralBlockPanelKernel.h | 10 |
4 files changed, 21 insertions, 5 deletions
diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h index a8b94e191..e6f540430 100644 --- a/Eigen/src/Core/arch/AVX/PacketMath.h +++ b/Eigen/src/Core/arch/AVX/PacketMath.h @@ -22,6 +22,12 @@ namespace internal { #define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS (2*sizeof(void*)) #endif +#ifdef EIGEN_VECTORIZE_FMA +#ifndef EIGEN_HAS_FUSED_MADD +#define EIGEN_HAS_FUSED_MADD 1 +#endif +#endif + typedef __m256 Packet8f; typedef __m256i Packet8i; typedef __m256d Packet4d; diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h index 618d95d85..e26d88382 100755 --- a/Eigen/src/Core/arch/AltiVec/PacketMath.h +++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h @@ -18,6 +18,10 @@ namespace internal { #define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 4 #endif +#ifndef EIGEN_HAS_FUSED_MADD +#define EIGEN_HAS_FUSED_MADD 1 +#endif + #ifndef EIGEN_HAS_FUSE_CJMADD #define EIGEN_HAS_FUSE_CJMADD 1 #endif diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index e7e035f4e..4f9d8c4fd 100755 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -22,6 +22,12 @@ namespace internal { #define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS (2*sizeof(void*)) #endif +#ifdef EIGEN_VECTORIZE_FMA +#ifndef EIGEN_HAS_FUSED_MADD +#define EIGEN_HAS_FUSED_MADD 1 +#endif +#endif + #if defined EIGEN_VECTORIZE_AVX && defined __GNUC__ && !(defined __clang__ || defined __INTEL_COMPILER) // With GCC's default ABI version, a __m128 or __m256 are the same types and therefore we cannot // have overloads for both types without linking error. 
diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h index b1ed9264a..60251f624 100644 --- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h +++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h @@ -188,7 +188,7 @@ public: nr = 4, // register block size along the M direction (currently, this one cannot be modified) -#ifdef __FMA__ +#ifdef EIGEN_HAS_FUSED_MADD // we assume 16 registers mr = 3*LhsPacketSize, #else @@ -254,7 +254,7 @@ public: // let gcc allocate the register in which to store the result of the pmul // (in the case where there is no FMA) gcc fails to figure out how to avoid // spilling register. -#ifdef EIGEN_VECTORIZE_FMA +#ifdef EIGEN_HAS_FUSED_MADD EIGEN_UNUSED_VARIABLE(tmp); c = pmadd(a,b,c); #else @@ -296,7 +296,7 @@ public: NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS, nr = 4, -#ifdef __FMA__ +#ifdef EIGEN_HAS_FUSED_MADD // we assume 16 registers mr = 3*LhsPacketSize, #else @@ -359,7 +359,7 @@ public: EIGEN_STRONG_INLINE void madd_impl(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp, const true_type&) const { -#ifdef EIGEN_VECTORIZE_FMA +#ifdef EIGEN_HAS_FUSED_MADD EIGEN_UNUSED_VARIABLE(tmp); c.v = pmadd(a.v,b,c.v); #else @@ -635,7 +635,7 @@ public: EIGEN_STRONG_INLINE void madd_impl(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp, const true_type&) const { -#ifdef EIGEN_VECTORIZE_FMA +#ifdef EIGEN_HAS_FUSED_MADD EIGEN_UNUSED_VARIABLE(tmp); c.v = pmadd(a,b.v,c.v); #else |