about | summary | refs | log | tree | commit | diff | homepage
path: root/Eigen
diff options
context:
space:
mode:
author: Gael Guennebaud <g.gael@free.fr> 2014-04-24 23:17:18 +0200
committer: Gael Guennebaud <g.gael@free.fr> 2014-04-24 23:17:18 +0200
commit: b0e19db1cf462a07e25429d4f04f7d8e858f670f (patch)
tree: cd2fd92e39c9bf3f8266baa0fd6f307c36a8413c /Eigen
parent: 8d85ce88e129d794d0700dd2c8eec2713449e54d (diff)
Enable fused madd for Altivec
Diffstat (limited to 'Eigen')
-rw-r--r-- Eigen/src/Core/arch/AVX/PacketMath.h | 6
-rwxr-xr-x Eigen/src/Core/arch/AltiVec/PacketMath.h | 4
-rwxr-xr-x Eigen/src/Core/arch/SSE/PacketMath.h | 6
-rw-r--r-- Eigen/src/Core/products/GeneralBlockPanelKernel.h | 10
4 files changed, 21 insertions, 5 deletions
diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h
index a8b94e191..e6f540430 100644
--- a/Eigen/src/Core/arch/AVX/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX/PacketMath.h
@@ -22,6 +22,12 @@ namespace internal {
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS (2*sizeof(void*))
#endif
+#ifdef EIGEN_VECTORIZE_FMA
+#ifndef EIGEN_HAS_FUSED_MADD
+#define EIGEN_HAS_FUSED_MADD 1
+#endif
+#endif
+
typedef __m256 Packet8f;
typedef __m256i Packet8i;
typedef __m256d Packet4d;
diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h
index 618d95d85..e26d88382 100755
--- a/Eigen/src/Core/arch/AltiVec/PacketMath.h
+++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h
@@ -18,6 +18,10 @@ namespace internal {
#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 4
#endif
+#ifndef EIGEN_HAS_FUSED_MADD
+#define EIGEN_HAS_FUSED_MADD 1
+#endif
+
#ifndef EIGEN_HAS_FUSE_CJMADD
#define EIGEN_HAS_FUSE_CJMADD 1
#endif
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index e7e035f4e..4f9d8c4fd 100755
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -22,6 +22,12 @@ namespace internal {
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS (2*sizeof(void*))
#endif
+#ifdef EIGEN_VECTORIZE_FMA
+#ifndef EIGEN_HAS_FUSED_MADD
+#define EIGEN_HAS_FUSED_MADD 1
+#endif
+#endif
+
#if defined EIGEN_VECTORIZE_AVX && defined __GNUC__ && !(defined __clang__ || defined __INTEL_COMPILER)
// With GCC's default ABI version, a __m128 or __m256 are the same types and therefore we cannot
// have overloads for both types without linking error.
diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
index b1ed9264a..60251f624 100644
--- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h
+++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
@@ -188,7 +188,7 @@ public:
nr = 4,
// register block size along the M direction (currently, this one cannot be modified)
-#ifdef __FMA__
+#ifdef EIGEN_HAS_FUSED_MADD
// we assume 16 registers
mr = 3*LhsPacketSize,
#else
@@ -254,7 +254,7 @@ public:
// let gcc allocate the register in which to store the result of the pmul
// (in the case where there is no FMA) gcc fails to figure out how to avoid
// spilling register.
-#ifdef EIGEN_VECTORIZE_FMA
+#ifdef EIGEN_HAS_FUSED_MADD
EIGEN_UNUSED_VARIABLE(tmp);
c = pmadd(a,b,c);
#else
@@ -296,7 +296,7 @@ public:
NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS,
nr = 4,
-#ifdef __FMA__
+#ifdef EIGEN_HAS_FUSED_MADD
// we assume 16 registers
mr = 3*LhsPacketSize,
#else
@@ -359,7 +359,7 @@ public:
EIGEN_STRONG_INLINE void madd_impl(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp, const true_type&) const
{
-#ifdef EIGEN_VECTORIZE_FMA
+#ifdef EIGEN_HAS_FUSED_MADD
EIGEN_UNUSED_VARIABLE(tmp);
c.v = pmadd(a.v,b,c.v);
#else
@@ -635,7 +635,7 @@ public:
EIGEN_STRONG_INLINE void madd_impl(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp, const true_type&) const
{
-#ifdef EIGEN_VECTORIZE_FMA
+#ifdef EIGEN_HAS_FUSED_MADD
EIGEN_UNUSED_VARIABLE(tmp);
c.v = pmadd(a,b.v,c.v);
#else