aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src/Core/products/GeneralBlockPanelKernel.h
diff options
context:
space:
mode:
authorGravatar Gael Guennebaud <g.gael@free.fr>2018-12-07 09:15:46 +0100
committerGravatar Gael Guennebaud <g.gael@free.fr>2018-12-07 09:15:46 +0100
commit4e7746fe2234ccaa10cb0c9431d68ee72a1d1d40 (patch)
tree1cb1497f8a84ba65d588b8c3867c8c2bdfe776b3 /Eigen/src/Core/products/GeneralBlockPanelKernel.h
parentcbf2f4b7a0da0719cfb4fac3e7fc9b2c6b5f52bd (diff)
bug #1636: fix gemm performance issue with gcc>=6 and no FMA
Diffstat (limited to 'Eigen/src/Core/products/GeneralBlockPanelKernel.h')
-rw-r--r--Eigen/src/Core/products/GeneralBlockPanelKernel.h12
1 files changed, 10 insertions, 2 deletions
diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
index 9475a6ecc..88ca9cc97 100644
--- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h
+++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
@@ -1387,10 +1387,18 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
EIGEN_ASM_COMMENT("begin gebp micro kernel 2pX4");
RhsPacket B_0, B1, B2, B3, T0;
- #define EIGEN_GEBGP_ONESTEP(K) \
+
+ // NOTE: the begin/end asm comments below work around bug 935!
+ // but they are not enough for gcc>=6 without FMA (bug 1637)
+ #if EIGEN_GNUC_AT_LEAST(6,0)
+ #define EIGEN_GEBP_2PX4_SPILLING_WORKAROUND asm("" : [a0] "+x" (A0), [a1] "+x" (A1) );
+ #else
+ #define EIGEN_GEBP_2PX4_SPILLING_WORKAROUND
+ #endif
+ #define EIGEN_GEBGP_ONESTEP(K) \
do { \
EIGEN_ASM_COMMENT("begin step of gebp micro kernel 2pX4"); \
- EIGEN_ASM_COMMENT("Note: these asm comments work around bug 935!"); \
+ EIGEN_GEBP_2PX4_SPILLING_WORKAROUND \
traits.loadLhs(&blA[(0+2*K)*LhsProgress], A0); \
traits.loadLhs(&blA[(1+2*K)*LhsProgress], A1); \
traits.broadcastRhs(&blB[(0+4*K)*RhsProgress], B_0, B1, B2, B3); \