diff options
author | Gael Guennebaud <g.gael@free.fr> | 2018-12-07 09:15:46 +0100 |
---|---|---|
committer | Gael Guennebaud <g.gael@free.fr> | 2018-12-07 09:15:46 +0100 |
commit | 4e7746fe2234ccaa10cb0c9431d68ee72a1d1d40 (patch) | |
tree | 1cb1497f8a84ba65d588b8c3867c8c2bdfe776b3 /Eigen/src/Core/products/GeneralBlockPanelKernel.h | |
parent | cbf2f4b7a0da0719cfb4fac3e7fc9b2c6b5f52bd (diff) |
bug #1636: fix gemm performance issue with gcc>=6 and no FMA
Diffstat (limited to 'Eigen/src/Core/products/GeneralBlockPanelKernel.h')
-rw-r--r-- | Eigen/src/Core/products/GeneralBlockPanelKernel.h | 12 |
1 file changed, 10 insertions, 2 deletions
```diff
diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
index 9475a6ecc..88ca9cc97 100644
--- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h
+++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
@@ -1387,10 +1387,18 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
           EIGEN_ASM_COMMENT("begin gebp micro kernel 2pX4");
           RhsPacket B_0, B1, B2, B3, T0;
 
-  #define EIGEN_GEBGP_ONESTEP(K) \
+
+          // NOTE: the begin/end asm comments below work around bug 935!
+          // but they are not enough for gcc>=6 without FMA (bug 1637)
+          #if EIGEN_GNUC_AT_LEAST(6,0)
+            #define EIGEN_GEBP_2PX4_SPILLING_WORKAROUND asm("" : [a0] "+x" (A0), [a1] "+x" (A1) );
+          #else
+            #define EIGEN_GEBP_2PX4_SPILLING_WORKAROUND
+          #endif
+          #define EIGEN_GEBGP_ONESTEP(K) \
             do { \
               EIGEN_ASM_COMMENT("begin step of gebp micro kernel 2pX4"); \
-              EIGEN_ASM_COMMENT("Note: these asm comments work around bug 935!"); \
+              EIGEN_GEBP_2PX4_SPILLING_WORKAROUND \
               traits.loadLhs(&blA[(0+2*K)*LhsProgress], A0); \
               traits.loadLhs(&blA[(1+2*K)*LhsProgress], A1); \
               traits.broadcastRhs(&blB[(0+4*K)*RhsProgress], B_0, B1, B2, B3); \
```