From 4e7746fe2234ccaa10cb0c9431d68ee72a1d1d40 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 7 Dec 2018 09:15:46 +0100 Subject: bug #1636: fix gemm performance issue with gcc>=6 and no FMA --- Eigen/src/Core/products/GeneralBlockPanelKernel.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'Eigen/src/Core/products') diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h index 9475a6ecc..88ca9cc97 100644 --- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h +++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h @@ -1387,10 +1387,18 @@ void gebp_kernel=6 without FMA (bug 1637) + #if EIGEN_GNUC_AT_LEAST(6,0) + #define EIGEN_GEBP_2PX4_SPILLING_WORKAROUND asm("" : [a0] "+x" (A0), [a1] "+x" (A1) ); + #else + #define EIGEN_GEBP_2PX4_SPILLING_WORKAROUND + #endif + #define EIGEN_GEBGP_ONESTEP(K) \ do { \ EIGEN_ASM_COMMENT("begin step of gebp micro kernel 2pX4"); \ - EIGEN_ASM_COMMENT("Note: these asm comments work around bug 935!"); \ + EIGEN_GEBP_2PX4_SPILLING_WORKAROUND \ traits.loadLhs(&blA[(0+2*K)*LhsProgress], A0); \ traits.loadLhs(&blA[(1+2*K)*LhsProgress], A1); \ traits.broadcastRhs(&blB[(0+4*K)*RhsProgress], B_0, B1, B2, B3); \ -- cgit v1.2.3