aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src/Core/products/GeneralBlockPanelKernel.h
diff options
context:
space:
mode:
authorGravatar Gael Guennebaud <g.gael@free.fr>2016-07-02 17:35:08 +0200
committerGravatar Gael Guennebaud <g.gael@free.fr>2016-07-02 17:35:08 +0200
commit0fa9e4a15cb93fb6a0ef54ada13979d38d120ab0 (patch)
tree91fa4297421000e39cf9da6c368cbc90e023efb6 /Eigen/src/Core/products/GeneralBlockPanelKernel.h
parent672076db5d13882443235dadc01e76b6978a3a8a (diff)
Fix performance regression in dgemm introduced by changeset 5d51a7f12c69138ed2a43df240bdf27a5313f7ce
Diffstat (limited to 'Eigen/src/Core/products/GeneralBlockPanelKernel.h')
-rw-r--r--Eigen/src/Core/products/GeneralBlockPanelKernel.h4
1 files changed, 2 insertions, 2 deletions
diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
index 2d764727b..c66882012 100644
--- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h
+++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
@@ -1526,12 +1526,12 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
const RhsScalar* blB = &blockB[j2*strideB+offsetB*nr];
// The following piece of code wont work for 512 bit registers
- // Moreover it assumes that there is a half packet of the same size
+ // Moreover, if LhsProgress==8 it assumes that there is a half packet of the same size
// as nr (which is currently 4) for the return type.
typedef typename unpacket_traits<SResPacket>::half SResPacketHalf;
if ((SwappedTraits::LhsProgress % 4) == 0 &&
(SwappedTraits::LhsProgress <= 8) &&
- unpacket_traits<SResPacketHalf>::size==4)
+ (SwappedTraits::LhsProgress!=8 || unpacket_traits<SResPacketHalf>::size==nr))
{
SAccPacket C0, C1, C2, C3;
straits.initAcc(C0);