aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src
diff options
context:
space:
mode:
authorGravatar Gael Guennebaud <g.gael@free.fr>2014-04-25 13:22:34 +0200
committerGravatar Gael Guennebaud <g.gael@free.fr>2014-04-25 13:22:34 +0200
commitc20e3641de5b6d56f5496fef2619a1f53f8a1835 (patch)
tree1e0a7340c22079c4e6c3d2e9c958c47934ee8964 /Eigen/src
parent2dbfd83424cd0d30dac3b42b27b970b44a4e4541 (diff)
Fix for mixed products
Diffstat (limited to 'Eigen/src')
-rw-r--r--Eigen/src/Core/products/GeneralBlockPanelKernel.h53
1 files changed, 30 insertions, 23 deletions
diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
index 41c46c67a..ebf438d57 100644
--- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h
+++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
@@ -480,8 +480,14 @@ public:
loadRhs(b,dest);
}
- // linking error if instantiated without being optimized out:
- void broadcastRhs(const RhsScalar* b, RhsPacket& b0, RhsPacket& b1, RhsPacket& b2, RhsPacket& b3);
+ EIGEN_STRONG_INLINE void broadcastRhs(const RhsScalar* b, RhsPacket& b0, RhsPacket& b1, RhsPacket& b2, RhsPacket& b3)
+ {
+ // FIXME not sure that's the best way to implement it!
+ loadRhs(b+0, b0);
+ loadRhs(b+1, b1);
+ loadRhs(b+2, b2);
+ loadRhs(b+3, b3);
+ }
// Vectorized path
EIGEN_STRONG_INLINE void broadcastRhs(const RhsScalar* b, DoublePacketType& b0, DoublePacketType& b1)
@@ -602,9 +608,11 @@ public:
dest = pset1<RhsPacket>(*b);
}
- // linking error if instantiated without being optimized out:
-// void broadcastRhs(const RhsScalar* b, RhsPacket& b0, RhsPacket& b1, RhsPacket& b2, RhsPacket& b3);
-//
+ void broadcastRhs(const RhsScalar* b, RhsPacket& b0, RhsPacket& b1, RhsPacket& b2, RhsPacket& b3)
+ {
+ pbroadcast4(b, b0, b1, b2, b3);
+ }
+
// EIGEN_STRONG_INLINE void broadcastRhs(const RhsScalar* b, RhsPacket& b0, RhsPacket& b1)
// {
// // FIXME not sure that's the best way to implement it!
@@ -1137,19 +1145,16 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,mr,nr,ConjugateLhs,ConjugateRhs>
for(Index k=0; k<peeled_kc; k+=pk)
{
EIGEN_ASM_COMMENT("begin gegp micro kernel 1pX4");
- RhsPacket B_0, B1;
-
+ RhsPacket B_0, B1, B2, B3;
+
#define EIGEN_GEBGP_ONESTEP(K) \
- traits.loadLhs(&blA[(0+1*K)*LhsProgress], A0); \
- traits.loadRhs(&blB[(0+4*K)*RhsProgress], B_0); \
- traits.madd(A0, B_0, C0, B1); \
- traits.loadRhs(&blB[1+4*K*RhsProgress], B_0); \
- traits.madd(A0, B_0, C1, B1); \
- traits.loadRhs(&blB[2+4*K*RhsProgress], B_0); \
- traits.madd(A0, B_0, C2, B1); \
- traits.loadRhs(&blB[3+4*K*RhsProgress], B_0); \
- traits.madd(A0, B_0, C3 , B1); \
-
+ traits.loadLhs(&blA[(0+1*K)*LhsProgress], A0); \
+ traits.broadcastRhs(&blB[(0+4*K)*RhsProgress], B_0, B1, B2, B3); \
+ traits.madd(A0, B_0, C0, B_0); \
+ traits.madd(A0, B1, C1, B1); \
+ traits.madd(A0, B2, C2, B2); \
+ traits.madd(A0, B3, C3, B3);
+
internal::prefetch(blB+(48+0));
EIGEN_GEBGP_ONESTEP(0);
EIGEN_GEBGP_ONESTEP(1);
@@ -1167,7 +1172,7 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,mr,nr,ConjugateLhs,ConjugateRhs>
// process remaining peeled loop
for(Index k=peeled_kc; k<depth; k++)
{
- RhsPacket B_0, B1;
+ RhsPacket B_0, B1, B2, B3;
EIGEN_GEBGP_ONESTEP(0);
blB += 4*RhsProgress;
blA += 1*LhsProgress;
@@ -1448,10 +1453,12 @@ EIGEN_DONT_INLINE void gemm_pack_lhs<Scalar, Index, Pack1, Pack2, ColMajor, Conj
const Index peeled_mc0 = Pack2>=1*PacketSize ? peeled_mc1
: Pack2>1 ? (rows/Pack2)*Pack2 : 0;
+ Index i=0;
+
// Pack 3 packets
if(Pack1>=3*PacketSize)
{
- for(Index i=0; i<peeled_mc3; i+=3*PacketSize)
+ for(; i<peeled_mc3; i+=3*PacketSize)
{
if(PanelMode) count += (3*PacketSize) * offset;
@@ -1471,7 +1478,7 @@ EIGEN_DONT_INLINE void gemm_pack_lhs<Scalar, Index, Pack1, Pack2, ColMajor, Conj
// Pack 2 packets
if(Pack1>=2*PacketSize)
{
- for(Index i=peeled_mc3; i<peeled_mc2; i+=2*PacketSize)
+ for(; i<peeled_mc2; i+=2*PacketSize)
{
if(PanelMode) count += (2*PacketSize) * offset;
@@ -1489,7 +1496,7 @@ EIGEN_DONT_INLINE void gemm_pack_lhs<Scalar, Index, Pack1, Pack2, ColMajor, Conj
// Pack 1 packets
if(Pack1>=1*PacketSize)
{
- for(Index i=peeled_mc2; i<peeled_mc1; i+=1*PacketSize)
+ for(; i<peeled_mc1; i+=1*PacketSize)
{
if(PanelMode) count += (1*PacketSize) * offset;
@@ -1506,7 +1513,7 @@ EIGEN_DONT_INLINE void gemm_pack_lhs<Scalar, Index, Pack1, Pack2, ColMajor, Conj
// Pack scalars
if(Pack2<PacketSize && Pack2>1)
{
- for(Index i=peeled_mc1; i<peeled_mc0; i+=Pack2)
+ for(; i<peeled_mc0; i+=Pack2)
{
if(PanelMode) count += Pack2 * offset;
@@ -1517,7 +1524,7 @@ EIGEN_DONT_INLINE void gemm_pack_lhs<Scalar, Index, Pack1, Pack2, ColMajor, Conj
if(PanelMode) count += Pack2 * (stride-offset-depth);
}
}
- for(Index i=peeled_mc0; i<rows; i++)
+ for(; i<rows; i++)
{
if(PanelMode) count += offset;
for(Index k=0; k<depth; k++)