aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src/Core/products
diff options
context:
space:
mode:
authorGravatar Gael Guennebaud <g.gael@free.fr>2018-09-20 17:01:24 +0200
committerGravatar Gael Guennebaud <g.gael@free.fr>2018-09-20 17:01:24 +0200
commit71496b0e25158fe6c47dd8c959748b74be4ca94c (patch)
treeb68794605ac9b8275459d108d7c8e1a37e75fe82 /Eigen/src/Core/products
parent5a30eed17e170af4aedca1a3ff0c10a8e65bf47e (diff)
Fix gebp kernel for real+complex in case only reals are vectorized (e.g., AVX512).
This commit also removes "half-packet" from data-mappers: it was not used and conceptually broken anyways.
Diffstat (limited to 'Eigen/src/Core/products')
-rw-r--r--Eigen/src/Core/products/GeneralBlockPanelKernel.h106
-rw-r--r--Eigen/src/Core/products/GeneralMatrixMatrix.h2
-rw-r--r--Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h6
-rw-r--r--Eigen/src/Core/products/SelfadjointMatrixMatrix.h6
-rw-r--r--Eigen/src/Core/products/TriangularMatrixMatrix.h6
-rw-r--r--Eigen/src/Core/products/TriangularSolverMatrix.h4
6 files changed, 64 insertions, 66 deletions
diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
index 3ec8eb082..fa844815d 100644
--- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h
+++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
@@ -1025,9 +1025,9 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
ResPacket R0, R1, R2;
ResPacket alphav = pset1<ResPacket>(alpha);
- R0 = r0.loadPacket(0 * Traits::ResPacketSize);
- R1 = r0.loadPacket(1 * Traits::ResPacketSize);
- R2 = r0.loadPacket(2 * Traits::ResPacketSize);
+ R0 = r0.template loadPacket<ResPacket>(0 * Traits::ResPacketSize);
+ R1 = r0.template loadPacket<ResPacket>(1 * Traits::ResPacketSize);
+ R2 = r0.template loadPacket<ResPacket>(2 * Traits::ResPacketSize);
traits.acc(C0, alphav, R0);
traits.acc(C4, alphav, R1);
traits.acc(C8, alphav, R2);
@@ -1035,9 +1035,9 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
r0.storePacket(1 * Traits::ResPacketSize, R1);
r0.storePacket(2 * Traits::ResPacketSize, R2);
- R0 = r1.loadPacket(0 * Traits::ResPacketSize);
- R1 = r1.loadPacket(1 * Traits::ResPacketSize);
- R2 = r1.loadPacket(2 * Traits::ResPacketSize);
+ R0 = r1.template loadPacket<ResPacket>(0 * Traits::ResPacketSize);
+ R1 = r1.template loadPacket<ResPacket>(1 * Traits::ResPacketSize);
+ R2 = r1.template loadPacket<ResPacket>(2 * Traits::ResPacketSize);
traits.acc(C1, alphav, R0);
traits.acc(C5, alphav, R1);
traits.acc(C9, alphav, R2);
@@ -1045,9 +1045,9 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
r1.storePacket(1 * Traits::ResPacketSize, R1);
r1.storePacket(2 * Traits::ResPacketSize, R2);
- R0 = r2.loadPacket(0 * Traits::ResPacketSize);
- R1 = r2.loadPacket(1 * Traits::ResPacketSize);
- R2 = r2.loadPacket(2 * Traits::ResPacketSize);
+ R0 = r2.template loadPacket<ResPacket>(0 * Traits::ResPacketSize);
+ R1 = r2.template loadPacket<ResPacket>(1 * Traits::ResPacketSize);
+ R2 = r2.template loadPacket<ResPacket>(2 * Traits::ResPacketSize);
traits.acc(C2, alphav, R0);
traits.acc(C6, alphav, R1);
traits.acc(C10, alphav, R2);
@@ -1055,9 +1055,9 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
r2.storePacket(1 * Traits::ResPacketSize, R1);
r2.storePacket(2 * Traits::ResPacketSize, R2);
- R0 = r3.loadPacket(0 * Traits::ResPacketSize);
- R1 = r3.loadPacket(1 * Traits::ResPacketSize);
- R2 = r3.loadPacket(2 * Traits::ResPacketSize);
+ R0 = r3.template loadPacket<ResPacket>(0 * Traits::ResPacketSize);
+ R1 = r3.template loadPacket<ResPacket>(1 * Traits::ResPacketSize);
+ R2 = r3.template loadPacket<ResPacket>(2 * Traits::ResPacketSize);
traits.acc(C3, alphav, R0);
traits.acc(C7, alphav, R1);
traits.acc(C11, alphav, R2);
@@ -1134,9 +1134,9 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
ResPacket R0, R1, R2;
ResPacket alphav = pset1<ResPacket>(alpha);
- R0 = r0.loadPacket(0 * Traits::ResPacketSize);
- R1 = r0.loadPacket(1 * Traits::ResPacketSize);
- R2 = r0.loadPacket(2 * Traits::ResPacketSize);
+ R0 = r0.template loadPacket<ResPacket>(0 * Traits::ResPacketSize);
+ R1 = r0.template loadPacket<ResPacket>(1 * Traits::ResPacketSize);
+ R2 = r0.template loadPacket<ResPacket>(2 * Traits::ResPacketSize);
traits.acc(C0, alphav, R0);
traits.acc(C4, alphav, R1);
traits.acc(C8, alphav, R2);
@@ -1244,10 +1244,10 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
ResPacket R0, R1, R2, R3;
ResPacket alphav = pset1<ResPacket>(alpha);
- R0 = r0.loadPacket(0 * Traits::ResPacketSize);
- R1 = r0.loadPacket(1 * Traits::ResPacketSize);
- R2 = r1.loadPacket(0 * Traits::ResPacketSize);
- R3 = r1.loadPacket(1 * Traits::ResPacketSize);
+ R0 = r0.template loadPacket<ResPacket>(0 * Traits::ResPacketSize);
+ R1 = r0.template loadPacket<ResPacket>(1 * Traits::ResPacketSize);
+ R2 = r1.template loadPacket<ResPacket>(0 * Traits::ResPacketSize);
+ R3 = r1.template loadPacket<ResPacket>(1 * Traits::ResPacketSize);
traits.acc(C0, alphav, R0);
traits.acc(C4, alphav, R1);
traits.acc(C1, alphav, R2);
@@ -1257,10 +1257,10 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
r1.storePacket(0 * Traits::ResPacketSize, R2);
r1.storePacket(1 * Traits::ResPacketSize, R3);
- R0 = r2.loadPacket(0 * Traits::ResPacketSize);
- R1 = r2.loadPacket(1 * Traits::ResPacketSize);
- R2 = r3.loadPacket(0 * Traits::ResPacketSize);
- R3 = r3.loadPacket(1 * Traits::ResPacketSize);
+ R0 = r2.template loadPacket<ResPacket>(0 * Traits::ResPacketSize);
+ R1 = r2.template loadPacket<ResPacket>(1 * Traits::ResPacketSize);
+ R2 = r3.template loadPacket<ResPacket>(0 * Traits::ResPacketSize);
+ R3 = r3.template loadPacket<ResPacket>(1 * Traits::ResPacketSize);
traits.acc(C2, alphav, R0);
traits.acc(C6, alphav, R1);
traits.acc(C3, alphav, R2);
@@ -1337,8 +1337,8 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
ResPacket R0, R1;
ResPacket alphav = pset1<ResPacket>(alpha);
- R0 = r0.loadPacket(0 * Traits::ResPacketSize);
- R1 = r0.loadPacket(1 * Traits::ResPacketSize);
+ R0 = r0.template loadPacket<ResPacket>(0 * Traits::ResPacketSize);
+ R1 = r0.template loadPacket<ResPacket>(1 * Traits::ResPacketSize);
traits.acc(C0, alphav, R0);
traits.acc(C4, alphav, R1);
r0.storePacket(0 * Traits::ResPacketSize, R0);
@@ -1431,15 +1431,15 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
ResPacket R0, R1;
ResPacket alphav = pset1<ResPacket>(alpha);
- R0 = r0.loadPacket(0 * Traits::ResPacketSize);
- R1 = r1.loadPacket(0 * Traits::ResPacketSize);
+ R0 = r0.template loadPacket<ResPacket>(0 * Traits::ResPacketSize);
+ R1 = r1.template loadPacket<ResPacket>(0 * Traits::ResPacketSize);
traits.acc(C0, alphav, R0);
traits.acc(C1, alphav, R1);
r0.storePacket(0 * Traits::ResPacketSize, R0);
r1.storePacket(0 * Traits::ResPacketSize, R1);
- R0 = r2.loadPacket(0 * Traits::ResPacketSize);
- R1 = r3.loadPacket(0 * Traits::ResPacketSize);
+ R0 = r2.template loadPacket<ResPacket>(0 * Traits::ResPacketSize);
+ R1 = r3.template loadPacket<ResPacket>(0 * Traits::ResPacketSize);
traits.acc(C2, alphav, R0);
traits.acc(C3, alphav, R1);
r2.storePacket(0 * Traits::ResPacketSize, R0);
@@ -1504,7 +1504,7 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
#undef EIGEN_GEBGP_ONESTEP
ResPacket R0;
ResPacket alphav = pset1<ResPacket>(alpha);
- R0 = r0.loadPacket(0 * Traits::ResPacketSize);
+ R0 = r0.template loadPacket<ResPacket>(0 * Traits::ResPacketSize);
traits.acc(C0, alphav, R0);
r0.storePacket(0 * Traits::ResPacketSize, R0);
}
@@ -1685,19 +1685,18 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
//
// 32 33 34 35 ...
// 36 36 38 39 ...
-template<typename Scalar, typename Index, typename DataMapper, int Pack1, int Pack2, bool Conjugate, bool PanelMode>
-struct gemm_pack_lhs<Scalar, Index, DataMapper, Pack1, Pack2, ColMajor, Conjugate, PanelMode>
+template<typename Scalar, typename Index, typename DataMapper, int Pack1, int Pack2, typename Packet, bool Conjugate, bool PanelMode>
+struct gemm_pack_lhs<Scalar, Index, DataMapper, Pack1, Pack2, Packet, ColMajor, Conjugate, PanelMode>
{
typedef typename DataMapper::LinearMapper LinearMapper;
EIGEN_DONT_INLINE void operator()(Scalar* blockA, const DataMapper& lhs, Index depth, Index rows, Index stride=0, Index offset=0);
};
-template<typename Scalar, typename Index, typename DataMapper, int Pack1, int Pack2, bool Conjugate, bool PanelMode>
-EIGEN_DONT_INLINE void gemm_pack_lhs<Scalar, Index, DataMapper, Pack1, Pack2, ColMajor, Conjugate, PanelMode>
+template<typename Scalar, typename Index, typename DataMapper, int Pack1, int Pack2, typename Packet, bool Conjugate, bool PanelMode>
+EIGEN_DONT_INLINE void gemm_pack_lhs<Scalar, Index, DataMapper, Pack1, Pack2, Packet, ColMajor, Conjugate, PanelMode>
::operator()(Scalar* blockA, const DataMapper& lhs, Index depth, Index rows, Index stride, Index offset)
{
- typedef typename packet_traits<Scalar>::type Packet;
- enum { PacketSize = packet_traits<Scalar>::size };
+ enum { PacketSize = unpacket_traits<Packet>::size };
EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK LHS");
EIGEN_UNUSED_VARIABLE(stride);
@@ -1725,9 +1724,9 @@ EIGEN_DONT_INLINE void gemm_pack_lhs<Scalar, Index, DataMapper, Pack1, Pack2, Co
for(Index k=0; k<depth; k++)
{
Packet A, B, C;
- A = lhs.loadPacket(i+0*PacketSize, k);
- B = lhs.loadPacket(i+1*PacketSize, k);
- C = lhs.loadPacket(i+2*PacketSize, k);
+ A = lhs.template loadPacket<Packet>(i+0*PacketSize, k);
+ B = lhs.template loadPacket<Packet>(i+1*PacketSize, k);
+ C = lhs.template loadPacket<Packet>(i+2*PacketSize, k);
pstore(blockA+count, cj.pconj(A)); count+=PacketSize;
pstore(blockA+count, cj.pconj(B)); count+=PacketSize;
pstore(blockA+count, cj.pconj(C)); count+=PacketSize;
@@ -1745,8 +1744,8 @@ EIGEN_DONT_INLINE void gemm_pack_lhs<Scalar, Index, DataMapper, Pack1, Pack2, Co
for(Index k=0; k<depth; k++)
{
Packet A, B;
- A = lhs.loadPacket(i+0*PacketSize, k);
- B = lhs.loadPacket(i+1*PacketSize, k);
+ A = lhs.template loadPacket<Packet>(i+0*PacketSize, k);
+ B = lhs.template loadPacket<Packet>(i+1*PacketSize, k);
pstore(blockA+count, cj.pconj(A)); count+=PacketSize;
pstore(blockA+count, cj.pconj(B)); count+=PacketSize;
}
@@ -1763,7 +1762,7 @@ EIGEN_DONT_INLINE void gemm_pack_lhs<Scalar, Index, DataMapper, Pack1, Pack2, Co
for(Index k=0; k<depth; k++)
{
Packet A;
- A = lhs.loadPacket(i+0*PacketSize, k);
+ A = lhs.template loadPacket<Packet>(i+0*PacketSize, k);
pstore(blockA+count, cj.pconj(A));
count+=PacketSize;
}
@@ -1793,19 +1792,18 @@ EIGEN_DONT_INLINE void gemm_pack_lhs<Scalar, Index, DataMapper, Pack1, Pack2, Co
}
}
-template<typename Scalar, typename Index, typename DataMapper, int Pack1, int Pack2, bool Conjugate, bool PanelMode>
-struct gemm_pack_lhs<Scalar, Index, DataMapper, Pack1, Pack2, RowMajor, Conjugate, PanelMode>
+template<typename Scalar, typename Index, typename DataMapper, int Pack1, int Pack2, typename Packet, bool Conjugate, bool PanelMode>
+struct gemm_pack_lhs<Scalar, Index, DataMapper, Pack1, Pack2, Packet, RowMajor, Conjugate, PanelMode>
{
typedef typename DataMapper::LinearMapper LinearMapper;
EIGEN_DONT_INLINE void operator()(Scalar* blockA, const DataMapper& lhs, Index depth, Index rows, Index stride=0, Index offset=0);
};
-template<typename Scalar, typename Index, typename DataMapper, int Pack1, int Pack2, bool Conjugate, bool PanelMode>
-EIGEN_DONT_INLINE void gemm_pack_lhs<Scalar, Index, DataMapper, Pack1, Pack2, RowMajor, Conjugate, PanelMode>
+template<typename Scalar, typename Index, typename DataMapper, int Pack1, int Pack2, typename Packet, bool Conjugate, bool PanelMode>
+EIGEN_DONT_INLINE void gemm_pack_lhs<Scalar, Index, DataMapper, Pack1, Pack2, Packet, RowMajor, Conjugate, PanelMode>
::operator()(Scalar* blockA, const DataMapper& lhs, Index depth, Index rows, Index stride, Index offset)
{
- typedef typename packet_traits<Scalar>::type Packet;
- enum { PacketSize = packet_traits<Scalar>::size };
+ enum { PacketSize = unpacket_traits<Packet>::size };
EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK LHS");
EIGEN_UNUSED_VARIABLE(stride);
@@ -1837,7 +1835,7 @@ EIGEN_DONT_INLINE void gemm_pack_lhs<Scalar, Index, DataMapper, Pack1, Pack2, Ro
for (Index m = 0; m < pack; m += PacketSize)
{
PacketBlock<Packet> kernel;
- for (int p = 0; p < PacketSize; ++p) kernel.packet[p] = lhs.loadPacket(i+p+m, k);
+ for (int p = 0; p < PacketSize; ++p) kernel.packet[p] = lhs.template loadPacket<Packet>(i+p+m, k);
ptranspose(kernel);
for (int p = 0; p < PacketSize; ++p) pstore(blockA+count+m+(pack)*p, cj.pconj(kernel.packet[p]));
}
@@ -1971,10 +1969,10 @@ EIGEN_DONT_INLINE void gemm_pack_rhs<Scalar, Index, DataMapper, nr, ColMajor, Co
{
for(; k<peeled_k; k+=PacketSize) {
PacketBlock<Packet,(PacketSize%4)==0?4:PacketSize> kernel;
- kernel.packet[0] = dm0.loadPacket(k);
- kernel.packet[1%PacketSize] = dm1.loadPacket(k);
- kernel.packet[2%PacketSize] = dm2.loadPacket(k);
- kernel.packet[3%PacketSize] = dm3.loadPacket(k);
+ kernel.packet[0 ] = dm0.template loadPacket<Packet>(k);
+ kernel.packet[1%PacketSize] = dm1.template loadPacket<Packet>(k);
+ kernel.packet[2%PacketSize] = dm2.template loadPacket<Packet>(k);
+ kernel.packet[3%PacketSize] = dm3.template loadPacket<Packet>(k);
ptranspose(kernel);
pstoreu(blockB+count+0*PacketSize, cj.pconj(kernel.packet[0]));
pstoreu(blockB+count+1*PacketSize, cj.pconj(kernel.packet[1%PacketSize]));
@@ -2075,7 +2073,7 @@ EIGEN_DONT_INLINE void gemm_pack_rhs<Scalar, Index, DataMapper, nr, RowMajor, Co
for(Index k=0; k<depth; k++)
{
if (PacketSize==4) {
- Packet A = rhs.loadPacket(k, j2);
+ Packet A = rhs.template loadPacket<Packet>(k, j2);
pstoreu(blockB+count, cj.pconj(A));
count += PacketSize;
} else {
diff --git a/Eigen/src/Core/products/GeneralMatrixMatrix.h b/Eigen/src/Core/products/GeneralMatrixMatrix.h
index bd7b6ff2a..a010e150f 100644
--- a/Eigen/src/Core/products/GeneralMatrixMatrix.h
+++ b/Eigen/src/Core/products/GeneralMatrixMatrix.h
@@ -75,7 +75,7 @@ static void run(Index rows, Index cols, Index depth,
Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
Index nc = (std::min)(cols,blocking.nc()); // cache block size along the N direction
- gemm_pack_lhs<LhsScalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
+ gemm_pack_lhs<LhsScalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket, LhsStorageOrder> pack_lhs;
gemm_pack_rhs<RhsScalar, Index, RhsMapper, Traits::nr, RhsStorageOrder> pack_rhs;
gebp_kernel<LhsScalar, RhsScalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp;
diff --git a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h
index e436c50a4..27a7921fa 100644
--- a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h
+++ b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h
@@ -84,7 +84,7 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,
ei_declare_aligned_stack_constructed_variable(LhsScalar, blockA, sizeA, blocking.blockA());
ei_declare_aligned_stack_constructed_variable(RhsScalar, blockB, sizeB, blocking.blockB());
- gemm_pack_lhs<LhsScalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
+ gemm_pack_lhs<LhsScalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket, LhsStorageOrder> pack_lhs;
gemm_pack_rhs<RhsScalar, Index, RhsMapper, Traits::nr, RhsStorageOrder> pack_rhs;
gebp_kernel<LhsScalar, RhsScalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp;
tribb_kernel<LhsScalar, RhsScalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs, UpLo> sybb;
@@ -110,7 +110,6 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,
gebp(res.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc,
(std::min)(size,i2), alpha, -1, -1, 0, 0);
-
sybb(_res+resStride*i2 + i2, resStride, blockA, blockB + actual_kc*i2, actual_mc, actual_kc, alpha);
if (UpLo==Upper)
@@ -160,7 +159,7 @@ struct tribb_kernel
if(UpLo==Upper)
gebp_kernel(res.getSubMapper(0, j), blockA, actual_b, j, depth, actualBlockSize, alpha,
-1, -1, 0, 0);
-
+
// selfadjoint micro block
{
Index i = j;
@@ -168,6 +167,7 @@ struct tribb_kernel
// 1 - apply the kernel on the temporary buffer
gebp_kernel(ResMapper(buffer.data(), BlockSize), blockA+depth*i, actual_b, actualBlockSize, depth, actualBlockSize, alpha,
-1, -1, 0, 0);
+
// 2 - triangular accumulation
for(Index j1=0; j1<actualBlockSize; ++j1)
{
diff --git a/Eigen/src/Core/products/SelfadjointMatrixMatrix.h b/Eigen/src/Core/products/SelfadjointMatrixMatrix.h
index da6f82abc..c43a53b92 100644
--- a/Eigen/src/Core/products/SelfadjointMatrixMatrix.h
+++ b/Eigen/src/Core/products/SelfadjointMatrixMatrix.h
@@ -352,7 +352,7 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,t
gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
symm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr,RhsStorageOrder> pack_rhs;
- gemm_pack_lhs<Scalar, Index, LhsTransposeMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder==RowMajor?ColMajor:RowMajor, true> pack_lhs_transposed;
+ gemm_pack_lhs<Scalar, Index, LhsTransposeMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket, LhsStorageOrder==RowMajor?ColMajor:RowMajor, true> pack_lhs_transposed;
for(Index k2=0; k2<size; k2+=kc)
{
@@ -387,7 +387,7 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,t
for(Index i2=k2+kc; i2<size; i2+=mc)
{
const Index actual_mc = (std::min)(i2+mc,size)-i2;
- gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder,false>()
+ gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket, LhsStorageOrder,false>()
(blockA, lhs.getSubMapper(i2, k2), actual_kc, actual_mc);
gebp_kernel(res.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc, cols, alpha);
@@ -437,7 +437,7 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,f
ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
- gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
+ gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket, LhsStorageOrder> pack_lhs;
symm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;
for(Index k2=0; k2<size; k2+=kc)
diff --git a/Eigen/src/Core/products/TriangularMatrixMatrix.h b/Eigen/src/Core/products/TriangularMatrixMatrix.h
index f784507e7..7fe476be2 100644
--- a/Eigen/src/Core/products/TriangularMatrixMatrix.h
+++ b/Eigen/src/Core/products/TriangularMatrixMatrix.h
@@ -151,7 +151,7 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true,
triangularBuffer.diagonal().setOnes();
gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
- gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
+ gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket, LhsStorageOrder> pack_lhs;
gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr,RhsStorageOrder> pack_rhs;
for(Index k2=IsLower ? depth : 0;
@@ -222,7 +222,7 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true,
for(Index i2=start; i2<end; i2+=mc)
{
const Index actual_mc = (std::min)(i2+mc,end)-i2;
- gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr,Traits::LhsProgress, LhsStorageOrder,false>()
+ gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr,Traits::LhsProgress, typename Traits::LhsPacket, LhsStorageOrder,false>()
(blockA, lhs.getSubMapper(i2, actual_k2), actual_kc, actual_mc);
gebp_kernel(res.getSubMapper(i2, 0), blockA, blockB, actual_mc,
@@ -299,7 +299,7 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,false,
triangularBuffer.diagonal().setOnes();
gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
- gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
+ gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket, LhsStorageOrder> pack_lhs;
gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr,RhsStorageOrder> pack_rhs;
gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr,RhsStorageOrder,false,true> pack_rhs_panel;
diff --git a/Eigen/src/Core/products/TriangularSolverMatrix.h b/Eigen/src/Core/products/TriangularSolverMatrix.h
index 223c38b86..96d1b38a9 100644
--- a/Eigen/src/Core/products/TriangularSolverMatrix.h
+++ b/Eigen/src/Core/products/TriangularSolverMatrix.h
@@ -76,7 +76,7 @@ EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conju
conj_if<Conjugate> conj;
gebp_kernel<Scalar, Scalar, Index, OtherMapper, Traits::mr, Traits::nr, Conjugate, false> gebp_kernel;
- gemm_pack_lhs<Scalar, Index, TriMapper, Traits::mr, Traits::LhsProgress, TriStorageOrder> pack_lhs;
+ gemm_pack_lhs<Scalar, Index, TriMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket, TriStorageOrder> pack_lhs;
gemm_pack_rhs<Scalar, Index, OtherMapper, Traits::nr, ColMajor, false, true> pack_rhs;
// the goal here is to subdivise the Rhs panels such that we keep some cache
@@ -229,7 +229,7 @@ EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conj
gebp_kernel<Scalar, Scalar, Index, LhsMapper, Traits::mr, Traits::nr, false, Conjugate> gebp_kernel;
gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr, RhsStorageOrder> pack_rhs;
gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr, RhsStorageOrder,false,true> pack_rhs_panel;
- gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, ColMajor, false, true> pack_lhs_panel;
+ gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket, ColMajor, false, true> pack_lhs_panel;
for(Index k2=IsLower ? size : 0;
IsLower ? k2>0 : k2<size;