aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src/Core/products
diff options
context:
space:
mode:
authorGravatar Gael Guennebaud <g.gael@free.fr>2013-04-19 11:21:39 +0200
committerGravatar Gael Guennebaud <g.gael@free.fr>2013-04-19 11:21:39 +0200
commit9cd2d14005def8e7df0b0bf5fd6eb51f8a6591e9 (patch)
treeca4df13b58e923bdebd9d5f59aecda9d1e30ca58 /Eigen/src/Core/products
parent4e2e615a7c2c719d2d708ab32840bad353322d8c (diff)
parent46755648ec341aa5e0283b47456108bb2897b1b3 (diff)
merge with default branch
Diffstat (limited to 'Eigen/src/Core/products')
-rw-r--r--Eigen/src/Core/products/CoeffBasedProduct.h2
-rw-r--r--Eigen/src/Core/products/GeneralBlockPanelKernel.h294
-rw-r--r--Eigen/src/Core/products/GeneralMatrixMatrix.h7
-rw-r--r--Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h112
-rw-r--r--Eigen/src/Core/products/GeneralMatrixVector.h23
-rw-r--r--Eigen/src/Core/products/GeneralMatrixVector_MKL.h6
-rw-r--r--Eigen/src/Core/products/SelfadjointMatrixMatrix.h32
-rw-r--r--Eigen/src/Core/products/SelfadjointMatrixMatrix_MKL.h10
-rw-r--r--Eigen/src/Core/products/SelfadjointMatrixVector.h15
-rw-r--r--Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h4
-rw-r--r--Eigen/src/Core/products/SelfadjointProduct.h20
-rw-r--r--Eigen/src/Core/products/SelfadjointRank2Update.h6
-rw-r--r--Eigen/src/Core/products/TriangularMatrixMatrix.h40
-rw-r--r--Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h4
-rw-r--r--Eigen/src/Core/products/TriangularMatrixVector.h30
-rw-r--r--Eigen/src/Core/products/TriangularMatrixVector_MKL.h12
-rw-r--r--Eigen/src/Core/products/TriangularSolverMatrix.h18
-rw-r--r--Eigen/src/Core/products/TriangularSolverMatrix_MKL.h4
18 files changed, 404 insertions, 235 deletions
diff --git a/Eigen/src/Core/products/CoeffBasedProduct.h b/Eigen/src/Core/products/CoeffBasedProduct.h
index 312a05c71..51fc5fd58 100644
--- a/Eigen/src/Core/products/CoeffBasedProduct.h
+++ b/Eigen/src/Core/products/CoeffBasedProduct.h
@@ -152,7 +152,7 @@ class CoeffBasedProduct
{
// we don't allow taking products of matrices of different real types, as that wouldn't be vectorizable.
// We still allow to mix T and complex<T>.
- EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::RealScalar, typename Rhs::RealScalar>::value),
+ EIGEN_STATIC_ASSERT((internal::scalar_product_traits<typename Lhs::RealScalar, typename Rhs::RealScalar>::Defined),
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
eigen_assert(lhs.cols() == rhs.rows()
&& "invalid matrix product"
diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
index 09912fafb..780fa74d3 100644
--- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h
+++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
@@ -69,8 +69,8 @@ inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1=0, std::ptrdi
* - the number of scalars that fit into a packet (when vectorization is enabled).
*
* \sa setCpuCacheSizes */
-template<typename LhsScalar, typename RhsScalar, int KcFactor>
-void computeProductBlockingSizes(std::ptrdiff_t& k, std::ptrdiff_t& m, std::ptrdiff_t& n)
+template<typename LhsScalar, typename RhsScalar, int KcFactor, typename SizeType>
+void computeProductBlockingSizes(SizeType& k, SizeType& m, SizeType& n)
{
EIGEN_UNUSED_VARIABLE(n);
// Explanations:
@@ -91,13 +91,13 @@ void computeProductBlockingSizes(std::ptrdiff_t& k, std::ptrdiff_t& m, std::ptrd
};
manage_caching_sizes(GetAction, &l1, &l2);
- k = std::min<std::ptrdiff_t>(k, l1/kdiv);
- std::ptrdiff_t _m = k>0 ? l2/(4 * sizeof(LhsScalar) * k) : 0;
+ k = std::min<SizeType>(k, l1/kdiv);
+ SizeType _m = k>0 ? l2/(4 * sizeof(LhsScalar) * k) : 0;
if(_m<m) m = _m & mr_mask;
}
-template<typename LhsScalar, typename RhsScalar>
-inline void computeProductBlockingSizes(std::ptrdiff_t& k, std::ptrdiff_t& m, std::ptrdiff_t& n)
+template<typename LhsScalar, typename RhsScalar, typename SizeType>
+inline void computeProductBlockingSizes(SizeType& k, SizeType& m, SizeType& n)
{
computeProductBlockingSizes<LhsScalar,RhsScalar,1>(k, m, n);
}
@@ -529,7 +529,14 @@ struct gebp_kernel
EIGEN_DONT_INLINE
void operator()(ResScalar* res, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index rows, Index depth, Index cols, ResScalar alpha,
- Index strideA=-1, Index strideB=-1, Index offsetA=0, Index offsetB=0, RhsScalar* unpackedB = 0)
+ Index strideA=-1, Index strideB=-1, Index offsetA=0, Index offsetB=0, RhsScalar* unpackedB=0);
+};
+
+template<typename LhsScalar, typename RhsScalar, typename Index, int mr, int nr, bool ConjugateLhs, bool ConjugateRhs>
+EIGEN_DONT_INLINE
+void gebp_kernel<LhsScalar,RhsScalar,Index,mr,nr,ConjugateLhs,ConjugateRhs>
+ ::operator()(ResScalar* res, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index rows, Index depth, Index cols, ResScalar alpha,
+ Index strideA, Index strideB, Index offsetA, Index offsetB, RhsScalar* unpackedB)
{
Traits traits;
@@ -1089,7 +1096,7 @@ EIGEN_ASM_COMMENT("mybegin4");
}
}
}
-};
+
#undef CJMADD
@@ -1110,80 +1117,83 @@ EIGEN_ASM_COMMENT("mybegin4");
template<typename Scalar, typename Index, int Pack1, int Pack2, int StorageOrder, bool Conjugate, bool PanelMode>
struct gemm_pack_lhs
{
- EIGEN_DONT_INLINE void operator()(Scalar* blockA, const Scalar* EIGEN_RESTRICT _lhs, Index lhsStride, Index depth, Index rows,
- Index stride=0, Index offset=0)
+ EIGEN_DONT_INLINE void operator()(Scalar* blockA, const Scalar* EIGEN_RESTRICT _lhs, Index lhsStride, Index depth, Index rows, Index stride=0, Index offset=0);
+};
+
+template<typename Scalar, typename Index, int Pack1, int Pack2, int StorageOrder, bool Conjugate, bool PanelMode>
+EIGEN_DONT_INLINE void gemm_pack_lhs<Scalar, Index, Pack1, Pack2, StorageOrder, Conjugate, PanelMode>
+ ::operator()(Scalar* blockA, const Scalar* EIGEN_RESTRICT _lhs, Index lhsStride, Index depth, Index rows, Index stride, Index offset)
+{
+ typedef typename packet_traits<Scalar>::type Packet;
+ enum { PacketSize = packet_traits<Scalar>::size };
+
+ EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK LHS");
+ eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
+ eigen_assert( (StorageOrder==RowMajor) || ((Pack1%PacketSize)==0 && Pack1<=4*PacketSize) );
+ conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
+ const_blas_data_mapper<Scalar, Index, StorageOrder> lhs(_lhs,lhsStride);
+ Index count = 0;
+ Index peeled_mc = (rows/Pack1)*Pack1;
+ for(Index i=0; i<peeled_mc; i+=Pack1)
{
- typedef typename packet_traits<Scalar>::type Packet;
- enum { PacketSize = packet_traits<Scalar>::size };
-
- EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK LHS");
- eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
- eigen_assert( (StorageOrder==RowMajor) || ((Pack1%PacketSize)==0 && Pack1<=4*PacketSize) );
- conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
- const_blas_data_mapper<Scalar, Index, StorageOrder> lhs(_lhs,lhsStride);
- Index count = 0;
- Index peeled_mc = (rows/Pack1)*Pack1;
- for(Index i=0; i<peeled_mc; i+=Pack1)
- {
- if(PanelMode) count += Pack1 * offset;
+ if(PanelMode) count += Pack1 * offset;
- if(StorageOrder==ColMajor)
+ if(StorageOrder==ColMajor)
+ {
+ for(Index k=0; k<depth; k++)
{
- for(Index k=0; k<depth; k++)
- {
- Packet A, B, C, D;
- if(Pack1>=1*PacketSize) A = ploadu<Packet>(&lhs(i+0*PacketSize, k));
- if(Pack1>=2*PacketSize) B = ploadu<Packet>(&lhs(i+1*PacketSize, k));
- if(Pack1>=3*PacketSize) C = ploadu<Packet>(&lhs(i+2*PacketSize, k));
- if(Pack1>=4*PacketSize) D = ploadu<Packet>(&lhs(i+3*PacketSize, k));
- if(Pack1>=1*PacketSize) { pstore(blockA+count, cj.pconj(A)); count+=PacketSize; }
- if(Pack1>=2*PacketSize) { pstore(blockA+count, cj.pconj(B)); count+=PacketSize; }
- if(Pack1>=3*PacketSize) { pstore(blockA+count, cj.pconj(C)); count+=PacketSize; }
- if(Pack1>=4*PacketSize) { pstore(blockA+count, cj.pconj(D)); count+=PacketSize; }
- }
+ Packet A, B, C, D;
+ if(Pack1>=1*PacketSize) A = ploadu<Packet>(&lhs(i+0*PacketSize, k));
+ if(Pack1>=2*PacketSize) B = ploadu<Packet>(&lhs(i+1*PacketSize, k));
+ if(Pack1>=3*PacketSize) C = ploadu<Packet>(&lhs(i+2*PacketSize, k));
+ if(Pack1>=4*PacketSize) D = ploadu<Packet>(&lhs(i+3*PacketSize, k));
+ if(Pack1>=1*PacketSize) { pstore(blockA+count, cj.pconj(A)); count+=PacketSize; }
+ if(Pack1>=2*PacketSize) { pstore(blockA+count, cj.pconj(B)); count+=PacketSize; }
+ if(Pack1>=3*PacketSize) { pstore(blockA+count, cj.pconj(C)); count+=PacketSize; }
+ if(Pack1>=4*PacketSize) { pstore(blockA+count, cj.pconj(D)); count+=PacketSize; }
}
- else
+ }
+ else
+ {
+ for(Index k=0; k<depth; k++)
{
- for(Index k=0; k<depth; k++)
+ // TODO add a vectorized transpose here
+ Index w=0;
+ for(; w<Pack1-3; w+=4)
{
- // TODO add a vectorized transpose here
- Index w=0;
- for(; w<Pack1-3; w+=4)
- {
- Scalar a(cj(lhs(i+w+0, k))),
- b(cj(lhs(i+w+1, k))),
- c(cj(lhs(i+w+2, k))),
- d(cj(lhs(i+w+3, k)));
- blockA[count++] = a;
- blockA[count++] = b;
- blockA[count++] = c;
- blockA[count++] = d;
- }
- if(Pack1%4)
- for(;w<Pack1;++w)
- blockA[count++] = cj(lhs(i+w, k));
+ Scalar a(cj(lhs(i+w+0, k))),
+ b(cj(lhs(i+w+1, k))),
+ c(cj(lhs(i+w+2, k))),
+ d(cj(lhs(i+w+3, k)));
+ blockA[count++] = a;
+ blockA[count++] = b;
+ blockA[count++] = c;
+ blockA[count++] = d;
}
+ if(Pack1%4)
+ for(;w<Pack1;++w)
+ blockA[count++] = cj(lhs(i+w, k));
}
- if(PanelMode) count += Pack1 * (stride-offset-depth);
- }
- if(rows-peeled_mc>=Pack2)
- {
- if(PanelMode) count += Pack2*offset;
- for(Index k=0; k<depth; k++)
- for(Index w=0; w<Pack2; w++)
- blockA[count++] = cj(lhs(peeled_mc+w, k));
- if(PanelMode) count += Pack2 * (stride-offset-depth);
- peeled_mc += Pack2;
- }
- for(Index i=peeled_mc; i<rows; i++)
- {
- if(PanelMode) count += offset;
- for(Index k=0; k<depth; k++)
- blockA[count++] = cj(lhs(i, k));
- if(PanelMode) count += (stride-offset-depth);
}
+ if(PanelMode) count += Pack1 * (stride-offset-depth);
}
-};
+ if(rows-peeled_mc>=Pack2)
+ {
+ if(PanelMode) count += Pack2*offset;
+ for(Index k=0; k<depth; k++)
+ for(Index w=0; w<Pack2; w++)
+ blockA[count++] = cj(lhs(peeled_mc+w, k));
+ if(PanelMode) count += Pack2 * (stride-offset-depth);
+ peeled_mc += Pack2;
+ }
+ for(Index i=peeled_mc; i<rows; i++)
+ {
+ if(PanelMode) count += offset;
+ for(Index k=0; k<depth; k++)
+ blockA[count++] = cj(lhs(i, k));
+ if(PanelMode) count += (stride-offset-depth);
+ }
+}
// copy a complete panel of the rhs
// this version is optimized for column major matrices
@@ -1197,92 +1207,98 @@ struct gemm_pack_rhs<Scalar, Index, nr, ColMajor, Conjugate, PanelMode>
{
typedef typename packet_traits<Scalar>::type Packet;
enum { PacketSize = packet_traits<Scalar>::size };
- EIGEN_DONT_INLINE void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols,
- Index stride=0, Index offset=0)
+ EIGEN_DONT_INLINE void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols, Index stride=0, Index offset=0);
+};
+
+template<typename Scalar, typename Index, int nr, bool Conjugate, bool PanelMode>
+EIGEN_DONT_INLINE void gemm_pack_rhs<Scalar, Index, nr, ColMajor, Conjugate, PanelMode>
+ ::operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols, Index stride, Index offset)
+{
+ EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK RHS COLMAJOR");
+ eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
+ conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
+ Index packet_cols = (cols/nr) * nr;
+ Index count = 0;
+ for(Index j2=0; j2<packet_cols; j2+=nr)
{
- EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK RHS COLMAJOR");
- eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
- conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
- Index packet_cols = (cols/nr) * nr;
- Index count = 0;
- for(Index j2=0; j2<packet_cols; j2+=nr)
+ // skip what we have before
+ if(PanelMode) count += nr * offset;
+ const Scalar* b0 = &rhs[(j2+0)*rhsStride];
+ const Scalar* b1 = &rhs[(j2+1)*rhsStride];
+ const Scalar* b2 = &rhs[(j2+2)*rhsStride];
+ const Scalar* b3 = &rhs[(j2+3)*rhsStride];
+ for(Index k=0; k<depth; k++)
{
- // skip what we have before
- if(PanelMode) count += nr * offset;
- const Scalar* b0 = &rhs[(j2+0)*rhsStride];
- const Scalar* b1 = &rhs[(j2+1)*rhsStride];
- const Scalar* b2 = &rhs[(j2+2)*rhsStride];
- const Scalar* b3 = &rhs[(j2+3)*rhsStride];
- for(Index k=0; k<depth; k++)
- {
- blockB[count+0] = cj(b0[k]);
- blockB[count+1] = cj(b1[k]);
- if(nr==4) blockB[count+2] = cj(b2[k]);
- if(nr==4) blockB[count+3] = cj(b3[k]);
- count += nr;
- }
- // skip what we have after
- if(PanelMode) count += nr * (stride-offset-depth);
+ blockB[count+0] = cj(b0[k]);
+ blockB[count+1] = cj(b1[k]);
+ if(nr==4) blockB[count+2] = cj(b2[k]);
+ if(nr==4) blockB[count+3] = cj(b3[k]);
+ count += nr;
}
+ // skip what we have after
+ if(PanelMode) count += nr * (stride-offset-depth);
+ }
- // copy the remaining columns one at a time (nr==1)
- for(Index j2=packet_cols; j2<cols; ++j2)
+ // copy the remaining columns one at a time (nr==1)
+ for(Index j2=packet_cols; j2<cols; ++j2)
+ {
+ if(PanelMode) count += offset;
+ const Scalar* b0 = &rhs[(j2+0)*rhsStride];
+ for(Index k=0; k<depth; k++)
{
- if(PanelMode) count += offset;
- const Scalar* b0 = &rhs[(j2+0)*rhsStride];
- for(Index k=0; k<depth; k++)
- {
- blockB[count] = cj(b0[k]);
- count += 1;
- }
- if(PanelMode) count += (stride-offset-depth);
+ blockB[count] = cj(b0[k]);
+ count += 1;
}
+ if(PanelMode) count += (stride-offset-depth);
}
-};
+}
// this version is optimized for row major matrices
template<typename Scalar, typename Index, int nr, bool Conjugate, bool PanelMode>
struct gemm_pack_rhs<Scalar, Index, nr, RowMajor, Conjugate, PanelMode>
{
enum { PacketSize = packet_traits<Scalar>::size };
- EIGEN_DONT_INLINE void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols,
- Index stride=0, Index offset=0)
+ EIGEN_DONT_INLINE void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols, Index stride=0, Index offset=0);
+};
+
+template<typename Scalar, typename Index, int nr, bool Conjugate, bool PanelMode>
+EIGEN_DONT_INLINE void gemm_pack_rhs<Scalar, Index, nr, RowMajor, Conjugate, PanelMode>
+ ::operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols, Index stride, Index offset)
+{
+ EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK RHS ROWMAJOR");
+ eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
+ conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
+ Index packet_cols = (cols/nr) * nr;
+ Index count = 0;
+ for(Index j2=0; j2<packet_cols; j2+=nr)
{
- EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK RHS ROWMAJOR");
- eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
- conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
- Index packet_cols = (cols/nr) * nr;
- Index count = 0;
- for(Index j2=0; j2<packet_cols; j2+=nr)
+ // skip what we have before
+ if(PanelMode) count += nr * offset;
+ for(Index k=0; k<depth; k++)
{
- // skip what we have before
- if(PanelMode) count += nr * offset;
- for(Index k=0; k<depth; k++)
- {
- const Scalar* b0 = &rhs[k*rhsStride + j2];
- blockB[count+0] = cj(b0[0]);
- blockB[count+1] = cj(b0[1]);
- if(nr==4) blockB[count+2] = cj(b0[2]);
- if(nr==4) blockB[count+3] = cj(b0[3]);
- count += nr;
- }
- // skip what we have after
- if(PanelMode) count += nr * (stride-offset-depth);
+ const Scalar* b0 = &rhs[k*rhsStride + j2];
+ blockB[count+0] = cj(b0[0]);
+ blockB[count+1] = cj(b0[1]);
+ if(nr==4) blockB[count+2] = cj(b0[2]);
+ if(nr==4) blockB[count+3] = cj(b0[3]);
+ count += nr;
}
- // copy the remaining columns one at a time (nr==1)
- for(Index j2=packet_cols; j2<cols; ++j2)
+ // skip what we have after
+ if(PanelMode) count += nr * (stride-offset-depth);
+ }
+ // copy the remaining columns one at a time (nr==1)
+ for(Index j2=packet_cols; j2<cols; ++j2)
+ {
+ if(PanelMode) count += offset;
+ const Scalar* b0 = &rhs[j2];
+ for(Index k=0; k<depth; k++)
{
- if(PanelMode) count += offset;
- const Scalar* b0 = &rhs[j2];
- for(Index k=0; k<depth; k++)
- {
- blockB[count] = cj(b0[k*rhsStride]);
- count += 1;
- }
- if(PanelMode) count += stride-offset-depth;
+ blockB[count] = cj(b0[k*rhsStride]);
+ count += 1;
}
+ if(PanelMode) count += stride-offset-depth;
}
-};
+}
} // end namespace internal
diff --git a/Eigen/src/Core/products/GeneralMatrixMatrix.h b/Eigen/src/Core/products/GeneralMatrixMatrix.h
index 73a465ec5..3f5ffcf51 100644
--- a/Eigen/src/Core/products/GeneralMatrixMatrix.h
+++ b/Eigen/src/Core/products/GeneralMatrixMatrix.h
@@ -50,6 +50,7 @@ template<
typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs>
struct general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,ColMajor>
{
+
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
static void run(Index rows, Index cols, Index depth,
const LhsScalar* _lhs, Index lhsStride,
@@ -169,7 +170,6 @@ static void run(Index rows, Index cols, Index depth,
// vertical panel which is, in practice, a very low number.
pack_rhs(blockB, &rhs(k2,0), rhsStride, actual_kc, cols);
-
// For each mc x kc block of the lhs's vertical panel...
// (==GEPP_VAR1)
for(Index i2=0; i2<rows; i2+=mc)
@@ -183,7 +183,6 @@ static void run(Index rows, Index cols, Index depth,
// Everything is packed, we can now call the block * panel kernel:
gebp(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha, -1, -1, 0, 0, blockW);
-
}
}
}
@@ -204,7 +203,7 @@ struct traits<GeneralProduct<Lhs,Rhs,GemmProduct> >
template<typename Scalar, typename Index, typename Gemm, typename Lhs, typename Rhs, typename Dest, typename BlockingType>
struct gemm_functor
{
- gemm_functor(const Lhs& lhs, const Rhs& rhs, Dest& dest, Scalar actualAlpha,
+ gemm_functor(const Lhs& lhs, const Rhs& rhs, Dest& dest, const Scalar& actualAlpha,
BlockingType& blocking)
: m_lhs(lhs), m_rhs(rhs), m_dest(dest), m_actualAlpha(actualAlpha), m_blocking(blocking)
{}
@@ -395,7 +394,7 @@ class GeneralProduct<Lhs, Rhs, GemmProduct>
EIGEN_CHECK_BINARY_COMPATIBILIY(BinOp,LhsScalar,RhsScalar);
}
- template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
+ template<typename Dest> void scaleAndAddTo(Dest& dst, const Scalar& alpha) const
{
eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
diff --git a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h
index 432d3a9dc..5c3763909 100644
--- a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h
+++ b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h
@@ -12,6 +12,9 @@
namespace Eigen {
+template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjLhs, bool ConjRhs>
+struct selfadjoint_rank1_update;
+
namespace internal {
/**********************************************************************
@@ -39,7 +42,7 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,
{
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* lhs, Index lhsStride,
- const RhsScalar* rhs, Index rhsStride, ResScalar* res, Index resStride, ResScalar alpha)
+ const RhsScalar* rhs, Index rhsStride, ResScalar* res, Index resStride, const ResScalar& alpha)
{
general_matrix_matrix_triangular_product<Index,
RhsScalar, RhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateRhs,
@@ -55,7 +58,7 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,
{
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* _lhs, Index lhsStride,
- const RhsScalar* _rhs, Index rhsStride, ResScalar* res, Index resStride, ResScalar alpha)
+ const RhsScalar* _rhs, Index rhsStride, ResScalar* res, Index resStride, const ResScalar& alpha)
{
const_blas_data_mapper<LhsScalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
const_blas_data_mapper<RhsScalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride);
@@ -133,7 +136,7 @@ struct tribb_kernel
enum {
BlockSize = EIGEN_PLAIN_ENUM_MAX(mr,nr)
};
- void operator()(ResScalar* res, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index size, Index depth, ResScalar alpha, RhsScalar* workspace)
+ void operator()(ResScalar* res, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index size, Index depth, const ResScalar& alpha, RhsScalar* workspace)
{
gebp_kernel<LhsScalar, RhsScalar, Index, mr, nr, ConjLhs, ConjRhs> gebp_kernel;
Matrix<ResScalar,BlockSize,BlockSize,ColMajor> buffer;
@@ -180,31 +183,92 @@ struct tribb_kernel
// high level API
+template<typename MatrixType, typename ProductType, int UpLo, bool IsOuterProduct>
+struct general_product_to_triangular_selector;
+
+
+template<typename MatrixType, typename ProductType, int UpLo>
+struct general_product_to_triangular_selector<MatrixType,ProductType,UpLo,true>
+{
+ static void run(MatrixType& mat, const ProductType& prod, const typename MatrixType::Scalar& alpha)
+ {
+ typedef typename MatrixType::Scalar Scalar;
+ typedef typename MatrixType::Index Index;
+
+ typedef typename internal::remove_all<typename ProductType::LhsNested>::type Lhs;
+ typedef internal::blas_traits<Lhs> LhsBlasTraits;
+ typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhs;
+ typedef typename internal::remove_all<ActualLhs>::type _ActualLhs;
+ typename internal::add_const_on_value_type<ActualLhs>::type actualLhs = LhsBlasTraits::extract(prod.lhs());
+
+ typedef typename internal::remove_all<typename ProductType::RhsNested>::type Rhs;
+ typedef internal::blas_traits<Rhs> RhsBlasTraits;
+ typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhs;
+ typedef typename internal::remove_all<ActualRhs>::type _ActualRhs;
+ typename internal::add_const_on_value_type<ActualRhs>::type actualRhs = RhsBlasTraits::extract(prod.rhs());
+
+ Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs().derived()) * RhsBlasTraits::extractScalarFactor(prod.rhs().derived());
+
+ enum {
+ StorageOrder = (internal::traits<MatrixType>::Flags&RowMajorBit) ? RowMajor : ColMajor,
+ UseLhsDirectly = _ActualLhs::InnerStrideAtCompileTime==1,
+ UseRhsDirectly = _ActualRhs::InnerStrideAtCompileTime==1
+ };
+
+ internal::gemv_static_vector_if<Scalar,Lhs::SizeAtCompileTime,Lhs::MaxSizeAtCompileTime,!UseLhsDirectly> static_lhs;
+ ei_declare_aligned_stack_constructed_variable(Scalar, actualLhsPtr, actualLhs.size(),
+ (UseLhsDirectly ? const_cast<Scalar*>(actualLhs.data()) : static_lhs.data()));
+ if(!UseLhsDirectly) Map<typename _ActualLhs::PlainObject>(actualLhsPtr, actualLhs.size()) = actualLhs;
+
+ internal::gemv_static_vector_if<Scalar,Rhs::SizeAtCompileTime,Rhs::MaxSizeAtCompileTime,!UseRhsDirectly> static_rhs;
+ ei_declare_aligned_stack_constructed_variable(Scalar, actualRhsPtr, actualRhs.size(),
+ (UseRhsDirectly ? const_cast<Scalar*>(actualRhs.data()) : static_rhs.data()));
+ if(!UseRhsDirectly) Map<typename _ActualRhs::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs;
+
+
+ selfadjoint_rank1_update<Scalar,Index,StorageOrder,UpLo,
+ LhsBlasTraits::NeedToConjugate && NumTraits<Scalar>::IsComplex,
+ RhsBlasTraits::NeedToConjugate && NumTraits<Scalar>::IsComplex>
+ ::run(actualLhs.size(), mat.data(), mat.outerStride(), actualLhsPtr, actualRhsPtr, actualAlpha);
+ }
+};
+
+template<typename MatrixType, typename ProductType, int UpLo>
+struct general_product_to_triangular_selector<MatrixType,ProductType,UpLo,false>
+{
+ static void run(MatrixType& mat, const ProductType& prod, const typename MatrixType::Scalar& alpha)
+ {
+ typedef typename MatrixType::Index Index;
+
+ typedef typename internal::remove_all<typename ProductType::LhsNested>::type Lhs;
+ typedef internal::blas_traits<Lhs> LhsBlasTraits;
+ typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhs;
+ typedef typename internal::remove_all<ActualLhs>::type _ActualLhs;
+ typename internal::add_const_on_value_type<ActualLhs>::type actualLhs = LhsBlasTraits::extract(prod.lhs());
+
+ typedef typename internal::remove_all<typename ProductType::RhsNested>::type Rhs;
+ typedef internal::blas_traits<Rhs> RhsBlasTraits;
+ typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhs;
+ typedef typename internal::remove_all<ActualRhs>::type _ActualRhs;
+ typename internal::add_const_on_value_type<ActualRhs>::type actualRhs = RhsBlasTraits::extract(prod.rhs());
+
+ typename ProductType::Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs().derived()) * RhsBlasTraits::extractScalarFactor(prod.rhs().derived());
+
+ internal::general_matrix_matrix_triangular_product<Index,
+ typename Lhs::Scalar, _ActualLhs::Flags&RowMajorBit ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate,
+ typename Rhs::Scalar, _ActualRhs::Flags&RowMajorBit ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate,
+ MatrixType::Flags&RowMajorBit ? RowMajor : ColMajor, UpLo>
+ ::run(mat.cols(), actualLhs.cols(),
+ &actualLhs.coeffRef(0,0), actualLhs.outerStride(), &actualRhs.coeffRef(0,0), actualRhs.outerStride(),
+ mat.data(), mat.outerStride(), actualAlpha);
+ }
+};
+
template<typename MatrixType, unsigned int UpLo>
template<typename ProductDerived, typename _Lhs, typename _Rhs>
TriangularView<MatrixType,UpLo>& TriangularView<MatrixType,UpLo>::assignProduct(const ProductBase<ProductDerived, _Lhs,_Rhs>& prod, const Scalar& alpha)
{
- typedef typename internal::remove_all<typename ProductDerived::LhsNested>::type Lhs;
- typedef internal::blas_traits<Lhs> LhsBlasTraits;
- typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhs;
- typedef typename internal::remove_all<ActualLhs>::type _ActualLhs;
- typename internal::add_const_on_value_type<ActualLhs>::type actualLhs = LhsBlasTraits::extract(prod.lhs());
-
- typedef typename internal::remove_all<typename ProductDerived::RhsNested>::type Rhs;
- typedef internal::blas_traits<Rhs> RhsBlasTraits;
- typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhs;
- typedef typename internal::remove_all<ActualRhs>::type _ActualRhs;
- typename internal::add_const_on_value_type<ActualRhs>::type actualRhs = RhsBlasTraits::extract(prod.rhs());
-
- typename ProductDerived::Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs().derived()) * RhsBlasTraits::extractScalarFactor(prod.rhs().derived());
-
- internal::general_matrix_matrix_triangular_product<Index,
- typename Lhs::Scalar, _ActualLhs::Flags&RowMajorBit ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate,
- typename Rhs::Scalar, _ActualRhs::Flags&RowMajorBit ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate,
- MatrixType::Flags&RowMajorBit ? RowMajor : ColMajor, UpLo>
- ::run(m_matrix.cols(), actualLhs.cols(),
- &actualLhs.coeffRef(0,0), actualLhs.outerStride(), &actualRhs.coeffRef(0,0), actualRhs.outerStride(),
- const_cast<Scalar*>(m_matrix.data()), m_matrix.outerStride(), actualAlpha);
+ general_product_to_triangular_selector<MatrixType, ProductDerived, UpLo, (_Lhs::ColsAtCompileTime==1) || (_Rhs::RowsAtCompileTime==1)>::run(m_matrix.const_cast_derived(), prod.derived(), alpha);
return *this;
}
diff --git a/Eigen/src/Core/products/GeneralMatrixVector.h b/Eigen/src/Core/products/GeneralMatrixVector.h
index 8895d3ab2..9bdd588df 100644
--- a/Eigen/src/Core/products/GeneralMatrixVector.h
+++ b/Eigen/src/Core/products/GeneralMatrixVector.h
@@ -56,6 +56,18 @@ EIGEN_DONT_INLINE static void run(
#ifdef EIGEN_INTERNAL_DEBUGGING
resIncr
#endif
+ , RhsScalar alpha);
+};
+
+template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs, int Version>
+EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,ColMajor,ConjugateLhs,RhsScalar,ConjugateRhs,Version>::run(
+ Index rows, Index cols,
+ const LhsScalar* lhs, Index lhsStride,
+ const RhsScalar* rhs, Index rhsIncr,
+ ResScalar* res, Index
+ #ifdef EIGEN_INTERNAL_DEBUGGING
+ resIncr
+ #endif
, RhsScalar alpha)
{
eigen_internal_assert(resIncr==1);
@@ -274,7 +286,6 @@ EIGEN_DONT_INLINE static void run(
} while(Vectorizable);
#undef _EIGEN_ACCUMULATE_PACKETS
}
-};
/* Optimized row-major matrix * vector product:
* This algorithm processes 4 rows at onces that allows to both reduce
@@ -312,6 +323,15 @@ EIGEN_DONT_INLINE static void run(
const LhsScalar* lhs, Index lhsStride,
const RhsScalar* rhs, Index rhsIncr,
ResScalar* res, Index resIncr,
+ ResScalar alpha);
+};
+
+template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs, int Version>
+EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,RowMajor,ConjugateLhs,RhsScalar,ConjugateRhs,Version>::run(
+ Index rows, Index cols,
+ const LhsScalar* lhs, Index lhsStride,
+ const RhsScalar* rhs, Index rhsIncr,
+ ResScalar* res, Index resIncr,
ResScalar alpha)
{
EIGEN_UNUSED_VARIABLE(rhsIncr);
@@ -545,7 +565,6 @@ EIGEN_DONT_INLINE static void run(
#undef _EIGEN_ACCUMULATE_PACKETS
}
-};
} // end namespace internal
diff --git a/Eigen/src/Core/products/GeneralMatrixVector_MKL.h b/Eigen/src/Core/products/GeneralMatrixVector_MKL.h
index e9de6af3e..1cb9fe6b5 100644
--- a/Eigen/src/Core/products/GeneralMatrixVector_MKL.h
+++ b/Eigen/src/Core/products/GeneralMatrixVector_MKL.h
@@ -53,7 +53,7 @@ struct general_matrix_vector_product_gemv :
#define EIGEN_MKL_GEMV_SPECIALIZE(Scalar) \
template<typename Index, bool ConjugateLhs, bool ConjugateRhs> \
struct general_matrix_vector_product<Index,Scalar,ColMajor,ConjugateLhs,Scalar,ConjugateRhs,Specialized> { \
-static EIGEN_DONT_INLINE void run( \
+static void run( \
Index rows, Index cols, \
const Scalar* lhs, Index lhsStride, \
const Scalar* rhs, Index rhsIncr, \
@@ -70,7 +70,7 @@ static EIGEN_DONT_INLINE void run( \
}; \
template<typename Index, bool ConjugateLhs, bool ConjugateRhs> \
struct general_matrix_vector_product<Index,Scalar,RowMajor,ConjugateLhs,Scalar,ConjugateRhs,Specialized> { \
-static EIGEN_DONT_INLINE void run( \
+static void run( \
Index rows, Index cols, \
const Scalar* lhs, Index lhsStride, \
const Scalar* rhs, Index rhsIncr, \
@@ -92,7 +92,7 @@ struct general_matrix_vector_product_gemv<Index,EIGTYPE,LhsStorageOrder,Conjugat
{ \
typedef Matrix<EIGTYPE,Dynamic,1,ColMajor> GEMVVector;\
\
-static EIGEN_DONT_INLINE void run( \
+static void run( \
Index rows, Index cols, \
const EIGTYPE* lhs, Index lhsStride, \
const EIGTYPE* rhs, Index rhsIncr, \
diff --git a/Eigen/src/Core/products/SelfadjointMatrixMatrix.h b/Eigen/src/Core/products/SelfadjointMatrixMatrix.h
index 48209636e..ee619df99 100644
--- a/Eigen/src/Core/products/SelfadjointMatrixMatrix.h
+++ b/Eigen/src/Core/products/SelfadjointMatrixMatrix.h
@@ -211,7 +211,7 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint,Co
const Scalar* lhs, Index lhsStride,
const Scalar* rhs, Index rhsStride,
Scalar* res, Index resStride,
- Scalar alpha)
+ const Scalar& alpha)
{
product_selfadjoint_matrix<Scalar, Index,
EIGEN_LOGICAL_XOR(RhsSelfAdjoint,RhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
@@ -234,7 +234,18 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs
const Scalar* _lhs, Index lhsStride,
const Scalar* _rhs, Index rhsStride,
Scalar* res, Index resStride,
- Scalar alpha)
+ const Scalar& alpha);
+};
+
+template <typename Scalar, typename Index,
+ int LhsStorageOrder, bool ConjugateLhs,
+ int RhsStorageOrder, bool ConjugateRhs>
+EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor>::run(
+ Index rows, Index cols,
+ const Scalar* _lhs, Index lhsStride,
+ const Scalar* _rhs, Index rhsStride,
+ Scalar* res, Index resStride,
+ const Scalar& alpha)
{
Index size = rows;
@@ -301,7 +312,6 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs
}
}
}
-};
// matrix * selfadjoint product
template <typename Scalar, typename Index,
@@ -315,7 +325,18 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLh
const Scalar* _lhs, Index lhsStride,
const Scalar* _rhs, Index rhsStride,
Scalar* res, Index resStride,
- Scalar alpha)
+ const Scalar& alpha);
+};
+
+template <typename Scalar, typename Index,
+ int LhsStorageOrder, bool ConjugateLhs,
+ int RhsStorageOrder, bool ConjugateRhs>
+EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor>::run(
+ Index rows, Index cols,
+ const Scalar* _lhs, Index lhsStride,
+ const Scalar* _rhs, Index rhsStride,
+ Scalar* res, Index resStride,
+ const Scalar& alpha)
{
Index size = cols;
@@ -353,7 +374,6 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLh
}
}
}
-};
} // end namespace internal
@@ -383,7 +403,7 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>
RhsIsSelfAdjoint = (RhsMode&SelfAdjoint)==SelfAdjoint
};
- template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
+ template<typename Dest> void scaleAndAddTo(Dest& dst, const Scalar& alpha) const
{
eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
diff --git a/Eigen/src/Core/products/SelfadjointMatrixMatrix_MKL.h b/Eigen/src/Core/products/SelfadjointMatrixMatrix_MKL.h
index 4e5c4125c..dfa687fef 100644
--- a/Eigen/src/Core/products/SelfadjointMatrixMatrix_MKL.h
+++ b/Eigen/src/Core/products/SelfadjointMatrixMatrix_MKL.h
@@ -23,7 +23,7 @@
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
+//
********************************************************************************
* Content : Eigen bindings to Intel(R) MKL
* Self adjoint matrix * matrix product functionality based on ?SYMM/?HEMM.
@@ -47,7 +47,7 @@ template <typename Index, \
struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLhs,RhsStorageOrder,false,ConjugateRhs,ColMajor> \
{\
\
- static EIGEN_DONT_INLINE void run( \
+ static void run( \
Index rows, Index cols, \
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \
@@ -98,7 +98,7 @@ template <typename Index, \
int RhsStorageOrder, bool ConjugateRhs> \
struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLhs,RhsStorageOrder,false,ConjugateRhs,ColMajor> \
{\
- static EIGEN_DONT_INLINE void run( \
+ static void run( \
Index rows, Index cols, \
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \
@@ -174,7 +174,7 @@ template <typename Index, \
struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateLhs,RhsStorageOrder,true,ConjugateRhs,ColMajor> \
{\
\
- static EIGEN_DONT_INLINE void run( \
+ static void run( \
Index rows, Index cols, \
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \
@@ -224,7 +224,7 @@ template <typename Index, \
int RhsStorageOrder, bool ConjugateRhs> \
struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateLhs,RhsStorageOrder,true,ConjugateRhs,ColMajor> \
{\
- static EIGEN_DONT_INLINE void run( \
+ static void run( \
Index rows, Index cols, \
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \
diff --git a/Eigen/src/Core/products/SelfadjointMatrixVector.h b/Eigen/src/Core/products/SelfadjointMatrixVector.h
index c3145c69a..f70f4894c 100644
--- a/Eigen/src/Core/products/SelfadjointMatrixVector.h
+++ b/Eigen/src/Core/products/SelfadjointMatrixVector.h
@@ -32,10 +32,18 @@ static EIGEN_DONT_INLINE void run(
const Scalar* lhs, Index lhsStride,
const Scalar* _rhs, Index rhsIncr,
Scalar* res,
+ Scalar alpha);
+};
+
+template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs, int Version>
+EIGEN_DONT_INLINE void selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,Version>::run(
+ Index size,
+ const Scalar* lhs, Index lhsStride,
+ const Scalar* _rhs, Index rhsIncr,
+ Scalar* res,
Scalar alpha)
{
typedef typename packet_traits<Scalar>::type Packet;
- typedef typename NumTraits<Scalar>::Real RealScalar;
const Index PacketSize = sizeof(Packet)/sizeof(Scalar);
enum {
@@ -153,7 +161,6 @@ static EIGEN_DONT_INLINE void run(
res[j] += alpha * t2;
}
}
-};
} // end namespace internal
@@ -180,7 +187,7 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>
SelfadjointProductMatrix(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}
- template<typename Dest> void scaleAndAddTo(Dest& dest, Scalar alpha) const
+ template<typename Dest> void scaleAndAddTo(Dest& dest, const Scalar& alpha) const
{
typedef typename Dest::Scalar ResScalar;
typedef typename Base::RhsScalar RhsScalar;
@@ -260,7 +267,7 @@ struct SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false>
SelfadjointProductMatrix(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}
- template<typename Dest> void scaleAndAddTo(Dest& dest, Scalar alpha) const
+ template<typename Dest> void scaleAndAddTo(Dest& dest, const Scalar& alpha) const
{
// let's simply transpose the product
Transpose<Dest> destT(dest);
diff --git a/Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h b/Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h
index f88d483b6..86684b66d 100644
--- a/Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h
+++ b/Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h
@@ -50,7 +50,7 @@ struct selfadjoint_matrix_vector_product_symv :
#define EIGEN_MKL_SYMV_SPECIALIZE(Scalar) \
template<typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs> \
struct selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,Specialized> { \
-static EIGEN_DONT_INLINE void run( \
+static void run( \
Index size, const Scalar* lhs, Index lhsStride, \
const Scalar* _rhs, Index rhsIncr, Scalar* res, Scalar alpha) { \
enum {\
@@ -77,7 +77,7 @@ struct selfadjoint_matrix_vector_product_symv<EIGTYPE,Index,StorageOrder,UpLo,Co
{ \
typedef Matrix<EIGTYPE,Dynamic,1,ColMajor> SYMVVector;\
\
-static EIGEN_DONT_INLINE void run( \
+static void run( \
Index size, const EIGTYPE* lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* res, EIGTYPE alpha) \
{ \
diff --git a/Eigen/src/Core/products/SelfadjointProduct.h b/Eigen/src/Core/products/SelfadjointProduct.h
index 6a55f3d77..6ca4ae6c0 100644
--- a/Eigen/src/Core/products/SelfadjointProduct.h
+++ b/Eigen/src/Core/products/SelfadjointProduct.h
@@ -18,21 +18,19 @@
namespace Eigen {
-template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjLhs, bool ConjRhs>
-struct selfadjoint_rank1_update;
template<typename Scalar, typename Index, int UpLo, bool ConjLhs, bool ConjRhs>
struct selfadjoint_rank1_update<Scalar,Index,ColMajor,UpLo,ConjLhs,ConjRhs>
{
- static void run(Index size, Scalar* mat, Index stride, const Scalar* vec, Scalar alpha)
+ static void run(Index size, Scalar* mat, Index stride, const Scalar* vecX, const Scalar* vecY, const Scalar& alpha)
{
internal::conj_if<ConjRhs> cj;
typedef Map<const Matrix<Scalar,Dynamic,1> > OtherMap;
- typedef typename internal::conditional<ConjLhs,typename OtherMap::ConjugateReturnType,const OtherMap&>::type ConjRhsType;
+ typedef typename internal::conditional<ConjLhs,typename OtherMap::ConjugateReturnType,const OtherMap&>::type ConjLhsType;
for (Index i=0; i<size; ++i)
{
Map<Matrix<Scalar,Dynamic,1> >(mat+stride*i+(UpLo==Lower ? i : 0), (UpLo==Lower ? size-i : (i+1)))
- += (alpha * cj(vec[i])) * ConjRhsType(OtherMap(vec+(UpLo==Lower ? i : 0),UpLo==Lower ? size-i : (i+1)));
+ += (alpha * cj(vecY[i])) * ConjLhsType(OtherMap(vecX+(UpLo==Lower ? i : 0),UpLo==Lower ? size-i : (i+1)));
}
}
};
@@ -40,9 +38,9 @@ struct selfadjoint_rank1_update<Scalar,Index,ColMajor,UpLo,ConjLhs,ConjRhs>
template<typename Scalar, typename Index, int UpLo, bool ConjLhs, bool ConjRhs>
struct selfadjoint_rank1_update<Scalar,Index,RowMajor,UpLo,ConjLhs,ConjRhs>
{
- static void run(Index size, Scalar* mat, Index stride, const Scalar* vec, Scalar alpha)
+ static void run(Index size, Scalar* mat, Index stride, const Scalar* vecX, const Scalar* vecY, const Scalar& alpha)
{
- selfadjoint_rank1_update<Scalar,Index,ColMajor,UpLo==Lower?Upper:Lower,ConjRhs,ConjLhs>::run(size,mat,stride,vec,alpha);
+ selfadjoint_rank1_update<Scalar,Index,ColMajor,UpLo==Lower?Upper:Lower,ConjRhs,ConjLhs>::run(size,mat,stride,vecY,vecX,alpha);
}
};
@@ -52,7 +50,7 @@ struct selfadjoint_product_selector;
template<typename MatrixType, typename OtherType, int UpLo>
struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,true>
{
- static void run(MatrixType& mat, const OtherType& other, typename MatrixType::Scalar alpha)
+ static void run(MatrixType& mat, const OtherType& other, const typename MatrixType::Scalar& alpha)
{
typedef typename MatrixType::Scalar Scalar;
typedef typename MatrixType::Index Index;
@@ -78,14 +76,14 @@ struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,true>
selfadjoint_rank1_update<Scalar,Index,StorageOrder,UpLo,
OtherBlasTraits::NeedToConjugate && NumTraits<Scalar>::IsComplex,
(!OtherBlasTraits::NeedToConjugate) && NumTraits<Scalar>::IsComplex>
- ::run(other.size(), mat.data(), mat.outerStride(), actualOtherPtr, actualAlpha);
+ ::run(other.size(), mat.data(), mat.outerStride(), actualOtherPtr, actualOtherPtr, actualAlpha);
}
};
template<typename MatrixType, typename OtherType, int UpLo>
struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,false>
{
- static void run(MatrixType& mat, const OtherType& other, typename MatrixType::Scalar alpha)
+ static void run(MatrixType& mat, const OtherType& other, const typename MatrixType::Scalar& alpha)
{
typedef typename MatrixType::Scalar Scalar;
typedef typename MatrixType::Index Index;
@@ -113,7 +111,7 @@ struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,false>
template<typename MatrixType, unsigned int UpLo>
template<typename DerivedU>
SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo>
-::rankUpdate(const MatrixBase<DerivedU>& u, Scalar alpha)
+::rankUpdate(const MatrixBase<DerivedU>& u, const Scalar& alpha)
{
selfadjoint_product_selector<MatrixType,DerivedU,UpLo>::run(_expression().const_cast_derived(), u.derived(), alpha);
diff --git a/Eigen/src/Core/products/SelfadjointRank2Update.h b/Eigen/src/Core/products/SelfadjointRank2Update.h
index 57a98cc2d..4b57f189d 100644
--- a/Eigen/src/Core/products/SelfadjointRank2Update.h
+++ b/Eigen/src/Core/products/SelfadjointRank2Update.h
@@ -24,7 +24,7 @@ struct selfadjoint_rank2_update_selector;
template<typename Scalar, typename Index, typename UType, typename VType>
struct selfadjoint_rank2_update_selector<Scalar,Index,UType,VType,Lower>
{
- static void run(Scalar* mat, Index stride, const UType& u, const VType& v, Scalar alpha)
+ static void run(Scalar* mat, Index stride, const UType& u, const VType& v, const Scalar& alpha)
{
const Index size = u.size();
for (Index i=0; i<size; ++i)
@@ -39,7 +39,7 @@ struct selfadjoint_rank2_update_selector<Scalar,Index,UType,VType,Lower>
template<typename Scalar, typename Index, typename UType, typename VType>
struct selfadjoint_rank2_update_selector<Scalar,Index,UType,VType,Upper>
{
- static void run(Scalar* mat, Index stride, const UType& u, const VType& v, Scalar alpha)
+ static void run(Scalar* mat, Index stride, const UType& u, const VType& v, const Scalar& alpha)
{
const Index size = u.size();
for (Index i=0; i<size; ++i)
@@ -58,7 +58,7 @@ template<bool Cond, typename T> struct conj_expr_if
template<typename MatrixType, unsigned int UpLo>
template<typename DerivedU, typename DerivedV>
SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo>
-::rankUpdate(const MatrixBase<DerivedU>& u, const MatrixBase<DerivedV>& v, Scalar alpha)
+::rankUpdate(const MatrixBase<DerivedU>& u, const MatrixBase<DerivedV>& v, const Scalar& alpha)
{
typedef internal::blas_traits<DerivedU> UBlasTraits;
typedef typename UBlasTraits::DirectLinearAccessType ActualUType;
diff --git a/Eigen/src/Core/products/TriangularMatrixMatrix.h b/Eigen/src/Core/products/TriangularMatrixMatrix.h
index 92cba66f6..8110507b5 100644
--- a/Eigen/src/Core/products/TriangularMatrixMatrix.h
+++ b/Eigen/src/Core/products/TriangularMatrixMatrix.h
@@ -61,7 +61,7 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,LhsIsTriangular,
const Scalar* lhs, Index lhsStride,
const Scalar* rhs, Index rhsStride,
Scalar* res, Index resStride,
- Scalar alpha, level3_blocking<Scalar,Scalar>& blocking)
+ const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
{
product_triangular_matrix_matrix<Scalar, Index,
(Mode&(UnitDiag|ZeroDiag)) | ((Mode&Upper) ? Lower : Upper),
@@ -96,7 +96,20 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
const Scalar* _lhs, Index lhsStride,
const Scalar* _rhs, Index rhsStride,
Scalar* res, Index resStride,
- Scalar alpha, level3_blocking<Scalar,Scalar>& blocking)
+ const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
+};
+
+template <typename Scalar, typename Index, int Mode,
+ int LhsStorageOrder, bool ConjugateLhs,
+ int RhsStorageOrder, bool ConjugateRhs, int Version>
+EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true,
+ LhsStorageOrder,ConjugateLhs,
+ RhsStorageOrder,ConjugateRhs,ColMajor,Version>::run(
+ Index _rows, Index _cols, Index _depth,
+ const Scalar* _lhs, Index lhsStride,
+ const Scalar* _rhs, Index rhsStride,
+ Scalar* res, Index resStride,
+ const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
{
// strip zeros
Index diagSize = (std::min)(_rows,_depth);
@@ -203,15 +216,14 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
}
}
}
-};
// implements col-major += alpha * op(general) * op(triangular)
template <typename Scalar, typename Index, int Mode,
int LhsStorageOrder, bool ConjugateLhs,
int RhsStorageOrder, bool ConjugateRhs, int Version>
struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
- LhsStorageOrder,ConjugateLhs,
- RhsStorageOrder,ConjugateRhs,ColMajor,Version>
+ LhsStorageOrder,ConjugateLhs,
+ RhsStorageOrder,ConjugateRhs,ColMajor,Version>
{
typedef gebp_traits<Scalar,Scalar> Traits;
enum {
@@ -225,7 +237,20 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
const Scalar* _lhs, Index lhsStride,
const Scalar* _rhs, Index rhsStride,
Scalar* res, Index resStride,
- Scalar alpha, level3_blocking<Scalar,Scalar>& blocking)
+ const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
+};
+
+template <typename Scalar, typename Index, int Mode,
+ int LhsStorageOrder, bool ConjugateLhs,
+ int RhsStorageOrder, bool ConjugateRhs, int Version>
+EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,false,
+ LhsStorageOrder,ConjugateLhs,
+ RhsStorageOrder,ConjugateRhs,ColMajor,Version>::run(
+ Index _rows, Index _cols, Index _depth,
+ const Scalar* _lhs, Index lhsStride,
+ const Scalar* _rhs, Index rhsStride,
+ Scalar* res, Index resStride,
+ const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
{
// strip zeros
Index diagSize = (std::min)(_cols,_depth);
@@ -343,7 +368,6 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
}
}
}
-};
/***************************************************************************
* Wrapper to product_triangular_matrix_matrix
@@ -364,7 +388,7 @@ struct TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false>
TriangularProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}
- template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
+ template<typename Dest> void scaleAndAddTo(Dest& dst, const Scalar& alpha) const
{
typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs);
typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs);
diff --git a/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h b/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h
index 4d20de617..ba41a1c99 100644
--- a/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h
+++ b/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h
@@ -91,7 +91,7 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \
conjA = ((LhsStorageOrder==ColMajor) && ConjugateLhs) ? 1 : 0 \
}; \
\
- static EIGEN_DONT_INLINE void run( \
+ static void run( \
Index _rows, Index _cols, Index _depth, \
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \
@@ -205,7 +205,7 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \
conjA = ((RhsStorageOrder==ColMajor) && ConjugateRhs) ? 1 : 0 \
}; \
\
- static EIGEN_DONT_INLINE void run( \
+ static void run( \
Index _rows, Index _cols, Index _depth, \
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \
diff --git a/Eigen/src/Core/products/TriangularMatrixVector.h b/Eigen/src/Core/products/TriangularMatrixVector.h
index b1c10c201..c8b7d28c4 100644
--- a/Eigen/src/Core/products/TriangularMatrixVector.h
+++ b/Eigen/src/Core/products/TriangularMatrixVector.h
@@ -27,7 +27,13 @@ struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,C
HasZeroDiag = (Mode & ZeroDiag)==ZeroDiag
};
static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const LhsScalar* _lhs, Index lhsStride,
- const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, ResScalar alpha)
+ const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, const ResScalar& alpha);
+};
+
+template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs, int Version>
+EIGEN_DONT_INLINE void triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,ColMajor,Version>
+ ::run(Index _rows, Index _cols, const LhsScalar* _lhs, Index lhsStride,
+ const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, const ResScalar& alpha)
{
static const Index PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH;
Index size = (std::min)(_rows,_cols);
@@ -78,7 +84,6 @@ struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,C
_res, resIncr, alpha);
}
}
-};
template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs,int Version>
struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,RowMajor,Version>
@@ -89,8 +94,14 @@ struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,C
HasUnitDiag = (Mode & UnitDiag)==UnitDiag,
HasZeroDiag = (Mode & ZeroDiag)==ZeroDiag
};
- static void run(Index _rows, Index _cols, const LhsScalar* _lhs, Index lhsStride,
- const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, ResScalar alpha)
+ static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const LhsScalar* _lhs, Index lhsStride,
+ const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, const ResScalar& alpha);
+};
+
+template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs,int Version>
+EIGEN_DONT_INLINE void triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,RowMajor,Version>
+ ::run(Index _rows, Index _cols, const LhsScalar* _lhs, Index lhsStride,
+ const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, const ResScalar& alpha)
{
static const Index PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH;
Index diagSize = (std::min)(_rows,_cols);
@@ -141,7 +152,6 @@ struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,C
&res.coeffRef(diagSize), resIncr, alpha);
}
}
-};
/***************************************************************************
* Wrapper to product_triangular_vector
@@ -171,7 +181,7 @@ struct TriangularProduct<Mode,true,Lhs,false,Rhs,true>
TriangularProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}
- template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
+ template<typename Dest> void scaleAndAddTo(Dest& dst, const Scalar& alpha) const
{
eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
@@ -187,7 +197,7 @@ struct TriangularProduct<Mode,false,Lhs,true,Rhs,false>
TriangularProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}
- template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
+ template<typename Dest> void scaleAndAddTo(Dest& dst, const Scalar& alpha) const
{
eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
@@ -205,7 +215,7 @@ namespace internal {
template<> struct trmv_selector<ColMajor>
{
template<int Mode, typename Lhs, typename Rhs, typename Dest>
- static void run(const TriangularProduct<Mode,true,Lhs,false,Rhs,true>& prod, Dest& dest, typename TriangularProduct<Mode,true,Lhs,false,Rhs,true>::Scalar alpha)
+ static void run(const TriangularProduct<Mode,true,Lhs,false,Rhs,true>& prod, Dest& dest, const typename TriangularProduct<Mode,true,Lhs,false,Rhs,true>::Scalar& alpha)
{
typedef TriangularProduct<Mode,true,Lhs,false,Rhs,true> ProductType;
typedef typename ProductType::Index Index;
@@ -246,7 +256,7 @@ template<> struct trmv_selector<ColMajor>
if(!evalToDest)
{
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
- int size = dest.size();
+ Index size = dest.size();
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
#endif
if(!alphaIsCompatible)
@@ -281,7 +291,7 @@ template<> struct trmv_selector<ColMajor>
template<> struct trmv_selector<RowMajor>
{
template<int Mode, typename Lhs, typename Rhs, typename Dest>
- static void run(const TriangularProduct<Mode,true,Lhs,false,Rhs,true>& prod, Dest& dest, typename TriangularProduct<Mode,true,Lhs,false,Rhs,true>::Scalar alpha)
+ static void run(const TriangularProduct<Mode,true,Lhs,false,Rhs,true>& prod, Dest& dest, const typename TriangularProduct<Mode,true,Lhs,false,Rhs,true>::Scalar& alpha)
{
typedef TriangularProduct<Mode,true,Lhs,false,Rhs,true> ProductType;
typedef typename ProductType::LhsScalar LhsScalar;
diff --git a/Eigen/src/Core/products/TriangularMatrixVector_MKL.h b/Eigen/src/Core/products/TriangularMatrixVector_MKL.h
index 3c2c3049a..09f110da7 100644
--- a/Eigen/src/Core/products/TriangularMatrixVector_MKL.h
+++ b/Eigen/src/Core/products/TriangularMatrixVector_MKL.h
@@ -50,7 +50,7 @@ struct triangular_matrix_vector_product_trmv :
#define EIGEN_MKL_TRMV_SPECIALIZE(Scalar) \
template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
struct triangular_matrix_vector_product<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,ColMajor,Specialized> { \
- static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const Scalar* _lhs, Index lhsStride, \
+ static void run(Index _rows, Index _cols, const Scalar* _lhs, Index lhsStride, \
const Scalar* _rhs, Index rhsIncr, Scalar* _res, Index resIncr, Scalar alpha) { \
triangular_matrix_vector_product_trmv<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,ColMajor>::run( \
_rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \
@@ -58,7 +58,7 @@ struct triangular_matrix_vector_product<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs
}; \
template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
struct triangular_matrix_vector_product<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,RowMajor,Specialized> { \
- static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const Scalar* _lhs, Index lhsStride, \
+ static void run(Index _rows, Index _cols, const Scalar* _lhs, Index lhsStride, \
const Scalar* _rhs, Index rhsIncr, Scalar* _res, Index resIncr, Scalar alpha) { \
triangular_matrix_vector_product_trmv<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,RowMajor>::run( \
_rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \
@@ -81,8 +81,8 @@ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,
IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \
LowUp = IsLower ? Lower : Upper \
}; \
- static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const EIGTYPE* _lhs, Index lhsStride, \
- const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* _res, Index resIncr, EIGTYPE alpha) \
+ static void run(Index _rows, Index _cols, const EIGTYPE* _lhs, Index lhsStride, \
+ const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* _res, Index resIncr, EIGTYPE alpha) \
{ \
if (ConjLhs || IsZeroDiag) { \
triangular_matrix_vector_product<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,ColMajor,BuiltIn>::run( \
@@ -166,8 +166,8 @@ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,
IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \
LowUp = IsLower ? Lower : Upper \
}; \
- static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const EIGTYPE* _lhs, Index lhsStride, \
- const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* _res, Index resIncr, EIGTYPE alpha) \
+ static void run(Index _rows, Index _cols, const EIGTYPE* _lhs, Index lhsStride, \
+ const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* _res, Index resIncr, EIGTYPE alpha) \
{ \
if (IsZeroDiag) { \
triangular_matrix_vector_product<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,RowMajor,BuiltIn>::run( \
diff --git a/Eigen/src/Core/products/TriangularSolverMatrix.h b/Eigen/src/Core/products/TriangularSolverMatrix.h
index a49ea3183..f103eae72 100644
--- a/Eigen/src/Core/products/TriangularSolverMatrix.h
+++ b/Eigen/src/Core/products/TriangularSolverMatrix.h
@@ -18,7 +18,7 @@ namespace internal {
template <typename Scalar, typename Index, int Side, int Mode, bool Conjugate, int TriStorageOrder>
struct triangular_solve_matrix<Scalar,Index,Side,Mode,Conjugate,TriStorageOrder,RowMajor>
{
- static EIGEN_DONT_INLINE void run(
+ static void run(
Index size, Index cols,
const Scalar* tri, Index triStride,
Scalar* _other, Index otherStride,
@@ -42,6 +42,13 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageO
Index size, Index otherSize,
const Scalar* _tri, Index triStride,
Scalar* _other, Index otherStride,
+ level3_blocking<Scalar,Scalar>& blocking);
+};
+template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder>
+EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor>::run(
+ Index size, Index otherSize,
+ const Scalar* _tri, Index triStride,
+ Scalar* _other, Index otherStride,
level3_blocking<Scalar,Scalar>& blocking)
{
Index cols = otherSize;
@@ -173,7 +180,6 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageO
}
}
}
-};
/* Optimized triangular solver with multiple left hand sides and the triangular matrix on the right
*/
@@ -184,6 +190,13 @@ struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorage
Index size, Index otherSize,
const Scalar* _tri, Index triStride,
Scalar* _other, Index otherStride,
+ level3_blocking<Scalar,Scalar>& blocking);
+};
+template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder>
+EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor>::run(
+ Index size, Index otherSize,
+ const Scalar* _tri, Index triStride,
+ Scalar* _other, Index otherStride,
level3_blocking<Scalar,Scalar>& blocking)
{
Index rows = otherSize;
@@ -308,7 +321,6 @@ struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorage
}
}
}
-};
} // end namespace internal
diff --git a/Eigen/src/Core/products/TriangularSolverMatrix_MKL.h b/Eigen/src/Core/products/TriangularSolverMatrix_MKL.h
index a4f508b2e..6a0bb8339 100644
--- a/Eigen/src/Core/products/TriangularSolverMatrix_MKL.h
+++ b/Eigen/src/Core/products/TriangularSolverMatrix_MKL.h
@@ -48,7 +48,7 @@ struct triangular_solve_matrix<EIGTYPE,Index,OnTheLeft,Mode,Conjugate,TriStorage
IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \
conjA = ((TriStorageOrder==ColMajor) && Conjugate) ? 1 : 0 \
}; \
- static EIGEN_DONT_INLINE void run( \
+ static void run( \
Index size, Index otherSize, \
const EIGTYPE* _tri, Index triStride, \
EIGTYPE* _other, Index otherStride, level3_blocking<EIGTYPE,EIGTYPE>& /*blocking*/) \
@@ -103,7 +103,7 @@ struct triangular_solve_matrix<EIGTYPE,Index,OnTheRight,Mode,Conjugate,TriStorag
IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \
conjA = ((TriStorageOrder==ColMajor) && Conjugate) ? 1 : 0 \
}; \
- static EIGEN_DONT_INLINE void run( \
+ static void run( \
Index size, Index otherSize, \
const EIGTYPE* _tri, Index triStride, \
EIGTYPE* _other, Index otherStride, level3_blocking<EIGTYPE,EIGTYPE>& /*blocking*/) \