aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src/Core
diff options
context:
space:
mode:
Diffstat (limited to 'Eigen/src/Core')
-rw-r--r--Eigen/src/Core/Product.h5
-rw-r--r--Eigen/src/Core/SolveTriangular.h2
-rw-r--r--Eigen/src/Core/products/GeneralBlockPanelKernel.h14
-rw-r--r--Eigen/src/Core/products/GeneralMatrixVector.h12
-rw-r--r--Eigen/src/Core/products/TriangularMatrixVector.h2
5 files changed, 19 insertions, 16 deletions
diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h
index edd79bd9a..e929b8d89 100644
--- a/Eigen/src/Core/Product.h
+++ b/Eigen/src/Core/Product.h
@@ -298,8 +298,7 @@ class GeneralProduct<Lhs, Rhs, GemvProduct>
{
ei_assert(m_lhs.rows() == dst.rows() && m_rhs.cols() == dst.cols());
ei_gemv_selector<Side,(int(MatrixType::Flags)&RowMajorBit) ? RowMajor : ColMajor,
- bool(ei_blas_traits<MatrixType>::HasUsableDirectAccess)
- /*&& ei_is_same_type<typename Lhs::Scalar, typename Rhs::Scalar>::ret*/>::run(*this, dst, alpha);
+ bool(ei_blas_traits<MatrixType>::HasUsableDirectAccess)>::run(*this, dst, alpha);
}
};
@@ -357,7 +356,7 @@ template<> struct ei_gemv_selector<OnTheRight,ColMajor,true>
<Index,LhsScalar,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsBlasTraits::NeedToConjugate>::run(
actualLhs.rows(), actualLhs.cols(),
&actualLhs.const_cast_derived().coeffRef(0,0), actualLhs.outerStride(),
- actualRhs, actualRhs.innerStride(),
+ actualRhs.data(), actualRhs.innerStride(),
actualDest, 1,
actualAlpha);
diff --git a/Eigen/src/Core/SolveTriangular.h b/Eigen/src/Core/SolveTriangular.h
index f9e24a193..c25317989 100644
--- a/Eigen/src/Core/SolveTriangular.h
+++ b/Eigen/src/Core/SolveTriangular.h
@@ -151,7 +151,7 @@ struct ei_triangular_solver_selector<Lhs,Rhs,OnTheLeft,Mode,NoUnrolling,ColMajor
ei_general_matrix_vector_product<Index,Scalar,ColMajor,LhsProductTraits::NeedToConjugate,Scalar,false>::run(
r, actualPanelWidth,
&(actualLhs.const_cast_derived().coeffRef(endBlock,startBlock)), actualLhs.outerStride(),
- other.segment(startBlock, actualPanelWidth), other.innerStride(),
+ &other.coeff(startBlock), other.innerStride(),
&(other.coeffRef(endBlock, 0)), other.innerStride(), Scalar(-1));
}
}
diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
index ffb4cd386..9214582ed 100644
--- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h
+++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
@@ -134,12 +134,14 @@ inline void computeProductBlockingSizes(std::ptrdiff_t& k, std::ptrdiff_t& m, st
}
// FIXME
-// #ifdef EIGEN_HAS_FUSE_CJMADD
+#ifndef EIGEN_HAS_FUSE_CJMADD
+#define EIGEN_HAS_FUSE_CJMADD
+#endif
+#ifdef EIGEN_HAS_FUSE_CJMADD
#define MADD(CJ,A,B,C,T) C = CJ.pmadd(A,B,C);
-// #else
- //#define MADD(CJ,A,B,C,T) T = B; T = CJ.pmul(A,T); C = ei_padd(C,ResPacket(T));
-// #define MADD(CJ,A,B,C,T) T = B; T = CJ.pmul(A,T);
-// #endif
+#else
+ #define MADD(CJ,A,B,C,T) T = B; T = CJ.pmul(A,T); C = ei_padd(C,ResPacket(T));
+#endif
// optimized GEneral packed Block * packed Panel product kernel
template<typename LhsScalar, typename RhsScalar, typename Index, int mr, int nr, bool ConjugateLhs, bool ConjugateRhs>
@@ -712,7 +714,9 @@ EIGEN_ASM_COMMENT("myend");
const RhsScalar* blB = unpackedB;
for(Index k=0; k<depth; k++)
{
+ #ifndef EIGEN_HAS_FUSE_CJMADD
RhsPacket T0;
+ #endif
MADD(pcj,ei_pload<LhsPacket>(blA), ei_pload<RhsPacket>(blB), C0, T0);
blB += RhsPacketSize;
blA += LhsPacketSize;
diff --git a/Eigen/src/Core/products/GeneralMatrixVector.h b/Eigen/src/Core/products/GeneralMatrixVector.h
index e0d71be7e..d772834a2 100644
--- a/Eigen/src/Core/products/GeneralMatrixVector.h
+++ b/Eigen/src/Core/products/GeneralMatrixVector.h
@@ -53,15 +53,13 @@ typedef typename ei_meta_if<Vectorizable,_LhsPacket,LhsScalar>::ret LhsPacket;
typedef typename ei_meta_if<Vectorizable,_RhsPacket,RhsScalar>::ret RhsPacket;
typedef typename ei_meta_if<Vectorizable,_ResPacket,ResScalar>::ret ResPacket;
-template<typename RhsType>
EIGEN_DONT_INLINE static void run(
Index rows, Index cols,
const LhsScalar* lhs, Index lhsStride,
- const RhsType&/*const RhsScalar**/ rhs, Index rhsIncr,
+ const RhsScalar* rhs, Index rhsIncr,
ResScalar* res, Index resIncr,
ResScalar alpha)
{
- EIGEN_UNUSED_VARIABLE(rhsIncr);
ei_internal_assert(resIncr==1);
#ifdef _EIGEN_ACCUMULATE_PACKETS
#error _EIGEN_ACCUMULATE_PACKETS has already been defined
@@ -147,8 +145,10 @@ EIGEN_DONT_INLINE static void run(
Index columnBound = ((cols-skipColumns)/columnsAtOnce)*columnsAtOnce + skipColumns;
for (Index i=skipColumns; i<columnBound; i+=columnsAtOnce)
{
- RhsPacket ptmp0 = ei_pset1<RhsPacket>(alpha*rhs[i]), ptmp1 = ei_pset1<RhsPacket>(alpha*rhs[i+offset1]),
- ptmp2 = ei_pset1<RhsPacket>(alpha*rhs[i+2]), ptmp3 = ei_pset1<RhsPacket>(alpha*rhs[i+offset3]);
+ RhsPacket ptmp0 = ei_pset1<RhsPacket>(alpha*rhs[i*rhsIncr]),
+ ptmp1 = ei_pset1<RhsPacket>(alpha*rhs[(i+offset1)*rhsIncr]),
+ ptmp2 = ei_pset1<RhsPacket>(alpha*rhs[(i+2)*rhsIncr]),
+ ptmp3 = ei_pset1<RhsPacket>(alpha*rhs[(i+offset3)*rhsIncr]);
// this helps a lot generating better binary code
const LhsScalar *lhs0 = lhs + i*lhsStride, *lhs1 = lhs + (i+offset1)*lhsStride,
@@ -239,7 +239,7 @@ EIGEN_DONT_INLINE static void run(
{
for (Index i=start; i<end; ++i)
{
- RhsPacket ptmp0 = ei_pset1<RhsPacket>(alpha*rhs[i]);
+ RhsPacket ptmp0 = ei_pset1<RhsPacket>(alpha*rhs[i*rhsIncr]);
const LhsScalar* lhs0 = lhs + i*lhsStride;
if (Vectorizable)
diff --git a/Eigen/src/Core/products/TriangularMatrixVector.h b/Eigen/src/Core/products/TriangularMatrixVector.h
index 16b02a425..67c131ab2 100644
--- a/Eigen/src/Core/products/TriangularMatrixVector.h
+++ b/Eigen/src/Core/products/TriangularMatrixVector.h
@@ -79,7 +79,7 @@ struct ei_product_triangular_vector_selector<true,Lhs,Rhs,Result,Mode,ConjLhs,Co
ei_general_matrix_vector_product<Index,Scalar,ColMajor,ConjLhs,Scalar,ConjRhs>::run(
r, actualPanelWidth,
&(lhs.const_cast_derived().coeffRef(s,pi)), lhs.outerStride(),
- rhs.segment(pi, actualPanelWidth), rhs.innerStride(),
+ &rhs.coeff(pi), rhs.innerStride(),
&res.coeffRef(s), res.innerStride(), alpha);
}
}