diff options
author | Gael Guennebaud <g.gael@free.fr> | 2016-11-30 17:59:13 +0100 |
---|---|---|
committer | Gael Guennebaud <g.gael@free.fr> | 2016-11-30 17:59:13 +0100 |
commit | c927af60edbf7dd859fc8057151e40b0fe2de661 (patch) | |
tree | 2be21cf94a2a3ae83242d651d5900ec303f7ced6 | |
parent | ab4ef5e66e33dd585bed2207c7c53948e30b2875 (diff) |
Fix a performance regression in (mat*mat)*vec for which mat*mat was evaluated multiple times.
-rw-r--r-- | Eigen/src/Core/GeneralProduct.h | 2 |
-rw-r--r-- | Eigen/src/Core/ProductEvaluators.h | 9 |
-rw-r--r-- | Eigen/src/Core/util/XprHelper.h | 10 |
-rw-r--r-- | test/product_notemporary.cpp | 4 |
4 files changed, 16 insertions, 9 deletions
```diff
diff --git a/Eigen/src/Core/GeneralProduct.h b/Eigen/src/Core/GeneralProduct.h
index 0cc2d08e2..8d87cab1f 100644
--- a/Eigen/src/Core/GeneralProduct.h
+++ b/Eigen/src/Core/GeneralProduct.h
@@ -329,6 +329,7 @@ template<> struct gemv_dense_selector<OnTheRight,ColMajor,false>
   template<typename Lhs, typename Rhs, typename Dest>
   static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
   {
+    EIGEN_STATIC_ASSERT((!nested_eval<Lhs,1>::Evaluate),EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE);
     // TODO if rhs is large enough it might be beneficial to make sure that dest is sequentially stored in memory, otherwise use a temp
     typename nested_eval<Rhs,1>::type actual_rhs(rhs);
     const Index size = rhs.rows();
@@ -342,6 +343,7 @@ template<> struct gemv_dense_selector<OnTheRight,RowMajor,false>
   template<typename Lhs, typename Rhs, typename Dest>
   static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
   {
+    EIGEN_STATIC_ASSERT((!nested_eval<Lhs,1>::Evaluate),EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE);
     typename nested_eval<Rhs,Lhs::RowsAtCompileTime>::type actual_rhs(rhs);
     const Index rows = dest.rows();
     for(Index i=0; i<rows; ++i)
diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h
index c9e2e1a07..b0e8b189e 100644
--- a/Eigen/src/Core/ProductEvaluators.h
+++ b/Eigen/src/Core/ProductEvaluators.h
@@ -366,17 +366,22 @@ template<typename Lhs, typename Rhs>
 struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemvProduct>
   : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemvProduct> >
 {
+  typedef typename nested_eval<Lhs,1>::type LhsNested;
+  typedef typename nested_eval<Rhs,1>::type RhsNested;
   typedef typename Product<Lhs,Rhs>::Scalar Scalar;
   enum { Side = Lhs::IsVectorAtCompileTime ? OnTheLeft : OnTheRight };
-  typedef typename internal::conditional<int(Side)==OnTheRight,Lhs,Rhs>::type MatrixType;
+  typedef typename internal::remove_all<typename internal::conditional<int(Side)==OnTheRight,LhsNested,RhsNested>::type>::type MatrixType;
 
   template<typename Dest>
   static EIGEN_STRONG_INLINE void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
   {
+    LhsNested actual_lhs(lhs);
+    RhsNested actual_rhs(rhs);
+
     internal::gemv_dense_selector<Side,
                             (int(MatrixType::Flags)&RowMajorBit) ? RowMajor : ColMajor,
                             bool(internal::blas_traits<MatrixType>::HasUsableDirectAccess)
-                           >::run(lhs, rhs, dst, alpha);
+                           >::run(actual_lhs, actual_rhs, dst, alpha);
   }
 };
 
diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h
index 7cfa2c49f..efd179b35 100644
--- a/Eigen/src/Core/util/XprHelper.h
+++ b/Eigen/src/Core/util/XprHelper.h
@@ -445,15 +445,11 @@ template<typename T, int n, typename PlainObject = typename plain_object_eval<T>
     // Another solution could be to count the number of temps?
     NAsInteger = n == Dynamic ? HugeCost : n,
     CostEval = (NAsInteger+1) * ScalarReadCost + CoeffReadCost,
-    CostNoEval = NAsInteger * CoeffReadCost
+    CostNoEval = NAsInteger * CoeffReadCost,
+    Evaluate = (int(evaluator<T>::Flags) & EvalBeforeNestingBit) || (int(CostEval) < int(CostNoEval))
   };
 
-  typedef typename conditional<
-      ( (int(evaluator<T>::Flags) & EvalBeforeNestingBit) ||
-        (int(CostEval) < int(CostNoEval)) ),
-      PlainObject,
-      typename ref_selector<T>::type
-  >::type type;
+  typedef typename conditional<Evaluate, PlainObject, typename ref_selector<T>::type>::type type;
 };
 
 template<typename T>
diff --git a/test/product_notemporary.cpp b/test/product_notemporary.cpp
index 2bb19a681..8bf71b4f2 100644
--- a/test/product_notemporary.cpp
+++ b/test/product_notemporary.cpp
@@ -136,6 +136,10 @@ template<typename MatrixType> void product_notemporary(const MatrixType& m)
   VERIFY_EVALUATION_COUNT( rm3.noalias() -= (cv1) * (rv1 * m1), 1 );
   VERIFY_EVALUATION_COUNT( rm3.noalias() = (m1*cv1) * (rv1 * m1), 2 );
   VERIFY_EVALUATION_COUNT( rm3.noalias() += (m1*cv1) * (rv1 * m1), 2 );
+
+  // Check nested products
+  VERIFY_EVALUATION_COUNT( cvres.noalias() = m1.adjoint() * m1 * cv1, 1 );
+  VERIFY_EVALUATION_COUNT( rvres.noalias() = rv1 * (m1 * m2.adjoint()), 1 );
 }
 
 void test_product_notemporary()
```