From 4920f2011e8acd0e44c0c6646843d5ca5d79b68c Mon Sep 17 00:00:00 2001 From: Benoit Jacob Date: Tue, 8 Apr 2008 14:15:01 +0000 Subject: finish making use of CoeffReadCost and the new XprCopy everywhere seems appropriate to me. --- Eigen/src/Core/Dot.h | 29 ++++++++++++++++++----------- Eigen/src/Core/Product.h | 18 +++++++++--------- Eigen/src/Core/Redux.h | 43 ++++++++++++++++++++++++++++--------------- Eigen/src/Core/Visitor.h | 23 ++++++++++++++++++++--- 4 files changed, 75 insertions(+), 38 deletions(-) diff --git a/Eigen/src/Core/Dot.h b/Eigen/src/Core/Dot.h index ba45d5192..1d768b259 100644 --- a/Eigen/src/Core/Dot.h +++ b/Eigen/src/Core/Dot.h @@ -72,18 +72,25 @@ template typename ei_traits::Scalar MatrixBase::dot(const MatrixBase& other) const { - typename Derived::XprCopy xprCopy(derived()); - typename OtherDerived::XprCopy otherXprCopy(other.derived()); + typedef typename Derived::XprCopy XprCopy; + typedef typename OtherDerived::XprCopy OtherXprCopy; + typedef typename ei_unref::type _XprCopy; + typedef typename ei_unref::type _OtherXprCopy; + XprCopy xprCopy(derived()); + OtherXprCopy otherXprCopy(other.derived()); - ei_assert(IsVectorAtCompileTime - && OtherDerived::IsVectorAtCompileTime - && xprCopy.size() == otherXprCopy.size()); + ei_assert(_XprCopy::IsVectorAtCompileTime + && _OtherXprCopy::IsVectorAtCompileTime + && xprCopy.size() == otherXprCopy.size()); Scalar res; - if(SizeAtCompileTime <= EIGEN_UNROLLING_LIMIT) + const bool unroll = SizeAtCompileTime + * (_XprCopy::CoeffReadCost + _OtherXprCopy::CoeffReadCost + NumTraits::MulCost) + + (SizeAtCompileTime - 1) * NumTraits::AddCost + <= EIGEN_UNROLLING_LIMIT; + if(unroll) ei_dot_unroller::type, - typename ei_unref::type> + unroll ? SizeAtCompileTime : Dynamic, + _XprCopy, _OtherXprCopy> ::run(xprCopy, otherXprCopy, res); else { @@ -142,8 +149,8 @@ template bool MatrixBase::isOrtho (const MatrixBase& other, RealScalar prec) const { - typename Derived::XprCopy xprCopy(derived()); - typename OtherDerived::XprCopy otherXprCopy(other.derived()); + typename ei_xpr_copy::type xprCopy(derived()); + typename ei_xpr_copy::type otherXprCopy(other.derived()); return ei_abs2(xprCopy.dot(otherXprCopy)) <= prec * prec * xprCopy.norm2() * otherXprCopy.norm2(); } diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h index d303cbdb7..7f149075b 100644 --- a/Eigen/src/Core/Product.h +++ b/Eigen/src/Core/Product.h @@ -86,13 +86,13 @@ struct ei_traits > typedef typename Lhs::Scalar Scalar; typedef typename ei_xpr_copy::type LhsXprCopy; typedef typename ei_xpr_copy::type RhsXprCopy; - typedef typename ei_unref::type ActualLhs; - typedef typename ei_unref::type ActualRhs; + typedef typename ei_unref::type _LhsXprCopy; + typedef typename ei_unref::type _RhsXprCopy; enum { - LhsCoeffReadCost = ActualLhs::CoeffReadCost, - RhsCoeffReadCost = ActualRhs::CoeffReadCost, - LhsFlags = ActualLhs::Flags, - RhsFlags = ActualRhs::Flags, + LhsCoeffReadCost = _LhsXprCopy::CoeffReadCost, + RhsCoeffReadCost = _RhsXprCopy::CoeffReadCost, + LhsFlags = _LhsXprCopy::Flags, + RhsFlags = _RhsXprCopy::Flags, RowsAtCompileTime = Lhs::RowsAtCompileTime, ColsAtCompileTime = Rhs::ColsAtCompileTime, MaxRowsAtCompileTime = Lhs::MaxRowsAtCompileTime, @@ -117,9 +117,10 @@ template class Product : ei_no_assignm public: EIGEN_GENERIC_PUBLIC_INTERFACE(Product) - typedef typename ei_traits::LhsXprCopy LhsXprCopy; typedef typename ei_traits::RhsXprCopy RhsXprCopy; + typedef typename ei_traits::_LhsXprCopy _LhsXprCopy; + typedef typename ei_traits::_RhsXprCopy _RhsXprCopy; Product(const Lhs& lhs, const Rhs& rhs) : m_lhs(lhs), m_rhs(rhs) @@ -144,8 +145,7 @@ template class Product : ei_no_assignm { ei_product_unroller::type, - typename ei_unref::type> + _LhsXprCopy, _RhsXprCopy> ::run(row, col, m_lhs, m_rhs, res); } else diff --git a/Eigen/src/Core/Redux.h b/Eigen/src/Core/Redux.h index bccfe42b4..f7fdbc077 100644 --- a/Eigen/src/Core/Redux.h +++ b/Eigen/src/Core/Redux.h @@ -87,15 +87,19 @@ struct ei_traits > typedef typename ei_result_of< BinaryOp(typename MatrixType::Scalar) >::type Scalar; + typedef typename ei_xpr_copy::type MatrixTypeXprCopy; + typedef typename ei_unref::type _MatrixTypeXprCopy; enum { RowsAtCompileTime = Direction==Vertical ? 1 : MatrixType::RowsAtCompileTime, ColsAtCompileTime = Direction==Horizontal ? 1 : MatrixType::ColsAtCompileTime, - MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, - MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime, + MaxRowsAtCompileTime = Direction==Vertical ? 1 : MatrixType::MaxRowsAtCompileTime, + MaxColsAtCompileTime = Direction==Horizontal ? 1 : MatrixType::MaxColsAtCompileTime, Flags = (RowsAtCompileTime == Dynamic || ColsAtCompileTime == Dynamic) - ? (unsigned int)MatrixType::Flags - : (unsigned int)MatrixType::Flags & ~LargeBit, - CoeffReadCost = 1 //FIXME -- unimplemented! + ? (unsigned int)_MatrixTypeXprCopy::Flags + : (unsigned int)_MatrixTypeXprCopy::Flags & ~LargeBit, + TraversalSize = Direction==Vertical ? RowsAtCompileTime : ColsAtCompileTime, + CoeffReadCost = TraversalSize * _MatrixTypeXprCopy::CoeffReadCost + + (TraversalSize - 1) * ei_functor_traits::Cost }; }; @@ -106,6 +110,8 @@ class PartialRedux : ei_no_assignment_operator, public: EIGEN_GENERIC_PUBLIC_INTERFACE(PartialRedux) + typedef typename ei_traits::MatrixTypeXprCopy MatrixTypeXprCopy; + typedef typename ei_traits::_MatrixTypeXprCopy _MatrixTypeXprCopy; PartialRedux(const MatrixType& mat, const BinaryOp& func = BinaryOp()) : m_matrix(mat), m_functor(func) {} @@ -124,7 +130,7 @@ class PartialRedux : ei_no_assignment_operator, } protected: - const typename MatrixType::XprCopy m_matrix; + const MatrixTypeXprCopy m_matrix; const BinaryOp m_functor; }; @@ -171,10 +177,13 @@ template typename ei_result_of::Scalar)>::type MatrixBase::redux(const BinaryOp& func) const { - if(SizeAtCompileTime <= EIGEN_UNROLLING_LIMIT) + const bool unroll = SizeAtCompileTime * CoeffReadCost + + (SizeAtCompileTime-1) * ei_functor_traits::Cost + <= EIGEN_UNROLLING_LIMIT; + if(unroll) return ei_redux_unroller0 && SizeAtCompileTime <= EIGEN_UNROLLING_LIMIT) ? - SizeAtCompileTime : Dynamic>::run(derived(), func); + unroll ? SizeAtCompileTime : Dynamic> + ::run(derived(), func); else { Scalar res; @@ -291,10 +300,12 @@ struct ei_any_unroller template bool MatrixBase::all(void) const { - if(SizeAtCompileTime <= EIGEN_UNROLLING_LIMIT) + const bool unroll = SizeAtCompileTime * (CoeffReadCost + NumTraits::AddCost) + <= EIGEN_UNROLLING_LIMIT; + if(unroll) return ei_all_unroller0 && SizeAtCompileTime <= EIGEN_UNROLLING_LIMIT) ? - SizeAtCompileTime : Dynamic>::run(derived()); + unroll ? SizeAtCompileTime : Dynamic + >::run(derived()); else { for(int j = 0; j < cols(); j++) @@ -311,10 +322,12 @@ bool MatrixBase::all(void) const template bool MatrixBase::any(void) const { - if(SizeAtCompileTime <= EIGEN_UNROLLING_LIMIT) + const bool unroll = SizeAtCompileTime * (CoeffReadCost + NumTraits::AddCost) + <= EIGEN_UNROLLING_LIMIT; + if(unroll) return ei_any_unroller0 && SizeAtCompileTime <= EIGEN_UNROLLING_LIMIT) ? - SizeAtCompileTime : Dynamic>::run(derived()); + unroll ? SizeAtCompileTime : Dynamic + >::run(derived()); else { for(int j = 0; j < cols(); j++) diff --git a/Eigen/src/Core/Visitor.h b/Eigen/src/Core/Visitor.h index 2cff3a576..0aeac0cfc 100644 --- a/Eigen/src/Core/Visitor.h +++ b/Eigen/src/Core/Visitor.h @@ -74,10 +74,13 @@ template template void MatrixBase::visit(Visitor& visitor) const { - if(SizeAtCompileTime <= EIGEN_UNROLLING_LIMIT) + const bool unroll = SizeAtCompileTime * CoeffReadCost + + (SizeAtCompileTime-1) * ei_functor_traits::Cost + <= EIGEN_UNROLLING_LIMIT; + if(unroll) return ei_visitor_unroller0 && SizeAtCompileTime <= EIGEN_UNROLLING_LIMIT) ? - SizeAtCompileTime : Dynamic>::run(derived(), visitor); + unroll ? SizeAtCompileTime : Dynamic + >::run(derived(), visitor); else { visitor.init(coeff(0,0), 0, 0); @@ -124,6 +127,13 @@ struct ei_min_coeff_visitor : ei_coeff_visitor } }; +template +struct ei_functor_traits > { + enum { + Cost = NumTraits::AddCost + }; +}; + /** \internal * \brief Visitor computing the max coefficient with its value and coordinates * @@ -143,6 +153,13 @@ struct ei_max_coeff_visitor : ei_coeff_visitor } }; +template +struct ei_functor_traits > { + enum { + Cost = NumTraits::AddCost + }; +}; + /** \returns the minimum of all coefficients of *this * and puts in *row and *col its location. * -- cgit v1.2.3