From 69980379300b035daae6de26eb4bf394651d2de3 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Sat, 7 Jun 2008 01:07:48 +0000 Subject: * move some compile time "if" to their respective unroller (assign and dot) * fix a couple of compilation issues when unrolling is disabled * reduce default unrolling limit to a more reasonable value --- Eigen/src/Core/Assign.h | 68 ++++++++++++++------------- Eigen/src/Core/Dot.h | 37 ++++++++------- Eigen/src/Core/Part.h | 107 ++++++++++++++++++++++++------------------- Eigen/src/Core/Product.h | 6 +-- Eigen/src/Core/Redux.h | 30 ++++++------ Eigen/src/Core/Visitor.h | 26 +++++------ Eigen/src/Core/util/Macros.h | 2 +- 7 files changed, 144 insertions(+), 132 deletions(-) (limited to 'Eigen/src/Core') diff --git a/Eigen/src/Core/Assign.h b/Eigen/src/Core/Assign.h index 21d5e3d11..98df25235 100644 --- a/Eigen/src/Core/Assign.h +++ b/Eigen/src/Core/Assign.h @@ -58,10 +58,30 @@ struct ei_matrix_assignment_unroller inline static void run(Derived1 &, const Derived2 &) {} }; +// Dynamic col-major template -struct ei_matrix_assignment_unroller +struct ei_matrix_assignment_unroller { - inline static void run(Derived1 &, const Derived2 &) {} + inline static void run(Derived1 &dst, const Derived2 &src) + { + for(int j = 0; j < dst.cols(); j++) + for(int i = 0; i < dst.rows(); i++) + dst.coeffRef(i, j) = src.coeff(i, j); + } +}; + +// Dynamic row-major +template +struct ei_matrix_assignment_unroller +{ + inline static void run(Derived1 &dst, const Derived2 &src) + { + // traverse in row-major order + // in order to allow the compiler to unroll the inner loop + for(int i = 0; i < dst.rows(); i++) + for(int j = 0; j < dst.cols(); j++) + dst.coeffRef(i, j) = src.coeff(i, j); + } }; //---- @@ -103,10 +123,12 @@ struct ei_matrix_assignment_packet_unroller template ::size==0) - : int(Derived::RowsAtCompileTime)!=Dynamic && (int(Derived::RowsAtCompileTime)%ei_packet_traits::size==0)) ), + && ( (int(Derived::Flags) & int(OtherDerived::Flags) & Like1DArrayBit) + || ((int(Derived::Flags) & RowMajorBit) + ? int(Derived::ColsAtCompileTime)!=Dynamic + && (int(Derived::ColsAtCompileTime)%ei_packet_traits::size==0) + : int(Derived::RowsAtCompileTime)!=Dynamic + && (int(Derived::RowsAtCompileTime)%ei_packet_traits::size==0)) ), bool Unroll = Derived::SizeAtCompileTime * OtherDerived::CoeffReadCost <= EIGEN_UNROLLING_LIMIT> struct ei_assignment_impl; @@ -156,39 +178,21 @@ inline Derived& MatrixBase //---- -template -struct ei_assignment_impl // no vec + unrolling +// no vectorization +template +struct ei_assignment_impl { static void run(Derived & dst, const OtherDerived & src) { ei_matrix_assignment_unroller - ::run(dst.derived(), src.derived()); } }; -template -struct ei_assignment_impl // no vec + no unrolling + col major order -{ - static void run(Derived & dst, const OtherDerived & src) - { - if(Derived::ColsAtCompileTime == Dynamic || Derived::RowsAtCompileTime != Dynamic) - { - for(int j = 0; j < dst.cols(); j++) - for(int i = 0; i < dst.rows(); i++) - dst.coeffRef(i, j) = src.coeff(i, j); - } - else - { - // traverse in row-major order - // in order to allow the compiler to unroll the inner loop - for(int i = 0; i < dst.rows(); i++) - for(int j = 0; j < dst.cols(); j++) - dst.coeffRef(i, j) = src.coeff(i, j); - } - } -}; - //---- template @@ -224,7 +228,7 @@ struct ei_assignment_impl // vec + no-unroll }; template -struct ei_packet_assignment_seclector // row-major + complex 1D array +struct ei_packet_assignment_seclector // row-major + complex 1D array like { static void run(Derived & dst, const OtherDerived & src) { diff --git a/Eigen/src/Core/Dot.h b/Eigen/src/Core/Dot.h index c85d9e3fb..a1c1ffce8 100644 --- a/Eigen/src/Core/Dot.h +++ b/Eigen/src/Core/Dot.h @@ -26,17 +26,17 @@ #define EIGEN_DOT_H template -struct ei_dot_unroller +struct ei_dot_impl { inline static void run(const Derived1 &v1, const Derived2& v2, typename Derived1::Scalar &dot) { - ei_dot_unroller::run(v1, v2, dot); + ei_dot_impl::run(v1, v2, dot); dot += v1.coeff(Index) * ei_conj(v2.coeff(Index)); } }; template -struct ei_dot_unroller<0, Size, Derived1, Derived2> +struct ei_dot_impl<0, Size, Derived1, Derived2> { inline static void run(const Derived1 &v1, const Derived2& v2, typename Derived1::Scalar &dot) { @@ -44,15 +44,20 @@ struct ei_dot_unroller<0, Size, Derived1, Derived2> } }; -template -struct ei_dot_unroller +template +struct ei_dot_impl { - inline static void run(const Derived1&, const Derived2&, typename Derived1::Scalar&) {} + inline static void run(const Derived1& v1, const Derived2& v2, typename Derived1::Scalar& dot) + { + dot = v1.coeff(0) * ei_conj(v2.coeff(0)); + for(int i = 1; i < v1.size(); i++) + dot += v1.coeff(i)* ei_conj(v2.coeff(i)); + } }; // prevent buggy user code from causing an infinite recursion template -struct ei_dot_unroller +struct ei_dot_impl { inline static void run(const Derived1&, const Derived2&, typename Derived1::Scalar&) {} }; @@ -83,22 +88,16 @@ MatrixBase::dot(const MatrixBase& other) const EIGEN_STATIC_ASSERT_VECTOR_ONLY(_OtherNested); EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(_Nested,_OtherNested); ei_assert(nested.size() == otherNested.size()); - Scalar res; const bool unroll = SizeAtCompileTime * (_Nested::CoeffReadCost + _OtherNested::CoeffReadCost + NumTraits::MulCost) + (int(SizeAtCompileTime) - 1) * NumTraits::AddCost <= EIGEN_UNROLLING_LIMIT; - if(unroll) - ei_dot_unroller - ::run(nested, otherNested, res); - else - { - res = nested.coeff(0) * ei_conj(otherNested.coeff(0)); - for(int i = 1; i < size(); i++) - res += nested.coeff(i)* ei_conj(otherNested.coeff(i)); - } + + Scalar res; + ei_dot_impl + ::run(nested, otherNested, res); return res; } diff --git a/Eigen/src/Core/Part.h b/Eigen/src/Core/Part.h index 647e6cf2d..eb8dcbba7 100644 --- a/Eigen/src/Core/Part.h +++ b/Eigen/src/Core/Part.h @@ -92,7 +92,7 @@ inline void Part::operator=(const Other& other) } template -struct ei_part_assignment_unroller +struct ei_part_assignment_impl { enum { col = (UnrollCount-1) / Derived1::RowsAtCompileTime, @@ -101,7 +101,7 @@ struct ei_part_assignment_unroller inline static void run(Derived1 &dst, const Derived2 &src) { - ei_part_assignment_unroller::run(dst, src); + ei_part_assignment_impl::run(dst, src); if(Mode == SelfAdjoint) { @@ -122,7 +122,7 @@ struct ei_part_assignment_unroller }; template -struct ei_part_assignment_unroller +struct ei_part_assignment_impl { inline static void run(Derived1 &dst, const Derived2 &src) { @@ -133,65 +133,78 @@ struct ei_part_assignment_unroller // prevent buggy user code from causing an infinite recursion template -struct ei_part_assignment_unroller +struct ei_part_assignment_impl { inline static void run(Derived1 &, const Derived2 &) {} }; -template -struct ei_part_assignment_unroller +template +struct ei_part_assignment_impl { - inline static void run(Derived1 &, const Derived2 &) {} + inline static void run(Derived1 &dst, const Derived2 &src) + { + for(int j = 0; j < dst.cols(); j++) + for(int i = 0; i <= j; i++) + dst.coeffRef(i, j) = src.coeff(i, j); + } }; +template +struct ei_part_assignment_impl +{ + inline static void run(Derived1 &dst, const Derived2 &src) + { + for(int j = 0; j < dst.cols(); j++) + for(int i = j; i < dst.rows(); i++) + dst.coeffRef(i, j) = src.coeff(i, j); + } +}; -template -template -void Part::lazyAssign(const Other& other) +template +struct ei_part_assignment_impl { - const bool unroll = MatrixType::SizeAtCompileTime * Other::CoeffReadCost / 2 <= EIGEN_UNROLLING_LIMIT; - ei_assert(m_matrix.rows() == other.rows() && m_matrix.cols() == other.cols()); - if(unroll) + inline static void run(Derived1 &dst, const Derived2 &src) { - ei_part_assignment_unroller - ::run(m_matrix, other.derived()); + for(int j = 0; j < dst.cols(); j++) + for(int i = 0; i < j; i++) + dst.coeffRef(i, j) = src.coeff(i, j); } - else +}; +template +struct ei_part_assignment_impl +{ + inline static void run(Derived1 &dst, const Derived2 &src) { - switch(Mode) + for(int j = 0; j < dst.cols(); j++) + for(int i = j+1; i < dst.rows(); i++) + dst.coeffRef(i, j) = src.coeff(i, j); + } +}; +template +struct ei_part_assignment_impl +{ + inline static void run(Derived1 &dst, const Derived2 &src) + { + for(int j = 0; j < dst.cols(); j++) { - case Upper: - for(int j = 0; j < m_matrix.cols(); j++) - for(int i = 0; i <= j; i++) - m_matrix.coeffRef(i, j) = other.coeff(i, j); - break; - case Lower: - for(int j = 0; j < m_matrix.cols(); j++) - for(int i = j; i < m_matrix.rows(); i++) - m_matrix.coeffRef(i, j) = other.coeff(i, j); - break; - case StrictlyUpper: - for(int j = 0; j < m_matrix.cols(); j++) - for(int i = 0; i < j; i++) - m_matrix.coeffRef(i, j) = other.coeff(i, j); - break; - case StrictlyLower: - for(int j = 0; j < m_matrix.cols(); j++) - for(int i = j+1; i < m_matrix.rows(); i++) - m_matrix.coeffRef(i, j) = other.coeff(i, j); - break; - case SelfAdjoint: - for(int j = 0; j < m_matrix.cols(); j++) - { - for(int i = 0; i < j; i++) - m_matrix.coeffRef(j, i) = ei_conj(m_matrix.coeffRef(i, j) = other.coeff(i, j)); - m_matrix.coeffRef(j, j) = ei_real(other.coeff(j, j)); - } - break; + for(int i = 0; i < j; i++) + dst.coeffRef(j, i) = ei_conj(dst.coeffRef(i, j) = src.coeff(i, j)); + dst.coeffRef(j, j) = ei_real(src.coeff(j, j)); } } +}; + +template +template +void Part::lazyAssign(const Other& other) +{ + const bool unroll = MatrixType::SizeAtCompileTime * Other::CoeffReadCost / 2 <= EIGEN_UNROLLING_LIMIT; + ei_assert(m_matrix.rows() == other.rows() && m_matrix.cols() == other.cols()); + + ei_part_assignment_impl + ::run(m_matrix, other.derived()); } template diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h index 3709ebf64..7af1bb3ed 100644 --- a/Eigen/src/Core/Product.h +++ b/Eigen/src/Core/Product.h @@ -47,8 +47,8 @@ struct ei_product_impl<0, Size, Lhs, Rhs> } }; -template -struct ei_product_impl +template +struct ei_product_impl { inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar& res) { @@ -268,7 +268,7 @@ template class Product : ei_no_assignm { Scalar res; const bool unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT; - ei_product_impl ::run(row, col, m_lhs, m_rhs, res); diff --git a/Eigen/src/Core/Redux.h b/Eigen/src/Core/Redux.h index 7c5534154..821d3813d 100644 --- a/Eigen/src/Core/Redux.h +++ b/Eigen/src/Core/Redux.h @@ -63,7 +63,17 @@ template struct ei_redux_unroller { typedef typename ei_result_of::type Scalar; - static Scalar run(const Derived&, const BinaryOp&) { return Scalar(); } + static Scalar run(const Derived& mat, const BinaryOp& func) + { + Scalar res; + res = mat.coeff(0,0); + for(int i = 1; i < mat.rows(); i++) + res = func(res, mat.coeff(i, 0)); + for(int j = 1; j < mat.cols(); j++) + for(int i = 0; i < mat.rows(); i++) + res = func(res, mat.coeff(i, j)); + return res; + } }; /** \returns the result of a full redux operation on the whole matrix or vector using \a func @@ -81,21 +91,9 @@ MatrixBase::redux(const BinaryOp& func) const const bool unroll = SizeAtCompileTime * CoeffReadCost + (SizeAtCompileTime-1) * ei_functor_traits::Cost <= EIGEN_UNROLLING_LIMIT; - if(unroll) - return ei_redux_unroller - ::run(derived(), func); - else - { - Scalar res; - res = coeff(0,0); - for(int i = 1; i < rows(); i++) - res = func(res, coeff(i, 0)); - for(int j = 1; j < cols(); j++) - for(int i = 0; i < rows(); i++) - res = func(res, coeff(i, j)); - return res; - } + return ei_redux_unroller + ::run(derived(), func); } /** \returns the sum of all coefficients of *this diff --git a/Eigen/src/Core/Visitor.h b/Eigen/src/Core/Visitor.h index 6e00e4142..bd469f747 100644 --- a/Eigen/src/Core/Visitor.h +++ b/Eigen/src/Core/Visitor.h @@ -52,7 +52,15 @@ struct ei_visitor_unroller template struct ei_visitor_unroller { - inline static void run(const Derived &, Visitor&) {} + inline static void run(const Derived& mat, Visitor& visitor) + { + visitor.init(mat.coeff(0,0), 0, 0); + for(int i = 1; i < mat.rows(); i++) + visitor(mat.coeff(i, 0), i, 0); + for(int j = 1; j < mat.cols(); j++) + for(int i = 0; i < mat.rows(); i++) + visitor(mat.coeff(i, j), i, j); + } }; @@ -77,19 +85,9 @@ void MatrixBase::visit(Visitor& visitor) const const bool unroll = SizeAtCompileTime * CoeffReadCost + (SizeAtCompileTime-1) * ei_functor_traits::Cost <= EIGEN_UNROLLING_LIMIT; - if(unroll) - return ei_visitor_unroller::run(derived(), visitor); - else - { - visitor.init(coeff(0,0), 0, 0); - for(int i = 1; i < rows(); i++) - visitor(coeff(i, 0), i, 0); - for(int j = 1; j < cols(); j++) - for(int i = 0; i < rows(); i++) - visitor(coeff(i, j), i, j); - } + return ei_visitor_unroller::run(derived(), visitor); } /** \internal diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index 717e3e072..dff4d7885 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -34,7 +34,7 @@ /** Defines the maximal loop size to enable meta unrolling of loops */ #ifndef EIGEN_UNROLLING_LIMIT -#define EIGEN_UNROLLING_LIMIT 400 +#define EIGEN_UNROLLING_LIMIT 100 #endif #ifdef EIGEN_DEFAULT_TO_ROW_MAJOR -- cgit v1.2.3