diff options
author | Gael Guennebaud <g.gael@free.fr> | 2008-06-07 01:07:48 +0000 |
---|---|---|
committer | Gael Guennebaud <g.gael@free.fr> | 2008-06-07 01:07:48 +0000 |
commit | 69980379300b035daae6de26eb4bf394651d2de3 (patch) | |
tree | e0f75f44232947cf7d52271a3a8f11807bdae00b /Eigen/src/Core | |
parent | a172385720fad3b72a820da28fef158efabdb369 (diff) |
* move some compile-time "if"s into their respective unrollers (assign and dot)
* fix a couple of compilation issues when unrolling is disabled
* reduce default unrolling limit to a more reasonable value
Diffstat (limited to 'Eigen/src/Core')
-rw-r--r-- | Eigen/src/Core/Assign.h | 68 | ||||
-rw-r--r-- | Eigen/src/Core/Dot.h | 37 | ||||
-rw-r--r-- | Eigen/src/Core/Part.h | 107 | ||||
-rw-r--r-- | Eigen/src/Core/Product.h | 6 | ||||
-rw-r--r-- | Eigen/src/Core/Redux.h | 30 | ||||
-rw-r--r-- | Eigen/src/Core/Visitor.h | 26 | ||||
-rw-r--r-- | Eigen/src/Core/util/Macros.h | 2 |
7 files changed, 144 insertions, 132 deletions
diff --git a/Eigen/src/Core/Assign.h b/Eigen/src/Core/Assign.h index 21d5e3d11..98df25235 100644 --- a/Eigen/src/Core/Assign.h +++ b/Eigen/src/Core/Assign.h @@ -58,10 +58,30 @@ struct ei_matrix_assignment_unroller<Derived1, Derived2, 0> inline static void run(Derived1 &, const Derived2 &) {} }; +// Dynamic col-major template<typename Derived1, typename Derived2> -struct ei_matrix_assignment_unroller<Derived1, Derived2, Dynamic> +struct ei_matrix_assignment_unroller<Derived1, Derived2, -1> { - inline static void run(Derived1 &, const Derived2 &) {} + inline static void run(Derived1 &dst, const Derived2 &src) + { + for(int j = 0; j < dst.cols(); j++) + for(int i = 0; i < dst.rows(); i++) + dst.coeffRef(i, j) = src.coeff(i, j); + } +}; + +// Dynamic row-major +template<typename Derived1, typename Derived2> +struct ei_matrix_assignment_unroller<Derived1, Derived2, -2> +{ + inline static void run(Derived1 &dst, const Derived2 &src) + { + // traverse in row-major order + // in order to allow the compiler to unroll the inner loop + for(int i = 0; i < dst.rows(); i++) + for(int j = 0; j < dst.cols(); j++) + dst.coeffRef(i, j) = src.coeff(i, j); + } }; //---- @@ -103,10 +123,12 @@ struct ei_matrix_assignment_packet_unroller<Derived1, Derived2, Dynamic> template <typename Derived, typename OtherDerived, bool Vectorize = (int(Derived::Flags) & int(OtherDerived::Flags) & VectorizableBit) && ((int(Derived::Flags)&RowMajorBit)==(int(OtherDerived::Flags)&RowMajorBit)) - && ( (int(Derived::Flags) & int(OtherDerived::Flags) & Like1DArrayBit) - ||((int(Derived::Flags)&RowMajorBit) - ? int(Derived::ColsAtCompileTime)!=Dynamic && (int(Derived::ColsAtCompileTime)%ei_packet_traits<typename Derived::Scalar>::size==0) - : int(Derived::RowsAtCompileTime)!=Dynamic && (int(Derived::RowsAtCompileTime)%ei_packet_traits<typename Derived::Scalar>::size==0)) ), + && ( (int(Derived::Flags) & int(OtherDerived::Flags) & Like1DArrayBit) + || ((int(Derived::Flags) & RowMajorBit) + ? 
int(Derived::ColsAtCompileTime)!=Dynamic + && (int(Derived::ColsAtCompileTime)%ei_packet_traits<typename Derived::Scalar>::size==0) + : int(Derived::RowsAtCompileTime)!=Dynamic + && (int(Derived::RowsAtCompileTime)%ei_packet_traits<typename Derived::Scalar>::size==0)) ), bool Unroll = Derived::SizeAtCompileTime * OtherDerived::CoeffReadCost <= EIGEN_UNROLLING_LIMIT> struct ei_assignment_impl; @@ -156,39 +178,21 @@ inline Derived& MatrixBase<Derived> //---- -template <typename Derived, typename OtherDerived> -struct ei_assignment_impl<Derived, OtherDerived, false, true> // no vec + unrolling +// no vectorization +template <typename Derived, typename OtherDerived, bool Unroll> +struct ei_assignment_impl<Derived, OtherDerived, false, Unroll> { static void run(Derived & dst, const OtherDerived & src) { ei_matrix_assignment_unroller - <Derived, OtherDerived, int(Derived::SizeAtCompileTime) + <Derived, OtherDerived, + Unroll ? int(Derived::SizeAtCompileTime) + : Derived::ColsAtCompileTime == Dynamic || Derived::RowsAtCompileTime != Dynamic ? 
-1 // col-major + : -2 // row-major >::run(dst.derived(), src.derived()); } }; -template <typename Derived, typename OtherDerived> -struct ei_assignment_impl<Derived, OtherDerived, false, false> // no vec + no unrolling + col major order -{ - static void run(Derived & dst, const OtherDerived & src) - { - if(Derived::ColsAtCompileTime == Dynamic || Derived::RowsAtCompileTime != Dynamic) - { - for(int j = 0; j < dst.cols(); j++) - for(int i = 0; i < dst.rows(); i++) - dst.coeffRef(i, j) = src.coeff(i, j); - } - else - { - // traverse in row-major order - // in order to allow the compiler to unroll the inner loop - for(int i = 0; i < dst.rows(); i++) - for(int j = 0; j < dst.cols(); j++) - dst.coeffRef(i, j) = src.coeff(i, j); - } - } -}; - //---- template <typename Derived, typename OtherDerived> @@ -224,7 +228,7 @@ struct ei_assignment_impl<Derived, OtherDerived, true, false> // vec + no-unroll }; template <typename Derived, typename OtherDerived> -struct ei_packet_assignment_seclector<Derived, OtherDerived, true, true> // row-major + complex 1D array +struct ei_packet_assignment_seclector<Derived, OtherDerived, true, true> // row-major + complex 1D array like { static void run(Derived & dst, const OtherDerived & src) { diff --git a/Eigen/src/Core/Dot.h b/Eigen/src/Core/Dot.h index c85d9e3fb..a1c1ffce8 100644 --- a/Eigen/src/Core/Dot.h +++ b/Eigen/src/Core/Dot.h @@ -26,17 +26,17 @@ #define EIGEN_DOT_H template<int Index, int Size, typename Derived1, typename Derived2> -struct ei_dot_unroller +struct ei_dot_impl { inline static void run(const Derived1 &v1, const Derived2& v2, typename Derived1::Scalar &dot) { - ei_dot_unroller<Index-1, Size, Derived1, Derived2>::run(v1, v2, dot); + ei_dot_impl<Index-1, Size, Derived1, Derived2>::run(v1, v2, dot); dot += v1.coeff(Index) * ei_conj(v2.coeff(Index)); } }; template<int Size, typename Derived1, typename Derived2> -struct ei_dot_unroller<0, Size, Derived1, Derived2> +struct ei_dot_impl<0, Size, Derived1, Derived2> { inline 
static void run(const Derived1 &v1, const Derived2& v2, typename Derived1::Scalar &dot) { @@ -44,15 +44,20 @@ struct ei_dot_unroller<0, Size, Derived1, Derived2> } }; -template<int Index, typename Derived1, typename Derived2> -struct ei_dot_unroller<Index, Dynamic, Derived1, Derived2> +template<typename Derived1, typename Derived2> +struct ei_dot_impl<Dynamic, Dynamic, Derived1, Derived2> { - inline static void run(const Derived1&, const Derived2&, typename Derived1::Scalar&) {} + inline static void run(const Derived1& v1, const Derived2& v2, typename Derived1::Scalar& dot) + { + dot = v1.coeff(0) * ei_conj(v2.coeff(0)); + for(int i = 1; i < v1.size(); i++) + dot += v1.coeff(i)* ei_conj(v2.coeff(i)); + } }; // prevent buggy user code from causing an infinite recursion template<int Index, typename Derived1, typename Derived2> -struct ei_dot_unroller<Index, 0, Derived1, Derived2> +struct ei_dot_impl<Index, 0, Derived1, Derived2> { inline static void run(const Derived1&, const Derived2&, typename Derived1::Scalar&) {} }; @@ -83,22 +88,16 @@ MatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const EIGEN_STATIC_ASSERT_VECTOR_ONLY(_OtherNested); EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(_Nested,_OtherNested); ei_assert(nested.size() == otherNested.size()); - Scalar res; const bool unroll = SizeAtCompileTime * (_Nested::CoeffReadCost + _OtherNested::CoeffReadCost + NumTraits<Scalar>::MulCost) + (int(SizeAtCompileTime) - 1) * NumTraits<Scalar>::AddCost <= EIGEN_UNROLLING_LIMIT; - if(unroll) - ei_dot_unroller<int(SizeAtCompileTime)-1, - unroll ? int(SizeAtCompileTime) : Dynamic, - _Nested, _OtherNested> - ::run(nested, otherNested, res); - else - { - res = nested.coeff(0) * ei_conj(otherNested.coeff(0)); - for(int i = 1; i < size(); i++) - res += nested.coeff(i)* ei_conj(otherNested.coeff(i)); - } + + Scalar res; + ei_dot_impl<unroll ? int(SizeAtCompileTime)-1 : Dynamic, + unroll ? 
int(SizeAtCompileTime) : Dynamic, + _Nested, _OtherNested> + ::run(nested, otherNested, res); return res; } diff --git a/Eigen/src/Core/Part.h b/Eigen/src/Core/Part.h index 647e6cf2d..eb8dcbba7 100644 --- a/Eigen/src/Core/Part.h +++ b/Eigen/src/Core/Part.h @@ -92,7 +92,7 @@ inline void Part<MatrixType, Mode>::operator=(const Other& other) } template<typename Derived1, typename Derived2, unsigned int Mode, int UnrollCount> -struct ei_part_assignment_unroller +struct ei_part_assignment_impl { enum { col = (UnrollCount-1) / Derived1::RowsAtCompileTime, @@ -101,7 +101,7 @@ struct ei_part_assignment_unroller inline static void run(Derived1 &dst, const Derived2 &src) { - ei_part_assignment_unroller<Derived1, Derived2, Mode, UnrollCount-1>::run(dst, src); + ei_part_assignment_impl<Derived1, Derived2, Mode, UnrollCount-1>::run(dst, src); if(Mode == SelfAdjoint) { @@ -122,7 +122,7 @@ struct ei_part_assignment_unroller }; template<typename Derived1, typename Derived2, unsigned int Mode> -struct ei_part_assignment_unroller<Derived1, Derived2, Mode, 1> +struct ei_part_assignment_impl<Derived1, Derived2, Mode, 1> { inline static void run(Derived1 &dst, const Derived2 &src) { @@ -133,65 +133,78 @@ struct ei_part_assignment_unroller<Derived1, Derived2, Mode, 1> // prevent buggy user code from causing an infinite recursion template<typename Derived1, typename Derived2, unsigned int Mode> -struct ei_part_assignment_unroller<Derived1, Derived2, Mode, 0> +struct ei_part_assignment_impl<Derived1, Derived2, Mode, 0> { inline static void run(Derived1 &, const Derived2 &) {} }; -template<typename Derived1, typename Derived2, unsigned int Mode> -struct ei_part_assignment_unroller<Derived1, Derived2, Mode, Dynamic> +template<typename Derived1, typename Derived2> +struct ei_part_assignment_impl<Derived1, Derived2, Upper, Dynamic> { - inline static void run(Derived1 &, const Derived2 &) {} + inline static void run(Derived1 &dst, const Derived2 &src) + { + for(int j = 0; j < dst.cols(); j++) 
+ for(int i = 0; i <= j; i++) + dst.coeffRef(i, j) = src.coeff(i, j); + } }; +template<typename Derived1, typename Derived2> +struct ei_part_assignment_impl<Derived1, Derived2, Lower, Dynamic> +{ + inline static void run(Derived1 &dst, const Derived2 &src) + { + for(int j = 0; j < dst.cols(); j++) + for(int i = j; i < dst.rows(); i++) + dst.coeffRef(i, j) = src.coeff(i, j); + } +}; -template<typename MatrixType, unsigned int Mode> -template<typename Other> -void Part<MatrixType, Mode>::lazyAssign(const Other& other) +template<typename Derived1, typename Derived2> +struct ei_part_assignment_impl<Derived1, Derived2, StrictlyUpper, Dynamic> { - const bool unroll = MatrixType::SizeAtCompileTime * Other::CoeffReadCost / 2 <= EIGEN_UNROLLING_LIMIT; - ei_assert(m_matrix.rows() == other.rows() && m_matrix.cols() == other.cols()); - if(unroll) + inline static void run(Derived1 &dst, const Derived2 &src) { - ei_part_assignment_unroller - <MatrixType, Other, Mode, - unroll ? int(MatrixType::SizeAtCompileTime) : Dynamic - >::run(m_matrix, other.derived()); + for(int j = 0; j < dst.cols(); j++) + for(int i = 0; i < j; i++) + dst.coeffRef(i, j) = src.coeff(i, j); } - else +}; +template<typename Derived1, typename Derived2> +struct ei_part_assignment_impl<Derived1, Derived2, StrictlyLower, Dynamic> +{ + inline static void run(Derived1 &dst, const Derived2 &src) { - switch(Mode) + for(int j = 0; j < dst.cols(); j++) + for(int i = j+1; i < dst.rows(); i++) + dst.coeffRef(i, j) = src.coeff(i, j); + } +}; +template<typename Derived1, typename Derived2> +struct ei_part_assignment_impl<Derived1, Derived2, SelfAdjoint, Dynamic> +{ + inline static void run(Derived1 &dst, const Derived2 &src) + { + for(int j = 0; j < dst.cols(); j++) { - case Upper: - for(int j = 0; j < m_matrix.cols(); j++) - for(int i = 0; i <= j; i++) - m_matrix.coeffRef(i, j) = other.coeff(i, j); - break; - case Lower: - for(int j = 0; j < m_matrix.cols(); j++) - for(int i = j; i < m_matrix.rows(); i++) - 
m_matrix.coeffRef(i, j) = other.coeff(i, j); - break; - case StrictlyUpper: - for(int j = 0; j < m_matrix.cols(); j++) - for(int i = 0; i < j; i++) - m_matrix.coeffRef(i, j) = other.coeff(i, j); - break; - case StrictlyLower: - for(int j = 0; j < m_matrix.cols(); j++) - for(int i = j+1; i < m_matrix.rows(); i++) - m_matrix.coeffRef(i, j) = other.coeff(i, j); - break; - case SelfAdjoint: - for(int j = 0; j < m_matrix.cols(); j++) - { - for(int i = 0; i < j; i++) - m_matrix.coeffRef(j, i) = ei_conj(m_matrix.coeffRef(i, j) = other.coeff(i, j)); - m_matrix.coeffRef(j, j) = ei_real(other.coeff(j, j)); - } - break; + for(int i = 0; i < j; i++) + dst.coeffRef(j, i) = ei_conj(dst.coeffRef(i, j) = src.coeff(i, j)); + dst.coeffRef(j, j) = ei_real(src.coeff(j, j)); } } +}; + +template<typename MatrixType, unsigned int Mode> +template<typename Other> +void Part<MatrixType, Mode>::lazyAssign(const Other& other) +{ + const bool unroll = MatrixType::SizeAtCompileTime * Other::CoeffReadCost / 2 <= EIGEN_UNROLLING_LIMIT; + ei_assert(m_matrix.rows() == other.rows() && m_matrix.cols() == other.cols()); + + ei_part_assignment_impl + <MatrixType, Other, Mode, + unroll ? 
int(MatrixType::SizeAtCompileTime) : Dynamic + >::run(m_matrix, other.derived()); } template<typename MatrixType, unsigned int Mode> diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h index 3709ebf64..7af1bb3ed 100644 --- a/Eigen/src/Core/Product.h +++ b/Eigen/src/Core/Product.h @@ -47,8 +47,8 @@ struct ei_product_impl<0, Size, Lhs, Rhs> } }; -template<int Index, typename Lhs, typename Rhs> -struct ei_product_impl<Index, Dynamic, Lhs, Rhs> +template<typename Lhs, typename Rhs> +struct ei_product_impl<Dynamic, Dynamic, Lhs, Rhs> { inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar& res) { @@ -268,7 +268,7 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm { Scalar res; const bool unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT; - ei_product_impl<Lhs::ColsAtCompileTime-1, + ei_product_impl<unroll ? Lhs::ColsAtCompileTime-1 : Dynamic, unroll ? Lhs::ColsAtCompileTime : Dynamic, _LhsNested, _RhsNested> ::run(row, col, m_lhs, m_rhs, res); diff --git a/Eigen/src/Core/Redux.h b/Eigen/src/Core/Redux.h index 7c5534154..821d3813d 100644 --- a/Eigen/src/Core/Redux.h +++ b/Eigen/src/Core/Redux.h @@ -63,7 +63,17 @@ template<typename BinaryOp, typename Derived, int Start> struct ei_redux_unroller<BinaryOp, Derived, Start, Dynamic> { typedef typename ei_result_of<BinaryOp(typename Derived::Scalar)>::type Scalar; - static Scalar run(const Derived&, const BinaryOp&) { return Scalar(); } + static Scalar run(const Derived& mat, const BinaryOp& func) + { + Scalar res; + res = mat.coeff(0,0); + for(int i = 1; i < mat.rows(); i++) + res = func(res, mat.coeff(i, 0)); + for(int j = 1; j < mat.cols(); j++) + for(int i = 0; i < mat.rows(); i++) + res = func(res, mat.coeff(i, j)); + return res; + } }; /** \returns the result of a full redux operation on the whole matrix or vector using \a func @@ -81,21 +91,9 @@ MatrixBase<Derived>::redux(const BinaryOp& func) const const bool unroll = SizeAtCompileTime 
* CoeffReadCost + (SizeAtCompileTime-1) * ei_functor_traits<BinaryOp>::Cost <= EIGEN_UNROLLING_LIMIT; - if(unroll) - return ei_redux_unroller<BinaryOp, Derived, 0, - unroll ? int(SizeAtCompileTime) : Dynamic> - ::run(derived(), func); - else - { - Scalar res; - res = coeff(0,0); - for(int i = 1; i < rows(); i++) - res = func(res, coeff(i, 0)); - for(int j = 1; j < cols(); j++) - for(int i = 0; i < rows(); i++) - res = func(res, coeff(i, j)); - return res; - } + return ei_redux_unroller<BinaryOp, Derived, 0, + unroll ? int(SizeAtCompileTime) : Dynamic> + ::run(derived(), func); } /** \returns the sum of all coefficients of *this diff --git a/Eigen/src/Core/Visitor.h b/Eigen/src/Core/Visitor.h index 6e00e4142..bd469f747 100644 --- a/Eigen/src/Core/Visitor.h +++ b/Eigen/src/Core/Visitor.h @@ -52,7 +52,15 @@ struct ei_visitor_unroller<Visitor, Derived, 1> template<typename Visitor, typename Derived> struct ei_visitor_unroller<Visitor, Derived, Dynamic> { - inline static void run(const Derived &, Visitor&) {} + inline static void run(const Derived& mat, Visitor& visitor) + { + visitor.init(mat.coeff(0,0), 0, 0); + for(int i = 1; i < mat.rows(); i++) + visitor(mat.coeff(i, 0), i, 0); + for(int j = 1; j < mat.cols(); j++) + for(int i = 0; i < mat.rows(); i++) + visitor(mat.coeff(i, j), i, j); + } }; @@ -77,19 +85,9 @@ void MatrixBase<Derived>::visit(Visitor& visitor) const const bool unroll = SizeAtCompileTime * CoeffReadCost + (SizeAtCompileTime-1) * ei_functor_traits<Visitor>::Cost <= EIGEN_UNROLLING_LIMIT; - if(unroll) - return ei_visitor_unroller<Visitor, Derived, - unroll ? int(SizeAtCompileTime) : Dynamic - >::run(derived(), visitor); - else - { - visitor.init(coeff(0,0), 0, 0); - for(int i = 1; i < rows(); i++) - visitor(coeff(i, 0), i, 0); - for(int j = 1; j < cols(); j++) - for(int i = 0; i < rows(); i++) - visitor(coeff(i, j), i, j); - } + return ei_visitor_unroller<Visitor, Derived, + unroll ? 
int(SizeAtCompileTime) : Dynamic + >::run(derived(), visitor); } /** \internal diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index 717e3e072..dff4d7885 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -34,7 +34,7 @@ /** Defines the maximal loop size to enable meta unrolling of loops */ #ifndef EIGEN_UNROLLING_LIMIT -#define EIGEN_UNROLLING_LIMIT 400 +#define EIGEN_UNROLLING_LIMIT 100 #endif #ifdef EIGEN_DEFAULT_TO_ROW_MAJOR |