about | summary | refs | log | tree | commit | diff | homepage
path: root/Eigen/src/Core
diff options
context:
space:
mode:
author: Gael Guennebaud <g.gael@free.fr> 2008-06-07 01:07:48 +0000
committer: Gael Guennebaud <g.gael@free.fr> 2008-06-07 01:07:48 +0000
commit 69980379300b035daae6de26eb4bf394651d2de3 (patch)
tree e0f75f44232947cf7d52271a3a8f11807bdae00b /Eigen/src/Core
parent a172385720fad3b72a820da28fef158efabdb369 (diff)
* move some compile-time "if"s to their respective unrollers (assign and dot)
* fix a couple of compilation issues when unrolling is disabled
* reduce default unrolling limit to a more reasonable value
Diffstat (limited to 'Eigen/src/Core')
-rw-r--r-- Eigen/src/Core/Assign.h 68
-rw-r--r-- Eigen/src/Core/Dot.h 37
-rw-r--r-- Eigen/src/Core/Part.h 107
-rw-r--r-- Eigen/src/Core/Product.h 6
-rw-r--r-- Eigen/src/Core/Redux.h 30
-rw-r--r-- Eigen/src/Core/Visitor.h 26
-rw-r--r-- Eigen/src/Core/util/Macros.h 2
7 files changed, 144 insertions, 132 deletions
diff --git a/Eigen/src/Core/Assign.h b/Eigen/src/Core/Assign.h
index 21d5e3d11..98df25235 100644
--- a/Eigen/src/Core/Assign.h
+++ b/Eigen/src/Core/Assign.h
@@ -58,10 +58,30 @@ struct ei_matrix_assignment_unroller<Derived1, Derived2, 0>
inline static void run(Derived1 &, const Derived2 &) {}
};
+// Dynamic col-major
template<typename Derived1, typename Derived2>
-struct ei_matrix_assignment_unroller<Derived1, Derived2, Dynamic>
+struct ei_matrix_assignment_unroller<Derived1, Derived2, -1>
{
- inline static void run(Derived1 &, const Derived2 &) {}
+ inline static void run(Derived1 &dst, const Derived2 &src)
+ {
+ for(int j = 0; j < dst.cols(); j++)
+ for(int i = 0; i < dst.rows(); i++)
+ dst.coeffRef(i, j) = src.coeff(i, j);
+ }
+};
+
+// Dynamic row-major
+template<typename Derived1, typename Derived2>
+struct ei_matrix_assignment_unroller<Derived1, Derived2, -2>
+{
+ inline static void run(Derived1 &dst, const Derived2 &src)
+ {
+ // traverse in row-major order
+ // in order to allow the compiler to unroll the inner loop
+ for(int i = 0; i < dst.rows(); i++)
+ for(int j = 0; j < dst.cols(); j++)
+ dst.coeffRef(i, j) = src.coeff(i, j);
+ }
};
//----
@@ -103,10 +123,12 @@ struct ei_matrix_assignment_packet_unroller<Derived1, Derived2, Dynamic>
template <typename Derived, typename OtherDerived,
bool Vectorize = (int(Derived::Flags) & int(OtherDerived::Flags) & VectorizableBit)
&& ((int(Derived::Flags)&RowMajorBit)==(int(OtherDerived::Flags)&RowMajorBit))
- && ( (int(Derived::Flags) & int(OtherDerived::Flags) & Like1DArrayBit)
- ||((int(Derived::Flags)&RowMajorBit)
- ? int(Derived::ColsAtCompileTime)!=Dynamic && (int(Derived::ColsAtCompileTime)%ei_packet_traits<typename Derived::Scalar>::size==0)
- : int(Derived::RowsAtCompileTime)!=Dynamic && (int(Derived::RowsAtCompileTime)%ei_packet_traits<typename Derived::Scalar>::size==0)) ),
+ && ( (int(Derived::Flags) & int(OtherDerived::Flags) & Like1DArrayBit)
+ || ((int(Derived::Flags) & RowMajorBit)
+ ? int(Derived::ColsAtCompileTime)!=Dynamic
+ && (int(Derived::ColsAtCompileTime)%ei_packet_traits<typename Derived::Scalar>::size==0)
+ : int(Derived::RowsAtCompileTime)!=Dynamic
+ && (int(Derived::RowsAtCompileTime)%ei_packet_traits<typename Derived::Scalar>::size==0)) ),
bool Unroll = Derived::SizeAtCompileTime * OtherDerived::CoeffReadCost <= EIGEN_UNROLLING_LIMIT>
struct ei_assignment_impl;
@@ -156,39 +178,21 @@ inline Derived& MatrixBase<Derived>
//----
-template <typename Derived, typename OtherDerived>
-struct ei_assignment_impl<Derived, OtherDerived, false, true> // no vec + unrolling
+// no vectorization
+template <typename Derived, typename OtherDerived, bool Unroll>
+struct ei_assignment_impl<Derived, OtherDerived, false, Unroll>
{
static void run(Derived & dst, const OtherDerived & src)
{
ei_matrix_assignment_unroller
- <Derived, OtherDerived, int(Derived::SizeAtCompileTime)
+ <Derived, OtherDerived,
+ Unroll ? int(Derived::SizeAtCompileTime)
+ : Derived::ColsAtCompileTime == Dynamic || Derived::RowsAtCompileTime != Dynamic ? -1 // col-major
+ : -2 // row-major
>::run(dst.derived(), src.derived());
}
};
-template <typename Derived, typename OtherDerived>
-struct ei_assignment_impl<Derived, OtherDerived, false, false> // no vec + no unrolling + col major order
-{
- static void run(Derived & dst, const OtherDerived & src)
- {
- if(Derived::ColsAtCompileTime == Dynamic || Derived::RowsAtCompileTime != Dynamic)
- {
- for(int j = 0; j < dst.cols(); j++)
- for(int i = 0; i < dst.rows(); i++)
- dst.coeffRef(i, j) = src.coeff(i, j);
- }
- else
- {
- // traverse in row-major order
- // in order to allow the compiler to unroll the inner loop
- for(int i = 0; i < dst.rows(); i++)
- for(int j = 0; j < dst.cols(); j++)
- dst.coeffRef(i, j) = src.coeff(i, j);
- }
- }
-};
-
//----
template <typename Derived, typename OtherDerived>
@@ -224,7 +228,7 @@ struct ei_assignment_impl<Derived, OtherDerived, true, false> // vec + no-unroll
};
template <typename Derived, typename OtherDerived>
-struct ei_packet_assignment_seclector<Derived, OtherDerived, true, true> // row-major + complex 1D array
+struct ei_packet_assignment_seclector<Derived, OtherDerived, true, true> // row-major + complex 1D array like
{
static void run(Derived & dst, const OtherDerived & src)
{
diff --git a/Eigen/src/Core/Dot.h b/Eigen/src/Core/Dot.h
index c85d9e3fb..a1c1ffce8 100644
--- a/Eigen/src/Core/Dot.h
+++ b/Eigen/src/Core/Dot.h
@@ -26,17 +26,17 @@
#define EIGEN_DOT_H
template<int Index, int Size, typename Derived1, typename Derived2>
-struct ei_dot_unroller
+struct ei_dot_impl
{
inline static void run(const Derived1 &v1, const Derived2& v2, typename Derived1::Scalar &dot)
{
- ei_dot_unroller<Index-1, Size, Derived1, Derived2>::run(v1, v2, dot);
+ ei_dot_impl<Index-1, Size, Derived1, Derived2>::run(v1, v2, dot);
dot += v1.coeff(Index) * ei_conj(v2.coeff(Index));
}
};
template<int Size, typename Derived1, typename Derived2>
-struct ei_dot_unroller<0, Size, Derived1, Derived2>
+struct ei_dot_impl<0, Size, Derived1, Derived2>
{
inline static void run(const Derived1 &v1, const Derived2& v2, typename Derived1::Scalar &dot)
{
@@ -44,15 +44,20 @@ struct ei_dot_unroller<0, Size, Derived1, Derived2>
}
};
-template<int Index, typename Derived1, typename Derived2>
-struct ei_dot_unroller<Index, Dynamic, Derived1, Derived2>
+template<typename Derived1, typename Derived2>
+struct ei_dot_impl<Dynamic, Dynamic, Derived1, Derived2>
{
- inline static void run(const Derived1&, const Derived2&, typename Derived1::Scalar&) {}
+ inline static void run(const Derived1& v1, const Derived2& v2, typename Derived1::Scalar& dot)
+ {
+ dot = v1.coeff(0) * ei_conj(v2.coeff(0));
+ for(int i = 1; i < v1.size(); i++)
+ dot += v1.coeff(i)* ei_conj(v2.coeff(i));
+ }
};
// prevent buggy user code from causing an infinite recursion
template<int Index, typename Derived1, typename Derived2>
-struct ei_dot_unroller<Index, 0, Derived1, Derived2>
+struct ei_dot_impl<Index, 0, Derived1, Derived2>
{
inline static void run(const Derived1&, const Derived2&, typename Derived1::Scalar&) {}
};
@@ -83,22 +88,16 @@ MatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const
EIGEN_STATIC_ASSERT_VECTOR_ONLY(_OtherNested);
EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(_Nested,_OtherNested);
ei_assert(nested.size() == otherNested.size());
- Scalar res;
const bool unroll = SizeAtCompileTime
* (_Nested::CoeffReadCost + _OtherNested::CoeffReadCost + NumTraits<Scalar>::MulCost)
+ (int(SizeAtCompileTime) - 1) * NumTraits<Scalar>::AddCost
<= EIGEN_UNROLLING_LIMIT;
- if(unroll)
- ei_dot_unroller<int(SizeAtCompileTime)-1,
- unroll ? int(SizeAtCompileTime) : Dynamic,
- _Nested, _OtherNested>
- ::run(nested, otherNested, res);
- else
- {
- res = nested.coeff(0) * ei_conj(otherNested.coeff(0));
- for(int i = 1; i < size(); i++)
- res += nested.coeff(i)* ei_conj(otherNested.coeff(i));
- }
+
+ Scalar res;
+ ei_dot_impl<unroll ? int(SizeAtCompileTime)-1 : Dynamic,
+ unroll ? int(SizeAtCompileTime) : Dynamic,
+ _Nested, _OtherNested>
+ ::run(nested, otherNested, res);
return res;
}
diff --git a/Eigen/src/Core/Part.h b/Eigen/src/Core/Part.h
index 647e6cf2d..eb8dcbba7 100644
--- a/Eigen/src/Core/Part.h
+++ b/Eigen/src/Core/Part.h
@@ -92,7 +92,7 @@ inline void Part<MatrixType, Mode>::operator=(const Other& other)
}
template<typename Derived1, typename Derived2, unsigned int Mode, int UnrollCount>
-struct ei_part_assignment_unroller
+struct ei_part_assignment_impl
{
enum {
col = (UnrollCount-1) / Derived1::RowsAtCompileTime,
@@ -101,7 +101,7 @@ struct ei_part_assignment_unroller
inline static void run(Derived1 &dst, const Derived2 &src)
{
- ei_part_assignment_unroller<Derived1, Derived2, Mode, UnrollCount-1>::run(dst, src);
+ ei_part_assignment_impl<Derived1, Derived2, Mode, UnrollCount-1>::run(dst, src);
if(Mode == SelfAdjoint)
{
@@ -122,7 +122,7 @@ struct ei_part_assignment_unroller
};
template<typename Derived1, typename Derived2, unsigned int Mode>
-struct ei_part_assignment_unroller<Derived1, Derived2, Mode, 1>
+struct ei_part_assignment_impl<Derived1, Derived2, Mode, 1>
{
inline static void run(Derived1 &dst, const Derived2 &src)
{
@@ -133,65 +133,78 @@ struct ei_part_assignment_unroller<Derived1, Derived2, Mode, 1>
// prevent buggy user code from causing an infinite recursion
template<typename Derived1, typename Derived2, unsigned int Mode>
-struct ei_part_assignment_unroller<Derived1, Derived2, Mode, 0>
+struct ei_part_assignment_impl<Derived1, Derived2, Mode, 0>
{
inline static void run(Derived1 &, const Derived2 &) {}
};
-template<typename Derived1, typename Derived2, unsigned int Mode>
-struct ei_part_assignment_unroller<Derived1, Derived2, Mode, Dynamic>
+template<typename Derived1, typename Derived2>
+struct ei_part_assignment_impl<Derived1, Derived2, Upper, Dynamic>
{
- inline static void run(Derived1 &, const Derived2 &) {}
+ inline static void run(Derived1 &dst, const Derived2 &src)
+ {
+ for(int j = 0; j < dst.cols(); j++)
+ for(int i = 0; i <= j; i++)
+ dst.coeffRef(i, j) = src.coeff(i, j);
+ }
};
+template<typename Derived1, typename Derived2>
+struct ei_part_assignment_impl<Derived1, Derived2, Lower, Dynamic>
+{
+ inline static void run(Derived1 &dst, const Derived2 &src)
+ {
+ for(int j = 0; j < dst.cols(); j++)
+ for(int i = j; i < dst.rows(); i++)
+ dst.coeffRef(i, j) = src.coeff(i, j);
+ }
+};
-template<typename MatrixType, unsigned int Mode>
-template<typename Other>
-void Part<MatrixType, Mode>::lazyAssign(const Other& other)
+template<typename Derived1, typename Derived2>
+struct ei_part_assignment_impl<Derived1, Derived2, StrictlyUpper, Dynamic>
{
- const bool unroll = MatrixType::SizeAtCompileTime * Other::CoeffReadCost / 2 <= EIGEN_UNROLLING_LIMIT;
- ei_assert(m_matrix.rows() == other.rows() && m_matrix.cols() == other.cols());
- if(unroll)
+ inline static void run(Derived1 &dst, const Derived2 &src)
{
- ei_part_assignment_unroller
- <MatrixType, Other, Mode,
- unroll ? int(MatrixType::SizeAtCompileTime) : Dynamic
- >::run(m_matrix, other.derived());
+ for(int j = 0; j < dst.cols(); j++)
+ for(int i = 0; i < j; i++)
+ dst.coeffRef(i, j) = src.coeff(i, j);
}
- else
+};
+template<typename Derived1, typename Derived2>
+struct ei_part_assignment_impl<Derived1, Derived2, StrictlyLower, Dynamic>
+{
+ inline static void run(Derived1 &dst, const Derived2 &src)
{
- switch(Mode)
+ for(int j = 0; j < dst.cols(); j++)
+ for(int i = j+1; i < dst.rows(); i++)
+ dst.coeffRef(i, j) = src.coeff(i, j);
+ }
+};
+template<typename Derived1, typename Derived2>
+struct ei_part_assignment_impl<Derived1, Derived2, SelfAdjoint, Dynamic>
+{
+ inline static void run(Derived1 &dst, const Derived2 &src)
+ {
+ for(int j = 0; j < dst.cols(); j++)
{
- case Upper:
- for(int j = 0; j < m_matrix.cols(); j++)
- for(int i = 0; i <= j; i++)
- m_matrix.coeffRef(i, j) = other.coeff(i, j);
- break;
- case Lower:
- for(int j = 0; j < m_matrix.cols(); j++)
- for(int i = j; i < m_matrix.rows(); i++)
- m_matrix.coeffRef(i, j) = other.coeff(i, j);
- break;
- case StrictlyUpper:
- for(int j = 0; j < m_matrix.cols(); j++)
- for(int i = 0; i < j; i++)
- m_matrix.coeffRef(i, j) = other.coeff(i, j);
- break;
- case StrictlyLower:
- for(int j = 0; j < m_matrix.cols(); j++)
- for(int i = j+1; i < m_matrix.rows(); i++)
- m_matrix.coeffRef(i, j) = other.coeff(i, j);
- break;
- case SelfAdjoint:
- for(int j = 0; j < m_matrix.cols(); j++)
- {
- for(int i = 0; i < j; i++)
- m_matrix.coeffRef(j, i) = ei_conj(m_matrix.coeffRef(i, j) = other.coeff(i, j));
- m_matrix.coeffRef(j, j) = ei_real(other.coeff(j, j));
- }
- break;
+ for(int i = 0; i < j; i++)
+ dst.coeffRef(j, i) = ei_conj(dst.coeffRef(i, j) = src.coeff(i, j));
+ dst.coeffRef(j, j) = ei_real(src.coeff(j, j));
}
}
+};
+
+template<typename MatrixType, unsigned int Mode>
+template<typename Other>
+void Part<MatrixType, Mode>::lazyAssign(const Other& other)
+{
+ const bool unroll = MatrixType::SizeAtCompileTime * Other::CoeffReadCost / 2 <= EIGEN_UNROLLING_LIMIT;
+ ei_assert(m_matrix.rows() == other.rows() && m_matrix.cols() == other.cols());
+
+ ei_part_assignment_impl
+ <MatrixType, Other, Mode,
+ unroll ? int(MatrixType::SizeAtCompileTime) : Dynamic
+ >::run(m_matrix, other.derived());
}
template<typename MatrixType, unsigned int Mode>
diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h
index 3709ebf64..7af1bb3ed 100644
--- a/Eigen/src/Core/Product.h
+++ b/Eigen/src/Core/Product.h
@@ -47,8 +47,8 @@ struct ei_product_impl<0, Size, Lhs, Rhs>
}
};
-template<int Index, typename Lhs, typename Rhs>
-struct ei_product_impl<Index, Dynamic, Lhs, Rhs>
+template<typename Lhs, typename Rhs>
+struct ei_product_impl<Dynamic, Dynamic, Lhs, Rhs>
{
inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar& res)
{
@@ -268,7 +268,7 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm
{
Scalar res;
const bool unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT;
- ei_product_impl<Lhs::ColsAtCompileTime-1,
+ ei_product_impl<unroll ? Lhs::ColsAtCompileTime-1 : Dynamic,
unroll ? Lhs::ColsAtCompileTime : Dynamic,
_LhsNested, _RhsNested>
::run(row, col, m_lhs, m_rhs, res);
diff --git a/Eigen/src/Core/Redux.h b/Eigen/src/Core/Redux.h
index 7c5534154..821d3813d 100644
--- a/Eigen/src/Core/Redux.h
+++ b/Eigen/src/Core/Redux.h
@@ -63,7 +63,17 @@ template<typename BinaryOp, typename Derived, int Start>
struct ei_redux_unroller<BinaryOp, Derived, Start, Dynamic>
{
typedef typename ei_result_of<BinaryOp(typename Derived::Scalar)>::type Scalar;
- static Scalar run(const Derived&, const BinaryOp&) { return Scalar(); }
+ static Scalar run(const Derived& mat, const BinaryOp& func)
+ {
+ Scalar res;
+ res = mat.coeff(0,0);
+ for(int i = 1; i < mat.rows(); i++)
+ res = func(res, mat.coeff(i, 0));
+ for(int j = 1; j < mat.cols(); j++)
+ for(int i = 0; i < mat.rows(); i++)
+ res = func(res, mat.coeff(i, j));
+ return res;
+ }
};
/** \returns the result of a full redux operation on the whole matrix or vector using \a func
@@ -81,21 +91,9 @@ MatrixBase<Derived>::redux(const BinaryOp& func) const
const bool unroll = SizeAtCompileTime * CoeffReadCost
+ (SizeAtCompileTime-1) * ei_functor_traits<BinaryOp>::Cost
<= EIGEN_UNROLLING_LIMIT;
- if(unroll)
- return ei_redux_unroller<BinaryOp, Derived, 0,
- unroll ? int(SizeAtCompileTime) : Dynamic>
- ::run(derived(), func);
- else
- {
- Scalar res;
- res = coeff(0,0);
- for(int i = 1; i < rows(); i++)
- res = func(res, coeff(i, 0));
- for(int j = 1; j < cols(); j++)
- for(int i = 0; i < rows(); i++)
- res = func(res, coeff(i, j));
- return res;
- }
+ return ei_redux_unroller<BinaryOp, Derived, 0,
+ unroll ? int(SizeAtCompileTime) : Dynamic>
+ ::run(derived(), func);
}
/** \returns the sum of all coefficients of *this
diff --git a/Eigen/src/Core/Visitor.h b/Eigen/src/Core/Visitor.h
index 6e00e4142..bd469f747 100644
--- a/Eigen/src/Core/Visitor.h
+++ b/Eigen/src/Core/Visitor.h
@@ -52,7 +52,15 @@ struct ei_visitor_unroller<Visitor, Derived, 1>
template<typename Visitor, typename Derived>
struct ei_visitor_unroller<Visitor, Derived, Dynamic>
{
- inline static void run(const Derived &, Visitor&) {}
+ inline static void run(const Derived& mat, Visitor& visitor)
+ {
+ visitor.init(mat.coeff(0,0), 0, 0);
+ for(int i = 1; i < mat.rows(); i++)
+ visitor(mat.coeff(i, 0), i, 0);
+ for(int j = 1; j < mat.cols(); j++)
+ for(int i = 0; i < mat.rows(); i++)
+ visitor(mat.coeff(i, j), i, j);
+ }
};
@@ -77,19 +85,9 @@ void MatrixBase<Derived>::visit(Visitor& visitor) const
const bool unroll = SizeAtCompileTime * CoeffReadCost
+ (SizeAtCompileTime-1) * ei_functor_traits<Visitor>::Cost
<= EIGEN_UNROLLING_LIMIT;
- if(unroll)
- return ei_visitor_unroller<Visitor, Derived,
- unroll ? int(SizeAtCompileTime) : Dynamic
- >::run(derived(), visitor);
- else
- {
- visitor.init(coeff(0,0), 0, 0);
- for(int i = 1; i < rows(); i++)
- visitor(coeff(i, 0), i, 0);
- for(int j = 1; j < cols(); j++)
- for(int i = 0; i < rows(); i++)
- visitor(coeff(i, j), i, j);
- }
+ return ei_visitor_unroller<Visitor, Derived,
+ unroll ? int(SizeAtCompileTime) : Dynamic
+ >::run(derived(), visitor);
}
/** \internal
diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h
index 717e3e072..dff4d7885 100644
--- a/Eigen/src/Core/util/Macros.h
+++ b/Eigen/src/Core/util/Macros.h
@@ -34,7 +34,7 @@
/** Defines the maximal loop size to enable meta unrolling of loops */
#ifndef EIGEN_UNROLLING_LIMIT
-#define EIGEN_UNROLLING_LIMIT 400
+#define EIGEN_UNROLLING_LIMIT 100
#endif
#ifdef EIGEN_DEFAULT_TO_ROW_MAJOR