about | summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author: Benoit Jacob <jacob.benoit.1@gmail.com> 2008-04-06 18:01:03 +0000
committer: Benoit Jacob <jacob.benoit.1@gmail.com> 2008-04-06 18:01:03 +0000
commit: 371d302efbbbedf2b4818f3efae466bedab63a1e (patch)
tree: 433daed948c140f5e931bcfe8b7119b99be12780
parent: 30ec34de362744fa9a3e82573cb23662aaafbf5a (diff)
- merge ei_xpr_copy and ei_eval_if_needed_before_nesting
- make use of CoeffReadCost to determine when to unroll the loops, for now only in Product.h and in OperatorEquals.h. Performance remains the same: generally still not as good as before the big changes.
-rw-r--r-- Eigen/src/Core/ForwardDeclarations.h 27
-rw-r--r-- Eigen/src/Core/OperatorEquals.h 11
-rw-r--r-- Eigen/src/Core/Product.h 35
-rw-r--r-- Eigen/src/Core/Util.h 2
-rw-r--r-- bench/benchmark.cpp 6
5 files changed, 49 insertions, 32 deletions
diff --git a/Eigen/src/Core/ForwardDeclarations.h b/Eigen/src/Core/ForwardDeclarations.h
index 36519c7da..32be8cd68 100644
--- a/Eigen/src/Core/ForwardDeclarations.h
+++ b/Eigen/src/Core/ForwardDeclarations.h
@@ -83,27 +83,30 @@ template<typename T> struct ei_eval
template<typename T> struct ei_unref { typedef T type; };
template<typename T> struct ei_unref<T&> { typedef T type; };
-template<typename T> struct ei_xpr_copy
+template<typename T> struct ei_is_temporary
{
- typedef typename ei_meta_if< ei_traits<T>::Flags & EvalBeforeNestingBit,
- typename ei_eval<T>::type, const T&>::ret type;
+ enum { ret = 0 };
};
-template<typename T> struct ei_xpr_copy<Temporary<T> >
+template<typename T> struct ei_is_temporary<Temporary<T> >
{
- typedef Temporary<T> type;
+ enum { ret = 1 };
};
-template<typename T, int n=1> struct ei_eval_if_needed_before_nesting
+template<typename T, int n=1> struct ei_xpr_copy
{
- // FIXME should we consider the additional store as well as the creation cost of the temporary ?
- enum { eval = T::Flags & EvalBeforeNestingBit
- || (n+1) * NumTraits<typename ei_traits<T>::Scalar>::ReadCost < (n-1) * T::CoeffReadCost };
- typedef typename ei_meta_if<eval, typename ei_eval<T>::type, T>::ret XprType;
- typedef typename ei_meta_if<eval, typename ei_eval<T>::type, typename T::XprCopy>::ret CopyType;
+ typedef typename ei_meta_if<
+ ei_is_temporary<T>::ret,
+ T,
+ typename ei_meta_if<
+ ei_traits<T>::Flags & EvalBeforeNestingBit
+ || (n+1) * NumTraits<typename ei_traits<T>::Scalar>::ReadCost < (n-1) * T::CoeffReadCost,
+ typename ei_eval<T>::type,
+ const T&
+ >::ret
+ >::ret type;
};
-
template<typename T> struct ei_functor_traits
{
enum
diff --git a/Eigen/src/Core/OperatorEquals.h b/Eigen/src/Core/OperatorEquals.h
index c93a9329f..5529c8313 100644
--- a/Eigen/src/Core/OperatorEquals.h
+++ b/Eigen/src/Core/OperatorEquals.h
@@ -102,14 +102,15 @@ template<typename OtherDerived>
Derived& MatrixBase<Derived>
::lazyAssign(const MatrixBase<OtherDerived>& other)
{
+ const bool unroll = SizeAtCompileTime * OtherDerived::CoeffReadCost <= EIGEN_UNROLLING_LIMIT;
if(IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime)
// copying a vector expression into a vector
{
ei_assert(size() == other.size());
- if(SizeAtCompileTime <= EIGEN_UNROLLING_LIMIT)
+ if(unroll)
ei_vector_operator_equals_unroller
<Derived, OtherDerived,
- SizeAtCompileTime <= EIGEN_UNROLLING_LIMIT ? SizeAtCompileTime : Dynamic
+ unroll ? SizeAtCompileTime : Dynamic
>::run(derived(), other.derived());
else
for(int i = 0; i < size(); i++)
@@ -118,11 +119,11 @@ Derived& MatrixBase<Derived>
else // copying a matrix expression into a matrix
{
ei_assert(rows() == other.rows() && cols() == other.cols());
- if(SizeAtCompileTime <= EIGEN_UNROLLING_LIMIT)
+ if(unroll)
{
ei_matrix_operator_equals_unroller
<Derived, OtherDerived,
- SizeAtCompileTime <= EIGEN_UNROLLING_LIMIT ? SizeAtCompileTime : Dynamic
+ unroll ? SizeAtCompileTime : Dynamic
>::run(derived(), other.derived());
}
else
@@ -152,7 +153,7 @@ template<typename OtherDerived>
Derived& MatrixBase<Derived>
::operator=(const MatrixBase<OtherDerived>& other)
{
- if (OtherDerived::Flags & EvalBeforeAssigningBit)
+ if(OtherDerived::Flags & EvalBeforeAssigningBit)
{
return lazyAssign(other.derived().eval());
}
diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h
index 608de0b9f..d303cbdb7 100644
--- a/Eigen/src/Core/Product.h
+++ b/Eigen/src/Core/Product.h
@@ -84,21 +84,29 @@ template<typename Lhs, typename Rhs, int EvalMode>
struct ei_traits<Product<Lhs, Rhs, EvalMode> >
{
typedef typename Lhs::Scalar Scalar;
+ typedef typename ei_xpr_copy<Lhs,Rhs::ColsAtCompileTime>::type LhsXprCopy;
+ typedef typename ei_xpr_copy<Rhs,Lhs::RowsAtCompileTime>::type RhsXprCopy;
+ typedef typename ei_unref<LhsXprCopy>::type ActualLhs;
+ typedef typename ei_unref<RhsXprCopy>::type ActualRhs;
enum {
+ LhsCoeffReadCost = ActualLhs::CoeffReadCost,
+ RhsCoeffReadCost = ActualRhs::CoeffReadCost,
+ LhsFlags = ActualLhs::Flags,
+ RhsFlags = ActualRhs::Flags,
RowsAtCompileTime = Lhs::RowsAtCompileTime,
ColsAtCompileTime = Rhs::ColsAtCompileTime,
MaxRowsAtCompileTime = Lhs::MaxRowsAtCompileTime,
MaxColsAtCompileTime = Rhs::MaxColsAtCompileTime,
Flags = ( (RowsAtCompileTime == Dynamic || ColsAtCompileTime == Dynamic)
- ? (unsigned int)(Lhs::Flags | Rhs::Flags)
- : (unsigned int)(Lhs::Flags | Rhs::Flags) & ~LargeBit )
+ ? (unsigned int)(LhsFlags | RhsFlags)
+ : (unsigned int)(LhsFlags | RhsFlags) & ~LargeBit )
| EvalBeforeAssigningBit
| (ei_product_eval_mode<Lhs, Rhs>::value == (int)CacheOptimal ? EvalBeforeNestingBit : 0),
CoeffReadCost
= Lhs::ColsAtCompileTime == Dynamic
? Dynamic
: Lhs::ColsAtCompileTime
- * (NumTraits<Scalar>::MulCost + Lhs::CoeffReadCost + Rhs::CoeffReadCost)
+ * (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost)
+ (Lhs::ColsAtCompileTime - 1) * NumTraits<Scalar>::AddCost
};
};
@@ -110,10 +118,8 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm
EIGEN_GENERIC_PUBLIC_INTERFACE(Product)
- typedef typename ei_eval_if_needed_before_nesting<Lhs,Rhs::ColsAtCompileTime>::CopyType CopyLhs;
- typedef typename ei_eval_if_needed_before_nesting<Rhs,Lhs::RowsAtCompileTime>::CopyType CopyRhs;
- typedef typename ei_eval_if_needed_before_nesting<Lhs,Rhs::ColsAtCompileTime>::XprType XprLhs;
- typedef typename ei_eval_if_needed_before_nesting<Rhs,Lhs::RowsAtCompileTime>::XprType XprRhs;
+ typedef typename ei_traits<Product>::LhsXprCopy LhsXprCopy;
+ typedef typename ei_traits<Product>::RhsXprCopy RhsXprCopy;
Product(const Lhs& lhs, const Rhs& rhs)
: m_lhs(lhs), m_rhs(rhs)
@@ -133,12 +139,15 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm
const Scalar _coeff(int row, int col) const
{
Scalar res;
- if(Lhs::ColsAtCompileTime <= EIGEN_UNROLLING_LIMIT)
+ const bool unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT;
+ if(unroll)
+ {
ei_product_unroller<Lhs::ColsAtCompileTime-1,
- Lhs::ColsAtCompileTime <= EIGEN_UNROLLING_LIMIT
- ? Lhs::ColsAtCompileTime : Dynamic,
- XprLhs, XprRhs>
+ unroll ? Lhs::ColsAtCompileTime : Dynamic,
+ typename ei_unref<LhsXprCopy>::type,
+ typename ei_unref<RhsXprCopy>::type>
::run(row, col, m_lhs, m_rhs, res);
+ }
else
{
res = m_lhs.coeff(row, 0) * m_rhs.coeff(0, col);
@@ -149,8 +158,8 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm
}
protected:
- const CopyLhs m_lhs;
- const CopyRhs m_rhs;
+ const LhsXprCopy m_lhs;
+ const RhsXprCopy m_rhs;
};
/** \returns the matrix product of \c *this and \a other.
diff --git a/Eigen/src/Core/Util.h b/Eigen/src/Core/Util.h
index 10fdacb8b..ad8a15b07 100644
--- a/Eigen/src/Core/Util.h
+++ b/Eigen/src/Core/Util.h
@@ -31,7 +31,7 @@
/** Defines the maximal loop size to enable meta unrolling of loops */
#ifndef EIGEN_UNROLLING_LIMIT
-#define EIGEN_UNROLLING_LIMIT 16
+#define EIGEN_UNROLLING_LIMIT 400
#endif
#ifdef EIGEN_DEFAULT_TO_ROW_MAJOR
diff --git a/bench/benchmark.cpp b/bench/benchmark.cpp
index ee58607cd..4ff678d8a 100644
--- a/bench/benchmark.cpp
+++ b/bench/benchmark.cpp
@@ -8,6 +8,10 @@
using namespace std;
USING_PART_OF_NAMESPACE_EIGEN
+#ifndef REPEAT
+#define REPEAT 40000000
+#endif
+
int main(int argc, char *argv[])
{
Matrix<double,MATSIZE,MATSIZE> I;
@@ -19,7 +23,7 @@ int main(int argc, char *argv[])
m(i,j) = (i+MATSIZE*j);
}
asm("#begin");
- for(int a = 0; a < 40000000; a++)
+ for(int a = 0; a < REPEAT; a++)
{
m = I + 0.00005 * (m + m*m);
}