* support for matrix-scalar quotient with integer scalar types.

* added cache efficient matrix-matrix product. - provides a huge speed-up for large matrices. - currently it is enabled when an explicit unrolling is not possible.
author: Gael Guennebaud <g.gael@free.fr> 2008-03-21 20:26:14 +0000
committer: Gael Guennebaud <g.gael@free.fr> 2008-03-21 20:26:14 +0000
commit: 4342f024d9937beaff70635d2e2cb1ad6574bf72 (patch)
tree: d8b74ed57bea4735114c1e479563acae939f820e /Eigen/src
parent: 0ef1efdbdb63b5ebcb3ebf096a8833b2dd43a790 (diff)
7 files changed, 116 insertions, 22 deletions
diff --git a/Eigen/src/Core/CwiseUnaryOp.h b/Eigen/src/Core/CwiseUnaryOp.h
index 6958ca248..08e0cbdce 100644
--- a/Eigen/src/Core/CwiseUnaryOp.h
+++ b/Eigen/src/Core/CwiseUnaryOp.h
@@ -208,6 +208,34 @@ struct ei_scalar_multiple_op {
     const Scalar m_other;
 };
 
+template<typename Scalar, bool HasFloatingPoint>
+struct ei_scalar_quotient1_impl {
+    ei_scalar_quotient1_impl(const Scalar& other) : m_other(static_cast<Scalar>(1) / other) {}
+    Scalar operator() (const Scalar& a) const { return a * m_other; }
+    const Scalar m_other;
+};
+
+template<typename Scalar>
+struct ei_scalar_quotient1_impl<Scalar,false> {
+    ei_scalar_quotient1_impl(const Scalar& other) : m_other(other) {}
+    Scalar operator() (const Scalar& a) const { return a / m_other; }
+    const Scalar m_other;
+};
+
+/** \internal
+  * \brief Template functor to divide a scalar by a fixed other one
+  *
+  * This functor is used to implement the quotient of a matrix by
+  * a scalar where the scalar type is not a floating point type.
+  *
+  * \sa class CwiseUnaryOp, MatrixBase::operator/
+  */
+template<typename Scalar>
+struct ei_scalar_quotient1_op : ei_scalar_quotient1_impl<Scalar, NumTraits<Scalar>::HasFloatingPoint > {
+  ei_scalar_quotient1_op(const Scalar& other)
+    : ei_scalar_quotient1_impl<Scalar, NumTraits<Scalar>::HasFloatingPoint >(other) {}
+};
+
 /** \relates MatrixBase */
 template<typename Derived>
 const CwiseUnaryOp<ei_scalar_multiple_op<typename ei_traits<Derived>::Scalar>, Derived>
@@ -219,12 +247,11 @@ MatrixBase<Derived>::operator*(const Scalar& scalar) const
 
 /** \relates MatrixBase */
 template<typename Derived>
-const CwiseUnaryOp<ei_scalar_multiple_op<typename ei_traits<Derived>::Scalar>, Derived>
+const CwiseUnaryOp<ei_scalar_quotient1_op<typename ei_traits<Derived>::Scalar>, Derived>
 MatrixBase<Derived>::operator/(const Scalar& scalar) const
 {
-  assert(NumTraits<Scalar>::HasFloatingPoint);
-  return CwiseUnaryOp<ei_scalar_multiple_op<Scalar>, Derived>
-    (derived(), ei_scalar_multiple_op<Scalar>(static_cast<Scalar>(1) / scalar));
+  return CwiseUnaryOp<ei_scalar_quotient1_op<Scalar>, Derived>
+    (derived(), ei_scalar_quotient1_op<Scalar>(scalar));
 }
 
 template<typename Derived>
diff --git a/Eigen/src/Core/ForwardDeclarations.h b/Eigen/src/Core/ForwardDeclarations.h
index 9e0bbe2e6..332198b6e 100644
--- a/Eigen/src/Core/ForwardDeclarations.h
+++ b/Eigen/src/Core/ForwardDeclarations.h
@@ -26,6 +26,7 @@
 #define EIGEN_FORWARDDECLARATIONS_H
 
 template<typename T> struct ei_traits;
+template<typename Lhs, typename Rhs> struct ei_product_eval_mode;
 
 template<typename _Scalar, int _Rows, int _Cols, int _StorageOrder, int _MaxRows, int _MaxCols> class Matrix;
 template<typename MatrixType> class MatrixRef;
@@ -35,7 +36,7 @@ template<typename MatrixType> class Transpose;
 template<typename MatrixType> class Conjugate;
 template<typename BinaryOp, typename Lhs, typename Rhs> class CwiseBinaryOp;
 template<typename UnaryOp, typename MatrixType> class CwiseUnaryOp;
-template<typename Lhs, typename Rhs> class Product;
+template<typename Lhs, typename Rhs, int EvalMode=ei_product_eval_mode<Lhs,Rhs>::EvalMode > class Product;
 template<typename MatrixType> class Random;
 template<typename MatrixType> class Zero;
 template<typename MatrixType> class Ones;
@@ -60,9 +61,10 @@ struct ei_scalar_exp_op;
 struct ei_scalar_log_op;
 struct ei_scalar_cos_op;
 struct ei_scalar_sin_op;
-template<typename Scalar> struct ei_scalar_pow_op;
+template<typename Scalar>  struct ei_scalar_pow_op;
 template<typename NewType> struct ei_scalar_cast_op;
 template<typename Scalar>  struct ei_scalar_multiple_op;
+template<typename Scalar>  struct ei_scalar_quotient1_op;
 struct ei_scalar_min_op;
 struct ei_scalar_max_op;
 
diff --git a/Eigen/src/Core/Matrix.h b/Eigen/src/Core/Matrix.h
index 2102ce484..9452906d0 100644
--- a/Eigen/src/Core/Matrix.h
+++ b/Eigen/src/Core/Matrix.h
@@ -162,7 +162,7 @@ class Matrix : public MatrixBase<Matrix<_Scalar, _Rows, _Cols,
         resize(other.size(), 1);
       }
       else resize(other.rows(), other.cols());
-      return MatrixBase<Matrix>::operator=(other);
+      return Base::operator=(other.derived());
     }
 
     /** This is a special case of the templated operator=. Its purpose is to
diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h
index f3be4977f..d2f415741 100644
--- a/Eigen/src/Core/MatrixBase.h
+++ b/Eigen/src/Core/MatrixBase.h
@@ -170,6 +170,10 @@ template<typename Derived> class MatrixBase
       return this->operator=<Derived>(other);
     }
 
+    /** Overloaded for optimal product evaluation */
+    template<typename Derived1, typename Derived2>
+    Derived& operator=(const Product<Derived1,Derived2,CacheOptimal>& product);
+
     CommaInitializer operator<< (const Scalar& s);
 
     template<typename OtherDerived>
@@ -223,7 +227,7 @@ template<typename Derived> class MatrixBase
     Derived& operator/=(const Scalar& other);
 
     const CwiseUnaryOp<ei_scalar_multiple_op<Scalar>, Derived> operator*(const Scalar& scalar) const;
-    const CwiseUnaryOp<ei_scalar_multiple_op<Scalar>, Derived> operator/(const Scalar& scalar) const;
+    const CwiseUnaryOp<ei_scalar_quotient1_op<Scalar>, Derived> operator/(const Scalar& scalar) const;
 
     friend const CwiseUnaryOp<ei_scalar_multiple_op<Scalar>, Derived>
     operator*(const Scalar& scalar, const MatrixBase& matrix)
diff --git a/Eigen/src/Core/OperatorEquals.h b/Eigen/src/Core/OperatorEquals.h
index 020296c2f..82ef13ead 100644
--- a/Eigen/src/Core/OperatorEquals.h
+++ b/Eigen/src/Core/OperatorEquals.h
@@ -148,7 +148,7 @@ Derived& MatrixBase<Derived>
             coeffRef(i, j) = other.coeff(i, j);
       }
     }
-    return *static_cast<Derived*>(this);
+    return (*this).derived();
   }
 }
 
diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h
index 4999a1a4e..28602d8a0 100644
--- a/Eigen/src/Core/Product.h
+++ b/Eigen/src/Core/Product.h
@@ -65,6 +65,7 @@ struct ei_product_unroller<Index, 0, Lhs, Rhs>
   *
   * \param Lhs the type of the left-hand side
   * \param Rhs the type of the right-hand side
+  * \param EvalMode internal use only
   *
   * This class represents an expression of the product of two matrices.
   * It is the return type of MatrixBase::lazyProduct(), which is used internally by
@@ -72,8 +73,8 @@ struct ei_product_unroller<Index, 0, Lhs, Rhs>
   *
   * \sa class Sum, class Difference
   */
-template<typename Lhs, typename Rhs>
-struct ei_traits<Product<Lhs, Rhs> >
+template<typename Lhs, typename Rhs, int EvalMode>
+struct ei_traits<Product<Lhs, Rhs, EvalMode> >
 {
   typedef typename Lhs::Scalar Scalar;
   enum {
@@ -84,8 +85,19 @@ struct ei_traits<Product<Lhs, Rhs> >
   };
 };
 
-template<typename Lhs, typename Rhs> class Product : ei_no_assignment_operator,
-  public MatrixBase<Product<Lhs, Rhs> >
+template<typename Lhs, typename Rhs>
+struct ei_product_eval_mode
+{
+  enum {
+    SizeAtCompileTime = MatrixBase<Product<Lhs,Rhs,UnrolledDotProduct> >::SizeAtCompileTime,
+    EvalMode = ( EIGEN_UNROLLED_LOOPS
+              && SizeAtCompileTime != Dynamic
+              && SizeAtCompileTime <= EIGEN_UNROLLING_LIMIT) ? UnrolledDotProduct : CacheOptimal,
+  };
+};
+
+template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignment_operator,
+  public MatrixBase<Product<Lhs, Rhs, EvalMode> >
 {
   public:
 
@@ -97,6 +109,10 @@ template<typename Lhs, typename Rhs> class Product : ei_no_assignment_operator,
       assert(lhs.cols() == rhs.rows());
     }
 
+    /** \internal */
+    template<typename DestDerived>
+    void _cacheOptimalEval(DestDerived& res) const;
+
   private:
 
     int _rows() const { return m_lhs.rows(); }
@@ -156,7 +172,7 @@ template<typename OtherDerived>
 const Eval<Product<Derived, OtherDerived> >
 MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
 {
-  return lazyProduct(other).eval();
+  return (*this).lazyProduct(other).eval();
 }
 
 /** replaces \c *this by \c *this * \a other.
@@ -171,4 +187,39 @@ MatrixBase<Derived>::operator*=(const MatrixBase<OtherDerived> &other)
   return *this = *this * other;
 }
 
+template<typename Derived>
+template<typename Derived1, typename Derived2>
+Derived& MatrixBase<Derived>::operator=(const Product<Derived1,Derived2,CacheOptimal>& product)
+{
+  product._cacheOptimalEval(*this);
+  return (*this).derived();
+}
+
+template<typename Lhs, typename Rhs, int EvalMode>
+template<typename DestDerived>
+void Product<Lhs,Rhs,EvalMode>::_cacheOptimalEval(DestDerived& res) const
+{
+  res.setZero();
+  const int cols4 = m_lhs.cols()&0xfffffffC;
+  for (int k=0; k<m_rhs.cols(); ++k)
+  {
+    int j=0;
+    for (; j<cols4; j+=4)
+    {
+      const Scalar tmp0 = m_rhs.coeff(j  ,k);
+      const Scalar tmp1 = m_rhs.coeff(j+1,k);
+      const Scalar tmp2 = m_rhs.coeff(j+2,k);
+      const Scalar tmp3 = m_rhs.coeff(j+3,k);
+      for (int i=0; i<m_lhs.rows(); ++i)
+        res.coeffRef(i,k) += tmp0 * m_lhs.coeff(i,j) + tmp1 * m_lhs.coeff(i,j+1) + tmp2 * m_lhs.coeff(i,j+2) + tmp3 * m_lhs.coeff(i,j+3);
+    }
+    for (; j<m_lhs.cols(); ++j)
+    {
+      const Scalar tmp = m_rhs.coeff(j,k);
+      for (int i=0; i<m_lhs.rows(); ++i)
+        res.coeffRef(i,k) += tmp * m_lhs.coeff(i,j);
+    }
+  }
+}
+
 #endif // EIGEN_PRODUCT_H
diff --git a/Eigen/src/Core/Util.h b/Eigen/src/Core/Util.h
index 54c56eb3c..454edd4cb 100644
--- a/Eigen/src/Core/Util.h
+++ b/Eigen/src/Core/Util.h
@@ -122,6 +122,8 @@ enum CornerType { TopLeft, TopRight, BottomLeft, BottomRight };
 
 enum DirectionType { Vertical, Horizontal };
 
+enum ProductEvaluationMode { UnrolledDotProduct, CacheOptimal };
+
 // just a workaround because GCC seems to not really like empty structs
 #ifdef __GNUG__
   struct ei_empty_struct{char _ei_dummy_;};
@@ -156,20 +158,28 @@ template<> class ei_int_if_dynamic<Dynamic>
     void setValue(int value) { m_value = value; }
 };
 
-struct ei_has_nothing {int a[1];};
-struct ei_has_std_result_type {int a[2];};
-struct ei_has_tr1_result {int a[3];};
+
+template <bool Condition, class Then, class Else>
+struct ei_meta_if { typedef Then ret; };
+
+template <class Then, class Else>
+struct ei_meta_if <false, Then, Else> { typedef Else ret; };
+
 
 /** \internal
   * Convenient struct to get the result type of a unary or binary functor.
   *
   * It supports both the current STL mechanism (using the result_type member) as well as
   * upcoming next STL generation (using a templated result member).
-  * If none of these member is provided, then the type of the first argument is returned.
+  * If none of these members is provided, then the type of the first argument is returned.
   */
 template<typename T> struct ei_result_of {};
 
-template<typename Func, typename ArgType, int SizeOf=sizeof(ei_has_nothing)>
+struct ei_has_none {int a[1];};
+struct ei_has_std_result_type {int a[2];};
+struct ei_has_tr1_result {int a[3];};
+
+template<typename Func, typename ArgType, int SizeOf=sizeof(ei_has_none)>
 struct ei_unary_result_of_select {typedef ArgType type;};
 
 template<typename Func, typename ArgType>
@@ -184,12 +194,12 @@ struct ei_result_of<Func(ArgType)> {
     static ei_has_std_result_type testFunctor(T const *, typename T::result_type const * = 0);
     template<typename T>
     static ei_has_tr1_result      testFunctor(T const *, typename T::template result<T(ArgType)>::type const * = 0);
-    static ei_has_nothing         testFunctor(...);
+    static ei_has_none            testFunctor(...);
 
     typedef typename ei_unary_result_of_select<Func, ArgType, sizeof(testFunctor(static_cast<Func*>(0)))>::type type;
 };
 
-template<typename Func, typename ArgType0, typename ArgType1, int SizeOf=sizeof(ei_has_nothing)>
+template<typename Func, typename ArgType0, typename ArgType1, int SizeOf=sizeof(ei_has_none)>
 struct ei_binary_result_of_select {typedef ArgType0 type;};
 
 template<typename Func, typename ArgType0, typename ArgType1>
@@ -206,7 +216,7 @@ struct ei_result_of<Func(ArgType0,ArgType1)> {
     static ei_has_std_result_type testFunctor(T const *, typename T::result_type const * = 0);
     template<typename T>
     static ei_has_tr1_result      testFunctor(T const *, typename T::template result<T(ArgType0,ArgType1)>::type const * = 0);
-    static ei_has_nothing         testFunctor(...);
+    static ei_has_none            testFunctor(...);
 
     typedef typename ei_binary_result_of_select<Func, ArgType0, ArgType1, sizeof(testFunctor(static_cast<Func*>(0)))>::type type;
 };
author	Gael Guennebaud <g.gael@free.fr>	2008-03-21 20:26:14 +0000
committer	Gael Guennebaud <g.gael@free.fr>	2008-03-21 20:26:14 +0000
commit	4342f024d9937beaff70635d2e2cb1ad6574bf72 (patch)
tree	d8b74ed57bea4735114c1e479563acae939f820e /Eigen/src
parent	0ef1efdbdb63b5ebcb3ebf096a8833b2dd43a790 (diff)