From 027818d7392dbb4f12db17bb9f35032727bb1a30 Mon Sep 17 00:00:00 2001
From: Gael Guennebaud <g.gael@free.fr>
Date: Sat, 28 Jun 2008 23:07:14 +0000
Subject: * added innerSize / outerSize functions to MatrixBase * added
 complete implementation of sparse matrix product   (with a little glue in
 Eigen/Core) * added an exhaustive bench of sparse products including GMM++
 and MTL4   => Eigen outperforms in all transposed/density configurations !

---
 Eigen/src/Sparse/HashMatrix.h         |   2 +-
 Eigen/src/Sparse/LinkedVectorMatrix.h |   2 +-
 Eigen/src/Sparse/SparseMatrix.h       |  22 +-
 Eigen/src/Sparse/SparseMatrixBase.h   |  25 +--
 Eigen/src/Sparse/SparseProduct.h      | 397 ++++++++++++++++++++++++----------
 Eigen/src/Sparse/SparseUtil.h         |   1 +
 6 files changed, 304 insertions(+), 145 deletions(-)

(limited to 'Eigen/src/Sparse')
diff --git a/Eigen/src/Sparse/HashMatrix.h b/Eigen/src/Sparse/HashMatrix.h
index 753c7c5e9..a0383574c 100644
--- a/Eigen/src/Sparse/HashMatrix.h
+++ b/Eigen/src/Sparse/HashMatrix.h
@@ -34,7 +34,7 @@ struct ei_traits<HashMatrix<_Scalar, _Flags> >
     ColsAtCompileTime = Dynamic,
     MaxRowsAtCompileTime = Dynamic,
     MaxColsAtCompileTime = Dynamic,
-    Flags = _Flags,
+    Flags = SparseBit | _Flags,
     CoeffReadCost = NumTraits<Scalar>::ReadCost,
     SupportedAccessPatterns = RandomAccessPattern
   };
diff --git a/Eigen/src/Sparse/LinkedVectorMatrix.h b/Eigen/src/Sparse/LinkedVectorMatrix.h
index 2e2618e26..76e3a4c12 100644
--- a/Eigen/src/Sparse/LinkedVectorMatrix.h
+++ b/Eigen/src/Sparse/LinkedVectorMatrix.h
@@ -34,7 +34,7 @@ struct ei_traits<LinkedVectorMatrix<_Scalar,_Flags> >
     ColsAtCompileTime = Dynamic,
     MaxRowsAtCompileTime = Dynamic,
     MaxColsAtCompileTime = Dynamic,
-    Flags = _Flags,
+    Flags = SparseBit | _Flags,
     CoeffReadCost = NumTraits<Scalar>::ReadCost,
     SupportedAccessPatterns = InnerCoherentAccessPattern
   };
diff --git a/Eigen/src/Sparse/SparseMatrix.h b/Eigen/src/Sparse/SparseMatrix.h
index d9d404b8a..cbc504592 100644
--- a/Eigen/src/Sparse/SparseMatrix.h
+++ b/Eigen/src/Sparse/SparseMatrix.h
@@ -43,7 +43,7 @@ struct ei_traits<SparseMatrix<_Scalar, _Flags> >
     ColsAtCompileTime = Dynamic,
     MaxRowsAtCompileTime = Dynamic,
     MaxColsAtCompileTime = Dynamic,
-    Flags = _Flags,
+    Flags = SparseBit | _Flags,
     CoeffReadCost = NumTraits<Scalar>::ReadCost,
     SupportedAccessPatterns = FullyCoherentAccessPattern
   };
@@ -68,8 +68,9 @@ class SparseMatrix : public SparseMatrixBase<SparseMatrix<_Scalar, _Flags> >
 
     inline int rows() const { return m_rows; }
     inline int cols() const { return m_cols; }
+    inline int innerNonZeros(int j) const { return m_colPtrs[j+1]-m_colPtrs[j]; }
 
-    inline const Scalar& coeff(int row, int col) const
+    inline Scalar coeff(int row, int col) const
     {
       int id = m_colPtrs[col];
       int end = m_colPtrs[col+1];
@@ -161,6 +162,13 @@ class SparseMatrix : public SparseMatrixBase<SparseMatrix<_Scalar, _Flags> >
       resize(rows, cols);
     }
 
+    template<typename OtherDerived>
+    inline SparseMatrix(const MatrixBase<OtherDerived>& other)
+      : m_rows(0), m_cols(0), m_colPtrs(0)
+    {
+      *this = other.derived();
+    }
+
     inline void shallowCopy(const SparseMatrix& other)
     {
       EIGEN_DBG_SPARSE(std::cout << "SparseMatrix:: shallowCopy\n");
@@ -192,15 +200,7 @@ class SparseMatrix : public SparseMatrixBase<SparseMatrix<_Scalar, _Flags> >
     template<typename OtherDerived>
     inline SparseMatrix& operator=(const MatrixBase<OtherDerived>& other)
     {
-      return SparseMatrixBase<SparseMatrix>::operator=(other);
-    }
-
-    template<typename OtherDerived>
-    SparseMatrix<Scalar> operator*(const MatrixBase<OtherDerived>& other)
-    {
-      SparseMatrix<Scalar> res(rows(), other.cols());
-      ei_sparse_product<SparseMatrix,OtherDerived>(*this,other.derived(),res);
-      return res;
+      return SparseMatrixBase<SparseMatrix>::operator=(other.derived());
     }
 
     friend std::ostream & operator << (std::ostream & s, const SparseMatrix& m)
diff --git a/Eigen/src/Sparse/SparseMatrixBase.h b/Eigen/src/Sparse/SparseMatrixBase.h
index 6357cbeff..c663a6902 100644
--- a/Eigen/src/Sparse/SparseMatrixBase.h
+++ b/Eigen/src/Sparse/SparseMatrixBase.h
@@ -49,9 +49,6 @@ class SparseMatrixBase : public MatrixBase<Derived>
     bool isRValue() const { return m_isRValue; }
     Derived& temporary() { m_isRValue = true; return derived(); }
 
-    int outerSize() const { return RowMajor ? this->rows() : this->cols(); }
-    int innerSize() const { return RowMajor ? this->cols() : this->rows(); }
-
     inline Derived& operator=(const Derived& other)
     {
       if (other.isRValue())
@@ -64,31 +61,27 @@ class SparseMatrixBase : public MatrixBase<Derived>
       return derived();
     }
 
-
-
     template<typename OtherDerived>
     inline Derived& operator=(const MatrixBase<OtherDerived>& other)
     {
       const bool transpose = (Flags & RowMajorBit) != (OtherDerived::Flags & RowMajorBit);
-      const int outerSize = (int(OtherDerived::Flags) & RowMajorBit) ? other.rows() : other.cols();
-      // eval to a temporary and then do a shallow copy
-      typename ei_meta_if<transpose, LinkedVectorMatrix<Scalar,Flags&RowMajorBit>, Derived>::ret temp(other.rows(), other.cols());
+      const int outerSize = other.outerSize();
+      typedef typename ei_meta_if<transpose, LinkedVectorMatrix<Scalar,Flags&RowMajorBit>, Derived>::ret TempType;
+      TempType temp(other.rows(), other.cols());
 
       temp.startFill(std::max(this->rows(),this->cols())*2);
-//       std::cout << other.rows() << " xm " << other.cols() << "\n";
       for (int j=0; j<outerSize; ++j)
       {
         for (typename OtherDerived::InnerIterator it(other.derived(), j); it; ++it)
         {
-//           std::cout << other.rows() << " x " << other.cols() << "\n";
-//           std::cout << it.m_matrix.rows() << "\n";
           Scalar v = it.value();
           if (v!=Scalar(0))
-            if (RowMajor) temp.fill(j,it.index()) = v;
+            if (OtherDerived::Flags & RowMajorBit) temp.fill(j,it.index()) = v;
             else temp.fill(it.index(),j) = v;
         }
       }
       temp.endFill();
+
       derived() = temp.temporary();
       return derived();
     }
@@ -97,6 +90,7 @@ class SparseMatrixBase : public MatrixBase<Derived>
     inline Derived& operator=(const SparseMatrixBase<OtherDerived>& other)
     {
       const bool transpose = (Flags & RowMajorBit) != (OtherDerived::Flags & RowMajorBit);
+//       std::cout << "eval transpose = " << transpose << "\n";
       const int outerSize = (int(OtherDerived::Flags) & RowMajorBit) ? other.rows() : other.cols();
       if ((!transpose) && other.isRValue())
       {
@@ -120,12 +114,15 @@ class SparseMatrixBase : public MatrixBase<Derived>
       return derived();
     }
 
+    template<typename Lhs, typename Rhs>
+    inline Derived& operator=(const Product<Lhs,Rhs,SparseProduct>& product);
+
     friend std::ostream & operator << (std::ostream & s, const SparseMatrixBase& m)
     {
 
       if (Flags&RowMajorBit)
       {
-        for (int row=0; row<m.rows(); ++row)
+        for (int row=0; row<m.outerSize(); ++row)
         {
           int col = 0;
           for (typename Derived::InnerIterator it(m.derived(), row); it; ++it)
@@ -158,6 +155,4 @@ class SparseMatrixBase : public MatrixBase<Derived>
     mutable bool m_hasBeenCopied;
 };
 
-
-
 #endif // EIGEN_SPARSEMATRIXBASE_H
diff --git a/Eigen/src/Sparse/SparseProduct.h b/Eigen/src/Sparse/SparseProduct.h
index 9abddbd27..e921e8775 100644
--- a/Eigen/src/Sparse/SparseProduct.h
+++ b/Eigen/src/Sparse/SparseProduct.h
@@ -25,145 +25,308 @@
 #ifndef EIGEN_SPARSEPRODUCT_H
 #define EIGEN_SPARSEPRODUCT_H
 
-#define DENSE_TMP 1
-#define MAP_TMP 2
-#define LIST_TMP 3
+// sparse product return type specialization
+template<typename Lhs, typename Rhs>
+struct ProductReturnType<Lhs,Rhs,SparseProduct>
+{
+  typedef typename ei_traits<Lhs>::Scalar Scalar;
+  enum {
+    LhsRowMajor = ei_traits<Lhs>::Flags & RowMajorBit,
+    RhsRowMajor = ei_traits<Rhs>::Flags & RowMajorBit,
+    TransposeRhs = (!LhsRowMajor) && RhsRowMajor,
+    TransposeLhs = LhsRowMajor && (!RhsRowMajor)
+  };
+
+  // FIXME if we transpose let's evaluate to a LinkedVectorMatrix since it is the
+  // type of the temporary to perform the transpose op
+  typedef typename ei_meta_if<TransposeLhs,
+    SparseMatrix<Scalar,0>,
+    typename ei_nested<Lhs,Rhs::RowsAtCompileTime>::type>::ret LhsNested;
+
+  typedef typename ei_meta_if<TransposeRhs,
+    SparseMatrix<Scalar,0>,
+    typename ei_nested<Rhs,Lhs::RowsAtCompileTime>::type>::ret RhsNested;
 
-#define TMP_TMP 3
+  typedef Product<typename ei_unconst<LhsNested>::type,
+                  typename ei_unconst<RhsNested>::type, SparseProduct> Type;
+};
 
-template<typename Scalar>
-struct ListEl
+template<typename LhsNested, typename RhsNested>
+struct ei_traits<Product<LhsNested, RhsNested, SparseProduct> >
 {
-  int next;
-  int index;
-  Scalar value;
+  // clean the nested types:
+  typedef typename ei_unconst<typename ei_unref<LhsNested>::type>::type _LhsNested;
+  typedef typename ei_unconst<typename ei_unref<RhsNested>::type>::type _RhsNested;
+  typedef typename _LhsNested::Scalar Scalar;
+
+  enum {
+    LhsCoeffReadCost = _LhsNested::CoeffReadCost,
+    RhsCoeffReadCost = _RhsNested::CoeffReadCost,
+    LhsFlags = _LhsNested::Flags,
+    RhsFlags = _RhsNested::Flags,
+
+    RowsAtCompileTime = _LhsNested::RowsAtCompileTime,
+    ColsAtCompileTime = _RhsNested::ColsAtCompileTime,
+    InnerSize = EIGEN_ENUM_MIN(_LhsNested::ColsAtCompileTime, _RhsNested::RowsAtCompileTime),
+
+    MaxRowsAtCompileTime = _LhsNested::MaxRowsAtCompileTime,
+    MaxColsAtCompileTime = _RhsNested::MaxColsAtCompileTime,
+
+    LhsRowMajor = LhsFlags & RowMajorBit,
+    RhsRowMajor = RhsFlags & RowMajorBit,
+
+    EvalToRowMajor = (RhsFlags & LhsFlags & RowMajorBit),
+
+    RemovedBits = ~((EvalToRowMajor ? 0 : RowMajorBit)
+                | ((RowsAtCompileTime == Dynamic || ColsAtCompileTime == Dynamic) ? 0 : LargeBit)),
+
+    Flags = (int(LhsFlags | RhsFlags) & HereditaryBits & RemovedBits)
+          | EvalBeforeAssigningBit
+          | EvalBeforeNestingBit,
+
+    CoeffReadCost = Dynamic
+  };
 };
 
-template<typename Lhs, typename Rhs>
-static void ei_sparse_product(const Lhs& lhs, const Rhs& rhs, SparseMatrix<typename ei_traits<Lhs>::Scalar>& res)
+template<typename LhsNested, typename RhsNested> class Product<LhsNested,RhsNested,SparseProduct> : ei_no_assignment_operator,
+  public MatrixBase<Product<LhsNested, RhsNested, SparseProduct> >
 {
-  int rows = lhs.rows();
-  int cols = rhs.rows();
-  int size = lhs.cols();
+  public:
 
-  float ratio = std::max(float(lhs.nonZeros())/float(lhs.rows()*lhs.cols()), float(rhs.nonZeros())/float(rhs.rows()*rhs.cols()));
-  std::cout << ratio << "\n";
+    EIGEN_GENERIC_PUBLIC_INTERFACE(Product)
 
-  ei_assert(size == rhs.rows());
-  typedef typename ei_traits<Lhs>::Scalar Scalar;
-  #if (TMP_TMP == MAP_TMP)
-  std::map<int,Scalar> tmp;
-  #elif (TMP_TMP == LIST_TMP)
-  std::vector<ListEl<Scalar> > tmp(2*rows);
-  #else
-  std::vector<Scalar> tmp(rows);
-  #endif
-  res.resize(rows, cols);
-  res.startFill(2*std::max(rows, cols));
-  for (int j=0; j<cols; ++j)
+  private:
+
+    typedef typename ei_traits<Product>::_LhsNested _LhsNested;
+    typedef typename ei_traits<Product>::_RhsNested _RhsNested;
+
+  public:
+
+    template<typename Lhs, typename Rhs>
+    inline Product(const Lhs& lhs, const Rhs& rhs)
+      : m_lhs(lhs), m_rhs(rhs)
+    {
+      ei_assert(lhs.cols() == rhs.rows());
+    }
+
+    Scalar coeff(int, int) const { ei_assert(false && "eigen internal error"); }
+    Scalar& coeffRef(int, int) { ei_assert(false && "eigen internal error"); }
+
+    inline int rows() const { return m_lhs.rows(); }
+    inline int cols() const { return m_rhs.cols(); }
+
+    const _LhsNested& lhs() const { return m_lhs; }
+    const _LhsNested& rhs() const { return m_rhs; }
+
+  protected:
+    const LhsNested m_lhs;
+    const RhsNested m_rhs;
+};
+
+const int RowMajor = RowMajorBit;
+const int ColMajor = 0;
+
+template<typename Lhs, typename Rhs, typename ResultType,
+  int LhsStorageOrder = ei_traits<Lhs>::Flags&RowMajorBit,
+  int RhsStorageOrder = ei_traits<Rhs>::Flags&RowMajorBit,
+  int ResStorageOrder = ei_traits<ResultType>::Flags&RowMajorBit>
+struct ei_sparse_product_selector;
+
+template<typename Lhs, typename Rhs, typename ResultType>
+struct ei_sparse_product_selector<Lhs,Rhs,ResultType,ColMajor,ColMajor,ColMajor>
+{
+  typedef typename ei_traits<typename ei_cleantype<Lhs>::type>::Scalar Scalar;
+
+  struct ListEl
+  {
+    int next;
+    int index;
+    Scalar value;
+  };
+
+  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
   {
-    #if (TMP_TMP == MAP_TMP)
-    tmp.clear();
-    #elif (TMP_TMP == LIST_TMP)
-    int tmp_size = 0;
-    int tmp_start = -1;
-    #else
-    for (int k=0; k<rows; ++k)
-      tmp[k] = 0;
-    #endif
-    for (typename Rhs::InnerIterator rhsIt(rhs, j); rhsIt; ++rhsIt)
+    // make sure to call innerSize/outerSize since we fake the storage order.
+    int rows = lhs.innerSize();
+    int cols = rhs.outerSize();
+    int size = lhs.outerSize();
+    ei_assert(size == rhs.rows());
+
+    // allocate a temporary buffer
+    Scalar* buffer = new Scalar[rows];
+
+    // estimate the number of non zero entries
+    float ratioLhs = float(lhs.nonZeros())/float(lhs.rows()*lhs.cols());
+    float avgNnzPerRhsColumn = float(rhs.nonZeros())/float(cols);
+    float ratioRes = std::min(ratioLhs * avgNnzPerRhsColumn, 1.f);
+
+    res.resize(rows, cols);
+    res.startFill(ratioRes*rows*cols);
+    for (int j=0; j<cols; ++j)
     {
-      #if (TMP_TMP == MAP_TMP)
-      typename std::map<int,Scalar>::iterator hint = tmp.begin();
-      typename std::map<int,Scalar>::iterator r;
-      #elif (TMP_TMP == LIST_TMP)
-      int tmp_el = tmp_start;
-      #endif
-      for (typename Lhs::InnerIterator lhsIt(lhs, rhsIt.index()); lhsIt; ++lhsIt)
+      // let's do a more accurate determination of the nnz ratio for the current column j of res
+      //float ratioColRes = std::min(ratioLhs * rhs.innerNonZeros(j), 1.f);
+      // FIXME find a nice way to get the number of nonzeros of a sub matrix (here an inner vector)
+      float ratioColRes = ratioRes;
+      if (ratioColRes>0.1)
       {
-        #if (TMP_TMP == MAP_TMP)
-        r = hint;
-        Scalar v = lhsIt.value() * rhsIt.value();
-        int id = lhsIt.index();
-        while (r!=tmp.end() && r->first < id)
-          ++r;
-        if (r!=tmp.end() && r->first==id)
-        {
-          r->second += v;
-          hint = r;
-        }
-        else
-          hint = tmp.insert(r, std::pair<int,Scalar>(id, v));
-        ++hint;
-        #elif (TMP_TMP == LIST_TMP)
-        Scalar v = lhsIt.value() * rhsIt.value();
-        int id = lhsIt.index();
-        if (tmp_size==0)
+        // dense path, the scalar * columns products are accumulated into a dense column
+        Scalar* __restrict__ tmp = buffer;
+        // set to zero
+        for (int k=0; k<rows; ++k)
+          tmp[k] = 0;
+        for (typename Rhs::InnerIterator rhsIt(rhs, j); rhsIt; ++rhsIt)
         {
-          tmp_start = 0;
-          tmp_el = 0;
-          tmp_size++;
-          tmp[0].value = v;
-          tmp[0].index = id;
-          tmp[0].next = -1;
-        }
-        else if (id<tmp[tmp_start].index)
-        {
-          tmp[tmp_size].value = v;
-          tmp[tmp_size].index = id;
-          tmp[tmp_size].next = tmp_start;
-          tmp_start = tmp_size;
-          tmp_size++;
+          // FIXME should be written like this: tmp += rhsIt.value() * lhs.col(rhsIt.index())
+          Scalar x = rhsIt.value();
+          for (typename Lhs::InnerIterator lhsIt(lhs, rhsIt.index()); lhsIt; ++lhsIt)
+          {
+            tmp[lhsIt.index()] += lhsIt.value() * x;
+          }
         }
-        else
+        // copy the temporary to the respective res.col()
+        for (int k=0; k<rows; ++k)
+          if (tmp[k]!=0)
+            res.fill(k, j) = tmp[k];
+      }
+      else
+      {
+        ListEl* __restrict__ tmp = reinterpret_cast<ListEl*>(buffer);
+        // sparse path, the scalar * columns products are accumulated into a linked list
+        int tmp_size = 0;
+        int tmp_start = -1;
+        for (typename Rhs::InnerIterator rhsIt(rhs, j); rhsIt; ++rhsIt)
         {
-          int nextel = tmp[tmp_el].next;
-          while (nextel >= 0 && tmp[nextel].index<=id)
+          int tmp_el = tmp_start;
+          for (typename Lhs::InnerIterator lhsIt(lhs, rhsIt.index()); lhsIt; ++lhsIt)
           {
-            tmp_el = nextel;
-            nextel = tmp[nextel].next;
-          }
+            Scalar v = lhsIt.value() * rhsIt.value();
+            int id = lhsIt.index();
+            if (tmp_size==0)
+            {
+              tmp_start = 0;
+              tmp_el = 0;
+              tmp_size++;
+              tmp[0].value = v;
+              tmp[0].index = id;
+              tmp[0].next = -1;
+            }
+            else if (id<tmp[tmp_start].index)
+            {
+              tmp[tmp_size].value = v;
+              tmp[tmp_size].index = id;
+              tmp[tmp_size].next = tmp_start;
+              tmp_start = tmp_size;
+              tmp_size++;
+            }
+            else
+            {
+              int nextel = tmp[tmp_el].next;
+              while (nextel >= 0 && tmp[nextel].index<=id)
+              {
+                tmp_el = nextel;
+                nextel = tmp[nextel].next;
+              }
 
-          if (tmp[tmp_el].index==id)
-          {
-            tmp[tmp_el].value += v;
-          }
-          else
-          {
-            tmp[tmp_size].value = v;
-            tmp[tmp_size].index = id;
-            tmp[tmp_size].next = tmp[tmp_el].next;
-            tmp[tmp_el].next = tmp_size;
-            tmp_size++;
+              if (tmp[tmp_el].index==id)
+              {
+                tmp[tmp_el].value += v;
+              }
+              else
+              {
+                tmp[tmp_size].value = v;
+                tmp[tmp_size].index = id;
+                tmp[tmp_size].next = tmp[tmp_el].next;
+                tmp[tmp_el].next = tmp_size;
+                tmp_size++;
+              }
+            }
           }
         }
-        #else
-        tmp[lhsIt.index()] += lhsIt.value() * rhsIt.value();
-        #endif
-        //res.coeffRef(lhsIt.index(), j) += lhsIt.value() * rhsIt.value();
+        int k = tmp_start;
+        while (k>=0)
+        {
+          if (tmp[k].value!=0)
+            res.fill(tmp[k].index, j) = tmp[k].value;
+          k = tmp[k].next;
+        }
       }
     }
-    #if (TMP_TMP == MAP_TMP)
-    for (typename std::map<int,Scalar>::const_iterator k=tmp.begin(); k!=tmp.end(); ++k)
-      if (k->second!=0)
-        res.fill(k->first, j) = k->second;
-    #elif (TMP_TMP == LIST_TMP)
-    int k = tmp_start;
-    while (k>=0)
-    {
-      if (tmp[k].value!=0)
-        res.fill(tmp[k].index, j) = tmp[k].value;
-      k = tmp[k].next;
-    }
-    #else
-    for (int k=0; k<rows; ++k)
-      if (tmp[k]!=0)
-        res.fill(k, j) = tmp[k];
-    #endif
+    res.endFill();
+  }
+};
+
+template<typename Lhs, typename Rhs, typename ResultType>
+struct ei_sparse_product_selector<Lhs,Rhs,ResultType,ColMajor,ColMajor,RowMajor>
+{
+  typedef SparseMatrix<typename ResultType::Scalar> SparseTemporaryType;
+  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
+  {
+    SparseTemporaryType _res(res.rows(), res.cols());
+    ei_sparse_product_selector<Lhs,Rhs,SparseTemporaryType,ColMajor,ColMajor,ColMajor>::run(lhs, rhs, _res);
+    res = _res;
+  }
+};
+
+template<typename Lhs, typename Rhs, typename ResultType>
+struct ei_sparse_product_selector<Lhs,Rhs,ResultType,RowMajor,RowMajor,RowMajor>
+{
+  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
+  {
+    // let's transpose the product and fake the matrices are column major
+    ei_sparse_product_selector<Rhs,Lhs,ResultType,ColMajor,ColMajor,ColMajor>::run(rhs, lhs, res);
+  }
+};
+
+template<typename Lhs, typename Rhs, typename ResultType>
+struct ei_sparse_product_selector<Lhs,Rhs,ResultType,RowMajor,RowMajor,ColMajor>
+{
+  typedef SparseMatrix<typename ResultType::Scalar> SparseTemporaryType;
+  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
+  {
+    // let's transpose the product and fake the matrices are column major
+    ei_sparse_product_selector<Rhs,Lhs,ResultType,ColMajor,ColMajor,RowMajor>::run(rhs, lhs, res);
   }
-  res.endFill();
+};
 
-  std::cout << "  => " << float(res.nonZeros())/float(res.rows()*res.cols()) << "\n";
+// NOTE eventually let's transpose one argument even in this case since it might be expensive if
+// the result is not dense.
+// template<typename Lhs, typename Rhs, typename ResultType, int ResStorageOrder>
+// struct ei_sparse_product_selector<Lhs,Rhs,ResultType,RowMajor,ColMajor,ResStorageOrder>
+// {
+//   static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
+//   {
+//     // trivial product as lhs.row/rhs.col dot products
+//     // loop over the prefered order of the result
+//   }
+// };
+
+// NOTE the 2 others cases (col row *) must never occurs since they are catched
+// by ProductReturnType which transform it to (col col *) by evaluating rhs.
+
+
+template<typename Derived>
+template<typename Lhs, typename Rhs>
+inline Derived& MatrixBase<Derived>::lazyAssign(const Product<Lhs,Rhs,SparseProduct>& product)
+{
+//   std::cout << "sparse product to dense\n";
+  ei_sparse_product_selector<
+    typename ei_cleantype<Lhs>::type,
+    typename ei_cleantype<Rhs>::type,
+    typename ei_cleantype<Derived>::type>::run(product.lhs(),product.rhs(),derived());
+  return derived();
+}
+
+template<typename Derived>
+template<typename Lhs, typename Rhs>
+inline Derived& SparseMatrixBase<Derived>::operator=(const Product<Lhs,Rhs,SparseProduct>& product)
+{
+//   std::cout << "sparse product to sparse\n";
+  ei_sparse_product_selector<
+    typename ei_cleantype<Lhs>::type,
+    typename ei_cleantype<Rhs>::type,
+    Derived>::run(product.lhs(),product.rhs(),derived());
+  return derived();
 }
 
 #endif // EIGEN_SPARSEPRODUCT_H
diff --git a/Eigen/src/Sparse/SparseUtil.h b/Eigen/src/Sparse/SparseUtil.h
index e89a7c322..bcf87ad90 100644
--- a/Eigen/src/Sparse/SparseUtil.h
+++ b/Eigen/src/Sparse/SparseUtil.h
@@ -31,6 +31,7 @@
 #define EIGEN_DBG_SPARSE(X) X
 #endif
 
+template<typename Derived> class SparseMatrixBase;
 template<typename _Scalar, int _Flags = 0> class SparseMatrix;
 template<typename _Scalar, int _Flags = 0> class HashMatrix;
 template<typename _Scalar, int _Flags = 0> class LinkedVectorMatrix;
-- 
cgit v1.2.3