4 files changed, 232 insertions, 4 deletions
diff --git a/Eigen/src/Sparse/SparseBlock.h b/Eigen/src/Sparse/SparseBlock.h
index 7f2e05c4d..a8c1f9047 100644
--- a/Eigen/src/Sparse/SparseBlock.h
+++ b/Eigen/src/Sparse/SparseBlock.h
@@ -264,10 +264,18 @@ class SparseInnerVectorSet<SparseMatrix<_Scalar, _Options>, Size>
 
     inline const Scalar* _valuePtr() const
     { return m_matrix._valuePtr() + m_matrix._outerIndexPtr()[m_outerStart]; }
+    inline Scalar* _valuePtr()
+    { return m_matrix.const_cast_derived()._valuePtr() + m_matrix._outerIndexPtr()[m_outerStart]; }
+
     inline const int* _innerIndexPtr() const
     { return m_matrix._innerIndexPtr() + m_matrix._outerIndexPtr()[m_outerStart]; }
+    inline int* _innerIndexPtr()
+    { return m_matrix.const_cast_derived()._innerIndexPtr() + m_matrix._outerIndexPtr()[m_outerStart]; }
+
     inline const int* _outerIndexPtr() const
     { return m_matrix._outerIndexPtr() + m_outerStart; }
+    inline int* _outerIndexPtr()
+    { return m_matrix.const_cast_derived()._outerIndexPtr() + m_outerStart; }
 
     int nonZeros() const
     {
diff --git a/Eigen/src/Sparse/SparseMatrix.h b/Eigen/src/Sparse/SparseMatrix.h
index 7feb2aff8..aa094d703 100644
--- a/Eigen/src/Sparse/SparseMatrix.h
+++ b/Eigen/src/Sparse/SparseMatrix.h
@@ -201,6 +201,14 @@ class SparseMatrix
       return m_data.value(id);
     }
 
+    inline Scalar& insertBackNoCheck(int outer, int inner)
+    {
+      int id = m_outerIndex[outer+1];
+      ++m_outerIndex[outer+1];
+      m_data.append(0, inner);
+      return m_data.value(id);
+    }
+
     inline void startVec(int outer)
     {
       ei_assert(m_outerIndex[outer]==int(m_data.size()) && "you must call startVec on each inner vec");
@@ -443,7 +451,7 @@ class SparseMatrix
     }
 
     template<typename OtherDerived>
-    inline SparseMatrix& operator=(const SparseMatrixBase<OtherDerived>& other)
+    EIGEN_DONT_INLINE SparseMatrix& operator=(const SparseMatrixBase<OtherDerived>& other)
     {
       const bool needToTranspose = (Flags & RowMajorBit) != (OtherDerived::Flags & RowMajorBit);
       if (needToTranspose)
@@ -479,13 +487,14 @@ class SparseMatrix
         m_data.resize(count);
         // pass 2
         for (int j=0; j<otherCopy.outerSize(); ++j)
+        {
           for (typename _OtherCopy::InnerIterator it(otherCopy, j); it; ++it)
           {
             int pos = positions[it.index()]++;
             m_data.index(pos) = j;
             m_data.value(pos) = it.value();
           }
-
+        }
         return *this;
       }
       else
diff --git a/Eigen/src/Sparse/SparseMatrixBase.h b/Eigen/src/Sparse/SparseMatrixBase.h
index 80cb7620c..dc66113f2 100644
--- a/Eigen/src/Sparse/SparseMatrixBase.h
+++ b/Eigen/src/Sparse/SparseMatrixBase.h
@@ -251,6 +251,9 @@ template<typename Derived> class SparseMatrixBase : public AnyMatrixBase<Derived
     template<typename Lhs, typename Rhs>
     inline Derived& operator=(const SparseProduct<Lhs,Rhs>& product);
 
+    template<typename Lhs, typename Rhs>
+    inline void _experimentalNewProduct(const Lhs& lhs, const Rhs& rhs);
+
     friend std::ostream & operator << (std::ostream & s, const SparseMatrixBase& m)
     {
       if (Flags&RowMajorBit)
diff --git a/Eigen/src/Sparse/SparseProduct.h b/Eigen/src/Sparse/SparseProduct.h
index 08f6e3cac..3a47df6aa 100644
--- a/Eigen/src/Sparse/SparseProduct.h
+++ b/Eigen/src/Sparse/SparseProduct.h
@@ -32,8 +32,8 @@ struct SparseProductReturnType
   enum {
     LhsRowMajor = ei_traits<Lhs>::Flags & RowMajorBit,
     RhsRowMajor = ei_traits<Rhs>::Flags & RowMajorBit,
-    TransposeRhs = (!LhsRowMajor) && RhsRowMajor,
-    TransposeLhs = LhsRowMajor && (!RhsRowMajor)
+    TransposeRhs = /*false,*/ (!LhsRowMajor) && RhsRowMajor,
+    TransposeLhs = /*false*/  LhsRowMajor && (!RhsRowMajor)
   };
 
   // FIXME if we transpose let's evaluate to a LinkedVectorMatrix since it is the
@@ -136,6 +136,84 @@ class SparseProduct : ei_no_assignment_operator,
     RhsNested m_rhs;
 };
 
+template<typename Lhs, typename Rhs, typename ResultType>
+static void ei_sparse_product_impl2(const Lhs& lhs, const Rhs& rhs, ResultType& res)
+{
+  typedef typename ei_traits<typename ei_cleantype<Lhs>::type>::Scalar Scalar;
+
+  // make sure to call innerSize/outerSize since we fake the storage order.
+  int rows = lhs.innerSize();
+  int cols = rhs.outerSize();
+  ei_assert(lhs.outerSize() == rhs.innerSize());
+
+  std::vector<bool> mask(rows,false);
+
+  // estimate the number of non zero entries
+  float ratioLhs = float(lhs.nonZeros())/(float(lhs.rows())*float(lhs.cols()));
+  float avgNnzPerRhsColumn = float(rhs.nonZeros())/float(cols);
+  float ratioRes = std::min(ratioLhs * avgNnzPerRhsColumn, 1.f);
+
+  float ratio;
+
+  res.resize(rows, cols);
+  res.reserve(int(ratioRes*rows*cols));
+  // we compute each column of the result, one after the other
+  for (int j=0; j<cols; ++j)
+  {
+
+    res.startVec(j);
+    for (typename Rhs::InnerIterator rhsIt(rhs, j); rhsIt; ++rhsIt)
+    {
+      Scalar y = rhsIt.value();
+      int k = rhsIt.index();
+      for (typename Lhs::InnerIterator lhsIt(lhs, k); lhsIt; ++lhsIt)
+      {
+        int i = lhsIt.index();
+        Scalar x = lhsIt.value();
+        if(!mask[i])
+        {
+          mask[i] = true;
+          values[i] = x * y;
+          res.insertBackNoCheck(j,i);
+        }
+        else
+          res._valuePtr()[mask[i]] += x* y;
+      }
+    }
+    // if the result is sparse enough => use a quick sort
+    // otherwise => loop through the entire vector
+    SparseInnerVectorSet<ResultType,1> vec(res,j);
+
+    int nnz = vec.nonZeros();
+    if(rows/1.39 > nnz * log2(nnz))
+    {
+      std::sort(vec._innerIndexPtr(), vec._innerIndexPtr()+vec.nonZeros());
+      for (typename ResultType::InnerIterator it(res, j); it; ++it)
+      {
+        it.valueRef() = values[it.index()];
+        mask[it.index()] = false;
+      }
+    }
+    else
+    {
+      // dense path
+      int count = 0;
+      for(int i=0; i<rows; ++i)
+      {
+        if(mask[i])
+        {
+          mask[i] = false;
+          vec._innerIndexPtr()[count] = i;
+          vec._valuePtr()[count] = i;
+          ++count;
+        }
+      }
+    }
+
+  }
+  res.finalize();
+}
+
 // perform a pseudo in-place sparse * sparse product assuming all matrices are col major
 template<typename Lhs, typename Rhs, typename ResultType>
 static void ei_sparse_product_impl(const Lhs& lhs, const Rhs& rhs, ResultType& res)
@@ -257,6 +335,136 @@ inline Derived& SparseMatrixBase<Derived>::operator=(const SparseProduct<Lhs,Rhs
 }
 
 
+template<typename Lhs, typename Rhs, typename ResultType,
+  int LhsStorageOrder = ei_traits<Lhs>::Flags&RowMajorBit,
+  int RhsStorageOrder = ei_traits<Rhs>::Flags&RowMajorBit,
+  int ResStorageOrder = ei_traits<ResultType>::Flags&RowMajorBit>
+struct ei_sparse_product_selector2;
+
+template<typename Lhs, typename Rhs, typename ResultType>
+struct ei_sparse_product_selector2<Lhs,Rhs,ResultType,ColMajor,ColMajor,ColMajor>
+{
+  typedef typename ei_traits<typename ei_cleantype<Lhs>::type>::Scalar Scalar;
+
+  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
+  {
+    ei_sparse_product_impl2<Lhs,Rhs,ResultType>(lhs, rhs, res, 0);
+  }
+};
+
+template<typename Lhs, typename Rhs, typename ResultType>
+struct ei_sparse_product_selector2<Lhs,Rhs,ResultType,RowMajor,ColMajor,ColMajor>
+{
+  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
+  {
+    typedef SparseMatrix<typename ResultType::Scalar,RowMajor> RowMajorMatrix;
+    RowMajorMatrix rhsRow = rhs;
+    RowMajorMatrix resRow(res.rows(), res.cols());
+    ei_sparse_product_impl2<RowMajorMatrix,Lhs,RowMajorMatrix>(rhsRow, lhs, resRow);
+    res = resRow;
+  }
+};
+
+template<typename Lhs, typename Rhs, typename ResultType>
+struct ei_sparse_product_selector2<Lhs,Rhs,ResultType,ColMajor,RowMajor,ColMajor>
+{
+  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
+  {
+    typedef SparseMatrix<typename ResultType::Scalar,RowMajor> RowMajorMatrix;
+    RowMajorMatrix lhsRow = lhs;
+    RowMajorMatrix resRow(res.rows(), res.cols());
+    ei_sparse_product_impl2<Rhs,RowMajorMatrix,RowMajorMatrix>(rhs, lhsRow, resRow);
+    res = resRow;
+  }
+};
+
+template<typename Lhs, typename Rhs, typename ResultType>
+struct ei_sparse_product_selector2<Lhs,Rhs,ResultType,RowMajor,RowMajor,ColMajor>
+{
+  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
+  {
+    typedef SparseMatrix<typename ResultType::Scalar,RowMajor> RowMajorMatrix;
+    RowMajorMatrix resRow(res.rows(), res.cols());
+    ei_sparse_product_impl2<Rhs,Lhs,RowMajorMatrix>(rhs, lhs, resRow);
+    res = resRow;
+  }
+};
+
+
+template<typename Lhs, typename Rhs, typename ResultType>
+struct ei_sparse_product_selector2<Lhs,Rhs,ResultType,ColMajor,ColMajor,RowMajor>
+{
+  typedef typename ei_traits<typename ei_cleantype<Lhs>::type>::Scalar Scalar;
+
+  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
+  {
+    typedef SparseMatrix<typename ResultType::Scalar,ColMajor> ColMajorMatrix;
+    ColMajorMatrix resCol(res.rows(), res.cols());
+    ei_sparse_product_impl2<Lhs,Rhs,ColMajorMatrix>(lhs, rhs, resCol);
+    res = resCol;
+  }
+};
+
+template<typename Lhs, typename Rhs, typename ResultType>
+struct ei_sparse_product_selector2<Lhs,Rhs,ResultType,RowMajor,ColMajor,RowMajor>
+{
+  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
+  {
+    typedef SparseMatrix<typename ResultType::Scalar,ColMajor> ColMajorMatrix;
+    ColMajorMatrix lhsCol = lhs;
+    ColMajorMatrix resCol(res.rows(), res.cols());
+    ei_sparse_product_impl2<ColMajorMatrix,Rhs,ColMajorMatrix>(lhsCol, rhs, resCol);
+    res = resCol;
+  }
+};
+
+template<typename Lhs, typename Rhs, typename ResultType>
+struct ei_sparse_product_selector2<Lhs,Rhs,ResultType,ColMajor,RowMajor,RowMajor>
+{
+  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
+  {
+    typedef SparseMatrix<typename ResultType::Scalar,ColMajor> ColMajorMatrix;
+    ColMajorMatrix rhsCol = rhs;
+    ColMajorMatrix resCol(res.rows(), res.cols());
+    ei_sparse_product_impl2<Lhs,ColMajorMatrix,ColMajorMatrix>(lhs, rhsCol, resCol);
+    res = resCol;
+  }
+};
+
+template<typename Lhs, typename Rhs, typename ResultType>
+struct ei_sparse_product_selector2<Lhs,Rhs,ResultType,RowMajor,RowMajor,RowMajor>
+{
+  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
+  {
+    typedef SparseMatrix<typename ResultType::Scalar,ColMajor> ColMajorMatrix;
+//     ColMajorMatrix lhsTr(lhs);
+//     ColMajorMatrix rhsTr(rhs);
+//     ColMajorMatrix aux(res.rows(), res.cols());
+//     ei_sparse_product_impl2<Rhs,Lhs,ColMajorMatrix>(rhs, lhs, aux);
+// //     ColMajorMatrix aux2 = aux.transpose();
+//     res = aux;
+    typedef SparseMatrix<typename ResultType::Scalar,ColMajor> ColMajorMatrix;
+    ColMajorMatrix lhsCol(lhs);
+    ColMajorMatrix rhsCol(rhs);
+    ColMajorMatrix resCol(res.rows(), res.cols());
+    ei_sparse_product_impl2<ColMajorMatrix,ColMajorMatrix,ColMajorMatrix>(lhsCol, rhsCol, resCol);
+    res = resCol;
+  }
+};
+
+template<typename Derived>
+template<typename Lhs, typename Rhs>
+inline void SparseMatrixBase<Derived>::_experimentalNewProduct(const Lhs& lhs, const Rhs& rhs)
+{
+  //derived().resize(lhs.rows(), rhs.cols());
+  ei_sparse_product_selector2<
+    typename ei_cleantype<Lhs>::type,
+    typename ei_cleantype<Rhs>::type,
+    Derived>::run(lhs,rhs,derived());
+}
+
+
+
 template<typename Lhs, typename Rhs>
 struct ei_traits<SparseTimeDenseProduct<Lhs,Rhs> >
  : ei_traits<ProductBase<SparseTimeDenseProduct<Lhs,Rhs>, Lhs, Rhs> >