Merged eigen/eigen into default

author: Konstantinos Margaritis <konstantinos.margaritis@freevec.org> 2014-09-21 14:02:51 +0300
committer: Konstantinos Margaritis <konstantinos.margaritis@freevec.org> 2014-09-21 14:02:51 +0300
commit: 60e093a9dce2f8d4c0f3b2ea3e0386d5f01bff8d (patch)
tree: 05442eeff0bcfe7fe85ce59cf5fa72aa06ee2a07
parent: 56408504e4e3fa5f9c59d9edac14ca1ba1255e5a (diff)
parent: 03dd4dd91a5d8963f56eebe3b9d2eb924bc06e02 (diff)
211 files changed, 8174 insertions, 9478 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index ea42cc8db..b3753edb0 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -143,7 +143,7 @@ if(NOT MSVC)
   ei_add_cxx_compiler_flag("-Wpointer-arith")
   ei_add_cxx_compiler_flag("-Wwrite-strings")
   ei_add_cxx_compiler_flag("-Wformat-security")
-  ei_add_cxx_compiler_flag("-Wshorten-64-to-32")
+#   ei_add_cxx_compiler_flag("-Wshorten-64-to-32")
   ei_add_cxx_compiler_flag("-Wenum-conversion")
   ei_add_cxx_compiler_flag("-Wc++11-extensions")
   
diff --git a/Eigen/Cholesky b/Eigen/Cholesky
index 7314d326c..dd0ca911c 100644
--- a/Eigen/Cholesky
+++ b/Eigen/Cholesky
@@ -21,7 +21,6 @@
   * \endcode
   */
 
-#include "src/misc/Solve.h"
 #include "src/Cholesky/LLT.h"
 #include "src/Cholesky/LDLT.h"
 #ifdef EIGEN_USE_LAPACKE
diff --git a/Eigen/CholmodSupport b/Eigen/CholmodSupport
index 745b884e7..687cd9777 100644
--- a/Eigen/CholmodSupport
+++ b/Eigen/CholmodSupport
@@ -33,12 +33,8 @@ extern "C" {
   *
   */
 
-#include "src/misc/Solve.h"
-#include "src/misc/SparseSolve.h"
-
 #include "src/CholmodSupport/CholmodSupport.h"
 
-
 #include "src/Core/util/ReenableStupidWarnings.h"
 
 #endif // EIGEN_CHOLMODSUPPORT_MODULE_H
diff --git a/Eigen/Core b/Eigen/Core
index ac3cbd0c7..adab50b4a 100644
--- a/Eigen/Core
+++ b/Eigen/Core
@@ -277,8 +277,8 @@ using std::ptrdiff_t;
   */
 
 #include "src/Core/util/Constants.h"
-#include "src/Core/util/ForwardDeclarations.h"
 #include "src/Core/util/Meta.h"
+#include "src/Core/util/ForwardDeclarations.h"
 #include "src/Core/util/StaticAssert.h"
 #include "src/Core/util/XprHelper.h"
 #include "src/Core/util/Memory.h"
@@ -311,19 +311,16 @@ using std::ptrdiff_t;
 #include "src/Core/functors/UnaryFunctors.h"
 #include "src/Core/functors/NullaryFunctors.h"
 #include "src/Core/functors/StlFunctors.h"
+#include "src/Core/functors/AssignmentFunctors.h"
 
 #include "src/Core/DenseCoeffsBase.h"
 #include "src/Core/DenseBase.h"
 #include "src/Core/MatrixBase.h"
 #include "src/Core/EigenBase.h"
 
-#ifdef EIGEN_ENABLE_EVALUATORS
-#include "src/Core/functors/AssignmentFunctors.h"
 #include "src/Core/Product.h"
 #include "src/Core/CoreEvaluators.h"
 #include "src/Core/AssignEvaluator.h"
-#include "src/Core/ProductEvaluators.h"
-#endif
 
 #ifndef EIGEN_PARSED_BY_DOXYGEN // work around Doxygen bug triggered by Assign.h r814874
                                 // at least confirmed with Doxygen 1.5.5 and 1.5.6
@@ -334,7 +331,10 @@ using std::ptrdiff_t;
 #include "src/Core/util/BlasUtil.h"
 #include "src/Core/DenseStorage.h"
 #include "src/Core/NestByValue.h"
-#include "src/Core/ForceAlignedAccess.h"
+
+// #include "src/Core/ForceAlignedAccess.h"
+// #include "src/Core/Flagged.h"
+
 #include "src/Core/ReturnByValue.h"
 #include "src/Core/NoAlias.h"
 #include "src/Core/PlainObjectBase.h"
@@ -347,12 +347,12 @@ using std::ptrdiff_t;
 #include "src/Core/SelfCwiseBinaryOp.h"
 #include "src/Core/Dot.h"
 #include "src/Core/StableNorm.h"
-#include "src/Core/MapBase.h"
 #include "src/Core/Stride.h"
+#include "src/Core/MapBase.h"
 #include "src/Core/Map.h"
+#include "src/Core/Ref.h"
 #include "src/Core/Block.h"
 #include "src/Core/VectorBlock.h"
-#include "src/Core/Ref.h"
 #include "src/Core/Transpose.h"
 #include "src/Core/DiagonalMatrix.h"
 #include "src/Core/Diagonal.h"
@@ -365,14 +365,15 @@ using std::ptrdiff_t;
 #include "src/Core/IO.h"
 #include "src/Core/Swap.h"
 #include "src/Core/CommaInitializer.h"
-#include "src/Core/Flagged.h"
 #include "src/Core/ProductBase.h"
 #include "src/Core/GeneralProduct.h"
+#include "src/Core/Solve.h"
+#include "src/Core/Inverse.h"
 #include "src/Core/TriangularMatrix.h"
 #include "src/Core/SelfAdjointView.h"
 #include "src/Core/products/GeneralBlockPanelKernel.h"
 #include "src/Core/products/Parallelizer.h"
-#include "src/Core/products/CoeffBasedProduct.h"
+#include "src/Core/ProductEvaluators.h"
 #include "src/Core/products/GeneralMatrixVector.h"
 #include "src/Core/products/GeneralMatrixMatrix.h"
 #include "src/Core/SolveTriangular.h"
diff --git a/Eigen/IterativeLinearSolvers b/Eigen/IterativeLinearSolvers
index 0f4159dc1..c06668bd2 100644
--- a/Eigen/IterativeLinearSolvers
+++ b/Eigen/IterativeLinearSolvers
@@ -26,9 +26,7 @@
   * \endcode
   */
 
-#include "src/misc/Solve.h"
-#include "src/misc/SparseSolve.h"
-
+#include "src/IterativeLinearSolvers/SolveWithGuess.h"
 #include "src/IterativeLinearSolvers/IterativeSolverBase.h"
 #include "src/IterativeLinearSolvers/BasicPreconditioners.h"
 #include "src/IterativeLinearSolvers/ConjugateGradient.h"
diff --git a/Eigen/LU b/Eigen/LU
index 29a98cb9a..132ecc42c 100644
--- a/Eigen/LU
+++ b/Eigen/LU
@@ -16,7 +16,6 @@
   * \endcode
   */
 
-#include "src/misc/Solve.h"
 #include "src/misc/Kernel.h"
 #include "src/misc/Image.h"
 #include "src/LU/FullPivLU.h"
@@ -25,7 +24,7 @@
 #include "src/LU/PartialPivLU_MKL.h"
 #endif
 #include "src/LU/Determinant.h"
-#include "src/LU/Inverse.h"
+#include "src/LU/InverseImpl.h"
 
 // Use the SSE optimized version whenever possible. At the moment the
 // SSE version doesn't compile when AVX is enabled
diff --git a/Eigen/PaStiXSupport b/Eigen/PaStiXSupport
index 7c616ee5e..e7d275f97 100644
--- a/Eigen/PaStiXSupport
+++ b/Eigen/PaStiXSupport
@@ -35,12 +35,8 @@ extern "C" {
   *
   */
 
-#include "src/misc/Solve.h"
-#include "src/misc/SparseSolve.h"
-
 #include "src/PaStiXSupport/PaStiXSupport.h"
 
-
 #include "src/Core/util/ReenableStupidWarnings.h"
 
 #endif // EIGEN_PASTIXSUPPORT_MODULE_H
diff --git a/Eigen/QR b/Eigen/QR
index 4c2533610..230cb079a 100644
--- a/Eigen/QR
+++ b/Eigen/QR
@@ -24,7 +24,6 @@
   * \endcode
   */
 
-#include "src/misc/Solve.h"
 #include "src/QR/HouseholderQR.h"
 #include "src/QR/FullPivHouseholderQR.h"
 #include "src/QR/ColPivHouseholderQR.h"
diff --git a/Eigen/SPQRSupport b/Eigen/SPQRSupport
index 77016442e..e3f49bb5a 100644
--- a/Eigen/SPQRSupport
+++ b/Eigen/SPQRSupport
@@ -21,8 +21,6 @@
   *
   */
 
-#include "src/misc/Solve.h"
-#include "src/misc/SparseSolve.h"
 #include "src/CholmodSupport/CholmodSupport.h"
 #include "src/SPQRSupport/SuiteSparseQRSupport.h"
 
diff --git a/Eigen/SVD b/Eigen/SVD
index 5eee46df5..c13472e82 100644
--- a/Eigen/SVD
+++ b/Eigen/SVD
@@ -20,7 +20,7 @@
   * \endcode
   */
 
-#include "src/misc/Solve.h"
+#include "src/SVD/SVDBase.h"
 #include "src/SVD/JacobiSVD.h"
 #if defined(EIGEN_USE_LAPACKE) && !defined(EIGEN_USE_LAPACKE_STRICT)
 #include "src/SVD/JacobiSVD_MKL.h"
diff --git a/Eigen/SparseCholesky b/Eigen/SparseCholesky
index 9f5056aa1..b6a320c40 100644
--- a/Eigen/SparseCholesky
+++ b/Eigen/SparseCholesky
@@ -34,8 +34,6 @@
 #error The SparseCholesky module has nothing to offer in MPL2 only mode
 #endif
 
-#include "src/misc/Solve.h"
-#include "src/misc/SparseSolve.h"
 #include "src/SparseCholesky/SimplicialCholesky.h"
 
 #ifndef EIGEN_MPL2_ONLY
diff --git a/Eigen/SparseCore b/Eigen/SparseCore
index 9b5be5e15..b68c8fa8a 100644
--- a/Eigen/SparseCore
+++ b/Eigen/SparseCore
@@ -35,28 +35,30 @@ struct Sparse {};
 
 #include "src/SparseCore/SparseUtil.h"
 #include "src/SparseCore/SparseMatrixBase.h"
+#include "src/SparseCore/SparseAssign.h"
 #include "src/SparseCore/CompressedStorage.h"
 #include "src/SparseCore/AmbiVector.h"
 #include "src/SparseCore/SparseMatrix.h"
 #include "src/SparseCore/MappedSparseMatrix.h"
 #include "src/SparseCore/SparseVector.h"
-#include "src/SparseCore/SparseBlock.h"
-#include "src/SparseCore/SparseTranspose.h"
 #include "src/SparseCore/SparseCwiseUnaryOp.h"
 #include "src/SparseCore/SparseCwiseBinaryOp.h"
+#include "src/SparseCore/SparseTranspose.h"
+#include "src/SparseCore/SparseBlock.h"
 #include "src/SparseCore/SparseDot.h"
-#include "src/SparseCore/SparsePermutation.h"
 #include "src/SparseCore/SparseRedux.h"
-#include "src/SparseCore/SparseFuzzy.h"
+#include "src/SparseCore/SparseView.h"
+#include "src/SparseCore/SparseDiagonalProduct.h"
 #include "src/SparseCore/ConservativeSparseSparseProduct.h"
 #include "src/SparseCore/SparseSparseProductWithPruning.h"
 #include "src/SparseCore/SparseProduct.h"
 #include "src/SparseCore/SparseDenseProduct.h"
-#include "src/SparseCore/SparseDiagonalProduct.h"
-#include "src/SparseCore/SparseTriangularView.h"
 #include "src/SparseCore/SparseSelfAdjointView.h"
+#include "src/SparseCore/SparseTriangularView.h"
 #include "src/SparseCore/TriangularSolver.h"
-#include "src/SparseCore/SparseView.h"
+#include "src/SparseCore/SparsePermutation.h"
+#include "src/SparseCore/SparseFuzzy.h"
+#include "src/SparseCore/SparseSolverBase.h"
 
 #include "src/Core/util/ReenableStupidWarnings.h"
 
diff --git a/Eigen/SparseLU b/Eigen/SparseLU
index 8527a49bd..38b38b531 100644
--- a/Eigen/SparseLU
+++ b/Eigen/SparseLU
@@ -20,9 +20,6 @@
   * Please, see the documentation of the SparseLU class for more details.
   */
 
-#include "src/misc/Solve.h"
-#include "src/misc/SparseSolve.h"
-
 // Ordering interface
 #include "OrderingMethods"
 
diff --git a/Eigen/SparseQR b/Eigen/SparseQR
index 4ee42065e..efb2695ba 100644
--- a/Eigen/SparseQR
+++ b/Eigen/SparseQR
@@ -21,9 +21,6 @@
   * 
   */
 
-#include "src/misc/Solve.h"
-#include "src/misc/SparseSolve.h"
-
 #include "OrderingMethods"
 #include "src/SparseCore/SparseColEtree.h"
 #include "src/SparseQR/SparseQR.h"
diff --git a/Eigen/SuperLUSupport b/Eigen/SuperLUSupport
index 575e14fbc..d1eac9464 100644
--- a/Eigen/SuperLUSupport
+++ b/Eigen/SuperLUSupport
@@ -48,12 +48,8 @@ namespace Eigen { struct SluMatrix; }
   *
   */
 
-#include "src/misc/Solve.h"
-#include "src/misc/SparseSolve.h"
-
 #include "src/SuperLUSupport/SuperLUSupport.h"
 
-
 #include "src/Core/util/ReenableStupidWarnings.h"
 
 #endif // EIGEN_SUPERLUSUPPORT_MODULE_H
diff --git a/Eigen/UmfPackSupport b/Eigen/UmfPackSupport
index 984f64a84..0efad5dee 100644
--- a/Eigen/UmfPackSupport
+++ b/Eigen/UmfPackSupport
@@ -26,9 +26,6 @@ extern "C" {
   *
   */
 
-#include "src/misc/Solve.h"
-#include "src/misc/SparseSolve.h"
-
 #include "src/UmfPackSupport/UmfPackSupport.h"
 
 #include "src/Core/util/ReenableStupidWarnings.h"
diff --git a/Eigen/src/Cholesky/LDLT.h b/Eigen/src/Cholesky/LDLT.h
index aa9784e54..32c770654 100644
--- a/Eigen/src/Cholesky/LDLT.h
+++ b/Eigen/src/Cholesky/LDLT.h
@@ -175,13 +175,13 @@ template<typename _MatrixType, int _UpLo> class LDLT
       * \sa MatrixBase::ldlt(), SelfAdjointView::ldlt()
       */
     template<typename Rhs>
-    inline const internal::solve_retval<LDLT, Rhs>
+    inline const Solve<LDLT, Rhs>
     solve(const MatrixBase<Rhs>& b) const
     {
       eigen_assert(m_isInitialized && "LDLT is not initialized.");
       eigen_assert(m_matrix.rows()==b.rows()
                 && "LDLT::solve(): invalid number of rows of the right hand side matrix b");
-      return internal::solve_retval<LDLT, Rhs>(*this, b.derived());
+      return Solve<LDLT, Rhs>(*this, b.derived());
     }
 
     template<typename Derived>
@@ -217,6 +217,12 @@ template<typename _MatrixType, int _UpLo> class LDLT
       eigen_assert(m_isInitialized && "LDLT is not initialized.");
       return Success;
     }
+    
+    #ifndef EIGEN_PARSED_BY_DOXYGEN
+    template<typename RhsType, typename DstType>
+    EIGEN_DEVICE_FUNC
+    void _solve_impl(const RhsType &rhs, DstType &dst) const;
+    #endif
 
   protected:
 
@@ -466,52 +472,46 @@ LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::rankUpdate(const MatrixBase<Deri
   return *this;
 }
 
-namespace internal {
-template<typename _MatrixType, int _UpLo, typename Rhs>
-struct solve_retval<LDLT<_MatrixType,_UpLo>, Rhs>
-  : solve_retval_base<LDLT<_MatrixType,_UpLo>, Rhs>
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+template<typename _MatrixType, int _UpLo>
+template<typename RhsType, typename DstType>
+void LDLT<_MatrixType,_UpLo>::_solve_impl(const RhsType &rhs, DstType &dst) const
 {
-  typedef LDLT<_MatrixType,_UpLo> LDLTType;
-  EIGEN_MAKE_SOLVE_HELPERS(LDLTType,Rhs)
-
-  template<typename Dest> void evalTo(Dest& dst) const
+  eigen_assert(rhs.rows() == rows());
+  // dst = P b
+  dst = m_transpositions * rhs;
+
+  // dst = L^-1 (P b)
+  matrixL().solveInPlace(dst);
+
+  // dst = D^-1 (L^-1 P b)
+  // more precisely, use pseudo-inverse of D (see bug 241)
+  using std::abs;
+  EIGEN_USING_STD_MATH(max);
+  const typename Diagonal<const MatrixType>::RealReturnType vecD(vectorD());
+  // In some previous versions, tolerance was set to the max of 1/highest and the maximal diagonal entry * epsilon
+  // as motivated by LAPACK's xGELSS:
+  // RealScalar tolerance = (max)(vectorD.array().abs().maxCoeff() *NumTraits<RealScalar>::epsilon(),RealScalar(1) / NumTraits<RealScalar>::highest());
+  // However, LDLT is not rank revealing, and so adjusting the tolerance wrt to the highest
+  // diagonal element is not well justified and to numerical issues in some cases.
+  // Moreover, Lapack's xSYTRS routines use 0 for the tolerance.
+  RealScalar tolerance = RealScalar(1) / NumTraits<RealScalar>::highest();
+  
+  for (Index i = 0; i < vecD.size(); ++i)
   {
-    eigen_assert(rhs().rows() == dec().matrixLDLT().rows());
-    // dst = P b
-    dst = dec().transpositionsP() * rhs();
-
-    // dst = L^-1 (P b)
-    dec().matrixL().solveInPlace(dst);
-
-    // dst = D^-1 (L^-1 P b)
-    // more precisely, use pseudo-inverse of D (see bug 241)
-    using std::abs;
-    EIGEN_USING_STD_MATH(max);
-    typedef typename LDLTType::MatrixType MatrixType;
-    typedef typename LDLTType::RealScalar RealScalar;
-    const typename Diagonal<const MatrixType>::RealReturnType vectorD(dec().vectorD());
-    // In some previous versions, tolerance was set to the max of 1/highest and the maximal diagonal entry * epsilon
-    // as motivated by LAPACK's xGELSS:
-    // RealScalar tolerance = (max)(vectorD.array().abs().maxCoeff() *NumTraits<RealScalar>::epsilon(),RealScalar(1) / NumTraits<RealScalar>::highest());
-    // However, LDLT is not rank revealing, and so adjusting the tolerance wrt to the highest
-    // diagonal element is not well justified and to numerical issues in some cases.
-    // Moreover, Lapack's xSYTRS routines use 0 for the tolerance.
-    RealScalar tolerance = RealScalar(1) / NumTraits<RealScalar>::highest();
-    for (Index i = 0; i < vectorD.size(); ++i) {
-      if(abs(vectorD(i)) > tolerance)
-        dst.row(i) /= vectorD(i);
-      else
-        dst.row(i).setZero();
-    }
+    if(abs(vecD(i)) > tolerance)
+      dst.row(i) /= vecD(i);
+    else
+      dst.row(i).setZero();
+  }
 
-    // dst = L^-T (D^-1 L^-1 P b)
-    dec().matrixU().solveInPlace(dst);
+  // dst = L^-T (D^-1 L^-1 P b)
+  matrixU().solveInPlace(dst);
 
-    // dst = P^-1 (L^-T D^-1 L^-1 P b) = A^-1 b
-    dst = dec().transpositionsP().transpose() * dst;
-  }
-};
+  // dst = P^-1 (L^-T D^-1 L^-1 P b) = A^-1 b
+  dst = m_transpositions.transpose() * dst;
 }
+#endif
 
 /** \internal use x = ldlt_object.solve(x);
   *
diff --git a/Eigen/src/Cholesky/LLT.h b/Eigen/src/Cholesky/LLT.h
index 38e820165..cb9e0eb7b 100644
--- a/Eigen/src/Cholesky/LLT.h
+++ b/Eigen/src/Cholesky/LLT.h
@@ -118,13 +118,13 @@ template<typename _MatrixType, int _UpLo> class LLT
       * \sa solveInPlace(), MatrixBase::llt(), SelfAdjointView::llt()
       */
     template<typename Rhs>
-    inline const internal::solve_retval<LLT, Rhs>
+    inline const Solve<LLT, Rhs>
     solve(const MatrixBase<Rhs>& b) const
     {
       eigen_assert(m_isInitialized && "LLT is not initialized.");
       eigen_assert(m_matrix.rows()==b.rows()
                 && "LLT::solve(): invalid number of rows of the right hand side matrix b");
-      return internal::solve_retval<LLT, Rhs>(*this, b.derived());
+      return Solve<LLT, Rhs>(*this, b.derived());
     }
 
     template<typename Derived>
@@ -161,6 +161,12 @@ template<typename _MatrixType, int _UpLo> class LLT
 
     template<typename VectorType>
     LLT rankUpdate(const VectorType& vec, const RealScalar& sigma = 1);
+    
+    #ifndef EIGEN_PARSED_BY_DOXYGEN
+    template<typename RhsType, typename DstType>
+    EIGEN_DEVICE_FUNC
+    void _solve_impl(const RhsType &rhs, DstType &dst) const;
+    #endif
 
   protected:
     /** \internal
@@ -404,22 +410,16 @@ LLT<_MatrixType,_UpLo> LLT<_MatrixType,_UpLo>::rankUpdate(const VectorType& v, c
 
   return *this;
 }
-    
-namespace internal {
-template<typename _MatrixType, int UpLo, typename Rhs>
-struct solve_retval<LLT<_MatrixType, UpLo>, Rhs>
-  : solve_retval_base<LLT<_MatrixType, UpLo>, Rhs>
+ 
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+template<typename _MatrixType,int _UpLo>
+template<typename RhsType, typename DstType>
+void LLT<_MatrixType,_UpLo>::_solve_impl(const RhsType &rhs, DstType &dst) const
 {
-  typedef LLT<_MatrixType,UpLo> LLTType;
-  EIGEN_MAKE_SOLVE_HELPERS(LLTType,Rhs)
-
-  template<typename Dest> void evalTo(Dest& dst) const
-  {
-    dst = rhs();
-    dec().solveInPlace(dst);
-  }
-};
+  dst = rhs;
+  solveInPlace(dst);
 }
+#endif
 
 /** \internal use x = llt_object.solve(x);
   * 
diff --git a/Eigen/src/CholmodSupport/CholmodSupport.h b/Eigen/src/CholmodSupport/CholmodSupport.h
index c449960de..3524ffb2d 100644
--- a/Eigen/src/CholmodSupport/CholmodSupport.h
+++ b/Eigen/src/CholmodSupport/CholmodSupport.h
@@ -157,8 +157,12 @@ enum CholmodMode {
   * \sa class CholmodSupernodalLLT, class CholmodSimplicialLDLT, class CholmodSimplicialLLT
   */
 template<typename _MatrixType, int _UpLo, typename Derived>
-class CholmodBase : internal::noncopyable
+class CholmodBase : public SparseSolverBase<Derived>
 {
+  protected:
+    typedef SparseSolverBase<Derived> Base;
+    using Base::derived;
+    using Base::m_isInitialized;
   public:
     typedef _MatrixType MatrixType;
     enum { UpLo = _UpLo };
@@ -170,14 +174,14 @@ class CholmodBase : internal::noncopyable
   public:
 
     CholmodBase()
-      : m_cholmodFactor(0), m_info(Success), m_isInitialized(false)
+      : m_cholmodFactor(0), m_info(Success)
     {
       m_shiftOffset[0] = m_shiftOffset[1] = RealScalar(0.0);
       cholmod_start(&m_cholmod);
     }
 
     CholmodBase(const MatrixType& matrix)
-      : m_cholmodFactor(0), m_info(Success), m_isInitialized(false)
+      : m_cholmodFactor(0), m_info(Success)
     {
       m_shiftOffset[0] = m_shiftOffset[1] = RealScalar(0.0);
       cholmod_start(&m_cholmod);
@@ -194,9 +198,6 @@ class CholmodBase : internal::noncopyable
     inline Index cols() const { return m_cholmodFactor->n; }
     inline Index rows() const { return m_cholmodFactor->n; }
     
-    Derived& derived() { return *static_cast<Derived*>(this); }
-    const Derived& derived() const { return *static_cast<const Derived*>(this); }
-    
     /** \brief Reports whether previous computation was successful.
       *
       * \returns \c Success if computation was succesful,
@@ -216,34 +217,6 @@ class CholmodBase : internal::noncopyable
       return derived();
     }
     
-    /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A.
-      *
-      * \sa compute()
-      */
-    template<typename Rhs>
-    inline const internal::solve_retval<CholmodBase, Rhs>
-    solve(const MatrixBase<Rhs>& b) const
-    {
-      eigen_assert(m_isInitialized && "LLT is not initialized.");
-      eigen_assert(rows()==b.rows()
-                && "CholmodDecomposition::solve(): invalid number of rows of the right hand side matrix b");
-      return internal::solve_retval<CholmodBase, Rhs>(*this, b.derived());
-    }
-    
-    /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A.
-      *
-      * \sa compute()
-      */
-    template<typename Rhs>
-    inline const internal::sparse_solve_retval<CholmodBase, Rhs>
-    solve(const SparseMatrixBase<Rhs>& b) const
-    {
-      eigen_assert(m_isInitialized && "LLT is not initialized.");
-      eigen_assert(rows()==b.rows()
-                && "CholmodDecomposition::solve(): invalid number of rows of the right hand side matrix b");
-      return internal::sparse_solve_retval<CholmodBase, Rhs>(*this, b.derived());
-    }
-    
     /** Performs a symbolic decomposition on the sparsity pattern of \a matrix.
       *
       * This function is particularly useful when solving for several problems having the same structure.
@@ -290,7 +263,7 @@ class CholmodBase : internal::noncopyable
     #ifndef EIGEN_PARSED_BY_DOXYGEN
     /** \internal */
     template<typename Rhs,typename Dest>
-    void _solve(const MatrixBase<Rhs> &b, MatrixBase<Dest> &dest) const
+    void _solve_impl(const MatrixBase<Rhs> &b, MatrixBase<Dest> &dest) const
     {
       eigen_assert(m_factorizationIsOk && "The decomposition is not in a valid state for solving, you must first call either compute() or symbolic()/numeric()");
       const Index size = m_cholmodFactor->n;
@@ -312,7 +285,7 @@ class CholmodBase : internal::noncopyable
     
     /** \internal */
     template<typename RhsScalar, int RhsOptions, typename RhsIndex, typename DestScalar, int DestOptions, typename DestIndex>
-    void _solve(const SparseMatrix<RhsScalar,RhsOptions,RhsIndex> &b, SparseMatrix<DestScalar,DestOptions,DestIndex> &dest) const
+    void _solve_impl(const SparseMatrix<RhsScalar,RhsOptions,RhsIndex> &b, SparseMatrix<DestScalar,DestOptions,DestIndex> &dest) const
     {
       eigen_assert(m_factorizationIsOk && "The decomposition is not in a valid state for solving, you must first call either compute() or symbolic()/numeric()");
       const Index size = m_cholmodFactor->n;
@@ -357,7 +330,6 @@ class CholmodBase : internal::noncopyable
     cholmod_factor* m_cholmodFactor;
     RealScalar m_shiftOffset[2];
     mutable ComputationInfo m_info;
-    bool m_isInitialized;
     int m_factorizationIsOk;
     int m_analysisIsOk;
 };
@@ -572,36 +544,6 @@ class CholmodDecomposition : public CholmodBase<_MatrixType, _UpLo, CholmodDecom
     }
 };
 
-namespace internal {
-  
-template<typename _MatrixType, int _UpLo, typename Derived, typename Rhs>
-struct solve_retval<CholmodBase<_MatrixType,_UpLo,Derived>, Rhs>
-  : solve_retval_base<CholmodBase<_MatrixType,_UpLo,Derived>, Rhs>
-{
-  typedef CholmodBase<_MatrixType,_UpLo,Derived> Dec;
-  EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs)
-
-  template<typename Dest> void evalTo(Dest& dst) const
-  {
-    dec()._solve(rhs(),dst);
-  }
-};
-
-template<typename _MatrixType, int _UpLo, typename Derived, typename Rhs>
-struct sparse_solve_retval<CholmodBase<_MatrixType,_UpLo,Derived>, Rhs>
-  : sparse_solve_retval_base<CholmodBase<_MatrixType,_UpLo,Derived>, Rhs>
-{
-  typedef CholmodBase<_MatrixType,_UpLo,Derived> Dec;
-  EIGEN_MAKE_SPARSE_SOLVE_HELPERS(Dec,Rhs)
-
-  template<typename Dest> void evalTo(Dest& dst) const
-  {
-    dec()._solve(rhs(),dst);
-  }
-};
-
-} // end namespace internal
-
 } // end namespace Eigen
 
 #endif // EIGEN_CHOLMODSUPPORT_H
diff --git a/Eigen/src/Core/Array.h b/Eigen/src/Core/Array.h
index 28d6f1443..eaee8847b 100644
--- a/Eigen/src/Core/Array.h
+++ b/Eigen/src/Core/Array.h
@@ -74,6 +74,21 @@ class Array
     {
       return Base::operator=(other);
     }
+    
+    /** Set all the entries to \a value.
+      * \sa DenseBase::setConstant(), DenseBase::fill()
+      */
+    /* This overload is needed because the usage of
+      *   using Base::operator=;
+      * fails on MSVC. Since the code below is working with GCC and MSVC, we skipped
+      * the usage of 'using'. This should be done only for operator=.
+      */
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Array& operator=(const Scalar &value)
+    {
+      Base::setConstant(value);
+      return *this;
+    }
 
     /** Copies the value of the expression \a other into \c *this with automatic resizing.
       *
@@ -99,7 +114,7 @@ class Array
     {
       return Base::_set(other);
     }
-
+    
     /** Default constructor.
       *
       * For fixed-size matrices, does nothing.
@@ -144,7 +159,6 @@ class Array
     }
 #endif
 
-
     #ifndef EIGEN_PARSED_BY_DOXYGEN
     template<typename T>
     EIGEN_DEVICE_FUNC
diff --git a/Eigen/src/Core/ArrayBase.h b/Eigen/src/Core/ArrayBase.h
index 2c9ace4a7..48a0006d5 100644
--- a/Eigen/src/Core/ArrayBase.h
+++ b/Eigen/src/Core/ArrayBase.h
@@ -64,8 +64,7 @@ template<typename Derived> class ArrayBase
     using Base::MaxSizeAtCompileTime;
     using Base::IsVectorAtCompileTime;
     using Base::Flags;
-    using Base::CoeffReadCost;
-
+    
     using Base::derived;
     using Base::const_cast_derived;
     using Base::rows;
@@ -121,8 +120,14 @@ template<typename Derived> class ArrayBase
     EIGEN_DEVICE_FUNC
     Derived& operator=(const ArrayBase& other)
     {
-      return internal::assign_selector<Derived,Derived>::run(derived(), other.derived());
+      internal::call_assignment(derived(), other.derived());
     }
+    
+    /** Set all the entries to \a value.
+      * \sa DenseBase::setConstant(), DenseBase::fill() */
+    EIGEN_DEVICE_FUNC
+    Derived& operator=(const Scalar &value)
+    { Base::setConstant(value); return derived(); }
 
     EIGEN_DEVICE_FUNC
     Derived& operator+=(const Scalar& scalar);
@@ -186,8 +191,7 @@ template<typename OtherDerived>
 EIGEN_STRONG_INLINE Derived &
 ArrayBase<Derived>::operator-=(const ArrayBase<OtherDerived> &other)
 {
-  SelfCwiseBinaryOp<internal::scalar_difference_op<Scalar>, Derived, OtherDerived> tmp(derived());
-  tmp = other.derived();
+  call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar>());
   return derived();
 }
 
@@ -200,8 +204,7 @@ template<typename OtherDerived>
 EIGEN_STRONG_INLINE Derived &
 ArrayBase<Derived>::operator+=(const ArrayBase<OtherDerived>& other)
 {
-  SelfCwiseBinaryOp<internal::scalar_sum_op<Scalar>, Derived, OtherDerived> tmp(derived());
-  tmp = other.derived();
+  call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar>());
   return derived();
 }
 
@@ -214,8 +217,7 @@ template<typename OtherDerived>
 EIGEN_STRONG_INLINE Derived &
 ArrayBase<Derived>::operator*=(const ArrayBase<OtherDerived>& other)
 {
-  SelfCwiseBinaryOp<internal::scalar_product_op<Scalar>, Derived, OtherDerived> tmp(derived());
-  tmp = other.derived();
+  call_assignment(derived(), other.derived(), internal::mul_assign_op<Scalar,typename OtherDerived::Scalar>());
   return derived();
 }
 
@@ -228,8 +230,7 @@ template<typename OtherDerived>
 EIGEN_STRONG_INLINE Derived &
 ArrayBase<Derived>::operator/=(const ArrayBase<OtherDerived>& other)
 {
-  SelfCwiseBinaryOp<internal::scalar_quotient_op<Scalar>, Derived, OtherDerived> tmp(derived());
-  tmp = other.derived();
+  call_assignment(derived(), other.derived(), internal::div_assign_op<Scalar>());
   return derived();
 }
 
diff --git a/Eigen/src/Core/ArrayWrapper.h b/Eigen/src/Core/ArrayWrapper.h
index 4bb648024..ed5210272 100644
--- a/Eigen/src/Core/ArrayWrapper.h
+++ b/Eigen/src/Core/ArrayWrapper.h
@@ -29,6 +29,11 @@ struct traits<ArrayWrapper<ExpressionType> >
   : public traits<typename remove_all<typename ExpressionType::Nested>::type >
 {
   typedef ArrayXpr XprKind;
+  // Let's remove NestByRefBit
+  enum {
+    Flags0 = traits<typename remove_all<typename ExpressionType::Nested>::type >::Flags,
+    Flags = Flags0 & ~NestByRefBit
+  };
 };
 }
 
@@ -39,6 +44,7 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
     typedef ArrayBase<ArrayWrapper> Base;
     EIGEN_DENSE_PUBLIC_INTERFACE(ArrayWrapper)
     EIGEN_INHERIT_ASSIGNMENT_OPERATORS(ArrayWrapper)
+    typedef typename internal::remove_all<ExpressionType>::type NestedExpression;
 
     typedef typename internal::conditional<
                        internal::is_lvalue<ExpressionType>::value,
@@ -166,6 +172,11 @@ struct traits<MatrixWrapper<ExpressionType> >
  : public traits<typename remove_all<typename ExpressionType::Nested>::type >
 {
   typedef MatrixXpr XprKind;
+  // Let's remove NestByRefBit
+  enum {
+    Flags0 = traits<typename remove_all<typename ExpressionType::Nested>::type >::Flags,
+    Flags = Flags0 & ~NestByRefBit
+  };
 };
 }
 
@@ -176,6 +187,7 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
     typedef MatrixBase<MatrixWrapper<ExpressionType> > Base;
     EIGEN_DENSE_PUBLIC_INTERFACE(MatrixWrapper)
     EIGEN_INHERIT_ASSIGNMENT_OPERATORS(MatrixWrapper)
+    typedef typename internal::remove_all<ExpressionType>::type NestedExpression;
 
     typedef typename internal::conditional<
                        internal::is_lvalue<ExpressionType>::value,
diff --git a/Eigen/src/Core/Assign.h b/Eigen/src/Core/Assign.h
index 07da2fe31..53806ba33 100644
--- a/Eigen/src/Core/Assign.h
+++ b/Eigen/src/Core/Assign.h
@@ -14,485 +14,6 @@
 
 namespace Eigen {
 
-namespace internal {
-
-/***************************************************************************
-* Part 1 : the logic deciding a strategy for traversal and unrolling       *
-***************************************************************************/
-
-template <typename Derived, typename OtherDerived>
-struct assign_traits
-{
-public:
-  enum {
-    DstIsAligned = Derived::Flags & AlignedBit,
-    DstHasDirectAccess = Derived::Flags & DirectAccessBit,
-    SrcIsAligned = OtherDerived::Flags & AlignedBit,
-    JointAlignment = bool(DstIsAligned) && bool(SrcIsAligned) ? Aligned : Unaligned
-  };
-
-private:
-  enum {
-    InnerSize = int(Derived::IsVectorAtCompileTime) ? int(Derived::SizeAtCompileTime)
-              : int(Derived::Flags)&RowMajorBit ? int(Derived::ColsAtCompileTime)
-              : int(Derived::RowsAtCompileTime),
-    InnerMaxSize = int(Derived::IsVectorAtCompileTime) ? int(Derived::MaxSizeAtCompileTime)
-              : int(Derived::Flags)&RowMajorBit ? int(Derived::MaxColsAtCompileTime)
-              : int(Derived::MaxRowsAtCompileTime),
-    MaxSizeAtCompileTime = Derived::SizeAtCompileTime,
-    PacketSize = packet_traits<typename Derived::Scalar>::size
-  };
-
-  enum {
-    StorageOrdersAgree = (int(Derived::IsRowMajor) == int(OtherDerived::IsRowMajor)),
-    MightVectorize = StorageOrdersAgree
-                  && (int(Derived::Flags) & int(OtherDerived::Flags) & ActualPacketAccessBit),
-    MayInnerVectorize  = MightVectorize && int(InnerSize)!=Dynamic && int(InnerSize)%int(PacketSize)==0
-                       && int(DstIsAligned) && int(SrcIsAligned),
-    MayLinearize = StorageOrdersAgree && (int(Derived::Flags) & int(OtherDerived::Flags) & LinearAccessBit),
-    MayLinearVectorize = MightVectorize && MayLinearize && DstHasDirectAccess
-                       && (DstIsAligned || MaxSizeAtCompileTime == Dynamic),
-      /* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
-         so it's only good for large enough sizes. */
-    MaySliceVectorize  = MightVectorize && DstHasDirectAccess
-                       && (int(InnerMaxSize)==Dynamic || int(InnerMaxSize)>=3*PacketSize)
-      /* slice vectorization can be slow, so we only want it if the slices are big, which is
-         indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block
-         in a fixed-size matrix */
-  };
-
-public:
-  enum {
-    Traversal = int(MayInnerVectorize)  ? int(InnerVectorizedTraversal)
-              : int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
-              : int(MaySliceVectorize)  ? int(SliceVectorizedTraversal)
-              : int(MayLinearize)       ? int(LinearTraversal)
-                                        : int(DefaultTraversal),
-    Vectorized = int(Traversal) == InnerVectorizedTraversal
-              || int(Traversal) == LinearVectorizedTraversal
-              || int(Traversal) == SliceVectorizedTraversal
-  };
-
-private:
-  enum {
-    UnrollingLimit      = EIGEN_UNROLLING_LIMIT * (Vectorized ? int(PacketSize) : 1),
-    MayUnrollCompletely = int(Derived::SizeAtCompileTime) != Dynamic
-                       && int(OtherDerived::CoeffReadCost) != Dynamic
-                       && int(Derived::SizeAtCompileTime) * int(OtherDerived::CoeffReadCost) <= int(UnrollingLimit),
-    MayUnrollInner      = int(InnerSize) != Dynamic
-                       && int(OtherDerived::CoeffReadCost) != Dynamic
-                       && int(InnerSize) * int(OtherDerived::CoeffReadCost) <= int(UnrollingLimit)
-  };
-
-public:
-  enum {
-    Unrolling = (int(Traversal) == int(InnerVectorizedTraversal) || int(Traversal) == int(DefaultTraversal))
-                ? (
-                    int(MayUnrollCompletely) ? int(CompleteUnrolling)
-                  : int(MayUnrollInner)      ? int(InnerUnrolling)
-                                             : int(NoUnrolling)
-                  )
-              : int(Traversal) == int(LinearVectorizedTraversal)
-                ? ( bool(MayUnrollCompletely) && bool(DstIsAligned) ? int(CompleteUnrolling) : int(NoUnrolling) )
-              : int(Traversal) == int(LinearTraversal)
-                ? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling) : int(NoUnrolling) )
-              : int(NoUnrolling)
-  };
-
-#ifdef EIGEN_DEBUG_ASSIGN
-  static void debug()
-  {
-    EIGEN_DEBUG_VAR(DstIsAligned)
-    EIGEN_DEBUG_VAR(SrcIsAligned)
-    EIGEN_DEBUG_VAR(JointAlignment)
-    EIGEN_DEBUG_VAR(Derived::SizeAtCompileTime)
-    EIGEN_DEBUG_VAR(OtherDerived::CoeffReadCost)
-    EIGEN_DEBUG_VAR(InnerSize)
-    EIGEN_DEBUG_VAR(InnerMaxSize)
-    EIGEN_DEBUG_VAR(PacketSize)
-    EIGEN_DEBUG_VAR(StorageOrdersAgree)
-    EIGEN_DEBUG_VAR(MightVectorize)
-    EIGEN_DEBUG_VAR(MayLinearize)
-    EIGEN_DEBUG_VAR(MayInnerVectorize)
-    EIGEN_DEBUG_VAR(MayLinearVectorize)
-    EIGEN_DEBUG_VAR(MaySliceVectorize)
-    EIGEN_DEBUG_VAR(Traversal)
-    EIGEN_DEBUG_VAR(UnrollingLimit)
-    EIGEN_DEBUG_VAR(MayUnrollCompletely)
-    EIGEN_DEBUG_VAR(MayUnrollInner)
-    EIGEN_DEBUG_VAR(Unrolling)
-  }
-#endif
-};
-
-/***************************************************************************
-* Part 2 : meta-unrollers
-***************************************************************************/
-
-/************************
-*** Default traversal ***
-************************/
-
-template<typename Derived1, typename Derived2, int Index, int Stop>
-struct assign_DefaultTraversal_CompleteUnrolling
-{
-  enum {
-    outer = Index / Derived1::InnerSizeAtCompileTime,
-    inner = Index % Derived1::InnerSizeAtCompileTime
-  };
-
-  EIGEN_DEVICE_FUNC
-  static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
-  {
-    dst.copyCoeffByOuterInner(outer, inner, src);
-    assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src);
-  }
-};
-
-template<typename Derived1, typename Derived2, int Stop>
-struct assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, Stop, Stop>
-{
-  EIGEN_DEVICE_FUNC 
-  static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &) {}
-};
-
-template<typename Derived1, typename Derived2, int Index, int Stop>
-struct assign_DefaultTraversal_InnerUnrolling
-{
-  EIGEN_DEVICE_FUNC 
-  static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src, typename Derived1::Index outer)
-  {
-    dst.copyCoeffByOuterInner(outer, Index, src);
-    assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src, outer);
-  }
-};
-
-template<typename Derived1, typename Derived2, int Stop>
-struct assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, Stop, Stop>
-{
-  EIGEN_DEVICE_FUNC 
-  static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &, typename Derived1::Index) {}
-};
-
-/***********************
-*** Linear traversal ***
-***********************/
-
-template<typename Derived1, typename Derived2, int Index, int Stop>
-struct assign_LinearTraversal_CompleteUnrolling
-{
-  EIGEN_DEVICE_FUNC 
-  static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
-  {
-    dst.copyCoeff(Index, src);
-    assign_LinearTraversal_CompleteUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src);
-  }
-};
-
-template<typename Derived1, typename Derived2, int Stop>
-struct assign_LinearTraversal_CompleteUnrolling<Derived1, Derived2, Stop, Stop>
-{
-  EIGEN_DEVICE_FUNC 
-  static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &) {}
-};
-
-/**************************
-*** Inner vectorization ***
-**************************/
-
-template<typename Derived1, typename Derived2, int Index, int Stop>
-struct assign_innervec_CompleteUnrolling
-{
-  enum {
-    outer = Index / Derived1::InnerSizeAtCompileTime,
-    inner = Index % Derived1::InnerSizeAtCompileTime,
-    JointAlignment = assign_traits<Derived1,Derived2>::JointAlignment
-  };
-
-  static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
-  {
-    dst.template copyPacketByOuterInner<Derived2, Aligned, JointAlignment>(outer, inner, src);
-    assign_innervec_CompleteUnrolling<Derived1, Derived2,
-      Index+packet_traits<typename Derived1::Scalar>::size, Stop>::run(dst, src);
-  }
-};
-
-template<typename Derived1, typename Derived2, int Stop>
-struct assign_innervec_CompleteUnrolling<Derived1, Derived2, Stop, Stop>
-{
-  static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &) {}
-};
-
-template<typename Derived1, typename Derived2, int Index, int Stop>
-struct assign_innervec_InnerUnrolling
-{
-  static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src, typename Derived1::Index outer)
-  {
-    dst.template copyPacketByOuterInner<Derived2, Aligned, Aligned>(outer, Index, src);
-    assign_innervec_InnerUnrolling<Derived1, Derived2,
-      Index+packet_traits<typename Derived1::Scalar>::size, Stop>::run(dst, src, outer);
-  }
-};
-
-template<typename Derived1, typename Derived2, int Stop>
-struct assign_innervec_InnerUnrolling<Derived1, Derived2, Stop, Stop>
-{
-  static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &, typename Derived1::Index) {}
-};
-
-/***************************************************************************
-* Part 3 : implementation of all cases
-***************************************************************************/
-
-template<typename Derived1, typename Derived2,
-         int Traversal = assign_traits<Derived1, Derived2>::Traversal,
-         int Unrolling = assign_traits<Derived1, Derived2>::Unrolling,
-         int Version = Specialized>
-struct assign_impl;
-
-/************************
-*** Default traversal ***
-************************/
-
-template<typename Derived1, typename Derived2, int Unrolling, int Version>
-struct assign_impl<Derived1, Derived2, InvalidTraversal, Unrolling, Version>
-{
-  EIGEN_DEVICE_FUNC
-  static inline void run(Derived1 &, const Derived2 &) { }
-};
-
-template<typename Derived1, typename Derived2, int Version>
-struct assign_impl<Derived1, Derived2, DefaultTraversal, NoUnrolling, Version>
-{
-  typedef typename Derived1::Index Index;
-  EIGEN_DEVICE_FUNC 
-  static inline void run(Derived1 &dst, const Derived2 &src)
-  {
-    const Index innerSize = dst.innerSize();
-    const Index outerSize = dst.outerSize();
-    for(Index outer = 0; outer < outerSize; ++outer)
-      for(Index inner = 0; inner < innerSize; ++inner)
-        dst.copyCoeffByOuterInner(outer, inner, src);
-  }
-};
-
-template<typename Derived1, typename Derived2, int Version>
-struct assign_impl<Derived1, Derived2, DefaultTraversal, CompleteUnrolling, Version>
-{
-  EIGEN_DEVICE_FUNC
-  static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
-  {
-    assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
-      ::run(dst, src);
-  }
-};
-
-template<typename Derived1, typename Derived2, int Version>
-struct assign_impl<Derived1, Derived2, DefaultTraversal, InnerUnrolling, Version>
-{
-  typedef typename Derived1::Index Index;
-  EIGEN_DEVICE_FUNC 
-  static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
-  {
-    const Index outerSize = dst.outerSize();
-    for(Index outer = 0; outer < outerSize; ++outer)
-      assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, 0, Derived1::InnerSizeAtCompileTime>
-        ::run(dst, src, outer);
-  }
-};
-
-/***********************
-*** Linear traversal ***
-***********************/
-
-template<typename Derived1, typename Derived2, int Version>
-struct assign_impl<Derived1, Derived2, LinearTraversal, NoUnrolling, Version>
-{
-  typedef typename Derived1::Index Index;
-  EIGEN_DEVICE_FUNC
-  static inline void run(Derived1 &dst, const Derived2 &src)
-  {
-    const Index size = dst.size();
-    for(Index i = 0; i < size; ++i)
-      dst.copyCoeff(i, src);
-  }
-};
-
-template<typename Derived1, typename Derived2, int Version>
-struct assign_impl<Derived1, Derived2, LinearTraversal, CompleteUnrolling, Version>
-{
-  EIGEN_DEVICE_FUNC
-  static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
-  {
-    assign_LinearTraversal_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
-      ::run(dst, src);
-  }
-};
-
-/**************************
-*** Inner vectorization ***
-**************************/
-
-template<typename Derived1, typename Derived2, int Version>
-struct assign_impl<Derived1, Derived2, InnerVectorizedTraversal, NoUnrolling, Version>
-{
-  typedef typename Derived1::Index Index;
-  static inline void run(Derived1 &dst, const Derived2 &src)
-  {
-    const Index innerSize = dst.innerSize();
-    const Index outerSize = dst.outerSize();
-    const Index packetSize = packet_traits<typename Derived1::Scalar>::size;
-    for(Index outer = 0; outer < outerSize; ++outer)
-      for(Index inner = 0; inner < innerSize; inner+=packetSize)
-        dst.template copyPacketByOuterInner<Derived2, Aligned, Aligned>(outer, inner, src);
-  }
-};
-
-template<typename Derived1, typename Derived2, int Version>
-struct assign_impl<Derived1, Derived2, InnerVectorizedTraversal, CompleteUnrolling, Version>
-{
-  static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
-  {
-    assign_innervec_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
-      ::run(dst, src);
-  }
-};
-
-template<typename Derived1, typename Derived2, int Version>
-struct assign_impl<Derived1, Derived2, InnerVectorizedTraversal, InnerUnrolling, Version>
-{
-  typedef typename Derived1::Index Index;
-  static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
-  {
-    const Index outerSize = dst.outerSize();
-    for(Index outer = 0; outer < outerSize; ++outer)
-      assign_innervec_InnerUnrolling<Derived1, Derived2, 0, Derived1::InnerSizeAtCompileTime>
-        ::run(dst, src, outer);
-  }
-};
-
-/***************************
-*** Linear vectorization ***
-***************************/
-
-template <bool IsAligned = false>
-struct unaligned_assign_impl
-{
-  template <typename Derived, typename OtherDerived>
-  static EIGEN_STRONG_INLINE void run(const Derived&, OtherDerived&, typename Derived::Index, typename Derived::Index) {}
-};
-
-template <>
-struct unaligned_assign_impl<false>
-{
-  // MSVC must not inline this functions. If it does, it fails to optimize the
-  // packet access path.
-#ifdef _MSC_VER
-  template <typename Derived, typename OtherDerived>
-  static EIGEN_DONT_INLINE void run(const Derived& src, OtherDerived& dst, typename Derived::Index start, typename Derived::Index end)
-#else
-  template <typename Derived, typename OtherDerived>
-  static EIGEN_STRONG_INLINE void run(const Derived& src, OtherDerived& dst, typename Derived::Index start, typename Derived::Index end)
-#endif
-  {
-    for (typename Derived::Index index = start; index < end; ++index)
-      dst.copyCoeff(index, src);
-  }
-};
-
-template<typename Derived1, typename Derived2, int Version>
-struct assign_impl<Derived1, Derived2, LinearVectorizedTraversal, NoUnrolling, Version>
-{
-  typedef typename Derived1::Index Index;
-  static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
-  {
-    const Index size = dst.size();
-    typedef packet_traits<typename Derived1::Scalar> PacketTraits;
-    enum {
-      packetSize = PacketTraits::size,
-      dstAlignment = PacketTraits::AlignedOnScalar ? Aligned : int(assign_traits<Derived1,Derived2>::DstIsAligned) ,
-      srcAlignment = assign_traits<Derived1,Derived2>::JointAlignment
-    };
-    const Index alignedStart = assign_traits<Derived1,Derived2>::DstIsAligned ? 0
-                             : internal::first_aligned(&dst.coeffRef(0), size);
-    const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;
-
-    unaligned_assign_impl<assign_traits<Derived1,Derived2>::DstIsAligned!=0>::run(src,dst,0,alignedStart);
-
-    for(Index index = alignedStart; index < alignedEnd; index += packetSize)
-    {
-      dst.template copyPacket<Derived2, dstAlignment, srcAlignment>(index, src);
-    }
-
-    unaligned_assign_impl<>::run(src,dst,alignedEnd,size);
-  }
-};
-
-template<typename Derived1, typename Derived2, int Version>
-struct assign_impl<Derived1, Derived2, LinearVectorizedTraversal, CompleteUnrolling, Version>
-{
-  typedef typename Derived1::Index Index;
-  static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
-  {
-    enum { size = Derived1::SizeAtCompileTime,
-           packetSize = packet_traits<typename Derived1::Scalar>::size,
-           alignedSize = (size/packetSize)*packetSize };
-
-    assign_innervec_CompleteUnrolling<Derived1, Derived2, 0, alignedSize>::run(dst, src);
-    assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, alignedSize, size>::run(dst, src);
-  }
-};
-
-/**************************
-*** Slice vectorization ***
-***************************/
-
-template<typename Derived1, typename Derived2, int Version>
-struct assign_impl<Derived1, Derived2, SliceVectorizedTraversal, NoUnrolling, Version>
-{
-  typedef typename Derived1::Index Index;
-  static inline void run(Derived1 &dst, const Derived2 &src)
-  {
-    typedef packet_traits<typename Derived1::Scalar> PacketTraits;
-    enum {
-      packetSize = PacketTraits::size,
-      alignable = PacketTraits::AlignedOnScalar,
-      dstAlignment = alignable ? Aligned : int(assign_traits<Derived1,Derived2>::DstIsAligned) ,
-      srcAlignment = assign_traits<Derived1,Derived2>::JointAlignment
-    };
-    const Index packetAlignedMask = packetSize - 1;
-    const Index innerSize = dst.innerSize();
-    const Index outerSize = dst.outerSize();
-    const Index alignedStep = alignable ? (packetSize - dst.outerStride() % packetSize) & packetAlignedMask : 0;
-    Index alignedStart = ((!alignable) || assign_traits<Derived1,Derived2>::DstIsAligned) ? 0
-                       : internal::first_aligned(&dst.coeffRef(0,0), innerSize);
-
-    for(Index outer = 0; outer < outerSize; ++outer)
-    {
-      const Index alignedEnd = alignedStart + ((innerSize-alignedStart) & ~packetAlignedMask);
-      // do the non-vectorizable part of the assignment
-      for(Index inner = 0; inner<alignedStart ; ++inner)
-        dst.copyCoeffByOuterInner(outer, inner, src);
-
-      // do the vectorizable part of the assignment
-      for(Index inner = alignedStart; inner<alignedEnd; inner+=packetSize)
-        dst.template copyPacketByOuterInner<Derived2, dstAlignment, Unaligned>(outer, inner, src);
-
-      // do the non-vectorizable part of the assignment
-      for(Index inner = alignedEnd; inner<innerSize ; ++inner)
-        dst.copyCoeffByOuterInner(outer, inner, src);
-
-      alignedStart = std::min<Index>((alignedStart+alignedStep)%packetSize, innerSize);
-    }
-  }
-};
-
-} // end namespace internal
-
-/***************************************************************************
-* Part 4 : implementation of DenseBase methods
-***************************************************************************/
-
 template<typename Derived>
 template<typename OtherDerived>
 EIGEN_STRONG_INLINE Derived& DenseBase<Derived>
@@ -506,91 +27,35 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>
   EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Derived,OtherDerived)
   EIGEN_STATIC_ASSERT(SameType,YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
 
-#ifdef EIGEN_TEST_EVALUATORS
-  
-#ifdef EIGEN_DEBUG_ASSIGN
-  internal::copy_using_evaluator_traits<Derived, OtherDerived>::debug();
-#endif
-  eigen_assert(rows() == other.rows() && cols() == other.cols());
-  internal::call_dense_assignment_loop(derived(),other.derived());
-  
-#else // EIGEN_TEST_EVALUATORS
-
-#ifdef EIGEN_DEBUG_ASSIGN
-  internal::assign_traits<Derived, OtherDerived>::debug();
-#endif
   eigen_assert(rows() == other.rows() && cols() == other.cols());
-  internal::assign_impl<Derived, OtherDerived, int(SameType) ? int(internal::assign_traits<Derived, OtherDerived>::Traversal)
-                                                             : int(InvalidTraversal)>::run(derived(),other.derived());
+  internal::call_assignment_no_alias(derived(),other.derived());
   
-#endif // EIGEN_TEST_EVALUATORS
-  
-#ifndef EIGEN_NO_DEBUG
-  checkTransposeAliasing(other.derived());
-#endif
   return derived();
 }
 
-namespace internal {
-
-template<typename Derived, typename OtherDerived,
-         bool EvalBeforeAssigning = (int(internal::traits<OtherDerived>::Flags) & EvalBeforeAssigningBit) != 0,
-         bool NeedToTranspose = ((int(Derived::RowsAtCompileTime) == 1 && int(OtherDerived::ColsAtCompileTime) == 1)
-                              |   // FIXME | instead of || to please GCC 4.4.0 stupid warning "suggest parentheses around &&".
-                                  // revert to || as soon as not needed anymore.
-                                  (int(Derived::ColsAtCompileTime) == 1 && int(OtherDerived::RowsAtCompileTime) == 1))
-                              && int(Derived::SizeAtCompileTime) != 1>
-struct assign_selector;
-
-template<typename Derived, typename OtherDerived>
-struct assign_selector<Derived,OtherDerived,false,false> {
-  EIGEN_DEVICE_FUNC
-  static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.derived()); }
-  template<typename ActualDerived, typename ActualOtherDerived>
-  EIGEN_DEVICE_FUNC
-  static EIGEN_STRONG_INLINE Derived& evalTo(ActualDerived& dst, const ActualOtherDerived& other) { other.evalTo(dst); return dst; }
-};
-template<typename Derived, typename OtherDerived>
-struct assign_selector<Derived,OtherDerived,true,false> {
-  EIGEN_DEVICE_FUNC
-  static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.eval()); }
-};
-template<typename Derived, typename OtherDerived>
-struct assign_selector<Derived,OtherDerived,false,true> {
-  EIGEN_DEVICE_FUNC
-  static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose()); }
-  template<typename ActualDerived, typename ActualOtherDerived>
-  EIGEN_DEVICE_FUNC
-  static EIGEN_STRONG_INLINE Derived& evalTo(ActualDerived& dst, const ActualOtherDerived& other) { Transpose<ActualDerived> dstTrans(dst); other.evalTo(dstTrans); return dst; }
-};
-template<typename Derived, typename OtherDerived>
-struct assign_selector<Derived,OtherDerived,true,true> {
-  EIGEN_DEVICE_FUNC
-  static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose().eval()); }
-};
-
-} // end namespace internal
-
 template<typename Derived>
 template<typename OtherDerived>
 EIGEN_DEVICE_FUNC
 EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator=(const DenseBase<OtherDerived>& other)
 {
-  return internal::assign_selector<Derived,OtherDerived>::run(derived(), other.derived());
+  internal::call_assignment(derived(), other.derived());
+  return derived();
 }
 
 template<typename Derived>
 EIGEN_DEVICE_FUNC
 EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator=(const DenseBase& other)
 {
-  return internal::assign_selector<Derived,Derived>::run(derived(), other.derived());
+  internal::call_assignment(derived(), other.derived());
+  return derived();
 }
 
 template<typename Derived>
 EIGEN_DEVICE_FUNC
 EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const MatrixBase& other)
 {
-  return internal::assign_selector<Derived,Derived>::run(derived(), other.derived());
+  internal::call_assignment(derived(), other.derived());
+  return derived();
 }
 
 template<typename Derived>
@@ -598,7 +63,8 @@ template <typename OtherDerived>
 EIGEN_DEVICE_FUNC
 EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const DenseBase<OtherDerived>& other)
 {
-  return internal::assign_selector<Derived,OtherDerived>::run(derived(), other.derived());
+  internal::call_assignment(derived(), other.derived());
+  return derived();
 }
 
 template<typename Derived>
@@ -606,7 +72,8 @@ template <typename OtherDerived>
 EIGEN_DEVICE_FUNC
 EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const EigenBase<OtherDerived>& other)
 {
-  return internal::assign_selector<Derived,OtherDerived,false>::evalTo(derived(), other.derived());
+  internal::call_assignment(derived(), other.derived());
+  return derived();
 }
 
 template<typename Derived>
@@ -614,7 +81,8 @@ template<typename OtherDerived>
 EIGEN_DEVICE_FUNC
 EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const ReturnByValue<OtherDerived>& other)
 {
-  return internal::assign_selector<Derived,OtherDerived,false>::evalTo(derived(), other.derived());
+  other.derived().evalTo(derived());
+  return derived();
 }
 
 } // end namespace Eigen
diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h
index 5451a138f..8ab71446c 100644
--- a/Eigen/src/Core/AssignEvaluator.h
+++ b/Eigen/src/Core/AssignEvaluator.h
@@ -2,7 +2,7 @@
 // for linear algebra.
 //
 // Copyright (C) 2011 Benoit Jacob <jacob.benoit.1@gmail.com>
-// Copyright (C) 2011-2013 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2011-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
 // Copyright (C) 2011-2012 Jitse Niesen <jitse@maths.leeds.ac.uk>
 //
 // This Source Code Form is subject to the terms of the Mozilla
@@ -24,37 +24,46 @@ namespace internal {
 
 // copy_using_evaluator_traits is based on assign_traits
 
-template <typename Derived, typename OtherDerived>
+template <typename DstEvaluator, typename SrcEvaluator, typename AssignFunc>
 struct copy_using_evaluator_traits
 {
+  typedef typename DstEvaluator::XprType Dst;
+  
+  enum {
+    DstFlags = DstEvaluator::Flags,
+    SrcFlags = SrcEvaluator::Flags
+  };
+  
 public:
   enum {
-    DstIsAligned = Derived::Flags & AlignedBit,
-    DstHasDirectAccess = Derived::Flags & DirectAccessBit,
-    SrcIsAligned = OtherDerived::Flags & AlignedBit,
-    JointAlignment = bool(DstIsAligned) && bool(SrcIsAligned) ? Aligned : Unaligned,
-    SrcEvalBeforeAssign = (evaluator_traits<OtherDerived>::HasEvalTo == 1)
+    DstIsAligned = DstFlags & AlignedBit,
+    DstHasDirectAccess = DstFlags & DirectAccessBit,
+    SrcIsAligned = SrcFlags & AlignedBit,
+    JointAlignment = bool(DstIsAligned) && bool(SrcIsAligned) ? Aligned : Unaligned
   };
 
 private:
   enum {
-    InnerSize = int(Derived::IsVectorAtCompileTime) ? int(Derived::SizeAtCompileTime)
-              : int(Derived::Flags)&RowMajorBit ? int(Derived::ColsAtCompileTime)
-              : int(Derived::RowsAtCompileTime),
-    InnerMaxSize = int(Derived::IsVectorAtCompileTime) ? int(Derived::MaxSizeAtCompileTime)
-              : int(Derived::Flags)&RowMajorBit ? int(Derived::MaxColsAtCompileTime)
-              : int(Derived::MaxRowsAtCompileTime),
-    MaxSizeAtCompileTime = Derived::SizeAtCompileTime,
-    PacketSize = packet_traits<typename Derived::Scalar>::size
+    InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime)
+              : int(DstFlags)&RowMajorBit ? int(Dst::ColsAtCompileTime)
+              : int(Dst::RowsAtCompileTime),
+    InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime)
+              : int(DstFlags)&RowMajorBit ? int(Dst::MaxColsAtCompileTime)
+              : int(Dst::MaxRowsAtCompileTime),
+    MaxSizeAtCompileTime = Dst::SizeAtCompileTime,
+    PacketSize = packet_traits<typename Dst::Scalar>::size
   };
 
   enum {
-    StorageOrdersAgree = (int(Derived::IsRowMajor) == int(OtherDerived::IsRowMajor)),
+    DstIsRowMajor = DstFlags&RowMajorBit,
+    SrcIsRowMajor = SrcFlags&RowMajorBit,
+    StorageOrdersAgree = (int(DstIsRowMajor) == int(SrcIsRowMajor)),
     MightVectorize = StorageOrdersAgree
-                  && (int(Derived::Flags) & int(OtherDerived::Flags) & ActualPacketAccessBit),
+                  && (int(DstFlags) & int(SrcFlags) & ActualPacketAccessBit)
+                  && (functor_traits<AssignFunc>::PacketAccess),
     MayInnerVectorize  = MightVectorize && int(InnerSize)!=Dynamic && int(InnerSize)%int(PacketSize)==0
                        && int(DstIsAligned) && int(SrcIsAligned),
-    MayLinearize = StorageOrdersAgree && (int(Derived::Flags) & int(OtherDerived::Flags) & LinearAccessBit),
+    MayLinearize = StorageOrdersAgree && (int(DstFlags) & int(SrcFlags) & LinearAccessBit),
     MayLinearVectorize = MightVectorize && MayLinearize && DstHasDirectAccess
                        && (DstIsAligned || MaxSizeAtCompileTime == Dynamic),
       /* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
@@ -68,8 +77,7 @@ private:
 
 public:
   enum {
-    Traversal = int(SrcEvalBeforeAssign) ? int(AllAtOnceTraversal) 
-              : int(MayInnerVectorize)   ? int(InnerVectorizedTraversal)
+    Traversal = int(MayInnerVectorize)   ? int(InnerVectorizedTraversal)
               : int(MayLinearVectorize)  ? int(LinearVectorizedTraversal)
               : int(MaySliceVectorize)   ? int(SliceVectorizedTraversal)
               : int(MayLinearize)        ? int(LinearTraversal)
@@ -82,12 +90,12 @@ public:
 private:
   enum {
     UnrollingLimit      = EIGEN_UNROLLING_LIMIT * (Vectorized ? int(PacketSize) : 1),
-    MayUnrollCompletely = int(Derived::SizeAtCompileTime) != Dynamic
-                       && int(OtherDerived::CoeffReadCost) != Dynamic
-                       && int(Derived::SizeAtCompileTime) * int(OtherDerived::CoeffReadCost) <= int(UnrollingLimit),
+    MayUnrollCompletely = int(Dst::SizeAtCompileTime) != Dynamic
+                       && int(SrcEvaluator::CoeffReadCost) != Dynamic
+                       && int(Dst::SizeAtCompileTime) * int(SrcEvaluator::CoeffReadCost) <= int(UnrollingLimit),
     MayUnrollInner      = int(InnerSize) != Dynamic
-                       && int(OtherDerived::CoeffReadCost) != Dynamic
-                       && int(InnerSize) * int(OtherDerived::CoeffReadCost) <= int(UnrollingLimit)
+                       && int(SrcEvaluator::CoeffReadCost) != Dynamic
+                       && int(InnerSize) * int(SrcEvaluator::CoeffReadCost) <= int(UnrollingLimit)
   };
 
 public:
@@ -110,6 +118,12 @@ public:
 #ifdef EIGEN_DEBUG_ASSIGN
   static void debug()
   {
+    std::cerr << "DstXpr: " << typeid(typename DstEvaluator::XprType).name() << std::endl;
+    std::cerr << "SrcXpr: " << typeid(typename SrcEvaluator::XprType).name() << std::endl;
+    std::cerr.setf(std::ios::hex, std::ios::basefield);
+    EIGEN_DEBUG_VAR(DstFlags)
+    EIGEN_DEBUG_VAR(SrcFlags)
+    std::cerr.unsetf(std::ios::hex);
     EIGEN_DEBUG_VAR(DstIsAligned)
     EIGEN_DEBUG_VAR(SrcIsAligned)
     EIGEN_DEBUG_VAR(JointAlignment)
@@ -127,6 +141,7 @@ public:
     EIGEN_DEBUG_VAR(MayUnrollCompletely)
     EIGEN_DEBUG_VAR(MayUnrollInner)
     EIGEN_DEBUG_VAR(Unrolling)
+    std::cerr << std::endl;
   }
 #endif
 };
@@ -142,6 +157,7 @@ public:
 template<typename Kernel, int Index, int Stop>
 struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling
 {
+  // FIXME: this is not very clean, perhaps this information should be provided by the kernel?
   typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
   typedef typename DstEvaluatorType::XprType DstXprType;
   
@@ -206,9 +222,10 @@ struct copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Stop, Stop
 template<typename Kernel, int Index, int Stop>
 struct copy_using_evaluator_innervec_CompleteUnrolling
 {
+  // FIXME: this is not very clean, perhaps this information should be provided by the kernel?
   typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
   typedef typename DstEvaluatorType::XprType DstXprType;
-
+  
   enum {
     outer = Index / DstXprType::InnerSizeAtCompileTime,
     inner = Index % DstXprType::InnerSizeAtCompileTime,
@@ -235,8 +252,7 @@ struct copy_using_evaluator_innervec_InnerUnrolling
   static EIGEN_STRONG_INLINE void run(Kernel &kernel, int outer)
   {
     kernel.template assignPacketByOuterInner<Aligned, Aligned>(outer, Index);
-    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
-    enum { NextIndex = Index + packet_traits<typename DstXprType::Scalar>::size };
+    enum { NextIndex = Index + packet_traits<typename Kernel::Scalar>::size };
     copy_using_evaluator_innervec_InnerUnrolling<Kernel, NextIndex, Stop>::run(kernel, outer);
   }
 };
@@ -496,25 +512,8 @@ struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling>
   }
 };
 
-/****************************
-*** All-at-once traversal ***
-****************************/
-
-// TODO: this 'AllAtOnceTraversal' should be dropped or caught earlier (Gael)
-// Indeed, what to do with the kernel's functor??
-template<typename Kernel>
-struct dense_assignment_loop<Kernel, AllAtOnceTraversal, NoUnrolling>
-{
-  static inline void run(Kernel & kernel)
-  {
-    // Evaluate rhs in temporary to prevent aliasing problems in a = a * a;
-    // TODO: Do not pass the xpr object to evalTo() (Jitse)
-    kernel.srcEvaluator().evalTo(kernel.dstEvaluator(), kernel.dstExpression());
-  }
-};
-
 /***************************************************************************
-* Part 4 : Generic Assignment routine
+* Part 4 : Generic dense assignment kernel
 ***************************************************************************/
 
 // This class generalize the assignment of a coefficient (or packet) from one dense evaluator
@@ -523,7 +522,7 @@ struct dense_assignment_loop<Kernel, AllAtOnceTraversal, NoUnrolling>
 // This abstraction level permits to keep the evaluation loops as simple and as generic as possible.
 // One can customize the assignment using this generic dense_assignment_kernel with different
 // functors, or by completely overloading it, by-passing a functor.
-template<typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor>
+template<typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor, int Version = Specialized>
 class generic_dense_assignment_kernel
 {
 protected:
@@ -535,16 +534,22 @@ public:
   typedef SrcEvaluatorTypeT SrcEvaluatorType;
   typedef typename DstEvaluatorType::Scalar Scalar;
   typedef typename DstEvaluatorType::Index Index;
-  typedef copy_using_evaluator_traits<DstXprType, SrcXprType> AssignmentTraits;
+  typedef copy_using_evaluator_traits<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor> AssignmentTraits;
   
   
   generic_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr)
     : m_dst(dst), m_src(src), m_functor(func), m_dstExpr(dstExpr)
-  {}
+  {
+    #ifdef EIGEN_DEBUG_ASSIGN
+    AssignmentTraits::debug();
+    #endif
+  }
   
   Index size() const        { return m_dstExpr.size(); }
   Index innerSize() const   { return m_dstExpr.innerSize(); }
   Index outerSize() const   { return m_dstExpr.outerSize(); }
+  Index rows() const        { return m_dstExpr.rows(); }
+  Index cols() const        { return m_dstExpr.cols(); }
   Index outerStride() const { return m_dstExpr.outerStride(); }
   
   // TODO get rid of this one:
@@ -553,16 +558,19 @@ public:
   DstEvaluatorType& dstEvaluator() { return m_dst; }
   const SrcEvaluatorType& srcEvaluator() const { return m_src; }
   
+  /// Assign src(row,col) to dst(row,col) through the assignment functor.
   void assignCoeff(Index row, Index col)
   {
     m_functor.assignCoeff(m_dst.coeffRef(row,col), m_src.coeff(row,col));
   }
   
+  /// \sa assignCoeff(Index,Index)
   void assignCoeff(Index index)
   {
     m_functor.assignCoeff(m_dst.coeffRef(index), m_src.coeff(index));
   }
   
+  /// \sa assignCoeff(Index,Index)
   void assignCoeffByOuterInner(Index outer, Index inner)
   {
     Index row = rowIndexByOuterInner(outer, inner); 
@@ -596,7 +604,7 @@ public:
     typedef typename DstEvaluatorType::ExpressionTraits Traits;
     return int(Traits::RowsAtCompileTime) == 1 ? 0
       : int(Traits::ColsAtCompileTime) == 1 ? inner
-      : int(Traits::Flags)&RowMajorBit ? outer
+      : int(DstEvaluatorType::Flags)&RowMajorBit ? outer
       : inner;
   }
 
@@ -605,7 +613,7 @@ public:
     typedef typename DstEvaluatorType::ExpressionTraits Traits;
     return int(Traits::ColsAtCompileTime) == 1 ? 0
       : int(Traits::RowsAtCompileTime) == 1 ? inner
-      : int(Traits::Flags)&RowMajorBit ? inner
+      : int(DstEvaluatorType::Flags)&RowMajorBit ? inner
       : outer;
   }
   
@@ -617,13 +625,13 @@ protected:
   DstXprType& m_dstExpr;
 };
 
+/***************************************************************************
+* Part 5 : Entry point for dense rectangular assignment
+***************************************************************************/
+
 template<typename DstXprType, typename SrcXprType, typename Functor>
 void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src, const Functor &func)
 {
-#ifdef EIGEN_DEBUG_ASSIGN
-  // TODO these traits should be computed from information provided by the evaluators
-  internal::copy_using_evaluator_traits<DstXprType, SrcXprType>::debug();
-#endif
   eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
   
   typedef typename evaluator<DstXprType>::type DstEvaluatorType;
@@ -645,195 +653,141 @@ void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src)
 }
 
 /***************************************************************************
-* Part 5 : Entry points
+* Part 6 : Generic assignment
 ***************************************************************************/
 
-// Based on DenseBase::LazyAssign()
-// The following functions are just for testing and they are meant to be moved to operator= and the likes.
+// Based on the respective shapes of the destination and source,
+// the class AssignmentKind determine the kind of assignment mechanism.
+// AssignmentKind must define a Kind typedef.
+template<typename DstShape, typename SrcShape> struct AssignmentKind;
 
-template<typename DstXprType, template <typename> class StorageBase, typename SrcXprType>
-EIGEN_STRONG_INLINE
-const DstXprType& copy_using_evaluator(const NoAlias<DstXprType, StorageBase>& dst, 
-                                       const EigenBase<SrcXprType>& src)
-{
-  return noalias_copy_using_evaluator(dst.expression(), src.derived(), internal::assign_op<typename DstXprType::Scalar>());
-}
+// Assignement kind defined in this file:
+struct Dense2Dense {};
+struct EigenBase2EigenBase {};
 
-template<typename XprType, int AssumeAliasing = evaluator_traits<XprType>::AssumeAliasing>
-struct AddEvalIfAssumingAliasing;
+template<typename,typename> struct AssignmentKind { typedef EigenBase2EigenBase Kind; };
+template<> struct AssignmentKind<DenseShape,DenseShape> { typedef Dense2Dense Kind; };
+    
+// This is the main assignment class
+template< typename DstXprType, typename SrcXprType, typename Functor,
+          typename Kind = typename AssignmentKind< typename evaluator_traits<DstXprType>::Shape , typename evaluator_traits<SrcXprType>::Shape >::Kind,
+          typename Scalar = typename DstXprType::Scalar>
+struct Assignment;
 
-template<typename XprType>
-struct AddEvalIfAssumingAliasing<XprType, 0>
-{
-  static const XprType& run(const XprType& xpr) 
-  {
-    return xpr;
-  }
-};
 
-template<typename XprType>
-struct AddEvalIfAssumingAliasing<XprType, 1>
-{
-  static const EvalToTemp<XprType> run(const XprType& xpr)
-  {
-    return EvalToTemp<XprType>(xpr);
-  }
-};
+// The only purpose of this call_assignment() function is to deal with noalias() / AssumeAliasing and automatic transposition.
+// Indeed, I (Gael) think that this concept of AssumeAliasing was a mistake, and it makes thing quite complicated.
+// So this intermediate function removes everything related to AssumeAliasing such that Assignment
+// does not has to bother about these annoying details.
 
-template<typename DstXprType, typename SrcXprType, typename Functor>
-EIGEN_STRONG_INLINE
-const DstXprType& copy_using_evaluator(const EigenBase<DstXprType>& dst, const EigenBase<SrcXprType>& src, const Functor &func)
+template<typename Dst, typename Src>
+void call_assignment(Dst& dst, const Src& src)
 {
-  return noalias_copy_using_evaluator(dst.const_cast_derived(), 
-                                      AddEvalIfAssumingAliasing<SrcXprType>::run(src.derived()),
-                                      func
-                                     );
+  call_assignment(dst, src, internal::assign_op<typename Dst::Scalar>());
 }
-
-// this mimics operator=
-template<typename DstXprType, typename SrcXprType>
-EIGEN_STRONG_INLINE
-const DstXprType& copy_using_evaluator(const EigenBase<DstXprType>& dst, const EigenBase<SrcXprType>& src)
+template<typename Dst, typename Src>
+void call_assignment(const Dst& dst, const Src& src)
 {
-  return copy_using_evaluator(dst.const_cast_derived(), src.derived(), internal::assign_op<typename DstXprType::Scalar>());
+  call_assignment(dst, src, internal::assign_op<typename Dst::Scalar>());
 }
-
-template<typename DstXprType, typename SrcXprType, typename Functor>
-EIGEN_STRONG_INLINE
-const DstXprType& noalias_copy_using_evaluator(const PlainObjectBase<DstXprType>& dst, const EigenBase<SrcXprType>& src, const Functor &func)
+                     
+// Deal with AssumeAliasing
+template<typename Dst, typename Src, typename Func>
+void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if<evaluator_traits<Src>::AssumeAliasing==1, void*>::type = 0)
 {
-#ifdef EIGEN_DEBUG_ASSIGN
-  internal::copy_using_evaluator_traits<DstXprType, SrcXprType>::debug();
-#endif
-#ifdef EIGEN_NO_AUTOMATIC_RESIZING
-  eigen_assert((dst.size()==0 || (IsVectorAtCompileTime ? (dst.size() == src.size())
-                                                        : (dst.rows() == src.rows() && dst.cols() == src.cols())))
-              && "Size mismatch. Automatic resizing is disabled because EIGEN_NO_AUTOMATIC_RESIZING is defined");
-#else
-  dst.const_cast_derived().resizeLike(src.derived());
-#endif
-  call_dense_assignment_loop(dst.const_cast_derived(), src.derived(), func);
-  return dst.derived();
+  typename plain_matrix_type<Src>::type tmp(src);
+  call_assignment_no_alias(dst, tmp, func);
 }
 
-template<typename DstXprType, typename SrcXprType, typename Functor>
-EIGEN_STRONG_INLINE
-const DstXprType& noalias_copy_using_evaluator(const EigenBase<DstXprType>& dst, const EigenBase<SrcXprType>& src, const Functor &func)
+template<typename Dst, typename Src, typename Func>
+void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if<evaluator_traits<Src>::AssumeAliasing==0, void*>::type = 0)
 {
-  call_dense_assignment_loop(dst.const_cast_derived(), src.derived(), func);
-  return dst.derived();
+  call_assignment_no_alias(dst, src, func);
 }
 
-// Based on DenseBase::swap()
-// TODO: Check whether we need to do something special for swapping two
-//       Arrays or Matrices. (Jitse)
-
-// Overload default assignPacket behavior for swapping them
-template<typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT>
-class swap_kernel : public generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, swap_assign_op<typename DstEvaluatorTypeT::Scalar> >
+// by-pass AssumeAliasing
+// FIXME the const version should probably not be needed
+// When there is no aliasing, we require that 'dst' has been properly resized
+template<typename Dst, template <typename> class StorageBase, typename Src, typename Func>
+void call_assignment(const NoAlias<Dst,StorageBase>& dst, const Src& src, const Func& func)
 {
-  typedef generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, swap_assign_op<typename DstEvaluatorTypeT::Scalar> > Base;
-  typedef typename DstEvaluatorTypeT::PacketScalar PacketScalar;
-  using Base::m_dst;
-  using Base::m_src;
-  using Base::m_functor;
-  
-public:
-  typedef typename Base::Scalar Scalar;
-  typedef typename Base::Index Index;
-  typedef typename Base::DstXprType DstXprType;
-  
-  swap_kernel(DstEvaluatorTypeT &dst, const SrcEvaluatorTypeT &src, DstXprType& dstExpr)
-    : Base(dst, src, swap_assign_op<Scalar>(), dstExpr)
-  {}
-  
-  template<int StoreMode, int LoadMode>
-  void assignPacket(Index row, Index col)
-  {
-    m_functor.template swapPacket<StoreMode,LoadMode,PacketScalar>(&m_dst.coeffRef(row,col), &const_cast<SrcEvaluatorTypeT&>(m_src).coeffRef(row,col));
-  }
-  
-  template<int StoreMode, int LoadMode>
-  void assignPacket(Index index)
-  {
-    m_functor.template swapPacket<StoreMode,LoadMode,PacketScalar>(&m_dst.coeffRef(index), &const_cast<SrcEvaluatorTypeT&>(m_src).coeffRef(index));
-  }
-  
-  // TODO find a simple way not to have to copy/paste this function from generic_dense_assignment_kernel, by simple I mean no CRTP (Gael)
-  template<int StoreMode, int LoadMode>
-  void assignPacketByOuterInner(Index outer, Index inner)
-  {
-    Index row = Base::rowIndexByOuterInner(outer, inner); 
-    Index col = Base::colIndexByOuterInner(outer, inner);
-    assignPacket<StoreMode,LoadMode>(row, col);
-  }
-};
-  
-template<typename DstXprType, typename SrcXprType>
-void swap_using_evaluator(const DstXprType& dst, const SrcXprType& src)
-{
-  // TODO there is too much redundancy with call_dense_assignment_loop
-  
-  eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
-  
-  typedef typename evaluator<DstXprType>::type DstEvaluatorType;
-  typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
-
-  DstEvaluatorType dstEvaluator(dst);
-  SrcEvaluatorType srcEvaluator(src);
-    
-  typedef swap_kernel<DstEvaluatorType,SrcEvaluatorType> Kernel;
-  Kernel kernel(dstEvaluator, srcEvaluator, dst.const_cast_derived());
-  
-  dense_assignment_loop<Kernel>::run(kernel);
-}
-
-// Based on MatrixBase::operator+= (in CwiseBinaryOp.h)
-template<typename DstXprType, typename SrcXprType>
-void add_assign_using_evaluator(const MatrixBase<DstXprType>& dst, const MatrixBase<SrcXprType>& src)
-{
-  typedef typename DstXprType::Scalar Scalar;
-  copy_using_evaluator(dst.derived(), src.derived(), add_assign_op<Scalar>());
+  call_assignment_no_alias(dst.expression(), src, func);
 }
-
-// Based on ArrayBase::operator+=
-template<typename DstXprType, typename SrcXprType>
-void add_assign_using_evaluator(const ArrayBase<DstXprType>& dst, const ArrayBase<SrcXprType>& src)
+template<typename Dst, template <typename> class StorageBase, typename Src, typename Func>
+void call_assignment(NoAlias<Dst,StorageBase>& dst, const Src& src, const Func& func)
 {
-  typedef typename DstXprType::Scalar Scalar;
-  copy_using_evaluator(dst.derived(), src.derived(), add_assign_op<Scalar>());
+  call_assignment_no_alias(dst.expression(), src, func);
 }
 
-// TODO: Add add_assign_using_evaluator for EigenBase ? (Jitse)
 
-template<typename DstXprType, typename SrcXprType>
-void subtract_assign_using_evaluator(const MatrixBase<DstXprType>& dst, const MatrixBase<SrcXprType>& src)
+template<typename Dst, typename Src, typename Func>
+void call_assignment_no_alias(Dst& dst, const Src& src, const Func& func)
 {
-  typedef typename DstXprType::Scalar Scalar;
-  copy_using_evaluator(dst.derived(), src.derived(), sub_assign_op<Scalar>());
-}
+  enum {
+    NeedToTranspose = (  (int(Dst::RowsAtCompileTime) == 1 && int(Src::ColsAtCompileTime) == 1)
+                        |   // FIXME | instead of || to please GCC 4.4.0 stupid warning "suggest parentheses around &&".
+                                // revert to || as soon as not needed anymore.
+                         (int(Dst::ColsAtCompileTime) == 1 && int(Src::RowsAtCompileTime) == 1))
+                     && int(Dst::SizeAtCompileTime) != 1
+  };
 
-template<typename DstXprType, typename SrcXprType>
-void subtract_assign_using_evaluator(const ArrayBase<DstXprType>& dst, const ArrayBase<SrcXprType>& src)
-{
-  typedef typename DstXprType::Scalar Scalar;
-  copy_using_evaluator(dst.derived(), src.derived(), sub_assign_op<Scalar>());
+  typename Dst::Index dstRows = NeedToTranspose ? src.cols() : src.rows();
+  typename Dst::Index dstCols = NeedToTranspose ? src.rows() : src.cols();
+  if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
+    dst.resize(dstRows, dstCols);
+  
+  typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst>::type ActualDstTypeCleaned;
+  typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst&>::type ActualDstType;
+  ActualDstType actualDst(dst);
+  
+  // TODO check whether this is the right place to perform these checks:
+  EIGEN_STATIC_ASSERT_LVALUE(Dst)
+  EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(ActualDstTypeCleaned,Src)
+
+  // TODO this line is commented to allow matrix = permutation
+  // Actually, the "Scalar" type for a permutation matrix does not really make sense,
+  // perhaps it could be void, and EIGEN_CHECK_BINARY_COMPATIBILIY could allow micing void with anything...?
+//   EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename ActualDstTypeCleaned::Scalar,typename Src::Scalar);
+  
+  Assignment<ActualDstTypeCleaned,Src,Func>::run(actualDst, src, func);
 }
-
-template<typename DstXprType, typename SrcXprType>
-void multiply_assign_using_evaluator(const ArrayBase<DstXprType>& dst, const ArrayBase<SrcXprType>& src)
+template<typename Dst, typename Src>
+void call_assignment_no_alias(Dst& dst, const Src& src)
 {
-  typedef typename DstXprType::Scalar Scalar;
-  copy_using_evaluator(dst.derived(), src.derived(), mul_assign_op<Scalar>());
+  call_assignment_no_alias(dst, src, internal::assign_op<typename Dst::Scalar>());
 }
 
-template<typename DstXprType, typename SrcXprType>
-void divide_assign_using_evaluator(const ArrayBase<DstXprType>& dst, const ArrayBase<SrcXprType>& src)
+// forxard declaration
+template<typename Dst, typename Src> void check_for_aliasing(const Dst &dst, const Src &src);
+
+// Generic Dense to Dense assignment
+template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar>
+struct Assignment<DstXprType, SrcXprType, Functor, Dense2Dense, Scalar>
 {
-  typedef typename DstXprType::Scalar Scalar;
-  copy_using_evaluator(dst.derived(), src.derived(), div_assign_op<Scalar>());
-}
+  static void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
+  {
+    eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
+    
+#ifndef EIGEN_NO_DEBUG
+    internal::check_for_aliasing(dst, src);
+#endif
+    
+    call_dense_assignment_loop(dst, src, func);
+  }
+};
 
+// Generic assignment through evalTo.
+// TODO: not sure we have to keep that one, but it helps porting current code to new evaluator mechanism.
+template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar>
+struct Assignment<DstXprType, SrcXprType, Functor, EigenBase2EigenBase, Scalar>
+{
+  static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar> &/*func*/)
+  {
+    eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
+    
+    src.evalTo(dst);
+  }
+};
 
 } // namespace internal
 
diff --git a/Eigen/src/Core/BandMatrix.h b/Eigen/src/Core/BandMatrix.h
index ffd7fe8b3..b0ebe1160 100644
--- a/Eigen/src/Core/BandMatrix.h
+++ b/Eigen/src/Core/BandMatrix.h
@@ -327,6 +327,25 @@ class TridiagonalMatrix : public BandMatrix<Scalar,Size,Size,Options&SelfAdjoint
   protected:
 };
 
+
+struct BandShape {};
+
+template<typename _Scalar, int _Rows, int _Cols, int _Supers, int _Subs, int _Options>
+struct evaluator_traits<BandMatrix<_Scalar,_Rows,_Cols,_Supers,_Subs,_Options> >
+  : public evaluator_traits_base<BandMatrix<_Scalar,_Rows,_Cols,_Supers,_Subs,_Options> >
+{
+  typedef BandShape Shape;
+};
+
+template<typename _CoefficientsType,int _Rows, int _Cols, int _Supers, int _Subs,int _Options>
+struct evaluator_traits<BandMatrixWrapper<_CoefficientsType,_Rows,_Cols,_Supers,_Subs,_Options> >
+  : public evaluator_traits_base<BandMatrixWrapper<_CoefficientsType,_Rows,_Cols,_Supers,_Subs,_Options> >
+{
+  typedef BandShape Shape;
+};
+
+template<> struct AssignmentKind<DenseShape,BandShape> { typedef EigenBase2EigenBase Kind; };
+
 } // end namespace internal
 
 } // end namespace Eigen
diff --git a/Eigen/src/Core/Block.h b/Eigen/src/Core/Block.h
index da193d1a2..737e5dc24 100644
--- a/Eigen/src/Core/Block.h
+++ b/Eigen/src/Core/Block.h
@@ -68,6 +68,7 @@ struct traits<Block<XprType, BlockRows, BlockCols, InnerPanel> > : traits<XprTyp
     MaxColsAtCompileTime = BlockCols==0 ? 0
                          : ColsAtCompileTime != Dynamic ? int(ColsAtCompileTime)
                          : int(traits<XprType>::MaxColsAtCompileTime),
+
     XprTypeIsRowMajor = (int(traits<XprType>::Flags)&RowMajorBit) != 0,
     IsRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1
                : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0
@@ -80,18 +81,14 @@ struct traits<Block<XprType, BlockRows, BlockCols, InnerPanel> > : traits<XprTyp
     OuterStrideAtCompileTime = HasSameStorageOrderAsXprType
                              ? int(outer_stride_at_compile_time<XprType>::ret)
                              : int(inner_stride_at_compile_time<XprType>::ret),
-    MaskPacketAccessBit = (InnerSize == Dynamic || (InnerSize % packet_traits<Scalar>::size) == 0)
-                       && (InnerStrideAtCompileTime == 1)
-                        ? PacketAccessBit : 0,
-    MaskAlignedBit = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % EIGEN_ALIGN_BYTES) == 0)) ? AlignedBit : 0,
-    FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1 || (InnerPanel && (traits<XprType>::Flags&LinearAccessBit))) ? LinearAccessBit : 0,
+    // IsAligned is needed by MapBase's assertions
+    // We can sefely set it to false here. Internal alignment errors will be detected by an eigen_internal_assert in the respective evaluator
+    IsAligned = 0,
+    // FIXME, this traits is rather specialized for dense object and it needs to be cleaned further
     FlagsLvalueBit = is_lvalue<XprType>::value ? LvalueBit : 0,
     FlagsRowMajorBit = IsRowMajor ? RowMajorBit : 0,
-    Flags0 = traits<XprType>::Flags & ( (HereditaryBits & ~RowMajorBit) |
-                                        DirectAccessBit |
-                                        MaskPacketAccessBit |
-                                        MaskAlignedBit),
-    Flags = Flags0 | FlagsLinearAccessBit | FlagsLvalueBit | FlagsRowMajorBit
+    Flags = (traits<XprType>::Flags & DirectAccessBit) | FlagsLvalueBit | FlagsRowMajorBit
+    // FIXME DirectAccessBit should not be handled by expressions
   };
 };
 
@@ -111,6 +108,8 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel> class
     typedef Impl Base;
     EIGEN_GENERIC_PUBLIC_INTERFACE(Block)
     EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Block)
+    
+    typedef typename internal::remove_all<XprType>::type NestedExpression;
   
     /** Column or Row constructor
       */
@@ -333,6 +332,9 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
   : public MapBase<Block<XprType, BlockRows, BlockCols, InnerPanel> >
 {
     typedef Block<XprType, BlockRows, BlockCols, InnerPanel> BlockType;
+    enum {
+      XprTypeIsRowMajor = (int(traits<XprType>::Flags)&RowMajorBit) != 0
+    };
   public:
 
     typedef MapBase<BlockType> Base;
@@ -343,9 +345,8 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
       */
     EIGEN_DEVICE_FUNC
     inline BlockImpl_dense(XprType& xpr, Index i)
-      : Base(internal::const_cast_ptr(&xpr.coeffRef(
-              (BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) ? i : 0,
-              (BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) ? i : 0)),
+      : Base(xpr.data() + i * (    ((BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) && (!XprTypeIsRowMajor)) 
+                                || ((BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) && ( XprTypeIsRowMajor)) ? xpr.innerStride() : xpr.outerStride()),
              BlockRows==1 ? 1 : xpr.rows(),
              BlockCols==1 ? 1 : xpr.cols()),
         m_xpr(xpr)
@@ -357,7 +358,8 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
       */
     EIGEN_DEVICE_FUNC
     inline BlockImpl_dense(XprType& xpr, Index startRow, Index startCol)
-      : Base(internal::const_cast_ptr(&xpr.coeffRef(startRow,startCol))), m_xpr(xpr)
+      : Base(xpr.data()+xpr.innerStride()*(XprTypeIsRowMajor?startCol:startRow) + xpr.outerStride()*(XprTypeIsRowMajor?startRow:startCol)),
+        m_xpr(xpr)
     {
       init();
     }
@@ -368,7 +370,7 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
     inline BlockImpl_dense(XprType& xpr,
           Index startRow, Index startCol,
           Index blockRows, Index blockCols)
-      : Base(internal::const_cast_ptr(&xpr.coeffRef(startRow,startCol)), blockRows, blockCols),
+      : Base(xpr.data()+xpr.innerStride()*(XprTypeIsRowMajor?startCol:startRow) + xpr.outerStride()*(XprTypeIsRowMajor?startRow:startCol), blockRows, blockCols),
         m_xpr(xpr)
     {
       init();
diff --git a/Eigen/src/Core/BooleanRedux.h b/Eigen/src/Core/BooleanRedux.h
index be9f48a8c..dac1887e0 100644
--- a/Eigen/src/Core/BooleanRedux.h
+++ b/Eigen/src/Core/BooleanRedux.h
@@ -17,9 +17,10 @@ namespace internal {
 template<typename Derived, int UnrollCount>
 struct all_unroller
 {
+  typedef typename Derived::ExpressionTraits Traits;
   enum {
-    col = (UnrollCount-1) / Derived::RowsAtCompileTime,
-    row = (UnrollCount-1) % Derived::RowsAtCompileTime
+    col = (UnrollCount-1) / Traits::RowsAtCompileTime,
+    row = (UnrollCount-1) % Traits::RowsAtCompileTime
   };
 
   static inline bool run(const Derived &mat)
@@ -43,11 +44,12 @@ struct all_unroller<Derived, Dynamic>
 template<typename Derived, int UnrollCount>
 struct any_unroller
 {
+  typedef typename Derived::ExpressionTraits Traits;
   enum {
-    col = (UnrollCount-1) / Derived::RowsAtCompileTime,
-    row = (UnrollCount-1) % Derived::RowsAtCompileTime
+    col = (UnrollCount-1) / Traits::RowsAtCompileTime,
+    row = (UnrollCount-1) % Traits::RowsAtCompileTime
   };
-
+  
   static inline bool run(const Derived &mat)
   {
     return any_unroller<Derived, UnrollCount-1>::run(mat) || mat.coeff(row, col);
@@ -78,19 +80,21 @@ struct any_unroller<Derived, Dynamic>
 template<typename Derived>
 inline bool DenseBase<Derived>::all() const
 {
+  typedef typename internal::evaluator<Derived>::type Evaluator;
   enum {
     unroll = SizeAtCompileTime != Dynamic
-          && CoeffReadCost != Dynamic
+          && Evaluator::CoeffReadCost != Dynamic
           && NumTraits<Scalar>::AddCost != Dynamic
-          && SizeAtCompileTime * (CoeffReadCost + NumTraits<Scalar>::AddCost) <= EIGEN_UNROLLING_LIMIT
+          && SizeAtCompileTime * (Evaluator::CoeffReadCost + NumTraits<Scalar>::AddCost) <= EIGEN_UNROLLING_LIMIT
   };
+  Evaluator evaluator(derived());
   if(unroll)
-    return internal::all_unroller<Derived, unroll ? int(SizeAtCompileTime) : Dynamic>::run(derived());
+    return internal::all_unroller<Evaluator, unroll ? int(SizeAtCompileTime) : Dynamic>::run(evaluator);
   else
   {
     for(Index j = 0; j < cols(); ++j)
       for(Index i = 0; i < rows(); ++i)
-        if (!coeff(i, j)) return false;
+        if (!evaluator.coeff(i, j)) return false;
     return true;
   }
 }
@@ -102,19 +106,21 @@ inline bool DenseBase<Derived>::all() const
 template<typename Derived>
 inline bool DenseBase<Derived>::any() const
 {
+  typedef typename internal::evaluator<Derived>::type Evaluator;
   enum {
     unroll = SizeAtCompileTime != Dynamic
-          && CoeffReadCost != Dynamic
+          && Evaluator::CoeffReadCost != Dynamic
           && NumTraits<Scalar>::AddCost != Dynamic
-          && SizeAtCompileTime * (CoeffReadCost + NumTraits<Scalar>::AddCost) <= EIGEN_UNROLLING_LIMIT
+          && SizeAtCompileTime * (Evaluator::CoeffReadCost + NumTraits<Scalar>::AddCost) <= EIGEN_UNROLLING_LIMIT
   };
+  Evaluator evaluator(derived());
   if(unroll)
-    return internal::any_unroller<Derived, unroll ? int(SizeAtCompileTime) : Dynamic>::run(derived());
+    return internal::any_unroller<Evaluator, unroll ? int(SizeAtCompileTime) : Dynamic>::run(evaluator);
   else
   {
     for(Index j = 0; j < cols(); ++j)
       for(Index i = 0; i < rows(); ++i)
-        if (coeff(i, j)) return true;
+        if (evaluator.coeff(i, j)) return true;
     return false;
   }
 }
diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h
index 3568cb85f..09a83a382 100644
--- a/Eigen/src/Core/CoreEvaluators.h
+++ b/Eigen/src/Core/CoreEvaluators.h
@@ -2,7 +2,7 @@
 // for linear algebra.
 //
 // Copyright (C) 2011 Benoit Jacob <jacob.benoit.1@gmail.com>
-// Copyright (C) 2011 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2011-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
 // Copyright (C) 2011-2012 Jitse Niesen <jitse@maths.leeds.ac.uk>
 //
 // This Source Code Form is subject to the terms of the Mozilla
@@ -14,57 +14,83 @@
 #define EIGEN_COREEVALUATORS_H
 
 namespace Eigen {
-
+  
 namespace internal {
 
-// evaluator_traits<T> contains traits for evaluator_impl<T> 
+struct IndexBased {};
+struct IteratorBased {};
+
+// This class returns the evaluator kind from the expression storage kind.
+// Default assumes index based accessors
+template<typename StorageKind>
+struct storage_kind_to_evaluator_kind {
+  typedef IndexBased Kind;
+};
+
+// This class returns the evaluator shape from the expression storage kind.
+// It can be Dense, Sparse, Triangular, Diagonal, SelfAdjoint, Band, etc.
+template<typename StorageKind> struct storage_kind_to_shape;
+
+
+template<> struct storage_kind_to_shape<Dense> { typedef DenseShape Shape; };
+
+// Evaluators have to be specialized with respect to various criteria such as:
+//  - storage/structure/shape
+//  - scalar type
+//  - etc.
+// Therefore, we need specialization of evaluator providing additional template arguments for each kind of evaluators.
+// We currently distinguish the following kind of evaluators:
+// - unary_evaluator    for expressions taking only one arguments (CwiseUnaryOp, CwiseUnaryView, Transpose, MatrixWrapper, ArrayWrapper, Reverse, Replicate)
+// - binary_evaluator   for expression taking two arguments (CwiseBinaryOp)
+// - product_evaluator  for linear algebra products (Product); special case of binary_evaluator because it requires additional tags for dispatching.
+// - mapbase_evaluator  for Map, Block, Ref
+// - block_evaluator    for Block (special dispatching to a mapbase_evaluator or unary_evaluator)
+
+template< typename T,
+          typename LhsKind   = typename evaluator_traits<typename T::Lhs>::Kind,
+          typename RhsKind   = typename evaluator_traits<typename T::Rhs>::Kind,
+          typename LhsScalar = typename traits<typename T::Lhs>::Scalar,
+          typename RhsScalar = typename traits<typename T::Rhs>::Scalar> struct binary_evaluator;
+
+template< typename T,
+          typename Kind   = typename evaluator_traits<typename T::NestedExpression>::Kind,
+          typename Scalar = typename T::Scalar> struct unary_evaluator;
+          
+// evaluator_traits<T> contains traits for evaluator<T> 
 
 template<typename T>
-struct evaluator_traits
+struct evaluator_traits_base
 {
-  // 1 if evaluator_impl<T>::evalTo() exists
-  // 0 if evaluator_impl<T> allows coefficient-based access
-  static const int HasEvalTo = 0;
-
+  // TODO check whether these two indirections are really needed.
+  // Basically, if nobody overwrite type and nestedType, then, they can be dropped
+//   typedef evaluator<T> type;
+//   typedef evaluator<T> nestedType;
+  
+  // by default, get evaluator kind and shape from storage
+  typedef typename storage_kind_to_evaluator_kind<typename traits<T>::StorageKind>::Kind Kind;
+  typedef typename storage_kind_to_shape<typename traits<T>::StorageKind>::Shape Shape;
+  
   // 1 if assignment A = B assumes aliasing when B is of type T and thus B needs to be evaluated into a
   // temporary; 0 if not.
   static const int AssumeAliasing = 0;
 };
 
-// expression class for evaluating nested expression to a temporary
- 
-template<typename ArgType>
-class EvalToTemp;
-
-// evaluator<T>::type is type of evaluator for T
-// evaluator<T>::nestedType is type of evaluator if T is nested inside another evaluator
- 
-template<typename T>
-struct evaluator_impl 
-{ };
- 
-template<typename T, int Nested = evaluator_traits<T>::HasEvalTo>
-struct evaluator_nested_type;
-
+// Default evaluator traits
 template<typename T>
-struct evaluator_nested_type<T, 0>
+struct evaluator_traits : public evaluator_traits_base<T>
 {
-  typedef evaluator_impl<T> type;
 };
 
-template<typename T>
-struct evaluator_nested_type<T, 1>
-{
-  typedef evaluator_impl<EvalToTemp<T> > type;
-};
 
+// By default, we assume a unary expression:
 template<typename T>
-struct evaluator
+struct evaluator : public unary_evaluator<T>
 {
-  typedef evaluator_impl<T> type;
-  typedef typename evaluator_nested_type<T>::type nestedType;
+  typedef unary_evaluator<T> Base;
+  evaluator(const T& xpr) : Base(xpr) {}
 };
 
+
 // TODO: Think about const-correctness
 
 template<typename T>
@@ -76,46 +102,58 @@ struct evaluator<const T>
 
 // TODO this class does not seem to be necessary anymore
 template<typename ExpressionType>
-struct evaluator_impl_base
+struct evaluator_base
 {
-  typedef typename ExpressionType::Index Index;
+//   typedef typename evaluator_traits<ExpressionType>::type type;
+//   typedef typename evaluator_traits<ExpressionType>::nestedType nestedType;
+  typedef evaluator<ExpressionType> type;
+  typedef evaluator<ExpressionType> nestedType;
+  
+  typedef typename traits<ExpressionType>::Index Index;
   // TODO that's not very nice to have to propagate all these traits. They are currently only needed to handle outer,inner indices.
   typedef traits<ExpressionType> ExpressionTraits;
-
-  evaluator_impl<ExpressionType>& derived() 
-  {
-    return *static_cast<evaluator_impl<ExpressionType>*>(this); 
-  }
 };
 
 // -------------------- Matrix and Array --------------------
 //
-// evaluator_impl<PlainObjectBase> is a common base class for the
+// evaluator<PlainObjectBase> is a common base class for the
 // Matrix and Array evaluators.
+// Here we directly specialize evaluator. This is not really a unary expression, and it is, by definition, dense,
+// so no need for more sophisticated dispatching.
 
 template<typename Derived>
-struct evaluator_impl<PlainObjectBase<Derived> >
-  : evaluator_impl_base<Derived>
+struct evaluator<PlainObjectBase<Derived> >
+  : evaluator_base<Derived>
 {
   typedef PlainObjectBase<Derived> PlainObjectType;
+  typedef typename PlainObjectType::Index Index;
+  typedef typename PlainObjectType::Scalar Scalar;
+  typedef typename PlainObjectType::CoeffReturnType CoeffReturnType;
+  typedef typename PlainObjectType::PacketScalar PacketScalar;
+  typedef typename PlainObjectType::PacketReturnType PacketReturnType;
 
   enum {
     IsRowMajor = PlainObjectType::IsRowMajor,
     IsVectorAtCompileTime = PlainObjectType::IsVectorAtCompileTime,
     RowsAtCompileTime = PlainObjectType::RowsAtCompileTime,
-    ColsAtCompileTime = PlainObjectType::ColsAtCompileTime
+    ColsAtCompileTime = PlainObjectType::ColsAtCompileTime,
+    
+    CoeffReadCost = NumTraits<Scalar>::ReadCost,
+    Flags = compute_matrix_evaluator_flags< Scalar,Derived::RowsAtCompileTime,Derived::ColsAtCompileTime,
+                                            Derived::Options,Derived::MaxRowsAtCompileTime,Derived::MaxColsAtCompileTime>::ret
   };
-
-  evaluator_impl(const PlainObjectType& m) 
+  
+  evaluator()
+    : m_data(0),
+      m_outerStride(IsVectorAtCompileTime  ? 0 
+                                           : int(IsRowMajor) ? ColsAtCompileTime 
+                                           : RowsAtCompileTime)
+  {}
+  
+  evaluator(const PlainObjectType& m) 
     : m_data(m.data()), m_outerStride(IsVectorAtCompileTime ? 0 : m.outerStride()) 
   { }
 
-  typedef typename PlainObjectType::Index Index;
-  typedef typename PlainObjectType::Scalar Scalar;
-  typedef typename PlainObjectType::CoeffReturnType CoeffReturnType;
-  typedef typename PlainObjectType::PacketScalar PacketScalar;
-  typedef typename PlainObjectType::PacketReturnType PacketReturnType;
-
   CoeffReturnType coeff(Index row, Index col) const
   {
     if (IsRowMajor)
@@ -184,153 +222,45 @@ protected:
 };
 
 template<typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols>
-struct evaluator_impl<Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols> >
-  : evaluator_impl<PlainObjectBase<Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols> > >
+struct evaluator<Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols> >
+  : evaluator<PlainObjectBase<Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols> > >
 {
   typedef Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols> XprType;
+  
+  evaluator() {}
 
-  evaluator_impl(const XprType& m) 
-    : evaluator_impl<PlainObjectBase<XprType> >(m) 
+  evaluator(const XprType& m) 
+    : evaluator<PlainObjectBase<XprType> >(m) 
   { }
 };
 
 template<typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols>
-struct evaluator_impl<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> >
-  : evaluator_impl<PlainObjectBase<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> > >
+struct evaluator<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> >
+  : evaluator<PlainObjectBase<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> > >
 {
   typedef Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> XprType;
 
-  evaluator_impl(const XprType& m) 
-    : evaluator_impl<PlainObjectBase<XprType> >(m) 
-  { }
-};
-
-// -------------------- EvalToTemp --------------------
-
-template<typename ArgType>
-struct traits<EvalToTemp<ArgType> >
-  : public traits<ArgType>
-{ };
-
-template<typename ArgType>
-class EvalToTemp
-  : public dense_xpr_base<EvalToTemp<ArgType> >::type
-{
- public:
- 
-  typedef typename dense_xpr_base<EvalToTemp>::type Base;
-  EIGEN_GENERIC_PUBLIC_INTERFACE(EvalToTemp)
- 
-  EvalToTemp(const ArgType& arg)
-    : m_arg(arg)
-  { }
- 
-  const ArgType& arg() const
-  {
-    return m_arg;
-  }
-
-  Index rows() const 
-  {
-    return m_arg.rows();
-  }
-
-  Index cols() const 
-  {
-    return m_arg.cols();
-  }
-
- private:
-  const ArgType& m_arg;
-};
- 
-template<typename ArgType>
-struct evaluator_impl<EvalToTemp<ArgType> >
-{
-  typedef EvalToTemp<ArgType> XprType;
-  typedef typename ArgType::PlainObject PlainObject;
-
-  evaluator_impl(const XprType& xpr) 
-    : m_result(xpr.rows(), xpr.cols()), m_resultImpl(m_result)
-  {
-    // TODO we should simply do m_result(xpr.arg());
-    call_dense_assignment_loop(m_result, xpr.arg());
-  }
-
-  // This constructor is used when nesting an EvalTo evaluator in another evaluator
-  evaluator_impl(const ArgType& arg) 
-    : m_result(arg.rows(), arg.cols()), m_resultImpl(m_result)
-  {
-    // TODO we should simply do m_result(xpr.arg());
-    call_dense_assignment_loop(m_result, arg);
-  }
-
-  typedef typename PlainObject::Index Index;
-  typedef typename PlainObject::Scalar Scalar;
-  typedef typename PlainObject::CoeffReturnType CoeffReturnType;
-  typedef typename PlainObject::PacketScalar PacketScalar;
-  typedef typename PlainObject::PacketReturnType PacketReturnType;
-
-  // All other functions are forwarded to m_resultImpl
-
-  CoeffReturnType coeff(Index row, Index col) const 
-  { 
-    return m_resultImpl.coeff(row, col); 
-  }
-  
-  CoeffReturnType coeff(Index index) const 
-  { 
-    return m_resultImpl.coeff(index); 
-  }
+  evaluator() {}
   
-  Scalar& coeffRef(Index row, Index col) 
-  { 
-    return m_resultImpl.coeffRef(row, col); 
-  }
-  
-  Scalar& coeffRef(Index index) 
-  { 
-    return m_resultImpl.coeffRef(index); 
-  }
-
-  template<int LoadMode> 
-  PacketReturnType packet(Index row, Index col) const
-  {
-    return m_resultImpl.template packet<LoadMode>(row, col);
-  }
-
-  template<int LoadMode> 
-  PacketReturnType packet(Index index) const
-  {
-    return m_resultImpl.packet<LoadMode>(index);
-  }
-
-  template<int StoreMode> 
-  void writePacket(Index row, Index col, const PacketScalar& x)
-  {
-    m_resultImpl.template writePacket<StoreMode>(row, col, x);
-  }
-
-  template<int StoreMode> 
-  void writePacket(Index index, const PacketScalar& x)
-  {
-    m_resultImpl.template writePacket<StoreMode>(index, x);
-  }
-
-protected:
-  PlainObject m_result;
-  typename evaluator<PlainObject>::nestedType m_resultImpl;
+  evaluator(const XprType& m) 
+    : evaluator<PlainObjectBase<XprType> >(m) 
+  { }
 };
 
 // -------------------- Transpose --------------------
 
 template<typename ArgType>
-struct evaluator_impl<Transpose<ArgType> >
-  : evaluator_impl_base<Transpose<ArgType> >
+struct unary_evaluator<Transpose<ArgType>, IndexBased>
+  : evaluator_base<Transpose<ArgType> >
 {
   typedef Transpose<ArgType> XprType;
+  
+  enum {
+    CoeffReadCost = evaluator<ArgType>::CoeffReadCost,    
+    Flags = evaluator<ArgType>::Flags ^ RowMajorBit
+  };
 
-  evaluator_impl(const XprType& t) : m_argImpl(t.nestedExpression()) {}
+  unary_evaluator(const XprType& t) : m_argImpl(t.nestedExpression()) {}
 
   typedef typename XprType::Index Index;
   typedef typename XprType::Scalar Scalar;
@@ -387,13 +317,27 @@ protected:
 };
 
 // -------------------- CwiseNullaryOp --------------------
+// Like Matrix and Array, this is not really a unary expression, so we directly specialize evaluator.
+// Likewise, there is not need to more sophisticated dispatching here.
 
 template<typename NullaryOp, typename PlainObjectType>
-struct evaluator_impl<CwiseNullaryOp<NullaryOp,PlainObjectType> >
+struct evaluator<CwiseNullaryOp<NullaryOp,PlainObjectType> >
+  : evaluator_base<CwiseNullaryOp<NullaryOp,PlainObjectType> >
 {
   typedef CwiseNullaryOp<NullaryOp,PlainObjectType> XprType;
+  typedef typename internal::remove_all<PlainObjectType>::type PlainObjectTypeCleaned;
+  
+  enum {
+    CoeffReadCost = internal::functor_traits<NullaryOp>::Cost,
+    
+    Flags = (evaluator<PlainObjectTypeCleaned>::Flags
+          &  (  HereditaryBits
+              | (functor_has_linear_access<NullaryOp>::ret  ? LinearAccessBit : 0)
+              | (functor_traits<NullaryOp>::PacketAccess    ? PacketAccessBit : 0)))
+          | (functor_traits<NullaryOp>::IsRepeatable ? 0 : EvalBeforeNestingBit) // FIXME EvalBeforeNestingBit should be needed anymore
+  };
 
-  evaluator_impl(const XprType& n) 
+  evaluator(const XprType& n) 
     : m_functor(n.functor()) 
   { }
 
@@ -430,11 +374,20 @@ protected:
 // -------------------- CwiseUnaryOp --------------------
 
 template<typename UnaryOp, typename ArgType>
-struct evaluator_impl<CwiseUnaryOp<UnaryOp, ArgType> >
+struct unary_evaluator<CwiseUnaryOp<UnaryOp, ArgType>, IndexBased >
+  : evaluator_base<CwiseUnaryOp<UnaryOp, ArgType> >
 {
   typedef CwiseUnaryOp<UnaryOp, ArgType> XprType;
+  
+  enum {
+    CoeffReadCost = evaluator<ArgType>::CoeffReadCost + functor_traits<UnaryOp>::Cost,
+    
+    Flags = evaluator<ArgType>::Flags & (
+              HereditaryBits | LinearAccessBit | AlignedBit
+            | (functor_traits<UnaryOp>::PacketAccess ? PacketAccessBit : 0))
+  };
 
-  evaluator_impl(const XprType& op) 
+  unary_evaluator(const XprType& op) 
     : m_functor(op.functor()), 
       m_argImpl(op.nestedExpression()) 
   { }
@@ -472,12 +425,43 @@ protected:
 
 // -------------------- CwiseBinaryOp --------------------
 
+// this is a binary expression
 template<typename BinaryOp, typename Lhs, typename Rhs>
-struct evaluator_impl<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
+struct evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
+  : public binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
 {
   typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> XprType;
+  typedef binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs> > Base;
+  
+  evaluator(const XprType& xpr) : Base(xpr) {}
+};
 
-  evaluator_impl(const XprType& xpr) 
+template<typename BinaryOp, typename Lhs, typename Rhs>
+struct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IndexBased, IndexBased>
+  : evaluator_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
+{
+  typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> XprType;
+  
+  enum {
+    CoeffReadCost = evaluator<Lhs>::CoeffReadCost + evaluator<Rhs>::CoeffReadCost + functor_traits<BinaryOp>::Cost,
+    
+    LhsFlags = evaluator<Lhs>::Flags,
+    RhsFlags = evaluator<Rhs>::Flags,
+    SameType = is_same<typename Lhs::Scalar,typename Rhs::Scalar>::value,
+    StorageOrdersAgree = (int(LhsFlags)&RowMajorBit)==(int(RhsFlags)&RowMajorBit),
+    Flags0 = (int(LhsFlags) | int(RhsFlags)) & (
+        HereditaryBits
+      | (int(LhsFlags) & int(RhsFlags) &
+           ( AlignedBit
+           | (StorageOrdersAgree ? LinearAccessBit : 0)
+           | (functor_traits<BinaryOp>::PacketAccess && StorageOrdersAgree && SameType ? PacketAccessBit : 0)
+           )
+        )
+     ),
+    Flags = (Flags0 & ~RowMajorBit) | (LhsFlags & RowMajorBit)
+  };
+
+  binary_evaluator(const XprType& xpr) 
     : m_functor(xpr.functor()),
       m_lhsImpl(xpr.lhs()), 
       m_rhsImpl(xpr.rhs())  
@@ -501,14 +485,14 @@ struct evaluator_impl<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
   PacketScalar packet(Index row, Index col) const
   {
     return m_functor.packetOp(m_lhsImpl.template packet<LoadMode>(row, col),
-			      m_rhsImpl.template packet<LoadMode>(row, col));
+                              m_rhsImpl.template packet<LoadMode>(row, col));
   }
 
   template<int LoadMode>
   PacketScalar packet(Index index) const
   {
     return m_functor.packetOp(m_lhsImpl.template packet<LoadMode>(index),
-			      m_rhsImpl.template packet<LoadMode>(index));
+                              m_rhsImpl.template packet<LoadMode>(index));
   }
 
 protected:
@@ -520,12 +504,18 @@ protected:
 // -------------------- CwiseUnaryView --------------------
 
 template<typename UnaryOp, typename ArgType>
-struct evaluator_impl<CwiseUnaryView<UnaryOp, ArgType> >
-  : evaluator_impl_base<CwiseUnaryView<UnaryOp, ArgType> >
+struct unary_evaluator<CwiseUnaryView<UnaryOp, ArgType>, IndexBased>
+  : evaluator_base<CwiseUnaryView<UnaryOp, ArgType> >
 {
   typedef CwiseUnaryView<UnaryOp, ArgType> XprType;
+  
+  enum {
+    CoeffReadCost = evaluator<ArgType>::CoeffReadCost + functor_traits<UnaryOp>::Cost,
+    
+    Flags = (evaluator<ArgType>::Flags & (HereditaryBits | LinearAccessBit | DirectAccessBit))
+  };
 
-  evaluator_impl(const XprType& op) 
+  unary_evaluator(const XprType& op) 
     : m_unaryOp(op.functor()), 
       m_argImpl(op.nestedExpression()) 
   { }
@@ -561,13 +551,15 @@ protected:
 
 // -------------------- Map --------------------
 
-template<typename Derived, int AccessorsType>
-struct evaluator_impl<MapBase<Derived, AccessorsType> >
-  : evaluator_impl_base<Derived>
-{
-  typedef MapBase<Derived, AccessorsType> MapType;
-  typedef Derived XprType;
+// FIXME perhaps the PlainObjectType could be provided by Derived::PlainObject ?
+// but that might complicate template specialization
+template<typename Derived, typename PlainObjectType>
+struct mapbase_evaluator;
 
+template<typename Derived, typename PlainObjectType>
+struct mapbase_evaluator : evaluator_base<Derived>
+{
+  typedef Derived  XprType;
   typedef typename XprType::PointerType PointerType;
   typedef typename XprType::Index Index;
   typedef typename XprType::Scalar Scalar;
@@ -575,81 +567,121 @@ struct evaluator_impl<MapBase<Derived, AccessorsType> >
   typedef typename XprType::PacketScalar PacketScalar;
   typedef typename XprType::PacketReturnType PacketReturnType;
   
-  evaluator_impl(const XprType& map) 
-    : m_data(const_cast<PointerType>(map.data())),  
-      m_rowStride(map.rowStride()),
-      m_colStride(map.colStride())
-  { }
- 
   enum {
-    RowsAtCompileTime = XprType::RowsAtCompileTime
+    IsRowMajor = XprType::RowsAtCompileTime,
+    ColsAtCompileTime = XprType::ColsAtCompileTime,
+    CoeffReadCost = NumTraits<Scalar>::ReadCost
   };
+  
+  mapbase_evaluator(const XprType& map) 
+    : m_data(const_cast<PointerType>(map.data())),  
+      m_xpr(map)
+  {
+    EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(evaluator<Derived>::Flags&PacketAccessBit, internal::inner_stride_at_compile_time<Derived>::ret==1),
+                        PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1);
+  }
  
   CoeffReturnType coeff(Index row, Index col) const 
-  { 
-    return m_data[col * m_colStride + row * m_rowStride];
+  {
+    return m_data[col * m_xpr.colStride() + row * m_xpr.rowStride()];
   }
   
   CoeffReturnType coeff(Index index) const 
-  { 
-    return coeff(RowsAtCompileTime == 1 ? 0 : index,
-		 RowsAtCompileTime == 1 ? index : 0);
+  {
+    return m_data[index * m_xpr.innerStride()];
   }
 
   Scalar& coeffRef(Index row, Index col) 
-  { 
-    return m_data[col * m_colStride + row * m_rowStride];
+  {
+    return m_data[col * m_xpr.colStride() + row * m_xpr.rowStride()];
   }
   
   Scalar& coeffRef(Index index) 
-  { 
-    return coeffRef(RowsAtCompileTime == 1 ? 0 : index,
-		    RowsAtCompileTime == 1 ? index : 0);
+  {
+    return m_data[index * m_xpr.innerStride()];
   }
  
   template<int LoadMode> 
   PacketReturnType packet(Index row, Index col) const 
-  { 
-    PointerType ptr = m_data + row * m_rowStride + col * m_colStride;
+  {
+    PointerType ptr = m_data + row * m_xpr.rowStride() + col * m_xpr.colStride();
     return internal::ploadt<PacketScalar, LoadMode>(ptr);
   }
 
   template<int LoadMode> 
   PacketReturnType packet(Index index) const 
-  { 
-    return packet<LoadMode>(RowsAtCompileTime == 1 ? 0 : index,
-			    RowsAtCompileTime == 1 ? index : 0);
+  {
+    return internal::ploadt<PacketScalar, LoadMode>(m_data + index * m_xpr.innerStride());
   }
   
   template<int StoreMode> 
   void writePacket(Index row, Index col, const PacketScalar& x) 
-  { 
-    PointerType ptr = m_data + row * m_rowStride + col * m_colStride;
+  {
+    PointerType ptr = m_data + row * m_xpr.rowStride() + col * m_xpr.colStride();
     return internal::pstoret<Scalar, PacketScalar, StoreMode>(ptr, x);
   }
   
   template<int StoreMode> 
   void writePacket(Index index, const PacketScalar& x) 
-  { 
-    return writePacket<StoreMode>(RowsAtCompileTime == 1 ? 0 : index,
-				  RowsAtCompileTime == 1 ? index : 0,
-				  x);
+  {
+    internal::pstoret<Scalar, PacketScalar, StoreMode>(m_data + index * m_xpr.innerStride(), x);
   }
  
 protected:
   PointerType m_data;
-  int m_rowStride;
-  int m_colStride;
+  const XprType& m_xpr;
 };
 
 template<typename PlainObjectType, int MapOptions, typename StrideType> 
-struct evaluator_impl<Map<PlainObjectType, MapOptions, StrideType> >
-  : public evaluator_impl<MapBase<Map<PlainObjectType, MapOptions, StrideType> > >
+struct evaluator<Map<PlainObjectType, MapOptions, StrideType> >
+  : public mapbase_evaluator<Map<PlainObjectType, MapOptions, StrideType>, PlainObjectType>
 {
   typedef Map<PlainObjectType, MapOptions, StrideType> XprType;
+  typedef typename XprType::Scalar Scalar;
+  
+  enum {
+    InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0
+                             ? int(PlainObjectType::InnerStrideAtCompileTime)
+                             : int(StrideType::InnerStrideAtCompileTime),
+    OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0
+                             ? int(PlainObjectType::OuterStrideAtCompileTime)
+                             : int(StrideType::OuterStrideAtCompileTime),
+    HasNoInnerStride = InnerStrideAtCompileTime == 1,
+    HasNoOuterStride = StrideType::OuterStrideAtCompileTime == 0,
+    HasNoStride = HasNoInnerStride && HasNoOuterStride,
+    IsAligned = bool(EIGEN_ALIGN) && ((int(MapOptions)&Aligned)==Aligned),
+    IsDynamicSize = PlainObjectType::SizeAtCompileTime==Dynamic,
+    KeepsPacketAccess = bool(HasNoInnerStride)
+                        && ( bool(IsDynamicSize)
+                           || HasNoOuterStride
+                           || ( OuterStrideAtCompileTime!=Dynamic
+                           && ((static_cast<int>(sizeof(Scalar))*OuterStrideAtCompileTime)%EIGEN_ALIGN_BYTES)==0 ) ),
+    Flags0 = evaluator<PlainObjectType>::Flags,
+    Flags1 = IsAligned ? (int(Flags0) | AlignedBit) : (int(Flags0) & ~AlignedBit),
+    Flags2 = (bool(HasNoStride) || bool(PlainObjectType::IsVectorAtCompileTime))
+           ? int(Flags1) : int(Flags1 & ~LinearAccessBit),
+    Flags = KeepsPacketAccess ? int(Flags2) : (int(Flags2) & ~PacketAccessBit)
+  };
+
+  evaluator(const XprType& map) 
+    : mapbase_evaluator<XprType, PlainObjectType>(map) 
+  { }
+};
+
+// -------------------- Ref --------------------
+
+template<typename PlainObjectType, int RefOptions, typename StrideType> 
+struct evaluator<Ref<PlainObjectType, RefOptions, StrideType> >
+  : public mapbase_evaluator<Ref<PlainObjectType, RefOptions, StrideType>, PlainObjectType>
+{
+  typedef Ref<PlainObjectType, RefOptions, StrideType> XprType;
+  
+  enum {
+    Flags = evaluator<Map<PlainObjectType, RefOptions, StrideType> >::Flags
+  };
 
-  evaluator_impl(const XprType& map) 
-    : evaluator_impl<MapBase<XprType> >(map) 
+  evaluator(const XprType& ref) 
+    : mapbase_evaluator<XprType, PlainObjectType>(ref) 
   { }
 };
 
@@ -659,21 +691,68 @@ template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel,
          bool HasDirectAccess = internal::has_direct_access<ArgType>::ret> struct block_evaluator;
          
 template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel> 
-struct evaluator_impl<Block<ArgType, BlockRows, BlockCols, InnerPanel> >
+struct evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> >
   : block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel>
 {
   typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType;
+  typedef typename XprType::Scalar Scalar; 
+  
+  enum {
+    CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
+    
+    RowsAtCompileTime = traits<XprType>::RowsAtCompileTime,
+    ColsAtCompileTime = traits<XprType>::ColsAtCompileTime,
+    MaxRowsAtCompileTime = traits<XprType>::MaxRowsAtCompileTime,
+    MaxColsAtCompileTime = traits<XprType>::MaxColsAtCompileTime,
+    
+    ArgTypeIsRowMajor = (int(evaluator<ArgType>::Flags)&RowMajorBit) != 0,
+    IsRowMajor = (MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1) ? 1
+               : (MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1) ? 0
+               : ArgTypeIsRowMajor,
+    HasSameStorageOrderAsArgType = (IsRowMajor == ArgTypeIsRowMajor),
+    InnerSize = IsRowMajor ? int(ColsAtCompileTime) : int(RowsAtCompileTime),
+    InnerStrideAtCompileTime = HasSameStorageOrderAsArgType
+                             ? int(inner_stride_at_compile_time<ArgType>::ret)
+                             : int(outer_stride_at_compile_time<ArgType>::ret),
+    OuterStrideAtCompileTime = HasSameStorageOrderAsArgType
+                             ? int(outer_stride_at_compile_time<ArgType>::ret)
+                             : int(inner_stride_at_compile_time<ArgType>::ret),
+    MaskPacketAccessBit = (InnerSize == Dynamic || (InnerSize % packet_traits<Scalar>::size) == 0)
+                       && (InnerStrideAtCompileTime == 1)
+                        ? PacketAccessBit : 0,
+    
+    MaskAlignedBit = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % EIGEN_ALIGN_BYTES) == 0)) ? AlignedBit : 0,
+    FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1 || (InnerPanel && (evaluator<ArgType>::Flags&LinearAccessBit))) ? LinearAccessBit : 0,    
+    FlagsRowMajorBit = XprType::Flags&RowMajorBit,
+    Flags0 = evaluator<ArgType>::Flags & ( (HereditaryBits & ~RowMajorBit) |
+                                           DirectAccessBit |
+                                           MaskPacketAccessBit |
+                                           MaskAlignedBit),
+    Flags = Flags0 | FlagsLinearAccessBit | FlagsRowMajorBit
+  };
   typedef block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel> block_evaluator_type;
-  evaluator_impl(const XprType& block) : block_evaluator_type(block) {}
+  evaluator(const XprType& block) : block_evaluator_type(block) {}
 };
 
+// no direct-access => dispatch to a unary evaluator
 template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel>
 struct block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel, /*HasDirectAccess*/ false>
-  : evaluator_impl_base<Block<ArgType, BlockRows, BlockCols, InnerPanel> >
+  : unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> >
 {
   typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType;
 
   block_evaluator(const XprType& block) 
+    : unary_evaluator<XprType>(block) 
+  {}
+};
+
+template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel>
+struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBased>
+  : evaluator_base<Block<ArgType, BlockRows, BlockCols, InnerPanel> >
+{
+  typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType;
+
+  unary_evaluator(const XprType& block) 
     : m_argImpl(block.nestedExpression()), 
       m_startRow(block.startRow()), 
       m_startCol(block.startCol()) 
@@ -696,8 +775,7 @@ struct block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel, /*HasDirectAcc
   
   CoeffReturnType coeff(Index index) const 
   { 
-    return coeff(RowsAtCompileTime == 1 ? 0 : index,
-		 RowsAtCompileTime == 1 ? index : 0);
+    return coeff(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0);
   }
 
   Scalar& coeffRef(Index row, Index col) 
@@ -707,8 +785,7 @@ struct block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel, /*HasDirectAcc
   
   Scalar& coeffRef(Index index) 
   { 
-    return coeffRef(RowsAtCompileTime == 1 ? 0 : index,
-		    RowsAtCompileTime == 1 ? index : 0);
+    return coeffRef(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0);
   }
  
   template<int LoadMode> 
@@ -721,7 +798,7 @@ struct block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel, /*HasDirectAcc
   PacketReturnType packet(Index index) const 
   { 
     return packet<LoadMode>(RowsAtCompileTime == 1 ? 0 : index,
-			    RowsAtCompileTime == 1 ? index : 0);
+                            RowsAtCompileTime == 1 ? index : 0);
   }
   
   template<int StoreMode> 
@@ -734,8 +811,8 @@ struct block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel, /*HasDirectAcc
   void writePacket(Index index, const PacketScalar& x) 
   { 
     return writePacket<StoreMode>(RowsAtCompileTime == 1 ? 0 : index,
-				  RowsAtCompileTime == 1 ? index : 0,
-				  x);
+                                  RowsAtCompileTime == 1 ? index : 0,
+                                  x);
   }
  
 protected:
@@ -749,24 +826,38 @@ protected:
 
 template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel> 
 struct block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel, /* HasDirectAccess */ true>
-  : evaluator_impl<MapBase<Block<ArgType, BlockRows, BlockCols, InnerPanel> > >
+  : mapbase_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>,
+                      typename Block<ArgType, BlockRows, BlockCols, InnerPanel>::PlainObject>
 {
   typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType;
 
   block_evaluator(const XprType& block) 
-    : evaluator_impl<MapBase<XprType> >(block) 
-  { }
+    : mapbase_evaluator<XprType, typename XprType::PlainObject>(block) 
+  {
+    // FIXME this should be an internal assertion
+    eigen_assert(EIGEN_IMPLIES(evaluator<XprType>::Flags&AlignedBit, (size_t(block.data()) % EIGEN_ALIGN_BYTES) == 0) && "data is not aligned");
+  }
 };
 
 
 // -------------------- Select --------------------
+// TODO shall we introduce a ternary_evaluator?
 
+// TODO enable vectorization for Select
 template<typename ConditionMatrixType, typename ThenMatrixType, typename ElseMatrixType>
-struct evaluator_impl<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >
+struct evaluator<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >
+  : evaluator_base<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >
 {
   typedef Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> XprType;
+  enum {
+    CoeffReadCost = evaluator<ConditionMatrixType>::CoeffReadCost
+                  + EIGEN_SIZE_MAX(evaluator<ThenMatrixType>::CoeffReadCost,
+                                   evaluator<ElseMatrixType>::CoeffReadCost),
+
+    Flags = (unsigned int)evaluator<ThenMatrixType>::Flags & evaluator<ElseMatrixType>::Flags & HereditaryBits
+  };
 
-  evaluator_impl(const XprType& select) 
+  evaluator(const XprType& select) 
     : m_conditionImpl(select.conditionMatrix()),
       m_thenImpl(select.thenMatrix()),
       m_elseImpl(select.elseMatrix())
@@ -801,20 +892,32 @@ protected:
 // -------------------- Replicate --------------------
 
 template<typename ArgType, int RowFactor, int ColFactor> 
-struct evaluator_impl<Replicate<ArgType, RowFactor, ColFactor> >
+struct unary_evaluator<Replicate<ArgType, RowFactor, ColFactor> >
+  : evaluator_base<Replicate<ArgType, RowFactor, ColFactor> >
 {
   typedef Replicate<ArgType, RowFactor, ColFactor> XprType;
-
-  evaluator_impl(const XprType& replicate) 
-    : m_argImpl(replicate.nestedExpression()),
-      m_rows(replicate.nestedExpression().rows()),
-      m_cols(replicate.nestedExpression().cols())
-  { }
- 
   typedef typename XprType::Index Index;
   typedef typename XprType::CoeffReturnType CoeffReturnType;
   typedef typename XprType::PacketReturnType PacketReturnType;
+  enum {
+    Factor = (RowFactor==Dynamic || ColFactor==Dynamic) ? Dynamic : RowFactor*ColFactor
+  };
+  typedef typename internal::nested_eval<ArgType,Factor>::type ArgTypeNested;
+  typedef typename internal::remove_all<ArgTypeNested>::type ArgTypeNestedCleaned;
+  
+  enum {
+    CoeffReadCost = evaluator<ArgTypeNestedCleaned>::CoeffReadCost,
+    
+    Flags = (evaluator<ArgTypeNestedCleaned>::Flags & HereditaryBits & ~RowMajorBit) | (traits<XprType>::Flags & RowMajorBit)
+  };
 
+  unary_evaluator(const XprType& replicate) 
+    : m_arg(replicate.nestedExpression()),
+      m_argImpl(m_arg),
+      m_rows(replicate.nestedExpression().rows()),
+      m_cols(replicate.nestedExpression().cols())
+  {}
+ 
   CoeffReturnType coeff(Index row, Index col) const
   {
     // try to avoid using modulo; this is a pure optimization strategy
@@ -842,9 +945,10 @@ struct evaluator_impl<Replicate<ArgType, RowFactor, ColFactor> >
   }
  
 protected:
-  typename evaluator<ArgType>::nestedType m_argImpl;
-  const variable_if_dynamic<Index, XprType::RowsAtCompileTime> m_rows;
-  const variable_if_dynamic<Index, XprType::ColsAtCompileTime> m_cols;
+  const ArgTypeNested m_arg; // FIXME is it OK to store both the argument and its evaluator?? (we have the same situation in evaluator_product)
+  typename evaluator<ArgTypeNestedCleaned>::nestedType m_argImpl;
+  const variable_if_dynamic<Index, ArgType::RowsAtCompileTime> m_rows;
+  const variable_if_dynamic<Index, ArgType::ColsAtCompileTime> m_cols;
 };
 
 
@@ -855,13 +959,25 @@ protected:
 //       the row() and col() member functions.
 
 template< typename ArgType, typename MemberOp, int Direction>
-struct evaluator_impl<PartialReduxExpr<ArgType, MemberOp, Direction> >
+struct evaluator<PartialReduxExpr<ArgType, MemberOp, Direction> >
+  : evaluator_base<PartialReduxExpr<ArgType, MemberOp, Direction> >
 {
   typedef PartialReduxExpr<ArgType, MemberOp, Direction> XprType;
+  typedef typename XprType::Scalar InputScalar;
+  enum {
+    TraversalSize = Direction==int(Vertical) ? int(ArgType::RowsAtCompileTime) :  int(XprType::ColsAtCompileTime)
+  };
+  typedef typename MemberOp::template Cost<InputScalar,int(TraversalSize)> CostOpType;
+  enum {
+    CoeffReadCost = TraversalSize==Dynamic ? Dynamic
+                  : TraversalSize * evaluator<ArgType>::CoeffReadCost + int(CostOpType::value),
+    
+    Flags = (traits<XprType>::Flags&RowMajorBit) | (evaluator<ArgType>::Flags&HereditaryBits)
+  };
 
-  evaluator_impl(const XprType expr)
+  evaluator(const XprType expr)
     : m_expr(expr)
-  { }
+  {}
 
   typedef typename XprType::Index Index;
   typedef typename XprType::CoeffReturnType CoeffReturnType;
@@ -883,16 +999,20 @@ protected:
 
 // -------------------- MatrixWrapper and ArrayWrapper --------------------
 //
-// evaluator_impl_wrapper_base<T> is a common base class for the
+// evaluator_wrapper_base<T> is a common base class for the
 // MatrixWrapper and ArrayWrapper evaluators.
 
 template<typename XprType>
-struct evaluator_impl_wrapper_base
-  : evaluator_impl_base<XprType>
+struct evaluator_wrapper_base
+  : evaluator_base<XprType>
 {
   typedef typename remove_all<typename XprType::NestedExpressionType>::type ArgType;
+  enum {
+    CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
+    Flags = evaluator<ArgType>::Flags
+  };
 
-  evaluator_impl_wrapper_base(const ArgType& arg) : m_argImpl(arg) {}
+  evaluator_wrapper_base(const ArgType& arg) : m_argImpl(arg) {}
 
   typedef typename ArgType::Index Index;
   typedef typename ArgType::Scalar Scalar;
@@ -949,24 +1069,24 @@ protected:
 };
 
 template<typename TArgType>
-struct evaluator_impl<MatrixWrapper<TArgType> >
-  : evaluator_impl_wrapper_base<MatrixWrapper<TArgType> >
+struct unary_evaluator<MatrixWrapper<TArgType> >
+  : evaluator_wrapper_base<MatrixWrapper<TArgType> >
 {
   typedef MatrixWrapper<TArgType> XprType;
 
-  evaluator_impl(const XprType& wrapper) 
-    : evaluator_impl_wrapper_base<MatrixWrapper<TArgType> >(wrapper.nestedExpression())
+  unary_evaluator(const XprType& wrapper) 
+    : evaluator_wrapper_base<MatrixWrapper<TArgType> >(wrapper.nestedExpression())
   { }
 };
 
 template<typename TArgType>
-struct evaluator_impl<ArrayWrapper<TArgType> >
-  : evaluator_impl_wrapper_base<ArrayWrapper<TArgType> >
+struct unary_evaluator<ArrayWrapper<TArgType> >
+  : evaluator_wrapper_base<ArrayWrapper<TArgType> >
 {
   typedef ArrayWrapper<TArgType> XprType;
 
-  evaluator_impl(const XprType& wrapper) 
-    : evaluator_impl_wrapper_base<ArrayWrapper<TArgType> >(wrapper.nestedExpression())
+  unary_evaluator(const XprType& wrapper) 
+    : evaluator_wrapper_base<ArrayWrapper<TArgType> >(wrapper.nestedExpression())
   { }
 };
 
@@ -977,8 +1097,8 @@ struct evaluator_impl<ArrayWrapper<TArgType> >
 template<typename PacketScalar, bool ReversePacket> struct reverse_packet_cond;
 
 template<typename ArgType, int Direction>
-struct evaluator_impl<Reverse<ArgType, Direction> >
-  : evaluator_impl_base<Reverse<ArgType, Direction> >
+struct unary_evaluator<Reverse<ArgType, Direction> >
+  : evaluator_base<Reverse<ArgType, Direction> >
 {
   typedef Reverse<ArgType, Direction> XprType;
   typedef typename XprType::Index Index;
@@ -997,11 +1117,21 @@ struct evaluator_impl<Reverse<ArgType, Direction> >
     OffsetCol  = ReverseCol && IsRowMajor ? PacketSize : 1,
     ReversePacket = (Direction == BothDirections)
                     || ((Direction == Vertical)   && IsColMajor)
-                    || ((Direction == Horizontal) && IsRowMajor)
+                    || ((Direction == Horizontal) && IsRowMajor),
+                    
+    CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
+    
+    // let's enable LinearAccess only with vectorization because of the product overhead
+    // FIXME enable DirectAccess with negative strides?
+    Flags0 = evaluator<ArgType>::Flags,
+    LinearAccess = ( (Direction==BothDirections) && (int(Flags0)&PacketAccessBit) )
+                 ? LinearAccessBit : 0,
+
+    Flags = int(Flags0) & (HereditaryBits | PacketAccessBit | LinearAccess)
   };
   typedef internal::reverse_packet_cond<PacketScalar,ReversePacket> reverse_packet;
 
-  evaluator_impl(const XprType& reverse) 
+  unary_evaluator(const XprType& reverse) 
     : m_argImpl(reverse.nestedExpression()),
       m_rows(ReverseRow ? reverse.nestedExpression().rows() : 0),
       m_cols(ReverseCol ? reverse.nestedExpression().cols() : 0)
@@ -1010,7 +1140,7 @@ struct evaluator_impl<Reverse<ArgType, Direction> >
   CoeffReturnType coeff(Index row, Index col) const
   {
     return m_argImpl.coeff(ReverseRow ? m_rows.value() - row - 1 : row,
-			   ReverseCol ? m_cols.value() - col - 1 : col);
+                           ReverseCol ? m_cols.value() - col - 1 : col);
   }
 
   CoeffReturnType coeff(Index index) const
@@ -1021,7 +1151,7 @@ struct evaluator_impl<Reverse<ArgType, Direction> >
   Scalar& coeffRef(Index row, Index col)
   {
     return m_argImpl.coeffRef(ReverseRow ? m_rows.value() - row - 1 : row,
-			      ReverseCol ? m_cols.value() - col - 1 : col);
+                              ReverseCol ? m_cols.value() - col - 1 : col);
   }
 
   Scalar& coeffRef(Index index)
@@ -1071,12 +1201,18 @@ protected:
 // -------------------- Diagonal --------------------
 
 template<typename ArgType, int DiagIndex>
-struct evaluator_impl<Diagonal<ArgType, DiagIndex> >
-  : evaluator_impl_base<Diagonal<ArgType, DiagIndex> >
+struct evaluator<Diagonal<ArgType, DiagIndex> >
+  : evaluator_base<Diagonal<ArgType, DiagIndex> >
 {
   typedef Diagonal<ArgType, DiagIndex> XprType;
+  
+  enum {
+    CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
+    
+    Flags = (unsigned int)evaluator<ArgType>::Flags & (HereditaryBits | LinearAccessBit | DirectAccessBit) & ~RowMajorBit
+  };
 
-  evaluator_impl(const XprType& diagonal) 
+  evaluator(const XprType& diagonal) 
     : m_argImpl(diagonal.nestedExpression()),
       m_index(diagonal.index())
   { }
@@ -1114,6 +1250,86 @@ private:
   EIGEN_STRONG_INLINE Index colOffset() const { return m_index.value() > 0 ? m_index.value() : 0; }
 };
 
+
+//----------------------------------------------------------------------
+// deprecated code
+//----------------------------------------------------------------------
+
+// -------------------- EvalToTemp --------------------
+
+// expression class for evaluating nested expression to a temporary
+
+template<typename ArgType> class EvalToTemp;
+
+template<typename ArgType>
+struct traits<EvalToTemp<ArgType> >
+  : public traits<ArgType>
+{ };
+
+template<typename ArgType>
+class EvalToTemp
+  : public dense_xpr_base<EvalToTemp<ArgType> >::type
+{
+ public:
+ 
+  typedef typename dense_xpr_base<EvalToTemp>::type Base;
+  EIGEN_GENERIC_PUBLIC_INTERFACE(EvalToTemp)
+ 
+  EvalToTemp(const ArgType& arg)
+    : m_arg(arg)
+  { }
+ 
+  const ArgType& arg() const
+  {
+    return m_arg;
+  }
+
+  Index rows() const 
+  {
+    return m_arg.rows();
+  }
+
+  Index cols() const 
+  {
+    return m_arg.cols();
+  }
+
+ private:
+  const ArgType& m_arg;
+};
+ 
+template<typename ArgType>
+struct evaluator<EvalToTemp<ArgType> >
+  : public evaluator<typename ArgType::PlainObject>::type
+{
+  typedef EvalToTemp<ArgType>                   XprType;
+  typedef typename ArgType::PlainObject         PlainObject;
+  typedef typename evaluator<PlainObject>::type Base;
+  
+  typedef evaluator type;
+  typedef evaluator nestedType;
+
+  evaluator(const XprType& xpr) 
+    : m_result(xpr.rows(), xpr.cols())
+  {
+    ::new (static_cast<Base*>(this)) Base(m_result);
+    // TODO we should simply do m_result(xpr.arg());
+    call_dense_assignment_loop(m_result, xpr.arg());
+  }
+
+  // This constructor is used when nesting an EvalTo evaluator in another evaluator
+  evaluator(const ArgType& arg) 
+    : m_result(arg.rows(), arg.cols())
+  {
+    ::new (static_cast<Base*>(this)) Base(m_result);
+    // TODO we should simply do m_result(xpr.arg());
+    call_dense_assignment_loop(m_result, arg);
+  }
+
+protected:
+  PlainObject m_result;
+};
+
 } // namespace internal
 
 } // end namespace Eigen
diff --git a/Eigen/src/Core/CwiseBinaryOp.h b/Eigen/src/Core/CwiseBinaryOp.h
index e20daacc8..de9109e53 100644
--- a/Eigen/src/Core/CwiseBinaryOp.h
+++ b/Eigen/src/Core/CwiseBinaryOp.h
@@ -1,7 +1,7 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
-// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
 // Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
 //
 // This Source Code Form is subject to the terms of the Mozilla
@@ -56,8 +56,9 @@ struct traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
                        typename Rhs::Scalar
                      )
                    >::type Scalar;
-  typedef typename promote_storage_type<typename traits<Lhs>::StorageKind,
-                                           typename traits<Rhs>::StorageKind>::ret StorageKind;
+  typedef typename cwise_promote_storage_type<typename traits<Lhs>::StorageKind,
+                                              typename traits<Rhs>::StorageKind,
+                                              BinaryOp>::ret StorageKind;
   typedef typename promote_index_type<typename traits<Lhs>::Index,
                                          typename traits<Rhs>::Index>::type Index;
   typedef typename Lhs::Nested LhsNested;
@@ -65,60 +66,36 @@ struct traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
   typedef typename remove_reference<LhsNested>::type _LhsNested;
   typedef typename remove_reference<RhsNested>::type _RhsNested;
   enum {
-    LhsCoeffReadCost = _LhsNested::CoeffReadCost,
-    RhsCoeffReadCost = _RhsNested::CoeffReadCost,
-    LhsFlags = _LhsNested::Flags,
-    RhsFlags = _RhsNested::Flags,
-    SameType = is_same<typename _LhsNested::Scalar,typename _RhsNested::Scalar>::value,
-    StorageOrdersAgree = (int(Lhs::Flags)&RowMajorBit)==(int(Rhs::Flags)&RowMajorBit),
-    Flags0 = (int(LhsFlags) | int(RhsFlags)) & (
-        HereditaryBits
-      | (int(LhsFlags) & int(RhsFlags) &
-           ( AlignedBit
-           | (StorageOrdersAgree ? LinearAccessBit : 0)
-           | (functor_traits<BinaryOp>::PacketAccess && StorageOrdersAgree && SameType ? PacketAccessBit : 0)
-           )
-        )
-     ),
-    Flags = (Flags0 & ~RowMajorBit) | (LhsFlags & RowMajorBit),
-    CoeffReadCost = LhsCoeffReadCost + RhsCoeffReadCost + functor_traits<BinaryOp>::Cost
+    Flags = _LhsNested::Flags & RowMajorBit
   };
 };
 } // end namespace internal
 
-// we require Lhs and Rhs to have the same scalar type. Currently there is no example of a binary functor
-// that would take two operands of different types. If there were such an example, then this check should be
-// moved to the BinaryOp functors, on a per-case basis. This would however require a change in the BinaryOp functors, as
-// currently they take only one typename Scalar template parameter.
-// It is tempting to always allow mixing different types but remember that this is often impossible in the vectorized paths.
-// So allowing mixing different types gives very unexpected errors when enabling vectorization, when the user tries to
-// add together a float matrix and a double matrix.
-#define EIGEN_CHECK_BINARY_COMPATIBILIY(BINOP,LHS,RHS) \
-  EIGEN_STATIC_ASSERT((internal::functor_is_product_like<BINOP>::ret \
-                        ? int(internal::scalar_product_traits<LHS, RHS>::Defined) \
-                        : int(internal::is_same<LHS, RHS>::value)), \
-    YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
-
 template<typename BinaryOp, typename Lhs, typename Rhs, typename StorageKind>
 class CwiseBinaryOpImpl;
 
-template<typename BinaryOp, typename Lhs, typename Rhs>
+template<typename BinaryOp, typename LhsType, typename RhsType>
 class CwiseBinaryOp : internal::no_assignment_operator,
   public CwiseBinaryOpImpl<
-          BinaryOp, Lhs, Rhs,
-          typename internal::promote_storage_type<typename internal::traits<Lhs>::StorageKind,
-                                           typename internal::traits<Rhs>::StorageKind>::ret>
+          BinaryOp, LhsType, RhsType,
+          typename internal::cwise_promote_storage_type<typename internal::traits<LhsType>::StorageKind,
+                                                        typename internal::traits<RhsType>::StorageKind,
+                                                        BinaryOp>::ret>
 {
   public:
+    
+    typedef typename internal::remove_all<LhsType>::type Lhs;
+    typedef typename internal::remove_all<RhsType>::type Rhs;
 
     typedef typename CwiseBinaryOpImpl<
-        BinaryOp, Lhs, Rhs,
-        typename internal::promote_storage_type<typename internal::traits<Lhs>::StorageKind,
-                                         typename internal::traits<Rhs>::StorageKind>::ret>::Base Base;
+        BinaryOp, LhsType, RhsType,
+        typename internal::cwise_promote_storage_type<typename internal::traits<LhsType>::StorageKind,
+                                                      typename internal::traits<Rhs>::StorageKind,
+                                                      BinaryOp>::ret>::Base Base;
     EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseBinaryOp)
 
-    typedef typename internal::nested<Lhs>::type LhsNested;
-    typedef typename internal::nested<Rhs>::type RhsNested;
+    typedef typename internal::nested<LhsType>::type LhsNested;
+    typedef typename internal::nested<RhsType>::type RhsNested;
     typedef typename internal::remove_reference<LhsNested>::type _LhsNested;
     typedef typename internal::remove_reference<RhsNested>::type _RhsNested;
 
@@ -165,43 +142,13 @@ class CwiseBinaryOp : internal::no_assignment_operator,
     const BinaryOp m_functor;
 };
 
-template<typename BinaryOp, typename Lhs, typename Rhs>
-class CwiseBinaryOpImpl<BinaryOp, Lhs, Rhs, Dense>
-  : public internal::dense_xpr_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >::type
+// Generic API dispatcher
+template<typename BinaryOp, typename Lhs, typename Rhs, typename StorageKind>
+class CwiseBinaryOpImpl
+  : public internal::generic_xpr_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >::type
 {
-    typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> Derived;
-  public:
-
-    typedef typename internal::dense_xpr_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >::type Base;
-    EIGEN_DENSE_PUBLIC_INTERFACE( Derived )
-
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const Scalar coeff(Index rowId, Index colId) const
-    {
-      return derived().functor()(derived().lhs().coeff(rowId, colId),
-                                 derived().rhs().coeff(rowId, colId));
-    }
-
-    template<int LoadMode>
-    EIGEN_STRONG_INLINE PacketScalar packet(Index rowId, Index colId) const
-    {
-      return derived().functor().packetOp(derived().lhs().template packet<LoadMode>(rowId, colId),
-                                          derived().rhs().template packet<LoadMode>(rowId, colId));
-    }
-
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const Scalar coeff(Index index) const
-    {
-      return derived().functor()(derived().lhs().coeff(index),
-                                 derived().rhs().coeff(index));
-    }
-
-    template<int LoadMode>
-    EIGEN_STRONG_INLINE PacketScalar packet(Index index) const
-    {
-      return derived().functor().packetOp(derived().lhs().template packet<LoadMode>(index),
-                                          derived().rhs().template packet<LoadMode>(index));
-    }
+public:
+  typedef typename internal::generic_xpr_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >::type Base;
 };
 
 /** replaces \c *this by \c *this - \a other.
@@ -213,8 +160,7 @@ template<typename OtherDerived>
 EIGEN_STRONG_INLINE Derived &
 MatrixBase<Derived>::operator-=(const MatrixBase<OtherDerived> &other)
 {
-  SelfCwiseBinaryOp<internal::scalar_difference_op<Scalar>, Derived, OtherDerived> tmp(derived());
-  tmp = other.derived();
+  call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar>());
   return derived();
 }
 
@@ -227,8 +173,7 @@ template<typename OtherDerived>
 EIGEN_STRONG_INLINE Derived &
 MatrixBase<Derived>::operator+=(const MatrixBase<OtherDerived>& other)
 {
-  SelfCwiseBinaryOp<internal::scalar_sum_op<Scalar>, Derived, OtherDerived> tmp(derived());
-  tmp = other.derived();
+  call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar>());
   return derived();
 }
 
diff --git a/Eigen/src/Core/CwiseNullaryOp.h b/Eigen/src/Core/CwiseNullaryOp.h
index 124383114..8b8397da6 100644
--- a/Eigen/src/Core/CwiseNullaryOp.h
+++ b/Eigen/src/Core/CwiseNullaryOp.h
@@ -35,12 +35,7 @@ template<typename NullaryOp, typename PlainObjectType>
 struct traits<CwiseNullaryOp<NullaryOp, PlainObjectType> > : traits<PlainObjectType>
 {
   enum {
-    Flags = (traits<PlainObjectType>::Flags
-      & (  HereditaryBits
-         | (functor_has_linear_access<NullaryOp>::ret ? LinearAccessBit : 0)
-         | (functor_traits<NullaryOp>::PacketAccess ? PacketAccessBit : 0)))
-      | (functor_traits<NullaryOp>::IsRepeatable ? 0 : EvalBeforeNestingBit),
-    CoeffReadCost = functor_traits<NullaryOp>::Cost
+    Flags = traits<PlainObjectType>::Flags & RowMajorBit
   };
 };
 }
diff --git a/Eigen/src/Core/CwiseUnaryOp.h b/Eigen/src/Core/CwiseUnaryOp.h
index aa7df197f..79a872934 100644
--- a/Eigen/src/Core/CwiseUnaryOp.h
+++ b/Eigen/src/Core/CwiseUnaryOp.h
@@ -1,7 +1,7 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
-// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
 // Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
 //
 // This Source Code Form is subject to the terms of the Mozilla
@@ -44,10 +44,7 @@ struct traits<CwiseUnaryOp<UnaryOp, XprType> >
   typedef typename XprType::Nested XprTypeNested;
   typedef typename remove_reference<XprTypeNested>::type _XprTypeNested;
   enum {
-    Flags = _XprTypeNested::Flags & (
-      HereditaryBits | LinearAccessBit | AlignedBit
-      | (functor_traits<UnaryOp>::PacketAccess ? PacketAccessBit : 0)),
-    CoeffReadCost = _XprTypeNested::CoeffReadCost + functor_traits<UnaryOp>::Cost
+    Flags = _XprTypeNested::Flags & RowMajorBit 
   };
 };
 }
@@ -63,6 +60,7 @@ class CwiseUnaryOp : internal::no_assignment_operator,
 
     typedef typename CwiseUnaryOpImpl<UnaryOp, XprType,typename internal::traits<XprType>::StorageKind>::Base Base;
     EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseUnaryOp)
+    typedef typename internal::remove_all<XprType>::type NestedExpression;
 
     EIGEN_DEVICE_FUNC
     inline CwiseUnaryOp(const XprType& xpr, const UnaryOp& func = UnaryOp())
@@ -92,42 +90,13 @@ class CwiseUnaryOp : internal::no_assignment_operator,
     const UnaryOp m_functor;
 };
 
-// This is the generic implementation for dense storage.
-// It can be used for any expression types implementing the dense concept.
-template<typename UnaryOp, typename XprType>
-class CwiseUnaryOpImpl<UnaryOp,XprType,Dense>
-  : public internal::dense_xpr_base<CwiseUnaryOp<UnaryOp, XprType> >::type
+// Generic API dispatcher
+template<typename UnaryOp, typename XprType, typename StorageKind>
+class CwiseUnaryOpImpl
+  : public internal::generic_xpr_base<CwiseUnaryOp<UnaryOp, XprType> >::type
 {
-  public:
-
-    typedef CwiseUnaryOp<UnaryOp, XprType> Derived;
-    typedef typename internal::dense_xpr_base<CwiseUnaryOp<UnaryOp, XprType> >::type Base;
-    EIGEN_DENSE_PUBLIC_INTERFACE(Derived)
-
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const Scalar coeff(Index rowId, Index colId) const
-    {
-      return derived().functor()(derived().nestedExpression().coeff(rowId, colId));
-    }
-
-    template<int LoadMode>
-    EIGEN_STRONG_INLINE PacketScalar packet(Index rowId, Index colId) const
-    {
-      return derived().functor().packetOp(derived().nestedExpression().template packet<LoadMode>(rowId, colId));
-    }
-
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const Scalar coeff(Index index) const
-    {
-      return derived().functor()(derived().nestedExpression().coeff(index));
-    }
-
-    template<int LoadMode>
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE PacketScalar packet(Index index) const
-    {
-      return derived().functor().packetOp(derived().nestedExpression().template packet<LoadMode>(index));
-    }
+public:
+  typedef typename internal::generic_xpr_base<CwiseUnaryOp<UnaryOp, XprType> >::type Base;
 };
 
 } // end namespace Eigen
diff --git a/Eigen/src/Core/CwiseUnaryView.h b/Eigen/src/Core/CwiseUnaryView.h
index b2638d326..71249a39c 100644
--- a/Eigen/src/Core/CwiseUnaryView.h
+++ b/Eigen/src/Core/CwiseUnaryView.h
@@ -37,8 +37,7 @@ struct traits<CwiseUnaryView<ViewOp, MatrixType> >
   typedef typename MatrixType::Nested MatrixTypeNested;
   typedef typename remove_all<MatrixTypeNested>::type _MatrixTypeNested;
   enum {
-    Flags = (traits<_MatrixTypeNested>::Flags & (HereditaryBits | LvalueBit | LinearAccessBit | DirectAccessBit)),
-    CoeffReadCost = traits<_MatrixTypeNested>::CoeffReadCost + functor_traits<ViewOp>::Cost,
+    Flags = traits<_MatrixTypeNested>::Flags & (RowMajorBit | LvalueBit | DirectAccessBit), // FIXME DirectAccessBit should not be handled by expressions
     MatrixTypeInnerStride =  inner_stride_at_compile_time<MatrixType>::ret,
     // need to cast the sizeof's from size_t to int explicitly, otherwise:
     // "error: no integral type can represent all of the enumerator values
@@ -62,6 +61,7 @@ class CwiseUnaryView : public CwiseUnaryViewImpl<ViewOp, MatrixType, typename in
 
     typedef typename CwiseUnaryViewImpl<ViewOp, MatrixType,typename internal::traits<MatrixType>::StorageKind>::Base Base;
     EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseUnaryView)
+    typedef typename internal::remove_all<MatrixType>::type NestedExpression;
 
     inline CwiseUnaryView(const MatrixType& mat, const ViewOp& func = ViewOp())
       : m_matrix(mat), m_functor(func) {}
@@ -88,6 +88,15 @@ class CwiseUnaryView : public CwiseUnaryViewImpl<ViewOp, MatrixType, typename in
     ViewOp m_functor;
 };
 
+// Generic API dispatcher
+template<typename ViewOp, typename XprType, typename StorageKind>
+class CwiseUnaryViewImpl
+  : public internal::generic_xpr_base<CwiseUnaryView<ViewOp, XprType> >::type
+{
+public:
+  typedef typename internal::generic_xpr_base<CwiseUnaryView<ViewOp, XprType> >::type Base;
+};
+
 template<typename ViewOp, typename MatrixType>
 class CwiseUnaryViewImpl<ViewOp,MatrixType,Dense>
   : public internal::dense_xpr_base< CwiseUnaryView<ViewOp, MatrixType> >::type
@@ -100,8 +109,8 @@ class CwiseUnaryViewImpl<ViewOp,MatrixType,Dense>
     EIGEN_DENSE_PUBLIC_INTERFACE(Derived)
     EIGEN_INHERIT_ASSIGNMENT_OPERATORS(CwiseUnaryViewImpl)
     
-    inline Scalar* data() { return &coeffRef(0); }
-    inline const Scalar* data() const { return &coeff(0); }
+    inline Scalar* data() { return &(this->coeffRef(0)); }
+    inline const Scalar* data() const { return &(this->coeff(0)); }
 
     inline Index innerStride() const
     {
@@ -112,26 +121,6 @@ class CwiseUnaryViewImpl<ViewOp,MatrixType,Dense>
     {
       return derived().nestedExpression().outerStride() * sizeof(typename internal::traits<MatrixType>::Scalar) / sizeof(Scalar);
     }
-
-    EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const
-    {
-      return derived().functor()(derived().nestedExpression().coeff(row, col));
-    }
-
-    EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
-    {
-      return derived().functor()(derived().nestedExpression().coeff(index));
-    }
-
-    EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col)
-    {
-      return derived().functor()(const_cast_derived().nestedExpression().coeffRef(row, col));
-    }
-
-    EIGEN_STRONG_INLINE Scalar& coeffRef(Index index)
-    {
-      return derived().functor()(const_cast_derived().nestedExpression().coeffRef(index));
-    }
 };
 
 } // end namespace Eigen
diff --git a/Eigen/src/Core/DenseBase.h b/Eigen/src/Core/DenseBase.h
index bd5dd14ed..6078af553 100644
--- a/Eigen/src/Core/DenseBase.h
+++ b/Eigen/src/Core/DenseBase.h
@@ -74,16 +74,6 @@ template<typename Derived> class DenseBase
     using Base::colIndexByOuterInner;
     using Base::coeff;
     using Base::coeffByOuterInner;
-    using Base::packet;
-    using Base::packetByOuterInner;
-    using Base::writePacket;
-    using Base::writePacketByOuterInner;
-    using Base::coeffRef;
-    using Base::coeffRefByOuterInner;
-    using Base::copyCoeff;
-    using Base::copyCoeffByOuterInner;
-    using Base::copyPacket;
-    using Base::copyPacketByOuterInner;
     using Base::operator();
     using Base::operator[];
     using Base::x;
@@ -169,11 +159,6 @@ template<typename Derived> class DenseBase
       InnerSizeAtCompileTime = int(IsVectorAtCompileTime) ? int(SizeAtCompileTime)
                              : int(IsRowMajor) ? int(ColsAtCompileTime) : int(RowsAtCompileTime),
 
-      CoeffReadCost = internal::traits<Derived>::CoeffReadCost,
-        /**< This is a rough measure of how expensive it is to read one coefficient from
-          * this expression.
-          */
-
       InnerStrideAtCompileTime = internal::inner_stride_at_compile_time<Derived>::ret,
       OuterStrideAtCompileTime = internal::outer_stride_at_compile_time<Derived>::ret
     };
@@ -278,7 +263,8 @@ template<typename Derived> class DenseBase
     Derived& operator=(const ReturnByValue<OtherDerived>& func);
 
 #ifndef EIGEN_PARSED_BY_DOXYGEN
-    /** Copies \a other into *this without evaluating other. \returns a reference to *this. */
+    /** Copies \a other into *this without evaluating other. \returns a reference to *this.
+      * \deprecated */
     template<typename OtherDerived>
     EIGEN_DEVICE_FUNC
     Derived& lazyAssign(const DenseBase<OtherDerived>& other);
@@ -287,8 +273,10 @@ template<typename Derived> class DenseBase
     EIGEN_DEVICE_FUNC
     CommaInitializer<Derived> operator<< (const Scalar& s);
 
+    // TODO flagged is temporarly disabled. It seems useless now
     template<unsigned int Added,unsigned int Removed>
-    const Flagged<Derived, Added, Removed> flagged() const;
+    const Derived& flagged() const
+    { return derived(); }
 
     template<typename OtherDerived>
     EIGEN_DEVICE_FUNC
@@ -301,13 +289,6 @@ template<typename Derived> class DenseBase
     ConstTransposeReturnType transpose() const;
     EIGEN_DEVICE_FUNC
     void transposeInPlace();
-#ifndef EIGEN_NO_DEBUG
-  protected:
-    template<typename OtherDerived>
-    void checkTransposeAliasing(const OtherDerived& other) const;
-  public:
-#endif
-
 
     EIGEN_DEVICE_FUNC static const ConstantReturnType
     Constant(Index rows, Index cols, const Scalar& value);
@@ -387,7 +368,7 @@ template<typename Derived> class DenseBase
       // size types on MSVC.
       return typename internal::eval<Derived>::type(derived());
     }
-
+    
     /** swaps *this with the expression \a other.
       *
       */
@@ -396,7 +377,8 @@ template<typename Derived> class DenseBase
     void swap(const DenseBase<OtherDerived>& other,
               int = OtherDerived::ThisConstantIsPrivateInPlainObjectBase)
     {
-      SwapWrapper<Derived>(derived()).lazyAssign(other.derived());
+      eigen_assert(rows()==other.rows() && cols()==other.cols());
+      call_assignment(derived(), other.const_cast_derived(), internal::swap_assign_op<Scalar>());
     }
 
     /** swaps *this with the matrix or array \a other.
@@ -406,10 +388,10 @@ template<typename Derived> class DenseBase
     EIGEN_DEVICE_FUNC
     void swap(PlainObjectBase<OtherDerived>& other)
     {
-      SwapWrapper<Derived>(derived()).lazyAssign(other.derived());
+      eigen_assert(rows()==other.rows() && cols()==other.cols());
+      call_assignment(derived(), other.derived(), internal::swap_assign_op<Scalar>());
     }
 
-
     EIGEN_DEVICE_FUNC inline const NestByValue<Derived> nestByValue() const;
     EIGEN_DEVICE_FUNC inline const ForceAlignedAccess<Derived> forceAlignedAccess() const;
     EIGEN_DEVICE_FUNC inline ForceAlignedAccess<Derived> forceAlignedAccess();
diff --git a/Eigen/src/Core/DenseCoeffsBase.h b/Eigen/src/Core/DenseCoeffsBase.h
index 4e986e875..a9e4dbaf9 100644
--- a/Eigen/src/Core/DenseCoeffsBase.h
+++ b/Eigen/src/Core/DenseCoeffsBase.h
@@ -97,8 +97,8 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
     EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const
     {
       eigen_internal_assert(row >= 0 && row < rows()
-                        && col >= 0 && col < cols());
-      return derived().coeff(row, col);
+                         && col >= 0 && col < cols());
+      return typename internal::evaluator<Derived>::type(derived()).coeff(row,col);
     }
 
     EIGEN_DEVICE_FUNC
@@ -117,7 +117,7 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
     {
       eigen_assert(row >= 0 && row < rows()
           && col >= 0 && col < cols());
-      return derived().coeff(row, col);
+      return coeff(row, col);
     }
 
     /** Short version: don't use this function, use
@@ -140,7 +140,7 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
     coeff(Index index) const
     {
       eigen_internal_assert(index >= 0 && index < size());
-      return derived().coeff(index);
+      return typename internal::evaluator<Derived>::type(derived()).coeff(index);
     }
 
 
@@ -159,7 +159,7 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
       EIGEN_STATIC_ASSERT(Derived::IsVectorAtCompileTime,
                           THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD)
       eigen_assert(index >= 0 && index < size());
-      return derived().coeff(index);
+      return coeff(index);
     }
 
     /** \returns the coefficient at given index.
@@ -177,7 +177,7 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
     operator()(Index index) const
     {
       eigen_assert(index >= 0 && index < size());
-      return derived().coeff(index);
+      return coeff(index);
     }
 
     /** equivalent to operator[](0).  */
@@ -217,9 +217,8 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
     template<int LoadMode>
     EIGEN_STRONG_INLINE PacketReturnType packet(Index row, Index col) const
     {
-      eigen_internal_assert(row >= 0 && row < rows()
-                      && col >= 0 && col < cols());
-      return derived().template packet<LoadMode>(row,col);
+      eigen_internal_assert(row >= 0 && row < rows() && col >= 0 && col < cols());
+      return typename internal::evaluator<Derived>::type(derived()).template packet<LoadMode>(row,col);
     }
 
 
@@ -245,7 +244,7 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
     EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
     {
       eigen_internal_assert(index >= 0 && index < size());
-      return derived().template packet<LoadMode>(index);
+      return typename internal::evaluator<Derived>::type(derived()).template packet<LoadMode>(index);
     }
 
   protected:
@@ -325,8 +324,8 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
     EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col)
     {
       eigen_internal_assert(row >= 0 && row < rows()
-                        && col >= 0 && col < cols());
-      return derived().coeffRef(row, col);
+                         && col >= 0 && col < cols());
+      return typename internal::evaluator<Derived>::type(derived()).coeffRef(row,col);
     }
 
     EIGEN_DEVICE_FUNC
@@ -348,7 +347,7 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
     {
       eigen_assert(row >= 0 && row < rows()
           && col >= 0 && col < cols());
-      return derived().coeffRef(row, col);
+      return coeffRef(row, col);
     }
 
 
@@ -372,7 +371,7 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
     coeffRef(Index index)
     {
       eigen_internal_assert(index >= 0 && index < size());
-      return derived().coeffRef(index);
+      return typename internal::evaluator<Derived>::type(derived()).coeffRef(index);
     }
 
     /** \returns a reference to the coefficient at given index.
@@ -389,7 +388,7 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
       EIGEN_STATIC_ASSERT(Derived::IsVectorAtCompileTime,
                           THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD)
       eigen_assert(index >= 0 && index < size());
-      return derived().coeffRef(index);
+      return coeffRef(index);
     }
 
     /** \returns a reference to the coefficient at given index.
@@ -406,7 +405,7 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
     operator()(Index index)
     {
       eigen_assert(index >= 0 && index < size());
-      return derived().coeffRef(index);
+      return coeffRef(index);
     }
 
     /** equivalent to operator[](0).  */
@@ -432,144 +431,6 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
     EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Scalar&
     w() { return (*this)[3]; }
-
-    /** \internal
-      * Stores the given packet of coefficients, at the given row and column of this expression. It is your responsibility
-      * to ensure that a packet really starts there. This method is only available on expressions having the
-      * PacketAccessBit.
-      *
-      * The \a LoadMode parameter may have the value \a #Aligned or \a #Unaligned. Its effect is to select
-      * the appropriate vectorization instruction. Aligned access is faster, but is only possible for packets
-      * starting at an address which is a multiple of the packet size.
-      */
-
-    template<int StoreMode>
-    EIGEN_STRONG_INLINE void writePacket
-    (Index row, Index col, const typename internal::packet_traits<Scalar>::type& val)
-    {
-      eigen_internal_assert(row >= 0 && row < rows()
-                        && col >= 0 && col < cols());
-      derived().template writePacket<StoreMode>(row,col,val);
-    }
-
-
-    /** \internal */
-    template<int StoreMode>
-    EIGEN_STRONG_INLINE void writePacketByOuterInner
-    (Index outer, Index inner, const typename internal::packet_traits<Scalar>::type& val)
-    {
-      writePacket<StoreMode>(rowIndexByOuterInner(outer, inner),
-                            colIndexByOuterInner(outer, inner),
-                            val);
-    }
-
-    /** \internal
-      * Stores the given packet of coefficients, at the given index in this expression. It is your responsibility
-      * to ensure that a packet really starts there. This method is only available on expressions having the
-      * PacketAccessBit and the LinearAccessBit.
-      *
-      * The \a LoadMode parameter may have the value \a Aligned or \a Unaligned. Its effect is to select
-      * the appropriate vectorization instruction. Aligned access is faster, but is only possible for packets
-      * starting at an address which is a multiple of the packet size.
-      */
-    template<int StoreMode>
-    EIGEN_STRONG_INLINE void writePacket
-    (Index index, const typename internal::packet_traits<Scalar>::type& val)
-    {
-      eigen_internal_assert(index >= 0 && index < size());
-      derived().template writePacket<StoreMode>(index,val);
-    }
-
-#ifndef EIGEN_PARSED_BY_DOXYGEN
-
-    /** \internal Copies the coefficient at position (row,col) of other into *this.
-      *
-      * This method is overridden in SwapWrapper, allowing swap() assignments to share 99% of their code
-      * with usual assignments.
-      *
-      * Outside of this internal usage, this method has probably no usefulness. It is hidden in the public API dox.
-      */
-
-    template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE void copyCoeff(Index row, Index col, const DenseBase<OtherDerived>& other)
-    {
-      eigen_internal_assert(row >= 0 && row < rows()
-                        && col >= 0 && col < cols());
-      derived().coeffRef(row, col) = other.derived().coeff(row, col);
-    }
-
-    /** \internal Copies the coefficient at the given index of other into *this.
-      *
-      * This method is overridden in SwapWrapper, allowing swap() assignments to share 99% of their code
-      * with usual assignments.
-      *
-      * Outside of this internal usage, this method has probably no usefulness. It is hidden in the public API dox.
-      */
-
-    template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE void copyCoeff(Index index, const DenseBase<OtherDerived>& other)
-    {
-      eigen_internal_assert(index >= 0 && index < size());
-      derived().coeffRef(index) = other.derived().coeff(index);
-    }
-
-
-    template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE void copyCoeffByOuterInner(Index outer, Index inner, const DenseBase<OtherDerived>& other)
-    {
-      const Index row = rowIndexByOuterInner(outer,inner);
-      const Index col = colIndexByOuterInner(outer,inner);
-      // derived() is important here: copyCoeff() may be reimplemented in Derived!
-      derived().copyCoeff(row, col, other);
-    }
-
-    /** \internal Copies the packet at position (row,col) of other into *this.
-      *
-      * This method is overridden in SwapWrapper, allowing swap() assignments to share 99% of their code
-      * with usual assignments.
-      *
-      * Outside of this internal usage, this method has probably no usefulness. It is hidden in the public API dox.
-      */
-
-    template<typename OtherDerived, int StoreMode, int LoadMode>
-    EIGEN_STRONG_INLINE void copyPacket(Index row, Index col, const DenseBase<OtherDerived>& other)
-    {
-      eigen_internal_assert(row >= 0 && row < rows()
-                        && col >= 0 && col < cols());
-      derived().template writePacket<StoreMode>(row, col,
-        other.derived().template packet<LoadMode>(row, col));
-    }
-
-    /** \internal Copies the packet at the given index of other into *this.
-      *
-      * This method is overridden in SwapWrapper, allowing swap() assignments to share 99% of their code
-      * with usual assignments.
-      *
-      * Outside of this internal usage, this method has probably no usefulness. It is hidden in the public API dox.
-      */
-
-    template<typename OtherDerived, int StoreMode, int LoadMode>
-    EIGEN_STRONG_INLINE void copyPacket(Index index, const DenseBase<OtherDerived>& other)
-    {
-      eigen_internal_assert(index >= 0 && index < size());
-      derived().template writePacket<StoreMode>(index,
-        other.derived().template packet<LoadMode>(index));
-    }
-
-    /** \internal */
-    template<typename OtherDerived, int StoreMode, int LoadMode>
-    EIGEN_STRONG_INLINE void copyPacketByOuterInner(Index outer, Index inner, const DenseBase<OtherDerived>& other)
-    {
-      const Index row = rowIndexByOuterInner(outer,inner);
-      const Index col = colIndexByOuterInner(outer,inner);
-      // derived() is important here: copyCoeff() may be reimplemented in Derived!
-      derived().template copyPacket< OtherDerived, StoreMode, LoadMode>(row, col, other);
-    }
-#endif
-
 };
 
 /** \brief Base class providing direct read-only coefficient access to matrices and arrays.
diff --git a/Eigen/src/Core/Diagonal.h b/Eigen/src/Core/Diagonal.h
index b160479ab..1ffcd97f9 100644
--- a/Eigen/src/Core/Diagonal.h
+++ b/Eigen/src/Core/Diagonal.h
@@ -52,8 +52,7 @@ struct traits<Diagonal<MatrixType,DiagIndex> >
                                                  MatrixType::MaxColsAtCompileTime - EIGEN_PLAIN_ENUM_MAX( DiagIndex, 0))),
     MaxColsAtCompileTime = 1,
     MaskLvalueBit = is_lvalue<MatrixType>::value ? LvalueBit : 0,
-    Flags = (unsigned int)_MatrixTypeNested::Flags & (HereditaryBits | LinearAccessBit | MaskLvalueBit | DirectAccessBit) & ~RowMajorBit,
-    CoeffReadCost = _MatrixTypeNested::CoeffReadCost,
+    Flags = (unsigned int)_MatrixTypeNested::Flags & (RowMajorBit | MaskLvalueBit | DirectAccessBit) & ~RowMajorBit, // FIXME DirectAccessBit should not be handled by expressions
     MatrixTypeOuterStride = outer_stride_at_compile_time<MatrixType>::ret,
     InnerStrideAtCompileTime = MatrixTypeOuterStride == Dynamic ? Dynamic : MatrixTypeOuterStride+1,
     OuterStrideAtCompileTime = 0
diff --git a/Eigen/src/Core/DiagonalMatrix.h b/Eigen/src/Core/DiagonalMatrix.h
index 96b65483d..44c249aa6 100644
--- a/Eigen/src/Core/DiagonalMatrix.h
+++ b/Eigen/src/Core/DiagonalMatrix.h
@@ -30,7 +30,7 @@ class DiagonalBase : public EigenBase<Derived>
       MaxRowsAtCompileTime = DiagonalVectorType::MaxSizeAtCompileTime,
       MaxColsAtCompileTime = DiagonalVectorType::MaxSizeAtCompileTime,
       IsVectorAtCompileTime = 0,
-      Flags = 0
+      Flags = NoPreferredStorageOrderBit
     };
 
     typedef Matrix<Scalar, RowsAtCompileTime, ColsAtCompileTime, 0, MaxRowsAtCompileTime, MaxColsAtCompileTime> DenseMatrixType;
@@ -44,18 +44,7 @@ class DiagonalBase : public EigenBase<Derived>
 
     EIGEN_DEVICE_FUNC
     DenseMatrixType toDenseMatrix() const { return derived(); }
-    template<typename DenseDerived>
-    EIGEN_DEVICE_FUNC
-    void evalTo(MatrixBase<DenseDerived> &other) const;
-    template<typename DenseDerived>
-    EIGEN_DEVICE_FUNC
-    void addTo(MatrixBase<DenseDerived> &other) const
-    { other.diagonal() += diagonal(); }
-    template<typename DenseDerived>
-    EIGEN_DEVICE_FUNC
-    void subTo(MatrixBase<DenseDerived> &other) const
-    { other.diagonal() -= diagonal(); }
-
+    
     EIGEN_DEVICE_FUNC
     inline const DiagonalVectorType& diagonal() const { return derived().diagonal(); }
     EIGEN_DEVICE_FUNC
@@ -66,14 +55,12 @@ class DiagonalBase : public EigenBase<Derived>
     EIGEN_DEVICE_FUNC
     inline Index cols() const { return diagonal().size(); }
 
-    /** \returns the diagonal matrix product of \c *this by the matrix \a matrix.
-      */
     template<typename MatrixDerived>
     EIGEN_DEVICE_FUNC
-    const DiagonalProduct<MatrixDerived, Derived, OnTheLeft>
+    const Product<Derived,MatrixDerived,LazyProduct>
     operator*(const MatrixBase<MatrixDerived> &matrix) const
     {
-      return DiagonalProduct<MatrixDerived, Derived, OnTheLeft>(matrix.derived(), derived());
+      return Product<Derived, MatrixDerived, LazyProduct>(derived(),matrix.derived());
     }
 
     EIGEN_DEVICE_FUNC
@@ -97,13 +84,6 @@ class DiagonalBase : public EigenBase<Derived>
     }
 };
 
-template<typename Derived>
-template<typename DenseDerived>
-void DiagonalBase<Derived>::evalTo(MatrixBase<DenseDerived> &other) const
-{
-  other.setZero();
-  other.diagonal() = diagonal();
-}
 #endif
 
 /** \class DiagonalMatrix
@@ -125,10 +105,10 @@ struct traits<DiagonalMatrix<_Scalar,SizeAtCompileTime,MaxSizeAtCompileTime> >
  : traits<Matrix<_Scalar,SizeAtCompileTime,SizeAtCompileTime,0,MaxSizeAtCompileTime,MaxSizeAtCompileTime> >
 {
   typedef Matrix<_Scalar,SizeAtCompileTime,1,0,MaxSizeAtCompileTime,1> DiagonalVectorType;
-  typedef Dense StorageKind;
+  typedef DiagonalShape StorageKind;
   typedef DenseIndex Index;
   enum {
-    Flags = LvalueBit
+    Flags = LvalueBit | NoPreferredStorageOrderBit
   };
 };
 }
@@ -249,13 +229,14 @@ struct traits<DiagonalWrapper<_DiagonalVectorType> >
   typedef _DiagonalVectorType DiagonalVectorType;
   typedef typename DiagonalVectorType::Scalar Scalar;
   typedef typename DiagonalVectorType::Index Index;
-  typedef typename DiagonalVectorType::StorageKind StorageKind;
+  typedef DiagonalShape StorageKind;
+  typedef typename traits<DiagonalVectorType>::XprKind XprKind;
   enum {
     RowsAtCompileTime = DiagonalVectorType::SizeAtCompileTime,
     ColsAtCompileTime = DiagonalVectorType::SizeAtCompileTime,
-    MaxRowsAtCompileTime = DiagonalVectorType::SizeAtCompileTime,
-    MaxColsAtCompileTime = DiagonalVectorType::SizeAtCompileTime,
-    Flags =  traits<DiagonalVectorType>::Flags & LvalueBit
+    MaxRowsAtCompileTime = DiagonalVectorType::MaxSizeAtCompileTime,
+    MaxColsAtCompileTime = DiagonalVectorType::MaxSizeAtCompileTime,
+    Flags =  (traits<DiagonalVectorType>::Flags & LvalueBit) | NoPreferredStorageOrderBit
   };
 };
 }
@@ -326,6 +307,27 @@ bool MatrixBase<Derived>::isDiagonal(const RealScalar& prec) const
   return true;
 }
 
+namespace internal {
+
+template<> struct storage_kind_to_shape<DiagonalShape> { typedef DiagonalShape Shape; };
+
+struct Diagonal2Dense {};
+
+template<> struct AssignmentKind<DenseShape,DiagonalShape> { typedef Diagonal2Dense Kind; };
+
+// Diagonal matrix to Dense assignment
+template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar>
+struct Assignment<DstXprType, SrcXprType, Functor, Diagonal2Dense, Scalar>
+{
+  static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar> &/*func*/)
+  {
+    dst.setZero();
+    dst.diagonal() = src.diagonal();
+  }
+};
+
+} // namespace internal
+
 } // end namespace Eigen
 
 #endif // EIGEN_DIAGONALMATRIX_H
diff --git a/Eigen/src/Core/DiagonalProduct.h b/Eigen/src/Core/DiagonalProduct.h
index c03a0c2e1..d372b938f 100644
--- a/Eigen/src/Core/DiagonalProduct.h
+++ b/Eigen/src/Core/DiagonalProduct.h
@@ -13,116 +13,14 @@
 
 namespace Eigen { 
 
-namespace internal {
-template<typename MatrixType, typename DiagonalType, int ProductOrder>
-struct traits<DiagonalProduct<MatrixType, DiagonalType, ProductOrder> >
- : traits<MatrixType>
-{
-  typedef typename scalar_product_traits<typename MatrixType::Scalar, typename DiagonalType::Scalar>::ReturnType Scalar;
-  enum {
-    RowsAtCompileTime = MatrixType::RowsAtCompileTime,
-    ColsAtCompileTime = MatrixType::ColsAtCompileTime,
-    MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
-    MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,
-
-    _StorageOrder = MatrixType::Flags & RowMajorBit ? RowMajor : ColMajor,
-    _ScalarAccessOnDiag =  !((int(_StorageOrder) == ColMajor && int(ProductOrder) == OnTheLeft)
-                          ||(int(_StorageOrder) == RowMajor && int(ProductOrder) == OnTheRight)),
-    _SameTypes = is_same<typename MatrixType::Scalar, typename DiagonalType::Scalar>::value,
-    // FIXME currently we need same types, but in the future the next rule should be the one
-    //_Vectorizable = bool(int(MatrixType::Flags)&PacketAccessBit) && ((!_PacketOnDiag) || (_SameTypes && bool(int(DiagonalType::DiagonalVectorType::Flags)&PacketAccessBit))),
-    _Vectorizable = bool(int(MatrixType::Flags)&PacketAccessBit) && _SameTypes && (_ScalarAccessOnDiag || (bool(int(DiagonalType::DiagonalVectorType::Flags)&PacketAccessBit))),
-    _LinearAccessMask = (RowsAtCompileTime==1 || ColsAtCompileTime==1) ? LinearAccessBit : 0,
-
-    Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixType::Flags)) | (_Vectorizable ? PacketAccessBit : 0) | AlignedBit,//(int(MatrixType::Flags)&int(DiagonalType::DiagonalVectorType::Flags)&AlignedBit),
-    CoeffReadCost = NumTraits<Scalar>::MulCost + MatrixType::CoeffReadCost + DiagonalType::DiagonalVectorType::CoeffReadCost
-  };
-};
-}
-
-template<typename MatrixType, typename DiagonalType, int ProductOrder>
-class DiagonalProduct : internal::no_assignment_operator,
-                        public MatrixBase<DiagonalProduct<MatrixType, DiagonalType, ProductOrder> >
-{
-  public:
-
-    typedef MatrixBase<DiagonalProduct> Base;
-    EIGEN_DENSE_PUBLIC_INTERFACE(DiagonalProduct)
-
-    inline DiagonalProduct(const MatrixType& matrix, const DiagonalType& diagonal)
-      : m_matrix(matrix), m_diagonal(diagonal)
-    {
-      eigen_assert(diagonal.diagonal().size() == (ProductOrder == OnTheLeft ? matrix.rows() : matrix.cols()));
-    }
-
-    EIGEN_STRONG_INLINE Index rows() const { return m_matrix.rows(); }
-    EIGEN_STRONG_INLINE Index cols() const { return m_matrix.cols(); }
-
-    EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const
-    {
-      return m_diagonal.diagonal().coeff(ProductOrder == OnTheLeft ? row : col) * m_matrix.coeff(row, col);
-    }
-    
-    EIGEN_STRONG_INLINE const Scalar coeff(Index idx) const
-    {
-      enum {
-        StorageOrder = int(MatrixType::Flags) & RowMajorBit ? RowMajor : ColMajor
-      };
-      return coeff(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx);
-    }
-
-    template<int LoadMode>
-    EIGEN_STRONG_INLINE PacketScalar packet(Index row, Index col) const
-    {
-      enum {
-        StorageOrder = Flags & RowMajorBit ? RowMajor : ColMajor
-      };
-      const Index indexInDiagonalVector = ProductOrder == OnTheLeft ? row : col;
-      return packet_impl<LoadMode>(row,col,indexInDiagonalVector,typename internal::conditional<
-        ((int(StorageOrder) == RowMajor && int(ProductOrder) == OnTheLeft)
-       ||(int(StorageOrder) == ColMajor && int(ProductOrder) == OnTheRight)), internal::true_type, internal::false_type>::type());
-    }
-    
-    template<int LoadMode>
-    EIGEN_STRONG_INLINE PacketScalar packet(Index idx) const
-    {
-      enum {
-        StorageOrder = int(MatrixType::Flags) & RowMajorBit ? RowMajor : ColMajor
-      };
-      return packet<LoadMode>(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx);
-    }
-
-  protected:
-    template<int LoadMode>
-    EIGEN_STRONG_INLINE PacketScalar packet_impl(Index row, Index col, Index id, internal::true_type) const
-    {
-      return internal::pmul(m_matrix.template packet<LoadMode>(row, col),
-                     internal::pset1<PacketScalar>(m_diagonal.diagonal().coeff(id)));
-    }
-
-    template<int LoadMode>
-    EIGEN_STRONG_INLINE PacketScalar packet_impl(Index row, Index col, Index id, internal::false_type) const
-    {
-      enum {
-        InnerSize = (MatrixType::Flags & RowMajorBit) ? MatrixType::ColsAtCompileTime : MatrixType::RowsAtCompileTime,
-        DiagonalVectorPacketLoadMode = (LoadMode == Aligned && (((InnerSize%16) == 0) || (int(DiagonalType::DiagonalVectorType::Flags)&AlignedBit)==AlignedBit) ? Aligned : Unaligned)
-      };
-      return internal::pmul(m_matrix.template packet<LoadMode>(row, col),
-                     m_diagonal.diagonal().template packet<DiagonalVectorPacketLoadMode>(id));
-    }
-
-    typename MatrixType::Nested m_matrix;
-    typename DiagonalType::Nested m_diagonal;
-};
-
 /** \returns the diagonal matrix product of \c *this by the diagonal matrix \a diagonal.
   */
 template<typename Derived>
 template<typename DiagonalDerived>
-inline const DiagonalProduct<Derived, DiagonalDerived, OnTheRight>
+inline const Product<Derived, DiagonalDerived, LazyProduct>
 MatrixBase<Derived>::operator*(const DiagonalBase<DiagonalDerived> &a_diagonal) const
 {
-  return DiagonalProduct<Derived, DiagonalDerived, OnTheRight>(derived(), a_diagonal.derived());
+  return Product<Derived, DiagonalDerived, LazyProduct>(derived(),a_diagonal.derived());
 }
 
 } // end namespace Eigen
diff --git a/Eigen/src/Core/Dot.h b/Eigen/src/Core/Dot.h
index db16e4acc..68e9c2660 100644
--- a/Eigen/src/Core/Dot.h
+++ b/Eigen/src/Core/Dot.h
@@ -113,8 +113,7 @@ template<typename Derived>
 inline const typename MatrixBase<Derived>::PlainObject
 MatrixBase<Derived>::normalized() const
 {
-  typedef typename internal::nested<Derived>::type Nested;
-  typedef typename internal::remove_reference<Nested>::type _Nested;
+  typedef typename internal::nested_eval<Derived,2>::type _Nested;
   _Nested n(derived());
   return n / n.norm();
 }
@@ -206,8 +205,8 @@ template<typename OtherDerived>
 bool MatrixBase<Derived>::isOrthogonal
 (const MatrixBase<OtherDerived>& other, const RealScalar& prec) const
 {
-  typename internal::nested<Derived,2>::type nested(derived());
-  typename internal::nested<OtherDerived,2>::type otherNested(other.derived());
+  typename internal::nested_eval<Derived,2>::type nested(derived());
+  typename internal::nested_eval<OtherDerived,2>::type otherNested(other.derived());
   return numext::abs2(nested.dot(otherNested)) <= prec * prec * nested.squaredNorm() * otherNested.squaredNorm();
 }
 
diff --git a/Eigen/src/Core/EigenBase.h b/Eigen/src/Core/EigenBase.h
index 1a577c2dc..52b66e6dc 100644
--- a/Eigen/src/Core/EigenBase.h
+++ b/Eigen/src/Core/EigenBase.h
@@ -121,7 +121,7 @@ template<typename Derived>
 template<typename OtherDerived>
 Derived& DenseBase<Derived>::operator=(const EigenBase<OtherDerived> &other)
 {
-  other.derived().evalTo(derived());
+  call_assignment(derived(), other.derived());
   return derived();
 }
 
@@ -129,7 +129,7 @@ template<typename Derived>
 template<typename OtherDerived>
 Derived& DenseBase<Derived>::operator+=(const EigenBase<OtherDerived> &other)
 {
-  other.derived().addTo(derived());
+  call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar>());
   return derived();
 }
 
@@ -137,7 +137,7 @@ template<typename Derived>
 template<typename OtherDerived>
 Derived& DenseBase<Derived>::operator-=(const EigenBase<OtherDerived> &other)
 {
-  other.derived().subTo(derived());
+  call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar>());
   return derived();
 }
 
diff --git a/Eigen/src/Core/Fuzzy.h b/Eigen/src/Core/Fuzzy.h
index f9a88dd3c..8cd069a0d 100644
--- a/Eigen/src/Core/Fuzzy.h
+++ b/Eigen/src/Core/Fuzzy.h
@@ -23,8 +23,8 @@ struct isApprox_selector
   static bool run(const Derived& x, const OtherDerived& y, const typename Derived::RealScalar& prec)
   {
     EIGEN_USING_STD_MATH(min);
-    typename internal::nested<Derived,2>::type nested(x);
-    typename internal::nested<OtherDerived,2>::type otherNested(y);
+    typename internal::nested_eval<Derived,2>::type nested(x);
+    typename internal::nested_eval<OtherDerived,2>::type otherNested(y);
     return (nested - otherNested).cwiseAbs2().sum() <= prec * prec * (min)(nested.cwiseAbs2().sum(), otherNested.cwiseAbs2().sum());
   }
 };
diff --git a/Eigen/src/Core/GeneralProduct.h b/Eigen/src/Core/GeneralProduct.h
index 624b8b6e8..e05ff8dce 100644
--- a/Eigen/src/Core/GeneralProduct.h
+++ b/Eigen/src/Core/GeneralProduct.h
@@ -13,28 +13,6 @@
 
 namespace Eigen { 
 
-/** \class GeneralProduct
-  * \ingroup Core_Module
-  *
-  * \brief Expression of the product of two general matrices or vectors
-  *
-  * \param LhsNested the type used to store the left-hand side
-  * \param RhsNested the type used to store the right-hand side
-  * \param ProductMode the type of the product
-  *
-  * This class represents an expression of the product of two general matrices.
-  * We call a general matrix, a dense matrix with full storage. For instance,
-  * This excludes triangular, selfadjoint, and sparse matrices.
-  * It is the return type of the operator* between general matrices. Its template
-  * arguments are determined automatically by ProductReturnType. Therefore,
-  * GeneralProduct should never be used direclty. To determine the result type of a
-  * function which involves a matrix product, use ProductReturnType::Type.
-  *
-  * \sa ProductReturnType, MatrixBase::operator*(const MatrixBase<OtherDerived>&)
-  */
-template<typename Lhs, typename Rhs, int ProductType = internal::product_type<Lhs,Rhs>::value>
-class GeneralProduct;
-
 enum {
   Large = 2,
   Small = 3
@@ -59,14 +37,14 @@ template<typename Lhs, typename Rhs> struct product_type
   typedef typename remove_all<Lhs>::type _Lhs;
   typedef typename remove_all<Rhs>::type _Rhs;
   enum {
-    MaxRows  = _Lhs::MaxRowsAtCompileTime,
-    Rows  = _Lhs::RowsAtCompileTime,
-    MaxCols  = _Rhs::MaxColsAtCompileTime,
-    Cols  = _Rhs::ColsAtCompileTime,
-    MaxDepth = EIGEN_SIZE_MIN_PREFER_FIXED(_Lhs::MaxColsAtCompileTime,
-                                           _Rhs::MaxRowsAtCompileTime),
-    Depth = EIGEN_SIZE_MIN_PREFER_FIXED(_Lhs::ColsAtCompileTime,
-                                        _Rhs::RowsAtCompileTime)
+    MaxRows = traits<_Lhs>::MaxRowsAtCompileTime,
+    Rows    = traits<_Lhs>::RowsAtCompileTime,
+    MaxCols = traits<_Rhs>::MaxColsAtCompileTime,
+    Cols    = traits<_Rhs>::ColsAtCompileTime,
+    MaxDepth = EIGEN_SIZE_MIN_PREFER_FIXED(traits<_Lhs>::MaxColsAtCompileTime,
+                                           traits<_Rhs>::MaxRowsAtCompileTime),
+    Depth = EIGEN_SIZE_MIN_PREFER_FIXED(traits<_Lhs>::ColsAtCompileTime,
+                                        traits<_Rhs>::RowsAtCompileTime)
   };
 
   // the splitting into different lines of code here, introducing the _select enums and the typedef below,
@@ -81,7 +59,8 @@ private:
 
 public:
   enum {
-    value = selector::ret
+    value = selector::ret,
+    ret = selector::ret
   };
 #ifdef EIGEN_DEBUG_PRODUCT
   static void debug()
@@ -97,6 +76,31 @@ public:
 #endif
 };
 
+// template<typename Lhs, typename Rhs> struct product_tag
+// {
+// private:
+//   
+//   typedef typename remove_all<Lhs>::type _Lhs;
+//   typedef typename remove_all<Rhs>::type _Rhs;
+//   enum {
+//     Rows  = _Lhs::RowsAtCompileTime,
+//     Cols  = _Rhs::ColsAtCompileTime,
+//     Depth = EIGEN_SIZE_MIN_PREFER_FIXED(_Lhs::ColsAtCompileTime, _Rhs::RowsAtCompileTime)
+//   };
+// 
+//   enum {
+//     rows_select = Rows==1 ? int(Rows) : int(Large),
+//     cols_select = Cols==1 ? int(Cols) : int(Large),
+//     depth_select = Depth==1 ? int(Depth) : int(Large)
+//   };
+//   typedef product_type_selector<rows_select, cols_select, depth_select> selector;
+// 
+// public:
+//   enum {
+//     ret = selector::ret
+//   };
+// 
+// };
 
 /* The following allows to select the kind of product at compile time
  * based on the three dimensions of the product.
@@ -127,54 +131,6 @@ template<>              struct product_type_selector<Large,Large,Small>  { enum
 
 } // end namespace internal
 
-/** \class ProductReturnType
-  * \ingroup Core_Module
-  *
-  * \brief Helper class to get the correct and optimized returned type of operator*
-  *
-  * \param Lhs the type of the left-hand side
-  * \param Rhs the type of the right-hand side
-  * \param ProductMode the type of the product (determined automatically by internal::product_mode)
-  *
-  * This class defines the typename Type representing the optimized product expression
-  * between two matrix expressions. In practice, using ProductReturnType<Lhs,Rhs>::Type
-  * is the recommended way to define the result type of a function returning an expression
-  * which involve a matrix product. The class Product should never be
-  * used directly.
-  *
-  * \sa class Product, MatrixBase::operator*(const MatrixBase<OtherDerived>&)
-  */
-template<typename Lhs, typename Rhs, int ProductType>
-struct ProductReturnType
-{
-  // TODO use the nested type to reduce instanciations ????
-//   typedef typename internal::nested<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
-//   typedef typename internal::nested<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;
-
-  typedef GeneralProduct<Lhs/*Nested*/, Rhs/*Nested*/, ProductType> Type;
-};
-
-template<typename Lhs, typename Rhs>
-struct ProductReturnType<Lhs,Rhs,CoeffBasedProductMode>
-{
-  typedef typename internal::nested<Lhs, Rhs::ColsAtCompileTime, typename internal::plain_matrix_type<Lhs>::type >::type LhsNested;
-  typedef typename internal::nested<Rhs, Lhs::RowsAtCompileTime, typename internal::plain_matrix_type<Rhs>::type >::type RhsNested;
-  typedef CoeffBasedProduct<LhsNested, RhsNested, EvalBeforeAssigningBit | EvalBeforeNestingBit> Type;
-};
-
-template<typename Lhs, typename Rhs>
-struct ProductReturnType<Lhs,Rhs,LazyCoeffBasedProductMode>
-{
-  typedef typename internal::nested<Lhs, Rhs::ColsAtCompileTime, typename internal::plain_matrix_type<Lhs>::type >::type LhsNested;
-  typedef typename internal::nested<Rhs, Lhs::RowsAtCompileTime, typename internal::plain_matrix_type<Rhs>::type >::type RhsNested;
-  typedef CoeffBasedProduct<LhsNested, RhsNested, NestByRefBit> Type;
-};
-
-// this is a workaround for sun CC
-template<typename Lhs, typename Rhs>
-struct LazyProductReturnType : public ProductReturnType<Lhs,Rhs,LazyCoeffBasedProductMode>
-{};
-
 /***********************************************************************
 *  Implementation of Inner Vector Vector Product
 ***********************************************************************/
@@ -186,119 +142,10 @@ struct LazyProductReturnType : public ProductReturnType<Lhs,Rhs,LazyCoeffBasedPr
 // product ends up to a row-vector times col-vector product... To tackle this use
 // case, we could have a specialization for Block<MatrixType,1,1> with: operator=(Scalar x);
 
-namespace internal {
-
-template<typename Lhs, typename Rhs>
-struct traits<GeneralProduct<Lhs,Rhs,InnerProduct> >
- : traits<Matrix<typename scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType,1,1> >
-{};
-
-}
-
-template<typename Lhs, typename Rhs>
-class GeneralProduct<Lhs, Rhs, InnerProduct>
-  : internal::no_assignment_operator,
-    public Matrix<typename internal::scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType,1,1>
-{
-    typedef Matrix<typename internal::scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType,1,1> Base;
-  public:
-    GeneralProduct(const Lhs& lhs, const Rhs& rhs)
-    {
-      EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::RealScalar, typename Rhs::RealScalar>::value),
-        YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
-
-      Base::coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum();
-    }
-
-    /** Convertion to scalar */
-    operator const typename Base::Scalar() const {
-      return Base::coeff(0,0);
-    }
-};
-
 /***********************************************************************
 *  Implementation of Outer Vector Vector Product
 ***********************************************************************/
 
-namespace internal {
-
-// Column major
-template<typename ProductType, typename Dest, typename Func>
-EIGEN_DONT_INLINE void outer_product_selector_run(const ProductType& prod, Dest& dest, const Func& func, const false_type&)
-{
-  typedef typename Dest::Index Index;
-  // FIXME make sure lhs is sequentially stored
-  // FIXME not very good if rhs is real and lhs complex while alpha is real too
-  const Index cols = dest.cols();
-  for (Index j=0; j<cols; ++j)
-    func(dest.col(j), prod.rhs().coeff(j) * prod.lhs());
-}
-
-// Row major
-template<typename ProductType, typename Dest, typename Func>
-EIGEN_DONT_INLINE void outer_product_selector_run(const ProductType& prod, Dest& dest, const Func& func, const true_type&) {
-  typedef typename Dest::Index Index;
-  // FIXME make sure rhs is sequentially stored
-  // FIXME not very good if lhs is real and rhs complex while alpha is real too
-  const Index rows = dest.rows();
-  for (Index i=0; i<rows; ++i)
-    func(dest.row(i), prod.lhs().coeff(i) * prod.rhs());
-}
-
-template<typename Lhs, typename Rhs>
-struct traits<GeneralProduct<Lhs,Rhs,OuterProduct> >
- : traits<ProductBase<GeneralProduct<Lhs,Rhs,OuterProduct>, Lhs, Rhs> >
-{};
-
-}
-
-template<typename Lhs, typename Rhs>
-class GeneralProduct<Lhs, Rhs, OuterProduct>
-  : public ProductBase<GeneralProduct<Lhs,Rhs,OuterProduct>, Lhs, Rhs>
-{
-    template<typename T> struct IsRowMajor : internal::conditional<(int(T::Flags)&RowMajorBit), internal::true_type, internal::false_type>::type {};
-    
-  public:
-    EIGEN_PRODUCT_PUBLIC_INTERFACE(GeneralProduct)
-
-    GeneralProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs)
-    {
-      EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::RealScalar, typename Rhs::RealScalar>::value),
-        YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
-    }
-    
-    struct set  { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived()  = src; } };
-    struct add  { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() += src; } };
-    struct sub  { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() -= src; } };
-    struct adds {
-      Scalar m_scale;
-      adds(const Scalar& s) : m_scale(s) {}
-      template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const {
-        dst.const_cast_derived() += m_scale * src;
-      }
-    };
-    
-    template<typename Dest>
-    inline void evalTo(Dest& dest) const {
-      internal::outer_product_selector_run(*this, dest, set(), IsRowMajor<Dest>());
-    }
-    
-    template<typename Dest>
-    inline void addTo(Dest& dest) const {
-      internal::outer_product_selector_run(*this, dest, add(), IsRowMajor<Dest>());
-    }
-
-    template<typename Dest>
-    inline void subTo(Dest& dest) const {
-      internal::outer_product_selector_run(*this, dest, sub(), IsRowMajor<Dest>());
-    }
-
-    template<typename Dest> void scaleAndAddTo(Dest& dest, const Scalar& alpha) const
-    {
-      internal::outer_product_selector_run(*this, dest, adds(alpha), IsRowMajor<Dest>());
-    }
-};
-
 /***********************************************************************
 *  Implementation of General Matrix Vector Product
 ***********************************************************************/
@@ -312,60 +159,13 @@ class GeneralProduct<Lhs, Rhs, OuterProduct>
  */
 namespace internal {
 
-template<typename Lhs, typename Rhs>
-struct traits<GeneralProduct<Lhs,Rhs,GemvProduct> >
- : traits<ProductBase<GeneralProduct<Lhs,Rhs,GemvProduct>, Lhs, Rhs> >
-{};
-
 template<int Side, int StorageOrder, bool BlasCompatible>
-struct gemv_selector;
+struct gemv_dense_sense_selector;
 
 } // end namespace internal
 
-template<typename Lhs, typename Rhs>
-class GeneralProduct<Lhs, Rhs, GemvProduct>
-  : public ProductBase<GeneralProduct<Lhs,Rhs,GemvProduct>, Lhs, Rhs>
-{
-  public:
-    EIGEN_PRODUCT_PUBLIC_INTERFACE(GeneralProduct)
-
-    typedef typename Lhs::Scalar LhsScalar;
-    typedef typename Rhs::Scalar RhsScalar;
-
-    GeneralProduct(const Lhs& a_lhs, const Rhs& a_rhs) : Base(a_lhs,a_rhs)
-    {
-//       EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::Scalar, typename Rhs::Scalar>::value),
-//         YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
-    }
-
-    enum { Side = Lhs::IsVectorAtCompileTime ? OnTheLeft : OnTheRight };
-    typedef typename internal::conditional<int(Side)==OnTheRight,_LhsNested,_RhsNested>::type MatrixType;
-
-    template<typename Dest> void scaleAndAddTo(Dest& dst, const Scalar& alpha) const
-    {
-      eigen_assert(m_lhs.rows() == dst.rows() && m_rhs.cols() == dst.cols());
-      internal::gemv_selector<Side,(int(MatrixType::Flags)&RowMajorBit) ? RowMajor : ColMajor,
-                       bool(internal::blas_traits<MatrixType>::HasUsableDirectAccess)>::run(*this, dst, alpha);
-    }
-};
-
 namespace internal {
 
-// The vector is on the left => transposition
-template<int StorageOrder, bool BlasCompatible>
-struct gemv_selector<OnTheLeft,StorageOrder,BlasCompatible>
-{
-  template<typename ProductType, typename Dest>
-  static void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha)
-  {
-    Transpose<Dest> destT(dest);
-    enum { OtherStorageOrder = StorageOrder == RowMajor ? ColMajor : RowMajor };
-    gemv_selector<OnTheRight,OtherStorageOrder,BlasCompatible>
-      ::run(GeneralProduct<Transpose<const typename ProductType::_RhsNested>,Transpose<const typename ProductType::_LhsNested>, GemvProduct>
-        (prod.rhs().transpose(), prod.lhs().transpose()), destT, alpha);
-  }
-};
-
 template<typename Scalar,int Size,int MaxSize,bool Cond> struct gemv_static_vector_if;
 
 template<typename Scalar,int Size,int MaxSize>
@@ -402,27 +202,43 @@ struct gemv_static_vector_if<Scalar,Size,MaxSize,true>
   #endif
 };
 
-template<> struct gemv_selector<OnTheRight,ColMajor,true>
+// The vector is on the left => transposition
+template<int StorageOrder, bool BlasCompatible>
+struct gemv_dense_sense_selector<OnTheLeft,StorageOrder,BlasCompatible>
+{
+  template<typename Lhs, typename Rhs, typename Dest>
+  static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
+  {
+    Transpose<Dest> destT(dest);
+    enum { OtherStorageOrder = StorageOrder == RowMajor ? ColMajor : RowMajor };
+    gemv_dense_sense_selector<OnTheRight,OtherStorageOrder,BlasCompatible>
+      ::run(rhs.transpose(), lhs.transpose(), destT, alpha);
+  }
+};
+
+template<> struct gemv_dense_sense_selector<OnTheRight,ColMajor,true>
 {
-  template<typename ProductType, typename Dest>
-  static inline void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha)
+  template<typename Lhs, typename Rhs, typename Dest>
+  static inline void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
   {
-    typedef typename ProductType::Index Index;
-    typedef typename ProductType::LhsScalar   LhsScalar;
-    typedef typename ProductType::RhsScalar   RhsScalar;
-    typedef typename ProductType::Scalar      ResScalar;
-    typedef typename ProductType::RealScalar  RealScalar;
-    typedef typename ProductType::ActualLhsType ActualLhsType;
-    typedef typename ProductType::ActualRhsType ActualRhsType;
-    typedef typename ProductType::LhsBlasTraits LhsBlasTraits;
-    typedef typename ProductType::RhsBlasTraits RhsBlasTraits;
+    typedef typename Dest::Index Index;
+    typedef typename Lhs::Scalar   LhsScalar;
+    typedef typename Rhs::Scalar   RhsScalar;
+    typedef typename Dest::Scalar  ResScalar;
+    typedef typename Dest::RealScalar  RealScalar;
+    
+    typedef internal::blas_traits<Lhs> LhsBlasTraits;
+    typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
+    typedef internal::blas_traits<Rhs> RhsBlasTraits;
+    typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
+  
     typedef Map<Matrix<ResScalar,Dynamic,1>, Aligned> MappedDest;
 
-    ActualLhsType actualLhs = LhsBlasTraits::extract(prod.lhs());
-    ActualRhsType actualRhs = RhsBlasTraits::extract(prod.rhs());
+    ActualLhsType actualLhs = LhsBlasTraits::extract(lhs);
+    ActualRhsType actualRhs = RhsBlasTraits::extract(rhs);
 
-    ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs())
-                                  * RhsBlasTraits::extractScalarFactor(prod.rhs());
+    ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs)
+                                  * RhsBlasTraits::extractScalarFactor(rhs);
 
     enum {
       // FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
@@ -445,7 +261,7 @@ template<> struct gemv_selector<OnTheRight,ColMajor,true>
     if(!evalToDest)
     {
       #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
-      Index size = dest.size();
+      int size = dest.size();
       EIGEN_DENSE_STORAGE_CTOR_PLUGIN
       #endif
       if(!alphaIsCompatible)
@@ -475,34 +291,35 @@ template<> struct gemv_selector<OnTheRight,ColMajor,true>
   }
 };
 
-template<> struct gemv_selector<OnTheRight,RowMajor,true>
+template<> struct gemv_dense_sense_selector<OnTheRight,RowMajor,true>
 {
-  template<typename ProductType, typename Dest>
-  static void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha)
+  template<typename Lhs, typename Rhs, typename Dest>
+  static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
   {
-    typedef typename ProductType::LhsScalar LhsScalar;
-    typedef typename ProductType::RhsScalar RhsScalar;
-    typedef typename ProductType::Scalar    ResScalar;
-    typedef typename ProductType::Index Index;
-    typedef typename ProductType::ActualLhsType ActualLhsType;
-    typedef typename ProductType::ActualRhsType ActualRhsType;
-    typedef typename ProductType::_ActualRhsType _ActualRhsType;
-    typedef typename ProductType::LhsBlasTraits LhsBlasTraits;
-    typedef typename ProductType::RhsBlasTraits RhsBlasTraits;
-
-    typename add_const<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(prod.lhs());
-    typename add_const<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(prod.rhs());
-
-    ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs())
-                                  * RhsBlasTraits::extractScalarFactor(prod.rhs());
+    typedef typename Dest::Index Index;
+    typedef typename Lhs::Scalar   LhsScalar;
+    typedef typename Rhs::Scalar   RhsScalar;
+    typedef typename Dest::Scalar  ResScalar;
+    
+    typedef internal::blas_traits<Lhs> LhsBlasTraits;
+    typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
+    typedef internal::blas_traits<Rhs> RhsBlasTraits;
+    typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
+    typedef typename internal::remove_all<ActualRhsType>::type ActualRhsTypeCleaned;
+
+    typename add_const<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(lhs);
+    typename add_const<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(rhs);
+
+    ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs)
+                                  * RhsBlasTraits::extractScalarFactor(rhs);
 
     enum {
       // FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
       // on, the other hand it is good for the cache to pack the vector anyways...
-      DirectlyUseRhs = _ActualRhsType::InnerStrideAtCompileTime==1
+      DirectlyUseRhs = ActualRhsTypeCleaned::InnerStrideAtCompileTime==1
     };
 
-    gemv_static_vector_if<RhsScalar,_ActualRhsType::SizeAtCompileTime,_ActualRhsType::MaxSizeAtCompileTime,!DirectlyUseRhs> static_rhs;
+    gemv_static_vector_if<RhsScalar,ActualRhsTypeCleaned::SizeAtCompileTime,ActualRhsTypeCleaned::MaxSizeAtCompileTime,!DirectlyUseRhs> static_rhs;
 
     ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhsPtr,actualRhs.size(),
         DirectlyUseRhs ? const_cast<RhsScalar*>(actualRhs.data()) : static_rhs.data());
@@ -510,10 +327,10 @@ template<> struct gemv_selector<OnTheRight,RowMajor,true>
     if(!DirectlyUseRhs)
     {
       #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
-      Index size = actualRhs.size();
+      int size = actualRhs.size();
       EIGEN_DENSE_STORAGE_CTOR_PLUGIN
       #endif
-      Map<typename _ActualRhsType::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs;
+      Map<typename ActualRhsTypeCleaned::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs;
     }
 
     general_matrix_vector_product
@@ -526,29 +343,29 @@ template<> struct gemv_selector<OnTheRight,RowMajor,true>
   }
 };
 
-template<> struct gemv_selector<OnTheRight,ColMajor,false>
+template<> struct gemv_dense_sense_selector<OnTheRight,ColMajor,false>
 {
-  template<typename ProductType, typename Dest>
-  static void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha)
+  template<typename Lhs, typename Rhs, typename Dest>
+  static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
   {
     typedef typename Dest::Index Index;
     // TODO makes sure dest is sequentially stored in memory, otherwise use a temp
-    const Index size = prod.rhs().rows();
+    const Index size = rhs.rows();
     for(Index k=0; k<size; ++k)
-      dest += (alpha*prod.rhs().coeff(k)) * prod.lhs().col(k);
+      dest += (alpha*rhs.coeff(k)) * lhs.col(k);
   }
 };
 
-template<> struct gemv_selector<OnTheRight,RowMajor,false>
+template<> struct gemv_dense_sense_selector<OnTheRight,RowMajor,false>
 {
-  template<typename ProductType, typename Dest>
-  static void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha)
+  template<typename Lhs, typename Rhs, typename Dest>
+  static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
   {
     typedef typename Dest::Index Index;
     // TODO makes sure rhs is sequentially stored in memory, otherwise use a temp
-    const Index rows = prod.rows();
+    const Index rows = dest.rows();
     for(Index i=0; i<rows; ++i)
-      dest.coeffRef(i) += alpha * (prod.lhs().row(i).cwiseProduct(prod.rhs().transpose())).sum();
+      dest.coeffRef(i) += alpha * (lhs.row(i).cwiseProduct(rhs.transpose())).sum();
   }
 };
 
@@ -566,7 +383,6 @@ template<> struct gemv_selector<OnTheRight,RowMajor,false>
   */
 #ifndef __CUDACC__
 
-#ifdef EIGEN_TEST_EVALUATORS
 template<typename Derived>
 template<typename OtherDerived>
 inline const Product<Derived, OtherDerived>
@@ -597,39 +413,9 @@ MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
   
   return Product<Derived, OtherDerived>(derived(), other.derived());
 }
-#else
-template<typename Derived>
-template<typename OtherDerived>
-inline const typename ProductReturnType<Derived, OtherDerived>::Type
-MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
-{
-  // A note regarding the function declaration: In MSVC, this function will sometimes
-  // not be inlined since DenseStorage is an unwindable object for dynamic
-  // matrices and product types are holding a member to store the result.
-  // Thus it does not help tagging this function with EIGEN_STRONG_INLINE.
-  enum {
-    ProductIsValid =  Derived::ColsAtCompileTime==Dynamic
-                   || OtherDerived::RowsAtCompileTime==Dynamic
-                   || int(Derived::ColsAtCompileTime)==int(OtherDerived::RowsAtCompileTime),
-    AreVectors = Derived::IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime,
-    SameSizes = EIGEN_PREDICATE_SAME_MATRIX_SIZE(Derived,OtherDerived)
-  };
-  // note to the lost user:
-  //    * for a dot product use: v1.dot(v2)
-  //    * for a coeff-wise product use: v1.cwiseProduct(v2)
-  EIGEN_STATIC_ASSERT(ProductIsValid || !(AreVectors && SameSizes),
-    INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS)
-  EIGEN_STATIC_ASSERT(ProductIsValid || !(SameSizes && !AreVectors),
-    INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION)
-  EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT)
-#ifdef EIGEN_DEBUG_PRODUCT
-  internal::product_type<Derived,OtherDerived>::debug();
-#endif
-  return typename ProductReturnType<Derived,OtherDerived>::Type(derived(), other.derived());
-}
-#endif
 
-#endif
+#endif // __CUDACC__
+
 /** \returns an expression of the matrix product of \c *this and \a other without implicit evaluation.
   *
   * The returned product will behave like any other expressions: the coefficients of the product will be
@@ -643,7 +429,7 @@ MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
   */
 template<typename Derived>
 template<typename OtherDerived>
-const typename LazyProductReturnType<Derived,OtherDerived>::Type
+const Product<Derived,OtherDerived,LazyProduct>
 MatrixBase<Derived>::lazyProduct(const MatrixBase<OtherDerived> &other) const
 {
   enum {
@@ -662,7 +448,7 @@ MatrixBase<Derived>::lazyProduct(const MatrixBase<OtherDerived> &other) const
     INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION)
   EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT)
 
-  return typename LazyProductReturnType<Derived,OtherDerived>::Type(derived(), other.derived());
+  return Product<Derived,OtherDerived,LazyProduct>(derived(), other.derived());
 }
 
 } // end namespace Eigen
diff --git a/Eigen/src/Core/Inverse.h b/Eigen/src/Core/Inverse.h
new file mode 100644
index 000000000..5cfa7e50c
--- /dev/null
+++ b/Eigen/src/Core/Inverse.h
@@ -0,0 +1,130 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_INVERSE_H
+#define EIGEN_INVERSE_H
+
+namespace Eigen { 
+
+// TODO move the general declaration in Core, and rename this file DenseInverseImpl.h, or something like this...
+
+template<typename XprType,typename StorageKind> class InverseImpl;
+
+namespace internal {
+
+template<typename XprType>
+struct traits<Inverse<XprType> >
+  : traits<typename XprType::PlainObject>
+{
+  typedef typename XprType::PlainObject PlainObject;
+  typedef traits<PlainObject> BaseTraits;
+  enum {
+    Flags = BaseTraits::Flags & RowMajorBit,
+    CoeffReadCost = Dynamic
+  };
+};
+
+} // end namespace internal
+
+/** \class Inverse
+  *
+  * \brief Expression of the inverse of another expression
+  *
+  * \tparam XprType the type of the expression we are taking the inverse
+  *
+  * This class represents an abstract expression of A.inverse()
+  * and most of the time this is the only way it is used.
+  *
+  */
+template<typename XprType>
+class Inverse : public InverseImpl<XprType,typename internal::traits<XprType>::StorageKind>
+{
+public:
+  typedef typename XprType::Index Index;
+  typedef typename XprType::PlainObject                       PlainObject;
+  typedef typename internal::nested<XprType>::type            XprTypeNested;
+  typedef typename internal::remove_all<XprTypeNested>::type  XprTypeNestedCleaned;
+  
+  Inverse(const XprType &xpr)
+    : m_xpr(xpr)
+  {}
+  
+  EIGEN_DEVICE_FUNC Index rows() const { return m_xpr.rows(); }
+  EIGEN_DEVICE_FUNC Index cols() const { return m_xpr.cols(); }
+
+  EIGEN_DEVICE_FUNC const XprTypeNestedCleaned& nestedExpression() const { return m_xpr; }
+
+protected:
+  XprTypeNested m_xpr;
+};
+
+/** \internal
+  * Specialization of the Inverse expression for dense expressions.
+  * Direct access to the coefficients are discared.
+  * FIXME this intermediate class is probably not needed anymore.
+  */
+template<typename XprType>
+class InverseImpl<XprType,Dense>
+  : public MatrixBase<Inverse<XprType> >
+{
+  typedef Inverse<XprType> Derived;
+  
+public:
+  
+  typedef MatrixBase<Derived> Base;
+  EIGEN_DENSE_PUBLIC_INTERFACE(Derived)
+  typedef typename internal::remove_all<XprType>::type NestedExpression;
+
+private:
+  
+  Scalar coeff(Index row, Index col) const;
+  Scalar coeff(Index i) const;
+};
+
+namespace internal {
+
+/** \internal
+  * \brief Default evaluator for Inverse expression.
+  * 
+  * This default evaluator for Inverse expression simply evaluate the inverse into a temporary
+  * by a call to internal::call_assignment_no_alias.
+  * Therefore, inverse implementers only have to specialize Assignment<Dst,Inverse<...>, ...> for
+  * there own nested expression.
+  *
+  * \sa class Inverse
+  */
+template<typename ArgType>
+struct unary_evaluator<Inverse<ArgType> >
+  : public evaluator<typename Inverse<ArgType>::PlainObject>::type
+{
+  typedef Inverse<ArgType> InverseType;
+  typedef typename InverseType::PlainObject PlainObject;
+  typedef typename evaluator<PlainObject>::type Base;
+  
+  typedef evaluator<InverseType> type;
+  typedef evaluator<InverseType> nestedType;
+  
+  enum { Flags = Base::Flags | EvalBeforeNestingBit };
+
+  unary_evaluator(const InverseType& inv_xpr)
+    : m_result(inv_xpr.rows(), inv_xpr.cols())
+  {
+    ::new (static_cast<Base*>(this)) Base(m_result);
+    internal::call_assignment_no_alias(m_result, inv_xpr);
+  }
+  
+protected:
+  PlainObject m_result;
+};
+  
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_INVERSE_H
diff --git a/Eigen/src/Core/Map.h b/Eigen/src/Core/Map.h
index ced1b76ba..87c1787bf 100644
--- a/Eigen/src/Core/Map.h
+++ b/Eigen/src/Core/Map.h
@@ -79,22 +79,9 @@ struct traits<Map<PlainObjectType, MapOptions, StrideType> >
     OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0
                              ? int(PlainObjectType::OuterStrideAtCompileTime)
                              : int(StrideType::OuterStrideAtCompileTime),
-    HasNoInnerStride = InnerStrideAtCompileTime == 1,
-    HasNoOuterStride = StrideType::OuterStrideAtCompileTime == 0,
-    HasNoStride = HasNoInnerStride && HasNoOuterStride,
     IsAligned = bool(EIGEN_ALIGN) && ((int(MapOptions)&Aligned)==Aligned),
-    IsDynamicSize = PlainObjectType::SizeAtCompileTime==Dynamic,
-    KeepsPacketAccess = bool(HasNoInnerStride)
-                        && ( bool(IsDynamicSize)
-                           || HasNoOuterStride
-                           || ( OuterStrideAtCompileTime!=Dynamic
-                           && ((static_cast<int>(sizeof(Scalar))*OuterStrideAtCompileTime)%EIGEN_ALIGN_BYTES)==0 ) ),
     Flags0 = TraitsBase::Flags & (~NestByRefBit),
-    Flags1 = IsAligned ? (int(Flags0) | AlignedBit) : (int(Flags0) & ~AlignedBit),
-    Flags2 = (bool(HasNoStride) || bool(PlainObjectType::IsVectorAtCompileTime))
-           ? int(Flags1) : int(Flags1 & ~LinearAccessBit),
-    Flags3 = is_lvalue<PlainObjectType>::value ? int(Flags2) : (int(Flags2) & ~LvalueBit),
-    Flags = KeepsPacketAccess ? int(Flags3) : (int(Flags3) & ~PacketAccessBit)
+    Flags = is_lvalue<PlainObjectType>::value ? int(Flags0) : (int(Flags0) & ~LvalueBit)
   };
 private:
   enum { Options }; // Expressions don't have Options
diff --git a/Eigen/src/Core/MapBase.h b/Eigen/src/Core/MapBase.h
index e8ecb175b..6d3b344e8 100644
--- a/Eigen/src/Core/MapBase.h
+++ b/Eigen/src/Core/MapBase.h
@@ -12,7 +12,7 @@
 #define EIGEN_MAPBASE_H
 
 #define EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived) \
-      EIGEN_STATIC_ASSERT((int(internal::traits<Derived>::Flags) & LinearAccessBit) || Derived::IsVectorAtCompileTime, \
+      EIGEN_STATIC_ASSERT((int(internal::evaluator<Derived>::Flags) & LinearAccessBit) || Derived::IsVectorAtCompileTime, \
                           YOU_ARE_TRYING_TO_USE_AN_INDEX_BASED_ACCESSOR_ON_AN_EXPRESSION_THAT_DOES_NOT_SUPPORT_THAT)
 
 namespace Eigen { 
@@ -161,11 +161,7 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
     EIGEN_DEVICE_FUNC
     void checkSanity() const
     {
-      EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(internal::traits<Derived>::Flags&PacketAccessBit,
-                                        internal::inner_stride_at_compile_time<Derived>::ret==1),
-                          PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1);
-      eigen_assert(EIGEN_IMPLIES(internal::traits<Derived>::Flags&AlignedBit, (size_t(m_data) % EIGEN_ALIGN_BYTES) == 0)
-                   && "data is not aligned");
+      eigen_assert(EIGEN_IMPLIES(internal::traits<Derived>::IsAligned, (size_t(m_data) % EIGEN_ALIGN_BYTES) == 0) && "data is not aligned");
     }
 
     PointerType m_data;
diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h
index 20fc2be74..73859b0ee 100644
--- a/Eigen/src/Core/MathFunctions.h
+++ b/Eigen/src/Core/MathFunctions.h
@@ -12,6 +12,15 @@
 
 namespace Eigen {
 
+// On WINCE, std::abs is defined for int only, so let's defined our own overloads:
+// This issue has been confirmed with MSVC 2008 only, but the issue might exist for more recent versions too.
+#if defined(_WIN32_WCE) && defined(_MSC_VER) && _MSC_VER<=1500
+long        abs(long        x) { return (labs(x));  }
+double      abs(double      x) { return (fabs(x));  }
+float       abs(float       x) { return (fabsf(x)); }
+long double abs(long double x) { return (fabsl(x)); }
+#endif
+  
 namespace internal {
 
 /** \internal \struct global_math_functions_filtering_base
@@ -308,10 +317,17 @@ struct hypot_impl
     using std::sqrt;
     RealScalar _x = abs(x);
     RealScalar _y = abs(y);
-    RealScalar p = (max)(_x, _y);
-    if(p==RealScalar(0)) return 0;
-    RealScalar q = (min)(_x, _y);
-    RealScalar qp = q/p;
+    Scalar p, qp;
+    if(_x>_y)
+    {
+      p = _x;
+      qp = _y / p;
+    }
+    else
+    {
+      p = _y;
+      qp = _x / p;
+    }
     return p * sqrt(RealScalar(1) + qp*qp);
   }
 };
@@ -678,6 +694,21 @@ bool (isfinite)(const std::complex<T>& x)
   return isfinite(real(x)) && isfinite(imag(x));
 }
 
+// Log base 2 for 32 bits positive integers.
+// Conveniently returns 0 for x==0.
+inline int log2(int x)
+{
+  eigen_assert(x>=0);
+  unsigned int v(x);
+  static const int table[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
+  v |= v >> 1;
+  v |= v >> 2;
+  v |= v >> 4;
+  v |= v >> 8;
+  v |= v >> 16;
+  return table[(v * 0x07C4ACDDU) >> 27];
+}
+
 } // end namespace numext
 
 namespace internal {
diff --git a/Eigen/src/Core/Matrix.h b/Eigen/src/Core/Matrix.h
index 8c95ee3ca..8a5821548 100644
--- a/Eigen/src/Core/Matrix.h
+++ b/Eigen/src/Core/Matrix.h
@@ -115,7 +115,8 @@ struct traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
     MaxRowsAtCompileTime = _MaxRows,
     MaxColsAtCompileTime = _MaxCols,
     Flags = compute_matrix_flags<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>::ret,
-    CoeffReadCost = NumTraits<Scalar>::ReadCost,
+    // FIXME, the following flag in only used to define NeedsToAlign in PlainObjectBase
+    EvaluatorFlags = compute_matrix_evaluator_flags<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>::ret,
     Options = _Options,
     InnerStrideAtCompileTime = 1,
     OuterStrideAtCompileTime = (Options&RowMajor) ? ColsAtCompileTime : RowsAtCompileTime
diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h
index f5987d194..9dbbd6fb5 100644
--- a/Eigen/src/Core/MatrixBase.h
+++ b/Eigen/src/Core/MatrixBase.h
@@ -66,8 +66,7 @@ template<typename Derived> class MatrixBase
     using Base::MaxSizeAtCompileTime;
     using Base::IsVectorAtCompileTime;
     using Base::Flags;
-    using Base::CoeffReadCost;
-
+    
     using Base::derived;
     using Base::const_cast_derived;
     using Base::rows;
@@ -81,6 +80,7 @@ template<typename Derived> class MatrixBase
     using Base::operator-=;
     using Base::operator*=;
     using Base::operator/=;
+    using Base::operator*;
 
     typedef typename Base::CoeffReturnType CoeffReturnType;
     typedef typename Base::ConstTransposeReturnType ConstTransposeReturnType;
@@ -185,21 +185,15 @@ template<typename Derived> class MatrixBase
     { return this->lazyProduct(other); }
 #else
 
-#ifdef EIGEN_TEST_EVALUATORS
     template<typename OtherDerived>
     const Product<Derived,OtherDerived>
     operator*(const MatrixBase<OtherDerived> &other) const;
-#else
-    template<typename OtherDerived>
-    const typename ProductReturnType<Derived,OtherDerived>::Type
-    operator*(const MatrixBase<OtherDerived> &other) const;
-#endif
 
 #endif
 
     template<typename OtherDerived>
     EIGEN_DEVICE_FUNC 
-    const typename LazyProductReturnType<Derived,OtherDerived>::Type
+    const Product<Derived,OtherDerived,LazyProduct>
     lazyProduct(const MatrixBase<OtherDerived> &other) const;
 
     template<typename OtherDerived>
@@ -213,7 +207,7 @@ template<typename Derived> class MatrixBase
 
     template<typename DiagonalDerived>
     EIGEN_DEVICE_FUNC
-    const DiagonalProduct<Derived, DiagonalDerived, OnTheRight>
+    const Product<Derived, DiagonalDerived, LazyProduct>
     operator*(const DiagonalBase<DiagonalDerived> &diagonal) const;
 
     template<typename OtherDerived>
@@ -333,10 +327,12 @@ template<typename Derived> class MatrixBase
 
     NoAlias<Derived,Eigen::MatrixBase > noalias();
 
-    inline const ForceAlignedAccess<Derived> forceAlignedAccess() const;
-    inline ForceAlignedAccess<Derived> forceAlignedAccess();
-    template<bool Enable> inline typename internal::add_const_on_value_type<typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type>::type forceAlignedAccessIf() const;
-    template<bool Enable> inline typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type forceAlignedAccessIf();
+    // TODO forceAlignedAccess is temporarly disabled
+    // Need to find a nicer workaround.
+    inline const Derived& forceAlignedAccess() const { return derived(); }
+    inline Derived& forceAlignedAccess() { return derived(); }
+    template<bool Enable> inline const Derived& forceAlignedAccessIf() const { return derived(); }
+    template<bool Enable> inline Derived& forceAlignedAccessIf() { return derived(); }
 
     Scalar trace() const;
 
@@ -360,7 +356,8 @@ template<typename Derived> class MatrixBase
     const PartialPivLU<PlainObject> lu() const;
 
     EIGEN_DEVICE_FUNC
-    const internal::inverse_impl<Derived> inverse() const;
+    const Inverse<Derived> inverse() const;
+    
     template<typename ResultType>
     void computeInverseAndDetWithCheck(
       ResultType& inverse,
diff --git a/Eigen/src/Core/NoAlias.h b/Eigen/src/Core/NoAlias.h
index 0a1c32743..097c9c062 100644
--- a/Eigen/src/Core/NoAlias.h
+++ b/Eigen/src/Core/NoAlias.h
@@ -30,68 +30,35 @@ namespace Eigen {
 template<typename ExpressionType, template <typename> class StorageBase>
 class NoAlias
 {
-    typedef typename ExpressionType::Scalar Scalar;
   public:
+    typedef typename ExpressionType::Scalar Scalar;
+    
     NoAlias(ExpressionType& expression) : m_expression(expression) {}
-
-    /** Behaves like MatrixBase::lazyAssign(other)
-      * \sa MatrixBase::lazyAssign() */
+    
     template<typename OtherDerived>
     EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE ExpressionType& operator=(const StorageBase<OtherDerived>& other)
-    { return internal::assign_selector<ExpressionType,OtherDerived,false>::run(m_expression,other.derived()); }
-
-    /** \sa MatrixBase::operator+= */
+    {
+      call_assignment_no_alias(m_expression, other.derived(), internal::assign_op<Scalar>());
+      return m_expression;
+    }
+    
     template<typename OtherDerived>
     EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE ExpressionType& operator+=(const StorageBase<OtherDerived>& other)
     {
-      typedef SelfCwiseBinaryOp<internal::scalar_sum_op<Scalar>, ExpressionType, OtherDerived> SelfAdder;
-      SelfAdder tmp(m_expression);
-      typedef typename internal::nested<OtherDerived>::type OtherDerivedNested;
-      typedef typename internal::remove_all<OtherDerivedNested>::type _OtherDerivedNested;
-      internal::assign_selector<SelfAdder,_OtherDerivedNested,false>::run(tmp,OtherDerivedNested(other.derived()));
+      call_assignment_no_alias(m_expression, other.derived(), internal::add_assign_op<Scalar>());
       return m_expression;
     }
-
-    /** \sa MatrixBase::operator-= */
+    
     template<typename OtherDerived>
     EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE ExpressionType& operator-=(const StorageBase<OtherDerived>& other)
     {
-      typedef SelfCwiseBinaryOp<internal::scalar_difference_op<Scalar>, ExpressionType, OtherDerived> SelfAdder;
-      SelfAdder tmp(m_expression);
-      typedef typename internal::nested<OtherDerived>::type OtherDerivedNested;
-      typedef typename internal::remove_all<OtherDerivedNested>::type _OtherDerivedNested;
-      internal::assign_selector<SelfAdder,_OtherDerivedNested,false>::run(tmp,OtherDerivedNested(other.derived()));
+      call_assignment_no_alias(m_expression, other.derived(), internal::sub_assign_op<Scalar>());
       return m_expression;
     }
 
-#ifndef EIGEN_PARSED_BY_DOXYGEN
-    template<typename ProductDerived, typename Lhs, typename Rhs>
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE ExpressionType& operator+=(const ProductBase<ProductDerived, Lhs,Rhs>& other)
-    { other.derived().addTo(m_expression); return m_expression; }
-
-    template<typename ProductDerived, typename Lhs, typename Rhs>
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE ExpressionType& operator-=(const ProductBase<ProductDerived, Lhs,Rhs>& other)
-    { other.derived().subTo(m_expression); return m_expression; }
-
-    template<typename Lhs, typename Rhs, int NestingFlags>
-    EIGEN_STRONG_INLINE ExpressionType& operator+=(const CoeffBasedProduct<Lhs,Rhs,NestingFlags>& other)
-    { return m_expression.derived() += CoeffBasedProduct<Lhs,Rhs,NestByRefBit>(other.lhs(), other.rhs()); }
-
-    template<typename Lhs, typename Rhs, int NestingFlags>
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE ExpressionType& operator-=(const CoeffBasedProduct<Lhs,Rhs,NestingFlags>& other)
-    { return m_expression.derived() -= CoeffBasedProduct<Lhs,Rhs,NestByRefBit>(other.lhs(), other.rhs()); }
-    
-    template<typename OtherDerived>
-    ExpressionType& operator=(const ReturnByValue<OtherDerived>& func)
-    { return m_expression = func; }
-#endif
-
     EIGEN_DEVICE_FUNC
     ExpressionType& expression() const
     {
diff --git a/Eigen/src/Core/PermutationMatrix.h b/Eigen/src/Core/PermutationMatrix.h
index 8aa4c8bc5..200518173 100644
--- a/Eigen/src/Core/PermutationMatrix.h
+++ b/Eigen/src/Core/PermutationMatrix.h
@@ -13,7 +13,8 @@
 
 namespace Eigen { 
 
-template<int RowCol,typename IndicesType,typename MatrixType, typename StorageKind> class PermutedImpl;
+// TODO: this does not seems to be needed at all:
+// template<int RowCol,typename IndicesType,typename MatrixType, typename StorageKind> class PermutedImpl;
 
 /** \class PermutationBase
   * \ingroup Core_Module
@@ -60,7 +61,6 @@ class PermutationBase : public EigenBase<Derived>
     typedef typename Traits::IndicesType IndicesType;
     enum {
       Flags = Traits::Flags,
-      CoeffReadCost = Traits::CoeffReadCost,
       RowsAtCompileTime = Traits::RowsAtCompileTime,
       ColsAtCompileTime = Traits::ColsAtCompileTime,
       MaxRowsAtCompileTime = Traits::MaxRowsAtCompileTime,
@@ -274,6 +274,7 @@ template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndex
 struct traits<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageIndexType> >
  : traits<Matrix<_StorageIndexType,SizeAtCompileTime,SizeAtCompileTime,0,MaxSizeAtCompileTime,MaxSizeAtCompileTime> >
 {
+  typedef PermutationStorage StorageKind;
   typedef Matrix<_StorageIndexType, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1> IndicesType;
   typedef typename IndicesType::Index Index;
   typedef _StorageIndexType StorageIndexType;
@@ -287,6 +288,8 @@ class PermutationMatrix : public PermutationBase<PermutationMatrix<SizeAtCompile
     typedef internal::traits<PermutationMatrix> Traits;
   public:
 
+    typedef const PermutationMatrix& Nested;
+
     #ifndef EIGEN_PARSED_BY_DOXYGEN
     typedef typename Traits::IndicesType IndicesType;
     typedef typename Traits::StorageIndexType StorageIndexType;
@@ -391,6 +394,7 @@ template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndex
 struct traits<Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageIndexType>,_PacketAccess> >
  : traits<Matrix<_StorageIndexType,SizeAtCompileTime,SizeAtCompileTime,0,MaxSizeAtCompileTime,MaxSizeAtCompileTime> >
 {
+  typedef PermutationStorage StorageKind;
   typedef Map<const Matrix<_StorageIndexType, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1>, _PacketAccess> IndicesType;
   typedef typename IndicesType::Index Index;
   typedef _StorageIndexType StorageIndexType;
@@ -462,8 +466,6 @@ class Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageInd
   * \sa class PermutationBase, class PermutationMatrix
   */
 
-struct PermutationStorage {};
-
 template<typename _IndicesType> class TranspositionsWrapper;
 namespace internal {
 template<typename _IndicesType>
@@ -477,10 +479,9 @@ struct traits<PermutationWrapper<_IndicesType> >
   enum {
     RowsAtCompileTime = _IndicesType::SizeAtCompileTime,
     ColsAtCompileTime = _IndicesType::SizeAtCompileTime,
-    MaxRowsAtCompileTime = IndicesType::MaxRowsAtCompileTime,
-    MaxColsAtCompileTime = IndicesType::MaxColsAtCompileTime,
-    Flags = 0,
-    CoeffReadCost = _IndicesType::CoeffReadCost
+    MaxRowsAtCompileTime = IndicesType::MaxSizeAtCompileTime,
+    MaxColsAtCompileTime = IndicesType::MaxSizeAtCompileTime,
+    Flags = 0
   };
 };
 }
@@ -509,35 +510,39 @@ class PermutationWrapper : public PermutationBase<PermutationWrapper<_IndicesTyp
     typename IndicesType::Nested m_indices;
 };
 
+
+// TODO: Do we need to define these operator* functions? Would it be better to have them inherited
+// from MatrixBase?
+
 /** \returns the matrix with the permutation applied to the columns.
   */
-template<typename Derived, typename PermutationDerived>
-inline const internal::permut_matrix_product_retval<PermutationDerived, Derived, OnTheRight>
-operator*(const MatrixBase<Derived>& matrix,
-          const PermutationBase<PermutationDerived> &permutation)
+template<typename MatrixDerived, typename PermutationDerived>
+EIGEN_DEVICE_FUNC
+const Product<MatrixDerived, PermutationDerived, DefaultProduct>
+operator*(const MatrixBase<MatrixDerived> &matrix,
+          const PermutationBase<PermutationDerived>& permutation)
 {
-  return internal::permut_matrix_product_retval
-           <PermutationDerived, Derived, OnTheRight>
-           (permutation.derived(), matrix.derived());
+  return Product<MatrixDerived, PermutationDerived, DefaultProduct>
+            (matrix.derived(), permutation.derived());
 }
 
 /** \returns the matrix with the permutation applied to the rows.
   */
-template<typename Derived, typename PermutationDerived>
-inline const internal::permut_matrix_product_retval
-               <PermutationDerived, Derived, OnTheLeft>
+template<typename PermutationDerived, typename MatrixDerived>
+EIGEN_DEVICE_FUNC
+const Product<PermutationDerived, MatrixDerived, DefaultProduct>
 operator*(const PermutationBase<PermutationDerived> &permutation,
-          const MatrixBase<Derived>& matrix)
+          const MatrixBase<MatrixDerived>& matrix)
 {
-  return internal::permut_matrix_product_retval
-           <PermutationDerived, Derived, OnTheLeft>
-           (permutation.derived(), matrix.derived());
+  return Product<PermutationDerived, MatrixDerived, DefaultProduct>
+            (permutation.derived(), matrix.derived());
 }
 
 namespace internal {
 
 template<typename PermutationType, typename MatrixType, int Side, bool Transposed>
 struct traits<permut_matrix_product_retval<PermutationType, MatrixType, Side, Transposed> >
+  : traits<typename MatrixType::PlainObject>
 {
   typedef typename MatrixType::PlainObject ReturnType;
 };
@@ -617,6 +622,8 @@ struct traits<Transpose<PermutationBase<Derived> > >
 
 } // end namespace internal
 
+// TODO: the specificties should be handled by the evaluator,
+// at the very least we should only specialize TransposeImpl
 template<typename Derived>
 class Transpose<PermutationBase<Derived> >
   : public EigenBase<Transpose<PermutationBase<Derived> > >
@@ -631,7 +638,6 @@ class Transpose<PermutationBase<Derived> >
     typedef typename Derived::DenseMatrixType DenseMatrixType;
     enum {
       Flags = Traits::Flags,
-      CoeffReadCost = Traits::CoeffReadCost,
       RowsAtCompileTime = Traits::RowsAtCompileTime,
       ColsAtCompileTime = Traits::ColsAtCompileTime,
       MaxRowsAtCompileTime = Traits::MaxRowsAtCompileTime,
@@ -663,19 +669,19 @@ class Transpose<PermutationBase<Derived> >
     /** \returns the matrix with the inverse permutation applied to the columns.
       */
     template<typename OtherDerived> friend
-    inline const internal::permut_matrix_product_retval<PermutationType, OtherDerived, OnTheRight, true>
+    const Product<OtherDerived, Transpose, DefaultProduct>
     operator*(const MatrixBase<OtherDerived>& matrix, const Transpose& trPerm)
     {
-      return internal::permut_matrix_product_retval<PermutationType, OtherDerived, OnTheRight, true>(trPerm.m_permutation, matrix.derived());
+      return Product<OtherDerived, Transpose, DefaultProduct>(matrix.derived(), trPerm.derived());
     }
 
     /** \returns the matrix with the inverse permutation applied to the rows.
       */
     template<typename OtherDerived>
-    inline const internal::permut_matrix_product_retval<PermutationType, OtherDerived, OnTheLeft, true>
+    const Product<Transpose, OtherDerived, DefaultProduct>
     operator*(const MatrixBase<OtherDerived>& matrix) const
     {
-      return internal::permut_matrix_product_retval<PermutationType, OtherDerived, OnTheLeft, true>(m_permutation, matrix.derived());
+      return Product<Transpose, OtherDerived, DefaultProduct>(*this, matrix.derived());
     }
 
     const PermutationType& nestedPermutation() const { return m_permutation; }
@@ -690,6 +696,38 @@ const PermutationWrapper<const Derived> MatrixBase<Derived>::asPermutation() con
   return derived();
 }
 
+namespace internal {
+  
+// TODO currently a permutation matrix expression has the form PermutationMatrix or PermutationWrapper
+//      or their transpose; in the future shape should be defined by the expression traits
+template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename IndexType>
+struct evaluator_traits<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, IndexType> >
+{
+  typedef typename storage_kind_to_evaluator_kind<Dense>::Kind Kind;
+  typedef PermutationShape Shape;
+  static const int AssumeAliasing = 0;
+};
+
+template<typename IndicesType>
+struct evaluator_traits<PermutationWrapper<IndicesType> >
+{
+  typedef typename storage_kind_to_evaluator_kind<Dense>::Kind Kind;
+  typedef PermutationShape Shape;
+  static const int AssumeAliasing = 0;
+};
+
+template<typename Derived>
+struct evaluator_traits<Transpose<PermutationBase<Derived> > >
+{
+  typedef typename storage_kind_to_evaluator_kind<Dense>::Kind Kind;
+  typedef PermutationShape Shape;
+  static const int AssumeAliasing = 0;
+};
+
+template<> struct AssignmentKind<DenseShape,PermutationShape> { typedef EigenBase2EigenBase Kind; };
+
+} // end namespace internal
+
 } // end namespace Eigen
 
 #endif // EIGEN_PERMUTATIONMATRIX_H
diff --git a/Eigen/src/Core/PlainObjectBase.h b/Eigen/src/Core/PlainObjectBase.h
index 69f34bd3e..3b0e56445 100644
--- a/Eigen/src/Core/PlainObjectBase.h
+++ b/Eigen/src/Core/PlainObjectBase.h
@@ -128,7 +128,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
     DenseStorage<Scalar, Base::MaxSizeAtCompileTime, Base::RowsAtCompileTime, Base::ColsAtCompileTime, Options> m_storage;
 
   public:
-    enum { NeedsToAlign = SizeAtCompileTime != Dynamic && (internal::traits<Derived>::Flags & AlignedBit) != 0 };
+    enum { NeedsToAlign = SizeAtCompileTime != Dynamic && (internal::traits<Derived>::EvaluatorFlags & AlignedBit) != 0 };
     EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)
 
     EIGEN_DEVICE_FUNC
@@ -639,22 +639,17 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
       *
       * \internal
       */
+    // aliasing is dealt once in internall::call_assignment
+    // so at this stage we have to assume aliasing... and resising has to be done later.
     template<typename OtherDerived>
     EIGEN_DEVICE_FUNC 
     EIGEN_STRONG_INLINE Derived& _set(const DenseBase<OtherDerived>& other)
     {
-      _set_selector(other.derived(), typename internal::conditional<static_cast<bool>(int(OtherDerived::Flags) & EvalBeforeAssigningBit), internal::true_type, internal::false_type>::type());
+      internal::call_assignment(this->derived(), other.derived());
+      return this->derived();
       return this->derived();
     }
 
-    template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC 
-    EIGEN_STRONG_INLINE void _set_selector(const OtherDerived& other, const internal::true_type&) { _set_noalias(other.eval()); }
-
-    template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC 
-    EIGEN_STRONG_INLINE void _set_selector(const OtherDerived& other, const internal::false_type&) { _set_noalias(other); }
-
     /** \internal Like _set() but additionally makes the assumption that no aliasing effect can happen (which
       * is the case when creating a new matrix) so one can enforce lazy evaluation.
       *
@@ -669,7 +664,8 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
       //_resize_to_match(other);
       // the 'false' below means to enforce lazy evaluation. We don't use lazyAssign() because
       // it wouldn't allow to copy a row-vector into a column-vector.
-      return internal::assign_selector<Derived,OtherDerived,false>::run(this->derived(), other.derived());
+      internal::call_assignment_no_alias(this->derived(), other.derived(), internal::assign_op<Scalar>());
+      return this->derived();
     }
 
     template<typename T0, typename T1>
@@ -704,9 +700,12 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
       m_storage.data()[1] = Scalar(val1);
     }
 
+    // The argument is convertible to the Index type and we either have a non 1x1 Matrix, or a dynamic-sized Array,
+    // then the argument is meant to be the size of the object.
     template<typename T>
     EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE void _init1(Index size, typename internal::enable_if<Base::SizeAtCompileTime!=1 || !internal::is_convertible<T, Scalar>::value,T>::type* = 0)
+    EIGEN_STRONG_INLINE void _init1(Index size, typename internal::enable_if<    (Base::SizeAtCompileTime!=1 || !internal::is_convertible<T, Scalar>::value)
+                                                                              && ((!internal::is_same<typename internal::traits<Derived>::XprKind,ArrayXpr>::value || Base::SizeAtCompileTime==Dynamic)),T>::type* = 0)
     {
       // NOTE MSVC 2008 complains if we directly put bool(NumTraits<T>::IsInteger) as the EIGEN_STATIC_ASSERT argument.
       const bool is_integer = NumTraits<T>::IsInteger;
@@ -714,6 +713,8 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
                           FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED)
       resize(size);
     }
+    
+    // We have a 1x1 matrix/array => the argument is interpreted as the value of the unique coefficient (case where scalar type can be implicitely converted)
     template<typename T>
     EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE void _init1(const Scalar& val0, typename internal::enable_if<Base::SizeAtCompileTime==1 && internal::is_convertible<T, Scalar>::value,T>::type* = 0)
@@ -722,6 +723,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
       m_storage.data()[0] = val0;
     }
     
+    // We have a 1x1 matrix/array => the argument is interpreted as the value of the unique coefficient (case where scalar type match the index type)
     template<typename T>
     EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE void _init1(const Index& val0,
@@ -734,18 +736,21 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
       m_storage.data()[0] = Scalar(val0);
     }
 
+    // Initialize a fixed size matrix from a pointer to raw data
     template<typename T>
     EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE void _init1(const Scalar* data){
       this->_set_noalias(ConstMapType(data));
     }
 
+    // Initialize an arbitrary matrix from a dense expression
     template<typename T, typename OtherDerived>
     EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE void _init1(const DenseBase<OtherDerived>& other){
       this->_set_noalias(other);
     }
 
+    // Initialize an arbitrary matrix from a generic Eigen expression
     template<typename T, typename OtherDerived>
     EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE void _init1(const EigenBase<OtherDerived>& other){
@@ -766,6 +771,31 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
     {
       this->derived() = r;
     }
+    
+    // For fixed -size arrays:
+    template<typename T>
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE void _init1(const Scalar& val0,
+                                    typename internal::enable_if<    Base::SizeAtCompileTime!=Dynamic
+                                                                  && Base::SizeAtCompileTime!=1
+                                                                  && internal::is_convertible<T, Scalar>::value
+                                                                  && internal::is_same<typename internal::traits<Derived>::XprKind,ArrayXpr>::value,T>::type* = 0)
+    {
+      Base::setConstant(val0);
+    }
+    
+    template<typename T>
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE void _init1(const Index& val0,
+                                    typename internal::enable_if<    (!internal::is_same<Index,Scalar>::value)
+                                                                  && (internal::is_same<Index,T>::value)
+                                                                  && Base::SizeAtCompileTime!=Dynamic
+                                                                  && Base::SizeAtCompileTime!=1
+                                                                  && internal::is_convertible<T, Scalar>::value
+                                                                  && internal::is_same<typename internal::traits<Derived>::XprKind,ArrayXpr>::value,T*>::type* = 0)
+    {
+      Base::setConstant(val0);
+    }
 
     template<typename MatrixTypeA, typename MatrixTypeB, bool SwapPointers>
     friend struct internal::matrix_swap_impl;
diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h
index 5d3789be7..ae64d5200 100644
--- a/Eigen/src/Core/Product.h
+++ b/Eigen/src/Core/Product.h
@@ -12,8 +12,7 @@
 
 namespace Eigen {
 
-template<typename Lhs, typename Rhs> class Product;
-template<typename Lhs, typename Rhs, typename StorageKind> class ProductImpl;
+template<typename Lhs, typename Rhs, int Option, typename StorageKind> class ProductImpl;
 
 /** \class Product
   * \ingroup Core_Module
@@ -24,38 +23,93 @@ template<typename Lhs, typename Rhs, typename StorageKind> class ProductImpl;
   * \param Rhs the type of the right-hand side expression
   *
   * This class represents an expression of the product of two arbitrary matrices.
+  * 
+  * The other template parameters are:
+  * \tparam Option     can be DefaultProduct or LazyProduct
   *
   */
 
-// Use ProductReturnType to get correct traits, in particular vectorization flags
+
 namespace internal {
-template<typename Lhs, typename Rhs>
-struct traits<Product<Lhs, Rhs> >
-  : traits<typename ProductReturnType<Lhs, Rhs>::Type>
-{ 
-  // We want A+B*C to be of type Product<Matrix, Sum> and not Product<Matrix, Matrix>
-  // TODO: This flag should eventually go in a separate evaluator traits class
+
+// Determine the scalar of Product<Lhs, Rhs>. This is normally the same as Lhs::Scalar times
+// Rhs::Scalar, but product with permutation matrices inherit the scalar of the other factor.
+template<typename Lhs, typename Rhs, typename LhsShape = typename evaluator_traits<Lhs>::Shape, 
+         typename RhsShape = typename evaluator_traits<Rhs>::Shape >
+struct product_result_scalar
+{
+  typedef typename scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType Scalar;
+};
+
+template<typename Lhs, typename Rhs, typename RhsShape>
+struct product_result_scalar<Lhs, Rhs, PermutationShape, RhsShape>
+{
+  typedef typename Rhs::Scalar Scalar;
+};
+
+template<typename Lhs, typename Rhs, typename LhsShape>
+  struct product_result_scalar<Lhs, Rhs, LhsShape, PermutationShape>
+{
+  typedef typename Lhs::Scalar Scalar;
+};
+
+template<typename Lhs, typename Rhs, int Option>
+struct traits<Product<Lhs, Rhs, Option> >
+{
+  typedef typename remove_all<Lhs>::type LhsCleaned;
+  typedef typename remove_all<Rhs>::type RhsCleaned;
+  typedef traits<LhsCleaned> LhsTraits;
+  typedef traits<RhsCleaned> RhsTraits;
+  
+  typedef MatrixXpr XprKind;
+  
+  typedef typename product_result_scalar<LhsCleaned,RhsCleaned>::Scalar Scalar;
+  typedef typename product_promote_storage_type<typename LhsTraits::StorageKind,
+                                                typename RhsTraits::StorageKind,
+                                                internal::product_type<Lhs,Rhs>::ret>::ret StorageKind;
+  typedef typename promote_index_type<typename LhsTraits::Index,
+                                         typename RhsTraits::Index>::type Index;
+  
   enum {
-    Flags = traits<typename ProductReturnType<Lhs, Rhs>::Type>::Flags & ~(EvalBeforeNestingBit | DirectAccessBit)
+    RowsAtCompileTime    = LhsTraits::RowsAtCompileTime,
+    ColsAtCompileTime    = RhsTraits::ColsAtCompileTime,
+    MaxRowsAtCompileTime = LhsTraits::MaxRowsAtCompileTime,
+    MaxColsAtCompileTime = RhsTraits::MaxColsAtCompileTime,
+    
+    // FIXME: only needed by GeneralMatrixMatrixTriangular
+    InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(LhsTraits::ColsAtCompileTime, RhsTraits::RowsAtCompileTime),
+    
+    // The storage order is somewhat arbitrary here. The correct one will be determined through the evaluator.
+    Flags = (   (MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1)
+             || ((LhsTraits::Flags&NoPreferredStorageOrderBit) && (RhsTraits::Flags&RowMajorBit))
+             || ((RhsTraits::Flags&NoPreferredStorageOrderBit) && (LhsTraits::Flags&RowMajorBit)) )
+          ? RowMajorBit : (MaxColsAtCompileTime==1 ? 0 : NoPreferredStorageOrderBit)
   };
 };
+
 } // end namespace internal
 
 
-template<typename Lhs, typename Rhs>
-class Product : public ProductImpl<Lhs,Rhs,typename internal::promote_storage_type<typename internal::traits<Lhs>::StorageKind,
-                                                                            typename internal::traits<Rhs>::StorageKind>::ret>
+template<typename _Lhs, typename _Rhs, int Option>
+class Product : public ProductImpl<_Lhs,_Rhs,Option,
+                                   typename internal::product_promote_storage_type<typename internal::traits<_Lhs>::StorageKind,
+                                                                                   typename internal::traits<_Rhs>::StorageKind,
+                                                                                   internal::product_type<_Lhs,_Rhs>::ret>::ret>
 {
   public:
     
+    typedef _Lhs Lhs;
+    typedef _Rhs Rhs;
+    
     typedef typename ProductImpl<
-        Lhs, Rhs,
-        typename internal::promote_storage_type<typename Lhs::StorageKind,
-                                                typename Rhs::StorageKind>::ret>::Base Base;
+        Lhs, Rhs, Option,
+        typename internal::product_promote_storage_type<typename internal::traits<Lhs>::StorageKind,
+                                                        typename internal::traits<Rhs>::StorageKind,
+                                                        internal::product_type<Lhs,Rhs>::ret>::ret>::Base Base;
     EIGEN_GENERIC_PUBLIC_INTERFACE(Product)
 
-    typedef typename Lhs::Nested LhsNested;
-    typedef typename Rhs::Nested RhsNested;
+    typedef typename internal::nested<Lhs>::type LhsNested;
+    typedef typename internal::nested<Rhs>::type RhsNested;
     typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned;
     typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned;
 
@@ -78,14 +132,77 @@ class Product : public ProductImpl<Lhs,Rhs,typename internal::promote_storage_ty
     RhsNested m_rhs;
 };
 
-template<typename Lhs, typename Rhs>
-class ProductImpl<Lhs,Rhs,Dense> : public internal::dense_xpr_base<Product<Lhs,Rhs> >::type
+namespace internal {
+  
+template<typename Lhs, typename Rhs, int Option, int ProductTag = internal::product_type<Lhs,Rhs>::ret>
+class dense_product_base
+ : public internal::dense_xpr_base<Product<Lhs,Rhs,Option> >::type
+{};
+
+/** Convertion to scalar for inner-products */
+template<typename Lhs, typename Rhs, int Option>
+class dense_product_base<Lhs, Rhs, Option, InnerProduct>
+ : public internal::dense_xpr_base<Product<Lhs,Rhs,Option> >::type
+{
+  typedef Product<Lhs,Rhs,Option> ProductXpr;
+  typedef typename internal::dense_xpr_base<ProductXpr>::type Base;
+public:
+  using Base::derived;
+  typedef typename Base::Scalar Scalar;
+  typedef typename Base::Index Index;
+  
+  operator const Scalar() const
+  {
+    return typename internal::evaluator<ProductXpr>::type(derived()).coeff(0,0);
+  }
+};
+
+} // namespace internal
+
+// Generic API dispatcher
+template<typename Lhs, typename Rhs, int Option, typename StorageKind>
+class ProductImpl : public internal::generic_xpr_base<Product<Lhs,Rhs,Option>, MatrixXpr, StorageKind>::type
 {
-    typedef Product<Lhs, Rhs> Derived;
   public:
+    typedef typename internal::generic_xpr_base<Product<Lhs,Rhs,Option>, MatrixXpr, StorageKind>::type Base;
+};
 
-    typedef typename internal::dense_xpr_base<Product<Lhs, Rhs> >::type Base;
+template<typename Lhs, typename Rhs, int Option>
+class ProductImpl<Lhs,Rhs,Option,Dense>
+  : public internal::dense_product_base<Lhs,Rhs,Option>
+{
+    typedef Product<Lhs, Rhs, Option> Derived;
+    
+  public:
+    
+    typedef typename internal::dense_product_base<Lhs, Rhs, Option> Base;
     EIGEN_DENSE_PUBLIC_INTERFACE(Derived)
+  protected:
+    enum {
+      IsOneByOne = (RowsAtCompileTime == 1 || RowsAtCompileTime == Dynamic) && 
+                   (ColsAtCompileTime == 1 || ColsAtCompileTime == Dynamic),
+      EnableCoeff = IsOneByOne || Option==LazyProduct
+    };
+    
+  public:
+  
+    Scalar coeff(Index row, Index col) const
+    {
+      EIGEN_STATIC_ASSERT(EnableCoeff, THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS);
+      eigen_assert( (Option==LazyProduct) || (this->rows() == 1 && this->cols() == 1) );
+      
+      return typename internal::evaluator<Derived>::type(derived()).coeff(row,col);
+    }
+
+    Scalar coeff(Index i) const
+    {
+      EIGEN_STATIC_ASSERT(EnableCoeff, THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS);
+      eigen_assert( (Option==LazyProduct) || (this->rows() == 1 && this->cols() == 1) );
+      
+      return typename internal::evaluator<Derived>::type(derived()).coeff(i);
+    }
+    
+  
 };
 
 /***************************************************************************
@@ -102,6 +219,15 @@ prod(const Lhs& lhs, const Rhs& rhs)
   return Product<Lhs,Rhs>(lhs,rhs);
 }
 
+/** \internal used to test the evaluator only
+  */
+template<typename Lhs,typename Rhs>
+const Product<Lhs,Rhs,LazyProduct>
+lazyprod(const Lhs& lhs, const Rhs& rhs)
+{
+  return Product<Lhs,Rhs,LazyProduct>(lhs,rhs);
+}
+
 } // end namespace Eigen
 
 #endif // EIGEN_PRODUCT_H
diff --git a/Eigen/src/Core/ProductBase.h b/Eigen/src/Core/ProductBase.h
index 483914a9b..050343b2d 100644
--- a/Eigen/src/Core/ProductBase.h
+++ b/Eigen/src/Core/ProductBase.h
@@ -12,253 +12,6 @@
 
 namespace Eigen { 
 
-/** \class ProductBase
-  * \ingroup Core_Module
-  *
-  */
-
-namespace internal {
-template<typename Derived, typename _Lhs, typename _Rhs>
-struct traits<ProductBase<Derived,_Lhs,_Rhs> >
-{
-  typedef MatrixXpr XprKind;
-  typedef typename remove_all<_Lhs>::type Lhs;
-  typedef typename remove_all<_Rhs>::type Rhs;
-  typedef typename scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType Scalar;
-  typedef typename promote_storage_type<typename traits<Lhs>::StorageKind,
-                                           typename traits<Rhs>::StorageKind>::ret StorageKind;
-  typedef typename promote_index_type<typename traits<Lhs>::Index,
-                                         typename traits<Rhs>::Index>::type Index;
-  enum {
-    RowsAtCompileTime = traits<Lhs>::RowsAtCompileTime,
-    ColsAtCompileTime = traits<Rhs>::ColsAtCompileTime,
-    MaxRowsAtCompileTime = traits<Lhs>::MaxRowsAtCompileTime,
-    MaxColsAtCompileTime = traits<Rhs>::MaxColsAtCompileTime,
-    Flags = (MaxRowsAtCompileTime==1 ? RowMajorBit : 0)
-          | EvalBeforeNestingBit | EvalBeforeAssigningBit | NestByRefBit,
-                  // Note that EvalBeforeNestingBit and NestByRefBit
-                  // are not used in practice because nested is overloaded for products
-    CoeffReadCost = 0 // FIXME why is it needed ?
-  };
-};
-}
-
-#define EIGEN_PRODUCT_PUBLIC_INTERFACE(Derived) \
-  typedef ProductBase<Derived, Lhs, Rhs > Base; \
-  EIGEN_DENSE_PUBLIC_INTERFACE(Derived) \
-  typedef typename Base::LhsNested LhsNested; \
-  typedef typename Base::_LhsNested _LhsNested; \
-  typedef typename Base::LhsBlasTraits LhsBlasTraits; \
-  typedef typename Base::ActualLhsType ActualLhsType; \
-  typedef typename Base::_ActualLhsType _ActualLhsType; \
-  typedef typename Base::RhsNested RhsNested; \
-  typedef typename Base::_RhsNested _RhsNested; \
-  typedef typename Base::RhsBlasTraits RhsBlasTraits; \
-  typedef typename Base::ActualRhsType ActualRhsType; \
-  typedef typename Base::_ActualRhsType _ActualRhsType; \
-  using Base::m_lhs; \
-  using Base::m_rhs;
-
-template<typename Derived, typename Lhs, typename Rhs>
-class ProductBase : public MatrixBase<Derived>
-{
-  public:
-    typedef MatrixBase<Derived> Base;
-    EIGEN_DENSE_PUBLIC_INTERFACE(ProductBase)
-    
-    typedef typename Lhs::Nested LhsNested;
-    typedef typename internal::remove_all<LhsNested>::type _LhsNested;
-    typedef internal::blas_traits<_LhsNested> LhsBlasTraits;
-    typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
-    typedef typename internal::remove_all<ActualLhsType>::type _ActualLhsType;
-    typedef typename internal::traits<Lhs>::Scalar LhsScalar;
-
-    typedef typename Rhs::Nested RhsNested;
-    typedef typename internal::remove_all<RhsNested>::type _RhsNested;
-    typedef internal::blas_traits<_RhsNested> RhsBlasTraits;
-    typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
-    typedef typename internal::remove_all<ActualRhsType>::type _ActualRhsType;
-    typedef typename internal::traits<Rhs>::Scalar RhsScalar;
-
-    // Diagonal of a product: no need to evaluate the arguments because they are going to be evaluated only once
-    typedef CoeffBasedProduct<LhsNested, RhsNested, 0> FullyLazyCoeffBaseProductType;
-
-  public:
-
-    typedef typename Base::PlainObject PlainObject;
-
-    ProductBase(const Lhs& a_lhs, const Rhs& a_rhs)
-      : m_lhs(a_lhs), m_rhs(a_rhs)
-    {
-      eigen_assert(a_lhs.cols() == a_rhs.rows()
-        && "invalid matrix product"
-        && "if you wanted a coeff-wise or a dot product use the respective explicit functions");
-    }
-
-    inline Index rows() const { return m_lhs.rows(); }
-    inline Index cols() const { return m_rhs.cols(); }
-
-    template<typename Dest>
-    inline void evalTo(Dest& dst) const { dst.setZero(); scaleAndAddTo(dst,Scalar(1)); }
-
-    template<typename Dest>
-    inline void addTo(Dest& dst) const { scaleAndAddTo(dst,Scalar(1)); }
-
-    template<typename Dest>
-    inline void subTo(Dest& dst) const { scaleAndAddTo(dst,Scalar(-1)); }
-
-    template<typename Dest>
-    inline void scaleAndAddTo(Dest& dst, const Scalar& alpha) const { derived().scaleAndAddTo(dst,alpha); }
-
-    const _LhsNested& lhs() const { return m_lhs; }
-    const _RhsNested& rhs() const { return m_rhs; }
-
-    // Implicit conversion to the nested type (trigger the evaluation of the product)
-    operator const PlainObject& () const
-    {
-      m_result.resize(m_lhs.rows(), m_rhs.cols());
-      derived().evalTo(m_result);
-      return m_result;
-    }
-
-    const Diagonal<const FullyLazyCoeffBaseProductType,0> diagonal() const
-    { return FullyLazyCoeffBaseProductType(m_lhs, m_rhs); }
-
-    template<int Index>
-    const Diagonal<FullyLazyCoeffBaseProductType,Index> diagonal() const
-    { return FullyLazyCoeffBaseProductType(m_lhs, m_rhs); }
-
-    const Diagonal<FullyLazyCoeffBaseProductType,Dynamic> diagonal(Index index) const
-    { return FullyLazyCoeffBaseProductType(m_lhs, m_rhs).diagonal(index); }
-
-    // restrict coeff accessors to 1x1 expressions. No need to care about mutators here since this isn't an Lvalue expression
-    typename Base::CoeffReturnType coeff(Index row, Index col) const
-    {
-      EIGEN_STATIC_ASSERT_SIZE_1x1(Derived)
-      eigen_assert(this->rows() == 1 && this->cols() == 1);
-      Matrix<Scalar,1,1> result = *this;
-      return result.coeff(row,col);
-    }
-
-    typename Base::CoeffReturnType coeff(Index i) const
-    {
-      EIGEN_STATIC_ASSERT_SIZE_1x1(Derived)
-      eigen_assert(this->rows() == 1 && this->cols() == 1);
-      Matrix<Scalar,1,1> result = *this;
-      return result.coeff(i);
-    }
-
-    const Scalar& coeffRef(Index row, Index col) const
-    {
-      EIGEN_STATIC_ASSERT_SIZE_1x1(Derived)
-      eigen_assert(this->rows() == 1 && this->cols() == 1);
-      return derived().coeffRef(row,col);
-    }
-
-    const Scalar& coeffRef(Index i) const
-    {
-      EIGEN_STATIC_ASSERT_SIZE_1x1(Derived)
-      eigen_assert(this->rows() == 1 && this->cols() == 1);
-      return derived().coeffRef(i);
-    }
-
-  protected:
-
-    LhsNested m_lhs;
-    RhsNested m_rhs;
-
-    mutable PlainObject m_result;
-};
-
-// here we need to overload the nested rule for products
-// such that the nested type is a const reference to a plain matrix
-namespace internal {
-template<typename Lhs, typename Rhs, int Mode, int N, typename PlainObject>
-struct nested<GeneralProduct<Lhs,Rhs,Mode>, N, PlainObject>
-{
-  typedef PlainObject const& type;
-};
-}
-
-template<typename NestedProduct>
-class ScaledProduct;
-
-// Note that these two operator* functions are not defined as member
-// functions of ProductBase, because, otherwise we would have to
-// define all overloads defined in MatrixBase. Furthermore, Using
-// "using Base::operator*" would not work with MSVC.
-//
-// Also note that here we accept any compatible scalar types
-template<typename Derived,typename Lhs,typename Rhs>
-const ScaledProduct<Derived>
-operator*(const ProductBase<Derived,Lhs,Rhs>& prod, const typename Derived::Scalar& x)
-{ return ScaledProduct<Derived>(prod.derived(), x); }
-
-template<typename Derived,typename Lhs,typename Rhs>
-typename internal::enable_if<!internal::is_same<typename Derived::Scalar,typename Derived::RealScalar>::value,
-                      const ScaledProduct<Derived> >::type
-operator*(const ProductBase<Derived,Lhs,Rhs>& prod, const typename Derived::RealScalar& x)
-{ return ScaledProduct<Derived>(prod.derived(), x); }
-
-
-template<typename Derived,typename Lhs,typename Rhs>
-const ScaledProduct<Derived>
-operator*(const typename Derived::Scalar& x,const ProductBase<Derived,Lhs,Rhs>& prod)
-{ return ScaledProduct<Derived>(prod.derived(), x); }
-
-template<typename Derived,typename Lhs,typename Rhs>
-typename internal::enable_if<!internal::is_same<typename Derived::Scalar,typename Derived::RealScalar>::value,
-                      const ScaledProduct<Derived> >::type
-operator*(const typename Derived::RealScalar& x,const ProductBase<Derived,Lhs,Rhs>& prod)
-{ return ScaledProduct<Derived>(prod.derived(), x); }
-
-namespace internal {
-template<typename NestedProduct>
-struct traits<ScaledProduct<NestedProduct> >
- : traits<ProductBase<ScaledProduct<NestedProduct>,
-                         typename NestedProduct::_LhsNested,
-                         typename NestedProduct::_RhsNested> >
-{
-  typedef typename traits<NestedProduct>::StorageKind StorageKind;
-};
-}
-
-template<typename NestedProduct>
-class ScaledProduct
-  : public ProductBase<ScaledProduct<NestedProduct>,
-                       typename NestedProduct::_LhsNested,
-                       typename NestedProduct::_RhsNested>
-{
-  public:
-    typedef ProductBase<ScaledProduct<NestedProduct>,
-                       typename NestedProduct::_LhsNested,
-                       typename NestedProduct::_RhsNested> Base;
-    typedef typename Base::Scalar Scalar;
-    typedef typename Base::PlainObject PlainObject;
-//     EIGEN_PRODUCT_PUBLIC_INTERFACE(ScaledProduct)
-
-    ScaledProduct(const NestedProduct& prod, const Scalar& x)
-    : Base(prod.lhs(),prod.rhs()), m_prod(prod), m_alpha(x) {}
-
-    template<typename Dest>
-    inline void evalTo(Dest& dst) const { dst.setZero(); scaleAndAddTo(dst, Scalar(1)); }
-
-    template<typename Dest>
-    inline void addTo(Dest& dst) const { scaleAndAddTo(dst, Scalar(1)); }
-
-    template<typename Dest>
-    inline void subTo(Dest& dst) const { scaleAndAddTo(dst, Scalar(-1)); }
-
-    template<typename Dest>
-    inline void scaleAndAddTo(Dest& dst, const Scalar& a_alpha) const { m_prod.derived().scaleAndAddTo(dst,a_alpha * m_alpha); }
-
-    const Scalar& alpha() const { return m_alpha; }
-    
-  protected:
-    const NestedProduct& m_prod;
-    Scalar m_alpha;
-};
-
 /** \internal
   * Overloaded to perform an efficient C = (A*B).lazy() */
 template<typename Derived>
diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h
index 855914f2e..f880e7696 100644
--- a/Eigen/src/Core/ProductEvaluators.h
+++ b/Eigen/src/Core/ProductEvaluators.h
@@ -16,95 +16,344 @@
 namespace Eigen {
   
 namespace internal {
+
+/** \internal
+  * Evaluator of a product expression.
+  * Since products require special treatments to handle all possible cases,
+  * we simply deffer the evaluation logic to a product_evaluator class
+  * which offers more partial specialization possibilities.
+  * 
+  * \sa class product_evaluator
+  */
+template<typename Lhs, typename Rhs, int Options>
+struct evaluator<Product<Lhs, Rhs, Options> > 
+ : public product_evaluator<Product<Lhs, Rhs, Options> >
+{
+  typedef Product<Lhs, Rhs, Options> XprType;
+  typedef product_evaluator<XprType> Base;
+  
+  typedef evaluator type;
+  typedef evaluator nestedType;
   
-// We can evaluate the product either all at once, like GeneralProduct and its evalTo() function, or
-// traverse the matrix coefficient by coefficient, like CoeffBasedProduct.  Use the existing logic
-// in ProductReturnType to decide.
+  evaluator(const XprType& xpr) : Base(xpr) {}
+};
+ 
+// Catch scalar * ( A * B ) and transform it to (A*scalar) * B
+// TODO we should apply that rule only if that's really helpful
+template<typename Lhs, typename Rhs, typename Scalar>
+struct evaluator<CwiseUnaryOp<internal::scalar_multiple_op<Scalar>,  const Product<Lhs, Rhs, DefaultProduct>  > > 
+ : public evaluator<Product<CwiseUnaryOp<internal::scalar_multiple_op<Scalar>,const Lhs>, Rhs, DefaultProduct> >
+{
+  typedef CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const Product<Lhs, Rhs, DefaultProduct> > XprType;
+  typedef evaluator<Product<CwiseUnaryOp<internal::scalar_multiple_op<Scalar>,const Lhs>, Rhs, DefaultProduct> > Base;
+  
+  typedef evaluator type;
+  typedef evaluator nestedType;
+  
+  evaluator(const XprType& xpr)
+    : Base(xpr.functor().m_other * xpr.nestedExpression().lhs() * xpr.nestedExpression().rhs())
+  {}
+};
 
-template<typename XprType, typename ProductType>
-struct product_evaluator_dispatcher;
+
+template<typename Lhs, typename Rhs, int DiagIndex>
+struct evaluator<Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> > 
+ : public evaluator<Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex> >
+{
+  typedef Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> XprType;
+  typedef evaluator<Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex> > Base;
+  
+  typedef evaluator type;
+  typedef evaluator nestedType;
+
+  evaluator(const XprType& xpr)
+    : Base(Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex>(
+        Product<Lhs, Rhs, LazyProduct>(xpr.nestedExpression().lhs(), xpr.nestedExpression().rhs()),
+        xpr.index() ))
+  {}
+};
+
+
+// Helper class to perform a matrix product with the destination at hand.
+// Depending on the sizes of the factors, there are different evaluation strategies
+// as controlled by internal::product_type.
+template< typename Lhs, typename Rhs,
+          typename LhsShape = typename evaluator_traits<Lhs>::Shape,
+          typename RhsShape = typename evaluator_traits<Rhs>::Shape,
+          int ProductType = internal::product_type<Lhs,Rhs>::value>
+struct generic_product_impl;
 
 template<typename Lhs, typename Rhs>
-struct evaluator_impl<Product<Lhs, Rhs> >
-  : product_evaluator_dispatcher<Product<Lhs, Rhs>, typename ProductReturnType<Lhs, Rhs>::Type> 
+struct evaluator_traits<Product<Lhs, Rhs, DefaultProduct> > 
+ : evaluator_traits_base<Product<Lhs, Rhs, DefaultProduct> >
 {
-  typedef Product<Lhs, Rhs> XprType;
-  typedef product_evaluator_dispatcher<XprType, typename ProductReturnType<Lhs, Rhs>::Type> Base;
+  enum { AssumeAliasing = 1 };
+};
 
-  evaluator_impl(const XprType& xpr) : Base(xpr) 
-  { }
+// This is the default evaluator implementation for products:
+// It creates a temporary and call generic_product_impl
+template<typename Lhs, typename Rhs, int ProductTag, typename LhsShape, typename RhsShape>
+struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, ProductTag, LhsShape, RhsShape, typename traits<Lhs>::Scalar, typename traits<Rhs>::Scalar> 
+  : public evaluator<typename Product<Lhs, Rhs, DefaultProduct>::PlainObject>::type
+{
+  typedef Product<Lhs, Rhs, DefaultProduct> XprType;
+//   enum {
+//     CoeffReadCost = 0 // FIXME why is it needed? (this was already the case before the evaluators, see traits<ProductBase>)
+//   };
+  typedef typename XprType::PlainObject PlainObject;
+  typedef typename evaluator<PlainObject>::type Base;
+
+  product_evaluator(const XprType& xpr)
+    : m_result(xpr.rows(), xpr.cols())
+  {
+    ::new (static_cast<Base*>(this)) Base(m_result);
+    
+// FIXME shall we handle nested_eval here?
+//     typedef typename internal::nested_eval<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
+//     typedef typename internal::nested_eval<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;
+//     typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned;
+//     typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned;
+//     
+//     const LhsNested lhs(xpr.lhs());
+//     const RhsNested rhs(xpr.rhs());
+//   
+//     generic_product_impl<LhsNestedCleaned, RhsNestedCleaned>::evalTo(m_result, lhs, rhs);
+
+    generic_product_impl<Lhs, Rhs, LhsShape, RhsShape, ProductTag>::evalTo(m_result, xpr.lhs(), xpr.rhs());
+  }
+  
+protected:  
+  PlainObject m_result;
 };
 
-template<typename XprType, typename ProductType>
-struct product_evaluator_traits_dispatcher;
+// Dense = Product
+template< typename DstXprType, typename Lhs, typename Rhs, typename Scalar>
+struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::assign_op<Scalar>, Dense2Dense, Scalar>
+{
+  typedef Product<Lhs,Rhs,DefaultProduct> SrcXprType;
+  static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &)
+  {
+    // FIXME shall we handle nested_eval here?
+    generic_product_impl<Lhs, Rhs>::evalTo(dst, src.lhs(), src.rhs());
+  }
+};
 
-template<typename Lhs, typename Rhs>
-struct evaluator_traits<Product<Lhs, Rhs> >
-  : product_evaluator_traits_dispatcher<Product<Lhs, Rhs>, typename ProductReturnType<Lhs, Rhs>::Type> 
-{ 
-  static const int AssumeAliasing = 1;
+// Dense += Product
+template< typename DstXprType, typename Lhs, typename Rhs, typename Scalar>
+struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::add_assign_op<Scalar>, Dense2Dense, Scalar>
+{
+  typedef Product<Lhs,Rhs,DefaultProduct> SrcXprType;
+  static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<Scalar> &)
+  {
+    // FIXME shall we handle nested_eval here?
+    generic_product_impl<Lhs, Rhs>::addTo(dst, src.lhs(), src.rhs());
+  }
 };
 
-// Case 1: Evaluate all at once
-//
-// We can view the GeneralProduct class as a part of the product evaluator. 
-// Four sub-cases: InnerProduct, OuterProduct, GemmProduct and GemvProduct.
-// InnerProduct is special because GeneralProduct does not have an evalTo() method in this case.
+// Dense -= Product
+template< typename DstXprType, typename Lhs, typename Rhs, typename Scalar>
+struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::sub_assign_op<Scalar>, Dense2Dense, Scalar>
+{
+  typedef Product<Lhs,Rhs,DefaultProduct> SrcXprType;
+  static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<Scalar> &)
+  {
+    // FIXME shall we handle nested_eval here?
+    generic_product_impl<Lhs, Rhs>::subTo(dst, src.lhs(), src.rhs());
+  }
+};
 
-template<typename Lhs, typename Rhs>
-struct product_evaluator_traits_dispatcher<Product<Lhs, Rhs>, GeneralProduct<Lhs, Rhs, InnerProduct> > 
+
+// Dense ?= scalar * Product
+// TODO we should apply that rule if that's really helpful
+// for instance, this is not good for inner products
+template< typename DstXprType, typename Lhs, typename Rhs, typename AssignFunc, typename Scalar, typename ScalarBis>
+struct Assignment<DstXprType, CwiseUnaryOp<internal::scalar_multiple_op<ScalarBis>,
+                                           const Product<Lhs,Rhs,DefaultProduct> >, AssignFunc, Dense2Dense, Scalar>
 {
-  static const int HasEvalTo = 0;
+  typedef CwiseUnaryOp<internal::scalar_multiple_op<ScalarBis>,
+                       const Product<Lhs,Rhs,DefaultProduct> > SrcXprType;
+  static void run(DstXprType &dst, const SrcXprType &src, const AssignFunc& func)
+  {
+    // TODO use operator* instead of prod() once we have made enough progress
+    call_assignment(dst.noalias(), prod(src.functor().m_other * src.nestedExpression().lhs(), src.nestedExpression().rhs()), func);
+  }
 };
 
+
 template<typename Lhs, typename Rhs>
-struct product_evaluator_dispatcher<Product<Lhs, Rhs>, GeneralProduct<Lhs, Rhs, InnerProduct> > 
-  : public evaluator<typename Product<Lhs, Rhs>::PlainObject>::type
+struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,InnerProduct>
 {
-  typedef Product<Lhs, Rhs> XprType;
-  typedef typename XprType::PlainObject PlainObject;
-  typedef typename evaluator<PlainObject>::type evaluator_base;
+  template<typename Dst>
+  static inline void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+  {
+    dst.coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum();
+  }
+  
+  template<typename Dst>
+  static inline void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+  {
+    dst.coeffRef(0,0) += (lhs.transpose().cwiseProduct(rhs)).sum();
+  }
+  
+  template<typename Dst>
+  static void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+  { dst.coeffRef(0,0) -= (lhs.transpose().cwiseProduct(rhs)).sum(); }
+};
+
 
-  // TODO: Computation is too early (?)
-  product_evaluator_dispatcher(const XprType& xpr) : evaluator_base(m_result)
+/***********************************************************************
+*  Implementation of outer dense * dense vector product
+***********************************************************************/
+
+// Column major result
+template<typename Dst, typename Lhs, typename Rhs, typename Func>
+EIGEN_DONT_INLINE void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const false_type&)
+{
+  typedef typename Dst::Index Index;
+  // FIXME make sure lhs is sequentially stored
+  // FIXME not very good if rhs is real and lhs complex while alpha is real too
+  // FIXME we should probably build an evaluator for dst and rhs
+  const Index cols = dst.cols();
+  for (Index j=0; j<cols; ++j)
+    func(dst.col(j), rhs.coeff(0,j) * lhs);
+}
+
+// Row major result
+template<typename Dst, typename Lhs, typename Rhs, typename Func>
+EIGEN_DONT_INLINE void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const true_type&) {
+  typedef typename Dst::Index Index;
+  // FIXME make sure rhs is sequentially stored
+  // FIXME not very good if lhs is real and rhs complex while alpha is real too
+  // FIXME we should probably build an evaluator for dst and lhs
+  const Index rows = dst.rows();
+  for (Index i=0; i<rows; ++i)
+    func(dst.row(i), lhs.coeff(i,0) * rhs);
+}
+
+template<typename Lhs, typename Rhs>
+struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,OuterProduct>
+{
+  template<typename T> struct IsRowMajor : internal::conditional<(int(T::Flags)&RowMajorBit), internal::true_type, internal::false_type>::type {};
+  typedef typename Product<Lhs,Rhs>::Scalar Scalar;
+  
+  // TODO it would be nice to be able to exploit our *_assign_op functors for that purpose
+  struct set  { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived()  = src; } };
+  struct add  { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() += src; } };
+  struct sub  { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() -= src; } };
+  struct adds {
+    Scalar m_scale;
+    adds(const Scalar& s) : m_scale(s) {}
+    template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const {
+      dst.const_cast_derived() += m_scale * src;
+    }
+  };
+  
+  template<typename Dst>
+  static inline void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
   {
-    m_result.coeffRef(0,0) = (xpr.lhs().transpose().cwiseProduct(xpr.rhs())).sum();
+    internal::outer_product_selector_run(dst, lhs, rhs, set(), IsRowMajor<Dst>());
+  }
+  
+  template<typename Dst>
+  static inline void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+  {
+    internal::outer_product_selector_run(dst, lhs, rhs, add(), IsRowMajor<Dst>());
+  }
+  
+  template<typename Dst>
+  static inline void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+  {
+    internal::outer_product_selector_run(dst, lhs, rhs, sub(), IsRowMajor<Dst>());
+  }
+  
+  template<typename Dst>
+  static inline void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
+  {
+    internal::outer_product_selector_run(dst, lhs, rhs, adds(alpha), IsRowMajor<Dst>());
   }
   
-protected:  
-  PlainObject m_result;
 };
 
-// For the other three subcases, simply call the evalTo() method of GeneralProduct
-// TODO: GeneralProduct should take evaluators, not expression objects.
 
-template<typename Lhs, typename Rhs, int ProductType>
-struct product_evaluator_traits_dispatcher<Product<Lhs, Rhs>, GeneralProduct<Lhs, Rhs, ProductType> > 
+// This base class provides default implementations for evalTo, addTo, subTo, in terms of scaleAndAddTo
+template<typename Lhs, typename Rhs, typename Derived>
+struct generic_product_impl_base
 {
-  static const int HasEvalTo = 1;
+  typedef typename Product<Lhs,Rhs>::Scalar Scalar;
+  
+  template<typename Dst>
+  static void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+  { dst.setZero(); scaleAndAddTo(dst, lhs, rhs, Scalar(1)); }
+
+  template<typename Dst>
+  static void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+  { scaleAndAddTo(dst,lhs, rhs, Scalar(1)); }
+
+  template<typename Dst>
+  static void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+  { scaleAndAddTo(dst, lhs, rhs, Scalar(-1)); }
+  
+  template<typename Dst>
+  static void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
+  { Derived::scaleAndAddTo(dst,lhs,rhs,alpha); }
+
 };
 
-template<typename Lhs, typename Rhs, int ProductType>
-struct product_evaluator_dispatcher<Product<Lhs, Rhs>, GeneralProduct<Lhs, Rhs, ProductType> > 
+template<typename Lhs, typename Rhs>
+struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemvProduct>
+  : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemvProduct> >
 {
-  typedef Product<Lhs, Rhs> XprType;
-  typedef typename XprType::PlainObject PlainObject;
-  typedef typename evaluator<PlainObject>::type evaluator_base;
+  typedef typename Product<Lhs,Rhs>::Scalar Scalar;
+  enum { Side = Lhs::IsVectorAtCompileTime ? OnTheLeft : OnTheRight };
+  typedef typename internal::conditional<int(Side)==OnTheRight,Lhs,Rhs>::type MatrixType;
+
+  template<typename Dest>
+  static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
+  {
+    internal::gemv_dense_sense_selector<Side,
+                            (int(MatrixType::Flags)&RowMajorBit) ? RowMajor : ColMajor,
+                            bool(internal::blas_traits<MatrixType>::HasUsableDirectAccess)
+                           >::run(lhs, rhs, dst, alpha);
+  }
+};
+
+template<typename Lhs, typename Rhs>
+struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode> 
+{
+  typedef typename Product<Lhs,Rhs>::Scalar Scalar;
   
-  product_evaluator_dispatcher(const XprType& xpr) : m_xpr(xpr)
-  { }
+  template<typename Dst>
+  static inline void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+  {
+    // TODO: use the following instead of calling call_assignment, same for the other methods
+    // dst = lazyprod(lhs,rhs);
+    call_assignment(dst, lazyprod(lhs,rhs), internal::assign_op<Scalar>());
+  }
+  
+  template<typename Dst>
+  static inline void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+  {
+    // dst += lazyprod(lhs,rhs);
+    call_assignment(dst, lazyprod(lhs,rhs), internal::add_assign_op<Scalar>());
+  }
   
-  template<typename DstEvaluatorType, typename DstXprType>
-  void evalTo(DstEvaluatorType /* not used */, DstXprType& dst) const
+  template<typename Dst>
+  static inline void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
   {
-    dst.resize(m_xpr.rows(), m_xpr.cols());
-    GeneralProduct<Lhs, Rhs, ProductType>(m_xpr.lhs(), m_xpr.rhs()).evalTo(dst);
+    // dst -= lazyprod(lhs,rhs);
+    call_assignment(dst, lazyprod(lhs,rhs), internal::sub_assign_op<Scalar>());
   }
   
-protected: 
-  const XprType& m_xpr;
+//   template<typename Dst>
+//   static inline void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
+//   { dst += alpha * lazyprod(lhs,rhs); }
 };
 
+// This specialization enforces the use of a coefficient-based evaluation strategy
+template<typename Lhs, typename Rhs>
+struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,LazyCoeffBasedProductMode>
+  : generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode> {};
+
 // Case 2: Evaluate coeff by coeff
 //
 // This is mostly taken from CoeffBasedProduct.h
@@ -117,65 +366,116 @@ struct etor_product_coeff_impl;
 template<int StorageOrder, int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
 struct etor_product_packet_impl;
 
-template<typename Lhs, typename Rhs, typename LhsNested, typename RhsNested, int Flags>
-struct product_evaluator_traits_dispatcher<Product<Lhs, Rhs>, CoeffBasedProduct<LhsNested, RhsNested, Flags> >
+template<typename Lhs, typename Rhs, int ProductTag>
+struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape, DenseShape, typename Lhs::Scalar, typename Rhs::Scalar > 
+    : evaluator_base<Product<Lhs, Rhs, LazyProduct> >
 {
-  static const int HasEvalTo = 0;
-};
-
-template<typename Lhs, typename Rhs, typename LhsNested, typename RhsNested, int Flags>
-struct product_evaluator_dispatcher<Product<Lhs, Rhs>, CoeffBasedProduct<LhsNested, RhsNested, Flags> >
-  : evaluator_impl_base<Product<Lhs, Rhs> >
-{
-  typedef Product<Lhs, Rhs> XprType;
-  typedef CoeffBasedProduct<LhsNested, RhsNested, Flags> CoeffBasedProductType;
-
-  product_evaluator_dispatcher(const XprType& xpr) 
-    : m_lhsImpl(xpr.lhs()), 
-      m_rhsImpl(xpr.rhs()),  
-      m_innerDim(xpr.lhs().cols())
-  { }
-
+  typedef Product<Lhs, Rhs, LazyProduct> XprType;
   typedef typename XprType::Index Index;
   typedef typename XprType::Scalar Scalar;
   typedef typename XprType::CoeffReturnType CoeffReturnType;
   typedef typename XprType::PacketScalar PacketScalar;
   typedef typename XprType::PacketReturnType PacketReturnType;
 
+  product_evaluator(const XprType& xpr) 
+    : m_lhs(xpr.lhs()),
+      m_rhs(xpr.rhs()),
+      m_lhsImpl(m_lhs),     // FIXME the creation of the evaluator objects should result in a no-op, but check that!
+      m_rhsImpl(m_rhs),     //       Moreover, they are only useful for the packet path, so we could completely disable them when not needed,
+                            //       or perhaps declare them on the fly on the packet method... We have experiment to check what's best.
+      m_innerDim(xpr.lhs().cols())
+  { }
+
   // Everything below here is taken from CoeffBasedProduct.h
 
+  typedef typename internal::nested_eval<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
+  typedef typename internal::nested_eval<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;
+  
+  typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned;
+  typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned;
+
+  typedef typename evaluator<LhsNestedCleaned>::type LhsEtorType;
+  typedef typename evaluator<RhsNestedCleaned>::type RhsEtorType;
+  
   enum {
-    RowsAtCompileTime = traits<CoeffBasedProductType>::RowsAtCompileTime,
+    RowsAtCompileTime = LhsNestedCleaned::RowsAtCompileTime,
+    ColsAtCompileTime = RhsNestedCleaned::ColsAtCompileTime,
+    InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(LhsNestedCleaned::ColsAtCompileTime, RhsNestedCleaned::RowsAtCompileTime),
+    MaxRowsAtCompileTime = LhsNestedCleaned::MaxRowsAtCompileTime,
+    MaxColsAtCompileTime = RhsNestedCleaned::MaxColsAtCompileTime,
+      
     PacketSize = packet_traits<Scalar>::size,
-    InnerSize  = traits<CoeffBasedProductType>::InnerSize,
-    CoeffReadCost = traits<CoeffBasedProductType>::CoeffReadCost,
+    
+    LhsCoeffReadCost = LhsEtorType::CoeffReadCost,
+    RhsCoeffReadCost = RhsEtorType::CoeffReadCost,
+    CoeffReadCost = (InnerSize == Dynamic || LhsCoeffReadCost==Dynamic || RhsCoeffReadCost==Dynamic || NumTraits<Scalar>::AddCost==Dynamic || NumTraits<Scalar>::MulCost==Dynamic) ? Dynamic
+                  : InnerSize * (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost)
+                    + (InnerSize - 1) * NumTraits<Scalar>::AddCost,
+
     Unroll = CoeffReadCost != Dynamic && CoeffReadCost <= EIGEN_UNROLLING_LIMIT,
-    CanVectorizeInner = traits<CoeffBasedProductType>::CanVectorizeInner
+    
+    LhsFlags = LhsEtorType::Flags,
+    RhsFlags = RhsEtorType::Flags,
+    
+    LhsRowMajor = LhsFlags & RowMajorBit,
+    RhsRowMajor = RhsFlags & RowMajorBit,
+      
+    SameType = is_same<typename LhsNestedCleaned::Scalar,typename RhsNestedCleaned::Scalar>::value,
+
+    CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit)
+                    && (ColsAtCompileTime == Dynamic
+                        || ( (ColsAtCompileTime % packet_traits<Scalar>::size) == 0
+                            && (RhsFlags&AlignedBit)
+                            )
+                        ),
+
+    CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit)
+                    && (RowsAtCompileTime == Dynamic
+                        || ( (RowsAtCompileTime % packet_traits<Scalar>::size) == 0
+                            && (LhsFlags&AlignedBit)
+                            )
+                        ),
+
+    EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1
+                    : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0
+                    : (RhsRowMajor && !CanVectorizeLhs),
+
+    Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit)
+          | (EvalToRowMajor ? RowMajorBit : 0)
+          | (CanVectorizeLhs ? (LhsFlags & AlignedBit) : 0)
+          | (CanVectorizeRhs ? (RhsFlags & AlignedBit) : 0)
+          // TODO enable vectorization for mixed types
+          | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0),
+          
+    /* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside
+    * of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner
+    * loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect
+    * the Flags, it is safe to make this value depend on ActualPacketAccessBit, that doesn't affect the ABI.
+    */
+    CanVectorizeInner =    SameType
+                        && LhsRowMajor
+                        && (!RhsRowMajor)
+                        && (LhsFlags & RhsFlags & ActualPacketAccessBit)
+                        && (LhsFlags & RhsFlags & AlignedBit)
+                        && (InnerSize % packet_traits<Scalar>::size == 0)
   };
-
-  typedef typename evaluator<Lhs>::type LhsEtorType;
-  typedef typename evaluator<Rhs>::type RhsEtorType;
-  typedef etor_product_coeff_impl<CanVectorizeInner ? InnerVectorizedTraversal : DefaultTraversal,
-                                  Unroll ? InnerSize-1 : Dynamic,
-                                  LhsEtorType, RhsEtorType, Scalar> CoeffImpl;
-
+  
   const CoeffReturnType coeff(Index row, Index col) const
   {
-    Scalar res;
-    CoeffImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res);
-    return res;
+    // TODO check performance regression wrt to Eigen 3.2 which has special handling of this function
+    return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum();
   }
 
   /* Allow index-based non-packet access. It is impossible though to allow index-based packed access,
    * which is why we don't set the LinearAccessBit.
+   * TODO: this seems possible when the result is a vector
    */
   const CoeffReturnType coeff(Index index) const
   {
-    Scalar res;
     const Index row = RowsAtCompileTime == 1 ? 0 : index;
     const Index col = RowsAtCompileTime == 1 ? index : 0;
-    CoeffImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res);
-    return res;
+    // TODO check performance regression wrt to Eigen 3.2 which has special handling of this function
+    return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum();
   }
 
   template<int LoadMode>
@@ -183,224 +483,376 @@ struct product_evaluator_dispatcher<Product<Lhs, Rhs>, CoeffBasedProduct<LhsNest
   {
     PacketScalar res;
     typedef etor_product_packet_impl<Flags&RowMajorBit ? RowMajor : ColMajor,
-				     Unroll ? InnerSize-1 : Dynamic,
-				     LhsEtorType, RhsEtorType, PacketScalar, LoadMode> PacketImpl;
+                                     Unroll ? InnerSize-1 : Dynamic,
+                                     LhsEtorType, RhsEtorType, PacketScalar, LoadMode> PacketImpl;
+
     PacketImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res);
     return res;
   }
 
 protected:
-  typename evaluator<Lhs>::type m_lhsImpl;
-  typename evaluator<Rhs>::type m_rhsImpl;
+  const LhsNested m_lhs;
+  const RhsNested m_rhs;
+  
+  LhsEtorType m_lhsImpl;
+  RhsEtorType m_rhsImpl;
 
   // TODO: Get rid of m_innerDim if known at compile time
   Index m_innerDim;
 };
 
-/***************************************************************************
-* Normal product .coeff() implementation (with meta-unrolling)
-***************************************************************************/
+template<typename Lhs, typename Rhs>
+struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, LazyCoeffBasedProductMode, DenseShape, DenseShape, typename traits<Lhs>::Scalar, typename traits<Rhs>::Scalar > 
+  : product_evaluator<Product<Lhs, Rhs, LazyProduct>, CoeffBasedProductMode, DenseShape, DenseShape, typename traits<Lhs>::Scalar, typename traits<Rhs>::Scalar >
+{
+  typedef Product<Lhs, Rhs, DefaultProduct> XprType;
+  typedef Product<Lhs, Rhs, LazyProduct> BaseProduct;
+  typedef product_evaluator<BaseProduct, CoeffBasedProductMode, DenseShape, DenseShape, typename Lhs::Scalar, typename Rhs::Scalar > Base;
+  product_evaluator(const XprType& xpr) 
+    : Base(BaseProduct(xpr.lhs(),xpr.rhs()))
+  {}
+};
 
-/**************************************
-*** Scalar path  - no vectorization ***
-**************************************/
+/****************************************
+*** Coeff based product, Packet path  ***
+****************************************/
 
-template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
-struct etor_product_coeff_impl<DefaultTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
+template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
+struct etor_product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
 {
   typedef typename Lhs::Index Index;
-  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, RetScalar &res)
+  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
   {
-    etor_product_coeff_impl<DefaultTraversal, UnrollingIndex-1, Lhs, Rhs, RetScalar>::run(row, col, lhs, rhs, innerDim, res);
-    res += lhs.coeff(row, UnrollingIndex) * rhs.coeff(UnrollingIndex, col);
+    etor_product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
+    res =  pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex)), rhs.template packet<LoadMode>(UnrollingIndex, col), res);
   }
 };
 
-template<typename Lhs, typename Rhs, typename RetScalar>
-struct etor_product_coeff_impl<DefaultTraversal, 0, Lhs, Rhs, RetScalar>
+template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
+struct etor_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
 {
   typedef typename Lhs::Index Index;
-  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, RetScalar &res)
+  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
   {
-    res = lhs.coeff(row, 0) * rhs.coeff(0, col);
+    etor_product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
+    res =  pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex), pset1<Packet>(rhs.coeff(UnrollingIndex, col)), res);
   }
 };
 
-template<typename Lhs, typename Rhs, typename RetScalar>
-struct etor_product_coeff_impl<DefaultTraversal, Dynamic, Lhs, Rhs, RetScalar>
+template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
+struct etor_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
 {
   typedef typename Lhs::Index Index;
-  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, RetScalar& res)
+  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
   {
-    eigen_assert(innerDim>0 && "you are using a non initialized matrix");
-    res = lhs.coeff(row, 0) * rhs.coeff(0, col);
-    for(Index i = 1; i < innerDim; ++i)
-      res += lhs.coeff(row, i) * rhs.coeff(i, col);
+    res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
   }
 };
 
-/*******************************************
-*** Scalar path with inner vectorization ***
-*******************************************/
-
-template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet>
-struct etor_product_coeff_vectorized_unroller
+template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
+struct etor_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
 {
   typedef typename Lhs::Index Index;
-  enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
-  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, typename Lhs::PacketScalar &pres)
+  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
   {
-    etor_product_coeff_vectorized_unroller<UnrollingIndex-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, innerDim, pres);
-    pres = padd(pres, pmul( lhs.template packet<Aligned>(row, UnrollingIndex) , rhs.template packet<Aligned>(UnrollingIndex, col) ));
+    res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
   }
 };
 
-template<typename Lhs, typename Rhs, typename Packet>
-struct etor_product_coeff_vectorized_unroller<0, Lhs, Rhs, Packet>
+template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
+struct etor_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
 {
   typedef typename Lhs::Index Index;
-  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::PacketScalar &pres)
+  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
   {
-    pres = pmul(lhs.template packet<Aligned>(row, 0) , rhs.template packet<Aligned>(0, col));
+    eigen_assert(innerDim>0 && "you are using a non initialized matrix");
+    res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
+    for(Index i = 1; i < innerDim; ++i)
+      res =  pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode>(i, col), res);
   }
 };
 
-template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
-struct etor_product_coeff_impl<InnerVectorizedTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
+template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
+struct etor_product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
 {
-  typedef typename Lhs::PacketScalar Packet;
   typedef typename Lhs::Index Index;
-  enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
-  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, RetScalar &res)
+  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
   {
-    Packet pres;
-    etor_product_coeff_vectorized_unroller<UnrollingIndex+1-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, innerDim, pres);
-    etor_product_coeff_impl<DefaultTraversal,UnrollingIndex,Lhs,Rhs,RetScalar>::run(row, col, lhs, rhs, innerDim, res);
-    res = predux(pres);
+    eigen_assert(innerDim>0 && "you are using a non initialized matrix");
+    res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
+    for(Index i = 1; i < innerDim; ++i)
+      res =  pmadd(lhs.template packet<LoadMode>(row, i), pset1<Packet>(rhs.coeff(i, col)), res);
   }
 };
 
-template<typename Lhs, typename Rhs, int LhsRows = Lhs::RowsAtCompileTime, int RhsCols = Rhs::ColsAtCompileTime>
-struct etor_product_coeff_vectorized_dyn_selector
+
+/***************************************************************************
+* Triangular products
+***************************************************************************/
+template<int Mode, bool LhsIsTriangular,
+         typename Lhs, bool LhsIsVector,
+         typename Rhs, bool RhsIsVector>
+struct triangular_product_impl;
+
+template<typename Lhs, typename Rhs, int ProductTag>
+struct generic_product_impl<Lhs,Rhs,TriangularShape,DenseShape,ProductTag>
+  : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,TriangularShape,DenseShape,ProductTag> >
 {
-  typedef typename Lhs::Index Index;
-  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res)
+  typedef typename Product<Lhs,Rhs>::Scalar Scalar;
+  
+  template<typename Dest>
+  static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
   {
-    res = lhs.row(row).transpose().cwiseProduct(rhs.col(col)).sum();
+    triangular_product_impl<Lhs::Mode,true,typename Lhs::MatrixType,false,Rhs, Rhs::ColsAtCompileTime==1>
+        ::run(dst, lhs.nestedExpression(), rhs, alpha);
   }
 };
 
-// NOTE the 3 following specializations are because taking .col(0) on a vector is a bit slower
-// NOTE maybe they are now useless since we have a specialization for Block<Matrix>
-template<typename Lhs, typename Rhs, int RhsCols>
-struct etor_product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,RhsCols>
+template<typename Lhs, typename Rhs, int ProductTag>
+struct generic_product_impl<Lhs,Rhs,DenseShape,TriangularShape,ProductTag>
+: generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,TriangularShape,ProductTag> >
 {
-  typedef typename Lhs::Index Index;
-  static EIGEN_STRONG_INLINE void run(Index /*row*/, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res)
+  typedef typename Product<Lhs,Rhs>::Scalar Scalar;
+  
+  template<typename Dest>
+  static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
   {
-    res = lhs.transpose().cwiseProduct(rhs.col(col)).sum();
+    triangular_product_impl<Rhs::Mode,false,Lhs,Lhs::RowsAtCompileTime==1, typename Rhs::MatrixType, false>::run(dst, lhs, rhs.nestedExpression(), alpha);
   }
 };
 
-template<typename Lhs, typename Rhs, int LhsRows>
-struct etor_product_coeff_vectorized_dyn_selector<Lhs,Rhs,LhsRows,1>
+
+/***************************************************************************
+* SelfAdjoint products
+***************************************************************************/
+template <typename Lhs, int LhsMode, bool LhsIsVector,
+          typename Rhs, int RhsMode, bool RhsIsVector>
+struct selfadjoint_product_impl;
+
+template<typename Lhs, typename Rhs, int ProductTag>
+struct generic_product_impl<Lhs,Rhs,SelfAdjointShape,DenseShape,ProductTag>
+  : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,SelfAdjointShape,DenseShape,ProductTag> >
 {
-  typedef typename Lhs::Index Index;
-  static EIGEN_STRONG_INLINE void run(Index row, Index /*col*/, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res)
+  typedef typename Product<Lhs,Rhs>::Scalar Scalar;
+  
+  template<typename Dest>
+  static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
   {
-    res = lhs.row(row).transpose().cwiseProduct(rhs).sum();
+    selfadjoint_product_impl<typename Lhs::MatrixType,Lhs::Mode,false,Rhs,0,Rhs::IsVectorAtCompileTime>::run(dst, lhs.nestedExpression(), rhs, alpha);
   }
 };
 
-template<typename Lhs, typename Rhs>
-struct etor_product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,1>
+template<typename Lhs, typename Rhs, int ProductTag>
+struct generic_product_impl<Lhs,Rhs,DenseShape,SelfAdjointShape,ProductTag>
+: generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,SelfAdjointShape,ProductTag> >
 {
-  typedef typename Lhs::Index Index;
-  EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res)
+  typedef typename Product<Lhs,Rhs>::Scalar Scalar;
+  
+  template<typename Dest>
+  static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
   {
-    res = lhs.transpose().cwiseProduct(rhs).sum();
+    selfadjoint_product_impl<Lhs,0,Lhs::IsVectorAtCompileTime,typename Rhs::MatrixType,Rhs::Mode,false>::run(dst, lhs, rhs.nestedExpression(), alpha);
   }
 };
 
-template<typename Lhs, typename Rhs, typename RetScalar>
-struct etor_product_coeff_impl<InnerVectorizedTraversal, Dynamic, Lhs, Rhs, RetScalar>
+
+/***************************************************************************
+* Diagonal products
+***************************************************************************/
+  
+template<typename MatrixType, typename DiagonalType, typename Derived, int ProductOrder>
+struct diagonal_product_evaluator_base
+  : evaluator_base<Derived>
 {
-  typedef typename Lhs::Index Index;
-  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, typename Lhs::Scalar &res)
+   typedef typename MatrixType::Index Index;
+   typedef typename scalar_product_traits<typename MatrixType::Scalar, typename DiagonalType::Scalar>::ReturnType Scalar;
+   typedef typename internal::packet_traits<Scalar>::type PacketScalar;
+public:
+  enum {
+    CoeffReadCost = NumTraits<Scalar>::MulCost + evaluator<MatrixType>::CoeffReadCost + evaluator<DiagonalType>::CoeffReadCost,
+    
+    MatrixFlags = evaluator<MatrixType>::Flags,
+    DiagFlags = evaluator<DiagonalType>::Flags,
+    _StorageOrder = MatrixFlags & RowMajorBit ? RowMajor : ColMajor,
+    _ScalarAccessOnDiag =  !((int(_StorageOrder) == ColMajor && int(ProductOrder) == OnTheLeft)
+                           ||(int(_StorageOrder) == RowMajor && int(ProductOrder) == OnTheRight)),
+    _SameTypes = is_same<typename MatrixType::Scalar, typename DiagonalType::Scalar>::value,
+    // FIXME currently we need same types, but in the future the next rule should be the one
+    //_Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && ((!_PacketOnDiag) || (_SameTypes && bool(int(DiagFlags)&PacketAccessBit))),
+    _Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && _SameTypes && (_ScalarAccessOnDiag || (bool(int(DiagFlags)&PacketAccessBit))),
+    _LinearAccessMask = (MatrixType::RowsAtCompileTime==1 || MatrixType::ColsAtCompileTime==1) ? LinearAccessBit : 0,
+    Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixFlags)) | (_Vectorizable ? PacketAccessBit : 0) | AlignedBit
+            //(int(MatrixFlags)&int(DiagFlags)&AlignedBit),
+  };
+  
+  diagonal_product_evaluator_base(const MatrixType &mat, const DiagonalType &diag)
+    : m_diagImpl(diag), m_matImpl(mat)
+  {
+  }
+  
+  EIGEN_STRONG_INLINE const Scalar coeff(Index idx) const
   {
-    etor_product_coeff_vectorized_dyn_selector<Lhs,Rhs>::run(row, col, lhs, rhs, innerDim, res);
+    return m_diagImpl.coeff(idx) * m_matImpl.coeff(idx);
   }
+  
+protected:
+  template<int LoadMode>
+  EIGEN_STRONG_INLINE PacketScalar packet_impl(Index row, Index col, Index id, internal::true_type) const
+  {
+    return internal::pmul(m_matImpl.template packet<LoadMode>(row, col),
+                          internal::pset1<PacketScalar>(m_diagImpl.coeff(id)));
+  }
+  
+  template<int LoadMode>
+  EIGEN_STRONG_INLINE PacketScalar packet_impl(Index row, Index col, Index id, internal::false_type) const
+  {
+    enum {
+      InnerSize = (MatrixType::Flags & RowMajorBit) ? MatrixType::ColsAtCompileTime : MatrixType::RowsAtCompileTime,
+      DiagonalPacketLoadMode = (LoadMode == Aligned && (((InnerSize%16) == 0) || (int(DiagFlags)&AlignedBit)==AlignedBit) ? Aligned : Unaligned)
+    };
+    return internal::pmul(m_matImpl.template packet<LoadMode>(row, col),
+                          m_diagImpl.template packet<DiagonalPacketLoadMode>(id));
+  }
+  
+  typename evaluator<DiagonalType>::nestedType m_diagImpl;
+  typename evaluator<MatrixType>::nestedType   m_matImpl;
 };
 
-/*******************
-*** Packet path  ***
-*******************/
-
-template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
-struct etor_product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
+// diagonal * dense
+template<typename Lhs, typename Rhs, int ProductKind, int ProductTag>
+struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DiagonalShape, DenseShape, typename Lhs::Scalar, typename Rhs::Scalar> 
+  : diagonal_product_evaluator_base<Rhs, typename Lhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheLeft>
 {
-  typedef typename Lhs::Index Index;
-  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
+  typedef diagonal_product_evaluator_base<Rhs, typename Lhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheLeft> Base;
+  using Base::m_diagImpl;
+  using Base::m_matImpl;
+  using Base::coeff;
+  using Base::packet_impl;
+  typedef typename Base::Scalar Scalar;
+  typedef typename Base::Index Index;
+  typedef typename Base::PacketScalar PacketScalar;
+  
+  typedef Product<Lhs, Rhs, ProductKind> XprType;
+  typedef typename XprType::PlainObject PlainObject;
+  
+  enum {
+    StorageOrder = int(Rhs::Flags) & RowMajorBit ? RowMajor : ColMajor
+  };
+
+  product_evaluator(const XprType& xpr)
+    : Base(xpr.rhs(), xpr.lhs().diagonal())
   {
-    etor_product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
-    res =  pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex)), rhs.template packet<LoadMode>(UnrollingIndex, col), res);
   }
+  
+  EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const
+  {
+    return m_diagImpl.coeff(row) * m_matImpl.coeff(row, col);
+  }
+  
+  template<int LoadMode>
+  EIGEN_STRONG_INLINE PacketScalar packet(Index row, Index col) const
+  {
+    return this->template packet_impl<LoadMode>(row,col, row,
+                                 typename internal::conditional<int(StorageOrder)==RowMajor, internal::true_type, internal::false_type>::type());
+  }
+  
+  template<int LoadMode>
+  EIGEN_STRONG_INLINE PacketScalar packet(Index idx) const
+  {
+    return packet<LoadMode>(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx);
+  }
+  
 };
 
-template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
-struct etor_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
+// dense * diagonal
+template<typename Lhs, typename Rhs, int ProductKind, int ProductTag>
+struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DenseShape, DiagonalShape, typename Lhs::Scalar, typename Rhs::Scalar> 
+  : diagonal_product_evaluator_base<Lhs, typename Rhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheRight>
 {
-  typedef typename Lhs::Index Index;
-  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
+  typedef diagonal_product_evaluator_base<Lhs, typename Rhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheRight> Base;
+  using Base::m_diagImpl;
+  using Base::m_matImpl;
+  using Base::coeff;
+  using Base::packet_impl;
+  typedef typename Base::Scalar Scalar;
+  typedef typename Base::Index Index;
+  typedef typename Base::PacketScalar PacketScalar;
+  
+  typedef Product<Lhs, Rhs, ProductKind> XprType;
+  typedef typename XprType::PlainObject PlainObject;
+  
+  enum { StorageOrder = int(Lhs::Flags) & RowMajorBit ? RowMajor : ColMajor };
+
+  product_evaluator(const XprType& xpr)
+    : Base(xpr.lhs(), xpr.rhs().diagonal())
   {
-    etor_product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
-    res =  pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex), pset1<Packet>(rhs.coeff(UnrollingIndex, col)), res);
   }
+  
+  EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const
+  {
+    return m_matImpl.coeff(row, col) * m_diagImpl.coeff(col);
+  }
+  
+  template<int LoadMode>
+  EIGEN_STRONG_INLINE PacketScalar packet(Index row, Index col) const
+  {
+    return this->template packet_impl<LoadMode>(row,col, col,
+                                 typename internal::conditional<int(StorageOrder)==ColMajor, internal::true_type, internal::false_type>::type());
+  }
+  
+  template<int LoadMode>
+  EIGEN_STRONG_INLINE PacketScalar packet(Index idx) const
+  {
+    return packet<LoadMode>(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx);
+  }
+  
 };
 
-template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
-struct etor_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
+/***************************************************************************
+* Products with permutation matrices
+***************************************************************************/
+  
+template<typename Lhs, typename Rhs, int ProductTag>
+struct generic_product_impl<Lhs, Rhs, PermutationShape, DenseShape, ProductTag>
 {
-  typedef typename Lhs::Index Index;
-  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
+  template<typename Dest>
+  static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
   {
-    res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
+    permut_matrix_product_retval<Lhs, Rhs, OnTheLeft, false> pmpr(lhs, rhs);
+    pmpr.evalTo(dst);
   }
 };
 
-template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
-struct etor_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
+template<typename Lhs, typename Rhs, int ProductTag>
+struct generic_product_impl<Lhs, Rhs, DenseShape, PermutationShape, ProductTag>
 {
-  typedef typename Lhs::Index Index;
-  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
+  template<typename Dest>
+  static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
   {
-    res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
+    permut_matrix_product_retval<Rhs, Lhs, OnTheRight, false> pmpr(rhs, lhs);
+    pmpr.evalTo(dst);
   }
 };
 
-template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
-struct etor_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
+template<typename Lhs, typename Rhs, int ProductTag>
+struct generic_product_impl<Transpose<Lhs>, Rhs, PermutationShape, DenseShape, ProductTag>
 {
-  typedef typename Lhs::Index Index;
-  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
+  template<typename Dest>
+  static void evalTo(Dest& dst, const Transpose<Lhs>& lhs, const Rhs& rhs)
   {
-    eigen_assert(innerDim>0 && "you are using a non initialized matrix");
-    res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
-    for(Index i = 1; i < innerDim; ++i)
-      res =  pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode>(i, col), res);
+    permut_matrix_product_retval<Lhs, Rhs, OnTheLeft, true> pmpr(lhs.nestedPermutation(), rhs);
+    pmpr.evalTo(dst);
   }
 };
 
-template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
-struct etor_product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
+template<typename Lhs, typename Rhs, int ProductTag>
+struct generic_product_impl<Lhs, Transpose<Rhs>, DenseShape, PermutationShape, ProductTag>
 {
-  typedef typename Lhs::Index Index;
-  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
+  template<typename Dest>
+  static void evalTo(Dest& dst, const Lhs& lhs, const Transpose<Rhs>& rhs)
   {
-    eigen_assert(innerDim>0 && "you are using a non initialized matrix");
-    res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
-    for(Index i = 1; i < innerDim; ++i)
-      res =  pmadd(lhs.template packet<LoadMode>(row, i), pset1<Packet>(rhs.coeff(i, col)), res);
+    permut_matrix_product_retval<Rhs, Lhs, OnTheRight, true> pmpr(rhs.nestedPermutation(), lhs);
+    pmpr.evalTo(dst);
   }
 };
 
diff --git a/Eigen/src/Core/Redux.h b/Eigen/src/Core/Redux.h
index 5b82c9a65..c6c355d43 100644
--- a/Eigen/src/Core/Redux.h
+++ b/Eigen/src/Core/Redux.h
@@ -65,6 +65,25 @@ public:
               ? CompleteUnrolling
               : NoUnrolling
   };
+  
+#ifdef EIGEN_DEBUG_ASSIGN
+  static void debug()
+  {
+    std::cerr << "Xpr: " << typeid(typename Derived::XprType).name() << std::endl;
+    std::cerr.setf(std::ios::hex, std::ios::basefield);
+    EIGEN_DEBUG_VAR(Derived::Flags)
+    std::cerr.unsetf(std::ios::hex);
+    EIGEN_DEBUG_VAR(InnerMaxSize)
+    EIGEN_DEBUG_VAR(PacketSize)
+    EIGEN_DEBUG_VAR(MightVectorize)
+    EIGEN_DEBUG_VAR(MayLinearVectorize)
+    EIGEN_DEBUG_VAR(MaySliceVectorize)
+    EIGEN_DEBUG_VAR(Traversal)
+    EIGEN_DEBUG_VAR(UnrollingLimit)
+    EIGEN_DEBUG_VAR(Unrolling)
+    std::cerr << std::endl;
+  }
+#endif
 };
 
 /***************************************************************************
@@ -174,7 +193,7 @@ struct redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>
   typedef typename Derived::Scalar Scalar;
   typedef typename Derived::Index Index;
   EIGEN_DEVICE_FUNC
-  static EIGEN_STRONG_INLINE Scalar run(const Derived& mat, const Func& func)
+  static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
   {
     eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
     Scalar res;
@@ -200,14 +219,14 @@ struct redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
   typedef typename packet_traits<Scalar>::type PacketScalar;
   typedef typename Derived::Index Index;
 
-  static Scalar run(const Derived& mat, const Func& func)
+  static Scalar run(const Derived &mat, const Func& func)
   {
     const Index size = mat.size();
-    eigen_assert(size && "you are using an empty matrix");
+    
     const Index packetSize = packet_traits<Scalar>::size;
     const Index alignedStart = internal::first_aligned(mat);
     enum {
-      alignment = bool(Derived::Flags & DirectAccessBit) || bool(Derived::Flags & AlignedBit)
+      alignment = (bool(Derived::Flags & DirectAccessBit) && bool(packet_traits<Scalar>::AlignedOnScalar)) || bool(Derived::Flags & AlignedBit)
                 ? Aligned : Unaligned
     };
     const Index alignedSize2 = ((size-alignedStart)/(2*packetSize))*(2*packetSize);
@@ -258,7 +277,7 @@ struct redux_impl<Func, Derived, SliceVectorizedTraversal, NoUnrolling>
   typedef typename packet_traits<Scalar>::type PacketScalar;
   typedef typename Derived::Index Index;
 
-  static Scalar run(const Derived& mat, const Func& func)
+  static Scalar run(const Derived &mat, const Func& func)
   {
     eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
     const Index innerSize = mat.innerSize();
@@ -300,7 +319,7 @@ struct redux_impl<Func, Derived, LinearVectorizedTraversal, CompleteUnrolling>
     Size = Derived::SizeAtCompileTime,
     VectorizedSize = (Size / PacketSize) * PacketSize
   };
-  static EIGEN_STRONG_INLINE Scalar run(const Derived& mat, const Func& func)
+  static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
   {
     eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
     if (VectorizedSize > 0) {
@@ -315,6 +334,63 @@ struct redux_impl<Func, Derived, LinearVectorizedTraversal, CompleteUnrolling>
   }
 };
 
+// evaluator adaptor
+template<typename _XprType>
+class redux_evaluator
+{
+public:
+  typedef _XprType XprType;
+  redux_evaluator(const XprType &xpr) : m_evaluator(xpr), m_xpr(xpr) {}
+  
+  typedef typename XprType::Index Index;
+  typedef typename XprType::Scalar Scalar;
+  typedef typename XprType::CoeffReturnType CoeffReturnType;
+  typedef typename XprType::PacketScalar PacketScalar;
+  typedef typename XprType::PacketReturnType PacketReturnType;
+  
+  enum {
+    MaxRowsAtCompileTime = XprType::MaxRowsAtCompileTime,
+    MaxColsAtCompileTime = XprType::MaxColsAtCompileTime,
+    // TODO we should not remove DirectAccessBit and rather find an elegant way to query the alignment offset at runtime from the evaluator
+    Flags = evaluator<XprType>::Flags & ~DirectAccessBit,
+    IsRowMajor = XprType::IsRowMajor,
+    SizeAtCompileTime = XprType::SizeAtCompileTime,
+    InnerSizeAtCompileTime = XprType::InnerSizeAtCompileTime,
+    CoeffReadCost = evaluator<XprType>::CoeffReadCost
+  };
+  
+  Index rows() const { return m_xpr.rows(); }
+  Index cols() const { return m_xpr.cols(); }
+  Index size() const { return m_xpr.size(); }
+  Index innerSize() const { return m_xpr.innerSize(); }
+  Index outerSize() const { return m_xpr.outerSize(); }
+
+  CoeffReturnType coeff(Index row, Index col) const
+  { return m_evaluator.coeff(row, col); }
+
+  CoeffReturnType coeff(Index index) const
+  { return m_evaluator.coeff(index); }
+
+  template<int LoadMode>
+  PacketReturnType packet(Index row, Index col) const
+  { return m_evaluator.template packet<LoadMode>(row, col); }
+
+  template<int LoadMode>
+  PacketReturnType packet(Index index) const
+  { return m_evaluator.template packet<LoadMode>(index); }
+  
+  CoeffReturnType coeffByOuterInner(Index outer, Index inner) const
+  { return m_evaluator.coeff(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }
+  
+  template<int LoadMode>
+  PacketReturnType packetByOuterInner(Index outer, Index inner) const
+  { return m_evaluator.template packet<LoadMode>(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }
+  
+protected:
+  typename internal::evaluator<XprType>::nestedType m_evaluator;
+  const XprType &m_xpr;
+};
+
 } // end namespace internal
 
 /***************************************************************************
@@ -325,7 +401,7 @@ struct redux_impl<Func, Derived, LinearVectorizedTraversal, CompleteUnrolling>
 /** \returns the result of a full redux operation on the whole matrix or vector using \a func
   *
   * The template parameter \a BinaryOp is the type of the functor \a func which must be
-  * an associative operator. Both current STL and TR1 functor styles are handled.
+  * an associative operator. Both current C++98 and C++11 functor styles are handled.
   *
   * \sa DenseBase::sum(), DenseBase::minCoeff(), DenseBase::maxCoeff(), MatrixBase::colwise(), MatrixBase::rowwise()
   */
@@ -334,9 +410,22 @@ template<typename Func>
 EIGEN_STRONG_INLINE typename internal::result_of<Func(typename internal::traits<Derived>::Scalar)>::type
 DenseBase<Derived>::redux(const Func& func) const
 {
-  typedef typename internal::remove_all<typename Derived::Nested>::type ThisNested;
-  return internal::redux_impl<Func, ThisNested>
-            ::run(derived(), func);
+  eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix");
+  
+  // FIXME, eval_nest should be handled by redux_evaluator, however:
+  //  - it is currently difficult to provide the right Flags since they are still handled by the expressions
+  //  - handling it here might reduce the number of template instantiations
+//   typedef typename internal::nested_eval<Derived,1>::type ThisNested;
+//   typedef typename internal::remove_all<ThisNested>::type ThisNestedCleaned;
+//   typedef typename internal::redux_evaluator<ThisNestedCleaned> ThisEvaluator;
+//   
+//   ThisNested thisNested(derived());
+//   ThisEvaluator thisEval(thisNested);
+  
+  typedef typename internal::redux_evaluator<Derived> ThisEvaluator;
+  ThisEvaluator thisEval(derived());
+  
+  return internal::redux_impl<Func, ThisEvaluator>::run(thisEval, func);
 }
 
 /** \returns the minimum of all coefficients of \c *this.
@@ -346,7 +435,7 @@ template<typename Derived>
 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
 DenseBase<Derived>::minCoeff() const
 {
-  return this->redux(Eigen::internal::scalar_min_op<Scalar>());
+  return derived().redux(Eigen::internal::scalar_min_op<Scalar>());
 }
 
 /** \returns the maximum of all coefficients of \c *this.
@@ -356,7 +445,7 @@ template<typename Derived>
 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
 DenseBase<Derived>::maxCoeff() const
 {
-  return this->redux(Eigen::internal::scalar_max_op<Scalar>());
+  return derived().redux(Eigen::internal::scalar_max_op<Scalar>());
 }
 
 /** \returns the sum of all coefficients of *this
@@ -369,7 +458,7 @@ DenseBase<Derived>::sum() const
 {
   if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
     return Scalar(0);
-  return this->redux(Eigen::internal::scalar_sum_op<Scalar>());
+  return derived().redux(Eigen::internal::scalar_sum_op<Scalar>());
 }
 
 /** \returns the mean of all coefficients of *this
@@ -380,7 +469,7 @@ template<typename Derived>
 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
 DenseBase<Derived>::mean() const
 {
-  return Scalar(this->redux(Eigen::internal::scalar_sum_op<Scalar>())) / Scalar(this->size());
+  return Scalar(derived().redux(Eigen::internal::scalar_sum_op<Scalar>())) / Scalar(this->size());
 }
 
 /** \returns the product of all coefficients of *this
@@ -396,7 +485,7 @@ DenseBase<Derived>::prod() const
 {
   if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
     return Scalar(1);
-  return this->redux(Eigen::internal::scalar_product_op<Scalar>());
+  return derived().redux(Eigen::internal::scalar_product_op<Scalar>());
 }
 
 /** \returns the trace of \c *this, i.e. the sum of the coefficients on the main diagonal.
diff --git a/Eigen/src/Core/Ref.h b/Eigen/src/Core/Ref.h
index 92614c6e2..09921c9e7 100644
--- a/Eigen/src/Core/Ref.h
+++ b/Eigen/src/Core/Ref.h
@@ -12,10 +12,6 @@
 
 namespace Eigen { 
 
-template<typename Derived> class RefBase;
-template<typename PlainObjectType, int Options = 0,
-         typename StrideType = typename internal::conditional<PlainObjectType::IsVectorAtCompileTime,InnerStride<1>,OuterStride<> >::type > class Ref;
-
 /** \class Ref
   * \ingroup Core_Module
   *
@@ -247,7 +243,7 @@ template<typename TPlainObjectType, int Options, typename StrideType> class Ref<
     template<typename Expression>
     void construct(const Expression& expr, internal::false_type)
     {
-      m_object.lazyAssign(expr);
+      internal::call_assignment_no_alias(m_object,expr,internal::assign_op<Scalar>());
       Base::construct(m_object);
     }
 
diff --git a/Eigen/src/Core/Replicate.h b/Eigen/src/Core/Replicate.h
index dde86a834..3777049ee 100644
--- a/Eigen/src/Core/Replicate.h
+++ b/Eigen/src/Core/Replicate.h
@@ -53,8 +53,9 @@ struct traits<Replicate<MatrixType,RowFactor,ColFactor> >
     IsRowMajor = MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1 ? 1
                : MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1 ? 0
                : (MatrixType::Flags & RowMajorBit) ? 1 : 0,
-    Flags = (_MatrixTypeNested::Flags & HereditaryBits & ~RowMajorBit) | (IsRowMajor ? RowMajorBit : 0),
-    CoeffReadCost = _MatrixTypeNested::CoeffReadCost
+    
+    // FIXME enable DirectAccess with negative strides?
+    Flags = IsRowMajor ? RowMajorBit : 0
   };
 };
 }
@@ -68,6 +69,7 @@ template<typename MatrixType,int RowFactor,int ColFactor> class Replicate
 
     typedef typename internal::dense_xpr_base<Replicate>::type Base;
     EIGEN_DENSE_PUBLIC_INTERFACE(Replicate)
+    typedef typename internal::remove_all<MatrixType>::type NestedExpression;
 
     template<typename OriginalMatrixType>
     inline explicit Replicate(const OriginalMatrixType& a_matrix)
diff --git a/Eigen/src/Core/ReturnByValue.h b/Eigen/src/Core/ReturnByValue.h
index 7834f6cbc..f4e12a93b 100644
--- a/Eigen/src/Core/ReturnByValue.h
+++ b/Eigen/src/Core/ReturnByValue.h
@@ -38,9 +38,10 @@ struct traits<ReturnByValue<Derived> >
  * So internal::nested always gives the plain return matrix type.
  *
  * FIXME: I don't understand why we need this specialization: isn't this taken care of by the EvalBeforeNestingBit ??
+ * Answer: EvalBeforeNestingBit should be deprecated since we have the evaluators
  */
 template<typename Derived,int n,typename PlainObject>
-struct nested<ReturnByValue<Derived>, n, PlainObject>
+struct nested_eval<ReturnByValue<Derived>, n, PlainObject>
 {
   typedef typename traits<Derived>::ReturnType type;
 };
@@ -73,6 +74,7 @@ template<typename Derived> class ReturnByValue
     const Unusable& coeff(Index,Index) const { return *reinterpret_cast<const Unusable*>(this); }
     Unusable& coeffRef(Index) { return *reinterpret_cast<Unusable*>(this); }
     Unusable& coeffRef(Index,Index) { return *reinterpret_cast<Unusable*>(this); }
+#undef Unusable
 #endif
 };
 
@@ -84,6 +86,36 @@ Derived& DenseBase<Derived>::operator=(const ReturnByValue<OtherDerived>& other)
   return derived();
 }
 
+namespace internal {
+
+// Expression is evaluated in a temporary; default implementation of Assignment is bypassed so that
+// when a ReturnByValue expression is assigned, the evaluator is not constructed.
+// TODO: Finalize port to new regime; ReturnByValue should not exist in the expression world
+  
+template<typename Derived>
+struct evaluator<ReturnByValue<Derived> >
+  : public evaluator<typename internal::traits<Derived>::ReturnType>::type
+{
+  typedef ReturnByValue<Derived> XprType;
+  typedef typename internal::traits<Derived>::ReturnType PlainObject;
+  typedef typename evaluator<PlainObject>::type Base;
+  
+  typedef evaluator type;
+  typedef evaluator nestedType;
+
+  evaluator(const XprType& xpr) 
+    : m_result(xpr.rows(), xpr.cols())
+  {
+    ::new (static_cast<Base*>(this)) Base(m_result);
+    xpr.evalTo(m_result);
+  }
+
+protected:
+  PlainObject m_result;
+};
+
+} // end namespace internal
+
 } // end namespace Eigen
 
 #endif // EIGEN_RETURNBYVALUE_H
diff --git a/Eigen/src/Core/Reverse.h b/Eigen/src/Core/Reverse.h
index e30ae3d28..01de90800 100644
--- a/Eigen/src/Core/Reverse.h
+++ b/Eigen/src/Core/Reverse.h
@@ -44,14 +44,7 @@ struct traits<Reverse<MatrixType, Direction> >
     ColsAtCompileTime = MatrixType::ColsAtCompileTime,
     MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
     MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,
-
-    // let's enable LinearAccess only with vectorization because of the product overhead
-    LinearAccess = ( (Direction==BothDirections) && (int(_MatrixTypeNested::Flags)&PacketAccessBit) )
-                 ? LinearAccessBit : 0,
-
-    Flags = int(_MatrixTypeNested::Flags) & (HereditaryBits | LvalueBit | PacketAccessBit | LinearAccess),
-
-    CoeffReadCost = _MatrixTypeNested::CoeffReadCost
+    Flags = _MatrixTypeNested::Flags & (RowMajorBit | LvalueBit)
   };
 };
 
@@ -74,6 +67,7 @@ template<typename MatrixType, int Direction> class Reverse
 
     typedef typename internal::dense_xpr_base<Reverse>::type Base;
     EIGEN_DENSE_PUBLIC_INTERFACE(Reverse)
+    typedef typename internal::remove_all<MatrixType>::type NestedExpression;
     using Base::IsRowMajor;
 
     // next line is necessary because otherwise const version of operator()
diff --git a/Eigen/src/Core/Select.h b/Eigen/src/Core/Select.h
index 87993bbb5..0cb85a4ad 100644
--- a/Eigen/src/Core/Select.h
+++ b/Eigen/src/Core/Select.h
@@ -43,10 +43,7 @@ struct traits<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >
     ColsAtCompileTime = ConditionMatrixType::ColsAtCompileTime,
     MaxRowsAtCompileTime = ConditionMatrixType::MaxRowsAtCompileTime,
     MaxColsAtCompileTime = ConditionMatrixType::MaxColsAtCompileTime,
-    Flags = (unsigned int)ThenMatrixType::Flags & ElseMatrixType::Flags & HereditaryBits,
-    CoeffReadCost = traits<typename remove_all<ConditionMatrixNested>::type>::CoeffReadCost
-                  + EIGEN_SIZE_MAX(traits<typename remove_all<ThenMatrixNested>::type>::CoeffReadCost,
-                                   traits<typename remove_all<ElseMatrixNested>::type>::CoeffReadCost)
+    Flags = (unsigned int)ThenMatrixType::Flags & ElseMatrixType::Flags & RowMajorBit
   };
 };
 }
diff --git a/Eigen/src/Core/SelfAdjointView.h b/Eigen/src/Core/SelfAdjointView.h
index 6c2733650..19cb232c9 100644
--- a/Eigen/src/Core/SelfAdjointView.h
+++ b/Eigen/src/Core/SelfAdjointView.h
@@ -35,26 +35,22 @@ struct traits<SelfAdjointView<MatrixType, UpLo> > : traits<MatrixType>
   typedef typename nested<MatrixType>::type MatrixTypeNested;
   typedef typename remove_all<MatrixTypeNested>::type MatrixTypeNestedCleaned;
   typedef MatrixType ExpressionType;
-  typedef typename MatrixType::PlainObject DenseMatrixType;
+  typedef typename MatrixType::PlainObject FullMatrixType;
   enum {
     Mode = UpLo | SelfAdjoint,
     Flags =  MatrixTypeNestedCleaned::Flags & (HereditaryBits)
-           & (~(PacketAccessBit | DirectAccessBit | LinearAccessBit)), // FIXME these flags should be preserved
-    CoeffReadCost = MatrixTypeNestedCleaned::CoeffReadCost
+           & (~(PacketAccessBit | DirectAccessBit | LinearAccessBit)) // FIXME these flags should be preserved
   };
 };
 }
 
-template <typename Lhs, int LhsMode, bool LhsIsVector,
-          typename Rhs, int RhsMode, bool RhsIsVector>
-struct SelfadjointProductMatrix;
-
 // FIXME could also be called SelfAdjointWrapper to be consistent with DiagonalWrapper ??
-template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
-  : public TriangularBase<SelfAdjointView<MatrixType, UpLo> >
+template<typename _MatrixType, unsigned int UpLo> class SelfAdjointView
+  : public TriangularBase<SelfAdjointView<_MatrixType, UpLo> >
 {
   public:
 
+    typedef _MatrixType MatrixType;
     typedef TriangularBase<SelfAdjointView> Base;
     typedef typename internal::traits<SelfAdjointView>::MatrixTypeNested MatrixTypeNested;
     typedef typename internal::traits<SelfAdjointView>::MatrixTypeNestedCleaned MatrixTypeNestedCleaned;
@@ -65,7 +61,8 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
     typedef typename MatrixType::Index Index;
 
     enum {
-      Mode = internal::traits<SelfAdjointView>::Mode
+      Mode = internal::traits<SelfAdjointView>::Mode,
+      Flags = internal::traits<SelfAdjointView>::Flags
     };
     typedef typename MatrixType::PlainObject PlainObject;
 
@@ -111,26 +108,29 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
     EIGEN_DEVICE_FUNC
     MatrixTypeNestedCleaned& nestedExpression() { return *const_cast<MatrixTypeNestedCleaned*>(&m_matrix); }
 
-    /** Efficient self-adjoint matrix times vector/matrix product */
+    /** Efficient triangular matrix times vector/matrix product */
     template<typename OtherDerived>
     EIGEN_DEVICE_FUNC
-    SelfadjointProductMatrix<MatrixType,Mode,false,OtherDerived,0,OtherDerived::IsVectorAtCompileTime>
+    const Product<SelfAdjointView,OtherDerived>
     operator*(const MatrixBase<OtherDerived>& rhs) const
     {
-      return SelfadjointProductMatrix
-              <MatrixType,Mode,false,OtherDerived,0,OtherDerived::IsVectorAtCompileTime>
-              (m_matrix, rhs.derived());
+      return Product<SelfAdjointView,OtherDerived>(*this, rhs.derived());
     }
 
-    /** Efficient vector/matrix times self-adjoint matrix product */
+    /** Efficient vector/matrix times triangular matrix product */
     template<typename OtherDerived> friend
     EIGEN_DEVICE_FUNC
-    SelfadjointProductMatrix<OtherDerived,0,OtherDerived::IsVectorAtCompileTime,MatrixType,Mode,false>
+    const Product<OtherDerived,SelfAdjointView>
     operator*(const MatrixBase<OtherDerived>& lhs, const SelfAdjointView& rhs)
     {
-      return SelfadjointProductMatrix
-              <OtherDerived,0,OtherDerived::IsVectorAtCompileTime,MatrixType,Mode,false>
-              (lhs.derived(),rhs.m_matrix);
+      return Product<OtherDerived,SelfAdjointView>(lhs.derived(),rhs);
+    }
+    
+    friend EIGEN_DEVICE_FUNC
+    const SelfAdjointView<const CwiseUnaryOp<internal::scalar_multiple_op<Scalar>,MatrixType>,UpLo>
+    operator*(const Scalar& s, const SelfAdjointView& mat)
+    {
+      return (s*mat.nestedExpression()).template selfadjointView<UpLo>();
     }
 
     /** Perform a symmetric rank 2 update of the selfadjoint matrix \c *this:
@@ -194,96 +194,57 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
 
 namespace internal {
 
-template<typename Derived1, typename Derived2, int UnrollCount, bool ClearOpposite>
-struct triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Upper), UnrollCount, ClearOpposite>
-{
-  enum {
-    col = (UnrollCount-1) / Derived1::RowsAtCompileTime,
-    row = (UnrollCount-1) % Derived1::RowsAtCompileTime
-  };
-
-  EIGEN_DEVICE_FUNC
-  static inline void run(Derived1 &dst, const Derived2 &src)
-  {
-    triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Upper), UnrollCount-1, ClearOpposite>::run(dst, src);
-
-    if(row == col)
-      dst.coeffRef(row, col) = numext::real(src.coeff(row, col));
-    else if(row < col)
-      dst.coeffRef(col, row) = numext::conj(dst.coeffRef(row, col) = src.coeff(row, col));
-  }
-};
-
-template<typename Derived1, typename Derived2, bool ClearOpposite>
-struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Upper, 0, ClearOpposite>
+// TODO currently a selfadjoint expression has the form SelfAdjointView<.,.>
+//      in the future selfadjoint-ness should be defined by the expression traits
+//      such that Transpose<SelfAdjointView<.,.> > is valid. (currently TriangularBase::transpose() is overloaded to make it work)
+template<typename MatrixType, unsigned int Mode>
+struct evaluator_traits<SelfAdjointView<MatrixType,Mode> >
 {
-  EIGEN_DEVICE_FUNC
-  static inline void run(Derived1 &, const Derived2 &) {}
+  typedef typename storage_kind_to_evaluator_kind<typename MatrixType::StorageKind>::Kind Kind;
+  typedef SelfAdjointShape Shape;
+  
+  static const int AssumeAliasing = 0;
 };
 
-template<typename Derived1, typename Derived2, int UnrollCount, bool ClearOpposite>
-struct triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Lower), UnrollCount, ClearOpposite>
+template<int UpLo, int SetOpposite, typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor, int Version>
+class triangular_dense_assignment_kernel<UpLo,SelfAdjoint,SetOpposite,DstEvaluatorTypeT,SrcEvaluatorTypeT,Functor,Version>
+  : public generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, Version>
 {
-  enum {
-    col = (UnrollCount-1) / Derived1::RowsAtCompileTime,
-    row = (UnrollCount-1) % Derived1::RowsAtCompileTime
-  };
-
-  EIGEN_DEVICE_FUNC
-  static inline void run(Derived1 &dst, const Derived2 &src)
+protected:
+  typedef generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, Version> Base;
+  typedef typename Base::DstXprType DstXprType;
+  typedef typename Base::SrcXprType SrcXprType;
+  using Base::m_dst;
+  using Base::m_src;
+  using Base::m_functor;
+public:
+  
+  typedef typename Base::DstEvaluatorType DstEvaluatorType;
+  typedef typename Base::SrcEvaluatorType SrcEvaluatorType;
+  typedef typename Base::Scalar Scalar;
+  typedef typename Base::Index Index;
+  typedef typename Base::AssignmentTraits AssignmentTraits;
+  
+  
+  triangular_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr)
+    : Base(dst, src, func, dstExpr)
+  {}
+  
+  void assignCoeff(Index row, Index col)
   {
-    triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Lower), UnrollCount-1, ClearOpposite>::run(dst, src);
-
-    if(row == col)
-      dst.coeffRef(row, col) = numext::real(src.coeff(row, col));
-    else if(row > col)
-      dst.coeffRef(col, row) = numext::conj(dst.coeffRef(row, col) = src.coeff(row, col));
+    eigen_internal_assert(row!=col);
+    Scalar tmp = m_src.coeff(row,col);
+    m_functor.assignCoeff(m_dst.coeffRef(row,col), tmp);
+    m_functor.assignCoeff(m_dst.coeffRef(col,row), numext::conj(tmp));
   }
-};
-
-template<typename Derived1, typename Derived2, bool ClearOpposite>
-struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Lower, 0, ClearOpposite>
-{
-  EIGEN_DEVICE_FUNC
-  static inline void run(Derived1 &, const Derived2 &) {}
-};
-
-template<typename Derived1, typename Derived2, bool ClearOpposite>
-struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Upper, Dynamic, ClearOpposite>
-{
-  typedef typename Derived1::Index Index;
-  EIGEN_DEVICE_FUNC
-  static inline void run(Derived1 &dst, const Derived2 &src)
+  
+  void assignDiagonalCoeff(Index id)
   {
-    for(Index j = 0; j < dst.cols(); ++j)
-    {
-      for(Index i = 0; i < j; ++i)
-      {
-        dst.copyCoeff(i, j, src);
-        dst.coeffRef(j,i) = numext::conj(dst.coeff(i,j));
-      }
-      dst.copyCoeff(j, j, src);
-    }
-  }
-};
-
-template<typename Derived1, typename Derived2, bool ClearOpposite>
-struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Lower, Dynamic, ClearOpposite>
-{
-  EIGEN_DEVICE_FUNC
-  static inline void run(Derived1 &dst, const Derived2 &src)
-  {
-  typedef typename Derived1::Index Index;
-    for(Index i = 0; i < dst.rows(); ++i)
-    {
-      for(Index j = 0; j < i; ++j)
-      {
-        dst.copyCoeff(i, j, src);
-        dst.coeffRef(j,i) = numext::conj(dst.coeff(i,j));
-      }
-      dst.copyCoeff(i, i, src);
-    }
+    Base::assignCoeff(id,id);
   }
+  
+  void assignOppositeCoeff(Index, Index)
+  { eigen_internal_assert(false && "should never be called"); }
 };
 
 } // end namespace internal
diff --git a/Eigen/src/Core/SelfCwiseBinaryOp.h b/Eigen/src/Core/SelfCwiseBinaryOp.h
index 65864adf8..38185d9d7 100644
--- a/Eigen/src/Core/SelfCwiseBinaryOp.h
+++ b/Eigen/src/Core/SelfCwiseBinaryOp.h
@@ -12,179 +12,11 @@
 
 namespace Eigen { 
 
-/** \class SelfCwiseBinaryOp
-  * \ingroup Core_Module
-  *
-  * \internal
-  *
-  * \brief Internal helper class for optimizing operators like +=, -=
-  *
-  * This is a pseudo expression class re-implementing the copyCoeff/copyPacket
-  * method to directly performs a +=/-= operations in an optimal way. In particular,
-  * this allows to make sure that the input/output data are loaded only once using
-  * aligned packet loads.
-  *
-  * \sa class SwapWrapper for a similar trick.
-  */
-
-namespace internal {
-template<typename BinaryOp, typename Lhs, typename Rhs>
-struct traits<SelfCwiseBinaryOp<BinaryOp,Lhs,Rhs> >
-  : traits<CwiseBinaryOp<BinaryOp,Lhs,Rhs> >
-{
-  enum {
-    // Note that it is still a good idea to preserve the DirectAccessBit
-    // so that assign can correctly align the data.
-    Flags = traits<CwiseBinaryOp<BinaryOp,Lhs,Rhs> >::Flags | (Lhs::Flags&AlignedBit) | (Lhs::Flags&DirectAccessBit) | (Lhs::Flags&LvalueBit),
-    OuterStrideAtCompileTime = Lhs::OuterStrideAtCompileTime,
-    InnerStrideAtCompileTime = Lhs::InnerStrideAtCompileTime
-  };
-};
-}
-
-template<typename BinaryOp, typename Lhs, typename Rhs> class SelfCwiseBinaryOp
-  : public internal::dense_xpr_base< SelfCwiseBinaryOp<BinaryOp, Lhs, Rhs> >::type
-{
-  public:
-
-    typedef typename internal::dense_xpr_base<SelfCwiseBinaryOp>::type Base;
-    EIGEN_DENSE_PUBLIC_INTERFACE(SelfCwiseBinaryOp)
-
-    typedef typename internal::packet_traits<Scalar>::type Packet;
-
-    EIGEN_DEVICE_FUNC
-    inline SelfCwiseBinaryOp(Lhs& xpr, const BinaryOp& func = BinaryOp()) : m_matrix(xpr), m_functor(func) {}
-
-    EIGEN_DEVICE_FUNC inline Index rows() const { return m_matrix.rows(); }
-    EIGEN_DEVICE_FUNC inline Index cols() const { return m_matrix.cols(); }
-    EIGEN_DEVICE_FUNC inline Index outerStride() const { return m_matrix.outerStride(); }
-    EIGEN_DEVICE_FUNC inline Index innerStride() const { return m_matrix.innerStride(); }
-    EIGEN_DEVICE_FUNC inline const Scalar* data() const { return m_matrix.data(); }
-
-    // note that this function is needed by assign to correctly align loads/stores
-    // TODO make Assign use .data()
-    EIGEN_DEVICE_FUNC
-    inline Scalar& coeffRef(Index row, Index col)
-    {
-      EIGEN_STATIC_ASSERT_LVALUE(Lhs)
-      return m_matrix.const_cast_derived().coeffRef(row, col);
-    }
-    EIGEN_DEVICE_FUNC
-    inline const Scalar& coeffRef(Index row, Index col) const
-    {
-      return m_matrix.coeffRef(row, col);
-    }
-
-    // note that this function is needed by assign to correctly align loads/stores
-    // TODO make Assign use .data()
-    EIGEN_DEVICE_FUNC
-    inline Scalar& coeffRef(Index index)
-    {
-      EIGEN_STATIC_ASSERT_LVALUE(Lhs)
-      return m_matrix.const_cast_derived().coeffRef(index);
-    }
-    EIGEN_DEVICE_FUNC
-    inline const Scalar& coeffRef(Index index) const
-    {
-      return m_matrix.const_cast_derived().coeffRef(index);
-    }
-
-    template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC
-    void copyCoeff(Index row, Index col, const DenseBase<OtherDerived>& other)
-    {
-      OtherDerived& _other = other.const_cast_derived();
-      eigen_internal_assert(row >= 0 && row < rows()
-                         && col >= 0 && col < cols());
-      Scalar& tmp = m_matrix.coeffRef(row,col);
-      tmp = m_functor(tmp, _other.coeff(row,col));
-    }
-
-    template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC
-    void copyCoeff(Index index, const DenseBase<OtherDerived>& other)
-    {
-      OtherDerived& _other = other.const_cast_derived();
-      eigen_internal_assert(index >= 0 && index < m_matrix.size());
-      Scalar& tmp = m_matrix.coeffRef(index);
-      tmp = m_functor(tmp, _other.coeff(index));
-    }
-
-    template<typename OtherDerived, int StoreMode, int LoadMode>
-    void copyPacket(Index row, Index col, const DenseBase<OtherDerived>& other)
-    {
-      OtherDerived& _other = other.const_cast_derived();
-      eigen_internal_assert(row >= 0 && row < rows()
-                        && col >= 0 && col < cols());
-      m_matrix.template writePacket<StoreMode>(row, col,
-        m_functor.packetOp(m_matrix.template packet<StoreMode>(row, col),_other.template packet<LoadMode>(row, col)) );
-    }
-
-    template<typename OtherDerived, int StoreMode, int LoadMode>
-    void copyPacket(Index index, const DenseBase<OtherDerived>& other)
-    {
-      OtherDerived& _other = other.const_cast_derived();
-      eigen_internal_assert(index >= 0 && index < m_matrix.size());
-      m_matrix.template writePacket<StoreMode>(index,
-        m_functor.packetOp(m_matrix.template packet<StoreMode>(index),_other.template packet<LoadMode>(index)) );
-    }
-
-    // reimplement lazyAssign to handle complex *= real
-    // see CwiseBinaryOp ctor for details
-    template<typename RhsDerived>
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE SelfCwiseBinaryOp& lazyAssign(const DenseBase<RhsDerived>& rhs)
-    {
-      EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Lhs,RhsDerived)
-      EIGEN_CHECK_BINARY_COMPATIBILIY(BinaryOp,typename Lhs::Scalar,typename RhsDerived::Scalar);
-      
-    #ifdef EIGEN_DEBUG_ASSIGN
-      internal::assign_traits<SelfCwiseBinaryOp, RhsDerived>::debug();
-    #endif
-      eigen_assert(rows() == rhs.rows() && cols() == rhs.cols());
-      internal::assign_impl<SelfCwiseBinaryOp, RhsDerived>::run(*this,rhs.derived());
-    #ifndef EIGEN_NO_DEBUG
-      this->checkTransposeAliasing(rhs.derived());
-    #endif
-      return *this;
-    }
-    
-    // overloaded to honor evaluation of special matrices
-    // maybe another solution would be to not use SelfCwiseBinaryOp
-    // at first...
-    EIGEN_DEVICE_FUNC
-    SelfCwiseBinaryOp& operator=(const Rhs& _rhs)
-    {
-      typename internal::nested<Rhs>::type rhs(_rhs);
-      return Base::operator=(rhs);
-    }
-
-    EIGEN_DEVICE_FUNC
-    Lhs& expression() const 
-    { 
-      return m_matrix;
-    }
-
-    EIGEN_DEVICE_FUNC
-    const BinaryOp& functor() const 
-    { 
-      return m_functor;
-    }
-
-  protected:
-    Lhs& m_matrix;
-    const BinaryOp& m_functor;
-
-  private:
-    SelfCwiseBinaryOp& operator=(const SelfCwiseBinaryOp&);
-};
-
 template<typename Derived>
 inline Derived& DenseBase<Derived>::operator*=(const Scalar& other)
 {
   typedef typename Derived::PlainObject PlainObject;
-  SelfCwiseBinaryOp<internal::scalar_product_op<Scalar>, Derived, typename PlainObject::ConstantReturnType> tmp(derived());
-  tmp = PlainObject::Constant(rows(),cols(),other);
+  internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::mul_assign_op<Scalar>());
   return derived();
 }
 
@@ -192,8 +24,7 @@ template<typename Derived>
 inline Derived& ArrayBase<Derived>::operator+=(const Scalar& other)
 {
   typedef typename Derived::PlainObject PlainObject;
-  SelfCwiseBinaryOp<internal::scalar_sum_op<Scalar>, Derived, typename PlainObject::ConstantReturnType> tmp(derived());
-  tmp = PlainObject::Constant(rows(),cols(),other);
+  internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::add_assign_op<Scalar>());
   return derived();
 }
 
@@ -201,23 +32,15 @@ template<typename Derived>
 inline Derived& ArrayBase<Derived>::operator-=(const Scalar& other)
 {
   typedef typename Derived::PlainObject PlainObject;
-  SelfCwiseBinaryOp<internal::scalar_difference_op<Scalar>, Derived, typename PlainObject::ConstantReturnType> tmp(derived());
-  tmp = PlainObject::Constant(rows(),cols(),other);
+  internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::sub_assign_op<Scalar>());
   return derived();
 }
 
 template<typename Derived>
 inline Derived& DenseBase<Derived>::operator/=(const Scalar& other)
 {
-  typedef typename internal::conditional<NumTraits<Scalar>::IsInteger,
-                                        internal::scalar_quotient_op<Scalar>,
-                                        internal::scalar_product_op<Scalar> >::type BinOp;
   typedef typename Derived::PlainObject PlainObject;
-  SelfCwiseBinaryOp<BinOp, Derived, typename PlainObject::ConstantReturnType> tmp(derived());
-  Scalar actual_other;
-  if(NumTraits<Scalar>::IsInteger)  actual_other = other;
-  else                              actual_other = Scalar(1)/other;
-  tmp = PlainObject::Constant(rows(),cols(), actual_other);
+  internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::div_assign_op<Scalar>());
   return derived();
 }
 
diff --git a/Eigen/src/Core/Solve.h b/Eigen/src/Core/Solve.h
new file mode 100644
index 000000000..7b12be1e6
--- /dev/null
+++ b/Eigen/src/Core/Solve.h
@@ -0,0 +1,152 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_SOLVE_H
+#define EIGEN_SOLVE_H
+
+namespace Eigen {
+
+template<typename Decomposition, typename RhsType, typename StorageKind> class SolveImpl;
+  
+/** \class Solve
+  * \ingroup Core_Module
+  *
+  * \brief Pseudo expression representing a solving operation
+  *
+  * \tparam Decomposition the type of the matrix or decomposion object
+  * \tparam Rhstype the type of the right-hand side
+  *
+  * This class represents an expression of A.solve(B)
+  * and most of the time this is the only way it is used.
+  *
+  */
+namespace internal {
+
+// this solve_traits class permits to determine the evaluation type with respect to storage kind (Dense vs Sparse)
+template<typename Decomposition, typename RhsType,typename StorageKind> struct solve_traits;
+
+template<typename Decomposition, typename RhsType>
+struct solve_traits<Decomposition,RhsType,Dense>
+{
+  typedef typename Decomposition::MatrixType MatrixType;
+  typedef Matrix<typename RhsType::Scalar,
+                 MatrixType::ColsAtCompileTime,
+                 RhsType::ColsAtCompileTime,
+                 RhsType::PlainObject::Options,
+                 MatrixType::MaxColsAtCompileTime,
+                 RhsType::MaxColsAtCompileTime> PlainObject;  
+};
+
+template<typename Decomposition, typename RhsType>
+struct traits<Solve<Decomposition, RhsType> >
+  : traits<typename solve_traits<Decomposition,RhsType,typename internal::traits<RhsType>::StorageKind>::PlainObject>
+{
+  typedef typename solve_traits<Decomposition,RhsType,typename internal::traits<RhsType>::StorageKind>::PlainObject PlainObject;
+  typedef traits<PlainObject> BaseTraits;
+  enum {
+    Flags = BaseTraits::Flags & RowMajorBit,
+    CoeffReadCost = Dynamic
+  };
+};
+
+}
+
+
+template<typename Decomposition, typename RhsType>
+class Solve : public SolveImpl<Decomposition,RhsType,typename internal::traits<RhsType>::StorageKind>
+{
+public:
+  typedef typename RhsType::Index Index;
+  typedef typename internal::traits<Solve>::PlainObject PlainObject;
+  
+  Solve(const Decomposition &dec, const RhsType &rhs)
+    : m_dec(dec), m_rhs(rhs)
+  {}
+  
+  EIGEN_DEVICE_FUNC Index rows() const { return m_dec.cols(); }
+  EIGEN_DEVICE_FUNC Index cols() const { return m_rhs.cols(); }
+
+  EIGEN_DEVICE_FUNC const Decomposition& dec() const { return m_dec; }
+  EIGEN_DEVICE_FUNC const RhsType&       rhs() const { return m_rhs; }
+
+protected:
+  const Decomposition &m_dec;
+  const RhsType       &m_rhs;
+};
+
+
+// Specialization of the Solve expression for dense results
+template<typename Decomposition, typename RhsType>
+class SolveImpl<Decomposition,RhsType,Dense>
+  : public MatrixBase<Solve<Decomposition,RhsType> >
+{
+  typedef Solve<Decomposition,RhsType> Derived;
+  
+public:
+  
+  typedef MatrixBase<Solve<Decomposition,RhsType> > Base;
+  EIGEN_DENSE_PUBLIC_INTERFACE(Derived)
+
+private:
+  
+  Scalar coeff(Index row, Index col) const;
+  Scalar coeff(Index i) const;
+};
+
+// Generic API dispatcher
+template<typename Decomposition, typename RhsType, typename StorageKind>
+class SolveImpl : public internal::generic_xpr_base<Solve<Decomposition,RhsType>, MatrixXpr, StorageKind>::type
+{
+  public:
+    typedef typename internal::generic_xpr_base<Solve<Decomposition,RhsType>, MatrixXpr, StorageKind>::type Base;
+};
+
+namespace internal {
+
+// Evaluator of Solve -> eval into a temporary
+template<typename Decomposition, typename RhsType>
+struct evaluator<Solve<Decomposition,RhsType> >
+  : public evaluator<typename Solve<Decomposition,RhsType>::PlainObject>::type
+{
+  typedef Solve<Decomposition,RhsType> SolveType;
+  typedef typename SolveType::PlainObject PlainObject;
+  typedef typename evaluator<PlainObject>::type Base;
+  
+  typedef evaluator type;
+  typedef evaluator nestedType;
+
+  evaluator(const SolveType& solve)
+    : m_result(solve.rows(), solve.cols())
+  {
+    ::new (static_cast<Base*>(this)) Base(m_result);
+    solve.dec()._solve_impl(solve.rhs(), m_result);
+  }
+  
+protected:  
+  PlainObject m_result;
+};
+
+// Specialization for "dst = dec.solve(rhs)"
+// NOTE we need to specialize it for Dense2Dense to avoid ambiguous specialization error and a Sparse2Sparse specialization must exist somewhere
+template<typename DstXprType, typename DecType, typename RhsType, typename Scalar>
+struct Assignment<DstXprType, Solve<DecType,RhsType>, internal::assign_op<Scalar>, Dense2Dense, Scalar>
+{
+  typedef Solve<DecType,RhsType> SrcXprType;
+  static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &)
+  {
+    // FIXME shall we resize dst here?
+    src.dec()._solve_impl(src.rhs(), dst);
+  }
+};
+
+} // end namepsace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_SOLVE_H
diff --git a/Eigen/src/Core/SolveTriangular.h b/Eigen/src/Core/SolveTriangular.h
index ef17f288e..0f17e3a89 100644
--- a/Eigen/src/Core/SolveTriangular.h
+++ b/Eigen/src/Core/SolveTriangular.h
@@ -171,10 +171,10 @@ struct triangular_solver_selector<Lhs,Rhs,OnTheRight,Mode,CompleteUnrolling,1> {
   */
 template<typename MatrixType, unsigned int Mode>
 template<int Side, typename OtherDerived>
-void TriangularView<MatrixType,Mode>::solveInPlace(const MatrixBase<OtherDerived>& _other) const
+void TriangularViewImpl<MatrixType,Mode,Dense>::solveInPlace(const MatrixBase<OtherDerived>& _other) const
 {
   OtherDerived& other = _other.const_cast_derived();
-  eigen_assert( cols() == rows() && ((Side==OnTheLeft && cols() == other.rows()) || (Side==OnTheRight && cols() == other.cols())) );
+  eigen_assert( derived().cols() == derived().rows() && ((Side==OnTheLeft && derived().cols() == other.rows()) || (Side==OnTheRight && derived().cols() == other.cols())) );
   eigen_assert((!(Mode & ZeroDiag)) && bool(Mode & (Upper|Lower)));
 
   enum { copy = internal::traits<OtherDerived>::Flags & RowMajorBit  && OtherDerived::IsVectorAtCompileTime };
@@ -183,7 +183,7 @@ void TriangularView<MatrixType,Mode>::solveInPlace(const MatrixBase<OtherDerived
   OtherCopy otherCopy(other);
 
   internal::triangular_solver_selector<MatrixType, typename internal::remove_reference<OtherCopy>::type,
-    Side, Mode>::run(nestedExpression(), otherCopy);
+    Side, Mode>::run(derived().nestedExpression(), otherCopy);
 
   if (copy)
     other = otherCopy;
@@ -213,9 +213,9 @@ void TriangularView<MatrixType,Mode>::solveInPlace(const MatrixBase<OtherDerived
 template<typename Derived, unsigned int Mode>
 template<int Side, typename Other>
 const internal::triangular_solve_retval<Side,TriangularView<Derived,Mode>,Other>
-TriangularView<Derived,Mode>::solve(const MatrixBase<Other>& other) const
+TriangularViewImpl<Derived,Mode,Dense>::solve(const MatrixBase<Other>& other) const
 {
-  return internal::triangular_solve_retval<Side,TriangularView,Other>(*this, other.derived());
+  return internal::triangular_solve_retval<Side,TriangularViewType,Other>(derived(), other.derived());
 }
 
 namespace internal {
diff --git a/Eigen/src/Core/StableNorm.h b/Eigen/src/Core/StableNorm.h
index 525620bad..64d43e1b1 100644
--- a/Eigen/src/Core/StableNorm.h
+++ b/Eigen/src/Core/StableNorm.h
@@ -20,7 +20,7 @@ inline void stable_norm_kernel(const ExpressionType& bl, Scalar& ssq, Scalar& sc
   using std::max;
   Scalar maxCoeff = bl.cwiseAbs().maxCoeff();
   
-  if (maxCoeff>scale)
+  if(maxCoeff>scale)
   {
     ssq = ssq * numext::abs2(scale/maxCoeff);
     Scalar tmp = Scalar(1)/maxCoeff;
@@ -29,12 +29,21 @@ inline void stable_norm_kernel(const ExpressionType& bl, Scalar& ssq, Scalar& sc
       invScale = NumTraits<Scalar>::highest();
       scale = Scalar(1)/invScale;
     }
+    else if(maxCoeff>NumTraits<Scalar>::highest()) // we got a INF
+    {
+      invScale = Scalar(1);
+      scale = maxCoeff;
+    }
     else
     {
       scale = maxCoeff;
       invScale = tmp;
     }
   }
+  else if(maxCoeff!=maxCoeff) // we got a NaN
+  {
+    scale = maxCoeff;
+  }
   
   // TODO if the maxCoeff is much much smaller than the current scale,
   // then we can neglect this sub vector
@@ -55,7 +64,7 @@ blueNorm_impl(const EigenBase<Derived>& _vec)
   using std::abs;
   const Derived& vec(_vec.derived());
   static bool initialized = false;
-  static RealScalar b1, b2, s1m, s2m, overfl, rbig, relerr;
+  static RealScalar b1, b2, s1m, s2m, rbig, relerr;
   if(!initialized)
   {
     int ibeta, it, iemin, iemax, iexp;
@@ -84,7 +93,6 @@ blueNorm_impl(const EigenBase<Derived>& _vec)
     iexp  = - ((iemax+it)/2);
     s2m   = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp)));    // scaling factor for upper range
 
-    overfl  = rbig*s2m;                                             // overflow boundary for abig
     eps     = RealScalar(pow(double(ibeta), 1-it));
     relerr  = sqrt(eps);                                            // tolerance for neglecting asml
     initialized = true;
@@ -101,13 +109,13 @@ blueNorm_impl(const EigenBase<Derived>& _vec)
     else if(ax < b1) asml += numext::abs2(ax*s1m);
     else             amed += numext::abs2(ax);
   }
+  if(amed!=amed)
+    return amed;  // we got a NaN
   if(abig > RealScalar(0))
   {
     abig = sqrt(abig);
-    if(abig > overfl)
-    {
-      return rbig;
-    }
+    if(abig > rbig) // overflow, or *this contains INF values
+      return abig;  // return INF
     if(amed > RealScalar(0))
     {
       abig = abig/s2m;
diff --git a/Eigen/src/Core/Stride.h b/Eigen/src/Core/Stride.h
index d3d454e4e..187774978 100644
--- a/Eigen/src/Core/Stride.h
+++ b/Eigen/src/Core/Stride.h
@@ -86,7 +86,7 @@ class Stride
 
 /** \brief Convenience specialization of Stride to specify only an inner stride
   * See class Map for some examples */
-template<int Value = Dynamic>
+template<int Value>
 class InnerStride : public Stride<0, Value>
 {
     typedef Stride<0, Value> Base;
@@ -98,7 +98,7 @@ class InnerStride : public Stride<0, Value>
 
 /** \brief Convenience specialization of Stride to specify only an outer stride
   * See class Map for some examples */
-template<int Value = Dynamic>
+template<int Value>
 class OuterStride : public Stride<Value, 0>
 {
     typedef Stride<Value, 0> Base;
diff --git a/Eigen/src/Core/Swap.h b/Eigen/src/Core/Swap.h
index d602fba65..3277cb5ba 100644
--- a/Eigen/src/Core/Swap.h
+++ b/Eigen/src/Core/Swap.h
@@ -12,129 +12,54 @@
 
 namespace Eigen { 
 
-/** \class SwapWrapper
-  * \ingroup Core_Module
-  *
-  * \internal
-  *
-  * \brief Internal helper class for swapping two expressions
-  */
 namespace internal {
-template<typename ExpressionType>
-struct traits<SwapWrapper<ExpressionType> > : traits<ExpressionType> {};
-}
 
-template<typename ExpressionType> class SwapWrapper
-  : public internal::dense_xpr_base<SwapWrapper<ExpressionType> >::type
+// Overload default assignPacket behavior for swapping them
+template<typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT>
+class generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, swap_assign_op<typename DstEvaluatorTypeT::Scalar>, Specialized>
+ : public generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, swap_assign_op<typename DstEvaluatorTypeT::Scalar>, BuiltIn>
 {
-  public:
-
-    typedef typename internal::dense_xpr_base<SwapWrapper>::type Base;
-    EIGEN_DENSE_PUBLIC_INTERFACE(SwapWrapper)
-    typedef typename internal::packet_traits<Scalar>::type Packet;
-
-    EIGEN_DEVICE_FUNC
-    inline SwapWrapper(ExpressionType& xpr) : m_expression(xpr) {}
-
-    EIGEN_DEVICE_FUNC
-    inline Index rows() const { return m_expression.rows(); }
-    EIGEN_DEVICE_FUNC
-    inline Index cols() const { return m_expression.cols(); }
-    EIGEN_DEVICE_FUNC
-    inline Index outerStride() const { return m_expression.outerStride(); }
-    EIGEN_DEVICE_FUNC
-    inline Index innerStride() const { return m_expression.innerStride(); }
-    
-    typedef typename internal::conditional<
-                       internal::is_lvalue<ExpressionType>::value,
-                       Scalar,
-                       const Scalar
-                     >::type ScalarWithConstIfNotLvalue;
-                     
-    EIGEN_DEVICE_FUNC
-    inline ScalarWithConstIfNotLvalue* data() { return m_expression.data(); }
-    EIGEN_DEVICE_FUNC
-    inline const Scalar* data() const { return m_expression.data(); }
-
-    EIGEN_DEVICE_FUNC
-    inline Scalar& coeffRef(Index rowId, Index colId)
-    {
-      return m_expression.const_cast_derived().coeffRef(rowId, colId);
-    }
-
-    EIGEN_DEVICE_FUNC
-    inline Scalar& coeffRef(Index index)
-    {
-      return m_expression.const_cast_derived().coeffRef(index);
-    }
-
-    EIGEN_DEVICE_FUNC
-    inline Scalar& coeffRef(Index rowId, Index colId) const
-    {
-      return m_expression.coeffRef(rowId, colId);
-    }
-
-    EIGEN_DEVICE_FUNC
-    inline Scalar& coeffRef(Index index) const
-    {
-      return m_expression.coeffRef(index);
-    }
-
-    template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC
-    void copyCoeff(Index rowId, Index colId, const DenseBase<OtherDerived>& other)
-    {
-      OtherDerived& _other = other.const_cast_derived();
-      eigen_internal_assert(rowId >= 0 && rowId < rows()
-                         && colId >= 0 && colId < cols());
-      Scalar tmp = m_expression.coeff(rowId, colId);
-      m_expression.coeffRef(rowId, colId) = _other.coeff(rowId, colId);
-      _other.coeffRef(rowId, colId) = tmp;
-    }
-
-    template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC
-    void copyCoeff(Index index, const DenseBase<OtherDerived>& other)
-    {
-      OtherDerived& _other = other.const_cast_derived();
-      eigen_internal_assert(index >= 0 && index < m_expression.size());
-      Scalar tmp = m_expression.coeff(index);
-      m_expression.coeffRef(index) = _other.coeff(index);
-      _other.coeffRef(index) = tmp;
-    }
-
-    template<typename OtherDerived, int StoreMode, int LoadMode>
-    void copyPacket(Index rowId, Index colId, const DenseBase<OtherDerived>& other)
-    {
-      OtherDerived& _other = other.const_cast_derived();
-      eigen_internal_assert(rowId >= 0 && rowId < rows()
-                        && colId >= 0 && colId < cols());
-      Packet tmp = m_expression.template packet<StoreMode>(rowId, colId);
-      m_expression.template writePacket<StoreMode>(rowId, colId,
-        _other.template packet<LoadMode>(rowId, colId)
-      );
-      _other.template writePacket<LoadMode>(rowId, colId, tmp);
-    }
-
-    template<typename OtherDerived, int StoreMode, int LoadMode>
-    void copyPacket(Index index, const DenseBase<OtherDerived>& other)
-    {
-      OtherDerived& _other = other.const_cast_derived();
-      eigen_internal_assert(index >= 0 && index < m_expression.size());
-      Packet tmp = m_expression.template packet<StoreMode>(index);
-      m_expression.template writePacket<StoreMode>(index,
-        _other.template packet<LoadMode>(index)
-      );
-      _other.template writePacket<LoadMode>(index, tmp);
-    }
-
-    EIGEN_DEVICE_FUNC
-    ExpressionType& expression() const { return m_expression; }
-
-  protected:
-    ExpressionType& m_expression;
+protected:
+  typedef generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, swap_assign_op<typename DstEvaluatorTypeT::Scalar>, BuiltIn> Base;
+  typedef typename DstEvaluatorTypeT::PacketScalar PacketScalar;
+  using Base::m_dst;
+  using Base::m_src;
+  using Base::m_functor;
+  
+public:
+  typedef typename Base::Scalar Scalar;
+  typedef typename Base::Index Index;
+  typedef typename Base::DstXprType DstXprType;
+  typedef swap_assign_op<Scalar> Functor;
+  
+  generic_dense_assignment_kernel(DstEvaluatorTypeT &dst, const SrcEvaluatorTypeT &src, const Functor &func, DstXprType& dstExpr)
+    : Base(dst, src, func, dstExpr)
+  {}
+  
+  template<int StoreMode, int LoadMode>
+  void assignPacket(Index row, Index col)
+  {
+    m_functor.template swapPacket<StoreMode,LoadMode,PacketScalar>(&m_dst.coeffRef(row,col), &const_cast<SrcEvaluatorTypeT&>(m_src).coeffRef(row,col));
+  }
+  
+  template<int StoreMode, int LoadMode>
+  void assignPacket(Index index)
+  {
+    m_functor.template swapPacket<StoreMode,LoadMode,PacketScalar>(&m_dst.coeffRef(index), &const_cast<SrcEvaluatorTypeT&>(m_src).coeffRef(index));
+  }
+  
+  // TODO find a simple way not to have to copy/paste this function from generic_dense_assignment_kernel, by simple I mean no CRTP (Gael)
+  template<int StoreMode, int LoadMode>
+  void assignPacketByOuterInner(Index outer, Index inner)
+  {
+    Index row = Base::rowIndexByOuterInner(outer, inner); 
+    Index col = Base::colIndexByOuterInner(outer, inner);
+    assignPacket<StoreMode,LoadMode>(row, col);
+  }
 };
 
+} // namespace internal
+
 } // end namespace Eigen
 
 #endif // EIGEN_SWAP_H
diff --git a/Eigen/src/Core/Transpose.h b/Eigen/src/Core/Transpose.h
index aba3f6670..144bb2c01 100644
--- a/Eigen/src/Core/Transpose.h
+++ b/Eigen/src/Core/Transpose.h
@@ -2,7 +2,7 @@
 // for linear algebra.
 //
 // Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
-// Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2009-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
@@ -29,9 +29,10 @@ namespace Eigen {
 
 namespace internal {
 template<typename MatrixType>
-struct traits<Transpose<MatrixType> > : traits<MatrixType>
+struct traits<Transpose<MatrixType> >
 {
-  typedef typename MatrixType::Scalar Scalar;
+  typedef typename traits<MatrixType>::Scalar Scalar;
+  typedef typename traits<MatrixType>::Index Index;
   typedef typename nested<MatrixType>::type MatrixTypeNested;
   typedef typename remove_reference<MatrixTypeNested>::type MatrixTypeNestedPlain;
   typedef typename traits<MatrixType>::StorageKind StorageKind;
@@ -45,7 +46,6 @@ struct traits<Transpose<MatrixType> > : traits<MatrixType>
     Flags0 = MatrixTypeNestedPlain::Flags & ~(LvalueBit | NestByRefBit),
     Flags1 = Flags0 | FlagsLvalueBit,
     Flags = Flags1 ^ RowMajorBit,
-    CoeffReadCost = MatrixTypeNestedPlain::CoeffReadCost,
     InnerStrideAtCompileTime = inner_stride_at_compile_time<MatrixType>::ret,
     OuterStrideAtCompileTime = outer_stride_at_compile_time<MatrixType>::ret
   };
@@ -61,6 +61,7 @@ template<typename MatrixType> class Transpose
 
     typedef typename TransposeImpl<MatrixType,typename internal::traits<MatrixType>::StorageKind>::Base Base;
     EIGEN_GENERIC_PUBLIC_INTERFACE(Transpose)
+    typedef typename internal::remove_all<MatrixType>::type NestedExpression;
 
     EIGEN_DEVICE_FUNC
     inline Transpose(MatrixType& a_matrix) : m_matrix(a_matrix) {}
@@ -100,12 +101,22 @@ struct TransposeImpl_base<MatrixType, false>
 
 } // end namespace internal
 
+// Generic API dispatcher
+template<typename XprType, typename StorageKind>
+class TransposeImpl
+  : public internal::generic_xpr_base<Transpose<XprType> >::type
+{
+public:
+  typedef typename internal::generic_xpr_base<Transpose<XprType> >::type Base;
+};
+
 template<typename MatrixType> class TransposeImpl<MatrixType,Dense>
   : public internal::TransposeImpl_base<MatrixType>::type
 {
   public:
 
     typedef typename internal::TransposeImpl_base<MatrixType>::type Base;
+    using Base::coeffRef;
     EIGEN_DENSE_PUBLIC_INTERFACE(Transpose<MatrixType>)
     EIGEN_INHERIT_ASSIGNMENT_OPERATORS(TransposeImpl)
 
@@ -121,20 +132,7 @@ template<typename MatrixType> class TransposeImpl<MatrixType,Dense>
     inline ScalarWithConstIfNotLvalue* data() { return derived().nestedExpression().data(); }
     inline const Scalar* data() const { return derived().nestedExpression().data(); }
 
-    EIGEN_DEVICE_FUNC
-    inline ScalarWithConstIfNotLvalue& coeffRef(Index rowId, Index colId)
-    {
-      EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
-      return derived().nestedExpression().const_cast_derived().coeffRef(colId, rowId);
-    }
-
-    EIGEN_DEVICE_FUNC
-    inline ScalarWithConstIfNotLvalue& coeffRef(Index index)
-    {
-      EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
-      return derived().nestedExpression().const_cast_derived().coeffRef(index);
-    }
-
+    // FIXME: shall we keep the const version of coeffRef?
     EIGEN_DEVICE_FUNC
     inline const Scalar& coeffRef(Index rowId, Index colId) const
     {
@@ -146,42 +144,6 @@ template<typename MatrixType> class TransposeImpl<MatrixType,Dense>
     {
       return derived().nestedExpression().coeffRef(index);
     }
-
-    EIGEN_DEVICE_FUNC
-    inline CoeffReturnType coeff(Index rowId, Index colId) const
-    {
-      return derived().nestedExpression().coeff(colId, rowId);
-    }
-
-    EIGEN_DEVICE_FUNC
-    inline CoeffReturnType coeff(Index index) const
-    {
-      return derived().nestedExpression().coeff(index);
-    }
-
-    template<int LoadMode>
-    inline const PacketScalar packet(Index rowId, Index colId) const
-    {
-      return derived().nestedExpression().template packet<LoadMode>(colId, rowId);
-    }
-
-    template<int LoadMode>
-    inline void writePacket(Index rowId, Index colId, const PacketScalar& x)
-    {
-      derived().nestedExpression().const_cast_derived().template writePacket<LoadMode>(colId, rowId, x);
-    }
-
-    template<int LoadMode>
-    inline const PacketScalar packet(Index index) const
-    {
-      return derived().nestedExpression().template packet<LoadMode>(index);
-    }
-
-    template<int LoadMode>
-    inline void writePacket(Index index, const PacketScalar& x)
-    {
-      derived().nestedExpression().const_cast_derived().template writePacket<LoadMode>(index, x);
-    }
 };
 
 /** \returns an expression of the transpose of *this.
@@ -413,15 +375,15 @@ struct checkTransposeAliasing_impl<Derived, OtherDerived, false>
     }
 };
 
-} // end namespace internal
-
-template<typename Derived>
-template<typename OtherDerived>
-void DenseBase<Derived>::checkTransposeAliasing(const OtherDerived& other) const
+template<typename Dst, typename Src>
+void check_for_aliasing(const Dst &dst, const Src &src)
 {
-    internal::checkTransposeAliasing_impl<Derived, OtherDerived>::run(derived(), other);
+  internal::checkTransposeAliasing_impl<Dst, Src>::run(dst, src);
 }
-#endif
+
+} // end namespace internal
+
+#endif // EIGEN_NO_DEBUG
 
 } // end namespace Eigen
 
diff --git a/Eigen/src/Core/TriangularMatrix.h b/Eigen/src/Core/TriangularMatrix.h
index 72792d21b..0d315dd50 100644
--- a/Eigen/src/Core/TriangularMatrix.h
+++ b/Eigen/src/Core/TriangularMatrix.h
@@ -32,17 +32,23 @@ template<typename Derived> class TriangularBase : public EigenBase<Derived>
 
     enum {
       Mode = internal::traits<Derived>::Mode,
-      CoeffReadCost = internal::traits<Derived>::CoeffReadCost,
       RowsAtCompileTime = internal::traits<Derived>::RowsAtCompileTime,
       ColsAtCompileTime = internal::traits<Derived>::ColsAtCompileTime,
       MaxRowsAtCompileTime = internal::traits<Derived>::MaxRowsAtCompileTime,
-      MaxColsAtCompileTime = internal::traits<Derived>::MaxColsAtCompileTime
+      MaxColsAtCompileTime = internal::traits<Derived>::MaxColsAtCompileTime,
+      
+      SizeAtCompileTime = (internal::size_at_compile_time<internal::traits<Derived>::RowsAtCompileTime,
+                                                   internal::traits<Derived>::ColsAtCompileTime>::ret)
+        /**< This is equal to the number of coefficients, i.e. the number of
+          * rows times the number of columns, or to \a Dynamic if this is not
+          * known at compile-time. \sa RowsAtCompileTime, ColsAtCompileTime */
     };
     typedef typename internal::traits<Derived>::Scalar Scalar;
     typedef typename internal::traits<Derived>::StorageKind StorageKind;
     typedef typename internal::traits<Derived>::Index Index;
-    typedef typename internal::traits<Derived>::DenseMatrixType DenseMatrixType;
+    typedef typename internal::traits<Derived>::FullMatrixType DenseMatrixType;
     typedef DenseMatrixType DenseType;
+    typedef Derived const& Nested;
 
     EIGEN_DEVICE_FUNC
     inline TriangularBase() { eigen_assert(!((Mode&UnitDiag) && (Mode&ZeroDiag))); }
@@ -55,6 +61,14 @@ template<typename Derived> class TriangularBase : public EigenBase<Derived>
     inline Index outerStride() const { return derived().outerStride(); }
     EIGEN_DEVICE_FUNC
     inline Index innerStride() const { return derived().innerStride(); }
+    
+    // dummy resize function
+    void resize(Index nbRows, Index nbCols)
+    {
+      EIGEN_UNUSED_VARIABLE(nbRows);
+      EIGEN_UNUSED_VARIABLE(nbCols);
+      eigen_assert(nbRows==rows() && nbCols==nbCols);
+    }
 
     EIGEN_DEVICE_FUNC
     inline Scalar coeff(Index row, Index col) const  { return derived().coeff(row,col); }
@@ -155,49 +169,41 @@ struct traits<TriangularView<MatrixType, _Mode> > : traits<MatrixType>
   typedef typename nested<MatrixType>::type MatrixTypeNested;
   typedef typename remove_reference<MatrixTypeNested>::type MatrixTypeNestedNonRef;
   typedef typename remove_all<MatrixTypeNested>::type MatrixTypeNestedCleaned;
+  typedef typename MatrixType::PlainObject FullMatrixType;
   typedef MatrixType ExpressionType;
-  typedef typename MatrixType::PlainObject DenseMatrixType;
   enum {
     Mode = _Mode,
-    Flags = (MatrixTypeNestedCleaned::Flags & (HereditaryBits) & (~(PacketAccessBit | DirectAccessBit | LinearAccessBit))) | Mode,
-    CoeffReadCost = MatrixTypeNestedCleaned::CoeffReadCost
+    Flags = (MatrixTypeNestedCleaned::Flags & (HereditaryBits | LvalueBit) & (~(PacketAccessBit | DirectAccessBit | LinearAccessBit)))
   };
 };
 }
 
-template<int Mode, bool LhsIsTriangular,
-         typename Lhs, bool LhsIsVector,
-         typename Rhs, bool RhsIsVector>
-struct TriangularProduct;
+template<typename _MatrixType, unsigned int _Mode, typename StorageKind> class TriangularViewImpl;
 
 template<typename _MatrixType, unsigned int _Mode> class TriangularView
-  : public TriangularBase<TriangularView<_MatrixType, _Mode> >
+  : public TriangularViewImpl<_MatrixType, _Mode, typename internal::traits<_MatrixType>::StorageKind >
 {
   public:
 
-    typedef TriangularBase<TriangularView> Base;
+    typedef TriangularViewImpl<_MatrixType, _Mode, typename internal::traits<_MatrixType>::StorageKind > Base;
     typedef typename internal::traits<TriangularView>::Scalar Scalar;
-
     typedef _MatrixType MatrixType;
-    typedef typename internal::traits<TriangularView>::DenseMatrixType DenseMatrixType;
-    typedef DenseMatrixType PlainObject;
 
   protected:
     typedef typename internal::traits<TriangularView>::MatrixTypeNested MatrixTypeNested;
     typedef typename internal::traits<TriangularView>::MatrixTypeNestedNonRef MatrixTypeNestedNonRef;
-    typedef typename internal::traits<TriangularView>::MatrixTypeNestedCleaned MatrixTypeNestedCleaned;
 
     typedef typename internal::remove_all<typename MatrixType::ConjugateReturnType>::type MatrixConjugateReturnType;
     
   public:
-    using Base::evalToLazy;
-  
 
     typedef typename internal::traits<TriangularView>::StorageKind StorageKind;
     typedef typename internal::traits<TriangularView>::Index Index;
+    typedef typename internal::traits<TriangularView>::MatrixTypeNestedCleaned NestedExpression;
 
     enum {
       Mode = _Mode,
+      Flags = internal::traits<TriangularView>::Flags,
       TransposeMode = (Mode & Upper ? Lower : 0)
                     | (Mode & Lower ? Upper : 0)
                     | (Mode & (UnitDiag))
@@ -207,44 +213,160 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
     EIGEN_DEVICE_FUNC
     inline TriangularView(const MatrixType& matrix) : m_matrix(matrix)
     {}
+    
+    using Base::operator=;
+    TriangularView& operator=(const TriangularView &other)
+    { return Base::operator=(other); }
 
     EIGEN_DEVICE_FUNC
     inline Index rows() const { return m_matrix.rows(); }
     EIGEN_DEVICE_FUNC
     inline Index cols() const { return m_matrix.cols(); }
+
+    EIGEN_DEVICE_FUNC
+    const NestedExpression& nestedExpression() const { return m_matrix; }
+    EIGEN_DEVICE_FUNC
+    NestedExpression& nestedExpression() { return *const_cast<NestedExpression*>(&m_matrix); }
+
+    /** \sa MatrixBase::conjugate() */
+    EIGEN_DEVICE_FUNC
+    inline TriangularView<MatrixConjugateReturnType,Mode> conjugate()
+    { return m_matrix.conjugate(); }
+    /** \sa MatrixBase::conjugate() const */
+    EIGEN_DEVICE_FUNC
+    inline const TriangularView<MatrixConjugateReturnType,Mode> conjugate() const
+    { return m_matrix.conjugate(); }
+
+    /** \sa MatrixBase::adjoint() const */
+    EIGEN_DEVICE_FUNC
+    inline const TriangularView<const typename MatrixType::AdjointReturnType,TransposeMode> adjoint() const
+    { return m_matrix.adjoint(); }
+
+    /** \sa MatrixBase::transpose() */
+    EIGEN_DEVICE_FUNC
+    inline TriangularView<Transpose<MatrixType>,TransposeMode> transpose()
+    {
+      EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
+      return m_matrix.const_cast_derived().transpose();
+    }
+    /** \sa MatrixBase::transpose() const */
+    EIGEN_DEVICE_FUNC
+    inline const TriangularView<Transpose<MatrixType>,TransposeMode> transpose() const
+    {
+      return m_matrix.transpose();
+    }
+
+    template<typename Other>
+    EIGEN_DEVICE_FUNC
+    inline const Solve<TriangularView, Other> 
+    solve(const MatrixBase<Other>& other) const
+    { return Solve<TriangularView, Other>(*this, other.derived()); }
+    
+  // workaround MSVC ICE
+  #ifdef _MSC_VER
+    template<int Side, typename Other>
+    EIGEN_DEVICE_FUNC
+    inline const internal::triangular_solve_retval<Side,TriangularView, Other>
+    solve(const MatrixBase<Other>& other) const
+    { return Base::template solve<Side>(other); }
+  #else
+    using Base::solve;
+  #endif
+
+    EIGEN_DEVICE_FUNC
+    const SelfAdjointView<MatrixTypeNestedNonRef,Mode> selfadjointView() const
+    {
+      EIGEN_STATIC_ASSERT((Mode&UnitDiag)==0,PROGRAMMING_ERROR);
+      return SelfAdjointView<MatrixTypeNestedNonRef,Mode>(m_matrix);
+    }
+    EIGEN_DEVICE_FUNC
+    SelfAdjointView<MatrixTypeNestedNonRef,Mode> selfadjointView()
+    {
+      EIGEN_STATIC_ASSERT((Mode&UnitDiag)==0,PROGRAMMING_ERROR);
+      return SelfAdjointView<MatrixTypeNestedNonRef,Mode>(m_matrix);
+    }
+
+    EIGEN_DEVICE_FUNC
+    Scalar determinant() const
+    {
+      if (Mode & UnitDiag)
+        return 1;
+      else if (Mode & ZeroDiag)
+        return 0;
+      else
+        return m_matrix.diagonal().prod();
+    }
+      
+  protected:
+
+    MatrixTypeNested m_matrix;
+};
+
+template<typename _MatrixType, unsigned int _Mode> class TriangularViewImpl<_MatrixType,_Mode,Dense>
+  : public TriangularBase<TriangularView<_MatrixType, _Mode> >
+{
+  public:
+
+    typedef TriangularView<_MatrixType, _Mode> TriangularViewType;
+    typedef TriangularBase<TriangularViewType> Base;
+    typedef typename internal::traits<TriangularViewType>::Scalar Scalar;
+
+    typedef _MatrixType MatrixType;
+    typedef typename MatrixType::PlainObject DenseMatrixType;
+    typedef DenseMatrixType PlainObject;
+
+  public:
+    using Base::evalToLazy;
+    using Base::derived;
+
+    typedef typename internal::traits<TriangularViewType>::StorageKind StorageKind;
+    typedef typename internal::traits<TriangularViewType>::Index Index;
+
+    enum {
+      Mode = _Mode,
+      Flags = internal::traits<TriangularViewType>::Flags
+    };
+
     EIGEN_DEVICE_FUNC
-    inline Index outerStride() const { return m_matrix.outerStride(); }
+    inline Index outerStride() const { return derived().nestedExpression().outerStride(); }
     EIGEN_DEVICE_FUNC
-    inline Index innerStride() const { return m_matrix.innerStride(); }
+    inline Index innerStride() const { return derived().nestedExpression().innerStride(); }
 
     /** \sa MatrixBase::operator+=() */    
     template<typename Other>
     EIGEN_DEVICE_FUNC
-    TriangularView&  operator+=(const DenseBase<Other>& other) { return *this = m_matrix + other.derived(); }
+    TriangularViewType&  operator+=(const DenseBase<Other>& other) {
+      internal::call_assignment_no_alias(derived(), other.derived(), internal::add_assign_op<Scalar>());
+      return derived();
+    }
     /** \sa MatrixBase::operator-=() */
     template<typename Other>
     EIGEN_DEVICE_FUNC
-    TriangularView&  operator-=(const DenseBase<Other>& other) { return *this = m_matrix - other.derived(); }
+    TriangularViewType&  operator-=(const DenseBase<Other>& other) {
+      internal::call_assignment_no_alias(derived(), other.derived(), internal::sub_assign_op<Scalar>());
+      return derived();
+    }
+    
     /** \sa MatrixBase::operator*=() */
     EIGEN_DEVICE_FUNC
-    TriangularView&  operator*=(const typename internal::traits<MatrixType>::Scalar& other) { return *this = m_matrix * other; }
+    TriangularViewType&  operator*=(const typename internal::traits<MatrixType>::Scalar& other) { return *this = derived().nestedExpression() * other; }
     /** \sa MatrixBase::operator/=() */
     EIGEN_DEVICE_FUNC
-    TriangularView&  operator/=(const typename internal::traits<MatrixType>::Scalar& other) { return *this = m_matrix / other; }
+    TriangularViewType&  operator/=(const typename internal::traits<MatrixType>::Scalar& other) { return *this = derived().nestedExpression() / other; }
 
     /** \sa MatrixBase::fill() */
     EIGEN_DEVICE_FUNC
     void fill(const Scalar& value) { setConstant(value); }
     /** \sa MatrixBase::setConstant() */
     EIGEN_DEVICE_FUNC
-    TriangularView& setConstant(const Scalar& value)
-    { return *this = MatrixType::Constant(rows(), cols(), value); }
+    TriangularViewType& setConstant(const Scalar& value)
+    { return *this = MatrixType::Constant(derived().rows(), derived().cols(), value); }
     /** \sa MatrixBase::setZero() */
     EIGEN_DEVICE_FUNC
-    TriangularView& setZero() { return setConstant(Scalar(0)); }
+    TriangularViewType& setZero() { return setConstant(Scalar(0)); }
     /** \sa MatrixBase::setOnes() */
     EIGEN_DEVICE_FUNC
-    TriangularView& setOnes() { return setConstant(Scalar(1)); }
+    TriangularViewType& setOnes() { return setConstant(Scalar(1)); }
 
     /** \sa MatrixBase::coeff()
       * \warning the coordinates must fit into the referenced triangular part
@@ -253,7 +375,7 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
     inline Scalar coeff(Index row, Index col) const
     {
       Base::check_coordinates_internal(row, col);
-      return m_matrix.coeff(row, col);
+      return derived().nestedExpression().coeff(row, col);
     }
 
     /** \sa MatrixBase::coeffRef()
@@ -263,26 +385,21 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
     inline Scalar& coeffRef(Index row, Index col)
     {
       Base::check_coordinates_internal(row, col);
-      return m_matrix.const_cast_derived().coeffRef(row, col);
+      return derived().nestedExpression().const_cast_derived().coeffRef(row, col);
     }
 
-    EIGEN_DEVICE_FUNC
-    const MatrixTypeNestedCleaned& nestedExpression() const { return m_matrix; }
-    EIGEN_DEVICE_FUNC
-    MatrixTypeNestedCleaned& nestedExpression() { return *const_cast<MatrixTypeNestedCleaned*>(&m_matrix); }
-
     /** Assigns a triangular matrix to a triangular part of a dense matrix */
     template<typename OtherDerived>
     EIGEN_DEVICE_FUNC
-    TriangularView& operator=(const TriangularBase<OtherDerived>& other);
+    TriangularViewType& operator=(const TriangularBase<OtherDerived>& other);
 
     template<typename OtherDerived>
     EIGEN_DEVICE_FUNC
-    TriangularView& operator=(const MatrixBase<OtherDerived>& other);
+    TriangularViewType& operator=(const MatrixBase<OtherDerived>& other);
 
     EIGEN_DEVICE_FUNC
-    TriangularView& operator=(const TriangularView& other)
-    { return *this = other.nestedExpression(); }
+    TriangularViewType& operator=(const TriangularViewImpl& other)
+    { return *this = other.derived().nestedExpression(); }
 
     template<typename OtherDerived>
     EIGEN_DEVICE_FUNC
@@ -290,378 +407,88 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
 
     template<typename OtherDerived>
     EIGEN_DEVICE_FUNC
-    void lazyAssign(const MatrixBase<OtherDerived>& other);
-
-    /** \sa MatrixBase::conjugate() */
-    EIGEN_DEVICE_FUNC
-    inline TriangularView<MatrixConjugateReturnType,Mode> conjugate()
-    { return m_matrix.conjugate(); }
-    /** \sa MatrixBase::conjugate() const */
-    EIGEN_DEVICE_FUNC
-    inline const TriangularView<MatrixConjugateReturnType,Mode> conjugate() const
-    { return m_matrix.conjugate(); }
-
-    /** \sa MatrixBase::adjoint() const */
-    EIGEN_DEVICE_FUNC
-    inline const TriangularView<const typename MatrixType::AdjointReturnType,TransposeMode> adjoint() const
-    { return m_matrix.adjoint(); }
-
-    /** \sa MatrixBase::transpose() */
-    EIGEN_DEVICE_FUNC
-    inline TriangularView<Transpose<MatrixType>,TransposeMode> transpose()
-    {
-      EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
-      return m_matrix.const_cast_derived().transpose();
-    }
-    /** \sa MatrixBase::transpose() const */
-    EIGEN_DEVICE_FUNC
-    inline const TriangularView<Transpose<MatrixType>,TransposeMode> transpose() const
-    {
-      return m_matrix.transpose();
-    }
+    void lazyAssign(const MatrixBase<OtherDerived>& other);  
 
     /** Efficient triangular matrix times vector/matrix product */
     template<typename OtherDerived>
     EIGEN_DEVICE_FUNC
-    TriangularProduct<Mode, true, MatrixType, false, OtherDerived, OtherDerived::ColsAtCompileTime==1>
+    const Product<TriangularViewType,OtherDerived>
     operator*(const MatrixBase<OtherDerived>& rhs) const
     {
-      return TriangularProduct
-              <Mode, true, MatrixType, false, OtherDerived, OtherDerived::ColsAtCompileTime==1>
-              (m_matrix, rhs.derived());
+      return Product<TriangularViewType,OtherDerived>(derived(), rhs.derived());
     }
 
     /** Efficient vector/matrix times triangular matrix product */
     template<typename OtherDerived> friend
     EIGEN_DEVICE_FUNC
-    TriangularProduct<Mode, false, OtherDerived, OtherDerived::RowsAtCompileTime==1, MatrixType, false>
-    operator*(const MatrixBase<OtherDerived>& lhs, const TriangularView& rhs)
+    const Product<OtherDerived,TriangularViewType>
+    operator*(const MatrixBase<OtherDerived>& lhs, const TriangularViewImpl& rhs)
     {
-      return TriangularProduct
-              <Mode, false, OtherDerived, OtherDerived::RowsAtCompileTime==1, MatrixType, false>
-              (lhs.derived(),rhs.m_matrix);
+      return Product<OtherDerived,TriangularViewType>(lhs.derived(),rhs.derived());
     }
 
     template<int Side, typename Other>
     EIGEN_DEVICE_FUNC
-    inline const internal::triangular_solve_retval<Side,TriangularView, Other>
+    inline const internal::triangular_solve_retval<Side,TriangularViewType, Other>
     solve(const MatrixBase<Other>& other) const;
 
     template<int Side, typename OtherDerived>
     EIGEN_DEVICE_FUNC
     void solveInPlace(const MatrixBase<OtherDerived>& other) const;
 
-    template<typename Other>
-    EIGEN_DEVICE_FUNC
-    inline const internal::triangular_solve_retval<OnTheLeft,TriangularView, Other> 
-    solve(const MatrixBase<Other>& other) const
-    { return solve<OnTheLeft>(other); }
-
     template<typename OtherDerived>
     EIGEN_DEVICE_FUNC
     void solveInPlace(const MatrixBase<OtherDerived>& other) const
     { return solveInPlace<OnTheLeft>(other); }
 
-    EIGEN_DEVICE_FUNC
-    const SelfAdjointView<MatrixTypeNestedNonRef,Mode> selfadjointView() const
-    {
-      EIGEN_STATIC_ASSERT((Mode&UnitDiag)==0,PROGRAMMING_ERROR);
-      return SelfAdjointView<MatrixTypeNestedNonRef,Mode>(m_matrix);
-    }
-    EIGEN_DEVICE_FUNC
-    SelfAdjointView<MatrixTypeNestedNonRef,Mode> selfadjointView()
-    {
-      EIGEN_STATIC_ASSERT((Mode&UnitDiag)==0,PROGRAMMING_ERROR);
-      return SelfAdjointView<MatrixTypeNestedNonRef,Mode>(m_matrix);
-    }
-
     template<typename OtherDerived>
     EIGEN_DEVICE_FUNC
     void swap(TriangularBase<OtherDerived> const & other)
     {
-      TriangularView<SwapWrapper<MatrixType>,Mode>(const_cast<MatrixType&>(m_matrix)).lazyAssign(other.derived());
+      call_assignment(derived(), other.const_cast_derived(), internal::swap_assign_op<Scalar>());
     }
 
+    // TODO: this overload is ambiguous and it should be deprecated (Gael)
     template<typename OtherDerived>
     EIGEN_DEVICE_FUNC
     void swap(MatrixBase<OtherDerived> const & other)
     {
-      SwapWrapper<MatrixType> swaper(const_cast<MatrixType&>(m_matrix));
-      TriangularView<SwapWrapper<MatrixType>,Mode>(swaper).lazyAssign(other.derived());
+      call_assignment(derived(), other.const_cast_derived(), internal::swap_assign_op<Scalar>());
     }
 
+    template<typename RhsType, typename DstType>
     EIGEN_DEVICE_FUNC
-    Scalar determinant() const
-    {
-      if (Mode & UnitDiag)
-        return 1;
-      else if (Mode & ZeroDiag)
-        return 0;
-      else
-        return m_matrix.diagonal().prod();
-    }
-    
-    // TODO simplify the following:
-    template<typename ProductDerived, typename Lhs, typename Rhs>
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE TriangularView& operator=(const ProductBase<ProductDerived, Lhs,Rhs>& other)
-    {
-      setZero();
-      return assignProduct(other,1);
-    }
-    
-    template<typename ProductDerived, typename Lhs, typename Rhs>
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE TriangularView& operator+=(const ProductBase<ProductDerived, Lhs,Rhs>& other)
-    {
-      return assignProduct(other,1);
-    }
-    
-    template<typename ProductDerived, typename Lhs, typename Rhs>
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE TriangularView& operator-=(const ProductBase<ProductDerived, Lhs,Rhs>& other)
-    {
-      return assignProduct(other,-1);
-    }
-    
-    
-    template<typename ProductDerived>
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE TriangularView& operator=(const ScaledProduct<ProductDerived>& other)
-    {
-      setZero();
-      return assignProduct(other,other.alpha());
+    EIGEN_STRONG_INLINE void _solve_impl(const RhsType &rhs, DstType &dst) const {
+      if(!(internal::is_same<RhsType,DstType>::value && internal::extract_data(dst) == internal::extract_data(rhs)))
+        dst = rhs;
+      this->solveInPlace(dst);
     }
-    
-    template<typename ProductDerived>
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE TriangularView& operator+=(const ScaledProduct<ProductDerived>& other)
-    {
-      return assignProduct(other,other.alpha());
-    }
-    
-    template<typename ProductDerived>
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE TriangularView& operator-=(const ScaledProduct<ProductDerived>& other)
-    {
-      return assignProduct(other,-other.alpha());
-    }
-    
-  protected:
-    
-    template<typename ProductDerived, typename Lhs, typename Rhs>
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE TriangularView& assignProduct(const ProductBase<ProductDerived, Lhs,Rhs>& prod, const Scalar& alpha);
 
-    MatrixTypeNested m_matrix;
+    template<typename ProductType>
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE TriangularViewType& _assignProduct(const ProductType& prod, const Scalar& alpha);
 };
 
 /***************************************************************************
 * Implementation of triangular evaluation/assignment
 ***************************************************************************/
 
-namespace internal {
-
-template<typename Derived1, typename Derived2, unsigned int Mode, int UnrollCount, bool ClearOpposite>
-struct triangular_assignment_selector
-{
-  enum {
-    col = (UnrollCount-1) / Derived1::RowsAtCompileTime,
-    row = (UnrollCount-1) % Derived1::RowsAtCompileTime
-  };
-  
-  typedef typename Derived1::Scalar Scalar;
-
-  EIGEN_DEVICE_FUNC
-  static inline void run(Derived1 &dst, const Derived2 &src)
-  {
-    triangular_assignment_selector<Derived1, Derived2, Mode, UnrollCount-1, ClearOpposite>::run(dst, src);
-
-    eigen_assert( Mode == Upper || Mode == Lower
-            || Mode == StrictlyUpper || Mode == StrictlyLower
-            || Mode == UnitUpper || Mode == UnitLower);
-    if((Mode == Upper && row <= col)
-    || (Mode == Lower && row >= col)
-    || (Mode == StrictlyUpper && row < col)
-    || (Mode == StrictlyLower && row > col)
-    || (Mode == UnitUpper && row < col)
-    || (Mode == UnitLower && row > col))
-      dst.copyCoeff(row, col, src);
-    else if(ClearOpposite)
-    {
-      if (Mode&UnitDiag && row==col)
-        dst.coeffRef(row, col) = Scalar(1);
-      else
-        dst.coeffRef(row, col) = Scalar(0);
-    }
-  }
-};
-
-// prevent buggy user code from causing an infinite recursion
-template<typename Derived1, typename Derived2, unsigned int Mode, bool ClearOpposite>
-struct triangular_assignment_selector<Derived1, Derived2, Mode, 0, ClearOpposite>
-{
-  EIGEN_DEVICE_FUNC
-  static inline void run(Derived1 &, const Derived2 &) {}
-};
-
-template<typename Derived1, typename Derived2, bool ClearOpposite>
-struct triangular_assignment_selector<Derived1, Derived2, Upper, Dynamic, ClearOpposite>
-{
-  typedef typename Derived1::Index Index;
-  typedef typename Derived1::Scalar Scalar;
-  EIGEN_DEVICE_FUNC
-  static inline void run(Derived1 &dst, const Derived2 &src)
-  {
-    for(Index j = 0; j < dst.cols(); ++j)
-    {
-      Index maxi = (std::min)(j, dst.rows()-1);
-      for(Index i = 0; i <= maxi; ++i)
-        dst.copyCoeff(i, j, src);
-      if (ClearOpposite)
-        for(Index i = maxi+1; i < dst.rows(); ++i)
-          dst.coeffRef(i, j) = Scalar(0);
-    }
-  }
-};
-
-template<typename Derived1, typename Derived2, bool ClearOpposite>
-struct triangular_assignment_selector<Derived1, Derived2, Lower, Dynamic, ClearOpposite>
-{
-  typedef typename Derived1::Index Index;
-  EIGEN_DEVICE_FUNC
-  static inline void run(Derived1 &dst, const Derived2 &src)
-  {
-    for(Index j = 0; j < dst.cols(); ++j)
-    {
-      for(Index i = j; i < dst.rows(); ++i)
-        dst.copyCoeff(i, j, src);
-      Index maxi = (std::min)(j, dst.rows());
-      if (ClearOpposite)
-        for(Index i = 0; i < maxi; ++i)
-          dst.coeffRef(i, j) = static_cast<typename Derived1::Scalar>(0);
-    }
-  }
-};
-
-template<typename Derived1, typename Derived2, bool ClearOpposite>
-struct triangular_assignment_selector<Derived1, Derived2, StrictlyUpper, Dynamic, ClearOpposite>
-{
-  typedef typename Derived1::Index Index;
-  typedef typename Derived1::Scalar Scalar;
-  EIGEN_DEVICE_FUNC
-  static inline void run(Derived1 &dst, const Derived2 &src)
-  {
-    for(Index j = 0; j < dst.cols(); ++j)
-    {
-      Index maxi = (std::min)(j, dst.rows());
-      for(Index i = 0; i < maxi; ++i)
-        dst.copyCoeff(i, j, src);
-      if (ClearOpposite)
-        for(Index i = maxi; i < dst.rows(); ++i)
-          dst.coeffRef(i, j) = Scalar(0);
-    }
-  }
-};
-
-template<typename Derived1, typename Derived2, bool ClearOpposite>
-struct triangular_assignment_selector<Derived1, Derived2, StrictlyLower, Dynamic, ClearOpposite>
-{
-  typedef typename Derived1::Index Index;
-  EIGEN_DEVICE_FUNC
-  static inline void run(Derived1 &dst, const Derived2 &src)
-  {
-    for(Index j = 0; j < dst.cols(); ++j)
-    {
-      for(Index i = j+1; i < dst.rows(); ++i)
-        dst.copyCoeff(i, j, src);
-      Index maxi = (std::min)(j, dst.rows()-1);
-      if (ClearOpposite)
-        for(Index i = 0; i <= maxi; ++i)
-          dst.coeffRef(i, j) = static_cast<typename Derived1::Scalar>(0);
-    }
-  }
-};
-
-template<typename Derived1, typename Derived2, bool ClearOpposite>
-struct triangular_assignment_selector<Derived1, Derived2, UnitUpper, Dynamic, ClearOpposite>
-{
-  typedef typename Derived1::Index Index;
-  EIGEN_DEVICE_FUNC
-  static inline void run(Derived1 &dst, const Derived2 &src)
-  {
-    for(Index j = 0; j < dst.cols(); ++j)
-    {
-      Index maxi = (std::min)(j, dst.rows());
-      for(Index i = 0; i < maxi; ++i)
-        dst.copyCoeff(i, j, src);
-      if (ClearOpposite)
-      {
-        for(Index i = maxi+1; i < dst.rows(); ++i)
-          dst.coeffRef(i, j) = 0;
-      }
-    }
-    dst.diagonal().setOnes();
-  }
-};
-template<typename Derived1, typename Derived2, bool ClearOpposite>
-struct triangular_assignment_selector<Derived1, Derived2, UnitLower, Dynamic, ClearOpposite>
-{
-  typedef typename Derived1::Index Index;
-  EIGEN_DEVICE_FUNC
-  static inline void run(Derived1 &dst, const Derived2 &src)
-  {
-    for(Index j = 0; j < dst.cols(); ++j)
-    {
-      Index maxi = (std::min)(j, dst.rows());
-      for(Index i = maxi+1; i < dst.rows(); ++i)
-        dst.copyCoeff(i, j, src);
-      if (ClearOpposite)
-      {
-        for(Index i = 0; i < maxi; ++i)
-          dst.coeffRef(i, j) = 0;
-      }
-    }
-    dst.diagonal().setOnes();
-  }
-};
-
-} // end namespace internal
-
 // FIXME should we keep that possibility
 template<typename MatrixType, unsigned int Mode>
 template<typename OtherDerived>
 inline TriangularView<MatrixType, Mode>&
-TriangularView<MatrixType, Mode>::operator=(const MatrixBase<OtherDerived>& other)
+TriangularViewImpl<MatrixType, Mode, Dense>::operator=(const MatrixBase<OtherDerived>& other)
 {
-  if(OtherDerived::Flags & EvalBeforeAssigningBit)
-  {
-    typename internal::plain_matrix_type<OtherDerived>::type other_evaluated(other.rows(), other.cols());
-    other_evaluated.template triangularView<Mode>().lazyAssign(other.derived());
-    lazyAssign(other_evaluated);
-  }
-  else
-    lazyAssign(other.derived());
-  return *this;
+  internal::call_assignment_no_alias(derived(), other.derived(), internal::assign_op<Scalar>());
+  return derived();
 }
 
 // FIXME should we keep that possibility
 template<typename MatrixType, unsigned int Mode>
 template<typename OtherDerived>
-void TriangularView<MatrixType, Mode>::lazyAssign(const MatrixBase<OtherDerived>& other)
+void TriangularViewImpl<MatrixType, Mode, Dense>::lazyAssign(const MatrixBase<OtherDerived>& other)
 {
-  enum {
-    unroll = MatrixType::SizeAtCompileTime != Dynamic
-          && internal::traits<OtherDerived>::CoeffReadCost != Dynamic
-          && MatrixType::SizeAtCompileTime*internal::traits<OtherDerived>::CoeffReadCost/2 <= EIGEN_UNROLLING_LIMIT
-  };
-  eigen_assert(m_matrix.rows() == other.rows() && m_matrix.cols() == other.cols());
-
-  internal::triangular_assignment_selector
-    <MatrixType, OtherDerived, int(Mode),
-    unroll ? int(MatrixType::SizeAtCompileTime) : Dynamic,
-    false // do not change the opposite triangular part
-    >::run(m_matrix.const_cast_derived(), other.derived());
+  internal::call_assignment(derived().noalias(), other.template triangularView<Mode>());
 }
 
 
@@ -669,37 +496,19 @@ void TriangularView<MatrixType, Mode>::lazyAssign(const MatrixBase<OtherDerived>
 template<typename MatrixType, unsigned int Mode>
 template<typename OtherDerived>
 inline TriangularView<MatrixType, Mode>&
-TriangularView<MatrixType, Mode>::operator=(const TriangularBase<OtherDerived>& other)
+TriangularViewImpl<MatrixType, Mode, Dense>::operator=(const TriangularBase<OtherDerived>& other)
 {
   eigen_assert(Mode == int(OtherDerived::Mode));
-  if(internal::traits<OtherDerived>::Flags & EvalBeforeAssigningBit)
-  {
-    typename OtherDerived::DenseMatrixType other_evaluated(other.rows(), other.cols());
-    other_evaluated.template triangularView<Mode>().lazyAssign(other.derived().nestedExpression());
-    lazyAssign(other_evaluated);
-  }
-  else
-    lazyAssign(other.derived().nestedExpression());
-  return *this;
+  internal::call_assignment(derived(), other.derived());
+  return derived();
 }
 
 template<typename MatrixType, unsigned int Mode>
 template<typename OtherDerived>
-void TriangularView<MatrixType, Mode>::lazyAssign(const TriangularBase<OtherDerived>& other)
+void TriangularViewImpl<MatrixType, Mode, Dense>::lazyAssign(const TriangularBase<OtherDerived>& other)
 {
-  enum {
-    unroll = MatrixType::SizeAtCompileTime != Dynamic
-                   && internal::traits<OtherDerived>::CoeffReadCost != Dynamic
-                   && MatrixType::SizeAtCompileTime * internal::traits<OtherDerived>::CoeffReadCost / 2
-                        <= EIGEN_UNROLLING_LIMIT
-  };
-  eigen_assert(m_matrix.rows() == other.rows() && m_matrix.cols() == other.cols());
-
-  internal::triangular_assignment_selector
-    <MatrixType, OtherDerived, int(Mode),
-    unroll ? int(MatrixType::SizeAtCompileTime) : Dynamic,
-    false // preserve the opposite triangular part
-    >::run(m_matrix.const_cast_derived(), other.derived().nestedExpression());
+  eigen_assert(Mode == int(OtherDerived::Mode));
+  internal::call_assignment(derived().noalias(), other.derived());
 }
 
 /***************************************************************************
@@ -722,27 +531,6 @@ void TriangularBase<Derived>::evalTo(MatrixBase<DenseDerived> &other) const
     evalToLazy(other.derived());
 }
 
-/** Assigns a triangular or selfadjoint matrix to a dense matrix.
-  * If the matrix is triangular, the opposite part is set to zero. */
-template<typename Derived>
-template<typename DenseDerived>
-void TriangularBase<Derived>::evalToLazy(MatrixBase<DenseDerived> &other) const
-{
-  enum {
-    unroll = DenseDerived::SizeAtCompileTime != Dynamic
-                   && internal::traits<Derived>::CoeffReadCost != Dynamic
-                   && DenseDerived::SizeAtCompileTime * internal::traits<Derived>::CoeffReadCost / 2
-                        <= EIGEN_UNROLLING_LIMIT
-  };
-  other.derived().resize(this->rows(), this->cols());
-
-  internal::triangular_assignment_selector
-    <DenseDerived, typename internal::traits<Derived>::MatrixTypeNestedCleaned, Derived::Mode,
-    unroll ? int(DenseDerived::SizeAtCompileTime) : Dynamic,
-    true // clear the opposite triangular part
-    >::run(other.derived(), derived().nestedExpression());
-}
-
 /***************************************************************************
 * Implementation of TriangularView methods
 ***************************************************************************/
@@ -831,6 +619,293 @@ bool MatrixBase<Derived>::isLowerTriangular(const RealScalar& prec) const
   return true;
 }
 
+
+/***************************************************************************
+****************************************************************************
+* Evaluators and Assignment of triangular expressions
+***************************************************************************
+***************************************************************************/
+
+namespace internal {
+
+  
+// TODO currently a triangular expression has the form TriangularView<.,.>
+//      in the future triangular-ness should be defined by the expression traits
+//      such that Transpose<TriangularView<.,.> > is valid. (currently TriangularBase::transpose() is overloaded to make it work)
+template<typename MatrixType, unsigned int Mode>
+struct evaluator_traits<TriangularView<MatrixType,Mode> >
+{
+  typedef typename storage_kind_to_evaluator_kind<typename MatrixType::StorageKind>::Kind Kind;
+  typedef typename glue_shapes<typename evaluator_traits<MatrixType>::Shape, TriangularShape>::type Shape;
+  
+  // 1 if assignment A = B assumes aliasing when B is of type T and thus B needs to be evaluated into a
+  // temporary; 0 if not.
+  static const int AssumeAliasing = 0;
+};
+
+template<typename MatrixType, unsigned int Mode>
+struct unary_evaluator<TriangularView<MatrixType,Mode>, IndexBased>
+ : evaluator<typename internal::remove_all<MatrixType>::type>
+{
+  typedef TriangularView<MatrixType,Mode> XprType;
+  typedef evaluator<typename internal::remove_all<MatrixType>::type> Base;
+  typedef evaluator<XprType> type;
+  unary_evaluator(const XprType &xpr) : Base(xpr.nestedExpression()) {}
+};
+
+// Additional assignment kinds:
+struct Triangular2Triangular    {};
+struct Triangular2Dense         {};
+struct Dense2Triangular         {};
+
+
+template<typename Kernel, unsigned int Mode, int UnrollCount, bool ClearOpposite> struct triangular_assignment_loop;
+
+ 
+/** \internal Specialization of the dense assignment kernel for triangular matrices.
+  * The main difference is that the triangular, diagonal, and opposite parts are processed through three different functions.
+  * \tparam UpLo must be either Lower or Upper
+  * \tparam Mode must be either 0, UnitDiag, ZeroDiag, or SelfAdjoint
+  */
+template<int UpLo, int Mode, int SetOpposite, typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor, int Version = Specialized>
+class triangular_dense_assignment_kernel : public generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, Version>
+{
+protected:
+  typedef generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, Version> Base;
+  typedef typename Base::DstXprType DstXprType;
+  typedef typename Base::SrcXprType SrcXprType;
+  using Base::m_dst;
+  using Base::m_src;
+  using Base::m_functor;
+public:
+  
+  typedef typename Base::DstEvaluatorType DstEvaluatorType;
+  typedef typename Base::SrcEvaluatorType SrcEvaluatorType;
+  typedef typename Base::Scalar Scalar;
+  typedef typename Base::Index Index;
+  typedef typename Base::AssignmentTraits AssignmentTraits;
+  
+  
+  triangular_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr)
+    : Base(dst, src, func, dstExpr)
+  {}
+  
+#ifdef EIGEN_INTERNAL_DEBUGGING
+  void assignCoeff(Index row, Index col)
+  {
+    eigen_internal_assert(row!=col);
+    Base::assignCoeff(row,col);
+  }
+#else
+  using Base::assignCoeff;
+#endif
+  
+  void assignDiagonalCoeff(Index id)
+  {
+         if(Mode==UnitDiag && SetOpposite) m_functor.assignCoeff(m_dst.coeffRef(id,id), Scalar(1));
+    else if(Mode==ZeroDiag && SetOpposite) m_functor.assignCoeff(m_dst.coeffRef(id,id), Scalar(0));
+    else if(Mode==0)                       Base::assignCoeff(id,id);
+  }
+  
+  void assignOppositeCoeff(Index row, Index col)
+  { 
+    eigen_internal_assert(row!=col);
+    if(SetOpposite)
+      m_functor.assignCoeff(m_dst.coeffRef(row,col), Scalar(0));
+  }
+};
+
+template<int Mode, bool SetOpposite, typename DstXprType, typename SrcXprType, typename Functor>
+void call_triangular_assignment_loop(const DstXprType& dst, const SrcXprType& src, const Functor &func)
+{
+  eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
+  
+  typedef typename evaluator<DstXprType>::type DstEvaluatorType;
+  typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
+
+  DstEvaluatorType dstEvaluator(dst);
+  SrcEvaluatorType srcEvaluator(src);
+    
+  typedef triangular_dense_assignment_kernel< Mode&(Lower|Upper),Mode&(UnitDiag|ZeroDiag|SelfAdjoint),SetOpposite,
+                                              DstEvaluatorType,SrcEvaluatorType,Functor> Kernel;
+  Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived());
+  
+  enum {
+      unroll = DstXprType::SizeAtCompileTime != Dynamic
+            && SrcEvaluatorType::CoeffReadCost != Dynamic
+            && DstXprType::SizeAtCompileTime * SrcEvaluatorType::CoeffReadCost / 2 <= EIGEN_UNROLLING_LIMIT
+    };
+  
+  triangular_assignment_loop<Kernel, Mode, unroll ? int(DstXprType::SizeAtCompileTime) : Dynamic, SetOpposite>::run(kernel);
+}
+
+template<int Mode, bool SetOpposite, typename DstXprType, typename SrcXprType>
+void call_triangular_assignment_loop(const DstXprType& dst, const SrcXprType& src)
+{
+  call_triangular_assignment_loop<Mode,SetOpposite>(dst, src, internal::assign_op<typename DstXprType::Scalar>());
+}
+
+template<> struct AssignmentKind<TriangularShape,TriangularShape> { typedef Triangular2Triangular Kind; };
+template<> struct AssignmentKind<DenseShape,TriangularShape>      { typedef Triangular2Dense      Kind; };
+template<> struct AssignmentKind<TriangularShape,DenseShape>      { typedef Dense2Triangular      Kind; };
+
+
+template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar>
+struct Assignment<DstXprType, SrcXprType, Functor, Triangular2Triangular, Scalar>
+{
+  static void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
+  {
+    eigen_assert(int(DstXprType::Mode) == int(SrcXprType::Mode));
+    
+    call_triangular_assignment_loop<DstXprType::Mode, false>(dst, src, func);  
+  }
+};
+
+template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar>
+struct Assignment<DstXprType, SrcXprType, Functor, Triangular2Dense, Scalar>
+{
+  static void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
+  {
+    call_triangular_assignment_loop<SrcXprType::Mode, (SrcXprType::Mode&SelfAdjoint)==0>(dst, src, func);  
+  }
+};
+
+template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar>
+struct Assignment<DstXprType, SrcXprType, Functor, Dense2Triangular, Scalar>
+{
+  static void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
+  {
+    call_triangular_assignment_loop<DstXprType::Mode, false>(dst, src, func);  
+  }
+};
+
+
+template<typename Kernel, unsigned int Mode, int UnrollCount, bool SetOpposite>
+struct triangular_assignment_loop
+{
+  // FIXME: this is not very clean, perhaps this information should be provided by the kernel?
+  typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
+  typedef typename DstEvaluatorType::XprType DstXprType;
+  
+  enum {
+    col = (UnrollCount-1) / DstXprType::RowsAtCompileTime,
+    row = (UnrollCount-1) % DstXprType::RowsAtCompileTime
+  };
+  
+  typedef typename Kernel::Scalar Scalar;
+
+  EIGEN_DEVICE_FUNC
+  static inline void run(Kernel &kernel)
+  {
+    triangular_assignment_loop<Kernel, Mode, UnrollCount-1, SetOpposite>::run(kernel);
+    
+    if(row==col)
+      kernel.assignDiagonalCoeff(row);
+    else if( ((Mode&Lower) && row>col) || ((Mode&Upper) && row<col) )
+      kernel.assignCoeff(row,col);
+    else if(SetOpposite)
+      kernel.assignOppositeCoeff(row,col);
+  }
+};
+
+// prevent buggy user code from causing an infinite recursion
+template<typename Kernel, unsigned int Mode, bool SetOpposite>
+struct triangular_assignment_loop<Kernel, Mode, 0, SetOpposite>
+{
+  EIGEN_DEVICE_FUNC
+  static inline void run(Kernel &) {}
+};
+
+
+
+// TODO: experiment with a recursive assignment procedure splitting the current
+//       triangular part into one rectangular and two triangular parts.
+
+
+template<typename Kernel, unsigned int Mode, bool SetOpposite>
+struct triangular_assignment_loop<Kernel, Mode, Dynamic, SetOpposite>
+{
+  typedef typename Kernel::Index Index;
+  typedef typename Kernel::Scalar Scalar;
+  EIGEN_DEVICE_FUNC
+  static inline void run(Kernel &kernel)
+  {
+    for(Index j = 0; j < kernel.cols(); ++j)
+    {
+      Index maxi = (std::min)(j, kernel.rows());
+      Index i = 0;
+      if (((Mode&Lower) && SetOpposite) || (Mode&Upper))
+      {
+        for(; i < maxi; ++i)
+          if(Mode&Upper) kernel.assignCoeff(i, j);
+          else           kernel.assignOppositeCoeff(i, j);
+      }
+      else
+        i = maxi;
+      
+      if(i<kernel.rows()) // then i==j
+        kernel.assignDiagonalCoeff(i++);
+      
+      if (((Mode&Upper) && SetOpposite) || (Mode&Lower))
+      {
+        for(; i < kernel.rows(); ++i)
+          if(Mode&Lower) kernel.assignCoeff(i, j);
+          else           kernel.assignOppositeCoeff(i, j);
+      }
+    }
+  }
+};
+
+} // end namespace internal
+
+/** Assigns a triangular or selfadjoint matrix to a dense matrix.
+  * If the matrix is triangular, the opposite part is set to zero. */
+template<typename Derived>
+template<typename DenseDerived>
+void TriangularBase<Derived>::evalToLazy(MatrixBase<DenseDerived> &other) const
+{
+  other.derived().resize(this->rows(), this->cols());
+  internal::call_triangular_assignment_loop<Derived::Mode,(Derived::Mode&SelfAdjoint)==0 /* SetOpposite */>(other.derived(), derived().nestedExpression());
+}
+
+namespace internal {
+  
+// Triangular = Product
+template< typename DstXprType, typename Lhs, typename Rhs, typename Scalar>
+struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::assign_op<Scalar>, Dense2Triangular, Scalar>
+{
+  typedef Product<Lhs,Rhs,DefaultProduct> SrcXprType;
+  static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &)
+  {
+    dst.setZero();
+    dst._assignProduct(src, 1);
+  }
+};
+
+// Triangular += Product
+template< typename DstXprType, typename Lhs, typename Rhs, typename Scalar>
+struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::add_assign_op<Scalar>, Dense2Triangular, Scalar>
+{
+  typedef Product<Lhs,Rhs,DefaultProduct> SrcXprType;
+  static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<Scalar> &)
+  {
+    dst._assignProduct(src, 1);
+  }
+};
+
+// Triangular -= Product
+template< typename DstXprType, typename Lhs, typename Rhs, typename Scalar>
+struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::sub_assign_op<Scalar>, Dense2Triangular, Scalar>
+{
+  typedef Product<Lhs,Rhs,DefaultProduct> SrcXprType;
+  static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<Scalar> &)
+  {
+    dst._assignProduct(src, -1);
+  }
+};
+
+} // end namespace internal
+
 } // end namespace Eigen
 
 #endif // EIGEN_TRIANGULARMATRIX_H
diff --git a/Eigen/src/Core/VectorwiseOp.h b/Eigen/src/Core/VectorwiseOp.h
index 52eb4f604..a8130e902 100644
--- a/Eigen/src/Core/VectorwiseOp.h
+++ b/Eigen/src/Core/VectorwiseOp.h
@@ -48,19 +48,9 @@ struct traits<PartialReduxExpr<MatrixType, MemberOp, Direction> >
     ColsAtCompileTime = Direction==Horizontal ? 1 : MatrixType::ColsAtCompileTime,
     MaxRowsAtCompileTime = Direction==Vertical   ? 1 : MatrixType::MaxRowsAtCompileTime,
     MaxColsAtCompileTime = Direction==Horizontal ? 1 : MatrixType::MaxColsAtCompileTime,
-    Flags0 = (unsigned int)_MatrixTypeNested::Flags & HereditaryBits,
-    Flags = (Flags0 & ~RowMajorBit) | (RowsAtCompileTime == 1 ? RowMajorBit : 0),
+    Flags = RowsAtCompileTime == 1 ? RowMajorBit : 0,
     TraversalSize = Direction==Vertical ? MatrixType::RowsAtCompileTime :  MatrixType::ColsAtCompileTime
   };
-  #if EIGEN_GNUC_AT_LEAST(3,4)
-  typedef typename MemberOp::template Cost<InputScalar,int(TraversalSize)> CostOpType;
-  #else
-  typedef typename MemberOp::template Cost<InputScalar,TraversalSize> CostOpType;
-  #endif
-  enum {
-    CoeffReadCost = TraversalSize==Dynamic ? Dynamic
-                  : TraversalSize * traits<_MatrixTypeNested>::CoeffReadCost + int(CostOpType::value)
-  };
 };
 }
 
diff --git a/Eigen/src/Core/Visitor.h b/Eigen/src/Core/Visitor.h
index 6f4b9ec35..810ec28e7 100644
--- a/Eigen/src/Core/Visitor.h
+++ b/Eigen/src/Core/Visitor.h
@@ -53,6 +53,33 @@ struct visitor_impl<Visitor, Derived, Dynamic>
   }
 };
 
+// evaluator adaptor
+template<typename XprType>
+class visitor_evaluator
+{
+public:
+  visitor_evaluator(const XprType &xpr) : m_evaluator(xpr), m_xpr(xpr) {}
+  
+  typedef typename XprType::Index Index;
+  typedef typename XprType::Scalar Scalar;
+  typedef typename XprType::CoeffReturnType CoeffReturnType;
+  
+  enum {
+    RowsAtCompileTime = XprType::RowsAtCompileTime,
+    CoeffReadCost = internal::evaluator<XprType>::CoeffReadCost
+  };
+  
+  Index rows() const { return m_xpr.rows(); }
+  Index cols() const { return m_xpr.cols(); }
+  Index size() const { return m_xpr.size(); }
+
+  CoeffReturnType coeff(Index row, Index col) const
+  { return m_evaluator.coeff(row, col); }
+  
+protected:
+  typename internal::evaluator<XprType>::nestedType m_evaluator;
+  const XprType &m_xpr;
+};
 } // end namespace internal
 
 /** Applies the visitor \a visitor to the whole coefficients of the matrix or vector.
@@ -76,14 +103,17 @@ template<typename Derived>
 template<typename Visitor>
 void DenseBase<Derived>::visit(Visitor& visitor) const
 {
-  enum { unroll = SizeAtCompileTime != Dynamic
-                   && CoeffReadCost != Dynamic
-                   && (SizeAtCompileTime == 1 || internal::functor_traits<Visitor>::Cost != Dynamic)
-                   && SizeAtCompileTime * CoeffReadCost + (SizeAtCompileTime-1) * internal::functor_traits<Visitor>::Cost
-                      <= EIGEN_UNROLLING_LIMIT };
-  return internal::visitor_impl<Visitor, Derived,
+  typedef typename internal::visitor_evaluator<Derived> ThisEvaluator;
+  ThisEvaluator thisEval(derived());
+  
+  enum { unroll =   SizeAtCompileTime != Dynamic
+                &&  ThisEvaluator::CoeffReadCost != Dynamic
+                &&  (SizeAtCompileTime == 1 || internal::functor_traits<Visitor>::Cost != Dynamic)
+                &&  SizeAtCompileTime * ThisEvaluator::CoeffReadCost + (SizeAtCompileTime-1) * internal::functor_traits<Visitor>::Cost
+                <= EIGEN_UNROLLING_LIMIT };
+  return internal::visitor_impl<Visitor, ThisEvaluator,
       unroll ? int(SizeAtCompileTime) : Dynamic
-    >::run(derived(), visitor);
+    >::run(thisEval, visitor);
 }
 
 namespace internal {
diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h
index 66b97bd69..01730c5ee 100644
--- a/Eigen/src/Core/arch/AVX/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX/PacketMath.h
@@ -141,7 +141,7 @@ template<> EIGEN_STRONG_INLINE Packet8f pmadd(const Packet8f& a, const Packet8f&
   // so let's enforce it to generate a vfmadd231ps instruction since the most common use case is to accumulate
   // the result of the product.
   Packet8f res = c;
-  asm("vfmadd231ps %[a], %[b], %[c]" : [c] "+x" (res) : [a] "x" (a), [b] "x" (b));
+  __asm__("vfmadd231ps %[a], %[b], %[c]" : [c] "+x" (res) : [a] "x" (a), [b] "x" (b));
   return res;
 #else
   return _mm256_fmadd_ps(a,b,c);
@@ -151,7 +151,7 @@ template<> EIGEN_STRONG_INLINE Packet4d pmadd(const Packet4d& a, const Packet4d&
 #if defined(__clang__) || defined(__GNUC__)
   // see above
   Packet4d res = c;
-  asm("vfmadd231pd %[a], %[b], %[c]" : [c] "+x" (res) : [a] "x" (a), [b] "x" (b));
+  __asm__("vfmadd231pd %[a], %[b], %[c]" : [c] "+x" (res) : [a] "x" (a), [b] "x" (b));
   return res;
 #else
   return _mm256_fmadd_pd(a,b,c);
diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h
index 380b76ae9..0504c095c 100644
--- a/Eigen/src/Core/arch/NEON/PacketMath.h
+++ b/Eigen/src/Core/arch/NEON/PacketMath.h
@@ -52,12 +52,12 @@ typedef uint32x4_t  Packet4ui;
 
 // arm64 does have the pld instruction. If available, let's trust the __builtin_prefetch built-in function
 // which available on LLVM and GCC (at least)
-#if (defined(__has_builtin) && __has_builtin(__builtin_prefetch)) || defined(__GNUC__)
+#if EIGEN_HAS_BUILTIN(__builtin_prefetch) || defined(__GNUC__)
   #define EIGEN_ARM_PREFETCH(ADDR) __builtin_prefetch(ADDR);
 #elif defined __pld
   #define EIGEN_ARM_PREFETCH(ADDR) __pld(ADDR)
 #elif !defined(__aarch64__)
-  #define EIGEN_ARM_PREFETCH(ADDR) asm volatile ( "   pld [%[addr]]\n" :: [addr] "r" (ADDR) : "cc" );
+  #define EIGEN_ARM_PREFETCH(ADDR) __asm__ __volatile__ ( "   pld [%[addr]]\n" :: [addr] "r" (ADDR) : "cc" );
 #else
   // by default no explicit prefetching
   #define EIGEN_ARM_PREFETCH(ADDR)
diff --git a/Eigen/src/Core/functors/AssignmentFunctors.h b/Eigen/src/Core/functors/AssignmentFunctors.h
index ae264aa64..d4d85a1ca 100644
--- a/Eigen/src/Core/functors/AssignmentFunctors.h
+++ b/Eigen/src/Core/functors/AssignmentFunctors.h
@@ -31,7 +31,7 @@ template<typename Scalar>
 struct functor_traits<assign_op<Scalar> > {
   enum {
     Cost = NumTraits<Scalar>::ReadCost,
-    PacketAccess = packet_traits<Scalar>::IsVectorized
+    PacketAccess = packet_traits<Scalar>::Vectorizable
   };
 };
 
@@ -73,7 +73,7 @@ template<typename Scalar>
 struct functor_traits<sub_assign_op<Scalar> > {
   enum {
     Cost = NumTraits<Scalar>::ReadCost + NumTraits<Scalar>::AddCost,
-    PacketAccess = packet_traits<Scalar>::HasAdd
+    PacketAccess = packet_traits<Scalar>::HasSub
   };
 };
 
@@ -81,22 +81,24 @@ struct functor_traits<sub_assign_op<Scalar> > {
   * \brief Template functor for scalar/packet assignment with multiplication
   *
   */
-template<typename Scalar> struct mul_assign_op {
+template<typename DstScalar, typename SrcScalar=DstScalar>
+struct mul_assign_op {
 
   EIGEN_EMPTY_STRUCT_CTOR(mul_assign_op)
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const { a *= b; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(DstScalar& a, const SrcScalar& b) const { a *= b; }
   
   template<int Alignment, typename Packet>
-  EIGEN_STRONG_INLINE void assignPacket(Scalar* a, const Packet& b) const
-  { internal::pstoret<Scalar,Packet,Alignment>(a,internal::pmul(internal::ploadt<Packet,Alignment>(a),b)); }
+  EIGEN_STRONG_INLINE void assignPacket(DstScalar* a, const Packet& b) const
+  { internal::pstoret<DstScalar,Packet,Alignment>(a,internal::pmul(internal::ploadt<Packet,Alignment>(a),b)); }
 };
-template<typename Scalar>
-struct functor_traits<mul_assign_op<Scalar> > {
+template<typename DstScalar, typename SrcScalar>
+struct functor_traits<mul_assign_op<DstScalar,SrcScalar> > {
   enum {
-    Cost = NumTraits<Scalar>::ReadCost + NumTraits<Scalar>::MulCost,
-    PacketAccess = packet_traits<Scalar>::HasMul
+    Cost = NumTraits<DstScalar>::ReadCost + NumTraits<DstScalar>::MulCost,
+    PacketAccess = is_same<DstScalar,SrcScalar>::value && packet_traits<DstScalar>::HasMul
   };
 };
+template<typename DstScalar,typename SrcScalar> struct functor_is_product_like<mul_assign_op<DstScalar,SrcScalar> > { enum { ret = 1 }; };
 
 /** \internal
   * \brief Template functor for scalar/packet assignment with diviving
@@ -115,7 +117,7 @@ template<typename Scalar>
 struct functor_traits<div_assign_op<Scalar> > {
   enum {
     Cost = NumTraits<Scalar>::ReadCost + NumTraits<Scalar>::MulCost,
-    PacketAccess = packet_traits<Scalar>::HasMul
+    PacketAccess = packet_traits<Scalar>::HasDiv
   };
 };
 
@@ -156,7 +158,7 @@ template<typename Scalar>
 struct functor_traits<swap_assign_op<Scalar> > {
   enum {
     Cost = 3 * NumTraits<Scalar>::ReadCost,
-    PacketAccess = packet_traits<Scalar>::IsVectorized
+    PacketAccess = packet_traits<Scalar>::Vectorizable
   };
 };
 
diff --git a/Eigen/src/Core/functors/BinaryFunctors.h b/Eigen/src/Core/functors/BinaryFunctors.h
index ba094f5d1..157d075a7 100644
--- a/Eigen/src/Core/functors/BinaryFunctors.h
+++ b/Eigen/src/Core/functors/BinaryFunctors.h
@@ -167,9 +167,17 @@ template<typename Scalar> struct scalar_hypot_op {
     EIGEN_USING_STD_MATH(max);
     EIGEN_USING_STD_MATH(min);
     using std::sqrt;
-    Scalar p = (max)(_x, _y);
-    Scalar q = (min)(_x, _y);
-    Scalar qp = q/p;
+    Scalar p, qp;
+    if(_x>_y)
+    {
+      p = _x;
+      qp = _y / p;
+    }
+    else
+    {
+      p = _y;
+      qp = _x / p;
+    }
     return p * sqrt(Scalar(1) + qp*qp);
   }
 };
diff --git a/Eigen/src/Core/products/CoeffBasedProduct.h b/Eigen/src/Core/products/CoeffBasedProduct.h
deleted file mode 100644
index 637513132..000000000
--- a/Eigen/src/Core/products/CoeffBasedProduct.h
+++ /dev/null
@@ -1,452 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
-// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_COEFFBASED_PRODUCT_H
-#define EIGEN_COEFFBASED_PRODUCT_H
-
-namespace Eigen { 
-
-namespace internal {
-
-/*********************************************************************************
-*  Coefficient based product implementation.
-*  It is designed for the following use cases:
-*  - small fixed sizes
-*  - lazy products
-*********************************************************************************/
-
-/* Since the all the dimensions of the product are small, here we can rely
- * on the generic Assign mechanism to evaluate the product per coeff (or packet).
- *
- * Note that here the inner-loops should always be unrolled.
- */
-
-template<int Traversal, int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
-struct product_coeff_impl;
-
-template<int StorageOrder, int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
-struct product_packet_impl;
-
-template<typename LhsNested, typename RhsNested, int NestingFlags>
-struct traits<CoeffBasedProduct<LhsNested,RhsNested,NestingFlags> >
-{
-  typedef MatrixXpr XprKind;
-  typedef typename remove_all<LhsNested>::type _LhsNested;
-  typedef typename remove_all<RhsNested>::type _RhsNested;
-  typedef typename scalar_product_traits<typename _LhsNested::Scalar, typename _RhsNested::Scalar>::ReturnType Scalar;
-  typedef typename promote_storage_type<typename traits<_LhsNested>::StorageKind,
-                                           typename traits<_RhsNested>::StorageKind>::ret StorageKind;
-  typedef typename promote_index_type<typename traits<_LhsNested>::Index,
-                                         typename traits<_RhsNested>::Index>::type Index;
-
-  enum {
-      LhsCoeffReadCost = _LhsNested::CoeffReadCost,
-      RhsCoeffReadCost = _RhsNested::CoeffReadCost,
-      LhsFlags = _LhsNested::Flags,
-      RhsFlags = _RhsNested::Flags,
-
-      RowsAtCompileTime = _LhsNested::RowsAtCompileTime,
-      ColsAtCompileTime = _RhsNested::ColsAtCompileTime,
-      InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(_LhsNested::ColsAtCompileTime, _RhsNested::RowsAtCompileTime),
-
-      MaxRowsAtCompileTime = _LhsNested::MaxRowsAtCompileTime,
-      MaxColsAtCompileTime = _RhsNested::MaxColsAtCompileTime,
-
-      LhsRowMajor = LhsFlags & RowMajorBit,
-      RhsRowMajor = RhsFlags & RowMajorBit,
-
-      SameType = is_same<typename _LhsNested::Scalar,typename _RhsNested::Scalar>::value,
-
-      CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit)
-                      && (ColsAtCompileTime == Dynamic
-                          || ( (ColsAtCompileTime % packet_traits<Scalar>::size) == 0
-                              && (RhsFlags&AlignedBit)
-                             )
-                         ),
-
-      CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit)
-                      && (RowsAtCompileTime == Dynamic
-                          || ( (RowsAtCompileTime % packet_traits<Scalar>::size) == 0
-                              && (LhsFlags&AlignedBit)
-                             )
-                         ),
-
-      EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1
-                     : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0
-                     : (RhsRowMajor && !CanVectorizeLhs),
-
-      Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit)
-            | (EvalToRowMajor ? RowMajorBit : 0)
-            | NestingFlags
-            | (CanVectorizeLhs ? (LhsFlags & AlignedBit) : 0)
-            | (CanVectorizeRhs ? (RhsFlags & AlignedBit) : 0)
-            // TODO enable vectorization for mixed types
-            | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0),
-
-      CoeffReadCost = InnerSize == Dynamic ? Dynamic
-                    : InnerSize * (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost)
-                      + (InnerSize - 1) * NumTraits<Scalar>::AddCost,
-
-      /* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside
-      * of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner
-      * loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect
-      * the Flags, it is safe to make this value depend on ActualPacketAccessBit, that doesn't affect the ABI.
-      */
-      CanVectorizeInner =    SameType
-                          && LhsRowMajor
-                          && (!RhsRowMajor)
-                          && (LhsFlags & RhsFlags & ActualPacketAccessBit)
-                          && (LhsFlags & RhsFlags & AlignedBit)
-                          && (InnerSize % packet_traits<Scalar>::size == 0)
-    };
-};
-
-} // end namespace internal
-
-template<typename LhsNested, typename RhsNested, int NestingFlags>
-class CoeffBasedProduct
-  : internal::no_assignment_operator,
-    public MatrixBase<CoeffBasedProduct<LhsNested, RhsNested, NestingFlags> >
-{
-  public:
-
-    typedef MatrixBase<CoeffBasedProduct> Base;
-    EIGEN_DENSE_PUBLIC_INTERFACE(CoeffBasedProduct)
-    typedef typename Base::PlainObject PlainObject;
-
-  private:
-
-    typedef typename internal::traits<CoeffBasedProduct>::_LhsNested _LhsNested;
-    typedef typename internal::traits<CoeffBasedProduct>::_RhsNested _RhsNested;
-
-    enum {
-      PacketSize = internal::packet_traits<Scalar>::size,
-      InnerSize  = internal::traits<CoeffBasedProduct>::InnerSize,
-      Unroll = CoeffReadCost != Dynamic && CoeffReadCost <= EIGEN_UNROLLING_LIMIT,
-      CanVectorizeInner = internal::traits<CoeffBasedProduct>::CanVectorizeInner
-    };
-
-    typedef internal::product_coeff_impl<CanVectorizeInner ? InnerVectorizedTraversal : DefaultTraversal,
-                                   Unroll ? InnerSize-1 : Dynamic,
-                                   _LhsNested, _RhsNested, Scalar> ScalarCoeffImpl;
-
-    typedef CoeffBasedProduct<LhsNested,RhsNested,NestByRefBit> LazyCoeffBasedProductType;
-
-  public:
-
-    EIGEN_DEVICE_FUNC
-    inline CoeffBasedProduct(const CoeffBasedProduct& other)
-      : Base(), m_lhs(other.m_lhs), m_rhs(other.m_rhs)
-    {}
-
-    template<typename Lhs, typename Rhs>
-    EIGEN_DEVICE_FUNC 
-    inline CoeffBasedProduct(const Lhs& lhs, const Rhs& rhs)
-      : m_lhs(lhs), m_rhs(rhs)
-    {
-      // we don't allow taking products of matrices of different real types, as that wouldn't be vectorizable.
-      // We still allow to mix T and complex<T>.
-      EIGEN_STATIC_ASSERT((internal::scalar_product_traits<typename Lhs::RealScalar, typename Rhs::RealScalar>::Defined),
-        YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
-      eigen_assert(lhs.cols() == rhs.rows()
-        && "invalid matrix product"
-        && "if you wanted a coeff-wise or a dot product use the respective explicit functions");
-    }
-
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rows() const { return m_lhs.rows(); }
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index cols() const { return m_rhs.cols(); }
-
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const
-    {
-      Scalar res;
-      ScalarCoeffImpl::run(row, col, m_lhs, m_rhs, res);
-      return res;
-    }
-
-    /* Allow index-based non-packet access. It is impossible though to allow index-based packed access,
-     * which is why we don't set the LinearAccessBit.
-     */
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const Scalar coeff(Index index) const
-    {
-      Scalar res;
-      const Index row = RowsAtCompileTime == 1 ? 0 : index;
-      const Index col = RowsAtCompileTime == 1 ? index : 0;
-      ScalarCoeffImpl::run(row, col, m_lhs, m_rhs, res);
-      return res;
-    }
-
-    template<int LoadMode>
-    EIGEN_STRONG_INLINE const PacketScalar packet(Index row, Index col) const
-    {
-      PacketScalar res;
-      internal::product_packet_impl<Flags&RowMajorBit ? RowMajor : ColMajor,
-                              Unroll ? InnerSize-1 : Dynamic,
-                              _LhsNested, _RhsNested, PacketScalar, LoadMode>
-        ::run(row, col, m_lhs, m_rhs, res);
-      return res;
-    }
-
-    // Implicit conversion to the nested type (trigger the evaluation of the product)
-    EIGEN_DEVICE_FUNC 
-    EIGEN_STRONG_INLINE operator const PlainObject& () const
-    {
-      m_result.lazyAssign(*this);
-      return m_result;
-    }
-
-    EIGEN_DEVICE_FUNC const _LhsNested& lhs() const { return m_lhs; }
-    EIGEN_DEVICE_FUNC const _RhsNested& rhs() const { return m_rhs; }
-
-    EIGEN_DEVICE_FUNC
-    const Diagonal<const LazyCoeffBasedProductType,0> diagonal() const
-    { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this); }
-
-    template<int DiagonalIndex>
-    EIGEN_DEVICE_FUNC 
-    const Diagonal<const LazyCoeffBasedProductType,DiagonalIndex> diagonal() const
-    { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this); }
-
-    EIGEN_DEVICE_FUNC
-    const Diagonal<const LazyCoeffBasedProductType,Dynamic> diagonal(Index index) const
-    { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this).diagonal(index); }
-
-  protected:
-    typename internal::add_const_on_value_type<LhsNested>::type m_lhs;
-    typename internal::add_const_on_value_type<RhsNested>::type m_rhs;
-
-    mutable PlainObject m_result;
-};
-
-namespace internal {
-
-// here we need to overload the nested rule for products
-// such that the nested type is a const reference to a plain matrix
-template<typename Lhs, typename Rhs, int N, typename PlainObject>
-struct nested<CoeffBasedProduct<Lhs,Rhs,EvalBeforeNestingBit|EvalBeforeAssigningBit>, N, PlainObject>
-{
-  typedef PlainObject const& type;
-};
-
-/***************************************************************************
-* Normal product .coeff() implementation (with meta-unrolling)
-***************************************************************************/
-
-/**************************************
-*** Scalar path  - no vectorization ***
-**************************************/
-
-template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
-struct product_coeff_impl<DefaultTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
-{
-  typedef typename Lhs::Index Index;
-  EIGEN_DEVICE_FUNC 
-  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
-  {
-    product_coeff_impl<DefaultTraversal, UnrollingIndex-1, Lhs, Rhs, RetScalar>::run(row, col, lhs, rhs, res);
-    res += lhs.coeff(row, UnrollingIndex) * rhs.coeff(UnrollingIndex, col);
-  }
-};
-
-template<typename Lhs, typename Rhs, typename RetScalar>
-struct product_coeff_impl<DefaultTraversal, 0, Lhs, Rhs, RetScalar>
-{
-  typedef typename Lhs::Index Index;
-  EIGEN_DEVICE_FUNC 
-  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
-  {
-    res = lhs.coeff(row, 0) * rhs.coeff(0, col);
-  }
-};
-
-template<typename Lhs, typename Rhs, typename RetScalar>
-struct product_coeff_impl<DefaultTraversal, Dynamic, Lhs, Rhs, RetScalar>
-{
-  typedef typename Lhs::Index Index;
-  EIGEN_DEVICE_FUNC 
-  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar& res)
-  {
-    eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
-    res = lhs.coeff(row, 0) * rhs.coeff(0, col);
-      for(Index i = 1; i < lhs.cols(); ++i)
-        res += lhs.coeff(row, i) * rhs.coeff(i, col);
-  }
-};
-
-/*******************************************
-*** Scalar path with inner vectorization ***
-*******************************************/
-
-template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet>
-struct product_coeff_vectorized_unroller
-{
-  typedef typename Lhs::Index Index;
-  enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
-  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
-  {
-    product_coeff_vectorized_unroller<UnrollingIndex-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres);
-    pres = padd(pres, pmul( lhs.template packet<Aligned>(row, UnrollingIndex) , rhs.template packet<Aligned>(UnrollingIndex, col) ));
-  }
-};
-
-template<typename Lhs, typename Rhs, typename Packet>
-struct product_coeff_vectorized_unroller<0, Lhs, Rhs, Packet>
-{
-  typedef typename Lhs::Index Index;
-  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
-  {
-    pres = pmul(lhs.template packet<Aligned>(row, 0) , rhs.template packet<Aligned>(0, col));
-  }
-};
-
-template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
-struct product_coeff_impl<InnerVectorizedTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
-{
-  typedef typename Lhs::PacketScalar Packet;
-  typedef typename Lhs::Index Index;
-  enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
-  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
-  {
-    Packet pres;
-    product_coeff_vectorized_unroller<UnrollingIndex+1-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres);
-    res = predux(pres);
-  }
-};
-
-template<typename Lhs, typename Rhs, int LhsRows = Lhs::RowsAtCompileTime, int RhsCols = Rhs::ColsAtCompileTime>
-struct product_coeff_vectorized_dyn_selector
-{
-  typedef typename Lhs::Index Index;
-  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
-  {
-    res = lhs.row(row).transpose().cwiseProduct(rhs.col(col)).sum();
-  }
-};
-
-// NOTE the 3 following specializations are because taking .col(0) on a vector is a bit slower
-// NOTE maybe they are now useless since we have a specialization for Block<Matrix>
-template<typename Lhs, typename Rhs, int RhsCols>
-struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,RhsCols>
-{
-  typedef typename Lhs::Index Index;
-  static EIGEN_STRONG_INLINE void run(Index /*row*/, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
-  {
-    res = lhs.transpose().cwiseProduct(rhs.col(col)).sum();
-  }
-};
-
-template<typename Lhs, typename Rhs, int LhsRows>
-struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,LhsRows,1>
-{
-  typedef typename Lhs::Index Index;
-  static EIGEN_STRONG_INLINE void run(Index row, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
-  {
-    res = lhs.row(row).transpose().cwiseProduct(rhs).sum();
-  }
-};
-
-template<typename Lhs, typename Rhs>
-struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,1>
-{
-  typedef typename Lhs::Index Index;
-  static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
-  {
-    res = lhs.transpose().cwiseProduct(rhs).sum();
-  }
-};
-
-template<typename Lhs, typename Rhs, typename RetScalar>
-struct product_coeff_impl<InnerVectorizedTraversal, Dynamic, Lhs, Rhs, RetScalar>
-{
-  typedef typename Lhs::Index Index;
-  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
-  {
-    product_coeff_vectorized_dyn_selector<Lhs,Rhs>::run(row, col, lhs, rhs, res);
-  }
-};
-
-/*******************
-*** Packet path  ***
-*******************/
-
-template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
-struct product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
-{
-  typedef typename Lhs::Index Index;
-  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
-  {
-    product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res);
-    res =  pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex)), rhs.template packet<LoadMode>(UnrollingIndex, col), res);
-  }
-};
-
-template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
-struct product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
-{
-  typedef typename Lhs::Index Index;
-  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
-  {
-    product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res);
-    res =  pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex), pset1<Packet>(rhs.coeff(UnrollingIndex, col)), res);
-  }
-};
-
-template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
-struct product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
-{
-  typedef typename Lhs::Index Index;
-  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
-  {
-    res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
-  }
-};
-
-template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
-struct product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
-{
-  typedef typename Lhs::Index Index;
-  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
-  {
-    res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
-  }
-};
-
-template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
-struct product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
-{
-  typedef typename Lhs::Index Index;
-  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
-  {
-    eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
-    res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
-      for(Index i = 1; i < lhs.cols(); ++i)
-        res =  pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode>(i, col), res);
-  }
-};
-
-template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
-struct product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
-{
-  typedef typename Lhs::Index Index;
-  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
-  {
-    eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
-    res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
-      for(Index i = 1; i < lhs.cols(); ++i)
-        res =  pmadd(lhs.template packet<LoadMode>(row, i), pset1<Packet>(rhs.coeff(i, col)), res);
-  }
-};
-
-} // end namespace internal
-
-} // end namespace Eigen
-
-#endif // EIGEN_COEFFBASED_PRODUCT_H
diff --git a/Eigen/src/Core/products/GeneralMatrixMatrix.h b/Eigen/src/Core/products/GeneralMatrixMatrix.h
index 6ad07eccb..b7e1867f0 100644
--- a/Eigen/src/Core/products/GeneralMatrixMatrix.h
+++ b/Eigen/src/Core/products/GeneralMatrixMatrix.h
@@ -216,8 +216,8 @@ struct gemm_functor
       cols = m_rhs.cols();
 
     Gemm::run(rows, cols, m_lhs.cols(),
-              /*(const Scalar*)*/&m_lhs.coeffRef(row,0), m_lhs.outerStride(),
-              /*(const Scalar*)*/&m_rhs.coeffRef(0,col), m_rhs.outerStride(),
+              &m_lhs.coeffRef(row,0), m_lhs.outerStride(),
+              &m_rhs.coeffRef(0,col), m_rhs.outerStride(),
               (Scalar*)&(m_dest.coeffRef(row,col)), m_dest.outerStride(),
               m_actualAlpha, m_blocking, info);
   }
@@ -367,84 +367,93 @@ class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, M
 
 } // end namespace internal
 
+namespace internal {
+  
 template<typename Lhs, typename Rhs>
-class GeneralProduct<Lhs, Rhs, GemmProduct>
-  : public ProductBase<GeneralProduct<Lhs,Rhs,GemmProduct>, Lhs, Rhs>
+struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct>
+  : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct> >
 {
-    enum {
-      MaxDepthAtCompileTime = EIGEN_SIZE_MIN_PREFER_FIXED(Lhs::MaxColsAtCompileTime,Rhs::MaxRowsAtCompileTime)
-    };
-  public:
-    EIGEN_PRODUCT_PUBLIC_INTERFACE(GeneralProduct)
-    
-    typedef typename  Lhs::Scalar LhsScalar;
-    typedef typename  Rhs::Scalar RhsScalar;
-    typedef           Scalar      ResScalar;
-
-    GeneralProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs)
-    {
-      typedef internal::scalar_product_op<LhsScalar,RhsScalar> BinOp;
-      EIGEN_CHECK_BINARY_COMPATIBILIY(BinOp,LhsScalar,RhsScalar);
-    }
-    
-    template<typename Dest>
-    inline void evalTo(Dest& dst) const
+  typedef typename Product<Lhs,Rhs>::Scalar Scalar;
+  typedef typename Product<Lhs,Rhs>::Index  Index;
+  typedef typename Lhs::Scalar LhsScalar;
+  typedef typename Rhs::Scalar RhsScalar;
+  
+  typedef internal::blas_traits<Lhs> LhsBlasTraits;
+  typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
+  typedef typename internal::remove_all<ActualLhsType>::type ActualLhsTypeCleaned;
+  
+  typedef internal::blas_traits<Rhs> RhsBlasTraits;
+  typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
+  typedef typename internal::remove_all<ActualRhsType>::type ActualRhsTypeCleaned;
+  
+  enum {
+    MaxDepthAtCompileTime = EIGEN_SIZE_MIN_PREFER_FIXED(Lhs::MaxColsAtCompileTime,Rhs::MaxRowsAtCompileTime)
+  };
+  
+  typedef generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode> lazyproduct;
+  
+  template<typename Dst>
+  static void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+  {
+    if((rhs.rows()+dst.rows()+dst.cols())<20 && rhs.rows()>0)
+      lazyproduct::evalTo(dst, lhs, rhs);
+    else
     {
-      if((m_rhs.rows()+dst.rows()+dst.cols())<20 && m_rhs.rows()>0)
-        dst.noalias() = m_lhs .lazyProduct( m_rhs );
-      else
-      {
-        dst.setZero();
-        scaleAndAddTo(dst,Scalar(1));
-      }
+      dst.setZero();
+      scaleAndAddTo(dst, lhs, rhs, Scalar(1));
     }
+  }
 
-    template<typename Dest>
-    inline void addTo(Dest& dst) const
-    {
-      if((m_rhs.rows()+dst.rows()+dst.cols())<20 && m_rhs.rows()>0)
-        dst.noalias() += m_lhs .lazyProduct( m_rhs );
-      else
-        scaleAndAddTo(dst,Scalar(1));
-    }
+  template<typename Dst>
+  static void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+  {
+    if((rhs.rows()+dst.rows()+dst.cols())<20 && rhs.rows()>0)
+      lazyproduct::addTo(dst, lhs, rhs);
+    else
+      scaleAndAddTo(dst,lhs, rhs, Scalar(1));
+  }
 
-    template<typename Dest>
-    inline void subTo(Dest& dst) const
-    {
-      if((m_rhs.rows()+dst.rows()+dst.cols())<20 && m_rhs.rows()>0)
-        dst.noalias() -= m_lhs .lazyProduct( m_rhs );
-      else
-        scaleAndAddTo(dst,Scalar(-1));
-    }
-    
-    template<typename Dest> void scaleAndAddTo(Dest& dst, const Scalar& alpha) const
-    {
-      eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
+  template<typename Dst>
+  static void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+  {
+    if((rhs.rows()+dst.rows()+dst.cols())<20 && rhs.rows()>0)
+      lazyproduct::subTo(dst, lhs, rhs);
+    else
+      scaleAndAddTo(dst, lhs, rhs, Scalar(-1));
+  }
+  
+  template<typename Dest>
+  static void scaleAndAddTo(Dest& dst, const Lhs& a_lhs, const Rhs& a_rhs, const Scalar& alpha)
+  {
+    eigen_assert(dst.rows()==a_lhs.rows() && dst.cols()==a_rhs.cols());
 
-      typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs);
-      typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs);
+    typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(a_lhs);
+    typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(a_rhs);
 
-      Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
-                                 * RhsBlasTraits::extractScalarFactor(m_rhs);
+    Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(a_lhs)
+                               * RhsBlasTraits::extractScalarFactor(a_rhs);
 
-      typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,LhsScalar,RhsScalar,
-              Dest::MaxRowsAtCompileTime,Dest::MaxColsAtCompileTime,MaxDepthAtCompileTime> BlockingType;
+    typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,LhsScalar,RhsScalar,
+            Dest::MaxRowsAtCompileTime,Dest::MaxColsAtCompileTime,MaxDepthAtCompileTime> BlockingType;
 
-      typedef internal::gemm_functor<
-        Scalar, Index,
-        internal::general_matrix_matrix_product<
-          Index,
-          LhsScalar, (_ActualLhsType::Flags&RowMajorBit) ? RowMajor : ColMajor, bool(LhsBlasTraits::NeedToConjugate),
-          RhsScalar, (_ActualRhsType::Flags&RowMajorBit) ? RowMajor : ColMajor, bool(RhsBlasTraits::NeedToConjugate),
-          (Dest::Flags&RowMajorBit) ? RowMajor : ColMajor>,
-        _ActualLhsType, _ActualRhsType, Dest, BlockingType> GemmFunctor;
+    typedef internal::gemm_functor<
+      Scalar, Index,
+      internal::general_matrix_matrix_product<
+        Index,
+        LhsScalar, (ActualLhsTypeCleaned::Flags&RowMajorBit) ? RowMajor : ColMajor, bool(LhsBlasTraits::NeedToConjugate),
+        RhsScalar, (ActualRhsTypeCleaned::Flags&RowMajorBit) ? RowMajor : ColMajor, bool(RhsBlasTraits::NeedToConjugate),
+        (Dest::Flags&RowMajorBit) ? RowMajor : ColMajor>,
+      ActualLhsTypeCleaned, ActualRhsTypeCleaned, Dest, BlockingType> GemmFunctor;
 
-      BlockingType blocking(dst.rows(), dst.cols(), lhs.cols(), true);
+    BlockingType blocking(dst.rows(), dst.cols(), lhs.cols(), true);
 
-      internal::parallelize_gemm<(Dest::MaxRowsAtCompileTime>32 || Dest::MaxRowsAtCompileTime==Dynamic)>(GemmFunctor(lhs, rhs, dst, actualAlpha, blocking), this->rows(), this->cols(), Dest::Flags&RowMajorBit);
-    }
+    internal::parallelize_gemm<(Dest::MaxRowsAtCompileTime>32 || Dest::MaxRowsAtCompileTime==Dynamic)>
+                              (GemmFunctor(lhs, rhs, dst, actualAlpha, blocking), a_lhs.rows(), a_rhs.cols(), Dest::Flags&RowMajorBit);
+  }
 };
 
+} // end namespace internal
+
 } // end namespace Eigen
 
 #endif // EIGEN_GENERAL_MATRIX_MATRIX_H
diff --git a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h
index 225b994d1..7db3e3d38 100644
--- a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h
+++ b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h
@@ -20,7 +20,7 @@ namespace internal {
 /**********************************************************************
 * This file implements a general A * B product while
 * evaluating only one triangular part of the product.
-* This is more general version of self adjoint product (C += A A^T)
+* This is a more general version of self adjoint product (C += A A^T)
 * as the level 3 SYRK Blas routine.
 **********************************************************************/
 
@@ -262,14 +262,14 @@ struct general_product_to_triangular_selector<MatrixType,ProductType,UpLo,false>
 };
 
 template<typename MatrixType, unsigned int UpLo>
-template<typename ProductDerived, typename _Lhs, typename _Rhs>
-TriangularView<MatrixType,UpLo>& TriangularView<MatrixType,UpLo>::assignProduct(const ProductBase<ProductDerived, _Lhs,_Rhs>& prod, const Scalar& alpha)
+template<typename ProductType>
+TriangularView<MatrixType,UpLo>& TriangularViewImpl<MatrixType,UpLo,Dense>::_assignProduct(const ProductType& prod, const Scalar& alpha)
 {
-  eigen_assert(m_matrix.rows() == prod.rows() && m_matrix.cols() == prod.cols());
-
-  general_product_to_triangular_selector<MatrixType, ProductDerived, UpLo, (_Lhs::ColsAtCompileTime==1) || (_Rhs::RowsAtCompileTime==1)>::run(m_matrix.const_cast_derived(), prod.derived(), alpha);
+  eigen_assert(derived().nestedExpression().rows() == prod.rows() && derived().cols() == prod.cols());
+  
+  general_product_to_triangular_selector<MatrixType, ProductType, UpLo, internal::traits<ProductType>::InnerSize==1>::run(derived().nestedExpression().const_cast_derived(), prod, alpha);
   
-  return *this;
+  return derived();
 }
 
 } // end namespace Eigen
diff --git a/Eigen/src/Core/products/GeneralMatrixMatrix_MKL.h b/Eigen/src/Core/products/GeneralMatrixMatrix_MKL.h
index 060af328e..b6ae729b2 100644
--- a/Eigen/src/Core/products/GeneralMatrixMatrix_MKL.h
+++ b/Eigen/src/Core/products/GeneralMatrixMatrix_MKL.h
@@ -53,6 +53,8 @@ template< \
   int RhsStorageOrder, bool ConjugateRhs> \
 struct general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor> \
 { \
+typedef gebp_traits<EIGTYPE,EIGTYPE> Traits; \
+\
 static void run(Index rows, Index cols, Index depth, \
   const EIGTYPE* _lhs, Index lhsStride, \
   const EIGTYPE* _rhs, Index rhsStride, \
diff --git a/Eigen/src/Core/products/SelfadjointMatrixMatrix.h b/Eigen/src/Core/products/SelfadjointMatrixMatrix.h
index d67164ec3..4e507b6cf 100644
--- a/Eigen/src/Core/products/SelfadjointMatrixMatrix.h
+++ b/Eigen/src/Core/products/SelfadjointMatrixMatrix.h
@@ -460,55 +460,54 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,f
 ***************************************************************************/
 
 namespace internal {
+  
 template<typename Lhs, int LhsMode, typename Rhs, int RhsMode>
-struct traits<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false> >
-  : traits<ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>, Lhs, Rhs> >
-{};
-}
-
-template<typename Lhs, int LhsMode, typename Rhs, int RhsMode>
-struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>
-  : public ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>, Lhs, Rhs >
+struct selfadjoint_product_impl<Lhs,LhsMode,false,Rhs,RhsMode,false>
 {
-  EIGEN_PRODUCT_PUBLIC_INTERFACE(SelfadjointProductMatrix)
-
-  SelfadjointProductMatrix(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}
-
+  typedef typename Product<Lhs,Rhs>::Scalar Scalar;
+  typedef typename Product<Lhs,Rhs>::Index  Index;
+  
+  typedef internal::blas_traits<Lhs> LhsBlasTraits;
+  typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
+  typedef internal::blas_traits<Rhs> RhsBlasTraits;
+  typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
+  
   enum {
     LhsIsUpper = (LhsMode&(Upper|Lower))==Upper,
     LhsIsSelfAdjoint = (LhsMode&SelfAdjoint)==SelfAdjoint,
     RhsIsUpper = (RhsMode&(Upper|Lower))==Upper,
     RhsIsSelfAdjoint = (RhsMode&SelfAdjoint)==SelfAdjoint
   };
-
-  template<typename Dest> void scaleAndAddTo(Dest& dst, const Scalar& alpha) const
+  
+  template<typename Dest>
+  static void run(Dest &dst, const Lhs &a_lhs, const Rhs &a_rhs, const Scalar& alpha)
   {
-    eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
+    eigen_assert(dst.rows()==a_lhs.rows() && dst.cols()==a_rhs.cols());
 
-    typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs);
-    typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs);
+    typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(a_lhs);
+    typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(a_rhs);
 
-    Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
-                               * RhsBlasTraits::extractScalarFactor(m_rhs);
+    Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(a_lhs)
+                               * RhsBlasTraits::extractScalarFactor(a_rhs);
 
     internal::product_selfadjoint_matrix<Scalar, Index,
-      EIGEN_LOGICAL_XOR(LhsIsUpper,
-                        internal::traits<Lhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, LhsIsSelfAdjoint,
+      EIGEN_LOGICAL_XOR(LhsIsUpper,internal::traits<Lhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, LhsIsSelfAdjoint,
       NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsIsUpper,bool(LhsBlasTraits::NeedToConjugate)),
-      EIGEN_LOGICAL_XOR(RhsIsUpper,
-                        internal::traits<Rhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, RhsIsSelfAdjoint,
+      EIGEN_LOGICAL_XOR(RhsIsUpper,internal::traits<Rhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, RhsIsSelfAdjoint,
       NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsIsUpper,bool(RhsBlasTraits::NeedToConjugate)),
       internal::traits<Dest>::Flags&RowMajorBit  ? RowMajor : ColMajor>
       ::run(
-        lhs.rows(), rhs.cols(),                     // sizes
-        &lhs.coeffRef(0,0),    lhs.outerStride(),   // lhs info
-        &rhs.coeffRef(0,0),    rhs.outerStride(),   // rhs info
-        &dst.coeffRef(0,0), dst.outerStride(),      // result info
-        actualAlpha                                 // alpha
+        lhs.rows(), rhs.cols(),                 // sizes
+        &lhs.coeffRef(0,0), lhs.outerStride(),  // lhs info
+        &rhs.coeffRef(0,0), rhs.outerStride(),  // rhs info
+        &dst.coeffRef(0,0), dst.outerStride(),  // result info
+        actualAlpha                             // alpha
       );
   }
 };
 
+} // end namespace internal
+
 } // end namespace Eigen
 
 #endif // EIGEN_SELFADJOINT_MATRIX_MATRIX_H
diff --git a/Eigen/src/Core/products/SelfadjointMatrixVector.h b/Eigen/src/Core/products/SelfadjointMatrixVector.h
index 26e787949..d9c041f0c 100644
--- a/Eigen/src/Core/products/SelfadjointMatrixVector.h
+++ b/Eigen/src/Core/products/SelfadjointMatrixVector.h
@@ -169,45 +169,45 @@ EIGEN_DONT_INLINE void selfadjoint_matrix_vector_product<Scalar,Index,StorageOrd
 ***************************************************************************/
 
 namespace internal {
-template<typename Lhs, int LhsMode, typename Rhs>
-struct traits<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true> >
-  : traits<ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>, Lhs, Rhs> >
-{};
-}
 
 template<typename Lhs, int LhsMode, typename Rhs>
-struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>
-  : public ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>, Lhs, Rhs >
+struct selfadjoint_product_impl<Lhs,LhsMode,false,Rhs,0,true>
 {
-  EIGEN_PRODUCT_PUBLIC_INTERFACE(SelfadjointProductMatrix)
-
-  enum {
-    LhsUpLo = LhsMode&(Upper|Lower)
-  };
-
-  SelfadjointProductMatrix(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}
-
-  template<typename Dest> void scaleAndAddTo(Dest& dest, const Scalar& alpha) const
+  typedef typename Product<Lhs,Rhs>::Scalar Scalar;
+  typedef typename Product<Lhs,Rhs>::Index Index;
+  
+  typedef internal::blas_traits<Lhs> LhsBlasTraits;
+  typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
+  typedef typename internal::remove_all<ActualLhsType>::type ActualLhsTypeCleaned;
+  
+  typedef internal::blas_traits<Rhs> RhsBlasTraits;
+  typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
+  typedef typename internal::remove_all<ActualRhsType>::type ActualRhsTypeCleaned;
+
+  enum { LhsUpLo = LhsMode&(Upper|Lower) };
+
+  template<typename Dest>
+  static void run(Dest& dest, const Lhs &a_lhs, const Rhs &a_rhs, const Scalar& alpha)
   {
     typedef typename Dest::Scalar ResScalar;
-    typedef typename Base::RhsScalar RhsScalar;
+    typedef typename Rhs::Scalar RhsScalar;
     typedef Map<Matrix<ResScalar,Dynamic,1>, Aligned> MappedDest;
     
-    eigen_assert(dest.rows()==m_lhs.rows() && dest.cols()==m_rhs.cols());
+    eigen_assert(dest.rows()==a_lhs.rows() && dest.cols()==a_rhs.cols());
 
-    typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs);
-    typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs);
+    typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(a_lhs);
+    typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(a_rhs);
 
-    Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
-                               * RhsBlasTraits::extractScalarFactor(m_rhs);
+    Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(a_lhs)
+                               * RhsBlasTraits::extractScalarFactor(a_rhs);
 
     enum {
       EvalToDest = (Dest::InnerStrideAtCompileTime==1),
-      UseRhs = (_ActualRhsType::InnerStrideAtCompileTime==1)
+      UseRhs = (ActualRhsTypeCleaned::InnerStrideAtCompileTime==1)
     };
     
     internal::gemv_static_vector_if<ResScalar,Dest::SizeAtCompileTime,Dest::MaxSizeAtCompileTime,!EvalToDest> static_dest;
-    internal::gemv_static_vector_if<RhsScalar,_ActualRhsType::SizeAtCompileTime,_ActualRhsType::MaxSizeAtCompileTime,!UseRhs> static_rhs;
+    internal::gemv_static_vector_if<RhsScalar,ActualRhsTypeCleaned::SizeAtCompileTime,ActualRhsTypeCleaned::MaxSizeAtCompileTime,!UseRhs> static_rhs;
 
     ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(),
                                                   EvalToDest ? dest.data() : static_dest.data());
@@ -218,7 +218,7 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>
     if(!EvalToDest)
     {
       #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
-      Index size = dest.size();
+      int size = dest.size();
       EIGEN_DENSE_STORAGE_CTOR_PLUGIN
       #endif
       MappedDest(actualDestPtr, dest.size()) = dest;
@@ -227,14 +227,15 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>
     if(!UseRhs)
     {
       #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
-      Index size = rhs.size();
+      int size = rhs.size();
       EIGEN_DENSE_STORAGE_CTOR_PLUGIN
       #endif
-      Map<typename _ActualRhsType::PlainObject>(actualRhsPtr, rhs.size()) = rhs;
+      Map<typename ActualRhsTypeCleaned::PlainObject>(actualRhsPtr, rhs.size()) = rhs;
     }
       
       
-    internal::selfadjoint_matrix_vector_product<Scalar, Index, (internal::traits<_ActualLhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, int(LhsUpLo), bool(LhsBlasTraits::NeedToConjugate), bool(RhsBlasTraits::NeedToConjugate)>::run
+    internal::selfadjoint_matrix_vector_product<Scalar, Index, (internal::traits<ActualLhsTypeCleaned>::Flags&RowMajorBit) ? RowMajor : ColMajor,
+                                                int(LhsUpLo), bool(LhsBlasTraits::NeedToConjugate), bool(RhsBlasTraits::NeedToConjugate)>::run
       (
         lhs.rows(),                             // size
         &lhs.coeffRef(0,0),  lhs.outerStride(), // lhs info
@@ -248,34 +249,24 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>
   }
 };
 
-namespace internal {
-template<typename Lhs, typename Rhs, int RhsMode>
-struct traits<SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false> >
-  : traits<ProductBase<SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false>, Lhs, Rhs> >
-{};
-}
-
 template<typename Lhs, typename Rhs, int RhsMode>
-struct SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false>
-  : public ProductBase<SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false>, Lhs, Rhs >
+struct selfadjoint_product_impl<Lhs,0,true,Rhs,RhsMode,false>
 {
-  EIGEN_PRODUCT_PUBLIC_INTERFACE(SelfadjointProductMatrix)
-
-  enum {
-    RhsUpLo = RhsMode&(Upper|Lower)
-  };
+  typedef typename Product<Lhs,Rhs>::Scalar Scalar;
+  enum { RhsUpLo = RhsMode&(Upper|Lower)  };
 
-  SelfadjointProductMatrix(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}
-
-  template<typename Dest> void scaleAndAddTo(Dest& dest, const Scalar& alpha) const
+  template<typename Dest>
+  static void run(Dest& dest, const Lhs &a_lhs, const Rhs &a_rhs, const Scalar& alpha)
   {
     // let's simply transpose the product
     Transpose<Dest> destT(dest);
-    SelfadjointProductMatrix<Transpose<const Rhs>, int(RhsUpLo)==Upper ? Lower : Upper, false,
-                             Transpose<const Lhs>, 0, true>(m_rhs.transpose(), m_lhs.transpose()).scaleAndAddTo(destT, alpha);
+    selfadjoint_product_impl<Transpose<const Rhs>, int(RhsUpLo)==Upper ? Lower : Upper, false,
+                             Transpose<const Lhs>, 0, true>::run(destT, a_rhs.transpose(), a_lhs.transpose(), alpha);
   }
 };
 
+} // end namespace internal
+
 } // end namespace Eigen
 
 #endif // EIGEN_SELFADJOINT_MATRIX_VECTOR_H
diff --git a/Eigen/src/Core/products/TriangularMatrixMatrix.h b/Eigen/src/Core/products/TriangularMatrixMatrix.h
index db7b27f8e..c2d0817ea 100644
--- a/Eigen/src/Core/products/TriangularMatrixMatrix.h
+++ b/Eigen/src/Core/products/TriangularMatrixMatrix.h
@@ -369,28 +369,29 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,false,
 * Wrapper to product_triangular_matrix_matrix
 ***************************************************************************/
 
-template<int Mode, bool LhsIsTriangular, typename Lhs, typename Rhs>
-struct traits<TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false> >
-  : traits<ProductBase<TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false>, Lhs, Rhs> >
-{};
-
 } // end namespace internal
 
+namespace internal {
 template<int Mode, bool LhsIsTriangular, typename Lhs, typename Rhs>
-struct TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false>
-  : public ProductBase<TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false>, Lhs, Rhs >
+struct triangular_product_impl<Mode,LhsIsTriangular,Lhs,false,Rhs,false>
 {
-  EIGEN_PRODUCT_PUBLIC_INTERFACE(TriangularProduct)
-
-  TriangularProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}
-
-  template<typename Dest> void scaleAndAddTo(Dest& dst, const Scalar& alpha) const
+  template<typename Dest> static void run(Dest& dst, const Lhs &a_lhs, const Rhs &a_rhs, const typename Dest::Scalar& alpha)
   {
-    typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs);
-    typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs);
+    typedef typename Dest::Index      Index;
+    typedef typename Dest::Scalar     Scalar;
+    
+    typedef internal::blas_traits<Lhs> LhsBlasTraits;
+    typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
+    typedef typename internal::remove_all<ActualLhsType>::type ActualLhsTypeCleaned;
+    typedef internal::blas_traits<Rhs> RhsBlasTraits;
+    typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
+    typedef typename internal::remove_all<ActualRhsType>::type ActualRhsTypeCleaned;
+    
+    typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(a_lhs);
+    typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(a_rhs);
 
-    Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
-                               * RhsBlasTraits::extractScalarFactor(m_rhs);
+    Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(a_lhs)
+                               * RhsBlasTraits::extractScalarFactor(a_rhs);
 
     typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,Scalar,Scalar,
               Lhs::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime, Lhs::MaxColsAtCompileTime,4> BlockingType;
@@ -405,19 +406,21 @@ struct TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false>
 
     internal::product_triangular_matrix_matrix<Scalar, Index,
       Mode, LhsIsTriangular,
-      (internal::traits<_ActualLhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate,
-      (internal::traits<_ActualRhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate,
+      (internal::traits<ActualLhsTypeCleaned>::Flags&RowMajorBit) ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate,
+      (internal::traits<ActualRhsTypeCleaned>::Flags&RowMajorBit) ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate,
       (internal::traits<Dest          >::Flags&RowMajorBit) ? RowMajor : ColMajor>
       ::run(
         stripedRows, stripedCols, stripedDepth,   // sizes
-        &lhs.coeffRef(0,0),    lhs.outerStride(), // lhs info
-        &rhs.coeffRef(0,0),    rhs.outerStride(), // rhs info
+        &lhs.coeffRef(0,0), lhs.outerStride(),    // lhs info
+        &rhs.coeffRef(0,0), rhs.outerStride(),    // rhs info
         &dst.coeffRef(0,0), dst.outerStride(),    // result info
         actualAlpha, blocking
       );
   }
 };
 
+} // end namespace internal
+
 } // end namespace Eigen
 
 #endif // EIGEN_TRIANGULAR_MATRIX_MATRIX_H
diff --git a/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h b/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h
index ba41a1c99..4cc56a42f 100644
--- a/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h
+++ b/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h
@@ -109,7 +109,7 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \
 /* Non-square case - doesn't fit to MKL ?TRMM. Fall to default triangular product or call MKL ?GEMM*/ \
    if (rows != depth) { \
 \
-     int nthr = mkl_domain_get_max_threads(MKL_BLAS); \
+     int nthr = mkl_domain_get_max_threads(EIGEN_MKL_DOMAIN_BLAS); \
 \
      if (((nthr==1) && (((std::max)(rows,depth)-diagSize)/(double)diagSize < 0.5))) { \
      /* Most likely no benefit to call TRMM or GEMM from MKL*/ \
@@ -223,7 +223,7 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \
 /* Non-square case - doesn't fit to MKL ?TRMM. Fall to default triangular product or call MKL ?GEMM*/ \
    if (cols != depth) { \
 \
-     int nthr = mkl_domain_get_max_threads(MKL_BLAS); \
+     int nthr = mkl_domain_get_max_threads(EIGEN_MKL_DOMAIN_BLAS); \
 \
      if ((nthr==1) && (((std::max)(cols,depth)-diagSize)/(double)diagSize < 0.5)) { \
      /* Most likely no benefit to call TRMM or GEMM from MKL*/ \
diff --git a/Eigen/src/Core/products/TriangularMatrixVector.h b/Eigen/src/Core/products/TriangularMatrixVector.h
index 817768481..92d64e384 100644
--- a/Eigen/src/Core/products/TriangularMatrixVector.h
+++ b/Eigen/src/Core/products/TriangularMatrixVector.h
@@ -157,83 +157,67 @@ EIGEN_DONT_INLINE void triangular_matrix_vector_product<Index,Mode,LhsScalar,Con
 * Wrapper to product_triangular_vector
 ***************************************************************************/
 
-template<int Mode, bool LhsIsTriangular, typename Lhs, typename Rhs>
-struct traits<TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,true> >
- : traits<ProductBase<TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,true>, Lhs, Rhs> >
-{};
-
-template<int Mode, bool LhsIsTriangular, typename Lhs, typename Rhs>
-struct traits<TriangularProduct<Mode,LhsIsTriangular,Lhs,true,Rhs,false> >
- : traits<ProductBase<TriangularProduct<Mode,LhsIsTriangular,Lhs,true,Rhs,false>, Lhs, Rhs> >
-{};
-
-
-template<int StorageOrder>
+template<int Mode,int StorageOrder>
 struct trmv_selector;
 
 } // end namespace internal
 
+namespace internal {
+
 template<int Mode, typename Lhs, typename Rhs>
-struct TriangularProduct<Mode,true,Lhs,false,Rhs,true>
-  : public ProductBase<TriangularProduct<Mode,true,Lhs,false,Rhs,true>, Lhs, Rhs >
+struct triangular_product_impl<Mode,true,Lhs,false,Rhs,true>
 {
-  EIGEN_PRODUCT_PUBLIC_INTERFACE(TriangularProduct)
-
-  TriangularProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}
-
-  template<typename Dest> void scaleAndAddTo(Dest& dst, const Scalar& alpha) const
+  template<typename Dest> static void run(Dest& dst, const Lhs &lhs, const Rhs &rhs, const typename Dest::Scalar& alpha)
   {
-    eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
+    eigen_assert(dst.rows()==lhs.rows() && dst.cols()==rhs.cols());
   
-    internal::trmv_selector<(int(internal::traits<Lhs>::Flags)&RowMajorBit) ? RowMajor : ColMajor>::run(*this, dst, alpha);
+    internal::trmv_selector<Mode,(int(internal::traits<Lhs>::Flags)&RowMajorBit) ? RowMajor : ColMajor>::run(lhs, rhs, dst, alpha);
   }
 };
 
 template<int Mode, typename Lhs, typename Rhs>
-struct TriangularProduct<Mode,false,Lhs,true,Rhs,false>
-  : public ProductBase<TriangularProduct<Mode,false,Lhs,true,Rhs,false>, Lhs, Rhs >
+struct triangular_product_impl<Mode,false,Lhs,true,Rhs,false>
 {
-  EIGEN_PRODUCT_PUBLIC_INTERFACE(TriangularProduct)
-
-  TriangularProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}
-
-  template<typename Dest> void scaleAndAddTo(Dest& dst, const Scalar& alpha) const
+  template<typename Dest> static void run(Dest& dst, const Lhs &lhs, const Rhs &rhs, const typename Dest::Scalar& alpha)
   {
-    eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
+    eigen_assert(dst.rows()==lhs.rows() && dst.cols()==rhs.cols());
 
-    typedef TriangularProduct<(Mode & (UnitDiag|ZeroDiag)) | ((Mode & Lower) ? Upper : Lower),true,Transpose<const Rhs>,false,Transpose<const Lhs>,true> TriangularProductTranspose;
     Transpose<Dest> dstT(dst);
-    internal::trmv_selector<(int(internal::traits<Rhs>::Flags)&RowMajorBit) ? ColMajor : RowMajor>::run(
-      TriangularProductTranspose(m_rhs.transpose(),m_lhs.transpose()), dstT, alpha);
+    internal::trmv_selector<(Mode & (UnitDiag|ZeroDiag)) | ((Mode & Lower) ? Upper : Lower),
+                            (int(internal::traits<Rhs>::Flags)&RowMajorBit) ? ColMajor : RowMajor>
+            ::run(rhs.transpose(),lhs.transpose(), dstT, alpha);
   }
 };
 
+} // end namespace internal
+
 namespace internal {
 
 // TODO: find a way to factorize this piece of code with gemv_selector since the logic is exactly the same.
   
-template<> struct trmv_selector<ColMajor>
+template<int Mode> struct trmv_selector<Mode,ColMajor>
 {
-  template<int Mode, typename Lhs, typename Rhs, typename Dest>
-  static void run(const TriangularProduct<Mode,true,Lhs,false,Rhs,true>& prod, Dest& dest, const typename TriangularProduct<Mode,true,Lhs,false,Rhs,true>::Scalar& alpha)
+  template<typename Lhs, typename Rhs, typename Dest>
+  static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
   {
-    typedef TriangularProduct<Mode,true,Lhs,false,Rhs,true> ProductType;
-    typedef typename ProductType::Index Index;
-    typedef typename ProductType::LhsScalar   LhsScalar;
-    typedef typename ProductType::RhsScalar   RhsScalar;
-    typedef typename ProductType::Scalar      ResScalar;
-    typedef typename ProductType::RealScalar  RealScalar;
-    typedef typename ProductType::ActualLhsType ActualLhsType;
-    typedef typename ProductType::ActualRhsType ActualRhsType;
-    typedef typename ProductType::LhsBlasTraits LhsBlasTraits;
-    typedef typename ProductType::RhsBlasTraits RhsBlasTraits;
+    typedef typename Dest::Index      Index;
+    typedef typename Lhs::Scalar      LhsScalar;
+    typedef typename Rhs::Scalar      RhsScalar;
+    typedef typename Dest::Scalar     ResScalar;
+    typedef typename Dest::RealScalar RealScalar;
+    
+    typedef internal::blas_traits<Lhs> LhsBlasTraits;
+    typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
+    typedef internal::blas_traits<Rhs> RhsBlasTraits;
+    typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
+    
     typedef Map<Matrix<ResScalar,Dynamic,1>, Aligned> MappedDest;
 
-    typename internal::add_const_on_value_type<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(prod.lhs());
-    typename internal::add_const_on_value_type<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(prod.rhs());
+    typename internal::add_const_on_value_type<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(lhs);
+    typename internal::add_const_on_value_type<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(rhs);
 
-    ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs())
-                                  * RhsBlasTraits::extractScalarFactor(prod.rhs());
+    ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs)
+                                  * RhsBlasTraits::extractScalarFactor(rhs);
 
     enum {
       // FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
@@ -288,33 +272,33 @@ template<> struct trmv_selector<ColMajor>
   }
 };
 
-template<> struct trmv_selector<RowMajor>
+template<int Mode> struct trmv_selector<Mode,RowMajor>
 {
-  template<int Mode, typename Lhs, typename Rhs, typename Dest>
-  static void run(const TriangularProduct<Mode,true,Lhs,false,Rhs,true>& prod, Dest& dest, const typename TriangularProduct<Mode,true,Lhs,false,Rhs,true>::Scalar& alpha)
+  template<typename Lhs, typename Rhs, typename Dest>
+  static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
   {
-    typedef TriangularProduct<Mode,true,Lhs,false,Rhs,true> ProductType;
-    typedef typename ProductType::LhsScalar LhsScalar;
-    typedef typename ProductType::RhsScalar RhsScalar;
-    typedef typename ProductType::Scalar    ResScalar;
-    typedef typename ProductType::Index Index;
-    typedef typename ProductType::ActualLhsType ActualLhsType;
-    typedef typename ProductType::ActualRhsType ActualRhsType;
-    typedef typename ProductType::_ActualRhsType _ActualRhsType;
-    typedef typename ProductType::LhsBlasTraits LhsBlasTraits;
-    typedef typename ProductType::RhsBlasTraits RhsBlasTraits;
-
-    typename add_const<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(prod.lhs());
-    typename add_const<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(prod.rhs());
-
-    ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs())
-                                  * RhsBlasTraits::extractScalarFactor(prod.rhs());
+    typedef typename Dest::Index      Index;
+    typedef typename Lhs::Scalar      LhsScalar;
+    typedef typename Rhs::Scalar      RhsScalar;
+    typedef typename Dest::Scalar     ResScalar;
+    
+    typedef internal::blas_traits<Lhs> LhsBlasTraits;
+    typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
+    typedef internal::blas_traits<Rhs> RhsBlasTraits;
+    typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
+    typedef typename internal::remove_all<ActualRhsType>::type ActualRhsTypeCleaned;
+
+    typename add_const<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(lhs);
+    typename add_const<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(rhs);
+
+    ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs)
+                                  * RhsBlasTraits::extractScalarFactor(rhs);
 
     enum {
-      DirectlyUseRhs = _ActualRhsType::InnerStrideAtCompileTime==1
+      DirectlyUseRhs = ActualRhsTypeCleaned::InnerStrideAtCompileTime==1
     };
 
-    gemv_static_vector_if<RhsScalar,_ActualRhsType::SizeAtCompileTime,_ActualRhsType::MaxSizeAtCompileTime,!DirectlyUseRhs> static_rhs;
+    gemv_static_vector_if<RhsScalar,ActualRhsTypeCleaned::SizeAtCompileTime,ActualRhsTypeCleaned::MaxSizeAtCompileTime,!DirectlyUseRhs> static_rhs;
 
     ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhsPtr,actualRhs.size(),
         DirectlyUseRhs ? const_cast<RhsScalar*>(actualRhs.data()) : static_rhs.data());
@@ -325,7 +309,7 @@ template<> struct trmv_selector<RowMajor>
       Index size = actualRhs.size();
       EIGEN_DENSE_STORAGE_CTOR_PLUGIN
       #endif
-      Map<typename _ActualRhsType::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs;
+      Map<typename ActualRhsTypeCleaned::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs;
     }
     
     internal::triangular_matrix_vector_product
diff --git a/Eigen/src/Core/util/Constants.h b/Eigen/src/Core/util/Constants.h
index 31073b990..2c9fb443d 100644
--- a/Eigen/src/Core/util/Constants.h
+++ b/Eigen/src/Core/util/Constants.h
@@ -53,14 +53,13 @@ const int Infinity = -1;
 const unsigned int RowMajorBit = 0x1;
 
 /** \ingroup flags
-  *
   * means the expression should be evaluated by the calling expression */
 const unsigned int EvalBeforeNestingBit = 0x2;
 
 /** \ingroup flags
-  *
+  * \deprecated
   * means the expression should be evaluated before any assignment */
-const unsigned int EvalBeforeAssigningBit = 0x4;
+const unsigned int EvalBeforeAssigningBit = 0x4; // FIXME deprecated
 
 /** \ingroup flags
   *
@@ -155,6 +154,16 @@ const unsigned int AlignedBit = 0x80;
 
 const unsigned int NestByRefBit = 0x100;
 
+/** \ingroup flags
+  *
+  * for an expression, this means that the storage order
+  * can be either row-major or column-major.
+  * The precise choice will be decided at evaluation time or when
+  * combined with other expressions.
+  * \sa \ref RowMajorBit, \ref TopicStorageOrders */
+const unsigned int NoPreferredStorageOrderBit = 0x200;
+
+
 // list of flags that are inherited by default
 const unsigned int HereditaryBits = RowMajorBit
                                   | EvalBeforeNestingBit
@@ -431,7 +440,7 @@ namespace Architecture
 
 /** \internal \ingroup enums
   * Enum used as template parameter in GeneralProduct. */
-enum { CoeffBasedProductMode, LazyCoeffBasedProductMode, OuterProduct, InnerProduct, GemvProduct, GemmProduct };
+enum { DefaultProduct=0, CoeffBasedProductMode, LazyCoeffBasedProductMode, LazyProduct, OuterProduct, InnerProduct, GemvProduct, GemmProduct };
 
 /** \internal \ingroup enums
   * Enum used in experimental parallel implementation. */
@@ -440,12 +449,25 @@ enum Action {GetAction, SetAction};
 /** The type used to identify a dense storage. */
 struct Dense {};
 
+/** The type used to identify a permutation storage. */
+struct PermutationStorage {};
+
 /** The type used to identify a matrix expression */
 struct MatrixXpr {};
 
 /** The type used to identify an array expression */
 struct ArrayXpr {};
 
+// An evaluator must define its shape. By default, it can be one of the following:
+struct DenseShape             { static std::string debugName() { return "DenseShape"; } };
+struct HomogeneousShape       { static std::string debugName() { return "HomogeneousShape"; } };
+struct DiagonalShape          { static std::string debugName() { return "DiagonalShape"; } };
+struct BandShape              { static std::string debugName() { return "BandShape"; } };
+struct TriangularShape        { static std::string debugName() { return "TriangularShape"; } };
+struct SelfAdjointShape       { static std::string debugName() { return "SelfAdjointShape"; } };
+struct PermutationShape       { static std::string debugName() { return "PermutationShape"; } };
+struct SparseShape            { static std::string debugName() { return "SparseShape"; } };
+
 } // end namespace Eigen
 
 #endif // EIGEN_CONSTANTS_H
diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h
index 33deb88ec..9ec57468b 100644
--- a/Eigen/src/Core/util/ForwardDeclarations.h
+++ b/Eigen/src/Core/util/ForwardDeclarations.h
@@ -36,6 +36,10 @@ template<typename Derived> struct accessors_level
   };
 };
 
+template<typename T> struct evaluator_traits;
+
+template< typename T> struct evaluator;
+
 } // end namespace internal
 
 template<typename T> struct NumTraits;
@@ -87,11 +91,19 @@ template<typename NullaryOp, typename MatrixType>         class CwiseNullaryOp;
 template<typename UnaryOp,   typename MatrixType>         class CwiseUnaryOp;
 template<typename ViewOp,    typename MatrixType>         class CwiseUnaryView;
 template<typename BinaryOp,  typename Lhs, typename Rhs>  class CwiseBinaryOp;
-template<typename BinOp,     typename Lhs, typename Rhs>  class SelfCwiseBinaryOp;
-template<typename Derived,   typename Lhs, typename Rhs>  class ProductBase;
-template<typename Lhs, typename Rhs>                      class Product;
-template<typename Lhs, typename Rhs, int Mode>            class GeneralProduct;
-template<typename Lhs, typename Rhs, int NestingFlags>    class CoeffBasedProduct;
+template<typename BinOp,     typename Lhs, typename Rhs>  class SelfCwiseBinaryOp;      // TODO deprecated
+template<typename Derived,   typename Lhs, typename Rhs>  class ProductBase;            // TODO deprecated
+template<typename Decomposition, typename Rhstype>        class Solve;
+template<typename XprType>                                class Inverse;
+
+namespace internal {
+  template<typename Lhs, typename Rhs> struct product_tag;
+}
+
+template<typename Lhs, typename Rhs, int Option = DefaultProduct> class Product;
+         
+template<typename Lhs, typename Rhs, int Mode>            class GeneralProduct;         // TODO deprecated
+template<typename Lhs, typename Rhs, int NestingFlags>    class CoeffBasedProduct;      // TODO deprecated
 
 template<typename Derived> class DiagonalBase;
 template<typename _DiagonalVectorType> class DiagonalWrapper;
@@ -109,7 +121,12 @@ template<typename Derived,
          int Level = internal::accessors_level<Derived>::has_write_access ? WriteAccessors : ReadOnlyAccessors
 > class MapBase;
 template<int InnerStrideAtCompileTime, int OuterStrideAtCompileTime> class Stride;
+template<int Value = Dynamic> class InnerStride;
+template<int Value = Dynamic> class OuterStride;
 template<typename MatrixType, int MapOptions=Unaligned, typename StrideType = Stride<0,0> > class Map;
+template<typename Derived> class RefBase;
+template<typename PlainObjectType, int Options = 0,
+         typename StrideType = typename internal::conditional<PlainObjectType::IsVectorAtCompileTime,InnerStride<1>,OuterStride<> >::type > class Ref;
 
 template<typename Derived> class TriangularBase;
 template<typename MatrixType, unsigned int Mode> class TriangularView;
@@ -122,8 +139,6 @@ template<typename ExpressionType> class ArrayWrapper;
 template<typename ExpressionType> class MatrixWrapper;
 
 namespace internal {
-template<typename DecompositionType, typename Rhs> struct solve_retval_base;
-template<typename DecompositionType, typename Rhs> struct solve_retval;
 template<typename DecompositionType> struct kernel_retval_base;
 template<typename DecompositionType> struct kernel_retval;
 template<typename DecompositionType> struct image_retval_base;
@@ -136,6 +151,18 @@ template<typename _Scalar, int Rows=Dynamic, int Cols=Dynamic, int Supers=Dynami
 
 namespace internal {
 template<typename Lhs, typename Rhs> struct product_type;
+/** \internal
+  * \class product_evaluator
+  * Products need their own evaluator with more template arguments allowing for
+  * easier partial template specializations.
+  */
+template< typename T,
+          int ProductTag = internal::product_type<typename T::Lhs,typename T::Rhs>::ret,
+          typename LhsShape = typename evaluator_traits<typename T::Lhs>::Shape,
+          typename RhsShape = typename evaluator_traits<typename T::Rhs>::Shape,
+          typename LhsScalar = typename traits<typename T::Lhs>::Scalar,
+          typename RhsScalar = typename traits<typename T::Rhs>::Scalar
+        > struct product_evaluator;
 }
 
 template<typename Lhs, typename Rhs,
diff --git a/Eigen/src/Core/util/MKL_support.h b/Eigen/src/Core/util/MKL_support.h
index 8acca9c8c..1ef3b61db 100644
--- a/Eigen/src/Core/util/MKL_support.h
+++ b/Eigen/src/Core/util/MKL_support.h
@@ -76,6 +76,38 @@
 #include <mkl_lapacke.h>
 #define EIGEN_MKL_VML_THRESHOLD 128
 
+/* MKL_DOMAIN_BLAS, etc are defined only in 10.3 update 7 */
+/* MKL_BLAS, etc are not defined in 11.2 */
+#ifdef MKL_DOMAIN_ALL
+#define EIGEN_MKL_DOMAIN_ALL MKL_DOMAIN_ALL
+#else
+#define EIGEN_MKL_DOMAIN_ALL MKL_ALL
+#endif
+
+#ifdef MKL_DOMAIN_BLAS
+#define EIGEN_MKL_DOMAIN_BLAS MKL_DOMAIN_BLAS
+#else
+#define EIGEN_MKL_DOMAIN_BLAS MKL_BLAS
+#endif
+
+#ifdef MKL_DOMAIN_FFT
+#define EIGEN_MKL_DOMAIN_FFT MKL_DOMAIN_FFT
+#else
+#define EIGEN_MKL_DOMAIN_FFT MKL_FFT
+#endif
+
+#ifdef MKL_DOMAIN_VML
+#define EIGEN_MKL_DOMAIN_VML MKL_DOMAIN_VML
+#else
+#define EIGEN_MKL_DOMAIN_VML MKL_VML
+#endif
+
+#ifdef MKL_DOMAIN_PARDISO
+#define EIGEN_MKL_DOMAIN_PARDISO MKL_DOMAIN_PARDISO
+#else
+#define EIGEN_MKL_DOMAIN_PARDISO MKL_PARDISO
+#endif
+
 namespace Eigen {
 
 typedef std::complex<double> dcomplex;
diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h
index 5e9b0a112..f9b908e22 100644
--- a/Eigen/src/Core/util/Macros.h
+++ b/Eigen/src/Core/util/Macros.h
@@ -107,6 +107,13 @@
 #define EIGEN_DEFAULT_DENSE_INDEX_TYPE std::ptrdiff_t
 #endif
 
+// Cross compiler wrapper around LLVM's __has_builtin
+#ifdef __has_builtin
+#  define EIGEN_HAS_BUILTIN(x) __has_builtin(x)
+#else
+#  define EIGEN_HAS_BUILTIN(x) 0
+#endif
+
 // A Clang feature extension to determine compiler features.
 // We use it to determine 'cxx_rvalue_references'
 #ifndef __has_feature
@@ -272,7 +279,7 @@ namespace Eigen {
 
 #if !defined(EIGEN_ASM_COMMENT)
   #if (defined __GNUC__) && ( defined(__i386__) || defined(__x86_64__) )
-    #define EIGEN_ASM_COMMENT(X)  asm("#" X)
+    #define EIGEN_ASM_COMMENT(X)  __asm__("#" X)
   #else
     #define EIGEN_ASM_COMMENT(X)
   #endif
@@ -367,6 +374,8 @@ namespace Eigen {
 * documentation in a single line.
 **/
 
+// TODO The EIGEN_DENSE_PUBLIC_INTERFACE should not exists anymore
+  
 #define EIGEN_GENERIC_PUBLIC_INTERFACE(Derived) \
   typedef typename Eigen::internal::traits<Derived>::Scalar Scalar; /*!< \brief Numeric type, e.g. float, double, int or std::complex<float>. */ \
   typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; /*!< \brief The underlying numeric type for composed scalar types. \details In cases where Scalar is e.g. std::complex<T>, T were corresponding to RealScalar. */ \
@@ -377,7 +386,6 @@ namespace Eigen {
   enum { RowsAtCompileTime = Eigen::internal::traits<Derived>::RowsAtCompileTime, \
         ColsAtCompileTime = Eigen::internal::traits<Derived>::ColsAtCompileTime, \
         Flags = Eigen::internal::traits<Derived>::Flags, \
-        CoeffReadCost = Eigen::internal::traits<Derived>::CoeffReadCost, \
         SizeAtCompileTime = Base::SizeAtCompileTime, \
         MaxSizeAtCompileTime = Base::MaxSizeAtCompileTime, \
         IsVectorAtCompileTime = Base::IsVectorAtCompileTime };
@@ -396,13 +404,11 @@ namespace Eigen {
         MaxRowsAtCompileTime = Eigen::internal::traits<Derived>::MaxRowsAtCompileTime, \
         MaxColsAtCompileTime = Eigen::internal::traits<Derived>::MaxColsAtCompileTime, \
         Flags = Eigen::internal::traits<Derived>::Flags, \
-        CoeffReadCost = Eigen::internal::traits<Derived>::CoeffReadCost, \
         SizeAtCompileTime = Base::SizeAtCompileTime, \
         MaxSizeAtCompileTime = Base::MaxSizeAtCompileTime, \
         IsVectorAtCompileTime = Base::IsVectorAtCompileTime }; \
   using Base::derived; \
-  using Base::const_cast_derived;
-
+  using Base::const_cast_derived;  
 
 #define EIGEN_PLAIN_ENUM_MIN(a,b) (((int)a <= (int)b) ? (int)a : (int)b)
 #define EIGEN_PLAIN_ENUM_MAX(a,b) (((int)a >= (int)b) ? (int)a : (int)b)
diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h
index b99b8849e..f3bafd5af 100644
--- a/Eigen/src/Core/util/Meta.h
+++ b/Eigen/src/Core/util/Meta.h
@@ -274,18 +274,6 @@ template<typename T> struct scalar_product_traits<std::complex<T>, T>
 // typedef typename scalar_product_traits<typename remove_all<ArgType0>::type, typename remove_all<ArgType1>::type>::ReturnType type;
 // };
 
-template<typename T> struct is_diagonal
-{ enum { ret = false }; };
-
-template<typename T> struct is_diagonal<DiagonalBase<T> >
-{ enum { ret = true }; };
-
-template<typename T> struct is_diagonal<DiagonalWrapper<T> >
-{ enum { ret = true }; };
-
-template<typename T, int S> struct is_diagonal<DiagonalMatrix<T,S> >
-{ enum { ret = true }; };
-
 } // end namespace internal
 
 namespace numext {
diff --git a/Eigen/src/Core/util/StaticAssert.h b/Eigen/src/Core/util/StaticAssert.h
index 59aa0811c..54a16ebf2 100644
--- a/Eigen/src/Core/util/StaticAssert.h
+++ b/Eigen/src/Core/util/StaticAssert.h
@@ -84,13 +84,15 @@
         THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY,
         YOU_ARE_TRYING_TO_USE_AN_INDEX_BASED_ACCESSOR_ON_AN_EXPRESSION_THAT_DOES_NOT_SUPPORT_THAT,
         THIS_METHOD_IS_ONLY_FOR_1x1_EXPRESSIONS,
+        THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS,
         THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_OF_BOOL,
         THIS_METHOD_IS_ONLY_FOR_ARRAYS_NOT_MATRICES,
         YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED,
         YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED,
         THE_INDEX_TYPE_MUST_BE_A_SIGNED_TYPE,
         THE_STORAGE_ORDER_OF_BOTH_SIDES_MUST_MATCH,
-        OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG
+        OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG,
+        IMPLICIT_CONVERSION_TO_SCALAR_IS_FOR_INNER_PRODUCT_ONLY
       };
     };
 
@@ -157,7 +159,7 @@
 
 #define EIGEN_PREDICATE_SAME_MATRIX_SIZE(TYPE0,TYPE1) \
      ( \
-        (int(TYPE0::SizeAtCompileTime)==0 && int(TYPE1::SizeAtCompileTime)==0) \
+        (int(internal::size_of_xpr_at_compile_time<TYPE0>::ret)==0 && int(internal::size_of_xpr_at_compile_time<TYPE1>::ret)==0) \
     || (\
           (int(TYPE0::RowsAtCompileTime)==Eigen::Dynamic \
         || int(TYPE1::RowsAtCompileTime)==Eigen::Dynamic \
diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h
index 1b3e122e1..f2536714e 100644
--- a/Eigen/src/Core/util/XprHelper.h
+++ b/Eigen/src/Core/util/XprHelper.h
@@ -128,6 +128,17 @@ template<typename _Scalar, int _Rows, int _Cols,
 template<typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols>
 class compute_matrix_flags
 {
+    enum { row_major_bit = Options&RowMajor ? RowMajorBit : 0 };
+  public:
+    // FIXME currently we still have to handle DirectAccessBit at the expression level to handle DenseCoeffsBase<>
+    // and then propagate this information to the evaluator's flags.
+    // However, I (Gael) think that DirectAccessBit should only matter at the evaluation stage.
+    enum { ret = DirectAccessBit | LvalueBit | NestByRefBit | row_major_bit };
+};
+
+template<typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols>
+class compute_matrix_evaluator_flags
+{
     enum {
       row_major_bit = Options&RowMajor ? RowMajorBit : 0,
       is_dynamic_size_storage = MaxRows==Dynamic || MaxCols==Dynamic,
@@ -156,7 +167,7 @@ class compute_matrix_flags
     };
 
   public:
-    enum { ret = LinearAccessBit | LvalueBit | DirectAccessBit | NestByRefBit | packet_access_bit | row_major_bit | aligned_bit };
+    enum { ret = LinearAccessBit | DirectAccessBit | packet_access_bit | row_major_bit | aligned_bit };
 };
 
 template<int _Rows, int _Cols> struct size_at_compile_time
@@ -164,6 +175,11 @@ template<int _Rows, int _Cols> struct size_at_compile_time
   enum { ret = (_Rows==Dynamic || _Cols==Dynamic) ? Dynamic : _Rows * _Cols };
 };
 
+template<typename XprType> struct size_of_xpr_at_compile_time
+{
+  enum { ret = size_at_compile_time<traits<XprType>::RowsAtCompileTime,traits<XprType>::ColsAtCompileTime>::ret };
+};
+
 /* plain_matrix_type : the difference from eval is that plain_matrix_type is always a plain matrix type,
  * whereas eval is a const reference in the case of a matrix
  */
@@ -174,6 +190,10 @@ template<typename T> struct plain_matrix_type<T,Dense>
 {
   typedef typename plain_matrix_type_dense<T,typename traits<T>::XprKind>::type type;
 };
+template<typename T> struct plain_matrix_type<T,DiagonalShape>
+{
+  typedef typename T::PlainObject type;
+};
 
 template<typename T> struct plain_matrix_type_dense<T,MatrixXpr>
 {
@@ -216,6 +236,11 @@ template<typename T> struct eval<T,Dense>
 //           > type;
 };
 
+template<typename T> struct eval<T,DiagonalShape>
+{
+  typedef typename plain_matrix_type<T>::type type;
+};
+
 // for matrices, no need to evaluate, just use a const reference to avoid a useless copy
 template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
 struct eval<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>, Dense>
@@ -294,38 +319,42 @@ struct transfer_constness
   >::type type;
 };
 
-/** \internal Determines how a given expression should be nested into another one.
+
+// When using evaluators, we never evaluate when assembling the expression!!
+// TODO: get rid of this nested class since it's just an alias for ref_selector.
+template<typename T, int n=1, typename PlainObject = void> struct nested
+{
+  typedef typename ref_selector<T>::type type;
+};
+
+// However, we still need a mechanism to detect whether an expression which is evaluated multiple time
+// has to be evaluated into a temporary.
+// That's the purpose of this new nested_eval helper:
+/** \internal Determines how a given expression should be nested when evaluated multiple times.
   * For example, when you do a * (b+c), Eigen will determine how the expression b+c should be
-  * nested into the bigger product expression. The choice is between nesting the expression b+c as-is, or
+  * evaluated into the bigger product expression. The choice is between nesting the expression b+c as-is, or
   * evaluating that expression b+c into a temporary variable d, and nest d so that the resulting expression is
   * a*d. Evaluating can be beneficial for example if every coefficient access in the resulting expression causes
   * many coefficient accesses in the nested expressions -- as is the case with matrix product for example.
   *
-  * \param T the type of the expression being nested
+  * \param T the type of the expression being nested.
   * \param n the number of coefficient accesses in the nested expression for each coefficient access in the bigger expression.
-  *
-  * Note that if no evaluation occur, then the constness of T is preserved.
-  *
-  * Example. Suppose that a, b, and c are of type Matrix3d. The user forms the expression a*(b+c).
-  * b+c is an expression "sum of matrices", which we will denote by S. In order to determine how to nest it,
-  * the Product expression uses: nested<S, 3>::type, which turns out to be Matrix3d because the internal logic of
-  * nested determined that in this case it was better to evaluate the expression b+c into a temporary. On the other hand,
-  * since a is of type Matrix3d, the Product expression nests it as nested<Matrix3d, 3>::type, which turns out to be
-  * const Matrix3d&, because the internal logic of nested determined that since a was already a matrix, there was no point
-  * in copying it into another matrix.
+  * \param PlainObject the type of the temporary if needed.
   */
-template<typename T, int n=1, typename PlainObject = typename eval<T>::type> struct nested
+template<typename T, int n, typename PlainObject = typename eval<T>::type> struct nested_eval
 {
   enum {
-    // for the purpose of this test, to keep it reasonably simple, we arbitrarily choose a value of Dynamic values.
+    // For the purpose of this test, to keep it reasonably simple, we arbitrarily choose a value of Dynamic values.
     // the choice of 10000 makes it larger than any practical fixed value and even most dynamic values.
     // in extreme cases where these assumptions would be wrong, we would still at worst suffer performance issues
     // (poor choice of temporaries).
-    // it's important that this value can still be squared without integer overflowing.
+    // It's important that this value can still be squared without integer overflowing.
     DynamicAsInteger = 10000,
     ScalarReadCost = NumTraits<typename traits<T>::Scalar>::ReadCost,
     ScalarReadCostAsInteger = ScalarReadCost == Dynamic ? int(DynamicAsInteger) : int(ScalarReadCost),
-    CoeffReadCost = traits<T>::CoeffReadCost,
+    CoeffReadCost = evaluator<T>::CoeffReadCost,  // TODO What if an evaluator evaluate itself into a tempory?
+                                                  // Then CoeffReadCost will be small but we still have to evaluate if n>1... 
+                                                  // The solution might be to ask the evaluator if it creates a temp. Perhaps we could even ask the number of temps?
     CoeffReadCostAsInteger = CoeffReadCost == Dynamic ? int(DynamicAsInteger) : int(CoeffReadCost),
     NAsInteger = n == Dynamic ? int(DynamicAsInteger) : n,
     CostEvalAsInteger   = (NAsInteger+1) * ScalarReadCostAsInteger + CoeffReadCostAsInteger,
@@ -333,11 +362,10 @@ template<typename T, int n=1, typename PlainObject = typename eval<T>::type> str
   };
 
   typedef typename conditional<
-      ( (int(traits<T>::Flags) & EvalBeforeNestingBit) ||
-        int(CostEvalAsInteger) < int(CostNoEvalAsInteger)
-      ),
-      PlainObject,
-      typename ref_selector<T>::type
+        ( (int(evaluator<T>::Flags) & EvalBeforeNestingBit) ||
+          (int(CostEvalAsInteger) < int(CostNoEvalAsInteger)) ),
+        PlainObject,
+        typename ref_selector<T>::type
   >::type type;
 };
 
@@ -366,6 +394,15 @@ struct dense_xpr_base<Derived, ArrayXpr>
   typedef ArrayBase<Derived> type;
 };
 
+template<typename Derived, typename XprKind = typename traits<Derived>::XprKind, typename StorageKind = typename traits<Derived>::StorageKind>
+struct generic_xpr_base;
+
+template<typename Derived, typename XprKind>
+struct generic_xpr_base<Derived, XprKind, Dense>
+{
+  typedef typename dense_xpr_base<Derived,XprKind>::type type;
+};
+
 /** \internal Helper base class to add a scalar multiple operator
   * overloads for complex types */
 template<typename Derived,typename Scalar,typename OtherScalar,
@@ -383,13 +420,21 @@ struct special_scalar_op_base<Derived,Scalar,OtherScalar,true>  : public DenseCo
   const CwiseUnaryOp<scalar_multiple2_op<Scalar,OtherScalar>, Derived>
   operator*(const OtherScalar& scalar) const
   {
+#ifdef EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN
+    EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN
+#endif
     return CwiseUnaryOp<scalar_multiple2_op<Scalar,OtherScalar>, Derived>
       (*static_cast<const Derived*>(this), scalar_multiple2_op<Scalar,OtherScalar>(scalar));
   }
 
   inline friend const CwiseUnaryOp<scalar_multiple2_op<Scalar,OtherScalar>, Derived>
   operator*(const OtherScalar& scalar, const Derived& matrix)
-  { return static_cast<const special_scalar_op_base&>(matrix).operator*(scalar); }
+  {
+#ifdef EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN
+    EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN
+#endif
+    return static_cast<const special_scalar_op_base&>(matrix).operator*(scalar);
+  }
 };
 
 template<typename XprType, typename CastType> struct cast_return_type
@@ -401,12 +446,59 @@ template<typename XprType, typename CastType> struct cast_return_type
                               const XprType&,CastType>::type type;
 };
 
-template <typename A, typename B> struct promote_storage_type;
+/** \internal Specify the "storage kind" of applying a coefficient-wise
+  * binary operations between two expressions of kinds A and B respectively.
+  * The template parameter Functor permits to specialize the resulting storage kind wrt to
+  * the functor.
+  * The default rules are as follows:
+  * \code
+  * A     op A      -> A
+  * A     op dense  -> dense
+  * dense op B      -> dense
+  * A     *  dense  -> A
+  * dense *  B      -> B
+  * \endcode
+  */
+template <typename A, typename B, typename Functor> struct cwise_promote_storage_type;
+
+template <typename A, typename Functor>                   struct cwise_promote_storage_type<A,A,Functor>                                      { typedef A     ret; };
+template <typename Functor>                               struct cwise_promote_storage_type<Dense,Dense,Functor>                              { typedef Dense ret; };
+template <typename ScalarA, typename ScalarB>             struct cwise_promote_storage_type<Dense,Dense,scalar_product_op<ScalarA,ScalarB> >  { typedef Dense ret; };
+template <typename A, typename Functor>                   struct cwise_promote_storage_type<A,Dense,Functor>                                  { typedef Dense ret; };
+template <typename B, typename Functor>                   struct cwise_promote_storage_type<Dense,B,Functor>                                  { typedef Dense ret; };
+template <typename A, typename ScalarA, typename ScalarB> struct cwise_promote_storage_type<A,Dense,scalar_product_op<ScalarA,ScalarB> >      { typedef A     ret; };
+template <typename B, typename ScalarA, typename ScalarB> struct cwise_promote_storage_type<Dense,B,scalar_product_op<ScalarA,ScalarB> >      { typedef B     ret; };
+
+/** \internal Specify the "storage kind" of multiplying an expression of kind A with kind B.
+  * The template parameter ProductTag permits to specialize the resulting storage kind wrt to
+  * some compile-time properties of the product: GemmProduct, GemvProduct, OuterProduct, InnerProduct.
+  * The default rules are as follows:
+  * \code
+  *  K * K            -> K
+  *  dense * K        -> dense
+  *  K * dense        -> dense
+  *  diag * K         -> K
+  *  K * diag         -> K
+  *  Perm * K         -> K
+  * K * Perm          -> K
+  * \endcode
+  */
+template <typename A, typename B, int ProductTag> struct product_promote_storage_type;
 
-template <typename A> struct promote_storage_type<A,A>
-{
-  typedef A ret;
-};
+template <typename A, int ProductTag> struct product_promote_storage_type<A,                  A,                  ProductTag> { typedef A     ret;};
+template <int ProductTag>             struct product_promote_storage_type<Dense,              Dense,              ProductTag> { typedef Dense ret;};
+template <typename A, int ProductTag> struct product_promote_storage_type<A,                  Dense,              ProductTag> { typedef Dense ret; };
+template <typename B, int ProductTag> struct product_promote_storage_type<Dense,              B,                  ProductTag> { typedef Dense ret; };
+
+template <typename A, int ProductTag> struct product_promote_storage_type<A,                  DiagonalShape,      ProductTag> { typedef A ret; };
+template <typename B, int ProductTag> struct product_promote_storage_type<DiagonalShape,      B,                  ProductTag> { typedef B ret; };
+template <int ProductTag>             struct product_promote_storage_type<Dense,              DiagonalShape,      ProductTag> { typedef Dense ret; };
+template <int ProductTag>             struct product_promote_storage_type<DiagonalShape,      Dense,              ProductTag> { typedef Dense ret; };
+
+template <typename A, int ProductTag> struct product_promote_storage_type<A,                  PermutationStorage, ProductTag> { typedef A ret; };
+template <typename B, int ProductTag> struct product_promote_storage_type<PermutationStorage, B,                  ProductTag> { typedef B ret; };
+template <int ProductTag>             struct product_promote_storage_type<Dense,              PermutationStorage, ProductTag> { typedef Dense ret; };
+template <int ProductTag>             struct product_promote_storage_type<PermutationStorage, Dense,              ProductTag> { typedef Dense ret; };
 
 /** \internal gives the plain matrix or array type to store a row/column/diagonal of a matrix type.
   * \param Scalar optional parameter allowing to pass a different scalar type than the one of the MatrixType.
@@ -464,8 +556,36 @@ struct is_lvalue
                  bool(traits<ExpressionType>::Flags & LvalueBit) };
 };
 
+template<typename T> struct is_diagonal
+{ enum { ret = false }; };
+
+template<typename T> struct is_diagonal<DiagonalBase<T> >
+{ enum { ret = true }; };
+
+template<typename T> struct is_diagonal<DiagonalWrapper<T> >
+{ enum { ret = true }; };
+
+template<typename T, int S> struct is_diagonal<DiagonalMatrix<T,S> >
+{ enum { ret = true }; };
+
+template<typename S1, typename S2> struct glue_shapes;
+template<> struct glue_shapes<DenseShape,TriangularShape> { typedef TriangularShape type;  };
+
 } // end namespace internal
 
+// we require Lhs and Rhs to have the same scalar type. Currently there is no example of a binary functor
+// that would take two operands of different types. If there were such an example, then this check should be
+// moved to the BinaryOp functors, on a per-case basis. This would however require a change in the BinaryOp functors, as
+// currently they take only one typename Scalar template parameter.
+// It is tempting to always allow mixing different types but remember that this is often impossible in the vectorized paths.
+// So allowing mixing different types gives very unexpected errors when enabling vectorization, when the user tries to
+// add together a float matrix and a double matrix.
+#define EIGEN_CHECK_BINARY_COMPATIBILIY(BINOP,LHS,RHS) \
+  EIGEN_STATIC_ASSERT((internal::functor_is_product_like<BINOP>::ret \
+                        ? int(internal::scalar_product_traits<LHS, RHS>::Defined) \
+                        : int(internal::is_same<LHS, RHS>::value)), \
+    YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
+    
 } // end namespace Eigen
 
 #endif // EIGEN_XPRHELPER_H
diff --git a/Eigen/src/Eigenvalues/Tridiagonalization.h b/Eigen/src/Eigenvalues/Tridiagonalization.h
index 192278d68..e3a27f275 100644
--- a/Eigen/src/Eigenvalues/Tridiagonalization.h
+++ b/Eigen/src/Eigenvalues/Tridiagonalization.h
@@ -18,8 +18,10 @@ namespace internal {
 template<typename MatrixType> struct TridiagonalizationMatrixTReturnType;
 template<typename MatrixType>
 struct traits<TridiagonalizationMatrixTReturnType<MatrixType> >
+  : public traits<typename MatrixType::PlainObject>
 {
-  typedef typename MatrixType::PlainObject ReturnType;
+  typedef typename MatrixType::PlainObject ReturnType; // FIXME shall it be a BandMatrix?
+  enum { Flags = 0 };
 };
 
 template<typename MatrixType, typename CoeffVectorType>
diff --git a/Eigen/src/Geometry/AlignedBox.h b/Eigen/src/Geometry/AlignedBox.h
index b6a2f0e24..d6c5c1293 100644
--- a/Eigen/src/Geometry/AlignedBox.h
+++ b/Eigen/src/Geometry/AlignedBox.h
@@ -71,7 +71,7 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
   template<typename Derived>
   inline explicit AlignedBox(const MatrixBase<Derived>& a_p)
   {
-    typename internal::nested<Derived,2>::type p(a_p.derived());
+    typename internal::nested_eval<Derived,2>::type p(a_p.derived());
     m_min = p;
     m_max = p;
   }
diff --git a/Eigen/src/Geometry/Homogeneous.h b/Eigen/src/Geometry/Homogeneous.h
index 00e71d190..d1881d84d 100644
--- a/Eigen/src/Geometry/Homogeneous.h
+++ b/Eigen/src/Geometry/Homogeneous.h
@@ -48,8 +48,7 @@ struct traits<Homogeneous<MatrixType,Direction> >
     TmpFlags = _MatrixTypeNested::Flags & HereditaryBits,
     Flags = ColsAtCompileTime==1 ? (TmpFlags & ~RowMajorBit)
           : RowsAtCompileTime==1 ? (TmpFlags | RowMajorBit)
-          : TmpFlags,
-    CoeffReadCost = _MatrixTypeNested::CoeffReadCost
+          : TmpFlags
   };
 };
 
@@ -63,6 +62,7 @@ template<typename MatrixType,int _Direction> class Homogeneous
 {
   public:
 
+    typedef MatrixType NestedExpression;
     enum { Direction = _Direction };
 
     typedef MatrixBase<Homogeneous> Base;
@@ -74,37 +74,38 @@ template<typename MatrixType,int _Direction> class Homogeneous
 
     inline Index rows() const { return m_matrix.rows() + (int(Direction)==Vertical   ? 1 : 0); }
     inline Index cols() const { return m_matrix.cols() + (int(Direction)==Horizontal ? 1 : 0); }
-
-    inline Scalar coeff(Index row, Index col) const
-    {
-      if(  (int(Direction)==Vertical   && row==m_matrix.rows())
-        || (int(Direction)==Horizontal && col==m_matrix.cols()))
-        return 1;
-      return m_matrix.coeff(row, col);
-    }
+    
+    const NestedExpression& nestedExpression() const { return m_matrix; }
 
     template<typename Rhs>
-    inline const internal::homogeneous_right_product_impl<Homogeneous,Rhs>
+    inline const Product<Homogeneous,Rhs>
     operator* (const MatrixBase<Rhs>& rhs) const
     {
       eigen_assert(int(Direction)==Horizontal);
-      return internal::homogeneous_right_product_impl<Homogeneous,Rhs>(m_matrix,rhs.derived());
+      return Product<Homogeneous,Rhs>(*this,rhs.derived());
     }
 
     template<typename Lhs> friend
-    inline const internal::homogeneous_left_product_impl<Homogeneous,Lhs>
+    inline const Product<Lhs,Homogeneous>
     operator* (const MatrixBase<Lhs>& lhs, const Homogeneous& rhs)
     {
       eigen_assert(int(Direction)==Vertical);
-      return internal::homogeneous_left_product_impl<Homogeneous,Lhs>(lhs.derived(),rhs.m_matrix);
+      return Product<Lhs,Homogeneous>(lhs.derived(),rhs);
     }
 
     template<typename Scalar, int Dim, int Mode, int Options> friend
-    inline const internal::homogeneous_left_product_impl<Homogeneous,Transform<Scalar,Dim,Mode,Options> >
+    inline const Product<Transform<Scalar,Dim,Mode,Options>, Homogeneous >
     operator* (const Transform<Scalar,Dim,Mode,Options>& lhs, const Homogeneous& rhs)
     {
       eigen_assert(int(Direction)==Vertical);
-      return internal::homogeneous_left_product_impl<Homogeneous,Transform<Scalar,Dim,Mode,Options> >(lhs,rhs.m_matrix);
+      return Product<Transform<Scalar,Dim,Mode,Options>, Homogeneous>(lhs,rhs);
+    }
+
+    template<typename Func>
+    EIGEN_STRONG_INLINE typename internal::result_of<Func(Scalar)>::type
+    redux(const Func& func) const
+    {
+      return func(m_matrix.redux(func), Scalar(1));
     }
 
   protected:
@@ -120,7 +121,7 @@ template<typename MatrixType,int _Direction> class Homogeneous
   * Example: \include MatrixBase_homogeneous.cpp
   * Output: \verbinclude MatrixBase_homogeneous.out
   *
-  * \sa class Homogeneous
+  * \sa VectorwiseOp::homogeneous(), class Homogeneous
   */
 template<typename Derived>
 inline typename MatrixBase<Derived>::HomogeneousReturnType
@@ -137,7 +138,7 @@ MatrixBase<Derived>::homogeneous() const
   * Example: \include VectorwiseOp_homogeneous.cpp
   * Output: \verbinclude VectorwiseOp_homogeneous.out
   *
-  * \sa MatrixBase::homogeneous() */
+  * \sa MatrixBase::homogeneous(), class Homogeneous */
 template<typename ExpressionType, int Direction>
 inline Homogeneous<ExpressionType,Direction>
 VectorwiseOp<ExpressionType,Direction>::homogeneous() const
@@ -300,6 +301,93 @@ struct homogeneous_right_product_impl<Homogeneous<MatrixType,Horizontal>,Rhs>
   typename Rhs::Nested m_rhs;
 };
 
+template<typename ArgType,int Direction>
+struct evaluator_traits<Homogeneous<ArgType,Direction> >
+{
+  typedef typename storage_kind_to_evaluator_kind<typename ArgType::StorageKind>::Kind Kind;
+  typedef HomogeneousShape Shape;  
+  static const int AssumeAliasing = 0;
+};
+
+template<> struct AssignmentKind<DenseShape,HomogeneousShape> { typedef Dense2Dense Kind; };
+
+
+template<typename ArgType,int Direction>
+struct unary_evaluator<Homogeneous<ArgType,Direction>, IndexBased>
+  : evaluator<typename Homogeneous<ArgType,Direction>::PlainObject >::type
+{
+  typedef Homogeneous<ArgType,Direction> XprType;
+  typedef typename XprType::PlainObject PlainObject;
+  typedef typename evaluator<PlainObject>::type Base;
+  
+  typedef evaluator<XprType> type;
+  typedef evaluator<XprType> nestedType;
+
+  unary_evaluator(const XprType& op) 
+    : Base(), m_temp(op)
+  {
+    ::new (static_cast<Base*>(this)) Base(m_temp);
+  }
+
+protected:
+  PlainObject m_temp;
+};
+
+// dense = homogeneous
+template< typename DstXprType, typename ArgType, typename Scalar>
+struct Assignment<DstXprType, Homogeneous<ArgType,Vertical>, internal::assign_op<Scalar>, Dense2Dense, Scalar>
+{
+  typedef Homogeneous<ArgType,Vertical> SrcXprType;
+  static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &)
+  {
+    dst.template topRows<ArgType::RowsAtCompileTime>(src.nestedExpression().rows()) = src.nestedExpression();
+    dst.row(dst.rows()-1).setOnes();
+  }
+};
+
+// dense = homogeneous
+template< typename DstXprType, typename ArgType, typename Scalar>
+struct Assignment<DstXprType, Homogeneous<ArgType,Horizontal>, internal::assign_op<Scalar>, Dense2Dense, Scalar>
+{
+  typedef Homogeneous<ArgType,Horizontal> SrcXprType;
+  static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &)
+  {
+    dst.template leftCols<ArgType::ColsAtCompileTime>(src.nestedExpression().cols()) = src.nestedExpression();
+    dst.col(dst.cols()-1).setOnes();
+  }
+};
+
+template<typename LhsArg, typename Rhs, int ProductTag>
+struct generic_product_impl<Homogeneous<LhsArg,Horizontal>, Rhs, HomogeneousShape, DenseShape, ProductTag>
+{
+  template<typename Dest>
+  static void evalTo(Dest& dst, const Homogeneous<LhsArg,Horizontal>& lhs, const Rhs& rhs)
+  {
+    homogeneous_right_product_impl<Homogeneous<LhsArg,Horizontal>, Rhs>(lhs.nestedExpression(), rhs).evalTo(dst);
+  }
+};
+
+template<typename Lhs, typename RhsArg, int ProductTag>
+struct generic_product_impl<Lhs, Homogeneous<RhsArg,Vertical>, DenseShape, HomogeneousShape, ProductTag>
+{
+  template<typename Dest>
+  static void evalTo(Dest& dst, const Lhs& lhs, const Homogeneous<RhsArg,Vertical>& rhs)
+  {
+    homogeneous_left_product_impl<Homogeneous<RhsArg,Vertical>, Lhs>(lhs, rhs.nestedExpression()).evalTo(dst);
+  }
+};
+
+template<typename Scalar, int Dim, int Mode,int Options, typename RhsArg, int ProductTag>
+struct generic_product_impl<Transform<Scalar,Dim,Mode,Options>, Homogeneous<RhsArg,Vertical>, DenseShape, HomogeneousShape, ProductTag>
+{
+  typedef Transform<Scalar,Dim,Mode,Options> TransformType;
+  template<typename Dest>
+  static void evalTo(Dest& dst, const TransformType& lhs, const Homogeneous<RhsArg,Vertical>& rhs)
+  {
+    homogeneous_left_product_impl<Homogeneous<RhsArg,Vertical>, TransformType>(lhs, rhs.nestedExpression()).evalTo(dst);
+  }
+};
+
 } // end namespace internal
 
 } // end namespace Eigen
diff --git a/Eigen/src/Geometry/OrthoMethods.h b/Eigen/src/Geometry/OrthoMethods.h
index 26be3ee5b..a245c79d3 100644
--- a/Eigen/src/Geometry/OrthoMethods.h
+++ b/Eigen/src/Geometry/OrthoMethods.h
@@ -30,8 +30,8 @@ MatrixBase<Derived>::cross(const MatrixBase<OtherDerived>& other) const
 
   // Note that there is no need for an expression here since the compiler
   // optimize such a small temporary very well (even within a complex expression)
-  typename internal::nested<Derived,2>::type lhs(derived());
-  typename internal::nested<OtherDerived,2>::type rhs(other.derived());
+  typename internal::nested_eval<Derived,2>::type lhs(derived());
+  typename internal::nested_eval<OtherDerived,2>::type rhs(other.derived());
   return typename cross_product_return_type<OtherDerived>::type(
     numext::conj(lhs.coeff(1) * rhs.coeff(2) - lhs.coeff(2) * rhs.coeff(1)),
     numext::conj(lhs.coeff(2) * rhs.coeff(0) - lhs.coeff(0) * rhs.coeff(2)),
@@ -76,8 +76,8 @@ MatrixBase<Derived>::cross3(const MatrixBase<OtherDerived>& other) const
   EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Derived,4)
   EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(OtherDerived,4)
 
-  typedef typename internal::nested<Derived,2>::type DerivedNested;
-  typedef typename internal::nested<OtherDerived,2>::type OtherDerivedNested;
+  typedef typename internal::nested_eval<Derived,2>::type DerivedNested;
+  typedef typename internal::nested_eval<OtherDerived,2>::type OtherDerivedNested;
   DerivedNested lhs(derived());
   OtherDerivedNested rhs(other.derived());
 
@@ -103,21 +103,24 @@ VectorwiseOp<ExpressionType,Direction>::cross(const MatrixBase<OtherDerived>& ot
   EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(OtherDerived,3)
   EIGEN_STATIC_ASSERT((internal::is_same<Scalar, typename OtherDerived::Scalar>::value),
     YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
+  
+  typename internal::nested_eval<ExpressionType,2>::type mat(_expression());
+  typename internal::nested_eval<OtherDerived,2>::type vec(other.derived());
 
   CrossReturnType res(_expression().rows(),_expression().cols());
   if(Direction==Vertical)
   {
     eigen_assert(CrossReturnType::RowsAtCompileTime==3 && "the matrix must have exactly 3 rows");
-    res.row(0) = (_expression().row(1) * other.coeff(2) - _expression().row(2) * other.coeff(1)).conjugate();
-    res.row(1) = (_expression().row(2) * other.coeff(0) - _expression().row(0) * other.coeff(2)).conjugate();
-    res.row(2) = (_expression().row(0) * other.coeff(1) - _expression().row(1) * other.coeff(0)).conjugate();
+    res.row(0) = (mat.row(1) * vec.coeff(2) - mat.row(2) * vec.coeff(1)).conjugate();
+    res.row(1) = (mat.row(2) * vec.coeff(0) - mat.row(0) * vec.coeff(2)).conjugate();
+    res.row(2) = (mat.row(0) * vec.coeff(1) - mat.row(1) * vec.coeff(0)).conjugate();
   }
   else
   {
     eigen_assert(CrossReturnType::ColsAtCompileTime==3 && "the matrix must have exactly 3 columns");
-    res.col(0) = (_expression().col(1) * other.coeff(2) - _expression().col(2) * other.coeff(1)).conjugate();
-    res.col(1) = (_expression().col(2) * other.coeff(0) - _expression().col(0) * other.coeff(2)).conjugate();
-    res.col(2) = (_expression().col(0) * other.coeff(1) - _expression().col(1) * other.coeff(0)).conjugate();
+    res.col(0) = (mat.col(1) * vec.coeff(2) - mat.col(2) * vec.coeff(1)).conjugate();
+    res.col(1) = (mat.col(2) * vec.coeff(0) - mat.col(0) * vec.coeff(2)).conjugate();
+    res.col(2) = (mat.col(0) * vec.coeff(1) - mat.col(1) * vec.coeff(0)).conjugate();
   }
   return res;
 }
diff --git a/Eigen/src/Geometry/Quaternion.h b/Eigen/src/Geometry/Quaternion.h
index 11e5398d4..3f0067286 100644
--- a/Eigen/src/Geometry/Quaternion.h
+++ b/Eigen/src/Geometry/Quaternion.h
@@ -217,7 +217,7 @@ struct traits<Quaternion<_Scalar,_Options> >
   typedef _Scalar Scalar;
   typedef Matrix<_Scalar,4,1,_Options> Coefficients;
   enum{
-    IsAligned = internal::traits<Coefficients>::Flags & AlignedBit,
+    IsAligned = (internal::traits<Coefficients>::EvaluatorFlags & AlignedBit) != 0,
     Flags = IsAligned ? (AlignedBit | LvalueBit) : LvalueBit
   };
 };
diff --git a/Eigen/src/Geometry/Transform.h b/Eigen/src/Geometry/Transform.h
index cb93acf6b..89e9cc1a4 100644
--- a/Eigen/src/Geometry/Transform.h
+++ b/Eigen/src/Geometry/Transform.h
@@ -62,6 +62,22 @@ struct transform_construct_from_matrix;
 
 template<typename TransformType> struct transform_take_affine_part;
 
+template<typename _Scalar, int _Dim, int _Mode, int _Options>
+struct traits<Transform<_Scalar,_Dim,_Mode,_Options> >
+{
+  typedef _Scalar Scalar;
+  typedef DenseIndex Index;
+  typedef Dense StorageKind;
+  enum {
+    Dim1 = _Dim==Dynamic ? _Dim : _Dim + 1,
+    RowsAtCompileTime = _Mode==Projective ? Dim1 : _Dim,
+    ColsAtCompileTime = Dim1,
+    MaxRowsAtCompileTime = RowsAtCompileTime,
+    MaxColsAtCompileTime = ColsAtCompileTime,
+    Flags = 0
+  };
+};
+
 } // end namespace internal
 
 /** \geometry_module \ingroup Geometry_Module
@@ -355,6 +371,9 @@ public:
   inline Transform& operator=(const QTransform& other);
   inline QTransform toQTransform(void) const;
   #endif
+  
+  Index rows() const { return int(Mode)==int(Projective) ? m_matrix.cols() : (m_matrix.cols()-1); }
+  Index cols() const { return m_matrix.cols(); }
 
   /** shortcut for m_matrix(row,col);
     * \sa MatrixBase::operator(Index,Index) const */
diff --git a/Eigen/src/Householder/BlockHouseholder.h b/Eigen/src/Householder/BlockHouseholder.h
index 60dbea5f5..35dbf80a1 100644
--- a/Eigen/src/Householder/BlockHouseholder.h
+++ b/Eigen/src/Householder/BlockHouseholder.h
@@ -16,48 +16,85 @@
 namespace Eigen { 
 
 namespace internal {
+  
+/** \internal */
+// template<typename TriangularFactorType,typename VectorsType,typename CoeffsType>
+// void make_block_householder_triangular_factor(TriangularFactorType& triFactor, const VectorsType& vectors, const CoeffsType& hCoeffs)
+// {
+//   typedef typename TriangularFactorType::Index Index;
+//   typedef typename VectorsType::Scalar Scalar;
+//   const Index nbVecs = vectors.cols();
+//   eigen_assert(triFactor.rows() == nbVecs && triFactor.cols() == nbVecs && vectors.rows()>=nbVecs);
+// 
+//   for(Index i = 0; i < nbVecs; i++)
+//   {
+//     Index rs = vectors.rows() - i;
+//     // Warning, note that hCoeffs may alias with vectors.
+//     // It is then necessary to copy it before modifying vectors(i,i). 
+//     typename CoeffsType::Scalar h = hCoeffs(i);
+//     // This hack permits to pass trough nested Block<> and Transpose<> expressions.
+//     Scalar *Vii_ptr = const_cast<Scalar*>(vectors.data() + vectors.outerStride()*i + vectors.innerStride()*i);
+//     Scalar Vii = *Vii_ptr;
+//     *Vii_ptr = Scalar(1);
+//     triFactor.col(i).head(i).noalias() = -h * vectors.block(i, 0, rs, i).adjoint()
+//                                        * vectors.col(i).tail(rs);
+//     *Vii_ptr = Vii;
+//     // FIXME add .noalias() once the triangular product can work inplace
+//     triFactor.col(i).head(i) = triFactor.block(0,0,i,i).template triangularView<Upper>()
+//                              * triFactor.col(i).head(i);
+//     triFactor(i,i) = hCoeffs(i);
+//   }
+// }
 
 /** \internal */
+// This variant avoid modifications in vectors
 template<typename TriangularFactorType,typename VectorsType,typename CoeffsType>
 void make_block_householder_triangular_factor(TriangularFactorType& triFactor, const VectorsType& vectors, const CoeffsType& hCoeffs)
 {
   typedef typename TriangularFactorType::Index Index;
-  typedef typename VectorsType::Scalar Scalar;
   const Index nbVecs = vectors.cols();
   eigen_assert(triFactor.rows() == nbVecs && triFactor.cols() == nbVecs && vectors.rows()>=nbVecs);
 
-  for(Index i = 0; i < nbVecs; i++)
+  for(Index i = nbVecs-1; i >=0 ; --i)
   {
-    Index rs = vectors.rows() - i;
-    Scalar Vii = vectors(i,i);
-    vectors.const_cast_derived().coeffRef(i,i) = Scalar(1);
-    triFactor.col(i).head(i).noalias() = -hCoeffs(i) * vectors.block(i, 0, rs, i).adjoint()
-                                       * vectors.col(i).tail(rs);
-    vectors.const_cast_derived().coeffRef(i, i) = Vii;
-    // FIXME add .noalias() once the triangular product can work inplace
-    triFactor.col(i).head(i) = triFactor.block(0,0,i,i).template triangularView<Upper>()
-                             * triFactor.col(i).head(i);
+    Index rs = vectors.rows() - i - 1;
+    Index rt = nbVecs-i-1;
+
+    if(rt>0)
+    {
+      triFactor.row(i).tail(rt).noalias() = -hCoeffs(i) * vectors.col(i).tail(rs).adjoint()
+                                                        * vectors.bottomRightCorner(rs, rt).template triangularView<UnitLower>();
+            
+      // FIXME add .noalias() once the triangular product can work inplace
+      triFactor.row(i).tail(rt) = triFactor.row(i).tail(rt) * triFactor.bottomRightCorner(rt,rt).template triangularView<Upper>();
+      
+    }
     triFactor(i,i) = hCoeffs(i);
   }
 }
 
-/** \internal */
+/** \internal
+  * if forward then perform   mat = H0 * H1 * H2 * mat
+  * otherwise perform         mat = H2 * H1 * H0 * mat
+  */
 template<typename MatrixType,typename VectorsType,typename CoeffsType>
-void apply_block_householder_on_the_left(MatrixType& mat, const VectorsType& vectors, const CoeffsType& hCoeffs)
+void apply_block_householder_on_the_left(MatrixType& mat, const VectorsType& vectors, const CoeffsType& hCoeffs, bool forward)
 {
   typedef typename MatrixType::Index Index;
   enum { TFactorSize = MatrixType::ColsAtCompileTime };
   Index nbVecs = vectors.cols();
-  Matrix<typename MatrixType::Scalar, TFactorSize, TFactorSize, ColMajor> T(nbVecs,nbVecs);
-  make_block_householder_triangular_factor(T, vectors, hCoeffs);
-
-  const TriangularView<const VectorsType, UnitLower>& V(vectors);
+  Matrix<typename MatrixType::Scalar, TFactorSize, TFactorSize, RowMajor> T(nbVecs,nbVecs);
+  
+  if(forward) make_block_householder_triangular_factor(T, vectors, hCoeffs);
+  else        make_block_householder_triangular_factor(T, vectors, hCoeffs.conjugate());  
+  const TriangularView<const VectorsType, UnitLower> V(vectors);
 
   // A -= V T V^* A
   Matrix<typename MatrixType::Scalar,VectorsType::ColsAtCompileTime,MatrixType::ColsAtCompileTime,0,
          VectorsType::MaxColsAtCompileTime,MatrixType::MaxColsAtCompileTime> tmp = V.adjoint() * mat;
   // FIXME add .noalias() once the triangular product can work inplace
-  tmp = T.template triangularView<Upper>().adjoint() * tmp;
+  if(forward) tmp = T.template triangularView<Upper>()           * tmp;
+  else        tmp = T.template triangularView<Upper>().adjoint() * tmp;
   mat.noalias() -= V * tmp;
 }
 
diff --git a/Eigen/src/Householder/HouseholderSequence.h b/Eigen/src/Householder/HouseholderSequence.h
index d800ca1fa..4ded2995f 100644
--- a/Eigen/src/Householder/HouseholderSequence.h
+++ b/Eigen/src/Householder/HouseholderSequence.h
@@ -73,6 +73,15 @@ struct traits<HouseholderSequence<VectorsType,CoeffsType,Side> >
   };
 };
 
+struct HouseholderSequenceShape {};
+
+template<typename VectorsType, typename CoeffsType, int Side>
+struct evaluator_traits<HouseholderSequence<VectorsType,CoeffsType,Side> >
+  : public evaluator_traits_base<HouseholderSequence<VectorsType,CoeffsType,Side> >
+{
+  typedef HouseholderSequenceShape Shape;
+};
+
 template<typename VectorsType, typename CoeffsType, int Side>
 struct hseq_side_dependent_impl
 {
@@ -307,12 +316,36 @@ template<typename VectorsType, typename CoeffsType, int Side> class HouseholderS
     template<typename Dest, typename Workspace>
     inline void applyThisOnTheLeft(Dest& dst, Workspace& workspace) const
     {
-      workspace.resize(dst.cols());
-      for(Index k = 0; k < m_length; ++k)
+      const Index BlockSize = 48;
+      // if the entries are large enough, then apply the reflectors by block
+      if(m_length>=BlockSize && dst.cols()>1)
       {
-        Index actual_k = m_trans ? k : m_length-k-1;
-        dst.bottomRows(rows()-m_shift-actual_k)
-           .applyHouseholderOnTheLeft(essentialVector(actual_k), m_coeffs.coeff(actual_k), workspace.data());
+        for(Index i = 0; i < m_length; i+=BlockSize)
+        {
+          Index end = m_trans ? (std::min)(m_length,i+BlockSize) : m_length-i;
+          Index k = m_trans ? i : (std::max)(Index(0),end-BlockSize);
+          Index bs = end-k;
+          Index start = k + m_shift;
+          
+          typedef Block<typename internal::remove_all<VectorsType>::type,Dynamic,Dynamic> SubVectorsType;
+          SubVectorsType sub_vecs1(m_vectors.const_cast_derived(), Side==OnTheRight ? k : start,
+                                                                   Side==OnTheRight ? start : k,
+                                                                   Side==OnTheRight ? bs : m_vectors.rows()-start,
+                                                                   Side==OnTheRight ? m_vectors.cols()-start : bs);
+          typename internal::conditional<Side==OnTheRight, Transpose<SubVectorsType>, SubVectorsType&>::type sub_vecs(sub_vecs1);
+          Block<Dest,Dynamic,Dynamic> sub_dst(dst,dst.rows()-rows()+m_shift+k,0, rows()-m_shift-k,dst.cols());
+          apply_block_householder_on_the_left(sub_dst, sub_vecs, m_coeffs.segment(k, bs), !m_trans);
+        }
+      }
+      else
+      {
+        workspace.resize(dst.cols());
+        for(Index k = 0; k < m_length; ++k)
+        {
+          Index actual_k = m_trans ? k : m_length-k-1;
+          dst.bottomRows(rows()-m_shift-actual_k)
+            .applyHouseholderOnTheLeft(essentialVector(actual_k), m_coeffs.coeff(actual_k), workspace.data());
+        }
       }
     }
 
diff --git a/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h b/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h
index 1f3c060d0..98b169868 100644
--- a/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h
+++ b/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h
@@ -1,7 +1,7 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
-// Copyright (C) 2011 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2011-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
@@ -80,19 +80,20 @@ class DiagonalPreconditioner
       return factorize(mat);
     }
 
+    /** \internal */
     template<typename Rhs, typename Dest>
-    void _solve(const Rhs& b, Dest& x) const
+    void _solve_impl(const Rhs& b, Dest& x) const
     {
       x = m_invdiag.array() * b.array() ;
     }
 
-    template<typename Rhs> inline const internal::solve_retval<DiagonalPreconditioner, Rhs>
+    template<typename Rhs> inline const Solve<DiagonalPreconditioner, Rhs>
     solve(const MatrixBase<Rhs>& b) const
     {
       eigen_assert(m_isInitialized && "DiagonalPreconditioner is not initialized.");
       eigen_assert(m_invdiag.size()==b.rows()
                 && "DiagonalPreconditioner::solve(): invalid number of rows of the right hand side matrix b");
-      return internal::solve_retval<DiagonalPreconditioner, Rhs>(*this, b.derived());
+      return Solve<DiagonalPreconditioner, Rhs>(*this, b.derived());
     }
 
   protected:
@@ -100,22 +101,6 @@ class DiagonalPreconditioner
     bool m_isInitialized;
 };
 
-namespace internal {
-
-template<typename _MatrixType, typename Rhs>
-struct solve_retval<DiagonalPreconditioner<_MatrixType>, Rhs>
-  : solve_retval_base<DiagonalPreconditioner<_MatrixType>, Rhs>
-{
-  typedef DiagonalPreconditioner<_MatrixType> Dec;
-  EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs)
-
-  template<typename Dest> void evalTo(Dest& dst) const
-  {
-    dec()._solve(rhs(),dst);
-  }
-};
-
-}
 
 /** \ingroup IterativeLinearSolvers_Module
   * \brief A naive preconditioner which approximates any matrix as the identity matrix
diff --git a/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h b/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h
index 27824b9d5..051940dc7 100644
--- a/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h
+++ b/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h
@@ -1,7 +1,7 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
-// Copyright (C) 2011 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2011-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
 // Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
 //
 // This Source Code Form is subject to the terms of the Mozilla
@@ -181,26 +181,10 @@ public:
   BiCGSTAB(const MatrixType& A) : Base(A) {}
 
   ~BiCGSTAB() {}
-  
-  /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A
-    * \a x0 as an initial solution.
-    *
-    * \sa compute()
-    */
-  template<typename Rhs,typename Guess>
-  inline const internal::solve_retval_with_guess<BiCGSTAB, Rhs, Guess>
-  solveWithGuess(const MatrixBase<Rhs>& b, const Guess& x0) const
-  {
-    eigen_assert(m_isInitialized && "BiCGSTAB is not initialized.");
-    eigen_assert(Base::rows()==b.rows()
-              && "BiCGSTAB::solve(): invalid number of rows of the right hand side matrix b");
-    return internal::solve_retval_with_guess
-            <BiCGSTAB, Rhs, Guess>(*this, b.derived(), x0);
-  }
-  
+
   /** \internal */
   template<typename Rhs,typename Dest>
-  void _solveWithGuess(const Rhs& b, Dest& x) const
+  void _solve_with_guess_impl(const Rhs& b, Dest& x) const
   {    
     bool failed = false;
     for(int j=0; j<b.cols(); ++j)
@@ -219,36 +203,19 @@ public:
   }
 
   /** \internal */
+  using Base::_solve_impl;
   template<typename Rhs,typename Dest>
-  void _solve(const Rhs& b, Dest& x) const
+  void _solve_impl(const MatrixBase<Rhs>& b, Dest& x) const
   {
-//     x.setZero();
-  x = b;
-    _solveWithGuess(b,x);
+    // x.setZero();
+    x = b;
+    _solve_with_guess_impl(b,x);
   }
 
 protected:
 
 };
 
-
-namespace internal {
-
-  template<typename _MatrixType, typename _Preconditioner, typename Rhs>
-struct solve_retval<BiCGSTAB<_MatrixType, _Preconditioner>, Rhs>
-  : solve_retval_base<BiCGSTAB<_MatrixType, _Preconditioner>, Rhs>
-{
-  typedef BiCGSTAB<_MatrixType, _Preconditioner> Dec;
-  EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs)
-
-  template<typename Dest> void evalTo(Dest& dst) const
-  {
-    dec()._solve(rhs(),dst);
-  }
-};
-
-} // end namespace internal
-
 } // end namespace Eigen
 
 #endif // EIGEN_BICGSTAB_H
diff --git a/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h b/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h
index 3ce517940..f72cf86a5 100644
--- a/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h
+++ b/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h
@@ -1,7 +1,7 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
-// Copyright (C) 2011 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2011-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
@@ -192,26 +192,10 @@ public:
   ConjugateGradient(const MatrixType& A) : Base(A) {}
 
   ~ConjugateGradient() {}
-  
-  /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A
-    * \a x0 as an initial solution.
-    *
-    * \sa compute()
-    */
-  template<typename Rhs,typename Guess>
-  inline const internal::solve_retval_with_guess<ConjugateGradient, Rhs, Guess>
-  solveWithGuess(const MatrixBase<Rhs>& b, const Guess& x0) const
-  {
-    eigen_assert(m_isInitialized && "ConjugateGradient is not initialized.");
-    eigen_assert(Base::rows()==b.rows()
-              && "ConjugateGradient::solve(): invalid number of rows of the right hand side matrix b");
-    return internal::solve_retval_with_guess
-            <ConjugateGradient, Rhs, Guess>(*this, b.derived(), x0);
-  }
 
   /** \internal */
   template<typename Rhs,typename Dest>
-  void _solveWithGuess(const Rhs& b, Dest& x) const
+  void _solve_with_guess_impl(const Rhs& b, Dest& x) const
   {
     m_iterations = Base::maxIterations();
     m_error = Base::m_tolerance;
@@ -231,35 +215,18 @@ public:
   }
   
   /** \internal */
+  using Base::_solve_impl;
   template<typename Rhs,typename Dest>
-  void _solve(const Rhs& b, Dest& x) const
+  void _solve_impl(const MatrixBase<Rhs>& b, Dest& x) const
   {
     x.setOnes();
-    _solveWithGuess(b,x);
+    _solve_with_guess_impl(b.derived(),x);
   }
 
 protected:
 
 };
 
-
-namespace internal {
-
-template<typename _MatrixType, int _UpLo, typename _Preconditioner, typename Rhs>
-struct solve_retval<ConjugateGradient<_MatrixType,_UpLo,_Preconditioner>, Rhs>
-  : solve_retval_base<ConjugateGradient<_MatrixType,_UpLo,_Preconditioner>, Rhs>
-{
-  typedef ConjugateGradient<_MatrixType,_UpLo,_Preconditioner> Dec;
-  EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs)
-
-  template<typename Dest> void evalTo(Dest& dst) const
-  {
-    dec()._solve(rhs(),dst);
-  }
-};
-
-} // end namespace internal
-
 } // end namespace Eigen
 
 #endif // EIGEN_CONJUGATE_GRADIENT_H
diff --git a/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h b/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h
index b55afc136..7adbbc489 100644
--- a/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h
+++ b/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h
@@ -2,6 +2,7 @@
 // for linear algebra.
 //
 // Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
+// Copyright (C) 2014 Gael Guennebaud <gael.guennebaud@inria.fr>
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
@@ -93,8 +94,12 @@ Index QuickSplit(VectorV &row, VectorI &ind, Index ncut)
   *   http://comments.gmane.org/gmane.comp.lib.eigen/3302
   */
 template <typename _Scalar>
-class IncompleteLUT : internal::noncopyable
+class IncompleteLUT : public SparseSolverBase<IncompleteLUT<_Scalar> >
 {
+  protected:
+    typedef SparseSolverBase<IncompleteLUT<_Scalar> > Base;
+    using Base::m_isInitialized;
+  public:
     typedef _Scalar Scalar;
     typedef typename NumTraits<Scalar>::Real RealScalar;
     typedef Matrix<Scalar,Dynamic,1> Vector;
@@ -107,13 +112,13 @@ class IncompleteLUT : internal::noncopyable
     
     IncompleteLUT()
       : m_droptol(NumTraits<Scalar>::dummy_precision()), m_fillfactor(10),
-        m_analysisIsOk(false), m_factorizationIsOk(false), m_isInitialized(false)
+        m_analysisIsOk(false), m_factorizationIsOk(false)
     {}
     
     template<typename MatrixType>
     IncompleteLUT(const MatrixType& mat, const RealScalar& droptol=NumTraits<Scalar>::dummy_precision(), int fillfactor = 10)
       : m_droptol(droptol),m_fillfactor(fillfactor),
-        m_analysisIsOk(false),m_factorizationIsOk(false),m_isInitialized(false)
+        m_analysisIsOk(false),m_factorizationIsOk(false)
     {
       eigen_assert(fillfactor != 0);
       compute(mat); 
@@ -158,7 +163,7 @@ class IncompleteLUT : internal::noncopyable
     void setFillfactor(int fillfactor); 
     
     template<typename Rhs, typename Dest>
-    void _solve(const Rhs& b, Dest& x) const
+    void _solve_impl(const Rhs& b, Dest& x) const
     {
       x = m_Pinv * b;  
       x = m_lu.template triangularView<UnitLower>().solve(x);
@@ -166,15 +171,6 @@ class IncompleteLUT : internal::noncopyable
       x = m_P * x; 
     }
 
-    template<typename Rhs> inline const internal::solve_retval<IncompleteLUT, Rhs>
-     solve(const MatrixBase<Rhs>& b) const
-    {
-      eigen_assert(m_isInitialized && "IncompleteLUT is not initialized.");
-      eigen_assert(cols()==b.rows()
-                && "IncompleteLUT::solve(): invalid number of rows of the right hand side matrix b");
-      return internal::solve_retval<IncompleteLUT, Rhs>(*this, b.derived());
-    }
-
 protected:
 
     /** keeps off-diagonal entries; drops diagonal entries */
@@ -192,7 +188,6 @@ protected:
     int m_fillfactor;
     bool m_analysisIsOk;
     bool m_factorizationIsOk;
-    bool m_isInitialized;
     ComputationInfo m_info;
     PermutationMatrix<Dynamic,Dynamic,Index> m_P;     // Fill-reducing permutation
     PermutationMatrix<Dynamic,Dynamic,Index> m_Pinv;  // Inverse permutation
@@ -445,23 +440,6 @@ void IncompleteLUT<Scalar>::factorize(const _MatrixType& amat)
   m_info = Success;
 }
 
-namespace internal {
-
-template<typename _MatrixType, typename Rhs>
-struct solve_retval<IncompleteLUT<_MatrixType>, Rhs>
-  : solve_retval_base<IncompleteLUT<_MatrixType>, Rhs>
-{
-  typedef IncompleteLUT<_MatrixType> Dec;
-  EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs)
-
-  template<typename Dest> void evalTo(Dest& dst) const
-  {
-    dec()._solve(rhs(),dst);
-  }
-};
-
-} // end namespace internal
-
 } // end namespace Eigen
 
 #endif // EIGEN_INCOMPLETE_LUT_H
diff --git a/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h b/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h
index 2036922d6..fd9285087 100644
--- a/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h
+++ b/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h
@@ -1,7 +1,7 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
-// Copyright (C) 2011 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2011-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
@@ -18,8 +18,12 @@ namespace Eigen {
   * \sa class SimplicialCholesky, DiagonalPreconditioner, IdentityPreconditioner
   */
 template< typename Derived>
-class IterativeSolverBase : internal::noncopyable
+class IterativeSolverBase : public SparseSolverBase<Derived>
 {
+protected:
+  typedef SparseSolverBase<Derived> Base;
+  using Base::m_isInitialized;
+  
 public:
   typedef typename internal::traits<Derived>::MatrixType MatrixType;
   typedef typename internal::traits<Derived>::Preconditioner Preconditioner;
@@ -29,8 +33,7 @@ public:
 
 public:
 
-  Derived& derived() { return *static_cast<Derived*>(this); }
-  const Derived& derived() const { return *static_cast<const Derived*>(this); }
+  using Base::derived;
 
   /** Default constructor. */
   IterativeSolverBase()
@@ -159,31 +162,18 @@ public:
     return m_error;
   }
 
-  /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A.
-    *
-    * \sa compute()
-    */
-  template<typename Rhs> inline const internal::solve_retval<Derived, Rhs>
-  solve(const MatrixBase<Rhs>& b) const
-  {
-    eigen_assert(m_isInitialized && "IterativeSolverBase is not initialized.");
-    eigen_assert(rows()==b.rows()
-              && "IterativeSolverBase::solve(): invalid number of rows of the right hand side matrix b");
-    return internal::solve_retval<Derived, Rhs>(derived(), b.derived());
-  }
-  
-  /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A.
+  /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A
+    * and \a x0 as an initial solution.
     *
-    * \sa compute()
+    * \sa solve(), compute()
     */
-  template<typename Rhs>
-  inline const internal::sparse_solve_retval<IterativeSolverBase, Rhs>
-  solve(const SparseMatrixBase<Rhs>& b) const
+  template<typename Rhs,typename Guess>
+  inline const SolveWithGuess<Derived, Rhs, Guess>
+  solveWithGuess(const MatrixBase<Rhs>& b, const Guess& x0) const
   {
-    eigen_assert(m_isInitialized && "IterativeSolverBase is not initialized.");
-    eigen_assert(rows()==b.rows()
-              && "IterativeSolverBase::solve(): invalid number of rows of the right hand side matrix b");
-    return internal::sparse_solve_retval<IterativeSolverBase, Rhs>(*this, b.derived());
+    eigen_assert(m_isInitialized && "Solver is not initialized.");
+    eigen_assert(derived().rows()==b.rows() && "solve(): invalid number of rows of the right hand side matrix b");
+    return SolveWithGuess<Derived, Rhs, Guess>(derived(), b.derived(), x0);
   }
 
   /** \returns Success if the iterations converged, and NoConvergence otherwise. */
@@ -195,7 +185,7 @@ public:
   
   /** \internal */
   template<typename Rhs, typename DestScalar, int DestOptions, typename DestIndex>
-  void _solve_sparse(const Rhs& b, SparseMatrix<DestScalar,DestOptions,DestIndex> &dest) const
+  void _solve_impl(const Rhs& b, SparseMatrix<DestScalar,DestOptions,DestIndex> &dest) const
   {
     eigen_assert(rows()==b.rows());
     
@@ -229,26 +219,9 @@ protected:
   mutable RealScalar m_error;
   mutable int m_iterations;
   mutable ComputationInfo m_info;
-  mutable bool m_isInitialized, m_analysisIsOk, m_factorizationIsOk;
-};
-
-namespace internal {
- 
-template<typename Derived, typename Rhs>
-struct sparse_solve_retval<IterativeSolverBase<Derived>, Rhs>
-  : sparse_solve_retval_base<IterativeSolverBase<Derived>, Rhs>
-{
-  typedef IterativeSolverBase<Derived> Dec;
-  EIGEN_MAKE_SPARSE_SOLVE_HELPERS(Dec,Rhs)
-
-  template<typename Dest> void evalTo(Dest& dst) const
-  {
-    dec().derived()._solve_sparse(rhs(),dst);
-  }
+  mutable bool m_analysisIsOk, m_factorizationIsOk;
 };
 
-} // end namespace internal
-
 } // end namespace Eigen
 
 #endif // EIGEN_ITERATIVE_SOLVER_BASE_H
diff --git a/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h b/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h
new file mode 100644
index 000000000..46dd5babe
--- /dev/null
+++ b/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h
@@ -0,0 +1,114 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_SOLVEWITHGUESS_H
+#define EIGEN_SOLVEWITHGUESS_H
+
+namespace Eigen {
+
+template<typename Decomposition, typename RhsType, typename GuessType> class SolveWithGuess;
+  
+/** \class SolveWithGuess
+  * \ingroup IterativeLinearSolvers_Module
+  *
+  * \brief Pseudo expression representing a solving operation
+  *
+  * \tparam Decomposition the type of the matrix or decomposion object
+  * \tparam Rhstype the type of the right-hand side
+  *
+  * This class represents an expression of A.solve(B)
+  * and most of the time this is the only way it is used.
+  *
+  */
+namespace internal {
+
+
+template<typename Decomposition, typename RhsType, typename GuessType>
+struct traits<SolveWithGuess<Decomposition, RhsType, GuessType> >
+  : traits<Solve<Decomposition,RhsType> >
+{};
+
+}
+
+
+template<typename Decomposition, typename RhsType, typename GuessType>
+class SolveWithGuess : public internal::generic_xpr_base<SolveWithGuess<Decomposition,RhsType,GuessType>, MatrixXpr, typename internal::traits<RhsType>::StorageKind>::type
+{
+public:
+  typedef typename RhsType::Index Index;
+  typedef typename internal::traits<SolveWithGuess>::PlainObject PlainObject;
+  typedef typename internal::generic_xpr_base<SolveWithGuess<Decomposition,RhsType,GuessType>, MatrixXpr, typename internal::traits<RhsType>::StorageKind>::type Base;
+  
+  SolveWithGuess(const Decomposition &dec, const RhsType &rhs, const GuessType &guess)
+    : m_dec(dec), m_rhs(rhs), m_guess(guess)
+  {}
+  
+  EIGEN_DEVICE_FUNC Index rows() const { return m_dec.cols(); }
+  EIGEN_DEVICE_FUNC Index cols() const { return m_rhs.cols(); }
+
+  EIGEN_DEVICE_FUNC const Decomposition& dec()   const { return m_dec; }
+  EIGEN_DEVICE_FUNC const RhsType&       rhs()   const { return m_rhs; }
+  EIGEN_DEVICE_FUNC const GuessType&     guess() const { return m_guess; }
+
+protected:
+  const Decomposition &m_dec;
+  const RhsType       &m_rhs;
+  const GuessType     &m_guess;
+  
+  typedef typename internal::traits<SolveWithGuess>::Scalar Scalar;
+  
+private:
+  Scalar coeff(Index row, Index col) const;
+  Scalar coeff(Index i) const;
+};
+
+namespace internal {
+
+// Evaluator of SolveWithGuess -> eval into a temporary
+template<typename Decomposition, typename RhsType, typename GuessType>
+struct evaluator<SolveWithGuess<Decomposition,RhsType, GuessType> >
+  : public evaluator<typename SolveWithGuess<Decomposition,RhsType,GuessType>::PlainObject>::type
+{
+  typedef SolveWithGuess<Decomposition,RhsType,GuessType> SolveType;
+  typedef typename SolveType::PlainObject PlainObject;
+  typedef typename evaluator<PlainObject>::type Base;
+  
+  typedef evaluator type;
+  typedef evaluator nestedType;
+
+  evaluator(const SolveType& solve)
+    : m_result(solve.rows(), solve.cols())
+  {
+    ::new (static_cast<Base*>(this)) Base(m_result);
+    solve.dec()._solve_with_guess_impl(solve.rhs(), m_result, solve().guess());
+  }
+  
+protected:  
+  PlainObject m_result;
+};
+
+// Specialization for "dst = dec.solve(rhs)"
+// NOTE we need to specialize it for Dense2Dense to avoid ambiguous specialization error and a Sparse2Sparse specialization must exist somewhere
+template<typename DstXprType, typename DecType, typename RhsType, typename GuessType, typename Scalar>
+struct Assignment<DstXprType, SolveWithGuess<DecType,RhsType,GuessType>, internal::assign_op<Scalar>, Dense2Dense, Scalar>
+{
+  typedef Solve<DecType,RhsType> SrcXprType;
+  static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &)
+  {
+    // FIXME shall we resize dst here?
+    dst = src.guess();
+    src.dec()._solve_with_guess_impl(src.rhs(), dst/*, src.guess()*/);
+  }
+};
+
+} // end namepsace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_SOLVEWITHGUESS_H
diff --git a/Eigen/src/LU/Determinant.h b/Eigen/src/LU/Determinant.h
index bb8e78a8a..d6a3c1e5a 100644
--- a/Eigen/src/LU/Determinant.h
+++ b/Eigen/src/LU/Determinant.h
@@ -92,7 +92,7 @@ template<typename Derived>
 inline typename internal::traits<Derived>::Scalar MatrixBase<Derived>::determinant() const
 {
   eigen_assert(rows() == cols());
-  typedef typename internal::nested<Derived,Base::RowsAtCompileTime>::type Nested;
+  typedef typename internal::nested_eval<Derived,Base::RowsAtCompileTime>::type Nested;
   return internal::determinant_impl<typename internal::remove_all<Nested>::type>::run(derived());
 }
 
diff --git a/Eigen/src/LU/FullPivLU.h b/Eigen/src/LU/FullPivLU.h
index 971b9da1d..fdf2e0642 100644
--- a/Eigen/src/LU/FullPivLU.h
+++ b/Eigen/src/LU/FullPivLU.h
@@ -12,6 +12,15 @@
 
 namespace Eigen { 
 
+namespace internal {
+template<typename _MatrixType> struct traits<FullPivLU<_MatrixType> >
+ : traits<_MatrixType>
+{
+  enum { Flags = 0 };
+};
+
+} // end namespace internal
+
 /** \ingroup LU_Module
   *
   * \class FullPivLU
@@ -62,6 +71,7 @@ template<typename _MatrixType> class FullPivLU
     typedef typename internal::plain_col_type<MatrixType, Index>::type IntColVectorType;
     typedef PermutationMatrix<ColsAtCompileTime, MaxColsAtCompileTime> PermutationQType;
     typedef PermutationMatrix<RowsAtCompileTime, MaxRowsAtCompileTime> PermutationPType;
+    typedef typename MatrixType::PlainObject PlainObject;
 
     /**
       * \brief Default Constructor.
@@ -211,11 +221,11 @@ template<typename _MatrixType> class FullPivLU
       * \sa TriangularView::solve(), kernel(), inverse()
       */
     template<typename Rhs>
-    inline const internal::solve_retval<FullPivLU, Rhs>
+    inline const Solve<FullPivLU, Rhs>
     solve(const MatrixBase<Rhs>& b) const
     {
       eigen_assert(m_isInitialized && "LU is not initialized.");
-      return internal::solve_retval<FullPivLU, Rhs>(*this, b.derived());
+      return Solve<FullPivLU, Rhs>(*this, b.derived());
     }
 
     /** \returns the determinant of the matrix of which
@@ -360,18 +370,23 @@ template<typename _MatrixType> class FullPivLU
       *
       * \sa MatrixBase::inverse()
       */
-    inline const internal::solve_retval<FullPivLU,typename MatrixType::IdentityReturnType> inverse() const
+    inline const Inverse<FullPivLU> inverse() const
     {
       eigen_assert(m_isInitialized && "LU is not initialized.");
       eigen_assert(m_lu.rows() == m_lu.cols() && "You can't take the inverse of a non-square matrix!");
-      return internal::solve_retval<FullPivLU,typename MatrixType::IdentityReturnType>
-               (*this, MatrixType::Identity(m_lu.rows(), m_lu.cols()));
+      return Inverse<FullPivLU>(*this);
     }
 
     MatrixType reconstructedMatrix() const;
 
     inline Index rows() const { return m_lu.rows(); }
     inline Index cols() const { return m_lu.cols(); }
+    
+    #ifndef EIGEN_PARSED_BY_DOXYGEN
+    template<typename RhsType, typename DstType>
+    EIGEN_DEVICE_FUNC
+    void _solve_impl(const RhsType &rhs, DstType &dst) const;
+    #endif
 
   protected:
     MatrixType m_lu;
@@ -663,64 +678,72 @@ struct image_retval<FullPivLU<_MatrixType> >
 
 /***** Implementation of solve() *****************************************************/
 
-template<typename _MatrixType, typename Rhs>
-struct solve_retval<FullPivLU<_MatrixType>, Rhs>
-  : solve_retval_base<FullPivLU<_MatrixType>, Rhs>
-{
-  EIGEN_MAKE_SOLVE_HELPERS(FullPivLU<_MatrixType>,Rhs)
+} // end namespace internal
 
-  template<typename Dest> void evalTo(Dest& dst) const
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+template<typename _MatrixType>
+template<typename RhsType, typename DstType>
+void FullPivLU<_MatrixType>::_solve_impl(const RhsType &rhs, DstType &dst) const
+{  
+  /* The decomposition PAQ = LU can be rewritten as A = P^{-1} L U Q^{-1}.
+  * So we proceed as follows:
+  * Step 1: compute c = P * rhs.
+  * Step 2: replace c by the solution x to Lx = c. Exists because L is invertible.
+  * Step 3: replace c by the solution x to Ux = c. May or may not exist.
+  * Step 4: result = Q * c;
+  */
+
+  const Index rows = this->rows(),
+              cols = this->cols(),
+              nonzero_pivots = this->nonzeroPivots();
+  eigen_assert(rhs.rows() == rows);
+  const Index smalldim = (std::min)(rows, cols);
+
+  if(nonzero_pivots == 0)
   {
-    /* The decomposition PAQ = LU can be rewritten as A = P^{-1} L U Q^{-1}.
-     * So we proceed as follows:
-     * Step 1: compute c = P * rhs.
-     * Step 2: replace c by the solution x to Lx = c. Exists because L is invertible.
-     * Step 3: replace c by the solution x to Ux = c. May or may not exist.
-     * Step 4: result = Q * c;
-     */
-
-    const Index rows = dec().rows(), cols = dec().cols(),
-              nonzero_pivots = dec().nonzeroPivots();
-    eigen_assert(rhs().rows() == rows);
-    const Index smalldim = (std::min)(rows, cols);
-
-    if(nonzero_pivots == 0)
-    {
-      dst.setZero();
-      return;
-    }
+    dst.setZero();
+    return;
+  }
 
-    typename Rhs::PlainObject c(rhs().rows(), rhs().cols());
+  typename RhsType::PlainObject c(rhs.rows(), rhs.cols());
 
-    // Step 1
-    c = dec().permutationP() * rhs();
+  // Step 1
+  c = permutationP() * rhs;
 
-    // Step 2
-    dec().matrixLU()
-        .topLeftCorner(smalldim,smalldim)
-        .template triangularView<UnitLower>()
-        .solveInPlace(c.topRows(smalldim));
-    if(rows>cols)
-    {
-      c.bottomRows(rows-cols)
-        -= dec().matrixLU().bottomRows(rows-cols)
-         * c.topRows(cols);
-    }
+  // Step 2
+  m_lu.topLeftCorner(smalldim,smalldim)
+      .template triangularView<UnitLower>()
+      .solveInPlace(c.topRows(smalldim));
+  if(rows>cols)
+    c.bottomRows(rows-cols) -= m_lu.bottomRows(rows-cols) * c.topRows(cols);
+
+  // Step 3
+  m_lu.topLeftCorner(nonzero_pivots, nonzero_pivots)
+      .template triangularView<Upper>()
+      .solveInPlace(c.topRows(nonzero_pivots));
+
+  // Step 4
+  for(Index i = 0; i < nonzero_pivots; ++i)
+    dst.row(permutationQ().indices().coeff(i)) = c.row(i);
+  for(Index i = nonzero_pivots; i < m_lu.cols(); ++i)
+    dst.row(permutationQ().indices().coeff(i)).setZero();
+}
+#endif
+
+namespace internal {
 
-    // Step 3
-    dec().matrixLU()
-        .topLeftCorner(nonzero_pivots, nonzero_pivots)
-        .template triangularView<Upper>()
-        .solveInPlace(c.topRows(nonzero_pivots));
-
-    // Step 4
-    for(Index i = 0; i < nonzero_pivots; ++i)
-      dst.row(dec().permutationQ().indices().coeff(i)) = c.row(i);
-    for(Index i = nonzero_pivots; i < dec().matrixLU().cols(); ++i)
-      dst.row(dec().permutationQ().indices().coeff(i)).setZero();
+
+/***** Implementation of inverse() *****************************************************/
+template<typename DstXprType, typename MatrixType, typename Scalar>
+struct Assignment<DstXprType, Inverse<FullPivLU<MatrixType> >, internal::assign_op<Scalar>, Dense2Dense, Scalar>
+{
+  typedef FullPivLU<MatrixType> LuType;
+  typedef Inverse<LuType> SrcXprType;
+  static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &)
+  {    
+    dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.cols()));
   }
 };
-
 } // end namespace internal
 
 /******* MatrixBase methods *****************************************************************/
diff --git a/Eigen/src/LU/Inverse.h b/Eigen/src/LU/InverseImpl.h
index 8d1364e0a..e5f270d19 100644
--- a/Eigen/src/LU/Inverse.h
+++ b/Eigen/src/LU/InverseImpl.h
@@ -2,13 +2,14 @@
 // for linear algebra.
 //
 // Copyright (C) 2008-2010 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2014 Gael Guennebaud <gael.guennebaud@inria.fr>
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
-#ifndef EIGEN_INVERSE_H
-#define EIGEN_INVERSE_H
+#ifndef EIGEN_INVERSE_IMPL_H
+#define EIGEN_INVERSE_IMPL_H
 
 namespace Eigen { 
 
@@ -42,7 +43,8 @@ struct compute_inverse<MatrixType, ResultType, 1>
   static inline void run(const MatrixType& matrix, ResultType& result)
   {
     typedef typename MatrixType::Scalar Scalar;
-    result.coeffRef(0,0) = Scalar(1) / matrix.coeff(0,0);
+    typename internal::evaluator<MatrixType>::type matrixEval(matrix);
+    result.coeffRef(0,0) = Scalar(1) / matrixEval.coeff(0,0);
   }
 };
 
@@ -75,10 +77,10 @@ inline void compute_inverse_size2_helper(
     const MatrixType& matrix, const typename ResultType::Scalar& invdet,
     ResultType& result)
 {
-  result.coeffRef(0,0) = matrix.coeff(1,1) * invdet;
+  result.coeffRef(0,0) =  matrix.coeff(1,1) * invdet;
   result.coeffRef(1,0) = -matrix.coeff(1,0) * invdet;
   result.coeffRef(0,1) = -matrix.coeff(0,1) * invdet;
-  result.coeffRef(1,1) = matrix.coeff(0,0) * invdet;
+  result.coeffRef(1,1) =  matrix.coeff(0,0) * invdet;
 }
 
 template<typename MatrixType, typename ResultType>
@@ -279,41 +281,33 @@ struct compute_inverse_and_det_with_check<MatrixType, ResultType, 4>
 *** MatrixBase methods ***
 *************************/
 
-template<typename MatrixType>
-struct traits<inverse_impl<MatrixType> >
-{
-  typedef typename MatrixType::PlainObject ReturnType;
-};
-
-template<typename MatrixType>
-struct inverse_impl : public ReturnByValue<inverse_impl<MatrixType> >
-{
-  typedef typename MatrixType::Index Index;
-  typedef typename internal::eval<MatrixType>::type MatrixTypeNested;
-  typedef typename remove_all<MatrixTypeNested>::type MatrixTypeNestedCleaned;
-  MatrixTypeNested m_matrix;
-
-  EIGEN_DEVICE_FUNC
-  inverse_impl(const MatrixType& matrix)
-    : m_matrix(matrix)
-  {}
+} // end namespace internal
 
-  EIGEN_DEVICE_FUNC inline Index rows() const { return m_matrix.rows(); }
-  EIGEN_DEVICE_FUNC inline Index cols() const { return m_matrix.cols(); }
+namespace internal {
 
-  template<typename Dest>
-  EIGEN_DEVICE_FUNC
-  inline void evalTo(Dest& dst) const
+// Specialization for "dense = dense_xpr.inverse()"
+template<typename DstXprType, typename XprType, typename Scalar>
+struct Assignment<DstXprType, Inverse<XprType>, internal::assign_op<Scalar>, Dense2Dense, Scalar>
+{
+  typedef Inverse<XprType> SrcXprType;
+  static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &)
   {
-    const int Size = EIGEN_PLAIN_ENUM_MIN(MatrixType::ColsAtCompileTime,Dest::ColsAtCompileTime);
+    // FIXME shall we resize dst here?
+    const int Size = EIGEN_PLAIN_ENUM_MIN(XprType::ColsAtCompileTime,DstXprType::ColsAtCompileTime);
     EIGEN_ONLY_USED_FOR_DEBUG(Size);
-    eigen_assert(( (Size<=1) || (Size>4) || (extract_data(m_matrix)!=extract_data(dst)))
+    eigen_assert(( (Size<=1) || (Size>4) || (extract_data(src.nestedExpression())!=extract_data(dst)))
               && "Aliasing problem detected in inverse(), you need to do inverse().eval() here.");
 
-    compute_inverse<MatrixTypeNestedCleaned, Dest>::run(m_matrix, dst);
+    typedef typename internal::nested_eval<XprType,XprType::ColsAtCompileTime>::type  ActualXprType;
+    typedef typename internal::remove_all<ActualXprType>::type                        ActualXprTypeCleanded;
+    
+    ActualXprType actual_xpr(src.nestedExpression());
+    
+    compute_inverse<ActualXprTypeCleanded, DstXprType>::run(actual_xpr, dst);
   }
 };
 
+  
 } // end namespace internal
 
 /** \lu_module
@@ -334,11 +328,11 @@ struct inverse_impl : public ReturnByValue<inverse_impl<MatrixType> >
   * \sa computeInverseAndDetWithCheck()
   */
 template<typename Derived>
-inline const internal::inverse_impl<Derived> MatrixBase<Derived>::inverse() const
+inline const Inverse<Derived> MatrixBase<Derived>::inverse() const
 {
   EIGEN_STATIC_ASSERT(!NumTraits<Scalar>::IsInteger,THIS_FUNCTION_IS_NOT_FOR_INTEGER_NUMERIC_TYPES)
   eigen_assert(rows() == cols());
-  return internal::inverse_impl<Derived>(derived());
+  return Inverse<Derived>(derived());
 }
 
 /** \lu_module
@@ -374,7 +368,7 @@ inline void MatrixBase<Derived>::computeInverseAndDetWithCheck(
   // for larger sizes, evaluating has negligible cost and limits code size.
   typedef typename internal::conditional<
     RowsAtCompileTime == 2,
-    typename internal::remove_all<typename internal::nested<Derived, 2>::type>::type,
+    typename internal::remove_all<typename internal::nested_eval<Derived, 2>::type>::type,
     PlainObject
   >::type MatrixType;
   internal::compute_inverse_and_det_with_check<MatrixType, ResultType>::run
@@ -414,4 +408,4 @@ inline void MatrixBase<Derived>::computeInverseWithCheck(
 
 } // end namespace Eigen
 
-#endif // EIGEN_INVERSE_H
+#endif // EIGEN_INVERSE_IMPL_H
diff --git a/Eigen/src/LU/PartialPivLU.h b/Eigen/src/LU/PartialPivLU.h
index 2f65c3a49..a4d22ce5f 100644
--- a/Eigen/src/LU/PartialPivLU.h
+++ b/Eigen/src/LU/PartialPivLU.h
@@ -13,6 +13,19 @@
 
 namespace Eigen { 
 
+namespace internal {
+template<typename _MatrixType> struct traits<PartialPivLU<_MatrixType> >
+ : traits<_MatrixType>
+{
+  typedef traits<_MatrixType> BaseTraits;
+  enum {
+    Flags = BaseTraits::Flags & RowMajorBit,
+    CoeffReadCost = Dynamic
+  };
+};
+
+} // end namespace internal
+
 /** \ingroup LU_Module
   *
   * \class PartialPivLU
@@ -62,6 +75,7 @@ template<typename _MatrixType> class PartialPivLU
     typedef typename MatrixType::Index Index;
     typedef PermutationMatrix<RowsAtCompileTime, MaxRowsAtCompileTime> PermutationType;
     typedef Transpositions<RowsAtCompileTime, MaxRowsAtCompileTime> TranspositionType;
+    typedef typename MatrixType::PlainObject PlainObject;
 
 
     /**
@@ -129,11 +143,11 @@ template<typename _MatrixType> class PartialPivLU
       * \sa TriangularView::solve(), inverse(), computeInverse()
       */
     template<typename Rhs>
-    inline const internal::solve_retval<PartialPivLU, Rhs>
+    inline const Solve<PartialPivLU, Rhs>
     solve(const MatrixBase<Rhs>& b) const
     {
       eigen_assert(m_isInitialized && "PartialPivLU is not initialized.");
-      return internal::solve_retval<PartialPivLU, Rhs>(*this, b.derived());
+      return Solve<PartialPivLU, Rhs>(*this, b.derived());
     }
 
     /** \returns the inverse of the matrix of which *this is the LU decomposition.
@@ -143,11 +157,10 @@ template<typename _MatrixType> class PartialPivLU
       *
       * \sa MatrixBase::inverse(), LU::inverse()
       */
-    inline const internal::solve_retval<PartialPivLU,typename MatrixType::IdentityReturnType> inverse() const
+    inline const Inverse<PartialPivLU> inverse() const
     {
       eigen_assert(m_isInitialized && "PartialPivLU is not initialized.");
-      return internal::solve_retval<PartialPivLU,typename MatrixType::IdentityReturnType>
-               (*this, MatrixType::Identity(m_lu.rows(), m_lu.cols()));
+      return Inverse<PartialPivLU>(*this);
     }
 
     /** \returns the determinant of the matrix of which
@@ -169,6 +182,30 @@ template<typename _MatrixType> class PartialPivLU
 
     inline Index rows() const { return m_lu.rows(); }
     inline Index cols() const { return m_lu.cols(); }
+    
+    #ifndef EIGEN_PARSED_BY_DOXYGEN
+    template<typename RhsType, typename DstType>
+    EIGEN_DEVICE_FUNC
+    void _solve_impl(const RhsType &rhs, DstType &dst) const {
+     /* The decomposition PA = LU can be rewritten as A = P^{-1} L U.
+      * So we proceed as follows:
+      * Step 1: compute c = Pb.
+      * Step 2: replace c by the solution x to Lx = c.
+      * Step 3: replace c by the solution x to Ux = c.
+      */
+
+      eigen_assert(rhs.rows() == m_lu.rows());
+
+      // Step 1
+      dst = permutationP() * rhs;
+
+      // Step 2
+      m_lu.template triangularView<UnitLower>().solveInPlace(dst);
+
+      // Step 3
+      m_lu.template triangularView<Upper>().solveInPlace(dst); 
+    }
+    #endif
 
   protected:
     MatrixType m_lu;
@@ -434,34 +471,17 @@ MatrixType PartialPivLU<MatrixType>::reconstructedMatrix() const
 
 namespace internal {
 
-template<typename _MatrixType, typename Rhs>
-struct solve_retval<PartialPivLU<_MatrixType>, Rhs>
-  : solve_retval_base<PartialPivLU<_MatrixType>, Rhs>
+/***** Implementation of inverse() *****************************************************/
+template<typename DstXprType, typename MatrixType, typename Scalar>
+struct Assignment<DstXprType, Inverse<PartialPivLU<MatrixType> >, internal::assign_op<Scalar>, Dense2Dense, Scalar>
 {
-  EIGEN_MAKE_SOLVE_HELPERS(PartialPivLU<_MatrixType>,Rhs)
-
-  template<typename Dest> void evalTo(Dest& dst) const
-  {
-    /* The decomposition PA = LU can be rewritten as A = P^{-1} L U.
-    * So we proceed as follows:
-    * Step 1: compute c = Pb.
-    * Step 2: replace c by the solution x to Lx = c.
-    * Step 3: replace c by the solution x to Ux = c.
-    */
-
-    eigen_assert(rhs().rows() == dec().matrixLU().rows());
-
-    // Step 1
-    dst = dec().permutationP() * rhs();
-
-    // Step 2
-    dec().matrixLU().template triangularView<UnitLower>().solveInPlace(dst);
-
-    // Step 3
-    dec().matrixLU().template triangularView<Upper>().solveInPlace(dst);
+  typedef PartialPivLU<MatrixType> LuType;
+  typedef Inverse<LuType> SrcXprType;
+  static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &)
+  {    
+    dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.cols()));
   }
 };
-
 } // end namespace internal
 
 /******** MatrixBase methods *******/
diff --git a/Eigen/src/LU/arch/Inverse_SSE.h b/Eigen/src/LU/arch/Inverse_SSE.h
index 60b7a2376..1f62ef14e 100644
--- a/Eigen/src/LU/arch/Inverse_SSE.h
+++ b/Eigen/src/LU/arch/Inverse_SSE.h
@@ -39,9 +39,11 @@ struct compute_inverse_size4<Architecture::SSE, float, MatrixType, ResultType>
     ResultAlignment     = bool(ResultType::Flags&AlignedBit),
     StorageOrdersMatch  = (MatrixType::Flags&RowMajorBit) == (ResultType::Flags&RowMajorBit)
   };
+  typedef typename conditional<(MatrixType::Flags&LinearAccessBit),MatrixType const &,typename MatrixType::PlainObject>::type ActualMatrixType;
   
-  static void run(const MatrixType& matrix, ResultType& result)
+  static void run(const MatrixType& mat, ResultType& result)
   {
+    ActualMatrixType matrix(mat);
     EIGEN_ALIGN16 const unsigned int _Sign_PNNP[4] = { 0x00000000, 0x80000000, 0x80000000, 0x00000000 };
 
     // Load the full matrix into registers
@@ -167,14 +169,17 @@ struct compute_inverse_size4<Architecture::SSE, double, MatrixType, ResultType>
     ResultAlignment = bool(ResultType::Flags&AlignedBit),
     StorageOrdersMatch  = (MatrixType::Flags&RowMajorBit) == (ResultType::Flags&RowMajorBit)
   };
-  static void run(const MatrixType& matrix, ResultType& result)
+  typedef typename conditional<(MatrixType::Flags&LinearAccessBit),MatrixType const &,typename MatrixType::PlainObject>::type ActualMatrixType;
+  
+  static void run(const MatrixType& mat, ResultType& result)
   {
+    ActualMatrixType matrix(mat);
     const __m128d _Sign_NP = _mm_castsi128_pd(_mm_set_epi32(0x0,0x0,0x80000000,0x0));
     const __m128d _Sign_PN = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
 
     // The inverse is calculated using "Divide and Conquer" technique. The
     // original matrix is divide into four 2x2 sub-matrices. Since each
-    // register of the matrix holds two element, the smaller matrices are
+    // register of the matrix holds two elements, the smaller matrices are
     // consisted of two registers. Hence we get a better locality of the
     // calculations.
 
diff --git a/Eigen/src/PaStiXSupport/PaStiXSupport.h b/Eigen/src/PaStiXSupport/PaStiXSupport.h
index 8a546dc2f..bb8e0d1a8 100644
--- a/Eigen/src/PaStiXSupport/PaStiXSupport.h
+++ b/Eigen/src/PaStiXSupport/PaStiXSupport.h
@@ -125,9 +125,15 @@ namespace internal
 // This is the base class to interface with PaStiX functions. 
 // Users should not used this class directly. 
 template <class Derived>
-class PastixBase : internal::noncopyable
+class PastixBase : public SparseSolverBase<Derived>
 {
+  protected:
+    typedef SparseSolverBase<Derived> Base;
+    using Base::derived;
+    using Base::m_isInitialized;
   public:
+    using Base::_solve_impl;
+    
     typedef typename internal::pastix_traits<Derived>::MatrixType _MatrixType;
     typedef _MatrixType MatrixType;
     typedef typename MatrixType::Scalar Scalar;
@@ -138,7 +144,7 @@ class PastixBase : internal::noncopyable
     
   public:
     
-    PastixBase() : m_initisOk(false), m_analysisIsOk(false), m_factorizationIsOk(false), m_isInitialized(false), m_pastixdata(0), m_size(0)
+    PastixBase() : m_initisOk(false), m_analysisIsOk(false), m_factorizationIsOk(false), m_pastixdata(0), m_size(0)
     {
       init();
     }
@@ -147,33 +153,10 @@ class PastixBase : internal::noncopyable
     {
       clean();
     }
-
-    /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A.
-      *
-      * \sa compute()
-      */
-    template<typename Rhs>
-    inline const internal::solve_retval<PastixBase, Rhs>
-    solve(const MatrixBase<Rhs>& b) const
-    {
-      eigen_assert(m_isInitialized && "Pastix solver is not initialized.");
-      eigen_assert(rows()==b.rows()
-                && "PastixBase::solve(): invalid number of rows of the right hand side matrix b");
-      return internal::solve_retval<PastixBase, Rhs>(*this, b.derived());
-    }
     
     template<typename Rhs,typename Dest>
-    bool _solve (const MatrixBase<Rhs> &b, MatrixBase<Dest> &x) const;
+    bool _solve_impl(const MatrixBase<Rhs> &b, MatrixBase<Dest> &x) const;
     
-    Derived& derived()
-    {
-      return *static_cast<Derived*>(this);
-    }
-    const Derived& derived() const
-    {
-      return *static_cast<const Derived*>(this);
-    }
-
     /** Returns a reference to the integer vector IPARM of PaStiX parameters
       * to modify the default parameters. 
       * The statistics related to the different phases of factorization and solve are saved here as well
@@ -228,20 +211,6 @@ class PastixBase : internal::noncopyable
       return m_info;
     }
     
-    /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A.
-      *
-      * \sa compute()
-      */
-    template<typename Rhs>
-    inline const internal::sparse_solve_retval<PastixBase, Rhs>
-    solve(const SparseMatrixBase<Rhs>& b) const
-    {
-      eigen_assert(m_isInitialized && "Pastix LU, LLT or LDLT is not initialized.");
-      eigen_assert(rows()==b.rows()
-                && "PastixBase::solve(): invalid number of rows of the right hand side matrix b");
-      return internal::sparse_solve_retval<PastixBase, Rhs>(*this, b.derived());
-    }
-    
   protected:
 
     // Initialize the Pastix data structure, check the matrix
@@ -268,7 +237,6 @@ class PastixBase : internal::noncopyable
     int m_initisOk; 
     int m_analysisIsOk;
     int m_factorizationIsOk;
-    bool m_isInitialized;
     mutable ComputationInfo m_info; 
     mutable pastix_data_t *m_pastixdata; // Data structure for pastix
     mutable int m_comm; // The MPI communicator identifier
@@ -328,7 +296,6 @@ void PastixBase<Derived>::compute(ColSpMatrix& mat)
   factorize(mat);
   
   m_iparm(IPARM_MATRIX_VERIFICATION) = API_NO;
-  m_isInitialized = m_factorizationIsOk;
 }
 
 
@@ -393,7 +360,7 @@ void PastixBase<Derived>::factorize(ColSpMatrix& mat)
 /* Solve the system */
 template<typename Base>
 template<typename Rhs,typename Dest>
-bool PastixBase<Base>::_solve (const MatrixBase<Rhs> &b, MatrixBase<Dest> &x) const
+bool PastixBase<Base>::_solve_impl(const MatrixBase<Rhs> &b, MatrixBase<Dest> &x) const
 {
   eigen_assert(m_isInitialized && "The matrix should be factorized first");
   EIGEN_STATIC_ASSERT((Dest::Flags&RowMajorBit)==0,
@@ -694,36 +661,6 @@ class PastixLDLT : public PastixBase< PastixLDLT<_MatrixType, _UpLo> >
     }
 };
 
-namespace internal {
-
-template<typename _MatrixType, typename Rhs>
-struct solve_retval<PastixBase<_MatrixType>, Rhs>
-  : solve_retval_base<PastixBase<_MatrixType>, Rhs>
-{
-  typedef PastixBase<_MatrixType> Dec;
-  EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs)
-
-  template<typename Dest> void evalTo(Dest& dst) const
-  {
-    dec()._solve(rhs(),dst);
-  }
-};
-
-template<typename _MatrixType, typename Rhs>
-struct sparse_solve_retval<PastixBase<_MatrixType>, Rhs>
-  : sparse_solve_retval_base<PastixBase<_MatrixType>, Rhs>
-{
-  typedef PastixBase<_MatrixType> Dec;
-  EIGEN_MAKE_SPARSE_SOLVE_HELPERS(Dec,Rhs)
-
-  template<typename Dest> void evalTo(Dest& dst) const
-  {
-    this->defaultEvalTo(dst);
-  }
-};
-
-} // end namespace internal
-
 } // end namespace Eigen
 
 #endif
diff --git a/Eigen/src/PardisoSupport/PardisoSupport.h b/Eigen/src/PardisoSupport/PardisoSupport.h
index b6571069e..e1b0e1818 100644
--- a/Eigen/src/PardisoSupport/PardisoSupport.h
+++ b/Eigen/src/PardisoSupport/PardisoSupport.h
@@ -96,10 +96,17 @@ namespace internal
 }
 
 template<class Derived>
-class PardisoImpl : internal::noncopyable
+class PardisoImpl : public SparseSolveBase<PardisoImpl<Derived>
 {
+  protected:
+    typedef SparseSolveBase<PardisoImpl<Derived> Base;
+    using Base::derived;
+    using Base::m_isInitialized;
+    
     typedef internal::pardiso_traits<Derived> Traits;
   public:
+    using base::_solve_impl;
+    
     typedef typename Traits::MatrixType MatrixType;
     typedef typename Traits::Scalar Scalar;
     typedef typename Traits::RealScalar RealScalar;
@@ -118,7 +125,7 @@ class PardisoImpl : internal::noncopyable
       eigen_assert((sizeof(Index) >= sizeof(_INTEGER_t) && sizeof(Index) <= 8) && "Non-supported index type");
       m_iparm.setZero();
       m_msglvl = 0; // No output
-      m_initialized = false;
+      m_isInitialized = false;
     }
 
     ~PardisoImpl()
@@ -136,7 +143,7 @@ class PardisoImpl : internal::noncopyable
       */
     ComputationInfo info() const
     {
-      eigen_assert(m_initialized && "Decomposition is not initialized.");
+      eigen_assert(m_isInitialized && "Decomposition is not initialized.");
       return m_info;
     }
 
@@ -165,51 +172,14 @@ class PardisoImpl : internal::noncopyable
     Derived& factorize(const MatrixType& matrix);
 
     Derived& compute(const MatrixType& matrix);
-    
-    /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A.
-      *
-      * \sa compute()
-      */
-    template<typename Rhs>
-    inline const internal::solve_retval<PardisoImpl, Rhs>
-    solve(const MatrixBase<Rhs>& b) const
-    {
-      eigen_assert(m_initialized && "Pardiso solver is not initialized.");
-      eigen_assert(rows()==b.rows()
-                && "PardisoImpl::solve(): invalid number of rows of the right hand side matrix b");
-      return internal::solve_retval<PardisoImpl, Rhs>(*this, b.derived());
-    }
-
-    /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A.
-      *
-      * \sa compute()
-      */
-    template<typename Rhs>
-    inline const internal::sparse_solve_retval<PardisoImpl, Rhs>
-    solve(const SparseMatrixBase<Rhs>& b) const
-    {
-      eigen_assert(m_initialized && "Pardiso solver is not initialized.");
-      eigen_assert(rows()==b.rows()
-                && "PardisoImpl::solve(): invalid number of rows of the right hand side matrix b");
-      return internal::sparse_solve_retval<PardisoImpl, Rhs>(*this, b.derived());
-    }
-
-    Derived& derived()
-    {
-      return *static_cast<Derived*>(this);
-    }
-    const Derived& derived() const
-    {
-      return *static_cast<const Derived*>(this);
-    }
 
     template<typename BDerived, typename XDerived>
-    bool _solve(const MatrixBase<BDerived> &b, MatrixBase<XDerived>& x) const;
+    bool _solve_impl(const MatrixBase<BDerived> &b, MatrixBase<XDerived>& x) const;
 
   protected:
     void pardisoRelease()
     {
-      if(m_initialized) // Factorization ran at least once
+      if(m_isInitialized) // Factorization ran at least once
       {
         internal::pardiso_run_selector<Index>::run(m_pt, 1, 1, m_type, -1, m_size, 0, 0, 0, m_perm.data(), 0,
                                                    m_iparm.data(), m_msglvl, 0, 0);
@@ -270,7 +240,7 @@ class PardisoImpl : internal::noncopyable
 
     mutable SparseMatrixType m_matrix;
     ComputationInfo m_info;
-    bool m_initialized, m_analysisIsOk, m_factorizationIsOk;
+    bool m_analysisIsOk, m_factorizationIsOk;
     Index m_type, m_msglvl;
     mutable void *m_pt[64];
     mutable ParameterType m_iparm;
@@ -298,7 +268,7 @@ Derived& PardisoImpl<Derived>::compute(const MatrixType& a)
   manageErrorCode(error);
   m_analysisIsOk = true;
   m_factorizationIsOk = true;
-  m_initialized = true;
+  m_isInitialized = true;
   return derived();
 }
 
@@ -321,7 +291,7 @@ Derived& PardisoImpl<Derived>::analyzePattern(const MatrixType& a)
   manageErrorCode(error);
   m_analysisIsOk = true;
   m_factorizationIsOk = false;
-  m_initialized = true;
+  m_isInitialized = true;
   return derived();
 }
 
@@ -345,7 +315,7 @@ Derived& PardisoImpl<Derived>::factorize(const MatrixType& a)
 
 template<class Base>
 template<typename BDerived,typename XDerived>
-bool PardisoImpl<Base>::_solve(const MatrixBase<BDerived> &b, MatrixBase<XDerived>& x) const
+bool PardisoImpl<Base>::_solve_impl(const MatrixBase<BDerived> &b, MatrixBase<XDerived>& x) const
 {
   if(m_iparm[0] == 0) // Factorization was not computed
     return false;
@@ -546,36 +516,6 @@ class PardisoLDLT : public PardisoImpl< PardisoLDLT<MatrixType,Options> >
     }
 };
 
-namespace internal {
-  
-template<typename _Derived, typename Rhs>
-struct solve_retval<PardisoImpl<_Derived>, Rhs>
-  : solve_retval_base<PardisoImpl<_Derived>, Rhs>
-{
-  typedef PardisoImpl<_Derived> Dec;
-  EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs)
-
-  template<typename Dest> void evalTo(Dest& dst) const
-  {
-    dec()._solve(rhs(),dst);
-  }
-};
-
-template<typename Derived, typename Rhs>
-struct sparse_solve_retval<PardisoImpl<Derived>, Rhs>
-  : sparse_solve_retval_base<PardisoImpl<Derived>, Rhs>
-{
-  typedef PardisoImpl<Derived> Dec;
-  EIGEN_MAKE_SPARSE_SOLVE_HELPERS(Dec,Rhs)
-
-  template<typename Dest> void evalTo(Dest& dst) const
-  {
-    this->defaultEvalTo(dst);
-  }
-};
-
-} // end namespace internal
-
 } // end namespace Eigen
 
 #endif // EIGEN_PARDISOSUPPORT_H
diff --git a/Eigen/src/QR/ColPivHouseholderQR.h b/Eigen/src/QR/ColPivHouseholderQR.h
index 4824880f5..adf737276 100644
--- a/Eigen/src/QR/ColPivHouseholderQR.h
+++ b/Eigen/src/QR/ColPivHouseholderQR.h
@@ -13,6 +13,15 @@
 
 namespace Eigen { 
 
+namespace internal {
+template<typename _MatrixType> struct traits<ColPivHouseholderQR<_MatrixType> >
+ : traits<_MatrixType>
+{
+  enum { Flags = 0 };
+};
+
+} // end namespace internal
+
 /** \ingroup QR_Module
   *
   * \class ColPivHouseholderQR
@@ -56,6 +65,7 @@ template<typename _MatrixType> class ColPivHouseholderQR
     typedef typename internal::plain_row_type<MatrixType>::type RowVectorType;
     typedef typename internal::plain_row_type<MatrixType, RealScalar>::type RealRowVectorType;
     typedef HouseholderSequence<MatrixType,typename internal::remove_all<typename HCoeffsType::ConjugateReturnType>::type> HouseholderSequenceType;
+    typedef typename MatrixType::PlainObject PlainObject;
     
   private:
     
@@ -138,15 +148,15 @@ template<typename _MatrixType> class ColPivHouseholderQR
       * Output: \verbinclude ColPivHouseholderQR_solve.out
       */
     template<typename Rhs>
-    inline const internal::solve_retval<ColPivHouseholderQR, Rhs>
+    inline const Solve<ColPivHouseholderQR, Rhs>
     solve(const MatrixBase<Rhs>& b) const
     {
       eigen_assert(m_isInitialized && "ColPivHouseholderQR is not initialized.");
-      return internal::solve_retval<ColPivHouseholderQR, Rhs>(*this, b.derived());
+      return Solve<ColPivHouseholderQR, Rhs>(*this, b.derived());
     }
 
-    HouseholderSequenceType householderQ(void) const;
-    HouseholderSequenceType matrixQ(void) const
+    HouseholderSequenceType householderQ() const;
+    HouseholderSequenceType matrixQ() const
     {
       return householderQ(); 
     }
@@ -284,13 +294,10 @@ template<typename _MatrixType> class ColPivHouseholderQR
       * \note If this matrix is not invertible, the returned matrix has undefined coefficients.
       *       Use isInvertible() to first determine whether this matrix is invertible.
       */
-    inline const
-    internal::solve_retval<ColPivHouseholderQR, typename MatrixType::IdentityReturnType>
-    inverse() const
+    inline const Inverse<ColPivHouseholderQR> inverse() const
     {
       eigen_assert(m_isInitialized && "ColPivHouseholderQR is not initialized.");
-      return internal::solve_retval<ColPivHouseholderQR,typename MatrixType::IdentityReturnType>
-               (*this, MatrixType::Identity(m_qr.rows(), m_qr.cols()));
+      return Inverse<ColPivHouseholderQR>(*this);
     }
 
     inline Index rows() const { return m_qr.rows(); }
@@ -382,6 +389,12 @@ template<typename _MatrixType> class ColPivHouseholderQR
       eigen_assert(m_isInitialized && "Decomposition is not initialized.");
       return Success;
     }
+    
+    #ifndef EIGEN_PARSED_BY_DOXYGEN
+    template<typename RhsType, typename DstType>
+    EIGEN_DEVICE_FUNC
+    void _solve_impl(const RhsType &rhs, DstType &dst) const;
+    #endif
 
   protected:
     MatrixType m_qr;
@@ -514,42 +527,48 @@ ColPivHouseholderQR<MatrixType>& ColPivHouseholderQR<MatrixType>::compute(const
   return *this;
 }
 
-namespace internal {
-
-template<typename _MatrixType, typename Rhs>
-struct solve_retval<ColPivHouseholderQR<_MatrixType>, Rhs>
-  : solve_retval_base<ColPivHouseholderQR<_MatrixType>, Rhs>
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+template<typename _MatrixType>
+template<typename RhsType, typename DstType>
+void ColPivHouseholderQR<_MatrixType>::_solve_impl(const RhsType &rhs, DstType &dst) const
 {
-  EIGEN_MAKE_SOLVE_HELPERS(ColPivHouseholderQR<_MatrixType>,Rhs)
+  eigen_assert(rhs.rows() == rows());
+
+  const Index nonzero_pivots = nonzeroPivots();
 
-  template<typename Dest> void evalTo(Dest& dst) const
+  if(nonzero_pivots == 0)
   {
-    eigen_assert(rhs().rows() == dec().rows());
+    dst.setZero();
+    return;
+  }
 
-    const Index cols = dec().cols(),
-				nonzero_pivots = dec().nonzeroPivots();
+  typename RhsType::PlainObject c(rhs);
 
-    if(nonzero_pivots == 0)
-    {
-      dst.setZero();
-      return;
-    }
+  // Note that the matrix Q = H_0^* H_1^*... so its inverse is Q^* = (H_0 H_1 ...)^T
+  c.applyOnTheLeft(householderSequence(m_qr, m_hCoeffs)
+                    .setLength(nonzero_pivots)
+                    .transpose()
+    );
 
-    typename Rhs::PlainObject c(rhs());
+  m_qr.topLeftCorner(nonzero_pivots, nonzero_pivots)
+      .template triangularView<Upper>()
+      .solveInPlace(c.topRows(nonzero_pivots));
 
-    // Note that the matrix Q = H_0^* H_1^*... so its inverse is Q^* = (H_0 H_1 ...)^T
-    c.applyOnTheLeft(householderSequence(dec().matrixQR(), dec().hCoeffs())
-                     .setLength(dec().nonzeroPivots())
-		     .transpose()
-      );
+  for(Index i = 0; i < nonzero_pivots; ++i) dst.row(m_colsPermutation.indices().coeff(i)) = c.row(i);
+  for(Index i = nonzero_pivots; i < cols(); ++i) dst.row(m_colsPermutation.indices().coeff(i)).setZero();
+}
+#endif
 
-    dec().matrixR()
-       .topLeftCorner(nonzero_pivots, nonzero_pivots)
-       .template triangularView<Upper>()
-       .solveInPlace(c.topRows(nonzero_pivots));
+namespace internal {
 
-    for(Index i = 0; i < nonzero_pivots; ++i) dst.row(dec().colsPermutation().indices().coeff(i)) = c.row(i);
-    for(Index i = nonzero_pivots; i < cols; ++i) dst.row(dec().colsPermutation().indices().coeff(i)).setZero();
+template<typename DstXprType, typename MatrixType, typename Scalar>
+struct Assignment<DstXprType, Inverse<ColPivHouseholderQR<MatrixType> >, internal::assign_op<Scalar>, Dense2Dense, Scalar>
+{
+  typedef ColPivHouseholderQR<MatrixType> QrType;
+  typedef Inverse<QrType> SrcXprType;
+  static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &)
+  {    
+    dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.cols()));
   }
 };
 
diff --git a/Eigen/src/QR/FullPivHouseholderQR.h b/Eigen/src/QR/FullPivHouseholderQR.h
index a7b0fc16f..710c64a45 100644
--- a/Eigen/src/QR/FullPivHouseholderQR.h
+++ b/Eigen/src/QR/FullPivHouseholderQR.h
@@ -15,6 +15,12 @@ namespace Eigen {
 
 namespace internal {
 
+template<typename _MatrixType> struct traits<FullPivHouseholderQR<_MatrixType> >
+ : traits<_MatrixType>
+{
+  enum { Flags = 0 };
+};
+
 template<typename MatrixType> struct FullPivHouseholderQRMatrixQReturnType;
 
 template<typename MatrixType>
@@ -23,7 +29,7 @@ struct traits<FullPivHouseholderQRMatrixQReturnType<MatrixType> >
   typedef typename MatrixType::PlainObject ReturnType;
 };
 
-}
+} // end namespace internal
 
 /** \ingroup QR_Module
   *
@@ -69,6 +75,7 @@ template<typename _MatrixType> class FullPivHouseholderQR
     typedef PermutationMatrix<ColsAtCompileTime, MaxColsAtCompileTime> PermutationType;
     typedef typename internal::plain_row_type<MatrixType>::type RowVectorType;
     typedef typename internal::plain_col_type<MatrixType>::type ColVectorType;
+    typedef typename MatrixType::PlainObject PlainObject;
 
     /** \brief Default Constructor.
       *
@@ -145,11 +152,11 @@ template<typename _MatrixType> class FullPivHouseholderQR
       * Output: \verbinclude FullPivHouseholderQR_solve.out
       */
     template<typename Rhs>
-    inline const internal::solve_retval<FullPivHouseholderQR, Rhs>
+    inline const Solve<FullPivHouseholderQR, Rhs>
     solve(const MatrixBase<Rhs>& b) const
     {
       eigen_assert(m_isInitialized && "FullPivHouseholderQR is not initialized.");
-      return internal::solve_retval<FullPivHouseholderQR, Rhs>(*this, b.derived());
+      return Solve<FullPivHouseholderQR, Rhs>(*this, b.derived());
     }
 
     /** \returns Expression object representing the matrix Q
@@ -280,13 +287,11 @@ template<typename _MatrixType> class FullPivHouseholderQR
       *
       * \note If this matrix is not invertible, the returned matrix has undefined coefficients.
       *       Use isInvertible() to first determine whether this matrix is invertible.
-      */    inline const
-    internal::solve_retval<FullPivHouseholderQR, typename MatrixType::IdentityReturnType>
-    inverse() const
+      */
+    inline const Inverse<FullPivHouseholderQR> inverse() const
     {
       eigen_assert(m_isInitialized && "FullPivHouseholderQR is not initialized.");
-      return internal::solve_retval<FullPivHouseholderQR,typename MatrixType::IdentityReturnType>
-               (*this, MatrixType::Identity(m_qr.rows(), m_qr.cols()));
+      return Inverse<FullPivHouseholderQR>(*this);
     }
 
     inline Index rows() const { return m_qr.rows(); }
@@ -366,6 +371,12 @@ template<typename _MatrixType> class FullPivHouseholderQR
       *          diagonal coefficient of U.
       */
     RealScalar maxPivot() const { return m_maxpivot; }
+    
+    #ifndef EIGEN_PARSED_BY_DOXYGEN
+    template<typename RhsType, typename DstType>
+    EIGEN_DEVICE_FUNC
+    void _solve_impl(const RhsType &rhs, DstType &dst) const;
+    #endif
 
   protected:
     MatrixType m_qr;
@@ -485,46 +496,53 @@ FullPivHouseholderQR<MatrixType>& FullPivHouseholderQR<MatrixType>::compute(cons
   return *this;
 }
 
-namespace internal {
-
-template<typename _MatrixType, typename Rhs>
-struct solve_retval<FullPivHouseholderQR<_MatrixType>, Rhs>
-  : solve_retval_base<FullPivHouseholderQR<_MatrixType>, Rhs>
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+template<typename _MatrixType>
+template<typename RhsType, typename DstType>
+void FullPivHouseholderQR<_MatrixType>::_solve_impl(const RhsType &rhs, DstType &dst) const
 {
-  EIGEN_MAKE_SOLVE_HELPERS(FullPivHouseholderQR<_MatrixType>,Rhs)
+  eigen_assert(rhs.rows() == rows());
+  const Index l_rank = rank();
 
-  template<typename Dest> void evalTo(Dest& dst) const
+  // FIXME introduce nonzeroPivots() and use it here. and more generally,
+  // make the same improvements in this dec as in FullPivLU.
+  if(l_rank==0)
   {
-    const Index rows = dec().rows(), cols = dec().cols();
-    eigen_assert(rhs().rows() == rows);
+    dst.setZero();
+    return;
+  }
 
-    // FIXME introduce nonzeroPivots() and use it here. and more generally,
-    // make the same improvements in this dec as in FullPivLU.
-    if(dec().rank()==0)
-    {
-      dst.setZero();
-      return;
-    }
+  typename RhsType::PlainObject c(rhs);
 
-    typename Rhs::PlainObject c(rhs());
+  Matrix<Scalar,1,RhsType::ColsAtCompileTime> temp(rhs.cols());
+  for (Index k = 0; k < l_rank; ++k)
+  {
+    Index remainingSize = rows()-k;
+    c.row(k).swap(c.row(m_rows_transpositions.coeff(k)));
+    c.bottomRightCorner(remainingSize, rhs.cols())
+      .applyHouseholderOnTheLeft(m_qr.col(k).tail(remainingSize-1),
+                               m_hCoeffs.coeff(k), &temp.coeffRef(0));
+  }
 
-    Matrix<Scalar,1,Rhs::ColsAtCompileTime> temp(rhs().cols());
-    for (Index k = 0; k < dec().rank(); ++k)
-    {
-      Index remainingSize = rows-k;
-      c.row(k).swap(c.row(dec().rowsTranspositions().coeff(k)));
-      c.bottomRightCorner(remainingSize, rhs().cols())
-       .applyHouseholderOnTheLeft(dec().matrixQR().col(k).tail(remainingSize-1),
-                                  dec().hCoeffs().coeff(k), &temp.coeffRef(0));
-    }
+  m_qr.topLeftCorner(l_rank, l_rank)
+      .template triangularView<Upper>()
+      .solveInPlace(c.topRows(l_rank));
 
-    dec().matrixQR()
-       .topLeftCorner(dec().rank(), dec().rank())
-       .template triangularView<Upper>()
-       .solveInPlace(c.topRows(dec().rank()));
+  for(Index i = 0; i < l_rank; ++i) dst.row(m_cols_permutation.indices().coeff(i)) = c.row(i);
+  for(Index i = l_rank; i < cols(); ++i) dst.row(m_cols_permutation.indices().coeff(i)).setZero();
+}
+#endif
 
-    for(Index i = 0; i < dec().rank(); ++i) dst.row(dec().colsPermutation().indices().coeff(i)) = c.row(i);
-    for(Index i = dec().rank(); i < cols; ++i) dst.row(dec().colsPermutation().indices().coeff(i)).setZero();
+namespace internal {
+  
+template<typename DstXprType, typename MatrixType, typename Scalar>
+struct Assignment<DstXprType, Inverse<FullPivHouseholderQR<MatrixType> >, internal::assign_op<Scalar>, Dense2Dense, Scalar>
+{
+  typedef FullPivHouseholderQR<MatrixType> QrType;
+  typedef Inverse<QrType> SrcXprType;
+  static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &)
+  {    
+    dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.cols()));
   }
 };
 
@@ -550,7 +568,7 @@ public:
     : m_qr(qr),
       m_hCoeffs(hCoeffs),
       m_rowsTranspositions(rowsTranspositions)
-      {}
+  {}
 
   template <typename ResultType>
   void evalTo(ResultType& result) const
@@ -580,8 +598,8 @@ public:
     }
   }
 
-    Index rows() const { return m_qr.rows(); }
-    Index cols() const { return m_qr.rows(); }
+  Index rows() const { return m_qr.rows(); }
+  Index cols() const { return m_qr.rows(); }
 
 protected:
   typename MatrixType::Nested m_qr;
@@ -589,6 +607,11 @@ protected:
   typename IntDiagSizeVectorType::Nested m_rowsTranspositions;
 };
 
+// template<typename MatrixType>
+// struct evaluator<FullPivHouseholderQRMatrixQReturnType<MatrixType> >
+//  : public evaluator<ReturnByValue<FullPivHouseholderQRMatrixQReturnType<MatrixType> > >
+// {};
+
 } // end namespace internal
 
 template<typename MatrixType>
diff --git a/Eigen/src/QR/HouseholderQR.h b/Eigen/src/QR/HouseholderQR.h
index 352dbf3f0..0b0c9d1bd 100644
--- a/Eigen/src/QR/HouseholderQR.h
+++ b/Eigen/src/QR/HouseholderQR.h
@@ -118,11 +118,11 @@ template<typename _MatrixType> class HouseholderQR
       * Output: \verbinclude HouseholderQR_solve.out
       */
     template<typename Rhs>
-    inline const internal::solve_retval<HouseholderQR, Rhs>
+    inline const Solve<HouseholderQR, Rhs>
     solve(const MatrixBase<Rhs>& b) const
     {
       eigen_assert(m_isInitialized && "HouseholderQR is not initialized.");
-      return internal::solve_retval<HouseholderQR, Rhs>(*this, b.derived());
+      return Solve<HouseholderQR, Rhs>(*this, b.derived());
     }
 
     /** This method returns an expression of the unitary matrix Q as a sequence of Householder transformations.
@@ -187,6 +187,12 @@ template<typename _MatrixType> class HouseholderQR
       * For advanced uses only.
       */
     const HCoeffsType& hCoeffs() const { return m_hCoeffs; }
+    
+    #ifndef EIGEN_PARSED_BY_DOXYGEN
+    template<typename RhsType, typename DstType>
+    EIGEN_DEVICE_FUNC
+    void _solve_impl(const RhsType &rhs, DstType &dst) const;
+    #endif
 
   protected:
     MatrixType m_qr;
@@ -283,8 +289,8 @@ struct householder_qr_inplace_blocked
     for (k = 0; k < size; k += blockSize)
     {
       Index bs = (std::min)(size-k,blockSize);  // actual size of the block
-      Index tcols = cols - k - bs;            // trailing columns
-      Index brows = rows-k;                   // rows of the block
+      Index tcols = cols - k - bs;              // trailing columns
+      Index brows = rows-k;                     // rows of the block
 
       // partition the matrix:
       //        A00 | A01 | A02
@@ -302,43 +308,38 @@ struct householder_qr_inplace_blocked
       if(tcols)
       {
         BlockType A21_22 = mat.block(k,k+bs,brows,tcols);
-        apply_block_householder_on_the_left(A21_22,A11_21,hCoeffsSegment.adjoint());
+        apply_block_householder_on_the_left(A21_22,A11_21,hCoeffsSegment, false); // false == backward
       }
     }
   }
 };
 
-template<typename _MatrixType, typename Rhs>
-struct solve_retval<HouseholderQR<_MatrixType>, Rhs>
-  : solve_retval_base<HouseholderQR<_MatrixType>, Rhs>
-{
-  EIGEN_MAKE_SOLVE_HELPERS(HouseholderQR<_MatrixType>,Rhs)
-
-  template<typename Dest> void evalTo(Dest& dst) const
-  {
-    const Index rows = dec().rows(), cols = dec().cols();
-    const Index rank = (std::min)(rows, cols);
-    eigen_assert(rhs().rows() == rows);
+} // end namespace internal
 
-    typename Rhs::PlainObject c(rhs());
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+template<typename _MatrixType>
+template<typename RhsType, typename DstType>
+void HouseholderQR<_MatrixType>::_solve_impl(const RhsType &rhs, DstType &dst) const
+{
+  const Index rank = (std::min)(rows(), cols());
+  eigen_assert(rhs.rows() == rows());
 
-    // Note that the matrix Q = H_0^* H_1^*... so its inverse is Q^* = (H_0 H_1 ...)^T
-    c.applyOnTheLeft(householderSequence(
-      dec().matrixQR().leftCols(rank),
-      dec().hCoeffs().head(rank)).transpose()
-    );
+  typename RhsType::PlainObject c(rhs);
 
-    dec().matrixQR()
-       .topLeftCorner(rank, rank)
-       .template triangularView<Upper>()
-       .solveInPlace(c.topRows(rank));
+  // Note that the matrix Q = H_0^* H_1^*... so its inverse is Q^* = (H_0 H_1 ...)^T
+  c.applyOnTheLeft(householderSequence(
+    m_qr.leftCols(rank),
+    m_hCoeffs.head(rank)).transpose()
+  );
 
-    dst.topRows(rank) = c.topRows(rank);
-    dst.bottomRows(cols-rank).setZero();
-  }
-};
+  m_qr.topLeftCorner(rank, rank)
+      .template triangularView<Upper>()
+      .solveInPlace(c.topRows(rank));
 
-} // end namespace internal
+  dst.topRows(rank) = c.topRows(rank);
+  dst.bottomRows(cols()-rank).setZero();
+}
+#endif
 
 /** Performs the QR factorization of the given matrix \a matrix. The result of
   * the factorization is stored into \c *this, and a reference to \c *this
diff --git a/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h b/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h
index a2cc2a9e2..bcdc981d7 100644
--- a/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h
+++ b/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h
@@ -2,6 +2,7 @@
 // for linear algebra.
 //
 // Copyright (C) 2012 Desire Nuentsa <desire.nuentsa_wakam@inria.fr>
+// Copyright (C) 2014 Gael Guennebaud <gael.guennebaud@inria.fr>
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
@@ -54,8 +55,11 @@ namespace Eigen {
  * 
  */
 template<typename _MatrixType>
-class SPQR
+class SPQR : public SparseSolverBase<SPQR<_MatrixType> >
 {
+  protected:
+    typedef SparseSolverBase<SPQR<_MatrixType> > Base;
+    using Base::m_isInitialized;
   public:
     typedef typename _MatrixType::Scalar Scalar;
     typedef typename _MatrixType::RealScalar RealScalar;
@@ -64,19 +68,13 @@ class SPQR
     typedef PermutationMatrix<Dynamic, Dynamic> PermutationType;
   public:
     SPQR() 
-      : m_isInitialized(false),
-      m_ordering(SPQR_ORDERING_DEFAULT),
-      m_allow_tol(SPQR_DEFAULT_TOL),
-      m_tolerance (NumTraits<Scalar>::epsilon())
+      : m_ordering(SPQR_ORDERING_DEFAULT), m_allow_tol(SPQR_DEFAULT_TOL), m_tolerance (NumTraits<Scalar>::epsilon())
     { 
       cholmod_l_start(&m_cc);
     }
     
     SPQR(const _MatrixType& matrix) 
-    : m_isInitialized(false),
-      m_ordering(SPQR_ORDERING_DEFAULT),
-      m_allow_tol(SPQR_DEFAULT_TOL),
-      m_tolerance (NumTraits<Scalar>::epsilon())
+    : m_ordering(SPQR_ORDERING_DEFAULT), m_allow_tol(SPQR_DEFAULT_TOL), m_tolerance (NumTraits<Scalar>::epsilon())
     {
       cholmod_l_start(&m_cc);
       compute(matrix);
@@ -126,22 +124,9 @@ class SPQR
      * Get the number of columns of the input matrix. 
      */
     inline Index cols() const { return m_cR->ncol; }
-   
-      /** \returns the solution X of \f$ A X = B \f$ using the current decomposition of A.
-      *
-      * \sa compute()
-      */
-    template<typename Rhs>
-    inline const internal::solve_retval<SPQR, Rhs> solve(const MatrixBase<Rhs>& B) const 
-    {
-      eigen_assert(m_isInitialized && " The QR factorization should be computed first, call compute()");
-      eigen_assert(this->rows()==B.rows()
-                    && "SPQR::solve(): invalid number of rows of the right hand side matrix B");
-          return internal::solve_retval<SPQR, Rhs>(*this, B.derived());
-    }
     
     template<typename Rhs, typename Dest>
-    void _solve(const MatrixBase<Rhs> &b, MatrixBase<Dest> &dest) const
+    void _solve_impl(const MatrixBase<Rhs> &b, MatrixBase<Dest> &dest) const
     {
       eigen_assert(m_isInitialized && " The QR factorization should be computed first, call compute()");
       eigen_assert(b.cols()==1 && "This method is for vectors only");
@@ -214,7 +199,6 @@ class SPQR
       return m_info;
     }
   protected:
-    bool m_isInitialized;
     bool m_analysisIsOk;
     bool m_factorizationIsOk;
     mutable bool m_isRUpToDate;
@@ -293,22 +277,5 @@ struct SPQRMatrixQTransposeReturnType{
   const SPQRType& m_spqr;
 };
 
-namespace internal {
-  
-template<typename _MatrixType, typename Rhs>
-struct solve_retval<SPQR<_MatrixType>, Rhs>
-  : solve_retval_base<SPQR<_MatrixType>, Rhs>
-{
-  typedef SPQR<_MatrixType> Dec;
-  EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs)
-
-  template<typename Dest> void evalTo(Dest& dst) const
-  {
-    dec()._solve(rhs(),dst);
-  }
-};
-
-} // end namespace internal
-
 }// End namespace Eigen
 #endif
diff --git a/Eigen/src/SVD/JacobiSVD.h b/Eigen/src/SVD/JacobiSVD.h
index 412daa746..f2a72faa3 100644
--- a/Eigen/src/SVD/JacobiSVD.h
+++ b/Eigen/src/SVD/JacobiSVD.h
@@ -2,6 +2,7 @@
 // for linear algebra.
 //
 // Copyright (C) 2009-2010 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2013-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
@@ -424,24 +425,31 @@ void real_2x2_jacobi_svd(const MatrixType& matrix, Index p, Index q,
   JacobiRotation<RealScalar> rot1;
   RealScalar t = m.coeff(0,0) + m.coeff(1,1);
   RealScalar d = m.coeff(1,0) - m.coeff(0,1);
-  if(t == RealScalar(0))
+  
+  if(d == RealScalar(0))
   {
-    rot1.c() = RealScalar(0);
-    rot1.s() = d > RealScalar(0) ? RealScalar(1) : RealScalar(-1);
+    rot1.s() = RealScalar(0);
+    rot1.c() = RealScalar(1);
   }
   else
   {
-    RealScalar t2d2 = numext::hypot(t,d);
-    rot1.c() = abs(t)/t2d2;
-    rot1.s() = d/t2d2;
-    if(t<RealScalar(0))
-      rot1.s() = -rot1.s();
+    // If d!=0, then t/d cannot overflow because the magnitude of the
+    // entries forming d are not too small compared to the ones forming t.
+    RealScalar u = t / d;
+    rot1.s() = RealScalar(1) / sqrt(RealScalar(1) + numext::abs2(u));
+    rot1.c() = rot1.s() * u;
   }
   m.applyOnTheLeft(0,1,rot1);
   j_right->makeJacobi(m,0,1);
   *j_left  = rot1 * j_right->transpose();
 }
 
+template<typename _MatrixType, int QRPreconditioner> 
+struct traits<JacobiSVD<_MatrixType,QRPreconditioner> >
+{
+  typedef _MatrixType MatrixType;
+};
+
 } // end namespace internal
 
 /** \ingroup SVD_Module
@@ -498,7 +506,9 @@ void real_2x2_jacobi_svd(const MatrixType& matrix, Index p, Index q,
   * \sa MatrixBase::jacobiSvd()
   */
 template<typename _MatrixType, int QRPreconditioner> class JacobiSVD
+ : public SVDBase<JacobiSVD<_MatrixType,QRPreconditioner> >
 {
+    typedef SVDBase<JacobiSVD> Base;
   public:
 
     typedef _MatrixType MatrixType;
@@ -515,13 +525,10 @@ template<typename _MatrixType, int QRPreconditioner> class JacobiSVD
       MatrixOptions = MatrixType::Options
     };
 
-    typedef Matrix<Scalar, RowsAtCompileTime, RowsAtCompileTime,
-                   MatrixOptions, MaxRowsAtCompileTime, MaxRowsAtCompileTime>
-            MatrixUType;
-    typedef Matrix<Scalar, ColsAtCompileTime, ColsAtCompileTime,
-                   MatrixOptions, MaxColsAtCompileTime, MaxColsAtCompileTime>
-            MatrixVType;
-    typedef typename internal::plain_diag_type<MatrixType, RealScalar>::type SingularValuesType;
+    typedef typename Base::MatrixUType MatrixUType;
+    typedef typename Base::MatrixVType MatrixVType;
+    typedef typename Base::SingularValuesType SingularValuesType;
+    
     typedef typename internal::plain_row_type<MatrixType>::type RowType;
     typedef typename internal::plain_col_type<MatrixType>::type ColType;
     typedef Matrix<Scalar, DiagSizeAtCompileTime, DiagSizeAtCompileTime,
@@ -534,11 +541,6 @@ template<typename _MatrixType, int QRPreconditioner> class JacobiSVD
       * perform decompositions via JacobiSVD::compute(const MatrixType&).
       */
     JacobiSVD()
-      : m_isInitialized(false),
-        m_isAllocated(false),
-        m_usePrescribedThreshold(false),
-        m_computationOptions(0),
-        m_rows(-1), m_cols(-1), m_diagSize(0)
     {}
 
 
@@ -549,11 +551,6 @@ template<typename _MatrixType, int QRPreconditioner> class JacobiSVD
       * \sa JacobiSVD()
       */
     JacobiSVD(Index rows, Index cols, unsigned int computationOptions = 0)
-      : m_isInitialized(false),
-        m_isAllocated(false),
-        m_usePrescribedThreshold(false),
-        m_computationOptions(0),
-        m_rows(-1), m_cols(-1)
     {
       allocate(rows, cols, computationOptions);
     }
@@ -569,11 +566,6 @@ template<typename _MatrixType, int QRPreconditioner> class JacobiSVD
      * available with the (non-default) FullPivHouseholderQR preconditioner.
      */
     JacobiSVD(const MatrixType& matrix, unsigned int computationOptions = 0)
-      : m_isInitialized(false),
-        m_isAllocated(false),
-        m_usePrescribedThreshold(false),
-        m_computationOptions(0),
-        m_rows(-1), m_cols(-1)
     {
       compute(matrix, computationOptions);
     }
@@ -601,159 +593,33 @@ template<typename _MatrixType, int QRPreconditioner> class JacobiSVD
       return compute(matrix, m_computationOptions);
     }
 
-    /** \returns the \a U matrix.
-     *
-     * For the SVD decomposition of a n-by-p matrix, letting \a m be the minimum of \a n and \a p,
-     * the U matrix is n-by-n if you asked for #ComputeFullU, and is n-by-m if you asked for #ComputeThinU.
-     *
-     * The \a m first columns of \a U are the left singular vectors of the matrix being decomposed.
-     *
-     * This method asserts that you asked for \a U to be computed.
-     */
-    const MatrixUType& matrixU() const
-    {
-      eigen_assert(m_isInitialized && "JacobiSVD is not initialized.");
-      eigen_assert(computeU() && "This JacobiSVD decomposition didn't compute U. Did you ask for it?");
-      return m_matrixU;
-    }
-
-    /** \returns the \a V matrix.
-     *
-     * For the SVD decomposition of a n-by-p matrix, letting \a m be the minimum of \a n and \a p,
-     * the V matrix is p-by-p if you asked for #ComputeFullV, and is p-by-m if you asked for ComputeThinV.
-     *
-     * The \a m first columns of \a V are the right singular vectors of the matrix being decomposed.
-     *
-     * This method asserts that you asked for \a V to be computed.
-     */
-    const MatrixVType& matrixV() const
-    {
-      eigen_assert(m_isInitialized && "JacobiSVD is not initialized.");
-      eigen_assert(computeV() && "This JacobiSVD decomposition didn't compute V. Did you ask for it?");
-      return m_matrixV;
-    }
-
-    /** \returns the vector of singular values.
-     *
-     * For the SVD decomposition of a n-by-p matrix, letting \a m be the minimum of \a n and \a p, the
-     * returned vector has size \a m.  Singular values are always sorted in decreasing order.
-     */
-    const SingularValuesType& singularValues() const
-    {
-      eigen_assert(m_isInitialized && "JacobiSVD is not initialized.");
-      return m_singularValues;
-    }
-
-    /** \returns true if \a U (full or thin) is asked for in this SVD decomposition */
-    inline bool computeU() const { return m_computeFullU || m_computeThinU; }
-    /** \returns true if \a V (full or thin) is asked for in this SVD decomposition */
-    inline bool computeV() const { return m_computeFullV || m_computeThinV; }
-
-    /** \returns a (least squares) solution of \f$ A x = b \f$ using the current SVD decomposition of A.
-      *
-      * \param b the right-hand-side of the equation to solve.
-      *
-      * \note Solving requires both U and V to be computed. Thin U and V are enough, there is no need for full U or V.
-      *
-      * \note SVD solving is implicitly least-squares. Thus, this method serves both purposes of exact solving and least-squares solving.
-      * In other words, the returned solution is guaranteed to minimize the Euclidean norm \f$ \Vert A x - b \Vert \f$.
-      */
-    template<typename Rhs>
-    inline const internal::solve_retval<JacobiSVD, Rhs>
-    solve(const MatrixBase<Rhs>& b) const
-    {
-      eigen_assert(m_isInitialized && "JacobiSVD is not initialized.");
-      eigen_assert(computeU() && computeV() && "JacobiSVD::solve() requires both unitaries U and V to be computed (thin unitaries suffice).");
-      return internal::solve_retval<JacobiSVD, Rhs>(*this, b.derived());
-    }
-
-    /** \returns the number of singular values that are not exactly 0 */
-    Index nonzeroSingularValues() const
-    {
-      eigen_assert(m_isInitialized && "JacobiSVD is not initialized.");
-      return m_nonzeroSingularValues;
-    }
-    
-    /** \returns the rank of the matrix of which \c *this is the SVD.
-      *
-      * \note This method has to determine which singular values should be considered nonzero.
-      *       For that, it uses the threshold value that you can control by calling
-      *       setThreshold(const RealScalar&).
-      */
-    inline Index rank() const
-    {
-      using std::abs;
-      eigen_assert(m_isInitialized && "JacobiSVD is not initialized.");
-      if(m_singularValues.size()==0) return 0;
-      RealScalar premultiplied_threshold = m_singularValues.coeff(0) * threshold();
-      Index i = m_nonzeroSingularValues-1;
-      while(i>=0 && m_singularValues.coeff(i) < premultiplied_threshold) --i;
-      return i+1;
-    }
-    
-    /** Allows to prescribe a threshold to be used by certain methods, such as rank() and solve(),
-      * which need to determine when singular values are to be considered nonzero.
-      * This is not used for the SVD decomposition itself.
-      *
-      * When it needs to get the threshold value, Eigen calls threshold().
-      * The default is \c NumTraits<Scalar>::epsilon()
-      *
-      * \param threshold The new value to use as the threshold.
-      *
-      * A singular value will be considered nonzero if its value is strictly greater than
-      *  \f$ \vert singular value \vert \leqslant threshold \times \vert max singular value \vert \f$.
-      *
-      * If you want to come back to the default behavior, call setThreshold(Default_t)
-      */
-    JacobiSVD& setThreshold(const RealScalar& threshold)
-    {
-      m_usePrescribedThreshold = true;
-      m_prescribedThreshold = threshold;
-      return *this;
-    }
-
-    /** Allows to come back to the default behavior, letting Eigen use its default formula for
-      * determining the threshold.
-      *
-      * You should pass the special object Eigen::Default as parameter here.
-      * \code svd.setThreshold(Eigen::Default); \endcode
-      *
-      * See the documentation of setThreshold(const RealScalar&).
-      */
-    JacobiSVD& setThreshold(Default_t)
-    {
-      m_usePrescribedThreshold = false;
-      return *this;
-    }
-
-    /** Returns the threshold that will be used by certain methods such as rank().
-      *
-      * See the documentation of setThreshold(const RealScalar&).
-      */
-    RealScalar threshold() const
-    {
-      eigen_assert(m_isInitialized || m_usePrescribedThreshold);
-      return m_usePrescribedThreshold ? m_prescribedThreshold
-                                      : (std::max<Index>)(1,m_diagSize)*NumTraits<Scalar>::epsilon();
-    }
-
-    inline Index rows() const { return m_rows; }
-    inline Index cols() const { return m_cols; }
+    using Base::computeU;
+    using Base::computeV;
+    using Base::rows;
+    using Base::cols;
+    using Base::rank;
 
   private:
     void allocate(Index rows, Index cols, unsigned int computationOptions);
 
   protected:
-    MatrixUType m_matrixU;
-    MatrixVType m_matrixV;
-    SingularValuesType m_singularValues;
+    using Base::m_matrixU;
+    using Base::m_matrixV;
+    using Base::m_singularValues;
+    using Base::m_isInitialized;
+    using Base::m_isAllocated;
+    using Base::m_usePrescribedThreshold;
+    using Base::m_computeFullU;
+    using Base::m_computeThinU;
+    using Base::m_computeFullV;
+    using Base::m_computeThinV;
+    using Base::m_computationOptions;
+    using Base::m_nonzeroSingularValues;
+    using Base::m_rows;
+    using Base::m_cols;
+    using Base::m_diagSize;
+    using Base::m_prescribedThreshold;
     WorkMatrixType m_workMatrix;
-    bool m_isInitialized, m_isAllocated, m_usePrescribedThreshold;
-    bool m_computeFullU, m_computeThinU;
-    bool m_computeFullV, m_computeThinV;
-    unsigned int m_computationOptions;
-    Index m_nonzeroSingularValues, m_rows, m_cols, m_diagSize;
-    RealScalar m_prescribedThreshold;
 
     template<typename __MatrixType, int _QRPreconditioner, bool _IsComplex>
     friend struct internal::svd_precondition_2x2_block_to_be_real;
@@ -861,7 +727,8 @@ JacobiSVD<MatrixType, QRPreconditioner>::compute(const MatrixType& matrix, unsig
         EIGEN_USING_STD_MATH(max);
         RealScalar threshold = (max)(considerAsZero, precision * (max)(abs(m_workMatrix.coeff(p,p)),
                                                                        abs(m_workMatrix.coeff(q,q))));
-        if((max)(abs(m_workMatrix.coeff(p,q)),abs(m_workMatrix.coeff(q,p))) > threshold)
+        // We compare both values to threshold instead of calling max to be robust to NaN (See bug 791)
+        if(abs(m_workMatrix.coeff(p,q))>threshold || abs(m_workMatrix.coeff(q,p)) > threshold)
         {
           finished = false;
 
@@ -917,31 +784,6 @@ JacobiSVD<MatrixType, QRPreconditioner>::compute(const MatrixType& matrix, unsig
   return *this;
 }
 
-namespace internal {
-template<typename _MatrixType, int QRPreconditioner, typename Rhs>
-struct solve_retval<JacobiSVD<_MatrixType, QRPreconditioner>, Rhs>
-  : solve_retval_base<JacobiSVD<_MatrixType, QRPreconditioner>, Rhs>
-{
-  typedef JacobiSVD<_MatrixType, QRPreconditioner> JacobiSVDType;
-  EIGEN_MAKE_SOLVE_HELPERS(JacobiSVDType,Rhs)
-
-  template<typename Dest> void evalTo(Dest& dst) const
-  {
-    eigen_assert(rhs().rows() == dec().rows());
-
-    // A = U S V^*
-    // So A^{-1} = V S^{-1} U^*
-
-    Matrix<Scalar, Dynamic, Rhs::ColsAtCompileTime, 0, _MatrixType::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime> tmp;
-    Index rank = dec().rank();
-    
-    tmp.noalias() = dec().matrixU().leftCols(rank).adjoint() * rhs();
-    tmp = dec().singularValues().head(rank).asDiagonal().inverse() * tmp;
-    dst = dec().matrixV().leftCols(rank) * tmp;
-  }
-};
-} // end namespace internal
-
 #ifndef __CUDACC__
 /** \svd_module
   *
diff --git a/unsupported/Eigen/src/SVD/SVDBase.h b/Eigen/src/SVD/SVDBase.h
index fd8af3b8c..27b732b80 100644
--- a/unsupported/Eigen/src/SVD/SVDBase.h
+++ b/Eigen/src/SVD/SVDBase.h
@@ -2,6 +2,7 @@
 // for linear algebra.
 //
 // Copyright (C) 2009-2010 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2014 Gael Guennebaud <gael.guennebaud@inria.fr>
 //
 // Copyright (C) 2013 Gauthier Brun <brun.gauthier@gmail.com>
 // Copyright (C) 2013 Nicolas Carre <nicolas.carre@ensimag.fr>
@@ -12,8 +13,8 @@
 // Public License v. 2.0. If a copy of the MPL was not distributed
 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
-#ifndef EIGEN_SVD_H
-#define EIGEN_SVD_H
+#ifndef EIGEN_SVDBASE_H
+#define EIGEN_SVDBASE_H
 
 namespace Eigen {
 /** \ingroup SVD_Module
@@ -21,9 +22,10 @@ namespace Eigen {
  *
  * \class SVDBase
  *
- * \brief Mother class of SVD classes algorithms
+ * \brief Base class of SVD algorithms
+ *
+ * \tparam Derived the type of the actual SVD decomposition
  *
- * \param MatrixType the type of the matrix of which we are computing the SVD decomposition
  * SVD decomposition consists in decomposing any n-by-p matrix \a A as a product
  *   \f[ A = U S V^* \f]
  * where \a U is a n-by-n unitary, \a V is a p-by-p unitary, and \a S is a n-by-p real positive matrix which is zero outside of its main diagonal;
@@ -42,12 +44,12 @@ namespace Eigen {
  * terminate in finite (and reasonable) time.
  * \sa MatrixBase::genericSvd()
  */
-template<typename _MatrixType> 
+template<typename Derived>
 class SVDBase
 {
 
 public:
-  typedef _MatrixType MatrixType;
+  typedef typename internal::traits<Derived>::MatrixType MatrixType;
   typedef typename MatrixType::Scalar Scalar;
   typedef typename NumTraits<typename MatrixType::Scalar>::Real RealScalar;
   typedef typename MatrixType::Index Index;
@@ -61,46 +63,16 @@ public:
     MatrixOptions = MatrixType::Options
   };
 
-  typedef Matrix<Scalar, RowsAtCompileTime, RowsAtCompileTime,
-		 MatrixOptions, MaxRowsAtCompileTime, MaxRowsAtCompileTime>
-  MatrixUType;
-  typedef Matrix<Scalar, ColsAtCompileTime, ColsAtCompileTime,
-		 MatrixOptions, MaxColsAtCompileTime, MaxColsAtCompileTime>
-  MatrixVType;
+  typedef Matrix<Scalar, RowsAtCompileTime, RowsAtCompileTime, MatrixOptions, MaxRowsAtCompileTime, MaxRowsAtCompileTime> MatrixUType;
+  typedef Matrix<Scalar, ColsAtCompileTime, ColsAtCompileTime, MatrixOptions, MaxColsAtCompileTime, MaxColsAtCompileTime> MatrixVType;
   typedef typename internal::plain_diag_type<MatrixType, RealScalar>::type SingularValuesType;
-  typedef typename internal::plain_row_type<MatrixType>::type RowType;
-  typedef typename internal::plain_col_type<MatrixType>::type ColType;
-  typedef Matrix<Scalar, DiagSizeAtCompileTime, DiagSizeAtCompileTime,
-		 MatrixOptions, MaxDiagSizeAtCompileTime, MaxDiagSizeAtCompileTime>
-  WorkMatrixType;
-	
-
-
-
-  /** \brief Method performing the decomposition of given matrix using custom options.
-   *
-   * \param matrix the matrix to decompose
-   * \param computationOptions optional parameter allowing to specify if you want full or thin U or V unitaries to be computed.
-   *                           By default, none is computed. This is a bit-field, the possible bits are #ComputeFullU, #ComputeThinU,
-   *                           #ComputeFullV, #ComputeThinV.
-   *
-   * Thin unitaries are only available if your matrix type has a Dynamic number of columns (for example MatrixXf). They also are not
-   * available with the (non-default) FullPivHouseholderQR preconditioner.
-   */
-  SVDBase& compute(const MatrixType& matrix, unsigned int computationOptions);
-
-  /** \brief Method performing the decomposition of given matrix using current options.
-   *
-   * \param matrix the matrix to decompose
-   *
-   * This method uses the current \a computationOptions, as already passed to the constructor or to compute(const MatrixType&, unsigned int).
-   */
-  //virtual SVDBase& compute(const MatrixType& matrix) = 0;
-  SVDBase& compute(const MatrixType& matrix);
+  
+  Derived& derived() { return *static_cast<Derived*>(this); }
+  const Derived& derived() const { return *static_cast<const Derived*>(this); }
 
   /** \returns the \a U matrix.
    *
-   * For the SVDBase decomposition of a n-by-p matrix, letting \a m be the minimum of \a n and \a p,
+   * For the SVD decomposition of a n-by-p matrix, letting \a m be the minimum of \a n and \a p,
    * the U matrix is n-by-n if you asked for #ComputeFullU, and is n-by-m if you asked for #ComputeThinU.
    *
    * The \a m first columns of \a U are the left singular vectors of the matrix being decomposed.
@@ -141,25 +113,107 @@ public:
     return m_singularValues;
   }
 
-  
-
   /** \returns the number of singular values that are not exactly 0 */
   Index nonzeroSingularValues() const
   {
     eigen_assert(m_isInitialized && "SVD is not initialized.");
     return m_nonzeroSingularValues;
   }
+  
+  /** \returns the rank of the matrix of which \c *this is the SVD.
+    *
+    * \note This method has to determine which singular values should be considered nonzero.
+    *       For that, it uses the threshold value that you can control by calling
+    *       setThreshold(const RealScalar&).
+    */
+  inline Index rank() const
+  {
+    using std::abs;
+    eigen_assert(m_isInitialized && "JacobiSVD is not initialized.");
+    if(m_singularValues.size()==0) return 0;
+    RealScalar premultiplied_threshold = m_singularValues.coeff(0) * threshold();
+    Index i = m_nonzeroSingularValues-1;
+    while(i>=0 && m_singularValues.coeff(i) < premultiplied_threshold) --i;
+    return i+1;
+  }
+  
+  /** Allows to prescribe a threshold to be used by certain methods, such as rank() and solve(),
+    * which need to determine when singular values are to be considered nonzero.
+    * This is not used for the SVD decomposition itself.
+    *
+    * When it needs to get the threshold value, Eigen calls threshold().
+    * The default is \c NumTraits<Scalar>::epsilon()
+    *
+    * \param threshold The new value to use as the threshold.
+    *
+    * A singular value will be considered nonzero if its value is strictly greater than
+    *  \f$ \vert singular value \vert \leqslant threshold \times \vert max singular value \vert \f$.
+    *
+    * If you want to come back to the default behavior, call setThreshold(Default_t)
+    */
+  Derived& setThreshold(const RealScalar& threshold)
+  {
+    m_usePrescribedThreshold = true;
+    m_prescribedThreshold = threshold;
+    return derived();
+  }
+
+  /** Allows to come back to the default behavior, letting Eigen use its default formula for
+    * determining the threshold.
+    *
+    * You should pass the special object Eigen::Default as parameter here.
+    * \code svd.setThreshold(Eigen::Default); \endcode
+    *
+    * See the documentation of setThreshold(const RealScalar&).
+    */
+  Derived& setThreshold(Default_t)
+  {
+    m_usePrescribedThreshold = false;
+    return derived();
+  }
 
+  /** Returns the threshold that will be used by certain methods such as rank().
+    *
+    * See the documentation of setThreshold(const RealScalar&).
+    */
+  RealScalar threshold() const
+  {
+    eigen_assert(m_isInitialized || m_usePrescribedThreshold);
+    return m_usePrescribedThreshold ? m_prescribedThreshold
+                                    : (std::max<Index>)(1,m_diagSize)*NumTraits<Scalar>::epsilon();
+  }
 
   /** \returns true if \a U (full or thin) is asked for in this SVD decomposition */
   inline bool computeU() const { return m_computeFullU || m_computeThinU; }
   /** \returns true if \a V (full or thin) is asked for in this SVD decomposition */
   inline bool computeV() const { return m_computeFullV || m_computeThinV; }
 
-
   inline Index rows() const { return m_rows; }
   inline Index cols() const { return m_cols; }
-
+  
+  /** \returns a (least squares) solution of \f$ A x = b \f$ using the current SVD decomposition of A.
+    *
+    * \param b the right-hand-side of the equation to solve.
+    *
+    * \note Solving requires both U and V to be computed. Thin U and V are enough, there is no need for full U or V.
+    *
+    * \note SVD solving is implicitly least-squares. Thus, this method serves both purposes of exact solving and least-squares solving.
+    * In other words, the returned solution is guaranteed to minimize the Euclidean norm \f$ \Vert A x - b \Vert \f$.
+    */
+  template<typename Rhs>
+  inline const Solve<Derived, Rhs>
+  solve(const MatrixBase<Rhs>& b) const
+  {
+    eigen_assert(m_isInitialized && "SVD is not initialized.");
+    eigen_assert(computeU() && computeV() && "SVD::solve() requires both unitaries U and V to be computed (thin unitaries suffice).");
+    return Solve<Derived, Rhs>(derived(), b.derived());
+  }
+  
+  #ifndef EIGEN_PARSED_BY_DOXYGEN
+  template<typename RhsType, typename DstType>
+  EIGEN_DEVICE_FUNC
+  void _solve_impl(const RhsType &rhs, DstType &dst) const;
+  #endif
 
 protected:
   // return true if already allocated
@@ -168,12 +222,12 @@ protected:
   MatrixUType m_matrixU;
   MatrixVType m_matrixV;
   SingularValuesType m_singularValues;
-  bool m_isInitialized, m_isAllocated;
+  bool m_isInitialized, m_isAllocated, m_usePrescribedThreshold;
   bool m_computeFullU, m_computeThinU;
   bool m_computeFullV, m_computeThinV;
   unsigned int m_computationOptions;
   Index m_nonzeroSingularValues, m_rows, m_cols, m_diagSize;
-
+  RealScalar m_prescribedThreshold;
 
   /** \brief Default Constructor.
    *
@@ -182,13 +236,31 @@ protected:
   SVDBase()
     : m_isInitialized(false),
       m_isAllocated(false),
+      m_usePrescribedThreshold(false),
       m_computationOptions(0),
-      m_rows(-1), m_cols(-1)
+      m_rows(-1), m_cols(-1), m_diagSize(0)
   {}
 
 
 };
 
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+template<typename Derived>
+template<typename RhsType, typename DstType>
+void SVDBase<Derived>::_solve_impl(const RhsType &rhs, DstType &dst) const
+{
+  eigen_assert(rhs.rows() == rows());
+
+  // A = U S V^*
+  // So A^{-1} = V S^{-1} U^*
+
+  Matrix<Scalar, Dynamic, RhsType::ColsAtCompileTime, 0, MatrixType::MaxRowsAtCompileTime, RhsType::MaxColsAtCompileTime> tmp;
+  Index l_rank = rank();
+  tmp.noalias() =  m_matrixU.leftCols(l_rank).adjoint() * rhs;
+  tmp = m_singularValues.head(l_rank).asDiagonal().inverse() * tmp;
+  dst = m_matrixV.leftCols(l_rank) * tmp;
+}
+#endif
 
 template<typename MatrixType>
 bool SVDBase<MatrixType>::allocate(Index rows, Index cols, unsigned int computationOptions)
@@ -220,17 +292,13 @@ bool SVDBase<MatrixType>::allocate(Index rows, Index cols, unsigned int computat
   m_diagSize = (std::min)(m_rows, m_cols);
   m_singularValues.resize(m_diagSize);
   if(RowsAtCompileTime==Dynamic)
-    m_matrixU.resize(m_rows, m_computeFullU ? m_rows
-		     : m_computeThinU ? m_diagSize
-		     : 0);
+    m_matrixU.resize(m_rows, m_computeFullU ? m_rows : m_computeThinU ? m_diagSize : 0);
   if(ColsAtCompileTime==Dynamic)
-    m_matrixV.resize(m_cols, m_computeFullV ? m_cols
-		     : m_computeThinV ? m_diagSize
-		     : 0);
+    m_matrixV.resize(m_cols, m_computeFullV ? m_cols : m_computeThinV ? m_diagSize : 0);
 
   return false;
 }
 
 }// end namespace
 
-#endif // EIGEN_SVD_H
+#endif // EIGEN_SVDBASE_H
diff --git a/Eigen/src/SVD/UpperBidiagonalization.h b/Eigen/src/SVD/UpperBidiagonalization.h
index 40067682c..40b1237a0 100644
--- a/Eigen/src/SVD/UpperBidiagonalization.h
+++ b/Eigen/src/SVD/UpperBidiagonalization.h
@@ -2,6 +2,7 @@
 // for linear algebra.
 //
 // Copyright (C) 2010 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2013-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
@@ -153,14 +154,19 @@ void upperbidiagonalization_blocked_helper(MatrixType& A,
                                            typename MatrixType::RealScalar *diagonal,
                                            typename MatrixType::RealScalar *upper_diagonal,
                                            typename MatrixType::Index bs,
-                                           Ref<Matrix<typename MatrixType::Scalar, Dynamic, Dynamic> > X,
-                                           Ref<Matrix<typename MatrixType::Scalar, Dynamic, Dynamic> > Y)
+                                           Ref<Matrix<typename MatrixType::Scalar, Dynamic, Dynamic,
+                                                      traits<MatrixType>::Flags & RowMajorBit> > X,
+                                           Ref<Matrix<typename MatrixType::Scalar, Dynamic, Dynamic,
+                                                      traits<MatrixType>::Flags & RowMajorBit> > Y)
 {
   typedef typename MatrixType::Index Index;
   typedef typename MatrixType::Scalar Scalar;
-  typedef Ref<Matrix<Scalar, Dynamic, 1> >                    SubColumnType;
-  typedef Ref<Matrix<Scalar, 1, Dynamic>, 0, InnerStride<> >  SubRowType;
-  typedef Ref<Matrix<Scalar, Dynamic, Dynamic> >              SubMatType;
+  enum { StorageOrder = traits<MatrixType>::Flags & RowMajorBit };
+  typedef InnerStride<int(StorageOrder) == int(ColMajor) ? 1 : Dynamic> ColInnerStride;
+  typedef InnerStride<int(StorageOrder) == int(ColMajor) ? Dynamic : 1> RowInnerStride;
+  typedef Ref<Matrix<Scalar, Dynamic, 1>, 0, ColInnerStride>    SubColumnType;
+  typedef Ref<Matrix<Scalar, 1, Dynamic>, 0, RowInnerStride>    SubRowType;
+  typedef Ref<Matrix<Scalar, Dynamic, Dynamic, StorageOrder > > SubMatType;
   
   Index brows = A.rows();
   Index bcols = A.cols();
@@ -214,10 +220,10 @@ void upperbidiagonalization_blocked_helper(MatrixType& A,
         if(k) u_k -= U_k1.adjoint() * X.row(k).head(k).adjoint();
       }
 
-      // 5 - construct right Householder transform in-placecols
+      // 5 - construct right Householder transform in-place
       u_k.makeHouseholderInPlace(tau_u, upper_diagonal[k]);
 
-      // this eases the application of Householder transforAions
+      // this eases the application of Householder transformations
       // A(k,k+1) will store tau_u later
       A(k,k+1) = Scalar(1);
 
@@ -287,8 +293,18 @@ void upperbidiagonalization_inplace_blocked(MatrixType& A, BidiagType& bidiagona
   Index cols = A.cols();
   Index size = (std::min)(rows, cols);
 
-  Matrix<Scalar,MatrixType::RowsAtCompileTime,Dynamic,ColMajor,MatrixType::MaxRowsAtCompileTime> X(rows,maxBlockSize);
-  Matrix<Scalar,MatrixType::ColsAtCompileTime,Dynamic,ColMajor,MatrixType::MaxColsAtCompileTime> Y(cols,maxBlockSize);
+  // X and Y are work space
+  enum { StorageOrder = traits<MatrixType>::Flags & RowMajorBit };
+  Matrix<Scalar,
+         MatrixType::RowsAtCompileTime,
+         Dynamic,
+         StorageOrder,
+         MatrixType::MaxRowsAtCompileTime> X(rows,maxBlockSize);
+  Matrix<Scalar,
+         MatrixType::ColsAtCompileTime,
+         Dynamic,
+         StorageOrder,
+         MatrixType::MaxColsAtCompileTime> Y(cols,maxBlockSize);
   Index blockSize = (std::min)(maxBlockSize,size);
 
   Index k = 0;
diff --git a/Eigen/src/SparseCholesky/SimplicialCholesky.h b/Eigen/src/SparseCholesky/SimplicialCholesky.h
index e1f96ba5a..3c8cef5db 100644
--- a/Eigen/src/SparseCholesky/SimplicialCholesky.h
+++ b/Eigen/src/SparseCholesky/SimplicialCholesky.h
@@ -33,8 +33,11 @@ enum SimplicialCholeskyMode {
   *
   */
 template<typename Derived>
-class SimplicialCholeskyBase : internal::noncopyable
+class SimplicialCholeskyBase : public SparseSolverBase<Derived>
 {
+    typedef SparseSolverBase<Derived> Base;
+    using Base::m_isInitialized;
+    
   public:
     typedef typename internal::traits<Derived>::MatrixType MatrixType;
     typedef typename internal::traits<Derived>::OrderingType OrderingType;
@@ -46,14 +49,16 @@ class SimplicialCholeskyBase : internal::noncopyable
     typedef Matrix<Scalar,Dynamic,1> VectorType;
 
   public:
+    
+    using Base::derived;
 
     /** Default constructor */
     SimplicialCholeskyBase()
-      : m_info(Success), m_isInitialized(false), m_shiftOffset(0), m_shiftScale(1)
+      : m_info(Success), m_shiftOffset(0), m_shiftScale(1)
     {}
 
     SimplicialCholeskyBase(const MatrixType& matrix)
-      : m_info(Success), m_isInitialized(false), m_shiftOffset(0), m_shiftScale(1)
+      : m_info(Success), m_shiftOffset(0), m_shiftScale(1)
     {
       derived().compute(matrix);
     }
@@ -79,34 +84,6 @@ class SimplicialCholeskyBase : internal::noncopyable
       return m_info;
     }
     
-    /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A.
-      *
-      * \sa compute()
-      */
-    template<typename Rhs>
-    inline const internal::solve_retval<SimplicialCholeskyBase, Rhs>
-    solve(const MatrixBase<Rhs>& b) const
-    {
-      eigen_assert(m_isInitialized && "Simplicial LLT or LDLT is not initialized.");
-      eigen_assert(rows()==b.rows()
-                && "SimplicialCholeskyBase::solve(): invalid number of rows of the right hand side matrix b");
-      return internal::solve_retval<SimplicialCholeskyBase, Rhs>(*this, b.derived());
-    }
-    
-    /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A.
-      *
-      * \sa compute()
-      */
-    template<typename Rhs>
-    inline const internal::sparse_solve_retval<SimplicialCholeskyBase, Rhs>
-    solve(const SparseMatrixBase<Rhs>& b) const
-    {
-      eigen_assert(m_isInitialized && "Simplicial LLT or LDLT is not initialized.");
-      eigen_assert(rows()==b.rows()
-                && "SimplicialCholesky::solve(): invalid number of rows of the right hand side matrix b");
-      return internal::sparse_solve_retval<SimplicialCholeskyBase, Rhs>(*this, b.derived());
-    }
-    
     /** \returns the permutation P
       * \sa permutationPinv() */
     const PermutationMatrix<Dynamic,Dynamic,Index>& permutationP() const
@@ -150,7 +127,7 @@ class SimplicialCholeskyBase : internal::noncopyable
 
     /** \internal */
     template<typename Rhs,typename Dest>
-    void _solve(const MatrixBase<Rhs> &b, MatrixBase<Dest> &dest) const
+    void _solve_impl(const MatrixBase<Rhs> &b, MatrixBase<Dest> &dest) const
     {
       eigen_assert(m_factorizationIsOk && "The decomposition is not in a valid state for solving, you must first call either compute() or symbolic()/numeric()");
       eigen_assert(m_matrix.rows()==b.rows());
@@ -175,6 +152,12 @@ class SimplicialCholeskyBase : internal::noncopyable
       if(m_P.size()>0)
         dest = m_Pinv * dest;
     }
+    
+    template<typename Rhs,typename Dest>
+    void _solve_impl(const SparseMatrixBase<Rhs> &b, SparseMatrixBase<Dest> &dest) const
+    {
+      internal::solve_sparse_through_dense_panels(derived(), b, dest);
+    }
 
 #endif // EIGEN_PARSED_BY_DOXYGEN
 
@@ -226,7 +209,6 @@ class SimplicialCholeskyBase : internal::noncopyable
     };
 
     mutable ComputationInfo m_info;
-    bool m_isInitialized;
     bool m_factorizationIsOk;
     bool m_analysisIsOk;
     
@@ -255,8 +237,8 @@ template<typename _MatrixType, int _UpLo, typename _Ordering> struct traits<Simp
   typedef typename MatrixType::Scalar                         Scalar;
   typedef typename MatrixType::Index                          Index;
   typedef SparseMatrix<Scalar, ColMajor, Index>               CholMatrixType;
-  typedef SparseTriangularView<CholMatrixType, Eigen::Lower>  MatrixL;
-  typedef SparseTriangularView<typename CholMatrixType::AdjointReturnType, Eigen::Upper>   MatrixU;
+  typedef TriangularView<CholMatrixType, Eigen::Lower>  MatrixL;
+  typedef TriangularView<typename CholMatrixType::AdjointReturnType, Eigen::Upper>   MatrixU;
   static inline MatrixL getL(const MatrixType& m) { return m; }
   static inline MatrixU getU(const MatrixType& m) { return m.adjoint(); }
 };
@@ -269,8 +251,8 @@ template<typename _MatrixType,int _UpLo, typename _Ordering> struct traits<Simpl
   typedef typename MatrixType::Scalar                             Scalar;
   typedef typename MatrixType::Index                              Index;
   typedef SparseMatrix<Scalar, ColMajor, Index>                   CholMatrixType;
-  typedef SparseTriangularView<CholMatrixType, Eigen::UnitLower>  MatrixL;
-  typedef SparseTriangularView<typename CholMatrixType::AdjointReturnType, Eigen::UnitUpper> MatrixU;
+  typedef TriangularView<CholMatrixType, Eigen::UnitLower>  MatrixL;
+  typedef TriangularView<typename CholMatrixType::AdjointReturnType, Eigen::UnitUpper> MatrixU;
   static inline MatrixL getL(const MatrixType& m) { return m; }
   static inline MatrixU getU(const MatrixType& m) { return m.adjoint(); }
 };
@@ -560,7 +542,7 @@ public:
 
     /** \internal */
     template<typename Rhs,typename Dest>
-    void _solve(const MatrixBase<Rhs> &b, MatrixBase<Dest> &dest) const
+    void _solve_impl(const MatrixBase<Rhs> &b, MatrixBase<Dest> &dest) const
     {
       eigen_assert(Base::m_factorizationIsOk && "The decomposition is not in a valid state for solving, you must first call either compute() or symbolic()/numeric()");
       eigen_assert(Base::m_matrix.rows()==b.rows());
@@ -596,6 +578,13 @@ public:
         dest = Base::m_Pinv * dest;
     }
     
+    /** \internal */
+    template<typename Rhs,typename Dest>
+    void _solve_impl(const SparseMatrixBase<Rhs> &b, SparseMatrixBase<Dest> &dest) const
+    {
+      internal::solve_sparse_through_dense_panels(*this, b, dest);
+    }
+    
     Scalar determinant() const
     {
       if(m_LDLT)
@@ -636,36 +625,6 @@ void SimplicialCholeskyBase<Derived>::ordering(const MatrixType& a, CholMatrixTy
   ap.template selfadjointView<Upper>() = a.template selfadjointView<UpLo>().twistedBy(m_P);
 }
 
-namespace internal {
-  
-template<typename Derived, typename Rhs>
-struct solve_retval<SimplicialCholeskyBase<Derived>, Rhs>
-  : solve_retval_base<SimplicialCholeskyBase<Derived>, Rhs>
-{
-  typedef SimplicialCholeskyBase<Derived> Dec;
-  EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs)
-
-  template<typename Dest> void evalTo(Dest& dst) const
-  {
-    dec().derived()._solve(rhs(),dst);
-  }
-};
-
-template<typename Derived, typename Rhs>
-struct sparse_solve_retval<SimplicialCholeskyBase<Derived>, Rhs>
-  : sparse_solve_retval_base<SimplicialCholeskyBase<Derived>, Rhs>
-{
-  typedef SimplicialCholeskyBase<Derived> Dec;
-  EIGEN_MAKE_SPARSE_SOLVE_HELPERS(Dec,Rhs)
-
-  template<typename Dest> void evalTo(Dest& dst) const
-  {
-    this->defaultEvalTo(dst);
-  }
-};
-
-} // end namespace internal
-
 } // end namespace Eigen
 
 #endif // EIGEN_SIMPLICIAL_CHOLESKY_H
diff --git a/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h b/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h
index 8067565f9..19d9eaa42 100644
--- a/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h
+++ b/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h
@@ -1,7 +1,7 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
-// Copyright (C) 2008-2011 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
@@ -28,6 +28,8 @@ static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& r
   ei_declare_aligned_stack_constructed_variable(bool,   mask,     rows, 0);
   ei_declare_aligned_stack_constructed_variable(Scalar, values,   rows, 0);
   ei_declare_aligned_stack_constructed_variable(Index,  indices,  rows, 0);
+  
+  std::memset(mask,0,sizeof(bool)*rows);
 
   // estimate the number of non zero entries
   // given a rhs column containing Y non zeros, we assume that the respective Y columns
@@ -36,6 +38,9 @@ static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& r
   // per column of the lhs.
   // Therefore, we have nnz(lhs*rhs) = nnz(lhs) + nnz(rhs)
   Index estimated_nnz_prod = lhs.nonZeros() + rhs.nonZeros();
+  
+  typename evaluator<Lhs>::type lhsEval(lhs);
+  typename evaluator<Rhs>::type rhsEval(rhs);
 
   res.setZero();
   res.reserve(Index(estimated_nnz_prod));
@@ -45,11 +50,11 @@ static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& r
 
     res.startVec(j);
     Index nnz = 0;
-    for (typename Rhs::InnerIterator rhsIt(rhs, j); rhsIt; ++rhsIt)
+    for (typename evaluator<Rhs>::InnerIterator rhsIt(rhsEval, j); rhsIt; ++rhsIt)
     {
       Scalar y = rhsIt.value();
       Index k = rhsIt.index();
-      for (typename Lhs::InnerIterator lhsIt(lhs, k); lhsIt; ++lhsIt)
+      for (typename evaluator<Lhs>::InnerIterator lhsIt(lhsEval, k); lhsIt; ++lhsIt)
       {
         Index i = lhsIt.index();
         Scalar x = lhsIt.value();
@@ -86,7 +91,7 @@ static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& r
       // otherwise => loop through the entire vector
       // In order to avoid to perform an expensive log2 when the
       // result is clearly very sparse we use a linear bound up to 200.
-      if((nnz<200 && nnz<t200) || nnz * log2(nnz) < t)
+      if((nnz<200 && nnz<t200) || nnz * numext::log2(int(nnz)) < t)
       {
         if(nnz>1) std::sort(indices,indices+nnz);
         for(Index k=0; k<nnz; ++k)
@@ -136,20 +141,21 @@ struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,ColMajor,C
     typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::Index> ColMajorMatrixAux;
     typedef typename sparse_eval<ColMajorMatrixAux,ResultType::RowsAtCompileTime,ResultType::ColsAtCompileTime>::type ColMajorMatrix;
     
-    ColMajorMatrix resCol(lhs.rows(),rhs.cols());
     // FIXME, the following heuristic is probably not very good.
     if(lhs.rows()>=rhs.cols())
     {
+      ColMajorMatrix resCol(lhs.rows(),rhs.cols());
       // perform sorted insertion
       internal::conservative_sparse_sparse_product_impl<Lhs,Rhs,ColMajorMatrix>(lhs, rhs, resCol, true);
-      res.swap(resCol);
+      res = resCol.markAsRValue();
     }
     else
     {
+      ColMajorMatrixAux resCol(lhs.rows(),rhs.cols());
       // ressort to transpose to sort the entries
-      internal::conservative_sparse_sparse_product_impl<Lhs,Rhs,ColMajorMatrix>(lhs, rhs, resCol, false);
+      internal::conservative_sparse_sparse_product_impl<Lhs,Rhs,ColMajorMatrixAux>(lhs, rhs, resCol, false);
       RowMajorMatrix resRow(resCol);
-      res = resRow;
+      res = resRow.markAsRValue();
     }
   }
 };
diff --git a/Eigen/src/SparseCore/MappedSparseMatrix.h b/Eigen/src/SparseCore/MappedSparseMatrix.h
index ab1a266a9..d9aabd049 100644
--- a/Eigen/src/SparseCore/MappedSparseMatrix.h
+++ b/Eigen/src/SparseCore/MappedSparseMatrix.h
@@ -176,6 +176,32 @@ class MappedSparseMatrix<Scalar,_Flags,_Index>::ReverseInnerIterator
     const Index m_end;
 };
 
+namespace internal {
+
+template<typename _Scalar, int _Options, typename _Index>
+struct evaluator<MappedSparseMatrix<_Scalar,_Options,_Index> >
+  : evaluator_base<MappedSparseMatrix<_Scalar,_Options,_Index> >
+{
+  typedef MappedSparseMatrix<_Scalar,_Options,_Index> MappedSparseMatrixType;
+  typedef typename MappedSparseMatrixType::InnerIterator InnerIterator;
+  typedef typename MappedSparseMatrixType::ReverseInnerIterator ReverseInnerIterator;
+  
+  enum {
+    CoeffReadCost = NumTraits<_Scalar>::ReadCost,
+    Flags = MappedSparseMatrixType::Flags
+  };
+  
+  evaluator() : m_matrix(0) {}
+  evaluator(const MappedSparseMatrixType &mat) : m_matrix(&mat) {}
+  
+  operator MappedSparseMatrixType&() { return m_matrix->const_cast_derived(); }
+  operator const MappedSparseMatrixType&() const { return *m_matrix; }
+  
+  const MappedSparseMatrixType *m_matrix;
+};
+
+}
+
 } // end namespace Eigen
 
 #endif // EIGEN_MAPPED_SPARSEMATRIX_H
diff --git a/Eigen/src/SparseCore/SparseAssign.h b/Eigen/src/SparseCore/SparseAssign.h
new file mode 100644
index 000000000..97c079d3f
--- /dev/null
+++ b/Eigen/src/SparseCore/SparseAssign.h
@@ -0,0 +1,192 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_SPARSEASSIGN_H
+#define EIGEN_SPARSEASSIGN_H
+
+namespace Eigen { 
+
+template<typename Derived>    
+template<typename OtherDerived>
+Derived& SparseMatrixBase<Derived>::operator=(const EigenBase<OtherDerived> &other)
+{
+  // TODO use the evaluator mechanism
+  other.derived().evalTo(derived());
+  return derived();
+}
+
+template<typename Derived>
+template<typename OtherDerived>
+Derived& SparseMatrixBase<Derived>::operator=(const ReturnByValue<OtherDerived>& other)
+{
+  // TODO use the evaluator mechanism
+  other.evalTo(derived());
+  return derived();
+}
+
+template<typename Derived>
+template<typename OtherDerived>
+inline Derived& SparseMatrixBase<Derived>::operator=(const SparseMatrixBase<OtherDerived>& other)
+{
+  // FIXME, by default sparse evaluation do not alias, so we should be able to bypass the generic call_assignment
+  internal::call_assignment/*_no_alias*/(derived(), other.derived());
+  return derived();
+}
+
+template<typename Derived>
+inline Derived& SparseMatrixBase<Derived>::operator=(const Derived& other)
+{
+  internal::call_assignment_no_alias(derived(), other.derived());
+  return derived();
+}
+
+namespace internal {
+
+template<>
+struct storage_kind_to_evaluator_kind<Sparse> {
+  typedef IteratorBased Kind;
+};
+
+template<>
+struct storage_kind_to_shape<Sparse> {
+  typedef SparseShape Shape;
+};
+
+struct Sparse2Sparse {};
+struct Sparse2Dense  {};
+
+template<> struct AssignmentKind<SparseShape, SparseShape>           { typedef Sparse2Sparse Kind; };
+template<> struct AssignmentKind<SparseShape, SparseTriangularShape> { typedef Sparse2Sparse Kind; };
+template<> struct AssignmentKind<DenseShape,  SparseShape>           { typedef Sparse2Dense  Kind; };
+
+
+template<typename DstXprType, typename SrcXprType>
+void assign_sparse_to_sparse(DstXprType &dst, const SrcXprType &src)
+{
+  eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
+  
+  typedef typename DstXprType::Index Index;
+  typedef typename DstXprType::Scalar Scalar;
+  typedef typename internal::evaluator<DstXprType>::type DstEvaluatorType;
+  typedef typename internal::evaluator<SrcXprType>::type SrcEvaluatorType;
+
+  SrcEvaluatorType srcEvaluator(src);
+
+  const bool transpose = (DstEvaluatorType::Flags & RowMajorBit) != (SrcEvaluatorType::Flags & RowMajorBit);
+  const Index outerEvaluationSize = (SrcEvaluatorType::Flags&RowMajorBit) ? src.rows() : src.cols();
+  if ((!transpose) && src.isRValue())
+  {
+    // eval without temporary
+    dst.resize(src.rows(), src.cols());
+    dst.setZero();
+    dst.reserve((std::max)(src.rows(),src.cols())*2);
+    for (Index j=0; j<outerEvaluationSize; ++j)
+    {
+      dst.startVec(j);
+      for (typename SrcEvaluatorType::InnerIterator it(srcEvaluator, j); it; ++it)
+      {
+        Scalar v = it.value();
+        dst.insertBackByOuterInner(j,it.index()) = v;
+      }
+    }
+    dst.finalize();
+  }
+  else
+  {
+    // eval through a temporary
+    eigen_assert(( ((internal::traits<DstXprType>::SupportedAccessPatterns & OuterRandomAccessPattern)==OuterRandomAccessPattern) ||
+              (!((DstEvaluatorType::Flags & RowMajorBit) != (SrcEvaluatorType::Flags & RowMajorBit)))) &&
+              "the transpose operation is supposed to be handled in SparseMatrix::operator=");
+
+    enum { Flip = (DstEvaluatorType::Flags & RowMajorBit) != (SrcEvaluatorType::Flags & RowMajorBit) };
+
+    
+    DstXprType temp(src.rows(), src.cols());
+
+    temp.reserve((std::max)(src.rows(),src.cols())*2);
+    for (Index j=0; j<outerEvaluationSize; ++j)
+    {
+      temp.startVec(j);
+      for (typename SrcEvaluatorType::InnerIterator it(srcEvaluator, j); it; ++it)
+      {
+        Scalar v = it.value();
+        temp.insertBackByOuterInner(Flip?it.index():j,Flip?j:it.index()) = v;
+      }
+    }
+    temp.finalize();
+
+    dst = temp.markAsRValue();
+  }
+}
+
+// Generic Sparse to Sparse assignment
+template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar>
+struct Assignment<DstXprType, SrcXprType, Functor, Sparse2Sparse, Scalar>
+{
+  static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar> &/*func*/)
+  {
+    eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
+    
+    assign_sparse_to_sparse(dst.derived(), src.derived());
+  }
+};
+
+// Sparse to Dense assignment
+template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar>
+struct Assignment<DstXprType, SrcXprType, Functor, Sparse2Dense, Scalar>
+{
+  static void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
+  {
+    eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
+    typedef typename SrcXprType::Index Index;
+    
+    typename internal::evaluator<SrcXprType>::type srcEval(src);
+    typename internal::evaluator<DstXprType>::type dstEval(dst);
+    const Index outerEvaluationSize = (internal::evaluator<SrcXprType>::Flags&RowMajorBit) ? src.rows() : src.cols();
+    for (Index j=0; j<outerEvaluationSize; ++j)
+      for (typename internal::evaluator<SrcXprType>::InnerIterator i(srcEval,j); i; ++i)
+        func.assignCoeff(dstEval.coeffRef(i.row(),i.col()), i.value());
+  }
+};
+
+template< typename DstXprType, typename SrcXprType, typename Scalar>
+struct Assignment<DstXprType, SrcXprType, internal::assign_op<typename DstXprType::Scalar>, Sparse2Dense, Scalar>
+{
+  static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar> &)
+  {
+    eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
+    typedef typename SrcXprType::Index Index;
+    
+    dst.setZero();
+    typename internal::evaluator<SrcXprType>::type srcEval(src);
+    typename internal::evaluator<DstXprType>::type dstEval(dst);
+    const Index outerEvaluationSize = (internal::evaluator<SrcXprType>::Flags&RowMajorBit) ? src.rows() : src.cols();
+    for (Index j=0; j<outerEvaluationSize; ++j)
+      for (typename internal::evaluator<SrcXprType>::InnerIterator i(srcEval,j); i; ++i)
+        dstEval.coeffRef(i.row(),i.col()) = i.value();
+  }
+};
+
+// Specialization for "dst = dec.solve(rhs)"
+// NOTE we need to specialize it for Sparse2Sparse to avoid ambiguous specialization error
+template<typename DstXprType, typename DecType, typename RhsType, typename Scalar>
+struct Assignment<DstXprType, Solve<DecType,RhsType>, internal::assign_op<Scalar>, Sparse2Sparse, Scalar>
+{
+  typedef Solve<DecType,RhsType> SrcXprType;
+  static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &)
+  {
+    src.dec()._solve_impl(src.rhs(), dst);
+  }
+};
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_SPARSEASSIGN_H
diff --git a/Eigen/src/SparseCore/SparseBlock.h b/Eigen/src/SparseCore/SparseBlock.h
index 491cc72b0..635d58d86 100644
--- a/Eigen/src/SparseCore/SparseBlock.h
+++ b/Eigen/src/SparseCore/SparseBlock.h
@@ -1,7 +1,7 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
-// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
@@ -12,6 +12,7 @@
 
 namespace Eigen { 
 
+// Subset of columns or rows
 template<typename XprType, int BlockRows, int BlockCols>
 class BlockImpl<XprType,BlockRows,BlockCols,true,Sparse>
   : public SparseMatrixBase<Block<XprType,BlockRows,BlockCols,true> >
@@ -24,31 +25,6 @@ protected:
     enum { OuterSize = IsRowMajor ? BlockRows : BlockCols };
 public:
     EIGEN_SPARSE_PUBLIC_INTERFACE(BlockType)
-    
-    class InnerIterator: public XprType::InnerIterator
-    {
-        typedef typename BlockImpl::Index Index;
-      public:
-        inline InnerIterator(const BlockType& xpr, Index outer)
-          : XprType::InnerIterator(xpr.m_matrix, xpr.m_outerStart + outer), m_outer(outer)
-        {}
-        inline Index row() const { return IsRowMajor ? m_outer : this->index(); }
-        inline Index col() const { return IsRowMajor ? this->index() : m_outer; }
-      protected:
-        Index m_outer;
-    };
-    class ReverseInnerIterator: public XprType::ReverseInnerIterator
-    {
-        typedef typename BlockImpl::Index Index;
-      public:
-        inline ReverseInnerIterator(const BlockType& xpr, Index outer)
-          : XprType::ReverseInnerIterator(xpr.m_matrix, xpr.m_outerStart + outer), m_outer(outer)
-        {}
-        inline Index row() const { return IsRowMajor ? m_outer : this->index(); }
-        inline Index col() const { return IsRowMajor ? this->index() : m_outer; }
-      protected:
-        Index m_outer;
-    };
 
     inline BlockImpl(const XprType& xpr, Index i)
       : m_matrix(xpr), m_outerStart(i), m_outerSize(OuterSize)
@@ -63,13 +39,21 @@ public:
     
     Index nonZeros() const
     {
+      typedef typename internal::evaluator<XprType>::type EvaluatorType;
+      EvaluatorType matEval(m_matrix);
       Index nnz = 0;
       Index end = m_outerStart + m_outerSize.value();
-      for(Index j=m_outerStart; j<end; ++j)
-        for(typename XprType::InnerIterator it(m_matrix, j); it; ++it)
+      for(int j=m_outerStart; j<end; ++j)
+        for(typename EvaluatorType::InnerIterator it(matEval, j); it; ++it)
           ++nnz;
       return nnz;
     }
+    
+    inline const _MatrixTypeNested& nestedExpression() const { return m_matrix; }
+    Index startRow() const { return IsRowMajor ? m_outerStart : 0; }
+    Index startCol() const { return IsRowMajor ? 0 : m_outerStart; }
+    Index blockRows() const { return IsRowMajor ? m_outerSize.value() : m_matrix.rows(); }
+    Index blockCols() const { return IsRowMajor ? m_matrix.cols() : m_outerSize.value(); }
 
   protected:
 
@@ -100,29 +84,6 @@ public:
 protected:
     enum { OuterSize = IsRowMajor ? BlockRows : BlockCols };
 public:
-    
-    class InnerIterator: public SparseMatrixType::InnerIterator
-    {
-      public:
-        inline InnerIterator(const BlockType& xpr, Index outer)
-          : SparseMatrixType::InnerIterator(xpr.m_matrix, xpr.m_outerStart + outer), m_outer(outer)
-        {}
-        inline Index row() const { return IsRowMajor ? m_outer : this->index(); }
-        inline Index col() const { return IsRowMajor ? this->index() : m_outer; }
-      protected:
-        Index m_outer;
-    };
-    class ReverseInnerIterator: public SparseMatrixType::ReverseInnerIterator
-    {
-      public:
-        inline ReverseInnerIterator(const BlockType& xpr, Index outer)
-          : SparseMatrixType::ReverseInnerIterator(xpr.m_matrix, xpr.m_outerStart + outer), m_outer(outer)
-        {}
-        inline Index row() const { return IsRowMajor ? m_outer : this->index(); }
-        inline Index col() const { return IsRowMajor ? this->index() : m_outer; }
-      protected:
-        Index m_outer;
-    };
 
     inline sparse_matrix_block_impl(const SparseMatrixType& xpr, Index i)
       : m_matrix(xpr), m_outerStart(i), m_outerSize(OuterSize)
@@ -248,6 +209,12 @@ public:
 
     EIGEN_STRONG_INLINE Index rows() const { return IsRowMajor ? m_outerSize.value() : m_matrix.rows(); }
     EIGEN_STRONG_INLINE Index cols() const { return IsRowMajor ? m_matrix.cols() : m_outerSize.value(); }
+    
+    inline const _MatrixTypeNested& nestedExpression() const { return m_matrix; }
+    Index startRow() const { return IsRowMajor ? m_outerStart : 0; }
+    Index startCol() const { return IsRowMajor ? 0 : m_outerStart; }
+    Index blockRows() const { return IsRowMajor ? m_outerSize.value() : m_matrix.rows(); }
+    Index blockCols() const { return IsRowMajor ? m_matrix.cols() : m_outerSize.value(); }
 
   protected:
 
@@ -407,32 +374,11 @@ public:
     }
     
     inline const _MatrixTypeNested& nestedExpression() const { return m_matrix; }
+    Index startRow() const { return m_startRow.value(); }
+    Index startCol() const { return m_startCol.value(); }
+    Index blockRows() const { return m_blockRows.value(); }
+    Index blockCols() const { return m_blockCols.value(); }
     
-    typedef internal::GenericSparseBlockInnerIteratorImpl<XprType,BlockRows,BlockCols,InnerPanel> InnerIterator;
-    
-    class ReverseInnerIterator : public _MatrixTypeNested::ReverseInnerIterator
-    {
-      typedef typename _MatrixTypeNested::ReverseInnerIterator Base;
-      const BlockType& m_block;
-      Index m_begin;
-    public:
-
-      EIGEN_STRONG_INLINE ReverseInnerIterator(const BlockType& block, Index outer)
-        : Base(block.derived().nestedExpression(), outer + (IsRowMajor ? block.m_startRow.value() : block.m_startCol.value())),
-          m_block(block),
-          m_begin(IsRowMajor ? block.m_startCol.value() : block.m_startRow.value())
-      {
-        while( (Base::operator bool()) && (Base::index() >= (IsRowMajor ? m_block.m_startCol.value()+block.m_blockCols.value() : m_block.m_startRow.value()+block.m_blockRows.value())) )
-          Base::operator--();
-      }
-
-      inline Index index()  const { return Base::index() - (IsRowMajor ? m_block.m_startCol.value() : m_block.m_startRow.value()); }
-      inline Index outer()  const { return Base::outer() - (IsRowMajor ? m_block.m_startRow.value() : m_block.m_startCol.value()); }
-      inline Index row()    const { return Base::row()   - m_block.m_startRow.value(); }
-      inline Index col()    const { return Base::col()   - m_block.m_startCol.value(); }
-      
-      inline operator bool() const { return Base::operator bool() && Base::index() >= m_begin; }
-    };
   protected:
     friend class internal::GenericSparseBlockInnerIteratorImpl<XprType,BlockRows,BlockCols,InnerPanel>;
     friend class ReverseInnerIterator;
@@ -538,7 +484,120 @@ namespace internal {
     
     inline operator bool() const { return m_outerPos < m_end; }
   };
+
+template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel>
+struct unary_evaluator<Block<ArgType,BlockRows,BlockCols,InnerPanel>, IteratorBased >
+ : public evaluator_base<Block<ArgType,BlockRows,BlockCols,InnerPanel> >
+{
+    class InnerVectorInnerIterator;
+    class OuterVectorInnerIterator;
+  public:
+    typedef Block<ArgType,BlockRows,BlockCols,InnerPanel> XprType;
+    typedef typename XprType::Index Index;
+    typedef typename XprType::Scalar Scalar;
+    
+    class ReverseInnerIterator;
+    
+    enum {
+      IsRowMajor = XprType::IsRowMajor,
+      
+      OuterVector =  (BlockCols==1 && ArgType::IsRowMajor)
+                    | // FIXME | instead of || to please GCC 4.4.0 stupid warning "suggest parentheses around &&".
+                      // revert to || as soon as not needed anymore. 
+                     (BlockRows==1 && !ArgType::IsRowMajor),
+      
+      CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
+      Flags = XprType::Flags
+    };
+    
+    typedef typename internal::conditional<OuterVector,OuterVectorInnerIterator,InnerVectorInnerIterator>::type InnerIterator;
+    
+    unary_evaluator(const XprType& op)
+      : m_argImpl(op.nestedExpression()), m_block(op)
+    {}
+
+  protected:
+    typedef typename evaluator<ArgType>::InnerIterator        EvalIterator;
+    
+    typename evaluator<ArgType>::nestedType m_argImpl;
+    const XprType &m_block;
+};
+
+template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel>
+class unary_evaluator<Block<ArgType,BlockRows,BlockCols,InnerPanel>, IteratorBased>::InnerVectorInnerIterator
+ : public EvalIterator
+{
+  const XprType& m_block;
+  Index m_end;
+public:
+  
+  EIGEN_STRONG_INLINE InnerVectorInnerIterator(const unary_evaluator& aEval, Index outer)
+    : EvalIterator(aEval.m_argImpl, outer + (IsRowMajor ? aEval.m_block.startRow() : aEval.m_block.startCol())),
+      m_block(aEval.m_block),
+      m_end(IsRowMajor ? aEval.m_block.startCol()+aEval.m_block.blockCols() : aEval.m_block.startRow()+aEval.m_block.blockRows())
+  {
+    while( (EvalIterator::operator bool()) && (EvalIterator::index() < (IsRowMajor ? m_block.startCol() : m_block.startRow())) )
+      EvalIterator::operator++();
+  }
+  
+  inline Index index()  const { return EvalIterator::index() - (IsRowMajor ? m_block.startCol() : m_block.startRow()); }
+  inline Index outer()  const { return EvalIterator::outer() - (IsRowMajor ? m_block.startRow() : m_block.startCol()); }
+  inline Index row()    const { return EvalIterator::row()   - m_block.startRow(); }
+  inline Index col()    const { return EvalIterator::col()   - m_block.startCol(); }
+  
+  inline operator bool() const { return EvalIterator::operator bool() && EvalIterator::index() < m_end; }
+};
+
+template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel>
+class unary_evaluator<Block<ArgType,BlockRows,BlockCols,InnerPanel>, IteratorBased>::OuterVectorInnerIterator
+{
+  const unary_evaluator& m_eval;
+  Index m_outerPos;
+  Index m_innerIndex;
+  Scalar m_value;
+  Index m_end;
+public:
+
+  EIGEN_STRONG_INLINE OuterVectorInnerIterator(const unary_evaluator& aEval, Index outer)
+    : m_eval(aEval),
+      m_outerPos( (IsRowMajor ? aEval.m_block.startCol() : aEval.m_block.startRow()) - 1), // -1 so that operator++ finds the first non-zero entry
+      m_innerIndex(IsRowMajor ? aEval.m_block.startRow() : aEval.m_block.startCol()),
+      m_end(IsRowMajor ? aEval.m_block.startCol()+aEval.m_block.blockCols() : aEval.m_block.startRow()+aEval.m_block.blockRows())
+  {
+    EIGEN_UNUSED_VARIABLE(outer);
+    eigen_assert(outer==0);
+    
+    ++(*this);
+  }
+  
+  inline Index index()  const { return m_outerPos - (IsRowMajor ? m_eval.m_block.startCol() : m_eval.m_block.startRow()); }
+  inline Index outer()  const { return 0; }
+  inline Index row()    const { return IsRowMajor ? 0 : index(); }
+  inline Index col()    const { return IsRowMajor ? index() : 0; }
   
+  inline Scalar value() const { return m_value; }
+  
+  inline OuterVectorInnerIterator& operator++()
+  {
+    // search next non-zero entry
+    while(m_outerPos<m_end)
+    {
+      m_outerPos++;
+      EvalIterator it(m_eval.m_argImpl, m_outerPos);
+      // search for the key m_innerIndex in the current outer-vector
+      while(it && it.index() < m_innerIndex) ++it;
+      if(it && it.index()==m_innerIndex)
+      {
+        m_value = it.value();
+        break;
+      }
+    }
+    return *this;
+  }
+  
+  inline operator bool() const { return m_outerPos < m_end; }
+};
+
 } // end namespace internal
 
 
diff --git a/Eigen/src/SparseCore/SparseCwiseBinaryOp.h b/Eigen/src/SparseCore/SparseCwiseBinaryOp.h
index 60fdd214a..5993c1caf 100644
--- a/Eigen/src/SparseCore/SparseCwiseBinaryOp.h
+++ b/Eigen/src/SparseCore/SparseCwiseBinaryOp.h
@@ -1,7 +1,7 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
-// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
@@ -31,12 +31,6 @@ namespace Eigen {
 
 namespace internal {
 
-template<> struct promote_storage_type<Dense,Sparse>
-{ typedef Sparse ret; };
-
-template<> struct promote_storage_type<Sparse,Dense>
-{ typedef Sparse ret; };
-
 template<typename BinaryOp, typename Lhs, typename Rhs, typename Derived,
   typename _LhsStorageMode = typename traits<Lhs>::StorageKind,
   typename _RhsStorageMode = typename traits<Rhs>::StorageKind>
@@ -44,71 +38,35 @@ class sparse_cwise_binary_op_inner_iterator_selector;
 
 } // end namespace internal
 
-template<typename BinaryOp, typename Lhs, typename Rhs>
-class CwiseBinaryOpImpl<BinaryOp, Lhs, Rhs, Sparse>
-  : public SparseMatrixBase<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
-{
-  public:
-    class InnerIterator;
-    class ReverseInnerIterator;
-    typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> Derived;
-    EIGEN_SPARSE_PUBLIC_INTERFACE(Derived)
-    CwiseBinaryOpImpl()
-    {
-      typedef typename internal::traits<Lhs>::StorageKind LhsStorageKind;
-      typedef typename internal::traits<Rhs>::StorageKind RhsStorageKind;
-      EIGEN_STATIC_ASSERT((
-                (!internal::is_same<LhsStorageKind,RhsStorageKind>::value)
-            ||  ((Lhs::Flags&RowMajorBit) == (Rhs::Flags&RowMajorBit))),
-            THE_STORAGE_ORDER_OF_BOTH_SIDES_MUST_MATCH);
-    }
-};
-
-template<typename BinaryOp, typename Lhs, typename Rhs>
-class CwiseBinaryOpImpl<BinaryOp,Lhs,Rhs,Sparse>::InnerIterator
-  : public internal::sparse_cwise_binary_op_inner_iterator_selector<BinaryOp,Lhs,Rhs,typename CwiseBinaryOpImpl<BinaryOp,Lhs,Rhs,Sparse>::InnerIterator>
-{
-  public:
-    typedef internal::sparse_cwise_binary_op_inner_iterator_selector<
-      BinaryOp,Lhs,Rhs, InnerIterator> Base;
-
-    EIGEN_STRONG_INLINE InnerIterator(const CwiseBinaryOpImpl& binOp, Index outer)
-      : Base(binOp.derived(),outer)
-    {}
-};
-
-/***************************************************************************
-* Implementation of inner-iterators
-***************************************************************************/
-
-// template<typename T> struct internal::func_is_conjunction { enum { ret = false }; };
-// template<typename T> struct internal::func_is_conjunction<internal::scalar_product_op<T> > { enum { ret = true }; };
-
-// TODO generalize the internal::scalar_product_op specialization to all conjunctions if any !
-
 namespace internal {
 
-// sparse - sparse  (generic)
-template<typename BinaryOp, typename Lhs, typename Rhs, typename Derived>
-class sparse_cwise_binary_op_inner_iterator_selector<BinaryOp, Lhs, Rhs, Derived, Sparse, Sparse>
+  
+// Generic "sparse OP sparse"
+template<typename BinaryOp, typename Lhs, typename Rhs>
+struct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IteratorBased, IteratorBased>
+  : evaluator_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
 {
-    typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> CwiseBinaryXpr;
-    typedef typename traits<CwiseBinaryXpr>::Scalar Scalar;
-    typedef typename traits<CwiseBinaryXpr>::Index Index;
-    typedef typename traits<CwiseBinaryXpr>::_LhsNested _LhsNested;
-    typedef typename traits<CwiseBinaryXpr>::_RhsNested _RhsNested;
-    typedef typename _LhsNested::InnerIterator LhsIterator;
-    typedef typename _RhsNested::InnerIterator RhsIterator;
+protected:
+  typedef typename evaluator<Lhs>::InnerIterator  LhsIterator;
+  typedef typename evaluator<Rhs>::InnerIterator  RhsIterator;
+public:
+  typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> XprType;
+
+  class ReverseInnerIterator;
+  class InnerIterator
+  {
+    typedef typename traits<XprType>::Scalar Scalar;
+    typedef typename XprType::Index Index;
 
   public:
-
-    EIGEN_STRONG_INLINE sparse_cwise_binary_op_inner_iterator_selector(const CwiseBinaryXpr& xpr, Index outer)
-      : m_lhsIter(xpr.lhs(),outer), m_rhsIter(xpr.rhs(),outer), m_functor(xpr.functor())
+    
+    EIGEN_STRONG_INLINE InnerIterator(const binary_evaluator& aEval, Index outer)
+      : m_lhsIter(aEval.m_lhsImpl,outer), m_rhsIter(aEval.m_rhsImpl,outer), m_functor(aEval.m_functor)
     {
       this->operator++();
     }
 
-    EIGEN_STRONG_INLINE Derived& operator++()
+    EIGEN_STRONG_INLINE InnerIterator& operator++()
     {
       if (m_lhsIter && m_rhsIter && (m_lhsIter.index() == m_rhsIter.index()))
       {
@@ -134,7 +92,7 @@ class sparse_cwise_binary_op_inner_iterator_selector<BinaryOp, Lhs, Rhs, Derived
         m_value = 0; // this is to avoid a compilation warning
         m_id = -1;
       }
-      return *static_cast<Derived*>(this);
+      return *this;
     }
 
     EIGEN_STRONG_INLINE Scalar value() const { return m_value; }
@@ -151,24 +109,48 @@ class sparse_cwise_binary_op_inner_iterator_selector<BinaryOp, Lhs, Rhs, Derived
     const BinaryOp& m_functor;
     Scalar m_value;
     Index m_id;
+  };
+  
+  
+  enum {
+    CoeffReadCost = evaluator<Lhs>::CoeffReadCost + evaluator<Rhs>::CoeffReadCost + functor_traits<BinaryOp>::Cost,
+    Flags = XprType::Flags
+  };
+  
+  binary_evaluator(const XprType& xpr) 
+    : m_functor(xpr.functor()),
+      m_lhsImpl(xpr.lhs()), 
+      m_rhsImpl(xpr.rhs())  
+  { }
+
+protected:
+  const BinaryOp m_functor;
+  typename evaluator<Lhs>::nestedType m_lhsImpl;
+  typename evaluator<Rhs>::nestedType m_rhsImpl;
 };
 
-// sparse - sparse  (product)
-template<typename T, typename Lhs, typename Rhs, typename Derived>
-class sparse_cwise_binary_op_inner_iterator_selector<scalar_product_op<T>, Lhs, Rhs, Derived, Sparse, Sparse>
+// "sparse .* sparse"
+template<typename T, typename Lhs, typename Rhs>
+struct binary_evaluator<CwiseBinaryOp<scalar_product_op<T>, Lhs, Rhs>, IteratorBased, IteratorBased>
+  : evaluator_base<CwiseBinaryOp<scalar_product_op<T>, Lhs, Rhs> >
 {
-    typedef scalar_product_op<T> BinaryFunc;
-    typedef CwiseBinaryOp<BinaryFunc, Lhs, Rhs> CwiseBinaryXpr;
-    typedef typename CwiseBinaryXpr::Scalar Scalar;
-    typedef typename CwiseBinaryXpr::Index Index;
-    typedef typename traits<CwiseBinaryXpr>::_LhsNested _LhsNested;
-    typedef typename _LhsNested::InnerIterator LhsIterator;
-    typedef typename traits<CwiseBinaryXpr>::_RhsNested _RhsNested;
-    typedef typename _RhsNested::InnerIterator RhsIterator;
-  public:
+protected:
+  typedef scalar_product_op<T> BinaryOp;
+  typedef typename evaluator<Lhs>::InnerIterator  LhsIterator;
+  typedef typename evaluator<Rhs>::InnerIterator  RhsIterator;
+public:
+  typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> XprType;
+
+  class ReverseInnerIterator;
+  class InnerIterator
+  {
+    typedef typename traits<XprType>::Scalar Scalar;
+    typedef typename XprType::Index Index;
 
-    EIGEN_STRONG_INLINE sparse_cwise_binary_op_inner_iterator_selector(const CwiseBinaryXpr& xpr, Index outer)
-      : m_lhsIter(xpr.lhs(),outer), m_rhsIter(xpr.rhs(),outer), m_functor(xpr.functor())
+  public:
+    
+    EIGEN_STRONG_INLINE InnerIterator(const binary_evaluator& aEval, Index outer)
+      : m_lhsIter(aEval.m_lhsImpl,outer), m_rhsIter(aEval.m_rhsImpl,outer), m_functor(aEval.m_functor)
     {
       while (m_lhsIter && m_rhsIter && (m_lhsIter.index() != m_rhsIter.index()))
       {
@@ -179,7 +161,7 @@ class sparse_cwise_binary_op_inner_iterator_selector<scalar_product_op<T>, Lhs,
       }
     }
 
-    EIGEN_STRONG_INLINE Derived& operator++()
+    EIGEN_STRONG_INLINE InnerIterator& operator++()
     {
       ++m_lhsIter;
       ++m_rhsIter;
@@ -190,9 +172,9 @@ class sparse_cwise_binary_op_inner_iterator_selector<scalar_product_op<T>, Lhs,
         else
           ++m_rhsIter;
       }
-      return *static_cast<Derived*>(this);
+      return *this;
     }
-
+    
     EIGEN_STRONG_INLINE Scalar value() const { return m_functor(m_lhsIter.value(), m_rhsIter.value()); }
 
     EIGEN_STRONG_INLINE Index index() const { return m_lhsIter.index(); }
@@ -204,91 +186,159 @@ class sparse_cwise_binary_op_inner_iterator_selector<scalar_product_op<T>, Lhs,
   protected:
     LhsIterator m_lhsIter;
     RhsIterator m_rhsIter;
-    const BinaryFunc& m_functor;
+    const BinaryOp& m_functor;
+  };
+  
+  
+  enum {
+    CoeffReadCost = evaluator<Lhs>::CoeffReadCost + evaluator<Rhs>::CoeffReadCost + functor_traits<BinaryOp>::Cost,
+    Flags = XprType::Flags
+  };
+  
+  binary_evaluator(const XprType& xpr) 
+    : m_functor(xpr.functor()),
+      m_lhsImpl(xpr.lhs()), 
+      m_rhsImpl(xpr.rhs())  
+  { }
+
+protected:
+  const BinaryOp m_functor;
+  typename evaluator<Lhs>::nestedType m_lhsImpl;
+  typename evaluator<Rhs>::nestedType m_rhsImpl;
 };
 
-// sparse - dense  (product)
-template<typename T, typename Lhs, typename Rhs, typename Derived>
-class sparse_cwise_binary_op_inner_iterator_selector<scalar_product_op<T>, Lhs, Rhs, Derived, Sparse, Dense>
+// "dense .* sparse"
+template<typename T, typename Lhs, typename Rhs>
+struct binary_evaluator<CwiseBinaryOp<scalar_product_op<T>, Lhs, Rhs>, IndexBased, IteratorBased>
+  : evaluator_base<CwiseBinaryOp<scalar_product_op<T>, Lhs, Rhs> >
 {
-    typedef scalar_product_op<T> BinaryFunc;
-    typedef CwiseBinaryOp<BinaryFunc, Lhs, Rhs> CwiseBinaryXpr;
-    typedef typename CwiseBinaryXpr::Scalar Scalar;
-    typedef typename CwiseBinaryXpr::Index Index;
-    typedef typename traits<CwiseBinaryXpr>::_LhsNested _LhsNested;
-    typedef typename traits<CwiseBinaryXpr>::RhsNested RhsNested;
-    typedef typename _LhsNested::InnerIterator LhsIterator;
-    enum { IsRowMajor = (int(Lhs::Flags)&RowMajorBit)==RowMajorBit };
-  public:
+protected:
+  typedef scalar_product_op<T> BinaryOp;
+  typedef typename evaluator<Lhs>::type  LhsEvaluator;
+  typedef typename evaluator<Rhs>::InnerIterator  RhsIterator;
+public:
+  typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> XprType;
+
+  class ReverseInnerIterator;
+  class InnerIterator
+  {
+    typedef typename traits<XprType>::Scalar Scalar;
+    typedef typename XprType::Index Index;
+    enum { IsRowMajor = (int(Rhs::Flags)&RowMajorBit)==RowMajorBit };
 
-    EIGEN_STRONG_INLINE sparse_cwise_binary_op_inner_iterator_selector(const CwiseBinaryXpr& xpr, Index outer)
-      : m_rhs(xpr.rhs()), m_lhsIter(xpr.lhs(),typename _LhsNested::Index(outer)), m_functor(xpr.functor()), m_outer(outer)
+  public:
+    
+    EIGEN_STRONG_INLINE InnerIterator(const binary_evaluator& aEval, Index outer)
+      : m_lhsEval(aEval.m_lhsImpl), m_rhsIter(aEval.m_rhsImpl,outer), m_functor(aEval.m_functor), m_outer(outer)
     {}
 
-    EIGEN_STRONG_INLINE Derived& operator++()
+    EIGEN_STRONG_INLINE InnerIterator& operator++()
     {
-      ++m_lhsIter;
-      return *static_cast<Derived*>(this);
+      ++m_rhsIter;
+      return *this;
     }
 
     EIGEN_STRONG_INLINE Scalar value() const
-    { return m_functor(m_lhsIter.value(),
-                       m_rhs.coeff(IsRowMajor?m_outer:m_lhsIter.index(),IsRowMajor?m_lhsIter.index():m_outer)); }
+    { return m_functor(m_lhsEval.coeff(IsRowMajor?m_outer:m_rhsIter.index(),IsRowMajor?m_rhsIter.index():m_outer), m_rhsIter.value()); }
 
-    EIGEN_STRONG_INLINE Index index() const { return m_lhsIter.index(); }
-    EIGEN_STRONG_INLINE Index row() const { return m_lhsIter.row(); }
-    EIGEN_STRONG_INLINE Index col() const { return m_lhsIter.col(); }
+    EIGEN_STRONG_INLINE Index index() const { return m_rhsIter.index(); }
+    EIGEN_STRONG_INLINE Index row() const { return m_rhsIter.row(); }
+    EIGEN_STRONG_INLINE Index col() const { return m_rhsIter.col(); }
 
-    EIGEN_STRONG_INLINE operator bool() const { return m_lhsIter; }
+    EIGEN_STRONG_INLINE operator bool() const { return m_rhsIter; }
 
   protected:
-    RhsNested m_rhs;
-    LhsIterator m_lhsIter;
-    const BinaryFunc m_functor;
+    const LhsEvaluator &m_lhsEval;
+    RhsIterator m_rhsIter;
+    const BinaryOp& m_functor;
     const Index m_outer;
+  };
+  
+  
+  enum {
+    CoeffReadCost = evaluator<Lhs>::CoeffReadCost + evaluator<Rhs>::CoeffReadCost + functor_traits<BinaryOp>::Cost,
+    Flags = XprType::Flags
+  };
+  
+  binary_evaluator(const XprType& xpr) 
+    : m_functor(xpr.functor()),
+      m_lhsImpl(xpr.lhs()), 
+      m_rhsImpl(xpr.rhs())  
+  { }
+
+protected:
+  const BinaryOp m_functor;
+  typename evaluator<Lhs>::nestedType m_lhsImpl;
+  typename evaluator<Rhs>::nestedType m_rhsImpl;
 };
 
-// sparse - dense  (product)
-template<typename T, typename Lhs, typename Rhs, typename Derived>
-class sparse_cwise_binary_op_inner_iterator_selector<scalar_product_op<T>, Lhs, Rhs, Derived, Dense, Sparse>
+// "sparse .* dense"
+template<typename T, typename Lhs, typename Rhs>
+struct binary_evaluator<CwiseBinaryOp<scalar_product_op<T>, Lhs, Rhs>, IteratorBased, IndexBased>
+  : evaluator_base<CwiseBinaryOp<scalar_product_op<T>, Lhs, Rhs> >
 {
-    typedef scalar_product_op<T> BinaryFunc;
-    typedef CwiseBinaryOp<BinaryFunc, Lhs, Rhs> CwiseBinaryXpr;
-    typedef typename CwiseBinaryXpr::Scalar Scalar;
-    typedef typename CwiseBinaryXpr::Index Index;
-    typedef typename traits<CwiseBinaryXpr>::_RhsNested _RhsNested;
-    typedef typename _RhsNested::InnerIterator RhsIterator;
+protected:
+  typedef scalar_product_op<T> BinaryOp;
+  typedef typename evaluator<Lhs>::InnerIterator  LhsIterator;
+  typedef typename evaluator<Rhs>::type  RhsEvaluator;
+public:
+  typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> XprType;
+
+  class ReverseInnerIterator;
+  class InnerIterator
+  {
+    typedef typename traits<XprType>::Scalar Scalar;
+    typedef typename XprType::Index Index;
+    enum { IsRowMajor = (int(Lhs::Flags)&RowMajorBit)==RowMajorBit };
 
-    enum { IsRowMajor = (int(Rhs::Flags)&RowMajorBit)==RowMajorBit };
   public:
-
-    EIGEN_STRONG_INLINE sparse_cwise_binary_op_inner_iterator_selector(const CwiseBinaryXpr& xpr, Index outer)
-      : m_xpr(xpr), m_rhsIter(xpr.rhs(),outer), m_functor(xpr.functor()), m_outer(outer)
+    
+    EIGEN_STRONG_INLINE InnerIterator(const binary_evaluator& aEval, Index outer)
+      : m_lhsIter(aEval.m_lhsImpl,outer), m_rhsEval(aEval.m_rhsImpl), m_functor(aEval.m_functor), m_outer(outer)
     {}
 
-    EIGEN_STRONG_INLINE Derived& operator++()
+    EIGEN_STRONG_INLINE InnerIterator& operator++()
     {
-      ++m_rhsIter;
-      return *static_cast<Derived*>(this);
+      ++m_lhsIter;
+      return *this;
     }
 
     EIGEN_STRONG_INLINE Scalar value() const
-    { return m_functor(m_xpr.lhs().coeff(IsRowMajor?m_outer:m_rhsIter.index(),IsRowMajor?m_rhsIter.index():m_outer), m_rhsIter.value()); }
+    { return m_functor(m_lhsIter.value(),
+                       m_rhsEval.coeff(IsRowMajor?m_outer:m_lhsIter.index(),IsRowMajor?m_lhsIter.index():m_outer)); }
 
-    EIGEN_STRONG_INLINE Index index() const { return m_rhsIter.index(); }
-    EIGEN_STRONG_INLINE Index row() const { return m_rhsIter.row(); }
-    EIGEN_STRONG_INLINE Index col() const { return m_rhsIter.col(); }
+    EIGEN_STRONG_INLINE Index index() const { return m_lhsIter.index(); }
+    EIGEN_STRONG_INLINE Index row() const { return m_lhsIter.row(); }
+    EIGEN_STRONG_INLINE Index col() const { return m_lhsIter.col(); }
 
-    EIGEN_STRONG_INLINE operator bool() const { return m_rhsIter; }
+    EIGEN_STRONG_INLINE operator bool() const { return m_lhsIter; }
 
   protected:
-    const CwiseBinaryXpr& m_xpr;
-    RhsIterator m_rhsIter;
-    const BinaryFunc& m_functor;
+    LhsIterator m_lhsIter;
+    const RhsEvaluator &m_rhsEval;
+    const BinaryOp& m_functor;
     const Index m_outer;
+  };
+  
+  
+  enum {
+    CoeffReadCost = evaluator<Lhs>::CoeffReadCost + evaluator<Rhs>::CoeffReadCost + functor_traits<BinaryOp>::Cost,
+    Flags = XprType::Flags
+  };
+  
+  binary_evaluator(const XprType& xpr) 
+    : m_functor(xpr.functor()),
+      m_lhsImpl(xpr.lhs()), 
+      m_rhsImpl(xpr.rhs())  
+  { }
+
+protected:
+  const BinaryOp m_functor;
+  typename evaluator<Lhs>::nestedType m_lhsImpl;
+  typename evaluator<Rhs>::nestedType m_rhsImpl;
 };
 
-} // end namespace internal
+}
 
 /***************************************************************************
 * Implementation of SparseMatrixBase and SparseCwise functions/operators
diff --git a/Eigen/src/SparseCore/SparseCwiseUnaryOp.h b/Eigen/src/SparseCore/SparseCwiseUnaryOp.h
index 5a50c7803..6036fd0a7 100644
--- a/Eigen/src/SparseCore/SparseCwiseUnaryOp.h
+++ b/Eigen/src/SparseCore/SparseCwiseUnaryOp.h
@@ -1,7 +1,7 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
-// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
@@ -12,131 +12,154 @@
 
 namespace Eigen { 
 
-template<typename UnaryOp, typename MatrixType>
-class CwiseUnaryOpImpl<UnaryOp,MatrixType,Sparse>
-  : public SparseMatrixBase<CwiseUnaryOp<UnaryOp, MatrixType> >
+namespace internal {
+  
+template<typename UnaryOp, typename ArgType>
+struct unary_evaluator<CwiseUnaryOp<UnaryOp,ArgType>, IteratorBased>
+  : public evaluator_base<CwiseUnaryOp<UnaryOp,ArgType> >
 {
   public:
+    typedef CwiseUnaryOp<UnaryOp, ArgType> XprType;
 
     class InnerIterator;
-    class ReverseInnerIterator;
-
-    typedef CwiseUnaryOp<UnaryOp, MatrixType> Derived;
-    EIGEN_SPARSE_PUBLIC_INTERFACE(Derived)
+//     class ReverseInnerIterator;
+    
+    enum {
+      CoeffReadCost = evaluator<ArgType>::CoeffReadCost + functor_traits<UnaryOp>::Cost,
+      Flags = XprType::Flags
+    };
+    
+    unary_evaluator(const XprType& op) : m_functor(op.functor()), m_argImpl(op.nestedExpression()) {}
 
   protected:
-    typedef typename internal::traits<Derived>::_XprTypeNested _MatrixTypeNested;
-    typedef typename _MatrixTypeNested::InnerIterator MatrixTypeIterator;
-    typedef typename _MatrixTypeNested::ReverseInnerIterator MatrixTypeReverseIterator;
+    typedef typename evaluator<ArgType>::InnerIterator        EvalIterator;
+//     typedef typename evaluator<ArgType>::ReverseInnerIterator EvalReverseIterator;
+    
+    const UnaryOp m_functor;
+    typename evaluator<ArgType>::nestedType m_argImpl;
 };
 
-template<typename UnaryOp, typename MatrixType>
-class CwiseUnaryOpImpl<UnaryOp,MatrixType,Sparse>::InnerIterator
-    : public CwiseUnaryOpImpl<UnaryOp,MatrixType,Sparse>::MatrixTypeIterator
+template<typename UnaryOp, typename ArgType>
+class unary_evaluator<CwiseUnaryOp<UnaryOp,ArgType>, IteratorBased>::InnerIterator
+    : public unary_evaluator<CwiseUnaryOp<UnaryOp,ArgType>, IteratorBased>::EvalIterator
 {
-    typedef typename CwiseUnaryOpImpl::Scalar Scalar;
-    typedef typename CwiseUnaryOpImpl<UnaryOp,MatrixType,Sparse>::MatrixTypeIterator Base;
+    typedef typename XprType::Scalar Scalar;
+    typedef typename unary_evaluator<CwiseUnaryOp<UnaryOp,ArgType>, IteratorBased>::EvalIterator Base;
   public:
 
-    EIGEN_STRONG_INLINE InnerIterator(const CwiseUnaryOpImpl& unaryOp, typename CwiseUnaryOpImpl::Index outer)
-      : Base(unaryOp.derived().nestedExpression(),outer), m_functor(unaryOp.derived().functor())
+    EIGEN_STRONG_INLINE InnerIterator(const unary_evaluator& unaryOp, typename XprType::Index outer)
+      : Base(unaryOp.m_argImpl,outer), m_functor(unaryOp.m_functor)
     {}
 
     EIGEN_STRONG_INLINE InnerIterator& operator++()
     { Base::operator++(); return *this; }
 
-    EIGEN_STRONG_INLINE typename CwiseUnaryOpImpl::Scalar value() const { return m_functor(Base::value()); }
+    EIGEN_STRONG_INLINE Scalar value() const { return m_functor(Base::value()); }
 
   protected:
     const UnaryOp m_functor;
   private:
-    typename CwiseUnaryOpImpl::Scalar& valueRef();
+    Scalar& valueRef();
 };
 
-template<typename UnaryOp, typename MatrixType>
-class CwiseUnaryOpImpl<UnaryOp,MatrixType,Sparse>::ReverseInnerIterator
-    : public CwiseUnaryOpImpl<UnaryOp,MatrixType,Sparse>::MatrixTypeReverseIterator
-{
-    typedef typename CwiseUnaryOpImpl::Scalar Scalar;
-    typedef typename CwiseUnaryOpImpl<UnaryOp,MatrixType,Sparse>::MatrixTypeReverseIterator Base;
-  public:
-
-    EIGEN_STRONG_INLINE ReverseInnerIterator(const CwiseUnaryOpImpl& unaryOp, typename CwiseUnaryOpImpl::Index outer)
-      : Base(unaryOp.derived().nestedExpression(),outer), m_functor(unaryOp.derived().functor())
-    {}
-
-    EIGEN_STRONG_INLINE ReverseInnerIterator& operator--()
-    { Base::operator--(); return *this; }
-
-    EIGEN_STRONG_INLINE typename CwiseUnaryOpImpl::Scalar value() const { return m_functor(Base::value()); }
-
-  protected:
-    const UnaryOp m_functor;
-  private:
-    typename CwiseUnaryOpImpl::Scalar& valueRef();
-};
-
-template<typename ViewOp, typename MatrixType>
-class CwiseUnaryViewImpl<ViewOp,MatrixType,Sparse>
-  : public SparseMatrixBase<CwiseUnaryView<ViewOp, MatrixType> >
+// template<typename UnaryOp, typename ArgType>
+// class unary_evaluator<CwiseUnaryOp<UnaryOp,ArgType>, IteratorBased>::ReverseInnerIterator
+//     : public unary_evaluator<CwiseUnaryOp<UnaryOp,ArgType>, IteratorBased>::EvalReverseIterator
+// {
+//     typedef typename XprType::Scalar Scalar;
+//     typedef typename unary_evaluator<CwiseUnaryOp<UnaryOp,ArgType>, IteratorBased>::EvalReverseIterator Base;
+//   public:
+// 
+//     EIGEN_STRONG_INLINE ReverseInnerIterator(const XprType& unaryOp, typename XprType::Index outer)
+//       : Base(unaryOp.derived().nestedExpression(),outer), m_functor(unaryOp.derived().functor())
+//     {}
+// 
+//     EIGEN_STRONG_INLINE ReverseInnerIterator& operator--()
+//     { Base::operator--(); return *this; }
+// 
+//     EIGEN_STRONG_INLINE Scalar value() const { return m_functor(Base::value()); }
+// 
+//   protected:
+//     const UnaryOp m_functor;
+//   private:
+//     Scalar& valueRef();
+// };
+
+
+
+
+
+template<typename ViewOp, typename ArgType>
+struct unary_evaluator<CwiseUnaryView<ViewOp,ArgType>, IteratorBased>
+  : public evaluator_base<CwiseUnaryView<ViewOp,ArgType> >
 {
   public:
+    typedef CwiseUnaryView<ViewOp, ArgType> XprType;
 
     class InnerIterator;
     class ReverseInnerIterator;
-
-    typedef CwiseUnaryView<ViewOp, MatrixType> Derived;
-    EIGEN_SPARSE_PUBLIC_INTERFACE(Derived)
+    
+    enum {
+      CoeffReadCost = evaluator<ArgType>::CoeffReadCost + functor_traits<ViewOp>::Cost,
+      Flags = XprType::Flags
+    };
+    
+    unary_evaluator(const XprType& op) : m_functor(op.functor()), m_argImpl(op.nestedExpression()) {}
 
   protected:
-    typedef typename internal::traits<Derived>::_MatrixTypeNested _MatrixTypeNested;
-    typedef typename _MatrixTypeNested::InnerIterator MatrixTypeIterator;
-    typedef typename _MatrixTypeNested::ReverseInnerIterator MatrixTypeReverseIterator;
+    typedef typename evaluator<ArgType>::InnerIterator        EvalIterator;
+//     typedef typename evaluator<ArgType>::ReverseInnerIterator EvalReverseIterator;
+    
+    const ViewOp m_functor;
+    typename evaluator<ArgType>::nestedType m_argImpl;
 };
 
-template<typename ViewOp, typename MatrixType>
-class CwiseUnaryViewImpl<ViewOp,MatrixType,Sparse>::InnerIterator
-    : public CwiseUnaryViewImpl<ViewOp,MatrixType,Sparse>::MatrixTypeIterator
+template<typename ViewOp, typename ArgType>
+class unary_evaluator<CwiseUnaryView<ViewOp,ArgType>, IteratorBased>::InnerIterator
+    : public unary_evaluator<CwiseUnaryView<ViewOp,ArgType>, IteratorBased>::EvalIterator
 {
-    typedef typename CwiseUnaryViewImpl::Scalar Scalar;
-    typedef typename CwiseUnaryViewImpl<ViewOp,MatrixType,Sparse>::MatrixTypeIterator Base;
+    typedef typename XprType::Scalar Scalar;
+    typedef typename unary_evaluator<CwiseUnaryView<ViewOp,ArgType>, IteratorBased>::EvalIterator Base;
   public:
 
-    EIGEN_STRONG_INLINE InnerIterator(const CwiseUnaryViewImpl& unaryOp, typename CwiseUnaryViewImpl::Index outer)
-      : Base(unaryOp.derived().nestedExpression(),outer), m_functor(unaryOp.derived().functor())
+    EIGEN_STRONG_INLINE InnerIterator(const unary_evaluator& unaryOp, typename XprType::Index outer)
+      : Base(unaryOp.m_argImpl,outer), m_functor(unaryOp.m_functor)
     {}
 
     EIGEN_STRONG_INLINE InnerIterator& operator++()
     { Base::operator++(); return *this; }
 
-    EIGEN_STRONG_INLINE typename CwiseUnaryViewImpl::Scalar value() const { return m_functor(Base::value()); }
-    EIGEN_STRONG_INLINE typename CwiseUnaryViewImpl::Scalar& valueRef() { return m_functor(Base::valueRef()); }
+    EIGEN_STRONG_INLINE Scalar value() const { return m_functor(Base::value()); }
+    EIGEN_STRONG_INLINE Scalar& valueRef() { return m_functor(Base::valueRef()); }
 
   protected:
     const ViewOp m_functor;
 };
 
-template<typename ViewOp, typename MatrixType>
-class CwiseUnaryViewImpl<ViewOp,MatrixType,Sparse>::ReverseInnerIterator
-    : public CwiseUnaryViewImpl<ViewOp,MatrixType,Sparse>::MatrixTypeReverseIterator
-{
-    typedef typename CwiseUnaryViewImpl::Scalar Scalar;
-    typedef typename CwiseUnaryViewImpl<ViewOp,MatrixType,Sparse>::MatrixTypeReverseIterator Base;
-  public:
-
-    EIGEN_STRONG_INLINE ReverseInnerIterator(const CwiseUnaryViewImpl& unaryOp, typename CwiseUnaryViewImpl::Index outer)
-      : Base(unaryOp.derived().nestedExpression(),outer), m_functor(unaryOp.derived().functor())
-    {}
-
-    EIGEN_STRONG_INLINE ReverseInnerIterator& operator--()
-    { Base::operator--(); return *this; }
-
-    EIGEN_STRONG_INLINE typename CwiseUnaryViewImpl::Scalar value() const { return m_functor(Base::value()); }
-    EIGEN_STRONG_INLINE typename CwiseUnaryViewImpl::Scalar& valueRef() { return m_functor(Base::valueRef()); }
-
-  protected:
-    const ViewOp m_functor;
-};
+// template<typename ViewOp, typename ArgType>
+// class unary_evaluator<CwiseUnaryView<ViewOp,ArgType>, IteratorBased>::ReverseInnerIterator
+//     : public unary_evaluator<CwiseUnaryView<ViewOp,ArgType>, IteratorBased>::EvalReverseIterator
+// {
+//     typedef typename XprType::Scalar Scalar;
+//     typedef typename unary_evaluator<CwiseUnaryView<ViewOp,ArgType>, IteratorBased>::EvalReverseIterator Base;
+//   public:
+// 
+//     EIGEN_STRONG_INLINE ReverseInnerIterator(const XprType& unaryOp, typename XprType::Index outer)
+//       : Base(unaryOp.derived().nestedExpression(),outer), m_functor(unaryOp.derived().functor())
+//     {}
+// 
+//     EIGEN_STRONG_INLINE ReverseInnerIterator& operator--()
+//     { Base::operator--(); return *this; }
+// 
+//     EIGEN_STRONG_INLINE Scalar value() const { return m_functor(Base::value()); }
+//     EIGEN_STRONG_INLINE Scalar& valueRef() { return m_functor(Base::valueRef()); }
+// 
+//   protected:
+//     const ViewOp m_functor;
+// };
+
+
+} // end namespace internal
 
 template<typename Derived>
 EIGEN_STRONG_INLINE Derived&
diff --git a/Eigen/src/SparseCore/SparseDenseProduct.h b/Eigen/src/SparseCore/SparseDenseProduct.h
index d40e966c1..04c838a71 100644
--- a/Eigen/src/SparseCore/SparseDenseProduct.h
+++ b/Eigen/src/SparseCore/SparseDenseProduct.h
@@ -1,7 +1,7 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
-// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
@@ -12,152 +12,10 @@
 
 namespace Eigen { 
 
-template<typename Lhs, typename Rhs, int InnerSize> struct SparseDenseProductReturnType
-{
-  typedef SparseTimeDenseProduct<Lhs,Rhs> Type;
-};
-
-template<typename Lhs, typename Rhs> struct SparseDenseProductReturnType<Lhs,Rhs,1>
-{
-  typedef typename internal::conditional<
-    Lhs::IsRowMajor,
-    SparseDenseOuterProduct<Rhs,Lhs,true>,
-    SparseDenseOuterProduct<Lhs,Rhs,false> >::type Type;
-};
-
-template<typename Lhs, typename Rhs, int InnerSize> struct DenseSparseProductReturnType
-{
-  typedef DenseTimeSparseProduct<Lhs,Rhs> Type;
-};
-
-template<typename Lhs, typename Rhs> struct DenseSparseProductReturnType<Lhs,Rhs,1>
-{
-  typedef typename internal::conditional<
-    Rhs::IsRowMajor,
-    SparseDenseOuterProduct<Rhs,Lhs,true>,
-    SparseDenseOuterProduct<Lhs,Rhs,false> >::type Type;
-};
-
 namespace internal {
 
-template<typename Lhs, typename Rhs, bool Tr>
-struct traits<SparseDenseOuterProduct<Lhs,Rhs,Tr> >
-{
-  typedef Sparse StorageKind;
-  typedef typename scalar_product_traits<typename traits<Lhs>::Scalar,
-                                         typename traits<Rhs>::Scalar>::ReturnType Scalar;
-  typedef typename Lhs::Index Index;
-  typedef typename Lhs::Nested LhsNested;
-  typedef typename Rhs::Nested RhsNested;
-  typedef typename remove_all<LhsNested>::type _LhsNested;
-  typedef typename remove_all<RhsNested>::type _RhsNested;
-
-  enum {
-    LhsCoeffReadCost = traits<_LhsNested>::CoeffReadCost,
-    RhsCoeffReadCost = traits<_RhsNested>::CoeffReadCost,
-
-    RowsAtCompileTime    = Tr ? int(traits<Rhs>::RowsAtCompileTime)     : int(traits<Lhs>::RowsAtCompileTime),
-    ColsAtCompileTime    = Tr ? int(traits<Lhs>::ColsAtCompileTime)     : int(traits<Rhs>::ColsAtCompileTime),
-    MaxRowsAtCompileTime = Tr ? int(traits<Rhs>::MaxRowsAtCompileTime)  : int(traits<Lhs>::MaxRowsAtCompileTime),
-    MaxColsAtCompileTime = Tr ? int(traits<Lhs>::MaxColsAtCompileTime)  : int(traits<Rhs>::MaxColsAtCompileTime),
-
-    Flags = Tr ? RowMajorBit : 0,
-
-    CoeffReadCost = LhsCoeffReadCost + RhsCoeffReadCost + NumTraits<Scalar>::MulCost
-  };
-};
-
-} // end namespace internal
-
-template<typename Lhs, typename Rhs, bool Tr>
-class SparseDenseOuterProduct
- : public SparseMatrixBase<SparseDenseOuterProduct<Lhs,Rhs,Tr> >
-{
-  public:
-
-    typedef SparseMatrixBase<SparseDenseOuterProduct> Base;
-    EIGEN_DENSE_PUBLIC_INTERFACE(SparseDenseOuterProduct)
-    typedef internal::traits<SparseDenseOuterProduct> Traits;
-
-  private:
-
-    typedef typename Traits::LhsNested LhsNested;
-    typedef typename Traits::RhsNested RhsNested;
-    typedef typename Traits::_LhsNested _LhsNested;
-    typedef typename Traits::_RhsNested _RhsNested;
-
-  public:
-
-    class InnerIterator;
-
-    EIGEN_STRONG_INLINE SparseDenseOuterProduct(const Lhs& lhs, const Rhs& rhs)
-      : m_lhs(lhs), m_rhs(rhs)
-    {
-      EIGEN_STATIC_ASSERT(!Tr,YOU_MADE_A_PROGRAMMING_MISTAKE);
-    }
-
-    EIGEN_STRONG_INLINE SparseDenseOuterProduct(const Rhs& rhs, const Lhs& lhs)
-      : m_lhs(lhs), m_rhs(rhs)
-    {
-      EIGEN_STATIC_ASSERT(Tr,YOU_MADE_A_PROGRAMMING_MISTAKE);
-    }
-
-    EIGEN_STRONG_INLINE Index rows() const { return Tr ? Index(m_rhs.rows()) : m_lhs.rows(); }
-    EIGEN_STRONG_INLINE Index cols() const { return Tr ? m_lhs.cols() : Index(m_rhs.cols()); }
-
-    EIGEN_STRONG_INLINE const _LhsNested& lhs() const { return m_lhs; }
-    EIGEN_STRONG_INLINE const _RhsNested& rhs() const { return m_rhs; }
-
-  protected:
-    LhsNested m_lhs;
-    RhsNested m_rhs;
-};
-
-template<typename Lhs, typename Rhs, bool Transpose>
-class SparseDenseOuterProduct<Lhs,Rhs,Transpose>::InnerIterator : public _LhsNested::InnerIterator
-{
-    typedef typename _LhsNested::InnerIterator Base;
-    typedef typename SparseDenseOuterProduct::Index Index;
-  public:
-    EIGEN_STRONG_INLINE InnerIterator(const SparseDenseOuterProduct& prod, Index outer)
-      : Base(prod.lhs(), 0), m_outer(outer), m_empty(false), m_factor(get(prod.rhs(), outer, typename internal::traits<Rhs>::StorageKind() ))
-    {}
-
-    inline Index outer() const { return m_outer; }
-    inline Index row() const { return Transpose ? m_outer : Base::index(); }
-    inline Index col() const { return Transpose ? Base::index() : m_outer; }
-
-    inline Scalar value() const { return Base::value() * m_factor; }
-    inline operator bool() const { return Base::operator bool() && !m_empty; }
-
-  protected:
-    Scalar get(const _RhsNested &rhs, Index outer, Dense = Dense()) const
-    {
-      return rhs.coeff(outer);
-    }
-    
-    Scalar get(const _RhsNested &rhs, Index outer, Sparse = Sparse())
-    {
-      typename Traits::_RhsNested::InnerIterator it(rhs, outer);
-      if (it && it.index()==0 && it.value()!=Scalar(0))
-        return it.value();
-      m_empty = true;
-      return Scalar(0);
-    }
-    
-    Index m_outer;
-    bool m_empty;
-    Scalar m_factor;
-};
-
-namespace internal {
-template<typename Lhs, typename Rhs>
-struct traits<SparseTimeDenseProduct<Lhs,Rhs> >
- : traits<ProductBase<SparseTimeDenseProduct<Lhs,Rhs>, Lhs, Rhs> >
-{
-  typedef Dense StorageKind;
-  typedef MatrixXpr XprKind;
-};
+template <> struct product_promote_storage_type<Sparse,Dense, OuterProduct> { typedef Sparse ret; };
+template <> struct product_promote_storage_type<Dense,Sparse, OuterProduct> { typedef Sparse ret; };
 
 template<typename SparseLhsType, typename DenseRhsType, typename DenseResType,
          typename AlphaType,
@@ -172,16 +30,17 @@ struct sparse_time_dense_product_impl<SparseLhsType,DenseRhsType,DenseResType, t
   typedef typename internal::remove_all<DenseRhsType>::type Rhs;
   typedef typename internal::remove_all<DenseResType>::type Res;
   typedef typename Lhs::Index Index;
-  typedef typename Lhs::InnerIterator LhsInnerIterator;
+  typedef typename evaluator<Lhs>::InnerIterator LhsInnerIterator;
   static void run(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res, const typename Res::Scalar& alpha)
   {
+    typename evaluator<Lhs>::type lhsEval(lhs);
     for(Index c=0; c<rhs.cols(); ++c)
     {
       Index n = lhs.outerSize();
       for(Index j=0; j<n; ++j)
       {
         typename Res::Scalar tmp(0);
-        for(LhsInnerIterator it(lhs,j); it ;++it)
+        for(LhsInnerIterator it(lhsEval,j); it ;++it)
           tmp += it.value() * rhs.coeff(it.index(),c);
         res.coeffRef(j,c) = alpha * tmp;
       }
@@ -203,17 +62,18 @@ struct sparse_time_dense_product_impl<SparseLhsType,DenseRhsType,DenseResType, A
   typedef typename internal::remove_all<SparseLhsType>::type Lhs;
   typedef typename internal::remove_all<DenseRhsType>::type Rhs;
   typedef typename internal::remove_all<DenseResType>::type Res;
-  typedef typename Lhs::InnerIterator LhsInnerIterator;
   typedef typename Lhs::Index Index;
+  typedef typename evaluator<Lhs>::InnerIterator LhsInnerIterator;
   static void run(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res, const AlphaType& alpha)
   {
+    typename evaluator<Lhs>::type lhsEval(lhs);
     for(Index c=0; c<rhs.cols(); ++c)
     {
       for(Index j=0; j<lhs.outerSize(); ++j)
       {
 //        typename Res::Scalar rhs_j = alpha * rhs.coeff(j,c);
         typename internal::scalar_product_traits<AlphaType, typename Rhs::Scalar>::ReturnType rhs_j(alpha * rhs.coeff(j,c));
-        for(LhsInnerIterator it(lhs,j); it ;++it)
+        for(LhsInnerIterator it(lhsEval,j); it ;++it)
           res.coeffRef(it.index(),c) += it.value() * rhs_j;
       }
     }
@@ -226,14 +86,15 @@ struct sparse_time_dense_product_impl<SparseLhsType,DenseRhsType,DenseResType, t
   typedef typename internal::remove_all<SparseLhsType>::type Lhs;
   typedef typename internal::remove_all<DenseRhsType>::type Rhs;
   typedef typename internal::remove_all<DenseResType>::type Res;
-  typedef typename Lhs::InnerIterator LhsInnerIterator;
   typedef typename Lhs::Index Index;
+  typedef typename evaluator<Lhs>::InnerIterator LhsInnerIterator;
   static void run(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res, const typename Res::Scalar& alpha)
   {
+    typename evaluator<Lhs>::type lhsEval(lhs);
     for(Index j=0; j<lhs.outerSize(); ++j)
     {
       typename Res::RowXpr res_j(res.row(j));
-      for(LhsInnerIterator it(lhs,j); it ;++it)
+      for(LhsInnerIterator it(lhsEval,j); it ;++it)
         res_j += (alpha*it.value()) * rhs.row(it.index());
     }
   }
@@ -245,14 +106,15 @@ struct sparse_time_dense_product_impl<SparseLhsType,DenseRhsType,DenseResType, t
   typedef typename internal::remove_all<SparseLhsType>::type Lhs;
   typedef typename internal::remove_all<DenseRhsType>::type Rhs;
   typedef typename internal::remove_all<DenseResType>::type Res;
-  typedef typename Lhs::InnerIterator LhsInnerIterator;
   typedef typename Lhs::Index Index;
+  typedef typename evaluator<Lhs>::InnerIterator LhsInnerIterator;
   static void run(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res, const typename Res::Scalar& alpha)
   {
+    typename evaluator<Lhs>::type lhsEval(lhs);
     for(Index j=0; j<lhs.outerSize(); ++j)
     {
       typename Rhs::ConstRowXpr rhs_j(rhs.row(j));
-      for(LhsInnerIterator it(lhs,j); it ;++it)
+      for(LhsInnerIterator it(lhsEval,j); it ;++it)
         res.row(it.index()) += (alpha*it.value()) * rhs_j;
     }
   }
@@ -266,58 +128,154 @@ inline void sparse_time_dense_product(const SparseLhsType& lhs, const DenseRhsTy
 
 } // end namespace internal
 
-template<typename Lhs, typename Rhs>
-class SparseTimeDenseProduct
-  : public ProductBase<SparseTimeDenseProduct<Lhs,Rhs>, Lhs, Rhs>
+namespace internal {
+
+template<typename Lhs, typename Rhs, int ProductType>
+struct generic_product_impl<Lhs, Rhs, SparseShape, DenseShape, ProductType>
 {
-  public:
-    EIGEN_PRODUCT_PUBLIC_INTERFACE(SparseTimeDenseProduct)
+  template<typename Dest>
+  static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
+  {
+    typedef typename nested_eval<Lhs,Dynamic>::type LhsNested;
+    typedef typename nested_eval<Rhs,Dynamic>::type RhsNested;
+    LhsNested lhsNested(lhs);
+    RhsNested rhsNested(rhs);
+    
+    dst.setZero();
+    internal::sparse_time_dense_product(lhsNested, rhsNested, dst, typename Dest::Scalar(1));
+  }
+};
 
-    SparseTimeDenseProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs)
+template<typename Lhs, typename Rhs, int ProductType>
+struct generic_product_impl<Lhs, Rhs, DenseShape, SparseShape, ProductType>
+{
+  template<typename Dest>
+  static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
+  {
+    typedef typename nested_eval<Lhs,Dynamic>::type LhsNested;
+    typedef typename nested_eval<Rhs,Dynamic>::type RhsNested;
+    LhsNested lhsNested(lhs);
+    RhsNested rhsNested(rhs);
+    
+    dst.setZero();
+    // transpoe everything
+    Transpose<Dest> dstT(dst);
+    internal::sparse_time_dense_product(rhsNested.transpose(), lhsNested.transpose(), dstT, typename Dest::Scalar(1));
+  }
+};
+
+template<typename LhsT, typename RhsT, bool NeedToTranspose>
+struct sparse_dense_outer_product_evaluator
+{
+protected:
+  typedef typename conditional<NeedToTranspose,RhsT,LhsT>::type Lhs1;
+  typedef typename conditional<NeedToTranspose,LhsT,RhsT>::type ActualRhs;
+  typedef Product<LhsT,RhsT,DefaultProduct> ProdXprType;
+  
+  // if the actual left-hand side is a dense vector,
+  // then build a sparse-view so that we can seamlessly iterator over it.
+  typedef typename conditional<is_same<typename internal::traits<Lhs1>::StorageKind,Sparse>::value,
+            Lhs1, SparseView<Lhs1> >::type ActualLhs;
+  typedef typename conditional<is_same<typename internal::traits<Lhs1>::StorageKind,Sparse>::value,
+            Lhs1 const&, SparseView<Lhs1> >::type LhsArg;
+            
+  typedef typename evaluator<ActualLhs>::type LhsEval;
+  typedef typename evaluator<ActualRhs>::type RhsEval;
+  typedef typename evaluator<ActualLhs>::InnerIterator LhsIterator;
+  typedef typename ProdXprType::Scalar Scalar;
+  typedef typename ProdXprType::Index Index;
+  
+public:
+  enum {
+    Flags = NeedToTranspose ? RowMajorBit : 0,
+    CoeffReadCost = Dynamic
+  };
+  
+  class InnerIterator : public LhsIterator
+  {
+  public:
+    InnerIterator(const sparse_dense_outer_product_evaluator &xprEval, Index outer)
+      : LhsIterator(xprEval.m_lhsXprImpl, 0),
+        m_outer(outer),
+        m_empty(false),
+        m_factor(get(xprEval.m_rhsXprImpl, outer, typename internal::traits<ActualRhs>::StorageKind() ))
     {}
+    
+    EIGEN_STRONG_INLINE Index outer() const { return m_outer; }
+    EIGEN_STRONG_INLINE Index row()   const { return NeedToTranspose ? m_outer : LhsIterator::index(); }
+    EIGEN_STRONG_INLINE Index col()   const { return NeedToTranspose ? LhsIterator::index() : m_outer; }
 
-    template<typename Dest> void scaleAndAddTo(Dest& dest, const Scalar& alpha) const
+    EIGEN_STRONG_INLINE Scalar value() const { return LhsIterator::value() * m_factor; }
+    EIGEN_STRONG_INLINE operator bool() const { return LhsIterator::operator bool() && (!m_empty); }
+    
+  protected:
+    Scalar get(const RhsEval &rhs, Index outer, Dense = Dense()) const
     {
-      internal::sparse_time_dense_product(m_lhs, m_rhs, dest, alpha);
+      return rhs.coeff(outer);
     }
-
-  private:
-    SparseTimeDenseProduct& operator=(const SparseTimeDenseProduct&);
+    
+    Scalar get(const RhsEval &rhs, Index outer, Sparse = Sparse())
+    {
+      typename RhsEval::InnerIterator it(rhs, outer);
+      if (it && it.index()==0 && it.value()!=Scalar(0))
+        return it.value();
+      m_empty = true;
+      return Scalar(0);
+    }
+    
+    Index m_outer;
+    bool m_empty;
+    Scalar m_factor;
+  };
+  
+  sparse_dense_outer_product_evaluator(const ActualLhs &lhs, const ActualRhs &rhs)
+     : m_lhs(lhs), m_lhsXprImpl(m_lhs), m_rhsXprImpl(rhs)
+  {}
+  
+  // transpose case
+  sparse_dense_outer_product_evaluator(const ActualRhs &rhs, const Lhs1 &lhs)
+     : m_lhs(lhs), m_lhsXprImpl(m_lhs), m_rhsXprImpl(rhs)
+  {}
+    
+protected:
+  const LhsArg m_lhs;
+  typename evaluator<ActualLhs>::nestedType m_lhsXprImpl;
+  typename evaluator<ActualRhs>::nestedType m_rhsXprImpl;
 };
 
-
-// dense = dense * sparse
-namespace internal {
+// sparse * dense outer product
 template<typename Lhs, typename Rhs>
-struct traits<DenseTimeSparseProduct<Lhs,Rhs> >
- : traits<ProductBase<DenseTimeSparseProduct<Lhs,Rhs>, Lhs, Rhs> >
+struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, OuterProduct, SparseShape, DenseShape, typename traits<Lhs>::Scalar, typename traits<Rhs>::Scalar> 
+  : sparse_dense_outer_product_evaluator<Lhs,Rhs, Lhs::IsRowMajor>
 {
-  typedef Dense StorageKind;
+  typedef sparse_dense_outer_product_evaluator<Lhs,Rhs, Lhs::IsRowMajor> Base;
+  
+  typedef Product<Lhs, Rhs> XprType;
+  typedef typename XprType::PlainObject PlainObject;
+
+  product_evaluator(const XprType& xpr)
+    : Base(xpr.lhs(), xpr.rhs())
+  {}
+  
 };
-} // end namespace internal
 
 template<typename Lhs, typename Rhs>
-class DenseTimeSparseProduct
-  : public ProductBase<DenseTimeSparseProduct<Lhs,Rhs>, Lhs, Rhs>
+struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, OuterProduct, DenseShape, SparseShape, typename traits<Lhs>::Scalar, typename traits<Rhs>::Scalar> 
+  : sparse_dense_outer_product_evaluator<Lhs,Rhs, Rhs::IsRowMajor>
 {
-  public:
-    EIGEN_PRODUCT_PUBLIC_INTERFACE(DenseTimeSparseProduct)
-
-    DenseTimeSparseProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs)
-    {}
-
-    template<typename Dest> void scaleAndAddTo(Dest& dest, const Scalar& alpha) const
-    {
-      Transpose<const _LhsNested> lhs_t(m_lhs);
-      Transpose<const _RhsNested> rhs_t(m_rhs);
-      Transpose<Dest> dest_t(dest);
-      internal::sparse_time_dense_product(rhs_t, lhs_t, dest_t, alpha);
-    }
-
-  private:
-    DenseTimeSparseProduct& operator=(const DenseTimeSparseProduct&);
+  typedef sparse_dense_outer_product_evaluator<Lhs,Rhs, Rhs::IsRowMajor> Base;
+  
+  typedef Product<Lhs, Rhs> XprType;
+  typedef typename XprType::PlainObject PlainObject;
+
+  product_evaluator(const XprType& xpr)
+    : Base(xpr.lhs(), xpr.rhs())
+  {}
+  
 };
 
+} // end namespace internal
+
 } // end namespace Eigen
 
 #endif // EIGEN_SPARSEDENSEPRODUCT_H
diff --git a/Eigen/src/SparseCore/SparseDiagonalProduct.h b/Eigen/src/SparseCore/SparseDiagonalProduct.h
index c056b4914..0cb2bd572 100644
--- a/Eigen/src/SparseCore/SparseDiagonalProduct.h
+++ b/Eigen/src/SparseCore/SparseDiagonalProduct.h
@@ -1,7 +1,7 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
-// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2009-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
@@ -26,173 +26,122 @@ namespace Eigen {
 
 namespace internal {
 
-template<typename Lhs, typename Rhs>
-struct traits<SparseDiagonalProduct<Lhs, Rhs> >
-{
-  typedef typename remove_all<Lhs>::type _Lhs;
-  typedef typename remove_all<Rhs>::type _Rhs;
-  typedef typename _Lhs::Scalar Scalar;
-  // propagate the index type of the sparse matrix
-  typedef typename conditional< is_diagonal<_Lhs>::ret,
-                                typename traits<Rhs>::Index,
-                                typename traits<Lhs>::Index>::type Index;
-  typedef Sparse StorageKind;
-  typedef MatrixXpr XprKind;
-  enum {
-    RowsAtCompileTime = _Lhs::RowsAtCompileTime,
-    ColsAtCompileTime = _Rhs::ColsAtCompileTime,
-
-    MaxRowsAtCompileTime = _Lhs::MaxRowsAtCompileTime,
-    MaxColsAtCompileTime = _Rhs::MaxColsAtCompileTime,
-
-    SparseFlags = is_diagonal<_Lhs>::ret ? int(_Rhs::Flags) : int(_Lhs::Flags),
-    Flags = (SparseFlags&RowMajorBit),
-    CoeffReadCost = Dynamic
-  };
+enum {
+  SDP_AsScalarProduct,
+  SDP_AsCwiseProduct
 };
+  
+template<typename SparseXprType, typename DiagonalCoeffType, int SDP_Tag>
+struct sparse_diagonal_product_evaluator;
 
-enum {SDP_IsDiagonal, SDP_IsSparseRowMajor, SDP_IsSparseColMajor};
-template<typename Lhs, typename Rhs, typename SparseDiagonalProductType, int RhsMode, int LhsMode>
-class sparse_diagonal_product_inner_iterator_selector;
-
-} // end namespace internal
-
-template<typename Lhs, typename Rhs>
-class SparseDiagonalProduct
-  : public SparseMatrixBase<SparseDiagonalProduct<Lhs,Rhs> >,
-    internal::no_assignment_operator
+template<typename Lhs, typename Rhs, int ProductTag>
+struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, ProductTag, DiagonalShape, SparseShape, typename traits<Lhs>::Scalar, typename traits<Rhs>::Scalar> 
+  : public sparse_diagonal_product_evaluator<Rhs, typename Lhs::DiagonalVectorType, Rhs::Flags&RowMajorBit?SDP_AsScalarProduct:SDP_AsCwiseProduct>
 {
-    typedef typename Lhs::Nested LhsNested;
-    typedef typename Rhs::Nested RhsNested;
-
-    typedef typename internal::remove_all<LhsNested>::type _LhsNested;
-    typedef typename internal::remove_all<RhsNested>::type _RhsNested;
-
-    enum {
-      LhsMode = internal::is_diagonal<_LhsNested>::ret ? internal::SDP_IsDiagonal
-              : (_LhsNested::Flags&RowMajorBit) ? internal::SDP_IsSparseRowMajor : internal::SDP_IsSparseColMajor,
-      RhsMode = internal::is_diagonal<_RhsNested>::ret ? internal::SDP_IsDiagonal
-              : (_RhsNested::Flags&RowMajorBit) ? internal::SDP_IsSparseRowMajor : internal::SDP_IsSparseColMajor
-    };
-
-  public:
-
-    EIGEN_SPARSE_PUBLIC_INTERFACE(SparseDiagonalProduct)
-
-    typedef internal::sparse_diagonal_product_inner_iterator_selector
-                      <_LhsNested,_RhsNested,SparseDiagonalProduct,LhsMode,RhsMode> InnerIterator;
-    
-    // We do not want ReverseInnerIterator for diagonal-sparse products,
-    // but this dummy declaration is needed to make diag * sparse * diag compile.
-    class ReverseInnerIterator;
-
-    EIGEN_STRONG_INLINE SparseDiagonalProduct(const Lhs& lhs, const Rhs& rhs)
-      : m_lhs(lhs), m_rhs(rhs)
-    {
-      eigen_assert(lhs.cols() == rhs.rows() && "invalid sparse matrix * diagonal matrix product");
-    }
-
-    EIGEN_STRONG_INLINE Index rows() const { return Index(m_lhs.rows()); }
-    EIGEN_STRONG_INLINE Index cols() const { return Index(m_rhs.cols()); }
-
-    EIGEN_STRONG_INLINE const _LhsNested& lhs() const { return m_lhs; }
-    EIGEN_STRONG_INLINE const _RhsNested& rhs() const { return m_rhs; }
-
-  protected:
-    LhsNested m_lhs;
-    RhsNested m_rhs;
+  typedef Product<Lhs, Rhs, DefaultProduct> XprType;
+  typedef evaluator<XprType> type;
+  typedef evaluator<XprType> nestedType;
+  enum { CoeffReadCost = Dynamic, Flags = Rhs::Flags&RowMajorBit }; // FIXME CoeffReadCost & Flags
+  
+  typedef sparse_diagonal_product_evaluator<Rhs, typename Lhs::DiagonalVectorType, Rhs::Flags&RowMajorBit?SDP_AsScalarProduct:SDP_AsCwiseProduct> Base;
+  product_evaluator(const XprType& xpr) : Base(xpr.rhs(), xpr.lhs().diagonal()) {}
 };
 
-namespace internal {
-
-template<typename Lhs, typename Rhs, typename SparseDiagonalProductType>
-class sparse_diagonal_product_inner_iterator_selector
-<Lhs,Rhs,SparseDiagonalProductType,SDP_IsDiagonal,SDP_IsSparseRowMajor>
-  : public CwiseUnaryOp<scalar_multiple_op<typename Lhs::Scalar>,const Rhs>::InnerIterator
+template<typename Lhs, typename Rhs, int ProductTag>
+struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, ProductTag, SparseShape, DiagonalShape, typename traits<Lhs>::Scalar, typename traits<Rhs>::Scalar> 
+  : public sparse_diagonal_product_evaluator<Lhs, Transpose<const typename Rhs::DiagonalVectorType>, Lhs::Flags&RowMajorBit?SDP_AsCwiseProduct:SDP_AsScalarProduct>
 {
-    typedef typename CwiseUnaryOp<scalar_multiple_op<typename Lhs::Scalar>,const Rhs>::InnerIterator Base;
-    typedef typename Rhs::Index Index;
-  public:
-    inline sparse_diagonal_product_inner_iterator_selector(
-              const SparseDiagonalProductType& expr, Index outer)
-      : Base(expr.rhs()*(expr.lhs().diagonal().coeff(outer)), outer)
-    {}
+  typedef Product<Lhs, Rhs, DefaultProduct> XprType;
+  typedef evaluator<XprType> type;
+  typedef evaluator<XprType> nestedType;
+  enum { CoeffReadCost = Dynamic, Flags = Lhs::Flags&RowMajorBit }; // FIXME CoeffReadCost & Flags
+  
+  typedef sparse_diagonal_product_evaluator<Lhs, Transpose<const typename Rhs::DiagonalVectorType>, Lhs::Flags&RowMajorBit?SDP_AsCwiseProduct:SDP_AsScalarProduct> Base;
+  product_evaluator(const XprType& xpr) : Base(xpr.lhs(), xpr.rhs().diagonal()) {}
 };
 
-template<typename Lhs, typename Rhs, typename SparseDiagonalProductType>
-class sparse_diagonal_product_inner_iterator_selector
-<Lhs,Rhs,SparseDiagonalProductType,SDP_IsDiagonal,SDP_IsSparseColMajor>
-  : public CwiseBinaryOp<
-      scalar_product_op<typename Lhs::Scalar>,
-      const typename Rhs::ConstInnerVectorReturnType,
-      const typename Lhs::DiagonalVectorType>::InnerIterator
+template<typename SparseXprType, typename DiagonalCoeffType>
+struct sparse_diagonal_product_evaluator<SparseXprType, DiagonalCoeffType, SDP_AsScalarProduct> 
 {
-    typedef typename CwiseBinaryOp<
-      scalar_product_op<typename Lhs::Scalar>,
-      const typename Rhs::ConstInnerVectorReturnType,
-      const typename Lhs::DiagonalVectorType>::InnerIterator Base;
-    typedef typename Rhs::Index Index;
-    Index m_outer;
+protected:
+  typedef typename evaluator<SparseXprType>::InnerIterator SparseXprInnerIterator;
+  typedef typename SparseXprType::Scalar Scalar;
+  typedef typename SparseXprType::Index Index;
+  
+public:
+  class InnerIterator : public SparseXprInnerIterator
+  {
   public:
-    inline sparse_diagonal_product_inner_iterator_selector(
-              const SparseDiagonalProductType& expr, Index outer)
-      : Base(expr.rhs().innerVector(outer) .cwiseProduct(expr.lhs().diagonal()), 0), m_outer(outer)
+    InnerIterator(const sparse_diagonal_product_evaluator &xprEval, Index outer)
+      : SparseXprInnerIterator(xprEval.m_sparseXprImpl, outer),
+        m_coeff(xprEval.m_diagCoeffImpl.coeff(outer))
     {}
     
-    inline Index outer() const { return m_outer; }
-    inline Index col() const { return m_outer; }
+    EIGEN_STRONG_INLINE Scalar value() const { return m_coeff * SparseXprInnerIterator::value(); }
+  protected:
+    typename DiagonalCoeffType::Scalar m_coeff;
+  };
+  
+  sparse_diagonal_product_evaluator(const SparseXprType &sparseXpr, const DiagonalCoeffType &diagCoeff)
+    : m_sparseXprImpl(sparseXpr), m_diagCoeffImpl(diagCoeff)
+  {}
+    
+protected:
+  typename evaluator<SparseXprType>::nestedType m_sparseXprImpl;
+  typename evaluator<DiagonalCoeffType>::nestedType m_diagCoeffImpl;
 };
 
-template<typename Lhs, typename Rhs, typename SparseDiagonalProductType>
-class sparse_diagonal_product_inner_iterator_selector
-<Lhs,Rhs,SparseDiagonalProductType,SDP_IsSparseColMajor,SDP_IsDiagonal>
-  : public CwiseUnaryOp<scalar_multiple_op<typename Rhs::Scalar>,const Lhs>::InnerIterator
-{
-    typedef typename CwiseUnaryOp<scalar_multiple_op<typename Rhs::Scalar>,const Lhs>::InnerIterator Base;
-    typedef typename Lhs::Index Index;
-  public:
-    inline sparse_diagonal_product_inner_iterator_selector(
-              const SparseDiagonalProductType& expr, Index outer)
-      : Base(expr.lhs()*expr.rhs().diagonal().coeff(outer), outer)
-    {}
-};
 
-template<typename Lhs, typename Rhs, typename SparseDiagonalProductType>
-class sparse_diagonal_product_inner_iterator_selector
-<Lhs,Rhs,SparseDiagonalProductType,SDP_IsSparseRowMajor,SDP_IsDiagonal>
-  : public CwiseBinaryOp<
-      scalar_product_op<typename Rhs::Scalar>,
-      const typename Lhs::ConstInnerVectorReturnType,
-      const Transpose<const typename Rhs::DiagonalVectorType> >::InnerIterator
+template<typename SparseXprType, typename DiagCoeffType>
+struct sparse_diagonal_product_evaluator<SparseXprType, DiagCoeffType, SDP_AsCwiseProduct> 
 {
-    typedef typename CwiseBinaryOp<
-      scalar_product_op<typename Rhs::Scalar>,
-      const typename Lhs::ConstInnerVectorReturnType,
-      const Transpose<const typename Rhs::DiagonalVectorType> >::InnerIterator Base;
-    typedef typename Lhs::Index Index;
-    Index m_outer;
+  typedef typename SparseXprType::Scalar Scalar;
+  typedef typename SparseXprType::Index Index;
+  
+  typedef CwiseBinaryOp<scalar_product_op<Scalar>,
+                        const typename SparseXprType::ConstInnerVectorReturnType,
+                        const DiagCoeffType> CwiseProductType;
+                        
+  typedef typename evaluator<CwiseProductType>::type CwiseProductEval;
+  typedef typename evaluator<CwiseProductType>::InnerIterator CwiseProductIterator;
+  
+  class InnerIterator
+  {
   public:
-    inline sparse_diagonal_product_inner_iterator_selector(
-              const SparseDiagonalProductType& expr, Index outer)
-      : Base(expr.lhs().innerVector(outer) .cwiseProduct(expr.rhs().diagonal().transpose()), 0), m_outer(outer)
+    InnerIterator(const sparse_diagonal_product_evaluator &xprEval, Index outer)
+      : m_cwiseEval(xprEval.m_sparseXprNested.innerVector(outer).cwiseProduct(xprEval.m_diagCoeffNested)),
+        m_cwiseIter(m_cwiseEval, 0),
+        m_outer(outer)
     {}
     
-    inline Index outer() const { return m_outer; }
-    inline Index row() const { return m_outer; }
+    inline Scalar value() const  { return m_cwiseIter.value(); }
+    inline Index index() const  { return m_cwiseIter.index(); }
+    inline Index outer() const  { return m_outer; }
+    inline Index col() const    { return SparseXprType::IsRowMajor ? m_cwiseIter.index() : m_outer; }
+    inline Index row() const    { return SparseXprType::IsRowMajor ? m_outer : m_cwiseIter.index(); }
+    
+    EIGEN_STRONG_INLINE InnerIterator& operator++()
+    { ++m_cwiseIter; return *this; }
+    inline operator bool() const  { return m_cwiseIter; }
+    
+  protected:
+    CwiseProductEval m_cwiseEval;
+    CwiseProductIterator m_cwiseIter;
+    Index m_outer;
+  };
+  
+  sparse_diagonal_product_evaluator(const SparseXprType &sparseXpr, const DiagCoeffType &diagCoeff)
+    : m_sparseXprNested(sparseXpr), m_diagCoeffNested(diagCoeff)
+  {}
+    
+protected:
+  typename nested_eval<SparseXprType,1>::type m_sparseXprNested;
+  typename nested_eval<DiagCoeffType,SparseXprType::IsRowMajor ? SparseXprType::RowsAtCompileTime
+                                                               : SparseXprType::ColsAtCompileTime>::type m_diagCoeffNested;
 };
 
 } // end namespace internal
 
-// SparseMatrixBase functions
-
-template<typename Derived>
-template<typename OtherDerived>
-const SparseDiagonalProduct<Derived,OtherDerived>
-SparseMatrixBase<Derived>::operator*(const DiagonalBase<OtherDerived> &other) const
-{
-  return SparseDiagonalProduct<Derived,OtherDerived>(this->derived(), other.derived());
-}
-
 } // end namespace Eigen
 
 #endif // EIGEN_SPARSE_DIAGONAL_PRODUCT_H
diff --git a/Eigen/src/SparseCore/SparseDot.h b/Eigen/src/SparseCore/SparseDot.h
index db39c9aec..b10c8058f 100644
--- a/Eigen/src/SparseCore/SparseDot.h
+++ b/Eigen/src/SparseCore/SparseDot.h
@@ -26,7 +26,8 @@ SparseMatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const
   eigen_assert(size() == other.size());
   eigen_assert(other.size()>0 && "you are using a non initialized vector");
 
-  typename Derived::InnerIterator i(derived(),0);
+  typename internal::evaluator<Derived>::type thisEval(derived());
+  typename internal::evaluator<Derived>::InnerIterator i(thisEval, 0);
   Scalar res(0);
   while (i)
   {
@@ -49,16 +50,12 @@ SparseMatrixBase<Derived>::dot(const SparseMatrixBase<OtherDerived>& other) cons
 
   eigen_assert(size() == other.size());
 
-  typedef typename Derived::Nested  Nested;
-  typedef typename OtherDerived::Nested  OtherNested;
-  typedef typename internal::remove_all<Nested>::type  NestedCleaned;
-  typedef typename internal::remove_all<OtherNested>::type  OtherNestedCleaned;
+  typename internal::evaluator<Derived>::type thisEval(derived());
+  typename internal::evaluator<Derived>::InnerIterator i(thisEval, 0);
+  
+  typename internal::evaluator<OtherDerived>::type  otherEval(other.derived());
+  typename internal::evaluator<OtherDerived>::InnerIterator j(otherEval, 0);
 
-  Nested nthis(derived());
-  OtherNested nother(other.derived());
-
-  typename NestedCleaned::InnerIterator i(nthis,0);
-  typename OtherNestedCleaned::InnerIterator j(nother,0);
   Scalar res(0);
   while (i && j)
   {
diff --git a/Eigen/src/SparseCore/SparseMatrix.h b/Eigen/src/SparseCore/SparseMatrix.h
index 2ed2f3ebd..9e7124ff2 100644
--- a/Eigen/src/SparseCore/SparseMatrix.h
+++ b/Eigen/src/SparseCore/SparseMatrix.h
@@ -1,7 +1,7 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
-// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
@@ -52,7 +52,6 @@ struct traits<SparseMatrix<_Scalar, _Options, _Index> >
     MaxRowsAtCompileTime = Dynamic,
     MaxColsAtCompileTime = Dynamic,
     Flags = _Options | NestByRefBit | LvalueBit,
-    CoeffReadCost = NumTraits<Scalar>::ReadCost,
     SupportedAccessPatterns = InnerRandomAccessPattern
   };
 };
@@ -74,8 +73,7 @@ struct traits<Diagonal<const SparseMatrix<_Scalar, _Options, _Index>, DiagIndex>
     ColsAtCompileTime = 1,
     MaxRowsAtCompileTime = Dynamic,
     MaxColsAtCompileTime = 1,
-    Flags = 0,
-    CoeffReadCost = _MatrixTypeNested::CoeffReadCost*10
+    Flags = 0
   };
 };
 
@@ -649,7 +647,9 @@ class SparseMatrix
       EIGEN_STATIC_ASSERT((internal::is_same<Scalar, typename OtherDerived::Scalar>::value),
         YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
       check_template_parameters();
-      *this = other.derived();
+      const bool needToTranspose = (Flags & RowMajorBit) != (internal::evaluator<OtherDerived>::Flags & RowMajorBit);
+      if (needToTranspose)  *this = other.derived();
+      else                  internal::call_assignment_no_alias(*this, other.derived());
     }
     
     /** Constructs a sparse matrix from the sparse selfadjoint view \a other */
@@ -658,7 +658,7 @@ class SparseMatrix
       : m_outerSize(0), m_innerSize(0), m_outerIndex(0), m_innerNonZeros(0)
     {
       check_template_parameters();
-      *this = other;
+      Base::operator=(other);
     }
 
     /** Copy constructor (it performs a deep copy) */
@@ -722,22 +722,11 @@ class SparseMatrix
       return *this;
     }
 
-    #ifndef EIGEN_PARSED_BY_DOXYGEN
-    template<typename Lhs, typename Rhs>
-    inline SparseMatrix& operator=(const SparseSparseProduct<Lhs,Rhs>& product)
-    { return Base::operator=(product); }
-    
-    template<typename OtherDerived>
-    inline SparseMatrix& operator=(const ReturnByValue<OtherDerived>& other)
-    {
-      initAssignment(other);
-      return Base::operator=(other.derived());
-    }
-    
+#ifndef EIGEN_PARSED_BY_DOXYGEN
     template<typename OtherDerived>
     inline SparseMatrix& operator=(const EigenBase<OtherDerived>& other)
     { return Base::operator=(other.derived()); }
-    #endif
+#endif // EIGEN_PARSED_BY_DOXYGEN
 
     template<typename OtherDerived>
     EIGEN_DONT_INLINE SparseMatrix& operator=(const SparseMatrixBase<OtherDerived>& other);
@@ -898,6 +887,11 @@ class SparseMatrix<Scalar,_Options,_Index>::InnerIterator
     const Index m_outer;
     Index m_id;
     Index m_end;
+  private:
+    // If you get here, then you're not using the right InnerIterator type, e.g.:
+    //   SparseMatrix<double,RowMajor> A;
+    //   SparseMatrix<double>::InnerIterator it(A,0);
+    template<typename T> InnerIterator(const SparseMatrixBase<T>&,Index outer);
 };
 
 template<typename Scalar, int _Options, typename _Index>
@@ -1061,17 +1055,19 @@ EIGEN_DONT_INLINE SparseMatrix<Scalar,_Options,_Index>& SparseMatrix<Scalar,_Opt
 {
   EIGEN_STATIC_ASSERT((internal::is_same<Scalar, typename OtherDerived::Scalar>::value),
         YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
-  
-  const bool needToTranspose = (Flags & RowMajorBit) != (OtherDerived::Flags & RowMajorBit);
+
+  const bool needToTranspose = (Flags & RowMajorBit) != (internal::evaluator<OtherDerived>::Flags & RowMajorBit);
   if (needToTranspose)
   {
     // two passes algorithm:
     //  1 - compute the number of coeffs per dest inner vector
     //  2 - do the actual copy/eval
     // Since each coeff of the rhs has to be evaluated twice, let's evaluate it if needed
-    typedef typename internal::nested<OtherDerived,2>::type OtherCopy;
+    typedef typename internal::nested_eval<OtherDerived,2,typename internal::plain_matrix_type<OtherDerived>::type >::type OtherCopy;
     typedef typename internal::remove_all<OtherCopy>::type _OtherCopy;
+    typedef internal::evaluator<_OtherCopy> OtherCopyEval;
     OtherCopy otherCopy(other.derived());
+    OtherCopyEval otherCopyEval(otherCopy);
 
     SparseMatrix dest(other.rows(),other.cols());
     Eigen::Map<Matrix<Index, Dynamic, 1> > (dest.m_outerIndex,dest.outerSize()).setZero();
@@ -1079,7 +1075,7 @@ EIGEN_DONT_INLINE SparseMatrix<Scalar,_Options,_Index>& SparseMatrix<Scalar,_Opt
     // pass 1
     // FIXME the above copy could be merged with that pass
     for (Index j=0; j<otherCopy.outerSize(); ++j)
-      for (typename _OtherCopy::InnerIterator it(otherCopy, j); it; ++it)
+      for (typename OtherCopyEval::InnerIterator it(otherCopyEval, j); it; ++it)
         ++dest.m_outerIndex[it.index()];
 
     // prefix sum
@@ -1098,7 +1094,7 @@ EIGEN_DONT_INLINE SparseMatrix<Scalar,_Options,_Index>& SparseMatrix<Scalar,_Opt
     // pass 2
     for (Index j=0; j<otherCopy.outerSize(); ++j)
     {
-      for (typename _OtherCopy::InnerIterator it(otherCopy, j); it; ++it)
+      for (typename OtherCopyEval::InnerIterator it(otherCopyEval, j); it; ++it)
       {
         Index pos = positions[it.index()]++;
         dest.m_data.index(pos) = j;
@@ -1111,7 +1107,9 @@ EIGEN_DONT_INLINE SparseMatrix<Scalar,_Options,_Index>& SparseMatrix<Scalar,_Opt
   else
   {
     if(other.isRValue())
+    {
       initAssignment(other.derived());
+    }
     // there is no special optimization
     return Base::operator=(other.derived());
   }
@@ -1256,6 +1254,36 @@ EIGEN_DONT_INLINE typename SparseMatrix<_Scalar,_Options,_Index>::Scalar& Sparse
   return (m_data.value(p) = 0);
 }
 
+namespace internal {
+
+template<typename _Scalar, int _Options, typename _Index>
+struct evaluator<SparseMatrix<_Scalar,_Options,_Index> >
+  : evaluator_base<SparseMatrix<_Scalar,_Options,_Index> >
+{
+  typedef _Scalar Scalar;
+  typedef _Index Index;
+  typedef SparseMatrix<_Scalar,_Options,_Index> SparseMatrixType;
+  typedef typename SparseMatrixType::InnerIterator InnerIterator;
+  typedef typename SparseMatrixType::ReverseInnerIterator ReverseInnerIterator;
+  
+  enum {
+    CoeffReadCost = NumTraits<_Scalar>::ReadCost,
+    Flags = SparseMatrixType::Flags
+  };
+  
+  evaluator() : m_matrix(0) {}
+  evaluator(const SparseMatrixType &mat) : m_matrix(&mat) {}
+  
+  operator SparseMatrixType&() { return m_matrix->const_cast_derived(); }
+  operator const SparseMatrixType&() const { return *m_matrix; }
+  
+  Scalar coeff(Index row, Index col) const { return m_matrix->coeff(row,col); }
+
+  const SparseMatrixType *m_matrix;
+};
+
+}
+
 } // end namespace Eigen
 
 #endif // EIGEN_SPARSEMATRIX_H
diff --git a/Eigen/src/SparseCore/SparseMatrixBase.h b/Eigen/src/SparseCore/SparseMatrixBase.h
index fb5025049..b5c50d93a 100644
--- a/Eigen/src/SparseCore/SparseMatrixBase.h
+++ b/Eigen/src/SparseCore/SparseMatrixBase.h
@@ -1,7 +1,7 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
-// Copyright (C) 2008-2011 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
@@ -39,11 +39,7 @@ template<typename Derived> class SparseMatrixBase : public EigenBase<Derived>
     typedef EigenBase<Derived> Base;
     
     template<typename OtherDerived>
-    Derived& operator=(const EigenBase<OtherDerived> &other)
-    {
-      other.derived().evalTo(derived());
-      return derived();
-    }
+    Derived& operator=(const EigenBase<OtherDerived> &other);
 
     enum {
 
@@ -83,11 +79,6 @@ template<typename Derived> class SparseMatrixBase : public EigenBase<Derived>
           * constructed from this one. See the \ref flags "list of flags".
           */
 
-      CoeffReadCost = internal::traits<Derived>::CoeffReadCost,
-        /**< This is a rough measure of how expensive it is to read one coefficient from
-          * this expression.
-          */
-
       IsRowMajor = Flags&RowMajorBit ? 1 : 0,
       
       InnerSizeAtCompileTime = int(IsVectorAtCompileTime) ? int(SizeAtCompileTime)
@@ -104,10 +95,9 @@ template<typename Derived> class SparseMatrixBase : public EigenBase<Derived>
                         Transpose<const Derived>
                      >::type AdjointReturnType;
 
-
+    // FIXME storage order do not match evaluator storage order
     typedef SparseMatrix<Scalar, Flags&RowMajorBit ? RowMajor : ColMajor, Index> PlainObject;
 
-
 #ifndef EIGEN_PARSED_BY_DOXYGEN
     /** This is the "real scalar" type; if the \a Scalar type is already real numbers
       * (e.g. int, float or double) then \a RealScalar is just the same as \a Scalar. If
@@ -175,93 +165,23 @@ template<typename Derived> class SparseMatrixBase : public EigenBase<Derived>
 
     
     template<typename OtherDerived>
-    Derived& operator=(const ReturnByValue<OtherDerived>& other)
-    {
-      other.evalTo(derived());
-      return derived();
-    }
-
+    Derived& operator=(const ReturnByValue<OtherDerived>& other);
 
     template<typename OtherDerived>
-    inline Derived& operator=(const SparseMatrixBase<OtherDerived>& other)
-    {
-      return assign(other.derived());
-    }
+    inline Derived& operator=(const SparseMatrixBase<OtherDerived>& other);
 
-    inline Derived& operator=(const Derived& other)
-    {
-//       if (other.isRValue())
-//         derived().swap(other.const_cast_derived());
-//       else
-      return assign(other.derived());
-    }
+    inline Derived& operator=(const Derived& other);
 
   protected:
 
     template<typename OtherDerived>
-    inline Derived& assign(const OtherDerived& other)
-    {
-      const bool transpose = (Flags & RowMajorBit) != (OtherDerived::Flags & RowMajorBit);
-      const Index outerSize = (int(OtherDerived::Flags) & RowMajorBit) ? Index(other.rows()) : Index(other.cols());
-      if ((!transpose) && other.isRValue())
-      {
-        // eval without temporary
-        derived().resize(Index(other.rows()), Index(other.cols()));
-        derived().setZero();
-        derived().reserve((std::max)(this->rows(),this->cols())*2);
-        for (Index j=0; j<outerSize; ++j)
-        {
-          derived().startVec(j);
-          for (typename OtherDerived::InnerIterator it(other, typename OtherDerived::Index(j)); it; ++it)
-          {
-            Scalar v = it.value();
-            derived().insertBackByOuterInner(j,Index(it.index())) = v;
-          }
-        }
-        derived().finalize();
-      }
-      else
-      {
-        assignGeneric(other);
-      }
-      return derived();
-    }
+    inline Derived& assign(const OtherDerived& other);
 
     template<typename OtherDerived>
-    inline void assignGeneric(const OtherDerived& other)
-    {
-      //const bool transpose = (Flags & RowMajorBit) != (OtherDerived::Flags & RowMajorBit);
-      eigen_assert(( ((internal::traits<Derived>::SupportedAccessPatterns&OuterRandomAccessPattern)==OuterRandomAccessPattern) ||
-                  (!((Flags & RowMajorBit) != (OtherDerived::Flags & RowMajorBit)))) &&
-                  "the transpose operation is supposed to be handled in SparseMatrix::operator=");
-
-      enum { Flip = (Flags & RowMajorBit) != (OtherDerived::Flags & RowMajorBit) };
-
-      const Index outerSize = Index(other.outerSize());
-      //typedef typename internal::conditional<transpose, LinkedVectorMatrix<Scalar,Flags&RowMajorBit>, Derived>::type TempType;
-      // thanks to shallow copies, we always eval to a tempary
-      Derived temp(Index(other.rows()), Index(other.cols()));
-
-      temp.reserve((std::max)(this->rows(),this->cols())*2);
-      for (Index j=0; j<outerSize; ++j)
-      {
-        temp.startVec(j);
-        for (typename OtherDerived::InnerIterator it(other.derived(), typename OtherDerived::Index(j)); it; ++it)
-        {
-          Scalar v = it.value();
-          temp.insertBackByOuterInner(Flip?Index(it.index()):j,Flip?j:Index(it.index())) = v;
-        }
-      }
-      temp.finalize();
-
-      derived() = temp.markAsRValue();
-    }
+    inline void assignGeneric(const OtherDerived& other);
 
   public:
 
-    template<typename Lhs, typename Rhs>
-    inline Derived& operator=(const SparseSparseProduct<Lhs,Rhs>& product);
-
     friend std::ostream & operator << (std::ostream & s, const SparseMatrixBase& m)
     {
       typedef typename Derived::Nested Nested;
@@ -333,33 +253,34 @@ template<typename Derived> class SparseMatrixBase : public EigenBase<Derived>
     EIGEN_STRONG_INLINE const EIGEN_SPARSE_CWISE_PRODUCT_RETURN_TYPE
     cwiseProduct(const MatrixBase<OtherDerived> &other) const;
 
-    // sparse * sparse
-    template<typename OtherDerived>
-    const typename SparseSparseProductReturnType<Derived,OtherDerived>::Type
-    operator*(const SparseMatrixBase<OtherDerived> &other) const;
-
     // sparse * diagonal
     template<typename OtherDerived>
-    const SparseDiagonalProduct<Derived,OtherDerived>
-    operator*(const DiagonalBase<OtherDerived> &other) const;
+    const Product<Derived,OtherDerived>
+    operator*(const DiagonalBase<OtherDerived> &other) const
+    { return Product<Derived,OtherDerived>(derived(), other.derived()); }
 
     // diagonal * sparse
     template<typename OtherDerived> friend
-    const SparseDiagonalProduct<OtherDerived,Derived>
+    const Product<OtherDerived,Derived>
     operator*(const DiagonalBase<OtherDerived> &lhs, const SparseMatrixBase& rhs)
-    { return SparseDiagonalProduct<OtherDerived,Derived>(lhs.derived(), rhs.derived()); }
-
-    /** dense * sparse (return a dense object unless it is an outer product) */
-    template<typename OtherDerived> friend
-    const typename DenseSparseProductReturnType<OtherDerived,Derived>::Type
-    operator*(const MatrixBase<OtherDerived>& lhs, const Derived& rhs)
-    { return typename DenseSparseProductReturnType<OtherDerived,Derived>::Type(lhs.derived(),rhs); }
-
-    /** sparse * dense (returns a dense object unless it is an outer product) */
+    { return Product<OtherDerived,Derived>(lhs.derived(), rhs.derived()); }
+    
+    // sparse * sparse
+    template<typename OtherDerived>
+    const Product<Derived,OtherDerived>
+    operator*(const SparseMatrixBase<OtherDerived> &other) const;
+    
+    // sparse * dense
     template<typename OtherDerived>
-    const typename SparseDenseProductReturnType<Derived,OtherDerived>::Type
+    const Product<Derived,OtherDerived>
     operator*(const MatrixBase<OtherDerived> &other) const
-    { return typename SparseDenseProductReturnType<Derived,OtherDerived>::Type(derived(), other.derived()); }
+    { return Product<Derived,OtherDerived>(derived(), other.derived()); }
+    
+    // dense * sparse
+    template<typename OtherDerived> friend
+    const Product<OtherDerived,Derived>
+    operator*(const MatrixBase<OtherDerived> &lhs, const SparseMatrixBase& rhs)
+    { return Product<OtherDerived,Derived>(lhs.derived(), rhs.derived()); }
     
      /** \returns an expression of P H P^-1 where H is the matrix represented by \c *this */
     SparseSymmetricPermutationProduct<Derived,Upper|Lower> twistedBy(const PermutationMatrix<Dynamic,Dynamic,Index>& perm) const
@@ -371,7 +292,7 @@ template<typename Derived> class SparseMatrixBase : public EigenBase<Derived>
     Derived& operator*=(const SparseMatrixBase<OtherDerived>& other);
 
     template<int Mode>
-    inline const SparseTriangularView<Derived, Mode> triangularView() const;
+    inline const TriangularView<Derived, Mode> triangularView() const;
 
     template<unsigned int UpLo> inline const SparseSelfAdjointView<Derived, UpLo> selfadjointView() const;
     template<unsigned int UpLo> inline SparseSelfAdjointView<Derived, UpLo> selfadjointView();
@@ -396,16 +317,6 @@ template<typename Derived> class SparseMatrixBase : public EigenBase<Derived>
     Block<Derived,Dynamic,Dynamic,true> innerVectors(Index outerStart, Index outerSize);
     const Block<const Derived,Dynamic,Dynamic,true> innerVectors(Index outerStart, Index outerSize) const;
 
-    /** \internal use operator= */
-    template<typename DenseDerived>
-    void evalTo(MatrixBase<DenseDerived>& dst) const
-    {
-      dst.setZero();
-      for (Index j=0; j<outerSize(); ++j)
-        for (typename Derived::InnerIterator i(derived(),typename Derived::Index(j)); i; ++i)
-          dst.coeffRef(i.row(),i.col()) = i.value();
-    }
-
     Matrix<Scalar,RowsAtCompileTime,ColsAtCompileTime> toDense() const
     {
       return derived();
@@ -430,6 +341,9 @@ template<typename Derived> class SparseMatrixBase : public EigenBase<Derived>
     { return typename internal::eval<Derived>::type(derived()); }
 
     Scalar sum() const;
+    
+    inline const SparseView<Derived>
+    pruned(const Scalar& reference = Scalar(0), const RealScalar& epsilon = NumTraits<Scalar>::dummy_precision()) const;
 
   protected:
 
diff --git a/Eigen/src/SparseCore/SparsePermutation.h b/Eigen/src/SparseCore/SparsePermutation.h
index b85be93f6..228796bf8 100644
--- a/Eigen/src/SparseCore/SparsePermutation.h
+++ b/Eigen/src/SparseCore/SparsePermutation.h
@@ -103,44 +103,133 @@ struct permut_sparsematrix_product_retval
 
 }
 
+namespace internal {
+
+template <int ProductTag> struct product_promote_storage_type<Sparse,             PermutationStorage, ProductTag> { typedef Sparse ret; };
+template <int ProductTag> struct product_promote_storage_type<PermutationStorage, Sparse,             ProductTag> { typedef Sparse ret; };
+  
+// TODO, the following need cleaning, this is just a copy-past of the dense case  
+
+template<typename Lhs, typename Rhs, int ProductTag>
+struct generic_product_impl<Lhs, Rhs, PermutationShape, SparseShape, ProductTag>
+{
+  template<typename Dest>
+  static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
+  {
+    permut_sparsematrix_product_retval<Lhs, Rhs, OnTheLeft, false> pmpr(lhs, rhs);
+    pmpr.evalTo(dst);
+  }
+};
+
+template<typename Lhs, typename Rhs, int ProductTag>
+struct generic_product_impl<Lhs, Rhs, SparseShape, PermutationShape, ProductTag>
+{
+  template<typename Dest>
+  static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
+  {
+    permut_sparsematrix_product_retval<Rhs, Lhs, OnTheRight, false> pmpr(rhs, lhs);
+    pmpr.evalTo(dst);
+  }
+};
+
+template<typename Lhs, typename Rhs, int ProductTag>
+struct generic_product_impl<Transpose<Lhs>, Rhs, PermutationShape, SparseShape, ProductTag>
+{
+  template<typename Dest>
+  static void evalTo(Dest& dst, const Transpose<Lhs>& lhs, const Rhs& rhs)
+  {
+    permut_sparsematrix_product_retval<Lhs, Rhs, OnTheLeft, true> pmpr(lhs.nestedPermutation(), rhs);
+    pmpr.evalTo(dst);
+  }
+};
+
+template<typename Lhs, typename Rhs, int ProductTag>
+struct generic_product_impl<Lhs, Transpose<Rhs>, SparseShape, PermutationShape, ProductTag>
+{
+  template<typename Dest>
+  static void evalTo(Dest& dst, const Lhs& lhs, const Transpose<Rhs>& rhs)
+  {
+    permut_sparsematrix_product_retval<Rhs, Lhs, OnTheRight, true> pmpr(rhs.nestedPermutation(), lhs);
+    pmpr.evalTo(dst);
+  }
+};
+
+// TODO, the following two overloads are only needed to define the right temporary type through 
+// typename traits<permut_sparsematrix_product_retval<Rhs,Lhs,OnTheRight,false> >::ReturnType
+// while it should be correctly handled by traits<Product<> >::PlainObject
 
+template<typename Lhs, typename Rhs, int ProductTag>
+struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, ProductTag, PermutationShape, SparseShape, typename traits<Lhs>::Scalar, typename traits<Rhs>::Scalar> 
+  : public evaluator<typename traits<permut_sparsematrix_product_retval<Lhs,Rhs,OnTheRight,false> >::ReturnType>::type
+{
+  typedef Product<Lhs, Rhs, DefaultProduct> XprType;
+  typedef typename traits<permut_sparsematrix_product_retval<Lhs,Rhs,OnTheRight,false> >::ReturnType PlainObject;
+  typedef typename evaluator<PlainObject>::type Base;
+
+  product_evaluator(const XprType& xpr)
+    : m_result(xpr.rows(), xpr.cols())
+  {
+    ::new (static_cast<Base*>(this)) Base(m_result);
+    generic_product_impl<Lhs, Rhs, PermutationShape, SparseShape, ProductTag>::evalTo(m_result, xpr.lhs(), xpr.rhs());
+  }
+  
+protected:  
+  PlainObject m_result;
+};
+
+template<typename Lhs, typename Rhs, int ProductTag>
+struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, ProductTag, SparseShape, PermutationShape, typename traits<Lhs>::Scalar, typename traits<Rhs>::Scalar> 
+  : public evaluator<typename traits<permut_sparsematrix_product_retval<Rhs,Lhs,OnTheRight,false> >::ReturnType>::type
+{
+  typedef Product<Lhs, Rhs, DefaultProduct> XprType;
+  typedef typename traits<permut_sparsematrix_product_retval<Rhs,Lhs,OnTheRight,false> >::ReturnType PlainObject;
+  typedef typename evaluator<PlainObject>::type Base;
+
+  product_evaluator(const XprType& xpr)
+    : m_result(xpr.rows(), xpr.cols())
+  {
+    ::new (static_cast<Base*>(this)) Base(m_result);
+    generic_product_impl<Lhs, Rhs, SparseShape, PermutationShape, ProductTag>::evalTo(m_result, xpr.lhs(), xpr.rhs());
+  }
+  
+protected:  
+  PlainObject m_result;
+};
+
+} // end namespace internal
 
 /** \returns the matrix with the permutation applied to the columns
   */
 template<typename SparseDerived, typename PermDerived>
-inline const internal::permut_sparsematrix_product_retval<PermutationBase<PermDerived>, SparseDerived, OnTheRight, false>
+inline const Product<SparseDerived, PermDerived>
 operator*(const SparseMatrixBase<SparseDerived>& matrix, const PermutationBase<PermDerived>& perm)
-{
-  return internal::permut_sparsematrix_product_retval<PermutationBase<PermDerived>, SparseDerived, OnTheRight, false>(perm, matrix.derived());
-}
+{ return Product<SparseDerived, PermDerived>(matrix.derived(), perm.derived()); }
 
 /** \returns the matrix with the permutation applied to the rows
   */
 template<typename SparseDerived, typename PermDerived>
-inline const internal::permut_sparsematrix_product_retval<PermutationBase<PermDerived>, SparseDerived, OnTheLeft, false>
+inline const Product<PermDerived, SparseDerived>
 operator*( const PermutationBase<PermDerived>& perm, const SparseMatrixBase<SparseDerived>& matrix)
-{
-  return internal::permut_sparsematrix_product_retval<PermutationBase<PermDerived>, SparseDerived, OnTheLeft, false>(perm, matrix.derived());
-}
-
+{ return  Product<PermDerived, SparseDerived>(perm.derived(), matrix.derived()); }
 
 
+// TODO, the following specializations should not be needed as Transpose<Permutation*> should be a PermutationBase.
 /** \returns the matrix with the inverse permutation applied to the columns.
   */
 template<typename SparseDerived, typename PermDerived>
-inline const internal::permut_sparsematrix_product_retval<PermutationBase<PermDerived>, SparseDerived, OnTheRight, true>
+inline const Product<SparseDerived, Transpose<PermutationBase<PermDerived> > >
 operator*(const SparseMatrixBase<SparseDerived>& matrix, const Transpose<PermutationBase<PermDerived> >& tperm)
 {
-  return internal::permut_sparsematrix_product_retval<PermutationBase<PermDerived>, SparseDerived, OnTheRight, true>(tperm.nestedPermutation(), matrix.derived());
+  return Product<SparseDerived, Transpose<PermutationBase<PermDerived> > >(matrix.derived(), tperm);
 }
 
 /** \returns the matrix with the inverse permutation applied to the rows.
   */
 template<typename SparseDerived, typename PermDerived>
-inline const internal::permut_sparsematrix_product_retval<PermutationBase<PermDerived>, SparseDerived, OnTheLeft, true>
+inline const Product<Transpose<PermutationBase<PermDerived> >, SparseDerived>
 operator*(const Transpose<PermutationBase<PermDerived> >& tperm, const SparseMatrixBase<SparseDerived>& matrix)
 {
-  return internal::permut_sparsematrix_product_retval<PermutationBase<PermDerived>, SparseDerived, OnTheLeft, true>(tperm.nestedPermutation(), matrix.derived());
+  return Product<Transpose<PermutationBase<PermDerived> >, SparseDerived>(tperm, matrix.derived());
 }
 
 } // end namespace Eigen
diff --git a/Eigen/src/SparseCore/SparseProduct.h b/Eigen/src/SparseCore/SparseProduct.h
index cf7663070..b68fe986a 100644
--- a/Eigen/src/SparseCore/SparseProduct.h
+++ b/Eigen/src/SparseCore/SparseProduct.h
@@ -1,7 +1,7 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
-// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
@@ -12,158 +12,6 @@
 
 namespace Eigen { 
 
-template<typename Lhs, typename Rhs>
-struct SparseSparseProductReturnType
-{
-  typedef typename internal::traits<Lhs>::Scalar Scalar;
-  typedef typename internal::traits<Lhs>::Index Index;
-  enum {
-    LhsRowMajor = internal::traits<Lhs>::Flags & RowMajorBit,
-    RhsRowMajor = internal::traits<Rhs>::Flags & RowMajorBit,
-    TransposeRhs = (!LhsRowMajor) && RhsRowMajor,
-    TransposeLhs = LhsRowMajor && (!RhsRowMajor)
-  };
-
-  typedef typename internal::conditional<TransposeLhs,
-    SparseMatrix<Scalar,0,Index>,
-    typename internal::nested<Lhs,Rhs::RowsAtCompileTime>::type>::type LhsNested;
-
-  typedef typename internal::conditional<TransposeRhs,
-    SparseMatrix<Scalar,0,Index>,
-    typename internal::nested<Rhs,Lhs::RowsAtCompileTime>::type>::type RhsNested;
-
-  typedef SparseSparseProduct<LhsNested, RhsNested> Type;
-};
-
-namespace internal {
-template<typename LhsNested, typename RhsNested>
-struct traits<SparseSparseProduct<LhsNested, RhsNested> >
-{
-  typedef MatrixXpr XprKind;
-  // clean the nested types:
-  typedef typename remove_all<LhsNested>::type _LhsNested;
-  typedef typename remove_all<RhsNested>::type _RhsNested;
-  typedef typename _LhsNested::Scalar Scalar;
-  typedef typename promote_index_type<typename traits<_LhsNested>::Index,
-                                         typename traits<_RhsNested>::Index>::type Index;
-
-  enum {
-    LhsCoeffReadCost = _LhsNested::CoeffReadCost,
-    RhsCoeffReadCost = _RhsNested::CoeffReadCost,
-    LhsFlags = _LhsNested::Flags,
-    RhsFlags = _RhsNested::Flags,
-
-    RowsAtCompileTime    = _LhsNested::RowsAtCompileTime,
-    ColsAtCompileTime    = _RhsNested::ColsAtCompileTime,
-    MaxRowsAtCompileTime = _LhsNested::MaxRowsAtCompileTime,
-    MaxColsAtCompileTime = _RhsNested::MaxColsAtCompileTime,
-
-    InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(_LhsNested::ColsAtCompileTime, _RhsNested::RowsAtCompileTime),
-
-    EvalToRowMajor = (RhsFlags & LhsFlags & RowMajorBit),
-
-    RemovedBits = ~(EvalToRowMajor ? 0 : RowMajorBit),
-
-    Flags = (int(LhsFlags | RhsFlags) & HereditaryBits & RemovedBits)
-          | EvalBeforeAssigningBit
-          | EvalBeforeNestingBit,
-
-    CoeffReadCost = Dynamic
-  };
-
-  typedef Sparse StorageKind;
-};
-
-} // end namespace internal
-
-template<typename LhsNested, typename RhsNested>
-class SparseSparseProduct : internal::no_assignment_operator,
-  public SparseMatrixBase<SparseSparseProduct<LhsNested, RhsNested> >
-{
-  public:
-
-    typedef SparseMatrixBase<SparseSparseProduct> Base;
-    EIGEN_DENSE_PUBLIC_INTERFACE(SparseSparseProduct)
-
-  private:
-
-    typedef typename internal::traits<SparseSparseProduct>::_LhsNested _LhsNested;
-    typedef typename internal::traits<SparseSparseProduct>::_RhsNested _RhsNested;
-
-  public:
-
-    template<typename Lhs, typename Rhs>
-    EIGEN_STRONG_INLINE SparseSparseProduct(const Lhs& lhs, const Rhs& rhs)
-      : m_lhs(lhs), m_rhs(rhs), m_tolerance(0), m_conservative(true)
-    {
-      init();
-    }
-
-    template<typename Lhs, typename Rhs>
-    EIGEN_STRONG_INLINE SparseSparseProduct(const Lhs& lhs, const Rhs& rhs, const RealScalar& tolerance)
-      : m_lhs(lhs), m_rhs(rhs), m_tolerance(tolerance), m_conservative(false)
-    {
-      init();
-    }
-
-    SparseSparseProduct pruned(const Scalar& reference = 0, const RealScalar& epsilon = NumTraits<RealScalar>::dummy_precision()) const
-    {
-      using std::abs;
-      return SparseSparseProduct(m_lhs,m_rhs,abs(reference)*epsilon);
-    }
-
-    template<typename Dest>
-    void evalTo(Dest& result) const
-    {
-      if(m_conservative)
-        internal::conservative_sparse_sparse_product_selector<_LhsNested, _RhsNested, Dest>::run(lhs(),rhs(),result);
-      else
-        internal::sparse_sparse_product_with_pruning_selector<_LhsNested, _RhsNested, Dest>::run(lhs(),rhs(),result,m_tolerance);
-    }
-
-    EIGEN_STRONG_INLINE Index rows() const { return m_lhs.rows(); }
-    EIGEN_STRONG_INLINE Index cols() const { return m_rhs.cols(); }
-
-    EIGEN_STRONG_INLINE const _LhsNested& lhs() const { return m_lhs; }
-    EIGEN_STRONG_INLINE const _RhsNested& rhs() const { return m_rhs; }
-
-  protected:
-    void init()
-    {
-      eigen_assert(m_lhs.cols() == m_rhs.rows());
-
-      enum {
-        ProductIsValid = _LhsNested::ColsAtCompileTime==Dynamic
-                      || _RhsNested::RowsAtCompileTime==Dynamic
-                      || int(_LhsNested::ColsAtCompileTime)==int(_RhsNested::RowsAtCompileTime),
-        AreVectors = _LhsNested::IsVectorAtCompileTime && _RhsNested::IsVectorAtCompileTime,
-        SameSizes = EIGEN_PREDICATE_SAME_MATRIX_SIZE(_LhsNested,_RhsNested)
-      };
-      // note to the lost user:
-      //    * for a dot product use: v1.dot(v2)
-      //    * for a coeff-wise product use: v1.cwise()*v2
-      EIGEN_STATIC_ASSERT(ProductIsValid || !(AreVectors && SameSizes),
-        INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS)
-      EIGEN_STATIC_ASSERT(ProductIsValid || !(SameSizes && !AreVectors),
-        INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION)
-      EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT)
-    }
-
-    LhsNested m_lhs;
-    RhsNested m_rhs;
-    RealScalar m_tolerance;
-    bool m_conservative;
-};
-
-// sparse = sparse * sparse
-template<typename Derived>
-template<typename Lhs, typename Rhs>
-inline Derived& SparseMatrixBase<Derived>::operator=(const SparseSparseProduct<Lhs,Rhs>& product)
-{
-  product.evalTo(derived());
-  return derived();
-}
-
 /** \returns an expression of the product of two sparse matrices.
   * By default a conservative product preserving the symbolic non zeros is performed.
   * The automatic pruning of the small values can be achieved by calling the pruned() function
@@ -177,12 +25,61 @@ inline Derived& SparseMatrixBase<Derived>::operator=(const SparseSparseProduct<L
   * */
 template<typename Derived>
 template<typename OtherDerived>
-inline const typename SparseSparseProductReturnType<Derived,OtherDerived>::Type
+inline const Product<Derived,OtherDerived>
 SparseMatrixBase<Derived>::operator*(const SparseMatrixBase<OtherDerived> &other) const
 {
-  return typename SparseSparseProductReturnType<Derived,OtherDerived>::Type(derived(), other.derived());
+  return Product<Derived,OtherDerived>(derived(), other.derived());
 }
 
+namespace internal {
+
+template<typename Lhs, typename Rhs, int ProductType>
+struct generic_product_impl<Lhs, Rhs, SparseShape, SparseShape, ProductType>
+{
+  template<typename Dest>
+  static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
+  {
+    typedef typename nested_eval<Lhs,Dynamic>::type LhsNested;
+    typedef typename nested_eval<Rhs,Dynamic>::type RhsNested;
+    LhsNested lhsNested(lhs);
+    RhsNested rhsNested(rhs);
+    internal::conservative_sparse_sparse_product_selector<typename remove_all<LhsNested>::type,
+                                                          typename remove_all<RhsNested>::type, Dest>::run(lhsNested,rhsNested,dst);
+  }
+};
+
+template<typename Lhs, typename Rhs, int Options>
+struct evaluator<SparseView<Product<Lhs, Rhs, Options> > > 
+ : public evaluator<typename Product<Lhs, Rhs, DefaultProduct>::PlainObject>::type
+{
+  typedef SparseView<Product<Lhs, Rhs, Options> > XprType;
+  typedef typename XprType::PlainObject PlainObject;
+  typedef typename evaluator<PlainObject>::type Base;
+  
+  typedef evaluator type;
+  typedef evaluator nestedType;
+  
+  evaluator(const XprType& xpr)
+    : m_result(xpr.rows(), xpr.cols())
+  {
+    using std::abs;
+    ::new (static_cast<Base*>(this)) Base(m_result);
+    typedef typename nested_eval<Lhs,Dynamic>::type LhsNested;
+    typedef typename nested_eval<Rhs,Dynamic>::type RhsNested;
+    LhsNested lhsNested(xpr.nestedExpression().lhs());
+    RhsNested rhsNested(xpr.nestedExpression().rhs());
+    
+    internal::sparse_sparse_product_with_pruning_selector<typename remove_all<LhsNested>::type,
+                                                          typename remove_all<RhsNested>::type, PlainObject>::run(lhsNested,rhsNested,m_result,
+                                                                                                                  abs(xpr.reference())*xpr.epsilon());
+  }
+  
+protected:  
+  PlainObject m_result;
+};
+
+} // end namespace internal
+
 } // end namespace Eigen
 
 #endif // EIGEN_SPARSEPRODUCT_H
diff --git a/Eigen/src/SparseCore/SparseRedux.h b/Eigen/src/SparseCore/SparseRedux.h
index f3da93a71..763f2296b 100644
--- a/Eigen/src/SparseCore/SparseRedux.h
+++ b/Eigen/src/SparseCore/SparseRedux.h
@@ -1,7 +1,7 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
-// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
@@ -18,8 +18,9 @@ SparseMatrixBase<Derived>::sum() const
 {
   eigen_assert(rows()>0 && cols()>0 && "you are using a non initialized matrix");
   Scalar res(0);
+  typename internal::evaluator<Derived>::type thisEval(derived());
   for (Index j=0; j<outerSize(); ++j)
-    for (typename Derived::InnerIterator iter(derived(),j); iter; ++iter)
+    for (typename internal::evaluator<Derived>::InnerIterator iter(thisEval,j); iter; ++iter)
       res += iter.value();
   return res;
 }
diff --git a/Eigen/src/SparseCore/SparseSelfAdjointView.h b/Eigen/src/SparseCore/SparseSelfAdjointView.h
index 56c922929..69ac1a398 100644
--- a/Eigen/src/SparseCore/SparseSelfAdjointView.h
+++ b/Eigen/src/SparseCore/SparseSelfAdjointView.h
@@ -1,7 +1,7 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
-// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2009-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
@@ -11,14 +11,14 @@
 #define EIGEN_SPARSE_SELFADJOINTVIEW_H
 
 namespace Eigen { 
-
+  
 /** \ingroup SparseCore_Module
   * \class SparseSelfAdjointView
   *
   * \brief Pseudo expression to manipulate a triangular sparse matrix as a selfadjoint matrix.
   *
   * \param MatrixType the type of the dense matrix storing the coefficients
-  * \param UpLo can be either \c #Lower or \c #Upper
+  * \param Mode can be either \c #Lower or \c #Upper
   *
   * This class is an expression of a sefladjoint matrix from a triangular part of a matrix
   * with given dense storage of the coefficients. It is the return type of MatrixBase::selfadjointView()
@@ -26,37 +26,33 @@ namespace Eigen {
   *
   * \sa SparseMatrixBase::selfadjointView()
   */
-template<typename Lhs, typename Rhs, int UpLo>
-class SparseSelfAdjointTimeDenseProduct;
-
-template<typename Lhs, typename Rhs, int UpLo>
-class DenseTimeSparseSelfAdjointProduct;
-
 namespace internal {
   
-template<typename MatrixType, unsigned int UpLo>
-struct traits<SparseSelfAdjointView<MatrixType,UpLo> > : traits<MatrixType> {
+template<typename MatrixType, unsigned int Mode>
+struct traits<SparseSelfAdjointView<MatrixType,Mode> > : traits<MatrixType> {
 };
 
-template<int SrcUpLo,int DstUpLo,typename MatrixType,int DestOrder>
+template<int SrcMode,int DstMode,typename MatrixType,int DestOrder>
 void permute_symm_to_symm(const MatrixType& mat, SparseMatrix<typename MatrixType::Scalar,DestOrder,typename MatrixType::Index>& _dest, const typename MatrixType::Index* perm = 0);
 
-template<int UpLo,typename MatrixType,int DestOrder>
+template<int Mode,typename MatrixType,int DestOrder>
 void permute_symm_to_fullsymm(const MatrixType& mat, SparseMatrix<typename MatrixType::Scalar,DestOrder,typename MatrixType::Index>& _dest, const typename MatrixType::Index* perm = 0);
 
 }
 
-template<typename MatrixType, unsigned int UpLo> class SparseSelfAdjointView
-  : public EigenBase<SparseSelfAdjointView<MatrixType,UpLo> >
+template<typename MatrixType, unsigned int _Mode> class SparseSelfAdjointView
+  : public EigenBase<SparseSelfAdjointView<MatrixType,_Mode> >
 {
   public:
+    
+    enum { Mode = _Mode };
 
     typedef typename MatrixType::Scalar Scalar;
     typedef typename MatrixType::Index Index;
     typedef Matrix<Index,Dynamic,1> VectorI;
     typedef typename MatrixType::Nested MatrixTypeNested;
     typedef typename internal::remove_all<MatrixTypeNested>::type _MatrixTypeNested;
-
+    
     inline SparseSelfAdjointView(const MatrixType& matrix) : m_matrix(matrix)
     {
       eigen_assert(rows()==cols() && "SelfAdjointView is only for squared matrices");
@@ -75,10 +71,10 @@ template<typename MatrixType, unsigned int UpLo> class SparseSelfAdjointView
       * Indeed, the SparseSelfadjointView operand is first copied into a temporary SparseMatrix before computing the product.
       */
     template<typename OtherDerived>
-    SparseSparseProduct<typename OtherDerived::PlainObject, OtherDerived>
+    Product<SparseSelfAdjointView, OtherDerived>
     operator*(const SparseMatrixBase<OtherDerived>& rhs) const
     {
-      return SparseSparseProduct<typename OtherDerived::PlainObject, OtherDerived>(*this, rhs.derived());
+      return Product<SparseSelfAdjointView, OtherDerived>(*this, rhs.derived());
     }
 
     /** \returns an expression of the matrix product between a sparse matrix \a lhs and a sparse self-adjoint matrix \a rhs.
@@ -87,26 +83,26 @@ template<typename MatrixType, unsigned int UpLo> class SparseSelfAdjointView
       * Indeed, the SparseSelfadjointView operand is first copied into a temporary SparseMatrix before computing the product.
       */
     template<typename OtherDerived> friend
-    SparseSparseProduct<OtherDerived, typename OtherDerived::PlainObject >
+    Product<OtherDerived, SparseSelfAdjointView>
     operator*(const SparseMatrixBase<OtherDerived>& lhs, const SparseSelfAdjointView& rhs)
     {
-      return SparseSparseProduct<OtherDerived, typename OtherDerived::PlainObject>(lhs.derived(), rhs);
+      return Product<OtherDerived, SparseSelfAdjointView>(lhs.derived(), rhs);
     }
     
     /** Efficient sparse self-adjoint matrix times dense vector/matrix product */
     template<typename OtherDerived>
-    SparseSelfAdjointTimeDenseProduct<MatrixType,OtherDerived,UpLo>
+    Product<SparseSelfAdjointView,OtherDerived>
     operator*(const MatrixBase<OtherDerived>& rhs) const
     {
-      return SparseSelfAdjointTimeDenseProduct<MatrixType,OtherDerived,UpLo>(m_matrix, rhs.derived());
+      return Product<SparseSelfAdjointView,OtherDerived>(*this, rhs.derived());
     }
 
     /** Efficient dense vector/matrix times sparse self-adjoint matrix product */
     template<typename OtherDerived> friend
-    DenseTimeSparseSelfAdjointProduct<OtherDerived,MatrixType,UpLo>
+    Product<OtherDerived,SparseSelfAdjointView>
     operator*(const MatrixBase<OtherDerived>& lhs, const SparseSelfAdjointView& rhs)
     {
-      return DenseTimeSparseSelfAdjointProduct<OtherDerived,_MatrixTypeNested,UpLo>(lhs.derived(), rhs.m_matrix);
+      return Product<OtherDerived,SparseSelfAdjointView>(lhs.derived(), rhs);
     }
 
     /** Perform a symmetric rank K update of the selfadjoint matrix \c *this:
@@ -123,53 +119,49 @@ template<typename MatrixType, unsigned int UpLo> class SparseSelfAdjointView
     /** \internal triggered by sparse_matrix = SparseSelfadjointView; */
     template<typename DestScalar,int StorageOrder> void evalTo(SparseMatrix<DestScalar,StorageOrder,Index>& _dest) const
     {
-      internal::permute_symm_to_fullsymm<UpLo>(m_matrix, _dest);
+      internal::permute_symm_to_fullsymm<Mode>(m_matrix, _dest);
     }
     
     template<typename DestScalar> void evalTo(DynamicSparseMatrix<DestScalar,ColMajor,Index>& _dest) const
     {
       // TODO directly evaluate into _dest;
       SparseMatrix<DestScalar,ColMajor,Index> tmp(_dest.rows(),_dest.cols());
-      internal::permute_symm_to_fullsymm<UpLo>(m_matrix, tmp);
+      internal::permute_symm_to_fullsymm<Mode>(m_matrix, tmp);
       _dest = tmp;
     }
     
     /** \returns an expression of P H P^-1 */
-    SparseSymmetricPermutationProduct<_MatrixTypeNested,UpLo> twistedBy(const PermutationMatrix<Dynamic,Dynamic,Index>& perm) const
+    // TODO implement twists in a more evaluator friendly fashion
+    SparseSymmetricPermutationProduct<_MatrixTypeNested,Mode> twistedBy(const PermutationMatrix<Dynamic,Dynamic,Index>& perm) const
     {
-      return SparseSymmetricPermutationProduct<_MatrixTypeNested,UpLo>(m_matrix, perm);
+      return SparseSymmetricPermutationProduct<_MatrixTypeNested,Mode>(m_matrix, perm);
     }
-    
-    template<typename SrcMatrixType,int SrcUpLo>
-    SparseSelfAdjointView& operator=(const SparseSymmetricPermutationProduct<SrcMatrixType,SrcUpLo>& permutedMatrix)
+
+    template<typename SrcMatrixType,int SrcMode>
+    SparseSelfAdjointView& operator=(const SparseSymmetricPermutationProduct<SrcMatrixType,SrcMode>& permutedMatrix)
     {
       permutedMatrix.evalTo(*this);
       return *this;
     }
 
-
     SparseSelfAdjointView& operator=(const SparseSelfAdjointView& src)
     {
       PermutationMatrix<Dynamic> pnull;
       return *this = src.twistedBy(pnull);
     }
 
-    template<typename SrcMatrixType,unsigned int SrcUpLo>
-    SparseSelfAdjointView& operator=(const SparseSelfAdjointView<SrcMatrixType,SrcUpLo>& src)
+    template<typename SrcMatrixType,unsigned int SrcMode>
+    SparseSelfAdjointView& operator=(const SparseSelfAdjointView<SrcMatrixType,SrcMode>& src)
     {
       PermutationMatrix<Dynamic> pnull;
       return *this = src.twistedBy(pnull);
     }
     
-
-    // const SparseLLT<PlainObject, UpLo> llt() const;
-    // const SparseLDLT<PlainObject, UpLo> ldlt() const;
-
   protected:
 
     typename MatrixType::Nested m_matrix;
-    mutable VectorI m_countPerRow;
-    mutable VectorI m_countPerCol;
+    //mutable VectorI m_countPerRow;
+    //mutable VectorI m_countPerCol;
 };
 
 /***************************************************************************
@@ -177,15 +169,15 @@ template<typename MatrixType, unsigned int UpLo> class SparseSelfAdjointView
 ***************************************************************************/
 
 template<typename Derived>
-template<unsigned int UpLo>
-const SparseSelfAdjointView<Derived, UpLo> SparseMatrixBase<Derived>::selfadjointView() const
+template<unsigned int Mode>
+const SparseSelfAdjointView<Derived, Mode> SparseMatrixBase<Derived>::selfadjointView() const
 {
   return derived();
 }
 
 template<typename Derived>
-template<unsigned int UpLo>
-SparseSelfAdjointView<Derived, UpLo> SparseMatrixBase<Derived>::selfadjointView()
+template<unsigned int Mode>
+SparseSelfAdjointView<Derived, Mode> SparseMatrixBase<Derived>::selfadjointView()
 {
   return derived();
 }
@@ -194,16 +186,16 @@ SparseSelfAdjointView<Derived, UpLo> SparseMatrixBase<Derived>::selfadjointView(
 * Implementation of SparseSelfAdjointView methods
 ***************************************************************************/
 
-template<typename MatrixType, unsigned int UpLo>
+template<typename MatrixType, unsigned int Mode>
 template<typename DerivedU>
-SparseSelfAdjointView<MatrixType,UpLo>&
-SparseSelfAdjointView<MatrixType,UpLo>::rankUpdate(const SparseMatrixBase<DerivedU>& u, const Scalar& alpha)
+SparseSelfAdjointView<MatrixType,Mode>&
+SparseSelfAdjointView<MatrixType,Mode>::rankUpdate(const SparseMatrixBase<DerivedU>& u, const Scalar& alpha)
 {
-  SparseMatrix<Scalar,MatrixType::Flags&RowMajorBit?RowMajor:ColMajor> tmp = u * u.adjoint();
+  SparseMatrix<Scalar,(MatrixType::Flags&RowMajorBit)?RowMajor:ColMajor> tmp = u * u.adjoint();
   if(alpha==Scalar(0))
-    m_matrix.const_cast_derived() = tmp.template triangularView<UpLo>();
+    m_matrix.const_cast_derived() = tmp.template triangularView<Mode>();
   else
-    m_matrix.const_cast_derived() += alpha * tmp.template triangularView<UpLo>();
+    m_matrix.const_cast_derived() += alpha * tmp.template triangularView<Mode>();
 
   return *this;
 }
@@ -213,104 +205,154 @@ SparseSelfAdjointView<MatrixType,UpLo>::rankUpdate(const SparseMatrixBase<Derive
 ***************************************************************************/
 
 namespace internal {
-template<typename Lhs, typename Rhs, int UpLo>
-struct traits<SparseSelfAdjointTimeDenseProduct<Lhs,Rhs,UpLo> >
- : traits<ProductBase<SparseSelfAdjointTimeDenseProduct<Lhs,Rhs,UpLo>, Lhs, Rhs> >
-{
-  typedef Dense StorageKind;
-};
-}
 
-template<typename Lhs, typename Rhs, int UpLo>
-class SparseSelfAdjointTimeDenseProduct
-  : public ProductBase<SparseSelfAdjointTimeDenseProduct<Lhs,Rhs,UpLo>, Lhs, Rhs>
+template<int Mode, typename SparseLhsType, typename DenseRhsType, typename DenseResType, typename AlphaType>
+inline void sparse_selfadjoint_time_dense_product(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res, const AlphaType& alpha)
 {
-  public:
-    EIGEN_PRODUCT_PUBLIC_INTERFACE(SparseSelfAdjointTimeDenseProduct)
-
-    SparseSelfAdjointTimeDenseProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs)
-    {}
-
-    template<typename Dest> void scaleAndAddTo(Dest& dest, const Scalar& alpha) const
+  EIGEN_ONLY_USED_FOR_DEBUG(alpha);
+  // TODO use alpha
+  eigen_assert(alpha==AlphaType(1) && "alpha != 1 is not implemented yet, sorry");
+  
+  typedef typename evaluator<SparseLhsType>::type LhsEval;
+  typedef typename evaluator<SparseLhsType>::InnerIterator LhsIterator;
+  typedef typename SparseLhsType::Index Index;
+  typedef typename SparseLhsType::Scalar LhsScalar;
+  
+  enum {
+    LhsIsRowMajor = (LhsEval::Flags&RowMajorBit)==RowMajorBit,
+    ProcessFirstHalf =
+              ((Mode&(Upper|Lower))==(Upper|Lower))
+          || ( (Mode&Upper) && !LhsIsRowMajor)
+          || ( (Mode&Lower) && LhsIsRowMajor),
+    ProcessSecondHalf = !ProcessFirstHalf
+  };
+  
+  LhsEval lhsEval(lhs);
+  
+  for (Index j=0; j<lhs.outerSize(); ++j)
+  {
+    LhsIterator i(lhsEval,j);
+    if (ProcessSecondHalf)
     {
-      EIGEN_ONLY_USED_FOR_DEBUG(alpha);
-      // TODO use alpha
-      eigen_assert(alpha==Scalar(1) && "alpha != 1 is not implemented yet, sorry");
-      typedef typename internal::remove_all<Lhs>::type _Lhs;
-      typedef typename _Lhs::InnerIterator LhsInnerIterator;
-      enum {
-        LhsIsRowMajor = (_Lhs::Flags&RowMajorBit)==RowMajorBit,
-        ProcessFirstHalf =
-                 ((UpLo&(Upper|Lower))==(Upper|Lower))
-              || ( (UpLo&Upper) && !LhsIsRowMajor)
-              || ( (UpLo&Lower) && LhsIsRowMajor),
-        ProcessSecondHalf = !ProcessFirstHalf
-      };
-      for (typename _Lhs::Index j=0; j<m_lhs.outerSize(); ++j)
+      while (i && i.index()<j) ++i;
+      if(i && i.index()==j)
       {
-        LhsInnerIterator i(m_lhs,j);
-        if (ProcessSecondHalf)
-        {
-          while (i && i.index()<j) ++i;
-          if(i && i.index()==j)
-          {
-            dest.row(j) += i.value() * m_rhs.row(j);
-            ++i;
-          }
-        }
-        for(; (ProcessFirstHalf ? i && i.index() < j : i) ; ++i)
-        {
-          Index a = LhsIsRowMajor ? j : i.index();
-          Index b = LhsIsRowMajor ? i.index() : j;
-          typename Lhs::Scalar v = i.value();
-          dest.row(a) += (v) * m_rhs.row(b);
-          dest.row(b) += numext::conj(v) * m_rhs.row(a);
-        }
-        if (ProcessFirstHalf && i && (i.index()==j))
-          dest.row(j) += i.value() * m_rhs.row(j);
+        res.row(j) += i.value() * rhs.row(j);
+        ++i;
       }
     }
+    for(; (ProcessFirstHalf ? i && i.index() < j : i) ; ++i)
+    {
+      Index a = LhsIsRowMajor ? j : i.index();
+      Index b = LhsIsRowMajor ? i.index() : j;
+      LhsScalar v = i.value();
+      res.row(a) += (v) * rhs.row(b);
+      res.row(b) += numext::conj(v) * rhs.row(a);
+    }
+    if (ProcessFirstHalf && i && (i.index()==j))
+      res.row(j) += i.value() * rhs.row(j);
+  }
+}
+  
+// TODO currently a selfadjoint expression has the form SelfAdjointView<.,.>
+//      in the future selfadjoint-ness should be defined by the expression traits
+//      such that Transpose<SelfAdjointView<.,.> > is valid. (currently TriangularBase::transpose() is overloaded to make it work)
+template<typename MatrixType, unsigned int Mode>
+struct evaluator_traits<SparseSelfAdjointView<MatrixType,Mode> >
+{
+  typedef typename storage_kind_to_evaluator_kind<typename MatrixType::StorageKind>::Kind Kind;
+  typedef SparseSelfAdjointShape Shape;
+  
+  static const int AssumeAliasing = 0;
+};
 
-  private:
-    SparseSelfAdjointTimeDenseProduct& operator=(const SparseSelfAdjointTimeDenseProduct&);
+template<typename LhsView, typename Rhs, int ProductType>
+struct generic_product_impl<LhsView, Rhs, SparseSelfAdjointShape, DenseShape, ProductType>
+{
+  template<typename Dest>
+  static void evalTo(Dest& dst, const LhsView& lhsView, const Rhs& rhs)
+  {
+    typedef typename LhsView::_MatrixTypeNested Lhs;
+    typedef typename nested_eval<Lhs,Dynamic>::type LhsNested;
+    typedef typename nested_eval<Rhs,Dynamic>::type RhsNested;
+    LhsNested lhsNested(lhsView.matrix());
+    RhsNested rhsNested(rhs);
+    
+    dst.setZero();
+    internal::sparse_selfadjoint_time_dense_product<LhsView::Mode>(lhsNested, rhsNested, dst, typename Dest::Scalar(1));
+  }
 };
 
-namespace internal {
-template<typename Lhs, typename Rhs, int UpLo>
-struct traits<DenseTimeSparseSelfAdjointProduct<Lhs,Rhs,UpLo> >
- : traits<ProductBase<DenseTimeSparseSelfAdjointProduct<Lhs,Rhs,UpLo>, Lhs, Rhs> >
-{};
-}
+template<typename Lhs, typename RhsView, int ProductType>
+struct generic_product_impl<Lhs, RhsView, DenseShape, SparseSelfAdjointShape, ProductType>
+{
+  template<typename Dest>
+  static void evalTo(Dest& dst, const Lhs& lhs, const RhsView& rhsView)
+  {
+    typedef typename RhsView::_MatrixTypeNested Rhs;
+    typedef typename nested_eval<Lhs,Dynamic>::type LhsNested;
+    typedef typename nested_eval<Rhs,Dynamic>::type RhsNested;
+    LhsNested lhsNested(lhs);
+    RhsNested rhsNested(rhsView.matrix());
+    
+    dst.setZero();
+    // transpoe everything
+    Transpose<Dest> dstT(dst);
+    internal::sparse_selfadjoint_time_dense_product<RhsView::Mode>(rhsNested.transpose(), lhsNested.transpose(), dstT, typename Dest::Scalar(1));
+  }
+};
 
-template<typename Lhs, typename Rhs, int UpLo>
-class DenseTimeSparseSelfAdjointProduct
-  : public ProductBase<DenseTimeSparseSelfAdjointProduct<Lhs,Rhs,UpLo>, Lhs, Rhs>
+// NOTE: these two overloads are needed to evaluate the sparse sefladjoint view into a full sparse matrix
+// TODO: maybe the copy could be handled by generic_product_impl so that these overloads would not be needed anymore
+
+template<typename LhsView, typename Rhs, int ProductTag>
+struct product_evaluator<Product<LhsView, Rhs, DefaultProduct>, ProductTag, SparseSelfAdjointShape, SparseShape, typename traits<LhsView>::Scalar, typename traits<Rhs>::Scalar>
+  : public evaluator<typename Product<typename Rhs::PlainObject, Rhs, DefaultProduct>::PlainObject>::type
 {
-  public:
-    EIGEN_PRODUCT_PUBLIC_INTERFACE(DenseTimeSparseSelfAdjointProduct)
+  typedef Product<LhsView, Rhs, DefaultProduct> XprType;
+  typedef typename XprType::PlainObject PlainObject;
+  typedef typename evaluator<PlainObject>::type Base;
 
-    DenseTimeSparseSelfAdjointProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs)
-    {}
+  product_evaluator(const XprType& xpr)
+    : m_lhs(xpr.lhs()), m_result(xpr.rows(), xpr.cols())
+  {
+    ::new (static_cast<Base*>(this)) Base(m_result);
+    generic_product_impl<typename Rhs::PlainObject, Rhs, SparseShape, SparseShape, ProductTag>::evalTo(m_result, m_lhs, xpr.rhs());
+  }
+  
+protected:
+  typename Rhs::PlainObject m_lhs;
+  PlainObject m_result;
+};
 
-    template<typename Dest> void scaleAndAddTo(Dest& /*dest*/, const Scalar& /*alpha*/) const
-    {
-      // TODO
-    }
+template<typename Lhs, typename RhsView, int ProductTag>
+struct product_evaluator<Product<Lhs, RhsView, DefaultProduct>, ProductTag, SparseShape, SparseSelfAdjointShape, typename traits<Lhs>::Scalar, typename traits<RhsView>::Scalar>
+  : public evaluator<typename Product<Lhs, typename Lhs::PlainObject, DefaultProduct>::PlainObject>::type
+{
+  typedef Product<Lhs, RhsView, DefaultProduct> XprType;
+  typedef typename XprType::PlainObject PlainObject;
+  typedef typename evaluator<PlainObject>::type Base;
 
-  private:
-    DenseTimeSparseSelfAdjointProduct& operator=(const DenseTimeSparseSelfAdjointProduct&);
+  product_evaluator(const XprType& xpr)
+    : m_rhs(xpr.rhs()), m_result(xpr.rows(), xpr.cols())
+  {
+    ::new (static_cast<Base*>(this)) Base(m_result);
+    generic_product_impl<Lhs, typename Lhs::PlainObject, SparseShape, SparseShape, ProductTag>::evalTo(m_result, xpr.lhs(), m_rhs);
+  }
+  
+protected:
+  typename Lhs::PlainObject m_rhs;
+  PlainObject m_result;
 };
 
+} // namespace internal
+
 /***************************************************************************
 * Implementation of symmetric copies and permutations
 ***************************************************************************/
 namespace internal {
-  
-template<typename MatrixType, int UpLo>
-struct traits<SparseSymmetricPermutationProduct<MatrixType,UpLo> > : traits<MatrixType> {
-};
 
-template<int UpLo,typename MatrixType,int DestOrder>
+template<int Mode,typename MatrixType,int DestOrder>
 void permute_symm_to_fullsymm(const MatrixType& mat, SparseMatrix<typename MatrixType::Scalar,DestOrder,typename MatrixType::Index>& _dest, const typename MatrixType::Index* perm)
 {
   typedef typename MatrixType::Index Index;
@@ -337,11 +379,11 @@ void permute_symm_to_fullsymm(const MatrixType& mat, SparseMatrix<typename Matri
       Index r = it.row();
       Index c = it.col();
       Index ip = perm ? perm[i] : i;
-      if(UpLo==(Upper|Lower))
+      if(Mode==(Upper|Lower))
         count[StorageOrderMatch ? jp : ip]++;
       else if(r==c)
         count[ip]++;
-      else if(( UpLo==Lower && r>c) || ( UpLo==Upper && r<c))
+      else if(( Mode==Lower && r>c) || ( Mode==Upper && r<c))
       {
         count[ip]++;
         count[jp]++;
@@ -370,7 +412,7 @@ void permute_symm_to_fullsymm(const MatrixType& mat, SparseMatrix<typename Matri
       Index jp = perm ? perm[j] : j;
       Index ip = perm ? perm[i] : i;
       
-      if(UpLo==(Upper|Lower))
+      if(Mode==(Upper|Lower))
       {
         Index k = count[StorageOrderMatch ? jp : ip]++;
         dest.innerIndexPtr()[k] = StorageOrderMatch ? ip : jp;
@@ -382,7 +424,7 @@ void permute_symm_to_fullsymm(const MatrixType& mat, SparseMatrix<typename Matri
         dest.innerIndexPtr()[k] = ip;
         dest.valuePtr()[k] = it.value();
       }
-      else if(( (UpLo&Lower)==Lower && r>c) || ( (UpLo&Upper)==Upper && r<c))
+      else if(( (Mode&Lower)==Lower && r>c) || ( (Mode&Upper)==Upper && r<c))
       {
         if(!StorageOrderMatch)
           std::swap(ip,jp);
@@ -397,7 +439,7 @@ void permute_symm_to_fullsymm(const MatrixType& mat, SparseMatrix<typename Matri
   }
 }
 
-template<int _SrcUpLo,int _DstUpLo,typename MatrixType,int DstOrder>
+template<int _SrcMode,int _DstMode,typename MatrixType,int DstOrder>
 void permute_symm_to_symm(const MatrixType& mat, SparseMatrix<typename MatrixType::Scalar,DstOrder,typename MatrixType::Index>& _dest, const typename MatrixType::Index* perm)
 {
   typedef typename MatrixType::Index Index;
@@ -407,8 +449,8 @@ void permute_symm_to_symm(const MatrixType& mat, SparseMatrix<typename MatrixTyp
   enum {
     SrcOrder = MatrixType::IsRowMajor ? RowMajor : ColMajor,
     StorageOrderMatch = int(SrcOrder) == int(DstOrder),
-    DstUpLo = DstOrder==RowMajor ? (_DstUpLo==Upper ? Lower : Upper) : _DstUpLo,
-    SrcUpLo = SrcOrder==RowMajor ? (_SrcUpLo==Upper ? Lower : Upper) : _SrcUpLo
+    DstMode = DstOrder==RowMajor ? (_DstMode==Upper ? Lower : Upper) : _DstMode,
+    SrcMode = SrcOrder==RowMajor ? (_SrcMode==Upper ? Lower : Upper) : _SrcMode
   };
   
   Index size = mat.rows();
@@ -421,11 +463,11 @@ void permute_symm_to_symm(const MatrixType& mat, SparseMatrix<typename MatrixTyp
     for(typename MatrixType::InnerIterator it(mat,j); it; ++it)
     {
       Index i = it.index();
-      if((int(SrcUpLo)==int(Lower) && i<j) || (int(SrcUpLo)==int(Upper) && i>j))
+      if((int(SrcMode)==int(Lower) && i<j) || (int(SrcMode)==int(Upper) && i>j))
         continue;
                   
       Index ip = perm ? perm[i] : i;
-      count[int(DstUpLo)==int(Lower) ? (std::min)(ip,jp) : (std::max)(ip,jp)]++;
+      count[int(DstMode)==int(Lower) ? (std::min)(ip,jp) : (std::max)(ip,jp)]++;
     }
   }
   dest.outerIndexPtr()[0] = 0;
@@ -441,17 +483,17 @@ void permute_symm_to_symm(const MatrixType& mat, SparseMatrix<typename MatrixTyp
     for(typename MatrixType::InnerIterator it(mat,j); it; ++it)
     {
       Index i = it.index();
-      if((int(SrcUpLo)==int(Lower) && i<j) || (int(SrcUpLo)==int(Upper) && i>j))
+      if((int(SrcMode)==int(Lower) && i<j) || (int(SrcMode)==int(Upper) && i>j))
         continue;
                   
       Index jp = perm ? perm[j] : j;
       Index ip = perm? perm[i] : i;
       
-      Index k = count[int(DstUpLo)==int(Lower) ? (std::min)(ip,jp) : (std::max)(ip,jp)]++;
-      dest.innerIndexPtr()[k] = int(DstUpLo)==int(Lower) ? (std::max)(ip,jp) : (std::min)(ip,jp);
+      Index k = count[int(DstMode)==int(Lower) ? (std::min)(ip,jp) : (std::max)(ip,jp)]++;
+      dest.innerIndexPtr()[k] = int(DstMode)==int(Lower) ? (std::max)(ip,jp) : (std::min)(ip,jp);
       
       if(!StorageOrderMatch) std::swap(ip,jp);
-      if( ((int(DstUpLo)==int(Lower) && ip<jp) || (int(DstUpLo)==int(Upper) && ip>jp)))
+      if( ((int(DstMode)==int(Lower) && ip<jp) || (int(DstMode)==int(Upper) && ip>jp)))
         dest.valuePtr()[k] = numext::conj(it.value());
       else
         dest.valuePtr()[k] = it.value();
@@ -461,9 +503,19 @@ void permute_symm_to_symm(const MatrixType& mat, SparseMatrix<typename MatrixTyp
 
 }
 
-template<typename MatrixType,int UpLo>
+// TODO implement twists in a more evaluator friendly fashion
+
+namespace internal {
+
+template<typename MatrixType, int Mode>
+struct traits<SparseSymmetricPermutationProduct<MatrixType,Mode> > : traits<MatrixType> {
+};
+
+}
+
+template<typename MatrixType,int Mode>
 class SparseSymmetricPermutationProduct
-  : public EigenBase<SparseSymmetricPermutationProduct<MatrixType,UpLo> >
+  : public EigenBase<SparseSymmetricPermutationProduct<MatrixType,Mode> >
 {
   public:
     typedef typename MatrixType::Scalar Scalar;
@@ -485,15 +537,15 @@ class SparseSymmetricPermutationProduct
     template<typename DestScalar, int Options, typename DstIndex>
     void evalTo(SparseMatrix<DestScalar,Options,DstIndex>& _dest) const
     {
-//       internal::permute_symm_to_fullsymm<UpLo>(m_matrix,_dest,m_perm.indices().data());
+//       internal::permute_symm_to_fullsymm<Mode>(m_matrix,_dest,m_perm.indices().data());
       SparseMatrix<DestScalar,(Options&RowMajor)==RowMajor ? ColMajor : RowMajor, DstIndex> tmp;
-      internal::permute_symm_to_fullsymm<UpLo>(m_matrix,tmp,m_perm.indices().data());
+      internal::permute_symm_to_fullsymm<Mode>(m_matrix,tmp,m_perm.indices().data());
       _dest = tmp;
     }
     
-    template<typename DestType,unsigned int DestUpLo> void evalTo(SparseSelfAdjointView<DestType,DestUpLo>& dest) const
+    template<typename DestType,unsigned int DestMode> void evalTo(SparseSelfAdjointView<DestType,DestMode>& dest) const
     {
-      internal::permute_symm_to_symm<UpLo,DestUpLo>(m_matrix,dest.matrix(),m_perm.indices().data());
+      internal::permute_symm_to_symm<Mode,DestMode>(m_matrix,dest.matrix(),m_perm.indices().data());
     }
     
   protected:
diff --git a/Eigen/src/SparseCore/SparseSolverBase.h b/Eigen/src/SparseCore/SparseSolverBase.h
new file mode 100644
index 000000000..df4e2f017
--- /dev/null
+++ b/Eigen/src/SparseCore/SparseSolverBase.h
@@ -0,0 +1,110 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_SPARSESOLVERBASE_H
+#define EIGEN_SPARSESOLVERBASE_H
+
+namespace Eigen { 
+
+namespace internal {
+
+  /** \internal
+  * Helper functions to solve with a sparse right-hand-side and result.
+  * The rhs is decomposed into small vertical panels which are solved through dense temporaries.
+  */
+template<typename Decomposition, typename Rhs, typename Dest>
+void solve_sparse_through_dense_panels(const Decomposition &dec, const Rhs& rhs, Dest &dest)
+{
+  EIGEN_STATIC_ASSERT((Dest::Flags&RowMajorBit)==0,THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES);
+  typedef typename Dest::Scalar DestScalar;
+  // we process the sparse rhs per block of NbColsAtOnce columns temporarily stored into a dense matrix.
+  static const int NbColsAtOnce = 4;
+  int rhsCols = rhs.cols();
+  int size = rhs.rows();
+  // the temporary matrices do not need more columns than NbColsAtOnce:
+  int tmpCols = (std::min)(rhsCols, NbColsAtOnce); 
+  Eigen::Matrix<DestScalar,Dynamic,Dynamic> tmp(size,tmpCols);
+  Eigen::Matrix<DestScalar,Dynamic,Dynamic> tmpX(size,tmpCols);
+  for(int k=0; k<rhsCols; k+=NbColsAtOnce)
+  {
+    int actualCols = std::min<int>(rhsCols-k, NbColsAtOnce);
+    tmp.leftCols(actualCols) = rhs.middleCols(k,actualCols);
+    tmpX.leftCols(actualCols) = dec.solve(tmp.leftCols(actualCols));
+    dest.middleCols(k,actualCols) = tmpX.leftCols(actualCols).sparseView();
+  }
+}
+
+} // end namespace internal
+
+/** \class SparseSolverBase
+  * \ingroup SparseCore_Module
+  * \brief A base class for sparse solvers
+  *
+  * \tparam Derived the actual type of the solver.
+  *
+  */
+template<typename Derived>
+class SparseSolverBase : internal::noncopyable
+{
+  public:
+
+    /** Default constructor */
+    SparseSolverBase()
+      : m_isInitialized(false)
+    {}
+
+    ~SparseSolverBase()
+    {}
+
+    Derived& derived() { return *static_cast<Derived*>(this); }
+    const Derived& derived() const { return *static_cast<const Derived*>(this); }
+    
+    /** \returns an expression of the solution x of \f$ A x = b \f$ using the current decomposition of A.
+      *
+      * \sa compute()
+      */
+    template<typename Rhs>
+    inline const Solve<Derived, Rhs>
+    solve(const MatrixBase<Rhs>& b) const
+    {
+      eigen_assert(m_isInitialized && "Solver is not initialized.");
+      eigen_assert(derived().rows()==b.rows() && "solve(): invalid number of rows of the right hand side matrix b");
+      return Solve<Derived, Rhs>(derived(), b.derived());
+    }
+    
+    /** \returns an expression of the solution x of \f$ A x = b \f$ using the current decomposition of A.
+      *
+      * \sa compute()
+      */
+    template<typename Rhs>
+    inline const Solve<Derived, Rhs>
+    solve(const SparseMatrixBase<Rhs>& b) const
+    {
+      eigen_assert(m_isInitialized && "Solver is not initialized.");
+      eigen_assert(derived().rows()==b.rows() && "solve(): invalid number of rows of the right hand side matrix b");
+      return Solve<Derived, Rhs>(derived(), b.derived());
+    }
+    
+    #ifndef EIGEN_PARSED_BY_DOXYGEN
+    /** \internal default implementation of solving with a sparse rhs */
+    template<typename Rhs,typename Dest>
+    void _solve_impl(const SparseMatrixBase<Rhs> &b, SparseMatrixBase<Dest> &dest) const
+    {
+      internal::solve_sparse_through_dense_panels(derived(), b.derived(), dest.derived());
+    }
+    #endif // EIGEN_PARSED_BY_DOXYGEN
+
+  protected:
+    
+    mutable bool m_isInitialized;
+};
+
+} // end namespace Eigen
+
+#endif // EIGEN_SPARSESOLVERBASE_H
diff --git a/Eigen/src/SparseCore/SparseSparseProductWithPruning.h b/Eigen/src/SparseCore/SparseSparseProductWithPruning.h
index fcc18f5c9..f291f8cef 100644
--- a/Eigen/src/SparseCore/SparseSparseProductWithPruning.h
+++ b/Eigen/src/SparseCore/SparseSparseProductWithPruning.h
@@ -1,7 +1,7 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
-// Copyright (C) 2008-2011 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
@@ -46,6 +46,9 @@ static void sparse_sparse_product_with_pruning_impl(const Lhs& lhs, const Rhs& r
     res.resize(cols, rows);
   else
     res.resize(rows, cols);
+  
+  typename evaluator<Lhs>::type lhsEval(lhs);
+  typename evaluator<Rhs>::type rhsEval(rhs);
 
   res.reserve(estimated_nnz_prod);
   double ratioColRes = double(estimated_nnz_prod)/double(lhs.rows()*rhs.cols());
@@ -56,12 +59,12 @@ static void sparse_sparse_product_with_pruning_impl(const Lhs& lhs, const Rhs& r
     // let's do a more accurate determination of the nnz ratio for the current column j of res
     tempVector.init(ratioColRes);
     tempVector.setZero();
-    for (typename Rhs::InnerIterator rhsIt(rhs, j); rhsIt; ++rhsIt)
+    for (typename evaluator<Rhs>::InnerIterator rhsIt(rhsEval, j); rhsIt; ++rhsIt)
     {
       // FIXME should be written like this: tmp += rhsIt.value() * lhs.col(rhsIt.index())
       tempVector.restart();
       Scalar x = rhsIt.value();
-      for (typename Lhs::InnerIterator lhsIt(lhs, rhsIt.index()); lhsIt; ++lhsIt)
+      for (typename evaluator<Lhs>::InnerIterator lhsIt(lhsEval, rhsIt.index()); lhsIt; ++lhsIt)
       {
         tempVector.coeffRef(lhsIt.index()) += lhsIt.value() * x;
       }
@@ -140,8 +143,53 @@ struct sparse_sparse_product_with_pruning_selector<Lhs,Rhs,ResultType,RowMajor,R
   }
 };
 
-// NOTE the 2 others cases (col row *) must never occur since they are caught
-// by ProductReturnType which transforms it to (col col *) by evaluating rhs.
+template<typename Lhs, typename Rhs, typename ResultType>
+struct sparse_sparse_product_with_pruning_selector<Lhs,Rhs,ResultType,ColMajor,RowMajor,RowMajor>
+{
+  typedef typename ResultType::RealScalar RealScalar;
+  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res, const RealScalar& tolerance)
+  {
+    typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename Lhs::Index> RowMajorMatrixLhs;
+    RowMajorMatrixLhs rowLhs(lhs);
+    sparse_sparse_product_with_pruning_selector<RowMajorMatrixLhs,Rhs,ResultType,RowMajor,RowMajor>(rowLhs,rhs,res,tolerance);
+  }
+};
+
+template<typename Lhs, typename Rhs, typename ResultType>
+struct sparse_sparse_product_with_pruning_selector<Lhs,Rhs,ResultType,RowMajor,ColMajor,RowMajor>
+{
+  typedef typename ResultType::RealScalar RealScalar;
+  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res, const RealScalar& tolerance)
+  {
+    typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename Lhs::Index> RowMajorMatrixRhs;
+    RowMajorMatrixRhs rowRhs(rhs);
+    sparse_sparse_product_with_pruning_selector<Lhs,RowMajorMatrixRhs,ResultType,RowMajor,RowMajor,RowMajor>(lhs,rowRhs,res,tolerance);
+  }
+};
+
+template<typename Lhs, typename Rhs, typename ResultType>
+struct sparse_sparse_product_with_pruning_selector<Lhs,Rhs,ResultType,ColMajor,RowMajor,ColMajor>
+{
+  typedef typename ResultType::RealScalar RealScalar;
+  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res, const RealScalar& tolerance)
+  {
+    typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename Lhs::Index> ColMajorMatrixRhs;
+    ColMajorMatrixRhs colRhs(rhs);
+    internal::sparse_sparse_product_with_pruning_impl<Lhs,ColMajorMatrixRhs,ResultType>(lhs, colRhs, res, tolerance);
+  }
+};
+
+template<typename Lhs, typename Rhs, typename ResultType>
+struct sparse_sparse_product_with_pruning_selector<Lhs,Rhs,ResultType,RowMajor,ColMajor,ColMajor>
+{
+  typedef typename ResultType::RealScalar RealScalar;
+  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res, const RealScalar& tolerance)
+  {
+    typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename Lhs::Index> ColMajorMatrixLhs;
+    ColMajorMatrixLhs colLhs(lhs);
+    internal::sparse_sparse_product_with_pruning_impl<ColMajorMatrixLhs,Rhs,ResultType>(colLhs, rhs, res, tolerance);
+  }
+};
 
 } // end namespace internal
 
diff --git a/Eigen/src/SparseCore/SparseTranspose.h b/Eigen/src/SparseCore/SparseTranspose.h
index 7c300ee8d..fae7cae97 100644
--- a/Eigen/src/SparseCore/SparseTranspose.h
+++ b/Eigen/src/SparseCore/SparseTranspose.h
@@ -1,7 +1,7 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
-// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
@@ -12,52 +12,64 @@
 
 namespace Eigen { 
 
+// Implement nonZeros() for transpose. I'm not sure that's the best approach for that.
+// Perhaps it should be implemented in Transpose<> itself.
 template<typename MatrixType> class TransposeImpl<MatrixType,Sparse>
   : public SparseMatrixBase<Transpose<MatrixType> >
 {
-    typedef typename internal::remove_all<typename MatrixType::Nested>::type _MatrixTypeNested;
+  protected:
+    typedef SparseMatrixBase<Transpose<MatrixType> > Base;
   public:
-
-    EIGEN_SPARSE_PUBLIC_INTERFACE(Transpose<MatrixType> )
-
-    class InnerIterator;
-    class ReverseInnerIterator;
-
-    inline Index nonZeros() const { return derived().nestedExpression().nonZeros(); }
+    inline typename MatrixType::Index nonZeros() const { return Base::derived().nestedExpression().nonZeros(); }
 };
 
-// NOTE: VC10 trigger an ICE if don't put typename TransposeImpl<MatrixType,Sparse>:: in front of Index,
-// a typedef typename TransposeImpl<MatrixType,Sparse>::Index Index;
-// does not fix the issue.
-// An alternative is to define the nested class in the parent class itself.
-template<typename MatrixType> class TransposeImpl<MatrixType,Sparse>::InnerIterator
-  : public _MatrixTypeNested::InnerIterator
+namespace internal {
+  
+template<typename ArgType>
+struct unary_evaluator<Transpose<ArgType>, IteratorBased>
+  : public evaluator_base<Transpose<ArgType> >
 {
-    typedef typename _MatrixTypeNested::InnerIterator Base;
-    typedef typename TransposeImpl::Index Index;
+    typedef typename evaluator<ArgType>::InnerIterator        EvalIterator;
+    typedef typename evaluator<ArgType>::ReverseInnerIterator EvalReverseIterator;
   public:
+    typedef Transpose<ArgType> XprType;
+    typedef typename XprType::Index Index;
 
-    EIGEN_STRONG_INLINE InnerIterator(const TransposeImpl& trans, typename TransposeImpl<MatrixType,Sparse>::Index outer)
-      : Base(trans.derived().nestedExpression(), outer)
-    {}
-    Index row() const { return Base::col(); }
-    Index col() const { return Base::row(); }
-};
-
-template<typename MatrixType> class TransposeImpl<MatrixType,Sparse>::ReverseInnerIterator
-  : public _MatrixTypeNested::ReverseInnerIterator
-{
-    typedef typename _MatrixTypeNested::ReverseInnerIterator Base;
-    typedef typename TransposeImpl::Index Index;
-  public:
+    class InnerIterator : public EvalIterator
+    {
+    public:
+      EIGEN_STRONG_INLINE InnerIterator(const unary_evaluator& unaryOp, typename XprType::Index outer)
+        : EvalIterator(unaryOp.m_argImpl,outer)
+      {}
+      
+      Index row() const { return EvalIterator::col(); }
+      Index col() const { return EvalIterator::row(); }
+    };
+    
+    class ReverseInnerIterator : public EvalReverseIterator
+    {
+    public:
+      EIGEN_STRONG_INLINE ReverseInnerIterator(const unary_evaluator& unaryOp, typename XprType::Index outer)
+        : EvalReverseIterator(unaryOp.m_argImpl,outer)
+      {}
+      
+      Index row() const { return EvalReverseIterator::col(); }
+      Index col() const { return EvalReverseIterator::row(); }
+    };
+    
+    enum {
+      CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
+      Flags = XprType::Flags
+    };
+    
+    unary_evaluator(const XprType& op) :m_argImpl(op.nestedExpression()) {}
 
-    EIGEN_STRONG_INLINE ReverseInnerIterator(const TransposeImpl& xpr, typename TransposeImpl<MatrixType,Sparse>::Index outer)
-      : Base(xpr.derived().nestedExpression(), outer)
-    {}
-    Index row() const { return Base::col(); }
-    Index col() const { return Base::row(); }
+  protected:
+    typename evaluator<ArgType>::nestedType m_argImpl;
 };
 
+} // end namespace internal
+
 } // end namespace Eigen
 
 #endif // EIGEN_SPARSETRANSPOSE_H
diff --git a/Eigen/src/SparseCore/SparseTriangularView.h b/Eigen/src/SparseCore/SparseTriangularView.h
index 333127b78..744c3d730 100644
--- a/Eigen/src/SparseCore/SparseTriangularView.h
+++ b/Eigen/src/SparseCore/SparseTriangularView.h
@@ -1,7 +1,7 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
-// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2009-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
 // Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
 //
 // This Source Code Form is subject to the terms of the Mozilla
@@ -13,17 +13,8 @@
 
 namespace Eigen { 
 
-namespace internal {
-  
-template<typename MatrixType, int Mode>
-struct traits<SparseTriangularView<MatrixType,Mode> >
-: public traits<MatrixType>
-{};
-
-} // namespace internal
-
-template<typename MatrixType, int Mode> class SparseTriangularView
-  : public SparseMatrixBase<SparseTriangularView<MatrixType,Mode> >
+template<typename MatrixType, unsigned int Mode> class TriangularViewImpl<MatrixType,Mode,Sparse>
+  : public SparseMatrixBase<TriangularView<MatrixType,Mode> >
 {
     enum { SkipFirst = ((Mode&Lower) && !(MatrixType::Flags&RowMajorBit))
                     || ((Mode&Upper) &&  (MatrixType::Flags&RowMajorBit)),
@@ -31,46 +22,46 @@ template<typename MatrixType, int Mode> class SparseTriangularView
            SkipDiag = (Mode&ZeroDiag) ? 1 : 0,
            HasUnitDiag = (Mode&UnitDiag) ? 1 : 0
     };
+    
+    typedef TriangularView<MatrixType,Mode> TriangularViewType;
+    
+protected:
+    // dummy solve function to make TriangularView happy.
+    void solve() const;
 
   public:
     
-    EIGEN_SPARSE_PUBLIC_INTERFACE(SparseTriangularView)
-
+    EIGEN_SPARSE_PUBLIC_INTERFACE(TriangularViewType)
+    
     class InnerIterator;
     class ReverseInnerIterator;
 
-    inline Index rows() const { return m_matrix.rows(); }
-    inline Index cols() const { return m_matrix.cols(); }
-
     typedef typename MatrixType::Nested MatrixTypeNested;
     typedef typename internal::remove_reference<MatrixTypeNested>::type MatrixTypeNestedNonRef;
     typedef typename internal::remove_all<MatrixTypeNested>::type MatrixTypeNestedCleaned;
 
-    inline SparseTriangularView(const MatrixType& matrix) : m_matrix(matrix) {}
-
-    /** \internal */
-    inline const MatrixTypeNestedCleaned& nestedExpression() const { return m_matrix; }
-
-    template<typename OtherDerived>
-    typename internal::plain_matrix_type_column_major<OtherDerived>::type
-    solve(const MatrixBase<OtherDerived>& other) const;
+    template<typename RhsType, typename DstType>
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE void _solve_impl(const RhsType &rhs, DstType &dst) const {
+      if(!(internal::is_same<RhsType,DstType>::value && internal::extract_data(dst) == internal::extract_data(rhs)))
+        dst = rhs;
+      this->solveInPlace(dst);
+    }
 
     template<typename OtherDerived> void solveInPlace(MatrixBase<OtherDerived>& other) const;
     template<typename OtherDerived> void solveInPlace(SparseMatrixBase<OtherDerived>& other) const;
-
-  protected:
-    MatrixTypeNested m_matrix;
+  
 };
 
-template<typename MatrixType, int Mode>
-class SparseTriangularView<MatrixType,Mode>::InnerIterator : public MatrixTypeNestedCleaned::InnerIterator
+template<typename MatrixType, unsigned int Mode>
+class TriangularViewImpl<MatrixType,Mode,Sparse>::InnerIterator : public MatrixTypeNestedCleaned::InnerIterator
 {
     typedef typename MatrixTypeNestedCleaned::InnerIterator Base;
-    typedef typename SparseTriangularView::Index Index;
+    typedef typename TriangularViewType::Index Index;
   public:
 
-    EIGEN_STRONG_INLINE InnerIterator(const SparseTriangularView& view, Index outer)
-      : Base(view.nestedExpression(), outer), m_returnOne(false)
+    EIGEN_STRONG_INLINE InnerIterator(const TriangularViewImpl& view, Index outer)
+      : Base(view.derived().nestedExpression(), outer), m_returnOne(false)
     {
       if(SkipFirst)
       {
@@ -132,15 +123,15 @@ class SparseTriangularView<MatrixType,Mode>::InnerIterator : public MatrixTypeNe
     bool m_returnOne;
 };
 
-template<typename MatrixType, int Mode>
-class SparseTriangularView<MatrixType,Mode>::ReverseInnerIterator : public MatrixTypeNestedCleaned::ReverseInnerIterator
+template<typename MatrixType, unsigned int Mode>
+class TriangularViewImpl<MatrixType,Mode,Sparse>::ReverseInnerIterator : public MatrixTypeNestedCleaned::ReverseInnerIterator
 {
     typedef typename MatrixTypeNestedCleaned::ReverseInnerIterator Base;
-    typedef typename SparseTriangularView::Index Index;
+    typedef typename TriangularViewImpl::Index Index;
   public:
 
-    EIGEN_STRONG_INLINE ReverseInnerIterator(const SparseTriangularView& view, Index outer)
-      : Base(view.nestedExpression(), outer)
+    EIGEN_STRONG_INLINE ReverseInnerIterator(const TriangularViewType& view, Index outer)
+      : Base(view.derived().nestedExpression(), outer)
     {
       eigen_assert((!HasUnitDiag) && "ReverseInnerIterator does not support yet triangular views with a unit diagonal");
       if(SkipLast) {
@@ -166,9 +157,116 @@ class SparseTriangularView<MatrixType,Mode>::ReverseInnerIterator : public Matri
     }
 };
 
+namespace internal {
+
+template<typename ArgType, unsigned int Mode>
+struct unary_evaluator<TriangularView<ArgType,Mode>, IteratorBased>
+ : evaluator_base<TriangularView<ArgType,Mode> >
+{
+  typedef TriangularView<ArgType,Mode> XprType;
+  
+protected:
+  
+  typedef typename XprType::Scalar Scalar;
+  typedef typename XprType::Index Index;
+  typedef typename evaluator<ArgType>::InnerIterator EvalIterator;
+  
+  enum { SkipFirst = ((Mode&Lower) && !(ArgType::Flags&RowMajorBit))
+                    || ((Mode&Upper) &&  (ArgType::Flags&RowMajorBit)),
+         SkipLast = !SkipFirst,
+         SkipDiag = (Mode&ZeroDiag) ? 1 : 0,
+         HasUnitDiag = (Mode&UnitDiag) ? 1 : 0
+  };
+  
+public:
+  
+  enum {
+    CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
+    Flags = XprType::Flags
+  };
+    
+  unary_evaluator(const XprType &xpr) : m_argImpl(xpr.nestedExpression()) {}
+  
+  class InnerIterator : public EvalIterator
+  {
+      typedef EvalIterator Base;
+    public:
+
+      EIGEN_STRONG_INLINE InnerIterator(const unary_evaluator& xprEval, Index outer)
+        : Base(xprEval.m_argImpl,outer), m_returnOne(false)
+      {
+        if(SkipFirst)
+        {
+          while((*this) && ((HasUnitDiag||SkipDiag)  ? this->index()<=outer : this->index()<outer))
+            Base::operator++();
+          if(HasUnitDiag)
+            m_returnOne = true;
+        }
+        else if(HasUnitDiag && ((!Base::operator bool()) || Base::index()>=Base::outer()))
+        {
+          if((!SkipFirst) && Base::operator bool())
+            Base::operator++();
+          m_returnOne = true;
+        }
+      }
+
+      EIGEN_STRONG_INLINE InnerIterator& operator++()
+      {
+        if(HasUnitDiag && m_returnOne)
+          m_returnOne = false;
+        else
+        {
+          Base::operator++();
+          if(HasUnitDiag && (!SkipFirst) && ((!Base::operator bool()) || Base::index()>=Base::outer()))
+          {
+            if((!SkipFirst) && Base::operator bool())
+              Base::operator++();
+            m_returnOne = true;
+          }
+        }
+        return *this;
+      }
+      
+      EIGEN_STRONG_INLINE operator bool() const
+      {
+        if(HasUnitDiag && m_returnOne)
+          return true;
+        if(SkipFirst) return  Base::operator bool();
+        else
+        {
+          if (SkipDiag) return (Base::operator bool() && this->index() < this->outer());
+          else return (Base::operator bool() && this->index() <= this->outer());
+        }
+      }
+
+//       inline Index row() const { return (ArgType::Flags&RowMajorBit ? Base::outer() : this->index()); }
+//       inline Index col() const { return (ArgType::Flags&RowMajorBit ? this->index() : Base::outer()); }
+      inline Index index() const
+      {
+        if(HasUnitDiag && m_returnOne)  return Base::outer();
+        else                            return Base::index();
+      }
+      inline Scalar value() const
+      {
+        if(HasUnitDiag && m_returnOne)  return Scalar(1);
+        else                            return Base::value();
+      }
+
+    protected:
+      bool m_returnOne;
+    private:
+      Scalar& valueRef();
+  };
+  
+protected:
+  typename evaluator<ArgType>::type m_argImpl;
+};
+
+} // end namespace internal
+
 template<typename Derived>
 template<int Mode>
-inline const SparseTriangularView<Derived, Mode>
+inline const TriangularView<Derived, Mode>
 SparseMatrixBase<Derived>::triangularView() const
 {
   return derived();
diff --git a/Eigen/src/SparseCore/SparseUtil.h b/Eigen/src/SparseCore/SparseUtil.h
index 02c19d18f..8de227b88 100644
--- a/Eigen/src/SparseCore/SparseUtil.h
+++ b/Eigen/src/SparseCore/SparseUtil.h
@@ -1,7 +1,7 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
-// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
@@ -52,13 +52,12 @@ EIGEN_SPARSE_INHERIT_SCALAR_ASSIGNMENT_OPERATOR(Derived, /=)
   typedef typename Eigen::internal::traits<Derived >::Index Index; \
   enum { RowsAtCompileTime = Eigen::internal::traits<Derived >::RowsAtCompileTime, \
         ColsAtCompileTime = Eigen::internal::traits<Derived >::ColsAtCompileTime, \
-        Flags = Eigen::internal::traits<Derived >::Flags, \
-        CoeffReadCost = Eigen::internal::traits<Derived >::CoeffReadCost, \
+        Flags = Eigen::internal::traits<Derived>::Flags, \
         SizeAtCompileTime = Base::SizeAtCompileTime, \
         IsVectorAtCompileTime = Base::IsVectorAtCompileTime }; \
   using Base::derived; \
   using Base::const_cast_derived;
-
+  
 #define EIGEN_SPARSE_PUBLIC_INTERFACE(Derived) \
   _EIGEN_SPARSE_PUBLIC_INTERFACE(Derived, Eigen::SparseMatrixBase<Derived >)
 
@@ -73,7 +72,6 @@ template<typename _Scalar, int _Flags = 0, typename _Index = int>  class Dynamic
 template<typename _Scalar, int _Flags = 0, typename _Index = int>  class SparseVector;
 template<typename _Scalar, int _Flags = 0, typename _Index = int>  class MappedSparseMatrix;
 
-template<typename MatrixType, int Mode>           class SparseTriangularView;
 template<typename MatrixType, unsigned int UpLo>  class SparseSelfAdjointView;
 template<typename Lhs, typename Rhs>              class SparseDiagonalProduct;
 template<typename MatrixType> class SparseView;
@@ -131,11 +129,29 @@ template<typename T> struct plain_matrix_type<T,Sparse>
 {
   typedef typename traits<T>::Scalar _Scalar;
   typedef typename traits<T>::Index _Index;
-  enum { _Options = ((traits<T>::Flags&RowMajorBit)==RowMajorBit) ? RowMajor : ColMajor };
+  enum { _Options = ((evaluator<T>::Flags&RowMajorBit)==RowMajorBit) ? RowMajor : ColMajor };
   public:
     typedef SparseMatrix<_Scalar, _Options, _Index> type;
 };
 
+template<typename Decomposition, typename RhsType>
+struct solve_traits<Decomposition,RhsType,Sparse>
+{
+  typedef typename sparse_eval<RhsType, RhsType::RowsAtCompileTime, RhsType::ColsAtCompileTime>::type PlainObject;
+};
+
+template<typename Derived>
+struct generic_xpr_base<Derived, MatrixXpr, Sparse>
+{
+  typedef SparseMatrixBase<Derived> type;
+};
+
+struct SparseTriangularShape  { static std::string debugName() { return "SparseTriangularShape"; } };
+struct SparseSelfAdjointShape { static std::string debugName() { return "SparseSelfAdjointShape"; } };
+
+template<> struct glue_shapes<SparseShape,SelfAdjointShape> { typedef SparseSelfAdjointShape type;  };
+template<> struct glue_shapes<SparseShape,TriangularShape > { typedef SparseTriangularShape  type;  };
+
 } // end namespace internal
 
 /** \ingroup SparseCore_Module
diff --git a/Eigen/src/SparseCore/SparseVector.h b/Eigen/src/SparseCore/SparseVector.h
index 0b1b389ce..c9f9d61e9 100644
--- a/Eigen/src/SparseCore/SparseVector.h
+++ b/Eigen/src/SparseCore/SparseVector.h
@@ -1,7 +1,7 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
-// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
@@ -422,11 +422,34 @@ class SparseVector<Scalar,_Options,_Index>::ReverseInnerIterator
 
 namespace internal {
 
+template<typename _Scalar, int _Options, typename _Index>
+struct evaluator<SparseVector<_Scalar,_Options,_Index> >
+  : evaluator_base<SparseVector<_Scalar,_Options,_Index> >
+{
+  typedef SparseVector<_Scalar,_Options,_Index> SparseVectorType;
+  typedef typename SparseVectorType::InnerIterator InnerIterator;
+  typedef typename SparseVectorType::ReverseInnerIterator ReverseInnerIterator;
+  
+  enum {
+    CoeffReadCost = NumTraits<_Scalar>::ReadCost,
+    Flags = SparseVectorType::Flags
+  };
+  
+  evaluator(const SparseVectorType &mat) : m_matrix(mat) {}
+  
+  operator SparseVectorType&() { return m_matrix.const_cast_derived(); }
+  operator const SparseVectorType&() const { return m_matrix; }
+  
+  const SparseVectorType &m_matrix;
+};
+
 template< typename Dest, typename Src>
 struct sparse_vector_assign_selector<Dest,Src,SVA_Inner> {
   static void run(Dest& dst, const Src& src) {
     eigen_internal_assert(src.innerSize()==src.size());
-    for(typename Src::InnerIterator it(src, 0); it; ++it)
+    typedef typename internal::evaluator<Src>::type SrcEvaluatorType;
+    SrcEvaluatorType srcEval(src);
+    for(typename SrcEvaluatorType::InnerIterator it(srcEval, 0); it; ++it)
       dst.insert(it.index()) = it.value();
   }
 };
@@ -435,9 +458,11 @@ template< typename Dest, typename Src>
 struct sparse_vector_assign_selector<Dest,Src,SVA_Outer> {
   static void run(Dest& dst, const Src& src) {
     eigen_internal_assert(src.outerSize()==src.size());
+    typedef typename internal::evaluator<Src>::type SrcEvaluatorType;
+    SrcEvaluatorType srcEval(src);
     for(typename Dest::Index i=0; i<src.size(); ++i)
     {
-      typename Src::InnerIterator it(src, i);
+      typename SrcEvaluatorType::InnerIterator it(srcEval, i);
       if(it)
         dst.insert(i) = it.value();
     }
diff --git a/Eigen/src/SparseCore/SparseView.h b/Eigen/src/SparseCore/SparseView.h
index fd8450463..d10cc5a35 100644
--- a/Eigen/src/SparseCore/SparseView.h
+++ b/Eigen/src/SparseCore/SparseView.h
@@ -1,7 +1,7 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
-// Copyright (C) 2011 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2011-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
 // Copyright (C) 2010 Daniel Lowengrub <lowdanie@gmail.com>
 //
 // This Source Code Form is subject to the terms of the Mozilla
@@ -34,64 +34,186 @@ class SparseView : public SparseMatrixBase<SparseView<MatrixType> >
   typedef typename internal::remove_all<MatrixTypeNested>::type _MatrixTypeNested;
 public:
   EIGEN_SPARSE_PUBLIC_INTERFACE(SparseView)
+  typedef typename internal::remove_all<MatrixType>::type NestedExpression;
 
   SparseView(const MatrixType& mat, const Scalar& m_reference = Scalar(0),
-             typename NumTraits<Scalar>::Real m_epsilon = NumTraits<Scalar>::dummy_precision()) : 
+             RealScalar m_epsilon = NumTraits<Scalar>::dummy_precision()) : 
     m_matrix(mat), m_reference(m_reference), m_epsilon(m_epsilon) {}
 
-  class InnerIterator;
-
   inline Index rows() const { return m_matrix.rows(); }
   inline Index cols() const { return m_matrix.cols(); }
 
   inline Index innerSize() const { return m_matrix.innerSize(); }
   inline Index outerSize() const { return m_matrix.outerSize(); }
-
+  
+  /** \returns the nested expression */
+  const typename internal::remove_all<MatrixTypeNested>::type&
+  nestedExpression() const { return m_matrix; }
+  
+  Scalar reference() const { return m_reference; }
+  RealScalar epsilon() const { return m_epsilon; }
+  
 protected:
   MatrixTypeNested m_matrix;
   Scalar m_reference;
-  typename NumTraits<Scalar>::Real m_epsilon;
+  RealScalar m_epsilon;
 };
 
-template<typename MatrixType>
-class SparseView<MatrixType>::InnerIterator : public _MatrixTypeNested::InnerIterator
-{
-  typedef typename SparseView::Index Index;
-public:
-  typedef typename _MatrixTypeNested::InnerIterator IterBase;
-  InnerIterator(const SparseView& view, Index outer) :
-  IterBase(view.m_matrix, outer), m_view(view)
-  {
-    incrementToNonZero();
-  }
-
-  EIGEN_STRONG_INLINE InnerIterator& operator++()
-  {
-    IterBase::operator++();
-    incrementToNonZero();
-    return *this;
-  }
-
-  using IterBase::value;
+namespace internal {
 
-protected:
-  const SparseView& m_view;
+// TODO find a way to unify the two following variants
+// This is tricky because implementing an inner iterator on top of an IndexBased evaluator is
+// not easy because the evaluators do not expose the sizes of the underlying expression.
+  
+template<typename ArgType>
+struct unary_evaluator<SparseView<ArgType>, IteratorBased>
+  : public evaluator_base<SparseView<ArgType> >
+{
+    typedef typename evaluator<ArgType>::InnerIterator EvalIterator;
+  public:
+    typedef SparseView<ArgType> XprType;
+    
+    class InnerIterator : public EvalIterator
+    {
+        typedef typename XprType::Scalar Scalar;
+      public:
+
+        EIGEN_STRONG_INLINE InnerIterator(const unary_evaluator& sve, typename XprType::Index outer)
+          : EvalIterator(sve.m_argImpl,outer), m_view(sve.m_view)
+        {
+          incrementToNonZero();
+        }
+
+        EIGEN_STRONG_INLINE InnerIterator& operator++()
+        {
+          EvalIterator::operator++();
+          incrementToNonZero();
+          return *this;
+        }
+
+        using EvalIterator::value;
+
+      protected:
+        const XprType &m_view;
+
+      private:
+        void incrementToNonZero()
+        {
+          while((bool(*this)) && internal::isMuchSmallerThan(value(), m_view.reference(), m_view.epsilon()))
+          {
+            EvalIterator::operator++();
+          }
+        }
+    };
+    
+    enum {
+      CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
+      Flags = XprType::Flags
+    };
+    
+    unary_evaluator(const XprType& xpr) : m_argImpl(xpr.nestedExpression()), m_view(xpr) {}
+
+  protected:
+    typename evaluator<ArgType>::nestedType m_argImpl;
+    const XprType &m_view;
+};
 
-private:
-  void incrementToNonZero()
-  {
-    while((bool(*this)) && internal::isMuchSmallerThan(value(), m_view.m_reference, m_view.m_epsilon))
+template<typename ArgType>
+struct unary_evaluator<SparseView<ArgType>, IndexBased>
+  : public evaluator_base<SparseView<ArgType> >
+{
+  public:
+    typedef SparseView<ArgType> XprType;
+  protected:
+    enum { IsRowMajor = (XprType::Flags&RowMajorBit)==RowMajorBit };
+    typedef typename XprType::Index Index;
+    typedef typename XprType::Scalar Scalar;
+  public:
+    
+    class InnerIterator
     {
-      IterBase::operator++();
-    }
-  }
+      public:
+
+        EIGEN_STRONG_INLINE InnerIterator(const unary_evaluator& sve, typename XprType::Index outer)
+          : m_sve(sve), m_inner(0), m_outer(outer), m_end(sve.m_view.innerSize())
+        {
+          incrementToNonZero();
+        }
+
+        EIGEN_STRONG_INLINE InnerIterator& operator++()
+        {
+          m_inner++;
+          incrementToNonZero();
+          return *this;
+        }
+
+        EIGEN_STRONG_INLINE Scalar value() const
+        {
+          return (IsRowMajor) ? m_sve.m_argImpl.coeff(m_outer, m_inner)
+                              : m_sve.m_argImpl.coeff(m_inner, m_outer);
+        }
+
+        EIGEN_STRONG_INLINE Index index() const { return m_inner; }
+        inline Index row() const { return IsRowMajor ? m_outer : index(); }
+        inline Index col() const { return IsRowMajor ? index() : m_outer; }
+
+        EIGEN_STRONG_INLINE operator bool() const { return m_inner < m_end && m_inner>=0; }
+
+      protected:
+        const unary_evaluator &m_sve;
+        Index m_inner;
+        const Index m_outer;
+        const Index m_end;
+
+      private:
+        void incrementToNonZero()
+        {
+          while((bool(*this)) && internal::isMuchSmallerThan(value(), m_sve.m_view.reference(), m_sve.m_view.epsilon()))
+          {
+            m_inner++;
+          }
+        }
+    };
+    
+    enum {
+      CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
+      Flags = XprType::Flags
+    };
+    
+    unary_evaluator(const XprType& xpr) : m_argImpl(xpr.nestedExpression()), m_view(xpr) {}
+
+  protected:
+    typename evaluator<ArgType>::nestedType m_argImpl;
+    const XprType &m_view;
 };
 
+} // end namespace internal
+
+template<typename Derived>
+const SparseView<Derived> MatrixBase<Derived>::sparseView(const Scalar& reference,
+                                                          const typename NumTraits<Scalar>::Real& epsilon) const
+{
+  return SparseView<Derived>(derived(), reference, epsilon);
+}
+
+/** \returns an expression of \c *this with values smaller than
+  * \a reference * \a epsilon are removed.
+  *
+  * This method is typically used in conjunction with the product of two sparse matrices
+  * to automatically prune the smallest values as follows:
+  * \code
+  * C = (A*B).pruned();             // suppress numerical zeros (exact)
+  * C = (A*B).pruned(ref);
+  * C = (A*B).pruned(ref,epsilon);
+  * \endcode
+  * where \c ref is a meaningful non zero reference value.
+  * */
 template<typename Derived>
-const SparseView<Derived> MatrixBase<Derived>::sparseView(const Scalar& m_reference,
-                                                          const typename NumTraits<Scalar>::Real& m_epsilon) const
+const SparseView<Derived>
+SparseMatrixBase<Derived>::pruned(const Scalar& reference,
+                                  const RealScalar& epsilon) const
 {
-  return SparseView<Derived>(derived(), m_reference, m_epsilon);
+  return SparseView<Derived>(derived(), reference, epsilon);
 }
 
 } // end namespace Eigen
diff --git a/Eigen/src/SparseCore/TriangularSolver.h b/Eigen/src/SparseCore/TriangularSolver.h
index dd55522a7..98062e9c6 100644
--- a/Eigen/src/SparseCore/TriangularSolver.h
+++ b/Eigen/src/SparseCore/TriangularSolver.h
@@ -29,8 +29,11 @@ struct sparse_solve_triangular_selector<Lhs,Rhs,Mode,Lower,RowMajor>
 {
   typedef typename Rhs::Scalar Scalar;
   typedef typename Lhs::Index Index;
+  typedef typename evaluator<Lhs>::type LhsEval;
+  typedef typename evaluator<Lhs>::InnerIterator LhsIterator;
   static void run(const Lhs& lhs, Rhs& other)
   {
+    LhsEval lhsEval(lhs);
     for(Index col=0 ; col<other.cols() ; ++col)
     {
       for(Index i=0; i<lhs.rows(); ++i)
@@ -38,7 +41,7 @@ struct sparse_solve_triangular_selector<Lhs,Rhs,Mode,Lower,RowMajor>
         Scalar tmp = other.coeff(i,col);
         Scalar lastVal(0);
         Index lastIndex = 0;
-        for(typename Lhs::InnerIterator it(lhs, i); it; ++it)
+        for(LhsIterator it(lhsEval, i); it; ++it)
         {
           lastVal = it.value();
           lastIndex = it.index();
@@ -64,15 +67,18 @@ struct sparse_solve_triangular_selector<Lhs,Rhs,Mode,Upper,RowMajor>
 {
   typedef typename Rhs::Scalar Scalar;
   typedef typename Lhs::Index Index;
+  typedef typename evaluator<Lhs>::type LhsEval;
+  typedef typename evaluator<Lhs>::InnerIterator LhsIterator;
   static void run(const Lhs& lhs, Rhs& other)
   {
+    LhsEval lhsEval(lhs);
     for(Index col=0 ; col<other.cols() ; ++col)
     {
       for(Index i=lhs.rows()-1 ; i>=0 ; --i)
       {
         Scalar tmp = other.coeff(i,col);
         Scalar l_ii = 0;
-        typename Lhs::InnerIterator it(lhs, i);
+        LhsIterator it(lhsEval, i);
         while(it && it.index()<i)
           ++it;
         if(!(Mode & UnitDiag))
@@ -88,10 +94,8 @@ struct sparse_solve_triangular_selector<Lhs,Rhs,Mode,Upper,RowMajor>
           tmp -= it.value() * other.coeff(it.index(),col);
         }
 
-        if (Mode & UnitDiag)
-          other.coeffRef(i,col) = tmp;
-        else
-          other.coeffRef(i,col) = tmp/l_ii;
+        if (Mode & UnitDiag)  other.coeffRef(i,col) = tmp;
+        else                  other.coeffRef(i,col) = tmp/l_ii;
       }
     }
   }
@@ -103,8 +107,11 @@ struct sparse_solve_triangular_selector<Lhs,Rhs,Mode,Lower,ColMajor>
 {
   typedef typename Rhs::Scalar Scalar;
   typedef typename Lhs::Index Index;
+  typedef typename evaluator<Lhs>::type LhsEval;
+  typedef typename evaluator<Lhs>::InnerIterator LhsIterator;
   static void run(const Lhs& lhs, Rhs& other)
   {
+    LhsEval lhsEval(lhs);
     for(Index col=0 ; col<other.cols() ; ++col)
     {
       for(Index i=0; i<lhs.cols(); ++i)
@@ -112,7 +119,7 @@ struct sparse_solve_triangular_selector<Lhs,Rhs,Mode,Lower,ColMajor>
         Scalar& tmp = other.coeffRef(i,col);
         if (tmp!=Scalar(0)) // optimization when other is actually sparse
         {
-          typename Lhs::InnerIterator it(lhs, i);
+          LhsIterator it(lhsEval, i);
           while(it && it.index()<i)
             ++it;
           if(!(Mode & UnitDiag))
@@ -136,8 +143,11 @@ struct sparse_solve_triangular_selector<Lhs,Rhs,Mode,Upper,ColMajor>
 {
   typedef typename Rhs::Scalar Scalar;
   typedef typename Lhs::Index Index;
+  typedef typename evaluator<Lhs>::type LhsEval;
+  typedef typename evaluator<Lhs>::InnerIterator LhsIterator;
   static void run(const Lhs& lhs, Rhs& other)
   {
+    LhsEval lhsEval(lhs);
     for(Index col=0 ; col<other.cols() ; ++col)
     {
       for(Index i=lhs.cols()-1; i>=0; --i)
@@ -148,13 +158,13 @@ struct sparse_solve_triangular_selector<Lhs,Rhs,Mode,Upper,ColMajor>
           if(!(Mode & UnitDiag))
           {
             // TODO replace this by a binary search. make sure the binary search is safe for partially sorted elements
-            typename Lhs::ReverseInnerIterator it(lhs, i);
+            LhsIterator it(lhsEval, i);
             while(it && it.index()!=i)
-              --it;
+              ++it;
             eigen_assert(it && it.index()==i);
             other.coeffRef(i,col) /= it.value();
           }
-          typename Lhs::InnerIterator it(lhs, i);
+          LhsIterator it(lhsEval, i);
           for(; it && it.index()<i; ++it)
             other.coeffRef(it.index(), col) -= tmp * it.value();
         }
@@ -165,11 +175,11 @@ struct sparse_solve_triangular_selector<Lhs,Rhs,Mode,Upper,ColMajor>
 
 } // end namespace internal
 
-template<typename ExpressionType,int Mode>
+template<typename ExpressionType,unsigned int Mode>
 template<typename OtherDerived>
-void SparseTriangularView<ExpressionType,Mode>::solveInPlace(MatrixBase<OtherDerived>& other) const
+void TriangularViewImpl<ExpressionType,Mode,Sparse>::solveInPlace(MatrixBase<OtherDerived>& other) const
 {
-  eigen_assert(m_matrix.cols() == m_matrix.rows() && m_matrix.cols() == other.rows());
+  eigen_assert(derived().cols() == derived().rows() && derived().cols() == other.rows());
   eigen_assert((!(Mode & ZeroDiag)) && bool(Mode & (Upper|Lower)));
 
   enum { copy = internal::traits<OtherDerived>::Flags & RowMajorBit };
@@ -178,22 +188,12 @@ void SparseTriangularView<ExpressionType,Mode>::solveInPlace(MatrixBase<OtherDer
     typename internal::plain_matrix_type_column_major<OtherDerived>::type, OtherDerived&>::type OtherCopy;
   OtherCopy otherCopy(other.derived());
 
-  internal::sparse_solve_triangular_selector<ExpressionType, typename internal::remove_reference<OtherCopy>::type, Mode>::run(m_matrix, otherCopy);
+  internal::sparse_solve_triangular_selector<ExpressionType, typename internal::remove_reference<OtherCopy>::type, Mode>::run(derived().nestedExpression(), otherCopy);
 
   if (copy)
     other = otherCopy;
 }
 
-template<typename ExpressionType,int Mode>
-template<typename OtherDerived>
-typename internal::plain_matrix_type_column_major<OtherDerived>::type
-SparseTriangularView<ExpressionType,Mode>::solve(const MatrixBase<OtherDerived>& other) const
-{
-  typename internal::plain_matrix_type_column_major<OtherDerived>::type res(other);
-  solveInPlace(res);
-  return res;
-}
-
 // pure sparse path
 
 namespace internal {
@@ -290,11 +290,11 @@ struct sparse_solve_triangular_sparse_selector<Lhs,Rhs,Mode,UpLo,ColMajor>
 
 } // end namespace internal
 
-template<typename ExpressionType,int Mode>
+template<typename ExpressionType,unsigned int Mode>
 template<typename OtherDerived>
-void SparseTriangularView<ExpressionType,Mode>::solveInPlace(SparseMatrixBase<OtherDerived>& other) const
+void TriangularViewImpl<ExpressionType,Mode,Sparse>::solveInPlace(SparseMatrixBase<OtherDerived>& other) const
 {
-  eigen_assert(m_matrix.cols() == m_matrix.rows() && m_matrix.cols() == other.rows());
+  eigen_assert(derived().cols() == derived().rows() && derived().cols() == other.rows());
   eigen_assert( (!(Mode & ZeroDiag)) && bool(Mode & (Upper|Lower)));
 
 //   enum { copy = internal::traits<OtherDerived>::Flags & RowMajorBit };
@@ -303,7 +303,7 @@ void SparseTriangularView<ExpressionType,Mode>::solveInPlace(SparseMatrixBase<Ot
 //     typename internal::plain_matrix_type_column_major<OtherDerived>::type, OtherDerived&>::type OtherCopy;
 //   OtherCopy otherCopy(other.derived());
 
-  internal::sparse_solve_triangular_sparse_selector<ExpressionType, OtherDerived, Mode>::run(m_matrix, other.derived());
+  internal::sparse_solve_triangular_sparse_selector<ExpressionType, OtherDerived, Mode>::run(derived().nestedExpression(), other.derived());
 
 //   if (copy)
 //     other = otherCopy;
diff --git a/Eigen/src/SparseLU/SparseLU.h b/Eigen/src/SparseLU/SparseLU.h
index 7a9aeec2d..14d7e713e 100644
--- a/Eigen/src/SparseLU/SparseLU.h
+++ b/Eigen/src/SparseLU/SparseLU.h
@@ -2,7 +2,7 @@
 // for linear algebra.
 //
 // Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
-// Copyright (C) 2012 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2012-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
@@ -70,9 +70,14 @@ template <typename MatrixLType, typename MatrixUType> struct SparseLUMatrixURetu
   * \sa \ref OrderingMethods_Module
   */
 template <typename _MatrixType, typename _OrderingType>
-class SparseLU : public internal::SparseLUImpl<typename _MatrixType::Scalar, typename _MatrixType::Index>
+class SparseLU : public SparseSolverBase<SparseLU<_MatrixType,_OrderingType> >, public internal::SparseLUImpl<typename _MatrixType::Scalar, typename _MatrixType::Index>
 {
+  protected:
+    typedef SparseSolverBase<SparseLU<_MatrixType,_OrderingType> > APIBase;
+    using APIBase::m_isInitialized;
   public:
+    using APIBase::_solve_impl;
+    
     typedef _MatrixType MatrixType; 
     typedef _OrderingType OrderingType;
     typedef typename MatrixType::Scalar Scalar; 
@@ -86,11 +91,11 @@ class SparseLU : public internal::SparseLUImpl<typename _MatrixType::Scalar, typ
     typedef internal::SparseLUImpl<Scalar, Index> Base;
     
   public:
-    SparseLU():m_isInitialized(true),m_lastError(""),m_Ustore(0,0,0,0,0,0),m_symmetricmode(false),m_diagpivotthresh(1.0),m_detPermR(1)
+    SparseLU():m_lastError(""),m_Ustore(0,0,0,0,0,0),m_symmetricmode(false),m_diagpivotthresh(1.0),m_detPermR(1)
     {
       initperfvalues(); 
     }
-    SparseLU(const MatrixType& matrix):m_isInitialized(true),m_lastError(""),m_Ustore(0,0,0,0,0,0),m_symmetricmode(false),m_diagpivotthresh(1.0),m_detPermR(1)
+    SparseLU(const MatrixType& matrix):m_lastError(""),m_Ustore(0,0,0,0,0,0),m_symmetricmode(false),m_diagpivotthresh(1.0),m_detPermR(1)
     {
       initperfvalues(); 
       compute(matrix);
@@ -168,6 +173,7 @@ class SparseLU : public internal::SparseLUImpl<typename _MatrixType::Scalar, typ
       m_diagpivotthresh = thresh; 
     }
 
+#ifdef EIGEN_PARSED_BY_DOXYGEN
     /** \returns the solution X of \f$ A X = B \f$ using the current decomposition of A.
       *
       * \warning the destination matrix X in X = this->solve(B) must be colmun-major.
@@ -175,26 +181,8 @@ class SparseLU : public internal::SparseLUImpl<typename _MatrixType::Scalar, typ
       * \sa compute()
       */
     template<typename Rhs>
-    inline const internal::solve_retval<SparseLU, Rhs> solve(const MatrixBase<Rhs>& B) const 
-    {
-      eigen_assert(m_factorizationIsOk && "SparseLU is not initialized."); 
-      eigen_assert(rows()==B.rows()
-                    && "SparseLU::solve(): invalid number of rows of the right hand side matrix B");
-          return internal::solve_retval<SparseLU, Rhs>(*this, B.derived());
-    }
-
-    /** \returns the solution X of \f$ A X = B \f$ using the current decomposition of A.
-      *
-      * \sa compute()
-      */
-    template<typename Rhs>
-    inline const internal::sparse_solve_retval<SparseLU, Rhs> solve(const SparseMatrixBase<Rhs>& B) const 
-    {
-      eigen_assert(m_factorizationIsOk && "SparseLU is not initialized."); 
-      eigen_assert(rows()==B.rows()
-                    && "SparseLU::solve(): invalid number of rows of the right hand side matrix B");
-          return internal::sparse_solve_retval<SparseLU, Rhs>(*this, B.derived());
-    }
+    inline const Solve<SparseLU, Rhs> solve(const MatrixBase<Rhs>& B) const;
+#endif // EIGEN_PARSED_BY_DOXYGEN
     
     /** \brief Reports whether previous computation was successful.
       *
@@ -219,7 +207,7 @@ class SparseLU : public internal::SparseLUImpl<typename _MatrixType::Scalar, typ
     }
 
     template<typename Rhs, typename Dest>
-    bool _solve(const MatrixBase<Rhs> &B, MatrixBase<Dest> &X_base) const
+    bool _solve_impl(const MatrixBase<Rhs> &B, MatrixBase<Dest> &X_base) const
     {
       Dest& X(X_base.derived());
       eigen_assert(m_factorizationIsOk && "The matrix should be factorized first");
@@ -332,7 +320,6 @@ class SparseLU : public internal::SparseLUImpl<typename _MatrixType::Scalar, typ
       
     // Variables 
     mutable ComputationInfo m_info;
-    bool m_isInitialized;
     bool m_factorizationIsOk;
     bool m_analysisIsOk;
     std::string m_lastError;
@@ -463,6 +450,8 @@ void SparseLU<MatrixType, OrderingType>::factorize(const MatrixType& matrix)
   
   typedef typename IndexVector::Scalar Index; 
   
+  m_isInitialized = true;
+  
   
   // Apply the column permutation computed in analyzepattern()
   //   m_mat = matrix * m_perm_c.inverse(); 
@@ -728,35 +717,6 @@ struct SparseLUMatrixUReturnType : internal::no_assignment_operator
   const MatrixUType& m_mapU;
 };
 
-namespace internal {
-  
-template<typename _MatrixType, typename Derived, typename Rhs>
-struct solve_retval<SparseLU<_MatrixType,Derived>, Rhs>
-  : solve_retval_base<SparseLU<_MatrixType,Derived>, Rhs>
-{
-  typedef SparseLU<_MatrixType,Derived> Dec;
-  EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs)
-
-  template<typename Dest> void evalTo(Dest& dst) const
-  {
-    dec()._solve(rhs(),dst);
-  }
-};
-
-template<typename _MatrixType, typename Derived, typename Rhs>
-struct sparse_solve_retval<SparseLU<_MatrixType,Derived>, Rhs>
-  : sparse_solve_retval_base<SparseLU<_MatrixType,Derived>, Rhs>
-{
-  typedef SparseLU<_MatrixType,Derived> Dec;
-  EIGEN_MAKE_SPARSE_SOLVE_HELPERS(Dec,Rhs)
-
-  template<typename Dest> void evalTo(Dest& dst) const
-  {
-    this->defaultEvalTo(dst);
-  }
-};
-} // end namespace internal
-
 } // End namespace Eigen 
 
 #endif
diff --git a/Eigen/src/SparseQR/SparseQR.h b/Eigen/src/SparseQR/SparseQR.h
index 002b4824b..6d85ea9be 100644
--- a/Eigen/src/SparseQR/SparseQR.h
+++ b/Eigen/src/SparseQR/SparseQR.h
@@ -62,9 +62,13 @@ namespace internal {
   * 
   */
 template<typename _MatrixType, typename _OrderingType>
-class SparseQR
+class SparseQR : public SparseSolverBase<SparseQR<_MatrixType,_OrderingType> >
 {
+  protected:
+    typedef SparseSolverBase<SparseQR<_MatrixType,_OrderingType> > Base;
+    using Base::m_isInitialized;
   public:
+    using Base::_solve_impl;
     typedef _MatrixType MatrixType;
     typedef _OrderingType OrderingType;
     typedef typename MatrixType::Scalar Scalar;
@@ -75,7 +79,7 @@ class SparseQR
     typedef Matrix<Scalar, Dynamic, 1> ScalarVector;
     typedef PermutationMatrix<Dynamic, Dynamic, Index> PermutationType;
   public:
-    SparseQR () : m_isInitialized(false), m_analysisIsok(false), m_lastError(""), m_useDefaultThreshold(true),m_isQSorted(false),m_isEtreeOk(false)
+    SparseQR () :  m_analysisIsok(false), m_lastError(""), m_useDefaultThreshold(true),m_isQSorted(false),m_isEtreeOk(false)
     { }
     
     /** Construct a QR factorization of the matrix \a mat.
@@ -84,7 +88,7 @@ class SparseQR
       * 
       * \sa compute()
       */
-    SparseQR(const MatrixType& mat) : m_isInitialized(false), m_analysisIsok(false), m_lastError(""), m_useDefaultThreshold(true),m_isQSorted(false),m_isEtreeOk(false)
+    SparseQR(const MatrixType& mat) : m_analysisIsok(false), m_lastError(""), m_useDefaultThreshold(true),m_isQSorted(false),m_isEtreeOk(false)
     {
       compute(mat);
     }
@@ -162,7 +166,7 @@ class SparseQR
     
     /** \internal */
     template<typename Rhs, typename Dest>
-    bool _solve(const MatrixBase<Rhs> &B, MatrixBase<Dest> &dest) const
+    bool _solve_impl(const MatrixBase<Rhs> &B, MatrixBase<Dest> &dest) const
     {
       eigen_assert(m_isInitialized && "The factorization should be called first, use compute()");
       eigen_assert(this->rows() == B.rows() && "SparseQR::solve() : invalid number of rows in the right hand side matrix");
@@ -178,7 +182,7 @@ class SparseQR
       y.resize((std::max)(cols(),Index(y.rows())),y.cols());
       y.topRows(rank) = this->matrixR().topLeftCorner(rank, rank).template triangularView<Upper>().solve(b.topRows(rank));
       y.bottomRows(y.rows()-rank).setZero();
-
+      
       // Apply the column permutation
       if (m_perm_c.size())  dest = colsPermutation() * y.topRows(cols());
       else                  dest = y.topRows(cols());
@@ -186,7 +190,6 @@ class SparseQR
       m_info = Success;
       return true;
     }
-    
 
     /** Sets the threshold that is used to determine linearly dependent columns during the factorization.
       *
@@ -204,18 +207,18 @@ class SparseQR
       * \sa compute()
       */
     template<typename Rhs>
-    inline const internal::solve_retval<SparseQR, Rhs> solve(const MatrixBase<Rhs>& B) const 
+    inline const Solve<SparseQR, Rhs> solve(const MatrixBase<Rhs>& B) const 
     {
       eigen_assert(m_isInitialized && "The factorization should be called first, use compute()");
       eigen_assert(this->rows() == B.rows() && "SparseQR::solve() : invalid number of rows in the right hand side matrix");
-      return internal::solve_retval<SparseQR, Rhs>(*this, B.derived());
+      return Solve<SparseQR, Rhs>(*this, B.derived());
     }
     template<typename Rhs>
-    inline const internal::sparse_solve_retval<SparseQR, Rhs> solve(const SparseMatrixBase<Rhs>& B) const
+    inline const Solve<SparseQR, Rhs> solve(const SparseMatrixBase<Rhs>& B) const
     {
           eigen_assert(m_isInitialized && "The factorization should be called first, use compute()");
           eigen_assert(this->rows() == B.rows() && "SparseQR::solve() : invalid number of rows in the right hand side matrix");
-          return internal::sparse_solve_retval<SparseQR, Rhs>(*this, B.derived());
+          return Solve<SparseQR, Rhs>(*this, B.derived());
     }
     
     /** \brief Reports whether previous computation was successful.
@@ -244,7 +247,6 @@ class SparseQR
 
     
   protected:
-    bool m_isInitialized;
     bool m_analysisIsok;
     bool m_factorizationIsok;
     mutable ComputationInfo m_info;
@@ -282,9 +284,11 @@ template <typename MatrixType, typename OrderingType>
 void SparseQR<MatrixType,OrderingType>::analyzePattern(const MatrixType& mat)
 {
   eigen_assert(mat.isCompressed() && "SparseQR requires a sparse matrix in compressed mode. Call .makeCompressed() before passing it to SparseQR");
+  // Copy to a column major matrix if the input is rowmajor
+  typename internal::conditional<MatrixType::IsRowMajor,QRMatrixType,const MatrixType&>::type matCpy(mat);
   // Compute the column fill reducing ordering
   OrderingType ord; 
-  ord(mat, m_perm_c); 
+  ord(matCpy, m_perm_c); 
   Index n = mat.cols();
   Index m = mat.rows();
   Index diagSize = (std::min)(m,n);
@@ -297,7 +301,7 @@ void SparseQR<MatrixType,OrderingType>::analyzePattern(const MatrixType& mat)
   
   // Compute the column elimination tree of the permuted matrix
   m_outputPerm_c = m_perm_c.inverse();
-  internal::coletree(mat, m_etree, m_firstRowElt, m_outputPerm_c.indices().data());
+  internal::coletree(matCpy, m_etree, m_firstRowElt, m_outputPerm_c.indices().data());
   m_isEtreeOk = true;
   
   m_R.resize(m, n);
@@ -335,21 +339,35 @@ void SparseQR<MatrixType,OrderingType>::factorize(const MatrixType& mat)
   
   m_R.setZero();
   m_Q.setZero();
+  m_pmat = mat;
   if(!m_isEtreeOk)
   {
     m_outputPerm_c = m_perm_c.inverse();
-    internal::coletree(mat, m_etree, m_firstRowElt, m_outputPerm_c.indices().data());
+    internal::coletree(m_pmat, m_etree, m_firstRowElt, m_outputPerm_c.indices().data());
     m_isEtreeOk = true;
   }
-    
-  m_pmat = mat;
+
   m_pmat.uncompress(); // To have the innerNonZeroPtr allocated
+  
   // Apply the fill-in reducing permutation lazily:
-  for (int i = 0; i < n; i++)
   {
-    Index p = m_perm_c.size() ? m_perm_c.indices()(i) : i;
-    m_pmat.outerIndexPtr()[p] = mat.outerIndexPtr()[i]; 
-    m_pmat.innerNonZeroPtr()[p] = mat.outerIndexPtr()[i+1] - mat.outerIndexPtr()[i]; 
+    // If the input is row major, copy the original column indices,
+    // otherwise directly use the input matrix
+    // 
+    IndexVector originalOuterIndicesCpy;
+    const Index *originalOuterIndices = mat.outerIndexPtr();
+    if(MatrixType::IsRowMajor)
+    {
+      originalOuterIndicesCpy = IndexVector::Map(m_pmat.outerIndexPtr(),n+1);
+      originalOuterIndices = originalOuterIndicesCpy.data();
+    }
+    
+    for (int i = 0; i < n; i++)
+    {
+      Index p = m_perm_c.size() ? m_perm_c.indices()(i) : i;
+      m_pmat.outerIndexPtr()[p] = originalOuterIndices[i]; 
+      m_pmat.innerNonZeroPtr()[p] = originalOuterIndices[i+1] - originalOuterIndices[i]; 
+    }
   }
   
   /* Compute the default threshold as in MatLab, see:
@@ -384,7 +402,7 @@ void SparseQR<MatrixType,OrderingType>::factorize(const MatrixType& mat)
     // all the nodes (with indexes lower than rank) reachable through the column elimination tree (etree) rooted at node k.
     // Note: if the diagonal entry does not exist, then its contribution must be explicitly added,
     // thus the trick with found_diag that permits to do one more iteration on the diagonal element if this one has not been found.
-    for (typename MatrixType::InnerIterator itp(m_pmat, col); itp || !found_diag; ++itp)
+    for (typename QRMatrixType::InnerIterator itp(m_pmat, col); itp || !found_diag; ++itp)
     {
       Index curIdx = nonzeroCol;
       if(itp) curIdx = itp.row();
@@ -542,7 +560,7 @@ void SparseQR<MatrixType,OrderingType>::factorize(const MatrixType& mat)
   if(nonzeroCol<n)
   {
     // Permute the triangular factor to put the 'dead' columns to the end
-    MatrixType tempR(m_R);
+    QRMatrixType tempR(m_R);
     m_R = tempR * m_pivotperm;
     
     // Update the column permutation
@@ -554,34 +572,6 @@ void SparseQR<MatrixType,OrderingType>::factorize(const MatrixType& mat)
   m_info = Success;
 }
 
-namespace internal {
-  
-template<typename _MatrixType, typename OrderingType, typename Rhs>
-struct solve_retval<SparseQR<_MatrixType,OrderingType>, Rhs>
-  : solve_retval_base<SparseQR<_MatrixType,OrderingType>, Rhs>
-{
-  typedef SparseQR<_MatrixType,OrderingType> Dec;
-  EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs)
-
-  template<typename Dest> void evalTo(Dest& dst) const
-  {
-    dec()._solve(rhs(),dst);
-  }
-};
-template<typename _MatrixType, typename OrderingType, typename Rhs>
-struct sparse_solve_retval<SparseQR<_MatrixType, OrderingType>, Rhs>
- : sparse_solve_retval_base<SparseQR<_MatrixType, OrderingType>, Rhs>
-{
-  typedef SparseQR<_MatrixType, OrderingType> Dec;
-  EIGEN_MAKE_SPARSE_SOLVE_HELPERS(Dec, Rhs)
-
-  template<typename Dest> void evalTo(Dest& dst) const
-  {
-    this->defaultEvalTo(dst);
-  }
-};
-} // end namespace internal
-
 template <typename SparseQRType, typename Derived>
 struct SparseQR_QProduct : ReturnByValue<SparseQR_QProduct<SparseQRType, Derived> >
 {
diff --git a/Eigen/src/SuperLUSupport/SuperLUSupport.h b/Eigen/src/SuperLUSupport/SuperLUSupport.h
index bcb355760..0137585ca 100644
--- a/Eigen/src/SuperLUSupport/SuperLUSupport.h
+++ b/Eigen/src/SuperLUSupport/SuperLUSupport.h
@@ -1,7 +1,7 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
-// Copyright (C) 2008-2011 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
@@ -288,8 +288,12 @@ MappedSparseMatrix<Scalar,Flags,Index> map_superlu(SluMatrix& sluMat)
   * \brief The base class for the direct and incomplete LU factorization of SuperLU
   */
 template<typename _MatrixType, typename Derived>
-class SuperLUBase : internal::noncopyable
+class SuperLUBase : public SparseSolverBase<Derived>
 {
+  protected:
+    typedef SparseSolverBase<Derived> Base;
+    using Base::derived;
+    using Base::m_isInitialized;
   public:
     typedef _MatrixType MatrixType;
     typedef typename MatrixType::Scalar Scalar;
@@ -309,9 +313,6 @@ class SuperLUBase : internal::noncopyable
       clearFactors();
     }
     
-    Derived& derived() { return *static_cast<Derived*>(this); }
-    const Derived& derived() const { return *static_cast<const Derived*>(this); }
-    
     inline Index rows() const { return m_matrix.rows(); }
     inline Index cols() const { return m_matrix.cols(); }
     
@@ -335,33 +336,7 @@ class SuperLUBase : internal::noncopyable
       derived().analyzePattern(matrix);
       derived().factorize(matrix);
     }
-    
-    /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A.
-      *
-      * \sa compute()
-      */
-    template<typename Rhs>
-    inline const internal::solve_retval<SuperLUBase, Rhs> solve(const MatrixBase<Rhs>& b) const
-    {
-      eigen_assert(m_isInitialized && "SuperLU is not initialized.");
-      eigen_assert(rows()==b.rows()
-                && "SuperLU::solve(): invalid number of rows of the right hand side matrix b");
-      return internal::solve_retval<SuperLUBase, Rhs>(*this, b.derived());
-    }
-    
-    /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A.
-      *
-      * \sa compute()
-      */
-    template<typename Rhs>
-    inline const internal::sparse_solve_retval<SuperLUBase, Rhs> solve(const SparseMatrixBase<Rhs>& b) const
-    {
-      eigen_assert(m_isInitialized && "SuperLU is not initialized.");
-      eigen_assert(rows()==b.rows()
-                && "SuperLU::solve(): invalid number of rows of the right hand side matrix b");
-      return internal::sparse_solve_retval<SuperLUBase, Rhs>(*this, b.derived());
-    }
-    
+
     /** Performs a symbolic decomposition on the sparcity of \a matrix.
       *
       * This function is particularly useful when solving for several problems having the same structure.
@@ -453,7 +428,6 @@ class SuperLUBase : internal::noncopyable
     mutable char m_sluEqued;
 
     mutable ComputationInfo m_info;
-    bool m_isInitialized;
     int m_factorizationIsOk;
     int m_analysisIsOk;
     mutable bool m_extractedDataAreDirty;
@@ -491,6 +465,7 @@ class SuperLU : public SuperLUBase<_MatrixType,SuperLU<_MatrixType> >
     typedef TriangularView<LUMatrixType,  Upper>           UMatrixType;
 
   public:
+    using Base::_solve_impl;
 
     SuperLU() : Base() { init(); }
 
@@ -528,7 +503,7 @@ class SuperLU : public SuperLUBase<_MatrixType,SuperLU<_MatrixType> >
     #ifndef EIGEN_PARSED_BY_DOXYGEN
     /** \internal */
     template<typename Rhs,typename Dest>
-    void _solve(const MatrixBase<Rhs> &b, MatrixBase<Dest> &dest) const;
+    void _solve_impl(const MatrixBase<Rhs> &b, MatrixBase<Dest> &dest) const;
     #endif // EIGEN_PARSED_BY_DOXYGEN
     
     inline const LMatrixType& matrixL() const
@@ -637,7 +612,7 @@ void SuperLU<MatrixType>::factorize(const MatrixType& a)
 
 template<typename MatrixType>
 template<typename Rhs,typename Dest>
-void SuperLU<MatrixType>::_solve(const MatrixBase<Rhs> &b, MatrixBase<Dest>& x) const
+void SuperLU<MatrixType>::_solve_impl(const MatrixBase<Rhs> &b, MatrixBase<Dest>& x) const
 {
   eigen_assert(m_factorizationIsOk && "The decomposition is not in a valid state for solving, you must first call either compute() or analyzePattern()/factorize()");
 
@@ -828,6 +803,7 @@ class SuperILU : public SuperLUBase<_MatrixType,SuperILU<_MatrixType> >
     typedef typename Base::Index Index;
 
   public:
+    using Base::_solve_impl;
 
     SuperILU() : Base() { init(); }
 
@@ -863,7 +839,7 @@ class SuperILU : public SuperLUBase<_MatrixType,SuperILU<_MatrixType> >
     #ifndef EIGEN_PARSED_BY_DOXYGEN
     /** \internal */
     template<typename Rhs,typename Dest>
-    void _solve(const MatrixBase<Rhs> &b, MatrixBase<Dest> &dest) const;
+    void _solve_impl(const MatrixBase<Rhs> &b, MatrixBase<Dest> &dest) const;
     #endif // EIGEN_PARSED_BY_DOXYGEN
     
   protected:
@@ -948,7 +924,7 @@ void SuperILU<MatrixType>::factorize(const MatrixType& a)
 
 template<typename MatrixType>
 template<typename Rhs,typename Dest>
-void SuperILU<MatrixType>::_solve(const MatrixBase<Rhs> &b, MatrixBase<Dest>& x) const
+void SuperILU<MatrixType>::_solve_impl(const MatrixBase<Rhs> &b, MatrixBase<Dest>& x) const
 {
   eigen_assert(m_factorizationIsOk && "The decomposition is not in a valid state for solving, you must first call either compute() or analyzePattern()/factorize()");
 
@@ -991,36 +967,6 @@ void SuperILU<MatrixType>::_solve(const MatrixBase<Rhs> &b, MatrixBase<Dest>& x)
 }
 #endif
 
-namespace internal {
-  
-template<typename _MatrixType, typename Derived, typename Rhs>
-struct solve_retval<SuperLUBase<_MatrixType,Derived>, Rhs>
-  : solve_retval_base<SuperLUBase<_MatrixType,Derived>, Rhs>
-{
-  typedef SuperLUBase<_MatrixType,Derived> Dec;
-  EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs)
-
-  template<typename Dest> void evalTo(Dest& dst) const
-  {
-    dec().derived()._solve(rhs(),dst);
-  }
-};
-
-template<typename _MatrixType, typename Derived, typename Rhs>
-struct sparse_solve_retval<SuperLUBase<_MatrixType,Derived>, Rhs>
-  : sparse_solve_retval_base<SuperLUBase<_MatrixType,Derived>, Rhs>
-{
-  typedef SuperLUBase<_MatrixType,Derived> Dec;
-  EIGEN_MAKE_SPARSE_SOLVE_HELPERS(Dec,Rhs)
-
-  template<typename Dest> void evalTo(Dest& dst) const
-  {
-    this->defaultEvalTo(dst);
-  }
-};
-
-} // end namespace internal
-
 } // end namespace Eigen
 
 #endif // EIGEN_SUPERLUSUPPORT_H
diff --git a/Eigen/src/UmfPackSupport/UmfPackSupport.h b/Eigen/src/UmfPackSupport/UmfPackSupport.h
index 3a48cecf7..7fada5567 100644
--- a/Eigen/src/UmfPackSupport/UmfPackSupport.h
+++ b/Eigen/src/UmfPackSupport/UmfPackSupport.h
@@ -121,9 +121,13 @@ inline int umfpack_get_determinant(std::complex<double> *Mx, double *Ex, void *N
   * \sa \ref TutorialSparseDirectSolvers
   */
 template<typename _MatrixType>
-class UmfPackLU : internal::noncopyable
+class UmfPackLU : public SparseSolverBase<UmfPackLU<_MatrixType> >
 {
+  protected:
+    typedef SparseSolverBase<UmfPackLU<_MatrixType> > Base;
+    using Base::m_isInitialized;
   public:
+    using Base::_solve_impl;
     typedef _MatrixType MatrixType;
     typedef typename MatrixType::Scalar Scalar;
     typedef typename MatrixType::RealScalar RealScalar;
@@ -198,32 +202,6 @@ class UmfPackLU : internal::noncopyable
       factorize(matrix);
     }
 
-    /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A.
-      *
-      * \sa compute()
-      */
-    template<typename Rhs>
-    inline const internal::solve_retval<UmfPackLU, Rhs> solve(const MatrixBase<Rhs>& b) const
-    {
-      eigen_assert(m_isInitialized && "UmfPackLU is not initialized.");
-      eigen_assert(rows()==b.rows()
-                && "UmfPackLU::solve(): invalid number of rows of the right hand side matrix b");
-      return internal::solve_retval<UmfPackLU, Rhs>(*this, b.derived());
-    }
-
-    /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A.
-      *
-      * \sa compute()
-      */
-    template<typename Rhs>
-    inline const internal::sparse_solve_retval<UmfPackLU, Rhs> solve(const SparseMatrixBase<Rhs>& b) const
-    {
-      eigen_assert(m_isInitialized && "UmfPackLU is not initialized.");
-      eigen_assert(rows()==b.rows()
-                && "UmfPackLU::solve(): invalid number of rows of the right hand side matrix b");
-      return internal::sparse_solve_retval<UmfPackLU, Rhs>(*this, b.derived());
-    }
-
     /** Performs a symbolic decomposition on the sparcity of \a matrix.
       *
       * This function is particularly useful when solving for several problems having the same structure.
@@ -274,7 +252,7 @@ class UmfPackLU : internal::noncopyable
     #ifndef EIGEN_PARSED_BY_DOXYGEN
     /** \internal */
     template<typename BDerived,typename XDerived>
-    bool _solve(const MatrixBase<BDerived> &b, MatrixBase<XDerived> &x) const;
+    bool _solve_impl(const MatrixBase<BDerived> &b, MatrixBase<XDerived> &x) const;
     #endif
 
     Scalar determinant() const;
@@ -328,7 +306,6 @@ class UmfPackLU : internal::noncopyable
     void* m_symbolic;
 
     mutable ComputationInfo m_info;
-    bool m_isInitialized;
     int m_factorizationIsOk;
     int m_analysisIsOk;
     mutable bool m_extractedDataAreDirty;
@@ -376,7 +353,7 @@ typename UmfPackLU<MatrixType>::Scalar UmfPackLU<MatrixType>::determinant() cons
 
 template<typename MatrixType>
 template<typename BDerived,typename XDerived>
-bool UmfPackLU<MatrixType>::_solve(const MatrixBase<BDerived> &b, MatrixBase<XDerived> &x) const
+bool UmfPackLU<MatrixType>::_solve_impl(const MatrixBase<BDerived> &b, MatrixBase<XDerived> &x) const
 {
   const int rhsCols = b.cols();
   eigen_assert((BDerived::Flags&RowMajorBit)==0 && "UmfPackLU backend does not support non col-major rhs yet");
@@ -396,37 +373,6 @@ bool UmfPackLU<MatrixType>::_solve(const MatrixBase<BDerived> &b, MatrixBase<XDe
   return true;
 }
 
-
-namespace internal {
-
-template<typename _MatrixType, typename Rhs>
-struct solve_retval<UmfPackLU<_MatrixType>, Rhs>
-  : solve_retval_base<UmfPackLU<_MatrixType>, Rhs>
-{
-  typedef UmfPackLU<_MatrixType> Dec;
-  EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs)
-
-  template<typename Dest> void evalTo(Dest& dst) const
-  {
-    dec()._solve(rhs(),dst);
-  }
-};
-
-template<typename _MatrixType, typename Rhs>
-struct sparse_solve_retval<UmfPackLU<_MatrixType>, Rhs>
-  : sparse_solve_retval_base<UmfPackLU<_MatrixType>, Rhs>
-{
-  typedef UmfPackLU<_MatrixType> Dec;
-  EIGEN_MAKE_SPARSE_SOLVE_HELPERS(Dec,Rhs)
-
-  template<typename Dest> void evalTo(Dest& dst) const
-  {
-    this->defaultEvalTo(dst);
-  }
-};
-
-} // end namespace internal
-
 } // end namespace Eigen
 
 #endif // EIGEN_UMFPACKSUPPORT_H
diff --git a/Eigen/src/misc/Solve.h b/Eigen/src/misc/Solve.h
deleted file mode 100644
index 7f70d60af..000000000
--- a/Eigen/src/misc/Solve.h
+++ /dev/null
@@ -1,76 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_MISC_SOLVE_H
-#define EIGEN_MISC_SOLVE_H
-
-namespace Eigen { 
-
-namespace internal {
-
-/** \class solve_retval_base
-  *
-  */
-template<typename DecompositionType, typename Rhs>
-struct traits<solve_retval_base<DecompositionType, Rhs> >
-{
-  typedef typename DecompositionType::MatrixType MatrixType;
-  typedef Matrix<typename Rhs::Scalar,
-                 MatrixType::ColsAtCompileTime,
-                 Rhs::ColsAtCompileTime,
-                 Rhs::PlainObject::Options,
-                 MatrixType::MaxColsAtCompileTime,
-                 Rhs::MaxColsAtCompileTime> ReturnType;
-};
-
-template<typename _DecompositionType, typename Rhs> struct solve_retval_base
- : public ReturnByValue<solve_retval_base<_DecompositionType, Rhs> >
-{
-  typedef typename remove_all<typename Rhs::Nested>::type RhsNestedCleaned;
-  typedef _DecompositionType DecompositionType;
-  typedef ReturnByValue<solve_retval_base> Base;
-  typedef typename Base::Index Index;
-
-  solve_retval_base(const DecompositionType& dec, const Rhs& rhs)
-    : m_dec(dec), m_rhs(rhs)
-  {}
-
-  inline Index rows() const { return m_dec.cols(); }
-  inline Index cols() const { return m_rhs.cols(); }
-  inline const DecompositionType& dec() const { return m_dec; }
-  inline const RhsNestedCleaned& rhs() const { return m_rhs; }
-
-  template<typename Dest> inline void evalTo(Dest& dst) const
-  {
-    static_cast<const solve_retval<DecompositionType,Rhs>*>(this)->evalTo(dst);
-  }
-
-  protected:
-    const DecompositionType& m_dec;
-    typename Rhs::Nested m_rhs;
-};
-
-} // end namespace internal
-
-#define EIGEN_MAKE_SOLVE_HELPERS(DecompositionType,Rhs) \
-  typedef typename DecompositionType::MatrixType MatrixType; \
-  typedef typename MatrixType::Scalar Scalar; \
-  typedef typename MatrixType::RealScalar RealScalar; \
-  typedef typename MatrixType::Index Index; \
-  typedef Eigen::internal::solve_retval_base<DecompositionType,Rhs> Base; \
-  using Base::dec; \
-  using Base::rhs; \
-  using Base::rows; \
-  using Base::cols; \
-  solve_retval(const DecompositionType& dec, const Rhs& rhs) \
-    : Base(dec, rhs) {}
-
-} // end namespace Eigen
-
-#endif // EIGEN_MISC_SOLVE_H
diff --git a/Eigen/src/misc/SparseSolve.h b/Eigen/src/misc/SparseSolve.h
deleted file mode 100644
index 05caa9266..000000000
--- a/Eigen/src/misc/SparseSolve.h
+++ /dev/null
@@ -1,130 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_SPARSE_SOLVE_H
-#define EIGEN_SPARSE_SOLVE_H
-
-namespace Eigen { 
-
-namespace internal {
-
-template<typename _DecompositionType, typename Rhs> struct sparse_solve_retval_base;
-template<typename _DecompositionType, typename Rhs> struct sparse_solve_retval;
-  
-template<typename DecompositionType, typename Rhs>
-struct traits<sparse_solve_retval_base<DecompositionType, Rhs> >
-{
-  typedef typename DecompositionType::MatrixType MatrixType;
-  typedef SparseMatrix<typename Rhs::Scalar, Rhs::Options, typename Rhs::Index> ReturnType;
-};
-
-template<typename _DecompositionType, typename Rhs> struct sparse_solve_retval_base
- : public ReturnByValue<sparse_solve_retval_base<_DecompositionType, Rhs> >
-{
-  typedef typename remove_all<typename Rhs::Nested>::type RhsNestedCleaned;
-  typedef _DecompositionType DecompositionType;
-  typedef ReturnByValue<sparse_solve_retval_base> Base;
-  typedef typename Base::Index Index;
-
-  sparse_solve_retval_base(const DecompositionType& dec, const Rhs& rhs)
-    : m_dec(dec), m_rhs(rhs)
-  {}
-
-  inline Index rows() const { return m_dec.cols(); }
-  inline Index cols() const { return m_rhs.cols(); }
-  inline const DecompositionType& dec() const { return m_dec; }
-  inline const RhsNestedCleaned& rhs() const { return m_rhs; }
-
-  template<typename Dest> inline void evalTo(Dest& dst) const
-  {
-    static_cast<const sparse_solve_retval<DecompositionType,Rhs>*>(this)->evalTo(dst);
-  }
-
-  protected:
-    template<typename DestScalar, int DestOptions, typename DestIndex>
-    inline void defaultEvalTo(SparseMatrix<DestScalar,DestOptions,DestIndex>& dst) const
-    {
-      // we process the sparse rhs per block of NbColsAtOnce columns temporarily stored into a dense matrix.
-      static const int NbColsAtOnce = 4;
-      int rhsCols = m_rhs.cols();
-      int size = m_rhs.rows();
-      // the temporary matrices do not need more columns than NbColsAtOnce:
-      int tmpCols = (std::min)(rhsCols, NbColsAtOnce); 
-      Eigen::Matrix<DestScalar,Dynamic,Dynamic> tmp(size,tmpCols);
-      Eigen::Matrix<DestScalar,Dynamic,Dynamic> tmpX(size,tmpCols);
-      for(int k=0; k<rhsCols; k+=NbColsAtOnce)
-      {
-        int actualCols = std::min<int>(rhsCols-k, NbColsAtOnce);
-        tmp.leftCols(actualCols) = m_rhs.middleCols(k,actualCols);
-        tmpX.leftCols(actualCols) = m_dec.solve(tmp.leftCols(actualCols));
-        dst.middleCols(k,actualCols) = tmpX.leftCols(actualCols).sparseView();
-      }
-    }
-    const DecompositionType& m_dec;
-    typename Rhs::Nested m_rhs;
-};
-
-#define EIGEN_MAKE_SPARSE_SOLVE_HELPERS(DecompositionType,Rhs) \
-  typedef typename DecompositionType::MatrixType MatrixType; \
-  typedef typename MatrixType::Scalar Scalar; \
-  typedef typename MatrixType::RealScalar RealScalar; \
-  typedef typename MatrixType::Index Index; \
-  typedef Eigen::internal::sparse_solve_retval_base<DecompositionType,Rhs> Base; \
-  using Base::dec; \
-  using Base::rhs; \
-  using Base::rows; \
-  using Base::cols; \
-  sparse_solve_retval(const DecompositionType& dec, const Rhs& rhs) \
-    : Base(dec, rhs) {}
-
-
-
-template<typename DecompositionType, typename Rhs, typename Guess> struct solve_retval_with_guess;
-
-template<typename DecompositionType, typename Rhs, typename Guess>
-struct traits<solve_retval_with_guess<DecompositionType, Rhs, Guess> >
-{
-  typedef typename DecompositionType::MatrixType MatrixType;
-  typedef Matrix<typename Rhs::Scalar,
-                 MatrixType::ColsAtCompileTime,
-                 Rhs::ColsAtCompileTime,
-                 Rhs::PlainObject::Options,
-                 MatrixType::MaxColsAtCompileTime,
-                 Rhs::MaxColsAtCompileTime> ReturnType;
-};
-
-template<typename DecompositionType, typename Rhs, typename Guess> struct solve_retval_with_guess
- : public ReturnByValue<solve_retval_with_guess<DecompositionType, Rhs, Guess> >
-{
-  typedef typename DecompositionType::Index Index;
-
-  solve_retval_with_guess(const DecompositionType& dec, const Rhs& rhs, const Guess& guess)
-    : m_dec(dec), m_rhs(rhs), m_guess(guess)
-  {}
-
-  inline Index rows() const { return m_dec.cols(); }
-  inline Index cols() const { return m_rhs.cols(); }
-
-  template<typename Dest> inline void evalTo(Dest& dst) const
-  {
-    dst = m_guess;
-    m_dec._solveWithGuess(m_rhs,dst);
-  }
-
-  protected:
-    const DecompositionType& m_dec;
-    const typename Rhs::Nested m_rhs;
-    const typename Guess::Nested m_guess;
-};
-
-} // namepsace internal
-
-} // end namespace Eigen
-
-#endif // EIGEN_SPARSE_SOLVE_H
diff --git a/Eigen/src/plugins/ArrayCwiseUnaryOps.h b/Eigen/src/plugins/ArrayCwiseUnaryOps.h
index ce462e951..f6d7d8944 100644
--- a/Eigen/src/plugins/ArrayCwiseUnaryOps.h
+++ b/Eigen/src/plugins/ArrayCwiseUnaryOps.h
@@ -30,6 +30,9 @@ abs2() const
 
 /** \returns an expression of the coefficient-wise exponential of *this.
   *
+  * This function computes the coefficient-wise exponential. The function MatrixBase::exp() in the
+  * unsupported module MatrixFunctions computes the matrix exponential.
+  *
   * Example: \include Cwise_exp.cpp
   * Output: \verbinclude Cwise_exp.out
   *
@@ -44,6 +47,9 @@ exp() const
 
 /** \returns an expression of the coefficient-wise logarithm of *this.
   *
+  * This function computes the coefficient-wise logarithm. The function MatrixBase::log() in the
+  * unsupported module MatrixFunctions computes the matrix logarithm.
+  *
   * Example: \include Cwise_log.cpp
   * Output: \verbinclude Cwise_log.out
   *
@@ -58,6 +64,9 @@ log() const
 
 /** \returns an expression of the coefficient-wise square root of *this.
   *
+  * This function computes the coefficient-wise square root. The function MatrixBase::sqrt() in the
+  * unsupported module MatrixFunctions computes the matrix square root.
+  *
   * Example: \include Cwise_sqrt.cpp
   * Output: \verbinclude Cwise_sqrt.out
   *
@@ -72,6 +81,9 @@ sqrt() const
 
 /** \returns an expression of the coefficient-wise cosine of *this.
   *
+  * This function computes the coefficient-wise cosine. The function MatrixBase::cos() in the
+  * unsupported module MatrixFunctions computes the matrix cosine.
+  *
   * Example: \include Cwise_cos.cpp
   * Output: \verbinclude Cwise_cos.out
   *
@@ -87,6 +99,9 @@ cos() const
 
 /** \returns an expression of the coefficient-wise sine of *this.
   *
+  * This function computes the coefficient-wise sine. The function MatrixBase::sin() in the
+  * unsupported module MatrixFunctions computes the matrix sine.
+  *
   * Example: \include Cwise_sin.cpp
   * Output: \verbinclude Cwise_sin.out
   *
@@ -156,6 +171,9 @@ atan() const
 
 /** \returns an expression of the coefficient-wise power of *this to the given exponent.
   *
+  * This function computes the coefficient-wise power. The function MatrixBase::pow() in the
+  * unsupported module MatrixFunctions computes the matrix power.
+  *
   * Example: \include Cwise_pow.cpp
   * Output: \verbinclude Cwise_pow.out
   *
diff --git a/bench/bench_norm.cpp b/bench/bench_norm.cpp
index 398fef835..129afcfb2 100644
--- a/bench/bench_norm.cpp
+++ b/bench/bench_norm.cpp
@@ -6,19 +6,25 @@ using namespace Eigen;
 using namespace std;
 
 template<typename T>
-EIGEN_DONT_INLINE typename T::Scalar sqsumNorm(const T& v)
+EIGEN_DONT_INLINE typename T::Scalar sqsumNorm(T& v)
 {
   return v.norm();
 }
 
 template<typename T>
-EIGEN_DONT_INLINE typename T::Scalar hypotNorm(const T& v)
+EIGEN_DONT_INLINE typename T::Scalar stableNorm(T& v)
+{
+  return v.stableNorm();
+}
+
+template<typename T>
+EIGEN_DONT_INLINE typename T::Scalar hypotNorm(T& v)
 {
   return v.hypotNorm();
 }
 
 template<typename T>
-EIGEN_DONT_INLINE typename T::Scalar blueNorm(const T& v)
+EIGEN_DONT_INLINE typename T::Scalar blueNorm(T& v)
 {
   return v.blueNorm();
 }
@@ -217,20 +223,21 @@ EIGEN_DONT_INLINE typename T::Scalar pblueNorm(const T& v)
 }
 
 #define BENCH_PERF(NRM) { \
+  float af = 0; double ad = 0; std::complex<float> ac = 0; \
   Eigen::BenchTimer tf, td, tcf; tf.reset(); td.reset(); tcf.reset();\
   for (int k=0; k<tries; ++k) { \
     tf.start(); \
-    for (int i=0; i<iters; ++i) NRM(vf); \
+    for (int i=0; i<iters; ++i) { af += NRM(vf); } \
     tf.stop(); \
   } \
   for (int k=0; k<tries; ++k) { \
     td.start(); \
-    for (int i=0; i<iters; ++i) NRM(vd); \
+    for (int i=0; i<iters; ++i) { ad += NRM(vd); } \
     td.stop(); \
   } \
   /*for (int k=0; k<std::max(1,tries/3); ++k) { \
     tcf.start(); \
-    for (int i=0; i<iters; ++i) NRM(vcf); \
+    for (int i=0; i<iters; ++i) { ac += NRM(vcf); } \
     tcf.stop(); \
   } */\
   std::cout << #NRM << "\t" << tf.value() << "   " << td.value() <<  "    " << tcf.value() << "\n"; \
@@ -316,14 +323,17 @@ int main(int argc, char** argv)
     std::cout << "\n";
   }
 
+  y = 1;
   std::cout.precision(4);
-  std::cerr << "Performance (out of cache):\n";
+  int s1 = 1024*1024*32;
+  std::cerr << "Performance (out of cache, " << s1 << "):\n";
   {
     int iters = 1;
-    VectorXf vf = VectorXf::Random(1024*1024*32) * y;
-    VectorXd vd = VectorXd::Random(1024*1024*32) * y;
-    VectorXcf vcf = VectorXcf::Random(1024*1024*32) * y;
+    VectorXf vf = VectorXf::Random(s1) * y;
+    VectorXd vd = VectorXd::Random(s1) * y;
+    VectorXcf vcf = VectorXcf::Random(s1) * y;
     BENCH_PERF(sqsumNorm);
+    BENCH_PERF(stableNorm);
     BENCH_PERF(blueNorm);
     BENCH_PERF(pblueNorm);
     BENCH_PERF(lapackNorm);
@@ -332,13 +342,14 @@ int main(int argc, char** argv)
     BENCH_PERF(bl2passNorm);
   }
 
-  std::cerr << "\nPerformance (in cache):\n";
+  std::cerr << "\nPerformance (in cache, " << 512 << "):\n";
   {
     int iters = 100000;
     VectorXf vf = VectorXf::Random(512) * y;
     VectorXd vd = VectorXd::Random(512) * y;
     VectorXcf vcf = VectorXcf::Random(512) * y;
     BENCH_PERF(sqsumNorm);
+    BENCH_PERF(stableNorm);
     BENCH_PERF(blueNorm);
     BENCH_PERF(pblueNorm);
     BENCH_PERF(lapackNorm);
diff --git a/doc/CMakeLists.txt b/doc/CMakeLists.txt
index 1b8aaf9aa..46e5fc9d7 100644
--- a/doc/CMakeLists.txt
+++ b/doc/CMakeLists.txt
@@ -97,6 +97,7 @@ add_dependencies(doc-unsupported-prerequisites unsupported_snippets unsupported_
 add_custom_target(doc ALL
   COMMAND doxygen
   COMMAND doxygen Doxyfile-unsupported
+  COMMAND ${CMAKE_COMMAND} -E copy ${Eigen_BINARY_DIR}/doc/html/group__TopicUnalignedArrayAssert.html ${Eigen_BINARY_DIR}/doc/html/TopicUnalignedArrayAssert.html
   COMMAND ${CMAKE_COMMAND} -E rename html eigen-doc
   COMMAND ${CMAKE_COMMAND} -E remove eigen-doc/eigen-doc.tgz
   COMMAND ${CMAKE_COMMAND} -E tar cfz eigen-doc/eigen-doc.tgz eigen-doc
diff --git a/doc/snippets/DirectionWise_hnormalized.cpp b/doc/snippets/DirectionWise_hnormalized.cpp
new file mode 100644
index 000000000..3410790a8
--- /dev/null
+++ b/doc/snippets/DirectionWise_hnormalized.cpp
@@ -0,0 +1,7 @@
+typedef Matrix<double,4,Dynamic> Matrix4Xd;
+Matrix4Xd M = Matrix4Xd::Random(4,5);
+Projective3d P(Matrix4d::Random());
+cout << "The matrix M is:" << endl << M << endl << endl;
+cout << "M.colwise().hnormalized():" << endl << M.colwise().hnormalized() << endl << endl;
+cout << "P*M:" << endl << P*M << endl << endl;
+cout << "(P*M).colwise().hnormalized():" << endl << (P*M).colwise().hnormalized() << endl << endl;
+\ No newline at end of file
diff --git a/doc/snippets/MatrixBase_hnormalized.cpp b/doc/snippets/MatrixBase_hnormalized.cpp
new file mode 100644
index 000000000..652cd77c0
--- /dev/null
+++ b/doc/snippets/MatrixBase_hnormalized.cpp
@@ -0,0 +1,6 @@
+Vector4d v = Vector4d::Random();
+Projective3d P(Matrix4d::Random());
+cout << "v                   = " << v.transpose() << "]^T" << endl;
+cout << "v.hnormalized()     = " << v.hnormalized().transpose() << "]^T" << endl;
+cout << "P*v                 = " << (P*v).transpose() << "]^T" << endl;
+cout << "(P*v).hnormalized() = " << (P*v).hnormalized().transpose() << "]^T" << endl;
+\ No newline at end of file
diff --git a/doc/snippets/MatrixBase_homogeneous.cpp b/doc/snippets/MatrixBase_homogeneous.cpp
new file mode 100644
index 000000000..457c28f91
--- /dev/null
+++ b/doc/snippets/MatrixBase_homogeneous.cpp
@@ -0,0 +1,6 @@
+Vector3d v = Vector3d::Random(), w;
+Projective3d P(Matrix4d::Random());
+cout << "v                                   = [" << v.transpose() << "]^T" << endl;
+cout << "h.homogeneous()                     = [" << v.homogeneous().transpose() << "]^T" << endl;
+cout << "(P * v.homogeneous())               = [" << (P * v.homogeneous()).transpose() << "]^T" << endl;
+cout << "(P * v.homogeneous()).hnormalized() = [" << (P * v.homogeneous()).eval().hnormalized().transpose() << "]^T" << endl;
+\ No newline at end of file
diff --git a/doc/snippets/VectorwiseOp_homogeneous.cpp b/doc/snippets/VectorwiseOp_homogeneous.cpp
new file mode 100644
index 000000000..aba4fed0e
--- /dev/null
+++ b/doc/snippets/VectorwiseOp_homogeneous.cpp
@@ -0,0 +1,7 @@
+typedef Matrix<double,3,Dynamic> Matrix3Xd;
+Matrix3Xd M = Matrix3Xd::Random(3,5);
+Projective3d P(Matrix4d::Random());
+cout << "The matrix M is:" << endl << M << endl << endl;
+cout << "M.colwise().homogeneous():" << endl << M.colwise().homogeneous() << endl << endl;
+cout << "P * M.colwise().homogeneous():" << endl << P * M.colwise().homogeneous() << endl << endl;
+cout << "P * M.colwise().homogeneous().hnormalized(): " << endl << (P * M.colwise().homogeneous()).colwise().hnormalized() << endl << endl;
+\ No newline at end of file
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 47aefddb8..530e9e4e1 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -139,17 +139,12 @@ endif(TEST_LIB)
 set_property(GLOBAL PROPERTY EIGEN_CURRENT_SUBPROJECT "Official")
 add_custom_target(BuildOfficial)
 
-option(EIGEN_TEST_EVALUATORS "Enable work in progress evaluators" OFF)
-if(EIGEN_TEST_EVALUATORS)
-  add_definitions("-DEIGEN_TEST_EVALUATORS=1")
-  add_definitions("-DEIGEN_ENABLE_EVALUATORS=1")
-endif(EIGEN_TEST_EVALUATORS)
-
 ei_add_test(meta)
 ei_add_test(sizeof)
 ei_add_test(dynalloc)
 ei_add_test(nomalloc)
 ei_add_test(first_aligned)
+ei_add_test(nullary)
 ei_add_test(mixingtypes)
 ei_add_test(packetmath)
 ei_add_test(unalignedassert)
@@ -165,6 +160,9 @@ ei_add_test(redux)
 ei_add_test(visitor)
 ei_add_test(block)
 ei_add_test(corners)
+ei_add_test(swap)
+ei_add_test(resize)
+ei_add_test(conservative_resize)
 ei_add_test(product_small)
 ei_add_test(product_large)
 ei_add_test(product_extra)
@@ -193,6 +191,7 @@ ei_add_test(product_trsolve)
 ei_add_test(product_mmtr)
 ei_add_test(product_notemporary)
 ei_add_test(stable_norm)
+ei_add_test(permutationmatrices)
 ei_add_test(bandmatrix)
 ei_add_test(cholesky)
 ei_add_test(lu)
@@ -212,30 +211,30 @@ ei_add_test(real_qz)
 ei_add_test(eigensolver_generalized_real)
 ei_add_test(jacobi)
 ei_add_test(jacobisvd)
+ei_add_test(householder)
 ei_add_test(geo_orthomethods)
-ei_add_test(geo_homogeneous)
 ei_add_test(geo_quaternion)
-ei_add_test(geo_transformations)
 ei_add_test(geo_eulerangles)
-ei_add_test(geo_hyperplane)
 ei_add_test(geo_parametrizedline)
 ei_add_test(geo_alignedbox)
+ei_add_test(geo_hyperplane)
+ei_add_test(geo_transformations)
+ei_add_test(geo_homogeneous)
 ei_add_test(stdvector)
 ei_add_test(stdvector_overload)
 ei_add_test(stdlist)
 ei_add_test(stddeque)
-ei_add_test(resize)
-ei_add_test(sparse_vector)
 ei_add_test(sparse_basic)
+ei_add_test(sparse_vector)
 ei_add_test(sparse_product)
 ei_add_test(sparse_solvers)
-ei_add_test(umeyama)
-ei_add_test(householder)
-ei_add_test(swap)
-ei_add_test(conservative_resize)
-ei_add_test(permutationmatrices)
 ei_add_test(sparse_permutations)
-ei_add_test(nullary)
+ei_add_test(simplicial_cholesky)
+ei_add_test(conjugate_gradient)
+ei_add_test(bicgstab)
+ei_add_test(sparselu)
+ei_add_test(sparseqr)
+ei_add_test(umeyama)
 ei_add_test(nesting_ops "${CMAKE_CXX_FLAGS_DEBUG}")
 ei_add_test(zerosized)
 ei_add_test(dontalign)
@@ -249,13 +248,7 @@ ei_add_test(special_numbers)
 ei_add_test(rvalue_types)
 ei_add_test(dense_storage)
 
-ei_add_test(simplicial_cholesky)
-ei_add_test(conjugate_gradient)
-ei_add_test(bicgstab)
-ei_add_test(sparselu)
-ei_add_test(sparseqr)
-
-# ei_add_test(denseLM)
+# # ei_add_test(denseLM)
 
 if(QT4_FOUND)
   ei_add_test(qtvector "" "${QT_QTCORE_LIBRARY}")
diff --git a/test/array.cpp b/test/array.cpp
index 010fead2d..ac9be097d 100644
--- a/test/array.cpp
+++ b/test/array.cpp
@@ -81,6 +81,31 @@ template<typename ArrayType> void array(const ArrayType& m)
   VERIFY_IS_APPROX(m3.rowwise() += rv1, m1.rowwise() + rv1);
   m3 = m1;
   VERIFY_IS_APPROX(m3.rowwise() -= rv1, m1.rowwise() - rv1);
+  
+  // Conversion from scalar
+  VERIFY_IS_APPROX((m3 = s1), ArrayType::Constant(rows,cols,s1));
+  VERIFY_IS_APPROX((m3 = 1),  ArrayType::Constant(rows,cols,1));
+  VERIFY_IS_APPROX((m3.topLeftCorner(rows,cols) = 1),  ArrayType::Constant(rows,cols,1));
+  typedef Array<Scalar,
+                ArrayType::RowsAtCompileTime==Dynamic?2:ArrayType::RowsAtCompileTime,
+                ArrayType::ColsAtCompileTime==Dynamic?2:ArrayType::ColsAtCompileTime,
+                ArrayType::Options> FixedArrayType;
+  FixedArrayType f1(s1);
+  VERIFY_IS_APPROX(f1, FixedArrayType::Constant(s1));
+  FixedArrayType f2(numext::real(s1));
+  VERIFY_IS_APPROX(f2, FixedArrayType::Constant(numext::real(s1)));
+  FixedArrayType f3((int)100*numext::real(s1));
+  VERIFY_IS_APPROX(f3, FixedArrayType::Constant((int)100*numext::real(s1)));
+  f1.setRandom();
+  FixedArrayType f4(f1.data());
+  VERIFY_IS_APPROX(f4, f1);
+  
+  // Check possible conflicts with 1D ctor
+  typedef Array<Scalar, Dynamic, 1> OneDArrayType;
+  OneDArrayType o1(rows);
+  VERIFY(o1.size()==rows);
+  OneDArrayType o4((int)rows);
+  VERIFY(o4.size()==rows);
 }
 
 template<typename ArrayType> void comparisons(const ArrayType& m)
diff --git a/test/block.cpp b/test/block.cpp
index 269acd28e..3b77b704a 100644
--- a/test/block.cpp
+++ b/test/block.cpp
@@ -130,6 +130,14 @@ template<typename MatrixType> void block(const MatrixType& m)
 
   VERIFY(numext::real(ones.col(c1).dot(ones.col(c2))) == RealScalar(rows));
   VERIFY(numext::real(ones.row(r1).dot(ones.row(r2))) == RealScalar(cols));
+  
+  // chekc that linear acccessors works on blocks
+  m1 = m1_copy;
+  if((MatrixType::Flags&RowMajorBit)==0)
+    VERIFY_IS_EQUAL(m1.leftCols(c1).coeff(r1+c1*rows), m1(r1,c1));
+  else
+    VERIFY_IS_EQUAL(m1.topRows(r1).coeff(c1+r1*cols), m1(r1,c1));
+  
 
   // now test some block-inside-of-block.
   
diff --git a/test/evaluators.cpp b/test/evaluators.cpp
index e3922c1be..f41968da8 100644
--- a/test/evaluators.cpp
+++ b/test/evaluators.cpp
@@ -1,7 +1,78 @@
-#define EIGEN_ENABLE_EVALUATORS
+
 #include "main.h"
 
-using internal::copy_using_evaluator;
+namespace Eigen {
+  
+  template<typename DstXprType, typename SrcXprType>
+  EIGEN_STRONG_INLINE
+  DstXprType& copy_using_evaluator(const EigenBase<DstXprType> &dst, const SrcXprType &src)
+  {
+    call_assignment(dst.const_cast_derived(), src.derived(), internal::assign_op<typename DstXprType::Scalar>());
+    return dst.const_cast_derived();
+  }
+  
+  template<typename DstXprType, template <typename> class StorageBase, typename SrcXprType>
+  EIGEN_STRONG_INLINE
+  const DstXprType& copy_using_evaluator(const NoAlias<DstXprType, StorageBase>& dst, const SrcXprType &src)
+  {
+    call_assignment(dst, src.derived(), internal::assign_op<typename DstXprType::Scalar>());
+    return dst.expression();
+  }
+  
+  template<typename DstXprType, typename SrcXprType>
+  EIGEN_STRONG_INLINE
+  DstXprType& copy_using_evaluator(const PlainObjectBase<DstXprType> &dst, const SrcXprType &src)
+  {
+    #ifdef EIGEN_NO_AUTOMATIC_RESIZING
+    eigen_assert((dst.size()==0 || (IsVectorAtCompileTime ? (dst.size() == src.size())
+                                                          : (dst.rows() == src.rows() && dst.cols() == src.cols())))
+                && "Size mismatch. Automatic resizing is disabled because EIGEN_NO_AUTOMATIC_RESIZING is defined");
+  #else
+    dst.const_cast_derived().resizeLike(src.derived());
+  #endif
+    
+    call_assignment(dst.const_cast_derived(), src.derived(), internal::assign_op<typename DstXprType::Scalar>());
+    return dst.const_cast_derived();
+  }
+
+  template<typename DstXprType, typename SrcXprType>
+  void add_assign_using_evaluator(const DstXprType& dst, const SrcXprType& src)
+  {
+    typedef typename DstXprType::Scalar Scalar;
+    call_assignment(const_cast<DstXprType&>(dst), src.derived(), internal::add_assign_op<Scalar>());
+  }
+
+  template<typename DstXprType, typename SrcXprType>
+  void subtract_assign_using_evaluator(const DstXprType& dst, const SrcXprType& src)
+  {
+    typedef typename DstXprType::Scalar Scalar;
+    call_assignment(const_cast<DstXprType&>(dst), src.derived(), internal::sub_assign_op<Scalar>());
+  }
+
+  template<typename DstXprType, typename SrcXprType>
+  void multiply_assign_using_evaluator(const DstXprType& dst, const SrcXprType& src)
+  {
+    typedef typename DstXprType::Scalar Scalar;
+    call_assignment(dst.const_cast_derived(), src.derived(), internal::mul_assign_op<Scalar>());
+  }
+
+  template<typename DstXprType, typename SrcXprType>
+  void divide_assign_using_evaluator(const DstXprType& dst, const SrcXprType& src)
+  {
+    typedef typename DstXprType::Scalar Scalar;
+    call_assignment(dst.const_cast_derived(), src.derived(), internal::div_assign_op<Scalar>());
+  }
+  
+  template<typename DstXprType, typename SrcXprType>
+  void swap_using_evaluator(const DstXprType& dst, const SrcXprType& src)
+  {
+    typedef typename DstXprType::Scalar Scalar;
+    call_assignment(dst.const_cast_derived(), src.const_cast_derived(), internal::swap_assign_op<Scalar>());
+  }
+  
+}
+
+
 using namespace std;
 
 #define VERIFY_IS_APPROX_EVALUATOR(DEST,EXPR) VERIFY_IS_APPROX(copy_using_evaluator(DEST,(EXPR)), (EXPR).eval());
@@ -72,8 +143,19 @@ void test_evaluators()
     c = a*a;
     copy_using_evaluator(a, prod(a,a));
     VERIFY_IS_APPROX(a,c);
+
+    // check compound assignment of products
+    d = c;
+    add_assign_using_evaluator(c.noalias(), prod(a,b));
+    d.noalias() += a*b;
+    VERIFY_IS_APPROX(c, d);
+
+    d = c;
+    subtract_assign_using_evaluator(c.noalias(), prod(a,b));
+    d.noalias() -= a*b;
+    VERIFY_IS_APPROX(c, d);
   }
-  
+
   {
     // test product with all possible sizes
     int s = internal::random<int>(1,100);
@@ -124,7 +206,7 @@ void test_evaluators()
     
     // this does not work because Random is eval-before-nested: 
     // copy_using_evaluator(w, Vector2d::Random().transpose());
-    
+
     // test CwiseUnaryOp
     VERIFY_IS_APPROX_EVALUATOR(v2, 3 * v);
     VERIFY_IS_APPROX_EVALUATOR(w, (3 * v).transpose());
@@ -327,4 +409,56 @@ void test_evaluators()
     arr_ref.row(1) /= (arr_ref.row(2) + 1);
     VERIFY_IS_APPROX(arr, arr_ref);
   }
+  
+  {
+    // test triangular shapes
+    MatrixXd A = MatrixXd::Random(6,6), B(6,6), C(6,6), D(6,6);
+    A.setRandom();B.setRandom();
+    VERIFY_IS_APPROX_EVALUATOR2(B, A.triangularView<Upper>(), MatrixXd(A.triangularView<Upper>()));
+    
+    A.setRandom();B.setRandom();
+    VERIFY_IS_APPROX_EVALUATOR2(B, A.triangularView<UnitLower>(), MatrixXd(A.triangularView<UnitLower>()));
+    
+    A.setRandom();B.setRandom();
+    VERIFY_IS_APPROX_EVALUATOR2(B, A.triangularView<UnitUpper>(), MatrixXd(A.triangularView<UnitUpper>()));
+    
+    A.setRandom();B.setRandom();
+    C = B; C.triangularView<Upper>() = A;
+    copy_using_evaluator(B.triangularView<Upper>(), A);
+    VERIFY(B.isApprox(C) && "copy_using_evaluator(B.triangularView<Upper>(), A)");
+    
+    A.setRandom();B.setRandom();
+    C = B; C.triangularView<Lower>() = A.triangularView<Lower>();
+    copy_using_evaluator(B.triangularView<Lower>(), A.triangularView<Lower>());
+    VERIFY(B.isApprox(C) && "copy_using_evaluator(B.triangularView<Lower>(), A.triangularView<Lower>())");
+    
+    
+    A.setRandom();B.setRandom();
+    C = B; C.triangularView<Lower>() = A.triangularView<Upper>().transpose();
+    copy_using_evaluator(B.triangularView<Lower>(), A.triangularView<Upper>().transpose());
+    VERIFY(B.isApprox(C) && "copy_using_evaluator(B.triangularView<Lower>(), A.triangularView<Lower>().transpose())");
+    
+    
+    A.setRandom();B.setRandom(); C = B; D = A;
+    C.triangularView<Upper>().swap(D.triangularView<Upper>());
+    swap_using_evaluator(B.triangularView<Upper>(), A.triangularView<Upper>());
+    VERIFY(B.isApprox(C) && "swap_using_evaluator(B.triangularView<Upper>(), A.triangularView<Upper>())");
+    
+    
+    VERIFY_IS_APPROX_EVALUATOR2(B, prod(A.triangularView<Upper>(),A), MatrixXd(A.triangularView<Upper>()*A));
+    
+    VERIFY_IS_APPROX_EVALUATOR2(B, prod(A.selfadjointView<Upper>(),A), MatrixXd(A.selfadjointView<Upper>()*A));
+    
+  }
+
+  {
+    // test diagonal shapes
+    VectorXd d = VectorXd::Random(6);
+    MatrixXd A = MatrixXd::Random(6,6), B(6,6);
+    A.setRandom();B.setRandom();
+    
+    VERIFY_IS_APPROX_EVALUATOR2(B, lazyprod(d.asDiagonal(),A), MatrixXd(d.asDiagonal()*A));
+    VERIFY_IS_APPROX_EVALUATOR2(B, lazyprod(A,d.asDiagonal()), MatrixXd(A*d.asDiagonal()));
+    
+  }
 }
diff --git a/test/geo_homogeneous.cpp b/test/geo_homogeneous.cpp
index c91bde819..2f9d18c0f 100644
--- a/test/geo_homogeneous.cpp
+++ b/test/geo_homogeneous.cpp
@@ -38,6 +38,10 @@ template<typename Scalar,int Size> void homogeneous(void)
   hv0 << v0, 1;
   VERIFY_IS_APPROX(v0.homogeneous(), hv0);
   VERIFY_IS_APPROX(v0, hv0.hnormalized());
+  
+  VERIFY_IS_APPROX(v0.homogeneous().sum(), hv0.sum());
+  VERIFY_IS_APPROX(v0.homogeneous().minCoeff(), hv0.minCoeff());
+  VERIFY_IS_APPROX(v0.homogeneous().maxCoeff(), hv0.maxCoeff());
 
   hm0 << m0, ones.transpose();
   VERIFY_IS_APPROX(m0.colwise().homogeneous(), hm0);
@@ -57,7 +61,6 @@ template<typename Scalar,int Size> void homogeneous(void)
 
   VERIFY_IS_APPROX((v0.transpose().rowwise().homogeneous().eval()) * t2,
                     v0.transpose().rowwise().homogeneous() * t2);
-                    m0.transpose().rowwise().homogeneous().eval();
   VERIFY_IS_APPROX((m0.transpose().rowwise().homogeneous().eval()) * t2,
                     m0.transpose().rowwise().homogeneous() * t2);
 
@@ -82,7 +85,7 @@ template<typename Scalar,int Size> void homogeneous(void)
   VERIFY_IS_APPROX(aff  * pts.colwise().homogeneous(), (aff  * pts1).colwise().hnormalized());
   VERIFY_IS_APPROX(caff * pts.colwise().homogeneous(), (caff * pts1).colwise().hnormalized());
   VERIFY_IS_APPROX(proj * pts.colwise().homogeneous(), (proj * pts1));
-  
+
   VERIFY_IS_APPROX((aff  * pts1).colwise().hnormalized(),  aff  * pts);
   VERIFY_IS_APPROX((caff * pts1).colwise().hnormalized(), caff * pts);
   
diff --git a/test/geo_orthomethods.cpp b/test/geo_orthomethods.cpp
index c836dae40..7f8beb205 100644
--- a/test/geo_orthomethods.cpp
+++ b/test/geo_orthomethods.cpp
@@ -33,6 +33,7 @@ template<typename Scalar> void orthomethods_3()
   VERIFY_IS_MUCH_SMALLER_THAN(v1.dot(v1.cross(v2)), Scalar(1));
   VERIFY_IS_MUCH_SMALLER_THAN(v1.cross(v2).dot(v2), Scalar(1));
   VERIFY_IS_MUCH_SMALLER_THAN(v2.dot(v1.cross(v2)), Scalar(1));
+  VERIFY_IS_MUCH_SMALLER_THAN(v1.cross(Vector3::Random()).dot(v1), Scalar(1));
   Matrix3 mat3;
   mat3 << v0.normalized(),
          (v0.cross(v1)).normalized(),
@@ -47,6 +48,13 @@ template<typename Scalar> void orthomethods_3()
   int i = internal::random<int>(0,2);
   mcross = mat3.colwise().cross(vec3);
   VERIFY_IS_APPROX(mcross.col(i), mat3.col(i).cross(vec3));
+  
+  VERIFY_IS_MUCH_SMALLER_THAN((mat3.adjoint() * mat3.colwise().cross(vec3)).diagonal().cwiseAbs().sum(), Scalar(1));
+  VERIFY_IS_MUCH_SMALLER_THAN((mat3.adjoint() * mat3.colwise().cross(Vector3::Random())).diagonal().cwiseAbs().sum(), Scalar(1));
+  
+  VERIFY_IS_MUCH_SMALLER_THAN((vec3.adjoint() * mat3.colwise().cross(vec3)).cwiseAbs().sum(), Scalar(1));
+  VERIFY_IS_MUCH_SMALLER_THAN((vec3.adjoint() * Matrix3::Random().colwise().cross(vec3)).cwiseAbs().sum(), Scalar(1));
+  
   mcross = mat3.rowwise().cross(vec3);
   VERIFY_IS_APPROX(mcross.row(i), mat3.row(i).cross(vec3));
 
@@ -57,6 +65,7 @@ template<typename Scalar> void orthomethods_3()
   v40.w() = v41.w() = v42.w() = 0;
   v42.template head<3>() = v40.template head<3>().cross(v41.template head<3>());
   VERIFY_IS_APPROX(v40.cross3(v41), v42);
+  VERIFY_IS_MUCH_SMALLER_THAN(v40.cross3(Vector4::Random()).dot(v40), Scalar(1));
   
   // check mixed product
   typedef Matrix<RealScalar, 3, 1> RealVector3;
diff --git a/test/inverse.cpp b/test/inverse.cpp
index 8187b088d..1e7b20958 100644
--- a/test/inverse.cpp
+++ b/test/inverse.cpp
@@ -68,6 +68,15 @@ template<typename MatrixType> void inverse(const MatrixType& m)
   VERIFY_IS_MUCH_SMALLER_THAN(abs(det-m3.determinant()), RealScalar(1));
   m3.computeInverseWithCheck(m4, invertible);
   VERIFY( rows==1 ? invertible : !invertible );
+  
+  // check with submatrices
+  {
+    Matrix<Scalar, MatrixType::RowsAtCompileTime+1, MatrixType::RowsAtCompileTime+1, MatrixType::Options> m3;
+    m3.setRandom();
+    m3.topLeftCorner(rows,rows) = m1;
+    m2 = m3.template topLeftCorner<MatrixType::RowsAtCompileTime,MatrixType::ColsAtCompileTime>().inverse();
+    VERIFY_IS_APPROX( (m3.template topLeftCorner<MatrixType::RowsAtCompileTime,MatrixType::ColsAtCompileTime>()), m2.inverse() );
+  }
 #endif
 
   // check in-place inversion
diff --git a/test/jacobisvd.cpp b/test/jacobisvd.cpp
index 36721b496..bfcadce95 100644
--- a/test/jacobisvd.cpp
+++ b/test/jacobisvd.cpp
@@ -1,7 +1,7 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
-// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
 // Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>
 //
 // This Source Code Form is subject to the terms of the Mozilla
@@ -14,273 +14,47 @@
 #include "main.h"
 #include <Eigen/SVD>
 
-template<typename MatrixType, int QRPreconditioner>
-void jacobisvd_check_full(const MatrixType& m, const JacobiSVD<MatrixType, QRPreconditioner>& svd)
-{
-  typedef typename MatrixType::Index Index;
-  Index rows = m.rows();
-  Index cols = m.cols();
-
-  enum {
-    RowsAtCompileTime = MatrixType::RowsAtCompileTime,
-    ColsAtCompileTime = MatrixType::ColsAtCompileTime
-  };
-
-  typedef typename MatrixType::Scalar Scalar;
-  typedef Matrix<Scalar, RowsAtCompileTime, RowsAtCompileTime> MatrixUType;
-  typedef Matrix<Scalar, ColsAtCompileTime, ColsAtCompileTime> MatrixVType;
-
-  MatrixType sigma = MatrixType::Zero(rows,cols);
-  sigma.diagonal() = svd.singularValues().template cast<Scalar>();
-  MatrixUType u = svd.matrixU();
-  MatrixVType v = svd.matrixV();
-
-  VERIFY_IS_APPROX(m, u * sigma * v.adjoint());
-  VERIFY_IS_UNITARY(u);
-  VERIFY_IS_UNITARY(v);
-}
-
-template<typename MatrixType, int QRPreconditioner>
-void jacobisvd_compare_to_full(const MatrixType& m,
-                               unsigned int computationOptions,
-                               const JacobiSVD<MatrixType, QRPreconditioner>& referenceSvd)
-{
-  typedef typename MatrixType::Index Index;
-  Index rows = m.rows();
-  Index cols = m.cols();
-  Index diagSize = (std::min)(rows, cols);
-
-  JacobiSVD<MatrixType, QRPreconditioner> svd(m, computationOptions);
-
-  VERIFY_IS_APPROX(svd.singularValues(), referenceSvd.singularValues());
-  if(computationOptions & ComputeFullU)
-    VERIFY_IS_APPROX(svd.matrixU(), referenceSvd.matrixU());
-  if(computationOptions & ComputeThinU)
-    VERIFY_IS_APPROX(svd.matrixU(), referenceSvd.matrixU().leftCols(diagSize));
-  if(computationOptions & ComputeFullV)
-    VERIFY_IS_APPROX(svd.matrixV(), referenceSvd.matrixV());
-  if(computationOptions & ComputeThinV)
-    VERIFY_IS_APPROX(svd.matrixV(), referenceSvd.matrixV().leftCols(diagSize));
-}
-
-template<typename MatrixType, int QRPreconditioner>
-void jacobisvd_solve(const MatrixType& m, unsigned int computationOptions)
-{
-  typedef typename MatrixType::Scalar Scalar;
-  typedef typename MatrixType::RealScalar RealScalar;
-  typedef typename MatrixType::Index Index;
-  Index rows = m.rows();
-  Index cols = m.cols();
-
-  enum {
-    RowsAtCompileTime = MatrixType::RowsAtCompileTime,
-    ColsAtCompileTime = MatrixType::ColsAtCompileTime
-  };
-
-  typedef Matrix<Scalar, RowsAtCompileTime, Dynamic> RhsType;
-  typedef Matrix<Scalar, ColsAtCompileTime, Dynamic> SolutionType;
-
-  RhsType rhs = RhsType::Random(rows, internal::random<Index>(1, cols));
-  JacobiSVD<MatrixType, QRPreconditioner> svd(m, computationOptions);
-
-       if(internal::is_same<RealScalar,double>::value) svd.setThreshold(1e-8);
-  else if(internal::is_same<RealScalar,float>::value)  svd.setThreshold(1e-4);
-  
-  SolutionType x = svd.solve(rhs);
-  
-  RealScalar residual = (m*x-rhs).norm();
-  // Check that there is no significantly better solution in the neighborhood of x
-  if(!test_isMuchSmallerThan(residual,rhs.norm()))
-  {
-    // If the residual is very small, then we have an exact solution, so we are already good.
-    for(int k=0;k<x.rows();++k)
-    {
-      SolutionType y(x);
-      y.row(k).array() += 2*NumTraits<RealScalar>::epsilon();
-      RealScalar residual_y = (m*y-rhs).norm();
-      VERIFY( test_isApprox(residual_y,residual) || residual < residual_y );
-      
-      y.row(k) = x.row(k).array() - 2*NumTraits<RealScalar>::epsilon();
-      residual_y = (m*y-rhs).norm();
-      VERIFY( test_isApprox(residual_y,residual) || residual < residual_y );
-    }
-  }
-  
-  // evaluate normal equation which works also for least-squares solutions
-  if(internal::is_same<RealScalar,double>::value)
-  {
-    // This test is not stable with single precision.
-    // This is probably because squaring m signicantly affects the precision.
-    VERIFY_IS_APPROX(m.adjoint()*m*x,m.adjoint()*rhs);
-  }
-  
-  // check minimal norm solutions
-  {
-    // generate a full-rank m x n problem with m<n
-    enum {
-      RankAtCompileTime2 = ColsAtCompileTime==Dynamic ? Dynamic : (ColsAtCompileTime)/2+1,
-      RowsAtCompileTime3 = ColsAtCompileTime==Dynamic ? Dynamic : ColsAtCompileTime+1
-    };
-    typedef Matrix<Scalar, RankAtCompileTime2, ColsAtCompileTime> MatrixType2;
-    typedef Matrix<Scalar, RankAtCompileTime2, 1> RhsType2;
-    typedef Matrix<Scalar, ColsAtCompileTime, RankAtCompileTime2> MatrixType2T;
-    Index rank = RankAtCompileTime2==Dynamic ? internal::random<Index>(1,cols) : Index(RankAtCompileTime2);
-    MatrixType2 m2(rank,cols);
-    int guard = 0;
-    do {
-      m2.setRandom();
-    } while(m2.jacobiSvd().setThreshold(test_precision<Scalar>()).rank()!=rank && (++guard)<10);
-    VERIFY(guard<10);
-    RhsType2 rhs2 = RhsType2::Random(rank);
-    // use QR to find a reference minimal norm solution
-    HouseholderQR<MatrixType2T> qr(m2.adjoint());
-    Matrix<Scalar,Dynamic,1> tmp = qr.matrixQR().topLeftCorner(rank,rank).template triangularView<Upper>().adjoint().solve(rhs2);
-    tmp.conservativeResize(cols);
-    tmp.tail(cols-rank).setZero();
-    SolutionType x21 = qr.householderQ() * tmp;
-    // now check with SVD
-    JacobiSVD<MatrixType2, ColPivHouseholderQRPreconditioner> svd2(m2, computationOptions);
-    SolutionType x22 = svd2.solve(rhs2);
-    VERIFY_IS_APPROX(m2*x21, rhs2);
-    VERIFY_IS_APPROX(m2*x22, rhs2);
-    VERIFY_IS_APPROX(x21, x22);
-    
-    // Now check with a rank deficient matrix
-    typedef Matrix<Scalar, RowsAtCompileTime3, ColsAtCompileTime> MatrixType3;
-    typedef Matrix<Scalar, RowsAtCompileTime3, 1> RhsType3;
-    Index rows3 = RowsAtCompileTime3==Dynamic ? internal::random<Index>(rank+1,2*cols) : Index(RowsAtCompileTime3);
-    Matrix<Scalar,RowsAtCompileTime3,Dynamic> C = Matrix<Scalar,RowsAtCompileTime3,Dynamic>::Random(rows3,rank);
-    MatrixType3 m3 = C * m2;
-    RhsType3 rhs3 = C * rhs2;
-    JacobiSVD<MatrixType3, ColPivHouseholderQRPreconditioner> svd3(m3, computationOptions);
-    SolutionType x3 = svd3.solve(rhs3);
-    VERIFY_IS_APPROX(m3*x3, rhs3);
-    VERIFY_IS_APPROX(m3*x21, rhs3);
-    VERIFY_IS_APPROX(m2*x3, rhs2);
-    
-    VERIFY_IS_APPROX(x21, x3);
-  }
-}
-
-template<typename MatrixType, int QRPreconditioner>
-void jacobisvd_test_all_computation_options(const MatrixType& m)
-{
-  if (QRPreconditioner == NoQRPreconditioner && m.rows() != m.cols())
-    return;
-  JacobiSVD<MatrixType, QRPreconditioner> fullSvd(m, ComputeFullU|ComputeFullV);
-  CALL_SUBTEST(( jacobisvd_check_full(m, fullSvd) ));
-  CALL_SUBTEST(( jacobisvd_solve<MatrixType, QRPreconditioner>(m, ComputeFullU | ComputeFullV) ));
-  
-  #if defined __INTEL_COMPILER
-  // remark #111: statement is unreachable
-  #pragma warning disable 111
-  #endif
-  if(QRPreconditioner == FullPivHouseholderQRPreconditioner)
-    return;
-
-  CALL_SUBTEST(( jacobisvd_compare_to_full(m, ComputeFullU, fullSvd) ));
-  CALL_SUBTEST(( jacobisvd_compare_to_full(m, ComputeFullV, fullSvd) ));
-  CALL_SUBTEST(( jacobisvd_compare_to_full(m, 0, fullSvd) ));
-
-  if (MatrixType::ColsAtCompileTime == Dynamic) {
-    // thin U/V are only available with dynamic number of columns
-    CALL_SUBTEST(( jacobisvd_compare_to_full(m, ComputeFullU|ComputeThinV, fullSvd) ));
-    CALL_SUBTEST(( jacobisvd_compare_to_full(m,              ComputeThinV, fullSvd) ));
-    CALL_SUBTEST(( jacobisvd_compare_to_full(m, ComputeThinU|ComputeFullV, fullSvd) ));
-    CALL_SUBTEST(( jacobisvd_compare_to_full(m, ComputeThinU             , fullSvd) ));
-    CALL_SUBTEST(( jacobisvd_compare_to_full(m, ComputeThinU|ComputeThinV, fullSvd) ));
-    CALL_SUBTEST(( jacobisvd_solve<MatrixType, QRPreconditioner>(m, ComputeFullU | ComputeThinV) ));
-    CALL_SUBTEST(( jacobisvd_solve<MatrixType, QRPreconditioner>(m, ComputeThinU | ComputeFullV) ));
-    CALL_SUBTEST(( jacobisvd_solve<MatrixType, QRPreconditioner>(m, ComputeThinU | ComputeThinV) ));
-
-    // test reconstruction
-    typedef typename MatrixType::Index Index;
-    Index diagSize = (std::min)(m.rows(), m.cols());
-    JacobiSVD<MatrixType, QRPreconditioner> svd(m, ComputeThinU | ComputeThinV);
-    VERIFY_IS_APPROX(m, svd.matrixU().leftCols(diagSize) * svd.singularValues().asDiagonal() * svd.matrixV().leftCols(diagSize).adjoint());
-  }
-}
+#define SVD_DEFAULT(M) JacobiSVD<M>
+#define SVD_FOR_MIN_NORM(M) JacobiSVD<M,ColPivHouseholderQRPreconditioner>
+#include "svd_common.h"
 
+// Check all variants of JacobiSVD
 template<typename MatrixType>
 void jacobisvd(const MatrixType& a = MatrixType(), bool pickrandom = true)
 {
   MatrixType m = a;
   if(pickrandom)
-  {
-    typedef typename MatrixType::Scalar Scalar;
-    typedef typename MatrixType::RealScalar RealScalar;
-    typedef typename MatrixType::Index Index;
-    Index diagSize = (std::min)(a.rows(), a.cols());
-    RealScalar s = std::numeric_limits<RealScalar>::max_exponent10/4;
-    s = internal::random<RealScalar>(1,s);
-    Matrix<RealScalar,Dynamic,1> d =  Matrix<RealScalar,Dynamic,1>::Random(diagSize);
-    for(Index k=0; k<diagSize; ++k)
-      d(k) = d(k)*std::pow(RealScalar(10),internal::random<RealScalar>(-s,s));
-    m = Matrix<Scalar,Dynamic,Dynamic>::Random(a.rows(),diagSize) * d.asDiagonal() * Matrix<Scalar,Dynamic,Dynamic>::Random(diagSize,a.cols());
-    // cancel some coeffs
-    Index n  = internal::random<Index>(0,m.size()-1);
-    for(Index i=0; i<n; ++i)
-      m(internal::random<Index>(0,m.rows()-1), internal::random<Index>(0,m.cols()-1)) = Scalar(0);
-  }
+    svd_fill_random(m);
 
-  CALL_SUBTEST(( jacobisvd_test_all_computation_options<MatrixType, FullPivHouseholderQRPreconditioner>(m) ));
-  CALL_SUBTEST(( jacobisvd_test_all_computation_options<MatrixType, ColPivHouseholderQRPreconditioner>(m) ));
-  CALL_SUBTEST(( jacobisvd_test_all_computation_options<MatrixType, HouseholderQRPreconditioner>(m) ));
-  CALL_SUBTEST(( jacobisvd_test_all_computation_options<MatrixType, NoQRPreconditioner>(m) ));
+  CALL_SUBTEST(( svd_test_all_computation_options<JacobiSVD<MatrixType, FullPivHouseholderQRPreconditioner> >(m, true)  )); // check full only
+  CALL_SUBTEST(( svd_test_all_computation_options<JacobiSVD<MatrixType, ColPivHouseholderQRPreconditioner>  >(m, false) ));
+  CALL_SUBTEST(( svd_test_all_computation_options<JacobiSVD<MatrixType, HouseholderQRPreconditioner>        >(m, false) ));
+  if(m.rows()==m.cols())
+    CALL_SUBTEST(( svd_test_all_computation_options<JacobiSVD<MatrixType, NoQRPreconditioner>               >(m, false) ));
 }
 
 template<typename MatrixType> void jacobisvd_verify_assert(const MatrixType& m)
 {
-  typedef typename MatrixType::Scalar Scalar;
+  svd_verify_assert<JacobiSVD<MatrixType> >(m);
   typedef typename MatrixType::Index Index;
   Index rows = m.rows();
   Index cols = m.cols();
 
   enum {
-    RowsAtCompileTime = MatrixType::RowsAtCompileTime,
     ColsAtCompileTime = MatrixType::ColsAtCompileTime
   };
 
-  typedef Matrix<Scalar, RowsAtCompileTime, 1> RhsType;
-
-  RhsType rhs(rows);
-
-  JacobiSVD<MatrixType> svd;
-  VERIFY_RAISES_ASSERT(svd.matrixU())
-  VERIFY_RAISES_ASSERT(svd.singularValues())
-  VERIFY_RAISES_ASSERT(svd.matrixV())
-  VERIFY_RAISES_ASSERT(svd.solve(rhs))
 
   MatrixType a = MatrixType::Zero(rows, cols);
   a.setZero();
-  svd.compute(a, 0);
-  VERIFY_RAISES_ASSERT(svd.matrixU())
-  VERIFY_RAISES_ASSERT(svd.matrixV())
-  svd.singularValues();
-  VERIFY_RAISES_ASSERT(svd.solve(rhs))
 
   if (ColsAtCompileTime == Dynamic)
   {
-    svd.compute(a, ComputeThinU);
-    svd.matrixU();
-    VERIFY_RAISES_ASSERT(svd.matrixV())
-    VERIFY_RAISES_ASSERT(svd.solve(rhs))
-
-    svd.compute(a, ComputeThinV);
-    svd.matrixV();
-    VERIFY_RAISES_ASSERT(svd.matrixU())
-    VERIFY_RAISES_ASSERT(svd.solve(rhs))
-
     JacobiSVD<MatrixType, FullPivHouseholderQRPreconditioner> svd_fullqr;
     VERIFY_RAISES_ASSERT(svd_fullqr.compute(a, ComputeFullU|ComputeThinV))
     VERIFY_RAISES_ASSERT(svd_fullqr.compute(a, ComputeThinU|ComputeThinV))
     VERIFY_RAISES_ASSERT(svd_fullqr.compute(a, ComputeThinU|ComputeFullV))
   }
-  else
-  {
-    VERIFY_RAISES_ASSERT(svd.compute(a, ComputeThinU))
-    VERIFY_RAISES_ASSERT(svd.compute(a, ComputeThinV))
-  }
 }
 
 template<typename MatrixType>
@@ -296,128 +70,17 @@ void jacobisvd_method()
   VERIFY_IS_APPROX(m.jacobiSvd(ComputeFullU|ComputeFullV).solve(m), m);
 }
 
-// work around stupid msvc error when constructing at compile time an expression that involves
-// a division by zero, even if the numeric type has floating point
-template<typename Scalar>
-EIGEN_DONT_INLINE Scalar zero() { return Scalar(0); }
-
-// workaround aggressive optimization in ICC
-template<typename T> EIGEN_DONT_INLINE  T sub(T a, T b) { return a - b; }
-
-template<typename MatrixType>
-void jacobisvd_inf_nan()
-{
-  // all this function does is verify we don't iterate infinitely on nan/inf values
-
-  JacobiSVD<MatrixType> svd;
-  typedef typename MatrixType::Scalar Scalar;
-  Scalar some_inf = Scalar(1) / zero<Scalar>();
-  VERIFY(sub(some_inf, some_inf) != sub(some_inf, some_inf));
-  svd.compute(MatrixType::Constant(10,10,some_inf), ComputeFullU | ComputeFullV);
-
-  Scalar some_nan = zero<Scalar>() / zero<Scalar>();
-  VERIFY(some_nan != some_nan);
-  svd.compute(MatrixType::Constant(10,10,some_nan), ComputeFullU | ComputeFullV);
-
-  MatrixType m = MatrixType::Zero(10,10);
-  m(internal::random<int>(0,9), internal::random<int>(0,9)) = some_inf;
-  svd.compute(m, ComputeFullU | ComputeFullV);
-
-  m = MatrixType::Zero(10,10);
-  m(internal::random<int>(0,9), internal::random<int>(0,9)) = some_nan;
-  svd.compute(m, ComputeFullU | ComputeFullV);
-}
-
-// Regression test for bug 286: JacobiSVD loops indefinitely with some
-// matrices containing denormal numbers.
-void jacobisvd_underoverflow()
-{
-#if defined __INTEL_COMPILER
-// shut up warning #239: floating point underflow
-#pragma warning push
-#pragma warning disable 239
-#endif
-  Matrix2d M;
-  M << -7.90884e-313, -4.94e-324,
-                 0, 5.60844e-313;
-#if defined __INTEL_COMPILER
-#pragma warning pop
-#endif
-  JacobiSVD<Matrix2d> svd;
-  svd.compute(M); // just check we don't loop indefinitely
-  
-  // Check for overflow:
-  Matrix3d M3;
-  M3 << 4.4331978442502944e+307, -5.8585363752028680e+307,  6.4527017443412964e+307,
-        3.7841695601406358e+307,  2.4331702789740617e+306, -3.5235707140272905e+307,
-       -8.7190887618028355e+307, -7.3453213709232193e+307, -2.4367363684472105e+307;
-
-  JacobiSVD<Matrix3d> svd3;
-  svd3.compute(M3); // just check we don't loop indefinitely
-}
-
-void jacobisvd_preallocate()
-{
-  Vector3f v(3.f, 2.f, 1.f);
-  MatrixXf m = v.asDiagonal();
-
-  internal::set_is_malloc_allowed(false);
-  VERIFY_RAISES_ASSERT(VectorXf tmp(10);)
-  JacobiSVD<MatrixXf> svd;
-  internal::set_is_malloc_allowed(true);
-  svd.compute(m);
-  VERIFY_IS_APPROX(svd.singularValues(), v);
-
-  JacobiSVD<MatrixXf> svd2(3,3);
-  internal::set_is_malloc_allowed(false);
-  svd2.compute(m);
-  internal::set_is_malloc_allowed(true);
-  VERIFY_IS_APPROX(svd2.singularValues(), v);
-  VERIFY_RAISES_ASSERT(svd2.matrixU());
-  VERIFY_RAISES_ASSERT(svd2.matrixV());
-  svd2.compute(m, ComputeFullU | ComputeFullV);
-  VERIFY_IS_APPROX(svd2.matrixU(), Matrix3f::Identity());
-  VERIFY_IS_APPROX(svd2.matrixV(), Matrix3f::Identity());
-  internal::set_is_malloc_allowed(false);
-  svd2.compute(m);
-  internal::set_is_malloc_allowed(true);
-
-  JacobiSVD<MatrixXf> svd3(3,3,ComputeFullU|ComputeFullV);
-  internal::set_is_malloc_allowed(false);
-  svd2.compute(m);
-  internal::set_is_malloc_allowed(true);
-  VERIFY_IS_APPROX(svd2.singularValues(), v);
-  VERIFY_IS_APPROX(svd2.matrixU(), Matrix3f::Identity());
-  VERIFY_IS_APPROX(svd2.matrixV(), Matrix3f::Identity());
-  internal::set_is_malloc_allowed(false);
-  svd2.compute(m, ComputeFullU|ComputeFullV);
-  internal::set_is_malloc_allowed(true);
-}
-
 void test_jacobisvd()
 {
   CALL_SUBTEST_3(( jacobisvd_verify_assert(Matrix3f()) ));
   CALL_SUBTEST_4(( jacobisvd_verify_assert(Matrix4d()) ));
   CALL_SUBTEST_7(( jacobisvd_verify_assert(MatrixXf(10,12)) ));
   CALL_SUBTEST_8(( jacobisvd_verify_assert(MatrixXcd(7,5)) ));
+  
+  svd_all_trivial_2x2(jacobisvd<Matrix2cd>);
+  svd_all_trivial_2x2(jacobisvd<Matrix2d>);
 
   for(int i = 0; i < g_repeat; i++) {
-    Matrix2cd m;
-    m << 0, 1,
-         0, 1;
-    CALL_SUBTEST_1(( jacobisvd(m, false) ));
-    m << 1, 0,
-         1, 0;
-    CALL_SUBTEST_1(( jacobisvd(m, false) ));
-
-    Matrix2d n;
-    n << 0, 0,
-         0, 0;
-    CALL_SUBTEST_2(( jacobisvd(n, false) ));
-    n << 0, 0,
-         0, 1;
-    CALL_SUBTEST_2(( jacobisvd(n, false) ));
-    
     CALL_SUBTEST_3(( jacobisvd<Matrix3f>() ));
     CALL_SUBTEST_4(( jacobisvd<Matrix4d>() ));
     CALL_SUBTEST_5(( jacobisvd<Matrix<float,3,5> >() ));
@@ -436,7 +99,8 @@ void test_jacobisvd()
     (void) c;
 
     // Test on inf/nan matrix
-    CALL_SUBTEST_7( jacobisvd_inf_nan<MatrixXf>() );
+    CALL_SUBTEST_7(  (svd_inf_nan<JacobiSVD<MatrixXf>, MatrixXf>()) );
+    CALL_SUBTEST_10( (svd_inf_nan<JacobiSVD<MatrixXd>, MatrixXd>()) );
   }
 
   CALL_SUBTEST_7(( jacobisvd<MatrixXf>(MatrixXf(internal::random<int>(EIGEN_TEST_MAX_SIZE/4, EIGEN_TEST_MAX_SIZE/2), internal::random<int>(EIGEN_TEST_MAX_SIZE/4, EIGEN_TEST_MAX_SIZE/2))) ));
@@ -450,8 +114,7 @@ void test_jacobisvd()
   CALL_SUBTEST_7( JacobiSVD<MatrixXf>(10,10) );
 
   // Check that preallocation avoids subsequent mallocs
-  CALL_SUBTEST_9( jacobisvd_preallocate() );
+  CALL_SUBTEST_9( svd_preallocate() );
 
-  // Regression check for bug 286
-  CALL_SUBTEST_2( jacobisvd_underoverflow() );
+  CALL_SUBTEST_2( svd_underoverflow() );
 }
diff --git a/test/linearstructure.cpp b/test/linearstructure.cpp
index 618984d5c..87dfa1b6b 100644
--- a/test/linearstructure.cpp
+++ b/test/linearstructure.cpp
@@ -2,11 +2,15 @@
 // for linear algebra.
 //
 // Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2014 Gael Guennebaud <gael.guennebaud@inria.fr>
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
+static bool g_called;
+#define EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN { g_called = true; }
+
 #include "main.h"
 
 template<typename MatrixType> void linearStructure(const MatrixType& m)
@@ -68,6 +72,24 @@ template<typename MatrixType> void linearStructure(const MatrixType& m)
   VERIFY_IS_APPROX(m1.block(0,0,rows,cols) * s1, m1 * s1);
 }
 
+// Make sure that complex * real and real * complex are properly optimized
+template<typename MatrixType> void real_complex(DenseIndex rows = MatrixType::RowsAtCompileTime, DenseIndex cols = MatrixType::ColsAtCompileTime)
+{
+  typedef typename MatrixType::Scalar Scalar;
+  typedef typename MatrixType::RealScalar RealScalar;
+  
+  RealScalar s = internal::random<RealScalar>();
+  MatrixType m1 = MatrixType::Random(rows, cols);
+  
+  g_called = false;
+  VERIFY_IS_APPROX(s*m1, Scalar(s)*m1);
+  VERIFY(g_called && "real * matrix<complex> not properly optimized");
+  
+  g_called = false;
+  VERIFY_IS_APPROX(m1*s, m1*Scalar(s));
+  VERIFY(g_called && "matrix<complex> * real not properly optimized");
+}
+
 void test_linearstructure()
 {
   for(int i = 0; i < g_repeat; i++) {
@@ -80,5 +102,23 @@ void test_linearstructure()
     CALL_SUBTEST_7( linearStructure(MatrixXi (internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) );
     CALL_SUBTEST_8( linearStructure(MatrixXcd(internal::random<int>(1,EIGEN_TEST_MAX_SIZE/2), internal::random<int>(1,EIGEN_TEST_MAX_SIZE/2))) );
     CALL_SUBTEST_9( linearStructure(ArrayXXf (internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) );
+    
+    CALL_SUBTEST_10( real_complex<Matrix4cd>() );
+    CALL_SUBTEST_10( real_complex<MatrixXcf>(10,10) );
+  }
+  
+#ifdef EIGEN_TEST_PART_4
+  {
+    // make sure that /=scalar and /scalar do not overflow
+    // rational: 1.0/4.94e-320 overflow, but m/4.94e-320 should not
+    Matrix4d m2, m3;
+    m3 = m2 =  Matrix4d::Random()*1e-20;
+    m2 = m2 / 4.9e-320;
+    VERIFY_IS_APPROX(m2.cwiseQuotient(m2), Matrix4d::Ones());
+    m3 /= 4.9e-320;
+    VERIFY_IS_APPROX(m3.cwiseQuotient(m3), Matrix4d::Ones());
+    
+    
   }
+#endif
 }
diff --git a/test/main.h b/test/main.h
index 7667eaa18..371c7e602 100644
--- a/test/main.h
+++ b/test/main.h
@@ -94,6 +94,9 @@ namespace Eigen
   static bool g_has_set_repeat, g_has_set_seed;
 }
 
+#define TRACK std::cerr << __FILE__ << " " << __LINE__ << std::endl
+// #define TRACK while()
+
 #define EI_PP_MAKE_STRING2(S) #S
 #define EI_PP_MAKE_STRING(S) EI_PP_MAKE_STRING2(S)
 
@@ -311,13 +314,7 @@ inline bool test_isApproxOrLessThan(const long double& a, const long double& b)
 template<typename Type1, typename Type2>
 inline bool test_isApprox(const Type1& a, const Type2& b)
 {
-#ifdef EIGEN_TEST_EVALUATORS
-  typename internal::eval<Type1>::type a_eval(a);
-  typename internal::eval<Type2>::type b_eval(b);
-  return a_eval.isApprox(b_eval, test_precision<typename Type1::Scalar>());
-#else
   return a.isApprox(b, test_precision<typename Type1::Scalar>());
-#endif
 }
 
 // The idea behind this function is to compare the two scalars a and b where
diff --git a/test/mixingtypes.cpp b/test/mixingtypes.cpp
index 1e0e2d4c1..048f7255a 100644
--- a/test/mixingtypes.cpp
+++ b/test/mixingtypes.cpp
@@ -53,10 +53,11 @@ template<int SizeAtCompileType> void mixingtypes(int size = SizeAtCompileType)
   mf+mf;
   VERIFY_RAISES_ASSERT(mf+md);
   VERIFY_RAISES_ASSERT(mf+mcf);
-  VERIFY_RAISES_ASSERT(vf=vd);
-  VERIFY_RAISES_ASSERT(vf+=vd);
-  VERIFY_RAISES_ASSERT(mcd=md);
-
+  // the following do not even compile since the introduction of evaluators
+//   VERIFY_RAISES_ASSERT(vf=vd);
+//   VERIFY_RAISES_ASSERT(vf+=vd);
+//   VERIFY_RAISES_ASSERT(mcd=md);
+  
   // check scalar products
   VERIFY_IS_APPROX(vcf * sf , vcf * complex<float>(sf));
   VERIFY_IS_APPROX(sd * vcd, complex<double>(sd) * vcd);
diff --git a/test/nesting_ops.cpp b/test/nesting_ops.cpp
index 1e8523283..6e772c70f 100644
--- a/test/nesting_ops.cpp
+++ b/test/nesting_ops.cpp
@@ -11,7 +11,7 @@
 
 template <typename MatrixType> void run_nesting_ops(const MatrixType& _m)
 {
-  typename MatrixType::Nested m(_m);
+  typename internal::nested_eval<MatrixType,2>::type m(_m);
 
   // Make really sure that we are in debug mode!
   VERIFY_RAISES_ASSERT(eigen_assert(false));
diff --git a/test/product.h b/test/product.h
index 856b234ac..0b3abe402 100644
--- a/test/product.h
+++ b/test/product.h
@@ -139,4 +139,12 @@ template<typename MatrixType> void product(const MatrixType& m)
   // inner product
   Scalar x = square2.row(c) * square2.col(c2);
   VERIFY_IS_APPROX(x, square2.row(c).transpose().cwiseProduct(square2.col(c2)).sum());
+  
+  // outer product
+  VERIFY_IS_APPROX(m1.col(c) * m1.row(r), m1.block(0,c,rows,1) * m1.block(r,0,1,cols));
+  VERIFY_IS_APPROX(m1.row(r).transpose() * m1.col(c).transpose(), m1.block(r,0,1,cols).transpose() * m1.block(0,c,rows,1).transpose());
+  VERIFY_IS_APPROX(m1.block(0,c,rows,1) * m1.row(r), m1.block(0,c,rows,1) * m1.block(r,0,1,cols));
+  VERIFY_IS_APPROX(m1.col(c) * m1.block(r,0,1,cols), m1.block(0,c,rows,1) * m1.block(r,0,1,cols));
+  VERIFY_IS_APPROX(m1.leftCols(1) * m1.row(r), m1.block(0,0,rows,1) * m1.block(r,0,1,cols));
+  VERIFY_IS_APPROX(m1.col(c) * m1.topRows(1), m1.block(0,c,rows,1) * m1.block(0,0,1,cols));  
 }
diff --git a/test/product_mmtr.cpp b/test/product_mmtr.cpp
index 7d6746800..92e6b668f 100644
--- a/test/product_mmtr.cpp
+++ b/test/product_mmtr.cpp
@@ -13,7 +13,8 @@
     ref2 = ref1 = DEST;                               \
     DEST.template triangularView<TRI>() OP;           \
     ref1 OP;                                          \
-    ref2.template triangularView<TRI>() = ref1;       \
+    ref2.template triangularView<TRI>()               \
+      = ref1.template triangularView<TRI>();          \
     VERIFY_IS_APPROX(DEST,ref2);                      \
   }
 
diff --git a/test/product_notemporary.cpp b/test/product_notemporary.cpp
index 3a9df618b..805cc8939 100644
--- a/test/product_notemporary.cpp
+++ b/test/product_notemporary.cpp
@@ -113,8 +113,7 @@ template<typename MatrixType> void product_notemporary(const MatrixType& m)
   VERIFY_EVALUATION_COUNT( Scalar tmp = 0; tmp += Scalar(RealScalar(1)) /  (m3.transpose() * m3).diagonal().array().abs().sum(), 0 );
 
   // Zero temporaries for ... CoeffBasedProductMode
-  // - does not work with GCC because of the <..>, we'ld need variadic macros ...
-  //VERIFY_EVALUATION_COUNT( m3.col(0).head<5>() * m3.col(0).transpose() + m3.col(0).head<5>() * m3.col(0).transpose(), 0 );
+  VERIFY_EVALUATION_COUNT( m3.col(0).template head<5>() * m3.col(0).transpose() + m3.col(0).template head<5>() * m3.col(0).transpose(), 0 );
 
   // Check matrix * vectors
   VERIFY_EVALUATION_COUNT( cvres.noalias() = m1 * cv1, 0 );
diff --git a/test/qr_fullpivoting.cpp b/test/qr_fullpivoting.cpp
index 511f2473f..601773404 100644
--- a/test/qr_fullpivoting.cpp
+++ b/test/qr_fullpivoting.cpp
@@ -40,7 +40,11 @@ template<typename MatrixType> void qr()
   MatrixType c = qr.matrixQ() * r * qr.colsPermutation().inverse();
 
   VERIFY_IS_APPROX(m1, c);
-
+  
+  // stress the ReturnByValue mechanism
+  MatrixType tmp;
+  VERIFY_IS_APPROX(tmp.noalias() = qr.matrixQ() * r, (qr.matrixQ() * r).eval());
+  
   MatrixType m2 = MatrixType::Random(cols,cols2);
   MatrixType m3 = m1*m2;
   m2 = MatrixType::Random(cols,cols2);
diff --git a/test/sparse_basic.cpp b/test/sparse_basic.cpp
index 4c9b9111e..c86534bad 100644
--- a/test/sparse_basic.cpp
+++ b/test/sparse_basic.cpp
@@ -201,9 +201,9 @@ template<typename SparseMatrixType> void sparse_basic(const SparseMatrixType& re
     
     VERIFY(m3.innerVector(j0).nonZeros() == m3.transpose().innerVector(j0).nonZeros());
 
-    //m2.innerVector(j0) = 2*m2.innerVector(j1);
-    //refMat2.col(j0) = 2*refMat2.col(j1);
-    //VERIFY_IS_APPROX(m2, refMat2);
+//     m2.innerVector(j0) = 2*m2.innerVector(j1);
+//     refMat2.col(j0) = 2*refMat2.col(j1);
+//     VERIFY_IS_APPROX(m2, refMat2);
   }
 
   // test innerVectors()
@@ -239,7 +239,7 @@ template<typename SparseMatrixType> void sparse_basic(const SparseMatrixType& re
     
     VERIFY_IS_APPROX(m2, refMat2);
   }
-  
+
   // test basic computations
   {
     DenseMatrix refM1 = DenseMatrix::Zero(rows, rows);
@@ -255,6 +255,7 @@ template<typename SparseMatrixType> void sparse_basic(const SparseMatrixType& re
     initSparse<Scalar>(density, refM3, m3);
     initSparse<Scalar>(density, refM4, m4);
 
+    VERIFY_IS_APPROX(m1*s1, refM1*s1);
     VERIFY_IS_APPROX(m1+m2, refM1+refM2);
     VERIFY_IS_APPROX(m1+m2+m3, refM1+refM2+refM3);
     VERIFY_IS_APPROX(m3.cwiseProduct(m1+m2), refM3.cwiseProduct(refM1+refM2));
diff --git a/test/sparse_product.cpp b/test/sparse_product.cpp
index 0f52164c8..fa9be5440 100644
--- a/test/sparse_product.cpp
+++ b/test/sparse_product.cpp
@@ -19,7 +19,7 @@ template<typename SparseMatrixType> void sparse_product()
   typedef typename SparseMatrixType::Scalar Scalar;
   enum { Flags = SparseMatrixType::Flags };
 
-  double density = (std::max)(8./(rows*cols), 0.1);
+  double density = (std::max)(8./(rows*cols), 0.2);
   typedef Matrix<Scalar,Dynamic,Dynamic> DenseMatrix;
   typedef Matrix<Scalar,Dynamic,1> DenseVector;
   typedef Matrix<Scalar,1,Dynamic> RowDenseVector;
@@ -77,17 +77,27 @@ template<typename SparseMatrixType> void sparse_product()
     m4 = m2; refMat4 = refMat2;
     VERIFY_IS_APPROX(m4=m4*m3, refMat4=refMat4*refMat3);
 
-    // sparse * dense
+    // sparse * dense matrix
     VERIFY_IS_APPROX(dm4=m2*refMat3, refMat4=refMat2*refMat3);
     VERIFY_IS_APPROX(dm4=m2*refMat3t.transpose(), refMat4=refMat2*refMat3t.transpose());
     VERIFY_IS_APPROX(dm4=m2t.transpose()*refMat3, refMat4=refMat2t.transpose()*refMat3);
     VERIFY_IS_APPROX(dm4=m2t.transpose()*refMat3t.transpose(), refMat4=refMat2t.transpose()*refMat3t.transpose());
 
+    VERIFY_IS_APPROX(dm4=m2*refMat3, refMat4=refMat2*refMat3);
+    VERIFY_IS_APPROX(dm4=dm4+m2*refMat3, refMat4=refMat4+refMat2*refMat3);
     VERIFY_IS_APPROX(dm4=m2*(refMat3+refMat3), refMat4=refMat2*(refMat3+refMat3));
     VERIFY_IS_APPROX(dm4=m2t.transpose()*(refMat3+refMat5)*0.5, refMat4=refMat2t.transpose()*(refMat3+refMat5)*0.5);
+    
+    // sparse * dense vector
+    VERIFY_IS_APPROX(dm4.col(0)=m2*refMat3.col(0), refMat4.col(0)=refMat2*refMat3.col(0));
+    VERIFY_IS_APPROX(dm4.col(0)=m2*refMat3t.transpose().col(0), refMat4.col(0)=refMat2*refMat3t.transpose().col(0));
+    VERIFY_IS_APPROX(dm4.col(0)=m2t.transpose()*refMat3.col(0), refMat4.col(0)=refMat2t.transpose()*refMat3.col(0));
+    VERIFY_IS_APPROX(dm4.col(0)=m2t.transpose()*refMat3t.transpose().col(0), refMat4.col(0)=refMat2t.transpose()*refMat3t.transpose().col(0));
 
     // dense * sparse
     VERIFY_IS_APPROX(dm4=refMat2*m3, refMat4=refMat2*refMat3);
+    VERIFY_IS_APPROX(dm4=dm4+refMat2*m3, refMat4=refMat4+refMat2*refMat3);
+    VERIFY_IS_APPROX(dm4+=refMat2*m3, refMat4+=refMat2*refMat3);
     VERIFY_IS_APPROX(dm4=refMat2*m3t.transpose(), refMat4=refMat2*refMat3t.transpose());
     VERIFY_IS_APPROX(dm4=refMat2t.transpose()*m3, refMat4=refMat2t.transpose()*refMat3);
     VERIFY_IS_APPROX(dm4=refMat2t.transpose()*m3t.transpose(), refMat4=refMat2t.transpose()*refMat3t.transpose());
@@ -99,7 +109,7 @@ template<typename SparseMatrixType> void sparse_product()
       Index c1 = internal::random<Index>(0,cols-1);
       Index r1 = internal::random<Index>(0,depth-1);
       DenseMatrix dm5  = DenseMatrix::Random(depth, cols);
-      
+
       VERIFY_IS_APPROX( m4=m2.col(c)*dm5.col(c1).transpose(), refMat4=refMat2.col(c)*dm5.col(c1).transpose());
       VERIFY_IS_EQUAL(m4.nonZeros(), (refMat4.array()!=0).count());
       VERIFY_IS_APPROX( m4=m2.middleCols(c,1)*dm5.col(c1).transpose(), refMat4=refMat2.col(c)*dm5.col(c1).transpose());
@@ -143,11 +153,11 @@ template<typename SparseMatrixType> void sparse_product()
     RowSpVector rv0(depth), rv1;
     RowDenseVector drv0(depth), drv1(rv1);
     initSparse(2*density,drv0, rv0);
-    
-    VERIFY_IS_APPROX(cv1=rv0*m3, dcv1=drv0*refMat3);
+
+    VERIFY_IS_APPROX(cv1=m3*cv0, dcv1=refMat3*dcv0);    
     VERIFY_IS_APPROX(rv1=rv0*m3, drv1=drv0*refMat3);
-    VERIFY_IS_APPROX(cv1=m3*cv0, dcv1=refMat3*dcv0);
     VERIFY_IS_APPROX(cv1=m3t.adjoint()*cv0, dcv1=refMat3t.adjoint()*dcv0);
+    VERIFY_IS_APPROX(cv1=rv0*m3, dcv1=drv0*refMat3);
     VERIFY_IS_APPROX(rv1=m3*cv0, drv1=refMat3*dcv0);
   }
   
diff --git a/test/sparse_vector.cpp b/test/sparse_vector.cpp
index 0c9476803..5eea9edfd 100644
--- a/test/sparse_vector.cpp
+++ b/test/sparse_vector.cpp
@@ -71,6 +71,7 @@ template<typename Scalar,typename Index> void sparse_vector(int rows, int cols)
   VERIFY_IS_APPROX(v1.dot(v2), refV1.dot(refV2));
   VERIFY_IS_APPROX(v1.dot(refV2), refV1.dot(refV2));
 
+  VERIFY_IS_APPROX(m1*v2, refM1*refV2);
   VERIFY_IS_APPROX(v1.dot(m1*v2), refV1.dot(refM1*refV2));
   int i = internal::random<int>(0,rows-1);
   VERIFY_IS_APPROX(v1.dot(m1.col(i)), refV1.dot(refM1.col(i)));
diff --git a/test/stable_norm.cpp b/test/stable_norm.cpp
index 549f91fbf..6cd65c64a 100644
--- a/test/stable_norm.cpp
+++ b/test/stable_norm.cpp
@@ -1,7 +1,7 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
-// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2009-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
@@ -14,6 +14,21 @@ template<typename T> bool isNotNaN(const T& x)
   return x==x;
 }
 
+template<typename T> bool isNaN(const T& x)
+{
+  return x!=x;
+}
+
+template<typename T> bool isInf(const T& x)
+{
+  return x > NumTraits<T>::highest();
+}
+
+template<typename T> bool isMinusInf(const T& x)
+{
+  return x < NumTraits<T>::lowest();
+}
+
 // workaround aggressive optimization in ICC
 template<typename T> EIGEN_DONT_INLINE  T sub(T a, T b) { return a - b; }
 
@@ -106,6 +121,58 @@ template<typename MatrixType> void stable_norm(const MatrixType& m)
   VERIFY_IS_APPROX(vrand.rowwise().stableNorm(),      vrand.rowwise().norm());
   VERIFY_IS_APPROX(vrand.rowwise().blueNorm(),        vrand.rowwise().norm());
   VERIFY_IS_APPROX(vrand.rowwise().hypotNorm(),       vrand.rowwise().norm());
+  
+  // test NaN, +inf, -inf 
+  MatrixType v;
+  Index i = internal::random<Index>(0,rows-1);
+  Index j = internal::random<Index>(0,cols-1);
+
+  // NaN
+  {
+    v = vrand;
+    v(i,j) = std::numeric_limits<RealScalar>::quiet_NaN();
+    VERIFY(!isFinite(v.squaredNorm()));   VERIFY(isNaN(v.squaredNorm()));
+    VERIFY(!isFinite(v.norm()));          VERIFY(isNaN(v.norm()));
+    VERIFY(!isFinite(v.stableNorm()));    VERIFY(isNaN(v.stableNorm()));
+    VERIFY(!isFinite(v.blueNorm()));      VERIFY(isNaN(v.blueNorm()));
+    VERIFY(!isFinite(v.hypotNorm()));     VERIFY(isNaN(v.hypotNorm()));
+  }
+  
+  // +inf
+  {
+    v = vrand;
+    v(i,j) = std::numeric_limits<RealScalar>::infinity();
+    VERIFY(!isFinite(v.squaredNorm()));   VERIFY(isInf(v.squaredNorm()));
+    VERIFY(!isFinite(v.norm()));          VERIFY(isInf(v.norm()));
+    VERIFY(!isFinite(v.stableNorm()));    VERIFY(isInf(v.stableNorm()));
+    VERIFY(!isFinite(v.blueNorm()));      VERIFY(isInf(v.blueNorm()));
+    VERIFY(!isFinite(v.hypotNorm()));     VERIFY(isInf(v.hypotNorm()));
+  }
+  
+  // -inf
+  {
+    v = vrand;
+    v(i,j) = -std::numeric_limits<RealScalar>::infinity();
+    VERIFY(!isFinite(v.squaredNorm()));   VERIFY(isInf(v.squaredNorm()));
+    VERIFY(!isFinite(v.norm()));          VERIFY(isInf(v.norm()));
+    VERIFY(!isFinite(v.stableNorm()));    VERIFY(isInf(v.stableNorm()));
+    VERIFY(!isFinite(v.blueNorm()));      VERIFY(isInf(v.blueNorm()));
+    VERIFY(!isFinite(v.hypotNorm()));     VERIFY(isInf(v.hypotNorm()));
+  }
+  
+  // mix
+  {
+    Index i2 = internal::random<Index>(0,rows-1);
+    Index j2 = internal::random<Index>(0,cols-1);
+    v = vrand;
+    v(i,j) = -std::numeric_limits<RealScalar>::infinity();
+    v(i2,j2) = std::numeric_limits<RealScalar>::quiet_NaN();
+    VERIFY(!isFinite(v.squaredNorm()));   VERIFY(isNaN(v.squaredNorm()));
+    VERIFY(!isFinite(v.norm()));          VERIFY(isNaN(v.norm()));
+    VERIFY(!isFinite(v.stableNorm()));    VERIFY(isNaN(v.stableNorm()));
+    VERIFY(!isFinite(v.blueNorm()));      VERIFY(isNaN(v.blueNorm()));
+    VERIFY(!isFinite(v.hypotNorm()));     VERIFY(isNaN(v.hypotNorm()));
+  }
 }
 
 void test_stable_norm()
diff --git a/test/svd_common.h b/test/svd_common.h
new file mode 100644
index 000000000..4631939e5
--- /dev/null
+++ b/test/svd_common.h
@@ -0,0 +1,454 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef SVD_DEFAULT
+#error a macro SVD_DEFAULT(MatrixType) must be defined prior to including svd_common.h
+#endif
+
+#ifndef SVD_FOR_MIN_NORM
+#error a macro SVD_FOR_MIN_NORM(MatrixType) must be defined prior to including svd_common.h
+#endif
+
+// Check that the matrix m is properly reconstructed and that the U and V factors are unitary
+// The SVD must have already been computed.
+template<typename SvdType, typename MatrixType>
+void svd_check_full(const MatrixType& m, const SvdType& svd)
+{
+  typedef typename MatrixType::Index Index;
+  Index rows = m.rows();
+  Index cols = m.cols();
+
+  enum {
+    RowsAtCompileTime = MatrixType::RowsAtCompileTime,
+    ColsAtCompileTime = MatrixType::ColsAtCompileTime
+  };
+
+  typedef typename MatrixType::Scalar Scalar;
+  typedef Matrix<Scalar, RowsAtCompileTime, RowsAtCompileTime> MatrixUType;
+  typedef Matrix<Scalar, ColsAtCompileTime, ColsAtCompileTime> MatrixVType;
+
+  MatrixType sigma = MatrixType::Zero(rows,cols);
+  sigma.diagonal() = svd.singularValues().template cast<Scalar>();
+  MatrixUType u = svd.matrixU();
+  MatrixVType v = svd.matrixV();
+
+  VERIFY_IS_APPROX(m, u * sigma * v.adjoint());
+  VERIFY_IS_UNITARY(u);
+  VERIFY_IS_UNITARY(v);
+}
+
+// Compare partial SVD defined by computationOptions to a full SVD referenceSvd
+template<typename SvdType, typename MatrixType>
+void svd_compare_to_full(const MatrixType& m,
+                         unsigned int computationOptions,
+                         const SvdType& referenceSvd)
+{
+  typedef typename MatrixType::Index Index;
+  Index rows = m.rows();
+  Index cols = m.cols();
+  Index diagSize = (std::min)(rows, cols);
+
+  SvdType svd(m, computationOptions);
+
+  VERIFY_IS_APPROX(svd.singularValues(), referenceSvd.singularValues());
+  if(computationOptions & ComputeFullU)  VERIFY_IS_APPROX(svd.matrixU(), referenceSvd.matrixU());
+  if(computationOptions & ComputeThinU)  VERIFY_IS_APPROX(svd.matrixU(), referenceSvd.matrixU().leftCols(diagSize));
+  if(computationOptions & ComputeFullV)  VERIFY_IS_APPROX(svd.matrixV(), referenceSvd.matrixV());
+  if(computationOptions & ComputeThinV)  VERIFY_IS_APPROX(svd.matrixV(), referenceSvd.matrixV().leftCols(diagSize));
+}
+
+//
+template<typename SvdType, typename MatrixType>
+void svd_least_square(const MatrixType& m, unsigned int computationOptions)
+{
+  typedef typename MatrixType::Scalar Scalar;
+  typedef typename MatrixType::RealScalar RealScalar;
+  typedef typename MatrixType::Index Index;
+  Index rows = m.rows();
+  Index cols = m.cols();
+
+  enum {
+    RowsAtCompileTime = MatrixType::RowsAtCompileTime,
+    ColsAtCompileTime = MatrixType::ColsAtCompileTime
+  };
+
+  typedef Matrix<Scalar, RowsAtCompileTime, Dynamic> RhsType;
+  typedef Matrix<Scalar, ColsAtCompileTime, Dynamic> SolutionType;
+
+  RhsType rhs = RhsType::Random(rows, internal::random<Index>(1, cols));
+  SvdType svd(m, computationOptions);
+
+       if(internal::is_same<RealScalar,double>::value) svd.setThreshold(1e-8);
+  else if(internal::is_same<RealScalar,float>::value)  svd.setThreshold(1e-4);
+  
+  SolutionType x = svd.solve(rhs);
+  
+  RealScalar residual = (m*x-rhs).norm();
+  // Check that there is no significantly better solution in the neighborhood of x
+  if(!test_isMuchSmallerThan(residual,rhs.norm()))
+  {
+    // If the residual is very small, then we have an exact solution, so we are already good.
+    for(int k=0;k<x.rows();++k)
+    {
+      SolutionType y(x);
+      y.row(k).array() += 2*NumTraits<RealScalar>::epsilon();
+      RealScalar residual_y = (m*y-rhs).norm();
+      VERIFY( test_isApprox(residual_y,residual) || residual < residual_y );
+      
+      y.row(k) = x.row(k).array() - 2*NumTraits<RealScalar>::epsilon();
+      residual_y = (m*y-rhs).norm();
+      VERIFY( test_isApprox(residual_y,residual) || residual < residual_y );
+    }
+  }
+  
+  // evaluate normal equation which works also for least-squares solutions
+  if(internal::is_same<RealScalar,double>::value)
+  {
+    // This test is not stable with single precision.
+    // This is probably because squaring m signicantly affects the precision.
+    VERIFY_IS_APPROX(m.adjoint()*m*x,m.adjoint()*rhs);
+  }
+}
+
+// check minimal norm solutions, the inoput matrix m is only used to recover problem size
+template<typename MatrixType>
+void svd_min_norm(const MatrixType& m, unsigned int computationOptions)
+{
+  typedef typename MatrixType::Scalar Scalar;
+  typedef typename MatrixType::Index Index;
+  Index cols = m.cols();
+
+  enum {
+    ColsAtCompileTime = MatrixType::ColsAtCompileTime
+  };
+
+  typedef Matrix<Scalar, ColsAtCompileTime, Dynamic> SolutionType;
+
+  // generate a full-rank m x n problem with m<n
+  enum {
+    RankAtCompileTime2 = ColsAtCompileTime==Dynamic ? Dynamic : (ColsAtCompileTime)/2+1,
+    RowsAtCompileTime3 = ColsAtCompileTime==Dynamic ? Dynamic : ColsAtCompileTime+1
+  };
+  typedef Matrix<Scalar, RankAtCompileTime2, ColsAtCompileTime> MatrixType2;
+  typedef Matrix<Scalar, RankAtCompileTime2, 1> RhsType2;
+  typedef Matrix<Scalar, ColsAtCompileTime, RankAtCompileTime2> MatrixType2T;
+  Index rank = RankAtCompileTime2==Dynamic ? internal::random<Index>(1,cols) : Index(RankAtCompileTime2);
+  MatrixType2 m2(rank,cols);
+  int guard = 0;
+  do {
+    m2.setRandom();
+  } while(SVD_FOR_MIN_NORM(MatrixType2)(m2).setThreshold(test_precision<Scalar>()).rank()!=rank && (++guard)<10);
+  VERIFY(guard<10);
+  RhsType2 rhs2 = RhsType2::Random(rank);
+  // use QR to find a reference minimal norm solution
+  HouseholderQR<MatrixType2T> qr(m2.adjoint());
+  Matrix<Scalar,Dynamic,1> tmp = qr.matrixQR().topLeftCorner(rank,rank).template triangularView<Upper>().adjoint().solve(rhs2);
+  tmp.conservativeResize(cols);
+  tmp.tail(cols-rank).setZero();
+  SolutionType x21 = qr.householderQ() * tmp;
+  // now check with SVD
+  SVD_FOR_MIN_NORM(MatrixType2) svd2(m2, computationOptions);
+  SolutionType x22 = svd2.solve(rhs2);
+  VERIFY_IS_APPROX(m2*x21, rhs2);
+  VERIFY_IS_APPROX(m2*x22, rhs2);
+  VERIFY_IS_APPROX(x21, x22);
+  
+  // Now check with a rank deficient matrix
+  typedef Matrix<Scalar, RowsAtCompileTime3, ColsAtCompileTime> MatrixType3;
+  typedef Matrix<Scalar, RowsAtCompileTime3, 1> RhsType3;
+  Index rows3 = RowsAtCompileTime3==Dynamic ? internal::random<Index>(rank+1,2*cols) : Index(RowsAtCompileTime3);
+  Matrix<Scalar,RowsAtCompileTime3,Dynamic> C = Matrix<Scalar,RowsAtCompileTime3,Dynamic>::Random(rows3,rank);
+  MatrixType3 m3 = C * m2;
+  RhsType3 rhs3 = C * rhs2;
+  SVD_FOR_MIN_NORM(MatrixType3) svd3(m3, computationOptions);
+  SolutionType x3 = svd3.solve(rhs3);
+  VERIFY_IS_APPROX(m3*x3, rhs3);
+  VERIFY_IS_APPROX(m3*x21, rhs3);
+  VERIFY_IS_APPROX(m2*x3, rhs2);
+  
+  VERIFY_IS_APPROX(x21, x3);
+}
+
+// Check full, compare_to_full, least_square, and min_norm for all possible compute-options
+template<typename SvdType, typename MatrixType>
+void svd_test_all_computation_options(const MatrixType& m, bool full_only)
+{
+//   if (QRPreconditioner == NoQRPreconditioner && m.rows() != m.cols())
+//     return;
+  SvdType fullSvd(m, ComputeFullU|ComputeFullV);
+  CALL_SUBTEST(( svd_check_full(m, fullSvd) ));
+  CALL_SUBTEST(( svd_least_square<SvdType>(m, ComputeFullU | ComputeFullV) ));
+  CALL_SUBTEST(( svd_min_norm(m, ComputeFullU | ComputeFullV) ));
+  
+  #if defined __INTEL_COMPILER
+  // remark #111: statement is unreachable
+  #pragma warning disable 111
+  #endif
+  if(full_only)
+    return;
+
+  CALL_SUBTEST(( svd_compare_to_full(m, ComputeFullU, fullSvd) ));
+  CALL_SUBTEST(( svd_compare_to_full(m, ComputeFullV, fullSvd) ));
+  CALL_SUBTEST(( svd_compare_to_full(m, 0, fullSvd) ));
+
+  if (MatrixType::ColsAtCompileTime == Dynamic) {
+    // thin U/V are only available with dynamic number of columns
+    CALL_SUBTEST(( svd_compare_to_full(m, ComputeFullU|ComputeThinV, fullSvd) ));
+    CALL_SUBTEST(( svd_compare_to_full(m,              ComputeThinV, fullSvd) ));
+    CALL_SUBTEST(( svd_compare_to_full(m, ComputeThinU|ComputeFullV, fullSvd) ));
+    CALL_SUBTEST(( svd_compare_to_full(m, ComputeThinU             , fullSvd) ));
+    CALL_SUBTEST(( svd_compare_to_full(m, ComputeThinU|ComputeThinV, fullSvd) ));
+    
+    CALL_SUBTEST(( svd_least_square<SvdType>(m, ComputeFullU | ComputeThinV) ));
+    CALL_SUBTEST(( svd_least_square<SvdType>(m, ComputeThinU | ComputeFullV) ));
+    CALL_SUBTEST(( svd_least_square<SvdType>(m, ComputeThinU | ComputeThinV) ));
+    
+    CALL_SUBTEST(( svd_min_norm(m, ComputeFullU | ComputeThinV) ));
+    CALL_SUBTEST(( svd_min_norm(m, ComputeThinU | ComputeFullV) ));
+    CALL_SUBTEST(( svd_min_norm(m, ComputeThinU | ComputeThinV) ));
+
+    // test reconstruction
+    typedef typename MatrixType::Index Index;
+    Index diagSize = (std::min)(m.rows(), m.cols());
+    SvdType svd(m, ComputeThinU | ComputeThinV);
+    VERIFY_IS_APPROX(m, svd.matrixU().leftCols(diagSize) * svd.singularValues().asDiagonal() * svd.matrixV().leftCols(diagSize).adjoint());
+  }
+}
+
+template<typename MatrixType>
+void svd_fill_random(MatrixType &m)
+{
+  typedef typename MatrixType::Scalar Scalar;
+  typedef typename MatrixType::RealScalar RealScalar;
+  typedef typename MatrixType::Index Index;
+  Index diagSize = (std::min)(m.rows(), m.cols());
+  RealScalar s = std::numeric_limits<RealScalar>::max_exponent10/4;
+  s = internal::random<RealScalar>(1,s);
+  Matrix<RealScalar,Dynamic,1> d =  Matrix<RealScalar,Dynamic,1>::Random(diagSize);
+  for(Index k=0; k<diagSize; ++k)
+    d(k) = d(k)*std::pow(RealScalar(10),internal::random<RealScalar>(-s,s));
+  m = Matrix<Scalar,Dynamic,Dynamic>::Random(m.rows(),diagSize) * d.asDiagonal() * Matrix<Scalar,Dynamic,Dynamic>::Random(diagSize,m.cols());
+  // cancel some coeffs
+  Index n  = internal::random<Index>(0,m.size()-1);
+  for(Index i=0; i<n; ++i)
+    m(internal::random<Index>(0,m.rows()-1), internal::random<Index>(0,m.cols()-1)) = Scalar(0);
+}
+
+
+// work around stupid msvc error when constructing at compile time an expression that involves
+// a division by zero, even if the numeric type has floating point
+template<typename Scalar>
+EIGEN_DONT_INLINE Scalar zero() { return Scalar(0); }
+
+// workaround aggressive optimization in ICC
+template<typename T> EIGEN_DONT_INLINE  T sub(T a, T b) { return a - b; }
+
+// all this function does is verify we don't iterate infinitely on nan/inf values
+template<typename SvdType, typename MatrixType>
+void svd_inf_nan()
+{
+  SvdType svd;
+  typedef typename MatrixType::Scalar Scalar;
+  Scalar some_inf = Scalar(1) / zero<Scalar>();
+  VERIFY(sub(some_inf, some_inf) != sub(some_inf, some_inf));
+  svd.compute(MatrixType::Constant(10,10,some_inf), ComputeFullU | ComputeFullV);
+
+  Scalar nan = std::numeric_limits<Scalar>::quiet_NaN();
+  VERIFY(nan != nan);
+  svd.compute(MatrixType::Constant(10,10,nan), ComputeFullU | ComputeFullV);
+
+  MatrixType m = MatrixType::Zero(10,10);
+  m(internal::random<int>(0,9), internal::random<int>(0,9)) = some_inf;
+  svd.compute(m, ComputeFullU | ComputeFullV);
+
+  m = MatrixType::Zero(10,10);
+  m(internal::random<int>(0,9), internal::random<int>(0,9)) = nan;
+  svd.compute(m, ComputeFullU | ComputeFullV);
+  
+  // regression test for bug 791
+  m.resize(3,3);
+  m << 0,    2*NumTraits<Scalar>::epsilon(),  0.5,
+       0,   -0.5,                             0,
+       nan,  0,                               0;
+  svd.compute(m, ComputeFullU | ComputeFullV);
+  
+  m.resize(4,4);
+  m <<  1, 0, 0, 0,
+        0, 3, 1, 2e-308,
+        1, 0, 1, nan,
+        0, nan, nan, 0;
+  svd.compute(m, ComputeFullU | ComputeFullV);
+}
+
+// Regression test for bug 286: JacobiSVD loops indefinitely with some
+// matrices containing denormal numbers.
+void svd_underoverflow()
+{
+#if defined __INTEL_COMPILER
+// shut up warning #239: floating point underflow
+#pragma warning push
+#pragma warning disable 239
+#endif
+  Matrix2d M;
+  M << -7.90884e-313, -4.94e-324,
+                 0, 5.60844e-313;
+  SVD_DEFAULT(Matrix2d) svd;
+  svd.compute(M,ComputeFullU|ComputeFullV);
+  svd_check_full(M,svd);
+  
+  // Check all 2x2 matrices made with the following coefficients:
+  VectorXd value_set(9);
+  value_set << 0, 1, -1, 5.60844e-313, -5.60844e-313, 4.94e-324, -4.94e-324, -4.94e-223, 4.94e-223;
+  Array4i id(0,0,0,0);
+  int k = 0;
+  do
+  {
+    M << value_set(id(0)), value_set(id(1)), value_set(id(2)), value_set(id(3));
+    svd.compute(M,ComputeFullU|ComputeFullV);
+    svd_check_full(M,svd);
+    
+    id(k)++;
+    if(id(k)>=value_set.size())
+    {
+      while(k<3 && id(k)>=value_set.size()) id(++k)++;
+      id.head(k).setZero();
+      k=0;
+    }
+    
+  } while((id<int(value_set.size())).all());
+  
+#if defined __INTEL_COMPILER
+#pragma warning pop
+#endif
+  
+  // Check for overflow:
+  Matrix3d M3;
+  M3 << 4.4331978442502944e+307, -5.8585363752028680e+307,  6.4527017443412964e+307,
+        3.7841695601406358e+307,  2.4331702789740617e+306, -3.5235707140272905e+307,
+       -8.7190887618028355e+307, -7.3453213709232193e+307, -2.4367363684472105e+307;
+
+  SVD_DEFAULT(Matrix3d) svd3;
+  svd3.compute(M3,ComputeFullU|ComputeFullV); // just check we don't loop indefinitely
+  svd_check_full(M3,svd3);
+}
+
+// void jacobisvd(const MatrixType& a = MatrixType(), bool pickrandom = true)
+
+template<typename MatrixType>
+void svd_all_trivial_2x2( void (*cb)(const MatrixType&,bool) )
+{
+  MatrixType M;
+  VectorXd value_set(3);
+  value_set << 0, 1, -1;
+  Array4i id(0,0,0,0);
+  int k = 0;
+  do
+  {
+    M << value_set(id(0)), value_set(id(1)), value_set(id(2)), value_set(id(3));
+    
+    cb(M,false);
+    
+    id(k)++;
+    if(id(k)>=value_set.size())
+    {
+      while(k<3 && id(k)>=value_set.size()) id(++k)++;
+      id.head(k).setZero();
+      k=0;
+    }
+    
+  } while((id<int(value_set.size())).all());
+}
+
+void svd_preallocate()
+{
+  Vector3f v(3.f, 2.f, 1.f);
+  MatrixXf m = v.asDiagonal();
+
+  internal::set_is_malloc_allowed(false);
+  VERIFY_RAISES_ASSERT(VectorXf tmp(10);)
+  SVD_DEFAULT(MatrixXf) svd;
+  internal::set_is_malloc_allowed(true);
+  svd.compute(m);
+  VERIFY_IS_APPROX(svd.singularValues(), v);
+
+  SVD_DEFAULT(MatrixXf) svd2(3,3);
+  internal::set_is_malloc_allowed(false);
+  svd2.compute(m);
+  internal::set_is_malloc_allowed(true);
+  VERIFY_IS_APPROX(svd2.singularValues(), v);
+  VERIFY_RAISES_ASSERT(svd2.matrixU());
+  VERIFY_RAISES_ASSERT(svd2.matrixV());
+  svd2.compute(m, ComputeFullU | ComputeFullV);
+  VERIFY_IS_APPROX(svd2.matrixU(), Matrix3f::Identity());
+  VERIFY_IS_APPROX(svd2.matrixV(), Matrix3f::Identity());
+  internal::set_is_malloc_allowed(false);
+  svd2.compute(m);
+  internal::set_is_malloc_allowed(true);
+
+  SVD_DEFAULT(MatrixXf) svd3(3,3,ComputeFullU|ComputeFullV);
+  internal::set_is_malloc_allowed(false);
+  svd2.compute(m);
+  internal::set_is_malloc_allowed(true);
+  VERIFY_IS_APPROX(svd2.singularValues(), v);
+  VERIFY_IS_APPROX(svd2.matrixU(), Matrix3f::Identity());
+  VERIFY_IS_APPROX(svd2.matrixV(), Matrix3f::Identity());
+  internal::set_is_malloc_allowed(false);
+  svd2.compute(m, ComputeFullU|ComputeFullV);
+  internal::set_is_malloc_allowed(true);
+}
+
+template<typename SvdType,typename MatrixType> 
+void svd_verify_assert(const MatrixType& m)
+{
+  typedef typename MatrixType::Scalar Scalar;
+  typedef typename MatrixType::Index Index;
+  Index rows = m.rows();
+  Index cols = m.cols();
+
+  enum {
+    RowsAtCompileTime = MatrixType::RowsAtCompileTime,
+    ColsAtCompileTime = MatrixType::ColsAtCompileTime
+  };
+
+  typedef Matrix<Scalar, RowsAtCompileTime, 1> RhsType;
+  RhsType rhs(rows);
+  SvdType svd;
+  VERIFY_RAISES_ASSERT(svd.matrixU())
+  VERIFY_RAISES_ASSERT(svd.singularValues())
+  VERIFY_RAISES_ASSERT(svd.matrixV())
+  VERIFY_RAISES_ASSERT(svd.solve(rhs))
+  MatrixType a = MatrixType::Zero(rows, cols);
+  a.setZero();
+  svd.compute(a, 0);
+  VERIFY_RAISES_ASSERT(svd.matrixU())
+  VERIFY_RAISES_ASSERT(svd.matrixV())
+  svd.singularValues();
+  VERIFY_RAISES_ASSERT(svd.solve(rhs))
+    
+  if (ColsAtCompileTime == Dynamic)
+  {
+    svd.compute(a, ComputeThinU);
+    svd.matrixU();
+    VERIFY_RAISES_ASSERT(svd.matrixV())
+    VERIFY_RAISES_ASSERT(svd.solve(rhs))
+    svd.compute(a, ComputeThinV);
+    svd.matrixV();
+    VERIFY_RAISES_ASSERT(svd.matrixU())
+    VERIFY_RAISES_ASSERT(svd.solve(rhs))
+  }
+  else
+  {
+    VERIFY_RAISES_ASSERT(svd.compute(a, ComputeThinU))
+    VERIFY_RAISES_ASSERT(svd.compute(a, ComputeThinV))
+  }
+}
+
+#undef SVD_DEFAULT
+#undef SVD_FOR_MIN_NORM
diff --git a/test/upperbidiagonalization.cpp b/test/upperbidiagonalization.cpp
index d15bf588b..847b34b55 100644
--- a/test/upperbidiagonalization.cpp
+++ b/test/upperbidiagonalization.cpp
@@ -35,7 +35,7 @@ void test_upperbidiagonalization()
    CALL_SUBTEST_1( upperbidiag(MatrixXf(3,3)) );
    CALL_SUBTEST_2( upperbidiag(MatrixXd(17,12)) );
    CALL_SUBTEST_3( upperbidiag(MatrixXcf(20,20)) );
-   CALL_SUBTEST_4( upperbidiag(MatrixXcd(16,15)) );
+   CALL_SUBTEST_4( upperbidiag(Matrix<std::complex<double>,Dynamic,Dynamic,RowMajor>(16,15)) );
    CALL_SUBTEST_5( upperbidiag(Matrix<float,6,4>()) );
    CALL_SUBTEST_6( upperbidiag(Matrix<float,5,5>()) );
    CALL_SUBTEST_7( upperbidiag(Matrix<double,4,3>()) );
diff --git a/test/vectorization_logic.cpp b/test/vectorization_logic.cpp
index b069f0771..2f839cf51 100644
--- a/test/vectorization_logic.cpp
+++ b/test/vectorization_logic.cpp
@@ -27,19 +27,37 @@ std::string demangle_unrolling(int t)
   if(t==CompleteUnrolling) return "CompleteUnrolling";
   return "?";
 }
+std::string demangle_flags(int f)
+{
+  std::string res;
+  if(f&RowMajorBit)                 res += " | RowMajor";
+  if(f&PacketAccessBit)             res += " | Packet";
+  if(f&LinearAccessBit)             res += " | Linear";
+  if(f&LvalueBit)                   res += " | Lvalue";
+  if(f&DirectAccessBit)             res += " | Direct";
+  if(f&AlignedBit)                  res += " | Aligned";
+  if(f&NestByRefBit)                res += " | NestByRef";
+  if(f&NoPreferredStorageOrderBit)  res += " | NoPreferredStorageOrderBit";
+  
+  return res;
+}
 
 template<typename Dst, typename Src>
 bool test_assign(const Dst&, const Src&, int traversal, int unrolling)
 {
-  internal::assign_traits<Dst,Src>::debug();
-  bool res = internal::assign_traits<Dst,Src>::Traversal==traversal
-          && internal::assign_traits<Dst,Src>::Unrolling==unrolling;
+  typedef internal::copy_using_evaluator_traits<internal::evaluator<Dst>,internal::evaluator<Src>, internal::assign_op<typename Dst::Scalar> > traits;
+  bool res = traits::Traversal==traversal && traits::Unrolling==unrolling;
   if(!res)
   {
+    std::cerr << "Src: " << demangle_flags(Src::Flags) << std::endl;
+    std::cerr << "     " << demangle_flags(internal::evaluator<Src>::Flags) << std::endl;
+    std::cerr << "Dst: " << demangle_flags(Dst::Flags) << std::endl;
+    std::cerr << "     " << demangle_flags(internal::evaluator<Dst>::Flags) << std::endl;
+    traits::debug();
     std::cerr << " Expected Traversal == " << demangle_traversal(traversal)
-              << " got " << demangle_traversal(internal::assign_traits<Dst,Src>::Traversal) << "\n";
+              << " got " << demangle_traversal(traits::Traversal) << "\n";
     std::cerr << " Expected Unrolling == " << demangle_unrolling(unrolling)
-              << " got " << demangle_unrolling(internal::assign_traits<Dst,Src>::Unrolling) << "\n";
+              << " got " << demangle_unrolling(traits::Unrolling) << "\n";
   }
   return res;
 }
@@ -47,15 +65,19 @@ bool test_assign(const Dst&, const Src&, int traversal, int unrolling)
 template<typename Dst, typename Src>
 bool test_assign(int traversal, int unrolling)
 {
-  internal::assign_traits<Dst,Src>::debug();
-  bool res = internal::assign_traits<Dst,Src>::Traversal==traversal
-          && internal::assign_traits<Dst,Src>::Unrolling==unrolling;
+  typedef internal::copy_using_evaluator_traits<internal::evaluator<Dst>,internal::evaluator<Src>, internal::assign_op<typename Dst::Scalar> > traits;
+  bool res = traits::Traversal==traversal && traits::Unrolling==unrolling;
   if(!res)
   {
+    std::cerr << "Src: " << demangle_flags(Src::Flags) << std::endl;
+    std::cerr << "     " << demangle_flags(internal::evaluator<Src>::Flags) << std::endl;
+    std::cerr << "Dst: " << demangle_flags(Dst::Flags) << std::endl;
+    std::cerr << "     " << demangle_flags(internal::evaluator<Dst>::Flags) << std::endl;
+    traits::debug();
     std::cerr << " Expected Traversal == " << demangle_traversal(traversal)
-              << " got " << demangle_traversal(internal::assign_traits<Dst,Src>::Traversal) << "\n";
+              << " got " << demangle_traversal(traits::Traversal) << "\n";
     std::cerr << " Expected Unrolling == " << demangle_unrolling(unrolling)
-              << " got " << demangle_unrolling(internal::assign_traits<Dst,Src>::Unrolling) << "\n";
+              << " got " << demangle_unrolling(traits::Unrolling) << "\n";
   }
   return res;
 }
@@ -63,10 +85,15 @@ bool test_assign(int traversal, int unrolling)
 template<typename Xpr>
 bool test_redux(const Xpr&, int traversal, int unrolling)
 {
-  typedef internal::redux_traits<internal::scalar_sum_op<typename Xpr::Scalar>,Xpr> traits;
+  typedef internal::redux_traits<internal::scalar_sum_op<typename Xpr::Scalar>,internal::redux_evaluator<Xpr> > traits;
+  
   bool res = traits::Traversal==traversal && traits::Unrolling==unrolling;
   if(!res)
   {
+    std::cerr << demangle_flags(Xpr::Flags) << std::endl;
+    std::cerr << demangle_flags(internal::evaluator<Xpr>::Flags) << std::endl;
+    traits::debug();
+    
     std::cerr << " Expected Traversal == " << demangle_traversal(traversal)
               << " got " << demangle_traversal(traits::Traversal) << "\n";
     std::cerr << " Expected Unrolling == " << demangle_unrolling(unrolling)
diff --git a/test/vectorwiseop.cpp b/test/vectorwiseop.cpp
index 6cd1acdda..1631d54c4 100644
--- a/test/vectorwiseop.cpp
+++ b/test/vectorwiseop.cpp
@@ -104,8 +104,8 @@ template<typename ArrayType> void vectorwiseop_array(const ArrayType& m)
   
   m2 = m1;
   // yes, there might be an aliasing issue there but ".rowwise() /="
-  // is suppposed to evaluate " m2.colwise().sum()" into to temporary to avoid
-  // evaluating the reducions multiple times
+  // is supposed to evaluate " m2.colwise().sum()" into a temporary to avoid
+  // evaluating the reduction multiple times
   if(ArrayType::RowsAtCompileTime>2 || ArrayType::RowsAtCompileTime==Dynamic)
   {
     m2.rowwise() /= m2.colwise().sum();
diff --git a/unsupported/Eigen/AlignedVector3 b/unsupported/Eigen/AlignedVector3
index 7b45e6cce..35493e87b 100644
--- a/unsupported/Eigen/AlignedVector3
+++ b/unsupported/Eigen/AlignedVector3
@@ -57,6 +57,10 @@ template<typename _Scalar> class AlignedVector3
 
     inline Index rows() const { return 3; }
     inline Index cols() const { return 1; }
+    
+    Scalar* data() { return m_coeffs.data(); }
+    const Scalar* data() const { return m_coeffs.data(); }
+    Index innerStride() const { return 1; }
 
     inline const Scalar& coeff(Index row, Index col) const
     { return m_coeffs.coeff(row, col); }
@@ -181,8 +185,28 @@ template<typename _Scalar> class AlignedVector3
     {
       return m_coeffs.template head<3>().isApprox(other,eps);
     }
+    
+    CoeffType& coeffs() { return m_coeffs; }
+    const CoeffType& coeffs() const { return m_coeffs; }
 };
 
+namespace internal {
+
+template<typename Scalar>
+struct evaluator<AlignedVector3<Scalar> >
+  : evaluator<Matrix<Scalar,4,1> >::type
+{
+  typedef AlignedVector3<Scalar> XprType;
+  typedef typename evaluator<Matrix<Scalar,4,1> >::type Base;
+  
+  typedef evaluator type;
+  typedef evaluator nestedType;
+
+  evaluator(const XprType &m) : Base(m.coeffs()) {}  
+};
+
+}
+
 //@}
 
 }
diff --git a/unsupported/Eigen/BDCSVD b/unsupported/Eigen/BDCSVD
new file mode 100644
index 000000000..44649dbd0
--- /dev/null
+++ b/unsupported/Eigen/BDCSVD
@@ -0,0 +1,26 @@
+#ifndef EIGEN_BDCSVD_MODULE_H
+#define EIGEN_BDCSVD_MODULE_H
+
+#include <Eigen/SVD>
+
+#include "../../Eigen/src/Core/util/DisableStupidWarnings.h"
+
+/** \defgroup BDCSVD_Module BDCSVD module
+  *
+  *
+  *
+  * This module provides Divide & Conquer SVD decomposition for matrices (both real and complex).
+  * This decomposition is accessible via the following MatrixBase method:
+  *  - MatrixBase::bdcSvd()
+  *
+  * \code
+  * #include <Eigen/BDCSVD>
+  * \endcode
+  */
+
+#include "src/BDCSVD/BDCSVD.h"
+
+#include "../../Eigen/src/Core/util/ReenableStupidWarnings.h"
+
+#endif // EIGEN_BDCSVD_MODULE_H
+/* vim: set filetype=cpp et sw=2 ts=2 ai: */
diff --git a/unsupported/Eigen/IterativeSolvers b/unsupported/Eigen/IterativeSolvers
index aa15403db..ff0d59b6e 100644
--- a/unsupported/Eigen/IterativeSolvers
+++ b/unsupported/Eigen/IterativeSolvers
@@ -24,9 +24,6 @@
   */
 //@{
 
-#include "../../Eigen/src/misc/Solve.h"
-#include "../../Eigen/src/misc/SparseSolve.h"
-
 #ifndef EIGEN_MPL2_ONLY
 #include "src/IterativeSolvers/IterationController.h"
 #include "src/IterativeSolvers/ConstrainedConjGrad.h"
diff --git a/unsupported/Eigen/MPRealSupport b/unsupported/Eigen/MPRealSupport
index 632de3854..8e42965a3 100644
--- a/unsupported/Eigen/MPRealSupport
+++ b/unsupported/Eigen/MPRealSupport
@@ -159,10 +159,10 @@ int main()
       {
         if(rows==0 || cols==0 || depth==0)
           return;
-        
+
         mpreal  acc1(0,mpfr_get_prec(blockA[0].mpfr_srcptr())),
                 tmp (0,mpfr_get_prec(blockA[0].mpfr_srcptr()));
-        
+
         if(strideA==-1) strideA = depth;
         if(strideB==-1) strideB = depth;
 
diff --git a/unsupported/Eigen/MatrixFunctions b/unsupported/Eigen/MatrixFunctions
index 0b12aaffb..0320606c1 100644
--- a/unsupported/Eigen/MatrixFunctions
+++ b/unsupported/Eigen/MatrixFunctions
@@ -82,7 +82,9 @@ const MatrixFunctionReturnValue<Derived> MatrixBase<Derived>::cos() const
 \param[in]  M  a square matrix.
 \returns  expression representing \f$ \cos(M) \f$.
 
-This function calls \ref matrixbase_matrixfunction "matrixFunction()" with StdStemFunctions::cos().
+This function computes the matrix cosine. Use ArrayBase::cos() for computing the entry-wise cosine.
+
+The implementation calls \ref matrixbase_matrixfunction "matrixFunction()" with StdStemFunctions::cos().
 
 \sa \ref matrixbase_sin "sin()" for an example.
 
@@ -123,6 +125,9 @@ differential equations: the solution of \f$ y' = My \f$ with the
 initial condition \f$ y(0) = y_0 \f$ is given by
 \f$ y(t) = \exp(M) y_0 \f$.
 
+The matrix exponential is different from applying the exp function to all the entries in the matrix.
+Use ArrayBase::exp() if you want to do the latter.
+
 The cost of the computation is approximately \f$ 20 n^3 \f$ for
 matrices of size \f$ n \f$. The number 20 depends weakly on the
 norm of the matrix.
@@ -177,6 +182,9 @@ the scalar logarithm, the equation \f$ \exp(X) = M \f$ may have
 multiple solutions; this function returns a matrix whose eigenvalues
 have imaginary part in the interval \f$ (-\pi,\pi] \f$.
 
+The matrix logarithm is different from applying the log function to all the entries in the matrix.
+Use ArrayBase::log() if you want to do the latter.
+
 In the real case, the matrix \f$ M \f$ should be invertible and
 it should have no eigenvalues which are real and negative (pairs of
 complex conjugate eigenvalues are allowed). In the complex case, it
@@ -232,7 +240,8 @@ const MatrixPowerReturnValue<Derived> MatrixBase<Derived>::pow(RealScalar p) con
 
 The matrix power \f$ M^p \f$ is defined as \f$ \exp(p \log(M)) \f$,
 where exp denotes the matrix exponential, and log denotes the matrix
-logarithm.
+logarithm. This is different from raising all the entries in the matrix
+to the p-th power. Use ArrayBase::pow() if you want to do the latter.
 
 If \p p is complex, the scalar type of \p M should be the type of \p
 p . \f$ M^p \f$ simply evaluates into \f$ \exp(p \log(M)) \f$.
@@ -391,7 +400,9 @@ const MatrixFunctionReturnValue<Derived> MatrixBase<Derived>::sin() const
 \param[in]  M  a square matrix.
 \returns  expression representing \f$ \sin(M) \f$.
 
-This function calls \ref matrixbase_matrixfunction "matrixFunction()" with StdStemFunctions::sin().
+This function computes the matrix sine. Use ArrayBase::sin() for computing the entry-wise sine.
+
+The implementation calls \ref matrixbase_matrixfunction "matrixFunction()" with StdStemFunctions::sin().
 
 Example: \include MatrixSine.cpp
 Output: \verbinclude MatrixSine.out
@@ -428,7 +439,9 @@ const MatrixSquareRootReturnValue<Derived> MatrixBase<Derived>::sqrt() const
 
 The matrix square root of \f$ M \f$ is the matrix \f$ M^{1/2} \f$
 whose square is the original matrix; so if \f$ S = M^{1/2} \f$ then
-\f$ S^2 = M \f$. 
+\f$ S^2 = M \f$. This is different from taking the square root of all
+the entries in the matrix; use ArrayBase::sqrt() if you want to do the
+latter.
 
 In the <b>real case</b>, the matrix \f$ M \f$ should be invertible and
 it should have no eigenvalues which are real and negative (pairs of
diff --git a/unsupported/Eigen/SVD b/unsupported/Eigen/SVD
deleted file mode 100644
index 900a8aa60..000000000
--- a/unsupported/Eigen/SVD
+++ /dev/null
@@ -1,35 +0,0 @@
-#ifndef EIGEN_SVD_MODULE_H
-#define EIGEN_SVD_MODULE_H
-
-#include <Eigen/QR>
-#include <Eigen/Householder>
-#include <Eigen/Jacobi>
-
-#include "../../Eigen/src/Core/util/DisableStupidWarnings.h"
-
-/** \defgroup SVD_Module SVD module
-  *
-  *
-  *
-  * This module provides SVD decomposition for matrices (both real and complex).
-  * This decomposition is accessible via the following MatrixBase method:
-  *  - MatrixBase::jacobiSvd()
-  *
-  * \code
-  * #include <Eigen/SVD>
-  * \endcode
-  */
-
-#include "../../Eigen/src/misc/Solve.h"
-#include "../../Eigen/src/SVD/UpperBidiagonalization.h"
-#include "src/SVD/SVDBase.h"
-#include "src/SVD/JacobiSVD.h"
-#include "src/SVD/BDCSVD.h"
-#if defined(EIGEN_USE_LAPACKE) && !defined(EIGEN_USE_LAPACKE_STRICT)
-#include "../../Eigen/src/SVD/JacobiSVD_MKL.h"
-#endif
-
-#include "../../Eigen/src/Core/util/ReenableStupidWarnings.h"
-
-#endif // EIGEN_SVD_MODULE_H
-/* vim: set filetype=cpp et sw=2 ts=2 ai: */
diff --git a/unsupported/Eigen/SparseExtra b/unsupported/Eigen/SparseExtra
index b5597902a..819cffa27 100644
--- a/unsupported/Eigen/SparseExtra
+++ b/unsupported/Eigen/SparseExtra
@@ -37,9 +37,6 @@
   */
 
 
-#include "../../Eigen/src/misc/Solve.h"
-#include "../../Eigen/src/misc/SparseSolve.h"
-
 #include "src/SparseExtra/DynamicSparseMatrix.h"
 #include "src/SparseExtra/BlockOfDynamicSparseMatrix.h"
 #include "src/SparseExtra/RandomSetter.h"
diff --git a/unsupported/Eigen/src/SVD/BDCSVD.h b/unsupported/Eigen/src/BDCSVD/BDCSVD.h
index 80006fd60..0167872af 100644
--- a/unsupported/Eigen/src/SVD/BDCSVD.h
+++ b/unsupported/Eigen/src/BDCSVD/BDCSVD.h
@@ -19,11 +19,21 @@
 #ifndef EIGEN_BDCSVD_H
 #define EIGEN_BDCSVD_H
 
-#define EPSILON 0.0000000000000001
+namespace Eigen {
 
-#define ALGOSWAP 16
+template<typename _MatrixType> class BDCSVD;
 
-namespace Eigen {
+namespace internal {
+
+template<typename _MatrixType> 
+struct traits<BDCSVD<_MatrixType> >
+{
+  typedef _MatrixType MatrixType;
+};  
+
+} // end namespace internal
+  
+  
 /** \ingroup SVD_Module
  *
  *
@@ -36,13 +46,15 @@ namespace Eigen {
  * It should be used to speed up the calcul of SVD for big matrices. 
  */
 template<typename _MatrixType> 
-class BDCSVD : public SVDBase<_MatrixType>
+class BDCSVD : public SVDBase<BDCSVD<_MatrixType> >
 {
-  typedef SVDBase<_MatrixType> Base;
+  typedef SVDBase<BDCSVD> Base;
     
 public:
   using Base::rows;
   using Base::cols;
+  using Base::computeU;
+  using Base::computeV;
   
   typedef _MatrixType MatrixType;
   typedef typename MatrixType::Scalar Scalar;
@@ -58,15 +70,10 @@ public:
     MatrixOptions = MatrixType::Options
   };
 
-  typedef Matrix<Scalar, RowsAtCompileTime, RowsAtCompileTime, 
-		 MatrixOptions, MaxRowsAtCompileTime, MaxRowsAtCompileTime>
-  MatrixUType;
-  typedef Matrix<Scalar, ColsAtCompileTime, ColsAtCompileTime, 
-		 MatrixOptions, MaxColsAtCompileTime, MaxColsAtCompileTime>
-  MatrixVType;
-  typedef typename internal::plain_diag_type<MatrixType, RealScalar>::type SingularValuesType;
-  typedef typename internal::plain_row_type<MatrixType>::type RowType;
-  typedef typename internal::plain_col_type<MatrixType>::type ColType;
+  typedef typename Base::MatrixUType MatrixUType;
+  typedef typename Base::MatrixVType MatrixVType;
+  typedef typename Base::SingularValuesType SingularValuesType;
+  
   typedef Matrix<Scalar, Dynamic, Dynamic> MatrixX;
   typedef Matrix<RealScalar, Dynamic, Dynamic> MatrixXr;
   typedef Matrix<RealScalar, Dynamic, 1> VectorType;
@@ -77,9 +84,7 @@ public:
    * The default constructor is useful in cases in which the user intends to
    * perform decompositions via BDCSVD::compute(const MatrixType&).
    */
-  BDCSVD()
-    : SVDBase<_MatrixType>::SVDBase(), 
-      algoswap(ALGOSWAP), m_numIters(0)
+  BDCSVD() : m_algoswap(16), m_numIters(0)
   {}
 
 
@@ -90,8 +95,7 @@ public:
    * \sa BDCSVD()
    */
   BDCSVD(Index rows, Index cols, unsigned int computationOptions = 0)
-    : SVDBase<_MatrixType>::SVDBase(), 
-      algoswap(ALGOSWAP), m_numIters(0)
+    : m_algoswap(16), m_numIters(0)
   {
     allocate(rows, cols, computationOptions);
   }
@@ -107,8 +111,7 @@ public:
    * available with the (non - default) FullPivHouseholderQR preconditioner.
    */
   BDCSVD(const MatrixType& matrix, unsigned int computationOptions = 0)
-    : SVDBase<_MatrixType>::SVDBase(), 
-      algoswap(ALGOSWAP), m_numIters(0)
+    : m_algoswap(16), m_numIters(0)
   {
     compute(matrix, computationOptions);
   }
@@ -116,6 +119,7 @@ public:
   ~BDCSVD() 
   {
   }
+  
   /** \brief Method performing the decomposition of given matrix using custom options.
    *
    * \param matrix the matrix to decompose
@@ -126,7 +130,7 @@ public:
    * Thin unitaries are only available if your matrix type has a Dynamic number of columns (for example MatrixXf). They also are not
    * available with the (non - default) FullPivHouseholderQR preconditioner.
    */
-  SVDBase<MatrixType>& compute(const MatrixType& matrix, unsigned int computationOptions);
+  BDCSVD& compute(const MatrixType& matrix, unsigned int computationOptions);
 
   /** \brief Method performing the decomposition of given matrix using current options.
    *
@@ -134,7 +138,7 @@ public:
    *
    * This method uses the current \a computationOptions, as already passed to the constructor or to compute(const MatrixType&, unsigned int).
    */
-  SVDBase<MatrixType>& compute(const MatrixType& matrix)
+  BDCSVD& compute(const MatrixType& matrix)
   {
     return compute(matrix, this->m_computationOptions);
   }
@@ -142,58 +146,7 @@ public:
   void setSwitchSize(int s) 
   {
     eigen_assert(s>3 && "BDCSVD the size of the algo switch has to be greater than 3");
-    algoswap = s;
-  }
-
-
-  /** \returns a (least squares) solution of \f$ A x = b \f$ using the current SVD decomposition of A.
-   *
-   * \param b the right - hand - side of the equation to solve.
-   *
-   * \note Solving requires both U and V to be computed. Thin U and V are enough, there is no need for full U or V.
-   *
-   * \note SVD solving is implicitly least - squares. Thus, this method serves both purposes of exact solving and least - squares solving.
-   * In other words, the returned solution is guaranteed to minimize the Euclidean norm \f$ \Vert A x - b \Vert \f$.
-   */
-  template<typename Rhs>
-  inline const internal::solve_retval<BDCSVD, Rhs>
-  solve(const MatrixBase<Rhs>& b) const
-  {
-    eigen_assert(this->m_isInitialized && "BDCSVD is not initialized.");
-    eigen_assert(SVDBase<_MatrixType>::computeU() && SVDBase<_MatrixType>::computeV() && 
-		 "BDCSVD::solve() requires both unitaries U and V to be computed (thin unitaries suffice).");
-    return internal::solve_retval<BDCSVD, Rhs>(*this, b.derived());
-  }
-
- 
-  const MatrixUType& matrixU() const
-  {
-    eigen_assert(this->m_isInitialized && "SVD is not initialized.");
-    if (isTranspose){
-      eigen_assert(this->computeV() && "This SVD decomposition didn't compute U. Did you ask for it?");
-      return this->m_matrixV;
-    }
-    else 
-    {
-      eigen_assert(this->computeU() && "This SVD decomposition didn't compute U. Did you ask for it?");
-      return this->m_matrixU;
-    }
-     
-  }
-
-
-  const MatrixVType& matrixV() const
-  {
-    eigen_assert(this->m_isInitialized && "SVD is not initialized.");
-    if (isTranspose){
-      eigen_assert(this->computeU() && "This SVD decomposition didn't compute V. Did you ask for it?");
-      return this->m_matrixU;
-    }
-    else
-    {
-      eigen_assert(this->computeV() && "This SVD decomposition didn't compute V. Did you ask for it?");
-      return this->m_matrixV;
-    }
+    m_algoswap = s;
   }
  
 private:
@@ -209,15 +162,27 @@ private:
   void deflation43(Index firstCol, Index shift, Index i, Index size);
   void deflation44(Index firstColu , Index firstColm, Index firstRowW, Index firstColW, Index i, Index j, Index size);
   void deflation(Index firstCol, Index lastCol, Index k, Index firstRowW, Index firstColW, Index shift);
-  void copyUV(const typename internal::UpperBidiagonalization<MatrixX>::HouseholderUSequenceType& householderU, 
-              const typename internal::UpperBidiagonalization<MatrixX>::HouseholderVSequenceType& householderV);
+  template<typename HouseholderU, typename HouseholderV, typename NaiveU, typename NaiveV>
+  void copyUV(const HouseholderU &householderU, const HouseholderV &householderV, const NaiveU &naiveU, const NaiveV &naivev);
+  static void structured_update(Block<MatrixXr,Dynamic,Dynamic> A, const MatrixXr &B, Index n1);
 
 protected:
   MatrixXr m_naiveU, m_naiveV;
   MatrixXr m_computed;
-  Index nRec;
-  int algoswap;
-  bool isTranspose, compU, compV;
+  Index m_nRec;
+  int m_algoswap;
+  bool m_isTranspose, m_compU, m_compV;
+  
+  using Base::m_singularValues;
+  using Base::m_diagSize;
+  using Base::m_computeFullU;
+  using Base::m_computeFullV;
+  using Base::m_computeThinU;
+  using Base::m_computeThinV;
+  using Base::m_matrixU;
+  using Base::m_matrixV;
+  using Base::m_isInitialized;
+  using Base::m_nonzeroSingularValues;
 
 public:  
   int m_numIters;
@@ -228,117 +193,147 @@ public:
 template<typename MatrixType>
 void BDCSVD<MatrixType>::allocate(Index rows, Index cols, unsigned int computationOptions)
 {
-  isTranspose = (cols > rows);
-  if (SVDBase<MatrixType>::allocate(rows, cols, computationOptions)) return;
-  m_computed = MatrixXr::Zero(this->m_diagSize + 1, this->m_diagSize );
-  if (isTranspose){
-    compU = this->computeU();
-    compV = this->computeV();    
-  } 
-  else
-  {
-    compV = this->computeU();
-    compU = this->computeV();   
-  }
-  if (compU) m_naiveU = MatrixXr::Zero(this->m_diagSize + 1, this->m_diagSize + 1 );
-  else m_naiveU = MatrixXr::Zero(2, this->m_diagSize + 1 );
+  m_isTranspose = (cols > rows);
+  if (Base::allocate(rows, cols, computationOptions))
+    return;
   
-  if (compV) m_naiveV = MatrixXr::Zero(this->m_diagSize, this->m_diagSize);
+  m_computed = MatrixXr::Zero(m_diagSize + 1, m_diagSize );
+  m_compU = computeV();
+  m_compV = computeU();
+  if (m_isTranspose)
+    std::swap(m_compU, m_compV);
   
-
-  //should be changed for a cleaner implementation
-  if (isTranspose){
-    bool aux;
-    if (this->computeU()||this->computeV()){
-      aux = this->m_computeFullU;
-      this->m_computeFullU = this->m_computeFullV;
-      this->m_computeFullV = aux;
-      aux = this->m_computeThinU;
-      this->m_computeThinU = this->m_computeThinV;
-      this->m_computeThinV = aux;
-    } 
-  }
+  if (m_compU) m_naiveU = MatrixXr::Zero(m_diagSize + 1, m_diagSize + 1 );
+  else         m_naiveU = MatrixXr::Zero(2, m_diagSize + 1 );
+  
+  if (m_compV) m_naiveV = MatrixXr::Zero(m_diagSize, m_diagSize);
 }// end allocate
 
 // Methode which compute the BDCSVD for the int
 template<>
-SVDBase<Matrix<int, Dynamic, Dynamic> >&
-BDCSVD<Matrix<int, Dynamic, Dynamic> >::compute(const MatrixType& matrix, unsigned int computationOptions) {
+BDCSVD<Matrix<int, Dynamic, Dynamic> >& BDCSVD<Matrix<int, Dynamic, Dynamic> >::compute(const MatrixType& matrix, unsigned int computationOptions)
+{
   allocate(matrix.rows(), matrix.cols(), computationOptions);
-  this->m_nonzeroSingularValues = 0;
+  m_nonzeroSingularValues = 0;
   m_computed = Matrix<int, Dynamic, Dynamic>::Zero(rows(), cols());
-  for (int i=0; i<this->m_diagSize; i++)   {
-    this->m_singularValues.coeffRef(i) = 0;
-  }
-  if (this->m_computeFullU) this->m_matrixU = Matrix<int, Dynamic, Dynamic>::Zero(rows(), rows());
-  if (this->m_computeFullV) this->m_matrixV = Matrix<int, Dynamic, Dynamic>::Zero(cols(), cols()); 
-  this->m_isInitialized = true;
+  
+  m_singularValues.head(m_diagSize).setZero();
+  
+  if (m_computeFullU) m_matrixU.setZero(rows(), rows());
+  if (m_computeFullV) m_matrixV.setZero(cols(), cols()); 
+  m_isInitialized = true;
   return *this;
 }
 
 
 // Methode which compute the BDCSVD
 template<typename MatrixType>
-SVDBase<MatrixType>&
-BDCSVD<MatrixType>::compute(const MatrixType& matrix, unsigned int computationOptions) 
+BDCSVD<MatrixType>& BDCSVD<MatrixType>::compute(const MatrixType& matrix, unsigned int computationOptions) 
 {
   allocate(matrix.rows(), matrix.cols(), computationOptions);
   using std::abs;
 
-  //**** step 1 Bidiagonalization  isTranspose = (matrix.cols()>matrix.rows()) ;
+  //**** step 1 Bidiagonalization  m_isTranspose = (matrix.cols()>matrix.rows()) ;
   MatrixType copy;
-  if (isTranspose) copy = matrix.adjoint();
-  else copy = matrix;
+  if (m_isTranspose) copy = matrix.adjoint();
+  else               copy = matrix;
   
   internal::UpperBidiagonalization<MatrixX> bid(copy);
 
   //**** step 2 Divide
-  m_computed.topRows(this->m_diagSize) = bid.bidiagonal().toDenseMatrix().transpose();
+  m_naiveU.setZero();
+  m_naiveV.setZero();
+  m_computed.topRows(m_diagSize) = bid.bidiagonal().toDenseMatrix().transpose();
   m_computed.template bottomRows<1>().setZero();
-  divide(0, this->m_diagSize - 1, 0, 0, 0);
+  divide(0, m_diagSize - 1, 0, 0, 0);
 
   //**** step 3 copy
-  for (int i=0; i<this->m_diagSize; i++)   {
+  for (int i=0; i<m_diagSize; i++)
+  {
     RealScalar a = abs(m_computed.coeff(i, i));
-    this->m_singularValues.coeffRef(i) = a;
-    if (a == 0){
-      this->m_nonzeroSingularValues = i;
-      this->m_singularValues.tail(this->m_diagSize - i - 1).setZero();
+    m_singularValues.coeffRef(i) = a;
+    if (a == 0)
+    {
+      m_nonzeroSingularValues = i;
+      m_singularValues.tail(m_diagSize - i - 1).setZero();
       break;
     }
-    else  if (i == this->m_diagSize - 1)
+    else if (i == m_diagSize - 1)
     {
-      this->m_nonzeroSingularValues = i + 1;
+      m_nonzeroSingularValues = i + 1;
       break;
     }
   }
-  copyUV(bid.householderU(), bid.householderV());
-  this->m_isInitialized = true;
+  if(m_isTranspose) copyUV(bid.householderV(), bid.householderU(), m_naiveV, m_naiveU);
+  else              copyUV(bid.householderU(), bid.householderV(), m_naiveU, m_naiveV);
+  m_isInitialized = true;
   return *this;
 }// end compute
 
 
 template<typename MatrixType>
-void BDCSVD<MatrixType>::copyUV(const typename internal::UpperBidiagonalization<MatrixX>::HouseholderUSequenceType& householderU, 
-                                const typename internal::UpperBidiagonalization<MatrixX>::HouseholderVSequenceType& householderV)
+template<typename HouseholderU, typename HouseholderV, typename NaiveU, typename NaiveV>
+void BDCSVD<MatrixType>::copyUV(const HouseholderU &householderU, const HouseholderV &householderV, const NaiveU &naiveU, const NaiveV &naiveV)
 {
   // Note exchange of U and V: m_matrixU is set from m_naiveV and vice versa
-  if (this->computeU()){
-    Index Ucols = this->m_computeThinU ? this->m_nonzeroSingularValues : householderU.cols();
-    this->m_matrixU = MatrixX::Identity(householderU.cols(), Ucols);
-    Index blockCols = this->m_computeThinU ? this->m_nonzeroSingularValues : this->m_diagSize;
-    this->m_matrixU.block(0, 0, this->m_diagSize, blockCols) = 
-        m_naiveV.template cast<Scalar>().block(0, 0, this->m_diagSize, blockCols);
-    this->m_matrixU = householderU * this->m_matrixU;
+  if (computeU())
+  {
+    Index Ucols = m_computeThinU ? m_nonzeroSingularValues : householderU.cols();
+    m_matrixU = MatrixX::Identity(householderU.cols(), Ucols);
+    Index blockCols = m_computeThinU ? m_nonzeroSingularValues : m_diagSize;
+    m_matrixU.topLeftCorner(m_diagSize, blockCols) = naiveV.template cast<Scalar>().topLeftCorner(m_diagSize, blockCols);
+    householderU.applyThisOnTheLeft(m_matrixU);
+  }
+  if (computeV())
+  {
+    Index Vcols = m_computeThinV ? m_nonzeroSingularValues : householderV.cols();
+    m_matrixV = MatrixX::Identity(householderV.cols(), Vcols);
+    Index blockCols = m_computeThinV ? m_nonzeroSingularValues : m_diagSize;
+    m_matrixV.topLeftCorner(m_diagSize, blockCols) = naiveU.template cast<Scalar>().topLeftCorner(m_diagSize, blockCols);
+    householderV.applyThisOnTheLeft(m_matrixV);
   }
-  if (this->computeV()){
-    Index Vcols = this->m_computeThinV ? this->m_nonzeroSingularValues : householderV.cols();
-    this->m_matrixV = MatrixX::Identity(householderV.cols(), Vcols);
-    Index blockCols = this->m_computeThinV ? this->m_nonzeroSingularValues : this->m_diagSize;
-    this->m_matrixV.block(0, 0, this->m_diagSize, blockCols) = 
-        m_naiveU.template cast<Scalar>().block(0, 0, this->m_diagSize, blockCols);
-    this->m_matrixV = householderV * this->m_matrixV;
+}
+
+/** \internal
+  * Performs A = A * B exploiting the special structure of the matrix A. Splitting A as:
+  *  A = [A1]
+  *      [A2]
+  * such that A1.rows()==n1, then we assume that at least half of the columns of A1 and A2 are zeros.
+  * We can thus pack them prior to the the matrix product. However, this is only worth the effort if the matrix is large
+  * enough.
+  */
+template<typename MatrixType>
+void BDCSVD<MatrixType>::structured_update(Block<MatrixXr,Dynamic,Dynamic> A, const MatrixXr &B, Index n1)
+{
+  Index n = A.rows();
+  if(n>100)
+  {
+    // If the matrices are large enough, let's exploit the sparse strucure of A by
+    // splitting it in half (wrt n1), and packing the non-zero columns.
+    DenseIndex n2 = n - n1;
+    MatrixXr A1(n1,n), A2(n2,n), B1(n,n), B2(n,n);
+    Index k1=0, k2=0;
+    for(Index j=0; j<n; ++j)
+    {
+      if( (A.col(j).head(n1).array()!=0).any() )
+      {
+        A1.col(k1) = A.col(j).head(n1);
+        B1.row(k1) = B.row(j);
+        ++k1;
+      }
+      if( (A.col(j).tail(n2).array()!=0).any() )
+      {
+        A2.col(k2) = A.col(j).tail(n2);
+        B2.row(k2) = B.row(j);
+        ++k2;
+      }
+    }
+  
+    A.topRows(n1).noalias()    = A1.leftCols(k1) * B1.topRows(k1);
+    A.bottomRows(n2).noalias() = A2.leftCols(k2) * B2.topRows(k2);
   }
+  else
+    A *= B;  // FIXME this requires a temporary
 }
 
 // The divide algorithm is done "in place", we are always working on subsets of the same matrix. The divide methods takes as argument the 
@@ -352,8 +347,7 @@ void BDCSVD<MatrixType>::copyUV(const typename internal::UpperBidiagonalization<
 //@param shift : Each time one takes the left submatrix, one must add 1 to the shift. Why? Because! We actually want the last column of the U submatrix 
 // to become the first column (*coeff) and to shift all the other columns to the right. There are more details on the reference paper.
 template<typename MatrixType>
-void BDCSVD<MatrixType>::divide (Index firstCol, Index lastCol, Index firstRowW, 
-				 Index firstColW, Index shift)
+void BDCSVD<MatrixType>::divide (Index firstCol, Index lastCol, Index firstRowW, Index firstColW, Index shift)
 {
   // requires nbRows = nbCols + 1;
   using std::pow;
@@ -365,24 +359,22 @@ void BDCSVD<MatrixType>::divide (Index firstCol, Index lastCol, Index firstRowW,
   RealScalar betaK; 
   RealScalar r0; 
   RealScalar lambda, phi, c0, s0;
-  MatrixXr l, f;
+  VectorType l, f;
   // We use the other algorithm which is more efficient for small 
   // matrices.
-  if (n < algoswap){
-    JacobiSVD<MatrixXr> b(m_computed.block(firstCol, firstCol, n + 1, n), 
-			  ComputeFullU | (ComputeFullV * compV)) ;
-    if (compU) m_naiveU.block(firstCol, firstCol, n + 1, n + 1).real() << b.matrixU();
+  if (n < m_algoswap)
+  {
+    JacobiSVD<MatrixXr> b(m_computed.block(firstCol, firstCol, n + 1, n), ComputeFullU | (m_compV ? ComputeFullV : 0)) ;
+    if (m_compU)
+      m_naiveU.block(firstCol, firstCol, n + 1, n + 1).real() = b.matrixU();
     else 
     {
-      m_naiveU.row(0).segment(firstCol, n + 1).real() << b.matrixU().row(0);
-      m_naiveU.row(1).segment(firstCol, n + 1).real() << b.matrixU().row(n);
+      m_naiveU.row(0).segment(firstCol, n + 1).real() = b.matrixU().row(0);
+      m_naiveU.row(1).segment(firstCol, n + 1).real() = b.matrixU().row(n);
     }
-    if (compV) m_naiveV.block(firstRowW, firstColW, n, n).real() << b.matrixV();
+    if (m_compV) m_naiveV.block(firstRowW, firstColW, n, n).real() = b.matrixV();
     m_computed.block(firstCol + shift, firstCol + shift, n + 1, n).setZero();
-    for (int i=0; i<n; i++)
-    {
-      m_computed(firstCol + shift + i, firstCol + shift +i) = b.singularValues().coeffRef(i);
-    }
+    m_computed.diagonal().segment(firstCol + shift, n) = b.singularValues().head(n);
     return;
   }
   // We use the divide and conquer algorithm
@@ -393,7 +385,7 @@ void BDCSVD<MatrixType>::divide (Index firstCol, Index lastCol, Index firstRowW,
   // right submatrix before the left one. 
   divide(k + 1 + firstCol, lastCol, k + 1 + firstRowW, k + 1 + firstColW, shift);
   divide(firstCol, k - 1 + firstCol, firstRowW, firstColW + 1, shift + 1);
-  if (compU)
+  if (m_compU)
   {
     lambda = m_naiveU(firstCol + k, firstCol + k);
     phi = m_naiveU(firstCol + k + 1, lastCol + 1);
@@ -403,9 +395,8 @@ void BDCSVD<MatrixType>::divide (Index firstCol, Index lastCol, Index firstRowW,
     lambda = m_naiveU(1, firstCol + k);
     phi = m_naiveU(0, lastCol + 1);
   }
-  r0 = sqrt((abs(alphaK * lambda) * abs(alphaK * lambda))
-	    + abs(betaK * phi) * abs(betaK * phi));
-  if (compU)
+  r0 = sqrt((abs(alphaK * lambda) * abs(alphaK * lambda)) + abs(betaK * phi) * abs(betaK * phi));
+  if (m_compU)
   {
     l = m_naiveU.row(firstCol + k).segment(firstCol, k);
     f = m_naiveU.row(firstCol + k + 1).segment(firstCol + k + 1, n - k - 1);
@@ -415,7 +406,7 @@ void BDCSVD<MatrixType>::divide (Index firstCol, Index lastCol, Index firstRowW,
     l = m_naiveU.row(1).segment(firstCol, k);
     f = m_naiveU.row(0).segment(firstCol + k + 1, n - k - 1);
   }
-  if (compV) m_naiveV(firstRowW+k, firstColW) = 1;
+  if (m_compV) m_naiveV(firstRowW+k, firstColW) = 1;
   if (r0 == 0)
   {
     c0 = 1;
@@ -426,32 +417,27 @@ void BDCSVD<MatrixType>::divide (Index firstCol, Index lastCol, Index firstRowW,
     c0 = alphaK * lambda / r0;
     s0 = betaK * phi / r0;
   }
-  if (compU)
+  if (m_compU)
   {
     MatrixXr q1 (m_naiveU.col(firstCol + k).segment(firstCol, k + 1));     
     // we shiftW Q1 to the right
     for (Index i = firstCol + k - 1; i >= firstCol; i--) 
-    {
-      m_naiveU.col(i + 1).segment(firstCol, k + 1) << m_naiveU.col(i).segment(firstCol, k + 1);
-    }
+      m_naiveU.col(i + 1).segment(firstCol, k + 1) = m_naiveU.col(i).segment(firstCol, k + 1);
     // we shift q1 at the left with a factor c0
-    m_naiveU.col(firstCol).segment( firstCol, k + 1) << (q1 * c0);
+    m_naiveU.col(firstCol).segment( firstCol, k + 1) = (q1 * c0);
     // last column = q1 * - s0
-    m_naiveU.col(lastCol + 1).segment(firstCol, k + 1) << (q1 * ( - s0));
+    m_naiveU.col(lastCol + 1).segment(firstCol, k + 1) = (q1 * ( - s0));
     // first column = q2 * s0
-    m_naiveU.col(firstCol).segment(firstCol + k + 1, n - k) << 
-      m_naiveU.col(lastCol + 1).segment(firstCol + k + 1, n - k) *s0; 
+    m_naiveU.col(firstCol).segment(firstCol + k + 1, n - k) = m_naiveU.col(lastCol + 1).segment(firstCol + k + 1, n - k) * s0; 
     // q2 *= c0
-    m_naiveU.col(lastCol + 1).segment(firstCol + k + 1, n - k) *= c0; 
+    m_naiveU.col(lastCol + 1).segment(firstCol + k + 1, n - k) *= c0;
   } 
   else 
   {
     RealScalar q1 = (m_naiveU(0, firstCol + k));
     // we shift Q1 to the right
     for (Index i = firstCol + k - 1; i >= firstCol; i--) 
-    {
       m_naiveU(0, i + 1) = m_naiveU(0, i);
-    }
     // we shift q1 at the left with a factor c0
     m_naiveU(0, firstCol) = (q1 * c0);
     // last column = q1 * - s0
@@ -464,9 +450,8 @@ void BDCSVD<MatrixType>::divide (Index firstCol, Index lastCol, Index firstRowW,
     m_naiveU.row(0).segment(firstCol + k + 1, n - k - 1).setZero();
   }
   m_computed(firstCol + shift, firstCol + shift) = r0;
-  m_computed.col(firstCol + shift).segment(firstCol + shift + 1, k) << alphaK * l.transpose().real();
-  m_computed.col(firstCol + shift).segment(firstCol + shift + k + 1, n - k - 1) << betaK * f.transpose().real();
-
+  m_computed.col(firstCol + shift).segment(firstCol + shift + 1, k) = alphaK * l.transpose().real();
+  m_computed.col(firstCol + shift).segment(firstCol + shift + k + 1, n - k - 1) = betaK * f.transpose().real();
 
   // Second part: try to deflate singular values in combined matrix
   deflation(firstCol, lastCol, k, firstRowW, firstColW, shift);
@@ -475,9 +460,12 @@ void BDCSVD<MatrixType>::divide (Index firstCol, Index lastCol, Index firstRowW,
   MatrixXr UofSVD, VofSVD;
   VectorType singVals;
   computeSVDofM(firstCol + shift, n, UofSVD, singVals, VofSVD);
-  if (compU) m_naiveU.block(firstCol, firstCol, n + 1, n + 1) *= UofSVD;
-  else m_naiveU.block(0, firstCol, 2, n + 1) *= UofSVD;
-  if (compV) m_naiveV.block(firstRowW, firstColW, n, n) *= VofSVD;
+  
+  if (m_compU)  structured_update(m_naiveU.block(firstCol, firstCol, n + 1, n + 1), UofSVD, (n+2)/2);
+  else          m_naiveU.middleCols(firstCol, n + 1) *= UofSVD; // FIXME this requires a temporary, and exploit that there are 2 rows at compile time
+  
+  if (m_compV)  structured_update(m_naiveV.block(firstRowW, firstColW, n, n), VofSVD, (n+1)/2);
+  
   m_computed.block(firstCol + shift, firstCol + shift, n, n).setZero();
   m_computed.block(firstCol + shift, firstCol + shift, n, n).diagonal() = singVals;
 }// end divide
@@ -485,7 +473,7 @@ void BDCSVD<MatrixType>::divide (Index firstCol, Index lastCol, Index firstRowW,
 // Compute SVD of m_computed.block(firstCol, firstCol, n + 1, n); this block only has non-zeros in
 // the first column and on the diagonal and has undergone deflation, so diagonal is in increasing
 // order except for possibly the (0,0) entry. The computed SVD is stored U, singVals and V, except
-// that if compV is false, then V is not computed. Singular values are sorted in decreasing order.
+// that if m_compV is false, then V is not computed. Singular values are sorted in decreasing order.
 //
 // TODO Opportunities for optimization: better root finding algo, better stopping criterion, better
 // handling of round-off errors, be consistent in ordering
@@ -493,26 +481,28 @@ template <typename MatrixType>
 void BDCSVD<MatrixType>::computeSVDofM(Index firstCol, Index n, MatrixXr& U, VectorType& singVals, MatrixXr& V)
 {
   // TODO Get rid of these copies (?)
-  ArrayXr col0 = m_computed.block(firstCol, firstCol, n, 1);
+  // FIXME at least preallocate them
+  ArrayXr col0 = m_computed.col(firstCol).segment(firstCol, n);
   ArrayXr diag = m_computed.block(firstCol, firstCol, n, n).diagonal();
   diag(0) = 0;
 
   // compute singular values and vectors (in decreasing order)
   singVals.resize(n);
   U.resize(n+1, n+1);
-  if (compV) V.resize(n, n);
+  if (m_compV) V.resize(n, n);
 
   if (col0.hasNaN() || diag.hasNaN()) return;
 
   ArrayXr shifts(n), mus(n), zhat(n);
+  
   computeSingVals(col0, diag, singVals, shifts, mus);
   perturbCol0(col0, diag, singVals, shifts, mus, zhat);
   computeSingVecs(zhat, diag, singVals, shifts, mus, U, V);
 
   // Reverse order so that singular values in increased order
   singVals.reverseInPlace();
-  U.leftCols(n) = U.leftCols(n).rowwise().reverse().eval();
-  if (compV) V = V.rowwise().reverse().eval();
+  U.leftCols(n) = U.leftCols(n).rowwise().reverse().eval(); // FIXME this requires a temporary
+  if (m_compV) V = V.rowwise().reverse().eval();            // FIXME this requires a temporary
 }
 
 template <typename MatrixType>
@@ -521,10 +511,13 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayXr& col0, const ArrayXr& dia
 {
   using std::abs;
   using std::swap;
+  using std::max;
 
   Index n = col0.size();
-  for (Index k = 0; k < n; ++k) {
-    if (col0(k) == 0) {
+  for (Index k = 0; k < n; ++k)
+  {
+    if (col0(k) == 0)
+    {
       // entry is deflated, so singular value is on diagonal
       singVals(k) = diag(k);
       mus(k) = 0;
@@ -540,27 +533,29 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayXr& col0, const ArrayXr& dia
     RealScalar mid = left + (right-left) / 2;
     RealScalar fMid = 1 + (col0.square() / ((diag + mid) * (diag - mid))).sum();
 
-    RealScalar shift;
-    if (k == n-1 || fMid > 0) shift = left;
-    else shift = right;
+    RealScalar shift = (k == n-1 || fMid > 0) ? left : right;
     
     // measure everything relative to shift
     ArrayXr diagShifted = diag - shift;
     
     // initial guess
     RealScalar muPrev, muCur;
-    if (shift == left) {
+    if (shift == left)
+    {
       muPrev = (right - left) * 0.1;
       if (k == n-1) muCur = right - left;
-      else muCur = (right - left) * 0.5; 
-    } else {
+      else          muCur = (right - left) * 0.5; 
+    }
+    else
+    {
       muPrev = -(right - left) * 0.1;
       muCur = -(right - left) * 0.5;
     }
 
     RealScalar fPrev = 1 + (col0.square() / ((diagShifted - muPrev) * (diag + shift + muPrev))).sum();
     RealScalar fCur = 1 + (col0.square() / ((diagShifted - muCur) * (diag + shift + muCur))).sum();
-    if (abs(fPrev) < abs(fCur)) {
+    if (abs(fPrev) < abs(fCur))
+    {
       swap(fPrev, fCur);
       swap(muPrev, muCur);
     }
@@ -568,7 +563,8 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayXr& col0, const ArrayXr& dia
     // rational interpolation: fit a function of the form a / mu + b through the two previous
     // iterates and use its zero to compute the next iterate
     bool useBisection = false;
-    while (abs(muCur - muPrev) > 8 * NumTraits<RealScalar>::epsilon() * (std::max)(abs(muCur), abs(muPrev)) && fCur != fPrev && !useBisection) {
+    while (abs(muCur - muPrev) > 8 * NumTraits<RealScalar>::epsilon() * (max)(abs(muCur), abs(muPrev)) && fCur != fPrev && !useBisection)
+    {
       ++m_numIters;
 
       RealScalar a = (fCur - fPrev) / (1/muCur - 1/muPrev);
@@ -584,13 +580,17 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayXr& col0, const ArrayXr& dia
     }
 
     // fall back on bisection method if rational interpolation did not work
-    if (useBisection) {
+    if (useBisection)
+    {
       RealScalar leftShifted, rightShifted;
-      if (shift == left) {
+      if (shift == left)
+      {
         leftShifted = 1e-30;
         if (k == 0) rightShifted = right - left;
-        else rightShifted = (right - left) * 0.6; // theoretically we can take 0.5, but let's be safe
-      } else {
+        else        rightShifted = (right - left) * 0.6; // theoretically we can take 0.5, but let's be safe
+      }
+      else
+      {
         leftShifted = -(right - left) * 0.6;
         rightShifted = -1e-30;
       }
@@ -599,13 +599,17 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayXr& col0, const ArrayXr& dia
       RealScalar fRight = 1 + (col0.square() / ((diagShifted - rightShifted) * (diag + shift + rightShifted))).sum();
       assert(fLeft * fRight < 0);
       
-      while (rightShifted - leftShifted > 2 * NumTraits<RealScalar>::epsilon() * (std::max)(abs(leftShifted), abs(rightShifted))) {
+      while (rightShifted - leftShifted > 2 * NumTraits<RealScalar>::epsilon() * (max)(abs(leftShifted), abs(rightShifted)))
+      {
         RealScalar midShifted = (leftShifted + rightShifted) / 2;
         RealScalar fMid = 1 + (col0.square() / ((diagShifted - midShifted) * (diag + shift + midShifted))).sum();
-        if (fLeft * fMid < 0) {
+        if (fLeft * fMid < 0)
+        {
           rightShifted = midShifted;
           fRight = fMid;
-        } else {
+        }
+        else
+        {
           leftShifted = midShifted;
           fLeft = fMid;
         }
@@ -632,13 +636,15 @@ void BDCSVD<MatrixType>::perturbCol0
    (const ArrayXr& col0, const ArrayXr& diag, const VectorType& singVals,
     const ArrayXr& shifts, const ArrayXr& mus, ArrayXr& zhat)
 {
+  using std::sqrt;
   Index n = col0.size();
-  for (Index k = 0; k < n; ++k) {
+  for (Index k = 0; k < n; ++k)
+  {
     if (col0(k) == 0) 
       zhat(k) = 0;
-    else {
+    else
+    {
       // see equation (3.6)
-      using std::sqrt;
       RealScalar tmp = 
         sqrt(
              (singVals(n-1) + diag(k)) * (mus(n-1) + (shifts(n-1) - diag(k)))
@@ -664,16 +670,21 @@ void BDCSVD<MatrixType>::computeSingVecs
     const ArrayXr& shifts, const ArrayXr& mus, MatrixXr& U, MatrixXr& V)
 {
   Index n = zhat.size();
-  for (Index k = 0; k < n; ++k) {
-    if (zhat(k) == 0) {
+  for (Index k = 0; k < n; ++k)
+  {
+    if (zhat(k) == 0)
+    {
       U.col(k) = VectorType::Unit(n+1, k);
-      if (compV) V.col(k) = VectorType::Unit(n, k);
-    } else {
+      if (m_compV) V.col(k) = VectorType::Unit(n, k);
+    }
+    else
+    {
       U.col(k).head(n) = zhat / (((diag - shifts(k)) - mus(k)) * (diag + singVals[k]));
       U(n,k) = 0;
       U.col(k).normalize();
     
-      if (compV) {
+      if (m_compV)
+      {
         V.col(k).tail(n-1) = (diag * zhat / (((diag - shifts(k)) - mus(k)) * (diag + singVals[k]))).tail(n-1);
         V(0,k) = -1;
         V.col(k).normalize();
@@ -688,15 +699,17 @@ void BDCSVD<MatrixType>::computeSingVecs
 // i >= 1, di almost null and zi non null.
 // We use a rotation to zero out zi applied to the left of M
 template <typename MatrixType>
-void BDCSVD<MatrixType>::deflation43(Index firstCol, Index shift, Index i, Index size){
+void BDCSVD<MatrixType>::deflation43(Index firstCol, Index shift, Index i, Index size)
+{
   using std::abs;
   using std::sqrt;
   using std::pow;
   RealScalar c = m_computed(firstCol + shift, firstCol + shift);
   RealScalar s = m_computed(i, firstCol + shift);
   RealScalar r = sqrt(pow(abs(c), 2) + pow(abs(s), 2));
-  if (r == 0){
-    m_computed(i, i)=0;
+  if (r == 0)
+  {
+    m_computed(i, i) = 0;
     return;
   }
   c/=r;
@@ -704,7 +717,8 @@ void BDCSVD<MatrixType>::deflation43(Index firstCol, Index shift, Index i, Index
   m_computed(firstCol + shift, firstCol + shift) = r;  
   m_computed(i, firstCol + shift) = 0;
   m_computed(i, i) = 0;
-  if (compU){
+  if (m_compU)
+  {
     m_naiveU.col(firstCol).segment(firstCol,size) = 
       c * m_naiveU.col(firstCol).segment(firstCol, size) - 
       s * m_naiveU.col(i).segment(firstCol, size) ;
@@ -720,7 +734,8 @@ void BDCSVD<MatrixType>::deflation43(Index firstCol, Index shift, Index i, Index
 // i,j >= 1, i != j and |di - dj| < epsilon * norm2(M)
 // We apply two rotations to have zj = 0;
 template <typename MatrixType>
-void BDCSVD<MatrixType>::deflation44(Index firstColu , Index firstColm, Index firstRowW, Index firstColW, Index i, Index j, Index size){
+void BDCSVD<MatrixType>::deflation44(Index firstColu , Index firstColm, Index firstRowW, Index firstColW, Index i, Index j, Index size)
+{
   using std::abs;
   using std::sqrt;
   using std::conj;
@@ -728,7 +743,8 @@ void BDCSVD<MatrixType>::deflation44(Index firstColu , Index firstColm, Index fi
   RealScalar c = m_computed(firstColm, firstColm + j - 1);
   RealScalar s = m_computed(firstColm, firstColm + i - 1);
   RealScalar r = sqrt(pow(abs(c), 2) + pow(abs(s), 2));
-  if (r==0){
+  if (r==0)
+  {
     m_computed(firstColm + i, firstColm + i) = m_computed(firstColm + j, firstColm + j);
     return;
   }
@@ -737,7 +753,8 @@ void BDCSVD<MatrixType>::deflation44(Index firstColu , Index firstColm, Index fi
   m_computed(firstColm + i, firstColm) = r;  
   m_computed(firstColm + i, firstColm + i) = m_computed(firstColm + j, firstColm + j);
   m_computed(firstColm + j, firstColm) = 0;
-  if (compU){
+  if (m_compU)
+  {
     m_naiveU.col(firstColu + i).segment(firstColu, size) = 
       c * m_naiveU.col(firstColu + i).segment(firstColu, size) - 
       s * m_naiveU.col(firstColu + j).segment(firstColu, size) ;
@@ -746,7 +763,8 @@ void BDCSVD<MatrixType>::deflation44(Index firstColu , Index firstColm, Index fi
       (c + s*s/c) *  m_naiveU.col(firstColu + j).segment(firstColu, size) + 
       (s/c) * m_naiveU.col(firstColu + i).segment(firstColu, size);
   } 
-  if (compV){
+  if (m_compV)
+  {
     m_naiveV.col(firstColW + i).segment(firstRowW, size - 1) = 
       c * m_naiveV.col(firstColW + i).segment(firstRowW, size - 1) + 
       s * m_naiveV.col(firstColW + j).segment(firstRowW, size - 1) ;
@@ -760,72 +778,56 @@ void BDCSVD<MatrixType>::deflation44(Index firstColu , Index firstColm, Index fi
 
 // acts on block from (firstCol+shift, firstCol+shift) to (lastCol+shift, lastCol+shift) [inclusive]
 template <typename MatrixType>
-void BDCSVD<MatrixType>::deflation(Index firstCol, Index lastCol, Index k, Index firstRowW, Index firstColW, Index shift){
+void BDCSVD<MatrixType>::deflation(Index firstCol, Index lastCol, Index k, Index firstRowW, Index firstColW, Index shift)
+{
   //condition 4.1
   using std::sqrt;
+  using std::abs;
   const Index length = lastCol + 1 - firstCol;
   RealScalar norm1 = m_computed.block(firstCol+shift, firstCol+shift, length, 1).squaredNorm();
   RealScalar norm2 = m_computed.block(firstCol+shift, firstCol+shift, length, length).diagonal().squaredNorm();
-  RealScalar EPS = 10 * NumTraits<RealScalar>::epsilon() * sqrt(norm1 + norm2);
-  if (m_computed(firstCol + shift, firstCol + shift) < EPS){
-    m_computed(firstCol + shift, firstCol + shift) = EPS;
-  }
+  RealScalar epsilon = 10 * NumTraits<RealScalar>::epsilon() * sqrt(norm1 + norm2);
+  if (m_computed(firstCol + shift, firstCol + shift) < epsilon)
+    m_computed(firstCol + shift, firstCol + shift) = epsilon;
 
   //condition 4.2
-  for (Index i=firstCol + shift + 1;i<=lastCol + shift;i++){
-    if (std::abs(m_computed(i, firstCol + shift)) < EPS){
+  for (Index i=firstCol + shift + 1;i<=lastCol + shift;i++)
+    if (abs(m_computed(i, firstCol + shift)) < epsilon)
       m_computed(i, firstCol + shift) = 0;
-    }
-  }
 
   //condition 4.3
-  for (Index i=firstCol + shift + 1;i<=lastCol + shift; i++){
-    if (m_computed(i, i) < EPS){
+  for (Index i=firstCol + shift + 1;i<=lastCol + shift; i++)
+    if (m_computed(i, i) < epsilon)
       deflation43(firstCol, shift, i, length);
-    }
-  }
 
   //condition 4.4
  
   Index i=firstCol + shift + 1, j=firstCol + shift + k + 1;
   //we stock the final place of each line
-  Index *permutation = new Index[length];
+  Index *permutation = new Index[length]; // FIXME avoid repeated dynamic memory allocation
 
-  for (Index p =1; p < length; p++) {
-    if (i> firstCol + shift + k){
-      permutation[p] = j;
-      j++;
-    } else if (j> lastCol + shift) 
-    {
-      permutation[p] = i;
-      i++;
-    }
-    else 
-    {
-      if (m_computed(i, i) < m_computed(j, j)){
-        permutation[p] = j;
-        j++;
-      } 
-      else
-      {
-        permutation[p] = i;
-        i++;
-      }
-    }
+  for (Index p =1; p < length; p++)
+  {
+         if (i> firstCol + shift + k)             permutation[p] = j++;
+    else if (j> lastCol + shift)                  permutation[p] = i++;
+    else if (m_computed(i, i) < m_computed(j, j)) permutation[p] = j++;
+    else                                          permutation[p] = i++;
   }
   //we do the permutation
   RealScalar aux;
   //we stock the current index of each col
   //and the column of each index
-  Index *realInd = new Index[length];
-  Index *realCol = new Index[length];
-  for (int pos = 0; pos< length; pos++){
+  Index *realInd = new Index[length];  // FIXME avoid repeated dynamic memory allocation
+  Index *realCol = new Index[length];  // FIXME avoid repeated dynamic memory allocation
+  for (int pos = 0; pos< length; pos++)
+  {
     realCol[pos] = pos + firstCol + shift;
     realInd[pos] = pos;
   }
   const Index Zero = firstCol + shift;
   VectorType temp;
-  for (int i = 1; i < length - 1; i++){
+  for (int i = 1; i < length - 1; i++)
+  {
     const Index I = i + Zero;
     const Index realI = realInd[i];
     const Index j  = permutation[length - i] - Zero;
@@ -842,25 +844,25 @@ void BDCSVD<MatrixType>::deflation(Index firstCol, Index lastCol, Index k, Index
     m_computed(J, Zero) = aux;
 
     // change columns
-    if (compU) {
+    if (m_compU)
+    {
       temp = m_naiveU.col(I - shift).segment(firstCol, length + 1);
-      m_naiveU.col(I - shift).segment(firstCol, length + 1) << 
-        m_naiveU.col(J - shift).segment(firstCol, length + 1);
-      m_naiveU.col(J - shift).segment(firstCol, length + 1) << temp;
+      m_naiveU.col(I - shift).segment(firstCol, length + 1) = m_naiveU.col(J - shift).segment(firstCol, length + 1);
+      m_naiveU.col(J - shift).segment(firstCol, length + 1) = temp;
     } 
     else
     {
       temp = m_naiveU.col(I - shift).segment(0, 2);
-      m_naiveU.col(I - shift).segment(0, 2) << 
-        m_naiveU.col(J - shift).segment(0, 2);
-      m_naiveU.col(J - shift).segment(0, 2) << temp;      
+      m_naiveU.col(I - shift).template head<2>() = m_naiveU.col(J - shift).segment(0, 2);
+      m_naiveU.col(J - shift).template head<2>() = temp;      
     }
-    if (compV) {
+    if (m_compV)
+    {
       const Index CWI = I + firstColW - Zero;
       const Index CWJ = J + firstColW - Zero;
       temp = m_naiveV.col(CWI).segment(firstRowW, length);
-      m_naiveV.col(CWI).segment(firstRowW, length) << m_naiveV.col(CWJ).segment(firstRowW, length);
-      m_naiveV.col(CWJ).segment(firstRowW, length) << temp;
+      m_naiveV.col(CWI).segment(firstRowW, length) = m_naiveV.col(CWJ).segment(firstRowW, length);
+      m_naiveV.col(CWJ).segment(firstRowW, length) = temp;
     }
 
     //update real pos
@@ -869,53 +871,16 @@ void BDCSVD<MatrixType>::deflation(Index firstCol, Index lastCol, Index k, Index
     realInd[J - Zero] = realI;
     realInd[I - Zero] = j;
   }
-  for (Index i = firstCol + shift + 1; i<lastCol + shift;i++){
-    if ((m_computed(i + 1, i + 1) - m_computed(i, i)) < EPS){
-      deflation44(firstCol , 
-		  firstCol + shift, 
-		  firstRowW, 
-		  firstColW, 
-		  i - Zero, 
-		  i + 1 - Zero, 
-		  length);
-    }
-  }
-  delete [] permutation;
-  delete [] realInd;
-  delete [] realCol;
+  for (Index i = firstCol + shift + 1; i<lastCol + shift;i++)
+    if ((m_computed(i + 1, i + 1) - m_computed(i, i)) < epsilon)
+      deflation44(firstCol, firstCol + shift, firstRowW, firstColW, i - Zero, i + 1 - Zero, length);
+  
+  delete[] permutation;
+  delete[] realInd;
+  delete[] realCol;
 }//end deflation
 
 
-namespace internal{
-
-template<typename _MatrixType, typename Rhs>
-struct solve_retval<BDCSVD<_MatrixType>, Rhs>
-  : solve_retval_base<BDCSVD<_MatrixType>, Rhs>
-{
-  typedef BDCSVD<_MatrixType> BDCSVDType;
-  EIGEN_MAKE_SOLVE_HELPERS(BDCSVDType, Rhs)
-
-  template<typename Dest> void evalTo(Dest& dst) const
-  {
-    eigen_assert(rhs().rows() == dec().rows());
-    // A = U S V^*
-    // So A^{ - 1} = V S^{ - 1} U^*    
-    Index diagSize = (std::min)(dec().rows(), dec().cols());
-    typename BDCSVDType::SingularValuesType invertedSingVals(diagSize);
-    Index nonzeroSingVals = dec().nonzeroSingularValues();
-    invertedSingVals.head(nonzeroSingVals) = dec().singularValues().head(nonzeroSingVals).array().inverse();
-    invertedSingVals.tail(diagSize - nonzeroSingVals).setZero();
-    
-    dst = dec().matrixV().leftCols(diagSize)
-      * invertedSingVals.asDiagonal()
-      * dec().matrixU().leftCols(diagSize).adjoint()
-      * rhs();	
-    return;
-  }
-};
-
-} //end namespace internal
-
   /** \svd_module
    *
    * \return the singular value decomposition of \c *this computed by 
diff --git a/unsupported/Eigen/src/BDCSVD/CMakeLists.txt b/unsupported/Eigen/src/BDCSVD/CMakeLists.txt
new file mode 100644
index 000000000..73b89ea18
--- /dev/null
+++ b/unsupported/Eigen/src/BDCSVD/CMakeLists.txt
@@ -0,0 +1,6 @@
+FILE(GLOB Eigen_BDCSVD_SRCS "*.h")
+
+INSTALL(FILES
+  ${Eigen_BDCSVD_SRCS}
+  DESTINATION ${INCLUDE_INSTALL_DIR}unsupported/Eigen/src/BDCSVD COMPONENT Devel
+  )
diff --git a/unsupported/Eigen/src/SVD/TODOBdcsvd.txt b/unsupported/Eigen/src/BDCSVD/TODOBdcsvd.txt
index 0bc9a46e6..0bc9a46e6 100644
--- a/unsupported/Eigen/src/SVD/TODOBdcsvd.txt
+++ b/unsupported/Eigen/src/BDCSVD/TODOBdcsvd.txt
diff --git a/unsupported/Eigen/src/SVD/doneInBDCSVD.txt b/unsupported/Eigen/src/BDCSVD/doneInBDCSVD.txt
index 8563ddab8..8563ddab8 100644
--- a/unsupported/Eigen/src/SVD/doneInBDCSVD.txt
+++ b/unsupported/Eigen/src/BDCSVD/doneInBDCSVD.txt
diff --git a/unsupported/Eigen/src/CMakeLists.txt b/unsupported/Eigen/src/CMakeLists.txt
index 8eb2808e3..654a2327f 100644
--- a/unsupported/Eigen/src/CMakeLists.txt
+++ b/unsupported/Eigen/src/CMakeLists.txt
@@ -12,3 +12,4 @@ ADD_SUBDIRECTORY(Skyline)
 ADD_SUBDIRECTORY(SparseExtra)
 ADD_SUBDIRECTORY(KroneckerProduct)
 ADD_SUBDIRECTORY(Splines)
+ADD_SUBDIRECTORY(BDCSVD)
diff --git a/unsupported/Eigen/src/IterativeSolvers/DGMRES.h b/unsupported/Eigen/src/IterativeSolvers/DGMRES.h
index 9fcc8a8d9..0e1b7d977 100644
--- a/unsupported/Eigen/src/IterativeSolvers/DGMRES.h
+++ b/unsupported/Eigen/src/IterativeSolvers/DGMRES.h
@@ -108,6 +108,7 @@ class DGMRES : public IterativeSolverBase<DGMRES<_MatrixType,_Preconditioner> >
     using Base::m_isInitialized;
     using Base::m_tolerance; 
   public:
+    using Base::_solve_impl;
     typedef _MatrixType MatrixType;
     typedef typename MatrixType::Scalar Scalar;
     typedef typename MatrixType::Index Index;
@@ -138,25 +139,9 @@ class DGMRES : public IterativeSolverBase<DGMRES<_MatrixType,_Preconditioner> >
 
   ~DGMRES() {}
   
-  /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A
-    * \a x0 as an initial solution.
-    *
-    * \sa compute()
-    */
-  template<typename Rhs,typename Guess>
-  inline const internal::solve_retval_with_guess<DGMRES, Rhs, Guess>
-  solveWithGuess(const MatrixBase<Rhs>& b, const Guess& x0) const
-  {
-    eigen_assert(m_isInitialized && "DGMRES is not initialized.");
-    eigen_assert(Base::rows()==b.rows()
-              && "DGMRES::solve(): invalid number of rows of the right hand side matrix b");
-    return internal::solve_retval_with_guess
-            <DGMRES, Rhs, Guess>(*this, b.derived(), x0);
-  }
-  
   /** \internal */
   template<typename Rhs,typename Dest>
-  void _solveWithGuess(const Rhs& b, Dest& x) const
+  void _solve_with_guess_impl(const Rhs& b, Dest& x) const
   {    
     bool failed = false;
     for(int j=0; j<b.cols(); ++j)
@@ -175,10 +160,10 @@ class DGMRES : public IterativeSolverBase<DGMRES<_MatrixType,_Preconditioner> >
 
   /** \internal */
   template<typename Rhs,typename Dest>
-  void _solve(const Rhs& b, Dest& x) const
+  void _solve_impl(const Rhs& b, MatrixBase<Dest>& x) const
   {
     x = b;
-    _solveWithGuess(b,x);
+    _solve_with_guess_impl(b,x.derived());
   }
   /** 
    * Get the restart value
@@ -522,21 +507,5 @@ int DGMRES<_MatrixType, _Preconditioner>::dgmresApplyDeflation(const RhsType &x,
   return 0; 
 }
 
-namespace internal {
-
-  template<typename _MatrixType, typename _Preconditioner, typename Rhs>
-struct solve_retval<DGMRES<_MatrixType, _Preconditioner>, Rhs>
-  : solve_retval_base<DGMRES<_MatrixType, _Preconditioner>, Rhs>
-{
-  typedef DGMRES<_MatrixType, _Preconditioner> Dec;
-  EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs)
-
-  template<typename Dest> void evalTo(Dest& dst) const
-  {
-    dec()._solve(rhs(),dst);
-  }
-};
-} // end namespace internal
-
 } // end namespace Eigen
 #endif 
diff --git a/unsupported/Eigen/src/IterativeSolvers/GMRES.h b/unsupported/Eigen/src/IterativeSolvers/GMRES.h
index 67498705b..cd15ce0bf 100644
--- a/unsupported/Eigen/src/IterativeSolvers/GMRES.h
+++ b/unsupported/Eigen/src/IterativeSolvers/GMRES.h
@@ -281,6 +281,7 @@ private:
   int m_restart;
 
 public:
+  using Base::_solve_impl;
   typedef _MatrixType MatrixType;
   typedef typename MatrixType::Scalar Scalar;
   typedef typename MatrixType::Index Index;
@@ -315,25 +316,9 @@ public:
     */
   void set_restart(const int restart) { m_restart=restart; }
 
-  /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A
-    * \a x0 as an initial solution.
-    *
-    * \sa compute()
-    */
-  template<typename Rhs,typename Guess>
-  inline const internal::solve_retval_with_guess<GMRES, Rhs, Guess>
-  solveWithGuess(const MatrixBase<Rhs>& b, const Guess& x0) const
-  {
-    eigen_assert(m_isInitialized && "GMRES is not initialized.");
-    eigen_assert(Base::rows()==b.rows()
-              && "GMRES::solve(): invalid number of rows of the right hand side matrix b");
-    return internal::solve_retval_with_guess
-            <GMRES, Rhs, Guess>(*this, b.derived(), x0);
-  }
-
   /** \internal */
   template<typename Rhs,typename Dest>
-  void _solveWithGuess(const Rhs& b, Dest& x) const
+  void _solve_with_guess_impl(const Rhs& b, Dest& x) const
   {
     bool failed = false;
     for(int j=0; j<b.cols(); ++j)
@@ -353,35 +338,17 @@ public:
 
   /** \internal */
   template<typename Rhs,typename Dest>
-  void _solve(const Rhs& b, Dest& x) const
+  void _solve_impl(const Rhs& b, MatrixBase<Dest> &x) const
   {
     x = b;
     if(x.squaredNorm() == 0) return; // Check Zero right hand side
-    _solveWithGuess(b,x);
+    _solve_with_guess_impl(b,x.derived());
   }
 
 protected:
 
 };
 
-
-namespace internal {
-
-  template<typename _MatrixType, typename _Preconditioner, typename Rhs>
-struct solve_retval<GMRES<_MatrixType, _Preconditioner>, Rhs>
-  : solve_retval_base<GMRES<_MatrixType, _Preconditioner>, Rhs>
-{
-  typedef GMRES<_MatrixType, _Preconditioner> Dec;
-  EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs)
-
-  template<typename Dest> void evalTo(Dest& dst) const
-  {
-    dec()._solve(rhs(),dst);
-  }
-};
-
-} // end namespace internal
-
 } // end namespace Eigen
 
 #endif // EIGEN_GMRES_H
diff --git a/unsupported/Eigen/src/IterativeSolvers/IncompleteCholesky.h b/unsupported/Eigen/src/IterativeSolvers/IncompleteCholesky.h
index 661c1f2e0..dd43de6b3 100644
--- a/unsupported/Eigen/src/IterativeSolvers/IncompleteCholesky.h
+++ b/unsupported/Eigen/src/IterativeSolvers/IncompleteCholesky.h
@@ -27,8 +27,11 @@ namespace Eigen {
  */
 
 template <typename Scalar, int _UpLo = Lower, typename _OrderingType = NaturalOrdering<int> >
-class IncompleteCholesky : internal::noncopyable
+class IncompleteCholesky : public SparseSolverBase<IncompleteCholesky<Scalar,_UpLo,_OrderingType> >
 {
+  protected:
+    typedef SparseSolverBase<IncompleteCholesky<Scalar,_UpLo,_OrderingType> > Base;
+    using Base::m_isInitialized;
   public:
     typedef SparseMatrix<Scalar,ColMajor> MatrixType;
     typedef _OrderingType OrderingType;
@@ -89,7 +92,7 @@ class IncompleteCholesky : internal::noncopyable
     }
     
     template<typename Rhs, typename Dest>
-    void _solve(const Rhs& b, Dest& x) const
+    void _solve_impl(const Rhs& b, Dest& x) const
     {
       eigen_assert(m_factorizationIsOk && "factorize() should be called first");
       if (m_perm.rows() == b.rows())
@@ -103,22 +106,13 @@ class IncompleteCholesky : internal::noncopyable
         x = m_perm * x;
       x = m_scal.asDiagonal() * x;
     }
-    template<typename Rhs> inline const internal::solve_retval<IncompleteCholesky, Rhs>
-    solve(const MatrixBase<Rhs>& b) const
-    {
-      eigen_assert(m_factorizationIsOk && "IncompleteLLT did not succeed");
-      eigen_assert(m_isInitialized && "IncompleteLLT is not initialized.");
-      eigen_assert(cols()==b.rows()
-                && "IncompleteLLT::solve(): invalid number of rows of the right hand side matrix b");
-      return internal::solve_retval<IncompleteCholesky, Rhs>(*this, b.derived());
-    }
+
   protected:
     SparseMatrix<Scalar,ColMajor> m_L;  // The lower part stored in CSC
     ScalarType m_scal; // The vector for scaling the matrix 
     Scalar m_shift; //The initial shift parameter
     bool m_analysisIsOk; 
     bool m_factorizationIsOk; 
-    bool m_isInitialized;
     ComputationInfo m_info;
     PermutationType m_perm; 
     
@@ -256,22 +250,6 @@ inline void IncompleteCholesky<Scalar,_UpLo, OrderingType>::updateList(const Idx
     listCol[rowIdx(jk)].push_back(col);
   }
 }
-namespace internal {
-
-template<typename _Scalar, int _UpLo, typename OrderingType, typename Rhs>
-struct solve_retval<IncompleteCholesky<_Scalar,  _UpLo, OrderingType>, Rhs>
-  : solve_retval_base<IncompleteCholesky<_Scalar, _UpLo, OrderingType>, Rhs>
-{
-  typedef IncompleteCholesky<_Scalar, _UpLo, OrderingType> Dec;
-  EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs)
-
-  template<typename Dest> void evalTo(Dest& dst) const
-  {
-    dec()._solve(rhs(),dst);
-  }
-};
-
-} // end namespace internal
 
 } // end namespace Eigen 
 
diff --git a/unsupported/Eigen/src/IterativeSolvers/IncompleteLU.h b/unsupported/Eigen/src/IterativeSolvers/IncompleteLU.h
index 67e780181..7d08c3515 100644
--- a/unsupported/Eigen/src/IterativeSolvers/IncompleteLU.h
+++ b/unsupported/Eigen/src/IterativeSolvers/IncompleteLU.h
@@ -13,8 +13,12 @@
 namespace Eigen { 
 
 template <typename _Scalar>
-class IncompleteLU
+class IncompleteLU : public SparseSolverBase<IncompleteLU<_Scalar> >
 {
+  protected:
+    typedef SparseSolverBase<IncompleteLU<_Scalar> > Base;
+    using Base::m_isInitialized;
+    
     typedef _Scalar Scalar;
     typedef Matrix<Scalar,Dynamic,1> Vector;
     typedef typename Vector::Index Index;
@@ -23,10 +27,10 @@ class IncompleteLU
   public:
     typedef Matrix<Scalar,Dynamic,Dynamic> MatrixType;
 
-    IncompleteLU() : m_isInitialized(false) {}
+    IncompleteLU() {}
 
     template<typename MatrixType>
-    IncompleteLU(const MatrixType& mat) : m_isInitialized(false)
+    IncompleteLU(const MatrixType& mat)
     {
       compute(mat);
     }
@@ -71,43 +75,16 @@ class IncompleteLU
     }
 
     template<typename Rhs, typename Dest>
-    void _solve(const Rhs& b, Dest& x) const
+    void _solve_impl(const Rhs& b, Dest& x) const
     {
       x = m_lu.template triangularView<UnitLower>().solve(b);
       x = m_lu.template triangularView<Upper>().solve(x);
     }
 
-    template<typename Rhs> inline const internal::solve_retval<IncompleteLU, Rhs>
-    solve(const MatrixBase<Rhs>& b) const
-    {
-      eigen_assert(m_isInitialized && "IncompleteLU is not initialized.");
-      eigen_assert(cols()==b.rows()
-                && "IncompleteLU::solve(): invalid number of rows of the right hand side matrix b");
-      return internal::solve_retval<IncompleteLU, Rhs>(*this, b.derived());
-    }
-
   protected:
     FactorType m_lu;
-    bool m_isInitialized;
-};
-
-namespace internal {
-
-template<typename _MatrixType, typename Rhs>
-struct solve_retval<IncompleteLU<_MatrixType>, Rhs>
-  : solve_retval_base<IncompleteLU<_MatrixType>, Rhs>
-{
-  typedef IncompleteLU<_MatrixType> Dec;
-  EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs)
-
-  template<typename Dest> void evalTo(Dest& dst) const
-  {
-    dec()._solve(rhs(),dst);
-  }
 };
 
-} // end namespace internal
-
 } // end namespace Eigen
 
 #endif // EIGEN_INCOMPLETE_LU_H
diff --git a/unsupported/Eigen/src/IterativeSolvers/MINRES.h b/unsupported/Eigen/src/IterativeSolvers/MINRES.h
index 98f9ecc17..aaf42c78a 100644
--- a/unsupported/Eigen/src/IterativeSolvers/MINRES.h
+++ b/unsupported/Eigen/src/IterativeSolvers/MINRES.h
@@ -2,7 +2,7 @@
 // for linear algebra.
 //
 // Copyright (C) 2012 Giacomo Po <gpo@ucla.edu>
-// Copyright (C) 2011 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2011-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
@@ -217,6 +217,7 @@ namespace Eigen {
         using Base::m_info;
         using Base::m_isInitialized;
     public:
+        using Base::_solve_impl;
         typedef _MatrixType MatrixType;
         typedef typename MatrixType::Scalar Scalar;
         typedef typename MatrixType::Index Index;
@@ -244,26 +245,10 @@ namespace Eigen {
         
         /** Destructor. */
         ~MINRES(){}
-		
-        /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A
-         * \a x0 as an initial solution.
-         *
-         * \sa compute()
-         */
-        template<typename Rhs,typename Guess>
-        inline const internal::solve_retval_with_guess<MINRES, Rhs, Guess>
-        solveWithGuess(const MatrixBase<Rhs>& b, const Guess& x0) const
-        {
-            eigen_assert(m_isInitialized && "MINRES is not initialized.");
-            eigen_assert(Base::rows()==b.rows()
-                         && "MINRES::solve(): invalid number of rows of the right hand side matrix b");
-            return internal::solve_retval_with_guess
-            <MINRES, Rhs, Guess>(*this, b.derived(), x0);
-        }
-        
+
         /** \internal */
         template<typename Rhs,typename Dest>
-        void _solveWithGuess(const Rhs& b, Dest& x) const
+        void _solve_with_guess_impl(const Rhs& b, Dest& x) const
         {
             m_iterations = Base::maxIterations();
             m_error = Base::m_tolerance;
@@ -284,33 +269,16 @@ namespace Eigen {
         
         /** \internal */
         template<typename Rhs,typename Dest>
-        void _solve(const Rhs& b, Dest& x) const
+        void _solve_impl(const Rhs& b, MatrixBase<Dest> &x) const
         {
             x.setZero();
-            _solveWithGuess(b,x);
+            _solve_with_guess_impl(b,x.derived());
         }
         
     protected:
         
     };
-    
-    namespace internal {
-        
-        template<typename _MatrixType, int _UpLo, typename _Preconditioner, typename Rhs>
-        struct solve_retval<MINRES<_MatrixType,_UpLo,_Preconditioner>, Rhs>
-        : solve_retval_base<MINRES<_MatrixType,_UpLo,_Preconditioner>, Rhs>
-        {
-            typedef MINRES<_MatrixType,_UpLo,_Preconditioner> Dec;
-            EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs)
-            
-            template<typename Dest> void evalTo(Dest& dst) const
-            {
-                dec()._solve(rhs(),dst);
-            }
-        };
-        
-    } // end namespace internal
-    
+
 } // end namespace Eigen
 
 #endif // EIGEN_MINRES_H
diff --git a/unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h b/unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h
index b8f2cba17..72e25db19 100644
--- a/unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h
+++ b/unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h
@@ -154,16 +154,41 @@ void KroneckerProductSparse<Lhs,Rhs>::evalTo(Dest& dst) const
   dst.resize(this->rows(), this->cols());
   dst.resizeNonZeros(0);
   
+  // 1 - evaluate the operands if needed:
+  typedef typename internal::nested_eval<Lhs,10>::type Lhs1;
+  typedef typename internal::remove_all<Lhs1>::type Lhs1Cleaned;
+  const Lhs1 lhs1(m_A);
+  typedef typename internal::nested_eval<Rhs,10>::type Rhs1;
+  typedef typename internal::remove_all<Rhs1>::type Rhs1Cleaned;
+  const Rhs1 rhs1(m_B);
+  
+  // 2 - construct a SparseView for dense operands
+  typedef typename internal::conditional<internal::is_same<typename internal::traits<Lhs1Cleaned>::StorageKind,Sparse>::value, Lhs1, SparseView<const Lhs1Cleaned> >::type Lhs2;
+  typedef typename internal::remove_all<Lhs2>::type Lhs2Cleaned;
+  const Lhs2 lhs2(lhs1);
+  typedef typename internal::conditional<internal::is_same<typename internal::traits<Rhs1Cleaned>::StorageKind,Sparse>::value, Rhs1, SparseView<const Rhs1Cleaned> >::type Rhs2;
+  typedef typename internal::remove_all<Rhs2>::type Rhs2Cleaned;
+  const Rhs2 rhs2(rhs1);
+  
+  // 3 - construct respective evaluators
+  typedef typename internal::evaluator<Lhs2Cleaned>::type LhsEval;
+  LhsEval lhsEval(lhs2);
+  typedef typename internal::evaluator<Rhs2Cleaned>::type RhsEval;
+  RhsEval rhsEval(rhs2);
+  
+  typedef typename LhsEval::InnerIterator LhsInnerIterator;
+  typedef typename RhsEval::InnerIterator RhsInnerIterator;
+  
   // compute number of non-zeros per innervectors of dst
   {
     VectorXi nnzA = VectorXi::Zero(Dest::IsRowMajor ? m_A.rows() : m_A.cols());
     for (Index kA=0; kA < m_A.outerSize(); ++kA)
-      for (typename Lhs::InnerIterator itA(m_A,kA); itA; ++itA)
+      for (LhsInnerIterator itA(lhsEval,kA); itA; ++itA)
         nnzA(Dest::IsRowMajor ? itA.row() : itA.col())++;
       
     VectorXi nnzB = VectorXi::Zero(Dest::IsRowMajor ? m_B.rows() : m_B.cols());
     for (Index kB=0; kB < m_B.outerSize(); ++kB)
-      for (typename Rhs::InnerIterator itB(m_B,kB); itB; ++itB)
+      for (RhsInnerIterator itB(rhsEval,kB); itB; ++itB)
         nnzB(Dest::IsRowMajor ? itB.row() : itB.col())++;
     
     Matrix<int,Dynamic,Dynamic,ColMajor> nnzAB = nnzB * nnzA.transpose();
@@ -174,9 +199,9 @@ void KroneckerProductSparse<Lhs,Rhs>::evalTo(Dest& dst) const
   {
     for (Index kB=0; kB < m_B.outerSize(); ++kB)
     {
-      for (typename Lhs::InnerIterator itA(m_A,kA); itA; ++itA)
+      for (LhsInnerIterator itA(lhsEval,kA); itA; ++itA)
       {
-        for (typename Rhs::InnerIterator itB(m_B,kB); itB; ++itB)
+        for (RhsInnerIterator itB(rhsEval,kB); itB; ++itB)
         {
           const Index i = itA.row() * Br + itB.row(),
                       j = itA.col() * Bc + itB.col();
@@ -201,8 +226,7 @@ struct traits<KroneckerProduct<_Lhs,_Rhs> >
     Rows = size_at_compile_time<traits<Lhs>::RowsAtCompileTime, traits<Rhs>::RowsAtCompileTime>::ret,
     Cols = size_at_compile_time<traits<Lhs>::ColsAtCompileTime, traits<Rhs>::ColsAtCompileTime>::ret,
     MaxRows = size_at_compile_time<traits<Lhs>::MaxRowsAtCompileTime, traits<Rhs>::MaxRowsAtCompileTime>::ret,
-    MaxCols = size_at_compile_time<traits<Lhs>::MaxColsAtCompileTime, traits<Rhs>::MaxColsAtCompileTime>::ret,
-    CoeffReadCost = Lhs::CoeffReadCost + Rhs::CoeffReadCost + NumTraits<Scalar>::MulCost
+    MaxCols = size_at_compile_time<traits<Lhs>::MaxColsAtCompileTime, traits<Rhs>::MaxColsAtCompileTime>::ret
   };
 
   typedef Matrix<Scalar,Rows,Cols> ReturnType;
@@ -215,7 +239,7 @@ struct traits<KroneckerProductSparse<_Lhs,_Rhs> >
   typedef typename remove_all<_Lhs>::type Lhs;
   typedef typename remove_all<_Rhs>::type Rhs;
   typedef typename scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType Scalar;
-  typedef typename promote_storage_type<typename traits<Lhs>::StorageKind, typename traits<Rhs>::StorageKind>::ret StorageKind;
+  typedef typename cwise_promote_storage_type<typename traits<Lhs>::StorageKind, typename traits<Rhs>::StorageKind, scalar_product_op<typename Lhs::Scalar, typename Rhs::Scalar> >::ret StorageKind;
   typedef typename promote_index_type<typename Lhs::Index, typename Rhs::Index>::type Index;
 
   enum {
diff --git a/unsupported/Eigen/src/MatrixFunctions/MatrixExponential.h b/unsupported/Eigen/src/MatrixFunctions/MatrixExponential.h
index 160120d03..9e0545660 100644
--- a/unsupported/Eigen/src/MatrixFunctions/MatrixExponential.h
+++ b/unsupported/Eigen/src/MatrixFunctions/MatrixExponential.h
@@ -392,14 +392,15 @@ template<typename Derived> struct MatrixExponentialReturnValue
     template <typename ResultType>
     inline void evalTo(ResultType& result) const
     {
-      internal::matrix_exp_compute(m_src, result);
+      const typename internal::nested_eval<Derived, 10>::type tmp(m_src);
+      internal::matrix_exp_compute(tmp, result);
     }
 
     Index rows() const { return m_src.rows(); }
     Index cols() const { return m_src.cols(); }
 
   protected:
-    const typename internal::nested<Derived, 10>::type m_src;
+    const typename internal::nested<Derived>::type m_src;
 };
 
 namespace internal {
diff --git a/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h b/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h
index a35c11be5..b68aae5e8 100644
--- a/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h
+++ b/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h
@@ -485,7 +485,7 @@ template<typename Derived> class MatrixFunctionReturnValue
     typedef typename internal::stem_function<Scalar>::type StemFunction;
 
   protected:
-    typedef typename internal::nested<Derived, 10>::type DerivedNested;
+    typedef typename internal::nested<Derived>::type DerivedNested;
 
   public:
 
@@ -503,18 +503,19 @@ template<typename Derived> class MatrixFunctionReturnValue
     template <typename ResultType>
     inline void evalTo(ResultType& result) const
     {
-      typedef typename internal::remove_all<DerivedNested>::type DerivedNestedClean;
-      typedef internal::traits<DerivedNestedClean> Traits;
+      typedef typename internal::nested_eval<Derived, 10>::type NestedEvalType;
+      typedef typename internal::remove_all<NestedEvalType>::type NestedEvalTypeClean;
+      typedef internal::traits<NestedEvalTypeClean> Traits;
       static const int RowsAtCompileTime = Traits::RowsAtCompileTime;
       static const int ColsAtCompileTime = Traits::ColsAtCompileTime;
-      static const int Options = DerivedNestedClean::Options;
+      static const int Options = NestedEvalTypeClean::Options;
       typedef std::complex<typename NumTraits<Scalar>::Real> ComplexScalar;
       typedef Matrix<ComplexScalar, Dynamic, Dynamic, Options, RowsAtCompileTime, ColsAtCompileTime> DynMatrixType;
 
       typedef internal::MatrixFunctionAtomic<DynMatrixType> AtomicType;
       AtomicType atomic(m_f);
 
-      internal::matrix_function_compute<DerivedNestedClean>::run(m_A, atomic, result);
+      internal::matrix_function_compute<NestedEvalTypeClean>::run(m_A, atomic, result);
     }
 
     Index rows() const { return m_A.rows(); }
diff --git a/unsupported/Eigen/src/MatrixFunctions/MatrixLogarithm.h b/unsupported/Eigen/src/MatrixFunctions/MatrixLogarithm.h
index d46ccc145..42b60b9b1 100644
--- a/unsupported/Eigen/src/MatrixFunctions/MatrixLogarithm.h
+++ b/unsupported/Eigen/src/MatrixFunctions/MatrixLogarithm.h
@@ -310,7 +310,7 @@ public:
   typedef typename Derived::Index Index;
 
 protected:
-  typedef typename internal::nested<Derived, 10>::type DerivedNested;
+  typedef typename internal::nested<Derived>::type DerivedNested;
 
 public:
 
@@ -327,17 +327,18 @@ public:
   template <typename ResultType>
   inline void evalTo(ResultType& result) const
   {
-    typedef typename internal::remove_all<DerivedNested>::type DerivedNestedClean;
-    typedef internal::traits<DerivedNestedClean> Traits;
+    typedef typename internal::nested_eval<Derived, 10>::type DerivedEvalType;
+    typedef typename internal::remove_all<DerivedEvalType>::type DerivedEvalTypeClean;
+    typedef internal::traits<DerivedEvalTypeClean> Traits;
     static const int RowsAtCompileTime = Traits::RowsAtCompileTime;
     static const int ColsAtCompileTime = Traits::ColsAtCompileTime;
-    static const int Options = DerivedNestedClean::Options;
+    static const int Options = DerivedEvalTypeClean::Options;
     typedef std::complex<typename NumTraits<Scalar>::Real> ComplexScalar;
     typedef Matrix<ComplexScalar, Dynamic, Dynamic, Options, RowsAtCompileTime, ColsAtCompileTime> DynMatrixType;
     typedef internal::MatrixLogarithmAtomic<DynMatrixType> AtomicType;
     AtomicType atomic;
     
-    internal::matrix_function_compute<DerivedNestedClean>::run(m_A, atomic, result);
+    internal::matrix_function_compute<DerivedEvalTypeClean>::run(m_A, atomic, result);
   }
 
   Index rows() const { return m_A.rows(); }
diff --git a/unsupported/Eigen/src/MatrixFunctions/MatrixSquareRoot.h b/unsupported/Eigen/src/MatrixFunctions/MatrixSquareRoot.h
index 8ca4f4864..3a4d6eb3f 100644
--- a/unsupported/Eigen/src/MatrixFunctions/MatrixSquareRoot.h
+++ b/unsupported/Eigen/src/MatrixFunctions/MatrixSquareRoot.h
@@ -320,7 +320,7 @@ template<typename Derived> class MatrixSquareRootReturnValue
 {
   protected:
     typedef typename Derived::Index Index;
-    typedef typename internal::nested<Derived, 10>::type DerivedNested;
+    typedef typename internal::nested<Derived>::type DerivedNested;
 
   public:
     /** \brief Constructor.
@@ -338,8 +338,10 @@ template<typename Derived> class MatrixSquareRootReturnValue
     template <typename ResultType>
     inline void evalTo(ResultType& result) const
     {
-      typedef typename internal::remove_all<DerivedNested>::type DerivedNestedClean;
-      internal::matrix_sqrt_compute<DerivedNestedClean>::run(m_src, result);
+      typedef typename internal::nested_eval<Derived, 10>::type DerivedEvalType;
+      typedef typename internal::remove_all<DerivedEvalType>::type DerivedEvalTypeClean;
+      DerivedEvalType tmp(m_src);
+      internal::matrix_sqrt_compute<DerivedEvalTypeClean>::run(tmp, result);
     }
 
     Index rows() const { return m_src.rows(); }
diff --git a/unsupported/Eigen/src/NonLinearOptimization/LevenbergMarquardt.h b/unsupported/Eigen/src/NonLinearOptimization/LevenbergMarquardt.h
index ecb8dccf4..69106ddc5 100644
--- a/unsupported/Eigen/src/NonLinearOptimization/LevenbergMarquardt.h
+++ b/unsupported/Eigen/src/NonLinearOptimization/LevenbergMarquardt.h
@@ -45,18 +45,24 @@ namespace LevenbergMarquardtSpace {
 template<typename FunctorType, typename Scalar=double>
 class LevenbergMarquardt
 {
+    static Scalar sqrt_epsilon()
+    {
+      using std::sqrt;
+      return sqrt(NumTraits<Scalar>::epsilon());
+    }
+    
 public:
     LevenbergMarquardt(FunctorType &_functor)
         : functor(_functor) { nfev = njev = iter = 0;  fnorm = gnorm = 0.; useExternalScaling=false; }
 
     typedef DenseIndex Index;
-
+    
     struct Parameters {
         Parameters()
             : factor(Scalar(100.))
             , maxfev(400)
-            , ftol(sqrt_(NumTraits<Scalar>::epsilon()))
-            , xtol(sqrt_(NumTraits<Scalar>::epsilon()))
+            , ftol(sqrt_epsilon())
+            , xtol(sqrt_epsilon())
             , gtol(Scalar(0.))
             , epsfcn(Scalar(0.)) {}
         Scalar factor;
@@ -72,7 +78,7 @@ public:
 
     LevenbergMarquardtSpace::Status lmder1(
             FVectorType &x,
-            const Scalar tol = sqrt_(NumTraits<Scalar>::epsilon())
+            const Scalar tol = sqrt_epsilon()
             );
 
     LevenbergMarquardtSpace::Status minimize(FVectorType &x);
@@ -83,12 +89,12 @@ public:
             FunctorType &functor,
             FVectorType &x,
             Index *nfev,
-            const Scalar tol = sqrt_(NumTraits<Scalar>::epsilon())
+            const Scalar tol = sqrt_epsilon()
             );
 
     LevenbergMarquardtSpace::Status lmstr1(
             FVectorType  &x,
-            const Scalar tol = sqrt_(NumTraits<Scalar>::epsilon())
+            const Scalar tol = sqrt_epsilon()
             );
 
     LevenbergMarquardtSpace::Status minimizeOptimumStorage(FVectorType  &x);
@@ -109,7 +115,6 @@ public:
 
     Scalar lm_param(void) { return par; }
 private:
-    static Scalar sqrt_(const Scalar& x) { using std::sqrt; return sqrt(x); }
     
     FunctorType &functor;
     Index n;
diff --git a/unsupported/Eigen/src/SVD/CMakeLists.txt b/unsupported/Eigen/src/SVD/CMakeLists.txt
deleted file mode 100644
index b40baf092..000000000
--- a/unsupported/Eigen/src/SVD/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-FILE(GLOB Eigen_SVD_SRCS "*.h")
-
-INSTALL(FILES
-  ${Eigen_SVD_SRCS}
-  DESTINATION ${INCLUDE_INSTALL_DIR}unsupported/Eigen/src/SVD COMPONENT Devel
-  )
diff --git a/unsupported/Eigen/src/SVD/JacobiSVD.h b/unsupported/Eigen/src/SVD/JacobiSVD.h
deleted file mode 100644
index 02fac409e..000000000
--- a/unsupported/Eigen/src/SVD/JacobiSVD.h
+++ /dev/null
@@ -1,782 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2009-2010 Benoit Jacob <jacob.benoit.1@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_JACOBISVD_H
-#define EIGEN_JACOBISVD_H
-
-namespace Eigen { 
-
-namespace internal {
-// forward declaration (needed by ICC)
-// the empty body is required by MSVC
-template<typename MatrixType, int QRPreconditioner,
-         bool IsComplex = NumTraits<typename MatrixType::Scalar>::IsComplex>
-struct svd_precondition_2x2_block_to_be_real {};
-
-/*** QR preconditioners (R-SVD)
- ***
- *** Their role is to reduce the problem of computing the SVD to the case of a square matrix.
- *** This approach, known as R-SVD, is an optimization for rectangular-enough matrices, and is a requirement for
- *** JacobiSVD which by itself is only able to work on square matrices.
- ***/
-
-enum { PreconditionIfMoreColsThanRows, PreconditionIfMoreRowsThanCols };
-
-template<typename MatrixType, int QRPreconditioner, int Case>
-struct qr_preconditioner_should_do_anything
-{
-  enum { a = MatrixType::RowsAtCompileTime != Dynamic &&
-             MatrixType::ColsAtCompileTime != Dynamic &&
-             MatrixType::ColsAtCompileTime <= MatrixType::RowsAtCompileTime,
-         b = MatrixType::RowsAtCompileTime != Dynamic &&
-             MatrixType::ColsAtCompileTime != Dynamic &&
-             MatrixType::RowsAtCompileTime <= MatrixType::ColsAtCompileTime,
-         ret = !( (QRPreconditioner == NoQRPreconditioner) ||
-                  (Case == PreconditionIfMoreColsThanRows && bool(a)) ||
-                  (Case == PreconditionIfMoreRowsThanCols && bool(b)) )
-  };
-};
-
-template<typename MatrixType, int QRPreconditioner, int Case,
-         bool DoAnything = qr_preconditioner_should_do_anything<MatrixType, QRPreconditioner, Case>::ret
-> struct qr_preconditioner_impl {};
-
-template<typename MatrixType, int QRPreconditioner, int Case>
-class qr_preconditioner_impl<MatrixType, QRPreconditioner, Case, false>
-{
-public:
-  typedef typename MatrixType::Index Index;
-  void allocate(const JacobiSVD<MatrixType, QRPreconditioner>&) {}
-  bool run(JacobiSVD<MatrixType, QRPreconditioner>&, const MatrixType&)
-  {
-    return false;
-  }
-};
-
-/*** preconditioner using FullPivHouseholderQR ***/
-
-template<typename MatrixType>
-class qr_preconditioner_impl<MatrixType, FullPivHouseholderQRPreconditioner, PreconditionIfMoreRowsThanCols, true>
-{
-public:
-  typedef typename MatrixType::Index Index;
-  typedef typename MatrixType::Scalar Scalar;
-  enum
-  {
-    RowsAtCompileTime = MatrixType::RowsAtCompileTime,
-    MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime
-  };
-  typedef Matrix<Scalar, 1, RowsAtCompileTime, RowMajor, 1, MaxRowsAtCompileTime> WorkspaceType;
-
-  void allocate(const JacobiSVD<MatrixType, FullPivHouseholderQRPreconditioner>& svd)
-  {
-    if (svd.rows() != m_qr.rows() || svd.cols() != m_qr.cols())
-    {
-      m_qr.~QRType();
-      ::new (&m_qr) QRType(svd.rows(), svd.cols());
-    }
-    if (svd.m_computeFullU) m_workspace.resize(svd.rows());
-  }
-
-  bool run(JacobiSVD<MatrixType, FullPivHouseholderQRPreconditioner>& svd, const MatrixType& matrix)
-  {
-    if(matrix.rows() > matrix.cols())
-    {
-      m_qr.compute(matrix);
-      svd.m_workMatrix = m_qr.matrixQR().block(0,0,matrix.cols(),matrix.cols()).template triangularView<Upper>();
-      if(svd.m_computeFullU) m_qr.matrixQ().evalTo(svd.m_matrixU, m_workspace);
-      if(svd.computeV()) svd.m_matrixV = m_qr.colsPermutation();
-      return true;
-    }
-    return false;
-  }
-private:
-  typedef FullPivHouseholderQR<MatrixType> QRType;
-  QRType m_qr;
-  WorkspaceType m_workspace;
-};
-
-template<typename MatrixType>
-class qr_preconditioner_impl<MatrixType, FullPivHouseholderQRPreconditioner, PreconditionIfMoreColsThanRows, true>
-{
-public:
-  typedef typename MatrixType::Index Index;
-  typedef typename MatrixType::Scalar Scalar;
-  enum
-  {
-    RowsAtCompileTime = MatrixType::RowsAtCompileTime,
-    ColsAtCompileTime = MatrixType::ColsAtCompileTime,
-    MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
-    MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,
-    Options = MatrixType::Options
-  };
-  typedef Matrix<Scalar, ColsAtCompileTime, RowsAtCompileTime, Options, MaxColsAtCompileTime, MaxRowsAtCompileTime>
-          TransposeTypeWithSameStorageOrder;
-
-  void allocate(const JacobiSVD<MatrixType, FullPivHouseholderQRPreconditioner>& svd)
-  {
-    if (svd.cols() != m_qr.rows() || svd.rows() != m_qr.cols())
-    {
-      m_qr.~QRType();
-      ::new (&m_qr) QRType(svd.cols(), svd.rows());
-    }
-    m_adjoint.resize(svd.cols(), svd.rows());
-    if (svd.m_computeFullV) m_workspace.resize(svd.cols());
-  }
-
-  bool run(JacobiSVD<MatrixType, FullPivHouseholderQRPreconditioner>& svd, const MatrixType& matrix)
-  {
-    if(matrix.cols() > matrix.rows())
-    {
-      m_adjoint = matrix.adjoint();
-      m_qr.compute(m_adjoint);
-      svd.m_workMatrix = m_qr.matrixQR().block(0,0,matrix.rows(),matrix.rows()).template triangularView<Upper>().adjoint();
-      if(svd.m_computeFullV) m_qr.matrixQ().evalTo(svd.m_matrixV, m_workspace);
-      if(svd.computeU()) svd.m_matrixU = m_qr.colsPermutation();
-      return true;
-    }
-    else return false;
-  }
-private:
-  typedef FullPivHouseholderQR<TransposeTypeWithSameStorageOrder> QRType;
-  QRType m_qr;
-  TransposeTypeWithSameStorageOrder m_adjoint;
-  typename internal::plain_row_type<MatrixType>::type m_workspace;
-};
-
-/*** preconditioner using ColPivHouseholderQR ***/
-
-template<typename MatrixType>
-class qr_preconditioner_impl<MatrixType, ColPivHouseholderQRPreconditioner, PreconditionIfMoreRowsThanCols, true>
-{
-public:
-  typedef typename MatrixType::Index Index;
-
-  void allocate(const JacobiSVD<MatrixType, ColPivHouseholderQRPreconditioner>& svd)
-  {
-    if (svd.rows() != m_qr.rows() || svd.cols() != m_qr.cols())
-    {
-      m_qr.~QRType();
-      ::new (&m_qr) QRType(svd.rows(), svd.cols());
-    }
-    if (svd.m_computeFullU) m_workspace.resize(svd.rows());
-    else if (svd.m_computeThinU) m_workspace.resize(svd.cols());
-  }
-
-  bool run(JacobiSVD<MatrixType, ColPivHouseholderQRPreconditioner>& svd, const MatrixType& matrix)
-  {
-    if(matrix.rows() > matrix.cols())
-    {
-      m_qr.compute(matrix);
-      svd.m_workMatrix = m_qr.matrixQR().block(0,0,matrix.cols(),matrix.cols()).template triangularView<Upper>();
-      if(svd.m_computeFullU) m_qr.householderQ().evalTo(svd.m_matrixU, m_workspace);
-      else if(svd.m_computeThinU)
-      {
-        svd.m_matrixU.setIdentity(matrix.rows(), matrix.cols());
-        m_qr.householderQ().applyThisOnTheLeft(svd.m_matrixU, m_workspace);
-      }
-      if(svd.computeV()) svd.m_matrixV = m_qr.colsPermutation();
-      return true;
-    }
-    return false;
-  }
-
-private:
-  typedef ColPivHouseholderQR<MatrixType> QRType;
-  QRType m_qr;
-  typename internal::plain_col_type<MatrixType>::type m_workspace;
-};
-
-template<typename MatrixType>
-class qr_preconditioner_impl<MatrixType, ColPivHouseholderQRPreconditioner, PreconditionIfMoreColsThanRows, true>
-{
-public:
-  typedef typename MatrixType::Index Index;
-  typedef typename MatrixType::Scalar Scalar;
-  enum
-  {
-    RowsAtCompileTime = MatrixType::RowsAtCompileTime,
-    ColsAtCompileTime = MatrixType::ColsAtCompileTime,
-    MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
-    MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,
-    Options = MatrixType::Options
-  };
-
-  typedef Matrix<Scalar, ColsAtCompileTime, RowsAtCompileTime, Options, MaxColsAtCompileTime, MaxRowsAtCompileTime>
-          TransposeTypeWithSameStorageOrder;
-
-  void allocate(const JacobiSVD<MatrixType, ColPivHouseholderQRPreconditioner>& svd)
-  {
-    if (svd.cols() != m_qr.rows() || svd.rows() != m_qr.cols())
-    {
-      m_qr.~QRType();
-      ::new (&m_qr) QRType(svd.cols(), svd.rows());
-    }
-    if (svd.m_computeFullV) m_workspace.resize(svd.cols());
-    else if (svd.m_computeThinV) m_workspace.resize(svd.rows());
-    m_adjoint.resize(svd.cols(), svd.rows());
-  }
-
-  bool run(JacobiSVD<MatrixType, ColPivHouseholderQRPreconditioner>& svd, const MatrixType& matrix)
-  {
-    if(matrix.cols() > matrix.rows())
-    {
-      m_adjoint = matrix.adjoint();
-      m_qr.compute(m_adjoint);
-
-      svd.m_workMatrix = m_qr.matrixQR().block(0,0,matrix.rows(),matrix.rows()).template triangularView<Upper>().adjoint();
-      if(svd.m_computeFullV) m_qr.householderQ().evalTo(svd.m_matrixV, m_workspace);
-      else if(svd.m_computeThinV)
-      {
-        svd.m_matrixV.setIdentity(matrix.cols(), matrix.rows());
-        m_qr.householderQ().applyThisOnTheLeft(svd.m_matrixV, m_workspace);
-      }
-      if(svd.computeU()) svd.m_matrixU = m_qr.colsPermutation();
-      return true;
-    }
-    else return false;
-  }
-
-private:
-  typedef ColPivHouseholderQR<TransposeTypeWithSameStorageOrder> QRType;
-  QRType m_qr;
-  TransposeTypeWithSameStorageOrder m_adjoint;
-  typename internal::plain_row_type<MatrixType>::type m_workspace;
-};
-
-/*** preconditioner using HouseholderQR ***/
-
-template<typename MatrixType>
-class qr_preconditioner_impl<MatrixType, HouseholderQRPreconditioner, PreconditionIfMoreRowsThanCols, true>
-{
-public:
-  typedef typename MatrixType::Index Index;
-
-  void allocate(const JacobiSVD<MatrixType, HouseholderQRPreconditioner>& svd)
-  {
-    if (svd.rows() != m_qr.rows() || svd.cols() != m_qr.cols())
-    {
-      m_qr.~QRType();
-      ::new (&m_qr) QRType(svd.rows(), svd.cols());
-    }
-    if (svd.m_computeFullU) m_workspace.resize(svd.rows());
-    else if (svd.m_computeThinU) m_workspace.resize(svd.cols());
-  }
-
-  bool run(JacobiSVD<MatrixType, HouseholderQRPreconditioner>& svd, const MatrixType& matrix)
-  {
-    if(matrix.rows() > matrix.cols())
-    {
-      m_qr.compute(matrix);
-      svd.m_workMatrix = m_qr.matrixQR().block(0,0,matrix.cols(),matrix.cols()).template triangularView<Upper>();
-      if(svd.m_computeFullU) m_qr.householderQ().evalTo(svd.m_matrixU, m_workspace);
-      else if(svd.m_computeThinU)
-      {
-        svd.m_matrixU.setIdentity(matrix.rows(), matrix.cols());
-        m_qr.householderQ().applyThisOnTheLeft(svd.m_matrixU, m_workspace);
-      }
-      if(svd.computeV()) svd.m_matrixV.setIdentity(matrix.cols(), matrix.cols());
-      return true;
-    }
-    return false;
-  }
-private:
-  typedef HouseholderQR<MatrixType> QRType;
-  QRType m_qr;
-  typename internal::plain_col_type<MatrixType>::type m_workspace;
-};
-
-template<typename MatrixType>
-class qr_preconditioner_impl<MatrixType, HouseholderQRPreconditioner, PreconditionIfMoreColsThanRows, true>
-{
-public:
-  typedef typename MatrixType::Index Index;
-  typedef typename MatrixType::Scalar Scalar;
-  enum
-  {
-    RowsAtCompileTime = MatrixType::RowsAtCompileTime,
-    ColsAtCompileTime = MatrixType::ColsAtCompileTime,
-    MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
-    MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,
-    Options = MatrixType::Options
-  };
-
-  typedef Matrix<Scalar, ColsAtCompileTime, RowsAtCompileTime, Options, MaxColsAtCompileTime, MaxRowsAtCompileTime>
-          TransposeTypeWithSameStorageOrder;
-
-  void allocate(const JacobiSVD<MatrixType, HouseholderQRPreconditioner>& svd)
-  {
-    if (svd.cols() != m_qr.rows() || svd.rows() != m_qr.cols())
-    {
-      m_qr.~QRType();
-      ::new (&m_qr) QRType(svd.cols(), svd.rows());
-    }
-    if (svd.m_computeFullV) m_workspace.resize(svd.cols());
-    else if (svd.m_computeThinV) m_workspace.resize(svd.rows());
-    m_adjoint.resize(svd.cols(), svd.rows());
-  }
-
-  bool run(JacobiSVD<MatrixType, HouseholderQRPreconditioner>& svd, const MatrixType& matrix)
-  {
-    if(matrix.cols() > matrix.rows())
-    {
-      m_adjoint = matrix.adjoint();
-      m_qr.compute(m_adjoint);
-
-      svd.m_workMatrix = m_qr.matrixQR().block(0,0,matrix.rows(),matrix.rows()).template triangularView<Upper>().adjoint();
-      if(svd.m_computeFullV) m_qr.householderQ().evalTo(svd.m_matrixV, m_workspace);
-      else if(svd.m_computeThinV)
-      {
-        svd.m_matrixV.setIdentity(matrix.cols(), matrix.rows());
-        m_qr.householderQ().applyThisOnTheLeft(svd.m_matrixV, m_workspace);
-      }
-      if(svd.computeU()) svd.m_matrixU.setIdentity(matrix.rows(), matrix.rows());
-      return true;
-    }
-    else return false;
-  }
-
-private:
-  typedef HouseholderQR<TransposeTypeWithSameStorageOrder> QRType;
-  QRType m_qr;
-  TransposeTypeWithSameStorageOrder m_adjoint;
-  typename internal::plain_row_type<MatrixType>::type m_workspace;
-};
-
-/*** 2x2 SVD implementation
- ***
- *** JacobiSVD consists in performing a series of 2x2 SVD subproblems
- ***/
-
-template<typename MatrixType, int QRPreconditioner>
-struct svd_precondition_2x2_block_to_be_real<MatrixType, QRPreconditioner, false>
-{
-  typedef JacobiSVD<MatrixType, QRPreconditioner> SVD;
-  typedef typename SVD::Index Index;
-  static void run(typename SVD::WorkMatrixType&, SVD&, Index, Index) {}
-};
-
-template<typename MatrixType, int QRPreconditioner>
-struct svd_precondition_2x2_block_to_be_real<MatrixType, QRPreconditioner, true>
-{
-  typedef JacobiSVD<MatrixType, QRPreconditioner> SVD;
-  typedef typename MatrixType::Scalar Scalar;
-  typedef typename MatrixType::RealScalar RealScalar;
-  typedef typename SVD::Index Index;
-  static void run(typename SVD::WorkMatrixType& work_matrix, SVD& svd, Index p, Index q)
-  {
-    using std::sqrt;
-    Scalar z;
-    JacobiRotation<Scalar> rot;
-    RealScalar n = sqrt(numext::abs2(work_matrix.coeff(p,p)) + numext::abs2(work_matrix.coeff(q,p)));
-    if(n==0)
-    {
-      z = abs(work_matrix.coeff(p,q)) / work_matrix.coeff(p,q);
-      work_matrix.row(p) *= z;
-      if(svd.computeU()) svd.m_matrixU.col(p) *= conj(z);
-      z = abs(work_matrix.coeff(q,q)) / work_matrix.coeff(q,q);
-      work_matrix.row(q) *= z;
-      if(svd.computeU()) svd.m_matrixU.col(q) *= conj(z);
-    }
-    else
-    {
-      rot.c() = conj(work_matrix.coeff(p,p)) / n;
-      rot.s() = work_matrix.coeff(q,p) / n;
-      work_matrix.applyOnTheLeft(p,q,rot);
-      if(svd.computeU()) svd.m_matrixU.applyOnTheRight(p,q,rot.adjoint());
-      if(work_matrix.coeff(p,q) != Scalar(0))
-      {
-        Scalar z = abs(work_matrix.coeff(p,q)) / work_matrix.coeff(p,q);
-        work_matrix.col(q) *= z;
-        if(svd.computeV()) svd.m_matrixV.col(q) *= z;
-      }
-      if(work_matrix.coeff(q,q) != Scalar(0))
-      {
-        z = abs(work_matrix.coeff(q,q)) / work_matrix.coeff(q,q);
-        work_matrix.row(q) *= z;
-        if(svd.computeU()) svd.m_matrixU.col(q) *= conj(z);
-      }
-    }
-  }
-};
-
-template<typename MatrixType, typename RealScalar, typename Index>
-void real_2x2_jacobi_svd(const MatrixType& matrix, Index p, Index q,
-                            JacobiRotation<RealScalar> *j_left,
-                            JacobiRotation<RealScalar> *j_right)
-{
-  using std::sqrt;
-  Matrix<RealScalar,2,2> m;
-  m << numext::real(matrix.coeff(p,p)), numext::real(matrix.coeff(p,q)),
-       numext::real(matrix.coeff(q,p)), numext::real(matrix.coeff(q,q));
-  JacobiRotation<RealScalar> rot1;
-  RealScalar t = m.coeff(0,0) + m.coeff(1,1);
-  RealScalar d = m.coeff(1,0) - m.coeff(0,1);
-  if(t == RealScalar(0))
-  {
-    rot1.c() = RealScalar(0);
-    rot1.s() = d > RealScalar(0) ? RealScalar(1) : RealScalar(-1);
-  }
-  else
-  {
-    RealScalar u = d / t;
-    rot1.c() = RealScalar(1) / sqrt(RealScalar(1) + numext::abs2(u));
-    rot1.s() = rot1.c() * u;
-  }
-  m.applyOnTheLeft(0,1,rot1);
-  j_right->makeJacobi(m,0,1);
-  *j_left  = rot1 * j_right->transpose();
-}
-
-} // end namespace internal
-
-/** \ingroup SVD_Module
-  *
-  *
-  * \class JacobiSVD
-  *
-  * \brief Two-sided Jacobi SVD decomposition of a rectangular matrix
-  *
-  * \param MatrixType the type of the matrix of which we are computing the SVD decomposition
-  * \param QRPreconditioner this optional parameter allows to specify the type of QR decomposition that will be used internally
-  *                        for the R-SVD step for non-square matrices. See discussion of possible values below.
-  *
-  * SVD decomposition consists in decomposing any n-by-p matrix \a A as a product
-  *   \f[ A = U S V^* \f]
-  * where \a U is a n-by-n unitary, \a V is a p-by-p unitary, and \a S is a n-by-p real positive matrix which is zero outside of its main diagonal;
-  * the diagonal entries of S are known as the \em singular \em values of \a A and the columns of \a U and \a V are known as the left
-  * and right \em singular \em vectors of \a A respectively.
-  *
-  * Singular values are always sorted in decreasing order.
-  *
-  * This JacobiSVD decomposition computes only the singular values by default. If you want \a U or \a V, you need to ask for them explicitly.
-  *
-  * You can ask for only \em thin \a U or \a V to be computed, meaning the following. In case of a rectangular n-by-p matrix, letting \a m be the
-  * smaller value among \a n and \a p, there are only \a m singular vectors; the remaining columns of \a U and \a V do not correspond to actual
-  * singular vectors. Asking for \em thin \a U or \a V means asking for only their \a m first columns to be formed. So \a U is then a n-by-m matrix,
-  * and \a V is then a p-by-m matrix. Notice that thin \a U and \a V are all you need for (least squares) solving.
-  *
-  * Here's an example demonstrating basic usage:
-  * \include JacobiSVD_basic.cpp
-  * Output: \verbinclude JacobiSVD_basic.out
-  *
-  * This JacobiSVD class is a two-sided Jacobi R-SVD decomposition, ensuring optimal reliability and accuracy. The downside is that it's slower than
-  * bidiagonalizing SVD algorithms for large square matrices; however its complexity is still \f$ O(n^2p) \f$ where \a n is the smaller dimension and
-  * \a p is the greater dimension, meaning that it is still of the same order of complexity as the faster bidiagonalizing R-SVD algorithms.
-  * In particular, like any R-SVD, it takes advantage of non-squareness in that its complexity is only linear in the greater dimension.
-  *
-  * If the input matrix has inf or nan coefficients, the result of the computation is undefined, but the computation is guaranteed to
-  * terminate in finite (and reasonable) time.
-  *
-  * The possible values for QRPreconditioner are:
-  * \li ColPivHouseholderQRPreconditioner is the default. In practice it's very safe. It uses column-pivoting QR.
-  * \li FullPivHouseholderQRPreconditioner, is the safest and slowest. It uses full-pivoting QR.
-  *     Contrary to other QRs, it doesn't allow computing thin unitaries.
-  * \li HouseholderQRPreconditioner is the fastest, and less safe and accurate than the pivoting variants. It uses non-pivoting QR.
-  *     This is very similar in safety and accuracy to the bidiagonalization process used by bidiagonalizing SVD algorithms (since bidiagonalization
-  *     is inherently non-pivoting). However the resulting SVD is still more reliable than bidiagonalizing SVDs because the Jacobi-based iterarive
-  *     process is more reliable than the optimized bidiagonal SVD iterations.
-  * \li NoQRPreconditioner allows not to use a QR preconditioner at all. This is useful if you know that you will only be computing
-  *     JacobiSVD decompositions of square matrices. Non-square matrices require a QR preconditioner. Using this option will result in
-  *     faster compilation and smaller executable code. It won't significantly speed up computation, since JacobiSVD is always checking
-  *     if QR preconditioning is needed before applying it anyway.
-  *
-  * \sa MatrixBase::jacobiSvd()
-  */
-template<typename _MatrixType, int QRPreconditioner> 
-class JacobiSVD : public SVDBase<_MatrixType>
-{
-  public:
-
-    typedef _MatrixType MatrixType;
-    typedef typename MatrixType::Scalar Scalar;
-    typedef typename NumTraits<typename MatrixType::Scalar>::Real RealScalar;
-    typedef typename MatrixType::Index Index;
-    enum {
-      RowsAtCompileTime = MatrixType::RowsAtCompileTime,
-      ColsAtCompileTime = MatrixType::ColsAtCompileTime,
-      DiagSizeAtCompileTime = EIGEN_SIZE_MIN_PREFER_DYNAMIC(RowsAtCompileTime,ColsAtCompileTime),
-      MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
-      MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,
-      MaxDiagSizeAtCompileTime = EIGEN_SIZE_MIN_PREFER_FIXED(MaxRowsAtCompileTime,MaxColsAtCompileTime),
-      MatrixOptions = MatrixType::Options
-    };
-
-    typedef Matrix<Scalar, RowsAtCompileTime, RowsAtCompileTime,
-                   MatrixOptions, MaxRowsAtCompileTime, MaxRowsAtCompileTime>
-            MatrixUType;
-    typedef Matrix<Scalar, ColsAtCompileTime, ColsAtCompileTime,
-                   MatrixOptions, MaxColsAtCompileTime, MaxColsAtCompileTime>
-            MatrixVType;
-    typedef typename internal::plain_diag_type<MatrixType, RealScalar>::type SingularValuesType;
-    typedef typename internal::plain_row_type<MatrixType>::type RowType;
-    typedef typename internal::plain_col_type<MatrixType>::type ColType;
-    typedef Matrix<Scalar, DiagSizeAtCompileTime, DiagSizeAtCompileTime,
-                   MatrixOptions, MaxDiagSizeAtCompileTime, MaxDiagSizeAtCompileTime>
-            WorkMatrixType;
-
-    /** \brief Default Constructor.
-      *
-      * The default constructor is useful in cases in which the user intends to
-      * perform decompositions via JacobiSVD::compute(const MatrixType&).
-      */
-    JacobiSVD()
-      : SVDBase<_MatrixType>::SVDBase()
-    {}
-
-
-    /** \brief Default Constructor with memory preallocation
-      *
-      * Like the default constructor but with preallocation of the internal data
-      * according to the specified problem size.
-      * \sa JacobiSVD()
-      */
-    JacobiSVD(Index rows, Index cols, unsigned int computationOptions = 0)
-      : SVDBase<_MatrixType>::SVDBase() 
-    {
-      allocate(rows, cols, computationOptions);
-    }
-
-    /** \brief Constructor performing the decomposition of given matrix.
-     *
-     * \param matrix the matrix to decompose
-     * \param computationOptions optional parameter allowing to specify if you want full or thin U or V unitaries to be computed.
-     *                           By default, none is computed. This is a bit-field, the possible bits are #ComputeFullU, #ComputeThinU,
-     *                           #ComputeFullV, #ComputeThinV.
-     *
-     * Thin unitaries are only available if your matrix type has a Dynamic number of columns (for example MatrixXf). They also are not
-     * available with the (non-default) FullPivHouseholderQR preconditioner.
-     */
-    JacobiSVD(const MatrixType& matrix, unsigned int computationOptions = 0)
-      : SVDBase<_MatrixType>::SVDBase()
-    {
-      compute(matrix, computationOptions);
-    }
-
-    /** \brief Method performing the decomposition of given matrix using custom options.
-     *
-     * \param matrix the matrix to decompose
-     * \param computationOptions optional parameter allowing to specify if you want full or thin U or V unitaries to be computed.
-     *                           By default, none is computed. This is a bit-field, the possible bits are #ComputeFullU, #ComputeThinU,
-     *                           #ComputeFullV, #ComputeThinV.
-     *
-     * Thin unitaries are only available if your matrix type has a Dynamic number of columns (for example MatrixXf). They also are not
-     * available with the (non-default) FullPivHouseholderQR preconditioner.
-     */
-    SVDBase<MatrixType>& compute(const MatrixType& matrix, unsigned int computationOptions);
-
-    /** \brief Method performing the decomposition of given matrix using current options.
-     *
-     * \param matrix the matrix to decompose
-     *
-     * This method uses the current \a computationOptions, as already passed to the constructor or to compute(const MatrixType&, unsigned int).
-     */
-    SVDBase<MatrixType>& compute(const MatrixType& matrix)
-    {
-      return compute(matrix, this->m_computationOptions);
-    }
-    
-    /** \returns a (least squares) solution of \f$ A x = b \f$ using the current SVD decomposition of A.
-      *
-      * \param b the right-hand-side of the equation to solve.
-      *
-      * \note Solving requires both U and V to be computed. Thin U and V are enough, there is no need for full U or V.
-      *
-      * \note SVD solving is implicitly least-squares. Thus, this method serves both purposes of exact solving and least-squares solving.
-      * In other words, the returned solution is guaranteed to minimize the Euclidean norm \f$ \Vert A x - b \Vert \f$.
-      */
-    template<typename Rhs>
-    inline const internal::solve_retval<JacobiSVD, Rhs>
-    solve(const MatrixBase<Rhs>& b) const
-    {
-      eigen_assert(this->m_isInitialized && "JacobiSVD is not initialized.");
-      eigen_assert(SVDBase<MatrixType>::computeU() && SVDBase<MatrixType>::computeV() && "JacobiSVD::solve() requires both unitaries U and V to be computed (thin unitaries suffice).");
-      return internal::solve_retval<JacobiSVD, Rhs>(*this, b.derived());
-    }
-
-    
-
-  private:
-    void allocate(Index rows, Index cols, unsigned int computationOptions);
-
-  protected:
-    WorkMatrixType m_workMatrix;
-   
-    template<typename __MatrixType, int _QRPreconditioner, bool _IsComplex>
-    friend struct internal::svd_precondition_2x2_block_to_be_real;
-    template<typename __MatrixType, int _QRPreconditioner, int _Case, bool _DoAnything>
-    friend struct internal::qr_preconditioner_impl;
-
-    internal::qr_preconditioner_impl<MatrixType, QRPreconditioner, internal::PreconditionIfMoreColsThanRows> m_qr_precond_morecols;
-    internal::qr_preconditioner_impl<MatrixType, QRPreconditioner, internal::PreconditionIfMoreRowsThanCols> m_qr_precond_morerows;
-};
-
-template<typename MatrixType, int QRPreconditioner>
-void JacobiSVD<MatrixType, QRPreconditioner>::allocate(Index rows, Index cols, unsigned int computationOptions)
-{
-  if (SVDBase<MatrixType>::allocate(rows, cols, computationOptions)) return;
-
-  if (QRPreconditioner == FullPivHouseholderQRPreconditioner)
-  {
-      eigen_assert(!(this->m_computeThinU || this->m_computeThinV) &&
-              "JacobiSVD: can't compute thin U or thin V with the FullPivHouseholderQR preconditioner. "
-              "Use the ColPivHouseholderQR preconditioner instead.");
-  }
-
-  m_workMatrix.resize(this->m_diagSize, this->m_diagSize);
-  
-  if(this->m_cols>this->m_rows) m_qr_precond_morecols.allocate(*this);
-  if(this->m_rows>this->m_cols) m_qr_precond_morerows.allocate(*this);
-}
-
-template<typename MatrixType, int QRPreconditioner>
-SVDBase<MatrixType>&
-JacobiSVD<MatrixType, QRPreconditioner>::compute(const MatrixType& matrix, unsigned int computationOptions)
-{
-  using std::abs;
-  allocate(matrix.rows(), matrix.cols(), computationOptions);
-
-  // currently we stop when we reach precision 2*epsilon as the last bit of precision can require an unreasonable number of iterations,
-  // only worsening the precision of U and V as we accumulate more rotations
-  const RealScalar precision = RealScalar(2) * NumTraits<Scalar>::epsilon();
-
-  // limit for very small denormal numbers to be considered zero in order to avoid infinite loops (see bug 286)
-  const RealScalar considerAsZero = RealScalar(2) * std::numeric_limits<RealScalar>::denorm_min();
-
-  /*** step 1. The R-SVD step: we use a QR decomposition to reduce to the case of a square matrix */
-
-  if(!m_qr_precond_morecols.run(*this, matrix) && !m_qr_precond_morerows.run(*this, matrix))
-  {
-    m_workMatrix = matrix.block(0,0,this->m_diagSize,this->m_diagSize);
-    if(this->m_computeFullU) this->m_matrixU.setIdentity(this->m_rows,this->m_rows);
-    if(this->m_computeThinU) this->m_matrixU.setIdentity(this->m_rows,this->m_diagSize);
-    if(this->m_computeFullV) this->m_matrixV.setIdentity(this->m_cols,this->m_cols);
-    if(this->m_computeThinV) this->m_matrixV.setIdentity(this->m_cols, this->m_diagSize);
-  }
-
-  /*** step 2. The main Jacobi SVD iteration. ***/
-
-  bool finished = false;
-  while(!finished)
-  {
-    finished = true;
-
-    // do a sweep: for all index pairs (p,q), perform SVD of the corresponding 2x2 sub-matrix
-
-    for(Index p = 1; p < this->m_diagSize; ++p)
-    {
-      for(Index q = 0; q < p; ++q)
-      {
-        // if this 2x2 sub-matrix is not diagonal already...
-        // notice that this comparison will evaluate to false if any NaN is involved, ensuring that NaN's don't
-        // keep us iterating forever. Similarly, small denormal numbers are considered zero.
-        using std::max;
-        RealScalar threshold = (max)(considerAsZero, precision * (max)(abs(m_workMatrix.coeff(p,p)),
-                                                                       abs(m_workMatrix.coeff(q,q))));
-        if((max)(abs(m_workMatrix.coeff(p,q)),abs(m_workMatrix.coeff(q,p))) > threshold)
-        {
-          finished = false;
-
-          // perform SVD decomposition of 2x2 sub-matrix corresponding to indices p,q to make it diagonal
-          internal::svd_precondition_2x2_block_to_be_real<MatrixType, QRPreconditioner>::run(m_workMatrix, *this, p, q);
-          JacobiRotation<RealScalar> j_left, j_right;
-          internal::real_2x2_jacobi_svd(m_workMatrix, p, q, &j_left, &j_right);
-
-          // accumulate resulting Jacobi rotations
-          m_workMatrix.applyOnTheLeft(p,q,j_left);
-          if(SVDBase<MatrixType>::computeU()) this->m_matrixU.applyOnTheRight(p,q,j_left.transpose());
-
-          m_workMatrix.applyOnTheRight(p,q,j_right);
-          if(SVDBase<MatrixType>::computeV()) this->m_matrixV.applyOnTheRight(p,q,j_right);
-        }
-      }
-    }
-  }
-
-  /*** step 3. The work matrix is now diagonal, so ensure it's positive so its diagonal entries are the singular values ***/
-
-  for(Index i = 0; i < this->m_diagSize; ++i)
-  {
-    RealScalar a = abs(m_workMatrix.coeff(i,i));
-    this->m_singularValues.coeffRef(i) = a;
-    if(SVDBase<MatrixType>::computeU() && (a!=RealScalar(0))) this->m_matrixU.col(i) *= this->m_workMatrix.coeff(i,i)/a;
-  }
-
-  /*** step 4. Sort singular values in descending order and compute the number of nonzero singular values ***/
-
-  this->m_nonzeroSingularValues = this->m_diagSize;
-  for(Index i = 0; i < this->m_diagSize; i++)
-  {
-    Index pos;
-    RealScalar maxRemainingSingularValue = this->m_singularValues.tail(this->m_diagSize-i).maxCoeff(&pos);
-    if(maxRemainingSingularValue == RealScalar(0))
-    {
-      this->m_nonzeroSingularValues = i;
-      break;
-    }
-    if(pos)
-    {
-      pos += i;
-      std::swap(this->m_singularValues.coeffRef(i), this->m_singularValues.coeffRef(pos));
-      if(SVDBase<MatrixType>::computeU()) this->m_matrixU.col(pos).swap(this->m_matrixU.col(i));
-      if(SVDBase<MatrixType>::computeV()) this->m_matrixV.col(pos).swap(this->m_matrixV.col(i));
-    }
-  }
-
-  this->m_isInitialized = true;
-  return *this;
-}
-
-namespace internal {
-template<typename _MatrixType, int QRPreconditioner, typename Rhs>
-struct solve_retval<JacobiSVD<_MatrixType, QRPreconditioner>, Rhs>
-  : solve_retval_base<JacobiSVD<_MatrixType, QRPreconditioner>, Rhs>
-{
-  typedef JacobiSVD<_MatrixType, QRPreconditioner> JacobiSVDType;
-  EIGEN_MAKE_SOLVE_HELPERS(JacobiSVDType,Rhs)
-
-  template<typename Dest> void evalTo(Dest& dst) const
-  {
-    eigen_assert(rhs().rows() == dec().rows());
-
-    // A = U S V^*
-    // So A^{-1} = V S^{-1} U^*
-
-    Index diagSize = (std::min)(dec().rows(), dec().cols());
-    typename JacobiSVDType::SingularValuesType invertedSingVals(diagSize);
-
-    Index nonzeroSingVals = dec().nonzeroSingularValues();
-    invertedSingVals.head(nonzeroSingVals) = dec().singularValues().head(nonzeroSingVals).array().inverse();
-    invertedSingVals.tail(diagSize - nonzeroSingVals).setZero();
-
-    dst = dec().matrixV().leftCols(diagSize)
-        * invertedSingVals.asDiagonal()
-        * dec().matrixU().leftCols(diagSize).adjoint()
-        * rhs();
-  }
-};
-} // end namespace internal
-
-/** \svd_module
-  *
-  * \return the singular value decomposition of \c *this computed by two-sided
-  * Jacobi transformations.
-  *
-  * \sa class JacobiSVD
-  */
-template<typename Derived>
-JacobiSVD<typename MatrixBase<Derived>::PlainObject>
-MatrixBase<Derived>::jacobiSvd(unsigned int computationOptions) const
-{
-  return JacobiSVD<PlainObject>(*this, computationOptions);
-}
-
-} // end namespace Eigen
-
-#endif // EIGEN_JACOBISVD_H
diff --git a/unsupported/Eigen/src/SparseExtra/DynamicSparseMatrix.h b/unsupported/Eigen/src/SparseExtra/DynamicSparseMatrix.h
index dec16df28..e4dc1c1de 100644
--- a/unsupported/Eigen/src/SparseExtra/DynamicSparseMatrix.h
+++ b/unsupported/Eigen/src/SparseExtra/DynamicSparseMatrix.h
@@ -352,6 +352,36 @@ class DynamicSparseMatrix<Scalar,_Options,_Index>::ReverseInnerIterator : public
     const Index m_outer;
 };
 
+namespace internal {
+
+template<typename _Scalar, int _Options, typename _Index>
+struct evaluator<DynamicSparseMatrix<_Scalar,_Options,_Index> >
+  : evaluator_base<DynamicSparseMatrix<_Scalar,_Options,_Index> >
+{
+  typedef _Scalar Scalar;
+  typedef _Index Index;
+  typedef DynamicSparseMatrix<_Scalar,_Options,_Index> SparseMatrixType;
+  typedef typename SparseMatrixType::InnerIterator InnerIterator;
+  typedef typename SparseMatrixType::ReverseInnerIterator ReverseInnerIterator;
+  
+  enum {
+    CoeffReadCost = NumTraits<_Scalar>::ReadCost,
+    Flags = SparseMatrixType::Flags
+  };
+  
+  evaluator() : m_matrix(0) {}
+  evaluator(const SparseMatrixType &mat) : m_matrix(&mat) {}
+  
+  operator SparseMatrixType&() { return m_matrix->const_cast_derived(); }
+  operator const SparseMatrixType&() const { return *m_matrix; }
+  
+  Scalar coeff(Index row, Index col) const { return m_matrix->coeff(row,col); }
+
+  const SparseMatrixType *m_matrix;
+};
+
+}
+
 } // end namespace Eigen
 
 #endif // EIGEN_DYNAMIC_SPARSEMATRIX_H
diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt
index 0a6c56c19..48b61cde0 100644
--- a/unsupported/test/CMakeLists.txt
+++ b/unsupported/test/CMakeLists.txt
@@ -5,6 +5,7 @@ add_custom_target(BuildUnsupported)
 include_directories(../../test ../../unsupported ../../Eigen 
                     ${CMAKE_CURRENT_BINARY_DIR}/../../test)
 
+                    
 find_package(GoogleHash)
 if(GOOGLEHASH_FOUND)
   add_definitions("-DEIGEN_GOOGLEHASH_SUPPORT")
@@ -40,6 +41,7 @@ ei_add_test(matrix_function)
 ei_add_test(matrix_power)
 ei_add_test(matrix_square_root)
 ei_add_test(alignedvector3)
+
 ei_add_test(FFT)
 
 find_package(MPFR 2.3.0)
@@ -86,12 +88,12 @@ endif()
 
 ei_add_test(polynomialsolver)
 ei_add_test(polynomialutils)
-ei_add_test(kronecker_product)
 ei_add_test(splines)
 ei_add_test(gmres)
 ei_add_test(minres)
 ei_add_test(levenberg_marquardt)
 ei_add_test(bdcsvd)
+ei_add_test(kronecker_product)
 
 option(EIGEN_TEST_CXX11 "Enable testing of C++11 features (e.g. Tensor module)." OFF)
 if(EIGEN_TEST_CXX11)
diff --git a/unsupported/test/NonLinearOptimization.cpp b/unsupported/test/NonLinearOptimization.cpp
index d7376b0f5..75974f84f 100644
--- a/unsupported/test/NonLinearOptimization.cpp
+++ b/unsupported/test/NonLinearOptimization.cpp
@@ -1022,7 +1022,8 @@ void testNistLanczos1(void)
   VERIFY_IS_EQUAL(lm.nfev, 79);
   VERIFY_IS_EQUAL(lm.njev, 72);
   // check norm^2
-  VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 1.430899764097e-25);  // should be 1.4307867721E-25, but nist results are on 128-bit floats
+  std::cout.precision(30);
+  VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 1.4290986055242372e-25);  // should be 1.4307867721E-25, but nist results are on 128-bit floats
   // check x
   VERIFY_IS_APPROX(x[0], 9.5100000027E-02);
   VERIFY_IS_APPROX(x[1], 1.0000000001E+00);
@@ -1043,7 +1044,7 @@ void testNistLanczos1(void)
   VERIFY_IS_EQUAL(lm.nfev, 9);
   VERIFY_IS_EQUAL(lm.njev, 8);
   // check norm^2
-  VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 1.428595533845e-25);  // should be 1.4307867721E-25, but nist results are on 128-bit floats
+  VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 1.430571737783119393e-25);  // should be 1.4307867721E-25, but nist results are on 128-bit floats
   // check x
   VERIFY_IS_APPROX(x[0], 9.5100000027E-02);
   VERIFY_IS_APPROX(x[1], 1.0000000001E+00);
@@ -1262,8 +1263,8 @@ void testNistBoxBOD(void)
 
   // check return value
   VERIFY_IS_EQUAL(info, 1);
-  VERIFY_IS_EQUAL(lm.nfev, 31);
-  VERIFY_IS_EQUAL(lm.njev, 25);
+  VERIFY(lm.nfev < 31); // 31
+  VERIFY(lm.njev < 25); // 25
   // check norm^2
   VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 1.1680088766E+03);
   // check x
@@ -1342,10 +1343,6 @@ void testNistMGH17(void)
   lm.parameters.maxfev = 1000;
   info = lm.minimize(x);
 
-  // check return value
-  VERIFY_IS_EQUAL(info, 2); 
-  VERIFY_IS_EQUAL(lm.nfev, 602 ); 
-  VERIFY_IS_EQUAL(lm.njev, 545 ); 
   // check norm^2
   VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 5.4648946975E-05);
   // check x
@@ -1354,6 +1351,11 @@ void testNistMGH17(void)
   VERIFY_IS_APPROX(x[2], -1.4646871366E+00);
   VERIFY_IS_APPROX(x[3], 1.2867534640E-02);
   VERIFY_IS_APPROX(x[4], 2.2122699662E-02);
+  
+  // check return value
+  VERIFY_IS_EQUAL(info, 2); 
+  VERIFY(lm.nfev < 650);  // 602
+  VERIFY(lm.njev < 600);  // 545
 
   /*
    * Second try
@@ -1832,8 +1834,8 @@ void test_NonLinearOptimization()
     // NIST tests, level of difficulty = "Average"
     CALL_SUBTEST/*_5*/(testNistHahn1());
     CALL_SUBTEST/*_6*/(testNistMisra1d());
-//     CALL_SUBTEST/*_7*/(testNistMGH17());
-//     CALL_SUBTEST/*_8*/(testNistLanczos1());
+    CALL_SUBTEST/*_7*/(testNistMGH17());
+    CALL_SUBTEST/*_8*/(testNistLanczos1());
 
 //     // NIST tests, level of difficulty = "Higher"
     CALL_SUBTEST/*_9*/(testNistRat42());
diff --git a/unsupported/test/bdcsvd.cpp b/unsupported/test/bdcsvd.cpp
index 115a649b0..4ad991522 100644
--- a/unsupported/test/bdcsvd.cpp
+++ b/unsupported/test/bdcsvd.cpp
@@ -10,204 +10,105 @@
 // Public License v. 2.0. If a copy of the MPL was not distributed
 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/
 
-#include "svd_common.h"
+// discard stack allocation as that too bypasses malloc
+#define EIGEN_STACK_ALLOCATION_LIMIT 0
+#define EIGEN_RUNTIME_NO_MALLOC
+
+#include "main.h"
+#include <unsupported/Eigen/BDCSVD>
 #include <iostream>
 #include <Eigen/LU>
 
-// check if "svd" is the good image of "m"  
-template<typename MatrixType>
-void bdcsvd_check_full(const MatrixType& m, const BDCSVD<MatrixType>& svd)
-{
-  svd_check_full< MatrixType, BDCSVD< MatrixType > >(m, svd);
-}
-
-// Compare to a reference value
-template<typename MatrixType>
-void bdcsvd_compare_to_full(const MatrixType& m,
-			    unsigned int computationOptions,
-			    const BDCSVD<MatrixType>& referenceSvd)
-{
-  svd_compare_to_full< MatrixType, BDCSVD< MatrixType > >(m, computationOptions, referenceSvd);
-} // end bdcsvd_compare_to_full
 
+#define SVD_DEFAULT(M) BDCSVD<M>
+// #define SVD_FOR_MIN_NORM(M) BDCSVD<M>
+#define SVD_FOR_MIN_NORM(M) JacobiSVD<M,ColPivHouseholderQRPreconditioner>
+#include "../../test/svd_common.h"
 
-template<typename MatrixType>
-void bdcsvd_solve(const MatrixType& m, unsigned int computationOptions)
-{
-  svd_solve< MatrixType, BDCSVD< MatrixType > >(m, computationOptions);
-} //  end template bdcsvd_solve
-
-
-// test the computations options
-template<typename MatrixType>
-void bdcsvd_test_all_computation_options(const MatrixType& m)
-{
-  BDCSVD<MatrixType> fullSvd(m, ComputeFullU|ComputeFullV);
-  svd_test_computation_options_1< MatrixType, BDCSVD< MatrixType > >(m, fullSvd); 
-  svd_test_computation_options_2< MatrixType, BDCSVD< MatrixType > >(m, fullSvd); 
-} // end bdcsvd_test_all_computation_options
-
-
-// Call a test with all the computations options
+// Check all variants of JacobiSVD
 template<typename MatrixType>
 void bdcsvd(const MatrixType& a = MatrixType(), bool pickrandom = true)
 {
-  MatrixType m = pickrandom ? MatrixType::Random(a.rows(), a.cols()) : a;
-  bdcsvd_test_all_computation_options<MatrixType>(m);
-} // end template bdcsvd
-
-
-// verify assert
-template<typename MatrixType> 
-void bdcsvd_verify_assert(const MatrixType& m)
-{
-  svd_verify_assert< MatrixType, BDCSVD< MatrixType > >(m);
-}// end template bdcsvd_verify_assert
+  MatrixType m = a;
+  if(pickrandom)
+    svd_fill_random(m);
 
+  CALL_SUBTEST(( svd_test_all_computation_options<BDCSVD<MatrixType> >(m, false)  ));
+}
 
-// test weird values
-template<typename MatrixType>
-void bdcsvd_inf_nan()
-{
-  svd_inf_nan< MatrixType, BDCSVD< MatrixType > >();
-}// end template bdcsvd_inf_nan
-
-
-
-void bdcsvd_preallocate()
-{
-  svd_preallocate< BDCSVD< MatrixXf > >();
-} // end bdcsvd_preallocate
-
+// template<typename MatrixType>
+// void bdcsvd_method()
+// {
+//   enum { Size = MatrixType::RowsAtCompileTime };
+//   typedef typename MatrixType::RealScalar RealScalar;
+//   typedef Matrix<RealScalar, Size, 1> RealVecType;
+//   MatrixType m = MatrixType::Identity();
+//   VERIFY_IS_APPROX(m.bdcSvd().singularValues(), RealVecType::Ones());
+//   VERIFY_RAISES_ASSERT(m.bdcSvd().matrixU());
+//   VERIFY_RAISES_ASSERT(m.bdcSvd().matrixV());
+//   VERIFY_IS_APPROX(m.bdcSvd(ComputeFullU|ComputeFullV).solve(m), m);
+// }
 
 // compare the Singular values returned with Jacobi and Bdc
 template<typename MatrixType> 
 void compare_bdc_jacobi(const MatrixType& a = MatrixType(), unsigned int computationOptions = 0)
 {
-  std::cout << "debut compare" << std::endl;
   MatrixType m = MatrixType::Random(a.rows(), a.cols());
   BDCSVD<MatrixType> bdc_svd(m);
   JacobiSVD<MatrixType> jacobi_svd(m);
   VERIFY_IS_APPROX(bdc_svd.singularValues(), jacobi_svd.singularValues());
-  if(computationOptions & ComputeFullU)
-    VERIFY_IS_APPROX(bdc_svd.matrixU(), jacobi_svd.matrixU());
-  if(computationOptions & ComputeThinU)
-    VERIFY_IS_APPROX(bdc_svd.matrixU(), jacobi_svd.matrixU());
-  if(computationOptions & ComputeFullV)
-    VERIFY_IS_APPROX(bdc_svd.matrixV(), jacobi_svd.matrixV());
-  if(computationOptions & ComputeThinV)
-    VERIFY_IS_APPROX(bdc_svd.matrixV(), jacobi_svd.matrixV());
-  std::cout << "fin compare" << std::endl;
-} // end template compare_bdc_jacobi
-
-
-// call the tests
+  if(computationOptions & ComputeFullU) VERIFY_IS_APPROX(bdc_svd.matrixU(), jacobi_svd.matrixU());
+  if(computationOptions & ComputeThinU) VERIFY_IS_APPROX(bdc_svd.matrixU(), jacobi_svd.matrixU());
+  if(computationOptions & ComputeFullV) VERIFY_IS_APPROX(bdc_svd.matrixV(), jacobi_svd.matrixV());
+  if(computationOptions & ComputeThinV) VERIFY_IS_APPROX(bdc_svd.matrixV(), jacobi_svd.matrixV());
+}
+
 void test_bdcsvd()
 {
-  // test of Dynamic defined Matrix (42, 42) of float 
-  CALL_SUBTEST_11(( bdcsvd_verify_assert<Matrix<float,Dynamic,Dynamic> >
-		    (Matrix<float,Dynamic,Dynamic>(42,42)) ));
-  CALL_SUBTEST_11(( compare_bdc_jacobi<Matrix<float,Dynamic,Dynamic> >
-		    (Matrix<float,Dynamic,Dynamic>(42,42), 0) ));
-  CALL_SUBTEST_11(( bdcsvd<Matrix<float,Dynamic,Dynamic> >
-		    (Matrix<float,Dynamic,Dynamic>(42,42)) ));
-
-  // test of Dynamic defined Matrix (50, 50) of double 
-  CALL_SUBTEST_13(( bdcsvd_verify_assert<Matrix<double,Dynamic,Dynamic> >
-		    (Matrix<double,Dynamic,Dynamic>(50,50)) ));
-  CALL_SUBTEST_13(( compare_bdc_jacobi<Matrix<double,Dynamic,Dynamic> >
-		    (Matrix<double,Dynamic,Dynamic>(50,50), 0) ));
-  CALL_SUBTEST_13(( bdcsvd<Matrix<double,Dynamic,Dynamic> >
-		    (Matrix<double,Dynamic,Dynamic>(50, 50)) )); 
-
-  // test of Dynamic defined Matrix (22, 22) of complex double
-  CALL_SUBTEST_14(( bdcsvd_verify_assert<Matrix<std::complex<double>,Dynamic,Dynamic> >
-  		    (Matrix<std::complex<double>,Dynamic,Dynamic>(22,22)) ));
-  CALL_SUBTEST_14(( compare_bdc_jacobi<Matrix<std::complex<double>,Dynamic,Dynamic> >
-  		    (Matrix<std::complex<double>, Dynamic, Dynamic> (22,22), 0) ));
-  CALL_SUBTEST_14(( bdcsvd<Matrix<std::complex<double>,Dynamic,Dynamic> >
-  		    (Matrix<std::complex<double>,Dynamic,Dynamic>(22, 22)) )); 
-
-  // test of Dynamic defined Matrix (10, 10) of int
-  //CALL_SUBTEST_15(( bdcsvd_verify_assert<Matrix<int,Dynamic,Dynamic> >
-  //		    (Matrix<int,Dynamic,Dynamic>(10,10)) ));		    
-  //CALL_SUBTEST_15(( compare_bdc_jacobi<Matrix<int,Dynamic,Dynamic> >
-  //		    (Matrix<int,Dynamic,Dynamic>(10,10), 0) ));
-  //CALL_SUBTEST_15(( bdcsvd<Matrix<int,Dynamic,Dynamic> >
-  //		    (Matrix<int,Dynamic,Dynamic>(10, 10)) )); 
+  CALL_SUBTEST_3(( svd_verify_assert<BDCSVD<Matrix3f>  >(Matrix3f()) ));
+  CALL_SUBTEST_4(( svd_verify_assert<BDCSVD<Matrix4d>  >(Matrix4d()) ));
+  CALL_SUBTEST_7(( svd_verify_assert<BDCSVD<MatrixXf>  >(MatrixXf(10,12)) ));
+  CALL_SUBTEST_8(( svd_verify_assert<BDCSVD<MatrixXcd> >(MatrixXcd(7,5)) ));
   
+//   svd_all_trivial_2x2(bdcsvd<Matrix2cd>);
+//   svd_all_trivial_2x2(bdcsvd<Matrix2d>);
 
-  // test of Dynamic defined Matrix (8, 6) of double 
- 
-  CALL_SUBTEST_16(( bdcsvd_verify_assert<Matrix<double,Dynamic,Dynamic> >
-		    (Matrix<double,Dynamic,Dynamic>(8,6)) ));
-  CALL_SUBTEST_16(( compare_bdc_jacobi<Matrix<double,Dynamic,Dynamic> >
-		    (Matrix<double,Dynamic,Dynamic>(8, 6), 0) )); 
-  CALL_SUBTEST_16(( bdcsvd<Matrix<double,Dynamic,Dynamic> >
-		    (Matrix<double,Dynamic,Dynamic>(8, 6)) ));
-
-
-  
-  // test of Dynamic defined Matrix (36, 12) of float
-  CALL_SUBTEST_17(( compare_bdc_jacobi<Matrix<float,Dynamic,Dynamic> >
-		    (Matrix<float,Dynamic,Dynamic>(36, 12), 0) )); 
-  CALL_SUBTEST_17(( bdcsvd<Matrix<float,Dynamic,Dynamic> >
-		    (Matrix<float,Dynamic,Dynamic>(36, 12)) )); 
-
-  // test of Dynamic defined Matrix (5, 8) of double 
-  CALL_SUBTEST_18(( compare_bdc_jacobi<Matrix<double,Dynamic,Dynamic> >
-		    (Matrix<double,Dynamic,Dynamic>(5, 8), 0) )); 
-  CALL_SUBTEST_18(( bdcsvd<Matrix<double,Dynamic,Dynamic> >
-		    (Matrix<double,Dynamic,Dynamic>(5, 8)) )); 
-
-
-  // non regression tests
-  CALL_SUBTEST_3(( bdcsvd_verify_assert(Matrix3f()) ));
-  CALL_SUBTEST_4(( bdcsvd_verify_assert(Matrix4d()) ));
-  CALL_SUBTEST_7(( bdcsvd_verify_assert(MatrixXf(10,12)) ));
-  CALL_SUBTEST_8(( bdcsvd_verify_assert(MatrixXcd(7,5)) ));
-
-  // SUBTESTS 1 and 2 on specifics matrix
   for(int i = 0; i < g_repeat; i++) {
-    Matrix2cd m;
-    m << 0, 1,
-      0, 1;
-    CALL_SUBTEST_1(( bdcsvd(m, false) ));
-    m << 1, 0,
-      1, 0;
-    CALL_SUBTEST_1(( bdcsvd(m, false) ));
-
-    Matrix2d n;
-    n << 0, 0,
-      0, 0;
-    CALL_SUBTEST_2(( bdcsvd(n, false) ));
-    n << 0, 0,
-      0, 1;
-    CALL_SUBTEST_2(( bdcsvd(n, false) ));
+//     CALL_SUBTEST_3(( bdcsvd<Matrix3f>() ));
+//     CALL_SUBTEST_4(( bdcsvd<Matrix4d>() ));
+//     CALL_SUBTEST_5(( bdcsvd<Matrix<float,3,5> >() ));
+
+    int r = internal::random<int>(1, EIGEN_TEST_MAX_SIZE/2),
+        c = internal::random<int>(1, EIGEN_TEST_MAX_SIZE/2);
     
-    // Statics matrix don't work with BDSVD yet
-    // bdc algo on a random 3x3 float matrix
-    // CALL_SUBTEST_3(( bdcsvd<Matrix3f>() ));
-    // bdc algo on a random 4x4 double matrix
-    // CALL_SUBTEST_4(( bdcsvd<Matrix4d>() ));
-    // bdc algo on a random 3x5 float matrix
-    // CALL_SUBTEST_5(( bdcsvd<Matrix<float,3,5> >() ));
-
-    int r = internal::random<int>(1, 30),
-      c = internal::random<int>(1, 30);
-    CALL_SUBTEST_7(( bdcsvd<MatrixXf>(MatrixXf(r,c)) ));
-    CALL_SUBTEST_8(( bdcsvd<MatrixXcd>(MatrixXcd(r,c)) ));
+    TEST_SET_BUT_UNUSED_VARIABLE(r)
+    TEST_SET_BUT_UNUSED_VARIABLE(c)
+    
+    CALL_SUBTEST_6((  bdcsvd(Matrix<double,Dynamic,2>(r,2)) ));
+    CALL_SUBTEST_7((  bdcsvd(MatrixXf(r,c)) ));
+    CALL_SUBTEST_7((  compare_bdc_jacobi(MatrixXf(r,c)) ));
+    CALL_SUBTEST_10(( bdcsvd(MatrixXd(r,c)) ));
+    CALL_SUBTEST_10(( compare_bdc_jacobi(MatrixXd(r,c)) ));
+    CALL_SUBTEST_8((  bdcsvd(MatrixXcd(r,c)) ));
+    CALL_SUBTEST_8((  compare_bdc_jacobi(MatrixXcd(r,c)) ));
     (void) r;
     (void) c;
 
     // Test on inf/nan matrix
-    CALL_SUBTEST_7( bdcsvd_inf_nan<MatrixXf>() );
+    CALL_SUBTEST_7(  (svd_inf_nan<BDCSVD<MatrixXf>, MatrixXf>()) );
+    CALL_SUBTEST_10( (svd_inf_nan<BDCSVD<MatrixXd>, MatrixXd>()) );
   }
 
-  CALL_SUBTEST_7(( bdcsvd<MatrixXf>(MatrixXf(internal::random<int>(EIGEN_TEST_MAX_SIZE/4, EIGEN_TEST_MAX_SIZE/2), internal::random<int>(EIGEN_TEST_MAX_SIZE/4, EIGEN_TEST_MAX_SIZE/2))) ));
-  CALL_SUBTEST_8(( bdcsvd<MatrixXcd>(MatrixXcd(internal::random<int>(EIGEN_TEST_MAX_SIZE/4, EIGEN_TEST_MAX_SIZE/3), internal::random<int>(EIGEN_TEST_MAX_SIZE/4, EIGEN_TEST_MAX_SIZE/3))) ));
+  // test matrixbase method
+//   CALL_SUBTEST_1(( bdcsvd_method<Matrix2cd>() ));
+//   CALL_SUBTEST_3(( bdcsvd_method<Matrix3f>() ));
 
   // Test problem size constructors
   CALL_SUBTEST_7( BDCSVD<MatrixXf>(10,10) );
 
-} // end test_bdcsvd
+  // Check that preallocation avoids subsequent mallocs
+  CALL_SUBTEST_9( svd_preallocate() );
+
+  CALL_SUBTEST_2( svd_underoverflow() );
+}
+
diff --git a/unsupported/test/jacobisvd.cpp b/unsupported/test/jacobisvd.cpp
deleted file mode 100644
index b4e884eee..000000000
--- a/unsupported/test/jacobisvd.cpp
+++ /dev/null
@@ -1,198 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
-// Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#include "svd_common.h"
-
-template<typename MatrixType, int QRPreconditioner>
-void jacobisvd_check_full(const MatrixType& m, const JacobiSVD<MatrixType, QRPreconditioner>& svd)
-{
-  svd_check_full<MatrixType, JacobiSVD<MatrixType, QRPreconditioner > >(m, svd);
-}
-
-template<typename MatrixType, int QRPreconditioner>
-void jacobisvd_compare_to_full(const MatrixType& m,
-                               unsigned int computationOptions,
-                               const JacobiSVD<MatrixType, QRPreconditioner>& referenceSvd)
-{
-  svd_compare_to_full<MatrixType, JacobiSVD<MatrixType, QRPreconditioner> >(m, computationOptions, referenceSvd);
-}
-
-
-template<typename MatrixType, int QRPreconditioner>
-void jacobisvd_solve(const MatrixType& m, unsigned int computationOptions)
-{
-  svd_solve< MatrixType, JacobiSVD< MatrixType, QRPreconditioner > >(m, computationOptions);
-}
-
-
-
-template<typename MatrixType, int QRPreconditioner>
-void jacobisvd_test_all_computation_options(const MatrixType& m)
-{
-  
-  if (QRPreconditioner == NoQRPreconditioner && m.rows() != m.cols())
-    return;
-
-  JacobiSVD< MatrixType, QRPreconditioner > fullSvd(m, ComputeFullU|ComputeFullV);
-  svd_test_computation_options_1< MatrixType, JacobiSVD< MatrixType, QRPreconditioner > >(m, fullSvd);
-
-  if(QRPreconditioner == FullPivHouseholderQRPreconditioner)
-    return;
-  svd_test_computation_options_2< MatrixType, JacobiSVD< MatrixType, QRPreconditioner > >(m, fullSvd);
-
-}
-
-template<typename MatrixType>
-void jacobisvd(const MatrixType& a = MatrixType(), bool pickrandom = true)
-{
-  MatrixType m = pickrandom ? MatrixType::Random(a.rows(), a.cols()) : a;
-
-  jacobisvd_test_all_computation_options<MatrixType, FullPivHouseholderQRPreconditioner>(m);
-  jacobisvd_test_all_computation_options<MatrixType, ColPivHouseholderQRPreconditioner>(m);
-  jacobisvd_test_all_computation_options<MatrixType, HouseholderQRPreconditioner>(m);
-  jacobisvd_test_all_computation_options<MatrixType, NoQRPreconditioner>(m);
-}
-
-
-template<typename MatrixType> 
-void jacobisvd_verify_assert(const MatrixType& m)
-{
-  
-  svd_verify_assert<MatrixType, JacobiSVD< MatrixType > >(m);
-
-  typedef typename MatrixType::Index Index;
-  Index rows = m.rows();
-  Index cols = m.cols();
-
-  enum {
-    RowsAtCompileTime = MatrixType::RowsAtCompileTime,
-    ColsAtCompileTime = MatrixType::ColsAtCompileTime
-  };
-
-  MatrixType a = MatrixType::Zero(rows, cols);
-  a.setZero();
-
-  if (ColsAtCompileTime == Dynamic)
-  {
-    JacobiSVD<MatrixType, FullPivHouseholderQRPreconditioner> svd_fullqr;
-    VERIFY_RAISES_ASSERT(svd_fullqr.compute(a, ComputeFullU|ComputeThinV))
-    VERIFY_RAISES_ASSERT(svd_fullqr.compute(a, ComputeThinU|ComputeThinV))
-    VERIFY_RAISES_ASSERT(svd_fullqr.compute(a, ComputeThinU|ComputeFullV))
-  }
-}
-
-template<typename MatrixType>
-void jacobisvd_method()
-{
-  enum { Size = MatrixType::RowsAtCompileTime };
-  typedef typename MatrixType::RealScalar RealScalar;
-  typedef Matrix<RealScalar, Size, 1> RealVecType;
-  MatrixType m = MatrixType::Identity();
-  VERIFY_IS_APPROX(m.jacobiSvd().singularValues(), RealVecType::Ones());
-  VERIFY_RAISES_ASSERT(m.jacobiSvd().matrixU());
-  VERIFY_RAISES_ASSERT(m.jacobiSvd().matrixV());
-  VERIFY_IS_APPROX(m.jacobiSvd(ComputeFullU|ComputeFullV).solve(m), m);
-}
-
-
-
-template<typename MatrixType>
-void jacobisvd_inf_nan()
-{
-  svd_inf_nan<MatrixType, JacobiSVD< MatrixType > >();
-}
-
-
-// Regression test for bug 286: JacobiSVD loops indefinitely with some
-// matrices containing denormal numbers.
-void jacobisvd_bug286()
-{
-#if defined __INTEL_COMPILER
-// shut up warning #239: floating point underflow
-#pragma warning push
-#pragma warning disable 239
-#endif
-  Matrix2d M;
-  M << -7.90884e-313, -4.94e-324,
-                 0, 5.60844e-313;
-#if defined __INTEL_COMPILER
-#pragma warning pop
-#endif
-  JacobiSVD<Matrix2d> svd;
-  svd.compute(M); // just check we don't loop indefinitely
-}
-
-
-void jacobisvd_preallocate()
-{
-  svd_preallocate< JacobiSVD <MatrixXf> >();
-}
-
-void test_jacobisvd()
-{
-  CALL_SUBTEST_11(( jacobisvd<Matrix<double,Dynamic,Dynamic> >
-		    (Matrix<double,Dynamic,Dynamic>(16, 6)) ));
-
-  CALL_SUBTEST_3(( jacobisvd_verify_assert(Matrix3f()) ));
-  CALL_SUBTEST_4(( jacobisvd_verify_assert(Matrix4d()) ));
-  CALL_SUBTEST_7(( jacobisvd_verify_assert(MatrixXf(10,12)) ));
-  CALL_SUBTEST_8(( jacobisvd_verify_assert(MatrixXcd(7,5)) ));
-
-  for(int i = 0; i < g_repeat; i++) {
-    Matrix2cd m;
-    m << 0, 1,
-         0, 1;
-    CALL_SUBTEST_1(( jacobisvd(m, false) ));
-    m << 1, 0,
-         1, 0;
-    CALL_SUBTEST_1(( jacobisvd(m, false) ));
-
-    Matrix2d n;
-    n << 0, 0,
-         0, 0;
-    CALL_SUBTEST_2(( jacobisvd(n, false) ));
-    n << 0, 0,
-         0, 1;
-    CALL_SUBTEST_2(( jacobisvd(n, false) ));
-    
-    CALL_SUBTEST_3(( jacobisvd<Matrix3f>() ));
-    CALL_SUBTEST_4(( jacobisvd<Matrix4d>() ));
-    CALL_SUBTEST_5(( jacobisvd<Matrix<float,3,5> >() ));
-    CALL_SUBTEST_6(( jacobisvd<Matrix<double,Dynamic,2> >(Matrix<double,Dynamic,2>(10,2)) ));
-
-    int r = internal::random<int>(1, 30),
-        c = internal::random<int>(1, 30);
-    CALL_SUBTEST_7(( jacobisvd<MatrixXf>(MatrixXf(r,c)) ));
-    CALL_SUBTEST_8(( jacobisvd<MatrixXcd>(MatrixXcd(r,c)) ));
-    (void) r;
-    (void) c;
-
-    // Test on inf/nan matrix
-    CALL_SUBTEST_7( jacobisvd_inf_nan<MatrixXf>() );
-  }
-
-  CALL_SUBTEST_7(( jacobisvd<MatrixXf>(MatrixXf(internal::random<int>(EIGEN_TEST_MAX_SIZE/4, EIGEN_TEST_MAX_SIZE/2), internal::random<int>(EIGEN_TEST_MAX_SIZE/4, EIGEN_TEST_MAX_SIZE/2))) ));
-  CALL_SUBTEST_8(( jacobisvd<MatrixXcd>(MatrixXcd(internal::random<int>(EIGEN_TEST_MAX_SIZE/4, EIGEN_TEST_MAX_SIZE/3), internal::random<int>(EIGEN_TEST_MAX_SIZE/4, EIGEN_TEST_MAX_SIZE/3))) ));
-
-
-  // test matrixbase method
-  CALL_SUBTEST_1(( jacobisvd_method<Matrix2cd>() ));
-  CALL_SUBTEST_3(( jacobisvd_method<Matrix3f>() ));
-
-
-  // Test problem size constructors
-  CALL_SUBTEST_7( JacobiSVD<MatrixXf>(10,10) );
-
-  // Check that preallocation avoids subsequent mallocs
-  CALL_SUBTEST_9( jacobisvd_preallocate() );
-
-  // Regression check for bug 286
-  CALL_SUBTEST_2( jacobisvd_bug286() );
-}
diff --git a/unsupported/test/levenberg_marquardt.cpp b/unsupported/test/levenberg_marquardt.cpp
index 04464727d..1fa1c3c22 100644
--- a/unsupported/test/levenberg_marquardt.cpp
+++ b/unsupported/test/levenberg_marquardt.cpp
@@ -787,16 +787,17 @@ void testNistMGH10(void)
   LevenbergMarquardt<MGH10_functor> lm(functor);
   info = lm.minimize(x);
 
-  // check return value
-  VERIFY_IS_EQUAL(info, 1); 
-  VERIFY_IS_EQUAL(lm.nfev(), 284 ); 
-  VERIFY_IS_EQUAL(lm.njev(), 249 ); 
   // check norm^2
   VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 8.7945855171E+01);
   // check x
   VERIFY_IS_APPROX(x[0], 5.6096364710E-03);
   VERIFY_IS_APPROX(x[1], 6.1813463463E+03);
   VERIFY_IS_APPROX(x[2], 3.4522363462E+02);
+  
+  // check return value
+  //VERIFY_IS_EQUAL(info, 1);
+  VERIFY_IS_EQUAL(lm.nfev(), 284 );
+  VERIFY_IS_EQUAL(lm.njev(), 249 );
 
   /*
    * Second try
@@ -805,16 +806,17 @@ void testNistMGH10(void)
   // do the computation
   info = lm.minimize(x);
 
-  // check return value
-  VERIFY_IS_EQUAL(info, 1);
-  VERIFY_IS_EQUAL(lm.nfev(), 126);
-  VERIFY_IS_EQUAL(lm.njev(), 116);
   // check norm^2
   VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 8.7945855171E+01);
   // check x
   VERIFY_IS_APPROX(x[0], 5.6096364710E-03);
   VERIFY_IS_APPROX(x[1], 6.1813463463E+03);
   VERIFY_IS_APPROX(x[2], 3.4522363462E+02);
+  
+  // check return value
+  //VERIFY_IS_EQUAL(info, 1);
+  VERIFY_IS_EQUAL(lm.nfev(), 126);
+  VERIFY_IS_EQUAL(lm.njev(), 116);
 }
 
 
@@ -866,15 +868,16 @@ void testNistBoxBOD(void)
   lm.setFactor(10);
   info = lm.minimize(x);
 
-  // check return value
-  VERIFY_IS_EQUAL(info, 1);
-  VERIFY_IS_EQUAL(lm.nfev(), 31);
-  VERIFY_IS_EQUAL(lm.njev(), 25);
   // check norm^2
   VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 1.1680088766E+03);
   // check x
   VERIFY_IS_APPROX(x[0], 2.1380940889E+02);
   VERIFY_IS_APPROX(x[1], 5.4723748542E-01);
+  
+  // check return value
+  VERIFY_IS_EQUAL(info, 1);
+  VERIFY(lm.nfev() < 31); // 31
+  VERIFY(lm.njev() < 25); // 25
 
   /*
    * Second try
@@ -948,10 +951,6 @@ void testNistMGH17(void)
   lm.setMaxfev(1000);
   info = lm.minimize(x);
 
-  // check return value
-//   VERIFY_IS_EQUAL(info, 2);  //FIXME Use (lm.info() == Success)
-//   VERIFY_IS_EQUAL(lm.nfev(), 602 ); 
-  VERIFY_IS_EQUAL(lm.njev(), 545 ); 
   // check norm^2
   VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 5.4648946975E-05);
   // check x
@@ -960,6 +959,11 @@ void testNistMGH17(void)
   VERIFY_IS_APPROX(x[2], -1.4646871366E+00);
   VERIFY_IS_APPROX(x[3], 1.2867534640E-02);
   VERIFY_IS_APPROX(x[4], 2.2122699662E-02);
+  
+    // check return value
+//   VERIFY_IS_EQUAL(info, 2);  //FIXME Use (lm.info() == Success)
+  VERIFY(lm.nfev() < 700 ); // 602
+  VERIFY(lm.njev() < 600 ); // 545
 
   /*
    * Second try
@@ -1035,10 +1039,6 @@ void testNistMGH09(void)
   lm.setMaxfev(1000);
   info = lm.minimize(x);
 
-  // check return value
-  VERIFY_IS_EQUAL(info, 1); 
-  VERIFY_IS_EQUAL(lm.nfev(), 490 ); 
-  VERIFY_IS_EQUAL(lm.njev(), 376 ); 
   // check norm^2
   VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 3.0750560385E-04);
   // check x
@@ -1046,6 +1046,10 @@ void testNistMGH09(void)
   VERIFY_IS_APPROX(x[1], 0.19126423573); // should be 1.9128232873E-01
   VERIFY_IS_APPROX(x[2], 0.12305309914); // should be 1.2305650693E-01
   VERIFY_IS_APPROX(x[3], 0.13605395375); // should be 1.3606233068E-01
+  // check return value
+  VERIFY_IS_EQUAL(info, 1); 
+  VERIFY(lm.nfev() < 510 ); // 490
+  VERIFY(lm.njev() < 400 ); // 376
 
   /*
    * Second try
diff --git a/unsupported/test/svd_common.h b/unsupported/test/svd_common.h
deleted file mode 100644
index b40c23a2b..000000000
--- a/unsupported/test/svd_common.h
+++ /dev/null
@@ -1,261 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
-// Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>
-//
-// Copyright (C) 2013 Gauthier Brun <brun.gauthier@gmail.com>
-// Copyright (C) 2013 Nicolas Carre <nicolas.carre@ensimag.fr>
-// Copyright (C) 2013 Jean Ceccato <jean.ceccato@ensimag.fr>
-// Copyright (C) 2013 Pierre Zoppitelli <pierre.zoppitelli@ensimag.fr>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-// discard stack allocation as that too bypasses malloc
-#define EIGEN_STACK_ALLOCATION_LIMIT 0
-#define EIGEN_RUNTIME_NO_MALLOC
-
-#include "main.h"
-#include <unsupported/Eigen/SVD>
-#include <Eigen/LU>
-
-
-// check if "svd" is the good image of "m"  
-template<typename MatrixType, typename SVD>
-void svd_check_full(const MatrixType& m, const SVD& svd)
-{
-  typedef typename MatrixType::Index Index;
-  Index rows = m.rows();
-  Index cols = m.cols();
-  enum {
-    RowsAtCompileTime = MatrixType::RowsAtCompileTime,
-    ColsAtCompileTime = MatrixType::ColsAtCompileTime
-  };
-
-  typedef typename MatrixType::Scalar Scalar;
-  typedef Matrix<Scalar, RowsAtCompileTime, RowsAtCompileTime> MatrixUType;
-  typedef Matrix<Scalar, ColsAtCompileTime, ColsAtCompileTime> MatrixVType;
-
-  
-  MatrixType sigma = MatrixType::Zero(rows, cols);
-  sigma.diagonal() = svd.singularValues().template cast<Scalar>();
-  MatrixUType u = svd.matrixU();
-  MatrixVType v = svd.matrixV();
-  VERIFY_IS_APPROX(m, u * sigma * v.adjoint());
-  VERIFY_IS_UNITARY(u);
-  VERIFY_IS_UNITARY(v);
-} // end svd_check_full
-
-
-
-// Compare to a reference value
-template<typename MatrixType, typename SVD>
-void svd_compare_to_full(const MatrixType& m,
-			 unsigned int computationOptions,
-			 const SVD& referenceSvd)
-{
-  typedef typename MatrixType::Index Index;
-  Index rows = m.rows();
-  Index cols = m.cols();
-  Index diagSize = (std::min)(rows, cols);
-
-  SVD svd(m, computationOptions);
-
-  VERIFY_IS_APPROX(svd.singularValues(), referenceSvd.singularValues());
-  if(computationOptions & ComputeFullU)
-    VERIFY_IS_APPROX(svd.matrixU(), referenceSvd.matrixU());
-  if(computationOptions & ComputeThinU)
-    VERIFY_IS_APPROX(svd.matrixU(), referenceSvd.matrixU().leftCols(diagSize));
-  if(computationOptions & ComputeFullV)
-    VERIFY_IS_APPROX(svd.matrixV(), referenceSvd.matrixV());
-  if(computationOptions & ComputeThinV)
-    VERIFY_IS_APPROX(svd.matrixV(), referenceSvd.matrixV().leftCols(diagSize));
-} // end svd_compare_to_full
-
-
-
-template<typename MatrixType, typename SVD>
-void svd_solve(const MatrixType& m, unsigned int computationOptions)
-{
-  typedef typename MatrixType::Scalar Scalar;
-  typedef typename MatrixType::Index Index;
-  Index rows = m.rows();
-  Index cols = m.cols();
-
-  enum {
-    RowsAtCompileTime = MatrixType::RowsAtCompileTime,
-    ColsAtCompileTime = MatrixType::ColsAtCompileTime
-  };
-
-  typedef Matrix<Scalar, RowsAtCompileTime, Dynamic> RhsType;
-  typedef Matrix<Scalar, ColsAtCompileTime, Dynamic> SolutionType;
-
-  RhsType rhs = RhsType::Random(rows, internal::random<Index>(1, cols));
-  SVD svd(m, computationOptions);
-  SolutionType x = svd.solve(rhs);
-  // evaluate normal equation which works also for least-squares solutions
-  VERIFY_IS_APPROX(m.adjoint()*m*x,m.adjoint()*rhs);
-} // end svd_solve
-
-
-// test computations options
-// 2 functions because Jacobisvd can return before the second function
-template<typename MatrixType, typename SVD>
-void svd_test_computation_options_1(const MatrixType& m, const SVD& fullSvd)
-{
-  svd_check_full< MatrixType, SVD >(m, fullSvd);
-  svd_solve< MatrixType, SVD >(m, ComputeFullU | ComputeFullV);
-}
-
-
-template<typename MatrixType, typename SVD>
-void svd_test_computation_options_2(const MatrixType& m, const SVD& fullSvd)
-{
-  svd_compare_to_full< MatrixType, SVD >(m, ComputeFullU, fullSvd);
-  svd_compare_to_full< MatrixType, SVD >(m, ComputeFullV, fullSvd);
-  svd_compare_to_full< MatrixType, SVD >(m, 0, fullSvd);
-
-  if (MatrixType::ColsAtCompileTime == Dynamic) {
-    // thin U/V are only available with dynamic number of columns
- 
-    svd_compare_to_full< MatrixType, SVD >(m, ComputeFullU|ComputeThinV, fullSvd);
-    svd_compare_to_full< MatrixType, SVD >(m,              ComputeThinV, fullSvd);
-    svd_compare_to_full< MatrixType, SVD >(m, ComputeThinU|ComputeFullV, fullSvd);
-    svd_compare_to_full< MatrixType, SVD >(m, ComputeThinU             , fullSvd);
-    svd_compare_to_full< MatrixType, SVD >(m, ComputeThinU|ComputeThinV, fullSvd);
-    svd_solve<MatrixType, SVD>(m, ComputeFullU | ComputeThinV);
-    svd_solve<MatrixType, SVD>(m, ComputeThinU | ComputeFullV);
-    svd_solve<MatrixType, SVD>(m, ComputeThinU | ComputeThinV);
-    
-    typedef typename MatrixType::Index Index;
-    Index diagSize = (std::min)(m.rows(), m.cols());
-    SVD svd(m, ComputeThinU | ComputeThinV);
-    VERIFY_IS_APPROX(m, svd.matrixU().leftCols(diagSize) * svd.singularValues().asDiagonal() * svd.matrixV().leftCols(diagSize).adjoint());
-  }
-}
-
-template<typename MatrixType, typename SVD> 
-void svd_verify_assert(const MatrixType& m)
-{
-  typedef typename MatrixType::Scalar Scalar;
-  typedef typename MatrixType::Index Index;
-  Index rows = m.rows();
-  Index cols = m.cols();
-
-  enum {
-    RowsAtCompileTime = MatrixType::RowsAtCompileTime,
-    ColsAtCompileTime = MatrixType::ColsAtCompileTime
-  };
-
-  typedef Matrix<Scalar, RowsAtCompileTime, 1> RhsType;
-  RhsType rhs(rows);
-  SVD svd;
-  VERIFY_RAISES_ASSERT(svd.matrixU())
-  VERIFY_RAISES_ASSERT(svd.singularValues())
-  VERIFY_RAISES_ASSERT(svd.matrixV())
-  VERIFY_RAISES_ASSERT(svd.solve(rhs))
-  MatrixType a = MatrixType::Zero(rows, cols);
-  a.setZero();
-  svd.compute(a, 0);
-  VERIFY_RAISES_ASSERT(svd.matrixU())
-  VERIFY_RAISES_ASSERT(svd.matrixV())
-  svd.singularValues();
-  VERIFY_RAISES_ASSERT(svd.solve(rhs))
-    
-  if (ColsAtCompileTime == Dynamic)
-  {
-    svd.compute(a, ComputeThinU);
-    svd.matrixU();
-    VERIFY_RAISES_ASSERT(svd.matrixV())
-    VERIFY_RAISES_ASSERT(svd.solve(rhs))
-    svd.compute(a, ComputeThinV);
-    svd.matrixV();
-    VERIFY_RAISES_ASSERT(svd.matrixU())
-    VERIFY_RAISES_ASSERT(svd.solve(rhs))
-  }
-  else
-  {
-    VERIFY_RAISES_ASSERT(svd.compute(a, ComputeThinU))
-    VERIFY_RAISES_ASSERT(svd.compute(a, ComputeThinV))
-  }
-}
-
-// work around stupid msvc error when constructing at compile time an expression that involves
-// a division by zero, even if the numeric type has floating point
-template<typename Scalar>
-EIGEN_DONT_INLINE Scalar zero() { return Scalar(0); }
-
-// workaround aggressive optimization in ICC
-template<typename T> EIGEN_DONT_INLINE  T sub(T a, T b) { return a - b; }
-
-
-template<typename MatrixType, typename SVD>
-void svd_inf_nan()
-{
-  // all this function does is verify we don't iterate infinitely on nan/inf values
-
-  SVD svd;
-  typedef typename MatrixType::Scalar Scalar;
-  Scalar some_inf = Scalar(1) / zero<Scalar>();
-  VERIFY(sub(some_inf, some_inf) != sub(some_inf, some_inf));
-  svd.compute(MatrixType::Constant(10,10,some_inf), ComputeFullU | ComputeFullV);
-
-  Scalar some_nan = zero<Scalar> () / zero<Scalar> ();
-  VERIFY(some_nan != some_nan);
-  svd.compute(MatrixType::Constant(10,10,some_nan), ComputeFullU | ComputeFullV);
-
-  MatrixType m = MatrixType::Zero(10,10);
-  m(internal::random<int>(0,9), internal::random<int>(0,9)) = some_inf;
-  svd.compute(m, ComputeFullU | ComputeFullV);
-
-  m = MatrixType::Zero(10,10);
-  m(internal::random<int>(0,9), internal::random<int>(0,9)) = some_nan;
-  svd.compute(m, ComputeFullU | ComputeFullV);
-}
-
-
-template<typename SVD>
-void svd_preallocate()
-{
-  Vector3f v(3.f, 2.f, 1.f);
-  MatrixXf m = v.asDiagonal();
-
-  internal::set_is_malloc_allowed(false);
-  VERIFY_RAISES_ASSERT(VectorXf v(10);)
-    SVD svd;
-  internal::set_is_malloc_allowed(true);
-  svd.compute(m);
-  VERIFY_IS_APPROX(svd.singularValues(), v);
-
-  SVD svd2(3,3);
-  internal::set_is_malloc_allowed(false);
-  svd2.compute(m);
-  internal::set_is_malloc_allowed(true);
-  VERIFY_IS_APPROX(svd2.singularValues(), v);
-  VERIFY_RAISES_ASSERT(svd2.matrixU());
-  VERIFY_RAISES_ASSERT(svd2.matrixV());
-  svd2.compute(m, ComputeFullU | ComputeFullV);
-  VERIFY_IS_APPROX(svd2.matrixU(), Matrix3f::Identity());
-  VERIFY_IS_APPROX(svd2.matrixV(), Matrix3f::Identity());
-  internal::set_is_malloc_allowed(false);
-  svd2.compute(m);
-  internal::set_is_malloc_allowed(true);
-
-  SVD svd3(3,3,ComputeFullU|ComputeFullV);
-  internal::set_is_malloc_allowed(false);
-  svd2.compute(m);
-  internal::set_is_malloc_allowed(true);
-  VERIFY_IS_APPROX(svd2.singularValues(), v);
-  VERIFY_IS_APPROX(svd2.matrixU(), Matrix3f::Identity());
-  VERIFY_IS_APPROX(svd2.matrixV(), Matrix3f::Identity());
-  internal::set_is_malloc_allowed(false);
-  svd2.compute(m, ComputeFullU|ComputeFullV);
-  internal::set_is_malloc_allowed(true);
-}
-
-
-
-
-
author	Konstantinos Margaritis <konstantinos.margaritis@freevec.org>	2014-09-21 14:02:51 +0300
committer	Konstantinos Margaritis <konstantinos.margaritis@freevec.org>	2014-09-21 14:02:51 +0300
commit	60e093a9dce2f8d4c0f3b2ea3e0386d5f01bff8d (patch)
tree	05442eeff0bcfe7fe85ce59cf5fa72aa06ee2a07
parent	56408504e4e3fa5f9c59d9edac14ca1ba1255e5a (diff)
parent	03dd4dd91a5d8963f56eebe3b9d2eb924bc06e02 (diff)