author    Gael Guennebaud <g.gael@free.fr>  2013-04-19 11:21:39 +0200
committer Gael Guennebaud <g.gael@free.fr>  2013-04-19 11:21:39 +0200
commit    9cd2d14005def8e7df0b0bf5fd6eb51f8a6591e9 (patch)
tree      ca4df13b58e923bdebd9d5f59aecda9d1e30ca58 /Eigen
parent    4e2e615a7c2c719d2d708ab32840bad353322d8c (diff)
parent    46755648ec341aa5e0283b47456108bb2897b1b3 (diff)
merge with default branch
Diffstat (limited to 'Eigen')
-rw-r--r--  Eigen/Core | 2
-rw-r--r--  Eigen/OrderingMethods | 3
-rw-r--r--  Eigen/Sparse | 4
-rw-r--r--  Eigen/SparseCholesky | 8
-rw-r--r--  Eigen/SparseLU | 9
-rw-r--r--  Eigen/SparseQR | 2
-rw-r--r--  Eigen/src/Cholesky/LDLT.h | 10
-rw-r--r--  Eigen/src/Cholesky/LLT.h | 8
-rw-r--r--  Eigen/src/CholmodSupport/CholmodSupport.h | 1
-rw-r--r--  Eigen/src/Core/Array.h | 6
-rw-r--r--  Eigen/src/Core/Assign.h | 8
-rw-r--r--  Eigen/src/Core/AssignEvaluator.h | 73
-rw-r--r--  Eigen/src/Core/Block.h | 4
-rw-r--r--  Eigen/src/Core/BooleanRedux.h | 28
-rw-r--r--  Eigen/src/Core/CoreEvaluators.h | 6
-rw-r--r--  Eigen/src/Core/CwiseBinaryOp.h | 4
-rw-r--r--  Eigen/src/Core/CwiseUnaryView.h | 12
-rw-r--r--  Eigen/src/Core/DenseBase.h | 9
-rw-r--r--  Eigen/src/Core/Functors.h | 34
-rw-r--r--  Eigen/src/Core/GeneralProduct.h | 96
-rw-r--r--  Eigen/src/Core/GenericPacketMath.h | 23
-rw-r--r--  Eigen/src/Core/GlobalFunctions.h | 2
-rw-r--r--  Eigen/src/Core/MathFunctions.h | 3
-rw-r--r--  Eigen/src/Core/Matrix.h | 6
-rw-r--r--  Eigen/src/Core/MatrixBase.h | 2
-rw-r--r--  Eigen/src/Core/NumTraits.h | 3
-rw-r--r--  Eigen/src/Core/PermutationMatrix.h | 21
-rw-r--r--  Eigen/src/Core/PlainObjectBase.h | 27
-rw-r--r--  Eigen/src/Core/Product.h | 4
-rw-r--r--  Eigen/src/Core/ProductBase.h | 10
-rw-r--r--  Eigen/src/Core/ProductEvaluators.h | 34
-rw-r--r--  Eigen/src/Core/Redux.h | 6
-rw-r--r--  Eigen/src/Core/Ref.h | 13
-rw-r--r--  Eigen/src/Core/Select.h | 6
-rw-r--r--  Eigen/src/Core/SelfAdjointView.h | 4
-rw-r--r--  Eigen/src/Core/SelfCwiseBinaryOp.h | 5
-rw-r--r--  Eigen/src/Core/StableNorm.h | 40
-rw-r--r--  Eigen/src/Core/Transpose.h | 4
-rw-r--r--  Eigen/src/Core/VectorwiseOp.h | 47
-rw-r--r--  Eigen/src/Core/Visitor.h | 16
-rw-r--r--  Eigen/src/Core/arch/SSE/MathFunctions.h | 10
-rw-r--r--  Eigen/src/Core/arch/SSE/PacketMath.h | 8
-rw-r--r--  Eigen/src/Core/products/CoeffBasedProduct.h | 2
-rw-r--r--  Eigen/src/Core/products/GeneralBlockPanelKernel.h | 294
-rw-r--r--  Eigen/src/Core/products/GeneralMatrixMatrix.h | 7
-rw-r--r--  Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h | 112
-rw-r--r--  Eigen/src/Core/products/GeneralMatrixVector.h | 23
-rw-r--r--  Eigen/src/Core/products/GeneralMatrixVector_MKL.h | 6
-rw-r--r--  Eigen/src/Core/products/SelfadjointMatrixMatrix.h | 32
-rw-r--r--  Eigen/src/Core/products/SelfadjointMatrixMatrix_MKL.h | 10
-rw-r--r--  Eigen/src/Core/products/SelfadjointMatrixVector.h | 15
-rw-r--r--  Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h | 4
-rw-r--r--  Eigen/src/Core/products/SelfadjointProduct.h | 20
-rw-r--r--  Eigen/src/Core/products/SelfadjointRank2Update.h | 6
-rw-r--r--  Eigen/src/Core/products/TriangularMatrixMatrix.h | 40
-rw-r--r--  Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h | 4
-rw-r--r--  Eigen/src/Core/products/TriangularMatrixVector.h | 30
-rw-r--r--  Eigen/src/Core/products/TriangularMatrixVector_MKL.h | 12
-rw-r--r--  Eigen/src/Core/products/TriangularSolverMatrix.h | 18
-rw-r--r--  Eigen/src/Core/products/TriangularSolverMatrix_MKL.h | 4
-rw-r--r--  Eigen/src/Core/util/Macros.h | 2
-rw-r--r--  Eigen/src/Core/util/Memory.h | 35
-rw-r--r--  Eigen/src/Core/util/Meta.h | 20
-rw-r--r--  Eigen/src/Eigenvalues/ComplexEigenSolver.h | 6
-rw-r--r--  Eigen/src/Eigenvalues/ComplexSchur.h | 1
-rw-r--r--  Eigen/src/Eigenvalues/ComplexSchur_MKL.h | 2
-rw-r--r--  Eigen/src/Eigenvalues/EigenSolver.h | 4
-rw-r--r--  Eigen/src/Eigenvalues/HessenbergDecomposition.h | 2
-rw-r--r--  Eigen/src/Eigenvalues/RealQZ.h | 2
-rw-r--r--  Eigen/src/Eigenvalues/RealSchur.h | 14
-rw-r--r--  Eigen/src/Eigenvalues/RealSchur_MKL.h | 2
-rw-r--r--  Eigen/src/Eigenvalues/Tridiagonalization.h | 2
-rw-r--r--  Eigen/src/Geometry/AlignedBox.h | 6
-rw-r--r--  Eigen/src/Geometry/OrthoMethods.h | 4
-rw-r--r--  Eigen/src/Geometry/Quaternion.h | 4
-rw-r--r--  Eigen/src/IterativeLinearSolvers/BiCGSTAB.h | 5
-rw-r--r--  Eigen/src/IterativeLinearSolvers/ConjugateGradient.h | 22
-rw-r--r--  Eigen/src/IterativeLinearSolvers/IncompleteLUT.h | 65
-rw-r--r--  Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h | 2
-rw-r--r--  Eigen/src/Jacobi/Jacobi.h | 4
-rw-r--r--  Eigen/src/LU/Determinant.h | 2
-rw-r--r--  Eigen/src/LU/FullPivLU.h | 3
-rw-r--r--  Eigen/src/LU/PartialPivLU.h | 9
-rw-r--r--  Eigen/src/MetisSupport/MetisSupport.h | 2
-rw-r--r--  Eigen/src/OrderingMethods/Amd.h | 5
-rw-r--r--  Eigen/src/OrderingMethods/Eigen_Colamd.h | 2093
-rw-r--r--  Eigen/src/OrderingMethods/Ordering.h | 44
-rw-r--r--  Eigen/src/PaStiXSupport/PaStiXSupport.h | 23
-rw-r--r--  Eigen/src/PardisoSupport/PardisoSupport.h | 25
-rw-r--r--  Eigen/src/QR/ColPivHouseholderQR.h | 50
-rw-r--r--  Eigen/src/QR/HouseholderQR.h | 1
-rw-r--r--  Eigen/src/SPQRSupport/SuiteSparseQRSupport.h | 66
-rw-r--r--  Eigen/src/SVD/JacobiSVD.h | 36
-rw-r--r--  Eigen/src/SparseCholesky/SimplicialCholesky.h | 212
-rw-r--r--  Eigen/src/SparseCholesky/SimplicialCholesky_impl.h | 199
-rw-r--r--  Eigen/src/SparseCore/AmbiVector.h | 2
-rw-r--r--  Eigen/src/SparseCore/ConservativeSparseSparseProduct.h | 6
-rw-r--r--  Eigen/src/SparseCore/SparseBlock.h | 73
-rw-r--r--  Eigen/src/SparseCore/SparseColEtree.h | 39
-rw-r--r--  Eigen/src/SparseCore/SparseDot.h | 4
-rw-r--r--  Eigen/src/SparseCore/SparseMatrix.h | 419
-rw-r--r--  Eigen/src/SparseCore/SparseSelfAdjointView.h | 1
-rw-r--r--  Eigen/src/SparseCore/SparseVector.h | 55
-rw-r--r--  Eigen/src/SparseLU/SparseLU.h | 254
-rw-r--r--  Eigen/src/SparseLU/SparseLUBase.h | 58
-rw-r--r--  Eigen/src/SparseLU/SparseLUImpl.h | 64
-rw-r--r--  Eigen/src/SparseLU/SparseLU_Memory.h | 53
-rw-r--r--  Eigen/src/SparseLU/SparseLU_Structs.h | 26
-rw-r--r--  Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h (renamed from Eigen/src/SparseLU/SparseLU_Matrix.h) | 98
-rw-r--r--  Eigen/src/SparseLU/SparseLU_Utils.h | 20
-rw-r--r--  Eigen/src/SparseLU/SparseLU_column_bmod.h | 28
-rw-r--r--  Eigen/src/SparseLU/SparseLU_column_dfs.h | 59
-rw-r--r--  Eigen/src/SparseLU/SparseLU_copy_to_ucol.h | 12
-rw-r--r--  Eigen/src/SparseLU/SparseLU_gemm_kernel.h | 98
-rw-r--r--  Eigen/src/SparseLU/SparseLU_heap_relax_snode.h | 22
-rw-r--r--  Eigen/src/SparseLU/SparseLU_kernel_bmod.h | 164
-rw-r--r--  Eigen/src/SparseLU/SparseLU_panel_bmod.h | 70
-rw-r--r--  Eigen/src/SparseLU/SparseLU_panel_dfs.h | 64
-rw-r--r--  Eigen/src/SparseLU/SparseLU_pivotL.h | 6
-rw-r--r--  Eigen/src/SparseLU/SparseLU_pruneL.h | 14
-rw-r--r--  Eigen/src/SparseLU/SparseLU_relax_snode.h | 13
-rw-r--r--  Eigen/src/SparseQR/SparseQR.h | 367
-rw-r--r--  Eigen/src/SuperLUSupport/SuperLUSupport.h | 18
-rw-r--r--  Eigen/src/UmfPackSupport/UmfPackSupport.h | 21
-rw-r--r--  Eigen/src/misc/SparseSolve.h | 17
125 files changed, 3489 insertions, 2890 deletions
diff --git a/Eigen/Core b/Eigen/Core
index 26a37cf42..1798264e9 100644
--- a/Eigen/Core
+++ b/Eigen/Core
@@ -65,7 +65,7 @@
#endif
#else
// Remember that usage of defined() in a #define is undefined by the standard
- #if (defined __SSE2__) && ( (!defined __GNUC__) || EIGEN_GNUC_AT_LEAST(4,2) )
+ #if (defined __SSE2__) && ( (!defined __GNUC__) || (defined __INTEL_COMPILER) || EIGEN_GNUC_AT_LEAST(4,2) )
#define EIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC
#endif
#endif
diff --git a/Eigen/OrderingMethods b/Eigen/OrderingMethods
index 423cfb0cd..7c0f1ffff 100644
--- a/Eigen/OrderingMethods
+++ b/Eigen/OrderingMethods
@@ -56,7 +56,10 @@
* \endcode
*/
+#ifndef EIGEN_MPL2_ONLY
#include "src/OrderingMethods/Amd.h"
+#endif
+
#include "src/OrderingMethods/Ordering.h"
#include "src/Core/util/ReenableStupidWarnings.h"
diff --git a/Eigen/Sparse b/Eigen/Sparse
index a3cee2482..9d4da4c06 100644
--- a/Eigen/Sparse
+++ b/Eigen/Sparse
@@ -7,6 +7,8 @@
* - SparseCore
* - OrderingMethods
* - SparseCholesky
+ * - SparseLU
+ * - SparseQR
* - IterativeLinearSolvers
*
* \code
@@ -17,6 +19,8 @@
#include "SparseCore"
#include "OrderingMethods"
#include "SparseCholesky"
+#include "SparseLU"
+#include "SparseQR"
#include "IterativeLinearSolvers"
#endif // EIGEN_SPARSE_MODULE_H
diff --git a/Eigen/SparseCholesky b/Eigen/SparseCholesky
index f94e2e765..800f17bc4 100644
--- a/Eigen/SparseCholesky
+++ b/Eigen/SparseCholesky
@@ -20,11 +20,19 @@
* \endcode
*/
+#ifdef EIGEN_MPL2_ONLY
+#error The SparseCholesky module has nothing to offer in MPL2 only mode
+#endif
+
#include "src/misc/Solve.h"
#include "src/misc/SparseSolve.h"
#include "src/SparseCholesky/SimplicialCholesky.h"
+#ifndef EIGEN_MPL2_ONLY
+#include "src/SparseCholesky/SimplicialCholesky_impl.h"
+#endif
+
#include "src/Core/util/ReenableStupidWarnings.h"
#endif // EIGEN_SPARSECHOLESKY_MODULE_H
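
Not part of the diff: the new EIGEN_MPL2_ONLY guards above exclude the parts of OrderingMethods and SparseCholesky that are not available under the MPL2. A minimal sketch of a client translation unit opting into that mode (hypothetical code, only the macro and headers shown in this diff are assumed):

// Hypothetical MPL2-only build: with this macro defined before any Eigen header,
// including <Eigen/SparseCholesky> would hit the #error added above, and the AMD
// ordering is skipped when including <Eigen/OrderingMethods>.
#define EIGEN_MPL2_ONLY
#include <Eigen/SparseCore>
#include <Eigen/OrderingMethods>

int main()
{
  Eigen::NaturalOrdering<int> ordering;  // an ordering that remains available in MPL2-only mode
  (void)ordering;
  return 0;
}
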
diff --git a/Eigen/SparseLU b/Eigen/SparseLU
index 69cc6beca..38b38b531 100644
--- a/Eigen/SparseLU
+++ b/Eigen/SparseLU
@@ -2,6 +2,7 @@
// for linear algebra.
//
// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
+// Copyright (C) 2012 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
@@ -14,7 +15,9 @@
/**
* \defgroup SparseLU_Module SparseLU module
- *
+ * This module defines a supernodal factorization of general sparse matrices.
+ * The code is fully optimized for supernode-panel updates with specialized kernels.
+ * Please see the documentation of the SparseLU class for more details.
*/
// Ordering interface
@@ -23,8 +26,8 @@
#include "src/SparseLU/SparseLU_gemm_kernel.h"
#include "src/SparseLU/SparseLU_Structs.h"
-#include "src/SparseLU/SparseLU_Matrix.h"
-#include "src/SparseLU/SparseLUBase.h"
+#include "src/SparseLU/SparseLU_SupernodalMatrix.h"
+#include "src/SparseLU/SparseLUImpl.h"
#include "src/SparseCore/SparseColEtree.h"
#include "src/SparseLU/SparseLU_Memory.h"
#include "src/SparseLU/SparseLU_heap_relax_snode.h"
diff --git a/Eigen/SparseQR b/Eigen/SparseQR
index 8030d641d..f51913f7b 100644
--- a/Eigen/SparseQR
+++ b/Eigen/SparseQR
@@ -26,4 +26,4 @@
#include "src/Core/util/ReenableStupidWarnings.h"
-#endif
\ No newline at end of file
+#endif
diff --git a/Eigen/src/Cholesky/LDLT.h b/Eigen/src/Cholesky/LDLT.h
index 5dede8081..9bd60d648 100644
--- a/Eigen/src/Cholesky/LDLT.h
+++ b/Eigen/src/Cholesky/LDLT.h
@@ -196,7 +196,7 @@ template<typename _MatrixType, int _UpLo> class LDLT
LDLT& compute(const MatrixType& matrix);
template <typename Derived>
- LDLT& rankUpdate(const MatrixBase<Derived>& w,RealScalar alpha=1);
+ LDLT& rankUpdate(const MatrixBase<Derived>& w, const RealScalar& alpha=1);
/** \returns the internal LDLT decomposition matrix
*
@@ -347,7 +347,7 @@ template<> struct ldlt_inplace<Lower>
// Here only rank-1 updates are implemented, to reduce the
// requirement for intermediate storage and improve accuracy
template<typename MatrixType, typename WDerived>
- static bool updateInPlace(MatrixType& mat, MatrixBase<WDerived>& w, typename MatrixType::RealScalar sigma=1)
+ static bool updateInPlace(MatrixType& mat, MatrixBase<WDerived>& w, const typename MatrixType::RealScalar& sigma=1)
{
using internal::isfinite;
typedef typename MatrixType::Scalar Scalar;
@@ -386,7 +386,7 @@ template<> struct ldlt_inplace<Lower>
}
template<typename MatrixType, typename TranspositionType, typename Workspace, typename WType>
- static bool update(MatrixType& mat, const TranspositionType& transpositions, Workspace& tmp, const WType& w, typename MatrixType::RealScalar sigma=1)
+ static bool update(MatrixType& mat, const TranspositionType& transpositions, Workspace& tmp, const WType& w, const typename MatrixType::RealScalar& sigma=1)
{
// Apply the permutation to the input w
tmp = transpositions * w;
@@ -405,7 +405,7 @@ template<> struct ldlt_inplace<Upper>
}
template<typename MatrixType, typename TranspositionType, typename Workspace, typename WType>
- static EIGEN_STRONG_INLINE bool update(MatrixType& mat, TranspositionType& transpositions, Workspace& tmp, WType& w, typename MatrixType::RealScalar sigma=1)
+ static EIGEN_STRONG_INLINE bool update(MatrixType& mat, TranspositionType& transpositions, Workspace& tmp, WType& w, const typename MatrixType::RealScalar& sigma=1)
{
Transpose<MatrixType> matt(mat);
return ldlt_inplace<Lower>::update(matt, transpositions, tmp, w.conjugate(), sigma);
@@ -457,7 +457,7 @@ LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::compute(const MatrixType& a)
*/
template<typename MatrixType, int _UpLo>
template<typename Derived>
-LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::rankUpdate(const MatrixBase<Derived>& w,typename NumTraits<typename MatrixType::Scalar>::Real sigma)
+LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::rankUpdate(const MatrixBase<Derived>& w, const typename NumTraits<typename MatrixType::Scalar>::Real& sigma)
{
const Index size = w.rows();
if (m_isInitialized)
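
For reference, the rankUpdate() member whose sigma parameter now passes by const reference is used as in the sketch below (illustrative names; A is assumed self-adjoint):

#include <Eigen/Dense>

// Illustrative sketch: after the call, ldlt factors A + sigma * w * w.adjoint()
// without recomputing the decomposition from scratch.
void ldlt_rank_update(const Eigen::MatrixXd& A, const Eigen::VectorXd& w, double sigma)
{
  Eigen::LDLT<Eigen::MatrixXd> ldlt(A);
  ldlt.rankUpdate(w, sigma);
}
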
diff --git a/Eigen/src/Cholesky/LLT.h b/Eigen/src/Cholesky/LLT.h
index 478fad251..db22a2f85 100644
--- a/Eigen/src/Cholesky/LLT.h
+++ b/Eigen/src/Cholesky/LLT.h
@@ -200,7 +200,7 @@ static typename MatrixType::Index llt_rank_update_lower(MatrixType& mat, const V
typedef Matrix<Scalar,Dynamic,1> TempVectorType;
typedef typename TempVectorType::SegmentReturnType TempVecSegment;
- int n = mat.cols();
+ Index n = mat.cols();
eigen_assert(mat.rows()==n && vec.size()==n);
TempVectorType temp;
@@ -212,12 +212,12 @@ static typename MatrixType::Index llt_rank_update_lower(MatrixType& mat, const V
// i.e., for sigma > 0
temp = sqrt(sigma) * vec;
- for(int i=0; i<n; ++i)
+ for(Index i=0; i<n; ++i)
{
JacobiRotation<Scalar> g;
g.makeGivens(mat(i,i), -temp(i), &mat(i,i));
- int rs = n-i-1;
+ Index rs = n-i-1;
if(rs>0)
{
ColXprSegment x(mat.col(i).tail(rs));
@@ -230,7 +230,7 @@ static typename MatrixType::Index llt_rank_update_lower(MatrixType& mat, const V
{
temp = vec;
RealScalar beta = 1;
- for(int j=0; j<n; ++j)
+ for(Index j=0; j<n; ++j)
{
RealScalar Ljj = real(mat.coeff(j,j));
RealScalar dj = abs2(Ljj);
diff --git a/Eigen/src/CholmodSupport/CholmodSupport.h b/Eigen/src/CholmodSupport/CholmodSupport.h
index 26abaf48f..42d289ad8 100644
--- a/Eigen/src/CholmodSupport/CholmodSupport.h
+++ b/Eigen/src/CholmodSupport/CholmodSupport.h
@@ -51,7 +51,6 @@ void cholmod_configure_matrix(CholmodType& mat)
template<typename _Scalar, int _Options, typename _Index>
cholmod_sparse viewAsCholmod(SparseMatrix<_Scalar,_Options,_Index>& mat)
{
- typedef SparseMatrix<_Scalar,_Options,_Index> MatrixType;
cholmod_sparse res;
res.nzmax = mat.nonZeros();
res.nrow = mat.rows();;
diff --git a/Eigen/src/Core/Array.h b/Eigen/src/Core/Array.h
index 707a9d7f2..4c83f7e7f 100644
--- a/Eigen/src/Core/Array.h
+++ b/Eigen/src/Core/Array.h
@@ -114,7 +114,7 @@ class Array
EIGEN_STRONG_INLINE explicit Array() : Base()
{
Base::_check_template_params();
- EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
+ EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
}
#ifndef EIGEN_PARSED_BY_DOXYGEN
@@ -125,7 +125,7 @@ class Array
: Base(internal::constructor_without_unaligned_array_assert())
{
Base::_check_template_params();
- EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
+ EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
}
#endif
@@ -143,7 +143,7 @@ class Array
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Array)
eigen_assert(dim >= 0);
eigen_assert(SizeAtCompileTime == Dynamic || SizeAtCompileTime == dim);
- EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
+ EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
}
#ifndef EIGEN_PARSED_BY_DOXYGEN
diff --git a/Eigen/src/Core/Assign.h b/Eigen/src/Core/Assign.h
index dc9b55fa4..8c9078f06 100644
--- a/Eigen/src/Core/Assign.h
+++ b/Eigen/src/Core/Assign.h
@@ -158,7 +158,7 @@ template<typename Derived1, typename Derived2, int Index, int Stop>
struct assign_DefaultTraversal_InnerUnrolling
{
EIGEN_DEVICE_FUNC
- static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src, int outer)
+ static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src, typename Derived1::Index outer)
{
dst.copyCoeffByOuterInner(outer, Index, src);
assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src, outer);
@@ -169,7 +169,7 @@ template<typename Derived1, typename Derived2, int Stop>
struct assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, Stop, Stop>
{
EIGEN_DEVICE_FUNC
- static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &, int) {}
+ static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &, typename Derived1::Index) {}
};
/***********************
@@ -224,7 +224,7 @@ struct assign_innervec_CompleteUnrolling<Derived1, Derived2, Stop, Stop>
template<typename Derived1, typename Derived2, int Index, int Stop>
struct assign_innervec_InnerUnrolling
{
- static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src, int outer)
+ static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src, typename Derived1::Index outer)
{
dst.template copyPacketByOuterInner<Derived2, Aligned, Aligned>(outer, Index, src);
assign_innervec_InnerUnrolling<Derived1, Derived2,
@@ -235,7 +235,7 @@ struct assign_innervec_InnerUnrolling
template<typename Derived1, typename Derived2, int Stop>
struct assign_innervec_InnerUnrolling<Derived1, Derived2, Stop, Stop>
{
- static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &, int) {}
+ static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &, typename Derived1::Index) {}
};
/***************************************************************************
diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h
index 5e134c83a..8d835b2f6 100644
--- a/Eigen/src/Core/AssignEvaluator.h
+++ b/Eigen/src/Core/AssignEvaluator.h
@@ -149,8 +149,8 @@ struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling
inner = Index % DstXprType::InnerSizeAtCompileTime
};
- EIGEN_STRONG_INLINE static void run(DstEvaluatorType &dstEvaluator,
- SrcEvaluatorType &srcEvaluator)
+ static EIGEN_STRONG_INLINE void run(DstEvaluatorType &dstEvaluator,
+ SrcEvaluatorType &srcEvaluator)
{
dstEvaluator.copyCoeffByOuterInner(outer, inner, srcEvaluator);
copy_using_evaluator_DefaultTraversal_CompleteUnrolling
@@ -162,15 +162,15 @@ struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling
template<typename DstEvaluatorType, typename SrcEvaluatorType, int Stop>
struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling<DstEvaluatorType, SrcEvaluatorType, Stop, Stop>
{
- EIGEN_STRONG_INLINE static void run(DstEvaluatorType&, SrcEvaluatorType&) { }
+ static EIGEN_STRONG_INLINE void run(DstEvaluatorType&, SrcEvaluatorType&) { }
};
template<typename DstEvaluatorType, typename SrcEvaluatorType, int Index, int Stop>
struct copy_using_evaluator_DefaultTraversal_InnerUnrolling
{
- EIGEN_STRONG_INLINE static void run(DstEvaluatorType &dstEvaluator,
- SrcEvaluatorType &srcEvaluator,
- int outer)
+ static EIGEN_STRONG_INLINE void run(DstEvaluatorType &dstEvaluator,
+ SrcEvaluatorType &srcEvaluator,
+ int outer)
{
dstEvaluator.copyCoeffByOuterInner(outer, Index, srcEvaluator);
copy_using_evaluator_DefaultTraversal_InnerUnrolling
@@ -182,7 +182,7 @@ struct copy_using_evaluator_DefaultTraversal_InnerUnrolling
template<typename DstEvaluatorType, typename SrcEvaluatorType, int Stop>
struct copy_using_evaluator_DefaultTraversal_InnerUnrolling<DstEvaluatorType, SrcEvaluatorType, Stop, Stop>
{
- EIGEN_STRONG_INLINE static void run(DstEvaluatorType&, SrcEvaluatorType&, int) { }
+ static EIGEN_STRONG_INLINE void run(DstEvaluatorType&, SrcEvaluatorType&, int) { }
};
/***********************
@@ -192,8 +192,8 @@ struct copy_using_evaluator_DefaultTraversal_InnerUnrolling<DstEvaluatorType, Sr
template<typename DstEvaluatorType, typename SrcEvaluatorType, int Index, int Stop>
struct copy_using_evaluator_LinearTraversal_CompleteUnrolling
{
- EIGEN_STRONG_INLINE static void run(DstEvaluatorType &dstEvaluator,
- SrcEvaluatorType &srcEvaluator)
+ static EIGEN_STRONG_INLINE void run(DstEvaluatorType &dstEvaluator,
+ SrcEvaluatorType &srcEvaluator)
{
dstEvaluator.copyCoeff(Index, srcEvaluator);
copy_using_evaluator_LinearTraversal_CompleteUnrolling
@@ -205,7 +205,7 @@ struct copy_using_evaluator_LinearTraversal_CompleteUnrolling
template<typename DstEvaluatorType, typename SrcEvaluatorType, int Stop>
struct copy_using_evaluator_LinearTraversal_CompleteUnrolling<DstEvaluatorType, SrcEvaluatorType, Stop, Stop>
{
- EIGEN_STRONG_INLINE static void run(DstEvaluatorType&, SrcEvaluatorType&) { }
+ static EIGEN_STRONG_INLINE void run(DstEvaluatorType&, SrcEvaluatorType&) { }
};
/**************************
@@ -224,8 +224,8 @@ struct copy_using_evaluator_innervec_CompleteUnrolling
JointAlignment = copy_using_evaluator_traits<DstXprType,SrcXprType>::JointAlignment
};
- EIGEN_STRONG_INLINE static void run(DstEvaluatorType &dstEvaluator,
- SrcEvaluatorType &srcEvaluator)
+ static EIGEN_STRONG_INLINE void run(DstEvaluatorType &dstEvaluator,
+ SrcEvaluatorType &srcEvaluator)
{
dstEvaluator.template copyPacketByOuterInner<Aligned, JointAlignment>(outer, inner, srcEvaluator);
enum { NextIndex = Index + packet_traits<typename DstXprType::Scalar>::size };
@@ -238,15 +238,15 @@ struct copy_using_evaluator_innervec_CompleteUnrolling
template<typename DstEvaluatorType, typename SrcEvaluatorType, int Stop>
struct copy_using_evaluator_innervec_CompleteUnrolling<DstEvaluatorType, SrcEvaluatorType, Stop, Stop>
{
- EIGEN_STRONG_INLINE static void run(DstEvaluatorType&, SrcEvaluatorType&) { }
+ static EIGEN_STRONG_INLINE void run(DstEvaluatorType&, SrcEvaluatorType&) { }
};
template<typename DstEvaluatorType, typename SrcEvaluatorType, int Index, int Stop>
struct copy_using_evaluator_innervec_InnerUnrolling
{
- EIGEN_STRONG_INLINE static void run(DstEvaluatorType &dstEvaluator,
- SrcEvaluatorType &srcEvaluator,
- int outer)
+ static EIGEN_STRONG_INLINE void run(DstEvaluatorType &dstEvaluator,
+ SrcEvaluatorType &srcEvaluator,
+ int outer)
{
dstEvaluator.template copyPacketByOuterInner<Aligned, Aligned>(outer, Index, srcEvaluator);
typedef typename DstEvaluatorType::XprType DstXprType;
@@ -260,7 +260,7 @@ struct copy_using_evaluator_innervec_InnerUnrolling
template<typename DstEvaluatorType, typename SrcEvaluatorType, int Stop>
struct copy_using_evaluator_innervec_InnerUnrolling<DstEvaluatorType, SrcEvaluatorType, Stop, Stop>
{
- EIGEN_STRONG_INLINE static void run(DstEvaluatorType&, SrcEvaluatorType&, int) { }
+ static EIGEN_STRONG_INLINE void run(DstEvaluatorType&, SrcEvaluatorType&, int) { }
};
/***************************************************************************
@@ -301,7 +301,7 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, DefaultTraversal, NoUnr
template<typename DstXprType, typename SrcXprType>
struct copy_using_evaluator_impl<DstXprType, SrcXprType, DefaultTraversal, CompleteUnrolling>
{
- EIGEN_STRONG_INLINE static void run(DstXprType &dst, const SrcXprType &src)
+ static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src)
{
typedef typename evaluator<DstXprType>::type DstEvaluatorType;
typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
@@ -319,7 +319,7 @@ template<typename DstXprType, typename SrcXprType>
struct copy_using_evaluator_impl<DstXprType, SrcXprType, DefaultTraversal, InnerUnrolling>
{
typedef typename DstXprType::Index Index;
- EIGEN_STRONG_INLINE static void run(DstXprType &dst, const SrcXprType &src)
+ static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src)
{
typedef typename evaluator<DstXprType>::type DstEvaluatorType;
typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
@@ -330,7 +330,7 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, DefaultTraversal, Inner
const Index outerSize = dst.outerSize();
for(Index outer = 0; outer < outerSize; ++outer)
copy_using_evaluator_DefaultTraversal_InnerUnrolling
- <DstEvaluatorType, SrcEvaluatorType, 0, DstXprType::InnerSizeAtCompileTime>
+ <DstEvaluatorType, SrcEvaluatorType, 0, DstXprType::InnerSizeAtCompileTime>
::run(dstEvaluator, srcEvaluator, outer);
}
};
@@ -345,7 +345,7 @@ struct unaligned_copy_using_evaluator_impl
// if IsAligned = true, then do nothing
template <typename SrcEvaluatorType, typename DstEvaluatorType>
static EIGEN_STRONG_INLINE void run(const SrcEvaluatorType&, DstEvaluatorType&,
- typename SrcEvaluatorType::Index, typename SrcEvaluatorType::Index) {}
+ typename SrcEvaluatorType::Index, typename SrcEvaluatorType::Index) {}
};
template <>
@@ -356,15 +356,15 @@ struct unaligned_copy_using_evaluator_impl<false>
#ifdef _MSC_VER
template <typename DstEvaluatorType, typename SrcEvaluatorType>
static EIGEN_DONT_INLINE void run(DstEvaluatorType &dstEvaluator,
- const SrcEvaluatorType &srcEvaluator,
- typename DstEvaluatorType::Index start,
- typename DstEvaluatorType::Index end)
+ const SrcEvaluatorType &srcEvaluator,
+ typename DstEvaluatorType::Index start,
+ typename DstEvaluatorType::Index end)
#else
template <typename DstEvaluatorType, typename SrcEvaluatorType>
static EIGEN_STRONG_INLINE void run(DstEvaluatorType &dstEvaluator,
- const SrcEvaluatorType &srcEvaluator,
- typename DstEvaluatorType::Index start,
- typename DstEvaluatorType::Index end)
+ const SrcEvaluatorType &srcEvaluator,
+ typename DstEvaluatorType::Index start,
+ typename DstEvaluatorType::Index end)
#endif
{
for (typename DstEvaluatorType::Index index = start; index < end; ++index)
@@ -375,7 +375,7 @@ struct unaligned_copy_using_evaluator_impl<false>
template<typename DstXprType, typename SrcXprType>
struct copy_using_evaluator_impl<DstXprType, SrcXprType, LinearVectorizedTraversal, NoUnrolling>
{
- EIGEN_STRONG_INLINE static void run(DstXprType &dst, const SrcXprType &src)
+ static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src)
{
typedef typename evaluator<DstXprType>::type DstEvaluatorType;
typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
@@ -392,7 +392,7 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, LinearVectorizedTravers
dstAlignment = PacketTraits::AlignedOnScalar ? Aligned : dstIsAligned,
srcAlignment = copy_using_evaluator_traits<DstXprType,SrcXprType>::JointAlignment
};
- const Index alignedStart = dstIsAligned ? 0 : first_aligned(&dstEvaluator.coeffRef(0), size);
+ const Index alignedStart = dstIsAligned ? 0 : internal::first_aligned(&dstEvaluator.coeffRef(0), size);
const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;
unaligned_copy_using_evaluator_impl<dstIsAligned!=0>::run(dstEvaluator, srcEvaluator, 0, alignedStart);
@@ -410,7 +410,7 @@ template<typename DstXprType, typename SrcXprType>
struct copy_using_evaluator_impl<DstXprType, SrcXprType, LinearVectorizedTraversal, CompleteUnrolling>
{
typedef typename DstXprType::Index Index;
- EIGEN_STRONG_INLINE static void run(DstXprType &dst, const SrcXprType &src)
+ static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src)
{
typedef typename evaluator<DstXprType>::type DstEvaluatorType;
typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
@@ -460,7 +460,7 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, InnerVectorizedTraversa
template<typename DstXprType, typename SrcXprType>
struct copy_using_evaluator_impl<DstXprType, SrcXprType, InnerVectorizedTraversal, CompleteUnrolling>
{
- EIGEN_STRONG_INLINE static void run(DstXprType &dst, const SrcXprType &src)
+ static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src)
{
typedef typename evaluator<DstXprType>::type DstEvaluatorType;
typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
@@ -478,7 +478,7 @@ template<typename DstXprType, typename SrcXprType>
struct copy_using_evaluator_impl<DstXprType, SrcXprType, InnerVectorizedTraversal, InnerUnrolling>
{
typedef typename DstXprType::Index Index;
- EIGEN_STRONG_INLINE static void run(DstXprType &dst, const SrcXprType &src)
+ static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src)
{
typedef typename evaluator<DstXprType>::type DstEvaluatorType;
typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
@@ -489,7 +489,7 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, InnerVectorizedTraversa
const Index outerSize = dst.outerSize();
for(Index outer = 0; outer < outerSize; ++outer)
copy_using_evaluator_innervec_InnerUnrolling
- <DstEvaluatorType, SrcEvaluatorType, 0, DstXprType::InnerSizeAtCompileTime>
+ <DstEvaluatorType, SrcEvaluatorType, 0, DstXprType::InnerSizeAtCompileTime>
::run(dstEvaluator, srcEvaluator, outer);
}
};
@@ -519,7 +519,7 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, LinearTraversal, NoUnro
template<typename DstXprType, typename SrcXprType>
struct copy_using_evaluator_impl<DstXprType, SrcXprType, LinearTraversal, CompleteUnrolling>
{
- EIGEN_STRONG_INLINE static void run(DstXprType &dst, const SrcXprType &src)
+ static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src)
{
typedef typename evaluator<DstXprType>::type DstEvaluatorType;
typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
@@ -560,7 +560,7 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, SliceVectorizedTraversa
const Index outerSize = dst.outerSize();
const Index alignedStep = alignable ? (packetSize - dst.outerStride() % packetSize) & packetAlignedMask : 0;
Index alignedStart = ((!alignable) || copy_using_evaluator_traits<DstXprType,SrcXprType>::DstIsAligned) ? 0
- : first_aligned(&dstEvaluator.coeffRef(0,0), innerSize);
+ : internal::first_aligned(&dstEvaluator.coeffRef(0,0), innerSize);
for(Index outer = 0; outer < outerSize; ++outer)
{
@@ -596,7 +596,6 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, AllAtOnceTraversal, NoU
{
typedef typename evaluator<DstXprType>::type DstEvaluatorType;
typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
- typedef typename DstXprType::Index Index;
DstEvaluatorType dstEvaluator(dst);
SrcEvaluatorType srcEvaluator(src);
@@ -616,7 +615,7 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, AllAtOnceTraversal, NoU
template<typename DstXprType, template <typename> class StorageBase, typename SrcXprType>
EIGEN_STRONG_INLINE
const DstXprType& copy_using_evaluator(const NoAlias<DstXprType, StorageBase>& dst,
- const EigenBase<SrcXprType>& src)
+ const EigenBase<SrcXprType>& src)
{
return noalias_copy_using_evaluator(dst.expression(), src.derived());
}
diff --git a/Eigen/src/Core/Block.h b/Eigen/src/Core/Block.h
index fbc2cf227..e8377f67a 100644
--- a/Eigen/src/Core/Block.h
+++ b/Eigen/src/Core/Block.h
@@ -140,8 +140,8 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel> class
{
eigen_assert((RowsAtCompileTime==Dynamic || RowsAtCompileTime==blockRows)
&& (ColsAtCompileTime==Dynamic || ColsAtCompileTime==blockCols));
- eigen_assert(a_startRow >= 0 && blockRows >= 0 && a_startRow + blockRows <= xpr.rows()
- && a_startCol >= 0 && blockCols >= 0 && a_startCol + blockCols <= xpr.cols());
+ eigen_assert(a_startRow >= 0 && blockRows >= 0 && a_startRow <= xpr.rows() - blockRows
+ && a_startCol >= 0 && blockCols >= 0 && a_startCol <= xpr.cols() - blockCols);
}
};
diff --git a/Eigen/src/Core/BooleanRedux.h b/Eigen/src/Core/BooleanRedux.h
index 57efd8e69..f6afeb034 100644
--- a/Eigen/src/Core/BooleanRedux.h
+++ b/Eigen/src/Core/BooleanRedux.h
@@ -85,9 +85,7 @@ inline bool DenseBase<Derived>::all() const
&& SizeAtCompileTime * (CoeffReadCost + NumTraits<Scalar>::AddCost) <= EIGEN_UNROLLING_LIMIT
};
if(unroll)
- return internal::all_unroller<Derived,
- unroll ? int(SizeAtCompileTime) : Dynamic
- >::run(derived());
+ return internal::all_unroller<Derived, unroll ? int(SizeAtCompileTime) : Dynamic>::run(derived());
else
{
for(Index j = 0; j < cols(); ++j)
@@ -111,9 +109,7 @@ inline bool DenseBase<Derived>::any() const
&& SizeAtCompileTime * (CoeffReadCost + NumTraits<Scalar>::AddCost) <= EIGEN_UNROLLING_LIMIT
};
if(unroll)
- return internal::any_unroller<Derived,
- unroll ? int(SizeAtCompileTime) : Dynamic
- >::run(derived());
+ return internal::any_unroller<Derived, unroll ? int(SizeAtCompileTime) : Dynamic>::run(derived());
else
{
for(Index j = 0; j < cols(); ++j)
@@ -133,6 +129,26 @@ inline typename DenseBase<Derived>::Index DenseBase<Derived>::count() const
return derived().template cast<bool>().template cast<Index>().sum();
}
+/** \returns true if \c *this contains at least one Not A Number (NaN).
+ *
+ * \sa isFinite()
+ */
+template<typename Derived>
+inline bool DenseBase<Derived>::hasNaN() const
+{
+ return !((derived().array()==derived().array()).all());
+}
+
+/** \returns true if \c *this contains only finite numbers, i.e., no NaN and no +/-INF values.
+ *
+ * \sa hasNaN()
+ */
+template<typename Derived>
+inline bool DenseBase<Derived>::isFinite() const
+{
+ return !((derived()-derived()).hasNaN());
+}
+
} // end namespace Eigen
#endif // EIGEN_ALLANDANY_H
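
A short usage sketch of the two members added above (not part of the diff); it relies on the same NaN != NaN property the implementation uses:

#include <Eigen/Dense>
#include <limits>

void nan_check_example()
{
  Eigen::Matrix3d m = Eigen::Matrix3d::Random();
  bool clean = m.isFinite() && !m.hasNaN();          // true for a finite random matrix

  m(1,2) = std::numeric_limits<double>::quiet_NaN();
  bool tainted = m.hasNaN();                         // true: the NaN coefficient compares unequal to itself
  (void)clean; (void)tainted;
}
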
diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h
index 5d991b74b..272027c7b 100644
--- a/Eigen/src/Core/CoreEvaluators.h
+++ b/Eigen/src/Core/CoreEvaluators.h
@@ -349,7 +349,7 @@ struct evaluator_impl<EvalToTemp<ArgType> >
template<int LoadMode>
PacketReturnType packet(Index row, Index col) const
{
- return m_resultImpl.packet<LoadMode>(row, col);
+ return m_resultImpl.template packet<LoadMode>(row, col);
}
template<int LoadMode>
@@ -361,13 +361,13 @@ struct evaluator_impl<EvalToTemp<ArgType> >
template<int StoreMode>
void writePacket(Index row, Index col, const PacketScalar& x)
{
- m_resultImpl.writePacket<StoreMode>(row, col, x);
+ m_resultImpl.template writePacket<StoreMode>(row, col, x);
}
template<int StoreMode>
void writePacket(Index index, const PacketScalar& x)
{
- m_resultImpl.writePacket<StoreMode>(index, x);
+ m_resultImpl.template writePacket<StoreMode>(index, x);
}
protected:
diff --git a/Eigen/src/Core/CwiseBinaryOp.h b/Eigen/src/Core/CwiseBinaryOp.h
index 532b2b96e..e603fb9d6 100644
--- a/Eigen/src/Core/CwiseBinaryOp.h
+++ b/Eigen/src/Core/CwiseBinaryOp.h
@@ -94,8 +94,8 @@ struct traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
// So allowing mixing different types gives very unexpected errors when enabling vectorization, when the user tries to
// add together a float matrix and a double matrix.
#define EIGEN_CHECK_BINARY_COMPATIBILIY(BINOP,LHS,RHS) \
- EIGEN_STATIC_ASSERT((internal::functor_allows_mixing_real_and_complex<BINOP>::ret \
- ? int(internal::is_same<typename NumTraits<LHS>::Real, typename NumTraits<RHS>::Real>::value) \
+ EIGEN_STATIC_ASSERT((internal::functor_is_product_like<BINOP>::ret \
+ ? int(internal::scalar_product_traits<LHS, RHS>::Defined) \
: int(internal::is_same<LHS, RHS>::value)), \
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
diff --git a/Eigen/src/Core/CwiseUnaryView.h b/Eigen/src/Core/CwiseUnaryView.h
index 66f73a950..9f9d4972d 100644
--- a/Eigen/src/Core/CwiseUnaryView.h
+++ b/Eigen/src/Core/CwiseUnaryView.h
@@ -44,9 +44,10 @@ struct traits<CwiseUnaryView<ViewOp, MatrixType> >
// "error: no integral type can represent all of the enumerator values
InnerStrideAtCompileTime = MatrixTypeInnerStride == Dynamic
? int(Dynamic)
- : int(MatrixTypeInnerStride)
- * int(sizeof(typename traits<MatrixType>::Scalar) / sizeof(Scalar)),
- OuterStrideAtCompileTime = outer_stride_at_compile_time<MatrixType>::ret
+ : int(MatrixTypeInnerStride) * int(sizeof(typename traits<MatrixType>::Scalar) / sizeof(Scalar)),
+ OuterStrideAtCompileTime = outer_stride_at_compile_time<MatrixType>::ret == Dynamic
+ ? int(Dynamic)
+ : outer_stride_at_compile_time<MatrixType>::ret * int(sizeof(typename traits<MatrixType>::Scalar) / sizeof(Scalar))
};
};
}
@@ -98,6 +99,9 @@ class CwiseUnaryViewImpl<ViewOp,MatrixType,Dense>
typedef typename internal::dense_xpr_base< CwiseUnaryView<ViewOp, MatrixType> >::type Base;
EIGEN_DENSE_PUBLIC_INTERFACE(Derived)
+
+ inline Scalar* data() { return &coeffRef(0); }
+ inline const Scalar* data() const { return &coeff(0); }
inline Index innerStride() const
{
@@ -106,7 +110,7 @@ class CwiseUnaryViewImpl<ViewOp,MatrixType,Dense>
inline Index outerStride() const
{
- return derived().nestedExpression().outerStride();
+ return derived().nestedExpression().outerStride() * sizeof(typename internal::traits<MatrixType>::Scalar) / sizeof(Scalar);
}
EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const
diff --git a/Eigen/src/Core/DenseBase.h b/Eigen/src/Core/DenseBase.h
index 12780354b..ec646ce7d 100644
--- a/Eigen/src/Core/DenseBase.h
+++ b/Eigen/src/Core/DenseBase.h
@@ -353,6 +353,9 @@ template<typename Derived> class DenseBase
bool isConstant(const Scalar& value, const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
bool isZero(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
bool isOnes(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+
+ inline bool hasNaN() const;
+ inline bool isFinite() const;
inline Derived& operator*=(const Scalar& other);
inline Derived& operator/=(const Scalar& other);
@@ -436,8 +439,6 @@ template<typename Derived> class DenseBase
return derived().coeff(0,0);
}
-/////////// Array module ///////////
-
bool all() const;
bool any() const;
Index count() const;
@@ -463,11 +464,11 @@ template<typename Derived> class DenseBase
template<typename ThenDerived>
inline const Select<Derived,ThenDerived, typename ThenDerived::ConstantReturnType>
- select(const DenseBase<ThenDerived>& thenMatrix, typename ThenDerived::Scalar elseScalar) const;
+ select(const DenseBase<ThenDerived>& thenMatrix, const typename ThenDerived::Scalar& elseScalar) const;
template<typename ElseDerived>
inline const Select<Derived, typename ElseDerived::ConstantReturnType, ElseDerived >
- select(typename ElseDerived::Scalar thenScalar, const DenseBase<ElseDerived>& elseMatrix) const;
+ select(const typename ElseDerived::Scalar& thenScalar, const DenseBase<ElseDerived>& elseMatrix) const;
template<int p> RealScalar lpNorm() const;
diff --git a/Eigen/src/Core/Functors.h b/Eigen/src/Core/Functors.h
index 29cd739ce..217cc90d7 100644
--- a/Eigen/src/Core/Functors.h
+++ b/Eigen/src/Core/Functors.h
@@ -154,6 +154,7 @@ template<typename Scalar> struct scalar_hypot_op {
{
EIGEN_USING_STD_MATH(max);
EIGEN_USING_STD_MATH(min);
+ using std::sqrt;
Scalar p = (max)(_x, _y);
Scalar q = (min)(_x, _y);
Scalar qp = q/p;
@@ -543,20 +544,28 @@ template <typename Scalar, bool RandomAccess> struct linspaced_op_impl;
// linear access for packet ops:
// 1) initialization
// base = [low, ..., low] + ([step, ..., step] * [-size, ..., 0])
-// 2) each step
+// 2) each step (where size is 1 for coeff access or PacketSize for packet access)
// base += [size*step, ..., size*step]
+//
+// TODO: Perhaps it's better to initialize lazily (so not in the constructor but in packetOp)
+// in order to avoid the padd() in operator() ?
template <typename Scalar>
struct linspaced_op_impl<Scalar,false>
{
typedef typename packet_traits<Scalar>::type Packet;
- linspaced_op_impl(Scalar low, Scalar step) :
+ linspaced_op_impl(const Scalar& low, const Scalar& step) :
m_low(low), m_step(step),
m_packetStep(pset1<Packet>(packet_traits<Scalar>::size*step)),
- m_base(padd(pset1<Packet>(low),pmul(pset1<Packet>(step),plset<Scalar>(-packet_traits<Scalar>::size)))) {}
+ m_base(padd(pset1<Packet>(low), pmul(pset1<Packet>(step),plset<Scalar>(-packet_traits<Scalar>::size)))) {}
template<typename Index>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index i) const { return m_low+i*m_step; }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index i) const
+ {
+ m_base = padd(m_base, pset1<Packet>(m_step));
+ return m_low+Scalar(i)*m_step;
+ }
+
template<typename Index>
EIGEN_STRONG_INLINE const Packet packetOp(Index) const { return m_base = padd(m_base,m_packetStep); }
@@ -574,7 +583,7 @@ struct linspaced_op_impl<Scalar,true>
{
typedef typename packet_traits<Scalar>::type Packet;
- linspaced_op_impl(Scalar low, Scalar step) :
+ linspaced_op_impl(const Scalar& low, const Scalar& step) :
m_low(low), m_step(step),
m_lowPacket(pset1<Packet>(m_low)), m_stepPacket(pset1<Packet>(m_step)), m_interPacket(plset<Scalar>(0)) {}
@@ -603,7 +612,7 @@ template <typename Scalar, bool RandomAccess> struct functor_traits< linspaced_o
template <typename Scalar, bool RandomAccess> struct linspaced_op
{
typedef typename packet_traits<Scalar>::type Packet;
- linspaced_op(Scalar low, Scalar high, int num_steps) : impl((num_steps==1 ? high : low), (num_steps==1 ? Scalar() : (high-low)/(num_steps-1))) {}
+ linspaced_op(const Scalar& low, const Scalar& high, DenseIndex num_steps) : impl((num_steps==1 ? high : low), (num_steps==1 ? Scalar() : (high-low)/(num_steps-1))) {}
template<typename Index>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index i) const { return impl(i); }
@@ -642,13 +651,14 @@ template <typename Scalar, bool RandomAccess> struct linspaced_op
template<typename Functor> struct functor_has_linear_access { enum { ret = 1 }; };
template<typename Scalar> struct functor_has_linear_access<scalar_identity_op<Scalar> > { enum { ret = 0 }; };
-// in CwiseBinaryOp, we require the Lhs and Rhs to have the same scalar type, except for multiplication
-// where we only require them to have the same _real_ scalar type so one may multiply, say, float by complex<float>.
+// In Eigen, any binary op (Product, CwiseBinaryOp) requires the Lhs and Rhs to have the same scalar type, except for multiplication
+// where the mixing of different types is handled by scalar_product_traits
+// In particular, real * complex<real> is allowed.
// FIXME move this to functor_traits adding a functor_default
-template<typename Functor> struct functor_allows_mixing_real_and_complex { enum { ret = 0 }; };
-template<typename LhsScalar,typename RhsScalar> struct functor_allows_mixing_real_and_complex<scalar_product_op<LhsScalar,RhsScalar> > { enum { ret = 1 }; };
-template<typename LhsScalar,typename RhsScalar> struct functor_allows_mixing_real_and_complex<scalar_conj_product_op<LhsScalar,RhsScalar> > { enum { ret = 1 }; };
-template<typename LhsScalar,typename RhsScalar> struct functor_allows_mixing_real_and_complex<scalar_quotient_op<LhsScalar,RhsScalar> > { enum { ret = 1 }; };
+template<typename Functor> struct functor_is_product_like { enum { ret = 0 }; };
+template<typename LhsScalar,typename RhsScalar> struct functor_is_product_like<scalar_product_op<LhsScalar,RhsScalar> > { enum { ret = 1 }; };
+template<typename LhsScalar,typename RhsScalar> struct functor_is_product_like<scalar_conj_product_op<LhsScalar,RhsScalar> > { enum { ret = 1 }; };
+template<typename LhsScalar,typename RhsScalar> struct functor_is_product_like<scalar_quotient_op<LhsScalar,RhsScalar> > { enum { ret = 1 }; };
/** \internal
diff --git a/Eigen/src/Core/GeneralProduct.h b/Eigen/src/Core/GeneralProduct.h
index a070e618d..557286003 100644
--- a/Eigen/src/Core/GeneralProduct.h
+++ b/Eigen/src/Core/GeneralProduct.h
@@ -222,7 +222,29 @@ class GeneralProduct<Lhs, Rhs, InnerProduct>
***********************************************************************/
namespace internal {
-template<int StorageOrder> struct outer_product_selector;
+
+// Column major
+template<typename ProductType, typename Dest, typename Func>
+EIGEN_DONT_INLINE void outer_product_selector_run(const ProductType& prod, Dest& dest, const Func& func, const false_type&)
+{
+ typedef typename Dest::Index Index;
+ // FIXME make sure lhs is sequentially stored
+ // FIXME not very good if rhs is real and lhs complex while alpha is real too
+ const Index cols = dest.cols();
+ for (Index j=0; j<cols; ++j)
+ func(dest.col(j), prod.rhs().coeff(j) * prod.lhs());
+}
+
+// Row major
+template<typename ProductType, typename Dest, typename Func>
+EIGEN_DONT_INLINE void outer_product_selector_run(const ProductType& prod, Dest& dest, const Func& func, const true_type&) {
+ typedef typename Dest::Index Index;
+ // FIXME make sure rhs is sequentially stored
+ // FIXME not very good if lhs is real and rhs complex while alpha is real too
+ const Index rows = dest.rows();
+ for (Index i=0; i<rows; ++i)
+ func(dest.row(i), prod.lhs().coeff(i) * prod.rhs());
+}
template<typename Lhs, typename Rhs>
struct traits<GeneralProduct<Lhs,Rhs,OuterProduct> >
@@ -235,6 +257,8 @@ template<typename Lhs, typename Rhs>
class GeneralProduct<Lhs, Rhs, OuterProduct>
: public ProductBase<GeneralProduct<Lhs,Rhs,OuterProduct>, Lhs, Rhs>
{
+ template<typename T> struct IsRowMajor : internal::conditional<(int(T::Flags)&RowMajorBit), internal::true_type, internal::false_type>::type {};
+
public:
EIGEN_PRODUCT_PUBLIC_INTERFACE(GeneralProduct)
@@ -243,41 +267,39 @@ class GeneralProduct<Lhs, Rhs, OuterProduct>
EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::RealScalar, typename Rhs::RealScalar>::value),
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
}
-
- template<typename Dest> void scaleAndAddTo(Dest& dest, Scalar alpha) const
- {
- internal::outer_product_selector<(int(Dest::Flags)&RowMajorBit) ? RowMajor : ColMajor>::run(*this, dest, alpha);
+
+ struct set { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() = src; } };
+ struct add { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() += src; } };
+ struct sub { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() -= src; } };
+ struct adds {
+ Scalar m_scale;
+ adds(const Scalar& s) : m_scale(s) {}
+ template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const {
+ dst.const_cast_derived() += m_scale * src;
+ }
+ };
+
+ template<typename Dest>
+ inline void evalTo(Dest& dest) const {
+ internal::outer_product_selector_run(*this, dest, set(), IsRowMajor<Dest>());
+ }
+
+ template<typename Dest>
+ inline void addTo(Dest& dest) const {
+ internal::outer_product_selector_run(*this, dest, add(), IsRowMajor<Dest>());
}
-};
-
-namespace internal {
-template<> struct outer_product_selector<ColMajor> {
- template<typename ProductType, typename Dest>
- static EIGEN_DONT_INLINE void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha) {
- typedef typename Dest::Index Index;
- // FIXME make sure lhs is sequentially stored
- // FIXME not very good if rhs is real and lhs complex while alpha is real too
- const Index cols = dest.cols();
- for (Index j=0; j<cols; ++j)
- dest.col(j) += (alpha * prod.rhs().coeff(j)) * prod.lhs();
- }
-};
+ template<typename Dest>
+ inline void subTo(Dest& dest) const {
+ internal::outer_product_selector_run(*this, dest, sub(), IsRowMajor<Dest>());
+ }
-template<> struct outer_product_selector<RowMajor> {
- template<typename ProductType, typename Dest>
- static EIGEN_DONT_INLINE void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha) {
- typedef typename Dest::Index Index;
- // FIXME make sure rhs is sequentially stored
- // FIXME not very good if lhs is real and rhs complex while alpha is real too
- const Index rows = dest.rows();
- for (Index i=0; i<rows; ++i)
- dest.row(i) += (alpha * prod.lhs().coeff(i)) * prod.rhs();
- }
+ template<typename Dest> void scaleAndAddTo(Dest& dest, const Scalar& alpha) const
+ {
+ internal::outer_product_selector_run(*this, dest, adds(alpha), IsRowMajor<Dest>());
+ }
};
-} // end namespace internal
-
/***********************************************************************
* Implementation of General Matrix Vector Product
***********************************************************************/
@@ -320,7 +342,7 @@ class GeneralProduct<Lhs, Rhs, GemvProduct>
enum { Side = Lhs::IsVectorAtCompileTime ? OnTheLeft : OnTheRight };
typedef typename internal::conditional<int(Side)==OnTheRight,_LhsNested,_RhsNested>::type MatrixType;
- template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
+ template<typename Dest> void scaleAndAddTo(Dest& dst, const Scalar& alpha) const
{
eigen_assert(m_lhs.rows() == dst.rows() && m_rhs.cols() == dst.cols());
internal::gemv_selector<Side,(int(MatrixType::Flags)&RowMajorBit) ? RowMajor : ColMajor,
@@ -335,7 +357,7 @@ template<int StorageOrder, bool BlasCompatible>
struct gemv_selector<OnTheLeft,StorageOrder,BlasCompatible>
{
template<typename ProductType, typename Dest>
- static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
+ static void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha)
{
Transpose<Dest> destT(dest);
enum { OtherStorageOrder = StorageOrder == RowMajor ? ColMajor : RowMajor };
@@ -384,7 +406,7 @@ struct gemv_static_vector_if<Scalar,Size,MaxSize,true>
template<> struct gemv_selector<OnTheRight,ColMajor,true>
{
template<typename ProductType, typename Dest>
- static inline void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
+ static inline void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha)
{
typedef typename ProductType::Index Index;
typedef typename ProductType::LhsScalar LhsScalar;
@@ -457,7 +479,7 @@ template<> struct gemv_selector<OnTheRight,ColMajor,true>
template<> struct gemv_selector<OnTheRight,RowMajor,true>
{
template<typename ProductType, typename Dest>
- static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
+ static void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha)
{
typedef typename ProductType::LhsScalar LhsScalar;
typedef typename ProductType::RhsScalar RhsScalar;
@@ -508,7 +530,7 @@ template<> struct gemv_selector<OnTheRight,RowMajor,true>
template<> struct gemv_selector<OnTheRight,ColMajor,false>
{
template<typename ProductType, typename Dest>
- static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
+ static void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha)
{
typedef typename Dest::Index Index;
// TODO makes sure dest is sequentially stored in memory, otherwise use a temp
@@ -521,7 +543,7 @@ template<> struct gemv_selector<OnTheRight,ColMajor,false>
template<> struct gemv_selector<OnTheRight,RowMajor,false>
{
template<typename ProductType, typename Dest>
- static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
+ static void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha)
{
typedef typename Dest::Index Index;
// TODO makes sure rhs is sequentially stored in memory, otherwise use a temp
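
The outer-product refactoring earlier in this file replaces the single scaleAndAddTo() path with small set/add/sub/adds functors passed to outer_product_selector_run(). Roughly, the following user-level expressions end up in the corresponding functor (a sketch, assuming C already has the right size):

#include <Eigen/Dense>

void outer_product_forms(const Eigen::VectorXd& u, const Eigen::VectorXd& v, Eigen::MatrixXd& C)
{
  C.noalias()  = u * v.transpose();         // evalTo  -> set
  C.noalias() += u * v.transpose();         // addTo   -> add
  C.noalias() -= u * v.transpose();         // subTo   -> sub
  C.noalias() += 2.0 * u * v.transpose();   // scaleAndAddTo(alpha = 2) -> adds
}
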
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h
index aee58f09b..967a37673 100644
--- a/Eigen/src/Core/GenericPacketMath.h
+++ b/Eigen/src/Core/GenericPacketMath.h
@@ -156,7 +156,11 @@ pload(const typename unpacket_traits<Packet>::type* from) { return *from; }
template<typename Packet> inline Packet
ploadu(const typename unpacket_traits<Packet>::type* from) { return *from; }
-/** \internal \returns a packet with elements of \a *from duplicated, e.g.: (from[0],from[0],from[1],from[1]) */
+/** \internal \returns a packet with elements of \a *from duplicated.
+ * For instance, for a packet of 8 elements, 4 scalars will be read from \a *from and
+ * duplicated to form: {from[0],from[0],from[1],from[1],from[2],from[2],from[3],from[3]}
+ * Currently, this function is only used for scalar * complex products.
+ */
template<typename Packet> inline Packet
ploaddup(const typename unpacket_traits<Packet>::type* from) { return *from; }
@@ -307,8 +311,21 @@ struct palign_impl
static inline void run(PacketType&, const PacketType&) {}
};
-/** \internal update \a first using the concatenation of the \a Offset last elements
- * of \a first and packet_size minus \a Offset first elements of \a second */
+/** \internal update \a first using the concatenation of the packet_size minus \a Offset last elements
+ * of \a first and \a Offset first elements of \a second.
+ *
+ * This function is currently only used to optimize matrix-vector products on unaligned matrices.
+ * It takes 2 packets that represent a contiguous memory array, and returns a packet starting
+ * at the position \a Offset. For instance, for packets of 4 elements, we have:
+ * Input:
+ * - first = {f0,f1,f2,f3}
+ * - second = {s0,s1,s2,s3}
+ * Output:
+ * - if Offset==0 then {f0,f1,f2,f3}
+ * - if Offset==1 then {f1,f2,f3,s0}
+ * - if Offset==2 then {f2,f3,s0,s1}
+ * - if Offset==3 then {f3,s0,s1,s2}
+ */
template<int Offset,typename PacketType>
inline void palign(PacketType& first, const PacketType& second)
{
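
A standalone illustration of the palign<Offset> semantics documented above, written with std::array instead of SIMD packets (illustrative only, not Eigen code):

#include <array>
#include <cstddef>

// Keeps the last (4 - Offset) elements of `first` followed by the first Offset
// elements of `second`, i.e. the packet starting at position Offset inside the
// contiguous pair (first, second).
template<int Offset>
std::array<float,4> palign_like(const std::array<float,4>& first,
                                const std::array<float,4>& second)
{
  std::array<float,4> out = {};
  for (std::size_t i = 0; i < 4; ++i)
    out[i] = (i + Offset < 4) ? first[i + Offset] : second[i + Offset - 4];
  return out;
}
// palign_like<1>({f0,f1,f2,f3}, {s0,s1,s2,s3}) yields {f1,f2,f3,s0}
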
diff --git a/Eigen/src/Core/GlobalFunctions.h b/Eigen/src/Core/GlobalFunctions.h
index daf72e6bb..02cae552e 100644
--- a/Eigen/src/Core/GlobalFunctions.h
+++ b/Eigen/src/Core/GlobalFunctions.h
@@ -70,7 +70,7 @@ namespace Eigen
**/
template <typename Derived>
inline const Eigen::CwiseUnaryOp<Eigen::internal::scalar_inverse_mult_op<typename Derived::Scalar>, const Derived>
- operator/(typename Derived::Scalar s, const Eigen::ArrayBase<Derived>& a)
+ operator/(const typename Derived::Scalar& s, const Eigen::ArrayBase<Derived>& a)
{
return Eigen::CwiseUnaryOp<Eigen::internal::scalar_inverse_mult_op<typename Derived::Scalar>, const Derived>(
a.derived(),
diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h
index b8d32ecec..9b5c94b07 100644
--- a/Eigen/src/Core/MathFunctions.h
+++ b/Eigen/src/Core/MathFunctions.h
@@ -363,6 +363,7 @@ struct hypot_impl
EIGEN_USING_STD_MATH(max);
EIGEN_USING_STD_MATH(min);
using std::abs;
+ using std::sqrt;
RealScalar _x = abs(x);
RealScalar _y = abs(y);
RealScalar p = (max)(_x, _y);
@@ -420,7 +421,7 @@ struct atanh2_default_impl
using std::log;
using std::sqrt;
Scalar z = x / y;
- if (abs(z) > sqrt(NumTraits<RealScalar>::epsilon()))
+ if (y == Scalar(0) || abs(z) > sqrt(NumTraits<RealScalar>::epsilon()))
return RealScalar(0.5) * log((y + x) / (y - x));
else
return z + z*z*z / RealScalar(3);
diff --git a/Eigen/src/Core/Matrix.h b/Eigen/src/Core/Matrix.h
index 61af9d9a3..70c1857dd 100644
--- a/Eigen/src/Core/Matrix.h
+++ b/Eigen/src/Core/Matrix.h
@@ -208,14 +208,14 @@ class Matrix
EIGEN_STRONG_INLINE explicit Matrix() : Base()
{
Base::_check_template_params();
- EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
+ EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
}
// FIXME is it still needed
EIGEN_DEVICE_FUNC
Matrix(internal::constructor_without_unaligned_array_assert)
: Base(internal::constructor_without_unaligned_array_assert())
- { Base::_check_template_params(); EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED }
+ { Base::_check_template_params(); EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED }
/** \brief Constructs a vector or row-vector with given dimension. \only_for_vectors
*
@@ -231,7 +231,7 @@ class Matrix
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Matrix)
eigen_assert(dim >= 0);
eigen_assert(SizeAtCompileTime == Dynamic || SizeAtCompileTime == dim);
- EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
+ EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
}
#ifndef EIGEN_PARSED_BY_DOXYGEN
diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h
index 7b9ea8c0a..8dbe71b93 100644
--- a/Eigen/src/Core/MatrixBase.h
+++ b/Eigen/src/Core/MatrixBase.h
@@ -475,7 +475,7 @@ template<typename Derived> class MatrixBase
const MatrixFunctionReturnValue<Derived> sin() const;
const MatrixSquareRootReturnValue<Derived> sqrt() const;
const MatrixLogarithmReturnValue<Derived> log() const;
- const MatrixPowerReturnValue<Derived> pow(RealScalar p) const;
+ const MatrixPowerReturnValue<Derived> pow(const RealScalar& p) const;
#ifdef EIGEN2_SUPPORT
template<typename ProductDerived, typename Lhs, typename Rhs>
diff --git a/Eigen/src/Core/NumTraits.h b/Eigen/src/Core/NumTraits.h
index c94ef026b..bac9e50b8 100644
--- a/Eigen/src/Core/NumTraits.h
+++ b/Eigen/src/Core/NumTraits.h
@@ -140,6 +140,9 @@ struct NumTraits<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> >
AddCost = ArrayType::SizeAtCompileTime==Dynamic ? Dynamic : ArrayType::SizeAtCompileTime * NumTraits<Scalar>::AddCost,
MulCost = ArrayType::SizeAtCompileTime==Dynamic ? Dynamic : ArrayType::SizeAtCompileTime * NumTraits<Scalar>::MulCost
};
+
+ static inline RealScalar epsilon() { return NumTraits<RealScalar>::epsilon(); }
+ static inline RealScalar dummy_precision() { return NumTraits<RealScalar>::dummy_precision(); }
};
} // end namespace Eigen
diff --git a/Eigen/src/Core/PermutationMatrix.h b/Eigen/src/Core/PermutationMatrix.h
index 86b63ea14..4fc5dd318 100644
--- a/Eigen/src/Core/PermutationMatrix.h
+++ b/Eigen/src/Core/PermutationMatrix.h
@@ -105,13 +105,13 @@ class PermutationBase : public EigenBase<Derived>
#endif
/** \returns the number of rows */
- inline Index rows() const { return indices().size(); }
+ inline Index rows() const { return Index(indices().size()); }
/** \returns the number of columns */
- inline Index cols() const { return indices().size(); }
+ inline Index cols() const { return Index(indices().size()); }
/** \returns the size of a side of the respective square matrix, i.e., the number of indices */
- inline Index size() const { return indices().size(); }
+ inline Index size() const { return Index(indices().size()); }
#ifndef EIGEN_PARSED_BY_DOXYGEN
template<typename DenseDerived>
@@ -541,24 +541,25 @@ struct permut_matrix_product_retval
: public ReturnByValue<permut_matrix_product_retval<PermutationType, MatrixType, Side, Transposed> >
{
typedef typename remove_all<typename MatrixType::Nested>::type MatrixTypeNestedCleaned;
+ typedef typename MatrixType::Index Index;
permut_matrix_product_retval(const PermutationType& perm, const MatrixType& matrix)
: m_permutation(perm), m_matrix(matrix)
{}
- inline int rows() const { return m_matrix.rows(); }
- inline int cols() const { return m_matrix.cols(); }
+ inline Index rows() const { return m_matrix.rows(); }
+ inline Index cols() const { return m_matrix.cols(); }
template<typename Dest> inline void evalTo(Dest& dst) const
{
- const int n = Side==OnTheLeft ? rows() : cols();
+ const Index n = Side==OnTheLeft ? rows() : cols();
if(is_same<MatrixTypeNestedCleaned,Dest>::value && extract_data(dst) == extract_data(m_matrix))
{
// apply the permutation inplace
Matrix<bool,PermutationType::RowsAtCompileTime,1,0,PermutationType::MaxRowsAtCompileTime> mask(m_permutation.size());
mask.fill(false);
- int r = 0;
+ Index r = 0;
while(r < m_permutation.size())
{
// search for the next seed
@@ -566,10 +567,10 @@ struct permut_matrix_product_retval
if(r>=m_permutation.size())
break;
// we got one, let's follow it until we are back to the seed
- int k0 = r++;
- int kPrev = k0;
+ Index k0 = r++;
+ Index kPrev = k0;
mask.coeffRef(k0) = true;
- for(int k=m_permutation.indices().coeff(k0); k!=k0; k=m_permutation.indices().coeff(k))
+ for(Index k=m_permutation.indices().coeff(k0); k!=k0; k=m_permutation.indices().coeff(k))
{
Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>(dst, k)
.swap(Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>
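[Illustration, not part of the patch] The in-place branch above applies the permutation by following its cycles, swapping rows (or columns) until each cycle closes. A standalone sketch of the same idea on a plain array of rows; all names are illustrative, and whether a given traversal realizes P or its inverse depends on the direction, which the real code handles via Side/Transposed.

#include <vector>
#include <algorithm>

// Follow each cycle of 'perm', swapping entries until the cycle returns to its seed
// (mirrors the mask-based loop in permut_matrix_product_retval::evalTo, sketch only).
void apply_permutation_in_place(std::vector<double>& rows, const std::vector<int>& perm)
{
  const int n = static_cast<int>(perm.size());
  std::vector<bool> done(n, false);
  for (int r = 0; r < n; ++r)
  {
    if (done[r]) continue;                          // search for the next seed
    int k0 = r;
    done[k0] = true;
    for (int k = perm[k0]; k != k0; k = perm[k])    // walk the cycle back to the seed
    {
      std::swap(rows[k0], rows[k]);                 // the real code swaps whole Block<> rows/columns
      done[k] = true;
    }
  }
}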
diff --git a/Eigen/src/Core/PlainObjectBase.h b/Eigen/src/Core/PlainObjectBase.h
index 49a5518e3..4e159896e 100644
--- a/Eigen/src/Core/PlainObjectBase.h
+++ b/Eigen/src/Core/PlainObjectBase.h
@@ -11,10 +11,15 @@
#ifndef EIGEN_DENSESTORAGEBASE_H
#define EIGEN_DENSESTORAGEBASE_H
-#ifdef EIGEN_INITIALIZE_MATRICES_BY_ZERO
-# define EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED for(int i=0;i<base().size();++i) coeffRef(i)=Scalar(0);
+#if defined(EIGEN_INITIALIZE_MATRICES_BY_ZERO)
+# define EIGEN_INITIALIZE_COEFFS
+# define EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED for(int i=0;i<base().size();++i) coeffRef(i)=Scalar(0);
+#elif defined(EIGEN_INITIALIZE_MATRICES_BY_NAN)
+# define EIGEN_INITIALIZE_COEFFS
+# define EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED for(int i=0;i<base().size();++i) coeffRef(i)=std::numeric_limits<Scalar>::quiet_NaN();
#else
-# define EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
+# undef EIGEN_INITIALIZE_COEFFS
+# define EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
#endif
namespace Eigen {
@@ -243,11 +248,11 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
&& EIGEN_IMPLIES(ColsAtCompileTime==Dynamic && MaxColsAtCompileTime!=Dynamic,nbCols<=MaxColsAtCompileTime)
&& nbRows>=0 && nbCols>=0 && "Invalid sizes when resizing a matrix or array.");
internal::check_rows_cols_for_overflow<MaxSizeAtCompileTime>::run(nbRows, nbCols);
- #ifdef EIGEN_INITIALIZE_MATRICES_BY_ZERO
+ #ifdef EIGEN_INITIALIZE_COEFFS
Index size = nbRows*nbCols;
bool size_changed = size != this->size();
m_storage.resize(size, nbRows, nbCols);
- if(size_changed) EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
+ if(size_changed) EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
#else
internal::check_rows_cols_for_overflow<MaxSizeAtCompileTime>::run(nbRows, nbCols);
m_storage.resize(nbRows*nbCols, nbRows, nbCols);
@@ -270,15 +275,15 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(PlainObjectBase)
eigen_assert(((SizeAtCompileTime == Dynamic && (MaxSizeAtCompileTime==Dynamic || size<=MaxSizeAtCompileTime)) || SizeAtCompileTime == size) && size>=0);
- #ifdef EIGEN_INITIALIZE_MATRICES_BY_ZERO
+ #ifdef EIGEN_INITIALIZE_COEFFS
bool size_changed = size != this->size();
#endif
if(RowsAtCompileTime == 1)
m_storage.resize(size, 1, size);
else
m_storage.resize(size, size, 1);
- #ifdef EIGEN_INITIALIZE_MATRICES_BY_ZERO
- if(size_changed) EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
+ #ifdef EIGEN_INITIALIZE_COEFFS
+ if(size_changed) EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
#endif
}
@@ -435,7 +440,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
EIGEN_STRONG_INLINE explicit PlainObjectBase() : m_storage()
{
// _check_template_params();
-// EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
+// EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
}
#ifndef EIGEN_PARSED_BY_DOXYGEN
@@ -445,7 +450,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
PlainObjectBase(internal::constructor_without_unaligned_array_assert)
: m_storage(internal::constructor_without_unaligned_array_assert())
{
-// _check_template_params(); EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
+// _check_template_params(); EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
}
#endif
@@ -454,7 +459,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
: m_storage(a_size, nbRows, nbCols)
{
// _check_template_params();
-// EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
+// EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
}
/** \copydoc MatrixBase::operator=(const EigenBase<OtherDerived>&)
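[Usage sketch, not part of the patch] The hunk above introduces EIGEN_INITIALIZE_MATRICES_BY_NAN: defining it before including Eigen fills freshly allocated coefficients with quiet NaNs, which makes accidental reads of uninitialized entries stand out in debugging.

// Must be defined before any Eigen header is included (debug builds only).
#define EIGEN_INITIALIZE_MATRICES_BY_NAN
#include <Eigen/Core>
#include <iostream>

int main()
{
  Eigen::MatrixXd m(2, 2);      // coefficients are NaN instead of arbitrary garbage
  std::cout << m << "\n";       // prints nan everywhere until m is assigned
  return 0;
}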
diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h
index 314851d2e..3a08c027c 100644
--- a/Eigen/src/Core/Product.h
+++ b/Eigen/src/Core/Product.h
@@ -74,8 +74,8 @@ class Product : public ProductImpl<Lhs,Rhs,typename internal::promote_storage_ty
protected:
- const LhsNested m_lhs;
- const RhsNested m_rhs;
+ LhsNested m_lhs;
+ RhsNested m_rhs;
};
template<typename Lhs, typename Rhs>
diff --git a/Eigen/src/Core/ProductBase.h b/Eigen/src/Core/ProductBase.h
index 9748167a5..a494b5f87 100644
--- a/Eigen/src/Core/ProductBase.h
+++ b/Eigen/src/Core/ProductBase.h
@@ -108,7 +108,7 @@ class ProductBase : public MatrixBase<Derived>
inline void subTo(Dest& dst) const { scaleAndAddTo(dst,Scalar(-1)); }
template<typename Dest>
- inline void scaleAndAddTo(Dest& dst,Scalar alpha) const { derived().scaleAndAddTo(dst,alpha); }
+ inline void scaleAndAddTo(Dest& dst, const Scalar& alpha) const { derived().scaleAndAddTo(dst,alpha); }
const _LhsNested& lhs() const { return m_lhs; }
const _RhsNested& rhs() const { return m_rhs; }
@@ -195,7 +195,7 @@ class ScaledProduct;
// Also note that here we accept any compatible scalar types
template<typename Derived,typename Lhs,typename Rhs>
const ScaledProduct<Derived>
-operator*(const ProductBase<Derived,Lhs,Rhs>& prod, typename Derived::Scalar x)
+operator*(const ProductBase<Derived,Lhs,Rhs>& prod, const typename Derived::Scalar& x)
{ return ScaledProduct<Derived>(prod.derived(), x); }
template<typename Derived,typename Lhs,typename Rhs>
@@ -207,7 +207,7 @@ operator*(const ProductBase<Derived,Lhs,Rhs>& prod, const typename Derived::Real
template<typename Derived,typename Lhs,typename Rhs>
const ScaledProduct<Derived>
-operator*(typename Derived::Scalar x,const ProductBase<Derived,Lhs,Rhs>& prod)
+operator*(const typename Derived::Scalar& x,const ProductBase<Derived,Lhs,Rhs>& prod)
{ return ScaledProduct<Derived>(prod.derived(), x); }
template<typename Derived,typename Lhs,typename Rhs>
@@ -241,7 +241,7 @@ class ScaledProduct
typedef typename Base::PlainObject PlainObject;
// EIGEN_PRODUCT_PUBLIC_INTERFACE(ScaledProduct)
- ScaledProduct(const NestedProduct& prod, Scalar x)
+ ScaledProduct(const NestedProduct& prod, const Scalar& x)
: Base(prod.lhs(),prod.rhs()), m_prod(prod), m_alpha(x) {}
template<typename Dest>
@@ -254,7 +254,7 @@ class ScaledProduct
inline void subTo(Dest& dst) const { scaleAndAddTo(dst, Scalar(-1)); }
template<typename Dest>
- inline void scaleAndAddTo(Dest& dst,Scalar a_alpha) const { m_prod.derived().scaleAndAddTo(dst,a_alpha * m_alpha); }
+ inline void scaleAndAddTo(Dest& dst, const Scalar& a_alpha) const { m_prod.derived().scaleAndAddTo(dst,a_alpha * m_alpha); }
const Scalar& alpha() const { return m_alpha; }
diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h
index 0c0570e44..8aed51022 100644
--- a/Eigen/src/Core/ProductEvaluators.h
+++ b/Eigen/src/Core/ProductEvaluators.h
@@ -209,7 +209,7 @@ template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
struct etor_product_coeff_impl<DefaultTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
{
typedef typename Lhs::Index Index;
- EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, RetScalar &res)
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, RetScalar &res)
{
etor_product_coeff_impl<DefaultTraversal, UnrollingIndex-1, Lhs, Rhs, RetScalar>::run(row, col, lhs, rhs, innerDim, res);
res += lhs.coeff(row, UnrollingIndex) * rhs.coeff(UnrollingIndex, col);
@@ -220,7 +220,7 @@ template<typename Lhs, typename Rhs, typename RetScalar>
struct etor_product_coeff_impl<DefaultTraversal, 0, Lhs, Rhs, RetScalar>
{
typedef typename Lhs::Index Index;
- EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, RetScalar &res)
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, RetScalar &res)
{
res = lhs.coeff(row, 0) * rhs.coeff(0, col);
}
@@ -230,7 +230,7 @@ template<typename Lhs, typename Rhs, typename RetScalar>
struct etor_product_coeff_impl<DefaultTraversal, Dynamic, Lhs, Rhs, RetScalar>
{
typedef typename Lhs::Index Index;
- EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, RetScalar& res)
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, RetScalar& res)
{
eigen_assert(innerDim>0 && "you are using a non initialized matrix");
res = lhs.coeff(row, 0) * rhs.coeff(0, col);
@@ -248,7 +248,7 @@ struct etor_product_coeff_vectorized_unroller
{
typedef typename Lhs::Index Index;
enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
- EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, typename Lhs::PacketScalar &pres)
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, typename Lhs::PacketScalar &pres)
{
etor_product_coeff_vectorized_unroller<UnrollingIndex-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, innerDim, pres);
pres = padd(pres, pmul( lhs.template packet<Aligned>(row, UnrollingIndex) , rhs.template packet<Aligned>(UnrollingIndex, col) ));
@@ -259,7 +259,7 @@ template<typename Lhs, typename Rhs, typename Packet>
struct etor_product_coeff_vectorized_unroller<0, Lhs, Rhs, Packet>
{
typedef typename Lhs::Index Index;
- EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::PacketScalar &pres)
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::PacketScalar &pres)
{
pres = pmul(lhs.template packet<Aligned>(row, 0) , rhs.template packet<Aligned>(0, col));
}
@@ -271,7 +271,7 @@ struct etor_product_coeff_impl<InnerVectorizedTraversal, UnrollingIndex, Lhs, Rh
typedef typename Lhs::PacketScalar Packet;
typedef typename Lhs::Index Index;
enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
- EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, RetScalar &res)
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, RetScalar &res)
{
Packet pres;
etor_product_coeff_vectorized_unroller<UnrollingIndex+1-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, innerDim, pres);
@@ -284,7 +284,7 @@ template<typename Lhs, typename Rhs, int LhsRows = Lhs::RowsAtCompileTime, int R
struct etor_product_coeff_vectorized_dyn_selector
{
typedef typename Lhs::Index Index;
- EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res)
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res)
{
res = lhs.row(row).transpose().cwiseProduct(rhs.col(col)).sum();
}
@@ -296,7 +296,7 @@ template<typename Lhs, typename Rhs, int RhsCols>
struct etor_product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,RhsCols>
{
typedef typename Lhs::Index Index;
- EIGEN_STRONG_INLINE static void run(Index /*row*/, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res)
+ static EIGEN_STRONG_INLINE void run(Index /*row*/, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res)
{
res = lhs.transpose().cwiseProduct(rhs.col(col)).sum();
}
@@ -306,7 +306,7 @@ template<typename Lhs, typename Rhs, int LhsRows>
struct etor_product_coeff_vectorized_dyn_selector<Lhs,Rhs,LhsRows,1>
{
typedef typename Lhs::Index Index;
- EIGEN_STRONG_INLINE static void run(Index row, Index /*col*/, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res)
+ static EIGEN_STRONG_INLINE void run(Index row, Index /*col*/, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res)
{
res = lhs.row(row).transpose().cwiseProduct(rhs).sum();
}
@@ -316,7 +316,7 @@ template<typename Lhs, typename Rhs>
struct etor_product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,1>
{
typedef typename Lhs::Index Index;
- EIGEN_STRONG_INLINE static void run(Index /*row*/, Index /*col*/, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res)
+ static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res)
{
res = lhs.transpose().cwiseProduct(rhs).sum();
}
@@ -326,7 +326,7 @@ template<typename Lhs, typename Rhs, typename RetScalar>
struct etor_product_coeff_impl<InnerVectorizedTraversal, Dynamic, Lhs, Rhs, RetScalar>
{
typedef typename Lhs::Index Index;
- EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, typename Lhs::Scalar &res)
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, typename Lhs::Scalar &res)
{
etor_product_coeff_vectorized_dyn_selector<Lhs,Rhs>::run(row, col, lhs, rhs, innerDim, res);
}
@@ -340,7 +340,7 @@ template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int Lo
struct etor_product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
{
typedef typename Lhs::Index Index;
- EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
{
etor_product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
res = pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex)), rhs.template packet<LoadMode>(UnrollingIndex, col), res);
@@ -351,7 +351,7 @@ template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int Lo
struct etor_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
{
typedef typename Lhs::Index Index;
- EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
{
etor_product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
res = pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex), pset1<Packet>(rhs.coeff(UnrollingIndex, col)), res);
@@ -362,7 +362,7 @@ template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct etor_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
{
typedef typename Lhs::Index Index;
- EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
{
res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
}
@@ -372,7 +372,7 @@ template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct etor_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
{
typedef typename Lhs::Index Index;
- EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
{
res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
}
@@ -382,7 +382,7 @@ template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct etor_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
{
typedef typename Lhs::Index Index;
- EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
{
eigen_assert(innerDim>0 && "you are using a non initialized matrix");
res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
@@ -395,7 +395,7 @@ template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct etor_product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
{
typedef typename Lhs::Index Index;
- EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
{
eigen_assert(innerDim>0 && "you are using a non initialized matrix");
res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
diff --git a/Eigen/src/Core/Redux.h b/Eigen/src/Core/Redux.h
index 12b3db584..b2c775d90 100644
--- a/Eigen/src/Core/Redux.h
+++ b/Eigen/src/Core/Redux.h
@@ -334,7 +334,8 @@ DenseBase<Derived>::redux(const Func& func) const
::run(derived(), func);
}
-/** \returns the minimum of all coefficients of *this
+/** \returns the minimum of all coefficients of \c *this.
+ * \warning the result is undefined if \c *this contains NaN.
*/
template<typename Derived>
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
@@ -343,7 +344,8 @@ DenseBase<Derived>::minCoeff() const
return this->redux(Eigen::internal::scalar_min_op<Scalar>());
}
-/** \returns the maximum of all coefficients of *this
+/** \returns the maximum of all coefficients of \c *this.
+ * \warning the result is undefined if \c *this contains NaN.
*/
template<typename Derived>
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
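[Illustration, not part of the patch] A short sketch of the warning added above: once a NaN is present, minCoeff()/maxCoeff() are unspecified, so check first. The NaN test below relies only on the IEEE property that NaN != NaN.

#include <Eigen/Core>
#include <iostream>
#include <limits>

int main()
{
  Eigen::VectorXd v(3);
  v << 1.0, std::numeric_limits<double>::quiet_NaN(), -2.0;

  // NaN detection without a dedicated helper: NaN is the only value with x != x.
  bool hasNaN = !((v.array() == v.array()).all());

  if (!hasNaN)
    std::cout << "min = " << v.minCoeff() << "\n";
  else
    std::cout << "minCoeff() would be undefined here\n";  // per the warning added above
  return 0;
}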
diff --git a/Eigen/src/Core/Ref.h b/Eigen/src/Core/Ref.h
index 9c409eecf..aba795bdb 100644
--- a/Eigen/src/Core/Ref.h
+++ b/Eigen/src/Core/Ref.h
@@ -149,6 +149,8 @@ public:
m_stride(StrideType::OuterStrideAtCompileTime==Dynamic?0:StrideType::OuterStrideAtCompileTime,
StrideType::InnerStrideAtCompileTime==Dynamic?0:StrideType::InnerStrideAtCompileTime)
{}
+
+ EIGEN_INHERIT_ASSIGNMENT_OPERATORS(RefBase)
protected:
@@ -170,7 +172,7 @@ protected:
else
::new (static_cast<Base*>(this)) Base(expr.data(), expr.rows(), expr.cols());
::new (&m_stride) StrideBase(StrideType::OuterStrideAtCompileTime==0?0:expr.outerStride(),
- StrideType::InnerStrideAtCompileTime==0?0:expr.innerStride());
+ StrideType::InnerStrideAtCompileTime==0?0:expr.innerStride());
}
StrideBase m_stride;
@@ -211,8 +213,8 @@ template<typename PlainObjectType, int Options, typename StrideType> class Ref
};
// this is the const ref version
-template<typename PlainObjectType, int Options, typename StrideType> class Ref<const PlainObjectType, Options, StrideType>
- : public RefBase<Ref<const PlainObjectType, Options, StrideType> >
+template<typename TPlainObjectType, int Options, typename StrideType> class Ref<const TPlainObjectType, Options, StrideType>
+ : public RefBase<Ref<const TPlainObjectType, Options, StrideType> >
{
typedef internal::traits<Ref> Traits;
public:
@@ -240,13 +242,12 @@ template<typename PlainObjectType, int Options, typename StrideType> class Ref<c
template<typename Expression>
void construct(const Expression& expr, internal::false_type)
{
-// std::cout << "Ref: copy\n";
- m_object = expr;
+ m_object.lazyAssign(expr);
Base::construct(m_object);
}
protected:
- PlainObjectType m_object;
+ TPlainObjectType m_object;
};
} // end namespace Eigen
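[Behavioral sketch, not part of the patch] When a Ref<const ...> cannot reference its argument directly, it evaluates the expression into its internal m_object (now via lazyAssign in the hunk above) and points at that copy. The helper name below is illustrative.

#include <Eigen/Core>
#include <iostream>

// Binds directly to plain vectors; general expressions are evaluated into the
// Ref's internal temporary (the construct(..., false_type) path patched above).
double sum3(const Eigen::Ref<const Eigen::VectorXd>& v)
{
  return v.sum();
}

int main()
{
  Eigen::VectorXd a = Eigen::VectorXd::LinSpaced(5, 0.0, 4.0);
  std::cout << sum3(a) << "\n";        // no copy: a is referenced directly
  std::cout << sum3(2.0 * a) << "\n";  // expression: evaluated into the internal copy
  return 0;
}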
diff --git a/Eigen/src/Core/Select.h b/Eigen/src/Core/Select.h
index 7ee8f23ba..87993bbb5 100644
--- a/Eigen/src/Core/Select.h
+++ b/Eigen/src/Core/Select.h
@@ -136,7 +136,7 @@ template<typename Derived>
template<typename ThenDerived>
inline const Select<Derived,ThenDerived, typename ThenDerived::ConstantReturnType>
DenseBase<Derived>::select(const DenseBase<ThenDerived>& thenMatrix,
- typename ThenDerived::Scalar elseScalar) const
+ const typename ThenDerived::Scalar& elseScalar) const
{
return Select<Derived,ThenDerived,typename ThenDerived::ConstantReturnType>(
derived(), thenMatrix.derived(), ThenDerived::Constant(rows(),cols(),elseScalar));
@@ -150,8 +150,8 @@ DenseBase<Derived>::select(const DenseBase<ThenDerived>& thenMatrix,
template<typename Derived>
template<typename ElseDerived>
inline const Select<Derived, typename ElseDerived::ConstantReturnType, ElseDerived >
-DenseBase<Derived>::select(typename ElseDerived::Scalar thenScalar,
- const DenseBase<ElseDerived>& elseMatrix) const
+DenseBase<Derived>::select(const typename ElseDerived::Scalar& thenScalar,
+ const DenseBase<ElseDerived>& elseMatrix) const
{
return Select<Derived,typename ElseDerived::ConstantReturnType,ElseDerived>(
derived(), ElseDerived::Constant(rows(),cols(),thenScalar), elseMatrix.derived());
diff --git a/Eigen/src/Core/SelfAdjointView.h b/Eigen/src/Core/SelfAdjointView.h
index 82cc4da73..d43789123 100644
--- a/Eigen/src/Core/SelfAdjointView.h
+++ b/Eigen/src/Core/SelfAdjointView.h
@@ -132,7 +132,7 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
* \sa rankUpdate(const MatrixBase<DerivedU>&, Scalar)
*/
template<typename DerivedU, typename DerivedV>
- SelfAdjointView& rankUpdate(const MatrixBase<DerivedU>& u, const MatrixBase<DerivedV>& v, Scalar alpha = Scalar(1));
+ SelfAdjointView& rankUpdate(const MatrixBase<DerivedU>& u, const MatrixBase<DerivedV>& v, const Scalar& alpha = Scalar(1));
/** Perform a symmetric rank K update of the selfadjoint matrix \c *this:
* \f$ this = this + \alpha ( u u^* ) \f$ where \a u is a vector or matrix.
@@ -145,7 +145,7 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
* \sa rankUpdate(const MatrixBase<DerivedU>&, const MatrixBase<DerivedV>&, Scalar)
*/
template<typename DerivedU>
- SelfAdjointView& rankUpdate(const MatrixBase<DerivedU>& u, Scalar alpha = Scalar(1));
+ SelfAdjointView& rankUpdate(const MatrixBase<DerivedU>& u, const Scalar& alpha = Scalar(1));
/////////// Cholesky module ///////////
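[Usage sketch, not part of the patch] The two rankUpdate overloads whose alpha parameter now travels by const reference, as they are typically called:

#include <Eigen/Dense>
#include <iostream>

int main()
{
  Eigen::MatrixXd A = Eigen::MatrixXd::Identity(3, 3);
  Eigen::VectorXd u = Eigen::VectorXd::Random(3);
  Eigen::VectorXd v = Eigen::VectorXd::Random(3);

  // Rank-1 update of the stored triangle: A += 0.5 * u * u^T (only Lower is touched).
  A.selfadjointView<Eigen::Lower>().rankUpdate(u, 0.5);

  // Rank-2 update: A += u * v^T + v * u^T (alpha defaults to 1).
  A.selfadjointView<Eigen::Lower>().rankUpdate(u, v);

  std::cout << Eigen::MatrixXd(A.selfadjointView<Eigen::Lower>()) << "\n";
  return 0;
}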
diff --git a/Eigen/src/Core/SelfCwiseBinaryOp.h b/Eigen/src/Core/SelfCwiseBinaryOp.h
index 40f9eb618..53ac6c387 100644
--- a/Eigen/src/Core/SelfCwiseBinaryOp.h
+++ b/Eigen/src/Core/SelfCwiseBinaryOp.h
@@ -196,7 +196,10 @@ inline Derived& DenseBase<Derived>::operator/=(const Scalar& other)
internal::scalar_product_op<Scalar> >::type BinOp;
typedef typename Derived::PlainObject PlainObject;
SelfCwiseBinaryOp<BinOp, Derived, typename PlainObject::ConstantReturnType> tmp(derived());
- tmp = PlainObject::Constant(rows(),cols(), NumTraits<Scalar>::IsInteger ? other : Scalar(1)/other);
+ Scalar actual_other;
+ if(NumTraits<Scalar>::IsInteger) actual_other = other;
+ else actual_other = Scalar(1)/other;
+ tmp = PlainObject::Constant(rows(),cols(), actual_other);
return derived();
}
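[Illustration, not part of the patch] What the rewritten branch means in practice: for floating-point scalars m /= s is carried out as a multiplication by 1/s, while integer scalars keep true element-wise division.

#include <Eigen/Core>
#include <iostream>

int main()
{
  Eigen::MatrixXf mf = Eigen::MatrixXf::Constant(2, 2, 3.0f);
  mf /= 2.0f;                   // non-integer scalar: implemented as mf *= 0.5f
  std::cout << mf << "\n";      // 1.5 everywhere

  Eigen::MatrixXi mi = Eigen::MatrixXi::Constant(2, 2, 3);
  mi /= 2;                      // integer scalar: real division, no reciprocal
  std::cout << mi << "\n";      // 1 everywhere
  return 0;
}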
diff --git a/Eigen/src/Core/StableNorm.h b/Eigen/src/Core/StableNorm.h
index 1aefd91a8..ea227c535 100644
--- a/Eigen/src/Core/StableNorm.h
+++ b/Eigen/src/Core/StableNorm.h
@@ -13,6 +13,7 @@
namespace Eigen {
namespace internal {
+
template<typename ExpressionType, typename Scalar>
inline void stable_norm_kernel(const ExpressionType& bl, Scalar& ssq, Scalar& scale, Scalar& invScale)
{
@@ -32,7 +33,6 @@ template<typename Derived>
inline typename NumTraits<typename traits<Derived>::Scalar>::Real
blueNorm_impl(const EigenBase<Derived>& _vec)
{
- typedef typename Derived::Scalar Scalar;
typedef typename Derived::RealScalar RealScalar;
typedef typename Derived::Index Index;
using std::pow;
@@ -41,43 +41,40 @@ blueNorm_impl(const EigenBase<Derived>& _vec)
using std::sqrt;
using std::abs;
const Derived& vec(_vec.derived());
- static Index nmax = -1;
+ static bool initialized = false;
static RealScalar b1, b2, s1m, s2m, overfl, rbig, relerr;
- if(nmax <= 0)
+ if(!initialized)
{
- int nbig, ibeta, it, iemin, iemax, iexp;
- RealScalar abig, eps;
+ int ibeta, it, iemin, iemax, iexp;
+ RealScalar eps;
// This program calculates the machine-dependent constants
- // bl, b2, slm, s2m, relerr overfl, nmax
+ // bl, b2, slm, s2m, relerr overfl
// from the "basic" machine-dependent numbers
// nbig, ibeta, it, iemin, iemax, rbig.
// The following define the basic machine-dependent constants.
// For portability, the PORT subprograms "ilmaeh" and "rlmach"
// are used. For any specific computer, each of the assignment
// statements can be replaced
- nbig = (std::numeric_limits<Index>::max)(); // largest integer
- ibeta = std::numeric_limits<RealScalar>::radix; // base for floating-point numbers
- it = std::numeric_limits<RealScalar>::digits; // number of base-beta digits in mantissa
- iemin = std::numeric_limits<RealScalar>::min_exponent; // minimum exponent
- iemax = std::numeric_limits<RealScalar>::max_exponent; // maximum exponent
- rbig = (std::numeric_limits<RealScalar>::max)(); // largest floating-point number
+ ibeta = std::numeric_limits<RealScalar>::radix; // base for floating-point numbers
+ it = std::numeric_limits<RealScalar>::digits; // number of base-beta digits in mantissa
+ iemin = std::numeric_limits<RealScalar>::min_exponent; // minimum exponent
+ iemax = std::numeric_limits<RealScalar>::max_exponent; // maximum exponent
+ rbig = (std::numeric_limits<RealScalar>::max)(); // largest floating-point number
iexp = -((1-iemin)/2);
- b1 = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // lower boundary of midrange
+ b1 = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // lower boundary of midrange
iexp = (iemax + 1 - it)/2;
- b2 = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // upper boundary of midrange
+ b2 = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // upper boundary of midrange
iexp = (2-iemin)/2;
- s1m = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // scaling factor for lower range
+ s1m = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // scaling factor for lower range
iexp = - ((iemax+it)/2);
- s2m = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // scaling factor for upper range
+ s2m = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // scaling factor for upper range
- overfl = rbig*s2m; // overflow boundary for abig
+ overfl = rbig*s2m; // overflow boundary for abig
eps = RealScalar(pow(double(ibeta), 1-it));
- relerr = sqrt(eps); // tolerance for neglecting asml
- abig = RealScalar(1.0/eps - 1.0);
- if (RealScalar(nbig)>abig) nmax = int(abig); // largest safe n
- else nmax = nbig;
+ relerr = sqrt(eps); // tolerance for neglecting asml
+ initialized = true;
}
Index n = vec.size();
RealScalar ab2 = b2 / RealScalar(n);
@@ -125,6 +122,7 @@ blueNorm_impl(const EigenBase<Derived>& _vec)
else
return abig * sqrt(RealScalar(1) + internal::abs2(asml/abig));
}
+
} // end namespace internal
/** \returns the \em l2 norm of \c *this avoiding underflow and overflow.
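[Illustration, not part of the patch] A quick motivation sketch for the overflow-safe norms whose constant initialization is reworked above: a naive squared sum overflows where stableNorm() and blueNorm() do not.

#include <Eigen/Core>
#include <cmath>
#include <iostream>

int main()
{
  // Entries near the double overflow threshold: squaring them yields inf.
  Eigen::VectorXd v = Eigen::VectorXd::Constant(4, 1e200);

  std::cout << std::sqrt(v.squaredNorm()) << "\n";  // inf: 1e400 does not fit in a double
  std::cout << v.norm()       << "\n";              // also goes through squaredNorm() -> inf
  std::cout << v.stableNorm() << "\n";              // 2e200, rescaled accumulation
  std::cout << v.blueNorm()   << "\n";              // 2e200, Blue's algorithm (code above)
  return 0;
}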
diff --git a/Eigen/src/Core/Transpose.h b/Eigen/src/Core/Transpose.h
index b5e1468df..6c2da09cb 100644
--- a/Eigen/src/Core/Transpose.h
+++ b/Eigen/src/Core/Transpose.h
@@ -287,7 +287,7 @@ struct inplace_transpose_selector<MatrixType,false> { // non square matrix
* m = m.transpose().eval();
* \endcode
* and is faster and also safer because in the latter line of code, forgetting the eval() results
- * in a bug caused by aliasing.
+ * in a bug caused by \ref TopicAliasing "aliasing".
*
* Notice however that this method is only useful if you want to replace a matrix by its own transpose.
* If you just need the transpose of a matrix, use transpose().
@@ -298,6 +298,8 @@ struct inplace_transpose_selector<MatrixType,false> { // non square matrix
template<typename Derived>
inline void DenseBase<Derived>::transposeInPlace()
{
+ eigen_assert((rows() == cols() || (RowsAtCompileTime == Dynamic && ColsAtCompileTime == Dynamic))
+ && "transposeInPlace() called on a non-square non-resizable matrix");
internal::inplace_transpose_selector<Derived>::run(derived());
}
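[Illustration, not part of the patch] What the new assertion permits and rejects: in-place transposition of non-square matrices remains fine for fully dynamic (resizable) types and now asserts for fixed-size non-square ones.

#include <Eigen/Core>
#include <iostream>

int main()
{
  Eigen::MatrixXd m(2, 3);          // dynamic: resizable, so non-square is allowed
  m << 1, 2, 3,
       4, 5, 6;
  m.transposeInPlace();             // m becomes 3x2
  std::cout << m << "\n";

  // Eigen::Matrix<double, 2, 3> f; f.transposeInPlace();
  // ^ would trigger the eigen_assert added above: fixed-size and non-square.
  return 0;
}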
diff --git a/Eigen/src/Core/VectorwiseOp.h b/Eigen/src/Core/VectorwiseOp.h
index 862c0f336..511564875 100644
--- a/Eigen/src/Core/VectorwiseOp.h
+++ b/Eigen/src/Core/VectorwiseOp.h
@@ -103,8 +103,8 @@ class PartialReduxExpr : internal::no_assignment_operator,
#define EIGEN_MEMBER_FUNCTOR(MEMBER,COST) \
template <typename ResultType> \
- struct member_##MEMBER { \
- EIGEN_EMPTY_STRUCT_CTOR(member_##MEMBER) \
+ struct member_##MEMBER { \
+ EIGEN_EMPTY_STRUCT_CTOR(member_##MEMBER) \
typedef ResultType result_type; \
template<typename Scalar, int Size> struct Cost \
{ enum { value = COST }; }; \
@@ -233,6 +233,28 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
Direction==Vertical ? 1 : m_matrix.rows(),
Direction==Horizontal ? 1 : m_matrix.cols());
}
+
+ template<typename OtherDerived> struct OppositeExtendedType {
+ typedef Replicate<OtherDerived,
+ Direction==Horizontal ? 1 : ExpressionType::RowsAtCompileTime,
+ Direction==Vertical ? 1 : ExpressionType::ColsAtCompileTime> Type;
+ };
+
+ /** \internal
+ * Replicates a vector in the opposite direction to match the size of \c *this */
+ template<typename OtherDerived>
+ typename OppositeExtendedType<OtherDerived>::Type
+ extendedToOpposite(const DenseBase<OtherDerived>& other) const
+ {
+ EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(Direction==Horizontal, OtherDerived::MaxColsAtCompileTime==1),
+ YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED)
+ EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(Direction==Vertical, OtherDerived::MaxRowsAtCompileTime==1),
+ YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED)
+ return typename OppositeExtendedType<OtherDerived>::Type
+ (other.derived(),
+ Direction==Horizontal ? 1 : m_matrix.rows(),
+ Direction==Vertical ? 1 : m_matrix.cols());
+ }
public:
@@ -255,6 +277,8 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
/** \returns a row (or column) vector expression of the smallest coefficient
* of each column (or row) of the referenced expression.
+ *
+ * \warning the result is undefined if \c *this contains NaN.
*
* Example: \include PartialRedux_minCoeff.cpp
* Output: \verbinclude PartialRedux_minCoeff.out
@@ -265,6 +289,8 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
/** \returns a row (or column) vector expression of the largest coefficient
* of each column (or row) of the referenced expression.
+ *
+ * \warning the result is undefined if \c *this contains NaN.
*
* Example: \include PartialRedux_maxCoeff.cpp
* Output: \verbinclude PartialRedux_maxCoeff.out
@@ -504,6 +530,23 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
return m_matrix / extendedTo(other.derived());
}
+
+ /** \returns an expression where each column or row of the referenced matrix is normalized.
+ * The referenced matrix is \b not modified.
+ * \sa MatrixBase::normalized(), normalize()
+ */
+ CwiseBinaryOp<internal::scalar_quotient_op<Scalar>,
+ const ExpressionTypeNestedCleaned,
+ const typename OppositeExtendedType<typename ReturnType<internal::member_norm,RealScalar>::Type>::Type>
+ normalized() const { return m_matrix.cwiseQuotient(extendedToOpposite(this->norm())); }
+
+
+ /** Normalize in-place each row or column of the referenced matrix.
+ * \sa MatrixBase::normalize(), normalized()
+ */
+ void normalize() {
+ m_matrix = this->normalized();
+ }
/////////// Geometry module ///////////
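[Usage sketch, not part of the patch] The two VectorwiseOp members added above, as they are typically used:

#include <Eigen/Core>
#include <iostream>

int main()
{
  Eigen::MatrixXd m = Eigen::MatrixXd::Random(3, 4);

  // Expression returning a copy in which every column has unit norm.
  Eigen::MatrixXd n = m.colwise().normalized();

  // In-place variant: each row of m is scaled to unit norm.
  m.rowwise().normalize();

  std::cout << n.colwise().norm() << "\n";  // ~ 1 1 1 1
  std::cout << m.rowwise().norm() << "\n";  // ~ 1 1 1 (as a column)
  return 0;
}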
diff --git a/Eigen/src/Core/Visitor.h b/Eigen/src/Core/Visitor.h
index abf8d8e8c..64867b7a2 100644
--- a/Eigen/src/Core/Visitor.h
+++ b/Eigen/src/Core/Visitor.h
@@ -164,8 +164,8 @@ struct functor_traits<max_coeff_visitor<Scalar> > {
} // end namespace internal
-/** \returns the minimum of all coefficients of *this
- * and puts in *row and *col its location.
+/** \returns the minimum of all coefficients of *this and puts in *row and *col its location.
+ * \warning the result is undefined if \c *this contains NaN.
*
* \sa DenseBase::minCoeff(Index*), DenseBase::maxCoeff(Index*,Index*), DenseBase::visitor(), DenseBase::minCoeff()
*/
@@ -181,8 +181,8 @@ DenseBase<Derived>::minCoeff(IndexType* rowId, IndexType* colId) const
return minVisitor.res;
}
-/** \returns the minimum of all coefficients of *this
- * and puts in *index its location.
+/** \returns the minimum of all coefficients of *this and puts in *index its location.
+ * \warning the result is undefined if \c *this contains NaN.
*
* \sa DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::maxCoeff(IndexType*,IndexType*), DenseBase::visitor(), DenseBase::minCoeff()
*/
@@ -198,8 +198,8 @@ DenseBase<Derived>::minCoeff(IndexType* index) const
return minVisitor.res;
}
-/** \returns the maximum of all coefficients of *this
- * and puts in *row and *col its location.
+/** \returns the maximum of all coefficients of *this and puts in *row and *col its location.
+ * \warning the result is undefined if \c *this contains NaN.
*
* \sa DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::visitor(), DenseBase::maxCoeff()
*/
@@ -215,8 +215,8 @@ DenseBase<Derived>::maxCoeff(IndexType* rowPtr, IndexType* colPtr) const
return maxVisitor.res;
}
-/** \returns the maximum of all coefficients of *this
- * and puts in *index its location.
+/** \returns the maximum of all coefficients of *this and puts in *index its location.
+ * \warning the result is undefined if \c *this contains NaN.
*
* \sa DenseBase::maxCoeff(IndexType*,IndexType*), DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::visitor(), DenseBase::maxCoeff()
*/
diff --git a/Eigen/src/Core/arch/SSE/MathFunctions.h b/Eigen/src/Core/arch/SSE/MathFunctions.h
index 557af8455..5ede55fba 100644
--- a/Eigen/src/Core/arch/SSE/MathFunctions.h
+++ b/Eigen/src/Core/arch/SSE/MathFunctions.h
@@ -31,7 +31,8 @@ Packet4f plog<Packet4f>(const Packet4f& _x)
/* the smallest non denormalized float number */
_EIGEN_DECLARE_CONST_Packet4f_FROM_INT(min_norm_pos, 0x00800000);
-
+ _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(minus_inf, 0xff800000);//-1.f/0.f);
+
/* natural logarithm computed for 4 simultaneous float
return NaN for x <= 0
*/
@@ -51,7 +52,8 @@ Packet4f plog<Packet4f>(const Packet4f& _x)
Packet4i emm0;
- Packet4f invalid_mask = _mm_cmple_ps(x, _mm_setzero_ps());
+ Packet4f invalid_mask = _mm_cmplt_ps(x, _mm_setzero_ps());
+ Packet4f iszero_mask = _mm_cmpeq_ps(x, _mm_setzero_ps());
x = pmax(x, p4f_min_norm_pos); /* cut off denormalized stuff */
emm0 = _mm_srli_epi32(_mm_castps_si128(x), 23);
@@ -96,7 +98,9 @@ Packet4f plog<Packet4f>(const Packet4f& _x)
y2 = pmul(e, p4f_cephes_log_q2);
x = padd(x, y);
x = padd(x, y2);
- return _mm_or_ps(x, invalid_mask); // negative arg will be NAN
+ // negative arg will be NAN, 0 will be -INF
+ return _mm_or_ps(_mm_andnot_ps(iszero_mask, _mm_or_ps(x, invalid_mask)),
+ _mm_and_ps(iszero_mask, p4f_minus_inf));
}
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
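[Illustration, not part of the patch] At the user-visible level, the plog change above means a vectorized coefficient-wise log now returns -inf for zero inputs and NaN for negative ones, matching std::log; previously the SSE path produced NaN for zero as well.

#include <Eigen/Core>
#include <iostream>

int main()
{
  Eigen::Array4f x;
  x << 1.0f, 0.0f, -1.0f, 2.0f;

  // Expected with the hunk above: 0, -inf, nan, 0.693...
  std::cout << x.log().transpose() << "\n";
  return 0;
}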
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index f84e5b3ec..addb2fc0e 100644
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -173,18 +173,26 @@ template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const
template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_min_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b)
{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ return _mm_min_epi32(a,b);
+#else
// after some bench, this version *is* faster than a scalar implementation
Packet4i mask = _mm_cmplt_epi32(a,b);
return _mm_or_si128(_mm_and_si128(mask,a),_mm_andnot_si128(mask,b));
+#endif
}
template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_max_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_max_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b)
{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ return _mm_max_epi32(a,b);
+#else
// after some bench, this version *is* faster than a scalar implementation
Packet4i mask = _mm_cmpgt_epi32(a,b);
return _mm_or_si128(_mm_and_si128(mask,a),_mm_andnot_si128(mask,b));
+#endif
}
template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_and_ps(a,b); }
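[Illustration, not part of the patch] A standalone sketch of the two integer-min strategies selected above: the SSE2 compare-and-blend fallback versus the single SSE4.1 instruction. The function name is hypothetical; this is not Eigen's actual wrapper, only the same idea.

#include <emmintrin.h>          // SSE2
#ifdef __SSE4_1__
#include <smmintrin.h>          // SSE4.1: _mm_min_epi32 / _mm_max_epi32
#endif

// Element-wise minimum of four 32-bit signed ints (sketch of the pmin<Packet4i> logic).
static inline __m128i min_epi32_sketch(__m128i a, __m128i b)
{
#ifdef __SSE4_1__
  return _mm_min_epi32(a, b);                       // single instruction on SSE4.1
#else
  __m128i mask = _mm_cmplt_epi32(a, b);             // 0xFFFFFFFF where a < b
  return _mm_or_si128(_mm_and_si128(mask, a),       // pick a where a < b
                      _mm_andnot_si128(mask, b));   // pick b elsewhere
#endif
}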
diff --git a/Eigen/src/Core/products/CoeffBasedProduct.h b/Eigen/src/Core/products/CoeffBasedProduct.h
index 312a05c71..51fc5fd58 100644
--- a/Eigen/src/Core/products/CoeffBasedProduct.h
+++ b/Eigen/src/Core/products/CoeffBasedProduct.h
@@ -152,7 +152,7 @@ class CoeffBasedProduct
{
// we don't allow taking products of matrices of different real types, as that wouldn't be vectorizable.
// We still allow to mix T and complex<T>.
- EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::RealScalar, typename Rhs::RealScalar>::value),
+ EIGEN_STATIC_ASSERT((internal::scalar_product_traits<typename Lhs::RealScalar, typename Rhs::RealScalar>::Defined),
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
eigen_assert(lhs.cols() == rhs.rows()
&& "invalid matrix product"
diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
index 09912fafb..780fa74d3 100644
--- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h
+++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
@@ -69,8 +69,8 @@ inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1=0, std::ptrdi
* - the number of scalars that fit into a packet (when vectorization is enabled).
*
* \sa setCpuCacheSizes */
-template<typename LhsScalar, typename RhsScalar, int KcFactor>
-void computeProductBlockingSizes(std::ptrdiff_t& k, std::ptrdiff_t& m, std::ptrdiff_t& n)
+template<typename LhsScalar, typename RhsScalar, int KcFactor, typename SizeType>
+void computeProductBlockingSizes(SizeType& k, SizeType& m, SizeType& n)
{
EIGEN_UNUSED_VARIABLE(n);
// Explanations:
@@ -91,13 +91,13 @@ void computeProductBlockingSizes(std::ptrdiff_t& k, std::ptrdiff_t& m, std::ptrd
};
manage_caching_sizes(GetAction, &l1, &l2);
- k = std::min<std::ptrdiff_t>(k, l1/kdiv);
- std::ptrdiff_t _m = k>0 ? l2/(4 * sizeof(LhsScalar) * k) : 0;
+ k = std::min<SizeType>(k, l1/kdiv);
+ SizeType _m = k>0 ? l2/(4 * sizeof(LhsScalar) * k) : 0;
if(_m<m) m = _m & mr_mask;
}
-template<typename LhsScalar, typename RhsScalar>
-inline void computeProductBlockingSizes(std::ptrdiff_t& k, std::ptrdiff_t& m, std::ptrdiff_t& n)
+template<typename LhsScalar, typename RhsScalar, typename SizeType>
+inline void computeProductBlockingSizes(SizeType& k, SizeType& m, SizeType& n)
{
computeProductBlockingSizes<LhsScalar,RhsScalar,1>(k, m, n);
}
@@ -529,7 +529,14 @@ struct gebp_kernel
EIGEN_DONT_INLINE
void operator()(ResScalar* res, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index rows, Index depth, Index cols, ResScalar alpha,
- Index strideA=-1, Index strideB=-1, Index offsetA=0, Index offsetB=0, RhsScalar* unpackedB = 0)
+ Index strideA=-1, Index strideB=-1, Index offsetA=0, Index offsetB=0, RhsScalar* unpackedB=0);
+};
+
+template<typename LhsScalar, typename RhsScalar, typename Index, int mr, int nr, bool ConjugateLhs, bool ConjugateRhs>
+EIGEN_DONT_INLINE
+void gebp_kernel<LhsScalar,RhsScalar,Index,mr,nr,ConjugateLhs,ConjugateRhs>
+ ::operator()(ResScalar* res, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index rows, Index depth, Index cols, ResScalar alpha,
+ Index strideA, Index strideB, Index offsetA, Index offsetB, RhsScalar* unpackedB)
{
Traits traits;
@@ -1089,7 +1096,7 @@ EIGEN_ASM_COMMENT("mybegin4");
}
}
}
-};
+
#undef CJMADD
@@ -1110,80 +1117,83 @@ EIGEN_ASM_COMMENT("mybegin4");
template<typename Scalar, typename Index, int Pack1, int Pack2, int StorageOrder, bool Conjugate, bool PanelMode>
struct gemm_pack_lhs
{
- EIGEN_DONT_INLINE void operator()(Scalar* blockA, const Scalar* EIGEN_RESTRICT _lhs, Index lhsStride, Index depth, Index rows,
- Index stride=0, Index offset=0)
+ EIGEN_DONT_INLINE void operator()(Scalar* blockA, const Scalar* EIGEN_RESTRICT _lhs, Index lhsStride, Index depth, Index rows, Index stride=0, Index offset=0);
+};
+
+template<typename Scalar, typename Index, int Pack1, int Pack2, int StorageOrder, bool Conjugate, bool PanelMode>
+EIGEN_DONT_INLINE void gemm_pack_lhs<Scalar, Index, Pack1, Pack2, StorageOrder, Conjugate, PanelMode>
+ ::operator()(Scalar* blockA, const Scalar* EIGEN_RESTRICT _lhs, Index lhsStride, Index depth, Index rows, Index stride, Index offset)
+{
+ typedef typename packet_traits<Scalar>::type Packet;
+ enum { PacketSize = packet_traits<Scalar>::size };
+
+ EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK LHS");
+ eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
+ eigen_assert( (StorageOrder==RowMajor) || ((Pack1%PacketSize)==0 && Pack1<=4*PacketSize) );
+ conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
+ const_blas_data_mapper<Scalar, Index, StorageOrder> lhs(_lhs,lhsStride);
+ Index count = 0;
+ Index peeled_mc = (rows/Pack1)*Pack1;
+ for(Index i=0; i<peeled_mc; i+=Pack1)
{
- typedef typename packet_traits<Scalar>::type Packet;
- enum { PacketSize = packet_traits<Scalar>::size };
-
- EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK LHS");
- eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
- eigen_assert( (StorageOrder==RowMajor) || ((Pack1%PacketSize)==0 && Pack1<=4*PacketSize) );
- conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
- const_blas_data_mapper<Scalar, Index, StorageOrder> lhs(_lhs,lhsStride);
- Index count = 0;
- Index peeled_mc = (rows/Pack1)*Pack1;
- for(Index i=0; i<peeled_mc; i+=Pack1)
- {
- if(PanelMode) count += Pack1 * offset;
+ if(PanelMode) count += Pack1 * offset;
- if(StorageOrder==ColMajor)
+ if(StorageOrder==ColMajor)
+ {
+ for(Index k=0; k<depth; k++)
{
- for(Index k=0; k<depth; k++)
- {
- Packet A, B, C, D;
- if(Pack1>=1*PacketSize) A = ploadu<Packet>(&lhs(i+0*PacketSize, k));
- if(Pack1>=2*PacketSize) B = ploadu<Packet>(&lhs(i+1*PacketSize, k));
- if(Pack1>=3*PacketSize) C = ploadu<Packet>(&lhs(i+2*PacketSize, k));
- if(Pack1>=4*PacketSize) D = ploadu<Packet>(&lhs(i+3*PacketSize, k));
- if(Pack1>=1*PacketSize) { pstore(blockA+count, cj.pconj(A)); count+=PacketSize; }
- if(Pack1>=2*PacketSize) { pstore(blockA+count, cj.pconj(B)); count+=PacketSize; }
- if(Pack1>=3*PacketSize) { pstore(blockA+count, cj.pconj(C)); count+=PacketSize; }
- if(Pack1>=4*PacketSize) { pstore(blockA+count, cj.pconj(D)); count+=PacketSize; }
- }
+ Packet A, B, C, D;
+ if(Pack1>=1*PacketSize) A = ploadu<Packet>(&lhs(i+0*PacketSize, k));
+ if(Pack1>=2*PacketSize) B = ploadu<Packet>(&lhs(i+1*PacketSize, k));
+ if(Pack1>=3*PacketSize) C = ploadu<Packet>(&lhs(i+2*PacketSize, k));
+ if(Pack1>=4*PacketSize) D = ploadu<Packet>(&lhs(i+3*PacketSize, k));
+ if(Pack1>=1*PacketSize) { pstore(blockA+count, cj.pconj(A)); count+=PacketSize; }
+ if(Pack1>=2*PacketSize) { pstore(blockA+count, cj.pconj(B)); count+=PacketSize; }
+ if(Pack1>=3*PacketSize) { pstore(blockA+count, cj.pconj(C)); count+=PacketSize; }
+ if(Pack1>=4*PacketSize) { pstore(blockA+count, cj.pconj(D)); count+=PacketSize; }
}
- else
+ }
+ else
+ {
+ for(Index k=0; k<depth; k++)
{
- for(Index k=0; k<depth; k++)
+ // TODO add a vectorized transpose here
+ Index w=0;
+ for(; w<Pack1-3; w+=4)
{
- // TODO add a vectorized transpose here
- Index w=0;
- for(; w<Pack1-3; w+=4)
- {
- Scalar a(cj(lhs(i+w+0, k))),
- b(cj(lhs(i+w+1, k))),
- c(cj(lhs(i+w+2, k))),
- d(cj(lhs(i+w+3, k)));
- blockA[count++] = a;
- blockA[count++] = b;
- blockA[count++] = c;
- blockA[count++] = d;
- }
- if(Pack1%4)
- for(;w<Pack1;++w)
- blockA[count++] = cj(lhs(i+w, k));
+ Scalar a(cj(lhs(i+w+0, k))),
+ b(cj(lhs(i+w+1, k))),
+ c(cj(lhs(i+w+2, k))),
+ d(cj(lhs(i+w+3, k)));
+ blockA[count++] = a;
+ blockA[count++] = b;
+ blockA[count++] = c;
+ blockA[count++] = d;
}
+ if(Pack1%4)
+ for(;w<Pack1;++w)
+ blockA[count++] = cj(lhs(i+w, k));
}
- if(PanelMode) count += Pack1 * (stride-offset-depth);
- }
- if(rows-peeled_mc>=Pack2)
- {
- if(PanelMode) count += Pack2*offset;
- for(Index k=0; k<depth; k++)
- for(Index w=0; w<Pack2; w++)
- blockA[count++] = cj(lhs(peeled_mc+w, k));
- if(PanelMode) count += Pack2 * (stride-offset-depth);
- peeled_mc += Pack2;
- }
- for(Index i=peeled_mc; i<rows; i++)
- {
- if(PanelMode) count += offset;
- for(Index k=0; k<depth; k++)
- blockA[count++] = cj(lhs(i, k));
- if(PanelMode) count += (stride-offset-depth);
}
+ if(PanelMode) count += Pack1 * (stride-offset-depth);
}
-};
+ if(rows-peeled_mc>=Pack2)
+ {
+ if(PanelMode) count += Pack2*offset;
+ for(Index k=0; k<depth; k++)
+ for(Index w=0; w<Pack2; w++)
+ blockA[count++] = cj(lhs(peeled_mc+w, k));
+ if(PanelMode) count += Pack2 * (stride-offset-depth);
+ peeled_mc += Pack2;
+ }
+ for(Index i=peeled_mc; i<rows; i++)
+ {
+ if(PanelMode) count += offset;
+ for(Index k=0; k<depth; k++)
+ blockA[count++] = cj(lhs(i, k));
+ if(PanelMode) count += (stride-offset-depth);
+ }
+}
// copy a complete panel of the rhs
// this version is optimized for column major matrices
@@ -1197,92 +1207,98 @@ struct gemm_pack_rhs<Scalar, Index, nr, ColMajor, Conjugate, PanelMode>
{
typedef typename packet_traits<Scalar>::type Packet;
enum { PacketSize = packet_traits<Scalar>::size };
- EIGEN_DONT_INLINE void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols,
- Index stride=0, Index offset=0)
+ EIGEN_DONT_INLINE void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols, Index stride=0, Index offset=0);
+};
+
+template<typename Scalar, typename Index, int nr, bool Conjugate, bool PanelMode>
+EIGEN_DONT_INLINE void gemm_pack_rhs<Scalar, Index, nr, ColMajor, Conjugate, PanelMode>
+ ::operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols, Index stride, Index offset)
+{
+ EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK RHS COLMAJOR");
+ eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
+ conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
+ Index packet_cols = (cols/nr) * nr;
+ Index count = 0;
+ for(Index j2=0; j2<packet_cols; j2+=nr)
{
- EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK RHS COLMAJOR");
- eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
- conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
- Index packet_cols = (cols/nr) * nr;
- Index count = 0;
- for(Index j2=0; j2<packet_cols; j2+=nr)
+ // skip what we have before
+ if(PanelMode) count += nr * offset;
+ const Scalar* b0 = &rhs[(j2+0)*rhsStride];
+ const Scalar* b1 = &rhs[(j2+1)*rhsStride];
+ const Scalar* b2 = &rhs[(j2+2)*rhsStride];
+ const Scalar* b3 = &rhs[(j2+3)*rhsStride];
+ for(Index k=0; k<depth; k++)
{
- // skip what we have before
- if(PanelMode) count += nr * offset;
- const Scalar* b0 = &rhs[(j2+0)*rhsStride];
- const Scalar* b1 = &rhs[(j2+1)*rhsStride];
- const Scalar* b2 = &rhs[(j2+2)*rhsStride];
- const Scalar* b3 = &rhs[(j2+3)*rhsStride];
- for(Index k=0; k<depth; k++)
- {
- blockB[count+0] = cj(b0[k]);
- blockB[count+1] = cj(b1[k]);
- if(nr==4) blockB[count+2] = cj(b2[k]);
- if(nr==4) blockB[count+3] = cj(b3[k]);
- count += nr;
- }
- // skip what we have after
- if(PanelMode) count += nr * (stride-offset-depth);
+ blockB[count+0] = cj(b0[k]);
+ blockB[count+1] = cj(b1[k]);
+ if(nr==4) blockB[count+2] = cj(b2[k]);
+ if(nr==4) blockB[count+3] = cj(b3[k]);
+ count += nr;
}
+ // skip what we have after
+ if(PanelMode) count += nr * (stride-offset-depth);
+ }
- // copy the remaining columns one at a time (nr==1)
- for(Index j2=packet_cols; j2<cols; ++j2)
+ // copy the remaining columns one at a time (nr==1)
+ for(Index j2=packet_cols; j2<cols; ++j2)
+ {
+ if(PanelMode) count += offset;
+ const Scalar* b0 = &rhs[(j2+0)*rhsStride];
+ for(Index k=0; k<depth; k++)
{
- if(PanelMode) count += offset;
- const Scalar* b0 = &rhs[(j2+0)*rhsStride];
- for(Index k=0; k<depth; k++)
- {
- blockB[count] = cj(b0[k]);
- count += 1;
- }
- if(PanelMode) count += (stride-offset-depth);
+ blockB[count] = cj(b0[k]);
+ count += 1;
}
+ if(PanelMode) count += (stride-offset-depth);
}
-};
+}
// this version is optimized for row major matrices
template<typename Scalar, typename Index, int nr, bool Conjugate, bool PanelMode>
struct gemm_pack_rhs<Scalar, Index, nr, RowMajor, Conjugate, PanelMode>
{
enum { PacketSize = packet_traits<Scalar>::size };
- EIGEN_DONT_INLINE void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols,
- Index stride=0, Index offset=0)
+ EIGEN_DONT_INLINE void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols, Index stride=0, Index offset=0);
+};
+
+template<typename Scalar, typename Index, int nr, bool Conjugate, bool PanelMode>
+EIGEN_DONT_INLINE void gemm_pack_rhs<Scalar, Index, nr, RowMajor, Conjugate, PanelMode>
+ ::operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols, Index stride, Index offset)
+{
+ EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK RHS ROWMAJOR");
+ eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
+ conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
+ Index packet_cols = (cols/nr) * nr;
+ Index count = 0;
+ for(Index j2=0; j2<packet_cols; j2+=nr)
{
- EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK RHS ROWMAJOR");
- eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
- conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
- Index packet_cols = (cols/nr) * nr;
- Index count = 0;
- for(Index j2=0; j2<packet_cols; j2+=nr)
+ // skip what we have before
+ if(PanelMode) count += nr * offset;
+ for(Index k=0; k<depth; k++)
{
- // skip what we have before
- if(PanelMode) count += nr * offset;
- for(Index k=0; k<depth; k++)
- {
- const Scalar* b0 = &rhs[k*rhsStride + j2];
- blockB[count+0] = cj(b0[0]);
- blockB[count+1] = cj(b0[1]);
- if(nr==4) blockB[count+2] = cj(b0[2]);
- if(nr==4) blockB[count+3] = cj(b0[3]);
- count += nr;
- }
- // skip what we have after
- if(PanelMode) count += nr * (stride-offset-depth);
+ const Scalar* b0 = &rhs[k*rhsStride + j2];
+ blockB[count+0] = cj(b0[0]);
+ blockB[count+1] = cj(b0[1]);
+ if(nr==4) blockB[count+2] = cj(b0[2]);
+ if(nr==4) blockB[count+3] = cj(b0[3]);
+ count += nr;
}
- // copy the remaining columns one at a time (nr==1)
- for(Index j2=packet_cols; j2<cols; ++j2)
+ // skip what we have after
+ if(PanelMode) count += nr * (stride-offset-depth);
+ }
+ // copy the remaining columns one at a time (nr==1)
+ for(Index j2=packet_cols; j2<cols; ++j2)
+ {
+ if(PanelMode) count += offset;
+ const Scalar* b0 = &rhs[j2];
+ for(Index k=0; k<depth; k++)
{
- if(PanelMode) count += offset;
- const Scalar* b0 = &rhs[j2];
- for(Index k=0; k<depth; k++)
- {
- blockB[count] = cj(b0[k*rhsStride]);
- count += 1;
- }
- if(PanelMode) count += stride-offset-depth;
+ blockB[count] = cj(b0[k*rhsStride]);
+ count += 1;
}
+ if(PanelMode) count += stride-offset-depth;
}
-};
+}
} // end namespace internal
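[Hedged sketch, not part of the patch] The templating of computeProductBlockingSizes above accepts any integer-like SizeType for k/m/n instead of hard-coded std::ptrdiff_t, so callers using a narrower Index keep working. A sketch of a call; this is an internal API and may change without notice.

#include <Eigen/Core>
#include <iostream>

int main()
{
  // k, m start as the problem dimensions and are clamped to cache-friendly block
  // sizes; with the patch the three references may be any integer type.
  std::ptrdiff_t k = 4096, m = 4096, n = 4096;
  Eigen::internal::computeProductBlockingSizes<double, double>(k, m, n);
  std::cout << "kc=" << k << " mc=" << m << " nc=" << n << "\n";
  return 0;
}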
diff --git a/Eigen/src/Core/products/GeneralMatrixMatrix.h b/Eigen/src/Core/products/GeneralMatrixMatrix.h
index 73a465ec5..3f5ffcf51 100644
--- a/Eigen/src/Core/products/GeneralMatrixMatrix.h
+++ b/Eigen/src/Core/products/GeneralMatrixMatrix.h
@@ -50,6 +50,7 @@ template<
typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs>
struct general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,ColMajor>
{
+
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
static void run(Index rows, Index cols, Index depth,
const LhsScalar* _lhs, Index lhsStride,
@@ -169,7 +170,6 @@ static void run(Index rows, Index cols, Index depth,
// vertical panel which is, in practice, a very low number.
pack_rhs(blockB, &rhs(k2,0), rhsStride, actual_kc, cols);
-
// For each mc x kc block of the lhs's vertical panel...
// (==GEPP_VAR1)
for(Index i2=0; i2<rows; i2+=mc)
@@ -183,7 +183,6 @@ static void run(Index rows, Index cols, Index depth,
// Everything is packed, we can now call the block * panel kernel:
gebp(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha, -1, -1, 0, 0, blockW);
-
}
}
}
@@ -204,7 +203,7 @@ struct traits<GeneralProduct<Lhs,Rhs,GemmProduct> >
template<typename Scalar, typename Index, typename Gemm, typename Lhs, typename Rhs, typename Dest, typename BlockingType>
struct gemm_functor
{
- gemm_functor(const Lhs& lhs, const Rhs& rhs, Dest& dest, Scalar actualAlpha,
+ gemm_functor(const Lhs& lhs, const Rhs& rhs, Dest& dest, const Scalar& actualAlpha,
BlockingType& blocking)
: m_lhs(lhs), m_rhs(rhs), m_dest(dest), m_actualAlpha(actualAlpha), m_blocking(blocking)
{}
@@ -395,7 +394,7 @@ class GeneralProduct<Lhs, Rhs, GemmProduct>
EIGEN_CHECK_BINARY_COMPATIBILIY(BinOp,LhsScalar,RhsScalar);
}
- template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
+ template<typename Dest> void scaleAndAddTo(Dest& dst, const Scalar& alpha) const
{
eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
diff --git a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h
index 432d3a9dc..5c3763909 100644
--- a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h
+++ b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h
@@ -12,6 +12,9 @@
namespace Eigen {
+template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjLhs, bool ConjRhs>
+struct selfadjoint_rank1_update;
+
namespace internal {
/**********************************************************************
@@ -39,7 +42,7 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,
{
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* lhs, Index lhsStride,
- const RhsScalar* rhs, Index rhsStride, ResScalar* res, Index resStride, ResScalar alpha)
+ const RhsScalar* rhs, Index rhsStride, ResScalar* res, Index resStride, const ResScalar& alpha)
{
general_matrix_matrix_triangular_product<Index,
RhsScalar, RhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateRhs,
@@ -55,7 +58,7 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,
{
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* _lhs, Index lhsStride,
- const RhsScalar* _rhs, Index rhsStride, ResScalar* res, Index resStride, ResScalar alpha)
+ const RhsScalar* _rhs, Index rhsStride, ResScalar* res, Index resStride, const ResScalar& alpha)
{
const_blas_data_mapper<LhsScalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
const_blas_data_mapper<RhsScalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride);
@@ -133,7 +136,7 @@ struct tribb_kernel
enum {
BlockSize = EIGEN_PLAIN_ENUM_MAX(mr,nr)
};
- void operator()(ResScalar* res, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index size, Index depth, ResScalar alpha, RhsScalar* workspace)
+ void operator()(ResScalar* res, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index size, Index depth, const ResScalar& alpha, RhsScalar* workspace)
{
gebp_kernel<LhsScalar, RhsScalar, Index, mr, nr, ConjLhs, ConjRhs> gebp_kernel;
Matrix<ResScalar,BlockSize,BlockSize,ColMajor> buffer;
@@ -180,31 +183,92 @@ struct tribb_kernel
// high level API
+template<typename MatrixType, typename ProductType, int UpLo, bool IsOuterProduct>
+struct general_product_to_triangular_selector;
+
+
+template<typename MatrixType, typename ProductType, int UpLo>
+struct general_product_to_triangular_selector<MatrixType,ProductType,UpLo,true>
+{
+ static void run(MatrixType& mat, const ProductType& prod, const typename MatrixType::Scalar& alpha)
+ {
+ typedef typename MatrixType::Scalar Scalar;
+ typedef typename MatrixType::Index Index;
+
+ typedef typename internal::remove_all<typename ProductType::LhsNested>::type Lhs;
+ typedef internal::blas_traits<Lhs> LhsBlasTraits;
+ typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhs;
+ typedef typename internal::remove_all<ActualLhs>::type _ActualLhs;
+ typename internal::add_const_on_value_type<ActualLhs>::type actualLhs = LhsBlasTraits::extract(prod.lhs());
+
+ typedef typename internal::remove_all<typename ProductType::RhsNested>::type Rhs;
+ typedef internal::blas_traits<Rhs> RhsBlasTraits;
+ typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhs;
+ typedef typename internal::remove_all<ActualRhs>::type _ActualRhs;
+ typename internal::add_const_on_value_type<ActualRhs>::type actualRhs = RhsBlasTraits::extract(prod.rhs());
+
+ Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs().derived()) * RhsBlasTraits::extractScalarFactor(prod.rhs().derived());
+
+ enum {
+ StorageOrder = (internal::traits<MatrixType>::Flags&RowMajorBit) ? RowMajor : ColMajor,
+ UseLhsDirectly = _ActualLhs::InnerStrideAtCompileTime==1,
+ UseRhsDirectly = _ActualRhs::InnerStrideAtCompileTime==1
+ };
+
+ internal::gemv_static_vector_if<Scalar,Lhs::SizeAtCompileTime,Lhs::MaxSizeAtCompileTime,!UseLhsDirectly> static_lhs;
+ ei_declare_aligned_stack_constructed_variable(Scalar, actualLhsPtr, actualLhs.size(),
+ (UseLhsDirectly ? const_cast<Scalar*>(actualLhs.data()) : static_lhs.data()));
+ if(!UseLhsDirectly) Map<typename _ActualLhs::PlainObject>(actualLhsPtr, actualLhs.size()) = actualLhs;
+
+ internal::gemv_static_vector_if<Scalar,Rhs::SizeAtCompileTime,Rhs::MaxSizeAtCompileTime,!UseRhsDirectly> static_rhs;
+ ei_declare_aligned_stack_constructed_variable(Scalar, actualRhsPtr, actualRhs.size(),
+ (UseRhsDirectly ? const_cast<Scalar*>(actualRhs.data()) : static_rhs.data()));
+ if(!UseRhsDirectly) Map<typename _ActualRhs::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs;
+
+
+ selfadjoint_rank1_update<Scalar,Index,StorageOrder,UpLo,
+ LhsBlasTraits::NeedToConjugate && NumTraits<Scalar>::IsComplex,
+ RhsBlasTraits::NeedToConjugate && NumTraits<Scalar>::IsComplex>
+ ::run(actualLhs.size(), mat.data(), mat.outerStride(), actualLhsPtr, actualRhsPtr, actualAlpha);
+ }
+};
+
+template<typename MatrixType, typename ProductType, int UpLo>
+struct general_product_to_triangular_selector<MatrixType,ProductType,UpLo,false>
+{
+ static void run(MatrixType& mat, const ProductType& prod, const typename MatrixType::Scalar& alpha)
+ {
+ typedef typename MatrixType::Index Index;
+
+ typedef typename internal::remove_all<typename ProductType::LhsNested>::type Lhs;
+ typedef internal::blas_traits<Lhs> LhsBlasTraits;
+ typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhs;
+ typedef typename internal::remove_all<ActualLhs>::type _ActualLhs;
+ typename internal::add_const_on_value_type<ActualLhs>::type actualLhs = LhsBlasTraits::extract(prod.lhs());
+
+ typedef typename internal::remove_all<typename ProductType::RhsNested>::type Rhs;
+ typedef internal::blas_traits<Rhs> RhsBlasTraits;
+ typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhs;
+ typedef typename internal::remove_all<ActualRhs>::type _ActualRhs;
+ typename internal::add_const_on_value_type<ActualRhs>::type actualRhs = RhsBlasTraits::extract(prod.rhs());
+
+ typename ProductType::Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs().derived()) * RhsBlasTraits::extractScalarFactor(prod.rhs().derived());
+
+ internal::general_matrix_matrix_triangular_product<Index,
+ typename Lhs::Scalar, _ActualLhs::Flags&RowMajorBit ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate,
+ typename Rhs::Scalar, _ActualRhs::Flags&RowMajorBit ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate,
+ MatrixType::Flags&RowMajorBit ? RowMajor : ColMajor, UpLo>
+ ::run(mat.cols(), actualLhs.cols(),
+ &actualLhs.coeffRef(0,0), actualLhs.outerStride(), &actualRhs.coeffRef(0,0), actualRhs.outerStride(),
+ mat.data(), mat.outerStride(), actualAlpha);
+ }
+};
+
template<typename MatrixType, unsigned int UpLo>
template<typename ProductDerived, typename _Lhs, typename _Rhs>
TriangularView<MatrixType,UpLo>& TriangularView<MatrixType,UpLo>::assignProduct(const ProductBase<ProductDerived, _Lhs,_Rhs>& prod, const Scalar& alpha)
{
- typedef typename internal::remove_all<typename ProductDerived::LhsNested>::type Lhs;
- typedef internal::blas_traits<Lhs> LhsBlasTraits;
- typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhs;
- typedef typename internal::remove_all<ActualLhs>::type _ActualLhs;
- typename internal::add_const_on_value_type<ActualLhs>::type actualLhs = LhsBlasTraits::extract(prod.lhs());
-
- typedef typename internal::remove_all<typename ProductDerived::RhsNested>::type Rhs;
- typedef internal::blas_traits<Rhs> RhsBlasTraits;
- typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhs;
- typedef typename internal::remove_all<ActualRhs>::type _ActualRhs;
- typename internal::add_const_on_value_type<ActualRhs>::type actualRhs = RhsBlasTraits::extract(prod.rhs());
-
- typename ProductDerived::Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs().derived()) * RhsBlasTraits::extractScalarFactor(prod.rhs().derived());
-
- internal::general_matrix_matrix_triangular_product<Index,
- typename Lhs::Scalar, _ActualLhs::Flags&RowMajorBit ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate,
- typename Rhs::Scalar, _ActualRhs::Flags&RowMajorBit ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate,
- MatrixType::Flags&RowMajorBit ? RowMajor : ColMajor, UpLo>
- ::run(m_matrix.cols(), actualLhs.cols(),
- &actualLhs.coeffRef(0,0), actualLhs.outerStride(), &actualRhs.coeffRef(0,0), actualRhs.outerStride(),
- const_cast<Scalar*>(m_matrix.data()), m_matrix.outerStride(), actualAlpha);
+ general_product_to_triangular_selector<MatrixType, ProductDerived, UpLo, (_Lhs::ColsAtCompileTime==1) || (_Rhs::RowsAtCompileTime==1)>::run(m_matrix.const_cast_derived(), prod.derived(), alpha);
return *this;
}
diff --git a/Eigen/src/Core/products/GeneralMatrixVector.h b/Eigen/src/Core/products/GeneralMatrixVector.h
index 8895d3ab2..9bdd588df 100644
--- a/Eigen/src/Core/products/GeneralMatrixVector.h
+++ b/Eigen/src/Core/products/GeneralMatrixVector.h
@@ -56,6 +56,18 @@ EIGEN_DONT_INLINE static void run(
#ifdef EIGEN_INTERNAL_DEBUGGING
resIncr
#endif
+ , RhsScalar alpha);
+};
+
+template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs, int Version>
+EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,ColMajor,ConjugateLhs,RhsScalar,ConjugateRhs,Version>::run(
+ Index rows, Index cols,
+ const LhsScalar* lhs, Index lhsStride,
+ const RhsScalar* rhs, Index rhsIncr,
+ ResScalar* res, Index
+ #ifdef EIGEN_INTERNAL_DEBUGGING
+ resIncr
+ #endif
, RhsScalar alpha)
{
eigen_internal_assert(resIncr==1);
@@ -274,7 +286,6 @@ EIGEN_DONT_INLINE static void run(
} while(Vectorizable);
#undef _EIGEN_ACCUMULATE_PACKETS
}
-};
/* Optimized row-major matrix * vector product:
 * This algorithm processes 4 rows at once, which allows both to reduce
@@ -312,6 +323,15 @@ EIGEN_DONT_INLINE static void run(
const LhsScalar* lhs, Index lhsStride,
const RhsScalar* rhs, Index rhsIncr,
ResScalar* res, Index resIncr,
+ ResScalar alpha);
+};
+
+template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs, int Version>
+EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,RowMajor,ConjugateLhs,RhsScalar,ConjugateRhs,Version>::run(
+ Index rows, Index cols,
+ const LhsScalar* lhs, Index lhsStride,
+ const RhsScalar* rhs, Index rhsIncr,
+ ResScalar* res, Index resIncr,
ResScalar alpha)
{
EIGEN_UNUSED_VARIABLE(rhsIncr);
@@ -545,7 +565,6 @@ EIGEN_DONT_INLINE static void run(
#undef _EIGEN_ACCUMULATE_PACKETS
}
-};
} // end namespace internal
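
The same declaration/definition split recurs in every heavy kernel of this patch: run() is now only declared inside the class template and defined out of class, with EIGEN_DONT_INLINE repeated on the definition, while the MKL specializations (next file) drop the attribute. A minimal sketch of the pattern with a hypothetical kernel:

#include <Eigen/Core>
#include <cstdio>

template<typename Scalar>
struct example_kernel
{
  // declaration only; the attribute is repeated on the out-of-class definition
  static EIGEN_DONT_INLINE void run(const Scalar* x, Scalar* y, int n, const Scalar& alpha);
};

template<typename Scalar>
EIGEN_DONT_INLINE void example_kernel<Scalar>::run(const Scalar* x, Scalar* y, int n, const Scalar& alpha)
{
  for (int i = 0; i < n; ++i)
    y[i] += alpha * x[i];                  // y += alpha * x, kept out of line
}

int main()
{
  double x[3] = {1, 2, 3}, y[3] = {0, 0, 0};
  example_kernel<double>::run(x, y, 3, 2.0);
  std::printf("%g %g %g\n", y[0], y[1], y[2]);
  return 0;
}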
diff --git a/Eigen/src/Core/products/GeneralMatrixVector_MKL.h b/Eigen/src/Core/products/GeneralMatrixVector_MKL.h
index e9de6af3e..1cb9fe6b5 100644
--- a/Eigen/src/Core/products/GeneralMatrixVector_MKL.h
+++ b/Eigen/src/Core/products/GeneralMatrixVector_MKL.h
@@ -53,7 +53,7 @@ struct general_matrix_vector_product_gemv :
#define EIGEN_MKL_GEMV_SPECIALIZE(Scalar) \
template<typename Index, bool ConjugateLhs, bool ConjugateRhs> \
struct general_matrix_vector_product<Index,Scalar,ColMajor,ConjugateLhs,Scalar,ConjugateRhs,Specialized> { \
-static EIGEN_DONT_INLINE void run( \
+static void run( \
Index rows, Index cols, \
const Scalar* lhs, Index lhsStride, \
const Scalar* rhs, Index rhsIncr, \
@@ -70,7 +70,7 @@ static EIGEN_DONT_INLINE void run( \
}; \
template<typename Index, bool ConjugateLhs, bool ConjugateRhs> \
struct general_matrix_vector_product<Index,Scalar,RowMajor,ConjugateLhs,Scalar,ConjugateRhs,Specialized> { \
-static EIGEN_DONT_INLINE void run( \
+static void run( \
Index rows, Index cols, \
const Scalar* lhs, Index lhsStride, \
const Scalar* rhs, Index rhsIncr, \
@@ -92,7 +92,7 @@ struct general_matrix_vector_product_gemv<Index,EIGTYPE,LhsStorageOrder,Conjugat
{ \
typedef Matrix<EIGTYPE,Dynamic,1,ColMajor> GEMVVector;\
\
-static EIGEN_DONT_INLINE void run( \
+static void run( \
Index rows, Index cols, \
const EIGTYPE* lhs, Index lhsStride, \
const EIGTYPE* rhs, Index rhsIncr, \
diff --git a/Eigen/src/Core/products/SelfadjointMatrixMatrix.h b/Eigen/src/Core/products/SelfadjointMatrixMatrix.h
index 48209636e..ee619df99 100644
--- a/Eigen/src/Core/products/SelfadjointMatrixMatrix.h
+++ b/Eigen/src/Core/products/SelfadjointMatrixMatrix.h
@@ -211,7 +211,7 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint,Co
const Scalar* lhs, Index lhsStride,
const Scalar* rhs, Index rhsStride,
Scalar* res, Index resStride,
- Scalar alpha)
+ const Scalar& alpha)
{
product_selfadjoint_matrix<Scalar, Index,
EIGEN_LOGICAL_XOR(RhsSelfAdjoint,RhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
@@ -234,7 +234,18 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs
const Scalar* _lhs, Index lhsStride,
const Scalar* _rhs, Index rhsStride,
Scalar* res, Index resStride,
- Scalar alpha)
+ const Scalar& alpha);
+};
+
+template <typename Scalar, typename Index,
+ int LhsStorageOrder, bool ConjugateLhs,
+ int RhsStorageOrder, bool ConjugateRhs>
+EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor>::run(
+ Index rows, Index cols,
+ const Scalar* _lhs, Index lhsStride,
+ const Scalar* _rhs, Index rhsStride,
+ Scalar* res, Index resStride,
+ const Scalar& alpha)
{
Index size = rows;
@@ -301,7 +312,6 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs
}
}
}
-};
// matrix * selfadjoint product
template <typename Scalar, typename Index,
@@ -315,7 +325,18 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLh
const Scalar* _lhs, Index lhsStride,
const Scalar* _rhs, Index rhsStride,
Scalar* res, Index resStride,
- Scalar alpha)
+ const Scalar& alpha);
+};
+
+template <typename Scalar, typename Index,
+ int LhsStorageOrder, bool ConjugateLhs,
+ int RhsStorageOrder, bool ConjugateRhs>
+EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor>::run(
+ Index rows, Index cols,
+ const Scalar* _lhs, Index lhsStride,
+ const Scalar* _rhs, Index rhsStride,
+ Scalar* res, Index resStride,
+ const Scalar& alpha)
{
Index size = cols;
@@ -353,7 +374,6 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLh
}
}
}
-};
} // end namespace internal
@@ -383,7 +403,7 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>
RhsIsSelfAdjoint = (RhsMode&SelfAdjoint)==SelfAdjoint
};
- template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
+ template<typename Dest> void scaleAndAddTo(Dest& dst, const Scalar& alpha) const
{
eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
diff --git a/Eigen/src/Core/products/SelfadjointMatrixMatrix_MKL.h b/Eigen/src/Core/products/SelfadjointMatrixMatrix_MKL.h
index 4e5c4125c..dfa687fef 100644
--- a/Eigen/src/Core/products/SelfadjointMatrixMatrix_MKL.h
+++ b/Eigen/src/Core/products/SelfadjointMatrixMatrix_MKL.h
@@ -23,7 +23,7 @@
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
+//
********************************************************************************
* Content : Eigen bindings to Intel(R) MKL
* Self adjoint matrix * matrix product functionality based on ?SYMM/?HEMM.
@@ -47,7 +47,7 @@ template <typename Index, \
struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLhs,RhsStorageOrder,false,ConjugateRhs,ColMajor> \
{\
\
- static EIGEN_DONT_INLINE void run( \
+ static void run( \
Index rows, Index cols, \
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \
@@ -98,7 +98,7 @@ template <typename Index, \
int RhsStorageOrder, bool ConjugateRhs> \
struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLhs,RhsStorageOrder,false,ConjugateRhs,ColMajor> \
{\
- static EIGEN_DONT_INLINE void run( \
+ static void run( \
Index rows, Index cols, \
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \
@@ -174,7 +174,7 @@ template <typename Index, \
struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateLhs,RhsStorageOrder,true,ConjugateRhs,ColMajor> \
{\
\
- static EIGEN_DONT_INLINE void run( \
+ static void run( \
Index rows, Index cols, \
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \
@@ -224,7 +224,7 @@ template <typename Index, \
int RhsStorageOrder, bool ConjugateRhs> \
struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateLhs,RhsStorageOrder,true,ConjugateRhs,ColMajor> \
{\
- static EIGEN_DONT_INLINE void run( \
+ static void run( \
Index rows, Index cols, \
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \
diff --git a/Eigen/src/Core/products/SelfadjointMatrixVector.h b/Eigen/src/Core/products/SelfadjointMatrixVector.h
index c3145c69a..f70f4894c 100644
--- a/Eigen/src/Core/products/SelfadjointMatrixVector.h
+++ b/Eigen/src/Core/products/SelfadjointMatrixVector.h
@@ -32,10 +32,18 @@ static EIGEN_DONT_INLINE void run(
const Scalar* lhs, Index lhsStride,
const Scalar* _rhs, Index rhsIncr,
Scalar* res,
+ Scalar alpha);
+};
+
+template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs, int Version>
+EIGEN_DONT_INLINE void selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,Version>::run(
+ Index size,
+ const Scalar* lhs, Index lhsStride,
+ const Scalar* _rhs, Index rhsIncr,
+ Scalar* res,
Scalar alpha)
{
typedef typename packet_traits<Scalar>::type Packet;
- typedef typename NumTraits<Scalar>::Real RealScalar;
const Index PacketSize = sizeof(Packet)/sizeof(Scalar);
enum {
@@ -153,7 +161,6 @@ static EIGEN_DONT_INLINE void run(
res[j] += alpha * t2;
}
}
-};
} // end namespace internal
@@ -180,7 +187,7 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>
SelfadjointProductMatrix(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}
- template<typename Dest> void scaleAndAddTo(Dest& dest, Scalar alpha) const
+ template<typename Dest> void scaleAndAddTo(Dest& dest, const Scalar& alpha) const
{
typedef typename Dest::Scalar ResScalar;
typedef typename Base::RhsScalar RhsScalar;
@@ -260,7 +267,7 @@ struct SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false>
SelfadjointProductMatrix(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}
- template<typename Dest> void scaleAndAddTo(Dest& dest, Scalar alpha) const
+ template<typename Dest> void scaleAndAddTo(Dest& dest, const Scalar& alpha) const
{
// let's simply transpose the product
Transpose<Dest> destT(dest);
diff --git a/Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h b/Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h
index f88d483b6..86684b66d 100644
--- a/Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h
+++ b/Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h
@@ -50,7 +50,7 @@ struct selfadjoint_matrix_vector_product_symv :
#define EIGEN_MKL_SYMV_SPECIALIZE(Scalar) \
template<typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs> \
struct selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,Specialized> { \
-static EIGEN_DONT_INLINE void run( \
+static void run( \
Index size, const Scalar* lhs, Index lhsStride, \
const Scalar* _rhs, Index rhsIncr, Scalar* res, Scalar alpha) { \
enum {\
@@ -77,7 +77,7 @@ struct selfadjoint_matrix_vector_product_symv<EIGTYPE,Index,StorageOrder,UpLo,Co
{ \
typedef Matrix<EIGTYPE,Dynamic,1,ColMajor> SYMVVector;\
\
-static EIGEN_DONT_INLINE void run( \
+static void run( \
Index size, const EIGTYPE* lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* res, EIGTYPE alpha) \
{ \
diff --git a/Eigen/src/Core/products/SelfadjointProduct.h b/Eigen/src/Core/products/SelfadjointProduct.h
index 6a55f3d77..6ca4ae6c0 100644
--- a/Eigen/src/Core/products/SelfadjointProduct.h
+++ b/Eigen/src/Core/products/SelfadjointProduct.h
@@ -18,21 +18,19 @@
namespace Eigen {
-template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjLhs, bool ConjRhs>
-struct selfadjoint_rank1_update;
template<typename Scalar, typename Index, int UpLo, bool ConjLhs, bool ConjRhs>
struct selfadjoint_rank1_update<Scalar,Index,ColMajor,UpLo,ConjLhs,ConjRhs>
{
- static void run(Index size, Scalar* mat, Index stride, const Scalar* vec, Scalar alpha)
+ static void run(Index size, Scalar* mat, Index stride, const Scalar* vecX, const Scalar* vecY, const Scalar& alpha)
{
internal::conj_if<ConjRhs> cj;
typedef Map<const Matrix<Scalar,Dynamic,1> > OtherMap;
- typedef typename internal::conditional<ConjLhs,typename OtherMap::ConjugateReturnType,const OtherMap&>::type ConjRhsType;
+ typedef typename internal::conditional<ConjLhs,typename OtherMap::ConjugateReturnType,const OtherMap&>::type ConjLhsType;
for (Index i=0; i<size; ++i)
{
Map<Matrix<Scalar,Dynamic,1> >(mat+stride*i+(UpLo==Lower ? i : 0), (UpLo==Lower ? size-i : (i+1)))
- += (alpha * cj(vec[i])) * ConjRhsType(OtherMap(vec+(UpLo==Lower ? i : 0),UpLo==Lower ? size-i : (i+1)));
+ += (alpha * cj(vecY[i])) * ConjLhsType(OtherMap(vecX+(UpLo==Lower ? i : 0),UpLo==Lower ? size-i : (i+1)));
}
}
};
@@ -40,9 +38,9 @@ struct selfadjoint_rank1_update<Scalar,Index,ColMajor,UpLo,ConjLhs,ConjRhs>
template<typename Scalar, typename Index, int UpLo, bool ConjLhs, bool ConjRhs>
struct selfadjoint_rank1_update<Scalar,Index,RowMajor,UpLo,ConjLhs,ConjRhs>
{
- static void run(Index size, Scalar* mat, Index stride, const Scalar* vec, Scalar alpha)
+ static void run(Index size, Scalar* mat, Index stride, const Scalar* vecX, const Scalar* vecY, const Scalar& alpha)
{
- selfadjoint_rank1_update<Scalar,Index,ColMajor,UpLo==Lower?Upper:Lower,ConjRhs,ConjLhs>::run(size,mat,stride,vec,alpha);
+ selfadjoint_rank1_update<Scalar,Index,ColMajor,UpLo==Lower?Upper:Lower,ConjRhs,ConjLhs>::run(size,mat,stride,vecY,vecX,alpha);
}
};
@@ -52,7 +50,7 @@ struct selfadjoint_product_selector;
template<typename MatrixType, typename OtherType, int UpLo>
struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,true>
{
- static void run(MatrixType& mat, const OtherType& other, typename MatrixType::Scalar alpha)
+ static void run(MatrixType& mat, const OtherType& other, const typename MatrixType::Scalar& alpha)
{
typedef typename MatrixType::Scalar Scalar;
typedef typename MatrixType::Index Index;
@@ -78,14 +76,14 @@ struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,true>
selfadjoint_rank1_update<Scalar,Index,StorageOrder,UpLo,
OtherBlasTraits::NeedToConjugate && NumTraits<Scalar>::IsComplex,
(!OtherBlasTraits::NeedToConjugate) && NumTraits<Scalar>::IsComplex>
- ::run(other.size(), mat.data(), mat.outerStride(), actualOtherPtr, actualAlpha);
+ ::run(other.size(), mat.data(), mat.outerStride(), actualOtherPtr, actualOtherPtr, actualAlpha);
}
};
template<typename MatrixType, typename OtherType, int UpLo>
struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,false>
{
- static void run(MatrixType& mat, const OtherType& other, typename MatrixType::Scalar alpha)
+ static void run(MatrixType& mat, const OtherType& other, const typename MatrixType::Scalar& alpha)
{
typedef typename MatrixType::Scalar Scalar;
typedef typename MatrixType::Index Index;
@@ -113,7 +111,7 @@ struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,false>
template<typename MatrixType, unsigned int UpLo>
template<typename DerivedU>
SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo>
-::rankUpdate(const MatrixBase<DerivedU>& u, Scalar alpha)
+::rankUpdate(const MatrixBase<DerivedU>& u, const Scalar& alpha)
{
selfadjoint_product_selector<MatrixType,DerivedU,UpLo>::run(_expression().const_cast_derived(), u.derived(), alpha);
diff --git a/Eigen/src/Core/products/SelfadjointRank2Update.h b/Eigen/src/Core/products/SelfadjointRank2Update.h
index 57a98cc2d..4b57f189d 100644
--- a/Eigen/src/Core/products/SelfadjointRank2Update.h
+++ b/Eigen/src/Core/products/SelfadjointRank2Update.h
@@ -24,7 +24,7 @@ struct selfadjoint_rank2_update_selector;
template<typename Scalar, typename Index, typename UType, typename VType>
struct selfadjoint_rank2_update_selector<Scalar,Index,UType,VType,Lower>
{
- static void run(Scalar* mat, Index stride, const UType& u, const VType& v, Scalar alpha)
+ static void run(Scalar* mat, Index stride, const UType& u, const VType& v, const Scalar& alpha)
{
const Index size = u.size();
for (Index i=0; i<size; ++i)
@@ -39,7 +39,7 @@ struct selfadjoint_rank2_update_selector<Scalar,Index,UType,VType,Lower>
template<typename Scalar, typename Index, typename UType, typename VType>
struct selfadjoint_rank2_update_selector<Scalar,Index,UType,VType,Upper>
{
- static void run(Scalar* mat, Index stride, const UType& u, const VType& v, Scalar alpha)
+ static void run(Scalar* mat, Index stride, const UType& u, const VType& v, const Scalar& alpha)
{
const Index size = u.size();
for (Index i=0; i<size; ++i)
@@ -58,7 +58,7 @@ template<bool Cond, typename T> struct conj_expr_if
template<typename MatrixType, unsigned int UpLo>
template<typename DerivedU, typename DerivedV>
SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo>
-::rankUpdate(const MatrixBase<DerivedU>& u, const MatrixBase<DerivedV>& v, Scalar alpha)
+::rankUpdate(const MatrixBase<DerivedU>& u, const MatrixBase<DerivedV>& v, const Scalar& alpha)
{
typedef internal::blas_traits<DerivedU> UBlasTraits;
typedef typename UBlasTraits::DirectLinearAccessType ActualUType;
diff --git a/Eigen/src/Core/products/TriangularMatrixMatrix.h b/Eigen/src/Core/products/TriangularMatrixMatrix.h
index 92cba66f6..8110507b5 100644
--- a/Eigen/src/Core/products/TriangularMatrixMatrix.h
+++ b/Eigen/src/Core/products/TriangularMatrixMatrix.h
@@ -61,7 +61,7 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,LhsIsTriangular,
const Scalar* lhs, Index lhsStride,
const Scalar* rhs, Index rhsStride,
Scalar* res, Index resStride,
- Scalar alpha, level3_blocking<Scalar,Scalar>& blocking)
+ const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
{
product_triangular_matrix_matrix<Scalar, Index,
(Mode&(UnitDiag|ZeroDiag)) | ((Mode&Upper) ? Lower : Upper),
@@ -96,7 +96,20 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
const Scalar* _lhs, Index lhsStride,
const Scalar* _rhs, Index rhsStride,
Scalar* res, Index resStride,
- Scalar alpha, level3_blocking<Scalar,Scalar>& blocking)
+ const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
+};
+
+template <typename Scalar, typename Index, int Mode,
+ int LhsStorageOrder, bool ConjugateLhs,
+ int RhsStorageOrder, bool ConjugateRhs, int Version>
+EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true,
+ LhsStorageOrder,ConjugateLhs,
+ RhsStorageOrder,ConjugateRhs,ColMajor,Version>::run(
+ Index _rows, Index _cols, Index _depth,
+ const Scalar* _lhs, Index lhsStride,
+ const Scalar* _rhs, Index rhsStride,
+ Scalar* res, Index resStride,
+ const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
{
// strip zeros
Index diagSize = (std::min)(_rows,_depth);
@@ -203,15 +216,14 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
}
}
}
-};
// implements col-major += alpha * op(general) * op(triangular)
template <typename Scalar, typename Index, int Mode,
int LhsStorageOrder, bool ConjugateLhs,
int RhsStorageOrder, bool ConjugateRhs, int Version>
struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
- LhsStorageOrder,ConjugateLhs,
- RhsStorageOrder,ConjugateRhs,ColMajor,Version>
+ LhsStorageOrder,ConjugateLhs,
+ RhsStorageOrder,ConjugateRhs,ColMajor,Version>
{
typedef gebp_traits<Scalar,Scalar> Traits;
enum {
@@ -225,7 +237,20 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
const Scalar* _lhs, Index lhsStride,
const Scalar* _rhs, Index rhsStride,
Scalar* res, Index resStride,
- Scalar alpha, level3_blocking<Scalar,Scalar>& blocking)
+ const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
+};
+
+template <typename Scalar, typename Index, int Mode,
+ int LhsStorageOrder, bool ConjugateLhs,
+ int RhsStorageOrder, bool ConjugateRhs, int Version>
+EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,false,
+ LhsStorageOrder,ConjugateLhs,
+ RhsStorageOrder,ConjugateRhs,ColMajor,Version>::run(
+ Index _rows, Index _cols, Index _depth,
+ const Scalar* _lhs, Index lhsStride,
+ const Scalar* _rhs, Index rhsStride,
+ Scalar* res, Index resStride,
+ const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
{
// strip zeros
Index diagSize = (std::min)(_cols,_depth);
@@ -343,7 +368,6 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
}
}
}
-};
/***************************************************************************
* Wrapper to product_triangular_matrix_matrix
@@ -364,7 +388,7 @@ struct TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false>
TriangularProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}
- template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
+ template<typename Dest> void scaleAndAddTo(Dest& dst, const Scalar& alpha) const
{
typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs);
typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs);
diff --git a/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h b/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h
index 4d20de617..ba41a1c99 100644
--- a/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h
+++ b/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h
@@ -91,7 +91,7 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \
conjA = ((LhsStorageOrder==ColMajor) && ConjugateLhs) ? 1 : 0 \
}; \
\
- static EIGEN_DONT_INLINE void run( \
+ static void run( \
Index _rows, Index _cols, Index _depth, \
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \
@@ -205,7 +205,7 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \
conjA = ((RhsStorageOrder==ColMajor) && ConjugateRhs) ? 1 : 0 \
}; \
\
- static EIGEN_DONT_INLINE void run( \
+ static void run( \
Index _rows, Index _cols, Index _depth, \
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \
diff --git a/Eigen/src/Core/products/TriangularMatrixVector.h b/Eigen/src/Core/products/TriangularMatrixVector.h
index b1c10c201..c8b7d28c4 100644
--- a/Eigen/src/Core/products/TriangularMatrixVector.h
+++ b/Eigen/src/Core/products/TriangularMatrixVector.h
@@ -27,7 +27,13 @@ struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,C
HasZeroDiag = (Mode & ZeroDiag)==ZeroDiag
};
static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const LhsScalar* _lhs, Index lhsStride,
- const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, ResScalar alpha)
+ const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, const ResScalar& alpha);
+};
+
+template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs, int Version>
+EIGEN_DONT_INLINE void triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,ColMajor,Version>
+ ::run(Index _rows, Index _cols, const LhsScalar* _lhs, Index lhsStride,
+ const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, const ResScalar& alpha)
{
static const Index PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH;
Index size = (std::min)(_rows,_cols);
@@ -78,7 +84,6 @@ struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,C
_res, resIncr, alpha);
}
}
-};
template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs,int Version>
struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,RowMajor,Version>
@@ -89,8 +94,14 @@ struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,C
HasUnitDiag = (Mode & UnitDiag)==UnitDiag,
HasZeroDiag = (Mode & ZeroDiag)==ZeroDiag
};
- static void run(Index _rows, Index _cols, const LhsScalar* _lhs, Index lhsStride,
- const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, ResScalar alpha)
+ static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const LhsScalar* _lhs, Index lhsStride,
+ const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, const ResScalar& alpha);
+};
+
+template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs,int Version>
+EIGEN_DONT_INLINE void triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,RowMajor,Version>
+ ::run(Index _rows, Index _cols, const LhsScalar* _lhs, Index lhsStride,
+ const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, const ResScalar& alpha)
{
static const Index PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH;
Index diagSize = (std::min)(_rows,_cols);
@@ -141,7 +152,6 @@ struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,C
&res.coeffRef(diagSize), resIncr, alpha);
}
}
-};
/***************************************************************************
* Wrapper to product_triangular_vector
@@ -171,7 +181,7 @@ struct TriangularProduct<Mode,true,Lhs,false,Rhs,true>
TriangularProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}
- template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
+ template<typename Dest> void scaleAndAddTo(Dest& dst, const Scalar& alpha) const
{
eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
@@ -187,7 +197,7 @@ struct TriangularProduct<Mode,false,Lhs,true,Rhs,false>
TriangularProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}
- template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
+ template<typename Dest> void scaleAndAddTo(Dest& dst, const Scalar& alpha) const
{
eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
@@ -205,7 +215,7 @@ namespace internal {
template<> struct trmv_selector<ColMajor>
{
template<int Mode, typename Lhs, typename Rhs, typename Dest>
- static void run(const TriangularProduct<Mode,true,Lhs,false,Rhs,true>& prod, Dest& dest, typename TriangularProduct<Mode,true,Lhs,false,Rhs,true>::Scalar alpha)
+ static void run(const TriangularProduct<Mode,true,Lhs,false,Rhs,true>& prod, Dest& dest, const typename TriangularProduct<Mode,true,Lhs,false,Rhs,true>::Scalar& alpha)
{
typedef TriangularProduct<Mode,true,Lhs,false,Rhs,true> ProductType;
typedef typename ProductType::Index Index;
@@ -246,7 +256,7 @@ template<> struct trmv_selector<ColMajor>
if(!evalToDest)
{
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
- int size = dest.size();
+ Index size = dest.size();
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
#endif
if(!alphaIsCompatible)
@@ -281,7 +291,7 @@ template<> struct trmv_selector<ColMajor>
template<> struct trmv_selector<RowMajor>
{
template<int Mode, typename Lhs, typename Rhs, typename Dest>
- static void run(const TriangularProduct<Mode,true,Lhs,false,Rhs,true>& prod, Dest& dest, typename TriangularProduct<Mode,true,Lhs,false,Rhs,true>::Scalar alpha)
+ static void run(const TriangularProduct<Mode,true,Lhs,false,Rhs,true>& prod, Dest& dest, const typename TriangularProduct<Mode,true,Lhs,false,Rhs,true>::Scalar& alpha)
{
typedef TriangularProduct<Mode,true,Lhs,false,Rhs,true> ProductType;
typedef typename ProductType::LhsScalar LhsScalar;
diff --git a/Eigen/src/Core/products/TriangularMatrixVector_MKL.h b/Eigen/src/Core/products/TriangularMatrixVector_MKL.h
index 3c2c3049a..09f110da7 100644
--- a/Eigen/src/Core/products/TriangularMatrixVector_MKL.h
+++ b/Eigen/src/Core/products/TriangularMatrixVector_MKL.h
@@ -50,7 +50,7 @@ struct triangular_matrix_vector_product_trmv :
#define EIGEN_MKL_TRMV_SPECIALIZE(Scalar) \
template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
struct triangular_matrix_vector_product<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,ColMajor,Specialized> { \
- static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const Scalar* _lhs, Index lhsStride, \
+ static void run(Index _rows, Index _cols, const Scalar* _lhs, Index lhsStride, \
const Scalar* _rhs, Index rhsIncr, Scalar* _res, Index resIncr, Scalar alpha) { \
triangular_matrix_vector_product_trmv<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,ColMajor>::run( \
_rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \
@@ -58,7 +58,7 @@ struct triangular_matrix_vector_product<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs
}; \
template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
struct triangular_matrix_vector_product<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,RowMajor,Specialized> { \
- static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const Scalar* _lhs, Index lhsStride, \
+ static void run(Index _rows, Index _cols, const Scalar* _lhs, Index lhsStride, \
const Scalar* _rhs, Index rhsIncr, Scalar* _res, Index resIncr, Scalar alpha) { \
triangular_matrix_vector_product_trmv<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,RowMajor>::run( \
_rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \
@@ -81,8 +81,8 @@ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,
IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \
LowUp = IsLower ? Lower : Upper \
}; \
- static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const EIGTYPE* _lhs, Index lhsStride, \
- const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* _res, Index resIncr, EIGTYPE alpha) \
+ static void run(Index _rows, Index _cols, const EIGTYPE* _lhs, Index lhsStride, \
+ const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* _res, Index resIncr, EIGTYPE alpha) \
{ \
if (ConjLhs || IsZeroDiag) { \
triangular_matrix_vector_product<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,ColMajor,BuiltIn>::run( \
@@ -166,8 +166,8 @@ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,
IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \
LowUp = IsLower ? Lower : Upper \
}; \
- static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const EIGTYPE* _lhs, Index lhsStride, \
- const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* _res, Index resIncr, EIGTYPE alpha) \
+ static void run(Index _rows, Index _cols, const EIGTYPE* _lhs, Index lhsStride, \
+ const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* _res, Index resIncr, EIGTYPE alpha) \
{ \
if (IsZeroDiag) { \
triangular_matrix_vector_product<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,RowMajor,BuiltIn>::run( \
diff --git a/Eigen/src/Core/products/TriangularSolverMatrix.h b/Eigen/src/Core/products/TriangularSolverMatrix.h
index a49ea3183..f103eae72 100644
--- a/Eigen/src/Core/products/TriangularSolverMatrix.h
+++ b/Eigen/src/Core/products/TriangularSolverMatrix.h
@@ -18,7 +18,7 @@ namespace internal {
template <typename Scalar, typename Index, int Side, int Mode, bool Conjugate, int TriStorageOrder>
struct triangular_solve_matrix<Scalar,Index,Side,Mode,Conjugate,TriStorageOrder,RowMajor>
{
- static EIGEN_DONT_INLINE void run(
+ static void run(
Index size, Index cols,
const Scalar* tri, Index triStride,
Scalar* _other, Index otherStride,
@@ -42,6 +42,13 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageO
Index size, Index otherSize,
const Scalar* _tri, Index triStride,
Scalar* _other, Index otherStride,
+ level3_blocking<Scalar,Scalar>& blocking);
+};
+template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder>
+EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor>::run(
+ Index size, Index otherSize,
+ const Scalar* _tri, Index triStride,
+ Scalar* _other, Index otherStride,
level3_blocking<Scalar,Scalar>& blocking)
{
Index cols = otherSize;
@@ -173,7 +180,6 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageO
}
}
}
-};
/* Optimized triangular solver with multiple left hand sides and the triangular matrix on the right
*/
@@ -184,6 +190,13 @@ struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorage
Index size, Index otherSize,
const Scalar* _tri, Index triStride,
Scalar* _other, Index otherStride,
+ level3_blocking<Scalar,Scalar>& blocking);
+};
+template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder>
+EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor>::run(
+ Index size, Index otherSize,
+ const Scalar* _tri, Index triStride,
+ Scalar* _other, Index otherStride,
level3_blocking<Scalar,Scalar>& blocking)
{
Index rows = otherSize;
@@ -308,7 +321,6 @@ struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorage
}
}
}
-};
} // end namespace internal
diff --git a/Eigen/src/Core/products/TriangularSolverMatrix_MKL.h b/Eigen/src/Core/products/TriangularSolverMatrix_MKL.h
index a4f508b2e..6a0bb8339 100644
--- a/Eigen/src/Core/products/TriangularSolverMatrix_MKL.h
+++ b/Eigen/src/Core/products/TriangularSolverMatrix_MKL.h
@@ -48,7 +48,7 @@ struct triangular_solve_matrix<EIGTYPE,Index,OnTheLeft,Mode,Conjugate,TriStorage
IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \
conjA = ((TriStorageOrder==ColMajor) && Conjugate) ? 1 : 0 \
}; \
- static EIGEN_DONT_INLINE void run( \
+ static void run( \
Index size, Index otherSize, \
const EIGTYPE* _tri, Index triStride, \
EIGTYPE* _other, Index otherStride, level3_blocking<EIGTYPE,EIGTYPE>& /*blocking*/) \
@@ -103,7 +103,7 @@ struct triangular_solve_matrix<EIGTYPE,Index,OnTheRight,Mode,Conjugate,TriStorag
IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \
conjA = ((TriStorageOrder==ColMajor) && Conjugate) ? 1 : 0 \
}; \
- static EIGEN_DONT_INLINE void run( \
+ static void run( \
Index size, Index otherSize, \
const EIGTYPE* _tri, Index triStride, \
EIGTYPE* _other, Index otherStride, level3_blocking<EIGTYPE,EIGTYPE>& /*blocking*/) \
diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h
index bf6a9293c..64348cd16 100644
--- a/Eigen/src/Core/util/Macros.h
+++ b/Eigen/src/Core/util/Macros.h
@@ -13,7 +13,7 @@
#define EIGEN_WORLD_VERSION 3
#define EIGEN_MAJOR_VERSION 1
-#define EIGEN_MINOR_VERSION 90
+#define EIGEN_MINOR_VERSION 91
#define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || (EIGEN_WORLD_VERSION>=x && \
(EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \
diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h
index b03bc3701..3ca666fd9 100644
--- a/Eigen/src/Core/util/Memory.h
+++ b/Eigen/src/Core/util/Memory.h
@@ -19,6 +19,10 @@
#ifndef EIGEN_MEMORY_H
#define EIGEN_MEMORY_H
+#ifndef EIGEN_MALLOC_ALREADY_ALIGNED
+
+// Try to determine automatically if malloc is already aligned.
+
// On 64-bit systems, glibc's malloc returns 16-byte-aligned pointers, see:
// http://www.gnu.org/s/libc/manual/html_node/Aligned-Memory-Blocks.html
// This is true at least since glibc 2.8.
@@ -27,7 +31,7 @@
// page 114, "[The] LP64 model [...] is used by all 64-bit UNIX ports" so it's indeed
// quite safe, at least within the context of glibc, to equate 64-bit with LP64.
#if defined(__GLIBC__) && ((__GLIBC__>=2 && __GLIBC_MINOR__ >= 8) || __GLIBC__>2) \
- && defined(__LP64__)
+ && defined(__LP64__) && ! defined( __SANITIZE_ADDRESS__ )
#define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 1
#else
#define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 0
@@ -52,6 +56,8 @@
#define EIGEN_MALLOC_ALREADY_ALIGNED 0
#endif
+#endif
+
#if ((defined __QNXNTO__) || (defined _GNU_SOURCE) || ((defined _XOPEN_SOURCE) && (_XOPEN_SOURCE >= 600))) \
&& (defined _POSIX_ADVISORY_INFO) && (_POSIX_ADVISORY_INFO > 0)
#define EIGEN_HAS_POSIX_MEMALIGN 1
@@ -88,11 +94,11 @@ inline void throw_std_bad_alloc()
/** \internal Like malloc, but the returned pointer is guaranteed to be 16-byte aligned.
* Fast, but wastes 16 additional bytes of memory. Does not throw any exception.
*/
-inline void* handmade_aligned_malloc(size_t size)
+inline void* handmade_aligned_malloc(std::size_t size)
{
void *original = std::malloc(size+16);
if (original == 0) return 0;
- void *aligned = reinterpret_cast<void*>((reinterpret_cast<size_t>(original) & ~(size_t(15))) + 16);
+ void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(15))) + 16);
*(reinterpret_cast<void**>(aligned) - 1) = original;
return aligned;
}
@@ -108,13 +114,18 @@ inline void handmade_aligned_free(void *ptr)
* Since we know that our handmade version is based on std::realloc
* we can use std::realloc to implement efficient reallocation.
*/
-inline void* handmade_aligned_realloc(void* ptr, size_t size, size_t = 0)
+inline void* handmade_aligned_realloc(void* ptr, std::size_t size, std::size_t = 0)
{
if (ptr == 0) return handmade_aligned_malloc(size);
void *original = *(reinterpret_cast<void**>(ptr) - 1);
+ std::ptrdiff_t previous_offset = static_cast<char *>(ptr)-static_cast<char *>(original);
original = std::realloc(original,size+16);
if (original == 0) return 0;
- void *aligned = reinterpret_cast<void*>((reinterpret_cast<size_t>(original) & ~(size_t(15))) + 16);
+ void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(15))) + 16);
+ void *previous_aligned = static_cast<char *>(original)+previous_offset;
+ if(aligned!=previous_aligned)
+ std::memmove(aligned, previous_aligned, size);
+
*(reinterpret_cast<void**>(aligned) - 1) = original;
return aligned;
}
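
handmade_aligned_realloc has to move the payload because std::realloc can return a block whose distance to the next 16-byte boundary differs from the old one, so the previously aligned data no longer sits at the newly aligned address. A standalone sketch of that situation and of the memmove fix (error handling omitted):

#include <cstdlib>
#include <cstring>
#include <cstdio>
#include <cstddef>

int main()
{
  // allocate 64 usable bytes with the same +16 trick as handmade_aligned_malloc
  void* original = std::malloc(64 + 16);
  void* aligned  = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~std::size_t(15)) + 16);
  std::memset(aligned, 0xAB, 64);                       // user payload
  std::ptrdiff_t old_offset = static_cast<char*>(aligned) - static_cast<char*>(original);

  // grow the buffer; realloc may move it, and the 16-byte offset may change
  original = std::realloc(original, 128 + 16);
  void* new_aligned      = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~std::size_t(15)) + 16);
  void* previous_aligned = static_cast<char*>(original) + old_offset;
  if (new_aligned != previous_aligned)
    std::memmove(new_aligned, previous_aligned, 64);    // shift payload to the new aligned slot

  std::printf("payload intact: %d\n", static_cast<unsigned char*>(new_aligned)[63] == 0xAB);
  std::free(original);
  return 0;
}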
@@ -123,7 +134,7 @@ inline void* handmade_aligned_realloc(void* ptr, size_t size, size_t = 0)
*** Implementation of generic aligned realloc (when no realloc can be used)***
*****************************************************************************/
-void* aligned_malloc(size_t size);
+void* aligned_malloc(std::size_t size);
void aligned_free(void *ptr);
/** \internal
@@ -227,7 +238,7 @@ inline void aligned_free(void *ptr)
std::free(ptr);
#elif EIGEN_HAS_MM_MALLOC
_mm_free(ptr);
- #elif defined(_MSC_VER)
+ #elif defined(_MSC_VER) && (!defined(_WIN32_WCE))
_aligned_free(ptr);
#else
handmade_aligned_free(ptr);
@@ -446,7 +457,6 @@ template<typename T, bool Align> inline void conditional_aligned_delete_auto(T *
template<typename Scalar, typename Index>
static inline Index first_aligned(const Scalar* array, Index size)
{
- typedef typename packet_traits<Scalar>::type Packet;
enum { PacketSize = packet_traits<Scalar>::size,
PacketAlignedMask = PacketSize-1
};
@@ -745,11 +755,16 @@ public:
# if defined(__PIC__) && defined(__i386__)
// Case for x86 with PIC
# define EIGEN_CPUID(abcd,func,id) \
- __asm__ __volatile__ ("xchgl %%ebx, %%esi;cpuid; xchgl %%ebx,%%esi": "=a" (abcd[0]), "=S" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "a" (func), "c" (id));
+ __asm__ __volatile__ ("xchgl %%ebx, %k1;cpuid; xchgl %%ebx,%k1": "=a" (abcd[0]), "=&r" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "a" (func), "c" (id));
+# elif defined(__PIC__) && defined(__x86_64__)
+ // Case for x64 with PIC. In theory this is only a problem with recent gcc and with medium or large code model, not with the default small code model.
+ // However, we cannot detect which code model is used, and the xchg overhead is negligible anyway.
+# define EIGEN_CPUID(abcd,func,id) \
+ __asm__ __volatile__ ("xchg{q}\t{%%}rbx, %q1; cpuid; xchg{q}\t{%%}rbx, %q1": "=a" (abcd[0]), "=&r" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "0" (func), "2" (id));
# else
// Case for x86_64 or x86 w/o PIC
# define EIGEN_CPUID(abcd,func,id) \
- __asm__ __volatile__ ("cpuid": "=a" (abcd[0]), "=b" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "a" (func), "c" (id) );
+ __asm__ __volatile__ ("cpuid": "=a" (abcd[0]), "=b" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "0" (func), "2" (id) );
# endif
# elif defined(_MSC_VER)
# if (_MSC_VER > 1500) && ( defined(_M_IX86) || defined(_M_X64) )
diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h
index a5f31164d..71d587108 100644
--- a/Eigen/src/Core/util/Meta.h
+++ b/Eigen/src/Core/util/Meta.h
@@ -186,23 +186,35 @@ template<int Y, int InfX, int SupX>
class meta_sqrt<Y, InfX, SupX, true> { public: enum { ret = (SupX*SupX <= Y) ? SupX : InfX }; };
/** \internal determines whether the product of two numeric types is allowed and what the return type is */
-template<typename T, typename U> struct scalar_product_traits;
+template<typename T, typename U> struct scalar_product_traits
+{
+ enum { Defined = 0 };
+};
template<typename T> struct scalar_product_traits<T,T>
{
- //enum { Cost = NumTraits<T>::MulCost };
+ enum {
+ // Cost = NumTraits<T>::MulCost,
+ Defined = 1
+ };
typedef T ReturnType;
};
template<typename T> struct scalar_product_traits<T,std::complex<T> >
{
- //enum { Cost = 2*NumTraits<T>::MulCost };
+ enum {
+ // Cost = 2*NumTraits<T>::MulCost,
+ Defined = 1
+ };
typedef std::complex<T> ReturnType;
};
template<typename T> struct scalar_product_traits<std::complex<T>, T>
{
- //enum { Cost = 2*NumTraits<T>::MulCost };
+ enum {
+ // Cost = 2*NumTraits<T>::MulCost,
+ Defined = 1
+ };
typedef std::complex<T> ReturnType;
};
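
Giving the primary scalar_product_traits template a Defined flag (instead of leaving it undeclared) lets client code test whether a scalar pair has a defined product without hitting an incomplete-type error. A small sketch of how the flag can be queried (the helper name is made up; static_assert needs C++11):

#include <Eigen/Core>
#include <complex>
#include <cstdio>

template<typename A, typename B>
void check_product_allowed()
{
  static_assert(Eigen::internal::scalar_product_traits<A,B>::Defined,
                "Eigen does not define the product of these two scalar types");
}

int main()
{
  check_product_allowed<double, std::complex<double> >();  // OK: Defined == 1
  // check_product_allowed<double, float>();               // would fail: primary template, Defined == 0
  std::printf("ok\n");
  return 0;
}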
diff --git a/Eigen/src/Eigenvalues/ComplexEigenSolver.h b/Eigen/src/Eigenvalues/ComplexEigenSolver.h
index 95c70aecb..bd41bf7ed 100644
--- a/Eigen/src/Eigenvalues/ComplexEigenSolver.h
+++ b/Eigen/src/Eigenvalues/ComplexEigenSolver.h
@@ -242,7 +242,7 @@ template<typename _MatrixType> class ComplexEigenSolver
EigenvectorType m_matX;
private:
- void doComputeEigenvectors(RealScalar matrixnorm);
+ void doComputeEigenvectors(const RealScalar& matrixnorm);
void sortEigenvalues(bool computeEigenvectors);
};
@@ -252,7 +252,7 @@ ComplexEigenSolver<MatrixType>&
ComplexEigenSolver<MatrixType>::compute(const MatrixType& matrix, bool computeEigenvectors)
{
// this code is inspired from Jampack
- assert(matrix.cols() == matrix.rows());
+ eigen_assert(matrix.cols() == matrix.rows());
// Do a complex Schur decomposition, A = U T U^*
// The eigenvalues are on the diagonal of T.
@@ -273,7 +273,7 @@ ComplexEigenSolver<MatrixType>::compute(const MatrixType& matrix, bool computeEi
template<typename MatrixType>
-void ComplexEigenSolver<MatrixType>::doComputeEigenvectors(RealScalar matrixnorm)
+void ComplexEigenSolver<MatrixType>::doComputeEigenvectors(const RealScalar& matrixnorm)
{
const Index n = m_eivalues.size();
diff --git a/Eigen/src/Eigenvalues/ComplexSchur.h b/Eigen/src/Eigenvalues/ComplexSchur.h
index 57ce23e42..62b57ff66 100644
--- a/Eigen/src/Eigenvalues/ComplexSchur.h
+++ b/Eigen/src/Eigenvalues/ComplexSchur.h
@@ -364,7 +364,6 @@ struct complex_schur_reduce_to_hessenberg<MatrixType, false>
static void run(ComplexSchur<MatrixType>& _this, const MatrixType& matrix, bool computeU)
{
typedef typename ComplexSchur<MatrixType>::ComplexScalar ComplexScalar;
- typedef typename ComplexSchur<MatrixType>::ComplexMatrixType ComplexMatrixType;
// Note: m_hess is over RealScalar; m_matT and m_matU is over ComplexScalar
_this.m_hess.compute(matrix);
diff --git a/Eigen/src/Eigenvalues/ComplexSchur_MKL.h b/Eigen/src/Eigenvalues/ComplexSchur_MKL.h
index ada7a24e3..91496ae5b 100644
--- a/Eigen/src/Eigenvalues/ComplexSchur_MKL.h
+++ b/Eigen/src/Eigenvalues/ComplexSchur_MKL.h
@@ -49,7 +49,7 @@ ComplexSchur<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >::compute(const Matri
typedef MatrixType::RealScalar RealScalar; \
typedef std::complex<RealScalar> ComplexScalar; \
\
- assert(matrix.cols() == matrix.rows()); \
+ eigen_assert(matrix.cols() == matrix.rows()); \
\
m_matUisUptodate = false; \
if(matrix.cols() == 1) \
diff --git a/Eigen/src/Eigenvalues/EigenSolver.h b/Eigen/src/Eigenvalues/EigenSolver.h
index 201ea620d..594ec6576 100644
--- a/Eigen/src/Eigenvalues/EigenSolver.h
+++ b/Eigen/src/Eigenvalues/EigenSolver.h
@@ -366,7 +366,7 @@ EigenSolver<MatrixType>::compute(const MatrixType& matrix, bool computeEigenvect
{
using std::sqrt;
using std::abs;
- assert(matrix.cols() == matrix.rows());
+ eigen_assert(matrix.cols() == matrix.rows());
// Reduce to real Schur form.
m_realSchur.compute(matrix, computeEigenvectors);
@@ -410,7 +410,7 @@ EigenSolver<MatrixType>::compute(const MatrixType& matrix, bool computeEigenvect
// Complex scalar division.
template<typename Scalar>
-std::complex<Scalar> cdiv(Scalar xr, Scalar xi, Scalar yr, Scalar yi)
+std::complex<Scalar> cdiv(const Scalar& xr, const Scalar& xi, const Scalar& yr, const Scalar& yi)
{
using std::abs;
Scalar r,d;
diff --git a/Eigen/src/Eigenvalues/HessenbergDecomposition.h b/Eigen/src/Eigenvalues/HessenbergDecomposition.h
index b8378b08a..ebd8ae908 100644
--- a/Eigen/src/Eigenvalues/HessenbergDecomposition.h
+++ b/Eigen/src/Eigenvalues/HessenbergDecomposition.h
@@ -291,7 +291,7 @@ template<typename _MatrixType> class HessenbergDecomposition
template<typename MatrixType>
void HessenbergDecomposition<MatrixType>::_compute(MatrixType& matA, CoeffVectorType& hCoeffs, VectorType& temp)
{
- assert(matA.rows()==matA.cols());
+ eigen_assert(matA.rows()==matA.cols());
Index n = matA.rows();
temp.resize(n);
for (Index i = 0; i<n-1; ++i)
diff --git a/Eigen/src/Eigenvalues/RealQZ.h b/Eigen/src/Eigenvalues/RealQZ.h
index dcaa9fbd6..5706eeebe 100644
--- a/Eigen/src/Eigenvalues/RealQZ.h
+++ b/Eigen/src/Eigenvalues/RealQZ.h
@@ -559,7 +559,7 @@ namespace Eigen {
const Index dim = A_in.cols();
- assert (A_in.rows()==dim && A_in.cols()==dim
+ eigen_assert (A_in.rows()==dim && A_in.cols()==dim
&& B_in.rows()==dim && B_in.cols()==dim
&& "Need square matrices of the same dimension");
diff --git a/Eigen/src/Eigenvalues/RealSchur.h b/Eigen/src/Eigenvalues/RealSchur.h
index 7680f9929..64d136341 100644
--- a/Eigen/src/Eigenvalues/RealSchur.h
+++ b/Eigen/src/Eigenvalues/RealSchur.h
@@ -234,8 +234,8 @@ template<typename _MatrixType> class RealSchur
typedef Matrix<Scalar,3,1> Vector3s;
Scalar computeNormOfT();
- Index findSmallSubdiagEntry(Index iu, Scalar norm);
- void splitOffTwoRows(Index iu, bool computeU, Scalar exshift);
+ Index findSmallSubdiagEntry(Index iu, const Scalar& norm);
+ void splitOffTwoRows(Index iu, bool computeU, const Scalar& exshift);
void computeShift(Index iu, Index iter, Scalar& exshift, Vector3s& shiftInfo);
void initFrancisQRStep(Index il, Index iu, const Vector3s& shiftInfo, Index& im, Vector3s& firstHouseholderVector);
void performFrancisQRStep(Index il, Index im, Index iu, bool computeU, const Vector3s& firstHouseholderVector, Scalar* workspace);
@@ -245,7 +245,7 @@ template<typename _MatrixType> class RealSchur
template<typename MatrixType>
RealSchur<MatrixType>& RealSchur<MatrixType>::compute(const MatrixType& matrix, bool computeU)
{
- assert(matrix.cols() == matrix.rows());
+ eigen_assert(matrix.cols() == matrix.rows());
Index maxIters = m_maxIters;
if (maxIters == -1)
maxIters = m_maxIterationsPerRow * matrix.rows();
@@ -343,7 +343,7 @@ inline typename MatrixType::Scalar RealSchur<MatrixType>::computeNormOfT()
/** \internal Look for single small sub-diagonal element and returns its index */
template<typename MatrixType>
-inline typename MatrixType::Index RealSchur<MatrixType>::findSmallSubdiagEntry(Index iu, Scalar norm)
+inline typename MatrixType::Index RealSchur<MatrixType>::findSmallSubdiagEntry(Index iu, const Scalar& norm)
{
using std::abs;
Index res = iu;
@@ -361,7 +361,7 @@ inline typename MatrixType::Index RealSchur<MatrixType>::findSmallSubdiagEntry(I
/** \internal Update T given that rows iu-1 and iu decouple from the rest. */
template<typename MatrixType>
-inline void RealSchur<MatrixType>::splitOffTwoRows(Index iu, bool computeU, Scalar exshift)
+inline void RealSchur<MatrixType>::splitOffTwoRows(Index iu, bool computeU, const Scalar& exshift)
{
using std::sqrt;
using std::abs;
@@ -467,8 +467,8 @@ inline void RealSchur<MatrixType>::initFrancisQRStep(Index il, Index iu, const V
template<typename MatrixType>
inline void RealSchur<MatrixType>::performFrancisQRStep(Index il, Index im, Index iu, bool computeU, const Vector3s& firstHouseholderVector, Scalar* workspace)
{
- assert(im >= il);
- assert(im <= iu-2);
+ eigen_assert(im >= il);
+ eigen_assert(im <= iu-2);
const Index size = m_matT.cols();
diff --git a/Eigen/src/Eigenvalues/RealSchur_MKL.h b/Eigen/src/Eigenvalues/RealSchur_MKL.h
index 960ec3c76..ad9736460 100644
--- a/Eigen/src/Eigenvalues/RealSchur_MKL.h
+++ b/Eigen/src/Eigenvalues/RealSchur_MKL.h
@@ -48,7 +48,7 @@ RealSchur<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >::compute(const Matrix<E
typedef MatrixType::Scalar Scalar; \
typedef MatrixType::RealScalar RealScalar; \
\
- assert(matrix.cols() == matrix.rows()); \
+ eigen_assert(matrix.cols() == matrix.rows()); \
\
lapack_int n = matrix.cols(), sdim, info; \
lapack_int lda = matrix.outerStride(); \
diff --git a/Eigen/src/Eigenvalues/Tridiagonalization.h b/Eigen/src/Eigenvalues/Tridiagonalization.h
index 5118874cd..e8408761d 100644
--- a/Eigen/src/Eigenvalues/Tridiagonalization.h
+++ b/Eigen/src/Eigenvalues/Tridiagonalization.h
@@ -426,8 +426,6 @@ struct tridiagonalization_inplace_selector;
template<typename MatrixType, typename DiagonalType, typename SubDiagonalType>
void tridiagonalization_inplace(MatrixType& mat, DiagonalType& diag, SubDiagonalType& subdiag, bool extractQ)
{
- typedef typename MatrixType::Index Index;
- //Index n = mat.rows();
eigen_assert(mat.cols()==mat.rows() && diag.size()==mat.rows() && subdiag.size()==mat.rows()-1);
tridiagonalization_inplace_selector<MatrixType>::run(mat, diag, subdiag, extractQ);
}
diff --git a/Eigen/src/Geometry/AlignedBox.h b/Eigen/src/Geometry/AlignedBox.h
index 48cc0a488..538a5afb7 100644
--- a/Eigen/src/Geometry/AlignedBox.h
+++ b/Eigen/src/Geometry/AlignedBox.h
@@ -71,7 +71,7 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
template<typename Derived>
inline explicit AlignedBox(const MatrixBase<Derived>& a_p)
{
- const typename internal::nested<Derived,2>::type p(a_p.derived());
+ typename internal::nested<Derived,2>::type p(a_p.derived());
m_min = p;
m_max = p;
}
@@ -282,7 +282,7 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
* determined by \a prec.
*
* \sa MatrixBase::isApprox() */
- bool isApprox(const AlignedBox& other, RealScalar prec = ScalarTraits::dummy_precision()) const
+ bool isApprox(const AlignedBox& other, const RealScalar& prec = ScalarTraits::dummy_precision()) const
{ return m_min.isApprox(other.m_min, prec) && m_max.isApprox(other.m_max, prec); }
protected:
@@ -296,7 +296,7 @@ template<typename Scalar,int AmbientDim>
template<typename Derived>
inline Scalar AlignedBox<Scalar,AmbientDim>::squaredExteriorDistance(const MatrixBase<Derived>& a_p) const
{
- const typename internal::nested<Derived,2*AmbientDim>::type p(a_p.derived());
+ typename internal::nested<Derived,2*AmbientDim>::type p(a_p.derived());
Scalar dist2(0);
Scalar aux;
for (Index k=0; k<dim(); ++k)
diff --git a/Eigen/src/Geometry/OrthoMethods.h b/Eigen/src/Geometry/OrthoMethods.h
index 11ad5829c..4c1bf5fcd 100644
--- a/Eigen/src/Geometry/OrthoMethods.h
+++ b/Eigen/src/Geometry/OrthoMethods.h
@@ -78,8 +78,8 @@ MatrixBase<Derived>::cross3(const MatrixBase<OtherDerived>& other) const
typedef typename internal::nested<Derived,2>::type DerivedNested;
typedef typename internal::nested<OtherDerived,2>::type OtherDerivedNested;
- const DerivedNested lhs(derived());
- const OtherDerivedNested rhs(other.derived());
+ DerivedNested lhs(derived());
+ OtherDerivedNested rhs(other.derived());
return internal::cross3_impl<Architecture::Target,
typename internal::remove_all<DerivedNested>::type,
diff --git a/Eigen/src/Geometry/Quaternion.h b/Eigen/src/Geometry/Quaternion.h
index 45824fbeb..d036c018a 100644
--- a/Eigen/src/Geometry/Quaternion.h
+++ b/Eigen/src/Geometry/Quaternion.h
@@ -154,7 +154,7 @@ public:
* \a t in [0;1]
* see http://en.wikipedia.org/wiki/Slerp
*/
- template<class OtherDerived> Quaternion<Scalar> slerp(Scalar t, const QuaternionBase<OtherDerived>& other) const;
+ template<class OtherDerived> Quaternion<Scalar> slerp(const Scalar& t, const QuaternionBase<OtherDerived>& other) const;
/** \returns \c true if \c *this is approximately equal to \a other, within the precision
* determined by \a prec.
@@ -683,7 +683,7 @@ QuaternionBase<Derived>::angularDistance(const QuaternionBase<OtherDerived>& oth
template <class Derived>
template <class OtherDerived>
Quaternion<typename internal::traits<Derived>::Scalar>
-QuaternionBase<Derived>::slerp(Scalar t, const QuaternionBase<OtherDerived>& other) const
+QuaternionBase<Derived>::slerp(const Scalar& t, const QuaternionBase<OtherDerived>& other) const
{
using std::acos;
using std::sin;
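
Only the signature of slerp() changes here (t is now taken by const reference); callers still pass a plain scalar. A small usage sketch under the usual QuaternionBase API:

#include <Eigen/Geometry>
#include <iostream>

int main() {
  const double halfPi = 1.5707963267948966;

  // Two unit quaternions: identity and a 90-degree rotation about Z.
  Eigen::Quaterniond qa = Eigen::Quaterniond::Identity();
  Eigen::Quaterniond qb(Eigen::AngleAxisd(halfPi, Eigen::Vector3d::UnitZ()));

  // Interpolate halfway; t is still passed as a plain double literal.
  Eigen::Quaterniond qm = qa.slerp(0.5, qb);

  std::cout << qm.coeffs().transpose() << std::endl;   // coefficients in (x, y, z, w) order
  return 0;
}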
diff --git a/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h b/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h
index 5a822e0ea..fbefb696f 100644
--- a/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h
+++ b/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h
@@ -44,6 +44,11 @@ bool bicgstab(const MatrixType& mat, const Rhs& rhs, Dest& x,
VectorType r0 = r;
RealScalar r0_sqnorm = rhs.squaredNorm();
+ if(r0_sqnorm == 0)
+ {
+ x.setZero();
+ return true;
+ }
Scalar rho = 1;
Scalar alpha = 1;
Scalar w = 1;
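
The added guard makes bicgstab() return x = 0 immediately when the right-hand side is exactly zero, instead of iterating on a degenerate problem. A small sketch of the observable behaviour through the public BiCGSTAB interface (the matrix values are arbitrary):

#include <Eigen/Sparse>
#include <Eigen/IterativeLinearSolvers>
#include <iostream>

int main() {
  typedef Eigen::SparseMatrix<double> SpMat;

  // A trivial diagonal system with a zero right-hand side.
  SpMat A(3, 3);
  A.insert(0, 0) = 4.0;
  A.insert(1, 1) = 3.0;
  A.insert(2, 2) = 2.0;
  A.makeCompressed();

  Eigen::VectorXd b = Eigen::VectorXd::Zero(3);

  Eigen::BiCGSTAB<SpMat> solver;
  solver.compute(A);
  // With the new guard, the exact solution x = 0 is returned without entering the iteration loop.
  Eigen::VectorXd x = solver.solve(b);

  std::cout << "x = " << x.transpose() << std::endl;   // expected: 0 0 0
  return 0;
}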
diff --git a/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h b/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h
index f64f2534d..00b5647c6 100644
--- a/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h
+++ b/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h
@@ -41,15 +41,29 @@ void conjugate_gradient(const MatrixType& mat, const Rhs& rhs, Dest& x,
int n = mat.cols();
VectorType residual = rhs - mat * x; //initial residual
- VectorType p(n);
+ RealScalar rhsNorm2 = rhs.squaredNorm();
+ if(rhsNorm2 == 0)
+ {
+ x.setZero();
+ iters = 0;
+ tol_error = 0;
+ return;
+ }
+ RealScalar threshold = tol*tol*rhsNorm2;
+ RealScalar residualNorm2 = residual.squaredNorm();
+ if (residualNorm2 < threshold)
+ {
+ iters = 0;
+ tol_error = sqrt(residualNorm2 / rhsNorm2);
+ return;
+ }
+
+ VectorType p(n);
p = precond.solve(residual); //initial search direction
VectorType z(n), tmp(n);
RealScalar absNew = internal::real(residual.dot(p)); // the square of the absolute value of r scaled by invM
- RealScalar rhsNorm2 = rhs.squaredNorm();
- RealScalar residualNorm2 = 0;
- RealScalar threshold = tol*tol*rhsNorm2;
int i = 0;
while(i < maxIters)
{
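
The reordering above adds two cheap exits before the main loop: a zero right-hand side yields x = 0 with iters = 0 and tol_error = 0, and an initial guess whose squared residual is already below tol*tol*||b||^2 returns immediately with the achieved error. A compact sketch of the second case through solveWithGuess(), which is part of the public ConjugateGradient interface:

#include <Eigen/Sparse>
#include <Eigen/IterativeLinearSolvers>
#include <iostream>

int main() {
  typedef Eigen::SparseMatrix<double> SpMat;

  // Diagonal SPD system: A = diag(2, 2, 2), b = (2, 2, 2), exact solution x = (1, 1, 1).
  SpMat A(3, 3);
  for (int i = 0; i < 3; ++i) A.insert(i, i) = 2.0;
  A.makeCompressed();
  Eigen::VectorXd b = Eigen::VectorXd::Constant(3, 2.0);

  Eigen::ConjugateGradient<SpMat> cg;
  cg.compute(A);

  // Start from the exact solution: the initial residual already satisfies the
  // stopping criterion, so the new early exit returns before any iteration.
  Eigen::VectorXd x0 = Eigen::VectorXd::Ones(3);
  Eigen::VectorXd x = cg.solveWithGuess(b, x0);

  std::cout << "iterations: " << cg.iterations()
            << "  error: " << cg.error() << std::endl;   // 0 iterations expected
  return 0;
}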
diff --git a/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h b/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h
index 5b408f83d..17d18ef58 100644
--- a/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h
+++ b/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h
@@ -24,14 +24,15 @@ namespace internal {
* \param ind The array of index for the elements in @p row
* \param ncut The number of largest elements to keep
**/
-template <typename VectorV, typename VectorI>
-int QuickSplit(VectorV &row, VectorI &ind, int ncut)
+template <typename VectorV, typename VectorI, typename Index>
+Index QuickSplit(VectorV &row, VectorI &ind, Index ncut)
{
typedef typename VectorV::RealScalar RealScalar;
using std::swap;
- int mid;
- int n = row.size(); /* length of the vector */
- int first, last ;
+ using std::abs;
+ Index mid;
+ Index n = row.size(); /* length of the vector */
+ Index first, last ;
ncut--; /* to fit the zero-based indices */
first = 0;
@@ -40,9 +41,9 @@ int QuickSplit(VectorV &row, VectorI &ind, int ncut)
do {
mid = first;
- RealScalar abskey = std::abs(row(mid));
- for (int j = first + 1; j <= last; j++) {
- if ( std::abs(row(j)) > abskey) {
+ RealScalar abskey = abs(row(mid));
+ for (Index j = first + 1; j <= last; j++) {
+ if ( abs(row(j)) > abskey) {
++mid;
swap(row(mid), row(j));
swap(ind(mid), ind(j));
@@ -110,7 +111,7 @@ class IncompleteLUT : internal::noncopyable
{}
template<typename MatrixType>
- IncompleteLUT(const MatrixType& mat, RealScalar droptol=NumTraits<Scalar>::dummy_precision(), int fillfactor = 10)
+ IncompleteLUT(const MatrixType& mat, const RealScalar& droptol=NumTraits<Scalar>::dummy_precision(), int fillfactor = 10)
: m_droptol(droptol),m_fillfactor(fillfactor),
m_analysisIsOk(false),m_factorizationIsOk(false),m_isInitialized(false)
{
@@ -154,7 +155,7 @@ class IncompleteLUT : internal::noncopyable
return *this;
}
- void setDroptol(RealScalar droptol);
+ void setDroptol(const RealScalar& droptol);
void setFillfactor(int fillfactor);
template<typename Rhs, typename Dest>
@@ -203,7 +204,7 @@ protected:
* \param droptol Drop any element whose magnitude is less than this tolerance
**/
template<typename Scalar>
-void IncompleteLUT<Scalar>::setDroptol(RealScalar droptol)
+void IncompleteLUT<Scalar>::setDroptol(const RealScalar& droptol)
{
this->m_droptol = droptol;
}
@@ -246,7 +247,7 @@ void IncompleteLUT<Scalar>::factorize(const _MatrixType& amat)
using std::abs;
eigen_assert((amat.rows() == amat.cols()) && "The factorization should be done on a square matrix");
- int n = amat.cols(); // Size of the matrix
+ Index n = amat.cols(); // Size of the matrix
m_lu.resize(n,n);
// Declare Working vectors and variables
Vector u(n) ; // real values of the row -- maximum size is n --
@@ -264,21 +265,21 @@ void IncompleteLUT<Scalar>::factorize(const _MatrixType& amat)
u.fill(0);
// number of largest elements to keep in each row:
- int fill_in = static_cast<int> (amat.nonZeros()*m_fillfactor)/n+1;
+ Index fill_in = static_cast<Index> (amat.nonZeros()*m_fillfactor)/n+1;
if (fill_in > n) fill_in = n;
// number of largest nonzero elements to keep in the L and the U part of the current row:
- int nnzL = fill_in/2;
- int nnzU = nnzL;
+ Index nnzL = fill_in/2;
+ Index nnzU = nnzL;
m_lu.reserve(n * (nnzL + nnzU + 1));
// global loop over the rows of the sparse matrix
- for (int ii = 0; ii < n; ii++)
+ for (Index ii = 0; ii < n; ii++)
{
// 1 - copy the lower and the upper part of the row i of mat in the working vector u
- int sizeu = 1; // number of nonzero elements in the upper part of the current row
- int sizel = 0; // number of nonzero elements in the lower part of the current row
+ Index sizeu = 1; // number of nonzero elements in the upper part of the current row
+ Index sizel = 0; // number of nonzero elements in the lower part of the current row
ju(ii) = ii;
u(ii) = 0;
jr(ii) = ii;
@@ -287,7 +288,7 @@ void IncompleteLUT<Scalar>::factorize(const _MatrixType& amat)
typename FactorType::InnerIterator j_it(mat, ii); // Iterate through the current row ii
for (; j_it; ++j_it)
{
- int k = j_it.index();
+ Index k = j_it.index();
if (k < ii)
{
// copy the lower part
@@ -303,7 +304,7 @@ void IncompleteLUT<Scalar>::factorize(const _MatrixType& amat)
else
{
// copy the upper part
- int jpos = ii + sizeu;
+ Index jpos = ii + sizeu;
ju(jpos) = k;
u(jpos) = j_it.value();
jr(k) = jpos;
@@ -322,19 +323,19 @@ void IncompleteLUT<Scalar>::factorize(const _MatrixType& amat)
rownorm = sqrt(rownorm);
// 3 - eliminate the previous nonzero rows
- int jj = 0;
- int len = 0;
+ Index jj = 0;
+ Index len = 0;
while (jj < sizel)
{
// In order to eliminate in the correct order,
// we must select first the smallest column index among ju(jj:sizel)
- int k;
- int minrow = ju.segment(jj,sizel-jj).minCoeff(&k); // k is relative to the segment
+ Index k;
+ Index minrow = ju.segment(jj,sizel-jj).minCoeff(&k); // k is relative to the segment
k += jj;
if (minrow != ju(jj))
{
// swap the two locations
- int j = ju(jj);
+ Index j = ju(jj);
swap(ju(jj), ju(k));
jr(minrow) = jj; jr(j) = k;
swap(u(jj), u(k));
@@ -360,11 +361,11 @@ void IncompleteLUT<Scalar>::factorize(const _MatrixType& amat)
for (; ki_it; ++ki_it)
{
Scalar prod = fact * ki_it.value();
- int j = ki_it.index();
- int jpos = jr(j);
+ Index j = ki_it.index();
+ Index jpos = jr(j);
if (jpos == -1) // fill-in element
{
- int newpos;
+ Index newpos;
if (j >= ii) // dealing with the upper part
{
newpos = ii + sizeu;
@@ -393,7 +394,7 @@ void IncompleteLUT<Scalar>::factorize(const _MatrixType& amat)
} // end of the elimination on the row ii
// reset the upper part of the pointer jr to zero
- for(int k = 0; k <sizeu; k++) jr(ju(ii+k)) = -1;
+ for(Index k = 0; k <sizeu; k++) jr(ju(ii+k)) = -1;
// 4 - partially sort and insert the elements in the m_lu matrix
@@ -406,7 +407,7 @@ void IncompleteLUT<Scalar>::factorize(const _MatrixType& amat)
// store the largest m_fill elements of the L part
m_lu.startVec(ii);
- for(int k = 0; k < len; k++)
+ for(Index k = 0; k < len; k++)
m_lu.insertBackByOuterInnerUnordered(ii,ju(k)) = u(k);
// store the diagonal element
@@ -418,7 +419,7 @@ void IncompleteLUT<Scalar>::factorize(const _MatrixType& amat)
// sort the U-part of the row
// apply the dropping rule first
len = 0;
- for(int k = 1; k < sizeu; k++)
+ for(Index k = 1; k < sizeu; k++)
{
if(abs(u(ii+k)) > m_droptol * rownorm )
{
@@ -434,7 +435,7 @@ void IncompleteLUT<Scalar>::factorize(const _MatrixType& amat)
internal::QuickSplit(uu, juu, len);
// store the largest elements of the U part
- for(int k = ii + 1; k < ii + len; k++)
+ for(Index k = ii + 1; k < ii + len; k++)
m_lu.insertBackByOuterInnerUnordered(ii,ju(k)) = u(k);
}
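
QuickSplit now works with any Index type and picks up abs() by argument-dependent lookup. What it computes is a quickselect-style partial partition: after the call, the ncut entries of largest magnitude occupy the first ncut positions of row, with ind permuted alongside, which is exactly what the dropping rule in factorize() needs. A standalone sketch of the same idea using the standard library (illustrative only, not the internal routine itself):

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <iostream>
#include <vector>

int main() {
  std::vector<double> row;
  row.push_back(0.1); row.push_back(-7.0); row.push_back(3.0);
  row.push_back(-0.5); row.push_back(2.0); row.push_back(9.0);
  const std::size_t ncut = 3;   // keep the 3 entries of largest magnitude

  // Partition so that the ncut largest |row[i]| come first (order within each half unspecified).
  std::nth_element(row.begin(), row.begin() + ncut, row.end(),
                   [](double a, double b) { return std::abs(a) > std::abs(b); });

  for (std::size_t k = 0; k < ncut; ++k) std::cout << row[k] << ' ';
  std::cout << std::endl;   // some ordering of 9, -7, 3
  return 0;
}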
diff --git a/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h b/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h
index 11706ceba..2036922d6 100644
--- a/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h
+++ b/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h
@@ -120,7 +120,7 @@ public:
RealScalar tolerance() const { return m_tolerance; }
/** Sets the tolerance threshold used by the stopping criteria */
- Derived& setTolerance(RealScalar tolerance)
+ Derived& setTolerance(const RealScalar& tolerance)
{
m_tolerance = tolerance;
return derived();
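
setTolerance() keeps its semantics; only the parameter passing changes. A minimal usage sketch combining it with the IncompleteLUT::setDroptol() setter touched above (matrix values are arbitrary):

#include <Eigen/Sparse>
#include <Eigen/IterativeLinearSolvers>

int main() {
  typedef Eigen::SparseMatrix<double> SpMat;

  SpMat A(2, 2);
  A.insert(0, 0) = 4.0; A.insert(0, 1) = 1.0;
  A.insert(1, 0) = 1.0; A.insert(1, 1) = 3.0;
  A.makeCompressed();
  Eigen::VectorXd b(2);
  b << 1.0, 2.0;

  // Both setters are used exactly as before; the scalar arguments are simply no longer copied.
  Eigen::BiCGSTAB<SpMat, Eigen::IncompleteLUT<double> > solver;
  solver.setTolerance(1e-10);                 // IterativeSolverBase::setTolerance
  solver.preconditioner().setDroptol(1e-4);   // IncompleteLUT::setDroptol
  solver.compute(A);
  Eigen::VectorXd x = solver.solve(b);
  return x.size() == 2 ? 0 : 1;
}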
diff --git a/Eigen/src/Jacobi/Jacobi.h b/Eigen/src/Jacobi/Jacobi.h
index 20e227640..d9d75196c 100644
--- a/Eigen/src/Jacobi/Jacobi.h
+++ b/Eigen/src/Jacobi/Jacobi.h
@@ -63,7 +63,7 @@ template<typename Scalar> class JacobiRotation
template<typename Derived>
bool makeJacobi(const MatrixBase<Derived>&, typename Derived::Index p, typename Derived::Index q);
- bool makeJacobi(RealScalar x, Scalar y, RealScalar z);
+ bool makeJacobi(const RealScalar& x, const Scalar& y, const RealScalar& z);
void makeGivens(const Scalar& p, const Scalar& q, Scalar* z=0);
@@ -80,7 +80,7 @@ template<typename Scalar> class JacobiRotation
* \sa MatrixBase::makeJacobi(const MatrixBase<Derived>&, Index, Index), MatrixBase::applyOnTheLeft(), MatrixBase::applyOnTheRight()
*/
template<typename Scalar>
-bool JacobiRotation<Scalar>::makeJacobi(RealScalar x, Scalar y, RealScalar z)
+bool JacobiRotation<Scalar>::makeJacobi(const RealScalar& x, const Scalar& y, const RealScalar& z)
{
using std::sqrt;
using std::abs;
diff --git a/Eigen/src/LU/Determinant.h b/Eigen/src/LU/Determinant.h
index d862c5d77..bb8e78a8a 100644
--- a/Eigen/src/LU/Determinant.h
+++ b/Eigen/src/LU/Determinant.h
@@ -91,7 +91,7 @@ template<typename Derived> struct determinant_impl<Derived, 4>
template<typename Derived>
inline typename internal::traits<Derived>::Scalar MatrixBase<Derived>::determinant() const
{
- assert(rows() == cols());
+ eigen_assert(rows() == cols());
typedef typename internal::nested<Derived,Base::RowsAtCompileTime>::type Nested;
return internal::determinant_impl<typename internal::remove_all<Nested>::type>::run(derived());
}
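
Replacing a bare assert() with eigen_assert() matters because the latter is a configurable macro: it honours EIGEN_NO_DEBUG/NDEBUG and can be redefined by the user before including any Eigen header. A minimal sketch of that customisation point (throwing std::runtime_error here is just an example policy):

#include <stdexcept>

// Must come before any Eigen include so the library picks up the redefinition.
#define eigen_assert(x) \
  do { if (!(x)) throw std::runtime_error("Eigen assertion failed: " #x); } while (false)

#include <Eigen/Dense>

int main() {
  Eigen::MatrixXd m(2, 3);                 // non-square on purpose
  try {
    double d = m.determinant();            // now triggers eigen_assert(rows() == cols())
    (void)d;
  } catch (const std::runtime_error&) {
    return 0;                              // assertion surfaced as an exception
  }
  return 1;
}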
diff --git a/Eigen/src/LU/FullPivLU.h b/Eigen/src/LU/FullPivLU.h
index bcd30be00..44699b763 100644
--- a/Eigen/src/LU/FullPivLU.h
+++ b/Eigen/src/LU/FullPivLU.h
@@ -417,6 +417,9 @@ FullPivLU<MatrixType>::FullPivLU(const MatrixType& matrix)
template<typename MatrixType>
FullPivLU<MatrixType>& FullPivLU<MatrixType>::compute(const MatrixType& matrix)
{
+ // the permutations are stored as int indices, so just to be sure:
+ eigen_assert(matrix.rows()<=NumTraits<int>::highest() && matrix.cols()<=NumTraits<int>::highest());
+
m_isInitialized = true;
m_lu = matrix;
diff --git a/Eigen/src/LU/PartialPivLU.h b/Eigen/src/LU/PartialPivLU.h
index 9cf1d61d8..1d389ecac 100644
--- a/Eigen/src/LU/PartialPivLU.h
+++ b/Eigen/src/LU/PartialPivLU.h
@@ -242,7 +242,7 @@ struct partial_lu_impl
const Index cols = lu.cols();
const Index size = (std::min)(rows,cols);
nb_transpositions = 0;
- int first_zero_pivot = -1;
+ Index first_zero_pivot = -1;
for(Index k = 0; k < size; ++k)
{
Index rrows = rows-k-1;
@@ -253,7 +253,7 @@ struct partial_lu_impl
= lu.col(k).tail(rows-k).cwiseAbs().maxCoeff(&row_of_biggest_in_col);
row_of_biggest_in_col += k;
- row_transpositions[k] = row_of_biggest_in_col;
+ row_transpositions[k] = PivIndex(row_of_biggest_in_col);
if(biggest_in_corner != RealScalar(0))
{
@@ -318,7 +318,7 @@ struct partial_lu_impl
}
nb_transpositions = 0;
- int first_zero_pivot = -1;
+ Index first_zero_pivot = -1;
for(Index k = 0; k < size; k+=blockSize)
{
Index bs = (std::min)(size-k,blockSize); // actual size of the block
@@ -386,6 +386,9 @@ void partial_lu_inplace(MatrixType& lu, TranspositionType& row_transpositions, t
template<typename MatrixType>
PartialPivLU<MatrixType>& PartialPivLU<MatrixType>::compute(const MatrixType& matrix)
{
+ // the row permutation is stored as int indices, so just to be sure:
+ eigen_assert(matrix.rows()<NumTraits<int>::highest());
+
m_lu = matrix;
eigen_assert(matrix.rows() == matrix.cols() && "PartialPivLU is only for square (and moreover invertible) matrices");
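
The new assertions document an existing restriction rather than adding one: as the comments say, the permutation/transposition sequences in FullPivLU and PartialPivLU store their indices as int, so matrices larger than NumTraits<int>::highest() (2^31 - 1 for a 32-bit int) in either dimension cannot be handled even when Index itself is 64-bit. A sketch of the same guard written outside the class (the helper name is hypothetical):

#include <Eigen/Dense>

// Hypothetical stand-alone version of the check added to compute() above.
template <typename MatrixType>
bool fitsIntPermutationIndices(const MatrixType& m) {
  return m.rows() <= Eigen::NumTraits<int>::highest()
      && m.cols() <= Eigen::NumTraits<int>::highest();
}

int main() {
  Eigen::MatrixXd m(4, 4);
  m.setRandom();
  return fitsIntPermutationIndices(m) ? 0 : 1;   // trivially true for small matrices
}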
diff --git a/Eigen/src/MetisSupport/MetisSupport.h b/Eigen/src/MetisSupport/MetisSupport.h
index 3a723b384..818355e79 100644
--- a/Eigen/src/MetisSupport/MetisSupport.h
+++ b/Eigen/src/MetisSupport/MetisSupport.h
@@ -29,7 +29,7 @@ public:
void get_symmetrized_graph(const MatrixType& A)
{
Index m = A.cols();
-
+ eigen_assert((A.rows() == A.cols()) && "ONLY FOR SQUARED MATRICES");
// Get the transpose of the input matrix
MatrixType At = A.transpose();
// Get the number of nonzeros elements in each row/col of At+A
diff --git a/Eigen/src/OrderingMethods/Amd.h b/Eigen/src/OrderingMethods/Amd.h
index 8878ef863..41b4fd7e3 100644
--- a/Eigen/src/OrderingMethods/Amd.h
+++ b/Eigen/src/OrderingMethods/Amd.h
@@ -2,10 +2,6 @@
// for linear algebra.
//
// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
/*
@@ -95,7 +91,6 @@ template<typename Scalar, typename Index>
void minimum_degree_ordering(SparseMatrix<Scalar,ColMajor,Index>& C, PermutationMatrix<Dynamic,Dynamic,Index>& perm)
{
using std::sqrt;
- typedef SparseMatrix<Scalar,ColMajor,Index> CCS;
int d, dk, dext, lemax = 0, e, elenk, eln, i, j, k, k1,
k2, k3, jlast, ln, dense, nzmax, mindeg = 0, nvi, nvj, nvk, mark, wnvi,
diff --git a/Eigen/src/OrderingMethods/Eigen_Colamd.h b/Eigen/src/OrderingMethods/Eigen_Colamd.h
index 6dc1f280d..44548f660 100644
--- a/Eigen/src/OrderingMethods/Eigen_Colamd.h
+++ b/Eigen/src/OrderingMethods/Eigen_Colamd.h
@@ -50,6 +50,7 @@
#ifndef EIGEN_COLAMD_H
#define EIGEN_COLAMD_H
+
namespace internal {
/* Ensure that debugging is turned off: */
#ifndef COLAMD_NDEBUG
@@ -133,107 +134,106 @@ namespace internal {
/* === Colamd reporting mechanism =========================================== */
/* ========================================================================== */
- // == Row and Column structures ==
-typedef struct colamd_col_struct
+// == Row and Column structures ==
+template <typename Index>
+struct colamd_col
{
- int start ; /* index for A of first row in this column, or DEAD */
- /* if column is dead */
- int length ; /* number of rows in this column */
- union
- {
- int thickness ; /* number of original columns represented by this */
- /* col, if the column is alive */
- int parent ; /* parent in parent tree super-column structure, if */
- /* the column is dead */
- } shared1 ;
- union
- {
- int score ; /* the score used to maintain heap, if col is alive */
- int order ; /* pivot ordering of this column, if col is dead */
- } shared2 ;
- union
- {
- int headhash ; /* head of a hash bucket, if col is at the head of */
- /* a degree list */
- int hash ; /* hash value, if col is not in a degree list */
- int prev ; /* previous column in degree list, if col is in a */
- /* degree list (but not at the head of a degree list) */
- } shared3 ;
- union
- {
- int degree_next ; /* next column, if col is in a degree list */
- int hash_next ; /* next column, if col is in a hash list */
- } shared4 ;
-
-} colamd_col ;
-
-typedef struct Colamd_Row_struct
+ Index start ; /* index for A of first row in this column, or DEAD */
+ /* if column is dead */
+ Index length ; /* number of rows in this column */
+ union
+ {
+ Index thickness ; /* number of original columns represented by this */
+ /* col, if the column is alive */
+ Index parent ; /* parent in parent tree super-column structure, if */
+ /* the column is dead */
+ } shared1 ;
+ union
+ {
+ Index score ; /* the score used to maintain heap, if col is alive */
+ Index order ; /* pivot ordering of this column, if col is dead */
+ } shared2 ;
+ union
+ {
+ Index headhash ; /* head of a hash bucket, if col is at the head of */
+ /* a degree list */
+ Index hash ; /* hash value, if col is not in a degree list */
+ Index prev ; /* previous column in degree list, if col is in a */
+ /* degree list (but not at the head of a degree list) */
+ } shared3 ;
+ union
+ {
+ Index degree_next ; /* next column, if col is in a degree list */
+ Index hash_next ; /* next column, if col is in a hash list */
+ } shared4 ;
+
+};
+
+template <typename Index>
+struct Colamd_Row
{
- int start ; /* index for A of first col in this row */
- int length ; /* number of principal columns in this row */
- union
- {
- int degree ; /* number of principal & non-principal columns in row */
- int p ; /* used as a row pointer in init_rows_cols () */
- } shared1 ;
- union
- {
- int mark ; /* for computing set differences and marking dead rows*/
- int first_column ;/* first column in row (used in garbage collection) */
- } shared2 ;
-
-} Colamd_Row ;
-
+ Index start ; /* index for A of first col in this row */
+ Index length ; /* number of principal columns in this row */
+ union
+ {
+ Index degree ; /* number of principal & non-principal columns in row */
+ Index p ; /* used as a row pointer in init_rows_cols () */
+ } shared1 ;
+ union
+ {
+ Index mark ; /* for computing set differences and marking dead rows*/
+ Index first_column ;/* first column in row (used in garbage collection) */
+ } shared2 ;
+
+};
+
/* ========================================================================== */
/* === Colamd recommended memory size ======================================= */
/* ========================================================================== */
-
+
/*
- The recommended length Alen of the array A passed to colamd is given by
- the COLAMD_RECOMMENDED (nnz, n_row, n_col) macro. It returns -1 if any
- argument is negative. 2*nnz space is required for the row and column
- indices of the matrix. colamd_c (n_col) + colamd_r (n_row) space is
- required for the Col and Row arrays, respectively, which are internal to
- colamd. An additional n_col space is the minimal amount of "elbow room",
- and nnz/5 more space is recommended for run time efficiency.
-
- This macro is not needed when using symamd.
-
- Explicit typecast to int added Sept. 23, 2002, COLAMD version 2.2, to avoid
- gcc -pedantic warning messages.
+ The recommended length Alen of the array A passed to colamd is given by
+ the COLAMD_RECOMMENDED (nnz, n_row, n_col) macro. It returns -1 if any
+ argument is negative. 2*nnz space is required for the row and column
+ indices of the matrix. colamd_c (n_col) + colamd_r (n_row) space is
+ required for the Col and Row arrays, respectively, which are internal to
+ colamd. An additional n_col space is the minimal amount of "elbow room",
+ and nnz/5 more space is recommended for run time efficiency.
+
+ This macro is not needed when using symamd.
+
+ Explicit typecast to Index added Sept. 23, 2002, COLAMD version 2.2, to avoid
+ gcc -pedantic warning messages.
*/
+template <typename Index>
+inline Index colamd_c(Index n_col)
+{ return Index( ((n_col) + 1) * sizeof (colamd_col<Index>) / sizeof (Index) ) ; }
-inline int colamd_c(int n_col)
-{ return int( ((n_col) + 1) * sizeof (colamd_col) / sizeof (int) ) ; }
-
-inline int colamd_r(int n_row)
-{ return int(((n_row) + 1) * sizeof (Colamd_Row) / sizeof (int)); }
-
- // Various routines
-inline int colamd_recommended (int nnz, int n_row, int n_col) ;
-
-static inline void colamd_set_defaults (double knobs [COLAMD_KNOBS]) ;
+template <typename Index>
+inline Index colamd_r(Index n_row)
+{ return Index(((n_row) + 1) * sizeof (Colamd_Row<Index>) / sizeof (Index)); }
-static bool colamd (int n_row, int n_col, int Alen, int A [], int p [], double knobs[COLAMD_KNOBS], int stats [COLAMD_STATS]) ;
+// Prototypes of non-user callable routines
+template <typename Index>
+static Index init_rows_cols (Index n_row, Index n_col, Colamd_Row<Index> Row [], colamd_col<Index> col [], Index A [], Index p [], Index stats[COLAMD_STATS] );
-static int init_rows_cols (int n_row, int n_col, Colamd_Row Row [], colamd_col col [], int A [], int p [], int stats[COLAMD_STATS] );
+template <typename Index>
+static void init_scoring (Index n_row, Index n_col, Colamd_Row<Index> Row [], colamd_col<Index> Col [], Index A [], Index head [], double knobs[COLAMD_KNOBS], Index *p_n_row2, Index *p_n_col2, Index *p_max_deg);
-static void init_scoring (int n_row, int n_col, Colamd_Row Row [], colamd_col Col [], int A [], int head [], double knobs[COLAMD_KNOBS], int *p_n_row2, int *p_n_col2, int *p_max_deg);
+template <typename Index>
+static Index find_ordering (Index n_row, Index n_col, Index Alen, Colamd_Row<Index> Row [], colamd_col<Index> Col [], Index A [], Index head [], Index n_col2, Index max_deg, Index pfree);
-static int find_ordering (int n_row, int n_col, int Alen, Colamd_Row Row [], colamd_col Col [], int A [], int head [], int n_col2, int max_deg, int pfree);
+template <typename Index>
+static void order_children (Index n_col, colamd_col<Index> Col [], Index p []);
-static void order_children (int n_col, colamd_col Col [], int p []);
+template <typename Index>
+static void detect_super_cols (colamd_col<Index> Col [], Index A [], Index head [], Index row_start, Index row_length ) ;
-static void detect_super_cols (
- colamd_col Col [],
- int A [],
- int head [],
- int row_start,
- int row_length ) ;
+template <typename Index>
+static Index garbage_collection (Index n_row, Index n_col, Colamd_Row<Index> Row [], colamd_col<Index> Col [], Index A [], Index *pfree) ;
-static int garbage_collection (int n_row, int n_col, Colamd_Row Row [], colamd_col Col [], int A [], int *pfree) ;
-
-static inline int clear_mark (int n_row, Colamd_Row Row [] ) ;
+template <typename Index>
+static inline Index clear_mark (Index n_row, Colamd_Row<Index> Row [] ) ;
/* === No debugging ========================================================= */
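
The templated colamd_c/colamd_r helpers keep the same sizing arithmetic as before: the workspace A handed to colamd() must hold 2*nnz entries for the row and column forms, n_col entries of elbow room, and the Col and Row structures, with roughly nnz/5 extra recommended (compare the need = 2*nnz + n_col + Col_size + Row_size check further down). A small worked sketch of that arithmetic for concrete sizes; the per-structure entry counts below are assumptions, the real values come from sizeof(colamd_col<Index>) and sizeof(Colamd_Row<Index>) divided by sizeof(Index):

#include <iostream>

int main() {
  const long nnz = 10000, n_row = 2000, n_col = 1500;
  const long col_entries = 6;   // assumed Index entries per colamd_col structure
  const long row_entries = 4;   // assumed Index entries per Colamd_Row structure

  const long Col_size = (n_col + 1) * col_entries;
  const long Row_size = (n_row + 1) * row_entries;
  const long minimum     = 2 * nnz + n_col + Col_size + Row_size;  // hard requirement checked by colamd()
  const long recommended = minimum + nnz / 5;                      // extra slack for run-time efficiency

  std::cout << "minimum Alen:     " << minimum << "\n"
            << "recommended Alen: " << recommended << std::endl;
  return 0;
}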
@@ -260,7 +260,8 @@ static inline int clear_mark (int n_row, Colamd_Row Row [] ) ;
* \param n_col number of columns in A
* \return recommended value of Alen for use by colamd
*/
-inline int colamd_recommended ( int nnz, int n_row, int n_col)
+template <typename Index>
+inline Index colamd_recommended ( Index nnz, Index n_row, Index n_col)
{
if ((nnz) < 0 || (n_row) < 0 || (n_col) < 0)
return (-1);
@@ -288,22 +289,23 @@ inline int colamd_recommended ( int nnz, int n_row, int n_col)
*
* \param knobs parameter settings for colamd
*/
+
static inline void colamd_set_defaults(double knobs[COLAMD_KNOBS])
{
- /* === Local variables ================================================== */
-
- int i ;
+ /* === Local variables ================================================== */
+
+ int i ;
- if (!knobs)
- {
- return ; /* no knobs to initialize */
- }
- for (i = 0 ; i < COLAMD_KNOBS ; i++)
- {
- knobs [i] = 0 ;
- }
- knobs [COLAMD_DENSE_ROW] = 0.5 ; /* ignore rows over 50% dense */
- knobs [COLAMD_DENSE_COL] = 0.5 ; /* ignore columns over 50% dense */
+ if (!knobs)
+ {
+ return ; /* no knobs to initialize */
+ }
+ for (i = 0 ; i < COLAMD_KNOBS ; i++)
+ {
+ knobs [i] = 0 ;
+ }
+ knobs [COLAMD_DENSE_ROW] = 0.5 ; /* ignore rows over 50% dense */
+ knobs [COLAMD_DENSE_COL] = 0.5 ; /* ignore columns over 50% dense */
}
/**
@@ -323,144 +325,145 @@ static inline void colamd_set_defaults(double knobs[COLAMD_KNOBS])
* \param knobs parameter settings for colamd
* \param stats colamd output statistics and error codes
*/
-static bool colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[COLAMD_KNOBS], int stats[COLAMD_STATS])
+template <typename Index>
+static bool colamd(Index n_row, Index n_col, Index Alen, Index *A, Index *p, double knobs[COLAMD_KNOBS], Index stats[COLAMD_STATS])
{
- /* === Local variables ================================================== */
-
- int i ; /* loop index */
- int nnz ; /* nonzeros in A */
- int Row_size ; /* size of Row [], in integers */
- int Col_size ; /* size of Col [], in integers */
- int need ; /* minimum required length of A */
- Colamd_Row *Row ; /* pointer into A of Row [0..n_row] array */
- colamd_col *Col ; /* pointer into A of Col [0..n_col] array */
- int n_col2 ; /* number of non-dense, non-empty columns */
- int n_row2 ; /* number of non-dense, non-empty rows */
- int ngarbage ; /* number of garbage collections performed */
- int max_deg ; /* maximum row degree */
- double default_knobs [COLAMD_KNOBS] ; /* default knobs array */
-
-
- /* === Check the input arguments ======================================== */
-
- if (!stats)
- {
- COLAMD_DEBUG0 (("colamd: stats not present\n")) ;
- return (false) ;
- }
- for (i = 0 ; i < COLAMD_STATS ; i++)
- {
- stats [i] = 0 ;
- }
- stats [COLAMD_STATUS] = COLAMD_OK ;
- stats [COLAMD_INFO1] = -1 ;
- stats [COLAMD_INFO2] = -1 ;
-
- if (!A) /* A is not present */
- {
- stats [COLAMD_STATUS] = COLAMD_ERROR_A_not_present ;
- COLAMD_DEBUG0 (("colamd: A not present\n")) ;
- return (false) ;
- }
-
- if (!p) /* p is not present */
- {
- stats [COLAMD_STATUS] = COLAMD_ERROR_p_not_present ;
- COLAMD_DEBUG0 (("colamd: p not present\n")) ;
- return (false) ;
- }
-
- if (n_row < 0) /* n_row must be >= 0 */
- {
- stats [COLAMD_STATUS] = COLAMD_ERROR_nrow_negative ;
- stats [COLAMD_INFO1] = n_row ;
- COLAMD_DEBUG0 (("colamd: nrow negative %d\n", n_row)) ;
- return (false) ;
- }
-
- if (n_col < 0) /* n_col must be >= 0 */
- {
- stats [COLAMD_STATUS] = COLAMD_ERROR_ncol_negative ;
- stats [COLAMD_INFO1] = n_col ;
- COLAMD_DEBUG0 (("colamd: ncol negative %d\n", n_col)) ;
- return (false) ;
- }
-
- nnz = p [n_col] ;
- if (nnz < 0) /* nnz must be >= 0 */
- {
- stats [COLAMD_STATUS] = COLAMD_ERROR_nnz_negative ;
- stats [COLAMD_INFO1] = nnz ;
- COLAMD_DEBUG0 (("colamd: number of entries negative %d\n", nnz)) ;
- return (false) ;
- }
-
- if (p [0] != 0)
- {
- stats [COLAMD_STATUS] = COLAMD_ERROR_p0_nonzero ;
- stats [COLAMD_INFO1] = p [0] ;
- COLAMD_DEBUG0 (("colamd: p[0] not zero %d\n", p [0])) ;
- return (false) ;
- }
-
- /* === If no knobs, set default knobs =================================== */
-
- if (!knobs)
- {
- colamd_set_defaults (default_knobs) ;
- knobs = default_knobs ;
- }
-
- /* === Allocate the Row and Col arrays from array A ===================== */
-
- Col_size = colamd_c (n_col) ;
- Row_size = colamd_r (n_row) ;
- need = 2*nnz + n_col + Col_size + Row_size ;
-
- if (need > Alen)
- {
- /* not enough space in array A to perform the ordering */
- stats [COLAMD_STATUS] = COLAMD_ERROR_A_too_small ;
- stats [COLAMD_INFO1] = need ;
- stats [COLAMD_INFO2] = Alen ;
- COLAMD_DEBUG0 (("colamd: Need Alen >= %d, given only Alen = %d\n", need,Alen));
- return (false) ;
- }
-
- Alen -= Col_size + Row_size ;
- Col = (colamd_col *) &A [Alen] ;
- Row = (Colamd_Row *) &A [Alen + Col_size] ;
-
- /* === Construct the row and column data structures ===================== */
-
- if (!init_rows_cols (n_row, n_col, Row, Col, A, p, stats))
- {
- /* input matrix is invalid */
- COLAMD_DEBUG0 (("colamd: Matrix invalid\n")) ;
- return (false) ;
- }
-
- /* === Initialize scores, kill dense rows/columns ======================= */
-
- init_scoring (n_row, n_col, Row, Col, A, p, knobs,
- &n_row2, &n_col2, &max_deg) ;
-
- /* === Order the supercolumns =========================================== */
-
- ngarbage = find_ordering (n_row, n_col, Alen, Row, Col, A, p,
- n_col2, max_deg, 2*nnz) ;
-
- /* === Order the non-principal columns ================================== */
-
- order_children (n_col, Col, p) ;
+ /* === Local variables ================================================== */
+
+ Index i ; /* loop index */
+ Index nnz ; /* nonzeros in A */
+ Index Row_size ; /* size of Row [], in integers */
+ Index Col_size ; /* size of Col [], in integers */
+ Index need ; /* minimum required length of A */
+ Colamd_Row<Index> *Row ; /* pointer into A of Row [0..n_row] array */
+ colamd_col<Index> *Col ; /* pointer into A of Col [0..n_col] array */
+ Index n_col2 ; /* number of non-dense, non-empty columns */
+ Index n_row2 ; /* number of non-dense, non-empty rows */
+ Index ngarbage ; /* number of garbage collections performed */
+ Index max_deg ; /* maximum row degree */
+ double default_knobs [COLAMD_KNOBS] ; /* default knobs array */
+
+
+ /* === Check the input arguments ======================================== */
+
+ if (!stats)
+ {
+ COLAMD_DEBUG0 (("colamd: stats not present\n")) ;
+ return (false) ;
+ }
+ for (i = 0 ; i < COLAMD_STATS ; i++)
+ {
+ stats [i] = 0 ;
+ }
+ stats [COLAMD_STATUS] = COLAMD_OK ;
+ stats [COLAMD_INFO1] = -1 ;
+ stats [COLAMD_INFO2] = -1 ;
+
+ if (!A) /* A is not present */
+ {
+ stats [COLAMD_STATUS] = COLAMD_ERROR_A_not_present ;
+ COLAMD_DEBUG0 (("colamd: A not present\n")) ;
+ return (false) ;
+ }
+
+ if (!p) /* p is not present */
+ {
+ stats [COLAMD_STATUS] = COLAMD_ERROR_p_not_present ;
+ COLAMD_DEBUG0 (("colamd: p not present\n")) ;
+ return (false) ;
+ }
+
+ if (n_row < 0) /* n_row must be >= 0 */
+ {
+ stats [COLAMD_STATUS] = COLAMD_ERROR_nrow_negative ;
+ stats [COLAMD_INFO1] = n_row ;
+ COLAMD_DEBUG0 (("colamd: nrow negative %d\n", n_row)) ;
+ return (false) ;
+ }
+
+ if (n_col < 0) /* n_col must be >= 0 */
+ {
+ stats [COLAMD_STATUS] = COLAMD_ERROR_ncol_negative ;
+ stats [COLAMD_INFO1] = n_col ;
+ COLAMD_DEBUG0 (("colamd: ncol negative %d\n", n_col)) ;
+ return (false) ;
+ }
+
+ nnz = p [n_col] ;
+ if (nnz < 0) /* nnz must be >= 0 */
+ {
+ stats [COLAMD_STATUS] = COLAMD_ERROR_nnz_negative ;
+ stats [COLAMD_INFO1] = nnz ;
+ COLAMD_DEBUG0 (("colamd: number of entries negative %d\n", nnz)) ;
+ return (false) ;
+ }
+
+ if (p [0] != 0)
+ {
+ stats [COLAMD_STATUS] = COLAMD_ERROR_p0_nonzero ;
+ stats [COLAMD_INFO1] = p [0] ;
+ COLAMD_DEBUG0 (("colamd: p[0] not zero %d\n", p [0])) ;
+ return (false) ;
+ }
+
+ /* === If no knobs, set default knobs =================================== */
+
+ if (!knobs)
+ {
+ colamd_set_defaults (default_knobs) ;
+ knobs = default_knobs ;
+ }
+
+ /* === Allocate the Row and Col arrays from array A ===================== */
+
+ Col_size = colamd_c (n_col) ;
+ Row_size = colamd_r (n_row) ;
+ need = 2*nnz + n_col + Col_size + Row_size ;
+
+ if (need > Alen)
+ {
+ /* not enough space in array A to perform the ordering */
+ stats [COLAMD_STATUS] = COLAMD_ERROR_A_too_small ;
+ stats [COLAMD_INFO1] = need ;
+ stats [COLAMD_INFO2] = Alen ;
+ COLAMD_DEBUG0 (("colamd: Need Alen >= %d, given only Alen = %d\n", need,Alen));
+ return (false) ;
+ }
+
+ Alen -= Col_size + Row_size ;
+ Col = (colamd_col<Index> *) &A [Alen] ;
+ Row = (Colamd_Row<Index> *) &A [Alen + Col_size] ;
- /* === Return statistics in stats ======================================= */
+ /* === Construct the row and column data structures ===================== */
+
+ if (!Eigen::internal::init_rows_cols (n_row, n_col, Row, Col, A, p, stats))
+ {
+ /* input matrix is invalid */
+ COLAMD_DEBUG0 (("colamd: Matrix invalid\n")) ;
+ return (false) ;
+ }
+
+ /* === Initialize scores, kill dense rows/columns ======================= */
- stats [COLAMD_DENSE_ROW] = n_row - n_row2 ;
- stats [COLAMD_DENSE_COL] = n_col - n_col2 ;
- stats [COLAMD_DEFRAG_COUNT] = ngarbage ;
- COLAMD_DEBUG0 (("colamd: done.\n")) ;
- return (true) ;
+ Eigen::internal::init_scoring (n_row, n_col, Row, Col, A, p, knobs,
+ &n_row2, &n_col2, &max_deg) ;
+
+ /* === Order the supercolumns =========================================== */
+
+ ngarbage = Eigen::internal::find_ordering (n_row, n_col, Alen, Row, Col, A, p,
+ n_col2, max_deg, 2*nnz) ;
+
+ /* === Order the non-principal columns ================================== */
+
+ Eigen::internal::order_children (n_col, Col, p) ;
+
+ /* === Return statistics in stats ======================================= */
+
+ stats [COLAMD_DENSE_ROW] = n_row - n_row2 ;
+ stats [COLAMD_DENSE_COL] = n_col - n_col2 ;
+ stats [COLAMD_DEFRAG_COUNT] = ngarbage ;
+ COLAMD_DEBUG0 (("colamd: done.\n")) ;
+ return (true) ;
}
/* ========================================================================== */
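
These routines are internal and not user-callable, as their comments note; within Eigen they back the fill-reducing column ordering used by the sparse factorizations. A minimal sketch of how that ordering is reached through the public API, assuming the OrderingMethods and SparseQR interfaces present in this tree:

#include <Eigen/Sparse>
#include <Eigen/SparseQR>
#include <Eigen/OrderingMethods>

int main() {
  typedef Eigen::SparseMatrix<double> SpMat;

  SpMat A(3, 3);
  A.insert(0, 0) = 2.0; A.insert(0, 2) = 1.0;
  A.insert(1, 1) = 3.0;
  A.insert(2, 0) = 1.0; A.insert(2, 2) = 4.0;
  A.makeCompressed();               // SparseQR expects a compressed matrix

  Eigen::VectorXd b(3);
  b << 1.0, 2.0, 3.0;

  // SparseQR asks COLAMDOrdering for a fill-reducing column permutation,
  // which is where the internal colamd() routine above ends up being used.
  Eigen::SparseQR<SpMat, Eigen::COLAMDOrdering<int> > qr(A);
  Eigen::VectorXd x = qr.solve(b);
  return qr.info() == Eigen::Success ? 0 : 1;
}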
@@ -475,218 +478,218 @@ static bool colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[
/* ========================================================================== */
/*
- Takes the column form of the matrix in A and creates the row form of the
- matrix. Also, row and column attributes are stored in the Col and Row
- structs. If the columns are un-sorted or contain duplicate row indices,
- this routine will also sort and remove duplicate row indices from the
- column form of the matrix. Returns false if the matrix is invalid,
- true otherwise. Not user-callable.
+ Takes the column form of the matrix in A and creates the row form of the
+ matrix. Also, row and column attributes are stored in the Col and Row
+ structs. If the columns are un-sorted or contain duplicate row indices,
+ this routine will also sort and remove duplicate row indices from the
+ column form of the matrix. Returns false if the matrix is invalid,
+ true otherwise. Not user-callable.
*/
-
- static int init_rows_cols /* returns true if OK, or false otherwise */
-(
+template <typename Index>
+static Index init_rows_cols /* returns true if OK, or false otherwise */
+ (
/* === Parameters ======================================================= */
- int n_row, /* number of rows of A */
- int n_col, /* number of columns of A */
- Colamd_Row Row [], /* of size n_row+1 */
- colamd_col Col [], /* of size n_col+1 */
- int A [], /* row indices of A, of size Alen */
- int p [], /* pointers to columns in A, of size n_col+1 */
- int stats [COLAMD_STATS] /* colamd statistics */
-)
+ Index n_row, /* number of rows of A */
+ Index n_col, /* number of columns of A */
+ Colamd_Row<Index> Row [], /* of size n_row+1 */
+ colamd_col<Index> Col [], /* of size n_col+1 */
+ Index A [], /* row indices of A, of size Alen */
+ Index p [], /* pointers to columns in A, of size n_col+1 */
+ Index stats [COLAMD_STATS] /* colamd statistics */
+ )
{
- /* === Local variables ================================================== */
+ /* === Local variables ================================================== */
- int col ; /* a column index */
- int row ; /* a row index */
- int *cp ; /* a column pointer */
- int *cp_end ; /* a pointer to the end of a column */
- int *rp ; /* a row pointer */
- int *rp_end ; /* a pointer to the end of a row */
- int last_row ; /* previous row */
+ Index col ; /* a column index */
+ Index row ; /* a row index */
+ Index *cp ; /* a column pointer */
+ Index *cp_end ; /* a pointer to the end of a column */
+ Index *rp ; /* a row pointer */
+ Index *rp_end ; /* a pointer to the end of a row */
+ Index last_row ; /* previous row */
- /* === Initialize columns, and check column pointers ==================== */
-
- for (col = 0 ; col < n_col ; col++)
- {
- Col [col].start = p [col] ;
- Col [col].length = p [col+1] - p [col] ;
+ /* === Initialize columns, and check column pointers ==================== */
- if (Col [col].length < 0)
+ for (col = 0 ; col < n_col ; col++)
{
+ Col [col].start = p [col] ;
+ Col [col].length = p [col+1] - p [col] ;
+
+ if (Col [col].length < 0)
+ {
/* column pointers must be non-decreasing */
stats [COLAMD_STATUS] = COLAMD_ERROR_col_length_negative ;
stats [COLAMD_INFO1] = col ;
stats [COLAMD_INFO2] = Col [col].length ;
COLAMD_DEBUG0 (("colamd: col %d length %d < 0\n", col, Col [col].length)) ;
return (false) ;
- }
-
- Col [col].shared1.thickness = 1 ;
- Col [col].shared2.score = 0 ;
- Col [col].shared3.prev = COLAMD_EMPTY ;
- Col [col].shared4.degree_next = COLAMD_EMPTY ;
}
- /* p [0..n_col] no longer needed, used as "head" in subsequent routines */
-
- /* === Scan columns, compute row degrees, and check row indices ========= */
+ Col [col].shared1.thickness = 1 ;
+ Col [col].shared2.score = 0 ;
+ Col [col].shared3.prev = COLAMD_EMPTY ;
+ Col [col].shared4.degree_next = COLAMD_EMPTY ;
+ }
- stats [COLAMD_INFO3] = 0 ; /* number of duplicate or unsorted row indices*/
+ /* p [0..n_col] no longer needed, used as "head" in subsequent routines */
- for (row = 0 ; row < n_row ; row++)
- {
- Row [row].length = 0 ;
- Row [row].shared2.mark = -1 ;
- }
+ /* === Scan columns, compute row degrees, and check row indices ========= */
- for (col = 0 ; col < n_col ; col++)
- {
- last_row = -1 ;
+ stats [COLAMD_INFO3] = 0 ; /* number of duplicate or unsorted row indices*/
- cp = &A [p [col]] ;
- cp_end = &A [p [col+1]] ;
+ for (row = 0 ; row < n_row ; row++)
+ {
+ Row [row].length = 0 ;
+ Row [row].shared2.mark = -1 ;
+ }
- while (cp < cp_end)
+ for (col = 0 ; col < n_col ; col++)
{
+ last_row = -1 ;
+
+ cp = &A [p [col]] ;
+ cp_end = &A [p [col+1]] ;
+
+ while (cp < cp_end)
+ {
row = *cp++ ;
/* make sure row indices within range */
if (row < 0 || row >= n_row)
{
- stats [COLAMD_STATUS] = COLAMD_ERROR_row_index_out_of_bounds ;
- stats [COLAMD_INFO1] = col ;
- stats [COLAMD_INFO2] = row ;
- stats [COLAMD_INFO3] = n_row ;
- COLAMD_DEBUG0 (("colamd: row %d col %d out of bounds\n", row, col)) ;
- return (false) ;
+ stats [COLAMD_STATUS] = COLAMD_ERROR_row_index_out_of_bounds ;
+ stats [COLAMD_INFO1] = col ;
+ stats [COLAMD_INFO2] = row ;
+ stats [COLAMD_INFO3] = n_row ;
+ COLAMD_DEBUG0 (("colamd: row %d col %d out of bounds\n", row, col)) ;
+ return (false) ;
}
if (row <= last_row || Row [row].shared2.mark == col)
{
- /* row index are unsorted or repeated (or both), thus col */
- /* is jumbled. This is a notice, not an error condition. */
- stats [COLAMD_STATUS] = COLAMD_OK_BUT_JUMBLED ;
- stats [COLAMD_INFO1] = col ;
- stats [COLAMD_INFO2] = row ;
- (stats [COLAMD_INFO3]) ++ ;
- COLAMD_DEBUG1 (("colamd: row %d col %d unsorted/duplicate\n",row,col));
+ /* row index are unsorted or repeated (or both), thus col */
+ /* is jumbled. This is a notice, not an error condition. */
+ stats [COLAMD_STATUS] = COLAMD_OK_BUT_JUMBLED ;
+ stats [COLAMD_INFO1] = col ;
+ stats [COLAMD_INFO2] = row ;
+ (stats [COLAMD_INFO3]) ++ ;
+ COLAMD_DEBUG1 (("colamd: row %d col %d unsorted/duplicate\n",row,col));
}
if (Row [row].shared2.mark != col)
{
- Row [row].length++ ;
+ Row [row].length++ ;
}
else
{
- /* this is a repeated entry in the column, */
- /* it will be removed */
- Col [col].length-- ;
+ /* this is a repeated entry in the column, */
+ /* it will be removed */
+ Col [col].length-- ;
}
/* mark the row as having been seen in this column */
Row [row].shared2.mark = col ;
last_row = row ;
- }
}
+ }
- /* === Compute row pointers ============================================= */
+ /* === Compute row pointers ============================================= */
- /* row form of the matrix starts directly after the column */
- /* form of matrix in A */
- Row [0].start = p [n_col] ;
- Row [0].shared1.p = Row [0].start ;
- Row [0].shared2.mark = -1 ;
- for (row = 1 ; row < n_row ; row++)
- {
- Row [row].start = Row [row-1].start + Row [row-1].length ;
- Row [row].shared1.p = Row [row].start ;
- Row [row].shared2.mark = -1 ;
- }
+ /* row form of the matrix starts directly after the column */
+ /* form of matrix in A */
+ Row [0].start = p [n_col] ;
+ Row [0].shared1.p = Row [0].start ;
+ Row [0].shared2.mark = -1 ;
+ for (row = 1 ; row < n_row ; row++)
+ {
+ Row [row].start = Row [row-1].start + Row [row-1].length ;
+ Row [row].shared1.p = Row [row].start ;
+ Row [row].shared2.mark = -1 ;
+ }
- /* === Create row form ================================================== */
+ /* === Create row form ================================================== */
- if (stats [COLAMD_STATUS] == COLAMD_OK_BUT_JUMBLED)
- {
- /* if cols jumbled, watch for repeated row indices */
- for (col = 0 ; col < n_col ; col++)
+ if (stats [COLAMD_STATUS] == COLAMD_OK_BUT_JUMBLED)
{
+ /* if cols jumbled, watch for repeated row indices */
+ for (col = 0 ; col < n_col ; col++)
+ {
cp = &A [p [col]] ;
cp_end = &A [p [col+1]] ;
while (cp < cp_end)
{
- row = *cp++ ;
- if (Row [row].shared2.mark != col)
- {
- A [(Row [row].shared1.p)++] = col ;
- Row [row].shared2.mark = col ;
- }
+ row = *cp++ ;
+ if (Row [row].shared2.mark != col)
+ {
+ A [(Row [row].shared1.p)++] = col ;
+ Row [row].shared2.mark = col ;
+ }
}
- }
}
- else
- {
- /* if cols not jumbled, we don't need the mark (this is faster) */
- for (col = 0 ; col < n_col ; col++)
+ }
+ else
{
+ /* if cols not jumbled, we don't need the mark (this is faster) */
+ for (col = 0 ; col < n_col ; col++)
+ {
cp = &A [p [col]] ;
cp_end = &A [p [col+1]] ;
while (cp < cp_end)
{
- A [(Row [*cp++].shared1.p)++] = col ;
+ A [(Row [*cp++].shared1.p)++] = col ;
}
- }
}
+ }
- /* === Clear the row marks and set row degrees ========================== */
+ /* === Clear the row marks and set row degrees ========================== */
- for (row = 0 ; row < n_row ; row++)
- {
- Row [row].shared2.mark = 0 ;
- Row [row].shared1.degree = Row [row].length ;
- }
+ for (row = 0 ; row < n_row ; row++)
+ {
+ Row [row].shared2.mark = 0 ;
+ Row [row].shared1.degree = Row [row].length ;
+ }
- /* === See if we need to re-create columns ============================== */
+ /* === See if we need to re-create columns ============================== */
- if (stats [COLAMD_STATUS] == COLAMD_OK_BUT_JUMBLED)
- {
- COLAMD_DEBUG0 (("colamd: reconstructing column form, matrix jumbled\n")) ;
+ if (stats [COLAMD_STATUS] == COLAMD_OK_BUT_JUMBLED)
+ {
+ COLAMD_DEBUG0 (("colamd: reconstructing column form, matrix jumbled\n")) ;
- /* === Compute col pointers ========================================= */
+ /* === Compute col pointers ========================================= */
- /* col form of the matrix starts at A [0]. */
- /* Note, we may have a gap between the col form and the row */
- /* form if there were duplicate entries, if so, it will be */
- /* removed upon the first garbage collection */
- Col [0].start = 0 ;
- p [0] = Col [0].start ;
- for (col = 1 ; col < n_col ; col++)
- {
+ /* col form of the matrix starts at A [0]. */
+ /* Note, we may have a gap between the col form and the row */
+ /* form if there were duplicate entries, if so, it will be */
+ /* removed upon the first garbage collection */
+ Col [0].start = 0 ;
+ p [0] = Col [0].start ;
+ for (col = 1 ; col < n_col ; col++)
+ {
/* note that the lengths here are for pruned columns, i.e. */
/* no duplicate row indices will exist for these columns */
Col [col].start = Col [col-1].start + Col [col-1].length ;
p [col] = Col [col].start ;
- }
+ }
- /* === Re-create col form =========================================== */
+ /* === Re-create col form =========================================== */
- for (row = 0 ; row < n_row ; row++)
- {
+ for (row = 0 ; row < n_row ; row++)
+ {
rp = &A [Row [row].start] ;
rp_end = rp + Row [row].length ;
while (rp < rp_end)
{
- A [(p [*rp++])++] = row ;
+ A [(p [*rp++])++] = row ;
}
- }
}
+ }
- /* === Done. Matrix is not (or no longer) jumbled ====================== */
+ /* === Done. Matrix is not (or no longer) jumbled ====================== */
- return (true) ;
+ return (true) ;
}
@@ -695,83 +698,83 @@ static bool colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[
/* ========================================================================== */
/*
- Kills dense or empty columns and rows, calculates an initial score for
- each column, and places all columns in the degree lists. Not user-callable.
+ Kills dense or empty columns and rows, calculates an initial score for
+ each column, and places all columns in the degree lists. Not user-callable.
*/
-
+template <typename Index>
static void init_scoring
-(
+ (
/* === Parameters ======================================================= */
- int n_row, /* number of rows of A */
- int n_col, /* number of columns of A */
- Colamd_Row Row [], /* of size n_row+1 */
- colamd_col Col [], /* of size n_col+1 */
- int A [], /* column form and row form of A */
- int head [], /* of size n_col+1 */
+ Index n_row, /* number of rows of A */
+ Index n_col, /* number of columns of A */
+ Colamd_Row<Index> Row [], /* of size n_row+1 */
+ colamd_col<Index> Col [], /* of size n_col+1 */
+ Index A [], /* column form and row form of A */
+ Index head [], /* of size n_col+1 */
double knobs [COLAMD_KNOBS],/* parameters */
- int *p_n_row2, /* number of non-dense, non-empty rows */
- int *p_n_col2, /* number of non-dense, non-empty columns */
- int *p_max_deg /* maximum row degree */
-)
+ Index *p_n_row2, /* number of non-dense, non-empty rows */
+ Index *p_n_col2, /* number of non-dense, non-empty columns */
+ Index *p_max_deg /* maximum row degree */
+ )
{
- /* === Local variables ================================================== */
-
- int c ; /* a column index */
- int r, row ; /* a row index */
- int *cp ; /* a column pointer */
- int deg ; /* degree of a row or column */
- int *cp_end ; /* a pointer to the end of a column */
- int *new_cp ; /* new column pointer */
- int col_length ; /* length of pruned column */
- int score ; /* current column score */
- int n_col2 ; /* number of non-dense, non-empty columns */
- int n_row2 ; /* number of non-dense, non-empty rows */
- int dense_row_count ; /* remove rows with more entries than this */
- int dense_col_count ; /* remove cols with more entries than this */
- int min_score ; /* smallest column score */
- int max_deg ; /* maximum row degree */
- int next_col ; /* Used to add to degree list.*/
-
-
- /* === Extract knobs ==================================================== */
-
- dense_row_count = COLAMD_MAX (0, COLAMD_MIN (knobs [COLAMD_DENSE_ROW] * n_col, n_col)) ;
- dense_col_count = COLAMD_MAX (0, COLAMD_MIN (knobs [COLAMD_DENSE_COL] * n_row, n_row)) ;
- COLAMD_DEBUG1 (("colamd: densecount: %d %d\n", dense_row_count, dense_col_count)) ;
- max_deg = 0 ;
- n_col2 = n_col ;
- n_row2 = n_row ;
-
- /* === Kill empty columns =============================================== */
-
- /* Put the empty columns at the end in their natural order, so that LU */
- /* factorization can proceed as far as possible. */
- for (c = n_col-1 ; c >= 0 ; c--)
- {
- deg = Col [c].length ;
- if (deg == 0)
+ /* === Local variables ================================================== */
+
+ Index c ; /* a column index */
+ Index r, row ; /* a row index */
+ Index *cp ; /* a column pointer */
+ Index deg ; /* degree of a row or column */
+ Index *cp_end ; /* a pointer to the end of a column */
+ Index *new_cp ; /* new column pointer */
+ Index col_length ; /* length of pruned column */
+ Index score ; /* current column score */
+ Index n_col2 ; /* number of non-dense, non-empty columns */
+ Index n_row2 ; /* number of non-dense, non-empty rows */
+ Index dense_row_count ; /* remove rows with more entries than this */
+ Index dense_col_count ; /* remove cols with more entries than this */
+ Index min_score ; /* smallest column score */
+ Index max_deg ; /* maximum row degree */
+ Index next_col ; /* Used to add to degree list.*/
+
+
+ /* === Extract knobs ==================================================== */
+
+ dense_row_count = COLAMD_MAX (0, COLAMD_MIN (knobs [COLAMD_DENSE_ROW] * n_col, n_col)) ;
+ dense_col_count = COLAMD_MAX (0, COLAMD_MIN (knobs [COLAMD_DENSE_COL] * n_row, n_row)) ;
+ COLAMD_DEBUG1 (("colamd: densecount: %d %d\n", dense_row_count, dense_col_count)) ;
+ max_deg = 0 ;
+ n_col2 = n_col ;
+ n_row2 = n_row ;
+
+ /* === Kill empty columns =============================================== */
+
+ /* Put the empty columns at the end in their natural order, so that LU */
+ /* factorization can proceed as far as possible. */
+ for (c = n_col-1 ; c >= 0 ; c--)
{
+ deg = Col [c].length ;
+ if (deg == 0)
+ {
/* this is a empty column, kill and order it last */
Col [c].shared2.order = --n_col2 ;
KILL_PRINCIPAL_COL (c) ;
- }
}
- COLAMD_DEBUG1 (("colamd: null columns killed: %d\n", n_col - n_col2)) ;
+ }
+ COLAMD_DEBUG1 (("colamd: null columns killed: %d\n", n_col - n_col2)) ;
- /* === Kill dense columns =============================================== */
+ /* === Kill dense columns =============================================== */
- /* Put the dense columns at the end, in their natural order */
- for (c = n_col-1 ; c >= 0 ; c--)
- {
- /* skip any dead columns */
- if (COL_IS_DEAD (c))
+ /* Put the dense columns at the end, in their natural order */
+ for (c = n_col-1 ; c >= 0 ; c--)
{
+ /* skip any dead columns */
+ if (COL_IS_DEAD (c))
+ {
continue ;
- }
- deg = Col [c].length ;
- if (deg > dense_col_count)
- {
+ }
+ deg = Col [c].length ;
+ if (deg > dense_col_count)
+ {
/* this is a dense column, kill and order it last */
Col [c].shared2.order = --n_col2 ;
/* decrement the row degrees */
@@ -779,60 +782,60 @@ static void init_scoring
cp_end = cp + Col [c].length ;
while (cp < cp_end)
{
- Row [*cp++].shared1.degree-- ;
+ Row [*cp++].shared1.degree-- ;
}
KILL_PRINCIPAL_COL (c) ;
- }
}
- COLAMD_DEBUG1 (("colamd: Dense and null columns killed: %d\n", n_col - n_col2)) ;
+ }
+ COLAMD_DEBUG1 (("colamd: Dense and null columns killed: %d\n", n_col - n_col2)) ;
- /* === Kill dense and empty rows ======================================== */
+ /* === Kill dense and empty rows ======================================== */
- for (r = 0 ; r < n_row ; r++)
- {
- deg = Row [r].shared1.degree ;
- COLAMD_ASSERT (deg >= 0 && deg <= n_col) ;
- if (deg > dense_row_count || deg == 0)
+ for (r = 0 ; r < n_row ; r++)
{
+ deg = Row [r].shared1.degree ;
+ COLAMD_ASSERT (deg >= 0 && deg <= n_col) ;
+ if (deg > dense_row_count || deg == 0)
+ {
/* kill a dense or empty row */
KILL_ROW (r) ;
--n_row2 ;
- }
- else
- {
+ }
+ else
+ {
/* keep track of max degree of remaining rows */
max_deg = COLAMD_MAX (max_deg, deg) ;
- }
}
- COLAMD_DEBUG1 (("colamd: Dense and null rows killed: %d\n", n_row - n_row2)) ;
+ }
+ COLAMD_DEBUG1 (("colamd: Dense and null rows killed: %d\n", n_row - n_row2)) ;
- /* === Compute initial column scores ==================================== */
+ /* === Compute initial column scores ==================================== */
- /* At this point the row degrees are accurate. They reflect the number */
- /* of "live" (non-dense) columns in each row. No empty rows exist. */
- /* Some "live" columns may contain only dead rows, however. These are */
- /* pruned in the code below. */
+ /* At this point the row degrees are accurate. They reflect the number */
+ /* of "live" (non-dense) columns in each row. No empty rows exist. */
+ /* Some "live" columns may contain only dead rows, however. These are */
+ /* pruned in the code below. */
- /* now find the initial matlab score for each column */
- for (c = n_col-1 ; c >= 0 ; c--)
- {
- /* skip dead column */
- if (COL_IS_DEAD (c))
+ /* now find the initial matlab score for each column */
+ for (c = n_col-1 ; c >= 0 ; c--)
{
+ /* skip dead column */
+ if (COL_IS_DEAD (c))
+ {
continue ;
- }
- score = 0 ;
- cp = &A [Col [c].start] ;
- new_cp = cp ;
- cp_end = cp + Col [c].length ;
- while (cp < cp_end)
- {
+ }
+ score = 0 ;
+ cp = &A [Col [c].start] ;
+ new_cp = cp ;
+ cp_end = cp + Col [c].length ;
+ while (cp < cp_end)
+ {
/* get a row */
row = *cp++ ;
/* skip if dead */
if (ROW_IS_DEAD (row))
{
- continue ;
+ continue ;
}
/* compact the column */
*new_cp++ = row ;
@@ -840,52 +843,52 @@ static void init_scoring
score += Row [row].shared1.degree - 1 ;
/* guard against integer overflow */
score = COLAMD_MIN (score, n_col) ;
- }
- /* determine pruned column length */
- col_length = (int) (new_cp - &A [Col [c].start]) ;
- if (col_length == 0)
- {
+ }
+ /* determine pruned column length */
+ col_length = (Index) (new_cp - &A [Col [c].start]) ;
+ if (col_length == 0)
+ {
/* a newly-made null column (all rows in this col are "dense" */
/* and have already been killed) */
COLAMD_DEBUG2 (("Newly null killed: %d\n", c)) ;
Col [c].shared2.order = --n_col2 ;
KILL_PRINCIPAL_COL (c) ;
- }
- else
- {
+ }
+ else
+ {
/* set column length and set score */
COLAMD_ASSERT (score >= 0) ;
COLAMD_ASSERT (score <= n_col) ;
Col [c].length = col_length ;
Col [c].shared2.score = score ;
- }
}
- COLAMD_DEBUG1 (("colamd: Dense, null, and newly-null columns killed: %d\n",
- n_col-n_col2)) ;
+ }
+ COLAMD_DEBUG1 (("colamd: Dense, null, and newly-null columns killed: %d\n",
+ n_col-n_col2)) ;
- /* At this point, all empty rows and columns are dead. All live columns */
- /* are "clean" (containing no dead rows) and simplicial (no supercolumns */
- /* yet). Rows may contain dead columns, but all live rows contain at */
- /* least one live column. */
+ /* At this point, all empty rows and columns are dead. All live columns */
+ /* are "clean" (containing no dead rows) and simplicial (no supercolumns */
+ /* yet). Rows may contain dead columns, but all live rows contain at */
+ /* least one live column. */
- /* === Initialize degree lists ========================================== */
+ /* === Initialize degree lists ========================================== */
- /* clear the hash buckets */
- for (c = 0 ; c <= n_col ; c++)
- {
- head [c] = COLAMD_EMPTY ;
- }
- min_score = n_col ;
- /* place in reverse order, so low column indices are at the front */
- /* of the lists. This is to encourage natural tie-breaking */
- for (c = n_col-1 ; c >= 0 ; c--)
- {
- /* only add principal columns to degree lists */
- if (COL_IS_ALIVE (c))
+ /* clear the hash buckets */
+ for (c = 0 ; c <= n_col ; c++)
+ {
+ head [c] = COLAMD_EMPTY ;
+ }
+ min_score = n_col ;
+ /* place in reverse order, so low column indices are at the front */
+ /* of the lists. This is to encourage natural tie-breaking */
+ for (c = n_col-1 ; c >= 0 ; c--)
{
+ /* only add principal columns to degree lists */
+ if (COL_IS_ALIVE (c))
+ {
COLAMD_DEBUG4 (("place %d score %d minscore %d ncol %d\n",
- c, Col [c].shared2.score, min_score, n_col)) ;
+ c, Col [c].shared2.score, min_score, n_col)) ;
/* === Add columns score to DList =============================== */
@@ -906,7 +909,7 @@ static void init_scoring
/* previous pointer to this new column */
if (next_col != COLAMD_EMPTY)
{
- Col [next_col].shared3.prev = c ;
+ Col [next_col].shared3.prev = c ;
}
head [score] = c ;
@@ -914,15 +917,15 @@ static void init_scoring
min_score = COLAMD_MIN (min_score, score) ;
- }
}
+ }
- /* === Return number of remaining columns, and max row degree =========== */
+ /* === Return number of remaining columns, and max row degree =========== */
- *p_n_col2 = n_col2 ;
- *p_n_row2 = n_row2 ;
- *p_max_deg = max_deg ;
+ *p_n_col2 = n_col2 ;
+ *p_n_row2 = n_row2 ;
+ *p_max_deg = max_deg ;
}
@@ -931,235 +934,235 @@ static void init_scoring
/* ========================================================================== */
/*
- Order the principal columns of the supercolumn form of the matrix
- (no supercolumns on input). Uses a minimum approximate column minimum
- degree ordering method. Not user-callable.
+ Order the principal columns of the supercolumn form of the matrix
+ (no supercolumns on input). Uses a minimum approximate column minimum
+ degree ordering method. Not user-callable.
*/
-
-static int find_ordering /* return the number of garbage collections */
-(
+template <typename Index>
+static Index find_ordering /* return the number of garbage collections */
+ (
/* === Parameters ======================================================= */
- int n_row, /* number of rows of A */
- int n_col, /* number of columns of A */
- int Alen, /* size of A, 2*nnz + n_col or larger */
- Colamd_Row Row [], /* of size n_row+1 */
- colamd_col Col [], /* of size n_col+1 */
- int A [], /* column form and row form of A */
- int head [], /* of size n_col+1 */
- int n_col2, /* Remaining columns to order */
- int max_deg, /* Maximum row degree */
- int pfree /* index of first free slot (2*nnz on entry) */
-)
+ Index n_row, /* number of rows of A */
+ Index n_col, /* number of columns of A */
+ Index Alen, /* size of A, 2*nnz + n_col or larger */
+ Colamd_Row<Index> Row [], /* of size n_row+1 */
+ colamd_col<Index> Col [], /* of size n_col+1 */
+ Index A [], /* column form and row form of A */
+ Index head [], /* of size n_col+1 */
+ Index n_col2, /* Remaining columns to order */
+ Index max_deg, /* Maximum row degree */
+ Index pfree /* index of first free slot (2*nnz on entry) */
+ )
{
- /* === Local variables ================================================== */
-
- int k ; /* current pivot ordering step */
- int pivot_col ; /* current pivot column */
- int *cp ; /* a column pointer */
- int *rp ; /* a row pointer */
- int pivot_row ; /* current pivot row */
- int *new_cp ; /* modified column pointer */
- int *new_rp ; /* modified row pointer */
- int pivot_row_start ; /* pointer to start of pivot row */
- int pivot_row_degree ; /* number of columns in pivot row */
- int pivot_row_length ; /* number of supercolumns in pivot row */
- int pivot_col_score ; /* score of pivot column */
- int needed_memory ; /* free space needed for pivot row */
- int *cp_end ; /* pointer to the end of a column */
- int *rp_end ; /* pointer to the end of a row */
- int row ; /* a row index */
- int col ; /* a column index */
- int max_score ; /* maximum possible score */
- int cur_score ; /* score of current column */
- unsigned int hash ; /* hash value for supernode detection */
- int head_column ; /* head of hash bucket */
- int first_col ; /* first column in hash bucket */
- int tag_mark ; /* marker value for mark array */
- int row_mark ; /* Row [row].shared2.mark */
- int set_difference ; /* set difference size of row with pivot row */
- int min_score ; /* smallest column score */
- int col_thickness ; /* "thickness" (no. of columns in a supercol) */
- int max_mark ; /* maximum value of tag_mark */
- int pivot_col_thickness ; /* number of columns represented by pivot col */
- int prev_col ; /* Used by Dlist operations. */
- int next_col ; /* Used by Dlist operations. */
- int ngarbage ; /* number of garbage collections performed */
-
-
- /* === Initialization and clear mark ==================================== */
-
- max_mark = INT_MAX - n_col ; /* INT_MAX defined in <limits.h> */
- tag_mark = clear_mark (n_row, Row) ;
- min_score = 0 ;
- ngarbage = 0 ;
- COLAMD_DEBUG1 (("colamd: Ordering, n_col2=%d\n", n_col2)) ;
-
- /* === Order the columns ================================================ */
-
- for (k = 0 ; k < n_col2 ; /* 'k' is incremented below */)
- {
+ /* === Local variables ================================================== */
+
+ Index k ; /* current pivot ordering step */
+ Index pivot_col ; /* current pivot column */
+ Index *cp ; /* a column pointer */
+ Index *rp ; /* a row pointer */
+ Index pivot_row ; /* current pivot row */
+ Index *new_cp ; /* modified column pointer */
+ Index *new_rp ; /* modified row pointer */
+ Index pivot_row_start ; /* pointer to start of pivot row */
+ Index pivot_row_degree ; /* number of columns in pivot row */
+ Index pivot_row_length ; /* number of supercolumns in pivot row */
+ Index pivot_col_score ; /* score of pivot column */
+ Index needed_memory ; /* free space needed for pivot row */
+ Index *cp_end ; /* pointer to the end of a column */
+ Index *rp_end ; /* pointer to the end of a row */
+ Index row ; /* a row index */
+ Index col ; /* a column index */
+ Index max_score ; /* maximum possible score */
+ Index cur_score ; /* score of current column */
+ unsigned int hash ; /* hash value for supernode detection */
+ Index head_column ; /* head of hash bucket */
+ Index first_col ; /* first column in hash bucket */
+ Index tag_mark ; /* marker value for mark array */
+ Index row_mark ; /* Row [row].shared2.mark */
+ Index set_difference ; /* set difference size of row with pivot row */
+ Index min_score ; /* smallest column score */
+ Index col_thickness ; /* "thickness" (no. of columns in a supercol) */
+ Index max_mark ; /* maximum value of tag_mark */
+ Index pivot_col_thickness ; /* number of columns represented by pivot col */
+ Index prev_col ; /* Used by Dlist operations. */
+ Index next_col ; /* Used by Dlist operations. */
+ Index ngarbage ; /* number of garbage collections performed */
+
+
+ /* === Initialization and clear mark ==================================== */
+
+ max_mark = INT_MAX - n_col ; /* INT_MAX defined in <limits.h> */
+ tag_mark = Eigen::internal::clear_mark (n_row, Row) ;
+ min_score = 0 ;
+ ngarbage = 0 ;
+ COLAMD_DEBUG1 (("colamd: Ordering, n_col2=%d\n", n_col2)) ;
+
+ /* === Order the columns ================================================ */
+
+ for (k = 0 ; k < n_col2 ; /* 'k' is incremented below */)
+ {
- /* === Select pivot column, and order it ============================ */
+ /* === Select pivot column, and order it ============================ */
- /* make sure degree list isn't empty */
- COLAMD_ASSERT (min_score >= 0) ;
- COLAMD_ASSERT (min_score <= n_col) ;
- COLAMD_ASSERT (head [min_score] >= COLAMD_EMPTY) ;
+ /* make sure degree list isn't empty */
+ COLAMD_ASSERT (min_score >= 0) ;
+ COLAMD_ASSERT (min_score <= n_col) ;
+ COLAMD_ASSERT (head [min_score] >= COLAMD_EMPTY) ;
- /* get pivot column from head of minimum degree list */
- while (head [min_score] == COLAMD_EMPTY && min_score < n_col)
- {
+ /* get pivot column from head of minimum degree list */
+ while (head [min_score] == COLAMD_EMPTY && min_score < n_col)
+ {
min_score++ ;
- }
- pivot_col = head [min_score] ;
- COLAMD_ASSERT (pivot_col >= 0 && pivot_col <= n_col) ;
- next_col = Col [pivot_col].shared4.degree_next ;
- head [min_score] = next_col ;
- if (next_col != COLAMD_EMPTY)
- {
+ }
+ pivot_col = head [min_score] ;
+ COLAMD_ASSERT (pivot_col >= 0 && pivot_col <= n_col) ;
+ next_col = Col [pivot_col].shared4.degree_next ;
+ head [min_score] = next_col ;
+ if (next_col != COLAMD_EMPTY)
+ {
Col [next_col].shared3.prev = COLAMD_EMPTY ;
- }
+ }
- COLAMD_ASSERT (COL_IS_ALIVE (pivot_col)) ;
- COLAMD_DEBUG3 (("Pivot col: %d\n", pivot_col)) ;
+ COLAMD_ASSERT (COL_IS_ALIVE (pivot_col)) ;
+ COLAMD_DEBUG3 (("Pivot col: %d\n", pivot_col)) ;
- /* remember score for defrag check */
- pivot_col_score = Col [pivot_col].shared2.score ;
+ /* remember score for defrag check */
+ pivot_col_score = Col [pivot_col].shared2.score ;
- /* the pivot column is the kth column in the pivot order */
- Col [pivot_col].shared2.order = k ;
+ /* the pivot column is the kth column in the pivot order */
+ Col [pivot_col].shared2.order = k ;
- /* increment order count by column thickness */
- pivot_col_thickness = Col [pivot_col].shared1.thickness ;
- k += pivot_col_thickness ;
- COLAMD_ASSERT (pivot_col_thickness > 0) ;
+ /* increment order count by column thickness */
+ pivot_col_thickness = Col [pivot_col].shared1.thickness ;
+ k += pivot_col_thickness ;
+ COLAMD_ASSERT (pivot_col_thickness > 0) ;
- /* === Garbage_collection, if necessary ============================= */
+ /* === Garbage_collection, if necessary ============================= */
- needed_memory = COLAMD_MIN (pivot_col_score, n_col - k) ;
- if (pfree + needed_memory >= Alen)
- {
- pfree = garbage_collection (n_row, n_col, Row, Col, A, &A [pfree]) ;
+ needed_memory = COLAMD_MIN (pivot_col_score, n_col - k) ;
+ if (pfree + needed_memory >= Alen)
+ {
+ pfree = Eigen::internal::garbage_collection (n_row, n_col, Row, Col, A, &A [pfree]) ;
ngarbage++ ;
/* after garbage collection we will have enough */
COLAMD_ASSERT (pfree + needed_memory < Alen) ;
/* garbage collection has wiped out the Row[].shared2.mark array */
- tag_mark = clear_mark (n_row, Row) ;
+ tag_mark = Eigen::internal::clear_mark (n_row, Row) ;
- }
+ }
- /* === Compute pivot row pattern ==================================== */
+ /* === Compute pivot row pattern ==================================== */
- /* get starting location for this new merged row */
- pivot_row_start = pfree ;
+ /* get starting location for this new merged row */
+ pivot_row_start = pfree ;
- /* initialize new row counts to zero */
- pivot_row_degree = 0 ;
+ /* initialize new row counts to zero */
+ pivot_row_degree = 0 ;
- /* tag pivot column as having been visited so it isn't included */
- /* in merged pivot row */
- Col [pivot_col].shared1.thickness = -pivot_col_thickness ;
+ /* tag pivot column as having been visited so it isn't included */
+ /* in merged pivot row */
+ Col [pivot_col].shared1.thickness = -pivot_col_thickness ;
- /* pivot row is the union of all rows in the pivot column pattern */
- cp = &A [Col [pivot_col].start] ;
- cp_end = cp + Col [pivot_col].length ;
- while (cp < cp_end)
- {
+ /* pivot row is the union of all rows in the pivot column pattern */
+ cp = &A [Col [pivot_col].start] ;
+ cp_end = cp + Col [pivot_col].length ;
+ while (cp < cp_end)
+ {
/* get a row */
row = *cp++ ;
COLAMD_DEBUG4 (("Pivot col pattern %d %d\n", ROW_IS_ALIVE (row), row)) ;
/* skip if row is dead */
if (ROW_IS_DEAD (row))
{
- continue ;
+ continue ;
}
rp = &A [Row [row].start] ;
rp_end = rp + Row [row].length ;
while (rp < rp_end)
{
- /* get a column */
- col = *rp++ ;
- /* add the column, if alive and untagged */
- col_thickness = Col [col].shared1.thickness ;
- if (col_thickness > 0 && COL_IS_ALIVE (col))
- {
- /* tag column in pivot row */
- Col [col].shared1.thickness = -col_thickness ;
- COLAMD_ASSERT (pfree < Alen) ;
- /* place column in pivot row */
- A [pfree++] = col ;
- pivot_row_degree += col_thickness ;
- }
+ /* get a column */
+ col = *rp++ ;
+ /* add the column, if alive and untagged */
+ col_thickness = Col [col].shared1.thickness ;
+ if (col_thickness > 0 && COL_IS_ALIVE (col))
+ {
+ /* tag column in pivot row */
+ Col [col].shared1.thickness = -col_thickness ;
+ COLAMD_ASSERT (pfree < Alen) ;
+ /* place column in pivot row */
+ A [pfree++] = col ;
+ pivot_row_degree += col_thickness ;
+ }
}
- }
+ }
- /* clear tag on pivot column */
- Col [pivot_col].shared1.thickness = pivot_col_thickness ;
- max_deg = COLAMD_MAX (max_deg, pivot_row_degree) ;
+ /* clear tag on pivot column */
+ Col [pivot_col].shared1.thickness = pivot_col_thickness ;
+ max_deg = COLAMD_MAX (max_deg, pivot_row_degree) ;
- /* === Kill all rows used to construct pivot row ==================== */
+ /* === Kill all rows used to construct pivot row ==================== */
- /* also kill pivot row, temporarily */
- cp = &A [Col [pivot_col].start] ;
- cp_end = cp + Col [pivot_col].length ;
- while (cp < cp_end)
- {
+ /* also kill pivot row, temporarily */
+ cp = &A [Col [pivot_col].start] ;
+ cp_end = cp + Col [pivot_col].length ;
+ while (cp < cp_end)
+ {
/* may be killing an already dead row */
row = *cp++ ;
COLAMD_DEBUG3 (("Kill row in pivot col: %d\n", row)) ;
KILL_ROW (row) ;
- }
+ }
- /* === Select a row index to use as the new pivot row =============== */
+ /* === Select a row index to use as the new pivot row =============== */
- pivot_row_length = pfree - pivot_row_start ;
- if (pivot_row_length > 0)
- {
+ pivot_row_length = pfree - pivot_row_start ;
+ if (pivot_row_length > 0)
+ {
/* pick the "pivot" row arbitrarily (first row in col) */
pivot_row = A [Col [pivot_col].start] ;
COLAMD_DEBUG3 (("Pivotal row is %d\n", pivot_row)) ;
- }
- else
- {
+ }
+ else
+ {
/* there is no pivot row, since it is of zero length */
pivot_row = COLAMD_EMPTY ;
COLAMD_ASSERT (pivot_row_length == 0) ;
- }
- COLAMD_ASSERT (Col [pivot_col].length > 0 || pivot_row_length == 0) ;
+ }
+ COLAMD_ASSERT (Col [pivot_col].length > 0 || pivot_row_length == 0) ;
- /* === Approximate degree computation =============================== */
+ /* === Approximate degree computation =============================== */
- /* Here begins the computation of the approximate degree. The column */
- /* score is the sum of the pivot row "length", plus the size of the */
- /* set differences of each row in the column minus the pattern of the */
- /* pivot row itself. The column ("thickness") itself is also */
- /* excluded from the column score (we thus use an approximate */
- /* external degree). */
+ /* Here begins the computation of the approximate degree. The column */
+ /* score is the sum of the pivot row "length", plus the size of the */
+ /* set differences of each row in the column minus the pattern of the */
+ /* pivot row itself. The column ("thickness") itself is also */
+ /* excluded from the column score (we thus use an approximate */
+ /* external degree). */
- /* The time taken by the following code (compute set differences, and */
- /* add them up) is proportional to the size of the data structure */
- /* being scanned - that is, the sum of the sizes of each column in */
- /* the pivot row. Thus, the amortized time to compute a column score */
- /* is proportional to the size of that column (where size, in this */
- /* context, is the column "length", or the number of row indices */
- /* in that column). The number of row indices in a column is */
- /* monotonically non-decreasing, from the length of the original */
- /* column on input to colamd. */
+ /* The time taken by the following code (compute set differences, and */
+ /* add them up) is proportional to the size of the data structure */
+ /* being scanned - that is, the sum of the sizes of each column in */
+ /* the pivot row. Thus, the amortized time to compute a column score */
+ /* is proportional to the size of that column (where size, in this */
+ /* context, is the column "length", or the number of row indices */
+ /* in that column). The number of row indices in a column is */
+ /* monotonically non-decreasing, from the length of the original */
+ /* column on input to colamd. */
- /* === Compute set differences ====================================== */
+ /* === Compute set differences ====================================== */
- COLAMD_DEBUG3 (("** Computing set differences phase. **\n")) ;
+ COLAMD_DEBUG3 (("** Computing set differences phase. **\n")) ;
- /* pivot row is currently dead - it will be revived later. */
+ /* pivot row is currently dead - it will be revived later. */
- COLAMD_DEBUG3 (("Pivot row: ")) ;
- /* for each column in pivot row */
- rp = &A [pivot_row_start] ;
- rp_end = rp + pivot_row_length ;
- while (rp < rp_end)
- {
+ COLAMD_DEBUG3 (("Pivot row: ")) ;
+ /* for each column in pivot row */
+ rp = &A [pivot_row_start] ;
+ rp_end = rp + pivot_row_length ;
+ while (rp < rp_end)
+ {
col = *rp++ ;
COLAMD_ASSERT (COL_IS_ALIVE (col) && col != pivot_col) ;
COLAMD_DEBUG3 (("Col: %d\n", col)) ;
@@ -1179,15 +1182,15 @@ static int find_ordering /* return the number of garbage collections */
COLAMD_ASSERT (cur_score >= COLAMD_EMPTY) ;
if (prev_col == COLAMD_EMPTY)
{
- head [cur_score] = next_col ;
+ head [cur_score] = next_col ;
}
else
{
- Col [prev_col].shared4.degree_next = next_col ;
+ Col [prev_col].shared4.degree_next = next_col ;
}
if (next_col != COLAMD_EMPTY)
{
- Col [next_col].shared3.prev = prev_col ;
+ Col [next_col].shared3.prev = prev_col ;
}
/* === Scan the column ========================================== */
@@ -1196,49 +1199,49 @@ static int find_ordering /* return the number of garbage collections */
cp_end = cp + Col [col].length ;
while (cp < cp_end)
{
- /* get a row */
- row = *cp++ ;
- row_mark = Row [row].shared2.mark ;
- /* skip if dead */
- if (ROW_IS_MARKED_DEAD (row_mark))
- {
- continue ;
- }
- COLAMD_ASSERT (row != pivot_row) ;
- set_difference = row_mark - tag_mark ;
- /* check if the row has been seen yet */
- if (set_difference < 0)
- {
- COLAMD_ASSERT (Row [row].shared1.degree <= max_deg) ;
- set_difference = Row [row].shared1.degree ;
- }
- /* subtract column thickness from this row's set difference */
- set_difference -= col_thickness ;
- COLAMD_ASSERT (set_difference >= 0) ;
- /* absorb this row if the set difference becomes zero */
- if (set_difference == 0)
- {
- COLAMD_DEBUG3 (("aggressive absorption. Row: %d\n", row)) ;
- KILL_ROW (row) ;
- }
- else
- {
- /* save the new mark */
- Row [row].shared2.mark = set_difference + tag_mark ;
- }
+ /* get a row */
+ row = *cp++ ;
+ row_mark = Row [row].shared2.mark ;
+ /* skip if dead */
+ if (ROW_IS_MARKED_DEAD (row_mark))
+ {
+ continue ;
+ }
+ COLAMD_ASSERT (row != pivot_row) ;
+ set_difference = row_mark - tag_mark ;
+ /* check if the row has been seen yet */
+ if (set_difference < 0)
+ {
+ COLAMD_ASSERT (Row [row].shared1.degree <= max_deg) ;
+ set_difference = Row [row].shared1.degree ;
+ }
+ /* subtract column thickness from this row's set difference */
+ set_difference -= col_thickness ;
+ COLAMD_ASSERT (set_difference >= 0) ;
+ /* absorb this row if the set difference becomes zero */
+ if (set_difference == 0)
+ {
+ COLAMD_DEBUG3 (("aggressive absorption. Row: %d\n", row)) ;
+ KILL_ROW (row) ;
+ }
+ else
+ {
+ /* save the new mark */
+ Row [row].shared2.mark = set_difference + tag_mark ;
+ }
}
- }
+ }
- /* === Add up set differences for each column ======================= */
+ /* === Add up set differences for each column ======================= */
- COLAMD_DEBUG3 (("** Adding set differences phase. **\n")) ;
+ COLAMD_DEBUG3 (("** Adding set differences phase. **\n")) ;
- /* for each column in pivot row */
- rp = &A [pivot_row_start] ;
- rp_end = rp + pivot_row_length ;
- while (rp < rp_end)
- {
+ /* for each column in pivot row */
+ rp = &A [pivot_row_start] ;
+ rp_end = rp + pivot_row_length ;
+ while (rp < rp_end)
+ {
/* get a column */
col = *rp++ ;
COLAMD_ASSERT (COL_IS_ALIVE (col) && col != pivot_col) ;
@@ -1253,119 +1256,117 @@ static int find_ordering /* return the number of garbage collections */
while (cp < cp_end)
{
- /* get a row */
- row = *cp++ ;
- COLAMD_ASSERT(row >= 0 && row < n_row) ;
- row_mark = Row [row].shared2.mark ;
- /* skip if dead */
- if (ROW_IS_MARKED_DEAD (row_mark))
- {
- continue ;
- }
- COLAMD_ASSERT (row_mark > tag_mark) ;
- /* compact the column */
- *new_cp++ = row ;
- /* compute hash function */
- hash += row ;
- /* add set difference */
- cur_score += row_mark - tag_mark ;
- /* integer overflow... */
- cur_score = COLAMD_MIN (cur_score, n_col) ;
+ /* get a row */
+ row = *cp++ ;
+ COLAMD_ASSERT(row >= 0 && row < n_row) ;
+ row_mark = Row [row].shared2.mark ;
+ /* skip if dead */
+ if (ROW_IS_MARKED_DEAD (row_mark))
+ {
+ continue ;
+ }
+ COLAMD_ASSERT (row_mark > tag_mark) ;
+ /* compact the column */
+ *new_cp++ = row ;
+ /* compute hash function */
+ hash += row ;
+ /* add set difference */
+ cur_score += row_mark - tag_mark ;
+ /* integer overflow... */
+ cur_score = COLAMD_MIN (cur_score, n_col) ;
}
/* recompute the column's length */
- Col [col].length = (int) (new_cp - &A [Col [col].start]) ;
+ Col [col].length = (Index) (new_cp - &A [Col [col].start]) ;
/* === Further mass elimination ================================= */
if (Col [col].length == 0)
{
- COLAMD_DEBUG4 (("further mass elimination. Col: %d\n", col)) ;
- /* nothing left but the pivot row in this column */
- KILL_PRINCIPAL_COL (col) ;
- pivot_row_degree -= Col [col].shared1.thickness ;
- COLAMD_ASSERT (pivot_row_degree >= 0) ;
- /* order it */
- Col [col].shared2.order = k ;
- /* increment order count by column thickness */
- k += Col [col].shared1.thickness ;
+ COLAMD_DEBUG4 (("further mass elimination. Col: %d\n", col)) ;
+ /* nothing left but the pivot row in this column */
+ KILL_PRINCIPAL_COL (col) ;
+ pivot_row_degree -= Col [col].shared1.thickness ;
+ COLAMD_ASSERT (pivot_row_degree >= 0) ;
+ /* order it */
+ Col [col].shared2.order = k ;
+ /* increment order count by column thickness */
+ k += Col [col].shared1.thickness ;
}
else
{
- /* === Prepare for supercolumn detection ==================== */
-
- COLAMD_DEBUG4 (("Preparing supercol detection for Col: %d.\n", col)) ;
-
- /* save score so far */
- Col [col].shared2.score = cur_score ;
-
- /* add column to hash table, for supercolumn detection */
- hash %= n_col + 1 ;
-
- COLAMD_DEBUG4 ((" Hash = %d, n_col = %d.\n", hash, n_col)) ;
- COLAMD_ASSERT (hash <= n_col) ;
-
- head_column = head [hash] ;
- if (head_column > COLAMD_EMPTY)
- {
- /* degree list "hash" is non-empty, use prev (shared3) of */
- /* first column in degree list as head of hash bucket */
- first_col = Col [head_column].shared3.headhash ;
- Col [head_column].shared3.headhash = col ;
- }
- else
- {
- /* degree list "hash" is empty, use head as hash bucket */
- first_col = - (head_column + 2) ;
- head [hash] = - (col + 2) ;
- }
- Col [col].shared4.hash_next = first_col ;
-
- /* save hash function in Col [col].shared3.hash */
- Col [col].shared3.hash = (int) hash ;
- COLAMD_ASSERT (COL_IS_ALIVE (col)) ;
+ /* === Prepare for supercolumn detection ==================== */
+
+ COLAMD_DEBUG4 (("Preparing supercol detection for Col: %d.\n", col)) ;
+
+ /* save score so far */
+ Col [col].shared2.score = cur_score ;
+
+ /* add column to hash table, for supercolumn detection */
+ hash %= n_col + 1 ;
+
+ COLAMD_DEBUG4 ((" Hash = %d, n_col = %d.\n", hash, n_col)) ;
+ COLAMD_ASSERT (hash <= n_col) ;
+
+ head_column = head [hash] ;
+ if (head_column > COLAMD_EMPTY)
+ {
+ /* degree list "hash" is non-empty, use prev (shared3) of */
+ /* first column in degree list as head of hash bucket */
+ first_col = Col [head_column].shared3.headhash ;
+ Col [head_column].shared3.headhash = col ;
+ }
+ else
+ {
+ /* degree list "hash" is empty, use head as hash bucket */
+ first_col = - (head_column + 2) ;
+ head [hash] = - (col + 2) ;
+ }
+ Col [col].shared4.hash_next = first_col ;
+
+ /* save hash function in Col [col].shared3.hash */
+ Col [col].shared3.hash = (Index) hash ;
+ COLAMD_ASSERT (COL_IS_ALIVE (col)) ;
}
- }
-
- /* The approximate external column degree is now computed. */
+ }
- /* === Supercolumn detection ======================================== */
+ /* The approximate external column degree is now computed. */
- COLAMD_DEBUG3 (("** Supercolumn detection phase. **\n")) ;
+ /* === Supercolumn detection ======================================== */
- detect_super_cols (
+ COLAMD_DEBUG3 (("** Supercolumn detection phase. **\n")) ;
- Col, A, head, pivot_row_start, pivot_row_length) ;
+ Eigen::internal::detect_super_cols (Col, A, head, pivot_row_start, pivot_row_length) ;
- /* === Kill the pivotal column ====================================== */
+ /* === Kill the pivotal column ====================================== */
- KILL_PRINCIPAL_COL (pivot_col) ;
+ KILL_PRINCIPAL_COL (pivot_col) ;
- /* === Clear mark =================================================== */
+ /* === Clear mark =================================================== */
- tag_mark += (max_deg + 1) ;
- if (tag_mark >= max_mark)
- {
+ tag_mark += (max_deg + 1) ;
+ if (tag_mark >= max_mark)
+ {
COLAMD_DEBUG2 (("clearing tag_mark\n")) ;
- tag_mark = clear_mark (n_row, Row) ;
- }
+ tag_mark = Eigen::internal::clear_mark (n_row, Row) ;
+ }
- /* === Finalize the new pivot row, and column scores ================ */
+ /* === Finalize the new pivot row, and column scores ================ */
- COLAMD_DEBUG3 (("** Finalize scores phase. **\n")) ;
+ COLAMD_DEBUG3 (("** Finalize scores phase. **\n")) ;
- /* for each column in pivot row */
- rp = &A [pivot_row_start] ;
- /* compact the pivot row */
- new_rp = rp ;
- rp_end = rp + pivot_row_length ;
- while (rp < rp_end)
- {
+ /* for each column in pivot row */
+ rp = &A [pivot_row_start] ;
+ /* compact the pivot row */
+ new_rp = rp ;
+ rp_end = rp + pivot_row_length ;
+ while (rp < rp_end)
+ {
col = *rp++ ;
/* skip dead columns */
if (COL_IS_DEAD (col))
{
- continue ;
+ continue ;
}
*new_rp++ = col ;
/* add new pivot row to column */
@@ -1403,32 +1404,32 @@ static int find_ordering /* return the number of garbage collections */
Col [col].shared3.prev = COLAMD_EMPTY ;
if (next_col != COLAMD_EMPTY)
{
- Col [next_col].shared3.prev = col ;
+ Col [next_col].shared3.prev = col ;
}
head [cur_score] = col ;
/* see if this score is less than current min */
min_score = COLAMD_MIN (min_score, cur_score) ;
- }
+ }
- /* === Resurrect the new pivot row ================================== */
+ /* === Resurrect the new pivot row ================================== */
- if (pivot_row_degree > 0)
- {
+ if (pivot_row_degree > 0)
+ {
/* update pivot row length to reflect any cols that were killed */
/* during super-col detection and mass elimination */
Row [pivot_row].start = pivot_row_start ;
- Row [pivot_row].length = (int) (new_rp - &A[pivot_row_start]) ;
+ Row [pivot_row].length = (Index) (new_rp - &A[pivot_row_start]) ;
Row [pivot_row].shared1.degree = pivot_row_degree ;
Row [pivot_row].shared2.mark = 0 ;
/* pivot row is no longer dead */
- }
}
+ }
- /* === All principal columns have now been ordered ====================== */
+ /* === All principal columns have now been ordered ====================== */
- return (ngarbage) ;
+ return (ngarbage) ;
}
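
[Editor's note] One detail of find_ordering that is easy to miss is the tag_mark scheme: instead of clearing a per-row "visited" flag at every pivot step, rows are stamped with values relative to a moving threshold, and the marks are only wiped (via clear_mark) when the counter approaches INT_MAX. A simplified illustration of the idea, with hypothetical names, not the internal API:

#include <algorithm>
#include <limits>
#include <vector>

// Rows are "tagged" for the current pivot step by comparing their stored mark against
// a moving threshold; advancing the threshold invalidates all previous tags at once.
struct MarkSet
{
  std::vector<int> mark;   // per-row mark (Row[].shared2.mark in colamd)
  int tag_mark;            // rows with mark >= tag_mark count as seen this step
  int max_mark;            // wipe everything before tag_mark can overflow

  explicit MarkSet(int n_row)
    : mark(n_row, 0), tag_mark(1), max_mark(std::numeric_limits<int>::max() - n_row) {}

  bool seen(int r) const { return mark[r] >= tag_mark; }
  void visit(int r)      { mark[r] = tag_mark; }

  void next_step(int max_deg)        // called once per pivot step
  {
    tag_mark += max_deg + 1;         // marks may sit up to max_deg above tag_mark
    if (tag_mark >= max_mark)        // rare: reset all marks, as clear_mark() does
    {
      std::fill(mark.begin(), mark.end(), 0);
      tag_mark = 1;
    }
  }
};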
@@ -1437,47 +1438,47 @@ static int find_ordering /* return the number of garbage collections */
/* ========================================================================== */
/*
- The find_ordering routine has ordered all of the principal columns (the
- representatives of the supercolumns). The non-principal columns have not
- yet been ordered. This routine orders those columns by walking up the
- parent tree (a column is a child of the column which absorbed it). The
- final permutation vector is then placed in p [0 ... n_col-1], with p [0]
- being the first column, and p [n_col-1] being the last. It doesn't look
- like it at first glance, but be assured that this routine takes time linear
- in the number of columns. Although not immediately obvious, the time
- taken by this routine is O (n_col), that is, linear in the number of
- columns. Not user-callable.
+ The find_ordering routine has ordered all of the principal columns (the
+ representatives of the supercolumns). The non-principal columns have not
+ yet been ordered. This routine orders those columns by walking up the
+ parent tree (a column is a child of the column which absorbed it). The
+ final permutation vector is then placed in p [0 ... n_col-1], with p [0]
+ being the first column, and p [n_col-1] being the last. It doesn't look
+ like it at first glance, but be assured that this routine takes time linear
+ in the number of columns. Although not immediately obvious, the time
+ taken by this routine is O (n_col), that is, linear in the number of
+ columns. Not user-callable.
*/
-
+template <typename Index>
static inline void order_children
(
- /* === Parameters ======================================================= */
+ /* === Parameters ======================================================= */
- int n_col, /* number of columns of A */
- colamd_col Col [], /* of size n_col+1 */
- int p [] /* p [0 ... n_col-1] is the column permutation*/
-)
+ Index n_col, /* number of columns of A */
+ colamd_col<Index> Col [], /* of size n_col+1 */
+ Index p [] /* p [0 ... n_col-1] is the column permutation*/
+ )
{
- /* === Local variables ================================================== */
+ /* === Local variables ================================================== */
- int i ; /* loop counter for all columns */
- int c ; /* column index */
- int parent ; /* index of column's parent */
- int order ; /* column's order */
+ Index i ; /* loop counter for all columns */
+ Index c ; /* column index */
+ Index parent ; /* index of column's parent */
+ Index order ; /* column's order */
- /* === Order each non-principal column ================================== */
+ /* === Order each non-principal column ================================== */
- for (i = 0 ; i < n_col ; i++)
- {
- /* find an un-ordered non-principal column */
- COLAMD_ASSERT (COL_IS_DEAD (i)) ;
- if (!COL_IS_DEAD_PRINCIPAL (i) && Col [i].shared2.order == COLAMD_EMPTY)
+ for (i = 0 ; i < n_col ; i++)
{
+ /* find an un-ordered non-principal column */
+ COLAMD_ASSERT (COL_IS_DEAD (i)) ;
+ if (!COL_IS_DEAD_PRINCIPAL (i) && Col [i].shared2.order == COLAMD_EMPTY)
+ {
parent = i ;
/* once found, find its principal parent */
do
{
- parent = Col [parent].shared1.parent ;
+ parent = Col [parent].shared1.parent ;
} while (!COL_IS_DEAD_PRINCIPAL (parent)) ;
/* now, order all un-ordered non-principal columns along path */
@@ -1488,32 +1489,32 @@ static inline void order_children
do
{
- COLAMD_ASSERT (Col [c].shared2.order == COLAMD_EMPTY) ;
+ COLAMD_ASSERT (Col [c].shared2.order == COLAMD_EMPTY) ;
- /* order this column */
- Col [c].shared2.order = order++ ;
- /* collaps tree */
- Col [c].shared1.parent = parent ;
+ /* order this column */
+ Col [c].shared2.order = order++ ;
+	      /* collapse tree */
PLACEHOLDER_NOT_USED
+ Col [c].shared1.parent = parent ;
- /* get immediate parent of this column */
- c = Col [c].shared1.parent ;
+ /* get immediate parent of this column */
+ c = Col [c].shared1.parent ;
- /* continue until we hit an ordered column. There are */
- /* guarranteed not to be anymore unordered columns */
- /* above an ordered column */
+ /* continue until we hit an ordered column. There are */
+	  /* guaranteed not to be any more unordered columns */
+ /* above an ordered column */
} while (Col [c].shared2.order == COLAMD_EMPTY) ;
/* re-order the super_col parent to largest order for this group */
Col [parent].shared2.order = order ;
- }
}
+ }
- /* === Generate the permutation ========================================= */
+ /* === Generate the permutation ========================================= */
- for (c = 0 ; c < n_col ; c++)
- {
- p [Col [c].shared2.order] = c ;
- }
+ for (c = 0 ; c < n_col ; c++)
+ {
+ p [Col [c].shared2.order] = c ;
+ }
}
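
[Editor's note] order_children is essentially a union-find style walk: each absorbed column climbs to its ordered principal root, takes the next slot reserved for that group, and the path is collapsed so later walks stay cheap. A simplified stand-alone sketch of the same idea (array names are illustrative):

#include <vector>

// parent[c] is the column that absorbed c; order[c] is -1 until c has been numbered.
// Principal columns are assumed to have been numbered already by find_ordering.
void order_children_sketch(std::vector<int>& parent, std::vector<int>& order)
{
  const int Empty = -1;
  for (int i = 0; i < (int)parent.size(); ++i)
  {
    if (order[i] != Empty) continue;                    // already ordered (principal)
    int root = i;
    while (order[root] == Empty) root = parent[root];   // climb to the ordered principal
    int next = order[root];
    for (int c = i; order[c] == Empty; )
    {
      int up = parent[c];
      order[c] = next++;                                // take the next slot of the group
      parent[c] = root;                                 // collapse the path onto the root
      c = up;
    }
    order[root] = next;                                 // root keeps the group's last slot
  }
}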
@@ -1522,94 +1523,94 @@ static inline void order_children
/* ========================================================================== */
/*
- Detects supercolumns by finding matches between columns in the hash buckets.
- Check amongst columns in the set A [row_start ... row_start + row_length-1].
- The columns under consideration are currently *not* in the degree lists,
- and have already been placed in the hash buckets.
+ Detects supercolumns by finding matches between columns in the hash buckets.
+ Check amongst columns in the set A [row_start ... row_start + row_length-1].
+ The columns under consideration are currently *not* in the degree lists,
+ and have already been placed in the hash buckets.
- The hash bucket for columns whose hash function is equal to h is stored
- as follows:
+ The hash bucket for columns whose hash function is equal to h is stored
+ as follows:
if head [h] is >= 0, then head [h] contains a degree list, so:
- head [h] is the first column in degree bucket h.
- Col [head [h]].headhash gives the first column in hash bucket h.
+ head [h] is the first column in degree bucket h.
+ Col [head [h]].headhash gives the first column in hash bucket h.
otherwise, the degree list is empty, and:
- -(head [h] + 2) is the first column in hash bucket h.
+ -(head [h] + 2) is the first column in hash bucket h.
- For a column c in a hash bucket, Col [c].shared3.prev is NOT a "previous
- column" pointer. Col [c].shared3.hash is used instead as the hash number
- for that column. The value of Col [c].shared4.hash_next is the next column
- in the same hash bucket.
+ For a column c in a hash bucket, Col [c].shared3.prev is NOT a "previous
+ column" pointer. Col [c].shared3.hash is used instead as the hash number
+ for that column. The value of Col [c].shared4.hash_next is the next column
+ in the same hash bucket.
- Assuming no, or "few" hash collisions, the time taken by this routine is
- linear in the sum of the sizes (lengths) of each column whose score has
- just been computed in the approximate degree computation.
- Not user-callable.
+ Assuming no, or "few" hash collisions, the time taken by this routine is
+ linear in the sum of the sizes (lengths) of each column whose score has
+ just been computed in the approximate degree computation.
+ Not user-callable.
*/
-
+template <typename Index>
static void detect_super_cols
(
- /* === Parameters ======================================================= */
-
- colamd_col Col [], /* of size n_col+1 */
- int A [], /* row indices of A */
- int head [], /* head of degree lists and hash buckets */
- int row_start, /* pointer to set of columns to check */
- int row_length /* number of columns to check */
+ /* === Parameters ======================================================= */
+
+ colamd_col<Index> Col [], /* of size n_col+1 */
+ Index A [], /* row indices of A */
+ Index head [], /* head of degree lists and hash buckets */
+ Index row_start, /* pointer to set of columns to check */
+ Index row_length /* number of columns to check */
)
{
- /* === Local variables ================================================== */
-
- int hash ; /* hash value for a column */
- int *rp ; /* pointer to a row */
- int c ; /* a column index */
- int super_c ; /* column index of the column to absorb into */
- int *cp1 ; /* column pointer for column super_c */
- int *cp2 ; /* column pointer for column c */
- int length ; /* length of column super_c */
- int prev_c ; /* column preceding c in hash bucket */
- int i ; /* loop counter */
- int *rp_end ; /* pointer to the end of the row */
- int col ; /* a column index in the row to check */
- int head_column ; /* first column in hash bucket or degree list */
- int first_col ; /* first column in hash bucket */
-
- /* === Consider each column in the row ================================== */
-
- rp = &A [row_start] ;
- rp_end = rp + row_length ;
- while (rp < rp_end)
- {
- col = *rp++ ;
- if (COL_IS_DEAD (col))
+ /* === Local variables ================================================== */
+
+ Index hash ; /* hash value for a column */
+ Index *rp ; /* pointer to a row */
+ Index c ; /* a column index */
+ Index super_c ; /* column index of the column to absorb into */
+ Index *cp1 ; /* column pointer for column super_c */
+ Index *cp2 ; /* column pointer for column c */
+ Index length ; /* length of column super_c */
+ Index prev_c ; /* column preceding c in hash bucket */
+ Index i ; /* loop counter */
+ Index *rp_end ; /* pointer to the end of the row */
+ Index col ; /* a column index in the row to check */
+ Index head_column ; /* first column in hash bucket or degree list */
+ Index first_col ; /* first column in hash bucket */
+
+ /* === Consider each column in the row ================================== */
+
+ rp = &A [row_start] ;
+ rp_end = rp + row_length ;
+ while (rp < rp_end)
{
+ col = *rp++ ;
+ if (COL_IS_DEAD (col))
+ {
continue ;
- }
+ }
- /* get hash number for this column */
- hash = Col [col].shared3.hash ;
- COLAMD_ASSERT (hash <= n_col) ;
+ /* get hash number for this column */
+ hash = Col [col].shared3.hash ;
+ COLAMD_ASSERT (hash <= n_col) ;
- /* === Get the first column in this hash bucket ===================== */
+ /* === Get the first column in this hash bucket ===================== */
- head_column = head [hash] ;
- if (head_column > COLAMD_EMPTY)
- {
+ head_column = head [hash] ;
+ if (head_column > COLAMD_EMPTY)
+ {
first_col = Col [head_column].shared3.headhash ;
- }
- else
- {
+ }
+ else
+ {
first_col = - (head_column + 2) ;
- }
+ }
- /* === Consider each column in the hash bucket ====================== */
+ /* === Consider each column in the hash bucket ====================== */
- for (super_c = first_col ; super_c != COLAMD_EMPTY ;
- super_c = Col [super_c].shared4.hash_next)
- {
+ for (super_c = first_col ; super_c != COLAMD_EMPTY ;
+ super_c = Col [super_c].shared4.hash_next)
+ {
COLAMD_ASSERT (COL_IS_ALIVE (super_c)) ;
COLAMD_ASSERT (Col [super_c].shared3.hash == hash) ;
length = Col [super_c].length ;
@@ -1620,71 +1621,71 @@ static void detect_super_cols
/* === Compare super_c with all columns after it ================ */
for (c = Col [super_c].shared4.hash_next ;
- c != COLAMD_EMPTY ; c = Col [c].shared4.hash_next)
+ c != COLAMD_EMPTY ; c = Col [c].shared4.hash_next)
{
- COLAMD_ASSERT (c != super_c) ;
- COLAMD_ASSERT (COL_IS_ALIVE (c)) ;
- COLAMD_ASSERT (Col [c].shared3.hash == hash) ;
-
- /* not identical if lengths or scores are different */
- if (Col [c].length != length ||
- Col [c].shared2.score != Col [super_c].shared2.score)
- {
- prev_c = c ;
- continue ;
+ COLAMD_ASSERT (c != super_c) ;
+ COLAMD_ASSERT (COL_IS_ALIVE (c)) ;
+ COLAMD_ASSERT (Col [c].shared3.hash == hash) ;
+
+ /* not identical if lengths or scores are different */
+ if (Col [c].length != length ||
+ Col [c].shared2.score != Col [super_c].shared2.score)
+ {
+ prev_c = c ;
+ continue ;
+ }
+
+ /* compare the two columns */
+ cp1 = &A [Col [super_c].start] ;
+ cp2 = &A [Col [c].start] ;
+
+ for (i = 0 ; i < length ; i++)
+ {
+ /* the columns are "clean" (no dead rows) */
+ COLAMD_ASSERT (ROW_IS_ALIVE (*cp1)) ;
+ COLAMD_ASSERT (ROW_IS_ALIVE (*cp2)) ;
+	    /* row indices will be in the same order for both supercols, */
+	    /* no gather/scatter necessary */
+ if (*cp1++ != *cp2++)
+ {
+ break ;
+ }
+ }
+
+ /* the two columns are different if the for-loop "broke" */
+ if (i != length)
+ {
+ prev_c = c ;
+ continue ;
+ }
+
+ /* === Got it! two columns are identical =================== */
+
+ COLAMD_ASSERT (Col [c].shared2.score == Col [super_c].shared2.score) ;
+
+ Col [super_c].shared1.thickness += Col [c].shared1.thickness ;
+ Col [c].shared1.parent = super_c ;
+ KILL_NON_PRINCIPAL_COL (c) ;
+ /* order c later, in order_children() */
+ Col [c].shared2.order = COLAMD_EMPTY ;
+ /* remove c from hash bucket */
+ Col [prev_c].shared4.hash_next = Col [c].shared4.hash_next ;
+ }
}
- /* compare the two columns */
- cp1 = &A [Col [super_c].start] ;
- cp2 = &A [Col [c].start] ;
+ /* === Empty this hash bucket ======================================= */
- for (i = 0 ; i < length ; i++)
- {
- /* the columns are "clean" (no dead rows) */
- COLAMD_ASSERT (ROW_IS_ALIVE (*cp1)) ;
- COLAMD_ASSERT (ROW_IS_ALIVE (*cp2)) ;
- /* row indices will same order for both supercols, */
- /* no gather scatter nessasary */
- if (*cp1++ != *cp2++)
- {
- break ;
- }
- }
-
- /* the two columns are different if the for-loop "broke" */
- if (i != length)
+ if (head_column > COLAMD_EMPTY)
{
- prev_c = c ;
- continue ;
- }
-
- /* === Got it! two columns are identical =================== */
-
- COLAMD_ASSERT (Col [c].shared2.score == Col [super_c].shared2.score) ;
-
- Col [super_c].shared1.thickness += Col [c].shared1.thickness ;
- Col [c].shared1.parent = super_c ;
- KILL_NON_PRINCIPAL_COL (c) ;
- /* order c later, in order_children() */
- Col [c].shared2.order = COLAMD_EMPTY ;
- /* remove c from hash bucket */
- Col [prev_c].shared4.hash_next = Col [c].shared4.hash_next ;
- }
- }
-
- /* === Empty this hash bucket ======================================= */
-
- if (head_column > COLAMD_EMPTY)
- {
/* corresponding degree list "hash" is not empty */
Col [head_column].shared3.headhash = COLAMD_EMPTY ;
- }
- else
- {
+ }
+ else
+ {
/* corresponding degree list "hash" is empty */
head [hash] = COLAMD_EMPTY ;
- }
}
+ }
}
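
[Editor's note] The hash-bucket machinery above reduces to a simple idea: columns are bucketed by a cheap hash of their pattern (the sum of their row indices), and only columns sharing a bucket, length and score are compared entry by entry; exact matches are merged into one supercolumn. A compact illustration using standard containers instead of colamd's in-array buckets (names are illustrative):

#include <map>
#include <vector>

typedef std::vector<int> Pattern;                 // sorted row indices of one column

// Cheap hash of a column's pattern, as in colamd: the sum of its row indices.
int pattern_hash(const Pattern& p, int n_col)
{
  long long h = 0;
  for (std::size_t i = 0; i < p.size(); ++i) h += p[i];
  return (int)(h % (n_col + 1));
}

// rep[c] ends up pointing at the representative supercolumn of column c.
std::vector<int> merge_identical_columns(const std::vector<Pattern>& cols, int n_col)
{
  std::vector<int> rep(cols.size());
  std::map<int, std::vector<int> > buckets;       // hash -> representatives seen so far
  for (int c = 0; c < (int)cols.size(); ++c)
  {
    rep[c] = c;
    std::vector<int>& b = buckets[pattern_hash(cols[c], n_col)];
    for (std::size_t j = 0; j < b.size(); ++j)
      if (cols[b[j]] == cols[c]) { rep[c] = b[j]; break; }   // exact pattern match
    if (rep[c] == c) b.push_back(c);              // new representative for this bucket
  }
  return rep;
}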
@@ -1693,93 +1694,93 @@ static void detect_super_cols
/* ========================================================================== */
/*
- Defragments and compacts columns and rows in the workspace A. Used when
- all avaliable memory has been used while performing row merging. Returns
- the index of the first free position in A, after garbage collection. The
- time taken by this routine is linear is the size of the array A, which is
- itself linear in the number of nonzeros in the input matrix.
- Not user-callable.
+ Defragments and compacts columns and rows in the workspace A. Used when
+  all available memory has been used while performing row merging. Returns
+  the index of the first free position in A, after garbage collection. The
+  time taken by this routine is linear in the size of the array A, which is
+ itself linear in the number of nonzeros in the input matrix.
+ Not user-callable.
*/
-
-static int garbage_collection /* returns the new value of pfree */
-(
+template <typename Index>
+static Index garbage_collection /* returns the new value of pfree */
+ (
/* === Parameters ======================================================= */
-
- int n_row, /* number of rows */
- int n_col, /* number of columns */
- Colamd_Row Row [], /* row info */
- colamd_col Col [], /* column info */
- int A [], /* A [0 ... Alen-1] holds the matrix */
- int *pfree /* &A [0] ... pfree is in use */
-)
+
+ Index n_row, /* number of rows */
+ Index n_col, /* number of columns */
+ Colamd_Row<Index> Row [], /* row info */
+ colamd_col<Index> Col [], /* column info */
+ Index A [], /* A [0 ... Alen-1] holds the matrix */
+ Index *pfree /* &A [0] ... pfree is in use */
+ )
{
- /* === Local variables ================================================== */
+ /* === Local variables ================================================== */
- int *psrc ; /* source pointer */
- int *pdest ; /* destination pointer */
- int j ; /* counter */
- int r ; /* a row index */
- int c ; /* a column index */
- int length ; /* length of a row or column */
+ Index *psrc ; /* source pointer */
+ Index *pdest ; /* destination pointer */
+ Index j ; /* counter */
+ Index r ; /* a row index */
+ Index c ; /* a column index */
+ Index length ; /* length of a row or column */
- /* === Defragment the columns =========================================== */
+ /* === Defragment the columns =========================================== */
- pdest = &A[0] ;
- for (c = 0 ; c < n_col ; c++)
- {
- if (COL_IS_ALIVE (c))
+ pdest = &A[0] ;
+ for (c = 0 ; c < n_col ; c++)
{
+ if (COL_IS_ALIVE (c))
+ {
psrc = &A [Col [c].start] ;
/* move and compact the column */
COLAMD_ASSERT (pdest <= psrc) ;
- Col [c].start = (int) (pdest - &A [0]) ;
+ Col [c].start = (Index) (pdest - &A [0]) ;
length = Col [c].length ;
for (j = 0 ; j < length ; j++)
{
- r = *psrc++ ;
- if (ROW_IS_ALIVE (r))
- {
- *pdest++ = r ;
- }
+ r = *psrc++ ;
+ if (ROW_IS_ALIVE (r))
+ {
+ *pdest++ = r ;
+ }
}
- Col [c].length = (int) (pdest - &A [Col [c].start]) ;
- }
+ Col [c].length = (Index) (pdest - &A [Col [c].start]) ;
}
+ }
- /* === Prepare to defragment the rows =================================== */
+ /* === Prepare to defragment the rows =================================== */
- for (r = 0 ; r < n_row ; r++)
- {
- if (ROW_IS_ALIVE (r))
+ for (r = 0 ; r < n_row ; r++)
{
+ if (ROW_IS_ALIVE (r))
+ {
if (Row [r].length == 0)
{
- /* this row is of zero length. cannot compact it, so kill it */
- COLAMD_DEBUG3 (("Defrag row kill\n")) ;
- KILL_ROW (r) ;
+ /* this row is of zero length. cannot compact it, so kill it */
+ COLAMD_DEBUG3 (("Defrag row kill\n")) ;
+ KILL_ROW (r) ;
}
else
{
- /* save first column index in Row [r].shared2.first_column */
- psrc = &A [Row [r].start] ;
- Row [r].shared2.first_column = *psrc ;
- COLAMD_ASSERT (ROW_IS_ALIVE (r)) ;
- /* flag the start of the row with the one's complement of row */
- *psrc = ONES_COMPLEMENT (r) ;
+ /* save first column index in Row [r].shared2.first_column */
+ psrc = &A [Row [r].start] ;
+ Row [r].shared2.first_column = *psrc ;
+ COLAMD_ASSERT (ROW_IS_ALIVE (r)) ;
+ /* flag the start of the row with the one's complement of row */
+ *psrc = ONES_COMPLEMENT (r) ;
}
- }
}
+ }
- /* === Defragment the rows ============================================== */
+ /* === Defragment the rows ============================================== */
- psrc = pdest ;
- while (psrc < pfree)
- {
- /* find a negative number ... the start of a row */
- if (*psrc++ < 0)
+ psrc = pdest ;
+ while (psrc < pfree)
{
+ /* find a negative number ... the start of a row */
+ if (*psrc++ < 0)
+ {
psrc-- ;
/* get the row index */
r = ONES_COMPLEMENT (*psrc) ;
@@ -1790,26 +1791,26 @@ static int garbage_collection /* returns the new value of pfree */
/* move and compact the row */
COLAMD_ASSERT (pdest <= psrc) ;
- Row [r].start = (int) (pdest - &A [0]) ;
+ Row [r].start = (Index) (pdest - &A [0]) ;
length = Row [r].length ;
for (j = 0 ; j < length ; j++)
{
- c = *psrc++ ;
- if (COL_IS_ALIVE (c))
- {
- *pdest++ = c ;
- }
+ c = *psrc++ ;
+ if (COL_IS_ALIVE (c))
+ {
+ *pdest++ = c ;
+ }
}
- Row [r].length = (int) (pdest - &A [Row [r].start]) ;
+ Row [r].length = (Index) (pdest - &A [Row [r].start]) ;
- }
}
- /* ensure we found all the rows */
- COLAMD_ASSERT (debug_rows == 0) ;
+ }
+ /* ensure we found all the rows */
+ COLAMD_ASSERT (debug_rows == 0) ;
- /* === Return the new value of pfree ==================================== */
+ /* === Return the new value of pfree ==================================== */
- return ((int) (pdest - &A [0])) ;
+ return ((Index) (pdest - &A [0])) ;
}
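
[Editor's note] garbage_collection compacts the shared workspace with a classic two-pointer sweep: live entries are slid towards the front while each structure's start and length are rewritten to the new positions (rows are additionally flagged in place with the one's complement of their index so the sweep can find where each row begins). A much-simplified sketch of the column half of that compaction, assuming an explicit alive flag instead of colamd's encoding:

#include <vector>

// Compact one column of the shared workspace A: keep only entries whose row is still
// alive, writing them at pdest and advancing it; the caller records the new start and
// the returned length, much as garbage_collection rewrites Col[c].start and Col[c].length.
int compact_column(std::vector<int>& A, int start, int length,
                   const std::vector<bool>& row_alive, int& pdest)
{
  int psrc = start;
  int new_start = pdest;
  for (int j = 0; j < length; ++j)
  {
    int r = A[psrc++];
    if (row_alive[r]) A[pdest++] = r;     // slide live row indices towards the front
  }
  return pdest - new_start;               // new (possibly shorter) column length
}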
@@ -1818,30 +1819,30 @@ static int garbage_collection /* returns the new value of pfree */
/* ========================================================================== */
/*
- Clears the Row [].shared2.mark array, and returns the new tag_mark.
- Return value is the new tag_mark. Not user-callable.
+ Clears the Row [].shared2.mark array, and returns the new tag_mark.
+ Return value is the new tag_mark. Not user-callable.
*/
-
-static inline int clear_mark /* return the new value for tag_mark */
-(
- /* === Parameters ======================================================= */
-
- int n_row, /* number of rows in A */
- Colamd_Row Row [] /* Row [0 ... n_row-1].shared2.mark is set to zero */
-)
+template <typename Index>
+static inline Index clear_mark /* return the new value for tag_mark */
+ (
+ /* === Parameters ======================================================= */
+
+ Index n_row, /* number of rows in A */
+ Colamd_Row<Index> Row [] /* Row [0 ... n_row-1].shared2.mark is set to zero */
+ )
{
- /* === Local variables ================================================== */
+ /* === Local variables ================================================== */
- int r ;
+ Index r ;
- for (r = 0 ; r < n_row ; r++)
- {
- if (ROW_IS_ALIVE (r))
+ for (r = 0 ; r < n_row ; r++)
{
+ if (ROW_IS_ALIVE (r))
+ {
Row [r].shared2.mark = 0 ;
- }
}
- return (1) ;
+ }
+ return (1) ;
}
diff --git a/Eigen/src/OrderingMethods/Ordering.h b/Eigen/src/OrderingMethods/Ordering.h
index 2471316b9..b4da6531a 100644
--- a/Eigen/src/OrderingMethods/Ordering.h
+++ b/Eigen/src/OrderingMethods/Ordering.h
@@ -4,29 +4,13 @@
//
// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
//
-// Eigen is free software; you can redistribute it and/or
-// modify it under the terms of the GNU Lesser General Public
-// License as published by the Free Software Foundation; either
-// version 3 of the License, or (at your option) any later version.
-//
-// Alternatively, you can redistribute it and/or
-// modify it under the terms of the GNU General Public License as
-// published by the Free Software Foundation; either version 2 of
-// the License, or (at your option) any later version.
-//
-// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
-// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
-// GNU General Public License for more details.
-//
-// You should have received a copy of the GNU Lesser General Public
-// License and a copy of the GNU General Public License along with
-// Eigen. If not, see <http://www.gnu.org/licenses/>.
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_ORDERING_H
#define EIGEN_ORDERING_H
-#include "Amd.h"
namespace Eigen {
#include "Eigen_Colamd.h"
@@ -53,6 +37,8 @@ void ordering_helper_at_plus_a(const MatrixType& mat, MatrixType& symmat)
}
+#ifndef EIGEN_MPL2_ONLY
+
/** \ingroup OrderingMethods_Module
* \class AMDOrdering
*
@@ -94,6 +80,8 @@ class AMDOrdering
}
};
+#endif // EIGEN_MPL2_ONLY
+
/** \ingroup OrderingMethods_Module
* \class NaturalOrdering
*
@@ -134,26 +122,26 @@ class COLAMDOrdering
template <typename MatrixType>
void operator() (const MatrixType& mat, PermutationType& perm)
{
- int m = mat.rows();
- int n = mat.cols();
- int nnz = mat.nonZeros();
+ Index m = mat.rows();
+ Index n = mat.cols();
+ Index nnz = mat.nonZeros();
// Get the recommended value of Alen to be used by colamd
- int Alen = internal::colamd_recommended(nnz, m, n);
+ Index Alen = internal::colamd_recommended(nnz, m, n);
// Set the default parameters
double knobs [COLAMD_KNOBS];
- int stats [COLAMD_STATS];
+ Index stats [COLAMD_STATS];
internal::colamd_set_defaults(knobs);
- int info;
+ Index info;
IndexVector p(n+1), A(Alen);
- for(int i=0; i <= n; i++) p(i) = mat.outerIndexPtr()[i];
- for(int i=0; i < nnz; i++) A(i) = mat.innerIndexPtr()[i];
+ for(Index i=0; i <= n; i++) p(i) = mat.outerIndexPtr()[i];
+ for(Index i=0; i < nnz; i++) A(i) = mat.innerIndexPtr()[i];
// Call Colamd routine to compute the ordering
info = internal::colamd(m, n, Alen, A.data(), p.data(), knobs, stats);
eigen_assert( info && "COLAMD failed " );
perm.resize(n);
- for (int i = 0; i < n; i++) perm.indices()(p(i)) = i;
+ for (Index i = 0; i < n; i++) perm.indices()(p(i)) = i;
}
};
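
[Editor's note] With the functor now templated on the index type, a typical use of COLAMDOrdering looks as follows; this is a usage sketch, not part of the patch (the identity pattern is only a stand-in for a real sparse matrix):

#include <Eigen/Sparse>
#include <Eigen/SparseLU>
#include <Eigen/OrderingMethods>

int main()
{
  typedef Eigen::SparseMatrix<double> SpMat;
  SpMat A(100, 100);
  A.setIdentity();                                    // stand-in pattern; use a real matrix here

  // Direct use of the functor: perm is chosen so that factoring A*perm tends to give less fill-in.
  Eigen::COLAMDOrdering<int> ordering;
  Eigen::PermutationMatrix<Eigen::Dynamic, Eigen::Dynamic, int> perm;
  ordering(A, perm);

  // Typical indirect use: SparseLU applies the same ordering internally.
  Eigen::SparseLU<SpMat, Eigen::COLAMDOrdering<int> > lu;
  lu.compute(A);
  return lu.info() == Eigen::Success ? 0 : 1;
}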
diff --git a/Eigen/src/PaStiXSupport/PaStiXSupport.h b/Eigen/src/PaStiXSupport/PaStiXSupport.h
index 82e137c64..a955287d1 100644
--- a/Eigen/src/PaStiXSupport/PaStiXSupport.h
+++ b/Eigen/src/PaStiXSupport/PaStiXSupport.h
@@ -157,27 +157,6 @@ class PastixBase : internal::noncopyable
template<typename Rhs,typename Dest>
bool _solve (const MatrixBase<Rhs> &b, MatrixBase<Dest> &x) const;
- /** \internal */
- template<typename Rhs, typename DestScalar, int DestOptions, typename DestIndex>
- void _solve_sparse(const Rhs& b, SparseMatrix<DestScalar,DestOptions,DestIndex> &dest) const
- {
- eigen_assert(m_factorizationIsOk && "The decomposition is not in a valid state for solving, you must first call either compute() or symbolic()/numeric()");
- eigen_assert(rows()==b.rows());
-
- // we process the sparse rhs per block of NbColsAtOnce columns temporarily stored into a dense matrix.
- static const int NbColsAtOnce = 1;
- int rhsCols = b.cols();
- int size = b.rows();
- Eigen::Matrix<DestScalar,Dynamic,Dynamic> tmp(size,rhsCols);
- for(int k=0; k<rhsCols; k+=NbColsAtOnce)
- {
- int actualCols = std::min<int>(rhsCols-k, NbColsAtOnce);
- tmp.leftCols(actualCols) = b.middleCols(k,actualCols);
- tmp.leftCols(actualCols) = derived().solve(tmp.leftCols(actualCols));
- dest.middleCols(k,actualCols) = tmp.leftCols(actualCols).sparseView();
- }
- }
-
Derived& derived()
{
return *static_cast<Derived*>(this);
@@ -731,7 +710,7 @@ struct sparse_solve_retval<PastixBase<_MatrixType>, Rhs>
template<typename Dest> void evalTo(Dest& dst) const
{
- dec()._solve_sparse(rhs(),dst);
+ this->defaultEvalTo(dst);
}
};
diff --git a/Eigen/src/PardisoSupport/PardisoSupport.h b/Eigen/src/PardisoSupport/PardisoSupport.h
index d623bf518..1c48f0df7 100644
--- a/Eigen/src/PardisoSupport/PardisoSupport.h
+++ b/Eigen/src/PardisoSupport/PardisoSupport.h
@@ -206,29 +206,6 @@ class PardisoImpl
template<typename BDerived, typename XDerived>
bool _solve(const MatrixBase<BDerived> &b, MatrixBase<XDerived>& x) const;
- /** \internal */
- template<typename Rhs, typename DestScalar, int DestOptions, typename DestIndex>
- void _solve_sparse(const Rhs& b, SparseMatrix<DestScalar,DestOptions,DestIndex> &dest) const
- {
- eigen_assert(m_size==b.rows());
-
- // we process the sparse rhs per block of NbColsAtOnce columns temporarily stored into a dense matrix.
- static const int NbColsAtOnce = 4;
- int rhsCols = b.cols();
- int size = b.rows();
- // Pardiso cannot solve in-place,
- // so we need two temporaries
- Eigen::Matrix<DestScalar,Dynamic,Dynamic,ColMajor> tmp_rhs(size,rhsCols);
- Eigen::Matrix<DestScalar,Dynamic,Dynamic,ColMajor> tmp_res(size,rhsCols);
- for(int k=0; k<rhsCols; k+=NbColsAtOnce)
- {
- int actualCols = std::min<int>(rhsCols-k, NbColsAtOnce);
- tmp_rhs.leftCols(actualCols) = b.middleCols(k,actualCols);
- tmp_res.leftCols(actualCols) = derived().solve(tmp_rhs.leftCols(actualCols));
- dest.middleCols(k,actualCols) = tmp_res.leftCols(actualCols).sparseView();
- }
- }
-
protected:
void pardisoRelease()
{
@@ -604,7 +581,7 @@ struct sparse_solve_retval<PardisoImpl<Derived>, Rhs>
template<typename Dest> void evalTo(Dest& dst) const
{
- dec().derived()._solve_sparse(rhs(),dst);
+ this->defaultEvalTo(dst);
}
};
diff --git a/Eigen/src/QR/ColPivHouseholderQR.h b/Eigen/src/QR/ColPivHouseholderQR.h
index 47f67e6cd..9ec8a65e4 100644
--- a/Eigen/src/QR/ColPivHouseholderQR.h
+++ b/Eigen/src/QR/ColPivHouseholderQR.h
@@ -56,6 +56,12 @@ template<typename _MatrixType> class ColPivHouseholderQR
typedef typename internal::plain_row_type<MatrixType>::type RowVectorType;
typedef typename internal::plain_row_type<MatrixType, RealScalar>::type RealRowVectorType;
typedef typename HouseholderSequence<MatrixType,HCoeffsType>::ConjugateReturnType HouseholderSequenceType;
+
+ private:
+
+ typedef typename PermutationType::Index PermIndexType;
+
+ public:
/**
* \brief Default Constructor.
@@ -81,7 +87,7 @@ template<typename _MatrixType> class ColPivHouseholderQR
ColPivHouseholderQR(Index rows, Index cols)
: m_qr(rows, cols),
m_hCoeffs((std::min)(rows,cols)),
- m_colsPermutation(cols),
+ m_colsPermutation(PermIndexType(cols)),
m_colsTranspositions(cols),
m_temp(cols),
m_colSqNorms(cols),
@@ -91,7 +97,7 @@ template<typename _MatrixType> class ColPivHouseholderQR
ColPivHouseholderQR(const MatrixType& matrix)
: m_qr(matrix.rows(), matrix.cols()),
m_hCoeffs((std::min)(matrix.rows(),matrix.cols())),
- m_colsPermutation(matrix.cols()),
+ m_colsPermutation(PermIndexType(matrix.cols())),
m_colsTranspositions(matrix.cols()),
m_temp(matrix.cols()),
m_colSqNorms(matrix.cols()),
@@ -139,7 +145,22 @@ template<typename _MatrixType> class ColPivHouseholderQR
eigen_assert(m_isInitialized && "ColPivHouseholderQR is not initialized.");
return m_qr;
}
-
+
+  /** \returns a reference to the matrix where the result of the Householder QR factorization is stored
+ * \warning The strict lower part of this matrix contains internal values.
+ * Only the upper triangular part should be referenced. To get it, use
+ * \code matrixR().template triangularView<Upper>() \endcode
+ * For rank-deficient matrices, use
+ * \code
+ * matrixR().topLeftCorner(rank(), rank()).template triangularView<Upper>()
+ * \endcode
+ */
+ const MatrixType& matrixR() const
+ {
+ eigen_assert(m_isInitialized && "ColPivHouseholderQR is not initialized.");
+ return m_qr;
+ }
+
ColPivHouseholderQR& compute(const MatrixType& matrix);
const PermutationType& colsPermutation() const
@@ -330,6 +351,18 @@ template<typename _MatrixType> class ColPivHouseholderQR
* diagonal coefficient of R.
*/
RealScalar maxPivot() const { return m_maxpivot; }
+
+    /** \brief Reports whether the QR factorization was successful.
+ *
+ * \note This function always returns \c Success. It is provided for compatibility
+ * with other factorization routines.
+ * \returns \c Success
+ */
+ ComputationInfo info() const
+ {
+ eigen_assert(m_isInitialized && "Decomposition is not initialized.");
+ return Success;
+ }
protected:
MatrixType m_qr;
@@ -368,6 +401,9 @@ ColPivHouseholderQR<MatrixType>& ColPivHouseholderQR<MatrixType>::compute(const
Index rows = matrix.rows();
Index cols = matrix.cols();
Index size = matrix.diagonalSize();
+
+ // the column permutation is stored as int indices, so just to be sure:
+ eigen_assert(cols<=NumTraits<int>::highest());
m_qr = matrix;
m_hCoeffs.resize(size);
@@ -443,9 +479,9 @@ ColPivHouseholderQR<MatrixType>& ColPivHouseholderQR<MatrixType>::compute(const
m_colSqNorms.tail(cols-k-1) -= m_qr.row(k).tail(cols-k-1).cwiseAbs2();
}
- m_colsPermutation.setIdentity(cols);
- for(Index k = 0; k < m_nonzero_pivots; ++k)
- m_colsPermutation.applyTranspositionOnTheRight(k, m_colsTranspositions.coeff(k));
+ m_colsPermutation.setIdentity(PermIndexType(cols));
+ for(PermIndexType k = 0; k < m_nonzero_pivots; ++k)
+ m_colsPermutation.applyTranspositionOnTheRight(k, PermIndexType(m_colsTranspositions.coeff(k)));
m_det_pq = (number_of_transpositions%2) ? -1 : 1;
m_isInitialized = true;
@@ -482,7 +518,7 @@ struct solve_retval<ColPivHouseholderQR<_MatrixType>, Rhs>
.transpose()
);
- dec().matrixQR()
+ dec().matrixR()
.topLeftCorner(nonzero_pivots, nonzero_pivots)
.template triangularView<Upper>()
.solveInPlace(c.topRows(nonzero_pivots));
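
[Editor's note] With matrixR() exposed, the rank-revealing solve used above can be reproduced by hand; the following sketch mirrors what solve() does internally (the sizes and random data are illustrative):

#include <Eigen/Dense>
#include <iostream>

int main()
{
  Eigen::MatrixXd A = Eigen::MatrixXd::Random(5, 5);
  Eigen::VectorXd b = Eigen::VectorXd::Random(5);

  Eigen::ColPivHouseholderQR<Eigen::MatrixXd> qr(A);
  const int rk = (int) qr.rank();

  // A*P = Q*R, so x = P * R^{-1} * (Q^T b), restricted to the first rk pivots.
  Eigen::VectorXd c = qr.householderQ().transpose() * b;
  Eigen::VectorXd y = Eigen::VectorXd::Zero(A.cols());
  y.head(rk) = qr.matrixR().topLeftCorner(rk, rk)
                 .triangularView<Eigen::Upper>()
                 .solve(c.head(rk));
  Eigen::VectorXd x = qr.colsPermutation() * y;

  std::cout << "residual: " << (A * x - b).norm() << "\n";
  return 0;
}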
diff --git a/Eigen/src/QR/HouseholderQR.h b/Eigen/src/QR/HouseholderQR.h
index 9db64e219..0314d5259 100644
--- a/Eigen/src/QR/HouseholderQR.h
+++ b/Eigen/src/QR/HouseholderQR.h
@@ -241,7 +241,6 @@ void householder_qr_inplace_blocked(MatrixQR& mat, HCoeffs& hCoeffs,
{
typedef typename MatrixQR::Index Index;
typedef typename MatrixQR::Scalar Scalar;
- typedef typename MatrixQR::RealScalar RealScalar;
typedef Block<MatrixQR,Dynamic,Dynamic> BlockType;
Index rows = mat.rows();
diff --git a/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h b/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h
index 17b764a37..0ffb894f6 100644
--- a/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h
+++ b/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h
@@ -60,7 +60,7 @@ class SPQR
typedef typename _MatrixType::Scalar Scalar;
typedef typename _MatrixType::RealScalar RealScalar;
typedef UF_long Index ;
- typedef SparseMatrix<Scalar, _MatrixType::Flags, Index> MatrixType;
+ typedef SparseMatrix<Scalar, ColMajor, Index> MatrixType;
typedef PermutationMatrix<Dynamic, Dynamic> PermutationType;
public:
SPQR()
@@ -88,7 +88,7 @@ class SPQR
delete[] m_E;
delete[] m_HPinv;
}
- void compute(const MatrixType& matrix)
+ void compute(const _MatrixType& matrix)
{
MatrixType mat(matrix);
cholmod_sparse A;
@@ -105,20 +105,18 @@ class SPQR
}
m_info = Success;
m_isInitialized = true;
+ m_isRUpToDate = false;
}
/**
- * Get the number of rows of the triangular matrix.
+ * Get the number of rows of the input matrix and the Q matrix
*/
- inline Index rows() const { return m_cR->nrow; }
+ inline Index rows() const {return m_H->nrow; }
/**
- * Get the number of columns of the triangular matrix.
+ * Get the number of columns of the input matrix.
*/
inline Index cols() const { return m_cR->ncol; }
- /**
- * This is the number of rows in the input matrix and the Q matrix
- */
- inline Index rowsQ() const {return m_HTau->nrow; }
+
/** \returns the solution X of \f$ A X = B \f$ using the current decomposition of A.
*
* \sa compute()
@@ -126,8 +124,8 @@ class SPQR
template<typename Rhs>
inline const internal::solve_retval<SPQR, Rhs> solve(const MatrixBase<Rhs>& B) const
{
- eigen_assert(m_isInitialized && " The QR factorization should be computed first, call compute()");
- eigen_assert(rows()==B.rows()
+ eigen_assert(m_isInitialized && " The QR factorization should be computed first, call compute()");
+ eigen_assert(this->rows()==B.rows()
&& "SPQR::solve(): invalid number of rows of the right hand side matrix B");
return internal::solve_retval<SPQR, Rhs>(*this, B.derived());
}
@@ -139,22 +137,28 @@ class SPQR
eigen_assert(b.cols()==1 && "This method is for vectors only");
//Compute Q^T * b
- dest = matrixQ().transpose() * b;
-
- // Solves with the triangular matrix R
Dest y;
- y = this->matrixQR().template triangularView<Upper>().solve(dest.derived());
+ y = matrixQ().transpose() * b;
+ // Solves with the triangular matrix R
+ Index rk = this->rank();
+ y.topRows(rk) = this->matrixR().topLeftCorner(rk, rk).template triangularView<Upper>().solve(y.topRows(rk));
+ y.bottomRows(cols()-rk).setZero();
// Apply the column permutation
- dest = colsPermutation() * y;
+ dest.topRows(cols()) = colsPermutation() * y.topRows(cols());
m_info = Success;
}
- /// Get the sparse triangular matrix R. It is a sparse matrix
- MatrixType matrixQR() const
+
+ /** \returns the sparse triangular factor R as a sparse matrix.
+ */
+ const MatrixType matrixR() const
{
- MatrixType R;
- R = viewAsEigen<Scalar, MatrixType::Flags, typename MatrixType::Index>(*m_cR);
- return R;
+ eigen_assert(m_isInitialized && " The QR factorization should be computed first, call compute()");
+ if(!m_isRUpToDate) {
+ m_R = viewAsEigen<Scalar,ColMajor, typename MatrixType::Index>(*m_cR);
+ m_isRUpToDate = true;
+ }
+ return m_R;
}
/// Get an expression of the matrix Q
SPQRMatrixQReturnType<SPQR> matrixQ() const
@@ -181,15 +185,12 @@ class SPQR
return m_cc.SPQR_istat[4];
}
/// Set the fill-reducing ordering method to be used
- void setOrdering(int ord) { m_ordering = ord;}
+ void setSPQROrdering(int ord) { m_ordering = ord;}
/// Set the tolerance tol to treat columns with 2-norm <= tol as zero
- void setThreshold(RealScalar tol) { m_tolerance = tol; }
+ void setPivotThreshold(const RealScalar& tol) { m_tolerance = tol; }
- /// Return a pointer to SPQR workspace
- cholmod_common *cc() const { return &m_cc; }
- cholmod_sparse * H() const { return m_H; }
- Index *HPinv() const { return m_HPinv; }
- cholmod_dense* HTau() const { return m_HTau; }
+ /** \returns a pointer to the SPQR workspace */
+ cholmod_common *cholmodCommon() const { return &m_cc; }
/** \brief Reports whether previous computation was successful.
@@ -206,17 +207,20 @@ class SPQR
bool m_isInitialized;
bool m_analysisIsOk;
bool m_factorizationIsOk;
+ mutable bool m_isRUpToDate;
mutable ComputationInfo m_info;
int m_ordering; // Ordering method to use, see SPQR's manual
int m_allow_tol; // Allow to use some tolerance during numerical factorization.
RealScalar m_tolerance; // treat columns with 2-norm below this tolerance as zero
mutable cholmod_sparse *m_cR; // The sparse R factor in cholmod format
+ mutable MatrixType m_R; // The sparse matrix R in Eigen format
mutable Index *m_E; // The permutation applied to columns
mutable cholmod_sparse *m_H; //The householder vectors
mutable Index *m_HPinv; // The row permutation of H
mutable cholmod_dense *m_HTau; // The Householder coefficients
mutable Index m_rank; // The rank of the matrix
mutable cholmod_common m_cc; // Workspace and parameters
+ template<typename ,typename > friend struct SPQR_QProduct;
};
template <typename SPQRType, typename Derived>
@@ -227,7 +231,7 @@ struct SPQR_QProduct : ReturnByValue<SPQR_QProduct<SPQRType,Derived> >
//Define the constructor to get reference to argument types
SPQR_QProduct(const SPQRType& spqr, const Derived& other, bool transpose) : m_spqr(spqr),m_other(other),m_transpose(transpose) {}
- inline Index rows() const { return m_transpose ? m_spqr.rowsQ() : m_spqr.cols(); }
+ inline Index rows() const { return m_transpose ? m_spqr.rows() : m_spqr.cols(); }
inline Index cols() const { return m_other.cols(); }
// Assign to a vector
template<typename ResType>
@@ -236,9 +240,9 @@ struct SPQR_QProduct : ReturnByValue<SPQR_QProduct<SPQRType,Derived> >
cholmod_dense y_cd;
cholmod_dense *x_cd;
int method = m_transpose ? SPQR_QTX : SPQR_QX;
- cholmod_common *cc = m_spqr.cc();
+ cholmod_common *cc = m_spqr.cholmodCommon();
y_cd = viewAsCholmod(m_other.const_cast_derived());
- x_cd = SuiteSparseQR_qmult<Scalar>(method, m_spqr.H(), m_spqr.HTau(), m_spqr.HPinv(), &y_cd, cc);
+ x_cd = SuiteSparseQR_qmult<Scalar>(method, m_spqr.m_H, m_spqr.m_HTau, m_spqr.m_HPinv, &y_cd, cc);
res = Matrix<Scalar,ResType::RowsAtCompileTime,ResType::ColsAtCompileTime>::Map(reinterpret_cast<Scalar*>(x_cd->x), x_cd->nrow, x_cd->ncol);
cholmod_free_dense(&x_cd, cc);
}
diff --git a/Eigen/src/SVD/JacobiSVD.h b/Eigen/src/SVD/JacobiSVD.h
index 1616fe560..2ab4fc05e 100644
--- a/Eigen/src/SVD/JacobiSVD.h
+++ b/Eigen/src/SVD/JacobiSVD.h
@@ -78,7 +78,8 @@ public:
{
if (svd.rows() != m_qr.rows() || svd.cols() != m_qr.cols())
{
- m_qr = FullPivHouseholderQR<MatrixType>(svd.rows(), svd.cols());
+ m_qr.~QRType();
+ ::new (&m_qr) QRType(svd.rows(), svd.cols());
}
if (svd.m_computeFullU) m_workspace.resize(svd.rows());
}
@@ -96,7 +97,8 @@ public:
return false;
}
private:
- FullPivHouseholderQR<MatrixType> m_qr;
+ typedef FullPivHouseholderQR<MatrixType> QRType;
+ QRType m_qr;
WorkspaceType m_workspace;
};
@@ -121,7 +123,8 @@ public:
{
if (svd.cols() != m_qr.rows() || svd.rows() != m_qr.cols())
{
- m_qr = FullPivHouseholderQR<TransposeTypeWithSameStorageOrder>(svd.cols(), svd.rows());
+ m_qr.~QRType();
+ ::new (&m_qr) QRType(svd.cols(), svd.rows());
}
m_adjoint.resize(svd.cols(), svd.rows());
if (svd.m_computeFullV) m_workspace.resize(svd.cols());
@@ -141,7 +144,8 @@ public:
else return false;
}
private:
- FullPivHouseholderQR<TransposeTypeWithSameStorageOrder> m_qr;
+ typedef FullPivHouseholderQR<TransposeTypeWithSameStorageOrder> QRType;
+ QRType m_qr;
TransposeTypeWithSameStorageOrder m_adjoint;
typename internal::plain_row_type<MatrixType>::type m_workspace;
};
@@ -158,7 +162,8 @@ public:
{
if (svd.rows() != m_qr.rows() || svd.cols() != m_qr.cols())
{
- m_qr = ColPivHouseholderQR<MatrixType>(svd.rows(), svd.cols());
+ m_qr.~QRType();
+ ::new (&m_qr) QRType(svd.rows(), svd.cols());
}
if (svd.m_computeFullU) m_workspace.resize(svd.rows());
else if (svd.m_computeThinU) m_workspace.resize(svd.cols());
@@ -183,7 +188,8 @@ public:
}
private:
- ColPivHouseholderQR<MatrixType> m_qr;
+ typedef ColPivHouseholderQR<MatrixType> QRType;
+ QRType m_qr;
typename internal::plain_col_type<MatrixType>::type m_workspace;
};
@@ -209,7 +215,8 @@ public:
{
if (svd.cols() != m_qr.rows() || svd.rows() != m_qr.cols())
{
- m_qr = ColPivHouseholderQR<TransposeTypeWithSameStorageOrder>(svd.cols(), svd.rows());
+ m_qr.~QRType();
+ ::new (&m_qr) QRType(svd.cols(), svd.rows());
}
if (svd.m_computeFullV) m_workspace.resize(svd.cols());
else if (svd.m_computeThinV) m_workspace.resize(svd.rows());
@@ -237,7 +244,8 @@ public:
}
private:
- ColPivHouseholderQR<TransposeTypeWithSameStorageOrder> m_qr;
+ typedef ColPivHouseholderQR<TransposeTypeWithSameStorageOrder> QRType;
+ QRType m_qr;
TransposeTypeWithSameStorageOrder m_adjoint;
typename internal::plain_row_type<MatrixType>::type m_workspace;
};
@@ -254,7 +262,8 @@ public:
{
if (svd.rows() != m_qr.rows() || svd.cols() != m_qr.cols())
{
- m_qr = HouseholderQR<MatrixType>(svd.rows(), svd.cols());
+ m_qr.~QRType();
+ ::new (&m_qr) QRType(svd.rows(), svd.cols());
}
if (svd.m_computeFullU) m_workspace.resize(svd.rows());
else if (svd.m_computeThinU) m_workspace.resize(svd.cols());
@@ -278,7 +287,8 @@ public:
return false;
}
private:
- HouseholderQR<MatrixType> m_qr;
+ typedef HouseholderQR<MatrixType> QRType;
+ QRType m_qr;
typename internal::plain_col_type<MatrixType>::type m_workspace;
};
@@ -304,7 +314,8 @@ public:
{
if (svd.cols() != m_qr.rows() || svd.rows() != m_qr.cols())
{
- m_qr = HouseholderQR<TransposeTypeWithSameStorageOrder>(svd.cols(), svd.rows());
+ m_qr.~QRType();
+ ::new (&m_qr) QRType(svd.cols(), svd.rows());
}
if (svd.m_computeFullV) m_workspace.resize(svd.cols());
else if (svd.m_computeThinV) m_workspace.resize(svd.rows());
@@ -332,7 +343,8 @@ public:
}
private:
- HouseholderQR<TransposeTypeWithSameStorageOrder> m_qr;
+ typedef HouseholderQR<TransposeTypeWithSameStorageOrder> QRType;
+ QRType m_qr;
TransposeTypeWithSameStorageOrder m_adjoint;
typename internal::plain_row_type<MatrixType>::type m_workspace;
};
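The JacobiSVD hunks above all apply the same re-initialization idiom: instead of assigning a freshly constructed QR helper, the member is destroyed in place and rebuilt with placement new, which sidesteps the need for an assignment operator. A generic sketch of that idiom (the helper name rebuild is illustrative):

#include <new>

// Destroy the existing object, then construct a fresh one in the same storage.
template<typename QRType>
void rebuild(QRType& qr, int rows, int cols)
{
  qr.~QRType();                    // run the destructor on the current object
  ::new (&qr) QRType(rows, cols);  // placement new into the same storage
}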
diff --git a/Eigen/src/SparseCholesky/SimplicialCholesky.h b/Eigen/src/SparseCholesky/SimplicialCholesky.h
index 51f6fe9ef..62747279d 100644
--- a/Eigen/src/SparseCholesky/SimplicialCholesky.h
+++ b/Eigen/src/SparseCholesky/SimplicialCholesky.h
@@ -1,52 +1,12 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
-// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2008-2012 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-/*
-
-NOTE: the _symbolic, and _numeric functions has been adapted from
- the LDL library:
-
-LDL Copyright (c) 2005 by Timothy A. Davis. All Rights Reserved.
-
-LDL License:
-
- Your use or distribution of LDL or any modified version of
- LDL implies that you agree to this License.
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
- USA
-
- Permission is hereby granted to use or copy this program under the
- terms of the GNU LGPL, provided that the Copyright, this License,
- and the Availability of the original version is retained on all copies.
- User documentation of any code that uses this code or any modified
- version of this code must cite the Copyright, this License, the
- Availability note, and "Used by permission." Permission to modify
- the code and to distribute modified code is granted, provided the
- Copyright, this License, and the Availability note are retained,
- and a notice that the code was modified is included.
- */
-
-#include "../Core/util/NonMPL2.h"
-
#ifndef EIGEN_SIMPLICIAL_CHOLESKY_H
#define EIGEN_SIMPLICIAL_CHOLESKY_H
@@ -215,27 +175,6 @@ class SimplicialCholeskyBase : internal::noncopyable
dest = m_Pinv * dest;
}
- /** \internal */
- template<typename Rhs, typename DestScalar, int DestOptions, typename DestIndex>
- void _solve_sparse(const Rhs& b, SparseMatrix<DestScalar,DestOptions,DestIndex> &dest) const
- {
- eigen_assert(m_factorizationIsOk && "The decomposition is not in a valid state for solving, you must first call either compute() or symbolic()/numeric()");
- eigen_assert(m_matrix.rows()==b.rows());
-
- // we process the sparse rhs per block of NbColsAtOnce columns temporarily stored into a dense matrix.
- static const int NbColsAtOnce = 4;
- int rhsCols = b.cols();
- int size = b.rows();
- Eigen::Matrix<DestScalar,Dynamic,Dynamic> tmp(size,rhsCols);
- for(int k=0; k<rhsCols; k+=NbColsAtOnce)
- {
- int actualCols = std::min<int>(rhsCols-k, NbColsAtOnce);
- tmp.leftCols(actualCols) = b.middleCols(k,actualCols);
- tmp.leftCols(actualCols) = derived().solve(tmp.leftCols(actualCols));
- dest.middleCols(k,actualCols) = tmp.leftCols(actualCols).sparseView();
- }
- }
-
#endif // EIGEN_PARSED_BY_DOXYGEN
protected:
@@ -693,153 +632,6 @@ void SimplicialCholeskyBase<Derived>::ordering(const MatrixType& a, CholMatrixTy
ap.template selfadjointView<Upper>() = a.template selfadjointView<UpLo>().twistedBy(m_P);
}
-template<typename Derived>
-void SimplicialCholeskyBase<Derived>::analyzePattern_preordered(const CholMatrixType& ap, bool doLDLT)
-{
- const Index size = ap.rows();
- m_matrix.resize(size, size);
- m_parent.resize(size);
- m_nonZerosPerCol.resize(size);
-
- ei_declare_aligned_stack_constructed_variable(Index, tags, size, 0);
-
- for(Index k = 0; k < size; ++k)
- {
- /* L(k,:) pattern: all nodes reachable in etree from nz in A(0:k-1,k) */
- m_parent[k] = -1; /* parent of k is not yet known */
- tags[k] = k; /* mark node k as visited */
- m_nonZerosPerCol[k] = 0; /* count of nonzeros in column k of L */
- for(typename CholMatrixType::InnerIterator it(ap,k); it; ++it)
- {
- Index i = it.index();
- if(i < k)
- {
- /* follow path from i to root of etree, stop at flagged node */
- for(; tags[i] != k; i = m_parent[i])
- {
- /* find parent of i if not yet determined */
- if (m_parent[i] == -1)
- m_parent[i] = k;
- m_nonZerosPerCol[i]++; /* L (k,i) is nonzero */
- tags[i] = k; /* mark i as visited */
- }
- }
- }
- }
-
- /* construct Lp index array from m_nonZerosPerCol column counts */
- Index* Lp = m_matrix.outerIndexPtr();
- Lp[0] = 0;
- for(Index k = 0; k < size; ++k)
- Lp[k+1] = Lp[k] + m_nonZerosPerCol[k] + (doLDLT ? 0 : 1);
-
- m_matrix.resizeNonZeros(Lp[size]);
-
- m_isInitialized = true;
- m_info = Success;
- m_analysisIsOk = true;
- m_factorizationIsOk = false;
-}
-
-
-template<typename Derived>
-template<bool DoLDLT>
-void SimplicialCholeskyBase<Derived>::factorize_preordered(const CholMatrixType& ap)
-{
- using std::sqrt;
-
- eigen_assert(m_analysisIsOk && "You must first call analyzePattern()");
- eigen_assert(ap.rows()==ap.cols());
- const Index size = ap.rows();
- eigen_assert(m_parent.size()==size);
- eigen_assert(m_nonZerosPerCol.size()==size);
-
- const Index* Lp = m_matrix.outerIndexPtr();
- Index* Li = m_matrix.innerIndexPtr();
- Scalar* Lx = m_matrix.valuePtr();
-
- ei_declare_aligned_stack_constructed_variable(Scalar, y, size, 0);
- ei_declare_aligned_stack_constructed_variable(Index, pattern, size, 0);
- ei_declare_aligned_stack_constructed_variable(Index, tags, size, 0);
-
- bool ok = true;
- m_diag.resize(DoLDLT ? size : 0);
-
- for(Index k = 0; k < size; ++k)
- {
- // compute nonzero pattern of kth row of L, in topological order
- y[k] = 0.0; // Y(0:k) is now all zero
- Index top = size; // stack for pattern is empty
- tags[k] = k; // mark node k as visited
- m_nonZerosPerCol[k] = 0; // count of nonzeros in column k of L
- for(typename MatrixType::InnerIterator it(ap,k); it; ++it)
- {
- Index i = it.index();
- if(i <= k)
- {
- y[i] += internal::conj(it.value()); /* scatter A(i,k) into Y (sum duplicates) */
- Index len;
- for(len = 0; tags[i] != k; i = m_parent[i])
- {
- pattern[len++] = i; /* L(k,i) is nonzero */
- tags[i] = k; /* mark i as visited */
- }
- while(len > 0)
- pattern[--top] = pattern[--len];
- }
- }
-
- /* compute numerical values kth row of L (a sparse triangular solve) */
-
- RealScalar d = internal::real(y[k]) * m_shiftScale + m_shiftOffset; // get D(k,k), apply the shift function, and clear Y(k)
- y[k] = 0.0;
- for(; top < size; ++top)
- {
- Index i = pattern[top]; /* pattern[top:n-1] is pattern of L(:,k) */
- Scalar yi = y[i]; /* get and clear Y(i) */
- y[i] = 0.0;
-
- /* the nonzero entry L(k,i) */
- Scalar l_ki;
- if(DoLDLT)
- l_ki = yi / m_diag[i];
- else
- yi = l_ki = yi / Lx[Lp[i]];
-
- Index p2 = Lp[i] + m_nonZerosPerCol[i];
- Index p;
- for(p = Lp[i] + (DoLDLT ? 0 : 1); p < p2; ++p)
- y[Li[p]] -= internal::conj(Lx[p]) * yi;
- d -= internal::real(l_ki * internal::conj(yi));
- Li[p] = k; /* store L(k,i) in column form of L */
- Lx[p] = l_ki;
- ++m_nonZerosPerCol[i]; /* increment count of nonzeros in col i */
- }
- if(DoLDLT)
- {
- m_diag[k] = d;
- if(d == RealScalar(0))
- {
- ok = false; /* failure, D(k,k) is zero */
- break;
- }
- }
- else
- {
- Index p = Lp[k] + m_nonZerosPerCol[k]++;
- Li[p] = k ; /* store L(k,k) = sqrt (d) in column k */
- if(d <= RealScalar(0)) {
- ok = false; /* failure, matrix is not positive definite */
- break;
- }
- Lx[p] = sqrt(d) ;
- }
- }
-
- m_info = ok ? Success : NumericalIssue;
- m_factorizationIsOk = true;
-}
-
namespace internal {
template<typename Derived, typename Rhs>
@@ -864,7 +656,7 @@ struct sparse_solve_retval<SimplicialCholeskyBase<Derived>, Rhs>
template<typename Dest> void evalTo(Dest& dst) const
{
- dec().derived()._solve_sparse(rhs(),dst);
+ this->defaultEvalTo(dst);
}
};
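The ad-hoc _solve_sparse() path removed above is replaced by the generic defaultEvalTo() machinery; from the caller's side, dense and sparse right-hand sides both go through the same solve() call. A minimal sketch, assuming A is symmetric positive definite and the helper name solve_spd is illustrative:

#include <Eigen/SparseCholesky>

void solve_spd(const Eigen::SparseMatrix<double>& A,
               const Eigen::VectorXd& b,
               const Eigen::SparseMatrix<double>& B)
{
  Eigen::SimplicialLDLT<Eigen::SparseMatrix<double> > ldlt(A);
  if (ldlt.info() != Eigen::Success)
    return;                              // numerical issue (e.g. zero pivot)
  Eigen::VectorXd x = ldlt.solve(b);     // dense right-hand side
  Eigen::SparseMatrix<double> X;
  X = ldlt.solve(B);                     // sparse right-hand side, handled by defaultEvalTo
  (void)x; (void)X;
}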
diff --git a/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h b/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h
new file mode 100644
index 000000000..4b249868f
--- /dev/null
+++ b/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h
@@ -0,0 +1,199 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2012 Gael Guennebaud <gael.guennebaud@inria.fr>
+
+/*
+
+NOTE: these functions have been adapted from the LDL library:
+
+LDL Copyright (c) 2005 by Timothy A. Davis. All Rights Reserved.
+
+LDL License:
+
+ Your use or distribution of LDL or any modified version of
+ LDL implies that you agree to this License.
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ USA
+
+ Permission is hereby granted to use or copy this program under the
+ terms of the GNU LGPL, provided that the Copyright, this License,
+ and the Availability of the original version is retained on all copies.
+ User documentation of any code that uses this code or any modified
+ version of this code must cite the Copyright, this License, the
+ Availability note, and "Used by permission." Permission to modify
+ the code and to distribute modified code is granted, provided the
+ Copyright, this License, and the Availability note are retained,
+ and a notice that the code was modified is included.
+ */
+
+#include "../Core/util/NonMPL2.h"
+
+#ifndef EIGEN_SIMPLICIAL_CHOLESKY_IMPL_H
+#define EIGEN_SIMPLICIAL_CHOLESKY_IMPL_H
+
+namespace Eigen {
+
+template<typename Derived>
+void SimplicialCholeskyBase<Derived>::analyzePattern_preordered(const CholMatrixType& ap, bool doLDLT)
+{
+ const Index size = ap.rows();
+ m_matrix.resize(size, size);
+ m_parent.resize(size);
+ m_nonZerosPerCol.resize(size);
+
+ ei_declare_aligned_stack_constructed_variable(Index, tags, size, 0);
+
+ for(Index k = 0; k < size; ++k)
+ {
+ /* L(k,:) pattern: all nodes reachable in etree from nz in A(0:k-1,k) */
+ m_parent[k] = -1; /* parent of k is not yet known */
+ tags[k] = k; /* mark node k as visited */
+ m_nonZerosPerCol[k] = 0; /* count of nonzeros in column k of L */
+ for(typename CholMatrixType::InnerIterator it(ap,k); it; ++it)
+ {
+ Index i = it.index();
+ if(i < k)
+ {
+ /* follow path from i to root of etree, stop at flagged node */
+ for(; tags[i] != k; i = m_parent[i])
+ {
+ /* find parent of i if not yet determined */
+ if (m_parent[i] == -1)
+ m_parent[i] = k;
+ m_nonZerosPerCol[i]++; /* L (k,i) is nonzero */
+ tags[i] = k; /* mark i as visited */
+ }
+ }
+ }
+ }
+
+ /* construct Lp index array from m_nonZerosPerCol column counts */
+ Index* Lp = m_matrix.outerIndexPtr();
+ Lp[0] = 0;
+ for(Index k = 0; k < size; ++k)
+ Lp[k+1] = Lp[k] + m_nonZerosPerCol[k] + (doLDLT ? 0 : 1);
+
+ m_matrix.resizeNonZeros(Lp[size]);
+
+ m_isInitialized = true;
+ m_info = Success;
+ m_analysisIsOk = true;
+ m_factorizationIsOk = false;
+}
+
+
+template<typename Derived>
+template<bool DoLDLT>
+void SimplicialCholeskyBase<Derived>::factorize_preordered(const CholMatrixType& ap)
+{
+ using std::sqrt;
+
+ eigen_assert(m_analysisIsOk && "You must first call analyzePattern()");
+ eigen_assert(ap.rows()==ap.cols());
+ const Index size = ap.rows();
+ eigen_assert(m_parent.size()==size);
+ eigen_assert(m_nonZerosPerCol.size()==size);
+
+ const Index* Lp = m_matrix.outerIndexPtr();
+ Index* Li = m_matrix.innerIndexPtr();
+ Scalar* Lx = m_matrix.valuePtr();
+
+ ei_declare_aligned_stack_constructed_variable(Scalar, y, size, 0);
+ ei_declare_aligned_stack_constructed_variable(Index, pattern, size, 0);
+ ei_declare_aligned_stack_constructed_variable(Index, tags, size, 0);
+
+ bool ok = true;
+ m_diag.resize(DoLDLT ? size : 0);
+
+ for(Index k = 0; k < size; ++k)
+ {
+ // compute nonzero pattern of kth row of L, in topological order
+ y[k] = 0.0; // Y(0:k) is now all zero
+ Index top = size; // stack for pattern is empty
+ tags[k] = k; // mark node k as visited
+ m_nonZerosPerCol[k] = 0; // count of nonzeros in column k of L
+ for(typename MatrixType::InnerIterator it(ap,k); it; ++it)
+ {
+ Index i = it.index();
+ if(i <= k)
+ {
+ y[i] += internal::conj(it.value()); /* scatter A(i,k) into Y (sum duplicates) */
+ Index len;
+ for(len = 0; tags[i] != k; i = m_parent[i])
+ {
+ pattern[len++] = i; /* L(k,i) is nonzero */
+ tags[i] = k; /* mark i as visited */
+ }
+ while(len > 0)
+ pattern[--top] = pattern[--len];
+ }
+ }
+
+ /* compute numerical values kth row of L (a sparse triangular solve) */
+
+ RealScalar d = internal::real(y[k]) * m_shiftScale + m_shiftOffset; // get D(k,k), apply the shift function, and clear Y(k)
+ y[k] = 0.0;
+ for(; top < size; ++top)
+ {
+ Index i = pattern[top]; /* pattern[top:n-1] is pattern of L(:,k) */
+ Scalar yi = y[i]; /* get and clear Y(i) */
+ y[i] = 0.0;
+
+ /* the nonzero entry L(k,i) */
+ Scalar l_ki;
+ if(DoLDLT)
+ l_ki = yi / m_diag[i];
+ else
+ yi = l_ki = yi / Lx[Lp[i]];
+
+ Index p2 = Lp[i] + m_nonZerosPerCol[i];
+ Index p;
+ for(p = Lp[i] + (DoLDLT ? 0 : 1); p < p2; ++p)
+ y[Li[p]] -= internal::conj(Lx[p]) * yi;
+ d -= internal::real(l_ki * internal::conj(yi));
+ Li[p] = k; /* store L(k,i) in column form of L */
+ Lx[p] = l_ki;
+ ++m_nonZerosPerCol[i]; /* increment count of nonzeros in col i */
+ }
+ if(DoLDLT)
+ {
+ m_diag[k] = d;
+ if(d == RealScalar(0))
+ {
+ ok = false; /* failure, D(k,k) is zero */
+ break;
+ }
+ }
+ else
+ {
+ Index p = Lp[k] + m_nonZerosPerCol[k]++;
+ Li[p] = k ; /* store L(k,k) = sqrt (d) in column k */
+ if(d <= RealScalar(0)) {
+ ok = false; /* failure, matrix is not positive definite */
+ break;
+ }
+ Lx[p] = sqrt(d) ;
+ }
+ }
+
+ m_info = ok ? Success : NumericalIssue;
+ m_factorizationIsOk = true;
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_SIMPLICIAL_CHOLESKY_IMPL_H
diff --git a/Eigen/src/SparseCore/AmbiVector.h b/Eigen/src/SparseCore/AmbiVector.h
index dca738751..17fff96a7 100644
--- a/Eigen/src/SparseCore/AmbiVector.h
+++ b/Eigen/src/SparseCore/AmbiVector.h
@@ -288,7 +288,7 @@ class AmbiVector<_Scalar,_Index>::Iterator
* In practice, all coefficients having a magnitude smaller than \a epsilon
* are skipped.
*/
- Iterator(const AmbiVector& vec, RealScalar epsilon = 0)
+ Iterator(const AmbiVector& vec, const RealScalar& epsilon = 0)
: m_vector(vec)
{
using std::abs;
diff --git a/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h b/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h
index 16b5e1dba..4b13f08d4 100644
--- a/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h
+++ b/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h
@@ -121,9 +121,9 @@ static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& r
namespace internal {
template<typename Lhs, typename Rhs, typename ResultType,
- int LhsStorageOrder = traits<Lhs>::Flags&RowMajorBit,
- int RhsStorageOrder = traits<Rhs>::Flags&RowMajorBit,
- int ResStorageOrder = traits<ResultType>::Flags&RowMajorBit>
+ int LhsStorageOrder = (traits<Lhs>::Flags&RowMajorBit) ? RowMajor : ColMajor,
+ int RhsStorageOrder = (traits<Rhs>::Flags&RowMajorBit) ? RowMajor : ColMajor,
+ int ResStorageOrder = (traits<ResultType>::Flags&RowMajorBit) ? RowMajor : ColMajor>
struct conservative_sparse_sparse_product_selector;
template<typename Lhs, typename Rhs, typename ResultType>
diff --git a/Eigen/src/SparseCore/SparseBlock.h b/Eigen/src/SparseCore/SparseBlock.h
index b1eaf0b2c..e025e4d40 100644
--- a/Eigen/src/SparseCore/SparseBlock.h
+++ b/Eigen/src/SparseCore/SparseBlock.h
@@ -61,7 +61,7 @@ public:
protected:
- const typename XprType::Nested m_matrix;
+ typename XprType::Nested m_matrix;
Index m_outerStart;
const internal::variable_if_dynamic<Index, OuterSize> m_outerSize;
};
@@ -129,59 +129,58 @@ public:
// 2 - let's check whether there is enough allocated memory
Index nnz = tmp.nonZeros();
- Index nnz_previous = nonZeros();
- Index free_size = Index(matrix.data().allocatedSize()) + nnz_previous;
- Index nnz_head = m_outerStart==0 ? 0 : matrix.outerIndexPtr()[m_outerStart];
- Index tail = m_matrix.outerIndexPtr()[m_outerStart+m_outerSize.value()];
- Index nnz_tail = matrix.nonZeros() - tail;
+ Index start = m_outerStart==0 ? 0 : matrix.outerIndexPtr()[m_outerStart]; // starting position of the current block
+ Index end = m_matrix.outerIndexPtr()[m_outerStart+m_outerSize.value()]; // ending position of the current block
+ Index block_size = end - start; // available room in the current block
+ Index tail_size = m_matrix.outerIndexPtr()[m_matrix.outerSize()] - end;
+
+ Index free_size = m_matrix.isCompressed()
+ ? Index(matrix.data().allocatedSize()) + block_size
+ : block_size;
- if(nnz>free_size)
+ if(nnz>free_size)
{
// realloc manually to reduce copies
- typename SparseMatrixType::Storage newdata(m_matrix.nonZeros() - nnz_previous + nnz);
+ typename SparseMatrixType::Storage newdata(m_matrix.data().allocatedSize() - block_size + nnz);
- std::memcpy(&newdata.value(0), &m_matrix.data().value(0), nnz_head*sizeof(Scalar));
- std::memcpy(&newdata.index(0), &m_matrix.data().index(0), nnz_head*sizeof(Index));
+ std::memcpy(&newdata.value(0), &m_matrix.data().value(0), start*sizeof(Scalar));
+ std::memcpy(&newdata.index(0), &m_matrix.data().index(0), start*sizeof(Index));
- std::memcpy(&newdata.value(nnz_head), &tmp.data().value(0), nnz*sizeof(Scalar));
- std::memcpy(&newdata.index(nnz_head), &tmp.data().index(0), nnz*sizeof(Index));
+ std::memcpy(&newdata.value(start), &tmp.data().value(0), nnz*sizeof(Scalar));
+ std::memcpy(&newdata.index(start), &tmp.data().index(0), nnz*sizeof(Index));
- std::memcpy(&newdata.value(nnz_head+nnz), &matrix.data().value(tail), nnz_tail*sizeof(Scalar));
- std::memcpy(&newdata.index(nnz_head+nnz), &matrix.data().index(tail), nnz_tail*sizeof(Index));
+ std::memcpy(&newdata.value(start+nnz), &matrix.data().value(end), tail_size*sizeof(Scalar));
+ std::memcpy(&newdata.index(start+nnz), &matrix.data().index(end), tail_size*sizeof(Index));
+
+ newdata.resize(m_matrix.outerIndexPtr()[m_matrix.outerSize()] - block_size + nnz);
matrix.data().swap(newdata);
}
else
{
// no need to realloc, simply copy the tail at its respective position and insert tmp
- matrix.data().resize(nnz_head + nnz + nnz_tail);
-
- if(nnz<nnz_previous)
- {
- std::memcpy(&matrix.data().value(nnz_head+nnz), &matrix.data().value(tail), nnz_tail*sizeof(Scalar));
- std::memcpy(&matrix.data().index(nnz_head+nnz), &matrix.data().index(tail), nnz_tail*sizeof(Index));
- }
- else
- {
- for(Index i=nnz_tail-1; i>=0; --i)
- {
- matrix.data().value(nnz_head+nnz+i) = matrix.data().value(tail+i);
- matrix.data().index(nnz_head+nnz+i) = matrix.data().index(tail+i);
- }
- }
-
- std::memcpy(&matrix.data().value(nnz_head), &tmp.data().value(0), nnz*sizeof(Scalar));
- std::memcpy(&matrix.data().index(nnz_head), &tmp.data().index(0), nnz*sizeof(Index));
+ matrix.data().resize(start + nnz + tail_size);
+
+ std::memmove(&matrix.data().value(start+nnz), &matrix.data().value(end), tail_size*sizeof(Scalar));
+ std::memmove(&matrix.data().index(start+nnz), &matrix.data().index(end), tail_size*sizeof(Index));
+
+ std::memcpy(&matrix.data().value(start), &tmp.data().value(0), nnz*sizeof(Scalar));
+ std::memcpy(&matrix.data().index(start), &tmp.data().index(0), nnz*sizeof(Index));
}
+
+ // update innerNonZeros
+ if(!m_matrix.isCompressed())
+ for(Index j=0; j<m_outerSize.value(); ++j)
+ matrix.innerNonZeroPtr()[m_outerStart+j] = tmp.innerVector(j).nonZeros();
// update outer index pointers
- Index p = nnz_head;
+ Index p = start;
for(Index k=0; k<m_outerSize.value(); ++k)
{
matrix.outerIndexPtr()[m_outerStart+k] = p;
p += tmp.innerVector(k).nonZeros();
}
- std::ptrdiff_t offset = nnz - nnz_previous;
+ std::ptrdiff_t offset = nnz - block_size;
for(Index k = m_outerStart + m_outerSize.value(); k<=matrix.outerSize(); ++k)
{
matrix.outerIndexPtr()[k] += offset;
@@ -353,7 +352,7 @@ public:
m_block(block),
m_end(IsRowMajor ? block.m_startCol.value()+block.m_blockCols.value() : block.m_startRow.value()+block.m_blockRows.value())
{
- while(Base::index() < (IsRowMajor ? m_block.m_startCol.value() : m_block.m_startRow.value()))
+ while( (Base::operator bool()) && (Base::index() < (IsRowMajor ? m_block.m_startCol.value() : m_block.m_startRow.value())) )
Base::operator++();
}
@@ -376,7 +375,7 @@ public:
m_block(block),
m_begin(IsRowMajor ? block.m_startCol.value() : block.m_startRow.value())
{
- while(Base::index() >= (IsRowMajor ? m_block.m_startCol.value()+block.m_blockCols.value() : m_block.m_startRow.value()+block.m_blockRows.value()) )
+ while( (Base::operator bool()) && (Base::index() >= (IsRowMajor ? m_block.m_startCol.value()+block.m_blockCols.value() : m_block.m_startRow.value()+block.m_blockRows.value())) )
Base::operator--();
}
@@ -391,7 +390,7 @@ public:
friend class InnerIterator;
friend class ReverseInnerIterator;
- const typename XprType::Nested m_matrix;
+ typename XprType::Nested m_matrix;
const internal::variable_if_dynamic<Index, XprType::RowsAtCompileTime == 1 ? 0 : Dynamic> m_startRow;
const internal::variable_if_dynamic<Index, XprType::ColsAtCompileTime == 1 ? 0 : Dynamic> m_startCol;
const internal::variable_if_dynamic<Index, RowsAtCompileTime> m_blockRows;
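The SparseBlock changes above rework how a sparse expression is written into a contiguous set of inner vectors, handling uncompressed storage and using memmove for the overlapping tail copy. A minimal sketch of the write path being exercised (the helper name overwrite_columns is illustrative):

#include <Eigen/SparseCore>

// Overwrite a contiguous range of columns of a column-major sparse matrix.
void overwrite_columns(Eigen::SparseMatrix<double>& A,
                       const Eigen::SparseMatrix<double>& B,
                       int firstCol)
{
  // Assumes B.rows() == A.rows() and firstCol + B.cols() <= A.cols().
  A.middleCols(firstCol, B.cols()) = B;   // triggers the block assignment above
}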
diff --git a/Eigen/src/SparseCore/SparseColEtree.h b/Eigen/src/SparseCore/SparseColEtree.h
index df6b9f966..f89ca3814 100644
--- a/Eigen/src/SparseCore/SparseColEtree.h
+++ b/Eigen/src/SparseCore/SparseColEtree.h
@@ -36,11 +36,11 @@ namespace Eigen {
namespace internal {
/** Find the root of the tree/set containing the vertex i : Use Path halving */
-template<typename IndexVector>
-int etree_find (int i, IndexVector& pp)
+template<typename Index, typename IndexVector>
+Index etree_find (Index i, IndexVector& pp)
{
- int p = pp(i); // Parent
- int gp = pp(p); // Grand parent
+ Index p = pp(i); // Parent
+ Index gp = pp(p); // Grand parent
while (gp != p)
{
pp(i) = gp; // Parent pointer on find path is changed to former grand parent
@@ -55,9 +55,10 @@ int etree_find (int i, IndexVector& pp)
* \param mat The matrix in column-major format.
* \param parent The elimination tree
* \param firstRowElt The column index of the first element in each row
+ * \param perm The permutation to apply to the columns of \b mat
*/
template <typename MatrixType, typename IndexVector>
-int coletree(const MatrixType& mat, IndexVector& parent, IndexVector& firstRowElt)
+int coletree(const MatrixType& mat, IndexVector& parent, IndexVector& firstRowElt, typename MatrixType::Index *perm=0)
{
typedef typename MatrixType::Index Index;
Index nc = mat.cols(); // Number of columns
@@ -68,14 +69,16 @@ int coletree(const MatrixType& mat, IndexVector& parent, IndexVector& firstRowEl
pp.setZero(); // Initialize disjoint sets
parent.resize(mat.cols());
//Compute first nonzero column in each row
- int row,col;
+ Index row,col;
firstRowElt.resize(m);
firstRowElt.setConstant(nc);
firstRowElt.segment(0, nc).setLinSpaced(nc, 0, nc-1);
bool found_diag;
for (col = 0; col < nc; col++)
{
- for (typename MatrixType::InnerIterator it(mat, col); it; ++it)
+ Index pcol = col;
+ if(perm) pcol = perm[col];
+ for (typename MatrixType::InnerIterator it(mat, pcol); it; ++it)
{
row = it.row();
firstRowElt(row) = (std::min)(firstRowElt(row), col);
@@ -85,7 +88,7 @@ int coletree(const MatrixType& mat, IndexVector& parent, IndexVector& firstRowEl
except use (firstRowElt[r],c) in place of an edge (r,c) of A.
Thus each row clique in A'*A is replaced by a star
centered at its first vertex, which has the same fill. */
- int rset, cset, rroot;
+ Index rset, cset, rroot;
for (col = 0; col < nc; col++)
{
found_diag = false;
@@ -95,9 +98,11 @@ int coletree(const MatrixType& mat, IndexVector& parent, IndexVector& firstRowEl
parent(col) = nc;
/* The diagonal element is treated here even if it does not exist in the matrix
* hence the loop is executed once more */
- for (typename MatrixType::InnerIterator it(mat, col); it||!found_diag; ++it)
+ Index pcol = col;
+ if(perm) pcol = perm[col];
+ for (typename MatrixType::InnerIterator it(mat, pcol); it||!found_diag; ++it)
{ // A sequence of interleaved find and union is performed
- int i = col;
+ Index i = col;
if(it) i = it.index();
if (i == col) found_diag = true;
row = firstRowElt(i);
@@ -120,10 +125,10 @@ int coletree(const MatrixType& mat, IndexVector& parent, IndexVector& firstRowEl
* Depth-first search from vertex n. No recursion.
* This routine was contributed by Cédric Doucet, CEDRAT Group, Meylan, France.
*/
-template <typename IndexVector>
-void nr_etdfs (int n, IndexVector& parent, IndexVector& first_kid, IndexVector& next_kid, IndexVector& post, int postnum)
+template <typename Index, typename IndexVector>
+void nr_etdfs (Index n, IndexVector& parent, IndexVector& first_kid, IndexVector& next_kid, IndexVector& post, Index postnum)
{
- int current = n, first, next;
+ Index current = n, first, next;
while (postnum != n)
{
// No kid for the current node
@@ -167,18 +172,18 @@ void nr_etdfs (int n, IndexVector& parent, IndexVector& first_kid, IndexVector&
* \param parent Input tree
* \param post postordered tree
*/
-template <typename IndexVector>
-void treePostorder(int n, IndexVector& parent, IndexVector& post)
+template <typename Index, typename IndexVector>
+void treePostorder(Index n, IndexVector& parent, IndexVector& post)
{
IndexVector first_kid, next_kid; // Linked list of children
- int postnum;
+ Index postnum;
// Allocate storage for working arrays and results
first_kid.resize(n+1);
next_kid.setZero(n+1);
post.setZero(n+1);
// Set up structure describing children
- int v, dad;
+ Index v, dad;
first_kid.setConstant(-1);
for (v = n-1; v >= 0; v--)
{
diff --git a/Eigen/src/SparseCore/SparseDot.h b/Eigen/src/SparseCore/SparseDot.h
index b25911c72..dfeb3a8df 100644
--- a/Eigen/src/SparseCore/SparseDot.h
+++ b/Eigen/src/SparseCore/SparseDot.h
@@ -54,8 +54,8 @@ SparseMatrixBase<Derived>::dot(const SparseMatrixBase<OtherDerived>& other) cons
typedef typename internal::remove_all<Nested>::type NestedCleaned;
typedef typename internal::remove_all<OtherNested>::type OtherNestedCleaned;
- const Nested nthis(derived());
- const OtherNested nother(other.derived());
+ Nested nthis(derived());
+ OtherNested nother(other.derived());
typename NestedCleaned::InnerIterator i(nthis,0);
typename OtherNestedCleaned::InnerIterator j(nother,0);
diff --git a/Eigen/src/SparseCore/SparseMatrix.h b/Eigen/src/SparseCore/SparseMatrix.h
index 5ff01da28..dc57f77fc 100644
--- a/Eigen/src/SparseCore/SparseMatrix.h
+++ b/Eigen/src/SparseCore/SparseMatrix.h
@@ -213,7 +213,7 @@ class SparseMatrix
* inserted in increasing inner index order, and in O(nnz_j) for a random insertion.
*
*/
- EIGEN_DONT_INLINE Scalar& insert(Index row, Index col)
+ Scalar& insert(Index row, Index col)
{
if(isCompressed())
{
@@ -300,11 +300,11 @@ class SparseMatrix
totalReserveSize += reserveSizes[j];
}
m_data.reserve(totalReserveSize);
- std::ptrdiff_t previousOuterIndex = m_outerIndex[m_outerSize];
- for(std::ptrdiff_t j=m_outerSize-1; j>=0; --j)
+ Index previousOuterIndex = m_outerIndex[m_outerSize];
+ for(Index j=m_outerSize-1; j>=0; --j)
{
- ptrdiff_t innerNNZ = previousOuterIndex - m_outerIndex[j];
- for(std::ptrdiff_t i=innerNNZ-1; i>=0; --i)
+ Index innerNNZ = previousOuterIndex - m_outerIndex[j];
+ for(Index i=innerNNZ-1; i>=0; --i)
{
m_data.index(newOuterIndex[j]+i) = m_data.index(m_outerIndex[j]+i);
m_data.value(newOuterIndex[j]+i) = m_data.value(m_outerIndex[j]+i);
@@ -327,19 +327,19 @@ class SparseMatrix
{
newOuterIndex[j] = count;
Index alreadyReserved = (m_outerIndex[j+1]-m_outerIndex[j]) - m_innerNonZeros[j];
- Index toReserve = std::max<std::ptrdiff_t>(reserveSizes[j], alreadyReserved);
+ Index toReserve = std::max<Index>(reserveSizes[j], alreadyReserved);
count += toReserve + m_innerNonZeros[j];
}
newOuterIndex[m_outerSize] = count;
m_data.resize(count);
- for(ptrdiff_t j=m_outerSize-1; j>=0; --j)
+ for(Index j=m_outerSize-1; j>=0; --j)
{
- std::ptrdiff_t offset = newOuterIndex[j] - m_outerIndex[j];
+ Index offset = newOuterIndex[j] - m_outerIndex[j];
if(offset>0)
{
- std::ptrdiff_t innerNNZ = m_innerNonZeros[j];
- for(std::ptrdiff_t i=innerNNZ-1; i>=0; --i)
+ Index innerNNZ = m_innerNonZeros[j];
+ for(Index i=innerNNZ-1; i>=0; --i)
{
m_data.index(newOuterIndex[j]+i) = m_data.index(m_outerIndex[j]+i);
m_data.value(newOuterIndex[j]+i) = m_data.value(m_outerIndex[j]+i);
@@ -434,7 +434,7 @@ class SparseMatrix
/** \internal
* same as insert(Index,Index) except that the indices are given relative to the storage order */
- EIGEN_DONT_INLINE Scalar& insertByOuterInner(Index j, Index i)
+ Scalar& insertByOuterInner(Index j, Index i)
{
return insert(IsRowMajor ? j : i, IsRowMajor ? i : j);
}
@@ -451,7 +451,7 @@ class SparseMatrix
for(Index j=1; j<m_outerSize; ++j)
{
Index nextOldStart = m_outerIndex[j+1];
- std::ptrdiff_t offset = oldStart - m_outerIndex[j];
+ Index offset = oldStart - m_outerIndex[j];
if(offset>0)
{
for(Index k=0; k<m_innerNonZeros[j]; ++k)
@@ -474,7 +474,7 @@ class SparseMatrix
{
if(m_innerNonZeros != 0)
return;
- m_innerNonZeros = new Index[m_outerSize];
+ m_innerNonZeros = static_cast<Index*>(std::malloc(m_outerSize * sizeof(Index)));
for (int i = 0; i < m_outerSize; i++)
{
m_innerNonZeros[i] = m_outerIndex[i+1] - m_outerIndex[i];
@@ -711,62 +711,7 @@ class SparseMatrix
#endif
template<typename OtherDerived>
- EIGEN_DONT_INLINE SparseMatrix& operator=(const SparseMatrixBase<OtherDerived>& other)
- {
- const bool needToTranspose = (Flags & RowMajorBit) != (OtherDerived::Flags & RowMajorBit);
- if (needToTranspose)
- {
- // two passes algorithm:
- // 1 - compute the number of coeffs per dest inner vector
- // 2 - do the actual copy/eval
- // Since each coeff of the rhs has to be evaluated twice, let's evaluate it if needed
- typedef typename internal::nested<OtherDerived,2>::type OtherCopy;
- typedef typename internal::remove_all<OtherCopy>::type _OtherCopy;
- OtherCopy otherCopy(other.derived());
-
- SparseMatrix dest(other.rows(),other.cols());
- Eigen::Map<Matrix<Index, Dynamic, 1> > (dest.m_outerIndex,dest.outerSize()).setZero();
-
- // pass 1
- // FIXME the above copy could be merged with that pass
- for (Index j=0; j<otherCopy.outerSize(); ++j)
- for (typename _OtherCopy::InnerIterator it(otherCopy, j); it; ++it)
- ++dest.m_outerIndex[it.index()];
-
- // prefix sum
- Index count = 0;
- VectorXi positions(dest.outerSize());
- for (Index j=0; j<dest.outerSize(); ++j)
- {
- Index tmp = dest.m_outerIndex[j];
- dest.m_outerIndex[j] = count;
- positions[j] = count;
- count += tmp;
- }
- dest.m_outerIndex[dest.outerSize()] = count;
- // alloc
- dest.m_data.resize(count);
- // pass 2
- for (Index j=0; j<otherCopy.outerSize(); ++j)
- {
- for (typename _OtherCopy::InnerIterator it(otherCopy, j); it; ++it)
- {
- Index pos = positions[it.index()]++;
- dest.m_data.index(pos) = j;
- dest.m_data.value(pos) = it.value();
- }
- }
- this->swap(dest);
- return *this;
- }
- else
- {
- if(other.isRValue())
- initAssignment(other.derived());
- // there is no special optimization
- return Base::operator=(other.derived());
- }
- }
+ EIGEN_DONT_INLINE SparseMatrix& operator=(const SparseMatrixBase<OtherDerived>& other);
friend std::ostream & operator << (std::ostream & s, const SparseMatrix& m)
{
@@ -836,111 +781,7 @@ protected:
/** \internal
* \sa insert(Index,Index) */
- EIGEN_DONT_INLINE Scalar& insertCompressed(Index row, Index col)
- {
- eigen_assert(isCompressed());
-
- const Index outer = IsRowMajor ? row : col;
- const Index inner = IsRowMajor ? col : row;
-
- Index previousOuter = outer;
- if (m_outerIndex[outer+1]==0)
- {
- // we start a new inner vector
- while (previousOuter>=0 && m_outerIndex[previousOuter]==0)
- {
- m_outerIndex[previousOuter] = static_cast<Index>(m_data.size());
- --previousOuter;
- }
- m_outerIndex[outer+1] = m_outerIndex[outer];
- }
-
- // here we have to handle the tricky case where the outerIndex array
- // starts with: [ 0 0 0 0 0 1 ...] and we are inserted in, e.g.,
- // the 2nd inner vector...
- bool isLastVec = (!(previousOuter==-1 && m_data.size()!=0))
- && (size_t(m_outerIndex[outer+1]) == m_data.size());
-
- size_t startId = m_outerIndex[outer];
- // FIXME let's make sure sizeof(long int) == sizeof(size_t)
- size_t p = m_outerIndex[outer+1];
- ++m_outerIndex[outer+1];
-
- float reallocRatio = 1;
- if (m_data.allocatedSize()<=m_data.size())
- {
- // if there is no preallocated memory, let's reserve a minimum of 32 elements
- if (m_data.size()==0)
- {
- m_data.reserve(32);
- }
- else
- {
- // we need to reallocate the data, to reduce multiple reallocations
- // we use a smart resize algorithm based on the current filling ratio
- // in addition, we use float to avoid integers overflows
- float nnzEstimate = float(m_outerIndex[outer])*float(m_outerSize)/float(outer+1);
- reallocRatio = (nnzEstimate-float(m_data.size()))/float(m_data.size());
- // furthermore we bound the realloc ratio to:
- // 1) reduce multiple minor realloc when the matrix is almost filled
- // 2) avoid to allocate too much memory when the matrix is almost empty
- reallocRatio = (std::min)((std::max)(reallocRatio,1.5f),8.f);
- }
- }
- m_data.resize(m_data.size()+1,reallocRatio);
-
- if (!isLastVec)
- {
- if (previousOuter==-1)
- {
- // oops wrong guess.
- // let's correct the outer offsets
- for (Index k=0; k<=(outer+1); ++k)
- m_outerIndex[k] = 0;
- Index k=outer+1;
- while(m_outerIndex[k]==0)
- m_outerIndex[k++] = 1;
- while (k<=m_outerSize && m_outerIndex[k]!=0)
- m_outerIndex[k++]++;
- p = 0;
- --k;
- k = m_outerIndex[k]-1;
- while (k>0)
- {
- m_data.index(k) = m_data.index(k-1);
- m_data.value(k) = m_data.value(k-1);
- k--;
- }
- }
- else
- {
- // we are not inserting into the last inner vec
- // update outer indices:
- Index j = outer+2;
- while (j<=m_outerSize && m_outerIndex[j]!=0)
- m_outerIndex[j++]++;
- --j;
- // shift data of last vecs:
- Index k = m_outerIndex[j]-1;
- while (k>=Index(p))
- {
- m_data.index(k) = m_data.index(k-1);
- m_data.value(k) = m_data.value(k-1);
- k--;
- }
- }
- }
-
- while ( (p > startId) && (m_data.index(p-1) > inner) )
- {
- m_data.index(p) = m_data.index(p-1);
- m_data.value(p) = m_data.value(p-1);
- --p;
- }
-
- m_data.index(p) = inner;
- return (m_data.value(p) = 0);
- }
+ EIGEN_DONT_INLINE Scalar& insertCompressed(Index row, Index col);
/** \internal
* A vector object that is equal to 0 everywhere but v at the position i */
@@ -959,36 +800,7 @@ protected:
/** \internal
* \sa insert(Index,Index) */
- EIGEN_DONT_INLINE Scalar& insertUncompressed(Index row, Index col)
- {
- eigen_assert(!isCompressed());
-
- const Index outer = IsRowMajor ? row : col;
- const Index inner = IsRowMajor ? col : row;
-
- std::ptrdiff_t room = m_outerIndex[outer+1] - m_outerIndex[outer];
- std::ptrdiff_t innerNNZ = m_innerNonZeros[outer];
- if(innerNNZ>=room)
- {
- // this inner vector is full, we need to reallocate the whole buffer :(
- reserve(SingletonVector(outer,std::max<std::ptrdiff_t>(2,innerNNZ)));
- }
-
- Index startId = m_outerIndex[outer];
- Index p = startId + m_innerNonZeros[outer];
- while ( (p > startId) && (m_data.index(p-1) > inner) )
- {
- m_data.index(p) = m_data.index(p-1);
- m_data.value(p) = m_data.value(p-1);
- --p;
- }
- eigen_assert((p<=startId || m_data.index(p-1)!=inner) && "you cannot insert an element that already exist, you must call coeffRef to this end");
-
- m_innerNonZeros[outer]++;
-
- m_data.index(p) = inner;
- return (m_data.value(p) = 0);
- }
+ EIGEN_DONT_INLINE Scalar& insertUncompressed(Index row, Index col);
public:
/** \internal
@@ -1097,7 +909,6 @@ void set_from_triplets(const InputIterator& begin, const InputIterator& end, Spa
EIGEN_UNUSED_VARIABLE(Options);
enum { IsRowMajor = SparseMatrixType::IsRowMajor };
typedef typename SparseMatrixType::Scalar Scalar;
- typedef typename SparseMatrixType::Index Index;
SparseMatrix<Scalar,IsRowMajor?ColMajor:RowMajor> trMat(mat.rows(),mat.cols());
// pass 1: count the nnz per inner-vector
@@ -1205,6 +1016,204 @@ void SparseMatrix<Scalar,_Options,_Index>::sumupDuplicates()
m_data.resize(m_outerIndex[m_outerSize]);
}
+template<typename Scalar, int _Options, typename _Index>
+template<typename OtherDerived>
+EIGEN_DONT_INLINE SparseMatrix<Scalar,_Options,_Index>& SparseMatrix<Scalar,_Options,_Index>::operator=(const SparseMatrixBase<OtherDerived>& other)
+{
+ const bool needToTranspose = (Flags & RowMajorBit) != (OtherDerived::Flags & RowMajorBit);
+ if (needToTranspose)
+ {
+ // two passes algorithm:
+ // 1 - compute the number of coeffs per dest inner vector
+ // 2 - do the actual copy/eval
+ // Since each coeff of the rhs has to be evaluated twice, let's evaluate it if needed
+ typedef typename internal::nested<OtherDerived,2>::type OtherCopy;
+ typedef typename internal::remove_all<OtherCopy>::type _OtherCopy;
+ OtherCopy otherCopy(other.derived());
+
+ SparseMatrix dest(other.rows(),other.cols());
+ Eigen::Map<Matrix<Index, Dynamic, 1> > (dest.m_outerIndex,dest.outerSize()).setZero();
+
+ // pass 1
+ // FIXME the above copy could be merged with that pass
+ for (Index j=0; j<otherCopy.outerSize(); ++j)
+ for (typename _OtherCopy::InnerIterator it(otherCopy, j); it; ++it)
+ ++dest.m_outerIndex[it.index()];
+
+ // prefix sum
+ Index count = 0;
+ VectorXi positions(dest.outerSize());
+ for (Index j=0; j<dest.outerSize(); ++j)
+ {
+ Index tmp = dest.m_outerIndex[j];
+ dest.m_outerIndex[j] = count;
+ positions[j] = count;
+ count += tmp;
+ }
+ dest.m_outerIndex[dest.outerSize()] = count;
+ // alloc
+ dest.m_data.resize(count);
+ // pass 2
+ for (Index j=0; j<otherCopy.outerSize(); ++j)
+ {
+ for (typename _OtherCopy::InnerIterator it(otherCopy, j); it; ++it)
+ {
+ Index pos = positions[it.index()]++;
+ dest.m_data.index(pos) = j;
+ dest.m_data.value(pos) = it.value();
+ }
+ }
+ this->swap(dest);
+ return *this;
+ }
+ else
+ {
+ if(other.isRValue())
+ initAssignment(other.derived());
+ // there is no special optimization
+ return Base::operator=(other.derived());
+ }
+}
+
+template<typename _Scalar, int _Options, typename _Index>
+EIGEN_DONT_INLINE typename SparseMatrix<_Scalar,_Options,_Index>::Scalar& SparseMatrix<_Scalar,_Options,_Index>::insertUncompressed(Index row, Index col)
+{
+ eigen_assert(!isCompressed());
+
+ const Index outer = IsRowMajor ? row : col;
+ const Index inner = IsRowMajor ? col : row;
+
+ Index room = m_outerIndex[outer+1] - m_outerIndex[outer];
+ Index innerNNZ = m_innerNonZeros[outer];
+ if(innerNNZ>=room)
+ {
+ // this inner vector is full, we need to reallocate the whole buffer :(
+ reserve(SingletonVector(outer,std::max<Index>(2,innerNNZ)));
+ }
+
+ Index startId = m_outerIndex[outer];
+ Index p = startId + m_innerNonZeros[outer];
+ while ( (p > startId) && (m_data.index(p-1) > inner) )
+ {
+ m_data.index(p) = m_data.index(p-1);
+ m_data.value(p) = m_data.value(p-1);
+ --p;
+ }
+ eigen_assert((p<=startId || m_data.index(p-1)!=inner) && "you cannot insert an element that already exist, you must call coeffRef to this end");
+
+ m_innerNonZeros[outer]++;
+
+ m_data.index(p) = inner;
+ return (m_data.value(p) = 0);
+}
+
+template<typename _Scalar, int _Options, typename _Index>
+EIGEN_DONT_INLINE typename SparseMatrix<_Scalar,_Options,_Index>::Scalar& SparseMatrix<_Scalar,_Options,_Index>::insertCompressed(Index row, Index col)
+{
+ eigen_assert(isCompressed());
+
+ const Index outer = IsRowMajor ? row : col;
+ const Index inner = IsRowMajor ? col : row;
+
+ Index previousOuter = outer;
+ if (m_outerIndex[outer+1]==0)
+ {
+ // we start a new inner vector
+ while (previousOuter>=0 && m_outerIndex[previousOuter]==0)
+ {
+ m_outerIndex[previousOuter] = static_cast<Index>(m_data.size());
+ --previousOuter;
+ }
+ m_outerIndex[outer+1] = m_outerIndex[outer];
+ }
+
+ // here we have to handle the tricky case where the outerIndex array
+ // starts with: [ 0 0 0 0 0 1 ...] and we are inserting in, e.g.,
+ // the 2nd inner vector...
+ bool isLastVec = (!(previousOuter==-1 && m_data.size()!=0))
+ && (size_t(m_outerIndex[outer+1]) == m_data.size());
+
+ size_t startId = m_outerIndex[outer];
+ // FIXME let's make sure sizeof(long int) == sizeof(size_t)
+ size_t p = m_outerIndex[outer+1];
+ ++m_outerIndex[outer+1];
+
+ float reallocRatio = 1;
+ if (m_data.allocatedSize()<=m_data.size())
+ {
+ // if there is no preallocated memory, let's reserve a minimum of 32 elements
+ if (m_data.size()==0)
+ {
+ m_data.reserve(32);
+ }
+ else
+ {
+ // we need to reallocate the data, to reduce multiple reallocations
+ // we use a smart resize algorithm based on the current filling ratio
+ // in addition, we use float to avoid integer overflows
+ float nnzEstimate = float(m_outerIndex[outer])*float(m_outerSize)/float(outer+1);
+ reallocRatio = (nnzEstimate-float(m_data.size()))/float(m_data.size());
+ // furthermore we bound the realloc ratio to:
+ // 1) reduce multiple minor realloc when the matrix is almost filled
+ // 2) avoid to allocate too much memory when the matrix is almost empty
+ reallocRatio = (std::min)((std::max)(reallocRatio,1.5f),8.f);
+ }
+ }
+ m_data.resize(m_data.size()+1,reallocRatio);
+
+ if (!isLastVec)
+ {
+ if (previousOuter==-1)
+ {
+ // oops wrong guess.
+ // let's correct the outer offsets
+ for (Index k=0; k<=(outer+1); ++k)
+ m_outerIndex[k] = 0;
+ Index k=outer+1;
+ while(m_outerIndex[k]==0)
+ m_outerIndex[k++] = 1;
+ while (k<=m_outerSize && m_outerIndex[k]!=0)
+ m_outerIndex[k++]++;
+ p = 0;
+ --k;
+ k = m_outerIndex[k]-1;
+ while (k>0)
+ {
+ m_data.index(k) = m_data.index(k-1);
+ m_data.value(k) = m_data.value(k-1);
+ k--;
+ }
+ }
+ else
+ {
+ // we are not inserting into the last inner vec
+ // update outer indices:
+ Index j = outer+2;
+ while (j<=m_outerSize && m_outerIndex[j]!=0)
+ m_outerIndex[j++]++;
+ --j;
+ // shift data of last vecs:
+ Index k = m_outerIndex[j]-1;
+ while (k>=Index(p))
+ {
+ m_data.index(k) = m_data.index(k-1);
+ m_data.value(k) = m_data.value(k-1);
+ k--;
+ }
+ }
+ }
+
+ while ( (p > startId) && (m_data.index(p-1) > inner) )
+ {
+ m_data.index(p) = m_data.index(p-1);
+ m_data.value(p) = m_data.value(p-1);
+ --p;
+ }
+
+ m_data.index(p) = inner;
+ return (m_data.value(p) = 0);
+}
+
} // end namespace Eigen
#endif // EIGEN_SPARSEMATRIX_H
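Several of the SparseMatrix hunks above switch the reserve/insert bookkeeping from std::ptrdiff_t to the matrix's Index type and move the heavy insert routines out of line. For reference, a minimal sketch of the reserve + insert + makeCompressed path those routines implement (the builder name build_tridiag is illustrative):

#include <Eigen/SparseCore>

// Build a tridiagonal matrix column by column.
Eigen::SparseMatrix<double> build_tridiag(int n)
{
  Eigen::SparseMatrix<double> A(n, n);
  A.reserve(Eigen::VectorXi::Constant(n, 3));   // per-column budget -> uncompressed mode
  for (int j = 0; j < n; ++j)
  {
    A.insert(j, j) = 2.0;                       // goes through insertUncompressed after reserve()
    if (j > 0)     A.insert(j - 1, j) = -1.0;
    if (j + 1 < n) A.insert(j + 1, j) = -1.0;
  }
  A.makeCompressed();                           // back to compressed (CCS) storage
  return A;
}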
diff --git a/Eigen/src/SparseCore/SparseSelfAdjointView.h b/Eigen/src/SparseCore/SparseSelfAdjointView.h
index c10853791..9630b60f5 100644
--- a/Eigen/src/SparseCore/SparseSelfAdjointView.h
+++ b/Eigen/src/SparseCore/SparseSelfAdjointView.h
@@ -213,7 +213,6 @@ class SparseSelfAdjointTimeDenseProduct
// TODO use alpha
eigen_assert(alpha==Scalar(1) && "alpha != 1 is not implemented yet, sorry");
typedef typename internal::remove_all<Lhs>::type _Lhs;
- typedef typename internal::remove_all<Rhs>::type _Rhs;
typedef typename _Lhs::InnerIterator LhsInnerIterator;
enum {
LhsIsRowMajor = (_Lhs::Flags&RowMajorBit)==RowMajorBit,
diff --git a/Eigen/src/SparseCore/SparseVector.h b/Eigen/src/SparseCore/SparseVector.h
index a9c8979cf..cd1e76070 100644
--- a/Eigen/src/SparseCore/SparseVector.h
+++ b/Eigen/src/SparseCore/SparseVector.h
@@ -230,7 +230,8 @@ class SparseVector
template<typename OtherDerived>
inline SparseVector& operator=(const SparseMatrixBase<OtherDerived>& other)
{
- if (int(RowsAtCompileTime)!=int(OtherDerived::RowsAtCompileTime))
+ if ( (bool(OtherDerived::IsVectorAtCompileTime) && int(RowsAtCompileTime)!=int(OtherDerived::RowsAtCompileTime))
+ || ((!bool(OtherDerived::IsVectorAtCompileTime)) && ( bool(IsColVector) ? other.cols()>1 : other.rows()>1 )))
return assign(other.transpose());
else
return assign(other);
@@ -309,30 +310,7 @@ class SparseVector
protected:
template<typename OtherDerived>
- EIGEN_DONT_INLINE SparseVector& assign(const SparseMatrixBase<OtherDerived>& _other)
- {
- const OtherDerived& other(_other.derived());
- const bool needToTranspose = (Flags & RowMajorBit) != (OtherDerived::Flags & RowMajorBit);
- if(needToTranspose)
- {
- Index size = other.size();
- Index nnz = other.nonZeros();
- resize(size);
- reserve(nnz);
- for(Index i=0; i<size; ++i)
- {
- typename OtherDerived::InnerIterator it(other, i);
- if(it)
- insert(i) = it.value();
- }
- return *this;
- }
- else
- {
- // there is no special optimization
- return Base::operator=(other);
- }
- }
+ EIGEN_DONT_INLINE SparseVector& assign(const SparseMatrixBase<OtherDerived>& _other);
Storage m_data;
Index m_size;
@@ -402,6 +380,33 @@ class SparseVector<Scalar,_Options,_Index>::ReverseInnerIterator
const Index m_start;
};
+template<typename Scalar, int _Options, typename _Index>
+template<typename OtherDerived>
+EIGEN_DONT_INLINE SparseVector<Scalar,_Options,_Index>& SparseVector<Scalar,_Options,_Index>::assign(const SparseMatrixBase<OtherDerived>& _other)
+{
+ const OtherDerived& other(_other.derived());
+ const bool needToTranspose = (Flags & RowMajorBit) != (OtherDerived::Flags & RowMajorBit);
+ if(needToTranspose)
+ {
+ Index size = other.size();
+ Index nnz = other.nonZeros();
+ resize(size);
+ reserve(nnz);
+ for(Index i=0; i<size; ++i)
+ {
+ typename OtherDerived::InnerIterator it(other, i);
+ if(it)
+ insert(i) = it.value();
+ }
+ return *this;
+ }
+ else
+ {
+ // there is no special optimization
+ return Base::operator=(other);
+ }
+}
+
} // end namespace Eigen
#endif // EIGEN_SPARSEVECTOR_H
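The revised SparseVector::operator= above also accepts right-hand sides whose vector shape is only known at run time, transposing when needed. A minimal sketch (the 1-by-n and n-by-1 matrices and the helper name are illustrative):

#include <Eigen/SparseCore>

void assign_examples(const Eigen::SparseMatrix<double>& colShaped,  // n x 1
                     const Eigen::SparseMatrix<double>& rowShaped,  // 1 x n
                     Eigen::SparseVector<double>& v)                // column vector
{
  v = colShaped;   // copied directly
  v = rowShaped;   // run-time shape check dispatches to assign(rowShaped.transpose())
}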
diff --git a/Eigen/src/SparseLU/SparseLU.h b/Eigen/src/SparseLU/SparseLU.h
index 175794811..e78250084 100644
--- a/Eigen/src/SparseLU/SparseLU.h
+++ b/Eigen/src/SparseLU/SparseLU.h
@@ -2,6 +2,7 @@
// for linear algebra.
//
// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
+// Copyright (C) 2012 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
@@ -13,6 +14,8 @@
namespace Eigen {
+template <typename _MatrixType, typename _OrderingType> class SparseLU;
+template <typename MappedSparseMatrixType> struct SparseLUMatrixLReturnType;
/** \ingroup SparseLU_Module
* \class SparseLU
*
@@ -39,7 +42,7 @@ namespace Eigen {
* \code
* VectorXd x(n), b(n);
* SparseMatrix<double, ColMajor> A;
- * SparseLU<SparseMatrix<scalar, ColMajor>, COLAMDOrdering<int> > solver;
+ * SparseLU<SparseMatrix<scalar, ColMajor>, COLAMDOrdering<Index> > solver;
* // fill A and b;
* // Compute the ordering permutation vector from the structural pattern of A
* solver.analyzePattern(A);
@@ -65,7 +68,7 @@ namespace Eigen {
* \sa \ref OrderingMethods_Module
*/
template <typename _MatrixType, typename _OrderingType>
-class SparseLU
+class SparseLU : public internal::SparseLUImpl<typename _MatrixType::Scalar, typename _MatrixType::Index>
{
public:
typedef _MatrixType MatrixType;
@@ -74,17 +77,18 @@ class SparseLU
typedef typename MatrixType::RealScalar RealScalar;
typedef typename MatrixType::Index Index;
typedef SparseMatrix<Scalar,ColMajor,Index> NCMatrix;
- typedef SuperNodalMatrix<Scalar, Index> SCMatrix;
+ typedef internal::MappedSuperNodalMatrix<Scalar, Index> SCMatrix;
typedef Matrix<Scalar,Dynamic,1> ScalarVector;
typedef Matrix<Index,Dynamic,1> IndexVector;
typedef PermutationMatrix<Dynamic, Dynamic, Index> PermutationType;
+ typedef internal::SparseLUImpl<Scalar, Index> Base;
public:
- SparseLU():m_isInitialized(true),m_Ustore(0,0,0,0,0,0),m_symmetricmode(false),m_diagpivotthresh(1.0)
+ SparseLU():m_isInitialized(true),m_lastError(""),m_Ustore(0,0,0,0,0,0),m_symmetricmode(false),m_diagpivotthresh(1.0)
{
initperfvalues();
}
- SparseLU(const MatrixType& matrix):m_isInitialized(true),m_Ustore(0,0,0,0,0,0),m_symmetricmode(false),m_diagpivotthresh(1.0)
+ SparseLU(const MatrixType& matrix):m_isInitialized(true),m_lastError(""),m_Ustore(0,0,0,0,0,0),m_symmetricmode(false),m_diagpivotthresh(1.0)
{
initperfvalues();
compute(matrix);
@@ -119,36 +123,26 @@ class SparseLU
m_symmetricmode = sym;
}
- /** Set the threshold used for a diagonal entry to be an acceptable pivot. */
- void diagPivotThresh(RealScalar thresh)
- {
- m_diagpivotthresh = thresh;
- }
-
- /** Return the number of nonzero elements in the L factor */
- int nnzL()
+ /** Returns an expression of the matrix L, internally stored as supernodes.
+ * For a triangular solve with this matrix, use
+ * \code
+ * y = b; matrixL().solveInPlace(y);
+ * \endcode
+ */
+ SparseLUMatrixLReturnType<SCMatrix> matrixL() const
{
- if (m_factorizationIsOk)
- return m_nnzL;
- else
- {
- std::cerr<<"Numerical factorization should be done before\n";
- return 0;
- }
+ return SparseLUMatrixLReturnType<SCMatrix>(m_Lstore);
}
- /** Return the number of nonzero elements in the U factor */
- int nnzU()
+ /** Set the threshold used for a diagonal entry to be an acceptable pivot. */
+ void setPivotThreshold(const RealScalar& thresh)
{
- if (m_factorizationIsOk)
- return m_nnzU;
- else
- {
- std::cerr<<"Numerical factorization should be done before\n";
- return 0;
- }
+ m_diagpivotthresh = thresh;
}
+
/** \returns the solution X of \f$ A X = B \f$ using the current decomposition of A.
*
+ * \warning the destination matrix X in X = this->solve(B) must be column-major.
+ *
* \sa compute()
*/
template<typename Rhs>
@@ -160,6 +154,18 @@ class SparseLU
return internal::solve_retval<SparseLU, Rhs>(*this, B.derived());
}
+ /** \returns the solution X of \f$ A X = B \f$ using the current decomposition of A.
+ *
+ * \sa compute()
+ */
+ template<typename Rhs>
+ inline const internal::sparse_solve_retval<SparseLU, Rhs> solve(const SparseMatrixBase<Rhs>& B) const
+ {
+ eigen_assert(m_factorizationIsOk && "SparseLU is not initialized.");
+ eigen_assert(rows()==B.rows()
+ && "SparseLU::solve(): invalid number of rows of the right hand side matrix B");
+ return internal::sparse_solve_retval<SparseLU, Rhs>(*this, B.derived());
+ }
/** \brief Reports whether previous computation was successful.
*
@@ -174,7 +180,13 @@ class SparseLU
eigen_assert(m_isInitialized && "Decomposition is not initialized.");
return m_info;
}
-
+ /**
+ * \returns A string describing the type of error
+ */
+ std::string lastErrorMessage() const
+ {
+ return m_lastError;
+ }
template<typename Rhs, typename Dest>
bool _solve(const MatrixBase<Rhs> &B, MatrixBase<Dest> &_X) const
{
@@ -184,20 +196,21 @@ class SparseLU
THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES);
- int nrhs = B.cols();
+ Index nrhs = B.cols();
Index n = B.rows();
// Permute the right hand side to form X = Pr*B
// on return, X is overwritten by the computed solution
X.resize(n,nrhs);
- for(int j = 0; j < nrhs; ++j)
+ for(Index j = 0; j < nrhs; ++j)
X.col(j) = m_perm_r * B.col(j);
//Forward substitution with L
- m_Lstore.solveInPlace(X);
+// m_Lstore.solveInPlace(X);
+ this->matrixL().solveInPlace(X);
// Backward solve with U
- for (int k = m_Lstore.nsuper(); k >= 0; k--)
+ for (Index k = m_Lstore.nsuper(); k >= 0; k--)
{
Index fsupc = m_Lstore.supToCol()[k];
Index lda = m_Lstore.colIndexPtr()[fsupc+1] - m_Lstore.colIndexPtr()[fsupc]; // leading dimension
@@ -206,7 +219,7 @@ class SparseLU
if (nsupc == 1)
{
- for (int j = 0; j < nrhs; j++)
+ for (Index j = 0; j < nrhs; j++)
{
X(fsupc, j) /= m_Lstore.valuePtr()[luptr];
}
@@ -218,11 +231,11 @@ class SparseLU
U = A.template triangularView<Upper>().solve(U);
}
- for (int j = 0; j < nrhs; ++j)
+ for (Index j = 0; j < nrhs; ++j)
{
- for (int jcol = fsupc; jcol < fsupc + nsupc; jcol++)
+ for (Index jcol = fsupc; jcol < fsupc + nsupc; jcol++)
{
- typename MappedSparseMatrix<Scalar>::InnerIterator it(m_Ustore, jcol);
+ typename MappedSparseMatrix<Scalar,ColMajor, Index>::InnerIterator it(m_Ustore, jcol);
for ( ; it; ++it)
{
Index irow = it.index();
@@ -233,7 +246,7 @@ class SparseLU
} // End For U-solve
// Permute back the solution
- for (int j = 0; j < nrhs; ++j)
+ for (Index j = 0; j < nrhs; ++j)
X.col(j) = m_perm_c.inverse() * X.col(j);
return true;
@@ -256,23 +269,23 @@ class SparseLU
bool m_isInitialized;
bool m_factorizationIsOk;
bool m_analysisIsOk;
+ std::string m_lastError;
NCMatrix m_mat; // The input (permuted ) matrix
SCMatrix m_Lstore; // The lower triangular matrix (supernodal)
- MappedSparseMatrix<Scalar> m_Ustore; // The upper triangular matrix
+ MappedSparseMatrix<Scalar,ColMajor,Index> m_Ustore; // The upper triangular matrix
PermutationType m_perm_c; // Column permutation
PermutationType m_perm_r ; // Row permutation
IndexVector m_etree; // Column elimination tree
- LU_GlobalLU_t<IndexVector, ScalarVector> m_glu;
+ typename Base::GlobalLU_t m_glu;
- // SuperLU/SparseLU options
+ // SparseLU options
bool m_symmetricmode;
-
// values for performance
- LU_perfvalues m_perfv;
+ internal::perfvalues<Index> m_perfv;
RealScalar m_diagpivotthresh; // Specifies the threshold used for a diagonal entry to be an acceptable pivot
- int m_nnzL, m_nnzU; // Nonzeros in L and U factors
-
+ Index m_nnzL, m_nnzU; // Nonzeros in L and U factors
+
private:
// Copy constructor
SparseLU (SparseLU& ) {}
@@ -301,18 +314,17 @@ void SparseLU<MatrixType, OrderingType>::analyzePattern(const MatrixType& mat)
ord(mat,m_perm_c);
// Apply the permutation to the column of the input matrix
-// m_mat = mat * m_perm_c.inverse(); //FIXME It should be less expensive here to permute only the structural pattern of the matrix
-
//First copy the whole input matrix.
m_mat = mat;
- m_mat.uncompress(); //NOTE: The effect of this command is only to create the InnerNonzeros pointers. FIXME : This vector is filled but not subsequently used.
- //Then, permute only the column pointers
- for (int i = 0; i < mat.cols(); i++)
- {
- m_mat.outerIndexPtr()[m_perm_c.indices()(i)] = mat.outerIndexPtr()[i];
- m_mat.innerNonZeroPtr()[m_perm_c.indices()(i)] = mat.outerIndexPtr()[i+1] - mat.outerIndexPtr()[i];
+ if (m_perm_c.size()) {
+ m_mat.uncompress(); //NOTE: The effect of this command is only to create the InnerNonzeros pointers. FIXME : This vector is filled but not subsequently used.
+ //Then, permute only the column pointers
+ for (Index i = 0; i < mat.cols(); i++)
+ {
+ m_mat.outerIndexPtr()[m_perm_c.indices()(i)] = mat.outerIndexPtr()[i];
+ m_mat.innerNonZeroPtr()[m_perm_c.indices()(i)] = mat.outerIndexPtr()[i+1] - mat.outerIndexPtr()[i];
+ }
}
-
// Compute the column elimination tree of the permuted matrix
IndexVector firstRowElt;
internal::coletree(m_mat, m_etree,firstRowElt);
@@ -325,18 +337,20 @@ void SparseLU<MatrixType, OrderingType>::analyzePattern(const MatrixType& mat)
// Renumber etree in postorder
- int m = m_mat.cols();
+ Index m = m_mat.cols();
iwork.resize(m+1);
- for (int i = 0; i < m; ++i) iwork(post(i)) = post(m_etree(i));
+ for (Index i = 0; i < m; ++i) iwork(post(i)) = post(m_etree(i));
m_etree = iwork;
// Postmultiply A*Pc by post, i.e reorder the matrix according to the postorder of the etree
- PermutationType post_perm(m); //FIXME Use directly a constructor with post
- for (int i = 0; i < m; i++)
+ PermutationType post_perm(m);
+ for (Index i = 0; i < m; i++)
post_perm.indices()(i) = post(i);
// Combine the two permutations : postorder the permutation for future use
- m_perm_c = post_perm * m_perm_c;
+ if(m_perm_c.size()) {
+ m_perm_c = post_perm * m_perm_c;
+ }
} // end postordering
@@ -367,7 +381,7 @@ void SparseLU<MatrixType, OrderingType>::analyzePattern(const MatrixType& mat)
template <typename MatrixType, typename OrderingType>
void SparseLU<MatrixType, OrderingType>::factorize(const MatrixType& matrix)
{
-
+ using internal::emptyIdxLU;
eigen_assert(m_analysisIsOk && "analyzePattern() should be called first");
eigen_assert((matrix.rows() == matrix.cols()) && "Only for squared matrices");
@@ -377,24 +391,32 @@ void SparseLU<MatrixType, OrderingType>::factorize(const MatrixType& matrix)
// Apply the column permutation computed in analyzepattern()
// m_mat = matrix * m_perm_c.inverse();
m_mat = matrix;
- m_mat.uncompress(); //NOTE: The effect of this command is only to create the InnerNonzeros pointers.
- //Then, permute only the column pointers
- for (int i = 0; i < matrix.cols(); i++)
+ if (m_perm_c.size())
{
- m_mat.outerIndexPtr()[m_perm_c.indices()(i)] = matrix.outerIndexPtr()[i];
- m_mat.innerNonZeroPtr()[m_perm_c.indices()(i)] = matrix.outerIndexPtr()[i+1] - matrix.outerIndexPtr()[i];
+ m_mat.uncompress(); //NOTE: The effect of this command is only to create the InnerNonzeros pointers.
+ //Then, permute only the column pointers
+ for (Index i = 0; i < matrix.cols(); i++)
+ {
+ m_mat.outerIndexPtr()[m_perm_c.indices()(i)] = matrix.outerIndexPtr()[i];
+ m_mat.innerNonZeroPtr()[m_perm_c.indices()(i)] = matrix.outerIndexPtr()[i+1] - matrix.outerIndexPtr()[i];
+ }
+ }
+ else
+ { //FIXME This should not be needed if the empty permutation is handled transparently
+ m_perm_c.resize(matrix.cols());
+ for(Index i = 0; i < matrix.cols(); ++i) m_perm_c.indices()(i) = i;
}
- int m = m_mat.rows();
- int n = m_mat.cols();
- int nnz = m_mat.nonZeros();
- int maxpanel = m_perfv.panel_size * m;
+ Index m = m_mat.rows();
+ Index n = m_mat.cols();
+ Index nnz = m_mat.nonZeros();
+ Index maxpanel = m_perfv.panel_size * m;
// Allocate working storage common to the factor routines
- int lwork = 0;
- int info = SparseLUBase<Scalar,Index>::LUMemInit(m, n, nnz, lwork, m_perfv.fillfactor, m_perfv.panel_size, m_glu);
+ Index lwork = 0;
+ Index info = Base::memInit(m, n, nnz, lwork, m_perfv.fillfactor, m_perfv.panel_size, m_glu);
if (info)
{
- std::cerr << "UNABLE TO ALLOCATE WORKING MEMORY\n\n" ;
+ m_lastError = "UNABLE TO ALLOCATE WORKING MEMORY\n\n" ;
m_factorizationIsOk = false;
return ;
}
@@ -406,7 +428,7 @@ void SparseLU<MatrixType, OrderingType>::factorize(const MatrixType& matrix)
IndexVector repfnz(maxpanel);
IndexVector panel_lsub(maxpanel);
IndexVector xprune(n); xprune.setZero();
- IndexVector marker(m*LU_NO_MARKER); marker.setZero();
+ IndexVector marker(m*internal::LUNoMarker); marker.setZero();
repfnz.setConstant(-1);
panel_lsub.setConstant(-1);
@@ -415,7 +437,7 @@ void SparseLU<MatrixType, OrderingType>::factorize(const MatrixType& matrix)
ScalarVector dense;
dense.setZero(maxpanel);
ScalarVector tempv;
- tempv.setZero(LU_NUM_TEMPV(m, m_perfv.panel_size, m_perfv.maxsuper, /*m_perfv.rowblk*/m) );
+ tempv.setZero(internal::LUnumTempV(m, m_perfv.panel_size, m_perfv.maxsuper, /*m_perfv.rowblk*/m) );
// Compute the inverse of perm_c
PermutationType iperm_c(m_perm_c.inverse());
@@ -423,35 +445,35 @@ void SparseLU<MatrixType, OrderingType>::factorize(const MatrixType& matrix)
// Identify initial relaxed snodes
IndexVector relax_end(n);
if ( m_symmetricmode == true )
- SparseLUBase<Scalar,Index>::LU_heap_relax_snode(n, m_etree, m_perfv.relax, marker, relax_end);
+ Base::heap_relax_snode(n, m_etree, m_perfv.relax, marker, relax_end);
else
- SparseLUBase<Scalar,Index>::LU_relax_snode(n, m_etree, m_perfv.relax, marker, relax_end);
+ Base::relax_snode(n, m_etree, m_perfv.relax, marker, relax_end);
m_perm_r.resize(m);
m_perm_r.indices().setConstant(-1);
marker.setConstant(-1);
- m_glu.supno(0) = IND_EMPTY; m_glu.xsup.setConstant(0);
+ m_glu.supno(0) = emptyIdxLU; m_glu.xsup.setConstant(0);
m_glu.xsup(0) = m_glu.xlsub(0) = m_glu.xusub(0) = m_glu.xlusup(0) = Index(0);
// Work on one 'panel' at a time. A panel is one of the following :
// (a) a relaxed supernode at the bottom of the etree, or
// (b) panel_size contiguous columns, <panel_size> defined by the user
- int jcol;
+ Index jcol;
IndexVector panel_histo(n);
Index pivrow; // Pivotal row number in the original row matrix
- int nseg1; // Number of segments in U-column above panel row jcol
- int nseg; // Number of segments in each U-column
- int irep;
- int i, k, jj;
+ Index nseg1; // Number of segments in U-column above panel row jcol
+ Index nseg; // Number of segments in each U-column
+ Index irep;
+ Index i, k, jj;
for (jcol = 0; jcol < n; )
{
// Adjust panel size so that a panel won't overlap with the next relaxed snode.
- int panel_size = m_perfv.panel_size; // upper bound on panel width
+ Index panel_size = m_perfv.panel_size; // upper bound on panel width
for (k = jcol + 1; k < (std::min)(jcol+panel_size, n); k++)
{
- if (relax_end(k) != IND_EMPTY)
+ if (relax_end(k) != emptyIdxLU)
{
panel_size = k - jcol;
break;
@@ -461,10 +483,10 @@ void SparseLU<MatrixType, OrderingType>::factorize(const MatrixType& matrix)
panel_size = n - jcol;
// Symbolic outer factorization on a panel of columns
- SparseLUBase<Scalar,Index>::LU_panel_dfs(m, panel_size, jcol, m_mat, m_perm_r.indices(), nseg1, dense, panel_lsub, segrep, repfnz, xprune, marker, parent, xplore, m_glu);
+ Base::panel_dfs(m, panel_size, jcol, m_mat, m_perm_r.indices(), nseg1, dense, panel_lsub, segrep, repfnz, xprune, marker, parent, xplore, m_glu);
// Numeric sup-panel updates in topological order
- SparseLUBase<Scalar,Index>::LU_panel_bmod(m, panel_size, jcol, nseg1, dense, tempv, segrep, repfnz, m_glu);
+ Base::panel_bmod(m, panel_size, jcol, nseg1, dense, tempv, segrep, repfnz, m_glu);
// Sparse LU within the panel, and below the panel diagonal
for ( jj = jcol; jj< jcol + panel_size; jj++)
@@ -475,10 +497,10 @@ void SparseLU<MatrixType, OrderingType>::factorize(const MatrixType& matrix)
//Depth-first-search for the current column
VectorBlock<IndexVector> panel_lsubk(panel_lsub, k, m);
VectorBlock<IndexVector> repfnz_k(repfnz, k, m);
- info = SparseLUBase<Scalar,Index>::LU_column_dfs(m, jj, m_perm_r.indices(), m_perfv.maxsuper, nseg, panel_lsubk, segrep, repfnz_k, xprune, marker, parent, xplore, m_glu);
+ info = Base::column_dfs(m, jj, m_perm_r.indices(), m_perfv.maxsuper, nseg, panel_lsubk, segrep, repfnz_k, xprune, marker, parent, xplore, m_glu);
if ( info )
{
- std::cerr << "UNABLE TO EXPAND MEMORY IN COLUMN_DFS() \n";
+ m_lastError = "UNABLE TO EXPAND MEMORY IN COLUMN_DFS() ";
m_info = NumericalIssue;
m_factorizationIsOk = false;
return;
@@ -486,62 +508,82 @@ void SparseLU<MatrixType, OrderingType>::factorize(const MatrixType& matrix)
// Numeric updates to this column
VectorBlock<ScalarVector> dense_k(dense, k, m);
VectorBlock<IndexVector> segrep_k(segrep, nseg1, m-nseg1);
- info = SparseLUBase<Scalar,Index>::LU_column_bmod(jj, (nseg - nseg1), dense_k, tempv, segrep_k, repfnz_k, jcol, m_glu);
+ info = Base::column_bmod(jj, (nseg - nseg1), dense_k, tempv, segrep_k, repfnz_k, jcol, m_glu);
if ( info )
{
- std::cerr << "UNABLE TO EXPAND MEMORY IN COLUMN_BMOD() \n";
+ m_lastError = "UNABLE TO EXPAND MEMORY IN COLUMN_BMOD() ";
m_info = NumericalIssue;
m_factorizationIsOk = false;
return;
}
// Copy the U-segments to ucol(*)
- info = SparseLUBase<Scalar,Index>::LU_copy_to_ucol(jj, nseg, segrep, repfnz_k ,m_perm_r.indices(), dense_k, m_glu);
+ info = Base::copy_to_ucol(jj, nseg, segrep, repfnz_k ,m_perm_r.indices(), dense_k, m_glu);
if ( info )
{
- std::cerr << "UNABLE TO EXPAND MEMORY IN COPY_TO_UCOL() \n";
+ m_lastError = "UNABLE TO EXPAND MEMORY IN COPY_TO_UCOL() ";
m_info = NumericalIssue;
m_factorizationIsOk = false;
return;
}
// Form the L-segment
- info = SparseLUBase<Scalar,Index>::LU_pivotL(jj, m_diagpivotthresh, m_perm_r.indices(), iperm_c.indices(), pivrow, m_glu);
+ info = Base::pivotL(jj, m_diagpivotthresh, m_perm_r.indices(), iperm_c.indices(), pivrow, m_glu);
if ( info )
{
- std::cerr<< "THE MATRIX IS STRUCTURALLY SINGULAR ... ZERO COLUMN AT " << info <<std::endl;
+ m_lastError = "THE MATRIX IS STRUCTURALLY SINGULAR ... ZERO COLUMN AT ";
+ std::ostringstream returnInfo;
+ returnInfo << info;
+ m_lastError += returnInfo.str();
m_info = NumericalIssue;
m_factorizationIsOk = false;
return;
}
// Prune columns (0:jj-1) using column jj
- SparseLUBase<Scalar,Index>::LU_pruneL(jj, m_perm_r.indices(), pivrow, nseg, segrep, repfnz_k, xprune, m_glu);
+ Base::pruneL(jj, m_perm_r.indices(), pivrow, nseg, segrep, repfnz_k, xprune, m_glu);
// Reset repfnz for this column
for (i = 0; i < nseg; i++)
{
irep = segrep(i);
- repfnz_k(irep) = IND_EMPTY;
+ repfnz_k(irep) = emptyIdxLU;
}
} // end SparseLU within the panel
jcol += panel_size; // Move to the next panel
} // end for -- end elimination
// Count the number of nonzeros in factors
- SparseLUBase<Scalar,Index>::LU_countnz(n, m_nnzL, m_nnzU, m_glu);
+ Base::countnz(n, m_nnzL, m_nnzU, m_glu);
// Apply permutation to the L subscripts
- SparseLUBase<Scalar,Index>::LU_fixupL(n, m_perm_r.indices(), m_glu);
+ Base::fixupL(n, m_perm_r.indices(), m_glu);
// Create supernode matrix L
m_Lstore.setInfos(m, n, m_glu.lusup, m_glu.xlusup, m_glu.lsub, m_glu.xlsub, m_glu.supno, m_glu.xsup);
// Create the column major upper sparse matrix U;
- new (&m_Ustore) MappedSparseMatrix<Scalar> ( m, n, m_nnzU, m_glu.xusub.data(), m_glu.usub.data(), m_glu.ucol.data() );
+ new (&m_Ustore) MappedSparseMatrix<Scalar, ColMajor, Index> ( m, n, m_nnzU, m_glu.xusub.data(), m_glu.usub.data(), m_glu.ucol.data() );
m_info = Success;
m_factorizationIsOk = true;
}
+template<typename MappedSupernodalType>
+struct SparseLUMatrixLReturnType
+{
+ typedef typename MappedSupernodalType::Index Index;
+ typedef typename MappedSupernodalType::Scalar Scalar;
+ SparseLUMatrixLReturnType(const MappedSupernodalType& mapL) : m_mapL(mapL)
+ { }
+ Index rows() { return m_mapL.rows(); }
+ Index cols() { return m_mapL.cols(); }
+ template<typename Dest>
+ void solveInPlace( MatrixBase<Dest> &X) const
+ {
+ m_mapL.solveInPlace(X);
+ }
+ const MappedSupernodalType& m_mapL;
+};
+
namespace internal {
template<typename _MatrixType, typename Derived, typename Rhs>
@@ -557,6 +599,18 @@ struct solve_retval<SparseLU<_MatrixType,Derived>, Rhs>
}
};
+template<typename _MatrixType, typename Derived, typename Rhs>
+struct sparse_solve_retval<SparseLU<_MatrixType,Derived>, Rhs>
+ : sparse_solve_retval_base<SparseLU<_MatrixType,Derived>, Rhs>
+{
+ typedef SparseLU<_MatrixType,Derived> Dec;
+ EIGEN_MAKE_SPARSE_SOLVE_HELPERS(Dec,Rhs)
+
+ template<typename Dest> void evalTo(Dest& dst) const
+ {
+ this->defaultEvalTo(dst);
+ }
+};
} // end namespace internal
} // End namespace Eigen
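Putting the SparseLU changes above together, the public workflow is analyzePattern()/factorize() (or compute()), solve() for dense and now also sparse right-hand sides, matrixL() for a triangular solve with the supernodal L factor, setPivotThreshold() to tune pivoting, and lastErrorMessage() to inspect failures. A usage sketch along the lines of the class documentation (matrix values and sizes are illustrative only):

#include <Eigen/SparseCore>
#include <Eigen/OrderingMethods>
#include <Eigen/SparseLU>
#include <iostream>

int main()
{
  typedef Eigen::SparseMatrix<double, Eigen::ColMajor> SpMat;

  SpMat A(3, 3);
  A.insert(0, 0) = 4.0;
  A.insert(1, 1) = 2.0;
  A.insert(2, 2) = 1.0;
  A.insert(0, 2) = 0.5;
  A.makeCompressed();

  Eigen::SparseLU<SpMat, Eigen::COLAMDOrdering<int> > solver;
  solver.setPivotThreshold(0.1);   // optional: relax diagonal pivoting
  solver.analyzePattern(A);        // symbolic step, reusable for matrices with the same pattern
  solver.factorize(A);             // numeric step
  if (solver.info() != Eigen::Success)
  {
    std::cerr << solver.lastErrorMessage() << std::endl;
    return 1;
  }

  Eigen::VectorXd b = Eigen::VectorXd::Ones(3);
  Eigen::VectorXd x = solver.solve(b);             // dense right-hand side

  Eigen::MatrixXd y = Eigen::MatrixXd::Ones(3, 1); // column-major destination, as required
  solver.matrixL().solveInPlace(y);                // triangular solve with the supernodal L

  std::cout << x.transpose() << std::endl;
  return 0;
}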
diff --git a/Eigen/src/SparseLU/SparseLUBase.h b/Eigen/src/SparseLU/SparseLUBase.h
deleted file mode 100644
index f4c5fbead..000000000
--- a/Eigen/src/SparseLU/SparseLUBase.h
+++ /dev/null
@@ -1,58 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-#ifndef SPARSELUBASE_H
-#define SPARSELUBASE_H
-
-namespace Eigen {
-
-/** \ingroup SparseLU_Module
- * \class SparseLUBase
- * Base class for sparseLU
- */
-template <typename Scalar, typename Index>
-struct SparseLUBase
-{
- typedef Matrix<Scalar,Dynamic,1> ScalarVector;
- typedef Matrix<Index,Dynamic,1> IndexVector;
- typedef typename ScalarVector::RealScalar RealScalar;
- typedef Ref<Matrix<Scalar,Dynamic,1> > BlockScalarVector;
- typedef Ref<Matrix<Index,Dynamic,1> > BlockIndexVector;
- typedef LU_GlobalLU_t<IndexVector, ScalarVector> GlobalLU_t;
- typedef SparseMatrix<Scalar,ColMajor,Index> MatrixType;
-
- template <typename VectorType>
- static int expand(VectorType& vec, int& length, int nbElts, int keep_prev, int& num_expansions);
- static int LUMemInit(int m, int n, int annz, int lwork, int fillratio, int panel_size, GlobalLU_t& glu);
- template <typename VectorType>
- static int LUMemXpand(VectorType& vec, int& maxlen, int nbElts, LU_MemType memtype, int& num_expansions);
- static void LU_heap_relax_snode (const int n, IndexVector& et, const int relax_columns, IndexVector& descendants, IndexVector& relax_end);
- static void LU_relax_snode (const int n, IndexVector& et, const int relax_columns, IndexVector& descendants, IndexVector& relax_end);
- static int LU_snode_dfs(const int jcol, const int kcol,const MatrixType& mat, IndexVector& xprune, IndexVector& marker, LU_GlobalLU_t<IndexVector, ScalarVector>& glu);
- static int LU_snode_bmod (const int jcol, const int fsupc, ScalarVector& dense, GlobalLU_t& glu);
- static int LU_pivotL(const int jcol, const RealScalar diagpivotthresh, IndexVector& perm_r, IndexVector& iperm_c, int& pivrow, GlobalLU_t& glu);
- template <typename Traits>
- static void LU_dfs_kernel(const int jj, IndexVector& perm_r,
- int& nseg, IndexVector& panel_lsub, IndexVector& segrep,
- Ref<IndexVector> repfnz_col, IndexVector& xprune, Ref<IndexVector> marker, IndexVector& parent,
- IndexVector& xplore, GlobalLU_t& glu, int& nextl_col, int krow, Traits& traits);
- static void LU_panel_dfs(const int m, const int w, const int jcol, MatrixType& A, IndexVector& perm_r, int& nseg, ScalarVector& dense, IndexVector& panel_lsub, IndexVector& segrep, IndexVector& repfnz, IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, GlobalLU_t& glu);
-
- static void LU_panel_bmod(const int m, const int w, const int jcol, const int nseg, ScalarVector& dense, ScalarVector& tempv, IndexVector& segrep, IndexVector& repfnz, GlobalLU_t& glu);
- static int LU_column_dfs(const int m, const int jcol, IndexVector& perm_r, int maxsuper, int& nseg, BlockIndexVector lsub_col, IndexVector& segrep, BlockIndexVector repfnz, IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, GlobalLU_t& glu);
- static int LU_column_bmod(const int jcol, const int nseg, BlockScalarVector dense, ScalarVector& tempv, BlockIndexVector segrep, BlockIndexVector repfnz, int fpanelc, GlobalLU_t& glu);
- static int LU_copy_to_ucol(const int jcol, const int nseg, IndexVector& segrep, BlockIndexVector repfnz ,IndexVector& perm_r, BlockScalarVector dense, GlobalLU_t& glu);
- static void LU_pruneL(const int jcol, const IndexVector& perm_r, const int pivrow, const int nseg, const IndexVector& segrep, BlockIndexVector repfnz, IndexVector& xprune, GlobalLU_t& glu);
- static void LU_countnz(const int n, int& nnzL, int& nnzU, GlobalLU_t& glu);
- static void LU_fixupL(const int n, const IndexVector& perm_r, GlobalLU_t& glu);
-
-};
-
-} // end namespace Eigen
-
-#endif
diff --git a/Eigen/src/SparseLU/SparseLUImpl.h b/Eigen/src/SparseLU/SparseLUImpl.h
new file mode 100644
index 000000000..14d70897d
--- /dev/null
+++ b/Eigen/src/SparseLU/SparseLUImpl.h
@@ -0,0 +1,64 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#ifndef SPARSELU_IMPL_H
+#define SPARSELU_IMPL_H
+
+namespace Eigen {
+namespace internal {
+
+/** \ingroup SparseLU_Module
+ * \class SparseLUImpl
+ * Base class for SparseLU
+ */
+template <typename Scalar, typename Index>
+class SparseLUImpl
+{
+ public:
+ typedef Matrix<Scalar,Dynamic,1> ScalarVector;
+ typedef Matrix<Index,Dynamic,1> IndexVector;
+ typedef typename ScalarVector::RealScalar RealScalar;
+ typedef Ref<Matrix<Scalar,Dynamic,1> > BlockScalarVector;
+ typedef Ref<Matrix<Index,Dynamic,1> > BlockIndexVector;
+ typedef LU_GlobalLU_t<IndexVector, ScalarVector> GlobalLU_t;
+ typedef SparseMatrix<Scalar,ColMajor,Index> MatrixType;
+
+ protected:
+ template <typename VectorType>
+ Index expand(VectorType& vec, Index& length, Index nbElts, Index keep_prev, Index& num_expansions);
+ Index memInit(Index m, Index n, Index annz, Index lwork, Index fillratio, Index panel_size, GlobalLU_t& glu);
+ template <typename VectorType>
+ Index memXpand(VectorType& vec, Index& maxlen, Index nbElts, MemType memtype, Index& num_expansions);
+ void heap_relax_snode (const Index n, IndexVector& et, const Index relax_columns, IndexVector& descendants, IndexVector& relax_end);
+ void relax_snode (const Index n, IndexVector& et, const Index relax_columns, IndexVector& descendants, IndexVector& relax_end);
+ Index snode_dfs(const Index jcol, const Index kcol,const MatrixType& mat, IndexVector& xprune, IndexVector& marker, GlobalLU_t& glu);
+ Index snode_bmod (const Index jcol, const Index fsupc, ScalarVector& dense, GlobalLU_t& glu);
+ Index pivotL(const Index jcol, const RealScalar& diagpivotthresh, IndexVector& perm_r, IndexVector& iperm_c, Index& pivrow, GlobalLU_t& glu);
+ template <typename Traits>
+ void dfs_kernel(const Index jj, IndexVector& perm_r,
+ Index& nseg, IndexVector& panel_lsub, IndexVector& segrep,
+ Ref<IndexVector> repfnz_col, IndexVector& xprune, Ref<IndexVector> marker, IndexVector& parent,
+ IndexVector& xplore, GlobalLU_t& glu, Index& nextl_col, Index krow, Traits& traits);
+ void panel_dfs(const Index m, const Index w, const Index jcol, MatrixType& A, IndexVector& perm_r, Index& nseg, ScalarVector& dense, IndexVector& panel_lsub, IndexVector& segrep, IndexVector& repfnz, IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, GlobalLU_t& glu);
+
+ void panel_bmod(const Index m, const Index w, const Index jcol, const Index nseg, ScalarVector& dense, ScalarVector& tempv, IndexVector& segrep, IndexVector& repfnz, GlobalLU_t& glu);
+ Index column_dfs(const Index m, const Index jcol, IndexVector& perm_r, Index maxsuper, Index& nseg, BlockIndexVector lsub_col, IndexVector& segrep, BlockIndexVector repfnz, IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, GlobalLU_t& glu);
+ Index column_bmod(const Index jcol, const Index nseg, BlockScalarVector dense, ScalarVector& tempv, BlockIndexVector segrep, BlockIndexVector repfnz, Index fpanelc, GlobalLU_t& glu);
+ Index copy_to_ucol(const Index jcol, const Index nseg, IndexVector& segrep, BlockIndexVector repfnz ,IndexVector& perm_r, BlockScalarVector dense, GlobalLU_t& glu);
+ void pruneL(const Index jcol, const IndexVector& perm_r, const Index pivrow, const Index nseg, const IndexVector& segrep, BlockIndexVector repfnz, IndexVector& xprune, GlobalLU_t& glu);
+ void countnz(const Index n, Index& nnzL, Index& nnzU, GlobalLU_t& glu);
+ void fixupL(const Index n, const IndexVector& perm_r, GlobalLU_t& glu);
+
+ template<typename , typename >
+ friend struct column_dfs_traits;
+};
+
+} // end namespace internal
+} // namespace Eigen
+
+#endif
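The new internal::SparseLUImpl replaces the static helpers of SparseLUBase with protected member functions that SparseLU inherits, so the driver reaches the kernels through Base::memInit(), Base::panel_dfs() and so on. A stripped-down illustration of that pattern, using hypothetical names rather than the real routines:

namespace demo {

// Stand-in for internal::SparseLUImpl: low-level kernels are protected members.
template <typename Scalar, typename Index>
class SolverImpl
{
 protected:
  Index memInit(Index m, Index n) { return m * n; }   // placeholder for the real routine
  void relax_snode(Index /*n*/) {}                    // placeholder
};

// Stand-in for SparseLU: the driver derives from the kernel class and calls
// the helpers through Base:: instead of a static SparseLUBase<Scalar,Index>::.
template <typename MatrixType>
class Solver : public SolverImpl<typename MatrixType::Scalar, typename MatrixType::Index>
{
  typedef SolverImpl<typename MatrixType::Scalar, typename MatrixType::Index> Base;
 public:
  void factorize(const MatrixType& mat)
  {
    Base::memInit(mat.rows(), mat.cols());
    Base::relax_snode(mat.cols());
  }
};

} // namespace demo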
diff --git a/Eigen/src/SparseLU/SparseLU_Memory.h b/Eigen/src/SparseLU/SparseLU_Memory.h
index 049d5e694..6d9570d19 100644
--- a/Eigen/src/SparseLU/SparseLU_Memory.h
+++ b/Eigen/src/SparseLU/SparseLU_Memory.h
@@ -32,15 +32,23 @@
#define EIGEN_SPARSELU_MEMORY
namespace Eigen {
+namespace internal {
-#define LU_NO_MARKER 3
-#define LU_NUM_TEMPV(m,w,t,b) ((std::max)(m, (t+b)*w) )
-#define IND_EMPTY (-1)
+enum { LUNoMarker = 3 };
+enum {emptyIdxLU = -1};
+template<typename Index>
+inline Index LUnumTempV(Index& m, Index& w, Index& t, Index& b)
+{
+ return (std::max)(m, (t+b)*w);
+}
+
+template< typename Scalar, typename Index>
+inline Index LUTempSpace(Index&m, Index& w)
+{
+ return (2*w + 4 + LUNoMarker) * m * sizeof(Index) + (w + 1) * m * sizeof(Scalar);
+}
+
-#define LU_Reduce(alpha) ((alpha + 1) / 2) // i.e (alpha-1)/2 + 1
-#define LU_GluIntArray(n) (5* (n) + 5)
-#define LU_TempSpace(m, w) ( (2*w + 4 + LU_NO_MARKER) * m * sizeof(Index) \
- + (w + 1) * m * sizeof(Scalar) )
/**
@@ -53,11 +61,11 @@ namespace Eigen {
*/
template <typename Scalar, typename Index>
template <typename VectorType>
-int SparseLUBase<Scalar,Index>::expand(VectorType& vec, int& length, int nbElts, int keep_prev, int& num_expansions)
+Index SparseLUImpl<Scalar,Index>::expand(VectorType& vec, Index& length, Index nbElts, Index keep_prev, Index& num_expansions)
{
float alpha = 1.5; // Ratio of the memory increase
- int new_len; // New size of the allocated memory
+ Index new_len; // New size of the allocated memory
if(num_expansions == 0 || keep_prev)
new_len = length ; // First time allocate requested
@@ -88,10 +96,10 @@ int SparseLUBase<Scalar,Index>::expand(VectorType& vec, int& length, int nbElts
else
{
// Reduce the size and increase again
- int tries = 0; // Number of attempts
+ Index tries = 0; // Number of attempts
do
{
- alpha = LU_Reduce(alpha);
+ alpha = (alpha + 1)/2;
new_len = alpha * length ;
try
{
@@ -128,18 +136,20 @@ int SparseLUBase<Scalar,Index>::expand(VectorType& vec, int& length, int nbElts
* \note Unlike SuperLU, this routine does not support successive factorization with the same pattern and the same row permutation
*/
template <typename Scalar, typename Index>
-int SparseLUBase<Scalar,Index>::LUMemInit(int m, int n, int annz, int lwork, int fillratio, int panel_size, GlobalLU_t& glu)
+Index SparseLUImpl<Scalar,Index>::memInit(Index m, Index n, Index annz, Index lwork, Index fillratio, Index panel_size, GlobalLU_t& glu)
{
- int& num_expansions = glu.num_expansions; //No memory expansions so far
+ Index& num_expansions = glu.num_expansions; //No memory expansions so far
num_expansions = 0;
glu.nzumax = glu.nzlumax = (std::max)(fillratio * annz, m*n); // estimated number of nonzeros in U
glu.nzlmax = (std::max)(1., fillratio/4.) * annz; // estimated nnz in L factor
// Return the estimated size to the user if necessary
- if (lwork == IND_EMPTY)
+ Index tempSpace;
+ tempSpace = (2*panel_size + 4 + LUNoMarker) * m * sizeof(Index) + (panel_size + 1) * m * sizeof(Scalar);
+ if (lwork == emptyIdxLU)
{
- int estimated_size;
- estimated_size = LU_GluIntArray(n) * sizeof(Index) + LU_TempSpace(m, panel_size)
+ Index estimated_size;
+ estimated_size = (5 * n + 5) * sizeof(Index) + tempSpace
+ (glu.nzlmax + glu.nzumax) * sizeof(Index) + (glu.nzlumax+glu.nzumax) * sizeof(Scalar) + n;
return estimated_size;
}
@@ -192,13 +202,13 @@ int SparseLUBase<Scalar,Index>::LUMemInit(int m, int n, int annz, int lwork, int
*/
template <typename Scalar, typename Index>
template <typename VectorType>
-int SparseLUBase<Scalar,Index>::LUMemXpand(VectorType& vec, int& maxlen, int nbElts, LU_MemType memtype, int& num_expansions)
+Index SparseLUImpl<Scalar,Index>::memXpand(VectorType& vec, Index& maxlen, Index nbElts, MemType memtype, Index& num_expansions)
{
- int failed_size;
+ Index failed_size;
if (memtype == USUB)
- failed_size = expand<VectorType>(vec, maxlen, nbElts, 1, num_expansions);
+ failed_size = this->expand<VectorType>(vec, maxlen, nbElts, 1, num_expansions);
else
- failed_size = expand<VectorType>(vec, maxlen, nbElts, 0, num_expansions);
+ failed_size = this->expand<VectorType>(vec, maxlen, nbElts, 0, num_expansions);
if (failed_size)
return failed_size;
@@ -206,6 +216,7 @@ int SparseLUBase<Scalar,Index>::LUMemXpand(VectorType& vec, int& maxlen, int nbE
return 0 ;
}
-} // end namespace Eigen
+} // end namespace internal
+} // end namespace Eigen
#endif // EIGEN_SPARSELU_MEMORY
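The expand() routine above grows a workspace by a factor alpha = 1.5 and, when allocation fails, repeatedly shrinks the factor towards 1 with the same (alpha + 1)/2 reduction that previously hid behind the LU_Reduce macro. A self-contained sketch of that policy with std::vector (the retry limit and names are illustrative):

#include <vector>
#include <new>
#include <cstddef>

template <typename T>
std::size_t grow(std::vector<T>& vec, std::size_t requested)
{
  float alpha = 1.5f;                          // initial growth ratio, as in expand()
  std::size_t new_len = static_cast<std::size_t>(alpha * static_cast<float>(requested));
  for (int tries = 0; tries < 10; ++tries)     // retry limit is illustrative
  {
    try
    {
      vec.resize(new_len);
      return new_len;                          // success: report the new capacity
    }
    catch (const std::bad_alloc&)
    {
      alpha = (alpha + 1.f) / 2.f;             // same reduction the LU_Reduce macro performed
      new_len = static_cast<std::size_t>(alpha * static_cast<float>(requested));
    }
  }
  return 0;                                    // give up, analogous to returning a failed size
}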
diff --git a/Eigen/src/SparseLU/SparseLU_Structs.h b/Eigen/src/SparseLU/SparseLU_Structs.h
index 89d6e81b7..24d6bf179 100644
--- a/Eigen/src/SparseLU/SparseLU_Structs.h
+++ b/Eigen/src/SparseLU/SparseLU_Structs.h
@@ -68,10 +68,10 @@
#ifndef EIGEN_LU_STRUCTS
#define EIGEN_LU_STRUCTS
-
namespace Eigen {
+namespace internal {
-typedef enum {LUSUP, UCOL, LSUB, USUB, LLVL, ULVL} LU_MemType;
+typedef enum {LUSUP, UCOL, LSUB, USUB, LLVL, ULVL} MemType;
template <typename IndexVector, typename ScalarVector>
struct LU_GlobalLU_t {
@@ -89,21 +89,23 @@ struct LU_GlobalLU_t {
IndexVector xusub; // Pointers to the beginning of each column of U in ucol
Index nzumax; // Current max size of ucol
Index n; // Number of columns in the matrix
- int num_expansions;
+ Index num_expansions;
};
-// Values to set for performance
-struct LU_perfvalues {
- int panel_size; // a panel consists of at most <panel_size> consecutive columns
- int relax; // To control degree of relaxing supernodes. If the number of nodes (columns)
+// Values to set for performance
+template <typename Index>
+struct perfvalues {
+ Index panel_size; // a panel consists of at most <panel_size> consecutive columns
+ Index relax; // To control degree of relaxing supernodes. If the number of nodes (columns)
// in a subtree of the elimination tree is less than relax, this subtree is considered
// as one supernode regardless of the row structures of those columns
- int maxsuper; // The maximum size for a supernode in complete LU
- int rowblk; // The minimum row dimension for 2-D blocking to be used;
- int colblk; // The minimum column dimension for 2-D blocking to be used;
- int fillfactor; // The estimated fills factors for L and U, compared with A
+ Index maxsuper; // The maximum size for a supernode in complete LU
+ Index rowblk; // The minimum row dimension for 2-D blocking to be used;
+ Index colblk; // The minimum column dimension for 2-D blocking to be used;
+ Index fillfactor; // The estimated fill factors for L and U, compared with A
};
-} // end namespace Eigen
+} // end namespace internal
+} // end namespace Eigen
#endif // EIGEN_LU_STRUCTS
diff --git a/Eigen/src/SparseLU/SparseLU_Matrix.h b/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h
index d5770e1ae..3eae95479 100644
--- a/Eigen/src/SparseLU/SparseLU_Matrix.h
+++ b/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h
@@ -8,10 +8,11 @@
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-#ifndef EIGEN_SPARSELU_MATRIX_H
-#define EIGEN_SPARSELU_MATRIX_H
+#ifndef EIGEN_SPARSELU_SUPERNODAL_MATRIX_H
+#define EIGEN_SPARSELU_SUPERNODAL_MATRIX_H
namespace Eigen {
+namespace internal {
/** \ingroup SparseLU_Module
* \brief a class to manipulate the L supernodal factor from the SparseLU factorization
@@ -23,13 +24,13 @@ namespace Eigen {
* NOTE : This class corresponds to the SCformat structure in SuperLU
*
*/
-/* TO DO
+/* TODO
* InnerIterator as for sparsematrix
* SuperInnerIterator to iterate through all supernodes
* Function for triangular solve
*/
template <typename _Scalar, typename _Index>
-class SuperNodalMatrix
+class MappedSuperNodalMatrix
{
public:
typedef _Scalar Scalar;
@@ -37,17 +38,17 @@ class SuperNodalMatrix
typedef Matrix<Index,Dynamic,1> IndexVector;
typedef Matrix<Scalar,Dynamic,1> ScalarVector;
public:
- SuperNodalMatrix()
+ MappedSuperNodalMatrix()
{
}
- SuperNodalMatrix(int m, int n, ScalarVector& nzval, IndexVector& nzval_colptr, IndexVector& rowind,
+ MappedSuperNodalMatrix(Index m, Index n, ScalarVector& nzval, IndexVector& nzval_colptr, IndexVector& rowind,
IndexVector& rowind_colptr, IndexVector& col_to_sup, IndexVector& sup_to_col )
{
setInfos(m, n, nzval, nzval_colptr, rowind, rowind_colptr, col_to_sup, sup_to_col);
}
- ~SuperNodalMatrix()
+ ~MappedSuperNodalMatrix()
{
}
@@ -57,7 +58,7 @@ class SuperNodalMatrix
* FIXME This class will be modified such that it can be used in the course
* of the factorization.
*/
- void setInfos(int m, int n, ScalarVector& nzval, IndexVector& nzval_colptr, IndexVector& rowind,
+ void setInfos(Index m, Index n, ScalarVector& nzval, IndexVector& nzval_colptr, IndexVector& rowind,
IndexVector& rowind_colptr, IndexVector& col_to_sup, IndexVector& sup_to_col )
{
m_row = m;
@@ -69,34 +70,24 @@ class SuperNodalMatrix
m_nsuper = col_to_sup(n);
m_col_to_sup = col_to_sup.data();
m_sup_to_col = sup_to_col.data();
-
}
/**
* Number of rows
*/
- int rows()
- {
- return m_row;
- }
+ Index rows() { return m_row; }
/**
* Number of columns
*/
- int cols()
- {
- return m_col;
- }
+ Index cols() { return m_col; }
/**
* Return the array of nonzero values packed by column
*
* The size is nnz
*/
- Scalar* valuePtr()
- {
- return m_nzval;
- }
+ Scalar* valuePtr() { return m_nzval; }
const Scalar* valuePtr() const
{
@@ -118,10 +109,7 @@ class SuperNodalMatrix
/**
* Return the array of compressed row indices of all supernodes
*/
- Index* rowIndex()
- {
- return m_rowind;
- }
+ Index* rowIndex() { return m_rowind; }
const Index* rowIndex() const
{
@@ -131,10 +119,7 @@ class SuperNodalMatrix
/**
* Return the location in \em rowvaluePtr() which starts each column
*/
- Index* rowIndexPtr()
- {
- return m_rowind_colptr;
- }
+ Index* rowIndexPtr() { return m_rowind_colptr; }
const Index* rowIndexPtr() const
{
@@ -144,10 +129,7 @@ class SuperNodalMatrix
/**
* Return the array of column-to-supernode mapping
*/
- Index* colToSup()
- {
- return m_col_to_sup;
- }
+ Index* colToSup() { return m_col_to_sup; }
const Index* colToSup() const
{
@@ -156,10 +138,7 @@ class SuperNodalMatrix
/**
* Return the array of supernode-to-column mapping
*/
- Index* supToCol()
- {
- return m_sup_to_col;
- }
+ Index* supToCol() { return m_sup_to_col; }
const Index* supToCol() const
{
@@ -169,7 +148,7 @@ class SuperNodalMatrix
/**
* Return the number of supernodes
*/
- int nsuper() const
+ Index nsuper() const
{
return m_nsuper;
}
@@ -196,22 +175,21 @@ class SuperNodalMatrix
};
/**
- * \brief InnerIterator class to iterate over nonzero values of the current column in the supernode
+ * \brief InnerIterator class to iterate over nonzero values of the current column in the supernodal matrix L
*
*/
template<typename Scalar, typename Index>
-class SuperNodalMatrix<Scalar,Index>::InnerIterator
+class MappedSuperNodalMatrix<Scalar,Index>::InnerIterator
{
public:
- InnerIterator(const SuperNodalMatrix& mat, Index outer)
+ InnerIterator(const MappedSuperNodalMatrix& mat, Index outer)
: m_matrix(mat),
m_outer(outer),
+ m_supno(mat.colToSup()[outer]),
m_idval(mat.colIndexPtr()[outer]),
- m_startval(m_idval),
- m_endval(mat.colIndexPtr()[outer+1]),
- m_idrow(mat.rowIndexPtr()[outer]),
- m_startidrow(m_idrow),
- m_endidrow(mat.rowIndexPtr()[outer+1])
+ m_startidval(m_idval),
+ m_endidval(mat.colIndexPtr()[outer+1]),
+ m_idrow(mat.rowIndexPtr()[outer])
{}
inline InnerIterator& operator++()
{
@@ -227,22 +205,21 @@ class SuperNodalMatrix<Scalar,Index>::InnerIterator
inline Index row() const { return index(); }
inline Index col() const { return m_outer; }
- inline Index supIndex() const { return m_matrix.colToSup()[m_outer]; }
+ inline Index supIndex() const { return m_supno; }
inline operator bool() const
{
- return ( (m_idrow < m_endidrow) && (m_idrow > m_startidrow) );
+ return ( (m_idval < m_endidval) && (m_idval >= m_startidval) );
}
protected:
- const SuperNodalMatrix& m_matrix; // Supernodal lower triangular matrix
+ const MappedSuperNodalMatrix& m_matrix; // Supernodal lower triangular matrix
const Index m_outer; // Current column
+ const Index m_supno; // Current SuperNode number
Index m_idval; //Index to browse the values in the current column
- const Index m_startval; // Start of the column value
- const Index m_endval; // End of the column value
+ const Index m_startidval; // Start of the column value
+ const Index m_endidval; // End of the column value
Index m_idrow; //Index to browse the row indices
- const Index m_startidrow; // Start of the row indices of the current column value
- const Index m_endidrow; // End of the row indices of the current column value
};
/**
@@ -251,14 +228,14 @@ class SuperNodalMatrix<Scalar,Index>::InnerIterator
*/
template<typename Scalar, typename Index>
template<typename Dest>
-void SuperNodalMatrix<Scalar,Index>::solveInPlace( MatrixBase<Dest>&X) const
+void MappedSuperNodalMatrix<Scalar,Index>::solveInPlace( MatrixBase<Dest>&X) const
{
Index n = X.rows();
- int nrhs = X.cols();
+ Index nrhs = X.cols();
const Scalar * Lval = valuePtr(); // Nonzero values
Matrix<Scalar,Dynamic,Dynamic> work(n, nrhs); // working vector
work.setZero();
- for (int k = 0; k <= nsuper(); k ++)
+ for (Index k = 0; k <= nsuper(); k ++)
{
Index fsupc = supToCol()[k]; // First column of the current supernode
Index istart = rowIndexPtr()[fsupc]; // Pointer index to the subscript of the current column
@@ -269,7 +246,7 @@ void SuperNodalMatrix<Scalar,Index>::solveInPlace( MatrixBase<Dest>&X) const
if (nsupc == 1 )
{
- for (int j = 0; j < nrhs; j++)
+ for (Index j = 0; j < nrhs; j++)
{
InnerIterator it(*this, fsupc);
++it; // Skip the diagonal element
@@ -296,10 +273,10 @@ void SuperNodalMatrix<Scalar,Index>::solveInPlace( MatrixBase<Dest>&X) const
work.block(0, 0, nrow, nrhs) = A * U;
//Begin Scatter
- for (int j = 0; j < nrhs; j++)
+ for (Index j = 0; j < nrhs; j++)
{
Index iptr = istart + nsupc;
- for (int i = 0; i < nrow; i++)
+ for (Index i = 0; i < nrow; i++)
{
irow = rowIndex()[iptr];
X(irow, j) -= work(i, j); // Scatter operation
@@ -311,6 +288,7 @@ void SuperNodalMatrix<Scalar,Index>::solveInPlace( MatrixBase<Dest>&X) const
}
}
-} // end namespace Eigen
+} // end namespace internal
+} // end namespace Eigen
#endif // EIGEN_SPARSELU_MATRIX_H
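solveInPlace() above proceeds supernode by supernode: the columns of a supernode share one row pattern, so the diagonal block is solved as a dense unit-lower triangular system and the remaining rows are updated with a single dense product before being scattered back. A hedged sketch of that inner step with plain dense objects (sizes and names are illustrative):

#include <Eigen/Dense>

// One supernode step of a forward solve: solve the dense unit-lower diagonal
// block, then update the rows below it with a single matrix product.
void supernodeForwardStep(const Eigen::MatrixXd& L_block,  // (nsupc+nrow) x nsupc columns of L
                          Eigen::MatrixXd& X_top,          // nsupc x nrhs rows of the solution
                          Eigen::MatrixXd& X_below)        // nrow  x nrhs rows touched below the block
{
  const int nsupc = int(L_block.cols());
  const Eigen::MatrixXd A = L_block.topRows(nsupc);                    // unit-lower diagonal block
  const Eigen::MatrixXd B = L_block.bottomRows(L_block.rows() - nsupc);

  A.triangularView<Eigen::UnitLower>().solveInPlace(X_top);
  X_below -= B * X_top;   // the real code scatters these rows back into X via rowIndex()
}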
diff --git a/Eigen/src/SparseLU/SparseLU_Utils.h b/Eigen/src/SparseLU/SparseLU_Utils.h
index e764823ae..15352ac33 100644
--- a/Eigen/src/SparseLU/SparseLU_Utils.h
+++ b/Eigen/src/SparseLU/SparseLU_Utils.h
@@ -12,18 +12,19 @@
#define EIGEN_SPARSELU_UTILS_H
namespace Eigen {
+namespace internal {
/**
* \brief Count Nonzero elements in the factors
*/
template <typename Scalar, typename Index>
-void SparseLUBase<Scalar,Index>::LU_countnz(const int n, int& nnzL, int& nnzU, GlobalLU_t& glu)
+void SparseLUImpl<Scalar,Index>::countnz(const Index n, Index& nnzL, Index& nnzU, GlobalLU_t& glu)
{
nnzL = 0;
nnzU = (glu.xusub)(n);
- int nsuper = (glu.supno)(n);
- int jlen;
- int i, j, fsupc;
+ Index nsuper = (glu.supno)(n);
+ Index jlen;
+ Index i, j, fsupc;
if (n <= 0 ) return;
// For each supernode
for (i = 0; i <= nsuper; i++)
@@ -48,12 +49,12 @@ void SparseLUBase<Scalar,Index>::LU_countnz(const int n, int& nnzL, int& nnzU, G
*
*/
template <typename Scalar, typename Index>
-void SparseLUBase<Scalar,Index>::LU_fixupL(const int n, const IndexVector& perm_r, GlobalLU_t& glu)
+void SparseLUImpl<Scalar,Index>::fixupL(const Index n, const IndexVector& perm_r, GlobalLU_t& glu)
{
- int fsupc, i, j, k, jstart;
+ Index fsupc, i, j, k, jstart;
- int nextl = 0;
- int nsuper = (glu.supno)(n);
+ Index nextl = 0;
+ Index nsuper = (glu.supno)(n);
// For each supernode
for (i = 0; i <= nsuper; i++)
@@ -73,6 +74,7 @@ void SparseLUBase<Scalar,Index>::LU_fixupL(const int n, const IndexVector& perm_
glu.xlsub(n) = nextl;
}
-} // end namespace Eigen
+} // end namespace internal
+} // end namespace Eigen
#endif // EIGEN_SPARSELU_UTILS_H
diff --git a/Eigen/src/SparseLU/SparseLU_column_bmod.h b/Eigen/src/SparseLU/SparseLU_column_bmod.h
index 6d557eb81..f24bd87d3 100644
--- a/Eigen/src/SparseLU/SparseLU_column_bmod.h
+++ b/Eigen/src/SparseLU/SparseLU_column_bmod.h
@@ -32,7 +32,8 @@
#define SPARSELU_COLUMN_BMOD_H
namespace Eigen {
-
+
+namespace internal {
/**
* \brief Performs numeric block updates (sup-col) in topological order
*
@@ -49,11 +50,11 @@ namespace Eigen {
*
*/
template <typename Scalar, typename Index>
-int SparseLUBase<Scalar,Index>::LU_column_bmod(const int jcol, const int nseg, BlockScalarVector dense, ScalarVector& tempv, BlockIndexVector segrep, BlockIndexVector repfnz, int fpanelc, GlobalLU_t& glu)
+Index SparseLUImpl<Scalar,Index>::column_bmod(const Index jcol, const Index nseg, BlockScalarVector dense, ScalarVector& tempv, BlockIndexVector segrep, BlockIndexVector repfnz, Index fpanelc, GlobalLU_t& glu)
{
- int jsupno, k, ksub, krep, ksupno;
- int lptr, nrow, isub, irow, nextlu, new_next, ufirst;
- int fsupc, nsupc, nsupr, luptr, kfnz, no_zeros;
+ Index jsupno, k, ksub, krep, ksupno;
+ Index lptr, nrow, isub, irow, nextlu, new_next, ufirst;
+ Index fsupc, nsupc, nsupr, luptr, kfnz, no_zeros;
/* krep = representative of current k-th supernode
* fsupc = first supernodal column
* nsupc = number of columns in a supernode
@@ -66,10 +67,10 @@ int SparseLUBase<Scalar,Index>::LU_column_bmod(const int jcol, const int nseg, B
jsupno = glu.supno(jcol);
// For each nonzero supernode segment of U[*,j] in topological order
k = nseg - 1;
- int d_fsupc; // distance between the first column of the current panel and the
+ Index d_fsupc; // distance between the first column of the current panel and the
// first column of the current snode
- int fst_col; // First column within small LU update
- int segsize;
+ Index fst_col; // First column within small LU update
+ Index segsize;
for (ksub = 0; ksub < nseg; ksub++)
{
krep = segrep(k); k--;
@@ -94,7 +95,7 @@ int SparseLUBase<Scalar,Index>::LU_column_bmod(const int jcol, const int nseg, B
nsupc = krep - fst_col + 1;
nsupr = glu.xlsub(fsupc+1) - glu.xlsub(fsupc);
nrow = nsupr - d_fsupc - nsupc;
- int lda = glu.xlusup(fst_col+1) - glu.xlusup(fst_col);
+ Index lda = glu.xlusup(fst_col+1) - glu.xlusup(fst_col);
// Perform a triangular solver and block update,
@@ -112,14 +113,14 @@ int SparseLUBase<Scalar,Index>::LU_column_bmod(const int jcol, const int nseg, B
fsupc = glu.xsup(jsupno);
// copy the SPA dense into L\U[*,j]
- int mem;
+ Index mem;
new_next = nextlu + glu.xlsub(fsupc + 1) - glu.xlsub(fsupc);
- int offset = internal::first_multiple<Index>(new_next, internal::packet_traits<Scalar>::size) - new_next;
+ Index offset = internal::first_multiple<Index>(new_next, internal::packet_traits<Scalar>::size) - new_next;
if(offset)
new_next += offset;
while (new_next > glu.nzlumax )
{
- mem = LUMemXpand<ScalarVector>(glu.lusup, glu.nzlumax, nextlu, LUSUP, glu.num_expansions);
+ mem = memXpand<ScalarVector>(glu.lusup, glu.nzlumax, nextlu, LUSUP, glu.num_expansions);
if (mem) return mem;
}
@@ -160,7 +161,7 @@ int SparseLUBase<Scalar,Index>::LU_column_bmod(const int jcol, const int nseg, B
// points to the beginning of jcol in snode L\U(jsupno)
ufirst = glu.xlusup(jcol) + d_fsupc;
- int lda = glu.xlusup(jcol+1) - glu.xlusup(jcol);
+ Index lda = glu.xlusup(jcol+1) - glu.xlusup(jcol);
Map<Matrix<Scalar,Dynamic,Dynamic>, 0, OuterStride<> > A( &(glu.lusup.data()[luptr]), nsupc, nsupc, OuterStride<>(lda) );
VectorBlock<ScalarVector> u(glu.lusup, ufirst, nsupc);
u = A.template triangularView<UnitLower>().solve(u);
@@ -173,6 +174,7 @@ int SparseLUBase<Scalar,Index>::LU_column_bmod(const int jcol, const int nseg, B
return 0;
}
+} // end namespace internal
} // end namespace Eigen
#endif // SPARSELU_COLUMN_BMOD_H
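The offset computed from internal::first_multiple above pads each column copied into the L and U storage so that the next column starts on a SIMD packet boundary, which helps the gemm-style kernels use aligned packet loads. A tiny stand-alone sketch of that round-up step (the function name and packet size are illustrative):

#include <cassert>

// Round 'next' up to the next multiple of 'packetSize', mirroring the
// first_multiple-based padding used when copying a column into storage.
inline long roundUpToPacket(long next, long packetSize)
{
  const long rem = next % packetSize;
  return rem == 0 ? next : next + (packetSize - rem);
}

int main()
{
  assert(roundUpToPacket(10, 4) == 12);   // pad by 2 entries
  assert(roundUpToPacket(12, 4) == 12);   // already aligned
  return 0;
}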
diff --git a/Eigen/src/SparseLU/SparseLU_column_dfs.h b/Eigen/src/SparseLU/SparseLU_column_dfs.h
index 1bf17330a..bd450ddc7 100644
--- a/Eigen/src/SparseLU/SparseLU_column_dfs.h
+++ b/Eigen/src/SparseLU/SparseLU_column_dfs.h
@@ -30,36 +30,37 @@
#ifndef SPARSELU_COLUMN_DFS_H
#define SPARSELU_COLUMN_DFS_H
+template <typename Scalar, typename Index> class SparseLUImpl;
namespace Eigen {
namespace internal {
-
+
template<typename IndexVector, typename ScalarVector>
-struct LU_column_dfs_traits
+struct column_dfs_traits
{
- typedef typename IndexVector::Scalar Index;
typedef typename ScalarVector::Scalar Scalar;
- LU_column_dfs_traits(Index jcol, Index& jsuper, LU_GlobalLU_t<IndexVector, ScalarVector>& glu)
- : m_jcol(jcol), m_jsuper_ref(jsuper), m_glu(glu)
+ typedef typename IndexVector::Scalar Index;
+ column_dfs_traits(Index jcol, Index& jsuper, typename SparseLUImpl<Scalar, Index>::GlobalLU_t& glu, SparseLUImpl<Scalar, Index>& luImpl)
+ : m_jcol(jcol), m_jsuper_ref(jsuper), m_glu(glu), m_luImpl(luImpl)
{}
bool update_segrep(Index /*krep*/, Index /*jj*/)
{
return true;
}
- void mem_expand(IndexVector& lsub, int& nextl, int chmark)
+ void mem_expand(IndexVector& lsub, Index& nextl, Index chmark)
{
if (nextl >= m_glu.nzlmax)
- SparseLUBase<Scalar,Index>::LUMemXpand(lsub, m_glu.nzlmax, nextl, LSUB, m_glu.num_expansions);
- if (chmark != (m_jcol-1)) m_jsuper_ref = IND_EMPTY;
+ m_luImpl.memXpand(lsub, m_glu.nzlmax, nextl, LSUB, m_glu.num_expansions);
+ if (chmark != (m_jcol-1)) m_jsuper_ref = emptyIdxLU;
}
enum { ExpandMem = true };
- int m_jcol;
- int& m_jsuper_ref;
- LU_GlobalLU_t<IndexVector, ScalarVector>& m_glu;
+ Index m_jcol;
+ Index& m_jsuper_ref;
+ typename SparseLUImpl<Scalar, Index>::GlobalLU_t& m_glu;
+ SparseLUImpl<Scalar, Index>& m_luImpl;
};
-} // end namespace internal
/**
* \brief Performs a symbolic factorization on column jcol and decide the supernode boundary
@@ -89,34 +90,34 @@ struct LU_column_dfs_traits
*
*/
template <typename Scalar, typename Index>
-int SparseLUBase<Scalar,Index>::LU_column_dfs(const int m, const int jcol, IndexVector& perm_r, int maxsuper, int& nseg, BlockIndexVector lsub_col, IndexVector& segrep, BlockIndexVector repfnz, IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, GlobalLU_t& glu)
+Index SparseLUImpl<Scalar,Index>::column_dfs(const Index m, const Index jcol, IndexVector& perm_r, Index maxsuper, Index& nseg, BlockIndexVector lsub_col, IndexVector& segrep, BlockIndexVector repfnz, IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, GlobalLU_t& glu)
{
- int jsuper = glu.supno(jcol);
- int nextl = glu.xlsub(jcol);
+ Index jsuper = glu.supno(jcol);
+ Index nextl = glu.xlsub(jcol);
VectorBlock<IndexVector> marker2(marker, 2*m, m);
- internal::LU_column_dfs_traits<IndexVector, ScalarVector> traits(jcol, jsuper, glu);
+ column_dfs_traits<IndexVector, ScalarVector> traits(jcol, jsuper, glu, *this);
// For each nonzero in A(*,jcol) do dfs
- for (int k = 0; lsub_col[k] != IND_EMPTY; k++)
+ for (Index k = 0; lsub_col[k] != emptyIdxLU; k++)
{
- int krow = lsub_col(k);
- lsub_col(k) = IND_EMPTY;
- int kmark = marker2(krow);
+ Index krow = lsub_col(k);
+ lsub_col(k) = emptyIdxLU;
+ Index kmark = marker2(krow);
// krow was visited before, go to the next nonz;
if (kmark == jcol) continue;
- LU_dfs_kernel(jcol, perm_r, nseg, glu.lsub, segrep, repfnz, xprune, marker2, parent,
+ dfs_kernel(jcol, perm_r, nseg, glu.lsub, segrep, repfnz, xprune, marker2, parent,
xplore, glu, nextl, krow, traits);
} // for each nonzero ...
- int fsupc, jptr, jm1ptr, ito, ifrom, istop;
- int nsuper = glu.supno(jcol);
- int jcolp1 = jcol + 1;
- int jcolm1 = jcol - 1;
+ Index fsupc, jptr, jm1ptr, ito, ifrom, istop;
+ Index nsuper = glu.supno(jcol);
+ Index jcolp1 = jcol + 1;
+ Index jcolm1 = jcol - 1;
// check to see if j belongs in the same supernode as j-1
if ( jcol == 0 )
@@ -130,18 +131,18 @@ int SparseLUBase<Scalar,Index>::LU_column_dfs(const int m, const int jcol, Index
jm1ptr = glu.xlsub(jcolm1);
// Use supernodes of type T2 : see SuperLU paper
- if ( (nextl-jptr != jptr-jm1ptr-1) ) jsuper = IND_EMPTY;
+ if ( (nextl-jptr != jptr-jm1ptr-1) ) jsuper = emptyIdxLU;
// Make sure the number of columns in a supernode doesn't
// exceed threshold
- if ( (jcol - fsupc) >= maxsuper) jsuper = IND_EMPTY;
+ if ( (jcol - fsupc) >= maxsuper) jsuper = emptyIdxLU;
/* If jcol starts a new supernode, reclaim storage space in
* glu.lsub from previous supernode. Note we only store
* the subscript set of the first and last columns of
* a supernode. (first for num values, last for pruning)
*/
- if (jsuper == IND_EMPTY)
+ if (jsuper == emptyIdxLU)
{ // starts a new supernode
if ( (fsupc < jcolm1-1) )
{ // >= 3 columns in nsuper
@@ -169,6 +170,8 @@ int SparseLUBase<Scalar,Index>::LU_column_dfs(const int m, const int jcol, Index
return 0;
}
+} // end namespace internal
+
} // end namespace Eigen
#endif
diff --git a/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h b/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h
index 10c85d4ff..170610d9f 100644
--- a/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h
+++ b/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h
@@ -30,6 +30,7 @@
#define SPARSELU_COPY_TO_UCOL_H
namespace Eigen {
+namespace internal {
/**
* \brief Performs numeric block updates (sup-col) in topological order
@@ -46,14 +47,14 @@ namespace Eigen {
*
*/
template <typename Scalar, typename Index>
-int SparseLUBase<Scalar,Index>::LU_copy_to_ucol(const int jcol, const int nseg, IndexVector& segrep, BlockIndexVector repfnz ,IndexVector& perm_r, BlockScalarVector dense, GlobalLU_t& glu)
+Index SparseLUImpl<Scalar,Index>::copy_to_ucol(const Index jcol, const Index nseg, IndexVector& segrep, BlockIndexVector repfnz ,IndexVector& perm_r, BlockScalarVector dense, GlobalLU_t& glu)
{
Index ksub, krep, ksupno;
Index jsupno = glu.supno(jcol);
// For each nonzero supernode segment of U[*,j] in topological order
- int k = nseg - 1, i;
+ Index k = nseg - 1, i;
Index nextu = glu.xusub(jcol);
Index kfnz, isub, segsize;
Index new_next,irow;
@@ -65,7 +66,7 @@ int SparseLUBase<Scalar,Index>::LU_copy_to_ucol(const int jcol, const int nseg,
if (jsupno != ksupno ) // should go into ucol();
{
kfnz = repfnz(krep);
- if (kfnz != IND_EMPTY)
+ if (kfnz != emptyIdxLU)
{ // Nonzero U-segment
fsupc = glu.xsup(ksupno);
isub = glu.xlsub(fsupc) + kfnz - fsupc;
@@ -73,9 +74,9 @@ int SparseLUBase<Scalar,Index>::LU_copy_to_ucol(const int jcol, const int nseg,
new_next = nextu + segsize;
while (new_next > glu.nzumax)
{
- mem = LUMemXpand<ScalarVector>(glu.ucol, glu.nzumax, nextu, UCOL, glu.num_expansions);
+ mem = memXpand<ScalarVector>(glu.ucol, glu.nzumax, nextu, UCOL, glu.num_expansions);
if (mem) return mem;
- mem = LUMemXpand<IndexVector>(glu.usub, glu.nzumax, nextu, USUB, glu.num_expansions);
+ mem = memXpand<IndexVector>(glu.usub, glu.nzumax, nextu, USUB, glu.num_expansions);
if (mem) return mem;
}
@@ -99,6 +100,7 @@ int SparseLUBase<Scalar,Index>::LU_copy_to_ucol(const int jcol, const int nseg,
return 0;
}
+} // namespace internal
} // end namespace Eigen
#endif // SPARSELU_COPY_TO_UCOL_H
diff --git a/Eigen/src/SparseLU/SparseLU_gemm_kernel.h b/Eigen/src/SparseLU/SparseLU_gemm_kernel.h
index 11e7318b5..9e4e3e72b 100644
--- a/Eigen/src/SparseLU/SparseLU_gemm_kernel.h
+++ b/Eigen/src/SparseLU/SparseLU_gemm_kernel.h
@@ -21,9 +21,9 @@ namespace internal {
* - lda and ldc must be multiples of the respective packet size
* - C must have the same alignment as A
*/
-template<typename Scalar>
+template<typename Scalar,typename Index>
EIGEN_DONT_INLINE
-void sparselu_gemm(int m, int n, int d, const Scalar* A, int lda, const Scalar* B, int ldb, Scalar* C, int ldc)
+void sparselu_gemm(Index m, Index n, Index d, const Scalar* A, Index lda, const Scalar* B, Index ldb, Scalar* C, Index ldc)
{
using namespace Eigen::internal;
@@ -37,37 +37,37 @@ void sparselu_gemm(int m, int n, int d, const Scalar* A, int lda, const Scalar*
BM = 4096/sizeof(Scalar), // number of rows of A-C per chunk
SM = PM*PacketSize // step along M
};
- int d_end = (d/RK)*RK; // number of columns of A (rows of B) suitable for full register blocking
- int n_end = (n/RN)*RN; // number of columns of B-C suitable for processing RN columns at once
- int i0 = internal::first_aligned(A,m);
+ Index d_end = (d/RK)*RK; // number of columns of A (rows of B) suitable for full register blocking
+ Index n_end = (n/RN)*RN; // number of columns of B-C suitable for processing RN columns at once
+ Index i0 = internal::first_aligned(A,m);
eigen_internal_assert(((lda%PacketSize)==0) && ((ldc%PacketSize)==0) && (i0==internal::first_aligned(C,m)));
// handle the non aligned rows of A and C without any optimization:
- for(int i=0; i<i0; ++i)
+ for(Index i=0; i<i0; ++i)
{
- for(int j=0; j<n; ++j)
+ for(Index j=0; j<n; ++j)
{
Scalar c = C[i+j*ldc];
- for(int k=0; k<d; ++k)
+ for(Index k=0; k<d; ++k)
c += B[k+j*ldb] * A[i+k*lda];
C[i+j*ldc] = c;
}
}
// process the remaining rows per chunk of BM rows
- for(int ib=i0; ib<m; ib+=BM)
+ for(Index ib=i0; ib<m; ib+=BM)
{
- int actual_b = std::min<int>(BM, m-ib); // actual number of rows
- int actual_b_end1 = (actual_b/SM)*SM; // actual number of rows suitable for peeling
- int actual_b_end2 = (actual_b/PacketSize)*PacketSize; // actual number of rows suitable for vectorization
+ Index actual_b = std::min<Index>(BM, m-ib); // actual number of rows
+ Index actual_b_end1 = (actual_b/SM)*SM; // actual number of rows suitable for peeling
+ Index actual_b_end2 = (actual_b/PacketSize)*PacketSize; // actual number of rows suitable for vectorization
// Let's process two columns of B-C at once
- for(int j=0; j<n_end; j+=RN)
+ for(Index j=0; j<n_end; j+=RN)
{
const Scalar* Bc0 = B+(j+0)*ldb;
const Scalar* Bc1 = B+(j+1)*ldb;
- for(int k=0; k<d_end; k+=RK)
+ for(Index k=0; k<d_end; k+=RK)
{
// load and expand a RN x RK block of B
@@ -93,30 +93,38 @@ void sparselu_gemm(int m, int n, int d, const Scalar* A, int lda, const Scalar*
a0 = pload<Packet>(A0);
a1 = pload<Packet>(A1);
- if(RK==4) a2 = pload<Packet>(A2);
- if(RK==4) a3 = pload<Packet>(A3);
+ if(RK==4)
+ {
+ a2 = pload<Packet>(A2);
+ a3 = pload<Packet>(A3);
+ }
+ else
+ {
+ // workaround "may be used uninitialized in this function" warning
+ a2 = a3 = a0;
+ }
-#define KMADD(c, a, b, tmp) tmp = b; tmp = pmul(a,tmp); c = padd(c,tmp);
+#define KMADD(c, a, b, tmp) {tmp = b; tmp = pmul(a,tmp); c = padd(c,tmp);}
#define WORK(I) \
c0 = pload<Packet>(C0+i+(I)*PacketSize); \
c1 = pload<Packet>(C1+i+(I)*PacketSize); \
- KMADD(c0, a0, b00, t0); \
- KMADD(c1, a0, b01, t1); \
+ KMADD(c0, a0, b00, t0) \
+ KMADD(c1, a0, b01, t1) \
a0 = pload<Packet>(A0+i+(I+1)*PacketSize); \
- KMADD(c0, a1, b10, t0); \
- KMADD(c1, a1, b11, t1); \
+ KMADD(c0, a1, b10, t0) \
+ KMADD(c1, a1, b11, t1) \
a1 = pload<Packet>(A1+i+(I+1)*PacketSize); \
- if(RK==4) KMADD(c0, a2, b20, t0); \
- if(RK==4) KMADD(c1, a2, b21, t1); \
+ if(RK==4) KMADD(c0, a2, b20, t0) \
+ if(RK==4) KMADD(c1, a2, b21, t1) \
if(RK==4) a2 = pload<Packet>(A2+i+(I+1)*PacketSize); \
- if(RK==4) KMADD(c0, a3, b30, t0); \
- if(RK==4) KMADD(c1, a3, b31, t1); \
+ if(RK==4) KMADD(c0, a3, b30, t0) \
+ if(RK==4) KMADD(c1, a3, b31, t1) \
if(RK==4) a3 = pload<Packet>(A3+i+(I+1)*PacketSize); \
pstore(C0+i+(I)*PacketSize, c0); \
pstore(C1+i+(I)*PacketSize, c1)
// process rows of A' - C' with aggressive vectorization and peeling
- for(int i=0; i<actual_b_end1; i+=PacketSize*8)
+ for(Index i=0; i<actual_b_end1; i+=PacketSize*8)
{
EIGEN_ASM_COMMENT("SPARSELU_GEMML_KERNEL1");
prefetch((A0+i+(5)*PacketSize));
@@ -133,12 +141,13 @@ void sparselu_gemm(int m, int n, int d, const Scalar* A, int lda, const Scalar*
WORK(7);
}
// process the remaining rows with vectorization only
- for(int i=actual_b_end1; i<actual_b_end2; i+=PacketSize)
+ for(Index i=actual_b_end1; i<actual_b_end2; i+=PacketSize)
{
WORK(0);
}
+#undef WORK
// process the remaining rows without vectorization
- for(int i=actual_b_end2; i<actual_b; ++i)
+ for(Index i=actual_b_end2; i<actual_b; ++i)
{
if(RK==4)
{
@@ -154,7 +163,6 @@ void sparselu_gemm(int m, int n, int d, const Scalar* A, int lda, const Scalar*
Bc0 += RK;
Bc1 += RK;
-#undef WORK
} // peeled loop on k
} // peeled loop on the columns j
// process the last column (we now perform a matrix-vector product)
@@ -162,7 +170,7 @@ void sparselu_gemm(int m, int n, int d, const Scalar* A, int lda, const Scalar*
{
const Scalar* Bc0 = B+(n-1)*ldb;
- for(int k=0; k<d_end; k+=RK)
+ for(Index k=0; k<d_end; k+=RK)
{
// load and expand a 1 x RK block of B
@@ -183,23 +191,31 @@ void sparselu_gemm(int m, int n, int d, const Scalar* A, int lda, const Scalar*
a0 = pload<Packet>(A0);
a1 = pload<Packet>(A1);
- if(RK==4) a2 = pload<Packet>(A2);
- if(RK==4) a3 = pload<Packet>(A3);
+ if(RK==4)
+ {
+ a2 = pload<Packet>(A2);
+ a3 = pload<Packet>(A3);
+ }
+ else
+ {
+ // workaround "may be used uninitialized in this function" warning
+ a2 = a3 = a0;
+ }
#define WORK(I) \
c0 = pload<Packet>(C0+i+(I)*PacketSize); \
- KMADD(c0, a0, b00, t0); \
+ KMADD(c0, a0, b00, t0) \
a0 = pload<Packet>(A0+i+(I+1)*PacketSize); \
- KMADD(c0, a1, b10, t0); \
+ KMADD(c0, a1, b10, t0) \
a1 = pload<Packet>(A1+i+(I+1)*PacketSize); \
- if(RK==4) KMADD(c0, a2, b20, t0); \
+ if(RK==4) KMADD(c0, a2, b20, t0) \
if(RK==4) a2 = pload<Packet>(A2+i+(I+1)*PacketSize); \
- if(RK==4) KMADD(c0, a3, b30, t0); \
+ if(RK==4) KMADD(c0, a3, b30, t0) \
if(RK==4) a3 = pload<Packet>(A3+i+(I+1)*PacketSize); \
pstore(C0+i+(I)*PacketSize, c0);
// aggressive vectorization and peeling
- for(int i=0; i<actual_b_end1; i+=PacketSize*8)
+ for(Index i=0; i<actual_b_end1; i+=PacketSize*8)
{
EIGEN_ASM_COMMENT("SPARSELU_GEMML_KERNEL2");
WORK(0);
@@ -212,12 +228,12 @@ void sparselu_gemm(int m, int n, int d, const Scalar* A, int lda, const Scalar*
WORK(7);
}
// vectorization only
- for(int i=actual_b_end1; i<actual_b_end2; i+=PacketSize)
+ for(Index i=actual_b_end1; i<actual_b_end2; i+=PacketSize)
{
WORK(0);
}
// remaining scalars
- for(int i=actual_b_end2; i<actual_b; ++i)
+ for(Index i=actual_b_end2; i<actual_b; ++i)
{
if(RK==4)
C0[i] += A0[i]*Bc0[0]+A1[i]*Bc0[1]+A2[i]*Bc0[2]+A3[i]*Bc0[3];
@@ -231,10 +247,10 @@ void sparselu_gemm(int m, int n, int d, const Scalar* A, int lda, const Scalar*
}
// process the last columns of A, corresponding to the last rows of B
- int rd = d-d_end;
+ Index rd = d-d_end;
if(rd>0)
{
- for(int j=0; j<n; ++j)
+ for(Index j=0; j<n; ++j)
{
enum {
Alignment = PacketSize>1 ? Aligned : 0
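Note: the sparselu_gemm kernel above is a register-blocked, packet-vectorized C += A*B tuned for the tall, skinny blocks produced by supernodal updates; this hunk mainly widens every loop counter from int to the Index template parameter. A purely scalar sketch of the same blocking structure (RN = 2 columns of B/C and RK = 4 columns of A handled together, column-major buffers with leading dimensions lda/ldb/ldc) is given below; it is illustrative only and omits the alignment handling, prefetching and loop peeling of the real kernel.

    // C (m x n) += A (m x d) * B (d x n), all column-major.
    template<typename Scalar, typename Index>
    void blocked_gemm_sketch(Index m, Index n, Index d,
                             const Scalar* A, Index lda,
                             const Scalar* B, Index ldb,
                             Scalar* C, Index ldc)
    {
      const Index RN = 2, RK = 4;                 // register blocking factors
      Index n_end = (n/RN)*RN;                    // columns of B/C handled two at a time
      Index d_end = (d/RK)*RK;                    // columns of A handled four at a time
      for (Index j = 0; j < n_end; j += RN)
        for (Index k = 0; k < d_end; k += RK)
          for (Index i = 0; i < m; ++i)
          {
            Scalar c0 = C[i+(j+0)*ldc], c1 = C[i+(j+1)*ldc];
            for (Index kk = k; kk < k+RK; ++kk)   // unrolled in the real kernel
            {
              c0 += A[i+kk*lda] * B[kk+(j+0)*ldb];
              c1 += A[i+kk*lda] * B[kk+(j+1)*ldb];
            }
            C[i+(j+0)*ldc] = c0;
            C[i+(j+1)*ldc] = c1;
          }
      // tails: the last n-n_end columns of B/C and the last d-d_end columns of A
      for (Index j = 0; j < n; ++j)
        for (Index k = (j < n_end ? d_end : 0); k < d; ++k)
          for (Index i = 0; i < m; ++i)
            C[i+j*ldc] += A[i+k*lda] * B[k+j*ldb];
    }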
diff --git a/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h b/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h
index a1ea5bc06..7a4e4305a 100644
--- a/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h
+++ b/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h
@@ -29,6 +29,7 @@
#define SPARSELU_HEAP_RELAX_SNODE_H
namespace Eigen {
+namespace internal {
/**
* \brief Identify the initial relaxed supernodes
@@ -42,14 +43,14 @@ namespace Eigen {
* \param relax_end last column in a supernode
*/
template <typename Scalar, typename Index>
-void SparseLUBase<Scalar,Index>::LU_heap_relax_snode (const int n, IndexVector& et, const int relax_columns, IndexVector& descendants, IndexVector& relax_end)
+void SparseLUImpl<Scalar,Index>::heap_relax_snode (const Index n, IndexVector& et, const Index relax_columns, IndexVector& descendants, IndexVector& relax_end)
{
// The etree may not be postordered, but it is heap ordered
IndexVector post;
internal::treePostorder(n, et, post); // Post order etree
IndexVector inv_post(n+1);
- int i;
+ Index i;
for (i = 0; i < n+1; ++i) inv_post(post(i)) = i; // inv_post = post.inverse()???
// Renumber etree in postorder
@@ -63,8 +64,8 @@ void SparseLUBase<Scalar,Index>::LU_heap_relax_snode (const int n, IndexVector&
et = iwork;
// compute the number of descendants of each node in the etree
- relax_end.setConstant(IND_EMPTY);
- int j, parent;
+ relax_end.setConstant(emptyIdxLU);
+ Index j, parent;
descendants.setZero();
for (j = 0; j < n; j++)
{
@@ -73,11 +74,11 @@ void SparseLUBase<Scalar,Index>::LU_heap_relax_snode (const int n, IndexVector&
descendants(parent) += descendants(j) + 1;
}
// Identify the relaxed supernodes by postorder traversal of the etree
- int snode_start; // beginning of a snode
- int k;
- int nsuper_et_post = 0; // Number of relaxed snodes in postordered etree
- int nsuper_et = 0; // Number of relaxed snodes in the original etree
- int l;
+ Index snode_start; // beginning of a snode
+ Index k;
+ Index nsuper_et_post = 0; // Number of relaxed snodes in postordered etree
+ Index nsuper_et = 0; // Number of relaxed snodes in the original etree
+ Index l;
for (j = 0; j < n; )
{
parent = et(j);
@@ -120,6 +121,7 @@ void SparseLUBase<Scalar,Index>::LU_heap_relax_snode (const int n, IndexVector&
et = et_save;
}
-} // end namespace Eigen
+} // end namespace internal
+} // end namespace Eigen
#endif // SPARSELU_HEAP_RELAX_SNODE_H
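Note: heap_relax_snode (and relax_snode further down) pick the initial relaxed supernodes by counting, for every node of the column elimination tree, its number of descendants, and then grouping a leaf with its ancestors while that count stays below relax_columns. The sketch below shows just the descendant count on a plain parent array, using the same convention as above: et[j] == n marks a root, and a parent always has a larger index than its children, so a single forward sweep accumulates the counts bottom-up. The function name is illustrative.

    #include <vector>

    // et[j] = parent of node j in the elimination tree, et[j] == n for a root.
    std::vector<int> countDescendants(const std::vector<int>& et, int n)
    {
      std::vector<int> descendants(n, 0);
      for (int j = 0; j < n; ++j)
      {
        int parent = et[j];
        if (parent != n)                       // skip roots
          descendants[parent] += descendants[j] + 1;
      }
      return descendants;
    }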
diff --git a/Eigen/src/SparseLU/SparseLU_kernel_bmod.h b/Eigen/src/SparseLU/SparseLU_kernel_bmod.h
index 8b65ff37c..0d0283b13 100644
--- a/Eigen/src/SparseLU/SparseLU_kernel_bmod.h
+++ b/Eigen/src/SparseLU/SparseLU_kernel_bmod.h
@@ -12,6 +12,7 @@
#define SPARSELU_KERNEL_BMOD_H
namespace Eigen {
+namespace internal {
/**
* \brief Performs numeric block updates from a given supernode to a single column
@@ -29,88 +30,101 @@ namespace Eigen {
*/
template <int SegSizeAtCompileTime> struct LU_kernel_bmod
{
- template <typename BlockScalarVector, typename ScalarVector, typename IndexVector>
- EIGEN_DONT_INLINE static void run(const int segsize, BlockScalarVector& dense, ScalarVector& tempv, ScalarVector& lusup, int& luptr, const int lda, const int nrow, IndexVector& lsub, const int lptr, const int no_zeros)
+ template <typename BlockScalarVector, typename ScalarVector, typename IndexVector, typename Index>
+ static EIGEN_DONT_INLINE void run(const int segsize, BlockScalarVector& dense, ScalarVector& tempv, ScalarVector& lusup, Index& luptr, const Index lda,
+ const Index nrow, IndexVector& lsub, const Index lptr, const Index no_zeros);
+};
+
+template <int SegSizeAtCompileTime>
+template <typename BlockScalarVector, typename ScalarVector, typename IndexVector, typename Index>
+EIGEN_DONT_INLINE void LU_kernel_bmod<SegSizeAtCompileTime>::run(const int segsize, BlockScalarVector& dense, ScalarVector& tempv, ScalarVector& lusup, Index& luptr, const Index lda,
+ const Index nrow, IndexVector& lsub, const Index lptr, const Index no_zeros)
+{
+ typedef typename ScalarVector::Scalar Scalar;
+ // First, copy U[*,j] segment from dense(*) to tempv(*)
+ // The result of triangular solve is in tempv[*];
+ // The result of matrix-vector update is in dense[*]
+ Index isub = lptr + no_zeros;
+ int i;
+ Index irow;
+ for (i = 0; i < ((SegSizeAtCompileTime==Dynamic)?segsize:SegSizeAtCompileTime); i++)
{
- typedef typename ScalarVector::Scalar Scalar;
- // First, copy U[*,j] segment from dense(*) to tempv(*)
- // The result of triangular solve is in tempv[*];
- // The result of matric-vector update is in dense[*]
- int isub = lptr + no_zeros;
- int i, irow;
- for (i = 0; i < ((SegSizeAtCompileTime==Dynamic)?segsize:SegSizeAtCompileTime); i++)
- {
- irow = lsub(isub);
- tempv(i) = dense(irow);
- ++isub;
- }
- // Dense triangular solve -- start effective triangle
- luptr += lda * no_zeros + no_zeros;
- // Form Eigen matrix and vector
- Map<Matrix<Scalar,SegSizeAtCompileTime,SegSizeAtCompileTime>, 0, OuterStride<> > A( &(lusup.data()[luptr]), segsize, segsize, OuterStride<>(lda) );
- Map<Matrix<Scalar,SegSizeAtCompileTime,1> > u(tempv.data(), segsize);
-
- u = A.template triangularView<UnitLower>().solve(u);
-
- // Dense matrix-vector product y <-- B*x
- luptr += segsize;
- const int PacketSize = internal::packet_traits<Scalar>::size;
- int ldl = internal::first_multiple(nrow, PacketSize);
- Map<Matrix<Scalar,Dynamic,SegSizeAtCompileTime>, 0, OuterStride<> > B( &(lusup.data()[luptr]), nrow, segsize, OuterStride<>(lda) );
- int aligned_offset = internal::first_aligned(tempv.data()+segsize, PacketSize);
- int aligned_with_B_offset = (PacketSize-internal::first_aligned(B.data(), PacketSize))%PacketSize;
- Map<Matrix<Scalar,Dynamic,1>, 0, OuterStride<> > l(tempv.data()+segsize+aligned_offset+aligned_with_B_offset, nrow, OuterStride<>(ldl) );
-
- l.setZero();
- internal::sparselu_gemm<Scalar>(l.rows(), l.cols(), B.cols(), B.data(), B.outerStride(), u.data(), u.outerStride(), l.data(), l.outerStride());
-
- // Scatter tempv[] into SPA dense[] as a temporary storage
- isub = lptr + no_zeros;
- for (i = 0; i < ((SegSizeAtCompileTime==Dynamic)?segsize:SegSizeAtCompileTime); i++)
- {
- irow = lsub(isub++);
- dense(irow) = tempv(i);
- }
-
- // Scatter l into SPA dense[]
- for (i = 0; i < nrow; i++)
- {
- irow = lsub(isub++);
- dense(irow) -= l(i);
- }
+ irow = lsub(isub);
+ tempv(i) = dense(irow);
+ ++isub;
}
-};
+ // Dense triangular solve -- start effective triangle
+ luptr += lda * no_zeros + no_zeros;
+ // Form Eigen matrix and vector
+ Map<Matrix<Scalar,SegSizeAtCompileTime,SegSizeAtCompileTime>, 0, OuterStride<> > A( &(lusup.data()[luptr]), segsize, segsize, OuterStride<>(lda) );
+ Map<Matrix<Scalar,SegSizeAtCompileTime,1> > u(tempv.data(), segsize);
+
+ u = A.template triangularView<UnitLower>().solve(u);
+
+ // Dense matrix-vector product y <-- B*x
+ luptr += segsize;
+ const Index PacketSize = internal::packet_traits<Scalar>::size;
+ Index ldl = internal::first_multiple(nrow, PacketSize);
+ Map<Matrix<Scalar,Dynamic,SegSizeAtCompileTime>, 0, OuterStride<> > B( &(lusup.data()[luptr]), nrow, segsize, OuterStride<>(lda) );
+ Index aligned_offset = internal::first_aligned(tempv.data()+segsize, PacketSize);
+ Index aligned_with_B_offset = (PacketSize-internal::first_aligned(B.data(), PacketSize))%PacketSize;
+ Map<Matrix<Scalar,Dynamic,1>, 0, OuterStride<> > l(tempv.data()+segsize+aligned_offset+aligned_with_B_offset, nrow, OuterStride<>(ldl) );
+
+ l.setZero();
+ internal::sparselu_gemm<Scalar>(l.rows(), l.cols(), B.cols(), B.data(), B.outerStride(), u.data(), u.outerStride(), l.data(), l.outerStride());
+
+ // Scatter tempv[] into SPA dense[] as a temporary storage
+ isub = lptr + no_zeros;
+ for (i = 0; i < ((SegSizeAtCompileTime==Dynamic)?segsize:SegSizeAtCompileTime); i++)
+ {
+ irow = lsub(isub++);
+ dense(irow) = tempv(i);
+ }
+
+ // Scatter l into SPA dense[]
+ for (i = 0; i < nrow; i++)
+ {
+ irow = lsub(isub++);
+ dense(irow) -= l(i);
+ }
+}
template <> struct LU_kernel_bmod<1>
{
- template <typename BlockScalarVector, typename ScalarVector, typename IndexVector>
- EIGEN_DONT_INLINE static void run(const int /*segsize*/, BlockScalarVector& dense, ScalarVector& /*tempv*/, ScalarVector& lusup, int& luptr, const int lda, const int nrow,
- IndexVector& lsub, const int lptr, const int no_zeros)
+ template <typename BlockScalarVector, typename ScalarVector, typename IndexVector, typename Index>
+ static EIGEN_DONT_INLINE void run(const int /*segsize*/, BlockScalarVector& dense, ScalarVector& /*tempv*/, ScalarVector& lusup, Index& luptr,
+ const Index lda, const Index nrow, IndexVector& lsub, const Index lptr, const Index no_zeros);
+};
+
+
+template <typename BlockScalarVector, typename ScalarVector, typename IndexVector, typename Index>
+EIGEN_DONT_INLINE void LU_kernel_bmod<1>::run(const int /*segsize*/, BlockScalarVector& dense, ScalarVector& /*tempv*/, ScalarVector& lusup, Index& luptr,
+ const Index lda, const Index nrow, IndexVector& lsub, const Index lptr, const Index no_zeros)
+{
+ typedef typename ScalarVector::Scalar Scalar;
+ Scalar f = dense(lsub(lptr + no_zeros));
+ luptr += lda * no_zeros + no_zeros + 1;
+ const Scalar* a(lusup.data() + luptr);
+ const /*typename IndexVector::Scalar*/Index* irow(lsub.data()+lptr + no_zeros + 1);
+ Index i = 0;
+ for (; i+1 < nrow; i+=2)
{
- typedef typename ScalarVector::Scalar Scalar;
- Scalar f = dense(lsub(lptr + no_zeros));
- luptr += lda * no_zeros + no_zeros + 1;
- const Scalar* a(lusup.data() + luptr);
- const typename IndexVector::Scalar* irow(lsub.data()+lptr + no_zeros + 1);
- int i = 0;
- for (; i+1 < nrow; i+=2)
- {
- int i0 = *(irow++);
- int i1 = *(irow++);
- Scalar a0 = *(a++);
- Scalar a1 = *(a++);
- Scalar d0 = dense.coeff(i0);
- Scalar d1 = dense.coeff(i1);
- d0 -= f*a0;
- d1 -= f*a1;
- dense.coeffRef(i0) = d0;
- dense.coeffRef(i1) = d1;
- }
- if(i<nrow)
- dense.coeffRef(*(irow++)) -= f * *(a++);
+ Index i0 = *(irow++);
+ Index i1 = *(irow++);
+ Scalar a0 = *(a++);
+ Scalar a1 = *(a++);
+ Scalar d0 = dense.coeff(i0);
+ Scalar d1 = dense.coeff(i1);
+ d0 -= f*a0;
+ d1 -= f*a1;
+ dense.coeffRef(i0) = d0;
+ dense.coeffRef(i1) = d1;
}
-};
+ if(i<nrow)
+ dense.coeffRef(*(irow++)) -= f * *(a++);
+}
-} // end namespace Eigen
+} // end namespace internal
+} // end namespace Eigen
#endif // SPARSELU_KERNEL_BMOD_H
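Note: LU_kernel_bmod gathers a U-segment into a contiguous temporary, solves with the unit-lower triangle of the supernode (stored in a strided column-major buffer), runs a dense matrix-vector update through sparselu_gemm, and scatters the result back. The strided view is obtained with Map and OuterStride; the short, self-contained example below shows that idiom on a raw buffer with leading dimension lda (buffer contents and sizes are made up for illustration).

    #include <Eigen/Dense>
    using namespace Eigen;

    int main()
    {
      const int lda = 8, k = 3;                            // leading dimension and segment size
      double buf[lda*k];
      for (int i = 0; i < lda*k; ++i) buf[i] = 1.0 + i;    // arbitrary data

      // View the top k x k corner of the strided buffer without copying it.
      Map<Matrix<double,Dynamic,Dynamic>, 0, OuterStride<> > A(buf, k, k, OuterStride<>(lda));

      VectorXd u = VectorXd::Ones(k);
      // Solve with the unit lower triangle of A, exactly as kernel_bmod does.
      u = A.triangularView<UnitLower>().solve(u);

      return 0;
    }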
diff --git a/Eigen/src/SparseLU/SparseLU_panel_bmod.h b/Eigen/src/SparseLU/SparseLU_panel_bmod.h
index fbc146a36..da0e0fc3c 100644
--- a/Eigen/src/SparseLU/SparseLU_panel_bmod.h
+++ b/Eigen/src/SparseLU/SparseLU_panel_bmod.h
@@ -32,6 +32,7 @@
#define SPARSELU_PANEL_BMOD_H
namespace Eigen {
+namespace internal {
/**
* \brief Performs numeric block updates (sup-panel) in topological order.
@@ -52,18 +53,19 @@ namespace Eigen {
*
*/
template <typename Scalar, typename Index>
-void SparseLUBase<Scalar,Index>::LU_panel_bmod(const int m, const int w, const int jcol, const int nseg, ScalarVector& dense, ScalarVector& tempv,
- IndexVector& segrep, IndexVector& repfnz, GlobalLU_t& glu)
+void SparseLUImpl<Scalar,Index>::panel_bmod(const Index m, const Index w, const Index jcol,
+ const Index nseg, ScalarVector& dense, ScalarVector& tempv,
+ IndexVector& segrep, IndexVector& repfnz, GlobalLU_t& glu)
{
- int ksub,jj,nextl_col;
- int fsupc, nsupc, nsupr, nrow;
- int krep, kfnz;
- int lptr; // points to the row subscripts of a supernode
- int luptr; // ...
- int segsize,no_zeros ;
+ Index ksub,jj,nextl_col;
+ Index fsupc, nsupc, nsupr, nrow;
+ Index krep, kfnz;
+ Index lptr; // points to the row subscripts of a supernode
+ Index luptr; // ...
+ Index segsize,no_zeros ;
// For each nonz supernode segment of U[*,j] in topological order
- int k = nseg - 1;
+ Index k = nseg - 1;
const Index PacketSize = internal::packet_traits<Scalar>::size;
for (ksub = 0; ksub < nseg; ksub++)
@@ -81,15 +83,15 @@ void SparseLUBase<Scalar,Index>::LU_panel_bmod(const int m, const int w, const i
lptr = glu.xlsub(fsupc);
// loop over the panel columns to detect the actual number of columns and rows
- int u_rows = 0;
- int u_cols = 0;
+ Index u_rows = 0;
+ Index u_cols = 0;
for (jj = jcol; jj < jcol + w; jj++)
{
nextl_col = (jj-jcol) * m;
VectorBlock<IndexVector> repfnz_col(repfnz, nextl_col, m); // First nonzero column index for each row
kfnz = repfnz_col(krep);
- if ( kfnz == IND_EMPTY )
+ if ( kfnz == emptyIdxLU )
continue; // skip any zero segment
segsize = krep - kfnz + 1;
@@ -99,11 +101,11 @@ void SparseLUBase<Scalar,Index>::LU_panel_bmod(const int m, const int w, const i
if(nsupc >= 2)
{
- int ldu = internal::first_multiple<Index>(u_rows, PacketSize);
+ Index ldu = internal::first_multiple<Index>(u_rows, PacketSize);
Map<Matrix<Scalar,Dynamic,Dynamic>, Aligned, OuterStride<> > U(tempv.data(), u_rows, u_cols, OuterStride<>(ldu));
// gather U
- int u_col = 0;
+ Index u_col = 0;
for (jj = jcol; jj < jcol + w; jj++)
{
nextl_col = (jj-jcol) * m;
@@ -111,19 +113,19 @@ void SparseLUBase<Scalar,Index>::LU_panel_bmod(const int m, const int w, const i
VectorBlock<ScalarVector> dense_col(dense, nextl_col, m); // Scatter/gather entire matrix column from/to here
kfnz = repfnz_col(krep);
- if ( kfnz == IND_EMPTY )
+ if ( kfnz == emptyIdxLU )
continue; // skip any zero segment
segsize = krep - kfnz + 1;
luptr = glu.xlusup(fsupc);
no_zeros = kfnz - fsupc;
- int isub = lptr + no_zeros;
- int off = u_rows-segsize;
- for (int i = 0; i < off; i++) U(i,u_col) = 0;
- for (int i = 0; i < segsize; i++)
+ Index isub = lptr + no_zeros;
+ Index off = u_rows-segsize;
+ for (Index i = 0; i < off; i++) U(i,u_col) = 0;
+ for (Index i = 0; i < segsize; i++)
{
- int irow = glu.lsub(isub);
+ Index irow = glu.lsub(isub);
U(i+off,u_col) = dense_col(irow);
++isub;
}
@@ -131,7 +133,7 @@ void SparseLUBase<Scalar,Index>::LU_panel_bmod(const int m, const int w, const i
}
// solve U = A^-1 U
luptr = glu.xlusup(fsupc);
- int lda = glu.xlusup(fsupc+1) - glu.xlusup(fsupc);
+ Index lda = glu.xlusup(fsupc+1) - glu.xlusup(fsupc);
no_zeros = (krep - u_rows + 1) - fsupc;
luptr += lda * no_zeros + no_zeros;
Map<Matrix<Scalar,Dynamic,Dynamic>, 0, OuterStride<> > A(glu.lusup.data()+luptr, u_rows, u_rows, OuterStride<>(lda) );
@@ -142,8 +144,8 @@ void SparseLUBase<Scalar,Index>::LU_panel_bmod(const int m, const int w, const i
Map<Matrix<Scalar,Dynamic,Dynamic>, 0, OuterStride<> > B(glu.lusup.data()+luptr, nrow, u_rows, OuterStride<>(lda) );
eigen_assert(tempv.size()>w*ldu + nrow*w + 1);
- int ldl = internal::first_multiple<Index>(nrow, PacketSize);
- int offset = (PacketSize-internal::first_aligned(B.data(), PacketSize)) % PacketSize;
+ Index ldl = internal::first_multiple<Index>(nrow, PacketSize);
+ Index offset = (PacketSize-internal::first_aligned(B.data(), PacketSize)) % PacketSize;
Map<Matrix<Scalar,Dynamic,Dynamic>, 0, OuterStride<> > L(tempv.data()+w*ldu+offset, nrow, u_cols, OuterStride<>(ldl));
L.setZero();
@@ -158,25 +160,25 @@ void SparseLUBase<Scalar,Index>::LU_panel_bmod(const int m, const int w, const i
VectorBlock<ScalarVector> dense_col(dense, nextl_col, m); // Scatter/gather entire matrix column from/to here
kfnz = repfnz_col(krep);
- if ( kfnz == IND_EMPTY )
+ if ( kfnz == emptyIdxLU )
continue; // skip any zero segment
segsize = krep - kfnz + 1;
no_zeros = kfnz - fsupc;
- int isub = lptr + no_zeros;
+ Index isub = lptr + no_zeros;
- int off = u_rows-segsize;
- for (int i = 0; i < segsize; i++)
+ Index off = u_rows-segsize;
+ for (Index i = 0; i < segsize; i++)
{
- int irow = glu.lsub(isub++);
+ Index irow = glu.lsub(isub++);
dense_col(irow) = U.coeff(i+off,u_col);
U.coeffRef(i+off,u_col) = 0;
}
// Scatter l into SPA dense[]
- for (int i = 0; i < nrow; i++)
+ for (Index i = 0; i < nrow; i++)
{
- int irow = glu.lsub(isub++);
+ Index irow = glu.lsub(isub++);
dense_col(irow) -= L.coeff(i,u_col);
L.coeffRef(i,u_col) = 0;
}
@@ -193,13 +195,13 @@ void SparseLUBase<Scalar,Index>::LU_panel_bmod(const int m, const int w, const i
VectorBlock<ScalarVector> dense_col(dense, nextl_col, m); // Scatter/gather entire matrix column from/to here
kfnz = repfnz_col(krep);
- if ( kfnz == IND_EMPTY )
+ if ( kfnz == emptyIdxLU )
continue; // skip any zero segment
segsize = krep - kfnz + 1;
luptr = glu.xlusup(fsupc);
- int lda = glu.xlusup(fsupc+1)-glu.xlusup(fsupc);// nsupr
+ Index lda = glu.xlusup(fsupc+1)-glu.xlusup(fsupc);// nsupr
// Perform a triangular solve and block update,
// then scatter the result of sup-col update to dense[]
@@ -212,7 +214,9 @@ void SparseLUBase<Scalar,Index>::LU_panel_bmod(const int m, const int w, const i
}
} // End for each updating supernode
-}
+} // end panel bmod
+
+} // end namespace internal
} // end namespace Eigen
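Note: panel_bmod batches the update: the U-segments of a whole panel are gathered into one dense matrix whose leading dimension is padded to a multiple of the packet size (internal::first_multiple), a single triangular solve and gemm are applied, and the results are scattered back through the row index lists. A minimal sketch of the padding helper and of the gather/update/scatter step on a plain accumulator follows; the names and the dummy update are illustrative.

    #include <vector>
    #include <cstddef>

    // Round 'rows' up to the next multiple of 'packet', cf. internal::first_multiple,
    // so that every gathered column starts on a packet boundary.
    inline int round_up(int rows, int packet)
    {
      return ((rows + packet - 1) / packet) * packet;
    }

    // Gather dense[rows[i]] into a contiguous segment, update it, scatter it back.
    // 'dense' plays the role of the per-column accumulator dense_col above.
    void gather_update_scatter(std::vector<double>& dense, const std::vector<int>& rows)
    {
      std::vector<double> seg(rows.size());
      for (std::size_t i = 0; i < rows.size(); ++i)    // gather
        seg[i] = dense[rows[i]];
      for (std::size_t i = 0; i < seg.size(); ++i)     // stand-in for the solve + gemm update
        seg[i] *= 2.0;
      for (std::size_t i = 0; i < rows.size(); ++i)    // scatter back
        dense[rows[i]] = seg[i];
    }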
diff --git a/Eigen/src/SparseLU/SparseLU_panel_dfs.h b/Eigen/src/SparseLU/SparseLU_panel_dfs.h
index 16e04423b..dc0054efd 100644
--- a/Eigen/src/SparseLU/SparseLU_panel_dfs.h
+++ b/Eigen/src/SparseLU/SparseLU_panel_dfs.h
@@ -35,10 +35,10 @@ namespace Eigen {
namespace internal {
template<typename IndexVector>
-struct LU_panel_dfs_traits
+struct panel_dfs_traits
{
typedef typename IndexVector::Scalar Index;
- LU_panel_dfs_traits(Index jcol, Index* marker)
+ panel_dfs_traits(Index jcol, Index* marker)
: m_jcol(jcol), m_marker(marker)
{}
bool update_segrep(Index krep, Index jj)
@@ -50,30 +50,29 @@ struct LU_panel_dfs_traits
}
return false;
}
- void mem_expand(IndexVector& /*glu.lsub*/, int /*nextl*/, int /*chmark*/) {}
+ void mem_expand(IndexVector& /*glu.lsub*/, Index /*nextl*/, Index /*chmark*/) {}
enum { ExpandMem = false };
Index m_jcol;
Index* m_marker;
};
-} // end namespace internal
template <typename Scalar, typename Index>
template <typename Traits>
-void SparseLUBase<Scalar,Index>::LU_dfs_kernel(const int jj, IndexVector& perm_r,
- int& nseg, IndexVector& panel_lsub, IndexVector& segrep,
+void SparseLUImpl<Scalar,Index>::dfs_kernel(const Index jj, IndexVector& perm_r,
+ Index& nseg, IndexVector& panel_lsub, IndexVector& segrep,
Ref<IndexVector> repfnz_col, IndexVector& xprune, Ref<IndexVector> marker, IndexVector& parent,
IndexVector& xplore, GlobalLU_t& glu,
- int& nextl_col, int krow, Traits& traits
+ Index& nextl_col, Index krow, Traits& traits
)
{
- int kmark = marker(krow);
+ Index kmark = marker(krow);
// For each unmarked krow of jj
marker(krow) = jj;
- int kperm = perm_r(krow);
- if (kperm == IND_EMPTY ) {
+ Index kperm = perm_r(krow);
+ if (kperm == emptyIdxLU ) {
// krow is in L : place it in structure of L(*, jj)
panel_lsub(nextl_col++) = krow; // krow is indexed into A
@@ -84,11 +83,11 @@ void SparseLUBase<Scalar,Index>::LU_dfs_kernel(const int jj, IndexVector& perm_r
// krow is in U : if its supernode-representative krep
// has been explored, update repfnz(*)
// krep = supernode representative of the current row
- int krep = glu.xsup(glu.supno(kperm)+1) - 1;
+ Index krep = glu.xsup(glu.supno(kperm)+1) - 1;
// First nonzero element in the current column:
- int myfnz = repfnz_col(krep);
+ Index myfnz = repfnz_col(krep);
- if (myfnz != IND_EMPTY )
+ if (myfnz != emptyIdxLU )
{
// Representative visited before
if (myfnz > kperm ) repfnz_col(krep) = kperm;
@@ -97,28 +96,28 @@ void SparseLUBase<Scalar,Index>::LU_dfs_kernel(const int jj, IndexVector& perm_r
else
{
// Otherwise, perform dfs starting at krep
- int oldrep = IND_EMPTY;
+ Index oldrep = emptyIdxLU;
parent(krep) = oldrep;
repfnz_col(krep) = kperm;
- int xdfs = glu.xlsub(krep);
- int maxdfs = xprune(krep);
+ Index xdfs = glu.xlsub(krep);
+ Index maxdfs = xprune(krep);
- int kpar;
+ Index kpar;
do
{
// For each unmarked kchild of krep
while (xdfs < maxdfs)
{
- int kchild = glu.lsub(xdfs);
+ Index kchild = glu.lsub(xdfs);
xdfs++;
- int chmark = marker(kchild);
+ Index chmark = marker(kchild);
if (chmark != jj )
{
marker(kchild) = jj;
- int chperm = perm_r(kchild);
+ Index chperm = perm_r(kchild);
- if (chperm == IND_EMPTY)
+ if (chperm == emptyIdxLU)
{
// case kchild is in L: place it in L(*, j)
panel_lsub(nextl_col++) = kchild;
@@ -129,10 +128,10 @@ void SparseLUBase<Scalar,Index>::LU_dfs_kernel(const int jj, IndexVector& perm_r
// case kchild is in U :
// chrep = its supernode-rep. If its rep has been explored,
// update its repfnz(*)
- int chrep = glu.xsup(glu.supno(chperm)+1) - 1;
+ Index chrep = glu.xsup(glu.supno(chperm)+1) - 1;
myfnz = repfnz_col(chrep);
- if (myfnz != IND_EMPTY)
+ if (myfnz != emptyIdxLU)
{ // Visited before
if (myfnz > chperm)
repfnz_col(chrep) = chperm;
@@ -167,13 +166,13 @@ void SparseLUBase<Scalar,Index>::LU_dfs_kernel(const int jj, IndexVector& perm_r
}
kpar = parent(krep); // Pop recursion, mimic recursion
- if (kpar == IND_EMPTY)
+ if (kpar == emptyIdxLU)
break; // dfs done
krep = kpar;
xdfs = xplore(krep);
maxdfs = xprune(krep);
- } while (kpar != IND_EMPTY); // Do until empty stack
+ } while (kpar != emptyIdxLU); // Do until empty stack
} // end if (myfnz = -1)
@@ -217,18 +216,18 @@ void SparseLUBase<Scalar,Index>::LU_dfs_kernel(const int jj, IndexVector& perm_r
*/
template <typename Scalar, typename Index>
-void SparseLUBase<Scalar,Index>::LU_panel_dfs(const int m, const int w, const int jcol, MatrixType& A, IndexVector& perm_r, int& nseg, ScalarVector& dense, IndexVector& panel_lsub, IndexVector& segrep, IndexVector& repfnz, IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, GlobalLU_t& glu)
+void SparseLUImpl<Scalar,Index>::panel_dfs(const Index m, const Index w, const Index jcol, MatrixType& A, IndexVector& perm_r, Index& nseg, ScalarVector& dense, IndexVector& panel_lsub, IndexVector& segrep, IndexVector& repfnz, IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, GlobalLU_t& glu)
{
- int nextl_col; // Next available position in panel_lsub[*,jj]
+ Index nextl_col; // Next available position in panel_lsub[*,jj]
// Initialize pointers
VectorBlock<IndexVector> marker1(marker, m, m);
nseg = 0;
- internal::LU_panel_dfs_traits<IndexVector> traits(jcol, marker1.data());
+ panel_dfs_traits<IndexVector> traits(jcol, marker1.data());
// For each column in the panel
- for (int jj = jcol; jj < jcol + w; jj++)
+ for (Index jj = jcol; jj < jcol + w; jj++)
{
nextl_col = (jj - jcol) * m;
@@ -239,20 +238,21 @@ void SparseLUBase<Scalar,Index>::LU_panel_dfs(const int m, const int w, const in
// For each nnz in A[*, jj] do depth first search
for (typename MatrixType::InnerIterator it(A, jj); it; ++it)
{
- int krow = it.row();
+ Index krow = it.row();
dense_col(krow) = it.value();
- int kmark = marker(krow);
+ Index kmark = marker(krow);
if (kmark == jj)
continue; // krow visited before, go to the next nonzero
- LU_dfs_kernel(jj, perm_r, nseg, panel_lsub, segrep, repfnz_col, xprune, marker, parent,
+ dfs_kernel(jj, perm_r, nseg, panel_lsub, segrep, repfnz_col, xprune, marker, parent,
xplore, glu, nextl_col, krow, traits);
}// end for nonzeros in column jj
} // end for column jj
}
+} // end namespace internal
} // end namespace Eigen
#endif // SPARSELU_PANEL_DFS_H
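Note: dfs_kernel explores the structure of L iteratively: parent(krep) remembers where to pop back to and xplore(krep) remembers how far the children of krep have already been scanned, which simulates the recursion of the reference SuperLU code without a call stack. Below is a generic sketch of the same pattern on an ordinary adjacency list, with -1 playing the role of the emptyIdxLU sentinel; it is not the Eigen routine itself.

    #include <vector>

    // Iterative DFS from 'root' over 'adj', mimicking recursion with
    // parent[] (where to pop back to) and cursor[] (next child to visit).
    std::vector<int> dfs_order(const std::vector<std::vector<int> >& adj, int root)
    {
      const int empty = -1;                      // sentinel, cf. emptyIdxLU
      int n = static_cast<int>(adj.size());
      std::vector<int> parent(n, empty), cursor(n, 0);
      std::vector<bool> marked(n, false);
      std::vector<int> order;

      int u = root;
      marked[u] = true;
      order.push_back(u);
      while (true)
      {
        // descend into the next unmarked child of u, if any
        while (cursor[u] < static_cast<int>(adj[u].size()))
        {
          int v = adj[u][cursor[u]++];
          if (!marked[v])
          {
            marked[v] = true;
            order.push_back(v);
            parent[v] = u;                       // remember where to pop back to
            u = v;
          }
        }
        if (parent[u] == empty) break;           // back at the root: done
        u = parent[u];                           // pop
      }
      return order;
    }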
diff --git a/Eigen/src/SparseLU/SparseLU_pivotL.h b/Eigen/src/SparseLU/SparseLU_pivotL.h
index 69472da9b..ddcd4ec98 100644
--- a/Eigen/src/SparseLU/SparseLU_pivotL.h
+++ b/Eigen/src/SparseLU/SparseLU_pivotL.h
@@ -31,6 +31,7 @@
#define SPARSELU_PIVOTL_H
namespace Eigen {
+namespace internal {
/**
* \brief Performs the numerical pivoting on the current column of L, and the CDIV operation.
@@ -56,7 +57,7 @@ namespace Eigen {
*
*/
template <typename Scalar, typename Index>
-int SparseLUBase<Scalar,Index>::LU_pivotL(const int jcol, const RealScalar diagpivotthresh, IndexVector& perm_r, IndexVector& iperm_c, int& pivrow, GlobalLU_t& glu)
+Index SparseLUImpl<Scalar,Index>::pivotL(const Index jcol, const RealScalar& diagpivotthresh, IndexVector& perm_r, IndexVector& iperm_c, Index& pivrow, GlobalLU_t& glu)
{
Index fsupc = (glu.xsup)((glu.supno)(jcol)); // First column in the supernode containing the column jcol
@@ -72,7 +73,7 @@ int SparseLUBase<Scalar,Index>::LU_pivotL(const int jcol, const RealScalar diagp
Index diagind = iperm_c(jcol); // diagonal index
RealScalar pivmax = 0.0;
Index pivptr = nsupc;
- Index diag = IND_EMPTY;
+ Index diag = emptyIdxLU;
RealScalar rtemp;
Index isub, icol, itemp, k;
for (isub = nsupc; isub < nsupr; ++isub) {
@@ -127,6 +128,7 @@ int SparseLUBase<Scalar,Index>::LU_pivotL(const int jcol, const RealScalar diagp
return 0;
}
+} // end namespace internal
} // end namespace Eigen
#endif // SPARSELU_PIVOTL_H
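Note: pivotL performs threshold partial pivoting: the entry of largest magnitude in the current column defines pivmax, and the diagonal entry is kept as pivot whenever its magnitude is at least diagpivotthresh * pivmax, which preserves sparsity while bounding element growth. The stand-alone sketch below shows only that selection rule; the supernode bookkeeping, the row interchange in lsub and the final CDIV scaling are omitted.

    #include <cmath>
    #include <cstddef>
    #include <vector>

    // Return the index of the chosen pivot in 'col'; 'diag' is the position of
    // the diagonal entry and 'thresh' the relative diagonal threshold in [0,1].
    int choose_pivot(const std::vector<double>& col, int diag, double thresh)
    {
      double pivmax = 0.0;
      int pivptr = 0;
      for (std::size_t i = 0; i < col.size(); ++i)
      {
        double a = std::abs(col[i]);
        if (a > pivmax) { pivmax = a; pivptr = static_cast<int>(i); }
      }
      if (pivmax == 0.0) return -1;                   // numerically singular column
      if (std::abs(col[diag]) >= thresh * pivmax)     // keep the diagonal if it is large enough
        return diag;
      return pivptr;                                  // otherwise take the largest entry
    }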
diff --git a/Eigen/src/SparseLU/SparseLU_pruneL.h b/Eigen/src/SparseLU/SparseLU_pruneL.h
index 816358bc3..5a855f82f 100644
--- a/Eigen/src/SparseLU/SparseLU_pruneL.h
+++ b/Eigen/src/SparseLU/SparseLU_pruneL.h
@@ -31,6 +31,7 @@
#define SPARSELU_PRUNEL_H
namespace Eigen {
+namespace internal {
/**
* \brief Prunes the L-structure.
@@ -49,11 +50,11 @@ namespace Eigen {
*
*/
template <typename Scalar, typename Index>
-void SparseLUBase<Scalar,Index>::LU_pruneL(const int jcol, const IndexVector& perm_r, const int pivrow, const int nseg, const IndexVector& segrep, BlockIndexVector repfnz, IndexVector& xprune, GlobalLU_t& glu)
+void SparseLUImpl<Scalar,Index>::pruneL(const Index jcol, const IndexVector& perm_r, const Index pivrow, const Index nseg, const IndexVector& segrep, BlockIndexVector repfnz, IndexVector& xprune, GlobalLU_t& glu)
{
// For each supernode-rep irep in U(*,j]
- int jsupno = glu.supno(jcol);
- int i,irep,irep1;
+ Index jsupno = glu.supno(jcol);
+ Index i,irep,irep1;
bool movnum, do_prune = false;
Index kmin, kmax, minloc, maxloc,krow;
for (i = 0; i < nseg; i++)
@@ -63,7 +64,7 @@ void SparseLUBase<Scalar,Index>::LU_pruneL(const int jcol, const IndexVector& pe
do_prune = false;
// Don't prune with a zero U-segment
- if (repfnz(irep) == IND_EMPTY) continue;
+ if (repfnz(irep) == emptyIdxLU) continue;
// If a snode overlaps with the next panel, then the U-segment
// is fragmented into two parts -- irep and irep1. We should let
@@ -97,9 +98,9 @@ void SparseLUBase<Scalar,Index>::LU_pruneL(const int jcol, const IndexVector& pe
while (kmin <= kmax)
{
- if (perm_r(glu.lsub(kmax)) == IND_EMPTY)
+ if (perm_r(glu.lsub(kmax)) == emptyIdxLU)
kmax--;
- else if ( perm_r(glu.lsub(kmin)) != IND_EMPTY)
+ else if ( perm_r(glu.lsub(kmin)) != emptyIdxLU)
kmin++;
else
{
@@ -128,6 +129,7 @@ void SparseLUBase<Scalar,Index>::LU_pruneL(const int jcol, const IndexVector& pe
} // End for each U-segment
}
+} // end namespace internal
} // end namespace Eigen
#endif // SPARSELU_PRUNEL_H
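Note: when a supernode is pruned, pruneL compacts its row subscripts in place with a two-pointer sweep: kmin advances over rows that are already pivotal, kmax retreats over rows that are not, and the two entries are swapped when both pointers stall, leaving the non-pivotal rows in the tail. Here is a generic sketch of that partition step, with a predicate standing in for the perm_r test.

    #include <algorithm>
    #include <vector>

    // Partition v[lo..hi] in place: entries with pred(x) true end up in front.
    // Mirrors the kmin/kmax swap loop of pruneL.
    template<typename T, typename Pred>
    void two_pointer_partition(std::vector<T>& v, int lo, int hi, Pred pred)
    {
      int kmin = lo, kmax = hi;
      while (kmin <= kmax)
      {
        if (!pred(v[kmax]))       kmax--;       // already in the right (back) part
        else if (pred(v[kmin]))   kmin++;       // already in the right (front) part
        else
        {
          std::swap(v[kmin], v[kmax]);          // both misplaced: swap and advance
          kmin++;
          kmax--;
        }
      }
    }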
diff --git a/Eigen/src/SparseLU/SparseLU_relax_snode.h b/Eigen/src/SparseLU/SparseLU_relax_snode.h
index 44b279878..58ec32e27 100644
--- a/Eigen/src/SparseLU/SparseLU_relax_snode.h
+++ b/Eigen/src/SparseLU/SparseLU_relax_snode.h
@@ -29,6 +29,8 @@
#define SPARSELU_RELAX_SNODE_H
namespace Eigen {
+
+namespace internal {
/**
* \brief Identify the initial relaxed supernodes
@@ -42,12 +44,12 @@ namespace Eigen {
* \param relax_end last column in a supernode
*/
template <typename Scalar, typename Index>
-void SparseLUBase<Scalar,Index>::LU_relax_snode (const int n, IndexVector& et, const int relax_columns, IndexVector& descendants, IndexVector& relax_end)
+void SparseLUImpl<Scalar,Index>::relax_snode (const Index n, IndexVector& et, const Index relax_columns, IndexVector& descendants, IndexVector& relax_end)
{
// compute the number of descendants of each node in the etree
- int j, parent;
- relax_end.setConstant(IND_EMPTY);
+ Index j, parent;
+ relax_end.setConstant(emptyIdxLU);
descendants.setZero();
for (j = 0; j < n; j++)
{
@@ -56,7 +58,7 @@ void SparseLUBase<Scalar,Index>::LU_relax_snode (const int n, IndexVector& et, c
descendants(parent) += descendants(j) + 1;
}
// Identify the relaxed supernodes by postorder traversal of the etree
- int snode_start; // beginning of a snode
+ Index snode_start; // beginning of a snode
for (j = 0; j < n; )
{
parent = et(j);
@@ -75,6 +77,7 @@ void SparseLUBase<Scalar,Index>::LU_relax_snode (const int n, IndexVector& et, c
}
-} // end namespace Eigen
+} // end namespace internal
+} // end namespace Eigen
#endif
diff --git a/Eigen/src/SparseQR/SparseQR.h b/Eigen/src/SparseQR/SparseQR.h
index 7fa3e54a5..b3d5cd208 100644
--- a/Eigen/src/SparseQR/SparseQR.h
+++ b/Eigen/src/SparseQR/SparseQR.h
@@ -1,15 +1,15 @@
-#ifndef EIGEN_SPARSE_QR_H
-#define EIGEN_SPARSE_QR_H
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
-// Copyright (C) 2012 Desire Nuentsa <desire.nuentsa_wakam@inria.fr>
-// Copyright (C) 2012 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2012-2013 Desire Nuentsa <desire.nuentsa_wakam@inria.fr>
+// Copyright (C) 2012-2013 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#ifndef EIGEN_SPARSE_QR_H
+#define EIGEN_SPARSE_QR_H
namespace Eigen {
@@ -35,21 +35,22 @@ namespace internal {
/**
* \ingroup SparseQR_Module
* \class SparseQR
- * \brief Sparse left-looking QR factorization
+ * \brief Sparse left-looking rank-revealing QR factorization
*
- * This class is used to perform a left-looking QR decomposition
- * of sparse matrices. The result is then used to solve linear leasts_square systems.
- * Clearly, a QR factorization is returned such that A*P = Q*R where :
+ * This class implements a left-looking rank-revealing QR decomposition
+ * of sparse matrices. When a column has a norm less than a given tolerance
+ * it is implicitly permuted to the end. The QR factorization thus obtained is
+ * given by A*P = Q*R where R is upper triangular or trapezoidal.
*
- * P is the column permutation. Use colsPermutation() to get it.
+ * P is the column permutation which is the product of the fill-reducing and the
+ * rank-revealing permutations. Use colsPermutation() to get it.
*
- * Q is the orthogonal matrix represented as Householder reflectors.
+ * Q is the orthogonal matrix represented as products of Householder reflectors.
* Use matrixQ() to get an expression and matrixQ().transpose() to get the transpose.
* You can then apply it to a vector.
*
- * R is the sparse triangular factor. Use matrixR() to get it as SparseMatrix.
- *
- * \note This is not a rank-revealing QR decomposition.
+ * R is the sparse triangular or trapezoidal matrix. The latter occurs when A is rank-deficient.
+ * matrixR().topLeftCorner(rank(), rank()) always returns a triangular factor of full rank.
*
* \tparam _MatrixType The type of the sparse matrix A, must be a column-major SparseMatrix<>
* \tparam _OrderingType The fill-reducing ordering method. See the \link OrderingMethods_Module
@@ -71,10 +72,10 @@ class SparseQR
typedef Matrix<Scalar, Dynamic, 1> ScalarVector;
typedef PermutationMatrix<Dynamic, Dynamic, Index> PermutationType;
public:
- SparseQR () : m_isInitialized(false),m_analysisIsok(false)
+ SparseQR () : m_isInitialized(false), m_analysisIsok(false), m_lastError(""), m_useDefaultThreshold(true)
{ }
- SparseQR(const MatrixType& mat) : m_isInitialized(false),m_analysisIsok(false)
+ SparseQR(const MatrixType& mat) : m_isInitialized(false), m_analysisIsok(false), m_lastError(""), m_useDefaultThreshold(true)
{
compute(mat);
}
@@ -96,7 +97,17 @@ class SparseQR
/** \returns a const reference to the \b sparse upper triangular matrix R of the QR factorization.
*/
- const MatrixType& matrixR() const { return m_R; }
+ const QRMatrixType& matrixR() const { return m_R; }
+
+ /** \returns the number of non linearly dependent columns as determined by the pivoting threshold.
+ *
+ * \sa setPivotThreshold()
+ */
+ Index rank() const
+ {
+ eigen_assert(m_isInitialized && "The factorization should be called first, use compute()");
+ return m_nonzeropivots;
+ }
/** \returns an expression of the matrix Q as products of sparse Householder reflectors.
* You can do the following to get an actual SparseMatrix representation of Q:
@@ -107,35 +118,57 @@ class SparseQR
SparseQRMatrixQReturnType<SparseQR> matrixQ() const
{ return SparseQRMatrixQReturnType<SparseQR>(*this); }
- /** \returns a const reference to the fill-in reducing permutation that was applied to the columns of A
+ /** \returns a const reference to the column permutation P that was applied to A such that A*P = Q*R
+ * It is the combination of the fill-in reducing permutation and numerical column pivoting.
*/
const PermutationType& colsPermutation() const
{
eigen_assert(m_isInitialized && "Decomposition is not initialized.");
- return m_perm_c;
+ return m_outputPerm_c;
}
+ /** \returns A string describing the type of error.
+ * This method is provided to ease debugging, not to handle errors.
+ */
+ std::string lastErrorMessage() const { return m_lastError; }
+
/** \internal */
template<typename Rhs, typename Dest>
bool _solve(const MatrixBase<Rhs> &B, MatrixBase<Dest> &dest) const
{
eigen_assert(m_isInitialized && "The factorization should be called first, use compute()");
eigen_assert(this->rows() == B.rows() && "SparseQR::solve() : invalid number of rows in the right hand side matrix");
- Index rank = this->matrixR().cols();
+
+ Index rank = this->rank();
+
// Compute Q^T * b;
- dest = this->matrixQ().transpose() * B;
- // Solve with the triangular matrix R
- Dest y;
- y = this->matrixR().template triangularView<Upper>().solve(dest.derived().topRows(rank));
+ typename Dest::PlainObject y, b;
+ y = this->matrixQ().transpose() * B;
+ b = y;
+ // Solve with the triangular matrix R
+ y.topRows(rank) = this->matrixR().topLeftCorner(rank, rank).template triangularView<Upper>().solve(b.topRows(rank));
+ y.bottomRows(y.size()-rank).setZero();
+
// Apply the column permutation
- if (m_perm_c.size()) dest.topRows(rank) = colsPermutation().inverse() * y;
- else dest = y;
+ if (m_perm_c.size()) dest.topRows(cols()) = colsPermutation() * y.topRows(cols());
+ else dest = y.topRows(cols());
m_info = Success;
return true;
}
+ /** Sets the threshold that is used to determine linearly dependent columns during the factorization.
+ *
+ * In practice, if during the factorization the norm of the column that has to be eliminated is below
+ * this threshold, then the entire column is treated as zero, and it is moved to the end.
+ */
+ void setPivotThreshold(const RealScalar& threshold)
+ {
+ m_useDefaultThreshold = false;
+ m_threshold = threshold;
+ }
+
/** \returns the solution X of \f$ A X = B \f$ using the current decomposition of A.
*
* \sa compute()
@@ -167,15 +200,20 @@ class SparseQR
bool m_analysisIsok;
bool m_factorizationIsok;
mutable ComputationInfo m_info;
+ std::string m_lastError;
QRMatrixType m_pmat; // Temporary matrix
QRMatrixType m_R; // The triangular factor matrix
QRMatrixType m_Q; // The orthogonal reflectors
ScalarVector m_hcoeffs; // The Householder coefficients
- PermutationType m_perm_c; // Column permutation
- PermutationType m_perm_r; // Column permutation
+ PermutationType m_perm_c; // Fill-reducing Column permutation
+ PermutationType m_pivotperm; // The permutation for rank revealing
+ PermutationType m_outputPerm_c; // The final column permutation
+ RealScalar m_threshold; // Threshold to determine null Householder reflections
+ bool m_useDefaultThreshold; // Use default threshold
+ Index m_nonzeropivots; // Number of non zero pivots found
IndexVector m_etree; // Column elimination tree
IndexVector m_firstRowElt; // First element in each row
- IndexVector m_found_diag_elem; // Existence of diagonal elements
+
template <typename, typename > friend struct SparseQR_QProduct;
};
@@ -184,7 +222,8 @@ class SparseQR
*
* In this step, the fill-reducing permutation is computed and applied to the columns of A
* and the column elimination tree is computed as well. Only the sparsity pattern of \a mat is exploited.
- * \note In this step it is assumed that there is no empty row in the matrix \a mat
+ *
+ * \note In this step it is assumed that there is no empty row in the matrix \a mat.
*/
template <typename MatrixType, typename OrderingType>
void SparseQR<MatrixType,OrderingType>::analyzePattern(const MatrixType& mat)
@@ -194,21 +233,20 @@ void SparseQR<MatrixType,OrderingType>::analyzePattern(const MatrixType& mat)
ord(mat, m_perm_c);
Index n = mat.cols();
Index m = mat.rows();
- // Permute the input matrix... only the column pointers are permuted
- // FIXME: directly send "m_perm.inverse() * mat" to coletree -> need an InnerIterator to the sparse-permutation-product expression.
- m_pmat = mat;
- m_pmat.uncompress();
- for (int i = 0; i < n; i++)
+
+ if (!m_perm_c.size())
{
- Index p = m_perm_c.size() ? m_perm_c.indices()(i) : i;
- m_pmat.outerIndexPtr()[p] = mat.outerIndexPtr()[i];
- m_pmat.innerNonZeroPtr()[p] = mat.outerIndexPtr()[i+1] - mat.outerIndexPtr()[i];
+ m_perm_c.resize(n);
+ m_perm_c.indices().setLinSpaced(n, 0,n-1);
}
+
// Compute the column elimination tree of the permuted matrix
- internal::coletree(m_pmat, m_etree, m_firstRowElt);
+ m_outputPerm_c = m_perm_c.inverse();
+ internal::coletree(mat, m_etree, m_firstRowElt, m_outputPerm_c.indices().data());
m_R.resize(n, n);
- m_Q.resize(m, m);
+ m_Q.resize(m, n);
+
// Allocate space for nonzero elements : rough estimation
m_R.reserve(2*mat.nonZeros()); //FIXME Get a more accurate estimation through symbolic factorization with the etree
m_Q.reserve(2*mat.nonZeros());
@@ -216,7 +254,7 @@ void SparseQR<MatrixType,OrderingType>::analyzePattern(const MatrixType& mat)
m_analysisIsok = true;
}
-/** \brief Perform the numerical QR factorization of the input matrix
+/** \brief Performs the numerical QR factorization of the input matrix
*
* The function SparseQR::analyzePattern(const MatrixType&) must have been called beforehand with
* a matrix having the same sparsity pattern as \a mat.
@@ -226,156 +264,220 @@ void SparseQR<MatrixType,OrderingType>::analyzePattern(const MatrixType& mat)
template <typename MatrixType, typename OrderingType>
void SparseQR<MatrixType,OrderingType>::factorize(const MatrixType& mat)
{
+ using std::abs;
+ using std::max;
+
eigen_assert(m_analysisIsok && "analyzePattern() should be called before this step");
Index m = mat.rows();
Index n = mat.cols();
IndexVector mark(m); mark.setConstant(-1); // Record the visited nodes
IndexVector Ridx(n), Qidx(m); // Store temporarily the row indexes for the current column of R and Q
Index nzcolR, nzcolQ; // Number of nonzero for the current column of R and Q
- Index pcol;
- ScalarVector tval(m); tval.setZero(); // Temporary vector
- IndexVector iperm(n);
+ ScalarVector tval(m); // The dense vector used to compute the current column
bool found_diag;
- if (m_perm_c.size())
- for(int i = 0; i < n; i++) iperm(m_perm_c.indices()(i)) = i;
- else
- iperm.setLinSpaced(n, 0, n-1);
-
- // Left looking QR factorization : Compute a column of R and Q at a time
- for (Index col = 0; col < n; col++)
+
+ m_pmat = mat;
+ m_pmat.uncompress(); // To have the innerNonZeroPtr allocated
+ // Apply the fill-in reducing permutation lazily:
+ for (int i = 0; i < n; i++)
{
+ Index p = m_perm_c.size() ? m_perm_c.indices()(i) : i;
+ m_pmat.outerIndexPtr()[p] = mat.outerIndexPtr()[i];
+ m_pmat.innerNonZeroPtr()[p] = mat.outerIndexPtr()[i+1] - mat.outerIndexPtr()[i];
+ }
+
+ /* Compute the default threshold, see :
+ * Tim Davis, "Algorithm 915, SuiteSparseQR: Multifrontal Multithreaded Rank-Revealing
+ * Sparse QR Factorization, ACM Trans. on Math. Soft. 38(1), 2011, Page 8:3
+ */
+ if(m_useDefaultThreshold)
+ {
+ RealScalar max2Norm = 0.0;
+ for (int j = 0; j < n; j++) max2Norm = (max)(max2Norm, m_pmat.col(j).norm());
+ m_threshold = 20 * (m + n) * max2Norm * NumTraits<RealScalar>::epsilon();
+ }
+
+ // Initialize the numerical permutation
+ m_pivotperm.setIdentity(n);
+
+ Index nonzeroCol = 0; // Record the number of valid pivots
+
+ // Left looking rank-revealing QR factorization: compute a column of R and Q at a time
+ for (Index col = 0; col < n; ++col)
+ {
+ mark.setConstant(-1);
m_R.startVec(col);
m_Q.startVec(col);
- mark(col) = col;
- Qidx(0) = col;
+ mark(nonzeroCol) = col;
+ Qidx(0) = nonzeroCol;
nzcolR = 0; nzcolQ = 1;
- pcol = iperm(col);
found_diag = false;
- // Find the nonzero locations of the column k of R,
- // i.e All the nodes (with indexes lower than k) reachable through the col etree rooted at node k
- for (typename MatrixType::InnerIterator itp(mat, pcol); itp || !found_diag; ++itp)
+ tval.setZero();
+
+ // Symbolic factorization: find the nonzero locations of the column k of the factors R and Q, i.e.,
+ // all the nodes (with indexes lower than rank) reachable through the column elimination tree (etree) rooted at node k.
+ // Note: if the diagonal entry does not exist, then its contribution must be explicitly added,
+ // thus the trick with found_diag, which permits one more iteration on the diagonal element if it has not been found.
+ for (typename MatrixType::InnerIterator itp(m_pmat, col); itp || !found_diag; ++itp)
{
- Index curIdx = col;
- if (itp) curIdx = itp.row();
- if(curIdx == col) found_diag = true;
- // Get the nonzeros indexes of the current column of R
+ Index curIdx = nonzeroCol ;
+ if(itp) curIdx = itp.row();
+ if(curIdx == nonzeroCol) found_diag = true;
+
+ // Get the nonzeros indexes of the current column of R
Index st = m_firstRowElt(curIdx); // The traversal of the etree starts here
if (st < 0 )
{
- std::cerr << " Empty row found during Numerical factorization ... Abort \n";
- m_info = NumericalIssue;
+ m_lastError = "Empty row found during numerical factorization";
+ m_info = InvalidInput;
return;
}
+
// Traverse the etree
Index bi = nzcolR;
for (; mark(st) != col; st = m_etree(st))
{
- Ridx(nzcolR) = st; // Add this row to the list
- mark(st) = col; // Mark this row as visited
+ Ridx(nzcolR) = st; // Add this row to the list,
+ mark(st) = col; // and mark this row as visited
nzcolR++;
}
+
// Reverse the list to get the topological ordering
Index nt = nzcolR-bi;
- for(int i = 0; i < nt/2; i++) std::swap(Ridx(bi+i), Ridx(nzcolR-i-1));
+ for(Index i = 0; i < nt/2; i++) std::swap(Ridx(bi+i), Ridx(nzcolR-i-1));
- // Copy the current row value of mat
- if (itp) tval(curIdx) = itp.value();
- else tval(curIdx) = Scalar(0.);
+ // Copy the current (curIdx, col) value of the input matrix
+ if(itp) tval(curIdx) = itp.value();
+ else tval(curIdx) = Scalar(0);
// Compute the pattern of Q(:,k)
- if (curIdx > col && mark(curIdx) < col)
+ if(curIdx > nonzeroCol && mark(curIdx) != col )
{
- Qidx(nzcolQ) = curIdx; // Add this row to the pattern of Q
- mark(curIdx) = col; // And mark it as visited
+ Qidx(nzcolQ) = curIdx; // Add this row to the pattern of Q,
+ mark(curIdx) = col; // and mark it as visited
nzcolQ++;
}
}
-
+
// Browse all the indexes of R(:,col) in reverse order
for (Index i = nzcolR-1; i >= 0; i--)
{
- Index curIdx = Ridx(i);
- // Apply the <curIdx> householder vector to tval
- Scalar tdot(0.);
- //First compute q'*tval
- for (typename QRMatrixType::InnerIterator itq(m_Q, curIdx); itq; ++itq)
- {
- tdot += internal::conj(itq.value()) * tval(itq.row());
- }
+ Index curIdx = m_pivotperm.indices()(Ridx(i));
+
+ // Apply the curIdx-th householder vector to the current column (temporarily stored into tval)
+ Scalar tdot(0);
+
+ // First compute q' * tval
+ tdot = m_Q.col(curIdx).dot(tval);
+
tdot *= m_hcoeffs(curIdx);
- // Then compute tval = tval - q*tau
+
+ // Then update tval = tval - q * tau
+ // FIXME: tval -= tdot * m_Q.col(curIdx) should amount to the same (need to check/add support for efficient "dense ?= sparse")
for (typename QRMatrixType::InnerIterator itq(m_Q, curIdx); itq; ++itq)
- {
tval(itq.row()) -= itq.value() * tdot;
- }
- //With the topological ordering, updates for curIdx are fully done at this point
- m_R.insertBackByOuterInnerUnordered(col, curIdx) = tval(curIdx);
- tval(curIdx) = Scalar(0.);
-
+
// Detect fill-in for the current column of Q
- if(m_etree(curIdx) == col)
+ if(m_etree(Ridx(i)) == nonzeroCol)
{
for (typename QRMatrixType::InnerIterator itq(m_Q, curIdx); itq; ++itq)
{
Index iQ = itq.row();
- if (mark(iQ) < col)
+ if (mark(iQ) != col)
{
- Qidx(nzcolQ++) = iQ; // Add this row to the pattern of Q
- mark(iQ) = col; //And mark it as visited
+ Qidx(nzcolQ++) = iQ; // Add this row to the pattern of Q,
+ mark(iQ) = col; // and mark it as visited
}
}
}
- } // End update current column of R
+ } // End update current column
+
+ // Compute the Householder reflection that eliminates the current column
+ // FIXME this step should call the Householder module.
+ Scalar tau;
+ RealScalar beta;
+ Scalar c0 = nzcolQ ? tval(Qidx(0)) : Scalar(0);
+
+ // First, the squared norm of Q((col+1):m, col)
+ RealScalar sqrNorm = 0.;
+ for (Index itq = 1; itq < nzcolQ; ++itq) sqrNorm += internal::abs2(tval(Qidx(itq)));
- // Record the current (unscaled) column of V.
- for (Index itq = 0; itq < nzcolQ; ++itq)
- {
- Index iQ = Qidx(itq);
- m_Q.insertBackByOuterInnerUnordered(col,iQ) = tval(iQ);
- tval(iQ) = Scalar(0.);
- }
- // Compute the new Householder reflection
- RealScalar sqrNorm =0.;
- Scalar tau; RealScalar beta;
- typename QRMatrixType::InnerIterator itq(m_Q, col);
- Scalar c0 = (itq) ? itq.value() : Scalar(0.);
- //First, the squared norm of Q((col+1):m, col)
- if(itq) ++itq;
- for (; itq; ++itq)
- {
- sqrNorm += internal::abs2(itq.value());
- }
if(sqrNorm == RealScalar(0) && internal::imag(c0) == RealScalar(0))
{
tau = RealScalar(0);
beta = internal::real(c0);
- typename QRMatrixType::InnerIterator it(m_Q,col);
- it.valueRef() = 1; //FIXME A row permutation should be performed at this point
- }
+ tval(Qidx(0)) = 1;
+ }
else
{
beta = std::sqrt(internal::abs2(c0) + sqrNorm);
if(internal::real(c0) >= RealScalar(0))
beta = -beta;
- typename QRMatrixType::InnerIterator it(m_Q,col);
- it.valueRef() = 1;
- for (++it; it; ++it)
- {
- it.valueRef() /= (c0 - beta);
- }
+ tval(Qidx(0)) = 1;
+ for (Index itq = 1; itq < nzcolQ; ++itq)
+ tval(Qidx(itq)) /= (c0 - beta);
tau = internal::conj((beta-c0) / beta);
}
- m_hcoeffs(col) = tau;
- m_R.insertBackByOuterInnerUnordered(col, col) = beta;
+
+ // Insert values in R
+ for (Index i = nzcolR-1; i >= 0; i--)
+ {
+ Index curIdx = Ridx(i);
+ if(curIdx < nonzeroCol)
+ {
+ m_R.insertBackByOuterInnerUnordered(col, curIdx) = tval(curIdx);
+ tval(curIdx) = Scalar(0.);
+ }
+ }
+
+ if(abs(beta) >= m_threshold)
+ {
+ m_R.insertBackByOuterInner(col, nonzeroCol) = beta;
+ nonzeroCol++;
+ // The householder coefficient
+ m_hcoeffs(col) = tau;
+ // Record the householder reflections
+ for (Index itq = 0; itq < nzcolQ; ++itq)
+ {
+ Index iQ = Qidx(itq);
+ m_Q.insertBackByOuterInnerUnordered(col,iQ) = tval(iQ);
+ tval(iQ) = Scalar(0.);
+ }
+ }
+ else
+ {
+ // Zero pivot found: implicitly move this column to the end
+ m_hcoeffs(col) = Scalar(0);
+ for (Index j = nonzeroCol; j < n-1; j++)
+ std::swap(m_pivotperm.indices()(j), m_pivotperm.indices()[j+1]);
+
+ // Recompute the column elimination tree
+ internal::coletree(m_pmat, m_etree, m_firstRowElt, m_pivotperm.indices().data());
+ }
}
+
// Finalize the column pointers of the sparse matrices R and Q
- m_R.finalize(); m_R.makeCompressed();
- m_Q.finalize(); m_Q.makeCompressed();
+ m_Q.finalize();
+ m_Q.makeCompressed();
+ m_R.finalize();
+ m_R.makeCompressed();
+
+ m_nonzeropivots = nonzeroCol;
+
+ if(nonzeroCol<n)
+ {
+ // Permute the triangular factor to put the 'dead' columns to the end
+ MatrixType tempR(m_R);
+ m_R = tempR * m_pivotperm;
+
+ // Update the column permutation
+ m_outputPerm_c = m_outputPerm_c * m_pivotperm;
+ }
+
m_isInitialized = true;
m_factorizationIsok = true;
m_info = Success;
-
}
namespace internal {
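Note: the rewritten factorize() makes SparseQR rank-revealing: when no threshold was set with setPivotThreshold(), it uses the default from Tim Davis' SuiteSparseQR paper, 20 * (m + n) * max_j ||A*P(:,j)||_2 * eps, and any Householder pivot beta with |beta| below that value is treated as zero and its column is permuted to the end through m_pivotperm. The helper below merely evaluates that default threshold for a given sparse matrix; it is a reading aid, not the class' internal code.

    #include <Eigen/SparseCore>
    #include <algorithm>
    #include <limits>

    // Default rank-detection threshold used above (Davis, ACM TOMS 38(1), 2011).
    double default_qr_threshold(const Eigen::SparseMatrix<double>& A)
    {
      double max2Norm = 0.0;
      for (int j = 0; j < A.cols(); ++j)
        max2Norm = std::max(max2Norm, A.col(j).norm());   // largest column 2-norm
      return 20.0 * (A.rows() + A.cols()) * max2Norm
             * std::numeric_limits<double>::epsilon();
    }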
@@ -404,14 +506,13 @@ struct SparseQR_QProduct : ReturnByValue<SparseQR_QProduct<SparseQRType, Derived
// Get the references
SparseQR_QProduct(const SparseQRType& qr, const Derived& other, bool transpose) :
m_qr(qr),m_other(other),m_transpose(transpose) {}
- inline Index rows() const { return m_transpose ? m_qr.rowsQ() : m_qr.cols(); }
+ inline Index rows() const { return m_transpose ? m_qr.rows() : m_qr.cols(); }
inline Index cols() const { return m_other.cols(); }
// Assign to a vector
template<typename DesType>
void evalTo(DesType& res) const
{
- Index m = m_qr.rows();
Index n = m_qr.cols();
if (m_transpose)
{
@@ -420,11 +521,13 @@ struct SparseQR_QProduct : ReturnByValue<SparseQR_QProduct<SparseQRType, Derived
res = m_other;
for (Index k = 0; k < n; k++)
{
- Scalar tau;
- // Or alternatively
- tau = m_qr.m_Q.col(k).tail(m-k).dot(res.tail(m-k));
+ Scalar tau = Scalar(0);
+ tau = m_qr.m_Q.col(k).dot(res);
tau = tau * m_qr.m_hcoeffs(k);
- res -= tau * m_qr.m_Q.col(k);
+ for (typename MatrixType::InnerIterator itq(m_qr.m_Q, k); itq; ++itq)
+ {
+ res(itq.row()) -= itq.value() * tau;
+ }
}
}
else
@@ -434,8 +537,8 @@ struct SparseQR_QProduct : ReturnByValue<SparseQR_QProduct<SparseQRType, Derived
res = m_other;
for (Index k = n-1; k >=0; k--)
{
- Scalar tau;
- tau = m_qr.m_Q.col(k).tail(m-k).dot(res.tail(m-k));
+ Scalar tau = Scalar(0);
+ tau = m_qr.m_Q.col(k).dot(res);
tau = tau * m_qr.m_hcoeffs(k);
res -= tau * m_qr.m_Q.col(k);
}
diff --git a/Eigen/src/SuperLUSupport/SuperLUSupport.h b/Eigen/src/SuperLUSupport/SuperLUSupport.h
index cd6c4b91f..3034c7af5 100644
--- a/Eigen/src/SuperLUSupport/SuperLUSupport.h
+++ b/Eigen/src/SuperLUSupport/SuperLUSupport.h
@@ -353,14 +353,14 @@ class SuperLUBase : internal::noncopyable
*
* \sa compute()
*/
-// template<typename Rhs>
-// inline const internal::sparse_solve_retval<SuperLU, Rhs> solve(const SparseMatrixBase<Rhs>& b) const
-// {
-// eigen_assert(m_isInitialized && "SuperLU is not initialized.");
-// eigen_assert(rows()==b.rows()
-// && "SuperLU::solve(): invalid number of rows of the right hand side matrix b");
-// return internal::sparse_solve_retval<SuperLU, Rhs>(*this, b.derived());
-// }
+ template<typename Rhs>
+ inline const internal::sparse_solve_retval<SuperLUBase, Rhs> solve(const SparseMatrixBase<Rhs>& b) const
+ {
+ eigen_assert(m_isInitialized && "SuperLU is not initialized.");
+ eigen_assert(rows()==b.rows()
+ && "SuperLU::solve(): invalid number of rows of the right hand side matrix b");
+ return internal::sparse_solve_retval<SuperLUBase, Rhs>(*this, b.derived());
+ }
/** Performs a symbolic decomposition on the sparsity of \a matrix.
*
@@ -1015,7 +1015,7 @@ struct sparse_solve_retval<SuperLUBase<_MatrixType,Derived>, Rhs>
template<typename Dest> void evalTo(Dest& dst) const
{
- dec().derived()._solve(rhs(),dst);
+ this->defaultEvalTo(dst);
}
};
diff --git a/Eigen/src/UmfPackSupport/UmfPackSupport.h b/Eigen/src/UmfPackSupport/UmfPackSupport.h
index 22d049089..d85b8be85 100644
--- a/Eigen/src/UmfPackSupport/UmfPackSupport.h
+++ b/Eigen/src/UmfPackSupport/UmfPackSupport.h
@@ -215,14 +215,14 @@ class UmfPackLU : internal::noncopyable
*
* \sa compute()
*/
-// template<typename Rhs>
-// inline const internal::sparse_solve_retval<UmfPAckLU, Rhs> solve(const SparseMatrixBase<Rhs>& b) const
-// {
-// eigen_assert(m_isInitialized && "UmfPAckLU is not initialized.");
-// eigen_assert(rows()==b.rows()
-// && "UmfPAckLU::solve(): invalid number of rows of the right hand side matrix b");
-// return internal::sparse_solve_retval<UmfPAckLU, Rhs>(*this, b.derived());
-// }
+ template<typename Rhs>
+ inline const internal::sparse_solve_retval<UmfPackLU, Rhs> solve(const SparseMatrixBase<Rhs>& b) const
+ {
+ eigen_assert(m_isInitialized && "UmfPackLU is not initialized.");
+ eigen_assert(rows()==b.rows()
+ && "UmfPackLU::solve(): invalid number of rows of the right hand side matrix b");
+ return internal::sparse_solve_retval<UmfPackLU, Rhs>(*this, b.derived());
+ }
/** Performs a symbolic decomposition on the sparsity of \a matrix.
*
@@ -381,7 +381,8 @@ bool UmfPackLU<MatrixType>::_solve(const MatrixBase<BDerived> &b, MatrixBase<XDe
const int rhsCols = b.cols();
eigen_assert((BDerived::Flags&RowMajorBit)==0 && "UmfPackLU backend does not support non col-major rhs yet");
eigen_assert((XDerived::Flags&RowMajorBit)==0 && "UmfPackLU backend does not support non col-major result yet");
-
+ eigen_assert(b.derived().data() != x.derived().data() && " Umfpack does not support inplace solve");
+
int errorCode;
for (int j=0; j<rhsCols; ++j)
{
@@ -420,7 +421,7 @@ struct sparse_solve_retval<UmfPackLU<_MatrixType>, Rhs>
template<typename Dest> void evalTo(Dest& dst) const
{
- dec()._solve(rhs(),dst);
+ this->defaultEvalTo(dst);
}
};
diff --git a/Eigen/src/misc/SparseSolve.h b/Eigen/src/misc/SparseSolve.h
index 272c4a479..244bb8ec7 100644
--- a/Eigen/src/misc/SparseSolve.h
+++ b/Eigen/src/misc/SparseSolve.h
@@ -47,6 +47,23 @@ template<typename _DecompositionType, typename Rhs> struct sparse_solve_retval_b
}
protected:
+ template<typename DestScalar, int DestOptions, typename DestIndex>
+ inline void defaultEvalTo(SparseMatrix<DestScalar,DestOptions,DestIndex>& dst) const
+ {
+ // we process the sparse rhs per block of NbColsAtOnce columns temporarily stored into a dense matrix.
+ static const int NbColsAtOnce = 4;
+ int rhsCols = m_rhs.cols();
+ int size = m_rhs.rows();
+ Eigen::Matrix<DestScalar,Dynamic,Dynamic> tmp(size,rhsCols);
+ Eigen::Matrix<DestScalar,Dynamic,Dynamic> tmpX(size,rhsCols);
+ for(int k=0; k<rhsCols; k+=NbColsAtOnce)
+ {
+ int actualCols = std::min<int>(rhsCols-k, NbColsAtOnce);
+ tmp.leftCols(actualCols) = m_rhs.middleCols(k,actualCols);
+ tmpX.leftCols(actualCols) = m_dec.solve(tmp.leftCols(actualCols));
+ dst.middleCols(k,actualCols) = tmpX.leftCols(actualCols).sparseView();
+ }
+ }
const DecompositionType& m_dec;
typename Rhs::Nested m_rhs;
};
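Note: defaultEvalTo gives every sparse decomposition a fallback for sparse right-hand sides: B is processed NbColsAtOnce columns at a time through a dense temporary, solved densely, and re-sparsified with sparseView() into the destination. The stand-alone sketch below reproduces that pattern with SimplicialLLT as an arbitrary example decomposition; the chunk width of 4 only bounds the size of the dense temporaries.

    #include <Eigen/SparseCore>
    #include <Eigen/SparseCholesky>
    #include <algorithm>
    using namespace Eigen;

    // Solve A * X = B with sparse B and X, four columns of B at a time.
    // 'dec' must have been computed from A beforehand.
    SparseMatrix<double> blockwiseSolve(const SimplicialLLT<SparseMatrix<double> >& dec,
                                        const SparseMatrix<double>& B)
    {
      const int NbColsAtOnce = 4;
      int rhsCols = B.cols();
      int size    = B.rows();
      SparseMatrix<double> X(size, rhsCols);
      MatrixXd tmp(size, NbColsAtOnce), tmpX(size, NbColsAtOnce);
      for (int k = 0; k < rhsCols; k += NbColsAtOnce)
      {
        int actualCols = std::min(rhsCols - k, NbColsAtOnce);
        tmp.leftCols(actualCols)  = B.middleCols(k, actualCols);      // densify a slab of B
        tmpX.leftCols(actualCols) = dec.solve(tmp.leftCols(actualCols));
        X.middleCols(k, actualCols) = tmpX.leftCols(actualCols).sparseView();
      }
      return X;
    }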