-rw-r--r--  CMakeLists.txt | 75
-rw-r--r--  CTestConfig.cmake | 4
-rw-r--r--  Eigen/Core | 2
-rw-r--r--  Eigen/OrderingMethods | 3
-rw-r--r--  Eigen/Sparse | 4
-rw-r--r--  Eigen/SparseCholesky | 8
-rw-r--r--  Eigen/SparseLU | 9
-rw-r--r--  Eigen/SparseQR | 2
-rw-r--r--  Eigen/src/Cholesky/LDLT.h | 10
-rw-r--r--  Eigen/src/Cholesky/LLT.h | 8
-rw-r--r--  Eigen/src/CholmodSupport/CholmodSupport.h | 1
-rw-r--r--  Eigen/src/Core/Array.h | 6
-rw-r--r--  Eigen/src/Core/Assign.h | 8
-rw-r--r--  Eigen/src/Core/AssignEvaluator.h | 73
-rw-r--r--  Eigen/src/Core/Block.h | 4
-rw-r--r--  Eigen/src/Core/BooleanRedux.h | 28
-rw-r--r--  Eigen/src/Core/CoreEvaluators.h | 6
-rw-r--r--  Eigen/src/Core/CwiseBinaryOp.h | 4
-rw-r--r--  Eigen/src/Core/CwiseUnaryView.h | 12
-rw-r--r--  Eigen/src/Core/DenseBase.h | 9
-rw-r--r--  Eigen/src/Core/Functors.h | 34
-rw-r--r--  Eigen/src/Core/GeneralProduct.h | 96
-rw-r--r--  Eigen/src/Core/GenericPacketMath.h | 23
-rw-r--r--  Eigen/src/Core/GlobalFunctions.h | 2
-rw-r--r--  Eigen/src/Core/MathFunctions.h | 3
-rw-r--r--  Eigen/src/Core/Matrix.h | 6
-rw-r--r--  Eigen/src/Core/MatrixBase.h | 2
-rw-r--r--  Eigen/src/Core/NumTraits.h | 3
-rw-r--r--  Eigen/src/Core/PermutationMatrix.h | 21
-rw-r--r--  Eigen/src/Core/PlainObjectBase.h | 27
-rw-r--r--  Eigen/src/Core/Product.h | 4
-rw-r--r--  Eigen/src/Core/ProductBase.h | 10
-rw-r--r--  Eigen/src/Core/ProductEvaluators.h | 34
-rw-r--r--  Eigen/src/Core/Redux.h | 6
-rw-r--r--  Eigen/src/Core/Ref.h | 13
-rw-r--r--  Eigen/src/Core/Select.h | 6
-rw-r--r--  Eigen/src/Core/SelfAdjointView.h | 4
-rw-r--r--  Eigen/src/Core/SelfCwiseBinaryOp.h | 5
-rw-r--r--  Eigen/src/Core/StableNorm.h | 40
-rw-r--r--  Eigen/src/Core/Transpose.h | 4
-rw-r--r--  Eigen/src/Core/VectorwiseOp.h | 47
-rw-r--r--  Eigen/src/Core/Visitor.h | 16
-rw-r--r--  Eigen/src/Core/arch/SSE/MathFunctions.h | 10
-rw-r--r--  Eigen/src/Core/arch/SSE/PacketMath.h | 8
-rw-r--r--  Eigen/src/Core/products/CoeffBasedProduct.h | 2
-rw-r--r--  Eigen/src/Core/products/GeneralBlockPanelKernel.h | 294
-rw-r--r--  Eigen/src/Core/products/GeneralMatrixMatrix.h | 7
-rw-r--r--  Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h | 112
-rw-r--r--  Eigen/src/Core/products/GeneralMatrixVector.h | 23
-rw-r--r--  Eigen/src/Core/products/GeneralMatrixVector_MKL.h | 6
-rw-r--r--  Eigen/src/Core/products/SelfadjointMatrixMatrix.h | 32
-rw-r--r--  Eigen/src/Core/products/SelfadjointMatrixMatrix_MKL.h | 10
-rw-r--r--  Eigen/src/Core/products/SelfadjointMatrixVector.h | 15
-rw-r--r--  Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h | 4
-rw-r--r--  Eigen/src/Core/products/SelfadjointProduct.h | 20
-rw-r--r--  Eigen/src/Core/products/SelfadjointRank2Update.h | 6
-rw-r--r--  Eigen/src/Core/products/TriangularMatrixMatrix.h | 40
-rw-r--r--  Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h | 4
-rw-r--r--  Eigen/src/Core/products/TriangularMatrixVector.h | 30
-rw-r--r--  Eigen/src/Core/products/TriangularMatrixVector_MKL.h | 12
-rw-r--r--  Eigen/src/Core/products/TriangularSolverMatrix.h | 18
-rw-r--r--  Eigen/src/Core/products/TriangularSolverMatrix_MKL.h | 4
-rw-r--r--  Eigen/src/Core/util/Macros.h | 2
-rw-r--r--  Eigen/src/Core/util/Memory.h | 35
-rw-r--r--  Eigen/src/Core/util/Meta.h | 20
-rw-r--r--  Eigen/src/Eigenvalues/ComplexEigenSolver.h | 6
-rw-r--r--  Eigen/src/Eigenvalues/ComplexSchur.h | 1
-rw-r--r--  Eigen/src/Eigenvalues/ComplexSchur_MKL.h | 2
-rw-r--r--  Eigen/src/Eigenvalues/EigenSolver.h | 4
-rw-r--r--  Eigen/src/Eigenvalues/HessenbergDecomposition.h | 2
-rw-r--r--  Eigen/src/Eigenvalues/RealQZ.h | 2
-rw-r--r--  Eigen/src/Eigenvalues/RealSchur.h | 14
-rw-r--r--  Eigen/src/Eigenvalues/RealSchur_MKL.h | 2
-rw-r--r--  Eigen/src/Eigenvalues/Tridiagonalization.h | 2
-rw-r--r--  Eigen/src/Geometry/AlignedBox.h | 6
-rw-r--r--  Eigen/src/Geometry/OrthoMethods.h | 4
-rw-r--r--  Eigen/src/Geometry/Quaternion.h | 4
-rw-r--r--  Eigen/src/IterativeLinearSolvers/BiCGSTAB.h | 5
-rw-r--r--  Eigen/src/IterativeLinearSolvers/ConjugateGradient.h | 22
-rw-r--r--  Eigen/src/IterativeLinearSolvers/IncompleteLUT.h | 65
-rw-r--r--  Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h | 2
-rw-r--r--  Eigen/src/Jacobi/Jacobi.h | 4
-rw-r--r--  Eigen/src/LU/Determinant.h | 2
-rw-r--r--  Eigen/src/LU/FullPivLU.h | 3
-rw-r--r--  Eigen/src/LU/PartialPivLU.h | 9
-rw-r--r--  Eigen/src/MetisSupport/MetisSupport.h | 2
-rw-r--r--  Eigen/src/OrderingMethods/Amd.h | 5
-rw-r--r--  Eigen/src/OrderingMethods/Eigen_Colamd.h | 2093
-rw-r--r--  Eigen/src/OrderingMethods/Ordering.h | 44
-rw-r--r--  Eigen/src/PaStiXSupport/PaStiXSupport.h | 23
-rw-r--r--  Eigen/src/PardisoSupport/PardisoSupport.h | 25
-rw-r--r--  Eigen/src/QR/ColPivHouseholderQR.h | 50
-rw-r--r--  Eigen/src/QR/HouseholderQR.h | 1
-rw-r--r--  Eigen/src/SPQRSupport/SuiteSparseQRSupport.h | 66
-rw-r--r--  Eigen/src/SVD/JacobiSVD.h | 36
-rw-r--r--  Eigen/src/SparseCholesky/SimplicialCholesky.h | 212
-rw-r--r--  Eigen/src/SparseCholesky/SimplicialCholesky_impl.h | 199
-rw-r--r--  Eigen/src/SparseCore/AmbiVector.h | 2
-rw-r--r--  Eigen/src/SparseCore/ConservativeSparseSparseProduct.h | 6
-rw-r--r--  Eigen/src/SparseCore/SparseBlock.h | 73
-rw-r--r--  Eigen/src/SparseCore/SparseColEtree.h | 39
-rw-r--r--  Eigen/src/SparseCore/SparseDot.h | 4
-rw-r--r--  Eigen/src/SparseCore/SparseMatrix.h | 419
-rw-r--r--  Eigen/src/SparseCore/SparseSelfAdjointView.h | 1
-rw-r--r--  Eigen/src/SparseCore/SparseVector.h | 55
-rw-r--r--  Eigen/src/SparseLU/SparseLU.h | 254
-rw-r--r--  Eigen/src/SparseLU/SparseLUBase.h | 58
-rw-r--r--  Eigen/src/SparseLU/SparseLUImpl.h | 64
-rw-r--r--  Eigen/src/SparseLU/SparseLU_Memory.h | 53
-rw-r--r--  Eigen/src/SparseLU/SparseLU_Structs.h | 26
-rw-r--r--  Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h (renamed from Eigen/src/SparseLU/SparseLU_Matrix.h) | 98
-rw-r--r--  Eigen/src/SparseLU/SparseLU_Utils.h | 20
-rw-r--r--  Eigen/src/SparseLU/SparseLU_column_bmod.h | 28
-rw-r--r--  Eigen/src/SparseLU/SparseLU_column_dfs.h | 59
-rw-r--r--  Eigen/src/SparseLU/SparseLU_copy_to_ucol.h | 12
-rw-r--r--  Eigen/src/SparseLU/SparseLU_gemm_kernel.h | 98
-rw-r--r--  Eigen/src/SparseLU/SparseLU_heap_relax_snode.h | 22
-rw-r--r--  Eigen/src/SparseLU/SparseLU_kernel_bmod.h | 164
-rw-r--r--  Eigen/src/SparseLU/SparseLU_panel_bmod.h | 70
-rw-r--r--  Eigen/src/SparseLU/SparseLU_panel_dfs.h | 64
-rw-r--r--  Eigen/src/SparseLU/SparseLU_pivotL.h | 6
-rw-r--r--  Eigen/src/SparseLU/SparseLU_pruneL.h | 14
-rw-r--r--  Eigen/src/SparseLU/SparseLU_relax_snode.h | 13
-rw-r--r--  Eigen/src/SparseQR/SparseQR.h | 367
-rw-r--r--  Eigen/src/SuperLUSupport/SuperLUSupport.h | 18
-rw-r--r--  Eigen/src/UmfPackSupport/UmfPackSupport.h | 21
-rw-r--r--  Eigen/src/misc/SparseSolve.h | 17
-rw-r--r--  blas/level2_cplx_impl.h | 4
-rw-r--r--  blas/level2_impl.h | 2
-rw-r--r--  blas/level2_real_impl.h | 4
-rw-r--r--  blas/level3_impl.h | 6
-rw-r--r--  cmake/CMakeDetermineVSServicePack.cmake | 103
-rw-r--r--  cmake/EigenConfigureTesting.cmake | 6
-rw-r--r--  cmake/EigenDetermineVSServicePack.cmake | 27
-rw-r--r--  cmake/EigenTesting.cmake | 17
-rw-r--r--  cmake/FindSuperLU.cmake | 3
-rw-r--r--  cmake/language_support.cmake | 2
-rw-r--r--  debug/msvc/eigen.natvis | 235
-rw-r--r--  doc/AsciiQuickReference.txt | 35
-rw-r--r--  doc/Overview.dox | 2
-rw-r--r--  doc/PreprocessorDirectives.dox | 9
-rw-r--r--  doc/SparseLinearSystems.dox | 104
-rw-r--r--  doc/SparseQuickReference.dox | 229
-rw-r--r--  doc/TutorialMapClass.dox | 10
-rw-r--r--  doc/TutorialSparse.dox | 110
-rw-r--r--  doc/snippets/DenseBase_setLinSpaced.cpp | 2
-rw-r--r--  scripts/cdashtesting.cmake.in | 49
-rw-r--r--  scripts/eigen_gen_docs | 1
-rw-r--r--  test/CMakeLists.txt | 15
-rw-r--r--  test/adjoint.cpp | 75
-rw-r--r--  test/array.cpp | 14
-rw-r--r--  test/array_for_matrix.cpp | 10
-rw-r--r--  test/array_replicate.cpp | 1
-rw-r--r--  test/basicstuff.cpp | 3
-rw-r--r--  test/cholesky.cpp | 1
-rw-r--r--  test/conservative_resize.cpp | 35
-rw-r--r--  test/cwiseop.cpp | 7
-rw-r--r--  test/diagonal.cpp | 7
-rw-r--r--  test/diagonalmatrices.cpp | 1
-rw-r--r--  test/eigensolver_complex.cpp | 3
-rw-r--r--  test/eigensolver_generalized_real.cpp | 3
-rw-r--r--  test/eigensolver_generic.cpp | 1
-rw-r--r--  test/eigensolver_selfadjoint.cpp | 3
-rw-r--r--  test/exceptions.cpp | 4
-rw-r--r--  test/geo_alignedbox.cpp | 1
-rw-r--r--  test/geo_hyperplane.cpp | 1
-rw-r--r--  test/geo_parametrizedline.cpp | 2
-rw-r--r--  test/geo_quaternion.cpp | 2
-rw-r--r--  test/geo_transformations.cpp | 19
-rw-r--r--  test/householder.cpp | 2
-rw-r--r--  test/inverse.cpp | 5
-rw-r--r--  test/jacobi.cpp | 1
-rw-r--r--  test/jacobisvd.cpp | 3
-rw-r--r--  test/lu.cpp | 4
-rw-r--r--  test/main.h | 15
-rw-r--r--  test/map.cpp | 3
-rw-r--r--  test/meta.cpp | 3
-rw-r--r--  test/miscmatrices.cpp | 1
-rw-r--r--  test/nesting_ops.cpp | 1
-rw-r--r--  test/nomalloc.cpp | 7
-rw-r--r--  test/nullary.cpp | 6
-rw-r--r--  test/packetmath.cpp | 22
-rw-r--r--  test/permutationmatrices.cpp | 1
-rw-r--r--  test/prec_inverse_4x4.cpp | 1
-rw-r--r--  test/product.h | 1
-rw-r--r--  test/product_extra.cpp | 1
-rw-r--r--  test/product_mmtr.cpp | 2
-rw-r--r--  test/product_notemporary.cpp | 3
-rw-r--r--  test/product_selfadjoint.cpp | 1
-rw-r--r--  test/product_symm.cpp | 2
-rw-r--r--  test/product_syrk.cpp | 47
-rw-r--r--  test/product_trmm.cpp | 2
-rw-r--r--  test/qr.cpp | 1
-rw-r--r--  test/qr_colpivoting.cpp | 2
-rw-r--r--  test/qr_fullpivoting.cpp | 1
-rw-r--r--  test/real_qz.cpp | 4
-rw-r--r--  test/ref.cpp | 232
-rw-r--r--  test/selfadjoint.cpp | 1
-rw-r--r--  test/sparse.h | 25
-rw-r--r--  test/sparse_basic.cpp | 18
-rw-r--r--  test/sparse_product.cpp | 20
-rw-r--r--  test/sparse_solver.h | 58
-rw-r--r--  test/sparse_vector.cpp | 6
-rw-r--r--  test/sparselu.cpp | 12
-rw-r--r--  test/sparseqr.cpp | 36
-rw-r--r--  test/special_numbers.cpp | 59
-rw-r--r--  test/spqr_support.cpp | 6
-rw-r--r--  test/triangular.cpp | 3
-rw-r--r--  test/umeyama.cpp | 3
-rw-r--r--  test/unalignedcount.cpp | 2
-rw-r--r--  test/upperbidiagonalization.cpp | 1
-rw-r--r--  test/vectorwiseop.cpp | 25
-rw-r--r--  test/visitor.cpp | 4
-rw-r--r--  unsupported/Eigen/IterativeSolvers | 3
-rw-r--r--  unsupported/Eigen/LevenbergMarquardt | 5
-rw-r--r--  unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h | 26
-rw-r--r--  unsupported/Eigen/src/BVH/BVAlgorithms.h | 2
-rw-r--r--  unsupported/Eigen/src/IterativeSolvers/ConstrainedConjGrad.h | 4
-rw-r--r--  unsupported/Eigen/src/IterativeSolvers/DGMRES.h | 78
-rw-r--r--  unsupported/Eigen/src/IterativeSolvers/GMRES.h | 4
-rw-r--r--  unsupported/Eigen/src/IterativeSolvers/IncompleteCholesky.h | 5
-rw-r--r--  unsupported/Eigen/src/IterativeSolvers/IterationController.h | 4
-rw-r--r--  unsupported/Eigen/src/IterativeSolvers/MINRES.h | 4
-rw-r--r--  unsupported/Eigen/src/IterativeSolvers/Scaling.h | 4
-rw-r--r--  unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h | 4
-rw-r--r--  unsupported/Eigen/src/LevenbergMarquardt/LMcovar.h | 2
-rw-r--r--  unsupported/Eigen/src/LevenbergMarquardt/LMonestep.h | 69
-rw-r--r--  unsupported/Eigen/src/LevenbergMarquardt/LMpar.h | 22
-rw-r--r--  unsupported/Eigen/src/LevenbergMarquardt/LMqrsolv.h | 4
-rw-r--r--  unsupported/Eigen/src/LevenbergMarquardt/LevenbergMarquardt.h | 79
-rw-r--r--  unsupported/Eigen/src/MatrixFunctions/MatrixLogarithm.h | 18
-rw-r--r--  unsupported/Eigen/src/MatrixFunctions/MatrixPower.h | 2
-rw-r--r--  unsupported/Eigen/src/NonLinearOptimization/HybridNonLinearSolver.h | 6
-rw-r--r--  unsupported/Eigen/src/NonLinearOptimization/LevenbergMarquardt.h | 8
-rw-r--r--  unsupported/Eigen/src/NonLinearOptimization/covar.h | 2
-rw-r--r--  unsupported/Eigen/src/NonLinearOptimization/dogleg.h | 6
-rw-r--r--  unsupported/Eigen/src/NonLinearOptimization/fdjac1.h | 2
-rw-r--r--  unsupported/Eigen/src/NonLinearOptimization/lmpar.h | 10
-rw-r--r--  unsupported/Eigen/src/NonLinearOptimization/r1updt.h | 8
-rw-r--r--  unsupported/Eigen/src/NonLinearOptimization/rwupdt.h | 2
-rw-r--r--  unsupported/Eigen/src/NumericalDiff/NumericalDiff.h | 4
-rw-r--r--  unsupported/Eigen/src/Polynomials/PolynomialSolver.h | 4
-rw-r--r--  unsupported/Eigen/src/Polynomials/PolynomialUtils.h | 2
-rw-r--r--  unsupported/Eigen/src/SparseExtra/MarketIO.h | 6
-rw-r--r--  unsupported/test/CMakeLists.txt | 3
-rw-r--r--  unsupported/test/dgmres.cpp | 31
-rw-r--r--  unsupported/test/kronecker_product.cpp | 15
-rw-r--r--  unsupported/test/levenberg_marquardt.cpp | 22
-rw-r--r--  unsupported/test/matrix_function.cpp | 1
-rw-r--r--  unsupported/test/matrix_power.cpp | 4
-rw-r--r--  unsupported/test/mpreal/mpreal.h | 718
-rw-r--r--  unsupported/test/splines.cpp | 4
252 files changed, 5314 insertions(+), 4162 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 3ba310a27..de25c69f9 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -105,26 +105,63 @@ if(EIGEN_DEFAULT_TO_ROW_MAJOR)
add_definitions("-DEIGEN_DEFAULT_TO_ROW_MAJOR")
endif()
-add_definitions("-DEIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS")
-
set(EIGEN_TEST_MAX_SIZE "320" CACHE STRING "Maximal matrix/vector size, default is 320")
-if(CMAKE_COMPILER_IS_GNUCXX)
- set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wnon-virtual-dtor -Wno-long-long -ansi -Wundef -Wcast-align -Wchar-subscripts -Wall -W -Wpointer-arith -Wwrite-strings -Wformat-security -fexceptions -fno-check-new -fno-common -fstrict-aliasing")
+macro(ei_add_cxx_compiler_flag FLAG)
+ string(REGEX REPLACE "-" "" SFLAG ${FLAG})
+ check_cxx_compiler_flag(${FLAG} COMPILER_SUPPORT_${SFLAG})
+ if(COMPILER_SUPPORT_${SFLAG})
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLAG}")
+ endif()
+endmacro(ei_add_cxx_compiler_flag)
+
+if(NOT MSVC)
+ # We assume that other compilers are partly compatible with GNUCC
+
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fexceptions")
set(CMAKE_CXX_FLAGS_DEBUG "-g3")
set(CMAKE_CXX_FLAGS_RELEASE "-g0 -O2")
-
- check_cxx_compiler_flag("-Wno-variadic-macros" COMPILER_SUPPORT_WNOVARIADICMACRO)
- if(COMPILER_SUPPORT_WNOVARIADICMACRO)
- set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-variadic-macros")
+
+ # clang outputs some warnings for unknown flags that are not caught by check_cxx_compiler_flag
+ # adding -Werror turns such warnings into errors
+ check_cxx_compiler_flag("-Werror" COMPILER_SUPPORT_WERROR)
+ if(COMPILER_SUPPORT_WERROR)
+ set(CMAKE_REQUIRED_FLAGS "-Werror")
endif()
-
- check_cxx_compiler_flag("-Wextra" COMPILER_SUPPORT_WEXTRA)
- if(COMPILER_SUPPORT_WEXTRA)
- set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wextra")
+
+ ei_add_cxx_compiler_flag("-pedantic")
+ ei_add_cxx_compiler_flag("-Wall")
+ ei_add_cxx_compiler_flag("-Wextra")
+ #ei_add_cxx_compiler_flag("-Weverything") # clang
+
+ ei_add_cxx_compiler_flag("-Wundef")
+ ei_add_cxx_compiler_flag("-Wcast-align")
+ ei_add_cxx_compiler_flag("-Wchar-subscripts")
+ ei_add_cxx_compiler_flag("-Wnon-virtual-dtor")
+ ei_add_cxx_compiler_flag("-Wunused-local-typedefs")
+ ei_add_cxx_compiler_flag("-Wpointer-arith")
+ ei_add_cxx_compiler_flag("-Wwrite-strings")
+ ei_add_cxx_compiler_flag("-Wformat-security")
+
+ ei_add_cxx_compiler_flag("-Wno-psabi")
+ ei_add_cxx_compiler_flag("-Wno-variadic-macros")
+ ei_add_cxx_compiler_flag("-Wno-long-long")
+
+ ei_add_cxx_compiler_flag("-fno-check-new")
+ ei_add_cxx_compiler_flag("-fno-common")
+ ei_add_cxx_compiler_flag("-fstrict-aliasing")
+ ei_add_cxx_compiler_flag("-wd981") # disbale ICC's "operands are evaluated in unspecified order" remark
+
+ # The -ansi flag must be added last, otherwise it is also used as a linker flag by check_cxx_compiler_flag, making it fail
+ # Moreover, we should not set both -strict-ansi and -ansi
+ check_cxx_compiler_flag("-strict-ansi" COMPILER_SUPPORT_STRICTANSI)
+ if(COMPILER_SUPPORT_STRICTANSI)
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -strict-ansi")
+ else()
+ ei_add_cxx_compiler_flag("-ansi")
endif()
-
- set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pedantic")
+
+ set(CMAKE_REQUIRED_FLAGS "")
option(EIGEN_TEST_SSE2 "Enable/Disable SSE2 in tests/examples" OFF)
if(EIGEN_TEST_SSE2)
@@ -164,7 +201,7 @@ if(CMAKE_COMPILER_IS_GNUCXX)
option(EIGEN_TEST_NEON "Enable/Disable Neon in tests/examples" OFF)
if(EIGEN_TEST_NEON)
- set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon -mcpu=cortex-a8")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon -mcpu=cortex-a8")
message(STATUS "Enabling NEON in tests/examples")
endif()
@@ -177,9 +214,8 @@ if(CMAKE_COMPILER_IS_GNUCXX)
endif()
endif()
-endif(CMAKE_COMPILER_IS_GNUCXX)
+else(NOT MSVC)
-if(MSVC)
# C4127 - conditional expression is constant
# C4714 - marked as __forceinline not inlined (I failed to deactivate it selectively)
# We can disable this warning in the unit tests since it is clear that it occurs
@@ -209,7 +245,7 @@ if(MSVC)
endif(NOT CMAKE_CL_64)
message(STATUS "Enabling SSE2 in tests/examples")
endif(EIGEN_TEST_SSE2)
-endif(MSVC)
+endif(NOT MSVC)
option(EIGEN_TEST_NO_EXPLICIT_VECTORIZATION "Disable explicit vectorization in tests/examples" OFF)
option(EIGEN_TEST_X87 "Force using X87 instructions. Implies no vectorization." OFF)
@@ -308,6 +344,7 @@ add_subdirectory(Eigen)
add_subdirectory(doc EXCLUDE_FROM_ALL)
include(EigenConfigureTesting)
+
# fixme, not sure this line is still needed:
enable_testing() # must be called from the root CMakeLists, see man page
@@ -342,6 +379,8 @@ if(NOT WIN32)
add_subdirectory(bench/spbench EXCLUDE_FROM_ALL)
endif(NOT WIN32)
+configure_file(scripts/cdashtesting.cmake.in cdashtesting.cmake @ONLY)
+
ei_testing_print_summary()
message(STATUS "")
diff --git a/CTestConfig.cmake b/CTestConfig.cmake
index a5a4eb012..4c0027824 100644
--- a/CTestConfig.cmake
+++ b/CTestConfig.cmake
@@ -11,3 +11,7 @@ set(CTEST_DROP_METHOD "http")
set(CTEST_DROP_SITE "manao.inria.fr")
set(CTEST_DROP_LOCATION "/CDash/submit.php?project=Eigen")
set(CTEST_DROP_SITE_CDASH TRUE)
+set(CTEST_PROJECT_SUBPROJECTS
+Official
+Unsupported
+)
diff --git a/Eigen/Core b/Eigen/Core
index 26a37cf42..1798264e9 100644
--- a/Eigen/Core
+++ b/Eigen/Core
@@ -65,7 +65,7 @@
#endif
#else
// Remember that usage of defined() in a #define is undefined by the standard
- #if (defined __SSE2__) && ( (!defined __GNUC__) || EIGEN_GNUC_AT_LEAST(4,2) )
+ #if (defined __SSE2__) && ( (!defined __GNUC__) || (defined __INTEL_COMPILER) || EIGEN_GNUC_AT_LEAST(4,2) )
#define EIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC
#endif
#endif
diff --git a/Eigen/OrderingMethods b/Eigen/OrderingMethods
index 423cfb0cd..7c0f1ffff 100644
--- a/Eigen/OrderingMethods
+++ b/Eigen/OrderingMethods
@@ -56,7 +56,10 @@
* \endcode
*/
+#ifndef EIGEN_MPL2_ONLY
#include "src/OrderingMethods/Amd.h"
+#endif
+
#include "src/OrderingMethods/Ordering.h"
#include "src/Core/util/ReenableStupidWarnings.h"
diff --git a/Eigen/Sparse b/Eigen/Sparse
index a3cee2482..9d4da4c06 100644
--- a/Eigen/Sparse
+++ b/Eigen/Sparse
@@ -7,6 +7,8 @@
* - SparseCore
* - OrderingMethods
* - SparseCholesky
+ * - SparseLU
+ * - SparseQR
* - IterativeLinearSolvers
*
* \code
@@ -17,6 +19,8 @@
#include "SparseCore"
#include "OrderingMethods"
#include "SparseCholesky"
+#include "SparseLU"
+#include "SparseQR"
#include "IterativeLinearSolvers"
#endif // EIGEN_SPARSE_MODULE_H
diff --git a/Eigen/SparseCholesky b/Eigen/SparseCholesky
index f94e2e765..800f17bc4 100644
--- a/Eigen/SparseCholesky
+++ b/Eigen/SparseCholesky
@@ -20,11 +20,19 @@
* \endcode
*/
+#ifdef EIGEN_MPL2_ONLY
+#error The SparseCholesky module has nothing to offer in MPL2 only mode
+#endif
+
#include "src/misc/Solve.h"
#include "src/misc/SparseSolve.h"
#include "src/SparseCholesky/SimplicialCholesky.h"
+#ifndef EIGEN_MPL2_ONLY
+#include "src/SparseCholesky/SimplicialCholesky_impl.h"
+#endif
+
#include "src/Core/util/ReenableStupidWarnings.h"
#endif // EIGEN_SPARSECHOLESKY_MODULE_H
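For context: with these guards, license selection becomes a compile-time switch. A minimal sketch of how EIGEN_MPL2_ONLY is meant to be used (the includes are illustrative; the commented-out one would trigger the #error above):

    // Opt into MPL2-only mode; the define must come before any Eigen header.
    #define EIGEN_MPL2_ONLY
    #include <Eigen/SparseCore>         // fine: SparseCore is MPL2-licensed
    // #include <Eigen/SparseCholesky>  // would hit the #error above, since the
    //                                  // module has no MPL2-licensed content

    int main() { return 0; }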
diff --git a/Eigen/SparseLU b/Eigen/SparseLU
index 69cc6beca..38b38b531 100644
--- a/Eigen/SparseLU
+++ b/Eigen/SparseLU
@@ -2,6 +2,7 @@
// for linear algebra.
//
// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
+// Copyright (C) 2012 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
@@ -14,7 +15,9 @@
/**
* \defgroup SparseLU_Module SparseLU module
- *
+ * This module defines a supernodal factorization of general sparse matrices.
+ * The code is fully optimized for supernode-panel updates with specialized kernels.
+ * Please see the documentation of the SparseLU class for more details.
*/
// Ordering interface
@@ -23,8 +26,8 @@
#include "src/SparseLU/SparseLU_gemm_kernel.h"
#include "src/SparseLU/SparseLU_Structs.h"
-#include "src/SparseLU/SparseLU_Matrix.h"
-#include "src/SparseLU/SparseLUBase.h"
+#include "src/SparseLU/SparseLU_SupernodalMatrix.h"
+#include "src/SparseLU/SparseLUImpl.h"
#include "src/SparseCore/SparseColEtree.h"
#include "src/SparseLU/SparseLU_Memory.h"
#include "src/SparseLU/SparseLU_heap_relax_snode.h"
diff --git a/Eigen/SparseQR b/Eigen/SparseQR
index 8030d641d..f51913f7b 100644
--- a/Eigen/SparseQR
+++ b/Eigen/SparseQR
@@ -26,4 +26,4 @@
#include "src/Core/util/ReenableStupidWarnings.h"
-#endif
\ No newline at end of file
+#endif
diff --git a/Eigen/src/Cholesky/LDLT.h b/Eigen/src/Cholesky/LDLT.h
index 5dede8081..9bd60d648 100644
--- a/Eigen/src/Cholesky/LDLT.h
+++ b/Eigen/src/Cholesky/LDLT.h
@@ -196,7 +196,7 @@ template<typename _MatrixType, int _UpLo> class LDLT
LDLT& compute(const MatrixType& matrix);
template <typename Derived>
- LDLT& rankUpdate(const MatrixBase<Derived>& w,RealScalar alpha=1);
+ LDLT& rankUpdate(const MatrixBase<Derived>& w, const RealScalar& alpha=1);
/** \returns the internal LDLT decomposition matrix
*
@@ -347,7 +347,7 @@ template<> struct ldlt_inplace<Lower>
// Here only rank-1 updates are implemented, to reduce the
// requirement for intermediate storage and improve accuracy
template<typename MatrixType, typename WDerived>
- static bool updateInPlace(MatrixType& mat, MatrixBase<WDerived>& w, typename MatrixType::RealScalar sigma=1)
+ static bool updateInPlace(MatrixType& mat, MatrixBase<WDerived>& w, const typename MatrixType::RealScalar& sigma=1)
{
using internal::isfinite;
typedef typename MatrixType::Scalar Scalar;
@@ -386,7 +386,7 @@ template<> struct ldlt_inplace<Lower>
}
template<typename MatrixType, typename TranspositionType, typename Workspace, typename WType>
- static bool update(MatrixType& mat, const TranspositionType& transpositions, Workspace& tmp, const WType& w, typename MatrixType::RealScalar sigma=1)
+ static bool update(MatrixType& mat, const TranspositionType& transpositions, Workspace& tmp, const WType& w, const typename MatrixType::RealScalar& sigma=1)
{
// Apply the permutation to the input w
tmp = transpositions * w;
@@ -405,7 +405,7 @@ template<> struct ldlt_inplace<Upper>
}
template<typename MatrixType, typename TranspositionType, typename Workspace, typename WType>
- static EIGEN_STRONG_INLINE bool update(MatrixType& mat, TranspositionType& transpositions, Workspace& tmp, WType& w, typename MatrixType::RealScalar sigma=1)
+ static EIGEN_STRONG_INLINE bool update(MatrixType& mat, TranspositionType& transpositions, Workspace& tmp, WType& w, const typename MatrixType::RealScalar& sigma=1)
{
Transpose<MatrixType> matt(mat);
return ldlt_inplace<Lower>::update(matt, transpositions, tmp, w.conjugate(), sigma);
@@ -457,7 +457,7 @@ LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::compute(const MatrixType& a)
*/
template<typename MatrixType, int _UpLo>
template<typename Derived>
-LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::rankUpdate(const MatrixBase<Derived>& w,typename NumTraits<typename MatrixType::Scalar>::Real sigma)
+LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::rankUpdate(const MatrixBase<Derived>& w, const typename NumTraits<typename MatrixType::Scalar>::Real& sigma)
{
const Index size = w.rows();
if (m_isInitialized)
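Passing sigma by const reference does not change the semantics of rankUpdate: it turns a factorization of A into one of A + sigma*w*w^T without refactoring from scratch. A quick sketch (illustrative data; the reconstruction error is expected to be tiny):

    #include <Eigen/Dense>

    int main()
    {
      Eigen::MatrixXd A = Eigen::MatrixXd::Random(4,4);
      Eigen::MatrixXd S = A * A.transpose();   // symmetric positive definite
      Eigen::LDLT<Eigen::MatrixXd> ldlt(S);

      Eigen::VectorXd w = Eigen::VectorXd::Random(4);
      double sigma = 2.0;
      ldlt.rankUpdate(w, sigma);               // now factors S + sigma * w * w^T

      double err = (ldlt.reconstructedMatrix()
                    - (S + sigma * w * w.transpose())).norm();
      return err < 1e-10 ? 0 : 1;
    }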
diff --git a/Eigen/src/Cholesky/LLT.h b/Eigen/src/Cholesky/LLT.h
index 478fad251..db22a2f85 100644
--- a/Eigen/src/Cholesky/LLT.h
+++ b/Eigen/src/Cholesky/LLT.h
@@ -200,7 +200,7 @@ static typename MatrixType::Index llt_rank_update_lower(MatrixType& mat, const V
typedef Matrix<Scalar,Dynamic,1> TempVectorType;
typedef typename TempVectorType::SegmentReturnType TempVecSegment;
- int n = mat.cols();
+ Index n = mat.cols();
eigen_assert(mat.rows()==n && vec.size()==n);
TempVectorType temp;
@@ -212,12 +212,12 @@ static typename MatrixType::Index llt_rank_update_lower(MatrixType& mat, const V
// i.e., for sigma > 0
temp = sqrt(sigma) * vec;
- for(int i=0; i<n; ++i)
+ for(Index i=0; i<n; ++i)
{
JacobiRotation<Scalar> g;
g.makeGivens(mat(i,i), -temp(i), &mat(i,i));
- int rs = n-i-1;
+ Index rs = n-i-1;
if(rs>0)
{
ColXprSegment x(mat.col(i).tail(rs));
@@ -230,7 +230,7 @@ static typename MatrixType::Index llt_rank_update_lower(MatrixType& mat, const V
{
temp = vec;
RealScalar beta = 1;
- for(int j=0; j<n; ++j)
+ for(Index j=0; j<n; ++j)
{
RealScalar Ljj = real(mat.coeff(j,j));
RealScalar dj = abs2(Ljj);
diff --git a/Eigen/src/CholmodSupport/CholmodSupport.h b/Eigen/src/CholmodSupport/CholmodSupport.h
index 26abaf48f..42d289ad8 100644
--- a/Eigen/src/CholmodSupport/CholmodSupport.h
+++ b/Eigen/src/CholmodSupport/CholmodSupport.h
@@ -51,7 +51,6 @@ void cholmod_configure_matrix(CholmodType& mat)
template<typename _Scalar, int _Options, typename _Index>
cholmod_sparse viewAsCholmod(SparseMatrix<_Scalar,_Options,_Index>& mat)
{
- typedef SparseMatrix<_Scalar,_Options,_Index> MatrixType;
cholmod_sparse res;
res.nzmax = mat.nonZeros();
res.nrow = mat.rows();;
diff --git a/Eigen/src/Core/Array.h b/Eigen/src/Core/Array.h
index 707a9d7f2..4c83f7e7f 100644
--- a/Eigen/src/Core/Array.h
+++ b/Eigen/src/Core/Array.h
@@ -114,7 +114,7 @@ class Array
EIGEN_STRONG_INLINE explicit Array() : Base()
{
Base::_check_template_params();
- EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
+ EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
}
#ifndef EIGEN_PARSED_BY_DOXYGEN
@@ -125,7 +125,7 @@ class Array
: Base(internal::constructor_without_unaligned_array_assert())
{
Base::_check_template_params();
- EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
+ EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
}
#endif
@@ -143,7 +143,7 @@ class Array
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Array)
eigen_assert(dim >= 0);
eigen_assert(SizeAtCompileTime == Dynamic || SizeAtCompileTime == dim);
- EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
+ EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
}
#ifndef EIGEN_PARSED_BY_DOXYGEN
diff --git a/Eigen/src/Core/Assign.h b/Eigen/src/Core/Assign.h
index dc9b55fa4..8c9078f06 100644
--- a/Eigen/src/Core/Assign.h
+++ b/Eigen/src/Core/Assign.h
@@ -158,7 +158,7 @@ template<typename Derived1, typename Derived2, int Index, int Stop>
struct assign_DefaultTraversal_InnerUnrolling
{
EIGEN_DEVICE_FUNC
- static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src, int outer)
+ static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src, typename Derived1::Index outer)
{
dst.copyCoeffByOuterInner(outer, Index, src);
assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src, outer);
@@ -169,7 +169,7 @@ template<typename Derived1, typename Derived2, int Stop>
struct assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, Stop, Stop>
{
EIGEN_DEVICE_FUNC
- static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &, int) {}
+ static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &, typename Derived1::Index) {}
};
/***********************
@@ -224,7 +224,7 @@ struct assign_innervec_CompleteUnrolling<Derived1, Derived2, Stop, Stop>
template<typename Derived1, typename Derived2, int Index, int Stop>
struct assign_innervec_InnerUnrolling
{
- static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src, int outer)
+ static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src, typename Derived1::Index outer)
{
dst.template copyPacketByOuterInner<Derived2, Aligned, Aligned>(outer, Index, src);
assign_innervec_InnerUnrolling<Derived1, Derived2,
@@ -235,7 +235,7 @@ struct assign_innervec_InnerUnrolling
template<typename Derived1, typename Derived2, int Stop>
struct assign_innervec_InnerUnrolling<Derived1, Derived2, Stop, Stop>
{
- static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &, int) {}
+ static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &, typename Derived1::Index) {}
};
/***************************************************************************
diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h
index 5e134c83a..8d835b2f6 100644
--- a/Eigen/src/Core/AssignEvaluator.h
+++ b/Eigen/src/Core/AssignEvaluator.h
@@ -149,8 +149,8 @@ struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling
inner = Index % DstXprType::InnerSizeAtCompileTime
};
- EIGEN_STRONG_INLINE static void run(DstEvaluatorType &dstEvaluator,
- SrcEvaluatorType &srcEvaluator)
+ static EIGEN_STRONG_INLINE void run(DstEvaluatorType &dstEvaluator,
+ SrcEvaluatorType &srcEvaluator)
{
dstEvaluator.copyCoeffByOuterInner(outer, inner, srcEvaluator);
copy_using_evaluator_DefaultTraversal_CompleteUnrolling
@@ -162,15 +162,15 @@ struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling
template<typename DstEvaluatorType, typename SrcEvaluatorType, int Stop>
struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling<DstEvaluatorType, SrcEvaluatorType, Stop, Stop>
{
- EIGEN_STRONG_INLINE static void run(DstEvaluatorType&, SrcEvaluatorType&) { }
+ static EIGEN_STRONG_INLINE void run(DstEvaluatorType&, SrcEvaluatorType&) { }
};
template<typename DstEvaluatorType, typename SrcEvaluatorType, int Index, int Stop>
struct copy_using_evaluator_DefaultTraversal_InnerUnrolling
{
- EIGEN_STRONG_INLINE static void run(DstEvaluatorType &dstEvaluator,
- SrcEvaluatorType &srcEvaluator,
- int outer)
+ static EIGEN_STRONG_INLINE void run(DstEvaluatorType &dstEvaluator,
+ SrcEvaluatorType &srcEvaluator,
+ int outer)
{
dstEvaluator.copyCoeffByOuterInner(outer, Index, srcEvaluator);
copy_using_evaluator_DefaultTraversal_InnerUnrolling
@@ -182,7 +182,7 @@ struct copy_using_evaluator_DefaultTraversal_InnerUnrolling
template<typename DstEvaluatorType, typename SrcEvaluatorType, int Stop>
struct copy_using_evaluator_DefaultTraversal_InnerUnrolling<DstEvaluatorType, SrcEvaluatorType, Stop, Stop>
{
- EIGEN_STRONG_INLINE static void run(DstEvaluatorType&, SrcEvaluatorType&, int) { }
+ static EIGEN_STRONG_INLINE void run(DstEvaluatorType&, SrcEvaluatorType&, int) { }
};
/***********************
@@ -192,8 +192,8 @@ struct copy_using_evaluator_DefaultTraversal_InnerUnrolling<DstEvaluatorType, Sr
template<typename DstEvaluatorType, typename SrcEvaluatorType, int Index, int Stop>
struct copy_using_evaluator_LinearTraversal_CompleteUnrolling
{
- EIGEN_STRONG_INLINE static void run(DstEvaluatorType &dstEvaluator,
- SrcEvaluatorType &srcEvaluator)
+ static EIGEN_STRONG_INLINE void run(DstEvaluatorType &dstEvaluator,
+ SrcEvaluatorType &srcEvaluator)
{
dstEvaluator.copyCoeff(Index, srcEvaluator);
copy_using_evaluator_LinearTraversal_CompleteUnrolling
@@ -205,7 +205,7 @@ struct copy_using_evaluator_LinearTraversal_CompleteUnrolling
template<typename DstEvaluatorType, typename SrcEvaluatorType, int Stop>
struct copy_using_evaluator_LinearTraversal_CompleteUnrolling<DstEvaluatorType, SrcEvaluatorType, Stop, Stop>
{
- EIGEN_STRONG_INLINE static void run(DstEvaluatorType&, SrcEvaluatorType&) { }
+ static EIGEN_STRONG_INLINE void run(DstEvaluatorType&, SrcEvaluatorType&) { }
};
/**************************
@@ -224,8 +224,8 @@ struct copy_using_evaluator_innervec_CompleteUnrolling
JointAlignment = copy_using_evaluator_traits<DstXprType,SrcXprType>::JointAlignment
};
- EIGEN_STRONG_INLINE static void run(DstEvaluatorType &dstEvaluator,
- SrcEvaluatorType &srcEvaluator)
+ static EIGEN_STRONG_INLINE void run(DstEvaluatorType &dstEvaluator,
+ SrcEvaluatorType &srcEvaluator)
{
dstEvaluator.template copyPacketByOuterInner<Aligned, JointAlignment>(outer, inner, srcEvaluator);
enum { NextIndex = Index + packet_traits<typename DstXprType::Scalar>::size };
@@ -238,15 +238,15 @@ struct copy_using_evaluator_innervec_CompleteUnrolling
template<typename DstEvaluatorType, typename SrcEvaluatorType, int Stop>
struct copy_using_evaluator_innervec_CompleteUnrolling<DstEvaluatorType, SrcEvaluatorType, Stop, Stop>
{
- EIGEN_STRONG_INLINE static void run(DstEvaluatorType&, SrcEvaluatorType&) { }
+ static EIGEN_STRONG_INLINE void run(DstEvaluatorType&, SrcEvaluatorType&) { }
};
template<typename DstEvaluatorType, typename SrcEvaluatorType, int Index, int Stop>
struct copy_using_evaluator_innervec_InnerUnrolling
{
- EIGEN_STRONG_INLINE static void run(DstEvaluatorType &dstEvaluator,
- SrcEvaluatorType &srcEvaluator,
- int outer)
+ static EIGEN_STRONG_INLINE void run(DstEvaluatorType &dstEvaluator,
+ SrcEvaluatorType &srcEvaluator,
+ int outer)
{
dstEvaluator.template copyPacketByOuterInner<Aligned, Aligned>(outer, Index, srcEvaluator);
typedef typename DstEvaluatorType::XprType DstXprType;
@@ -260,7 +260,7 @@ struct copy_using_evaluator_innervec_InnerUnrolling
template<typename DstEvaluatorType, typename SrcEvaluatorType, int Stop>
struct copy_using_evaluator_innervec_InnerUnrolling<DstEvaluatorType, SrcEvaluatorType, Stop, Stop>
{
- EIGEN_STRONG_INLINE static void run(DstEvaluatorType&, SrcEvaluatorType&, int) { }
+ static EIGEN_STRONG_INLINE void run(DstEvaluatorType&, SrcEvaluatorType&, int) { }
};
/***************************************************************************
@@ -301,7 +301,7 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, DefaultTraversal, NoUnr
template<typename DstXprType, typename SrcXprType>
struct copy_using_evaluator_impl<DstXprType, SrcXprType, DefaultTraversal, CompleteUnrolling>
{
- EIGEN_STRONG_INLINE static void run(DstXprType &dst, const SrcXprType &src)
+ static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src)
{
typedef typename evaluator<DstXprType>::type DstEvaluatorType;
typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
@@ -319,7 +319,7 @@ template<typename DstXprType, typename SrcXprType>
struct copy_using_evaluator_impl<DstXprType, SrcXprType, DefaultTraversal, InnerUnrolling>
{
typedef typename DstXprType::Index Index;
- EIGEN_STRONG_INLINE static void run(DstXprType &dst, const SrcXprType &src)
+ static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src)
{
typedef typename evaluator<DstXprType>::type DstEvaluatorType;
typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
@@ -330,7 +330,7 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, DefaultTraversal, Inner
const Index outerSize = dst.outerSize();
for(Index outer = 0; outer < outerSize; ++outer)
copy_using_evaluator_DefaultTraversal_InnerUnrolling
- <DstEvaluatorType, SrcEvaluatorType, 0, DstXprType::InnerSizeAtCompileTime>
+ <DstEvaluatorType, SrcEvaluatorType, 0, DstXprType::InnerSizeAtCompileTime>
::run(dstEvaluator, srcEvaluator, outer);
}
};
@@ -345,7 +345,7 @@ struct unaligned_copy_using_evaluator_impl
// if IsAligned = true, then do nothing
template <typename SrcEvaluatorType, typename DstEvaluatorType>
static EIGEN_STRONG_INLINE void run(const SrcEvaluatorType&, DstEvaluatorType&,
- typename SrcEvaluatorType::Index, typename SrcEvaluatorType::Index) {}
+ typename SrcEvaluatorType::Index, typename SrcEvaluatorType::Index) {}
};
template <>
@@ -356,15 +356,15 @@ struct unaligned_copy_using_evaluator_impl<false>
#ifdef _MSC_VER
template <typename DstEvaluatorType, typename SrcEvaluatorType>
static EIGEN_DONT_INLINE void run(DstEvaluatorType &dstEvaluator,
- const SrcEvaluatorType &srcEvaluator,
- typename DstEvaluatorType::Index start,
- typename DstEvaluatorType::Index end)
+ const SrcEvaluatorType &srcEvaluator,
+ typename DstEvaluatorType::Index start,
+ typename DstEvaluatorType::Index end)
#else
template <typename DstEvaluatorType, typename SrcEvaluatorType>
static EIGEN_STRONG_INLINE void run(DstEvaluatorType &dstEvaluator,
- const SrcEvaluatorType &srcEvaluator,
- typename DstEvaluatorType::Index start,
- typename DstEvaluatorType::Index end)
+ const SrcEvaluatorType &srcEvaluator,
+ typename DstEvaluatorType::Index start,
+ typename DstEvaluatorType::Index end)
#endif
{
for (typename DstEvaluatorType::Index index = start; index < end; ++index)
@@ -375,7 +375,7 @@ struct unaligned_copy_using_evaluator_impl<false>
template<typename DstXprType, typename SrcXprType>
struct copy_using_evaluator_impl<DstXprType, SrcXprType, LinearVectorizedTraversal, NoUnrolling>
{
- EIGEN_STRONG_INLINE static void run(DstXprType &dst, const SrcXprType &src)
+ static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src)
{
typedef typename evaluator<DstXprType>::type DstEvaluatorType;
typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
@@ -392,7 +392,7 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, LinearVectorizedTravers
dstAlignment = PacketTraits::AlignedOnScalar ? Aligned : dstIsAligned,
srcAlignment = copy_using_evaluator_traits<DstXprType,SrcXprType>::JointAlignment
};
- const Index alignedStart = dstIsAligned ? 0 : first_aligned(&dstEvaluator.coeffRef(0), size);
+ const Index alignedStart = dstIsAligned ? 0 : internal::first_aligned(&dstEvaluator.coeffRef(0), size);
const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;
unaligned_copy_using_evaluator_impl<dstIsAligned!=0>::run(dstEvaluator, srcEvaluator, 0, alignedStart);
@@ -410,7 +410,7 @@ template<typename DstXprType, typename SrcXprType>
struct copy_using_evaluator_impl<DstXprType, SrcXprType, LinearVectorizedTraversal, CompleteUnrolling>
{
typedef typename DstXprType::Index Index;
- EIGEN_STRONG_INLINE static void run(DstXprType &dst, const SrcXprType &src)
+ static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src)
{
typedef typename evaluator<DstXprType>::type DstEvaluatorType;
typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
@@ -460,7 +460,7 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, InnerVectorizedTraversa
template<typename DstXprType, typename SrcXprType>
struct copy_using_evaluator_impl<DstXprType, SrcXprType, InnerVectorizedTraversal, CompleteUnrolling>
{
- EIGEN_STRONG_INLINE static void run(DstXprType &dst, const SrcXprType &src)
+ static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src)
{
typedef typename evaluator<DstXprType>::type DstEvaluatorType;
typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
@@ -478,7 +478,7 @@ template<typename DstXprType, typename SrcXprType>
struct copy_using_evaluator_impl<DstXprType, SrcXprType, InnerVectorizedTraversal, InnerUnrolling>
{
typedef typename DstXprType::Index Index;
- EIGEN_STRONG_INLINE static void run(DstXprType &dst, const SrcXprType &src)
+ static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src)
{
typedef typename evaluator<DstXprType>::type DstEvaluatorType;
typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
@@ -489,7 +489,7 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, InnerVectorizedTraversa
const Index outerSize = dst.outerSize();
for(Index outer = 0; outer < outerSize; ++outer)
copy_using_evaluator_innervec_InnerUnrolling
- <DstEvaluatorType, SrcEvaluatorType, 0, DstXprType::InnerSizeAtCompileTime>
+ <DstEvaluatorType, SrcEvaluatorType, 0, DstXprType::InnerSizeAtCompileTime>
::run(dstEvaluator, srcEvaluator, outer);
}
};
@@ -519,7 +519,7 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, LinearTraversal, NoUnro
template<typename DstXprType, typename SrcXprType>
struct copy_using_evaluator_impl<DstXprType, SrcXprType, LinearTraversal, CompleteUnrolling>
{
- EIGEN_STRONG_INLINE static void run(DstXprType &dst, const SrcXprType &src)
+ static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src)
{
typedef typename evaluator<DstXprType>::type DstEvaluatorType;
typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
@@ -560,7 +560,7 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, SliceVectorizedTraversa
const Index outerSize = dst.outerSize();
const Index alignedStep = alignable ? (packetSize - dst.outerStride() % packetSize) & packetAlignedMask : 0;
Index alignedStart = ((!alignable) || copy_using_evaluator_traits<DstXprType,SrcXprType>::DstIsAligned) ? 0
- : first_aligned(&dstEvaluator.coeffRef(0,0), innerSize);
+ : internal::first_aligned(&dstEvaluator.coeffRef(0,0), innerSize);
for(Index outer = 0; outer < outerSize; ++outer)
{
@@ -596,7 +596,6 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, AllAtOnceTraversal, NoU
{
typedef typename evaluator<DstXprType>::type DstEvaluatorType;
typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
- typedef typename DstXprType::Index Index;
DstEvaluatorType dstEvaluator(dst);
SrcEvaluatorType srcEvaluator(src);
@@ -616,7 +615,7 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, AllAtOnceTraversal, NoU
template<typename DstXprType, template <typename> class StorageBase, typename SrcXprType>
EIGEN_STRONG_INLINE
const DstXprType& copy_using_evaluator(const NoAlias<DstXprType, StorageBase>& dst,
- const EigenBase<SrcXprType>& src)
+ const EigenBase<SrcXprType>& src)
{
return noalias_copy_using_evaluator(dst.expression(), src.derived());
}
diff --git a/Eigen/src/Core/Block.h b/Eigen/src/Core/Block.h
index fbc2cf227..e8377f67a 100644
--- a/Eigen/src/Core/Block.h
+++ b/Eigen/src/Core/Block.h
@@ -140,8 +140,8 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel> class
{
eigen_assert((RowsAtCompileTime==Dynamic || RowsAtCompileTime==blockRows)
&& (ColsAtCompileTime==Dynamic || ColsAtCompileTime==blockCols));
- eigen_assert(a_startRow >= 0 && blockRows >= 0 && a_startRow + blockRows <= xpr.rows()
- && a_startCol >= 0 && blockCols >= 0 && a_startCol + blockCols <= xpr.cols());
+ eigen_assert(a_startRow >= 0 && blockRows >= 0 && a_startRow <= xpr.rows() - blockRows
+ && a_startCol >= 0 && blockCols >= 0 && a_startCol <= xpr.cols() - blockCols);
}
};
diff --git a/Eigen/src/Core/BooleanRedux.h b/Eigen/src/Core/BooleanRedux.h
index 57efd8e69..f6afeb034 100644
--- a/Eigen/src/Core/BooleanRedux.h
+++ b/Eigen/src/Core/BooleanRedux.h
@@ -85,9 +85,7 @@ inline bool DenseBase<Derived>::all() const
&& SizeAtCompileTime * (CoeffReadCost + NumTraits<Scalar>::AddCost) <= EIGEN_UNROLLING_LIMIT
};
if(unroll)
- return internal::all_unroller<Derived,
- unroll ? int(SizeAtCompileTime) : Dynamic
- >::run(derived());
+ return internal::all_unroller<Derived, unroll ? int(SizeAtCompileTime) : Dynamic>::run(derived());
else
{
for(Index j = 0; j < cols(); ++j)
@@ -111,9 +109,7 @@ inline bool DenseBase<Derived>::any() const
&& SizeAtCompileTime * (CoeffReadCost + NumTraits<Scalar>::AddCost) <= EIGEN_UNROLLING_LIMIT
};
if(unroll)
- return internal::any_unroller<Derived,
- unroll ? int(SizeAtCompileTime) : Dynamic
- >::run(derived());
+ return internal::any_unroller<Derived, unroll ? int(SizeAtCompileTime) : Dynamic>::run(derived());
else
{
for(Index j = 0; j < cols(); ++j)
@@ -133,6 +129,26 @@ inline typename DenseBase<Derived>::Index DenseBase<Derived>::count() const
return derived().template cast<bool>().template cast<Index>().sum();
}
+/** \returns true if \c *this contains at least one Not A Number (NaN).
+ *
+ * \sa isFinite()
+ */
+template<typename Derived>
+inline bool DenseBase<Derived>::hasNaN() const
+{
+ return !((derived().array()==derived().array()).all());
+}
+
+/** \returns true if \c *this contains only finite numbers, i.e., no NaN and no +/-INF values.
+ *
+ * \sa hasNaN()
+ */
+template<typename Derived>
+inline bool DenseBase<Derived>::isFinite() const
+{
+ return !((derived()-derived()).hasNaN());
+}
+
} // end namespace Eigen
#endif // EIGEN_ALLANDANY_H
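Both new predicates rely on IEEE-754 semantics: NaN is the only value that compares unequal to itself (hence the array()==array() test in hasNaN()), and x - x is NaN exactly when x is NaN or infinite (hence isFinite() delegating to hasNaN()). A small sketch of the resulting behaviour:

    #include <Eigen/Dense>
    #include <limits>

    int main()
    {
      Eigen::Vector3d v(1.0, 2.0, 3.0);
      bool ok = !v.hasNaN() && v.isFinite();            // no NaN, all finite

      v(1) = std::numeric_limits<double>::quiet_NaN();
      ok = ok && v.hasNaN();                            // NaN != NaN detected

      v(1) = std::numeric_limits<double>::infinity();
      ok = ok && !v.isFinite();                         // inf - inf is NaN
      return ok ? 0 : 1;
    }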
diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h
index 5d991b74b..272027c7b 100644
--- a/Eigen/src/Core/CoreEvaluators.h
+++ b/Eigen/src/Core/CoreEvaluators.h
@@ -349,7 +349,7 @@ struct evaluator_impl<EvalToTemp<ArgType> >
template<int LoadMode>
PacketReturnType packet(Index row, Index col) const
{
- return m_resultImpl.packet<LoadMode>(row, col);
+ return m_resultImpl.template packet<LoadMode>(row, col);
}
template<int LoadMode>
@@ -361,13 +361,13 @@ struct evaluator_impl<EvalToTemp<ArgType> >
template<int StoreMode>
void writePacket(Index row, Index col, const PacketScalar& x)
{
- m_resultImpl.writePacket<StoreMode>(row, col, x);
+ m_resultImpl.template writePacket<StoreMode>(row, col, x);
}
template<int StoreMode>
void writePacket(Index index, const PacketScalar& x)
{
- m_resultImpl.writePacket<StoreMode>(index, x);
+ m_resultImpl.template writePacket<StoreMode>(index, x);
}
protected:
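The added template keywords are the standard C++ disambiguator for dependent names: inside a template, m_resultImpl.packet<LoadMode>(...) would otherwise parse as a less-than comparison. A reduced illustration with hypothetical Wrapper/DummyImpl types (not Eigen code):

    template<typename Impl>
    struct Wrapper
    {
      Impl impl;
      template<int Mode>
      double packet(int i) const
      {
        // Without 'template', conforming compilers parse 'impl.packet < Mode'
        // as a comparison and reject the call.
        return impl.template packet<Mode>(i);
      }
    };

    struct DummyImpl
    {
      template<int Mode> double packet(int i) const { return Mode + i; }
    };

    int main()
    {
      Wrapper<DummyImpl> w = { DummyImpl() };
      return w.packet<1>(2) == 3.0 ? 0 : 1;   // concrete type: no disambiguator needed here
    }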
diff --git a/Eigen/src/Core/CwiseBinaryOp.h b/Eigen/src/Core/CwiseBinaryOp.h
index 532b2b96e..e603fb9d6 100644
--- a/Eigen/src/Core/CwiseBinaryOp.h
+++ b/Eigen/src/Core/CwiseBinaryOp.h
@@ -94,8 +94,8 @@ struct traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
// So allowing mixing different types gives very unexpected errors when enabling vectorization, when the user tries to
// add together a float matrix and a double matrix.
#define EIGEN_CHECK_BINARY_COMPATIBILIY(BINOP,LHS,RHS) \
- EIGEN_STATIC_ASSERT((internal::functor_allows_mixing_real_and_complex<BINOP>::ret \
- ? int(internal::is_same<typename NumTraits<LHS>::Real, typename NumTraits<RHS>::Real>::value) \
+ EIGEN_STATIC_ASSERT((internal::functor_is_product_like<BINOP>::ret \
+ ? int(internal::scalar_product_traits<LHS, RHS>::Defined) \
: int(internal::is_same<LHS, RHS>::value)), \
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
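In practice, switching the check to functor_is_product_like / scalar_product_traits means product-like operations may mix a real scalar type with its complex counterpart, while additive operations still require identical scalar types. A sketch (the commented-out line is what still fails to compile):

    #include <Eigen/Dense>

    int main()
    {
      Eigen::MatrixXf  r = Eigen::MatrixXf::Random(2,2);
      Eigen::MatrixXcf c = Eigen::MatrixXcf::Random(2,2);

      Eigen::MatrixXcf p  = r * c;      // real * complex product: allowed
      Eigen::MatrixXcf p2 = 2.0f * c;   // float scalar * complex matrix: allowed

      // Eigen::MatrixXcf bad = r + c;  // addition mixes scalar types:
      //                                // the static assertion above fires
      return (p + p2).rows() == 2 ? 0 : 1;
    }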
diff --git a/Eigen/src/Core/CwiseUnaryView.h b/Eigen/src/Core/CwiseUnaryView.h
index 66f73a950..9f9d4972d 100644
--- a/Eigen/src/Core/CwiseUnaryView.h
+++ b/Eigen/src/Core/CwiseUnaryView.h
@@ -44,9 +44,10 @@ struct traits<CwiseUnaryView<ViewOp, MatrixType> >
// "error: no integral type can represent all of the enumerator values
InnerStrideAtCompileTime = MatrixTypeInnerStride == Dynamic
? int(Dynamic)
- : int(MatrixTypeInnerStride)
- * int(sizeof(typename traits<MatrixType>::Scalar) / sizeof(Scalar)),
- OuterStrideAtCompileTime = outer_stride_at_compile_time<MatrixType>::ret
+ : int(MatrixTypeInnerStride) * int(sizeof(typename traits<MatrixType>::Scalar) / sizeof(Scalar)),
+ OuterStrideAtCompileTime = outer_stride_at_compile_time<MatrixType>::ret == Dynamic
+ ? int(Dynamic)
+ : outer_stride_at_compile_time<MatrixType>::ret * int(sizeof(typename traits<MatrixType>::Scalar) / sizeof(Scalar))
};
};
}
@@ -98,6 +99,9 @@ class CwiseUnaryViewImpl<ViewOp,MatrixType,Dense>
typedef typename internal::dense_xpr_base< CwiseUnaryView<ViewOp, MatrixType> >::type Base;
EIGEN_DENSE_PUBLIC_INTERFACE(Derived)
+
+ inline Scalar* data() { return &coeffRef(0); }
+ inline const Scalar* data() const { return &coeff(0); }
inline Index innerStride() const
{
@@ -106,7 +110,7 @@ class CwiseUnaryViewImpl<ViewOp,MatrixType,Dense>
inline Index outerStride() const
{
- return derived().nestedExpression().outerStride();
+ return derived().nestedExpression().outerStride() * sizeof(typename internal::traits<MatrixType>::Scalar) / sizeof(Scalar);
}
EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const
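The stride fix matters for views that reinterpret the scalar type, such as m.real() on a complex matrix: both strides must be counted in units of the view's Scalar, not of the underlying storage scalar. A sketch for a column-major 3x4 complex matrix:

    #include <Eigen/Dense>

    int main()
    {
      Eigen::MatrixXcd m(3,4);
      m.setZero();

      // One complex<double> spans two doubles, so the real() view sees
      // inner stride 2 and outer stride 2*3 = 6 (the outer stride was
      // previously reported unscaled as 3).
      bool ok = m.real().innerStride() == 2
             && m.real().outerStride() == 6;
      return ok ? 0 : 1;
    }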
diff --git a/Eigen/src/Core/DenseBase.h b/Eigen/src/Core/DenseBase.h
index 12780354b..ec646ce7d 100644
--- a/Eigen/src/Core/DenseBase.h
+++ b/Eigen/src/Core/DenseBase.h
@@ -353,6 +353,9 @@ template<typename Derived> class DenseBase
bool isConstant(const Scalar& value, const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
bool isZero(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
bool isOnes(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+
+ inline bool hasNaN() const;
+ inline bool isFinite() const;
inline Derived& operator*=(const Scalar& other);
inline Derived& operator/=(const Scalar& other);
@@ -436,8 +439,6 @@ template<typename Derived> class DenseBase
return derived().coeff(0,0);
}
-/////////// Array module ///////////
-
bool all() const;
bool any() const;
Index count() const;
@@ -463,11 +464,11 @@ template<typename Derived> class DenseBase
template<typename ThenDerived>
inline const Select<Derived,ThenDerived, typename ThenDerived::ConstantReturnType>
- select(const DenseBase<ThenDerived>& thenMatrix, typename ThenDerived::Scalar elseScalar) const;
+ select(const DenseBase<ThenDerived>& thenMatrix, const typename ThenDerived::Scalar& elseScalar) const;
template<typename ElseDerived>
inline const Select<Derived, typename ElseDerived::ConstantReturnType, ElseDerived >
- select(typename ElseDerived::Scalar thenScalar, const DenseBase<ElseDerived>& elseMatrix) const;
+ select(const typename ElseDerived::Scalar& thenScalar, const DenseBase<ElseDerived>& elseMatrix) const;
template<int p> RealScalar lpNorm() const;
diff --git a/Eigen/src/Core/Functors.h b/Eigen/src/Core/Functors.h
index 29cd739ce..217cc90d7 100644
--- a/Eigen/src/Core/Functors.h
+++ b/Eigen/src/Core/Functors.h
@@ -154,6 +154,7 @@ template<typename Scalar> struct scalar_hypot_op {
{
EIGEN_USING_STD_MATH(max);
EIGEN_USING_STD_MATH(min);
+ using std::sqrt;
Scalar p = (max)(_x, _y);
Scalar q = (min)(_x, _y);
Scalar qp = q/p;
@@ -543,20 +544,28 @@ template <typename Scalar, bool RandomAccess> struct linspaced_op_impl;
// linear access for packet ops:
// 1) initialization
// base = [low, ..., low] + ([step, ..., step] * [-size, ..., 0])
-// 2) each step
+// 2) each step (where size is 1 for coeff access or PacketSize for packet access)
// base += [size*step, ..., size*step]
+//
+// TODO: Perhaps it's better to initialize lazily (so not in the constructor but in packetOp)
+// in order to avoid the padd() in operator() ?
template <typename Scalar>
struct linspaced_op_impl<Scalar,false>
{
typedef typename packet_traits<Scalar>::type Packet;
- linspaced_op_impl(Scalar low, Scalar step) :
+ linspaced_op_impl(const Scalar& low, const Scalar& step) :
m_low(low), m_step(step),
m_packetStep(pset1<Packet>(packet_traits<Scalar>::size*step)),
- m_base(padd(pset1<Packet>(low),pmul(pset1<Packet>(step),plset<Scalar>(-packet_traits<Scalar>::size)))) {}
+ m_base(padd(pset1<Packet>(low), pmul(pset1<Packet>(step),plset<Scalar>(-packet_traits<Scalar>::size)))) {}
template<typename Index>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index i) const { return m_low+i*m_step; }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index i) const
+ {
+ m_base = padd(m_base, pset1<Packet>(m_step));
+ return m_low+Scalar(i)*m_step;
+ }
+
template<typename Index>
EIGEN_STRONG_INLINE const Packet packetOp(Index) const { return m_base = padd(m_base,m_packetStep); }
@@ -574,7 +583,7 @@ struct linspaced_op_impl<Scalar,true>
{
typedef typename packet_traits<Scalar>::type Packet;
- linspaced_op_impl(Scalar low, Scalar step) :
+ linspaced_op_impl(const Scalar& low, const Scalar& step) :
m_low(low), m_step(step),
m_lowPacket(pset1<Packet>(m_low)), m_stepPacket(pset1<Packet>(m_step)), m_interPacket(plset<Scalar>(0)) {}
@@ -603,7 +612,7 @@ template <typename Scalar, bool RandomAccess> struct functor_traits< linspaced_o
template <typename Scalar, bool RandomAccess> struct linspaced_op
{
typedef typename packet_traits<Scalar>::type Packet;
- linspaced_op(Scalar low, Scalar high, int num_steps) : impl((num_steps==1 ? high : low), (num_steps==1 ? Scalar() : (high-low)/(num_steps-1))) {}
+ linspaced_op(const Scalar& low, const Scalar& high, DenseIndex num_steps) : impl((num_steps==1 ? high : low), (num_steps==1 ? Scalar() : (high-low)/(num_steps-1))) {}
template<typename Index>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index i) const { return impl(i); }
@@ -642,13 +651,14 @@ template <typename Scalar, bool RandomAccess> struct linspaced_op
template<typename Functor> struct functor_has_linear_access { enum { ret = 1 }; };
template<typename Scalar> struct functor_has_linear_access<scalar_identity_op<Scalar> > { enum { ret = 0 }; };
-// in CwiseBinaryOp, we require the Lhs and Rhs to have the same scalar type, except for multiplication
-// where we only require them to have the same _real_ scalar type so one may multiply, say, float by complex<float>.
+// In Eigen, any binary op (Product, CwiseBinaryOp) requires the Lhs and Rhs to have the same scalar type, except for multiplication
+// where the mixing of different types is handled by scalar_product_traits
+// In particular, real * complex<real> is allowed.
// FIXME move this to functor_traits adding a functor_default
-template<typename Functor> struct functor_allows_mixing_real_and_complex { enum { ret = 0 }; };
-template<typename LhsScalar,typename RhsScalar> struct functor_allows_mixing_real_and_complex<scalar_product_op<LhsScalar,RhsScalar> > { enum { ret = 1 }; };
-template<typename LhsScalar,typename RhsScalar> struct functor_allows_mixing_real_and_complex<scalar_conj_product_op<LhsScalar,RhsScalar> > { enum { ret = 1 }; };
-template<typename LhsScalar,typename RhsScalar> struct functor_allows_mixing_real_and_complex<scalar_quotient_op<LhsScalar,RhsScalar> > { enum { ret = 1 }; };
+template<typename Functor> struct functor_is_product_like { enum { ret = 0 }; };
+template<typename LhsScalar,typename RhsScalar> struct functor_is_product_like<scalar_product_op<LhsScalar,RhsScalar> > { enum { ret = 1 }; };
+template<typename LhsScalar,typename RhsScalar> struct functor_is_product_like<scalar_conj_product_op<LhsScalar,RhsScalar> > { enum { ret = 1 }; };
+template<typename LhsScalar,typename RhsScalar> struct functor_is_product_like<scalar_quotient_op<LhsScalar,RhsScalar> > { enum { ret = 1 }; };
/** \internal
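The linspaced_op functor above is what powers LinSpaced(): the sequential (RandomAccess==false) path keeps a running packet base so each packetOp() costs a single padd. At the API level, with step = (high-low)/(num_steps-1):

    #include <Eigen/Dense>

    int main()
    {
      // 5 evenly spaced coefficients: 0, 0.25, 0.5, 0.75, 1
      Eigen::VectorXf v = Eigen::VectorXf::LinSpaced(5, 0.0f, 1.0f);
      return v(4) == 1.0f ? 0 : 1;
    }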
diff --git a/Eigen/src/Core/GeneralProduct.h b/Eigen/src/Core/GeneralProduct.h
index a070e618d..557286003 100644
--- a/Eigen/src/Core/GeneralProduct.h
+++ b/Eigen/src/Core/GeneralProduct.h
@@ -222,7 +222,29 @@ class GeneralProduct<Lhs, Rhs, InnerProduct>
***********************************************************************/
namespace internal {
-template<int StorageOrder> struct outer_product_selector;
+
+// Column major
+template<typename ProductType, typename Dest, typename Func>
+EIGEN_DONT_INLINE void outer_product_selector_run(const ProductType& prod, Dest& dest, const Func& func, const false_type&)
+{
+ typedef typename Dest::Index Index;
+ // FIXME make sure lhs is sequentially stored
+ // FIXME not very good if rhs is real and lhs complex while alpha is real too
+ const Index cols = dest.cols();
+ for (Index j=0; j<cols; ++j)
+ func(dest.col(j), prod.rhs().coeff(j) * prod.lhs());
+}
+
+// Row major
+template<typename ProductType, typename Dest, typename Func>
+EIGEN_DONT_INLINE void outer_product_selector_run(const ProductType& prod, Dest& dest, const Func& func, const true_type&) {
+ typedef typename Dest::Index Index;
+ // FIXME make sure rhs is sequentially stored
+ // FIXME not very good if lhs is real and rhs complex while alpha is real too
+ const Index rows = dest.rows();
+ for (Index i=0; i<rows; ++i)
+ func(dest.row(i), prod.lhs().coeff(i) * prod.rhs());
+}
template<typename Lhs, typename Rhs>
struct traits<GeneralProduct<Lhs,Rhs,OuterProduct> >
@@ -235,6 +257,8 @@ template<typename Lhs, typename Rhs>
class GeneralProduct<Lhs, Rhs, OuterProduct>
: public ProductBase<GeneralProduct<Lhs,Rhs,OuterProduct>, Lhs, Rhs>
{
+ template<typename T> struct IsRowMajor : internal::conditional<(int(T::Flags)&RowMajorBit), internal::true_type, internal::false_type>::type {};
+
public:
EIGEN_PRODUCT_PUBLIC_INTERFACE(GeneralProduct)
@@ -243,41 +267,39 @@ class GeneralProduct<Lhs, Rhs, OuterProduct>
EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::RealScalar, typename Rhs::RealScalar>::value),
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
}
-
- template<typename Dest> void scaleAndAddTo(Dest& dest, Scalar alpha) const
- {
- internal::outer_product_selector<(int(Dest::Flags)&RowMajorBit) ? RowMajor : ColMajor>::run(*this, dest, alpha);
+
+ struct set { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() = src; } };
+ struct add { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() += src; } };
+ struct sub { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() -= src; } };
+ struct adds {
+ Scalar m_scale;
+ adds(const Scalar& s) : m_scale(s) {}
+ template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const {
+ dst.const_cast_derived() += m_scale * src;
+ }
+ };
+
+ template<typename Dest>
+ inline void evalTo(Dest& dest) const {
+ internal::outer_product_selector_run(*this, dest, set(), IsRowMajor<Dest>());
+ }
+
+ template<typename Dest>
+ inline void addTo(Dest& dest) const {
+ internal::outer_product_selector_run(*this, dest, add(), IsRowMajor<Dest>());
}
-};
-
-namespace internal {
-template<> struct outer_product_selector<ColMajor> {
- template<typename ProductType, typename Dest>
- static EIGEN_DONT_INLINE void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha) {
- typedef typename Dest::Index Index;
- // FIXME make sure lhs is sequentially stored
- // FIXME not very good if rhs is real and lhs complex while alpha is real too
- const Index cols = dest.cols();
- for (Index j=0; j<cols; ++j)
- dest.col(j) += (alpha * prod.rhs().coeff(j)) * prod.lhs();
- }
-};
+ template<typename Dest>
+ inline void subTo(Dest& dest) const {
+ internal::outer_product_selector_run(*this, dest, sub(), IsRowMajor<Dest>());
+ }
-template<> struct outer_product_selector<RowMajor> {
- template<typename ProductType, typename Dest>
- static EIGEN_DONT_INLINE void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha) {
- typedef typename Dest::Index Index;
- // FIXME make sure rhs is sequentially stored
- // FIXME not very good if lhs is real and rhs complex while alpha is real too
- const Index rows = dest.rows();
- for (Index i=0; i<rows; ++i)
- dest.row(i) += (alpha * prod.lhs().coeff(i)) * prod.rhs();
- }
+ template<typename Dest> void scaleAndAddTo(Dest& dest, const Scalar& alpha) const
+ {
+ internal::outer_product_selector_run(*this, dest, adds(alpha), IsRowMajor<Dest>());
+ }
};
-} // end namespace internal
-
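For reference, a minimal usage sketch of the functor-based outer-product path above (illustrative variable names; assuming the Eigen headers and that these expressions route through addTo()/subTo()/scaleAndAddTo() as described):

#include <Eigen/Dense>
int main()
{
  Eigen::VectorXd u(3), v(4);
  u.setRandom();
  v.setRandom();
  Eigen::MatrixXd A = Eigen::MatrixXd::Zero(3, 4);
  A.noalias() += u * v.transpose();          // outer product: the add functor
  A.noalias() -= u * v.transpose();          // the sub functor
  A.noalias() += 2.0 * (u * v.transpose());  // scaled: the adds(2.0) functor
  return 0;
}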
/***********************************************************************
* Implementation of General Matrix Vector Product
***********************************************************************/
@@ -320,7 +342,7 @@ class GeneralProduct<Lhs, Rhs, GemvProduct>
enum { Side = Lhs::IsVectorAtCompileTime ? OnTheLeft : OnTheRight };
typedef typename internal::conditional<int(Side)==OnTheRight,_LhsNested,_RhsNested>::type MatrixType;
- template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
+ template<typename Dest> void scaleAndAddTo(Dest& dst, const Scalar& alpha) const
{
eigen_assert(m_lhs.rows() == dst.rows() && m_rhs.cols() == dst.cols());
internal::gemv_selector<Side,(int(MatrixType::Flags)&RowMajorBit) ? RowMajor : ColMajor,
@@ -335,7 +357,7 @@ template<int StorageOrder, bool BlasCompatible>
struct gemv_selector<OnTheLeft,StorageOrder,BlasCompatible>
{
template<typename ProductType, typename Dest>
- static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
+ static void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha)
{
Transpose<Dest> destT(dest);
enum { OtherStorageOrder = StorageOrder == RowMajor ? ColMajor : RowMajor };
@@ -384,7 +406,7 @@ struct gemv_static_vector_if<Scalar,Size,MaxSize,true>
template<> struct gemv_selector<OnTheRight,ColMajor,true>
{
template<typename ProductType, typename Dest>
- static inline void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
+ static inline void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha)
{
typedef typename ProductType::Index Index;
typedef typename ProductType::LhsScalar LhsScalar;
@@ -457,7 +479,7 @@ template<> struct gemv_selector<OnTheRight,ColMajor,true>
template<> struct gemv_selector<OnTheRight,RowMajor,true>
{
template<typename ProductType, typename Dest>
- static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
+ static void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha)
{
typedef typename ProductType::LhsScalar LhsScalar;
typedef typename ProductType::RhsScalar RhsScalar;
@@ -508,7 +530,7 @@ template<> struct gemv_selector<OnTheRight,RowMajor,true>
template<> struct gemv_selector<OnTheRight,ColMajor,false>
{
template<typename ProductType, typename Dest>
- static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
+ static void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha)
{
typedef typename Dest::Index Index;
// TODO makes sure dest is sequentially stored in memory, otherwise use a temp
@@ -521,7 +543,7 @@ template<> struct gemv_selector<OnTheRight,ColMajor,false>
template<> struct gemv_selector<OnTheRight,RowMajor,false>
{
template<typename ProductType, typename Dest>
- static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
+ static void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha)
{
typedef typename Dest::Index Index;
// TODO makes sure rhs is sequentially stored in memory, otherwise use a temp
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h
index aee58f09b..967a37673 100644
--- a/Eigen/src/Core/GenericPacketMath.h
+++ b/Eigen/src/Core/GenericPacketMath.h
@@ -156,7 +156,11 @@ pload(const typename unpacket_traits<Packet>::type* from) { return *from; }
template<typename Packet> inline Packet
ploadu(const typename unpacket_traits<Packet>::type* from) { return *from; }
-/** \internal \returns a packet with elements of \a *from duplicated, e.g.: (from[0],from[0],from[1],from[1]) */
+/** \internal \returns a packet with elements of \a *from duplicated.
+ * For instance, for a packet of 8 elements, 4 scalars will be read from \a *from and
+ * duplicated to form: {from[0],from[0],from[1],from[1],from[2],from[2],from[3],from[3]}.
+ * Currently, this function is only used for scalar * complex products.
+ */
template<typename Packet> inline Packet
ploaddup(const typename unpacket_traits<Packet>::type* from) { return *from; }
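A small sketch of the documented duplication (assuming an SSE packet of four floats, so two scalars are read):

// float from[2] = {1.f, 2.f};
// Packet4f p = ploaddup<Packet4f>(from);  // p == {1.f, 1.f, 2.f, 2.f}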
@@ -307,8 +311,21 @@ struct palign_impl
static inline void run(PacketType&, const PacketType&) {}
};
-/** \internal update \a first using the concatenation of the \a Offset last elements
- * of \a first and packet_size minus \a Offset first elements of \a second */
+/** \internal update \a first using the concatenation of the packet_size minus \a Offset last elements
+ * of \a first and \a Offset first elements of \a second.
+ *
+ * This function is currently only used to optimize matrix-vector products on unaligned matrices.
+ * It takes two packets that represent a contiguous memory array, and returns a packet starting
+ * at the position \a Offset. For instance, for packets of 4 elements, we have:
+ * Input:
+ * - first = {f0,f1,f2,f3}
+ * - second = {s0,s1,s2,s3}
+ * Output:
+ * - if Offset==0 then {f0,f1,f2,f3}
+ * - if Offset==1 then {f1,f2,f3,s0}
+ * - if Offset==2 then {f2,f3,s0,s1}
+ * - if Offset==3 then {f3,s0,s1,s2}
+ */
template<int Offset,typename PacketType>
inline void palign(PacketType& first, const PacketType& second)
{
diff --git a/Eigen/src/Core/GlobalFunctions.h b/Eigen/src/Core/GlobalFunctions.h
index daf72e6bb..02cae552e 100644
--- a/Eigen/src/Core/GlobalFunctions.h
+++ b/Eigen/src/Core/GlobalFunctions.h
@@ -70,7 +70,7 @@ namespace Eigen
**/
template <typename Derived>
inline const Eigen::CwiseUnaryOp<Eigen::internal::scalar_inverse_mult_op<typename Derived::Scalar>, const Derived>
- operator/(typename Derived::Scalar s, const Eigen::ArrayBase<Derived>& a)
+ operator/(const typename Derived::Scalar& s, const Eigen::ArrayBase<Derived>& a)
{
return Eigen::CwiseUnaryOp<Eigen::internal::scalar_inverse_mult_op<typename Derived::Scalar>, const Derived>(
a.derived(),
diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h
index b8d32ecec..9b5c94b07 100644
--- a/Eigen/src/Core/MathFunctions.h
+++ b/Eigen/src/Core/MathFunctions.h
@@ -363,6 +363,7 @@ struct hypot_impl
EIGEN_USING_STD_MATH(max);
EIGEN_USING_STD_MATH(min);
using std::abs;
+ using std::sqrt;
RealScalar _x = abs(x);
RealScalar _y = abs(y);
RealScalar p = (max)(_x, _y);
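For context, the kernel evaluates hypot in the classic overflow-safe form, which is why sqrt must be in scope (a sketch of the identity, with p = max(|x|,|y|) and q = min(|x|,|y|)):

// hypot(x,y) = p * sqrt(1 + (q/p)^2)   // never overflows for finite p
// the added `using std::sqrt;` lets the unqualified call resolve for builtin
// RealScalar types while ADL still finds user-defined overloads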
@@ -420,7 +421,7 @@ struct atanh2_default_impl
using std::log;
using std::sqrt;
Scalar z = x / y;
- if (abs(z) > sqrt(NumTraits<RealScalar>::epsilon()))
+ if (y == Scalar(0) || abs(z) > sqrt(NumTraits<RealScalar>::epsilon()))
return RealScalar(0.5) * log((y + x) / (y - x));
else
return z + z*z*z / RealScalar(3);
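The series branch is the truncated expansion of the closed form (a sketch, with z = x/y):

// atanh2(x,y) = (1/2) * log((y+x)/(y-x)) = z + z^3/3 + O(z^5)
// for |z| <= sqrt(epsilon) the z^5 term is below machine precision, so the
// two-term series suffices; the new y == Scalar(0) test keeps the ill-defined
// z = x/0 from ever selecting the series branch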
diff --git a/Eigen/src/Core/Matrix.h b/Eigen/src/Core/Matrix.h
index 61af9d9a3..70c1857dd 100644
--- a/Eigen/src/Core/Matrix.h
+++ b/Eigen/src/Core/Matrix.h
@@ -208,14 +208,14 @@ class Matrix
EIGEN_STRONG_INLINE explicit Matrix() : Base()
{
Base::_check_template_params();
- EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
+ EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
}
// FIXME is it still needed
EIGEN_DEVICE_FUNC
Matrix(internal::constructor_without_unaligned_array_assert)
: Base(internal::constructor_without_unaligned_array_assert())
- { Base::_check_template_params(); EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED }
+ { Base::_check_template_params(); EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED }
/** \brief Constructs a vector or row-vector with given dimension. \only_for_vectors
*
@@ -231,7 +231,7 @@ class Matrix
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Matrix)
eigen_assert(dim >= 0);
eigen_assert(SizeAtCompileTime == Dynamic || SizeAtCompileTime == dim);
- EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
+ EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
}
#ifndef EIGEN_PARSED_BY_DOXYGEN
diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h
index 7b9ea8c0a..8dbe71b93 100644
--- a/Eigen/src/Core/MatrixBase.h
+++ b/Eigen/src/Core/MatrixBase.h
@@ -475,7 +475,7 @@ template<typename Derived> class MatrixBase
const MatrixFunctionReturnValue<Derived> sin() const;
const MatrixSquareRootReturnValue<Derived> sqrt() const;
const MatrixLogarithmReturnValue<Derived> log() const;
- const MatrixPowerReturnValue<Derived> pow(RealScalar p) const;
+ const MatrixPowerReturnValue<Derived> pow(const RealScalar& p) const;
#ifdef EIGEN2_SUPPORT
template<typename ProductDerived, typename Lhs, typename Rhs>
diff --git a/Eigen/src/Core/NumTraits.h b/Eigen/src/Core/NumTraits.h
index c94ef026b..bac9e50b8 100644
--- a/Eigen/src/Core/NumTraits.h
+++ b/Eigen/src/Core/NumTraits.h
@@ -140,6 +140,9 @@ struct NumTraits<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> >
AddCost = ArrayType::SizeAtCompileTime==Dynamic ? Dynamic : ArrayType::SizeAtCompileTime * NumTraits<Scalar>::AddCost,
MulCost = ArrayType::SizeAtCompileTime==Dynamic ? Dynamic : ArrayType::SizeAtCompileTime * NumTraits<Scalar>::MulCost
};
+
+ static inline RealScalar epsilon() { return NumTraits<RealScalar>::epsilon(); }
+ static inline RealScalar dummy_precision() { return NumTraits<RealScalar>::dummy_precision(); }
};
} // end namespace Eigen
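With the two forwarding functions, machine-precision queries compile for Array-valued scalars as well (a hedged sketch):

// double eps  = Eigen::NumTraits<Eigen::Array3d>::epsilon();         // == NumTraits<double>::epsilon()
// double prec = Eigen::NumTraits<Eigen::Array3d>::dummy_precision(); // forwarded likewise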
diff --git a/Eigen/src/Core/PermutationMatrix.h b/Eigen/src/Core/PermutationMatrix.h
index 86b63ea14..4fc5dd318 100644
--- a/Eigen/src/Core/PermutationMatrix.h
+++ b/Eigen/src/Core/PermutationMatrix.h
@@ -105,13 +105,13 @@ class PermutationBase : public EigenBase<Derived>
#endif
/** \returns the number of rows */
- inline Index rows() const { return indices().size(); }
+ inline Index rows() const { return Index(indices().size()); }
/** \returns the number of columns */
- inline Index cols() const { return indices().size(); }
+ inline Index cols() const { return Index(indices().size()); }
/** \returns the size of a side of the respective square matrix, i.e., the number of indices */
- inline Index size() const { return indices().size(); }
+ inline Index size() const { return Index(indices().size()); }
#ifndef EIGEN_PARSED_BY_DOXYGEN
template<typename DenseDerived>
@@ -541,24 +541,25 @@ struct permut_matrix_product_retval
: public ReturnByValue<permut_matrix_product_retval<PermutationType, MatrixType, Side, Transposed> >
{
typedef typename remove_all<typename MatrixType::Nested>::type MatrixTypeNestedCleaned;
+ typedef typename MatrixType::Index Index;
permut_matrix_product_retval(const PermutationType& perm, const MatrixType& matrix)
: m_permutation(perm), m_matrix(matrix)
{}
- inline int rows() const { return m_matrix.rows(); }
- inline int cols() const { return m_matrix.cols(); }
+ inline Index rows() const { return m_matrix.rows(); }
+ inline Index cols() const { return m_matrix.cols(); }
template<typename Dest> inline void evalTo(Dest& dst) const
{
- const int n = Side==OnTheLeft ? rows() : cols();
+ const Index n = Side==OnTheLeft ? rows() : cols();
if(is_same<MatrixTypeNestedCleaned,Dest>::value && extract_data(dst) == extract_data(m_matrix))
{
// apply the permutation inplace
Matrix<bool,PermutationType::RowsAtCompileTime,1,0,PermutationType::MaxRowsAtCompileTime> mask(m_permutation.size());
mask.fill(false);
- int r = 0;
+ Index r = 0;
while(r < m_permutation.size())
{
// search for the next seed
@@ -566,10 +567,10 @@ struct permut_matrix_product_retval
if(r>=m_permutation.size())
break;
// we got one, let's follow it until we are back to the seed
- int k0 = r++;
- int kPrev = k0;
+ Index k0 = r++;
+ Index kPrev = k0;
mask.coeffRef(k0) = true;
- for(int k=m_permutation.indices().coeff(k0); k!=k0; k=m_permutation.indices().coeff(k))
+ for(Index k=m_permutation.indices().coeff(k0); k!=k0; k=m_permutation.indices().coeff(k))
{
Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>(dst, k)
.swap(Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>
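The mask-and-cycle loop above, reduced to a plain array, looks as follows (a self-contained sketch under the convention that entry i moves to position p[i]; names are illustrative, not Eigen API):

#include <vector>
#include <utility>

void apply_permutation_inplace(std::vector<double>& v, const std::vector<std::size_t>& p)
{
  std::vector<bool> mask(v.size(), false);  // marks entries already placed
  for (std::size_t r = 0; r < v.size(); ++r)
  {
    if (mask[r]) continue;                  // r belongs to a processed cycle
    mask[r] = true;                         // r is the seed of a new cycle
    for (std::size_t k = p[r]; k != r; k = p[k])
    {
      std::swap(v[r], v[k]);                // rotate the cycle one step
      mask[k] = true;
    }
  }
}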
diff --git a/Eigen/src/Core/PlainObjectBase.h b/Eigen/src/Core/PlainObjectBase.h
index 49a5518e3..4e159896e 100644
--- a/Eigen/src/Core/PlainObjectBase.h
+++ b/Eigen/src/Core/PlainObjectBase.h
@@ -11,10 +11,15 @@
#ifndef EIGEN_DENSESTORAGEBASE_H
#define EIGEN_DENSESTORAGEBASE_H
-#ifdef EIGEN_INITIALIZE_MATRICES_BY_ZERO
-# define EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED for(int i=0;i<base().size();++i) coeffRef(i)=Scalar(0);
+#if defined(EIGEN_INITIALIZE_MATRICES_BY_ZERO)
+# define EIGEN_INITIALIZE_COEFFS
+# define EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED for(int i=0;i<base().size();++i) coeffRef(i)=Scalar(0);
+#elif defined(EIGEN_INITIALIZE_MATRICES_BY_NAN)
+# define EIGEN_INITIALIZE_COEFFS
+# define EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED for(int i=0;i<base().size();++i) coeffRef(i)=std::numeric_limits<Scalar>::quiet_NaN();
#else
-# define EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
+# undef EIGEN_INITIALIZE_COEFFS
+# define EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
#endif
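Usage sketch of the initialization options (the NaN variant is new; the macro must be defined before any Eigen header is included):

// #define EIGEN_INITIALIZE_MATRICES_BY_NAN
// #include <Eigen/Dense>
// Eigen::MatrixXd m(2,2);  // every coefficient starts as quiet_NaN(), so a
//                          // read of an uninitialized entry propagates visibly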
namespace Eigen {
@@ -243,11 +248,11 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
&& EIGEN_IMPLIES(ColsAtCompileTime==Dynamic && MaxColsAtCompileTime!=Dynamic,nbCols<=MaxColsAtCompileTime)
&& nbRows>=0 && nbCols>=0 && "Invalid sizes when resizing a matrix or array.");
internal::check_rows_cols_for_overflow<MaxSizeAtCompileTime>::run(nbRows, nbCols);
- #ifdef EIGEN_INITIALIZE_MATRICES_BY_ZERO
+ #ifdef EIGEN_INITIALIZE_COEFFS
Index size = nbRows*nbCols;
bool size_changed = size != this->size();
m_storage.resize(size, nbRows, nbCols);
- if(size_changed) EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
+ if(size_changed) EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
#else
internal::check_rows_cols_for_overflow<MaxSizeAtCompileTime>::run(nbRows, nbCols);
m_storage.resize(nbRows*nbCols, nbRows, nbCols);
@@ -270,15 +275,15 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(PlainObjectBase)
eigen_assert(((SizeAtCompileTime == Dynamic && (MaxSizeAtCompileTime==Dynamic || size<=MaxSizeAtCompileTime)) || SizeAtCompileTime == size) && size>=0);
- #ifdef EIGEN_INITIALIZE_MATRICES_BY_ZERO
+ #ifdef EIGEN_INITIALIZE_COEFFS
bool size_changed = size != this->size();
#endif
if(RowsAtCompileTime == 1)
m_storage.resize(size, 1, size);
else
m_storage.resize(size, size, 1);
- #ifdef EIGEN_INITIALIZE_MATRICES_BY_ZERO
- if(size_changed) EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
+ #ifdef EIGEN_INITIALIZE_COEFFS
+ if(size_changed) EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
#endif
}
@@ -435,7 +440,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
EIGEN_STRONG_INLINE explicit PlainObjectBase() : m_storage()
{
// _check_template_params();
-// EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
+// EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
}
#ifndef EIGEN_PARSED_BY_DOXYGEN
@@ -445,7 +450,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
PlainObjectBase(internal::constructor_without_unaligned_array_assert)
: m_storage(internal::constructor_without_unaligned_array_assert())
{
-// _check_template_params(); EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
+// _check_template_params(); EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
}
#endif
@@ -454,7 +459,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
: m_storage(a_size, nbRows, nbCols)
{
// _check_template_params();
-// EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
+// EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
}
/** \copydoc MatrixBase::operator=(const EigenBase<OtherDerived>&)
diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h
index 314851d2e..3a08c027c 100644
--- a/Eigen/src/Core/Product.h
+++ b/Eigen/src/Core/Product.h
@@ -74,8 +74,8 @@ class Product : public ProductImpl<Lhs,Rhs,typename internal::promote_storage_ty
protected:
- const LhsNested m_lhs;
- const RhsNested m_rhs;
+ LhsNested m_lhs;
+ RhsNested m_rhs;
};
template<typename Lhs, typename Rhs>
diff --git a/Eigen/src/Core/ProductBase.h b/Eigen/src/Core/ProductBase.h
index 9748167a5..a494b5f87 100644
--- a/Eigen/src/Core/ProductBase.h
+++ b/Eigen/src/Core/ProductBase.h
@@ -108,7 +108,7 @@ class ProductBase : public MatrixBase<Derived>
inline void subTo(Dest& dst) const { scaleAndAddTo(dst,Scalar(-1)); }
template<typename Dest>
- inline void scaleAndAddTo(Dest& dst,Scalar alpha) const { derived().scaleAndAddTo(dst,alpha); }
+ inline void scaleAndAddTo(Dest& dst, const Scalar& alpha) const { derived().scaleAndAddTo(dst,alpha); }
const _LhsNested& lhs() const { return m_lhs; }
const _RhsNested& rhs() const { return m_rhs; }
@@ -195,7 +195,7 @@ class ScaledProduct;
// Also note that here we accept any compatible scalar types
template<typename Derived,typename Lhs,typename Rhs>
const ScaledProduct<Derived>
-operator*(const ProductBase<Derived,Lhs,Rhs>& prod, typename Derived::Scalar x)
+operator*(const ProductBase<Derived,Lhs,Rhs>& prod, const typename Derived::Scalar& x)
{ return ScaledProduct<Derived>(prod.derived(), x); }
template<typename Derived,typename Lhs,typename Rhs>
@@ -207,7 +207,7 @@ operator*(const ProductBase<Derived,Lhs,Rhs>& prod, const typename Derived::Real
template<typename Derived,typename Lhs,typename Rhs>
const ScaledProduct<Derived>
-operator*(typename Derived::Scalar x,const ProductBase<Derived,Lhs,Rhs>& prod)
+operator*(const typename Derived::Scalar& x,const ProductBase<Derived,Lhs,Rhs>& prod)
{ return ScaledProduct<Derived>(prod.derived(), x); }
template<typename Derived,typename Lhs,typename Rhs>
@@ -241,7 +241,7 @@ class ScaledProduct
typedef typename Base::PlainObject PlainObject;
// EIGEN_PRODUCT_PUBLIC_INTERFACE(ScaledProduct)
- ScaledProduct(const NestedProduct& prod, Scalar x)
+ ScaledProduct(const NestedProduct& prod, const Scalar& x)
: Base(prod.lhs(),prod.rhs()), m_prod(prod), m_alpha(x) {}
template<typename Dest>
@@ -254,7 +254,7 @@ class ScaledProduct
inline void subTo(Dest& dst) const { scaleAndAddTo(dst, Scalar(-1)); }
template<typename Dest>
- inline void scaleAndAddTo(Dest& dst,Scalar a_alpha) const { m_prod.derived().scaleAndAddTo(dst,a_alpha * m_alpha); }
+ inline void scaleAndAddTo(Dest& dst, const Scalar& a_alpha) const { m_prod.derived().scaleAndAddTo(dst,a_alpha * m_alpha); }
const Scalar& alpha() const { return m_alpha; }
diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h
index 0c0570e44..8aed51022 100644
--- a/Eigen/src/Core/ProductEvaluators.h
+++ b/Eigen/src/Core/ProductEvaluators.h
@@ -209,7 +209,7 @@ template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
struct etor_product_coeff_impl<DefaultTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
{
typedef typename Lhs::Index Index;
- EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, RetScalar &res)
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, RetScalar &res)
{
etor_product_coeff_impl<DefaultTraversal, UnrollingIndex-1, Lhs, Rhs, RetScalar>::run(row, col, lhs, rhs, innerDim, res);
res += lhs.coeff(row, UnrollingIndex) * rhs.coeff(UnrollingIndex, col);
@@ -220,7 +220,7 @@ template<typename Lhs, typename Rhs, typename RetScalar>
struct etor_product_coeff_impl<DefaultTraversal, 0, Lhs, Rhs, RetScalar>
{
typedef typename Lhs::Index Index;
- EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, RetScalar &res)
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, RetScalar &res)
{
res = lhs.coeff(row, 0) * rhs.coeff(0, col);
}
@@ -230,7 +230,7 @@ template<typename Lhs, typename Rhs, typename RetScalar>
struct etor_product_coeff_impl<DefaultTraversal, Dynamic, Lhs, Rhs, RetScalar>
{
typedef typename Lhs::Index Index;
- EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, RetScalar& res)
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, RetScalar& res)
{
eigen_assert(innerDim>0 && "you are using a non initialized matrix");
res = lhs.coeff(row, 0) * rhs.coeff(0, col);
@@ -248,7 +248,7 @@ struct etor_product_coeff_vectorized_unroller
{
typedef typename Lhs::Index Index;
enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
- EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, typename Lhs::PacketScalar &pres)
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, typename Lhs::PacketScalar &pres)
{
etor_product_coeff_vectorized_unroller<UnrollingIndex-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, innerDim, pres);
pres = padd(pres, pmul( lhs.template packet<Aligned>(row, UnrollingIndex) , rhs.template packet<Aligned>(UnrollingIndex, col) ));
@@ -259,7 +259,7 @@ template<typename Lhs, typename Rhs, typename Packet>
struct etor_product_coeff_vectorized_unroller<0, Lhs, Rhs, Packet>
{
typedef typename Lhs::Index Index;
- EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::PacketScalar &pres)
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::PacketScalar &pres)
{
pres = pmul(lhs.template packet<Aligned>(row, 0) , rhs.template packet<Aligned>(0, col));
}
@@ -271,7 +271,7 @@ struct etor_product_coeff_impl<InnerVectorizedTraversal, UnrollingIndex, Lhs, Rh
typedef typename Lhs::PacketScalar Packet;
typedef typename Lhs::Index Index;
enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
- EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, RetScalar &res)
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, RetScalar &res)
{
Packet pres;
etor_product_coeff_vectorized_unroller<UnrollingIndex+1-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, innerDim, pres);
@@ -284,7 +284,7 @@ template<typename Lhs, typename Rhs, int LhsRows = Lhs::RowsAtCompileTime, int R
struct etor_product_coeff_vectorized_dyn_selector
{
typedef typename Lhs::Index Index;
- EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res)
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res)
{
res = lhs.row(row).transpose().cwiseProduct(rhs.col(col)).sum();
}
@@ -296,7 +296,7 @@ template<typename Lhs, typename Rhs, int RhsCols>
struct etor_product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,RhsCols>
{
typedef typename Lhs::Index Index;
- EIGEN_STRONG_INLINE static void run(Index /*row*/, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res)
+ static EIGEN_STRONG_INLINE void run(Index /*row*/, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res)
{
res = lhs.transpose().cwiseProduct(rhs.col(col)).sum();
}
@@ -306,7 +306,7 @@ template<typename Lhs, typename Rhs, int LhsRows>
struct etor_product_coeff_vectorized_dyn_selector<Lhs,Rhs,LhsRows,1>
{
typedef typename Lhs::Index Index;
- EIGEN_STRONG_INLINE static void run(Index row, Index /*col*/, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res)
+ static EIGEN_STRONG_INLINE void run(Index row, Index /*col*/, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res)
{
res = lhs.row(row).transpose().cwiseProduct(rhs).sum();
}
@@ -316,7 +316,7 @@ template<typename Lhs, typename Rhs>
struct etor_product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,1>
{
typedef typename Lhs::Index Index;
- EIGEN_STRONG_INLINE static void run(Index /*row*/, Index /*col*/, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res)
+ static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res)
{
res = lhs.transpose().cwiseProduct(rhs).sum();
}
@@ -326,7 +326,7 @@ template<typename Lhs, typename Rhs, typename RetScalar>
struct etor_product_coeff_impl<InnerVectorizedTraversal, Dynamic, Lhs, Rhs, RetScalar>
{
typedef typename Lhs::Index Index;
- EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, typename Lhs::Scalar &res)
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, typename Lhs::Scalar &res)
{
etor_product_coeff_vectorized_dyn_selector<Lhs,Rhs>::run(row, col, lhs, rhs, innerDim, res);
}
@@ -340,7 +340,7 @@ template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int Lo
struct etor_product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
{
typedef typename Lhs::Index Index;
- EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
{
etor_product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
res = pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex)), rhs.template packet<LoadMode>(UnrollingIndex, col), res);
@@ -351,7 +351,7 @@ template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int Lo
struct etor_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
{
typedef typename Lhs::Index Index;
- EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
{
etor_product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
res = pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex), pset1<Packet>(rhs.coeff(UnrollingIndex, col)), res);
@@ -362,7 +362,7 @@ template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct etor_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
{
typedef typename Lhs::Index Index;
- EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
{
res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
}
@@ -372,7 +372,7 @@ template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct etor_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
{
typedef typename Lhs::Index Index;
- EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
{
res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
}
@@ -382,7 +382,7 @@ template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct etor_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
{
typedef typename Lhs::Index Index;
- EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
{
eigen_assert(innerDim>0 && "you are using a non initialized matrix");
res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
@@ -395,7 +395,7 @@ template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct etor_product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
{
typedef typename Lhs::Index Index;
- EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
+ static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
{
eigen_assert(innerDim>0 && "you are using a non initialized matrix");
res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
diff --git a/Eigen/src/Core/Redux.h b/Eigen/src/Core/Redux.h
index 12b3db584..b2c775d90 100644
--- a/Eigen/src/Core/Redux.h
+++ b/Eigen/src/Core/Redux.h
@@ -334,7 +334,8 @@ DenseBase<Derived>::redux(const Func& func) const
::run(derived(), func);
}
-/** \returns the minimum of all coefficients of *this
+/** \returns the minimum of all coefficients of \c *this.
+ * \warning the result is undefined if \c *this contains NaN.
*/
template<typename Derived>
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
@@ -343,7 +344,8 @@ DenseBase<Derived>::minCoeff() const
return this->redux(Eigen::internal::scalar_min_op<Scalar>());
}
-/** \returns the maximum of all coefficients of *this
+/** \returns the maximum of all coefficients of \c *this.
+ * \warning the result is undefined if \c *this contains NaN.
*/
template<typename Derived>
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
diff --git a/Eigen/src/Core/Ref.h b/Eigen/src/Core/Ref.h
index 9c409eecf..aba795bdb 100644
--- a/Eigen/src/Core/Ref.h
+++ b/Eigen/src/Core/Ref.h
@@ -149,6 +149,8 @@ public:
m_stride(StrideType::OuterStrideAtCompileTime==Dynamic?0:StrideType::OuterStrideAtCompileTime,
StrideType::InnerStrideAtCompileTime==Dynamic?0:StrideType::InnerStrideAtCompileTime)
{}
+
+ EIGEN_INHERIT_ASSIGNMENT_OPERATORS(RefBase)
protected:
@@ -170,7 +172,7 @@ protected:
else
::new (static_cast<Base*>(this)) Base(expr.data(), expr.rows(), expr.cols());
::new (&m_stride) StrideBase(StrideType::OuterStrideAtCompileTime==0?0:expr.outerStride(),
- StrideType::InnerStrideAtCompileTime==0?0:expr.innerStride());
+ StrideType::InnerStrideAtCompileTime==0?0:expr.innerStride());
}
StrideBase m_stride;
@@ -211,8 +213,8 @@ template<typename PlainObjectType, int Options, typename StrideType> class Ref
};
// this is the const ref version
-template<typename PlainObjectType, int Options, typename StrideType> class Ref<const PlainObjectType, Options, StrideType>
- : public RefBase<Ref<const PlainObjectType, Options, StrideType> >
+template<typename TPlainObjectType, int Options, typename StrideType> class Ref<const TPlainObjectType, Options, StrideType>
+ : public RefBase<Ref<const TPlainObjectType, Options, StrideType> >
{
typedef internal::traits<Ref> Traits;
public:
@@ -240,13 +242,12 @@ template<typename PlainObjectType, int Options, typename StrideType> class Ref<c
template<typename Expression>
void construct(const Expression& expr, internal::false_type)
{
-// std::cout << "Ref: copy\n";
- m_object = expr;
+ m_object.lazyAssign(expr);
Base::construct(m_object);
}
protected:
- PlainObjectType m_object;
+ TPlainObjectType m_object;
};
} // end namespace Eigen
diff --git a/Eigen/src/Core/Select.h b/Eigen/src/Core/Select.h
index 7ee8f23ba..87993bbb5 100644
--- a/Eigen/src/Core/Select.h
+++ b/Eigen/src/Core/Select.h
@@ -136,7 +136,7 @@ template<typename Derived>
template<typename ThenDerived>
inline const Select<Derived,ThenDerived, typename ThenDerived::ConstantReturnType>
DenseBase<Derived>::select(const DenseBase<ThenDerived>& thenMatrix,
- typename ThenDerived::Scalar elseScalar) const
+ const typename ThenDerived::Scalar& elseScalar) const
{
return Select<Derived,ThenDerived,typename ThenDerived::ConstantReturnType>(
derived(), thenMatrix.derived(), ThenDerived::Constant(rows(),cols(),elseScalar));
@@ -150,8 +150,8 @@ DenseBase<Derived>::select(const DenseBase<ThenDerived>& thenMatrix,
template<typename Derived>
template<typename ElseDerived>
inline const Select<Derived, typename ElseDerived::ConstantReturnType, ElseDerived >
-DenseBase<Derived>::select(typename ElseDerived::Scalar thenScalar,
- const DenseBase<ElseDerived>& elseMatrix) const
+DenseBase<Derived>::select(const typename ElseDerived::Scalar& thenScalar,
+ const DenseBase<ElseDerived>& elseMatrix) const
{
return Select<Derived,typename ElseDerived::ConstantReturnType,ElseDerived>(
derived(), ElseDerived::Constant(rows(),cols(),thenScalar), elseMatrix.derived());
diff --git a/Eigen/src/Core/SelfAdjointView.h b/Eigen/src/Core/SelfAdjointView.h
index 82cc4da73..d43789123 100644
--- a/Eigen/src/Core/SelfAdjointView.h
+++ b/Eigen/src/Core/SelfAdjointView.h
@@ -132,7 +132,7 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
* \sa rankUpdate(const MatrixBase<DerivedU>&, Scalar)
*/
template<typename DerivedU, typename DerivedV>
- SelfAdjointView& rankUpdate(const MatrixBase<DerivedU>& u, const MatrixBase<DerivedV>& v, Scalar alpha = Scalar(1));
+ SelfAdjointView& rankUpdate(const MatrixBase<DerivedU>& u, const MatrixBase<DerivedV>& v, const Scalar& alpha = Scalar(1));
/** Perform a symmetric rank K update of the selfadjoint matrix \c *this:
* \f$ this = this + \alpha ( u u^* ) \f$ where \a u is a vector or matrix.
@@ -145,7 +145,7 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
* \sa rankUpdate(const MatrixBase<DerivedU>&, const MatrixBase<DerivedV>&, Scalar)
*/
template<typename DerivedU>
- SelfAdjointView& rankUpdate(const MatrixBase<DerivedU>& u, Scalar alpha = Scalar(1));
+ SelfAdjointView& rankUpdate(const MatrixBase<DerivedU>& u, const Scalar& alpha = Scalar(1));
/////////// Cholesky module ///////////
diff --git a/Eigen/src/Core/SelfCwiseBinaryOp.h b/Eigen/src/Core/SelfCwiseBinaryOp.h
index 40f9eb618..53ac6c387 100644
--- a/Eigen/src/Core/SelfCwiseBinaryOp.h
+++ b/Eigen/src/Core/SelfCwiseBinaryOp.h
@@ -196,7 +196,10 @@ inline Derived& DenseBase<Derived>::operator/=(const Scalar& other)
internal::scalar_product_op<Scalar> >::type BinOp;
typedef typename Derived::PlainObject PlainObject;
SelfCwiseBinaryOp<BinOp, Derived, typename PlainObject::ConstantReturnType> tmp(derived());
- tmp = PlainObject::Constant(rows(),cols(), NumTraits<Scalar>::IsInteger ? other : Scalar(1)/other);
+ Scalar actual_other;
+ if(NumTraits<Scalar>::IsInteger) actual_other = other;
+ else actual_other = Scalar(1)/other;
+ tmp = PlainObject::Constant(rows(),cols(), actual_other);
return derived();
}
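The temporary makes the integer/floating-point split observable (a sketch, assuming int and double scalars):

// Eigen::VectorXi vi = Eigen::VectorXi::Constant(3, 7);
// vi /= 2;    // integer Scalar: element-wise true division, vi == {3, 3, 3}
// Eigen::VectorXd vd = Eigen::VectorXd::Constant(3, 7.0);
// vd /= 2.0;  // floating point: multiply by Scalar(1)/2.0, vd == {3.5, 3.5, 3.5}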
diff --git a/Eigen/src/Core/StableNorm.h b/Eigen/src/Core/StableNorm.h
index 1aefd91a8..ea227c535 100644
--- a/Eigen/src/Core/StableNorm.h
+++ b/Eigen/src/Core/StableNorm.h
@@ -13,6 +13,7 @@
namespace Eigen {
namespace internal {
+
template<typename ExpressionType, typename Scalar>
inline void stable_norm_kernel(const ExpressionType& bl, Scalar& ssq, Scalar& scale, Scalar& invScale)
{
@@ -32,7 +33,6 @@ template<typename Derived>
inline typename NumTraits<typename traits<Derived>::Scalar>::Real
blueNorm_impl(const EigenBase<Derived>& _vec)
{
- typedef typename Derived::Scalar Scalar;
typedef typename Derived::RealScalar RealScalar;
typedef typename Derived::Index Index;
using std::pow;
@@ -41,43 +41,40 @@ blueNorm_impl(const EigenBase<Derived>& _vec)
using std::sqrt;
using std::abs;
const Derived& vec(_vec.derived());
- static Index nmax = -1;
+ static bool initialized = false;
static RealScalar b1, b2, s1m, s2m, overfl, rbig, relerr;
- if(nmax <= 0)
+ if(!initialized)
{
- int nbig, ibeta, it, iemin, iemax, iexp;
- RealScalar abig, eps;
+ int ibeta, it, iemin, iemax, iexp;
+ RealScalar eps;
// This program calculates the machine-dependent constants
- // bl, b2, slm, s2m, relerr overfl, nmax
+ // b1, b2, s1m, s2m, relerr, overfl
// from the "basic" machine-dependent numbers
// nbig, ibeta, it, iemin, iemax, rbig.
// The following define the basic machine-dependent constants.
// For portability, the PORT subprograms "ilmaeh" and "rlmach"
// are used. For any specific computer, each of the assignment
// statements can be replaced
- nbig = (std::numeric_limits<Index>::max)(); // largest integer
- ibeta = std::numeric_limits<RealScalar>::radix; // base for floating-point numbers
- it = std::numeric_limits<RealScalar>::digits; // number of base-beta digits in mantissa
- iemin = std::numeric_limits<RealScalar>::min_exponent; // minimum exponent
- iemax = std::numeric_limits<RealScalar>::max_exponent; // maximum exponent
- rbig = (std::numeric_limits<RealScalar>::max)(); // largest floating-point number
+ ibeta = std::numeric_limits<RealScalar>::radix; // base for floating-point numbers
+ it = std::numeric_limits<RealScalar>::digits; // number of base-beta digits in mantissa
+ iemin = std::numeric_limits<RealScalar>::min_exponent; // minimum exponent
+ iemax = std::numeric_limits<RealScalar>::max_exponent; // maximum exponent
+ rbig = (std::numeric_limits<RealScalar>::max)(); // largest floating-point number
iexp = -((1-iemin)/2);
- b1 = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // lower boundary of midrange
+ b1 = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // lower boundary of midrange
iexp = (iemax + 1 - it)/2;
- b2 = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // upper boundary of midrange
+ b2 = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // upper boundary of midrange
iexp = (2-iemin)/2;
- s1m = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // scaling factor for lower range
+ s1m = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // scaling factor for lower range
iexp = - ((iemax+it)/2);
- s2m = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // scaling factor for upper range
+ s2m = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // scaling factor for upper range
- overfl = rbig*s2m; // overflow boundary for abig
+ overfl = rbig*s2m; // overflow boundary for abig
eps = RealScalar(pow(double(ibeta), 1-it));
- relerr = sqrt(eps); // tolerance for neglecting asml
- abig = RealScalar(1.0/eps - 1.0);
- if (RealScalar(nbig)>abig) nmax = int(abig); // largest safe n
- else nmax = nbig;
+ relerr = sqrt(eps); // tolerance for neglecting asml
+ initialized = true;
}
Index n = vec.size();
RealScalar ab2 = b2 / RealScalar(n);
@@ -125,6 +122,7 @@ blueNorm_impl(const EigenBase<Derived>& _vec)
else
return abig * sqrt(RealScalar(1) + internal::abs2(asml/abig));
}
+
} // end namespace internal
/** \returns the \em l2 norm of \c *this avoiding underflow and overflow.
diff --git a/Eigen/src/Core/Transpose.h b/Eigen/src/Core/Transpose.h
index b5e1468df..6c2da09cb 100644
--- a/Eigen/src/Core/Transpose.h
+++ b/Eigen/src/Core/Transpose.h
@@ -287,7 +287,7 @@ struct inplace_transpose_selector<MatrixType,false> { // non square matrix
* m = m.transpose().eval();
* \endcode
* and is faster and also safer because in the latter line of code, forgetting the eval() results
- * in a bug caused by aliasing.
+ * in a bug caused by \ref TopicAliasing "aliasing".
*
* Notice however that this method is only useful if you want to replace a matrix by its own transpose.
* If you just need the transpose of a matrix, use transpose().
@@ -298,6 +298,8 @@ struct inplace_transpose_selector<MatrixType,false> { // non square matrix
template<typename Derived>
inline void DenseBase<Derived>::transposeInPlace()
{
+ eigen_assert((rows() == cols() || (RowsAtCompileTime == Dynamic && ColsAtCompileTime == Dynamic))
+ && "transposeInPlace() called on a non-square non-resizable matrix");
internal::inplace_transpose_selector<Derived>::run(derived());
}
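The assertion only fires when the transposed matrix could not be stored back (a sketch):

// Eigen::MatrixXd a(2,3);
// a.transposeInPlace();            // fine: fully dynamic, a is resized to 3x2
// Eigen::Matrix<double,2,3> b;
// b.transposeInPlace();            // non-square and non-resizable: asserts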
diff --git a/Eigen/src/Core/VectorwiseOp.h b/Eigen/src/Core/VectorwiseOp.h
index 862c0f336..511564875 100644
--- a/Eigen/src/Core/VectorwiseOp.h
+++ b/Eigen/src/Core/VectorwiseOp.h
@@ -103,8 +103,8 @@ class PartialReduxExpr : internal::no_assignment_operator,
#define EIGEN_MEMBER_FUNCTOR(MEMBER,COST) \
template <typename ResultType> \
- struct member_##MEMBER { \
- EIGEN_EMPTY_STRUCT_CTOR(member_##MEMBER) \
+ struct member_##MEMBER { \
+ EIGEN_EMPTY_STRUCT_CTOR(member_##MEMBER) \
typedef ResultType result_type; \
template<typename Scalar, int Size> struct Cost \
{ enum { value = COST }; }; \
@@ -233,6 +233,28 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
Direction==Vertical ? 1 : m_matrix.rows(),
Direction==Horizontal ? 1 : m_matrix.cols());
}
+
+ template<typename OtherDerived> struct OppositeExtendedType {
+ typedef Replicate<OtherDerived,
+ Direction==Horizontal ? 1 : ExpressionType::RowsAtCompileTime,
+ Direction==Vertical ? 1 : ExpressionType::ColsAtCompileTime> Type;
+ };
+
+ /** \internal
+ * Replicates a vector in the opposite direction to match the size of \c *this */
+ template<typename OtherDerived>
+ typename OppositeExtendedType<OtherDerived>::Type
+ extendedToOpposite(const DenseBase<OtherDerived>& other) const
+ {
+ EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(Direction==Horizontal, OtherDerived::MaxColsAtCompileTime==1),
+ YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED)
+ EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(Direction==Vertical, OtherDerived::MaxRowsAtCompileTime==1),
+ YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED)
+ return typename OppositeExtendedType<OtherDerived>::Type
+ (other.derived(),
+ Direction==Horizontal ? 1 : m_matrix.rows(),
+ Direction==Vertical ? 1 : m_matrix.cols());
+ }
public:
@@ -255,6 +277,8 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
/** \returns a row (or column) vector expression of the smallest coefficient
* of each column (or row) of the referenced expression.
+ *
+ * \warning the result is undefined if \c *this contains NaN.
*
* Example: \include PartialRedux_minCoeff.cpp
* Output: \verbinclude PartialRedux_minCoeff.out
@@ -265,6 +289,8 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
/** \returns a row (or column) vector expression of the largest coefficient
* of each column (or row) of the referenced expression.
+ *
+ * \warning the result is undefined if \c *this contains NaN.
*
* Example: \include PartialRedux_maxCoeff.cpp
* Output: \verbinclude PartialRedux_maxCoeff.out
@@ -504,6 +530,23 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
return m_matrix / extendedTo(other.derived());
}
+
+ /** \returns an expression where each column (or row) of the referenced matrix is normalized.
+ * The referenced matrix is \b not modified.
+ * \sa MatrixBase::normalized(), normalize()
+ */
+ CwiseBinaryOp<internal::scalar_quotient_op<Scalar>,
+ const ExpressionTypeNestedCleaned,
+ const typename OppositeExtendedType<typename ReturnType<internal::member_norm,RealScalar>::Type>::Type>
+ normalized() const { return m_matrix.cwiseQuotient(extendedToOpposite(this->norm())); }
+
+
+ /** Normalizes each row (or column) of the referenced matrix in place.
+ * \sa MatrixBase::normalize(), normalized()
+ */
+ void normalize() {
+ m_matrix = this->normalized();
+ }
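Typical use of the two new members (a sketch):

// Eigen::MatrixXd m = Eigen::MatrixXd::Random(3,4);
// Eigen::MatrixXd n = m.colwise().normalized(); // each column scaled to unit norm, m unchanged
// m.rowwise().normalize();                      // in place: each row scaled to unit norm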
/////////// Geometry module ///////////
diff --git a/Eigen/src/Core/Visitor.h b/Eigen/src/Core/Visitor.h
index abf8d8e8c..64867b7a2 100644
--- a/Eigen/src/Core/Visitor.h
+++ b/Eigen/src/Core/Visitor.h
@@ -164,8 +164,8 @@ struct functor_traits<max_coeff_visitor<Scalar> > {
} // end namespace internal
-/** \returns the minimum of all coefficients of *this
- * and puts in *row and *col its location.
+/** \returns the minimum of all coefficients of *this and puts in *row and *col its location.
+ * \warning the result is undefined if \c *this contains NaN.
*
* \sa DenseBase::minCoeff(Index*), DenseBase::maxCoeff(Index*,Index*), DenseBase::visitor(), DenseBase::minCoeff()
*/
@@ -181,8 +181,8 @@ DenseBase<Derived>::minCoeff(IndexType* rowId, IndexType* colId) const
return minVisitor.res;
}
-/** \returns the minimum of all coefficients of *this
- * and puts in *index its location.
+/** \returns the minimum of all coefficients of *this and puts in *index its location.
+ * \warning the result is undefined if \c *this contains NaN.
*
* \sa DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::maxCoeff(IndexType*,IndexType*), DenseBase::visitor(), DenseBase::minCoeff()
*/
@@ -198,8 +198,8 @@ DenseBase<Derived>::minCoeff(IndexType* index) const
return minVisitor.res;
}
-/** \returns the maximum of all coefficients of *this
- * and puts in *row and *col its location.
+/** \returns the maximum of all coefficients of *this and puts in *row and *col its location.
+ * \warning the result is undefined if \c *this contains NaN.
*
* \sa DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::visitor(), DenseBase::maxCoeff()
*/
@@ -215,8 +215,8 @@ DenseBase<Derived>::maxCoeff(IndexType* rowPtr, IndexType* colPtr) const
return maxVisitor.res;
}
-/** \returns the maximum of all coefficients of *this
- * and puts in *index its location.
+/** \returns the maximum of all coefficients of *this and puts in *index its location.
+ * \warning the result is undefined if \c *this contains NaN.
*
* \sa DenseBase::maxCoeff(IndexType*,IndexType*), DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::visitor(), DenseBase::maxCoeff()
*/
diff --git a/Eigen/src/Core/arch/SSE/MathFunctions.h b/Eigen/src/Core/arch/SSE/MathFunctions.h
index 557af8455..5ede55fba 100644
--- a/Eigen/src/Core/arch/SSE/MathFunctions.h
+++ b/Eigen/src/Core/arch/SSE/MathFunctions.h
@@ -31,7 +31,8 @@ Packet4f plog<Packet4f>(const Packet4f& _x)
/* the smallest non denormalized float number */
_EIGEN_DECLARE_CONST_Packet4f_FROM_INT(min_norm_pos, 0x00800000);
-
+ _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(minus_inf, 0xff800000); // the bit pattern of -1.f/0.f
+
/* natural logarithm computed for 4 simultaneous float
return NaN for x <= 0
*/
@@ -51,7 +52,8 @@ Packet4f plog<Packet4f>(const Packet4f& _x)
Packet4i emm0;
- Packet4f invalid_mask = _mm_cmple_ps(x, _mm_setzero_ps());
+ Packet4f invalid_mask = _mm_cmplt_ps(x, _mm_setzero_ps());
+ Packet4f iszero_mask = _mm_cmpeq_ps(x, _mm_setzero_ps());
x = pmax(x, p4f_min_norm_pos); /* cut off denormalized stuff */
emm0 = _mm_srli_epi32(_mm_castps_si128(x), 23);
@@ -96,7 +98,9 @@ Packet4f plog<Packet4f>(const Packet4f& _x)
y2 = pmul(e, p4f_cephes_log_q2);
x = padd(x, y);
x = padd(x, y2);
- return _mm_or_ps(x, invalid_mask); // negative arg will be NAN
+ // negative arg will be NAN, 0 will be -INF
+ return _mm_or_ps(_mm_andnot_ps(iszero_mask, _mm_or_ps(x, invalid_mask)),
+ _mm_and_ps(iszero_mask, p4f_minus_inf));
}
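With the extra mask, plog agrees with the scalar log at the edge cases (illustrative values):

// plog({0.f, -1.f, 1.f, 2.f}) ~= {-inf, NaN, 0.f, 0.6931472f}
// zero maps to -infinity through p4f_minus_inf; negative inputs still yield NaN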
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index f84e5b3ec..addb2fc0e 100644
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -173,18 +173,26 @@ template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const
template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_min_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b)
{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ return _mm_min_epi32(a,b);
+#else
// after some bench, this version *is* faster than a scalar implementation
Packet4i mask = _mm_cmplt_epi32(a,b);
return _mm_or_si128(_mm_and_si128(mask,a),_mm_andnot_si128(mask,b));
+#endif
}
template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_max_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_max_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b)
{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ return _mm_max_epi32(a,b);
+#else
// after some bench, this version *is* faster than a scalar implementation
Packet4i mask = _mm_cmpgt_epi32(a,b);
return _mm_or_si128(_mm_and_si128(mask,a),_mm_andnot_si128(mask,b));
+#endif
}
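Both branches produce the same element-wise result; the pre-SSE4.1 path emulates the blend by hand (a sketch of the identity used):

// mask = (a < b)  =>  pmin(a,b) = (mask & a) | (~mask & b)
// mask = (a > b)  =>  pmax(a,b) = (mask & a) | (~mask & b)
// with SSE4.1, _mm_min_epi32 / _mm_max_epi32 do this in one instruction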
template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_and_ps(a,b); }
diff --git a/Eigen/src/Core/products/CoeffBasedProduct.h b/Eigen/src/Core/products/CoeffBasedProduct.h
index 312a05c71..51fc5fd58 100644
--- a/Eigen/src/Core/products/CoeffBasedProduct.h
+++ b/Eigen/src/Core/products/CoeffBasedProduct.h
@@ -152,7 +152,7 @@ class CoeffBasedProduct
{
// we don't allow taking products of matrices of different real types, as that wouldn't be vectorizable.
// We still allow to mix T and complex<T>.
- EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::RealScalar, typename Rhs::RealScalar>::value),
+ EIGEN_STATIC_ASSERT((internal::scalar_product_traits<typename Lhs::RealScalar, typename Rhs::RealScalar>::Defined),
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
eigen_assert(lhs.cols() == rhs.rows()
&& "invalid matrix product"
diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
index 09912fafb..780fa74d3 100644
--- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h
+++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
@@ -69,8 +69,8 @@ inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1=0, std::ptrdi
* - the number of scalars that fit into a packet (when vectorization is enabled).
*
* \sa setCpuCacheSizes */
-template<typename LhsScalar, typename RhsScalar, int KcFactor>
-void computeProductBlockingSizes(std::ptrdiff_t& k, std::ptrdiff_t& m, std::ptrdiff_t& n)
+template<typename LhsScalar, typename RhsScalar, int KcFactor, typename SizeType>
+void computeProductBlockingSizes(SizeType& k, SizeType& m, SizeType& n)
{
EIGEN_UNUSED_VARIABLE(n);
// Explanations:
@@ -91,13 +91,13 @@ void computeProductBlockingSizes(std::ptrdiff_t& k, std::ptrdiff_t& m, std::ptrd
};
manage_caching_sizes(GetAction, &l1, &l2);
- k = std::min<std::ptrdiff_t>(k, l1/kdiv);
- std::ptrdiff_t _m = k>0 ? l2/(4 * sizeof(LhsScalar) * k) : 0;
+ k = std::min<SizeType>(k, l1/kdiv);
+ SizeType _m = k>0 ? l2/(4 * sizeof(LhsScalar) * k) : 0;
if(_m<m) m = _m & mr_mask;
}
-template<typename LhsScalar, typename RhsScalar>
-inline void computeProductBlockingSizes(std::ptrdiff_t& k, std::ptrdiff_t& m, std::ptrdiff_t& n)
+template<typename LhsScalar, typename RhsScalar, typename SizeType>
+inline void computeProductBlockingSizes(SizeType& k, SizeType& m, SizeType& n)
{
computeProductBlockingSizes<LhsScalar,RhsScalar,1>(k, m, n);
}
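The SizeType parameter lets callers pass their own index type (a minimal sketch; the resulting values depend on the detected cache sizes):

// std::ptrdiff_t k = depth, m = rows, n = cols;
// Eigen::internal::computeProductBlockingSizes<float,float>(k, m, n);
// on return, k and m are clamped so the packed blocks fit the L1/L2 caches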
@@ -529,7 +529,14 @@ struct gebp_kernel
EIGEN_DONT_INLINE
void operator()(ResScalar* res, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index rows, Index depth, Index cols, ResScalar alpha,
- Index strideA=-1, Index strideB=-1, Index offsetA=0, Index offsetB=0, RhsScalar* unpackedB = 0)
+ Index strideA=-1, Index strideB=-1, Index offsetA=0, Index offsetB=0, RhsScalar* unpackedB=0);
+};
+
+template<typename LhsScalar, typename RhsScalar, typename Index, int mr, int nr, bool ConjugateLhs, bool ConjugateRhs>
+EIGEN_DONT_INLINE
+void gebp_kernel<LhsScalar,RhsScalar,Index,mr,nr,ConjugateLhs,ConjugateRhs>
+ ::operator()(ResScalar* res, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index rows, Index depth, Index cols, ResScalar alpha,
+ Index strideA, Index strideB, Index offsetA, Index offsetB, RhsScalar* unpackedB)
{
Traits traits;
@@ -1089,7 +1096,7 @@ EIGEN_ASM_COMMENT("mybegin4");
}
}
}
-};
+
#undef CJMADD
@@ -1110,80 +1117,83 @@ EIGEN_ASM_COMMENT("mybegin4");
template<typename Scalar, typename Index, int Pack1, int Pack2, int StorageOrder, bool Conjugate, bool PanelMode>
struct gemm_pack_lhs
{
- EIGEN_DONT_INLINE void operator()(Scalar* blockA, const Scalar* EIGEN_RESTRICT _lhs, Index lhsStride, Index depth, Index rows,
- Index stride=0, Index offset=0)
+ EIGEN_DONT_INLINE void operator()(Scalar* blockA, const Scalar* EIGEN_RESTRICT _lhs, Index lhsStride, Index depth, Index rows, Index stride=0, Index offset=0);
+};
+
+template<typename Scalar, typename Index, int Pack1, int Pack2, int StorageOrder, bool Conjugate, bool PanelMode>
+EIGEN_DONT_INLINE void gemm_pack_lhs<Scalar, Index, Pack1, Pack2, StorageOrder, Conjugate, PanelMode>
+ ::operator()(Scalar* blockA, const Scalar* EIGEN_RESTRICT _lhs, Index lhsStride, Index depth, Index rows, Index stride, Index offset)
+{
+ typedef typename packet_traits<Scalar>::type Packet;
+ enum { PacketSize = packet_traits<Scalar>::size };
+
+ EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK LHS");
+ eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
+ eigen_assert( (StorageOrder==RowMajor) || ((Pack1%PacketSize)==0 && Pack1<=4*PacketSize) );
+ conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
+ const_blas_data_mapper<Scalar, Index, StorageOrder> lhs(_lhs,lhsStride);
+ Index count = 0;
+ Index peeled_mc = (rows/Pack1)*Pack1;
+ for(Index i=0; i<peeled_mc; i+=Pack1)
{
- typedef typename packet_traits<Scalar>::type Packet;
- enum { PacketSize = packet_traits<Scalar>::size };
-
- EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK LHS");
- eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
- eigen_assert( (StorageOrder==RowMajor) || ((Pack1%PacketSize)==0 && Pack1<=4*PacketSize) );
- conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
- const_blas_data_mapper<Scalar, Index, StorageOrder> lhs(_lhs,lhsStride);
- Index count = 0;
- Index peeled_mc = (rows/Pack1)*Pack1;
- for(Index i=0; i<peeled_mc; i+=Pack1)
- {
- if(PanelMode) count += Pack1 * offset;
+ if(PanelMode) count += Pack1 * offset;
- if(StorageOrder==ColMajor)
+ if(StorageOrder==ColMajor)
+ {
+ for(Index k=0; k<depth; k++)
{
- for(Index k=0; k<depth; k++)
- {
- Packet A, B, C, D;
- if(Pack1>=1*PacketSize) A = ploadu<Packet>(&lhs(i+0*PacketSize, k));
- if(Pack1>=2*PacketSize) B = ploadu<Packet>(&lhs(i+1*PacketSize, k));
- if(Pack1>=3*PacketSize) C = ploadu<Packet>(&lhs(i+2*PacketSize, k));
- if(Pack1>=4*PacketSize) D = ploadu<Packet>(&lhs(i+3*PacketSize, k));
- if(Pack1>=1*PacketSize) { pstore(blockA+count, cj.pconj(A)); count+=PacketSize; }
- if(Pack1>=2*PacketSize) { pstore(blockA+count, cj.pconj(B)); count+=PacketSize; }
- if(Pack1>=3*PacketSize) { pstore(blockA+count, cj.pconj(C)); count+=PacketSize; }
- if(Pack1>=4*PacketSize) { pstore(blockA+count, cj.pconj(D)); count+=PacketSize; }
- }
+ Packet A, B, C, D;
+ if(Pack1>=1*PacketSize) A = ploadu<Packet>(&lhs(i+0*PacketSize, k));
+ if(Pack1>=2*PacketSize) B = ploadu<Packet>(&lhs(i+1*PacketSize, k));
+ if(Pack1>=3*PacketSize) C = ploadu<Packet>(&lhs(i+2*PacketSize, k));
+ if(Pack1>=4*PacketSize) D = ploadu<Packet>(&lhs(i+3*PacketSize, k));
+ if(Pack1>=1*PacketSize) { pstore(blockA+count, cj.pconj(A)); count+=PacketSize; }
+ if(Pack1>=2*PacketSize) { pstore(blockA+count, cj.pconj(B)); count+=PacketSize; }
+ if(Pack1>=3*PacketSize) { pstore(blockA+count, cj.pconj(C)); count+=PacketSize; }
+ if(Pack1>=4*PacketSize) { pstore(blockA+count, cj.pconj(D)); count+=PacketSize; }
}
- else
+ }
+ else
+ {
+ for(Index k=0; k<depth; k++)
{
- for(Index k=0; k<depth; k++)
+ // TODO add a vectorized transpose here
+ Index w=0;
+ for(; w<Pack1-3; w+=4)
{
- // TODO add a vectorized transpose here
- Index w=0;
- for(; w<Pack1-3; w+=4)
- {
- Scalar a(cj(lhs(i+w+0, k))),
- b(cj(lhs(i+w+1, k))),
- c(cj(lhs(i+w+2, k))),
- d(cj(lhs(i+w+3, k)));
- blockA[count++] = a;
- blockA[count++] = b;
- blockA[count++] = c;
- blockA[count++] = d;
- }
- if(Pack1%4)
- for(;w<Pack1;++w)
- blockA[count++] = cj(lhs(i+w, k));
+ Scalar a(cj(lhs(i+w+0, k))),
+ b(cj(lhs(i+w+1, k))),
+ c(cj(lhs(i+w+2, k))),
+ d(cj(lhs(i+w+3, k)));
+ blockA[count++] = a;
+ blockA[count++] = b;
+ blockA[count++] = c;
+ blockA[count++] = d;
}
+ if(Pack1%4)
+ for(;w<Pack1;++w)
+ blockA[count++] = cj(lhs(i+w, k));
}
- if(PanelMode) count += Pack1 * (stride-offset-depth);
- }
- if(rows-peeled_mc>=Pack2)
- {
- if(PanelMode) count += Pack2*offset;
- for(Index k=0; k<depth; k++)
- for(Index w=0; w<Pack2; w++)
- blockA[count++] = cj(lhs(peeled_mc+w, k));
- if(PanelMode) count += Pack2 * (stride-offset-depth);
- peeled_mc += Pack2;
- }
- for(Index i=peeled_mc; i<rows; i++)
- {
- if(PanelMode) count += offset;
- for(Index k=0; k<depth; k++)
- blockA[count++] = cj(lhs(i, k));
- if(PanelMode) count += (stride-offset-depth);
}
+ if(PanelMode) count += Pack1 * (stride-offset-depth);
}
-};
+ if(rows-peeled_mc>=Pack2)
+ {
+ if(PanelMode) count += Pack2*offset;
+ for(Index k=0; k<depth; k++)
+ for(Index w=0; w<Pack2; w++)
+ blockA[count++] = cj(lhs(peeled_mc+w, k));
+ if(PanelMode) count += Pack2 * (stride-offset-depth);
+ peeled_mc += Pack2;
+ }
+ for(Index i=peeled_mc; i<rows; i++)
+ {
+ if(PanelMode) count += offset;
+ for(Index k=0; k<depth; k++)
+ blockA[count++] = cj(lhs(i, k));
+ if(PanelMode) count += (stride-offset-depth);
+ }
+}
// copy a complete panel of the rhs
// this version is optimized for column major matrices
@@ -1197,92 +1207,98 @@ struct gemm_pack_rhs<Scalar, Index, nr, ColMajor, Conjugate, PanelMode>
{
typedef typename packet_traits<Scalar>::type Packet;
enum { PacketSize = packet_traits<Scalar>::size };
- EIGEN_DONT_INLINE void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols,
- Index stride=0, Index offset=0)
+ EIGEN_DONT_INLINE void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols, Index stride=0, Index offset=0);
+};
+
+template<typename Scalar, typename Index, int nr, bool Conjugate, bool PanelMode>
+EIGEN_DONT_INLINE void gemm_pack_rhs<Scalar, Index, nr, ColMajor, Conjugate, PanelMode>
+ ::operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols, Index stride, Index offset)
+{
+ EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK RHS COLMAJOR");
+ eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
+ conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
+ Index packet_cols = (cols/nr) * nr;
+ Index count = 0;
+ for(Index j2=0; j2<packet_cols; j2+=nr)
{
- EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK RHS COLMAJOR");
- eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
- conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
- Index packet_cols = (cols/nr) * nr;
- Index count = 0;
- for(Index j2=0; j2<packet_cols; j2+=nr)
+ // skip what we have before
+ if(PanelMode) count += nr * offset;
+ const Scalar* b0 = &rhs[(j2+0)*rhsStride];
+ const Scalar* b1 = &rhs[(j2+1)*rhsStride];
+ const Scalar* b2 = &rhs[(j2+2)*rhsStride];
+ const Scalar* b3 = &rhs[(j2+3)*rhsStride];
+ for(Index k=0; k<depth; k++)
{
- // skip what we have before
- if(PanelMode) count += nr * offset;
- const Scalar* b0 = &rhs[(j2+0)*rhsStride];
- const Scalar* b1 = &rhs[(j2+1)*rhsStride];
- const Scalar* b2 = &rhs[(j2+2)*rhsStride];
- const Scalar* b3 = &rhs[(j2+3)*rhsStride];
- for(Index k=0; k<depth; k++)
- {
- blockB[count+0] = cj(b0[k]);
- blockB[count+1] = cj(b1[k]);
- if(nr==4) blockB[count+2] = cj(b2[k]);
- if(nr==4) blockB[count+3] = cj(b3[k]);
- count += nr;
- }
- // skip what we have after
- if(PanelMode) count += nr * (stride-offset-depth);
+ blockB[count+0] = cj(b0[k]);
+ blockB[count+1] = cj(b1[k]);
+ if(nr==4) blockB[count+2] = cj(b2[k]);
+ if(nr==4) blockB[count+3] = cj(b3[k]);
+ count += nr;
}
+ // skip what we have after
+ if(PanelMode) count += nr * (stride-offset-depth);
+ }
- // copy the remaining columns one at a time (nr==1)
- for(Index j2=packet_cols; j2<cols; ++j2)
+ // copy the remaining columns one at a time (nr==1)
+ for(Index j2=packet_cols; j2<cols; ++j2)
+ {
+ if(PanelMode) count += offset;
+ const Scalar* b0 = &rhs[(j2+0)*rhsStride];
+ for(Index k=0; k<depth; k++)
{
- if(PanelMode) count += offset;
- const Scalar* b0 = &rhs[(j2+0)*rhsStride];
- for(Index k=0; k<depth; k++)
- {
- blockB[count] = cj(b0[k]);
- count += 1;
- }
- if(PanelMode) count += (stride-offset-depth);
+ blockB[count] = cj(b0[k]);
+ count += 1;
}
+ if(PanelMode) count += (stride-offset-depth);
}
-};
+}
// this version is optimized for row major matrices
template<typename Scalar, typename Index, int nr, bool Conjugate, bool PanelMode>
struct gemm_pack_rhs<Scalar, Index, nr, RowMajor, Conjugate, PanelMode>
{
enum { PacketSize = packet_traits<Scalar>::size };
- EIGEN_DONT_INLINE void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols,
- Index stride=0, Index offset=0)
+ EIGEN_DONT_INLINE void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols, Index stride=0, Index offset=0);
+};
+
+template<typename Scalar, typename Index, int nr, bool Conjugate, bool PanelMode>
+EIGEN_DONT_INLINE void gemm_pack_rhs<Scalar, Index, nr, RowMajor, Conjugate, PanelMode>
+ ::operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols, Index stride, Index offset)
+{
+ EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK RHS ROWMAJOR");
+ eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
+ conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
+ Index packet_cols = (cols/nr) * nr;
+ Index count = 0;
+ for(Index j2=0; j2<packet_cols; j2+=nr)
{
- EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK RHS ROWMAJOR");
- eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
- conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
- Index packet_cols = (cols/nr) * nr;
- Index count = 0;
- for(Index j2=0; j2<packet_cols; j2+=nr)
+ // skip what we have before
+ if(PanelMode) count += nr * offset;
+ for(Index k=0; k<depth; k++)
{
- // skip what we have before
- if(PanelMode) count += nr * offset;
- for(Index k=0; k<depth; k++)
- {
- const Scalar* b0 = &rhs[k*rhsStride + j2];
- blockB[count+0] = cj(b0[0]);
- blockB[count+1] = cj(b0[1]);
- if(nr==4) blockB[count+2] = cj(b0[2]);
- if(nr==4) blockB[count+3] = cj(b0[3]);
- count += nr;
- }
- // skip what we have after
- if(PanelMode) count += nr * (stride-offset-depth);
+ const Scalar* b0 = &rhs[k*rhsStride + j2];
+ blockB[count+0] = cj(b0[0]);
+ blockB[count+1] = cj(b0[1]);
+ if(nr==4) blockB[count+2] = cj(b0[2]);
+ if(nr==4) blockB[count+3] = cj(b0[3]);
+ count += nr;
}
- // copy the remaining columns one at a time (nr==1)
- for(Index j2=packet_cols; j2<cols; ++j2)
+ // skip what we have after
+ if(PanelMode) count += nr * (stride-offset-depth);
+ }
+ // copy the remaining columns one at a time (nr==1)
+ for(Index j2=packet_cols; j2<cols; ++j2)
+ {
+ if(PanelMode) count += offset;
+ const Scalar* b0 = &rhs[j2];
+ for(Index k=0; k<depth; k++)
{
- if(PanelMode) count += offset;
- const Scalar* b0 = &rhs[j2];
- for(Index k=0; k<depth; k++)
- {
- blockB[count] = cj(b0[k*rhsStride]);
- count += 1;
- }
- if(PanelMode) count += stride-offset-depth;
+ blockB[count] = cj(b0[k*rhsStride]);
+ count += 1;
}
+ if(PanelMode) count += stride-offset-depth;
}
-};
+}
} // end namespace internal
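
Note: the packing and kernel routines above follow this commit's central pattern: each EIGEN_DONT_INLINE operator()/run body moves out of its struct, leaving only a declaration inside. A minimal sketch of the pattern, with a hypothetical name (pack_kernel is not an Eigen identifier):

    #include <Eigen/Core>

    // Declaration stays in the struct; the EIGEN_DONT_INLINE definition
    // moves out of line.
    template<typename Scalar>
    struct pack_kernel
    {
      EIGEN_DONT_INLINE void operator()(Scalar* dst, const Scalar* src, int n);
    };

    template<typename Scalar>
    EIGEN_DONT_INLINE void pack_kernel<Scalar>::operator()(Scalar* dst, const Scalar* src, int n)
    {
      for(int i = 0; i < n; ++i)
        dst[i] = src[i];   // real kernels pack, interleave and conjugate here
    }

One visible payoff in the *_MKL hunks below: the BLAS-backed specializations can now declare a plain static void run that merely forwards to the vendor routine, without carrying the noinline attribute.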
diff --git a/Eigen/src/Core/products/GeneralMatrixMatrix.h b/Eigen/src/Core/products/GeneralMatrixMatrix.h
index 73a465ec5..3f5ffcf51 100644
--- a/Eigen/src/Core/products/GeneralMatrixMatrix.h
+++ b/Eigen/src/Core/products/GeneralMatrixMatrix.h
@@ -50,6 +50,7 @@ template<
typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs>
struct general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,ColMajor>
{
+
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
static void run(Index rows, Index cols, Index depth,
const LhsScalar* _lhs, Index lhsStride,
@@ -169,7 +170,6 @@ static void run(Index rows, Index cols, Index depth,
// vertical panel which is, in practice, a very low number.
pack_rhs(blockB, &rhs(k2,0), rhsStride, actual_kc, cols);
-
// For each mc x kc block of the lhs's vertical panel...
// (==GEPP_VAR1)
for(Index i2=0; i2<rows; i2+=mc)
@@ -183,7 +183,6 @@ static void run(Index rows, Index cols, Index depth,
// Everything is packed, we can now call the block * panel kernel:
gebp(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha, -1, -1, 0, 0, blockW);
-
}
}
}
@@ -204,7 +203,7 @@ struct traits<GeneralProduct<Lhs,Rhs,GemmProduct> >
template<typename Scalar, typename Index, typename Gemm, typename Lhs, typename Rhs, typename Dest, typename BlockingType>
struct gemm_functor
{
- gemm_functor(const Lhs& lhs, const Rhs& rhs, Dest& dest, Scalar actualAlpha,
+ gemm_functor(const Lhs& lhs, const Rhs& rhs, Dest& dest, const Scalar& actualAlpha,
BlockingType& blocking)
: m_lhs(lhs), m_rhs(rhs), m_dest(dest), m_actualAlpha(actualAlpha), m_blocking(blocking)
{}
@@ -395,7 +394,7 @@ class GeneralProduct<Lhs, Rhs, GemmProduct>
EIGEN_CHECK_BINARY_COMPATIBILIY(BinOp,LhsScalar,RhsScalar);
}
- template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
+ template<typename Dest> void scaleAndAddTo(Dest& dst, const Scalar& alpha) const
{
eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
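
Note: a second recurring change in this commit is that Scalar alpha parameters become const Scalar& alpha. For built-in scalars the two are equivalent, but a by-value alpha is copied at every call level, which matters for expensive user-defined scalar types. A tiny sketch under that assumption (BigFloat is a hypothetical multiprecision stand-in, not part of Eigen):

    #include <vector>

    struct BigFloat { std::vector<unsigned> limbs; }; // costly to copy

    void by_value(BigFloat alpha)        { (void)alpha; } // copies limbs on every call
    void by_cref (const BigFloat& alpha) { (void)alpha; } // no copy; the new convention

    int main()
    {
      BigFloat a;
      by_value(a);
      by_cref(a);
      return 0;
    }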
diff --git a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h
index 432d3a9dc..5c3763909 100644
--- a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h
+++ b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h
@@ -12,6 +12,9 @@
namespace Eigen {
+template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjLhs, bool ConjRhs>
+struct selfadjoint_rank1_update;
+
namespace internal {
/**********************************************************************
@@ -39,7 +42,7 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,
{
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* lhs, Index lhsStride,
- const RhsScalar* rhs, Index rhsStride, ResScalar* res, Index resStride, ResScalar alpha)
+ const RhsScalar* rhs, Index rhsStride, ResScalar* res, Index resStride, const ResScalar& alpha)
{
general_matrix_matrix_triangular_product<Index,
RhsScalar, RhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateRhs,
@@ -55,7 +58,7 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,
{
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* _lhs, Index lhsStride,
- const RhsScalar* _rhs, Index rhsStride, ResScalar* res, Index resStride, ResScalar alpha)
+ const RhsScalar* _rhs, Index rhsStride, ResScalar* res, Index resStride, const ResScalar& alpha)
{
const_blas_data_mapper<LhsScalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
const_blas_data_mapper<RhsScalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride);
@@ -133,7 +136,7 @@ struct tribb_kernel
enum {
BlockSize = EIGEN_PLAIN_ENUM_MAX(mr,nr)
};
- void operator()(ResScalar* res, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index size, Index depth, ResScalar alpha, RhsScalar* workspace)
+ void operator()(ResScalar* res, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index size, Index depth, const ResScalar& alpha, RhsScalar* workspace)
{
gebp_kernel<LhsScalar, RhsScalar, Index, mr, nr, ConjLhs, ConjRhs> gebp_kernel;
Matrix<ResScalar,BlockSize,BlockSize,ColMajor> buffer;
@@ -180,31 +183,92 @@ struct tribb_kernel
// high level API
+template<typename MatrixType, typename ProductType, int UpLo, bool IsOuterProduct>
+struct general_product_to_triangular_selector;
+
+
+template<typename MatrixType, typename ProductType, int UpLo>
+struct general_product_to_triangular_selector<MatrixType,ProductType,UpLo,true>
+{
+ static void run(MatrixType& mat, const ProductType& prod, const typename MatrixType::Scalar& alpha)
+ {
+ typedef typename MatrixType::Scalar Scalar;
+ typedef typename MatrixType::Index Index;
+
+ typedef typename internal::remove_all<typename ProductType::LhsNested>::type Lhs;
+ typedef internal::blas_traits<Lhs> LhsBlasTraits;
+ typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhs;
+ typedef typename internal::remove_all<ActualLhs>::type _ActualLhs;
+ typename internal::add_const_on_value_type<ActualLhs>::type actualLhs = LhsBlasTraits::extract(prod.lhs());
+
+ typedef typename internal::remove_all<typename ProductType::RhsNested>::type Rhs;
+ typedef internal::blas_traits<Rhs> RhsBlasTraits;
+ typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhs;
+ typedef typename internal::remove_all<ActualRhs>::type _ActualRhs;
+ typename internal::add_const_on_value_type<ActualRhs>::type actualRhs = RhsBlasTraits::extract(prod.rhs());
+
+ Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs().derived()) * RhsBlasTraits::extractScalarFactor(prod.rhs().derived());
+
+ enum {
+ StorageOrder = (internal::traits<MatrixType>::Flags&RowMajorBit) ? RowMajor : ColMajor,
+ UseLhsDirectly = _ActualLhs::InnerStrideAtCompileTime==1,
+ UseRhsDirectly = _ActualRhs::InnerStrideAtCompileTime==1
+ };
+
+ internal::gemv_static_vector_if<Scalar,Lhs::SizeAtCompileTime,Lhs::MaxSizeAtCompileTime,!UseLhsDirectly> static_lhs;
+ ei_declare_aligned_stack_constructed_variable(Scalar, actualLhsPtr, actualLhs.size(),
+ (UseLhsDirectly ? const_cast<Scalar*>(actualLhs.data()) : static_lhs.data()));
+ if(!UseLhsDirectly) Map<typename _ActualLhs::PlainObject>(actualLhsPtr, actualLhs.size()) = actualLhs;
+
+ internal::gemv_static_vector_if<Scalar,Rhs::SizeAtCompileTime,Rhs::MaxSizeAtCompileTime,!UseRhsDirectly> static_rhs;
+ ei_declare_aligned_stack_constructed_variable(Scalar, actualRhsPtr, actualRhs.size(),
+ (UseRhsDirectly ? const_cast<Scalar*>(actualRhs.data()) : static_rhs.data()));
+ if(!UseRhsDirectly) Map<typename _ActualRhs::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs;
+
+
+ selfadjoint_rank1_update<Scalar,Index,StorageOrder,UpLo,
+ LhsBlasTraits::NeedToConjugate && NumTraits<Scalar>::IsComplex,
+ RhsBlasTraits::NeedToConjugate && NumTraits<Scalar>::IsComplex>
+ ::run(actualLhs.size(), mat.data(), mat.outerStride(), actualLhsPtr, actualRhsPtr, actualAlpha);
+ }
+};
+
+template<typename MatrixType, typename ProductType, int UpLo>
+struct general_product_to_triangular_selector<MatrixType,ProductType,UpLo,false>
+{
+ static void run(MatrixType& mat, const ProductType& prod, const typename MatrixType::Scalar& alpha)
+ {
+ typedef typename MatrixType::Index Index;
+
+ typedef typename internal::remove_all<typename ProductType::LhsNested>::type Lhs;
+ typedef internal::blas_traits<Lhs> LhsBlasTraits;
+ typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhs;
+ typedef typename internal::remove_all<ActualLhs>::type _ActualLhs;
+ typename internal::add_const_on_value_type<ActualLhs>::type actualLhs = LhsBlasTraits::extract(prod.lhs());
+
+ typedef typename internal::remove_all<typename ProductType::RhsNested>::type Rhs;
+ typedef internal::blas_traits<Rhs> RhsBlasTraits;
+ typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhs;
+ typedef typename internal::remove_all<ActualRhs>::type _ActualRhs;
+ typename internal::add_const_on_value_type<ActualRhs>::type actualRhs = RhsBlasTraits::extract(prod.rhs());
+
+ typename ProductType::Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs().derived()) * RhsBlasTraits::extractScalarFactor(prod.rhs().derived());
+
+ internal::general_matrix_matrix_triangular_product<Index,
+ typename Lhs::Scalar, _ActualLhs::Flags&RowMajorBit ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate,
+ typename Rhs::Scalar, _ActualRhs::Flags&RowMajorBit ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate,
+ MatrixType::Flags&RowMajorBit ? RowMajor : ColMajor, UpLo>
+ ::run(mat.cols(), actualLhs.cols(),
+ &actualLhs.coeffRef(0,0), actualLhs.outerStride(), &actualRhs.coeffRef(0,0), actualRhs.outerStride(),
+ mat.data(), mat.outerStride(), actualAlpha);
+ }
+};
+
template<typename MatrixType, unsigned int UpLo>
template<typename ProductDerived, typename _Lhs, typename _Rhs>
TriangularView<MatrixType,UpLo>& TriangularView<MatrixType,UpLo>::assignProduct(const ProductBase<ProductDerived, _Lhs,_Rhs>& prod, const Scalar& alpha)
{
- typedef typename internal::remove_all<typename ProductDerived::LhsNested>::type Lhs;
- typedef internal::blas_traits<Lhs> LhsBlasTraits;
- typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhs;
- typedef typename internal::remove_all<ActualLhs>::type _ActualLhs;
- typename internal::add_const_on_value_type<ActualLhs>::type actualLhs = LhsBlasTraits::extract(prod.lhs());
-
- typedef typename internal::remove_all<typename ProductDerived::RhsNested>::type Rhs;
- typedef internal::blas_traits<Rhs> RhsBlasTraits;
- typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhs;
- typedef typename internal::remove_all<ActualRhs>::type _ActualRhs;
- typename internal::add_const_on_value_type<ActualRhs>::type actualRhs = RhsBlasTraits::extract(prod.rhs());
-
- typename ProductDerived::Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs().derived()) * RhsBlasTraits::extractScalarFactor(prod.rhs().derived());
-
- internal::general_matrix_matrix_triangular_product<Index,
- typename Lhs::Scalar, _ActualLhs::Flags&RowMajorBit ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate,
- typename Rhs::Scalar, _ActualRhs::Flags&RowMajorBit ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate,
- MatrixType::Flags&RowMajorBit ? RowMajor : ColMajor, UpLo>
- ::run(m_matrix.cols(), actualLhs.cols(),
- &actualLhs.coeffRef(0,0), actualLhs.outerStride(), &actualRhs.coeffRef(0,0), actualRhs.outerStride(),
- const_cast<Scalar*>(m_matrix.data()), m_matrix.outerStride(), actualAlpha);
+ general_product_to_triangular_selector<MatrixType, ProductDerived, UpLo, (_Lhs::ColsAtCompileTime==1) || (_Rhs::RowsAtCompileTime==1)>::run(m_matrix.const_cast_derived(), prod.derived(), alpha);
return *this;
}
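
Note: the new selector splits TriangularView::assignProduct into two paths: outer products (lhs with ColsAtCompileTime==1, or rhs with RowsAtCompileTime==1) go through selfadjoint_rank1_update, everything else through the blocked matrix*matrix-to-triangular kernel. A hedged usage sketch of the two shapes; the dispatch itself is internal:

    #include <Eigen/Dense>
    using namespace Eigen;

    int main()
    {
      MatrixXd m = MatrixXd::Zero(4,4);
      VectorXd u = VectorXd::Random(4), v = VectorXd::Random(4);
      MatrixXd a = MatrixXd::Random(4,3), b = MatrixXd::Random(3,4);

      // Outer product: the lhs is a compile-time column vector, so the
      // rank-1 branch (IsOuterProduct==true) is selected.
      m.triangularView<Lower>() += u * v.transpose();

      // General product: full blocked kernel (IsOuterProduct==false).
      m.triangularView<Lower>() += a * b;
      return 0;
    }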
diff --git a/Eigen/src/Core/products/GeneralMatrixVector.h b/Eigen/src/Core/products/GeneralMatrixVector.h
index 8895d3ab2..9bdd588df 100644
--- a/Eigen/src/Core/products/GeneralMatrixVector.h
+++ b/Eigen/src/Core/products/GeneralMatrixVector.h
@@ -56,6 +56,18 @@ EIGEN_DONT_INLINE static void run(
#ifdef EIGEN_INTERNAL_DEBUGGING
resIncr
#endif
+ , RhsScalar alpha);
+};
+
+template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs, int Version>
+EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,ColMajor,ConjugateLhs,RhsScalar,ConjugateRhs,Version>::run(
+ Index rows, Index cols,
+ const LhsScalar* lhs, Index lhsStride,
+ const RhsScalar* rhs, Index rhsIncr,
+ ResScalar* res, Index
+ #ifdef EIGEN_INTERNAL_DEBUGGING
+ resIncr
+ #endif
, RhsScalar alpha)
{
eigen_internal_assert(resIncr==1);
@@ -274,7 +286,6 @@ EIGEN_DONT_INLINE static void run(
} while(Vectorizable);
#undef _EIGEN_ACCUMULATE_PACKETS
}
-};
/* Optimized row-major matrix * vector product:
 * This algorithm processes 4 rows at once, which allows us to both reduce
@@ -312,6 +323,15 @@ EIGEN_DONT_INLINE static void run(
const LhsScalar* lhs, Index lhsStride,
const RhsScalar* rhs, Index rhsIncr,
ResScalar* res, Index resIncr,
+ ResScalar alpha);
+};
+
+template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs, int Version>
+EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,RowMajor,ConjugateLhs,RhsScalar,ConjugateRhs,Version>::run(
+ Index rows, Index cols,
+ const LhsScalar* lhs, Index lhsStride,
+ const RhsScalar* rhs, Index rhsIncr,
+ ResScalar* res, Index resIncr,
ResScalar alpha)
{
EIGEN_UNUSED_VARIABLE(rhsIncr);
@@ -545,7 +565,6 @@ EIGEN_DONT_INLINE static void run(
#undef _EIGEN_ACCUMULATE_PACKETS
}
-};
} // end namespace internal
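
Note: the comment above describes the row-major kernel's 4-rows-at-once strategy. A scalar (non-SIMD) sketch of that blocking idea, independent of Eigen's packet machinery; the real kernel additionally vectorizes the inner loop and handles conjugation and strides:

    // y += alpha * A * x, with A row-major of size rows x cols.
    // Four accumulators in flight shorten the dependency chain and
    // reuse each x[c] load four times.
    void gemv_row_major_4(const double* A, int rows, int cols,
                          const double* x, double* y, double alpha)
    {
      int r = 0;
      for(; r + 3 < rows; r += 4)
      {
        double s0 = 0, s1 = 0, s2 = 0, s3 = 0;
        for(int c = 0; c < cols; ++c)
        {
          const double xc = x[c];
          s0 += A[(r+0)*cols + c] * xc;
          s1 += A[(r+1)*cols + c] * xc;
          s2 += A[(r+2)*cols + c] * xc;
          s3 += A[(r+3)*cols + c] * xc;
        }
        y[r+0] += alpha*s0; y[r+1] += alpha*s1;
        y[r+2] += alpha*s2; y[r+3] += alpha*s3;
      }
      for(; r < rows; ++r)  // leftover rows
      {
        double s = 0;
        for(int c = 0; c < cols; ++c) s += A[r*cols + c] * x[c];
        y[r] += alpha*s;
      }
    }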
diff --git a/Eigen/src/Core/products/GeneralMatrixVector_MKL.h b/Eigen/src/Core/products/GeneralMatrixVector_MKL.h
index e9de6af3e..1cb9fe6b5 100644
--- a/Eigen/src/Core/products/GeneralMatrixVector_MKL.h
+++ b/Eigen/src/Core/products/GeneralMatrixVector_MKL.h
@@ -53,7 +53,7 @@ struct general_matrix_vector_product_gemv :
#define EIGEN_MKL_GEMV_SPECIALIZE(Scalar) \
template<typename Index, bool ConjugateLhs, bool ConjugateRhs> \
struct general_matrix_vector_product<Index,Scalar,ColMajor,ConjugateLhs,Scalar,ConjugateRhs,Specialized> { \
-static EIGEN_DONT_INLINE void run( \
+static void run( \
Index rows, Index cols, \
const Scalar* lhs, Index lhsStride, \
const Scalar* rhs, Index rhsIncr, \
@@ -70,7 +70,7 @@ static EIGEN_DONT_INLINE void run( \
}; \
template<typename Index, bool ConjugateLhs, bool ConjugateRhs> \
struct general_matrix_vector_product<Index,Scalar,RowMajor,ConjugateLhs,Scalar,ConjugateRhs,Specialized> { \
-static EIGEN_DONT_INLINE void run( \
+static void run( \
Index rows, Index cols, \
const Scalar* lhs, Index lhsStride, \
const Scalar* rhs, Index rhsIncr, \
@@ -92,7 +92,7 @@ struct general_matrix_vector_product_gemv<Index,EIGTYPE,LhsStorageOrder,Conjugat
{ \
typedef Matrix<EIGTYPE,Dynamic,1,ColMajor> GEMVVector;\
\
-static EIGEN_DONT_INLINE void run( \
+static void run( \
Index rows, Index cols, \
const EIGTYPE* lhs, Index lhsStride, \
const EIGTYPE* rhs, Index rhsIncr, \
diff --git a/Eigen/src/Core/products/SelfadjointMatrixMatrix.h b/Eigen/src/Core/products/SelfadjointMatrixMatrix.h
index 48209636e..ee619df99 100644
--- a/Eigen/src/Core/products/SelfadjointMatrixMatrix.h
+++ b/Eigen/src/Core/products/SelfadjointMatrixMatrix.h
@@ -211,7 +211,7 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint,Co
const Scalar* lhs, Index lhsStride,
const Scalar* rhs, Index rhsStride,
Scalar* res, Index resStride,
- Scalar alpha)
+ const Scalar& alpha)
{
product_selfadjoint_matrix<Scalar, Index,
EIGEN_LOGICAL_XOR(RhsSelfAdjoint,RhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
@@ -234,7 +234,18 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs
const Scalar* _lhs, Index lhsStride,
const Scalar* _rhs, Index rhsStride,
Scalar* res, Index resStride,
- Scalar alpha)
+ const Scalar& alpha);
+};
+
+template <typename Scalar, typename Index,
+ int LhsStorageOrder, bool ConjugateLhs,
+ int RhsStorageOrder, bool ConjugateRhs>
+EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor>::run(
+ Index rows, Index cols,
+ const Scalar* _lhs, Index lhsStride,
+ const Scalar* _rhs, Index rhsStride,
+ Scalar* res, Index resStride,
+ const Scalar& alpha)
{
Index size = rows;
@@ -301,7 +312,6 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs
}
}
}
-};
// matrix * selfadjoint product
template <typename Scalar, typename Index,
@@ -315,7 +325,18 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLh
const Scalar* _lhs, Index lhsStride,
const Scalar* _rhs, Index rhsStride,
Scalar* res, Index resStride,
- Scalar alpha)
+ const Scalar& alpha);
+};
+
+template <typename Scalar, typename Index,
+ int LhsStorageOrder, bool ConjugateLhs,
+ int RhsStorageOrder, bool ConjugateRhs>
+EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor>::run(
+ Index rows, Index cols,
+ const Scalar* _lhs, Index lhsStride,
+ const Scalar* _rhs, Index rhsStride,
+ Scalar* res, Index resStride,
+ const Scalar& alpha)
{
Index size = cols;
@@ -353,7 +374,6 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLh
}
}
}
-};
} // end namespace internal
@@ -383,7 +403,7 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>
RhsIsSelfAdjoint = (RhsMode&SelfAdjoint)==SelfAdjoint
};
- template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
+ template<typename Dest> void scaleAndAddTo(Dest& dst, const Scalar& alpha) const
{
eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
diff --git a/Eigen/src/Core/products/SelfadjointMatrixMatrix_MKL.h b/Eigen/src/Core/products/SelfadjointMatrixMatrix_MKL.h
index 4e5c4125c..dfa687fef 100644
--- a/Eigen/src/Core/products/SelfadjointMatrixMatrix_MKL.h
+++ b/Eigen/src/Core/products/SelfadjointMatrixMatrix_MKL.h
@@ -23,7 +23,7 @@
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
+//
********************************************************************************
* Content : Eigen bindings to Intel(R) MKL
* Self adjoint matrix * matrix product functionality based on ?SYMM/?HEMM.
@@ -47,7 +47,7 @@ template <typename Index, \
struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLhs,RhsStorageOrder,false,ConjugateRhs,ColMajor> \
{\
\
- static EIGEN_DONT_INLINE void run( \
+ static void run( \
Index rows, Index cols, \
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \
@@ -98,7 +98,7 @@ template <typename Index, \
int RhsStorageOrder, bool ConjugateRhs> \
struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLhs,RhsStorageOrder,false,ConjugateRhs,ColMajor> \
{\
- static EIGEN_DONT_INLINE void run( \
+ static void run( \
Index rows, Index cols, \
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \
@@ -174,7 +174,7 @@ template <typename Index, \
struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateLhs,RhsStorageOrder,true,ConjugateRhs,ColMajor> \
{\
\
- static EIGEN_DONT_INLINE void run( \
+ static void run( \
Index rows, Index cols, \
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \
@@ -224,7 +224,7 @@ template <typename Index, \
int RhsStorageOrder, bool ConjugateRhs> \
struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateLhs,RhsStorageOrder,true,ConjugateRhs,ColMajor> \
{\
- static EIGEN_DONT_INLINE void run( \
+ static void run( \
Index rows, Index cols, \
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \
diff --git a/Eigen/src/Core/products/SelfadjointMatrixVector.h b/Eigen/src/Core/products/SelfadjointMatrixVector.h
index c3145c69a..f70f4894c 100644
--- a/Eigen/src/Core/products/SelfadjointMatrixVector.h
+++ b/Eigen/src/Core/products/SelfadjointMatrixVector.h
@@ -32,10 +32,18 @@ static EIGEN_DONT_INLINE void run(
const Scalar* lhs, Index lhsStride,
const Scalar* _rhs, Index rhsIncr,
Scalar* res,
+ Scalar alpha);
+};
+
+template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs, int Version>
+EIGEN_DONT_INLINE void selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,Version>::run(
+ Index size,
+ const Scalar* lhs, Index lhsStride,
+ const Scalar* _rhs, Index rhsIncr,
+ Scalar* res,
Scalar alpha)
{
typedef typename packet_traits<Scalar>::type Packet;
- typedef typename NumTraits<Scalar>::Real RealScalar;
const Index PacketSize = sizeof(Packet)/sizeof(Scalar);
enum {
@@ -153,7 +161,6 @@ static EIGEN_DONT_INLINE void run(
res[j] += alpha * t2;
}
}
-};
} // end namespace internal
@@ -180,7 +187,7 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>
SelfadjointProductMatrix(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}
- template<typename Dest> void scaleAndAddTo(Dest& dest, Scalar alpha) const
+ template<typename Dest> void scaleAndAddTo(Dest& dest, const Scalar& alpha) const
{
typedef typename Dest::Scalar ResScalar;
typedef typename Base::RhsScalar RhsScalar;
@@ -260,7 +267,7 @@ struct SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false>
SelfadjointProductMatrix(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}
- template<typename Dest> void scaleAndAddTo(Dest& dest, Scalar alpha) const
+ template<typename Dest> void scaleAndAddTo(Dest& dest, const Scalar& alpha) const
{
// let's simply transpose the product
Transpose<Dest> destT(dest);
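
Note: for reference, the user-level operation that reaches these symv kernels (scaleAndAddTo now receives alpha by const reference):

    #include <Eigen/Dense>
    using namespace Eigen;

    int main()
    {
      MatrixXd m = MatrixXd::Random(4,4);
      VectorXd x = VectorXd::Random(4), y = VectorXd::Zero(4);
      // Only the lower triangle of m is read; the product behaves as if
      // m were symmetric.
      y.noalias() += m.selfadjointView<Lower>() * x;
      return 0;
    }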
diff --git a/Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h b/Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h
index f88d483b6..86684b66d 100644
--- a/Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h
+++ b/Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h
@@ -50,7 +50,7 @@ struct selfadjoint_matrix_vector_product_symv :
#define EIGEN_MKL_SYMV_SPECIALIZE(Scalar) \
template<typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs> \
struct selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,Specialized> { \
-static EIGEN_DONT_INLINE void run( \
+static void run( \
Index size, const Scalar* lhs, Index lhsStride, \
const Scalar* _rhs, Index rhsIncr, Scalar* res, Scalar alpha) { \
enum {\
@@ -77,7 +77,7 @@ struct selfadjoint_matrix_vector_product_symv<EIGTYPE,Index,StorageOrder,UpLo,Co
{ \
typedef Matrix<EIGTYPE,Dynamic,1,ColMajor> SYMVVector;\
\
-static EIGEN_DONT_INLINE void run( \
+static void run( \
Index size, const EIGTYPE* lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* res, EIGTYPE alpha) \
{ \
diff --git a/Eigen/src/Core/products/SelfadjointProduct.h b/Eigen/src/Core/products/SelfadjointProduct.h
index 6a55f3d77..6ca4ae6c0 100644
--- a/Eigen/src/Core/products/SelfadjointProduct.h
+++ b/Eigen/src/Core/products/SelfadjointProduct.h
@@ -18,21 +18,19 @@
namespace Eigen {
-template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjLhs, bool ConjRhs>
-struct selfadjoint_rank1_update;
template<typename Scalar, typename Index, int UpLo, bool ConjLhs, bool ConjRhs>
struct selfadjoint_rank1_update<Scalar,Index,ColMajor,UpLo,ConjLhs,ConjRhs>
{
- static void run(Index size, Scalar* mat, Index stride, const Scalar* vec, Scalar alpha)
+ static void run(Index size, Scalar* mat, Index stride, const Scalar* vecX, const Scalar* vecY, const Scalar& alpha)
{
internal::conj_if<ConjRhs> cj;
typedef Map<const Matrix<Scalar,Dynamic,1> > OtherMap;
- typedef typename internal::conditional<ConjLhs,typename OtherMap::ConjugateReturnType,const OtherMap&>::type ConjRhsType;
+ typedef typename internal::conditional<ConjLhs,typename OtherMap::ConjugateReturnType,const OtherMap&>::type ConjLhsType;
for (Index i=0; i<size; ++i)
{
Map<Matrix<Scalar,Dynamic,1> >(mat+stride*i+(UpLo==Lower ? i : 0), (UpLo==Lower ? size-i : (i+1)))
- += (alpha * cj(vec[i])) * ConjRhsType(OtherMap(vec+(UpLo==Lower ? i : 0),UpLo==Lower ? size-i : (i+1)));
+ += (alpha * cj(vecY[i])) * ConjLhsType(OtherMap(vecX+(UpLo==Lower ? i : 0),UpLo==Lower ? size-i : (i+1)));
}
}
};
@@ -40,9 +38,9 @@ struct selfadjoint_rank1_update<Scalar,Index,ColMajor,UpLo,ConjLhs,ConjRhs>
template<typename Scalar, typename Index, int UpLo, bool ConjLhs, bool ConjRhs>
struct selfadjoint_rank1_update<Scalar,Index,RowMajor,UpLo,ConjLhs,ConjRhs>
{
- static void run(Index size, Scalar* mat, Index stride, const Scalar* vec, Scalar alpha)
+ static void run(Index size, Scalar* mat, Index stride, const Scalar* vecX, const Scalar* vecY, const Scalar& alpha)
{
- selfadjoint_rank1_update<Scalar,Index,ColMajor,UpLo==Lower?Upper:Lower,ConjRhs,ConjLhs>::run(size,mat,stride,vec,alpha);
+ selfadjoint_rank1_update<Scalar,Index,ColMajor,UpLo==Lower?Upper:Lower,ConjRhs,ConjLhs>::run(size,mat,stride,vecY,vecX,alpha);
}
};
@@ -52,7 +50,7 @@ struct selfadjoint_product_selector;
template<typename MatrixType, typename OtherType, int UpLo>
struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,true>
{
- static void run(MatrixType& mat, const OtherType& other, typename MatrixType::Scalar alpha)
+ static void run(MatrixType& mat, const OtherType& other, const typename MatrixType::Scalar& alpha)
{
typedef typename MatrixType::Scalar Scalar;
typedef typename MatrixType::Index Index;
@@ -78,14 +76,14 @@ struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,true>
selfadjoint_rank1_update<Scalar,Index,StorageOrder,UpLo,
OtherBlasTraits::NeedToConjugate && NumTraits<Scalar>::IsComplex,
(!OtherBlasTraits::NeedToConjugate) && NumTraits<Scalar>::IsComplex>
- ::run(other.size(), mat.data(), mat.outerStride(), actualOtherPtr, actualAlpha);
+ ::run(other.size(), mat.data(), mat.outerStride(), actualOtherPtr, actualOtherPtr, actualAlpha);
}
};
template<typename MatrixType, typename OtherType, int UpLo>
struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,false>
{
- static void run(MatrixType& mat, const OtherType& other, typename MatrixType::Scalar alpha)
+ static void run(MatrixType& mat, const OtherType& other, const typename MatrixType::Scalar& alpha)
{
typedef typename MatrixType::Scalar Scalar;
typedef typename MatrixType::Index Index;
@@ -113,7 +111,7 @@ struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,false>
template<typename MatrixType, unsigned int UpLo>
template<typename DerivedU>
SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo>
-::rankUpdate(const MatrixBase<DerivedU>& u, Scalar alpha)
+::rankUpdate(const MatrixBase<DerivedU>& u, const Scalar& alpha)
{
selfadjoint_product_selector<MatrixType,DerivedU,UpLo>::run(_expression().const_cast_derived(), u.derived(), alpha);
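
Note: selfadjoint_rank1_update now takes two vector pointers (vecX, vecY) so the same kernel serves both the symmetric rank-1 update and the general outer-product-to-triangular path added earlier; the rank-1 caller simply passes the same pointer twice. The public API is unchanged:

    #include <Eigen/Dense>
    using namespace Eigen;

    int main()
    {
      MatrixXcd m = MatrixXcd::Identity(3,3);
      VectorXcd u = VectorXcd::Random(3);
      // m += 2 * u * u^H on the lower triangle; internally this reaches
      // selfadjoint_rank1_update with vecX == vecY.
      m.selfadjointView<Lower>().rankUpdate(u, 2.0);
      return 0;
    }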
diff --git a/Eigen/src/Core/products/SelfadjointRank2Update.h b/Eigen/src/Core/products/SelfadjointRank2Update.h
index 57a98cc2d..4b57f189d 100644
--- a/Eigen/src/Core/products/SelfadjointRank2Update.h
+++ b/Eigen/src/Core/products/SelfadjointRank2Update.h
@@ -24,7 +24,7 @@ struct selfadjoint_rank2_update_selector;
template<typename Scalar, typename Index, typename UType, typename VType>
struct selfadjoint_rank2_update_selector<Scalar,Index,UType,VType,Lower>
{
- static void run(Scalar* mat, Index stride, const UType& u, const VType& v, Scalar alpha)
+ static void run(Scalar* mat, Index stride, const UType& u, const VType& v, const Scalar& alpha)
{
const Index size = u.size();
for (Index i=0; i<size; ++i)
@@ -39,7 +39,7 @@ struct selfadjoint_rank2_update_selector<Scalar,Index,UType,VType,Lower>
template<typename Scalar, typename Index, typename UType, typename VType>
struct selfadjoint_rank2_update_selector<Scalar,Index,UType,VType,Upper>
{
- static void run(Scalar* mat, Index stride, const UType& u, const VType& v, Scalar alpha)
+ static void run(Scalar* mat, Index stride, const UType& u, const VType& v, const Scalar& alpha)
{
const Index size = u.size();
for (Index i=0; i<size; ++i)
@@ -58,7 +58,7 @@ template<bool Cond, typename T> struct conj_expr_if
template<typename MatrixType, unsigned int UpLo>
template<typename DerivedU, typename DerivedV>
SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo>
-::rankUpdate(const MatrixBase<DerivedU>& u, const MatrixBase<DerivedV>& v, Scalar alpha)
+::rankUpdate(const MatrixBase<DerivedU>& u, const MatrixBase<DerivedV>& v, const Scalar& alpha)
{
typedef internal::blas_traits<DerivedU> UBlasTraits;
typedef typename UBlasTraits::DirectLinearAccessType ActualUType;
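
Note: the same const-reference signature change applies to the rank-2 update; usage for reference:

    #include <Eigen/Dense>
    using namespace Eigen;

    int main()
    {
      MatrixXd m = MatrixXd::Zero(3,3);
      VectorXd u = VectorXd::Random(3), v = VectorXd::Random(3);
      // Upper triangle of m += 0.5 * (u*v^T + v*u^T).
      m.selfadjointView<Upper>().rankUpdate(u, v, 0.5);
      return 0;
    }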
diff --git a/Eigen/src/Core/products/TriangularMatrixMatrix.h b/Eigen/src/Core/products/TriangularMatrixMatrix.h
index 92cba66f6..8110507b5 100644
--- a/Eigen/src/Core/products/TriangularMatrixMatrix.h
+++ b/Eigen/src/Core/products/TriangularMatrixMatrix.h
@@ -61,7 +61,7 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,LhsIsTriangular,
const Scalar* lhs, Index lhsStride,
const Scalar* rhs, Index rhsStride,
Scalar* res, Index resStride,
- Scalar alpha, level3_blocking<Scalar,Scalar>& blocking)
+ const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
{
product_triangular_matrix_matrix<Scalar, Index,
(Mode&(UnitDiag|ZeroDiag)) | ((Mode&Upper) ? Lower : Upper),
@@ -96,7 +96,20 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
const Scalar* _lhs, Index lhsStride,
const Scalar* _rhs, Index rhsStride,
Scalar* res, Index resStride,
- Scalar alpha, level3_blocking<Scalar,Scalar>& blocking)
+ const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
+};
+
+template <typename Scalar, typename Index, int Mode,
+ int LhsStorageOrder, bool ConjugateLhs,
+ int RhsStorageOrder, bool ConjugateRhs, int Version>
+EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true,
+ LhsStorageOrder,ConjugateLhs,
+ RhsStorageOrder,ConjugateRhs,ColMajor,Version>::run(
+ Index _rows, Index _cols, Index _depth,
+ const Scalar* _lhs, Index lhsStride,
+ const Scalar* _rhs, Index rhsStride,
+ Scalar* res, Index resStride,
+ const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
{
// strip zeros
Index diagSize = (std::min)(_rows,_depth);
@@ -203,15 +216,14 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
}
}
}
-};
// implements col-major += alpha * op(general) * op(triangular)
template <typename Scalar, typename Index, int Mode,
int LhsStorageOrder, bool ConjugateLhs,
int RhsStorageOrder, bool ConjugateRhs, int Version>
struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
- LhsStorageOrder,ConjugateLhs,
- RhsStorageOrder,ConjugateRhs,ColMajor,Version>
+ LhsStorageOrder,ConjugateLhs,
+ RhsStorageOrder,ConjugateRhs,ColMajor,Version>
{
typedef gebp_traits<Scalar,Scalar> Traits;
enum {
@@ -225,7 +237,20 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
const Scalar* _lhs, Index lhsStride,
const Scalar* _rhs, Index rhsStride,
Scalar* res, Index resStride,
- Scalar alpha, level3_blocking<Scalar,Scalar>& blocking)
+ const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
+};
+
+template <typename Scalar, typename Index, int Mode,
+ int LhsStorageOrder, bool ConjugateLhs,
+ int RhsStorageOrder, bool ConjugateRhs, int Version>
+EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,false,
+ LhsStorageOrder,ConjugateLhs,
+ RhsStorageOrder,ConjugateRhs,ColMajor,Version>::run(
+ Index _rows, Index _cols, Index _depth,
+ const Scalar* _lhs, Index lhsStride,
+ const Scalar* _rhs, Index rhsStride,
+ Scalar* res, Index resStride,
+ const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
{
// strip zeros
Index diagSize = (std::min)(_cols,_depth);
@@ -343,7 +368,6 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
}
}
}
-};
/***************************************************************************
* Wrapper to product_triangular_matrix_matrix
@@ -364,7 +388,7 @@ struct TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false>
TriangularProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}
- template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
+ template<typename Dest> void scaleAndAddTo(Dest& dst, const Scalar& alpha) const
{
typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs);
typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs);
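
Note: a hedged usage sketch of the wrapper above; TriangularProduct::scaleAndAddTo is what an accumulating product assignment ends up calling:

    #include <Eigen/Dense>
    using namespace Eigen;

    int main()
    {
      MatrixXd a = MatrixXd::Random(4,4), b = MatrixXd::Random(4,4);
      MatrixXd c = MatrixXd::Zero(4,4);
      // Triangular * dense product accumulated into c (the alpha == 1 path).
      c.noalias() += a.triangularView<Upper>() * b;
      return 0;
    }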
diff --git a/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h b/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h
index 4d20de617..ba41a1c99 100644
--- a/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h
+++ b/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h
@@ -91,7 +91,7 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \
conjA = ((LhsStorageOrder==ColMajor) && ConjugateLhs) ? 1 : 0 \
}; \
\
- static EIGEN_DONT_INLINE void run( \
+ static void run( \
Index _rows, Index _cols, Index _depth, \
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \
@@ -205,7 +205,7 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \
conjA = ((RhsStorageOrder==ColMajor) && ConjugateRhs) ? 1 : 0 \
}; \
\
- static EIGEN_DONT_INLINE void run( \
+ static void run( \
Index _rows, Index _cols, Index _depth, \
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \
diff --git a/Eigen/src/Core/products/TriangularMatrixVector.h b/Eigen/src/Core/products/TriangularMatrixVector.h
index b1c10c201..c8b7d28c4 100644
--- a/Eigen/src/Core/products/TriangularMatrixVector.h
+++ b/Eigen/src/Core/products/TriangularMatrixVector.h
@@ -27,7 +27,13 @@ struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,C
HasZeroDiag = (Mode & ZeroDiag)==ZeroDiag
};
static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const LhsScalar* _lhs, Index lhsStride,
- const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, ResScalar alpha)
+ const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, const ResScalar& alpha);
+};
+
+template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs, int Version>
+EIGEN_DONT_INLINE void triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,ColMajor,Version>
+ ::run(Index _rows, Index _cols, const LhsScalar* _lhs, Index lhsStride,
+ const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, const ResScalar& alpha)
{
static const Index PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH;
Index size = (std::min)(_rows,_cols);
@@ -78,7 +84,6 @@ struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,C
_res, resIncr, alpha);
}
}
-};
template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs,int Version>
struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,RowMajor,Version>
@@ -89,8 +94,14 @@ struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,C
HasUnitDiag = (Mode & UnitDiag)==UnitDiag,
HasZeroDiag = (Mode & ZeroDiag)==ZeroDiag
};
- static void run(Index _rows, Index _cols, const LhsScalar* _lhs, Index lhsStride,
- const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, ResScalar alpha)
+ static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const LhsScalar* _lhs, Index lhsStride,
+ const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, const ResScalar& alpha);
+};
+
+template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs,int Version>
+EIGEN_DONT_INLINE void triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,RowMajor,Version>
+ ::run(Index _rows, Index _cols, const LhsScalar* _lhs, Index lhsStride,
+ const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, const ResScalar& alpha)
{
static const Index PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH;
Index diagSize = (std::min)(_rows,_cols);
@@ -141,7 +152,6 @@ struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,C
&res.coeffRef(diagSize), resIncr, alpha);
}
}
-};
/***************************************************************************
* Wrapper to product_triangular_vector
@@ -171,7 +181,7 @@ struct TriangularProduct<Mode,true,Lhs,false,Rhs,true>
TriangularProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}
- template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
+ template<typename Dest> void scaleAndAddTo(Dest& dst, const Scalar& alpha) const
{
eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
@@ -187,7 +197,7 @@ struct TriangularProduct<Mode,false,Lhs,true,Rhs,false>
TriangularProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}
- template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
+ template<typename Dest> void scaleAndAddTo(Dest& dst, const Scalar& alpha) const
{
eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
@@ -205,7 +215,7 @@ namespace internal {
template<> struct trmv_selector<ColMajor>
{
template<int Mode, typename Lhs, typename Rhs, typename Dest>
- static void run(const TriangularProduct<Mode,true,Lhs,false,Rhs,true>& prod, Dest& dest, typename TriangularProduct<Mode,true,Lhs,false,Rhs,true>::Scalar alpha)
+ static void run(const TriangularProduct<Mode,true,Lhs,false,Rhs,true>& prod, Dest& dest, const typename TriangularProduct<Mode,true,Lhs,false,Rhs,true>::Scalar& alpha)
{
typedef TriangularProduct<Mode,true,Lhs,false,Rhs,true> ProductType;
typedef typename ProductType::Index Index;
@@ -246,7 +256,7 @@ template<> struct trmv_selector<ColMajor>
if(!evalToDest)
{
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
- int size = dest.size();
+ Index size = dest.size();
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
#endif
if(!alphaIsCompatible)
@@ -281,7 +291,7 @@ template<> struct trmv_selector<ColMajor>
template<> struct trmv_selector<RowMajor>
{
template<int Mode, typename Lhs, typename Rhs, typename Dest>
- static void run(const TriangularProduct<Mode,true,Lhs,false,Rhs,true>& prod, Dest& dest, typename TriangularProduct<Mode,true,Lhs,false,Rhs,true>::Scalar alpha)
+ static void run(const TriangularProduct<Mode,true,Lhs,false,Rhs,true>& prod, Dest& dest, const typename TriangularProduct<Mode,true,Lhs,false,Rhs,true>::Scalar& alpha)
{
typedef TriangularProduct<Mode,true,Lhs,false,Rhs,true> ProductType;
typedef typename ProductType::LhsScalar LhsScalar;
diff --git a/Eigen/src/Core/products/TriangularMatrixVector_MKL.h b/Eigen/src/Core/products/TriangularMatrixVector_MKL.h
index 3c2c3049a..09f110da7 100644
--- a/Eigen/src/Core/products/TriangularMatrixVector_MKL.h
+++ b/Eigen/src/Core/products/TriangularMatrixVector_MKL.h
@@ -50,7 +50,7 @@ struct triangular_matrix_vector_product_trmv :
#define EIGEN_MKL_TRMV_SPECIALIZE(Scalar) \
template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
struct triangular_matrix_vector_product<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,ColMajor,Specialized> { \
- static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const Scalar* _lhs, Index lhsStride, \
+ static void run(Index _rows, Index _cols, const Scalar* _lhs, Index lhsStride, \
const Scalar* _rhs, Index rhsIncr, Scalar* _res, Index resIncr, Scalar alpha) { \
triangular_matrix_vector_product_trmv<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,ColMajor>::run( \
_rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \
@@ -58,7 +58,7 @@ struct triangular_matrix_vector_product<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs
}; \
template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
struct triangular_matrix_vector_product<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,RowMajor,Specialized> { \
- static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const Scalar* _lhs, Index lhsStride, \
+ static void run(Index _rows, Index _cols, const Scalar* _lhs, Index lhsStride, \
const Scalar* _rhs, Index rhsIncr, Scalar* _res, Index resIncr, Scalar alpha) { \
triangular_matrix_vector_product_trmv<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,RowMajor>::run( \
_rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \
@@ -81,8 +81,8 @@ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,
IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \
LowUp = IsLower ? Lower : Upper \
}; \
- static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const EIGTYPE* _lhs, Index lhsStride, \
- const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* _res, Index resIncr, EIGTYPE alpha) \
+ static void run(Index _rows, Index _cols, const EIGTYPE* _lhs, Index lhsStride, \
+ const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* _res, Index resIncr, EIGTYPE alpha) \
{ \
if (ConjLhs || IsZeroDiag) { \
triangular_matrix_vector_product<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,ColMajor,BuiltIn>::run( \
@@ -166,8 +166,8 @@ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,
IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \
LowUp = IsLower ? Lower : Upper \
}; \
- static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const EIGTYPE* _lhs, Index lhsStride, \
- const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* _res, Index resIncr, EIGTYPE alpha) \
+ static void run(Index _rows, Index _cols, const EIGTYPE* _lhs, Index lhsStride, \
+ const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* _res, Index resIncr, EIGTYPE alpha) \
{ \
if (IsZeroDiag) { \
triangular_matrix_vector_product<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,RowMajor,BuiltIn>::run( \
diff --git a/Eigen/src/Core/products/TriangularSolverMatrix.h b/Eigen/src/Core/products/TriangularSolverMatrix.h
index a49ea3183..f103eae72 100644
--- a/Eigen/src/Core/products/TriangularSolverMatrix.h
+++ b/Eigen/src/Core/products/TriangularSolverMatrix.h
@@ -18,7 +18,7 @@ namespace internal {
template <typename Scalar, typename Index, int Side, int Mode, bool Conjugate, int TriStorageOrder>
struct triangular_solve_matrix<Scalar,Index,Side,Mode,Conjugate,TriStorageOrder,RowMajor>
{
- static EIGEN_DONT_INLINE void run(
+ static void run(
Index size, Index cols,
const Scalar* tri, Index triStride,
Scalar* _other, Index otherStride,
@@ -42,6 +42,13 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageO
Index size, Index otherSize,
const Scalar* _tri, Index triStride,
Scalar* _other, Index otherStride,
+ level3_blocking<Scalar,Scalar>& blocking);
+};
+template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder>
+EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor>::run(
+ Index size, Index otherSize,
+ const Scalar* _tri, Index triStride,
+ Scalar* _other, Index otherStride,
level3_blocking<Scalar,Scalar>& blocking)
{
Index cols = otherSize;
@@ -173,7 +180,6 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageO
}
}
}
-};
/* Optimized triangular solver with multiple left hand sides and the triangular matrix on the right
*/
@@ -184,6 +190,13 @@ struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorage
Index size, Index otherSize,
const Scalar* _tri, Index triStride,
Scalar* _other, Index otherStride,
+ level3_blocking<Scalar,Scalar>& blocking);
+};
+template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder>
+EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor>::run(
+ Index size, Index otherSize,
+ const Scalar* _tri, Index triStride,
+ Scalar* _other, Index otherStride,
level3_blocking<Scalar,Scalar>& blocking)
{
Index rows = otherSize;
@@ -308,7 +321,6 @@ struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorage
}
}
}
-};
} // end namespace internal
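
Note: the corresponding user-level operation for these solve kernels, for reference:

    #include <Eigen/Dense>
    using namespace Eigen;

    int main()
    {
      MatrixXd L = MatrixXd::Random(4,4);
      L.diagonal().array() += 4.0;   // keep the solve well conditioned
      MatrixXd B = MatrixXd::Random(4,2);
      // In-place solve of L * X = B with two right-hand sides; this
      // exercises the OnTheLeft path above.
      L.triangularView<Lower>().solveInPlace(B);
      return 0;
    }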
diff --git a/Eigen/src/Core/products/TriangularSolverMatrix_MKL.h b/Eigen/src/Core/products/TriangularSolverMatrix_MKL.h
index a4f508b2e..6a0bb8339 100644
--- a/Eigen/src/Core/products/TriangularSolverMatrix_MKL.h
+++ b/Eigen/src/Core/products/TriangularSolverMatrix_MKL.h
@@ -48,7 +48,7 @@ struct triangular_solve_matrix<EIGTYPE,Index,OnTheLeft,Mode,Conjugate,TriStorage
IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \
conjA = ((TriStorageOrder==ColMajor) && Conjugate) ? 1 : 0 \
}; \
- static EIGEN_DONT_INLINE void run( \
+ static void run( \
Index size, Index otherSize, \
const EIGTYPE* _tri, Index triStride, \
EIGTYPE* _other, Index otherStride, level3_blocking<EIGTYPE,EIGTYPE>& /*blocking*/) \
@@ -103,7 +103,7 @@ struct triangular_solve_matrix<EIGTYPE,Index,OnTheRight,Mode,Conjugate,TriStorag
IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \
conjA = ((TriStorageOrder==ColMajor) && Conjugate) ? 1 : 0 \
}; \
- static EIGEN_DONT_INLINE void run( \
+ static void run( \
Index size, Index otherSize, \
const EIGTYPE* _tri, Index triStride, \
EIGTYPE* _other, Index otherStride, level3_blocking<EIGTYPE,EIGTYPE>& /*blocking*/) \
diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h
index bf6a9293c..64348cd16 100644
--- a/Eigen/src/Core/util/Macros.h
+++ b/Eigen/src/Core/util/Macros.h
@@ -13,7 +13,7 @@
#define EIGEN_WORLD_VERSION 3
#define EIGEN_MAJOR_VERSION 1
-#define EIGEN_MINOR_VERSION 90
+#define EIGEN_MINOR_VERSION 91
#define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || (EIGEN_WORLD_VERSION>=x && \
(EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \
diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h
index b03bc3701..3ca666fd9 100644
--- a/Eigen/src/Core/util/Memory.h
+++ b/Eigen/src/Core/util/Memory.h
@@ -19,6 +19,10 @@
#ifndef EIGEN_MEMORY_H
#define EIGEN_MEMORY_H
+#ifndef EIGEN_MALLOC_ALREADY_ALIGNED
+
+// Try to determine automatically if malloc is already aligned.
+
// On 64-bit systems, glibc's malloc returns 16-byte-aligned pointers, see:
// http://www.gnu.org/s/libc/manual/html_node/Aligned-Memory-Blocks.html
// This is true at least since glibc 2.8.
@@ -27,7 +31,7 @@
// page 114, "[The] LP64 model [...] is used by all 64-bit UNIX ports" so it's indeed
// quite safe, at least within the context of glibc, to equate 64-bit with LP64.
#if defined(__GLIBC__) && ((__GLIBC__>=2 && __GLIBC_MINOR__ >= 8) || __GLIBC__>2) \
- && defined(__LP64__)
+ && defined(__LP64__) && ! defined( __SANITIZE_ADDRESS__ )
#define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 1
#else
#define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 0
@@ -52,6 +56,8 @@
#define EIGEN_MALLOC_ALREADY_ALIGNED 0
#endif
+#endif
+
#if ((defined __QNXNTO__) || (defined _GNU_SOURCE) || ((defined _XOPEN_SOURCE) && (_XOPEN_SOURCE >= 600))) \
&& (defined _POSIX_ADVISORY_INFO) && (_POSIX_ADVISORY_INFO > 0)
#define EIGEN_HAS_POSIX_MEMALIGN 1
@@ -88,11 +94,11 @@ inline void throw_std_bad_alloc()
/** \internal Like malloc, but the returned pointer is guaranteed to be 16-byte aligned.
* Fast, but wastes 16 additional bytes of memory. Does not throw any exception.
*/
-inline void* handmade_aligned_malloc(size_t size)
+inline void* handmade_aligned_malloc(std::size_t size)
{
void *original = std::malloc(size+16);
if (original == 0) return 0;
- void *aligned = reinterpret_cast<void*>((reinterpret_cast<size_t>(original) & ~(size_t(15))) + 16);
+ void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(15))) + 16);
*(reinterpret_cast<void**>(aligned) - 1) = original;
return aligned;
}
@@ -108,13 +114,18 @@ inline void handmade_aligned_free(void *ptr)
* Since we know that our handmade version is based on std::realloc
* we can use std::realloc to implement efficient reallocation.
*/
-inline void* handmade_aligned_realloc(void* ptr, size_t size, size_t = 0)
+inline void* handmade_aligned_realloc(void* ptr, std::size_t size, std::size_t = 0)
{
if (ptr == 0) return handmade_aligned_malloc(size);
void *original = *(reinterpret_cast<void**>(ptr) - 1);
+ std::ptrdiff_t previous_offset = static_cast<char *>(ptr)-static_cast<char *>(original);
original = std::realloc(original,size+16);
if (original == 0) return 0;
- void *aligned = reinterpret_cast<void*>((reinterpret_cast<size_t>(original) & ~(size_t(15))) + 16);
+ void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(15))) + 16);
+ void *previous_aligned = static_cast<char *>(original)+previous_offset;
+ if(aligned!=previous_aligned)
+ std::memmove(aligned, previous_aligned, size);
+
*(reinterpret_cast<void**>(aligned) - 1) = original;
return aligned;
}
@@ -123,7 +134,7 @@ inline void* handmade_aligned_realloc(void* ptr, size_t size, size_t = 0)
*** Implementation of generic aligned realloc (when no realloc can be used)***
*****************************************************************************/
-void* aligned_malloc(size_t size);
+void* aligned_malloc(std::size_t size);
void aligned_free(void *ptr);
/** \internal
@@ -227,7 +238,7 @@ inline void aligned_free(void *ptr)
std::free(ptr);
#elif EIGEN_HAS_MM_MALLOC
_mm_free(ptr);
- #elif defined(_MSC_VER)
+ #elif defined(_MSC_VER) && (!defined(_WIN32_WCE))
_aligned_free(ptr);
#else
handmade_aligned_free(ptr);
@@ -446,7 +457,6 @@ template<typename T, bool Align> inline void conditional_aligned_delete_auto(T *
template<typename Scalar, typename Index>
static inline Index first_aligned(const Scalar* array, Index size)
{
- typedef typename packet_traits<Scalar>::type Packet;
enum { PacketSize = packet_traits<Scalar>::size,
PacketAlignedMask = PacketSize-1
};
@@ -745,11 +755,16 @@ public:
# if defined(__PIC__) && defined(__i386__)
// Case for x86 with PIC
# define EIGEN_CPUID(abcd,func,id) \
- __asm__ __volatile__ ("xchgl %%ebx, %%esi;cpuid; xchgl %%ebx,%%esi": "=a" (abcd[0]), "=S" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "a" (func), "c" (id));
+ __asm__ __volatile__ ("xchgl %%ebx, %k1;cpuid; xchgl %%ebx,%k1": "=a" (abcd[0]), "=&r" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "a" (func), "c" (id));
+# elif defined(__PIC__) && defined(__x86_64__)
+  // Case for x64 with PIC. In theory this is only a problem with recent gcc and with a medium or large code model, not with the default small code model.
+ // However, we cannot detect which code model is used, and the xchg overhead is negligible anyway.
+# define EIGEN_CPUID(abcd,func,id) \
+ __asm__ __volatile__ ("xchg{q}\t{%%}rbx, %q1; cpuid; xchg{q}\t{%%}rbx, %q1": "=a" (abcd[0]), "=&r" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "0" (func), "2" (id));
# else
// Case for x86_64 or x86 w/o PIC
# define EIGEN_CPUID(abcd,func,id) \
- __asm__ __volatile__ ("cpuid": "=a" (abcd[0]), "=b" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "a" (func), "c" (id) );
+ __asm__ __volatile__ ("cpuid": "=a" (abcd[0]), "=b" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "0" (func), "2" (id) );
# endif
# elif defined(_MSC_VER)
# if (_MSC_VER > 1500) && ( defined(_M_IX86) || defined(_M_X64) )
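
Note: two things happen in the Memory.h hunk. First, the realloc fix: std::realloc may return a block whose 16-byte alignment offset differs from the old block's, so the payload must be shifted (the new memmove) before the original pointer is re-stored ahead of the aligned address. Second, wrapping the detection logic in #ifndef EIGEN_MALLOC_ALREADY_ALIGNED makes it user-overridable; a minimal sketch:

    // Force the fallback aligned allocator even where glibc's malloc
    // would be detected as sufficiently aligned (useful, e.g., under
    // AddressSanitizer, per the __SANITIZE_ADDRESS__ check added above).
    #define EIGEN_MALLOC_ALREADY_ALIGNED 0
    #include <Eigen/Core>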
diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h
index a5f31164d..71d587108 100644
--- a/Eigen/src/Core/util/Meta.h
+++ b/Eigen/src/Core/util/Meta.h
@@ -186,23 +186,35 @@ template<int Y, int InfX, int SupX>
class meta_sqrt<Y, InfX, SupX, true> { public: enum { ret = (SupX*SupX <= Y) ? SupX : InfX }; };
/** \internal determines whether the product of two numeric types is allowed and what the return type is */
-template<typename T, typename U> struct scalar_product_traits;
+template<typename T, typename U> struct scalar_product_traits
+{
+ enum { Defined = 0 };
+};
template<typename T> struct scalar_product_traits<T,T>
{
- //enum { Cost = NumTraits<T>::MulCost };
+ enum {
+ // Cost = NumTraits<T>::MulCost,
+ Defined = 1
+ };
typedef T ReturnType;
};
template<typename T> struct scalar_product_traits<T,std::complex<T> >
{
- //enum { Cost = 2*NumTraits<T>::MulCost };
+ enum {
+ // Cost = 2*NumTraits<T>::MulCost,
+ Defined = 1
+ };
typedef std::complex<T> ReturnType;
};
template<typename T> struct scalar_product_traits<std::complex<T>, T>
{
- //enum { Cost = 2*NumTraits<T>::MulCost };
+ enum {
+ // Cost = 2*NumTraits<T>::MulCost,
+ Defined = 1
+ };
typedef std::complex<T> ReturnType;
};
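Giving the primary template a Defined = 0 fallback means client code can probe whether a mixed-scalar product is supported instead of triggering an incomplete-type error. A minimal sketch of such a probe (helper name hypothetical):

    #include <complex>

    // Compile-time check built on scalar_product_traits::Defined.
    template<typename Lhs, typename Rhs>
    struct is_product_allowed
    {
      enum { value = Eigen::internal::scalar_product_traits<Lhs, Rhs>::Defined };
    };

    // is_product_allowed<double, std::complex<double> >::value == 1
    // is_product_allowed<double, std::complex<float> >::value  == 0  (fallback)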
diff --git a/Eigen/src/Eigenvalues/ComplexEigenSolver.h b/Eigen/src/Eigenvalues/ComplexEigenSolver.h
index 95c70aecb..bd41bf7ed 100644
--- a/Eigen/src/Eigenvalues/ComplexEigenSolver.h
+++ b/Eigen/src/Eigenvalues/ComplexEigenSolver.h
@@ -242,7 +242,7 @@ template<typename _MatrixType> class ComplexEigenSolver
EigenvectorType m_matX;
private:
- void doComputeEigenvectors(RealScalar matrixnorm);
+ void doComputeEigenvectors(const RealScalar& matrixnorm);
void sortEigenvalues(bool computeEigenvectors);
};
@@ -252,7 +252,7 @@ ComplexEigenSolver<MatrixType>&
ComplexEigenSolver<MatrixType>::compute(const MatrixType& matrix, bool computeEigenvectors)
{
  // this code is inspired by Jampack
- assert(matrix.cols() == matrix.rows());
+ eigen_assert(matrix.cols() == matrix.rows());
// Do a complex Schur decomposition, A = U T U^*
// The eigenvalues are on the diagonal of T.
@@ -273,7 +273,7 @@ ComplexEigenSolver<MatrixType>::compute(const MatrixType& matrix, bool computeEi
template<typename MatrixType>
-void ComplexEigenSolver<MatrixType>::doComputeEigenvectors(RealScalar matrixnorm)
+void ComplexEigenSolver<MatrixType>::doComputeEigenvectors(const RealScalar& matrixnorm)
{
const Index n = m_eivalues.size();
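The Scalar-to-const-Scalar& signature changes recur throughout this patch: for built-in types the two are equivalent, but for heavyweight custom scalars (e.g. arbitrary-precision types) passing by const reference avoids a copy per call. A hypothetical illustration of the pattern:

    // Free for double; avoids a deep copy for an expensive user-defined Scalar.
    template<typename Scalar>
    Scalar scaled(const Scalar& x, const Scalar& factor) // illustrative only
    {
      return x * factor;
    }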
diff --git a/Eigen/src/Eigenvalues/ComplexSchur.h b/Eigen/src/Eigenvalues/ComplexSchur.h
index 57ce23e42..62b57ff66 100644
--- a/Eigen/src/Eigenvalues/ComplexSchur.h
+++ b/Eigen/src/Eigenvalues/ComplexSchur.h
@@ -364,7 +364,6 @@ struct complex_schur_reduce_to_hessenberg<MatrixType, false>
static void run(ComplexSchur<MatrixType>& _this, const MatrixType& matrix, bool computeU)
{
typedef typename ComplexSchur<MatrixType>::ComplexScalar ComplexScalar;
- typedef typename ComplexSchur<MatrixType>::ComplexMatrixType ComplexMatrixType;
  // Note: m_hess is over RealScalar; m_matT and m_matU are over ComplexScalar
_this.m_hess.compute(matrix);
diff --git a/Eigen/src/Eigenvalues/ComplexSchur_MKL.h b/Eigen/src/Eigenvalues/ComplexSchur_MKL.h
index ada7a24e3..91496ae5b 100644
--- a/Eigen/src/Eigenvalues/ComplexSchur_MKL.h
+++ b/Eigen/src/Eigenvalues/ComplexSchur_MKL.h
@@ -49,7 +49,7 @@ ComplexSchur<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >::compute(const Matri
typedef MatrixType::RealScalar RealScalar; \
typedef std::complex<RealScalar> ComplexScalar; \
\
- assert(matrix.cols() == matrix.rows()); \
+ eigen_assert(matrix.cols() == matrix.rows()); \
\
m_matUisUptodate = false; \
if(matrix.cols() == 1) \
diff --git a/Eigen/src/Eigenvalues/EigenSolver.h b/Eigen/src/Eigenvalues/EigenSolver.h
index 201ea620d..594ec6576 100644
--- a/Eigen/src/Eigenvalues/EigenSolver.h
+++ b/Eigen/src/Eigenvalues/EigenSolver.h
@@ -366,7 +366,7 @@ EigenSolver<MatrixType>::compute(const MatrixType& matrix, bool computeEigenvect
{
using std::sqrt;
using std::abs;
- assert(matrix.cols() == matrix.rows());
+ eigen_assert(matrix.cols() == matrix.rows());
// Reduce to real Schur form.
m_realSchur.compute(matrix, computeEigenvectors);
@@ -410,7 +410,7 @@ EigenSolver<MatrixType>::compute(const MatrixType& matrix, bool computeEigenvect
// Complex scalar division.
template<typename Scalar>
-std::complex<Scalar> cdiv(Scalar xr, Scalar xi, Scalar yr, Scalar yi)
+std::complex<Scalar> cdiv(const Scalar& xr, const Scalar& xi, const Scalar& yr, const Scalar& yi)
{
using std::abs;
Scalar r,d;
diff --git a/Eigen/src/Eigenvalues/HessenbergDecomposition.h b/Eigen/src/Eigenvalues/HessenbergDecomposition.h
index b8378b08a..ebd8ae908 100644
--- a/Eigen/src/Eigenvalues/HessenbergDecomposition.h
+++ b/Eigen/src/Eigenvalues/HessenbergDecomposition.h
@@ -291,7 +291,7 @@ template<typename _MatrixType> class HessenbergDecomposition
template<typename MatrixType>
void HessenbergDecomposition<MatrixType>::_compute(MatrixType& matA, CoeffVectorType& hCoeffs, VectorType& temp)
{
- assert(matA.rows()==matA.cols());
+ eigen_assert(matA.rows()==matA.cols());
Index n = matA.rows();
temp.resize(n);
for (Index i = 0; i<n-1; ++i)
diff --git a/Eigen/src/Eigenvalues/RealQZ.h b/Eigen/src/Eigenvalues/RealQZ.h
index dcaa9fbd6..5706eeebe 100644
--- a/Eigen/src/Eigenvalues/RealQZ.h
+++ b/Eigen/src/Eigenvalues/RealQZ.h
@@ -559,7 +559,7 @@ namespace Eigen {
const Index dim = A_in.cols();
- assert (A_in.rows()==dim && A_in.cols()==dim
+ eigen_assert (A_in.rows()==dim && A_in.cols()==dim
&& B_in.rows()==dim && B_in.cols()==dim
&& "Need square matrices of the same dimension");
diff --git a/Eigen/src/Eigenvalues/RealSchur.h b/Eigen/src/Eigenvalues/RealSchur.h
index 7680f9929..64d136341 100644
--- a/Eigen/src/Eigenvalues/RealSchur.h
+++ b/Eigen/src/Eigenvalues/RealSchur.h
@@ -234,8 +234,8 @@ template<typename _MatrixType> class RealSchur
typedef Matrix<Scalar,3,1> Vector3s;
Scalar computeNormOfT();
- Index findSmallSubdiagEntry(Index iu, Scalar norm);
- void splitOffTwoRows(Index iu, bool computeU, Scalar exshift);
+ Index findSmallSubdiagEntry(Index iu, const Scalar& norm);
+ void splitOffTwoRows(Index iu, bool computeU, const Scalar& exshift);
void computeShift(Index iu, Index iter, Scalar& exshift, Vector3s& shiftInfo);
void initFrancisQRStep(Index il, Index iu, const Vector3s& shiftInfo, Index& im, Vector3s& firstHouseholderVector);
void performFrancisQRStep(Index il, Index im, Index iu, bool computeU, const Vector3s& firstHouseholderVector, Scalar* workspace);
@@ -245,7 +245,7 @@ template<typename _MatrixType> class RealSchur
template<typename MatrixType>
RealSchur<MatrixType>& RealSchur<MatrixType>::compute(const MatrixType& matrix, bool computeU)
{
- assert(matrix.cols() == matrix.rows());
+ eigen_assert(matrix.cols() == matrix.rows());
Index maxIters = m_maxIters;
if (maxIters == -1)
maxIters = m_maxIterationsPerRow * matrix.rows();
@@ -343,7 +343,7 @@ inline typename MatrixType::Scalar RealSchur<MatrixType>::computeNormOfT()
/** \internal Look for single small sub-diagonal element and returns its index */
template<typename MatrixType>
-inline typename MatrixType::Index RealSchur<MatrixType>::findSmallSubdiagEntry(Index iu, Scalar norm)
+inline typename MatrixType::Index RealSchur<MatrixType>::findSmallSubdiagEntry(Index iu, const Scalar& norm)
{
using std::abs;
Index res = iu;
@@ -361,7 +361,7 @@ inline typename MatrixType::Index RealSchur<MatrixType>::findSmallSubdiagEntry(I
/** \internal Update T given that rows iu-1 and iu decouple from the rest. */
template<typename MatrixType>
-inline void RealSchur<MatrixType>::splitOffTwoRows(Index iu, bool computeU, Scalar exshift)
+inline void RealSchur<MatrixType>::splitOffTwoRows(Index iu, bool computeU, const Scalar& exshift)
{
using std::sqrt;
using std::abs;
@@ -467,8 +467,8 @@ inline void RealSchur<MatrixType>::initFrancisQRStep(Index il, Index iu, const V
template<typename MatrixType>
inline void RealSchur<MatrixType>::performFrancisQRStep(Index il, Index im, Index iu, bool computeU, const Vector3s& firstHouseholderVector, Scalar* workspace)
{
- assert(im >= il);
- assert(im <= iu-2);
+ eigen_assert(im >= il);
+ eigen_assert(im <= iu-2);
const Index size = m_matT.cols();
diff --git a/Eigen/src/Eigenvalues/RealSchur_MKL.h b/Eigen/src/Eigenvalues/RealSchur_MKL.h
index 960ec3c76..ad9736460 100644
--- a/Eigen/src/Eigenvalues/RealSchur_MKL.h
+++ b/Eigen/src/Eigenvalues/RealSchur_MKL.h
@@ -48,7 +48,7 @@ RealSchur<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >::compute(const Matrix<E
typedef MatrixType::Scalar Scalar; \
typedef MatrixType::RealScalar RealScalar; \
\
- assert(matrix.cols() == matrix.rows()); \
+ eigen_assert(matrix.cols() == matrix.rows()); \
\
lapack_int n = matrix.cols(), sdim, info; \
lapack_int lda = matrix.outerStride(); \
diff --git a/Eigen/src/Eigenvalues/Tridiagonalization.h b/Eigen/src/Eigenvalues/Tridiagonalization.h
index 5118874cd..e8408761d 100644
--- a/Eigen/src/Eigenvalues/Tridiagonalization.h
+++ b/Eigen/src/Eigenvalues/Tridiagonalization.h
@@ -426,8 +426,6 @@ struct tridiagonalization_inplace_selector;
template<typename MatrixType, typename DiagonalType, typename SubDiagonalType>
void tridiagonalization_inplace(MatrixType& mat, DiagonalType& diag, SubDiagonalType& subdiag, bool extractQ)
{
- typedef typename MatrixType::Index Index;
- //Index n = mat.rows();
eigen_assert(mat.cols()==mat.rows() && diag.size()==mat.rows() && subdiag.size()==mat.rows()-1);
tridiagonalization_inplace_selector<MatrixType>::run(mat, diag, subdiag, extractQ);
}
diff --git a/Eigen/src/Geometry/AlignedBox.h b/Eigen/src/Geometry/AlignedBox.h
index 48cc0a488..538a5afb7 100644
--- a/Eigen/src/Geometry/AlignedBox.h
+++ b/Eigen/src/Geometry/AlignedBox.h
@@ -71,7 +71,7 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
template<typename Derived>
inline explicit AlignedBox(const MatrixBase<Derived>& a_p)
{
- const typename internal::nested<Derived,2>::type p(a_p.derived());
+ typename internal::nested<Derived,2>::type p(a_p.derived());
m_min = p;
m_max = p;
}
@@ -282,7 +282,7 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
* determined by \a prec.
*
* \sa MatrixBase::isApprox() */
- bool isApprox(const AlignedBox& other, RealScalar prec = ScalarTraits::dummy_precision()) const
+ bool isApprox(const AlignedBox& other, const RealScalar& prec = ScalarTraits::dummy_precision()) const
{ return m_min.isApprox(other.m_min, prec) && m_max.isApprox(other.m_max, prec); }
protected:
@@ -296,7 +296,7 @@ template<typename Scalar,int AmbientDim>
template<typename Derived>
inline Scalar AlignedBox<Scalar,AmbientDim>::squaredExteriorDistance(const MatrixBase<Derived>& a_p) const
{
- const typename internal::nested<Derived,2*AmbientDim>::type p(a_p.derived());
+ typename internal::nested<Derived,2*AmbientDim>::type p(a_p.derived());
Scalar dist2(0);
Scalar aux;
for (Index k=0; k<dim(); ++k)
diff --git a/Eigen/src/Geometry/OrthoMethods.h b/Eigen/src/Geometry/OrthoMethods.h
index 11ad5829c..4c1bf5fcd 100644
--- a/Eigen/src/Geometry/OrthoMethods.h
+++ b/Eigen/src/Geometry/OrthoMethods.h
@@ -78,8 +78,8 @@ MatrixBase<Derived>::cross3(const MatrixBase<OtherDerived>& other) const
typedef typename internal::nested<Derived,2>::type DerivedNested;
typedef typename internal::nested<OtherDerived,2>::type OtherDerivedNested;
- const DerivedNested lhs(derived());
- const OtherDerivedNested rhs(other.derived());
+ DerivedNested lhs(derived());
+ OtherDerivedNested rhs(other.derived());
return internal::cross3_impl<Architecture::Target,
typename internal::remove_all<DerivedNested>::type,
diff --git a/Eigen/src/Geometry/Quaternion.h b/Eigen/src/Geometry/Quaternion.h
index 45824fbeb..d036c018a 100644
--- a/Eigen/src/Geometry/Quaternion.h
+++ b/Eigen/src/Geometry/Quaternion.h
@@ -154,7 +154,7 @@ public:
* \a t in [0;1]
* see http://en.wikipedia.org/wiki/Slerp
*/
- template<class OtherDerived> Quaternion<Scalar> slerp(Scalar t, const QuaternionBase<OtherDerived>& other) const;
+ template<class OtherDerived> Quaternion<Scalar> slerp(const Scalar& t, const QuaternionBase<OtherDerived>& other) const;
/** \returns \c true if \c *this is approximately equal to \a other, within the precision
* determined by \a prec.
@@ -683,7 +683,7 @@ QuaternionBase<Derived>::angularDistance(const QuaternionBase<OtherDerived>& oth
template <class Derived>
template <class OtherDerived>
Quaternion<typename internal::traits<Derived>::Scalar>
-QuaternionBase<Derived>::slerp(Scalar t, const QuaternionBase<OtherDerived>& other) const
+QuaternionBase<Derived>::slerp(const Scalar& t, const QuaternionBase<OtherDerived>& other) const
{
using std::acos;
using std::sin;
diff --git a/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h b/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h
index 5a822e0ea..fbefb696f 100644
--- a/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h
+++ b/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h
@@ -44,6 +44,11 @@ bool bicgstab(const MatrixType& mat, const Rhs& rhs, Dest& x,
VectorType r0 = r;
RealScalar r0_sqnorm = rhs.squaredNorm();
+ if(r0_sqnorm == 0)
+ {
+ x.setZero();
+ return true;
+ }
Scalar rho = 1;
Scalar alpha = 1;
Scalar w = 1;
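With this guard, a zero right-hand side makes bicgstab return success immediately with x = 0 instead of iterating on a division-prone recurrence. A usage sketch of the observable behavior, assuming Eigen's sparse solver API:

    #include <Eigen/Dense>
    #include <Eigen/Sparse>

    // Sketch: solving against b == 0 now yields x == 0 without iterating.
    void solve_zero_rhs(const Eigen::SparseMatrix<double>& A)
    {
      Eigen::BiCGSTAB<Eigen::SparseMatrix<double> > solver(A);
      Eigen::VectorXd b = Eigen::VectorXd::Zero(A.cols());
      Eigen::VectorXd x = solver.solve(b); // returns immediately, x is all zeros
    }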
diff --git a/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h b/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h
index f64f2534d..00b5647c6 100644
--- a/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h
+++ b/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h
@@ -41,15 +41,29 @@ void conjugate_gradient(const MatrixType& mat, const Rhs& rhs, Dest& x,
int n = mat.cols();
VectorType residual = rhs - mat * x; //initial residual
- VectorType p(n);
+ RealScalar rhsNorm2 = rhs.squaredNorm();
+ if(rhsNorm2 == 0)
+ {
+ x.setZero();
+ iters = 0;
+ tol_error = 0;
+ return;
+ }
+ RealScalar threshold = tol*tol*rhsNorm2;
+ RealScalar residualNorm2 = residual.squaredNorm();
+ if (residualNorm2 < threshold)
+ {
+ iters = 0;
+ tol_error = sqrt(residualNorm2 / rhsNorm2);
+ return;
+ }
+
+ VectorType p(n);
p = precond.solve(residual); //initial search direction
VectorType z(n), tmp(n);
RealScalar absNew = internal::real(residual.dot(p)); // the square of the absolute value of r scaled by invM
- RealScalar rhsNorm2 = rhs.squaredNorm();
- RealScalar residualNorm2 = 0;
- RealScalar threshold = tol*tol*rhsNorm2;
int i = 0;
while(i < maxIters)
{
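The reordering also makes the stopping rule explicit: iterate while ||r||^2 >= tol^2 * ||b||^2, i.e. until the relative residual ||r||/||b|| drops below tol, and report tol_error = sqrt(||r||^2 / ||b||^2) on exit. A standalone sketch of that test (helper names hypothetical):

    #include <cmath>

    // Mirrors the early-exit test above: converged iff ||r||^2 < tol^2 * ||b||^2.
    bool cg_converged(double residualNorm2, double rhsNorm2, double tol)
    {
      return residualNorm2 < tol * tol * rhsNorm2;
    }

    // Relative error reported on exit.
    double cg_rel_error(double residualNorm2, double rhsNorm2)
    {
      return std::sqrt(residualNorm2 / rhsNorm2);
    }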
diff --git a/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h b/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h
index 5b408f83d..17d18ef58 100644
--- a/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h
+++ b/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h
@@ -24,14 +24,15 @@ namespace internal {
* \param ind The array of indices for the elements in @p row
* \param ncut The number of largest elements to keep
**/
-template <typename VectorV, typename VectorI>
-int QuickSplit(VectorV &row, VectorI &ind, int ncut)
+template <typename VectorV, typename VectorI, typename Index>
+Index QuickSplit(VectorV &row, VectorI &ind, Index ncut)
{
typedef typename VectorV::RealScalar RealScalar;
using std::swap;
- int mid;
- int n = row.size(); /* length of the vector */
- int first, last ;
+ using std::abs;
+ Index mid;
+ Index n = row.size(); /* length of the vector */
+ Index first, last ;
ncut--; /* to fit the zero-based indices */
first = 0;
@@ -40,9 +41,9 @@ int QuickSplit(VectorV &row, VectorI &ind, int ncut)
do {
mid = first;
- RealScalar abskey = std::abs(row(mid));
- for (int j = first + 1; j <= last; j++) {
- if ( std::abs(row(j)) > abskey) {
+ RealScalar abskey = abs(row(mid));
+ for (Index j = first + 1; j <= last; j++) {
+ if ( abs(row(j)) > abskey) {
++mid;
swap(row(mid), row(j));
swap(ind(mid), ind(j));
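QuickSplit is a quickselect-style partition: on return, the ncut largest-magnitude entries of row occupy positions 0..ncut-1 (in no particular order), with ind permuted in lockstep. A usage sketch, assuming the declarations above are visible via the sparse module:

    #include <Eigen/Dense>
    #include <Eigen/Sparse>

    void quicksplit_demo() // illustrative only
    {
      Eigen::VectorXd row(6);
      row << 0.1, -5.0, 2.0, -0.3, 4.0, 1.0;
      Eigen::VectorXi ind(6);
      ind << 0, 1, 2, 3, 4, 5;
      Eigen::internal::QuickSplit(row, ind, 3);
      // row(0..2) now holds -5.0, 4.0, 2.0 in some order; ind tracks the swaps.
    }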
@@ -110,7 +111,7 @@ class IncompleteLUT : internal::noncopyable
{}
template<typename MatrixType>
- IncompleteLUT(const MatrixType& mat, RealScalar droptol=NumTraits<Scalar>::dummy_precision(), int fillfactor = 10)
+ IncompleteLUT(const MatrixType& mat, const RealScalar& droptol=NumTraits<Scalar>::dummy_precision(), int fillfactor = 10)
: m_droptol(droptol),m_fillfactor(fillfactor),
m_analysisIsOk(false),m_factorizationIsOk(false),m_isInitialized(false)
{
@@ -154,7 +155,7 @@ class IncompleteLUT : internal::noncopyable
return *this;
}
- void setDroptol(RealScalar droptol);
+ void setDroptol(const RealScalar& droptol);
void setFillfactor(int fillfactor);
template<typename Rhs, typename Dest>
@@ -203,7 +204,7 @@ protected:
* \param droptol Drop any element whose magnitude is less than this tolerance
**/
template<typename Scalar>
-void IncompleteLUT<Scalar>::setDroptol(RealScalar droptol)
+void IncompleteLUT<Scalar>::setDroptol(const RealScalar& droptol)
{
this->m_droptol = droptol;
}
@@ -246,7 +247,7 @@ void IncompleteLUT<Scalar>::factorize(const _MatrixType& amat)
using std::abs;
eigen_assert((amat.rows() == amat.cols()) && "The factorization should be done on a square matrix");
- int n = amat.cols(); // Size of the matrix
+ Index n = amat.cols(); // Size of the matrix
m_lu.resize(n,n);
// Declare Working vectors and variables
Vector u(n) ; // real values of the row -- maximum size is n --
@@ -264,21 +265,21 @@ void IncompleteLUT<Scalar>::factorize(const _MatrixType& amat)
u.fill(0);
// number of largest elements to keep in each row:
- int fill_in = static_cast<int> (amat.nonZeros()*m_fillfactor)/n+1;
+ Index fill_in = static_cast<Index> (amat.nonZeros()*m_fillfactor)/n+1;
if (fill_in > n) fill_in = n;
// number of largest nonzero elements to keep in the L and the U part of the current row:
- int nnzL = fill_in/2;
- int nnzU = nnzL;
+ Index nnzL = fill_in/2;
+ Index nnzU = nnzL;
m_lu.reserve(n * (nnzL + nnzU + 1));
// global loop over the rows of the sparse matrix
- for (int ii = 0; ii < n; ii++)
+ for (Index ii = 0; ii < n; ii++)
{
// 1 - copy the lower and the upper part of the row i of mat in the working vector u
- int sizeu = 1; // number of nonzero elements in the upper part of the current row
- int sizel = 0; // number of nonzero elements in the lower part of the current row
+ Index sizeu = 1; // number of nonzero elements in the upper part of the current row
+ Index sizel = 0; // number of nonzero elements in the lower part of the current row
ju(ii) = ii;
u(ii) = 0;
jr(ii) = ii;
@@ -287,7 +288,7 @@ void IncompleteLUT<Scalar>::factorize(const _MatrixType& amat)
typename FactorType::InnerIterator j_it(mat, ii); // Iterate through the current row ii
for (; j_it; ++j_it)
{
- int k = j_it.index();
+ Index k = j_it.index();
if (k < ii)
{
// copy the lower part
@@ -303,7 +304,7 @@ void IncompleteLUT<Scalar>::factorize(const _MatrixType& amat)
else
{
// copy the upper part
- int jpos = ii + sizeu;
+ Index jpos = ii + sizeu;
ju(jpos) = k;
u(jpos) = j_it.value();
jr(k) = jpos;
@@ -322,19 +323,19 @@ void IncompleteLUT<Scalar>::factorize(const _MatrixType& amat)
rownorm = sqrt(rownorm);
// 3 - eliminate the previous nonzero rows
- int jj = 0;
- int len = 0;
+ Index jj = 0;
+ Index len = 0;
while (jj < sizel)
{
// In order to eliminate in the correct order,
// we must select first the smallest column index among ju(jj:sizel)
- int k;
- int minrow = ju.segment(jj,sizel-jj).minCoeff(&k); // k is relative to the segment
+ Index k;
+ Index minrow = ju.segment(jj,sizel-jj).minCoeff(&k); // k is relative to the segment
k += jj;
if (minrow != ju(jj))
{
// swap the two locations
- int j = ju(jj);
+ Index j = ju(jj);
swap(ju(jj), ju(k));
jr(minrow) = jj; jr(j) = k;
swap(u(jj), u(k));
@@ -360,11 +361,11 @@ void IncompleteLUT<Scalar>::factorize(const _MatrixType& amat)
for (; ki_it; ++ki_it)
{
Scalar prod = fact * ki_it.value();
- int j = ki_it.index();
- int jpos = jr(j);
+ Index j = ki_it.index();
+ Index jpos = jr(j);
if (jpos == -1) // fill-in element
{
- int newpos;
+ Index newpos;
if (j >= ii) // dealing with the upper part
{
newpos = ii + sizeu;
@@ -393,7 +394,7 @@ void IncompleteLUT<Scalar>::factorize(const _MatrixType& amat)
} // end of the elimination on the row ii
// reset the upper part of the pointer jr to zero
- for(int k = 0; k <sizeu; k++) jr(ju(ii+k)) = -1;
+ for(Index k = 0; k <sizeu; k++) jr(ju(ii+k)) = -1;
// 4 - partially sort and insert the elements in the m_lu matrix
@@ -406,7 +407,7 @@ void IncompleteLUT<Scalar>::factorize(const _MatrixType& amat)
// store the largest m_fill elements of the L part
m_lu.startVec(ii);
- for(int k = 0; k < len; k++)
+ for(Index k = 0; k < len; k++)
m_lu.insertBackByOuterInnerUnordered(ii,ju(k)) = u(k);
// store the diagonal element
@@ -418,7 +419,7 @@ void IncompleteLUT<Scalar>::factorize(const _MatrixType& amat)
// sort the U-part of the row
// apply the dropping rule first
len = 0;
- for(int k = 1; k < sizeu; k++)
+ for(Index k = 1; k < sizeu; k++)
{
if(abs(u(ii+k)) > m_droptol * rownorm )
{
@@ -434,7 +435,7 @@ void IncompleteLUT<Scalar>::factorize(const _MatrixType& amat)
internal::QuickSplit(uu, juu, len);
// store the largest elements of the U part
- for(int k = ii + 1; k < ii + len; k++)
+ for(Index k = ii + 1; k < ii + len; k++)
m_lu.insertBackByOuterInnerUnordered(ii,ju(k)) = u(k);
}
diff --git a/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h b/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h
index 11706ceba..2036922d6 100644
--- a/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h
+++ b/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h
@@ -120,7 +120,7 @@ public:
RealScalar tolerance() const { return m_tolerance; }
/** Sets the tolerance threshold used by the stopping criteria */
- Derived& setTolerance(RealScalar tolerance)
+ Derived& setTolerance(const RealScalar& tolerance)
{
m_tolerance = tolerance;
return derived();
diff --git a/Eigen/src/Jacobi/Jacobi.h b/Eigen/src/Jacobi/Jacobi.h
index 20e227640..d9d75196c 100644
--- a/Eigen/src/Jacobi/Jacobi.h
+++ b/Eigen/src/Jacobi/Jacobi.h
@@ -63,7 +63,7 @@ template<typename Scalar> class JacobiRotation
template<typename Derived>
bool makeJacobi(const MatrixBase<Derived>&, typename Derived::Index p, typename Derived::Index q);
- bool makeJacobi(RealScalar x, Scalar y, RealScalar z);
+ bool makeJacobi(const RealScalar& x, const Scalar& y, const RealScalar& z);
void makeGivens(const Scalar& p, const Scalar& q, Scalar* z=0);
@@ -80,7 +80,7 @@ template<typename Scalar> class JacobiRotation
* \sa MatrixBase::makeJacobi(const MatrixBase<Derived>&, Index, Index), MatrixBase::applyOnTheLeft(), MatrixBase::applyOnTheRight()
*/
template<typename Scalar>
-bool JacobiRotation<Scalar>::makeJacobi(RealScalar x, Scalar y, RealScalar z)
+bool JacobiRotation<Scalar>::makeJacobi(const RealScalar& x, const Scalar& y, const RealScalar& z)
{
using std::sqrt;
using std::abs;
diff --git a/Eigen/src/LU/Determinant.h b/Eigen/src/LU/Determinant.h
index d862c5d77..bb8e78a8a 100644
--- a/Eigen/src/LU/Determinant.h
+++ b/Eigen/src/LU/Determinant.h
@@ -91,7 +91,7 @@ template<typename Derived> struct determinant_impl<Derived, 4>
template<typename Derived>
inline typename internal::traits<Derived>::Scalar MatrixBase<Derived>::determinant() const
{
- assert(rows() == cols());
+ eigen_assert(rows() == cols());
typedef typename internal::nested<Derived,Base::RowsAtCompileTime>::type Nested;
return internal::determinant_impl<typename internal::remove_all<Nested>::type>::run(derived());
}
diff --git a/Eigen/src/LU/FullPivLU.h b/Eigen/src/LU/FullPivLU.h
index bcd30be00..44699b763 100644
--- a/Eigen/src/LU/FullPivLU.h
+++ b/Eigen/src/LU/FullPivLU.h
@@ -417,6 +417,9 @@ FullPivLU<MatrixType>::FullPivLU(const MatrixType& matrix)
template<typename MatrixType>
FullPivLU<MatrixType>& FullPivLU<MatrixType>::compute(const MatrixType& matrix)
{
+ // the permutations are stored as int indices, so just to be sure:
+ eigen_assert(matrix.rows()<=NumTraits<int>::highest() && matrix.cols()<=NumTraits<int>::highest());
+
m_isInitialized = true;
m_lu = matrix;
diff --git a/Eigen/src/LU/PartialPivLU.h b/Eigen/src/LU/PartialPivLU.h
index 9cf1d61d8..1d389ecac 100644
--- a/Eigen/src/LU/PartialPivLU.h
+++ b/Eigen/src/LU/PartialPivLU.h
@@ -242,7 +242,7 @@ struct partial_lu_impl
const Index cols = lu.cols();
const Index size = (std::min)(rows,cols);
nb_transpositions = 0;
- int first_zero_pivot = -1;
+ Index first_zero_pivot = -1;
for(Index k = 0; k < size; ++k)
{
Index rrows = rows-k-1;
@@ -253,7 +253,7 @@ struct partial_lu_impl
= lu.col(k).tail(rows-k).cwiseAbs().maxCoeff(&row_of_biggest_in_col);
row_of_biggest_in_col += k;
- row_transpositions[k] = row_of_biggest_in_col;
+ row_transpositions[k] = PivIndex(row_of_biggest_in_col);
if(biggest_in_corner != RealScalar(0))
{
@@ -318,7 +318,7 @@ struct partial_lu_impl
}
nb_transpositions = 0;
- int first_zero_pivot = -1;
+ Index first_zero_pivot = -1;
for(Index k = 0; k < size; k+=blockSize)
{
Index bs = (std::min)(size-k,blockSize); // actual size of the block
@@ -386,6 +386,9 @@ void partial_lu_inplace(MatrixType& lu, TranspositionType& row_transpositions, t
template<typename MatrixType>
PartialPivLU<MatrixType>& PartialPivLU<MatrixType>::compute(const MatrixType& matrix)
{
+ // the row permutation is stored as int indices, so just to be sure:
+ eigen_assert(matrix.rows()<NumTraits<int>::highest());
+
m_lu = matrix;
eigen_assert(matrix.rows() == matrix.cols() && "PartialPivLU is only for square (and moreover invertible) matrices");
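Both LU classes store their permutations with int entries even when MatrixType::Index is a 64-bit type, so these asserts make an overflow fail loudly instead of silently truncating indices. The guard pattern, sketched standalone (helper name hypothetical):

    #include <Eigen/Core>

    // The stored permutation uses int indices, so the dimensions must fit in int.
    template<typename MatrixType>
    void check_fits_int(const MatrixType& m)
    {
      eigen_assert(m.rows() <= Eigen::NumTraits<int>::highest()
                && m.cols() <= Eigen::NumTraits<int>::highest());
    }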
diff --git a/Eigen/src/MetisSupport/MetisSupport.h b/Eigen/src/MetisSupport/MetisSupport.h
index 3a723b384..818355e79 100644
--- a/Eigen/src/MetisSupport/MetisSupport.h
+++ b/Eigen/src/MetisSupport/MetisSupport.h
@@ -29,7 +29,7 @@ public:
void get_symmetrized_graph(const MatrixType& A)
{
Index m = A.cols();
-
+ eigen_assert((A.rows() == A.cols()) && "ONLY FOR SQUARE MATRICES");
// Get the transpose of the input matrix
MatrixType At = A.transpose();
// Get the number of nonzeros elements in each row/col of At+A
diff --git a/Eigen/src/OrderingMethods/Amd.h b/Eigen/src/OrderingMethods/Amd.h
index 8878ef863..41b4fd7e3 100644
--- a/Eigen/src/OrderingMethods/Amd.h
+++ b/Eigen/src/OrderingMethods/Amd.h
@@ -2,10 +2,6 @@
// for linear algebra.
//
// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
/*
@@ -95,7 +91,6 @@ template<typename Scalar, typename Index>
void minimum_degree_ordering(SparseMatrix<Scalar,ColMajor,Index>& C, PermutationMatrix<Dynamic,Dynamic,Index>& perm)
{
using std::sqrt;
- typedef SparseMatrix<Scalar,ColMajor,Index> CCS;
int d, dk, dext, lemax = 0, e, elenk, eln, i, j, k, k1,
k2, k3, jlast, ln, dense, nzmax, mindeg = 0, nvi, nvj, nvk, mark, wnvi,
diff --git a/Eigen/src/OrderingMethods/Eigen_Colamd.h b/Eigen/src/OrderingMethods/Eigen_Colamd.h
index 6dc1f280d..44548f660 100644
--- a/Eigen/src/OrderingMethods/Eigen_Colamd.h
+++ b/Eigen/src/OrderingMethods/Eigen_Colamd.h
@@ -50,6 +50,7 @@
#ifndef EIGEN_COLAMD_H
#define EIGEN_COLAMD_H
+
namespace internal {
/* Ensure that debugging is turned off: */
#ifndef COLAMD_NDEBUG
@@ -133,107 +134,106 @@ namespace internal {
/* === Colamd reporting mechanism =========================================== */
/* ========================================================================== */
- // == Row and Column structures ==
-typedef struct colamd_col_struct
+// == Row and Column structures ==
+template <typename Index>
+struct colamd_col
{
- int start ; /* index for A of first row in this column, or DEAD */
- /* if column is dead */
- int length ; /* number of rows in this column */
- union
- {
- int thickness ; /* number of original columns represented by this */
- /* col, if the column is alive */
- int parent ; /* parent in parent tree super-column structure, if */
- /* the column is dead */
- } shared1 ;
- union
- {
- int score ; /* the score used to maintain heap, if col is alive */
- int order ; /* pivot ordering of this column, if col is dead */
- } shared2 ;
- union
- {
- int headhash ; /* head of a hash bucket, if col is at the head of */
- /* a degree list */
- int hash ; /* hash value, if col is not in a degree list */
- int prev ; /* previous column in degree list, if col is in a */
- /* degree list (but not at the head of a degree list) */
- } shared3 ;
- union
- {
- int degree_next ; /* next column, if col is in a degree list */
- int hash_next ; /* next column, if col is in a hash list */
- } shared4 ;
-
-} colamd_col ;
-
-typedef struct Colamd_Row_struct
+ Index start ; /* index for A of first row in this column, or DEAD */
+ /* if column is dead */
+ Index length ; /* number of rows in this column */
+ union
+ {
+ Index thickness ; /* number of original columns represented by this */
+ /* col, if the column is alive */
+ Index parent ; /* parent in parent tree super-column structure, if */
+ /* the column is dead */
+ } shared1 ;
+ union
+ {
+ Index score ; /* the score used to maintain heap, if col is alive */
+ Index order ; /* pivot ordering of this column, if col is dead */
+ } shared2 ;
+ union
+ {
+ Index headhash ; /* head of a hash bucket, if col is at the head of */
+ /* a degree list */
+ Index hash ; /* hash value, if col is not in a degree list */
+ Index prev ; /* previous column in degree list, if col is in a */
+ /* degree list (but not at the head of a degree list) */
+ } shared3 ;
+ union
+ {
+ Index degree_next ; /* next column, if col is in a degree list */
+ Index hash_next ; /* next column, if col is in a hash list */
+ } shared4 ;
+
+};
+
+template <typename Index>
+struct Colamd_Row
{
- int start ; /* index for A of first col in this row */
- int length ; /* number of principal columns in this row */
- union
- {
- int degree ; /* number of principal & non-principal columns in row */
- int p ; /* used as a row pointer in init_rows_cols () */
- } shared1 ;
- union
- {
- int mark ; /* for computing set differences and marking dead rows*/
- int first_column ;/* first column in row (used in garbage collection) */
- } shared2 ;
-
-} Colamd_Row ;
-
+ Index start ; /* index for A of first col in this row */
+ Index length ; /* number of principal columns in this row */
+ union
+ {
+ Index degree ; /* number of principal & non-principal columns in row */
+ Index p ; /* used as a row pointer in init_rows_cols () */
+ } shared1 ;
+ union
+ {
+ Index mark ; /* for computing set differences and marking dead rows*/
+ Index first_column ;/* first column in row (used in garbage collection) */
+ } shared2 ;
+
+};
+
/* ========================================================================== */
/* === Colamd recommended memory size ======================================= */
/* ========================================================================== */
-
+
/*
- The recommended length Alen of the array A passed to colamd is given by
- the COLAMD_RECOMMENDED (nnz, n_row, n_col) macro. It returns -1 if any
- argument is negative. 2*nnz space is required for the row and column
- indices of the matrix. colamd_c (n_col) + colamd_r (n_row) space is
- required for the Col and Row arrays, respectively, which are internal to
- colamd. An additional n_col space is the minimal amount of "elbow room",
- and nnz/5 more space is recommended for run time efficiency.
-
- This macro is not needed when using symamd.
-
- Explicit typecast to int added Sept. 23, 2002, COLAMD version 2.2, to avoid
- gcc -pedantic warning messages.
+ The recommended length Alen of the array A passed to colamd is given by
+ the COLAMD_RECOMMENDED (nnz, n_row, n_col) macro. It returns -1 if any
+ argument is negative. 2*nnz space is required for the row and column
+ indices of the matrix. colamd_c (n_col) + colamd_r (n_row) space is
+ required for the Col and Row arrays, respectively, which are internal to
+ colamd. An additional n_col space is the minimal amount of "elbow room",
+ and nnz/5 more space is recommended for run time efficiency.
+
+ This macro is not needed when using symamd.
+
+ Explicit typecast to Index added Sept. 23, 2002, COLAMD version 2.2, to avoid
+ gcc -pedantic warning messages.
*/
+template <typename Index>
+inline Index colamd_c(Index n_col)
+{ return Index( ((n_col) + 1) * sizeof (colamd_col<Index>) / sizeof (Index) ) ; }
-inline int colamd_c(int n_col)
-{ return int( ((n_col) + 1) * sizeof (colamd_col) / sizeof (int) ) ; }
-
-inline int colamd_r(int n_row)
-{ return int(((n_row) + 1) * sizeof (Colamd_Row) / sizeof (int)); }
-
- // Various routines
-inline int colamd_recommended (int nnz, int n_row, int n_col) ;
-
-static inline void colamd_set_defaults (double knobs [COLAMD_KNOBS]) ;
+template <typename Index>
+inline Index colamd_r(Index n_row)
+{ return Index(((n_row) + 1) * sizeof (Colamd_Row<Index>) / sizeof (Index)); }
-static bool colamd (int n_row, int n_col, int Alen, int A [], int p [], double knobs[COLAMD_KNOBS], int stats [COLAMD_STATS]) ;
+// Prototypes of non-user callable routines
+template <typename Index>
+static Index init_rows_cols (Index n_row, Index n_col, Colamd_Row<Index> Row [], colamd_col<Index> col [], Index A [], Index p [], Index stats[COLAMD_STATS] );
-static int init_rows_cols (int n_row, int n_col, Colamd_Row Row [], colamd_col col [], int A [], int p [], int stats[COLAMD_STATS] );
+template <typename Index>
+static void init_scoring (Index n_row, Index n_col, Colamd_Row<Index> Row [], colamd_col<Index> Col [], Index A [], Index head [], double knobs[COLAMD_KNOBS], Index *p_n_row2, Index *p_n_col2, Index *p_max_deg);
-static void init_scoring (int n_row, int n_col, Colamd_Row Row [], colamd_col Col [], int A [], int head [], double knobs[COLAMD_KNOBS], int *p_n_row2, int *p_n_col2, int *p_max_deg);
+template <typename Index>
+static Index find_ordering (Index n_row, Index n_col, Index Alen, Colamd_Row<Index> Row [], colamd_col<Index> Col [], Index A [], Index head [], Index n_col2, Index max_deg, Index pfree);
-static int find_ordering (int n_row, int n_col, int Alen, Colamd_Row Row [], colamd_col Col [], int A [], int head [], int n_col2, int max_deg, int pfree);
+template <typename Index>
+static void order_children (Index n_col, colamd_col<Index> Col [], Index p []);
-static void order_children (int n_col, colamd_col Col [], int p []);
+template <typename Index>
+static void detect_super_cols (colamd_col<Index> Col [], Index A [], Index head [], Index row_start, Index row_length ) ;
-static void detect_super_cols (
- colamd_col Col [],
- int A [],
- int head [],
- int row_start,
- int row_length ) ;
+template <typename Index>
+static Index garbage_collection (Index n_row, Index n_col, Colamd_Row<Index> Row [], colamd_col<Index> Col [], Index A [], Index *pfree) ;
-static int garbage_collection (int n_row, int n_col, Colamd_Row Row [], colamd_col Col [], int A [], int *pfree) ;
-
-static inline int clear_mark (int n_row, Colamd_Row Row [] ) ;
+template <typename Index>
+static inline Index clear_mark (Index n_row, Colamd_Row<Index> Row [] ) ;
/* === No debugging ========================================================= */
@@ -260,7 +260,8 @@ static inline int clear_mark (int n_row, Colamd_Row Row [] ) ;
* \param n_col number of columns in A
* \return recommended value of Alen for use by colamd
*/
-inline int colamd_recommended ( int nnz, int n_row, int n_col)
+template <typename Index>
+inline Index colamd_recommended ( Index nnz, Index n_row, Index n_col)
{
if ((nnz) < 0 || (n_row) < 0 || (n_col) < 0)
return (-1);
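Spelling out the rule documented above: the recommended Alen is the 2*nnz working space, plus the space for the internal Col and Row arrays, plus n_col of elbow room, plus nnz/5 of slack. A sketch of the computation, assuming the colamd_c/colamd_r helpers defined above (function name hypothetical):

    // Alen >= 2*nnz + colamd_c(n_col) + colamd_r(n_row) + n_col + nnz/5
    template <typename Index>
    Index recommended_alen(Index nnz, Index n_row, Index n_col)
    {
      if (nnz < 0 || n_row < 0 || n_col < 0)
        return -1; // invalid input, as in colamd_recommended
      return 2*nnz + colamd_c(n_col) + colamd_r(n_row) + n_col + nnz/5;
    }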
@@ -288,22 +289,23 @@ inline int colamd_recommended ( int nnz, int n_row, int n_col)
*
* \param knobs parameter settings for colamd
*/
+
static inline void colamd_set_defaults(double knobs[COLAMD_KNOBS])
{
- /* === Local variables ================================================== */
-
- int i ;
+ /* === Local variables ================================================== */
+
+ int i ;
- if (!knobs)
- {
- return ; /* no knobs to initialize */
- }
- for (i = 0 ; i < COLAMD_KNOBS ; i++)
- {
- knobs [i] = 0 ;
- }
- knobs [COLAMD_DENSE_ROW] = 0.5 ; /* ignore rows over 50% dense */
- knobs [COLAMD_DENSE_COL] = 0.5 ; /* ignore columns over 50% dense */
+ if (!knobs)
+ {
+ return ; /* no knobs to initialize */
+ }
+ for (i = 0 ; i < COLAMD_KNOBS ; i++)
+ {
+ knobs [i] = 0 ;
+ }
+ knobs [COLAMD_DENSE_ROW] = 0.5 ; /* ignore rows over 50% dense */
+ knobs [COLAMD_DENSE_COL] = 0.5 ; /* ignore columns over 50% dense */
}
/**
@@ -323,144 +325,145 @@ static inline void colamd_set_defaults(double knobs[COLAMD_KNOBS])
* \param knobs parameter settings for colamd
* \param stats colamd output statistics and error codes
*/
-static bool colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[COLAMD_KNOBS], int stats[COLAMD_STATS])
+template <typename Index>
+static bool colamd(Index n_row, Index n_col, Index Alen, Index *A, Index *p, double knobs[COLAMD_KNOBS], Index stats[COLAMD_STATS])
{
- /* === Local variables ================================================== */
-
- int i ; /* loop index */
- int nnz ; /* nonzeros in A */
- int Row_size ; /* size of Row [], in integers */
- int Col_size ; /* size of Col [], in integers */
- int need ; /* minimum required length of A */
- Colamd_Row *Row ; /* pointer into A of Row [0..n_row] array */
- colamd_col *Col ; /* pointer into A of Col [0..n_col] array */
- int n_col2 ; /* number of non-dense, non-empty columns */
- int n_row2 ; /* number of non-dense, non-empty rows */
- int ngarbage ; /* number of garbage collections performed */
- int max_deg ; /* maximum row degree */
- double default_knobs [COLAMD_KNOBS] ; /* default knobs array */
-
-
- /* === Check the input arguments ======================================== */
-
- if (!stats)
- {
- COLAMD_DEBUG0 (("colamd: stats not present\n")) ;
- return (false) ;
- }
- for (i = 0 ; i < COLAMD_STATS ; i++)
- {
- stats [i] = 0 ;
- }
- stats [COLAMD_STATUS] = COLAMD_OK ;
- stats [COLAMD_INFO1] = -1 ;
- stats [COLAMD_INFO2] = -1 ;
-
- if (!A) /* A is not present */
- {
- stats [COLAMD_STATUS] = COLAMD_ERROR_A_not_present ;
- COLAMD_DEBUG0 (("colamd: A not present\n")) ;
- return (false) ;
- }
-
- if (!p) /* p is not present */
- {
- stats [COLAMD_STATUS] = COLAMD_ERROR_p_not_present ;
- COLAMD_DEBUG0 (("colamd: p not present\n")) ;
- return (false) ;
- }
-
- if (n_row < 0) /* n_row must be >= 0 */
- {
- stats [COLAMD_STATUS] = COLAMD_ERROR_nrow_negative ;
- stats [COLAMD_INFO1] = n_row ;
- COLAMD_DEBUG0 (("colamd: nrow negative %d\n", n_row)) ;
- return (false) ;
- }
-
- if (n_col < 0) /* n_col must be >= 0 */
- {
- stats [COLAMD_STATUS] = COLAMD_ERROR_ncol_negative ;
- stats [COLAMD_INFO1] = n_col ;
- COLAMD_DEBUG0 (("colamd: ncol negative %d\n", n_col)) ;
- return (false) ;
- }
-
- nnz = p [n_col] ;
- if (nnz < 0) /* nnz must be >= 0 */
- {
- stats [COLAMD_STATUS] = COLAMD_ERROR_nnz_negative ;
- stats [COLAMD_INFO1] = nnz ;
- COLAMD_DEBUG0 (("colamd: number of entries negative %d\n", nnz)) ;
- return (false) ;
- }
-
- if (p [0] != 0)
- {
- stats [COLAMD_STATUS] = COLAMD_ERROR_p0_nonzero ;
- stats [COLAMD_INFO1] = p [0] ;
- COLAMD_DEBUG0 (("colamd: p[0] not zero %d\n", p [0])) ;
- return (false) ;
- }
-
- /* === If no knobs, set default knobs =================================== */
-
- if (!knobs)
- {
- colamd_set_defaults (default_knobs) ;
- knobs = default_knobs ;
- }
-
- /* === Allocate the Row and Col arrays from array A ===================== */
-
- Col_size = colamd_c (n_col) ;
- Row_size = colamd_r (n_row) ;
- need = 2*nnz + n_col + Col_size + Row_size ;
-
- if (need > Alen)
- {
- /* not enough space in array A to perform the ordering */
- stats [COLAMD_STATUS] = COLAMD_ERROR_A_too_small ;
- stats [COLAMD_INFO1] = need ;
- stats [COLAMD_INFO2] = Alen ;
- COLAMD_DEBUG0 (("colamd: Need Alen >= %d, given only Alen = %d\n", need,Alen));
- return (false) ;
- }
-
- Alen -= Col_size + Row_size ;
- Col = (colamd_col *) &A [Alen] ;
- Row = (Colamd_Row *) &A [Alen + Col_size] ;
-
- /* === Construct the row and column data structures ===================== */
-
- if (!init_rows_cols (n_row, n_col, Row, Col, A, p, stats))
- {
- /* input matrix is invalid */
- COLAMD_DEBUG0 (("colamd: Matrix invalid\n")) ;
- return (false) ;
- }
-
- /* === Initialize scores, kill dense rows/columns ======================= */
-
- init_scoring (n_row, n_col, Row, Col, A, p, knobs,
- &n_row2, &n_col2, &max_deg) ;
-
- /* === Order the supercolumns =========================================== */
-
- ngarbage = find_ordering (n_row, n_col, Alen, Row, Col, A, p,
- n_col2, max_deg, 2*nnz) ;
-
- /* === Order the non-principal columns ================================== */
-
- order_children (n_col, Col, p) ;
+ /* === Local variables ================================================== */
+
+ Index i ; /* loop index */
+ Index nnz ; /* nonzeros in A */
+ Index Row_size ; /* size of Row [], in integers */
+ Index Col_size ; /* size of Col [], in integers */
+ Index need ; /* minimum required length of A */
+ Colamd_Row<Index> *Row ; /* pointer into A of Row [0..n_row] array */
+ colamd_col<Index> *Col ; /* pointer into A of Col [0..n_col] array */
+ Index n_col2 ; /* number of non-dense, non-empty columns */
+ Index n_row2 ; /* number of non-dense, non-empty rows */
+ Index ngarbage ; /* number of garbage collections performed */
+ Index max_deg ; /* maximum row degree */
+ double default_knobs [COLAMD_KNOBS] ; /* default knobs array */
+
+
+ /* === Check the input arguments ======================================== */
+
+ if (!stats)
+ {
+ COLAMD_DEBUG0 (("colamd: stats not present\n")) ;
+ return (false) ;
+ }
+ for (i = 0 ; i < COLAMD_STATS ; i++)
+ {
+ stats [i] = 0 ;
+ }
+ stats [COLAMD_STATUS] = COLAMD_OK ;
+ stats [COLAMD_INFO1] = -1 ;
+ stats [COLAMD_INFO2] = -1 ;
+
+ if (!A) /* A is not present */
+ {
+ stats [COLAMD_STATUS] = COLAMD_ERROR_A_not_present ;
+ COLAMD_DEBUG0 (("colamd: A not present\n")) ;
+ return (false) ;
+ }
+
+ if (!p) /* p is not present */
+ {
+ stats [COLAMD_STATUS] = COLAMD_ERROR_p_not_present ;
+ COLAMD_DEBUG0 (("colamd: p not present\n")) ;
+ return (false) ;
+ }
+
+ if (n_row < 0) /* n_row must be >= 0 */
+ {
+ stats [COLAMD_STATUS] = COLAMD_ERROR_nrow_negative ;
+ stats [COLAMD_INFO1] = n_row ;
+ COLAMD_DEBUG0 (("colamd: nrow negative %d\n", n_row)) ;
+ return (false) ;
+ }
+
+ if (n_col < 0) /* n_col must be >= 0 */
+ {
+ stats [COLAMD_STATUS] = COLAMD_ERROR_ncol_negative ;
+ stats [COLAMD_INFO1] = n_col ;
+ COLAMD_DEBUG0 (("colamd: ncol negative %d\n", n_col)) ;
+ return (false) ;
+ }
+
+ nnz = p [n_col] ;
+ if (nnz < 0) /* nnz must be >= 0 */
+ {
+ stats [COLAMD_STATUS] = COLAMD_ERROR_nnz_negative ;
+ stats [COLAMD_INFO1] = nnz ;
+ COLAMD_DEBUG0 (("colamd: number of entries negative %d\n", nnz)) ;
+ return (false) ;
+ }
+
+ if (p [0] != 0)
+ {
+ stats [COLAMD_STATUS] = COLAMD_ERROR_p0_nonzero ;
+ stats [COLAMD_INFO1] = p [0] ;
+ COLAMD_DEBUG0 (("colamd: p[0] not zero %d\n", p [0])) ;
+ return (false) ;
+ }
+
+ /* === If no knobs, set default knobs =================================== */
+
+ if (!knobs)
+ {
+ colamd_set_defaults (default_knobs) ;
+ knobs = default_knobs ;
+ }
+
+ /* === Allocate the Row and Col arrays from array A ===================== */
+
+ Col_size = colamd_c (n_col) ;
+ Row_size = colamd_r (n_row) ;
+ need = 2*nnz + n_col + Col_size + Row_size ;
+
+ if (need > Alen)
+ {
+ /* not enough space in array A to perform the ordering */
+ stats [COLAMD_STATUS] = COLAMD_ERROR_A_too_small ;
+ stats [COLAMD_INFO1] = need ;
+ stats [COLAMD_INFO2] = Alen ;
+ COLAMD_DEBUG0 (("colamd: Need Alen >= %d, given only Alen = %d\n", need,Alen));
+ return (false) ;
+ }
+
+ Alen -= Col_size + Row_size ;
+ Col = (colamd_col<Index> *) &A [Alen] ;
+ Row = (Colamd_Row<Index> *) &A [Alen + Col_size] ;
- /* === Return statistics in stats ======================================= */
+ /* === Construct the row and column data structures ===================== */
+
+ if (!Eigen::internal::init_rows_cols (n_row, n_col, Row, Col, A, p, stats))
+ {
+ /* input matrix is invalid */
+ COLAMD_DEBUG0 (("colamd: Matrix invalid\n")) ;
+ return (false) ;
+ }
+
+ /* === Initialize scores, kill dense rows/columns ======================= */
- stats [COLAMD_DENSE_ROW] = n_row - n_row2 ;
- stats [COLAMD_DENSE_COL] = n_col - n_col2 ;
- stats [COLAMD_DEFRAG_COUNT] = ngarbage ;
- COLAMD_DEBUG0 (("colamd: done.\n")) ;
- return (true) ;
+ Eigen::internal::init_scoring (n_row, n_col, Row, Col, A, p, knobs,
+ &n_row2, &n_col2, &max_deg) ;
+
+ /* === Order the supercolumns =========================================== */
+
+ ngarbage = Eigen::internal::find_ordering (n_row, n_col, Alen, Row, Col, A, p,
+ n_col2, max_deg, 2*nnz) ;
+
+ /* === Order the non-principal columns ================================== */
+
+ Eigen::internal::order_children (n_col, Col, p) ;
+
+ /* === Return statistics in stats ======================================= */
+
+ stats [COLAMD_DENSE_ROW] = n_row - n_row2 ;
+ stats [COLAMD_DENSE_COL] = n_col - n_col2 ;
+ stats [COLAMD_DEFRAG_COUNT] = ngarbage ;
+ COLAMD_DEBUG0 (("colamd: done.\n")) ;
+ return (true) ;
}
/* ========================================================================== */
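Tying the interface together: a hypothetical end-to-end driver that sizes the workspace with colamd_recommended, loads the row indices and column pointers, and runs colamd in place. A is destroyed and the ordering comes back in p, per the documentation above; all names and the call sequence below are a sketch, not Eigen API:

    #include <algorithm>
    #include <vector>

    template <typename Index>
    bool order_columns(Index n_row, Index n_col,
                       const Index* rowind, const Index* colptr,
                       std::vector<Index>& perm) // out: ordering
    {
      Index nnz  = colptr[n_col];
      Index alen = colamd_recommended(nnz, n_row, n_col);
      if (alen < 0) return false;

      std::vector<Index> A(alen);
      std::copy(rowind, rowind + nnz, A.begin()); // row indices; destroyed by colamd
      perm.assign(colptr, colptr + n_col + 1);    // column pointers; replaced by ordering

      Index stats[COLAMD_STATS];
      return colamd(n_row, n_col, alen, &A[0], &perm[0], (double*)0, stats);
    }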
@@ -475,218 +478,218 @@ static bool colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[
/* ========================================================================== */
/*
- Takes the column form of the matrix in A and creates the row form of the
- matrix. Also, row and column attributes are stored in the Col and Row
- structs. If the columns are un-sorted or contain duplicate row indices,
- this routine will also sort and remove duplicate row indices from the
- column form of the matrix. Returns false if the matrix is invalid,
- true otherwise. Not user-callable.
+ Takes the column form of the matrix in A and creates the row form of the
+ matrix. Also, row and column attributes are stored in the Col and Row
+ structs. If the columns are un-sorted or contain duplicate row indices,
+ this routine will also sort and remove duplicate row indices from the
+ column form of the matrix. Returns false if the matrix is invalid,
+ true otherwise. Not user-callable.
*/
-
- static int init_rows_cols /* returns true if OK, or false otherwise */
-(
+template <typename Index>
+static Index init_rows_cols /* returns true if OK, or false otherwise */
+ (
/* === Parameters ======================================================= */
- int n_row, /* number of rows of A */
- int n_col, /* number of columns of A */
- Colamd_Row Row [], /* of size n_row+1 */
- colamd_col Col [], /* of size n_col+1 */
- int A [], /* row indices of A, of size Alen */
- int p [], /* pointers to columns in A, of size n_col+1 */
- int stats [COLAMD_STATS] /* colamd statistics */
-)
+ Index n_row, /* number of rows of A */
+ Index n_col, /* number of columns of A */
+ Colamd_Row<Index> Row [], /* of size n_row+1 */
+ colamd_col<Index> Col [], /* of size n_col+1 */
+ Index A [], /* row indices of A, of size Alen */
+ Index p [], /* pointers to columns in A, of size n_col+1 */
+ Index stats [COLAMD_STATS] /* colamd statistics */
+ )
{
- /* === Local variables ================================================== */
+ /* === Local variables ================================================== */
- int col ; /* a column index */
- int row ; /* a row index */
- int *cp ; /* a column pointer */
- int *cp_end ; /* a pointer to the end of a column */
- int *rp ; /* a row pointer */
- int *rp_end ; /* a pointer to the end of a row */
- int last_row ; /* previous row */
+ Index col ; /* a column index */
+ Index row ; /* a row index */
+ Index *cp ; /* a column pointer */
+ Index *cp_end ; /* a pointer to the end of a column */
+ Index *rp ; /* a row pointer */
+ Index *rp_end ; /* a pointer to the end of a row */
+ Index last_row ; /* previous row */
- /* === Initialize columns, and check column pointers ==================== */
-
- for (col = 0 ; col < n_col ; col++)
- {
- Col [col].start = p [col] ;
- Col [col].length = p [col+1] - p [col] ;
+ /* === Initialize columns, and check column pointers ==================== */
- if (Col [col].length < 0)
+ for (col = 0 ; col < n_col ; col++)
{
+ Col [col].start = p [col] ;
+ Col [col].length = p [col+1] - p [col] ;
+
+ if (Col [col].length < 0)
+ {
/* column pointers must be non-decreasing */
stats [COLAMD_STATUS] = COLAMD_ERROR_col_length_negative ;
stats [COLAMD_INFO1] = col ;
stats [COLAMD_INFO2] = Col [col].length ;
COLAMD_DEBUG0 (("colamd: col %d length %d < 0\n", col, Col [col].length)) ;
return (false) ;
- }
-
- Col [col].shared1.thickness = 1 ;
- Col [col].shared2.score = 0 ;
- Col [col].shared3.prev = COLAMD_EMPTY ;
- Col [col].shared4.degree_next = COLAMD_EMPTY ;
}
- /* p [0..n_col] no longer needed, used as "head" in subsequent routines */
-
- /* === Scan columns, compute row degrees, and check row indices ========= */
+ Col [col].shared1.thickness = 1 ;
+ Col [col].shared2.score = 0 ;
+ Col [col].shared3.prev = COLAMD_EMPTY ;
+ Col [col].shared4.degree_next = COLAMD_EMPTY ;
+ }
- stats [COLAMD_INFO3] = 0 ; /* number of duplicate or unsorted row indices*/
+ /* p [0..n_col] no longer needed, used as "head" in subsequent routines */
- for (row = 0 ; row < n_row ; row++)
- {
- Row [row].length = 0 ;
- Row [row].shared2.mark = -1 ;
- }
+ /* === Scan columns, compute row degrees, and check row indices ========= */
- for (col = 0 ; col < n_col ; col++)
- {
- last_row = -1 ;
+ stats [COLAMD_INFO3] = 0 ; /* number of duplicate or unsorted row indices*/
- cp = &A [p [col]] ;
- cp_end = &A [p [col+1]] ;
+ for (row = 0 ; row < n_row ; row++)
+ {
+ Row [row].length = 0 ;
+ Row [row].shared2.mark = -1 ;
+ }
- while (cp < cp_end)
+ for (col = 0 ; col < n_col ; col++)
{
+ last_row = -1 ;
+
+ cp = &A [p [col]] ;
+ cp_end = &A [p [col+1]] ;
+
+ while (cp < cp_end)
+ {
row = *cp++ ;
/* make sure row indices within range */
if (row < 0 || row >= n_row)
{
- stats [COLAMD_STATUS] = COLAMD_ERROR_row_index_out_of_bounds ;
- stats [COLAMD_INFO1] = col ;
- stats [COLAMD_INFO2] = row ;
- stats [COLAMD_INFO3] = n_row ;
- COLAMD_DEBUG0 (("colamd: row %d col %d out of bounds\n", row, col)) ;
- return (false) ;
+ stats [COLAMD_STATUS] = COLAMD_ERROR_row_index_out_of_bounds ;
+ stats [COLAMD_INFO1] = col ;
+ stats [COLAMD_INFO2] = row ;
+ stats [COLAMD_INFO3] = n_row ;
+ COLAMD_DEBUG0 (("colamd: row %d col %d out of bounds\n", row, col)) ;
+ return (false) ;
}
if (row <= last_row || Row [row].shared2.mark == col)
{
- /* row index are unsorted or repeated (or both), thus col */
- /* is jumbled. This is a notice, not an error condition. */
- stats [COLAMD_STATUS] = COLAMD_OK_BUT_JUMBLED ;
- stats [COLAMD_INFO1] = col ;
- stats [COLAMD_INFO2] = row ;
- (stats [COLAMD_INFO3]) ++ ;
- COLAMD_DEBUG1 (("colamd: row %d col %d unsorted/duplicate\n",row,col));
+    /* row indices are unsorted or repeated (or both), thus col */
+ /* is jumbled. This is a notice, not an error condition. */
+ stats [COLAMD_STATUS] = COLAMD_OK_BUT_JUMBLED ;
+ stats [COLAMD_INFO1] = col ;
+ stats [COLAMD_INFO2] = row ;
+ (stats [COLAMD_INFO3]) ++ ;
+ COLAMD_DEBUG1 (("colamd: row %d col %d unsorted/duplicate\n",row,col));
}
if (Row [row].shared2.mark != col)
{
- Row [row].length++ ;
+ Row [row].length++ ;
}
else
{
- /* this is a repeated entry in the column, */
- /* it will be removed */
- Col [col].length-- ;
+ /* this is a repeated entry in the column, */
+ /* it will be removed */
+ Col [col].length-- ;
}
/* mark the row as having been seen in this column */
Row [row].shared2.mark = col ;
last_row = row ;
- }
}
+ }
- /* === Compute row pointers ============================================= */
+ /* === Compute row pointers ============================================= */
- /* row form of the matrix starts directly after the column */
- /* form of matrix in A */
- Row [0].start = p [n_col] ;
- Row [0].shared1.p = Row [0].start ;
- Row [0].shared2.mark = -1 ;
- for (row = 1 ; row < n_row ; row++)
- {
- Row [row].start = Row [row-1].start + Row [row-1].length ;
- Row [row].shared1.p = Row [row].start ;
- Row [row].shared2.mark = -1 ;
- }
+ /* row form of the matrix starts directly after the column */
+ /* form of matrix in A */
+ Row [0].start = p [n_col] ;
+ Row [0].shared1.p = Row [0].start ;
+ Row [0].shared2.mark = -1 ;
+ for (row = 1 ; row < n_row ; row++)
+ {
+ Row [row].start = Row [row-1].start + Row [row-1].length ;
+ Row [row].shared1.p = Row [row].start ;
+ Row [row].shared2.mark = -1 ;
+ }
- /* === Create row form ================================================== */
+ /* === Create row form ================================================== */
- if (stats [COLAMD_STATUS] == COLAMD_OK_BUT_JUMBLED)
- {
- /* if cols jumbled, watch for repeated row indices */
- for (col = 0 ; col < n_col ; col++)
+ if (stats [COLAMD_STATUS] == COLAMD_OK_BUT_JUMBLED)
{
+ /* if cols jumbled, watch for repeated row indices */
+ for (col = 0 ; col < n_col ; col++)
+ {
cp = &A [p [col]] ;
cp_end = &A [p [col+1]] ;
while (cp < cp_end)
{
- row = *cp++ ;
- if (Row [row].shared2.mark != col)
- {
- A [(Row [row].shared1.p)++] = col ;
- Row [row].shared2.mark = col ;
- }
+ row = *cp++ ;
+ if (Row [row].shared2.mark != col)
+ {
+ A [(Row [row].shared1.p)++] = col ;
+ Row [row].shared2.mark = col ;
+ }
}
- }
}
- else
- {
- /* if cols not jumbled, we don't need the mark (this is faster) */
- for (col = 0 ; col < n_col ; col++)
+ }
+ else
{
+ /* if cols not jumbled, we don't need the mark (this is faster) */
+ for (col = 0 ; col < n_col ; col++)
+ {
cp = &A [p [col]] ;
cp_end = &A [p [col+1]] ;
while (cp < cp_end)
{
- A [(Row [*cp++].shared1.p)++] = col ;
+ A [(Row [*cp++].shared1.p)++] = col ;
}
- }
}
+ }
- /* === Clear the row marks and set row degrees ========================== */
+ /* === Clear the row marks and set row degrees ========================== */
- for (row = 0 ; row < n_row ; row++)
- {
- Row [row].shared2.mark = 0 ;
- Row [row].shared1.degree = Row [row].length ;
- }
+ for (row = 0 ; row < n_row ; row++)
+ {
+ Row [row].shared2.mark = 0 ;
+ Row [row].shared1.degree = Row [row].length ;
+ }
- /* === See if we need to re-create columns ============================== */
+ /* === See if we need to re-create columns ============================== */
- if (stats [COLAMD_STATUS] == COLAMD_OK_BUT_JUMBLED)
- {
- COLAMD_DEBUG0 (("colamd: reconstructing column form, matrix jumbled\n")) ;
+ if (stats [COLAMD_STATUS] == COLAMD_OK_BUT_JUMBLED)
+ {
+ COLAMD_DEBUG0 (("colamd: reconstructing column form, matrix jumbled\n")) ;
- /* === Compute col pointers ========================================= */
+ /* === Compute col pointers ========================================= */
- /* col form of the matrix starts at A [0]. */
- /* Note, we may have a gap between the col form and the row */
- /* form if there were duplicate entries, if so, it will be */
- /* removed upon the first garbage collection */
- Col [0].start = 0 ;
- p [0] = Col [0].start ;
- for (col = 1 ; col < n_col ; col++)
- {
+ /* col form of the matrix starts at A [0]. */
+ /* Note, we may have a gap between the col form and the row */
+ /* form if there were duplicate entries, if so, it will be */
+ /* removed upon the first garbage collection */
+ Col [0].start = 0 ;
+ p [0] = Col [0].start ;
+ for (col = 1 ; col < n_col ; col++)
+ {
/* note that the lengths here are for pruned columns, i.e. */
/* no duplicate row indices will exist for these columns */
Col [col].start = Col [col-1].start + Col [col-1].length ;
p [col] = Col [col].start ;
- }
+ }
- /* === Re-create col form =========================================== */
+ /* === Re-create col form =========================================== */
- for (row = 0 ; row < n_row ; row++)
- {
+ for (row = 0 ; row < n_row ; row++)
+ {
rp = &A [Row [row].start] ;
rp_end = rp + Row [row].length ;
while (rp < rp_end)
{
- A [(p [*rp++])++] = row ;
+ A [(p [*rp++])++] = row ;
}
- }
}
+ }
- /* === Done. Matrix is not (or no longer) jumbled ====================== */
+ /* === Done. Matrix is not (or no longer) jumbled ====================== */
- return (true) ;
+ return (true) ;
}
@@ -695,83 +698,83 @@ static bool colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[
/* ========================================================================== */
/*
- Kills dense or empty columns and rows, calculates an initial score for
- each column, and places all columns in the degree lists. Not user-callable.
+ Kills dense or empty columns and rows, calculates an initial score for
+ each column, and places all columns in the degree lists. Not user-callable.
*/
-
+template <typename Index>
static void init_scoring
-(
+ (
/* === Parameters ======================================================= */
- int n_row, /* number of rows of A */
- int n_col, /* number of columns of A */
- Colamd_Row Row [], /* of size n_row+1 */
- colamd_col Col [], /* of size n_col+1 */
- int A [], /* column form and row form of A */
- int head [], /* of size n_col+1 */
+ Index n_row, /* number of rows of A */
+ Index n_col, /* number of columns of A */
+ Colamd_Row<Index> Row [], /* of size n_row+1 */
+ colamd_col<Index> Col [], /* of size n_col+1 */
+ Index A [], /* column form and row form of A */
+ Index head [], /* of size n_col+1 */
double knobs [COLAMD_KNOBS],/* parameters */
- int *p_n_row2, /* number of non-dense, non-empty rows */
- int *p_n_col2, /* number of non-dense, non-empty columns */
- int *p_max_deg /* maximum row degree */
-)
+ Index *p_n_row2, /* number of non-dense, non-empty rows */
+ Index *p_n_col2, /* number of non-dense, non-empty columns */
+ Index *p_max_deg /* maximum row degree */
+ )
{
- /* === Local variables ================================================== */
-
- int c ; /* a column index */
- int r, row ; /* a row index */
- int *cp ; /* a column pointer */
- int deg ; /* degree of a row or column */
- int *cp_end ; /* a pointer to the end of a column */
- int *new_cp ; /* new column pointer */
- int col_length ; /* length of pruned column */
- int score ; /* current column score */
- int n_col2 ; /* number of non-dense, non-empty columns */
- int n_row2 ; /* number of non-dense, non-empty rows */
- int dense_row_count ; /* remove rows with more entries than this */
- int dense_col_count ; /* remove cols with more entries than this */
- int min_score ; /* smallest column score */
- int max_deg ; /* maximum row degree */
- int next_col ; /* Used to add to degree list.*/
-
-
- /* === Extract knobs ==================================================== */
-
- dense_row_count = COLAMD_MAX (0, COLAMD_MIN (knobs [COLAMD_DENSE_ROW] * n_col, n_col)) ;
- dense_col_count = COLAMD_MAX (0, COLAMD_MIN (knobs [COLAMD_DENSE_COL] * n_row, n_row)) ;
- COLAMD_DEBUG1 (("colamd: densecount: %d %d\n", dense_row_count, dense_col_count)) ;
- max_deg = 0 ;
- n_col2 = n_col ;
- n_row2 = n_row ;
-
- /* === Kill empty columns =============================================== */
-
- /* Put the empty columns at the end in their natural order, so that LU */
- /* factorization can proceed as far as possible. */
- for (c = n_col-1 ; c >= 0 ; c--)
- {
- deg = Col [c].length ;
- if (deg == 0)
+ /* === Local variables ================================================== */
+
+ Index c ; /* a column index */
+ Index r, row ; /* a row index */
+ Index *cp ; /* a column pointer */
+ Index deg ; /* degree of a row or column */
+ Index *cp_end ; /* a pointer to the end of a column */
+ Index *new_cp ; /* new column pointer */
+ Index col_length ; /* length of pruned column */
+ Index score ; /* current column score */
+ Index n_col2 ; /* number of non-dense, non-empty columns */
+ Index n_row2 ; /* number of non-dense, non-empty rows */
+ Index dense_row_count ; /* remove rows with more entries than this */
+ Index dense_col_count ; /* remove cols with more entries than this */
+ Index min_score ; /* smallest column score */
+ Index max_deg ; /* maximum row degree */
+ Index next_col ; /* Used to add to degree list.*/
+
+
+ /* === Extract knobs ==================================================== */
+
+ dense_row_count = COLAMD_MAX (0, COLAMD_MIN (knobs [COLAMD_DENSE_ROW] * n_col, n_col)) ;
+ dense_col_count = COLAMD_MAX (0, COLAMD_MIN (knobs [COLAMD_DENSE_COL] * n_row, n_row)) ;
+ COLAMD_DEBUG1 (("colamd: densecount: %d %d\n", dense_row_count, dense_col_count)) ;
+ max_deg = 0 ;
+ n_col2 = n_col ;
+ n_row2 = n_row ;
+
+ /* === Kill empty columns =============================================== */
+
+ /* Put the empty columns at the end in their natural order, so that LU */
+ /* factorization can proceed as far as possible. */
+ for (c = n_col-1 ; c >= 0 ; c--)
{
+ deg = Col [c].length ;
+ if (deg == 0)
+ {
/* this is an empty column, kill and order it last */
Col [c].shared2.order = --n_col2 ;
KILL_PRINCIPAL_COL (c) ;
- }
}
- COLAMD_DEBUG1 (("colamd: null columns killed: %d\n", n_col - n_col2)) ;
+ }
+ COLAMD_DEBUG1 (("colamd: null columns killed: %d\n", n_col - n_col2)) ;
- /* === Kill dense columns =============================================== */
+ /* === Kill dense columns =============================================== */
- /* Put the dense columns at the end, in their natural order */
- for (c = n_col-1 ; c >= 0 ; c--)
- {
- /* skip any dead columns */
- if (COL_IS_DEAD (c))
+ /* Put the dense columns at the end, in their natural order */
+ for (c = n_col-1 ; c >= 0 ; c--)
{
+ /* skip any dead columns */
+ if (COL_IS_DEAD (c))
+ {
continue ;
- }
- deg = Col [c].length ;
- if (deg > dense_col_count)
- {
+ }
+ deg = Col [c].length ;
+ if (deg > dense_col_count)
+ {
/* this is a dense column, kill and order it last */
Col [c].shared2.order = --n_col2 ;
/* decrement the row degrees */
@@ -779,60 +782,60 @@ static void init_scoring
cp_end = cp + Col [c].length ;
while (cp < cp_end)
{
- Row [*cp++].shared1.degree-- ;
+ Row [*cp++].shared1.degree-- ;
}
KILL_PRINCIPAL_COL (c) ;
- }
}
- COLAMD_DEBUG1 (("colamd: Dense and null columns killed: %d\n", n_col - n_col2)) ;
+ }
+ COLAMD_DEBUG1 (("colamd: Dense and null columns killed: %d\n", n_col - n_col2)) ;
- /* === Kill dense and empty rows ======================================== */
+ /* === Kill dense and empty rows ======================================== */
- for (r = 0 ; r < n_row ; r++)
- {
- deg = Row [r].shared1.degree ;
- COLAMD_ASSERT (deg >= 0 && deg <= n_col) ;
- if (deg > dense_row_count || deg == 0)
+ for (r = 0 ; r < n_row ; r++)
{
+ deg = Row [r].shared1.degree ;
+ COLAMD_ASSERT (deg >= 0 && deg <= n_col) ;
+ if (deg > dense_row_count || deg == 0)
+ {
/* kill a dense or empty row */
KILL_ROW (r) ;
--n_row2 ;
- }
- else
- {
+ }
+ else
+ {
/* keep track of max degree of remaining rows */
max_deg = COLAMD_MAX (max_deg, deg) ;
- }
}
- COLAMD_DEBUG1 (("colamd: Dense and null rows killed: %d\n", n_row - n_row2)) ;
+ }
+ COLAMD_DEBUG1 (("colamd: Dense and null rows killed: %d\n", n_row - n_row2)) ;
- /* === Compute initial column scores ==================================== */
+ /* === Compute initial column scores ==================================== */
- /* At this point the row degrees are accurate. They reflect the number */
- /* of "live" (non-dense) columns in each row. No empty rows exist. */
- /* Some "live" columns may contain only dead rows, however. These are */
- /* pruned in the code below. */
+ /* At this point the row degrees are accurate. They reflect the number */
+ /* of "live" (non-dense) columns in each row. No empty rows exist. */
+ /* Some "live" columns may contain only dead rows, however. These are */
+ /* pruned in the code below. */
- /* now find the initial matlab score for each column */
- for (c = n_col-1 ; c >= 0 ; c--)
- {
- /* skip dead column */
- if (COL_IS_DEAD (c))
+ /* now find the initial matlab score for each column */
+ for (c = n_col-1 ; c >= 0 ; c--)
{
+ /* skip dead column */
+ if (COL_IS_DEAD (c))
+ {
continue ;
- }
- score = 0 ;
- cp = &A [Col [c].start] ;
- new_cp = cp ;
- cp_end = cp + Col [c].length ;
- while (cp < cp_end)
- {
+ }
+ score = 0 ;
+ cp = &A [Col [c].start] ;
+ new_cp = cp ;
+ cp_end = cp + Col [c].length ;
+ while (cp < cp_end)
+ {
/* get a row */
row = *cp++ ;
/* skip if dead */
if (ROW_IS_DEAD (row))
{
- continue ;
+ continue ;
}
/* compact the column */
*new_cp++ = row ;
@@ -840,52 +843,52 @@ static void init_scoring
score += Row [row].shared1.degree - 1 ;
/* guard against integer overflow */
score = COLAMD_MIN (score, n_col) ;
- }
- /* determine pruned column length */
- col_length = (int) (new_cp - &A [Col [c].start]) ;
- if (col_length == 0)
- {
+ }
+ /* determine pruned column length */
+ col_length = (Index) (new_cp - &A [Col [c].start]) ;
+ if (col_length == 0)
+ {
/* a newly-made null column (all rows in this col are "dense" */
/* and have already been killed) */
COLAMD_DEBUG2 (("Newly null killed: %d\n", c)) ;
Col [c].shared2.order = --n_col2 ;
KILL_PRINCIPAL_COL (c) ;
- }
- else
- {
+ }
+ else
+ {
/* set column length and set score */
COLAMD_ASSERT (score >= 0) ;
COLAMD_ASSERT (score <= n_col) ;
Col [c].length = col_length ;
Col [c].shared2.score = score ;
- }
}
- COLAMD_DEBUG1 (("colamd: Dense, null, and newly-null columns killed: %d\n",
- n_col-n_col2)) ;
+ }
+ COLAMD_DEBUG1 (("colamd: Dense, null, and newly-null columns killed: %d\n",
+ n_col-n_col2)) ;
- /* At this point, all empty rows and columns are dead. All live columns */
- /* are "clean" (containing no dead rows) and simplicial (no supercolumns */
- /* yet). Rows may contain dead columns, but all live rows contain at */
- /* least one live column. */
+ /* At this point, all empty rows and columns are dead. All live columns */
+ /* are "clean" (containing no dead rows) and simplicial (no supercolumns */
+ /* yet). Rows may contain dead columns, but all live rows contain at */
+ /* least one live column. */
- /* === Initialize degree lists ========================================== */
+ /* === Initialize degree lists ========================================== */
- /* clear the hash buckets */
- for (c = 0 ; c <= n_col ; c++)
- {
- head [c] = COLAMD_EMPTY ;
- }
- min_score = n_col ;
- /* place in reverse order, so low column indices are at the front */
- /* of the lists. This is to encourage natural tie-breaking */
- for (c = n_col-1 ; c >= 0 ; c--)
- {
- /* only add principal columns to degree lists */
- if (COL_IS_ALIVE (c))
+ /* clear the hash buckets */
+ for (c = 0 ; c <= n_col ; c++)
+ {
+ head [c] = COLAMD_EMPTY ;
+ }
+ min_score = n_col ;
+ /* place in reverse order, so low column indices are at the front */
+ /* of the lists. This is to encourage natural tie-breaking */
+ for (c = n_col-1 ; c >= 0 ; c--)
{
+ /* only add principal columns to degree lists */
+ if (COL_IS_ALIVE (c))
+ {
COLAMD_DEBUG4 (("place %d score %d minscore %d ncol %d\n",
- c, Col [c].shared2.score, min_score, n_col)) ;
+ c, Col [c].shared2.score, min_score, n_col)) ;
/* === Add column's score to DList ============================== */
@@ -906,7 +909,7 @@ static void init_scoring
/* previous pointer to this new column */
if (next_col != COLAMD_EMPTY)
{
- Col [next_col].shared3.prev = c ;
+ Col [next_col].shared3.prev = c ;
}
head [score] = c ;
@@ -914,15 +917,15 @@ static void init_scoring
min_score = COLAMD_MIN (min_score, score) ;
- }
}
+ }
- /* === Return number of remaining columns, and max row degree =========== */
+ /* === Return number of remaining columns, and max row degree =========== */
- *p_n_col2 = n_col2 ;
- *p_n_row2 = n_row2 ;
- *p_max_deg = max_deg ;
+ *p_n_col2 = n_col2 ;
+ *p_n_row2 = n_row2 ;
+ *p_max_deg = max_deg ;
}
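
Stripped of the workspace bookkeeping, init_scoring reduces to two small formulas:
a knob in [0,1] scales the opposite dimension to give an absolute density
threshold, and a column's starting score is the sum of the external degrees
(degree - 1) of its live rows, clamped to n_col. A sketch with made-up helper
names, assuming those conventions:

    #include <algorithm>
    #include <vector>

    // COLAMD_MAX (0, COLAMD_MIN (knob * n, n)) from the knob extraction above
    int dense_threshold(double knob, int n)
    {
      return std::max(0, std::min((int)(knob * n), n));
    }

    // initial "matlab" score of one column over its live rows
    int initial_score(const std::vector<int>& col_rows,
                      const std::vector<int>& row_degree, int n_col)
    {
      int score = 0;
      for (int j = 0; j < (int)col_rows.size(); ++j)
      {
        score += row_degree[col_rows[j]] - 1;  // external degree
        score = std::min(score, n_col);        // overflow guard, as above
      }
      return score;
    }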
@@ -931,235 +934,235 @@ static void init_scoring
/* ========================================================================== */
/*
- Order the principal columns of the supercolumn form of the matrix
- (no supercolumns on input). Uses a minimum approximate column minimum
- degree ordering method. Not user-callable.
+ Order the principal columns of the supercolumn form of the matrix
+ (no supercolumns on input). Uses an approximate minimum degree column
+ ordering method. Not user-callable.
*/
-
-static int find_ordering /* return the number of garbage collections */
-(
+template <typename Index>
+static Index find_ordering /* return the number of garbage collections */
+ (
/* === Parameters ======================================================= */
- int n_row, /* number of rows of A */
- int n_col, /* number of columns of A */
- int Alen, /* size of A, 2*nnz + n_col or larger */
- Colamd_Row Row [], /* of size n_row+1 */
- colamd_col Col [], /* of size n_col+1 */
- int A [], /* column form and row form of A */
- int head [], /* of size n_col+1 */
- int n_col2, /* Remaining columns to order */
- int max_deg, /* Maximum row degree */
- int pfree /* index of first free slot (2*nnz on entry) */
-)
+ Index n_row, /* number of rows of A */
+ Index n_col, /* number of columns of A */
+ Index Alen, /* size of A, 2*nnz + n_col or larger */
+ Colamd_Row<Index> Row [], /* of size n_row+1 */
+ colamd_col<Index> Col [], /* of size n_col+1 */
+ Index A [], /* column form and row form of A */
+ Index head [], /* of size n_col+1 */
+ Index n_col2, /* Remaining columns to order */
+ Index max_deg, /* Maximum row degree */
+ Index pfree /* index of first free slot (2*nnz on entry) */
+ )
{
- /* === Local variables ================================================== */
-
- int k ; /* current pivot ordering step */
- int pivot_col ; /* current pivot column */
- int *cp ; /* a column pointer */
- int *rp ; /* a row pointer */
- int pivot_row ; /* current pivot row */
- int *new_cp ; /* modified column pointer */
- int *new_rp ; /* modified row pointer */
- int pivot_row_start ; /* pointer to start of pivot row */
- int pivot_row_degree ; /* number of columns in pivot row */
- int pivot_row_length ; /* number of supercolumns in pivot row */
- int pivot_col_score ; /* score of pivot column */
- int needed_memory ; /* free space needed for pivot row */
- int *cp_end ; /* pointer to the end of a column */
- int *rp_end ; /* pointer to the end of a row */
- int row ; /* a row index */
- int col ; /* a column index */
- int max_score ; /* maximum possible score */
- int cur_score ; /* score of current column */
- unsigned int hash ; /* hash value for supernode detection */
- int head_column ; /* head of hash bucket */
- int first_col ; /* first column in hash bucket */
- int tag_mark ; /* marker value for mark array */
- int row_mark ; /* Row [row].shared2.mark */
- int set_difference ; /* set difference size of row with pivot row */
- int min_score ; /* smallest column score */
- int col_thickness ; /* "thickness" (no. of columns in a supercol) */
- int max_mark ; /* maximum value of tag_mark */
- int pivot_col_thickness ; /* number of columns represented by pivot col */
- int prev_col ; /* Used by Dlist operations. */
- int next_col ; /* Used by Dlist operations. */
- int ngarbage ; /* number of garbage collections performed */
-
-
- /* === Initialization and clear mark ==================================== */
-
- max_mark = INT_MAX - n_col ; /* INT_MAX defined in <limits.h> */
- tag_mark = clear_mark (n_row, Row) ;
- min_score = 0 ;
- ngarbage = 0 ;
- COLAMD_DEBUG1 (("colamd: Ordering, n_col2=%d\n", n_col2)) ;
-
- /* === Order the columns ================================================ */
-
- for (k = 0 ; k < n_col2 ; /* 'k' is incremented below */)
- {
+ /* === Local variables ================================================== */
+
+ Index k ; /* current pivot ordering step */
+ Index pivot_col ; /* current pivot column */
+ Index *cp ; /* a column pointer */
+ Index *rp ; /* a row pointer */
+ Index pivot_row ; /* current pivot row */
+ Index *new_cp ; /* modified column pointer */
+ Index *new_rp ; /* modified row pointer */
+ Index pivot_row_start ; /* pointer to start of pivot row */
+ Index pivot_row_degree ; /* number of columns in pivot row */
+ Index pivot_row_length ; /* number of supercolumns in pivot row */
+ Index pivot_col_score ; /* score of pivot column */
+ Index needed_memory ; /* free space needed for pivot row */
+ Index *cp_end ; /* pointer to the end of a column */
+ Index *rp_end ; /* pointer to the end of a row */
+ Index row ; /* a row index */
+ Index col ; /* a column index */
+ Index max_score ; /* maximum possible score */
+ Index cur_score ; /* score of current column */
+ unsigned int hash ; /* hash value for supernode detection */
+ Index head_column ; /* head of hash bucket */
+ Index first_col ; /* first column in hash bucket */
+ Index tag_mark ; /* marker value for mark array */
+ Index row_mark ; /* Row [row].shared2.mark */
+ Index set_difference ; /* set difference size of row with pivot row */
+ Index min_score ; /* smallest column score */
+ Index col_thickness ; /* "thickness" (no. of columns in a supercol) */
+ Index max_mark ; /* maximum value of tag_mark */
+ Index pivot_col_thickness ; /* number of columns represented by pivot col */
+ Index prev_col ; /* Used by Dlist operations. */
+ Index next_col ; /* Used by Dlist operations. */
+ Index ngarbage ; /* number of garbage collections performed */
+
+
+ /* === Initialization and clear mark ==================================== */
+
+ max_mark = INT_MAX - n_col ; /* INT_MAX defined in <limits.h> */
+ tag_mark = Eigen::internal::clear_mark (n_row, Row) ;
+ min_score = 0 ;
+ ngarbage = 0 ;
+ COLAMD_DEBUG1 (("colamd: Ordering, n_col2=%d\n", n_col2)) ;
+
+ /* === Order the columns ================================================ */
+
+ for (k = 0 ; k < n_col2 ; /* 'k' is incremented below */)
+ {
- /* === Select pivot column, and order it ============================ */
+ /* === Select pivot column, and order it ============================ */
- /* make sure degree list isn't empty */
- COLAMD_ASSERT (min_score >= 0) ;
- COLAMD_ASSERT (min_score <= n_col) ;
- COLAMD_ASSERT (head [min_score] >= COLAMD_EMPTY) ;
+ /* make sure degree list isn't empty */
+ COLAMD_ASSERT (min_score >= 0) ;
+ COLAMD_ASSERT (min_score <= n_col) ;
+ COLAMD_ASSERT (head [min_score] >= COLAMD_EMPTY) ;
- /* get pivot column from head of minimum degree list */
- while (head [min_score] == COLAMD_EMPTY && min_score < n_col)
- {
+ /* get pivot column from head of minimum degree list */
+ while (head [min_score] == COLAMD_EMPTY && min_score < n_col)
+ {
min_score++ ;
- }
- pivot_col = head [min_score] ;
- COLAMD_ASSERT (pivot_col >= 0 && pivot_col <= n_col) ;
- next_col = Col [pivot_col].shared4.degree_next ;
- head [min_score] = next_col ;
- if (next_col != COLAMD_EMPTY)
- {
+ }
+ pivot_col = head [min_score] ;
+ COLAMD_ASSERT (pivot_col >= 0 && pivot_col <= n_col) ;
+ next_col = Col [pivot_col].shared4.degree_next ;
+ head [min_score] = next_col ;
+ if (next_col != COLAMD_EMPTY)
+ {
Col [next_col].shared3.prev = COLAMD_EMPTY ;
- }
+ }
- COLAMD_ASSERT (COL_IS_ALIVE (pivot_col)) ;
- COLAMD_DEBUG3 (("Pivot col: %d\n", pivot_col)) ;
+ COLAMD_ASSERT (COL_IS_ALIVE (pivot_col)) ;
+ COLAMD_DEBUG3 (("Pivot col: %d\n", pivot_col)) ;
- /* remember score for defrag check */
- pivot_col_score = Col [pivot_col].shared2.score ;
+ /* remember score for defrag check */
+ pivot_col_score = Col [pivot_col].shared2.score ;
- /* the pivot column is the kth column in the pivot order */
- Col [pivot_col].shared2.order = k ;
+ /* the pivot column is the kth column in the pivot order */
+ Col [pivot_col].shared2.order = k ;
- /* increment order count by column thickness */
- pivot_col_thickness = Col [pivot_col].shared1.thickness ;
- k += pivot_col_thickness ;
- COLAMD_ASSERT (pivot_col_thickness > 0) ;
+ /* increment order count by column thickness */
+ pivot_col_thickness = Col [pivot_col].shared1.thickness ;
+ k += pivot_col_thickness ;
+ COLAMD_ASSERT (pivot_col_thickness > 0) ;
- /* === Garbage_collection, if necessary ============================= */
+ /* === Garbage_collection, if necessary ============================= */
- needed_memory = COLAMD_MIN (pivot_col_score, n_col - k) ;
- if (pfree + needed_memory >= Alen)
- {
- pfree = garbage_collection (n_row, n_col, Row, Col, A, &A [pfree]) ;
+ needed_memory = COLAMD_MIN (pivot_col_score, n_col - k) ;
+ if (pfree + needed_memory >= Alen)
+ {
+ pfree = Eigen::internal::garbage_collection (n_row, n_col, Row, Col, A, &A [pfree]) ;
ngarbage++ ;
/* after garbage collection we will have enough */
COLAMD_ASSERT (pfree + needed_memory < Alen) ;
/* garbage collection has wiped out the Row[].shared2.mark array */
- tag_mark = clear_mark (n_row, Row) ;
+ tag_mark = Eigen::internal::clear_mark (n_row, Row) ;
- }
+ }
- /* === Compute pivot row pattern ==================================== */
+ /* === Compute pivot row pattern ==================================== */
- /* get starting location for this new merged row */
- pivot_row_start = pfree ;
+ /* get starting location for this new merged row */
+ pivot_row_start = pfree ;
- /* initialize new row counts to zero */
- pivot_row_degree = 0 ;
+ /* initialize new row counts to zero */
+ pivot_row_degree = 0 ;
- /* tag pivot column as having been visited so it isn't included */
- /* in merged pivot row */
- Col [pivot_col].shared1.thickness = -pivot_col_thickness ;
+ /* tag pivot column as having been visited so it isn't included */
+ /* in merged pivot row */
+ Col [pivot_col].shared1.thickness = -pivot_col_thickness ;
- /* pivot row is the union of all rows in the pivot column pattern */
- cp = &A [Col [pivot_col].start] ;
- cp_end = cp + Col [pivot_col].length ;
- while (cp < cp_end)
- {
+ /* pivot row is the union of all rows in the pivot column pattern */
+ cp = &A [Col [pivot_col].start] ;
+ cp_end = cp + Col [pivot_col].length ;
+ while (cp < cp_end)
+ {
/* get a row */
row = *cp++ ;
COLAMD_DEBUG4 (("Pivot col pattern %d %d\n", ROW_IS_ALIVE (row), row)) ;
/* skip if row is dead */
if (ROW_IS_DEAD (row))
{
- continue ;
+ continue ;
}
rp = &A [Row [row].start] ;
rp_end = rp + Row [row].length ;
while (rp < rp_end)
{
- /* get a column */
- col = *rp++ ;
- /* add the column, if alive and untagged */
- col_thickness = Col [col].shared1.thickness ;
- if (col_thickness > 0 && COL_IS_ALIVE (col))
- {
- /* tag column in pivot row */
- Col [col].shared1.thickness = -col_thickness ;
- COLAMD_ASSERT (pfree < Alen) ;
- /* place column in pivot row */
- A [pfree++] = col ;
- pivot_row_degree += col_thickness ;
- }
+ /* get a column */
+ col = *rp++ ;
+ /* add the column, if alive and untagged */
+ col_thickness = Col [col].shared1.thickness ;
+ if (col_thickness > 0 && COL_IS_ALIVE (col))
+ {
+ /* tag column in pivot row */
+ Col [col].shared1.thickness = -col_thickness ;
+ COLAMD_ASSERT (pfree < Alen) ;
+ /* place column in pivot row */
+ A [pfree++] = col ;
+ pivot_row_degree += col_thickness ;
+ }
}
- }
+ }
- /* clear tag on pivot column */
- Col [pivot_col].shared1.thickness = pivot_col_thickness ;
- max_deg = COLAMD_MAX (max_deg, pivot_row_degree) ;
+ /* clear tag on pivot column */
+ Col [pivot_col].shared1.thickness = pivot_col_thickness ;
+ max_deg = COLAMD_MAX (max_deg, pivot_row_degree) ;
- /* === Kill all rows used to construct pivot row ==================== */
+ /* === Kill all rows used to construct pivot row ==================== */
- /* also kill pivot row, temporarily */
- cp = &A [Col [pivot_col].start] ;
- cp_end = cp + Col [pivot_col].length ;
- while (cp < cp_end)
- {
+ /* also kill pivot row, temporarily */
+ cp = &A [Col [pivot_col].start] ;
+ cp_end = cp + Col [pivot_col].length ;
+ while (cp < cp_end)
+ {
/* may be killing an already dead row */
row = *cp++ ;
COLAMD_DEBUG3 (("Kill row in pivot col: %d\n", row)) ;
KILL_ROW (row) ;
- }
+ }
- /* === Select a row index to use as the new pivot row =============== */
+ /* === Select a row index to use as the new pivot row =============== */
- pivot_row_length = pfree - pivot_row_start ;
- if (pivot_row_length > 0)
- {
+ pivot_row_length = pfree - pivot_row_start ;
+ if (pivot_row_length > 0)
+ {
/* pick the "pivot" row arbitrarily (first row in col) */
pivot_row = A [Col [pivot_col].start] ;
COLAMD_DEBUG3 (("Pivotal row is %d\n", pivot_row)) ;
- }
- else
- {
+ }
+ else
+ {
/* there is no pivot row, since it is of zero length */
pivot_row = COLAMD_EMPTY ;
COLAMD_ASSERT (pivot_row_length == 0) ;
- }
- COLAMD_ASSERT (Col [pivot_col].length > 0 || pivot_row_length == 0) ;
+ }
+ COLAMD_ASSERT (Col [pivot_col].length > 0 || pivot_row_length == 0) ;
- /* === Approximate degree computation =============================== */
+ /* === Approximate degree computation =============================== */
- /* Here begins the computation of the approximate degree. The column */
- /* score is the sum of the pivot row "length", plus the size of the */
- /* set differences of each row in the column minus the pattern of the */
- /* pivot row itself. The column ("thickness") itself is also */
- /* excluded from the column score (we thus use an approximate */
- /* external degree). */
+ /* Here begins the computation of the approximate degree. The column */
+ /* score is the sum of the pivot row "length", plus the size of the */
+ /* set differences of each row in the column minus the pattern of the */
+ /* pivot row itself. The column ("thickness") itself is also */
+ /* excluded from the column score (we thus use an approximate */
+ /* external degree). */
- /* The time taken by the following code (compute set differences, and */
- /* add them up) is proportional to the size of the data structure */
- /* being scanned - that is, the sum of the sizes of each column in */
- /* the pivot row. Thus, the amortized time to compute a column score */
- /* is proportional to the size of that column (where size, in this */
- /* context, is the column "length", or the number of row indices */
- /* in that column). The number of row indices in a column is */
- /* monotonically non-decreasing, from the length of the original */
- /* column on input to colamd. */
+ /* The time taken by the following code (compute set differences, and */
+ /* add them up) is proportional to the size of the data structure */
+ /* being scanned - that is, the sum of the sizes of each column in */
+ /* the pivot row. Thus, the amortized time to compute a column score */
+ /* is proportional to the size of that column (where size, in this */
+ /* context, is the column "length", or the number of row indices */
+ /* in that column). The number of row indices in a column is */
+ /* monotonically non-decreasing, from the length of the original */
+ /* column on input to colamd. */
- /* === Compute set differences ====================================== */
+ /* === Compute set differences ====================================== */
- COLAMD_DEBUG3 (("** Computing set differences phase. **\n")) ;
+ COLAMD_DEBUG3 (("** Computing set differences phase. **\n")) ;
- /* pivot row is currently dead - it will be revived later. */
+ /* pivot row is currently dead - it will be revived later. */
- COLAMD_DEBUG3 (("Pivot row: ")) ;
- /* for each column in pivot row */
- rp = &A [pivot_row_start] ;
- rp_end = rp + pivot_row_length ;
- while (rp < rp_end)
- {
+ COLAMD_DEBUG3 (("Pivot row: ")) ;
+ /* for each column in pivot row */
+ rp = &A [pivot_row_start] ;
+ rp_end = rp + pivot_row_length ;
+ while (rp < rp_end)
+ {
col = *rp++ ;
COLAMD_ASSERT (COL_IS_ALIVE (col) && col != pivot_col) ;
COLAMD_DEBUG3 (("Col: %d\n", col)) ;
@@ -1179,15 +1182,15 @@ static int find_ordering /* return the number of garbage collections */
COLAMD_ASSERT (cur_score >= COLAMD_EMPTY) ;
if (prev_col == COLAMD_EMPTY)
{
- head [cur_score] = next_col ;
+ head [cur_score] = next_col ;
}
else
{
- Col [prev_col].shared4.degree_next = next_col ;
+ Col [prev_col].shared4.degree_next = next_col ;
}
if (next_col != COLAMD_EMPTY)
{
- Col [next_col].shared3.prev = prev_col ;
+ Col [next_col].shared3.prev = prev_col ;
}
/* === Scan the column ========================================== */
@@ -1196,49 +1199,49 @@ static int find_ordering /* return the number of garbage collections */
cp_end = cp + Col [col].length ;
while (cp < cp_end)
{
- /* get a row */
- row = *cp++ ;
- row_mark = Row [row].shared2.mark ;
- /* skip if dead */
- if (ROW_IS_MARKED_DEAD (row_mark))
- {
- continue ;
- }
- COLAMD_ASSERT (row != pivot_row) ;
- set_difference = row_mark - tag_mark ;
- /* check if the row has been seen yet */
- if (set_difference < 0)
- {
- COLAMD_ASSERT (Row [row].shared1.degree <= max_deg) ;
- set_difference = Row [row].shared1.degree ;
- }
- /* subtract column thickness from this row's set difference */
- set_difference -= col_thickness ;
- COLAMD_ASSERT (set_difference >= 0) ;
- /* absorb this row if the set difference becomes zero */
- if (set_difference == 0)
- {
- COLAMD_DEBUG3 (("aggressive absorption. Row: %d\n", row)) ;
- KILL_ROW (row) ;
- }
- else
- {
- /* save the new mark */
- Row [row].shared2.mark = set_difference + tag_mark ;
- }
+ /* get a row */
+ row = *cp++ ;
+ row_mark = Row [row].shared2.mark ;
+ /* skip if dead */
+ if (ROW_IS_MARKED_DEAD (row_mark))
+ {
+ continue ;
+ }
+ COLAMD_ASSERT (row != pivot_row) ;
+ set_difference = row_mark - tag_mark ;
+ /* check if the row has been seen yet */
+ if (set_difference < 0)
+ {
+ COLAMD_ASSERT (Row [row].shared1.degree <= max_deg) ;
+ set_difference = Row [row].shared1.degree ;
+ }
+ /* subtract column thickness from this row's set difference */
+ set_difference -= col_thickness ;
+ COLAMD_ASSERT (set_difference >= 0) ;
+ /* absorb this row if the set difference becomes zero */
+ if (set_difference == 0)
+ {
+ COLAMD_DEBUG3 (("aggressive absorption. Row: %d\n", row)) ;
+ KILL_ROW (row) ;
+ }
+ else
+ {
+ /* save the new mark */
+ Row [row].shared2.mark = set_difference + tag_mark ;
+ }
}
- }
+ }
- /* === Add up set differences for each column ======================= */
+ /* === Add up set differences for each column ======================= */
- COLAMD_DEBUG3 (("** Adding set differences phase. **\n")) ;
+ COLAMD_DEBUG3 (("** Adding set differences phase. **\n")) ;
- /* for each column in pivot row */
- rp = &A [pivot_row_start] ;
- rp_end = rp + pivot_row_length ;
- while (rp < rp_end)
- {
+ /* for each column in pivot row */
+ rp = &A [pivot_row_start] ;
+ rp_end = rp + pivot_row_length ;
+ while (rp < rp_end)
+ {
/* get a column */
col = *rp++ ;
COLAMD_ASSERT (COL_IS_ALIVE (col) && col != pivot_col) ;
@@ -1253,119 +1256,117 @@ static int find_ordering /* return the number of garbage collections */
while (cp < cp_end)
{
- /* get a row */
- row = *cp++ ;
- COLAMD_ASSERT(row >= 0 && row < n_row) ;
- row_mark = Row [row].shared2.mark ;
- /* skip if dead */
- if (ROW_IS_MARKED_DEAD (row_mark))
- {
- continue ;
- }
- COLAMD_ASSERT (row_mark > tag_mark) ;
- /* compact the column */
- *new_cp++ = row ;
- /* compute hash function */
- hash += row ;
- /* add set difference */
- cur_score += row_mark - tag_mark ;
- /* integer overflow... */
- cur_score = COLAMD_MIN (cur_score, n_col) ;
+ /* get a row */
+ row = *cp++ ;
+ COLAMD_ASSERT(row >= 0 && row < n_row) ;
+ row_mark = Row [row].shared2.mark ;
+ /* skip if dead */
+ if (ROW_IS_MARKED_DEAD (row_mark))
+ {
+ continue ;
+ }
+ COLAMD_ASSERT (row_mark > tag_mark) ;
+ /* compact the column */
+ *new_cp++ = row ;
+ /* compute hash function */
+ hash += row ;
+ /* add set difference */
+ cur_score += row_mark - tag_mark ;
+ /* integer overflow... */
+ cur_score = COLAMD_MIN (cur_score, n_col) ;
}
/* recompute the column's length */
- Col [col].length = (int) (new_cp - &A [Col [col].start]) ;
+ Col [col].length = (Index) (new_cp - &A [Col [col].start]) ;
/* === Further mass elimination ================================= */
if (Col [col].length == 0)
{
- COLAMD_DEBUG4 (("further mass elimination. Col: %d\n", col)) ;
- /* nothing left but the pivot row in this column */
- KILL_PRINCIPAL_COL (col) ;
- pivot_row_degree -= Col [col].shared1.thickness ;
- COLAMD_ASSERT (pivot_row_degree >= 0) ;
- /* order it */
- Col [col].shared2.order = k ;
- /* increment order count by column thickness */
- k += Col [col].shared1.thickness ;
+ COLAMD_DEBUG4 (("further mass elimination. Col: %d\n", col)) ;
+ /* nothing left but the pivot row in this column */
+ KILL_PRINCIPAL_COL (col) ;
+ pivot_row_degree -= Col [col].shared1.thickness ;
+ COLAMD_ASSERT (pivot_row_degree >= 0) ;
+ /* order it */
+ Col [col].shared2.order = k ;
+ /* increment order count by column thickness */
+ k += Col [col].shared1.thickness ;
}
else
{
- /* === Prepare for supercolumn detection ==================== */
-
- COLAMD_DEBUG4 (("Preparing supercol detection for Col: %d.\n", col)) ;
-
- /* save score so far */
- Col [col].shared2.score = cur_score ;
-
- /* add column to hash table, for supercolumn detection */
- hash %= n_col + 1 ;
-
- COLAMD_DEBUG4 ((" Hash = %d, n_col = %d.\n", hash, n_col)) ;
- COLAMD_ASSERT (hash <= n_col) ;
-
- head_column = head [hash] ;
- if (head_column > COLAMD_EMPTY)
- {
- /* degree list "hash" is non-empty, use prev (shared3) of */
- /* first column in degree list as head of hash bucket */
- first_col = Col [head_column].shared3.headhash ;
- Col [head_column].shared3.headhash = col ;
- }
- else
- {
- /* degree list "hash" is empty, use head as hash bucket */
- first_col = - (head_column + 2) ;
- head [hash] = - (col + 2) ;
- }
- Col [col].shared4.hash_next = first_col ;
-
- /* save hash function in Col [col].shared3.hash */
- Col [col].shared3.hash = (int) hash ;
- COLAMD_ASSERT (COL_IS_ALIVE (col)) ;
+ /* === Prepare for supercolumn detection ==================== */
+
+ COLAMD_DEBUG4 (("Preparing supercol detection for Col: %d.\n", col)) ;
+
+ /* save score so far */
+ Col [col].shared2.score = cur_score ;
+
+ /* add column to hash table, for supercolumn detection */
+ hash %= n_col + 1 ;
+
+ COLAMD_DEBUG4 ((" Hash = %d, n_col = %d.\n", hash, n_col)) ;
+ COLAMD_ASSERT (hash <= n_col) ;
+
+ head_column = head [hash] ;
+ if (head_column > COLAMD_EMPTY)
+ {
+ /* degree list "hash" is non-empty, use prev (shared3) of */
+ /* first column in degree list as head of hash bucket */
+ first_col = Col [head_column].shared3.headhash ;
+ Col [head_column].shared3.headhash = col ;
+ }
+ else
+ {
+ /* degree list "hash" is empty, use head as hash bucket */
+ first_col = - (head_column + 2) ;
+ head [hash] = - (col + 2) ;
+ }
+ Col [col].shared4.hash_next = first_col ;
+
+ /* save hash function in Col [col].shared3.hash */
+ Col [col].shared3.hash = (Index) hash ;
+ COLAMD_ASSERT (COL_IS_ALIVE (col)) ;
}
- }
-
- /* The approximate external column degree is now computed. */
+ }
- /* === Supercolumn detection ======================================== */
+ /* The approximate external column degree is now computed. */
- COLAMD_DEBUG3 (("** Supercolumn detection phase. **\n")) ;
+ /* === Supercolumn detection ======================================== */
- detect_super_cols (
+ COLAMD_DEBUG3 (("** Supercolumn detection phase. **\n")) ;
- Col, A, head, pivot_row_start, pivot_row_length) ;
+ Eigen::internal::detect_super_cols (Col, A, head, pivot_row_start, pivot_row_length) ;
- /* === Kill the pivotal column ====================================== */
+ /* === Kill the pivotal column ====================================== */
- KILL_PRINCIPAL_COL (pivot_col) ;
+ KILL_PRINCIPAL_COL (pivot_col) ;
- /* === Clear mark =================================================== */
+ /* === Clear mark =================================================== */
- tag_mark += (max_deg + 1) ;
- if (tag_mark >= max_mark)
- {
+ tag_mark += (max_deg + 1) ;
+ if (tag_mark >= max_mark)
+ {
COLAMD_DEBUG2 (("clearing tag_mark\n")) ;
- tag_mark = clear_mark (n_row, Row) ;
- }
+ tag_mark = Eigen::internal::clear_mark (n_row, Row) ;
+ }
- /* === Finalize the new pivot row, and column scores ================ */
+ /* === Finalize the new pivot row, and column scores ================ */
- COLAMD_DEBUG3 (("** Finalize scores phase. **\n")) ;
+ COLAMD_DEBUG3 (("** Finalize scores phase. **\n")) ;
- /* for each column in pivot row */
- rp = &A [pivot_row_start] ;
- /* compact the pivot row */
- new_rp = rp ;
- rp_end = rp + pivot_row_length ;
- while (rp < rp_end)
- {
+ /* for each column in pivot row */
+ rp = &A [pivot_row_start] ;
+ /* compact the pivot row */
+ new_rp = rp ;
+ rp_end = rp + pivot_row_length ;
+ while (rp < rp_end)
+ {
col = *rp++ ;
/* skip dead columns */
if (COL_IS_DEAD (col))
{
- continue ;
+ continue ;
}
*new_rp++ = col ;
/* add new pivot row to column */
@@ -1403,32 +1404,32 @@ static int find_ordering /* return the number of garbage collections */
Col [col].shared3.prev = COLAMD_EMPTY ;
if (next_col != COLAMD_EMPTY)
{
- Col [next_col].shared3.prev = col ;
+ Col [next_col].shared3.prev = col ;
}
head [cur_score] = col ;
/* see if this score is less than current min */
min_score = COLAMD_MIN (min_score, cur_score) ;
- }
+ }
- /* === Resurrect the new pivot row ================================== */
+ /* === Resurrect the new pivot row ================================== */
- if (pivot_row_degree > 0)
- {
+ if (pivot_row_degree > 0)
+ {
/* update pivot row length to reflect any cols that were killed */
/* during super-col detection and mass elimination */
Row [pivot_row].start = pivot_row_start ;
- Row [pivot_row].length = (int) (new_rp - &A[pivot_row_start]) ;
+ Row [pivot_row].length = (Index) (new_rp - &A[pivot_row_start]) ;
Row [pivot_row].shared1.degree = pivot_row_degree ;
Row [pivot_row].shared2.mark = 0 ;
/* pivot row is no longer dead */
- }
}
+ }
- /* === All principal columns have now been ordered ====================== */
+ /* === All principal columns have now been ordered ====================== */
- return (ngarbage) ;
+ return (ngarbage) ;
}
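
One device worth isolating from find_ordering is the tag_mark idiom: instead of
zeroing Row [].shared2.mark after every pivot, the code advances tag_mark past
any value it may have stored (tag_mark += max_deg + 1) and only pays for a real
clear_mark() when the counter approaches INT_MAX. A detached sketch of the idiom,
assuming offsets are non-negative and bounded by max_offset:

    #include <algorithm>
    #include <climits>
    #include <vector>

    struct MarkArray
    {
      std::vector<int> mark;  // one slot per row, initially 0
      int tag;                // values below tag count as "unseen"
      MarkArray(int n) : mark(n, 0), tag(1) {}

      bool seen(int r) const { return mark[r] >= tag; }
      void set(int r, int offset) { mark[r] = tag + offset; }

      void advance(int max_offset)        // the O(1) "clear"
      {
        tag += max_offset + 1;
        if (tag >= INT_MAX - max_offset)  // rare: genuine reset
        {
          std::fill(mark.begin(), mark.end(), 0);
          tag = 1;
        }
      }
    };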
@@ -1437,47 +1438,47 @@ static int find_ordering /* return the number of garbage collections */
/* ========================================================================== */
/*
- The find_ordering routine has ordered all of the principal columns (the
- representatives of the supercolumns). The non-principal columns have not
- yet been ordered. This routine orders those columns by walking up the
- parent tree (a column is a child of the column which absorbed it). The
- final permutation vector is then placed in p [0 ... n_col-1], with p [0]
- being the first column, and p [n_col-1] being the last. It doesn't look
- like it at first glance, but be assured that this routine takes time linear
- in the number of columns. Although not immediately obvious, the time
- taken by this routine is O (n_col), that is, linear in the number of
- columns. Not user-callable.
+ The find_ordering routine has ordered all of the principal columns (the
+ representatives of the supercolumns). The non-principal columns have not
+ yet been ordered. This routine orders those columns by walking up the
+ parent tree (a column is a child of the column which absorbed it). The
+ final permutation vector is then placed in p [0 ... n_col-1], with p [0]
+ being the first column, and p [n_col-1] being the last. Although it does
+ not look like it at first glance, this routine takes time linear in the
+ number of columns, i.e. O (n_col). Not user-callable.
+ columns. Not user-callable.
*/
-
+template <typename Index>
static inline void order_children
(
- /* === Parameters ======================================================= */
+ /* === Parameters ======================================================= */
- int n_col, /* number of columns of A */
- colamd_col Col [], /* of size n_col+1 */
- int p [] /* p [0 ... n_col-1] is the column permutation*/
-)
+ Index n_col, /* number of columns of A */
+ colamd_col<Index> Col [], /* of size n_col+1 */
+ Index p [] /* p [0 ... n_col-1] is the column permutation*/
+ )
{
- /* === Local variables ================================================== */
+ /* === Local variables ================================================== */
- int i ; /* loop counter for all columns */
- int c ; /* column index */
- int parent ; /* index of column's parent */
- int order ; /* column's order */
+ Index i ; /* loop counter for all columns */
+ Index c ; /* column index */
+ Index parent ; /* index of column's parent */
+ Index order ; /* column's order */
- /* === Order each non-principal column ================================== */
+ /* === Order each non-principal column ================================== */
- for (i = 0 ; i < n_col ; i++)
- {
- /* find an un-ordered non-principal column */
- COLAMD_ASSERT (COL_IS_DEAD (i)) ;
- if (!COL_IS_DEAD_PRINCIPAL (i) && Col [i].shared2.order == COLAMD_EMPTY)
+ for (i = 0 ; i < n_col ; i++)
{
+ /* find an un-ordered non-principal column */
+ COLAMD_ASSERT (COL_IS_DEAD (i)) ;
+ if (!COL_IS_DEAD_PRINCIPAL (i) && Col [i].shared2.order == COLAMD_EMPTY)
+ {
parent = i ;
/* once found, find its principal parent */
do
{
- parent = Col [parent].shared1.parent ;
+ parent = Col [parent].shared1.parent ;
} while (!COL_IS_DEAD_PRINCIPAL (parent)) ;
/* now, order all un-ordered non-principal columns along path */
@@ -1488,32 +1489,32 @@ static inline void order_children
do
{
- COLAMD_ASSERT (Col [c].shared2.order == COLAMD_EMPTY) ;
+ COLAMD_ASSERT (Col [c].shared2.order == COLAMD_EMPTY) ;
- /* order this column */
- Col [c].shared2.order = order++ ;
- /* collaps tree */
- Col [c].shared1.parent = parent ;
+ /* order this column */
+ Col [c].shared2.order = order++ ;
+ /* collapse tree */
+ Col [c].shared1.parent = parent ;
- /* get immediate parent of this column */
- c = Col [c].shared1.parent ;
+ /* get immediate parent of this column */
+ c = Col [c].shared1.parent ;
- /* continue until we hit an ordered column. There are */
- /* guarranteed not to be anymore unordered columns */
- /* above an ordered column */
+ /* continue until we hit an ordered column. There are */
+ /* guaranteed not to be any more unordered columns */
+ /* above an ordered column */
} while (Col [c].shared2.order == COLAMD_EMPTY) ;
/* re-order the super_col parent to largest order for this group */
Col [parent].shared2.order = order ;
- }
}
+ }
- /* === Generate the permutation ========================================= */
+ /* === Generate the permutation ========================================= */
- for (c = 0 ; c < n_col ; c++)
- {
- p [Col [c].shared2.order] = c ;
- }
+ for (c = 0 ; c < n_col ; c++)
+ {
+ p [Col [c].shared2.order] = c ;
+ }
}
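
The walk above is a find-with-path-compression over the column absorption forest,
the same primitive as in a union-find structure. A minimal sketch on plain
arrays, where a root is marked by parent[c] == c (the header flags them with
COL_IS_DEAD_PRINCIPAL instead):

    #include <vector>

    // Return the principal representative of column c, making every column
    // on the path point directly at it (the "collapse tree" step above).
    int find_principal(std::vector<int>& parent, int c)
    {
      int root = c;
      while (parent[root] != root)  // walk up to the principal column
        root = parent[root];
      while (parent[c] != root)     // second pass: compress the path
      {
        int up = parent[c];
        parent[c] = root;
        c = up;
      }
      return root;
    }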
@@ -1522,94 +1523,94 @@ static inline void order_children
/* ========================================================================== */
/*
- Detects supercolumns by finding matches between columns in the hash buckets.
- Check amongst columns in the set A [row_start ... row_start + row_length-1].
- The columns under consideration are currently *not* in the degree lists,
- and have already been placed in the hash buckets.
+ Detects supercolumns by finding matches between columns in the hash buckets.
+ Check amongst columns in the set A [row_start ... row_start + row_length-1].
+ The columns under consideration are currently *not* in the degree lists,
+ and have already been placed in the hash buckets.
- The hash bucket for columns whose hash function is equal to h is stored
- as follows:
+ The hash bucket for columns whose hash function is equal to h is stored
+ as follows:
if head [h] is >= 0, then head [h] contains a degree list, so:
- head [h] is the first column in degree bucket h.
- Col [head [h]].headhash gives the first column in hash bucket h.
+ head [h] is the first column in degree bucket h.
+ Col [head [h]].headhash gives the first column in hash bucket h.
otherwise, the degree list is empty, and:
- -(head [h] + 2) is the first column in hash bucket h.
+ -(head [h] + 2) is the first column in hash bucket h.
- For a column c in a hash bucket, Col [c].shared3.prev is NOT a "previous
- column" pointer. Col [c].shared3.hash is used instead as the hash number
- for that column. The value of Col [c].shared4.hash_next is the next column
- in the same hash bucket.
+ For a column c in a hash bucket, Col [c].shared3.prev is NOT a "previous
+ column" pointer. Col [c].shared3.hash is used instead as the hash number
+ for that column. The value of Col [c].shared4.hash_next is the next column
+ in the same hash bucket.
- Assuming no, or "few" hash collisions, the time taken by this routine is
- linear in the sum of the sizes (lengths) of each column whose score has
- just been computed in the approximate degree computation.
- Not user-callable.
+ Assuming no, or "few" hash collisions, the time taken by this routine is
+ linear in the sum of the sizes (lengths) of each column whose score has
+ just been computed in the approximate degree computation.
+ Not user-callable.
*/
-
+template <typename Index>
static void detect_super_cols
(
- /* === Parameters ======================================================= */
-
- colamd_col Col [], /* of size n_col+1 */
- int A [], /* row indices of A */
- int head [], /* head of degree lists and hash buckets */
- int row_start, /* pointer to set of columns to check */
- int row_length /* number of columns to check */
+ /* === Parameters ======================================================= */
+
+ colamd_col<Index> Col [], /* of size n_col+1 */
+ Index A [], /* row indices of A */
+ Index head [], /* head of degree lists and hash buckets */
+ Index row_start, /* pointer to set of columns to check */
+ Index row_length /* number of columns to check */
)
{
- /* === Local variables ================================================== */
-
- int hash ; /* hash value for a column */
- int *rp ; /* pointer to a row */
- int c ; /* a column index */
- int super_c ; /* column index of the column to absorb into */
- int *cp1 ; /* column pointer for column super_c */
- int *cp2 ; /* column pointer for column c */
- int length ; /* length of column super_c */
- int prev_c ; /* column preceding c in hash bucket */
- int i ; /* loop counter */
- int *rp_end ; /* pointer to the end of the row */
- int col ; /* a column index in the row to check */
- int head_column ; /* first column in hash bucket or degree list */
- int first_col ; /* first column in hash bucket */
-
- /* === Consider each column in the row ================================== */
-
- rp = &A [row_start] ;
- rp_end = rp + row_length ;
- while (rp < rp_end)
- {
- col = *rp++ ;
- if (COL_IS_DEAD (col))
+ /* === Local variables ================================================== */
+
+ Index hash ; /* hash value for a column */
+ Index *rp ; /* pointer to a row */
+ Index c ; /* a column index */
+ Index super_c ; /* column index of the column to absorb into */
+ Index *cp1 ; /* column pointer for column super_c */
+ Index *cp2 ; /* column pointer for column c */
+ Index length ; /* length of column super_c */
+ Index prev_c ; /* column preceding c in hash bucket */
+ Index i ; /* loop counter */
+ Index *rp_end ; /* pointer to the end of the row */
+ Index col ; /* a column index in the row to check */
+ Index head_column ; /* first column in hash bucket or degree list */
+ Index first_col ; /* first column in hash bucket */
+
+ /* === Consider each column in the row ================================== */
+
+ rp = &A [row_start] ;
+ rp_end = rp + row_length ;
+ while (rp < rp_end)
{
+ col = *rp++ ;
+ if (COL_IS_DEAD (col))
+ {
continue ;
- }
+ }
- /* get hash number for this column */
- hash = Col [col].shared3.hash ;
- COLAMD_ASSERT (hash <= n_col) ;
+ /* get hash number for this column */
+ hash = Col [col].shared3.hash ;
+ COLAMD_ASSERT (hash <= n_col) ;
- /* === Get the first column in this hash bucket ===================== */
+ /* === Get the first column in this hash bucket ===================== */
- head_column = head [hash] ;
- if (head_column > COLAMD_EMPTY)
- {
+ head_column = head [hash] ;
+ if (head_column > COLAMD_EMPTY)
+ {
first_col = Col [head_column].shared3.headhash ;
- }
- else
- {
+ }
+ else
+ {
first_col = - (head_column + 2) ;
- }
+ }
- /* === Consider each column in the hash bucket ====================== */
+ /* === Consider each column in the hash bucket ====================== */
- for (super_c = first_col ; super_c != COLAMD_EMPTY ;
- super_c = Col [super_c].shared4.hash_next)
- {
+ for (super_c = first_col ; super_c != COLAMD_EMPTY ;
+ super_c = Col [super_c].shared4.hash_next)
+ {
COLAMD_ASSERT (COL_IS_ALIVE (super_c)) ;
COLAMD_ASSERT (Col [super_c].shared3.hash == hash) ;
length = Col [super_c].length ;
@@ -1620,71 +1621,71 @@ static void detect_super_cols
/* === Compare super_c with all columns after it ================ */
for (c = Col [super_c].shared4.hash_next ;
- c != COLAMD_EMPTY ; c = Col [c].shared4.hash_next)
+ c != COLAMD_EMPTY ; c = Col [c].shared4.hash_next)
{
- COLAMD_ASSERT (c != super_c) ;
- COLAMD_ASSERT (COL_IS_ALIVE (c)) ;
- COLAMD_ASSERT (Col [c].shared3.hash == hash) ;
-
- /* not identical if lengths or scores are different */
- if (Col [c].length != length ||
- Col [c].shared2.score != Col [super_c].shared2.score)
- {
- prev_c = c ;
- continue ;
+ COLAMD_ASSERT (c != super_c) ;
+ COLAMD_ASSERT (COL_IS_ALIVE (c)) ;
+ COLAMD_ASSERT (Col [c].shared3.hash == hash) ;
+
+ /* not identical if lengths or scores are different */
+ if (Col [c].length != length ||
+ Col [c].shared2.score != Col [super_c].shared2.score)
+ {
+ prev_c = c ;
+ continue ;
+ }
+
+ /* compare the two columns */
+ cp1 = &A [Col [super_c].start] ;
+ cp2 = &A [Col [c].start] ;
+
+ for (i = 0 ; i < length ; i++)
+ {
+ /* the columns are "clean" (no dead rows) */
+ COLAMD_ASSERT (ROW_IS_ALIVE (*cp1)) ;
+ COLAMD_ASSERT (ROW_IS_ALIVE (*cp2)) ;
+ /* row indices will be in the same order for both supercols, */
+ /* no gather/scatter necessary */
+ if (*cp1++ != *cp2++)
+ {
+ break ;
+ }
+ }
+
+ /* the two columns are different if the for-loop "broke" */
+ if (i != length)
+ {
+ prev_c = c ;
+ continue ;
+ }
+
+ /* === Got it! two columns are identical =================== */
+
+ COLAMD_ASSERT (Col [c].shared2.score == Col [super_c].shared2.score) ;
+
+ Col [super_c].shared1.thickness += Col [c].shared1.thickness ;
+ Col [c].shared1.parent = super_c ;
+ KILL_NON_PRINCIPAL_COL (c) ;
+ /* order c later, in order_children() */
+ Col [c].shared2.order = COLAMD_EMPTY ;
+ /* remove c from hash bucket */
+ Col [prev_c].shared4.hash_next = Col [c].shared4.hash_next ;
+ }
}
- /* compare the two columns */
- cp1 = &A [Col [super_c].start] ;
- cp2 = &A [Col [c].start] ;
+ /* === Empty this hash bucket ======================================= */
- for (i = 0 ; i < length ; i++)
- {
- /* the columns are "clean" (no dead rows) */
- COLAMD_ASSERT (ROW_IS_ALIVE (*cp1)) ;
- COLAMD_ASSERT (ROW_IS_ALIVE (*cp2)) ;
- /* row indices will same order for both supercols, */
- /* no gather scatter nessasary */
- if (*cp1++ != *cp2++)
- {
- break ;
- }
- }
-
- /* the two columns are different if the for-loop "broke" */
- if (i != length)
+ if (head_column > COLAMD_EMPTY)
{
- prev_c = c ;
- continue ;
- }
-
- /* === Got it! two columns are identical =================== */
-
- COLAMD_ASSERT (Col [c].shared2.score == Col [super_c].shared2.score) ;
-
- Col [super_c].shared1.thickness += Col [c].shared1.thickness ;
- Col [c].shared1.parent = super_c ;
- KILL_NON_PRINCIPAL_COL (c) ;
- /* order c later, in order_children() */
- Col [c].shared2.order = COLAMD_EMPTY ;
- /* remove c from hash bucket */
- Col [prev_c].shared4.hash_next = Col [c].shared4.hash_next ;
- }
- }
-
- /* === Empty this hash bucket ======================================= */
-
- if (head_column > COLAMD_EMPTY)
- {
/* corresponding degree list "hash" is not empty */
Col [head_column].shared3.headhash = COLAMD_EMPTY ;
- }
- else
- {
+ }
+ else
+ {
/* corresponding degree list "hash" is empty */
head [hash] = COLAMD_EMPTY ;
- }
}
+ }
}
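
The shape of detect_super_cols — an additive hash to group candidate columns,
then an exact element-wise comparison inside each bucket — can be sketched
independently of the Col/head bookkeeping. Assuming each column is a list of row
indices stored in the same order (hypothetical helper, not the header's API):

    #include <map>
    #include <vector>

    // rep[c] == c for principal columns, otherwise the column absorbing c.
    std::vector<int> detect_duplicates(const std::vector<std::vector<int> >& cols)
    {
      std::vector<int> rep(cols.size());
      std::map<long long, std::vector<int> > bucket;
      for (int c = 0; c < (int)cols.size(); ++c)
      {
        long long h = 0;
        for (int j = 0; j < (int)cols[c].size(); ++j)
          h += cols[c][j];                 // same additive hash as above
        rep[c] = c;
        std::vector<int>& b = bucket[h];
        for (int j = 0; j < (int)b.size(); ++j)
          if (rep[b[j]] == b[j] && cols[b[j]] == cols[c])
          {
            rep[c] = b[j];                 // identical: absorb c
            break;
          }
        b.push_back(c);
      }
      return rep;
    }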
@@ -1693,93 +1694,93 @@ static void detect_super_cols
/* ========================================================================== */
/*
- Defragments and compacts columns and rows in the workspace A. Used when
- all avaliable memory has been used while performing row merging. Returns
- the index of the first free position in A, after garbage collection. The
- time taken by this routine is linear is the size of the array A, which is
- itself linear in the number of nonzeros in the input matrix.
- Not user-callable.
+ Defragments and compacts columns and rows in the workspace A. Used when
+ all available memory has been used while performing row merging. Returns
+ the index of the first free position in A, after garbage collection. The
+ time taken by this routine is linear in the size of the array A, which is
+ itself linear in the number of nonzeros in the input matrix.
+ Not user-callable.
*/
-
-static int garbage_collection /* returns the new value of pfree */
-(
+template <typename Index>
+static Index garbage_collection /* returns the new value of pfree */
+ (
/* === Parameters ======================================================= */
-
- int n_row, /* number of rows */
- int n_col, /* number of columns */
- Colamd_Row Row [], /* row info */
- colamd_col Col [], /* column info */
- int A [], /* A [0 ... Alen-1] holds the matrix */
- int *pfree /* &A [0] ... pfree is in use */
-)
+
+ Index n_row, /* number of rows */
+ Index n_col, /* number of columns */
+ Colamd_Row<Index> Row [], /* row info */
+ colamd_col<Index> Col [], /* column info */
+ Index A [], /* A [0 ... Alen-1] holds the matrix */
+ Index *pfree /* &A [0] ... pfree is in use */
+ )
{
- /* === Local variables ================================================== */
+ /* === Local variables ================================================== */
- int *psrc ; /* source pointer */
- int *pdest ; /* destination pointer */
- int j ; /* counter */
- int r ; /* a row index */
- int c ; /* a column index */
- int length ; /* length of a row or column */
+ Index *psrc ; /* source pointer */
+ Index *pdest ; /* destination pointer */
+ Index j ; /* counter */
+ Index r ; /* a row index */
+ Index c ; /* a column index */
+ Index length ; /* length of a row or column */
- /* === Defragment the columns =========================================== */
+ /* === Defragment the columns =========================================== */
- pdest = &A[0] ;
- for (c = 0 ; c < n_col ; c++)
- {
- if (COL_IS_ALIVE (c))
+ pdest = &A[0] ;
+ for (c = 0 ; c < n_col ; c++)
{
+ if (COL_IS_ALIVE (c))
+ {
psrc = &A [Col [c].start] ;
/* move and compact the column */
COLAMD_ASSERT (pdest <= psrc) ;
- Col [c].start = (int) (pdest - &A [0]) ;
+ Col [c].start = (Index) (pdest - &A [0]) ;
length = Col [c].length ;
for (j = 0 ; j < length ; j++)
{
- r = *psrc++ ;
- if (ROW_IS_ALIVE (r))
- {
- *pdest++ = r ;
- }
+ r = *psrc++ ;
+ if (ROW_IS_ALIVE (r))
+ {
+ *pdest++ = r ;
+ }
}
- Col [c].length = (int) (pdest - &A [Col [c].start]) ;
- }
+ Col [c].length = (Index) (pdest - &A [Col [c].start]) ;
}
+ }
- /* === Prepare to defragment the rows =================================== */
+ /* === Prepare to defragment the rows =================================== */
- for (r = 0 ; r < n_row ; r++)
- {
- if (ROW_IS_ALIVE (r))
+ for (r = 0 ; r < n_row ; r++)
{
+ if (ROW_IS_ALIVE (r))
+ {
if (Row [r].length == 0)
{
- /* this row is of zero length. cannot compact it, so kill it */
- COLAMD_DEBUG3 (("Defrag row kill\n")) ;
- KILL_ROW (r) ;
+ /* this row is of zero length. cannot compact it, so kill it */
+ COLAMD_DEBUG3 (("Defrag row kill\n")) ;
+ KILL_ROW (r) ;
}
else
{
- /* save first column index in Row [r].shared2.first_column */
- psrc = &A [Row [r].start] ;
- Row [r].shared2.first_column = *psrc ;
- COLAMD_ASSERT (ROW_IS_ALIVE (r)) ;
- /* flag the start of the row with the one's complement of row */
- *psrc = ONES_COMPLEMENT (r) ;
+ /* save first column index in Row [r].shared2.first_column */
+ psrc = &A [Row [r].start] ;
+ Row [r].shared2.first_column = *psrc ;
+ COLAMD_ASSERT (ROW_IS_ALIVE (r)) ;
+ /* flag the start of the row with the one's complement of row */
+ *psrc = ONES_COMPLEMENT (r) ;
}
- }
}
+ }
- /* === Defragment the rows ============================================== */
+ /* === Defragment the rows ============================================== */
- psrc = pdest ;
- while (psrc < pfree)
- {
- /* find a negative number ... the start of a row */
- if (*psrc++ < 0)
+ psrc = pdest ;
+ while (psrc < pfree)
{
+ /* find a negative number ... the start of a row */
+ if (*psrc++ < 0)
+ {
psrc-- ;
/* get the row index */
r = ONES_COMPLEMENT (*psrc) ;
@@ -1790,26 +1791,26 @@ static int garbage_collection /* returns the new value of pfree */
/* move and compact the row */
COLAMD_ASSERT (pdest <= psrc) ;
- Row [r].start = (int) (pdest - &A [0]) ;
+ Row [r].start = (Index) (pdest - &A [0]) ;
length = Row [r].length ;
for (j = 0 ; j < length ; j++)
{
- c = *psrc++ ;
- if (COL_IS_ALIVE (c))
- {
- *pdest++ = c ;
- }
+ c = *psrc++ ;
+ if (COL_IS_ALIVE (c))
+ {
+ *pdest++ = c ;
+ }
}
- Row [r].length = (int) (pdest - &A [Row [r].start]) ;
+ Row [r].length = (Index) (pdest - &A [Row [r].start]) ;
- }
}
- /* ensure we found all the rows */
- COLAMD_ASSERT (debug_rows == 0) ;
+ }
+ /* ensure we found all the rows */
+ COLAMD_ASSERT (debug_rows == 0) ;
- /* === Return the new value of pfree ==================================== */
+ /* === Return the new value of pfree ==================================== */
- return ((int) (pdest - &A [0])) ;
+ return ((Index) (pdest - &A [0])) ;
}
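
The only non-obvious device here is the ONES_COMPLEMENT flag: the first entry of
each live row is overwritten with ~r, which is negative for any r >= 0, so the
linear sweep can spot row starts by sign alone and recover r by complementing
again. A two-line sanity check of the encoding:

    #include <cassert>

    int main()
    {
      int r = 42;
      int flagged = ~r;        // ONES_COMPLEMENT (r): negative for r >= 0
      assert(flagged < 0);
      assert(~flagged == r);   // the sweep recovers the row index
      return 0;
    }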
@@ -1818,30 +1819,30 @@ static int garbage_collection /* returns the new value of pfree */
/* ========================================================================== */
/*
- Clears the Row [].shared2.mark array, and returns the new tag_mark.
- Return value is the new tag_mark. Not user-callable.
+ Clears the Row [].shared2.mark array and returns the new tag_mark.
+ Not user-callable.
*/
-
-static inline int clear_mark /* return the new value for tag_mark */
-(
- /* === Parameters ======================================================= */
-
- int n_row, /* number of rows in A */
- Colamd_Row Row [] /* Row [0 ... n_row-1].shared2.mark is set to zero */
-)
+template <typename Index>
+static inline Index clear_mark /* return the new value for tag_mark */
+ (
+ /* === Parameters ======================================================= */
+
+ Index n_row, /* number of rows in A */
+ Colamd_Row<Index> Row [] /* Row [0 ... n_row-1].shared2.mark is set to zero */
+ )
{
- /* === Local variables ================================================== */
+ /* === Local variables ================================================== */
- int r ;
+ Index r ;
- for (r = 0 ; r < n_row ; r++)
- {
- if (ROW_IS_ALIVE (r))
+ for (r = 0 ; r < n_row ; r++)
{
+ if (ROW_IS_ALIVE (r))
+ {
Row [r].shared2.mark = 0 ;
- }
}
- return (1) ;
+ }
+ return (1) ;
}
diff --git a/Eigen/src/OrderingMethods/Ordering.h b/Eigen/src/OrderingMethods/Ordering.h
index 2471316b9..b4da6531a 100644
--- a/Eigen/src/OrderingMethods/Ordering.h
+++ b/Eigen/src/OrderingMethods/Ordering.h
@@ -4,29 +4,13 @@
//
// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
//
-// Eigen is free software; you can redistribute it and/or
-// modify it under the terms of the GNU Lesser General Public
-// License as published by the Free Software Foundation; either
-// version 3 of the License, or (at your option) any later version.
-//
-// Alternatively, you can redistribute it and/or
-// modify it under the terms of the GNU General Public License as
-// published by the Free Software Foundation; either version 2 of
-// the License, or (at your option) any later version.
-//
-// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
-// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
-// GNU General Public License for more details.
-//
-// You should have received a copy of the GNU Lesser General Public
-// License and a copy of the GNU General Public License along with
-// Eigen. If not, see <http://www.gnu.org/licenses/>.
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_ORDERING_H
#define EIGEN_ORDERING_H
-#include "Amd.h"
namespace Eigen {
#include "Eigen_Colamd.h"
@@ -53,6 +37,8 @@ void ordering_helper_at_plus_a(const MatrixType& mat, MatrixType& symmat)
}
+#ifndef EIGEN_MPL2_ONLY
+
/** \ingroup OrderingMethods_Module
* \class AMDOrdering
*
@@ -94,6 +80,8 @@ class AMDOrdering
}
};
+#endif // EIGEN_MPL2_ONLY
+
/** \ingroup OrderingMethods_Module
* \class NaturalOrdering
*
@@ -134,26 +122,26 @@ class COLAMDOrdering
template <typename MatrixType>
void operator() (const MatrixType& mat, PermutationType& perm)
{
- int m = mat.rows();
- int n = mat.cols();
- int nnz = mat.nonZeros();
+ Index m = mat.rows();
+ Index n = mat.cols();
+ Index nnz = mat.nonZeros();
// Get the recommended value of Alen to be used by colamd
- int Alen = internal::colamd_recommended(nnz, m, n);
+ Index Alen = internal::colamd_recommended(nnz, m, n);
// Set the default parameters
double knobs [COLAMD_KNOBS];
- int stats [COLAMD_STATS];
+ Index stats [COLAMD_STATS];
internal::colamd_set_defaults(knobs);
- int info;
+ Index info;
IndexVector p(n+1), A(Alen);
- for(int i=0; i <= n; i++) p(i) = mat.outerIndexPtr()[i];
- for(int i=0; i < nnz; i++) A(i) = mat.innerIndexPtr()[i];
+ for(Index i=0; i <= n; i++) p(i) = mat.outerIndexPtr()[i];
+ for(Index i=0; i < nnz; i++) A(i) = mat.innerIndexPtr()[i];
// Call Colamd routine to compute the ordering
info = internal::colamd(m, n, Alen, A.data(), p.data(), knobs, stats);
eigen_assert( info && "COLAMD failed " );
perm.resize(n);
- for (int i = 0; i < n; i++) perm.indices()(p(i)) = i;
+ for (Index i = 0; i < n; i++) perm.indices()(p(i)) = i;
}
};
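
As a usage sketch (not part of this patch), the functor above is meant to be handed a
compressed, column-major sparse matrix and fills a fill-reducing column permutation,
roughly like so:

    #include <Eigen/Sparse>
    #include <Eigen/OrderingMethods>

    int main()
    {
        typedef Eigen::SparseMatrix<double> SpMat; // col-major by default
        SpMat A(4,4);
        A.insert(0,0) = 4; A.insert(1,1) = 3;
        A.insert(2,0) = 1; A.insert(3,2) = 2;
        A.makeCompressed(); // colamd reads the compressed index arrays

        Eigen::COLAMDOrdering<int> ordering;
        Eigen::PermutationMatrix<Eigen::Dynamic, Eigen::Dynamic, int> perm;
        ordering(A, perm);   // fills perm with a fill-reducing ordering

        SpMat Ap = A * perm; // apply the column permutation
        return 0;
    }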
diff --git a/Eigen/src/PaStiXSupport/PaStiXSupport.h b/Eigen/src/PaStiXSupport/PaStiXSupport.h
index 82e137c64..a955287d1 100644
--- a/Eigen/src/PaStiXSupport/PaStiXSupport.h
+++ b/Eigen/src/PaStiXSupport/PaStiXSupport.h
@@ -157,27 +157,6 @@ class PastixBase : internal::noncopyable
template<typename Rhs,typename Dest>
bool _solve (const MatrixBase<Rhs> &b, MatrixBase<Dest> &x) const;
- /** \internal */
- template<typename Rhs, typename DestScalar, int DestOptions, typename DestIndex>
- void _solve_sparse(const Rhs& b, SparseMatrix<DestScalar,DestOptions,DestIndex> &dest) const
- {
- eigen_assert(m_factorizationIsOk && "The decomposition is not in a valid state for solving, you must first call either compute() or symbolic()/numeric()");
- eigen_assert(rows()==b.rows());
-
- // we process the sparse rhs per block of NbColsAtOnce columns temporarily stored into a dense matrix.
- static const int NbColsAtOnce = 1;
- int rhsCols = b.cols();
- int size = b.rows();
- Eigen::Matrix<DestScalar,Dynamic,Dynamic> tmp(size,rhsCols);
- for(int k=0; k<rhsCols; k+=NbColsAtOnce)
- {
- int actualCols = std::min<int>(rhsCols-k, NbColsAtOnce);
- tmp.leftCols(actualCols) = b.middleCols(k,actualCols);
- tmp.leftCols(actualCols) = derived().solve(tmp.leftCols(actualCols));
- dest.middleCols(k,actualCols) = tmp.leftCols(actualCols).sparseView();
- }
- }
-
Derived& derived()
{
return *static_cast<Derived*>(this);
@@ -731,7 +710,7 @@ struct sparse_solve_retval<PastixBase<_MatrixType>, Rhs>
template<typename Dest> void evalTo(Dest& dst) const
{
- dec()._solve_sparse(rhs(),dst);
+ this->defaultEvalTo(dst);
}
};
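
The _solve_sparse helper removed here (and likewise from PardisoSupport and
SimplicialCholesky below) implemented a single recurring pattern: solve against a sparse
right-hand side a few columns at a time through a dense temporary, then sparsify the
result. A self-contained sketch of that pattern, assuming a hypothetical generic solver
and col-major matrices:

    #include <Eigen/Dense>
    #include <Eigen/Sparse>
    #include <algorithm>

    // Solve A X = B for sparse B via dense column blocks; 'solver' is any
    // decomposition exposing solve() (e.g. a SimplicialLDLT instance).
    template<typename Solver>
    Eigen::SparseMatrix<double> solveSparseRhs(const Solver& solver,
                                               const Eigen::SparseMatrix<double>& B)
    {
        const int NbColsAtOnce = 4; // block width: memory vs. overhead trade-off
        const int rhsCols = int(B.cols());
        const int size    = int(B.rows());
        Eigen::MatrixXd tmp(size, NbColsAtOnce);
        Eigen::SparseMatrix<double> dest(size, rhsCols);
        for(int k = 0; k < rhsCols; k += NbColsAtOnce)
        {
            int actualCols = std::min(rhsCols - k, NbColsAtOnce);
            tmp.leftCols(actualCols) = B.middleCols(k, actualCols); // densify block
            tmp.leftCols(actualCols) = solver.solve(tmp.leftCols(actualCols));
            dest.middleCols(k, actualCols) = tmp.leftCols(actualCols).sparseView();
        }
        return dest;
    }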
diff --git a/Eigen/src/PardisoSupport/PardisoSupport.h b/Eigen/src/PardisoSupport/PardisoSupport.h
index d623bf518..1c48f0df7 100644
--- a/Eigen/src/PardisoSupport/PardisoSupport.h
+++ b/Eigen/src/PardisoSupport/PardisoSupport.h
@@ -206,29 +206,6 @@ class PardisoImpl
template<typename BDerived, typename XDerived>
bool _solve(const MatrixBase<BDerived> &b, MatrixBase<XDerived>& x) const;
- /** \internal */
- template<typename Rhs, typename DestScalar, int DestOptions, typename DestIndex>
- void _solve_sparse(const Rhs& b, SparseMatrix<DestScalar,DestOptions,DestIndex> &dest) const
- {
- eigen_assert(m_size==b.rows());
-
- // we process the sparse rhs per block of NbColsAtOnce columns temporarily stored into a dense matrix.
- static const int NbColsAtOnce = 4;
- int rhsCols = b.cols();
- int size = b.rows();
- // Pardiso cannot solve in-place,
- // so we need two temporaries
- Eigen::Matrix<DestScalar,Dynamic,Dynamic,ColMajor> tmp_rhs(size,rhsCols);
- Eigen::Matrix<DestScalar,Dynamic,Dynamic,ColMajor> tmp_res(size,rhsCols);
- for(int k=0; k<rhsCols; k+=NbColsAtOnce)
- {
- int actualCols = std::min<int>(rhsCols-k, NbColsAtOnce);
- tmp_rhs.leftCols(actualCols) = b.middleCols(k,actualCols);
- tmp_res.leftCols(actualCols) = derived().solve(tmp_rhs.leftCols(actualCols));
- dest.middleCols(k,actualCols) = tmp_res.leftCols(actualCols).sparseView();
- }
- }
-
protected:
void pardisoRelease()
{
@@ -604,7 +581,7 @@ struct sparse_solve_retval<PardisoImpl<Derived>, Rhs>
template<typename Dest> void evalTo(Dest& dst) const
{
- dec().derived()._solve_sparse(rhs(),dst);
+ this->defaultEvalTo(dst);
}
};
diff --git a/Eigen/src/QR/ColPivHouseholderQR.h b/Eigen/src/QR/ColPivHouseholderQR.h
index 47f67e6cd..9ec8a65e4 100644
--- a/Eigen/src/QR/ColPivHouseholderQR.h
+++ b/Eigen/src/QR/ColPivHouseholderQR.h
@@ -56,6 +56,12 @@ template<typename _MatrixType> class ColPivHouseholderQR
typedef typename internal::plain_row_type<MatrixType>::type RowVectorType;
typedef typename internal::plain_row_type<MatrixType, RealScalar>::type RealRowVectorType;
typedef typename HouseholderSequence<MatrixType,HCoeffsType>::ConjugateReturnType HouseholderSequenceType;
+
+ private:
+
+ typedef typename PermutationType::Index PermIndexType;
+
+ public:
/**
* \brief Default Constructor.
@@ -81,7 +87,7 @@ template<typename _MatrixType> class ColPivHouseholderQR
ColPivHouseholderQR(Index rows, Index cols)
: m_qr(rows, cols),
m_hCoeffs((std::min)(rows,cols)),
- m_colsPermutation(cols),
+ m_colsPermutation(PermIndexType(cols)),
m_colsTranspositions(cols),
m_temp(cols),
m_colSqNorms(cols),
@@ -91,7 +97,7 @@ template<typename _MatrixType> class ColPivHouseholderQR
ColPivHouseholderQR(const MatrixType& matrix)
: m_qr(matrix.rows(), matrix.cols()),
m_hCoeffs((std::min)(matrix.rows(),matrix.cols())),
- m_colsPermutation(matrix.cols()),
+ m_colsPermutation(PermIndexType(matrix.cols())),
m_colsTranspositions(matrix.cols()),
m_temp(matrix.cols()),
m_colSqNorms(matrix.cols()),
@@ -139,7 +145,22 @@ template<typename _MatrixType> class ColPivHouseholderQR
eigen_assert(m_isInitialized && "ColPivHouseholderQR is not initialized.");
return m_qr;
}
-
+
+  /** \returns a reference to the matrix where the result of the Householder QR factorization is stored
+ * \warning The strict lower part of this matrix contains internal values.
+ * Only the upper triangular part should be referenced. To get it, use
+ * \code matrixR().template triangularView<Upper>() \endcode
+ * For rank-deficient matrices, use
+ * \code
+ * matrixR().topLeftCorner(rank(), rank()).template triangularView<Upper>()
+ * \endcode
+ */
+ const MatrixType& matrixR() const
+ {
+ eigen_assert(m_isInitialized && "ColPivHouseholderQR is not initialized.");
+ return m_qr;
+ }
+
ColPivHouseholderQR& compute(const MatrixType& matrix);
const PermutationType& colsPermutation() const
@@ -330,6 +351,18 @@ template<typename _MatrixType> class ColPivHouseholderQR
* diagonal coefficient of R.
*/
RealScalar maxPivot() const { return m_maxpivot; }
+
+  /** \brief Reports whether the QR factorization was successful.
+ *
+ * \note This function always returns \c Success. It is provided for compatibility
+ * with other factorization routines.
+ * \returns \c Success
+ */
+ ComputationInfo info() const
+ {
+ eigen_assert(m_isInitialized && "Decomposition is not initialized.");
+ return Success;
+ }
protected:
MatrixType m_qr;
@@ -368,6 +401,9 @@ ColPivHouseholderQR<MatrixType>& ColPivHouseholderQR<MatrixType>::compute(const
Index rows = matrix.rows();
Index cols = matrix.cols();
Index size = matrix.diagonalSize();
+
+ // the column permutation is stored as int indices, so just to be sure:
+ eigen_assert(cols<=NumTraits<int>::highest());
m_qr = matrix;
m_hCoeffs.resize(size);
@@ -443,9 +479,9 @@ ColPivHouseholderQR<MatrixType>& ColPivHouseholderQR<MatrixType>::compute(const
m_colSqNorms.tail(cols-k-1) -= m_qr.row(k).tail(cols-k-1).cwiseAbs2();
}
- m_colsPermutation.setIdentity(cols);
- for(Index k = 0; k < m_nonzero_pivots; ++k)
- m_colsPermutation.applyTranspositionOnTheRight(k, m_colsTranspositions.coeff(k));
+ m_colsPermutation.setIdentity(PermIndexType(cols));
+ for(PermIndexType k = 0; k < m_nonzero_pivots; ++k)
+ m_colsPermutation.applyTranspositionOnTheRight(k, PermIndexType(m_colsTranspositions.coeff(k)));
m_det_pq = (number_of_transpositions%2) ? -1 : 1;
m_isInitialized = true;
@@ -482,7 +518,7 @@ struct solve_retval<ColPivHouseholderQR<_MatrixType>, Rhs>
.transpose()
);
- dec().matrixQR()
+ dec().matrixR()
.topLeftCorner(nonzero_pivots, nonzero_pivots)
.template triangularView<Upper>()
.solveInPlace(c.topRows(nonzero_pivots));
diff --git a/Eigen/src/QR/HouseholderQR.h b/Eigen/src/QR/HouseholderQR.h
index 9db64e219..0314d5259 100644
--- a/Eigen/src/QR/HouseholderQR.h
+++ b/Eigen/src/QR/HouseholderQR.h
@@ -241,7 +241,6 @@ void householder_qr_inplace_blocked(MatrixQR& mat, HCoeffs& hCoeffs,
{
typedef typename MatrixQR::Index Index;
typedef typename MatrixQR::Scalar Scalar;
- typedef typename MatrixQR::RealScalar RealScalar;
typedef Block<MatrixQR,Dynamic,Dynamic> BlockType;
Index rows = mat.rows();
diff --git a/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h b/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h
index 17b764a37..0ffb894f6 100644
--- a/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h
+++ b/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h
@@ -60,7 +60,7 @@ class SPQR
typedef typename _MatrixType::Scalar Scalar;
typedef typename _MatrixType::RealScalar RealScalar;
typedef UF_long Index ;
- typedef SparseMatrix<Scalar, _MatrixType::Flags, Index> MatrixType;
+ typedef SparseMatrix<Scalar, ColMajor, Index> MatrixType;
typedef PermutationMatrix<Dynamic, Dynamic> PermutationType;
public:
SPQR()
@@ -88,7 +88,7 @@ class SPQR
delete[] m_E;
delete[] m_HPinv;
}
- void compute(const MatrixType& matrix)
+ void compute(const _MatrixType& matrix)
{
MatrixType mat(matrix);
cholmod_sparse A;
@@ -105,20 +105,18 @@ class SPQR
}
m_info = Success;
m_isInitialized = true;
+ m_isRUpToDate = false;
}
/**
- * Get the number of rows of the triangular matrix.
+ * Get the number of rows of the input matrix and the Q matrix
*/
- inline Index rows() const { return m_cR->nrow; }
+ inline Index rows() const {return m_H->nrow; }
/**
- * Get the number of columns of the triangular matrix.
+ * Get the number of columns of the input matrix.
*/
inline Index cols() const { return m_cR->ncol; }
- /**
- * This is the number of rows in the input matrix and the Q matrix
- */
- inline Index rowsQ() const {return m_HTau->nrow; }
+
/** \returns the solution X of \f$ A X = B \f$ using the current decomposition of A.
*
* \sa compute()
@@ -126,8 +124,8 @@ class SPQR
template<typename Rhs>
inline const internal::solve_retval<SPQR, Rhs> solve(const MatrixBase<Rhs>& B) const
{
- eigen_assert(m_isInitialized && " The QR factorization should be computed first, call compute()");
- eigen_assert(rows()==B.rows()
+ eigen_assert(m_isInitialized && " The QR factorization should be computed first, call compute()");
+ eigen_assert(this->rows()==B.rows()
&& "SPQR::solve(): invalid number of rows of the right hand side matrix B");
return internal::solve_retval<SPQR, Rhs>(*this, B.derived());
}
@@ -139,22 +137,28 @@ class SPQR
eigen_assert(b.cols()==1 && "This method is for vectors only");
//Compute Q^T * b
- dest = matrixQ().transpose() * b;
-
- // Solves with the triangular matrix R
Dest y;
- y = this->matrixQR().template triangularView<Upper>().solve(dest.derived());
+ y = matrixQ().transpose() * b;
+ // Solves with the triangular matrix R
+ Index rk = this->rank();
+ y.topRows(rk) = this->matrixR().topLeftCorner(rk, rk).template triangularView<Upper>().solve(y.topRows(rk));
+ y.bottomRows(cols()-rk).setZero();
// Apply the column permutation
- dest = colsPermutation() * y;
+ dest.topRows(cols()) = colsPermutation() * y.topRows(cols());
m_info = Success;
}
- /// Get the sparse triangular matrix R. It is a sparse matrix
- MatrixType matrixQR() const
+
+ /** \returns the sparse triangular factor R. It is a sparse matrix
+ */
+ const MatrixType matrixR() const
{
- MatrixType R;
- R = viewAsEigen<Scalar, MatrixType::Flags, typename MatrixType::Index>(*m_cR);
- return R;
+ eigen_assert(m_isInitialized && " The QR factorization should be computed first, call compute()");
+ if(!m_isRUpToDate) {
+ m_R = viewAsEigen<Scalar,ColMajor, typename MatrixType::Index>(*m_cR);
+ m_isRUpToDate = true;
+ }
+ return m_R;
}
/// Get an expression of the matrix Q
SPQRMatrixQReturnType<SPQR> matrixQ() const
@@ -181,15 +185,12 @@ class SPQR
return m_cc.SPQR_istat[4];
}
/// Set the fill-reducing ordering method to be used
- void setOrdering(int ord) { m_ordering = ord;}
+ void setSPQROrdering(int ord) { m_ordering = ord;}
    /// Set the tolerance tol to treat columns with 2-norm <= tol as zero
- void setThreshold(RealScalar tol) { m_tolerance = tol; }
+ void setPivotThreshold(const RealScalar& tol) { m_tolerance = tol; }
- /// Return a pointer to SPQR workspace
- cholmod_common *cc() const { return &m_cc; }
- cholmod_sparse * H() const { return m_H; }
- Index *HPinv() const { return m_HPinv; }
- cholmod_dense* HTau() const { return m_HTau; }
+ /** \returns a pointer to the SPQR workspace */
+ cholmod_common *cholmodCommon() const { return &m_cc; }
/** \brief Reports whether previous computation was successful.
@@ -206,17 +207,20 @@ class SPQR
bool m_isInitialized;
bool m_analysisIsOk;
bool m_factorizationIsOk;
+ mutable bool m_isRUpToDate;
mutable ComputationInfo m_info;
int m_ordering; // Ordering method to use, see SPQR's manual
    int m_allow_tol; // Whether to allow some tolerance during numerical factorization.
RealScalar m_tolerance; // treat columns with 2-norm below this tolerance as zero
mutable cholmod_sparse *m_cR; // The sparse R factor in cholmod format
+ mutable MatrixType m_R; // The sparse matrix R in Eigen format
mutable Index *m_E; // The permutation applied to columns
mutable cholmod_sparse *m_H; //The householder vectors
mutable Index *m_HPinv; // The row permutation of H
mutable cholmod_dense *m_HTau; // The Householder coefficients
mutable Index m_rank; // The rank of the matrix
mutable cholmod_common m_cc; // Workspace and parameters
+ template<typename ,typename > friend struct SPQR_QProduct;
};
template <typename SPQRType, typename Derived>
@@ -227,7 +231,7 @@ struct SPQR_QProduct : ReturnByValue<SPQR_QProduct<SPQRType,Derived> >
//Define the constructor to get reference to argument types
SPQR_QProduct(const SPQRType& spqr, const Derived& other, bool transpose) : m_spqr(spqr),m_other(other),m_transpose(transpose) {}
- inline Index rows() const { return m_transpose ? m_spqr.rowsQ() : m_spqr.cols(); }
+ inline Index rows() const { return m_transpose ? m_spqr.rows() : m_spqr.cols(); }
inline Index cols() const { return m_other.cols(); }
// Assign to a vector
template<typename ResType>
@@ -236,9 +240,9 @@ struct SPQR_QProduct : ReturnByValue<SPQR_QProduct<SPQRType,Derived> >
cholmod_dense y_cd;
cholmod_dense *x_cd;
int method = m_transpose ? SPQR_QTX : SPQR_QX;
- cholmod_common *cc = m_spqr.cc();
+ cholmod_common *cc = m_spqr.cholmodCommon();
y_cd = viewAsCholmod(m_other.const_cast_derived());
- x_cd = SuiteSparseQR_qmult<Scalar>(method, m_spqr.H(), m_spqr.HTau(), m_spqr.HPinv(), &y_cd, cc);
+ x_cd = SuiteSparseQR_qmult<Scalar>(method, m_spqr.m_H, m_spqr.m_HTau, m_spqr.m_HPinv, &y_cd, cc);
res = Matrix<Scalar,ResType::RowsAtCompileTime,ResType::ColsAtCompileTime>::Map(reinterpret_cast<Scalar*>(x_cd->x), x_cd->nrow, x_cd->ncol);
cholmod_free_dense(&x_cd, cc);
}
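
The reworked _solve above is the standard rank-revealing sequence: y = Q^T b, a triangular
solve restricted to the leading rank-by-rank block of R, zeros for the trailing entries,
then the column permutation applied last. The same sequence in dense Eigen terms, as an
illustrative sketch (not the SPQR code path):

    #include <Eigen/Dense>

    Eigen::VectorXd rankTruncatedSolve(const Eigen::MatrixXd& A, const Eigen::VectorXd& b)
    {
        Eigen::ColPivHouseholderQR<Eigen::MatrixXd> qr(A);
        int rk = int(qr.rank());

        Eigen::VectorXd y = qr.householderQ().transpose() * b; // y = Q^T b
        Eigen::VectorXd x = Eigen::VectorXd::Zero(A.cols());
        x.topRows(rk) = qr.matrixR().topLeftCorner(rk, rk)
                          .triangularView<Eigen::Upper>()
                          .solve(y.topRows(rk));               // back-substitution
        return qr.colsPermutation() * x;                       // undo column pivoting
    }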
diff --git a/Eigen/src/SVD/JacobiSVD.h b/Eigen/src/SVD/JacobiSVD.h
index 1616fe560..2ab4fc05e 100644
--- a/Eigen/src/SVD/JacobiSVD.h
+++ b/Eigen/src/SVD/JacobiSVD.h
@@ -78,7 +78,8 @@ public:
{
if (svd.rows() != m_qr.rows() || svd.cols() != m_qr.cols())
{
- m_qr = FullPivHouseholderQR<MatrixType>(svd.rows(), svd.cols());
+ m_qr.~QRType();
+ ::new (&m_qr) QRType(svd.rows(), svd.cols());
}
if (svd.m_computeFullU) m_workspace.resize(svd.rows());
}
@@ -96,7 +97,8 @@ public:
return false;
}
private:
- FullPivHouseholderQR<MatrixType> m_qr;
+ typedef FullPivHouseholderQR<MatrixType> QRType;
+ QRType m_qr;
WorkspaceType m_workspace;
};
@@ -121,7 +123,8 @@ public:
{
if (svd.cols() != m_qr.rows() || svd.rows() != m_qr.cols())
{
- m_qr = FullPivHouseholderQR<TransposeTypeWithSameStorageOrder>(svd.cols(), svd.rows());
+ m_qr.~QRType();
+ ::new (&m_qr) QRType(svd.cols(), svd.rows());
}
m_adjoint.resize(svd.cols(), svd.rows());
if (svd.m_computeFullV) m_workspace.resize(svd.cols());
@@ -141,7 +144,8 @@ public:
else return false;
}
private:
- FullPivHouseholderQR<TransposeTypeWithSameStorageOrder> m_qr;
+ typedef FullPivHouseholderQR<TransposeTypeWithSameStorageOrder> QRType;
+ QRType m_qr;
TransposeTypeWithSameStorageOrder m_adjoint;
typename internal::plain_row_type<MatrixType>::type m_workspace;
};
@@ -158,7 +162,8 @@ public:
{
if (svd.rows() != m_qr.rows() || svd.cols() != m_qr.cols())
{
- m_qr = ColPivHouseholderQR<MatrixType>(svd.rows(), svd.cols());
+ m_qr.~QRType();
+ ::new (&m_qr) QRType(svd.rows(), svd.cols());
}
if (svd.m_computeFullU) m_workspace.resize(svd.rows());
else if (svd.m_computeThinU) m_workspace.resize(svd.cols());
@@ -183,7 +188,8 @@ public:
}
private:
- ColPivHouseholderQR<MatrixType> m_qr;
+ typedef ColPivHouseholderQR<MatrixType> QRType;
+ QRType m_qr;
typename internal::plain_col_type<MatrixType>::type m_workspace;
};
@@ -209,7 +215,8 @@ public:
{
if (svd.cols() != m_qr.rows() || svd.rows() != m_qr.cols())
{
- m_qr = ColPivHouseholderQR<TransposeTypeWithSameStorageOrder>(svd.cols(), svd.rows());
+ m_qr.~QRType();
+ ::new (&m_qr) QRType(svd.cols(), svd.rows());
}
if (svd.m_computeFullV) m_workspace.resize(svd.cols());
else if (svd.m_computeThinV) m_workspace.resize(svd.rows());
@@ -237,7 +244,8 @@ public:
}
private:
- ColPivHouseholderQR<TransposeTypeWithSameStorageOrder> m_qr;
+ typedef ColPivHouseholderQR<TransposeTypeWithSameStorageOrder> QRType;
+ QRType m_qr;
TransposeTypeWithSameStorageOrder m_adjoint;
typename internal::plain_row_type<MatrixType>::type m_workspace;
};
@@ -254,7 +262,8 @@ public:
{
if (svd.rows() != m_qr.rows() || svd.cols() != m_qr.cols())
{
- m_qr = HouseholderQR<MatrixType>(svd.rows(), svd.cols());
+ m_qr.~QRType();
+ ::new (&m_qr) QRType(svd.rows(), svd.cols());
}
if (svd.m_computeFullU) m_workspace.resize(svd.rows());
else if (svd.m_computeThinU) m_workspace.resize(svd.cols());
@@ -278,7 +287,8 @@ public:
return false;
}
private:
- HouseholderQR<MatrixType> m_qr;
+ typedef HouseholderQR<MatrixType> QRType;
+ QRType m_qr;
typename internal::plain_col_type<MatrixType>::type m_workspace;
};
@@ -304,7 +314,8 @@ public:
{
if (svd.cols() != m_qr.rows() || svd.rows() != m_qr.cols())
{
- m_qr = HouseholderQR<TransposeTypeWithSameStorageOrder>(svd.cols(), svd.rows());
+ m_qr.~QRType();
+ ::new (&m_qr) QRType(svd.cols(), svd.rows());
}
if (svd.m_computeFullV) m_workspace.resize(svd.cols());
else if (svd.m_computeThinV) m_workspace.resize(svd.rows());
@@ -332,7 +343,8 @@ public:
}
private:
- HouseholderQR<TransposeTypeWithSameStorageOrder> m_qr;
+ typedef HouseholderQR<TransposeTypeWithSameStorageOrder> QRType;
+ QRType m_qr;
TransposeTypeWithSameStorageOrder m_adjoint;
typename internal::plain_row_type<MatrixType>::type m_workspace;
};
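
The recurring change in this file swaps assignment of a freshly sized QR object for
explicit destruction followed by placement new, which re-runs the constructor in the same
storage and avoids depending on an assignment operator. The idiom in isolation, with a
hypothetical class (a sketch, not Eigen code):

    #include <new>      // placement new
    #include <vector>

    struct Workspace
    {
        explicit Workspace(int n) : buf(n) {}
        std::vector<double> buf;
    };

    void reinitInPlace(Workspace& w, int n)
    {
        w.~Workspace();          // run the destructor on the existing object
        ::new (&w) Workspace(n); // reconstruct it in the same storage
    }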
diff --git a/Eigen/src/SparseCholesky/SimplicialCholesky.h b/Eigen/src/SparseCholesky/SimplicialCholesky.h
index 51f6fe9ef..62747279d 100644
--- a/Eigen/src/SparseCholesky/SimplicialCholesky.h
+++ b/Eigen/src/SparseCholesky/SimplicialCholesky.h
@@ -1,52 +1,12 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
-// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2008-2012 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-/*
-
-NOTE: the _symbolic, and _numeric functions has been adapted from
- the LDL library:
-
-LDL Copyright (c) 2005 by Timothy A. Davis. All Rights Reserved.
-
-LDL License:
-
- Your use or distribution of LDL or any modified version of
- LDL implies that you agree to this License.
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
- USA
-
- Permission is hereby granted to use or copy this program under the
- terms of the GNU LGPL, provided that the Copyright, this License,
- and the Availability of the original version is retained on all copies.
- User documentation of any code that uses this code or any modified
- version of this code must cite the Copyright, this License, the
- Availability note, and "Used by permission." Permission to modify
- the code and to distribute modified code is granted, provided the
- Copyright, this License, and the Availability note are retained,
- and a notice that the code was modified is included.
- */
-
-#include "../Core/util/NonMPL2.h"
-
#ifndef EIGEN_SIMPLICIAL_CHOLESKY_H
#define EIGEN_SIMPLICIAL_CHOLESKY_H
@@ -215,27 +175,6 @@ class SimplicialCholeskyBase : internal::noncopyable
dest = m_Pinv * dest;
}
- /** \internal */
- template<typename Rhs, typename DestScalar, int DestOptions, typename DestIndex>
- void _solve_sparse(const Rhs& b, SparseMatrix<DestScalar,DestOptions,DestIndex> &dest) const
- {
- eigen_assert(m_factorizationIsOk && "The decomposition is not in a valid state for solving, you must first call either compute() or symbolic()/numeric()");
- eigen_assert(m_matrix.rows()==b.rows());
-
- // we process the sparse rhs per block of NbColsAtOnce columns temporarily stored into a dense matrix.
- static const int NbColsAtOnce = 4;
- int rhsCols = b.cols();
- int size = b.rows();
- Eigen::Matrix<DestScalar,Dynamic,Dynamic> tmp(size,rhsCols);
- for(int k=0; k<rhsCols; k+=NbColsAtOnce)
- {
- int actualCols = std::min<int>(rhsCols-k, NbColsAtOnce);
- tmp.leftCols(actualCols) = b.middleCols(k,actualCols);
- tmp.leftCols(actualCols) = derived().solve(tmp.leftCols(actualCols));
- dest.middleCols(k,actualCols) = tmp.leftCols(actualCols).sparseView();
- }
- }
-
#endif // EIGEN_PARSED_BY_DOXYGEN
protected:
@@ -693,153 +632,6 @@ void SimplicialCholeskyBase<Derived>::ordering(const MatrixType& a, CholMatrixTy
ap.template selfadjointView<Upper>() = a.template selfadjointView<UpLo>().twistedBy(m_P);
}
-template<typename Derived>
-void SimplicialCholeskyBase<Derived>::analyzePattern_preordered(const CholMatrixType& ap, bool doLDLT)
-{
- const Index size = ap.rows();
- m_matrix.resize(size, size);
- m_parent.resize(size);
- m_nonZerosPerCol.resize(size);
-
- ei_declare_aligned_stack_constructed_variable(Index, tags, size, 0);
-
- for(Index k = 0; k < size; ++k)
- {
- /* L(k,:) pattern: all nodes reachable in etree from nz in A(0:k-1,k) */
- m_parent[k] = -1; /* parent of k is not yet known */
- tags[k] = k; /* mark node k as visited */
- m_nonZerosPerCol[k] = 0; /* count of nonzeros in column k of L */
- for(typename CholMatrixType::InnerIterator it(ap,k); it; ++it)
- {
- Index i = it.index();
- if(i < k)
- {
- /* follow path from i to root of etree, stop at flagged node */
- for(; tags[i] != k; i = m_parent[i])
- {
- /* find parent of i if not yet determined */
- if (m_parent[i] == -1)
- m_parent[i] = k;
- m_nonZerosPerCol[i]++; /* L (k,i) is nonzero */
- tags[i] = k; /* mark i as visited */
- }
- }
- }
- }
-
- /* construct Lp index array from m_nonZerosPerCol column counts */
- Index* Lp = m_matrix.outerIndexPtr();
- Lp[0] = 0;
- for(Index k = 0; k < size; ++k)
- Lp[k+1] = Lp[k] + m_nonZerosPerCol[k] + (doLDLT ? 0 : 1);
-
- m_matrix.resizeNonZeros(Lp[size]);
-
- m_isInitialized = true;
- m_info = Success;
- m_analysisIsOk = true;
- m_factorizationIsOk = false;
-}
-
-
-template<typename Derived>
-template<bool DoLDLT>
-void SimplicialCholeskyBase<Derived>::factorize_preordered(const CholMatrixType& ap)
-{
- using std::sqrt;
-
- eigen_assert(m_analysisIsOk && "You must first call analyzePattern()");
- eigen_assert(ap.rows()==ap.cols());
- const Index size = ap.rows();
- eigen_assert(m_parent.size()==size);
- eigen_assert(m_nonZerosPerCol.size()==size);
-
- const Index* Lp = m_matrix.outerIndexPtr();
- Index* Li = m_matrix.innerIndexPtr();
- Scalar* Lx = m_matrix.valuePtr();
-
- ei_declare_aligned_stack_constructed_variable(Scalar, y, size, 0);
- ei_declare_aligned_stack_constructed_variable(Index, pattern, size, 0);
- ei_declare_aligned_stack_constructed_variable(Index, tags, size, 0);
-
- bool ok = true;
- m_diag.resize(DoLDLT ? size : 0);
-
- for(Index k = 0; k < size; ++k)
- {
- // compute nonzero pattern of kth row of L, in topological order
- y[k] = 0.0; // Y(0:k) is now all zero
- Index top = size; // stack for pattern is empty
- tags[k] = k; // mark node k as visited
- m_nonZerosPerCol[k] = 0; // count of nonzeros in column k of L
- for(typename MatrixType::InnerIterator it(ap,k); it; ++it)
- {
- Index i = it.index();
- if(i <= k)
- {
- y[i] += internal::conj(it.value()); /* scatter A(i,k) into Y (sum duplicates) */
- Index len;
- for(len = 0; tags[i] != k; i = m_parent[i])
- {
- pattern[len++] = i; /* L(k,i) is nonzero */
- tags[i] = k; /* mark i as visited */
- }
- while(len > 0)
- pattern[--top] = pattern[--len];
- }
- }
-
- /* compute numerical values kth row of L (a sparse triangular solve) */
-
- RealScalar d = internal::real(y[k]) * m_shiftScale + m_shiftOffset; // get D(k,k), apply the shift function, and clear Y(k)
- y[k] = 0.0;
- for(; top < size; ++top)
- {
- Index i = pattern[top]; /* pattern[top:n-1] is pattern of L(:,k) */
- Scalar yi = y[i]; /* get and clear Y(i) */
- y[i] = 0.0;
-
- /* the nonzero entry L(k,i) */
- Scalar l_ki;
- if(DoLDLT)
- l_ki = yi / m_diag[i];
- else
- yi = l_ki = yi / Lx[Lp[i]];
-
- Index p2 = Lp[i] + m_nonZerosPerCol[i];
- Index p;
- for(p = Lp[i] + (DoLDLT ? 0 : 1); p < p2; ++p)
- y[Li[p]] -= internal::conj(Lx[p]) * yi;
- d -= internal::real(l_ki * internal::conj(yi));
- Li[p] = k; /* store L(k,i) in column form of L */
- Lx[p] = l_ki;
- ++m_nonZerosPerCol[i]; /* increment count of nonzeros in col i */
- }
- if(DoLDLT)
- {
- m_diag[k] = d;
- if(d == RealScalar(0))
- {
- ok = false; /* failure, D(k,k) is zero */
- break;
- }
- }
- else
- {
- Index p = Lp[k] + m_nonZerosPerCol[k]++;
- Li[p] = k ; /* store L(k,k) = sqrt (d) in column k */
- if(d <= RealScalar(0)) {
- ok = false; /* failure, matrix is not positive definite */
- break;
- }
- Lx[p] = sqrt(d) ;
- }
- }
-
- m_info = ok ? Success : NumericalIssue;
- m_factorizationIsOk = true;
-}
-
namespace internal {
template<typename Derived, typename Rhs>
@@ -864,7 +656,7 @@ struct sparse_solve_retval<SimplicialCholeskyBase<Derived>, Rhs>
template<typename Dest> void evalTo(Dest& dst) const
{
- dec().derived()._solve_sparse(rhs(),dst);
+ this->defaultEvalTo(dst);
}
};
diff --git a/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h b/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h
new file mode 100644
index 000000000..4b249868f
--- /dev/null
+++ b/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h
@@ -0,0 +1,199 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2012 Gael Guennebaud <gael.guennebaud@inria.fr>
+
+/*
+
+NOTE: these functions have been adapted from the LDL library:
+
+LDL Copyright (c) 2005 by Timothy A. Davis. All Rights Reserved.
+
+LDL License:
+
+ Your use or distribution of LDL or any modified version of
+ LDL implies that you agree to this License.
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ USA
+
+ Permission is hereby granted to use or copy this program under the
+ terms of the GNU LGPL, provided that the Copyright, this License,
+ and the Availability of the original version is retained on all copies.
+ User documentation of any code that uses this code or any modified
+ version of this code must cite the Copyright, this License, the
+ Availability note, and "Used by permission." Permission to modify
+ the code and to distribute modified code is granted, provided the
+ Copyright, this License, and the Availability note are retained,
+ and a notice that the code was modified is included.
+ */
+
+#include "../Core/util/NonMPL2.h"
+
+#ifndef EIGEN_SIMPLICIAL_CHOLESKY_IMPL_H
+#define EIGEN_SIMPLICIAL_CHOLESKY_IMPL_H
+
+namespace Eigen {
+
+template<typename Derived>
+void SimplicialCholeskyBase<Derived>::analyzePattern_preordered(const CholMatrixType& ap, bool doLDLT)
+{
+ const Index size = ap.rows();
+ m_matrix.resize(size, size);
+ m_parent.resize(size);
+ m_nonZerosPerCol.resize(size);
+
+ ei_declare_aligned_stack_constructed_variable(Index, tags, size, 0);
+
+ for(Index k = 0; k < size; ++k)
+ {
+ /* L(k,:) pattern: all nodes reachable in etree from nz in A(0:k-1,k) */
+ m_parent[k] = -1; /* parent of k is not yet known */
+ tags[k] = k; /* mark node k as visited */
+ m_nonZerosPerCol[k] = 0; /* count of nonzeros in column k of L */
+ for(typename CholMatrixType::InnerIterator it(ap,k); it; ++it)
+ {
+ Index i = it.index();
+ if(i < k)
+ {
+ /* follow path from i to root of etree, stop at flagged node */
+ for(; tags[i] != k; i = m_parent[i])
+ {
+ /* find parent of i if not yet determined */
+ if (m_parent[i] == -1)
+ m_parent[i] = k;
+ m_nonZerosPerCol[i]++; /* L (k,i) is nonzero */
+ tags[i] = k; /* mark i as visited */
+ }
+ }
+ }
+ }
+
+ /* construct Lp index array from m_nonZerosPerCol column counts */
+ Index* Lp = m_matrix.outerIndexPtr();
+ Lp[0] = 0;
+ for(Index k = 0; k < size; ++k)
+ Lp[k+1] = Lp[k] + m_nonZerosPerCol[k] + (doLDLT ? 0 : 1);
+
+ m_matrix.resizeNonZeros(Lp[size]);
+
+ m_isInitialized = true;
+ m_info = Success;
+ m_analysisIsOk = true;
+ m_factorizationIsOk = false;
+}
+
+
+template<typename Derived>
+template<bool DoLDLT>
+void SimplicialCholeskyBase<Derived>::factorize_preordered(const CholMatrixType& ap)
+{
+ using std::sqrt;
+
+ eigen_assert(m_analysisIsOk && "You must first call analyzePattern()");
+ eigen_assert(ap.rows()==ap.cols());
+ const Index size = ap.rows();
+ eigen_assert(m_parent.size()==size);
+ eigen_assert(m_nonZerosPerCol.size()==size);
+
+ const Index* Lp = m_matrix.outerIndexPtr();
+ Index* Li = m_matrix.innerIndexPtr();
+ Scalar* Lx = m_matrix.valuePtr();
+
+ ei_declare_aligned_stack_constructed_variable(Scalar, y, size, 0);
+ ei_declare_aligned_stack_constructed_variable(Index, pattern, size, 0);
+ ei_declare_aligned_stack_constructed_variable(Index, tags, size, 0);
+
+ bool ok = true;
+ m_diag.resize(DoLDLT ? size : 0);
+
+ for(Index k = 0; k < size; ++k)
+ {
+ // compute nonzero pattern of kth row of L, in topological order
+ y[k] = 0.0; // Y(0:k) is now all zero
+ Index top = size; // stack for pattern is empty
+ tags[k] = k; // mark node k as visited
+ m_nonZerosPerCol[k] = 0; // count of nonzeros in column k of L
+ for(typename MatrixType::InnerIterator it(ap,k); it; ++it)
+ {
+ Index i = it.index();
+ if(i <= k)
+ {
+ y[i] += internal::conj(it.value()); /* scatter A(i,k) into Y (sum duplicates) */
+ Index len;
+ for(len = 0; tags[i] != k; i = m_parent[i])
+ {
+ pattern[len++] = i; /* L(k,i) is nonzero */
+ tags[i] = k; /* mark i as visited */
+ }
+ while(len > 0)
+ pattern[--top] = pattern[--len];
+ }
+ }
+
+ /* compute numerical values kth row of L (a sparse triangular solve) */
+
+ RealScalar d = internal::real(y[k]) * m_shiftScale + m_shiftOffset; // get D(k,k), apply the shift function, and clear Y(k)
+ y[k] = 0.0;
+ for(; top < size; ++top)
+ {
+ Index i = pattern[top]; /* pattern[top:n-1] is pattern of L(:,k) */
+ Scalar yi = y[i]; /* get and clear Y(i) */
+ y[i] = 0.0;
+
+ /* the nonzero entry L(k,i) */
+ Scalar l_ki;
+ if(DoLDLT)
+ l_ki = yi / m_diag[i];
+ else
+ yi = l_ki = yi / Lx[Lp[i]];
+
+ Index p2 = Lp[i] + m_nonZerosPerCol[i];
+ Index p;
+ for(p = Lp[i] + (DoLDLT ? 0 : 1); p < p2; ++p)
+ y[Li[p]] -= internal::conj(Lx[p]) * yi;
+ d -= internal::real(l_ki * internal::conj(yi));
+ Li[p] = k; /* store L(k,i) in column form of L */
+ Lx[p] = l_ki;
+ ++m_nonZerosPerCol[i]; /* increment count of nonzeros in col i */
+ }
+ if(DoLDLT)
+ {
+ m_diag[k] = d;
+ if(d == RealScalar(0))
+ {
+ ok = false; /* failure, D(k,k) is zero */
+ break;
+ }
+ }
+ else
+ {
+ Index p = Lp[k] + m_nonZerosPerCol[k]++;
+ Li[p] = k ; /* store L(k,k) = sqrt (d) in column k */
+ if(d <= RealScalar(0)) {
+ ok = false; /* failure, matrix is not positive definite */
+ break;
+ }
+ Lx[p] = sqrt(d) ;
+ }
+ }
+
+ m_info = ok ? Success : NumericalIssue;
+ m_factorizationIsOk = true;
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_SIMPLICIAL_CHOLESKY_IMPL_H
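
For reference, the routines above are consumed through the SimplicialLLT/SimplicialLDLT
front ends; a typical call sequence looks like this (a sketch assuming a symmetric
positive definite input):

    #include <Eigen/Sparse>
    #include <Eigen/SparseCholesky>

    int main()
    {
        typedef Eigen::SparseMatrix<double> SpMat;
        int n = 5;
        SpMat A(n,n); // SPD tridiagonal [-1 2 -1]
        for(int i = 0; i < n; ++i)
        {
            A.insert(i,i) = 2.0;
            if(i+1 < n) { A.insert(i,i+1) = -1.0; A.insert(i+1,i) = -1.0; }
        }
        A.makeCompressed();

        Eigen::SimplicialLDLT<SpMat> ldlt;
        ldlt.analyzePattern(A); // ends up in analyzePattern_preordered
        ldlt.factorize(A);      // ends up in factorize_preordered<true>
        Eigen::VectorXd b = Eigen::VectorXd::Ones(n);
        Eigen::VectorXd x = ldlt.solve(b);
        return (ldlt.info() == Eigen::Success) ? 0 : 1;
    }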
diff --git a/Eigen/src/SparseCore/AmbiVector.h b/Eigen/src/SparseCore/AmbiVector.h
index dca738751..17fff96a7 100644
--- a/Eigen/src/SparseCore/AmbiVector.h
+++ b/Eigen/src/SparseCore/AmbiVector.h
@@ -288,7 +288,7 @@ class AmbiVector<_Scalar,_Index>::Iterator
* In practice, all coefficients having a magnitude smaller than \a epsilon
* are skipped.
*/
- Iterator(const AmbiVector& vec, RealScalar epsilon = 0)
+ Iterator(const AmbiVector& vec, const RealScalar& epsilon = 0)
: m_vector(vec)
{
using std::abs;
diff --git a/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h b/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h
index 16b5e1dba..4b13f08d4 100644
--- a/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h
+++ b/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h
@@ -121,9 +121,9 @@ static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& r
namespace internal {
template<typename Lhs, typename Rhs, typename ResultType,
- int LhsStorageOrder = traits<Lhs>::Flags&RowMajorBit,
- int RhsStorageOrder = traits<Rhs>::Flags&RowMajorBit,
- int ResStorageOrder = traits<ResultType>::Flags&RowMajorBit>
+ int LhsStorageOrder = (traits<Lhs>::Flags&RowMajorBit) ? RowMajor : ColMajor,
+ int RhsStorageOrder = (traits<Rhs>::Flags&RowMajorBit) ? RowMajor : ColMajor,
+ int ResStorageOrder = (traits<ResultType>::Flags&RowMajorBit) ? RowMajor : ColMajor>
struct conservative_sparse_sparse_product_selector;
template<typename Lhs, typename Rhs, typename ResultType>
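
The point of this change is that traits<T>::Flags & RowMajorBit yields a raw bit value,
which is not guaranteed to equal the RowMajor/ColMajor enumerators the selector
specializations match on; mapping it explicitly makes the template arguments
self-documenting. A small illustration of the mapping:

    #include <Eigen/SparseCore>
    #include <iostream>

    template<typename Xpr>
    void printStorageOrder()
    {
        // map the flag bit onto the storage-order enumerators
        const int order = (Eigen::internal::traits<Xpr>::Flags & Eigen::RowMajorBit)
                            ? Eigen::RowMajor : Eigen::ColMajor;
        std::cout << (order == Eigen::RowMajor ? "row-major" : "col-major") << "\n";
    }

    int main()
    {
        printStorageOrder<Eigen::SparseMatrix<double, Eigen::ColMajor> >();
        printStorageOrder<Eigen::SparseMatrix<double, Eigen::RowMajor> >();
        return 0;
    }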
diff --git a/Eigen/src/SparseCore/SparseBlock.h b/Eigen/src/SparseCore/SparseBlock.h
index b1eaf0b2c..e025e4d40 100644
--- a/Eigen/src/SparseCore/SparseBlock.h
+++ b/Eigen/src/SparseCore/SparseBlock.h
@@ -61,7 +61,7 @@ public:
protected:
- const typename XprType::Nested m_matrix;
+ typename XprType::Nested m_matrix;
Index m_outerStart;
const internal::variable_if_dynamic<Index, OuterSize> m_outerSize;
};
@@ -129,59 +129,58 @@ public:
// 2 - let's check whether there is enough allocated memory
Index nnz = tmp.nonZeros();
- Index nnz_previous = nonZeros();
- Index free_size = Index(matrix.data().allocatedSize()) + nnz_previous;
- Index nnz_head = m_outerStart==0 ? 0 : matrix.outerIndexPtr()[m_outerStart];
- Index tail = m_matrix.outerIndexPtr()[m_outerStart+m_outerSize.value()];
- Index nnz_tail = matrix.nonZeros() - tail;
+ Index start = m_outerStart==0 ? 0 : matrix.outerIndexPtr()[m_outerStart]; // starting position of the current block
+    Index end = m_matrix.outerIndexPtr()[m_outerStart+m_outerSize.value()]; // ending position of the current block
+ Index block_size = end - start; // available room in the current block
+ Index tail_size = m_matrix.outerIndexPtr()[m_matrix.outerSize()] - end;
+
+ Index free_size = m_matrix.isCompressed()
+ ? Index(matrix.data().allocatedSize()) + block_size
+ : block_size;
- if(nnz>free_size)
+ if(nnz>free_size)
{
// realloc manually to reduce copies
- typename SparseMatrixType::Storage newdata(m_matrix.nonZeros() - nnz_previous + nnz);
+ typename SparseMatrixType::Storage newdata(m_matrix.data().allocatedSize() - block_size + nnz);
- std::memcpy(&newdata.value(0), &m_matrix.data().value(0), nnz_head*sizeof(Scalar));
- std::memcpy(&newdata.index(0), &m_matrix.data().index(0), nnz_head*sizeof(Index));
+ std::memcpy(&newdata.value(0), &m_matrix.data().value(0), start*sizeof(Scalar));
+ std::memcpy(&newdata.index(0), &m_matrix.data().index(0), start*sizeof(Index));
- std::memcpy(&newdata.value(nnz_head), &tmp.data().value(0), nnz*sizeof(Scalar));
- std::memcpy(&newdata.index(nnz_head), &tmp.data().index(0), nnz*sizeof(Index));
+ std::memcpy(&newdata.value(start), &tmp.data().value(0), nnz*sizeof(Scalar));
+ std::memcpy(&newdata.index(start), &tmp.data().index(0), nnz*sizeof(Index));
- std::memcpy(&newdata.value(nnz_head+nnz), &matrix.data().value(tail), nnz_tail*sizeof(Scalar));
- std::memcpy(&newdata.index(nnz_head+nnz), &matrix.data().index(tail), nnz_tail*sizeof(Index));
+ std::memcpy(&newdata.value(start+nnz), &matrix.data().value(end), tail_size*sizeof(Scalar));
+ std::memcpy(&newdata.index(start+nnz), &matrix.data().index(end), tail_size*sizeof(Index));
+
+ newdata.resize(m_matrix.outerIndexPtr()[m_matrix.outerSize()] - block_size + nnz);
matrix.data().swap(newdata);
}
else
{
// no need to realloc, simply copy the tail at its respective position and insert tmp
- matrix.data().resize(nnz_head + nnz + nnz_tail);
-
- if(nnz<nnz_previous)
- {
- std::memcpy(&matrix.data().value(nnz_head+nnz), &matrix.data().value(tail), nnz_tail*sizeof(Scalar));
- std::memcpy(&matrix.data().index(nnz_head+nnz), &matrix.data().index(tail), nnz_tail*sizeof(Index));
- }
- else
- {
- for(Index i=nnz_tail-1; i>=0; --i)
- {
- matrix.data().value(nnz_head+nnz+i) = matrix.data().value(tail+i);
- matrix.data().index(nnz_head+nnz+i) = matrix.data().index(tail+i);
- }
- }
-
- std::memcpy(&matrix.data().value(nnz_head), &tmp.data().value(0), nnz*sizeof(Scalar));
- std::memcpy(&matrix.data().index(nnz_head), &tmp.data().index(0), nnz*sizeof(Index));
+ matrix.data().resize(start + nnz + tail_size);
+
+ std::memmove(&matrix.data().value(start+nnz), &matrix.data().value(end), tail_size*sizeof(Scalar));
+ std::memmove(&matrix.data().index(start+nnz), &matrix.data().index(end), tail_size*sizeof(Index));
+
+ std::memcpy(&matrix.data().value(start), &tmp.data().value(0), nnz*sizeof(Scalar));
+ std::memcpy(&matrix.data().index(start), &tmp.data().index(0), nnz*sizeof(Index));
}
+
+ // update innerNonZeros
+ if(!m_matrix.isCompressed())
+ for(Index j=0; j<m_outerSize.value(); ++j)
+ matrix.innerNonZeroPtr()[m_outerStart+j] = tmp.innerVector(j).nonZeros();
// update outer index pointers
- Index p = nnz_head;
+ Index p = start;
for(Index k=0; k<m_outerSize.value(); ++k)
{
matrix.outerIndexPtr()[m_outerStart+k] = p;
p += tmp.innerVector(k).nonZeros();
}
- std::ptrdiff_t offset = nnz - nnz_previous;
+ std::ptrdiff_t offset = nnz - block_size;
for(Index k = m_outerStart + m_outerSize.value(); k<=matrix.outerSize(); ++k)
{
matrix.outerIndexPtr()[k] += offset;
@@ -353,7 +352,7 @@ public:
m_block(block),
m_end(IsRowMajor ? block.m_startCol.value()+block.m_blockCols.value() : block.m_startRow.value()+block.m_blockRows.value())
{
- while(Base::index() < (IsRowMajor ? m_block.m_startCol.value() : m_block.m_startRow.value()))
+ while( (Base::operator bool()) && (Base::index() < (IsRowMajor ? m_block.m_startCol.value() : m_block.m_startRow.value())) )
Base::operator++();
}
@@ -376,7 +375,7 @@ public:
m_block(block),
m_begin(IsRowMajor ? block.m_startCol.value() : block.m_startRow.value())
{
- while(Base::index() >= (IsRowMajor ? m_block.m_startCol.value()+block.m_blockCols.value() : m_block.m_startRow.value()+block.m_blockRows.value()) )
+ while( (Base::operator bool()) && (Base::index() >= (IsRowMajor ? m_block.m_startCol.value()+block.m_blockCols.value() : m_block.m_startRow.value()+block.m_blockRows.value())) )
Base::operator--();
}
@@ -391,7 +390,7 @@ public:
friend class InnerIterator;
friend class ReverseInnerIterator;
- const typename XprType::Nested m_matrix;
+ typename XprType::Nested m_matrix;
const internal::variable_if_dynamic<Index, XprType::RowsAtCompileTime == 1 ? 0 : Dynamic> m_startRow;
const internal::variable_if_dynamic<Index, XprType::ColsAtCompileTime == 1 ? 0 : Dynamic> m_startCol;
const internal::variable_if_dynamic<Index, RowsAtCompileTime> m_blockRows;
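
One detail worth calling out in the rewritten no-realloc branch above: the tail is now
shifted with std::memmove instead of per-element copies, because the source and
destination ranges can overlap when the block grows or shrinks in place; memcpy on
overlapping ranges is undefined behavior, while memmove is specified to handle it. For
example:

    #include <cstring>
    #include <cstdio>

    int main()
    {
        int buf[8] = {0, 1, 2, 3, 4, 5, 6, 7};
        // shift buf[2..5] two slots left: the ranges overlap, so memmove is
        // required (memcpy would be undefined behavior here)
        std::memmove(&buf[0], &buf[2], 4 * sizeof(int));
        for(int i = 0; i < 8; ++i) std::printf("%d ", buf[i]); // 2 3 4 5 4 5 6 7
        std::printf("\n");
        return 0;
    }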
diff --git a/Eigen/src/SparseCore/SparseColEtree.h b/Eigen/src/SparseCore/SparseColEtree.h
index df6b9f966..f89ca3814 100644
--- a/Eigen/src/SparseCore/SparseColEtree.h
+++ b/Eigen/src/SparseCore/SparseColEtree.h
@@ -36,11 +36,11 @@ namespace Eigen {
namespace internal {
/** Find the root of the tree/set containing the vertex i : Use Path halving */
-template<typename IndexVector>
-int etree_find (int i, IndexVector& pp)
+template<typename Index, typename IndexVector>
+Index etree_find (Index i, IndexVector& pp)
{
- int p = pp(i); // Parent
- int gp = pp(p); // Grand parent
+ Index p = pp(i); // Parent
+ Index gp = pp(p); // Grand parent
while (gp != p)
{
pp(i) = gp; // Parent pointer on find path is changed to former grand parent
@@ -55,9 +55,10 @@ int etree_find (int i, IndexVector& pp)
* \param mat The matrix in column-major format.
* \param parent The elimination tree
* \param firstRowElt The column index of the first element in each row
+  * \param perm The permutation to apply to the columns of \b mat
*/
template <typename MatrixType, typename IndexVector>
-int coletree(const MatrixType& mat, IndexVector& parent, IndexVector& firstRowElt)
+int coletree(const MatrixType& mat, IndexVector& parent, IndexVector& firstRowElt, typename MatrixType::Index *perm=0)
{
typedef typename MatrixType::Index Index;
Index nc = mat.cols(); // Number of columns
@@ -68,14 +69,16 @@ int coletree(const MatrixType& mat, IndexVector& parent, IndexVector& firstRowEl
pp.setZero(); // Initialize disjoint sets
parent.resize(mat.cols());
//Compute first nonzero column in each row
- int row,col;
+ Index row,col;
firstRowElt.resize(m);
firstRowElt.setConstant(nc);
firstRowElt.segment(0, nc).setLinSpaced(nc, 0, nc-1);
bool found_diag;
for (col = 0; col < nc; col++)
{
- for (typename MatrixType::InnerIterator it(mat, col); it; ++it)
+ Index pcol = col;
+ if(perm) pcol = perm[col];
+ for (typename MatrixType::InnerIterator it(mat, pcol); it; ++it)
{
row = it.row();
firstRowElt(row) = (std::min)(firstRowElt(row), col);
@@ -85,7 +88,7 @@ int coletree(const MatrixType& mat, IndexVector& parent, IndexVector& firstRowEl
except use (firstRowElt[r],c) in place of an edge (r,c) of A.
Thus each row clique in A'*A is replaced by a star
centered at its first vertex, which has the same fill. */
- int rset, cset, rroot;
+ Index rset, cset, rroot;
for (col = 0; col < nc; col++)
{
found_diag = false;
@@ -95,9 +98,11 @@ int coletree(const MatrixType& mat, IndexVector& parent, IndexVector& firstRowEl
parent(col) = nc;
/* The diagonal element is treated here even if it does not exist in the matrix
* hence the loop is executed once more */
- for (typename MatrixType::InnerIterator it(mat, col); it||!found_diag; ++it)
+ Index pcol = col;
+ if(perm) pcol = perm[col];
+ for (typename MatrixType::InnerIterator it(mat, pcol); it||!found_diag; ++it)
{ // A sequence of interleaved find and union is performed
- int i = col;
+ Index i = col;
if(it) i = it.index();
if (i == col) found_diag = true;
row = firstRowElt(i);
@@ -120,10 +125,10 @@ int coletree(const MatrixType& mat, IndexVector& parent, IndexVector& firstRowEl
* Depth-first search from vertex n. No recursion.
* This routine was contributed by Cédric Doucet, CEDRAT Group, Meylan, France.
*/
-template <typename IndexVector>
-void nr_etdfs (int n, IndexVector& parent, IndexVector& first_kid, IndexVector& next_kid, IndexVector& post, int postnum)
+template <typename Index, typename IndexVector>
+void nr_etdfs (Index n, IndexVector& parent, IndexVector& first_kid, IndexVector& next_kid, IndexVector& post, Index postnum)
{
- int current = n, first, next;
+ Index current = n, first, next;
while (postnum != n)
{
// No kid for the current node
@@ -167,18 +172,18 @@ void nr_etdfs (int n, IndexVector& parent, IndexVector& first_kid, IndexVector&
* \param parent Input tree
* \param post postordered tree
*/
-template <typename IndexVector>
-void treePostorder(int n, IndexVector& parent, IndexVector& post)
+template <typename Index, typename IndexVector>
+void treePostorder(Index n, IndexVector& parent, IndexVector& post)
{
IndexVector first_kid, next_kid; // Linked list of children
- int postnum;
+ Index postnum;
// Allocate storage for working arrays and results
first_kid.resize(n+1);
next_kid.setZero(n+1);
post.setZero(n+1);
// Set up structure describing children
- int v, dad;
+ Index v, dad;
first_kid.setConstant(-1);
for (v = n-1; v >= 0; v--)
{
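
etree_find above is the classic union-find lookup with path halving: while walking toward
the root, each visited node's parent pointer is redirected to its grandparent, flattening
the tree for subsequent queries. A standalone sketch over a plain array:

    #include <cassert>

    // union-find 'find' with path halving, mirroring etree_find
    int findRoot(int i, int* pp)
    {
        int p  = pp[i];
        int gp = pp[p];
        while(gp != p)
        {
            pp[i] = gp; // point i at its grandparent (path halving)
            i = gp;
            p  = pp[i];
            gp = pp[p];
        }
        return p;
    }

    int main()
    {
        int pp[5] = {1, 2, 3, 4, 4}; // chain 0 -> 1 -> 2 -> 3 -> 4 (root)
        assert(findRoot(0, pp) == 4);
        assert(pp[0] == 2);          // the path was halved along the way
        return 0;
    }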
diff --git a/Eigen/src/SparseCore/SparseDot.h b/Eigen/src/SparseCore/SparseDot.h
index b25911c72..dfeb3a8df 100644
--- a/Eigen/src/SparseCore/SparseDot.h
+++ b/Eigen/src/SparseCore/SparseDot.h
@@ -54,8 +54,8 @@ SparseMatrixBase<Derived>::dot(const SparseMatrixBase<OtherDerived>& other) cons
typedef typename internal::remove_all<Nested>::type NestedCleaned;
typedef typename internal::remove_all<OtherNested>::type OtherNestedCleaned;
- const Nested nthis(derived());
- const OtherNested nother(other.derived());
+ Nested nthis(derived());
+ OtherNested nother(other.derived());
typename NestedCleaned::InnerIterator i(nthis,0);
typename OtherNestedCleaned::InnerIterator j(nother,0);
diff --git a/Eigen/src/SparseCore/SparseMatrix.h b/Eigen/src/SparseCore/SparseMatrix.h
index 5ff01da28..dc57f77fc 100644
--- a/Eigen/src/SparseCore/SparseMatrix.h
+++ b/Eigen/src/SparseCore/SparseMatrix.h
@@ -213,7 +213,7 @@ class SparseMatrix
* inserted in increasing inner index order, and in O(nnz_j) for a random insertion.
*
*/
- EIGEN_DONT_INLINE Scalar& insert(Index row, Index col)
+ Scalar& insert(Index row, Index col)
{
if(isCompressed())
{
@@ -300,11 +300,11 @@ class SparseMatrix
totalReserveSize += reserveSizes[j];
}
m_data.reserve(totalReserveSize);
- std::ptrdiff_t previousOuterIndex = m_outerIndex[m_outerSize];
- for(std::ptrdiff_t j=m_outerSize-1; j>=0; --j)
+ Index previousOuterIndex = m_outerIndex[m_outerSize];
+ for(Index j=m_outerSize-1; j>=0; --j)
{
- ptrdiff_t innerNNZ = previousOuterIndex - m_outerIndex[j];
- for(std::ptrdiff_t i=innerNNZ-1; i>=0; --i)
+ Index innerNNZ = previousOuterIndex - m_outerIndex[j];
+ for(Index i=innerNNZ-1; i>=0; --i)
{
m_data.index(newOuterIndex[j]+i) = m_data.index(m_outerIndex[j]+i);
m_data.value(newOuterIndex[j]+i) = m_data.value(m_outerIndex[j]+i);
@@ -327,19 +327,19 @@ class SparseMatrix
{
newOuterIndex[j] = count;
Index alreadyReserved = (m_outerIndex[j+1]-m_outerIndex[j]) - m_innerNonZeros[j];
- Index toReserve = std::max<std::ptrdiff_t>(reserveSizes[j], alreadyReserved);
+ Index toReserve = std::max<Index>(reserveSizes[j], alreadyReserved);
count += toReserve + m_innerNonZeros[j];
}
newOuterIndex[m_outerSize] = count;
m_data.resize(count);
- for(ptrdiff_t j=m_outerSize-1; j>=0; --j)
+ for(Index j=m_outerSize-1; j>=0; --j)
{
- std::ptrdiff_t offset = newOuterIndex[j] - m_outerIndex[j];
+ Index offset = newOuterIndex[j] - m_outerIndex[j];
if(offset>0)
{
- std::ptrdiff_t innerNNZ = m_innerNonZeros[j];
- for(std::ptrdiff_t i=innerNNZ-1; i>=0; --i)
+ Index innerNNZ = m_innerNonZeros[j];
+ for(Index i=innerNNZ-1; i>=0; --i)
{
m_data.index(newOuterIndex[j]+i) = m_data.index(m_outerIndex[j]+i);
m_data.value(newOuterIndex[j]+i) = m_data.value(m_outerIndex[j]+i);
@@ -434,7 +434,7 @@ class SparseMatrix
/** \internal
* same as insert(Index,Index) except that the indices are given relative to the storage order */
- EIGEN_DONT_INLINE Scalar& insertByOuterInner(Index j, Index i)
+ Scalar& insertByOuterInner(Index j, Index i)
{
return insert(IsRowMajor ? j : i, IsRowMajor ? i : j);
}
@@ -451,7 +451,7 @@ class SparseMatrix
for(Index j=1; j<m_outerSize; ++j)
{
Index nextOldStart = m_outerIndex[j+1];
- std::ptrdiff_t offset = oldStart - m_outerIndex[j];
+ Index offset = oldStart - m_outerIndex[j];
if(offset>0)
{
for(Index k=0; k<m_innerNonZeros[j]; ++k)
@@ -474,7 +474,7 @@ class SparseMatrix
{
if(m_innerNonZeros != 0)
return;
- m_innerNonZeros = new Index[m_outerSize];
+ m_innerNonZeros = static_cast<Index*>(std::malloc(m_outerSize * sizeof(Index)));
for (int i = 0; i < m_outerSize; i++)
{
m_innerNonZeros[i] = m_outerIndex[i+1] - m_outerIndex[i];
@@ -711,62 +711,7 @@ class SparseMatrix
#endif
template<typename OtherDerived>
- EIGEN_DONT_INLINE SparseMatrix& operator=(const SparseMatrixBase<OtherDerived>& other)
- {
- const bool needToTranspose = (Flags & RowMajorBit) != (OtherDerived::Flags & RowMajorBit);
- if (needToTranspose)
- {
- // two passes algorithm:
- // 1 - compute the number of coeffs per dest inner vector
- // 2 - do the actual copy/eval
- // Since each coeff of the rhs has to be evaluated twice, let's evaluate it if needed
- typedef typename internal::nested<OtherDerived,2>::type OtherCopy;
- typedef typename internal::remove_all<OtherCopy>::type _OtherCopy;
- OtherCopy otherCopy(other.derived());
-
- SparseMatrix dest(other.rows(),other.cols());
- Eigen::Map<Matrix<Index, Dynamic, 1> > (dest.m_outerIndex,dest.outerSize()).setZero();
-
- // pass 1
- // FIXME the above copy could be merged with that pass
- for (Index j=0; j<otherCopy.outerSize(); ++j)
- for (typename _OtherCopy::InnerIterator it(otherCopy, j); it; ++it)
- ++dest.m_outerIndex[it.index()];
-
- // prefix sum
- Index count = 0;
- VectorXi positions(dest.outerSize());
- for (Index j=0; j<dest.outerSize(); ++j)
- {
- Index tmp = dest.m_outerIndex[j];
- dest.m_outerIndex[j] = count;
- positions[j] = count;
- count += tmp;
- }
- dest.m_outerIndex[dest.outerSize()] = count;
- // alloc
- dest.m_data.resize(count);
- // pass 2
- for (Index j=0; j<otherCopy.outerSize(); ++j)
- {
- for (typename _OtherCopy::InnerIterator it(otherCopy, j); it; ++it)
- {
- Index pos = positions[it.index()]++;
- dest.m_data.index(pos) = j;
- dest.m_data.value(pos) = it.value();
- }
- }
- this->swap(dest);
- return *this;
- }
- else
- {
- if(other.isRValue())
- initAssignment(other.derived());
- // there is no special optimization
- return Base::operator=(other.derived());
- }
- }
+ EIGEN_DONT_INLINE SparseMatrix& operator=(const SparseMatrixBase<OtherDerived>& other);
friend std::ostream & operator << (std::ostream & s, const SparseMatrix& m)
{
@@ -836,111 +781,7 @@ protected:
/** \internal
* \sa insert(Index,Index) */
- EIGEN_DONT_INLINE Scalar& insertCompressed(Index row, Index col)
- {
- eigen_assert(isCompressed());
-
- const Index outer = IsRowMajor ? row : col;
- const Index inner = IsRowMajor ? col : row;
-
- Index previousOuter = outer;
- if (m_outerIndex[outer+1]==0)
- {
- // we start a new inner vector
- while (previousOuter>=0 && m_outerIndex[previousOuter]==0)
- {
- m_outerIndex[previousOuter] = static_cast<Index>(m_data.size());
- --previousOuter;
- }
- m_outerIndex[outer+1] = m_outerIndex[outer];
- }
-
- // here we have to handle the tricky case where the outerIndex array
- // starts with: [ 0 0 0 0 0 1 ...] and we are inserted in, e.g.,
- // the 2nd inner vector...
- bool isLastVec = (!(previousOuter==-1 && m_data.size()!=0))
- && (size_t(m_outerIndex[outer+1]) == m_data.size());
-
- size_t startId = m_outerIndex[outer];
- // FIXME let's make sure sizeof(long int) == sizeof(size_t)
- size_t p = m_outerIndex[outer+1];
- ++m_outerIndex[outer+1];
-
- float reallocRatio = 1;
- if (m_data.allocatedSize()<=m_data.size())
- {
- // if there is no preallocated memory, let's reserve a minimum of 32 elements
- if (m_data.size()==0)
- {
- m_data.reserve(32);
- }
- else
- {
- // we need to reallocate the data, to reduce multiple reallocations
- // we use a smart resize algorithm based on the current filling ratio
- // in addition, we use float to avoid integers overflows
- float nnzEstimate = float(m_outerIndex[outer])*float(m_outerSize)/float(outer+1);
- reallocRatio = (nnzEstimate-float(m_data.size()))/float(m_data.size());
- // furthermore we bound the realloc ratio to:
- // 1) reduce multiple minor realloc when the matrix is almost filled
- // 2) avoid to allocate too much memory when the matrix is almost empty
- reallocRatio = (std::min)((std::max)(reallocRatio,1.5f),8.f);
- }
- }
- m_data.resize(m_data.size()+1,reallocRatio);
-
- if (!isLastVec)
- {
- if (previousOuter==-1)
- {
- // oops wrong guess.
- // let's correct the outer offsets
- for (Index k=0; k<=(outer+1); ++k)
- m_outerIndex[k] = 0;
- Index k=outer+1;
- while(m_outerIndex[k]==0)
- m_outerIndex[k++] = 1;
- while (k<=m_outerSize && m_outerIndex[k]!=0)
- m_outerIndex[k++]++;
- p = 0;
- --k;
- k = m_outerIndex[k]-1;
- while (k>0)
- {
- m_data.index(k) = m_data.index(k-1);
- m_data.value(k) = m_data.value(k-1);
- k--;
- }
- }
- else
- {
- // we are not inserting into the last inner vec
- // update outer indices:
- Index j = outer+2;
- while (j<=m_outerSize && m_outerIndex[j]!=0)
- m_outerIndex[j++]++;
- --j;
- // shift data of last vecs:
- Index k = m_outerIndex[j]-1;
- while (k>=Index(p))
- {
- m_data.index(k) = m_data.index(k-1);
- m_data.value(k) = m_data.value(k-1);
- k--;
- }
- }
- }
-
- while ( (p > startId) && (m_data.index(p-1) > inner) )
- {
- m_data.index(p) = m_data.index(p-1);
- m_data.value(p) = m_data.value(p-1);
- --p;
- }
-
- m_data.index(p) = inner;
- return (m_data.value(p) = 0);
- }
+ EIGEN_DONT_INLINE Scalar& insertCompressed(Index row, Index col);
/** \internal
* A vector object that is equal to 0 everywhere but v at the position i */
@@ -959,36 +800,7 @@ protected:
/** \internal
* \sa insert(Index,Index) */
- EIGEN_DONT_INLINE Scalar& insertUncompressed(Index row, Index col)
- {
- eigen_assert(!isCompressed());
-
- const Index outer = IsRowMajor ? row : col;
- const Index inner = IsRowMajor ? col : row;
-
- std::ptrdiff_t room = m_outerIndex[outer+1] - m_outerIndex[outer];
- std::ptrdiff_t innerNNZ = m_innerNonZeros[outer];
- if(innerNNZ>=room)
- {
- // this inner vector is full, we need to reallocate the whole buffer :(
- reserve(SingletonVector(outer,std::max<std::ptrdiff_t>(2,innerNNZ)));
- }
-
- Index startId = m_outerIndex[outer];
- Index p = startId + m_innerNonZeros[outer];
- while ( (p > startId) && (m_data.index(p-1) > inner) )
- {
- m_data.index(p) = m_data.index(p-1);
- m_data.value(p) = m_data.value(p-1);
- --p;
- }
-      eigen_assert((p<=startId || m_data.index(p-1)!=inner) && "you cannot insert an element that already exists, you must call coeffRef to this end");
-
- m_innerNonZeros[outer]++;
-
- m_data.index(p) = inner;
- return (m_data.value(p) = 0);
- }
+ EIGEN_DONT_INLINE Scalar& insertUncompressed(Index row, Index col);
public:
/** \internal
@@ -1097,7 +909,6 @@ void set_from_triplets(const InputIterator& begin, const InputIterator& end, Spa
EIGEN_UNUSED_VARIABLE(Options);
enum { IsRowMajor = SparseMatrixType::IsRowMajor };
typedef typename SparseMatrixType::Scalar Scalar;
- typedef typename SparseMatrixType::Index Index;
SparseMatrix<Scalar,IsRowMajor?ColMajor:RowMajor> trMat(mat.rows(),mat.cols());
// pass 1: count the nnz per inner-vector
@@ -1205,6 +1016,204 @@ void SparseMatrix<Scalar,_Options,_Index>::sumupDuplicates()
m_data.resize(m_outerIndex[m_outerSize]);
}
+template<typename Scalar, int _Options, typename _Index>
+template<typename OtherDerived>
+EIGEN_DONT_INLINE SparseMatrix<Scalar,_Options,_Index>& SparseMatrix<Scalar,_Options,_Index>::operator=(const SparseMatrixBase<OtherDerived>& other)
+{
+ const bool needToTranspose = (Flags & RowMajorBit) != (OtherDerived::Flags & RowMajorBit);
+ if (needToTranspose)
+ {
+ // two-pass algorithm:
+ // 1 - compute the number of coeffs per dest inner vector
+ // 2 - do the actual copy/eval
+ // Since each coeff of the rhs has to be evaluated twice, let's evaluate it into a temporary if needed
+ typedef typename internal::nested<OtherDerived,2>::type OtherCopy;
+ typedef typename internal::remove_all<OtherCopy>::type _OtherCopy;
+ OtherCopy otherCopy(other.derived());
+
+ SparseMatrix dest(other.rows(),other.cols());
+ Eigen::Map<Matrix<Index, Dynamic, 1> > (dest.m_outerIndex,dest.outerSize()).setZero();
+
+ // pass 1
+ // FIXME the above copy could be merged with that pass
+ for (Index j=0; j<otherCopy.outerSize(); ++j)
+ for (typename _OtherCopy::InnerIterator it(otherCopy, j); it; ++it)
+ ++dest.m_outerIndex[it.index()];
+
+ // prefix sum
+ Index count = 0;
+ VectorXi positions(dest.outerSize());
+ for (Index j=0; j<dest.outerSize(); ++j)
+ {
+ Index tmp = dest.m_outerIndex[j];
+ dest.m_outerIndex[j] = count;
+ positions[j] = count;
+ count += tmp;
+ }
+ dest.m_outerIndex[dest.outerSize()] = count;
+ // alloc
+ dest.m_data.resize(count);
+ // pass 2
+ for (Index j=0; j<otherCopy.outerSize(); ++j)
+ {
+ for (typename _OtherCopy::InnerIterator it(otherCopy, j); it; ++it)
+ {
+ Index pos = positions[it.index()]++;
+ dest.m_data.index(pos) = j;
+ dest.m_data.value(pos) = it.value();
+ }
+ }
+ this->swap(dest);
+ return *this;
+ }
+ else
+ {
+ if(other.isRValue())
+ initAssignment(other.derived());
+ // there is no special optimization
+ return Base::operator=(other.derived());
+ }
+}
+
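The transposed-assignment path above is essentially a counting sort: pass 1 histograms the entries per destination inner vector, the prefix sum turns counts into start offsets, and pass 2 scatters each entry to its final slot. A minimal standalone sketch of the same idea, with plain std::vector standing in for the internal m_outerIndex/m_data storage (names here are illustrative, not Eigen API):

#include <cstddef>
#include <utility>
#include <vector>

// Redistribute (destOuter, value) pairs into contiguous per-outer buckets,
// mirroring pass 1 / prefix sum / pass 2 of operator= above. Only values are
// scattered here; the real code scatters (index, value) pairs.
void countingRedistribute(const std::vector<std::pair<int,double> >& entries,
                          int outerSize,
                          std::vector<int>& outerIndex,   // filled: size outerSize+1
                          std::vector<double>& values)    // filled: one slot per entry
{
  outerIndex.assign(outerSize + 1, 0);
  for (std::size_t k = 0; k < entries.size(); ++k)   // pass 1: count per bucket
    ++outerIndex[entries[k].first];
  int count = 0;
  std::vector<int> positions(outerSize);
  for (int j = 0; j < outerSize; ++j)                // prefix sum: counts -> offsets
  {
    int tmp = outerIndex[j];
    outerIndex[j] = count;
    positions[j] = count;
    count += tmp;
  }
  outerIndex[outerSize] = count;
  values.resize(count);
  for (std::size_t k = 0; k < entries.size(); ++k)   // pass 2: scatter to final slot
    values[positions[entries[k].first]++] = entries[k].second;
}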
+template<typename _Scalar, int _Options, typename _Index>
+EIGEN_DONT_INLINE typename SparseMatrix<_Scalar,_Options,_Index>::Scalar& SparseMatrix<_Scalar,_Options,_Index>::insertUncompressed(Index row, Index col)
+{
+ eigen_assert(!isCompressed());
+
+ const Index outer = IsRowMajor ? row : col;
+ const Index inner = IsRowMajor ? col : row;
+
+ Index room = m_outerIndex[outer+1] - m_outerIndex[outer];
+ Index innerNNZ = m_innerNonZeros[outer];
+ if(innerNNZ>=room)
+ {
+ // this inner vector is full, we need to reallocate the whole buffer :(
+ reserve(SingletonVector(outer,std::max<Index>(2,innerNNZ)));
+ }
+
+ Index startId = m_outerIndex[outer];
+ Index p = startId + m_innerNonZeros[outer];
+ while ( (p > startId) && (m_data.index(p-1) > inner) )
+ {
+ m_data.index(p) = m_data.index(p-1);
+ m_data.value(p) = m_data.value(p-1);
+ --p;
+ }
+ eigen_assert((p<=startId || m_data.index(p-1)!=inner) && "you cannot insert an element that already exists; you must call coeffRef to this end");
+
+ m_innerNonZeros[outer]++;
+
+ m_data.index(p) = inner;
+ return (m_data.value(p) = 0);
+}
+
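For context, insertUncompressed() is the path taken once per-inner-vector room has been reserved. A hedged usage sketch (the sizes and the per-column estimate of 6 are illustrative only):

#include <Eigen/SparseCore>

int main()
{
  Eigen::SparseMatrix<double> A(1000, 1000);
  // A per-column reserve switches A to uncompressed mode, so the insert()
  // calls below dispatch to insertUncompressed() and find room in place.
  A.reserve(Eigen::VectorXi::Constant(1000, 6));
  for (int j = 0; j < 1000; ++j)
    A.insert(j, j) = 1.0;   // room was reserved: no whole-buffer reallocation
  A.makeCompressed();       // squeeze back to compressed (CCS) storage
  return 0;
}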
+template<typename _Scalar, int _Options, typename _Index>
+EIGEN_DONT_INLINE typename SparseMatrix<_Scalar,_Options,_Index>::Scalar& SparseMatrix<_Scalar,_Options,_Index>::insertCompressed(Index row, Index col)
+{
+ eigen_assert(isCompressed());
+
+ const Index outer = IsRowMajor ? row : col;
+ const Index inner = IsRowMajor ? col : row;
+
+ Index previousOuter = outer;
+ if (m_outerIndex[outer+1]==0)
+ {
+ // we start a new inner vector
+ while (previousOuter>=0 && m_outerIndex[previousOuter]==0)
+ {
+ m_outerIndex[previousOuter] = static_cast<Index>(m_data.size());
+ --previousOuter;
+ }
+ m_outerIndex[outer+1] = m_outerIndex[outer];
+ }
+
+ // here we have to handle the tricky case where the outerIndex array
+ // starts with: [ 0 0 0 0 0 1 ...] and we are inserting in, e.g.,
+ // the 2nd inner vector...
+ bool isLastVec = (!(previousOuter==-1 && m_data.size()!=0))
+ && (size_t(m_outerIndex[outer+1]) == m_data.size());
+
+ size_t startId = m_outerIndex[outer];
+ // FIXME let's make sure sizeof(long int) == sizeof(size_t)
+ size_t p = m_outerIndex[outer+1];
+ ++m_outerIndex[outer+1];
+
+ float reallocRatio = 1;
+ if (m_data.allocatedSize()<=m_data.size())
+ {
+ // if there is no preallocated memory, let's reserve a minimum of 32 elements
+ if (m_data.size()==0)
+ {
+ m_data.reserve(32);
+ }
+ else
+ {
+ // we need to reallocate the data; to reduce repeated reallocations
+ // we use a smart resize algorithm based on the current filling ratio
+ // in addition, we use float to avoid integer overflows
+ float nnzEstimate = float(m_outerIndex[outer])*float(m_outerSize)/float(outer+1);
+ reallocRatio = (nnzEstimate-float(m_data.size()))/float(m_data.size());
+ // furthermore we bound the realloc ratio to:
+ // 1) reduce multiple minor reallocs when the matrix is almost full
+ // 2) avoid allocating too much memory when the matrix is almost empty
+ reallocRatio = (std::min)((std::max)(reallocRatio,1.5f),8.f);
+ }
+ }
+ m_data.resize(m_data.size()+1,reallocRatio);
+
+ if (!isLastVec)
+ {
+ if (previousOuter==-1)
+ {
+ // oops wrong guess.
+ // let's correct the outer offsets
+ for (Index k=0; k<=(outer+1); ++k)
+ m_outerIndex[k] = 0;
+ Index k=outer+1;
+ while(m_outerIndex[k]==0)
+ m_outerIndex[k++] = 1;
+ while (k<=m_outerSize && m_outerIndex[k]!=0)
+ m_outerIndex[k++]++;
+ p = 0;
+ --k;
+ k = m_outerIndex[k]-1;
+ while (k>0)
+ {
+ m_data.index(k) = m_data.index(k-1);
+ m_data.value(k) = m_data.value(k-1);
+ k--;
+ }
+ }
+ else
+ {
+ // we are not inserting into the last inner vec
+ // update outer indices:
+ Index j = outer+2;
+ while (j<=m_outerSize && m_outerIndex[j]!=0)
+ m_outerIndex[j++]++;
+ --j;
+ // shift data of last vecs:
+ Index k = m_outerIndex[j]-1;
+ while (k>=Index(p))
+ {
+ m_data.index(k) = m_data.index(k-1);
+ m_data.value(k) = m_data.value(k-1);
+ k--;
+ }
+ }
+ }
+
+ while ( (p > startId) && (m_data.index(p-1) > inner) )
+ {
+ m_data.index(p) = m_data.index(p-1);
+ m_data.value(p) = m_data.value(p-1);
+ --p;
+ }
+
+ m_data.index(p) = inner;
+ return (m_data.value(p) = 0);
+}
+
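To make the growth heuristic of insertCompressed() concrete, suppose the buffer fills up while 50 entries are stored in the first 10 of 100 columns: the density extrapolates to 50*100/10 = 500 nonzeros, the raw ratio is (500-50)/50 = 9, and the clamp reduces it to the maximum of 8. A self-contained restatement with these illustrative numbers:

#include <algorithm>
#include <cstdio>

int main()
{
  // Illustrative state: 50 entries stored while filling column index 9 of 100.
  float nnzSoFar   = 50.f;    // m_outerIndex[outer]
  float outerSize  = 100.f;   // m_outerSize
  float colsFilled = 10.f;    // outer + 1
  float dataSize   = 50.f;    // m_data.size()

  float nnzEstimate  = nnzSoFar * outerSize / colsFilled;          // 500
  float reallocRatio = (nnzEstimate - dataSize) / dataSize;        // 9
  reallocRatio = (std::min)((std::max)(reallocRatio, 1.5f), 8.f);  // clamped to 8
  std::printf("grow the buffer by a factor of %.1f\n", reallocRatio);
  return 0;
}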
} // end namespace Eigen
#endif // EIGEN_SPARSEMATRIX_H
diff --git a/Eigen/src/SparseCore/SparseSelfAdjointView.h b/Eigen/src/SparseCore/SparseSelfAdjointView.h
index c10853791..9630b60f5 100644
--- a/Eigen/src/SparseCore/SparseSelfAdjointView.h
+++ b/Eigen/src/SparseCore/SparseSelfAdjointView.h
@@ -213,7 +213,6 @@ class SparseSelfAdjointTimeDenseProduct
// TODO use alpha
eigen_assert(alpha==Scalar(1) && "alpha != 1 is not implemented yet, sorry");
typedef typename internal::remove_all<Lhs>::type _Lhs;
- typedef typename internal::remove_all<Rhs>::type _Rhs;
typedef typename _Lhs::InnerIterator LhsInnerIterator;
enum {
LhsIsRowMajor = (_Lhs::Flags&RowMajorBit)==RowMajorBit,
diff --git a/Eigen/src/SparseCore/SparseVector.h b/Eigen/src/SparseCore/SparseVector.h
index a9c8979cf..cd1e76070 100644
--- a/Eigen/src/SparseCore/SparseVector.h
+++ b/Eigen/src/SparseCore/SparseVector.h
@@ -230,7 +230,8 @@ class SparseVector
template<typename OtherDerived>
inline SparseVector& operator=(const SparseMatrixBase<OtherDerived>& other)
{
- if (int(RowsAtCompileTime)!=int(OtherDerived::RowsAtCompileTime))
+ if ( (bool(OtherDerived::IsVectorAtCompileTime) && int(RowsAtCompileTime)!=int(OtherDerived::RowsAtCompileTime))
+ || ((!bool(OtherDerived::IsVectorAtCompileTime)) && ( bool(IsColVector) ? other.cols()>1 : other.rows()>1 )))
return assign(other.transpose());
else
return assign(other);
@@ -309,30 +310,7 @@ class SparseVector
protected:
template<typename OtherDerived>
- EIGEN_DONT_INLINE SparseVector& assign(const SparseMatrixBase<OtherDerived>& _other)
- {
- const OtherDerived& other(_other.derived());
- const bool needToTranspose = (Flags & RowMajorBit) != (OtherDerived::Flags & RowMajorBit);
- if(needToTranspose)
- {
- Index size = other.size();
- Index nnz = other.nonZeros();
- resize(size);
- reserve(nnz);
- for(Index i=0; i<size; ++i)
- {
- typename OtherDerived::InnerIterator it(other, i);
- if(it)
- insert(i) = it.value();
- }
- return *this;
- }
- else
- {
- // there is no special optimization
- return Base::operator=(other);
- }
- }
+ EIGEN_DONT_INLINE SparseVector& assign(const SparseMatrixBase<OtherDerived>& _other);
Storage m_data;
Index m_size;
@@ -402,6 +380,33 @@ class SparseVector<Scalar,_Options,_Index>::ReverseInnerIterator
const Index m_start;
};
+template<typename Scalar, int _Options, typename _Index>
+template<typename OtherDerived>
+EIGEN_DONT_INLINE SparseVector<Scalar,_Options,_Index>& SparseVector<Scalar,_Options,_Index>::assign(const SparseMatrixBase<OtherDerived>& _other)
+{
+ const OtherDerived& other(_other.derived());
+ const bool needToTranspose = (Flags & RowMajorBit) != (OtherDerived::Flags & RowMajorBit);
+ if(needToTranspose)
+ {
+ Index size = other.size();
+ Index nnz = other.nonZeros();
+ resize(size);
+ reserve(nnz);
+ for(Index i=0; i<size; ++i)
+ {
+ typename OtherDerived::InnerIterator it(other, i);
+ if(it)
+ insert(i) = it.value();
+ }
+ return *this;
+ }
+ else
+ {
+ // there is no special optimization
+ return Base::operator=(other);
+ }
+}
+
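Together with the relaxed operator= above, this lets a dynamically vector-shaped sparse matrix be assigned to a SparseVector; when the storage orders end up disagreeing, assign() takes the needToTranspose branch. A hedged sketch:

#include <Eigen/SparseCore>

int main()
{
  Eigen::SparseMatrix<double> M(1, 5);  // a 1x5 matrix: a vector only at runtime
  M.insert(0, 2) = 3.0;
  Eigen::SparseVector<double> v(5);
  // The runtime shape check added in operator= above accepts this and routes
  // it through assign(M.transpose()), i.e. the needToTranspose branch.
  v = M;
  return 0;
}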
} // end namespace Eigen
#endif // EIGEN_SPARSEVECTOR_H
diff --git a/Eigen/src/SparseLU/SparseLU.h b/Eigen/src/SparseLU/SparseLU.h
index 175794811..e78250084 100644
--- a/Eigen/src/SparseLU/SparseLU.h
+++ b/Eigen/src/SparseLU/SparseLU.h
@@ -2,6 +2,7 @@
// for linear algebra.
//
// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
+// Copyright (C) 2012 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
@@ -13,6 +14,8 @@
namespace Eigen {
+template <typename _MatrixType, typename _OrderingType> class SparseLU;
+template <typename MappedSparseMatrixType> struct SparseLUMatrixLReturnType;
/** \ingroup SparseLU_Module
* \class SparseLU
*
@@ -39,7 +42,7 @@ namespace Eigen {
* \code
* VectorXd x(n), b(n);
* SparseMatrix<double, ColMajor> A;
- * SparseLU<SparseMatrix<scalar, ColMajor>, COLAMDOrdering<int> > solver;
+ * SparseLU<SparseMatrix<double, ColMajor>, COLAMDOrdering<Index> > solver;
* // fill A and b;
* // Compute the ordering permutation vector from the structural pattern of A
* solver.analyzePattern(A);
@@ -65,7 +68,7 @@ namespace Eigen {
* \sa \ref OrderingMethods_Module
*/
template <typename _MatrixType, typename _OrderingType>
-class SparseLU
+class SparseLU : public internal::SparseLUImpl<typename _MatrixType::Scalar, typename _MatrixType::Index>
{
public:
typedef _MatrixType MatrixType;
@@ -74,17 +77,18 @@ class SparseLU
typedef typename MatrixType::RealScalar RealScalar;
typedef typename MatrixType::Index Index;
typedef SparseMatrix<Scalar,ColMajor,Index> NCMatrix;
- typedef SuperNodalMatrix<Scalar, Index> SCMatrix;
+ typedef internal::MappedSuperNodalMatrix<Scalar, Index> SCMatrix;
typedef Matrix<Scalar,Dynamic,1> ScalarVector;
typedef Matrix<Index,Dynamic,1> IndexVector;
typedef PermutationMatrix<Dynamic, Dynamic, Index> PermutationType;
+ typedef internal::SparseLUImpl<Scalar, Index> Base;
public:
- SparseLU():m_isInitialized(true),m_Ustore(0,0,0,0,0,0),m_symmetricmode(false),m_diagpivotthresh(1.0)
+ SparseLU():m_isInitialized(true),m_lastError(""),m_Ustore(0,0,0,0,0,0),m_symmetricmode(false),m_diagpivotthresh(1.0)
{
initperfvalues();
}
- SparseLU(const MatrixType& matrix):m_isInitialized(true),m_Ustore(0,0,0,0,0,0),m_symmetricmode(false),m_diagpivotthresh(1.0)
+ SparseLU(const MatrixType& matrix):m_isInitialized(true),m_lastError(""),m_Ustore(0,0,0,0,0,0),m_symmetricmode(false),m_diagpivotthresh(1.0)
{
initperfvalues();
compute(matrix);
@@ -119,36 +123,26 @@ class SparseLU
m_symmetricmode = sym;
}
- /** Set the threshold used for a diagonal entry to be an acceptable pivot. */
- void diagPivotThresh(RealScalar thresh)
- {
- m_diagpivotthresh = thresh;
- }
-
- /** Return the number of nonzero elements in the L factor */
- int nnzL()
+ /** Returns an expression of the matrix L, internally stored as supernodes.
+ * To perform a triangular solve with this matrix, use
+ * \code
+ * y = b; matrixL().solveInPlace(y);
+ * \endcode
+ */
+ SparseLUMatrixLReturnType<SCMatrix> matrixL() const
{
- if (m_factorizationIsOk)
- return m_nnzL;
- else
- {
- std::cerr<<"Numerical factorization should be done before\n";
- return 0;
- }
+ return SparseLUMatrixLReturnType<SCMatrix>(m_Lstore);
}
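A hedged fragment showing the documented pattern (it assumes an already-factorized SparseLU object and a conforming dense right-hand side):

#include <Eigen/Dense>
#include <Eigen/SparseLU>

// Forward substitution with the supernodal L factor, following the usage
// documented above. Assumes 'lu' was successfully factorized.
template<typename LU>
Eigen::VectorXd forwardSubst(const LU& lu, const Eigen::VectorXd& b)
{
  Eigen::VectorXd y = b;          // copy the right-hand side
  lu.matrixL().solveInPlace(y);   // overwrite y with the solution of L y = b
  return y;
}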
- /** Return the number of nonzero elements in the U factor */
- int nnzU()
+ /** Set the threshold used for a diagonal entry to be an acceptable pivot. */
+ void setPivotThreshold(const RealScalar& thresh)
{
- if (m_factorizationIsOk)
- return m_nnzU;
- else
- {
- std::cerr<<"Numerical factorization should be done before\n";
- return 0;
- }
+ m_diagpivotthresh = thresh;
}
+
/** \returns the solution X of \f$ A X = B \f$ using the current decomposition of A.
*
+ * \warning the destination matrix X in X = this->solve(B) must be column-major.
+ *
* \sa compute()
*/
template<typename Rhs>
@@ -160,6 +154,18 @@ class SparseLU
return internal::solve_retval<SparseLU, Rhs>(*this, B.derived());
}
+ /** \returns the solution X of \f$ A X = B \f$ using the current decomposition of A.
+ *
+ * \sa compute()
+ */
+ template<typename Rhs>
+ inline const internal::sparse_solve_retval<SparseLU, Rhs> solve(const SparseMatrixBase<Rhs>& B) const
+ {
+ eigen_assert(m_factorizationIsOk && "SparseLU is not initialized.");
+ eigen_assert(rows()==B.rows()
+ && "SparseLU::solve(): invalid number of rows of the right hand side matrix B");
+ return internal::sparse_solve_retval<SparseLU, Rhs>(*this, B.derived());
+ }
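A hedged end-to-end sketch exercising both solve() overloads (A is assumed square, compressed, and invertible):

#include <Eigen/Dense>
#include <Eigen/OrderingMethods>
#include <Eigen/SparseLU>

void solveBoth(const Eigen::SparseMatrix<double>& A,
               const Eigen::VectorXd& b,
               const Eigen::SparseMatrix<double>& B)
{
  Eigen::SparseLU<Eigen::SparseMatrix<double>, Eigen::COLAMDOrdering<int> > solver;
  solver.compute(A);                                // analyzePattern + factorize
  Eigen::VectorXd x = solver.solve(b);              // dense right-hand side
  Eigen::SparseMatrix<double> X = solver.solve(B);  // sparse rhs, overload above
}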
/** \brief Reports whether previous computation was successful.
*
@@ -174,7 +180,13 @@ class SparseLU
eigen_assert(m_isInitialized && "Decomposition is not initialized.");
return m_info;
}
-
+ /**
+ * \returns A string describing the type of error
+ */
+ std::string lastErrorMessage() const
+ {
+ return m_lastError;
+ }
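With errors now recorded in m_lastError instead of printed to std::cerr, callers can query them after the fact. A hedged sketch (assumes analyzePattern() was already called on the solver):

#include <iostream>
#include <Eigen/SparseLU>

template<typename Solver, typename Mat>
bool checkedFactorize(Solver& solver, const Mat& A)
{
  solver.factorize(A);                 // errors land in m_lastError, not std::cerr
  if (solver.info() != Eigen::Success)
  {
    std::cerr << "SparseLU failed: " << solver.lastErrorMessage() << std::endl;
    return false;
  }
  return true;
}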
template<typename Rhs, typename Dest>
bool _solve(const MatrixBase<Rhs> &B, MatrixBase<Dest> &_X) const
{
@@ -184,20 +196,21 @@ class SparseLU
THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES);
- int nrhs = B.cols();
+ Index nrhs = B.cols();
Index n = B.rows();
// Permute the right hand side to form X = Pr*B
// on return, X is overwritten by the computed solution
X.resize(n,nrhs);
- for(int j = 0; j < nrhs; ++j)
+ for(Index j = 0; j < nrhs; ++j)
X.col(j) = m_perm_r * B.col(j);
//Forward substitution with L
- m_Lstore.solveInPlace(X);
+// m_Lstore.solveInPlace(X);
+ this->matrixL().solveInPlace(X);
// Backward solve with U
- for (int k = m_Lstore.nsuper(); k >= 0; k--)
+ for (Index k = m_Lstore.nsuper(); k >= 0; k--)
{
Index fsupc = m_Lstore.supToCol()[k];
Index lda = m_Lstore.colIndexPtr()[fsupc+1] - m_Lstore.colIndexPtr()[fsupc]; // leading dimension
@@ -206,7 +219,7 @@ class SparseLU
if (nsupc == 1)
{
- for (int j = 0; j < nrhs; j++)
+ for (Index j = 0; j < nrhs; j++)
{
X(fsupc, j) /= m_Lstore.valuePtr()[luptr];
}
@@ -218,11 +231,11 @@ class SparseLU
U = A.template triangularView<Upper>().solve(U);
}
- for (int j = 0; j < nrhs; ++j)
+ for (Index j = 0; j < nrhs; ++j)
{
- for (int jcol = fsupc; jcol < fsupc + nsupc; jcol++)
+ for (Index jcol = fsupc; jcol < fsupc + nsupc; jcol++)
{
- typename MappedSparseMatrix<Scalar>::InnerIterator it(m_Ustore, jcol);
+ typename MappedSparseMatrix<Scalar,ColMajor, Index>::InnerIterator it(m_Ustore, jcol);
for ( ; it; ++it)
{
Index irow = it.index();
@@ -233,7 +246,7 @@ class SparseLU
} // End For U-solve
// Permute back the solution
- for (int j = 0; j < nrhs; ++j)
+ for (Index j = 0; j < nrhs; ++j)
X.col(j) = m_perm_c.inverse() * X.col(j);
return true;
@@ -256,23 +269,23 @@ class SparseLU
bool m_isInitialized;
bool m_factorizationIsOk;
bool m_analysisIsOk;
+ std::string m_lastError;
NCMatrix m_mat; // The input (permuted ) matrix
SCMatrix m_Lstore; // The lower triangular matrix (supernodal)
- MappedSparseMatrix<Scalar> m_Ustore; // The upper triangular matrix
+ MappedSparseMatrix<Scalar,ColMajor,Index> m_Ustore; // The upper triangular matrix
PermutationType m_perm_c; // Column permutation
PermutationType m_perm_r ; // Row permutation
IndexVector m_etree; // Column elimination tree
- LU_GlobalLU_t<IndexVector, ScalarVector> m_glu;
+ typename Base::GlobalLU_t m_glu;
- // SuperLU/SparseLU options
+ // SparseLU options
bool m_symmetricmode;
-
// values for performance
- LU_perfvalues m_perfv;
+ internal::perfvalues<Index> m_perfv;
RealScalar m_diagpivotthresh; // Specifies the threshold used for a diagonal entry to be an acceptable pivot
- int m_nnzL, m_nnzU; // Nonzeros in L and U factors
-
+ Index m_nnzL, m_nnzU; // Nonzeros in L and U factors
+
private:
// Copy constructor
SparseLU (SparseLU& ) {}
@@ -301,18 +314,17 @@ void SparseLU<MatrixType, OrderingType>::analyzePattern(const MatrixType& mat)
ord(mat,m_perm_c);
// Apply the permutation to the column of the input matrix
-// m_mat = mat * m_perm_c.inverse(); //FIXME It should be less expensive here to permute only the structural pattern of the matrix
-
//First copy the whole input matrix.
m_mat = mat;
- m_mat.uncompress(); //NOTE: The effect of this command is only to create the InnerNonzeros pointers. FIXME : This vector is filled but not subsequently used.
- //Then, permute only the column pointers
- for (int i = 0; i < mat.cols(); i++)
- {
- m_mat.outerIndexPtr()[m_perm_c.indices()(i)] = mat.outerIndexPtr()[i];
- m_mat.innerNonZeroPtr()[m_perm_c.indices()(i)] = mat.outerIndexPtr()[i+1] - mat.outerIndexPtr()[i];
+ if (m_perm_c.size()) {
+ m_mat.uncompress(); //NOTE: The effect of this command is only to create the InnerNonzeros pointers. FIXME : This vector is filled but not subsequently used.
+ //Then, permute only the column pointers
+ for (Index i = 0; i < mat.cols(); i++)
+ {
+ m_mat.outerIndexPtr()[m_perm_c.indices()(i)] = mat.outerIndexPtr()[i];
+ m_mat.innerNonZeroPtr()[m_perm_c.indices()(i)] = mat.outerIndexPtr()[i+1] - mat.outerIndexPtr()[i];
+ }
}
-
// Compute the column elimination tree of the permuted matrix
IndexVector firstRowElt;
internal::coletree(m_mat, m_etree,firstRowElt);
@@ -325,18 +337,20 @@ void SparseLU<MatrixType, OrderingType>::analyzePattern(const MatrixType& mat)
// Renumber etree in postorder
- int m = m_mat.cols();
+ Index m = m_mat.cols();
iwork.resize(m+1);
- for (int i = 0; i < m; ++i) iwork(post(i)) = post(m_etree(i));
+ for (Index i = 0; i < m; ++i) iwork(post(i)) = post(m_etree(i));
m_etree = iwork;
// Postmultiply A*Pc by post, i.e reorder the matrix according to the postorder of the etree
- PermutationType post_perm(m); //FIXME Use directly a constructor with post
- for (int i = 0; i < m; i++)
+ PermutationType post_perm(m);
+ for (Index i = 0; i < m; i++)
post_perm.indices()(i) = post(i);
// Combine the two permutations : postorder the permutation for future use
- m_perm_c = post_perm * m_perm_c;
+ if(m_perm_c.size()) {
+ m_perm_c = post_perm * m_perm_c;
+ }
} // end postordering
@@ -367,7 +381,7 @@ void SparseLU<MatrixType, OrderingType>::analyzePattern(const MatrixType& mat)
template <typename MatrixType, typename OrderingType>
void SparseLU<MatrixType, OrderingType>::factorize(const MatrixType& matrix)
{
-
+ using internal::emptyIdxLU;
eigen_assert(m_analysisIsOk && "analyzePattern() should be called first");
eigen_assert((matrix.rows() == matrix.cols()) && "Only for square matrices");
@@ -377,24 +391,32 @@ void SparseLU<MatrixType, OrderingType>::factorize(const MatrixType& matrix)
// Apply the column permutation computed in analyzepattern()
// m_mat = matrix * m_perm_c.inverse();
m_mat = matrix;
- m_mat.uncompress(); //NOTE: The effect of this command is only to create the InnerNonzeros pointers.
- //Then, permute only the column pointers
- for (int i = 0; i < matrix.cols(); i++)
+ if (m_perm_c.size())
{
- m_mat.outerIndexPtr()[m_perm_c.indices()(i)] = matrix.outerIndexPtr()[i];
- m_mat.innerNonZeroPtr()[m_perm_c.indices()(i)] = matrix.outerIndexPtr()[i+1] - matrix.outerIndexPtr()[i];
+ m_mat.uncompress(); //NOTE: The effect of this command is only to create the InnerNonzeros pointers.
+ //Then, permute only the column pointers
+ for (Index i = 0; i < matrix.cols(); i++)
+ {
+ m_mat.outerIndexPtr()[m_perm_c.indices()(i)] = matrix.outerIndexPtr()[i];
+ m_mat.innerNonZeroPtr()[m_perm_c.indices()(i)] = matrix.outerIndexPtr()[i+1] - matrix.outerIndexPtr()[i];
+ }
+ }
+ else
+ { //FIXME This should not be needed if the empty permutation is handled transparently
+ m_perm_c.resize(matrix.cols());
+ for(Index i = 0; i < matrix.cols(); ++i) m_perm_c.indices()(i) = i;
}
- int m = m_mat.rows();
- int n = m_mat.cols();
- int nnz = m_mat.nonZeros();
- int maxpanel = m_perfv.panel_size * m;
+ Index m = m_mat.rows();
+ Index n = m_mat.cols();
+ Index nnz = m_mat.nonZeros();
+ Index maxpanel = m_perfv.panel_size * m;
// Allocate working storage common to the factor routines
- int lwork = 0;
- int info = SparseLUBase<Scalar,Index>::LUMemInit(m, n, nnz, lwork, m_perfv.fillfactor, m_perfv.panel_size, m_glu);
+ Index lwork = 0;
+ Index info = Base::memInit(m, n, nnz, lwork, m_perfv.fillfactor, m_perfv.panel_size, m_glu);
if (info)
{
- std::cerr << "UNABLE TO ALLOCATE WORKING MEMORY\n\n" ;
+ m_lastError = "UNABLE TO ALLOCATE WORKING MEMORY\n\n" ;
m_factorizationIsOk = false;
return ;
}
@@ -406,7 +428,7 @@ void SparseLU<MatrixType, OrderingType>::factorize(const MatrixType& matrix)
IndexVector repfnz(maxpanel);
IndexVector panel_lsub(maxpanel);
IndexVector xprune(n); xprune.setZero();
- IndexVector marker(m*LU_NO_MARKER); marker.setZero();
+ IndexVector marker(m*internal::LUNoMarker); marker.setZero();
repfnz.setConstant(-1);
panel_lsub.setConstant(-1);
@@ -415,7 +437,7 @@ void SparseLU<MatrixType, OrderingType>::factorize(const MatrixType& matrix)
ScalarVector dense;
dense.setZero(maxpanel);
ScalarVector tempv;
- tempv.setZero(LU_NUM_TEMPV(m, m_perfv.panel_size, m_perfv.maxsuper, /*m_perfv.rowblk*/m) );
+ tempv.setZero(internal::LUnumTempV(m, m_perfv.panel_size, m_perfv.maxsuper, /*m_perfv.rowblk*/m) );
// Compute the inverse of perm_c
PermutationType iperm_c(m_perm_c.inverse());
@@ -423,35 +445,35 @@ void SparseLU<MatrixType, OrderingType>::factorize(const MatrixType& matrix)
// Identify initial relaxed snodes
IndexVector relax_end(n);
if ( m_symmetricmode == true )
- SparseLUBase<Scalar,Index>::LU_heap_relax_snode(n, m_etree, m_perfv.relax, marker, relax_end);
+ Base::heap_relax_snode(n, m_etree, m_perfv.relax, marker, relax_end);
else
- SparseLUBase<Scalar,Index>::LU_relax_snode(n, m_etree, m_perfv.relax, marker, relax_end);
+ Base::relax_snode(n, m_etree, m_perfv.relax, marker, relax_end);
m_perm_r.resize(m);
m_perm_r.indices().setConstant(-1);
marker.setConstant(-1);
- m_glu.supno(0) = IND_EMPTY; m_glu.xsup.setConstant(0);
+ m_glu.supno(0) = emptyIdxLU; m_glu.xsup.setConstant(0);
m_glu.xsup(0) = m_glu.xlsub(0) = m_glu.xusub(0) = m_glu.xlusup(0) = Index(0);
// Work on one 'panel' at a time. A panel is one of the following :
// (a) a relaxed supernode at the bottom of the etree, or
// (b) panel_size contiguous columns, <panel_size> defined by the user
- int jcol;
+ Index jcol;
IndexVector panel_histo(n);
Index pivrow; // Pivotal row number in the original row matrix
- int nseg1; // Number of segments in U-column above panel row jcol
- int nseg; // Number of segments in each U-column
- int irep;
- int i, k, jj;
+ Index nseg1; // Number of segments in U-column above panel row jcol
+ Index nseg; // Number of segments in each U-column
+ Index irep;
+ Index i, k, jj;
for (jcol = 0; jcol < n; )
{
// Adjust panel size so that a panel won't overlap with the next relaxed snode.
- int panel_size = m_perfv.panel_size; // upper bound on panel width
+ Index panel_size = m_perfv.panel_size; // upper bound on panel width
for (k = jcol + 1; k < (std::min)(jcol+panel_size, n); k++)
{
- if (relax_end(k) != IND_EMPTY)
+ if (relax_end(k) != emptyIdxLU)
{
panel_size = k - jcol;
break;
@@ -461,10 +483,10 @@ void SparseLU<MatrixType, OrderingType>::factorize(const MatrixType& matrix)
panel_size = n - jcol;
// Symbolic outer factorization on a panel of columns
- SparseLUBase<Scalar,Index>::LU_panel_dfs(m, panel_size, jcol, m_mat, m_perm_r.indices(), nseg1, dense, panel_lsub, segrep, repfnz, xprune, marker, parent, xplore, m_glu);
+ Base::panel_dfs(m, panel_size, jcol, m_mat, m_perm_r.indices(), nseg1, dense, panel_lsub, segrep, repfnz, xprune, marker, parent, xplore, m_glu);
// Numeric sup-panel updates in topological order
- SparseLUBase<Scalar,Index>::LU_panel_bmod(m, panel_size, jcol, nseg1, dense, tempv, segrep, repfnz, m_glu);
+ Base::panel_bmod(m, panel_size, jcol, nseg1, dense, tempv, segrep, repfnz, m_glu);
// Sparse LU within the panel, and below the panel diagonal
for ( jj = jcol; jj< jcol + panel_size; jj++)
@@ -475,10 +497,10 @@ void SparseLU<MatrixType, OrderingType>::factorize(const MatrixType& matrix)
//Depth-first-search for the current column
VectorBlock<IndexVector> panel_lsubk(panel_lsub, k, m);
VectorBlock<IndexVector> repfnz_k(repfnz, k, m);
- info = SparseLUBase<Scalar,Index>::LU_column_dfs(m, jj, m_perm_r.indices(), m_perfv.maxsuper, nseg, panel_lsubk, segrep, repfnz_k, xprune, marker, parent, xplore, m_glu);
+ info = Base::column_dfs(m, jj, m_perm_r.indices(), m_perfv.maxsuper, nseg, panel_lsubk, segrep, repfnz_k, xprune, marker, parent, xplore, m_glu);
if ( info )
{
- std::cerr << "UNABLE TO EXPAND MEMORY IN COLUMN_DFS() \n";
+ m_lastError = "UNABLE TO EXPAND MEMORY IN COLUMN_DFS() ";
m_info = NumericalIssue;
m_factorizationIsOk = false;
return;
@@ -486,62 +508,82 @@ void SparseLU<MatrixType, OrderingType>::factorize(const MatrixType& matrix)
// Numeric updates to this column
VectorBlock<ScalarVector> dense_k(dense, k, m);
VectorBlock<IndexVector> segrep_k(segrep, nseg1, m-nseg1);
- info = SparseLUBase<Scalar,Index>::LU_column_bmod(jj, (nseg - nseg1), dense_k, tempv, segrep_k, repfnz_k, jcol, m_glu);
+ info = Base::column_bmod(jj, (nseg - nseg1), dense_k, tempv, segrep_k, repfnz_k, jcol, m_glu);
if ( info )
{
- std::cerr << "UNABLE TO EXPAND MEMORY IN COLUMN_BMOD() \n";
+ m_lastError = "UNABLE TO EXPAND MEMORY IN COLUMN_BMOD() ";
m_info = NumericalIssue;
m_factorizationIsOk = false;
return;
}
// Copy the U-segments to ucol(*)
- info = SparseLUBase<Scalar,Index>::LU_copy_to_ucol(jj, nseg, segrep, repfnz_k ,m_perm_r.indices(), dense_k, m_glu);
+ info = Base::copy_to_ucol(jj, nseg, segrep, repfnz_k ,m_perm_r.indices(), dense_k, m_glu);
if ( info )
{
- std::cerr << "UNABLE TO EXPAND MEMORY IN COPY_TO_UCOL() \n";
+ m_lastError = "UNABLE TO EXPAND MEMORY IN COPY_TO_UCOL() ";
m_info = NumericalIssue;
m_factorizationIsOk = false;
return;
}
// Form the L-segment
- info = SparseLUBase<Scalar,Index>::LU_pivotL(jj, m_diagpivotthresh, m_perm_r.indices(), iperm_c.indices(), pivrow, m_glu);
+ info = Base::pivotL(jj, m_diagpivotthresh, m_perm_r.indices(), iperm_c.indices(), pivrow, m_glu);
if ( info )
{
- std::cerr<< "THE MATRIX IS STRUCTURALLY SINGULAR ... ZERO COLUMN AT " << info <<std::endl;
+ m_lastError = "THE MATRIX IS STRUCTURALLY SINGULAR ... ZERO COLUMN AT ";
+ std::ostringstream returnInfo;
+ returnInfo << info;
+ m_lastError += returnInfo.str();
m_info = NumericalIssue;
m_factorizationIsOk = false;
return;
}
// Prune columns (0:jj-1) using column jj
- SparseLUBase<Scalar,Index>::LU_pruneL(jj, m_perm_r.indices(), pivrow, nseg, segrep, repfnz_k, xprune, m_glu);
+ Base::pruneL(jj, m_perm_r.indices(), pivrow, nseg, segrep, repfnz_k, xprune, m_glu);
// Reset repfnz for this column
for (i = 0; i < nseg; i++)
{
irep = segrep(i);
- repfnz_k(irep) = IND_EMPTY;
+ repfnz_k(irep) = emptyIdxLU;
}
} // end SparseLU within the panel
jcol += panel_size; // Move to the next panel
} // end for -- end elimination
// Count the number of nonzeros in factors
- SparseLUBase<Scalar,Index>::LU_countnz(n, m_nnzL, m_nnzU, m_glu);
+ Base::countnz(n, m_nnzL, m_nnzU, m_glu);
// Apply permutation to the L subscripts
- SparseLUBase<Scalar,Index>::LU_fixupL(n, m_perm_r.indices(), m_glu);
+ Base::fixupL(n, m_perm_r.indices(), m_glu);
// Create supernode matrix L
m_Lstore.setInfos(m, n, m_glu.lusup, m_glu.xlusup, m_glu.lsub, m_glu.xlsub, m_glu.supno, m_glu.xsup);
// Create the column major upper sparse matrix U;
- new (&m_Ustore) MappedSparseMatrix<Scalar> ( m, n, m_nnzU, m_glu.xusub.data(), m_glu.usub.data(), m_glu.ucol.data() );
+ new (&m_Ustore) MappedSparseMatrix<Scalar, ColMajor, Index> ( m, n, m_nnzU, m_glu.xusub.data(), m_glu.usub.data(), m_glu.ucol.data() );
m_info = Success;
m_factorizationIsOk = true;
}
+template<typename MappedSupernodalType>
+struct SparseLUMatrixLReturnType
+{
+ typedef typename MappedSupernodalType::Index Index;
+ typedef typename MappedSupernodalType::Scalar Scalar;
+ SparseLUMatrixLReturnType(const MappedSupernodalType& mapL) : m_mapL(mapL)
+ { }
+ Index rows() { return m_mapL.rows(); }
+ Index cols() { return m_mapL.cols(); }
+ template<typename Dest>
+ void solveInPlace( MatrixBase<Dest> &X) const
+ {
+ m_mapL.solveInPlace(X);
+ }
+ const MappedSupernodalType& m_mapL;
+};
+
namespace internal {
template<typename _MatrixType, typename Derived, typename Rhs>
@@ -557,6 +599,18 @@ struct solve_retval<SparseLU<_MatrixType,Derived>, Rhs>
}
};
+template<typename _MatrixType, typename Derived, typename Rhs>
+struct sparse_solve_retval<SparseLU<_MatrixType,Derived>, Rhs>
+ : sparse_solve_retval_base<SparseLU<_MatrixType,Derived>, Rhs>
+{
+ typedef SparseLU<_MatrixType,Derived> Dec;
+ EIGEN_MAKE_SPARSE_SOLVE_HELPERS(Dec,Rhs)
+
+ template<typename Dest> void evalTo(Dest& dst) const
+ {
+ this->defaultEvalTo(dst);
+ }
+};
} // end namespace internal
} // End namespace Eigen
diff --git a/Eigen/src/SparseLU/SparseLUBase.h b/Eigen/src/SparseLU/SparseLUBase.h
deleted file mode 100644
index f4c5fbead..000000000
--- a/Eigen/src/SparseLU/SparseLUBase.h
+++ /dev/null
@@ -1,58 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-#ifndef SPARSELUBASE_H
-#define SPARSELUBASE_H
-
-namespace Eigen {
-
-/** \ingroup SparseLU_Module
- * \class SparseLUBase
- * Base class for sparseLU
- */
-template <typename Scalar, typename Index>
-struct SparseLUBase
-{
- typedef Matrix<Scalar,Dynamic,1> ScalarVector;
- typedef Matrix<Index,Dynamic,1> IndexVector;
- typedef typename ScalarVector::RealScalar RealScalar;
- typedef Ref<Matrix<Scalar,Dynamic,1> > BlockScalarVector;
- typedef Ref<Matrix<Index,Dynamic,1> > BlockIndexVector;
- typedef LU_GlobalLU_t<IndexVector, ScalarVector> GlobalLU_t;
- typedef SparseMatrix<Scalar,ColMajor,Index> MatrixType;
-
- template <typename VectorType>
- static int expand(VectorType& vec, int& length, int nbElts, int keep_prev, int& num_expansions);
- static int LUMemInit(int m, int n, int annz, int lwork, int fillratio, int panel_size, GlobalLU_t& glu);
- template <typename VectorType>
- static int LUMemXpand(VectorType& vec, int& maxlen, int nbElts, LU_MemType memtype, int& num_expansions);
- static void LU_heap_relax_snode (const int n, IndexVector& et, const int relax_columns, IndexVector& descendants, IndexVector& relax_end);
- static void LU_relax_snode (const int n, IndexVector& et, const int relax_columns, IndexVector& descendants, IndexVector& relax_end);
- static int LU_snode_dfs(const int jcol, const int kcol,const MatrixType& mat, IndexVector& xprune, IndexVector& marker, LU_GlobalLU_t<IndexVector, ScalarVector>& glu);
- static int LU_snode_bmod (const int jcol, const int fsupc, ScalarVector& dense, GlobalLU_t& glu);
- static int LU_pivotL(const int jcol, const RealScalar diagpivotthresh, IndexVector& perm_r, IndexVector& iperm_c, int& pivrow, GlobalLU_t& glu);
- template <typename Traits>
- static void LU_dfs_kernel(const int jj, IndexVector& perm_r,
- int& nseg, IndexVector& panel_lsub, IndexVector& segrep,
- Ref<IndexVector> repfnz_col, IndexVector& xprune, Ref<IndexVector> marker, IndexVector& parent,
- IndexVector& xplore, GlobalLU_t& glu, int& nextl_col, int krow, Traits& traits);
- static void LU_panel_dfs(const int m, const int w, const int jcol, MatrixType& A, IndexVector& perm_r, int& nseg, ScalarVector& dense, IndexVector& panel_lsub, IndexVector& segrep, IndexVector& repfnz, IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, GlobalLU_t& glu);
-
- static void LU_panel_bmod(const int m, const int w, const int jcol, const int nseg, ScalarVector& dense, ScalarVector& tempv, IndexVector& segrep, IndexVector& repfnz, GlobalLU_t& glu);
- static int LU_column_dfs(const int m, const int jcol, IndexVector& perm_r, int maxsuper, int& nseg, BlockIndexVector lsub_col, IndexVector& segrep, BlockIndexVector repfnz, IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, GlobalLU_t& glu);
- static int LU_column_bmod(const int jcol, const int nseg, BlockScalarVector dense, ScalarVector& tempv, BlockIndexVector segrep, BlockIndexVector repfnz, int fpanelc, GlobalLU_t& glu);
- static int LU_copy_to_ucol(const int jcol, const int nseg, IndexVector& segrep, BlockIndexVector repfnz ,IndexVector& perm_r, BlockScalarVector dense, GlobalLU_t& glu);
- static void LU_pruneL(const int jcol, const IndexVector& perm_r, const int pivrow, const int nseg, const IndexVector& segrep, BlockIndexVector repfnz, IndexVector& xprune, GlobalLU_t& glu);
- static void LU_countnz(const int n, int& nnzL, int& nnzU, GlobalLU_t& glu);
- static void LU_fixupL(const int n, const IndexVector& perm_r, GlobalLU_t& glu);
-
-};
-
-} // end namespace Eigen
-
-#endif
diff --git a/Eigen/src/SparseLU/SparseLUImpl.h b/Eigen/src/SparseLU/SparseLUImpl.h
new file mode 100644
index 000000000..14d70897d
--- /dev/null
+++ b/Eigen/src/SparseLU/SparseLUImpl.h
@@ -0,0 +1,64 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#ifndef SPARSELU_IMPL_H
+#define SPARSELU_IMPL_H
+
+namespace Eigen {
+namespace internal {
+
+/** \ingroup SparseLU_Module
+ * \class SparseLUImpl
+ * Base class for SparseLU
+ */
+template <typename Scalar, typename Index>
+class SparseLUImpl
+{
+ public:
+ typedef Matrix<Scalar,Dynamic,1> ScalarVector;
+ typedef Matrix<Index,Dynamic,1> IndexVector;
+ typedef typename ScalarVector::RealScalar RealScalar;
+ typedef Ref<Matrix<Scalar,Dynamic,1> > BlockScalarVector;
+ typedef Ref<Matrix<Index,Dynamic,1> > BlockIndexVector;
+ typedef LU_GlobalLU_t<IndexVector, ScalarVector> GlobalLU_t;
+ typedef SparseMatrix<Scalar,ColMajor,Index> MatrixType;
+
+ protected:
+ template <typename VectorType>
+ Index expand(VectorType& vec, Index& length, Index nbElts, Index keep_prev, Index& num_expansions);
+ Index memInit(Index m, Index n, Index annz, Index lwork, Index fillratio, Index panel_size, GlobalLU_t& glu);
+ template <typename VectorType>
+ Index memXpand(VectorType& vec, Index& maxlen, Index nbElts, MemType memtype, Index& num_expansions);
+ void heap_relax_snode (const Index n, IndexVector& et, const Index relax_columns, IndexVector& descendants, IndexVector& relax_end);
+ void relax_snode (const Index n, IndexVector& et, const Index relax_columns, IndexVector& descendants, IndexVector& relax_end);
+ Index snode_dfs(const Index jcol, const Index kcol,const MatrixType& mat, IndexVector& xprune, IndexVector& marker, GlobalLU_t& glu);
+ Index snode_bmod (const Index jcol, const Index fsupc, ScalarVector& dense, GlobalLU_t& glu);
+ Index pivotL(const Index jcol, const RealScalar& diagpivotthresh, IndexVector& perm_r, IndexVector& iperm_c, Index& pivrow, GlobalLU_t& glu);
+ template <typename Traits>
+ void dfs_kernel(const Index jj, IndexVector& perm_r,
+ Index& nseg, IndexVector& panel_lsub, IndexVector& segrep,
+ Ref<IndexVector> repfnz_col, IndexVector& xprune, Ref<IndexVector> marker, IndexVector& parent,
+ IndexVector& xplore, GlobalLU_t& glu, Index& nextl_col, Index krow, Traits& traits);
+ void panel_dfs(const Index m, const Index w, const Index jcol, MatrixType& A, IndexVector& perm_r, Index& nseg, ScalarVector& dense, IndexVector& panel_lsub, IndexVector& segrep, IndexVector& repfnz, IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, GlobalLU_t& glu);
+
+ void panel_bmod(const Index m, const Index w, const Index jcol, const Index nseg, ScalarVector& dense, ScalarVector& tempv, IndexVector& segrep, IndexVector& repfnz, GlobalLU_t& glu);
+ Index column_dfs(const Index m, const Index jcol, IndexVector& perm_r, Index maxsuper, Index& nseg, BlockIndexVector lsub_col, IndexVector& segrep, BlockIndexVector repfnz, IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, GlobalLU_t& glu);
+ Index column_bmod(const Index jcol, const Index nseg, BlockScalarVector dense, ScalarVector& tempv, BlockIndexVector segrep, BlockIndexVector repfnz, Index fpanelc, GlobalLU_t& glu);
+ Index copy_to_ucol(const Index jcol, const Index nseg, IndexVector& segrep, BlockIndexVector repfnz ,IndexVector& perm_r, BlockScalarVector dense, GlobalLU_t& glu);
+ void pruneL(const Index jcol, const IndexVector& perm_r, const Index pivrow, const Index nseg, const IndexVector& segrep, BlockIndexVector repfnz, IndexVector& xprune, GlobalLU_t& glu);
+ void countnz(const Index n, Index& nnzL, Index& nnzU, GlobalLU_t& glu);
+ void fixupL(const Index n, const IndexVector& perm_r, GlobalLU_t& glu);
+
+ template<typename , typename >
+ friend struct column_dfs_traits;
+};
+
+} // end namespace internal
+} // namespace Eigen
+
+#endif
diff --git a/Eigen/src/SparseLU/SparseLU_Memory.h b/Eigen/src/SparseLU/SparseLU_Memory.h
index 049d5e694..6d9570d19 100644
--- a/Eigen/src/SparseLU/SparseLU_Memory.h
+++ b/Eigen/src/SparseLU/SparseLU_Memory.h
@@ -32,15 +32,23 @@
#define EIGEN_SPARSELU_MEMORY
namespace Eigen {
+namespace internal {
-#define LU_NO_MARKER 3
-#define LU_NUM_TEMPV(m,w,t,b) ((std::max)(m, (t+b)*w) )
-#define IND_EMPTY (-1)
+enum { LUNoMarker = 3 };
+enum { emptyIdxLU = -1 };
+template<typename Index>
+inline Index LUnumTempV(Index& m, Index& w, Index& t, Index& b)
+{
+ return (std::max)(m, (t+b)*w);
+}
+
+template< typename Scalar, typename Index>
+inline Index LUTempSpace(Index&m, Index& w)
+{
+ return (2*w + 4 + LUNoMarker) * m * sizeof(Index) + (w + 1) * m * sizeof(Scalar);
+}
+
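For a sense of scale, LUnumTempV reproduces the old LU_NUM_TEMPV macro: with the arguments factorize() passes (b = m), tempv gets max(m, (maxsuper + m) * panel_size) scalars. An illustrative computation with made-up but plausible perfvalues:

#include <algorithm>
#include <cstdio>

int main()
{
  // Mirrors LUnumTempV above; w = panel_size, t = maxsuper, b = rowblk
  // (factorize() currently passes b = m). Values are illustrative only.
  int m = 1000, w = 8, t = 128, b = m;
  int tempvSize = (std::max)(m, (t + b) * w);   // = 9024 entries
  std::printf("tempv holds %d scalars\n", tempvSize);
  return 0;
}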
-#define LU_Reduce(alpha) ((alpha + 1) / 2) // i.e (alpha-1)/2 + 1
-#define LU_GluIntArray(n) (5* (n) + 5)
-#define LU_TempSpace(m, w) ( (2*w + 4 + LU_NO_MARKER) * m * sizeof(Index) \
- + (w + 1) * m * sizeof(Scalar) )
/**
@@ -53,11 +61,11 @@ namespace Eigen {
*/
template <typename Scalar, typename Index>
template <typename VectorType>
-int SparseLUBase<Scalar,Index>::expand(VectorType& vec, int& length, int nbElts, int keep_prev, int& num_expansions)
+Index SparseLUImpl<Scalar,Index>::expand(VectorType& vec, Index& length, Index nbElts, Index keep_prev, Index& num_expansions)
{
float alpha = 1.5; // Ratio of the memory increase
- int new_len; // New size of the allocated memory
+ Index new_len; // New size of the allocated memory
if(num_expansions == 0 || keep_prev)
new_len = length ; // First time allocate requested
@@ -88,10 +96,10 @@ int SparseLUBase<Scalar,Index>::expand(VectorType& vec, int& length, int nbElts
else
{
// Reduce the size and increase again
- int tries = 0; // Number of attempts
+ Index tries = 0; // Number of attempts
do
{
- alpha = LU_Reduce(alpha);
+ alpha = (alpha + 1)/2;
new_len = alpha * length ;
try
{
@@ -128,18 +136,20 @@ int SparseLUBase<Scalar,Index>::expand(VectorType& vec, int& length, int nbElts
* \note Unlike SuperLU, this routine does not support successive factorization with the same pattern and the same row permutation
*/
template <typename Scalar, typename Index>
-int SparseLUBase<Scalar,Index>::LUMemInit(int m, int n, int annz, int lwork, int fillratio, int panel_size, GlobalLU_t& glu)
+Index SparseLUImpl<Scalar,Index>::memInit(Index m, Index n, Index annz, Index lwork, Index fillratio, Index panel_size, GlobalLU_t& glu)
{
- int& num_expansions = glu.num_expansions; //No memory expansions so far
+ Index& num_expansions = glu.num_expansions; //No memory expansions so far
num_expansions = 0;
glu.nzumax = glu.nzlumax = (std::max)(fillratio * annz, m*n); // estimated number of nonzeros in U
glu.nzlmax = (std::max)(1., fillratio/4.) * annz; // estimated nnz in L factor
// Return the estimated size to the user if necessary
- if (lwork == IND_EMPTY)
+ Index tempSpace;
+ tempSpace = (2*panel_size + 4 + LUNoMarker) * m * sizeof(Index) + (panel_size + 1) * m * sizeof(Scalar);
+ if (lwork == emptyIdxLU)
{
- int estimated_size;
- estimated_size = LU_GluIntArray(n) * sizeof(Index) + LU_TempSpace(m, panel_size)
+ Index estimated_size;
+ estimated_size = (5 * n + 5) * sizeof(Index) + tempSpace
+ (glu.nzlmax + glu.nzumax) * sizeof(Index) + (glu.nzlumax+glu.nzumax) * sizeof(Scalar) + n;
return estimated_size;
}
@@ -192,13 +202,13 @@ int SparseLUBase<Scalar,Index>::LUMemInit(int m, int n, int annz, int lwork, int
*/
template <typename Scalar, typename Index>
template <typename VectorType>
-int SparseLUBase<Scalar,Index>::LUMemXpand(VectorType& vec, int& maxlen, int nbElts, LU_MemType memtype, int& num_expansions)
+Index SparseLUImpl<Scalar,Index>::memXpand(VectorType& vec, Index& maxlen, Index nbElts, MemType memtype, Index& num_expansions)
{
- int failed_size;
+ Index failed_size;
if (memtype == USUB)
- failed_size = expand<VectorType>(vec, maxlen, nbElts, 1, num_expansions);
+ failed_size = this->expand<VectorType>(vec, maxlen, nbElts, 1, num_expansions);
else
- failed_size = expand<VectorType>(vec, maxlen, nbElts, 0, num_expansions);
+ failed_size = this->expand<VectorType>(vec, maxlen, nbElts, 0, num_expansions);
if (failed_size)
return failed_size;
@@ -206,6 +216,7 @@ int SparseLUBase<Scalar,Index>::LUMemXpand(VectorType& vec, int& maxlen, int nbE
return 0 ;
}
-} // end namespace Eigen
+} // end namespace internal
+} // end namespace Eigen
#endif // EIGEN_SPARSELU_MEMORY
diff --git a/Eigen/src/SparseLU/SparseLU_Structs.h b/Eigen/src/SparseLU/SparseLU_Structs.h
index 89d6e81b7..24d6bf179 100644
--- a/Eigen/src/SparseLU/SparseLU_Structs.h
+++ b/Eigen/src/SparseLU/SparseLU_Structs.h
@@ -68,10 +68,10 @@
#ifndef EIGEN_LU_STRUCTS
#define EIGEN_LU_STRUCTS
-
namespace Eigen {
+namespace internal {
-typedef enum {LUSUP, UCOL, LSUB, USUB, LLVL, ULVL} LU_MemType;
+typedef enum {LUSUP, UCOL, LSUB, USUB, LLVL, ULVL} MemType;
template <typename IndexVector, typename ScalarVector>
struct LU_GlobalLU_t {
@@ -89,21 +89,23 @@ struct LU_GlobalLU_t {
IndexVector xusub; // Pointers to the beginning of each column of U in ucol
Index nzumax; // Current max size of ucol
Index n; // Number of columns in the matrix
- int num_expansions;
+ Index num_expansions;
};
-// Values to set for performance
-struct LU_perfvalues {
- int panel_size; // a panel consists of at most <panel_size> consecutive columns
- int relax; // To control degree of relaxing supernodes. If the number of nodes (columns)
+// Values to set for performance
+template <typename Index>
+struct perfvalues {
+ Index panel_size; // a panel consists of at most <panel_size> consecutive columns
+ Index relax; // To control degree of relaxing supernodes. If the number of nodes (columns)
// in a subtree of the elimination tree is less than relax, this subtree is considered
// as one supernode regardless of the row structures of those columns
- int maxsuper; // The maximum size for a supernode in complete LU
- int rowblk; // The minimum row dimension for 2-D blocking to be used;
- int colblk; // The minimum column dimension for 2-D blocking to be used;
- int fillfactor; // The estimated fills factors for L and U, compared with A
+ Index maxsuper; // The maximum size for a supernode in complete LU
+ Index rowblk; // The minimum row dimension for 2-D blocking to be used;
+ Index colblk; // The minimum column dimension for 2-D blocking to be used;
+ Index fillfactor; // The estimated fills factors for L and U, compared with A
};
-} // end namespace Eigen
+} // end namespace internal
+} // end namespace Eigen
#endif // EIGEN_LU_STRUCTS
diff --git a/Eigen/src/SparseLU/SparseLU_Matrix.h b/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h
index d5770e1ae..3eae95479 100644
--- a/Eigen/src/SparseLU/SparseLU_Matrix.h
+++ b/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h
@@ -8,10 +8,11 @@
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-#ifndef EIGEN_SPARSELU_MATRIX_H
-#define EIGEN_SPARSELU_MATRIX_H
+#ifndef EIGEN_SPARSELU_SUPERNODAL_MATRIX_H
+#define EIGEN_SPARSELU_SUPERNODAL_MATRIX_H
namespace Eigen {
+namespace internal {
/** \ingroup SparseLU_Module
* \brief a class to manipulate the L supernodal factor from the SparseLU factorization
@@ -23,13 +24,13 @@ namespace Eigen {
* NOTE : This class corresponds to the SCformat structure in SuperLU
*
*/
-/* TO DO
+/* TODO
* InnerIterator as for sparsematrix
* SuperInnerIterator to iterate through all supernodes
* Function for triangular solve
*/
template <typename _Scalar, typename _Index>
-class SuperNodalMatrix
+class MappedSuperNodalMatrix
{
public:
typedef _Scalar Scalar;
@@ -37,17 +38,17 @@ class SuperNodalMatrix
typedef Matrix<Index,Dynamic,1> IndexVector;
typedef Matrix<Scalar,Dynamic,1> ScalarVector;
public:
- SuperNodalMatrix()
+ MappedSuperNodalMatrix()
{
}
- SuperNodalMatrix(int m, int n, ScalarVector& nzval, IndexVector& nzval_colptr, IndexVector& rowind,
+ MappedSuperNodalMatrix(Index m, Index n, ScalarVector& nzval, IndexVector& nzval_colptr, IndexVector& rowind,
IndexVector& rowind_colptr, IndexVector& col_to_sup, IndexVector& sup_to_col )
{
setInfos(m, n, nzval, nzval_colptr, rowind, rowind_colptr, col_to_sup, sup_to_col);
}
- ~SuperNodalMatrix()
+ ~MappedSuperNodalMatrix()
{
}
@@ -57,7 +58,7 @@ class SuperNodalMatrix
* FIXME This class will be modified such that it can be used in the course
* of the factorization.
*/
- void setInfos(int m, int n, ScalarVector& nzval, IndexVector& nzval_colptr, IndexVector& rowind,
+ void setInfos(Index m, Index n, ScalarVector& nzval, IndexVector& nzval_colptr, IndexVector& rowind,
IndexVector& rowind_colptr, IndexVector& col_to_sup, IndexVector& sup_to_col )
{
m_row = m;
@@ -69,34 +70,24 @@ class SuperNodalMatrix
m_nsuper = col_to_sup(n);
m_col_to_sup = col_to_sup.data();
m_sup_to_col = sup_to_col.data();
-
}
/**
* Number of rows
*/
- int rows()
- {
- return m_row;
- }
+ Index rows() { return m_row; }
/**
* Number of columns
*/
- int cols()
- {
- return m_col;
- }
+ Index cols() { return m_col; }
/**
* Return the array of nonzero values packed by column
*
* The size is nnz
*/
- Scalar* valuePtr()
- {
- return m_nzval;
- }
+ Scalar* valuePtr() { return m_nzval; }
const Scalar* valuePtr() const
{
@@ -118,10 +109,7 @@ class SuperNodalMatrix
/**
* Return the array of compressed row indices of all supernodes
*/
- Index* rowIndex()
- {
- return m_rowind;
- }
+ Index* rowIndex() { return m_rowind; }
const Index* rowIndex() const
{
@@ -131,10 +119,7 @@ class SuperNodalMatrix
/**
* Return the location in \em rowvaluePtr() which starts each column
*/
- Index* rowIndexPtr()
- {
- return m_rowind_colptr;
- }
+ Index* rowIndexPtr() { return m_rowind_colptr; }
const Index* rowIndexPtr() const
{
@@ -144,10 +129,7 @@ class SuperNodalMatrix
/**
* Return the array of column-to-supernode mapping
*/
- Index* colToSup()
- {
- return m_col_to_sup;
- }
+ Index* colToSup() { return m_col_to_sup; }
const Index* colToSup() const
{
@@ -156,10 +138,7 @@ class SuperNodalMatrix
/**
* Return the array of supernode-to-column mapping
*/
- Index* supToCol()
- {
- return m_sup_to_col;
- }
+ Index* supToCol() { return m_sup_to_col; }
const Index* supToCol() const
{
@@ -169,7 +148,7 @@ class SuperNodalMatrix
/**
* Return the number of supernodes
*/
- int nsuper() const
+ Index nsuper() const
{
return m_nsuper;
}
@@ -196,22 +175,21 @@ class SuperNodalMatrix
};
/**
- * \brief InnerIterator class to iterate over nonzero values of the current column in the supernode
+ * \brief InnerIterator class to iterate over nonzero values of the current column in the supernodal matrix L
*
*/
template<typename Scalar, typename Index>
-class SuperNodalMatrix<Scalar,Index>::InnerIterator
+class MappedSuperNodalMatrix<Scalar,Index>::InnerIterator
{
public:
- InnerIterator(const SuperNodalMatrix& mat, Index outer)
+ InnerIterator(const MappedSuperNodalMatrix& mat, Index outer)
: m_matrix(mat),
m_outer(outer),
+ m_supno(mat.colToSup()[outer]),
m_idval(mat.colIndexPtr()[outer]),
- m_startval(m_idval),
- m_endval(mat.colIndexPtr()[outer+1]),
- m_idrow(mat.rowIndexPtr()[outer]),
- m_startidrow(m_idrow),
- m_endidrow(mat.rowIndexPtr()[outer+1])
+ m_startidval(m_idval),
+ m_endidval(mat.colIndexPtr()[outer+1]),
+ m_idrow(mat.rowIndexPtr()[outer])
{}
inline InnerIterator& operator++()
{
@@ -227,22 +205,21 @@ class SuperNodalMatrix<Scalar,Index>::InnerIterator
inline Index row() const { return index(); }
inline Index col() const { return m_outer; }
- inline Index supIndex() const { return m_matrix.colToSup()[m_outer]; }
+ inline Index supIndex() const { return m_supno; }
inline operator bool() const
{
- return ( (m_idrow < m_endidrow) && (m_idrow > m_startidrow) );
+ return ( (m_idval < m_endidval) && (m_idval >= m_startidval) );
}
protected:
- const SuperNodalMatrix& m_matrix; // Supernodal lower triangular matrix
+ const MappedSuperNodalMatrix& m_matrix; // Supernodal lower triangular matrix
const Index m_outer; // Current column
+ const Index m_supno; // Current SuperNode number
Index m_idval; //Index to browse the values in the current column
- const Index m_startval; // Start of the column value
- const Index m_endval; // End of the column value
+ const Index m_startidval; // Start of the column value
+ const Index m_endidval; // End of the column value
Index m_idrow; //Index to browse the row indices
- const Index m_startidrow; // Start of the row indices of the current column value
- const Index m_endidrow; // End of the row indices of the current column value
};
/**
@@ -251,14 +228,14 @@ class SuperNodalMatrix<Scalar,Index>::InnerIterator
*/
template<typename Scalar, typename Index>
template<typename Dest>
-void SuperNodalMatrix<Scalar,Index>::solveInPlace( MatrixBase<Dest>&X) const
+void MappedSuperNodalMatrix<Scalar,Index>::solveInPlace( MatrixBase<Dest>&X) const
{
Index n = X.rows();
- int nrhs = X.cols();
+ Index nrhs = X.cols();
const Scalar * Lval = valuePtr(); // Nonzero values
Matrix<Scalar,Dynamic,Dynamic> work(n, nrhs); // working vector
work.setZero();
- for (int k = 0; k <= nsuper(); k ++)
+ for (Index k = 0; k <= nsuper(); k ++)
{
Index fsupc = supToCol()[k]; // First column of the current supernode
Index istart = rowIndexPtr()[fsupc]; // Pointer index to the subscript of the current column
@@ -269,7 +246,7 @@ void SuperNodalMatrix<Scalar,Index>::solveInPlace( MatrixBase<Dest>&X) const
if (nsupc == 1 )
{
- for (int j = 0; j < nrhs; j++)
+ for (Index j = 0; j < nrhs; j++)
{
InnerIterator it(*this, fsupc);
++it; // Skip the diagonal element
@@ -296,10 +273,10 @@ void SuperNodalMatrix<Scalar,Index>::solveInPlace( MatrixBase<Dest>&X) const
work.block(0, 0, nrow, nrhs) = A * U;
//Begin Scatter
- for (int j = 0; j < nrhs; j++)
+ for (Index j = 0; j < nrhs; j++)
{
Index iptr = istart + nsupc;
- for (int i = 0; i < nrow; i++)
+ for (Index i = 0; i < nrow; i++)
{
irow = rowIndex()[iptr];
X(irow, j) -= work(i, j); // Scatter operation
@@ -311,6 +288,7 @@ void SuperNodalMatrix<Scalar,Index>::solveInPlace( MatrixBase<Dest>&X) const
}
}
-} // end namespace Eigen
+} // end namespace internal
+} // end namespace Eigen
#endif // EIGEN_SPARSELU_MATRIX_H
diff --git a/Eigen/src/SparseLU/SparseLU_Utils.h b/Eigen/src/SparseLU/SparseLU_Utils.h
index e764823ae..15352ac33 100644
--- a/Eigen/src/SparseLU/SparseLU_Utils.h
+++ b/Eigen/src/SparseLU/SparseLU_Utils.h
@@ -12,18 +12,19 @@
#define EIGEN_SPARSELU_UTILS_H
namespace Eigen {
+namespace internal {
/**
* \brief Count Nonzero elements in the factors
*/
template <typename Scalar, typename Index>
-void SparseLUBase<Scalar,Index>::LU_countnz(const int n, int& nnzL, int& nnzU, GlobalLU_t& glu)
+void SparseLUImpl<Scalar,Index>::countnz(const Index n, Index& nnzL, Index& nnzU, GlobalLU_t& glu)
{
nnzL = 0;
nnzU = (glu.xusub)(n);
- int nsuper = (glu.supno)(n);
- int jlen;
- int i, j, fsupc;
+ Index nsuper = (glu.supno)(n);
+ Index jlen;
+ Index i, j, fsupc;
if (n <= 0 ) return;
// For each supernode
for (i = 0; i <= nsuper; i++)
@@ -48,12 +49,12 @@ void SparseLUBase<Scalar,Index>::LU_countnz(const int n, int& nnzL, int& nnzU, G
*
*/
template <typename Scalar, typename Index>
-void SparseLUBase<Scalar,Index>::LU_fixupL(const int n, const IndexVector& perm_r, GlobalLU_t& glu)
+void SparseLUImpl<Scalar,Index>::fixupL(const Index n, const IndexVector& perm_r, GlobalLU_t& glu)
{
- int fsupc, i, j, k, jstart;
+ Index fsupc, i, j, k, jstart;
- int nextl = 0;
- int nsuper = (glu.supno)(n);
+ Index nextl = 0;
+ Index nsuper = (glu.supno)(n);
// For each supernode
for (i = 0; i <= nsuper; i++)
@@ -73,6 +74,7 @@ void SparseLUBase<Scalar,Index>::LU_fixupL(const int n, const IndexVector& perm_
glu.xlsub(n) = nextl;
}
-} // end namespace Eigen
+} // end namespace internal
+} // end namespace Eigen
#endif // EIGEN_SPARSELU_UTILS_H
diff --git a/Eigen/src/SparseLU/SparseLU_column_bmod.h b/Eigen/src/SparseLU/SparseLU_column_bmod.h
index 6d557eb81..f24bd87d3 100644
--- a/Eigen/src/SparseLU/SparseLU_column_bmod.h
+++ b/Eigen/src/SparseLU/SparseLU_column_bmod.h
@@ -32,7 +32,8 @@
#define SPARSELU_COLUMN_BMOD_H
namespace Eigen {
-
+
+namespace internal {
/**
* \brief Performs numeric block updates (sup-col) in topological order
*
@@ -49,11 +50,11 @@ namespace Eigen {
*
*/
template <typename Scalar, typename Index>
-int SparseLUBase<Scalar,Index>::LU_column_bmod(const int jcol, const int nseg, BlockScalarVector dense, ScalarVector& tempv, BlockIndexVector segrep, BlockIndexVector repfnz, int fpanelc, GlobalLU_t& glu)
+Index SparseLUImpl<Scalar,Index>::column_bmod(const Index jcol, const Index nseg, BlockScalarVector dense, ScalarVector& tempv, BlockIndexVector segrep, BlockIndexVector repfnz, Index fpanelc, GlobalLU_t& glu)
{
- int jsupno, k, ksub, krep, ksupno;
- int lptr, nrow, isub, irow, nextlu, new_next, ufirst;
- int fsupc, nsupc, nsupr, luptr, kfnz, no_zeros;
+ Index jsupno, k, ksub, krep, ksupno;
+ Index lptr, nrow, isub, irow, nextlu, new_next, ufirst;
+ Index fsupc, nsupc, nsupr, luptr, kfnz, no_zeros;
/* krep = representative of current k-th supernode
* fsupc = first supernodal column
* nsupc = number of columns in a supernode
@@ -66,10 +67,10 @@ int SparseLUBase<Scalar,Index>::LU_column_bmod(const int jcol, const int nseg, B
jsupno = glu.supno(jcol);
// For each nonzero supernode segment of U[*,j] in topological order
k = nseg - 1;
- int d_fsupc; // distance between the first column of the current panel and the
+ Index d_fsupc; // distance between the first column of the current panel and the
// first column of the current snode
- int fst_col; // First column within small LU update
- int segsize;
+ Index fst_col; // First column within small LU update
+ Index segsize;
for (ksub = 0; ksub < nseg; ksub++)
{
krep = segrep(k); k--;
@@ -94,7 +95,7 @@ int SparseLUBase<Scalar,Index>::LU_column_bmod(const int jcol, const int nseg, B
nsupc = krep - fst_col + 1;
nsupr = glu.xlsub(fsupc+1) - glu.xlsub(fsupc);
nrow = nsupr - d_fsupc - nsupc;
- int lda = glu.xlusup(fst_col+1) - glu.xlusup(fst_col);
+ Index lda = glu.xlusup(fst_col+1) - glu.xlusup(fst_col);
// Perform a triangular solver and block update,
@@ -112,14 +113,14 @@ int SparseLUBase<Scalar,Index>::LU_column_bmod(const int jcol, const int nseg, B
fsupc = glu.xsup(jsupno);
// copy the SPA dense into L\U[*,j]
- int mem;
+ Index mem;
new_next = nextlu + glu.xlsub(fsupc + 1) - glu.xlsub(fsupc);
- int offset = internal::first_multiple<Index>(new_next, internal::packet_traits<Scalar>::size) - new_next;
+ Index offset = internal::first_multiple<Index>(new_next, internal::packet_traits<Scalar>::size) - new_next;
if(offset)
new_next += offset;
while (new_next > glu.nzlumax )
{
- mem = LUMemXpand<ScalarVector>(glu.lusup, glu.nzlumax, nextlu, LUSUP, glu.num_expansions);
+ mem = memXpand<ScalarVector>(glu.lusup, glu.nzlumax, nextlu, LUSUP, glu.num_expansions);
if (mem) return mem;
}
@@ -160,7 +161,7 @@ int SparseLUBase<Scalar,Index>::LU_column_bmod(const int jcol, const int nseg, B
// points to the beginning of jcol in snode L\U(jsupno)
ufirst = glu.xlusup(jcol) + d_fsupc;
- int lda = glu.xlusup(jcol+1) - glu.xlusup(jcol);
+ Index lda = glu.xlusup(jcol+1) - glu.xlusup(jcol);
Map<Matrix<Scalar,Dynamic,Dynamic>, 0, OuterStride<> > A( &(glu.lusup.data()[luptr]), nsupc, nsupc, OuterStride<>(lda) );
VectorBlock<ScalarVector> u(glu.lusup, ufirst, nsupc);
u = A.template triangularView<UnitLower>().solve(u);
@@ -173,6 +174,7 @@ int SparseLUBase<Scalar,Index>::LU_column_bmod(const int jcol, const int nseg, B
return 0;
}
+} // end namespace internal
} // end namespace Eigen
#endif // SPARSELU_COLUMN_BMOD_H
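The triangular solve at the heart of column_bmod maps raw supernodal storage onto an Eigen view without copying. A minimal standalone sketch of that pattern, assuming double scalars and a hypothetical panel pointer (not the actual SparseLU call sites):

    #include <Eigen/Dense>
    using namespace Eigen;

    // Solve L*x = u in place, where L is the unit-lower triangle of an
    // nsupc x nsupc block stored inside a flat column-major array with
    // leading dimension lda (this is how glu.lusup stores a supernode).
    void supernode_triangular_solve(const double* panel, int lda, int nsupc,
                                    double* x)
    {
      Map<const MatrixXd, 0, OuterStride<> > A(panel, nsupc, nsupc,
                                               OuterStride<>(lda));
      Map<VectorXd> u(x, nsupc);
      u = A.triangularView<UnitLower>().solve(u);
    }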
diff --git a/Eigen/src/SparseLU/SparseLU_column_dfs.h b/Eigen/src/SparseLU/SparseLU_column_dfs.h
index 1bf17330a..bd450ddc7 100644
--- a/Eigen/src/SparseLU/SparseLU_column_dfs.h
+++ b/Eigen/src/SparseLU/SparseLU_column_dfs.h
@@ -30,36 +30,37 @@
#ifndef SPARSELU_COLUMN_DFS_H
#define SPARSELU_COLUMN_DFS_H
+template <typename Scalar, typename Index> class SparseLUImpl;
namespace Eigen {
namespace internal {
-
+
template<typename IndexVector, typename ScalarVector>
-struct LU_column_dfs_traits
+struct column_dfs_traits
{
- typedef typename IndexVector::Scalar Index;
typedef typename ScalarVector::Scalar Scalar;
- LU_column_dfs_traits(Index jcol, Index& jsuper, LU_GlobalLU_t<IndexVector, ScalarVector>& glu)
- : m_jcol(jcol), m_jsuper_ref(jsuper), m_glu(glu)
+ typedef typename IndexVector::Scalar Index;
+ column_dfs_traits(Index jcol, Index& jsuper, typename SparseLUImpl<Scalar, Index>::GlobalLU_t& glu, SparseLUImpl<Scalar, Index>& luImpl)
+ : m_jcol(jcol), m_jsuper_ref(jsuper), m_glu(glu), m_luImpl(luImpl)
{}
bool update_segrep(Index /*krep*/, Index /*jj*/)
{
return true;
}
- void mem_expand(IndexVector& lsub, int& nextl, int chmark)
+ void mem_expand(IndexVector& lsub, Index& nextl, Index chmark)
{
if (nextl >= m_glu.nzlmax)
- SparseLUBase<Scalar,Index>::LUMemXpand(lsub, m_glu.nzlmax, nextl, LSUB, m_glu.num_expansions);
- if (chmark != (m_jcol-1)) m_jsuper_ref = IND_EMPTY;
+ m_luImpl.memXpand(lsub, m_glu.nzlmax, nextl, LSUB, m_glu.num_expansions);
+ if (chmark != (m_jcol-1)) m_jsuper_ref = emptyIdxLU;
}
enum { ExpandMem = true };
- int m_jcol;
- int& m_jsuper_ref;
- LU_GlobalLU_t<IndexVector, ScalarVector>& m_glu;
+ Index m_jcol;
+ Index& m_jsuper_ref;
+ typename SparseLUImpl<Scalar, Index>::GlobalLU_t& m_glu;
+ SparseLUImpl<Scalar, Index>& m_luImpl;
};
-} // end namespace internal
/**
 * \brief Performs a symbolic factorization on column jcol and decides the supernode boundary
@@ -89,34 +90,34 @@ struct LU_column_dfs_traits
*
*/
template <typename Scalar, typename Index>
-int SparseLUBase<Scalar,Index>::LU_column_dfs(const int m, const int jcol, IndexVector& perm_r, int maxsuper, int& nseg, BlockIndexVector lsub_col, IndexVector& segrep, BlockIndexVector repfnz, IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, GlobalLU_t& glu)
+Index SparseLUImpl<Scalar,Index>::column_dfs(const Index m, const Index jcol, IndexVector& perm_r, Index maxsuper, Index& nseg, BlockIndexVector lsub_col, IndexVector& segrep, BlockIndexVector repfnz, IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, GlobalLU_t& glu)
{
- int jsuper = glu.supno(jcol);
- int nextl = glu.xlsub(jcol);
+ Index jsuper = glu.supno(jcol);
+ Index nextl = glu.xlsub(jcol);
VectorBlock<IndexVector> marker2(marker, 2*m, m);
- internal::LU_column_dfs_traits<IndexVector, ScalarVector> traits(jcol, jsuper, glu);
+ column_dfs_traits<IndexVector, ScalarVector> traits(jcol, jsuper, glu, *this);
// For each nonzero in A(*,jcol) do dfs
- for (int k = 0; lsub_col[k] != IND_EMPTY; k++)
+ for (Index k = 0; lsub_col[k] != emptyIdxLU; k++)
{
- int krow = lsub_col(k);
- lsub_col(k) = IND_EMPTY;
- int kmark = marker2(krow);
+ Index krow = lsub_col(k);
+ lsub_col(k) = emptyIdxLU;
+ Index kmark = marker2(krow);
// krow was visited before, go to the next nonz;
if (kmark == jcol) continue;
- LU_dfs_kernel(jcol, perm_r, nseg, glu.lsub, segrep, repfnz, xprune, marker2, parent,
+ dfs_kernel(jcol, perm_r, nseg, glu.lsub, segrep, repfnz, xprune, marker2, parent,
xplore, glu, nextl, krow, traits);
} // for each nonzero ...
- int fsupc, jptr, jm1ptr, ito, ifrom, istop;
- int nsuper = glu.supno(jcol);
- int jcolp1 = jcol + 1;
- int jcolm1 = jcol - 1;
+ Index fsupc, jptr, jm1ptr, ito, ifrom, istop;
+ Index nsuper = glu.supno(jcol);
+ Index jcolp1 = jcol + 1;
+ Index jcolm1 = jcol - 1;
// check to see if j belongs in the same supernode as j-1
if ( jcol == 0 )
@@ -130,18 +131,18 @@ int SparseLUBase<Scalar,Index>::LU_column_dfs(const int m, const int jcol, Index
jm1ptr = glu.xlsub(jcolm1);
// Use supernodes of type T2 : see SuperLU paper
- if ( (nextl-jptr != jptr-jm1ptr-1) ) jsuper = IND_EMPTY;
+ if ( (nextl-jptr != jptr-jm1ptr-1) ) jsuper = emptyIdxLU;
// Make sure the number of columns in a supernode doesn't
// exceed threshold
- if ( (jcol - fsupc) >= maxsuper) jsuper = IND_EMPTY;
+ if ( (jcol - fsupc) >= maxsuper) jsuper = emptyIdxLU;
/* If jcol starts a new supernode, reclaim storage space in
* glu.lsub from previous supernode. Note we only store
* the subscript set of the first and last columns of
* a supernode. (first for num values, last for pruning)
*/
- if (jsuper == IND_EMPTY)
+ if (jsuper == emptyIdxLU)
{ // starts a new supernode
if ( (fsupc < jcolm1-1) )
{ // >= 3 columns in nsuper
@@ -169,6 +170,8 @@ int SparseLUBase<Scalar,Index>::LU_column_dfs(const int m, const int jcol, Index
return 0;
}
+} // end namespace internal
+
} // end namespace Eigen
#endif
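column_dfs and panel_dfs share the single dfs_kernel and differ only through the traits argument; column_dfs_traits now also carries a reference to the SparseLUImpl instance because memXpand became a non-static member. A schematic sketch of the dispatch, with hypothetical names:

    // Variant-specific behaviour is injected through a traits object; the
    // ExpandMem enum lets the compiler drop the expansion branch entirely
    // in the panel variant.
    struct ColumnTraits
    {
      enum { ExpandMem = true };         // column DFS may grow glu.lsub
      void mem_expand() { /* delegate to SparseLUImpl::memXpand(...) */ }
    };

    struct PanelTraits
    {
      enum { ExpandMem = false };        // panel DFS never reallocates
      void mem_expand() {}
    };

    template<typename Traits>
    void dfs_kernel_sketch(Traits& traits)
    {
      if (Traits::ExpandMem)             // compile-time constant
        traits.mem_expand();
      // ... DFS body shared by both variants ...
    }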
diff --git a/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h b/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h
index 10c85d4ff..170610d9f 100644
--- a/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h
+++ b/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h
@@ -30,6 +30,7 @@
#define SPARSELU_COPY_TO_UCOL_H
namespace Eigen {
+namespace internal {
/**
* \brief Performs numeric block updates (sup-col) in topological order
@@ -46,14 +47,14 @@ namespace Eigen {
*
*/
template <typename Scalar, typename Index>
-int SparseLUBase<Scalar,Index>::LU_copy_to_ucol(const int jcol, const int nseg, IndexVector& segrep, BlockIndexVector repfnz ,IndexVector& perm_r, BlockScalarVector dense, GlobalLU_t& glu)
+Index SparseLUImpl<Scalar,Index>::copy_to_ucol(const Index jcol, const Index nseg, IndexVector& segrep, BlockIndexVector repfnz ,IndexVector& perm_r, BlockScalarVector dense, GlobalLU_t& glu)
{
Index ksub, krep, ksupno;
Index jsupno = glu.supno(jcol);
// For each nonzero supernode segment of U[*,j] in topological order
- int k = nseg - 1, i;
+ Index k = nseg - 1, i;
Index nextu = glu.xusub(jcol);
Index kfnz, isub, segsize;
Index new_next,irow;
@@ -65,7 +66,7 @@ int SparseLUBase<Scalar,Index>::LU_copy_to_ucol(const int jcol, const int nseg,
if (jsupno != ksupno ) // should go into ucol();
{
kfnz = repfnz(krep);
- if (kfnz != IND_EMPTY)
+ if (kfnz != emptyIdxLU)
{ // Nonzero U-segment
fsupc = glu.xsup(ksupno);
isub = glu.xlsub(fsupc) + kfnz - fsupc;
@@ -73,9 +74,9 @@ int SparseLUBase<Scalar,Index>::LU_copy_to_ucol(const int jcol, const int nseg,
new_next = nextu + segsize;
while (new_next > glu.nzumax)
{
- mem = LUMemXpand<ScalarVector>(glu.ucol, glu.nzumax, nextu, UCOL, glu.num_expansions);
+ mem = memXpand<ScalarVector>(glu.ucol, glu.nzumax, nextu, UCOL, glu.num_expansions);
if (mem) return mem;
- mem = LUMemXpand<IndexVector>(glu.usub, glu.nzumax, nextu, USUB, glu.num_expansions);
+ mem = memXpand<IndexVector>(glu.usub, glu.nzumax, nextu, USUB, glu.num_expansions);
if (mem) return mem;
}
@@ -99,6 +100,7 @@ int SparseLUBase<Scalar,Index>::LU_copy_to_ucol(const int jcol, const int nseg,
return 0;
}
+} // namespace internal
} // end namespace Eigen
#endif // SPARSELU_COPY_TO_UCOL_H
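memXpand implements the usual grow-until-it-fits policy behind the loops above. A standalone sketch of the same idea, assuming a roughly 1.5x growth factor (the real routine also tracks num_expansions and reports the amount of memory needed on failure):

    // Returns 0 on success, mirroring the error-code convention used by
    // copy_to_ucol: any non-zero value is propagated to the caller.
    template<typename Vec>
    int expand_sketch(Vec& v, long& capacity, long needed)
    {
      while (capacity < needed)
      {
        long next = capacity + capacity/2 + 16;  // ~1.5x growth
        v.conservativeResize(next);              // keep computed entries
        capacity = next;
      }
      return 0;
    }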
diff --git a/Eigen/src/SparseLU/SparseLU_gemm_kernel.h b/Eigen/src/SparseLU/SparseLU_gemm_kernel.h
index 11e7318b5..9e4e3e72b 100644
--- a/Eigen/src/SparseLU/SparseLU_gemm_kernel.h
+++ b/Eigen/src/SparseLU/SparseLU_gemm_kernel.h
@@ -21,9 +21,9 @@ namespace internal {
* - lda and ldc must be multiples of the respective packet size
* - C must have the same alignment as A
*/
-template<typename Scalar>
+template<typename Scalar,typename Index>
EIGEN_DONT_INLINE
-void sparselu_gemm(int m, int n, int d, const Scalar* A, int lda, const Scalar* B, int ldb, Scalar* C, int ldc)
+void sparselu_gemm(Index m, Index n, Index d, const Scalar* A, Index lda, const Scalar* B, Index ldb, Scalar* C, Index ldc)
{
using namespace Eigen::internal;
@@ -37,37 +37,37 @@ void sparselu_gemm(int m, int n, int d, const Scalar* A, int lda, const Scalar*
BM = 4096/sizeof(Scalar), // number of rows of A-C per chunk
SM = PM*PacketSize // step along M
};
- int d_end = (d/RK)*RK; // number of columns of A (rows of B) suitable for full register blocking
- int n_end = (n/RN)*RN; // number of columns of B-C suitable for processing RN columns at once
- int i0 = internal::first_aligned(A,m);
+ Index d_end = (d/RK)*RK; // number of columns of A (rows of B) suitable for full register blocking
+ Index n_end = (n/RN)*RN; // number of columns of B-C suitable for processing RN columns at once
+ Index i0 = internal::first_aligned(A,m);
eigen_internal_assert(((lda%PacketSize)==0) && ((ldc%PacketSize)==0) && (i0==internal::first_aligned(C,m)));
  // handle the non-aligned rows of A and C without any optimization:
- for(int i=0; i<i0; ++i)
+ for(Index i=0; i<i0; ++i)
{
- for(int j=0; j<n; ++j)
+ for(Index j=0; j<n; ++j)
{
Scalar c = C[i+j*ldc];
- for(int k=0; k<d; ++k)
+ for(Index k=0; k<d; ++k)
c += B[k+j*ldb] * A[i+k*lda];
C[i+j*ldc] = c;
}
}
// process the remaining rows per chunk of BM rows
- for(int ib=i0; ib<m; ib+=BM)
+ for(Index ib=i0; ib<m; ib+=BM)
{
- int actual_b = std::min<int>(BM, m-ib); // actual number of rows
- int actual_b_end1 = (actual_b/SM)*SM; // actual number of rows suitable for peeling
- int actual_b_end2 = (actual_b/PacketSize)*PacketSize; // actual number of rows suitable for vectorization
+ Index actual_b = std::min<Index>(BM, m-ib); // actual number of rows
+ Index actual_b_end1 = (actual_b/SM)*SM; // actual number of rows suitable for peeling
+ Index actual_b_end2 = (actual_b/PacketSize)*PacketSize; // actual number of rows suitable for vectorization
// Let's process two columns of B-C at once
- for(int j=0; j<n_end; j+=RN)
+ for(Index j=0; j<n_end; j+=RN)
{
const Scalar* Bc0 = B+(j+0)*ldb;
const Scalar* Bc1 = B+(j+1)*ldb;
- for(int k=0; k<d_end; k+=RK)
+ for(Index k=0; k<d_end; k+=RK)
{
// load and expand a RN x RK block of B
@@ -93,30 +93,38 @@ void sparselu_gemm(int m, int n, int d, const Scalar* A, int lda, const Scalar*
a0 = pload<Packet>(A0);
a1 = pload<Packet>(A1);
- if(RK==4) a2 = pload<Packet>(A2);
- if(RK==4) a3 = pload<Packet>(A3);
+ if(RK==4)
+ {
+ a2 = pload<Packet>(A2);
+ a3 = pload<Packet>(A3);
+ }
+ else
+ {
+ // workaround "may be used uninitialized in this function" warning
+ a2 = a3 = a0;
+ }
-#define KMADD(c, a, b, tmp) tmp = b; tmp = pmul(a,tmp); c = padd(c,tmp);
+#define KMADD(c, a, b, tmp) {tmp = b; tmp = pmul(a,tmp); c = padd(c,tmp);}
#define WORK(I) \
c0 = pload<Packet>(C0+i+(I)*PacketSize); \
c1 = pload<Packet>(C1+i+(I)*PacketSize); \
- KMADD(c0, a0, b00, t0); \
- KMADD(c1, a0, b01, t1); \
+ KMADD(c0, a0, b00, t0) \
+ KMADD(c1, a0, b01, t1) \
a0 = pload<Packet>(A0+i+(I+1)*PacketSize); \
- KMADD(c0, a1, b10, t0); \
- KMADD(c1, a1, b11, t1); \
+ KMADD(c0, a1, b10, t0) \
+ KMADD(c1, a1, b11, t1) \
a1 = pload<Packet>(A1+i+(I+1)*PacketSize); \
- if(RK==4) KMADD(c0, a2, b20, t0); \
- if(RK==4) KMADD(c1, a2, b21, t1); \
+ if(RK==4) KMADD(c0, a2, b20, t0) \
+ if(RK==4) KMADD(c1, a2, b21, t1) \
if(RK==4) a2 = pload<Packet>(A2+i+(I+1)*PacketSize); \
- if(RK==4) KMADD(c0, a3, b30, t0); \
- if(RK==4) KMADD(c1, a3, b31, t1); \
+ if(RK==4) KMADD(c0, a3, b30, t0) \
+ if(RK==4) KMADD(c1, a3, b31, t1) \
if(RK==4) a3 = pload<Packet>(A3+i+(I+1)*PacketSize); \
pstore(C0+i+(I)*PacketSize, c0); \
pstore(C1+i+(I)*PacketSize, c1)
// process rows of A' - C' with aggressive vectorization and peeling
- for(int i=0; i<actual_b_end1; i+=PacketSize*8)
+ for(Index i=0; i<actual_b_end1; i+=PacketSize*8)
{
EIGEN_ASM_COMMENT("SPARSELU_GEMML_KERNEL1");
prefetch((A0+i+(5)*PacketSize));
@@ -133,12 +141,13 @@ void sparselu_gemm(int m, int n, int d, const Scalar* A, int lda, const Scalar*
WORK(7);
}
// process the remaining rows with vectorization only
- for(int i=actual_b_end1; i<actual_b_end2; i+=PacketSize)
+ for(Index i=actual_b_end1; i<actual_b_end2; i+=PacketSize)
{
WORK(0);
}
+#undef WORK
// process the remaining rows without vectorization
- for(int i=actual_b_end2; i<actual_b; ++i)
+ for(Index i=actual_b_end2; i<actual_b; ++i)
{
if(RK==4)
{
@@ -154,7 +163,6 @@ void sparselu_gemm(int m, int n, int d, const Scalar* A, int lda, const Scalar*
Bc0 += RK;
Bc1 += RK;
-#undef WORK
} // peeled loop on k
} // peeled loop on the columns j
  // process the last column (we now perform a matrix-vector product)
@@ -162,7 +170,7 @@ void sparselu_gemm(int m, int n, int d, const Scalar* A, int lda, const Scalar*
{
const Scalar* Bc0 = B+(n-1)*ldb;
- for(int k=0; k<d_end; k+=RK)
+ for(Index k=0; k<d_end; k+=RK)
{
// load and expand a 1 x RK block of B
@@ -183,23 +191,31 @@ void sparselu_gemm(int m, int n, int d, const Scalar* A, int lda, const Scalar*
a0 = pload<Packet>(A0);
a1 = pload<Packet>(A1);
- if(RK==4) a2 = pload<Packet>(A2);
- if(RK==4) a3 = pload<Packet>(A3);
+ if(RK==4)
+ {
+ a2 = pload<Packet>(A2);
+ a3 = pload<Packet>(A3);
+ }
+ else
+ {
+ // workaround "may be used uninitialized in this function" warning
+ a2 = a3 = a0;
+ }
#define WORK(I) \
c0 = pload<Packet>(C0+i+(I)*PacketSize); \
- KMADD(c0, a0, b00, t0); \
+ KMADD(c0, a0, b00, t0) \
a0 = pload<Packet>(A0+i+(I+1)*PacketSize); \
- KMADD(c0, a1, b10, t0); \
+ KMADD(c0, a1, b10, t0) \
a1 = pload<Packet>(A1+i+(I+1)*PacketSize); \
- if(RK==4) KMADD(c0, a2, b20, t0); \
+ if(RK==4) KMADD(c0, a2, b20, t0) \
if(RK==4) a2 = pload<Packet>(A2+i+(I+1)*PacketSize); \
- if(RK==4) KMADD(c0, a3, b30, t0); \
+ if(RK==4) KMADD(c0, a3, b30, t0) \
if(RK==4) a3 = pload<Packet>(A3+i+(I+1)*PacketSize); \
pstore(C0+i+(I)*PacketSize, c0);
    // aggressive vectorization and peeling
- for(int i=0; i<actual_b_end1; i+=PacketSize*8)
+ for(Index i=0; i<actual_b_end1; i+=PacketSize*8)
{
EIGEN_ASM_COMMENT("SPARSELU_GEMML_KERNEL2");
WORK(0);
@@ -212,12 +228,12 @@ void sparselu_gemm(int m, int n, int d, const Scalar* A, int lda, const Scalar*
WORK(7);
}
// vectorization only
- for(int i=actual_b_end1; i<actual_b_end2; i+=PacketSize)
+ for(Index i=actual_b_end1; i<actual_b_end2; i+=PacketSize)
{
WORK(0);
}
// remaining scalars
- for(int i=actual_b_end2; i<actual_b; ++i)
+ for(Index i=actual_b_end2; i<actual_b; ++i)
{
if(RK==4)
C0[i] += A0[i]*Bc0[0]+A1[i]*Bc0[1]+A2[i]*Bc0[2]+A3[i]*Bc0[3];
@@ -231,10 +247,10 @@ void sparselu_gemm(int m, int n, int d, const Scalar* A, int lda, const Scalar*
}
// process the last columns of A, corresponding to the last rows of B
- int rd = d-d_end;
+ Index rd = d-d_end;
if(rd>0)
{
- for(int j=0; j<n; ++j)
+ for(Index j=0; j<n; ++j)
{
enum {
Alignment = PacketSize>1 ? Aligned : 0
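Two changes above deserve a note. Initializing a2/a3 from a0 when RK!=4 only silences a spurious "may be used uninitialized" warning, since those registers are read only when RK==4. Bracing KMADD, however, fixes a real trap: an unbraced multi-statement macro under if() guards only its first statement. In expanded form:

    // Old, unbraced definition:
    //   #define KMADD(c, a, b, tmp) tmp = b; tmp = pmul(a,tmp); c = padd(c,tmp);
    // so the guarded call
    //   if(RK==4) KMADD(c0, a2, b20, t0);
    // expanded to
    //   if(RK==4) t0 = b20;   // only this statement is conditional
    //   t0 = pmul(a2,t0);     // always executed
    //   c0 = padd(c0,t0);     // always executed
    // The braced form turns the whole update into a single statement:
    #define KMADD(c, a, b, tmp) { tmp = b; tmp = pmul(a,tmp); c = padd(c,tmp); }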
diff --git a/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h b/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h
index a1ea5bc06..7a4e4305a 100644
--- a/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h
+++ b/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h
@@ -29,6 +29,7 @@
#define SPARSELU_HEAP_RELAX_SNODE_H
namespace Eigen {
+namespace internal {
/**
* \brief Identify the initial relaxed supernodes
@@ -42,14 +43,14 @@ namespace Eigen {
* \param relax_end last column in a supernode
*/
template <typename Scalar, typename Index>
-void SparseLUBase<Scalar,Index>::LU_heap_relax_snode (const int n, IndexVector& et, const int relax_columns, IndexVector& descendants, IndexVector& relax_end)
+void SparseLUImpl<Scalar,Index>::heap_relax_snode (const Index n, IndexVector& et, const Index relax_columns, IndexVector& descendants, IndexVector& relax_end)
{
  // The etree may not be postordered, but it is heap-ordered
IndexVector post;
internal::treePostorder(n, et, post); // Post order etree
IndexVector inv_post(n+1);
- int i;
+ Index i;
for (i = 0; i < n+1; ++i) inv_post(post(i)) = i; // inv_post = post.inverse()???
// Renumber etree in postorder
@@ -63,8 +64,8 @@ void SparseLUBase<Scalar,Index>::LU_heap_relax_snode (const int n, IndexVector&
et = iwork;
// compute the number of descendants of each node in the etree
- relax_end.setConstant(IND_EMPTY);
- int j, parent;
+ relax_end.setConstant(emptyIdxLU);
+ Index j, parent;
descendants.setZero();
for (j = 0; j < n; j++)
{
@@ -73,11 +74,11 @@ void SparseLUBase<Scalar,Index>::LU_heap_relax_snode (const int n, IndexVector&
descendants(parent) += descendants(j) + 1;
}
// Identify the relaxed supernodes by postorder traversal of the etree
- int snode_start; // beginning of a snode
- int k;
- int nsuper_et_post = 0; // Number of relaxed snodes in postordered etree
- int nsuper_et = 0; // Number of relaxed snodes in the original etree
- int l;
+ Index snode_start; // beginning of a snode
+ Index k;
+ Index nsuper_et_post = 0; // Number of relaxed snodes in postordered etree
+ Index nsuper_et = 0; // Number of relaxed snodes in the original etree
+ Index l;
for (j = 0; j < n; )
{
parent = et(j);
@@ -120,6 +121,7 @@ void SparseLUBase<Scalar,Index>::LU_heap_relax_snode (const int n, IndexVector&
et = et_save;
}
-} // end namespace Eigen
+} // end namespace internal
+} // end namespace Eigen
#endif // SPARSELU_HEAP_RELAX_SNODE_H
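The descendant count in heap_relax_snode relies on children being numbered before their parents, which is exactly what the postorder pass guarantees. A standalone sketch, assuming et(j) holds the parent of node j and the value n marks a root:

    #include <Eigen/Dense>

    void count_descendants(const Eigen::VectorXi& et, int n,
                           Eigen::VectorXi& descendants)
    {
      descendants.setZero(n);
      for (int j = 0; j < n; ++j)      // children come before parents
      {
        int parent = et(j);
        if (parent != n)               // n encodes "no parent"
          descendants(parent) += descendants(j) + 1;
      }
    }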
diff --git a/Eigen/src/SparseLU/SparseLU_kernel_bmod.h b/Eigen/src/SparseLU/SparseLU_kernel_bmod.h
index 8b65ff37c..0d0283b13 100644
--- a/Eigen/src/SparseLU/SparseLU_kernel_bmod.h
+++ b/Eigen/src/SparseLU/SparseLU_kernel_bmod.h
@@ -12,6 +12,7 @@
#define SPARSELU_KERNEL_BMOD_H
namespace Eigen {
+namespace internal {
/**
* \brief Performs numeric block updates from a given supernode to a single column
@@ -29,88 +30,101 @@ namespace Eigen {
*/
template <int SegSizeAtCompileTime> struct LU_kernel_bmod
{
- template <typename BlockScalarVector, typename ScalarVector, typename IndexVector>
- EIGEN_DONT_INLINE static void run(const int segsize, BlockScalarVector& dense, ScalarVector& tempv, ScalarVector& lusup, int& luptr, const int lda, const int nrow, IndexVector& lsub, const int lptr, const int no_zeros)
+ template <typename BlockScalarVector, typename ScalarVector, typename IndexVector, typename Index>
+ static EIGEN_DONT_INLINE void run(const int segsize, BlockScalarVector& dense, ScalarVector& tempv, ScalarVector& lusup, Index& luptr, const Index lda,
+ const Index nrow, IndexVector& lsub, const Index lptr, const Index no_zeros);
+};
+
+template <int SegSizeAtCompileTime>
+template <typename BlockScalarVector, typename ScalarVector, typename IndexVector, typename Index>
+EIGEN_DONT_INLINE void LU_kernel_bmod<SegSizeAtCompileTime>::run(const int segsize, BlockScalarVector& dense, ScalarVector& tempv, ScalarVector& lusup, Index& luptr, const Index lda,
+ const Index nrow, IndexVector& lsub, const Index lptr, const Index no_zeros)
+{
+ typedef typename ScalarVector::Scalar Scalar;
+ // First, copy U[*,j] segment from dense(*) to tempv(*)
+ // The result of triangular solve is in tempv[*];
+  // The result of matrix-vector update is in dense[*]
+ Index isub = lptr + no_zeros;
+ int i;
+ Index irow;
+ for (i = 0; i < ((SegSizeAtCompileTime==Dynamic)?segsize:SegSizeAtCompileTime); i++)
{
- typedef typename ScalarVector::Scalar Scalar;
- // First, copy U[*,j] segment from dense(*) to tempv(*)
- // The result of triangular solve is in tempv[*];
- // The result of matric-vector update is in dense[*]
- int isub = lptr + no_zeros;
- int i, irow;
- for (i = 0; i < ((SegSizeAtCompileTime==Dynamic)?segsize:SegSizeAtCompileTime); i++)
- {
- irow = lsub(isub);
- tempv(i) = dense(irow);
- ++isub;
- }
- // Dense triangular solve -- start effective triangle
- luptr += lda * no_zeros + no_zeros;
- // Form Eigen matrix and vector
- Map<Matrix<Scalar,SegSizeAtCompileTime,SegSizeAtCompileTime>, 0, OuterStride<> > A( &(lusup.data()[luptr]), segsize, segsize, OuterStride<>(lda) );
- Map<Matrix<Scalar,SegSizeAtCompileTime,1> > u(tempv.data(), segsize);
-
- u = A.template triangularView<UnitLower>().solve(u);
-
- // Dense matrix-vector product y <-- B*x
- luptr += segsize;
- const int PacketSize = internal::packet_traits<Scalar>::size;
- int ldl = internal::first_multiple(nrow, PacketSize);
- Map<Matrix<Scalar,Dynamic,SegSizeAtCompileTime>, 0, OuterStride<> > B( &(lusup.data()[luptr]), nrow, segsize, OuterStride<>(lda) );
- int aligned_offset = internal::first_aligned(tempv.data()+segsize, PacketSize);
- int aligned_with_B_offset = (PacketSize-internal::first_aligned(B.data(), PacketSize))%PacketSize;
- Map<Matrix<Scalar,Dynamic,1>, 0, OuterStride<> > l(tempv.data()+segsize+aligned_offset+aligned_with_B_offset, nrow, OuterStride<>(ldl) );
-
- l.setZero();
- internal::sparselu_gemm<Scalar>(l.rows(), l.cols(), B.cols(), B.data(), B.outerStride(), u.data(), u.outerStride(), l.data(), l.outerStride());
-
- // Scatter tempv[] into SPA dense[] as a temporary storage
- isub = lptr + no_zeros;
- for (i = 0; i < ((SegSizeAtCompileTime==Dynamic)?segsize:SegSizeAtCompileTime); i++)
- {
- irow = lsub(isub++);
- dense(irow) = tempv(i);
- }
-
- // Scatter l into SPA dense[]
- for (i = 0; i < nrow; i++)
- {
- irow = lsub(isub++);
- dense(irow) -= l(i);
- }
+ irow = lsub(isub);
+ tempv(i) = dense(irow);
+ ++isub;
}
-};
+ // Dense triangular solve -- start effective triangle
+ luptr += lda * no_zeros + no_zeros;
+ // Form Eigen matrix and vector
+ Map<Matrix<Scalar,SegSizeAtCompileTime,SegSizeAtCompileTime>, 0, OuterStride<> > A( &(lusup.data()[luptr]), segsize, segsize, OuterStride<>(lda) );
+ Map<Matrix<Scalar,SegSizeAtCompileTime,1> > u(tempv.data(), segsize);
+
+ u = A.template triangularView<UnitLower>().solve(u);
+
+ // Dense matrix-vector product y <-- B*x
+ luptr += segsize;
+ const Index PacketSize = internal::packet_traits<Scalar>::size;
+ Index ldl = internal::first_multiple(nrow, PacketSize);
+ Map<Matrix<Scalar,Dynamic,SegSizeAtCompileTime>, 0, OuterStride<> > B( &(lusup.data()[luptr]), nrow, segsize, OuterStride<>(lda) );
+ Index aligned_offset = internal::first_aligned(tempv.data()+segsize, PacketSize);
+ Index aligned_with_B_offset = (PacketSize-internal::first_aligned(B.data(), PacketSize))%PacketSize;
+ Map<Matrix<Scalar,Dynamic,1>, 0, OuterStride<> > l(tempv.data()+segsize+aligned_offset+aligned_with_B_offset, nrow, OuterStride<>(ldl) );
+
+ l.setZero();
+ internal::sparselu_gemm<Scalar>(l.rows(), l.cols(), B.cols(), B.data(), B.outerStride(), u.data(), u.outerStride(), l.data(), l.outerStride());
+
+ // Scatter tempv[] into SPA dense[] as a temporary storage
+ isub = lptr + no_zeros;
+ for (i = 0; i < ((SegSizeAtCompileTime==Dynamic)?segsize:SegSizeAtCompileTime); i++)
+ {
+ irow = lsub(isub++);
+ dense(irow) = tempv(i);
+ }
+
+ // Scatter l into SPA dense[]
+ for (i = 0; i < nrow; i++)
+ {
+ irow = lsub(isub++);
+ dense(irow) -= l(i);
+ }
+}
template <> struct LU_kernel_bmod<1>
{
- template <typename BlockScalarVector, typename ScalarVector, typename IndexVector>
- EIGEN_DONT_INLINE static void run(const int /*segsize*/, BlockScalarVector& dense, ScalarVector& /*tempv*/, ScalarVector& lusup, int& luptr, const int lda, const int nrow,
- IndexVector& lsub, const int lptr, const int no_zeros)
+ template <typename BlockScalarVector, typename ScalarVector, typename IndexVector, typename Index>
+ static EIGEN_DONT_INLINE void run(const int /*segsize*/, BlockScalarVector& dense, ScalarVector& /*tempv*/, ScalarVector& lusup, Index& luptr,
+ const Index lda, const Index nrow, IndexVector& lsub, const Index lptr, const Index no_zeros);
+};
+
+
+template <typename BlockScalarVector, typename ScalarVector, typename IndexVector, typename Index>
+EIGEN_DONT_INLINE void LU_kernel_bmod<1>::run(const int /*segsize*/, BlockScalarVector& dense, ScalarVector& /*tempv*/, ScalarVector& lusup, Index& luptr,
+ const Index lda, const Index nrow, IndexVector& lsub, const Index lptr, const Index no_zeros)
+{
+ typedef typename ScalarVector::Scalar Scalar;
+ Scalar f = dense(lsub(lptr + no_zeros));
+ luptr += lda * no_zeros + no_zeros + 1;
+ const Scalar* a(lusup.data() + luptr);
+ const /*typename IndexVector::Scalar*/Index* irow(lsub.data()+lptr + no_zeros + 1);
+ Index i = 0;
+ for (; i+1 < nrow; i+=2)
{
- typedef typename ScalarVector::Scalar Scalar;
- Scalar f = dense(lsub(lptr + no_zeros));
- luptr += lda * no_zeros + no_zeros + 1;
- const Scalar* a(lusup.data() + luptr);
- const typename IndexVector::Scalar* irow(lsub.data()+lptr + no_zeros + 1);
- int i = 0;
- for (; i+1 < nrow; i+=2)
- {
- int i0 = *(irow++);
- int i1 = *(irow++);
- Scalar a0 = *(a++);
- Scalar a1 = *(a++);
- Scalar d0 = dense.coeff(i0);
- Scalar d1 = dense.coeff(i1);
- d0 -= f*a0;
- d1 -= f*a1;
- dense.coeffRef(i0) = d0;
- dense.coeffRef(i1) = d1;
- }
- if(i<nrow)
- dense.coeffRef(*(irow++)) -= f * *(a++);
+ Index i0 = *(irow++);
+ Index i1 = *(irow++);
+ Scalar a0 = *(a++);
+ Scalar a1 = *(a++);
+ Scalar d0 = dense.coeff(i0);
+ Scalar d1 = dense.coeff(i1);
+ d0 -= f*a0;
+ d1 -= f*a1;
+ dense.coeffRef(i0) = d0;
+ dense.coeffRef(i1) = d1;
}
-};
+ if(i<nrow)
+ dense.coeffRef(*(irow++)) -= f * *(a++);
+}
-} // end namespace Eigen
+} // end namespace internal
+} // end namespace Eigen
#endif // SPARSELU_KERNEL_BMOD_H
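The bodies of run() were hoisted out of the class definitions; an in-class definition is implicitly inline, which can defeat EIGEN_DONT_INLINE on some compilers. The out-of-class syntax for a member template of a class template, reduced to its skeleton:

    template <int N> struct Kernel
    {
      template <typename Vec>
      static void run(Vec& v);           // declaration only
    };

    template <int N>                     // class template parameters first,
    template <typename Vec>              // then the member's own parameters
    void Kernel<N>::run(Vec& v)
    {
      v.setZero();                       // stand-in for the heavy body
    }

    template <> struct Kernel<1>         // full specialization
    {
      template <typename Vec> static void run(Vec& v);
    };

    template <typename Vec>              // no template<> prefix needed here
    void Kernel<1>::run(Vec& v) { v.setZero(); }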
diff --git a/Eigen/src/SparseLU/SparseLU_panel_bmod.h b/Eigen/src/SparseLU/SparseLU_panel_bmod.h
index fbc146a36..da0e0fc3c 100644
--- a/Eigen/src/SparseLU/SparseLU_panel_bmod.h
+++ b/Eigen/src/SparseLU/SparseLU_panel_bmod.h
@@ -32,6 +32,7 @@
#define SPARSELU_PANEL_BMOD_H
namespace Eigen {
+namespace internal {
/**
* \brief Performs numeric block updates (sup-panel) in topological order.
@@ -52,18 +53,19 @@ namespace Eigen {
*
*/
template <typename Scalar, typename Index>
-void SparseLUBase<Scalar,Index>::LU_panel_bmod(const int m, const int w, const int jcol, const int nseg, ScalarVector& dense, ScalarVector& tempv,
- IndexVector& segrep, IndexVector& repfnz, GlobalLU_t& glu)
+void SparseLUImpl<Scalar,Index>::panel_bmod(const Index m, const Index w, const Index jcol,
+ const Index nseg, ScalarVector& dense, ScalarVector& tempv,
+ IndexVector& segrep, IndexVector& repfnz, GlobalLU_t& glu)
{
- int ksub,jj,nextl_col;
- int fsupc, nsupc, nsupr, nrow;
- int krep, kfnz;
- int lptr; // points to the row subscripts of a supernode
- int luptr; // ...
- int segsize,no_zeros ;
+ Index ksub,jj,nextl_col;
+ Index fsupc, nsupc, nsupr, nrow;
+ Index krep, kfnz;
+ Index lptr; // points to the row subscripts of a supernode
+ Index luptr; // ...
+ Index segsize,no_zeros ;
// For each nonz supernode segment of U[*,j] in topological order
- int k = nseg - 1;
+ Index k = nseg - 1;
const Index PacketSize = internal::packet_traits<Scalar>::size;
for (ksub = 0; ksub < nseg; ksub++)
@@ -81,15 +83,15 @@ void SparseLUBase<Scalar,Index>::LU_panel_bmod(const int m, const int w, const i
lptr = glu.xlsub(fsupc);
// loop over the panel columns to detect the actual number of columns and rows
- int u_rows = 0;
- int u_cols = 0;
+ Index u_rows = 0;
+ Index u_cols = 0;
for (jj = jcol; jj < jcol + w; jj++)
{
nextl_col = (jj-jcol) * m;
VectorBlock<IndexVector> repfnz_col(repfnz, nextl_col, m); // First nonzero column index for each row
kfnz = repfnz_col(krep);
- if ( kfnz == IND_EMPTY )
+ if ( kfnz == emptyIdxLU )
continue; // skip any zero segment
segsize = krep - kfnz + 1;
@@ -99,11 +101,11 @@ void SparseLUBase<Scalar,Index>::LU_panel_bmod(const int m, const int w, const i
if(nsupc >= 2)
{
- int ldu = internal::first_multiple<Index>(u_rows, PacketSize);
+ Index ldu = internal::first_multiple<Index>(u_rows, PacketSize);
Map<Matrix<Scalar,Dynamic,Dynamic>, Aligned, OuterStride<> > U(tempv.data(), u_rows, u_cols, OuterStride<>(ldu));
// gather U
- int u_col = 0;
+ Index u_col = 0;
for (jj = jcol; jj < jcol + w; jj++)
{
nextl_col = (jj-jcol) * m;
@@ -111,19 +113,19 @@ void SparseLUBase<Scalar,Index>::LU_panel_bmod(const int m, const int w, const i
VectorBlock<ScalarVector> dense_col(dense, nextl_col, m); // Scatter/gather entire matrix column from/to here
kfnz = repfnz_col(krep);
- if ( kfnz == IND_EMPTY )
+ if ( kfnz == emptyIdxLU )
continue; // skip any zero segment
segsize = krep - kfnz + 1;
luptr = glu.xlusup(fsupc);
no_zeros = kfnz - fsupc;
- int isub = lptr + no_zeros;
- int off = u_rows-segsize;
- for (int i = 0; i < off; i++) U(i,u_col) = 0;
- for (int i = 0; i < segsize; i++)
+ Index isub = lptr + no_zeros;
+ Index off = u_rows-segsize;
+ for (Index i = 0; i < off; i++) U(i,u_col) = 0;
+ for (Index i = 0; i < segsize; i++)
{
- int irow = glu.lsub(isub);
+ Index irow = glu.lsub(isub);
U(i+off,u_col) = dense_col(irow);
++isub;
}
@@ -131,7 +133,7 @@ void SparseLUBase<Scalar,Index>::LU_panel_bmod(const int m, const int w, const i
}
// solve U = A^-1 U
luptr = glu.xlusup(fsupc);
- int lda = glu.xlusup(fsupc+1) - glu.xlusup(fsupc);
+ Index lda = glu.xlusup(fsupc+1) - glu.xlusup(fsupc);
no_zeros = (krep - u_rows + 1) - fsupc;
luptr += lda * no_zeros + no_zeros;
Map<Matrix<Scalar,Dynamic,Dynamic>, 0, OuterStride<> > A(glu.lusup.data()+luptr, u_rows, u_rows, OuterStride<>(lda) );
@@ -142,8 +144,8 @@ void SparseLUBase<Scalar,Index>::LU_panel_bmod(const int m, const int w, const i
Map<Matrix<Scalar,Dynamic,Dynamic>, 0, OuterStride<> > B(glu.lusup.data()+luptr, nrow, u_rows, OuterStride<>(lda) );
eigen_assert(tempv.size()>w*ldu + nrow*w + 1);
- int ldl = internal::first_multiple<Index>(nrow, PacketSize);
- int offset = (PacketSize-internal::first_aligned(B.data(), PacketSize)) % PacketSize;
+ Index ldl = internal::first_multiple<Index>(nrow, PacketSize);
+ Index offset = (PacketSize-internal::first_aligned(B.data(), PacketSize)) % PacketSize;
Map<Matrix<Scalar,Dynamic,Dynamic>, 0, OuterStride<> > L(tempv.data()+w*ldu+offset, nrow, u_cols, OuterStride<>(ldl));
L.setZero();
@@ -158,25 +160,25 @@ void SparseLUBase<Scalar,Index>::LU_panel_bmod(const int m, const int w, const i
VectorBlock<ScalarVector> dense_col(dense, nextl_col, m); // Scatter/gather entire matrix column from/to here
kfnz = repfnz_col(krep);
- if ( kfnz == IND_EMPTY )
+ if ( kfnz == emptyIdxLU )
continue; // skip any zero segment
segsize = krep - kfnz + 1;
no_zeros = kfnz - fsupc;
- int isub = lptr + no_zeros;
+ Index isub = lptr + no_zeros;
- int off = u_rows-segsize;
- for (int i = 0; i < segsize; i++)
+ Index off = u_rows-segsize;
+ for (Index i = 0; i < segsize; i++)
{
- int irow = glu.lsub(isub++);
+ Index irow = glu.lsub(isub++);
dense_col(irow) = U.coeff(i+off,u_col);
U.coeffRef(i+off,u_col) = 0;
}
// Scatter l into SPA dense[]
- for (int i = 0; i < nrow; i++)
+ for (Index i = 0; i < nrow; i++)
{
- int irow = glu.lsub(isub++);
+ Index irow = glu.lsub(isub++);
dense_col(irow) -= L.coeff(i,u_col);
L.coeffRef(i,u_col) = 0;
}
@@ -193,13 +195,13 @@ void SparseLUBase<Scalar,Index>::LU_panel_bmod(const int m, const int w, const i
VectorBlock<ScalarVector> dense_col(dense, nextl_col, m); // Scatter/gather entire matrix column from/to here
kfnz = repfnz_col(krep);
- if ( kfnz == IND_EMPTY )
+ if ( kfnz == emptyIdxLU )
continue; // skip any zero segment
segsize = krep - kfnz + 1;
luptr = glu.xlusup(fsupc);
- int lda = glu.xlusup(fsupc+1)-glu.xlusup(fsupc);// nsupr
+ Index lda = glu.xlusup(fsupc+1)-glu.xlusup(fsupc);// nsupr
    // Perform a triangular solve and block update,
// then scatter the result of sup-col update to dense[]
@@ -212,7 +214,9 @@ void SparseLUBase<Scalar,Index>::LU_panel_bmod(const int m, const int w, const i
}
} // End for each updating supernode
-}
+} // end panel bmod
+
+} // end namespace internal
} // end namespace Eigen
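ldu and ldl round the packed row counts up to a multiple of the packet size so that every column of the gathered U and L blocks starts on a SIMD boundary; the offset term compensates for the alignment of B's first element. A minimal version of the round-up with a worked example:

    // first_multiple-style round-up: smallest multiple of packet >= x.
    template<typename Index>
    Index round_up_to_packet(Index x, Index packet)
    {
      return ((x + packet - 1) / packet) * packet;
    }
    // e.g. round_up_to_packet(13, 4) == 16: a 13-row column occupies 16
    // scalars, so the next column also starts packet-aligned.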
diff --git a/Eigen/src/SparseLU/SparseLU_panel_dfs.h b/Eigen/src/SparseLU/SparseLU_panel_dfs.h
index 16e04423b..dc0054efd 100644
--- a/Eigen/src/SparseLU/SparseLU_panel_dfs.h
+++ b/Eigen/src/SparseLU/SparseLU_panel_dfs.h
@@ -35,10 +35,10 @@ namespace Eigen {
namespace internal {
template<typename IndexVector>
-struct LU_panel_dfs_traits
+struct panel_dfs_traits
{
typedef typename IndexVector::Scalar Index;
- LU_panel_dfs_traits(Index jcol, Index* marker)
+ panel_dfs_traits(Index jcol, Index* marker)
: m_jcol(jcol), m_marker(marker)
{}
bool update_segrep(Index krep, Index jj)
@@ -50,30 +50,29 @@ struct LU_panel_dfs_traits
}
return false;
}
- void mem_expand(IndexVector& /*glu.lsub*/, int /*nextl*/, int /*chmark*/) {}
+ void mem_expand(IndexVector& /*glu.lsub*/, Index /*nextl*/, Index /*chmark*/) {}
enum { ExpandMem = false };
Index m_jcol;
Index* m_marker;
};
-} // end namespace internal
template <typename Scalar, typename Index>
template <typename Traits>
-void SparseLUBase<Scalar,Index>::LU_dfs_kernel(const int jj, IndexVector& perm_r,
- int& nseg, IndexVector& panel_lsub, IndexVector& segrep,
+void SparseLUImpl<Scalar,Index>::dfs_kernel(const Index jj, IndexVector& perm_r,
+ Index& nseg, IndexVector& panel_lsub, IndexVector& segrep,
Ref<IndexVector> repfnz_col, IndexVector& xprune, Ref<IndexVector> marker, IndexVector& parent,
IndexVector& xplore, GlobalLU_t& glu,
- int& nextl_col, int krow, Traits& traits
+ Index& nextl_col, Index krow, Traits& traits
)
{
- int kmark = marker(krow);
+ Index kmark = marker(krow);
// For each unmarked krow of jj
marker(krow) = jj;
- int kperm = perm_r(krow);
- if (kperm == IND_EMPTY ) {
+ Index kperm = perm_r(krow);
+ if (kperm == emptyIdxLU ) {
// krow is in L : place it in structure of L(*, jj)
panel_lsub(nextl_col++) = krow; // krow is indexed into A
@@ -84,11 +83,11 @@ void SparseLUBase<Scalar,Index>::LU_dfs_kernel(const int jj, IndexVector& perm_r
// krow is in U : if its supernode-representative krep
// has been explored, update repfnz(*)
// krep = supernode representative of the current row
- int krep = glu.xsup(glu.supno(kperm)+1) - 1;
+ Index krep = glu.xsup(glu.supno(kperm)+1) - 1;
// First nonzero element in the current column:
- int myfnz = repfnz_col(krep);
+ Index myfnz = repfnz_col(krep);
- if (myfnz != IND_EMPTY )
+ if (myfnz != emptyIdxLU )
{
// Representative visited before
if (myfnz > kperm ) repfnz_col(krep) = kperm;
@@ -97,28 +96,28 @@ void SparseLUBase<Scalar,Index>::LU_dfs_kernel(const int jj, IndexVector& perm_r
else
{
// Otherwise, perform dfs starting at krep
- int oldrep = IND_EMPTY;
+ Index oldrep = emptyIdxLU;
parent(krep) = oldrep;
repfnz_col(krep) = kperm;
- int xdfs = glu.xlsub(krep);
- int maxdfs = xprune(krep);
+ Index xdfs = glu.xlsub(krep);
+ Index maxdfs = xprune(krep);
- int kpar;
+ Index kpar;
do
{
// For each unmarked kchild of krep
while (xdfs < maxdfs)
{
- int kchild = glu.lsub(xdfs);
+ Index kchild = glu.lsub(xdfs);
xdfs++;
- int chmark = marker(kchild);
+ Index chmark = marker(kchild);
if (chmark != jj )
{
marker(kchild) = jj;
- int chperm = perm_r(kchild);
+ Index chperm = perm_r(kchild);
- if (chperm == IND_EMPTY)
+ if (chperm == emptyIdxLU)
{
// case kchild is in L: place it in L(*, j)
panel_lsub(nextl_col++) = kchild;
@@ -129,10 +128,10 @@ void SparseLUBase<Scalar,Index>::LU_dfs_kernel(const int jj, IndexVector& perm_r
// case kchild is in U :
// chrep = its supernode-rep. If its rep has been explored,
// update its repfnz(*)
- int chrep = glu.xsup(glu.supno(chperm)+1) - 1;
+ Index chrep = glu.xsup(glu.supno(chperm)+1) - 1;
myfnz = repfnz_col(chrep);
- if (myfnz != IND_EMPTY)
+ if (myfnz != emptyIdxLU)
{ // Visited before
if (myfnz > chperm)
repfnz_col(chrep) = chperm;
@@ -167,13 +166,13 @@ void SparseLUBase<Scalar,Index>::LU_dfs_kernel(const int jj, IndexVector& perm_r
}
kpar = parent(krep); // Pop recursion, mimic recursion
- if (kpar == IND_EMPTY)
+ if (kpar == emptyIdxLU)
break; // dfs done
krep = kpar;
xdfs = xplore(krep);
maxdfs = xprune(krep);
- } while (kpar != IND_EMPTY); // Do until empty stack
+ } while (kpar != emptyIdxLU); // Do until empty stack
} // end if (myfnz = -1)
@@ -217,18 +216,18 @@ void SparseLUBase<Scalar,Index>::LU_dfs_kernel(const int jj, IndexVector& perm_r
*/
template <typename Scalar, typename Index>
-void SparseLUBase<Scalar,Index>::LU_panel_dfs(const int m, const int w, const int jcol, MatrixType& A, IndexVector& perm_r, int& nseg, ScalarVector& dense, IndexVector& panel_lsub, IndexVector& segrep, IndexVector& repfnz, IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, GlobalLU_t& glu)
+void SparseLUImpl<Scalar,Index>::panel_dfs(const Index m, const Index w, const Index jcol, MatrixType& A, IndexVector& perm_r, Index& nseg, ScalarVector& dense, IndexVector& panel_lsub, IndexVector& segrep, IndexVector& repfnz, IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, GlobalLU_t& glu)
{
- int nextl_col; // Next available position in panel_lsub[*,jj]
+ Index nextl_col; // Next available position in panel_lsub[*,jj]
// Initialize pointers
VectorBlock<IndexVector> marker1(marker, m, m);
nseg = 0;
- internal::LU_panel_dfs_traits<IndexVector> traits(jcol, marker1.data());
+ panel_dfs_traits<IndexVector> traits(jcol, marker1.data());
// For each column in the panel
- for (int jj = jcol; jj < jcol + w; jj++)
+ for (Index jj = jcol; jj < jcol + w; jj++)
{
nextl_col = (jj - jcol) * m;
@@ -239,20 +238,21 @@ void SparseLUBase<Scalar,Index>::LU_panel_dfs(const int m, const int w, const in
// For each nnz in A[*, jj] do depth first search
for (typename MatrixType::InnerIterator it(A, jj); it; ++it)
{
- int krow = it.row();
+ Index krow = it.row();
dense_col(krow) = it.value();
- int kmark = marker(krow);
+ Index kmark = marker(krow);
if (kmark == jj)
continue; // krow visited before, go to the next nonzero
- LU_dfs_kernel(jj, perm_r, nseg, panel_lsub, segrep, repfnz_col, xprune, marker, parent,
+ dfs_kernel(jj, perm_r, nseg, panel_lsub, segrep, repfnz_col, xprune, marker, parent,
xplore, glu, nextl_col, krow, traits);
}// end for nonzeros in column jj
} // end for column jj
}
+} // end namespace internal
} // end namespace Eigen
#endif // SPARSELU_PANEL_DFS_H
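dfs_kernel avoids recursion by recording the return point in parent(krep) and the resume position in xplore(krep), then popping by following parent until it reaches emptyIdxLU. The control flow is equivalent to a textbook explicit-stack DFS (a generic sketch, not the Eigen data layout):

    #include <cstddef>
    #include <stack>
    #include <vector>

    void dfs_sketch(int root, const std::vector<std::vector<int> >& adj,
                    std::vector<bool>& visited)
    {
      std::stack<int> todo;
      todo.push(root);
      while (!todo.empty())
      {
        int node = todo.top(); todo.pop();
        if (visited[node]) continue;
        visited[node] = true;
        for (std::size_t k = 0; k < adj[node].size(); ++k)
          todo.push(adj[node][k]);       // children are explored later
      }
    }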
diff --git a/Eigen/src/SparseLU/SparseLU_pivotL.h b/Eigen/src/SparseLU/SparseLU_pivotL.h
index 69472da9b..ddcd4ec98 100644
--- a/Eigen/src/SparseLU/SparseLU_pivotL.h
+++ b/Eigen/src/SparseLU/SparseLU_pivotL.h
@@ -31,6 +31,7 @@
#define SPARSELU_PIVOTL_H
namespace Eigen {
+namespace internal {
/**
 * \brief Performs the numerical pivoting on the current column of L, and the CDIV operation.
@@ -56,7 +57,7 @@ namespace Eigen {
*
*/
template <typename Scalar, typename Index>
-int SparseLUBase<Scalar,Index>::LU_pivotL(const int jcol, const RealScalar diagpivotthresh, IndexVector& perm_r, IndexVector& iperm_c, int& pivrow, GlobalLU_t& glu)
+Index SparseLUImpl<Scalar,Index>::pivotL(const Index jcol, const RealScalar& diagpivotthresh, IndexVector& perm_r, IndexVector& iperm_c, Index& pivrow, GlobalLU_t& glu)
{
Index fsupc = (glu.xsup)((glu.supno)(jcol)); // First column in the supernode containing the column jcol
@@ -72,7 +73,7 @@ int SparseLUBase<Scalar,Index>::LU_pivotL(const int jcol, const RealScalar diagp
Index diagind = iperm_c(jcol); // diagonal index
RealScalar pivmax = 0.0;
Index pivptr = nsupc;
- Index diag = IND_EMPTY;
+ Index diag = emptyIdxLU;
RealScalar rtemp;
Index isub, icol, itemp, k;
for (isub = nsupc; isub < nsupr; ++isub) {
@@ -127,6 +128,7 @@ int SparseLUBase<Scalar,Index>::LU_pivotL(const int jcol, const RealScalar diagp
return 0;
}
+} // end namespace internal
} // end namespace Eigen
#endif // SPARSELU_PIVOTL_H
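pivotL implements threshold partial pivoting: the diagonal entry is kept as pivot whenever its magnitude is within a factor diagpivotthresh of the largest magnitude in the column, which tends to preserve sparsity while still bounding element growth. The decision rule in isolation:

    #include <cmath>

    // diagpivotthresh == 1.0 : classical partial pivoting (largest entry)
    // diagpivotthresh == 0.0 : always keep a nonzero diagonal pivot
    bool keep_diagonal_pivot(double diag, double pivmax,
                             double diagpivotthresh)
    {
      return std::abs(diag) != 0.0
          && std::abs(diag) >= diagpivotthresh * pivmax;
    }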
diff --git a/Eigen/src/SparseLU/SparseLU_pruneL.h b/Eigen/src/SparseLU/SparseLU_pruneL.h
index 816358bc3..5a855f82f 100644
--- a/Eigen/src/SparseLU/SparseLU_pruneL.h
+++ b/Eigen/src/SparseLU/SparseLU_pruneL.h
@@ -31,6 +31,7 @@
#define SPARSELU_PRUNEL_H
namespace Eigen {
+namespace internal {
/**
* \brief Prunes the L-structure.
@@ -49,11 +50,11 @@ namespace Eigen {
*
*/
template <typename Scalar, typename Index>
-void SparseLUBase<Scalar,Index>::LU_pruneL(const int jcol, const IndexVector& perm_r, const int pivrow, const int nseg, const IndexVector& segrep, BlockIndexVector repfnz, IndexVector& xprune, GlobalLU_t& glu)
+void SparseLUImpl<Scalar,Index>::pruneL(const Index jcol, const IndexVector& perm_r, const Index pivrow, const Index nseg, const IndexVector& segrep, BlockIndexVector repfnz, IndexVector& xprune, GlobalLU_t& glu)
{
// For each supernode-rep irep in U(*,j]
- int jsupno = glu.supno(jcol);
- int i,irep,irep1;
+ Index jsupno = glu.supno(jcol);
+ Index i,irep,irep1;
bool movnum, do_prune = false;
Index kmin, kmax, minloc, maxloc,krow;
for (i = 0; i < nseg; i++)
@@ -63,7 +64,7 @@ void SparseLUBase<Scalar,Index>::LU_pruneL(const int jcol, const IndexVector& pe
do_prune = false;
// Don't prune with a zero U-segment
- if (repfnz(irep) == IND_EMPTY) continue;
+ if (repfnz(irep) == emptyIdxLU) continue;
// If a snode overlaps with the next panel, then the U-segment
// is fragmented into two parts -- irep and irep1. We should let
@@ -97,9 +98,9 @@ void SparseLUBase<Scalar,Index>::LU_pruneL(const int jcol, const IndexVector& pe
while (kmin <= kmax)
{
- if (perm_r(glu.lsub(kmax)) == IND_EMPTY)
+ if (perm_r(glu.lsub(kmax)) == emptyIdxLU)
kmax--;
- else if ( perm_r(glu.lsub(kmin)) != IND_EMPTY)
+ else if ( perm_r(glu.lsub(kmin)) != emptyIdxLU)
kmin++;
else
{
@@ -128,6 +129,7 @@ void SparseLUBase<Scalar,Index>::LU_pruneL(const int jcol, const IndexVector& pe
} // End for each U-segment
}
+} // end namespace internal
} // end namespace Eigen
#endif // SPARSELU_PRUNEL_H
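The kmin/kmax loop in pruneL is an in-place two-pointer partition: row indices whose rows have already been pivoted (perm_r != emptyIdxLU) are gathered at the front so the supernode's subscript list can be pruned to that prefix. The same pattern in isolation:

    #include <utility>
    #include <vector>

    // Reorder `rows` so entries with perm[row] != empty come first.
    void partition_pivoted(std::vector<int>& rows,
                           const std::vector<int>& perm, int empty)
    {
      int lo = 0, hi = static_cast<int>(rows.size()) - 1;
      while (lo <= hi)
      {
        if      (perm[rows[hi]] == empty) --hi;  // back entry is fine
        else if (perm[rows[lo]] != empty) ++lo;  // front entry is fine
        else    std::swap(rows[lo], rows[hi]);   // both misplaced: swap
      }
    }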
diff --git a/Eigen/src/SparseLU/SparseLU_relax_snode.h b/Eigen/src/SparseLU/SparseLU_relax_snode.h
index 44b279878..58ec32e27 100644
--- a/Eigen/src/SparseLU/SparseLU_relax_snode.h
+++ b/Eigen/src/SparseLU/SparseLU_relax_snode.h
@@ -29,6 +29,8 @@
#define SPARSELU_RELAX_SNODE_H
namespace Eigen {
+
+namespace internal {
/**
* \brief Identify the initial relaxed supernodes
@@ -42,12 +44,12 @@ namespace Eigen {
* \param relax_end last column in a supernode
*/
template <typename Scalar, typename Index>
-void SparseLUBase<Scalar,Index>::LU_relax_snode (const int n, IndexVector& et, const int relax_columns, IndexVector& descendants, IndexVector& relax_end)
+void SparseLUImpl<Scalar,Index>::relax_snode (const Index n, IndexVector& et, const Index relax_columns, IndexVector& descendants, IndexVector& relax_end)
{
// compute the number of descendants of each node in the etree
- int j, parent;
- relax_end.setConstant(IND_EMPTY);
+ Index j, parent;
+ relax_end.setConstant(emptyIdxLU);
descendants.setZero();
for (j = 0; j < n; j++)
{
@@ -56,7 +58,7 @@ void SparseLUBase<Scalar,Index>::LU_relax_snode (const int n, IndexVector& et, c
descendants(parent) += descendants(j) + 1;
}
// Identify the relaxed supernodes by postorder traversal of the etree
- int snode_start; // beginning of a snode
+ Index snode_start; // beginning of a snode
for (j = 0; j < n; )
{
parent = et(j);
@@ -75,6 +77,7 @@ void SparseLUBase<Scalar,Index>::LU_relax_snode (const int n, IndexVector& et, c
}
-} // end namespace Eigen
+} // end namespace internal
+} // end namespace Eigen
#endif
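relax_snode groups leaf subtrees of the elimination tree into artificial supernodes of at most relax_columns columns; heap_relax_snode reaches the same goal for non-postordered trees by postordering first. A simplified sketch of the relaxation loop, reusing the conventions of the descendant-count sketch above (et(j) is the parent of j, n marks a root; bounds handling elided):

    for (int j = 0; j < n; )
    {
      int parent = et(j);
      int snode_start = j;               // first column of this snode
      while (parent != n && descendants(parent) < relax_columns)
      {
        j = parent;                      // absorb the parent as well
        parent = et(j);
      }
      relax_end(snode_start) = j;        // last column of the snode
      for (++j; j < n && descendants(j) != 0; ++j)
        ;                                // skip non-leaf columns
    }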
diff --git a/Eigen/src/SparseQR/SparseQR.h b/Eigen/src/SparseQR/SparseQR.h
index 7fa3e54a5..b3d5cd208 100644
--- a/Eigen/src/SparseQR/SparseQR.h
+++ b/Eigen/src/SparseQR/SparseQR.h
@@ -1,15 +1,15 @@
-#ifndef EIGEN_SPARSE_QR_H
-#define EIGEN_SPARSE_QR_H
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
-// Copyright (C) 2012 Desire Nuentsa <desire.nuentsa_wakam@inria.fr>
-// Copyright (C) 2012 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2012-2013 Desire Nuentsa <desire.nuentsa_wakam@inria.fr>
+// Copyright (C) 2012-2013 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#ifndef EIGEN_SPARSE_QR_H
+#define EIGEN_SPARSE_QR_H
namespace Eigen {
@@ -35,21 +35,22 @@ namespace internal {
/**
* \ingroup SparseQR_Module
* \class SparseQR
- * \brief Sparse left-looking QR factorization
+ * \brief Sparse left-looking rank-revealing QR factorization
*
- * This class is used to perform a left-looking QR decomposition
- * of sparse matrices. The result is then used to solve linear leasts_square systems.
- * Clearly, a QR factorization is returned such that A*P = Q*R where :
+ * This class implements a left-looking rank-revealing QR decomposition
+ * of sparse matrices. When a column has a norm less than a given tolerance
+ * it is implicitly permuted to the end. The QR factorization thus obtained is
+ * given by A*P = Q*R where R is upper triangular or trapezoidal.
*
- * P is the column permutation. Use colsPermutation() to get it.
+ * P is the column permutation which is the product of the fill-reducing and the
+ * rank-revealing permutations. Use colsPermutation() to get it.
*
- * Q is the orthogonal matrix represented as Householder reflectors.
+ * Q is the orthogonal matrix represented as products of Householder reflectors.
* Use matrixQ() to get an expression and matrixQ().transpose() to get the transpose.
* You can then apply it to a vector.
*
- * R is the sparse triangular factor. Use matrixR() to get it as SparseMatrix.
- *
- * \note This is not a rank-revealing QR decomposition.
+ * R is the sparse triangular or trapezoidal matrix. The latter occurs when A is rank-deficient.
+ * matrixR().topLeftCorner(rank(), rank()) always returns a triangular factor of full rank.
*
* \tparam _MatrixType The type of the sparse matrix A, must be a column-major SparseMatrix<>
* \tparam _OrderingType The fill-reducing ordering method. See the \link OrderingMethods_Module
@@ -71,10 +72,10 @@ class SparseQR
typedef Matrix<Scalar, Dynamic, 1> ScalarVector;
typedef PermutationMatrix<Dynamic, Dynamic, Index> PermutationType;
public:
- SparseQR () : m_isInitialized(false),m_analysisIsok(false)
+ SparseQR () : m_isInitialized(false), m_analysisIsok(false), m_lastError(""), m_useDefaultThreshold(true)
{ }
- SparseQR(const MatrixType& mat) : m_isInitialized(false),m_analysisIsok(false)
+ SparseQR(const MatrixType& mat) : m_isInitialized(false), m_analysisIsok(false), m_lastError(""), m_useDefaultThreshold(true)
{
compute(mat);
}
@@ -96,7 +97,17 @@ class SparseQR
/** \returns a const reference to the \b sparse upper triangular matrix R of the QR factorization.
*/
- const MatrixType& matrixR() const { return m_R; }
+ const QRMatrixType& matrixR() const { return m_R; }
+
+ /** \returns the number of non linearly dependent columns as determined by the pivoting threshold.
+ *
+ * \sa setPivotThreshold()
+ */
+ Index rank() const
+ {
+ eigen_assert(m_isInitialized && "The factorization should be called first, use compute()");
+ return m_nonzeropivots;
+ }
/** \returns an expression of the matrix Q as products of sparse Householder reflectors.
* You can do the following to get an actual SparseMatrix representation of Q:
@@ -107,35 +118,57 @@ class SparseQR
SparseQRMatrixQReturnType<SparseQR> matrixQ() const
{ return SparseQRMatrixQReturnType<SparseQR>(*this); }
- /** \returns a const reference to the fill-in reducing permutation that was applied to the columns of A
+ /** \returns a const reference to the column permutation P that was applied to A such that A*P = Q*R
+ * It is the combination of the fill-in reducing permutation and numerical column pivoting.
*/
const PermutationType& colsPermutation() const
{
eigen_assert(m_isInitialized && "Decomposition is not initialized.");
- return m_perm_c;
+ return m_outputPerm_c;
}
+ /** \returns A string describing the type of error.
+ * This method is provided to ease debugging, not to handle errors.
+ */
+ std::string lastErrorMessage() const { return m_lastError; }
+
/** \internal */
template<typename Rhs, typename Dest>
bool _solve(const MatrixBase<Rhs> &B, MatrixBase<Dest> &dest) const
{
eigen_assert(m_isInitialized && "The factorization should be called first, use compute()");
eigen_assert(this->rows() == B.rows() && "SparseQR::solve() : invalid number of rows in the right hand side matrix");
- Index rank = this->matrixR().cols();
+
+ Index rank = this->rank();
+
// Compute Q^T * b;
- dest = this->matrixQ().transpose() * B;
- // Solve with the triangular matrix R
- Dest y;
- y = this->matrixR().template triangularView<Upper>().solve(dest.derived().topRows(rank));
+ typename Dest::PlainObject y, b;
+ y = this->matrixQ().transpose() * B;
+ b = y;
+ // Solve with the triangular matrix R
+ y.topRows(rank) = this->matrixR().topLeftCorner(rank, rank).template triangularView<Upper>().solve(b.topRows(rank));
+ y.bottomRows(y.size()-rank).setZero();
+
// Apply the column permutation
- if (m_perm_c.size()) dest.topRows(rank) = colsPermutation().inverse() * y;
- else dest = y;
+ if (m_perm_c.size()) dest.topRows(cols()) = colsPermutation() * y.topRows(cols());
+ else dest = y.topRows(cols());
m_info = Success;
return true;
}
+ /** Sets the threshold that is used to determine linearly dependent columns during the factorization.
+ *
+ * In practice, if during the factorization the norm of the column that has to be eliminated is below
+ * this threshold, then the entire column is treated as zero, and it is moved to the end.
+ */
+ void setPivotThreshold(const RealScalar& threshold)
+ {
+ m_useDefaultThreshold = false;
+ m_threshold = threshold;
+ }
+
/** \returns the solution X of \f$ A X = B \f$ using the current decomposition of A.
*
* \sa compute()
@@ -167,15 +200,20 @@ class SparseQR
bool m_analysisIsok;
bool m_factorizationIsok;
mutable ComputationInfo m_info;
+ std::string m_lastError;
QRMatrixType m_pmat; // Temporary matrix
QRMatrixType m_R; // The triangular factor matrix
QRMatrixType m_Q; // The orthogonal reflectors
ScalarVector m_hcoeffs; // The Householder coefficients
- PermutationType m_perm_c; // Column permutation
- PermutationType m_perm_r; // Column permutation
+ PermutationType m_perm_c; // Fill-reducing Column permutation
+ PermutationType m_pivotperm; // The permutation for rank revealing
+ PermutationType m_outputPerm_c; // The final column permutation
+ RealScalar m_threshold; // Threshold to determine null Householder reflections
+ bool m_useDefaultThreshold; // Use default threshold
+ Index m_nonzeropivots; // Number of non zero pivots found
IndexVector m_etree; // Column elimination tree
IndexVector m_firstRowElt; // First element in each row
- IndexVector m_found_diag_elem; // Existence of diagonal elements
+
template <typename, typename > friend struct SparseQR_QProduct;
};
@@ -184,7 +222,8 @@ class SparseQR
*
* In this step, the fill-reducing permutation is computed and applied to the columns of A
 * and the column elimination tree is computed as well. Only the sparsity pattern of \a mat is exploited.
- * \note In this step it is assumed that there is no empty row in the matrix \a mat
+ *
+ * \note In this step it is assumed that there is no empty row in the matrix \a mat.
*/
template <typename MatrixType, typename OrderingType>
void SparseQR<MatrixType,OrderingType>::analyzePattern(const MatrixType& mat)
@@ -194,21 +233,20 @@ void SparseQR<MatrixType,OrderingType>::analyzePattern(const MatrixType& mat)
ord(mat, m_perm_c);
Index n = mat.cols();
Index m = mat.rows();
- // Permute the input matrix... only the column pointers are permuted
- // FIXME: directly send "m_perm.inverse() * mat" to coletree -> need an InnerIterator to the sparse-permutation-product expression.
- m_pmat = mat;
- m_pmat.uncompress();
- for (int i = 0; i < n; i++)
+
+ if (!m_perm_c.size())
{
- Index p = m_perm_c.size() ? m_perm_c.indices()(i) : i;
- m_pmat.outerIndexPtr()[p] = mat.outerIndexPtr()[i];
- m_pmat.innerNonZeroPtr()[p] = mat.outerIndexPtr()[i+1] - mat.outerIndexPtr()[i];
+ m_perm_c.resize(n);
+ m_perm_c.indices().setLinSpaced(n, 0,n-1);
}
+
// Compute the column elimination tree of the permuted matrix
- internal::coletree(m_pmat, m_etree, m_firstRowElt);
+ m_outputPerm_c = m_perm_c.inverse();
+ internal::coletree(mat, m_etree, m_firstRowElt, m_outputPerm_c.indices().data());
m_R.resize(n, n);
- m_Q.resize(m, m);
+ m_Q.resize(m, n);
+
// Allocate space for nonzero elements : rough estimation
m_R.reserve(2*mat.nonZeros()); //FIXME Get a more accurate estimation through symbolic factorization with the etree
m_Q.reserve(2*mat.nonZeros());
@@ -216,7 +254,7 @@ void SparseQR<MatrixType,OrderingType>::analyzePattern(const MatrixType& mat)
m_analysisIsok = true;
}
-/** \brief Perform the numerical QR factorization of the input matrix
+/** \brief Performs the numerical QR factorization of the input matrix
*
* The function SparseQR::analyzePattern(const MatrixType&) must have been called beforehand with
 * a matrix having the same sparsity pattern as \a mat.
@@ -226,156 +264,220 @@ void SparseQR<MatrixType,OrderingType>::analyzePattern(const MatrixType& mat)
template <typename MatrixType, typename OrderingType>
void SparseQR<MatrixType,OrderingType>::factorize(const MatrixType& mat)
{
+ using std::abs;
+ using std::max;
+
eigen_assert(m_analysisIsok && "analyzePattern() should be called before this step");
Index m = mat.rows();
Index n = mat.cols();
IndexVector mark(m); mark.setConstant(-1); // Record the visited nodes
IndexVector Ridx(n), Qidx(m); // Store temporarily the row indexes for the current column of R and Q
Index nzcolR, nzcolQ; // Number of nonzero for the current column of R and Q
- Index pcol;
- ScalarVector tval(m); tval.setZero(); // Temporary vector
- IndexVector iperm(n);
+ ScalarVector tval(m); // The dense vector used to compute the current column
bool found_diag;
- if (m_perm_c.size())
- for(int i = 0; i < n; i++) iperm(m_perm_c.indices()(i)) = i;
- else
- iperm.setLinSpaced(n, 0, n-1);
-
- // Left looking QR factorization : Compute a column of R and Q at a time
- for (Index col = 0; col < n; col++)
+
+ m_pmat = mat;
+ m_pmat.uncompress(); // To have the innerNonZeroPtr allocated
+ // Apply the fill-in reducing permutation lazily:
+ for (int i = 0; i < n; i++)
{
+ Index p = m_perm_c.size() ? m_perm_c.indices()(i) : i;
+ m_pmat.outerIndexPtr()[p] = mat.outerIndexPtr()[i];
+ m_pmat.innerNonZeroPtr()[p] = mat.outerIndexPtr()[i+1] - mat.outerIndexPtr()[i];
+ }
+
+  /* Compute the default threshold, see:
+   * Tim Davis, "Algorithm 915, SuiteSparseQR: Multifrontal Multithreaded Rank-Revealing
+   * Sparse QR Factorization", ACM Trans. on Math. Soft. 38(1), 2011, Page 8:3
+ */
+ if(m_useDefaultThreshold)
+ {
+ RealScalar max2Norm = 0.0;
+ for (int j = 0; j < n; j++) max2Norm = (max)(max2Norm, m_pmat.col(j).norm());
+ m_threshold = 20 * (m + n) * max2Norm * NumTraits<RealScalar>::epsilon();
+ }
+
+ // Initialize the numerical permutation
+ m_pivotperm.setIdentity(n);
+
+ Index nonzeroCol = 0; // Record the number of valid pivots
+
+ // Left looking rank-revealing QR factorization: compute a column of R and Q at a time
+ for (Index col = 0; col < n; ++col)
+ {
+ mark.setConstant(-1);
m_R.startVec(col);
m_Q.startVec(col);
- mark(col) = col;
- Qidx(0) = col;
+ mark(nonzeroCol) = col;
+ Qidx(0) = nonzeroCol;
nzcolR = 0; nzcolQ = 1;
- pcol = iperm(col);
found_diag = false;
- // Find the nonzero locations of the column k of R,
- // i.e All the nodes (with indexes lower than k) reachable through the col etree rooted at node k
- for (typename MatrixType::InnerIterator itp(mat, pcol); itp || !found_diag; ++itp)
+ tval.setZero();
+
+ // Symbolic factorization: find the nonzero locations of the column k of the factors R and Q, i.e.,
+ // all the nodes (with indexes lower than rank) reachable through the column elimination tree (etree) rooted at node k.
+ // Note: if the diagonal entry does not exist, then its contribution must be explicitly added,
+  // thus the trick with found_diag, which permits one more iteration on the diagonal element if it has not been found yet.
+ for (typename MatrixType::InnerIterator itp(m_pmat, col); itp || !found_diag; ++itp)
{
- Index curIdx = col;
- if (itp) curIdx = itp.row();
- if(curIdx == col) found_diag = true;
- // Get the nonzeros indexes of the current column of R
+ Index curIdx = nonzeroCol ;
+ if(itp) curIdx = itp.row();
+ if(curIdx == nonzeroCol) found_diag = true;
+
+ // Get the nonzeros indexes of the current column of R
Index st = m_firstRowElt(curIdx); // The traversal of the etree starts here
if (st < 0 )
{
- std::cerr << " Empty row found during Numerical factorization ... Abort \n";
- m_info = NumericalIssue;
+ m_lastError = "Empty row found during numerical factorization";
+ m_info = InvalidInput;
return;
}
+
// Traverse the etree
Index bi = nzcolR;
for (; mark(st) != col; st = m_etree(st))
{
- Ridx(nzcolR) = st; // Add this row to the list
- mark(st) = col; // Mark this row as visited
+ Ridx(nzcolR) = st; // Add this row to the list,
+ mark(st) = col; // and mark this row as visited
nzcolR++;
}
+
// Reverse the list to get the topological ordering
Index nt = nzcolR-bi;
- for(int i = 0; i < nt/2; i++) std::swap(Ridx(bi+i), Ridx(nzcolR-i-1));
+ for(Index i = 0; i < nt/2; i++) std::swap(Ridx(bi+i), Ridx(nzcolR-i-1));
- // Copy the current row value of mat
- if (itp) tval(curIdx) = itp.value();
- else tval(curIdx) = Scalar(0.);
+        // Copy the current (curIdx, col) value of the input matrix
+ if(itp) tval(curIdx) = itp.value();
+ else tval(curIdx) = Scalar(0);
// Compute the pattern of Q(:,k)
- if (curIdx > col && mark(curIdx) < col)
+        if(curIdx > nonzeroCol && mark(curIdx) != col)
{
- Qidx(nzcolQ) = curIdx; // Add this row to the pattern of Q
- mark(curIdx) = col; // And mark it as visited
+ Qidx(nzcolQ) = curIdx; // Add this row to the pattern of Q,
+ mark(curIdx) = col; // and mark it as visited
nzcolQ++;
}
}
-
+
// Browse all the indexes of R(:,col) in reverse order
for (Index i = nzcolR-1; i >= 0; i--)
{
- Index curIdx = Ridx(i);
- // Apply the <curIdx> householder vector to tval
- Scalar tdot(0.);
- //First compute q'*tval
- for (typename QRMatrixType::InnerIterator itq(m_Q, curIdx); itq; ++itq)
- {
- tdot += internal::conj(itq.value()) * tval(itq.row());
- }
+ Index curIdx = m_pivotperm.indices()(Ridx(i));
+
+ // Apply the curIdx-th householder vector to the current column (temporarily stored into tval)
+ Scalar tdot(0);
+
+ // First compute q' * tval
+ tdot = m_Q.col(curIdx).dot(tval);
+
tdot *= m_hcoeffs(curIdx);
- // Then compute tval = tval - q*tau
+
+ // Then update tval = tval - q * tau
+ // FIXME: tval -= tdot * m_Q.col(curIdx) should amount to the same (need to check/add support for efficient "dense ?= sparse")
for (typename QRMatrixType::InnerIterator itq(m_Q, curIdx); itq; ++itq)
- {
tval(itq.row()) -= itq.value() * tdot;
- }
- //With the topological ordering, updates for curIdx are fully done at this point
- m_R.insertBackByOuterInnerUnordered(col, curIdx) = tval(curIdx);
- tval(curIdx) = Scalar(0.);
-
+
// Detect fill-in for the current column of Q
- if(m_etree(curIdx) == col)
+ if(m_etree(Ridx(i)) == nonzeroCol)
{
for (typename QRMatrixType::InnerIterator itq(m_Q, curIdx); itq; ++itq)
{
Index iQ = itq.row();
- if (mark(iQ) < col)
+ if (mark(iQ) != col)
{
- Qidx(nzcolQ++) = iQ; // Add this row to the pattern of Q
- mark(iQ) = col; //And mark it as visited
+ Qidx(nzcolQ++) = iQ; // Add this row to the pattern of Q,
+ mark(iQ) = col; // and mark it as visited
}
}
}
- } // End update current column of R
+ } // End update current column
+
+      // Compute the Householder reflection that eliminates the current column
+ // FIXME this step should call the Householder module.
+ Scalar tau;
+ RealScalar beta;
+ Scalar c0 = nzcolQ ? tval(Qidx(0)) : Scalar(0);
+
+ // First, the squared norm of Q((col+1):m, col)
+ RealScalar sqrNorm = 0.;
+ for (Index itq = 1; itq < nzcolQ; ++itq) sqrNorm += internal::abs2(tval(Qidx(itq)));
- // Record the current (unscaled) column of V.
- for (Index itq = 0; itq < nzcolQ; ++itq)
- {
- Index iQ = Qidx(itq);
- m_Q.insertBackByOuterInnerUnordered(col,iQ) = tval(iQ);
- tval(iQ) = Scalar(0.);
- }
- // Compute the new Householder reflection
- RealScalar sqrNorm =0.;
- Scalar tau; RealScalar beta;
- typename QRMatrixType::InnerIterator itq(m_Q, col);
- Scalar c0 = (itq) ? itq.value() : Scalar(0.);
- //First, the squared norm of Q((col+1):m, col)
- if(itq) ++itq;
- for (; itq; ++itq)
- {
- sqrNorm += internal::abs2(itq.value());
- }
if(sqrNorm == RealScalar(0) && internal::imag(c0) == RealScalar(0))
{
tau = RealScalar(0);
beta = internal::real(c0);
- typename QRMatrixType::InnerIterator it(m_Q,col);
- it.valueRef() = 1; //FIXME A row permutation should be performed at this point
- }
+ tval(Qidx(0)) = 1;
+ }
else
{
beta = std::sqrt(internal::abs2(c0) + sqrNorm);
if(internal::real(c0) >= RealScalar(0))
beta = -beta;
- typename QRMatrixType::InnerIterator it(m_Q,col);
- it.valueRef() = 1;
- for (++it; it; ++it)
- {
- it.valueRef() /= (c0 - beta);
- }
+ tval(Qidx(0)) = 1;
+ for (Index itq = 1; itq < nzcolQ; ++itq)
+ tval(Qidx(itq)) /= (c0 - beta);
tau = internal::conj((beta-c0) / beta);
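+          // For reference: with the leading entry scaled to v(0) = 1, the reflector is
+          //   H = I - tau * v * v^*,  beta = -sign(real(c0)) * sqrt(|c0|^2 + ||tail||^2),
+          //   v(i) = tail(i) / (c0 - beta),  tau = conj((beta - c0) / beta),
+          // so that H * [c0; tail] = [beta; 0; ...; 0].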
}
- m_hcoeffs(col) = tau;
- m_R.insertBackByOuterInnerUnordered(col, col) = beta;
+
+ // Insert values in R
+ for (Index i = nzcolR-1; i >= 0; i--)
+ {
+ Index curIdx = Ridx(i);
+ if(curIdx < nonzeroCol)
+ {
+ m_R.insertBackByOuterInnerUnordered(col, curIdx) = tval(curIdx);
+ tval(curIdx) = Scalar(0.);
+ }
+ }
+
+ if(abs(beta) >= m_threshold)
+ {
+ m_R.insertBackByOuterInner(col, nonzeroCol) = beta;
+ nonzeroCol++;
+ // The householder coefficient
+ m_hcoeffs(col) = tau;
+ // Record the householder reflections
+ for (Index itq = 0; itq < nzcolQ; ++itq)
+ {
+ Index iQ = Qidx(itq);
+ m_Q.insertBackByOuterInnerUnordered(col,iQ) = tval(iQ);
+ tval(iQ) = Scalar(0.);
+ }
+ }
+ else
+ {
+        // Zero pivot found: implicitly move this column to the end
+ m_hcoeffs(col) = Scalar(0);
+ for (Index j = nonzeroCol; j < n-1; j++)
+ std::swap(m_pivotperm.indices()(j), m_pivotperm.indices()[j+1]);
+
+ // Recompute the column elimination tree
+ internal::coletree(m_pmat, m_etree, m_firstRowElt, m_pivotperm.indices().data());
+ }
}
+
// Finalize the column pointers of the sparse matrices R and Q
- m_R.finalize(); m_R.makeCompressed();
- m_Q.finalize(); m_Q.makeCompressed();
+ m_Q.finalize();
+ m_Q.makeCompressed();
+ m_R.finalize();
+ m_R.makeCompressed();
+
+ m_nonzeropivots = nonzeroCol;
+
+ if(nonzeroCol<n)
+ {
+ // Permute the triangular factor to put the 'dead' columns to the end
+ MatrixType tempR(m_R);
+ m_R = tempR * m_pivotperm;
+
+ // Update the column permutation
+ m_outputPerm_c = m_outputPerm_c * m_pivotperm;
+ }
+
m_isInitialized = true;
m_factorizationIsok = true;
m_info = Success;
-
}
namespace internal {
@@ -404,14 +506,13 @@ struct SparseQR_QProduct : ReturnByValue<SparseQR_QProduct<SparseQRType, Derived
// Get the references
SparseQR_QProduct(const SparseQRType& qr, const Derived& other, bool transpose) :
m_qr(qr),m_other(other),m_transpose(transpose) {}
- inline Index rows() const { return m_transpose ? m_qr.rowsQ() : m_qr.cols(); }
+ inline Index rows() const { return m_transpose ? m_qr.rows() : m_qr.cols(); }
inline Index cols() const { return m_other.cols(); }
// Assign to a vector
template<typename DesType>
void evalTo(DesType& res) const
{
- Index m = m_qr.rows();
Index n = m_qr.cols();
if (m_transpose)
{
@@ -420,11 +521,13 @@ struct SparseQR_QProduct : ReturnByValue<SparseQR_QProduct<SparseQRType, Derived
res = m_other;
for (Index k = 0; k < n; k++)
{
- Scalar tau;
- // Or alternatively
- tau = m_qr.m_Q.col(k).tail(m-k).dot(res.tail(m-k));
+ Scalar tau = Scalar(0);
+ tau = m_qr.m_Q.col(k).dot(res);
tau = tau * m_qr.m_hcoeffs(k);
- res -= tau * m_qr.m_Q.col(k);
+ for (typename MatrixType::InnerIterator itq(m_qr.m_Q, k); itq; ++itq)
+ {
+ res(itq.row()) -= itq.value() * tau;
+ }
}
}
else
@@ -434,8 +537,8 @@ struct SparseQR_QProduct : ReturnByValue<SparseQR_QProduct<SparseQRType, Derived
res = m_other;
for (Index k = n-1; k >=0; k--)
{
- Scalar tau;
- tau = m_qr.m_Q.col(k).tail(m-k).dot(res.tail(m-k));
+ Scalar tau = Scalar(0);
+ tau = m_qr.m_Q.col(k).dot(res);
tau = tau * m_qr.m_hcoeffs(k);
res -= tau * m_qr.m_Q.col(k);
}
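Taken together, the hunks above make SparseQR rank-revealing: a column whose Householder pivot beta falls below m_threshold is implicitly permuted to the end, and m_nonzeropivots records the detected rank. A minimal usage sketch of the resulting class (the rank() and setPivotThreshold() accessors exposing m_nonzeropivots and m_threshold are assumed; their declarations lie outside this excerpt):

\code
#include <Eigen/SparseQR>
using namespace Eigen;

SparseMatrix<double> A;     // m x n, compressed, column major
VectorXd b, x;
// ... fill A and b ...
SparseQR<SparseMatrix<double>, COLAMDOrdering<int> > qr;
qr.setPivotThreshold(1e-8);       // optional: override the default Davis-style threshold
qr.compute(A);                    // analyzePattern() + the factorize() shown above
if(qr.info() != Success) return;  // e.g. InvalidInput when an empty row is found
x = qr.solve(b);                  // least-squares solve through Q^T b, then R
Index rank = qr.rank();           // number of pivots above the threshold
\endcode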
diff --git a/Eigen/src/SuperLUSupport/SuperLUSupport.h b/Eigen/src/SuperLUSupport/SuperLUSupport.h
index cd6c4b91f..3034c7af5 100644
--- a/Eigen/src/SuperLUSupport/SuperLUSupport.h
+++ b/Eigen/src/SuperLUSupport/SuperLUSupport.h
@@ -353,14 +353,14 @@ class SuperLUBase : internal::noncopyable
*
* \sa compute()
*/
-// template<typename Rhs>
-// inline const internal::sparse_solve_retval<SuperLU, Rhs> solve(const SparseMatrixBase<Rhs>& b) const
-// {
-// eigen_assert(m_isInitialized && "SuperLU is not initialized.");
-// eigen_assert(rows()==b.rows()
-// && "SuperLU::solve(): invalid number of rows of the right hand side matrix b");
-// return internal::sparse_solve_retval<SuperLU, Rhs>(*this, b.derived());
-// }
+ template<typename Rhs>
+ inline const internal::sparse_solve_retval<SuperLUBase, Rhs> solve(const SparseMatrixBase<Rhs>& b) const
+ {
+ eigen_assert(m_isInitialized && "SuperLU is not initialized.");
+ eigen_assert(rows()==b.rows()
+ && "SuperLU::solve(): invalid number of rows of the right hand side matrix b");
+ return internal::sparse_solve_retval<SuperLUBase, Rhs>(*this, b.derived());
+ }
/** Performs a symbolic decomposition on the sparsity of \a matrix.
*
@@ -1015,7 +1015,7 @@ struct sparse_solve_retval<SuperLUBase<_MatrixType,Derived>, Rhs>
template<typename Dest> void evalTo(Dest& dst) const
{
- dec().derived()._solve(rhs(),dst);
+ this->defaultEvalTo(dst);
}
};
diff --git a/Eigen/src/UmfPackSupport/UmfPackSupport.h b/Eigen/src/UmfPackSupport/UmfPackSupport.h
index 22d049089..d85b8be85 100644
--- a/Eigen/src/UmfPackSupport/UmfPackSupport.h
+++ b/Eigen/src/UmfPackSupport/UmfPackSupport.h
@@ -215,14 +215,14 @@ class UmfPackLU : internal::noncopyable
*
* \sa compute()
*/
-// template<typename Rhs>
-// inline const internal::sparse_solve_retval<UmfPAckLU, Rhs> solve(const SparseMatrixBase<Rhs>& b) const
-// {
-// eigen_assert(m_isInitialized && "UmfPAckLU is not initialized.");
-// eigen_assert(rows()==b.rows()
-// && "UmfPAckLU::solve(): invalid number of rows of the right hand side matrix b");
-// return internal::sparse_solve_retval<UmfPAckLU, Rhs>(*this, b.derived());
-// }
+ template<typename Rhs>
+ inline const internal::sparse_solve_retval<UmfPackLU, Rhs> solve(const SparseMatrixBase<Rhs>& b) const
+ {
+ eigen_assert(m_isInitialized && "UmfPackLU is not initialized.");
+ eigen_assert(rows()==b.rows()
+ && "UmfPackLU::solve(): invalid number of rows of the right hand side matrix b");
+ return internal::sparse_solve_retval<UmfPackLU, Rhs>(*this, b.derived());
+ }
/** Performs a symbolic decomposition on the sparsity of \a matrix.
*
@@ -381,7 +381,8 @@ bool UmfPackLU<MatrixType>::_solve(const MatrixBase<BDerived> &b, MatrixBase<XDe
const int rhsCols = b.cols();
eigen_assert((BDerived::Flags&RowMajorBit)==0 && "UmfPackLU backend does not support non col-major rhs yet");
eigen_assert((XDerived::Flags&RowMajorBit)==0 && "UmfPackLU backend does not support non col-major result yet");
-
+  eigen_assert(b.derived().data() != x.derived().data() && "UmfPackLU does not support inplace solve");
+
int errorCode;
for (int j=0; j<rhsCols; ++j)
{
@@ -420,7 +421,7 @@ struct sparse_solve_retval<UmfPackLU<_MatrixType>, Rhs>
template<typename Dest> void evalTo(Dest& dst) const
{
- dec()._solve(rhs(),dst);
+ this->defaultEvalTo(dst);
}
};
diff --git a/Eigen/src/misc/SparseSolve.h b/Eigen/src/misc/SparseSolve.h
index 272c4a479..244bb8ec7 100644
--- a/Eigen/src/misc/SparseSolve.h
+++ b/Eigen/src/misc/SparseSolve.h
@@ -47,6 +47,23 @@ template<typename _DecompositionType, typename Rhs> struct sparse_solve_retval_b
}
protected:
+ template<typename DestScalar, int DestOptions, typename DestIndex>
+ inline void defaultEvalTo(SparseMatrix<DestScalar,DestOptions,DestIndex>& dst) const
+ {
+ // we process the sparse rhs per block of NbColsAtOnce columns temporarily stored into a dense matrix.
+ static const int NbColsAtOnce = 4;
+ int rhsCols = m_rhs.cols();
+ int size = m_rhs.rows();
+ Eigen::Matrix<DestScalar,Dynamic,Dynamic> tmp(size,rhsCols);
+ Eigen::Matrix<DestScalar,Dynamic,Dynamic> tmpX(size,rhsCols);
+ for(int k=0; k<rhsCols; k+=NbColsAtOnce)
+ {
+ int actualCols = std::min<int>(rhsCols-k, NbColsAtOnce);
+ tmp.leftCols(actualCols) = m_rhs.middleCols(k,actualCols);
+ tmpX.leftCols(actualCols) = m_dec.solve(tmp.leftCols(actualCols));
+ dst.middleCols(k,actualCols) = tmpX.leftCols(actualCols).sparseView();
+ }
+ }
const DecompositionType& m_dec;
typename Rhs::Nested m_rhs;
};
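The defaultEvalTo() helper above is what the SuperLU and UmfPack sparse_solve_retval specializations now delegate to: a sparse right-hand side is solved through dense temporaries, four columns at a time, and the result is sparsified back. A sketch of the user-visible effect (A and B are placeholder matrices):

\code
UmfPackLU<SparseMatrix<double> > lu(A);
SparseMatrix<double> B, X;  // B holds several sparse right-hand sides
X = lu.solve(B);            // internally: dense blocks of 4 columns, then sparseView()
\endcode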
diff --git a/blas/level2_cplx_impl.h b/blas/level2_cplx_impl.h
index f52d384a9..b850b6cd1 100644
--- a/blas/level2_cplx_impl.h
+++ b/blas/level2_cplx_impl.h
@@ -216,7 +216,7 @@ int EIGEN_BLAS_FUNC(hpr2)(char *uplo, int *n, RealScalar *palpha, RealScalar *px
*/
int EIGEN_BLAS_FUNC(her)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *pa, int *lda)
{
- typedef void (*functype)(int, Scalar*, int, const Scalar*, Scalar);
+ typedef void (*functype)(int, Scalar*, int, const Scalar*, const Scalar*, const Scalar&);
static functype func[2];
static bool init = false;
@@ -252,7 +252,7 @@ int EIGEN_BLAS_FUNC(her)(char *uplo, int *n, RealScalar *palpha, RealScalar *px,
if(code>=2 || func[code]==0)
return 0;
- func[code](*n, a, *lda, x_cpy, alpha);
+ func[code](*n, a, *lda, x_cpy, x_cpy, alpha);
matrix(a,*n,*n,*lda).diagonal().imag().setZero();
diff --git a/blas/level2_impl.h b/blas/level2_impl.h
index bd41f7e60..5f3941975 100644
--- a/blas/level2_impl.h
+++ b/blas/level2_impl.h
@@ -130,7 +130,7 @@ int EIGEN_BLAS_FUNC(trsv)(char *uplo, char *opa, char *diag, int *n, RealScalar
int EIGEN_BLAS_FUNC(trmv)(char *uplo, char *opa, char *diag, int *n, RealScalar *pa, int *lda, RealScalar *pb, int *incb)
{
- typedef void (*functype)(int, int, const Scalar *, int, const Scalar *, int, Scalar *, int, Scalar);
+ typedef void (*functype)(int, int, const Scalar *, int, const Scalar *, int, Scalar *, int, const Scalar&);
static functype func[16];
static bool init = false;
diff --git a/blas/level2_real_impl.h b/blas/level2_real_impl.h
index febf08d1f..8d56eaaa1 100644
--- a/blas/level2_real_impl.h
+++ b/blas/level2_real_impl.h
@@ -85,7 +85,7 @@ int EIGEN_BLAS_FUNC(syr)(char *uplo, int *n, RealScalar *palpha, RealScalar *px,
// init = true;
// }
- typedef void (*functype)(int, Scalar*, int, const Scalar*, Scalar);
+ typedef void (*functype)(int, Scalar*, int, const Scalar*, const Scalar*, const Scalar&);
static functype func[2];
static bool init = false;
@@ -121,7 +121,7 @@ int EIGEN_BLAS_FUNC(syr)(char *uplo, int *n, RealScalar *palpha, RealScalar *px,
if(code>=2 || func[code]==0)
return 0;
- func[code](*n, c, *ldc, x_cpy, alpha);
+ func[code](*n, c, *ldc, x_cpy, x_cpy, alpha);
if(x_cpy!=x) delete[] x_cpy;
diff --git a/blas/level3_impl.h b/blas/level3_impl.h
index 84c9f4f2b..a57189f53 100644
--- a/blas/level3_impl.h
+++ b/blas/level3_impl.h
@@ -152,7 +152,7 @@ int EIGEN_BLAS_FUNC(trsm)(char *side, char *uplo, char *opa, char *diag, int *m,
int EIGEN_BLAS_FUNC(trmm)(char *side, char *uplo, char *opa, char *diag, int *m, int *n, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pb, int *ldb)
{
// std::cerr << "in trmm " << *side << " " << *uplo << " " << *opa << " " << *diag << " " << *m << " " << *n << " " << *lda << " " << *ldb << " " << *palpha << "\n";
- typedef void (*functype)(DenseIndex, DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, Scalar, internal::level3_blocking<Scalar,Scalar>&);
+ typedef void (*functype)(DenseIndex, DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, const Scalar&, internal::level3_blocking<Scalar,Scalar>&);
static functype func[32];
static bool init = false;
if(!init)
@@ -306,7 +306,7 @@ int EIGEN_BLAS_FUNC(syrk)(char *uplo, char *op, int *n, int *k, RealScalar *palp
{
// std::cerr << "in syrk " << *uplo << " " << *op << " " << *n << " " << *k << " " << *palpha << " " << *lda << " " << *pbeta << " " << *ldc << "\n";
#if !ISCOMPLEX
- typedef void (*functype)(DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, Scalar);
+ typedef void (*functype)(DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, const Scalar&);
static functype func[8];
static bool init = false;
@@ -500,7 +500,7 @@ int EIGEN_BLAS_FUNC(hemm)(char *side, char *uplo, int *m, int *n, RealScalar *pa
// c = alpha*conj(a')*a + beta*c for op = 'C'or'c'
int EIGEN_BLAS_FUNC(herk)(char *uplo, char *op, int *n, int *k, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pbeta, RealScalar *pc, int *ldc)
{
- typedef void (*functype)(DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, Scalar);
+ typedef void (*functype)(DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, const Scalar&);
static functype func[8];
static bool init = false;
diff --git a/cmake/CMakeDetermineVSServicePack.cmake b/cmake/CMakeDetermineVSServicePack.cmake
deleted file mode 100644
index b89462308..000000000
--- a/cmake/CMakeDetermineVSServicePack.cmake
+++ /dev/null
@@ -1,103 +0,0 @@
-# - Includes a public function for assisting users in trying to determine the
-# Visual Studio service pack in use.
-#
-# Sets the passed in variable to one of the following values or an empty
-# string if unknown.
-# vc80
-# vc80sp1
-# vc90
-# vc90sp1
-#
-# Usage:
-# ===========================
-#
-# if(MSVC)
-# include(CMakeDetermineVSServicePack)
-# DetermineVSServicePack( my_service_pack )
-#
-# if( my_service_pack )
-# message(STATUS "Detected: ${my_service_pack}")
-# endif()
-# endif()
-#
-# ===========================
-
-#=============================================================================
-# Copyright 2009-2010 Kitware, Inc.
-# Copyright 2009-2010 Philip Lowman <philip@yhbt.com>
-#
-# Distributed under the OSI-approved BSD License (the "License");
-# see accompanying file Copyright.txt for details.
-#
-# This software is distributed WITHOUT ANY WARRANTY; without even the
-# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-# See the License for more information.
-#=============================================================================
-# (To distribute this file outside of CMake, substitute the full
-# License text for the above reference.)
-
-# [INTERNAL]
-# Please do not call this function directly
-function(_DetermineVSServicePackFromCompiler _OUT_VAR _cl_version)
- if (${_cl_version} VERSION_EQUAL "14.00.50727.42")
- set(_version "vc80")
- elseif(${_cl_version} VERSION_EQUAL "14.00.50727.762")
- set(_version "vc80sp1")
- elseif(${_cl_version} VERSION_EQUAL "15.00.21022.08")
- set(_version "vc90")
- elseif(${_cl_version} VERSION_EQUAL "15.00.30729.01")
- set(_version "vc90sp1")
- elseif(${_cl_version} VERSION_EQUAL "16.00.30319.01")
- set(_version "vc100")
- else()
- set(_version "")
- endif()
- set(${_OUT_VAR} ${_version} PARENT_SCOPE)
-endfunction()
-
-#
-# A function to call to determine the Visual Studio service pack
-# in use. See documentation above.
-function(DetermineVSServicePack _pack)
- if(NOT DETERMINED_VS_SERVICE_PACK OR NOT ${_pack})
- if(${CMAKE_BUILD_TOOL} STREQUAL "nmake")
- EXECUTE_PROCESS(COMMAND ${CMAKE_CXX_COMPILER} "/?"
- ERROR_VARIABLE _output)
- set(DETERMINED_VS_SERVICE_PACK ${_output})
- else()
- file(WRITE "${CMAKE_BINARY_DIR}/return0.cc"
- "int main() { return 0; }\n")
-
- try_compile(DETERMINED_VS_SERVICE_PACK
- "${CMAKE_BINARY_DIR}"
- "${CMAKE_BINARY_DIR}/return0.cc"
- OUTPUT_VARIABLE _output
- COPY_FILE "${CMAKE_BINARY_DIR}/return0.cc")
-
- file(REMOVE "${CMAKE_BINARY_DIR}/return0.cc")
- endif()
-
- if(DETERMINED_VS_SERVICE_PACK AND _output)
- string(REGEX MATCH "Compiler Version [0-9]+.[0-9]+.[0-9]+.[0-9]+"
- _cl_version "${_output}")
- if(_cl_version)
- string(REGEX MATCHALL "[0-9]+"
- _cl_version_list "${_cl_version}")
- list(GET _cl_version_list 0 _major)
- list(GET _cl_version_list 1 _minor)
- list(GET _cl_version_list 2 _patch)
- list(GET _cl_version_list 3 _tweak)
-
- set(_cl_version_string ${_major}.${_minor}.${_patch}.${_tweak})
-
- # Call helper function to determine VS version
- _DetermineVSServicePackFromCompiler(_sp "${_cl_version_string}")
- if(_sp)
- #set(${_pack} "${_sp}(${_cl_version_string})" CACHE INTERNAL
- set(${_pack} "${_sp}" CACHE INTERNAL
- "The Visual Studio Release with Service Pack")
- endif()
- endif()
- endif()
- endif()
-endfunction()
diff --git a/cmake/EigenConfigureTesting.cmake b/cmake/EigenConfigureTesting.cmake
index cf8f32c01..6216a9009 100644
--- a/cmake/EigenConfigureTesting.cmake
+++ b/cmake/EigenConfigureTesting.cmake
@@ -27,10 +27,10 @@ include(CTest)
# overwrite default DartConfiguration.tcl
# The workarounds are different for each version of the MSVC IDE
if(MSVC_IDE)
- if(MSVC_VERSION EQUAL 1600) # MSVC 2010
+ if(CMAKE_MAKE_PROGRAM_SAVE MATCHES "devenv") # devenv
+ set(EIGEN_MAKECOMMAND_PLACEHOLDER "${CMAKE_MAKE_PROGRAM_SAVE} Eigen.sln /build \"Release\" /project buildtests \n# ")
+ else() # msbuild
set(EIGEN_MAKECOMMAND_PLACEHOLDER "${CMAKE_MAKE_PROGRAM_SAVE} buildtests.vcxproj /p:Configuration=\${CTEST_CONFIGURATION_TYPE} \n# ")
- else() # MSVC 2008 (TODO check MSVC 2005)
- set(EIGEN_MAKECOMMAND_PLACEHOLDER "${CMAKE_MAKE_PROGRAM_SAVE} Eigen.sln /build \"Release\" /project buildtests \n# ")
endif()
else()
# for make and nmake
diff --git a/cmake/EigenDetermineVSServicePack.cmake b/cmake/EigenDetermineVSServicePack.cmake
new file mode 100644
index 000000000..8e5546a85
--- /dev/null
+++ b/cmake/EigenDetermineVSServicePack.cmake
@@ -0,0 +1,27 @@
+include(CMakeDetermineVSServicePack)
+
+# The code is almost identical to the CMake version. The only difference is that we remove
+# _DetermineVSServicePack_FastCheckVersionWithCompiler, which leads to errors on some systems.
+function(EigenDetermineVSServicePack _pack)
+ if(NOT DETERMINED_VS_SERVICE_PACK OR NOT ${_pack})
+
+ if(NOT DETERMINED_VS_SERVICE_PACK)
+ _DetermineVSServicePack_CheckVersionWithTryCompile(DETERMINED_VS_SERVICE_PACK _cl_version)
+ if(NOT DETERMINED_VS_SERVICE_PACK)
+ _DetermineVSServicePack_CheckVersionWithTryRun(DETERMINED_VS_SERVICE_PACK _cl_version)
+ endif()
+ endif()
+
+ if(DETERMINED_VS_SERVICE_PACK)
+
+ if(_cl_version)
+ # Call helper function to determine VS version
+ _DetermineVSServicePackFromCompiler(_sp "${_cl_version}")
+ if(_sp)
+ set(${_pack} ${_sp} CACHE INTERNAL
+ "The Visual Studio Release with Service Pack")
+ endif()
+ endif()
+ endif()
+ endif()
+endfunction()
diff --git a/cmake/EigenTesting.cmake b/cmake/EigenTesting.cmake
index 266043974..3780888af 100644
--- a/cmake/EigenTesting.cmake
+++ b/cmake/EigenTesting.cmake
@@ -73,6 +73,14 @@ macro(ei_add_test_internal testname testname_with_suffix)
else()
add_test(${testname_with_suffix} "${targetname}")
endif()
+
+  # Specify target and test labels according to EIGEN_CURRENT_SUBPROJECT
+ get_property(current_subproject GLOBAL PROPERTY EIGEN_CURRENT_SUBPROJECT)
+ if ((current_subproject) AND (NOT (current_subproject STREQUAL "")))
+ set_property(TARGET ${targetname} PROPERTY LABELS "Build${current_subproject}")
+ add_dependencies("Build${current_subproject}" ${targetname})
+ set_property(TEST ${testname_with_suffix} PROPERTY LABELS "${current_subproject}")
+ endif()
endmacro(ei_add_test_internal)
@@ -263,6 +271,7 @@ macro(ei_testing_print_summary)
endmacro(ei_testing_print_summary)
macro(ei_init_testing)
+ define_property(GLOBAL PROPERTY EIGEN_CURRENT_SUBPROJECT BRIEF_DOCS " " FULL_DOCS " ")
define_property(GLOBAL PROPERTY EIGEN_TESTED_BACKENDS BRIEF_DOCS " " FULL_DOCS " ")
define_property(GLOBAL PROPERTY EIGEN_MISSING_BACKENDS BRIEF_DOCS " " FULL_DOCS " ")
define_property(GLOBAL PROPERTY EIGEN_TESTING_SUMMARY BRIEF_DOCS " " FULL_DOCS " ")
@@ -303,8 +312,8 @@ endmacro(ei_set_sitename)
macro(ei_get_compilerver VAR)
if(MSVC)
# on windows system, we use a modified CMake script
- include(CMakeDetermineVSServicePack)
- DetermineVSServicePack( my_service_pack )
+ include(EigenDetermineVSServicePack)
+ EigenDetermineVSServicePack( my_service_pack )
if( my_service_pack )
set(${VAR} ${my_service_pack})
@@ -426,6 +435,10 @@ macro(ei_set_build_string)
else()
set(TMP_BUILD_STRING ${TMP_BUILD_STRING}-64bit)
endif()
+
+ if(EIGEN_BUILD_STRING_SUFFIX)
+ set(TMP_BUILD_STRING ${TMP_BUILD_STRING}-${EIGEN_BUILD_STRING_SUFFIX})
+ endif()
string(TOLOWER ${TMP_BUILD_STRING} BUILDNAME)
endmacro(ei_set_build_string)
diff --git a/cmake/FindSuperLU.cmake b/cmake/FindSuperLU.cmake
index ca72b4498..8a3df3666 100644
--- a/cmake/FindSuperLU.cmake
+++ b/cmake/FindSuperLU.cmake
@@ -14,9 +14,10 @@ find_path(SUPERLU_INCLUDES
${INCLUDE_INSTALL_DIR}
PATH_SUFFIXES
superlu
+ SRC
)
-find_library(SUPERLU_LIBRARIES superlu PATHS $ENV{SUPERLUDIR} ${LIB_INSTALL_DIR})
+find_library(SUPERLU_LIBRARIES superlu PATHS $ENV{SUPERLUDIR} ${LIB_INSTALL_DIR} PATH_SUFFIXES lib)
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(SUPERLU DEFAULT_MSG
diff --git a/cmake/language_support.cmake b/cmake/language_support.cmake
index 3414e6ea6..2ca303c92 100644
--- a/cmake/language_support.cmake
+++ b/cmake/language_support.cmake
@@ -24,6 +24,8 @@ function(workaround_9220 language language_works)
set(text
"project(test NONE)
cmake_minimum_required(VERSION 2.6.0)
+ set (CMAKE_Fortran_FLAGS \"${CMAKE_Fortran_FLAGS}\")
+ set (CMAKE_EXE_LINKER_FLAGS \"${CMAKE_EXE_LINKER_FLAGS}\")
enable_language(${language} OPTIONAL)
")
file(REMOVE_RECURSE ${CMAKE_BINARY_DIR}/language_tests/${language})
diff --git a/debug/msvc/eigen.natvis b/debug/msvc/eigen.natvis
new file mode 100644
index 000000000..da8985717
--- /dev/null
+++ b/debug/msvc/eigen.natvis
@@ -0,0 +1,235 @@
+<?xml version="1.0" encoding="utf-8"?>
+
+<AutoVisualizer xmlns="http://schemas.microsoft.com/vstudio/debugger/natvis/2010">
+
+ <!-- Fixed x Fixed Matrix -->
+ <Type Name="Eigen::Matrix&lt;*,*,*,*,*,*&gt;">
+ <AlternativeType Name="Eigen::Array&lt;*,-1,-1,*,*,*&gt;"/>
+ <DisplayString>[{$T2}, {$T3}] (fixed matrix)</DisplayString>
+ <Expand>
+ <ArrayItems Condition="Flags%2"> <!-- row major layout -->
+ <Rank>2</Rank>
+ <Size>$i==0 ? $T2 : $T3</Size>
+ <ValuePointer>m_storage.m_data.array</ValuePointer>
+ </ArrayItems>
+ <ArrayItems Condition="!(Flags%2)"> <!-- column major layout -->
+ <Direction>Backward</Direction>
+ <Rank>2</Rank>
+ <Size>$i==0 ? $T2 : $T3</Size>
+ <ValuePointer>m_storage.m_data.array</ValuePointer>
+ </ArrayItems>
+ </Expand>
+ </Type>
+
+ <!-- 2 x 2 Matrix -->
+ <Type Name="Eigen::Matrix&lt;*,2,2,*,*,*&gt;">
+ <AlternativeType Name="Eigen::Array&lt;*,2,2,*,*,*&gt;"/>
+ <DisplayString>[2, 2] (fixed matrix)</DisplayString>
+ <Expand>
+ <Synthetic Name="[row 0]" Condition="Flags%2">
+ <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]})</DisplayString>
+ </Synthetic>
+ <Synthetic Name="[row 0]" Condition="!(Flags%2)">
+ <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[2]})</DisplayString>
+ </Synthetic>
+ <Synthetic Name="[row 1]" Condition="Flags%2">
+ <DisplayString>({m_storage.m_data.array[2]}, {m_storage.m_data.array[3]})</DisplayString>
+ </Synthetic>
+ <Synthetic Name="[row 1]" Condition="!(Flags%2)">
+ <DisplayString>({m_storage.m_data.array[1]}, {m_storage.m_data.array[3]})</DisplayString>
+ </Synthetic>
+ </Expand>
+ </Type>
+
+ <!-- 3 x 3 Matrix -->
+ <Type Name="Eigen::Matrix&lt;*,3,3,*,*,*&gt;">
+ <AlternativeType Name="Eigen::Array&lt;*,3,3,*,*,*&gt;"/>
+ <DisplayString>[3, 3] (fixed matrix)</DisplayString>
+ <Expand>
+ <Synthetic Name="[row 0]" Condition="Flags%2">
+ <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]})</DisplayString>
+ </Synthetic>
+ <Synthetic Name="[row 0]" Condition="!(Flags%2)">
+ <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[3]}, {m_storage.m_data.array[6]})</DisplayString>
+ </Synthetic>
+ <Synthetic Name="[row 1]" Condition="Flags%2">
+ <DisplayString>({m_storage.m_data.array[3]}, {m_storage.m_data.array[4]}, {m_storage.m_data.array[5]})</DisplayString>
+ </Synthetic>
+ <Synthetic Name="[row 1]" Condition="!(Flags%2)">
+ <DisplayString>({m_storage.m_data.array[1]}, {m_storage.m_data.array[4]}, {m_storage.m_data.array[7]})</DisplayString>
+ </Synthetic>
+ <Synthetic Name="[row 2]" Condition="Flags%2">
+ <DisplayString>({m_storage.m_data.array[6]}, {m_storage.m_data.array[7]}, {m_storage.m_data.array[8]})</DisplayString>
+ </Synthetic>
+ <Synthetic Name="[row 2]" Condition="!(Flags%2)">
+ <DisplayString>({m_storage.m_data.array[2]}, {m_storage.m_data.array[5]}, {m_storage.m_data.array[8]})</DisplayString>
+ </Synthetic>
+ </Expand>
+ </Type>
+
+ <!-- 4 x 4 Matrix -->
+ <Type Name="Eigen::Matrix&lt;*,4,4,*,*,*&gt;">
+ <AlternativeType Name="Eigen::Array&lt;*,4,4,*,*,*&gt;"/>
+ <DisplayString>[4, 4] (fixed matrix)</DisplayString>
+ <Expand>
+ <Synthetic Name="[row 0]" Condition="Flags%2">
+ <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]}, {m_storage.m_data.array[3]})</DisplayString>
+ </Synthetic>
+ <Synthetic Name="[row 0]" Condition="!(Flags%2)">
+ <DisplayString>({m_storage.m_data.array[0]}, {m_storage.m_data.array[4]}, {m_storage.m_data.array[8]}, {m_storage.m_data.array[12]})</DisplayString>
+ </Synthetic>
+ <Synthetic Name="[row 1]" Condition="Flags%2">
+ <DisplayString>({m_storage.m_data.array[4]}, {m_storage.m_data.array[5]}, {m_storage.m_data.array[6]}, {m_storage.m_data.array[7]})</DisplayString>
+ </Synthetic>
+ <Synthetic Name="[row 1]" Condition="!(Flags%2)">
+ <DisplayString>({m_storage.m_data.array[1]}, {m_storage.m_data.array[5]}, {m_storage.m_data.array[9]}, {m_storage.m_data.array[13]})</DisplayString>
+ </Synthetic>
+ <Synthetic Name="[row 2]" Condition="Flags%2">
+ <DisplayString>({m_storage.m_data.array[8]}, {m_storage.m_data.array[9]}, {m_storage.m_data.array[10]}, {m_storage.m_data.array[11]})</DisplayString>
+ </Synthetic>
+ <Synthetic Name="[row 2]" Condition="!(Flags%2)">
+ <DisplayString>({m_storage.m_data.array[2]}, {m_storage.m_data.array[6]}, {m_storage.m_data.array[10]}, {m_storage.m_data.array[14]})</DisplayString>
+ </Synthetic>
+ <Synthetic Name="[row 3]" Condition="Flags%2">
+ <DisplayString>({m_storage.m_data.array[12]}, {m_storage.m_data.array[13]}, {m_storage.m_data.array[14]}, {m_storage.m_data.array[15]})</DisplayString>
+ </Synthetic>
+ <Synthetic Name="[row 3]" Condition="!(Flags%2)">
+ <DisplayString>({m_storage.m_data.array[3]}, {m_storage.m_data.array[7]}, {m_storage.m_data.array[11]}, {m_storage.m_data.array[15]})</DisplayString>
+ </Synthetic>
+ </Expand>
+ </Type>
+
+ <!-- Dynamic x Dynamic Matrix -->
+ <Type Name="Eigen::Matrix&lt;*,-1,-1,*,*,*&gt;">
+ <AlternativeType Name="Eigen::Array&lt;*,-1,-1,*,*,*&gt;"/>
+ <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString>
+ <DisplayString Condition="m_storage.m_data != 0">[{m_storage.m_rows}, {m_storage.m_cols}] (dynamic matrix)</DisplayString>
+ <Expand>
+ <ArrayItems Condition="Flags%2"> <!-- row major layout -->
+ <Rank>2</Rank>
+ <Size>$i==0 ? m_storage.m_rows : m_storage.m_cols</Size>
+ <ValuePointer>m_storage.m_data</ValuePointer>
+ </ArrayItems>
+ <ArrayItems Condition="!(Flags%2)"> <!-- column major layout -->
+ <Direction>Backward</Direction>
+ <Rank>2</Rank>
+ <Size>$i==0 ? m_storage.m_rows : m_storage.m_cols</Size>
+ <ValuePointer>m_storage.m_data</ValuePointer>
+ </ArrayItems>
+ </Expand>
+ </Type>
+
+ <!-- Fixed x Dynamic Matrix -->
+ <Type Name="Eigen::Matrix&lt;*,*,-1,*,*,*&gt;">
+ <AlternativeType Name="Eigen::Array&lt;*,*,-1,*,*,*&gt;"/>
+ <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString>
+ <DisplayString Condition="m_storage.m_data != 0">[{$T2}, {m_storage.m_cols}] (dynamic column matrix)</DisplayString>
+ <Expand>
+ <ArrayItems Condition="Flags%2"> <!-- row major layout -->
+ <Rank>2</Rank>
+ <Size>$i==0 ? $T2 : m_storage.m_cols</Size>
+ <ValuePointer>m_storage.m_data</ValuePointer>
+ </ArrayItems>
+ <ArrayItems Condition="!(Flags%2)"> <!-- column major layout -->
+ <Direction>Backward</Direction>
+ <Rank>2</Rank>
+ <Size>$i==0 ? $T2 : m_storage.m_cols</Size>
+ <ValuePointer>m_storage.m_data</ValuePointer>
+ </ArrayItems>
+ </Expand>
+ </Type>
+
+ <!-- Dynamic x Fixed Matrix -->
+ <Type Name="Eigen::Matrix&lt;*,-1,*,*,*,*&gt;">
+ <AlternativeType Name="Eigen::Array&lt;*,-1,*,*,*,*&gt;"/>
+ <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString>
+ <DisplayString Condition="m_storage.m_data != 0">[{m_storage.m_rows}, {$T2}] (dynamic row matrix)</DisplayString>
+ <Expand>
+ <ArrayItems Condition="Flags%2"> <!-- row major layout -->
+ <Rank>2</Rank>
+ <Size>$i==0 ? m_storage.m_rows : $T2</Size>
+ <ValuePointer>m_storage.m_data</ValuePointer>
+ </ArrayItems>
+ <ArrayItems Condition="!(Flags%2)"> <!-- column major layout -->
+ <Direction>Backward</Direction>
+ <Rank>2</Rank>
+ <Size>$i==0 ? m_storage.m_rows : $T2</Size>
+ <ValuePointer>m_storage.m_data</ValuePointer>
+ </ArrayItems>
+ </Expand>
+ </Type>
+
+ <!-- Dynamic Column Vector -->
+ <Type Name="Eigen::Matrix&lt;*,1,-1,*,*,*&gt;">
+ <AlternativeType Name="Eigen::Array&lt;*,1,-1,*,*,*&gt;"/>
+ <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString>
+ <DisplayString Condition="m_storage.m_data != 0">[{m_storage.m_cols}] (dynamic column vector)</DisplayString>
+ <Expand>
+ <Item Name="[size]">m_storage.m_cols</Item>
+ <ArrayItems>
+ <Size>m_storage.m_cols</Size>
+ <ValuePointer>m_storage.m_data</ValuePointer>
+ </ArrayItems>
+ </Expand>
+ </Type>
+
+ <!-- Dynamic Row Vector -->
+ <Type Name="Eigen::Matrix&lt;*,-1,1,*,*,*&gt;">
+ <AlternativeType Name="Eigen::Array&lt;*,-1,1,*,*,*&gt;"/>
+ <DisplayString Condition="m_storage.m_data == 0">empty</DisplayString>
+ <DisplayString Condition="m_storage.m_data != 0">[{m_storage.m_rows}] (dynamic row vector)</DisplayString>
+ <Expand>
+ <Item Name="[size]">m_storage.m_rows</Item>
+ <ArrayItems>
+ <Size>m_storage.m_rows</Size>
+ <ValuePointer>m_storage.m_data</ValuePointer>
+ </ArrayItems>
+ </Expand>
+ </Type>
+
+ <!-- Fixed Vector -->
+ <Type Name="Eigen::Matrix&lt;*,1,1,*,*,*&gt;">
+ <AlternativeType Name="Eigen::Array&lt;*,1,1,*,*,*&gt;"/>
+ <DisplayString>[1] ({m_storage.m_data.array[0]})</DisplayString>
+ <Expand>
+ <Item Name="[x]">m_storage.m_data.array[0]</Item>
+ </Expand>
+ </Type>
+
+ <Type Name="Eigen::Matrix&lt;*,2,1,*,*,*&gt;">
+ <AlternativeType Name="Eigen::Matrix&lt;*,1,2,*,*,*&gt;"/>
+ <AlternativeType Name="Eigen::Array&lt;*,2,1,*,*,*&gt;"/>
+ <AlternativeType Name="Eigen::Array&lt;*,1,2,*,*,*&gt;"/>
+ <DisplayString>[2] ({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]})</DisplayString>
+ <Expand>
+ <Item Name="[x]">m_storage.m_data.array[0]</Item>
+ <Item Name="[y]">m_storage.m_data.array[1]</Item>
+ </Expand>
+ </Type>
+
+ <Type Name="Eigen::Matrix&lt;*,3,1,*,*,*&gt;">
+ <AlternativeType Name="Eigen::Matrix&lt;*,1,3,*,*,*&gt;"/>
+ <AlternativeType Name="Eigen::Array&lt;*,3,1,*,*,*&gt;"/>
+ <AlternativeType Name="Eigen::Array&lt;*,1,3,*,*,*&gt;"/>
+ <DisplayString>[3] ({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]})</DisplayString>
+ <Expand>
+ <Item Name="[x]">m_storage.m_data.array[0]</Item>
+ <Item Name="[y]">m_storage.m_data.array[1]</Item>
+ <Item Name="[z]">m_storage.m_data.array[2]</Item>
+ </Expand>
+ </Type>
+
+ <Type Name="Eigen::Matrix&lt;*,4,1,*,*,*&gt;">
+ <AlternativeType Name="Eigen::Matrix&lt;*,1,4,*,*,*&gt;"/>
+ <AlternativeType Name="Eigen::Array&lt;*,4,1,*,*,*&gt;"/>
+ <AlternativeType Name="Eigen::Array&lt;*,1,4,*,*,*&gt;"/>
+ <DisplayString>[4] ({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]}, {m_storage.m_data.array[3]})</DisplayString>
+ <Expand>
+ <Item Name="[x]">m_storage.m_data.array[0]</Item>
+ <Item Name="[y]">m_storage.m_data.array[1]</Item>
+ <Item Name="[z]">m_storage.m_data.array[2]</Item>
+ <Item Name="[w]">m_storage.m_data.array[3]</Item>
+ </Expand>
+ </Type>
+
+</AutoVisualizer>
diff --git a/doc/AsciiQuickReference.txt b/doc/AsciiQuickReference.txt
index 6f94aa45f..6b0a7cd6a 100644
--- a/doc/AsciiQuickReference.txt
+++ b/doc/AsciiQuickReference.txt
@@ -1,8 +1,7 @@
// A simple quickref for Eigen. Add anything that's missing.
// Main author: Keir Mierle
-#include <Eigen/Core>
-#include <Eigen/Array>
+#include <Eigen/Dense>
Matrix<double, 3, 3> A; // Fixed rows and cols. Same as Matrix3d.
Matrix<double, 3, Dynamic> B; // Fixed rows, dynamic cols.
@@ -11,6 +10,7 @@ Matrix<double, 3, 3, RowMajor> E; // Row major; default is column-major.
Matrix3f P, Q, R; // 3x3 float matrix.
Vector3f x, y, z; // 3x1 float matrix.
RowVector3f a, b, c; // 1x3 float matrix.
+VectorXd v; // Dynamic column vector of doubles
double s;
// Basic usage
@@ -31,9 +31,19 @@ A << 1, 2, 3, // Initialize A. The elements can also be
7, 8, 9; // and then the rows are stacked.
B << A, A, A; // B is three horizontally stacked A's.
A.fill(10); // Fill A with all 10's.
-A.setRandom(); // Fill A with uniform random numbers in (-1, 1).
- // Requires #include <Eigen/Array>.
-A.setIdentity(); // Fill A with the identity.
+
+// Eigen // Matlab
+MatrixXd::Identity(rows,cols) // eye(rows,cols)
+C.setIdentity(rows,cols) // C = eye(rows,cols)
+MatrixXd::Zero(rows,cols) // zeros(rows,cols)
+C.setZero(rows,cols) // C = zeros(rows,cols)
+MatrixXd::Ones(rows,cols) // ones(rows,cols)
+C.setOnes(rows,cols) // C = ones(rows,cols)
+MatrixXd::Random(rows,cols) // rand(rows,cols)*2-1 // MatrixXd::Random returns uniform random numbers in (-1, 1).
+C.setRandom(rows,cols) // C = rand(rows,cols)*2-1
+VectorXd::LinSpaced(size,low,high) // linspace(low,high,size)'
+v.setLinSpaced(size,low,high) // v = linspace(low,high,size)'
+
// Matrix slicing and blocks. All expressions listed here are read/write.
// Templated size versions are faster. Note that Matlab is 1-based (a size N
@@ -77,8 +87,7 @@ a *= M; R = P + Q; R = P/s;
R += Q; R *= s;
R -= Q; R /= s;
- // Vectorized operations on each element independently
- // (most require #include <Eigen/Array>)
+// Vectorized operations on each element independently
// Eigen // Matlab
R = P.cwiseProduct(Q); // R = P .* Q
R = P.array() * s.array();// R = P .* s
@@ -150,12 +159,11 @@ MatrixXi mat2x2 = Map<Matrix2i>(data);
MatrixXi mat2x2 = Map<MatrixXi>(data, 2, 2);
// Solve Ax = b. Result stored in x. Matlab: x = A \ b.
-bool solved;
-solved = A.ldlt().solve(b, &x)); // A sym. p.s.d. #include <Eigen/Cholesky>
-solved = A.llt() .solve(b, &x)); // A sym. p.d. #include <Eigen/Cholesky>
-solved = A.lu() .solve(b, &x)); // Stable and fast. #include <Eigen/LU>
-solved = A.qr() .solve(b, &x)); // No pivoting. #include <Eigen/QR>
-solved = A.svd() .solve(b, &x)); // Stable, slowest. #include <Eigen/SVD>
+x = A.ldlt().solve(b); // A sym. p.s.d. #include <Eigen/Cholesky>
+x = A.llt() .solve(b); // A sym. p.d. #include <Eigen/Cholesky>
+x = A.lu() .solve(b); // Stable and fast. #include <Eigen/LU>
+x = A.qr() .solve(b); // No pivoting. #include <Eigen/QR>
+x = A.svd() .solve(b); // Stable, slowest. #include <Eigen/SVD>
// .ldlt() -> .matrixL() and .matrixD()
// .llt() -> .matrixL()
// .lu() -> .matrixL() and .matrixU()
@@ -168,3 +176,4 @@ A.eigenvalues(); // eig(A);
EigenSolver<Matrix3d> eig(A); // [vec val] = eig(A)
eig.eigenvalues(); // diag(val)
eig.eigenvectors(); // vec
+// For self-adjoint matrices use SelfAdjointEigenSolver<>
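+// e.g. (sketch):
+// SelfAdjointEigenSolver<Matrix3d> eig(A); // A must be self-adjoint
+// eig.eigenvalues(); // real eigenvalues, sorted in increasing order
+// eig.eigenvectors(); // orthonormal eigenvectors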
diff --git a/doc/Overview.dox b/doc/Overview.dox
index d98e17056..9ab96233a 100644
--- a/doc/Overview.dox
+++ b/doc/Overview.dox
@@ -17,7 +17,7 @@ You're a MatLab user? There is also a <a href="AsciiQuickReference.txt">short AS
The \b main \b documentation is organized into \em chapters covering different domains of features.
They are themselves composed of \em user \em manual pages describing the different features in a comprehensive way, and \em reference pages that gives you access to the API documentation through the related Eigen's \em modules and \em classes.
-Under the \subpage UserManual_Generalities section, you will find documentation on more general topics such as preprocessor directives, controlling assertions, multi-threading, MKL support, some Eigen's internal insights, and much more...
+Under the \subpage UserManual_Generalities section, you will find documentation on more general topics such as preprocessor directives, controlling assertions, multi-threading, MKL support, some of Eigen's internal insights, and much more...
Finally, do not miss the search engine, useful to quickly get to the documentation of a given class or function.
diff --git a/doc/PreprocessorDirectives.dox b/doc/PreprocessorDirectives.dox
index 0c4c47464..eedd5524a 100644
--- a/doc/PreprocessorDirectives.dox
+++ b/doc/PreprocessorDirectives.dox
@@ -27,7 +27,11 @@ are doing.
- \b EIGEN_DEFAULT_IO_FORMAT - the IOFormat to use when printing a matrix if no %IOFormat is specified.
Defaults to the %IOFormat constructed by the default constructor IOFormat::IOFormat().
- \b EIGEN_INITIALIZE_MATRICES_BY_ZERO - if defined, all entries of newly constructed matrices and arrays are
- initializes to zero, as are new entries in matrices and arrays after resizing. Not defined by default.
+ initialized to zero, as are new entries in matrices and arrays after resizing. Not defined by default.
+ - \b EIGEN_INITIALIZE_MATRICES_BY_NAN - if defined, all entries of newly constructed matrices and arrays are
+   initialized to NaN, as are new entries in matrices and arrays after resizing. This option is especially
+   useful for debugging purposes, though a memory tool like <a href="http://valgrind.org/">valgrind</a> is
+   preferable. Not defined by default. (A short sketch follows this list.)
- \b EIGEN_NO_AUTOMATIC_RESIZING - if defined, the matrices (or arrays) on both sides of an assignment
<tt>a = b</tt> have to be of the same size; otherwise, %Eigen automatically resizes \c a so that it is of
the correct size. Not defined by default.
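A minimal sketch of the EIGEN_INITIALIZE_MATRICES_BY_NAN option documented above (the macro must be defined before the first Eigen header is included):

\code
#define EIGEN_INITIALIZE_MATRICES_BY_NAN
#include <Eigen/Dense>

Eigen::MatrixXd m(2,2);  // every coefficient starts as NaN, so any use of an
double x = m(0,0);       // uninitialized entry propagates visibly through results
\endcode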
@@ -51,6 +55,9 @@ run time. However, these assertions do cost time and can thus be turned off.
\section TopicPreprocessorDirectivesPerformance Alignment, vectorization and performance tweaking
+ - \b EIGEN_MALLOC_ALREADY_ALIGNED - Can be set to 0 or 1 to tell whether the default system malloc already
+   returns aligned buffers. If not defined, this information is automatically deduced from the compiler
+   and system preprocessor tokens.
- \b EIGEN_DONT_ALIGN - disables alignment completely. %Eigen will not try to align its objects and does not
expect that any objects passed to it are aligned. This will turn off vectorization. Not defined by default.
- \b EIGEN_DONT_ALIGN_STATICALLY - disables alignment of arrays on the stack. Not defined by default, unless
diff --git a/doc/SparseLinearSystems.dox b/doc/SparseLinearSystems.dox
index e447c40ce..c00be10d3 100644
--- a/doc/SparseLinearSystems.dox
+++ b/doc/SparseLinearSystems.dox
@@ -1,10 +1,57 @@
namespace Eigen {
/** \eigenManualPage TopicSparseSystems Solving Sparse Linear Systems
-In Eigen, there are several methods available to solve linear systems when the coefficient matrix is sparse. Because of the special representation of this class of matrices, special care should be taken in order to get a good performance. See \ref TutorialSparse for a detailed introduction about sparse matrices in Eigen. In this page, we briefly present the main steps that are common to all the linear solvers in Eigen together with the main concepts behind them. Depending on the properties of the matrix, the desired accuracy, the end-user is able to tune these steps in order to improve the performance of its code. However, an impatient user does not need to know deeply what's hiding behind these steps: the last section presents a benchmark routine that can be easily used to get an insight on the performance of all the available solvers.
+In Eigen, there are several methods available to solve linear systems when the coefficient matrix is sparse. Because of the special representation of this class of matrices, special care should be taken in order to get good performance. See \ref TutorialSparse for a detailed introduction about sparse matrices in Eigen. This page lists the sparse solvers available in Eigen. The main steps that are common to all these linear solvers are introduced as well. Depending on the properties of the matrix and the desired accuracy, the end-user can tune those steps in order to improve the performance of their code. Note that it is not required to know deeply what's hiding behind these steps: the last section presents a benchmark routine that can easily be used to get an insight into the performance of all the available solvers.
\eigenAutoToc
- As summarized in \ref TutorialSparseDirectSolvers, there are many built-in solvers in Eigen as well as interface to external solvers libraries. All these solvers follow the same calling sequence. The basic steps are as follows :
+\section TutorialSparseDirectSolvers Sparse solvers
+
+%Eigen currently provides a limited set of built-in solvers, as well as wrappers to external solver libraries.
+They are summarized in the following table:
+
+<table class="manual">
+<tr><th>Class</th><th>Module</th><th>Solver kind</th><th>Matrix kind</th><th>Features related to performance</th>
+ <th>Dependencies,License</th><th class="width20em"><p>Notes</p></th></tr>
+<tr><td>SimplicialLLT </td><td>\link SparseCholesky_Module SparseCholesky \endlink</td><td>Direct LLt factorization</td><td>SPD</td><td>Fill-in reducing</td>
+ <td>built-in, LGPL</td>
+ <td>SimplicialLDLT is often preferable</td></tr>
+<tr><td>SimplicialLDLT </td><td>\link SparseCholesky_Module SparseCholesky \endlink</td><td>Direct LDLt factorization</td><td>SPD</td><td>Fill-in reducing</td>
+ <td>built-in, LGPL</td>
+ <td>Recommended for very sparse and not too large problems (e.g., 2D Poisson eq.)</td></tr>
+<tr><td>ConjugateGradient</td><td>\link IterativeLinearSolvers_Module IterativeLinearSolvers \endlink</td><td>Classic iterative CG</td><td>SPD</td><td>Preconditioning</td>
+ <td>built-in, MPL2</td>
+ <td>Recommended for large symmetric problems (e.g., 3D Poisson eq.)</td></tr>
+<tr><td>BiCGSTAB</td><td>\link IterativeLinearSolvers_Module IterativeLinearSolvers \endlink</td><td>Iterative stabilized bi-conjugate gradient</td><td>Square</td><td>Preconditioning</td>
+    <td>built-in, MPL2</td>
+    <td>To speed up the convergence, try it with the \ref IncompleteLUT preconditioner.</td></tr>
+<tr><td>SparseLU</td> <td>\link SparseLU_Module SparseLU \endlink </td> <td>LU factorization </td>
+ <td>Square </td><td>Fill-in reducing, Leverage fast dense algebra</td>
+ <td> built-in, MPL2</td> <td>optimized for small and large problems with irregular patterns </td></tr>
+<tr><td>SparseQR</td> <td>\link SparseQR_Module SparseQR \endlink</td> <td> QR factorization</td>
+ <td>Any, rectangular</td><td> Fill-in reducing</td>
+  <td>built-in, MPL2</td><td>recommended for least-squares problems, has a basic rank-revealing feature</td></tr>
+<tr> <th colspan="7"> Wrappers to external solvers </th></tr>
+<tr><td>PastixLLT \n PastixLDLT \n PastixLU</td><td>\link PaStiXSupport_Module PaStiXSupport \endlink</td><td>Direct LLt, LDLt, LU factorizations</td><td>SPD \n SPD \n Square</td><td>Fill-in reducing, Leverage fast dense algebra, Multithreading</td>
+ <td>Requires the <a href="http://pastix.gforge.inria.fr">PaStiX</a> package, \b CeCILL-C </td>
+ <td>optimized for tough problems and symmetric patterns</td></tr>
+<tr><td>CholmodSupernodalLLT</td><td>\link CholmodSupport_Module CholmodSupport \endlink</td><td>Direct LLt factorization</td><td>SPD</td><td>Fill-in reducing, Leverage fast dense algebra</td>
+ <td>Requires the <a href="http://www.cise.ufl.edu/research/sparse/SuiteSparse/">SuiteSparse</a> package, \b GPL </td>
+ <td></td></tr>
+<tr><td>UmfPackLU</td><td>\link UmfPackSupport_Module UmfPackSupport \endlink</td><td>Direct LU factorization</td><td>Square</td><td>Fill-in reducing, Leverage fast dense algebra</td>
+ <td>Requires the <a href="http://www.cise.ufl.edu/research/sparse/SuiteSparse/">SuiteSparse</a> package, \b GPL </td>
+ <td></td></tr>
+<tr><td>SuperLU</td><td>\link SuperLUSupport_Module SuperLUSupport \endlink</td><td>Direct LU factorization</td><td>Square</td><td>Fill-in reducing, Leverage fast dense algebra</td>
+ <td>Requires the <a href="http://crd-legacy.lbl.gov/~xiaoye/SuperLU/">SuperLU</a> library, (BSD-like)</td>
+ <td></td></tr>
+<tr><td>SPQR</td><td>\link SPQRSupport_Module SPQRSupport \endlink </td> <td> QR factorization </td>
+ <td> Any, rectangular</td><td>fill-in reducing, multithreaded, fast dense algebra</td>
+  <td> requires the <a href="http://www.cise.ufl.edu/research/sparse/SuiteSparse/">SuiteSparse</a> package, \b GPL </td><td>recommended for linear least-squares problems, has a rank-revealing feature</td></tr>
+</table>
+
+Here \c SPD means symmetric positive definite.
+
+All these solvers follow the same general concept.
+Here is a typical and general example:
\code
#include <Eigen/RequiredModuleName>
// ...
@@ -15,21 +62,52 @@ VectorXd b, x;
// solve Ax = b
SolverClassName<SparseMatrix<double> > solver;
solver.compute(A);
-if(solver.info()!=Succeeded) {
+if(solver.info()!=Success) {
// decomposition failed
return;
}
x = solver.solve(b);
-if(solver.info()!=Succeeded) {
+if(solver.info()!=Success) {
// solving failed
return;
}
+// solve for another right hand side:
+x1 = solver.solve(b1);
\endcode
+For \c SPD solvers, a second optional template argument allows one to specify which triangular part has to be used, e.g.:
+
+\code
+#include <Eigen/IterativeLinearSolvers>
+
+ConjugateGradient<SparseMatrix<double>, Eigen::Upper> solver;
+x = solver.compute(A).solve(b);
+\endcode
+In the above example, only the upper triangular part of the input matrix A is considered for solving. The opposite triangle might either be empty or contain arbitrary values.
+
+When multiple problems with the same sparsity pattern have to be solved, the "compute" step can be decomposed as follows:
+\code
+SolverClassName<SparseMatrix<double> > solver;
+solver.analyzePattern(A); // for this step the numerical values of A are not used
+solver.factorize(A);
+x1 = solver.solve(b1);
+x2 = solver.solve(b2);
+...
+A = ...; // modify the values of the nonzeros of A; the nonzero pattern must stay unchanged
+solver.factorize(A);
+x1 = solver.solve(b1);
+x2 = solver.solve(b2);
+...
+\endcode
+The compute() method is equivalent to calling both analyzePattern() and factorize().
+
+Finally, each solver provides some specific features, such as the determinant, access to the factors, control of the iterations, and so on.
+More details are available in the documentation of the respective classes.
+
\section TheSparseCompute The Compute Step
-In the compute() function, the matrix is generally factorized: LLT for self-adjoint matrices, LDLT for general hermitian matrices and LU for non hermitian matrices. These are the results of using direct solvers. For this class of solvers precisely, the compute step is further subdivided into analyzePattern() and factorize().
+In the compute() function, the matrix is generally factorized: LLT for self-adjoint matrices, LDLT for general hermitian matrices, LU for non hermitian matrices and QR for rectangular matrices. These are the results of using direct solvers. For this class of solvers precisely, the compute step is further subdivided into analyzePattern() and factorize().
-The goal of analyzePattern() is to reorder the nonzero elements of the matrix, such that the factorization step creates less fill-in. This step exploits only the structure of the matrix. Hence, the results of this step can be used for other linear systems where the matrix has the same structure. Note however that sometimes, some external solvers (like SuperLU) require that the values of the matrix are set in this step, for instance to equilibrate the rows and columns of the matrix. In this situation, the results of this step can note be used with other matrices.
+The goal of analyzePattern() is to reorder the nonzero elements of the matrix, such that the factorization step creates less fill-in. This step exploits only the structure of the matrix. Hence, the results of this step can be used for other linear systems where the matrix has the same structure. Note however that sometimes, some external solvers (like SuperLU) require that the values of the matrix are set in this step, for instance to equilibrate the rows and columns of the matrix. In this situation, the results of this step should not be used with other matrices.
Eigen provides a limited set of methods to reorder the matrix in this step, either built-in (COLAMD, AMD) or external (METIS). These methods are set in the template parameter list of the solver:
\code
@@ -40,33 +118,31 @@ See the \link OrderingMethods_Module OrderingMethods module \endlink for the lis
In factorize(), the factors of the coefficient matrix are computed. This step should be called each time the values of the matrix change. However, the structural pattern of the matrix should not change between multiple calls.
-For iterative solvers, the compute step is used to eventually setup a preconditioner. Remember that, basically, the goal of the preconditioner is to speedup the convergence of an iterative method by solving a modified linear system where the coefficient matrix has more clustered eigenvalues. For real problems, an iterative solver should always be used with a preconditioner. In Eigen, a preconditioner is selected by simply adding it as a template parameter to the iterative solver object.
+For iterative solvers, the compute step is used to eventually set up a preconditioner. For instance, with the ILUT preconditioner, the incomplete factors L and U are computed in this step. Remember that, basically, the goal of the preconditioner is to speed up the convergence of an iterative method by solving a modified linear system where the coefficient matrix has more clustered eigenvalues. For real problems, an iterative solver should always be used with a preconditioner. In Eigen, a preconditioner is selected by simply adding it as a template parameter to the iterative solver object.
\code
IterativeSolverClassName<SparseMatrix<double>, PreconditionerName<SparseMatrix<double> > > solver;
\endcode
The member function preconditioner() returns a read-write reference to the preconditioner
- to directly interact with it.
+ to directly interact with it. See the \link IterativeLinearSolvers_Module Iterative solvers module \endlink and the documentation of each class for the list of available methods.
-For instance, with the ILUT preconditioner, the incomplete factors L and U are computed in this step.
-See \link Sparse_modules the Sparse module \endlink for the list of available preconditioners in Eigen.
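For instance, a sketch with the built-in IncompleteLUT preconditioner (the setDroptol() tuning knob is assumed from the IncompleteLUT class interface):

\code
BiCGSTAB<SparseMatrix<double>, IncompleteLUT<double> > solver;
solver.preconditioner().setDroptol(1e-4); // drop tolerance of the incomplete factors
solver.compute(A);                        // the ILUT factors are built here
x = solver.solve(b);
\endcode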
\section TheSparseSolve The Solve step
The solve() function computes the solution of the linear systems with one or many right hand sides.
\code
X = solver.solve(B);
\endcode
-Here, B can be a vector or a matrix where the columns form the different right hand sides. The solve() function can be called several times as well, for instance When all the right hand sides are not available at once.
+Here, B can be a vector or a matrix where the columns form the different right hand sides. The solve() function can be called several times as well, for instance when all the right hand sides are not available at once.
\code
x1 = solver.solve(b1);
// Get the second right hand side b2
x2 = solver.solve(b2);
// ...
\endcode
-For direct methods, the solution are computed at the machine precision. Sometimes, the solution need not be too accurate. In this case, the iterative methods are more suitable and the desired accuracy can be set before the solve step using setTolerance(). For all the available functions, please, refer to the documentation of the \link IterativeLinearSolvers_Module Iterative solvers module \endlink.
+For direct methods, the solutions are computed at machine precision. Sometimes, the solution need not be too accurate. In this case, the iterative methods are more suitable and the desired accuracy can be set before the solve step using \b setTolerance(). For all the available functions, please refer to the documentation of the \link IterativeLinearSolvers_Module Iterative solvers module \endlink.
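For example, a sketch with ConjugateGradient:

\code
ConjugateGradient<SparseMatrix<double> > cg;
cg.setTolerance(1e-6);    // relative-residual stopping criterion
cg.setMaxIterations(200);
cg.compute(A);
x = cg.solve(b);
std::cout << "#iterations: " << cg.iterations()
          << ", estimated error: " << cg.error() << std::endl;
\endcode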
\section BenchmarkRoutine
-Most of the time, all you need is to know how much time it will take to qolve your system, and hopefully, what is the most suitable solver. In Eigen, we provide a benchmark routine that can be used for this purpose. It is very easy to use. First, it should be activated at the configuration step with the flag TEST_REAL_CASES. Then, in bench/spbench, you can compile the routine by typing \b make \e spbenchsolver. You can then run it with --help option to get the list of all available options. Basically, the matrices to test should be in <a href="http://math.nist.gov/MatrixMarket/formats.html">MatrixMarket Coordinate format</a>, and the routine returns the statistics from all available solvers in Eigen.
+Most of the time, all you need is to know how much time it will take to solve your system, and hopefully, what is the most suitable solver. In Eigen, we provide a benchmark routine that can be used for this purpose. It is very easy to use. In the build directory, navigate to bench/spbench and compile the routine by typing \b make \e spbenchsolver. Run it with the --help option to get the list of all available options. Basically, the matrices to test should be in <a href="http://math.nist.gov/MatrixMarket/formats.html">MatrixMarket Coordinate format</a>, and the routine returns the statistics from all available solvers in Eigen.
-The following table gives an example of XHTML statistics from several Eigen built-in and external solvers.
+The following table gives an example of XML statistics from several Eigen built-in and external solvers.
<TABLE border="1">
<TR><TH>Matrix <TH> N <TH> NNZ <TH> <TH > UMFPACK <TH > SUPERLU <TH > PASTIX LU <TH >BiCGSTAB <TH > BiCGSTAB+ILUT <TH >GMRES+ILUT<TH > LDLT <TH> CHOLMOD LDLT <TH > PASTIX LDLT <TH > LLT <TH > CHOLMOD SP LLT <TH > CHOLMOD LLT <TH > PASTIX LLT <TH> CG</TR>
<TR><TH rowspan="4">vector_graphics <TD rowspan="4"> 12855 <TD rowspan="4"> 72069 <TH>Compute Time <TD>0.0254549<TD>0.0215677<TD>0.0701827<TD>0.000153388<TD>0.0140107<TD>0.0153709<TD>0.0101601<TD style="background-color:red">0.00930502<TD>0.0649689
diff --git a/doc/SparseQuickReference.dox b/doc/SparseQuickReference.dox
index 15015a0ca..4a33d0cc9 100644
--- a/doc/SparseQuickReference.dox
+++ b/doc/SparseQuickReference.dox
@@ -4,61 +4,84 @@ namespace Eigen {
<hr>
-In this page, we give a quick summary of the main operations available for sparse matrices in the class SparseMatrix. First, it is recommended to read first the introductory tutorial at \ref TutorialSparse. The important point to have in mind when working on sparse matrices is how they are stored :
-i.e either row major or column major. The default is column major. Most arithmetic operations on sparse matrices will assert that they have the same storage order. Moreover, when interacting with external libraries that are not yet supported by Eigen, it is important to know how to send the required matrix pointers.
-
-\section Constructors Constructors and assignments
-SparseMatrix is the core class to build and manipulate sparse matrices in Eigen. It takes as template parameters the Scalar type and the storage order, either RowMajor or ColumnMajor. The default is ColumnMajor.
+In this page, we give a quick summary of the main operations available for sparse matrices in the class SparseMatrix. First, it is recommended to read the introductory tutorial at \ref TutorialSparse. The important point to have in mind when working on sparse matrices is how they are stored:
+i.e., either row major or column major. The default is column major. Most arithmetic operations on sparse matrices will assert that they have the same storage order.
+\section SparseMatrixInit Sparse Matrix Initialization
+<table class="manual">
+<tr><th> Category </th> <th> Operations</th> <th>Notes</th></tr>
+<tr><td>Constructor</td>
+<td>
\code
- SparseMatrix<double> sm1(1000,1000); // 1000x1000 compressed sparse matrix of double.
- SparseMatrix<std::complex<double>,RowMajor> sm2; // Compressed row major matrix of complex double.
+ SparseMatrix<double> sm1(1000,1000);
+ SparseMatrix<std::complex<double>,RowMajor> sm2;
\endcode
-The copy constructor and assignment can be used to convert matrices from a storage order to another
+</td> <td> Default is ColMajor</td> </tr>
+<tr class="alt">
+<td> Resize/Reserve</td>
+<td>
+ \code
+ sm1.resize(m,n); // Change sm1 to an m x n matrix.
+ sm1.reserve(nnz); // Allocate room for nnz nonzero elements.
+ \endcode
+</td>
+<td> Note that when calling reserve(), nnz does not have to be the exact number of nonzero elements in the final matrix. However, an accurate estimate will avoid multiple reallocations during the insertion phase. </td>
+</tr>
+<tr>
+<td> Assignment </td>
+<td>
\code
 SparseMatrix<double,ColMajor> sm1;
- // Eventually fill the matrix sm1 ...
- SparseMatrix<double,Rowmajor> sm2(sm1), sm3; // Initialize sm2 with sm1.
- sm3 = sm1; // Assignment and evaluations modify the storage order.
+ // Initialize sm2 with sm1.
+ SparseMatrix<double,RowMajor> sm2(sm1), sm3;
+ // Assignment and evaluations modify the storage order.
+ sm3 = sm1;
\endcode
-
-\section SparseMatrixInsertion Allocating and inserting values
-resize() and reserve() are used to set the size and allocate space for nonzero elements
- \code
- sm1.resize(m,n); //Change sm to a mxn matrix.
- sm1.reserve(nnz); // Allocate room for nnz nonzeros elements.
- \endcode
-Note that when calling reserve(), it is not required that nnz is the exact number of nonzero elements in the final matrix. However, an exact estimation will avoid multiple reallocations during the insertion phase.
-
-Insertions of values in the sparse matrix can be done directly by looping over nonzero elements and use the insert() function
+</td>
+<td> The copy constructor can be used to convert from one storage order to another</td>
+</tr>
+<tr class="alt">
+<td> Element-wise Insertion</td>
+<td>
\code
-// Direct insertion of the value v_ij;
- sm1.insert(i, j) = v_ij; // It is assumed that v_ij does not already exist in the matrix.
-\endcode
+// Insert a new element;
+ sm1.insert(i, j) = v_ij;
-After insertion, a value at (i,j) can be modified using coeffRef()
-\code
- // Update the value v_ij
- sm1.coeffRef(i,j) = v_ij;
- sm1.coeffRef(i,j) += v_ij;
- sm1.coeffRef(i,j) -= v_ij;
- ...
+// Update the value v_ij
+ sm1.coeffRef(i,j) = v_ij;
+ sm1.coeffRef(i,j) += v_ij;
+ sm1.coeffRef(i,j) -= v_ij;
\endcode
-
-The recommended way to insert values is to build a list of triplets (row, col, val) and then call setFromTriplets().
+</td>
+<td> insert() assumes that the element does not already exist; otherwise, use coeffRef()</td>
+</tr>
+<tr>
+<td> Batch insertion</td>
+<td>
\code
+ std::vector< Eigen::Triplet<double> > tripletList;
+ tripletList.reserve(estimation_of_entries);
+ // -- Fill tripletList with nonzero elements...
 sm1.setFromTriplets(tripletList.begin(), tripletList.end());
\endcode
-A complete example is available at \ref TutorialSparseFilling.
-
-The following functions can be used to set constant or random values in the matrix.
+</td>
+<td>A complete example is available at \link TutorialSparseFilling Triplet Insertion \endlink.</td>
+</tr>
+<tr class="alt">
+<td> Constant or Random Insertion</td>
+<td>
\code
- sm1.setZero(); // Reset the matrix with zero elements
- ...
+sm1.setZero(); // Remove all nonzero elements
+sm1.setConstant(val); // Set all the existing nonzero values to val
\endcode
+</td>
+<td> The matrix sm1 must have been created beforehand; setConstant() only changes the values of the existing nonzero elements.</td>
+</tr>
+</table>
+
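+As an illustration, here is a minimal sketch of the typical triplet-based assembly pattern (the names estimated_nnz, i, j and v_ij are placeholders):
+\code
+typedef Eigen::Triplet<double> T;
+std::vector<T> coefficients;
+coefficients.reserve(estimated_nnz);
+// for each nonzero element:
+coefficients.push_back(T(i, j, v_ij));
+SparseMatrix<double> A(m, n);
+A.setFromTriplets(coefficients.begin(), coefficients.end());
+\endcode
+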
\section SparseBasicInfos Matrix properties
-Beyond the functions rows() and cols() that are used to get the number of rows and columns, there are some useful functions that are available to easily get some informations from the matrix.
+Beyond the basic functions rows() and cols(), there are several useful functions available to easily retrieve information from the matrix.
<table class="manual">
<tr>
<td> \code
@@ -67,16 +90,18 @@ Beyond the functions rows() and cols() that are used to get the number of rows a
sm1.nonZeros(); // Number of non zero values
sm1.outerSize(); // Number of columns (resp. rows) for a column major (resp. row major )
sm1.innerSize(); // Number of rows (resp. columns) for a row major (resp. column major)
- sm1.norm(); // (Euclidian ??) norm of the matrix
- sm1.squaredNorm(); //
+ sm1.norm(); // Euclidean norm of the matrix
+ sm1.squaredNorm(); // Squared norm of the matrix
+ sm1.blueNorm(); // Norm computed with Blue's algorithm, robust to overflow/underflow
sm1.isVector(); // Check if sm1 is a sparse vector or a sparse matrix
+ sm1.isCompressed(); // Check if sm1 is in compressed form
...
\endcode </td>
</tr>
</table>
\section SparseBasicOps Arithmetic operations
-It is easy to perform arithmetic operations on sparse matrices provided that the dimensions are adequate and that the matrices have the same storage order. Note that the evaluation can always be done in a matrix with a different storage order.
+It is easy to perform arithmetic operations on sparse matrices provided that the dimensions match and that the matrices have the same storage order. Note that the evaluation can always be done in a matrix with a different storage order. In the following, \b sm denotes a sparse matrix, \b dm a dense matrix and \b dv a dense vector.
<table class="manual">
<tr><th> Operations </th> <th> Code </th> <th> Notes </th></tr>
@@ -103,7 +128,7 @@ It is easy to perform arithmetic operations on sparse matrices provided that the
</tr>
<tr>
- <td> Product </td>
+ <td> %Sparse %Product </td>
<td> \code
sm3 = sm1 * sm2;
dm2 = sm1 * dm1;
@@ -123,7 +148,20 @@ It is easy to perform arithmetic operations on sparse matrices provided that the
Note that the transposition changes the storage order. There is no support for transposeInPlace().
</td>
</tr>
-
+<tr>
+<td> Permutation </td>
+<td>
+\code
+perm.indices(); // Reference to the vector of indices
+sm1.twistedBy(perm); // Permute rows and columns
+sm2 = sm1 * perm; // Permute the columns
+sm2 = perm * sm1; // Permute the rows
+\endcode
+</td>
+<td>
+Here, \b perm is a PermutationMatrix
+</td>
+</tr>
<tr>
<td>
Component-wise ops
@@ -142,47 +180,70 @@ It is easy to perform arithmetic operations on sparse matrices provided that the
</tr>
</table>
-
-\section SparseInterops Low-level storage
-There are a set of low-levels functions to get the standard compressed storage pointers. The matrix should be in compressed mode which can be checked by calling isCompressed(); makeCompressed() should do the job otherwise.
+\section sparseotherops Other supported operations
+<table class="manual">
+<tr><th>Operations</th> <th> Code </th> <th> Notes</th> </tr>
+<tr>
+<td>Sub-matrices</td>
+<td>
+\code
+ sm1.block(startRow, startCol, rows, cols);
+ sm1.block<Rows,Cols>(startRow, startCol); // Rows and Cols are compile-time sizes
+ sm1.topLeftCorner(rows, cols);
+ sm1.topRightCorner(rows, cols);
+ sm1.bottomLeftCorner(rows, cols);
+ sm1.bottomRightCorner(rows, cols);
+ \endcode
+</td> <td> Because of the storage format, extracting an arbitrary sub-matrix is generally not efficient; prefer blocks of complete inner vectors </td>
+</tr>
+<tr>
+<td> Range </td>
+<td>
+\code
+ sm1.innerVector(outer);
+ sm1.innerVectors(start, size);
+ sm1.leftCols(size);
+ sm1.rightCols(size);
+ sm1.middleRows(start, numRows);
+ sm1.middleCols(start, numCols);
+ sm1.col(j);
+\endcode
+</td>
+<td>An inner vector is either a row (for row-major) or a column (for column-major). As stated earlier, the evaluation can be done in a matrix with a different storage order </td>
+</tr>
+<tr>
+<td> Triangular and selfadjoint views</td>
+<td>
\code
- // Scalar pointer to the values of the matrix, size nnz
- sm1.valuePtr();
- // Index pointer to get the row indices (resp. column indices) for column major (resp. row major) matrix, size nnz
- sm1.innerIndexPtr();
- // Index pointer to the beginning of each row (resp. column) in valuePtr() and innerIndexPtr() for column major (row major). The size is outersize()+1;
- sm1.outerIndexPtr();
+ sm2 = sm1.triangularView<Lower>();
+ sm2 = sm1.selfadjointView<Lower>();
\endcode
-These pointers can therefore be easily used to send the matrix to some external libraries/solvers that are not yet supported by Eigen.
-
-\section sparsepermutation Permutations, submatrices and Selfadjoint Views
-In many cases, it is necessary to reorder the rows and/or the columns of the sparse matrix for several purposes : fill-in reducing during matrix decomposition, better data locality for sparse matrix-vector products... The class PermutationMatrix is available to this end.
- \code
- PermutationMatrix<Dynamic, Dynamic, int> perm;
- // Reserve and fill the values of perm;
- perm.inverse(n); // Compute eventually the inverse permutation
- sm1.twistedBy(perm) //Apply the permutation on rows and columns
- sm2 = sm1 * perm; // ??? Apply the permutation on columns ???;
- sm2 = perm * sm1; // ??? Apply the permutation on rows ???;
- \endcode
-
-\section sparsesubmatrices Sub-matrices
-The following functions are useful to extract a block of rows (resp. columns) from a row-major (resp. column major) sparse matrix. Note that because of the particular storage, it is not ?? efficient ?? to extract a submatrix comprising a certain number of subrows and subcolumns.
- \code
- sm1.innerVector(outer); // Returns the outer -th column (resp. row) of the matrix if sm is col-major (resp. row-major)
- sm1.innerVectors(outer); // Returns the outer -th column (resp. row) of the matrix if mat is col-major (resp. row-major)
- sm1.middleRows(start, numRows); // For row major matrices, get a range of numRows rows
- sm1.middleCols(start, numCols); // For column major matrices, get a range of numCols cols
- \endcode
- Examples :
-
-\section sparseselfadjointview Sparse triangular and selfadjoint Views
- \code
- sm2 = sm1.triangularview<Lower>(); // Get the lower triangular part of the matrix.
- dv2 = sm1.triangularView<Upper>().solve(dv1); // Solve the linear system with the uppper triangular part.
- sm2 = sm1.selfadjointview<Lower>(); // Build a selfadjoint matrix from the lower part of sm1.
- \endcode
-
-
+</td>
+<td> Several combinations of triangular views and block views are possible
+</td>
+</tr>
+<tr>
+<td>Triangular solve </td>
+<td>
+\code
+ dv2 = sm1.triangularView<Upper>().solve(dv1);
+ dv2 = sm1.topLeftCorner(size, size).triangularView<Lower>().solve(dv1);
+\endcode
+</td>
+<td> For a general sparse solve, use any suitable module described at \ref TopicSparseSystems </td>
+</tr>
+<tr>
+<td> Low-level API</td>
+<td>
+\code
+sm1.valuePtr(); // Pointer to the values
+sm1.innerIndexPtr(); // Pointer to the inner indices
+sm1.outerIndexPtr(); // Pointer to the beginning of each inner vector
+\endcode
+</td>
+<td> If the matrix is not in compressed form, makeCompressed() should be called first. Note that these functions are mostly provided for interoperability purposes with external libraries. A better way to access the values of the matrix is to use the InnerIterator class as described in \link TutorialSparse the Tutorial Sparse \endlink section</td>
+</tr>
+</table>
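+
+As a better alternative to the raw pointers above, here is a minimal sketch of iterating over the nonzero elements with the InnerIterator class (assuming a column-major SparseMatrix<double> sm1):
+\code
+for (int k = 0; k < sm1.outerSize(); ++k)
+  for (SparseMatrix<double>::InnerIterator it(sm1, k); it; ++it)
+  {
+    it.value(); // value of the nonzero element
+    it.row();   // row index
+    it.col();   // column index (here equal to k)
+  }
+\endcode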
*/
}
diff --git a/doc/TutorialMapClass.dox b/doc/TutorialMapClass.dox
index a5c20f1bf..f8fb0fd2f 100644
--- a/doc/TutorialMapClass.dox
+++ b/doc/TutorialMapClass.dox
@@ -3,17 +3,17 @@ namespace Eigen {
/** \eigenManualPage TutorialMapClass Interfacing with raw buffers: the Map class
This page explains how to work with "raw" C/C++ arrays.
-This can be useful in a variety of contexts, particularly when "importing" vectors and matrices from other libraries into Eigen.
+This can be useful in a variety of contexts, particularly when "importing" vectors and matrices from other libraries into %Eigen.
\eigenAutoToc
\section TutorialMapIntroduction Introduction
-Occasionally you may have a pre-defined array of numbers that you want to use within Eigen as a vector or matrix. While one option is to make a copy of the data, most commonly you probably want to re-use this memory as an Eigen type. Fortunately, this is very easy with the Map class.
+Occasionally you may have a pre-defined array of numbers that you want to use within %Eigen as a vector or matrix. While one option is to make a copy of the data, most commonly you probably want to re-use this memory as an %Eigen type. Fortunately, this is very easy with the Map class.
\section TutorialMapTypes Map types and declaring Map variables
-A Map object has a type defined by its Eigen equivalent:
+A Map object has a type defined by its %Eigen equivalent:
\code
Map<Matrix<typename Scalar, int RowsAtCompileTime, int ColsAtCompileTime> >
\endcode
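+For instance, a minimal sketch mapping an existing C array as a vector (the buffer name data is illustrative):
+\code
+float data[4] = {1, 2, 3, 4};
+Map<VectorXf> v(data, 4); // v reuses the memory of data, no copy is made
+\endcode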
@@ -49,7 +49,7 @@ However, Stride is even more flexible than this; for details, see the documentat
\section TutorialMapUsing Using Map variables
-You can use a Map object just like any other Eigen type:
+You can use a Map object just like any other %Eigen type:
<table class="example">
<tr><th>Example:</th><th>Output:</th></tr>
<tr>
@@ -57,7 +57,7 @@ You can use a Map object just like any other Eigen type:
<td>\verbinclude Tutorial_Map_using.out </td>
</table>
-However, when writing functions taking Eigen types, it is important to realize that a Map type is \em not identical to its Dense equivalent. See \ref TopicFunctionTakingEigenTypesMultiarguments for details.
+All %Eigen functions are written to accept Map objects just like other %Eigen types. However, when writing your own functions taking %Eigen types, this does \em not happen automatically: a Map type is not identical to its Dense equivalent. See \ref TopicFunctionTakingEigenTypes for details.
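+For instance, a minimal sketch of a function that accepts both Map and Matrix arguments by templating on MatrixBase (the function name is illustrative):
+\code
+template<typename Derived>
+typename Derived::Scalar sumOfCoeffs(const MatrixBase<Derived>& m)
+{
+  return m.sum();
+}
+\endcode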
\section TutorialMapPlacementNew Changing the mapped array
diff --git a/doc/TutorialSparse.dox b/doc/TutorialSparse.dox
index 9f06005fa..98c9997e1 100644
--- a/doc/TutorialSparse.dox
+++ b/doc/TutorialSparse.dox
@@ -10,11 +10,14 @@ Manipulating and solving sparse problems involves various modules which are summ
<tr><th>Module</th><th>Header file</th><th>Contents</th></tr>
<tr><td>\link SparseCore_Module SparseCore \endlink</td><td>\code#include <Eigen/SparseCore>\endcode</td><td>SparseMatrix and SparseVector classes, matrix assembly, basic sparse linear algebra (including sparse triangular solvers)</td></tr>
<tr><td>\link SparseCholesky_Module SparseCholesky \endlink</td><td>\code#include <Eigen/SparseCholesky>\endcode</td><td>Direct sparse LLT and LDLT Cholesky factorization to solve sparse self-adjoint positive definite problems</td></tr>
+<tr><td>\link SparseLU_Module SparseLU \endlink</td><td>\code #include<Eigen/SparseLU> \endcode</td>
+<td>%Sparse LU factorization to solve general square sparse systems</td></tr>
+<tr><td>\link SparseQR_Module SparseQR \endlink</td><td>\code #include<Eigen/SparseQR>\endcode </td><td>%Sparse QR factorization for solving sparse linear least-squares problems</td></tr>
<tr><td>\link IterativeLinearSolvers_Module IterativeLinearSolvers \endlink</td><td>\code#include <Eigen/IterativeLinearSolvers>\endcode</td><td>Iterative solvers to solve large general linear square problems (including self-adjoint positive definite problems)</td></tr>
<tr><td>\link Sparse_modules Sparse \endlink</td><td>\code#include <Eigen/Sparse>\endcode</td><td>Includes all the above modules</td></tr>
</table>
-\section TutorialSparseIntro Sparse matrix representation
+\section TutorialSparseIntro Sparse matrix format
In many applications (e.g., finite element methods) it is common to deal with very large matrices where only a few coefficients are different from zero. In such cases, memory consumption can be reduced and performance increased by using a specialized representation storing only the nonzero coefficients. Such a matrix is called a sparse matrix.
@@ -224,102 +227,10 @@ A typical scenario of this approach is illustrated below:
- The line 5 suppresses the remaining empty space and transforms the matrix into a compressed column storage.
-\section TutorialSparseDirectSolvers Solving linear problems
-
-%Eigen currently provides a limited set of built-in solvers, as well as wrappers to external solver libraries.
-They are summarized in the following table:
-
-<table class="manual">
-<tr><th>Class</th><th>Module</th><th>Solver kind</th><th>Matrix kind</th><th>Features related to performance</th>
- <th>Dependencies,License</th><th class="width20em"><p>Notes</p></th></tr>
-<tr><td>SimplicialLLT </td><td>\link SparseCholesky_Module SparseCholesky \endlink</td><td>Direct LLt factorization</td><td>SPD</td><td>Fill-in reducing</td>
- <td>built-in, LGPL</td>
- <td>SimplicialLDLT is often preferable</td></tr>
-<tr><td>SimplicialLDLT </td><td>\link SparseCholesky_Module SparseCholesky \endlink</td><td>Direct LDLt factorization</td><td>SPD</td><td>Fill-in reducing</td>
- <td>built-in, LGPL</td>
- <td>Recommended for very sparse and not too large problems (e.g., 2D Poisson eq.)</td></tr>
-<tr><td>ConjugateGradient</td><td>\link IterativeLinearSolvers_Module IterativeLinearSolvers \endlink</td><td>Classic iterative CG</td><td>SPD</td><td>Preconditionning</td>
- <td>built-in, LGPL</td>
- <td>Recommended for large symmetric problems (e.g., 3D Poisson eq.)</td></tr>
-<tr><td>BiCGSTAB</td><td>\link IterativeLinearSolvers_Module IterativeLinearSolvers \endlink</td><td>Iterative stabilized bi-conjugate gradient</td><td>Square</td><td>Preconditionning</td>
- <td>built-in, LGPL</td>
- <td>Might not always converge</td></tr>
-
-
-<tr><td>PastixLLT \n PastixLDLT \n PastixLU</td><td>\link PaStiXSupport_Module PaStiXSupport \endlink</td><td>Direct LLt, LDLt, LU factorizations</td><td>SPD \n SPD \n Square</td><td>Fill-in reducing, Leverage fast dense algebra, Multithreading</td>
- <td>Requires the <a href="http://pastix.gforge.inria.fr">PaStiX</a> package, \b CeCILL-C </td>
- <td>optimized for tough problems and symmetric patterns</td></tr>
-<tr><td>CholmodSupernodalLLT</td><td>\link CholmodSupport_Module CholmodSupport \endlink</td><td>Direct LLt factorization</td><td>SPD</td><td>Fill-in reducing, Leverage fast dense algebra</td>
- <td>Requires the <a href="http://www.cise.ufl.edu/research/sparse/SuiteSparse/">SuiteSparse</a> package, \b GPL </td>
- <td></td></tr>
-<tr><td>UmfPackLU</td><td>\link UmfPackSupport_Module UmfPackSupport \endlink</td><td>Direct LU factorization</td><td>Square</td><td>Fill-in reducing, Leverage fast dense algebra</td>
- <td>Requires the <a href="http://www.cise.ufl.edu/research/sparse/SuiteSparse/">SuiteSparse</a> package, \b GPL </td>
- <td></td></tr>
-<tr><td>SuperLU</td><td>\link SuperLUSupport_Module SuperLUSupport \endlink</td><td>Direct LU factorization</td><td>Square</td><td>Fill-in reducing, Leverage fast dense algebra</td>
- <td>Requires the <a href="http://crd-legacy.lbl.gov/~xiaoye/SuperLU/">SuperLU</a> library, (BSD-like)</td>
- <td></td></tr>
-</table>
-
-Here \c SPD means symmetric positive definite.
-
-All these solvers follow the same general concept.
-Here is a typical and general example:
-\code
-#include <Eigen/RequiredModuleName>
-// ...
-SparseMatrix<double> A;
-// fill A
-VectorXd b, x;
-// fill b
-// solve Ax = b
-SolverClassName<SparseMatrix<double> > solver;
-solver.compute(A);
-if(solver.info()!=Success) {
- // decomposition failed
- return;
-}
-x = solver.solve(b);
-if(solver.info()!=Success) {
- // solving failed
- return;
-}
-// solve for another right hand side:
-x1 = solver.solve(b1);
-\endcode
-
-For \c SPD solvers, a second optional template argument allows to specify which triangular part have to be used, e.g.:
-
-\code
-#include <Eigen/IterativeLinearSolvers>
-
-ConjugateGradient<SparseMatrix<double>, Eigen::Upper> solver;
-x = solver.compute(A).solve(b);
-\endcode
-In the above example, only the upper triangular part of the input matrix A is considered for solving. The opposite triangle might either be empty or contain arbitrary values.
-
-In the case where multiple problems with the same sparcity pattern have to be solved, then the "compute" step can be decomposed as follow:
-\code
-SolverClassName<SparseMatrix<double> > solver;
-solver.analyzePattern(A); // for this step the numerical values of A are not used
-solver.factorize(A);
-x1 = solver.solve(b1);
-x2 = solver.solve(b2);
-...
-A = ...; // modify the values of the nonzeros of A, the nonzeros pattern must stay unchanged
-solver.factorize(A);
-x1 = solver.solve(b1);
-x2 = solver.solve(b2);
-...
-\endcode
-The compute() method is equivalent to calling both analyzePattern() and factorize().
-
-Finally, each solver provides some specific features, such as determinant, access to the factors, controls of the iterations, and so on.
-More details are availble in the documentations of the respective classes.
-
\section TutorialSparseFeatureSet Supported operators and functions
-Because of their special storage format, sparse matrices cannot offer the same level of flexbility than dense matrices.
+Because of their special storage format, sparse matrices cannot offer the same level of flexibility as dense matrices.
In Eigen's sparse module we chose to expose only the subset of the dense matrix API which can be efficiently implemented.
In the following \em sm denotes a sparse matrix, \em sv a sparse vector, \em dm a dense matrix, and \em dv a dense vector.
@@ -420,16 +331,7 @@ sm2 = A.selfadjointView<Upper>().twistedBy(P); //
sm2.selfadjointView<Lower>() = A.selfadjointView<Lower>().twistedBy(P); // compute P S P' from the lower triangular part of A, and then only compute the lower part
\endcode
-\subsection TutorialSparse_Submat Sub-matrices
-
-%Sparse matrices does not support yet the addressing of arbitrary sub matrices. Currently, one can only reference a set of contiguous \em inner vectors, i.e., a set of contiguous rows for a row-major matrix, or a set of contiguous columns for a column major matrix:
-\code
- sm1.innerVector(j); // returns an expression of the j-th column (resp. row) of the matrix if sm1 is col-major (resp. row-major)
- sm1.innerVectors(j, nb); // returns an expression of the nb columns (resp. row) starting from the j-th column (resp. row)
- // of the matrix if sm1 is col-major (resp. row-major)
- sm1.middleRows(j, nb); // for row major matrices only, get a range of nb rows
- sm1.middleCols(j, nb); // for column major matrices only, get a range of nb columns
-\endcode
+Please refer to the \link SparseQuickRefPage Quick Reference \endlink guide for the list of supported operations. The list of available linear solvers is given \link TopicSparseSystems here \endlink.
*/
diff --git a/doc/snippets/DenseBase_setLinSpaced.cpp b/doc/snippets/DenseBase_setLinSpaced.cpp
index 50871dfcc..46054f234 100644
--- a/doc/snippets/DenseBase_setLinSpaced.cpp
+++ b/doc/snippets/DenseBase_setLinSpaced.cpp
@@ -1,3 +1,3 @@
VectorXf v;
-v.setLinSpaced(5,0.5f,1.5f).transpose();
+v.setLinSpaced(5,0.5f,1.5f);
cout << v << endl;
diff --git a/scripts/cdashtesting.cmake.in b/scripts/cdashtesting.cmake.in
new file mode 100644
index 000000000..59cf53328
--- /dev/null
+++ b/scripts/cdashtesting.cmake.in
@@ -0,0 +1,49 @@
+
+set(CTEST_SOURCE_DIRECTORY "@CMAKE_SOURCE_DIR@")
+set(CTEST_BINARY_DIRECTORY "@CMAKE_BINARY_DIR@")
+set(CTEST_CMAKE_GENERATOR "@CMAKE_GENERATOR@")
+set(CTEST_BUILD_NAME "@BUILDNAME@")
+set(CTEST_SITE "@SITE@")
+
+set(MODEL Experimental)
+if(${CTEST_SCRIPT_ARG} MATCHES Nightly)
+ set(MODEL Nightly)
+elseif(${CTEST_SCRIPT_ARG} MATCHES Continuous)
+ set(MODEL Continuous)
+endif()
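+# the test model can be selected when invoking this script, e.g.: ctest -S cdashtesting.cmake,Nightly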
+
+find_program(CTEST_HG_COMMAND NAMES hg)
+set(CTEST_UPDATE_COMMAND "${CTEST_HG_COMMAND}")
+
+ctest_start(${MODEL} ${CTEST_SOURCE_DIRECTORY} ${CTEST_BINARY_DIRECTORY})
+
+ctest_update(SOURCE "${CTEST_SOURCE_DIRECTORY}")
+ctest_submit(PARTS Update Notes)
+
+# to get CTEST_PROJECT_SUBPROJECTS definition:
+include("${CTEST_SOURCE_DIRECTORY}/CTestConfig.cmake")
+
+foreach(subproject ${CTEST_PROJECT_SUBPROJECTS})
+ message("")
+ message("Process ${subproject}")
+
+ set_property(GLOBAL PROPERTY SubProject ${subproject})
+ set_property(GLOBAL PROPERTY Label ${subproject})
+
+ ctest_configure(BUILD ${CTEST_BINARY_DIRECTORY} SOURCE ${CTEST_SOURCE_DIRECTORY} )
+ ctest_submit(PARTS Configure)
+
+ set(CTEST_BUILD_TARGET "Build${subproject}")
+ message("Build ${CTEST_BUILD_TARGET}")
+ ctest_build(BUILD "${CTEST_BINARY_DIRECTORY}" APPEND)
+ # builds target ${CTEST_BUILD_TARGET}
+ ctest_submit(PARTS Build)
+
+ ctest_test(BUILD "${CTEST_BINARY_DIRECTORY}" INCLUDE_LABEL "${subproject}" )
+ # runs only tests that have a LABELS property matching "${subproject}"
+
+ ctest_coverage(BUILD "${CTEST_BINARY_DIRECTORY}" LABELS "${subproject}" )
+
+ ctest_submit(PARTS Test)
+
+endforeach()
diff --git a/scripts/eigen_gen_docs b/scripts/eigen_gen_docs
index 921d600ed..9b71cd8e0 100644
--- a/scripts/eigen_gen_docs
+++ b/scripts/eigen_gen_docs
@@ -8,6 +8,7 @@ USER=${USER:-'orzel'}
#ulimit -v 1024000
# step 1 : build
+rm build/doc/html -Rf
mkdir build -p
(cd build && cmake .. && make doc) || { echo "make failed"; exit 1; }
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 8f8fedc91..be9617d85 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -122,6 +122,9 @@ if(TEST_LIB)
add_definitions("-DEIGEN_EXTERN_INSTANTIATIONS=1")
endif(TEST_LIB)
+set_property(GLOBAL PROPERTY EIGEN_CURRENT_SUBPROJECT "Official")
+add_custom_target(BuildOfficial)
+
ei_add_test(meta)
ei_add_test(sizeof)
ei_add_test(dynalloc)
@@ -157,6 +160,7 @@ ei_add_test(array)
ei_add_test(array_for_matrix)
ei_add_test(array_replicate)
ei_add_test(array_reverse)
+ei_add_test(ref)
ei_add_test(triangular)
ei_add_test(selfadjoint)
ei_add_test(product_selfadjoint)
@@ -200,9 +204,6 @@ ei_add_test(stdvector_overload)
ei_add_test(stdlist)
ei_add_test(stddeque)
ei_add_test(resize)
-if(QT4_FOUND)
- ei_add_test(qtvector "" "${QT_QTCORE_LIBRARY}")
-endif(QT4_FOUND)
ei_add_test(sparse_vector)
ei_add_test(sparse_basic)
ei_add_test(sparse_product)
@@ -213,7 +214,6 @@ ei_add_test(swap)
ei_add_test(conservative_resize)
ei_add_test(permutationmatrices)
ei_add_test(sparse_permutations)
-ei_add_test(eigen2support)
ei_add_test(nullary)
ei_add_test(nesting_ops "${CMAKE_CXX_FLAGS_DEBUG}")
ei_add_test(zerosized)
@@ -222,6 +222,7 @@ ei_add_test(evaluators)
ei_add_test(sizeoverflow)
ei_add_test(prec_inverse_4x4)
ei_add_test(vectorwiseop)
+ei_add_test(special_numbers)
ei_add_test(simplicial_cholesky)
ei_add_test(conjugate_gradient)
@@ -231,6 +232,12 @@ ei_add_test(sparseqr)
# ei_add_test(denseLM)
+if(QT4_FOUND)
+ ei_add_test(qtvector "" "${QT_QTCORE_LIBRARY}")
+endif(QT4_FOUND)
+
+ei_add_test(eigen2support)
+
if(UMFPACK_FOUND)
ei_add_test(umfpack_support "" "${UMFPACK_ALL_LIBS}")
endif()
diff --git a/test/adjoint.cpp b/test/adjoint.cpp
index b35e5674b..72ad9e407 100644
--- a/test/adjoint.cpp
+++ b/test/adjoint.cpp
@@ -11,6 +11,47 @@
#include "main.h"
+template<bool IsInteger> struct adjoint_specific;
+
+template<> struct adjoint_specific<true> {
+ template<typename Vec, typename Mat, typename Scalar>
+ static void run(const Vec& v1, const Vec& v2, Vec& v3, const Mat& square, Scalar s1, Scalar s2) {
+ VERIFY(test_isApproxWithRef((s1 * v1 + s2 * v2).dot(v3), internal::conj(s1) * v1.dot(v3) + internal::conj(s2) * v2.dot(v3), 0));
+ VERIFY(test_isApproxWithRef(v3.dot(s1 * v1 + s2 * v2), s1*v3.dot(v1)+s2*v3.dot(v2), 0));
+
+ // check compatibility of dot and adjoint
+ VERIFY(test_isApproxWithRef(v1.dot(square * v2), (square.adjoint() * v1).dot(v2), 0));
+ }
+};
+
+template<> struct adjoint_specific<false> {
+ template<typename Vec, typename Mat, typename Scalar>
+ static void run(const Vec& v1, const Vec& v2, Vec& v3, const Mat& square, Scalar s1, Scalar s2) {
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+
+ RealScalar ref = NumTraits<Scalar>::IsInteger ? RealScalar(0) : (std::max)((s1 * v1 + s2 * v2).norm(),v3.norm());
+ VERIFY(test_isApproxWithRef((s1 * v1 + s2 * v2).dot(v3), internal::conj(s1) * v1.dot(v3) + internal::conj(s2) * v2.dot(v3), ref));
+ VERIFY(test_isApproxWithRef(v3.dot(s1 * v1 + s2 * v2), s1*v3.dot(v1)+s2*v3.dot(v2), ref));
+
+ VERIFY_IS_APPROX(v1.squaredNorm(), v1.norm() * v1.norm());
+ // check normalized() and normalize()
+ VERIFY_IS_APPROX(v1, v1.norm() * v1.normalized());
+ v3 = v1;
+ v3.normalize();
+ VERIFY_IS_APPROX(v1, v1.norm() * v3);
+ VERIFY_IS_APPROX(v3, v1.normalized());
+ VERIFY_IS_APPROX(v3.norm(), RealScalar(1));
+
+ // check compatibility of dot and adjoint
+ ref = NumTraits<Scalar>::IsInteger ? 0 : (std::max)((std::max)(v1.norm(),v2.norm()),(std::max)((square * v2).norm(),(square.adjoint() * v1).norm()));
+ VERIFY(test_isApproxWithRef(v1.dot(square * v2), (square.adjoint() * v1).dot(v2), ref));
+
+ // check that Random().normalized() works: tricky as the random xpr must be evaluated by
+ // normalized() in order to produce a consistent result.
+ VERIFY_IS_APPROX(Vec::Random(v1.size()).normalized().norm(), RealScalar(1));
+ }
+};
+
template<typename MatrixType> void adjoint(const MatrixType& m)
{
/* this test covers the following files:
@@ -46,44 +87,20 @@ template<typename MatrixType> void adjoint(const MatrixType& m)
VERIFY_IS_APPROX((m1.adjoint() * m2).adjoint(), m2.adjoint() * m1);
VERIFY_IS_APPROX((s1 * m1).adjoint(), internal::conj(s1) * m1.adjoint());
- // check basic properties of dot, norm, norm2
- typedef typename NumTraits<Scalar>::Real RealScalar;
-
- RealScalar ref = NumTraits<Scalar>::IsInteger ? RealScalar(0) : (std::max)((s1 * v1 + s2 * v2).norm(),v3.norm());
- VERIFY(test_isApproxWithRef((s1 * v1 + s2 * v2).dot(v3), internal::conj(s1) * v1.dot(v3) + internal::conj(s2) * v2.dot(v3), ref));
- VERIFY(test_isApproxWithRef(v3.dot(s1 * v1 + s2 * v2), s1*v3.dot(v1)+s2*v3.dot(v2), ref));
+ // check basic properties of dot, squaredNorm
VERIFY_IS_APPROX(internal::conj(v1.dot(v2)), v2.dot(v1));
- VERIFY_IS_APPROX(internal::real(v1.dot(v1)), v1.squaredNorm());
- if(!NumTraits<Scalar>::IsInteger) {
- VERIFY_IS_APPROX(v1.squaredNorm(), v1.norm() * v1.norm());
- // check normalized() and normalize()
- VERIFY_IS_APPROX(v1, v1.norm() * v1.normalized());
- v3 = v1;
- v3.normalize();
- VERIFY_IS_APPROX(v1, v1.norm() * v3);
- VERIFY_IS_APPROX(v3, v1.normalized());
- VERIFY_IS_APPROX(v3.norm(), RealScalar(1));
- }
- VERIFY_IS_MUCH_SMALLER_THAN(abs(vzero.dot(v1)), static_cast<RealScalar>(1));
+ VERIFY_IS_APPROX(internal::real(v1.dot(v1)), v1.squaredNorm());
- // check compatibility of dot and adjoint
+ adjoint_specific<NumTraits<Scalar>::IsInteger>::run(v1, v2, v3, square, s1, s2);
+
+ VERIFY_IS_MUCH_SMALLER_THAN(abs(vzero.dot(v1)), static_cast<RealScalar>(1));
- ref = NumTraits<Scalar>::IsInteger ? 0 : (std::max)((std::max)(v1.norm(),v2.norm()),(std::max)((square * v2).norm(),(square.adjoint() * v1).norm()));
- VERIFY(test_isApproxWithRef(v1.dot(square * v2), (square.adjoint() * v1).dot(v2), ref));
-
// like in testBasicStuff, test operator() to check const-qualification
Index r = internal::random<Index>(0, rows-1),
c = internal::random<Index>(0, cols-1);
VERIFY_IS_APPROX(m1.conjugate()(r,c), internal::conj(m1(r,c)));
VERIFY_IS_APPROX(m1.adjoint()(c,r), internal::conj(m1(r,c)));
- if(!NumTraits<Scalar>::IsInteger)
- {
- // check that Random().normalized() works: tricky as the random xpr must be evaluated by
- // normalized() in order to produce a consistent result.
- VERIFY_IS_APPROX(VectorType::Random(rows).normalized().norm(), RealScalar(1));
- }
-
// check inplace transpose
m3 = m1;
m3.transposeInPlace();
diff --git a/test/array.cpp b/test/array.cpp
index 4c6393d9a..ceb00fa05 100644
--- a/test/array.cpp
+++ b/test/array.cpp
@@ -13,7 +13,6 @@ template<typename ArrayType> void array(const ArrayType& m)
{
typedef typename ArrayType::Index Index;
typedef typename ArrayType::Scalar Scalar;
- typedef typename NumTraits<Scalar>::Real RealScalar;
typedef Array<Scalar, ArrayType::RowsAtCompileTime, 1> ColVectorType;
typedef Array<Scalar, 1, ArrayType::ColsAtCompileTime> RowVectorType;
@@ -64,9 +63,12 @@ template<typename ArrayType> void array(const ArrayType& m)
VERIFY_IS_APPROX(m1, m3 / m2);
// reductions
- VERIFY_IS_APPROX(m1.colwise().sum().sum(), m1.sum());
- VERIFY_IS_APPROX(m1.rowwise().sum().sum(), m1.sum());
- if (!internal::isApprox(m1.sum(), (m1+m2).sum(), test_precision<Scalar>()))
+ VERIFY_IS_APPROX(m1.abs().colwise().sum().sum(), m1.abs().sum());
+ VERIFY_IS_APPROX(m1.abs().rowwise().sum().sum(), m1.abs().sum());
+ using std::abs;
+ VERIFY_IS_MUCH_SMALLER_THAN(abs(m1.colwise().sum().sum() - m1.sum()), m1.abs().sum());
+ VERIFY_IS_MUCH_SMALLER_THAN(abs(m1.rowwise().sum().sum() - m1.sum()), m1.abs().sum());
+ if (!internal::isMuchSmallerThan(abs(m1.sum() - (m1+m2).sum()), m1.abs().sum(), test_precision<Scalar>()))
VERIFY_IS_NOT_APPROX(((m1+m2).rowwise().sum()).sum(), m1.sum());
VERIFY_IS_APPROX(m1.colwise().sum(), m1.colwise().redux(internal::scalar_sum_op<Scalar>()));
@@ -87,7 +89,6 @@ template<typename ArrayType> void comparisons(const ArrayType& m)
typedef typename ArrayType::Index Index;
typedef typename ArrayType::Scalar Scalar;
typedef typename NumTraits<Scalar>::Real RealScalar;
- typedef Array<Scalar, ArrayType::RowsAtCompileTime, 1> VectorType;
Index rows = m.rows();
Index cols = m.cols();
@@ -188,8 +189,7 @@ template<typename ArrayType> void array_real(const ArrayType& m)
if(!NumTraits<Scalar>::IsComplex)
VERIFY_IS_APPROX(internal::real(m1), m1);
- //VERIFY_IS_APPROX(m1.abs().log(), std::log(std::abs(m1)));
- VERIFY_IS_APPROX(m1.abs().log(), log(abs(m1)));
+ VERIFY((m1.abs().log() == log(abs(m1))).all());
// VERIFY_IS_APPROX(m1.exp(), std::exp(m1));
VERIFY_IS_APPROX(m1.exp() * m2.exp(), exp(m1+m2));
diff --git a/test/array_for_matrix.cpp b/test/array_for_matrix.cpp
index cd8ef03a0..99cda1ffe 100644
--- a/test/array_for_matrix.cpp
+++ b/test/array_for_matrix.cpp
@@ -13,7 +13,6 @@ template<typename MatrixType> void array_for_matrix(const MatrixType& m)
{
typedef typename MatrixType::Index Index;
typedef typename MatrixType::Scalar Scalar;
- typedef typename NumTraits<Scalar>::Real RealScalar;
typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, 1> ColVectorType;
typedef Matrix<Scalar, 1, MatrixType::ColsAtCompileTime> RowVectorType;
@@ -42,10 +41,10 @@ template<typename MatrixType> void array_for_matrix(const MatrixType& m)
VERIFY_IS_APPROX(m3, (m1.array() - s1).matrix());
// reductions
- VERIFY_IS_MUCH_SMALLER_THAN(m1.colwise().sum().sum() - m1.sum(), m1.cwiseAbs().maxCoeff());
- VERIFY_IS_MUCH_SMALLER_THAN(m1.rowwise().sum().sum() - m1.sum(), m1.cwiseAbs().maxCoeff());
- VERIFY_IS_MUCH_SMALLER_THAN(m1.colwise().sum() + m2.colwise().sum() - (m1+m2).colwise().sum(), (m1+m2).cwiseAbs().maxCoeff());
- VERIFY_IS_MUCH_SMALLER_THAN(m1.rowwise().sum() - m2.rowwise().sum() - (m1-m2).rowwise().sum(), (m1-m2).cwiseAbs().maxCoeff());
+ VERIFY_IS_MUCH_SMALLER_THAN(m1.colwise().sum().sum() - m1.sum(), m1.squaredNorm());
+ VERIFY_IS_MUCH_SMALLER_THAN(m1.rowwise().sum().sum() - m1.sum(), m1.squaredNorm());
+ VERIFY_IS_MUCH_SMALLER_THAN(m1.colwise().sum() + m2.colwise().sum() - (m1+m2).colwise().sum(), (m1+m2).squaredNorm());
+ VERIFY_IS_MUCH_SMALLER_THAN(m1.rowwise().sum() - m2.rowwise().sum() - (m1-m2).rowwise().sum(), (m1-m2).squaredNorm());
VERIFY_IS_APPROX(m1.colwise().sum(), m1.colwise().redux(internal::scalar_sum_op<Scalar>()));
// vector-wise ops
@@ -77,7 +76,6 @@ template<typename MatrixType> void comparisons(const MatrixType& m)
typedef typename MatrixType::Index Index;
typedef typename MatrixType::Scalar Scalar;
typedef typename NumTraits<Scalar>::Real RealScalar;
- typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, 1> VectorType;
Index rows = m.rows();
Index cols = m.cols();
diff --git a/test/array_replicate.cpp b/test/array_replicate.cpp
index 94da7425b..f412d1aed 100644
--- a/test/array_replicate.cpp
+++ b/test/array_replicate.cpp
@@ -16,7 +16,6 @@ template<typename MatrixType> void replicate(const MatrixType& m)
*/
typedef typename MatrixType::Index Index;
typedef typename MatrixType::Scalar Scalar;
- typedef typename NumTraits<Scalar>::Real RealScalar;
typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, 1> VectorType;
typedef Matrix<Scalar, Dynamic, Dynamic> MatrixX;
typedef Matrix<Scalar, Dynamic, 1> VectorX;
diff --git a/test/basicstuff.cpp b/test/basicstuff.cpp
index 48db531c1..0fbae19e8 100644
--- a/test/basicstuff.cpp
+++ b/test/basicstuff.cpp
@@ -52,8 +52,7 @@ template<typename MatrixType> void basicStuff(const MatrixType& m)
VERIFY_IS_APPROX( v1, v1);
VERIFY_IS_NOT_APPROX( v1, 2*v1);
VERIFY_IS_MUCH_SMALLER_THAN( vzero, v1);
- if(!NumTraits<Scalar>::IsInteger)
- VERIFY_IS_MUCH_SMALLER_THAN( vzero, v1.norm());
+ VERIFY_IS_MUCH_SMALLER_THAN( vzero, v1.squaredNorm());
VERIFY_IS_NOT_MUCH_SMALLER_THAN(v1, v1);
VERIFY_IS_APPROX( vzero, v1-v1);
VERIFY_IS_APPROX( m1, m1);
diff --git a/test/cholesky.cpp b/test/cholesky.cpp
index 49c79f9c8..ca7ecb1f4 100644
--- a/test/cholesky.cpp
+++ b/test/cholesky.cpp
@@ -68,7 +68,6 @@ template<typename MatrixType> void cholesky(const MatrixType& m)
Index cols = m.cols();
typedef typename MatrixType::Scalar Scalar;
- typedef typename NumTraits<Scalar>::Real RealScalar;
typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::RowsAtCompileTime> SquareMatrixType;
typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, 1> VectorType;
diff --git a/test/conservative_resize.cpp b/test/conservative_resize.cpp
index 4d11e4075..2d1ab3f03 100644
--- a/test/conservative_resize.cpp
+++ b/test/conservative_resize.cpp
@@ -95,20 +95,23 @@ void run_vector_tests()
void test_conservative_resize()
{
- CALL_SUBTEST_1((run_matrix_tests<int, Eigen::RowMajor>()));
- CALL_SUBTEST_1((run_matrix_tests<int, Eigen::ColMajor>()));
- CALL_SUBTEST_2((run_matrix_tests<float, Eigen::RowMajor>()));
- CALL_SUBTEST_2((run_matrix_tests<float, Eigen::ColMajor>()));
- CALL_SUBTEST_3((run_matrix_tests<double, Eigen::RowMajor>()));
- CALL_SUBTEST_3((run_matrix_tests<double, Eigen::ColMajor>()));
- CALL_SUBTEST_4((run_matrix_tests<std::complex<float>, Eigen::RowMajor>()));
- CALL_SUBTEST_4((run_matrix_tests<std::complex<float>, Eigen::ColMajor>()));
- CALL_SUBTEST_5((run_matrix_tests<std::complex<double>, Eigen::RowMajor>()));
- CALL_SUBTEST_6((run_matrix_tests<std::complex<double>, Eigen::ColMajor>()));
-
- CALL_SUBTEST_1((run_vector_tests<int>()));
- CALL_SUBTEST_2((run_vector_tests<float>()));
- CALL_SUBTEST_3((run_vector_tests<double>()));
- CALL_SUBTEST_4((run_vector_tests<std::complex<float> >()));
- CALL_SUBTEST_5((run_vector_tests<std::complex<double> >()));
+ for(int i=0; i<g_repeat; ++i)
+ {
+ CALL_SUBTEST_1((run_matrix_tests<int, Eigen::RowMajor>()));
+ CALL_SUBTEST_1((run_matrix_tests<int, Eigen::ColMajor>()));
+ CALL_SUBTEST_2((run_matrix_tests<float, Eigen::RowMajor>()));
+ CALL_SUBTEST_2((run_matrix_tests<float, Eigen::ColMajor>()));
+ CALL_SUBTEST_3((run_matrix_tests<double, Eigen::RowMajor>()));
+ CALL_SUBTEST_3((run_matrix_tests<double, Eigen::ColMajor>()));
+ CALL_SUBTEST_4((run_matrix_tests<std::complex<float>, Eigen::RowMajor>()));
+ CALL_SUBTEST_4((run_matrix_tests<std::complex<float>, Eigen::ColMajor>()));
+ CALL_SUBTEST_5((run_matrix_tests<std::complex<double>, Eigen::RowMajor>()));
+ CALL_SUBTEST_6((run_matrix_tests<std::complex<double>, Eigen::ColMajor>()));
+
+ CALL_SUBTEST_1((run_vector_tests<int>()));
+ CALL_SUBTEST_2((run_vector_tests<float>()));
+ CALL_SUBTEST_3((run_vector_tests<double>()));
+ CALL_SUBTEST_4((run_vector_tests<std::complex<float> >()));
+ CALL_SUBTEST_5((run_vector_tests<std::complex<double> >()));
+ }
}
diff --git a/test/cwiseop.cpp b/test/cwiseop.cpp
index 24fc26d52..247fa2a09 100644
--- a/test/cwiseop.cpp
+++ b/test/cwiseop.cpp
@@ -60,7 +60,6 @@ template<typename MatrixType>
typename Eigen::internal::enable_if<NumTraits<typename MatrixType::Scalar>::IsInteger,typename MatrixType::Scalar>::type
cwiseops_real_only(MatrixType& , MatrixType& , MatrixType& , MatrixType& )
{
- typedef typename MatrixType::Scalar Scalar;
return 0;
}
@@ -68,13 +67,13 @@ template<typename MatrixType> void cwiseops(const MatrixType& m)
{
typedef typename MatrixType::Index Index;
typedef typename MatrixType::Scalar Scalar;
- typedef typename NumTraits<Scalar>::Real RealScalar;
typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, 1> VectorType;
Index rows = m.rows();
Index cols = m.cols();
MatrixType m1 = MatrixType::Random(rows, cols),
+ m1bis = m1,
m2 = MatrixType::Random(rows, cols),
m3(rows, cols),
m4(rows, cols),
@@ -164,8 +163,8 @@ template<typename MatrixType> void cwiseops(const MatrixType& m)
VERIFY( (m1.cwise().max(m2).cwise() > (m1-mones)).all() );
VERIFY( (m1.cwise()<m1.unaryExpr(bind2nd(plus<Scalar>(), Scalar(1)))).all() );
- VERIFY( !(m1.cwise()<m1.unaryExpr(bind2nd(minus<Scalar>(), Scalar(1)))).all() );
- VERIFY( !(m1.cwise()>m1.unaryExpr(bind2nd(plus<Scalar>(), Scalar(1)))).any() );
+ VERIFY( !(m1.cwise()<m1bis.unaryExpr(bind2nd(minus<Scalar>(), Scalar(1)))).all() );
+ VERIFY( !(m1.cwise()>m1bis.unaryExpr(bind2nd(plus<Scalar>(), Scalar(1)))).any() );
cwiseops_real_only(m1, m2, m3, mones);
}
diff --git a/test/diagonal.cpp b/test/diagonal.cpp
index 0f09a9dfe..53814a588 100644
--- a/test/diagonal.cpp
+++ b/test/diagonal.cpp
@@ -13,9 +13,6 @@ template<typename MatrixType> void diagonal(const MatrixType& m)
{
typedef typename MatrixType::Index Index;
typedef typename MatrixType::Scalar Scalar;
- typedef typename MatrixType::RealScalar RealScalar;
- typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, 1> VectorType;
- typedef Matrix<Scalar, 1, MatrixType::ColsAtCompileTime> RowVectorType;
Index rows = m.rows();
Index cols = m.cols();
@@ -53,12 +50,12 @@ template<typename MatrixType> void diagonal(const MatrixType& m)
VERIFY_IS_APPROX(m2.template diagonal<N2>()[0], static_cast<Scalar>(6) * m1.template diagonal<N2>()[0]);
m2.diagonal(N1) = 2 * m1.diagonal(N1);
- VERIFY_IS_APPROX(m2.diagonal<N1>(), static_cast<Scalar>(2) * m1.diagonal(N1));
+ VERIFY_IS_APPROX(m2.template diagonal<N1>(), static_cast<Scalar>(2) * m1.diagonal(N1));
m2.diagonal(N1)[0] *= 3;
VERIFY_IS_APPROX(m2.diagonal(N1)[0], static_cast<Scalar>(6) * m1.diagonal(N1)[0]);
m2.diagonal(N2) = 2 * m1.diagonal(N2);
- VERIFY_IS_APPROX(m2.diagonal<N2>(), static_cast<Scalar>(2) * m1.diagonal(N2));
+ VERIFY_IS_APPROX(m2.template diagonal<N2>(), static_cast<Scalar>(2) * m1.diagonal(N2));
m2.diagonal(N2)[0] *= 3;
VERIFY_IS_APPROX(m2.diagonal(N2)[0], static_cast<Scalar>(6) * m1.diagonal(N2)[0]);
}
diff --git a/test/diagonalmatrices.cpp b/test/diagonalmatrices.cpp
index 7e9c80d7b..149f1db2f 100644
--- a/test/diagonalmatrices.cpp
+++ b/test/diagonalmatrices.cpp
@@ -13,7 +13,6 @@ template<typename MatrixType> void diagonalmatrices(const MatrixType& m)
{
typedef typename MatrixType::Index Index;
typedef typename MatrixType::Scalar Scalar;
- typedef typename MatrixType::RealScalar RealScalar;
enum { Rows = MatrixType::RowsAtCompileTime, Cols = MatrixType::ColsAtCompileTime };
typedef Matrix<Scalar, Rows, 1> VectorType;
typedef Matrix<Scalar, 1, Cols> RowVectorType;
diff --git a/test/eigensolver_complex.cpp b/test/eigensolver_complex.cpp
index aef125739..817fbf2c2 100644
--- a/test/eigensolver_complex.cpp
+++ b/test/eigensolver_complex.cpp
@@ -41,9 +41,6 @@ template<typename MatrixType> void eigensolver(const MatrixType& m)
typedef typename MatrixType::Scalar Scalar;
typedef typename NumTraits<Scalar>::Real RealScalar;
- typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, 1> VectorType;
- typedef Matrix<RealScalar, MatrixType::RowsAtCompileTime, 1> RealVectorType;
- typedef typename std::complex<typename NumTraits<typename MatrixType::Scalar>::Real> Complex;
MatrixType a = MatrixType::Random(rows,cols);
MatrixType symmA = a.adjoint() * a;
diff --git a/test/eigensolver_generalized_real.cpp b/test/eigensolver_generalized_real.cpp
index e3edbb772..b8775871d 100644
--- a/test/eigensolver_generalized_real.cpp
+++ b/test/eigensolver_generalized_real.cpp
@@ -21,10 +21,7 @@ template<typename MatrixType> void generalized_eigensolver_real(const MatrixType
Index cols = m.cols();
typedef typename MatrixType::Scalar Scalar;
- typedef typename NumTraits<Scalar>::Real RealScalar;
typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, 1> VectorType;
- typedef Matrix<RealScalar, MatrixType::RowsAtCompileTime, 1> RealVectorType;
- typedef typename std::complex<typename NumTraits<typename MatrixType::Scalar>::Real> Complex;
MatrixType a = MatrixType::Random(rows,cols);
MatrixType b = MatrixType::Random(rows,cols);
diff --git a/test/eigensolver_generic.cpp b/test/eigensolver_generic.cpp
index ef499a989..a8bbf9007 100644
--- a/test/eigensolver_generic.cpp
+++ b/test/eigensolver_generic.cpp
@@ -23,7 +23,6 @@ template<typename MatrixType> void eigensolver(const MatrixType& m)
typedef typename MatrixType::Scalar Scalar;
typedef typename NumTraits<Scalar>::Real RealScalar;
- typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, 1> VectorType;
typedef Matrix<RealScalar, MatrixType::RowsAtCompileTime, 1> RealVectorType;
typedef typename std::complex<typename NumTraits<typename MatrixType::Scalar>::Real> Complex;
diff --git a/test/eigensolver_selfadjoint.cpp b/test/eigensolver_selfadjoint.cpp
index 02dbdb429..55b7eea8d 100644
--- a/test/eigensolver_selfadjoint.cpp
+++ b/test/eigensolver_selfadjoint.cpp
@@ -23,9 +23,6 @@ template<typename MatrixType> void selfadjointeigensolver(const MatrixType& m)
typedef typename MatrixType::Scalar Scalar;
typedef typename NumTraits<Scalar>::Real RealScalar;
- typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, 1> VectorType;
- typedef Matrix<RealScalar, MatrixType::RowsAtCompileTime, 1> RealVectorType;
- typedef typename std::complex<typename NumTraits<typename MatrixType::Scalar>::Real> Complex;
RealScalar largerEps = 10*test_precision<RealScalar>();
diff --git a/test/exceptions.cpp b/test/exceptions.cpp
index 8c48b2f7b..b83fb82ba 100644
--- a/test/exceptions.cpp
+++ b/test/exceptions.cpp
@@ -69,6 +69,10 @@ class ScalarWithExceptions
static int countdown;
};
+ScalarWithExceptions real(const ScalarWithExceptions &x) { return x; }
+ScalarWithExceptions imag(const ScalarWithExceptions & ) { return 0; }
+ScalarWithExceptions conj(const ScalarWithExceptions &x) { return x; }
+
int ScalarWithExceptions::instances = 0;
int ScalarWithExceptions::countdown = 0;
diff --git a/test/geo_alignedbox.cpp b/test/geo_alignedbox.cpp
index 4a51fc71e..e9fbfddf1 100644
--- a/test/geo_alignedbox.cpp
+++ b/test/geo_alignedbox.cpp
@@ -71,7 +71,6 @@ void alignedboxCastTests(const BoxType& _box)
// casting
typedef typename BoxType::Index Index;
typedef typename BoxType::Scalar Scalar;
- typedef typename NumTraits<Scalar>::Real RealScalar;
typedef Matrix<Scalar, BoxType::AmbientDimAtCompileTime, 1> VectorType;
const Index dim = _box.dim();
diff --git a/test/geo_hyperplane.cpp b/test/geo_hyperplane.cpp
index 2845ba95d..f26fc1329 100644
--- a/test/geo_hyperplane.cpp
+++ b/test/geo_hyperplane.cpp
@@ -22,7 +22,6 @@ template<typename HyperplaneType> void hyperplane(const HyperplaneType& _plane)
const Index dim = _plane.dim();
enum { Options = HyperplaneType::Options };
typedef typename HyperplaneType::Scalar Scalar;
- typedef typename NumTraits<Scalar>::Real RealScalar;
typedef Matrix<Scalar, HyperplaneType::AmbientDimAtCompileTime, 1> VectorType;
typedef Matrix<Scalar, HyperplaneType::AmbientDimAtCompileTime,
HyperplaneType::AmbientDimAtCompileTime> MatrixType;
diff --git a/test/geo_parametrizedline.cpp b/test/geo_parametrizedline.cpp
index 7b2e34abe..f0462d40a 100644
--- a/test/geo_parametrizedline.cpp
+++ b/test/geo_parametrizedline.cpp
@@ -24,8 +24,6 @@ template<typename LineType> void parametrizedline(const LineType& _line)
typedef typename LineType::Scalar Scalar;
typedef typename NumTraits<Scalar>::Real RealScalar;
typedef Matrix<Scalar, LineType::AmbientDimAtCompileTime, 1> VectorType;
- typedef Matrix<Scalar, LineType::AmbientDimAtCompileTime,
- LineType::AmbientDimAtCompileTime> MatrixType;
typedef Hyperplane<Scalar,LineType::AmbientDimAtCompileTime> HyperplaneType;
VectorType p0 = VectorType::Random(dim);
diff --git a/test/geo_quaternion.cpp b/test/geo_quaternion.cpp
index 568a5f582..06b3af7c1 100644
--- a/test/geo_quaternion.cpp
+++ b/test/geo_quaternion.cpp
@@ -25,7 +25,6 @@ template<typename QuatType> void check_slerp(const QuatType& q0, const QuatType&
{
using std::abs;
typedef typename QuatType::Scalar Scalar;
- typedef Matrix<Scalar,3,1> VectorType;
typedef AngleAxis<Scalar> AA;
Scalar largeEps = test_precision<Scalar>();
@@ -49,7 +48,6 @@ template<typename Scalar, int Options> void quaternion(void)
Quaternion.h
*/
using std::abs;
- typedef Matrix<Scalar,3,3> Matrix3;
typedef Matrix<Scalar,3,1> Vector3;
typedef Matrix<Scalar,4,1> Vector4;
typedef Quaternion<Scalar,Options> Quaternionx;
diff --git a/test/geo_transformations.cpp b/test/geo_transformations.cpp
index 30a0aba66..35ae67ebe 100644
--- a/test/geo_transformations.cpp
+++ b/test/geo_transformations.cpp
@@ -17,22 +17,11 @@ template<typename Scalar, int Mode, int Options> void non_projective_only()
/* this test covers the following files:
Cross.h Quaternion.h, Transform.cpp
*/
- typedef Matrix<Scalar,2,2> Matrix2;
- typedef Matrix<Scalar,3,3> Matrix3;
- typedef Matrix<Scalar,4,4> Matrix4;
- typedef Matrix<Scalar,2,1> Vector2;
typedef Matrix<Scalar,3,1> Vector3;
- typedef Matrix<Scalar,4,1> Vector4;
typedef Quaternion<Scalar> Quaternionx;
typedef AngleAxis<Scalar> AngleAxisx;
- typedef Transform<Scalar,2,Mode,Options> Transform2;
typedef Transform<Scalar,3,Mode,Options> Transform3;
- typedef Transform<Scalar,2,Isometry,Options> Isometry2;
- typedef Transform<Scalar,3,Isometry,Options> Isometry3;
- typedef typename Transform3::MatrixType MatrixType;
- typedef DiagonalMatrix<Scalar,2> AlignedScaling2;
typedef DiagonalMatrix<Scalar,3> AlignedScaling3;
- typedef Translation<Scalar,2> Translation2;
typedef Translation<Scalar,3> Translation3;
Vector3 v0 = Vector3::Random(),
@@ -90,7 +79,6 @@ template<typename Scalar, int Mode, int Options> void transformations()
*/
using std::cos;
using std::abs;
- typedef Matrix<Scalar,2,2> Matrix2;
typedef Matrix<Scalar,3,3> Matrix3;
typedef Matrix<Scalar,4,4> Matrix4;
typedef Matrix<Scalar,2,1> Vector2;
@@ -100,10 +88,7 @@ template<typename Scalar, int Mode, int Options> void transformations()
typedef AngleAxis<Scalar> AngleAxisx;
typedef Transform<Scalar,2,Mode,Options> Transform2;
typedef Transform<Scalar,3,Mode,Options> Transform3;
- typedef Transform<Scalar,2,Isometry,Options> Isometry2;
- typedef Transform<Scalar,3,Isometry,Options> Isometry3;
typedef typename Transform3::MatrixType MatrixType;
- typedef DiagonalMatrix<Scalar,2> AlignedScaling2;
typedef DiagonalMatrix<Scalar,3> AlignedScaling3;
typedef Translation<Scalar,2> Translation2;
typedef Translation<Scalar,3> Translation3;
@@ -402,8 +387,8 @@ template<typename Scalar, int Mode, int Options> void transformations()
Rotation2D<double> r2d1d = r2d1.template cast<double>();
VERIFY_IS_APPROX(r2d1d.template cast<Scalar>(),r2d1);
- t20 = Translation2(v20) * (Rotation2D<Scalar>(s0) * Scaling(s0));
- t21 = Translation2(v20) * Rotation2D<Scalar>(s0) * Scaling(s0);
+ t20 = Translation2(v20) * (Rotation2D<Scalar>(s0) * Eigen::Scaling(s0));
+ t21 = Translation2(v20) * Rotation2D<Scalar>(s0) * Eigen::Scaling(s0);
VERIFY_IS_APPROX(t20,t21);
}
diff --git a/test/householder.cpp b/test/householder.cpp
index 203dce46c..1dac4331f 100644
--- a/test/householder.cpp
+++ b/test/householder.cpp
@@ -29,8 +29,6 @@ template<typename MatrixType> void householder(const MatrixType& m)
typedef Matrix<Scalar, Dynamic, MatrixType::ColsAtCompileTime> HBlockMatrixType;
typedef Matrix<Scalar, Dynamic, 1> HCoeffsVectorType;
- typedef Matrix<Scalar, MatrixType::ColsAtCompileTime, MatrixType::ColsAtCompileTime> RightSquareMatrixType;
- typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, Dynamic> VBlockMatrixType;
typedef Matrix<Scalar, MatrixType::ColsAtCompileTime, MatrixType::RowsAtCompileTime> TMatrixType;
Matrix<Scalar, EIGEN_SIZE_MAX(MatrixType::RowsAtCompileTime,MatrixType::ColsAtCompileTime), 1> _tmp((std::max)(rows,cols));
diff --git a/test/inverse.cpp b/test/inverse.cpp
index 5544eb671..8978a1877 100644
--- a/test/inverse.cpp
+++ b/test/inverse.cpp
@@ -22,8 +22,6 @@ template<typename MatrixType> void inverse(const MatrixType& m)
Index cols = m.cols();
typedef typename MatrixType::Scalar Scalar;
- typedef typename NumTraits<Scalar>::Real RealScalar;
- typedef Matrix<Scalar, MatrixType::ColsAtCompileTime, 1> VectorType;
MatrixType m1(rows, cols),
m2(rows, cols),
@@ -43,6 +41,9 @@ template<typename MatrixType> void inverse(const MatrixType& m)
VERIFY_IS_APPROX(MatrixType(m1.transpose().inverse()), MatrixType(m1.inverse().transpose()));
#if !defined(EIGEN_TEST_PART_5) && !defined(EIGEN_TEST_PART_6)
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+ typedef Matrix<Scalar, MatrixType::ColsAtCompileTime, 1> VectorType;
+
//computeInverseAndDetWithCheck tests
//First: an invertible matrix
bool invertible;
diff --git a/test/jacobi.cpp b/test/jacobi.cpp
index f64f5d08f..b123b9189 100644
--- a/test/jacobi.cpp
+++ b/test/jacobi.cpp
@@ -14,7 +14,6 @@
template<typename MatrixType, typename JacobiScalar>
void jacobi(const MatrixType& m = MatrixType())
{
- typedef typename MatrixType::Scalar Scalar;
typedef typename MatrixType::Index Index;
Index rows = m.rows();
Index cols = m.cols();
diff --git a/test/jacobisvd.cpp b/test/jacobisvd.cpp
index f6c567829..26da05037 100644
--- a/test/jacobisvd.cpp
+++ b/test/jacobisvd.cpp
@@ -27,11 +27,8 @@ void jacobisvd_check_full(const MatrixType& m, const JacobiSVD<MatrixType, QRPre
};
typedef typename MatrixType::Scalar Scalar;
- typedef typename NumTraits<Scalar>::Real RealScalar;
typedef Matrix<Scalar, RowsAtCompileTime, RowsAtCompileTime> MatrixUType;
typedef Matrix<Scalar, ColsAtCompileTime, ColsAtCompileTime> MatrixVType;
- typedef Matrix<Scalar, RowsAtCompileTime, 1> ColVectorType;
- typedef Matrix<Scalar, ColsAtCompileTime, 1> InputVectorType;
MatrixType sigma = MatrixType::Zero(rows,cols);
sigma.diagonal() = svd.singularValues().template cast<Scalar>();
diff --git a/test/lu.cpp b/test/lu.cpp
index 6cbcb0a95..25f86755a 100644
--- a/test/lu.cpp
+++ b/test/lu.cpp
@@ -14,7 +14,6 @@ using namespace std;
template<typename MatrixType> void lu_non_invertible()
{
typedef typename MatrixType::Index Index;
- typedef typename MatrixType::Scalar Scalar;
typedef typename MatrixType::RealScalar RealScalar;
/* this test covers the following files:
LU.h
@@ -100,7 +99,6 @@ template<typename MatrixType> void lu_invertible()
/* this test covers the following files:
LU.h
*/
- typedef typename MatrixType::Scalar Scalar;
typedef typename NumTraits<typename MatrixType::Scalar>::Real RealScalar;
int size = internal::random<int>(1,EIGEN_TEST_MAX_SIZE);
@@ -132,8 +130,6 @@ template<typename MatrixType> void lu_partial_piv()
PartialPivLU.h
*/
typedef typename MatrixType::Index Index;
- typedef typename MatrixType::Scalar Scalar;
- typedef typename NumTraits<typename MatrixType::Scalar>::Real RealScalar;
Index rows = internal::random<Index>(1,4);
Index cols = rows;
diff --git a/test/main.h b/test/main.h
index 2da327c17..3be0f9fca 100644
--- a/test/main.h
+++ b/test/main.h
@@ -14,6 +14,7 @@
#include <iostream>
#include <fstream>
#include <string>
+#include <sstream>
#include <vector>
#include <typeinfo>
#include <limits>
@@ -173,8 +174,13 @@ static void verify_impl(bool condition, const char *testname, const char *file,
{
if (!condition)
{
- std::cerr << "Test " << testname << " failed in " << file << " (" << line << ")" \
- << std::endl << " " << condition_as_string << std::endl << std::endl; \
+ std::cerr << "Test " << testname << " failed in " << file << " (" << line << ")"
+ << std::endl << " " << condition_as_string << std::endl;
+ std::cerr << "Stack:\n";
+ const int test_stack_size = static_cast<int>(Eigen::g_test_stack.size());
+ for(int i=test_stack_size-1; i>=0; --i)
+ std::cerr << " - " << Eigen::g_test_stack[i] << "\n";
+ std::cerr << "\n";
abort();
}
}
@@ -399,7 +405,7 @@ void set_repeat_from_string(const char *str)
void set_seed_from_string(const char *str)
{
errno = 0;
- g_seed = strtoul(str, 0, 10);
+ g_seed = int(strtoul(str, 0, 10));
if(errno || g_seed == 0)
{
std::cout << "Invalid seed value " << str << std::endl;
@@ -462,6 +468,9 @@ int main(int argc, char *argv[])
if(!g_has_set_repeat) g_repeat = DEFAULT_REPEAT;
std::cout << "Initializing random number generator with seed " << g_seed << std::endl;
+ std::stringstream ss;
+ ss << "Seed: " << g_seed;
+ g_test_stack.push_back(ss.str());
srand(g_seed);
std::cout << "Repeating each test " << g_repeat << " times" << std::endl;
diff --git a/test/map.cpp b/test/map.cpp
index fe983e802..2b52e4f38 100644
--- a/test/map.cpp
+++ b/test/map.cpp
@@ -102,9 +102,6 @@ template<typename VectorType> void map_static_methods(const VectorType& m)
template<typename PlainObjectType> void check_const_correctness(const PlainObjectType&)
{
- typedef typename PlainObjectType::Index Index;
- typedef typename PlainObjectType::Scalar Scalar;
-
// there's a lot that we can't test here while still having this test compile!
// the only possible approach would be to run a script trying to compile stuff and checking that it fails.
// CMake can help with that.
diff --git a/test/meta.cpp b/test/meta.cpp
index 0ba968ba9..3302c5887 100644
--- a/test/meta.cpp
+++ b/test/meta.cpp
@@ -11,9 +11,6 @@
void test_meta()
{
- typedef float & FloatRef;
- typedef const float & ConstFloatRef;
-
VERIFY((internal::conditional<(3<4),internal::true_type, internal::false_type>::type::value));
VERIFY(( internal::is_same<float,float>::value));
VERIFY((!internal::is_same<float,double>::value));
diff --git a/test/miscmatrices.cpp b/test/miscmatrices.cpp
index af0481cfe..ef20dc749 100644
--- a/test/miscmatrices.cpp
+++ b/test/miscmatrices.cpp
@@ -17,7 +17,6 @@ template<typename MatrixType> void miscMatrices(const MatrixType& m)
typedef typename MatrixType::Index Index;
typedef typename MatrixType::Scalar Scalar;
typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, 1> VectorType;
- typedef Matrix<Scalar, 1, MatrixType::ColsAtCompileTime> RowVectorType;
Index rows = m.rows();
Index cols = m.cols();
diff --git a/test/nesting_ops.cpp b/test/nesting_ops.cpp
index 938ebcb7a..a92000f15 100644
--- a/test/nesting_ops.cpp
+++ b/test/nesting_ops.cpp
@@ -12,7 +12,6 @@
template <typename MatrixType> void run_nesting_ops(const MatrixType& _m)
{
typename MatrixType::Nested m(_m);
- typedef typename MatrixType::Scalar Scalar;
#ifdef NDEBUG
const bool is_debug = false;
diff --git a/test/nomalloc.cpp b/test/nomalloc.cpp
index d4ffcefcb..cbd02dd21 100644
--- a/test/nomalloc.cpp
+++ b/test/nomalloc.cpp
@@ -12,6 +12,12 @@
#ifdef __GNUC__
#define throw(X)
#endif
+
+#ifdef __INTEL_COMPILER
+ // disable "warning #76: argument to macro is empty" produced by the above hack
+ #pragma warning disable 76
+#endif
+
// discard stack allocation as that too bypasses malloc
#define EIGEN_STACK_ALLOCATION_LIMIT 0
// any heap allocation will raise an assert
@@ -30,7 +36,6 @@ template<typename MatrixType> void nomalloc(const MatrixType& m)
*/
typedef typename MatrixType::Index Index;
typedef typename MatrixType::Scalar Scalar;
- typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, 1> VectorType;
Index rows = m.rows();
Index cols = m.cols();
diff --git a/test/nullary.cpp b/test/nullary.cpp
index 1220e3f97..5408d88b2 100644
--- a/test/nullary.cpp
+++ b/test/nullary.cpp
@@ -91,6 +91,12 @@ void testVectorType(const VectorType& base)
scalar.setLinSpaced(1,low,high);
VERIFY_IS_APPROX( scalar, ScalarMatrix::Constant(high) );
VERIFY_IS_APPROX( ScalarMatrix::LinSpaced(1,low,high), ScalarMatrix::Constant(high) );
+
+ // regression test for bug 526 (linear vectorized traversal)
+ if (size > 1) {
+ m.tail(size-1).setLinSpaced(low, high);
+ VERIFY_IS_APPROX(m(size-1), high);
+ }
}
template<typename MatrixType>
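For context, the regression above concerns setLinSpaced() on a sub-vector: the linearly vectorized assignment path could leave the last coefficient slightly off the requested upper bound. A small sketch of the guarantee being checked (sizes picked arbitrarily):

#include <Eigen/Dense>
#include <cmath>
using namespace Eigen;

int main()
{
  VectorXd v = VectorXd::Zero(10);
  v.tail(9).setLinSpaced(0.0, 1.0); // unaligned destination, vectorized fill
  // the endpoint must be hit regardless of the traversal strategy used
  return std::abs(v(9) - 1.0) < 1e-12 ? 0 : 1;
}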
diff --git a/test/packetmath.cpp b/test/packetmath.cpp
index cb96d615c..9cdebd376 100644
--- a/test/packetmath.cpp
+++ b/test/packetmath.cpp
@@ -40,7 +40,7 @@ template<typename Scalar> bool areApprox(const Scalar* a, const Scalar* b, int s
{
for (int i=0; i<size; ++i)
{
- if (!internal::isApprox(a[i],b[i]))
+ if (a[i]!=b[i] && !internal::isApprox(a[i],b[i]))
{
std::cout << "[" << Map<const Matrix<Scalar,1,Dynamic> >(a,size) << "]" << " != " << Map<const Matrix<Scalar,1,Dynamic> >(b,size) << "\n";
return false;
@@ -145,7 +145,6 @@ template<typename Scalar> void packetmath()
for (int i=0; i<PacketSize; ++i)
ref[i] = data1[i+offset];
- typedef Matrix<Scalar, PacketSize, 1> Vector;
VERIFY(areApprox(ref, data2, PacketSize) && "internal::palign");
}
@@ -246,8 +245,23 @@ template<typename Scalar> void packetmath_real()
data1[i] = internal::random<Scalar>(0,1e6);
data2[i] = internal::random<Scalar>(0,1e6);
}
+ if(internal::random<float>(0,1)<0.1)
+ data1[internal::random<int>(0, PacketSize)] = 0;
CHECK_CWISE1_IF(internal::packet_traits<Scalar>::HasLog, std::log, internal::plog);
CHECK_CWISE1_IF(internal::packet_traits<Scalar>::HasSqrt, std::sqrt, internal::psqrt);
+}
+
+template<typename Scalar> void packetmath_notcomplex()
+{
+ using std::abs;
+ typedef typename internal::packet_traits<Scalar>::type Packet;
+ const int PacketSize = internal::packet_traits<Scalar>::size;
+
+ EIGEN_ALIGN16 Scalar data1[internal::packet_traits<Scalar>::size*4];
+ EIGEN_ALIGN16 Scalar data2[internal::packet_traits<Scalar>::size*4];
+ EIGEN_ALIGN16 Scalar ref[internal::packet_traits<Scalar>::size*4];
+
+ Array<Scalar,Dynamic,1>::Map(data1, internal::packet_traits<Scalar>::size*4).setRandom();
ref[0] = data1[0];
for (int i=0; i<PacketSize; ++i)
@@ -338,6 +352,10 @@ void test_packetmath()
CALL_SUBTEST_1( packetmath<std::complex<float> >() );
CALL_SUBTEST_2( packetmath<std::complex<double> >() );
+ CALL_SUBTEST_1( packetmath_notcomplex<float>() );
+ CALL_SUBTEST_2( packetmath_notcomplex<double>() );
+ CALL_SUBTEST_3( packetmath_notcomplex<int>() );
+
CALL_SUBTEST_1( packetmath_real<float>() );
CALL_SUBTEST_2( packetmath_real<double>() );
diff --git a/test/permutationmatrices.cpp b/test/permutationmatrices.cpp
index 00f666ccd..7b0dbc763 100644
--- a/test/permutationmatrices.cpp
+++ b/test/permutationmatrices.cpp
@@ -14,7 +14,6 @@ template<typename MatrixType> void permutationmatrices(const MatrixType& m)
{
typedef typename MatrixType::Index Index;
typedef typename MatrixType::Scalar Scalar;
- typedef typename MatrixType::RealScalar RealScalar;
enum { Rows = MatrixType::RowsAtCompileTime, Cols = MatrixType::ColsAtCompileTime,
Options = MatrixType::Options };
typedef PermutationMatrix<Rows> LeftPermutationType;
diff --git a/test/prec_inverse_4x4.cpp b/test/prec_inverse_4x4.cpp
index 9bab30a25..c4ef2d4bd 100644
--- a/test/prec_inverse_4x4.cpp
+++ b/test/prec_inverse_4x4.cpp
@@ -14,7 +14,6 @@
template<typename MatrixType> void inverse_permutation_4x4()
{
typedef typename MatrixType::Scalar Scalar;
- typedef typename MatrixType::RealScalar RealScalar;
Vector4i indices(0,1,2,3);
for(int i = 0; i < 24; ++i)
{
diff --git a/test/product.h b/test/product.h
index 4aa9fd56d..856b234ac 100644
--- a/test/product.h
+++ b/test/product.h
@@ -24,7 +24,6 @@ template<typename MatrixType> void product(const MatrixType& m)
*/
typedef typename MatrixType::Index Index;
typedef typename MatrixType::Scalar Scalar;
- typedef typename NumTraits<Scalar>::NonInteger NonInteger;
typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, 1> RowVectorType;
typedef Matrix<Scalar, MatrixType::ColsAtCompileTime, 1> ColVectorType;
typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::RowsAtCompileTime> RowSquareMatrixType;
diff --git a/test/product_extra.cpp b/test/product_extra.cpp
index 6f962159e..53493bdd6 100644
--- a/test/product_extra.cpp
+++ b/test/product_extra.cpp
@@ -13,7 +13,6 @@ template<typename MatrixType> void product_extra(const MatrixType& m)
{
typedef typename MatrixType::Index Index;
typedef typename MatrixType::Scalar Scalar;
- typedef typename NumTraits<Scalar>::NonInteger NonInteger;
typedef Matrix<Scalar, 1, Dynamic> RowVectorType;
typedef Matrix<Scalar, Dynamic, 1> ColVectorType;
typedef Matrix<Scalar, Dynamic, Dynamic,
diff --git a/test/product_mmtr.cpp b/test/product_mmtr.cpp
index 879cfe16a..7d6746800 100644
--- a/test/product_mmtr.cpp
+++ b/test/product_mmtr.cpp
@@ -19,8 +19,6 @@
template<typename Scalar> void mmtr(int size)
{
- typedef typename NumTraits<Scalar>::Real RealScalar;
-
typedef Matrix<Scalar,Dynamic,Dynamic,ColMajor> MatrixColMaj;
typedef Matrix<Scalar,Dynamic,Dynamic,RowMajor> MatrixRowMaj;
diff --git a/test/product_notemporary.cpp b/test/product_notemporary.cpp
index cf9dbdd03..a30a8b4c7 100644
--- a/test/product_notemporary.cpp
+++ b/test/product_notemporary.cpp
@@ -77,6 +77,9 @@ template<typename MatrixType> void product_notemporary(const MatrixType& m)
VERIFY_EVALUATION_COUNT( m3.noalias() -= (s1 * m1).template triangularView<Lower>() * m2, 0);
VERIFY_EVALUATION_COUNT( rm3.noalias() = (s1 * m1.adjoint()).template triangularView<Upper>() * (m2+m2), 1);
VERIFY_EVALUATION_COUNT( rm3.noalias() = (s1 * m1.adjoint()).template triangularView<UnitUpper>() * m2.adjoint(), 0);
+
+ VERIFY_EVALUATION_COUNT( m3.template triangularView<Upper>() = (m1 * m2.adjoint()), 0);
+ VERIFY_EVALUATION_COUNT( m3.template triangularView<Upper>() -= (m1 * m2.adjoint()), 0);
// NOTE this is because the blas_traits require innerstride==1 to avoid a temporary, but that does not actually seem to be needed for the triangular products
VERIFY_EVALUATION_COUNT( rm3.col(c0).noalias() = (s1 * m1.adjoint()).template triangularView<UnitUpper>() * (s2*m2.row(c0)).adjoint(), 1);
diff --git a/test/product_selfadjoint.cpp b/test/product_selfadjoint.cpp
index 95693b155..aede15053 100644
--- a/test/product_selfadjoint.cpp
+++ b/test/product_selfadjoint.cpp
@@ -13,7 +13,6 @@ template<typename MatrixType> void product_selfadjoint(const MatrixType& m)
{
typedef typename MatrixType::Index Index;
typedef typename MatrixType::Scalar Scalar;
- typedef typename NumTraits<Scalar>::Real RealScalar;
typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, 1> VectorType;
typedef Matrix<Scalar, 1, MatrixType::RowsAtCompileTime> RowVectorType;
diff --git a/test/product_symm.cpp b/test/product_symm.cpp
index 2f7a0d231..74d7329b1 100644
--- a/test/product_symm.cpp
+++ b/test/product_symm.cpp
@@ -11,8 +11,6 @@
template<typename Scalar, int Size, int OtherSize> void symm(int size = Size, int othersize = OtherSize)
{
- typedef typename NumTraits<Scalar>::Real RealScalar;
-
typedef Matrix<Scalar, Size, Size> MatrixType;
typedef Matrix<Scalar, Size, OtherSize> Rhs1;
typedef Matrix<Scalar, OtherSize, Size> Rhs2;
diff --git a/test/product_syrk.cpp b/test/product_syrk.cpp
index 5855c2181..73c95000c 100644
--- a/test/product_syrk.cpp
+++ b/test/product_syrk.cpp
@@ -13,7 +13,7 @@ template<typename MatrixType> void syrk(const MatrixType& m)
{
typedef typename MatrixType::Index Index;
typedef typename MatrixType::Scalar Scalar;
- typedef typename NumTraits<Scalar>::Real RealScalar;
+ typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::ColsAtCompileTime, RowMajor> RMatrixType;
typedef Matrix<Scalar, MatrixType::ColsAtCompileTime, Dynamic> Rhs1;
typedef Matrix<Scalar, Dynamic, MatrixType::RowsAtCompileTime> Rhs2;
typedef Matrix<Scalar, MatrixType::ColsAtCompileTime, Dynamic,RowMajor> Rhs3;
@@ -22,10 +22,12 @@ template<typename MatrixType> void syrk(const MatrixType& m)
Index cols = m.cols();
MatrixType m1 = MatrixType::Random(rows, cols),
- m2 = MatrixType::Random(rows, cols);
+ m2 = MatrixType::Random(rows, cols),
+ m3 = MatrixType::Random(rows, cols);
+ RMatrixType rm2 = MatrixType::Random(rows, cols);
- Rhs1 rhs1 = Rhs1::Random(internal::random<int>(1,320), cols);
- Rhs2 rhs2 = Rhs2::Random(rows, internal::random<int>(1,320));
+ Rhs1 rhs1 = Rhs1::Random(internal::random<int>(1,320), cols); Rhs1 rhs11 = Rhs1::Random(rhs1.rows(), cols);
+ Rhs2 rhs2 = Rhs2::Random(rows, internal::random<int>(1,320)); Rhs2 rhs22 = Rhs2::Random(rows, rhs2.cols());
Rhs3 rhs3 = Rhs3::Random(internal::random<int>(1,320), rows);
Scalar s1 = internal::random<Scalar>();
@@ -35,19 +37,34 @@ template<typename MatrixType> void syrk(const MatrixType& m)
m2.setZero();
VERIFY_IS_APPROX((m2.template selfadjointView<Lower>().rankUpdate(rhs2,s1)._expression()),
((s1 * rhs2 * rhs2.adjoint()).eval().template triangularView<Lower>().toDenseMatrix()));
+ m2.setZero();
+ VERIFY_IS_APPROX(((m2.template triangularView<Lower>() += s1 * rhs2 * rhs22.adjoint()).nestedExpression()),
+ ((s1 * rhs2 * rhs22.adjoint()).eval().template triangularView<Lower>().toDenseMatrix()));
+
m2.setZero();
VERIFY_IS_APPROX(m2.template selfadjointView<Upper>().rankUpdate(rhs2,s1)._expression(),
(s1 * rhs2 * rhs2.adjoint()).eval().template triangularView<Upper>().toDenseMatrix());
+ m2.setZero();
+ VERIFY_IS_APPROX((m2.template triangularView<Upper>() += s1 * rhs22 * rhs2.adjoint()).nestedExpression(),
+ (s1 * rhs22 * rhs2.adjoint()).eval().template triangularView<Upper>().toDenseMatrix());
+
m2.setZero();
VERIFY_IS_APPROX(m2.template selfadjointView<Lower>().rankUpdate(rhs1.adjoint(),s1)._expression(),
(s1 * rhs1.adjoint() * rhs1).eval().template triangularView<Lower>().toDenseMatrix());
-
+ m2.setZero();
+ VERIFY_IS_APPROX((m2.template triangularView<Lower>() += s1 * rhs11.adjoint() * rhs1).nestedExpression(),
+ (s1 * rhs11.adjoint() * rhs1).eval().template triangularView<Lower>().toDenseMatrix());
+
+
m2.setZero();
VERIFY_IS_APPROX(m2.template selfadjointView<Upper>().rankUpdate(rhs1.adjoint(),s1)._expression(),
(s1 * rhs1.adjoint() * rhs1).eval().template triangularView<Upper>().toDenseMatrix());
+ VERIFY_IS_APPROX((m2.template triangularView<Upper>() = s1 * rhs1.adjoint() * rhs11).nestedExpression(),
+ (s1 * rhs1.adjoint() * rhs11).eval().template triangularView<Upper>().toDenseMatrix());
+
m2.setZero();
VERIFY_IS_APPROX(m2.template selfadjointView<Lower>().rankUpdate(rhs3.adjoint(),s1)._expression(),
(s1 * rhs3.adjoint() * rhs3).eval().template triangularView<Lower>().toDenseMatrix());
@@ -63,6 +80,15 @@ template<typename MatrixType> void syrk(const MatrixType& m)
m2.setZero();
VERIFY_IS_APPROX((m2.template selfadjointView<Upper>().rankUpdate(m1.col(c),s1)._expression()),
((s1 * m1.col(c) * m1.col(c).adjoint()).eval().template triangularView<Upper>().toDenseMatrix()));
+ rm2.setZero();
+ VERIFY_IS_APPROX((rm2.template selfadjointView<Upper>().rankUpdate(m1.col(c),s1)._expression()),
+ ((s1 * m1.col(c) * m1.col(c).adjoint()).eval().template triangularView<Upper>().toDenseMatrix()));
+ m2.setZero();
+ VERIFY_IS_APPROX((m2.template triangularView<Upper>() += s1 * m3.col(c) * m1.col(c).adjoint()).nestedExpression(),
+ ((s1 * m3.col(c) * m1.col(c).adjoint()).eval().template triangularView<Upper>().toDenseMatrix()));
+ rm2.setZero();
+ VERIFY_IS_APPROX((rm2.template triangularView<Upper>() += s1 * m1.col(c) * m3.col(c).adjoint()).nestedExpression(),
+ ((s1 * m1.col(c) * m3.col(c).adjoint()).eval().template triangularView<Upper>().toDenseMatrix()));
m2.setZero();
VERIFY_IS_APPROX((m2.template selfadjointView<Lower>().rankUpdate(m1.col(c).conjugate(),s1)._expression()),
@@ -72,9 +98,20 @@ template<typename MatrixType> void syrk(const MatrixType& m)
VERIFY_IS_APPROX((m2.template selfadjointView<Upper>().rankUpdate(m1.col(c).conjugate(),s1)._expression()),
((s1 * m1.col(c).conjugate() * m1.col(c).conjugate().adjoint()).eval().template triangularView<Upper>().toDenseMatrix()));
+
m2.setZero();
VERIFY_IS_APPROX((m2.template selfadjointView<Lower>().rankUpdate(m1.row(c),s1)._expression()),
((s1 * m1.row(c).transpose() * m1.row(c).transpose().adjoint()).eval().template triangularView<Lower>().toDenseMatrix()));
+ rm2.setZero();
+ VERIFY_IS_APPROX((rm2.template selfadjointView<Lower>().rankUpdate(m1.row(c),s1)._expression()),
+ ((s1 * m1.row(c).transpose() * m1.row(c).transpose().adjoint()).eval().template triangularView<Lower>().toDenseMatrix()));
+ m2.setZero();
+ VERIFY_IS_APPROX((m2.template triangularView<Lower>() += s1 * m3.row(c).transpose() * m1.row(c).transpose().adjoint()).nestedExpression(),
+ ((s1 * m3.row(c).transpose() * m1.row(c).transpose().adjoint()).eval().template triangularView<Lower>().toDenseMatrix()));
+ rm2.setZero();
+ VERIFY_IS_APPROX((rm2.template triangularView<Lower>() += s1 * m3.row(c).transpose() * m1.row(c).transpose().adjoint()).nestedExpression(),
+ ((s1 * m3.row(c).transpose() * m1.row(c).transpose().adjoint()).eval().template triangularView<Lower>().toDenseMatrix()));
+
m2.setZero();
VERIFY_IS_APPROX((m2.template selfadjointView<Upper>().rankUpdate(m1.row(c).adjoint(),s1)._expression()),
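The new product_syrk.cpp checks cover direct evaluation of a general product into a triangular view, i.e. writing only the requested half of the result instead of forming the full product first. A hedged usage sketch (types and sizes are arbitrary):

#include <Eigen/Dense>
using namespace Eigen;

int main()
{
  MatrixXd m = MatrixXd::Zero(4,4);
  MatrixXd a = MatrixXd::Random(4,6), b = MatrixXd::Random(4,6);

  // Only the lower triangle of m is updated; the strict upper part is untouched.
  m.triangularView<Lower>() += a * b.adjoint();

  // The classical self-adjoint rank update exercised alongside it.
  m.setZero();
  m.selfadjointView<Lower>().rankUpdate(a, 1.0);
  return 0;
}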
diff --git a/test/product_trmm.cpp b/test/product_trmm.cpp
index 64244c18f..31ac1b22e 100644
--- a/test/product_trmm.cpp
+++ b/test/product_trmm.cpp
@@ -14,8 +14,6 @@ void trmm(int rows=internal::random<int>(1,EIGEN_TEST_MAX_SIZE),
int cols=internal::random<int>(1,EIGEN_TEST_MAX_SIZE),
int otherCols = OtherCols==Dynamic?internal::random<int>(1,EIGEN_TEST_MAX_SIZE):OtherCols)
{
- typedef typename NumTraits<Scalar>::Real RealScalar;
-
typedef Matrix<Scalar,Dynamic,Dynamic,TriOrder> TriMatrix;
typedef Matrix<Scalar,Dynamic,OtherCols,OtherCols==1?ColMajor:OtherOrder> OnTheRight;
typedef Matrix<Scalar,OtherCols,Dynamic,OtherCols==1?RowMajor:OtherOrder> OnTheLeft;
diff --git a/test/qr.cpp b/test/qr.cpp
index 237aa98d8..a79e0dd34 100644
--- a/test/qr.cpp
+++ b/test/qr.cpp
@@ -19,7 +19,6 @@ template<typename MatrixType> void qr(const MatrixType& m)
typedef typename MatrixType::Scalar Scalar;
typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::RowsAtCompileTime> MatrixQType;
- typedef Matrix<Scalar, MatrixType::ColsAtCompileTime, 1> VectorType;
MatrixType a = MatrixType::Random(rows,cols);
HouseholderQR<MatrixType> qrOfA(a);
diff --git a/test/qr_colpivoting.cpp b/test/qr_colpivoting.cpp
index 0fd19c4ee..eb3feac01 100644
--- a/test/qr_colpivoting.cpp
+++ b/test/qr_colpivoting.cpp
@@ -19,9 +19,7 @@ template<typename MatrixType> void qr()
Index rank = internal::random<Index>(1, (std::min)(rows, cols)-1);
typedef typename MatrixType::Scalar Scalar;
- typedef typename MatrixType::RealScalar RealScalar;
typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::RowsAtCompileTime> MatrixQType;
- typedef Matrix<Scalar, MatrixType::ColsAtCompileTime, 1> VectorType;
MatrixType m1;
createRandomPIMatrixOfRank(rank,rows,cols,m1);
ColPivHouseholderQR<MatrixType> qr(m1);
diff --git a/test/qr_fullpivoting.cpp b/test/qr_fullpivoting.cpp
index 8b8188da3..15d7299d7 100644
--- a/test/qr_fullpivoting.cpp
+++ b/test/qr_fullpivoting.cpp
@@ -20,7 +20,6 @@ template<typename MatrixType> void qr()
typedef typename MatrixType::Scalar Scalar;
typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::RowsAtCompileTime> MatrixQType;
- typedef Matrix<Scalar, MatrixType::ColsAtCompileTime, 1> VectorType;
MatrixType m1;
createRandomPIMatrixOfRank(rank,rows,cols,m1);
FullPivHouseholderQR<MatrixType> qr(m1);
diff --git a/test/real_qz.cpp b/test/real_qz.cpp
index c31621439..b48bac361 100644
--- a/test/real_qz.cpp
+++ b/test/real_qz.cpp
@@ -19,10 +19,6 @@ template<typename MatrixType> void real_qz(const MatrixType& m)
using std::abs;
typedef typename MatrixType::Index Index;
typedef typename MatrixType::Scalar Scalar;
- typedef typename NumTraits<Scalar>::Real RealScalar;
- typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, 1> VectorType;
- typedef Matrix<RealScalar, MatrixType::RowsAtCompileTime, 1> RealVectorType;
- typedef typename std::complex<typename NumTraits<typename MatrixType::Scalar>::Real> Complex;
Index dim = m.cols();
diff --git a/test/ref.cpp b/test/ref.cpp
new file mode 100644
index 000000000..7c0ccafcf
--- /dev/null
+++ b/test/ref.cpp
@@ -0,0 +1,232 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2013 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+// This unit test cannot be easily written to work with EIGEN_DEFAULT_TO_ROW_MAJOR
+#ifdef EIGEN_DEFAULT_TO_ROW_MAJOR
+#undef EIGEN_DEFAULT_TO_ROW_MAJOR
+#endif
+
+static int nb_temporaries;
+
+void on_temporary_creation(int size) {
+ // here's a great place to set a breakpoint when debugging failures in this test!
+ if(size!=0) nb_temporaries++;
+}
+
+
+#define EIGEN_DENSE_STORAGE_CTOR_PLUGIN { on_temporary_creation(size); }
+
+#include "main.h"
+
+#define VERIFY_EVALUATION_COUNT(XPR,N) {\
+ nb_temporaries = 0; \
+ XPR; \
+ if(nb_temporaries!=N) std::cerr << "nb_temporaries == " << nb_temporaries << "\n"; \
+ VERIFY( (#XPR) && nb_temporaries==N ); \
+ }
+
+
+// test Ref.h
+
+template<typename MatrixType> void ref_matrix(const MatrixType& m)
+{
+ typedef typename MatrixType::Index Index;
+ typedef typename MatrixType::Scalar Scalar;
+ typedef typename MatrixType::RealScalar RealScalar;
+ typedef Matrix<Scalar,Dynamic,Dynamic,MatrixType::Options> DynMatrixType;
+ typedef Matrix<RealScalar,Dynamic,Dynamic,MatrixType::Options> RealDynMatrixType;
+
+ typedef Ref<MatrixType> RefMat;
+ typedef Ref<DynMatrixType> RefDynMat;
+ typedef Ref<const DynMatrixType> ConstRefDynMat;
+ typedef Ref<RealDynMatrixType , 0, Stride<Dynamic,Dynamic> > RefRealMatWithStride;
+
+ Index rows = m.rows(), cols = m.cols();
+
+ MatrixType m1 = MatrixType::Random(rows, cols),
+ m2 = m1;
+
+ Index i = internal::random<Index>(0,rows-1);
+ Index j = internal::random<Index>(0,cols-1);
+ Index brows = internal::random<Index>(1,rows-i);
+ Index bcols = internal::random<Index>(1,cols-j);
+
+ RefMat rm0 = m1;
+ VERIFY_IS_EQUAL(rm0, m1);
+ RefDynMat rm1 = m1;
+ VERIFY_IS_EQUAL(rm1, m1);
+ RefDynMat rm2 = m1.block(i,j,brows,bcols);
+ VERIFY_IS_EQUAL(rm2, m1.block(i,j,brows,bcols));
+ rm2.setOnes();
+ m2.block(i,j,brows,bcols).setOnes();
+ VERIFY_IS_EQUAL(m1, m2);
+
+ m2.block(i,j,brows,bcols).setRandom();
+ rm2 = m2.block(i,j,brows,bcols);
+ VERIFY_IS_EQUAL(m1, m2);
+
+
+ ConstRefDynMat rm3 = m1.block(i,j,brows,bcols);
+ m1.block(i,j,brows,bcols) *= 2;
+ m2.block(i,j,brows,bcols) *= 2;
+ VERIFY_IS_EQUAL(rm3, m2.block(i,j,brows,bcols));
+ RefRealMatWithStride rm4 = m1.real();
+ VERIFY_IS_EQUAL(rm4, m2.real());
+ rm4.array() += 1;
+ m2.real().array() += 1;
+ VERIFY_IS_EQUAL(m1, m2);
+}
+
+template<typename VectorType> void ref_vector(const VectorType& m)
+{
+ typedef typename VectorType::Index Index;
+ typedef typename VectorType::Scalar Scalar;
+ typedef typename VectorType::RealScalar RealScalar;
+ typedef Matrix<Scalar,Dynamic,1,VectorType::Options> DynMatrixType;
+ typedef Matrix<Scalar,Dynamic,Dynamic,ColMajor> MatrixType;
+ typedef Matrix<RealScalar,Dynamic,1,VectorType::Options> RealDynMatrixType;
+
+ typedef Ref<VectorType> RefMat;
+ typedef Ref<DynMatrixType> RefDynMat;
+ typedef Ref<const DynMatrixType> ConstRefDynMat;
+ typedef Ref<RealDynMatrixType , 0, InnerStride<> > RefRealMatWithStride;
+ typedef Ref<DynMatrixType , 0, InnerStride<> > RefMatWithStride;
+
+ Index size = m.size();
+
+ VectorType v1 = VectorType::Random(size),
+ v2 = v1;
+ MatrixType mat1 = MatrixType::Random(size,size),
+ mat2 = mat1,
+ mat3 = MatrixType::Random(size,size);
+
+ Index i = internal::random<Index>(0,size-1);
+ Index bsize = internal::random<Index>(1,size-i);
+
+ RefMat rm0 = v1;
+ VERIFY_IS_EQUAL(rm0, v1);
+ RefDynMat rv1 = v1;
+ VERIFY_IS_EQUAL(rv1, v1);
+ RefDynMat rv2 = v1.segment(i,bsize);
+ VERIFY_IS_EQUAL(rv2, v1.segment(i,bsize));
+ rv2.setOnes();
+ v2.segment(i,bsize).setOnes();
+ VERIFY_IS_EQUAL(v1, v2);
+
+ v2.segment(i,bsize).setRandom();
+ rv2 = v2.segment(i,bsize);
+ VERIFY_IS_EQUAL(v1, v2);
+
+ ConstRefDynMat rm3 = v1.segment(i,bsize);
+ v1.segment(i,bsize) *= 2;
+ v2.segment(i,bsize) *= 2;
+ VERIFY_IS_EQUAL(rm3, v2.segment(i,bsize));
+
+ RefRealMatWithStride rm4 = v1.real();
+ VERIFY_IS_EQUAL(rm4, v2.real());
+ rm4.array() += 1;
+ v2.real().array() += 1;
+ VERIFY_IS_EQUAL(v1, v2);
+
+ RefMatWithStride rm5 = mat1.row(i).transpose();
+ VERIFY_IS_EQUAL(rm5, mat1.row(i).transpose());
+ rm5.array() += 1;
+ mat2.row(i).array() += 1;
+ VERIFY_IS_EQUAL(mat1, mat2);
+ rm5.noalias() = rm4.transpose() * mat3;
+ mat2.row(i) = v2.real().transpose() * mat3;
+ VERIFY_IS_APPROX(mat1, mat2);
+}
+
+template<typename PlainObjectType> void check_const_correctness(const PlainObjectType&)
+{
+ // verify that Ref-to-const does not have the LvalueBit
+ typedef typename internal::add_const<PlainObjectType>::type ConstPlainObjectType;
+ VERIFY( !(internal::traits<Ref<ConstPlainObjectType> >::Flags & LvalueBit) );
+ VERIFY( !(internal::traits<Ref<ConstPlainObjectType, Aligned> >::Flags & LvalueBit) );
+ VERIFY( !(Ref<ConstPlainObjectType>::Flags & LvalueBit) );
+ VERIFY( !(Ref<ConstPlainObjectType, Aligned>::Flags & LvalueBit) );
+}
+
+EIGEN_DONT_INLINE void call_ref_1(Ref<VectorXf> ) { }
+EIGEN_DONT_INLINE void call_ref_2(const Ref<const VectorXf>& ) { }
+EIGEN_DONT_INLINE void call_ref_3(Ref<VectorXf,0,InnerStride<> > ) { }
+EIGEN_DONT_INLINE void call_ref_4(const Ref<const VectorXf,0,InnerStride<> >& ) { }
+EIGEN_DONT_INLINE void call_ref_5(Ref<MatrixXf,0,OuterStride<> > ) { }
+EIGEN_DONT_INLINE void call_ref_6(const Ref<const MatrixXf,0,OuterStride<> >& ) { }
+
+void call_ref()
+{
+ VectorXcf ca(10);
+ VectorXf a(10);
+ const VectorXf& ac(a);
+ VectorBlock<VectorXf> ab(a,0,3);
+ MatrixXf A(10,10);
+ const VectorBlock<VectorXf> abc(a,0,3);
+
+ VERIFY_EVALUATION_COUNT( call_ref_1(a), 0);
+ //call_ref_1(ac); // does not compile because ac is const
+ VERIFY_EVALUATION_COUNT( call_ref_1(ab), 0);
+ VERIFY_EVALUATION_COUNT( call_ref_1(a.head(4)), 0);
+ VERIFY_EVALUATION_COUNT( call_ref_1(abc), 0);
+ VERIFY_EVALUATION_COUNT( call_ref_1(A.col(3)), 0);
+ // call_ref_1(A.row(3)); // does not compile because innerstride!=1
+ VERIFY_EVALUATION_COUNT( call_ref_3(A.row(3)), 0);
+ VERIFY_EVALUATION_COUNT( call_ref_4(A.row(3)), 0);
+ //call_ref_1(a+a); // does not compile for obvious reason
+
+ VERIFY_EVALUATION_COUNT( call_ref_2(A*A.col(1)), 1); // evaluated into a temp
+ VERIFY_EVALUATION_COUNT( call_ref_2(ac.head(5)), 0);
+ VERIFY_EVALUATION_COUNT( call_ref_2(ac), 0);
+ VERIFY_EVALUATION_COUNT( call_ref_2(a), 0);
+ VERIFY_EVALUATION_COUNT( call_ref_2(ab), 0);
+ VERIFY_EVALUATION_COUNT( call_ref_2(a.head(4)), 0);
+ VERIFY_EVALUATION_COUNT( call_ref_2(a+a), 1); // evaluated into a temp
+ VERIFY_EVALUATION_COUNT( call_ref_2(ca.imag()), 1); // evaluated into a temp
+
+ VERIFY_EVALUATION_COUNT( call_ref_4(ac.head(5)), 0);
+ VERIFY_EVALUATION_COUNT( call_ref_4(a+a), 1); // evaluated into a temp
+ VERIFY_EVALUATION_COUNT( call_ref_4(ca.imag()), 0);
+
+ VERIFY_EVALUATION_COUNT( call_ref_5(a), 0);
+ VERIFY_EVALUATION_COUNT( call_ref_5(a.head(3)), 0);
+ VERIFY_EVALUATION_COUNT( call_ref_5(A), 0);
+ // call_ref_5(A.transpose()); // does not compile
+ VERIFY_EVALUATION_COUNT( call_ref_5(A.block(1,1,2,2)), 0);
+
+ VERIFY_EVALUATION_COUNT( call_ref_6(a), 0);
+ VERIFY_EVALUATION_COUNT( call_ref_6(a.head(3)), 0);
+ VERIFY_EVALUATION_COUNT( call_ref_6(A.row(3)), 1); // evaluated into a temp though it could be avoided by viewing it as a 1xn matrix
+ VERIFY_EVALUATION_COUNT( call_ref_6(A+A), 1); // evaluated into a temp
+ VERIFY_EVALUATION_COUNT( call_ref_6(A), 0);
+ VERIFY_EVALUATION_COUNT( call_ref_6(A.transpose()), 1); // evaluated into a temp because the storage orders do not match
+ VERIFY_EVALUATION_COUNT( call_ref_6(A.block(1,1,2,2)), 0);
+}
+
+void test_ref()
+{
+ for(int i = 0; i < g_repeat; i++) {
+ CALL_SUBTEST_1( ref_vector(Matrix<float, 1, 1>()) );
+ CALL_SUBTEST_1( check_const_correctness(Matrix<float, 1, 1>()) );
+ CALL_SUBTEST_2( ref_vector(Vector4d()) );
+ CALL_SUBTEST_2( check_const_correctness(Matrix4d()) );
+ CALL_SUBTEST_3( ref_vector(Vector4cf()) );
+ CALL_SUBTEST_4( ref_vector(VectorXcf(8)) );
+ CALL_SUBTEST_5( ref_vector(VectorXi(12)) );
+ CALL_SUBTEST_5( check_const_correctness(VectorXi(12)) );
+
+ CALL_SUBTEST_1( ref_matrix(Matrix<float, 1, 1>()) );
+ CALL_SUBTEST_2( ref_matrix(Matrix4d()) );
+ CALL_SUBTEST_1( ref_matrix(Matrix<float,3,5>()) );
+ CALL_SUBTEST_4( ref_matrix(MatrixXcf(internal::random<int>(1,10),internal::random<int>(1,10))) );
+ CALL_SUBTEST_4( ref_matrix(Matrix<std::complex<double>,10,15>()) );
+ CALL_SUBTEST_5( ref_matrix(MatrixXi(internal::random<int>(1,10),internal::random<int>(1,10))) );
+ CALL_SUBTEST_6( call_ref() );
+ }
+}
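For readers new to Ref<>, the call_ref() checks condense to a simple rule: Ref<VectorXf> binds in place to any writable float vector expression with unit inner stride (plain vectors, blocks, matrix columns), while const Ref<const VectorXf>& additionally accepts general expressions by evaluating them into a hidden temporary, which is exactly what VERIFY_EVALUATION_COUNT counts. A minimal sketch:

#include <Eigen/Dense>
using namespace Eigen;

// binds without copying to vectors, blocks, columns, ...
float sum_inplace(Ref<VectorXf> v) { return v.sum(); }

// may evaluate the argument into a temporary when needed
float sum_const(const Ref<const VectorXf>& v) { return v.sum(); }

int main()
{
  VectorXf a = VectorXf::Random(10);
  MatrixXf A = MatrixXf::Random(10,10);
  sum_inplace(a.head(4)); // no temporary
  sum_inplace(A.col(3));  // no temporary: inner stride is 1
  sum_const(a + a);       // one temporary, as counted in call_ref()
  return 0;
}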
diff --git a/test/selfadjoint.cpp b/test/selfadjoint.cpp
index 6d3ec6536..32791eeb5 100644
--- a/test/selfadjoint.cpp
+++ b/test/selfadjoint.cpp
@@ -16,7 +16,6 @@ template<typename MatrixType> void selfadjoint(const MatrixType& m)
{
typedef typename MatrixType::Index Index;
typedef typename MatrixType::Scalar Scalar;
- typedef typename NumTraits<Scalar>::Real RealScalar;
Index rows = m.rows();
Index cols = m.cols();
diff --git a/test/sparse.h b/test/sparse.h
index 4db0004aa..b6f6e6fce 100644
--- a/test/sparse.h
+++ b/test/sparse.h
@@ -178,5 +178,30 @@ initSparse(double density,
}
}
+template<typename Scalar> void
+initSparse(double density,
+ Matrix<Scalar,1,Dynamic>& refVec,
+ SparseVector<Scalar,RowMajor>& sparseVec,
+ std::vector<int>* zeroCoords = 0,
+ std::vector<int>* nonzeroCoords = 0)
+{
+ sparseVec.reserve(int(refVec.size()*density));
+ sparseVec.setZero();
+ for(int i=0; i<refVec.size(); i++)
+ {
+ Scalar v = (internal::random<double>(0,1) < density) ? internal::random<Scalar>() : Scalar(0);
+ if (v!=Scalar(0))
+ {
+ sparseVec.insertBack(i) = v;
+ if (nonzeroCoords)
+ nonzeroCoords->push_back(i);
+ }
+ else if (zeroCoords)
+ zeroCoords->push_back(i);
+ refVec[i] = v;
+ }
+}
+
+
#include <unsupported/Eigen/SparseExtra>
#endif // EIGEN_TESTSPARSE_H
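The new initSparse() overload fills a row-major SparseVector and its dense reference in lockstep; insertBack() requires strictly increasing indices, which the forward loop guarantees. The same filling pattern in isolation (density and size are arbitrary here):

#include <Eigen/Sparse>
#include <cstdlib>
using namespace Eigen;

int main()
{
  const int n = 20;
  const double density = 0.3;
  RowVectorXd ref = RowVectorXd::Zero(n);
  SparseVector<double,RowMajor> sv(n);
  sv.reserve(int(n*density));
  for (int i = 0; i < n; ++i)
  {
    double v = (std::rand()/double(RAND_MAX) < density) ? 1.0 : 0.0;
    if (v != 0.0)
      sv.insertBack(i) = v; // indices appended in increasing order
    ref[i] = v;
  }
  return 0;
}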
diff --git a/test/sparse_basic.cpp b/test/sparse_basic.cpp
index cebb5e6a0..c573ae517 100644
--- a/test/sparse_basic.cpp
+++ b/test/sparse_basic.cpp
@@ -201,6 +201,8 @@ template<typename SparseMatrixType> void sparse_basic(const SparseMatrixType& re
DenseMatrix refMat2 = DenseMatrix::Zero(rows, rows);
SparseMatrixType m2(rows, rows);
initSparse<Scalar>(density, refMat2, m2);
+ if(internal::random<float>(0,1)>0.5) m2.makeCompressed();
+
int j0 = internal::random<int>(0,rows-2);
int j1 = internal::random<int>(0,rows-2);
int n0 = internal::random<int>(1,rows-(std::max)(j0,j1));
@@ -210,12 +212,21 @@ template<typename SparseMatrixType> void sparse_basic(const SparseMatrixType& re
VERIFY_IS_APPROX(m2.innerVectors(j0,n0), refMat2.block(0,j0,rows,n0));
if(SparseMatrixType::IsRowMajor)
VERIFY_IS_APPROX(m2.innerVectors(j0,n0)+m2.innerVectors(j1,n0),
- refMat2.block(j0,0,n0,cols)+refMat2.block(j1,0,n0,cols));
+ refMat2.middleRows(j0,n0)+refMat2.middleRows(j1,n0));
else
VERIFY_IS_APPROX(m2.innerVectors(j0,n0)+m2.innerVectors(j1,n0),
refMat2.block(0,j0,rows,n0)+refMat2.block(0,j1,rows,n0));
- //m2.innerVectors(j0,n0) = m2.innerVectors(j0,n0) + m2.innerVectors(j1,n0);
- //refMat2.block(0,j0,rows,n0) = refMat2.block(0,j0,rows,n0) + refMat2.block(0,j1,rows,n0);
+
+ VERIFY_IS_APPROX(m2, refMat2);
+
+ m2.innerVectors(j0,n0) = m2.innerVectors(j0,n0) + m2.innerVectors(j1,n0);
+ if(SparseMatrixType::IsRowMajor)
+ refMat2.middleRows(j0,n0) = (refMat2.middleRows(j0,n0) + refMat2.middleRows(j1,n0)).eval();
+ else
+ refMat2.middleCols(j0,n0) = (refMat2.middleCols(j0,n0) + refMat2.middleCols(j1,n0)).eval();
+
+ VERIFY_IS_APPROX(m2, refMat2);
+
}
// test basic computations
@@ -449,6 +460,7 @@ void test_sparse_basic()
{
for(int i = 0; i < g_repeat; i++) {
int s = Eigen::internal::random<int>(1,50);
+ EIGEN_UNUSED_VARIABLE(s);
CALL_SUBTEST_1(( sparse_basic(SparseMatrix<double>(8, 8)) ));
CALL_SUBTEST_2(( sparse_basic(SparseMatrix<std::complex<double>, ColMajor>(s, s)) ));
CALL_SUBTEST_2(( sparse_basic(SparseMatrix<std::complex<double>, RowMajor>(s, s)) ));
diff --git a/test/sparse_product.cpp b/test/sparse_product.cpp
index 4eae263fa..67a59ecd8 100644
--- a/test/sparse_product.cpp
+++ b/test/sparse_product.cpp
@@ -46,6 +46,9 @@ template<typename SparseMatrixType> void sparse_product()
double density = (std::max)(8./(rows*cols), 0.1);
typedef Matrix<Scalar,Dynamic,Dynamic> DenseMatrix;
typedef Matrix<Scalar,Dynamic,1> DenseVector;
+ typedef Matrix<Scalar,1,Dynamic> RowDenseVector;
+ typedef SparseVector<Scalar,0,Index> ColSpVector;
+ typedef SparseVector<Scalar,RowMajor,Index> RowSpVector;
Scalar s1 = internal::random<Scalar>();
Scalar s2 = internal::random<Scalar>();
@@ -117,8 +120,23 @@ template<typename SparseMatrixType> void sparse_product()
test_outer<SparseMatrixType,DenseMatrix>::run(m2,m4,refMat2,refMat4);
VERIFY_IS_APPROX(m6=m6*m6, refMat6=refMat6*refMat6);
+
+ // sparse matrix * sparse vector
+ ColSpVector cv0(cols), cv1;
+ DenseVector dcv0(cols), dcv1;
+ initSparse(2*density,dcv0, cv0);
+
+ RowSpVector rv0(depth), rv1;
+ RowDenseVector drv0(depth), drv1(rv1);
+ initSparse(2*density,drv0, rv0);
+
+ VERIFY_IS_APPROX(cv1=rv0*m3, dcv1=drv0*refMat3);
+ VERIFY_IS_APPROX(rv1=rv0*m3, drv1=drv0*refMat3);
+ VERIFY_IS_APPROX(cv1=m3*cv0, dcv1=refMat3*dcv0);
+ VERIFY_IS_APPROX(cv1=m3t.adjoint()*cv0, dcv1=refMat3t.adjoint()*dcv0);
+ VERIFY_IS_APPROX(rv1=m3*cv0, drv1=refMat3*dcv0);
}
-
+
// test matrix - diagonal product
{
DenseMatrix refM2 = DenseMatrix::Zero(rows, cols);
diff --git a/test/sparse_solver.h b/test/sparse_solver.h
index 73d92874c..645a965bb 100644
--- a/test/sparse_solver.h
+++ b/test/sparse_solver.h
@@ -37,7 +37,6 @@ void check_sparse_solving(Solver& solver, const typename Solver::MatrixType& A,
VERIFY(oldb.isApprox(b) && "sparse solver testing: the rhs should not be modified!");
VERIFY(x.isApprox(refX,test_precision<Scalar>()));
-
x.setZero();
// test the analyze/factorize API
solver.analyzePattern(A);
@@ -113,7 +112,6 @@ void check_sparse_determinant(Solver& solver, const typename Solver::MatrixType&
{
typedef typename Solver::MatrixType Mat;
typedef typename Mat::Scalar Scalar;
- typedef typename Mat::RealScalar RealScalar;
solver.compute(A);
if (solver.info() != Success)
@@ -169,7 +167,6 @@ template<typename Solver> void check_sparse_spd_solving(Solver& solver)
{
typedef typename Solver::MatrixType Mat;
typedef typename Mat::Scalar Scalar;
- typedef typename Mat::Index Index;
typedef SparseMatrix<Scalar,ColMajor> SpMat;
typedef Matrix<Scalar,Dynamic,Dynamic> DenseMatrix;
typedef Matrix<Scalar,Dynamic,1> DenseVector;
@@ -177,25 +174,32 @@ template<typename Solver> void check_sparse_spd_solving(Solver& solver)
// generate the problem
Mat A, halfA;
DenseMatrix dA;
- int size = generate_sparse_spd_problem(solver, A, halfA, dA);
-
- // generate the right hand sides
- int rhsCols = internal::random<int>(1,16);
- double density = (std::max)(8./(size*rhsCols), 0.1);
- SpMat B(size,rhsCols);
- DenseVector b = DenseVector::Random(size);
- DenseMatrix dB(size,rhsCols);
- initSparse<Scalar>(density, dB, B, ForceNonZeroDiag);
-
for (int i = 0; i < g_repeat; i++) {
+ int size = generate_sparse_spd_problem(solver, A, halfA, dA);
+
+ // generate the right hand sides
+ int rhsCols = internal::random<int>(1,16);
+ double density = (std::max)(8./(size*rhsCols), 0.1);
+ SpMat B(size,rhsCols);
+ DenseVector b = DenseVector::Random(size);
+ DenseMatrix dB(size,rhsCols);
+ initSparse<Scalar>(density, dB, B, ForceNonZeroDiag);
+
check_sparse_solving(solver, A, b, dA, b);
check_sparse_solving(solver, halfA, b, dA, b);
check_sparse_solving(solver, A, dB, dA, dB);
check_sparse_solving(solver, halfA, dB, dA, dB);
check_sparse_solving(solver, A, B, dA, dB);
check_sparse_solving(solver, halfA, B, dA, dB);
+
+ // check only once
+ if(i==0)
+ {
+ b = DenseVector::Zero(size);
+ check_sparse_solving(solver, A, b, dA, b);
+ }
}
-
+
// First, get the folder
#ifdef TEST_REAL_CASES
if (internal::is_same<Scalar, float>::value
@@ -241,7 +245,6 @@ int generate_sparse_square_problem(Solver&, typename Solver::MatrixType& A, Dens
{
typedef typename Solver::MatrixType Mat;
typedef typename Mat::Scalar Scalar;
- typedef Matrix<Scalar,Dynamic,Dynamic> DenseMatrix;
int size = internal::random<int>(1,maxSize);
double density = (std::max)(8./(size*size), 0.01);
@@ -258,6 +261,7 @@ template<typename Solver> void check_sparse_square_solving(Solver& solver)
{
typedef typename Solver::MatrixType Mat;
typedef typename Mat::Scalar Scalar;
+ typedef SparseMatrix<Scalar,ColMajor> SpMat;
typedef Matrix<Scalar,Dynamic,Dynamic> DenseMatrix;
typedef Matrix<Scalar,Dynamic,1> DenseVector;
@@ -265,16 +269,28 @@ template<typename Solver> void check_sparse_square_solving(Solver& solver)
Mat A;
DenseMatrix dA;
- int size = generate_sparse_square_problem(solver, A, dA);
-
- DenseVector b = DenseVector::Random(size);
- DenseMatrix dB = DenseMatrix::Random(size,rhsCols);
- A.makeCompressed();
for (int i = 0; i < g_repeat; i++) {
+ int size = generate_sparse_square_problem(solver, A, dA);
+
+ A.makeCompressed();
+ DenseVector b = DenseVector::Random(size);
+ DenseMatrix dB(size,rhsCols);
+ SpMat B(size,rhsCols);
+ double density = (std::max)(8./(size*rhsCols), 0.1);
+ initSparse<Scalar>(density, dB, B, ForceNonZeroDiag);
+ B.makeCompressed();
check_sparse_solving(solver, A, b, dA, b);
check_sparse_solving(solver, A, dB, dA, dB);
+ check_sparse_solving(solver, A, B, dA, dB);
+
+ // check only once
+ if(i==0)
+ {
+ b = DenseVector::Zero(size);
+ check_sparse_solving(solver, A, b, dA, b);
+ }
}
-
+
// First, get the folder
#ifdef TEST_REAL_CASES
if (internal::is_same<Scalar, float>::value
diff --git a/test/sparse_vector.cpp b/test/sparse_vector.cpp
index 7973a47dc..d16d42f59 100644
--- a/test/sparse_vector.cpp
+++ b/test/sparse_vector.cpp
@@ -84,6 +84,12 @@ template<typename Scalar> void sparse_vector(int rows, int cols)
VERIFY_IS_APPROX((v1 = -v1), (refV1 = -refV1));
VERIFY_IS_APPROX((v1 = v1.transpose()), (refV1 = refV1.transpose().eval()));
VERIFY_IS_APPROX((v1 += -v1), (refV1 += -refV1));
+
+ // sparse matrix to sparse vector
+ SparseMatrixType mv1;
+ VERIFY_IS_APPROX((mv1=v1),v1);
+ VERIFY_IS_APPROX(mv1,(v1=mv1));
+ VERIFY_IS_APPROX(mv1,(v1=mv1.transpose()));
}
diff --git a/test/sparselu.cpp b/test/sparselu.cpp
index 2a73320eb..6a9eac065 100644
--- a/test/sparselu.cpp
+++ b/test/sparselu.cpp
@@ -21,6 +21,16 @@
// You should have received a copy of the GNU Lesser General Public
// License and a copy of the GNU General Public License along with
// Eigen. If not, see <http://www.gnu.org/licenses/>.
+
+
+// SparseLU's solve() does not accept row-major matrices for the destination.
+// However, as expected, the generic check_sparse_square_solving routine produces row-major
+// rhs and destination matrices when compiled with EIGEN_DEFAULT_TO_ROW_MAJOR, hence the #undef below.
+//
+#ifdef EIGEN_DEFAULT_TO_ROW_MAJOR
+#undef EIGEN_DEFAULT_TO_ROW_MAJOR
+#endif
+
#include "sparse_solver.h"
#include <Eigen/SparseLU>
#include <unsupported/Eigen/SparseExtra>
@@ -29,9 +39,11 @@ template<typename T> void test_sparselu_T()
{
SparseLU<SparseMatrix<T, ColMajor>, COLAMDOrdering<int> > sparselu_colamd;
SparseLU<SparseMatrix<T, ColMajor>, AMDOrdering<int> > sparselu_amd;
+ SparseLU<SparseMatrix<T, ColMajor, long int>, NaturalOrdering<long int> > sparselu_natural;
check_sparse_square_solving(sparselu_colamd);
check_sparse_square_solving(sparselu_amd);
+ check_sparse_square_solving(sparselu_natural);
}
void test_sparselu()
diff --git a/test/sparseqr.cpp b/test/sparseqr.cpp
index d34f7c48d..66c7c005e 100644
--- a/test/sparseqr.cpp
+++ b/test/sparseqr.cpp
@@ -22,15 +22,25 @@ int generate_sparse_rectangular_problem(MatrixType& A, DenseMat& dA, int maxRows
dA.resize(rows,rows);
initSparse<Scalar>(density, dA, A,ForceNonZeroDiag);
A.makeCompressed();
+ int nop = internal::random<int>(0, internal::random<double>(0,1) > 0.5 ? cols/2 : 0);
+ for(int k=0; k<nop; ++k)
+ {
+ int j0 = internal::random<int>(0,cols-1);
+ int j1 = internal::random<int>(0,cols-1);
+ Scalar s = internal::random<Scalar>();
+ A.col(j0) = s * A.col(j1);
+ dA.col(j0) = s * dA.col(j1);
+ }
return rows;
}
template<typename Scalar> void test_sparseqr_scalar()
{
typedef SparseMatrix<Scalar,ColMajor> MatrixType;
- MatrixType A;
- Matrix<Scalar,Dynamic,Dynamic> dA;
+ typedef Matrix<Scalar,Dynamic,Dynamic> DenseMat;
typedef Matrix<Scalar,Dynamic,1> DenseVector;
+ MatrixType A;
+ DenseMat dA;
DenseVector refX,x,b;
SparseQR<MatrixType, AMDOrdering<int> > solver;
generate_sparse_rectangular_problem(A,dA);
@@ -50,13 +60,23 @@ template<typename Scalar> void test_sparseqr_scalar()
std::cerr << "sparse QR factorization failed\n";
exit(0);
return;
- }
+ }
//Compare with a dense QR solver
- refX = dA.colPivHouseholderQr().solve(b);
- VERIFY(x.isApprox(refX,test_precision<Scalar>()));
+ ColPivHouseholderQR<DenseMat> dqr(dA);
+ refX = dqr.solve(b);
+
+ VERIFY_IS_EQUAL(dqr.rank(), solver.rank());
+
+ if(solver.rank()<A.cols())
+ VERIFY((dA * refX - b).norm() * 2 > (A * x - b).norm() );
+ else
+ VERIFY_IS_APPROX(x, refX);
}
void test_sparseqr()
{
- CALL_SUBTEST_1(test_sparseqr_scalar<double>());
- CALL_SUBTEST_2(test_sparseqr_scalar<std::complex<double> >());
-} \ No newline at end of file
+ for(int i=0; i<g_repeat; ++i)
+ {
+ CALL_SUBTEST_1(test_sparseqr_scalar<double>());
+ CALL_SUBTEST_2(test_sparseqr_scalar<std::complex<double> >());
+ }
+}
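The reworked sparseqr check now deliberately duplicates columns (the nop loop) to create rank-deficient problems, then compares SparseQR against a dense ColPivHouseholderQR: ranks must agree, and in the deficient case only the residual norms are comparable since the minimizer is no longer unique. Roughly, under those assumptions:

#include <Eigen/Sparse>
#include <Eigen/SparseQR>
#include <Eigen/Dense>
using namespace Eigen;

int main()
{
  MatrixXd dA = MatrixXd::Random(6,4);
  dA.col(1) = 2.0 * dA.col(2); // force a rank deficiency, as the nop loop does
  SparseMatrix<double> A = dA.sparseView();
  A.makeCompressed();
  VectorXd b = VectorXd::Random(6);

  SparseQR<SparseMatrix<double>, AMDOrdering<int> > sqr(A);
  ColPivHouseholderQR<MatrixXd> dqr(dA);
  VectorXd x = sqr.solve(b), refX = dqr.solve(b);

  // solutions may differ, residuals should be of the same order
  double rs = (A*x - b).norm(), rd = (dA*refX - b).norm();
  return (sqr.rank() == dqr.rank() && rs <= 2*rd + 1e-9) ? 0 : 1;
}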
diff --git a/test/special_numbers.cpp b/test/special_numbers.cpp
new file mode 100644
index 000000000..a5936184e
--- /dev/null
+++ b/test/special_numbers.cpp
@@ -0,0 +1,59 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2013 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "main.h"
+
+template<typename Scalar> void special_numbers()
+{
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+ typedef Matrix<Scalar, Dynamic,Dynamic> MatType;
+ int rows = internal::random<int>(1,300);
+ int cols = internal::random<int>(1,300);
+
+ Scalar nan = Scalar(0)/Scalar(0);
+ Scalar inf = Scalar(1)/Scalar(0);
+ Scalar s1 = internal::random<Scalar>();
+
+ MatType m1 = MatType::Random(rows,cols),
+ mnan = MatType::Random(rows,cols),
+ minf = MatType::Random(rows,cols),
+ mboth = MatType::Random(rows,cols);
+
+ int n = internal::random<int>(1,10);
+ for(int k=0; k<n; ++k)
+ {
+ mnan(internal::random<int>(0,rows-1), internal::random<int>(0,cols-1)) = nan;
+ minf(internal::random<int>(0,rows-1), internal::random<int>(0,cols-1)) = inf;
+ }
+ mboth = mnan + minf;
+
+ VERIFY(!m1.hasNaN());
+ VERIFY(m1.isFinite());
+
+ VERIFY(mnan.hasNaN());
+ VERIFY((s1*mnan).hasNaN());
+ VERIFY(!minf.hasNaN());
+ VERIFY(!(2*minf).hasNaN());
+ VERIFY(mboth.hasNaN());
+ VERIFY(mboth.array().hasNaN());
+
+ VERIFY(!mnan.isFinite());
+ VERIFY(!minf.isFinite());
+ VERIFY(!(minf-mboth).isFinite());
+ VERIFY(!mboth.isFinite());
+ VERIFY(!mboth.array().isFinite());
+}
+
+void test_special_numbers()
+{
+ for(int i = 0; i < 10*g_repeat; i++) {
+ CALL_SUBTEST_1( special_numbers<float>() );
+ CALL_SUBTEST_1( special_numbers<double>() );
+ }
+}
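The new special_numbers test pins down the semantics of the hasNaN() and isFinite() members: NaN coefficients make hasNaN() true and isFinite() false, while infinities only affect isFinite() (note that later Eigen releases spell the second member allFinite()). Quick usage:

#include <Eigen/Dense>
#include <limits>
using namespace Eigen;

int main()
{
  MatrixXd m = MatrixXd::Random(3,3);
  m(1,2) = std::numeric_limits<double>::quiet_NaN();
  bool hasnan = m.hasNaN();   // true
  bool finite = m.isFinite(); // false: NaN is not finite

  m(1,2) = std::numeric_limits<double>::infinity();
  hasnan = m.hasNaN();        // false: +inf is not a NaN
  finite = m.isFinite();      // still false
  return (!hasnan && !finite) ? 0 : 1;
}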
diff --git a/test/spqr_support.cpp b/test/spqr_support.cpp
index fbfd4c58b..7e4b6e18a 100644
--- a/test/spqr_support.cpp
+++ b/test/spqr_support.cpp
@@ -15,7 +15,7 @@ int generate_sparse_rectangular_problem(MatrixType& A, DenseMat& dA, int maxRows
eigen_assert(maxRows >= maxCols);
typedef typename MatrixType::Scalar Scalar;
int rows = internal::random<int>(1,maxRows);
- int cols = internal::random<int>(1,maxCols);
+ int cols = internal::random<int>(1,rows);
double density = (std::max)(8./(rows*cols), 0.01);
A.resize(rows,rows);
@@ -35,8 +35,8 @@ template<typename Scalar> void test_spqr_scalar()
SPQR<MatrixType> solver;
generate_sparse_rectangular_problem(A,dA);
- int n = A.cols();
- b = DenseVector::Random(n);
+ int m = A.rows();
+ b = DenseVector::Random(m);
solver.compute(A);
if (solver.info() != Success)
{
diff --git a/test/triangular.cpp b/test/triangular.cpp
index 0e8ee5487..7e1723af5 100644
--- a/test/triangular.cpp
+++ b/test/triangular.cpp
@@ -123,9 +123,6 @@ template<typename MatrixType> void triangular_rect(const MatrixType& m)
typedef typename MatrixType::Scalar Scalar;
typedef typename NumTraits<Scalar>::Real RealScalar;
enum { Rows = MatrixType::RowsAtCompileTime, Cols = MatrixType::ColsAtCompileTime };
- typedef Matrix<Scalar, Rows, 1> VectorType;
- typedef Matrix<Scalar, Rows, Rows> RMatrixType;
-
Index rows = m.rows();
Index cols = m.cols();
diff --git a/test/umeyama.cpp b/test/umeyama.cpp
index 972a280c3..814d19d01 100644
--- a/test/umeyama.cpp
+++ b/test/umeyama.cpp
@@ -22,8 +22,6 @@ template <typename T>
Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic> randMatrixUnitary(int size)
{
typedef T Scalar;
- typedef typename NumTraits<Scalar>::Real RealScalar;
-
typedef Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic> MatrixType;
MatrixType Q;
@@ -77,7 +75,6 @@ template <typename T>
Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic> randMatrixSpecialUnitary(int size)
{
typedef T Scalar;
- typedef typename NumTraits<Scalar>::Real RealScalar;
typedef Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic> MatrixType;
diff --git a/test/unalignedcount.cpp b/test/unalignedcount.cpp
index 5451159e6..ca7e159f3 100644
--- a/test/unalignedcount.cpp
+++ b/test/unalignedcount.cpp
@@ -40,5 +40,7 @@ void test_unalignedcount()
#else
// The following line is to eliminate "variable not used" warnings
nb_load = nb_loadu = nb_store = nb_storeu = 0;
+ int a(0), b(0);
+ VERIFY(a==b);
#endif
}
diff --git a/test/upperbidiagonalization.cpp b/test/upperbidiagonalization.cpp
index db6ce383e..5897cffab 100644
--- a/test/upperbidiagonalization.cpp
+++ b/test/upperbidiagonalization.cpp
@@ -15,7 +15,6 @@ template<typename MatrixType> void upperbidiag(const MatrixType& m)
const typename MatrixType::Index rows = m.rows();
const typename MatrixType::Index cols = m.cols();
- typedef typename MatrixType::Scalar Scalar;
typedef Matrix<typename MatrixType::RealScalar, MatrixType::RowsAtCompileTime, MatrixType::ColsAtCompileTime> RealMatrixType;
MatrixType a = MatrixType::Random(rows,cols);
diff --git a/test/vectorwiseop.cpp b/test/vectorwiseop.cpp
index b938e3957..9d60b0388 100644
--- a/test/vectorwiseop.cpp
+++ b/test/vectorwiseop.cpp
@@ -15,7 +15,6 @@ template<typename ArrayType> void vectorwiseop_array(const ArrayType& m)
{
typedef typename ArrayType::Index Index;
typedef typename ArrayType::Scalar Scalar;
- typedef typename NumTraits<Scalar>::Real RealScalar;
typedef Array<Scalar, ArrayType::RowsAtCompileTime, 1> ColVectorType;
typedef Array<Scalar, 1, ArrayType::ColsAtCompileTime> RowVectorType;
@@ -111,6 +110,8 @@ template<typename MatrixType> void vectorwiseop_matrix(const MatrixType& m)
typedef typename NumTraits<Scalar>::Real RealScalar;
typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, 1> ColVectorType;
typedef Matrix<Scalar, 1, MatrixType::ColsAtCompileTime> RowVectorType;
+ typedef Matrix<RealScalar, MatrixType::RowsAtCompileTime, 1> RealColVectorType;
+ typedef Matrix<RealScalar, 1, MatrixType::ColsAtCompileTime> RealRowVectorType;
Index rows = m.rows();
Index cols = m.cols();
@@ -123,6 +124,8 @@ template<typename MatrixType> void vectorwiseop_matrix(const MatrixType& m)
ColVectorType colvec = ColVectorType::Random(rows);
RowVectorType rowvec = RowVectorType::Random(cols);
+ RealColVectorType rcres;
+ RealRowVectorType rrres;
// test addition
@@ -159,6 +162,26 @@ template<typename MatrixType> void vectorwiseop_matrix(const MatrixType& m)
VERIFY_RAISES_ASSERT(m2.rowwise() -= rowvec.transpose());
VERIFY_RAISES_ASSERT(m1.rowwise() - rowvec.transpose());
+
+ // test norm
+ rrres = m1.colwise().norm();
+ VERIFY_IS_APPROX(rrres(c), m1.col(c).norm());
+ rcres = m1.rowwise().norm();
+ VERIFY_IS_APPROX(rcres(r), m1.row(r).norm());
+
+ // test normalized
+ m2 = m1.colwise().normalized();
+ VERIFY_IS_APPROX(m2.col(c), m1.col(c).normalized());
+ m2 = m1.rowwise().normalized();
+ VERIFY_IS_APPROX(m2.row(r), m1.row(r).normalized());
+
+ // test normalize
+ m2 = m1;
+ m2.colwise().normalize();
+ VERIFY_IS_APPROX(m2.col(c), m1.col(c).normalized());
+ m2 = m1;
+ m2.rowwise().normalize();
+ VERIFY_IS_APPROX(m2.row(r), m1.row(r).normalized());
}
void test_vectorwiseop()
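The vectorwiseop additions test the partial-reduction counterparts of norm(), normalized() and normalize(): one scalar (or one normalized column/row) per inner vector. In short:

#include <Eigen/Dense>
using namespace Eigen;

int main()
{
  MatrixXd m = MatrixXd::Random(3,4);
  RowVectorXd colNorms = m.colwise().norm();    // one norm per column
  VectorXd rowNorms = m.rowwise().norm();       // one norm per row
  MatrixXd unitCols = m.colwise().normalized(); // returns a normalized copy
  m.rowwise().normalize();                      // normalizes m in place
  (void)colNorms; (void)rowNorms; (void)unitCols;
  return 0;
}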
diff --git a/test/visitor.cpp b/test/visitor.cpp
index b771be73d..39a5d6b5f 100644
--- a/test/visitor.cpp
+++ b/test/visitor.cpp
@@ -72,8 +72,8 @@ template<typename VectorType> void vectorVisitor(const VectorType& w)
while(v(i) == v(i2)) // yes, ==
v(i) = internal::random<Scalar>();
- Scalar minc = Scalar(1000), maxc = Scalar(-1000);
- Index minidx=0,maxidx=0;
+ Scalar minc = v(0), maxc = v(0);
+ Index minidx=0, maxidx=0;
for(Index i = 0; i < size; i++)
{
if(v(i) < minc)
diff --git a/unsupported/Eigen/IterativeSolvers b/unsupported/Eigen/IterativeSolvers
index 04341b22e..aa15403db 100644
--- a/unsupported/Eigen/IterativeSolvers
+++ b/unsupported/Eigen/IterativeSolvers
@@ -27,8 +27,11 @@
#include "../../Eigen/src/misc/Solve.h"
#include "../../Eigen/src/misc/SparseSolve.h"
+#ifndef EIGEN_MPL2_ONLY
#include "src/IterativeSolvers/IterationController.h"
#include "src/IterativeSolvers/ConstrainedConjGrad.h"
+#endif
+
#include "src/IterativeSolvers/IncompleteLU.h"
#include "../../Eigen/Jacobi"
#include "../../Eigen/Householder"
diff --git a/unsupported/Eigen/LevenbergMarquardt b/unsupported/Eigen/LevenbergMarquardt
index b465aef03..0fe2680ba 100644
--- a/unsupported/Eigen/LevenbergMarquardt
+++ b/unsupported/Eigen/LevenbergMarquardt
@@ -16,9 +16,8 @@
#include <Eigen/Jacobi>
#include <Eigen/QR>
#include <unsupported/Eigen/NumericalDiff>
-#ifdef EIGEN_SPQR_SUPPORT
-#include <Eigen/SPQRSupport>
-#endif
+
+#include <Eigen/SparseQR>
/**
* \defgroup LevenbergMarquardt_Module Levenberg-Marquardt module
diff --git a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h
index b833df3c0..bb49191b7 100644
--- a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h
+++ b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h
@@ -489,20 +489,32 @@ struct make_coherent_impl<Matrix<A_Scalar, A_Rows, A_Cols, A_Options, A_MaxRows,
}
};
-template<typename A_Scalar, int A_Rows, int A_Cols, int A_Options, int A_MaxRows, int A_MaxCols> struct scalar_product_traits<Matrix<A_Scalar, A_Rows, A_Cols, A_Options, A_MaxRows, A_MaxCols>,A_Scalar>
+template<typename A_Scalar, int A_Rows, int A_Cols, int A_Options, int A_MaxRows, int A_MaxCols>
+struct scalar_product_traits<Matrix<A_Scalar, A_Rows, A_Cols, A_Options, A_MaxRows, A_MaxCols>,A_Scalar>
{
- typedef Matrix<A_Scalar, A_Rows, A_Cols, A_Options, A_MaxRows, A_MaxCols> ReturnType;
+ enum { Defined = 1 };
+ typedef Matrix<A_Scalar, A_Rows, A_Cols, A_Options, A_MaxRows, A_MaxCols> ReturnType;
};
-template<typename A_Scalar, int A_Rows, int A_Cols, int A_Options, int A_MaxRows, int A_MaxCols> struct scalar_product_traits<A_Scalar, Matrix<A_Scalar, A_Rows, A_Cols, A_Options, A_MaxRows, A_MaxCols> >
+template<typename A_Scalar, int A_Rows, int A_Cols, int A_Options, int A_MaxRows, int A_MaxCols>
+struct scalar_product_traits<A_Scalar, Matrix<A_Scalar, A_Rows, A_Cols, A_Options, A_MaxRows, A_MaxCols> >
{
- typedef Matrix<A_Scalar, A_Rows, A_Cols, A_Options, A_MaxRows, A_MaxCols> ReturnType;
+ enum { Defined = 1 };
+ typedef Matrix<A_Scalar, A_Rows, A_Cols, A_Options, A_MaxRows, A_MaxCols> ReturnType;
};
template<typename DerType>
struct scalar_product_traits<AutoDiffScalar<DerType>,typename DerType::Scalar>
{
- typedef AutoDiffScalar<DerType> ReturnType;
+ enum { Defined = 1 };
+ typedef AutoDiffScalar<DerType> ReturnType;
+};
+
+template<typename DerType>
+struct scalar_product_traits<typename DerType::Scalar,AutoDiffScalar<DerType> >
+{
+ enum { Defined = 1 };
+ typedef AutoDiffScalar<DerType> ReturnType;
};
} // end namespace internal
@@ -532,11 +544,9 @@ inline AutoDiffScalar<DerType> (min)(const T& x, const AutoDiffScalar<DerType>&
template<typename DerType, typename T>
inline AutoDiffScalar<DerType> (max)(const T& x, const AutoDiffScalar<DerType>& y) { return (x > y ? x : y); }
-#define sign(x) x >= 0 ? 1 : -1 // required for abs function below
-
EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(abs,
using std::abs;
- return ReturnType(abs(x.value()), x.derivatives() * (sign(x.value())));)
+ return ReturnType(abs(x.value()), x.derivatives() * (x.value()<0 ? -1 : 1) );)
EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(abs2,
using internal::abs2;
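The abs() fix above replaces an unparenthesized sign(x) macro, which leaked into every translation unit including AutoDiffScalar.h, with an inline conditional encoding d|x|/dx = -1 for x < 0 and +1 otherwise (the kink at 0 is resolved towards +1). The same rule in plain C++, using a toy value/derivative pair in place of AutoDiffScalar:

#include <cmath>

struct Dual { double v, d; }; // toy stand-in for AutoDiffScalar

Dual dabs(Dual x)
{
  Dual r;
  r.v = std::abs(x.v);
  r.d = x.d * (x.v < 0 ? -1 : 1); // same rule as the patched code
  return r;
}

int main()
{
  Dual x = { -2.0, 1.0 };
  Dual y = dabs(x); // y.v == 2, y.d == -1
  return int(y.v) - 2;
}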
diff --git a/unsupported/Eigen/src/BVH/BVAlgorithms.h b/unsupported/Eigen/src/BVH/BVAlgorithms.h
index e5b51decb..994c8af54 100644
--- a/unsupported/Eigen/src/BVH/BVAlgorithms.h
+++ b/unsupported/Eigen/src/BVH/BVAlgorithms.h
@@ -189,7 +189,7 @@ struct minimizer_helper1
Object2 stored;
Minimizer &minimizer;
private:
- minimizer_helper1& operator=(const minimizer_helper1&) {}
+ minimizer_helper1& operator=(const minimizer_helper1&);
};
template<typename Volume2, typename Object2, typename Object1, typename Minimizer>
diff --git a/unsupported/Eigen/src/IterativeSolvers/ConstrainedConjGrad.h b/unsupported/Eigen/src/IterativeSolvers/ConstrainedConjGrad.h
index b83bf7aef..3f18beeeb 100644
--- a/unsupported/Eigen/src/IterativeSolvers/ConstrainedConjGrad.h
+++ b/unsupported/Eigen/src/IterativeSolvers/ConstrainedConjGrad.h
@@ -2,10 +2,6 @@
// for linear algebra.
//
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
/* NOTE The functions of this file have been adapted from the GMM++ library */
diff --git a/unsupported/Eigen/src/IterativeSolvers/DGMRES.h b/unsupported/Eigen/src/IterativeSolvers/DGMRES.h
index 952aba15e..7b5b5a91b 100644
--- a/unsupported/Eigen/src/IterativeSolvers/DGMRES.h
+++ b/unsupported/Eigen/src/IterativeSolvers/DGMRES.h
@@ -38,10 +38,9 @@ struct traits<DGMRES<_MatrixType,_Preconditioner> >
template <typename VectorType, typename IndexType>
void sortWithPermutation (VectorType& vec, IndexType& perm, typename IndexType::Scalar& ncut)
{
- assert(vec.size() == perm.size());
+ eigen_assert(vec.size() == perm.size());
typedef typename IndexType::Scalar Index;
typedef typename VectorType::Scalar Scalar;
- Index n = vec.size();
bool flag;
for (Index k = 0; k < ncut; k++)
{
@@ -115,8 +114,10 @@ class DGMRES : public IterativeSolverBase<DGMRES<_MatrixType,_Preconditioner> >
typedef typename MatrixType::RealScalar RealScalar;
typedef _Preconditioner Preconditioner;
typedef Matrix<Scalar,Dynamic,Dynamic> DenseMatrix;
- typedef Matrix<Scalar,Dynamic,1> DenseVector;
- typedef std::complex<RealScalar> ComplexScalar;
+ typedef Matrix<RealScalar,Dynamic,Dynamic> DenseRealMatrix;
+ typedef Matrix<Scalar,Dynamic,1> DenseVector;
+ typedef Matrix<RealScalar,Dynamic,1> DenseRealVector;
+ typedef Matrix<std::complex<RealScalar>, Dynamic, 1> ComplexVector;
/** Default constructor. */
@@ -220,6 +221,8 @@ class DGMRES : public IterativeSolverBase<DGMRES<_MatrixType,_Preconditioner> >
// Apply deflation to a vector
template<typename RhsType, typename DestType>
int dgmresApplyDeflation(const RhsType& In, DestType& Out) const;
+ ComplexVector schurValues(const ComplexSchur<DenseMatrix>& schurofH) const;
+ ComplexVector schurValues(const RealSchur<DenseMatrix>& schurofH) const;
// Init data for deflation
void dgmresInitDeflation(Index& rows) const;
mutable DenseMatrix m_V; // Krylov basis vectors
@@ -307,9 +310,7 @@ int DGMRES<_MatrixType, _Preconditioner>::dgmresCycle(const MatrixType& mat, con
int n = mat.rows();
DenseVector tv1(n), tv2(n); //Temporary vectors
while (m_info == NoConvergence && it < m_restart && nbIts < m_iterations)
- {
- int n = m_V.rows();
-
+ {
// Apply preconditioner(s) at right
if (m_isDeflInitialized )
{
@@ -323,7 +324,7 @@ int DGMRES<_MatrixType, _Preconditioner>::dgmresCycle(const MatrixType& mat, con
tv1 = mat * tv2;
// Orthogonalize it with the previous basis in the basis using modified Gram-Schmidt
- RealScalar coef;
+ Scalar coef;
for (int i = 0; i <= it; ++i)
{
coef = tv1.dot(m_V.col(i));
@@ -398,58 +399,71 @@ void DGMRES<_MatrixType, _Preconditioner>::dgmresInitDeflation(Index& rows) cons
}
template< typename _MatrixType, typename _Preconditioner>
-int DGMRES<_MatrixType, _Preconditioner>::dgmresComputeDeflationData(const MatrixType& mat, const Preconditioner& precond, const Index& it, Index& neig) const
+inline typename DGMRES<_MatrixType, _Preconditioner>::ComplexVector DGMRES<_MatrixType, _Preconditioner>::schurValues(const ComplexSchur<DenseMatrix>& schurofH) const
{
- // First, find the Schur form of the Hessenberg matrix H
- RealSchur<DenseMatrix> schurofH;
- bool computeU = true;
- DenseMatrix matrixQ(it,it);
- matrixQ.setIdentity();
- schurofH.computeHessenberg(m_Hes.topLeftCorner(it,it), matrixQ, computeU);
+ return schurofH.matrixT().diagonal();
+}
+
+template< typename _MatrixType, typename _Preconditioner>
+inline typename DGMRES<_MatrixType, _Preconditioner>::ComplexVector DGMRES<_MatrixType, _Preconditioner>::schurValues(const RealSchur<DenseMatrix>& schurofH) const
+{
+ typedef typename MatrixType::Index Index;
const DenseMatrix& T = schurofH.matrixT();
-
- // Extract the schur values from the diagonal of T;
- Matrix<ComplexScalar,Dynamic,1> eig(it);
- Matrix<Index,Dynamic,1>perm(it);
- int j = 0;
+ Index it = T.rows();
+ ComplexVector eig(it);
+ Index j = 0;
while (j < it-1)
{
if (T(j+1,j) ==Scalar(0))
{
- eig(j) = ComplexScalar(T(j,j),Scalar(0));
+ eig(j) = std::complex<RealScalar>(T(j,j),RealScalar(0));
j++;
}
else
{
- eig(j) = ComplexScalar(T(j,j),T(j+1,j));
- eig(j+1) = ComplexScalar(T(j,j+1),T(j+1,j+1));
+ eig(j) = std::complex<RealScalar>(T(j,j),T(j+1,j));
+ eig(j+1) = std::complex<RealScalar>(T(j,j+1),T(j+1,j+1));
j++;
}
}
- if (j < it) eig(j) = ComplexScalar(T(j,j),Scalar(0));
+ if (j < it-1) eig(j) = std::complex<RealScalar>(T(j,j),RealScalar(0));
+ return eig;
+}
+
+template< typename _MatrixType, typename _Preconditioner>
+int DGMRES<_MatrixType, _Preconditioner>::dgmresComputeDeflationData(const MatrixType& mat, const Preconditioner& precond, const Index& it, Index& neig) const
+{
+ // First, find the Schur form of the Hessenberg matrix H
+ typename internal::conditional<NumTraits<Scalar>::IsComplex, ComplexSchur<DenseMatrix>, RealSchur<DenseMatrix> >::type schurofH;
+ bool computeU = true;
+ DenseMatrix matrixQ(it,it);
+ matrixQ.setIdentity();
+ schurofH.computeFromHessenberg(m_Hes.topLeftCorner(it,it), matrixQ, computeU);
+
+ ComplexVector eig(it);
+ Matrix<Index,Dynamic,1>perm(it);
+ eig = this->schurValues(schurofH);
// Reorder the absolute values of Schur values
- DenseVector modulEig(it);
+ DenseRealVector modulEig(it);
for (int j=0; j<it; ++j) modulEig(j) = std::abs(eig(j));
perm.setLinSpaced(it,0,it-1);
internal::sortWithPermutation(modulEig, perm, neig);
if (!m_lambdaN)
{
- //Find the maximum eigenvalue
- for (int i = 0; i < it; ++i)
- if (modulEig(i) > m_lambdaN)
- m_lambdaN = modulEig(i);
+ m_lambdaN = (std::max)(modulEig.maxCoeff(), m_lambdaN);
}
  //Count the actual number of extracted eigenvalues; complex conjugate pairs count as two
int nbrEig = 0;
while (nbrEig < neig)
{
- if(eig(perm(it-nbrEig-1)).imag() == Scalar(0)) nbrEig++;
+ if(eig(perm(it-nbrEig-1)).imag() == RealScalar(0)) nbrEig++;
else nbrEig += 2;
}
- // Extract the smallest Schur vectors
+ // Extract the Schur vectors corresponding to the smallest Ritz values
DenseMatrix Sr(it, nbrEig);
+ Sr.setZero();
for (int j = 0; j < nbrEig; j++)
{
Sr.col(j) = schurofH.matrixU().col(perm(it-j-1));
@@ -478,7 +492,7 @@ int DGMRES<_MatrixType, _Preconditioner>::dgmresComputeDeflationData(const Matri
MX.col(j) = precond.solve(tv1);
}
- //Update T = [U'MU U'MX; X'MU X'MX]
+ //Update m_T = [U'MU U'MX; X'MU X'MX]
m_T.block(m_r, m_r, nbrEig, nbrEig) = X.transpose() * MX;
if(m_r)
{
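
The hunk above replaces the unconditional RealSchur with a compile-time choice
between ComplexSchur and RealSchur. A minimal standalone sketch of that
dispatch, using std::conditional (C++11) in place of Eigen's
internal::conditional; the function name is illustrative:

    #include <type_traits>
    #include <Eigen/Dense>

    template <typename Scalar>
    void schurOfHessenberg(const Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic>& H)
    {
      typedef Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic> DenseMatrix;
      // Complex scalars get ComplexSchur, real scalars get RealSchur; the
      // schurValues() overloads above then resolve on the resulting type.
      typedef typename std::conditional<Eigen::NumTraits<Scalar>::IsComplex != 0,
                                        Eigen::ComplexSchur<DenseMatrix>,
                                        Eigen::RealSchur<DenseMatrix> >::type SchurType;
      SchurType schurOfH;
      DenseMatrix Q = DenseMatrix::Identity(H.rows(), H.cols());
      schurOfH.computeFromHessenberg(H, Q, true); // computeU = true
    }
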
diff --git a/unsupported/Eigen/src/IterativeSolvers/GMRES.h b/unsupported/Eigen/src/IterativeSolvers/GMRES.h
index 34e67db82..073367506 100644
--- a/unsupported/Eigen/src/IterativeSolvers/GMRES.h
+++ b/unsupported/Eigen/src/IterativeSolvers/GMRES.h
@@ -61,7 +61,6 @@ bool gmres(const MatrixType & mat, const Rhs & rhs, Dest & x, const Precondition
typedef typename Dest::RealScalar RealScalar;
typedef typename Dest::Scalar Scalar;
- typedef Matrix < RealScalar, Dynamic, 1 > RealVectorType;
typedef Matrix < Scalar, Dynamic, 1 > VectorType;
typedef Matrix < Scalar, Dynamic, Dynamic > FMatrixType;
@@ -348,7 +347,8 @@ public:
template<typename Rhs,typename Dest>
void _solve(const Rhs& b, Dest& x) const
{
- x.setZero();
+ x = b;
+    if(x.squaredNorm() == 0) return; // check for a zero right-hand side
_solveWithGuess(b,x);
}
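
With this change _solve() seeds the iteration with b instead of zero and
returns immediately for a zero right-hand side, for which x = 0 is already
exact. A usage sketch of the public interface under the new behavior (the
matrix values are illustrative):

    #include <Eigen/Sparse>
    #include <unsupported/Eigen/IterativeSolvers>

    int main()
    {
      typedef Eigen::SparseMatrix<double> SpMat;
      SpMat A(2, 2);
      A.insert(0, 0) = 4.0;
      A.insert(1, 1) = 2.0;
      A.makeCompressed();

      Eigen::GMRES<SpMat> gmres(A);
      Eigen::VectorXd b = Eigen::VectorXd::Zero(2);
      Eigen::VectorXd x = gmres.solve(b); // zero rhs: returns x = 0 without iterating
      return x.norm() == 0.0 ? 0 : 1;
    }
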
diff --git a/unsupported/Eigen/src/IterativeSolvers/IncompleteCholesky.h b/unsupported/Eigen/src/IterativeSolvers/IncompleteCholesky.h
index b4a67ded0..661c1f2e0 100644
--- a/unsupported/Eigen/src/IterativeSolvers/IncompleteCholesky.h
+++ b/unsupported/Eigen/src/IterativeSolvers/IncompleteCholesky.h
@@ -132,6 +132,7 @@ template<typename _MatrixType>
void IncompleteCholesky<Scalar,_UpLo, OrderingType>::factorize(const _MatrixType& mat)
{
using std::sqrt;
+ using std::min;
eigen_assert(m_analysisIsOk && "analyzePattern() should be called first");
// Dropping strategies : Keep only the p largest elements per column, where p is the number of elements in the column of the original matrix. Other strategies will be added
@@ -165,7 +166,7 @@ void IncompleteCholesky<Scalar,_UpLo, OrderingType>::factorize(const _MatrixType
for (int j = 0; j < n; j++){
for (int k = colPtr[j]; k < colPtr[j+1]; k++)
vals[k] /= (m_scal(j) * m_scal(rowIdx[k]));
- mindiag = std::min(vals[colPtr[j]], mindiag);
+ mindiag = (min)(vals[colPtr[j]], mindiag);
}
if(mindiag < Scalar(0.)) m_shift = m_shift - mindiag;
@@ -274,4 +275,4 @@ struct solve_retval<IncompleteCholesky<_Scalar, _UpLo, OrderingType>, Rhs>
} // end namespace Eigen
-#endif \ No newline at end of file
+#endif
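
The (min) spelling above is the usual guard against the function-like min/max
macros that <windows.h> injects by default; a minimal sketch of the idiom:

    #include <algorithm>

    double smaller(double a, double b)
    {
      using std::min;
      // The extra parentheses suppress function-like macro expansion, so
      // this calls std::min even when a min(x,y) macro is in scope.
      return (min)(a, b);
    }
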
diff --git a/unsupported/Eigen/src/IterativeSolvers/IterationController.h b/unsupported/Eigen/src/IterativeSolvers/IterationController.h
index ea86dfef4..c9c1a4be2 100644
--- a/unsupported/Eigen/src/IterativeSolvers/IterationController.h
+++ b/unsupported/Eigen/src/IterativeSolvers/IterationController.h
@@ -2,10 +2,6 @@
// for linear algebra.
//
// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
/* NOTE The class IterationController has been adapted from the iteration
* class of the GMM++ and ITL libraries.
diff --git a/unsupported/Eigen/src/IterativeSolvers/MINRES.h b/unsupported/Eigen/src/IterativeSolvers/MINRES.h
index 6bc1b8f5d..0e56342a8 100644
--- a/unsupported/Eigen/src/IterativeSolvers/MINRES.h
+++ b/unsupported/Eigen/src/IterativeSolvers/MINRES.h
@@ -52,7 +52,7 @@ namespace Eigen {
VectorType w_new(precond.solve(v_new)); // initialize w_new
// RealScalar beta; // will be initialized inside loop
RealScalar beta_new2(v_new.dot(w_new));
- assert(beta_new2 >= 0 && "PRECONDITIONER IS NOT POSITIVE DEFINITE");
+ eigen_assert(beta_new2 >= 0 && "PRECONDITIONER IS NOT POSITIVE DEFINITE");
RealScalar beta_new(sqrt(beta_new2));
const RealScalar beta_one(beta_new);
v_new /= beta_new;
@@ -91,7 +91,7 @@ namespace Eigen {
v_new -= alpha*v; // overwrite v_new
w_new = precond.solve(v_new); // overwrite w_new
beta_new2 = v_new.dot(w_new); // compute beta_new
- assert(beta_new2 >= 0 && "PRECONDITIONER IS NOT POSITIVE DEFINITE");
+ eigen_assert(beta_new2 >= 0 && "PRECONDITIONER IS NOT POSITIVE DEFINITE");
beta_new = sqrt(beta_new2); // compute beta_new
v_new /= beta_new; // overwrite v_new for next iteration
w_new /= beta_new; // overwrite w_new for next iteration
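
This hunk, like many below, swaps plain assert() for eigen_assert(), which a
project can redefine or disable globally. A minimal sketch of the mechanism;
this shows the idea, not Eigen's exact definition from Macros.h:

    #include <cassert>

    // A project may define eigen_assert before including Eigen to install
    // its own handler (e.g. one that throws); otherwise assert() is used.
    #ifndef eigen_assert
    #define eigen_assert(x) assert(x)
    #endif

    int main()
    {
      eigen_assert(2 + 2 == 4 && "sanity check");
      return 0;
    }
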
diff --git a/unsupported/Eigen/src/IterativeSolvers/Scaling.h b/unsupported/Eigen/src/IterativeSolvers/Scaling.h
index ed4ee48c8..4fd439202 100644
--- a/unsupported/Eigen/src/IterativeSolvers/Scaling.h
+++ b/unsupported/Eigen/src/IterativeSolvers/Scaling.h
@@ -73,7 +73,7 @@ class IterScaling
{
int m = mat.rows();
int n = mat.cols();
- assert((m>0 && m == n) && "Please give a non - empty matrix");
+    eigen_assert((m>0 && m == n) && "Please give a non-empty matrix");
m_left.resize(m);
m_right.resize(n);
m_left.setOnes();
@@ -182,4 +182,4 @@ class IterScaling
int m_maxits; // Maximum number of iterations allowed
};
}
-#endif \ No newline at end of file
+#endif
diff --git a/unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h b/unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h
index a313eade3..532896c3b 100644
--- a/unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h
+++ b/unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h
@@ -48,8 +48,8 @@ class KroneckerProduct : public ReturnByValue<KroneckerProduct<Lhs,Rhs> >
Scalar coeff(Index row, Index col) const
{
- return m_A.coeff(row / m_A.cols(), col / m_A.rows()) *
- m_B.coeff(row % m_A.cols(), col % m_A.rows());
+ return m_A.coeff(row / m_B.rows(), col / m_B.cols()) *
+ m_B.coeff(row % m_B.rows(), col % m_B.cols());
}
Scalar coeff(Index i) const
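
The corrected indexing reflects the block structure of the Kronecker product:
A (x) B consists of rows(A) x cols(A) blocks, each of size rows(B) x cols(B),
so block selection must use B's dimensions, not A's. A small self-contained
check (the matrix values are arbitrary):

    #include <cassert>
    #include <Eigen/Dense>
    #include <unsupported/Eigen/KroneckerProduct>

    int main()
    {
      Eigen::Matrix2d A;
      A << 1, 2,
           3, 4;
      Eigen::MatrixXd B(3, 2);
      B << 5,  6,
           7,  8,
           9, 10;

      Eigen::MatrixXd K = Eigen::kroneckerProduct(A, B);
      for (int i = 0; i < K.rows(); ++i)
        for (int j = 0; j < K.cols(); ++j)
          // Entry (i,j) of A (x) B, using the fixed formula:
          assert(K(i, j) == A(i / B.rows(), j / B.cols())
                          * B(i % B.rows(), j % B.cols()));
      return 0;
    }
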
diff --git a/unsupported/Eigen/src/LevenbergMarquardt/LMcovar.h b/unsupported/Eigen/src/LevenbergMarquardt/LMcovar.h
index 3210587e4..32d3ad518 100644
--- a/unsupported/Eigen/src/LevenbergMarquardt/LMcovar.h
+++ b/unsupported/Eigen/src/LevenbergMarquardt/LMcovar.h
@@ -33,7 +33,7 @@ void covar(
const Index n = r.cols();
const Scalar tolr = tol * abs(r(0,0));
Matrix< Scalar, Dynamic, 1 > wa(n);
- assert(ipvt.size()==n);
+ eigen_assert(ipvt.size()==n);
/* form the inverse of r in the full upper triangle of r. */
l = -1;
diff --git a/unsupported/Eigen/src/LevenbergMarquardt/LMonestep.h b/unsupported/Eigen/src/LevenbergMarquardt/LMonestep.h
index aa0c02668..60584c523 100644
--- a/unsupported/Eigen/src/LevenbergMarquardt/LMonestep.h
+++ b/unsupported/Eigen/src/LevenbergMarquardt/LMonestep.h
@@ -20,13 +20,12 @@ template<typename FunctorType>
LevenbergMarquardtSpace::Status
LevenbergMarquardt<FunctorType>::minimizeOneStep(FVectorType &x)
{
- typedef typename FunctorType::JacobianType JacobianType;
using std::abs;
using std::sqrt;
RealScalar temp, temp1,temp2;
RealScalar ratio;
RealScalar pnorm, xnorm, fnorm1, actred, dirder, prered;
- assert(x.size()==n); // check the caller is not cheating us
+ eigen_assert(x.size()==n); // check the caller is not cheating us
temp = 0.0; xnorm = 0.0;
/* calculate the jacobian matrix. */
@@ -40,13 +39,14 @@ LevenbergMarquardt<FunctorType>::minimizeOneStep(FVectorType &x)
/* compute the qr factorization of the jacobian. */
for (int j = 0; j < x.size(); ++j)
- m_wa2(j) = m_fjac.col(j).norm();
- //FIXME Implement bluenorm for sparse vectors
-// m_wa2 = m_fjac.colwise().blueNorm();
- QRSolver qrfac(m_fjac); //FIXME Check if the QR decomposition succeed
+ m_wa2(j) = m_fjac.col(j).blueNorm();
+ QRSolver qrfac(m_fjac);
+ if(qrfac.info() != Success) {
+ m_info = NumericalIssue;
+ return LevenbergMarquardtSpace::ImproperInputParameters;
+ }
// Make a copy of the first factor with the associated permutation
- JacobianType rfactor;
- rfactor = qrfac.matrixQR();
+ m_rfactor = qrfac.matrixR();
m_permutation = (qrfac.colsPermutation());
/* on the first iteration and if external scaling is not used, scale according */
@@ -75,11 +75,13 @@ LevenbergMarquardt<FunctorType>::minimizeOneStep(FVectorType &x)
if (m_fnorm != 0.)
for (Index j = 0; j < n; ++j)
if (m_wa2[m_permutation.indices()[j]] != 0.)
- m_gnorm = (std::max)(m_gnorm, abs( rfactor.col(j).head(j+1).dot(m_qtf.head(j+1)/m_fnorm) / m_wa2[m_permutation.indices()[j]]));
+ m_gnorm = (std::max)(m_gnorm, abs( m_rfactor.col(j).head(j+1).dot(m_qtf.head(j+1)/m_fnorm) / m_wa2[m_permutation.indices()[j]]));
/* test for convergence of the gradient norm. */
- if (m_gnorm <= m_gtol)
- return LevenbergMarquardtSpace::CosinusTooSmall;
+ if (m_gnorm <= m_gtol) {
+ m_info = Success;
+ return LevenbergMarquardtSpace::CosinusTooSmall;
+ }
/* rescale if necessary. */
if (!m_useExternalScaling)
@@ -111,7 +113,7 @@ LevenbergMarquardt<FunctorType>::minimizeOneStep(FVectorType &x)
/* compute the scaled predicted reduction and */
/* the scaled directional derivative. */
- m_wa3 = rfactor.template triangularView<Upper>() * (m_permutation.inverse() *m_wa1);
+ m_wa3 = m_rfactor.template triangularView<Upper>() * (m_permutation.inverse() *m_wa1);
temp1 = internal::abs2(m_wa3.stableNorm() / m_fnorm);
temp2 = internal::abs2(sqrt(m_par) * pnorm / m_fnorm);
prered = temp1 + temp2 / Scalar(.5);
@@ -152,21 +154,42 @@ LevenbergMarquardt<FunctorType>::minimizeOneStep(FVectorType &x)
/* tests for convergence. */
if (abs(actred) <= m_ftol && prered <= m_ftol && Scalar(.5) * ratio <= 1. && m_delta <= m_xtol * xnorm)
- return LevenbergMarquardtSpace::RelativeErrorAndReductionTooSmall;
- if (abs(actred) <= m_ftol && prered <= m_ftol && Scalar(.5) * ratio <= 1.)
- return LevenbergMarquardtSpace::RelativeReductionTooSmall;
+ {
+ m_info = Success;
+ return LevenbergMarquardtSpace::RelativeErrorAndReductionTooSmall;
+ }
+ if (abs(actred) <= m_ftol && prered <= m_ftol && Scalar(.5) * ratio <= 1.)
+ {
+ m_info = Success;
+ return LevenbergMarquardtSpace::RelativeReductionTooSmall;
+ }
if (m_delta <= m_xtol * xnorm)
- return LevenbergMarquardtSpace::RelativeErrorTooSmall;
+ {
+ m_info = Success;
+ return LevenbergMarquardtSpace::RelativeErrorTooSmall;
+ }
/* tests for termination and stringent tolerances. */
- if (m_nfev >= m_maxfev)
- return LevenbergMarquardtSpace::TooManyFunctionEvaluation;
+ if (m_nfev >= m_maxfev)
+ {
+ m_info = NoConvergence;
+ return LevenbergMarquardtSpace::TooManyFunctionEvaluation;
+ }
if (abs(actred) <= NumTraits<Scalar>::epsilon() && prered <= NumTraits<Scalar>::epsilon() && Scalar(.5) * ratio <= 1.)
- return LevenbergMarquardtSpace::FtolTooSmall;
- if (m_delta <= NumTraits<Scalar>::epsilon() * xnorm)
- return LevenbergMarquardtSpace::XtolTooSmall;
+ {
+ m_info = Success;
+ return LevenbergMarquardtSpace::FtolTooSmall;
+ }
+ if (m_delta <= NumTraits<Scalar>::epsilon() * xnorm)
+ {
+ m_info = Success;
+ return LevenbergMarquardtSpace::XtolTooSmall;
+ }
if (m_gnorm <= NumTraits<Scalar>::epsilon())
- return LevenbergMarquardtSpace::GtolTooSmall;
+ {
+ m_info = Success;
+ return LevenbergMarquardtSpace::GtolTooSmall;
+ }
} while (ratio < Scalar(1e-4));
@@ -176,4 +199,4 @@ LevenbergMarquardt<FunctorType>::minimizeOneStep(FVectorType &x)
} // end namespace Eigen
-#endif // EIGEN_LMONESTEP_H \ No newline at end of file
+#endif // EIGEN_LMONESTEP_H
diff --git a/unsupported/Eigen/src/LevenbergMarquardt/LMpar.h b/unsupported/Eigen/src/LevenbergMarquardt/LMpar.h
index dc60ca05a..9532042d9 100644
--- a/unsupported/Eigen/src/LevenbergMarquardt/LMpar.h
+++ b/unsupported/Eigen/src/LevenbergMarquardt/LMpar.h
@@ -40,13 +40,17 @@ namespace internal {
Scalar temp, paru;
Scalar gnorm;
Scalar dxnorm;
-
+
+    // Make a copy of the triangular factor.
+    // This copy is modified during the call to qrsolv below.
+ MatrixType s;
+ s = qr.matrixR();
/* Function Body */
const Scalar dwarf = (std::numeric_limits<Scalar>::min)();
- const Index n = qr.matrixQR().cols();
- assert(n==diag.size());
- assert(n==qtb.size());
+ const Index n = qr.matrixR().cols();
+ eigen_assert(n==diag.size());
+ eigen_assert(n==qtb.size());
VectorType wa1, wa2;
@@ -58,8 +62,7 @@ namespace internal {
wa1 = qtb;
wa1.tail(n-rank).setZero();
//FIXME There is no solve in place for sparse triangularView
- //qr.matrixQR().topLeftCorner(rank, rank).template triangularView<Upper>().solveInPlace(wa1.head(rank));
- wa1.head(rank) = qr.matrixQR().topLeftCorner(rank, rank).template triangularView<Upper>().solve(qtb.head(rank));
+ wa1.head(rank) = s.topLeftCorner(rank,rank).template triangularView<Upper>().solve(qtb.head(rank));
x = qr.colsPermutation()*wa1;
@@ -81,14 +84,14 @@ namespace internal {
parl = 0.;
if (rank==n) {
wa1 = qr.colsPermutation().inverse() * diag.cwiseProduct(wa2)/dxnorm;
- qr.matrixQR().topLeftCorner(n, n).transpose().template triangularView<Lower>().solveInPlace(wa1);
+ s.topLeftCorner(n,n).transpose().template triangularView<Lower>().solveInPlace(wa1);
temp = wa1.blueNorm();
parl = fp / m_delta / temp / temp;
}
/* calculate an upper bound, paru, for the zero of the function. */
for (j = 0; j < n; ++j)
- wa1[j] = qr.matrixQR().col(j).head(j+1).dot(qtb.head(j+1)) / diag[qr.colsPermutation().indices()(j)];
+ wa1[j] = s.col(j).head(j+1).dot(qtb.head(j+1)) / diag[qr.colsPermutation().indices()(j)];
gnorm = wa1.stableNorm();
paru = gnorm / m_delta;
@@ -103,8 +106,6 @@ namespace internal {
par = gnorm / dxnorm;
/* beginning of an iteration. */
- MatrixType s;
- s = qr.matrixQR();
while (true) {
++iter;
@@ -130,7 +131,6 @@ namespace internal {
/* compute the newton correction. */
wa1 = qr.colsPermutation().inverse() * diag.cwiseProduct(wa2/dxnorm);
// we could almost use this here, but the diagonal is outside qr, in sdiag[]
- // qr.matrixQR().topLeftCorner(n, n).transpose().template triangularView<Lower>().solveInPlace(wa1);
for (j = 0; j < n; ++j) {
wa1[j] /= sdiag[j];
temp = wa1[j];
diff --git a/unsupported/Eigen/src/LevenbergMarquardt/LMqrsolv.h b/unsupported/Eigen/src/LevenbergMarquardt/LMqrsolv.h
index ed6f97fe8..f5290dee4 100644
--- a/unsupported/Eigen/src/LevenbergMarquardt/LMqrsolv.h
+++ b/unsupported/Eigen/src/LevenbergMarquardt/LMqrsolv.h
@@ -19,9 +19,9 @@ namespace Eigen {
namespace internal {
-template <typename Scalar,int SizeAtCompileTime, typename Index>
+template <typename Scalar,int Rows, int Cols, typename Index>
void lmqrsolv(
- Matrix<Scalar,SizeAtCompileTime,SizeAtCompileTime> &s,
+ Matrix<Scalar,Rows,Cols> &s,
const PermutationMatrix<Dynamic,Dynamic,Index> &iPerm,
const Matrix<Scalar,Dynamic,1> &diag,
const Matrix<Scalar,Dynamic,1> &qtb,
diff --git a/unsupported/Eigen/src/LevenbergMarquardt/LevenbergMarquardt.h b/unsupported/Eigen/src/LevenbergMarquardt/LevenbergMarquardt.h
index e45e73ab5..8d5538d69 100644
--- a/unsupported/Eigen/src/LevenbergMarquardt/LevenbergMarquardt.h
+++ b/unsupported/Eigen/src/LevenbergMarquardt/LevenbergMarquardt.h
@@ -65,7 +65,6 @@ struct DenseFunctor
// should be defined in derived classes
};
-#ifdef EIGEN_SPQR_SUPPORT
template <typename _Scalar, typename _Index>
struct SparseFunctor
{
@@ -74,7 +73,11 @@ struct SparseFunctor
typedef Matrix<Scalar,Dynamic,1> InputType;
typedef Matrix<Scalar,Dynamic,1> ValueType;
typedef SparseMatrix<Scalar, ColMajor, Index> JacobianType;
- typedef SPQR<JacobianType> QRSolver;
+ typedef SparseQR<JacobianType, COLAMDOrdering<int> > QRSolver;
+ enum {
+ InputsAtCompileTime = Dynamic,
+ ValuesAtCompileTime = Dynamic
+ };
SparseFunctor(int inputs, int values) : m_inputs(inputs), m_values(values) {}
@@ -89,7 +92,6 @@ struct SparseFunctor
// to be defined in the functor if no automatic differentiation
};
-#endif
namespace internal {
template <typename QRSolver, typename VectorType>
void lmpar2(const QRSolver &qr, const VectorType &diag, const VectorType &qtb,
@@ -119,7 +121,8 @@ class LevenbergMarquardt
typedef PermutationMatrix<Dynamic,Dynamic> PermutationType;
public:
LevenbergMarquardt(FunctorType& functor)
- : m_functor(functor),m_nfev(0),m_njev(0),m_fnorm(0.0),m_gnorm(0)
+ : m_functor(functor),m_nfev(0),m_njev(0),m_fnorm(0.0),m_gnorm(0),
+ m_isInitialized(false),m_info(InvalidInput)
{
resetParameters();
m_useExternalScaling=false;
@@ -171,41 +174,61 @@ class LevenbergMarquardt
/** Use an external Scaling. If set to true, pass a nonzero diagonal to diag() */
void setExternalScaling(bool value) {m_useExternalScaling = value; }
- /** Get a reference to the diagonal of the jacobian */
+ /** \returns a reference to the diagonal of the jacobian */
FVectorType& diag() {return m_diag; }
- /** Number of iterations performed */
+ /** \returns the number of iterations performed */
Index iterations() { return m_iter; }
- /** Number of functions evaluation */
+    /** \returns the number of function evaluations */
Index nfev() { return m_nfev; }
- /** Number of jacobian evaluation */
+    /** \returns the number of jacobian evaluations */
Index njev() { return m_njev; }
- /** Norm of current vector function */
+    /** \returns the norm of the current vector function */
RealScalar fnorm() {return m_fnorm; }
- /** Norm of the gradient of the error */
+ /** \returns the norm of the gradient of the error */
RealScalar gnorm() {return m_gnorm; }
- /** the LevenbergMarquardt parameter */
+ /** \returns the LevenbergMarquardt parameter */
RealScalar lm_param(void) { return m_par; }
- /** reference to the current vector function
+ /** \returns a reference to the current vector function
*/
FVectorType& fvec() {return m_fvec; }
- /** reference to the matrix where the current Jacobian matrix is stored
+ /** \returns a reference to the matrix where the current Jacobian matrix is stored
*/
- JacobianType& fjac() {return m_fjac; }
+ JacobianType& jacobian() {return m_fjac; }
- /** the permutation used
+ /** \returns a reference to the triangular matrix R from the QR of the jacobian matrix.
+ * \sa jacobian()
+ */
+ JacobianType& matrixR() {return m_rfactor; }
+
+    /** \returns the permutation used in the QR factorization
*/
PermutationType permutation() {return m_permutation; }
+    /**
+     * \brief Reports whether the minimization was successful.
+     * \returns \c Success if the minimization was successful,
+     * \c NumericalIssue if a numerical problem arose during the
+     * minimization process, for example during the QR factorization,
+     * \c NoConvergence if the minimization did not converge after
+     * the maximum number of function evaluations allowed,
+     * \c InvalidInput if the input matrix is invalid
+     */
+    ComputationInfo info() const
+    {
+      return m_info;
+    }
private:
JacobianType m_fjac;
+ JacobianType m_rfactor; // The triangular matrix R from the QR of the jacobian matrix m_fjac
FunctorType &m_functor;
FVectorType m_fvec, m_qtf, m_diag;
Index n;
@@ -226,6 +249,8 @@ class LevenbergMarquardt
PermutationType m_permutation;
FVectorType m_wa1, m_wa2, m_wa3, m_wa4; //Temporary vectors
RealScalar m_par;
+ bool m_isInitialized; // Check whether the minimization step has been called
+ ComputationInfo m_info;
};
template<typename FunctorType>
@@ -233,13 +258,16 @@ LevenbergMarquardtSpace::Status
LevenbergMarquardt<FunctorType>::minimize(FVectorType &x)
{
LevenbergMarquardtSpace::Status status = minimizeInit(x);
- if (status==LevenbergMarquardtSpace::ImproperInputParameters)
- return status;
+ if (status==LevenbergMarquardtSpace::ImproperInputParameters) {
+ m_isInitialized = true;
+ return status;
+ }
do {
// std::cout << " uv " << x.transpose() << "\n";
status = minimizeOneStep(x);
} while (status==LevenbergMarquardtSpace::Running);
- return status;
+ m_isInitialized = true;
+ return status;
}
template<typename FunctorType>
@@ -257,7 +285,7 @@ LevenbergMarquardt<FunctorType>::minimizeInit(FVectorType &x)
// m_fjac.reserve(VectorXi::Constant(n,5)); // FIXME Find a better alternative
if (!m_useExternalScaling)
m_diag.resize(n);
- assert( (!m_useExternalScaling || m_diag.size()==n) || "When m_useExternalScaling is set, the caller must provide a valid 'm_diag'");
+ eigen_assert( (!m_useExternalScaling || m_diag.size()==n) || "When m_useExternalScaling is set, the caller must provide a valid 'm_diag'");
m_qtf.resize(n);
/* Function Body */
@@ -265,13 +293,18 @@ LevenbergMarquardt<FunctorType>::minimizeInit(FVectorType &x)
m_njev = 0;
/* check the input parameters for errors. */
- if (n <= 0 || m < n || m_ftol < 0. || m_xtol < 0. || m_gtol < 0. || m_maxfev <= 0 || m_factor <= 0.)
- return LevenbergMarquardtSpace::ImproperInputParameters;
+ if (n <= 0 || m < n || m_ftol < 0. || m_xtol < 0. || m_gtol < 0. || m_maxfev <= 0 || m_factor <= 0.){
+ m_info = InvalidInput;
+ return LevenbergMarquardtSpace::ImproperInputParameters;
+ }
if (m_useExternalScaling)
for (Index j = 0; j < n; ++j)
- if (m_diag[j] <= 0.)
- return LevenbergMarquardtSpace::ImproperInputParameters;
+      if (m_diag[j] <= 0.)
+      {
+        m_info = InvalidInput;
+        return LevenbergMarquardtSpace::ImproperInputParameters;
+      }
/* evaluate the function at the starting point */
/* and calculate its norm. */
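
With m_info now maintained by minimizeInit() and minimizeOneStep(), callers
can query the outcome through info(). An end-to-end sketch; the functor is
illustrative and assumes the DenseFunctor interface declared above:

    #include <Eigen/Dense>
    #include <unsupported/Eigen/LevenbergMarquardt>

    // Fit y = a*x to the points (2,4) and (3,6); the exact answer is a = 2.
    struct LineFunctor : Eigen::DenseFunctor<double>
    {
      LineFunctor() : Eigen::DenseFunctor<double>(1, 2) {}
      int operator()(const Eigen::VectorXd& a, Eigen::VectorXd& fvec) const
      {
        fvec(0) = a(0) * 2.0 - 4.0; // residual at (2,4)
        fvec(1) = a(0) * 3.0 - 6.0; // residual at (3,6)
        return 0;
      }
      int df(const Eigen::VectorXd&, Eigen::MatrixXd& fjac) const
      {
        fjac(0, 0) = 2.0;
        fjac(1, 0) = 3.0;
        return 0;
      }
    };

    int main()
    {
      LineFunctor functor;
      Eigen::LevenbergMarquardt<LineFunctor> lm(functor);
      Eigen::VectorXd a(1);
      a << 0.0;
      lm.minimize(a);
      return lm.info() == Eigen::Success ? 0 : 1; // the new status query
    }
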
diff --git a/unsupported/Eigen/src/MatrixFunctions/MatrixLogarithm.h b/unsupported/Eigen/src/MatrixFunctions/MatrixLogarithm.h
index 166393f00..b00e5e921 100644
--- a/unsupported/Eigen/src/MatrixFunctions/MatrixLogarithm.h
+++ b/unsupported/Eigen/src/MatrixFunctions/MatrixLogarithm.h
@@ -237,7 +237,7 @@ void MatrixLogarithmAtomic<MatrixType>::computePade3(MatrixType& result, const M
0.8872983346207416885179265399782400L };
const RealScalar weights[] = { 0.2777777777777777777777777777777778L, 0.4444444444444444444444444444444444L,
0.2777777777777777777777777777777778L };
- assert(degree <= maxPadeDegree);
+ eigen_assert(degree <= maxPadeDegree);
MatrixType TminusI = T - MatrixType::Identity(T.rows(), T.rows());
result.setZero(T.rows(), T.rows());
for (int k = 0; k < degree; ++k)
@@ -253,7 +253,7 @@ void MatrixLogarithmAtomic<MatrixType>::computePade4(MatrixType& result, const M
0.6699905217924281324013328795516223L, 0.9305681557970262876119732444464048L };
const RealScalar weights[] = { 0.1739274225687269286865319746109997L, 0.3260725774312730713134680253890003L,
0.3260725774312730713134680253890003L, 0.1739274225687269286865319746109997L };
- assert(degree <= maxPadeDegree);
+ eigen_assert(degree <= maxPadeDegree);
MatrixType TminusI = T - MatrixType::Identity(T.rows(), T.rows());
result.setZero(T.rows(), T.rows());
for (int k = 0; k < degree; ++k)
@@ -271,7 +271,7 @@ void MatrixLogarithmAtomic<MatrixType>::computePade5(MatrixType& result, const M
const RealScalar weights[] = { 0.1184634425280945437571320203599587L, 0.2393143352496832340206457574178191L,
0.2844444444444444444444444444444444L, 0.2393143352496832340206457574178191L,
0.1184634425280945437571320203599587L };
- assert(degree <= maxPadeDegree);
+ eigen_assert(degree <= maxPadeDegree);
MatrixType TminusI = T - MatrixType::Identity(T.rows(), T.rows());
result.setZero(T.rows(), T.rows());
for (int k = 0; k < degree; ++k)
@@ -289,7 +289,7 @@ void MatrixLogarithmAtomic<MatrixType>::computePade6(MatrixType& result, const M
const RealScalar weights[] = { 0.0856622461895851725201480710863665L, 0.1803807865240693037849167569188581L,
0.2339569672863455236949351719947755L, 0.2339569672863455236949351719947755L,
0.1803807865240693037849167569188581L, 0.0856622461895851725201480710863665L };
- assert(degree <= maxPadeDegree);
+ eigen_assert(degree <= maxPadeDegree);
MatrixType TminusI = T - MatrixType::Identity(T.rows(), T.rows());
result.setZero(T.rows(), T.rows());
for (int k = 0; k < degree; ++k)
@@ -309,7 +309,7 @@ void MatrixLogarithmAtomic<MatrixType>::computePade7(MatrixType& result, const M
0.1909150252525594724751848877444876L, 0.2089795918367346938775510204081633L,
0.1909150252525594724751848877444876L, 0.1398526957446383339507338857118898L,
0.0647424830844348466353057163395410L };
- assert(degree <= maxPadeDegree);
+ eigen_assert(degree <= maxPadeDegree);
MatrixType TminusI = T - MatrixType::Identity(T.rows(), T.rows());
result.setZero(T.rows(), T.rows());
for (int k = 0; k < degree; ++k)
@@ -329,7 +329,7 @@ void MatrixLogarithmAtomic<MatrixType>::computePade8(MatrixType& result, const M
0.1568533229389436436689811009933007L, 0.1813418916891809914825752246385978L,
0.1813418916891809914825752246385978L, 0.1568533229389436436689811009933007L,
0.1111905172266872352721779972131204L, 0.0506142681451881295762656771549811L };
- assert(degree <= maxPadeDegree);
+ eigen_assert(degree <= maxPadeDegree);
MatrixType TminusI = T - MatrixType::Identity(T.rows(), T.rows());
result.setZero(T.rows(), T.rows());
for (int k = 0; k < degree; ++k)
@@ -351,7 +351,7 @@ void MatrixLogarithmAtomic<MatrixType>::computePade9(MatrixType& result, const M
0.1651196775006298815822625346434870L, 0.1561735385200014200343152032922218L,
0.1303053482014677311593714347093164L, 0.0903240803474287020292360156214564L,
0.0406371941807872059859460790552618L };
- assert(degree <= maxPadeDegree);
+ eigen_assert(degree <= maxPadeDegree);
MatrixType TminusI = T - MatrixType::Identity(T.rows(), T.rows());
result.setZero(T.rows(), T.rows());
for (int k = 0; k < degree; ++k)
@@ -373,7 +373,7 @@ void MatrixLogarithmAtomic<MatrixType>::computePade10(MatrixType& result, const
0.1477621123573764350869464973256692L, 0.1477621123573764350869464973256692L,
0.1346333596549981775456134607847347L, 0.1095431812579910219977674671140816L,
0.0747256745752902965728881698288487L, 0.0333356721543440687967844049466659L };
- assert(degree <= maxPadeDegree);
+ eigen_assert(degree <= maxPadeDegree);
MatrixType TminusI = T - MatrixType::Identity(T.rows(), T.rows());
result.setZero(T.rows(), T.rows());
for (int k = 0; k < degree; ++k)
@@ -397,7 +397,7 @@ void MatrixLogarithmAtomic<MatrixType>::computePade11(MatrixType& result, const
0.1314022722551233310903444349452546L, 0.1165968822959952399592618524215876L,
0.0931451054638671257130488207158280L, 0.0627901847324523123173471496119701L,
0.0278342835580868332413768602212743L };
- assert(degree <= maxPadeDegree);
+ eigen_assert(degree <= maxPadeDegree);
MatrixType TminusI = T - MatrixType::Identity(T.rows(), T.rows());
result.setZero(T.rows(), T.rows());
for (int k = 0; k < degree; ++k)
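
For context, all the computePadeN variants above evaluate the same
Gauss-Legendre quadrature of the Pade approximant of the logarithm; with the
listed nodes \theta_k and weights w_k, the loop body (elided by the hunk
context) accumulates

    \log(T) \approx \sum_{k=1}^{N} w_k \, (T - I) \, \bigl[ I + \theta_k (T - I) \bigr]^{-1}

so the variants differ only in the degree N of the rule.
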
diff --git a/unsupported/Eigen/src/MatrixFunctions/MatrixPower.h b/unsupported/Eigen/src/MatrixFunctions/MatrixPower.h
index f2b1a5993..e75fc25b4 100644
--- a/unsupported/Eigen/src/MatrixFunctions/MatrixPower.h
+++ b/unsupported/Eigen/src/MatrixFunctions/MatrixPower.h
@@ -535,7 +535,7 @@ class MatrixPowerReturnValue : public ReturnByValue<MatrixPowerReturnValue<Deriv
};
template<typename Derived>
-const MatrixPowerReturnValue<Derived> MatrixBase<Derived>::pow(RealScalar p) const
+const MatrixPowerReturnValue<Derived> MatrixBase<Derived>::pow(const RealScalar& p) const
{ return MatrixPowerReturnValue<Derived>(derived(), p); }
} // namespace Eigen
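
Taking the exponent by const reference avoids a copy when RealScalar is an
expensive type such as mpreal; call sites are unchanged. A usage sketch:

    #include <Eigen/Dense>
    #include <unsupported/Eigen/MatrixFunctions>

    int main()
    {
      Eigen::Matrix2d A;
      A << 2, 0,
           0, 3;
      Eigen::Matrix2d B = A.pow(0.5); // principal matrix square root
      (void)B;
      return 0;
    }
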
diff --git a/unsupported/Eigen/src/NonLinearOptimization/HybridNonLinearSolver.h b/unsupported/Eigen/src/NonLinearOptimization/HybridNonLinearSolver.h
index b190827b3..5b24b4619 100644
--- a/unsupported/Eigen/src/NonLinearOptimization/HybridNonLinearSolver.h
+++ b/unsupported/Eigen/src/NonLinearOptimization/HybridNonLinearSolver.h
@@ -150,7 +150,7 @@ HybridNonLinearSolver<FunctorType,Scalar>::solveInit(FVectorType &x)
fjac.resize(n, n);
if (!useExternalScaling)
diag.resize(n);
- assert( (!useExternalScaling || diag.size()==n) || "When useExternalScaling is set, the caller must provide a valid 'diag'");
+ eigen_assert( (!useExternalScaling || diag.size()==n) || "When useExternalScaling is set, the caller must provide a valid 'diag'");
/* Function Body */
nfev = 0;
@@ -187,7 +187,7 @@ HybridNonLinearSolver<FunctorType,Scalar>::solveOneStep(FVectorType &x)
{
using std::abs;
- assert(x.size()==n); // check the caller is not cheating us
+ eigen_assert(x.size()==n); // check the caller is not cheating us
Index j;
std::vector<JacobiRotation<Scalar> > v_givens(n), w_givens(n);
@@ -390,7 +390,7 @@ HybridNonLinearSolver<FunctorType,Scalar>::solveNumericalDiffInit(FVectorType &
fvec.resize(n);
if (!useExternalScaling)
diag.resize(n);
- assert( (!useExternalScaling || diag.size()==n) || "When useExternalScaling is set, the caller must provide a valid 'diag'");
+ eigen_assert( (!useExternalScaling || diag.size()==n) || "When useExternalScaling is set, the caller must provide a valid 'diag'");
/* Function Body */
nfev = 0;
diff --git a/unsupported/Eigen/src/NonLinearOptimization/LevenbergMarquardt.h b/unsupported/Eigen/src/NonLinearOptimization/LevenbergMarquardt.h
index 4b1a2d0fb..3d0a9c8fc 100644
--- a/unsupported/Eigen/src/NonLinearOptimization/LevenbergMarquardt.h
+++ b/unsupported/Eigen/src/NonLinearOptimization/LevenbergMarquardt.h
@@ -172,7 +172,7 @@ LevenbergMarquardt<FunctorType,Scalar>::minimizeInit(FVectorType &x)
fjac.resize(m, n);
if (!useExternalScaling)
diag.resize(n);
- assert( (!useExternalScaling || diag.size()==n) || "When useExternalScaling is set, the caller must provide a valid 'diag'");
+ eigen_assert( (!useExternalScaling || diag.size()==n) || "When useExternalScaling is set, the caller must provide a valid 'diag'");
qtf.resize(n);
/* Function Body */
@@ -209,7 +209,7 @@ LevenbergMarquardt<FunctorType,Scalar>::minimizeOneStep(FVectorType &x)
using std::abs;
using std::sqrt;
- assert(x.size()==n); // check the caller is not cheating us
+ eigen_assert(x.size()==n); // check the caller is not cheating us
/* calculate the jacobian matrix. */
Index df_ret = functor.df(x, fjac);
@@ -391,7 +391,7 @@ LevenbergMarquardt<FunctorType,Scalar>::minimizeOptimumStorageInit(FVectorType
fjac.resize(n, n);
if (!useExternalScaling)
diag.resize(n);
- assert( (!useExternalScaling || diag.size()==n) || "When useExternalScaling is set, the caller must provide a valid 'diag'");
+ eigen_assert( (!useExternalScaling || diag.size()==n) || "When useExternalScaling is set, the caller must provide a valid 'diag'");
qtf.resize(n);
/* Function Body */
@@ -429,7 +429,7 @@ LevenbergMarquardt<FunctorType,Scalar>::minimizeOptimumStorageOneStep(FVectorTyp
using std::abs;
using std::sqrt;
- assert(x.size()==n); // check the caller is not cheating us
+ eigen_assert(x.size()==n); // check the caller is not cheating us
Index i, j;
bool sing;
diff --git a/unsupported/Eigen/src/NonLinearOptimization/covar.h b/unsupported/Eigen/src/NonLinearOptimization/covar.h
index c2fb79441..68260d191 100644
--- a/unsupported/Eigen/src/NonLinearOptimization/covar.h
+++ b/unsupported/Eigen/src/NonLinearOptimization/covar.h
@@ -20,7 +20,7 @@ void covar(
const Index n = r.cols();
const Scalar tolr = tol * abs(r(0,0));
Matrix< Scalar, Dynamic, 1 > wa(n);
- assert(ipvt.size()==n);
+ eigen_assert(ipvt.size()==n);
/* form the inverse of r in the full upper triangle of r. */
l = -1;
diff --git a/unsupported/Eigen/src/NonLinearOptimization/dogleg.h b/unsupported/Eigen/src/NonLinearOptimization/dogleg.h
index 57dbc8bfb..4210958e7 100644
--- a/unsupported/Eigen/src/NonLinearOptimization/dogleg.h
+++ b/unsupported/Eigen/src/NonLinearOptimization/dogleg.h
@@ -24,9 +24,9 @@ void dogleg(
/* Function Body */
const Scalar epsmch = NumTraits<Scalar>::epsilon();
const Index n = qrfac.cols();
- assert(n==qtb.size());
- assert(n==x.size());
- assert(n==diag.size());
+ eigen_assert(n==qtb.size());
+ eigen_assert(n==x.size());
+ eigen_assert(n==diag.size());
Matrix< Scalar, Dynamic, 1 > wa1(n), wa2(n);
/* first, calculate the gauss-newton direction. */
diff --git a/unsupported/Eigen/src/NonLinearOptimization/fdjac1.h b/unsupported/Eigen/src/NonLinearOptimization/fdjac1.h
index 05947936e..bb7cf267b 100644
--- a/unsupported/Eigen/src/NonLinearOptimization/fdjac1.h
+++ b/unsupported/Eigen/src/NonLinearOptimization/fdjac1.h
@@ -27,7 +27,7 @@ DenseIndex fdjac1(
/* Function Body */
const Scalar epsmch = NumTraits<Scalar>::epsilon();
const Index n = x.size();
- assert(fvec.size()==n);
+ eigen_assert(fvec.size()==n);
Matrix< Scalar, Dynamic, 1 > wa1(n);
Matrix< Scalar, Dynamic, 1 > wa2(n);
diff --git a/unsupported/Eigen/src/NonLinearOptimization/lmpar.h b/unsupported/Eigen/src/NonLinearOptimization/lmpar.h
index 834407c5a..4c17d4cdf 100644
--- a/unsupported/Eigen/src/NonLinearOptimization/lmpar.h
+++ b/unsupported/Eigen/src/NonLinearOptimization/lmpar.h
@@ -29,9 +29,9 @@ void lmpar(
/* Function Body */
const Scalar dwarf = (std::numeric_limits<Scalar>::min)();
const Index n = r.cols();
- assert(n==diag.size());
- assert(n==qtb.size());
- assert(n==x.size());
+ eigen_assert(n==diag.size());
+ eigen_assert(n==qtb.size());
+ eigen_assert(n==x.size());
Matrix< Scalar, Dynamic, 1 > wa1, wa2;
@@ -187,8 +187,8 @@ void lmpar2(
/* Function Body */
const Scalar dwarf = (std::numeric_limits<Scalar>::min)();
const Index n = qr.matrixQR().cols();
- assert(n==diag.size());
- assert(n==qtb.size());
+ eigen_assert(n==diag.size());
+ eigen_assert(n==qtb.size());
Matrix< Scalar, Dynamic, 1 > wa1, wa2;
diff --git a/unsupported/Eigen/src/NonLinearOptimization/r1updt.h b/unsupported/Eigen/src/NonLinearOptimization/r1updt.h
index 55fae5ae8..f28766061 100644
--- a/unsupported/Eigen/src/NonLinearOptimization/r1updt.h
+++ b/unsupported/Eigen/src/NonLinearOptimization/r1updt.h
@@ -24,10 +24,10 @@ void r1updt(
 // r1updt had a broader use case, but we don't use it here. And, more
 // importantly, we cannot test it.
- assert(m==n);
- assert(u.size()==m);
- assert(v.size()==n);
- assert(w.size()==n);
+ eigen_assert(m==n);
+ eigen_assert(u.size()==m);
+ eigen_assert(v.size()==n);
+ eigen_assert(w.size()==n);
/* move the nontrivial part of the last column of s into w. */
w[n-1] = s(n-1,n-1);
diff --git a/unsupported/Eigen/src/NonLinearOptimization/rwupdt.h b/unsupported/Eigen/src/NonLinearOptimization/rwupdt.h
index 9ce079e22..6ebf8563f 100644
--- a/unsupported/Eigen/src/NonLinearOptimization/rwupdt.h
+++ b/unsupported/Eigen/src/NonLinearOptimization/rwupdt.h
@@ -12,7 +12,7 @@ void rwupdt(
typedef DenseIndex Index;
const Index n = r.cols();
- assert(r.rows()>=n);
+ eigen_assert(r.rows()>=n);
std::vector<JacobiRotation<Scalar> > givens(n);
/* Local variables */
diff --git a/unsupported/Eigen/src/NumericalDiff/NumericalDiff.h b/unsupported/Eigen/src/NumericalDiff/NumericalDiff.h
index 7ee30e18c..ea5d8bc27 100644
--- a/unsupported/Eigen/src/NumericalDiff/NumericalDiff.h
+++ b/unsupported/Eigen/src/NumericalDiff/NumericalDiff.h
@@ -86,7 +86,7 @@ public:
// do nothing
break;
default:
- assert(false);
+ eigen_assert(false);
};
// Function Body
@@ -112,7 +112,7 @@ public:
jac.col(j) = (val2-val1)/(2*h);
break;
default:
- assert(false);
+ eigen_assert(false);
};
}
return nfev;
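
For reference, the two schemes behind this switch are the standard forward and
central difference quotients, with the step h derived from the functor's
epsfcn and the magnitude of x_j:

    Forward:  df/dx_j ~ ( f(x + h e_j) - f(x) ) / h
    Central:  df/dx_j ~ ( f(x + h e_j) - f(x - h e_j) ) / (2h)

the central quotient being the (val2-val1)/(2*h) column update in the hunk
above; any other scheme value falls into the asserting default branch.
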
diff --git a/unsupported/Eigen/src/Polynomials/PolynomialSolver.h b/unsupported/Eigen/src/Polynomials/PolynomialSolver.h
index fba8fc910..ad486f08e 100644
--- a/unsupported/Eigen/src/Polynomials/PolynomialSolver.h
+++ b/unsupported/Eigen/src/Polynomials/PolynomialSolver.h
@@ -344,7 +344,7 @@ class PolynomialSolver : public PolynomialSolverBase<_Scalar,_Deg>
template< typename OtherPolynomial >
void compute( const OtherPolynomial& poly )
{
- assert( Scalar(0) != poly[poly.size()-1] );
+ eigen_assert( Scalar(0) != poly[poly.size()-1] );
internal::companion<Scalar,_Deg> companion( poly );
companion.balance();
m_eigenSolver.compute( companion.denseMatrix() );
@@ -376,7 +376,7 @@ class PolynomialSolver<_Scalar,1> : public PolynomialSolverBase<_Scalar,1>
template< typename OtherPolynomial >
void compute( const OtherPolynomial& poly )
{
- assert( Scalar(0) != poly[poly.size()-1] );
+ eigen_assert( Scalar(0) != poly[poly.size()-1] );
m_roots[0] = -poly[0]/poly[poly.size()-1];
}
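
The strengthened asserts guard the one precondition of compute(): a nonzero
leading coefficient. A usage sketch (coefficients given in order of
increasing degree):

    #include <iostream>
    #include <Eigen/Dense>
    #include <unsupported/Eigen/Polynomials>

    int main()
    {
      // p(x) = 2 - 3x + x^2 = (x - 1)(x - 2); leading coefficient is nonzero.
      Eigen::Vector3d coeffs;
      coeffs << 2.0, -3.0, 1.0;
      Eigen::PolynomialSolver<double, 2> solver(coeffs);
      std::cout << solver.roots().transpose() << std::endl; // approximately (1, 2)
      return 0;
    }
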
diff --git a/unsupported/Eigen/src/Polynomials/PolynomialUtils.h b/unsupported/Eigen/src/Polynomials/PolynomialUtils.h
index 5a9ab110e..27d4e9f91 100644
--- a/unsupported/Eigen/src/Polynomials/PolynomialUtils.h
+++ b/unsupported/Eigen/src/Polynomials/PolynomialUtils.h
@@ -78,7 +78,7 @@ typename NumTraits<typename Polynomial::Scalar>::Real cauchy_max_bound( const Po
typedef typename Polynomial::Scalar Scalar;
typedef typename NumTraits<Scalar>::Real Real;
- assert( Scalar(0) != poly[poly.size()-1] );
+ eigen_assert( Scalar(0) != poly[poly.size()-1] );
const Scalar inv_leading_coeff = Scalar(1)/poly[poly.size()-1];
Real cb(0);
diff --git a/unsupported/Eigen/src/SparseExtra/MarketIO.h b/unsupported/Eigen/src/SparseExtra/MarketIO.h
index de958de9f..7aafce928 100644
--- a/unsupported/Eigen/src/SparseExtra/MarketIO.h
+++ b/unsupported/Eigen/src/SparseExtra/MarketIO.h
@@ -116,7 +116,7 @@ inline bool getMarketHeader(const std::string& filename, int& sym, bool& iscompl
std::string line;
// The matrix header is always the first line in the file
- std::getline(in, line); assert(in.good());
+ std::getline(in, line); eigen_assert(in.good());
std::stringstream fmtline(line);
std::string substr[5];
@@ -200,11 +200,11 @@ bool loadMarketVector(VectorType& vec, const std::string& filename)
int n(0), col(0);
do
{ // Skip comments
- std::getline(in, line); assert(in.good());
+ std::getline(in, line); eigen_assert(in.good());
} while (line[0] == '%');
std::istringstream newline(line);
newline >> n >> col;
- assert(n>0 && col>0);
+ eigen_assert(n>0 && col>0);
vec.resize(n);
int i = 0;
Scalar value;
diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt
index 0e0c8a6bf..78b9610d4 100644
--- a/unsupported/test/CMakeLists.txt
+++ b/unsupported/test/CMakeLists.txt
@@ -1,4 +1,7 @@
+set_property(GLOBAL PROPERTY EIGEN_CURRENT_SUBPROJECT "Unsupported")
+add_custom_target(BuildUnsupported)
+
include_directories(../../test ../../unsupported ../../Eigen
${CMAKE_CURRENT_BINARY_DIR}/../../test)
diff --git a/unsupported/test/dgmres.cpp b/unsupported/test/dgmres.cpp
new file mode 100644
index 000000000..2b11807c8
--- /dev/null
+++ b/unsupported/test/dgmres.cpp
@@ -0,0 +1,31 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2011 Gael Guennebaud <g.gael@free.fr>
+// Copyright (C) 2012 Desire Nuentsa <desire.nuentsa_wakam@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "../../test/sparse_solver.h"
+#include <Eigen/src/IterativeSolvers/DGMRES.h>
+
+template<typename T> void test_dgmres_T()
+{
+ DGMRES<SparseMatrix<T>, DiagonalPreconditioner<T> > dgmres_colmajor_diag;
+ DGMRES<SparseMatrix<T>, IdentityPreconditioner > dgmres_colmajor_I;
+ DGMRES<SparseMatrix<T>, IncompleteLUT<T> > dgmres_colmajor_ilut;
+ //GMRES<SparseMatrix<T>, SSORPreconditioner<T> > dgmres_colmajor_ssor;
+
+ CALL_SUBTEST( check_sparse_square_solving(dgmres_colmajor_diag) );
+// CALL_SUBTEST( check_sparse_square_solving(dgmres_colmajor_I) );
+ CALL_SUBTEST( check_sparse_square_solving(dgmres_colmajor_ilut) );
+ //CALL_SUBTEST( check_sparse_square_solving(dgmres_colmajor_ssor) );
+}
+
+void test_dgmres()
+{
+ CALL_SUBTEST_1(test_dgmres_T<double>());
+ CALL_SUBTEST_2(test_dgmres_T<std::complex<double> >());
+}
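
A standalone usage sketch of the solver this new test exercises (the matrix
and right-hand side are illustrative):

    #include <Eigen/Sparse>
    #include <unsupported/Eigen/IterativeSolvers>

    int main()
    {
      typedef Eigen::SparseMatrix<double> SpMat;
      SpMat A(2, 2);
      A.insert(0, 0) = 4.0;
      A.insert(1, 1) = 3.0;
      A.makeCompressed();

      Eigen::VectorXd b(2);
      b << 1.0, 2.0;

      Eigen::DGMRES<SpMat> solver(A); // default: DiagonalPreconditioner
      Eigen::VectorXd x = solver.solve(b);
      return solver.info() == Eigen::Success ? 0 : 1;
    }
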
diff --git a/unsupported/test/kronecker_product.cpp b/unsupported/test/kronecker_product.cpp
index 5d0bb410d..108bf6fde 100644
--- a/unsupported/test/kronecker_product.cpp
+++ b/unsupported/test/kronecker_product.cpp
@@ -16,7 +16,7 @@
template<typename MatrixType>
-void check_dimension(const MatrixType& ab, const unsigned int rows, const unsigned int cols)
+void check_dimension(const MatrixType& ab, const int rows, const int cols)
{
VERIFY_IS_EQUAL(ab.rows(), rows);
VERIFY_IS_EQUAL(ab.cols(), cols);
@@ -86,28 +86,36 @@ void check_sparse_kronecker_product(const MatrixType& ab)
void test_kronecker_product()
{
// DM = dense matrix; SM = sparse matrix
+
Matrix<double, 2, 3> DM_a;
- MatrixXd DM_b(3,2);
SparseMatrix<double> SM_a(2,3);
- SparseMatrix<double> SM_b(3,2);
SM_a.insert(0,0) = DM_a.coeffRef(0,0) = -0.4461540300782201;
SM_a.insert(0,1) = DM_a.coeffRef(0,1) = -0.8057364375283049;
SM_a.insert(0,2) = DM_a.coeffRef(0,2) = 0.3896572459516341;
SM_a.insert(1,0) = DM_a.coeffRef(1,0) = -0.9076572187376921;
SM_a.insert(1,1) = DM_a.coeffRef(1,1) = 0.6469156566545853;
SM_a.insert(1,2) = DM_a.coeffRef(1,2) = -0.3658010398782789;
+
+ MatrixXd DM_b(3,2);
+ SparseMatrix<double> SM_b(3,2);
SM_b.insert(0,0) = DM_b.coeffRef(0,0) = 0.9004440976767099;
SM_b.insert(0,1) = DM_b.coeffRef(0,1) = -0.2368830858139832;
SM_b.insert(1,0) = DM_b.coeffRef(1,0) = -0.9311078389941825;
SM_b.insert(1,1) = DM_b.coeffRef(1,1) = 0.5310335762980047;
SM_b.insert(2,0) = DM_b.coeffRef(2,0) = -0.1225112806872035;
SM_b.insert(2,1) = DM_b.coeffRef(2,1) = 0.5903998022741264;
+
SparseMatrix<double,RowMajor> SM_row_a(SM_a), SM_row_b(SM_b);
// test kroneckerProduct(DM_block,DM,DM_fixedSize)
Matrix<double, 6, 6> DM_fix_ab = kroneckerProduct(DM_a.topLeftCorner<2,3>(),DM_b);
+
CALL_SUBTEST(check_kronecker_product(DM_fix_ab));
+ for(unsigned int i=0;i<DM_fix_ab.rows();++i)
+ for(unsigned int j=0;j<DM_fix_ab.cols();++j)
+ VERIFY_IS_APPROX(kroneckerProduct(DM_a,DM_b).coeff(i,j), DM_fix_ab(i,j));
+
// test kroneckerProduct(DM,DM,DM_block)
MatrixXd DM_block_ab(10,15);
DM_block_ab.block<6,6>(2,5) = kroneckerProduct(DM_a,DM_b);
@@ -152,6 +160,7 @@ void test_kronecker_product()
SM_a.insert(0,3) = -0.2;
SM_a.insert(2,4) = 0.3;
SM_a.finalize();
+
SM_b.insert(0,0) = 0.4;
SM_b.insert(2,1) = -0.5;
SM_b.finalize();
diff --git a/unsupported/test/levenberg_marquardt.cpp b/unsupported/test/levenberg_marquardt.cpp
index c7061f017..04464727d 100644
--- a/unsupported/test/levenberg_marquardt.cpp
+++ b/unsupported/test/levenberg_marquardt.cpp
@@ -12,7 +12,7 @@
#include <stdio.h>
#include "main.h"
-#include <Eigen/LevenbergMarquardt>
+#include <unsupported/Eigen/LevenbergMarquardt>
// This disables some useless Warnings on MSVC.
// It is intended to be done for this test only.
@@ -115,7 +115,7 @@ void testLmder()
// check covariance
covfac = fnorm*fnorm/(m-n);
- internal::covar(lm.fjac(), lm.permutation().indices()); // TODO : move this as a function of lm
+  internal::covar(lm.matrixR(), lm.permutation().indices()); // TODO: make this a member function of lm
MatrixXd cov_ref(n,n);
cov_ref <<
@@ -126,7 +126,7 @@ void testLmder()
// std::cout << fjac*covfac << std::endl;
MatrixXd cov;
- cov = covfac*lm.fjac().topLeftCorner<n,n>();
+ cov = covfac*lm.matrixR().topLeftCorner<n,n>();
VERIFY_IS_APPROX( cov, cov_ref);
// TODO: why isn't this allowed ? :
// VERIFY_IS_APPROX( covfac*fjac.topLeftCorner<n,n>() , cov_ref);
@@ -174,7 +174,7 @@ void testLmdif1()
// check return value
VERIFY_IS_EQUAL(info, 1);
- VERIFY_IS_EQUAL(nfev, 26);
+// VERIFY_IS_EQUAL(nfev, 26);
// check norm
functor(x, fvec);
@@ -205,7 +205,7 @@ void testLmdif()
// check return values
VERIFY_IS_EQUAL(info, 1);
- VERIFY_IS_EQUAL(lm.nfev(), 26);
+// VERIFY_IS_EQUAL(lm.nfev(), 26);
// check norm
fnorm = lm.fvec().blueNorm();
@@ -218,7 +218,7 @@ void testLmdif()
// check covariance
covfac = fnorm*fnorm/(m-n);
- internal::covar(lm.fjac(), lm.permutation().indices()); // TODO : move this as a function of lm
+  internal::covar(lm.matrixR(), lm.permutation().indices()); // TODO: make this a member function of lm
MatrixXd cov_ref(n,n);
cov_ref <<
@@ -229,7 +229,7 @@ void testLmdif()
// std::cout << fjac*covfac << std::endl;
MatrixXd cov;
- cov = covfac*lm.fjac().topLeftCorner<n,n>();
+ cov = covfac*lm.matrixR().topLeftCorner<n,n>();
VERIFY_IS_APPROX( cov, cov_ref);
// TODO: why isn't this allowed ? :
// VERIFY_IS_APPROX( covfac*fjac.topLeftCorner<n,n>() , cov_ref);
@@ -290,7 +290,7 @@ void testNistChwirut2(void)
// check return value
VERIFY_IS_EQUAL(info, 1);
- VERIFY_IS_EQUAL(lm.nfev(), 10);
+// VERIFY_IS_EQUAL(lm.nfev(), 10);
VERIFY_IS_EQUAL(lm.njev(), 8);
// check norm^2
VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 5.1304802941E+02);
@@ -311,7 +311,7 @@ void testNistChwirut2(void)
// check return value
VERIFY_IS_EQUAL(info, 1);
- VERIFY_IS_EQUAL(lm.nfev(), 7);
+// VERIFY_IS_EQUAL(lm.nfev(), 7);
VERIFY_IS_EQUAL(lm.njev(), 6);
// check norm^2
VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 5.1304802941E+02);
@@ -483,7 +483,7 @@ void testNistHahn1(void)
// check return value
VERIFY_IS_EQUAL(info, 1);
- VERIFY_IS_EQUAL(lm.nfev(), 11);
+// VERIFY_IS_EQUAL(lm.nfev(), 11);
VERIFY_IS_EQUAL(lm.njev(), 10);
// check norm^2
VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 1.5324382854E+00);
@@ -949,7 +949,7 @@ void testNistMGH17(void)
info = lm.minimize(x);
// check return value
- VERIFY_IS_EQUAL(info, 2);
+// VERIFY_IS_EQUAL(info, 2); //FIXME Use (lm.info() == Success)
// VERIFY_IS_EQUAL(lm.nfev(), 602 );
VERIFY_IS_EQUAL(lm.njev(), 545 );
// check norm^2
diff --git a/unsupported/test/matrix_function.cpp b/unsupported/test/matrix_function.cpp
index 0439c5a7d..3c76cfb65 100644
--- a/unsupported/test/matrix_function.cpp
+++ b/unsupported/test/matrix_function.cpp
@@ -110,7 +110,6 @@ void testMatrixLogarithm(const MatrixType& A)
{
typedef typename internal::traits<MatrixType>::Scalar Scalar;
typedef typename NumTraits<Scalar>::Real RealScalar;
- typedef std::complex<RealScalar> ComplexScalar;
MatrixType scaledA;
RealScalar maxImagPartOfSpectrum = A.eigenvalues().imag().cwiseAbs().maxCoeff();
diff --git a/unsupported/test/matrix_power.cpp b/unsupported/test/matrix_power.cpp
index b7b6423a8..2763b97cc 100644
--- a/unsupported/test/matrix_power.cpp
+++ b/unsupported/test/matrix_power.cpp
@@ -177,10 +177,10 @@ void test_matrix_power()
CALL_SUBTEST_2(testMatrixVector(Matrix2d(), Vector2d(), 1e-13));
CALL_SUBTEST_7(testMatrixVector(Matrix3dRowMajor(), MatrixXd(3,5), 1e-13));
CALL_SUBTEST_3(testMatrixVector(Matrix4cd(), Vector4cd(), 1e-13));
- CALL_SUBTEST_4(testMatrixVector(MatrixXd(8,8), VectorXd(8), 1e-13));
+ CALL_SUBTEST_4(testMatrixVector(MatrixXd(8,8), VectorXd(8), 2e-12));
CALL_SUBTEST_1(testMatrixVector(Matrix2f(), Vector2f(), 1e-4));
CALL_SUBTEST_5(testMatrixVector(Matrix3cf(), Vector3cf(), 1e-4));
CALL_SUBTEST_8(testMatrixVector(Matrix4f(), Vector4f(), 1e-4));
- CALL_SUBTEST_6(testMatrixVector(MatrixXf(8,8), VectorXf(8), 1e-4));
+ CALL_SUBTEST_6(testMatrixVector(MatrixXf(8,8), VectorXf(8), 1e-3));
CALL_SUBTEST_9(testMatrixVector(MatrixXe(7,7), VectorXe(7), 1e-13));
}
diff --git a/unsupported/test/mpreal/mpreal.h b/unsupported/test/mpreal/mpreal.h
index 38946c3bd..ef0a6a9f0 100644
--- a/unsupported/test/mpreal/mpreal.h
+++ b/unsupported/test/mpreal/mpreal.h
@@ -86,26 +86,26 @@
#define MPFR_USE_INTMAX_T // Should be defined before mpfr.h
- #if defined(_MSC_VER) // <stdint.h> is available only in msvc2010!
+ #if defined(_MSC_VER) // MSVC + Windows
#if (_MSC_VER >= 1600)
- #include <stdint.h>
+ #include <stdint.h> // <stdint.h> is available only in msvc2010!
+
#else // MPFR relies on intmax_t which is available only in msvc2010
#undef MPREAL_HAVE_INT64_SUPPORT // Besides, MPFR & MPIR have to be compiled with msvc2010
#undef MPFR_USE_INTMAX_T // Since we cannot detect this, disable x64 by default
// Someone should change this manually if needed.
#endif
- #endif
-
- #if defined (__MINGW32__) || defined(__MINGW64__)
- #include <stdint.h> // Equivalent to msvc2010
- #elif defined (__GNUC__)
+ #elif defined (__GNUC__) && defined(__linux__)
#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(__ia64) || defined(__itanium__) || defined(_M_IA64)
#undef MPREAL_HAVE_INT64_SUPPORT // Remove all shaman dances for x64 builds since
        #undef MPFR_USE_INTMAX_T           // GCC already supports x64: "long int" is a 64-bit integer, nothing left to do
#else
- #include <stdint.h> // use int64_t, uint64_t otherwise.
+ #include <stdint.h> // use int64_t, uint64_t otherwise
#endif
+
+ #else
+ #include <stdint.h> // rely on int64_t, uint64_t in all other cases, Mac OSX, etc.
#endif
#endif
@@ -128,6 +128,12 @@
#define MPREAL_DOUBLE_BITS_OVERFLOW -1 // Triggers overflow exception during conversion to double if mpreal
// cannot fit in MPREAL_DOUBLE_BITS_OVERFLOW bits
// = -1 disables overflow checks (default)
+#if defined(__GNUC__)
+ #define MPREAL_PERMISSIVE_EXPR __extension__
+#else
+ #define MPREAL_PERMISSIVE_EXPR
+#endif
+
namespace mpfr {
class mpreal {
@@ -223,10 +229,10 @@ public:
mpreal& operator-=(const int u);
const mpreal operator-() const;
friend const mpreal operator-(const unsigned long int b, const mpreal& a);
- friend const mpreal operator-(const unsigned int b, const mpreal& a);
- friend const mpreal operator-(const long int b, const mpreal& a);
- friend const mpreal operator-(const int b, const mpreal& a);
- friend const mpreal operator-(const double b, const mpreal& a);
+ friend const mpreal operator-(const unsigned int b, const mpreal& a);
+ friend const mpreal operator-(const long int b, const mpreal& a);
+ friend const mpreal operator-(const int b, const mpreal& a);
+ friend const mpreal operator-(const double b, const mpreal& a);
mpreal& operator-- ();
const mpreal operator-- (int);
@@ -252,10 +258,10 @@ public:
mpreal& operator/=(const long int v);
mpreal& operator/=(const int v);
friend const mpreal operator/(const unsigned long int b, const mpreal& a);
- friend const mpreal operator/(const unsigned int b, const mpreal& a);
- friend const mpreal operator/(const long int b, const mpreal& a);
- friend const mpreal operator/(const int b, const mpreal& a);
- friend const mpreal operator/(const double b, const mpreal& a);
+ friend const mpreal operator/(const unsigned int b, const mpreal& a);
+ friend const mpreal operator/(const long int b, const mpreal& a);
+ friend const mpreal operator/(const int b, const mpreal& a);
+ friend const mpreal operator/(const double b, const mpreal& a);
//<<= Fast Multiplication by 2^u
mpreal& operator<<=(const unsigned long int u);
@@ -296,8 +302,9 @@ public:
uint64_t toUInt64 (mp_rnd_t mode = GMP_RNDZ) const;
#endif
- // Get raw pointers so that mpreal can correctly be used in raw mpfr_* functions
- ::mpfr_ptr mpfr_ptr();
+ // Get raw pointers so that mpreal can be directly used in raw mpfr_* functions
+ ::mpfr_ptr mpfr_ptr();
+ ::mpfr_srcptr mpfr_ptr() const;
::mpfr_srcptr mpfr_srcptr() const;
// Convert mpreal to string with n significant digits in base b
@@ -856,7 +863,7 @@ inline mpreal& mpreal::operator=(const mpreal& v)
mp_prec_t tp = mpfr_get_prec(mp);
mp_prec_t vp = mpfr_get_prec(v.mp);
- if(tp < vp){
+ if(tp != vp){
mpfr_clear(mp);
mpfr_init2(mp, vp);
}
@@ -1087,9 +1094,9 @@ inline const mpreal mpreal::operator+()const { return mpreal(*this); }
inline const mpreal operator+(const mpreal& a, const mpreal& b)
{
- // prec(a+b) = max(prec(a),prec(b))
- if(a.get_prec()>b.get_prec()) return mpreal(a) += b;
- else return mpreal(b) += a;
+ mpreal c(0, (std::max)(mpfr_get_prec(a.mpfr_ptr()), mpfr_get_prec(b.mpfr_ptr())));
+ mpfr_add(c.mpfr_ptr(), a.mpfr_srcptr(), b.mpfr_srcptr(), mpreal::get_default_rnd());
+ return c;
}
inline mpreal& mpreal::operator++()
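
The rewritten binary operators all follow the pattern visible above: create
the result at max(prec(a), prec(b)) and fill it with a single raw mpfr_* call,
instead of copying the wider operand and accumulating in place. A minimal
sketch of that pattern against the plain MPFR C API (the function name is
illustrative):

    #include <mpfr.h>

    // c must be uninitialized; the caller owns and clears it afterwards.
    void add_max_prec(mpfr_t c, mpfr_srcptr a, mpfr_srcptr b)
    {
      mpfr_prec_t pa = mpfr_get_prec(a);
      mpfr_prec_t pb = mpfr_get_prec(b);
      mpfr_init2(c, pa > pb ? pa : pb); // result carries the wider precision
      mpfr_add(c, a, b, GMP_RNDN);      // one rounding, no intermediate copy
    }
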
@@ -1118,7 +1125,7 @@ inline const mpreal mpreal::operator-- (int)
//////////////////////////////////////////////////////////////////////////
// - Subtraction
-inline mpreal& mpreal::operator-= (const mpreal& v)
+inline mpreal& mpreal::operator-=(const mpreal& v)
{
mpfr_sub(mp,mp,v.mp,mpreal::get_default_rnd());
MPREAL_MSVC_DEBUGVIEW_CODE;
@@ -1195,53 +1202,49 @@ inline const mpreal mpreal::operator-()const
inline const mpreal operator-(const mpreal& a, const mpreal& b)
{
- // prec(a-b) = max(prec(a),prec(b))
- if(a.getPrecision() >= b.getPrecision())
- {
- return mpreal(a) -= b;
- }else{
- mpreal x(a);
- x.setPrecision(b.getPrecision());
- return x -= b;
- }
+ mpreal c(0, (std::max)(mpfr_get_prec(a.mpfr_ptr()), mpfr_get_prec(b.mpfr_ptr())));
+ mpfr_sub(c.mpfr_ptr(), a.mpfr_srcptr(), b.mpfr_srcptr(), mpreal::get_default_rnd());
+ return c;
}
inline const mpreal operator-(const double b, const mpreal& a)
{
#if (MPFR_VERSION >= MPFR_VERSION_NUM(2,4,0))
- mpreal x(a);
- mpfr_d_sub(x.mp,b,a.mp,mpreal::get_default_rnd());
+ mpreal x(0, mpfr_get_prec(a.mpfr_ptr()));
+ mpfr_d_sub(x.mpfr_ptr(), b, a.mpfr_srcptr(), mpreal::get_default_rnd());
return x;
#else
- return mpreal(b) -= a;
+ mpreal x(b, mpfr_get_prec(a.mpfr_ptr()));
+ x -= a;
+ return x;
#endif
}
inline const mpreal operator-(const unsigned long int b, const mpreal& a)
{
- mpreal x(a);
- mpfr_ui_sub(x.mp,b,a.mp,mpreal::get_default_rnd());
+ mpreal x(0, mpfr_get_prec(a.mpfr_ptr()));
+ mpfr_ui_sub(x.mpfr_ptr(), b, a.mpfr_srcptr(), mpreal::get_default_rnd());
return x;
}
inline const mpreal operator-(const unsigned int b, const mpreal& a)
{
- mpreal x(a);
- mpfr_ui_sub(x.mp,b,a.mp,mpreal::get_default_rnd());
+ mpreal x(0, mpfr_get_prec(a.mpfr_ptr()));
+ mpfr_ui_sub(x.mpfr_ptr(), b, a.mpfr_srcptr(), mpreal::get_default_rnd());
return x;
}
inline const mpreal operator-(const long int b, const mpreal& a)
{
- mpreal x(a);
- mpfr_si_sub(x.mp,b,a.mp,mpreal::get_default_rnd());
+ mpreal x(0, mpfr_get_prec(a.mpfr_ptr()));
+ mpfr_si_sub(x.mpfr_ptr(), b, a.mpfr_srcptr(), mpreal::get_default_rnd());
return x;
}
inline const mpreal operator-(const int b, const mpreal& a)
{
- mpreal x(a);
- mpfr_si_sub(x.mp,b,a.mp,mpreal::get_default_rnd());
+ mpreal x(0, mpfr_get_prec(a.mpfr_ptr()));
+ mpfr_si_sub(x.mpfr_ptr(), b, a.mpfr_srcptr(), mpreal::get_default_rnd());
return x;
}
@@ -1282,7 +1285,6 @@ inline mpreal& mpreal::operator*=(const double v)
#else
*this *= mpreal(v);
#endif
-
MPREAL_MSVC_DEBUGVIEW_CODE;
return *this;
}
@@ -1317,9 +1319,9 @@ inline mpreal& mpreal::operator*=(const int v)
inline const mpreal operator*(const mpreal& a, const mpreal& b)
{
- // prec(a*b) = max(prec(a),prec(b))
- if(a.getPrecision() >= b.getPrecision()) return mpreal(a) *= b;
- else return mpreal(b) *= a;
+ mpreal c(0, (std::max)(mpfr_get_prec(a.mpfr_ptr()), mpfr_get_prec(b.mpfr_ptr())));
+ mpfr_mul(c.mpfr_ptr(), a.mpfr_srcptr(), b.mpfr_srcptr(), mpreal::get_default_rnd());
+ return c;
}
//////////////////////////////////////////////////////////////////////////
@@ -1393,54 +1395,49 @@ inline mpreal& mpreal::operator/=(const int v)
inline const mpreal operator/(const mpreal& a, const mpreal& b)
{
- // prec(a/b) = max(prec(a),prec(b))
- if(a.getPrecision() >= b.getPrecision())
- {
- return mpreal(a) /= b;
- }else{
-
- mpreal x(a);
- x.setPrecision(b.getPrecision());
- return x /= b;
- }
+ mpreal c(0, (std::max)(mpfr_get_prec(a.mpfr_ptr()), mpfr_get_prec(b.mpfr_ptr())));
+ mpfr_div(c.mpfr_ptr(), a.mpfr_srcptr(), b.mpfr_srcptr(), mpreal::get_default_rnd());
+ return c;
}
inline const mpreal operator/(const unsigned long int b, const mpreal& a)
{
- mpreal x(a);
- mpfr_ui_div(x.mp,b,a.mp,mpreal::get_default_rnd());
+ mpreal x(0, mpfr_get_prec(a.mpfr_ptr()));
+ mpfr_ui_div(x.mpfr_ptr(), b, a.mpfr_srcptr(), mpreal::get_default_rnd());
return x;
}
inline const mpreal operator/(const unsigned int b, const mpreal& a)
{
- mpreal x(a);
- mpfr_ui_div(x.mp,b,a.mp,mpreal::get_default_rnd());
+ mpreal x(0, mpfr_get_prec(a.mpfr_ptr()));
+ mpfr_ui_div(x.mpfr_ptr(), b, a.mpfr_srcptr(), mpreal::get_default_rnd());
return x;
}
inline const mpreal operator/(const long int b, const mpreal& a)
{
- mpreal x(a);
- mpfr_si_div(x.mp,b,a.mp,mpreal::get_default_rnd());
+ mpreal x(0, mpfr_get_prec(a.mpfr_ptr()));
+ mpfr_si_div(x.mpfr_ptr(), b, a.mpfr_srcptr(), mpreal::get_default_rnd());
return x;
}
inline const mpreal operator/(const int b, const mpreal& a)
{
- mpreal x(a);
- mpfr_si_div(x.mp,b,a.mp,mpreal::get_default_rnd());
+ mpreal x(0, mpfr_get_prec(a.mpfr_ptr()));
+ mpfr_si_div(x.mpfr_ptr(), b, a.mpfr_srcptr(), mpreal::get_default_rnd());
return x;
}
inline const mpreal operator/(const double b, const mpreal& a)
{
#if (MPFR_VERSION >= MPFR_VERSION_NUM(2,4,0))
- mpreal x(a);
- mpfr_d_div(x.mp,b,a.mp,mpreal::get_default_rnd());
+ mpreal x(0, mpfr_get_prec(a.mpfr_ptr()));
+ mpfr_d_div(x.mpfr_ptr(), b, a.mpfr_srcptr(), mpreal::get_default_rnd());
return x;
#else
- return mpreal(b) /= a;
+ mpreal x(b, mpfr_get_prec(a.mpfr_ptr()));
+ x /= a;
+ return x;
#endif
}
@@ -1611,8 +1608,9 @@ inline int64_t mpreal::toInt64 (mp_rnd_t mode) const{ return mpfr_get
inline uint64_t mpreal::toUInt64(mp_rnd_t mode) const{ return mpfr_get_uj(mp, mode); }
#endif
-inline ::mpfr_ptr mpreal::mpfr_ptr() { return mp; }
-inline ::mpfr_srcptr mpreal::mpfr_srcptr() const { return const_cast< ::mpfr_srcptr >(mp); }
+inline ::mpfr_ptr mpreal::mpfr_ptr() { return mp; }
+inline ::mpfr_srcptr mpreal::mpfr_ptr() const { return mp; }
+inline ::mpfr_srcptr mpreal::mpfr_srcptr() const { return mp; }
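
The new const overload of mpfr_ptr() returns ::mpfr_srcptr, so a const mpreal can hand a read-only pointer straight to an MPFR entry point without the const_cast the old mpfr_srcptr() needed. A hedged sketch (dump is a hypothetical helper, not part of the patch):

    void dump(const mpfr::mpreal& x)
    {
        mpfr_printf("%.30Rf\n", x.mpfr_srcptr());   // read-only pointer, no const_cast
    }
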
template <class T>
inline std::string toString(T t, std::ios_base & (*f)(std::ios_base&))
@@ -1818,7 +1816,7 @@ inline int mpreal::getPrecision() const
inline mpreal& mpreal::setPrecision(int Precision, mp_rnd_t RoundingMode)
{
- mpfr_prec_round(mp,Precision, RoundingMode);
+ mpfr_prec_round(mp, Precision, RoundingMode);
MPREAL_MSVC_DEBUGVIEW_CODE;
return *this;
}
@@ -2002,25 +2000,19 @@ inline mp_exp_t mpreal::get_emax_max (void)
//////////////////////////////////////////////////////////////////////////
// Mathematical Functions
//////////////////////////////////////////////////////////////////////////
-inline const mpreal sqr(const mpreal& v, mp_rnd_t rnd_mode)
-{
- mpreal x(v);
- mpfr_sqr(x.mp,x.mp,rnd_mode);
- return x;
-}
+#define MPREAL_UNARY_MATH_FUNCTION_BODY(f) \
+ mpreal y(0, mpfr_get_prec(x.mpfr_srcptr())); \
+ mpfr_##f(y.mpfr_ptr(), x.mpfr_srcptr(), r); \
+ return y;
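
Each one-liner below expands this macro with a different MPFR function name: the token pasted by mpfr_##f selects the C routine, and the result is allocated at the argument's precision. For example, sin(x, r) expands to roughly:

    inline const mpreal sin(const mpreal& x, mp_rnd_t r)
    {
        mpreal y(0, mpfr_get_prec(x.mpfr_srcptr()));  // result at prec(x)
        mpfr_sin(y.mpfr_ptr(), x.mpfr_srcptr(), r);   // mpfr_##f with f = sin
        return y;
    }
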
-inline const mpreal sqrt(const mpreal& v, mp_rnd_t rnd_mode)
-{
- mpreal x(v);
- mpfr_sqrt(x.mp,x.mp,rnd_mode);
- return x;
-}
+inline const mpreal sqr (const mpreal& x, mp_rnd_t r) { MPREAL_UNARY_MATH_FUNCTION_BODY(sqr ); }
+inline const mpreal sqrt (const mpreal& x, mp_rnd_t r) { MPREAL_UNARY_MATH_FUNCTION_BODY(sqrt); }
-inline const mpreal sqrt(const unsigned long int v, mp_rnd_t rnd_mode)
+inline const mpreal sqrt(const unsigned long int x, mp_rnd_t r)
{
- mpreal x;
- mpfr_sqrt_ui(x.mp,v,rnd_mode);
- return x;
+ mpreal y;
+ mpfr_sqrt_ui(y.mpfr_ptr(), x, r);
+ return y;
}
inline const mpreal sqrt(const unsigned int v, mp_rnd_t rnd_mode)
@@ -2030,59 +2022,28 @@ inline const mpreal sqrt(const unsigned int v, mp_rnd_t rnd_mode)
inline const mpreal sqrt(const long int v, mp_rnd_t rnd_mode)
{
- if (v>=0) return sqrt(static_cast<unsigned long int>(v),rnd_mode);
+ if (v>=0) return sqrt(static_cast<unsigned long int>(v),rnd_mode);
else return mpreal().setNan(); // NaN
}
inline const mpreal sqrt(const int v, mp_rnd_t rnd_mode)
{
- if (v>=0) return sqrt(static_cast<unsigned long int>(v),rnd_mode);
+ if (v>=0) return sqrt(static_cast<unsigned long int>(v),rnd_mode);
else return mpreal().setNan(); // NaN
}
-inline const mpreal sqrt(const long double v, mp_rnd_t rnd_mode)
-{
- return sqrt(mpreal(v),rnd_mode);
-}
-
-inline const mpreal sqrt(const double v, mp_rnd_t rnd_mode)
+inline const mpreal root(const mpreal& x, unsigned long int k, mp_rnd_t r)
{
- return sqrt(mpreal(v),rnd_mode);
+ mpreal y(0, mpfr_get_prec(x.mpfr_srcptr()));
+ mpfr_root(y.mpfr_ptr(), x.mpfr_srcptr(), k, r);
+ return y;
}
-inline const mpreal cbrt(const mpreal& v, mp_rnd_t rnd_mode)
+inline const mpreal dim(const mpreal& a, const mpreal& b, mp_rnd_t r)
{
- mpreal x(v);
- mpfr_cbrt(x.mp,x.mp,rnd_mode);
- return x;
-}
-
-inline const mpreal root(const mpreal& v, unsigned long int k, mp_rnd_t rnd_mode)
-{
- mpreal x(v);
- mpfr_root(x.mp,x.mp,k,rnd_mode);
- return x;
-}
-
-inline const mpreal fabs(const mpreal& v, mp_rnd_t rnd_mode)
-{
- mpreal x(v);
- mpfr_abs(x.mp,x.mp,rnd_mode);
- return x;
-}
-
-inline const mpreal abs(const mpreal& v, mp_rnd_t rnd_mode)
-{
- mpreal x(v);
- mpfr_abs(x.mp,x.mp,rnd_mode);
- return x;
-}
-
-inline const mpreal dim(const mpreal& a, const mpreal& b, mp_rnd_t rnd_mode)
-{
- mpreal x(a);
- mpfr_dim(x.mp,a.mp,b.mp,rnd_mode);
- return x;
+ mpreal y(0, mpfr_get_prec(a.mpfr_srcptr()));
+ mpfr_dim(y.mpfr_ptr(), a.mpfr_srcptr(), b.mpfr_srcptr(), r);
+ return y;
}
inline int cmpabs(const mpreal& a,const mpreal& b)
@@ -2090,145 +2051,62 @@ inline int cmpabs(const mpreal& a,const mpreal& b)
return mpfr_cmpabs(a.mp,b.mp);
}
-inline const mpreal log (const mpreal& v, mp_rnd_t rnd_mode)
-{
- mpreal x(v);
- mpfr_log(x.mp,v.mp,rnd_mode);
- return x;
-}
-
-inline const mpreal log2(const mpreal& v, mp_rnd_t rnd_mode)
-{
- mpreal x(v);
- mpfr_log2(x.mp,v.mp,rnd_mode);
- return x;
-}
-
-inline const mpreal log10(const mpreal& v, mp_rnd_t rnd_mode)
-{
- mpreal x(v);
- mpfr_log10(x.mp,v.mp,rnd_mode);
- return x;
-}
-
-inline const mpreal exp(const mpreal& v, mp_rnd_t rnd_mode)
-{
- mpreal x(v);
- mpfr_exp(x.mp,v.mp,rnd_mode);
- return x;
-}
-
-inline const mpreal exp2(const mpreal& v, mp_rnd_t rnd_mode)
-{
- mpreal x(v);
- mpfr_exp2(x.mp,v.mp,rnd_mode);
- return x;
-}
-
-inline const mpreal exp10(const mpreal& v, mp_rnd_t rnd_mode)
-{
- mpreal x(v);
- mpfr_exp10(x.mp,v.mp,rnd_mode);
- return x;
-}
-
-inline const mpreal cos(const mpreal& v, mp_rnd_t rnd_mode)
-{
- mpreal x(v);
- mpfr_cos(x.mp,v.mp,rnd_mode);
- return x;
-}
-
-inline const mpreal sin(const mpreal& v, mp_rnd_t rnd_mode)
-{
- mpreal x(v);
- mpfr_sin(x.mp,v.mp,rnd_mode);
- return x;
-}
-
-inline const mpreal tan(const mpreal& v, mp_rnd_t rnd_mode)
-{
- mpreal x(v);
- mpfr_tan(x.mp,v.mp,rnd_mode);
- return x;
-}
-
-inline const mpreal sec(const mpreal& v, mp_rnd_t rnd_mode)
-{
- mpreal x(v);
- mpfr_sec(x.mp,v.mp,rnd_mode);
- return x;
-}
-
-inline const mpreal csc(const mpreal& v, mp_rnd_t rnd_mode)
-{
- mpreal x(v);
- mpfr_csc(x.mp,v.mp,rnd_mode);
- return x;
-}
-
-inline const mpreal cot(const mpreal& v, mp_rnd_t rnd_mode)
-{
- mpreal x(v);
- mpfr_cot(x.mp,v.mp,rnd_mode);
- return x;
-}
-
inline int sin_cos(mpreal& s, mpreal& c, const mpreal& v, mp_rnd_t rnd_mode)
{
return mpfr_sin_cos(s.mp,c.mp,v.mp,rnd_mode);
}
-inline const mpreal acos (const mpreal& v, mp_rnd_t rnd_mode)
-{
- mpreal x(v);
- mpfr_acos(x.mp,v.mp,rnd_mode);
- return x;
-}
-
-inline const mpreal asin (const mpreal& v, mp_rnd_t rnd_mode)
-{
- mpreal x(v);
- mpfr_asin(x.mp,v.mp,rnd_mode);
- return x;
-}
-
-inline const mpreal atan (const mpreal& v, mp_rnd_t rnd_mode)
-{
- mpreal x(v);
- mpfr_atan(x.mp,v.mp,rnd_mode);
- return x;
-}
-
-inline const mpreal acot (const mpreal& v, mp_rnd_t rnd_mode)
-{
- return atan(1/v, rnd_mode);
-}
-
-inline const mpreal asec (const mpreal& v, mp_rnd_t rnd_mode)
-{
- return acos(1/v, rnd_mode);
-}
-
-inline const mpreal acsc (const mpreal& v, mp_rnd_t rnd_mode)
-{
- return asin(1/v, rnd_mode);
-}
-
-inline const mpreal acoth (const mpreal& v, mp_rnd_t rnd_mode)
-{
- return atanh(1/v, rnd_mode);
-}
-
-inline const mpreal asech (const mpreal& v, mp_rnd_t rnd_mode)
-{
- return acosh(1/v, rnd_mode);
-}
-
-inline const mpreal acsch (const mpreal& v, mp_rnd_t rnd_mode)
-{
- return asinh(1/v, rnd_mode);
-}
+inline const mpreal sqrt (const long double v, mp_rnd_t rnd_mode) { return sqrt(mpreal(v),rnd_mode); }
+inline const mpreal sqrt (const double v, mp_rnd_t rnd_mode) { return sqrt(mpreal(v),rnd_mode); }
+
+inline const mpreal cbrt (const mpreal& x, mp_rnd_t r) { MPREAL_UNARY_MATH_FUNCTION_BODY(cbrt ); }
+inline const mpreal fabs (const mpreal& x, mp_rnd_t r) { MPREAL_UNARY_MATH_FUNCTION_BODY(abs ); }
+inline const mpreal abs (const mpreal& x, mp_rnd_t r) { MPREAL_UNARY_MATH_FUNCTION_BODY(abs ); }
+inline const mpreal log (const mpreal& x, mp_rnd_t r) { MPREAL_UNARY_MATH_FUNCTION_BODY(log ); }
+inline const mpreal log2 (const mpreal& x, mp_rnd_t r) { MPREAL_UNARY_MATH_FUNCTION_BODY(log2 ); }
+inline const mpreal log10 (const mpreal& x, mp_rnd_t r) { MPREAL_UNARY_MATH_FUNCTION_BODY(log10); }
+inline const mpreal exp (const mpreal& x, mp_rnd_t r) { MPREAL_UNARY_MATH_FUNCTION_BODY(exp ); }
+inline const mpreal exp2 (const mpreal& x, mp_rnd_t r) { MPREAL_UNARY_MATH_FUNCTION_BODY(exp2 ); }
+inline const mpreal exp10 (const mpreal& x, mp_rnd_t r) { MPREAL_UNARY_MATH_FUNCTION_BODY(exp10); }
+inline const mpreal cos (const mpreal& x, mp_rnd_t r) { MPREAL_UNARY_MATH_FUNCTION_BODY(cos ); }
+inline const mpreal sin (const mpreal& x, mp_rnd_t r) { MPREAL_UNARY_MATH_FUNCTION_BODY(sin ); }
+inline const mpreal tan (const mpreal& x, mp_rnd_t r) { MPREAL_UNARY_MATH_FUNCTION_BODY(tan ); }
+inline const mpreal sec (const mpreal& x, mp_rnd_t r) { MPREAL_UNARY_MATH_FUNCTION_BODY(sec ); }
+inline const mpreal csc (const mpreal& x, mp_rnd_t r) { MPREAL_UNARY_MATH_FUNCTION_BODY(csc ); }
+inline const mpreal cot (const mpreal& x, mp_rnd_t r) { MPREAL_UNARY_MATH_FUNCTION_BODY(cot ); }
+inline const mpreal acos (const mpreal& x, mp_rnd_t r) { MPREAL_UNARY_MATH_FUNCTION_BODY(acos); }
+inline const mpreal asin (const mpreal& x, mp_rnd_t r) { MPREAL_UNARY_MATH_FUNCTION_BODY(asin); }
+inline const mpreal atan (const mpreal& x, mp_rnd_t r) { MPREAL_UNARY_MATH_FUNCTION_BODY(atan); }
+
+inline const mpreal acot (const mpreal& v, mp_rnd_t r) { return atan (1/v, r); }
+inline const mpreal asec (const mpreal& v, mp_rnd_t r) { return acos (1/v, r); }
+inline const mpreal acsc (const mpreal& v, mp_rnd_t r) { return asin (1/v, r); }
+inline const mpreal acoth (const mpreal& v, mp_rnd_t r) { return atanh(1/v, r); }
+inline const mpreal asech (const mpreal& v, mp_rnd_t r) { return acosh(1/v, r); }
+inline const mpreal acsch (const mpreal& v, mp_rnd_t r) { return asinh(1/v, r); }
+
+inline const mpreal cosh (const mpreal& x, mp_rnd_t r) { MPREAL_UNARY_MATH_FUNCTION_BODY(cosh ); }
+inline const mpreal sinh (const mpreal& x, mp_rnd_t r) { MPREAL_UNARY_MATH_FUNCTION_BODY(sinh ); }
+inline const mpreal tanh (const mpreal& x, mp_rnd_t r) { MPREAL_UNARY_MATH_FUNCTION_BODY(tanh ); }
+inline const mpreal sech (const mpreal& x, mp_rnd_t r) { MPREAL_UNARY_MATH_FUNCTION_BODY(sech ); }
+inline const mpreal csch (const mpreal& x, mp_rnd_t r) { MPREAL_UNARY_MATH_FUNCTION_BODY(csch ); }
+inline const mpreal coth (const mpreal& x, mp_rnd_t r) { MPREAL_UNARY_MATH_FUNCTION_BODY(coth ); }
+inline const mpreal acosh (const mpreal& x, mp_rnd_t r) { MPREAL_UNARY_MATH_FUNCTION_BODY(acosh); }
+inline const mpreal asinh (const mpreal& x, mp_rnd_t r) { MPREAL_UNARY_MATH_FUNCTION_BODY(asinh); }
+inline const mpreal atanh (const mpreal& x, mp_rnd_t r) { MPREAL_UNARY_MATH_FUNCTION_BODY(atanh); }
+
+inline const mpreal log1p (const mpreal& x, mp_rnd_t r) { MPREAL_UNARY_MATH_FUNCTION_BODY(log1p ); }
+inline const mpreal expm1 (const mpreal& x, mp_rnd_t r) { MPREAL_UNARY_MATH_FUNCTION_BODY(expm1 ); }
+inline const mpreal eint (const mpreal& x, mp_rnd_t r) { MPREAL_UNARY_MATH_FUNCTION_BODY(eint ); }
+inline const mpreal gamma (const mpreal& x, mp_rnd_t r) { MPREAL_UNARY_MATH_FUNCTION_BODY(gamma ); }
+inline const mpreal lngamma (const mpreal& x, mp_rnd_t r) { MPREAL_UNARY_MATH_FUNCTION_BODY(lngamma); }
+inline const mpreal zeta (const mpreal& x, mp_rnd_t r) { MPREAL_UNARY_MATH_FUNCTION_BODY(zeta ); }
+inline const mpreal erf (const mpreal& x, mp_rnd_t r) { MPREAL_UNARY_MATH_FUNCTION_BODY(erf ); }
+inline const mpreal erfc (const mpreal& x, mp_rnd_t r) { MPREAL_UNARY_MATH_FUNCTION_BODY(erfc ); }
+inline const mpreal besselj0(const mpreal& x, mp_rnd_t r) { MPREAL_UNARY_MATH_FUNCTION_BODY(j0 ); }
+inline const mpreal besselj1(const mpreal& x, mp_rnd_t r) { MPREAL_UNARY_MATH_FUNCTION_BODY(j1 ); }
+inline const mpreal bessely0(const mpreal& x, mp_rnd_t r) { MPREAL_UNARY_MATH_FUNCTION_BODY(y0 ); }
+inline const mpreal bessely1(const mpreal& x, mp_rnd_t r) { MPREAL_UNARY_MATH_FUNCTION_BODY(y1 ); }
inline const mpreal atan2 (const mpreal& y, const mpreal& x, mp_rnd_t rnd_mode)
{
@@ -2245,69 +2123,6 @@ inline const mpreal atan2 (const mpreal& y, const mpreal& x, mp_rnd_t rnd_mode)
return a;
}
-inline const mpreal cosh (const mpreal& v, mp_rnd_t rnd_mode)
-{
- mpreal x(v);
- mpfr_cosh(x.mp,v.mp,rnd_mode);
- return x;
-}
-
-inline const mpreal sinh (const mpreal& v, mp_rnd_t rnd_mode)
-{
- mpreal x(v);
- mpfr_sinh(x.mp,v.mp,rnd_mode);
- return x;
-}
-
-inline const mpreal tanh (const mpreal& v, mp_rnd_t rnd_mode)
-{
- mpreal x(v);
- mpfr_tanh(x.mp,v.mp,rnd_mode);
- return x;
-}
-
-inline const mpreal sech (const mpreal& v, mp_rnd_t rnd_mode)
-{
- mpreal x(v);
- mpfr_sech(x.mp,v.mp,rnd_mode);
- return x;
-}
-
-inline const mpreal csch (const mpreal& v, mp_rnd_t rnd_mode)
-{
- mpreal x(v);
- mpfr_csch(x.mp,v.mp,rnd_mode);
- return x;
-}
-
-inline const mpreal coth (const mpreal& v, mp_rnd_t rnd_mode)
-{
- mpreal x(v);
- mpfr_coth(x.mp,v.mp,rnd_mode);
- return x;
-}
-
-inline const mpreal acosh (const mpreal& v, mp_rnd_t rnd_mode)
-{
- mpreal x(v);
- mpfr_acosh(x.mp,v.mp,rnd_mode);
- return x;
-}
-
-inline const mpreal asinh (const mpreal& v, mp_rnd_t rnd_mode)
-{
- mpreal x(v);
- mpfr_asinh(x.mp,v.mp,rnd_mode);
- return x;
-}
-
-inline const mpreal atanh (const mpreal& v, mp_rnd_t rnd_mode)
-{
- mpreal x(v);
- mpfr_atanh(x.mp,v.mp,rnd_mode);
- return x;
-}
-
inline const mpreal hypot (const mpreal& x, const mpreal& y, mp_rnd_t rnd_mode)
{
mpreal a;
@@ -2355,124 +2170,36 @@ inline const mpreal remquo (long* q, const mpreal& x, const mpreal& y, mp_rnd_t
inline const mpreal fac_ui (unsigned long int v, mp_prec_t prec, mp_rnd_t rnd_mode)
{
- mpreal x(0,prec);
+ mpreal x(0, prec);
mpfr_fac_ui(x.mp,v,rnd_mode);
return x;
}
-inline const mpreal log1p (const mpreal& v, mp_rnd_t rnd_mode)
-{
- mpreal x(v);
- mpfr_log1p(x.mp,v.mp,rnd_mode);
- return x;
-}
-
-inline const mpreal expm1 (const mpreal& v, mp_rnd_t rnd_mode)
-{
- mpreal x(v);
- mpfr_expm1(x.mp,v.mp,rnd_mode);
- return x;
-}
-
-inline const mpreal eint (const mpreal& v, mp_rnd_t rnd_mode)
-{
- mpreal x(v);
- mpfr_eint(x.mp,v.mp,rnd_mode);
- return x;
-}
-
-inline const mpreal gamma (const mpreal& x, mp_rnd_t rnd_mode)
-{
- mpreal FunctionValue(x);
-
- // x < 0: gamma(-x) = -pi/(x * gamma(x) * sin(pi*x))
-
- mpfr_gamma(FunctionValue.mp, x.mp, rnd_mode);
-
- return FunctionValue;
-}
-
-inline const mpreal lngamma (const mpreal& v, mp_rnd_t rnd_mode)
-{
- mpreal x(v);
- mpfr_lngamma(x.mp,v.mp,rnd_mode);
- return x;
-}
inline const mpreal lgamma (const mpreal& v, int *signp, mp_rnd_t rnd_mode)
{
mpreal x(v);
int tsignp;
- if(signp)
- mpfr_lgamma(x.mp,signp,v.mp,rnd_mode);
- else
- mpfr_lgamma(x.mp,&tsignp,v.mp,rnd_mode);
-
- return x;
-}
-
-inline const mpreal zeta (const mpreal& v, mp_rnd_t rnd_mode)
-{
- mpreal x(v);
- mpfr_zeta(x.mp,v.mp,rnd_mode);
- return x;
-}
-
-inline const mpreal erf (const mpreal& v, mp_rnd_t rnd_mode)
-{
- mpreal x(v);
- mpfr_erf(x.mp,v.mp,rnd_mode);
- return x;
-}
-
-inline const mpreal erfc (const mpreal& v, mp_rnd_t rnd_mode)
-{
- mpreal x(v);
- mpfr_erfc(x.mp,v.mp,rnd_mode);
- return x;
-}
-
-inline const mpreal besselj0 (const mpreal& v, mp_rnd_t rnd_mode)
-{
- mpreal x(v);
- mpfr_j0(x.mp,v.mp,rnd_mode);
- return x;
-}
-
-inline const mpreal besselj1 (const mpreal& v, mp_rnd_t rnd_mode)
-{
- mpreal x(v);
- mpfr_j1(x.mp,v.mp,rnd_mode);
- return x;
-}
+ if(signp) mpfr_lgamma(x.mp,signp,v.mp,rnd_mode);
+ else mpfr_lgamma(x.mp,&tsignp,v.mp,rnd_mode);
-inline const mpreal besseljn (long n, const mpreal& v, mp_rnd_t rnd_mode)
-{
- mpreal x(v);
- mpfr_jn(x.mp,n,v.mp,rnd_mode);
return x;
}
-inline const mpreal bessely0 (const mpreal& v, mp_rnd_t rnd_mode)
-{
- mpreal x(v);
- mpfr_y0(x.mp,v.mp,rnd_mode);
- return x;
-}
-inline const mpreal bessely1 (const mpreal& v, mp_rnd_t rnd_mode)
+inline const mpreal besseljn (long n, const mpreal& x, mp_rnd_t r)
{
- mpreal x(v);
- mpfr_y1(x.mp,v.mp,rnd_mode);
- return x;
+ mpreal y(0, mpfr_get_prec(x.mpfr_srcptr()));
+ mpfr_jn(y.mpfr_ptr(), n, x.mpfr_srcptr(), r);
+ return y;
}
-inline const mpreal besselyn (long n, const mpreal& v, mp_rnd_t rnd_mode)
+inline const mpreal besselyn (long n, const mpreal& x, mp_rnd_t r)
{
- mpreal x(v);
- mpfr_yn(x.mp,n,v.mp,rnd_mode);
- return x;
+ mpreal y(0, mpfr_get_prec(x.mpfr_srcptr()));
+ mpfr_yn(y.mpfr_ptr(), n, x.mpfr_srcptr(), r);
+ return y;
}
inline const mpreal fma (const mpreal& v1, const mpreal& v2, const mpreal& v3, mp_rnd_t rnd_mode)
@@ -2542,11 +2269,9 @@ inline int sinh_cosh(mpreal& s, mpreal& c, const mpreal& v, mp_rnd_t rnd_mode)
return mpfr_sinh_cosh(s.mp,c.mp,v.mp,rnd_mode);
}
-inline const mpreal li2(const mpreal& v, mp_rnd_t rnd_mode)
-{
- mpreal x(v);
- mpfr_li2(x.mp,v.mp,rnd_mode);
- return x;
+inline const mpreal li2 (const mpreal& x, mp_rnd_t r)
+{
+ MPREAL_UNARY_MATH_FUNCTION_BODY(li2);
}
inline const mpreal rem (const mpreal& x, const mpreal& y, mp_rnd_t rnd_mode)
@@ -2606,80 +2331,54 @@ inline const mpreal rec_sqrt(const mpreal& v, mp_rnd_t rnd_mode)
//////////////////////////////////////////////////////////////////////////
// MPFR 3.0.0 Specifics
#if (MPFR_VERSION >= MPFR_VERSION_NUM(3,0,0))
-
-inline const mpreal digamma(const mpreal& v, mp_rnd_t rnd_mode)
-{
- mpreal x(v);
- mpfr_digamma(x.mp,v.mp,rnd_mode);
- return x;
-}
-
-inline const mpreal ai(const mpreal& v, mp_rnd_t rnd_mode)
-{
- mpreal x(v);
- mpfr_ai(x.mp,v.mp,rnd_mode);
- return x;
-}
-
+inline const mpreal digamma (const mpreal& x, mp_rnd_t r) { MPREAL_UNARY_MATH_FUNCTION_BODY(digamma); }
+inline const mpreal ai (const mpreal& x, mp_rnd_t r) { MPREAL_UNARY_MATH_FUNCTION_BODY(ai); }
#endif // MPFR 3.0.0 Specifics
//////////////////////////////////////////////////////////////////////////
// Constants
-inline const mpreal const_log2 (mp_prec_t prec, mp_rnd_t rnd_mode)
+inline const mpreal const_log2 (mp_prec_t p, mp_rnd_t r)
{
- mpreal x;
- x.set_prec(prec);
- mpfr_const_log2(x.mp,rnd_mode);
+ mpreal x(0, p);
+ mpfr_const_log2(x.mpfr_ptr(), r);
return x;
}
-inline const mpreal const_pi (mp_prec_t prec, mp_rnd_t rnd_mode)
+inline const mpreal const_pi (mp_prec_t p, mp_rnd_t r)
{
- mpreal x;
- x.set_prec(prec);
- mpfr_const_pi(x.mp,rnd_mode);
+ mpreal x(0, p);
+ mpfr_const_pi(x.mpfr_ptr(), r);
return x;
}
-inline const mpreal const_euler (mp_prec_t prec, mp_rnd_t rnd_mode)
+inline const mpreal const_euler (mp_prec_t p, mp_rnd_t r)
{
- mpreal x;
- x.set_prec(prec);
- mpfr_const_euler(x.mp,rnd_mode);
+ mpreal x(0, p);
+ mpfr_const_euler(x.mpfr_ptr(), r);
return x;
}
-inline const mpreal const_catalan (mp_prec_t prec, mp_rnd_t rnd_mode)
+inline const mpreal const_catalan (mp_prec_t p, mp_rnd_t r)
{
- mpreal x;
- x.set_prec(prec);
- mpfr_const_catalan(x.mp,rnd_mode);
+ mpreal x(0, p);
+ mpfr_const_catalan(x.mpfr_ptr(), r);
return x;
}
-inline const mpreal const_infinity (int sign, mp_prec_t prec, mp_rnd_t rnd_mode)
+inline const mpreal const_infinity (int sign, mp_prec_t p, mp_rnd_t /*r*/)
{
- mpreal x;
- x.set_prec(prec,rnd_mode);
- mpfr_set_inf(x.mp, sign);
+ mpreal x(0, p);
+ mpfr_set_inf(x.mpfr_ptr(), sign);
return x;
}
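
The constant factories now allocate directly at the requested precision instead of default-constructing and calling set_prec afterwards. A usage sketch, assuming the defaulted rounding argument in the declarations earlier in this header:

    mpfr::mpreal pi  = mpfr::const_pi(512);     // pi correct to 512 bits
    mpfr::mpreal ln2 = mpfr::const_log2(512);
    mpfr::mpreal q   = pi / ln2;                // result inherits the 512-bit precision
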
//////////////////////////////////////////////////////////////////////////
// Integer Related Functions
-inline const mpreal rint(const mpreal& v, mp_rnd_t rnd_mode)
-{
- mpreal x(v);
- mpfr_rint(x.mp,v.mp,rnd_mode);
- return x;
-}
-
inline const mpreal ceil(const mpreal& v)
{
mpreal x(v);
mpfr_ceil(x.mp,v.mp);
return x;
-
}
inline const mpreal floor(const mpreal& v)
@@ -2703,57 +2402,18 @@ inline const mpreal trunc(const mpreal& v)
return x;
}
-inline const mpreal rint_ceil (const mpreal& v, mp_rnd_t rnd_mode)
-{
- mpreal x(v);
- mpfr_rint_ceil(x.mp,v.mp,rnd_mode);
- return x;
-}
-
-inline const mpreal rint_floor(const mpreal& v, mp_rnd_t rnd_mode)
-{
- mpreal x(v);
- mpfr_rint_floor(x.mp,v.mp,rnd_mode);
- return x;
-}
-
-inline const mpreal rint_round(const mpreal& v, mp_rnd_t rnd_mode)
-{
- mpreal x(v);
- mpfr_rint_round(x.mp,v.mp,rnd_mode);
- return x;
-}
-
-inline const mpreal rint_trunc(const mpreal& v, mp_rnd_t rnd_mode)
-{
- mpreal x(v);
- mpfr_rint_trunc(x.mp,v.mp,rnd_mode);
- return x;
-}
-
-inline const mpreal frac (const mpreal& v, mp_rnd_t rnd_mode)
-{
- mpreal x(v);
- mpfr_frac(x.mp,v.mp,rnd_mode);
- return x;
-}
+inline const mpreal rint (const mpreal& x, mp_rnd_t r) { MPREAL_UNARY_MATH_FUNCTION_BODY(rint ); }
+inline const mpreal rint_ceil (const mpreal& x, mp_rnd_t r) { MPREAL_UNARY_MATH_FUNCTION_BODY(rint_ceil ); }
+inline const mpreal rint_floor (const mpreal& x, mp_rnd_t r) { MPREAL_UNARY_MATH_FUNCTION_BODY(rint_floor); }
+inline const mpreal rint_round (const mpreal& x, mp_rnd_t r) { MPREAL_UNARY_MATH_FUNCTION_BODY(rint_round); }
+inline const mpreal rint_trunc (const mpreal& x, mp_rnd_t r) { MPREAL_UNARY_MATH_FUNCTION_BODY(rint_trunc); }
+inline const mpreal frac (const mpreal& x, mp_rnd_t r) { MPREAL_UNARY_MATH_FUNCTION_BODY(frac ); }
//////////////////////////////////////////////////////////////////////////
// Miscellaneous Functions
-inline void swap(mpreal& a, mpreal& b)
-{
- mpfr_swap(a.mp,b.mp);
-}
-
-inline const mpreal (max)(const mpreal& x, const mpreal& y)
-{
- return (x>y?x:y);
-}
-
-inline const mpreal (min)(const mpreal& x, const mpreal& y)
-{
- return (x<y?x:y);
-}
+inline void swap (mpreal& a, mpreal& b) { mpfr_swap(a.mp,b.mp); }
+inline const mpreal (max)(const mpreal& x, const mpreal& y){ return (x>y?x:y); }
+inline const mpreal (min)(const mpreal& x, const mpreal& y){ return (x<y?x:y); }
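
(max) and (min) are declared with parenthesized names so that a function-style macro named max or min (for instance from <windows.h> when NOMINMAX is not defined) cannot expand over them; the (std::max) calls introduced above use the same shield. Call sites can do likewise:

    mpfr::mpreal a = 1, b = 2;
    mpfr::mpreal m = (mpfr::max)(a, b);   // parentheses suppress any max() macro
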
inline const mpreal fmax(const mpreal& x, const mpreal& y, mp_rnd_t rnd_mode)
{
@@ -2993,13 +2653,13 @@ inline const mpreal pow(const unsigned int a, const unsigned int b, mp_rnd_t rnd
inline const mpreal pow(const unsigned int a, const long int b, mp_rnd_t rnd_mode)
{
- if(b>0) return pow(static_cast<unsigned long int>(a),static_cast<unsigned long int>(b),rnd_mode); //mpfr_ui_pow_ui
+ if(b>0) return pow(static_cast<unsigned long int>(a),static_cast<unsigned long int>(b),rnd_mode); //mpfr_ui_pow_ui
else return pow(static_cast<unsigned long int>(a),mpreal(b),rnd_mode); //mpfr_ui_pow
}
inline const mpreal pow(const unsigned int a, const int b, mp_rnd_t rnd_mode)
{
- if(b>0) return pow(static_cast<unsigned long int>(a),static_cast<unsigned long int>(b),rnd_mode); //mpfr_ui_pow_ui
+ if(b>0) return pow(static_cast<unsigned long int>(a),static_cast<unsigned long int>(b),rnd_mode); //mpfr_ui_pow_ui
else return pow(static_cast<unsigned long int>(a),mpreal(b),rnd_mode); //mpfr_ui_pow
}
@@ -3050,13 +2710,13 @@ inline const mpreal pow(const long int a, const int b, mp_rnd_t rnd_mode)
inline const mpreal pow(const long int a, const long double b, mp_rnd_t rnd_mode)
{
- if (a>=0) return pow(static_cast<unsigned long int>(a),mpreal(b),rnd_mode); //mpfr_ui_pow
+ if (a>=0) return pow(static_cast<unsigned long int>(a),mpreal(b),rnd_mode); //mpfr_ui_pow
else return pow(mpreal(a),mpreal(b),rnd_mode); //mpfr_pow
}
inline const mpreal pow(const long int a, const double b, mp_rnd_t rnd_mode)
{
- if (a>=0) return pow(static_cast<unsigned long int>(a),mpreal(b),rnd_mode); //mpfr_ui_pow
+ if (a>=0) return pow(static_cast<unsigned long int>(a),mpreal(b),rnd_mode); //mpfr_ui_pow
else return pow(mpreal(a),mpreal(b),rnd_mode); //mpfr_pow
}
@@ -3097,13 +2757,13 @@ inline const mpreal pow(const int a, const int b, mp_rnd_t rnd_mode)
inline const mpreal pow(const int a, const long double b, mp_rnd_t rnd_mode)
{
- if (a>=0) return pow(static_cast<unsigned long int>(a),mpreal(b),rnd_mode); //mpfr_ui_pow
+ if (a>=0) return pow(static_cast<unsigned long int>(a),mpreal(b),rnd_mode); //mpfr_ui_pow
else return pow(mpreal(a),mpreal(b),rnd_mode); //mpfr_pow
}
inline const mpreal pow(const int a, const double b, mp_rnd_t rnd_mode)
{
- if (a>=0) return pow(static_cast<unsigned long int>(a),mpreal(b),rnd_mode); //mpfr_ui_pow
+ if (a>=0) return pow(static_cast<unsigned long int>(a),mpreal(b),rnd_mode); //mpfr_ui_pow
else return pow(mpreal(a),mpreal(b),rnd_mode); //mpfr_pow
}
@@ -3215,7 +2875,7 @@ namespace std
inline static mpfr::mpreal epsilon(mp_prec_t precision = mpfr::mpreal::get_default_prec()) { return mpfr::machine_epsilon(precision); }
// Returns smallest eps such that x + eps != x (relative machine epsilon)
- inline static mpfr::mpreal epsilon(const mpfr::mpreal& x) { return mpfr::machine_epsilon(x); }
+ inline static mpfr::mpreal epsilon(const mpfr::mpreal& x) { return mpfr::machine_epsilon(x); }
inline static mpfr::mpreal round_error(mp_prec_t precision = mpfr::mpreal::get_default_prec())
{
@@ -3233,8 +2893,8 @@ namespace std
// Please note, exponent range is not fixed in MPFR
static const int min_exponent = MPFR_EMIN_DEFAULT;
static const int max_exponent = MPFR_EMAX_DEFAULT;
- EIGEN_PERMISSIVE_EXPR static const int min_exponent10 = (int) (MPFR_EMIN_DEFAULT * 0.3010299956639811);
- EIGEN_PERMISSIVE_EXPR static const int max_exponent10 = (int) (MPFR_EMAX_DEFAULT * 0.3010299956639811);
+ MPREAL_PERMISSIVE_EXPR static const int min_exponent10 = (int) (MPFR_EMIN_DEFAULT * 0.3010299956639811);
+ MPREAL_PERMISSIVE_EXPR static const int max_exponent10 = (int) (MPFR_EMAX_DEFAULT * 0.3010299956639811);
// Should be constant according to standard, but 'digits' depends on precision in MPFR
diff --git a/unsupported/test/splines.cpp b/unsupported/test/splines.cpp
index 1043453dc..a7eb3e0c4 100644
--- a/unsupported/test/splines.cpp
+++ b/unsupported/test/splines.cpp
@@ -11,6 +11,8 @@
#include <unsupported/Eigen/Splines>
+namespace Eigen {
+
// let's do some explicit instantiations and thus
// force the compilation of all spline functions...
template class Spline<double, 2, Dynamic>;
@@ -29,6 +31,8 @@ template class Spline<float, 3, 3>;
template class Spline<float, 3, 4>;
template class Spline<float, 3, 5>;
+}
+
Spline<double, 2, Dynamic> closed_spline2d()
{
RowVectorXd knots(12);