merge and add start/end to Eigen2Support

author: Gael Guennebaud <g.gael@free.fr> 2010-01-05 13:07:32 +0100
committer: Gael Guennebaud <g.gael@free.fr> 2010-01-05 13:07:32 +0100
commit: 9d9e00b6080153ddaa26ccfce922d7814811a1ae (patch)
tree: a18d77d660e3734a21daec2637c2066afab9021d
parent: 90d2ae7fec1000c244472c94af24126c5f2ca2a2 (diff)
parent: 51b8f014f30d0f64fcd4f6dff4b1afa64f8ace48 (diff)
78 files changed, 1065 insertions, 585 deletions
diff --git a/Eigen/Core b/Eigen/Core
index d452a6cd9..67d55fee3 100644
--- a/Eigen/Core
+++ b/Eigen/Core
@@ -59,6 +59,10 @@
   #define EIGEN_DONT_VECTORIZE
 #endif
 
+#ifdef __clang__
+#define EIGEN_DONT_VECTORIZE
+#endif
+
 #ifndef EIGEN_DONT_VECTORIZE
   #if defined (EIGEN_SSE2_BUT_NOT_OLD_GCC) || defined(EIGEN_SSE2_ON_MSVC_2008_OR_LATER)
     #define EIGEN_VECTORIZE
diff --git a/Eigen/src/Array/VectorwiseOp.h b/Eigen/src/Array/VectorwiseOp.h
index 3aaaa1ec9..eef554d8a 100644
--- a/Eigen/src/Array/VectorwiseOp.h
+++ b/Eigen/src/Array/VectorwiseOp.h
@@ -384,7 +384,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
     const Reverse<ExpressionType, Direction> reverse() const
     { return Reverse<ExpressionType, Direction>( _expression() ); }
 
-    const Replicate<ExpressionType,Direction==Vertical?Dynamic:1,Direction==Horizontal?Dynamic:1>
+    const Replicate<ExpressionType,(Direction==Vertical?Dynamic:1),(Direction==Horizontal?Dynamic:1)>
     replicate(int factor) const;
 
     /** \nonstableyet
diff --git a/Eigen/src/Cholesky/LDLT.h b/Eigen/src/Cholesky/LDLT.h
index c13be9ac2..4fb6d3d2c 100644
--- a/Eigen/src/Cholesky/LDLT.h
+++ b/Eigen/src/Cholesky/LDLT.h
@@ -194,7 +194,7 @@ LDLT<MatrixType>& LDLT<MatrixType>::compute(const MatrixType& a)
   {
     // Find largest diagonal element
     int index_of_biggest_in_corner;
-    biggest_in_corner = m_matrix.diagonal().end(size-j).cwiseAbs()
+    biggest_in_corner = m_matrix.diagonal().tail(size-j).cwiseAbs()
                        .maxCoeff(&index_of_biggest_in_corner);
     index_of_biggest_in_corner += j;
 
@@ -227,12 +227,12 @@ LDLT<MatrixType>& LDLT<MatrixType>::compute(const MatrixType& a)
 
     if (j == 0) {
       m_matrix.row(0) = m_matrix.row(0).conjugate();
-      m_matrix.col(0).end(size-1) = m_matrix.row(0).end(size-1) / m_matrix.coeff(0,0);
+      m_matrix.col(0).tail(size-1) = m_matrix.row(0).tail(size-1) / m_matrix.coeff(0,0);
       continue;
     }
 
-    RealScalar Djj = ei_real(m_matrix.coeff(j,j) -  m_matrix.row(j).start(j)
-                                               .dot(m_matrix.col(j).start(j)));
+    RealScalar Djj = ei_real(m_matrix.coeff(j,j) -  m_matrix.row(j).head(j)
+                                               .dot(m_matrix.col(j).head(j)));
     m_matrix.coeffRef(j,j) = Djj;
 
     // Finish early if the matrix is not full rank.
@@ -244,13 +244,13 @@ LDLT<MatrixType>& LDLT<MatrixType>::compute(const MatrixType& a)
 
     int endSize = size - j - 1;
     if (endSize > 0) {
-      _temporary.end(endSize).noalias() = m_matrix.block(j+1,0, endSize, j)
-                                * m_matrix.col(j).start(j).conjugate();
+      _temporary.tail(endSize).noalias() = m_matrix.block(j+1,0, endSize, j)
+                                * m_matrix.col(j).head(j).conjugate();
 
-      m_matrix.row(j).end(endSize) = m_matrix.row(j).end(endSize).conjugate()
-                                   - _temporary.end(endSize).transpose();
+      m_matrix.row(j).tail(endSize) = m_matrix.row(j).tail(endSize).conjugate()
+                                   - _temporary.tail(endSize).transpose();
 
-      m_matrix.col(j).end(endSize) = m_matrix.row(j).end(endSize) / Djj;
+      m_matrix.col(j).tail(endSize) = m_matrix.row(j).tail(endSize) / Djj;
     }
   }
 
diff --git a/Eigen/src/Cholesky/LLT.h b/Eigen/src/Cholesky/LLT.h
index ad737aaeb..02645b23f 100644
--- a/Eigen/src/Cholesky/LLT.h
+++ b/Eigen/src/Cholesky/LLT.h
@@ -166,7 +166,7 @@ template<> struct ei_llt_inplace<LowerTriangular>
       Block<MatrixType,Dynamic,Dynamic> A20(mat,k+1,0,rs,k);
 
       RealScalar x = ei_real(mat.coeff(k,k));
-      if (k>0) x -= mat.row(k).start(k).squaredNorm();
+      if (k>0) x -= mat.row(k).head(k).squaredNorm();
       if (x<=RealScalar(0))
         return false;
       mat.coeffRef(k,k) = x = ei_sqrt(x);
diff --git a/Eigen/src/Core/Assign.h b/Eigen/src/Core/Assign.h
index d6bf37c6e..e5c17b3f4 100644
--- a/Eigen/src/Core/Assign.h
+++ b/Eigen/src/Core/Assign.h
@@ -49,6 +49,7 @@ private:
     InnerMaxSize = int(Derived::Flags)&RowMajorBit
               ? Derived::MaxColsAtCompileTime
               : Derived::MaxRowsAtCompileTime,
+    MaxSizeAtCompileTime = ei_size_at_compile_time<Derived::MaxColsAtCompileTime,Derived::MaxRowsAtCompileTime>::ret,
     PacketSize = ei_packet_traits<typename Derived::Scalar>::size
   };
 
@@ -60,9 +61,9 @@ private:
                        && int(DstIsAligned) && int(SrcIsAligned),
     MayLinearize = StorageOrdersAgree && (int(Derived::Flags) & int(OtherDerived::Flags) & LinearAccessBit),
     MayLinearVectorize = MightVectorize && MayLinearize
-                                        && (DstIsAligned || InnerMaxSize == Dynamic),
+                                        && (DstIsAligned || MaxSizeAtCompileTime == Dynamic),
       /* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
-         so it's only good for large enough sizes. See remark below about InnerMaxSize. */
+         so it's only good for large enough sizes. */
     MaySliceVectorize  = MightVectorize && int(InnerMaxSize)>=3*PacketSize
       /* slice vectorization can be slow, so we only want it if the slices are big, which is
          indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block
@@ -385,7 +386,7 @@ struct ei_assign_impl<Derived1, Derived2, LinearVectorizedTraversal, NoUnrolling
     const int size = dst.size();
     const int packetSize = ei_packet_traits<typename Derived1::Scalar>::size;
     const int alignedStart = ei_assign_traits<Derived1,Derived2>::DstIsAligned ? 0
-                           : ei_alignmentOffset(&dst.coeffRef(0), size);
+                           : ei_first_aligned(&dst.coeffRef(0), size);
     const int alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;
 
     for(int index = 0; index < alignedStart; ++index)
@@ -430,7 +431,7 @@ struct ei_assign_impl<Derived1, Derived2, SliceVectorizedTraversal, NoUnrolling>
     const int outerSize = dst.outerSize();
     const int alignedStep = (packetSize - dst.stride() % packetSize) & packetAlignedMask;
     int alignedStart = ei_assign_traits<Derived1,Derived2>::DstIsAligned ? 0
-                     : ei_alignmentOffset(&dst.coeffRef(0,0), innerSize);
+                     : ei_first_aligned(&dst.coeffRef(0,0), innerSize);
 
     for(int i = 0; i < outerSize; ++i)
     {
@@ -480,11 +481,11 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>
   EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Derived,OtherDerived)
   EIGEN_STATIC_ASSERT((ei_is_same_type<typename Derived::Scalar, typename OtherDerived::Scalar>::ret),
     YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
-  ei_assert(rows() == other.rows() && cols() == other.cols());
-  ei_assign_impl<Derived, OtherDerived>::run(derived(),other.derived());
 #ifdef EIGEN_DEBUG_ASSIGN
   ei_assign_traits<Derived, OtherDerived>::debug();
 #endif
+  ei_assert(rows() == other.rows() && cols() == other.cols());
+  ei_assign_impl<Derived, OtherDerived>::run(derived(),other.derived());
 #ifndef EIGEN_NO_DEBUG
   checkTransposeAliasing(other.derived());
 #endif
diff --git a/Eigen/src/Core/Coeffs.h b/Eigen/src/Core/Coeffs.h
index b8af2531e..ebfd0c80e 100644
--- a/Eigen/src/Core/Coeffs.h
+++ b/Eigen/src/Core/Coeffs.h
@@ -379,36 +379,33 @@ EIGEN_STRONG_INLINE void DenseBase<Derived>::copyPacket(int index, const DenseBa
     other.derived().template packet<LoadMode>(index));
 }
 
-
-template<typename Derived, typename Integer, bool JustReturnZero>
-struct ei_alignmentOffset_impl
+template<typename Derived, bool JustReturnZero>
+struct ei_first_aligned_impl
 {
-  inline static Integer run(const DenseBase<Derived>&, Integer)
+  inline static int run(const DenseBase<Derived>&)
   { return 0; }
 };
 
-template<typename Derived, typename Integer>
-struct ei_alignmentOffset_impl<Derived, Integer, false>
+template<typename Derived>
+struct ei_first_aligned_impl<Derived, false>
 {
-  inline static Integer run(const DenseBase<Derived>& m, Integer maxOffset)
+  inline static int run(const DenseBase<Derived>& m)
   {
-    return ei_alignmentOffset(&m.const_cast_derived().coeffRef(0,0), maxOffset);
+    return ei_first_aligned(&m.const_cast_derived().coeffRef(0,0), m.size());
   }
 };
 
-/** \internal \returns the number of elements which have to be skipped, starting
-  * from the address of coeffRef(0,0), to find the first 16-byte aligned element.
+/** \internal \returns the index of the first element of the array that is well aligned for vectorization.
   *
-  * \note If the expression doesn't have the DirectAccessBit, this function returns 0.
-  *
-  * There is also the variant ei_alignmentOffset(const Scalar*, Integer) defined in Memory.h.
+  * There is also the variant ei_first_aligned(const Scalar*, Integer) defined in Memory.h. See it for more
+  * documentation.
   */
-template<typename Derived, typename Integer>
-inline static Integer ei_alignmentOffset(const DenseBase<Derived>& m, Integer maxOffset)
+template<typename Derived>
+inline static int ei_first_aligned(const DenseBase<Derived>& m)
 {
-  return ei_alignmentOffset_impl<Derived, Integer,
-                                 (Derived::Flags & AlignedBit) || !(Derived::Flags & DirectAccessBit)>
-                                 ::run(m, maxOffset);
+  return ei_first_aligned_impl
+           <Derived, (Derived::Flags & AlignedBit) || !(Derived::Flags & DirectAccessBit)>
+         ::run(m);
 }
 
 #endif
diff --git a/Eigen/src/Core/DenseBase.h b/Eigen/src/Core/DenseBase.h
index d47cc8876..e07b02a51 100644
--- a/Eigen/src/Core/DenseBase.h
+++ b/Eigen/src/Core/DenseBase.h
@@ -290,11 +290,11 @@ template<typename Derived> class DenseBase
     VectorBlock<Derived> segment(int start, int size);
     const VectorBlock<Derived> segment(int start, int size) const;
 
-    VectorBlock<Derived> start(int size);
-    const VectorBlock<Derived> start(int size) const;
+    VectorBlock<Derived> head(int size);
+    const VectorBlock<Derived> head(int size) const;
 
-    VectorBlock<Derived> end(int size);
-    const VectorBlock<Derived> end(int size) const;
+    VectorBlock<Derived> tail(int size);
+    const VectorBlock<Derived> tail(int size) const;
 
     typename BlockReturnType<Derived>::Type corner(CornerType type, int cRows, int cCols);
     const typename BlockReturnType<Derived>::Type corner(CornerType type, int cRows, int cCols) const;
@@ -309,11 +309,11 @@ template<typename Derived> class DenseBase
     template<int CRows, int CCols>
     const typename BlockReturnType<Derived, CRows, CCols>::Type corner(CornerType type) const;
 
-    template<int Size> VectorBlock<Derived,Size> start(void);
-    template<int Size> const VectorBlock<Derived,Size> start() const;
+    template<int Size> VectorBlock<Derived,Size> head(void);
+    template<int Size> const VectorBlock<Derived,Size> head() const;
 
-    template<int Size> VectorBlock<Derived,Size> end();
-    template<int Size> const VectorBlock<Derived,Size> end() const;
+    template<int Size> VectorBlock<Derived,Size> tail();
+    template<int Size> const VectorBlock<Derived,Size> tail() const;
 
     template<int Size> VectorBlock<Derived,Size> segment(int start);
     template<int Size> const VectorBlock<Derived,Size> segment(int start) const;
diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h
index 7ffddcbf8..eddabf4b9 100644
--- a/Eigen/src/Core/MathFunctions.h
+++ b/Eigen/src/Core/MathFunctions.h
@@ -218,7 +218,7 @@ inline float ei_norm1(const std::complex<float> &x) { return(ei_abs(x.real()) +
 inline std::complex<float> ei_exp(std::complex<float> x)  { return std::exp(x); }
 inline std::complex<float> ei_sin(std::complex<float> x)  { return std::sin(x); }
 inline std::complex<float> ei_cos(std::complex<float> x)  { return std::cos(x); }
-inline std::complex<float> ei_atan2(std::complex<float>, std::complex<float> )  { ei_assert(false); return 0; }
+inline std::complex<float> ei_atan2(std::complex<float>, std::complex<float> )  { ei_assert(false); return 0.f; }
 
 template<> inline std::complex<float> ei_random()
 {
@@ -255,7 +255,7 @@ inline double ei_norm1(const std::complex<double> &x) { return(ei_abs(x.real())
 inline std::complex<double> ei_exp(std::complex<double> x)  { return std::exp(x); }
 inline std::complex<double> ei_sin(std::complex<double> x)  { return std::sin(x); }
 inline std::complex<double> ei_cos(std::complex<double> x)  { return std::cos(x); }
-inline std::complex<double> ei_atan2(std::complex<double>, std::complex<double>)  { ei_assert(false); return 0; }
+inline std::complex<double> ei_atan2(std::complex<double>, std::complex<double>)  { ei_assert(false); return 0.; }
 
 template<> inline std::complex<double> ei_random()
 {
diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h
index 8f9949b43..4c30f30ad 100644
--- a/Eigen/src/Core/MatrixBase.h
+++ b/Eigen/src/Core/MatrixBase.h
@@ -397,6 +397,15 @@ template<typename Derived> class MatrixBase
     inline const Cwise<Derived> cwise() const;
     inline Cwise<Derived> cwise();
 
+    VectorBlock<Derived> start(int size);
+    const VectorBlock<Derived> start(int size) const;
+    VectorBlock<Derived> end(int size);
+    const VectorBlock<Derived> end(int size) const;
+    template<int Size> VectorBlock<Derived,Size> start();
+    template<int Size> const VectorBlock<Derived,Size> start() const;
+    template<int Size> VectorBlock<Derived,Size> end();
+    template<int Size> const VectorBlock<Derived,Size> end() const;
+
     template<typename OtherDerived>
     typename ei_plain_matrix_type_column_major<OtherDerived>::type
     solveTriangular(const MatrixBase<OtherDerived>& other) const;
diff --git a/Eigen/src/Core/MatrixStorage.h b/Eigen/src/Core/MatrixStorage.h
index 8bfa728b6..584ba8ca3 100644
--- a/Eigen/src/Core/MatrixStorage.h
+++ b/Eigen/src/Core/MatrixStorage.h
@@ -98,7 +98,7 @@ template<typename T, int _Rows, int _Cols, int _Options> class ei_matrix_storage
     inline explicit ei_matrix_storage() {}
     inline ei_matrix_storage(ei_constructor_without_unaligned_array_assert) {}
     inline ei_matrix_storage(int,int,int) {}
-    inline void swap(ei_matrix_storage& other) {}
+    inline void swap(ei_matrix_storage& ) {}
     inline static int rows(void) {return _Rows;}
     inline static int cols(void) {return _Cols;}
     inline void resize(int,int,int) {}
diff --git a/Eigen/src/Core/Redux.h b/Eigen/src/Core/Redux.h
index 92522f86c..1643f13b2 100644
--- a/Eigen/src/Core/Redux.h
+++ b/Eigen/src/Core/Redux.h
@@ -209,7 +209,7 @@ struct ei_redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
   {
     const int size = mat.size();
     const int packetSize = ei_packet_traits<Scalar>::size;
-    const int alignedStart = ei_alignmentOffset(mat,size);
+    const int alignedStart = ei_first_aligned(mat);
     enum {
       alignment = (Derived::Flags & DirectAccessBit) || (Derived::Flags & AlignedBit)
                 ? Aligned : Unaligned
diff --git a/Eigen/src/Core/SolveTriangular.h b/Eigen/src/Core/SolveTriangular.h
index 618e29828..9dc019d17 100644
--- a/Eigen/src/Core/SolveTriangular.h
+++ b/Eigen/src/Core/SolveTriangular.h
@@ -25,13 +25,27 @@
 #ifndef EIGEN_SOLVETRIANGULAR_H
 #define EIGEN_SOLVETRIANGULAR_H
 
+template<typename Lhs, typename Rhs, int Side>
+class ei_trsolve_traits
+{
+  private:
+    enum {
+      RhsIsVectorAtCompileTime = (Side==OnTheLeft ? Rhs::ColsAtCompileTime : Rhs::RowsAtCompileTime)==1
+    };
+  public:
+    enum {
+      Unrolling   = (RhsIsVectorAtCompileTime && Rhs::SizeAtCompileTime <= 8)
+                  ? CompleteUnrolling : NoUnrolling,
+      RhsVectors  = RhsIsVectorAtCompileTime ? 1 : Dynamic
+    };
+};
+
 template<typename Lhs, typename Rhs,
-  int Mode, // can be Upper/Lower | UnitDiag
   int Side, // can be OnTheLeft/OnTheRight
-  int Unrolling = Rhs::IsVectorAtCompileTime && Rhs::SizeAtCompileTime <= 8 // FIXME
-                ? CompleteUnrolling : NoUnrolling,
+  int Mode, // can be Upper/Lower | UnitDiag
+  int Unrolling = ei_trsolve_traits<Lhs,Rhs,Side>::Unrolling,
   int StorageOrder = (int(Lhs::Flags) & RowMajorBit) ? RowMajor : ColMajor,
-  int RhsCols = Rhs::ColsAtCompileTime
+  int RhsVectors = ei_trsolve_traits<Lhs,Rhs,Side>::RhsVectors
   >
 struct ei_triangular_solver_selector;
 
@@ -142,12 +156,24 @@ struct ei_triangular_solver_selector<Lhs,Rhs,OnTheLeft,Mode,NoUnrolling,ColMajor
   }
 };
 
+// transpose OnTheRight cases for vectors
+template<typename Lhs, typename Rhs, int Mode, int Unrolling, int StorageOrder>
+struct ei_triangular_solver_selector<Lhs,Rhs,OnTheRight,Mode,Unrolling,StorageOrder,1>
+{
+  static void run(const Lhs& lhs, Rhs& rhs)
+  {
+    Transpose<Rhs> rhsTr(rhs);
+    Transpose<Lhs> lhsTr(lhs);
+    ei_triangular_solver_selector<Transpose<Lhs>,Transpose<Rhs>,OnTheLeft,TriangularView<Lhs,Mode>::TransposeMode>::run(lhsTr,rhsTr);
+  }
+};
+
 template <typename Scalar, int Side, int Mode, bool Conjugate, int TriStorageOrder, int OtherStorageOrder>
 struct ei_triangular_solve_matrix;
 
 // the rhs is a matrix
-template<typename Lhs, typename Rhs, int Side, int Mode, int StorageOrder, int RhsCols>
-struct ei_triangular_solver_selector<Lhs,Rhs,Side,Mode,NoUnrolling,StorageOrder,RhsCols>
+template<typename Lhs, typename Rhs, int Side, int Mode, int StorageOrder>
+struct ei_triangular_solver_selector<Lhs,Rhs,Side,Mode,NoUnrolling,StorageOrder,Dynamic>
 {
   typedef typename Rhs::Scalar Scalar;
   typedef ei_blas_traits<Lhs> LhsProductTraits;
diff --git a/Eigen/src/Core/StableNorm.h b/Eigen/src/Core/StableNorm.h
index 2874f0fd8..b4d6aa353 100644
--- a/Eigen/src/Core/StableNorm.h
+++ b/Eigen/src/Core/StableNorm.h
@@ -65,9 +65,9 @@ MatrixBase<Derived>::stableNorm() const
   int bi=0;
   if ((int(Flags)&DirectAccessBit) && !(int(Flags)&AlignedBit))
   {
-    bi = ei_alignmentOffset(&const_cast_derived().coeffRef(0), n);
+    bi = ei_first_aligned(&const_cast_derived().coeffRef(0), n);
     if (bi>0)
-      ei_stable_norm_kernel(this->start(bi), ssq, scale, invScale);
+      ei_stable_norm_kernel(this->head(bi), ssq, scale, invScale);
   }
   for (; bi<n; bi+=blockSize)
     ei_stable_norm_kernel(this->segment(bi,std::min(blockSize, n - bi)).template forceAlignedAccessIf<Alignment>(), ssq, scale, invScale);
diff --git a/Eigen/src/Core/TriangularMatrix.h b/Eigen/src/Core/TriangularMatrix.h
index e593a468d..62d800fef 100644
--- a/Eigen/src/Core/TriangularMatrix.h
+++ b/Eigen/src/Core/TriangularMatrix.h
@@ -95,12 +95,14 @@ template<typename Derived> class TriangularBase : public AnyMatrixBase<Derived>
                 || ((Mode==StrictlyLowerTriangular || Mode==UnitLowerTriangular) && col<row));
     }
 
+    #ifdef EIGEN_INTERNAL_DEBUGGING
     void check_coordinates_internal(int row, int col)
     {
-      #ifdef EIGEN_INTERNAL_DEBUGGING
       check_coordinates(row, col);
-      #endif
     }
+    #else
+    void check_coordinates_internal(int , int ) {}
+    #endif
 
 };
 
diff --git a/Eigen/src/Core/VectorBlock.h b/Eigen/src/Core/VectorBlock.h
index 96af71b36..760c097ad 100644
--- a/Eigen/src/Core/VectorBlock.h
+++ b/Eigen/src/Core/VectorBlock.h
@@ -154,16 +154,16 @@ DenseBase<Derived>::segment(int start, int size) const
   */
 template<typename Derived>
 inline VectorBlock<Derived>
-DenseBase<Derived>::start(int size)
+DenseBase<Derived>::head(int size)
 {
   EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
   return VectorBlock<Derived>(derived(), 0, size);
 }
 
-/** This is the const version of start(int).*/
+/** This is the const version of head(int).*/
 template<typename Derived>
 inline const VectorBlock<Derived>
-DenseBase<Derived>::start(int size) const
+DenseBase<Derived>::head(int size) const
 {
   EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
   return VectorBlock<Derived>(derived(), 0, size);
@@ -186,16 +186,16 @@ DenseBase<Derived>::start(int size) const
   */
 template<typename Derived>
 inline VectorBlock<Derived>
-DenseBase<Derived>::end(int size)
+DenseBase<Derived>::tail(int size)
 {
   EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
   return VectorBlock<Derived>(derived(), this->size() - size, size);
 }
 
-/** This is the const version of end(int).*/
+/** This is the const version of tail(int).*/
 template<typename Derived>
 inline const VectorBlock<Derived>
-DenseBase<Derived>::end(int size) const
+DenseBase<Derived>::tail(int size) const
 {
   EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
   return VectorBlock<Derived>(derived(), this->size() - size, size);
@@ -247,17 +247,17 @@ DenseBase<Derived>::segment(int start) const
 template<typename Derived>
 template<int Size>
 inline VectorBlock<Derived,Size>
-DenseBase<Derived>::start()
+DenseBase<Derived>::head()
 {
   EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
   return VectorBlock<Derived,Size>(derived(), 0);
 }
 
-/** This is the const version of start<int>().*/
+/** This is the const version of head<int>().*/
 template<typename Derived>
 template<int Size>
 inline const VectorBlock<Derived,Size>
-DenseBase<Derived>::start() const
+DenseBase<Derived>::head() const
 {
   EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
   return VectorBlock<Derived,Size>(derived(), 0);
@@ -277,17 +277,17 @@ DenseBase<Derived>::start() const
 template<typename Derived>
 template<int Size>
 inline VectorBlock<Derived,Size>
-DenseBase<Derived>::end()
+DenseBase<Derived>::tail()
 {
   EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
   return VectorBlock<Derived, Size>(derived(), size() - Size);
 }
 
-/** This is the const version of end<int>.*/
+/** This is the const version of tail<int>.*/
 template<typename Derived>
 template<int Size>
 inline const VectorBlock<Derived,Size>
-DenseBase<Derived>::end() const
+DenseBase<Derived>::tail() const
 {
   EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
   return VectorBlock<Derived, Size>(derived(), size() - Size);
diff --git a/Eigen/src/Core/products/GeneralMatrixVector.h b/Eigen/src/Core/products/GeneralMatrixVector.h
index a18e5ef1d..3296f32ff 100644
--- a/Eigen/src/Core/products/GeneralMatrixVector.h
+++ b/Eigen/src/Core/products/GeneralMatrixVector.h
@@ -69,7 +69,7 @@ void ei_cache_friendly_product_colmajor_times_vector(
 
   // How many coeffs of the result do we have to skip to be aligned.
   // Here we assume data are at least aligned on the base scalar type.
-  int alignedStart = ei_alignmentOffset(res,size);
+  int alignedStart = ei_first_aligned(res,size);
   int alignedSize = PacketSize>1 ? alignedStart + ((size-alignedStart) & ~PacketAlignedMask) : 0;
   const int peeledSize  = peels>1 ? alignedStart + ((alignedSize-alignedStart) & ~PeelAlignedMask) : alignedStart;
 
@@ -79,7 +79,7 @@ void ei_cache_friendly_product_colmajor_times_vector(
                        : FirstAligned;
 
   // we cannot assume the first element is aligned because of sub-matrices
-  const int lhsAlignmentOffset = ei_alignmentOffset(lhs,size);
+  const int lhsAlignmentOffset = ei_first_aligned(lhs,size);
 
   // find how many columns do we have to skip to be aligned with the result (if possible)
   int skipColumns = 0;
@@ -282,7 +282,7 @@ static EIGEN_DONT_INLINE void ei_cache_friendly_product_rowmajor_times_vector(
   // How many coeffs of the result do we have to skip to be aligned.
   // Here we assume data are at least aligned on the base scalar type
   // if that's not the case then vectorization is discarded, see below.
-  int alignedStart = ei_alignmentOffset(rhs, size);
+  int alignedStart = ei_first_aligned(rhs, size);
   int alignedSize = PacketSize>1 ? alignedStart + ((size-alignedStart) & ~PacketAlignedMask) : 0;
   const int peeledSize  = peels>1 ? alignedStart + ((alignedSize-alignedStart) & ~PeelAlignedMask) : alignedStart;
 
@@ -292,7 +292,7 @@ static EIGEN_DONT_INLINE void ei_cache_friendly_product_rowmajor_times_vector(
                        : FirstAligned;
 
   // we cannot assume the first element is aligned because of sub-matrices
-  const int lhsAlignmentOffset = ei_alignmentOffset(lhs,size);
+  const int lhsAlignmentOffset = ei_first_aligned(lhs,size);
 
   // find how many rows do we have to skip to be aligned with rhs (if possible)
   int skipRows = 0;
diff --git a/Eigen/src/Core/products/SelfadjointMatrixMatrix.h b/Eigen/src/Core/products/SelfadjointMatrixMatrix.h
index 5e025b90b..35efa752e 100644
--- a/Eigen/src/Core/products/SelfadjointMatrixMatrix.h
+++ b/Eigen/src/Core/products/SelfadjointMatrixMatrix.h
@@ -313,7 +313,6 @@ struct ei_product_selfadjoint_matrix<Scalar,LhsStorageOrder,false,ConjugateLhs,
     int size = cols;
 
     ei_const_blas_data_mapper<Scalar, LhsStorageOrder> lhs(_lhs,lhsStride);
-    ei_const_blas_data_mapper<Scalar, RhsStorageOrder> rhs(_rhs,rhsStride);
 
     if (ConjugateRhs)
       alpha = ei_conj(alpha);
diff --git a/Eigen/src/Core/products/SelfadjointMatrixVector.h b/Eigen/src/Core/products/SelfadjointMatrixVector.h
index c27454bee..32b7f220e 100644
--- a/Eigen/src/Core/products/SelfadjointMatrixVector.h
+++ b/Eigen/src/Core/products/SelfadjointMatrixVector.h
@@ -86,7 +86,7 @@ static EIGEN_DONT_INLINE void ei_product_selfadjoint_vector(
     size_t starti = FirstTriangular ? 0 : j+2;
     size_t endi   = FirstTriangular ? j : size;
     size_t alignedEnd = starti;
-    size_t alignedStart = (starti) + ei_alignmentOffset(&res[starti], endi-starti);
+    size_t alignedStart = (starti) + ei_first_aligned(&res[starti], endi-starti);
     alignedEnd = alignedStart + ((endi-alignedStart)/(PacketSize))*(PacketSize);
 
     res[j] += cj0.pmul(A0[j], t0);
diff --git a/Eigen/src/Core/products/SelfadjointRank2Update.h b/Eigen/src/Core/products/SelfadjointRank2Update.h
index 69cf1896c..1c0e503e6 100644
--- a/Eigen/src/Core/products/SelfadjointRank2Update.h
+++ b/Eigen/src/Core/products/SelfadjointRank2Update.h
@@ -41,8 +41,8 @@ struct ei_selfadjoint_rank2_update_selector<Scalar,UType,VType,LowerTriangular>
     for (int i=0; i<size; ++i)
     {
       Map<Matrix<Scalar,Dynamic,1> >(mat+stride*i+i, size-i) +=
-                        (alpha * ei_conj(u.coeff(i))) * v.end(size-i)
-                      + (alpha * ei_conj(v.coeff(i))) * u.end(size-i);
+                        (alpha * ei_conj(u.coeff(i))) * v.tail(size-i)
+                      + (alpha * ei_conj(v.coeff(i))) * u.tail(size-i);
     }
   }
 };
@@ -55,8 +55,8 @@ struct ei_selfadjoint_rank2_update_selector<Scalar,UType,VType,UpperTriangular>
     const int size = u.size();
     for (int i=0; i<size; ++i)
       Map<Matrix<Scalar,Dynamic,1> >(mat+stride*i, i+1) +=
-                        (alpha * ei_conj(u.coeff(i))) * v.start(i+1)
-                      + (alpha * ei_conj(v.coeff(i))) * u.start(i+1);
+                        (alpha * ei_conj(u.coeff(i))) * v.head(i+1)
+                      + (alpha * ei_conj(v.coeff(i))) * u.head(i+1);
   }
 };
 
diff --git a/Eigen/src/Core/util/BlasUtil.h b/Eigen/src/Core/util/BlasUtil.h
index fa21ceebb..916a125e3 100644
--- a/Eigen/src/Core/util/BlasUtil.h
+++ b/Eigen/src/Core/util/BlasUtil.h
@@ -223,7 +223,8 @@ struct ei_blas_traits<Transpose<NestedXpr> >
   typedef typename NestedXpr::Scalar Scalar;
   typedef ei_blas_traits<NestedXpr> Base;
   typedef Transpose<NestedXpr> XprType;
-  typedef Transpose<typename Base::_ExtractType> ExtractType;
+  typedef Transpose<typename Base::_ExtractType>  ExtractType;
+  typedef Transpose<typename Base::_ExtractType> _ExtractType;
   typedef typename ei_meta_if<int(Base::ActualAccess)==HasDirectAccess,
     ExtractType,
     typename ExtractType::PlainMatrixType
diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h
index 524bec2fc..bfc6ff686 100644
--- a/Eigen/src/Core/util/Memory.h
+++ b/Eigen/src/Core/util/Memory.h
@@ -209,27 +209,53 @@ template<typename T, bool Align> inline void ei_conditional_aligned_delete(T *pt
   ei_conditional_aligned_free<Align>(ptr);
 }
 
-/** \internal \returns the number of elements which have to be skipped to
-  * find the first 16-byte aligned element
+/** \internal \returns the index of the first element of the array that is well aligned for vectorization.
   *
-  * There is also the variant ei_alignmentOffset(const MatrixBase&, Integer) defined in Coeffs.h.
+  * \param array the address of the start of the array
+  * \param size the size of the array
+  *
+  * \note If no element of the array is well aligned, the size of the array is returned. Typically,
+  * for example with SSE, "well aligned" means 16-byte-aligned. If vectorization is disabled or if the
+  * packet size for the given scalar type is 1, then everything is considered well-aligned.
+  *
+  * \note If the scalar type is vectorizable, we rely on the following assumptions: sizeof(Scalar) is a
+  * power of 2, the packet size in bytes is also a power of 2, and is a multiple of sizeof(Scalar). On the
+  * other hand, we do not assume that the array address is a multiple of sizeof(Scalar), as that fails for
+  * example with Scalar=double on certain 32-bit platforms, see bug #79.
+  *
+  * There is also the variant ei_first_aligned(const MatrixBase&, Integer) defined in Coeffs.h.
   */
 template<typename Scalar, typename Integer>
-inline static Integer ei_alignmentOffset(const Scalar* ptr, Integer maxOffset)
+inline static Integer ei_first_aligned(const Scalar* array, Integer size)
 {
   typedef typename ei_packet_traits<Scalar>::type Packet;
-  const Integer PacketSize = ei_packet_traits<Scalar>::size;
-  const Integer PacketAlignedMask = PacketSize-1;
-  const bool Vectorized = PacketSize>1;
-  return Vectorized
-          ? std::min<Integer>( (PacketSize - (Integer((size_t(ptr)/sizeof(Scalar))) & PacketAlignedMask))
-                           & PacketAlignedMask, maxOffset)
-          : 0;
+  enum { PacketSize = ei_packet_traits<Scalar>::size,
+         PacketAlignedMask = PacketSize-1
+  };
+  
+  if(PacketSize==1)
+  {
+    // Either there is no vectorization, or a packet consists of exactly 1 scalar so that all elements
+    // of the array have the same aligment.
+    return 0;
+  }
+  else if(size_t(array) & (sizeof(Scalar)-1))
+  {
+    // There is vectorization for this scalar type, but the array is not aligned to the size of a single scalar.
+    // Consequently, no element of the array is well aligned.
+    return size;
+  }
+  else
+  {
+    return std::min<Integer>( (PacketSize - (Integer((size_t(array)/sizeof(Scalar))) & PacketAlignedMask))
+                           & PacketAlignedMask, size);
+  }
 }
 
 /** \internal
   * ei_aligned_stack_alloc(SIZE) allocates an aligned buffer of SIZE bytes
-  * on the stack if SIZE is smaller than EIGEN_STACK_ALLOCATION_LIMIT.
+  * on the stack if SIZE is smaller than EIGEN_STACK_ALLOCATION_LIMIT, and
+  * if stack allocation is supported by the platform (currently, this is linux only).
   * Otherwise the memory is allocated on the heap.
   * Data allocated with ei_aligned_stack_alloc \b must be freed by calling ei_aligned_stack_free(PTR,SIZE).
   * \code
@@ -381,10 +407,10 @@ public:
         ei_aligned_free( p );
     }
 
-    bool operator!=(const aligned_allocator<T>& other) const
+    bool operator!=(const aligned_allocator<T>& ) const
     { return false; }
 
-    bool operator==(const aligned_allocator<T>& other) const
+    bool operator==(const aligned_allocator<T>& ) const
     { return true; }
 };
 
diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h
index 6d4a5c7bc..4bff09252 100644
--- a/Eigen/src/Core/util/XprHelper.h
+++ b/Eigen/src/Core/util/XprHelper.h
@@ -109,23 +109,6 @@ template<int _Rows, int _Cols> struct ei_size_at_compile_time
  * whereas ei_eval is a const reference in the case of a matrix
  */
 
-// template<typename Derived> class MatrixBase;
-// template<typename Derived> class ArrayBase;
-// template<typename Object> struct ei_is_matrix_or_array
-// {
-//   struct is_matrix  {int a[1];};
-//   struct is_array   {int a[2];};
-//   struct is_none    {int a[3];};
-//
-//   template<typename T>
-//   static is_matrix testBaseClass(const MatrixBase<T>*);
-//   template<typename T>
-//   static is_array  testBaseClass(const ArrayBase<T>*);
-// //   static is_none   testBaseClass(...);
-//
-//   enum {BaseClassType = sizeof(testBaseClass(static_cast<const Object*>(0)))};
-// };
-
 template<typename T, typename StorageType = typename ei_traits<T>::StorageType> class ei_plain_matrix_type;
 template<typename T, typename BaseClassType> struct ei_plain_matrix_type_dense;
 template<typename T> struct ei_plain_matrix_type<T,Dense>
diff --git a/Eigen/src/Eigen2Support/VectorBlock.h b/Eigen/src/Eigen2Support/VectorBlock.h
new file mode 100644
index 000000000..c3be84c9b
--- /dev/null
+++ b/Eigen/src/Eigen2Support/VectorBlock.h
@@ -0,0 +1,105 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2009 Gael Guennebaud <g.gael@free.fr>
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// Eigen is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 3 of the License, or (at your option) any later version.
+//
+// Alternatively, you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of
+// the License, or (at your option) any later version.
+//
+// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License and a copy of the GNU General Public License along with
+// Eigen. If not, see <http://www.gnu.org/licenses/>.
+
+#ifndef EIGEN_VECTORBLOCK2_H
+#define EIGEN_VECTORBLOCK2_H
+
+/** \deprecated use DenseMase::start(int) */
+template<typename Derived>
+inline VectorBlock<Derived>
+MatrixBase<Derived>::start(int size)
+{
+  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+  return VectorBlock<Derived>(derived(), 0, size);
+}
+
+/** \deprecated use DenseMase::start(int) */
+template<typename Derived>
+inline const VectorBlock<Derived>
+MatrixBase<Derived>::start(int size) const
+{
+  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+  return VectorBlock<Derived>(derived(), 0, size);
+}
+
+/** \deprecated use DenseMase::end(int) */
+template<typename Derived>
+inline VectorBlock<Derived>
+MatrixBase<Derived>::end(int size)
+{
+  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+  return VectorBlock<Derived>(derived(), this->size() - size, size);
+}
+
+/** \deprecated use DenseMase::end(int) */
+template<typename Derived>
+inline const VectorBlock<Derived>
+MatrixBase<Derived>::end(int size) const
+{
+  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+  return VectorBlock<Derived>(derived(), this->size() - size, size);
+}
+
+/** \deprecated use DenseMase::start() */
+template<typename Derived>
+template<int Size>
+inline VectorBlock<Derived,Size>
+MatrixBase<Derived>::start()
+{
+  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+  return VectorBlock<Derived,Size>(derived(), 0);
+}
+
+/** \deprecated use DenseMase::start() */
+template<typename Derived>
+template<int Size>
+inline const VectorBlock<Derived,Size>
+MatrixBase<Derived>::start() const
+{
+  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+  return VectorBlock<Derived,Size>(derived(), 0);
+}
+
+/** \deprecated use DenseMase::end() */
+template<typename Derived>
+template<int Size>
+inline VectorBlock<Derived,Size>
+MatrixBase<Derived>::end()
+{
+  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+  return VectorBlock<Derived, Size>(derived(), size() - Size);
+}
+
+/** \deprecated use DenseMase::end() */
+template<typename Derived>
+template<int Size>
+inline const VectorBlock<Derived,Size>
+MatrixBase<Derived>::end() const
+{
+  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+  return VectorBlock<Derived, Size>(derived(), size() - Size);
+}
+
+#endif // EIGEN_VECTORBLOCK2_H
diff --git a/Eigen/src/Eigenvalues/ComplexEigenSolver.h b/Eigen/src/Eigenvalues/ComplexEigenSolver.h
index 0441d4f02..d55dc2a96 100644
--- a/Eigen/src/Eigenvalues/ComplexEigenSolver.h
+++ b/Eigen/src/Eigenvalues/ComplexEigenSolver.h
@@ -133,7 +133,7 @@ void ComplexEigenSolver<MatrixType>::compute(const MatrixType& matrix)
     for (int i=0; i<n; i++)
     {
       int k;
-      m_eivalues.cwiseAbs().end(n-i).minCoeff(&k);
+      m_eivalues.cwiseAbs().tail(n-i).minCoeff(&k);
       if (k != 0)
       {
         k += i;
diff --git a/Eigen/src/Eigenvalues/EigenSolver.h b/Eigen/src/Eigenvalues/EigenSolver.h
index c9c239b98..3f9e30a6e 100644
--- a/Eigen/src/Eigenvalues/EigenSolver.h
+++ b/Eigen/src/Eigenvalues/EigenSolver.h
@@ -620,7 +620,7 @@ void EigenSolver<MatrixType>::hqr2(MatrixType& matH)
           // Overflow control
           t = ei_abs(matH.coeff(i,n));
           if ((eps * t) * t > 1)
-            matH.col(n).end(nn-i) /= t;
+            matH.col(n).tail(nn-i) /= t;
         }
       }
     }
@@ -708,7 +708,7 @@ void EigenSolver<MatrixType>::hqr2(MatrixType& matH)
     // in this algo low==0 and high==nn-1 !!
     if (i < low || i > high)
     {
-      m_eivec.row(i).end(nn-i) = matH.row(i).end(nn-i);
+      m_eivec.row(i).tail(nn-i) = matH.row(i).tail(nn-i);
     }
   }
 
diff --git a/Eigen/src/Eigenvalues/HessenbergDecomposition.h b/Eigen/src/Eigenvalues/HessenbergDecomposition.h
index 9f7df49bc..636b2f4f7 100644
--- a/Eigen/src/Eigenvalues/HessenbergDecomposition.h
+++ b/Eigen/src/Eigenvalues/HessenbergDecomposition.h
@@ -1,7 +1,7 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
-// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
+// Copyright (C) 2008-2009 Gael Guennebaud <g.gael@free.fr>
 //
 // Eigen is free software; you can redistribute it and/or
 // modify it under the terms of the GNU Lesser General Public
@@ -55,25 +55,23 @@ template<typename _MatrixType> class HessenbergDecomposition
     };
 
     typedef Matrix<Scalar, SizeMinusOne, 1> CoeffVectorType;
-    typedef Matrix<RealScalar, Size, 1> DiagonalType;
-    typedef Matrix<RealScalar, SizeMinusOne, 1> SubDiagonalType;
-
-    typedef typename Diagonal<MatrixType,0>::RealReturnType DiagonalReturnType;
-
-    typedef typename Diagonal<
-        Block<MatrixType,SizeMinusOne,SizeMinusOne>,0 >::RealReturnType SubDiagonalReturnType;
 
     /** This constructor initializes a HessenbergDecomposition object for
       * further use with HessenbergDecomposition::compute()
       */
     HessenbergDecomposition(int size = Size==Dynamic ? 2 : Size)
-      : m_matrix(size,size), m_hCoeffs(size-1)
-    {}
+      : m_matrix(size,size)
+    {
+      if(size>1)
+        m_hCoeffs.resize(size-1);
+    }
 
     HessenbergDecomposition(const MatrixType& matrix)
-      : m_matrix(matrix),
-        m_hCoeffs(matrix.cols()-1)
+      : m_matrix(matrix)
     {
+      if(matrix.rows()<2)
+        return;
+      m_hCoeffs.resize(matrix.rows()-1,1);
       _compute(m_matrix, m_hCoeffs);
     }
 
@@ -84,6 +82,8 @@ template<typename _MatrixType> class HessenbergDecomposition
     void compute(const MatrixType& matrix)
     {
       m_matrix = matrix;
+      if(matrix.rows()<2)
+        return;
       m_hCoeffs.resize(matrix.rows()-1,1);
       _compute(m_matrix, m_hCoeffs);
     }
@@ -150,7 +150,7 @@ void HessenbergDecomposition<MatrixType>::_compute(MatrixType& matA, CoeffVector
     int remainingSize = n-i-1;
     RealScalar beta;
     Scalar h;
-    matA.col(i).end(remainingSize).makeHouseholderInPlace(h, beta);
+    matA.col(i).tail(remainingSize).makeHouseholderInPlace(h, beta);
     matA.col(i).coeffRef(i+1) = beta;
     hCoeffs.coeffRef(i) = h;
 
@@ -159,11 +159,11 @@ void HessenbergDecomposition<MatrixType>::_compute(MatrixType& matA, CoeffVector
 
     // A = H A
     matA.corner(BottomRight, remainingSize, remainingSize)
-        .applyHouseholderOnTheLeft(matA.col(i).end(remainingSize-1), h, &temp.coeffRef(0));
+        .applyHouseholderOnTheLeft(matA.col(i).tail(remainingSize-1), h, &temp.coeffRef(0));
 
     // A = A H'
     matA.corner(BottomRight, n, remainingSize)
-        .applyHouseholderOnTheRight(matA.col(i).end(remainingSize-1).conjugate(), ei_conj(h), &temp.coeffRef(0));
+        .applyHouseholderOnTheRight(matA.col(i).tail(remainingSize-1).conjugate(), ei_conj(h), &temp.coeffRef(0));
   }
 }
 
@@ -178,7 +178,7 @@ HessenbergDecomposition<MatrixType>::matrixQ() const
   for (int i = n-2; i>=0; i--)
   {
     matQ.corner(BottomRight,n-i-1,n-i-1)
-        .applyHouseholderOnTheLeft(m_matrix.col(i).end(n-i-2), ei_conj(m_hCoeffs.coeff(i)), &temp.coeffRef(0,0));
+        .applyHouseholderOnTheLeft(m_matrix.col(i).tail(n-i-2), ei_conj(m_hCoeffs.coeff(i)), &temp.coeffRef(0,0));
   }
   return matQ;
 }
diff --git a/Eigen/src/Eigenvalues/Tridiagonalization.h b/Eigen/src/Eigenvalues/Tridiagonalization.h
index d8dcfb047..e43605b0f 100644
--- a/Eigen/src/Eigenvalues/Tridiagonalization.h
+++ b/Eigen/src/Eigenvalues/Tridiagonalization.h
@@ -197,25 +197,24 @@ void Tridiagonalization<MatrixType>::_compute(MatrixType& matA, CoeffVectorType&
 {
   assert(matA.rows()==matA.cols());
   int n = matA.rows();
-  Matrix<Scalar,1,Dynamic> aux(n);
   for (int i = 0; i<n-1; ++i)
   {
     int remainingSize = n-i-1;
     RealScalar beta;
     Scalar h;
-    matA.col(i).end(remainingSize).makeHouseholderInPlace(h, beta);
+    matA.col(i).tail(remainingSize).makeHouseholderInPlace(h, beta);
 
     // Apply similarity transformation to remaining columns,
-    // i.e., A = H A H' where H = I - h v v' and v = matA.col(i).end(n-i-1)
+    // i.e., A = H A H' where H = I - h v v' and v = matA.col(i).tail(n-i-1)
     matA.col(i).coeffRef(i+1) = 1;
 
-    hCoeffs.end(n-i-1) = (matA.corner(BottomRight,remainingSize,remainingSize).template selfadjointView<LowerTriangular>()
-                        * (ei_conj(h) * matA.col(i).end(remainingSize)));
+    hCoeffs.tail(n-i-1) = (matA.corner(BottomRight,remainingSize,remainingSize).template selfadjointView<LowerTriangular>()
+                        * (ei_conj(h) * matA.col(i).tail(remainingSize)));
 
-    hCoeffs.end(n-i-1) += (ei_conj(h)*Scalar(-0.5)*(hCoeffs.end(remainingSize).dot(matA.col(i).end(remainingSize)))) * matA.col(i).end(n-i-1);
+    hCoeffs.tail(n-i-1) += (ei_conj(h)*Scalar(-0.5)*(hCoeffs.tail(remainingSize).dot(matA.col(i).tail(remainingSize)))) * matA.col(i).tail(n-i-1);
 
     matA.corner(BottomRight, remainingSize, remainingSize).template selfadjointView<LowerTriangular>()
-      .rankUpdate(matA.col(i).end(remainingSize), hCoeffs.end(remainingSize), -1);
+      .rankUpdate(matA.col(i).tail(remainingSize), hCoeffs.tail(remainingSize), -1);
 
     matA.col(i).coeffRef(i+1) = beta;
     hCoeffs.coeffRef(i) = h;
@@ -243,7 +242,7 @@ void Tridiagonalization<MatrixType>::matrixQInPlace(MatrixBase<QDerived>* q) con
   for (int i = n-2; i>=0; i--)
   {
     matQ.corner(BottomRight,n-i-1,n-i-1)
-        .applyHouseholderOnTheLeft(m_matrix.col(i).end(n-i-2), ei_conj(m_hCoeffs.coeff(i)), &aux.coeffRef(0,0));
+        .applyHouseholderOnTheLeft(m_matrix.col(i).tail(n-i-2), ei_conj(m_hCoeffs.coeff(i)), &aux.coeffRef(0,0));
   }
 }
 
diff --git a/Eigen/src/Geometry/OrthoMethods.h b/Eigen/src/Geometry/OrthoMethods.h
index 79baeadd5..c10b6abf4 100644
--- a/Eigen/src/Geometry/OrthoMethods.h
+++ b/Eigen/src/Geometry/OrthoMethods.h
@@ -173,7 +173,7 @@ struct ei_unitOrthogonal_selector<Derived,3>
     if((!ei_isMuchSmallerThan(src.x(), src.z()))
     || (!ei_isMuchSmallerThan(src.y(), src.z())))
     {
-      RealScalar invnm = RealScalar(1)/src.template start<2>().norm();
+      RealScalar invnm = RealScalar(1)/src.template head<2>().norm();
       perp.coeffRef(0) = -ei_conj(src.y())*invnm;
       perp.coeffRef(1) = ei_conj(src.x())*invnm;
       perp.coeffRef(2) = 0;
@@ -184,7 +184,7 @@ struct ei_unitOrthogonal_selector<Derived,3>
      */
     else
     {
-      RealScalar invnm = RealScalar(1)/src.template end<2>().norm();
+      RealScalar invnm = RealScalar(1)/src.template tail<2>().norm();
       perp.coeffRef(0) = 0;
       perp.coeffRef(1) = -ei_conj(src.z())*invnm;
       perp.coeffRef(2) = ei_conj(src.y())*invnm;
diff --git a/Eigen/src/Geometry/Quaternion.h b/Eigen/src/Geometry/Quaternion.h
index 861eff19c..24772089e 100644
--- a/Eigen/src/Geometry/Quaternion.h
+++ b/Eigen/src/Geometry/Quaternion.h
@@ -77,10 +77,10 @@ public:
   inline Scalar& w() { return this->derived().coeffs().coeffRef(3); }
 
   /** \returns a read-only vector expression of the imaginary part (x,y,z) */
-  inline const VectorBlock<Coefficients,3> vec() const { return coeffs().template start<3>(); }
+  inline const VectorBlock<Coefficients,3> vec() const { return coeffs().template head<3>(); }
 
   /** \returns a vector expression of the imaginary part (x,y,z) */
-  inline VectorBlock<Coefficients,3> vec() { return coeffs().template start<3>(); }
+  inline VectorBlock<Coefficients,3> vec() { return coeffs().template head<3>(); }
 
   /** \returns a read-only vector expression of the coefficients (x,y,z,w) */
   inline const typename ei_traits<Derived>::Coefficients& coeffs() const { return derived().coeffs(); }
diff --git a/Eigen/src/Geometry/Transform.h b/Eigen/src/Geometry/Transform.h
index 89df73505..b945ea43f 100644
--- a/Eigen/src/Geometry/Transform.h
+++ b/Eigen/src/Geometry/Transform.h
@@ -1102,7 +1102,7 @@ struct ei_transform_right_product_impl<Other,AffineCompact, Dim,HDim, HDim,1>
   static ResultType run(const TransformType& tr, const Other& other)
   {
     ResultType res;
-    res.template start<HDim>() = tr.matrix() * other;
+    res.template head<HDim>() = tr.matrix() * other;
     res.coeffRef(Dim) = other.coeff(Dim);
   }
 };
@@ -1120,7 +1120,7 @@ struct ei_transform_right_product_impl<Other,Mode, Dim,HDim, Dim,Dim>
     res.matrix().col(Dim) = tr.matrix().col(Dim);
     res.linearExt().noalias() = (tr.linearExt() * other);
     if(Mode==Affine)
-      res.matrix().row(Dim).template start<Dim>() = tr.matrix().row(Dim).template start<Dim>();
+      res.matrix().row(Dim).template head<Dim>() = tr.matrix().row(Dim).template head<Dim>();
     return res;
   }
 };
diff --git a/Eigen/src/Geometry/Umeyama.h b/Eigen/src/Geometry/Umeyama.h
index 551a69e5b..5be098d77 100644
--- a/Eigen/src/Geometry/Umeyama.h
+++ b/Eigen/src/Geometry/Umeyama.h
@@ -170,8 +170,8 @@ umeyama(const MatrixBase<Derived>& src, const MatrixBase<OtherDerived>& dst, boo
   // Eq. (41)
   // Note that we first assign dst_mean to the destination so that there no need
   // for a temporary.
-  Rt.col(m).start(m) = dst_mean;
-  Rt.col(m).start(m).noalias() -= c*Rt.corner(TopLeft,m,m)*src_mean;
+  Rt.col(m).head(m) = dst_mean;
+  Rt.col(m).head(m).noalias() -= c*Rt.corner(TopLeft,m,m)*src_mean;
 
   if (with_scaling) Rt.block(0,0,m,m) *= c;
 
diff --git a/Eigen/src/Householder/HouseholderSequence.h b/Eigen/src/Householder/HouseholderSequence.h
index 25e962001..26e0f6a88 100644
--- a/Eigen/src/Householder/HouseholderSequence.h
+++ b/Eigen/src/Householder/HouseholderSequence.h
@@ -105,10 +105,10 @@ template<typename VectorsType, typename CoeffsType> class HouseholderSequence
       {
         if(m_trans)
           dst.corner(BottomRight, length-k, length-k)
-          .applyHouseholderOnTheRight(m_vectors.col(k).end(length-k-1), m_coeffs.coeff(k), &temp.coeffRef(0));
+          .applyHouseholderOnTheRight(m_vectors.col(k).tail(length-k-1), m_coeffs.coeff(k), &temp.coeffRef(0));
         else
           dst.corner(BottomRight, length-k, length-k)
-            .applyHouseholderOnTheLeft(m_vectors.col(k).end(length-k-1), m_coeffs.coeff(k), &temp.coeffRef(k));
+            .applyHouseholderOnTheLeft(m_vectors.col(k).tail(length-k-1), m_coeffs.coeff(k), &temp.coeffRef(k));
       }
     }
 
@@ -122,7 +122,7 @@ template<typename VectorsType, typename CoeffsType> class HouseholderSequence
       {
         int actual_k = m_trans ? vecs-k-1 : k;
         dst.corner(BottomRight, dst.rows(), length-actual_k)
-           .applyHouseholderOnTheRight(m_vectors.col(actual_k).end(length-actual_k-1), m_coeffs.coeff(actual_k), &temp.coeffRef(0));
+           .applyHouseholderOnTheRight(m_vectors.col(actual_k).tail(length-actual_k-1), m_coeffs.coeff(actual_k), &temp.coeffRef(0));
       }
     }
 
@@ -136,7 +136,7 @@ template<typename VectorsType, typename CoeffsType> class HouseholderSequence
       {
         int actual_k = m_trans ? k : vecs-k-1;
         dst.corner(BottomRight, length-actual_k, dst.cols())
-           .applyHouseholderOnTheLeft(m_vectors.col(actual_k).end(length-actual_k-1), m_coeffs.coeff(actual_k), &temp.coeffRef(0));
+           .applyHouseholderOnTheLeft(m_vectors.col(actual_k).tail(length-actual_k-1), m_coeffs.coeff(actual_k), &temp.coeffRef(0));
       }
     }
 
diff --git a/Eigen/src/Jacobi/Jacobi.h b/Eigen/src/Jacobi/Jacobi.h
index 727c97583..024a130f2 100644
--- a/Eigen/src/Jacobi/Jacobi.h
+++ b/Eigen/src/Jacobi/Jacobi.h
@@ -318,7 +318,7 @@ void /*EIGEN_DONT_INLINE*/ ei_apply_rotation_in_the_plane(VectorX& _x, VectorY&
     typedef typename ei_packet_traits<Scalar>::type Packet;
     enum { PacketSize = ei_packet_traits<Scalar>::size, Peeling = 2 };
 
-    int alignedStart = ei_alignmentOffset(y, size);
+    int alignedStart = ei_first_aligned(y, size);
     int alignedEnd = alignedStart + ((size-alignedStart)/PacketSize)*PacketSize;
 
     const Packet pc = ei_pset1(Scalar(j.c()));
@@ -336,7 +336,7 @@ void /*EIGEN_DONT_INLINE*/ ei_apply_rotation_in_the_plane(VectorX& _x, VectorY&
     Scalar* px = x + alignedStart;
     Scalar* py = y + alignedStart;
 
-    if(ei_alignmentOffset(x, size)==alignedStart)
+    if(ei_first_aligned(x, size)==alignedStart)
     {
       for(int i=alignedStart; i<alignedEnd; i+=PacketSize)
       {
diff --git a/Eigen/src/LU/FullPivLU.h b/Eigen/src/LU/FullPivLU.h
index 149af64bc..f62dcc1db 100644
--- a/Eigen/src/LU/FullPivLU.h
+++ b/Eigen/src/LU/FullPivLU.h
@@ -451,9 +451,9 @@ FullPivLU<MatrixType>& FullPivLU<MatrixType>::compute(const MatrixType& matrix)
     // bottom-right corner by Gaussian elimination.
 
     if(k<rows-1)
-      m_lu.col(k).end(rows-k-1) /= m_lu.coeff(k,k);
+      m_lu.col(k).tail(rows-k-1) /= m_lu.coeff(k,k);
     if(k<size-1)
-      m_lu.block(k+1,k+1,rows-k-1,cols-k-1).noalias() -= m_lu.col(k).end(rows-k-1) * m_lu.row(k).end(cols-k-1);
+      m_lu.block(k+1,k+1,rows-k-1,cols-k-1).noalias() -= m_lu.col(k).tail(rows-k-1) * m_lu.row(k).tail(cols-k-1);
   }
 
   // the main loop is over, we still have to accumulate the transpositions to find the
@@ -537,8 +537,8 @@ struct ei_kernel_retval<FullPivLU<_MatrixType> >
       m(dec().matrixLU().block(0, 0, rank(), cols));
     for(int i = 0; i < rank(); ++i)
     {
-      if(i) m.row(i).start(i).setZero();
-      m.row(i).end(cols-i) = dec().matrixLU().row(pivots.coeff(i)).end(cols-i);
+      if(i) m.row(i).head(i).setZero();
+      m.row(i).tail(cols-i) = dec().matrixLU().row(pivots.coeff(i)).tail(cols-i);
     }
     m.block(0, 0, rank(), rank());
     m.block(0, 0, rank(), rank()).template triangularView<StrictlyLowerTriangular>().setZero();
@@ -558,7 +558,7 @@ struct ei_kernel_retval<FullPivLU<_MatrixType> >
       m.col(i).swap(m.col(pivots.coeff(i)));
 
     // see the negative sign in the next line, that's what we were talking about above.
-    for(int i = 0; i < rank(); ++i) dst.row(dec().permutationQ().indices().coeff(i)) = -m.row(i).end(dimker);
+    for(int i = 0; i < rank(); ++i) dst.row(dec().permutationQ().indices().coeff(i)) = -m.row(i).tail(dimker);
     for(int i = rank(); i < cols; ++i) dst.row(dec().permutationQ().indices().coeff(i)).setZero();
     for(int k = 0; k < dimker; ++k) dst.coeffRef(dec().permutationQ().indices().coeff(rank()+k), k) = Scalar(1);
   }
diff --git a/Eigen/src/LU/PartialPivLU.h b/Eigen/src/LU/PartialPivLU.h
index deb29b5c6..6bb5c3ac7 100644
--- a/Eigen/src/LU/PartialPivLU.h
+++ b/Eigen/src/LU/PartialPivLU.h
@@ -229,7 +229,7 @@ struct ei_partial_lu_impl
     {
       int row_of_biggest_in_col;
       RealScalar biggest_in_corner
-        = lu.col(k).end(rows-k).cwiseAbs().maxCoeff(&row_of_biggest_in_col);
+        = lu.col(k).tail(rows-k).cwiseAbs().maxCoeff(&row_of_biggest_in_col);
       row_of_biggest_in_col += k;
 
       if(biggest_in_corner == 0) // the pivot is exactly zero: the matrix is singular
@@ -256,8 +256,8 @@ struct ei_partial_lu_impl
       {
         int rrows = rows-k-1;
         int rsize = size-k-1;
-        lu.col(k).end(rrows) /= lu.coeff(k,k);
-        lu.corner(BottomRight,rrows,rsize).noalias() -= lu.col(k).end(rrows) * lu.row(k).end(rsize);
+        lu.col(k).tail(rrows) /= lu.coeff(k,k);
+        lu.corner(BottomRight,rrows,rsize).noalias() -= lu.col(k).tail(rrows) * lu.row(k).tail(rsize);
       }
     }
     return true;
diff --git a/Eigen/src/QR/ColPivHouseholderQR.h b/Eigen/src/QR/ColPivHouseholderQR.h
index b4c1a5fcc..8b705de3c 100644
--- a/Eigen/src/QR/ColPivHouseholderQR.h
+++ b/Eigen/src/QR/ColPivHouseholderQR.h
@@ -359,14 +359,14 @@ ColPivHouseholderQR<MatrixType>& ColPivHouseholderQR<MatrixType>::compute(const
   {
     // first, we look up in our table colSqNorms which column has the biggest squared norm
     int biggest_col_index;
-    RealScalar biggest_col_sq_norm = colSqNorms.end(cols-k).maxCoeff(&biggest_col_index);
+    RealScalar biggest_col_sq_norm = colSqNorms.tail(cols-k).maxCoeff(&biggest_col_index);
     biggest_col_index += k;
 
     // since our table colSqNorms accumulates imprecision at every step, we must now recompute
     // the actual squared norm of the selected column.
     // Note that not doing so does result in solve() sometimes returning inf/nan values
     // when running the unit test with 1000 repetitions.
-    biggest_col_sq_norm = m_qr.col(biggest_col_index).end(rows-k).squaredNorm();
+    biggest_col_sq_norm = m_qr.col(biggest_col_index).tail(rows-k).squaredNorm();
 
     // we store that back into our table: it can't hurt to correct our table.
     colSqNorms.coeffRef(biggest_col_index) = biggest_col_sq_norm;
@@ -379,7 +379,7 @@ ColPivHouseholderQR<MatrixType>& ColPivHouseholderQR<MatrixType>::compute(const
     if(biggest_col_sq_norm < threshold_helper * (rows-k))
     {
       m_nonzero_pivots = k;
-      m_hCoeffs.end(size-k).setZero();
+      m_hCoeffs.tail(size-k).setZero();
       m_qr.corner(BottomRight,rows-k,cols-k)
           .template triangularView<StrictlyLowerTriangular>()
           .setZero();
@@ -396,7 +396,7 @@ ColPivHouseholderQR<MatrixType>& ColPivHouseholderQR<MatrixType>::compute(const
 
     // generate the householder vector, store it below the diagonal
     RealScalar beta;
-    m_qr.col(k).end(rows-k).makeHouseholderInPlace(m_hCoeffs.coeffRef(k), beta);
+    m_qr.col(k).tail(rows-k).makeHouseholderInPlace(m_hCoeffs.coeffRef(k), beta);
 
     // apply the householder transformation to the diagonal coefficient
     m_qr.coeffRef(k,k) = beta;
@@ -406,10 +406,10 @@ ColPivHouseholderQR<MatrixType>& ColPivHouseholderQR<MatrixType>::compute(const
 
     // apply the householder transformation
     m_qr.corner(BottomRight, rows-k, cols-k-1)
-        .applyHouseholderOnTheLeft(m_qr.col(k).end(rows-k-1), m_hCoeffs.coeffRef(k), &temp.coeffRef(k+1));
+        .applyHouseholderOnTheLeft(m_qr.col(k).tail(rows-k-1), m_hCoeffs.coeffRef(k), &temp.coeffRef(k+1));
 
     // update our table of squared norms of the columns
-    colSqNorms.end(cols-k-1) -= m_qr.row(k).end(cols-k-1).cwiseAbs2();
+    colSqNorms.tail(cols-k-1) -= m_qr.row(k).tail(cols-k-1).cwiseAbs2();
   }
 
   m_cols_permutation.setIdentity(cols);
@@ -427,7 +427,7 @@ struct ei_solve_retval<ColPivHouseholderQR<_MatrixType>, Rhs>
   : ei_solve_retval_base<ColPivHouseholderQR<_MatrixType>, Rhs>
 {
   EIGEN_MAKE_SOLVE_HELPERS(ColPivHouseholderQR<_MatrixType>,Rhs)
-  
+
   template<typename Dest> void evalTo(Dest& dst) const
   {
     const int rows = dec().rows(), cols = dec().cols(),
diff --git a/Eigen/src/QR/FullPivHouseholderQR.h b/Eigen/src/QR/FullPivHouseholderQR.h
index 51609ca1a..598f44dc3 100644
--- a/Eigen/src/QR/FullPivHouseholderQR.h
+++ b/Eigen/src/QR/FullPivHouseholderQR.h
@@ -306,7 +306,7 @@ FullPivHouseholderQR<MatrixType>& FullPivHouseholderQR<MatrixType>::compute(cons
     m_rows_transpositions.coeffRef(k) = row_of_biggest_in_corner;
     cols_transpositions.coeffRef(k) = col_of_biggest_in_corner;
     if(k != row_of_biggest_in_corner) {
-      m_qr.row(k).end(cols-k).swap(m_qr.row(row_of_biggest_in_corner).end(cols-k));
+      m_qr.row(k).tail(cols-k).swap(m_qr.row(row_of_biggest_in_corner).tail(cols-k));
       ++number_of_transpositions;
     }
     if(k != col_of_biggest_in_corner) {
@@ -315,11 +315,11 @@ FullPivHouseholderQR<MatrixType>& FullPivHouseholderQR<MatrixType>::compute(cons
     }
 
     RealScalar beta;
-    m_qr.col(k).end(rows-k).makeHouseholderInPlace(m_hCoeffs.coeffRef(k), beta);
+    m_qr.col(k).tail(rows-k).makeHouseholderInPlace(m_hCoeffs.coeffRef(k), beta);
     m_qr.coeffRef(k,k) = beta;
 
     m_qr.corner(BottomRight, rows-k, cols-k-1)
-        .applyHouseholderOnTheLeft(m_qr.col(k).end(rows-k-1), m_hCoeffs.coeffRef(k), &temp.coeffRef(k+1));
+        .applyHouseholderOnTheLeft(m_qr.col(k).tail(rows-k-1), m_hCoeffs.coeffRef(k), &temp.coeffRef(k+1));
   }
 
   m_cols_permutation.setIdentity(cols);
@@ -360,7 +360,7 @@ struct ei_solve_retval<FullPivHouseholderQR<_MatrixType>, Rhs>
       int remainingSize = rows-k;
       c.row(k).swap(c.row(dec().rowsTranspositions().coeff(k)));
       c.corner(BottomRight, remainingSize, rhs().cols())
-       .applyHouseholderOnTheLeft(dec().matrixQR().col(k).end(remainingSize-1),
+       .applyHouseholderOnTheLeft(dec().matrixQR().col(k).tail(remainingSize-1),
                                   dec().hCoeffs().coeff(k), &temp.coeffRef(0));
     }
 
@@ -400,7 +400,7 @@ typename FullPivHouseholderQR<MatrixType>::MatrixQType FullPivHouseholderQR<Matr
   for (int k = size-1; k >= 0; k--)
   {
     res.block(k, k, rows-k, rows-k)
-       .applyHouseholderOnTheLeft(m_qr.col(k).end(rows-k-1), ei_conj(m_hCoeffs.coeff(k)), &temp.coeffRef(k));
+       .applyHouseholderOnTheLeft(m_qr.col(k).tail(rows-k-1), ei_conj(m_hCoeffs.coeff(k)), &temp.coeffRef(k));
     res.row(k).swap(res.row(m_rows_transpositions.coeff(k)));
   }
   return res;
diff --git a/Eigen/src/QR/HouseholderQR.h b/Eigen/src/QR/HouseholderQR.h
index 895ae046a..24aa96e04 100644
--- a/Eigen/src/QR/HouseholderQR.h
+++ b/Eigen/src/QR/HouseholderQR.h
@@ -197,12 +197,12 @@ HouseholderQR<MatrixType>& HouseholderQR<MatrixType>::compute(const MatrixType&
     int remainingCols = cols - k - 1;
 
     RealScalar beta;
-    m_qr.col(k).end(remainingRows).makeHouseholderInPlace(m_hCoeffs.coeffRef(k), beta);
+    m_qr.col(k).tail(remainingRows).makeHouseholderInPlace(m_hCoeffs.coeffRef(k), beta);
     m_qr.coeffRef(k,k) = beta;
 
     // apply H to remaining part of m_qr from the left
     m_qr.corner(BottomRight, remainingRows, remainingCols)
-        .applyHouseholderOnTheLeft(m_qr.col(k).end(remainingRows-1), m_hCoeffs.coeffRef(k), &temp.coeffRef(k+1));
+        .applyHouseholderOnTheLeft(m_qr.col(k).tail(remainingRows-1), m_hCoeffs.coeffRef(k), &temp.coeffRef(k+1));
   }
   m_isInitialized = true;
   return *this;
@@ -226,7 +226,7 @@ struct ei_solve_retval<HouseholderQR<_MatrixType>, Rhs>
     // Note that the matrix Q = H_0^* H_1^*... so its inverse is Q^* = (H_0 H_1 ...)^T
     c.applyOnTheLeft(householderSequence(
       dec().matrixQR().corner(TopLeft,rows,rank),
-      dec().hCoeffs().start(rank)).transpose()
+      dec().hCoeffs().head(rank)).transpose()
     );
 
     dec().matrixQR()
diff --git a/Eigen/src/SVD/JacobiSVD.h b/Eigen/src/SVD/JacobiSVD.h
index 5792c5767..d2cd8e478 100644
--- a/Eigen/src/SVD/JacobiSVD.h
+++ b/Eigen/src/SVD/JacobiSVD.h
@@ -342,7 +342,7 @@ JacobiSVD<MatrixType, Options>& JacobiSVD<MatrixType, Options>::compute(const Ma
   for(int i = 0; i < diagSize; i++)
   {
     int pos;
-    m_singularValues.end(diagSize-i).maxCoeff(&pos);
+    m_singularValues.tail(diagSize-i).maxCoeff(&pos);
     if(pos)
     {
       pos += i;
diff --git a/Eigen/src/SVD/SVD.h b/Eigen/src/SVD/SVD.h
index 7a8e4f312..cd8c11b8d 100644
--- a/Eigen/src/SVD/SVD.h
+++ b/Eigen/src/SVD/SVD.h
@@ -137,7 +137,7 @@ template<typename _MatrixType> class SVD
       ei_assert(m_isInitialized && "SVD is not initialized.");
       return m_cols;
     }
-    
+
   protected:
     // Computes (a^2 + b^2)^(1/2) without destructive underflow or overflow.
     inline static Scalar pythag(Scalar a, Scalar b)
@@ -205,7 +205,7 @@ SVD<MatrixType>& SVD<MatrixType>::compute(const MatrixType& matrix)
     g = s = scale = 0.0;
     if (i < m)
     {
-      scale = A.col(i).end(m-i).cwiseAbs().sum();
+      scale = A.col(i).tail(m-i).cwiseAbs().sum();
       if (scale != Scalar(0))
       {
         for (k=i; k<m; k++)
@@ -219,18 +219,18 @@ SVD<MatrixType>& SVD<MatrixType>::compute(const MatrixType& matrix)
         A(i, i)=f-g;
         for (j=l-1; j<n; j++)
         {
-          s = A.col(j).end(m-i).dot(A.col(i).end(m-i));
+          s = A.col(j).tail(m-i).dot(A.col(i).tail(m-i));
           f = s/h;
-          A.col(j).end(m-i) += f*A.col(i).end(m-i);
+          A.col(j).tail(m-i) += f*A.col(i).tail(m-i);
         }
-        A.col(i).end(m-i) *= scale;
+        A.col(i).tail(m-i) *= scale;
       }
     }
     W[i] = scale * g;
     g = s = scale = 0.0;
     if (i+1 <= m && i+1 != n)
     {
-      scale = A.row(i).end(n-l+1).cwiseAbs().sum();
+      scale = A.row(i).tail(n-l+1).cwiseAbs().sum();
       if (scale != Scalar(0))
       {
         for (k=l-1; k<n; k++)
@@ -242,13 +242,13 @@ SVD<MatrixType>& SVD<MatrixType>::compute(const MatrixType& matrix)
         g = -sign(ei_sqrt(s),f);
         h = f*g - s;
         A(i,l-1) = f-g;
-        rv1.end(n-l+1) = A.row(i).end(n-l+1)/h;
+        rv1.tail(n-l+1) = A.row(i).tail(n-l+1)/h;
         for (j=l-1; j<m; j++)
         {
-          s = A.row(i).end(n-l+1).dot(A.row(j).end(n-l+1));
-          A.row(j).end(n-l+1) += s*rv1.end(n-l+1).transpose();
+          s = A.row(i).tail(n-l+1).dot(A.row(j).tail(n-l+1));
+          A.row(j).tail(n-l+1) += s*rv1.tail(n-l+1).transpose();
         }
-        A.row(i).end(n-l+1) *= scale;
+        A.row(i).tail(n-l+1) *= scale;
       }
     }
     anorm = std::max( anorm, (ei_abs(W[i])+ei_abs(rv1[i])) );
@@ -265,12 +265,12 @@ SVD<MatrixType>& SVD<MatrixType>::compute(const MatrixType& matrix)
           V(j, i) = (A(i, j)/A(i, l))/g;
         for (j=l; j<n; j++)
         {
-          s = V.col(j).end(n-l).dot(A.row(i).end(n-l));
-          V.col(j).end(n-l) += s * V.col(i).end(n-l);
+          s = V.col(j).tail(n-l).dot(A.row(i).tail(n-l));
+          V.col(j).tail(n-l) += s * V.col(i).tail(n-l);
         }
       }
-      V.row(i).end(n-l).setZero();
-      V.col(i).end(n-l).setZero();
+      V.row(i).tail(n-l).setZero();
+      V.col(i).tail(n-l).setZero();
     }
     V(i, i) = 1.0;
     g = rv1[i];
@@ -282,7 +282,7 @@ SVD<MatrixType>& SVD<MatrixType>::compute(const MatrixType& matrix)
     l = i+1;
     g = W[i];
     if (n-l>0)
-      A.row(i).end(n-l).setZero();
+      A.row(i).tail(n-l).setZero();
     if (g != Scalar(0.0))
     {
       g = Scalar(1.0)/g;
@@ -290,15 +290,15 @@ SVD<MatrixType>& SVD<MatrixType>::compute(const MatrixType& matrix)
       {
         for (j=l; j<n; j++)
         {
-          s = A.col(j).end(m-l).dot(A.col(i).end(m-l));
+          s = A.col(j).tail(m-l).dot(A.col(i).tail(m-l));
           f = (s/A(i,i))*g;
-          A.col(j).end(m-i) += f * A.col(i).end(m-i);
+          A.col(j).tail(m-i) += f * A.col(i).tail(m-i);
         }
       }
-      A.col(i).end(m-i) *= g;
+      A.col(i).tail(m-i) *= g;
     }
     else
-      A.col(i).end(m-i).setZero();
+      A.col(i).tail(m-i).setZero();
     ++A(i,i);
   }
   // Diagonalization of the bidiagonal form: Loop over
@@ -408,7 +408,7 @@ SVD<MatrixType>& SVD<MatrixType>::compute(const MatrixType& matrix)
     for (int i=0; i<n; i++)
     {
       int k;
-      W.end(n-i).maxCoeff(&k);
+      W.tail(n-i).maxCoeff(&k);
       if (k != 0)
       {
         k += i;
@@ -451,8 +451,8 @@ struct ei_solve_retval<SVD<_MatrixType>, Rhs>
           aux.coeffRef(i) /= si;
       }
       const int minsize = std::min(dec().rows(),dec().cols());
-      dst.col(j).start(minsize) = aux.start(minsize);
-      if(dec().cols()>dec().rows()) dst.col(j).end(cols()-minsize).setZero();
+      dst.col(j).head(minsize) = aux.head(minsize);
+      if(dec().cols()>dec().rows()) dst.col(j).tail(cols()-minsize).setZero();
       dst.col(j) = dec().matrixV() * dst.col(j);
     }
   }
diff --git a/bench/btl/libs/eigen2/eigen2_interface.hh b/bench/btl/libs/eigen2/eigen2_interface.hh
index f93ccad58..1166a37a1 100644
--- a/bench/btl/libs/eigen2/eigen2_interface.hh
+++ b/bench/btl/libs/eigen2/eigen2_interface.hh
@@ -124,7 +124,7 @@ public :
       Scalar* A0 = dst.data() + j*dst.stride();
       int starti = j;
       int alignedEnd = starti;
-      int alignedStart = (starti) + ei_alignmentOffset(&A0[starti], size-starti);
+      int alignedStart = (starti) + ei_first_aligned(&A0[starti], size-starti);
       alignedEnd = alignedStart + ((size-alignedStart)/(2*PacketSize))*(PacketSize*2);
 
       // do the non-vectorizable part of the assignment
@@ -153,14 +153,14 @@ public :
         else
           dst.copyCoeff(index, j, src);
       }
-      //dst.col(j).end(N-j) = src.col(j).end(N-j);
+      //dst.col(j).tail(N-j) = src.col(j).tail(N-j);
     }
   }
 
   static EIGEN_DONT_INLINE void syr2(gene_matrix & A,  gene_vector & X, gene_vector & Y, int N){
     // ei_product_selfadjoint_rank2_update<real,0,LowerTriangularBit>(N,A.data(),N, X.data(), 1, Y.data(), 1, -1);
     for(int j=0; j<N; ++j)
-      A.col(j).end(N-j) += X[j] * Y.end(N-j) + Y[j] * X.end(N-j);
+      A.col(j).tail(N-j) += X[j] * Y.tail(N-j) + Y[j] * X.tail(N-j);
   }
 
   static EIGEN_DONT_INLINE void ger(gene_matrix & A,  gene_vector & X, gene_vector & Y, int N){
diff --git a/bench/btl/libs/hand_vec/hand_vec_interface.hh b/bench/btl/libs/hand_vec/hand_vec_interface.hh
index 6080b2460..4b54c03a3 100755
--- a/bench/btl/libs/hand_vec/hand_vec_interface.hh
+++ b/bench/btl/libs/hand_vec/hand_vec_interface.hh
@@ -265,7 +265,7 @@ public :
       
       int starti = j+2;
       int alignedEnd = starti;
-      int alignedStart = (starti) + ei_alignmentOffset(&X[starti], N-starti);
+      int alignedStart = (starti) + ei_first_aligned(&X[starti], N-starti);
       alignedEnd = alignedStart + ((N-alignedStart)/(PacketSize))*(PacketSize);
 
       X[j]   += t0 * A0[j];
diff --git a/disabled/Householder.h b/disabled/Householder.h
index 874b812db..9b5f56360 100644
--- a/disabled/Householder.h
+++ b/disabled/Householder.h
@@ -16,8 +16,8 @@ void ei_compute_householder(const InputVector& x, OutputVector *v, typename Outp
   typedef typename OutputVector::RealScalar RealScalar;
   ei_assert(x.size() == v->size()+1);
   int n = x.size();
-  RealScalar sigma = x.end(n-1).squaredNorm();
-  *v = x.end(n-1);
+  RealScalar sigma = x.tail(n-1).squaredNorm();
+  *v = x.tail(n-1);
   // the big assumption in this code is that ei_abs2(x->coeff(0)) is not much smaller than sigma.
   if(ei_isMuchSmallerThan(sigma, ei_abs2(x.coeff(0))))
   {
diff --git a/doc/AsciiQuickReference.txt b/doc/AsciiQuickReference.txt
index 6c1c4fbd8..86f77a66b 100644
--- a/doc/AsciiQuickReference.txt
+++ b/doc/AsciiQuickReference.txt
@@ -41,8 +41,8 @@ A.setIdentity();  // Fill A with the identity.
 // Eigen                           // Matlab
 x.start(n)                         // x(1:n)
 x.start<n>()                       // x(1:n)
-x.end(n)                           // N = rows(x); x(N - n: N)
-x.end<n>()                         // N = rows(x); x(N - n: N)
+x.tail(n)                           // N = rows(x); x(N - n: N)
+x.tail<n>()                         // N = rows(x); x(N - n: N)
 x.segment(i, n)                    // x(i+1 : i+n)
 x.segment<n>(i)                    // x(i+1 : i+n)
 P.block(i, j, rows, cols)          // P(i+1 : i+rows, j+1 : j+cols)
diff --git a/doc/C01_QuickStartGuide.dox b/doc/C01_QuickStartGuide.dox
index 7c4aa8f76..2240ed8b1 100644
--- a/doc/C01_QuickStartGuide.dox
+++ b/doc/C01_QuickStartGuide.dox
@@ -493,7 +493,7 @@ Read-write access to sub-vectors:
 <td></td>
 
 <tr><td>\code vec1.start(n)\endcode</td><td>\code vec1.start<n>()\endcode</td><td>the first \c n coeffs </td></tr>
-<tr><td>\code vec1.end(n)\endcode</td><td>\code vec1.end<n>()\endcode</td><td>the last \c n coeffs </td></tr>
+<tr><td>\code vec1.tail(n)\endcode</td><td>\code vec1.tail<n>()\endcode</td><td>the last \c n coeffs </td></tr>
 <tr><td>\code vec1.segment(pos,n)\endcode</td><td>\code vec1.segment<n>(pos)\endcode</td>
     <td>the \c size coeffs in \n the range [\c pos : \c pos + \c n [</td></tr>
 <tr style="border-style: dashed none dashed none;"><td>
diff --git a/doc/D01_StlContainers.dox b/doc/D01_StlContainers.dox
index d778e4fa0..db682c996 100644
--- a/doc/D01_StlContainers.dox
+++ b/doc/D01_StlContainers.dox
@@ -9,7 +9,7 @@ namespace Eigen {
 
 \section summary Executive summary
 
-Using STL containers on \ref FixedSizeVectorizable "fixed-size vectorizable Eigen types" requires taking the following two steps:
+Using STL containers on \ref FixedSizeVectorizable "fixed-size vectorizable Eigen types", or classes having members of such types, requires taking the following two steps:
 
 \li A 16-byte-aligned allocator must be used. Eigen does provide one ready for use: aligned_allocator.
 \li If you want to use the std::vector container, you need to \#include <Eigen/StdVector>.
diff --git a/doc/D11_UnalignedArrayAssert.dox b/doc/D11_UnalignedArrayAssert.dox
index f4da15236..e9fb2a69f 100644
--- a/doc/D11_UnalignedArrayAssert.dox
+++ b/doc/D11_UnalignedArrayAssert.dox
@@ -55,11 +55,12 @@ Note that here, Eigen::Vector2d is only used as an example, more generally the i
 
 \section c2 Cause 2: STL Containers
 
-If you use STL Containers such as std::vector, std::map, ..., with Eigen objects, like this,
+If you use STL Containers such as std::vector, std::map, ..., with Eigen objects, or with classes containing Eigen objects, like this,
 
 \code
 std::vector<Eigen::Matrix2f> my_vector;
-std::map<int, Eigen::Matrix2f> my_map;
+struct my_class { ... Eigen::Matrix2f m; ... };
+std::map<int, my_class> my_map;
 \endcode
 
 then you need to read this separate page: \ref StlContainers "Using STL Containers with Eigen".
diff --git a/doc/I03_InsideEigenExample.dox b/doc/I03_InsideEigenExample.dox
index d4960e79d..95cbe6800 100644
--- a/doc/I03_InsideEigenExample.dox
+++ b/doc/I03_InsideEigenExample.dox
@@ -343,7 +343,7 @@ struct ei_assign_impl<Derived1, Derived2, LinearVectorization, NoUnrolling>
     const int size = dst.size();
     const int packetSize = ei_packet_traits<typename Derived1::Scalar>::size;
     const int alignedStart = ei_assign_traits<Derived1,Derived2>::DstIsAligned ? 0
-                           : ei_alignmentOffset(&dst.coeffRef(0), size);
+                           : ei_first_aligned(&dst.coeffRef(0), size);
     const int alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;
 
     for(int index = 0; index < alignedStart; index++)
diff --git a/doc/echelon.cpp b/doc/echelon.cpp
index 49b719ff2..c95be6f3b 100644
--- a/doc/echelon.cpp
+++ b/doc/echelon.cpp
@@ -27,8 +27,8 @@ struct unroll_echelon
     m.row(k).swap(m.row(k+rowOfBiggest));
     m.col(k).swap(m.col(k+colOfBiggest));
     m.template corner<CornerRows-1, CornerCols>(BottomRight)
-      -= m.col(k).template end<CornerRows-1>()
-       * (m.row(k).template end<CornerCols>() / m(k,k));
+      -= m.col(k).template tail<CornerRows-1>()
+       * (m.row(k).template tail<CornerCols>() / m(k,k));
   }
 };
 
@@ -59,7 +59,7 @@ struct unroll_echelon<Derived, Dynamic>
       m.row(k).swap(m.row(k+rowOfBiggest));
       m.col(k).swap(m.col(k+colOfBiggest));
       m.corner(BottomRight, cornerRows-1, cornerCols)
-        -= m.col(k).end(cornerRows-1) * (m.row(k).end(cornerCols) / m(k,k));
+        -= m.col(k).tail(cornerRows-1) * (m.row(k).tail(cornerCols) / m(k,k));
     }
   }
 };
diff --git a/doc/snippets/MatrixBase_end_int.cpp b/doc/snippets/MatrixBase_end_int.cpp
index aaa54b668..03c54a931 100644
--- a/doc/snippets/MatrixBase_end_int.cpp
+++ b/doc/snippets/MatrixBase_end_int.cpp
@@ -1,5 +1,5 @@
 RowVector4i v = RowVector4i::Random();
 cout << "Here is the vector v:" << endl << v << endl;
-cout << "Here is v.end(2):" << endl << v.end(2) << endl;
-v.end(2).setZero();
+cout << "Here is v.tail(2):" << endl << v.tail(2) << endl;
+v.tail(2).setZero();
 cout << "Now the vector v is:" << endl << v << endl;
diff --git a/doc/snippets/MatrixBase_eval.cpp b/doc/snippets/MatrixBase_eval.cpp
index d70424562..1df3aa01d 100644
--- a/doc/snippets/MatrixBase_eval.cpp
+++ b/doc/snippets/MatrixBase_eval.cpp
@@ -2,11 +2,11 @@ Matrix2f M = Matrix2f::Random();
 Matrix2f m;
 m = M;
 cout << "Here is the matrix m:" << endl << m << endl;
-cout << "Now we want to replace m by its own transpose." << endl;
-cout << "If we do m = m.transpose(), then m becomes:" << endl;
-m = m.transpose() * 1;
+cout << "Now we want to copy a column into a row." << endl;
+cout << "If we do m.col(1) = m.row(0), then m becomes:" << endl;
+m.col(1) = m.row(0);
 cout << m << endl << "which is wrong!" << endl;
-cout << "Now let us instead do m = m.transpose().eval(). Then m becomes" << endl;
+cout << "Now let us instead do m.col(1) = m.row(0).eval(). Then m becomes" << endl;
 m = M;
-m = m.transpose().eval();
+m.col(1) = m.row(0).eval();
 cout << m << endl << "which is right." << endl;
diff --git a/doc/snippets/MatrixBase_start_int.cpp b/doc/snippets/MatrixBase_start_int.cpp
index eb43a5dc7..c261d2b4e 100644
--- a/doc/snippets/MatrixBase_start_int.cpp
+++ b/doc/snippets/MatrixBase_start_int.cpp
@@ -1,5 +1,5 @@
 RowVector4i v = RowVector4i::Random();
 cout << "Here is the vector v:" << endl << v << endl;
-cout << "Here is v.start(2):" << endl << v.start(2) << endl;
-v.start(2).setZero();
+cout << "Here is v.head(2):" << endl << v.head(2) << endl;
+v.head(2).setZero();
 cout << "Now the vector v is:" << endl << v << endl;
diff --git a/doc/snippets/MatrixBase_template_int_end.cpp b/doc/snippets/MatrixBase_template_int_end.cpp
index 0908c0305..f5ccb00f6 100644
--- a/doc/snippets/MatrixBase_template_int_end.cpp
+++ b/doc/snippets/MatrixBase_template_int_end.cpp
@@ -1,5 +1,5 @@
 RowVector4i v = RowVector4i::Random();
 cout << "Here is the vector v:" << endl << v << endl;
-cout << "Here is v.end(2):" << endl << v.end<2>() << endl;
-v.end<2>().setZero();
+cout << "Here is v.tail(2):" << endl << v.tail<2>() << endl;
+v.tail<2>().setZero();
 cout << "Now the vector v is:" << endl << v << endl;
diff --git a/doc/snippets/MatrixBase_template_int_start.cpp b/doc/snippets/MatrixBase_template_int_start.cpp
index 231fc3299..d336b3716 100644
--- a/doc/snippets/MatrixBase_template_int_start.cpp
+++ b/doc/snippets/MatrixBase_template_int_start.cpp
@@ -1,5 +1,5 @@
 RowVector4i v = RowVector4i::Random();
 cout << "Here is the vector v:" << endl << v << endl;
-cout << "Here is v.start(2):" << endl << v.start<2>() << endl;
-v.start<2>().setZero();
+cout << "Here is v.head(2):" << endl << v.head<2>() << endl;
+v.head<2>().setZero();
 cout << "Now the vector v is:" << endl << v << endl;
diff --git a/scripts/CMakeLists.txt b/scripts/CMakeLists.txt
index acf3bb6e9..7e2c9ca7a 100644
--- a/scripts/CMakeLists.txt
+++ b/scripts/CMakeLists.txt
@@ -1,6 +1,6 @@
 get_property(EIGEN_TESTS_LIST GLOBAL PROPERTY EIGEN_TESTS_LIST)
-configure_file(buildtests.in ${CMAKE_BINARY_DIR}/buildtests)
+configure_file(buildtests.in ${CMAKE_BINARY_DIR}/buildtests @ONLY)
 
-configure_file(check.in ${CMAKE_BINARY_DIR}/check)
-configure_file(debug.in ${CMAKE_BINARY_DIR}/debug)
-configure_file(release.in ${CMAKE_BINARY_DIR}/release)
+configure_file(check.in ${CMAKE_BINARY_DIR}/check COPYONLY)
+configure_file(debug.in ${CMAKE_BINARY_DIR}/debug COPYONLY)
+configure_file(release.in ${CMAKE_BINARY_DIR}/release COPYONLY)
diff --git a/scripts/buildtests.in b/scripts/buildtests.in
index 3c4282848..7026373cf 100755
--- a/scripts/buildtests.in
+++ b/scripts/buildtests.in
@@ -1,24 +1,22 @@
 #!/bin/bash
 
-if [ $# == 0 -o $# -ge 3 ]
+if [[ $# != 1 || $1 == *help ]]
 then
-  echo "usage: ./buildtests regexp [jobs]"
-  echo "  makes tests matching the regexp, with [jobs] concurrent make jobs"
+  echo "usage: ./check regexp"
+  echo "  Builds tests matching the regexp."
+  echo "  The EIGEN_MAKE_ARGS environment variable allows to pass args to 'make'."
+  echo "    For example, to launch 5 concurrent builds, use EIGEN_MAKE_ARGS='-j5'"
   exit 0
 fi
 
-TESTSLIST="${EIGEN_TESTS_LIST}"
-
+TESTSLIST="@EIGEN_TESTS_LIST@"
 targets_to_make=`echo "$TESTSLIST" | egrep "$1" | xargs echo`
 
-if [ $# == 1 ]
+if [ -n "${EIGEN_MAKE_ARGS:+x}" ]
 then
+  make $targets_to_make ${EIGEN_MAKE_ARGS}
+else
   make $targets_to_make
-  exit $?
 fi
+exit $?
 
-if [ $# == 2 ]
-then
-  make -j $2 $targets_to_make
-  exit $?
-fi
diff --git a/scripts/check.in b/scripts/check.in
index 82d805b79..d6a8466e2 100755
--- a/scripts/check.in
+++ b/scripts/check.in
@@ -1,12 +1,21 @@
 #!/bin/bash
 # check : shorthand for make and ctest -R
 
-if [ $# == 0 -o $# -ge 3 ]
+if [[ $# != 1 || $1 == *help ]]
 then
-  echo "usage: ./check regexp [jobs]"
-  echo "  makes and runs tests matching the regexp, with [jobs] concurrent make jobs"
+  echo "usage: ./check regexp"
+  echo "  Builds and runs tests matching the regexp."
+  echo "  The EIGEN_MAKE_ARGS environment variable allows to pass args to 'make'."
+  echo "    For example, to launch 5 concurrent builds, use EIGEN_MAKE_ARGS='-j5'"
+  echo "  The EIGEN_CTEST_ARGS environment variable allows to pass args to 'ctest'."
+  echo "    For example, with CTest 2.8, you can use EIGEN_CTEST_ARGS='-j5'."
   exit 0
 fi
 
-# TODO when ctest 2.8 comes out, honor the jobs parameter
-./buildtests "$1" "${2:-1}" && ctest -R "$1"
+if [ -n "${EIGEN_CTEST_ARGS:+x}" ]
+then
+  ./buildtests "$1" && ctest -R "$1" ${EIGEN_CTEST_ARGS}
+else
+  ./buildtests "$1" && ctest -R "$1"
+fi
+exit $?
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index c29331db5..e82026ee9 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -88,6 +88,7 @@ ei_add_test(meta)
 ei_add_test(sizeof)
 ei_add_test(dynalloc)
 ei_add_test(nomalloc)
+ei_add_test(first_aligned)
 ei_add_test(mixingtypes)
 ei_add_test(packetmath)
 ei_add_test(unalignedassert)
@@ -117,7 +118,7 @@ ei_add_test(product_symm)
 ei_add_test(product_syrk)
 ei_add_test(product_trmv)
 ei_add_test(product_trmm)
-ei_add_test(product_trsm)
+ei_add_test(product_trsolve)
 ei_add_test(product_notemporary)
 ei_add_test(stable_norm)
 ei_add_test(bandmatrix)
@@ -128,6 +129,7 @@ ei_add_test(inverse)
 ei_add_test(qr)
 ei_add_test(qr_colpivoting)
 ei_add_test(qr_fullpivoting)
+ei_add_test(hessenberg " " "${GSL_LIBRARIES}")
 ei_add_test(eigensolver_selfadjoint " " "${GSL_LIBRARIES}")
 ei_add_test(eigensolver_generic " " "${GSL_LIBRARIES}")
 ei_add_test(eigensolver_complex)
diff --git a/test/first_aligned.cpp b/test/first_aligned.cpp
new file mode 100644
index 000000000..3cf1a7eef
--- /dev/null
+++ b/test/first_aligned.cpp
@@ -0,0 +1,64 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// Eigen is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 3 of the License, or (at your option) any later version.
+//
+// Alternatively, you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of
+// the License, or (at your option) any later version.
+//
+// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License and a copy of the GNU General Public License along with
+// Eigen. If not, see <http://www.gnu.org/licenses/>.
+
+#include "main.h"
+
+template<typename Scalar>
+void test_first_aligned_helper(Scalar *array, int size)
+{
+  const int packet_size = sizeof(Scalar) * ei_packet_traits<Scalar>::size;
+  VERIFY(((size_t(array) + sizeof(Scalar) * ei_first_aligned(array, size)) % packet_size) == 0);
+}
+
+template<typename Scalar>
+void test_none_aligned_helper(Scalar *array, int size)
+{
+  VERIFY(ei_packet_traits<Scalar>::size == 1 || ei_first_aligned(array, size) == size);
+}
+
+struct some_non_vectorizable_type { float x; };
+
+void test_first_aligned()
+{
+  EIGEN_ALIGN16 float array_float[100];
+  test_first_aligned_helper(array_float, 50);
+  test_first_aligned_helper(array_float+1, 50);
+  test_first_aligned_helper(array_float+2, 50);
+  test_first_aligned_helper(array_float+3, 50);
+  test_first_aligned_helper(array_float+4, 50);
+  test_first_aligned_helper(array_float+5, 50);
+  
+  EIGEN_ALIGN16 double array_double[100];
+  test_first_aligned_helper(array_float, 50);
+  test_first_aligned_helper(array_float+1, 50);
+  test_first_aligned_helper(array_float+2, 50);
+  
+  double *array_double_plus_4_bytes = (double*)(size_t(array_double)+4);
+  test_none_aligned_helper(array_double_plus_4_bytes, 50);
+  test_none_aligned_helper(array_double_plus_4_bytes+1, 50);
+  
+  some_non_vectorizable_type array_nonvec[100];
+  test_first_aligned_helper(array_nonvec, 100);
+  test_none_aligned_helper(array_nonvec, 100);
+}
diff --git a/test/geo_homogeneous.cpp b/test/geo_homogeneous.cpp
index 48d8cbcdf..781913553 100644
--- a/test/geo_homogeneous.cpp
+++ b/test/geo_homogeneous.cpp
@@ -67,7 +67,7 @@ template<typename Scalar,int Size> void homogeneous(void)
   VERIFY_IS_APPROX(m0, hm0.colwise().hnormalized());
   hm0.row(Size-1).setRandom();
   for(int j=0; j<Size; ++j)
-    m0.col(j) = hm0.col(j).start(Size) / hm0(Size,j);
+    m0.col(j) = hm0.col(j).head(Size) / hm0(Size,j);
   VERIFY_IS_APPROX(m0, hm0.colwise().hnormalized());
 
   T1MatrixType t1 = T1MatrixType::Random();
diff --git a/test/geo_orthomethods.cpp b/test/geo_orthomethods.cpp
index 54a6febab..9f1113559 100644
--- a/test/geo_orthomethods.cpp
+++ b/test/geo_orthomethods.cpp
@@ -66,7 +66,7 @@ template<typename Scalar> void orthomethods_3()
           v41 = Vector4::Random(),
           v42 = Vector4::Random();
   v40.w() = v41.w() = v42.w() = 0;
-  v42.template start<3>() = v40.template start<3>().cross(v41.template start<3>());
+  v42.template head<3>() = v40.template head<3>().cross(v41.template head<3>());
   VERIFY_IS_APPROX(v40.cross3(v41), v42);
 }
 
@@ -88,8 +88,8 @@ template<typename Scalar, int Size> void orthomethods(int size=Size)
 
   if (size>=3)
   {
-    v0.template start<2>().setZero();
-    v0.end(size-2).setRandom();
+    v0.template head<2>().setZero();
+    v0.tail(size-2).setRandom();
 
     VERIFY_IS_MUCH_SMALLER_THAN(v0.unitOrthogonal().dot(v0), Scalar(1));
     VERIFY_IS_APPROX(v0.unitOrthogonal().norm(), RealScalar(1));
diff --git a/test/geo_transformations.cpp b/test/geo_transformations.cpp
index bcef908d8..fc542e71b 100644
--- a/test/geo_transformations.cpp
+++ b/test/geo_transformations.cpp
@@ -118,7 +118,7 @@ template<typename Scalar, int Mode> void transformations(void)
   t0.scale(v0);
   t1.prescale(v0);
 
-  VERIFY_IS_APPROX( (t0 * Vector3(1,0,0)).template start<3>().norm(), v0.x());
+  VERIFY_IS_APPROX( (t0 * Vector3(1,0,0)).template head<3>().norm(), v0.x());
   //VERIFY(!ei_isApprox((t1 * Vector3(1,0,0)).norm(), v0.x()));
 
   t0.setIdentity();
@@ -290,12 +290,12 @@ template<typename Scalar, int Mode> void transformations(void)
   // translation * vector
   t0.setIdentity();
   t0.translate(v0);
-  VERIFY_IS_APPROX((t0 * v1).template start<3>(), Translation3(v0) * v1);
+  VERIFY_IS_APPROX((t0 * v1).template head<3>(), Translation3(v0) * v1);
 
   // AlignedScaling * vector
   t0.setIdentity();
   t0.scale(v0);
-  VERIFY_IS_APPROX((t0 * v1).template start<3>(), AlignedScaling3(v0) * v1);
+  VERIFY_IS_APPROX((t0 * v1).template head<3>(), AlignedScaling3(v0) * v1);
 
   // test transform inversion
   t0.setIdentity();
diff --git a/test/hessenberg.cpp b/test/hessenberg.cpp
new file mode 100644
index 000000000..d917be357
--- /dev/null
+++ b/test/hessenberg.cpp
@@ -0,0 +1,46 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009 Gael Guennebaud <g.gael@free.fr>
+//
+// Eigen is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 3 of the License, or (at your option) any later version.
+//
+// Alternatively, you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of
+// the License, or (at your option) any later version.
+//
+// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License and a copy of the GNU General Public License along with
+// Eigen. If not, see <http://www.gnu.org/licenses/>.
+
+#include "main.h"
+#include <Eigen/Eigenvalues>
+
+template<typename Scalar,int Size> void hessenberg(int size = Size)
+{
+  typedef Matrix<Scalar,Size,Size> MatrixType;
+  MatrixType m = MatrixType::Random(size,size);
+  HessenbergDecomposition<MatrixType> hess(m);
+
+  VERIFY_IS_APPROX(m, hess.matrixQ() * hess.matrixH() * hess.matrixQ().adjoint());
+}
+
+void test_hessenberg()
+{
+  for(int i = 0; i < g_repeat; i++) {
+    CALL_SUBTEST_1(( hessenberg<std::complex<double>,1>() ));
+    CALL_SUBTEST_2(( hessenberg<std::complex<double>,2>() ));
+    CALL_SUBTEST_3(( hessenberg<std::complex<float>,4>() ));
+    CALL_SUBTEST_4(( hessenberg<float,Dynamic>(ei_random<int>(1,320)) ));
+    CALL_SUBTEST_5(( hessenberg<std::complex<double>,Dynamic>(ei_random<int>(1,320)) ));
+  }
+}
diff --git a/test/householder.cpp b/test/householder.cpp
index 6e480c0de..4e4c78863 100644
--- a/test/householder.cpp
+++ b/test/householder.cpp
@@ -53,7 +53,7 @@ template<typename MatrixType> void householder(const MatrixType& m)
   v1.makeHouseholder(essential, beta, alpha);
   v1.applyHouseholderOnTheLeft(essential,beta,tmp);
   VERIFY_IS_APPROX(v1.norm(), v2.norm());
-  VERIFY_IS_MUCH_SMALLER_THAN(v1.end(rows-1).norm(), v1.norm());
+  VERIFY_IS_MUCH_SMALLER_THAN(v1.tail(rows-1).norm(), v1.norm());
   v1 = VectorType::Random(rows);
   v2 = v1;
   v1.applyHouseholderOnTheLeft(essential,beta,tmp);
@@ -63,7 +63,7 @@ template<typename MatrixType> void householder(const MatrixType& m)
              m2(rows, cols);
 
   v1 = VectorType::Random(rows);
-  if(even) v1.end(rows-1).setZero();
+  if(even) v1.tail(rows-1).setZero();
   m1.colwise() = v1;
   m2 = m1;
   m1.col(0).makeHouseholder(essential, beta, alpha);
@@ -74,7 +74,7 @@ template<typename MatrixType> void householder(const MatrixType& m)
   VERIFY_IS_APPROX(ei_real(m1(0,0)), alpha);
 
   v1 = VectorType::Random(rows);
-  if(even) v1.end(rows-1).setZero();
+  if(even) v1.tail(rows-1).setZero();
   SquareMatrixType m3(rows,rows), m4(rows,rows);
   m3.rowwise() = v1.transpose();
   m4 = m3;
diff --git a/test/product_selfadjoint.cpp b/test/product_selfadjoint.cpp
index 2e9f8be80..2f3833a02 100644
--- a/test/product_selfadjoint.cpp
+++ b/test/product_selfadjoint.cpp
@@ -68,9 +68,9 @@ template<typename MatrixType> void product_selfadjoint(const MatrixType& m)
   if (rows>1)
   {
     m2 = m1.template triangularView<LowerTriangular>();
-    m2.block(1,1,rows-1,cols-1).template selfadjointView<LowerTriangular>().rankUpdate(v1.end(rows-1),v2.start(cols-1));
+    m2.block(1,1,rows-1,cols-1).template selfadjointView<LowerTriangular>().rankUpdate(v1.tail(rows-1),v2.head(cols-1));
     m3 = m1;
-    m3.block(1,1,rows-1,cols-1) += v1.end(rows-1) * v2.start(cols-1).adjoint()+ v2.start(cols-1) * v1.end(rows-1).adjoint();
+    m3.block(1,1,rows-1,cols-1) += v1.tail(rows-1) * v2.head(cols-1).adjoint()+ v2.head(cols-1) * v1.tail(rows-1).adjoint();
     VERIFY_IS_APPROX(m2, m3.template triangularView<LowerTriangular>().toDenseMatrix());
   }
 }
diff --git a/test/product_trsm.cpp b/test/product_trsolve.cpp
index e884f3180..4477a29d1 100644
--- a/test/product_trsm.cpp
+++ b/test/product_trsolve.cpp
@@ -30,15 +30,21 @@
     VERIFY_IS_APPROX((TRI).toDenseMatrix() * (XB), ref); \
   }
 
-template<typename Scalar> void trsm(int size,int cols)
+#define VERIFY_TRSM_ONTHERIGHT(TRI,XB) { \
+    (XB).setRandom(); ref = (XB); \
+    (TRI).transpose().template solveInPlace<OnTheRight>(XB.transpose()); \
+    VERIFY_IS_APPROX((XB).transpose() * (TRI).transpose().toDenseMatrix(), ref.transpose()); \
+  }
+
+template<typename Scalar,int Size, int Cols> void trsolve(int size=Size,int cols=Cols)
 {
   typedef typename NumTraits<Scalar>::Real RealScalar;
 
-  Matrix<Scalar,Dynamic,Dynamic,ColMajor> cmLhs(size,size);
-  Matrix<Scalar,Dynamic,Dynamic,RowMajor> rmLhs(size,size);
+  Matrix<Scalar,Size,Size,ColMajor> cmLhs(size,size);
+  Matrix<Scalar,Size,Size,RowMajor> rmLhs(size,size);
 
-  Matrix<Scalar,Dynamic,Dynamic,ColMajor> cmRhs(size,cols), ref(size,cols);
-  Matrix<Scalar,Dynamic,Dynamic,RowMajor> rmRhs(size,cols);
+  Matrix<Scalar,Size,Cols,ColMajor> cmRhs(size,cols), ref(size,cols);
+  Matrix<Scalar,Size,Cols,RowMajor> rmRhs(size,cols);
 
   cmLhs.setRandom(); cmLhs *= static_cast<RealScalar>(0.1); cmLhs.diagonal().array() += static_cast<RealScalar>(1);
   rmLhs.setRandom(); rmLhs *= static_cast<RealScalar>(0.1); rmLhs.diagonal().array() += static_cast<RealScalar>(1);
@@ -53,13 +59,32 @@ template<typename Scalar> void trsm(int size,int cols)
 
   VERIFY_TRSM(rmLhs            .template triangularView<LowerTriangular>(), cmRhs);
   VERIFY_TRSM(rmLhs.conjugate().template triangularView<UnitUpperTriangular>(), rmRhs);
+
+
+  VERIFY_TRSM_ONTHERIGHT(cmLhs.conjugate().template triangularView<LowerTriangular>(), cmRhs);
+  VERIFY_TRSM_ONTHERIGHT(cmLhs            .template triangularView<UpperTriangular>(), cmRhs);
+  VERIFY_TRSM_ONTHERIGHT(cmLhs            .template triangularView<LowerTriangular>(), rmRhs);
+  VERIFY_TRSM_ONTHERIGHT(cmLhs.conjugate().template triangularView<UpperTriangular>(), rmRhs);
+
+  VERIFY_TRSM_ONTHERIGHT(cmLhs.conjugate().template triangularView<UnitLowerTriangular>(), cmRhs);
+  VERIFY_TRSM_ONTHERIGHT(cmLhs            .template triangularView<UnitUpperTriangular>(), rmRhs);
+
+  VERIFY_TRSM_ONTHERIGHT(rmLhs            .template triangularView<LowerTriangular>(), cmRhs);
+  VERIFY_TRSM_ONTHERIGHT(rmLhs.conjugate().template triangularView<UnitUpperTriangular>(), rmRhs);
 }
 
-void test_product_trsm()
+void test_product_trsolve()
 {
   for(int i = 0; i < g_repeat ; i++)
   {
-    CALL_SUBTEST_1((trsm<float>(ei_random<int>(1,320),ei_random<int>(1,320))));
-    CALL_SUBTEST_2((trsm<std::complex<double> >(ei_random<int>(1,320),ei_random<int>(1,320))));
+    // matrices
+    CALL_SUBTEST_1((trsolve<float,Dynamic,Dynamic>(ei_random<int>(1,320),ei_random<int>(1,320))));
+    CALL_SUBTEST_2((trsolve<std::complex<double>,Dynamic,Dynamic>(ei_random<int>(1,320),ei_random<int>(1,320))));
+
+    // vectors
+    CALL_SUBTEST_3((trsolve<std::complex<double>,Dynamic,1>(ei_random<int>(1,320))));
+    CALL_SUBTEST_4((trsolve<float,1,1>()));
+    CALL_SUBTEST_5((trsolve<float,1,2>()));
+    CALL_SUBTEST_6((trsolve<std::complex<float>,4,1>()));
   }
 }
diff --git a/test/redux.cpp b/test/redux.cpp
index c075c1393..3293fd54e 100644
--- a/test/redux.cpp
+++ b/test/redux.cpp
@@ -68,10 +68,10 @@ template<typename VectorType> void vectorRedux(const VectorType& w)
       minc = std::min(minc, ei_real(v[j]));
       maxc = std::max(maxc, ei_real(v[j]));
     }
-    VERIFY_IS_APPROX(s, v.start(i).sum());
-    VERIFY_IS_APPROX(p, v.start(i).prod());
-    VERIFY_IS_APPROX(minc, v.real().start(i).minCoeff());
-    VERIFY_IS_APPROX(maxc, v.real().start(i).maxCoeff());
+    VERIFY_IS_APPROX(s, v.head(i).sum());
+    VERIFY_IS_APPROX(p, v.head(i).prod());
+    VERIFY_IS_APPROX(minc, v.real().head(i).minCoeff());
+    VERIFY_IS_APPROX(maxc, v.real().head(i).maxCoeff());
   }
 
   for(int i = 0; i < size-1; i++)
@@ -85,10 +85,10 @@ template<typename VectorType> void vectorRedux(const VectorType& w)
       minc = std::min(minc, ei_real(v[j]));
       maxc = std::max(maxc, ei_real(v[j]));
     }
-    VERIFY_IS_APPROX(s, v.end(size-i).sum());
-    VERIFY_IS_APPROX(p, v.end(size-i).prod());
-    VERIFY_IS_APPROX(minc, v.real().end(size-i).minCoeff());
-    VERIFY_IS_APPROX(maxc, v.real().end(size-i).maxCoeff());
+    VERIFY_IS_APPROX(s, v.tail(size-i).sum());
+    VERIFY_IS_APPROX(p, v.tail(size-i).prod());
+    VERIFY_IS_APPROX(minc, v.real().tail(size-i).minCoeff());
+    VERIFY_IS_APPROX(maxc, v.real().tail(size-i).maxCoeff());
   }
 
   for(int i = 0; i < size/2; i++)
diff --git a/test/regression.cpp b/test/regression.cpp
index bcda73e0e..a0f2ae102 100644
--- a/test/regression.cpp
+++ b/test/regression.cpp
@@ -51,7 +51,7 @@ void makeNoisyCohyperplanarPoints(int numPoints,
     {
       cur_point = VectorType::Random(size)/*.normalized()*/;
       // project cur_point onto the hyperplane
-      Scalar x = - (hyperplane->coeffs().start(size).cwiseProduct(cur_point)).sum();
+      Scalar x = - (hyperplane->coeffs().head(size).cwiseProduct(cur_point)).sum();
       cur_point *= hyperplane->coeffs().coeff(size) / x;
     } while( cur_point.norm() < 0.5
           || cur_point.norm() > 2.0 );
diff --git a/test/submatrices.cpp b/test/submatrices.cpp
index 75b0fde4b..9cd6f3fab 100644
--- a/test/submatrices.cpp
+++ b/test/submatrices.cpp
@@ -127,15 +127,15 @@ template<typename MatrixType> void submatrices(const MatrixType& m)
   if (rows>2)
   {
     // test sub vectors
-    VERIFY_IS_APPROX(v1.template start<2>(), v1.block(0,0,2,1));
-    VERIFY_IS_APPROX(v1.template start<2>(), v1.start(2));
-    VERIFY_IS_APPROX(v1.template start<2>(), v1.segment(0,2));
-    VERIFY_IS_APPROX(v1.template start<2>(), v1.template segment<2>(0));
+    VERIFY_IS_APPROX(v1.template head<2>(), v1.block(0,0,2,1));
+    VERIFY_IS_APPROX(v1.template head<2>(), v1.head(2));
+    VERIFY_IS_APPROX(v1.template head<2>(), v1.segment(0,2));
+    VERIFY_IS_APPROX(v1.template head<2>(), v1.template segment<2>(0));
     int i = rows-2;
-    VERIFY_IS_APPROX(v1.template end<2>(), v1.block(i,0,2,1));
-    VERIFY_IS_APPROX(v1.template end<2>(), v1.end(2));
-    VERIFY_IS_APPROX(v1.template end<2>(), v1.segment(i,2));
-    VERIFY_IS_APPROX(v1.template end<2>(), v1.template segment<2>(i));
+    VERIFY_IS_APPROX(v1.template tail<2>(), v1.block(i,0,2,1));
+    VERIFY_IS_APPROX(v1.template tail<2>(), v1.tail(2));
+    VERIFY_IS_APPROX(v1.template tail<2>(), v1.segment(i,2));
+    VERIFY_IS_APPROX(v1.template tail<2>(), v1.template segment<2>(i));
     i = ei_random(0,rows-2);
     VERIFY_IS_APPROX(v1.segment(i,2), v1.template segment<2>(i));
 
diff --git a/unsupported/Eigen/AlignedVector3 b/unsupported/Eigen/AlignedVector3
index a1510f19d..37018bfc6 100644
--- a/unsupported/Eigen/AlignedVector3
+++ b/unsupported/Eigen/AlignedVector3
@@ -106,7 +106,7 @@ template<typename _Scalar> class AlignedVector3
     {
       inline static void run(AlignedVector3& dest, const XprType& src)
       {
-        dest.m_coeffs.template start<3>() = src;
+        dest.m_coeffs.template head<3>() = src;
         dest.m_coeffs.w() = Scalar(0);
       }
     };
@@ -190,7 +190,7 @@ template<typename _Scalar> class AlignedVector3
     template<typename Derived>
     inline bool isApprox(const MatrixBase<Derived>& other, RealScalar eps=dummy_precision<Scalar>()) const
     {
-      return m_coeffs.template start<3>().isApprox(other,eps);
+      return m_coeffs.template head<3>().isApprox(other,eps);
     }
 };
 
diff --git a/unsupported/Eigen/FFT b/unsupported/Eigen/FFT
index a43cd8d97..8702120de 100644
--- a/unsupported/Eigen/FFT
+++ b/unsupported/Eigen/FFT
@@ -213,7 +213,7 @@ class FFT
         int nfft = src.size();
         int nout = HasFlag(HalfSpectrum) ? ((nfft>>1)+1) : nfft;
         dst.derived().resize( nout );
-        inv( &dst[0],&src[0],src.size() );
+        inv( &dst[0],&src[0], nfft);
     }
 
     template <typename _Output>
diff --git a/unsupported/Eigen/NonLinearOptimization b/unsupported/Eigen/NonLinearOptimization
index c413af436..f15e09386 100644
--- a/unsupported/Eigen/NonLinearOptimization
+++ b/unsupported/Eigen/NonLinearOptimization
@@ -27,6 +27,7 @@
 
 #include <Eigen/Core>
 #include <Eigen/Jacobi>
+#include <Eigen/QR>
 #include <unsupported/Eigen/NumericalDiff>
 
 namespace Eigen {
diff --git a/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h b/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h
index 7103ac541..43539f549 100644
--- a/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h
+++ b/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h
@@ -40,9 +40,13 @@ struct ei_stem_function
   * \param[in]  f      an entire function; \c f(x,n) should compute the n-th derivative of f at x.
   * \param[out] result pointer to the matrix in which to store the result, \f$ f(M) \f$.
   *
-  * Suppose that \f$ f \f$ is an entire function (that is, a function
-  * on the complex plane that is everywhere complex differentiable).
-  * Then its Taylor series
+  * This function computes \f$ f(A) \f$ and stores the result in the
+  * matrix pointed to by \p result.
+  *
+  * %Matrix functions are defined as follows.  Suppose that \f$ f \f$
+  * is an entire function (that is, a function on the complex plane
+  * that is everywhere complex differentiable).  Then its Taylor
+  * series
   * \f[ f(0) + f'(0) x + \frac{f''(0)}{2} x^2 + \frac{f'''(0)}{3!} x^3 + \cdots \f]
   * converges to \f$ f(x) \f$. In this case, we can define the matrix
   * function by the same series:
@@ -53,6 +57,8 @@ struct ei_stem_function
   * "A Schur-Parlett algorithm for computing matrix functions", 
   * <em>SIAM J. %Matrix Anal. Applic.</em>, <b>25</b>:464&ndash;485, 2003.
   *
+  * The actual work is done by the MatrixFunction class.
+  *
   * Example: The following program checks that
   * \f[ \exp \left[ \begin{array}{ccc} 
   *       0 & \frac14\pi & 0 \\ 
@@ -78,398 +84,430 @@ EIGEN_STRONG_INLINE void ei_matrix_function(const MatrixBase<Derived>& M,
 					    typename ei_stem_function<typename ei_traits<Derived>::Scalar>::type f,
 					    typename MatrixBase<Derived>::PlainMatrixType* result);
 
+#include "MatrixFunctionAtomic.h"
+
 
 /** \ingroup MatrixFunctions_Module 
-  * \class MatrixFunction
   * \brief Helper class for computing matrix functions. 
   */
-template <typename MatrixType, 
-  int IsComplex = NumTraits<typename ei_traits<MatrixType>::Scalar>::IsComplex,
-  int IsDynamic = ( (ei_traits<MatrixType>::RowsAtCompileTime == Dynamic) 
-		    && (ei_traits<MatrixType>::RowsAtCompileTime == Dynamic) ) >
-class MatrixFunction;
+template <typename MatrixType, int IsComplex = NumTraits<typename ei_traits<MatrixType>::Scalar>::IsComplex>
+class MatrixFunction
+{  
+  private:
 
-/* Partial specialization of MatrixFunction for real matrices */
+    typedef typename ei_traits<MatrixType>::Scalar Scalar;
+    typedef typename ei_stem_function<Scalar>::type StemFunction;
 
-template <typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols, int IsDynamic>
-class MatrixFunction<Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols>, 0, IsDynamic>
-{  
   public:
 
+    /** \brief Constructor. Computes matrix function. 
+      *
+      * \param[in]  A      argument of matrix function, should be a square matrix.
+      * \param[in]  f      an entire function; \c f(x,n) should compute the n-th derivative of f at x.
+      * \param[out] result pointer to the matrix in which to store the result, \f$ f(A) \f$.
+      *
+      * This function computes \f$ f(A) \f$ and stores the result in
+      * the matrix pointed to by \p result.
+      *
+      * See ei_matrix_function() for details.
+      */
+    MatrixFunction(const MatrixType& A, StemFunction f, MatrixType* result);
+};
+
+
+/** \ingroup MatrixFunctions_Module 
+  * \brief Partial specialization of MatrixFunction for real matrices \internal 
+  */
+template <typename MatrixType>
+class MatrixFunction<MatrixType, 0>
+{  
+  private:
+
+    typedef ei_traits<MatrixType> Traits;
+    typedef typename Traits::Scalar Scalar;
+    static const int Rows = Traits::RowsAtCompileTime;
+    static const int Cols = Traits::ColsAtCompileTime;
+    static const int Options = MatrixType::Options;
+    static const int MaxRows = Traits::MaxRowsAtCompileTime;
+    static const int MaxCols = Traits::MaxColsAtCompileTime;
+
     typedef std::complex<Scalar> ComplexScalar;
-    typedef Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols> MatrixType;
     typedef Matrix<ComplexScalar, Rows, Cols, Options, MaxRows, MaxCols> ComplexMatrix;
     typedef typename ei_stem_function<Scalar>::type StemFunction;
 
+  public:
+
+    /** \brief Constructor. Computes matrix function. 
+      *
+      * \param[in]  A      argument of matrix function, should be a square matrix.
+      * \param[in]  f      an entire function; \c f(x,n) should compute the n-th derivative of f at x.
+      * \param[out] result pointer to the matrix in which to store the result, \f$ f(A) \f$.
+      *
+      * This function converts the real matrix \c A to a complex matrix,
+      * uses MatrixFunction<MatrixType,1> and then converts the result back to
+      * a real matrix.
+      */
     MatrixFunction(const MatrixType& A, StemFunction f, MatrixType* result) 
     {
       ComplexMatrix CA = A.template cast<ComplexScalar>();
       ComplexMatrix Cresult;
       MatrixFunction<ComplexMatrix>(CA, f, &Cresult);
-      result->resize(A.cols(), A.rows());
-      for (int j = 0; j < A.cols(); j++)
-	for (int i = 0; i < A.rows(); i++)
-	  (*result)(i,j) = std::real(Cresult(i,j));
+      *result = Cresult.real();
     }
 };
-      
-/* Partial specialization of MatrixFunction for complex static-size matrices */
-
-template <typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols>
-class MatrixFunction<Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols>, 1, 0>
-{  
-  public:
-
-    typedef Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols> MatrixType;
-    typedef Matrix<Scalar, Dynamic, Dynamic, Options, MaxRows, MaxCols> DynamicMatrix;
-    typedef typename ei_stem_function<Scalar>::type StemFunction;
 
-    MatrixFunction(const MatrixType& A, StemFunction f, MatrixType* result) 
-    {
-      DynamicMatrix DA = A;
-      DynamicMatrix Dresult;
-      MatrixFunction<DynamicMatrix>(DA, f, &Dresult);
-      *result = Dresult;
-    }
-};
       
-/* Partial specialization of MatrixFunction for complex dynamic-size matrices */
-  
+/** \ingroup MatrixFunctions_Module 
+  * \brief Partial specialization of MatrixFunction for complex matrices \internal 
+  */
 template <typename MatrixType>
-class MatrixFunction<MatrixType, 1, 1>
+class MatrixFunction<MatrixType, 1>
 {
-  public:
+  private:
 
     typedef ei_traits<MatrixType> Traits;
     typedef typename Traits::Scalar Scalar;
+    static const int RowsAtCompileTime = Traits::RowsAtCompileTime;
+    static const int ColsAtCompileTime = Traits::ColsAtCompileTime;
+    static const int Options = MatrixType::Options;
     typedef typename NumTraits<Scalar>::Real RealScalar;
     typedef typename ei_stem_function<Scalar>::type StemFunction;
     typedef Matrix<Scalar, Traits::RowsAtCompileTime, 1> VectorType;
     typedef Matrix<int, Traits::RowsAtCompileTime, 1> IntVectorType;
-    typedef std::list<Scalar> listOfScalars;
-    typedef std::list<listOfScalars> listOfLists;
-
-    /** \brief Compute matrix function. 
-     *
-     * \param A      argument of matrix function.
-     * \param f      function to compute.
-     * \param result pointer to the matrix in which to store the result.
-     */
+    typedef std::list<Scalar> Cluster;
+    typedef std::list<Cluster> ListOfClusters;
+    typedef Matrix<Scalar, Dynamic, Dynamic, Options, RowsAtCompileTime, ColsAtCompileTime> DynMatrixType;
+
+  public:
+
+    /** \brief Constructor. Computes matrix function. 
+      *
+      * \param[in]  A      argument of matrix function, should be a square matrix.
+      * \param[in]  f      an entire function; \c f(x,n) should compute the n-th derivative of f at x.
+      * \param[out] result pointer to the matrix in which to store the result, \f$ f(A) \f$.
+      */
     MatrixFunction(const MatrixType& A, StemFunction f, MatrixType* result);
 
   private:
 
-    // Prevent copying
-    MatrixFunction(const MatrixFunction&);
-    MatrixFunction& operator=(const MatrixFunction&);
-
-    void separateBlocksInSchur(MatrixType& T, MatrixType& U, IntVectorType& blockSize);
-    void permuteSchur(const IntVectorType& permutation, MatrixType& T, MatrixType& U);
-    void swapEntriesInSchur(int index, MatrixType& T, MatrixType& U);
-    void computeTriangular(const MatrixType& T, MatrixType& result, const IntVectorType& blockSize);
-    void computeBlockAtomic(const MatrixType& T, MatrixType& result, const IntVectorType& blockSize);
-    MatrixType solveSylvester(const MatrixType& A, const MatrixType& B, const MatrixType& C);
-    MatrixType computeAtomic(const MatrixType& T);
-    void divideInBlocks(const VectorType& v, listOfLists* result);
-    void constructPermutation(const VectorType& diag, const listOfLists& blocks, 
-			      IntVectorType& blockSize, IntVectorType& permutation);
-
-    RealScalar computeMu(const MatrixType& M);
-    bool taylorConverged(const MatrixType& T, int s, const MatrixType& F, 
-			 const MatrixType& Fincr, const MatrixType& P, RealScalar mu);
-
+    void computeSchurDecomposition(const MatrixType& A);
+    void partitionEigenvalues();
+    typename ListOfClusters::iterator findCluster(Scalar key);
+    void computeClusterSize();
+    void computeBlockStart();
+    void constructPermutation();
+    void permuteSchur();
+    void swapEntriesInSchur(int index);
+    void computeBlockAtomic();
+    Block<MatrixType> block(const MatrixType& A, int i, int j);
+    void computeOffDiagonal();
+    DynMatrixType solveTriangularSylvester(const DynMatrixType& A, const DynMatrixType& B, const DynMatrixType& C);
+
+    StemFunction *m_f; /**< \brief Stem function for matrix function under consideration */
+    MatrixType m_T; /**< \brief Triangular part of Schur decomposition */
+    MatrixType m_U; /**< \brief Unitary part of Schur decomposition */
+    MatrixType m_fT; /**< \brief %Matrix function applied to #m_T */
+    ListOfClusters m_clusters; /**< \brief Partition of eigenvalues into clusters of ei'vals "close" to each other */
+    VectorXi m_eivalToCluster; /**< \brief m_eivalToCluster[i] = j means i-th ei'val is in j-th cluster */
+    VectorXi m_clusterSize; /**< \brief Number of eigenvalues in each clusters  */
+    VectorXi m_blockStart; /**< \brief Row index at which block corresponding to i-th cluster starts */
+    IntVectorType m_permutation; /**< \brief Permutation which groups ei'vals in the same cluster together */
+
+    /** \brief Maximum distance allowed between eigenvalues to be considered "close".
+      *
+      * This is morally a \c static \c const \c Scalar, but only
+      * integers can be static constant class members in C++. The
+      * separation constant is set to 0.01, a value taken from the
+      * paper by Davies and Higham. */
     static const RealScalar separation() { return static_cast<RealScalar>(0.01); }
-    StemFunction *m_f;
 };
 
 template <typename MatrixType>
-MatrixFunction<MatrixType,1,1>::MatrixFunction(const MatrixType& A, StemFunction f, MatrixType* result) :
+MatrixFunction<MatrixType,1>::MatrixFunction(const MatrixType& A, StemFunction f, MatrixType* result) :
   m_f(f)
 {
-  if (A.rows() == 1) {
-    result->resize(1,1);
-    (*result)(0,0) = f(A(0,0), 0);
-  } else {
-    const ComplexSchur<MatrixType> schurOfA(A);  
-    MatrixType T = schurOfA.matrixT();
-    MatrixType U = schurOfA.matrixU();
-    IntVectorType blockSize, permutation;
-    separateBlocksInSchur(T, U, blockSize);
-    MatrixType fT;
-    computeTriangular(T, fT, blockSize);
-    *result = U * fT * U.adjoint();
-  }
+  computeSchurDecomposition(A);
+  partitionEigenvalues();
+  computeClusterSize();
+  computeBlockStart();
+  constructPermutation();
+  permuteSchur();
+  computeBlockAtomic();
+  computeOffDiagonal();
+  *result = m_U * m_fT * m_U.adjoint();
 }
 
+/** \brief Store the Schur decomposition of \p A in #m_T and #m_U */
 template <typename MatrixType>
-void MatrixFunction<MatrixType,1,1>::separateBlocksInSchur(MatrixType& T, MatrixType& U, IntVectorType& blockSize)
+void MatrixFunction<MatrixType,1>::computeSchurDecomposition(const MatrixType& A)
 {
-  const VectorType d = T.diagonal();
-  listOfLists blocks;
-  divideInBlocks(d, &blocks);
-
-  IntVectorType permutation;
-  constructPermutation(d, blocks, blockSize, permutation);
-  permuteSchur(permutation, T, U);
+  const ComplexSchur<MatrixType> schurOfA(A);  
+  m_T = schurOfA.matrixT();
+  m_U = schurOfA.matrixU();
 }
 
+/** \brief Partition eigenvalues in clusters of ei'vals close to each other
+  * 
+  * This function computes #m_clusters. This is a partition of the
+  * eigenvalues of #m_T in clusters, such that
+  * # Any eigenvalue in a certain cluster is at most separation() away
+  *   from another eigenvalue in the same cluster.
+  * # The distance between two eigenvalues in different clusters is
+  *   more than separation().
+  * The implementation follows Algorithm 4.1 in the paper of Davies
+  * and Higham. 
+  */
 template <typename MatrixType>
-void MatrixFunction<MatrixType,1,1>::permuteSchur(const IntVectorType& permutation, MatrixType& T, MatrixType& U)
+void MatrixFunction<MatrixType,1>::partitionEigenvalues()
 {
-  IntVectorType p = permutation;
-  for (int i = 0; i < p.rows() - 1; i++) {
-    int j;
-    for (j = i; j < p.rows(); j++) {
-      if (p(j) == i) break;
+  const int rows = m_T.rows();
+  VectorType diag = m_T.diagonal(); // contains eigenvalues of A
+
+  for (int i=0; i<rows; ++i) {
+    // Find set containing diag(i), adding a new set if necessary
+    typename ListOfClusters::iterator qi = findCluster(diag(i));
+    if (qi == m_clusters.end()) {
+      Cluster l;
+      l.push_back(diag(i));
+      m_clusters.push_back(l);
+      qi = m_clusters.end();
+      --qi;
     }
-    ei_assert(p(j) == i);
-    for (int k = j-1; k >= i; k--) {
-      swapEntriesInSchur(k, T, U);
-      std::swap(p.coeffRef(k), p.coeffRef(k+1));
+
+    // Look for other element to add to the set
+    for (int j=i+1; j<rows; ++j) {
+      if (ei_abs(diag(j) - diag(i)) <= separation() && std::find(qi->begin(), qi->end(), diag(j)) == qi->end()) {
+	typename ListOfClusters::iterator qj = findCluster(diag(j));
+	if (qj == m_clusters.end()) {
+	  qi->push_back(diag(j));
+	} else {
+	  qi->insert(qi->end(), qj->begin(), qj->end());
+	  m_clusters.erase(qj);
+	}
+      }
     }
   }
 }
 
-// swap T(index, index) and T(index+1, index+1)
+/** \brief Find cluster in #m_clusters containing some value 
+  * \param[in] key Value to find
+  * \returns Iterator to cluster containing \c key, or
+  * \c m_clusters.end() if no cluster in m_clusters contains \c key.
+  */
 template <typename MatrixType>
-void MatrixFunction<MatrixType,1,1>::swapEntriesInSchur(int index, MatrixType& T, MatrixType& U)
+typename MatrixFunction<MatrixType,1>::ListOfClusters::iterator MatrixFunction<MatrixType,1>::findCluster(Scalar key)
 {
-  PlanarRotation<Scalar> rotation;
-  rotation.makeGivens(T(index, index+1), T(index+1, index+1) - T(index, index));
-  T.applyOnTheLeft(index, index+1, rotation.adjoint());
-  T.applyOnTheRight(index, index+1, rotation);
-  U.applyOnTheRight(index, index+1, rotation);
-}  
-
-template <typename MatrixType>
-void MatrixFunction<MatrixType,1,1>::computeTriangular(const MatrixType& T, MatrixType& result, 
-						       const IntVectorType& blockSize)
-{ 
-  MatrixType expT;
-  ei_matrix_exponential(T, &expT);
-  computeBlockAtomic(T, result, blockSize);
-  IntVectorType blockStart(blockSize.rows());
-  blockStart(0) = 0;
-  for (int i = 1; i < blockSize.rows(); i++) {
-    blockStart(i) = blockStart(i-1) + blockSize(i-1);
-  }
-  for (int diagIndex = 1; diagIndex < blockSize.rows(); diagIndex++) {
-    for (int blockIndex = 0; blockIndex < blockSize.rows() - diagIndex; blockIndex++) {
-      // compute (blockIndex, blockIndex+diagIndex) block
-      MatrixType A = T.block(blockStart(blockIndex), blockStart(blockIndex), blockSize(blockIndex), blockSize(blockIndex));
-      MatrixType B = -T.block(blockStart(blockIndex+diagIndex), blockStart(blockIndex+diagIndex), blockSize(blockIndex+diagIndex), blockSize(blockIndex+diagIndex));
-      MatrixType C = result.block(blockStart(blockIndex), blockStart(blockIndex), blockSize(blockIndex), blockSize(blockIndex)) * T.block(blockStart(blockIndex), blockStart(blockIndex+diagIndex), blockSize(blockIndex), blockSize(blockIndex+diagIndex));
-      C -= T.block(blockStart(blockIndex), blockStart(blockIndex+diagIndex), blockSize(blockIndex), blockSize(blockIndex+diagIndex)) * result.block(blockStart(blockIndex+diagIndex), blockStart(blockIndex+diagIndex), blockSize(blockIndex+diagIndex), blockSize(blockIndex+diagIndex));
-      for (int k = blockIndex + 1; k < blockIndex + diagIndex; k++) {
-	C += result.block(blockStart(blockIndex), blockStart(k), blockSize(blockIndex), blockSize(k)) * T.block(blockStart(k), blockStart(blockIndex+diagIndex), blockSize(k), blockSize(blockIndex+diagIndex));
-	C -= T.block(blockStart(blockIndex), blockStart(k), blockSize(blockIndex), blockSize(k)) * result.block(blockStart(k), blockStart(blockIndex+diagIndex), blockSize(k), blockSize(blockIndex+diagIndex));
-      }
-      result.block(blockStart(blockIndex), blockStart(blockIndex+diagIndex), blockSize(blockIndex), blockSize(blockIndex+diagIndex)) = solveSylvester(A, B, C);
-    }
+  typename Cluster::iterator j;
+  for (typename ListOfClusters::iterator i = m_clusters.begin(); i != m_clusters.end(); ++i) {
+    j = std::find(i->begin(), i->end(), key);
+    if (j != i->end())
+      return i;
   }
+  return m_clusters.end();
 }
 
-// solve AX + XB = C  <=>  U* A' U X V V* + U* U X V B' V* = U* U C V V*  <=>  A' U X V + U X V B' = U C V 
-// Schur: A* = U A'* U* (so A = U* A' U), B = V B' V*, define: X' = U X V, C' = U C V, to get: A' X' + X' B' = C'
-// A is m-by-m, B is n-by-n, X is m-by-n, C is m-by-n, U is m-by-m, V is n-by-n
+/** \brief Compute #m_clusterSize and #m_eivalToCluster using #m_clusters */
 template <typename MatrixType>
-MatrixType MatrixFunction<MatrixType,1,1>::solveSylvester(const MatrixType& A, const MatrixType& B, const MatrixType& C)
+void MatrixFunction<MatrixType,1>::computeClusterSize()
 {
-  MatrixType U = MatrixType::Zero(A.rows(), A.rows());
-  for (int i = 0; i < A.rows(); i++) {
-    U(i, A.rows() - 1 - i) = static_cast<Scalar>(1);
-  }
-  MatrixType Aprime = U * A * U;
-
-  MatrixType Bprime = B;
-  MatrixType V = MatrixType::Identity(B.rows(), B.rows());
-
-  MatrixType Cprime = U * C * V;
-  MatrixType Xprime(A.rows(), B.rows());
-  for (int l = 0; l < B.rows(); l++) {
-    for (int k = 0; k < A.rows(); k++) {
-      Scalar tmp1, tmp2;
-      if (k == 0) {
-	tmp1 = 0; 
-      } else {
-	Matrix<Scalar,1,1> tmp1matrix = Aprime.row(k).start(k) * Xprime.col(l).start(k);
-	tmp1 = tmp1matrix(0,0);
-      }
-      if (l == 0) {
-	tmp2 = 0; 
-      } else {
-	Matrix<Scalar,1,1> tmp2matrix = Xprime.row(k).start(l) * Bprime.col(l).start(l);
-	tmp2 = tmp2matrix(0,0);
+  const int rows = m_T.rows();
+  VectorType diag = m_T.diagonal(); 
+  const int numClusters = m_clusters.size();
+
+  m_clusterSize.setZero(numClusters);
+  m_eivalToCluster.resize(rows);
+  int clusterIndex = 0;
+  for (typename ListOfClusters::const_iterator cluster = m_clusters.begin(); cluster != m_clusters.end(); ++cluster) {
+    for (int i = 0; i < diag.rows(); ++i) {
+      if (std::find(cluster->begin(), cluster->end(), diag(i)) != cluster->end()) {
+	++m_clusterSize[clusterIndex];
+	m_eivalToCluster[i] = clusterIndex;
       }
-      Xprime(k,l) = (Cprime(k,l) - tmp1 - tmp2) / (Aprime(k,k) + Bprime(l,l));
     }
+    ++clusterIndex;
   }
-  return U.adjoint() * Xprime * V.adjoint();
 }
 
-
-// does not touch irrelevant parts of T
+/** \brief Compute #m_blockStart using #m_clusterSize */
 template <typename MatrixType>
-void MatrixFunction<MatrixType,1,1>::computeBlockAtomic(const MatrixType& T, MatrixType& result, 
-							const IntVectorType& blockSize)
-{ 
-  int blockStart = 0;
-  result.resize(T.rows(), T.cols());
-  result.setZero();
-  for (int i = 0; i < blockSize.rows(); i++) {
-    result.block(blockStart, blockStart, blockSize(i), blockSize(i))
-      = computeAtomic(T.block(blockStart, blockStart, blockSize(i), blockSize(i)));
-    blockStart += blockSize(i);
+void MatrixFunction<MatrixType,1>::computeBlockStart()
+{
+  m_blockStart.resize(m_clusterSize.rows());
+  m_blockStart(0) = 0;
+  for (int i = 1; i < m_clusterSize.rows(); i++) {
+    m_blockStart(i) = m_blockStart(i-1) + m_clusterSize(i-1);
   }
 }
 
-template <typename Scalar>
-typename std::list<std::list<Scalar> >::iterator ei_find_in_list_of_lists(typename std::list<std::list<Scalar> >& ll, Scalar x)
+/** \brief Compute #m_permutation using #m_eivalToCluster and #m_blockStart */
+template <typename MatrixType>
+void MatrixFunction<MatrixType,1>::constructPermutation()
 {
-  typename std::list<Scalar>::iterator j;
-  for (typename std::list<std::list<Scalar> >::iterator i = ll.begin(); i != ll.end(); i++) {
-    j = std::find(i->begin(), i->end(), x);
-    if (j != i->end())
-      return i;
+  VectorXi indexNextEntry = m_blockStart;
+  m_permutation.resize(m_T.rows());
+  for (int i = 0; i < m_T.rows(); i++) {
+    int cluster = m_eivalToCluster[i];
+    m_permutation[i] = indexNextEntry[cluster];
+    ++indexNextEntry[cluster];
   }
-  return ll.end();
-}
+}  
 
-// Alg 4.1
+/** \brief Permute Schur decomposition in #m_U and #m_T according to #m_permutation */
 template <typename MatrixType>
-void MatrixFunction<MatrixType,1,1>::divideInBlocks(const VectorType& v, listOfLists* result)
+void MatrixFunction<MatrixType,1>::permuteSchur()
 {
-  const int n = v.rows();
-  for (int i=0; i<n; i++) {
-    // Find set containing v(i), adding a new set if necessary
-    typename listOfLists::iterator qi = ei_find_in_list_of_lists(*result, v(i));
-    if (qi == result->end()) {
-      listOfScalars l;
-      l.push_back(v(i));
-      result->push_back(l);
-      qi = result->end();
-      qi--;
+  IntVectorType p = m_permutation;
+  for (int i = 0; i < p.rows() - 1; i++) {
+    int j;
+    for (j = i; j < p.rows(); j++) {
+      if (p(j) == i) break;
     }
-    // Look for other element to add to the set
-    for (int j=i+1; j<n; j++) {
-      if (ei_abs(v(j) - v(i)) <= separation() && std::find(qi->begin(), qi->end(), v(j)) == qi->end()) {
-	typename listOfLists::iterator qj = ei_find_in_list_of_lists(*result, v(j));
-	if (qj == result->end()) {
-	  qi->push_back(v(j));
-	} else {
-	  qi->insert(qi->end(), qj->begin(), qj->end());
-	  result->erase(qj);
-	}
-      }
+    ei_assert(p(j) == i);
+    for (int k = j-1; k >= i; k--) {
+      swapEntriesInSchur(k);
+      std::swap(p.coeffRef(k), p.coeffRef(k+1));
     }
   }
 }
 
-// Construct permutation P, such that P(D) has eigenvalues clustered together
+/** \brief Swap rows \a index and \a index+1 in Schur decomposition in #m_U and #m_T */
 template <typename MatrixType>
-void MatrixFunction<MatrixType,1,1>::constructPermutation(const VectorType& diag, const listOfLists& blocks, 
-							  IntVectorType& blockSize, IntVectorType& permutation)
+void MatrixFunction<MatrixType,1>::swapEntriesInSchur(int index)
 {
-  const int n = diag.rows();
-  const int numBlocks = blocks.size();
-
-  // For every block in blocks, mark and count the entries in diag that
-  // appear in that block
-  blockSize.setZero(numBlocks);
-  IntVectorType entryToBlock(n);
-  int blockIndex = 0;
-  for (typename listOfLists::const_iterator block = blocks.begin(); block != blocks.end(); block++) {
-    for (int i = 0; i < diag.rows(); i++) {
-      if (std::find(block->begin(), block->end(), diag(i)) != block->end()) {
-	blockSize[blockIndex]++;
-	entryToBlock[i] = blockIndex;
-      }
-    }
-    blockIndex++;
-  }
-
-  // Compute index of first entry in every block as the sum of sizes
-  // of all the preceding blocks
-  IntVectorType indexNextEntry(numBlocks);
-  indexNextEntry[0] = 0;
-  for (blockIndex = 1; blockIndex < numBlocks; blockIndex++) {
-    indexNextEntry[blockIndex] = indexNextEntry[blockIndex-1] + blockSize[blockIndex-1];
-  }
-      
-  // Construct permutation 
-  permutation.resize(n);
-  for (int i = 0; i < n; i++) {
-    int block = entryToBlock[i];
-    permutation[i] = indexNextEntry[block];
-    indexNextEntry[block]++;
-  }
+  PlanarRotation<Scalar> rotation;
+  rotation.makeGivens(m_T(index, index+1), m_T(index+1, index+1) - m_T(index, index));
+  m_T.applyOnTheLeft(index, index+1, rotation.adjoint());
+  m_T.applyOnTheRight(index, index+1, rotation);
+  m_U.applyOnTheRight(index, index+1, rotation);
 }  
 
+/** \brief Compute block diagonal part of #m_fT.
+  *
+  * This routine computes the matrix function #m_f applied to the block
+  * diagonal part of #m_T, with the blocking given by #m_blockStart. The
+  * result is stored in #m_fT. The off-diagonal parts of #m_fT are set
+  * to zero.
+  */
 template <typename MatrixType>
-MatrixType MatrixFunction<MatrixType,1,1>::computeAtomic(const MatrixType& T)
-{
-  // TODO: Use that T is upper triangular
-  const int n = T.rows();
-  const Scalar sigma = T.trace() / Scalar(n);
-  const MatrixType M = T - sigma * MatrixType::Identity(n, n);
-  const RealScalar mu = computeMu(M);
-  MatrixType F = m_f(sigma, 0) * MatrixType::Identity(n, n);
-  MatrixType P = M;
-  MatrixType Fincr;
-  for (int s = 1; s < 1.1*n + 10; s++) { // upper limit is fairly arbitrary
-    Fincr = m_f(sigma, s) * P;
-    F += Fincr;
-    P = (1/(s + 1.0)) * P * M;
-    if (taylorConverged(T, s, F, Fincr, P, mu)) {
-      return F;
-    }
+void MatrixFunction<MatrixType,1>::computeBlockAtomic()
+{ 
+  m_fT.resize(m_T.rows(), m_T.cols());
+  m_fT.setZero();
+  MatrixFunctionAtomic<DynMatrixType> mfa(m_f);
+  for (int i = 0; i < m_clusterSize.rows(); ++i) {
+    block(m_fT, i, i) = mfa.compute(block(m_T, i, i));
   }
-  ei_assert("Taylor series does not converge" && 0);
-  return F;
 }
 
+/** \brief Return block of matrix according to blocking given by #m_blockStart */
 template <typename MatrixType>
-typename MatrixFunction<MatrixType,1,1>::RealScalar MatrixFunction<MatrixType,1,1>::computeMu(const MatrixType& M)
+Block<MatrixType> MatrixFunction<MatrixType,1>::block(const MatrixType& A, int i, int j)
 {
-  const int n = M.rows();
-  const MatrixType N = MatrixType::Identity(n, n) - M;
-  VectorType e = VectorType::Ones(n);
-  N.template triangularView<UpperTriangular>().solveInPlace(e);
-  return e.cwise().abs().maxCoeff();
+  return A.block(m_blockStart(i), m_blockStart(j), m_clusterSize(i), m_clusterSize(j));
+}
+
+/** \brief Compute part of #m_fT above block diagonal.
+  *
+  * This routine assumes that the block diagonal part of #m_fT (which
+  * equals #m_f applied to #m_T) has already been computed and computes
+  * the part above the block diagonal. The part below the diagonal is
+  * zero, because #m_T is upper triangular.
+  */
+template <typename MatrixType>
+void MatrixFunction<MatrixType,1>::computeOffDiagonal()
+{ 
+  for (int diagIndex = 1; diagIndex < m_clusterSize.rows(); diagIndex++) {
+    for (int blockIndex = 0; blockIndex < m_clusterSize.rows() - diagIndex; blockIndex++) {
+      // compute (blockIndex, blockIndex+diagIndex) block
+      DynMatrixType A = block(m_T, blockIndex, blockIndex);
+      DynMatrixType B = -block(m_T, blockIndex+diagIndex, blockIndex+diagIndex);
+      DynMatrixType C = block(m_fT, blockIndex, blockIndex) * block(m_T, blockIndex, blockIndex+diagIndex);
+      C -= block(m_T, blockIndex, blockIndex+diagIndex) * block(m_fT, blockIndex+diagIndex, blockIndex+diagIndex);
+      for (int k = blockIndex + 1; k < blockIndex + diagIndex; k++) {
+	C += block(m_fT, blockIndex, k) * block(m_T, k, blockIndex+diagIndex);
+	C -= block(m_T, blockIndex, k) * block(m_fT, k, blockIndex+diagIndex);
+      }
+      block(m_fT, blockIndex, blockIndex+diagIndex) = solveTriangularSylvester(A, B, C);
+    }
+  }
 }
 
+/** \brief Solve a triangular Sylvester equation AX + XB = C 
+  *
+  * \param[in]  A  the matrix A; should be square and upper triangular
+  * \param[in]  B  the matrix B; should be square and upper triangular
+  * \param[in]  C  the matrix C; should have correct size.
+  *
+  * \returns the solution X.
+  *
+  * If A is m-by-m and B is n-by-n, then both C and X are m-by-n. 
+  * The (i,j)-th component of the Sylvester equation is
+  * \f[ 
+  *     \sum_{k=i}^m A_{ik} X_{kj} + \sum_{k=1}^j X_{ik} B_{kj} = C_{ij}. 
+  * \f]
+  * This can be re-arranged to yield:
+  * \f[ 
+  *     X_{ij} = \frac{1}{A_{ii} + B_{jj}} \Bigl( C_{ij}
+  *     - \sum_{k=i+1}^m A_{ik} X_{kj} - \sum_{k=1}^{j-1} X_{ik} B_{kj} \Bigr).
+  * \f]
+  * It is assumed that A and B are such that the numerator is never
+  * zero (otherwise the Sylvester equation does not have a unique
+  * solution). In that case, these equations can be evaluated in the
+  * order \f$ i=m,\ldots,1 \f$ and \f$ j=1,\ldots,n \f$.
+  */
 template <typename MatrixType>
-bool MatrixFunction<MatrixType,1,1>::taylorConverged(const MatrixType& T, int s, const MatrixType& F, 
-						   const MatrixType& Fincr, const MatrixType& P, RealScalar mu)
+typename MatrixFunction<MatrixType,1>::DynMatrixType MatrixFunction<MatrixType,1>::solveTriangularSylvester(
+  const DynMatrixType& A, 
+  const DynMatrixType& B, 
+  const DynMatrixType& C)
 {
-  const int n = F.rows();
-  const RealScalar F_norm = F.cwise().abs().rowwise().sum().maxCoeff();
-  const RealScalar Fincr_norm = Fincr.cwise().abs().rowwise().sum().maxCoeff();
-  if (Fincr_norm < epsilon<Scalar>() * F_norm) {
-    RealScalar delta = 0;
-    RealScalar rfactorial = 1;
-    for (int r = 0; r < n; r++) {
-      RealScalar mx = 0;
-      for (int i = 0; i < n; i++) 
-	mx = std::max(mx, std::abs(m_f(T(i, i), s+r)));
-       if (r != 0)
-	rfactorial *= r;
-      delta = std::max(delta, mx / rfactorial);
+  ei_assert(A.rows() == A.cols());
+  ei_assert(A.isUpperTriangular());
+  ei_assert(B.rows() == B.cols());
+  ei_assert(B.isUpperTriangular());
+  ei_assert(C.rows() == A.rows());
+  ei_assert(C.cols() == B.rows());
+
+  int m = A.rows();
+  int n = B.rows();
+  DynMatrixType X(m, n);
+
+  for (int i = m - 1; i >= 0; --i) {
+    for (int j = 0; j < n; ++j) {
+
+      // Compute AX = \sum_{k=i+1}^m A_{ik} X_{kj}
+      Scalar AX;
+      if (i == m - 1) {
+	AX = 0; 
+      } else {
+	Matrix<Scalar,1,1> AXmatrix = A.row(i).tail(m-1-i) * X.col(j).tail(m-1-i);
+	AX = AXmatrix(0,0);
+      }
+
+      // Compute XB = \sum_{k=1}^{j-1} X_{ik} B_{kj}
+      Scalar XB;
+      if (j == 0) {
+	XB = 0; 
+      } else {
+	Matrix<Scalar,1,1> XBmatrix = X.row(i).head(j) * B.col(j).head(j);
+	XB = XBmatrix(0,0);
+      }
+
+      X(i,j) = (C(i,j) - AX - XB) / (A(i,i) + B(j,j));
     }
-    const RealScalar P_norm = P.cwise().abs().rowwise().sum().maxCoeff();
-    if (mu * delta * P_norm < epsilon<Scalar>() * F_norm) 
-      return true;
   }
-  return false;
+  return X;
 }
 
+
 template <typename Derived>
 EIGEN_STRONG_INLINE void ei_matrix_function(const MatrixBase<Derived>& M, 
 					    typename ei_stem_function<typename ei_traits<Derived>::Scalar>::type f,
 					    typename MatrixBase<Derived>::PlainMatrixType* result)
 {
   ei_assert(M.rows() == M.cols());
-  MatrixFunction<typename MatrixBase<Derived>::PlainMatrixType>(M, f, result);
+  typedef typename MatrixBase<Derived>::PlainMatrixType PlainMatrixType;
+  MatrixFunction<PlainMatrixType>(M, f, result);
 }
 
 #endif // EIGEN_MATRIX_FUNCTION
diff --git a/unsupported/Eigen/src/MatrixFunctions/MatrixFunctionAtomic.h b/unsupported/Eigen/src/MatrixFunctions/MatrixFunctionAtomic.h
new file mode 100644
index 000000000..117ee82d7
--- /dev/null
+++ b/unsupported/Eigen/src/MatrixFunctions/MatrixFunctionAtomic.h
@@ -0,0 +1,142 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009 Jitse Niesen <jitse@maths.leeds.ac.uk>
+//
+// Eigen is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 3 of the License, or (at your option) any later version.
+//
+// Alternatively, you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of
+// the License, or (at your option) any later version.
+//
+// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License and a copy of the GNU General Public License along with
+// Eigen. If not, see <http://www.gnu.org/licenses/>.
+
+#ifndef EIGEN_MATRIX_FUNCTION_ATOMIC
+#define EIGEN_MATRIX_FUNCTION_ATOMIC
+
+/** \ingroup MatrixFunctions_Module 
+  * \class MatrixFunctionAtomic
+  * \brief Helper class for computing matrix functions of atomic matrices.
+  *
+  * \internal
+  * Here, an atomic matrix is a triangular matrix whose diagonal
+  * entries are close to each other.
+  */
+template <typename MatrixType>
+class MatrixFunctionAtomic
+{
+  public:
+
+    typedef ei_traits<MatrixType> Traits;
+    typedef typename Traits::Scalar Scalar;
+    typedef typename NumTraits<Scalar>::Real RealScalar;
+    typedef typename ei_stem_function<Scalar>::type StemFunction;
+    typedef Matrix<Scalar, Traits::RowsAtCompileTime, 1> VectorType;
+
+    /** \brief Constructor
+      * \param[in]  f  matrix function to compute.
+      */
+    MatrixFunctionAtomic(StemFunction f) : m_f(f) { }
+
+    /** \brief Compute matrix function of atomic matrix
+      * \param[in]  A  argument of matrix function, should be upper triangular and atomic
+      * \returns  f(A), the matrix function evaluated at the given matrix
+      */
+    MatrixType compute(const MatrixType& A);
+
+  private:
+
+    // Prevent copying
+    MatrixFunctionAtomic(const MatrixFunctionAtomic&);
+    MatrixFunctionAtomic& operator=(const MatrixFunctionAtomic&);
+
+    void computeMu();
+    bool taylorConverged(int s, const MatrixType& F, const MatrixType& Fincr, const MatrixType& P);
+
+    /** \brief Pointer to scalar function */
+    StemFunction* m_f;
+
+    /** \brief Size of matrix function */
+    int m_Arows;
+
+    /** \brief Mean of eigenvalues */
+    Scalar m_avgEival;
+
+    /** \brief Argument shifted by mean of eigenvalues */
+    MatrixType m_Ashifted;
+
+    /** \brief Constant used to determine whether Taylor series has converged */
+    RealScalar m_mu;
+};
+
+template <typename MatrixType>
+MatrixType MatrixFunctionAtomic<MatrixType>::compute(const MatrixType& A)
+{
+  // TODO: Use that A is upper triangular
+  m_Arows = A.rows();
+  m_avgEival = A.trace() / Scalar(m_Arows);
+  m_Ashifted = A - m_avgEival * MatrixType::Identity(m_Arows, m_Arows);
+  computeMu();
+  MatrixType F = m_f(m_avgEival, 0) * MatrixType::Identity(m_Arows, m_Arows);
+  MatrixType P = m_Ashifted;
+  MatrixType Fincr;
+  for (int s = 1; s < 1.1 * m_Arows + 10; s++) { // upper limit is fairly arbitrary
+    Fincr = m_f(m_avgEival, s) * P;
+    F += Fincr;
+    P = (1/(s + 1.0)) * P * m_Ashifted;
+    if (taylorConverged(s, F, Fincr, P)) {
+      return F;
+    }
+  }
+  ei_assert("Taylor series does not converge" && 0);
+  return F;
+}
+
+/** \brief Compute \c m_mu. */
+template <typename MatrixType>
+void MatrixFunctionAtomic<MatrixType>::computeMu()
+{
+  const MatrixType N = MatrixType::Identity(m_Arows, m_Arows) - m_Ashifted;
+  VectorType e = VectorType::Ones(m_Arows);
+  N.template triangularView<UpperTriangular>().solveInPlace(e);
+  m_mu = e.cwise().abs().maxCoeff();
+}
+
+/** \brief Determine whether Taylor series has converged */
+template <typename MatrixType>
+bool MatrixFunctionAtomic<MatrixType>::taylorConverged(int s, const MatrixType& F, 
+						       const MatrixType& Fincr, const MatrixType& P)
+{
+  const int n = F.rows();
+  const RealScalar F_norm = F.cwise().abs().rowwise().sum().maxCoeff();
+  const RealScalar Fincr_norm = Fincr.cwise().abs().rowwise().sum().maxCoeff();
+  if (Fincr_norm < epsilon<Scalar>() * F_norm) {
+    RealScalar delta = 0;
+    RealScalar rfactorial = 1;
+    for (int r = 0; r < n; r++) {
+      RealScalar mx = 0;
+      for (int i = 0; i < n; i++) 
+	mx = std::max(mx, std::abs(m_f(m_Ashifted(i, i) + m_avgEival, s+r)));
+       if (r != 0)
+	rfactorial *= r;
+      delta = std::max(delta, mx / rfactorial);
+    }
+    const RealScalar P_norm = P.cwise().abs().rowwise().sum().maxCoeff();
+    if (m_mu * delta * P_norm < epsilon<Scalar>() * F_norm) 
+      return true;
+  }
+  return false;
+}
+
+#endif // EIGEN_MATRIX_FUNCTION_ATOMIC
diff --git a/unsupported/Eigen/src/NonLinearOptimization/LevenbergMarquardt.h b/unsupported/Eigen/src/NonLinearOptimization/LevenbergMarquardt.h
index 2cf96eb14..5ab440863 100644
--- a/unsupported/Eigen/src/NonLinearOptimization/LevenbergMarquardt.h
+++ b/unsupported/Eigen/src/NonLinearOptimization/LevenbergMarquardt.h
@@ -129,6 +129,8 @@ public:
     int njev;
     int iter;
     Scalar fnorm, gnorm;
+
+    Scalar lm_param(void) { return par; }
 private:
     FunctorType &functor;
     int n;
@@ -533,7 +535,7 @@ LevenbergMarquardt<FunctorType,Scalar>::minimizeOptimumStorageOneStep(
             sing = true;
         }
         ipvt[j] = j;
-        wa2[j] = fjac.col(j).start(j).stableNorm();
+        wa2[j] = fjac.col(j).head(j).stableNorm();
     }
     if (sing) {
         ipvt.cwise()+=1;
diff --git a/unsupported/Eigen/src/NonLinearOptimization/lmpar.h b/unsupported/Eigen/src/NonLinearOptimization/lmpar.h
index b723a7e0a..e5b66c0d7 100644
--- a/unsupported/Eigen/src/NonLinearOptimization/lmpar.h
+++ b/unsupported/Eigen/src/NonLinearOptimization/lmpar.h
@@ -87,7 +87,7 @@ void ei_lmpar(
     /* calculate an upper bound, paru, for the zero of the function. */
 
     for (j = 0; j < n; ++j)
-        wa1[j] = r.col(j).start(j+1).dot(qtb.start(j+1)) / diag[ipvt[j]];
+        wa1[j] = r.col(j).head(j+1).dot(qtb.head(j+1)) / diag[ipvt[j]];
 
     gnorm = wa1.stableNorm();
     paru = gnorm / delta;
diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt
index 58af79351..77758696d 100644
--- a/unsupported/test/CMakeLists.txt
+++ b/unsupported/test/CMakeLists.txt
@@ -14,7 +14,7 @@ ei_add_test(NonLinearOptimization)
 ei_add_test(NumericalDiff)
 ei_add_test(autodiff)
 ei_add_test(BVH)
-ei_add_test(matrixExponential)
+ei_add_test(matrix_exponential)
 ei_add_test(alignedvector3)
 ei_add_test(FFT)
 
diff --git a/unsupported/test/matrixExponential.cpp b/unsupported/test/matrix_exponential.cpp
index 9e4d8e611..f155e5f98 100644
--- a/unsupported/test/matrixExponential.cpp
+++ b/unsupported/test/matrix_exponential.cpp
@@ -144,7 +144,7 @@ void randomTest(const MatrixType& m, double tol)
   }
 }
 
-void test_matrixExponential()
+void test_matrix_exponential()
 {
   CALL_SUBTEST_2(test2dRotation<double>(1e-13));
   CALL_SUBTEST_1(test2dRotation<float>(1e-5));
author	Gael Guennebaud <g.gael@free.fr>	2010-01-05 13:07:32 +0100
committer	Gael Guennebaud <g.gael@free.fr>	2010-01-05 13:07:32 +0100
commit	9d9e00b6080153ddaa26ccfce922d7814811a1ae (patch)
tree	a18d77d660e3734a21daec2637c2066afab9021d
parent	90d2ae7fec1000c244472c94af24126c5f2ca2a2 (diff)
parent	51b8f014f30d0f64fcd4f6dff4b1afa64f8ace48 (diff)