34 files changed, 2145 insertions, 271 deletions
diff --git a/Eigen/Core b/Eigen/Core
index c8fcb1c09..3dce6422f 100644
--- a/Eigen/Core
+++ b/Eigen/Core
@@ -200,6 +200,7 @@ namespace Eigen {
 #include "src/Core/products/TriangularMatrixMatrix.h"
 #include "src/Core/products/TriangularSolverMatrix.h"
 #include "src/Core/BandMatrix.h"
+#include "src/Core/ExpressionMaker.h"
 
 } // namespace Eigen
 
diff --git a/Eigen/Sparse b/Eigen/Sparse
index a8888daa3..96bd61419 100644
--- a/Eigen/Sparse
+++ b/Eigen/Sparse
@@ -110,6 +110,7 @@ namespace Eigen {
 #include "src/Sparse/SparseLLT.h"
 #include "src/Sparse/SparseLDLT.h"
 #include "src/Sparse/SparseLU.h"
+#include "src/Sparse/SparseExpressionMaker.h"
 
 #ifdef EIGEN_CHOLMOD_SUPPORT
 # include "src/Sparse/CholmodSupport.h"
diff --git a/Eigen/src/Core/Block.h b/Eigen/src/Core/Block.h
index cebfeaf75..5fffdcb01 100644
--- a/Eigen/src/Core/Block.h
+++ b/Eigen/src/Core/Block.h
@@ -33,10 +33,10 @@
   * \param MatrixType the type of the object in which we are taking a block
   * \param BlockRows the number of rows of the block we are taking at compile time (optional)
   * \param BlockCols the number of columns of the block we are taking at compile time (optional)
-  * \param _PacketAccess allows to enforce aligned loads and stores if set to \b ForceAligned.
-  *                      The default is \b AsRequested. This parameter is internaly used by Eigen
-  *                      in expressions such as \code mat.block() += other; \endcode and most of
-  *                      the time this is the only way it is used.
+  * \param _PacketAccess \internal used to enforce aligned loads in expressions such as
+  *                      \code mat.block() += other; \endcode. Possible values are
+  *                      \c AsRequested (default) and \c EnforceAlignedAccess.
+  *                      See class MapBase for more details.
   * \param _DirectAccessStatus \internal used for partial specialization
   *
   * This class represents an expression of either a fixed-size or dynamic-size block. It is the return
@@ -84,9 +84,9 @@ struct ei_traits<Block<MatrixType, BlockRows, BlockCols, _PacketAccess, _DirectA
     CoeffReadCost = ei_traits<MatrixType>::CoeffReadCost,
     PacketAccess = _PacketAccess
   };
-  typedef typename ei_meta_if<int(PacketAccess)==ForceAligned,
+  typedef typename ei_meta_if<int(PacketAccess)==EnforceAlignedAccess,
                  Block<MatrixType, BlockRows, BlockCols, _PacketAccess, _DirectAccessStatus>&,
-                 Block<MatrixType, BlockRows, BlockCols, ForceAligned, _DirectAccessStatus> >::ret AlignedDerivedType;
+                 Block<MatrixType, BlockRows, BlockCols, EnforceAlignedAccess, _DirectAccessStatus> >::ret AlignedDerivedType;
 };
 
 template<typename MatrixType, int BlockRows, int BlockCols, int PacketAccess, int _DirectAccessStatus> class Block
@@ -228,13 +228,13 @@ class Block<MatrixType,BlockRows,BlockCols,PacketAccess,HasDirectAccess>
 
     class InnerIterator;
     typedef typename ei_traits<Block>::AlignedDerivedType AlignedDerivedType;
-    friend class Block<MatrixType,BlockRows,BlockCols,PacketAccess==AsRequested?ForceAligned:AsRequested,HasDirectAccess>;
+    friend class Block<MatrixType,BlockRows,BlockCols,PacketAccess==EnforceAlignedAccess?AsRequested:EnforceAlignedAccess,HasDirectAccess>;
 
     EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Block)
 
-    AlignedDerivedType _convertToForceAligned()
+    AlignedDerivedType _convertToEnforceAlignedAccess()
     {
-      return Block<MatrixType,BlockRows,BlockCols,ForceAligned,HasDirectAccess>
+      return Block<MatrixType,BlockRows,BlockCols,EnforceAlignedAccess,HasDirectAccess>
                     (m_matrix, Base::m_data, Base::m_rows.value(), Base::m_cols.value());
     }
 
diff --git a/Eigen/src/Core/ExpressionMaker.h b/Eigen/src/Core/ExpressionMaker.h
new file mode 100644
index 000000000..1d265b63c
--- /dev/null
+++ b/Eigen/src/Core/ExpressionMaker.h
@@ -0,0 +1,61 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009 Gael Guennebaud <g.gael@free.fr>
+//
+// Eigen is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 3 of the License, or (at your option) any later version.
+//
+// Alternatively, you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of
+// the License, or (at your option) any later version.
+//
+// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License and a copy of the GNU General Public License along with
+// Eigen. If not, see <http://www.gnu.org/licenses/>.
+
+#ifndef EIGEN_EXPRESSIONMAKER_H
+#define EIGEN_EXPRESSIONMAKER_H
+
+// computes the shape of a matrix from its traits flag
+template<typename XprType> struct ei_shape_of
+{
+  enum { ret = ei_traits<XprType>::Flags&SparseBit ? IsSparse : IsDense };
+};
+
+
+// Since the Sparse module is completely separated from the Core module, there is
+// no way to write the type of a generic expression working for both dense and sparse
+// matrix. Unless we change the overall design, here is a workaround.
+// There is an example in unsuported/Eigen/src/AutoDiff/AutoDiffScalar.
+
+template<typename XprType, int Shape = ei_shape_of<XprType>::ret>
+struct MakeNestByValue
+{
+  typedef NestByValue<XprType> Type;
+};
+
+template<typename Func, typename XprType, int Shape = ei_shape_of<XprType>::ret>
+struct MakeCwiseUnaryOp
+{
+  typedef CwiseUnaryOp<Func,XprType> Type;
+};
+
+template<typename Func, typename A, typename B, int Shape = ei_shape_of<A>::ret>
+struct MakeCwiseBinaryOp
+{
+  typedef CwiseBinaryOp<Func,A,B> Type;
+};
+
+// TODO complete the list
+
+
+#endif // EIGEN_EXPRESSIONMAKER_H
diff --git a/Eigen/src/Core/Map.h b/Eigen/src/Core/Map.h
index f6bc814e2..dba7e20e4 100644
--- a/Eigen/src/Core/Map.h
+++ b/Eigen/src/Core/Map.h
@@ -31,16 +31,14 @@
   * \brief A matrix or vector expression mapping an existing array of data.
   *
   * \param MatrixType the equivalent matrix type of the mapped data
-  * \param _PacketAccess allows to enforce aligned loads and stores if set to ForceAligned.
-  *                      The default is AsRequested. This parameter is internaly used by Eigen
-  *                      in expressions such as \code Map<...>(...) += other; \endcode and most
-  *                      of the time this is the only way it is used.
+  * \param PointerAlignment specifies whether the pointer is \c Aligned, or \c Unaligned.
+  *                         The default is \c Unaligned.
   *
   * This class represents a matrix or vector expression mapping an existing array of data.
   * It can be used to let Eigen interface without any overhead with non-Eigen data structures,
   * such as plain C arrays or structures from other libraries.
   *
-  * \b Tips: to change the array of data mapped by a Map object, you can use the C++
+  * \b Tip: to change the array of data mapped by a Map object, you can use the C++
   * placement new syntax:
   *
   * Example: \include Map_placement_new.cpp
@@ -48,22 +46,27 @@
   *
   * This class is the return type of Matrix::Map() but can also be used directly.
   *
+  * \b Note \b to \b Eigen \b developers: The template parameter \c PointerAlignment
+  * can also be or-ed with \c EnforceAlignedAccess in order to enforce aligned read 
+  * in expressions such as \code A += B; \endcode. See class MapBase for further details.
+  *
   * \sa Matrix::Map()
   */
-template<typename MatrixType, int _PacketAccess>
-struct ei_traits<Map<MatrixType, _PacketAccess> > : public ei_traits<MatrixType>
+template<typename MatrixType, int Options>
+struct ei_traits<Map<MatrixType, Options> > : public ei_traits<MatrixType>
 {
   enum {
-    PacketAccess = _PacketAccess,
-    Flags = ei_traits<MatrixType>::Flags & ~AlignedBit
+    PacketAccess = Options & EnforceAlignedAccess,
+    Flags = (Options&Aligned)==Aligned ? ei_traits<MatrixType>::Flags |  AlignedBit
+                                       : ei_traits<MatrixType>::Flags & ~AlignedBit
   };
-  typedef typename ei_meta_if<int(PacketAccess)==ForceAligned,
-                              Map<MatrixType, _PacketAccess>&,
-                              Map<MatrixType, ForceAligned> >::ret AlignedDerivedType;
+  typedef typename ei_meta_if<int(PacketAccess)==EnforceAlignedAccess,
+                              Map<MatrixType, Options>&,
+                              Map<MatrixType, Options|EnforceAlignedAccess> >::ret AlignedDerivedType;
 };
 
-template<typename MatrixType, int PacketAccess> class Map
-  : public MapBase<Map<MatrixType, PacketAccess> >
+template<typename MatrixType, int Options> class Map
+  : public MapBase<Map<MatrixType, Options> >
 {
   public:
 
@@ -72,9 +75,9 @@ template<typename MatrixType, int PacketAccess> class Map
 
     inline int stride() const { return this->innerSize(); }
 
-    AlignedDerivedType _convertToForceAligned()
+    AlignedDerivedType _convertToEnforceAlignedAccess()
     {
-      return Map<MatrixType,ForceAligned>(Base::m_data, Base::m_rows.value(), Base::m_cols.value());
+      return AlignedDerivedType(Base::m_data, Base::m_rows.value(), Base::m_cols.value());
     }
 
     inline Map(const Scalar* data) : Base(data) {}
diff --git a/Eigen/src/Core/MapBase.h b/Eigen/src/Core/MapBase.h
index 88a3fac1e..8770732de 100644
--- a/Eigen/src/Core/MapBase.h
+++ b/Eigen/src/Core/MapBase.h
@@ -32,11 +32,17 @@
   *
   * Expression classes inheriting MapBase must define the constant \c PacketAccess,
   * and type \c AlignedDerivedType in their respective ei_traits<> specialization structure.
-  * The value of \c PacketAccess can be either:
-  *  - \b ForceAligned which enforces both aligned loads and stores
-  *  - \b AsRequested which is the default behavior
+  * The value of \c PacketAccess can be either \b AsRequested, or set to \b EnforceAlignedAccess which
+  * enforces both aligned loads and stores.
+  * 
+  * \c EnforceAlignedAccess is automatically set in expressions such as 
+  * \code A += B; \endcode where A is either a Block or a Map. Here,
+  * this expression is transfomed into \code A = A_with_EnforceAlignedAccess + B; \endcode
+  * avoiding unaligned loads from A. Indeed, since Eigen's packet evaluation mechanism
+  * automatically align to the destination matrix, we know that loads to A will be aligned too.
+  * 
   * The type \c AlignedDerivedType should correspond to the equivalent expression type
-  * with \c PacketAccess being \c ForceAligned.
+  * with \c PacketAccess set to \c EnforceAlignedAccess.
   *
   * \sa class Map, class Block
   */
@@ -79,19 +85,19 @@ template<typename Derived> class MapBase
       * \sa MapBase::stride() */
     inline const Scalar* data() const { return m_data; }
 
-    template<bool IsForceAligned,typename Dummy> struct force_aligned_impl {
+    template<bool IsEnforceAlignedAccess,typename Dummy> struct force_aligned_impl {
       static AlignedDerivedType run(MapBase& a) { return a.derived(); }
     };
 
     template<typename Dummy> struct force_aligned_impl<false,Dummy> {
-      static AlignedDerivedType run(MapBase& a) { return a.derived()._convertToForceAligned(); }
+      static AlignedDerivedType run(MapBase& a) { return a.derived()._convertToEnforceAlignedAccess(); }
     };
 
     /** \returns an expression equivalent to \c *this but having the \c PacketAccess constant
-      * set to \c ForceAligned. Must be reimplemented by the derived class. */
+      * set to \c EnforceAlignedAccess. Must be reimplemented by the derived class. */
     AlignedDerivedType forceAligned()
     {
-      return force_aligned_impl<int(PacketAccess)==int(ForceAligned),Derived>::run(*this);
+      return force_aligned_impl<int(PacketAccess)==int(EnforceAlignedAccess),Derived>::run(*this);
     }
 
     inline const Scalar& coeff(int row, int col) const
@@ -131,7 +137,7 @@ template<typename Derived> class MapBase
     template<int LoadMode>
     inline PacketScalar packet(int row, int col) const
     {
-      return ei_ploadt<Scalar, int(PacketAccess) == ForceAligned ? Aligned : LoadMode>
+      return ei_ploadt<Scalar, int(PacketAccess) == EnforceAlignedAccess ? Aligned : LoadMode>
                (m_data + (IsRowMajor ? col + row * stride()
                                      : row + col * stride()));
     }
@@ -139,13 +145,13 @@ template<typename Derived> class MapBase
     template<int LoadMode>
     inline PacketScalar packet(int index) const
     {
-      return ei_ploadt<Scalar, int(PacketAccess) == ForceAligned ? Aligned : LoadMode>(m_data + index);
+      return ei_ploadt<Scalar, int(PacketAccess) == EnforceAlignedAccess ? Aligned : LoadMode>(m_data + index);
     }
 
     template<int StoreMode>
     inline void writePacket(int row, int col, const PacketScalar& x)
     {
-      ei_pstoret<Scalar, PacketScalar, int(PacketAccess) == ForceAligned ? Aligned : StoreMode>
+      ei_pstoret<Scalar, PacketScalar, int(PacketAccess) == EnforceAlignedAccess ? Aligned : StoreMode>
                (const_cast<Scalar*>(m_data) + (IsRowMajor ? col + row * stride()
                                                           : row + col * stride()), x);
     }
@@ -153,13 +159,14 @@ template<typename Derived> class MapBase
     template<int StoreMode>
     inline void writePacket(int index, const PacketScalar& x)
     {
-      ei_pstoret<Scalar, PacketScalar, int(PacketAccess) == ForceAligned ? Aligned : StoreMode>
+      ei_pstoret<Scalar, PacketScalar, int(PacketAccess) == EnforceAlignedAccess ? Aligned : StoreMode>
         (const_cast<Scalar*>(m_data) + index, x);
     }
 
     inline MapBase(const Scalar* data) : m_data(data), m_rows(RowsAtCompileTime), m_cols(ColsAtCompileTime)
     {
       EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
+      checkDataAlignment();
     }
 
     inline MapBase(const Scalar* data, int size)
@@ -170,6 +177,7 @@ template<typename Derived> class MapBase
       EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
       ei_assert(size >= 0);
       ei_assert(data == 0 || SizeAtCompileTime == Dynamic || SizeAtCompileTime == size);
+      checkDataAlignment();
     }
 
     inline MapBase(const Scalar* data, int rows, int cols)
@@ -178,6 +186,7 @@ template<typename Derived> class MapBase
       ei_assert( (data == 0)
               || (   rows >= 0 && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows)
                   && cols >= 0 && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols)));
+      checkDataAlignment();
     }
 
     Derived& operator=(const MapBase& other)
@@ -215,6 +224,13 @@ template<typename Derived> class MapBase
     { return derived() = forceAligned() / other; }
 
   protected:
+
+    void checkDataAlignment() const
+    {
+      ei_assert( ((!(ei_traits<Derived>::Flags&AlignedBit))
+                  || ((std::size_t(m_data)&0xf)==0)) && "data is not aligned");
+    }
+    
     const Scalar* EIGEN_RESTRICT m_data;
     const ei_int_if_dynamic<RowsAtCompileTime> m_rows;
     const ei_int_if_dynamic<ColsAtCompileTime> m_cols;
diff --git a/Eigen/src/Core/Matrix.h b/Eigen/src/Core/Matrix.h
index 027e6bb70..17d2f2836 100644
--- a/Eigen/src/Core/Matrix.h
+++ b/Eigen/src/Core/Matrix.h
@@ -58,6 +58,9 @@ template <typename Derived, typename OtherDerived, bool IsVector = static_cast<b
   * \li \c MatrixXf is a dynamic-size matrix of floats (\c Matrix<float, Dynamic, Dynamic>)
   * \li \c VectorXf is a dynamic-size vector of floats (\c Matrix<float, Dynamic, 1>)
   *
+  * \li \c Matrix2Xf is a partially fixed-size (dynamic-size) matrix of floats (\c Matrix<float, 2, Dynamic>)
+  * \li \c MatrixX3d is a partially dynamic-size (fixed-size) matrix of double (\c Matrix<double, Dynamic, 3>)
+  *
   * See \link matrixtypedefs this page \endlink for a complete list of predefined \em %Matrix and \em Vector typedefs.
   *
   * You can access elements of vectors and matrices using normal subscripting:
@@ -794,11 +797,20 @@ typedef Matrix<Type, Size, 1>    Vector##SizeSuffix##TypeSuffix;  \
 /** \ingroup matrixtypedefs */                                    \
 typedef Matrix<Type, 1, Size>    RowVector##SizeSuffix##TypeSuffix;
 
+#define EIGEN_MAKE_FIXED_TYPEDEFS(Type, TypeSuffix, Size)         \
+/** \ingroup matrixtypedefs */                                    \
+typedef Matrix<Type, Size, Dynamic> Matrix##Size##X##TypeSuffix;  \
+/** \ingroup matrixtypedefs */                                    \
+typedef Matrix<Type, Dynamic, Size> Matrix##X##Size##TypeSuffix;
+
 #define EIGEN_MAKE_TYPEDEFS_ALL_SIZES(Type, TypeSuffix) \
 EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, 2, 2) \
 EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, 3, 3) \
 EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, 4, 4) \
-EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, Dynamic, X)
+EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, Dynamic, X) \
+EIGEN_MAKE_FIXED_TYPEDEFS(Type, TypeSuffix, 2) \
+EIGEN_MAKE_FIXED_TYPEDEFS(Type, TypeSuffix, 3) \
+EIGEN_MAKE_FIXED_TYPEDEFS(Type, TypeSuffix, 4)
 
 EIGEN_MAKE_TYPEDEFS_ALL_SIZES(int,                  i)
 EIGEN_MAKE_TYPEDEFS_ALL_SIZES(float,                f)
diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h
index 729349b6f..7b5310175 100644
--- a/Eigen/src/Core/MatrixBase.h
+++ b/Eigen/src/Core/MatrixBase.h
@@ -190,6 +190,25 @@ template<typename Derived> class MatrixBase
       * i.e., the number of rows for a columns major matrix, and the number of cols otherwise */
     int innerSize() const { return (int(Flags)&RowMajorBit) ? this->cols() : this->rows(); }
 
+    /** Only plain matrices, not expressions may be resized; therefore the only useful resize method is
+      * Matrix::resize(). The present method only asserts that the new size equals the old size, and does
+      * nothing else.
+      */
+    void resize(int size)
+    {
+      ei_assert(size == this->size()
+                && "MatrixBase::resize() does not actually allow to resize.");
+    }
+    /** Only plain matrices, not expressions may be resized; therefore the only useful resize method is
+      * Matrix::resize(). The present method only asserts that the new size equals the old size, and does
+      * nothing else.
+      */
+    void resize(int rows, int cols)
+    {
+      ei_assert(rows == this->rows() && cols == this->cols()
+                && "MatrixBase::resize() does not actually allow to resize.");
+    }
+
 #ifndef EIGEN_PARSED_BY_DOXYGEN
     /** \internal the plain matrix type corresponding to this expression. Note that is not necessarily
       * exactly the return type of eval(): in the case of plain matrices, the return type of eval() is a const
diff --git a/Eigen/src/Core/StableNorm.h b/Eigen/src/Core/StableNorm.h
index 06e69c448..f2d1e7240 100644
--- a/Eigen/src/Core/StableNorm.h
+++ b/Eigen/src/Core/StableNorm.h
@@ -59,7 +59,7 @@ MatrixBase<Derived>::stableNorm() const
   RealScalar invScale = 1;
   RealScalar ssq = 0; // sum of square
   enum {
-    Alignment = (int(Flags)&DirectAccessBit) || (int(Flags)&AlignedBit) ? ForceAligned : AsRequested
+    Alignment = (int(Flags)&DirectAccessBit) || (int(Flags)&AlignedBit) ? EnforceAlignedAccess : AsRequested
   };
   int n = size();
   int bi=0;
diff --git a/Eigen/src/Core/util/Constants.h b/Eigen/src/Core/util/Constants.h
index 226a53c33..c9735b6e4 100644
--- a/Eigen/src/Core/util/Constants.h
+++ b/Eigen/src/Core/util/Constants.h
@@ -196,8 +196,8 @@ const unsigned int UnitLowerTriangular = LowerTriangularBit | UnitDiagBit;
 
 enum { DiagonalOnTheLeft, DiagonalOnTheRight };
 
-enum { Aligned, Unaligned };
-enum { ForceAligned, AsRequested };
+enum { Unaligned=0, Aligned=1 };
+enum { AsRequested=0, EnforceAlignedAccess=2 };
 enum { ConditionalJumpCost = 5 };
 enum CornerType { TopLeft, TopRight, BottomLeft, BottomRight };
 enum DirectionType { Vertical, Horizontal, BothDirections };
diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h
index 86539a64e..c8f2c4cd7 100644
--- a/Eigen/src/Core/util/ForwardDeclarations.h
+++ b/Eigen/src/Core/util/ForwardDeclarations.h
@@ -130,6 +130,7 @@ template<typename Scalar>     class PlanarRotation;
 // Geometry module:
 template<typename Derived, int _Dim> class RotationBase;
 template<typename Lhs, typename Rhs> class Cross;
+template<typename Derived> class QuaternionBase;
 template<typename Scalar> class Quaternion;
 template<typename Scalar> class Rotation2D;
 template<typename Scalar> class AngleAxis;
diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h
index 66b9d52f4..dd41ad0e2 100644
--- a/Eigen/src/Core/util/Macros.h
+++ b/Eigen/src/Core/util/Macros.h
@@ -256,7 +256,7 @@ using Eigen::ei_cos;
 
 // C++0x features
 #if defined(__GXX_EXPERIMENTAL_CXX0X__) || (defined(_MSC_VER) && (_MSC_VER >= 1600))
-  #define EIGEN_REF_TO_TEMPORARY &&
+  #define EIGEN_REF_TO_TEMPORARY const &
 #else
   #define EIGEN_REF_TO_TEMPORARY const &
 #endif
diff --git a/Eigen/src/Core/util/StaticAssert.h b/Eigen/src/Core/util/StaticAssert.h
index 883f2d95e..6210bc91c 100644
--- a/Eigen/src/Core/util/StaticAssert.h
+++ b/Eigen/src/Core/util/StaticAssert.h
@@ -78,7 +78,8 @@
         INVALID_MATRIX_TEMPLATE_PARAMETERS,
         BOTH_MATRICES_MUST_HAVE_THE_SAME_STORAGE_ORDER,
         THIS_METHOD_IS_ONLY_FOR_DIAGONAL_MATRIX,
-        THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE
+        THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE,
+        THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES
       };
     };
 
diff --git a/Eigen/src/Geometry/Quaternion.h b/Eigen/src/Geometry/Quaternion.h
index 2f9f97807..67b040165 100644
--- a/Eigen/src/Geometry/Quaternion.h
+++ b/Eigen/src/Geometry/Quaternion.h
@@ -2,6 +2,7 @@
 // for linear algebra.
 //
 // Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
+// Copyright (C) 2009 Mathieu Gautier <mathieu.gautier@cea.fr>
 //
 // Eigen is free software; you can redistribute it and/or
 // modify it under the terms of the GNU Lesser General Public
@@ -25,11 +26,6 @@
 #ifndef EIGEN_QUATERNION_H
 #define EIGEN_QUATERNION_H
 
-template<typename Other,
-         int OtherRows=Other::RowsAtCompileTime,
-         int OtherCols=Other::ColsAtCompileTime>
-struct ei_quaternion_assign_impl;
-
 /** \geometry_module \ingroup Geometry_Module
   *
   * \class Quaternion
@@ -52,28 +48,33 @@ struct ei_quaternion_assign_impl;
   * \sa  class AngleAxis, class Transform
   */
 
-template<typename _Scalar> struct ei_traits<Quaternion<_Scalar> >
+template<typename Other,
+         int OtherRows=Other::RowsAtCompileTime,
+         int OtherCols=Other::ColsAtCompileTime>
+struct ei_quaternionbase_assign_impl;
+
+template<typename Scalar> class Quaternion; // [XXX] => remove when Quaternion becomes Quaternion
+
+template<typename Derived>
+struct ei_traits<QuaternionBase<Derived> >
 {
-  typedef _Scalar Scalar;
+  typedef typename ei_traits<Derived>::Scalar Scalar;
+  enum {
+    PacketAccess = ei_traits<Derived>::PacketAccess
+  };
 };
 
-template<typename _Scalar>
-class Quaternion : public RotationBase<Quaternion<_Scalar>,3>
+template<class Derived>
+class QuaternionBase : public RotationBase<Derived, 3>
 {
-  typedef RotationBase<Quaternion<_Scalar>,3> Base;
-
-
-
+  typedef RotationBase<Derived, 3> Base;
 public:
-  EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,4)
-
   using Base::operator*;
 
-  /** the scalar type of the coefficients */
-  typedef _Scalar Scalar;
+  typedef typename ei_traits<QuaternionBase<Derived> >::Scalar Scalar;
+  typedef typename NumTraits<Scalar>::Real RealScalar;
 
-  /** the type of the Coefficients 4-vector */
-  typedef Matrix<Scalar, 4, 1> Coefficients;
+ // typedef typename Matrix<Scalar,4,1> Coefficients;
   /** the type of a 3D vector */
   typedef Matrix<Scalar,3,1> Vector3;
   /** the equivalent rotation matrix type */
@@ -82,114 +83,96 @@ public:
   typedef AngleAxis<Scalar> AngleAxisType;
 
   /** \returns the \c x coefficient */
-  inline Scalar x() const { return m_coeffs.coeff(0); }
+  inline Scalar x() const { return this->derived().coeffs().coeff(0); }
   /** \returns the \c y coefficient */
-  inline Scalar y() const { return m_coeffs.coeff(1); }
+  inline Scalar y() const { return this->derived().coeffs().coeff(1); }
   /** \returns the \c z coefficient */
-  inline Scalar z() const { return m_coeffs.coeff(2); }
+  inline Scalar z() const { return this->derived().coeffs().coeff(2); }
   /** \returns the \c w coefficient */
-  inline Scalar w() const { return m_coeffs.coeff(3); }
+  inline Scalar w() const { return this->derived().coeffs().coeff(3); }
 
   /** \returns a reference to the \c x coefficient */
-  inline Scalar& x() { return m_coeffs.coeffRef(0); }
+  inline Scalar& x() { return this->derived().coeffs().coeffRef(0); }
   /** \returns a reference to the \c y coefficient */
-  inline Scalar& y() { return m_coeffs.coeffRef(1); }
+  inline Scalar& y() { return this->derived().coeffs().coeffRef(1); }
   /** \returns a reference to the \c z coefficient */
-  inline Scalar& z() { return m_coeffs.coeffRef(2); }
+  inline Scalar& z() { return this->derived().coeffs().coeffRef(2); }
   /** \returns a reference to the \c w coefficient */
-  inline Scalar& w() { return m_coeffs.coeffRef(3); }
+  inline Scalar& w() { return this->derived().coeffs().coeffRef(3); }
 
   /** \returns a read-only vector expression of the imaginary part (x,y,z) */
-  inline const Block<Coefficients,3,1> vec() const { return m_coeffs.template start<3>(); }
+  inline const VectorBlock<typename ei_traits<Derived>::Coefficients,3> vec() const { return this->derived().coeffs().template start<3>(); }
 
   /** \returns a vector expression of the imaginary part (x,y,z) */
-  inline Block<Coefficients,3,1> vec() { return m_coeffs.template start<3>(); }
+  inline VectorBlock<typename ei_traits<Derived>::Coefficients,3> vec() { return this->derived().coeffs().template start<3>(); }
 
   /** \returns a read-only vector expression of the coefficients (x,y,z,w) */
-  inline const Coefficients& coeffs() const { return m_coeffs; }
+  inline const typename ei_traits<Derived>::Coefficients& coeffs() const { return this->derived().coeffs(); }
 
   /** \returns a vector expression of the coefficients (x,y,z,w) */
-  inline Coefficients& coeffs() { return m_coeffs; }
-
-  /** Default constructor leaving the quaternion uninitialized. */
-  inline Quaternion() {}
-
-  /** Constructs and initializes the quaternion \f$ w+xi+yj+zk \f$ from
-    * its four coefficients \a w, \a x, \a y and \a z.
-    *
-    * \warning Note the order of the arguments: the real \a w coefficient first,
-    * while internally the coefficients are stored in the following order:
-    * [\c x, \c y, \c z, \c w]
-    */
-  inline Quaternion(Scalar w, Scalar x, Scalar y, Scalar z)
-  { m_coeffs << x, y, z, w; }
+  inline typename ei_traits<Derived>::Coefficients& coeffs() { return this->derived().coeffs(); }
 
-  /** Copy constructor */
-  inline Quaternion(const Quaternion& other) { m_coeffs = other.m_coeffs; }
-
-  /** Constructs and initializes a quaternion from the angle-axis \a aa */
-  explicit inline Quaternion(const AngleAxisType& aa) { *this = aa; }
-
-  /** Constructs and initializes a quaternion from either:
-    *  - a rotation matrix expression,
-    *  - a 4D vector expression representing quaternion coefficients.
-    * \sa operator=(MatrixBase<Derived>)
-    */
-  template<typename Derived>
-  explicit inline Quaternion(const MatrixBase<Derived>& other) { *this = other; }
-
-  Quaternion& operator=(const Quaternion& other);
-  Quaternion& operator=(const AngleAxisType& aa);
-  template<typename Derived>
-  Quaternion& operator=(const MatrixBase<Derived>& m);
+  template<class OtherDerived> QuaternionBase& operator=(const QuaternionBase<OtherDerived>& other);
+  QuaternionBase& operator=(const AngleAxisType& aa);
+  template<class OtherDerived>
+  QuaternionBase& operator=(const MatrixBase<OtherDerived>& m);
 
   /** \returns a quaternion representing an identity rotation
     * \sa MatrixBase::Identity()
     */
-  inline static Quaternion Identity() { return Quaternion(1, 0, 0, 0); }
+  inline static Quaternion<Scalar> Identity() { return Quaternion<Scalar>(1, 0, 0, 0); }
 
-  /** \sa Quaternion::Identity(), MatrixBase::setIdentity()
+  /** \sa Quaternion2::Identity(), MatrixBase::setIdentity()
     */
-  inline Quaternion& setIdentity() { m_coeffs << 0, 0, 0, 1; return *this; }
+  inline QuaternionBase& setIdentity() { coeffs() << 0, 0, 0, 1; return *this; }
 
   /** \returns the squared norm of the quaternion's coefficients
-    * \sa Quaternion::norm(), MatrixBase::squaredNorm()
+    * \sa Quaternion2::norm(), MatrixBase::squaredNorm()
     */
-  inline Scalar squaredNorm() const { return m_coeffs.squaredNorm(); }
+  inline Scalar squaredNorm() const { return coeffs().squaredNorm(); }
 
   /** \returns the norm of the quaternion's coefficients
-    * \sa Quaternion::squaredNorm(), MatrixBase::norm()
+    * \sa Quaternion2::squaredNorm(), MatrixBase::norm()
     */
-  inline Scalar norm() const { return m_coeffs.norm(); }
+  inline Scalar norm() const { return coeffs().norm(); }
 
   /** Normalizes the quaternion \c *this
     * \sa normalized(), MatrixBase::normalize() */
-  inline void normalize() { m_coeffs.normalize(); }
+  inline void normalize() { coeffs().normalize(); }
   /** \returns a normalized version of \c *this
     * \sa normalize(), MatrixBase::normalized() */
-  inline Quaternion normalized() const { return Quaternion(m_coeffs.normalized()); }
+  inline Quaternion<Scalar> normalized() const { return Quaternion<Scalar>(coeffs().normalized()); }
 
-  /** \returns the dot product of \c *this and \a other
+    /** \returns the dot product of \c *this and \a other
     * Geometrically speaking, the dot product of two unit quaternions
     * corresponds to the cosine of half the angle between the two rotations.
     * \sa angularDistance()
     */
-  inline Scalar dot(const Quaternion& other) const { return m_coeffs.dot(other.m_coeffs); }
+  template<class OtherDerived> inline Scalar dot(const QuaternionBase<OtherDerived>& other) const { return coeffs().dot(other.coeffs()); }
 
-  inline Scalar angularDistance(const Quaternion& other) const;
+  template<class OtherDerived> inline Scalar angularDistance(const QuaternionBase<OtherDerived>& other) const;
 
   Matrix3 toRotationMatrix(void) const;
 
   template<typename Derived1, typename Derived2>
-  Quaternion& setFromTwoVectors(const MatrixBase<Derived1>& a, const MatrixBase<Derived2>& b);
+  QuaternionBase& setFromTwoVectors(const MatrixBase<Derived1>& a, const MatrixBase<Derived2>& b);
 
-  inline Quaternion operator* (const Quaternion& q) const;
-  inline Quaternion& operator*= (const Quaternion& q);
+  template<class OtherDerived> inline Quaternion<Scalar> operator* (const QuaternionBase<OtherDerived>& q) const;
+  template<class OtherDerived> inline QuaternionBase& operator*= (const QuaternionBase<OtherDerived>& q);
 
-  Quaternion inverse(void) const;
-  Quaternion conjugate(void) const;
+  Quaternion<Scalar> inverse(void) const;
+  Quaternion<Scalar> conjugate(void) const;
 
-  Quaternion slerp(Scalar t, const Quaternion& other) const;
+  template<class OtherDerived> Quaternion<Scalar> slerp(Scalar t, const QuaternionBase<OtherDerived>& other) const;
+
+  /** \returns \c true if \c *this is approximately equal to \a other, within the precision
+    * determined by \a prec.
+    *
+    * \sa MatrixBase::isApprox() */
+  bool isApprox(const QuaternionBase& other, RealScalar prec = precision<Scalar>()) const
+  { return coeffs().isApprox(other.coeffs(), prec); }
+
+  Vector3 _transformVector(Vector3 v) const;
 
   /** \returns \c *this with scalar type casted to \a NewScalarType
     *
@@ -197,57 +180,150 @@ public:
     * then this function smartly returns a const reference to \c *this.
     */
   template<typename NewScalarType>
-  inline typename ei_cast_return_type<Quaternion,Quaternion<NewScalarType> >::type cast() const
-  { return typename ei_cast_return_type<Quaternion,Quaternion<NewScalarType> >::type(*this); }
+  inline typename ei_cast_return_type<Derived,Quaternion<NewScalarType> >::type cast() const
+  {
+    return typename ei_cast_return_type<Derived,Quaternion<NewScalarType> >::type(
+      coeffs().template cast<NewScalarType>());
+  }
+};
 
-  /** Copy constructor with scalar type conversion */
-  template<typename OtherScalarType>
-  inline explicit Quaternion(const Quaternion<OtherScalarType>& other)
-  { m_coeffs = other.coeffs().template cast<Scalar>(); }
+template<typename _Scalar>
+struct ei_traits<Quaternion<_Scalar> >
+{
+  typedef _Scalar Scalar;
+  typedef Matrix<_Scalar,4,1> Coefficients;
+  enum{
+    PacketAccess = Aligned 
+  };
+};
 
-  /** \returns \c true if \c *this is approximately equal to \a other, within the precision
-    * determined by \a prec.
+template<typename _Scalar>
+class Quaternion : public QuaternionBase<Quaternion<_Scalar> >{
+  typedef QuaternionBase<Quaternion<_Scalar> > Base;
+public:
+  using Base::operator=;
+
+  typedef _Scalar Scalar;
+
+  typedef typename ei_traits<Quaternion<Scalar> >::Coefficients Coefficients;
+  typedef typename Base::AngleAxisType AngleAxisType;
+
+  /** Default constructor leaving the quaternion uninitialized. */
+  inline Quaternion() {}
+
+  /** Constructs and initializes the quaternion \f$ w+xi+yj+zk \f$ from
+    * its four coefficients \a w, \a x, \a y and \a z.
     *
-    * \sa MatrixBase::isApprox() */
-  bool isApprox(const Quaternion& other, typename NumTraits<Scalar>::Real prec = precision<Scalar>()) const
-  { return m_coeffs.isApprox(other.m_coeffs, prec); }
+    * \warning Note the order of the arguments: the real \a w coefficient first,
+    * while internally the coefficients are stored in the following order:
+    * [\c x, \c y, \c z, \c w]
+    */
+  inline Quaternion(Scalar w, Scalar x, Scalar y, Scalar z)
+  { coeffs() << x, y, z, w; }
 
-  Vector3 _transformVector(Vector3 v) const;
+  /** Constructs and initialize a quaternion from the array data
+    * This constructor is also used to map an array */
+  inline Quaternion(const Scalar* data) : m_coeffs(data) {}
+
+  /** Copy constructor */
+//  template<class Derived> inline Quaternion(const QuaternionBase<Derived>& other) { m_coeffs = other.coeffs(); } [XXX] redundant with 703
+
+  /** Constructs and initializes a quaternion from the angle-axis \a aa */
+  explicit inline Quaternion(const AngleAxisType& aa) { *this = aa; }
+
+  /** Constructs and initializes a quaternion from either:
+    *  - a rotation matrix expression,
+    *  - a 4D vector expression representing quaternion coefficients.
+    */
+  template<typename Derived>
+  explicit inline Quaternion(const MatrixBase<Derived>& other) { *this = other; }
+
+  /** Copy constructor with scalar type conversion */
+  template<class Derived>
+  inline explicit Quaternion(const QuaternionBase<Derived>& other)
+  { m_coeffs = other.coeffs().template cast<Scalar>(); }
+
+  inline Coefficients& coeffs() { return m_coeffs;}
+  inline const Coefficients& coeffs() const { return m_coeffs;}
 
 protected:
   Coefficients m_coeffs;
 };
 
-/** \ingroup Geometry_Module
-  * single precision quaternion type */
-typedef Quaternion<float> Quaternionf;
-/** \ingroup Geometry_Module
-  * double precision quaternion type */
-typedef Quaternion<double> Quaterniond;
+/* ########### Map<Quaternion> */
+
+/** \class Map<Quaternion>
+  * \nonstableyet
+  *
+  * \brief Expression of a quaternion
+  *
+  * \param Scalar the type of the vector of diagonal coefficients
+  *
+  * \sa class Quaternion, class QuaternionBase
+  */
+template<typename _Scalar, int _PacketAccess>
+struct ei_traits<Map<Quaternion<_Scalar>, _PacketAccess> >:
+ei_traits<Quaternion<_Scalar> >
+{
+  typedef _Scalar Scalar;
+  typedef Map<Matrix<_Scalar,4,1> > Coefficients;
+  enum {
+    PacketAccess = _PacketAccess
+  };
+};
+
+template<typename _Scalar, int PacketAccess>
+class Map<Quaternion<_Scalar>, PacketAccess > : public QuaternionBase<Map<Quaternion<_Scalar>, PacketAccess> >, ei_no_assignment_operator {
+  public:
+    
+    typedef _Scalar Scalar;
+
+    typedef typename ei_traits<Map<Quaternion<Scalar>, PacketAccess> >::Coefficients Coefficients;
+
+    inline Map<Quaternion<Scalar>, PacketAccess >(const Scalar* coeffs) : m_coeffs(coeffs) {}
+    
+    inline Coefficients& coeffs() { return m_coeffs;}
+    inline const Coefficients& coeffs() const { return m_coeffs;}
+
+  protected:
+    Coefficients m_coeffs;
+};
+
+typedef Map<Quaternion<double> > QuaternionMapd;
+typedef Map<Quaternion<float> > QuaternionMapf;
+typedef Map<Quaternion<double>, Aligned> QuaternionMapAlignedd;
+typedef Map<Quaternion<float>, Aligned> QuaternionMapAlignedf;
 
 // Generic Quaternion * Quaternion product
-template<int Arch,typename Scalar> inline Quaternion<Scalar>
-ei_quaternion_product(const Quaternion<Scalar>& a, const Quaternion<Scalar>& b)
+template<int Arch, class Derived, class OtherDerived, typename Scalar, int PacketAccess> struct ei_quat_product
 {
-  return Quaternion<Scalar>
-  (
-    a.w() * b.w() - a.x() * b.x() - a.y() * b.y() - a.z() * b.z(),
-    a.w() * b.x() + a.x() * b.w() + a.y() * b.z() - a.z() * b.y(),
-    a.w() * b.y() + a.y() * b.w() + a.z() * b.x() - a.x() * b.z(),
-    a.w() * b.z() + a.z() * b.w() + a.x() * b.y() - a.y() * b.x()
-  );
-}
+  inline static Quaternion<Scalar> run(const QuaternionBase<Derived>& a, const QuaternionBase<OtherDerived>& b){
+    return Quaternion<Scalar>
+    (
+      a.w() * b.w() - a.x() * b.x() - a.y() * b.y() - a.z() * b.z(),
+      a.w() * b.x() + a.x() * b.w() + a.y() * b.z() - a.z() * b.y(),
+      a.w() * b.y() + a.y() * b.w() + a.z() * b.x() - a.x() * b.z(),
+      a.w() * b.z() + a.z() * b.w() + a.x() * b.y() - a.y() * b.x()
+    );
+  }
+};
 
 /** \returns the concatenation of two rotations as a quaternion-quaternion product */
-template <typename Scalar>
-inline Quaternion<Scalar> Quaternion<Scalar>::operator* (const Quaternion& other) const
+template <class Derived>
+template <class OtherDerived>
+inline Quaternion<typename ei_traits<QuaternionBase<Derived> >::Scalar> QuaternionBase<Derived>::operator* (const QuaternionBase<OtherDerived>& other) const
 {
-  return ei_quaternion_product<EiArch>(*this,other);
+  EIGEN_STATIC_ASSERT((ei_is_same_type<typename Derived::Scalar, typename OtherDerived::Scalar>::ret),
+   YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
+   return ei_quat_product<EiArch, Derived, OtherDerived, 
+                          typename ei_traits<Derived>::Scalar,
+                          ei_traits<Derived>::PacketAccess && ei_traits<OtherDerived>::PacketAccess>::run(*this, other);
 }
 
 /** \sa operator*(Quaternion) */
-template <typename Scalar>
-inline Quaternion<Scalar>& Quaternion<Scalar>::operator*= (const Quaternion& other)
+template <class Derived>
+template <class OtherDerived>
+inline QuaternionBase<Derived>& QuaternionBase<Derived>::operator*= (const QuaternionBase<OtherDerived>& other)
 {
   return (*this = *this * other);
 }
@@ -256,12 +332,12 @@ inline Quaternion<Scalar>& Quaternion<Scalar>::operator*= (const Quaternion& oth
   * \remarks If the quaternion is used to rotate several points (>1)
   * then it is much more efficient to first convert it to a 3x3 Matrix.
   * Comparison of the operation cost for n transformations:
-  *   - Quaternion:    30n
+  *   - Quaternion2:    30n
   *   - Via a Matrix3: 24 + 15n
   */
-template <typename Scalar>
-inline typename Quaternion<Scalar>::Vector3
-Quaternion<Scalar>::_transformVector(Vector3 v) const
+template <class Derived>
+inline typename QuaternionBase<Derived>::Vector3
+QuaternionBase<Derived>::_transformVector(Vector3 v) const
 {
     // Note that this algorithm comes from the optimization by hand
     // of the conversion to a Matrix followed by a Matrix/Vector product.
@@ -272,17 +348,18 @@ Quaternion<Scalar>::_transformVector(Vector3 v) const
     return v + this->w() * uv + this->vec().cross(uv);
 }
 
-template<typename Scalar>
-inline Quaternion<Scalar>& Quaternion<Scalar>::operator=(const Quaternion& other)
+template<class Derived>
+template<class OtherDerived>
+inline QuaternionBase<Derived>& QuaternionBase<Derived>::operator=(const QuaternionBase<OtherDerived>& other)
 {
-  m_coeffs = other.m_coeffs;
+  coeffs() = other.coeffs();
   return *this;
 }
 
 /** Set \c *this from an angle-axis \a aa and returns a reference to \c *this
   */
-template<typename Scalar>
-inline Quaternion<Scalar>& Quaternion<Scalar>::operator=(const AngleAxisType& aa)
+template<class Derived>
+inline QuaternionBase<Derived>& QuaternionBase<Derived>::operator=(const AngleAxisType& aa)
 {
   Scalar ha = Scalar(0.5)*aa.angle(); // Scalar(0.5) to suppress precision loss warnings
   this->w() = ei_cos(ha);
@@ -295,20 +372,23 @@ inline Quaternion<Scalar>& Quaternion<Scalar>::operator=(const AngleAxisType& aa
   *   - if \a xpr is a 3x3 matrix, then \a xpr is assumed to be rotation matrix
   *     and \a xpr is converted to a quaternion
   */
-template<typename Scalar>
-template<typename Derived>
-inline Quaternion<Scalar>& Quaternion<Scalar>::operator=(const MatrixBase<Derived>& xpr)
+
+template<class Derived>
+template<class MatrixDerived>
+inline QuaternionBase<Derived>& QuaternionBase<Derived>::operator=(const MatrixBase<MatrixDerived>& xpr)
 {
-  ei_quaternion_assign_impl<Derived>::run(*this, xpr.derived());
+  EIGEN_STATIC_ASSERT((ei_is_same_type<typename Derived::Scalar, typename MatrixDerived::Scalar>::ret),
+   YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
+  ei_quaternionbase_assign_impl<MatrixDerived>::run(*this, xpr.derived());
   return *this;
 }
 
 /** Convert the quaternion to a 3x3 rotation matrix. The quaternion is required to
   * be normalized, otherwise the result is undefined.
   */
-template<typename Scalar>
-inline typename Quaternion<Scalar>::Matrix3
-Quaternion<Scalar>::toRotationMatrix(void) const
+template<class Derived>
+inline typename QuaternionBase<Derived>::Matrix3
+QuaternionBase<Derived>::toRotationMatrix(void) const
 {
   // NOTE if inlined, then gcc 4.2 and 4.4 get rid of the temporary (not gcc 4.3 !!)
   // if not inlined then the cost of the return by value is huge ~ +35%,
@@ -352,9 +432,9 @@ Quaternion<Scalar>::toRotationMatrix(void) const
   * Note that the two input vectors do \b not have to be normalized, and
   * do not need to have the same norm.
   */
-template<typename Scalar>
+template<class Derived>
 template<typename Derived1, typename Derived2>
-inline Quaternion<Scalar>& Quaternion<Scalar>::setFromTwoVectors(const MatrixBase<Derived1>& a, const MatrixBase<Derived2>& b)
+inline QuaternionBase<Derived>& QuaternionBase<Derived>::setFromTwoVectors(const MatrixBase<Derived1>& a, const MatrixBase<Derived2>& b)
 {
   Vector3 v0 = a.normalized();
   Vector3 v1 = b.normalized();
@@ -393,19 +473,19 @@ inline Quaternion<Scalar>& Quaternion<Scalar>::setFromTwoVectors(const MatrixBas
   * Note that in most cases, i.e., if you simply want the opposite rotation,
   * and/or the quaternion is normalized, then it is enough to use the conjugate.
   *
-  * \sa Quaternion::conjugate()
+  * \sa Quaternion2::conjugate()
   */
-template <typename Scalar>
-inline Quaternion<Scalar> Quaternion<Scalar>::inverse() const
+template <class Derived>
+inline Quaternion<typename ei_traits<QuaternionBase<Derived> >::Scalar> QuaternionBase<Derived>::inverse() const
 {
   // FIXME should this function be called multiplicativeInverse and conjugate() be called inverse() or opposite()  ??
   Scalar n2 = this->squaredNorm();
   if (n2 > 0)
-    return Quaternion(conjugate().coeffs() / n2);
+    return Quaternion<Scalar>(conjugate().coeffs() / n2);
   else
   {
     // return an invalid result to flag the error
-    return Quaternion(Coefficients::Zero());
+    return Quaternion<Scalar>(ei_traits<Derived>::Coefficients::Zero());
   }
 }
 
@@ -413,19 +493,20 @@ inline Quaternion<Scalar> Quaternion<Scalar>::inverse() const
   * if the quaternion is normalized.
   * The conjugate of a quaternion represents the opposite rotation.
   *
-  * \sa Quaternion::inverse()
+  * \sa Quaternion2::inverse()
   */
-template <typename Scalar>
-inline Quaternion<Scalar> Quaternion<Scalar>::conjugate() const
+template <class Derived>
+inline Quaternion<typename ei_traits<QuaternionBase<Derived> >::Scalar> QuaternionBase<Derived>::conjugate() const
 {
-  return Quaternion(this->w(),-this->x(),-this->y(),-this->z());
+  return Quaternion<Scalar>(this->w(),-this->x(),-this->y(),-this->z());
 }
 
 /** \returns the angle (in radian) between two rotations
   * \sa dot()
   */
-template <typename Scalar>
-inline Scalar Quaternion<Scalar>::angularDistance(const Quaternion& other) const
+template <class Derived>
+template <class OtherDerived>
+inline typename ei_traits<QuaternionBase<Derived> >::Scalar QuaternionBase<Derived>::angularDistance(const QuaternionBase<OtherDerived>& other) const
 {
   double d = ei_abs(this->dot(other));
   if (d>=1.0)
@@ -436,14 +517,15 @@ inline Scalar Quaternion<Scalar>::angularDistance(const Quaternion& other) const
 /** \returns the spherical linear interpolation between the two quaternions
   * \c *this and \a other at the parameter \a t
   */
-template <typename Scalar>
-Quaternion<Scalar> Quaternion<Scalar>::slerp(Scalar t, const Quaternion& other) const
+template <class Derived>
+template <class OtherDerived>
+Quaternion<typename ei_traits<QuaternionBase<Derived> >::Scalar> QuaternionBase<Derived>::slerp(Scalar t, const QuaternionBase<OtherDerived>& other) const
 {
   static const Scalar one = Scalar(1) - precision<Scalar>();
   Scalar d = this->dot(other);
   Scalar absD = ei_abs(d);
   if (absD>=one)
-    return *this;
+    return Quaternion<Scalar>(*this);
 
   // theta is the angle between the 2 quaternions
   Scalar theta = std::acos(absD);
@@ -454,15 +536,15 @@ Quaternion<Scalar> Quaternion<Scalar>::slerp(Scalar t, const Quaternion& other)
   if (d<0)
     scale1 = -scale1;
 
-  return Quaternion(scale0 * m_coeffs + scale1 * other.m_coeffs);
+  return Quaternion<Scalar>(scale0 * coeffs() + scale1 * other.coeffs());
 }
 
 // set from a rotation matrix
 template<typename Other>
-struct ei_quaternion_assign_impl<Other,3,3>
+struct ei_quaternionbase_assign_impl<Other,3,3>
 {
   typedef typename Other::Scalar Scalar;
-  inline static void run(Quaternion<Scalar>& q, const Other& mat)
+  template<class Derived> inline static void run(QuaternionBase<Derived>& q, const Other& mat)
   {
     // This algorithm comes from  "Quaternion Calculus and Fast Animation",
     // Ken Shoemake, 1987 SIGGRAPH course notes
@@ -498,13 +580,14 @@ struct ei_quaternion_assign_impl<Other,3,3>
 
 // set from a vector of coefficients assumed to be a quaternion
 template<typename Other>
-struct ei_quaternion_assign_impl<Other,4,1>
+struct ei_quaternionbase_assign_impl<Other,4,1>
 {
   typedef typename Other::Scalar Scalar;
-  inline static void run(Quaternion<Scalar>& q, const Other& vec)
+  template<class Derived> inline static void run(QuaternionBase<Derived>& q, const Other& vec)
   {
     q.coeffs() = vec;
   }
 };
 
+
 #endif // EIGEN_QUATERNION_H
diff --git a/Eigen/src/Geometry/Transform.h b/Eigen/src/Geometry/Transform.h
index 70204f72b..4ee036140 100644
--- a/Eigen/src/Geometry/Transform.h
+++ b/Eigen/src/Geometry/Transform.h
@@ -481,6 +481,15 @@ typedef Transform<double,2> Transform2d;
 typedef Transform<double,3> Transform3d;
 
 /** \ingroup Geometry_Module */
+typedef Transform<float,2,Isometry> Isometry2f;
+/** \ingroup Geometry_Module */
+typedef Transform<float,3,Isometry> Isometry3f;
+/** \ingroup Geometry_Module */
+typedef Transform<double,2,Isometry> Isometry2d;
+/** \ingroup Geometry_Module */
+typedef Transform<double,3,Isometry> Isometry3d;
+
+/** \ingroup Geometry_Module */
 typedef Transform<float,2> Affine2f;
 /** \ingroup Geometry_Module */
 typedef Transform<float,3> Affine3f;
@@ -512,7 +521,7 @@ typedef Transform<double,3,Projective> Projective3d;
 **************************/
 
 #ifdef EIGEN_QT_SUPPORT
-/** Initialises \c *this from a QMatrix assuming the dimension is 2.
+/** Initializes \c *this from a QMatrix assuming the dimension is 2.
   *
   * This function is available only if the token EIGEN_QT_SUPPORT is defined.
   */
@@ -538,7 +547,7 @@ Transform<Scalar,Dim,Mode>& Transform<Scalar,Dim,Mode>::operator=(const QMatrix&
 
 /** \returns a QMatrix from \c *this assuming the dimension is 2.
   *
-  * \warning this convertion might loss data if \c *this is not affine
+  * \warning this conversion might loss data if \c *this is not affine
   *
   * This function is available only if the token EIGEN_QT_SUPPORT is defined.
   */
@@ -551,7 +560,7 @@ QMatrix Transform<Scalar,Dim,Mode>::toQMatrix(void) const
                  matrix.coeff(0,2), matrix.coeff(1,2));
 }
 
-/** Initialises \c *this from a QTransform assuming the dimension is 2.
+/** Initializes \c *this from a QTransform assuming the dimension is 2.
   *
   * This function is available only if the token EIGEN_QT_SUPPORT is defined.
   */
@@ -899,7 +908,7 @@ struct ei_projective_transform_inverse<TransformType, Projective>
   * \returns the inverse transformation according to some given knowledge
   * on \c *this.
   *
-  * \param traits allows to optimize the inversion process when the transformion
+  * \param traits allows to optimize the inversion process when the transformation
   * is known to be not a general transformation. The possible values are:
   *  - Projective if the transformation is not necessarily affine, i.e., if the
   *    last row is not guaranteed to be [0 ... 0 1]
@@ -968,7 +977,7 @@ struct ei_transform_take_affine_part<Transform<Scalar,Dim,AffineCompact> > {
 };
 
 /*****************************************************
-*** Specializations of construct from matix        ***
+*** Specializations of construct from matrix       ***
 *****************************************************/
 
 template<typename Other, int Mode, int Dim, int HDim>
diff --git a/Eigen/src/Geometry/Umeyama.h b/Eigen/src/Geometry/Umeyama.h
index 7652aa92e..551a69e5b 100644
--- a/Eigen/src/Geometry/Umeyama.h
+++ b/Eigen/src/Geometry/Umeyama.h
@@ -117,7 +117,7 @@ umeyama(const MatrixBase<Derived>& src, const MatrixBase<OtherDerived>& dst, boo
   enum { Dimension = EIGEN_ENUM_MIN(Derived::RowsAtCompileTime, OtherDerived::RowsAtCompileTime) };
 
   typedef Matrix<Scalar, Dimension, 1> VectorType;
-  typedef typename ei_plain_matrix_type<Derived>::type MatrixType;
+  typedef Matrix<Scalar, Dimension, Dimension> MatrixType;
   typedef typename ei_plain_matrix_type_row_major<Derived>::type RowMajorMatrixType;
 
   const int m = src.rows(); // dimension
@@ -131,17 +131,11 @@ umeyama(const MatrixBase<Derived>& src, const MatrixBase<OtherDerived>& dst, boo
   const VectorType dst_mean = dst.rowwise().sum() * one_over_n;
 
   // demeaning of src and dst points
-  RowMajorMatrixType src_demean(m,n);
-  RowMajorMatrixType dst_demean(m,n);
-  for (int i=0; i<n; ++i)
-  {
-    src_demean.col(i) = src.col(i) - src_mean;
-    dst_demean.col(i) = dst.col(i) - dst_mean;
-  }
+  const RowMajorMatrixType src_demean = src.colwise() - src_mean;
+  const RowMajorMatrixType dst_demean = dst.colwise() - dst_mean;
 
   // Eq. (36)-(37)
   const Scalar src_var = src_demean.rowwise().squaredNorm().sum() * one_over_n;
-  // const Scalar dst_var = dst_demean.rowwise().squaredNorm().sum() * one_over_n;
 
   // Eq. (38)
   const MatrixType sigma = one_over_n * dst_demean * src_demean.transpose();
diff --git a/Eigen/src/Geometry/arch/Geometry_SSE.h b/Eigen/src/Geometry/arch/Geometry_SSE.h
index d0342febc..1b8f6aead 100644
--- a/Eigen/src/Geometry/arch/Geometry_SSE.h
+++ b/Eigen/src/Geometry/arch/Geometry_SSE.h
@@ -26,24 +26,26 @@
 #ifndef EIGEN_GEOMETRY_SSE_H
 #define EIGEN_GEOMETRY_SSE_H
 
-template<> inline Quaternion<float>
-ei_quaternion_product<EiArch_SSE,float>(const Quaternion<float>& _a, const Quaternion<float>& _b)
+template<class Derived, class OtherDerived> struct ei_quat_product<EiArch_SSE, Derived, OtherDerived, float, Aligned>
 {
-  const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0,0,0,0x80000000));
-  Quaternion<float> res;
-  __m128 a = _a.coeffs().packet<Aligned>(0);
-  __m128 b = _b.coeffs().packet<Aligned>(0);
-  __m128 flip1 = _mm_xor_ps(_mm_mul_ps(ei_vec4f_swizzle1(a,1,2,0,2),
-                                       ei_vec4f_swizzle1(b,2,0,1,2)),mask);
-  __m128 flip2 = _mm_xor_ps(_mm_mul_ps(ei_vec4f_swizzle1(a,3,3,3,1),
-                                       ei_vec4f_swizzle1(b,0,1,2,1)),mask);
-  ei_pstore(&res.x(),
-            _mm_add_ps(_mm_sub_ps(_mm_mul_ps(a,ei_vec4f_swizzle1(b,3,3,3,3)),
-                                  _mm_mul_ps(ei_vec4f_swizzle1(a,2,0,1,0),
-                                             ei_vec4f_swizzle1(b,1,2,0,0))),
-                       _mm_add_ps(flip1,flip2)));
-  return res;
-}
+  inline static Quaternion<float> run(const QuaternionBase<Derived>& _a, const QuaternionBase<OtherDerived>& _b)
+  {
+    const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0,0,0,0x80000000));
+    Quaternion<float> res;
+    __m128 a = _a.coeffs().packet<Aligned>(0);
+    __m128 b = _b.coeffs().packet<Aligned>(0);
+    __m128 flip1 = _mm_xor_ps(_mm_mul_ps(ei_vec4f_swizzle1(a,1,2,0,2),
+                                         ei_vec4f_swizzle1(b,2,0,1,2)),mask);
+    __m128 flip2 = _mm_xor_ps(_mm_mul_ps(ei_vec4f_swizzle1(a,3,3,3,1),
+                                         ei_vec4f_swizzle1(b,0,1,2,1)),mask);
+    ei_pstore(&res.x(),
+              _mm_add_ps(_mm_sub_ps(_mm_mul_ps(a,ei_vec4f_swizzle1(b,3,3,3,3)),
+                                    _mm_mul_ps(ei_vec4f_swizzle1(a,2,0,1,0),
+                                               ei_vec4f_swizzle1(b,1,2,0,0))),
+                         _mm_add_ps(flip1,flip2)));
+    return res;
+  }
+};
 
 template<typename VectorLhs,typename VectorRhs>
 struct ei_cross3_impl<EiArch_SSE,VectorLhs,VectorRhs,float,true> {
diff --git a/Eigen/src/Sparse/SparseExpressionMaker.h b/Eigen/src/Sparse/SparseExpressionMaker.h
new file mode 100644
index 000000000..1fdcbb1f2
--- /dev/null
+++ b/Eigen/src/Sparse/SparseExpressionMaker.h
@@ -0,0 +1,48 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009 Gael Guennebaud <g.gael@free.fr>
+//
+// Eigen is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 3 of the License, or (at your option) any later version.
+//
+// Alternatively, you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of
+// the License, or (at your option) any later version.
+//
+// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License and a copy of the GNU General Public License along with
+// Eigen. If not, see <http://www.gnu.org/licenses/>.
+
+#ifndef EIGEN_SPARSE_EXPRESSIONMAKER_H
+#define EIGEN_SPARSE_EXPRESSIONMAKER_H
+
+template<typename XprType>
+struct MakeNestByValue<XprType,IsSparse>
+{
+  typedef SparseNestByValue<XprType> Type;
+};
+
+template<typename Func, typename XprType>
+struct MakeCwiseUnaryOp<Func,XprType,IsSparse>
+{
+  typedef SparseCwiseUnaryOp<Func,XprType> Type;
+};
+
+template<typename Func, typename A, typename B>
+struct MakeCwiseBinaryOp<Func,A,B,IsSparse>
+{
+  typedef SparseCwiseBinaryOp<Func,A,B> Type;
+};
+
+// TODO complete the list
+
+#endif // EIGEN_SPARSE_EXPRESSIONMAKER_H
diff --git a/bench/BenchTimer.h b/bench/BenchTimer.h
index bfc3a99b3..c1f473597 100644
--- a/bench/BenchTimer.h
+++ b/bench/BenchTimer.h
@@ -26,8 +26,14 @@
 #ifndef EIGEN_BENCH_TIMER_H
 #define EIGEN_BENCH_TIMER_H
 
+#ifndef WIN32
 #include <sys/time.h>
 #include <unistd.h>
+#else
+#define NOMINMAX
+#include <windows.h>
+#endif
+
 #include <cstdlib>
 #include <numeric>
 
@@ -40,7 +46,15 @@ class BenchTimer
 {
 public:
 
-  BenchTimer() { reset(); }
+  BenchTimer() 
+  { 
+#ifdef WIN32
+    LARGE_INTEGER freq;
+    QueryPerformanceFrequency(&freq);
+    m_frequency = (double)freq.QuadPart;
+#endif
+    reset(); 
+  }
 
   ~BenchTimer() {}
 
@@ -51,23 +65,35 @@ public:
     m_best = std::min(m_best, getTime() - m_start);
   }
 
-  /** Return the best elapsed time.
+  /** Return the best elapsed time in seconds.
     */
   inline double value(void)
   {
-      return m_best;
+    return m_best;
   }
 
+#ifdef WIN32
+  inline double getTime(void)
+#else
   static inline double getTime(void)
+#endif
   {
+#ifdef WIN32
+    LARGE_INTEGER query_ticks;
+    QueryPerformanceCounter(&query_ticks);
+    return query_ticks.QuadPart/m_frequency;
+#else
       struct timeval tv;
       struct timezone tz;
       gettimeofday(&tv, &tz);
       return (double)tv.tv_sec + 1.e-6 * (double)tv.tv_usec;
+#endif
   }
 
 protected:
-
+#ifdef WIN32
+  double m_frequency;
+#endif
   double m_best, m_start;
 
 };
diff --git a/bench/benchFFT.cpp b/bench/benchFFT.cpp
new file mode 100644
index 000000000..4b6cabb55
--- /dev/null
+++ b/bench/benchFFT.cpp
@@ -0,0 +1,115 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra. Eigen itself is part of the KDE project.
+//
+// Copyright (C) 2009 Mark Borgerding mark a borgerding net
+//
+// Eigen is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 3 of the License, or (at your option) any later version.
+//
+// Alternatively, you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of
+// the License, or (at your option) any later version.
+//
+// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License and a copy of the GNU General Public License along with
+// Eigen. If not, see <http://www.gnu.org/licenses/>.
+
+#include <complex>
+#include <vector>
+#include <Eigen/Core>
+#include <bench/BenchTimer.h>
+#ifdef USE_FFTW
+#include <fftw3.h>
+#endif
+
+#include <unsupported/Eigen/FFT>
+
+using namespace Eigen;
+using namespace std;
+
+
+template <typename T>
+string nameof();
+
+template <> string nameof<float>() {return "float";}
+template <> string nameof<double>() {return "double";}
+template <> string nameof<long double>() {return "long double";}
+
+#ifndef TYPE
+#define TYPE float
+#endif
+
+#ifndef NFFT
+#define NFFT 1024
+#endif
+#ifndef NDATA
+#define NDATA 1000000
+#endif
+
+using namespace Eigen;
+
+template <typename T>
+void bench(int nfft,bool fwd)
+{
+    typedef typename NumTraits<T>::Real Scalar;
+    typedef typename std::complex<Scalar> Complex;
+    int nits = NDATA/nfft;
+    vector<T> inbuf(nfft);
+    vector<Complex > outbuf(nfft);
+    FFT< Scalar > fft;
+
+    fft.fwd( outbuf , inbuf);
+
+    BenchTimer timer;
+    timer.reset();
+    for (int k=0;k<8;++k) {
+        timer.start();
+        for(int i = 0; i < nits; i++)
+            if (fwd)
+                fft.fwd( outbuf , inbuf);
+            else
+                fft.inv(inbuf,outbuf);
+        timer.stop();
+    }
+
+    cout << nameof<Scalar>() << " ";
+    double mflops = 5.*nfft*log2((double)nfft) / (1e6 * timer.value() / (double)nits );
+    if ( NumTraits<T>::IsComplex ) {
+        cout << "complex";
+    }else{
+        cout << "real   ";
+        mflops /= 2;
+    }
+
+    if (fwd)
+        cout << " fwd";
+    else
+        cout << " inv";
+
+    cout << " NFFT=" << nfft << "  " << (double(1e-6*nfft*nits)/timer.value()) << " MS/s  " << mflops << "MFLOPS\n";
+}
+
+int main(int argc,char ** argv)
+{
+    bench<complex<float> >(NFFT,true);
+    bench<complex<float> >(NFFT,false);
+    bench<float>(NFFT,true);
+    bench<float>(NFFT,false);
+    bench<complex<double> >(NFFT,true);
+    bench<complex<double> >(NFFT,false);
+    bench<double>(NFFT,true);
+    bench<double>(NFFT,false);
+    bench<complex<long double> >(NFFT,true);
+    bench<complex<long double> >(NFFT,false);
+    bench<long double>(NFFT,true);
+    bench<long double>(NFFT,false);
+    return 0;
+}
diff --git a/cmake/FindFFTW.cmake b/cmake/FindFFTW.cmake
new file mode 100644
index 000000000..a56450b17
--- /dev/null
+++ b/cmake/FindFFTW.cmake
@@ -0,0 +1,24 @@
+
+if (FFTW_INCLUDES AND FFTW_LIBRARIES)
+  set(FFTW_FIND_QUIETLY TRUE)
+endif (FFTW_INCLUDES AND FFTW_LIBRARIES)
+
+find_path(FFTW_INCLUDES
+  NAMES
+  fftw3.h
+  PATHS
+  $ENV{FFTWDIR}
+  ${INCLUDE_INSTALL_DIR}
+)
+
+find_library(FFTWF_LIB NAMES fftw3f PATHS $ENV{FFTWDIR} ${LIB_INSTALL_DIR})
+find_library(FFTW_LIB NAMES fftw3 PATHS $ENV{FFTWDIR} ${LIB_INSTALL_DIR})
+find_library(FFTWL_LIB NAMES fftw3l  PATHS $ENV{FFTWDIR} ${LIB_INSTALL_DIR})
+set(FFTW_LIBRARIES "${FFTWF_LIB} ${FFTW_LIB} ${FFTWL_LIB}" )
+message(STATUS "FFTW ${FFTW_LIBRARIES}" )
+
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(FFTW DEFAULT_MSG
+                                  FFTW_INCLUDES FFTW_LIBRARIES)
+
+mark_as_advanced(FFTW_INCLUDES FFTW_LIBRARIES)
diff --git a/scripts/eigen_gen_credits.cpp b/scripts/eigen_gen_credits.cpp
index 086548e26..d7a625d47 100644
--- a/scripts/eigen_gen_credits.cpp
+++ b/scripts/eigen_gen_credits.cpp
@@ -13,10 +13,24 @@ using namespace std;
 std::string contributor_name(const std::string& line)
 {
   string result;
+
+  // let's first take care of the case of isolated email addresses, like
+  // "user@localhost.localdomain" entries
+  if(line.find("markb@localhost.localdomain") != string::npos)
+  {
+    return "Mark Borgerding";
+  }
+
+  // from there on we assume that we have a entry of the form
+  // either:
+  //   Bla bli Blurp
+  // or:
+  //   Bla bli Blurp <bblurp@email.com>
+  
   size_t position_of_email_address = line.find_first_of('<');
   if(position_of_email_address != string::npos)
   {
-    // there is an e-mail address.
+    // there is an e-mail address in <...>.
     
     // Hauke once committed as "John Smith", fix that.
     if(line.find("hauke.heibel") != string::npos)
@@ -29,7 +43,7 @@ std::string contributor_name(const std::string& line)
   }
   else
   {
-    // there is no e-mail address.
+    // there is no e-mail address in <...>.
     
     if(line.find("convert-repo") != string::npos)
       result = "";
diff --git a/test/geo_hyperplane.cpp b/test/geo_hyperplane.cpp
index f1d3b016f..3cf5655c2 100644
--- a/test/geo_hyperplane.cpp
+++ b/test/geo_hyperplane.cpp
@@ -121,7 +121,8 @@ template<typename Scalar> void lines()
     VERIFY_IS_APPROX(result, center);
 
     // check conversions between two types of lines
-    CoeffsType converted_coeffs = HLine(PLine(line_u)).coeffs();
+    PLine pl(line_u); // gcc 3.3 will commit suicide if we don't name this variable
+    CoeffsType converted_coeffs = HLine(pl).coeffs();
     converted_coeffs *= (line_u.coeffs()[0])/(converted_coeffs[0]);
     VERIFY(line_u.coeffs().isApprox(converted_coeffs));
   }
diff --git a/test/map.cpp b/test/map.cpp
index 5c0ec3137..18c6b2694 100644
--- a/test/map.cpp
+++ b/test/map.cpp
@@ -37,14 +37,15 @@ template<typename VectorType> void map_class(const VectorType& m)
   Scalar* array3unaligned = size_t(array3)%16 == 0 ? array3+1 : array3;
   
   Map<VectorType, Aligned>(array1, size) = VectorType::Random(size);
-  Map<VectorType>(array2, size) = Map<VectorType>(array1, size);
+  Map<VectorType, Aligned>(array2, size) = Map<VectorType,Aligned>(array1, size);
   Map<VectorType>(array3unaligned, size) = Map<VectorType>(array1, size);
-  VectorType ma1 = Map<VectorType>(array1, size);
+  VectorType ma1 = Map<VectorType, Aligned>(array1, size);
   VectorType ma2 = Map<VectorType, Aligned>(array2, size);
   VectorType ma3 = Map<VectorType>(array3unaligned, size);
   VERIFY_IS_APPROX(ma1, ma2);
   VERIFY_IS_APPROX(ma1, ma3);
-  
+  VERIFY_RAISES_ASSERT((Map<VectorType,Aligned>(array3unaligned, size)));
+
   ei_aligned_delete(array1, size);
   ei_aligned_delete(array2, size);
   delete[] array3;
diff --git a/unsupported/Eigen/Complex b/unsupported/Eigen/Complex
new file mode 100644
index 000000000..e1c41ab38
--- /dev/null
+++ b/unsupported/Eigen/Complex
@@ -0,0 +1,182 @@
+#ifndef EIGEN_COMPLEX_H
+#define EIGEN_COMPLEX_H
+
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009 Mark Borgerding mark a borgerding net
+//
+// Eigen is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 3 of the License, or (at your option) any later version.
+//
+// Alternatively, you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of
+// the License, or (at your option) any later version.
+//
+// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License and a copy of the GNU General Public License along with
+// Eigen. If not, see <http://www.gnu.org/licenses/>.
+
+// Eigen::Complex reuses as much as possible from std::complex
+// and allows easy conversion to and from, even at the pointer level.
+
+
+#include <complex>
+
+namespace Eigen {
+
+template <typename _NativePtr,typename _PunnedPtr>
+struct castable_pointer
+{
+    castable_pointer(_NativePtr ptr) : _ptr(ptr) {}
+    operator _NativePtr ()  {return _ptr;}
+    operator _PunnedPtr ()  {return reinterpret_cast<_PunnedPtr>(_ptr);}
+    private: 
+    _NativePtr _ptr;
+};
+
+template <typename T>
+struct Complex
+{
+    typedef typename std::complex<T> StandardComplex;
+    typedef T value_type;
+
+    // constructors
+    Complex(const T& re = T(), const T& im = T()) : _re(re),_im(im) { }
+    Complex(const Complex&other ): _re(other.real()) ,_im(other.imag()) {}
+
+    template<class X> 
+    Complex(const Complex<X>&other): _re(other.real()) ,_im(other.imag()) {}
+    template<class X> 
+    Complex(const std::complex<X>&other): _re(other.real()) ,_im(other.imag()) {}
+
+
+    // allow binary access to the object as a std::complex
+    typedef castable_pointer< Complex<T>*, StandardComplex* > pointer_type;
+    typedef castable_pointer< const Complex<T>*, const StandardComplex* > const_pointer_type;
+    pointer_type operator & () {return pointer_type(this);}
+    const_pointer_type operator & () const {return const_pointer_type(this);}
+
+    operator StandardComplex () const {return std_type();}
+    operator StandardComplex & () {return std_type();}
+
+    StandardComplex std_type() const {return StandardComplex(real(),imag());}
+    StandardComplex & std_type() {return *reinterpret_cast<StandardComplex*>(this);}
+
+
+    // every sort of accessor and mutator that has ever been in fashion.
+    // For a brief history, search for "std::complex over-encapsulated"
+    // http://www.open-std.org/jtc1/sc22/wg21/docs/lwg-defects.html#387
+    const T & real() const {return _re;}
+    const T & imag() const {return _im;}
+    T & real() {return _re;}
+    T & imag() {return _im;}
+    T & real(const T & x) {return _re=x;}
+    T & imag(const T & x) {return _im=x;}
+    void set_real(const T & x) {_re = x;}
+    void set_imag(const T & x) {_im = x;}
+
+    // *** complex member functions: ***
+    Complex<T>& operator= (const T& val) { _re=val;_im=0;return *this;  }
+    Complex<T>& operator+= (const T& val) {_re+=val;return *this;}
+    Complex<T>& operator-= (const T& val) {_re-=val;return *this;}
+    Complex<T>& operator*= (const T& val) {_re*=val;_im*=val;return *this;  }
+    Complex<T>& operator/= (const T& val) {_re/=val;_im/=val;return *this;  }
+
+    Complex& operator= (const Complex& rhs) {_re=rhs._re;_im=rhs._im;return *this;}
+    Complex& operator= (const StandardComplex& rhs) {_re=rhs.real();_im=rhs.imag();return *this;}
+
+    template<class X> Complex<T>& operator= (const Complex<X>& rhs) { _re=rhs._re;_im=rhs._im;return *this;}
+    template<class X> Complex<T>& operator+= (const Complex<X>& rhs) { _re+=rhs._re;_im+=rhs._im;return *this;}
+    template<class X> Complex<T>& operator-= (const Complex<X>& rhs) { _re-=rhs._re;_im-=rhs._im;return *this;}
+    template<class X> Complex<T>& operator*= (const Complex<X>& rhs) { this->std_type() *= rhs.std_type(); return *this; }
+    template<class X> Complex<T>& operator/= (const Complex<X>& rhs) { this->std_type() /= rhs.std_type(); return *this; }
+
+    private:
+    T _re;
+    T _im;
+};
+
+template <typename T>
+T ei_to_std( const T & x) {return x;}
+
+template <typename T>
+std::complex<T> ei_to_std( const Complex<T> & x) {return x.std_type();}
+
+// 26.2.6 operators
+template<class T> Complex<T> operator+(const Complex<T>& rhs) {return rhs;}
+template<class T> Complex<T> operator-(const Complex<T>& rhs) {return -ei_to_std(rhs);}
+
+template<class T> Complex<T> operator+(const Complex<T>& lhs, const Complex<T>& rhs) { return ei_to_std(lhs) + ei_to_std(rhs);}
+template<class T> Complex<T> operator-(const Complex<T>& lhs, const Complex<T>& rhs) { return ei_to_std(lhs) - ei_to_std(rhs);}
+template<class T> Complex<T> operator*(const Complex<T>& lhs, const Complex<T>& rhs) { return ei_to_std(lhs) * ei_to_std(rhs);}
+template<class T> Complex<T> operator/(const Complex<T>& lhs, const Complex<T>& rhs) { return ei_to_std(lhs) / ei_to_std(rhs);}
+template<class T> bool operator==(const Complex<T>& lhs, const Complex<T>& rhs) { return ei_to_std(lhs) == ei_to_std(rhs);}
+template<class T> bool operator!=(const Complex<T>& lhs, const Complex<T>& rhs) { return ei_to_std(lhs) != ei_to_std(rhs);}
+
+template<class T> Complex<T> operator+(const Complex<T>& lhs, const T& rhs) {return ei_to_std(lhs) + ei_to_std(rhs); }
+template<class T> Complex<T> operator-(const Complex<T>& lhs, const T& rhs) {return ei_to_std(lhs) - ei_to_std(rhs); }
+template<class T> Complex<T> operator*(const Complex<T>& lhs, const T& rhs) {return ei_to_std(lhs) * ei_to_std(rhs); }
+template<class T> Complex<T> operator/(const Complex<T>& lhs, const T& rhs) {return ei_to_std(lhs) / ei_to_std(rhs); }
+template<class T> bool operator==(const Complex<T>& lhs, const T& rhs) {return ei_to_std(lhs) == ei_to_std(rhs); }
+template<class T> bool operator!=(const Complex<T>& lhs, const T& rhs) {return ei_to_std(lhs) != ei_to_std(rhs); }
+
+template<class T> Complex<T> operator+(const T& lhs, const Complex<T>& rhs) {return ei_to_std(lhs) + ei_to_std(rhs); }
+template<class T> Complex<T> operator-(const T& lhs, const Complex<T>& rhs) {return ei_to_std(lhs) - ei_to_std(rhs); }
+template<class T> Complex<T> operator*(const T& lhs, const Complex<T>& rhs) {return ei_to_std(lhs) * ei_to_std(rhs); }
+template<class T> Complex<T> operator/(const T& lhs, const Complex<T>& rhs) {return ei_to_std(lhs) / ei_to_std(rhs); }
+template<class T> bool operator==(const T& lhs, const Complex<T>& rhs) {return ei_to_std(lhs) == ei_to_std(rhs); }
+template<class T> bool operator!=(const T& lhs, const Complex<T>& rhs) {return ei_to_std(lhs) != ei_to_std(rhs); }
+
+template<class T, class charT, class traits>
+std::basic_istream<charT,traits>&
+  operator>> (std::basic_istream<charT,traits>& istr, Complex<T>& rhs)
+{
+    return istr >> rhs.std_type();
+}
+
+template<class T, class charT, class traits>
+std::basic_ostream<charT,traits>&
+operator<< (std::basic_ostream<charT,traits>& ostr, const Complex<T>& rhs)
+{
+    return ostr << rhs.std_type();
+}
+
+ // 26.2.7 values:
+ template<class T> T real(const Complex<T>&x) {return real(ei_to_std(x));}
+ template<class T> T abs(const Complex<T>&x) {return abs(ei_to_std(x));}
+ template<class T> T arg(const Complex<T>&x) {return arg(ei_to_std(x));}
+ template<class T> T norm(const Complex<T>&x) {return norm(ei_to_std(x));}
+
+ template<class T> Complex<T> conj(const Complex<T>&x) { return conj(ei_to_std(x));}
+ template<class T> Complex<T> polar(const T& x, const T&y) {return polar(ei_to_std(x),ei_to_std(y));} 
+ // 26.2.8 transcendentals:
+ template<class T> Complex<T> cos (const  Complex<T>&x){return cos(ei_to_std(x));}
+ template<class T> Complex<T> cosh (const  Complex<T>&x){return cosh(ei_to_std(x));}
+ template<class T> Complex<T> exp (const  Complex<T>&x){return exp(ei_to_std(x));}
+ template<class T> Complex<T> log (const  Complex<T>&x){return log(ei_to_std(x));}
+ template<class T> Complex<T> log10 (const  Complex<T>&x){return log10(ei_to_std(x));}
+
+ template<class T> Complex<T> pow(const Complex<T>&x, int p) {return pow(ei_to_std(x),ei_to_std(p));}
+ template<class T> Complex<T> pow(const Complex<T>&x, const T&p) {return pow(ei_to_std(x),ei_to_std(p));}
+ template<class T> Complex<T> pow(const Complex<T>&x, const Complex<T>&p) {return pow(ei_to_std(x),ei_to_std(p));}
+ template<class T> Complex<T> pow(const T&x, const Complex<T>&p) {return pow(ei_to_std(x),ei_to_std(p));}
+
+ template<class T> Complex<T> sin (const  Complex<T>&x){return sin(ei_to_std(x));}
+ template<class T> Complex<T> sinh (const  Complex<T>&x){return sinh(ei_to_std(x));}
+ template<class T> Complex<T> sqrt (const  Complex<T>&x){return sqrt(ei_to_std(x));}
+ template<class T> Complex<T> tan (const  Complex<T>&x){return tan(ei_to_std(x));}
+ template<class T> Complex<T> tanh (const  Complex<T>&x){return tanh(ei_to_std(x));}
+}
+
+#endif
+/* vim: set filetype=cpp et sw=2 ts=2 ai: */
+
diff --git a/unsupported/Eigen/FFT b/unsupported/Eigen/FFT
new file mode 100644
index 000000000..36afdde8d
--- /dev/null
+++ b/unsupported/Eigen/FFT
@@ -0,0 +1,135 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra. 
+//
+// Copyright (C) 2009 Mark Borgerding mark a borgerding net
+//
+// Eigen is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 3 of the License, or (at your option) any later version.
+//
+// Alternatively, you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of
+// the License, or (at your option) any later version.
+//
+// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License and a copy of the GNU General Public License along with
+// Eigen. If not, see <http://www.gnu.org/licenses/>.
+
+#ifndef EIGEN_FFT_H
+#define EIGEN_FFT_H
+
+#include <complex>
+#include <vector>
+#include <map>
+
+#ifdef EIGEN_FFTW_DEFAULT
+// FFTW: faster, GPL -- incompatible with Eigen in LGPL form, bigger code size
+#  include <fftw3.h>
+   namespace Eigen {
+#    include "src/FFT/ei_fftw_impl.h"
+     //template <typename T> typedef struct ei_fftw_impl  default_fft_impl; this does not work
+     template <typename T> struct default_fft_impl : public ei_fftw_impl<T> {};
+   }
+#elif defined EIGEN_MKL_DEFAULT
+// TODO 
+// intel Math Kernel Library: fastest, commercial -- may be incompatible with Eigen in GPL form
+   namespace Eigen {
+#    include "src/FFT/ei_imklfft_impl.h"
+     template <typename T> struct default_fft_impl : public ei_imklfft_impl {};
+   }
+#else
+// ei_kissfft_impl:  small, free, reasonably efficient default, derived from kissfft
+//
+  namespace Eigen {
+#   include "src/FFT/ei_kissfft_impl.h"
+     template <typename T> 
+       struct default_fft_impl : public ei_kissfft_impl<T> {};
+  }
+#endif
+
+namespace Eigen {
+
+template <typename _Scalar,
+         typename _Impl=default_fft_impl<_Scalar> >
+class FFT
+{
+  public:
+    typedef _Impl impl_type;
+    typedef typename impl_type::Scalar Scalar;
+    typedef typename impl_type::Complex Complex;
+
+    FFT(const impl_type & impl=impl_type() ) :m_impl(impl) { }
+
+    template <typename _Input>
+    void fwd( Complex * dst, const _Input * src, int nfft)
+    {
+        m_impl.fwd(dst,src,nfft);
+    }
+
+    template <typename _Input>
+    void fwd( std::vector<Complex> & dst, const std::vector<_Input> & src) 
+    {
+        dst.resize( src.size() );
+        fwd( &dst[0],&src[0],src.size() );
+    }
+
+    template<typename InputDerived, typename ComplexDerived>
+    void fwd( MatrixBase<ComplexDerived> & dst, const MatrixBase<InputDerived> & src)
+    {
+        EIGEN_STATIC_ASSERT_VECTOR_ONLY(InputDerived)
+        EIGEN_STATIC_ASSERT_VECTOR_ONLY(ComplexDerived)
+        EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(ComplexDerived,InputDerived) // size at compile-time
+        EIGEN_STATIC_ASSERT((ei_is_same_type<typename ComplexDerived::Scalar, Complex>::ret),
+                            YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
+        EIGEN_STATIC_ASSERT(int(InputDerived::Flags)&int(ComplexDerived::Flags)&DirectAccessBit,
+                            THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES)
+        dst.derived().resize( src.size() );
+        fwd( &dst[0],&src[0],src.size() );
+    }
+
+    template <typename _Output>
+    void inv( _Output * dst, const Complex * src, int nfft)
+    {
+        m_impl.inv( dst,src,nfft );
+    }
+
+    template <typename _Output>
+    void inv( std::vector<_Output> & dst, const std::vector<Complex> & src) 
+    {
+        dst.resize( src.size() );
+        inv( &dst[0],&src[0],src.size() );
+    }
+
+    template<typename OutputDerived, typename ComplexDerived>
+    void inv( MatrixBase<OutputDerived> & dst, const MatrixBase<ComplexDerived> & src)
+    {
+        EIGEN_STATIC_ASSERT_VECTOR_ONLY(OutputDerived)
+        EIGEN_STATIC_ASSERT_VECTOR_ONLY(ComplexDerived)
+        EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(ComplexDerived,OutputDerived) // size at compile-time
+        EIGEN_STATIC_ASSERT((ei_is_same_type<typename ComplexDerived::Scalar, Complex>::ret),
+                            YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
+        EIGEN_STATIC_ASSERT(int(OutputDerived::Flags)&int(ComplexDerived::Flags)&DirectAccessBit,
+                            THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES)
+        dst.derived().resize( src.size() );
+        inv( &dst[0],&src[0],src.size() );
+    }
+
+    // TODO: multi-dimensional FFTs
+    
+    // TODO: handle Eigen MatrixBase
+     // ---> i added fwd and inv specializations above + unit test, is this enough? (bjacob)
+
+    impl_type & impl() {return m_impl;}
+  private:
+    impl_type m_impl;
+};
+}
+#endif
+/* vim: set filetype=cpp et sw=2 ts=2 ai: */
diff --git a/unsupported/Eigen/src/AutoDiff/AutoDiffJacobian.h b/unsupported/Eigen/src/AutoDiff/AutoDiffJacobian.h
index a5e881487..b3983f8a6 100644
--- a/unsupported/Eigen/src/AutoDiff/AutoDiffJacobian.h
+++ b/unsupported/Eigen/src/AutoDiff/AutoDiffJacobian.h
@@ -50,10 +50,12 @@ public:
   typedef typename Functor::InputType InputType;
   typedef typename Functor::ValueType ValueType;
   typedef typename Functor::JacobianType JacobianType;
+  typedef typename JacobianType::Scalar Scalar;
 
-  typedef Matrix<double,InputsAtCompileTime,1> DerivativeType;
+  typedef Matrix<Scalar,InputsAtCompileTime,1> DerivativeType;
   typedef AutoDiffScalar<DerivativeType> ActiveScalar;
 
+
   typedef Matrix<ActiveScalar, InputsAtCompileTime, 1> ActiveInput;
   typedef Matrix<ActiveScalar, ValuesAtCompileTime, 1> ActiveValue;
 
diff --git a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h
index 888aa5c8c..2fb733a99 100644
--- a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h
+++ b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h
@@ -42,9 +42,17 @@ void ei_make_coherent(const A& a, const B&b)
 /** \class AutoDiffScalar
   * \brief A scalar type replacement with automatic differentation capability
   *
-  * \param DerType the vector type used to store/represent the derivatives (e.g. Vector3f)
+  * \param _DerType the vector type used to store/represent the derivatives. The base scalar type
+  *                 as well as the number of derivatives to compute are determined from this type.
+  *                 Typical choices include, e.g., \c Vector4f for 4 derivatives, or \c VectorXf
+  *                 if the number of derivatives is not known at compile time, and/or, the number
+  *                 of derivatives is large.
+  *                 Note that _DerType can also be a reference (e.g., \c VectorXf&) to wrap a
+  *                 existing vector into an AutoDiffScalar.
+  *                 Finally, _DerType can also be any Eigen compatible expression.
   *
-  * This class represents a scalar value while tracking its respective derivatives.
+  * This class represents a scalar value while tracking its respective derivatives using Eigen's expression
+  * template mechanism.
   *
   * It supports the following list of global math function:
   *  - std::abs, std::sqrt, std::pow, std::exp, std::log, std::sin, std::cos,
@@ -56,10 +64,11 @@ void ei_make_coherent(const A& a, const B&b)
   * while derivatives are computed right away.
   *
   */
-template<typename DerType>
+template<typename _DerType>
 class AutoDiffScalar
 {
   public:
+    typedef typename ei_cleantype<_DerType>::type DerType;
     typedef typename ei_traits<DerType>::Scalar Scalar;
 
     inline AutoDiffScalar() {}
@@ -108,12 +117,28 @@ class AutoDiffScalar
     inline const DerType& derivatives() const { return m_derivatives; }
     inline DerType& derivatives() { return m_derivatives; }
 
+    inline const AutoDiffScalar<DerType&> operator+(const Scalar& other) const
+    {
+      return AutoDiffScalar<DerType>(m_value + other, m_derivatives);
+    }
+
+    friend inline const AutoDiffScalar<DerType&> operator+(const Scalar& a, const AutoDiffScalar& b)
+    {
+      return AutoDiffScalar<DerType>(a + b.value(), b.derivatives());
+    }
+
+    inline AutoDiffScalar& operator+=(const Scalar& other)
+    {
+      value() += other;
+      return *this;
+    }
+
     template<typename OtherDerType>
-    inline const AutoDiffScalar<CwiseBinaryOp<ei_scalar_sum_op<Scalar>,DerType,OtherDerType> >
+    inline const AutoDiffScalar<typename MakeCwiseBinaryOp<ei_scalar_sum_op<Scalar>,DerType,typename ei_cleantype<OtherDerType>::type>::Type >
     operator+(const AutoDiffScalar<OtherDerType>& other) const
     {
       ei_make_coherent(m_derivatives, other.derivatives());
-      return AutoDiffScalar<CwiseBinaryOp<ei_scalar_sum_op<Scalar>,DerType,OtherDerType> >(
+      return AutoDiffScalar<typename MakeCwiseBinaryOp<ei_scalar_sum_op<Scalar>,DerType,typename ei_cleantype<OtherDerType>::type>::Type >(
         m_value + other.value(),
         m_derivatives + other.derivatives());
     }
@@ -127,11 +152,11 @@ class AutoDiffScalar
     }
 
     template<typename OtherDerType>
-    inline const AutoDiffScalar<CwiseBinaryOp<ei_scalar_difference_op<Scalar>, DerType,OtherDerType> >
+    inline const AutoDiffScalar<typename MakeCwiseBinaryOp<ei_scalar_difference_op<Scalar>, DerType,typename ei_cleantype<OtherDerType>::type>::Type >
     operator-(const AutoDiffScalar<OtherDerType>& other) const
     {
       ei_make_coherent(m_derivatives, other.derivatives());
-      return AutoDiffScalar<CwiseBinaryOp<ei_scalar_difference_op<Scalar>, DerType,OtherDerType> >(
+      return AutoDiffScalar<typename MakeCwiseBinaryOp<ei_scalar_difference_op<Scalar>, DerType,typename ei_cleantype<OtherDerType>::type>::Type >(
         m_value - other.value(),
         m_derivatives - other.derivatives());
     }
@@ -145,73 +170,73 @@ class AutoDiffScalar
     }
 
     template<typename OtherDerType>
-    inline const AutoDiffScalar<CwiseUnaryOp<ei_scalar_opposite_op<Scalar>, DerType> >
+    inline const AutoDiffScalar<typename MakeCwiseUnaryOp<ei_scalar_opposite_op<Scalar>, DerType>::Type >
     operator-() const
     {
-      return AutoDiffScalar<CwiseUnaryOp<ei_scalar_opposite_op<Scalar>, DerType> >(
+      return AutoDiffScalar<typename MakeCwiseUnaryOp<ei_scalar_opposite_op<Scalar>, DerType>::Type >(
         -m_value,
         -m_derivatives);
     }
 
-    inline const AutoDiffScalar<CwiseUnaryOp<ei_scalar_multiple_op<Scalar>, DerType> >
+    inline const AutoDiffScalar<typename MakeCwiseUnaryOp<ei_scalar_multiple_op<Scalar>, DerType>::Type >
     operator*(const Scalar& other) const
     {
-      return AutoDiffScalar<CwiseUnaryOp<ei_scalar_multiple_op<Scalar>, DerType> >(
+      return AutoDiffScalar<typename MakeCwiseUnaryOp<ei_scalar_multiple_op<Scalar>, DerType>::Type >(
         m_value * other,
         (m_derivatives * other));
     }
 
-    friend inline const AutoDiffScalar<CwiseUnaryOp<ei_scalar_multiple_op<Scalar>, DerType> >
+    friend inline const AutoDiffScalar<typename MakeCwiseUnaryOp<ei_scalar_multiple_op<Scalar>, DerType>::Type >
     operator*(const Scalar& other, const AutoDiffScalar& a)
     {
-      return AutoDiffScalar<CwiseUnaryOp<ei_scalar_multiple_op<Scalar>, DerType> >(
+      return AutoDiffScalar<typename MakeCwiseUnaryOp<ei_scalar_multiple_op<Scalar>, DerType>::Type >(
         a.value() * other,
         a.derivatives() * other);
     }
 
-    inline const AutoDiffScalar<CwiseUnaryOp<ei_scalar_multiple_op<Scalar>, DerType> >
+    inline const AutoDiffScalar<typename MakeCwiseUnaryOp<ei_scalar_multiple_op<Scalar>, DerType>::Type >
     operator/(const Scalar& other) const
     {
-      return AutoDiffScalar<CwiseUnaryOp<ei_scalar_multiple_op<Scalar>, DerType> >(
+      return AutoDiffScalar<typename MakeCwiseUnaryOp<ei_scalar_multiple_op<Scalar>, DerType>::Type >(
         m_value / other,
         (m_derivatives * (Scalar(1)/other)));
     }
 
-    friend inline const AutoDiffScalar<CwiseUnaryOp<ei_scalar_multiple_op<Scalar>, DerType> >
+    friend inline const AutoDiffScalar<typename MakeCwiseUnaryOp<ei_scalar_multiple_op<Scalar>, DerType>::Type >
     operator/(const Scalar& other, const AutoDiffScalar& a)
     {
-      return AutoDiffScalar<CwiseUnaryOp<ei_scalar_multiple_op<Scalar>, DerType> >(
+      return AutoDiffScalar<typename MakeCwiseUnaryOp<ei_scalar_multiple_op<Scalar>, DerType>::Type >(
         other / a.value(),
         a.derivatives() * (-Scalar(1)/other));
     }
 
     template<typename OtherDerType>
-    inline const AutoDiffScalar<CwiseUnaryOp<ei_scalar_multiple_op<Scalar>,
-        NestByValue<CwiseBinaryOp<ei_scalar_difference_op<Scalar>,
-          NestByValue<CwiseUnaryOp<ei_scalar_multiple_op<Scalar>, DerType> >,
-          NestByValue<CwiseUnaryOp<ei_scalar_multiple_op<Scalar>, OtherDerType> > > > > >
+    inline const AutoDiffScalar<typename MakeCwiseUnaryOp<ei_scalar_multiple_op<Scalar>,
+        typename MakeNestByValue<typename MakeCwiseBinaryOp<ei_scalar_difference_op<Scalar>,
+          typename MakeNestByValue<typename MakeCwiseUnaryOp<ei_scalar_multiple_op<Scalar>, DerType>::Type>::Type,
+          typename MakeNestByValue<typename MakeCwiseUnaryOp<ei_scalar_multiple_op<Scalar>, typename ei_cleantype<OtherDerType>::type>::Type>::Type >::Type >::Type >::Type >
     operator/(const AutoDiffScalar<OtherDerType>& other) const
     {
       ei_make_coherent(m_derivatives, other.derivatives());
-      return AutoDiffScalar<CwiseUnaryOp<ei_scalar_multiple_op<Scalar>,
-        NestByValue<CwiseBinaryOp<ei_scalar_difference_op<Scalar>,
-          NestByValue<CwiseUnaryOp<ei_scalar_multiple_op<Scalar>, DerType> >,
-          NestByValue<CwiseUnaryOp<ei_scalar_multiple_op<Scalar>, OtherDerType> > > > > >(
+      return AutoDiffScalar<typename MakeCwiseUnaryOp<ei_scalar_multiple_op<Scalar>,
+        typename MakeNestByValue<typename MakeCwiseBinaryOp<ei_scalar_difference_op<Scalar>,
+          typename MakeNestByValue<typename MakeCwiseUnaryOp<ei_scalar_multiple_op<Scalar>, DerType>::Type>::Type,
+          typename MakeNestByValue<typename MakeCwiseUnaryOp<ei_scalar_multiple_op<Scalar>, typename ei_cleantype<OtherDerType>::type>::Type>::Type >::Type >::Type >::Type >(
         m_value / other.value(),
           ((m_derivatives * other.value()).nestByValue() - (m_value * other.derivatives()).nestByValue()).nestByValue()
         * (Scalar(1)/(other.value()*other.value())));
     }
 
     template<typename OtherDerType>
-    inline const AutoDiffScalar<CwiseBinaryOp<ei_scalar_sum_op<Scalar>,
-        NestByValue<CwiseUnaryOp<ei_scalar_multiple_op<Scalar>, DerType> >,
-        NestByValue<CwiseUnaryOp<ei_scalar_multiple_op<Scalar>, OtherDerType> > > >
+    inline const AutoDiffScalar<typename MakeCwiseBinaryOp<ei_scalar_sum_op<Scalar>,
+        typename MakeNestByValue<typename MakeCwiseUnaryOp<ei_scalar_multiple_op<Scalar>, DerType>::Type>::Type,
+        typename MakeNestByValue<typename MakeCwiseUnaryOp<ei_scalar_multiple_op<Scalar>, typename ei_cleantype<OtherDerType>::type>::Type>::Type >::Type >
     operator*(const AutoDiffScalar<OtherDerType>& other) const
     {
       ei_make_coherent(m_derivatives, other.derivatives());
-      return AutoDiffScalar<CwiseBinaryOp<ei_scalar_sum_op<Scalar>,
-        NestByValue<CwiseUnaryOp<ei_scalar_multiple_op<Scalar>, DerType> >,
-        NestByValue<CwiseUnaryOp<ei_scalar_multiple_op<Scalar>, OtherDerType> > > >(
+      return AutoDiffScalar<typename MakeCwiseBinaryOp<ei_scalar_sum_op<Scalar>,
+        typename MakeNestByValue<typename MakeCwiseUnaryOp<ei_scalar_multiple_op<Scalar>, DerType>::Type>::Type,
+        typename MakeNestByValue<typename MakeCwiseUnaryOp<ei_scalar_multiple_op<Scalar>, typename ei_cleantype<OtherDerType>::type>::Type>::Type >::Type >(
         m_value * other.value(),
         (m_derivatives * other.value()).nestByValue() + (m_value * other.derivatives()).nestByValue());
     }
@@ -283,11 +308,11 @@ struct ei_make_coherent_impl<Matrix<A_Scalar, A_Rows, A_Cols, A_Options, A_MaxRo
 
 #define EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(FUNC,CODE) \
   template<typename DerType> \
-  inline const Eigen::AutoDiffScalar<Eigen::CwiseUnaryOp<Eigen::ei_scalar_multiple_op<typename Eigen::ei_traits<DerType>::Scalar>, DerType> > \
+  inline const Eigen::AutoDiffScalar<typename Eigen::MakeCwiseUnaryOp<Eigen::ei_scalar_multiple_op<typename Eigen::ei_traits<DerType>::Scalar>, DerType>::Type > \
   FUNC(const Eigen::AutoDiffScalar<DerType>& x) { \
     using namespace Eigen; \
     typedef typename ei_traits<DerType>::Scalar Scalar; \
-    typedef AutoDiffScalar<CwiseUnaryOp<ei_scalar_multiple_op<Scalar>, DerType> > ReturnType; \
+    typedef AutoDiffScalar<typename MakeCwiseUnaryOp<ei_scalar_multiple_op<Scalar>, DerType>::Type > ReturnType; \
     CODE; \
   }
 
@@ -314,12 +339,12 @@ namespace std
     return ReturnType(std::log(x.value),x.derivatives() * (Scalar(1).x.value()));)
 
   template<typename DerType>
-  inline const Eigen::AutoDiffScalar<Eigen::CwiseUnaryOp<Eigen::ei_scalar_multiple_op<typename Eigen::ei_traits<DerType>::Scalar>, DerType> >
+  inline const Eigen::AutoDiffScalar<typename Eigen::MakeCwiseUnaryOp<Eigen::ei_scalar_multiple_op<typename Eigen::ei_traits<DerType>::Scalar>, DerType>::Type >
   pow(const Eigen::AutoDiffScalar<DerType>& x, typename Eigen::ei_traits<DerType>::Scalar y)
   {
     using namespace Eigen;
     typedef typename ei_traits<DerType>::Scalar Scalar;
-    return AutoDiffScalar<CwiseUnaryOp<ei_scalar_multiple_op<Scalar>, DerType> >(
+    return AutoDiffScalar<typename MakeCwiseUnaryOp<ei_scalar_multiple_op<Scalar>, DerType>::Type >(
       std::pow(x.value(),y),
       x.derivatives() * (y * std::pow(x.value(),y-1)));
   }
@@ -359,7 +384,7 @@ EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(ei_log,
   return ReturnType(ei_log(x.value),x.derivatives() * (Scalar(1).x.value()));)
 
 template<typename DerType>
-inline const AutoDiffScalar<CwiseUnaryOp<ei_scalar_multiple_op<typename ei_traits<DerType>::Scalar>, DerType> >
+inline const AutoDiffScalar<typename MakeCwiseUnaryOp<ei_scalar_multiple_op<typename ei_traits<DerType>::Scalar>, DerType>::Type >
 ei_pow(const AutoDiffScalar<DerType>& x, typename ei_traits<DerType>::Scalar y)
 { return std::pow(x,y);}
 
diff --git a/unsupported/Eigen/src/FFT/ei_fftw_impl.h b/unsupported/Eigen/src/FFT/ei_fftw_impl.h
new file mode 100644
index 000000000..e1f67f334
--- /dev/null
+++ b/unsupported/Eigen/src/FFT/ei_fftw_impl.h
@@ -0,0 +1,224 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra. 
+//
+// Copyright (C) 2009 Mark Borgerding mark a borgerding net
+//
+// Eigen is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 3 of the License, or (at your option) any later version.
+//
+// Alternatively, you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of
+// the License, or (at your option) any later version.
+//
+// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License and a copy of the GNU General Public License along with
+// Eigen. If not, see <http://www.gnu.org/licenses/>.
+
+
+
+  // FFTW uses non-const arguments
+  // so we must use ugly const_cast calls for all the args it uses
+  //
+  // This should be safe as long as 
+  // 1. we use FFTW_ESTIMATE for all our planning
+  //       see the FFTW docs section 4.3.2 "Planner Flags"
+  // 2. fftw_complex is compatible with std::complex
+  //    This assumes std::complex<T> layout is array of size 2 with real,imag
+  template <typename T> 
+  inline 
+  T * ei_fftw_cast(const T* p) 
+  { 
+      return const_cast<T*>( p); 
+  }
+
+  inline 
+  fftw_complex * ei_fftw_cast( const std::complex<double> * p) 
+  {
+      return const_cast<fftw_complex*>( reinterpret_cast<const fftw_complex*>(p) ); 
+  }
+
+  inline 
+  fftwf_complex * ei_fftw_cast( const std::complex<float> * p) 
+  { 
+      return const_cast<fftwf_complex*>( reinterpret_cast<const fftwf_complex*>(p) ); 
+  }
+
+  inline 
+  fftwl_complex * ei_fftw_cast( const std::complex<long double> * p) 
+  { 
+      return const_cast<fftwl_complex*>( reinterpret_cast<const fftwl_complex*>(p) ); 
+  }
+
+  template <typename T> 
+  struct ei_fftw_plan {};
+
+  template <> 
+  struct ei_fftw_plan<float>
+  {
+      typedef float scalar_type;
+      typedef fftwf_complex complex_type;
+      fftwf_plan m_plan;
+      ei_fftw_plan() :m_plan(NULL) {}
+      ~ei_fftw_plan() {if (m_plan) fftwf_destroy_plan(m_plan);}
+
+      inline
+      void fwd(complex_type * dst,complex_type * src,int nfft) {
+          if (m_plan==NULL) m_plan = fftwf_plan_dft_1d(nfft,src,dst, FFTW_FORWARD, FFTW_ESTIMATE);
+          fftwf_execute_dft( m_plan, src,dst);
+      }
+      inline
+      void inv(complex_type * dst,complex_type * src,int nfft) {
+          if (m_plan==NULL) m_plan = fftwf_plan_dft_1d(nfft,src,dst, FFTW_BACKWARD , FFTW_ESTIMATE);
+          fftwf_execute_dft( m_plan, src,dst);
+      }
+      inline
+      void fwd(complex_type * dst,scalar_type * src,int nfft) {
+          if (m_plan==NULL) m_plan = fftwf_plan_dft_r2c_1d(nfft,src,dst,FFTW_ESTIMATE);
+          fftwf_execute_dft_r2c( m_plan,src,dst);
+      }
+      inline
+      void inv(scalar_type * dst,complex_type * src,int nfft) {
+          if (m_plan==NULL)
+              m_plan = fftwf_plan_dft_c2r_1d(nfft,src,dst,FFTW_ESTIMATE);
+          fftwf_execute_dft_c2r( m_plan, src,dst);
+      }
+  };
+  template <> 
+  struct ei_fftw_plan<double>
+  {
+      typedef double scalar_type;
+      typedef fftw_complex complex_type;
+      fftw_plan m_plan;
+      ei_fftw_plan() :m_plan(NULL) {}
+      ~ei_fftw_plan() {if (m_plan) fftw_destroy_plan(m_plan);}
+
+      inline
+      void fwd(complex_type * dst,complex_type * src,int nfft) {
+          if (m_plan==NULL) m_plan = fftw_plan_dft_1d(nfft,src,dst, FFTW_FORWARD, FFTW_ESTIMATE);
+          fftw_execute_dft( m_plan, src,dst);
+      }
+      inline
+      void inv(complex_type * dst,complex_type * src,int nfft) {
+          if (m_plan==NULL) m_plan = fftw_plan_dft_1d(nfft,src,dst, FFTW_BACKWARD , FFTW_ESTIMATE);
+          fftw_execute_dft( m_plan, src,dst);
+      }
+      inline
+      void fwd(complex_type * dst,scalar_type * src,int nfft) {
+          if (m_plan==NULL) m_plan = fftw_plan_dft_r2c_1d(nfft,src,dst,FFTW_ESTIMATE);
+          fftw_execute_dft_r2c( m_plan,src,dst);
+      }
+      inline
+      void inv(scalar_type * dst,complex_type * src,int nfft) {
+          if (m_plan==NULL)
+              m_plan = fftw_plan_dft_c2r_1d(nfft,src,dst,FFTW_ESTIMATE);
+          fftw_execute_dft_c2r( m_plan, src,dst);
+      }
+  };
+  template <> 
+  struct ei_fftw_plan<long double>
+  {
+      typedef long double scalar_type;
+      typedef fftwl_complex complex_type;
+      fftwl_plan m_plan;
+      ei_fftw_plan() :m_plan(NULL) {}
+      ~ei_fftw_plan() {if (m_plan) fftwl_destroy_plan(m_plan);}
+
+      inline
+      void fwd(complex_type * dst,complex_type * src,int nfft) {
+          if (m_plan==NULL) m_plan = fftwl_plan_dft_1d(nfft,src,dst, FFTW_FORWARD, FFTW_ESTIMATE);
+          fftwl_execute_dft( m_plan, src,dst);
+      }
+      inline
+      void inv(complex_type * dst,complex_type * src,int nfft) {
+          if (m_plan==NULL) m_plan = fftwl_plan_dft_1d(nfft,src,dst, FFTW_BACKWARD , FFTW_ESTIMATE);
+          fftwl_execute_dft( m_plan, src,dst);
+      }
+      inline
+      void fwd(complex_type * dst,scalar_type * src,int nfft) {
+          if (m_plan==NULL) m_plan = fftwl_plan_dft_r2c_1d(nfft,src,dst,FFTW_ESTIMATE);
+          fftwl_execute_dft_r2c( m_plan,src,dst);
+      }
+      inline
+      void inv(scalar_type * dst,complex_type * src,int nfft) {
+          if (m_plan==NULL)
+              m_plan = fftwl_plan_dft_c2r_1d(nfft,src,dst,FFTW_ESTIMATE);
+          fftwl_execute_dft_c2r( m_plan, src,dst);
+      }
+  };
+
+  template <typename _Scalar>
+  struct ei_fftw_impl
+  {
+      typedef _Scalar Scalar;
+      typedef std::complex<Scalar> Complex;
+
+      inline
+      void clear() 
+      {
+        m_plans.clear();
+      }
+
+      inline
+      void fwd( Complex * dst,const Complex *src,int nfft)
+      {
+        get_plan(nfft,false,dst,src).fwd(ei_fftw_cast(dst), ei_fftw_cast(src),nfft );
+      }
+
+      // real-to-complex forward FFT
+      inline
+      void fwd( Complex * dst,const Scalar * src,int nfft) 
+      {
+          get_plan(nfft,false,dst,src).fwd(ei_fftw_cast(dst), ei_fftw_cast(src) ,nfft);
+          int nhbins=(nfft>>1)+1;
+          for (int k=nhbins;k < nfft; ++k )
+              dst[k] = conj(dst[nfft-k]);
+      }
+
+      // inverse complex-to-complex
+      inline
+      void inv(Complex * dst,const Complex  *src,int nfft)
+      {
+        get_plan(nfft,true,dst,src).inv(ei_fftw_cast(dst), ei_fftw_cast(src),nfft );
+
+        //TODO move scaling to Eigen::FFT
+        // scaling
+        Scalar s = Scalar(1.)/nfft;
+        for (int k=0;k<nfft;++k)
+          dst[k] *= s;
+      }
+
+      // half-complex to scalar
+      inline
+      void inv( Scalar * dst,const Complex * src,int nfft) 
+      {
+        get_plan(nfft,true,dst,src).inv(ei_fftw_cast(dst), ei_fftw_cast(src),nfft );
+
+        //TODO move scaling to Eigen::FFT
+        Scalar s = Scalar(1.)/nfft;
+        for (int k=0;k<nfft;++k)
+          dst[k] *= s;
+      }
+
+  protected:
+      typedef ei_fftw_plan<Scalar> PlanData;
+      typedef std::map<int,PlanData> PlanMap;
+
+      PlanMap m_plans;
+
+      inline
+      PlanData & get_plan(int nfft,bool inverse,void * dst,const void * src)
+      {
+          bool inplace = (dst==src);
+          bool aligned = ( (reinterpret_cast<size_t>(src)&15) | (reinterpret_cast<size_t>(dst)&15) ) == 0;
+          int key = (nfft<<3 ) | (inverse<<2) | (inplace<<1) | aligned;
+          return m_plans[key];
+      }
+  };
diff --git a/unsupported/Eigen/src/FFT/ei_kissfft_impl.h b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h
new file mode 100644
index 000000000..c068d8765
--- /dev/null
+++ b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h
@@ -0,0 +1,414 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009 Mark Borgerding mark a borgerding net
+//
+// Eigen is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 3 of the License, or (at your option) any later version.
+//
+// Alternatively, you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of
+// the License, or (at your option) any later version.
+//
+// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License and a copy of the GNU General Public License along with
+// Eigen. If not, see <http://www.gnu.org/licenses/>.
+
+
+
+  // This FFT implementation was derived from kissfft http:sourceforge.net/projects/kissfft
+  // Copyright 2003-2009 Mark Borgerding
+
+  template <typename _Scalar>
+    struct ei_kiss_cpx_fft
+    {
+      typedef _Scalar Scalar;
+      typedef std::complex<Scalar> Complex;
+      std::vector<Complex> m_twiddles;
+      std::vector<int> m_stageRadix;
+      std::vector<int> m_stageRemainder;
+      std::vector<Complex> m_scratchBuf;
+      bool m_inverse;
+
+      void make_twiddles(int nfft,bool inverse)
+      {
+        m_inverse = inverse;
+        m_twiddles.resize(nfft);
+        Scalar phinc =  (inverse?2:-2)* acos( (Scalar) -1)  / nfft;
+        for (int i=0;i<nfft;++i)
+          m_twiddles[i] = exp( Complex(0,i*phinc) );
+      }
+
+      void factorize(int nfft)
+      {
+        //start factoring out 4's, then 2's, then 3,5,7,9,...
+        int n= nfft;
+        int p=4;
+        do {
+          while (n % p) {
+            switch (p) {
+              case 4: p = 2; break;
+              case 2: p = 3; break;
+              default: p += 2; break;
+            }
+            if (p*p>n)
+              p=n;// impossible to have a factor > sqrt(n)
+          }
+          n /= p;
+          m_stageRadix.push_back(p);
+          m_stageRemainder.push_back(n);
+          if ( p > 5 )
+              m_scratchBuf.resize(p); // scratchbuf will be needed in bfly_generic
+        }while(n>1);
+      }
+
+      template <typename _Src>
+        void work( int stage,Complex * xout, const _Src * xin, size_t fstride,size_t in_stride)
+        {
+          int p = m_stageRadix[stage];
+          int m = m_stageRemainder[stage];
+          Complex * Fout_beg = xout;
+          Complex * Fout_end = xout + p*m;
+
+          if (m>1) {
+            do{
+              // recursive call:
+              // DFT of size m*p performed by doing
+              // p instances of smaller DFTs of size m, 
+              // each one takes a decimated version of the input
+              work(stage+1, xout , xin, fstride*p,in_stride);
+              xin += fstride*in_stride;
+            }while( (xout += m) != Fout_end );
+          }else{
+            do{
+              *xout = *xin;
+              xin += fstride*in_stride;
+            }while(++xout != Fout_end );
+          }
+          xout=Fout_beg;
+
+          // recombine the p smaller DFTs 
+          switch (p) {
+            case 2: bfly2(xout,fstride,m); break;
+            case 3: bfly3(xout,fstride,m); break;
+            case 4: bfly4(xout,fstride,m); break;
+            case 5: bfly5(xout,fstride,m); break;
+            default: bfly_generic(xout,fstride,m,p); break;
+          }
+        }
+
+      inline
+      void bfly2( Complex * Fout, const size_t fstride, int m)
+      {
+        for (int k=0;k<m;++k) {
+          Complex t = Fout[m+k] * m_twiddles[k*fstride];
+          Fout[m+k] = Fout[k] - t;
+          Fout[k] += t;
+        }
+      }
+
+      inline
+      void bfly4( Complex * Fout, const size_t fstride, const size_t m)
+      {
+        Complex scratch[6];
+        int negative_if_inverse = m_inverse * -2 +1;
+        for (size_t k=0;k<m;++k) {
+          scratch[0] = Fout[k+m] * m_twiddles[k*fstride];
+          scratch[1] = Fout[k+2*m] * m_twiddles[k*fstride*2];
+          scratch[2] = Fout[k+3*m] * m_twiddles[k*fstride*3];
+          scratch[5] = Fout[k] - scratch[1];
+
+          Fout[k] += scratch[1];
+          scratch[3] = scratch[0] + scratch[2];
+          scratch[4] = scratch[0] - scratch[2];
+          scratch[4] = Complex( scratch[4].imag()*negative_if_inverse , -scratch[4].real()* negative_if_inverse );
+
+          Fout[k+2*m]  = Fout[k] - scratch[3];
+          Fout[k] += scratch[3];
+          Fout[k+m] = scratch[5] + scratch[4];
+          Fout[k+3*m] = scratch[5] - scratch[4];
+        }
+      }
+
+      inline
+      void bfly3( Complex * Fout, const size_t fstride, const size_t m)
+      {
+        size_t k=m;
+        const size_t m2 = 2*m;
+        Complex *tw1,*tw2;
+        Complex scratch[5];
+        Complex epi3;
+        epi3 = m_twiddles[fstride*m];
+
+        tw1=tw2=&m_twiddles[0];
+
+        do{
+          scratch[1]=Fout[m] * *tw1;
+          scratch[2]=Fout[m2] * *tw2;
+
+          scratch[3]=scratch[1]+scratch[2];
+          scratch[0]=scratch[1]-scratch[2];
+          tw1 += fstride;
+          tw2 += fstride*2;
+          Fout[m] = Complex( Fout->real() - .5*scratch[3].real() , Fout->imag() - .5*scratch[3].imag() );
+          scratch[0] *= epi3.imag();
+          *Fout += scratch[3];
+          Fout[m2] = Complex(  Fout[m].real() + scratch[0].imag() , Fout[m].imag() - scratch[0].real() );
+          Fout[m] += Complex( -scratch[0].imag(),scratch[0].real() );
+          ++Fout;
+        }while(--k);
+      }
+
+      inline
+      void bfly5( Complex * Fout, const size_t fstride, const size_t m)
+      {
+        Complex *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
+        size_t u;
+        Complex scratch[13];
+        Complex * twiddles = &m_twiddles[0];
+        Complex *tw;
+        Complex ya,yb;
+        ya = twiddles[fstride*m];
+        yb = twiddles[fstride*2*m];
+
+        Fout0=Fout;
+        Fout1=Fout0+m;
+        Fout2=Fout0+2*m;
+        Fout3=Fout0+3*m;
+        Fout4=Fout0+4*m;
+
+        tw=twiddles;
+        for ( u=0; u<m; ++u ) {
+          scratch[0] = *Fout0;
+
+          scratch[1]  = *Fout1 * tw[u*fstride];
+          scratch[2]  = *Fout2 * tw[2*u*fstride];
+          scratch[3]  = *Fout3 * tw[3*u*fstride];
+          scratch[4]  = *Fout4 * tw[4*u*fstride];
+
+          scratch[7] = scratch[1] + scratch[4];
+          scratch[10] = scratch[1] - scratch[4];
+          scratch[8] = scratch[2] + scratch[3];
+          scratch[9] = scratch[2] - scratch[3];
+
+          *Fout0 +=  scratch[7];
+          *Fout0 +=  scratch[8];
+
+          scratch[5] = scratch[0] + Complex(
+              (scratch[7].real()*ya.real() ) + (scratch[8].real() *yb.real() ),
+              (scratch[7].imag()*ya.real()) + (scratch[8].imag()*yb.real())
+              );
+
+          scratch[6] = Complex(
+              (scratch[10].imag()*ya.imag()) + (scratch[9].imag()*yb.imag()),
+              -(scratch[10].real()*ya.imag()) - (scratch[9].real()*yb.imag())
+              );
+
+          *Fout1 = scratch[5] - scratch[6];
+          *Fout4 = scratch[5] + scratch[6];
+
+          scratch[11] = scratch[0] +
+            Complex(
+                (scratch[7].real()*yb.real()) + (scratch[8].real()*ya.real()),
+                (scratch[7].imag()*yb.real()) + (scratch[8].imag()*ya.real())
+                );
+
+          scratch[12] = Complex(
+              -(scratch[10].imag()*yb.imag()) + (scratch[9].imag()*ya.imag()),
+              (scratch[10].real()*yb.imag()) - (scratch[9].real()*ya.imag())
+              );
+
+          *Fout2=scratch[11]+scratch[12];
+          *Fout3=scratch[11]-scratch[12];
+
+          ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
+        }
+      }
+
+      /* perform the butterfly for one stage of a mixed radix FFT */
+      inline
+      void bfly_generic(
+          Complex * Fout,
+          const size_t fstride,
+          int m,
+          int p
+          )
+      {
+        int u,k,q1,q;
+        Complex * twiddles = &m_twiddles[0];
+        Complex t;
+        int Norig = m_twiddles.size();
+        Complex * scratchbuf = &m_scratchBuf[0];
+
+        for ( u=0; u<m; ++u ) {
+          k=u;
+          for ( q1=0 ; q1<p ; ++q1 ) {
+            scratchbuf[q1] = Fout[ k  ];
+            k += m;
+          }
+
+          k=u;
+          for ( q1=0 ; q1<p ; ++q1 ) {
+            int twidx=0;
+            Fout[ k ] = scratchbuf[0];
+            for (q=1;q<p;++q ) {
+              twidx += fstride * k;
+              if (twidx>=Norig) twidx-=Norig;
+              t=scratchbuf[q] * twiddles[twidx];
+              Fout[ k ] += t;
+            }
+            k += m;
+          }
+        }
+      }
+    };
+
+  template <typename _Scalar>
+    struct ei_kissfft_impl
+    {
+      typedef _Scalar Scalar;
+      typedef std::complex<Scalar> Complex;
+
+      void clear() 
+      {
+        m_plans.clear();
+        m_realTwiddles.clear();
+      }
+
+      template <typename _Src>
+      inline
+        void fwd( Complex * dst,const _Src *src,int nfft)
+        {
+          get_plan(nfft,false).work(0, dst, src, 1,1);
+        }
+
+      // real-to-complex forward FFT
+      // perform two FFTs of src even and src odd
+      // then twiddle to recombine them into the half-spectrum format
+      // then fill in the conjugate symmetric half
+      inline
+      void fwd( Complex * dst,const Scalar * src,int nfft) 
+      {
+        if ( nfft&3  ) {
+          // use generic mode for odd
+          get_plan(nfft,false).work(0, dst, src, 1,1);
+        }else{
+          int ncfft = nfft>>1;
+          int ncfft2 = nfft>>2;
+          Complex * rtw = real_twiddles(ncfft2);
+
+          // use optimized mode for even real
+          fwd( dst, reinterpret_cast<const Complex*> (src), ncfft);
+          Complex dc = dst[0].real() +  dst[0].imag();
+          Complex nyquist = dst[0].real() -  dst[0].imag();
+          int k;
+          for ( k=1;k <= ncfft2 ; ++k ) {
+            Complex fpk = dst[k];
+            Complex fpnk = conj(dst[ncfft-k]);
+            Complex f1k = fpk + fpnk;
+            Complex f2k = fpk - fpnk;
+            Complex tw= f2k * rtw[k-1];
+            dst[k] =  (f1k + tw) * Scalar(.5);
+            dst[ncfft-k] =  conj(f1k -tw)*Scalar(.5);
+          }
+
+          // place conjugate-symmetric half at the end for completeness
+          // TODO: make this configurable ( opt-out )
+          for ( k=1;k < ncfft ; ++k )
+            dst[nfft-k] = conj(dst[k]);
+          dst[0] = dc;
+          dst[ncfft] = nyquist;
+        }
+      }
+
+      // inverse complex-to-complex
+      inline
+      void inv(Complex * dst,const Complex  *src,int nfft)
+      {
+        get_plan(nfft,true).work(0, dst, src, 1,1);
+        scale(dst, nfft, Scalar(1)/nfft );
+      }
+
+      // half-complex to scalar
+      inline
+      void inv( Scalar * dst,const Complex * src,int nfft) 
+      {
+        if (nfft&3) {
+          m_tmpBuf.resize(nfft);
+          inv(&m_tmpBuf[0],src,nfft);
+          for (int k=0;k<nfft;++k)
+            dst[k] = m_tmpBuf[k].real();
+        }else{
+          // optimized version for multiple of 4
+          int ncfft = nfft>>1;
+          int ncfft2 = nfft>>2;
+          Complex * rtw = real_twiddles(ncfft2);
+          m_tmpBuf.resize(ncfft);
+          m_tmpBuf[0] = Complex( src[0].real() + src[ncfft].real(), src[0].real() - src[ncfft].real() );
+          for (int k = 1; k <= ncfft / 2; ++k) {
+            Complex fk = src[k];
+            Complex fnkc = conj(src[ncfft-k]);
+            Complex fek = fk + fnkc;
+            Complex tmp = fk - fnkc;
+            Complex fok = tmp * conj(rtw[k-1]);
+            m_tmpBuf[k] = fek + fok;
+            m_tmpBuf[ncfft-k] = conj(fek - fok);
+          }
+          scale(&m_tmpBuf[0], ncfft, Scalar(1)/nfft );
+          get_plan(ncfft,true).work(0, reinterpret_cast<Complex*>(dst), &m_tmpBuf[0], 1,1);
+        }
+      }
+
+      protected:
+      typedef ei_kiss_cpx_fft<Scalar> PlanData;
+      typedef std::map<int,PlanData> PlanMap;
+
+      PlanMap m_plans;
+      std::map<int, std::vector<Complex> > m_realTwiddles;
+      std::vector<Complex> m_tmpBuf;
+
+      inline
+      int PlanKey(int nfft,bool isinverse) const { return (nfft<<1) | isinverse; }
+
+      inline
+      PlanData & get_plan(int nfft,bool inverse)
+      {
+        // TODO look for PlanKey(nfft, ! inverse) and conjugate the twiddles
+        PlanData & pd = m_plans[ PlanKey(nfft,inverse) ];
+        if ( pd.m_twiddles.size() == 0 ) {
+          pd.make_twiddles(nfft,inverse);
+          pd.factorize(nfft);
+        }
+        return pd;
+      }
+
+      inline
+      Complex * real_twiddles(int ncfft2)
+      {
+        std::vector<Complex> & twidref = m_realTwiddles[ncfft2];// creates new if not there
+        if ( (int)twidref.size() != ncfft2 ) {
+          twidref.resize(ncfft2);
+          int ncfft= ncfft2<<1;
+          Scalar pi =  acos( Scalar(-1) );
+          for (int k=1;k<=ncfft2;++k) 
+            twidref[k-1] = exp( Complex(0,-pi * ((double) (k) / ncfft + .5) ) );
+        }
+        return &twidref[0];
+      }
+
+      // TODO move scaling up into Eigen::FFT
+      inline
+      void scale(Complex *dst,int n,Scalar s) 
+      {
+        for (int k=0;k<n;++k)
+          dst[k] *= s;
+      }
+    };
diff --git a/unsupported/doc/examples/FFT.cpp b/unsupported/doc/examples/FFT.cpp
new file mode 100644
index 000000000..55e29585a
--- /dev/null
+++ b/unsupported/doc/examples/FFT.cpp
@@ -0,0 +1,117 @@
+//  To use the simple FFT implementation
+//  g++ -o demofft -I.. -Wall -O3 FFT.cpp 
+
+//  To use the FFTW implementation
+//  g++ -o demofft -I.. -DUSE_FFTW -Wall -O3 FFT.cpp -lfftw3 -lfftw3f -lfftw3l
+
+#ifdef USE_FFTW
+#include <fftw3.h>
+#endif
+
+#include <vector>
+#include <complex>
+#include <algorithm>
+#include <iterator>
+#include <Eigen/Core>
+#include <unsupported/Eigen/FFT>
+
+using namespace std;
+using namespace Eigen;
+
+template <typename T>
+T mag2(T a)
+{
+    return a*a;
+}
+template <typename T>
+T mag2(std::complex<T> a)
+{
+    return norm(a);
+}
+
+template <typename T>
+T mag2(const std::vector<T> & vec)
+{
+    T out=0;
+    for (size_t k=0;k<vec.size();++k)
+        out += mag2(vec[k]);
+    return out;
+}
+
+template <typename T>
+T mag2(const std::vector<std::complex<T> > & vec)
+{
+    T out=0;
+    for (size_t k=0;k<vec.size();++k)
+        out += mag2(vec[k]);
+    return out;
+}
+
+template <typename T>
+vector<T> operator-(const vector<T> & a,const vector<T> & b )
+{
+    vector<T> c(a);
+    for (size_t k=0;k<b.size();++k) 
+        c[k] -= b[k];
+    return c;
+}
+
+template <typename T>
+void RandomFill(std::vector<T> & vec)
+{
+    for (size_t k=0;k<vec.size();++k)
+        vec[k] = T( rand() )/T(RAND_MAX) - .5;
+}
+
+template <typename T>
+void RandomFill(std::vector<std::complex<T> > & vec)
+{
+    for (size_t k=0;k<vec.size();++k)
+        vec[k] = std::complex<T> ( T( rand() )/T(RAND_MAX) - .5, T( rand() )/T(RAND_MAX) - .5);
+}
+
+template <typename T_time,typename T_freq>
+void fwd_inv(size_t nfft)
+{
+    typedef typename NumTraits<T_freq>::Real Scalar;
+    vector<T_time> timebuf(nfft);
+    RandomFill(timebuf);
+
+    vector<T_freq> freqbuf;
+    static FFT<Scalar> fft;
+    fft.fwd(freqbuf,timebuf);
+
+    vector<T_time> timebuf2;
+    fft.inv(timebuf2,freqbuf);
+
+    long double rmse = mag2(timebuf - timebuf2) / mag2(timebuf);
+    cout << "roundtrip rmse: " << rmse << endl;
+}
+
+template <typename T_scalar>
+void two_demos(int nfft)
+{
+    cout << "     scalar ";
+    fwd_inv<T_scalar,std::complex<T_scalar> >(nfft);
+    cout << "    complex ";
+    fwd_inv<std::complex<T_scalar>,std::complex<T_scalar> >(nfft);
+}
+
+void demo_all_types(int nfft)
+{
+    cout << "nfft=" << nfft << endl;
+    cout << "   float" << endl;
+    two_demos<float>(nfft);
+    cout << "   double" << endl;
+    two_demos<double>(nfft);
+    cout << "   long double" << endl;
+    two_demos<long double>(nfft);
+}
+
+int main()
+{
+    demo_all_types( 2*3*4*5*7 );
+    demo_all_types( 2*9*16*25 );
+    demo_all_types( 1024 );
+    return 0;
+}
diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt
index abfbb0185..d182c9abf 100644
--- a/unsupported/test/CMakeLists.txt
+++ b/unsupported/test/CMakeLists.txt
@@ -19,3 +19,10 @@ ei_add_test(autodiff)
 ei_add_test(BVH)
 ei_add_test(matrixExponential)
 ei_add_test(alignedvector3)
+ei_add_test(FFT)
+
+find_package(FFTW)
+if(FFTW_FOUND)
+  ei_add_test(FFTW  "-DEIGEN_FFTW_DEFAULT " "-lfftw3 -lfftw3f -lfftw3l" )
+endif(FFTW_FOUND)
+
diff --git a/unsupported/test/FFT.cpp b/unsupported/test/FFT.cpp
new file mode 100644
index 000000000..cc68f3718
--- /dev/null
+++ b/unsupported/test/FFT.cpp
@@ -0,0 +1,200 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra. Eigen itself is part of the KDE project.
+//
+// Copyright (C) 2009 Mark Borgerding mark a borgerding net
+//
+// Eigen is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 3 of the License, or (at your option) any later version.
+//
+// Alternatively, you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of
+// the License, or (at your option) any later version.
+//
+// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License and a copy of the GNU General Public License along with
+// Eigen. If not, see <http://www.gnu.org/licenses/>.
+
+#include "main.h"
+#include <unsupported/Eigen/FFT>
+
+using namespace std;
+
+float norm(float x) {return x*x;}
+double norm(double x) {return x*x;}
+long double norm(long double x) {return x*x;}
+
+template < typename T>
+complex<long double>  promote(complex<T> x) { return complex<long double>(x.real(),x.imag()); }
+
+complex<long double>  promote(float x) { return complex<long double>( x); }
+complex<long double>  promote(double x) { return complex<long double>( x); }
+complex<long double>  promote(long double x) { return complex<long double>( x); }
+    
+
+    template <typename VectorType1,typename VectorType2>
+    long double fft_rmse( const VectorType1 & fftbuf,const VectorType2 & timebuf)
+    {
+        long double totalpower=0;
+        long double difpower=0;
+        cerr <<"idx\ttruth\t\tvalue\t|dif|=\n";
+        for (size_t k0=0;k0<size_t(fftbuf.size());++k0) {
+            complex<long double> acc = 0;
+            long double phinc = -2.*k0* M_PIl / timebuf.size();
+            for (size_t k1=0;k1<size_t(timebuf.size());++k1) {
+                acc +=  promote( timebuf[k1] ) * exp( complex<long double>(0,k1*phinc) );
+            }
+            totalpower += norm(acc);
+            complex<long double> x = promote(fftbuf[k0]); 
+            complex<long double> dif = acc - x;
+            difpower += norm(dif);
+            cerr << k0 << "\t" << acc << "\t" <<  x << "\t" << sqrt(norm(dif)) << endl;
+        }
+        cerr << "rmse:" << sqrt(difpower/totalpower) << endl;
+        return sqrt(difpower/totalpower);
+    }
+
+    template <typename VectorType1,typename VectorType2>
+    long double dif_rmse( const VectorType1& buf1,const VectorType2& buf2)
+    {
+        long double totalpower=0;
+        long double difpower=0;
+        size_t n = min( buf1.size(),buf2.size() );
+        for (size_t k=0;k<n;++k) {
+            totalpower += (norm( buf1[k] ) + norm(buf2[k]) )/2.;
+            difpower += norm(buf1[k] - buf2[k]);
+        }
+        return sqrt(difpower/totalpower);
+    }
+
+enum { StdVectorContainer, EigenVectorContainer };
+
+template<int Container, typename Scalar> struct VectorType;
+
+template<typename Scalar> struct VectorType<StdVectorContainer,Scalar>
+{
+  typedef vector<Scalar> type;
+};
+
+template<typename Scalar> struct VectorType<EigenVectorContainer,Scalar>
+{
+  typedef Matrix<Scalar,Dynamic,1> type;
+};
+
+template <int Container, typename T>
+void test_scalar_generic(int nfft)
+{
+    typedef typename FFT<T>::Complex Complex;
+    typedef typename FFT<T>::Scalar Scalar;
+    typedef typename VectorType<Container,Scalar>::type ScalarVector;
+    typedef typename VectorType<Container,Complex>::type ComplexVector;
+
+    FFT<T> fft;
+    ScalarVector inbuf(nfft);
+    ComplexVector outbuf;
+    for (int k=0;k<nfft;++k)
+        inbuf[k]= (T)(rand()/(double)RAND_MAX - .5);
+    fft.fwd( outbuf,inbuf);
+    VERIFY( fft_rmse(outbuf,inbuf) < test_precision<T>()  );// gross check
+
+    ScalarVector buf3;
+    fft.inv( buf3 , outbuf);
+    VERIFY( dif_rmse(inbuf,buf3) < test_precision<T>()  );// gross check
+}
+
+template <typename T>
+void test_scalar(int nfft)
+{
+  test_scalar_generic<StdVectorContainer,T>(nfft);
+  test_scalar_generic<EigenVectorContainer,T>(nfft);
+}
+
+template <int Container, typename T>
+void test_complex_generic(int nfft)
+{
+    typedef typename FFT<T>::Complex Complex;
+    typedef typename VectorType<Container,Complex>::type ComplexVector;
+
+    FFT<T> fft;
+
+    ComplexVector inbuf(nfft);
+    ComplexVector outbuf;
+    ComplexVector buf3;
+    for (int k=0;k<nfft;++k)
+        inbuf[k]= Complex( (T)(rand()/(double)RAND_MAX - .5), (T)(rand()/(double)RAND_MAX - .5) );
+    fft.fwd( outbuf , inbuf);
+
+    VERIFY( fft_rmse(outbuf,inbuf) < test_precision<T>()  );// gross check
+
+    fft.inv( buf3 , outbuf);
+
+    VERIFY( dif_rmse(inbuf,buf3) < test_precision<T>()  );// gross check
+}
+
+template <typename T>
+void test_complex(int nfft)
+{
+  test_complex_generic<StdVectorContainer,T>(nfft);
+  test_complex_generic<EigenVectorContainer,T>(nfft);
+}
+
+void test_FFT()
+{
+
+  CALL_SUBTEST( test_complex<float>(32) );
+  CALL_SUBTEST( test_complex<double>(32) );
+  CALL_SUBTEST( test_complex<long double>(32) );
+  
+  CALL_SUBTEST( test_complex<float>(256) );
+  CALL_SUBTEST( test_complex<double>(256) );
+  CALL_SUBTEST( test_complex<long double>(256) );
+  
+  CALL_SUBTEST( test_complex<float>(3*8) );
+  CALL_SUBTEST( test_complex<double>(3*8) );
+  CALL_SUBTEST( test_complex<long double>(3*8) );
+  
+  CALL_SUBTEST( test_complex<float>(5*32) );
+  CALL_SUBTEST( test_complex<double>(5*32) );
+  CALL_SUBTEST( test_complex<long double>(5*32) );
+  
+  CALL_SUBTEST( test_complex<float>(2*3*4) );
+  CALL_SUBTEST( test_complex<double>(2*3*4) );
+  CALL_SUBTEST( test_complex<long double>(2*3*4) );
+  
+  CALL_SUBTEST( test_complex<float>(2*3*4*5) );
+  CALL_SUBTEST( test_complex<double>(2*3*4*5) );
+  CALL_SUBTEST( test_complex<long double>(2*3*4*5) );
+  
+  CALL_SUBTEST( test_complex<float>(2*3*4*5*7) );
+  CALL_SUBTEST( test_complex<double>(2*3*4*5*7) );
+  CALL_SUBTEST( test_complex<long double>(2*3*4*5*7) );
+
+
+
+  CALL_SUBTEST( test_scalar<float>(32) );
+  CALL_SUBTEST( test_scalar<double>(32) );
+  CALL_SUBTEST( test_scalar<long double>(32) );
+  
+  CALL_SUBTEST( test_scalar<float>(45) );
+  CALL_SUBTEST( test_scalar<double>(45) );
+  CALL_SUBTEST( test_scalar<long double>(45) );
+  
+  CALL_SUBTEST( test_scalar<float>(50) );
+  CALL_SUBTEST( test_scalar<double>(50) );
+  CALL_SUBTEST( test_scalar<long double>(50) );
+  
+  CALL_SUBTEST( test_scalar<float>(256) );
+  CALL_SUBTEST( test_scalar<double>(256) );
+  CALL_SUBTEST( test_scalar<long double>(256) );
+  
+  CALL_SUBTEST( test_scalar<float>(2*3*4*5*7) );
+  CALL_SUBTEST( test_scalar<double>(2*3*4*5*7) );
+  CALL_SUBTEST( test_scalar<long double>(2*3*4*5*7) );
+}
diff --git a/unsupported/test/FFTW.cpp b/unsupported/test/FFTW.cpp
new file mode 100644
index 000000000..cf7be75aa
--- /dev/null
+++ b/unsupported/test/FFTW.cpp
@@ -0,0 +1,136 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra. Eigen itself is part of the KDE project.
+//
+// Copyright (C) 2009 Mark Borgerding mark a borgerding net
+//
+// Eigen is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 3 of the License, or (at your option) any later version.
+//
+// Alternatively, you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of
+// the License, or (at your option) any later version.
+//
+// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License and a copy of the GNU General Public License along with
+// Eigen. If not, see <http://www.gnu.org/licenses/>.
+
+#include "main.h"
+#include <fftw3.h>
+#include <unsupported/Eigen/FFT>
+
+using namespace std;
+
+float norm(float x) {return x*x;}
+double norm(double x) {return x*x;}
+long double norm(long double x) {return x*x;}
+
+template < typename T>
+complex<long double>  promote(complex<T> x) { return complex<long double>(x.real(),x.imag()); }
+
+complex<long double>  promote(float x) { return complex<long double>( x); }
+complex<long double>  promote(double x) { return complex<long double>( x); }
+complex<long double>  promote(long double x) { return complex<long double>( x); }
+    
+
+    template <typename T1,typename T2>
+    long double fft_rmse( const vector<T1> & fftbuf,const vector<T2> & timebuf)
+    {
+        long double totalpower=0;
+        long double difpower=0;
+        cerr <<"idx\ttruth\t\tvalue\t|dif|=\n";
+        for (size_t k0=0;k0<fftbuf.size();++k0) {
+            complex<long double> acc = 0;
+            long double phinc = -2.*k0* M_PIl / timebuf.size();
+            for (size_t k1=0;k1<timebuf.size();++k1) {
+                acc +=  promote( timebuf[k1] ) * exp( complex<long double>(0,k1*phinc) );
+            }
+            totalpower += norm(acc);
+            complex<long double> x = promote(fftbuf[k0]); 
+            complex<long double> dif = acc - x;
+            difpower += norm(dif);
+            cerr << k0 << "\t" << acc << "\t" <<  x << "\t" << sqrt(norm(dif)) << endl;
+        }
+        cerr << "rmse:" << sqrt(difpower/totalpower) << endl;
+        return sqrt(difpower/totalpower);
+    }
+
+    template <typename T1,typename T2>
+    long double dif_rmse( const vector<T1> buf1,const vector<T2> buf2)
+    {
+        long double totalpower=0;
+        long double difpower=0;
+        size_t n = min( buf1.size(),buf2.size() );
+        for (size_t k=0;k<n;++k) {
+            totalpower += (norm( buf1[k] ) + norm(buf2[k]) )/2.;
+            difpower += norm(buf1[k] - buf2[k]);
+        }
+        return sqrt(difpower/totalpower);
+    }
+
+template <class T>
+void test_scalar(int nfft)
+{
+    typedef typename Eigen::FFT<T>::Complex Complex;
+    typedef typename Eigen::FFT<T>::Scalar Scalar;
+
+    FFT<T> fft;
+    vector<Scalar> inbuf(nfft);
+    vector<Complex> outbuf;
+    for (int k=0;k<nfft;++k)
+        inbuf[k]= (T)(rand()/(double)RAND_MAX - .5);
+    fft.fwd( outbuf,inbuf);
+    VERIFY( fft_rmse(outbuf,inbuf) < test_precision<T>()  );// gross check
+
+    vector<Scalar> buf3;
+    fft.inv( buf3 , outbuf);
+    VERIFY( dif_rmse(inbuf,buf3) < test_precision<T>()  );// gross check
+}
+
+template <class T>
+void test_complex(int nfft)
+{
+    typedef typename Eigen::FFT<T>::Complex Complex;
+
+    FFT<T> fft;
+
+    vector<Complex> inbuf(nfft);
+    vector<Complex> outbuf;
+    vector<Complex> buf3;
+    for (int k=0;k<nfft;++k)
+        inbuf[k]= Complex( (T)(rand()/(double)RAND_MAX - .5), (T)(rand()/(double)RAND_MAX - .5) );
+    fft.fwd( outbuf , inbuf);
+
+    VERIFY( fft_rmse(outbuf,inbuf) < test_precision<T>()  );// gross check
+
+    fft.inv( buf3 , outbuf);
+
+    VERIFY( dif_rmse(inbuf,buf3) < test_precision<T>()  );// gross check
+}
+
+void test_FFTW()
+{
+
+  CALL_SUBTEST( test_complex<float>(32) ); CALL_SUBTEST( test_complex<double>(32) ); CALL_SUBTEST( test_complex<long double>(32) );
+  CALL_SUBTEST( test_complex<float>(256) ); CALL_SUBTEST( test_complex<double>(256) ); CALL_SUBTEST( test_complex<long double>(256) );
+  CALL_SUBTEST( test_complex<float>(3*8) ); CALL_SUBTEST( test_complex<double>(3*8) ); CALL_SUBTEST( test_complex<long double>(3*8) );
+  CALL_SUBTEST( test_complex<float>(5*32) ); CALL_SUBTEST( test_complex<double>(5*32) ); CALL_SUBTEST( test_complex<long double>(5*32) );
+  CALL_SUBTEST( test_complex<float>(2*3*4) ); CALL_SUBTEST( test_complex<double>(2*3*4) ); CALL_SUBTEST( test_complex<long double>(2*3*4) );
+  CALL_SUBTEST( test_complex<float>(2*3*4*5) ); CALL_SUBTEST( test_complex<double>(2*3*4*5) ); CALL_SUBTEST( test_complex<long double>(2*3*4*5) );
+  CALL_SUBTEST( test_complex<float>(2*3*4*5*7) ); CALL_SUBTEST( test_complex<double>(2*3*4*5*7) ); CALL_SUBTEST( test_complex<long double>(2*3*4*5*7) );
+
+
+
+  CALL_SUBTEST( test_scalar<float>(32) ); CALL_SUBTEST( test_scalar<double>(32) ); CALL_SUBTEST( test_scalar<long double>(32) );
+  CALL_SUBTEST( test_scalar<float>(45) ); CALL_SUBTEST( test_scalar<double>(45) ); CALL_SUBTEST( test_scalar<long double>(45) );
+  CALL_SUBTEST( test_scalar<float>(50) ); CALL_SUBTEST( test_scalar<double>(50) ); CALL_SUBTEST( test_scalar<long double>(50) );
+  CALL_SUBTEST( test_scalar<float>(256) ); CALL_SUBTEST( test_scalar<double>(256) ); CALL_SUBTEST( test_scalar<long double>(256) );
+  CALL_SUBTEST( test_scalar<float>(2*3*4*5*7) ); CALL_SUBTEST( test_scalar<double>(2*3*4*5*7) ); CALL_SUBTEST( test_scalar<long double>(2*3*4*5*7) );
+}