merge

author: Benoit Jacob <jacob.benoit.1@gmail.com> 2010-02-25 21:07:30 -0500
committer: Benoit Jacob <jacob.benoit.1@gmail.com> 2010-02-25 21:07:30 -0500
commit: b1c6c215a43850b2bc5bdc393ab5a1179e858024 (patch)
tree: 9ae1234383bef2204802606501a47bb5c05ec1d2 /Eigen/src/Core
parent: 769641bc58745fecc1fa4e537466a1fff48f4a8a (diff)
parent: 90e4a605ef920759a23cdbd24e6e7b69ce549162 (diff)
24 files changed, 572 insertions, 300 deletions
diff --git a/Eigen/src/Core/BandMatrix.h b/Eigen/src/Core/BandMatrix.h
index 538e6dd76..432df0b34 100644
--- a/Eigen/src/Core/BandMatrix.h
+++ b/Eigen/src/Core/BandMatrix.h
@@ -57,7 +57,7 @@ struct ei_traits<BandMatrix<_Scalar,Rows,Cols,Supers,Subs,Options> >
 };
 
 template<typename _Scalar, int Rows, int Cols, int Supers, int Subs, int Options>
-class BandMatrix : public AnyMatrixBase<BandMatrix<_Scalar,Rows,Cols,Supers,Subs,Options> >
+class BandMatrix : public EigenBase<BandMatrix<_Scalar,Rows,Cols,Supers,Subs,Options> >
 {
   public:
 
diff --git a/Eigen/src/Core/DenseBase.h b/Eigen/src/Core/DenseBase.h
index 2078f023b..5682d7278 100644
--- a/Eigen/src/Core/DenseBase.h
+++ b/Eigen/src/Core/DenseBase.h
@@ -40,7 +40,7 @@ template<typename Derived> class DenseBase
   : public ei_special_scalar_op_base<Derived,typename ei_traits<Derived>::Scalar,
                                      typename NumTraits<typename ei_traits<Derived>::Scalar>::Real>
 #else
-  : public AnyMatrixBase<Derived>
+  : public EigenBase<Derived>
 #endif // not EIGEN_PARSED_BY_DOXYGEN
 {
   public:
@@ -53,8 +53,8 @@ template<typename Derived> class DenseBase
     typedef typename ei_traits<Derived>::Scalar Scalar;
     typedef typename ei_packet_traits<Scalar>::type PacketScalar;
 
-    using AnyMatrixBase<Derived>::derived;
-    using AnyMatrixBase<Derived>::const_cast_derived;
+    using EigenBase<Derived>::derived;
+    using EigenBase<Derived>::const_cast_derived;
 #endif // not EIGEN_PARSED_BY_DOXYGEN
 
     enum {
@@ -292,13 +292,13 @@ template<typename Derived> class DenseBase
     Derived& operator=(const DenseBase& other);
 
     template<typename OtherDerived>
-    Derived& operator=(const AnyMatrixBase<OtherDerived> &other);
+    Derived& operator=(const EigenBase<OtherDerived> &other);
 
     template<typename OtherDerived>
-    Derived& operator+=(const AnyMatrixBase<OtherDerived> &other);
+    Derived& operator+=(const EigenBase<OtherDerived> &other);
 
     template<typename OtherDerived>
-    Derived& operator-=(const AnyMatrixBase<OtherDerived> &other);
+    Derived& operator-=(const EigenBase<OtherDerived> &other);
 
     template<typename OtherDerived>
     Derived& operator=(const ReturnByValue<OtherDerived>& func);
diff --git a/Eigen/src/Core/DenseStorageBase.h b/Eigen/src/Core/DenseStorageBase.h
index e93e439e6..89e6e7112 100644
--- a/Eigen/src/Core/DenseStorageBase.h
+++ b/Eigen/src/Core/DenseStorageBase.h
@@ -44,7 +44,7 @@ class DenseStorageBase : public _Base<Derived>
   public:
     enum { Options = _Options };
     typedef _Base<Derived> Base;
-    typedef typename Base::PlainMatrixType PlainMatrixType;
+    typedef typename Base::PlainObject PlainObject;
     typedef typename Base::Scalar Scalar;
     typedef typename Base::PacketScalar PacketScalar;
     using Base::RowsAtCompileTime;
@@ -338,19 +338,19 @@ class DenseStorageBase : public _Base<Derived>
 //       EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
     }
 
-    /** \copydoc MatrixBase::operator=(const AnyMatrixBase<OtherDerived>&)
+    /** \copydoc MatrixBase::operator=(const EigenBase<OtherDerived>&)
       */
     template<typename OtherDerived>
-    EIGEN_STRONG_INLINE Derived& operator=(const AnyMatrixBase<OtherDerived> &other)
+    EIGEN_STRONG_INLINE Derived& operator=(const EigenBase<OtherDerived> &other)
     {
       resize(other.derived().rows(), other.derived().cols());
       Base::operator=(other.derived());
       return this->derived();
     }
 
-    /** \sa MatrixBase::operator=(const AnyMatrixBase<OtherDerived>&) */
+    /** \sa MatrixBase::operator=(const EigenBase<OtherDerived>&) */
     template<typename OtherDerived>
-    EIGEN_STRONG_INLINE DenseStorageBase(const AnyMatrixBase<OtherDerived> &other)
+    EIGEN_STRONG_INLINE DenseStorageBase(const EigenBase<OtherDerived> &other)
       : m_storage(other.derived().rows() * other.derived().cols(), other.derived().rows(), other.derived().cols())
     {
       _check_template_params();
@@ -527,7 +527,7 @@ struct ei_conservative_resize_like_impl
   {
     if (_this.rows() == rows && _this.cols() == cols) return;
     EIGEN_STATIC_ASSERT_DYNAMIC_SIZE(Derived)
-    typename Derived::PlainMatrixType tmp(rows,cols);
+    typename Derived::PlainObject tmp(rows,cols);
     const int common_rows = std::min(rows, _this.rows());
     const int common_cols = std::min(cols, _this.cols());
     tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols);
@@ -546,7 +546,7 @@ struct ei_conservative_resize_like_impl
     EIGEN_STATIC_ASSERT_DYNAMIC_SIZE(Derived)
     EIGEN_STATIC_ASSERT_DYNAMIC_SIZE(OtherDerived)
 
-    typename Derived::PlainMatrixType tmp(other);
+    typename Derived::PlainObject tmp(other);
     const int common_rows = std::min(tmp.rows(), _this.rows());
     const int common_cols = std::min(tmp.cols(), _this.cols());
     tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols);
@@ -560,7 +560,7 @@ struct ei_conservative_resize_like_impl<Derived,OtherDerived,true>
   static void run(DenseBase<Derived>& _this, int size)
   {
     if (_this.size() == size) return;
-    typename Derived::PlainMatrixType tmp(size);
+    typename Derived::PlainObject tmp(size);
     const int common_size = std::min<int>(_this.size(),size);
     tmp.segment(0,common_size) = _this.segment(0,common_size);
     _this.derived().swap(tmp);
@@ -571,7 +571,7 @@ struct ei_conservative_resize_like_impl<Derived,OtherDerived,true>
     if (_this.rows() == other.rows() && _this.cols() == other.cols()) return;
 
     // segment(...) will check whether Derived/OtherDerived are vectors!
-    typename Derived::PlainMatrixType tmp(other);
+    typename Derived::PlainObject tmp(other);
     const int common_size = std::min<int>(_this.size(),tmp.size());
     tmp.segment(0,common_size) = _this.segment(0,common_size);
     _this.derived().swap(tmp);
diff --git a/Eigen/src/Core/DiagonalMatrix.h b/Eigen/src/Core/DiagonalMatrix.h
index 08c046611..774b0d7ae 100644
--- a/Eigen/src/Core/DiagonalMatrix.h
+++ b/Eigen/src/Core/DiagonalMatrix.h
@@ -28,7 +28,7 @@
 
 #ifndef EIGEN_PARSED_BY_DOXYGEN
 template<typename Derived>
-class DiagonalBase : public AnyMatrixBase<Derived>
+class DiagonalBase : public EigenBase<Derived>
 {
   public:
     typedef typename ei_traits<Derived>::DiagonalVectorType DiagonalVectorType;
diff --git a/Eigen/src/Core/Dot.h b/Eigen/src/Core/Dot.h
index f0c520b1f..201bd23ca 100644
--- a/Eigen/src/Core/Dot.h
+++ b/Eigen/src/Core/Dot.h
@@ -299,7 +299,7 @@ inline typename NumTraits<typename ei_traits<Derived>::Scalar>::Real MatrixBase<
   * \sa norm(), normalize()
   */
 template<typename Derived>
-inline const typename MatrixBase<Derived>::PlainMatrixType
+inline const typename MatrixBase<Derived>::PlainObject
 MatrixBase<Derived>::normalized() const
 {
   typedef typename ei_nested<Derived>::type Nested;
diff --git a/Eigen/src/Core/AnyMatrixBase.h b/Eigen/src/Core/EigenBase.h
index a5d1cfe9f..cf1ce4376 100644
--- a/Eigen/src/Core/AnyMatrixBase.h
+++ b/Eigen/src/Core/EigenBase.h
@@ -23,21 +23,21 @@
 // License and a copy of the GNU General Public License along with
 // Eigen. If not, see <http://www.gnu.org/licenses/>.
 
-#ifndef EIGEN_ANYMATRIXBASE_H
-#define EIGEN_ANYMATRIXBASE_H
+#ifndef EIGEN_EIGENBASE_H
+#define EIGEN_EIGENBASE_H
 
 
 /** Common base class for all classes T such that MatrixBase has an operator=(T) and a constructor MatrixBase(T).
   *
-  * In other words, an AnyMatrixBase object is an object that can be copied into a MatrixBase.
+  * In other words, an EigenBase object is an object that can be copied into a MatrixBase.
   *
   * Besides MatrixBase-derived classes, this also includes special matrix classes such as diagonal matrices, etc.
   *
   * Notice that this class is trivial, it is only used to disambiguate overloaded functions.
   */
-template<typename Derived> struct AnyMatrixBase
+template<typename Derived> struct EigenBase
 {
-//   typedef typename ei_plain_matrix_type<Derived>::type PlainMatrixType;
+//   typedef typename ei_plain_matrix_type<Derived>::type PlainObject;
 
   /** \returns a reference to the derived object */
   Derived& derived() { return *static_cast<Derived*>(this); }
@@ -45,7 +45,7 @@ template<typename Derived> struct AnyMatrixBase
   const Derived& derived() const { return *static_cast<const Derived*>(this); }
 
   inline Derived& const_cast_derived() const
-  { return *static_cast<Derived*>(const_cast<AnyMatrixBase*>(this)); }
+  { return *static_cast<Derived*>(const_cast<EigenBase*>(this)); }
 
   /** \returns the number of rows. \sa cols(), RowsAtCompileTime */
   inline int rows() const { return derived().rows(); }
@@ -61,7 +61,7 @@ template<typename Derived> struct AnyMatrixBase
   {
     // This is the default implementation,
     // derived class can reimplement it in a more optimized way.
-    typename Dest::PlainMatrixType res(rows(),cols());
+    typename Dest::PlainObject res(rows(),cols());
     evalTo(res);
     dst += res;
   }
@@ -71,7 +71,7 @@ template<typename Derived> struct AnyMatrixBase
   {
     // This is the default implementation,
     // derived class can reimplement it in a more optimized way.
-    typename Dest::PlainMatrixType res(rows(),cols());
+    typename Dest::PlainObject res(rows(),cols());
     evalTo(res);
     dst -= res;
   }
@@ -108,7 +108,7 @@ template<typename Derived> struct AnyMatrixBase
   */
 template<typename Derived>
 template<typename OtherDerived>
-Derived& DenseBase<Derived>::operator=(const AnyMatrixBase<OtherDerived> &other)
+Derived& DenseBase<Derived>::operator=(const EigenBase<OtherDerived> &other)
 {
   other.derived().evalTo(derived());
   return derived();
@@ -116,7 +116,7 @@ Derived& DenseBase<Derived>::operator=(const AnyMatrixBase<OtherDerived> &other)
 
 template<typename Derived>
 template<typename OtherDerived>
-Derived& DenseBase<Derived>::operator+=(const AnyMatrixBase<OtherDerived> &other)
+Derived& DenseBase<Derived>::operator+=(const EigenBase<OtherDerived> &other)
 {
   other.derived().addTo(derived());
   return derived();
@@ -124,7 +124,7 @@ Derived& DenseBase<Derived>::operator+=(const AnyMatrixBase<OtherDerived> &other
 
 template<typename Derived>
 template<typename OtherDerived>
-Derived& DenseBase<Derived>::operator-=(const AnyMatrixBase<OtherDerived> &other)
+Derived& DenseBase<Derived>::operator-=(const EigenBase<OtherDerived> &other)
 {
   other.derived().subTo(derived());
   return derived();
@@ -137,7 +137,7 @@ Derived& DenseBase<Derived>::operator-=(const AnyMatrixBase<OtherDerived> &other
 template<typename Derived>
 template<typename OtherDerived>
 inline Derived&
-MatrixBase<Derived>::operator*=(const AnyMatrixBase<OtherDerived> &other)
+MatrixBase<Derived>::operator*=(const EigenBase<OtherDerived> &other)
 {
   other.derived().applyThisOnTheRight(derived());
   return derived();
@@ -146,7 +146,7 @@ MatrixBase<Derived>::operator*=(const AnyMatrixBase<OtherDerived> &other)
 /** replaces \c *this by \c *this * \a other. It is equivalent to MatrixBase::operator*=() */
 template<typename Derived>
 template<typename OtherDerived>
-inline void MatrixBase<Derived>::applyOnTheRight(const AnyMatrixBase<OtherDerived> &other)
+inline void MatrixBase<Derived>::applyOnTheRight(const EigenBase<OtherDerived> &other)
 {
   other.derived().applyThisOnTheRight(derived());
 }
@@ -154,9 +154,9 @@ inline void MatrixBase<Derived>::applyOnTheRight(const AnyMatrixBase<OtherDerive
 /** replaces \c *this by \c *this * \a other. */
 template<typename Derived>
 template<typename OtherDerived>
-inline void MatrixBase<Derived>::applyOnTheLeft(const AnyMatrixBase<OtherDerived> &other)
+inline void MatrixBase<Derived>::applyOnTheLeft(const EigenBase<OtherDerived> &other)
 {
   other.derived().applyThisOnTheLeft(derived());
 }
 
-#endif // EIGEN_ANYMATRIXBASE_H
+#endif // EIGEN_EIGENBASE_H
diff --git a/Eigen/src/Core/Flagged.h b/Eigen/src/Core/Flagged.h
index 9d14aceaa..9413b74fa 100644
--- a/Eigen/src/Core/Flagged.h
+++ b/Eigen/src/Core/Flagged.h
@@ -110,7 +110,7 @@ template<typename ExpressionType, unsigned int Added, unsigned int Removed> clas
     const ExpressionType& _expression() const { return m_matrix; }
 
     template<typename OtherDerived>
-    typename ExpressionType::PlainMatrixType solveTriangular(const MatrixBase<OtherDerived>& other) const;
+    typename ExpressionType::PlainObject solveTriangular(const MatrixBase<OtherDerived>& other) const;
 
     template<typename OtherDerived>
     void solveTriangularInPlace(const MatrixBase<OtherDerived>& other) const;
diff --git a/Eigen/src/Core/Matrix.h b/Eigen/src/Core/Matrix.h
index b494b2f00..eae2711f4 100644
--- a/Eigen/src/Core/Matrix.h
+++ b/Eigen/src/Core/Matrix.h
@@ -139,7 +139,7 @@ class Matrix
 
     EIGEN_DENSE_PUBLIC_INTERFACE(Matrix)
 
-    typedef typename Base::PlainMatrixType PlainMatrixType;
+    typedef typename Base::PlainObject PlainObject;
 
     enum { NeedsToAlign = (!(Options&DontAlign))
                           && SizeAtCompileTime!=Dynamic && ((sizeof(Scalar)*SizeAtCompileTime)%16)==0 };
@@ -181,10 +181,10 @@ class Matrix
 
     /**
       * \brief Copies the generic expression \a other into *this.
-      * \copydetails DenseBase::operator=(const AnyMatrixBase<OtherDerived> &other)
+      * \copydetails DenseBase::operator=(const EigenBase<OtherDerived> &other)
       */
     template<typename OtherDerived>
-    EIGEN_STRONG_INLINE Matrix& operator=(const AnyMatrixBase<OtherDerived> &other)
+    EIGEN_STRONG_INLINE Matrix& operator=(const EigenBase<OtherDerived> &other)
     {
       return Base::operator=(other);
     }
@@ -297,10 +297,10 @@ class Matrix
     }
 
     /** \brief Copy constructor for generic expressions.
-      * \sa MatrixBase::operator=(const AnyMatrixBase<OtherDerived>&)
+      * \sa MatrixBase::operator=(const EigenBase<OtherDerived>&)
       */
     template<typename OtherDerived>
-    EIGEN_STRONG_INLINE Matrix(const AnyMatrixBase<OtherDerived> &other)
+    EIGEN_STRONG_INLINE Matrix(const EigenBase<OtherDerived> &other)
       : Base(other.derived().rows() * other.derived().cols(), other.derived().rows(), other.derived().cols())
     {
       Base::_check_template_params();
diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h
index 229195046..9c62163ba 100644
--- a/Eigen/src/Core/MatrixBase.h
+++ b/Eigen/src/Core/MatrixBase.h
@@ -121,7 +121,7 @@ template<typename Derived> class MatrixBase
       *
       * This is not necessarily exactly the return type of eval(). In the case of plain matrices,
       * the return type of eval() is a const reference to a matrix, not a matrix! It is however guaranteed
-      * that the return type of eval() is either PlainMatrixType or const PlainMatrixType&.
+      * that the return type of eval() is either PlainObject or const PlainObject&.
       */
     typedef Matrix<typename ei_traits<Derived>::Scalar,
                 ei_traits<Derived>::RowsAtCompileTime,
@@ -129,8 +129,7 @@ template<typename Derived> class MatrixBase
                 AutoAlign | (ei_traits<Derived>::Flags&RowMajorBit ? RowMajor : ColMajor),
                 ei_traits<Derived>::MaxRowsAtCompileTime,
                 ei_traits<Derived>::MaxColsAtCompileTime
-          > PlainMatrixType;
-    // typedef typename ei_plain_matrix_type<Derived>::type PlainMatrixType;
+          > PlainObject;
 
 #ifndef EIGEN_PARSED_BY_DOXYGEN
     /** \internal Represents a matrix with all coefficients equal to one another*/
@@ -193,13 +192,13 @@ template<typename Derived> class MatrixBase
     lazyProduct(const MatrixBase<OtherDerived> &other) const;
 
     template<typename OtherDerived>
-    Derived& operator*=(const AnyMatrixBase<OtherDerived>& other);
+    Derived& operator*=(const EigenBase<OtherDerived>& other);
 
     template<typename OtherDerived>
-    void applyOnTheLeft(const AnyMatrixBase<OtherDerived>& other);
+    void applyOnTheLeft(const EigenBase<OtherDerived>& other);
 
     template<typename OtherDerived>
-    void applyOnTheRight(const AnyMatrixBase<OtherDerived>& other);
+    void applyOnTheRight(const EigenBase<OtherDerived>& other);
 
     template<typename DiagonalDerived>
     const DiagonalProduct<Derived, DiagonalDerived, OnTheRight>
@@ -212,7 +211,7 @@ template<typename Derived> class MatrixBase
     RealScalar stableNorm() const;
     RealScalar blueNorm() const;
     RealScalar hypotNorm() const;
-    const PlainMatrixType normalized() const;
+    const PlainObject normalized() const;
     void normalize();
 
     const AdjointReturnType adjoint() const;
@@ -301,9 +300,9 @@ template<typename Derived> class MatrixBase
 
 /////////// LU module ///////////
 
-    const FullPivLU<PlainMatrixType> fullPivLu() const;
-    const PartialPivLU<PlainMatrixType> partialPivLu() const;
-    const PartialPivLU<PlainMatrixType> lu() const;
+    const FullPivLU<PlainObject> fullPivLu() const;
+    const PartialPivLU<PlainObject> partialPivLu() const;
+    const PartialPivLU<PlainObject> lu() const;
     const ei_inverse_impl<Derived> inverse() const;
     template<typename ResultType>
     void computeInverseAndDetWithCheck(
@@ -322,29 +321,29 @@ template<typename Derived> class MatrixBase
 
 /////////// Cholesky module ///////////
 
-    const LLT<PlainMatrixType>  llt() const;
-    const LDLT<PlainMatrixType> ldlt() const;
+    const LLT<PlainObject>  llt() const;
+    const LDLT<PlainObject> ldlt() const;
 
 /////////// QR module ///////////
 
-    const HouseholderQR<PlainMatrixType> householderQr() const;
-    const ColPivHouseholderQR<PlainMatrixType> colPivHouseholderQr() const;
-    const FullPivHouseholderQR<PlainMatrixType> fullPivHouseholderQr() const;
+    const HouseholderQR<PlainObject> householderQr() const;
+    const ColPivHouseholderQR<PlainObject> colPivHouseholderQr() const;
+    const FullPivHouseholderQR<PlainObject> fullPivHouseholderQr() const;
 
     EigenvaluesReturnType eigenvalues() const;
     RealScalar operatorNorm() const;
 
 /////////// SVD module ///////////
 
-    SVD<PlainMatrixType> svd() const;
+    SVD<PlainObject> svd() const;
 
 /////////// Geometry module ///////////
 
     template<typename OtherDerived>
-    PlainMatrixType cross(const MatrixBase<OtherDerived>& other) const;
+    PlainObject cross(const MatrixBase<OtherDerived>& other) const;
     template<typename OtherDerived>
-    PlainMatrixType cross3(const MatrixBase<OtherDerived>& other) const;
-    PlainMatrixType unitOrthogonal(void) const;
+    PlainObject cross3(const MatrixBase<OtherDerived>& other) const;
+    PlainObject unitOrthogonal(void) const;
     Matrix<Scalar,3,1> eulerAngles(int a0, int a1, int a2) const;
     const ScalarMultipleReturnType operator*(const UniformScaling<Scalar>& s) const;
     enum {
diff --git a/Eigen/src/Core/PermutationMatrix.h b/Eigen/src/Core/PermutationMatrix.h
index 284baf678..46884dc3f 100644
--- a/Eigen/src/Core/PermutationMatrix.h
+++ b/Eigen/src/Core/PermutationMatrix.h
@@ -47,7 +47,7 @@
   * \sa class DiagonalMatrix
   */
 template<int SizeAtCompileTime, int MaxSizeAtCompileTime = SizeAtCompileTime> class PermutationMatrix;
-template<typename PermutationType, typename MatrixType, int Side> struct ei_permut_matrix_product_retval;
+template<typename PermutationType, typename MatrixType, int Side, bool Transposed=false> struct ei_permut_matrix_product_retval;
 
 template<int SizeAtCompileTime, int MaxSizeAtCompileTime>
 struct ei_traits<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime> >
@@ -55,7 +55,7 @@ struct ei_traits<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime> >
 {};
 
 template<int SizeAtCompileTime, int MaxSizeAtCompileTime>
-class PermutationMatrix : public AnyMatrixBase<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime> >
+class PermutationMatrix : public EigenBase<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime> >
 {
   public:
 
@@ -132,6 +132,9 @@ class PermutationMatrix : public AnyMatrixBase<PermutationMatrix<SizeAtCompileTi
     /** \returns the number of columns */
     inline int cols() const { return m_indices.size(); }
 
+    /** \returns the size of a side of the respective square matrix, i.e., the number of indices */
+    inline int size() const { return m_indices.size(); }
+
     #ifndef EIGEN_PARSED_BY_DOXYGEN
     template<typename DenseDerived>
     void evalTo(MatrixBase<DenseDerived>& other) const
@@ -144,7 +147,7 @@ class PermutationMatrix : public AnyMatrixBase<PermutationMatrix<SizeAtCompileTi
 
     /** \returns a Matrix object initialized from this permutation matrix. Notice that it
       * is inefficient to return this Matrix object by value. For efficiency, favor using
-      * the Matrix constructor taking AnyMatrixBase objects.
+      * the Matrix constructor taking EigenBase objects.
       */
     DenseMatrixType toDenseMatrix() const
     {
@@ -213,16 +216,29 @@ class PermutationMatrix : public AnyMatrixBase<PermutationMatrix<SizeAtCompileTi
       return *this;
     }
 
-    /**** inversion and multiplication helpers to hopefully get RVO ****/
+    /** \returns the inverse permutation matrix.
+      *
+      * \note \note_try_to_help_rvo
+      */
+    inline Transpose<PermutationMatrix> inverse() const
+    { return *this; }
+    /** \returns the transpose permutation matrix.
+      *
+      * \note \note_try_to_help_rvo
+      */
+    inline Transpose<PermutationMatrix> transpose() const
+    { return *this; }
+
+    /**** multiplication helpers to hopefully get RVO ****/
 
 #ifndef EIGEN_PARSED_BY_DOXYGEN
-  protected:
-    enum Inverse_t {Inverse};
-    PermutationMatrix(Inverse_t, const PermutationMatrix& other)
-      : m_indices(other.m_indices.size())
+    template<int OtherSize, int OtherMaxSize>
+    PermutationMatrix(const Transpose<PermutationMatrix<OtherSize,OtherMaxSize> >& other)
+      : m_indices(other.nestedPermutation().size())
     {
-      for (int i=0; i<rows();++i) m_indices.coeffRef(other.m_indices.coeff(i)) = i;
+      for (int i=0; i<rows();++i) m_indices.coeffRef(other.nestedPermutation().indices().coeff(i)) = i;
     }
+  protected:
     enum Product_t {Product};
     PermutationMatrix(Product_t, const PermutationMatrix& lhs, const PermutationMatrix& rhs)
       : m_indices(lhs.m_indices.size())
@@ -233,12 +249,7 @@ class PermutationMatrix : public AnyMatrixBase<PermutationMatrix<SizeAtCompileTi
 #endif
 
   public:
-    /** \returns the inverse permutation matrix.
-      *
-      * \note \note_try_to_help_rvo
-      */
-    inline PermutationMatrix inverse() const
-    { return PermutationMatrix(Inverse, *this); }
+
     /** \returns the product permutation matrix.
       *
       * \note \note_try_to_help_rvo
@@ -247,6 +258,22 @@ class PermutationMatrix : public AnyMatrixBase<PermutationMatrix<SizeAtCompileTi
     inline PermutationMatrix operator*(const PermutationMatrix<OtherSize, OtherMaxSize>& other) const
     { return PermutationMatrix(Product, *this, other); }
 
+    /** \returns the product of a permutation with another inverse permutation.
+      *
+      * \note \note_try_to_help_rvo
+      */
+    template<int OtherSize, int OtherMaxSize>
+    inline PermutationMatrix operator*(const Transpose<PermutationMatrix<OtherSize,OtherMaxSize> >& other) const
+    { return PermutationMatrix(Product, *this, other.eval()); }
+
+    /** \returns the product of an inverse permutation with another permutation.
+      *
+      * \note \note_try_to_help_rvo
+      */
+    template<int OtherSize, int OtherMaxSize> friend
+    inline PermutationMatrix operator*(const Transpose<PermutationMatrix<OtherSize,OtherMaxSize> >& other, const PermutationMatrix& perm)
+    { return PermutationMatrix(Product, other.eval(), perm); }
+
   protected:
 
     IndicesType m_indices;
@@ -277,15 +304,15 @@ operator*(const PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime> &perm
            (permutation, matrix.derived());
 }
 
-template<typename PermutationType, typename MatrixType, int Side>
-struct ei_traits<ei_permut_matrix_product_retval<PermutationType, MatrixType, Side> >
+template<typename PermutationType, typename MatrixType, int Side, bool Transposed>
+struct ei_traits<ei_permut_matrix_product_retval<PermutationType, MatrixType, Side, Transposed> >
 {
-  typedef typename MatrixType::PlainMatrixType ReturnMatrixType;
+  typedef typename MatrixType::PlainObject ReturnType;
 };
 
-template<typename PermutationType, typename MatrixType, int Side>
+template<typename PermutationType, typename MatrixType, int Side, bool Transposed>
 struct ei_permut_matrix_product_retval
- : public ReturnByValue<ei_permut_matrix_product_retval<PermutationType, MatrixType, Side> >
+ : public ReturnByValue<ei_permut_matrix_product_retval<PermutationType, MatrixType, Side, Transposed> >
 {
     typedef typename ei_cleantype<typename MatrixType::Nested>::type MatrixTypeNestedCleaned;
 
@@ -299,21 +326,46 @@ struct ei_permut_matrix_product_retval
     template<typename Dest> inline void evalTo(Dest& dst) const
     {
       const int n = Side==OnTheLeft ? rows() : cols();
-      for(int i = 0; i < n; ++i)
+
+      if(ei_is_same_type<MatrixTypeNestedCleaned,Dest>::ret && ei_extract_data(dst) == ei_extract_data(m_matrix))
       {
-        Block<
-          Dest,
-          Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime,
-          Side==OnTheRight ? 1 : Dest::ColsAtCompileTime
-        >(dst, Side==OnTheLeft ? m_permutation.indices().coeff(i) : i)
-
-        =
-
-        Block<
-          MatrixTypeNestedCleaned,
-          Side==OnTheLeft ? 1 : MatrixType::RowsAtCompileTime,
-          Side==OnTheRight ? 1 : MatrixType::ColsAtCompileTime
-        >(m_matrix, Side==OnTheRight ? m_permutation.indices().coeff(i) : i);
+        // apply the permutation inplace
+        Matrix<bool,PermutationType::RowsAtCompileTime,1,0,PermutationType::MaxRowsAtCompileTime> mask(m_permutation.size());
+        mask.fill(false);
+        int r = 0;
+        while(r < m_permutation.size())
+        {
+          // search for the next seed
+          while(r<m_permutation.size() && mask[r]) r++;
+          if(r>=m_permutation.size())
+            break;
+          // we got one, let's follow it until we are back to the seed
+          int k0 = r++;
+          int kPrev = k0;
+          mask.coeffRef(k0) = true;
+          for(int k=m_permutation.indices().coeff(k0); k!=k0; k=m_permutation.indices().coeff(k))
+          {
+                  Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>(dst, k)
+            .swap(Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>
+                       (dst,((Side==OnTheLeft) ^ Transposed) ? k0 : kPrev));
+
+            mask.coeffRef(k) = true;
+            kPrev = k;
+          }
+        }
+      }
+      else
+      {
+        for(int i = 0; i < n; ++i)
+        {
+          Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>
+               (dst, ((Side==OnTheLeft) ^ Transposed) ? m_permutation.indices().coeff(i) : i)
+
+          =
+
+          Block<MatrixTypeNestedCleaned,Side==OnTheLeft ? 1 : MatrixType::RowsAtCompileTime,Side==OnTheRight ? 1 : MatrixType::ColsAtCompileTime>
+               (m_matrix, ((Side==OnTheRight) ^ Transposed) ? m_permutation.indices().coeff(i) : i);
+        }
       }
     }
 
@@ -322,4 +374,78 @@ struct ei_permut_matrix_product_retval
     const typename MatrixType::Nested m_matrix;
 };
 
+/* Template partial specialization for transposed/inverse permutations */
+
+template<int SizeAtCompileTime, int MaxSizeAtCompileTime>
+struct ei_traits<Transpose<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime> > >
+ : ei_traits<Matrix<int,SizeAtCompileTime,SizeAtCompileTime,0,MaxSizeAtCompileTime,MaxSizeAtCompileTime> >
+{};
+
+template<int SizeAtCompileTime, int MaxSizeAtCompileTime>
+class Transpose<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime> >
+  : public EigenBase<Transpose<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime> > >
+{
+    typedef PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime> PermutationType;
+    typedef typename PermutationType::IndicesType IndicesType;
+  public:
+
+    #ifndef EIGEN_PARSED_BY_DOXYGEN
+    typedef ei_traits<PermutationType> Traits;
+    typedef Matrix<int,SizeAtCompileTime,SizeAtCompileTime,0,MaxSizeAtCompileTime,MaxSizeAtCompileTime>
+            DenseMatrixType;
+    enum {
+      Flags = Traits::Flags,
+      CoeffReadCost = Traits::CoeffReadCost,
+      RowsAtCompileTime = Traits::RowsAtCompileTime,
+      ColsAtCompileTime = Traits::ColsAtCompileTime,
+      MaxRowsAtCompileTime = Traits::MaxRowsAtCompileTime,
+      MaxColsAtCompileTime = Traits::MaxColsAtCompileTime
+    };
+    typedef typename Traits::Scalar Scalar;
+    #endif
+
+    Transpose(const PermutationType& p) : m_permutation(p) {}
+
+    inline int rows() const { return m_permutation.rows(); }
+    inline int cols() const { return m_permutation.cols(); }
+
+    #ifndef EIGEN_PARSED_BY_DOXYGEN
+    template<typename DenseDerived>
+    void evalTo(MatrixBase<DenseDerived>& other) const
+    {
+      other.setZero();
+      for (int i=0; i<rows();++i)
+        other.coeffRef(i, m_permutation.indices().coeff(i)) = typename DenseDerived::Scalar(1);
+    }
+    #endif
+
+    /** \returns the equivalent permutation matrix */
+    PermutationType eval() const { return *this; }
+
+    DenseMatrixType toDenseMatrix() const { return *this; }
+
+    /** \returns the matrix with the inverse permutation applied to the columns.
+      */
+    template<typename Derived> friend
+    inline const ei_permut_matrix_product_retval<PermutationType, Derived, OnTheRight, true>
+    operator*(const MatrixBase<Derived>& matrix, const Transpose& trPerm)
+    {
+      return ei_permut_matrix_product_retval<PermutationType, Derived, OnTheRight, true>(trPerm.m_permutation, matrix.derived());
+    }
+
+    /** \returns the matrix with the inverse permutation applied to the rows.
+      */
+    template<typename Derived>
+    inline const ei_permut_matrix_product_retval<PermutationType, Derived, OnTheLeft, true>
+    operator*(const MatrixBase<Derived>& matrix) const
+    {
+      return ei_permut_matrix_product_retval<PermutationType, Derived, OnTheLeft, true>(m_permutation, matrix.derived());
+    }
+
+    const PermutationType& nestedPermutation() const { return m_permutation; }
+
+  protected:
+    const PermutationType& m_permutation;
+};
+
 #endif // EIGEN_PERMUTATIONMATRIX_H
diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h
index af05773ee..236e4f130 100644
--- a/Eigen/src/Core/Product.h
+++ b/Eigen/src/Core/Product.h
@@ -50,8 +50,8 @@ class GeneralProduct;
 template<int Rows, int Cols, int Depth> struct ei_product_type_selector;
 
 enum {
-  Large = Dynamic,
-  Small = Dynamic/2
+  Large = 2,
+  Small = 3
 };
 
 template<typename Lhs, typename Rhs> struct ei_product_type
@@ -95,10 +95,10 @@ template<>                    struct ei_product_type_selector<Small, Large, 1>
 template<>                    struct ei_product_type_selector<Large, Small, 1>    { enum { ret = LazyCoeffBasedProductMode }; };
 template<>                    struct ei_product_type_selector<1,    Large,Small>  { enum { ret = GemvProduct }; };
 template<>                    struct ei_product_type_selector<1,    Large,Large>  { enum { ret = GemvProduct }; };
-template<>                    struct ei_product_type_selector<1,    Small,Large>  { enum { ret = GemvProduct }; };
+template<>                    struct ei_product_type_selector<1,    Small,Large>  { enum { ret = CoeffBasedProductMode }; };
 template<>                    struct ei_product_type_selector<Large,1,    Small>  { enum { ret = GemvProduct }; };
 template<>                    struct ei_product_type_selector<Large,1,    Large>  { enum { ret = GemvProduct }; };
-template<>                    struct ei_product_type_selector<Small,1,    Large>  { enum { ret = GemvProduct }; };
+template<>                    struct ei_product_type_selector<Small,1,    Large>  { enum { ret = CoeffBasedProductMode }; };
 template<>                    struct ei_product_type_selector<Small,Small,Large>  { enum { ret = GemmProduct }; };
 template<>                    struct ei_product_type_selector<Large,Small,Large>  { enum { ret = GemmProduct }; };
 template<>                    struct ei_product_type_selector<Small,Large,Large>  { enum { ret = GemmProduct }; };
diff --git a/Eigen/src/Core/ProductBase.h b/Eigen/src/Core/ProductBase.h
index 481e7c760..789aecfb6 100644
--- a/Eigen/src/Core/ProductBase.h
+++ b/Eigen/src/Core/ProductBase.h
@@ -88,7 +88,7 @@ class ProductBase : public MatrixBase<Derived>
 
   public:
 
-    typedef typename Base::PlainMatrixType PlainMatrixType;
+    typedef typename Base::PlainObject PlainObject;
 
     ProductBase(const Lhs& lhs, const Rhs& rhs)
       : m_lhs(lhs), m_rhs(rhs)
@@ -116,8 +116,8 @@ class ProductBase : public MatrixBase<Derived>
     const _LhsNested& lhs() const { return m_lhs; }
     const _RhsNested& rhs() const { return m_rhs; }
 
-    // Implicit convertion to the nested type (trigger the evaluation of the product)
-    operator const PlainMatrixType& () const
+    // Implicit conversion to the nested type (trigger the evaluation of the product)
+    operator const PlainObject& () const
     {
       m_result.resize(m_lhs.rows(), m_rhs.cols());
       this->evalTo(m_result);
@@ -139,7 +139,7 @@ class ProductBase : public MatrixBase<Derived>
     const LhsNested m_lhs;
     const RhsNested m_rhs;
 
-    mutable PlainMatrixType m_result;
+    mutable PlainObject m_result;
 
   private:
 
@@ -152,10 +152,10 @@ class ProductBase : public MatrixBase<Derived>
 
 // here we need to overload the nested rule for products
 // such that the nested type is a const reference to a plain matrix
-template<typename Lhs, typename Rhs, int Mode, int N, typename PlainMatrixType>
-struct ei_nested<GeneralProduct<Lhs,Rhs,Mode>, N, PlainMatrixType>
+template<typename Lhs, typename Rhs, int Mode, int N, typename PlainObject>
+struct ei_nested<GeneralProduct<Lhs,Rhs,Mode>, N, PlainObject>
 {
-  typedef PlainMatrixType const& type;
+  typedef PlainObject const& type;
 };
 
 template<typename NestedProduct>
diff --git a/Eigen/src/Core/ReturnByValue.h b/Eigen/src/Core/ReturnByValue.h
index 8d45fc31b..160b973bd 100644
--- a/Eigen/src/Core/ReturnByValue.h
+++ b/Eigen/src/Core/ReturnByValue.h
@@ -31,13 +31,13 @@
   */
 template<typename Derived>
 struct ei_traits<ReturnByValue<Derived> >
-  : public ei_traits<typename ei_traits<Derived>::ReturnMatrixType>
+  : public ei_traits<typename ei_traits<Derived>::ReturnType>
 {
   enum {
     // We're disabling the DirectAccess because e.g. the constructor of
     // the Block-with-DirectAccess expression requires to have a coeffRef method.
     // Also, we don't want to have to implement the stride stuff.
-    Flags = (ei_traits<typename ei_traits<Derived>::ReturnMatrixType>::Flags
+    Flags = (ei_traits<typename ei_traits<Derived>::ReturnType>::Flags
              | EvalBeforeNestingBit) & ~DirectAccessBit
   };
 };
@@ -46,18 +46,18 @@ struct ei_traits<ReturnByValue<Derived> >
  * So the only way that nesting it in an expression can work, is by evaluating it into a plain matrix.
  * So ei_nested always gives the plain return matrix type.
  */
-template<typename Derived,int n,typename PlainMatrixType>
-struct ei_nested<ReturnByValue<Derived>, n, PlainMatrixType>
+template<typename Derived,int n,typename PlainObject>
+struct ei_nested<ReturnByValue<Derived>, n, PlainObject>
 {
-  typedef typename ei_traits<Derived>::ReturnMatrixType type;
+  typedef typename ei_traits<Derived>::ReturnType type;
 };
 
 template<typename Derived> class ReturnByValue
-  : public ei_traits<Derived>::ReturnMatrixType::template MakeBase<ReturnByValue<Derived> >::Type
+  : public ei_traits<Derived>::ReturnType::template MakeBase<ReturnByValue<Derived> >::Type
 {
   public:
-    typedef typename ei_traits<Derived>::ReturnMatrixType ReturnMatrixType;
-    typedef typename ReturnMatrixType::template MakeBase<ReturnByValue<Derived> >::Type Base;
+    typedef typename ei_traits<Derived>::ReturnType ReturnType;
+    typedef typename ReturnType::template MakeBase<ReturnByValue<Derived> >::Type Base;
     EIGEN_DENSE_PUBLIC_INTERFACE(ReturnByValue)
 
     template<typename Dest>
diff --git a/Eigen/src/Core/SelfAdjointView.h b/Eigen/src/Core/SelfAdjointView.h
index add5a3afb..0b57b9968 100644
--- a/Eigen/src/Core/SelfAdjointView.h
+++ b/Eigen/src/Core/SelfAdjointView.h
@@ -68,7 +68,7 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
     enum {
       Mode = ei_traits<SelfAdjointView>::Mode
     };
-    typedef typename MatrixType::PlainMatrixType PlainMatrixType;
+    typedef typename MatrixType::PlainObject PlainObject;
 
     inline SelfAdjointView(const MatrixType& matrix) : m_matrix(matrix)
     { ei_assert(ei_are_flags_consistent<Mode>::ret); }
@@ -147,8 +147,8 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
 
 /////////// Cholesky module ///////////
 
-    const LLT<PlainMatrixType, UpLo> llt() const;
-    const LDLT<PlainMatrixType> ldlt() const;
+    const LLT<PlainObject, UpLo> llt() const;
+    const LDLT<PlainObject> ldlt() const;
 
   protected:
     const typename MatrixType::Nested m_matrix;
diff --git a/Eigen/src/Core/SelfCwiseBinaryOp.h b/Eigen/src/Core/SelfCwiseBinaryOp.h
index 58aee182d..529a9994d 100644
--- a/Eigen/src/Core/SelfCwiseBinaryOp.h
+++ b/Eigen/src/Core/SelfCwiseBinaryOp.h
@@ -125,8 +125,8 @@ template<typename Derived>
 inline Derived& DenseBase<Derived>::operator*=(const Scalar& other)
 {
   SelfCwiseBinaryOp<ei_scalar_product_op<Scalar>, Derived> tmp(derived());
-  typedef typename Derived::PlainMatrixType PlainMatrixType;
-  tmp = PlainMatrixType::Constant(rows(),cols(),other);
+  typedef typename Derived::PlainObject PlainObject;
+  tmp = PlainObject::Constant(rows(),cols(),other);
   return derived();
 }
 
@@ -134,8 +134,8 @@ template<typename Derived>
 inline Derived& DenseBase<Derived>::operator/=(const Scalar& other)
 {
   SelfCwiseBinaryOp<typename ei_meta_if<NumTraits<Scalar>::HasFloatingPoint,ei_scalar_product_op<Scalar>,ei_scalar_quotient_op<Scalar> >::ret, Derived> tmp(derived());
-  typedef typename Derived::PlainMatrixType PlainMatrixType;
-  tmp = PlainMatrixType::Constant(rows(),cols(), NumTraits<Scalar>::HasFloatingPoint ? Scalar(1)/other : other);
+  typedef typename Derived::PlainObject PlainObject;
+  tmp = PlainObject::Constant(rows(),cols(), NumTraits<Scalar>::HasFloatingPoint ? Scalar(1)/other : other);
   return derived();
 }
 
diff --git a/Eigen/src/Core/Transpose.h b/Eigen/src/Core/Transpose.h
index 47dae5776..1f064d1c2 100644
--- a/Eigen/src/Core/Transpose.h
+++ b/Eigen/src/Core/Transpose.h
@@ -296,25 +296,6 @@ struct ei_blas_traits<SelfCwiseBinaryOp<BinOp,NestedXpr> >
   static inline const XprType extract(const XprType& x) { return x; }
 };
 
-
-template<typename T, int Access=ei_blas_traits<T>::ActualAccess>
-struct ei_extract_data_selector {
-  static typename T::Scalar* run(const T& m)
-  {
-    return &ei_blas_traits<T>::extract(m).const_cast_derived().coeffRef(0,0);
-  }
-};
-
-template<typename T>
-struct ei_extract_data_selector<T,NoDirectAccess> {
-  static typename T::Scalar* run(const T&) { return 0; }
-};
-
-template<typename T> typename T::Scalar* ei_extract_data(const T& m)
-{
-  return ei_extract_data_selector<T>::run(m);
-}
-
 template<typename Scalar, bool DestIsTranposed, typename OtherDerived>
 struct ei_check_transpose_aliasing_selector
 {
diff --git a/Eigen/src/Core/TriangularMatrix.h b/Eigen/src/Core/TriangularMatrix.h
index c61a6d7cc..2230680d1 100644
--- a/Eigen/src/Core/TriangularMatrix.h
+++ b/Eigen/src/Core/TriangularMatrix.h
@@ -32,7 +32,7 @@
   *
   * \brief Base class for triangular part in a matrix
   */
-template<typename Derived> class TriangularBase : public AnyMatrixBase<Derived>
+template<typename Derived> class TriangularBase : public EigenBase<Derived>
 {
   public:
 
@@ -149,7 +149,7 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
     typedef TriangularBase<TriangularView> Base;
     typedef typename ei_traits<TriangularView>::Scalar Scalar;
     typedef _MatrixType MatrixType;
-    typedef typename MatrixType::PlainMatrixType DenseMatrixType;
+    typedef typename MatrixType::PlainObject DenseMatrixType;
     typedef typename MatrixType::Nested MatrixTypeNested;
     typedef typename ei_cleantype<MatrixTypeNested>::type _MatrixTypeNested;
 
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index a5a56f759..f78bf0dd3 100644
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -122,7 +122,7 @@ template<> EIGEN_STRONG_INLINE Packet4f ei_pmul<Packet4f>(const Packet4f& a, con
 template<> EIGEN_STRONG_INLINE Packet2d ei_pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_mul_pd(a,b); }
 template<> EIGEN_STRONG_INLINE Packet4i ei_pmul<Packet4i>(const Packet4i& a, const Packet4i& b)
 {
-#ifdef __SSE4_1__
+#ifdef EIGEN_VECTORIZE_SSE4_1
   return _mm_mullo_epi32(a,b);
 #else
   // this version is slightly faster than 4 scalar products
@@ -269,7 +269,7 @@ template<> EIGEN_STRONG_INLINE Packet2d ei_pabs(const Packet2d& a)
 }
 template<> EIGEN_STRONG_INLINE Packet4i ei_pabs(const Packet4i& a)
 {
-  #ifdef __SSSE3__
+  #ifdef EIGEN_VECTORIZE_SSSE3
   return _mm_abs_epi32(a);
   #else
   Packet4i aux = _mm_srai_epi32(a,31);
@@ -278,7 +278,7 @@ template<> EIGEN_STRONG_INLINE Packet4i ei_pabs(const Packet4i& a)
 }
 
 
-#ifdef __SSE3__
+#ifdef EIGEN_VECTORIZE_SSE3
 // TODO implement SSE2 versions as well as integer versions
 template<> EIGEN_STRONG_INLINE Packet4f ei_preduxp<Packet4f>(const Packet4f* vecs)
 {
@@ -439,7 +439,7 @@ template<> EIGEN_STRONG_INLINE int ei_predux_max<Packet4i>(const Packet4i& a)
 // }
 #endif
 
-#ifdef __SSSE3__
+#ifdef EIGEN_VECTORIZE_SSSE3
 // SSSE3 versions
 template<int Offset>
 struct ei_palign_impl<Offset,Packet4f>
diff --git a/Eigen/src/Core/products/CoeffBasedProduct.h b/Eigen/src/Core/products/CoeffBasedProduct.h
index f030d59b5..3343b1875 100644
--- a/Eigen/src/Core/products/CoeffBasedProduct.h
+++ b/Eigen/src/Core/products/CoeffBasedProduct.h
@@ -109,7 +109,7 @@ class CoeffBasedProduct
 
     typedef MatrixBase<CoeffBasedProduct> Base;
     EIGEN_DENSE_PUBLIC_INTERFACE(CoeffBasedProduct)
-    typedef typename Base::PlainMatrixType PlainMatrixType;
+    typedef typename Base::PlainObject PlainObject;
 
   private:
 
@@ -181,8 +181,8 @@ class CoeffBasedProduct
       return res;
     }
 
-    // Implicit convertion to the nested type (trigger the evaluation of the product)
-    operator const PlainMatrixType& () const
+    // Implicit conversion to the nested type (trigger the evaluation of the product)
+    operator const PlainObject& () const
     {
       m_result.lazyAssign(*this);
       return m_result;
@@ -205,15 +205,15 @@ class CoeffBasedProduct
     const LhsNested m_lhs;
     const RhsNested m_rhs;
 
-    mutable PlainMatrixType m_result;
+    mutable PlainObject m_result;
 };
 
 // here we need to overload the nested rule for products
 // such that the nested type is a const reference to a plain matrix
-template<typename Lhs, typename Rhs, int N, typename PlainMatrixType>
-struct ei_nested<CoeffBasedProduct<Lhs,Rhs,EvalBeforeNestingBit|EvalBeforeAssigningBit>, N, PlainMatrixType>
+template<typename Lhs, typename Rhs, int N, typename PlainObject>
+struct ei_nested<CoeffBasedProduct<Lhs,Rhs,EvalBeforeNestingBit|EvalBeforeAssigningBit>, N, PlainObject>
 {
-  typedef PlainMatrixType const& type;
+  typedef PlainObject const& type;
 };
 
 /***************************************************************************
diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
index fe1987bdd..18e913b0e 100644
--- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h
+++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
@@ -27,6 +27,12 @@
 
 #ifndef EIGEN_EXTERN_INSTANTIATIONS
 
+#ifdef EIGEN_HAS_FUSE_CJMADD // fused path: a single cj.pmadd call, T is not used
+#define CJMADD(A,B,C,T)  do { C = cj.pmadd(A,B,C); } while(0)
+#else // split path: T is a caller-supplied temporary that holds A, then the product
+#define CJMADD(A,B,C,T)  do { T = A; T = cj.pmul(T,B); C = ei_padd(C,T); } while(0)
+#endif
+
 // optimized GEneral packed Block * packed Panel product kernel
 template<typename Scalar, int mr, int nr, typename Conj>
 struct ei_gebp_kernel
@@ -74,65 +80,111 @@ struct ei_gebp_kernel
         const Scalar* blB = &blockB[j2*strideB*PacketSize+offsetB*nr];
         for(int k=0; k<peeled_kc; k+=4)
         {
-          PacketType B0, B1, B2, B3, A0, A1;
-
-                    A0 = ei_pload(&blA[0*PacketSize]);
-                    A1 = ei_pload(&blA[1*PacketSize]);
-                    B0 = ei_pload(&blB[0*PacketSize]);
-                    B1 = ei_pload(&blB[1*PacketSize]);
-                    C0 = cj.pmadd(A0, B0, C0);
-          if(nr==4) B2 = ei_pload(&blB[2*PacketSize]);
-                    C4 = cj.pmadd(A1, B0, C4);
-          if(nr==4) B3 = ei_pload(&blB[3*PacketSize]);
-                    B0 = ei_pload(&blB[(nr==4 ? 4 : 2)*PacketSize]);
-                    C1 = cj.pmadd(A0, B1, C1);
-                    C5 = cj.pmadd(A1, B1, C5);
-                    B1 = ei_pload(&blB[(nr==4 ? 5 : 3)*PacketSize]);
-          if(nr==4) C2 = cj.pmadd(A0, B2, C2);
-          if(nr==4) C6 = cj.pmadd(A1, B2, C6);
-          if(nr==4) B2 = ei_pload(&blB[6*PacketSize]);
-          if(nr==4) C3 = cj.pmadd(A0, B3, C3);
-                    A0 = ei_pload(&blA[2*PacketSize]);
-          if(nr==4) C7 = cj.pmadd(A1, B3, C7);
-                    A1 = ei_pload(&blA[3*PacketSize]);
-          if(nr==4) B3 = ei_pload(&blB[7*PacketSize]);
-                    C0 = cj.pmadd(A0, B0, C0);
-                    C4 = cj.pmadd(A1, B0, C4);
-                    B0 = ei_pload(&blB[(nr==4 ? 8 : 4)*PacketSize]);
-                    C1 = cj.pmadd(A0, B1, C1);
-                    C5 = cj.pmadd(A1, B1, C5);
-                    B1 = ei_pload(&blB[(nr==4 ? 9 : 5)*PacketSize]);
-          if(nr==4) C2 = cj.pmadd(A0, B2, C2);
-          if(nr==4) C6 = cj.pmadd(A1, B2, C6);
-          if(nr==4) B2 = ei_pload(&blB[10*PacketSize]);
-          if(nr==4) C3 = cj.pmadd(A0, B3, C3);
-                    A0 = ei_pload(&blA[4*PacketSize]);
-          if(nr==4) C7 = cj.pmadd(A1, B3, C7);
-                    A1 = ei_pload(&blA[5*PacketSize]);
-          if(nr==4) B3 = ei_pload(&blB[11*PacketSize]);
-
-                    C0 = cj.pmadd(A0, B0, C0);
-                    C4 = cj.pmadd(A1, B0, C4);
-                    B0 = ei_pload(&blB[(nr==4 ? 12 : 6)*PacketSize]);
-                    C1 = cj.pmadd(A0, B1, C1);
-                    C5 = cj.pmadd(A1, B1, C5);
-                    B1 = ei_pload(&blB[(nr==4 ? 13 : 7)*PacketSize]);
-          if(nr==4) C2 = cj.pmadd(A0, B2, C2);
-          if(nr==4) C6 = cj.pmadd(A1, B2, C6);
-          if(nr==4) B2 = ei_pload(&blB[14*PacketSize]);
-          if(nr==4) C3 = cj.pmadd(A0, B3, C3);
-                    A0 = ei_pload(&blA[6*PacketSize]);
-          if(nr==4) C7 = cj.pmadd(A1, B3, C7);
-                    A1 = ei_pload(&blA[7*PacketSize]);
-          if(nr==4) B3 = ei_pload(&blB[15*PacketSize]);
-                    C0 = cj.pmadd(A0, B0, C0);
-                    C4 = cj.pmadd(A1, B0, C4);
-                    C1 = cj.pmadd(A0, B1, C1);
-                    C5 = cj.pmadd(A1, B1, C5);
-          if(nr==4) C2 = cj.pmadd(A0, B2, C2);
-          if(nr==4) C6 = cj.pmadd(A1, B2, C6);
-          if(nr==4) C3 = cj.pmadd(A0, B3, C3);
-          if(nr==4) C7 = cj.pmadd(A1, B3, C7);
+          if(nr==2)
+          {
+            PacketType B0, T0, A0, A1;
+
+            A0 = ei_pload(&blA[0*PacketSize]);
+            A1 = ei_pload(&blA[1*PacketSize]);
+            B0 = ei_pload(&blB[0*PacketSize]);
+            CJMADD(A0,B0,C0,T0);
+            CJMADD(A1,B0,C4,T0);
+            B0 = ei_pload(&blB[1*PacketSize]);
+            CJMADD(A0,B0,C1,T0);
+            CJMADD(A1,B0,C5,T0);
+
+            A0 = ei_pload(&blA[2*PacketSize]);
+            A1 = ei_pload(&blA[3*PacketSize]);
+            B0 = ei_pload(&blB[2*PacketSize]);
+            CJMADD(A0,B0,C0,T0);
+            CJMADD(A1,B0,C4,T0);
+            B0 = ei_pload(&blB[3*PacketSize]);
+            CJMADD(A0,B0,C1,T0);
+            CJMADD(A1,B0,C5,T0);
+
+            A0 = ei_pload(&blA[4*PacketSize]);
+            A1 = ei_pload(&blA[5*PacketSize]);
+            B0 = ei_pload(&blB[4*PacketSize]);
+            CJMADD(A0,B0,C0,T0);
+            CJMADD(A1,B0,C4,T0);
+            B0 = ei_pload(&blB[5*PacketSize]);
+            CJMADD(A0,B0,C1,T0);
+            CJMADD(A1,B0,C5,T0);
+
+            A0 = ei_pload(&blA[6*PacketSize]);
+            A1 = ei_pload(&blA[7*PacketSize]);
+            B0 = ei_pload(&blB[6*PacketSize]);
+            CJMADD(A0,B0,C0,T0);
+            CJMADD(A1,B0,C4,T0);
+            B0 = ei_pload(&blB[7*PacketSize]);
+            CJMADD(A0,B0,C1,T0);
+            CJMADD(A1,B0,C5,T0);
+          }
+          else
+          {
+
+            PacketType B0, B1, B2, B3, A0, A1;
+            PacketType T0, T1;
+
+                        A0 = ei_pload(&blA[0*PacketSize]);
+                        A1 = ei_pload(&blA[1*PacketSize]);
+                        B0 = ei_pload(&blB[0*PacketSize]);
+                        B1 = ei_pload(&blB[1*PacketSize]);
+
+                        CJMADD(A0,B0,C0,T0);
+            if(nr==4)   B2 = ei_pload(&blB[2*PacketSize]);
+                        CJMADD(A1,B0,C4,T1);
+            if(nr==4)   B3 = ei_pload(&blB[3*PacketSize]);
+                        B0 = ei_pload(&blB[(nr==4 ? 4 : 2)*PacketSize]);
+                        CJMADD(A0,B1,C1,T0);
+                        CJMADD(A1,B1,C5,T1);
+                        B1 = ei_pload(&blB[(nr==4 ? 5 : 3)*PacketSize]);
+            if(nr==4) { CJMADD(A0,B2,C2,T0); }
+            if(nr==4) { CJMADD(A1,B2,C6,T1); }
+            if(nr==4)   B2 = ei_pload(&blB[6*PacketSize]);
+            if(nr==4) { CJMADD(A0,B3,C3,T0); }
+                        A0 = ei_pload(&blA[2*PacketSize]);
+            if(nr==4) { CJMADD(A1,B3,C7,T1); }
+                        A1 = ei_pload(&blA[3*PacketSize]);
+            if(nr==4)   B3 = ei_pload(&blB[7*PacketSize]);
+                        CJMADD(A0,B0,C0,T0);
+                        CJMADD(A1,B0,C4,T1);
+                        B0 = ei_pload(&blB[(nr==4 ? 8 : 4)*PacketSize]);
+                        CJMADD(A0,B1,C1,T0);
+                        CJMADD(A1,B1,C5,T1);
+                        B1 = ei_pload(&blB[(nr==4 ? 9 : 5)*PacketSize]);
+            if(nr==4) { CJMADD(A0,B2,C2,T0); }
+            if(nr==4) { CJMADD(A1,B2,C6,T1); }
+            if(nr==4)   B2 = ei_pload(&blB[10*PacketSize]);
+            if(nr==4) { CJMADD(A0,B3,C3,T0); }
+                        A0 = ei_pload(&blA[4*PacketSize]);
+            if(nr==4) { CJMADD(A1,B3,C7,T1); }
+                        A1 = ei_pload(&blA[5*PacketSize]);
+            if(nr==4)   B3 = ei_pload(&blB[11*PacketSize]);
+
+                        CJMADD(A0,B0,C0,T0);
+                        CJMADD(A1,B0,C4,T1);
+                        B0 = ei_pload(&blB[(nr==4 ? 12 : 6)*PacketSize]);
+                        CJMADD(A0,B1,C1,T0);
+                        CJMADD(A1,B1,C5,T1);
+                        B1 = ei_pload(&blB[(nr==4 ? 13 : 7)*PacketSize]);
+            if(nr==4) { CJMADD(A0,B2,C2,T0); }
+            if(nr==4) { CJMADD(A1,B2,C6,T1); }
+            if(nr==4)   B2 = ei_pload(&blB[14*PacketSize]);
+            if(nr==4) { CJMADD(A0,B3,C3,T0); }
+                        A0 = ei_pload(&blA[6*PacketSize]);
+            if(nr==4) { CJMADD(A1,B3,C7,T1); }
+                        A1 = ei_pload(&blA[7*PacketSize]);
+            if(nr==4)   B3 = ei_pload(&blB[15*PacketSize]);
+                        CJMADD(A0,B0,C0,T0);
+                        CJMADD(A1,B0,C4,T1);
+                        CJMADD(A0,B1,C1,T0);
+                        CJMADD(A1,B1,C5,T1);
+            if(nr==4) { CJMADD(A0,B2,C2,T0); }
+            if(nr==4) { CJMADD(A1,B2,C6,T1); }
+            if(nr==4) { CJMADD(A0,B3,C3,T0); }
+            if(nr==4) { CJMADD(A1,B3,C7,T1); }
+          }
 
           blB += 4*nr*PacketSize;
           blA += 4*mr;
@@ -140,22 +192,40 @@ struct ei_gebp_kernel
         // process remaining peeled loop
         for(int k=peeled_kc; k<depth; k++)
         {
-          PacketType B0, B1, B2, B3, A0, A1;
-
-                    A0 = ei_pload(&blA[0*PacketSize]);
-                    A1 = ei_pload(&blA[1*PacketSize]);
-                    B0 = ei_pload(&blB[0*PacketSize]);
-                    B1 = ei_pload(&blB[1*PacketSize]);
-                    C0 = cj.pmadd(A0, B0, C0);
-          if(nr==4) B2 = ei_pload(&blB[2*PacketSize]);
-                    C4 = cj.pmadd(A1, B0, C4);
-          if(nr==4) B3 = ei_pload(&blB[3*PacketSize]);
-                    C1 = cj.pmadd(A0, B1, C1);
-                    C5 = cj.pmadd(A1, B1, C5);
-          if(nr==4) C2 = cj.pmadd(A0, B2, C2);
-          if(nr==4) C6 = cj.pmadd(A1, B2, C6);
-          if(nr==4) C3 = cj.pmadd(A0, B3, C3);
-          if(nr==4) C7 = cj.pmadd(A1, B3, C7);
+          if(nr==2)
+          {
+            PacketType B0, T0, A0, A1;
+
+            A0 = ei_pload(&blA[0*PacketSize]);
+            A1 = ei_pload(&blA[1*PacketSize]);
+            B0 = ei_pload(&blB[0*PacketSize]);
+            CJMADD(A0,B0,C0,T0);
+            CJMADD(A1,B0,C4,T0);
+            B0 = ei_pload(&blB[1*PacketSize]);
+            CJMADD(A0,B0,C1,T0);
+            CJMADD(A1,B0,C5,T0);
+          }
+          else
+          {
+            PacketType B0, B1, B2, B3, A0, A1, T0, T1;
+
+                        A0 = ei_pload(&blA[0*PacketSize]);
+                        A1 = ei_pload(&blA[1*PacketSize]);
+                        B0 = ei_pload(&blB[0*PacketSize]);
+                        B1 = ei_pload(&blB[1*PacketSize]);
+
+                        CJMADD(A0,B0,C0,T0);
+            if(nr==4)   B2 = ei_pload(&blB[2*PacketSize]);
+                        CJMADD(A1,B0,C4,T1);
+            if(nr==4)   B3 = ei_pload(&blB[3*PacketSize]);
+                        // no look-ahead B0 load here: this tail loop consumes a single k per iteration
+                        CJMADD(A0,B1,C1,T0);
+                        CJMADD(A1,B1,C5,T1);
+            if(nr==4) { CJMADD(A0,B2,C2,T0); }
+            if(nr==4) { CJMADD(A1,B2,C6,T1); }
+            if(nr==4) { CJMADD(A0,B3,C3,T0); }
+            if(nr==4) { CJMADD(A1,B3,C7,T1); }
+          }
 
           blB += nr*PacketSize;
           blA += mr;
@@ -189,45 +259,79 @@ struct ei_gebp_kernel
         const Scalar* blB = &blockB[j2*strideB*PacketSize+offsetB*nr];
         for(int k=0; k<peeled_kc; k+=4)
         {
-          PacketType B0, B1, B2, B3, A0;
-
-                    A0 = ei_pload(&blA[0*PacketSize]);
-                    B0 = ei_pload(&blB[0*PacketSize]);
-                    B1 = ei_pload(&blB[1*PacketSize]);
-                    C0 = cj.pmadd(A0, B0, C0);
-          if(nr==4) B2 = ei_pload(&blB[2*PacketSize]);
-          if(nr==4) B3 = ei_pload(&blB[3*PacketSize]);
-                    B0 = ei_pload(&blB[(nr==4 ? 4 : 2)*PacketSize]);
-                    C1 = cj.pmadd(A0, B1, C1);
-                    B1 = ei_pload(&blB[(nr==4 ? 5 : 3)*PacketSize]);
-          if(nr==4) C2 = cj.pmadd(A0, B2, C2);
-          if(nr==4) B2 = ei_pload(&blB[6*PacketSize]);
-          if(nr==4) C3 = cj.pmadd(A0, B3, C3);
-                    A0 = ei_pload(&blA[1*PacketSize]);
-          if(nr==4) B3 = ei_pload(&blB[7*PacketSize]);
-                    C0 = cj.pmadd(A0, B0, C0);
-                    B0 = ei_pload(&blB[(nr==4 ? 8 : 4)*PacketSize]);
-                    C1 = cj.pmadd(A0, B1, C1);
-                    B1 = ei_pload(&blB[(nr==4 ? 9 : 5)*PacketSize]);
-          if(nr==4) C2 = cj.pmadd(A0, B2, C2);
-          if(nr==4) B2 = ei_pload(&blB[10*PacketSize]);
-          if(nr==4) C3 = cj.pmadd(A0, B3, C3);
-                    A0 = ei_pload(&blA[2*PacketSize]);
-          if(nr==4) B3 = ei_pload(&blB[11*PacketSize]);
-
-                    C0 = cj.pmadd(A0, B0, C0);
-                    B0 = ei_pload(&blB[(nr==4 ? 12 : 6)*PacketSize]);
-                    C1 = cj.pmadd(A0, B1, C1);
-                    B1 = ei_pload(&blB[(nr==4 ? 13 : 7)*PacketSize]);
-          if(nr==4) C2 = cj.pmadd(A0, B2, C2);
-          if(nr==4) B2 = ei_pload(&blB[14*PacketSize]);
-          if(nr==4) C3 = cj.pmadd(A0, B3, C3);
-                    A0 = ei_pload(&blA[3*PacketSize]);
-          if(nr==4) B3 = ei_pload(&blB[15*PacketSize]);
-                    C0 = cj.pmadd(A0, B0, C0);
-                    C1 = cj.pmadd(A0, B1, C1);
-          if(nr==4) C2 = cj.pmadd(A0, B2, C2);
-          if(nr==4) C3 = cj.pmadd(A0, B3, C3);
+          if(nr==2)
+          {
+            PacketType B0, T0, A0;
+
+            A0 = ei_pload(&blA[0*PacketSize]);
+            B0 = ei_pload(&blB[0*PacketSize]);
+            CJMADD(A0,B0,C0,T0);
+            B0 = ei_pload(&blB[1*PacketSize]);
+            CJMADD(A0,B0,C1,T0);
+
+            A0 = ei_pload(&blA[1*PacketSize]);
+            B0 = ei_pload(&blB[2*PacketSize]);
+            CJMADD(A0,B0,C0,T0);
+            B0 = ei_pload(&blB[3*PacketSize]);
+            CJMADD(A0,B0,C1,T0);
+
+            A0 = ei_pload(&blA[2*PacketSize]);
+            B0 = ei_pload(&blB[4*PacketSize]);
+            CJMADD(A0,B0,C0,T0);
+            B0 = ei_pload(&blB[5*PacketSize]);
+            CJMADD(A0,B0,C1,T0);
+
+            A0 = ei_pload(&blA[3*PacketSize]);
+            B0 = ei_pload(&blB[6*PacketSize]);
+            CJMADD(A0,B0,C0,T0);
+            B0 = ei_pload(&blB[7*PacketSize]);
+            CJMADD(A0,B0,C1,T0);
+          }
+          else
+          {
+
+            PacketType B0, B1, B2, B3, A0;
+            PacketType T0, T1;
+
+                        A0 = ei_pload(&blA[0*PacketSize]);
+                        B0 = ei_pload(&blB[0*PacketSize]);
+                        B1 = ei_pload(&blB[1*PacketSize]);
+
+                        CJMADD(A0,B0,C0,T0);
+            if(nr==4)   B2 = ei_pload(&blB[2*PacketSize]);
+            if(nr==4)   B3 = ei_pload(&blB[3*PacketSize]);
+                        B0 = ei_pload(&blB[(nr==4 ? 4 : 2)*PacketSize]);
+                        CJMADD(A0,B1,C1,T1);
+                        B1 = ei_pload(&blB[(nr==4 ? 5 : 3)*PacketSize]);
+            if(nr==4) { CJMADD(A0,B2,C2,T0); }
+            if(nr==4)   B2 = ei_pload(&blB[6*PacketSize]);
+            if(nr==4) { CJMADD(A0,B3,C3,T1); }
+                        A0 = ei_pload(&blA[1*PacketSize]);
+            if(nr==4)   B3 = ei_pload(&blB[7*PacketSize]);
+                        CJMADD(A0,B0,C0,T0);
+                        B0 = ei_pload(&blB[(nr==4 ? 8 : 4)*PacketSize]);
+                        CJMADD(A0,B1,C1,T1);
+                        B1 = ei_pload(&blB[(nr==4 ? 9 : 5)*PacketSize]);
+            if(nr==4) { CJMADD(A0,B2,C2,T0); }
+            if(nr==4)   B2 = ei_pload(&blB[10*PacketSize]);
+            if(nr==4) { CJMADD(A0,B3,C3,T1); }
+                        A0 = ei_pload(&blA[2*PacketSize]);
+            if(nr==4)   B3 = ei_pload(&blB[11*PacketSize]);
+
+                        CJMADD(A0,B0,C0,T0);
+                        B0 = ei_pload(&blB[(nr==4 ? 12 : 6)*PacketSize]);
+                        CJMADD(A0,B1,C1,T1);
+                        B1 = ei_pload(&blB[(nr==4 ? 13 : 7)*PacketSize]);
+            if(nr==4) { CJMADD(A0,B2,C2,T0); }
+            if(nr==4)   B2 = ei_pload(&blB[14*PacketSize]);
+            if(nr==4) { CJMADD(A0,B3,C3,T1); }
+                        A0 = ei_pload(&blA[3*PacketSize]);
+            if(nr==4)   B3 = ei_pload(&blB[15*PacketSize]);
+                        CJMADD(A0,B0,C0,T0);
+                        CJMADD(A0,B1,C1,T1);
+            if(nr==4) { CJMADD(A0,B2,C2,T0); }
+            if(nr==4) { CJMADD(A0,B3,C3,T1); }
+          }
 
           blB += 4*nr*PacketSize;
           blA += 4*PacketSize;
@@ -235,17 +339,32 @@ struct ei_gebp_kernel
         // process remaining peeled loop
         for(int k=peeled_kc; k<depth; k++)
         {
-          PacketType B0, B1, B2, B3, A0;
-
-                    A0 = ei_pload(&blA[0*PacketSize]);
-                    B0 = ei_pload(&blB[0*PacketSize]);
-                    B1 = ei_pload(&blB[1*PacketSize]);
-                    C0 = cj.pmadd(A0, B0, C0);
-          if(nr==4) B2 = ei_pload(&blB[2*PacketSize]);
-          if(nr==4) B3 = ei_pload(&blB[3*PacketSize]);
-                    C1 = cj.pmadd(A0, B1, C1);
-          if(nr==4) C2 = cj.pmadd(A0, B2, C2);
-          if(nr==4) C3 = cj.pmadd(A0, B3, C3);
+          if(nr==2)
+          {
+            PacketType B0, T0, A0;
+
+            A0 = ei_pload(&blA[0*PacketSize]);
+            B0 = ei_pload(&blB[0*PacketSize]);
+            CJMADD(A0,B0,C0,T0);
+            B0 = ei_pload(&blB[1*PacketSize]);
+            CJMADD(A0,B0,C1,T0);
+          }
+          else
+          {
+            PacketType B0, B1, B2, B3, A0;
+            PacketType T0, T1;
+
+                        A0 = ei_pload(&blA[0*PacketSize]);
+                        B0 = ei_pload(&blB[0*PacketSize]);
+                        B1 = ei_pload(&blB[1*PacketSize]);
+            if(nr==4)   B2 = ei_pload(&blB[2*PacketSize]);
+            if(nr==4)   B3 = ei_pload(&blB[3*PacketSize]);
+
+                        CJMADD(A0,B0,C0,T0);
+                        CJMADD(A0,B1,C1,T1);
+            if(nr==4) { CJMADD(A0,B2,C2,T0); }
+            if(nr==4) { CJMADD(A0,B3,C3,T1); }
+          }
 
           blB += nr*PacketSize;
           blA += PacketSize;
@@ -268,17 +387,32 @@ struct ei_gebp_kernel
         const Scalar* blB = &blockB[j2*strideB*PacketSize+offsetB*nr];
         for(int k=0; k<depth; k++)
         {
-          Scalar B0, B1, B2, B3, A0;
-
-                    A0 =  blA[k];
-                    B0 =  blB[0*PacketSize];
-                    B1 =  blB[1*PacketSize];
-                    C0 = cj.pmadd(A0, B0, C0);
-          if(nr==4) B2 =  blB[2*PacketSize];
-          if(nr==4) B3 =  blB[3*PacketSize];
-                    C1 = cj.pmadd(A0, B1, C1);
-          if(nr==4) C2 = cj.pmadd(A0, B2, C2);
-          if(nr==4) C3 = cj.pmadd(A0, B3, C3);
+          if(nr==2)
+          {
+            Scalar B0, T0, A0;
+
+            A0 = blA[k]; // blA is never advanced in this loop, so the lhs value must be selected by k
+            B0 = blB[0*PacketSize];
+            CJMADD(A0,B0,C0,T0);
+            B0 = blB[1*PacketSize];
+            CJMADD(A0,B0,C1,T0);
+          }
+          else
+          {
+            Scalar B0, B1, B2, B3, A0;
+            Scalar T0, T1;
+
+                        A0 = blA[k];
+                        B0 = blB[0*PacketSize];
+                        B1 = blB[1*PacketSize];
+            if(nr==4)   B2 = blB[2*PacketSize];
+            if(nr==4)   B3 = blB[3*PacketSize];
+
+                        CJMADD(A0,B0,C0,T0);
+                        CJMADD(A0,B1,C1,T1);
+            if(nr==4) { CJMADD(A0,B2,C2,T0); }
+            if(nr==4) { CJMADD(A0,B3,C3,T1); }
+          }
 
           blB += nr*PacketSize;
         }
@@ -310,13 +444,13 @@ struct ei_gebp_kernel
         const Scalar* blB = &blockB[j2*strideB*PacketSize+offsetB];
         for(int k=0; k<depth; k++)
         {
-          PacketType B0, A0, A1;
+          PacketType B0, A0, A1, T0, T1;
 
           A0 = ei_pload(&blA[0*PacketSize]);
           A1 = ei_pload(&blA[1*PacketSize]);
           B0 = ei_pload(&blB[0*PacketSize]);
-          C0 = cj.pmadd(A0, B0, C0);
-          C4 = cj.pmadd(A1, B0, C4);
+          CJMADD(A0,B0,C0,T0);
+          CJMADD(A1,B0,C4,T1);
 
           blB += PacketSize;
           blA += mr;
@@ -334,7 +468,7 @@ struct ei_gebp_kernel
         #endif
 
         PacketType C0 = ei_ploadu(&res[(j2+0)*resStride + i]);
-        
+
         const Scalar* blB = &blockB[j2*strideB*PacketSize+offsetB];
         for(int k=0; k<depth; k++)
         {
@@ -363,6 +497,8 @@ struct ei_gebp_kernel
   }
 };
 
+#undef CJMADD
+
 // pack a block of the lhs
 // The travesal is as follow (mr==4):
 //   0  4  8 12 ...
@@ -474,7 +610,7 @@ struct ei_gemm_pack_rhs<Scalar, nr, ColMajor, PanelMode>
       // skip what we have after
       if(PanelMode) count += PacketSize * nr * (stride-offset-depth);
     }
-    
+
     // copy the remaining columns one at a time (nr==1)
     for(int j2=packet_cols; j2<cols; ++j2)
     {
diff --git a/Eigen/src/Core/util/BlasUtil.h b/Eigen/src/Core/util/BlasUtil.h
index 3777464dc..4d216d77a 100644
--- a/Eigen/src/Core/util/BlasUtil.h
+++ b/Eigen/src/Core/util/BlasUtil.h
@@ -166,7 +166,7 @@ template<typename XprType> struct ei_blas_traits
   };
   typedef typename ei_meta_if<int(ActualAccess)==HasDirectAccess,
     ExtractType,
-    typename _ExtractType::PlainMatrixType
+    typename _ExtractType::PlainObject
     >::ret DirectLinearAccessType;
   static inline ExtractType extract(const XprType& x) { return x; }
   static inline Scalar extractScalarFactor(const XprType&) { return Scalar(1); }
@@ -227,7 +227,7 @@ struct ei_blas_traits<Transpose<NestedXpr> >
   typedef Transpose<typename Base::_ExtractType> _ExtractType;
   typedef typename ei_meta_if<int(Base::ActualAccess)==HasDirectAccess,
     ExtractType,
-    typename ExtractType::PlainMatrixType
+    typename ExtractType::PlainObject
     >::ret DirectLinearAccessType;
   enum {
     IsTransposed = Base::IsTransposed ? 0 : 1
@@ -236,4 +236,22 @@ struct ei_blas_traits<Transpose<NestedXpr> >
   static inline Scalar extractScalarFactor(const XprType& x) { return Base::extractScalarFactor(x.nestedExpression()); }
 };
 
+template<typename T, int Access=ei_blas_traits<T>::ActualAccess> // Access defaults to ei_blas_traits<T>::ActualAccess
+struct ei_extract_data_selector { // generic case: yields a pointer to coefficient (0,0) of the expression extracted from m
+  static const typename T::Scalar* run(const T& m)
+  {
+    return &ei_blas_traits<T>::extract(m).const_cast_derived().coeffRef(0,0); // address of coefficient (0,0) of the extracted object. FIXME this should be .data()
+  }
+};
+
+template<typename T> // specialization selected when Access is NoDirectAccess
+struct ei_extract_data_selector<T,NoDirectAccess> {
+  static typename T::Scalar* run(const T&) { return 0; } // the argument is ignored: always returns a null pointer
+};
+
+template<typename T> const typename T::Scalar* ei_extract_data(const T& m) // entry point: forwards to the selector chosen by ei_blas_traits<T>::ActualAccess
+{
+  return ei_extract_data_selector<T>::run(m); // null pointer in the NoDirectAccess case, otherwise the address of coefficient (0,0)
+}
+
 #endif // EIGEN_BLASUTIL_H
diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h
index c2d45dc30..6096272fa 100644
--- a/Eigen/src/Core/util/ForwardDeclarations.h
+++ b/Eigen/src/Core/util/ForwardDeclarations.h
@@ -29,7 +29,7 @@
 template<typename T> struct ei_traits;
 template<typename T> struct NumTraits;
 
-template<typename Derived> struct AnyMatrixBase;
+template<typename Derived> struct EigenBase;
 
 template<typename _Scalar, int _Rows, int _Cols,
          int _Options = EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION | AutoAlign,
diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h
index dc1aa150b..37ccef047 100644
--- a/Eigen/src/Core/util/Macros.h
+++ b/Eigen/src/Core/util/Macros.h
@@ -211,7 +211,7 @@ using Eigen::ei_cos;
  */
 #if !EIGEN_ALIGN
   #define EIGEN_ALIGN_TO_BOUNDARY(n)
-#elif (defined __GNUC__)
+#elif (defined __GNUC__) || (defined __PGI)
   #define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n)))
 #elif (defined _MSC_VER)
   #define EIGEN_ALIGN_TO_BOUNDARY(n) __declspec(align(n))
diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h
index 8ddf4450a..eceb5ab2a 100644
--- a/Eigen/src/Core/util/XprHelper.h
+++ b/Eigen/src/Core/util/XprHelper.h
@@ -147,7 +147,7 @@ template<typename T, typename StorageType = typename ei_traits<T>::StorageType>
 template<typename T> struct ei_eval<T,Dense>
 {
   typedef typename ei_plain_matrix_type<T>::type type;
-//   typedef typename T::PlainMatrixType type;
+//   typedef typename T::PlainObject type;
 //   typedef T::Matrix<typename ei_traits<T>::Scalar,
 //                 ei_traits<T>::RowsAtCompileTime,
 //                 ei_traits<T>::ColsAtCompileTime,
@@ -201,6 +201,18 @@ template<typename T> struct ei_plain_matrix_type_row_major
 // we should be able to get rid of this one too
 template<typename T> struct ei_must_nest_by_value { enum { ret = false }; };
 
+template<class T> // compile-time trait, primary template: applies to every T not matched by a more specialized form
+struct ei_is_reference
+{
+  enum { ret = false }; // ret is the trait's result
+};
+
+template<class T> // partial specialization matching reference types T&
+struct ei_is_reference<T&>
+{
+  enum { ret = true }; // ret is the trait's result
+};
+
 /**
 * The reference selector for template expressions. The idea is that we don't
 * need to use references for expressions since they are light weight proxy
@@ -234,7 +246,7 @@ struct ei_ref_selector
   * const Matrix3d&, because the internal logic of ei_nested determined that since a was already a matrix, there was no point
   * in copying it into another matrix.
   */
-template<typename T, int n=1, typename PlainMatrixType = typename ei_eval<T>::type> struct ei_nested
+template<typename T, int n=1, typename PlainObject = typename ei_eval<T>::type> struct ei_nested
 {
   enum {
     CostEval   = (n+1) * int(NumTraits<typename ei_traits<T>::Scalar>::ReadCost),
@@ -244,7 +256,7 @@ template<typename T, int n=1, typename PlainMatrixType = typename ei_eval<T>::ty
   typedef typename ei_meta_if<
     ( int(ei_traits<T>::Flags) & EvalBeforeNestingBit ) ||
     ( int(CostEval) <= int(CostNoEval) ),
-      PlainMatrixType,
+      PlainObject,
       typename ei_ref_selector<T>::type
   >::ret type;
 };
@@ -258,7 +270,7 @@ template<unsigned int Flags> struct ei_are_flags_consistent
   * overloads for complex types */
 template<typename Derived,typename Scalar,typename OtherScalar,
          bool EnableIt = !ei_is_same_type<Scalar,OtherScalar>::ret >
-struct ei_special_scalar_op_base : public AnyMatrixBase<Derived>
+struct ei_special_scalar_op_base : public EigenBase<Derived>
 {
   // dummy operator* so that the
   // "using ei_special_scalar_op_base::operator*" compiles
@@ -266,7 +278,7 @@ struct ei_special_scalar_op_base : public AnyMatrixBase<Derived>
 };
 
 template<typename Derived,typename Scalar,typename OtherScalar>
-struct ei_special_scalar_op_base<Derived,Scalar,OtherScalar,true>  : public AnyMatrixBase<Derived>
+struct ei_special_scalar_op_base<Derived,Scalar,OtherScalar,true>  : public EigenBase<Derived>
 {
   const CwiseUnaryOp<ei_scalar_multiple2_op<Scalar,OtherScalar>, Derived>
   operator*(const OtherScalar& scalar) const
author	Benoit Jacob <jacob.benoit.1@gmail.com>	2010-02-25 21:07:30 -0500
committer	Benoit Jacob <jacob.benoit.1@gmail.com>	2010-02-25 21:07:30 -0500
commit	b1c6c215a43850b2bc5bdc393ab5a1179e858024 (patch)
tree	9ae1234383bef2204802606501a47bb5c05ec1d2 /Eigen/src/Core
parent	769641bc58745fecc1fa4e537466a1fff48f4a8a (diff)
parent	90e4a605ef920759a23cdbd24e6e7b69ce549162 (diff)