diff options
author | 2010-02-25 21:07:30 -0500 | |
---|---|---|
committer | 2010-02-25 21:07:30 -0500 | |
commit | b1c6c215a43850b2bc5bdc393ab5a1179e858024 (patch) | |
tree | 9ae1234383bef2204802606501a47bb5c05ec1d2 /Eigen/src/Core | |
parent | 769641bc58745fecc1fa4e537466a1fff48f4a8a (diff) | |
parent | 90e4a605ef920759a23cdbd24e6e7b69ce549162 (diff) |
merge
Diffstat (limited to 'Eigen/src/Core')
24 files changed, 572 insertions, 300 deletions
diff --git a/Eigen/src/Core/BandMatrix.h b/Eigen/src/Core/BandMatrix.h index 538e6dd76..432df0b34 100644 --- a/Eigen/src/Core/BandMatrix.h +++ b/Eigen/src/Core/BandMatrix.h @@ -57,7 +57,7 @@ struct ei_traits<BandMatrix<_Scalar,Rows,Cols,Supers,Subs,Options> > }; template<typename _Scalar, int Rows, int Cols, int Supers, int Subs, int Options> -class BandMatrix : public AnyMatrixBase<BandMatrix<_Scalar,Rows,Cols,Supers,Subs,Options> > +class BandMatrix : public EigenBase<BandMatrix<_Scalar,Rows,Cols,Supers,Subs,Options> > { public: diff --git a/Eigen/src/Core/DenseBase.h b/Eigen/src/Core/DenseBase.h index 2078f023b..5682d7278 100644 --- a/Eigen/src/Core/DenseBase.h +++ b/Eigen/src/Core/DenseBase.h @@ -40,7 +40,7 @@ template<typename Derived> class DenseBase : public ei_special_scalar_op_base<Derived,typename ei_traits<Derived>::Scalar, typename NumTraits<typename ei_traits<Derived>::Scalar>::Real> #else - : public AnyMatrixBase<Derived> + : public EigenBase<Derived> #endif // not EIGEN_PARSED_BY_DOXYGEN { public: @@ -53,8 +53,8 @@ template<typename Derived> class DenseBase typedef typename ei_traits<Derived>::Scalar Scalar; typedef typename ei_packet_traits<Scalar>::type PacketScalar; - using AnyMatrixBase<Derived>::derived; - using AnyMatrixBase<Derived>::const_cast_derived; + using EigenBase<Derived>::derived; + using EigenBase<Derived>::const_cast_derived; #endif // not EIGEN_PARSED_BY_DOXYGEN enum { @@ -292,13 +292,13 @@ template<typename Derived> class DenseBase Derived& operator=(const DenseBase& other); template<typename OtherDerived> - Derived& operator=(const AnyMatrixBase<OtherDerived> &other); + Derived& operator=(const EigenBase<OtherDerived> &other); template<typename OtherDerived> - Derived& operator+=(const AnyMatrixBase<OtherDerived> &other); + Derived& operator+=(const EigenBase<OtherDerived> &other); template<typename OtherDerived> - Derived& operator-=(const AnyMatrixBase<OtherDerived> &other); + Derived& operator-=(const EigenBase<OtherDerived> &other); template<typename OtherDerived> Derived& operator=(const ReturnByValue<OtherDerived>& func); diff --git a/Eigen/src/Core/DenseStorageBase.h b/Eigen/src/Core/DenseStorageBase.h index e93e439e6..89e6e7112 100644 --- a/Eigen/src/Core/DenseStorageBase.h +++ b/Eigen/src/Core/DenseStorageBase.h @@ -44,7 +44,7 @@ class DenseStorageBase : public _Base<Derived> public: enum { Options = _Options }; typedef _Base<Derived> Base; - typedef typename Base::PlainMatrixType PlainMatrixType; + typedef typename Base::PlainObject PlainObject; typedef typename Base::Scalar Scalar; typedef typename Base::PacketScalar PacketScalar; using Base::RowsAtCompileTime; @@ -338,19 +338,19 @@ class DenseStorageBase : public _Base<Derived> // EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED } - /** \copydoc MatrixBase::operator=(const AnyMatrixBase<OtherDerived>&) + /** \copydoc MatrixBase::operator=(const EigenBase<OtherDerived>&) */ template<typename OtherDerived> - EIGEN_STRONG_INLINE Derived& operator=(const AnyMatrixBase<OtherDerived> &other) + EIGEN_STRONG_INLINE Derived& operator=(const EigenBase<OtherDerived> &other) { resize(other.derived().rows(), other.derived().cols()); Base::operator=(other.derived()); return this->derived(); } - /** \sa MatrixBase::operator=(const AnyMatrixBase<OtherDerived>&) */ + /** \sa MatrixBase::operator=(const EigenBase<OtherDerived>&) */ template<typename OtherDerived> - EIGEN_STRONG_INLINE DenseStorageBase(const AnyMatrixBase<OtherDerived> &other) + EIGEN_STRONG_INLINE DenseStorageBase(const EigenBase<OtherDerived> &other) : m_storage(other.derived().rows() * other.derived().cols(), other.derived().rows(), other.derived().cols()) { _check_template_params(); @@ -527,7 +527,7 @@ struct ei_conservative_resize_like_impl { if (_this.rows() == rows && _this.cols() == cols) return; EIGEN_STATIC_ASSERT_DYNAMIC_SIZE(Derived) - typename Derived::PlainMatrixType tmp(rows,cols); + typename Derived::PlainObject tmp(rows,cols); const int common_rows = std::min(rows, _this.rows()); const int common_cols = std::min(cols, _this.cols()); tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols); @@ -546,7 +546,7 @@ struct ei_conservative_resize_like_impl EIGEN_STATIC_ASSERT_DYNAMIC_SIZE(Derived) EIGEN_STATIC_ASSERT_DYNAMIC_SIZE(OtherDerived) - typename Derived::PlainMatrixType tmp(other); + typename Derived::PlainObject tmp(other); const int common_rows = std::min(tmp.rows(), _this.rows()); const int common_cols = std::min(tmp.cols(), _this.cols()); tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols); @@ -560,7 +560,7 @@ struct ei_conservative_resize_like_impl<Derived,OtherDerived,true> static void run(DenseBase<Derived>& _this, int size) { if (_this.size() == size) return; - typename Derived::PlainMatrixType tmp(size); + typename Derived::PlainObject tmp(size); const int common_size = std::min<int>(_this.size(),size); tmp.segment(0,common_size) = _this.segment(0,common_size); _this.derived().swap(tmp); @@ -571,7 +571,7 @@ struct ei_conservative_resize_like_impl<Derived,OtherDerived,true> if (_this.rows() == other.rows() && _this.cols() == other.cols()) return; // segment(...) will check whether Derived/OtherDerived are vectors! - typename Derived::PlainMatrixType tmp(other); + typename Derived::PlainObject tmp(other); const int common_size = std::min<int>(_this.size(),tmp.size()); tmp.segment(0,common_size) = _this.segment(0,common_size); _this.derived().swap(tmp); diff --git a/Eigen/src/Core/DiagonalMatrix.h b/Eigen/src/Core/DiagonalMatrix.h index 08c046611..774b0d7ae 100644 --- a/Eigen/src/Core/DiagonalMatrix.h +++ b/Eigen/src/Core/DiagonalMatrix.h @@ -28,7 +28,7 @@ #ifndef EIGEN_PARSED_BY_DOXYGEN template<typename Derived> -class DiagonalBase : public AnyMatrixBase<Derived> +class DiagonalBase : public EigenBase<Derived> { public: typedef typename ei_traits<Derived>::DiagonalVectorType DiagonalVectorType; diff --git a/Eigen/src/Core/Dot.h b/Eigen/src/Core/Dot.h index f0c520b1f..201bd23ca 100644 --- a/Eigen/src/Core/Dot.h +++ b/Eigen/src/Core/Dot.h @@ -299,7 +299,7 @@ inline typename NumTraits<typename ei_traits<Derived>::Scalar>::Real MatrixBase< * \sa norm(), normalize() */ template<typename Derived> -inline const typename MatrixBase<Derived>::PlainMatrixType +inline const typename MatrixBase<Derived>::PlainObject MatrixBase<Derived>::normalized() const { typedef typename ei_nested<Derived>::type Nested; diff --git a/Eigen/src/Core/AnyMatrixBase.h b/Eigen/src/Core/EigenBase.h index a5d1cfe9f..cf1ce4376 100644 --- a/Eigen/src/Core/AnyMatrixBase.h +++ b/Eigen/src/Core/EigenBase.h @@ -23,21 +23,21 @@ // License and a copy of the GNU General Public License along with // Eigen. If not, see <http://www.gnu.org/licenses/>. -#ifndef EIGEN_ANYMATRIXBASE_H -#define EIGEN_ANYMATRIXBASE_H +#ifndef EIGEN_EIGENBASE_H +#define EIGEN_EIGENBASE_H /** Common base class for all classes T such that MatrixBase has an operator=(T) and a constructor MatrixBase(T). * - * In other words, an AnyMatrixBase object is an object that can be copied into a MatrixBase. + * In other words, an EigenBase object is an object that can be copied into a MatrixBase. * * Besides MatrixBase-derived classes, this also includes special matrix classes such as diagonal matrices, etc. * * Notice that this class is trivial, it is only used to disambiguate overloaded functions. */ -template<typename Derived> struct AnyMatrixBase +template<typename Derived> struct EigenBase { -// typedef typename ei_plain_matrix_type<Derived>::type PlainMatrixType; +// typedef typename ei_plain_matrix_type<Derived>::type PlainObject; /** \returns a reference to the derived object */ Derived& derived() { return *static_cast<Derived*>(this); } @@ -45,7 +45,7 @@ template<typename Derived> struct AnyMatrixBase const Derived& derived() const { return *static_cast<const Derived*>(this); } inline Derived& const_cast_derived() const - { return *static_cast<Derived*>(const_cast<AnyMatrixBase*>(this)); } + { return *static_cast<Derived*>(const_cast<EigenBase*>(this)); } /** \returns the number of rows. \sa cols(), RowsAtCompileTime */ inline int rows() const { return derived().rows(); } @@ -61,7 +61,7 @@ template<typename Derived> struct AnyMatrixBase { // This is the default implementation, // derived class can reimplement it in a more optimized way. - typename Dest::PlainMatrixType res(rows(),cols()); + typename Dest::PlainObject res(rows(),cols()); evalTo(res); dst += res; } @@ -71,7 +71,7 @@ template<typename Derived> struct AnyMatrixBase { // This is the default implementation, // derived class can reimplement it in a more optimized way. - typename Dest::PlainMatrixType res(rows(),cols()); + typename Dest::PlainObject res(rows(),cols()); evalTo(res); dst -= res; } @@ -108,7 +108,7 @@ template<typename Derived> struct AnyMatrixBase */ template<typename Derived> template<typename OtherDerived> -Derived& DenseBase<Derived>::operator=(const AnyMatrixBase<OtherDerived> &other) +Derived& DenseBase<Derived>::operator=(const EigenBase<OtherDerived> &other) { other.derived().evalTo(derived()); return derived(); @@ -116,7 +116,7 @@ Derived& DenseBase<Derived>::operator=(const AnyMatrixBase<OtherDerived> &other) template<typename Derived> template<typename OtherDerived> -Derived& DenseBase<Derived>::operator+=(const AnyMatrixBase<OtherDerived> &other) +Derived& DenseBase<Derived>::operator+=(const EigenBase<OtherDerived> &other) { other.derived().addTo(derived()); return derived(); @@ -124,7 +124,7 @@ Derived& DenseBase<Derived>::operator+=(const AnyMatrixBase<OtherDerived> &other template<typename Derived> template<typename OtherDerived> -Derived& DenseBase<Derived>::operator-=(const AnyMatrixBase<OtherDerived> &other) +Derived& DenseBase<Derived>::operator-=(const EigenBase<OtherDerived> &other) { other.derived().subTo(derived()); return derived(); @@ -137,7 +137,7 @@ Derived& DenseBase<Derived>::operator-=(const AnyMatrixBase<OtherDerived> &other template<typename Derived> template<typename OtherDerived> inline Derived& -MatrixBase<Derived>::operator*=(const AnyMatrixBase<OtherDerived> &other) +MatrixBase<Derived>::operator*=(const EigenBase<OtherDerived> &other) { other.derived().applyThisOnTheRight(derived()); return derived(); @@ -146,7 +146,7 @@ MatrixBase<Derived>::operator*=(const AnyMatrixBase<OtherDerived> &other) /** replaces \c *this by \c *this * \a other. It is equivalent to MatrixBase::operator*=() */ template<typename Derived> template<typename OtherDerived> -inline void MatrixBase<Derived>::applyOnTheRight(const AnyMatrixBase<OtherDerived> &other) +inline void MatrixBase<Derived>::applyOnTheRight(const EigenBase<OtherDerived> &other) { other.derived().applyThisOnTheRight(derived()); } @@ -154,9 +154,9 @@ inline void MatrixBase<Derived>::applyOnTheRight(const AnyMatrixBase<OtherDerive /** replaces \c *this by \c *this * \a other. */ template<typename Derived> template<typename OtherDerived> -inline void MatrixBase<Derived>::applyOnTheLeft(const AnyMatrixBase<OtherDerived> &other) +inline void MatrixBase<Derived>::applyOnTheLeft(const EigenBase<OtherDerived> &other) { other.derived().applyThisOnTheLeft(derived()); } -#endif // EIGEN_ANYMATRIXBASE_H +#endif // EIGEN_EIGENBASE_H diff --git a/Eigen/src/Core/Flagged.h b/Eigen/src/Core/Flagged.h index 9d14aceaa..9413b74fa 100644 --- a/Eigen/src/Core/Flagged.h +++ b/Eigen/src/Core/Flagged.h @@ -110,7 +110,7 @@ template<typename ExpressionType, unsigned int Added, unsigned int Removed> clas const ExpressionType& _expression() const { return m_matrix; } template<typename OtherDerived> - typename ExpressionType::PlainMatrixType solveTriangular(const MatrixBase<OtherDerived>& other) const; + typename ExpressionType::PlainObject solveTriangular(const MatrixBase<OtherDerived>& other) const; template<typename OtherDerived> void solveTriangularInPlace(const MatrixBase<OtherDerived>& other) const; diff --git a/Eigen/src/Core/Matrix.h b/Eigen/src/Core/Matrix.h index b494b2f00..eae2711f4 100644 --- a/Eigen/src/Core/Matrix.h +++ b/Eigen/src/Core/Matrix.h @@ -139,7 +139,7 @@ class Matrix EIGEN_DENSE_PUBLIC_INTERFACE(Matrix) - typedef typename Base::PlainMatrixType PlainMatrixType; + typedef typename Base::PlainObject PlainObject; enum { NeedsToAlign = (!(Options&DontAlign)) && SizeAtCompileTime!=Dynamic && ((sizeof(Scalar)*SizeAtCompileTime)%16)==0 }; @@ -181,10 +181,10 @@ class Matrix /** * \brief Copies the generic expression \a other into *this. - * \copydetails DenseBase::operator=(const AnyMatrixBase<OtherDerived> &other) + * \copydetails DenseBase::operator=(const EigenBase<OtherDerived> &other) */ template<typename OtherDerived> - EIGEN_STRONG_INLINE Matrix& operator=(const AnyMatrixBase<OtherDerived> &other) + EIGEN_STRONG_INLINE Matrix& operator=(const EigenBase<OtherDerived> &other) { return Base::operator=(other); } @@ -297,10 +297,10 @@ class Matrix } /** \brief Copy constructor for generic expressions. - * \sa MatrixBase::operator=(const AnyMatrixBase<OtherDerived>&) + * \sa MatrixBase::operator=(const EigenBase<OtherDerived>&) */ template<typename OtherDerived> - EIGEN_STRONG_INLINE Matrix(const AnyMatrixBase<OtherDerived> &other) + EIGEN_STRONG_INLINE Matrix(const EigenBase<OtherDerived> &other) : Base(other.derived().rows() * other.derived().cols(), other.derived().rows(), other.derived().cols()) { Base::_check_template_params(); diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h index 229195046..9c62163ba 100644 --- a/Eigen/src/Core/MatrixBase.h +++ b/Eigen/src/Core/MatrixBase.h @@ -121,7 +121,7 @@ template<typename Derived> class MatrixBase * * This is not necessarily exactly the return type of eval(). In the case of plain matrices, * the return type of eval() is a const reference to a matrix, not a matrix! It is however guaranteed - * that the return type of eval() is either PlainMatrixType or const PlainMatrixType&. + * that the return type of eval() is either PlainObject or const PlainObject&. */ typedef Matrix<typename ei_traits<Derived>::Scalar, ei_traits<Derived>::RowsAtCompileTime, @@ -129,8 +129,7 @@ template<typename Derived> class MatrixBase AutoAlign | (ei_traits<Derived>::Flags&RowMajorBit ? RowMajor : ColMajor), ei_traits<Derived>::MaxRowsAtCompileTime, ei_traits<Derived>::MaxColsAtCompileTime - > PlainMatrixType; - // typedef typename ei_plain_matrix_type<Derived>::type PlainMatrixType; + > PlainObject; #ifndef EIGEN_PARSED_BY_DOXYGEN /** \internal Represents a matrix with all coefficients equal to one another*/ @@ -193,13 +192,13 @@ template<typename Derived> class MatrixBase lazyProduct(const MatrixBase<OtherDerived> &other) const; template<typename OtherDerived> - Derived& operator*=(const AnyMatrixBase<OtherDerived>& other); + Derived& operator*=(const EigenBase<OtherDerived>& other); template<typename OtherDerived> - void applyOnTheLeft(const AnyMatrixBase<OtherDerived>& other); + void applyOnTheLeft(const EigenBase<OtherDerived>& other); template<typename OtherDerived> - void applyOnTheRight(const AnyMatrixBase<OtherDerived>& other); + void applyOnTheRight(const EigenBase<OtherDerived>& other); template<typename DiagonalDerived> const DiagonalProduct<Derived, DiagonalDerived, OnTheRight> @@ -212,7 +211,7 @@ template<typename Derived> class MatrixBase RealScalar stableNorm() const; RealScalar blueNorm() const; RealScalar hypotNorm() const; - const PlainMatrixType normalized() const; + const PlainObject normalized() const; void normalize(); const AdjointReturnType adjoint() const; @@ -301,9 +300,9 @@ template<typename Derived> class MatrixBase /////////// LU module /////////// - const FullPivLU<PlainMatrixType> fullPivLu() const; - const PartialPivLU<PlainMatrixType> partialPivLu() const; - const PartialPivLU<PlainMatrixType> lu() const; + const FullPivLU<PlainObject> fullPivLu() const; + const PartialPivLU<PlainObject> partialPivLu() const; + const PartialPivLU<PlainObject> lu() const; const ei_inverse_impl<Derived> inverse() const; template<typename ResultType> void computeInverseAndDetWithCheck( @@ -322,29 +321,29 @@ template<typename Derived> class MatrixBase /////////// Cholesky module /////////// - const LLT<PlainMatrixType> llt() const; - const LDLT<PlainMatrixType> ldlt() const; + const LLT<PlainObject> llt() const; + const LDLT<PlainObject> ldlt() const; /////////// QR module /////////// - const HouseholderQR<PlainMatrixType> householderQr() const; - const ColPivHouseholderQR<PlainMatrixType> colPivHouseholderQr() const; - const FullPivHouseholderQR<PlainMatrixType> fullPivHouseholderQr() const; + const HouseholderQR<PlainObject> householderQr() const; + const ColPivHouseholderQR<PlainObject> colPivHouseholderQr() const; + const FullPivHouseholderQR<PlainObject> fullPivHouseholderQr() const; EigenvaluesReturnType eigenvalues() const; RealScalar operatorNorm() const; /////////// SVD module /////////// - SVD<PlainMatrixType> svd() const; + SVD<PlainObject> svd() const; /////////// Geometry module /////////// template<typename OtherDerived> - PlainMatrixType cross(const MatrixBase<OtherDerived>& other) const; + PlainObject cross(const MatrixBase<OtherDerived>& other) const; template<typename OtherDerived> - PlainMatrixType cross3(const MatrixBase<OtherDerived>& other) const; - PlainMatrixType unitOrthogonal(void) const; + PlainObject cross3(const MatrixBase<OtherDerived>& other) const; + PlainObject unitOrthogonal(void) const; Matrix<Scalar,3,1> eulerAngles(int a0, int a1, int a2) const; const ScalarMultipleReturnType operator*(const UniformScaling<Scalar>& s) const; enum { diff --git a/Eigen/src/Core/PermutationMatrix.h b/Eigen/src/Core/PermutationMatrix.h index 284baf678..46884dc3f 100644 --- a/Eigen/src/Core/PermutationMatrix.h +++ b/Eigen/src/Core/PermutationMatrix.h @@ -47,7 +47,7 @@ * \sa class DiagonalMatrix */ template<int SizeAtCompileTime, int MaxSizeAtCompileTime = SizeAtCompileTime> class PermutationMatrix; -template<typename PermutationType, typename MatrixType, int Side> struct ei_permut_matrix_product_retval; +template<typename PermutationType, typename MatrixType, int Side, bool Transposed=false> struct ei_permut_matrix_product_retval; template<int SizeAtCompileTime, int MaxSizeAtCompileTime> struct ei_traits<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime> > @@ -55,7 +55,7 @@ struct ei_traits<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime> > {}; template<int SizeAtCompileTime, int MaxSizeAtCompileTime> -class PermutationMatrix : public AnyMatrixBase<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime> > +class PermutationMatrix : public EigenBase<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime> > { public: @@ -132,6 +132,9 @@ class PermutationMatrix : public AnyMatrixBase<PermutationMatrix<SizeAtCompileTi /** \returns the number of columns */ inline int cols() const { return m_indices.size(); } + /** \returns the size of a side of the respective square matrix, i.e., the number of indices */ + inline int size() const { return m_indices.size(); } + #ifndef EIGEN_PARSED_BY_DOXYGEN template<typename DenseDerived> void evalTo(MatrixBase<DenseDerived>& other) const @@ -144,7 +147,7 @@ class PermutationMatrix : public AnyMatrixBase<PermutationMatrix<SizeAtCompileTi /** \returns a Matrix object initialized from this permutation matrix. Notice that it * is inefficient to return this Matrix object by value. For efficiency, favor using - * the Matrix constructor taking AnyMatrixBase objects. + * the Matrix constructor taking EigenBase objects. */ DenseMatrixType toDenseMatrix() const { @@ -213,16 +216,29 @@ class PermutationMatrix : public AnyMatrixBase<PermutationMatrix<SizeAtCompileTi return *this; } - /**** inversion and multiplication helpers to hopefully get RVO ****/ + /** \returns the inverse permutation matrix. + * + * \note \note_try_to_help_rvo + */ + inline Transpose<PermutationMatrix> inverse() const + { return *this; } + /** \returns the tranpose permutation matrix. + * + * \note \note_try_to_help_rvo + */ + inline Transpose<PermutationMatrix> transpose() const + { return *this; } + + /**** multiplication helpers to hopefully get RVO ****/ #ifndef EIGEN_PARSED_BY_DOXYGEN - protected: - enum Inverse_t {Inverse}; - PermutationMatrix(Inverse_t, const PermutationMatrix& other) - : m_indices(other.m_indices.size()) + template<int OtherSize, int OtherMaxSize> + PermutationMatrix(const Transpose<PermutationMatrix<OtherSize,OtherMaxSize> >& other) + : m_indices(other.nestedPermutation().size()) { - for (int i=0; i<rows();++i) m_indices.coeffRef(other.m_indices.coeff(i)) = i; + for (int i=0; i<rows();++i) m_indices.coeffRef(other.nestedPermutation().indices().coeff(i)) = i; } + protected: enum Product_t {Product}; PermutationMatrix(Product_t, const PermutationMatrix& lhs, const PermutationMatrix& rhs) : m_indices(lhs.m_indices.size()) @@ -233,12 +249,7 @@ class PermutationMatrix : public AnyMatrixBase<PermutationMatrix<SizeAtCompileTi #endif public: - /** \returns the inverse permutation matrix. - * - * \note \note_try_to_help_rvo - */ - inline PermutationMatrix inverse() const - { return PermutationMatrix(Inverse, *this); } + /** \returns the product permutation matrix. * * \note \note_try_to_help_rvo @@ -247,6 +258,22 @@ class PermutationMatrix : public AnyMatrixBase<PermutationMatrix<SizeAtCompileTi inline PermutationMatrix operator*(const PermutationMatrix<OtherSize, OtherMaxSize>& other) const { return PermutationMatrix(Product, *this, other); } + /** \returns the product of a permutation with another inverse permutation. + * + * \note \note_try_to_help_rvo + */ + template<int OtherSize, int OtherMaxSize> + inline PermutationMatrix operator*(const Transpose<PermutationMatrix<OtherSize,OtherMaxSize> >& other) const + { return PermutationMatrix(Product, *this, other.eval()); } + + /** \returns the product of an inverse permutation with another permutation. + * + * \note \note_try_to_help_rvo + */ + template<int OtherSize, int OtherMaxSize> friend + inline PermutationMatrix operator*(const Transpose<PermutationMatrix<OtherSize,OtherMaxSize> >& other, const PermutationMatrix& perm) + { return PermutationMatrix(Product, other.eval(), perm); } + protected: IndicesType m_indices; @@ -277,15 +304,15 @@ operator*(const PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime> &perm (permutation, matrix.derived()); } -template<typename PermutationType, typename MatrixType, int Side> -struct ei_traits<ei_permut_matrix_product_retval<PermutationType, MatrixType, Side> > +template<typename PermutationType, typename MatrixType, int Side, bool Transposed> +struct ei_traits<ei_permut_matrix_product_retval<PermutationType, MatrixType, Side, Transposed> > { - typedef typename MatrixType::PlainMatrixType ReturnMatrixType; + typedef typename MatrixType::PlainObject ReturnType; }; -template<typename PermutationType, typename MatrixType, int Side> +template<typename PermutationType, typename MatrixType, int Side, bool Transposed> struct ei_permut_matrix_product_retval - : public ReturnByValue<ei_permut_matrix_product_retval<PermutationType, MatrixType, Side> > + : public ReturnByValue<ei_permut_matrix_product_retval<PermutationType, MatrixType, Side, Transposed> > { typedef typename ei_cleantype<typename MatrixType::Nested>::type MatrixTypeNestedCleaned; @@ -299,21 +326,46 @@ struct ei_permut_matrix_product_retval template<typename Dest> inline void evalTo(Dest& dst) const { const int n = Side==OnTheLeft ? rows() : cols(); - for(int i = 0; i < n; ++i) + + if(ei_is_same_type<MatrixTypeNestedCleaned,Dest>::ret && ei_extract_data(dst) == ei_extract_data(m_matrix)) { - Block< - Dest, - Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, - Side==OnTheRight ? 1 : Dest::ColsAtCompileTime - >(dst, Side==OnTheLeft ? m_permutation.indices().coeff(i) : i) - - = - - Block< - MatrixTypeNestedCleaned, - Side==OnTheLeft ? 1 : MatrixType::RowsAtCompileTime, - Side==OnTheRight ? 1 : MatrixType::ColsAtCompileTime - >(m_matrix, Side==OnTheRight ? m_permutation.indices().coeff(i) : i); + // apply the permutation inplace + Matrix<bool,PermutationType::RowsAtCompileTime,1,0,PermutationType::MaxRowsAtCompileTime> mask(m_permutation.size()); + mask.fill(false); + int r = 0; + while(r < m_permutation.size()) + { + // search for the next seed + while(r<m_permutation.size() && mask[r]) r++; + if(r>=m_permutation.size()) + break; + // we got one, let's follow it until we are back to the seed + int k0 = r++; + int kPrev = k0; + mask.coeffRef(k0) = true; + for(int k=m_permutation.indices().coeff(k0); k!=k0; k=m_permutation.indices().coeff(k)) + { + Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>(dst, k) + .swap(Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime> + (dst,((Side==OnTheLeft) ^ Transposed) ? k0 : kPrev)); + + mask.coeffRef(k) = true; + kPrev = k; + } + } + } + else + { + for(int i = 0; i < n; ++i) + { + Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime> + (dst, ((Side==OnTheLeft) ^ Transposed) ? m_permutation.indices().coeff(i) : i) + + = + + Block<MatrixTypeNestedCleaned,Side==OnTheLeft ? 1 : MatrixType::RowsAtCompileTime,Side==OnTheRight ? 1 : MatrixType::ColsAtCompileTime> + (m_matrix, ((Side==OnTheRight) ^ Transposed) ? m_permutation.indices().coeff(i) : i); + } } } @@ -322,4 +374,78 @@ struct ei_permut_matrix_product_retval const typename MatrixType::Nested m_matrix; }; +/* Template partial specialization for transposed/inverse permutations */ + +template<int SizeAtCompileTime, int MaxSizeAtCompileTime> +struct ei_traits<Transpose<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime> > > + : ei_traits<Matrix<int,SizeAtCompileTime,SizeAtCompileTime,0,MaxSizeAtCompileTime,MaxSizeAtCompileTime> > +{}; + +template<int SizeAtCompileTime, int MaxSizeAtCompileTime> +class Transpose<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime> > + : public EigenBase<Transpose<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime> > > +{ + typedef PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime> PermutationType; + typedef typename PermutationType::IndicesType IndicesType; + public: + + #ifndef EIGEN_PARSED_BY_DOXYGEN + typedef ei_traits<PermutationType> Traits; + typedef Matrix<int,SizeAtCompileTime,SizeAtCompileTime,0,MaxSizeAtCompileTime,MaxSizeAtCompileTime> + DenseMatrixType; + enum { + Flags = Traits::Flags, + CoeffReadCost = Traits::CoeffReadCost, + RowsAtCompileTime = Traits::RowsAtCompileTime, + ColsAtCompileTime = Traits::ColsAtCompileTime, + MaxRowsAtCompileTime = Traits::MaxRowsAtCompileTime, + MaxColsAtCompileTime = Traits::MaxColsAtCompileTime + }; + typedef typename Traits::Scalar Scalar; + #endif + + Transpose(const PermutationType& p) : m_permutation(p) {} + + inline int rows() const { return m_permutation.rows(); } + inline int cols() const { return m_permutation.cols(); } + + #ifndef EIGEN_PARSED_BY_DOXYGEN + template<typename DenseDerived> + void evalTo(MatrixBase<DenseDerived>& other) const + { + other.setZero(); + for (int i=0; i<rows();++i) + other.coeffRef(i, m_permutation.indices().coeff(i)) = typename DenseDerived::Scalar(1); + } + #endif + + /** \return the equivalent permutation matrix */ + PermutationType eval() const { return *this; } + + DenseMatrixType toDenseMatrix() const { return *this; } + + /** \returns the matrix with the inverse permutation applied to the columns. + */ + template<typename Derived> friend + inline const ei_permut_matrix_product_retval<PermutationType, Derived, OnTheRight, true> + operator*(const MatrixBase<Derived>& matrix, const Transpose& trPerm) + { + return ei_permut_matrix_product_retval<PermutationType, Derived, OnTheRight, true>(trPerm.m_permutation, matrix.derived()); + } + + /** \returns the matrix with the inverse permutation applied to the rows. + */ + template<typename Derived> + inline const ei_permut_matrix_product_retval<PermutationType, Derived, OnTheLeft, true> + operator*(const MatrixBase<Derived>& matrix) const + { + return ei_permut_matrix_product_retval<PermutationType, Derived, OnTheLeft, true>(m_permutation, matrix.derived()); + } + + const PermutationType& nestedPermutation() const { return m_permutation; } + + protected: + const PermutationType& m_permutation; +}; + #endif // EIGEN_PERMUTATIONMATRIX_H diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h index af05773ee..236e4f130 100644 --- a/Eigen/src/Core/Product.h +++ b/Eigen/src/Core/Product.h @@ -50,8 +50,8 @@ class GeneralProduct; template<int Rows, int Cols, int Depth> struct ei_product_type_selector; enum { - Large = Dynamic, - Small = Dynamic/2 + Large = 2, + Small = 3 }; template<typename Lhs, typename Rhs> struct ei_product_type @@ -95,10 +95,10 @@ template<> struct ei_product_type_selector<Small, Large, 1> template<> struct ei_product_type_selector<Large, Small, 1> { enum { ret = LazyCoeffBasedProductMode }; }; template<> struct ei_product_type_selector<1, Large,Small> { enum { ret = GemvProduct }; }; template<> struct ei_product_type_selector<1, Large,Large> { enum { ret = GemvProduct }; }; -template<> struct ei_product_type_selector<1, Small,Large> { enum { ret = GemvProduct }; }; +template<> struct ei_product_type_selector<1, Small,Large> { enum { ret = CoeffBasedProductMode }; }; template<> struct ei_product_type_selector<Large,1, Small> { enum { ret = GemvProduct }; }; template<> struct ei_product_type_selector<Large,1, Large> { enum { ret = GemvProduct }; }; -template<> struct ei_product_type_selector<Small,1, Large> { enum { ret = GemvProduct }; }; +template<> struct ei_product_type_selector<Small,1, Large> { enum { ret = CoeffBasedProductMode }; }; template<> struct ei_product_type_selector<Small,Small,Large> { enum { ret = GemmProduct }; }; template<> struct ei_product_type_selector<Large,Small,Large> { enum { ret = GemmProduct }; }; template<> struct ei_product_type_selector<Small,Large,Large> { enum { ret = GemmProduct }; }; diff --git a/Eigen/src/Core/ProductBase.h b/Eigen/src/Core/ProductBase.h index 481e7c760..789aecfb6 100644 --- a/Eigen/src/Core/ProductBase.h +++ b/Eigen/src/Core/ProductBase.h @@ -88,7 +88,7 @@ class ProductBase : public MatrixBase<Derived> public: - typedef typename Base::PlainMatrixType PlainMatrixType; + typedef typename Base::PlainObject PlainObject; ProductBase(const Lhs& lhs, const Rhs& rhs) : m_lhs(lhs), m_rhs(rhs) @@ -116,8 +116,8 @@ class ProductBase : public MatrixBase<Derived> const _LhsNested& lhs() const { return m_lhs; } const _RhsNested& rhs() const { return m_rhs; } - // Implicit convertion to the nested type (trigger the evaluation of the product) - operator const PlainMatrixType& () const + // Implicit conversion to the nested type (trigger the evaluation of the product) + operator const PlainObject& () const { m_result.resize(m_lhs.rows(), m_rhs.cols()); this->evalTo(m_result); @@ -139,7 +139,7 @@ class ProductBase : public MatrixBase<Derived> const LhsNested m_lhs; const RhsNested m_rhs; - mutable PlainMatrixType m_result; + mutable PlainObject m_result; private: @@ -152,10 +152,10 @@ class ProductBase : public MatrixBase<Derived> // here we need to overload the nested rule for products // such that the nested type is a const reference to a plain matrix -template<typename Lhs, typename Rhs, int Mode, int N, typename PlainMatrixType> -struct ei_nested<GeneralProduct<Lhs,Rhs,Mode>, N, PlainMatrixType> +template<typename Lhs, typename Rhs, int Mode, int N, typename PlainObject> +struct ei_nested<GeneralProduct<Lhs,Rhs,Mode>, N, PlainObject> { - typedef PlainMatrixType const& type; + typedef PlainObject const& type; }; template<typename NestedProduct> diff --git a/Eigen/src/Core/ReturnByValue.h b/Eigen/src/Core/ReturnByValue.h index 8d45fc31b..160b973bd 100644 --- a/Eigen/src/Core/ReturnByValue.h +++ b/Eigen/src/Core/ReturnByValue.h @@ -31,13 +31,13 @@ */ template<typename Derived> struct ei_traits<ReturnByValue<Derived> > - : public ei_traits<typename ei_traits<Derived>::ReturnMatrixType> + : public ei_traits<typename ei_traits<Derived>::ReturnType> { enum { // We're disabling the DirectAccess because e.g. the constructor of // the Block-with-DirectAccess expression requires to have a coeffRef method. // Also, we don't want to have to implement the stride stuff. - Flags = (ei_traits<typename ei_traits<Derived>::ReturnMatrixType>::Flags + Flags = (ei_traits<typename ei_traits<Derived>::ReturnType>::Flags | EvalBeforeNestingBit) & ~DirectAccessBit }; }; @@ -46,18 +46,18 @@ struct ei_traits<ReturnByValue<Derived> > * So the only way that nesting it in an expression can work, is by evaluating it into a plain matrix. * So ei_nested always gives the plain return matrix type. */ -template<typename Derived,int n,typename PlainMatrixType> -struct ei_nested<ReturnByValue<Derived>, n, PlainMatrixType> +template<typename Derived,int n,typename PlainObject> +struct ei_nested<ReturnByValue<Derived>, n, PlainObject> { - typedef typename ei_traits<Derived>::ReturnMatrixType type; + typedef typename ei_traits<Derived>::ReturnType type; }; template<typename Derived> class ReturnByValue - : public ei_traits<Derived>::ReturnMatrixType::template MakeBase<ReturnByValue<Derived> >::Type + : public ei_traits<Derived>::ReturnType::template MakeBase<ReturnByValue<Derived> >::Type { public: - typedef typename ei_traits<Derived>::ReturnMatrixType ReturnMatrixType; - typedef typename ReturnMatrixType::template MakeBase<ReturnByValue<Derived> >::Type Base; + typedef typename ei_traits<Derived>::ReturnType ReturnType; + typedef typename ReturnType::template MakeBase<ReturnByValue<Derived> >::Type Base; EIGEN_DENSE_PUBLIC_INTERFACE(ReturnByValue) template<typename Dest> diff --git a/Eigen/src/Core/SelfAdjointView.h b/Eigen/src/Core/SelfAdjointView.h index add5a3afb..0b57b9968 100644 --- a/Eigen/src/Core/SelfAdjointView.h +++ b/Eigen/src/Core/SelfAdjointView.h @@ -68,7 +68,7 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView enum { Mode = ei_traits<SelfAdjointView>::Mode }; - typedef typename MatrixType::PlainMatrixType PlainMatrixType; + typedef typename MatrixType::PlainObject PlainObject; inline SelfAdjointView(const MatrixType& matrix) : m_matrix(matrix) { ei_assert(ei_are_flags_consistent<Mode>::ret); } @@ -147,8 +147,8 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView /////////// Cholesky module /////////// - const LLT<PlainMatrixType, UpLo> llt() const; - const LDLT<PlainMatrixType> ldlt() const; + const LLT<PlainObject, UpLo> llt() const; + const LDLT<PlainObject> ldlt() const; protected: const typename MatrixType::Nested m_matrix; diff --git a/Eigen/src/Core/SelfCwiseBinaryOp.h b/Eigen/src/Core/SelfCwiseBinaryOp.h index 58aee182d..529a9994d 100644 --- a/Eigen/src/Core/SelfCwiseBinaryOp.h +++ b/Eigen/src/Core/SelfCwiseBinaryOp.h @@ -125,8 +125,8 @@ template<typename Derived> inline Derived& DenseBase<Derived>::operator*=(const Scalar& other) { SelfCwiseBinaryOp<ei_scalar_product_op<Scalar>, Derived> tmp(derived()); - typedef typename Derived::PlainMatrixType PlainMatrixType; - tmp = PlainMatrixType::Constant(rows(),cols(),other); + typedef typename Derived::PlainObject PlainObject; + tmp = PlainObject::Constant(rows(),cols(),other); return derived(); } @@ -134,8 +134,8 @@ template<typename Derived> inline Derived& DenseBase<Derived>::operator/=(const Scalar& other) { SelfCwiseBinaryOp<typename ei_meta_if<NumTraits<Scalar>::HasFloatingPoint,ei_scalar_product_op<Scalar>,ei_scalar_quotient_op<Scalar> >::ret, Derived> tmp(derived()); - typedef typename Derived::PlainMatrixType PlainMatrixType; - tmp = PlainMatrixType::Constant(rows(),cols(), NumTraits<Scalar>::HasFloatingPoint ? Scalar(1)/other : other); + typedef typename Derived::PlainObject PlainObject; + tmp = PlainObject::Constant(rows(),cols(), NumTraits<Scalar>::HasFloatingPoint ? Scalar(1)/other : other); return derived(); } diff --git a/Eigen/src/Core/Transpose.h b/Eigen/src/Core/Transpose.h index 47dae5776..1f064d1c2 100644 --- a/Eigen/src/Core/Transpose.h +++ b/Eigen/src/Core/Transpose.h @@ -296,25 +296,6 @@ struct ei_blas_traits<SelfCwiseBinaryOp<BinOp,NestedXpr> > static inline const XprType extract(const XprType& x) { return x; } }; - -template<typename T, int Access=ei_blas_traits<T>::ActualAccess> -struct ei_extract_data_selector { - static typename T::Scalar* run(const T& m) - { - return &ei_blas_traits<T>::extract(m).const_cast_derived().coeffRef(0,0); - } -}; - -template<typename T> -struct ei_extract_data_selector<T,NoDirectAccess> { - static typename T::Scalar* run(const T&) { return 0; } -}; - -template<typename T> typename T::Scalar* ei_extract_data(const T& m) -{ - return ei_extract_data_selector<T>::run(m); -} - template<typename Scalar, bool DestIsTranposed, typename OtherDerived> struct ei_check_transpose_aliasing_selector { diff --git a/Eigen/src/Core/TriangularMatrix.h b/Eigen/src/Core/TriangularMatrix.h index c61a6d7cc..2230680d1 100644 --- a/Eigen/src/Core/TriangularMatrix.h +++ b/Eigen/src/Core/TriangularMatrix.h @@ -32,7 +32,7 @@ * * \brief Base class for triangular part in a matrix */ -template<typename Derived> class TriangularBase : public AnyMatrixBase<Derived> +template<typename Derived> class TriangularBase : public EigenBase<Derived> { public: @@ -149,7 +149,7 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView typedef TriangularBase<TriangularView> Base; typedef typename ei_traits<TriangularView>::Scalar Scalar; typedef _MatrixType MatrixType; - typedef typename MatrixType::PlainMatrixType DenseMatrixType; + typedef typename MatrixType::PlainObject DenseMatrixType; typedef typename MatrixType::Nested MatrixTypeNested; typedef typename ei_cleantype<MatrixTypeNested>::type _MatrixTypeNested; diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index a5a56f759..f78bf0dd3 100644 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -122,7 +122,7 @@ template<> EIGEN_STRONG_INLINE Packet4f ei_pmul<Packet4f>(const Packet4f& a, con template<> EIGEN_STRONG_INLINE Packet2d ei_pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_mul_pd(a,b); } template<> EIGEN_STRONG_INLINE Packet4i ei_pmul<Packet4i>(const Packet4i& a, const Packet4i& b) { -#ifdef __SSE4_1__ +#ifdef EIGEN_VECTORIZE_SSE4_1 return _mm_mullo_epi32(a,b); #else // this version is slightly faster than 4 scalar products @@ -269,7 +269,7 @@ template<> EIGEN_STRONG_INLINE Packet2d ei_pabs(const Packet2d& a) } template<> EIGEN_STRONG_INLINE Packet4i ei_pabs(const Packet4i& a) { - #ifdef __SSSE3__ + #ifdef EIGEN_VECTORIZE_SSSE3 return _mm_abs_epi32(a); #else Packet4i aux = _mm_srai_epi32(a,31); @@ -278,7 +278,7 @@ template<> EIGEN_STRONG_INLINE Packet4i ei_pabs(const Packet4i& a) } -#ifdef __SSE3__ +#ifdef EIGEN_VECTORIZE_SSE3 // TODO implement SSE2 versions as well as integer versions template<> EIGEN_STRONG_INLINE Packet4f ei_preduxp<Packet4f>(const Packet4f* vecs) { @@ -439,7 +439,7 @@ template<> EIGEN_STRONG_INLINE int ei_predux_max<Packet4i>(const Packet4i& a) // } #endif -#ifdef __SSSE3__ +#ifdef EIGEN_VECTORIZE_SSSE3 // SSSE3 versions template<int Offset> struct ei_palign_impl<Offset,Packet4f> diff --git a/Eigen/src/Core/products/CoeffBasedProduct.h b/Eigen/src/Core/products/CoeffBasedProduct.h index f030d59b5..3343b1875 100644 --- a/Eigen/src/Core/products/CoeffBasedProduct.h +++ b/Eigen/src/Core/products/CoeffBasedProduct.h @@ -109,7 +109,7 @@ class CoeffBasedProduct typedef MatrixBase<CoeffBasedProduct> Base; EIGEN_DENSE_PUBLIC_INTERFACE(CoeffBasedProduct) - typedef typename Base::PlainMatrixType PlainMatrixType; + typedef typename Base::PlainObject PlainObject; private: @@ -181,8 +181,8 @@ class CoeffBasedProduct return res; } - // Implicit convertion to the nested type (trigger the evaluation of the product) - operator const PlainMatrixType& () const + // Implicit conversion to the nested type (trigger the evaluation of the product) + operator const PlainObject& () const { m_result.lazyAssign(*this); return m_result; @@ -205,15 +205,15 @@ class CoeffBasedProduct const LhsNested m_lhs; const RhsNested m_rhs; - mutable PlainMatrixType m_result; + mutable PlainObject m_result; }; // here we need to overload the nested rule for products // such that the nested type is a const reference to a plain matrix -template<typename Lhs, typename Rhs, int N, typename PlainMatrixType> -struct ei_nested<CoeffBasedProduct<Lhs,Rhs,EvalBeforeNestingBit|EvalBeforeAssigningBit>, N, PlainMatrixType> +template<typename Lhs, typename Rhs, int N, typename PlainObject> +struct ei_nested<CoeffBasedProduct<Lhs,Rhs,EvalBeforeNestingBit|EvalBeforeAssigningBit>, N, PlainObject> { - typedef PlainMatrixType const& type; + typedef PlainObject const& type; }; /*************************************************************************** diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h index fe1987bdd..18e913b0e 100644 --- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h +++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h @@ -27,6 +27,12 @@ #ifndef EIGEN_EXTERN_INSTANTIATIONS +#ifdef EIGEN_HAS_FUSE_CJMADD +#define CJMADD(A,B,C,T) C = cj.pmadd(A,B,C); +#else +#define CJMADD(A,B,C,T) T = A; T = cj.pmul(T,B); C = ei_padd(C,T); +#endif + // optimized GEneral packed Block * packed Panel product kernel template<typename Scalar, int mr, int nr, typename Conj> struct ei_gebp_kernel @@ -74,65 +80,111 @@ struct ei_gebp_kernel const Scalar* blB = &blockB[j2*strideB*PacketSize+offsetB*nr]; for(int k=0; k<peeled_kc; k+=4) { - PacketType B0, B1, B2, B3, A0, A1; - - A0 = ei_pload(&blA[0*PacketSize]); - A1 = ei_pload(&blA[1*PacketSize]); - B0 = ei_pload(&blB[0*PacketSize]); - B1 = ei_pload(&blB[1*PacketSize]); - C0 = cj.pmadd(A0, B0, C0); - if(nr==4) B2 = ei_pload(&blB[2*PacketSize]); - C4 = cj.pmadd(A1, B0, C4); - if(nr==4) B3 = ei_pload(&blB[3*PacketSize]); - B0 = ei_pload(&blB[(nr==4 ? 4 : 2)*PacketSize]); - C1 = cj.pmadd(A0, B1, C1); - C5 = cj.pmadd(A1, B1, C5); - B1 = ei_pload(&blB[(nr==4 ? 5 : 3)*PacketSize]); - if(nr==4) C2 = cj.pmadd(A0, B2, C2); - if(nr==4) C6 = cj.pmadd(A1, B2, C6); - if(nr==4) B2 = ei_pload(&blB[6*PacketSize]); - if(nr==4) C3 = cj.pmadd(A0, B3, C3); - A0 = ei_pload(&blA[2*PacketSize]); - if(nr==4) C7 = cj.pmadd(A1, B3, C7); - A1 = ei_pload(&blA[3*PacketSize]); - if(nr==4) B3 = ei_pload(&blB[7*PacketSize]); - C0 = cj.pmadd(A0, B0, C0); - C4 = cj.pmadd(A1, B0, C4); - B0 = ei_pload(&blB[(nr==4 ? 8 : 4)*PacketSize]); - C1 = cj.pmadd(A0, B1, C1); - C5 = cj.pmadd(A1, B1, C5); - B1 = ei_pload(&blB[(nr==4 ? 9 : 5)*PacketSize]); - if(nr==4) C2 = cj.pmadd(A0, B2, C2); - if(nr==4) C6 = cj.pmadd(A1, B2, C6); - if(nr==4) B2 = ei_pload(&blB[10*PacketSize]); - if(nr==4) C3 = cj.pmadd(A0, B3, C3); - A0 = ei_pload(&blA[4*PacketSize]); - if(nr==4) C7 = cj.pmadd(A1, B3, C7); - A1 = ei_pload(&blA[5*PacketSize]); - if(nr==4) B3 = ei_pload(&blB[11*PacketSize]); - - C0 = cj.pmadd(A0, B0, C0); - C4 = cj.pmadd(A1, B0, C4); - B0 = ei_pload(&blB[(nr==4 ? 12 : 6)*PacketSize]); - C1 = cj.pmadd(A0, B1, C1); - C5 = cj.pmadd(A1, B1, C5); - B1 = ei_pload(&blB[(nr==4 ? 13 : 7)*PacketSize]); - if(nr==4) C2 = cj.pmadd(A0, B2, C2); - if(nr==4) C6 = cj.pmadd(A1, B2, C6); - if(nr==4) B2 = ei_pload(&blB[14*PacketSize]); - if(nr==4) C3 = cj.pmadd(A0, B3, C3); - A0 = ei_pload(&blA[6*PacketSize]); - if(nr==4) C7 = cj.pmadd(A1, B3, C7); - A1 = ei_pload(&blA[7*PacketSize]); - if(nr==4) B3 = ei_pload(&blB[15*PacketSize]); - C0 = cj.pmadd(A0, B0, C0); - C4 = cj.pmadd(A1, B0, C4); - C1 = cj.pmadd(A0, B1, C1); - C5 = cj.pmadd(A1, B1, C5); - if(nr==4) C2 = cj.pmadd(A0, B2, C2); - if(nr==4) C6 = cj.pmadd(A1, B2, C6); - if(nr==4) C3 = cj.pmadd(A0, B3, C3); - if(nr==4) C7 = cj.pmadd(A1, B3, C7); + if(nr==2) + { + PacketType B0, T0, A0, A1; + + A0 = ei_pload(&blA[0*PacketSize]); + A1 = ei_pload(&blA[1*PacketSize]); + B0 = ei_pload(&blB[0*PacketSize]); + CJMADD(A0,B0,C0,T0); + CJMADD(A1,B0,C4,T0); + B0 = ei_pload(&blB[1*PacketSize]); + CJMADD(A0,B0,C1,T0); + CJMADD(A1,B0,C5,T0); + + A0 = ei_pload(&blA[2*PacketSize]); + A1 = ei_pload(&blA[3*PacketSize]); + B0 = ei_pload(&blB[2*PacketSize]); + CJMADD(A0,B0,C0,T0); + CJMADD(A1,B0,C4,T0); + B0 = ei_pload(&blB[3*PacketSize]); + CJMADD(A0,B0,C1,T0); + CJMADD(A1,B0,C5,T0); + + A0 = ei_pload(&blA[4*PacketSize]); + A1 = ei_pload(&blA[5*PacketSize]); + B0 = ei_pload(&blB[4*PacketSize]); + CJMADD(A0,B0,C0,T0); + CJMADD(A1,B0,C4,T0); + B0 = ei_pload(&blB[5*PacketSize]); + CJMADD(A0,B0,C1,T0); + CJMADD(A1,B0,C5,T0); + + A0 = ei_pload(&blA[6*PacketSize]); + A1 = ei_pload(&blA[7*PacketSize]); + B0 = ei_pload(&blB[6*PacketSize]); + CJMADD(A0,B0,C0,T0); + CJMADD(A1,B0,C4,T0); + B0 = ei_pload(&blB[7*PacketSize]); + CJMADD(A0,B0,C1,T0); + CJMADD(A1,B0,C5,T0); + } + else + { + + PacketType B0, B1, B2, B3, A0, A1; + PacketType T0, T1; + + A0 = ei_pload(&blA[0*PacketSize]); + A1 = ei_pload(&blA[1*PacketSize]); + B0 = ei_pload(&blB[0*PacketSize]); + B1 = ei_pload(&blB[1*PacketSize]); + + CJMADD(A0,B0,C0,T0); + if(nr==4) B2 = ei_pload(&blB[2*PacketSize]); + CJMADD(A1,B0,C4,T1); + if(nr==4) B3 = ei_pload(&blB[3*PacketSize]); + B0 = ei_pload(&blB[(nr==4 ? 4 : 2)*PacketSize]); + CJMADD(A0,B1,C1,T0); + CJMADD(A1,B1,C5,T1); + B1 = ei_pload(&blB[(nr==4 ? 5 : 3)*PacketSize]); + if(nr==4) { CJMADD(A0,B2,C2,T0); } + if(nr==4) { CJMADD(A1,B2,C6,T1); } + if(nr==4) B2 = ei_pload(&blB[6*PacketSize]); + if(nr==4) { CJMADD(A0,B3,C3,T0); } + A0 = ei_pload(&blA[2*PacketSize]); + if(nr==4) { CJMADD(A1,B3,C7,T1); } + A1 = ei_pload(&blA[3*PacketSize]); + if(nr==4) B3 = ei_pload(&blB[7*PacketSize]); + CJMADD(A0,B0,C0,T0); + CJMADD(A1,B0,C4,T1); + B0 = ei_pload(&blB[(nr==4 ? 8 : 4)*PacketSize]); + CJMADD(A0,B1,C1,T0); + CJMADD(A1,B1,C5,T1); + B1 = ei_pload(&blB[(nr==4 ? 9 : 5)*PacketSize]); + if(nr==4) { CJMADD(A0,B2,C2,T0); } + if(nr==4) { CJMADD(A1,B2,C6,T1); } + if(nr==4) B2 = ei_pload(&blB[10*PacketSize]); + if(nr==4) { CJMADD(A0,B3,C3,T0); } + A0 = ei_pload(&blA[4*PacketSize]); + if(nr==4) { CJMADD(A1,B3,C7,T1); } + A1 = ei_pload(&blA[5*PacketSize]); + if(nr==4) B3 = ei_pload(&blB[11*PacketSize]); + + CJMADD(A0,B0,C0,T0); + CJMADD(A1,B0,C4,T1); + B0 = ei_pload(&blB[(nr==4 ? 12 : 6)*PacketSize]); + CJMADD(A0,B1,C1,T0); + CJMADD(A1,B1,C5,T1); + B1 = ei_pload(&blB[(nr==4 ? 13 : 7)*PacketSize]); + if(nr==4) { CJMADD(A0,B2,C2,T0); } + if(nr==4) { CJMADD(A1,B2,C6,T1); } + if(nr==4) B2 = ei_pload(&blB[14*PacketSize]); + if(nr==4) { CJMADD(A0,B3,C3,T0); } + A0 = ei_pload(&blA[6*PacketSize]); + if(nr==4) { CJMADD(A1,B3,C7,T1); } + A1 = ei_pload(&blA[7*PacketSize]); + if(nr==4) B3 = ei_pload(&blB[15*PacketSize]); + CJMADD(A0,B0,C0,T0); + CJMADD(A1,B0,C4,T1); + CJMADD(A0,B1,C1,T0); + CJMADD(A1,B1,C5,T1); + if(nr==4) { CJMADD(A0,B2,C2,T0); } + if(nr==4) { CJMADD(A1,B2,C6,T1); } + if(nr==4) { CJMADD(A0,B3,C3,T0); } + if(nr==4) { CJMADD(A1,B3,C7,T1); } + } blB += 4*nr*PacketSize; blA += 4*mr; @@ -140,22 +192,40 @@ struct ei_gebp_kernel // process remaining peeled loop for(int k=peeled_kc; k<depth; k++) { - PacketType B0, B1, B2, B3, A0, A1; - - A0 = ei_pload(&blA[0*PacketSize]); - A1 = ei_pload(&blA[1*PacketSize]); - B0 = ei_pload(&blB[0*PacketSize]); - B1 = ei_pload(&blB[1*PacketSize]); - C0 = cj.pmadd(A0, B0, C0); - if(nr==4) B2 = ei_pload(&blB[2*PacketSize]); - C4 = cj.pmadd(A1, B0, C4); - if(nr==4) B3 = ei_pload(&blB[3*PacketSize]); - C1 = cj.pmadd(A0, B1, C1); - C5 = cj.pmadd(A1, B1, C5); - if(nr==4) C2 = cj.pmadd(A0, B2, C2); - if(nr==4) C6 = cj.pmadd(A1, B2, C6); - if(nr==4) C3 = cj.pmadd(A0, B3, C3); - if(nr==4) C7 = cj.pmadd(A1, B3, C7); + if(nr==2) + { + PacketType B0, T0, A0, A1; + + A0 = ei_pload(&blA[0*PacketSize]); + A1 = ei_pload(&blA[1*PacketSize]); + B0 = ei_pload(&blB[0*PacketSize]); + CJMADD(A0,B0,C0,T0); + CJMADD(A1,B0,C4,T0); + B0 = ei_pload(&blB[1*PacketSize]); + CJMADD(A0,B0,C1,T0); + CJMADD(A1,B0,C5,T0); + } + else + { + PacketType B0, B1, B2, B3, A0, A1, T0, T1; + + A0 = ei_pload(&blA[0*PacketSize]); + A1 = ei_pload(&blA[1*PacketSize]); + B0 = ei_pload(&blB[0*PacketSize]); + B1 = ei_pload(&blB[1*PacketSize]); + + CJMADD(A0,B0,C0,T0); + if(nr==4) B2 = ei_pload(&blB[2*PacketSize]); + CJMADD(A1,B0,C4,T1); + if(nr==4) B3 = ei_pload(&blB[3*PacketSize]); + B0 = ei_pload(&blB[(nr==4 ? 4 : 2)*PacketSize]); + CJMADD(A0,B1,C1,T0); + CJMADD(A1,B1,C5,T1); + if(nr==4) { CJMADD(A0,B2,C2,T0); } + if(nr==4) { CJMADD(A1,B2,C6,T1); } + if(nr==4) { CJMADD(A0,B3,C3,T0); } + if(nr==4) { CJMADD(A1,B3,C7,T1); } + } blB += nr*PacketSize; blA += mr; @@ -189,45 +259,79 @@ struct ei_gebp_kernel const Scalar* blB = &blockB[j2*strideB*PacketSize+offsetB*nr]; for(int k=0; k<peeled_kc; k+=4) { - PacketType B0, B1, B2, B3, A0; - - A0 = ei_pload(&blA[0*PacketSize]); - B0 = ei_pload(&blB[0*PacketSize]); - B1 = ei_pload(&blB[1*PacketSize]); - C0 = cj.pmadd(A0, B0, C0); - if(nr==4) B2 = ei_pload(&blB[2*PacketSize]); - if(nr==4) B3 = ei_pload(&blB[3*PacketSize]); - B0 = ei_pload(&blB[(nr==4 ? 4 : 2)*PacketSize]); - C1 = cj.pmadd(A0, B1, C1); - B1 = ei_pload(&blB[(nr==4 ? 5 : 3)*PacketSize]); - if(nr==4) C2 = cj.pmadd(A0, B2, C2); - if(nr==4) B2 = ei_pload(&blB[6*PacketSize]); - if(nr==4) C3 = cj.pmadd(A0, B3, C3); - A0 = ei_pload(&blA[1*PacketSize]); - if(nr==4) B3 = ei_pload(&blB[7*PacketSize]); - C0 = cj.pmadd(A0, B0, C0); - B0 = ei_pload(&blB[(nr==4 ? 8 : 4)*PacketSize]); - C1 = cj.pmadd(A0, B1, C1); - B1 = ei_pload(&blB[(nr==4 ? 9 : 5)*PacketSize]); - if(nr==4) C2 = cj.pmadd(A0, B2, C2); - if(nr==4) B2 = ei_pload(&blB[10*PacketSize]); - if(nr==4) C3 = cj.pmadd(A0, B3, C3); - A0 = ei_pload(&blA[2*PacketSize]); - if(nr==4) B3 = ei_pload(&blB[11*PacketSize]); - - C0 = cj.pmadd(A0, B0, C0); - B0 = ei_pload(&blB[(nr==4 ? 12 : 6)*PacketSize]); - C1 = cj.pmadd(A0, B1, C1); - B1 = ei_pload(&blB[(nr==4 ? 13 : 7)*PacketSize]); - if(nr==4) C2 = cj.pmadd(A0, B2, C2); - if(nr==4) B2 = ei_pload(&blB[14*PacketSize]); - if(nr==4) C3 = cj.pmadd(A0, B3, C3); - A0 = ei_pload(&blA[3*PacketSize]); - if(nr==4) B3 = ei_pload(&blB[15*PacketSize]); - C0 = cj.pmadd(A0, B0, C0); - C1 = cj.pmadd(A0, B1, C1); - if(nr==4) C2 = cj.pmadd(A0, B2, C2); - if(nr==4) C3 = cj.pmadd(A0, B3, C3); + if(nr==2) + { + PacketType B0, T0, A0; + + A0 = ei_pload(&blA[0*PacketSize]); + B0 = ei_pload(&blB[0*PacketSize]); + CJMADD(A0,B0,C0,T0); + B0 = ei_pload(&blB[1*PacketSize]); + CJMADD(A0,B0,C1,T0); + + A0 = ei_pload(&blA[1*PacketSize]); + B0 = ei_pload(&blB[2*PacketSize]); + CJMADD(A0,B0,C0,T0); + B0 = ei_pload(&blB[3*PacketSize]); + CJMADD(A0,B0,C1,T0); + + A0 = ei_pload(&blA[2*PacketSize]); + B0 = ei_pload(&blB[4*PacketSize]); + CJMADD(A0,B0,C0,T0); + B0 = ei_pload(&blB[5*PacketSize]); + CJMADD(A0,B0,C1,T0); + + A0 = ei_pload(&blA[3*PacketSize]); + B0 = ei_pload(&blB[6*PacketSize]); + CJMADD(A0,B0,C0,T0); + B0 = ei_pload(&blB[7*PacketSize]); + CJMADD(A0,B0,C1,T0); + } + else + { + + PacketType B0, B1, B2, B3, A0; + PacketType T0, T1; + + A0 = ei_pload(&blA[0*PacketSize]); + B0 = ei_pload(&blB[0*PacketSize]); + B1 = ei_pload(&blB[1*PacketSize]); + + CJMADD(A0,B0,C0,T0); + if(nr==4) B2 = ei_pload(&blB[2*PacketSize]); + if(nr==4) B3 = ei_pload(&blB[3*PacketSize]); + B0 = ei_pload(&blB[(nr==4 ? 4 : 2)*PacketSize]); + CJMADD(A0,B1,C1,T1); + B1 = ei_pload(&blB[(nr==4 ? 5 : 3)*PacketSize]); + if(nr==4) { CJMADD(A0,B2,C2,T0); } + if(nr==4) B2 = ei_pload(&blB[6*PacketSize]); + if(nr==4) { CJMADD(A0,B3,C3,T1); } + A0 = ei_pload(&blA[1*PacketSize]); + if(nr==4) B3 = ei_pload(&blB[7*PacketSize]); + CJMADD(A0,B0,C0,T0); + B0 = ei_pload(&blB[(nr==4 ? 8 : 4)*PacketSize]); + CJMADD(A0,B1,C1,T1); + B1 = ei_pload(&blB[(nr==4 ? 9 : 5)*PacketSize]); + if(nr==4) { CJMADD(A0,B2,C2,T0); } + if(nr==4) B2 = ei_pload(&blB[10*PacketSize]); + if(nr==4) { CJMADD(A0,B3,C3,T1); } + A0 = ei_pload(&blA[2*PacketSize]); + if(nr==4) B3 = ei_pload(&blB[11*PacketSize]); + + CJMADD(A0,B0,C0,T0); + B0 = ei_pload(&blB[(nr==4 ? 12 : 6)*PacketSize]); + CJMADD(A0,B1,C1,T1); + B1 = ei_pload(&blB[(nr==4 ? 13 : 7)*PacketSize]); + if(nr==4) { CJMADD(A0,B2,C2,T0); } + if(nr==4) B2 = ei_pload(&blB[14*PacketSize]); + if(nr==4) { CJMADD(A0,B3,C3,T1); } + A0 = ei_pload(&blA[3*PacketSize]); + if(nr==4) B3 = ei_pload(&blB[15*PacketSize]); + CJMADD(A0,B0,C0,T0); + CJMADD(A0,B1,C1,T1); + if(nr==4) { CJMADD(A0,B2,C2,T0); } + if(nr==4) { CJMADD(A0,B3,C3,T1); } + } blB += 4*nr*PacketSize; blA += 4*PacketSize; @@ -235,17 +339,32 @@ struct ei_gebp_kernel // process remaining peeled loop for(int k=peeled_kc; k<depth; k++) { - PacketType B0, B1, B2, B3, A0; - - A0 = ei_pload(&blA[0*PacketSize]); - B0 = ei_pload(&blB[0*PacketSize]); - B1 = ei_pload(&blB[1*PacketSize]); - C0 = cj.pmadd(A0, B0, C0); - if(nr==4) B2 = ei_pload(&blB[2*PacketSize]); - if(nr==4) B3 = ei_pload(&blB[3*PacketSize]); - C1 = cj.pmadd(A0, B1, C1); - if(nr==4) C2 = cj.pmadd(A0, B2, C2); - if(nr==4) C3 = cj.pmadd(A0, B3, C3); + if(nr==2) + { + PacketType B0, T0, A0; + + A0 = ei_pload(&blA[0*PacketSize]); + B0 = ei_pload(&blB[0*PacketSize]); + CJMADD(A0,B0,C0,T0); + B0 = ei_pload(&blB[1*PacketSize]); + CJMADD(A0,B0,C1,T0); + } + else + { + PacketType B0, B1, B2, B3, A0; + PacketType T0, T1; + + A0 = ei_pload(&blA[0*PacketSize]); + B0 = ei_pload(&blB[0*PacketSize]); + B1 = ei_pload(&blB[1*PacketSize]); + if(nr==4) B2 = ei_pload(&blB[2*PacketSize]); + if(nr==4) B3 = ei_pload(&blB[3*PacketSize]); + + CJMADD(A0,B0,C0,T0); + CJMADD(A0,B1,C1,T1); + if(nr==4) { CJMADD(A0,B2,C2,T0); } + if(nr==4) { CJMADD(A0,B3,C3,T1); } + } blB += nr*PacketSize; blA += PacketSize; @@ -268,17 +387,32 @@ struct ei_gebp_kernel const Scalar* blB = &blockB[j2*strideB*PacketSize+offsetB*nr]; for(int k=0; k<depth; k++) { - Scalar B0, B1, B2, B3, A0; - - A0 = blA[k]; - B0 = blB[0*PacketSize]; - B1 = blB[1*PacketSize]; - C0 = cj.pmadd(A0, B0, C0); - if(nr==4) B2 = blB[2*PacketSize]; - if(nr==4) B3 = blB[3*PacketSize]; - C1 = cj.pmadd(A0, B1, C1); - if(nr==4) C2 = cj.pmadd(A0, B2, C2); - if(nr==4) C3 = cj.pmadd(A0, B3, C3); + if(nr==2) + { + Scalar B0, T0, A0; + + A0 = blA[0*PacketSize]; + B0 = blB[0*PacketSize]; + CJMADD(A0,B0,C0,T0); + B0 = blB[1*PacketSize]; + CJMADD(A0,B0,C1,T0); + } + else + { + Scalar B0, B1, B2, B3, A0; + Scalar T0, T1; + + A0 = blA[k]; + B0 = blB[0*PacketSize]; + B1 = blB[1*PacketSize]; + if(nr==4) B2 = blB[2*PacketSize]; + if(nr==4) B3 = blB[3*PacketSize]; + + CJMADD(A0,B0,C0,T0); + CJMADD(A0,B1,C1,T1); + if(nr==4) { CJMADD(A0,B2,C2,T0); } + if(nr==4) { CJMADD(A0,B3,C3,T1); } + } blB += nr*PacketSize; } @@ -310,13 +444,13 @@ struct ei_gebp_kernel const Scalar* blB = &blockB[j2*strideB*PacketSize+offsetB]; for(int k=0; k<depth; k++) { - PacketType B0, A0, A1; + PacketType B0, A0, A1, T0, T1; A0 = ei_pload(&blA[0*PacketSize]); A1 = ei_pload(&blA[1*PacketSize]); B0 = ei_pload(&blB[0*PacketSize]); - C0 = cj.pmadd(A0, B0, C0); - C4 = cj.pmadd(A1, B0, C4); + CJMADD(A0,B0,C0,T0); + CJMADD(A1,B0,C4,T1); blB += PacketSize; blA += mr; @@ -334,7 +468,7 @@ struct ei_gebp_kernel #endif PacketType C0 = ei_ploadu(&res[(j2+0)*resStride + i]); - + const Scalar* blB = &blockB[j2*strideB*PacketSize+offsetB]; for(int k=0; k<depth; k++) { @@ -363,6 +497,8 @@ struct ei_gebp_kernel } }; +#undef CJMADD + // pack a block of the lhs // The travesal is as follow (mr==4): // 0 4 8 12 ... @@ -474,7 +610,7 @@ struct ei_gemm_pack_rhs<Scalar, nr, ColMajor, PanelMode> // skip what we have after if(PanelMode) count += PacketSize * nr * (stride-offset-depth); } - + // copy the remaining columns one at a time (nr==1) for(int j2=packet_cols; j2<cols; ++j2) { diff --git a/Eigen/src/Core/util/BlasUtil.h b/Eigen/src/Core/util/BlasUtil.h index 3777464dc..4d216d77a 100644 --- a/Eigen/src/Core/util/BlasUtil.h +++ b/Eigen/src/Core/util/BlasUtil.h @@ -166,7 +166,7 @@ template<typename XprType> struct ei_blas_traits }; typedef typename ei_meta_if<int(ActualAccess)==HasDirectAccess, ExtractType, - typename _ExtractType::PlainMatrixType + typename _ExtractType::PlainObject >::ret DirectLinearAccessType; static inline ExtractType extract(const XprType& x) { return x; } static inline Scalar extractScalarFactor(const XprType&) { return Scalar(1); } @@ -227,7 +227,7 @@ struct ei_blas_traits<Transpose<NestedXpr> > typedef Transpose<typename Base::_ExtractType> _ExtractType; typedef typename ei_meta_if<int(Base::ActualAccess)==HasDirectAccess, ExtractType, - typename ExtractType::PlainMatrixType + typename ExtractType::PlainObject >::ret DirectLinearAccessType; enum { IsTransposed = Base::IsTransposed ? 0 : 1 @@ -236,4 +236,22 @@ struct ei_blas_traits<Transpose<NestedXpr> > static inline Scalar extractScalarFactor(const XprType& x) { return Base::extractScalarFactor(x.nestedExpression()); } }; +template<typename T, int Access=ei_blas_traits<T>::ActualAccess> +struct ei_extract_data_selector { + static const typename T::Scalar* run(const T& m) + { + return &ei_blas_traits<T>::extract(m).const_cast_derived().coeffRef(0,0); // FIXME this should be .data() + } +}; + +template<typename T> +struct ei_extract_data_selector<T,NoDirectAccess> { + static typename T::Scalar* run(const T&) { return 0; } +}; + +template<typename T> const typename T::Scalar* ei_extract_data(const T& m) +{ + return ei_extract_data_selector<T>::run(m); +} + #endif // EIGEN_BLASUTIL_H diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h index c2d45dc30..6096272fa 100644 --- a/Eigen/src/Core/util/ForwardDeclarations.h +++ b/Eigen/src/Core/util/ForwardDeclarations.h @@ -29,7 +29,7 @@ template<typename T> struct ei_traits; template<typename T> struct NumTraits; -template<typename Derived> struct AnyMatrixBase; +template<typename Derived> struct EigenBase; template<typename _Scalar, int _Rows, int _Cols, int _Options = EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION | AutoAlign, diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index dc1aa150b..37ccef047 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -211,7 +211,7 @@ using Eigen::ei_cos; */ #if !EIGEN_ALIGN #define EIGEN_ALIGN_TO_BOUNDARY(n) -#elif (defined __GNUC__) +#elif (defined __GNUC__) || (defined __PGI) #define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n))) #elif (defined _MSC_VER) #define EIGEN_ALIGN_TO_BOUNDARY(n) __declspec(align(n)) diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h index 8ddf4450a..eceb5ab2a 100644 --- a/Eigen/src/Core/util/XprHelper.h +++ b/Eigen/src/Core/util/XprHelper.h @@ -147,7 +147,7 @@ template<typename T, typename StorageType = typename ei_traits<T>::StorageType> template<typename T> struct ei_eval<T,Dense> { typedef typename ei_plain_matrix_type<T>::type type; -// typedef typename T::PlainMatrixType type; +// typedef typename T::PlainObject type; // typedef T::Matrix<typename ei_traits<T>::Scalar, // ei_traits<T>::RowsAtCompileTime, // ei_traits<T>::ColsAtCompileTime, @@ -201,6 +201,18 @@ template<typename T> struct ei_plain_matrix_type_row_major // we should be able to get rid of this one too template<typename T> struct ei_must_nest_by_value { enum { ret = false }; }; +template<class T> +struct ei_is_reference +{ + enum { ret = false }; +}; + +template<class T> +struct ei_is_reference<T&> +{ + enum { ret = true }; +}; + /** * The reference selector for template expressions. The idea is that we don't * need to use references for expressions since they are light weight proxy @@ -234,7 +246,7 @@ struct ei_ref_selector * const Matrix3d&, because the internal logic of ei_nested determined that since a was already a matrix, there was no point * in copying it into another matrix. */ -template<typename T, int n=1, typename PlainMatrixType = typename ei_eval<T>::type> struct ei_nested +template<typename T, int n=1, typename PlainObject = typename ei_eval<T>::type> struct ei_nested { enum { CostEval = (n+1) * int(NumTraits<typename ei_traits<T>::Scalar>::ReadCost), @@ -244,7 +256,7 @@ template<typename T, int n=1, typename PlainMatrixType = typename ei_eval<T>::ty typedef typename ei_meta_if< ( int(ei_traits<T>::Flags) & EvalBeforeNestingBit ) || ( int(CostEval) <= int(CostNoEval) ), - PlainMatrixType, + PlainObject, typename ei_ref_selector<T>::type >::ret type; }; @@ -258,7 +270,7 @@ template<unsigned int Flags> struct ei_are_flags_consistent * overloads for complex types */ template<typename Derived,typename Scalar,typename OtherScalar, bool EnableIt = !ei_is_same_type<Scalar,OtherScalar>::ret > -struct ei_special_scalar_op_base : public AnyMatrixBase<Derived> +struct ei_special_scalar_op_base : public EigenBase<Derived> { // dummy operator* so that the // "using ei_special_scalar_op_base::operator*" compiles @@ -266,7 +278,7 @@ struct ei_special_scalar_op_base : public AnyMatrixBase<Derived> }; template<typename Derived,typename Scalar,typename OtherScalar> -struct ei_special_scalar_op_base<Derived,Scalar,OtherScalar,true> : public AnyMatrixBase<Derived> +struct ei_special_scalar_op_base<Derived,Scalar,OtherScalar,true> : public EigenBase<Derived> { const CwiseUnaryOp<ei_scalar_multiple2_op<Scalar,OtherScalar>, Derived> operator*(const OtherScalar& scalar) const |