diff options
Diffstat (limited to 'Eigen/src/Core')
40 files changed, 622 insertions, 99 deletions
diff --git a/Eigen/src/Core/BandMatrix.h b/Eigen/src/Core/BandMatrix.h index fa0a24762..7ee1eea08 100644 --- a/Eigen/src/Core/BandMatrix.h +++ b/Eigen/src/Core/BandMatrix.h @@ -47,6 +47,7 @@ struct ei_traits<BandMatrix<_Scalar,Rows,Cols,Supers,Subs,Options> > { typedef _Scalar Scalar; typedef Dense StorageKind; + typedef DenseIndex Index; enum { CoeffReadCost = NumTraits<Scalar>::ReadCost, RowsAtCompileTime = Rows, diff --git a/Eigen/src/Core/Block.h b/Eigen/src/Core/Block.h index bb1b8a6b9..59a0f72bc 100644 --- a/Eigen/src/Core/Block.h +++ b/Eigen/src/Core/Block.h @@ -162,7 +162,7 @@ template<typename XprType, int BlockRows, int BlockCols, bool HasDirectAccess> c .coeffRef(row + m_startRow.value(), col + m_startCol.value()); } - inline const CoeffReturnType coeff(Index row, Index col) const + EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index row, Index col) const { return m_xpr.coeff(row + m_startRow.value(), col + m_startCol.value()); } diff --git a/Eigen/src/Core/CwiseBinaryOp.h b/Eigen/src/Core/CwiseBinaryOp.h index 530777577..d0816211a 100644 --- a/Eigen/src/Core/CwiseBinaryOp.h +++ b/Eigen/src/Core/CwiseBinaryOp.h @@ -45,8 +45,19 @@ * \sa MatrixBase::binaryExpr(const MatrixBase<OtherDerived> &,const CustomBinaryOp &) const, class CwiseUnaryOp, class CwiseNullaryOp */ template<typename BinaryOp, typename Lhs, typename Rhs> -struct ei_traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> > : ei_traits<Lhs> +struct ei_traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> > { + // we must not inherit from ei_traits<Lhs> since it has + // the potential to cause problems with MSVC + typedef typename ei_cleantype<Lhs>::type Ancestor; + typedef typename ei_traits<Ancestor>::XprKind XprKind; + enum { + RowsAtCompileTime = ei_traits<Ancestor>::RowsAtCompileTime, + ColsAtCompileTime = ei_traits<Ancestor>::ColsAtCompileTime, + MaxRowsAtCompileTime = ei_traits<Ancestor>::MaxRowsAtCompileTime, + MaxColsAtCompileTime = ei_traits<Ancestor>::MaxColsAtCompileTime + }; + // even though we require Lhs and Rhs to have the same scalar type (see CwiseBinaryOp constructor), // we still want to handle the case when the result type is different. typedef typename ei_result_of< @@ -57,6 +68,8 @@ struct ei_traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> > : ei_traits<Lhs> >::type Scalar; typedef typename ei_promote_storage_type<typename ei_traits<Lhs>::StorageKind, typename ei_traits<Rhs>::StorageKind>::ret StorageKind; + typedef typename ei_promote_index_type<typename ei_traits<Lhs>::Index, + typename ei_traits<Rhs>::Index>::type Index; typedef typename Lhs::Nested LhsNested; typedef typename Rhs::Nested RhsNested; typedef typename ei_unref<LhsNested>::type _LhsNested; @@ -97,7 +110,7 @@ class CwiseBinaryOp : ei_no_assignment_operator, BinaryOp, Lhs, Rhs, typename ei_promote_storage_type<typename ei_traits<Lhs>::StorageKind, typename ei_traits<Rhs>::StorageKind>::ret>::Base Base; - EIGEN_GENERIC_PUBLIC_INTERFACE_NEW(CwiseBinaryOp) + EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseBinaryOp) typedef typename ei_nested<Lhs>::type LhsNested; typedef typename ei_nested<Rhs>::type RhsNested; @@ -125,14 +138,14 @@ class CwiseBinaryOp : ei_no_assignment_operator, EIGEN_STRONG_INLINE Index rows() const { // return the fixed size type if available to enable compile time optimizations - if (ei_traits<typename ei_cleantype<LhsNested>::type>::RowsAtCompileTime==Dynamic) + if (ei_traits<typename ei_cleantype<LhsNested>::type>::RowsAtCompileTime==Dynamic) return m_rhs.rows(); else return m_lhs.rows(); } EIGEN_STRONG_INLINE Index cols() const { // return the fixed size type if available to enable compile time optimizations - if (ei_traits<typename ei_cleantype<LhsNested>::type>::ColsAtCompileTime==Dynamic) + if (ei_traits<typename ei_cleantype<LhsNested>::type>::ColsAtCompileTime==Dynamic) return m_rhs.cols(); else return m_lhs.cols(); diff --git a/Eigen/src/Core/CwiseUnaryOp.h b/Eigen/src/Core/CwiseUnaryOp.h index da398d131..a05ef1d9f 100644 --- a/Eigen/src/Core/CwiseUnaryOp.h +++ b/Eigen/src/Core/CwiseUnaryOp.h @@ -71,7 +71,7 @@ class CwiseUnaryOp : ei_no_assignment_operator, public: typedef typename CwiseUnaryOpImpl<UnaryOp, XprType,typename ei_traits<XprType>::StorageKind>::Base Base; - EIGEN_GENERIC_PUBLIC_INTERFACE_NEW(CwiseUnaryOp) + EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseUnaryOp) inline CwiseUnaryOp(const XprType& xpr, const UnaryOp& func = UnaryOp()) : m_xpr(xpr), m_functor(func) {} diff --git a/Eigen/src/Core/CwiseUnaryView.h b/Eigen/src/Core/CwiseUnaryView.h index 953fc89a6..f89088072 100644 --- a/Eigen/src/Core/CwiseUnaryView.h +++ b/Eigen/src/Core/CwiseUnaryView.h @@ -70,7 +70,7 @@ class CwiseUnaryView : ei_no_assignment_operator, public: typedef typename CwiseUnaryViewImpl<ViewOp, MatrixType,typename ei_traits<MatrixType>::StorageKind>::Base Base; - EIGEN_GENERIC_PUBLIC_INTERFACE_NEW(CwiseUnaryView) + EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseUnaryView) inline CwiseUnaryView(const MatrixType& mat, const ViewOp& func = ViewOp()) : m_matrix(mat), m_functor(func) {} diff --git a/Eigen/src/Core/DenseBase.h b/Eigen/src/Core/DenseBase.h index c4b4057a4..95e37801d 100644 --- a/Eigen/src/Core/DenseBase.h +++ b/Eigen/src/Core/DenseBase.h @@ -51,7 +51,7 @@ template<typename Derived> class DenseBase class InnerIterator; typedef typename ei_traits<Derived>::StorageKind StorageKind; - typedef typename ei_index<StorageKind>::type Index; + typedef typename ei_traits<Derived>::Index Index; typedef typename ei_traits<Derived>::Scalar Scalar; typedef typename ei_packet_traits<Scalar>::type PacketScalar; typedef typename NumTraits<Scalar>::Real RealScalar; @@ -218,8 +218,8 @@ template<typename Derived> class DenseBase */ void resize(Index rows, Index cols) { - EIGEN_ONLY_USED_FOR_DEBUG(rows); - EIGEN_ONLY_USED_FOR_DEBUG(cols); + EIGEN_ONLY_USED_FOR_DEBUG(rows); + EIGEN_ONLY_USED_FOR_DEBUG(cols); ei_assert(rows == this->rows() && cols == this->cols() && "DenseBase::resize() does not actually allow to resize."); } @@ -247,7 +247,7 @@ template<typename Derived> class DenseBase /** \internal expression type of a block of whole rows */ template<int N> struct NRowsBlockXpr { typedef Block<Derived, N, ei_traits<Derived>::ColsAtCompileTime> Type; }; - + #endif // not EIGEN_PARSED_BY_DOXYGEN /** Copies \a other into *this. \returns a reference to *this. */ @@ -440,8 +440,8 @@ template<typename Derived> class DenseBase * a const reference, in order to avoid a useless copy. */ EIGEN_STRONG_INLINE const typename ei_eval<Derived>::type eval() const - { - // Even though MSVC does not honor strong inlining when the return type + { + // Even though MSVC does not honor strong inlining when the return type // is a dynamic matrix, we desperately need strong inlining for fixed // size types on MSVC. return typename ei_eval<Derived>::type(derived()); diff --git a/Eigen/src/Core/DenseCoeffsBase.h b/Eigen/src/Core/DenseCoeffsBase.h index 7026bbe34..c55576c02 100644 --- a/Eigen/src/Core/DenseCoeffsBase.h +++ b/Eigen/src/Core/DenseCoeffsBase.h @@ -30,7 +30,7 @@ class DenseCoeffsBase : public EigenBase<Derived> { public: typedef typename ei_traits<Derived>::StorageKind StorageKind; - typedef typename ei_index<StorageKind>::type Index; + typedef typename ei_traits<Derived>::Index Index; typedef typename ei_traits<Derived>::Scalar Scalar; typedef typename ei_packet_traits<Scalar>::type PacketScalar; typedef typename ei_meta_if<ei_has_direct_access<Derived>::ret, const Scalar&, Scalar>::ret CoeffReturnType; @@ -40,7 +40,7 @@ class DenseCoeffsBase : public EigenBase<Derived> using Base::cols; using Base::size; using Base::derived; - + EIGEN_STRONG_INLINE Index rowIndexByOuterInner(Index outer, Index inner) const { return int(Derived::RowsAtCompileTime) == 1 ? 0 @@ -245,7 +245,7 @@ class DenseCoeffsBase<Derived, true> : public DenseCoeffsBase<Derived, false> typedef DenseCoeffsBase<Derived, false> Base; typedef typename ei_traits<Derived>::StorageKind StorageKind; - typedef typename ei_index<StorageKind>::type Index; + typedef typename ei_traits<Derived>::Index Index; typedef typename ei_traits<Derived>::Scalar Scalar; typedef typename ei_packet_traits<Scalar>::type PacketScalar; typedef typename NumTraits<Scalar>::Real RealScalar; diff --git a/Eigen/src/Core/DenseStorageBase.h b/Eigen/src/Core/DenseStorageBase.h index 6df7e4b18..2bdb12b78 100644 --- a/Eigen/src/Core/DenseStorageBase.h +++ b/Eigen/src/Core/DenseStorageBase.h @@ -46,7 +46,7 @@ class DenseStorageBase : public ei_dense_xpr_base<Derived>::type typedef typename ei_dense_xpr_base<Derived>::type Base; typedef typename ei_traits<Derived>::StorageKind StorageKind; - typedef typename ei_index<StorageKind>::type Index; + typedef typename ei_traits<Derived>::Index Index; typedef typename ei_traits<Derived>::Scalar Scalar; typedef typename ei_packet_traits<Scalar>::type PacketScalar; typedef typename NumTraits<Scalar>::Real RealScalar; @@ -159,7 +159,7 @@ class DenseStorageBase : public ei_dense_xpr_base<Derived>::type * * \sa resize(Index) for vectors, resize(NoChange_t, Index), resize(Index, NoChange_t) */ - inline void resize(Index rows, Index cols) + EIGEN_STRONG_INLINE void resize(Index rows, Index cols) { #ifdef EIGEN_INITIALIZE_MATRICES_BY_ZERO Index size = rows*cols; @@ -471,7 +471,9 @@ class DenseStorageBase : public ei_dense_xpr_base<Derived>::type template<typename OtherDerived> EIGEN_STRONG_INLINE Derived& _set_noalias(const DenseBase<OtherDerived>& other) { - _resize_to_match(other); + // I don't think we need this resize call since the lazyAssign will anyways resize + // and lazyAssign will be called by the assign selector. + //_resize_to_match(other); // the 'false' below means to enforce lazy evaluation. We don't use lazyAssign() because // it wouldn't allow to copy a row-vector into a column-vector. return ei_assign_selector<Derived,OtherDerived,false>::run(this->derived(), other.derived()); diff --git a/Eigen/src/Core/DiagonalMatrix.h b/Eigen/src/Core/DiagonalMatrix.h index 8d3b458a9..34eeda089 100644 --- a/Eigen/src/Core/DiagonalMatrix.h +++ b/Eigen/src/Core/DiagonalMatrix.h @@ -34,7 +34,7 @@ class DiagonalBase : public EigenBase<Derived> typedef typename ei_traits<Derived>::DiagonalVectorType DiagonalVectorType; typedef typename DiagonalVectorType::Scalar Scalar; typedef typename ei_traits<Derived>::StorageKind StorageKind; - typedef typename ei_index<StorageKind>::type Index; + typedef typename ei_traits<Derived>::Index Index; enum { RowsAtCompileTime = DiagonalVectorType::SizeAtCompileTime, @@ -103,6 +103,7 @@ struct ei_traits<DiagonalMatrix<_Scalar,SizeAtCompileTime,MaxSizeAtCompileTime> { typedef Matrix<_Scalar,SizeAtCompileTime,1,0,MaxSizeAtCompileTime,1> DiagonalVectorType; typedef Dense StorageKind; + typedef DenseIndex Index; }; template<typename _Scalar, int SizeAtCompileTime, int MaxSizeAtCompileTime> @@ -115,7 +116,7 @@ class DiagonalMatrix typedef const DiagonalMatrix& Nested; typedef _Scalar Scalar; typedef typename ei_traits<DiagonalMatrix>::StorageKind StorageKind; - typedef typename ei_index<StorageKind>::type Index; + typedef typename ei_traits<DiagonalMatrix>::Index Index; #endif protected: @@ -203,6 +204,7 @@ struct ei_traits<DiagonalWrapper<_DiagonalVectorType> > { typedef _DiagonalVectorType DiagonalVectorType; typedef typename DiagonalVectorType::Scalar Scalar; + typedef typename DiagonalVectorType::Index Index; typedef typename DiagonalVectorType::StorageKind StorageKind; enum { RowsAtCompileTime = DiagonalVectorType::SizeAtCompileTime, diff --git a/Eigen/src/Core/Dot.h b/Eigen/src/Core/Dot.h index 6e54dac3c..dfa313e89 100644 --- a/Eigen/src/Core/Dot.h +++ b/Eigen/src/Core/Dot.h @@ -85,7 +85,7 @@ MatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const * \sa dot(), norm() */ template<typename Derived> -inline typename NumTraits<typename ei_traits<Derived>::Scalar>::Real MatrixBase<Derived>::squaredNorm() const +EIGEN_STRONG_INLINE typename NumTraits<typename ei_traits<Derived>::Scalar>::Real MatrixBase<Derived>::squaredNorm() const { return ei_real((*this).cwiseAbs2().sum()); } diff --git a/Eigen/src/Core/EigenBase.h b/Eigen/src/Core/EigenBase.h index c9d3bd875..3485b36d2 100644 --- a/Eigen/src/Core/EigenBase.h +++ b/Eigen/src/Core/EigenBase.h @@ -40,7 +40,7 @@ template<typename Derived> struct EigenBase // typedef typename ei_plain_matrix_type<Derived>::type PlainObject; typedef typename ei_traits<Derived>::StorageKind StorageKind; - typedef typename ei_index<StorageKind>::type Index; + typedef typename ei_traits<Derived>::Index Index; /** \returns a reference to the derived object */ Derived& derived() { return *static_cast<Derived*>(this); } diff --git a/Eigen/src/Core/MapBase.h b/Eigen/src/Core/MapBase.h index 979eef707..a08de30d3 100644 --- a/Eigen/src/Core/MapBase.h +++ b/Eigen/src/Core/MapBase.h @@ -46,7 +46,7 @@ template<typename Derived> class MapBase typedef typename ei_traits<Derived>::StorageKind StorageKind; - typedef typename ei_index<StorageKind>::type Index; + typedef typename ei_traits<Derived>::Index Index; typedef typename ei_traits<Derived>::Scalar Scalar; typedef typename ei_packet_traits<Scalar>::type PacketScalar; typedef typename NumTraits<Scalar>::Real RealScalar; diff --git a/Eigen/src/Core/Matrix.h b/Eigen/src/Core/Matrix.h index 4407b0db1..54fca7046 100644 --- a/Eigen/src/Core/Matrix.h +++ b/Eigen/src/Core/Matrix.h @@ -113,6 +113,7 @@ struct ei_traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > { typedef _Scalar Scalar; typedef Dense StorageKind; + typedef DenseIndex Index; typedef MatrixXpr XprKind; enum { RowsAtCompileTime = _Rows, diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h index 0a81c4da0..cc35800bf 100644 --- a/Eigen/src/Core/MatrixBase.h +++ b/Eigen/src/Core/MatrixBase.h @@ -58,11 +58,11 @@ template<typename Derived> class MatrixBase #ifndef EIGEN_PARSED_BY_DOXYGEN typedef MatrixBase StorageBaseType; typedef typename ei_traits<Derived>::StorageKind StorageKind; - typedef typename ei_index<StorageKind>::type Index; + typedef typename ei_traits<Derived>::Index Index; typedef typename ei_traits<Derived>::Scalar Scalar; typedef typename ei_packet_traits<Scalar>::type PacketScalar; typedef typename NumTraits<Scalar>::Real RealScalar; - + typedef DenseBase<Derived> Base; using Base::RowsAtCompileTime; using Base::ColsAtCompileTime; diff --git a/Eigen/src/Core/MatrixStorage.h b/Eigen/src/Core/MatrixStorage.h index aff83a64c..0165966f4 100644 --- a/Eigen/src/Core/MatrixStorage.h +++ b/Eigen/src/Core/MatrixStorage.h @@ -243,7 +243,7 @@ template<typename T, int _Rows, int _Options> class ei_matrix_storage<T, Dynamic m_data = ei_conditional_aligned_realloc_new<T,(_Options&DontAlign)==0>(m_data, size, _Rows*m_cols); m_cols = cols; } - void resize(DenseIndex size, DenseIndex, DenseIndex cols) + EIGEN_STRONG_INLINE void resize(DenseIndex size, DenseIndex, DenseIndex cols) { if(size != _Rows*m_cols) { @@ -279,7 +279,7 @@ template<typename T, int _Cols, int _Options> class ei_matrix_storage<T, Dynamic m_data = ei_conditional_aligned_realloc_new<T,(_Options&DontAlign)==0>(m_data, size, m_rows*_Cols); m_rows = rows; } - void resize(DenseIndex size, DenseIndex rows, DenseIndex) + EIGEN_STRONG_INLINE void resize(DenseIndex size, DenseIndex rows, DenseIndex) { if(size != m_rows*_Cols) { diff --git a/Eigen/src/Core/NoAlias.h b/Eigen/src/Core/NoAlias.h index 0542571e2..30ddbeb3c 100644 --- a/Eigen/src/Core/NoAlias.h +++ b/Eigen/src/Core/NoAlias.h @@ -45,18 +45,12 @@ class NoAlias public: NoAlias(ExpressionType& expression) : m_expression(expression) {} - /* \sa MatrixBase::lazyAssign() */ + /** Behaves like MatrixBase::lazyAssign(other) + * \sa MatrixBase::lazyAssign() */ template<typename OtherDerived> EIGEN_STRONG_INLINE ExpressionType& operator=(const StorageBase<OtherDerived>& other) { return m_expression.lazyAssign(other.derived()); } - template<typename OtherDerived> - EIGEN_STRONG_INLINE ExpressionType& operator=(const ReturnByValue<OtherDerived>& other) - { - other.evalTo(m_expression); - return m_expression; - } - /** \sa MatrixBase::operator+= */ template<typename OtherDerived> EIGEN_STRONG_INLINE ExpressionType& operator+=(const StorageBase<OtherDerived>& other) diff --git a/Eigen/src/Core/PermutationMatrix.h b/Eigen/src/Core/PermutationMatrix.h index 46884dc3f..8227c9bf9 100644 --- a/Eigen/src/Core/PermutationMatrix.h +++ b/Eigen/src/Core/PermutationMatrix.h @@ -2,6 +2,7 @@ // for linear algebra. // // Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com> +// Copyright (C) 2009 Gael Guennebaud <g.gael@free.fr> // // Eigen is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public @@ -46,7 +47,6 @@ * * \sa class DiagonalMatrix */ -template<int SizeAtCompileTime, int MaxSizeAtCompileTime = SizeAtCompileTime> class PermutationMatrix; template<typename PermutationType, typename MatrixType, int Side, bool Transposed=false> struct ei_permut_matrix_product_retval; template<int SizeAtCompileTime, int MaxSizeAtCompileTime> @@ -77,8 +77,12 @@ class PermutationMatrix : public EigenBase<PermutationMatrix<SizeAtCompileTime, typedef Matrix<int, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1> IndicesType; inline PermutationMatrix() - { - } + {} + + /** Constructs an uninitialized permutation matrix of given size. + */ + inline PermutationMatrix(int size) : m_indices(size) + {} /** Copy constructor. */ template<int OtherSize, int OtherMaxSize> @@ -102,6 +106,14 @@ class PermutationMatrix : public EigenBase<PermutationMatrix<SizeAtCompileTime, explicit inline PermutationMatrix(const MatrixBase<Other>& indices) : m_indices(indices) {} + /** Convert the Transpositions \a tr to a permutation matrix */ + template<int OtherSize, int OtherMaxSize> + explicit PermutationMatrix(const Transpositions<OtherSize,OtherMaxSize>& tr) + : m_indices(tr.size()) + { + *this = tr; + } + /** Copies the other permutation into *this */ template<int OtherSize, int OtherMaxSize> PermutationMatrix& operator=(const PermutationMatrix<OtherSize, OtherMaxSize>& other) @@ -110,6 +122,16 @@ class PermutationMatrix : public EigenBase<PermutationMatrix<SizeAtCompileTime, return *this; } + /** Assignment from the Transpositions \a tr */ + template<int OtherSize, int OtherMaxSize> + PermutationMatrix& operator=(const Transpositions<OtherSize,OtherMaxSize>& tr) + { + setIdentity(tr.size()); + for(int k=size()-1; k>=0; --k) + applyTranspositionOnTheRight(k,tr.coeff(k)); + return *this; + } + #ifndef EIGEN_PARSED_BY_DOXYGEN /** This is a special case of the templated operator=. Its purpose is to * prevent a default operator= from hiding the templated operator=. @@ -121,11 +143,6 @@ class PermutationMatrix : public EigenBase<PermutationMatrix<SizeAtCompileTime, } #endif - /** Constructs an uninitialized permutation matrix of given size. - */ - inline PermutationMatrix(int size) : m_indices(size) - {} - /** \returns the number of rows */ inline int rows() const { return m_indices.size(); } diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h index b9aab524b..753f6b4b5 100644 --- a/Eigen/src/Core/Product.h +++ b/Eigen/src/Core/Product.h @@ -194,6 +194,11 @@ class GeneralProduct<Lhs, Rhs, InnerProduct> } typename Base::Scalar value() const { return Base::coeff(0,0); } + + /** Convertion to scalar */ + operator const typename Base::Scalar() const { + return Base::coeff(0,0); + } }; /*********************************************************************** diff --git a/Eigen/src/Core/ProductBase.h b/Eigen/src/Core/ProductBase.h index 36626f838..35a3a280f 100644 --- a/Eigen/src/Core/ProductBase.h +++ b/Eigen/src/Core/ProductBase.h @@ -37,6 +37,8 @@ struct ei_traits<ProductBase<Derived,_Lhs,_Rhs> > typedef typename ei_scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType Scalar; typedef typename ei_promote_storage_type<typename ei_traits<Lhs>::StorageKind, typename ei_traits<Rhs>::StorageKind>::ret StorageKind; + typedef typename ei_promote_index_type<typename ei_traits<Lhs>::Index, + typename ei_traits<Rhs>::Index>::type Index; enum { RowsAtCompileTime = ei_traits<Lhs>::RowsAtCompileTime, ColsAtCompileTime = ei_traits<Rhs>::ColsAtCompileTime, diff --git a/Eigen/src/Core/Redux.h b/Eigen/src/Core/Redux.h index b3e60192f..cfbaa0986 100644 --- a/Eigen/src/Core/Redux.h +++ b/Eigen/src/Core/Redux.h @@ -181,7 +181,7 @@ struct ei_redux_impl<Func, Derived, DefaultTraversal, NoUnrolling> { typedef typename Derived::Scalar Scalar; typedef typename Derived::Index Index; - static Scalar run(const Derived& mat, const Func& func) + static EIGEN_STRONG_INLINE Scalar run(const Derived& mat, const Func& func) { ei_assert(mat.rows()>0 && mat.cols()>0 && "you are using a non initialized matrix"); Scalar res; @@ -311,7 +311,7 @@ struct ei_redux_impl<Func, Derived, LinearVectorizedTraversal, CompleteUnrolling */ template<typename Derived> template<typename Func> -inline typename ei_result_of<Func(typename ei_traits<Derived>::Scalar)>::type +EIGEN_STRONG_INLINE typename ei_result_of<Func(typename ei_traits<Derived>::Scalar)>::type DenseBase<Derived>::redux(const Func& func) const { typedef typename ei_cleantype<typename Derived::Nested>::type ThisNested; diff --git a/Eigen/src/Core/ReturnByValue.h b/Eigen/src/Core/ReturnByValue.h index 665d48031..90887356c 100644 --- a/Eigen/src/Core/ReturnByValue.h +++ b/Eigen/src/Core/ReturnByValue.h @@ -36,9 +36,9 @@ struct ei_traits<ReturnByValue<Derived> > enum { // We're disabling the DirectAccess because e.g. the constructor of // the Block-with-DirectAccess expression requires to have a coeffRef method. - // FIXME this should be fixed so we can have DirectAccessBit here. + // Also, we don't want to have to implement the stride stuff. Flags = (ei_traits<typename ei_traits<Derived>::ReturnType>::Flags - | EvalBeforeNestingBit | EvalBeforeAssigningBit) & ~DirectAccessBit + | EvalBeforeNestingBit) & ~DirectAccessBit }; }; @@ -84,11 +84,8 @@ template<typename Derived> template<typename OtherDerived> Derived& DenseBase<Derived>::operator=(const ReturnByValue<OtherDerived>& other) { - // since we're by-passing the mechanisms in Assign.h, we implement here the EvalBeforeAssigningBit. - // override by using .noalias(), see corresponding operator= in NoAlias. - typename Derived::PlainObject result(rows(), cols()); - other.evalTo(result); - return (derived() = result); + other.evalTo(derived()); + return derived(); } #endif // EIGEN_RETURNBYVALUE_H diff --git a/Eigen/src/Core/SelfAdjointView.h b/Eigen/src/Core/SelfAdjointView.h index eed3f9336..84c4dc521 100644 --- a/Eigen/src/Core/SelfAdjointView.h +++ b/Eigen/src/Core/SelfAdjointView.h @@ -153,7 +153,7 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView /////////// Cholesky module /////////// const LLT<PlainObject, UpLo> llt() const; - const LDLT<PlainObject> ldlt() const; + const LDLT<PlainObject, UpLo> ldlt() const; /////////// Eigenvalue module /////////// diff --git a/Eigen/src/Core/StableNorm.h b/Eigen/src/Core/StableNorm.h index d2bed929b..e3eab4bd8 100644 --- a/Eigen/src/Core/StableNorm.h +++ b/Eigen/src/Core/StableNorm.h @@ -87,7 +87,7 @@ MatrixBase<Derived>::blueNorm() const static RealScalar b1, b2, s1m, s2m, overfl, rbig, relerr; if(nmax <= 0) { - Index nbig, ibeta, it, iemin, iemax, iexp; + int nbig, ibeta, it, iemin, iemax, iexp; RealScalar abig, eps; // This program calculates the machine-dependent constants // bl, b2, slm, s2m, relerr overfl, nmax diff --git a/Eigen/src/Core/Transpose.h b/Eigen/src/Core/Transpose.h index 38d942e04..6928ae31a 100644 --- a/Eigen/src/Core/Transpose.h +++ b/Eigen/src/Core/Transpose.h @@ -66,7 +66,7 @@ template<typename MatrixType> class Transpose public: typedef typename TransposeImpl<MatrixType,typename ei_traits<MatrixType>::StorageKind>::Base Base; - EIGEN_GENERIC_PUBLIC_INTERFACE_NEW(Transpose) + EIGEN_GENERIC_PUBLIC_INTERFACE(Transpose) inline Transpose(const MatrixType& matrix) : m_matrix(matrix) {} diff --git a/Eigen/src/Core/Transpositions.h b/Eigen/src/Core/Transpositions.h new file mode 100644 index 000000000..b71d46aa6 --- /dev/null +++ b/Eigen/src/Core/Transpositions.h @@ -0,0 +1,292 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2010 Gael Guennebaud <g.gael@free.fr> +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see <http://www.gnu.org/licenses/>. + +#ifndef EIGEN_TRANSPOSITIONS_H +#define EIGEN_TRANSPOSITIONS_H + +/** \class Transpositions + * + * \brief Represents a sequence of transpositions (row/column interchange) + * + * \param SizeAtCompileTime the number of transpositions, or Dynamic + * \param MaxSizeAtCompileTime the maximum number of transpositions, or Dynamic. This optional parameter defaults to SizeAtCompileTime. Most of the time, you should not have to specify it. + * + * This class represents a permutation transformation as a sequence of \em n transpositions + * \f$[T_{n-1} \ldots T_{i} \ldots T_{0}]\f$. It is internally stored as a vector of integers \c indices. + * Each transposition \f$ T_{i} \f$ applied on the left of a matrix (\f$ T_{i} M\f$) interchanges + * the rows \c i and \c indices[i] of the matrix \c M. + * A transposition applied on the right (e.g., \f$ M T_{i}\f$) yields a column interchange. + * + * Compared to the class PermutationMatrix, such a sequence of transpositions is what is + * computed during a decomposition with pivoting, and it is faster when applying the permutation in-place. + * + * To apply a sequence of transpositions to a matrix, simply use the operator * as in the following example: + * \code + * Transpositions tr; + * MatrixXf mat; + * mat = tr * mat; + * \endcode + * In this example, we detect that the matrix appears on both side, and so the transpositions + * are applied in-place without any temporary or extra copy. + * + * \sa class PermutationMatrix + */ +template<typename TranspositionType, typename MatrixType, int Side, bool Transposed=false> struct ei_transposition_matrix_product_retval; + +template<int SizeAtCompileTime, int MaxSizeAtCompileTime> +class Transpositions +{ + public: + + typedef Matrix<DenseIndex, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1> IndicesType; + typedef typename IndicesType::Index Index; + + inline Transpositions() {} + + /** Copy constructor. */ + template<int OtherSize, int OtherMaxSize> + inline Transpositions(const Transpositions<OtherSize, OtherMaxSize>& other) + : m_indices(other.indices()) {} + + #ifndef EIGEN_PARSED_BY_DOXYGEN + /** Standard copy constructor. Defined only to prevent a default copy constructor + * from hiding the other templated constructor */ + inline Transpositions(const Transpositions& other) : m_indices(other.indices()) {} + #endif + + /** Generic constructor from expression of the transposition indices. */ + template<typename Other> + explicit inline Transpositions(const MatrixBase<Other>& indices) : m_indices(indices) + {} + + /** Copies the \a other transpositions into \c *this */ + template<int OtherSize, int OtherMaxSize> + Transpositions& operator=(const Transpositions<OtherSize, OtherMaxSize>& other) + { + m_indices = other.indices(); + return *this; + } + + #ifndef EIGEN_PARSED_BY_DOXYGEN + /** This is a special case of the templated operator=. Its purpose is to + * prevent a default operator= from hiding the templated operator=. + */ + Transpositions& operator=(const Transpositions& other) + { + m_indices = other.m_indices; + return *this; + } + #endif + + /** Constructs an uninitialized permutation matrix of given size. + */ + inline Transpositions(Index size) : m_indices(size) + {} + + /** \returns the number of transpositions */ + inline Index size() const { return m_indices.size(); } + + /** Direct access to the underlying index vector */ + inline const Index& coeff(Index i) const { return m_indices.coeff(i); } + /** Direct access to the underlying index vector */ + inline Index& coeffRef(Index i) { return m_indices.coeffRef(i); } + /** Direct access to the underlying index vector */ + inline const Index& operator()(Index i) const { return m_indices(i); } + /** Direct access to the underlying index vector */ + inline Index& operator()(Index i) { return m_indices(i); } + /** Direct access to the underlying index vector */ + inline const Index& operator[](Index i) const { return m_indices(i); } + /** Direct access to the underlying index vector */ + inline Index& operator[](Index i) { return m_indices(i); } + + /** const version of indices(). */ + const IndicesType& indices() const { return m_indices; } + /** \returns a reference to the stored array representing the transpositions. */ + IndicesType& indices() { return m_indices; } + + /** Resizes to given size. */ + inline void resize(int size) + { + m_indices.resize(size); + } + + /** Sets \c *this to represents an identity transformation */ + void setIdentity() + { + for(int i = 0; i < m_indices.size(); ++i) + m_indices.coeffRef(i) = i; + } + + // FIXME: do we want such methods ? + // might be usefull when the target matrix expression is complex, e.g.: + // object.matrix().block(..,..,..,..) = trans * object.matrix().block(..,..,..,..); + /* + template<typename MatrixType> + void applyForwardToRows(MatrixType& mat) const + { + for(Index k=0 ; k<size() ; ++k) + if(m_indices(k)!=k) + mat.row(k).swap(mat.row(m_indices(k))); + } + + template<typename MatrixType> + void applyBackwardToRows(MatrixType& mat) const + { + for(Index k=size()-1 ; k>=0 ; --k) + if(m_indices(k)!=k) + mat.row(k).swap(mat.row(m_indices(k))); + } + */ + + /** \returns the inverse transformation */ + inline Transpose<Transpositions> inverse() const + { return *this; } + + /** \returns the tranpose transformation */ + inline Transpose<Transpositions> transpose() const + { return *this; } + +#ifndef EIGEN_PARSED_BY_DOXYGEN + template<int OtherSize, int OtherMaxSize> + Transpositions(const Transpose<Transpositions<OtherSize,OtherMaxSize> >& other) + : m_indices(other.size()) + { + Index n = size(); + Index j = size-1; + for(Index i=0; i<n;++i,--j) + m_indices.coeffRef(j) = other.nestedTranspositions().indices().coeff(i); + } +#endif + + protected: + + IndicesType m_indices; +}; + +/** \returns the \a matrix with the \a transpositions applied to the columns. + */ +template<typename Derived, int SizeAtCompileTime, int MaxSizeAtCompileTime> +inline const ei_transposition_matrix_product_retval<Transpositions<SizeAtCompileTime, MaxSizeAtCompileTime>, Derived, OnTheRight> +operator*(const MatrixBase<Derived>& matrix, + const Transpositions<SizeAtCompileTime, MaxSizeAtCompileTime> &transpositions) +{ + return ei_transposition_matrix_product_retval + <Transpositions<SizeAtCompileTime, MaxSizeAtCompileTime>, Derived, OnTheRight> + (transpositions, matrix.derived()); +} + +/** \returns the \a matrix with the \a transpositions applied to the rows. + */ +template<typename Derived, int SizeAtCompileTime, int MaxSizeAtCompileTime> +inline const ei_transposition_matrix_product_retval + <Transpositions<SizeAtCompileTime, MaxSizeAtCompileTime>, Derived, OnTheLeft> +operator*(const Transpositions<SizeAtCompileTime, MaxSizeAtCompileTime> &transpositions, + const MatrixBase<Derived>& matrix) +{ + return ei_transposition_matrix_product_retval + <Transpositions<SizeAtCompileTime, MaxSizeAtCompileTime>, Derived, OnTheLeft> + (transpositions, matrix.derived()); +} + +template<typename TranspositionType, typename MatrixType, int Side, bool Transposed> +struct ei_traits<ei_transposition_matrix_product_retval<TranspositionType, MatrixType, Side, Transposed> > +{ + typedef typename MatrixType::PlainObject ReturnType; +}; + +template<typename TranspositionType, typename MatrixType, int Side, bool Transposed> +struct ei_transposition_matrix_product_retval + : public ReturnByValue<ei_transposition_matrix_product_retval<TranspositionType, MatrixType, Side, Transposed> > +{ + typedef typename ei_cleantype<typename MatrixType::Nested>::type MatrixTypeNestedCleaned; + typedef typename TranspositionType::Index Index; + + ei_transposition_matrix_product_retval(const TranspositionType& tr, const MatrixType& matrix) + : m_transpositions(tr), m_matrix(matrix) + {} + + inline int rows() const { return m_matrix.rows(); } + inline int cols() const { return m_matrix.cols(); } + + template<typename Dest> inline void evalTo(Dest& dst) const + { + const int size = m_transpositions.size(); + Index j = 0; + + if(!(ei_is_same_type<MatrixTypeNestedCleaned,Dest>::ret && ei_extract_data(dst) == ei_extract_data(m_matrix))) + dst = m_matrix; + + for(int k=(Transposed?size-1:0) ; Transposed?k>=0:k<size ; Transposed?--k:++k) + if((j=m_transpositions.coeff(k))!=k) + { + if(Side==OnTheLeft) + dst.row(k).swap(dst.row(j)); + else if(Side==OnTheRight) + dst.col(k).swap(dst.col(j)); + } + } + + protected: + const TranspositionType& m_transpositions; + const typename MatrixType::Nested m_matrix; +}; + +/* Template partial specialization for transposed/inverse transpositions */ + +template<int SizeAtCompileTime, int MaxSizeAtCompileTime> +class Transpose<Transpositions<SizeAtCompileTime, MaxSizeAtCompileTime> > +{ + typedef Transpositions<SizeAtCompileTime, MaxSizeAtCompileTime> TranspositionType; + typedef typename TranspositionType::IndicesType IndicesType; + public: + + Transpose(const TranspositionType& t) : m_transpositions(t) {} + + inline int size() const { return m_transpositions.size(); } + + /** \returns the \a matrix with the inverse transpositions applied to the columns. + */ + template<typename Derived> friend + inline const ei_transposition_matrix_product_retval<TranspositionType, Derived, OnTheRight, true> + operator*(const MatrixBase<Derived>& matrix, const Transpose& trt) + { + return ei_transposition_matrix_product_retval<TranspositionType, Derived, OnTheRight, true>(trt.m_transpositions, matrix.derived()); + } + + /** \returns the \a matrix with the inverse transpositions applied to the rows. + */ + template<typename Derived> + inline const ei_transposition_matrix_product_retval<TranspositionType, Derived, OnTheLeft, true> + operator*(const MatrixBase<Derived>& matrix) const + { + return ei_transposition_matrix_product_retval<TranspositionType, Derived, OnTheLeft, true>(m_transpositions, matrix.derived()); + } + + const TranspositionType& nestedTranspositions() const { return m_transpositions; } + + protected: + const TranspositionType& m_transpositions; +}; + +#endif // EIGEN_TRANSPOSITIONS_H diff --git a/Eigen/src/Core/TriangularMatrix.h b/Eigen/src/Core/TriangularMatrix.h index 03347b09c..1f5739c3c 100644 --- a/Eigen/src/Core/TriangularMatrix.h +++ b/Eigen/src/Core/TriangularMatrix.h @@ -46,7 +46,7 @@ template<typename Derived> class TriangularBase : public EigenBase<Derived> }; typedef typename ei_traits<Derived>::Scalar Scalar; typedef typename ei_traits<Derived>::StorageKind StorageKind; - typedef typename ei_index<StorageKind>::type Index; + typedef typename ei_traits<Derived>::Index Index; inline TriangularBase() { ei_assert(!((Mode&UnitDiag) && (Mode&ZeroDiag))); } @@ -156,10 +156,10 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView typedef typename MatrixType::PlainObject DenseMatrixType; typedef typename MatrixType::Nested MatrixTypeNested; typedef typename ei_cleantype<MatrixTypeNested>::type _MatrixTypeNested; - using TriangularBase<TriangularView<_MatrixType, _Mode> >::evalToLazy; + using Base::evalToLazy; typedef typename ei_traits<TriangularView>::StorageKind StorageKind; - typedef typename ei_index<StorageKind>::type Index; + typedef typename ei_traits<TriangularView>::Index Index; enum { Mode = _Mode, diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h index 0a7b07645..2feca365a 100644 --- a/Eigen/src/Core/arch/AltiVec/PacketMath.h +++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h @@ -31,10 +31,10 @@ #ifndef EIGEN_HAS_FUSE_CJMADD #define EIGEN_HAS_FUSE_CJMADD 1 -#endif +#endif #ifndef EIGEN_TUNE_FOR_CPU_CACHE_SIZE -#define EIGEN_TUNE_FOR_CPU_CACHE_SIZE 8*128*128 +#define EIGEN_TUNE_FOR_CPU_CACHE_SIZE 8*256*256 #endif // NOTE Altivec has 32 registers, but Eigen only accepts a value of 8 or 16 @@ -153,7 +153,7 @@ template<> EIGEN_STRONG_INLINE Packet4f ei_pset1<float>(const float& from) { return vc; } -template<> EIGEN_STRONG_INLINE Packet4i ei_pset1<int>(const int& from) { +template<> EIGEN_STRONG_INLINE Packet4i ei_pset1<int>(const int& from) { int EIGEN_ALIGN16 ai[4]; ai[0] = from; Packet4i vc = vec_ld(0, ai); diff --git a/Eigen/src/Core/arch/Default/Settings.h b/Eigen/src/Core/arch/Default/Settings.h index 1ab2877b6..150c4bdc7 100644 --- a/Eigen/src/Core/arch/Default/Settings.h +++ b/Eigen/src/Core/arch/Default/Settings.h @@ -52,7 +52,7 @@ * Typically for a single-threaded application you would set that to 25% of the size of your CPU caches in bytes */ #ifndef EIGEN_TUNE_FOR_CPU_CACHE_SIZE -#define EIGEN_TUNE_FOR_CPU_CACHE_SIZE (sizeof(float)*256*256) +#define EIGEN_TUNE_FOR_CPU_CACHE_SIZE (sizeof(float)*512*512) #endif /** Defines the maximal width of the blocks used in the triangular product and solver diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h index 96c75101c..d4dd33322 100644 --- a/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/Eigen/src/Core/arch/NEON/PacketMath.h @@ -32,7 +32,7 @@ #endif #ifndef EIGEN_TUNE_FOR_CPU_CACHE_SIZE -#define EIGEN_TUNE_FOR_CPU_CACHE_SIZE 4*96*96 +#define EIGEN_TUNE_FOR_CPU_CACHE_SIZE 4*192*192 #endif // FIXME NEON has 16 quad registers, but since the current register allocator diff --git a/Eigen/src/Core/products/CoeffBasedProduct.h b/Eigen/src/Core/products/CoeffBasedProduct.h index 170641589..43477282c 100644 --- a/Eigen/src/Core/products/CoeffBasedProduct.h +++ b/Eigen/src/Core/products/CoeffBasedProduct.h @@ -54,6 +54,8 @@ struct ei_traits<CoeffBasedProduct<LhsNested,RhsNested,NestingFlags> > typedef typename ei_scalar_product_traits<typename _LhsNested::Scalar, typename _RhsNested::Scalar>::ReturnType Scalar; typedef typename ei_promote_storage_type<typename ei_traits<_LhsNested>::StorageKind, typename ei_traits<_RhsNested>::StorageKind>::ret StorageKind; + typedef typename ei_promote_index_type<typename ei_traits<_LhsNested>::Index, + typename ei_traits<_RhsNested>::Index>::type Index; enum { LhsCoeffReadCost = _LhsNested::CoeffReadCost, diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h index d81715528..be20be833 100644 --- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h +++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h @@ -25,13 +25,156 @@ #ifndef EIGEN_GENERAL_BLOCK_PANEL_H #define EIGEN_GENERAL_BLOCK_PANEL_H -#ifndef EIGEN_EXTERN_INSTANTIATIONS +/** \internal */ +inline void ei_manage_caching_sizes(Action action, std::ptrdiff_t* a=0, std::ptrdiff_t* b=0, std::ptrdiff_t* c=0, int scalar_size = 0) +{ + const int nbScalarSizes = 12; + static std::ptrdiff_t m_maxK[nbScalarSizes]; + static std::ptrdiff_t m_maxM[nbScalarSizes]; + static std::ptrdiff_t m_maxN[nbScalarSizes]; + static std::ptrdiff_t m_l1CacheSize = 0; + static std::ptrdiff_t m_l2CacheSize = 0; + if(m_l1CacheSize==0) + { + // initialization + m_l1CacheSize = EIGEN_TUNE_FOR_CPU_CACHE_SIZE; + m_l2CacheSize = 32*EIGEN_TUNE_FOR_CPU_CACHE_SIZE; + ei_manage_caching_sizes(SetAction,&m_l1CacheSize, &m_l2CacheSize); + } + + if(action==SetAction && scalar_size==0) + { + // set the cpu cache size and cache all block sizes from a global cache size in byte + ei_internal_assert(a!=0 && b!=0 && c==0); + m_l1CacheSize = *a; + m_l2CacheSize = *b; + int ss = 4; + for(int i=0; i<nbScalarSizes;++i,ss+=4) + { + // Round the block size such that it is a multiple of 64/ss. + // This is to make sure the block size are multiple of the register block sizes. + // And in the worst case we ensure an even number. + std::ptrdiff_t rb = 64/ss; + if(rb==0) rb = 1; + m_maxK[i] = 4 * std::ptrdiff_t(ei_sqrt<float>(m_l1CacheSize/(64*ss))); + m_maxM[i] = 2 * m_maxK[i]; + m_maxN[i] = ((m_l2CacheSize / (2 * m_maxK[i] * ss))/4)*4; + } + } + else if(action==SetAction && scalar_size!=0) + { + // set the block sizes for the given scalar type (represented as its size) + ei_internal_assert(a!=0 && b!=0 && c!=0); + int i = std::max((scalar_size>>2)-1,0); + if(i<nbScalarSizes) + { + m_maxK[i] = *a; + m_maxM[i] = *b; + m_maxN[i] = *c; + } + } + else if(action==GetAction && scalar_size==0) + { + ei_internal_assert(a!=0 && b!=0 && c==0); + *a = m_l1CacheSize; + *b = m_l2CacheSize; + } + else if(action==GetAction && scalar_size!=0) + { + ei_internal_assert(a!=0 && b!=0 && c!=0); + int i = std::min(std::max((scalar_size>>2),1),nbScalarSizes)-1; + *a = m_maxK[i]; + *b = m_maxM[i]; + *c = m_maxN[i]; + } + else + { + ei_internal_assert(false); + } +} + +/** \returns the currently set level 1 cpu cache size (in bytes) used to estimate the ideal blocking size parameters. + * \sa setCpuCacheSize */ +inline std::ptrdiff_t l1CacheSize() +{ + std::ptrdiff_t l1, l2; + ei_manage_caching_sizes(GetAction, &l1, &l2); + return l1; +} + +/** \returns the currently set level 2 cpu cache size (in bytes) used to estimate the ideal blocking size parameters. + * \sa setCpuCacheSize */ +inline std::ptrdiff_t l2CacheSize() +{ + std::ptrdiff_t l1, l2; + ei_manage_caching_sizes(GetAction, &l1, &l2); + return l2; +} + +/** Set the cpu L1 and L2 cache sizes (in bytes). + * These values are use to adjust the size of the blocks + * for the algorithms working per blocks. + * + * This function also automatically set the blocking size parameters + * for each scalar type using the following rules: + * \code + * max_k = 4 * sqrt(l1/(64*sizeof(Scalar))); + * max_m = 2 * k; + * max_n = l2/(2*max_k*sizeof(Scalar)); + * \endcode + * overwriting custom values set using the setBlockingSizes function. + * + * See setBlockingSizes() for an explanation about the meaning of these parameters. + * + * \sa setBlockingSizes */ +inline void setCpuCacheSizes(std::ptrdiff_t l1, std::ptrdiff_t l2) +{ + ei_manage_caching_sizes(SetAction, &l1, &l2); +} + +/** \brief Set the blocking size parameters \a maxK, \a maxM and \a maxN for the scalar type \a Scalar. + * + * \param[in] maxK the size of the L1 and L2 blocks along the k dimension + * \param[in] maxM the size of the L1 blocks along the m dimension + * \param[in] maxN the size of the L2 blocks along the n dimension + * + * This function sets the blocking size parameters for matrix products and related algorithms. + * More precisely, let A * B be a m x k by k x n matrix product. Then Eigen's product like + * algorithms perform L2 blocking on B with horizontal panels of size maxK x maxN, + * and L1 blocking on A with blocks of size maxM x maxK. + * + * Theoretically, for best performances maxM should be closed to maxK and maxM * maxK should + * note exceed half of the L1 cache. Likewise, maxK * maxM should be smaller than the L2 cache. + * + * Note that in practice there is no distinction between scalar types of same size. + * + * \sa setCpuCacheSizes */ +template<typename Scalar> +void setBlockingSizes(std::ptrdiff_t maxK, std::ptrdiff_t maxM, std::ptrdiff_t maxN) +{ + std::ptrdiff_t k, m, n; + typedef ei_product_blocking_traits<Scalar> Traits; + k = ((maxK)/4)*4; + m = ((maxM)/Traits::mr)*Traits::mr; + n = ((maxN)/Traits::nr)*Traits::nr; + ei_manage_caching_sizes(SetAction,&k,&m,&n,sizeof(Scalar)); +} + +/** \returns in \a makK, \a maxM and \a maxN the blocking size parameters for the scalar type \a Scalar. + * + * See setBlockingSizes for an explanation about the meaning of these parameters. + * + * \sa setBlockingSizes */ +template<typename Scalar> +void getBlockingSizes(std::ptrdiff_t& maxK, std::ptrdiff_t& maxM, std::ptrdiff_t& maxN) +{ + ei_manage_caching_sizes(GetAction,&maxK,&maxM,&maxN,sizeof(Scalar)); +} #ifdef EIGEN_HAS_FUSE_CJMADD -#define CJMADD(A,B,C,T) C = cj.pmadd(A,B,C); + #define CJMADD(A,B,C,T) C = cj.pmadd(A,B,C); #else -#define CJMADD(A,B,C,T) T = B; T = cj.pmul(A,T); C = ei_padd(C,T); -// #define CJMADD(A,B,C,T) T = A; T = cj.pmul(T,B); C = ei_padd(C,T); + #define CJMADD(A,B,C,T) T = B; T = cj.pmul(A,T); C = ei_padd(C,T); #endif // optimized GEneral packed Block * packed Panel product kernel @@ -762,6 +905,4 @@ struct ei_gemm_pack_rhs<Scalar, Index, nr, RowMajor, PanelMode> } }; -#endif // EIGEN_EXTERN_INSTANTIATIONS - #endif // EIGEN_GENERAL_BLOCK_PANEL_H diff --git a/Eigen/src/Core/products/GeneralMatrixMatrix.h b/Eigen/src/Core/products/GeneralMatrixMatrix.h index 991977c1f..3513d118e 100644 --- a/Eigen/src/Core/products/GeneralMatrixMatrix.h +++ b/Eigen/src/Core/products/GeneralMatrixMatrix.h @@ -25,8 +25,6 @@ #ifndef EIGEN_GENERAL_MATRIX_MATRIX_H #define EIGEN_GENERAL_MATRIX_MATRIX_H -#ifndef EIGEN_EXTERN_INSTANTIATIONS - /* Specialization for a row-major destination matrix => simple transposition of the product */ template< typename Scalar, typename Index, @@ -77,8 +75,13 @@ static void run(Index rows, Index cols, Index depth, typedef typename ei_packet_traits<Scalar>::type PacketType; typedef ei_product_blocking_traits<Scalar> Blocking; - Index kc = std::min<Index>(Blocking::Max_kc,depth); // cache block size along the K direction - Index mc = std::min<Index>(Blocking::Max_mc,rows); // cache block size along the M direction + Index kc; // cache block size along the K direction + Index mc; // cache block size along the M direction + Index nc; // cache block size along the N direction + getBlockingSizes<Scalar>(kc, mc, nc); + kc = std::min<Index>(kc,depth); + mc = std::min<Index>(mc,rows); + nc = std::min<Index>(nc,cols); ei_gemm_pack_rhs<Scalar, Index, Blocking::nr, RhsStorageOrder> pack_rhs; ei_gemm_pack_lhs<Scalar, Index, Blocking::mr, LhsStorageOrder> pack_lhs; @@ -159,7 +162,8 @@ static void run(Index rows, Index cols, Index depth, else #endif // EIGEN_HAS_OPENMP { - (void)info; // info is not used + EIGEN_UNUSED_VARIABLE(info); + // this is the sequential version! Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc); std::size_t sizeB = kc*Blocking::PacketSize*Blocking::nr + kc*cols; @@ -203,8 +207,6 @@ static void run(Index rows, Index cols, Index depth, }; -#endif // EIGEN_EXTERN_INSTANTIATIONS - /********************************************************************************* * Specialization of GeneralProduct<> for "large" GEMM, i.e., * implementation of the high level wrapper to ei_general_matrix_matrix_product @@ -239,7 +241,9 @@ struct ei_gemm_functor Index sharedBlockBSize() const { - return std::min<Index>(ei_product_blocking_traits<Scalar>::Max_kc,m_rhs.rows()) * m_rhs.cols(); + Index maxKc, maxMc, maxNc; + getBlockingSizes<Scalar>(maxKc, maxMc, maxNc); + return std::min<Index>(maxKc,m_rhs.rows()) * m_rhs.cols(); } protected: diff --git a/Eigen/src/Core/products/Parallelizer.h b/Eigen/src/Core/products/Parallelizer.h index 5e4eb6f1e..588f78b4c 100644 --- a/Eigen/src/Core/products/Parallelizer.h +++ b/Eigen/src/Core/products/Parallelizer.h @@ -25,6 +25,50 @@ #ifndef EIGEN_PARALLELIZER_H #define EIGEN_PARALLELIZER_H +/** \internal */ +inline void ei_manage_multi_threading(Action action, int* v) +{ + static int m_maxThreads = -1; + + if(action==SetAction) + { + ei_internal_assert(v!=0); + m_maxThreads = *v; + } + else if(action==GetAction) + { + ei_internal_assert(v!=0); + #ifdef EIGEN_HAS_OPENMP + if(m_maxThreads>0) + *v = m_maxThreads; + else + *v = omp_get_max_threads(); + #else + *v = 1; + #endif + } + else + { + ei_internal_assert(false); + } +} + +/** \returns the max number of threads reserved for Eigen + * \sa setNbThreads */ +inline int nbThreads() +{ + int ret; + ei_manage_multi_threading(GetAction, &ret); + return ret; +} + +/** Sets the max number of threads reserved for Eigen + * \sa nbThreads */ +inline void setNbThreads(int v) +{ + ei_manage_multi_threading(SetAction, &v); +} + template<typename BlockBScalar, typename Index> struct GemmParallelInfo { GemmParallelInfo() : sync(-1), users(0), rhs_start(0), rhs_length(0), blockB(0) {} @@ -57,10 +101,10 @@ void ei_parallelize_gemm(const Functor& func, Index rows, Index cols) // 2- compute the maximal number of threads from the size of the product: // FIXME this has to be fine tuned - Index max_threads = std::max(1,rows / 32); + Index max_threads = std::max<Index>(1,rows / 32); // 3 - compute the number of threads we are going to use - Index threads = std::min<Index>(omp_get_max_threads(), max_threads); + Index threads = std::min<Index>(nbThreads(), max_threads); if(threads==1) return func(0,rows, 0,cols); @@ -71,7 +115,8 @@ void ei_parallelize_gemm(const Functor& func, Index rows, Index cols) typedef typename Functor::BlockBScalar BlockBScalar; BlockBScalar* sharedBlockB = new BlockBScalar[func.sharedBlockBSize()]; - GemmParallelInfo<BlockBScalar>* info = new GemmParallelInfo<BlockBScalar>[threads]; + GemmParallelInfo<BlockBScalar,Index>* info = new + GemmParallelInfo<BlockBScalar,Index>[threads]; #pragma omp parallel for schedule(static,1) num_threads(threads) for(Index i=0; i<threads; ++i) diff --git a/Eigen/src/Core/util/BlasUtil.h b/Eigen/src/Core/util/BlasUtil.h index 6cbd26689..24d27bce2 100644 --- a/Eigen/src/Core/util/BlasUtil.h +++ b/Eigen/src/Core/util/BlasUtil.h @@ -139,7 +139,7 @@ struct ei_product_blocking_traits mr = 2 * PacketSize, // max cache block size along the K direction - Max_kc = 8 * ei_meta_sqrt<EIGEN_TUNE_FOR_CPU_CACHE_SIZE/(64*sizeof(Scalar))>::ret, + Max_kc = 4 * ei_meta_sqrt<EIGEN_TUNE_FOR_CPU_CACHE_SIZE/(64*sizeof(Scalar))>::ret, // max cache block size along the M direction Max_mc = 2*Max_kc @@ -162,7 +162,7 @@ template<typename XprType> struct ei_blas_traits && ( /* Uncomment this when the low-level matrix-vector product functions support strided vectors bool(XprType::IsVectorAtCompileTime) || */ - int(ei_inner_stride_at_compile_time<XprType>::ret) == 1) + int(ei_inner_stride_at_compile_time<XprType>::ret) <= 1) ) ? 1 : 0 }; typedef typename ei_meta_if<bool(HasUsableDirectAccess), diff --git a/Eigen/src/Core/util/Constants.h b/Eigen/src/Core/util/Constants.h index e4dbc4aef..2335a3f08 100644 --- a/Eigen/src/Core/util/Constants.h +++ b/Eigen/src/Core/util/Constants.h @@ -259,6 +259,8 @@ namespace Architecture enum { CoeffBasedProductMode, LazyCoeffBasedProductMode, OuterProduct, InnerProduct, GemvProduct, GemmProduct }; +enum Action {GetAction, SetAction}; + /** The type used to identify a dense storage. */ struct Dense {}; diff --git a/Eigen/src/Core/util/DisableMSVCWarnings.h b/Eigen/src/Core/util/DisableMSVCWarnings.h index dcc71143d..7bab741ff 100644 --- a/Eigen/src/Core/util/DisableMSVCWarnings.h +++ b/Eigen/src/Core/util/DisableMSVCWarnings.h @@ -3,7 +3,8 @@ // 4273 - QtAlignedMalloc, inconsistent DLL linkage // 4100 - unreferenced formal parameter (occurred e.g. in aligned_allocator::destroy(pointer p)) // 4101 - unreferenced local variable + // 4324 - structure was padded due to declspec(align()) // 4512 - assignment operator could not be generated #pragma warning( push ) - #pragma warning( disable : 4100 4101 4181 4244 4127 4211 4273 4512 4522 4717 ) + #pragma warning( disable : 4100 4101 4181 4244 4127 4211 4273 4324 4512 4522 4717 ) #endif diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h index b3bc9c161..6a9a7941c 100644 --- a/Eigen/src/Core/util/ForwardDeclarations.h +++ b/Eigen/src/Core/util/ForwardDeclarations.h @@ -77,6 +77,8 @@ template<typename _DiagonalVectorType> class DiagonalWrapper; template<typename _Scalar, int SizeAtCompileTime, int MaxSizeAtCompileTime=SizeAtCompileTime> class DiagonalMatrix; template<typename MatrixType, typename DiagonalType, int ProductOrder> class DiagonalProduct; template<typename MatrixType, int Index> class Diagonal; +template<int SizeAtCompileTime, int MaxSizeAtCompileTime = SizeAtCompileTime> class PermutationMatrix; +template<int SizeAtCompileTime, int MaxSizeAtCompileTime = SizeAtCompileTime> class Transpositions; template<int InnerStrideAtCompileTime, int OuterStrideAtCompileTime> class Stride; template<typename MatrixType, int MapOptions=Unaligned, typename StrideType = Stride<0,0> > class Map; @@ -158,7 +160,7 @@ template<typename MatrixType> class FullPivHouseholderQR; template<typename MatrixType> class SVD; template<typename MatrixType, unsigned int Options = 0> class JacobiSVD; template<typename MatrixType, int UpLo = Lower> class LLT; -template<typename MatrixType> class LDLT; +template<typename MatrixType, int UpLo = Lower> class LDLT; template<typename VectorsType, typename CoeffsType, int Side=OnTheLeft> class HouseholderSequence; template<typename Scalar> class PlanarRotation; diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index 3a65fb807..6a05ee87a 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -98,10 +98,6 @@ #define EIGEN_DEFAULT_DENSE_INDEX_TYPE std::ptrdiff_t #endif -#ifndef EIGEN_DEFAULT_SPARSE_INDEX_TYPE -#define EIGEN_DEFAULT_SPARSE_INDEX_TYPE int -#endif - /** Allows to disable some optimizations which might affect the accuracy of the result. * Such optimization are enabled by default, and set EIGEN_FAST_MATH to 0 to disable them. * They currently include: @@ -180,6 +176,9 @@ #define EIGEN_UNUSED #endif +// Suppresses 'unused variable' warnings. +#define EIGEN_UNUSED_VARIABLE(var) (void)var; + #if (defined __GNUC__) #define EIGEN_ASM_COMMENT(X) asm("#"X) #else @@ -269,13 +268,13 @@ * documentation in a single line. **/ -#define EIGEN_GENERIC_PUBLIC_INTERFACE_NEW(Derived) \ +#define EIGEN_GENERIC_PUBLIC_INTERFACE(Derived) \ typedef typename Eigen::ei_traits<Derived>::Scalar Scalar; /*!< \brief Numeric type, e.g. float, double, int or std::complex<float>. */ \ typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; /*!< \brief The underlying numeric type for composed scalar types. \details In cases where Scalar is e.g. std::complex<T>, T were corresponding to RealScalar. */ \ typedef typename Base::CoeffReturnType CoeffReturnType; /*!< \brief The return type for coefficient access. \details Depending on whether the object allows direct coefficient access (e.g. for a MatrixXd), this type is either 'const Scalar&' or simply 'Scalar' for objects that do not allow direct coefficient access. */ \ typedef typename Eigen::ei_nested<Derived>::type Nested; \ typedef typename Eigen::ei_traits<Derived>::StorageKind StorageKind; \ - typedef typename Eigen::ei_index<StorageKind>::type Index; \ + typedef typename Eigen::ei_traits<Derived>::Index Index; \ enum { RowsAtCompileTime = Eigen::ei_traits<Derived>::RowsAtCompileTime, \ ColsAtCompileTime = Eigen::ei_traits<Derived>::ColsAtCompileTime, \ Flags = Eigen::ei_traits<Derived>::Flags, \ @@ -292,7 +291,7 @@ typedef typename Base::CoeffReturnType CoeffReturnType; /*!< \brief The return type for coefficient access. \details Depending on whether the object allows direct coefficient access (e.g. for a MatrixXd), this type is either 'const Scalar&' or simply 'Scalar' for objects that do not allow direct coefficient access. */ \ typedef typename Eigen::ei_nested<Derived>::type Nested; \ typedef typename Eigen::ei_traits<Derived>::StorageKind StorageKind; \ - typedef typename Eigen::ei_index<StorageKind>::type Index; \ + typedef typename Eigen::ei_traits<Derived>::Index Index; \ enum { RowsAtCompileTime = Eigen::ei_traits<Derived>::RowsAtCompileTime, \ ColsAtCompileTime = Eigen::ei_traits<Derived>::ColsAtCompileTime, \ MaxRowsAtCompileTime = Eigen::ei_traits<Derived>::MaxRowsAtCompileTime, \ diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h index 6b202dbc8..c67b9774f 100644 --- a/Eigen/src/Core/util/Memory.h +++ b/Eigen/src/Core/util/Memory.h @@ -218,7 +218,7 @@ inline void ei_aligned_free(void *ptr) **/ inline void* ei_aligned_realloc(void *ptr, size_t new_size, size_t old_size) { - (void)old_size; // Suppress 'unused variable' warning. Seen in boost tee. + EIGEN_UNUSED_VARIABLE(old_size); void *result; #if !EIGEN_ALIGN diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h index cef7874a9..adb878665 100644 --- a/Eigen/src/Core/util/XprHelper.h +++ b/Eigen/src/Core/util/XprHelper.h @@ -42,13 +42,14 @@ class ei_no_assignment_operator ei_no_assignment_operator& operator=(const ei_no_assignment_operator&); }; -template<typename StorageKind> struct ei_index {}; +typedef EIGEN_DEFAULT_DENSE_INDEX_TYPE DenseIndex; -template<> -struct ei_index<Dense> -{ typedef EIGEN_DEFAULT_DENSE_INDEX_TYPE type; }; - -typedef ei_index<Dense>::type DenseIndex; +/** \internal return the index type with the largest number of bits */ +template<typename I1, typename I2> +struct ei_promote_index_type +{ + typedef typename ei_meta_if<(sizeof(I1)<sizeof(I2)), I2, I1>::ret type; +}; /** \internal If the template parameter Value is Dynamic, this class is just a wrapper around a T variable that * can be accessed using value() and setValue(). |