diff options
Diffstat (limited to 'Eigen/src')
177 files changed, 10333 insertions, 7850 deletions
diff --git a/Eigen/src/Cholesky/LDLT.h b/Eigen/src/Cholesky/LDLT.h index aa9784e54..5acbf4651 100644 --- a/Eigen/src/Cholesky/LDLT.h +++ b/Eigen/src/Cholesky/LDLT.h @@ -85,7 +85,7 @@ template<typename _MatrixType, int _UpLo> class LDLT * according to the specified problem \a size. * \sa LDLT() */ - LDLT(Index size) + explicit LDLT(Index size) : m_matrix(size, size), m_transpositions(size), m_temporary(size), @@ -98,7 +98,7 @@ template<typename _MatrixType, int _UpLo> class LDLT * This calculates the decomposition for the input \a matrix. * \sa LDLT(Index size) */ - LDLT(const MatrixType& matrix) + explicit LDLT(const MatrixType& matrix) : m_matrix(matrix.rows(), matrix.cols()), m_transpositions(matrix.rows()), m_temporary(matrix.rows()), @@ -175,13 +175,13 @@ template<typename _MatrixType, int _UpLo> class LDLT * \sa MatrixBase::ldlt(), SelfAdjointView::ldlt() */ template<typename Rhs> - inline const internal::solve_retval<LDLT, Rhs> + inline const Solve<LDLT, Rhs> solve(const MatrixBase<Rhs>& b) const { eigen_assert(m_isInitialized && "LDLT is not initialized."); eigen_assert(m_matrix.rows()==b.rows() && "LDLT::solve(): invalid number of rows of the right hand side matrix b"); - return internal::solve_retval<LDLT, Rhs>(*this, b.derived()); + return Solve<LDLT, Rhs>(*this, b.derived()); } template<typename Derived> @@ -217,6 +217,12 @@ template<typename _MatrixType, int _UpLo> class LDLT eigen_assert(m_isInitialized && "LDLT is not initialized."); return Success; } + + #ifndef EIGEN_PARSED_BY_DOXYGEN + template<typename RhsType, typename DstType> + EIGEN_DEVICE_FUNC + void _solve_impl(const RhsType &rhs, DstType &dst) const; + #endif protected: @@ -400,16 +406,16 @@ template<typename MatrixType> struct LDLT_Traits<MatrixType,Lower> { typedef const TriangularView<const MatrixType, UnitLower> MatrixL; typedef const TriangularView<const typename MatrixType::AdjointReturnType, UnitUpper> MatrixU; - static inline MatrixL getL(const MatrixType& m) { return m; } - static 
inline MatrixU getU(const MatrixType& m) { return m.adjoint(); } + static inline MatrixL getL(const MatrixType& m) { return MatrixL(m); } + static inline MatrixU getU(const MatrixType& m) { return MatrixU(m.adjoint()); } }; template<typename MatrixType> struct LDLT_Traits<MatrixType,Upper> { typedef const TriangularView<const typename MatrixType::AdjointReturnType, UnitLower> MatrixL; typedef const TriangularView<const MatrixType, UnitUpper> MatrixU; - static inline MatrixL getL(const MatrixType& m) { return m.adjoint(); } - static inline MatrixU getU(const MatrixType& m) { return m; } + static inline MatrixL getL(const MatrixType& m) { return MatrixL(m.adjoint()); } + static inline MatrixU getU(const MatrixType& m) { return MatrixU(m); } }; } // end namespace internal @@ -427,6 +433,7 @@ LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::compute(const MatrixType& a) m_transpositions.resize(size); m_isInitialized = false; m_temporary.resize(size); + m_sign = internal::ZeroSign; internal::ldlt_inplace<UpLo>::unblocked(m_matrix, m_transpositions, m_temporary, m_sign); @@ -466,52 +473,45 @@ LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::rankUpdate(const MatrixBase<Deri return *this; } -namespace internal { -template<typename _MatrixType, int _UpLo, typename Rhs> -struct solve_retval<LDLT<_MatrixType,_UpLo>, Rhs> - : solve_retval_base<LDLT<_MatrixType,_UpLo>, Rhs> +#ifndef EIGEN_PARSED_BY_DOXYGEN +template<typename _MatrixType, int _UpLo> +template<typename RhsType, typename DstType> +void LDLT<_MatrixType,_UpLo>::_solve_impl(const RhsType &rhs, DstType &dst) const { - typedef LDLT<_MatrixType,_UpLo> LDLTType; - EIGEN_MAKE_SOLVE_HELPERS(LDLTType,Rhs) - - template<typename Dest> void evalTo(Dest& dst) const + eigen_assert(rhs.rows() == rows()); + // dst = P b + dst = m_transpositions * rhs; + + // dst = L^-1 (P b) + matrixL().solveInPlace(dst); + + // dst = D^-1 (L^-1 P b) + // more precisely, use pseudo-inverse of D (see bug 241) + using std::abs; + const typename 
Diagonal<const MatrixType>::RealReturnType vecD(vectorD()); + // In some previous versions, tolerance was set to the max of 1/highest and the maximal diagonal entry * epsilon + // as motivated by LAPACK's xGELSS: + // RealScalar tolerance = numext::maxi(vectorD.array().abs().maxCoeff() *NumTraits<RealScalar>::epsilon(),RealScalar(1) / NumTraits<RealScalar>::highest()); + // However, LDLT is not rank revealing, and so adjusting the tolerance wrt to the highest + // diagonal element is not well justified and to numerical issues in some cases. + // Moreover, Lapack's xSYTRS routines use 0 for the tolerance. + RealScalar tolerance = RealScalar(1) / NumTraits<RealScalar>::highest(); + + for (Index i = 0; i < vecD.size(); ++i) { - eigen_assert(rhs().rows() == dec().matrixLDLT().rows()); - // dst = P b - dst = dec().transpositionsP() * rhs(); - - // dst = L^-1 (P b) - dec().matrixL().solveInPlace(dst); - - // dst = D^-1 (L^-1 P b) - // more precisely, use pseudo-inverse of D (see bug 241) - using std::abs; - EIGEN_USING_STD_MATH(max); - typedef typename LDLTType::MatrixType MatrixType; - typedef typename LDLTType::RealScalar RealScalar; - const typename Diagonal<const MatrixType>::RealReturnType vectorD(dec().vectorD()); - // In some previous versions, tolerance was set to the max of 1/highest and the maximal diagonal entry * epsilon - // as motivated by LAPACK's xGELSS: - // RealScalar tolerance = (max)(vectorD.array().abs().maxCoeff() *NumTraits<RealScalar>::epsilon(),RealScalar(1) / NumTraits<RealScalar>::highest()); - // However, LDLT is not rank revealing, and so adjusting the tolerance wrt to the highest - // diagonal element is not well justified and to numerical issues in some cases. - // Moreover, Lapack's xSYTRS routines use 0 for the tolerance. 
- RealScalar tolerance = RealScalar(1) / NumTraits<RealScalar>::highest(); - for (Index i = 0; i < vectorD.size(); ++i) { - if(abs(vectorD(i)) > tolerance) - dst.row(i) /= vectorD(i); - else - dst.row(i).setZero(); - } + if(abs(vecD(i)) > tolerance) + dst.row(i) /= vecD(i); + else + dst.row(i).setZero(); + } - // dst = L^-T (D^-1 L^-1 P b) - dec().matrixU().solveInPlace(dst); + // dst = L^-T (D^-1 L^-1 P b) + matrixU().solveInPlace(dst); - // dst = P^-1 (L^-T D^-1 L^-1 P b) = A^-1 b - dst = dec().transpositionsP().transpose() * dst; - } -}; + // dst = P^-1 (L^-T D^-1 L^-1 P b) = A^-1 b + dst = m_transpositions.transpose() * dst; } +#endif /** \internal use x = ldlt_object.solve(x); * diff --git a/Eigen/src/Cholesky/LLT.h b/Eigen/src/Cholesky/LLT.h index 38e820165..90194e64d 100644 --- a/Eigen/src/Cholesky/LLT.h +++ b/Eigen/src/Cholesky/LLT.h @@ -83,10 +83,10 @@ template<typename _MatrixType, int _UpLo> class LLT * according to the specified problem \a size. * \sa LLT() */ - LLT(Index size) : m_matrix(size, size), + explicit LLT(Index size) : m_matrix(size, size), m_isInitialized(false) {} - LLT(const MatrixType& matrix) + explicit LLT(const MatrixType& matrix) : m_matrix(matrix.rows(), matrix.cols()), m_isInitialized(false) { @@ -118,13 +118,13 @@ template<typename _MatrixType, int _UpLo> class LLT * \sa solveInPlace(), MatrixBase::llt(), SelfAdjointView::llt() */ template<typename Rhs> - inline const internal::solve_retval<LLT, Rhs> + inline const Solve<LLT, Rhs> solve(const MatrixBase<Rhs>& b) const { eigen_assert(m_isInitialized && "LLT is not initialized."); eigen_assert(m_matrix.rows()==b.rows() && "LLT::solve(): invalid number of rows of the right hand side matrix b"); - return internal::solve_retval<LLT, Rhs>(*this, b.derived()); + return Solve<LLT, Rhs>(*this, b.derived()); } template<typename Derived> @@ -161,6 +161,12 @@ template<typename _MatrixType, int _UpLo> class LLT template<typename VectorType> LLT rankUpdate(const VectorType& vec, const 
RealScalar& sigma = 1); + + #ifndef EIGEN_PARSED_BY_DOXYGEN + template<typename RhsType, typename DstType> + EIGEN_DEVICE_FUNC + void _solve_impl(const RhsType &rhs, DstType &dst) const; + #endif protected: /** \internal @@ -345,8 +351,8 @@ template<typename MatrixType> struct LLT_Traits<MatrixType,Lower> { typedef const TriangularView<const MatrixType, Lower> MatrixL; typedef const TriangularView<const typename MatrixType::AdjointReturnType, Upper> MatrixU; - static inline MatrixL getL(const MatrixType& m) { return m; } - static inline MatrixU getU(const MatrixType& m) { return m.adjoint(); } + static inline MatrixL getL(const MatrixType& m) { return MatrixL(m); } + static inline MatrixU getU(const MatrixType& m) { return MatrixU(m.adjoint()); } static bool inplace_decomposition(MatrixType& m) { return llt_inplace<typename MatrixType::Scalar, Lower>::blocked(m)==-1; } }; @@ -355,8 +361,8 @@ template<typename MatrixType> struct LLT_Traits<MatrixType,Upper> { typedef const TriangularView<const typename MatrixType::AdjointReturnType, Lower> MatrixL; typedef const TriangularView<const MatrixType, Upper> MatrixU; - static inline MatrixL getL(const MatrixType& m) { return m.adjoint(); } - static inline MatrixU getU(const MatrixType& m) { return m; } + static inline MatrixL getL(const MatrixType& m) { return MatrixL(m.adjoint()); } + static inline MatrixU getU(const MatrixType& m) { return MatrixU(m); } static bool inplace_decomposition(MatrixType& m) { return llt_inplace<typename MatrixType::Scalar, Upper>::blocked(m)==-1; } }; @@ -404,22 +410,16 @@ LLT<_MatrixType,_UpLo> LLT<_MatrixType,_UpLo>::rankUpdate(const VectorType& v, c return *this; } - -namespace internal { -template<typename _MatrixType, int UpLo, typename Rhs> -struct solve_retval<LLT<_MatrixType, UpLo>, Rhs> - : solve_retval_base<LLT<_MatrixType, UpLo>, Rhs> + +#ifndef EIGEN_PARSED_BY_DOXYGEN +template<typename _MatrixType,int _UpLo> +template<typename RhsType, typename DstType> +void 
LLT<_MatrixType,_UpLo>::_solve_impl(const RhsType &rhs, DstType &dst) const { - typedef LLT<_MatrixType,UpLo> LLTType; - EIGEN_MAKE_SOLVE_HELPERS(LLTType,Rhs) - - template<typename Dest> void evalTo(Dest& dst) const - { - dst = rhs(); - dec().solveInPlace(dst); - } -}; + dst = rhs; + solveInPlace(dst); } +#endif /** \internal use x = llt_object.solve(x); * diff --git a/Eigen/src/CholmodSupport/CholmodSupport.h b/Eigen/src/CholmodSupport/CholmodSupport.h index c449960de..3eadb83a0 100644 --- a/Eigen/src/CholmodSupport/CholmodSupport.h +++ b/Eigen/src/CholmodSupport/CholmodSupport.h @@ -105,7 +105,7 @@ const cholmod_sparse viewAsCholmod(const SparseMatrix<_Scalar,_Options,_Index>& /** Returns a view of the Eigen sparse matrix \a mat as Cholmod sparse matrix. * The data are not copied but shared. */ template<typename _Scalar, int _Options, typename _Index, unsigned int UpLo> -cholmod_sparse viewAsCholmod(const SparseSelfAdjointView<SparseMatrix<_Scalar,_Options,_Index>, UpLo>& mat) +cholmod_sparse viewAsCholmod(const SparseSelfAdjointView<const SparseMatrix<_Scalar,_Options,_Index>, UpLo>& mat) { cholmod_sparse res = viewAsCholmod(mat.matrix().const_cast_derived()); @@ -157,8 +157,12 @@ enum CholmodMode { * \sa class CholmodSupernodalLLT, class CholmodSimplicialLDLT, class CholmodSimplicialLLT */ template<typename _MatrixType, int _UpLo, typename Derived> -class CholmodBase : internal::noncopyable +class CholmodBase : public SparseSolverBase<Derived> { + protected: + typedef SparseSolverBase<Derived> Base; + using Base::derived; + using Base::m_isInitialized; public: typedef _MatrixType MatrixType; enum { UpLo = _UpLo }; @@ -170,14 +174,14 @@ class CholmodBase : internal::noncopyable public: CholmodBase() - : m_cholmodFactor(0), m_info(Success), m_isInitialized(false) + : m_cholmodFactor(0), m_info(Success) { m_shiftOffset[0] = m_shiftOffset[1] = RealScalar(0.0); cholmod_start(&m_cholmod); } - CholmodBase(const MatrixType& matrix) - : m_cholmodFactor(0), 
m_info(Success), m_isInitialized(false) + explicit CholmodBase(const MatrixType& matrix) + : m_cholmodFactor(0), m_info(Success) { m_shiftOffset[0] = m_shiftOffset[1] = RealScalar(0.0); cholmod_start(&m_cholmod); @@ -194,9 +198,6 @@ class CholmodBase : internal::noncopyable inline Index cols() const { return m_cholmodFactor->n; } inline Index rows() const { return m_cholmodFactor->n; } - Derived& derived() { return *static_cast<Derived*>(this); } - const Derived& derived() const { return *static_cast<const Derived*>(this); } - /** \brief Reports whether previous computation was successful. * * \returns \c Success if computation was succesful, @@ -216,34 +217,6 @@ class CholmodBase : internal::noncopyable return derived(); } - /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A. - * - * \sa compute() - */ - template<typename Rhs> - inline const internal::solve_retval<CholmodBase, Rhs> - solve(const MatrixBase<Rhs>& b) const - { - eigen_assert(m_isInitialized && "LLT is not initialized."); - eigen_assert(rows()==b.rows() - && "CholmodDecomposition::solve(): invalid number of rows of the right hand side matrix b"); - return internal::solve_retval<CholmodBase, Rhs>(*this, b.derived()); - } - - /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A. - * - * \sa compute() - */ - template<typename Rhs> - inline const internal::sparse_solve_retval<CholmodBase, Rhs> - solve(const SparseMatrixBase<Rhs>& b) const - { - eigen_assert(m_isInitialized && "LLT is not initialized."); - eigen_assert(rows()==b.rows() - && "CholmodDecomposition::solve(): invalid number of rows of the right hand side matrix b"); - return internal::sparse_solve_retval<CholmodBase, Rhs>(*this, b.derived()); - } - /** Performs a symbolic decomposition on the sparsity pattern of \a matrix. * * This function is particularly useful when solving for several problems having the same structure. 
@@ -290,7 +263,7 @@ class CholmodBase : internal::noncopyable #ifndef EIGEN_PARSED_BY_DOXYGEN /** \internal */ template<typename Rhs,typename Dest> - void _solve(const MatrixBase<Rhs> &b, MatrixBase<Dest> &dest) const + void _solve_impl(const MatrixBase<Rhs> &b, MatrixBase<Dest> &dest) const { eigen_assert(m_factorizationIsOk && "The decomposition is not in a valid state for solving, you must first call either compute() or symbolic()/numeric()"); const Index size = m_cholmodFactor->n; @@ -312,7 +285,7 @@ class CholmodBase : internal::noncopyable /** \internal */ template<typename RhsScalar, int RhsOptions, typename RhsIndex, typename DestScalar, int DestOptions, typename DestIndex> - void _solve(const SparseMatrix<RhsScalar,RhsOptions,RhsIndex> &b, SparseMatrix<DestScalar,DestOptions,DestIndex> &dest) const + void _solve_impl(const SparseMatrix<RhsScalar,RhsOptions,RhsIndex> &b, SparseMatrix<DestScalar,DestOptions,DestIndex> &dest) const { eigen_assert(m_factorizationIsOk && "The decomposition is not in a valid state for solving, you must first call either compute() or symbolic()/numeric()"); const Index size = m_cholmodFactor->n; @@ -357,7 +330,6 @@ class CholmodBase : internal::noncopyable cholmod_factor* m_cholmodFactor; RealScalar m_shiftOffset[2]; mutable ComputationInfo m_info; - bool m_isInitialized; int m_factorizationIsOk; int m_analysisIsOk; }; @@ -572,36 +544,6 @@ class CholmodDecomposition : public CholmodBase<_MatrixType, _UpLo, CholmodDecom } }; -namespace internal { - -template<typename _MatrixType, int _UpLo, typename Derived, typename Rhs> -struct solve_retval<CholmodBase<_MatrixType,_UpLo,Derived>, Rhs> - : solve_retval_base<CholmodBase<_MatrixType,_UpLo,Derived>, Rhs> -{ - typedef CholmodBase<_MatrixType,_UpLo,Derived> Dec; - EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs) - - template<typename Dest> void evalTo(Dest& dst) const - { - dec()._solve(rhs(),dst); - } -}; - -template<typename _MatrixType, int _UpLo, typename Derived, typename Rhs> -struct 
sparse_solve_retval<CholmodBase<_MatrixType,_UpLo,Derived>, Rhs> - : sparse_solve_retval_base<CholmodBase<_MatrixType,_UpLo,Derived>, Rhs> -{ - typedef CholmodBase<_MatrixType,_UpLo,Derived> Dec; - EIGEN_MAKE_SPARSE_SOLVE_HELPERS(Dec,Rhs) - - template<typename Dest> void evalTo(Dest& dst) const - { - dec()._solve(rhs(),dst); - } -}; - -} // end namespace internal - } // end namespace Eigen #endif // EIGEN_CHOLMODSUPPORT_H diff --git a/Eigen/src/Core/Array.h b/Eigen/src/Core/Array.h index 28d6f1443..337086615 100644 --- a/Eigen/src/Core/Array.h +++ b/Eigen/src/Core/Array.h @@ -74,6 +74,21 @@ class Array { return Base::operator=(other); } + + /** Set all the entries to \a value. + * \sa DenseBase::setConstant(), DenseBase::fill() + */ + /* This overload is needed because the usage of + * using Base::operator=; + * fails on MSVC. Since the code below is working with GCC and MSVC, we skipped + * the usage of 'using'. This should be done only for operator=. + */ + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Array& operator=(const Scalar &value) + { + Base::setConstant(value); + return *this; + } /** Copies the value of the expression \a other into \c *this with automatic resizing. * @@ -99,7 +114,7 @@ class Array { return Base::_set(other); } - + /** Default constructor. * * For fixed-size matrices, does nothing. @@ -144,7 +159,6 @@ class Array } #endif - #ifndef EIGEN_PARSED_BY_DOXYGEN template<typename T> EIGEN_DEVICE_FUNC @@ -244,13 +258,6 @@ class Array *this = other; } - /** Override MatrixBase::swap() since for dynamic-sized matrices of same type it is enough to swap the - * data pointers. 
- */ - template<typename OtherDerived> - void swap(ArrayBase<OtherDerived> const & other) - { this->_swap(other.derived()); } - EIGEN_DEVICE_FUNC inline Index innerStride() const { return 1; } EIGEN_DEVICE_FUNC inline Index outerStride() const { return this->innerSize(); } diff --git a/Eigen/src/Core/ArrayBase.h b/Eigen/src/Core/ArrayBase.h index 2c9ace4a7..d42693d4b 100644 --- a/Eigen/src/Core/ArrayBase.h +++ b/Eigen/src/Core/ArrayBase.h @@ -64,8 +64,7 @@ template<typename Derived> class ArrayBase using Base::MaxSizeAtCompileTime; using Base::IsVectorAtCompileTime; using Base::Flags; - using Base::CoeffReadCost; - + using Base::derived; using Base::const_cast_derived; using Base::rows; @@ -121,8 +120,15 @@ template<typename Derived> class ArrayBase EIGEN_DEVICE_FUNC Derived& operator=(const ArrayBase& other) { - return internal::assign_selector<Derived,Derived>::run(derived(), other.derived()); + internal::call_assignment(derived(), other.derived()); + return derived(); } + + /** Set all the entries to \a value. 
+ * \sa DenseBase::setConstant(), DenseBase::fill() */ + EIGEN_DEVICE_FUNC + Derived& operator=(const Scalar &value) + { Base::setConstant(value); return derived(); } EIGEN_DEVICE_FUNC Derived& operator+=(const Scalar& scalar); @@ -153,9 +159,9 @@ template<typename Derived> class ArrayBase /** \returns an \link Eigen::MatrixBase Matrix \endlink expression of this array * \sa MatrixBase::array() */ EIGEN_DEVICE_FUNC - MatrixWrapper<Derived> matrix() { return derived(); } + MatrixWrapper<Derived> matrix() { return MatrixWrapper<Derived>(derived()); } EIGEN_DEVICE_FUNC - const MatrixWrapper<const Derived> matrix() const { return derived(); } + const MatrixWrapper<const Derived> matrix() const { return MatrixWrapper<const Derived>(derived()); } // template<typename Dest> // inline void evalTo(Dest& dst) const { dst = matrix(); } @@ -186,8 +192,7 @@ template<typename OtherDerived> EIGEN_STRONG_INLINE Derived & ArrayBase<Derived>::operator-=(const ArrayBase<OtherDerived> &other) { - SelfCwiseBinaryOp<internal::scalar_difference_op<Scalar>, Derived, OtherDerived> tmp(derived()); - tmp = other.derived(); + call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar>()); return derived(); } @@ -200,8 +205,7 @@ template<typename OtherDerived> EIGEN_STRONG_INLINE Derived & ArrayBase<Derived>::operator+=(const ArrayBase<OtherDerived>& other) { - SelfCwiseBinaryOp<internal::scalar_sum_op<Scalar>, Derived, OtherDerived> tmp(derived()); - tmp = other.derived(); + call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar>()); return derived(); } @@ -214,8 +218,7 @@ template<typename OtherDerived> EIGEN_STRONG_INLINE Derived & ArrayBase<Derived>::operator*=(const ArrayBase<OtherDerived>& other) { - SelfCwiseBinaryOp<internal::scalar_product_op<Scalar>, Derived, OtherDerived> tmp(derived()); - tmp = other.derived(); + call_assignment(derived(), other.derived(), internal::mul_assign_op<Scalar,typename OtherDerived::Scalar>()); return derived(); } @@ 
-228,8 +231,7 @@ template<typename OtherDerived> EIGEN_STRONG_INLINE Derived & ArrayBase<Derived>::operator/=(const ArrayBase<OtherDerived>& other) { - SelfCwiseBinaryOp<internal::scalar_quotient_op<Scalar>, Derived, OtherDerived> tmp(derived()); - tmp = other.derived(); + call_assignment(derived(), other.derived(), internal::div_assign_op<Scalar>()); return derived(); } diff --git a/Eigen/src/Core/ArrayWrapper.h b/Eigen/src/Core/ArrayWrapper.h index 28d7b7bd5..0b89c58cb 100644 --- a/Eigen/src/Core/ArrayWrapper.h +++ b/Eigen/src/Core/ArrayWrapper.h @@ -44,6 +44,7 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> > typedef ArrayBase<ArrayWrapper> Base; EIGEN_DENSE_PUBLIC_INTERFACE(ArrayWrapper) EIGEN_INHERIT_ASSIGNMENT_OPERATORS(ArrayWrapper) + typedef typename internal::remove_all<ExpressionType>::type NestedExpression; typedef typename internal::conditional< internal::is_lvalue<ExpressionType>::value, @@ -54,7 +55,7 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> > typedef typename internal::nested<ExpressionType>::type NestedExpressionType; EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE ArrayWrapper(ExpressionType& matrix) : m_expression(matrix) {} + explicit EIGEN_STRONG_INLINE ArrayWrapper(ExpressionType& matrix) : m_expression(matrix) {} EIGEN_DEVICE_FUNC inline Index rows() const { return m_expression.rows(); } @@ -186,6 +187,7 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> > typedef MatrixBase<MatrixWrapper<ExpressionType> > Base; EIGEN_DENSE_PUBLIC_INTERFACE(MatrixWrapper) EIGEN_INHERIT_ASSIGNMENT_OPERATORS(MatrixWrapper) + typedef typename internal::remove_all<ExpressionType>::type NestedExpression; typedef typename internal::conditional< internal::is_lvalue<ExpressionType>::value, @@ -196,7 +198,7 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> > typedef typename internal::nested<ExpressionType>::type NestedExpressionType; EIGEN_DEVICE_FUNC - inline 
MatrixWrapper(ExpressionType& a_matrix) : m_expression(a_matrix) {} + explicit inline MatrixWrapper(ExpressionType& a_matrix) : m_expression(a_matrix) {} EIGEN_DEVICE_FUNC inline Index rows() const { return m_expression.rows(); } diff --git a/Eigen/src/Core/Assign.h b/Eigen/src/Core/Assign.h index 07da2fe31..53806ba33 100644 --- a/Eigen/src/Core/Assign.h +++ b/Eigen/src/Core/Assign.h @@ -14,485 +14,6 @@ namespace Eigen { -namespace internal { - -/*************************************************************************** -* Part 1 : the logic deciding a strategy for traversal and unrolling * -***************************************************************************/ - -template <typename Derived, typename OtherDerived> -struct assign_traits -{ -public: - enum { - DstIsAligned = Derived::Flags & AlignedBit, - DstHasDirectAccess = Derived::Flags & DirectAccessBit, - SrcIsAligned = OtherDerived::Flags & AlignedBit, - JointAlignment = bool(DstIsAligned) && bool(SrcIsAligned) ? Aligned : Unaligned - }; - -private: - enum { - InnerSize = int(Derived::IsVectorAtCompileTime) ? int(Derived::SizeAtCompileTime) - : int(Derived::Flags)&RowMajorBit ? int(Derived::ColsAtCompileTime) - : int(Derived::RowsAtCompileTime), - InnerMaxSize = int(Derived::IsVectorAtCompileTime) ? int(Derived::MaxSizeAtCompileTime) - : int(Derived::Flags)&RowMajorBit ? 
int(Derived::MaxColsAtCompileTime) - : int(Derived::MaxRowsAtCompileTime), - MaxSizeAtCompileTime = Derived::SizeAtCompileTime, - PacketSize = packet_traits<typename Derived::Scalar>::size - }; - - enum { - StorageOrdersAgree = (int(Derived::IsRowMajor) == int(OtherDerived::IsRowMajor)), - MightVectorize = StorageOrdersAgree - && (int(Derived::Flags) & int(OtherDerived::Flags) & ActualPacketAccessBit), - MayInnerVectorize = MightVectorize && int(InnerSize)!=Dynamic && int(InnerSize)%int(PacketSize)==0 - && int(DstIsAligned) && int(SrcIsAligned), - MayLinearize = StorageOrdersAgree && (int(Derived::Flags) & int(OtherDerived::Flags) & LinearAccessBit), - MayLinearVectorize = MightVectorize && MayLinearize && DstHasDirectAccess - && (DstIsAligned || MaxSizeAtCompileTime == Dynamic), - /* If the destination isn't aligned, we have to do runtime checks and we don't unroll, - so it's only good for large enough sizes. */ - MaySliceVectorize = MightVectorize && DstHasDirectAccess - && (int(InnerMaxSize)==Dynamic || int(InnerMaxSize)>=3*PacketSize) - /* slice vectorization can be slow, so we only want it if the slices are big, which is - indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block - in a fixed-size matrix */ - }; - -public: - enum { - Traversal = int(MayInnerVectorize) ? int(InnerVectorizedTraversal) - : int(MayLinearVectorize) ? int(LinearVectorizedTraversal) - : int(MaySliceVectorize) ? int(SliceVectorizedTraversal) - : int(MayLinearize) ? int(LinearTraversal) - : int(DefaultTraversal), - Vectorized = int(Traversal) == InnerVectorizedTraversal - || int(Traversal) == LinearVectorizedTraversal - || int(Traversal) == SliceVectorizedTraversal - }; - -private: - enum { - UnrollingLimit = EIGEN_UNROLLING_LIMIT * (Vectorized ? 
int(PacketSize) : 1), - MayUnrollCompletely = int(Derived::SizeAtCompileTime) != Dynamic - && int(OtherDerived::CoeffReadCost) != Dynamic - && int(Derived::SizeAtCompileTime) * int(OtherDerived::CoeffReadCost) <= int(UnrollingLimit), - MayUnrollInner = int(InnerSize) != Dynamic - && int(OtherDerived::CoeffReadCost) != Dynamic - && int(InnerSize) * int(OtherDerived::CoeffReadCost) <= int(UnrollingLimit) - }; - -public: - enum { - Unrolling = (int(Traversal) == int(InnerVectorizedTraversal) || int(Traversal) == int(DefaultTraversal)) - ? ( - int(MayUnrollCompletely) ? int(CompleteUnrolling) - : int(MayUnrollInner) ? int(InnerUnrolling) - : int(NoUnrolling) - ) - : int(Traversal) == int(LinearVectorizedTraversal) - ? ( bool(MayUnrollCompletely) && bool(DstIsAligned) ? int(CompleteUnrolling) : int(NoUnrolling) ) - : int(Traversal) == int(LinearTraversal) - ? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling) : int(NoUnrolling) ) - : int(NoUnrolling) - }; - -#ifdef EIGEN_DEBUG_ASSIGN - static void debug() - { - EIGEN_DEBUG_VAR(DstIsAligned) - EIGEN_DEBUG_VAR(SrcIsAligned) - EIGEN_DEBUG_VAR(JointAlignment) - EIGEN_DEBUG_VAR(Derived::SizeAtCompileTime) - EIGEN_DEBUG_VAR(OtherDerived::CoeffReadCost) - EIGEN_DEBUG_VAR(InnerSize) - EIGEN_DEBUG_VAR(InnerMaxSize) - EIGEN_DEBUG_VAR(PacketSize) - EIGEN_DEBUG_VAR(StorageOrdersAgree) - EIGEN_DEBUG_VAR(MightVectorize) - EIGEN_DEBUG_VAR(MayLinearize) - EIGEN_DEBUG_VAR(MayInnerVectorize) - EIGEN_DEBUG_VAR(MayLinearVectorize) - EIGEN_DEBUG_VAR(MaySliceVectorize) - EIGEN_DEBUG_VAR(Traversal) - EIGEN_DEBUG_VAR(UnrollingLimit) - EIGEN_DEBUG_VAR(MayUnrollCompletely) - EIGEN_DEBUG_VAR(MayUnrollInner) - EIGEN_DEBUG_VAR(Unrolling) - } -#endif -}; - -/*************************************************************************** -* Part 2 : meta-unrollers -***************************************************************************/ - -/************************ -*** Default traversal *** -************************/ - -template<typename Derived1, 
typename Derived2, int Index, int Stop> -struct assign_DefaultTraversal_CompleteUnrolling -{ - enum { - outer = Index / Derived1::InnerSizeAtCompileTime, - inner = Index % Derived1::InnerSizeAtCompileTime - }; - - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src) - { - dst.copyCoeffByOuterInner(outer, inner, src); - assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src); - } -}; - -template<typename Derived1, typename Derived2, int Stop> -struct assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, Stop, Stop> -{ - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &) {} -}; - -template<typename Derived1, typename Derived2, int Index, int Stop> -struct assign_DefaultTraversal_InnerUnrolling -{ - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src, typename Derived1::Index outer) - { - dst.copyCoeffByOuterInner(outer, Index, src); - assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src, outer); - } -}; - -template<typename Derived1, typename Derived2, int Stop> -struct assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, Stop, Stop> -{ - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &, typename Derived1::Index) {} -}; - -/*********************** -*** Linear traversal *** -***********************/ - -template<typename Derived1, typename Derived2, int Index, int Stop> -struct assign_LinearTraversal_CompleteUnrolling -{ - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src) - { - dst.copyCoeff(Index, src); - assign_LinearTraversal_CompleteUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src); - } -}; - -template<typename Derived1, typename Derived2, int Stop> -struct assign_LinearTraversal_CompleteUnrolling<Derived1, Derived2, Stop, Stop> -{ - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE void 
run(Derived1 &, const Derived2 &) {} -}; - -/************************** -*** Inner vectorization *** -**************************/ - -template<typename Derived1, typename Derived2, int Index, int Stop> -struct assign_innervec_CompleteUnrolling -{ - enum { - outer = Index / Derived1::InnerSizeAtCompileTime, - inner = Index % Derived1::InnerSizeAtCompileTime, - JointAlignment = assign_traits<Derived1,Derived2>::JointAlignment - }; - - static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src) - { - dst.template copyPacketByOuterInner<Derived2, Aligned, JointAlignment>(outer, inner, src); - assign_innervec_CompleteUnrolling<Derived1, Derived2, - Index+packet_traits<typename Derived1::Scalar>::size, Stop>::run(dst, src); - } -}; - -template<typename Derived1, typename Derived2, int Stop> -struct assign_innervec_CompleteUnrolling<Derived1, Derived2, Stop, Stop> -{ - static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &) {} -}; - -template<typename Derived1, typename Derived2, int Index, int Stop> -struct assign_innervec_InnerUnrolling -{ - static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src, typename Derived1::Index outer) - { - dst.template copyPacketByOuterInner<Derived2, Aligned, Aligned>(outer, Index, src); - assign_innervec_InnerUnrolling<Derived1, Derived2, - Index+packet_traits<typename Derived1::Scalar>::size, Stop>::run(dst, src, outer); - } -}; - -template<typename Derived1, typename Derived2, int Stop> -struct assign_innervec_InnerUnrolling<Derived1, Derived2, Stop, Stop> -{ - static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &, typename Derived1::Index) {} -}; - -/*************************************************************************** -* Part 3 : implementation of all cases -***************************************************************************/ - -template<typename Derived1, typename Derived2, - int Traversal = assign_traits<Derived1, Derived2>::Traversal, - int Unrolling = 
assign_traits<Derived1, Derived2>::Unrolling, - int Version = Specialized> -struct assign_impl; - -/************************ -*** Default traversal *** -************************/ - -template<typename Derived1, typename Derived2, int Unrolling, int Version> -struct assign_impl<Derived1, Derived2, InvalidTraversal, Unrolling, Version> -{ - EIGEN_DEVICE_FUNC - static inline void run(Derived1 &, const Derived2 &) { } -}; - -template<typename Derived1, typename Derived2, int Version> -struct assign_impl<Derived1, Derived2, DefaultTraversal, NoUnrolling, Version> -{ - typedef typename Derived1::Index Index; - EIGEN_DEVICE_FUNC - static inline void run(Derived1 &dst, const Derived2 &src) - { - const Index innerSize = dst.innerSize(); - const Index outerSize = dst.outerSize(); - for(Index outer = 0; outer < outerSize; ++outer) - for(Index inner = 0; inner < innerSize; ++inner) - dst.copyCoeffByOuterInner(outer, inner, src); - } -}; - -template<typename Derived1, typename Derived2, int Version> -struct assign_impl<Derived1, Derived2, DefaultTraversal, CompleteUnrolling, Version> -{ - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src) - { - assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime> - ::run(dst, src); - } -}; - -template<typename Derived1, typename Derived2, int Version> -struct assign_impl<Derived1, Derived2, DefaultTraversal, InnerUnrolling, Version> -{ - typedef typename Derived1::Index Index; - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src) - { - const Index outerSize = dst.outerSize(); - for(Index outer = 0; outer < outerSize; ++outer) - assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, 0, Derived1::InnerSizeAtCompileTime> - ::run(dst, src, outer); - } -}; - -/*********************** -*** Linear traversal *** -***********************/ - -template<typename Derived1, typename Derived2, int Version> -struct assign_impl<Derived1, 
Derived2, LinearTraversal, NoUnrolling, Version> -{ - typedef typename Derived1::Index Index; - EIGEN_DEVICE_FUNC - static inline void run(Derived1 &dst, const Derived2 &src) - { - const Index size = dst.size(); - for(Index i = 0; i < size; ++i) - dst.copyCoeff(i, src); - } -}; - -template<typename Derived1, typename Derived2, int Version> -struct assign_impl<Derived1, Derived2, LinearTraversal, CompleteUnrolling, Version> -{ - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src) - { - assign_LinearTraversal_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime> - ::run(dst, src); - } -}; - -/************************** -*** Inner vectorization *** -**************************/ - -template<typename Derived1, typename Derived2, int Version> -struct assign_impl<Derived1, Derived2, InnerVectorizedTraversal, NoUnrolling, Version> -{ - typedef typename Derived1::Index Index; - static inline void run(Derived1 &dst, const Derived2 &src) - { - const Index innerSize = dst.innerSize(); - const Index outerSize = dst.outerSize(); - const Index packetSize = packet_traits<typename Derived1::Scalar>::size; - for(Index outer = 0; outer < outerSize; ++outer) - for(Index inner = 0; inner < innerSize; inner+=packetSize) - dst.template copyPacketByOuterInner<Derived2, Aligned, Aligned>(outer, inner, src); - } -}; - -template<typename Derived1, typename Derived2, int Version> -struct assign_impl<Derived1, Derived2, InnerVectorizedTraversal, CompleteUnrolling, Version> -{ - static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src) - { - assign_innervec_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime> - ::run(dst, src); - } -}; - -template<typename Derived1, typename Derived2, int Version> -struct assign_impl<Derived1, Derived2, InnerVectorizedTraversal, InnerUnrolling, Version> -{ - typedef typename Derived1::Index Index; - static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src) - { - 
const Index outerSize = dst.outerSize(); - for(Index outer = 0; outer < outerSize; ++outer) - assign_innervec_InnerUnrolling<Derived1, Derived2, 0, Derived1::InnerSizeAtCompileTime> - ::run(dst, src, outer); - } -}; - -/*************************** -*** Linear vectorization *** -***************************/ - -template <bool IsAligned = false> -struct unaligned_assign_impl -{ - template <typename Derived, typename OtherDerived> - static EIGEN_STRONG_INLINE void run(const Derived&, OtherDerived&, typename Derived::Index, typename Derived::Index) {} -}; - -template <> -struct unaligned_assign_impl<false> -{ - // MSVC must not inline this functions. If it does, it fails to optimize the - // packet access path. -#ifdef _MSC_VER - template <typename Derived, typename OtherDerived> - static EIGEN_DONT_INLINE void run(const Derived& src, OtherDerived& dst, typename Derived::Index start, typename Derived::Index end) -#else - template <typename Derived, typename OtherDerived> - static EIGEN_STRONG_INLINE void run(const Derived& src, OtherDerived& dst, typename Derived::Index start, typename Derived::Index end) -#endif - { - for (typename Derived::Index index = start; index < end; ++index) - dst.copyCoeff(index, src); - } -}; - -template<typename Derived1, typename Derived2, int Version> -struct assign_impl<Derived1, Derived2, LinearVectorizedTraversal, NoUnrolling, Version> -{ - typedef typename Derived1::Index Index; - static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src) - { - const Index size = dst.size(); - typedef packet_traits<typename Derived1::Scalar> PacketTraits; - enum { - packetSize = PacketTraits::size, - dstAlignment = PacketTraits::AlignedOnScalar ? Aligned : int(assign_traits<Derived1,Derived2>::DstIsAligned) , - srcAlignment = assign_traits<Derived1,Derived2>::JointAlignment - }; - const Index alignedStart = assign_traits<Derived1,Derived2>::DstIsAligned ? 
0 - : internal::first_aligned(&dst.coeffRef(0), size); - const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize; - - unaligned_assign_impl<assign_traits<Derived1,Derived2>::DstIsAligned!=0>::run(src,dst,0,alignedStart); - - for(Index index = alignedStart; index < alignedEnd; index += packetSize) - { - dst.template copyPacket<Derived2, dstAlignment, srcAlignment>(index, src); - } - - unaligned_assign_impl<>::run(src,dst,alignedEnd,size); - } -}; - -template<typename Derived1, typename Derived2, int Version> -struct assign_impl<Derived1, Derived2, LinearVectorizedTraversal, CompleteUnrolling, Version> -{ - typedef typename Derived1::Index Index; - static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src) - { - enum { size = Derived1::SizeAtCompileTime, - packetSize = packet_traits<typename Derived1::Scalar>::size, - alignedSize = (size/packetSize)*packetSize }; - - assign_innervec_CompleteUnrolling<Derived1, Derived2, 0, alignedSize>::run(dst, src); - assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, alignedSize, size>::run(dst, src); - } -}; - -/************************** -*** Slice vectorization *** -***************************/ - -template<typename Derived1, typename Derived2, int Version> -struct assign_impl<Derived1, Derived2, SliceVectorizedTraversal, NoUnrolling, Version> -{ - typedef typename Derived1::Index Index; - static inline void run(Derived1 &dst, const Derived2 &src) - { - typedef packet_traits<typename Derived1::Scalar> PacketTraits; - enum { - packetSize = PacketTraits::size, - alignable = PacketTraits::AlignedOnScalar, - dstAlignment = alignable ? Aligned : int(assign_traits<Derived1,Derived2>::DstIsAligned) , - srcAlignment = assign_traits<Derived1,Derived2>::JointAlignment - }; - const Index packetAlignedMask = packetSize - 1; - const Index innerSize = dst.innerSize(); - const Index outerSize = dst.outerSize(); - const Index alignedStep = alignable ? 
(packetSize - dst.outerStride() % packetSize) & packetAlignedMask : 0; - Index alignedStart = ((!alignable) || assign_traits<Derived1,Derived2>::DstIsAligned) ? 0 - : internal::first_aligned(&dst.coeffRef(0,0), innerSize); - - for(Index outer = 0; outer < outerSize; ++outer) - { - const Index alignedEnd = alignedStart + ((innerSize-alignedStart) & ~packetAlignedMask); - // do the non-vectorizable part of the assignment - for(Index inner = 0; inner<alignedStart ; ++inner) - dst.copyCoeffByOuterInner(outer, inner, src); - - // do the vectorizable part of the assignment - for(Index inner = alignedStart; inner<alignedEnd; inner+=packetSize) - dst.template copyPacketByOuterInner<Derived2, dstAlignment, Unaligned>(outer, inner, src); - - // do the non-vectorizable part of the assignment - for(Index inner = alignedEnd; inner<innerSize ; ++inner) - dst.copyCoeffByOuterInner(outer, inner, src); - - alignedStart = std::min<Index>((alignedStart+alignedStep)%packetSize, innerSize); - } - } -}; - -} // end namespace internal - -/*************************************************************************** -* Part 4 : implementation of DenseBase methods -***************************************************************************/ - template<typename Derived> template<typename OtherDerived> EIGEN_STRONG_INLINE Derived& DenseBase<Derived> @@ -506,91 +27,35 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived> EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Derived,OtherDerived) EIGEN_STATIC_ASSERT(SameType,YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) -#ifdef EIGEN_TEST_EVALUATORS - -#ifdef EIGEN_DEBUG_ASSIGN - internal::copy_using_evaluator_traits<Derived, OtherDerived>::debug(); -#endif - eigen_assert(rows() == other.rows() && cols() == other.cols()); - internal::call_dense_assignment_loop(derived(),other.derived()); - -#else // EIGEN_TEST_EVALUATORS - -#ifdef EIGEN_DEBUG_ASSIGN - internal::assign_traits<Derived, 
OtherDerived>::debug(); -#endif eigen_assert(rows() == other.rows() && cols() == other.cols()); - internal::assign_impl<Derived, OtherDerived, int(SameType) ? int(internal::assign_traits<Derived, OtherDerived>::Traversal) - : int(InvalidTraversal)>::run(derived(),other.derived()); + internal::call_assignment_no_alias(derived(),other.derived()); -#endif // EIGEN_TEST_EVALUATORS - -#ifndef EIGEN_NO_DEBUG - checkTransposeAliasing(other.derived()); -#endif return derived(); } -namespace internal { - -template<typename Derived, typename OtherDerived, - bool EvalBeforeAssigning = (int(internal::traits<OtherDerived>::Flags) & EvalBeforeAssigningBit) != 0, - bool NeedToTranspose = ((int(Derived::RowsAtCompileTime) == 1 && int(OtherDerived::ColsAtCompileTime) == 1) - | // FIXME | instead of || to please GCC 4.4.0 stupid warning "suggest parentheses around &&". - // revert to || as soon as not needed anymore. - (int(Derived::ColsAtCompileTime) == 1 && int(OtherDerived::RowsAtCompileTime) == 1)) - && int(Derived::SizeAtCompileTime) != 1> -struct assign_selector; - -template<typename Derived, typename OtherDerived> -struct assign_selector<Derived,OtherDerived,false,false> { - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.derived()); } - template<typename ActualDerived, typename ActualOtherDerived> - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE Derived& evalTo(ActualDerived& dst, const ActualOtherDerived& other) { other.evalTo(dst); return dst; } -}; -template<typename Derived, typename OtherDerived> -struct assign_selector<Derived,OtherDerived,true,false> { - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.eval()); } -}; -template<typename Derived, typename OtherDerived> -struct assign_selector<Derived,OtherDerived,false,true> { - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const 
OtherDerived& other) { return dst.lazyAssign(other.transpose()); } - template<typename ActualDerived, typename ActualOtherDerived> - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE Derived& evalTo(ActualDerived& dst, const ActualOtherDerived& other) { Transpose<ActualDerived> dstTrans(dst); other.evalTo(dstTrans); return dst; } -}; -template<typename Derived, typename OtherDerived> -struct assign_selector<Derived,OtherDerived,true,true> { - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose().eval()); } -}; - -} // end namespace internal - template<typename Derived> template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator=(const DenseBase<OtherDerived>& other) { - return internal::assign_selector<Derived,OtherDerived>::run(derived(), other.derived()); + internal::call_assignment(derived(), other.derived()); + return derived(); } template<typename Derived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator=(const DenseBase& other) { - return internal::assign_selector<Derived,Derived>::run(derived(), other.derived()); + internal::call_assignment(derived(), other.derived()); + return derived(); } template<typename Derived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const MatrixBase& other) { - return internal::assign_selector<Derived,Derived>::run(derived(), other.derived()); + internal::call_assignment(derived(), other.derived()); + return derived(); } template<typename Derived> @@ -598,7 +63,8 @@ template <typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const DenseBase<OtherDerived>& other) { - return internal::assign_selector<Derived,OtherDerived>::run(derived(), other.derived()); + internal::call_assignment(derived(), other.derived()); + return derived(); } template<typename Derived> @@ -606,7 +72,8 @@ template 
<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const EigenBase<OtherDerived>& other) { - return internal::assign_selector<Derived,OtherDerived,false>::evalTo(derived(), other.derived()); + internal::call_assignment(derived(), other.derived()); + return derived(); } template<typename Derived> @@ -614,7 +81,8 @@ template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const ReturnByValue<OtherDerived>& other) { - return internal::assign_selector<Derived,OtherDerived,false>::evalTo(derived(), other.derived()); + other.derived().evalTo(derived()); + return derived(); } } // end namespace Eigen diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h index 5451a138f..4db10e697 100644 --- a/Eigen/src/Core/AssignEvaluator.h +++ b/Eigen/src/Core/AssignEvaluator.h @@ -2,7 +2,7 @@ // for linear algebra. // // Copyright (C) 2011 Benoit Jacob <jacob.benoit.1@gmail.com> -// Copyright (C) 2011-2013 Gael Guennebaud <gael.guennebaud@inria.fr> +// Copyright (C) 2011-2014 Gael Guennebaud <gael.guennebaud@inria.fr> // Copyright (C) 2011-2012 Jitse Niesen <jitse@maths.leeds.ac.uk> // // This Source Code Form is subject to the terms of the Mozilla @@ -24,37 +24,46 @@ namespace internal { // copy_using_evaluator_traits is based on assign_traits -template <typename Derived, typename OtherDerived> +template <typename DstEvaluator, typename SrcEvaluator, typename AssignFunc> struct copy_using_evaluator_traits { + typedef typename DstEvaluator::XprType Dst; + + enum { + DstFlags = DstEvaluator::Flags, + SrcFlags = SrcEvaluator::Flags + }; + public: enum { - DstIsAligned = Derived::Flags & AlignedBit, - DstHasDirectAccess = Derived::Flags & DirectAccessBit, - SrcIsAligned = OtherDerived::Flags & AlignedBit, - JointAlignment = bool(DstIsAligned) && bool(SrcIsAligned) ? 
Aligned : Unaligned, - SrcEvalBeforeAssign = (evaluator_traits<OtherDerived>::HasEvalTo == 1) + DstIsAligned = DstFlags & AlignedBit, + DstHasDirectAccess = DstFlags & DirectAccessBit, + SrcIsAligned = SrcFlags & AlignedBit, + JointAlignment = bool(DstIsAligned) && bool(SrcIsAligned) ? Aligned : Unaligned }; private: enum { - InnerSize = int(Derived::IsVectorAtCompileTime) ? int(Derived::SizeAtCompileTime) - : int(Derived::Flags)&RowMajorBit ? int(Derived::ColsAtCompileTime) - : int(Derived::RowsAtCompileTime), - InnerMaxSize = int(Derived::IsVectorAtCompileTime) ? int(Derived::MaxSizeAtCompileTime) - : int(Derived::Flags)&RowMajorBit ? int(Derived::MaxColsAtCompileTime) - : int(Derived::MaxRowsAtCompileTime), - MaxSizeAtCompileTime = Derived::SizeAtCompileTime, - PacketSize = packet_traits<typename Derived::Scalar>::size + InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime) + : int(DstFlags)&RowMajorBit ? int(Dst::ColsAtCompileTime) + : int(Dst::RowsAtCompileTime), + InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime) + : int(DstFlags)&RowMajorBit ? 
int(Dst::MaxColsAtCompileTime) + : int(Dst::MaxRowsAtCompileTime), + MaxSizeAtCompileTime = Dst::SizeAtCompileTime, + PacketSize = packet_traits<typename Dst::Scalar>::size }; enum { - StorageOrdersAgree = (int(Derived::IsRowMajor) == int(OtherDerived::IsRowMajor)), + DstIsRowMajor = DstFlags&RowMajorBit, + SrcIsRowMajor = SrcFlags&RowMajorBit, + StorageOrdersAgree = (int(DstIsRowMajor) == int(SrcIsRowMajor)), MightVectorize = StorageOrdersAgree - && (int(Derived::Flags) & int(OtherDerived::Flags) & ActualPacketAccessBit), + && (int(DstFlags) & int(SrcFlags) & ActualPacketAccessBit) + && (functor_traits<AssignFunc>::PacketAccess), MayInnerVectorize = MightVectorize && int(InnerSize)!=Dynamic && int(InnerSize)%int(PacketSize)==0 && int(DstIsAligned) && int(SrcIsAligned), - MayLinearize = StorageOrdersAgree && (int(Derived::Flags) & int(OtherDerived::Flags) & LinearAccessBit), + MayLinearize = StorageOrdersAgree && (int(DstFlags) & int(SrcFlags) & LinearAccessBit), MayLinearVectorize = MightVectorize && MayLinearize && DstHasDirectAccess && (DstIsAligned || MaxSizeAtCompileTime == Dynamic), /* If the destination isn't aligned, we have to do runtime checks and we don't unroll, @@ -68,8 +77,7 @@ private: public: enum { - Traversal = int(SrcEvalBeforeAssign) ? int(AllAtOnceTraversal) - : int(MayInnerVectorize) ? int(InnerVectorizedTraversal) + Traversal = int(MayInnerVectorize) ? int(InnerVectorizedTraversal) : int(MayLinearVectorize) ? int(LinearVectorizedTraversal) : int(MaySliceVectorize) ? int(SliceVectorizedTraversal) : int(MayLinearize) ? int(LinearTraversal) @@ -82,12 +90,12 @@ public: private: enum { UnrollingLimit = EIGEN_UNROLLING_LIMIT * (Vectorized ? 
int(PacketSize) : 1), - MayUnrollCompletely = int(Derived::SizeAtCompileTime) != Dynamic - && int(OtherDerived::CoeffReadCost) != Dynamic - && int(Derived::SizeAtCompileTime) * int(OtherDerived::CoeffReadCost) <= int(UnrollingLimit), + MayUnrollCompletely = int(Dst::SizeAtCompileTime) != Dynamic + && int(SrcEvaluator::CoeffReadCost) != Dynamic + && int(Dst::SizeAtCompileTime) * int(SrcEvaluator::CoeffReadCost) <= int(UnrollingLimit), MayUnrollInner = int(InnerSize) != Dynamic - && int(OtherDerived::CoeffReadCost) != Dynamic - && int(InnerSize) * int(OtherDerived::CoeffReadCost) <= int(UnrollingLimit) + && int(SrcEvaluator::CoeffReadCost) != Dynamic + && int(InnerSize) * int(SrcEvaluator::CoeffReadCost) <= int(UnrollingLimit) }; public: @@ -110,6 +118,12 @@ public: #ifdef EIGEN_DEBUG_ASSIGN static void debug() { + std::cerr << "DstXpr: " << typeid(typename DstEvaluator::XprType).name() << std::endl; + std::cerr << "SrcXpr: " << typeid(typename SrcEvaluator::XprType).name() << std::endl; + std::cerr.setf(std::ios::hex, std::ios::basefield); + EIGEN_DEBUG_VAR(DstFlags) + EIGEN_DEBUG_VAR(SrcFlags) + std::cerr.unsetf(std::ios::hex); EIGEN_DEBUG_VAR(DstIsAligned) EIGEN_DEBUG_VAR(SrcIsAligned) EIGEN_DEBUG_VAR(JointAlignment) @@ -127,6 +141,7 @@ public: EIGEN_DEBUG_VAR(MayUnrollCompletely) EIGEN_DEBUG_VAR(MayUnrollInner) EIGEN_DEBUG_VAR(Unrolling) + std::cerr << std::endl; } #endif }; @@ -142,6 +157,7 @@ public: template<typename Kernel, int Index, int Stop> struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling { + // FIXME: this is not very clean, perhaps this information should be provided by the kernel? 
typedef typename Kernel::DstEvaluatorType DstEvaluatorType; typedef typename DstEvaluatorType::XprType DstXprType; @@ -150,7 +166,7 @@ struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling inner = Index % DstXprType::InnerSizeAtCompileTime }; - static EIGEN_STRONG_INLINE void run(Kernel &kernel) + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) { kernel.assignCoeffByOuterInner(outer, inner); copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Index+1, Stop>::run(kernel); @@ -160,13 +176,13 @@ struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling template<typename Kernel, int Stop> struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Stop, Stop> { - static EIGEN_STRONG_INLINE void run(Kernel&) { } + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { } }; template<typename Kernel, int Index, int Stop> struct copy_using_evaluator_DefaultTraversal_InnerUnrolling { - static EIGEN_STRONG_INLINE void run(Kernel &kernel, int outer) + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, typename Kernel::Index outer) { kernel.assignCoeffByOuterInner(outer, Index); copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Index+1, Stop>::run(kernel, outer); @@ -176,7 +192,7 @@ struct copy_using_evaluator_DefaultTraversal_InnerUnrolling template<typename Kernel, int Stop> struct copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Stop, Stop> { - static EIGEN_STRONG_INLINE void run(Kernel&, int) { } + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, typename Kernel::Index) { } }; /*********************** @@ -186,7 +202,7 @@ struct copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Stop, Stop> template<typename Kernel, int Index, int Stop> struct copy_using_evaluator_LinearTraversal_CompleteUnrolling { - static EIGEN_STRONG_INLINE void run(Kernel& kernel) + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) { 
kernel.assignCoeff(Index); copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Index+1, Stop>::run(kernel); @@ -196,7 +212,7 @@ struct copy_using_evaluator_LinearTraversal_CompleteUnrolling template<typename Kernel, int Stop> struct copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Stop, Stop> { - static EIGEN_STRONG_INLINE void run(Kernel&) { } + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { } }; /************************** @@ -206,16 +222,17 @@ struct copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Stop, Stop template<typename Kernel, int Index, int Stop> struct copy_using_evaluator_innervec_CompleteUnrolling { + // FIXME: this is not very clean, perhaps this information should be provided by the kernel? typedef typename Kernel::DstEvaluatorType DstEvaluatorType; typedef typename DstEvaluatorType::XprType DstXprType; - + enum { outer = Index / DstXprType::InnerSizeAtCompileTime, inner = Index % DstXprType::InnerSizeAtCompileTime, JointAlignment = Kernel::AssignmentTraits::JointAlignment }; - static EIGEN_STRONG_INLINE void run(Kernel &kernel) + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) { kernel.template assignPacketByOuterInner<Aligned, JointAlignment>(outer, inner); enum { NextIndex = Index + packet_traits<typename DstXprType::Scalar>::size }; @@ -226,17 +243,16 @@ struct copy_using_evaluator_innervec_CompleteUnrolling template<typename Kernel, int Stop> struct copy_using_evaluator_innervec_CompleteUnrolling<Kernel, Stop, Stop> { - static EIGEN_STRONG_INLINE void run(Kernel&) { } + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { } }; template<typename Kernel, int Index, int Stop> struct copy_using_evaluator_innervec_InnerUnrolling { - static EIGEN_STRONG_INLINE void run(Kernel &kernel, int outer) + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, typename Kernel::Index outer) { kernel.template assignPacketByOuterInner<Aligned, 
Aligned>(outer, Index); - typedef typename Kernel::DstEvaluatorType::XprType DstXprType; - enum { NextIndex = Index + packet_traits<typename DstXprType::Scalar>::size }; + enum { NextIndex = Index + packet_traits<typename Kernel::Scalar>::size }; copy_using_evaluator_innervec_InnerUnrolling<Kernel, NextIndex, Stop>::run(kernel, outer); } }; @@ -244,7 +260,7 @@ struct copy_using_evaluator_innervec_InnerUnrolling template<typename Kernel, int Stop> struct copy_using_evaluator_innervec_InnerUnrolling<Kernel, Stop, Stop> { - static EIGEN_STRONG_INLINE void run(Kernel &, int) { } + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &, typename Kernel::Index) { } }; /*************************************************************************** @@ -265,7 +281,7 @@ struct dense_assignment_loop; template<typename Kernel> struct dense_assignment_loop<Kernel, DefaultTraversal, NoUnrolling> { - static void run(Kernel &kernel) + EIGEN_DEVICE_FUNC static void run(Kernel &kernel) { typedef typename Kernel::Index Index; @@ -280,7 +296,7 @@ struct dense_assignment_loop<Kernel, DefaultTraversal, NoUnrolling> template<typename Kernel> struct dense_assignment_loop<Kernel, DefaultTraversal, CompleteUnrolling> { - static EIGEN_STRONG_INLINE void run(Kernel &kernel) + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) { typedef typename Kernel::DstEvaluatorType::XprType DstXprType; copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel); @@ -291,7 +307,7 @@ template<typename Kernel> struct dense_assignment_loop<Kernel, DefaultTraversal, InnerUnrolling> { typedef typename Kernel::Index Index; - static EIGEN_STRONG_INLINE void run(Kernel &kernel) + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) { typedef typename Kernel::DstEvaluatorType::XprType DstXprType; @@ -314,7 +330,7 @@ struct unaligned_dense_assignment_loop { // if IsAligned = true, then do nothing template <typename Kernel> - 
static EIGEN_STRONG_INLINE void run(Kernel&, typename Kernel::Index, typename Kernel::Index) {} + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, typename Kernel::Index, typename Kernel::Index) {} }; template <> @@ -323,14 +339,14 @@ struct unaligned_dense_assignment_loop<false> // MSVC must not inline this functions. If it does, it fails to optimize the // packet access path. // FIXME check which version exhibits this issue -#ifdef _MSC_VER +#if EIGEN_COMP_MSVC template <typename Kernel> static EIGEN_DONT_INLINE void run(Kernel &kernel, typename Kernel::Index start, typename Kernel::Index end) #else template <typename Kernel> - static EIGEN_STRONG_INLINE void run(Kernel &kernel, + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, typename Kernel::Index start, typename Kernel::Index end) #endif @@ -343,7 +359,7 @@ struct unaligned_dense_assignment_loop<false> template<typename Kernel> struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, NoUnrolling> { - static EIGEN_STRONG_INLINE void run(Kernel &kernel) + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) { typedef typename Kernel::Index Index; @@ -371,7 +387,7 @@ template<typename Kernel> struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, CompleteUnrolling> { typedef typename Kernel::Index Index; - static EIGEN_STRONG_INLINE void run(Kernel &kernel) + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) { typedef typename Kernel::DstEvaluatorType::XprType DstXprType; @@ -391,7 +407,7 @@ struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, CompleteUnrollin template<typename Kernel> struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, NoUnrolling> { - static inline void run(Kernel &kernel) + EIGEN_DEVICE_FUNC static inline void run(Kernel &kernel) { typedef typename Kernel::Index Index; @@ -407,7 +423,7 @@ struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, NoUnrolling> template<typename 
Kernel> struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, CompleteUnrolling> { - static EIGEN_STRONG_INLINE void run(Kernel &kernel) + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) { typedef typename Kernel::DstEvaluatorType::XprType DstXprType; copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel); @@ -418,7 +434,7 @@ template<typename Kernel> struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, InnerUnrolling> { typedef typename Kernel::Index Index; - static EIGEN_STRONG_INLINE void run(Kernel &kernel) + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) { typedef typename Kernel::DstEvaluatorType::XprType DstXprType; const Index outerSize = kernel.outerSize(); @@ -434,7 +450,7 @@ struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, InnerUnrolling> template<typename Kernel> struct dense_assignment_loop<Kernel, LinearTraversal, NoUnrolling> { - static inline void run(Kernel &kernel) + EIGEN_DEVICE_FUNC static inline void run(Kernel &kernel) { typedef typename Kernel::Index Index; const Index size = kernel.size(); @@ -446,7 +462,7 @@ struct dense_assignment_loop<Kernel, LinearTraversal, NoUnrolling> template<typename Kernel> struct dense_assignment_loop<Kernel, LinearTraversal, CompleteUnrolling> { - static EIGEN_STRONG_INLINE void run(Kernel &kernel) + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) { typedef typename Kernel::DstEvaluatorType::XprType DstXprType; copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel); @@ -460,7 +476,7 @@ struct dense_assignment_loop<Kernel, LinearTraversal, CompleteUnrolling> template<typename Kernel> struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling> { - static inline void run(Kernel &kernel) + EIGEN_DEVICE_FUNC static inline void run(Kernel &kernel) { typedef typename Kernel::Index Index; typedef 
packet_traits<typename Kernel::Scalar> PacketTraits; @@ -496,25 +512,8 @@ struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling> } }; -/**************************** -*** All-at-once traversal *** -****************************/ - -// TODO: this 'AllAtOnceTraversal' should be dropped or caught earlier (Gael) -// Indeed, what to do with the kernel's functor?? -template<typename Kernel> -struct dense_assignment_loop<Kernel, AllAtOnceTraversal, NoUnrolling> -{ - static inline void run(Kernel & kernel) - { - // Evaluate rhs in temporary to prevent aliasing problems in a = a * a; - // TODO: Do not pass the xpr object to evalTo() (Jitse) - kernel.srcEvaluator().evalTo(kernel.dstEvaluator(), kernel.dstExpression()); - } -}; - /*************************************************************************** -* Part 4 : Generic Assignment routine +* Part 4 : Generic dense assignment kernel ***************************************************************************/ // This class generalize the assignment of a coefficient (or packet) from one dense evaluator @@ -523,7 +522,7 @@ struct dense_assignment_loop<Kernel, AllAtOnceTraversal, NoUnrolling> // This abstraction level permits to keep the evaluation loops as simple and as generic as possible. // One can customize the assignment using this generic dense_assignment_kernel with different // functors, or by completely overloading it, by-passing a functor. 
-template<typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor> +template<typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor, int Version = Specialized> class generic_dense_assignment_kernel { protected: @@ -535,35 +534,44 @@ public: typedef SrcEvaluatorTypeT SrcEvaluatorType; typedef typename DstEvaluatorType::Scalar Scalar; typedef typename DstEvaluatorType::Index Index; - typedef copy_using_evaluator_traits<DstXprType, SrcXprType> AssignmentTraits; + typedef copy_using_evaluator_traits<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor> AssignmentTraits; - generic_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr) + EIGEN_DEVICE_FUNC generic_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr) : m_dst(dst), m_src(src), m_functor(func), m_dstExpr(dstExpr) - {} + { + #ifdef EIGEN_DEBUG_ASSIGN + AssignmentTraits::debug(); + #endif + } - Index size() const { return m_dstExpr.size(); } - Index innerSize() const { return m_dstExpr.innerSize(); } - Index outerSize() const { return m_dstExpr.outerSize(); } - Index outerStride() const { return m_dstExpr.outerStride(); } + EIGEN_DEVICE_FUNC Index size() const { return m_dstExpr.size(); } + EIGEN_DEVICE_FUNC Index innerSize() const { return m_dstExpr.innerSize(); } + EIGEN_DEVICE_FUNC Index outerSize() const { return m_dstExpr.outerSize(); } + EIGEN_DEVICE_FUNC Index rows() const { return m_dstExpr.rows(); } + EIGEN_DEVICE_FUNC Index cols() const { return m_dstExpr.cols(); } + EIGEN_DEVICE_FUNC Index outerStride() const { return m_dstExpr.outerStride(); } // TODO get rid of this one: - DstXprType& dstExpression() const { return m_dstExpr; } + EIGEN_DEVICE_FUNC DstXprType& dstExpression() const { return m_dstExpr; } - DstEvaluatorType& dstEvaluator() { return m_dst; } - const SrcEvaluatorType& srcEvaluator() const { return m_src; } + EIGEN_DEVICE_FUNC 
DstEvaluatorType& dstEvaluator() { return m_dst; } + EIGEN_DEVICE_FUNC const SrcEvaluatorType& srcEvaluator() const { return m_src; } - void assignCoeff(Index row, Index col) + /// Assign src(row,col) to dst(row,col) through the assignment functor. + EIGEN_DEVICE_FUNC void assignCoeff(Index row, Index col) { m_functor.assignCoeff(m_dst.coeffRef(row,col), m_src.coeff(row,col)); } - void assignCoeff(Index index) + /// \sa assignCoeff(Index,Index) + EIGEN_DEVICE_FUNC void assignCoeff(Index index) { m_functor.assignCoeff(m_dst.coeffRef(index), m_src.coeff(index)); } - void assignCoeffByOuterInner(Index outer, Index inner) + /// \sa assignCoeff(Index,Index) + EIGEN_DEVICE_FUNC void assignCoeffByOuterInner(Index outer, Index inner) { Index row = rowIndexByOuterInner(outer, inner); Index col = colIndexByOuterInner(outer, inner); @@ -572,40 +580,40 @@ public: template<int StoreMode, int LoadMode> - void assignPacket(Index row, Index col) + EIGEN_DEVICE_FUNC void assignPacket(Index row, Index col) { m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(row,col), m_src.template packet<LoadMode>(row,col)); } template<int StoreMode, int LoadMode> - void assignPacket(Index index) + EIGEN_DEVICE_FUNC void assignPacket(Index index) { m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(index), m_src.template packet<LoadMode>(index)); } template<int StoreMode, int LoadMode> - void assignPacketByOuterInner(Index outer, Index inner) + EIGEN_DEVICE_FUNC void assignPacketByOuterInner(Index outer, Index inner) { Index row = rowIndexByOuterInner(outer, inner); Index col = colIndexByOuterInner(outer, inner); assignPacket<StoreMode,LoadMode>(row, col); } - static Index rowIndexByOuterInner(Index outer, Index inner) + EIGEN_DEVICE_FUNC static Index rowIndexByOuterInner(Index outer, Index inner) { typedef typename DstEvaluatorType::ExpressionTraits Traits; return int(Traits::RowsAtCompileTime) == 1 ? 0 : int(Traits::ColsAtCompileTime) == 1 ? 
inner - : int(Traits::Flags)&RowMajorBit ? outer + : int(DstEvaluatorType::Flags)&RowMajorBit ? outer : inner; } - static Index colIndexByOuterInner(Index outer, Index inner) + EIGEN_DEVICE_FUNC static Index colIndexByOuterInner(Index outer, Index inner) { typedef typename DstEvaluatorType::ExpressionTraits Traits; return int(Traits::ColsAtCompileTime) == 1 ? 0 : int(Traits::RowsAtCompileTime) == 1 ? inner - : int(Traits::Flags)&RowMajorBit ? inner + : int(DstEvaluatorType::Flags)&RowMajorBit ? inner : outer; } @@ -617,13 +625,13 @@ protected: DstXprType& m_dstExpr; }; +/*************************************************************************** +* Part 5 : Entry point for dense rectangular assignment +***************************************************************************/ + template<typename DstXprType, typename SrcXprType, typename Functor> -void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src, const Functor &func) +EIGEN_DEVICE_FUNC void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src, const Functor &func) { -#ifdef EIGEN_DEBUG_ASSIGN - // TODO these traits should be computed from information provided by the evaluators - internal::copy_using_evaluator_traits<DstXprType, SrcXprType>::debug(); -#endif eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); typedef typename evaluator<DstXprType>::type DstEvaluatorType; @@ -639,201 +647,147 @@ void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src, co } template<typename DstXprType, typename SrcXprType> -void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src) +EIGEN_DEVICE_FUNC void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src) { call_dense_assignment_loop(dst, src, internal::assign_op<typename DstXprType::Scalar>()); } /*************************************************************************** -* Part 5 : Entry points +* Part 6 : Generic assignment 
***************************************************************************/ -// Based on DenseBase::LazyAssign() -// The following functions are just for testing and they are meant to be moved to operator= and the likes. - -template<typename DstXprType, template <typename> class StorageBase, typename SrcXprType> -EIGEN_STRONG_INLINE -const DstXprType& copy_using_evaluator(const NoAlias<DstXprType, StorageBase>& dst, - const EigenBase<SrcXprType>& src) -{ - return noalias_copy_using_evaluator(dst.expression(), src.derived(), internal::assign_op<typename DstXprType::Scalar>()); -} +// Based on the respective shapes of the destination and source, +// the class AssignmentKind determines the kind of assignment mechanism. +// AssignmentKind must define a Kind typedef. +template<typename DstShape, typename SrcShape> struct AssignmentKind; -template<typename XprType, int AssumeAliasing = evaluator_traits<XprType>::AssumeAliasing> -struct AddEvalIfAssumingAliasing; +// Assignment kinds defined in this file: +struct Dense2Dense {}; +struct EigenBase2EigenBase {}; -template<typename XprType> -struct AddEvalIfAssumingAliasing<XprType, 0> -{ - static const XprType& run(const XprType& xpr) - { - return xpr; - } -}; +template<typename,typename> struct AssignmentKind { typedef EigenBase2EigenBase Kind; }; +template<> struct AssignmentKind<DenseShape,DenseShape> { typedef Dense2Dense Kind; }; + +// This is the main assignment class +template< typename DstXprType, typename SrcXprType, typename Functor, + typename Kind = typename AssignmentKind< typename evaluator_traits<DstXprType>::Shape , typename evaluator_traits<SrcXprType>::Shape >::Kind, + typename Scalar = typename DstXprType::Scalar> +struct Assignment; -template<typename XprType> -struct AddEvalIfAssumingAliasing<XprType, 1> -{ - static const EvalToTemp<XprType> run(const XprType& xpr) - { - return EvalToTemp<XprType>(xpr); - } -}; -template<typename DstXprType, typename SrcXprType, typename Functor> -EIGEN_STRONG_INLINE 
-const DstXprType& copy_using_evaluator(const EigenBase<DstXprType>& dst, const EigenBase<SrcXprType>& src, const Functor &func) -{ - return noalias_copy_using_evaluator(dst.const_cast_derived(), - AddEvalIfAssumingAliasing<SrcXprType>::run(src.derived()), - func - ); -} +// The only purpose of this call_assignment() function is to deal with noalias() / AssumeAliasing and automatic transposition. +// Indeed, I (Gael) think that this concept of AssumeAliasing was a mistake, and it makes things quite complicated. +// So this intermediate function removes everything related to AssumeAliasing such that Assignment +// does not have to bother about these annoying details. -// this mimics operator= -template<typename DstXprType, typename SrcXprType> -EIGEN_STRONG_INLINE -const DstXprType& copy_using_evaluator(const EigenBase<DstXprType>& dst, const EigenBase<SrcXprType>& src) +template<typename Dst, typename Src> +EIGEN_DEVICE_FUNC void call_assignment(Dst& dst, const Src& src) { - return copy_using_evaluator(dst.const_cast_derived(), src.derived(), internal::assign_op<typename DstXprType::Scalar>()); + call_assignment(dst, src, internal::assign_op<typename Dst::Scalar>()); } - -template<typename DstXprType, typename SrcXprType, typename Functor> -EIGEN_STRONG_INLINE -const DstXprType& noalias_copy_using_evaluator(const PlainObjectBase<DstXprType>& dst, const EigenBase<SrcXprType>& src, const Functor &func) +template<typename Dst, typename Src> +EIGEN_DEVICE_FUNC void call_assignment(const Dst& dst, const Src& src) { -#ifdef EIGEN_DEBUG_ASSIGN - internal::copy_using_evaluator_traits<DstXprType, SrcXprType>::debug(); -#endif -#ifdef EIGEN_NO_AUTOMATIC_RESIZING - eigen_assert((dst.size()==0 || (IsVectorAtCompileTime ? (dst.size() == src.size()) - : (dst.rows() == src.rows() && dst.cols() == src.cols()))) - && "Size mismatch. 
Automatic resizing is disabled because EIGEN_NO_AUTOMATIC_RESIZING is defined"); -#else - dst.const_cast_derived().resizeLike(src.derived()); -#endif - call_dense_assignment_loop(dst.const_cast_derived(), src.derived(), func); - return dst.derived(); + call_assignment(dst, src, internal::assign_op<typename Dst::Scalar>()); } - -template<typename DstXprType, typename SrcXprType, typename Functor> -EIGEN_STRONG_INLINE -const DstXprType& noalias_copy_using_evaluator(const EigenBase<DstXprType>& dst, const EigenBase<SrcXprType>& src, const Functor &func) + +// Deal with AssumeAliasing +template<typename Dst, typename Src, typename Func> +EIGEN_DEVICE_FUNC void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if<evaluator_traits<Src>::AssumeAliasing==1, void*>::type = 0) { - call_dense_assignment_loop(dst.const_cast_derived(), src.derived(), func); - return dst.derived(); + typename plain_matrix_type<Src>::type tmp(src); + call_assignment_no_alias(dst, tmp, func); } -// Based on DenseBase::swap() -// TODO: Check whether we need to do something special for swapping two -// Arrays or Matrices. 
(Jitse) - -// Overload default assignPacket behavior for swapping them -template<typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT> -class swap_kernel : public generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, swap_assign_op<typename DstEvaluatorTypeT::Scalar> > -{ - typedef generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, swap_assign_op<typename DstEvaluatorTypeT::Scalar> > Base; - typedef typename DstEvaluatorTypeT::PacketScalar PacketScalar; - using Base::m_dst; - using Base::m_src; - using Base::m_functor; - -public: - typedef typename Base::Scalar Scalar; - typedef typename Base::Index Index; - typedef typename Base::DstXprType DstXprType; - - swap_kernel(DstEvaluatorTypeT &dst, const SrcEvaluatorTypeT &src, DstXprType& dstExpr) - : Base(dst, src, swap_assign_op<Scalar>(), dstExpr) - {} - - template<int StoreMode, int LoadMode> - void assignPacket(Index row, Index col) - { - m_functor.template swapPacket<StoreMode,LoadMode,PacketScalar>(&m_dst.coeffRef(row,col), &const_cast<SrcEvaluatorTypeT&>(m_src).coeffRef(row,col)); - } - - template<int StoreMode, int LoadMode> - void assignPacket(Index index) - { - m_functor.template swapPacket<StoreMode,LoadMode,PacketScalar>(&m_dst.coeffRef(index), &const_cast<SrcEvaluatorTypeT&>(m_src).coeffRef(index)); - } - - // TODO find a simple way not to have to copy/paste this function from generic_dense_assignment_kernel, by simple I mean no CRTP (Gael) - template<int StoreMode, int LoadMode> - void assignPacketByOuterInner(Index outer, Index inner) - { - Index row = Base::rowIndexByOuterInner(outer, inner); - Index col = Base::colIndexByOuterInner(outer, inner); - assignPacket<StoreMode,LoadMode>(row, col); - } -}; - -template<typename DstXprType, typename SrcXprType> -void swap_using_evaluator(const DstXprType& dst, const SrcXprType& src) +template<typename Dst, typename Src, typename Func> +EIGEN_DEVICE_FUNC void call_assignment(Dst& dst, const Src& src, const Func& func, 
typename enable_if<evaluator_traits<Src>::AssumeAliasing==0, void*>::type = 0) { - // TODO there is too much redundancy with call_dense_assignment_loop - - eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); - - typedef typename evaluator<DstXprType>::type DstEvaluatorType; - typedef typename evaluator<SrcXprType>::type SrcEvaluatorType; - - DstEvaluatorType dstEvaluator(dst); - SrcEvaluatorType srcEvaluator(src); - - typedef swap_kernel<DstEvaluatorType,SrcEvaluatorType> Kernel; - Kernel kernel(dstEvaluator, srcEvaluator, dst.const_cast_derived()); - - dense_assignment_loop<Kernel>::run(kernel); + call_assignment_no_alias(dst, src, func); } -// Based on MatrixBase::operator+= (in CwiseBinaryOp.h) -template<typename DstXprType, typename SrcXprType> -void add_assign_using_evaluator(const MatrixBase<DstXprType>& dst, const MatrixBase<SrcXprType>& src) +// by-pass AssumeAliasing +// FIXME the const version should probably not be needed +// When there is no aliasing, we require that 'dst' has been properly resized +template<typename Dst, template <typename> class StorageBase, typename Src, typename Func> +EIGEN_DEVICE_FUNC void call_assignment(const NoAlias<Dst,StorageBase>& dst, const Src& src, const Func& func) { - typedef typename DstXprType::Scalar Scalar; - copy_using_evaluator(dst.derived(), src.derived(), add_assign_op<Scalar>()); + call_assignment_no_alias(dst.expression(), src, func); } - -// Based on ArrayBase::operator+= -template<typename DstXprType, typename SrcXprType> -void add_assign_using_evaluator(const ArrayBase<DstXprType>& dst, const ArrayBase<SrcXprType>& src) +template<typename Dst, template <typename> class StorageBase, typename Src, typename Func> +EIGEN_DEVICE_FUNC void call_assignment(NoAlias<Dst,StorageBase>& dst, const Src& src, const Func& func) { - typedef typename DstXprType::Scalar Scalar; - copy_using_evaluator(dst.derived(), src.derived(), add_assign_op<Scalar>()); + call_assignment_no_alias(dst.expression(), src, 
func); } -// TODO: Add add_assign_using_evaluator for EigenBase ? (Jitse) -template<typename DstXprType, typename SrcXprType> -void subtract_assign_using_evaluator(const MatrixBase<DstXprType>& dst, const MatrixBase<SrcXprType>& src) +template<typename Dst, typename Src, typename Func> +EIGEN_DEVICE_FUNC void call_assignment_no_alias(Dst& dst, const Src& src, const Func& func) { - typedef typename DstXprType::Scalar Scalar; - copy_using_evaluator(dst.derived(), src.derived(), sub_assign_op<Scalar>()); -} + enum { + NeedToTranspose = ( (int(Dst::RowsAtCompileTime) == 1 && int(Src::ColsAtCompileTime) == 1) + | // FIXME | instead of || to please GCC 4.4.0 stupid warning "suggest parentheses around &&". + // revert to || as soon as not needed anymore. + (int(Dst::ColsAtCompileTime) == 1 && int(Src::RowsAtCompileTime) == 1)) + && int(Dst::SizeAtCompileTime) != 1 + }; -template<typename DstXprType, typename SrcXprType> -void subtract_assign_using_evaluator(const ArrayBase<DstXprType>& dst, const ArrayBase<SrcXprType>& src) -{ - typedef typename DstXprType::Scalar Scalar; - copy_using_evaluator(dst.derived(), src.derived(), sub_assign_op<Scalar>()); + typename Dst::Index dstRows = NeedToTranspose ? src.cols() : src.rows(); + typename Dst::Index dstCols = NeedToTranspose ? 
src.rows() : src.cols(); + if((dst.rows()!=dstRows) || (dst.cols()!=dstCols)) + dst.resize(dstRows, dstCols); + + typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst>::type ActualDstTypeCleaned; + typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst&>::type ActualDstType; + ActualDstType actualDst(dst); + + // TODO check whether this is the right place to perform these checks: + EIGEN_STATIC_ASSERT_LVALUE(Dst) + EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(ActualDstTypeCleaned,Src) + + // TODO this line is commented out to allow matrix = permutation + // Actually, the "Scalar" type for a permutation matrix does not really make sense, + // perhaps it could be void, and EIGEN_CHECK_BINARY_COMPATIBILIY could allow mixing void with anything...? +// EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename ActualDstTypeCleaned::Scalar,typename Src::Scalar); + + Assignment<ActualDstTypeCleaned,Src,Func>::run(actualDst, src, func); } - -template<typename DstXprType, typename SrcXprType> -void multiply_assign_using_evaluator(const ArrayBase<DstXprType>& dst, const ArrayBase<SrcXprType>& src) +template<typename Dst, typename Src> +EIGEN_DEVICE_FUNC void call_assignment_no_alias(Dst& dst, const Src& src) { - typedef typename DstXprType::Scalar Scalar; - copy_using_evaluator(dst.derived(), src.derived(), mul_assign_op<Scalar>()); + call_assignment_no_alias(dst, src, internal::assign_op<typename Dst::Scalar>()); } -template<typename DstXprType, typename SrcXprType> -void divide_assign_using_evaluator(const ArrayBase<DstXprType>& dst, const ArrayBase<SrcXprType>& src) +// forward declaration +template<typename Dst, typename Src> void check_for_aliasing(const Dst &dst, const Src &src); + +// Generic Dense to Dense assignment +template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar> +struct Assignment<DstXprType, SrcXprType, Functor, Dense2Dense, Scalar> { - typedef typename DstXprType::Scalar Scalar; - 
copy_using_evaluator(dst.derived(), src.derived(), div_assign_op<Scalar>()); -} + EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const Functor &func) + { + eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); + +#ifndef EIGEN_NO_DEBUG + internal::check_for_aliasing(dst, src); +#endif + + call_dense_assignment_loop(dst, src, func); + } +}; +// Generic assignment through evalTo. +// TODO: not sure we have to keep that one, but it helps porting current code to new evaluator mechanism. +template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar> +struct Assignment<DstXprType, SrcXprType, Functor, EigenBase2EigenBase, Scalar> +{ + EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar> &/*func*/) + { + eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); + + src.evalTo(dst); + } +}; } // namespace internal diff --git a/Eigen/src/Core/BandMatrix.h b/Eigen/src/Core/BandMatrix.h index ffd7fe8b3..e59ee3da9 100644 --- a/Eigen/src/Core/BandMatrix.h +++ b/Eigen/src/Core/BandMatrix.h @@ -204,7 +204,7 @@ class BandMatrix : public BandMatrixBase<BandMatrix<_Scalar,Rows,Cols,Supers,Sub typedef typename internal::traits<BandMatrix>::Index Index; typedef typename internal::traits<BandMatrix>::CoefficientsType CoefficientsType; - inline BandMatrix(Index rows=Rows, Index cols=Cols, Index supers=Supers, Index subs=Subs) + explicit inline BandMatrix(Index rows=Rows, Index cols=Cols, Index supers=Supers, Index subs=Subs) : m_coeffs(1+supers+subs,cols), m_rows(rows), m_supers(supers), m_subs(subs) { @@ -266,7 +266,7 @@ class BandMatrixWrapper : public BandMatrixBase<BandMatrixWrapper<_CoefficientsT typedef typename internal::traits<BandMatrixWrapper>::CoefficientsType CoefficientsType; typedef typename internal::traits<BandMatrixWrapper>::Index Index; - inline BandMatrixWrapper(const CoefficientsType& coeffs, Index rows=_Rows, Index 
cols=_Cols, Index supers=_Supers, Index subs=_Subs) + explicit inline BandMatrixWrapper(const CoefficientsType& coeffs, Index rows=_Rows, Index cols=_Cols, Index supers=_Supers, Index subs=_Subs) : m_coeffs(coeffs), m_rows(rows), m_supers(supers), m_subs(subs) { @@ -314,7 +314,7 @@ class TridiagonalMatrix : public BandMatrix<Scalar,Size,Size,Options&SelfAdjoint typedef BandMatrix<Scalar,Size,Size,Options&SelfAdjoint?0:1,1,Options|RowMajor> Base; typedef typename Base::Index Index; public: - TridiagonalMatrix(Index size = Size) : Base(size,size,Options&SelfAdjoint?0:1,1) {} + explicit TridiagonalMatrix(Index size = Size) : Base(size,size,Options&SelfAdjoint?0:1,1) {} inline typename Base::template DiagonalIntReturnType<1>::Type super() { return Base::template diagonal<1>(); } @@ -327,6 +327,25 @@ class TridiagonalMatrix : public BandMatrix<Scalar,Size,Size,Options&SelfAdjoint protected: }; + +struct BandShape {}; + +template<typename _Scalar, int _Rows, int _Cols, int _Supers, int _Subs, int _Options> +struct evaluator_traits<BandMatrix<_Scalar,_Rows,_Cols,_Supers,_Subs,_Options> > + : public evaluator_traits_base<BandMatrix<_Scalar,_Rows,_Cols,_Supers,_Subs,_Options> > +{ + typedef BandShape Shape; +}; + +template<typename _CoefficientsType,int _Rows, int _Cols, int _Supers, int _Subs,int _Options> +struct evaluator_traits<BandMatrixWrapper<_CoefficientsType,_Rows,_Cols,_Supers,_Subs,_Options> > + : public evaluator_traits_base<BandMatrixWrapper<_CoefficientsType,_Rows,_Cols,_Supers,_Subs,_Options> > +{ + typedef BandShape Shape; +}; + +template<> struct AssignmentKind<DenseShape,BandShape> { typedef EigenBase2EigenBase Kind; }; + } // end namespace internal } // end namespace Eigen diff --git a/Eigen/src/Core/Block.h b/Eigen/src/Core/Block.h index da193d1a2..9cf9d5432 100644 --- a/Eigen/src/Core/Block.h +++ b/Eigen/src/Core/Block.h @@ -68,6 +68,7 @@ struct traits<Block<XprType, BlockRows, BlockCols, InnerPanel> > : traits<XprTyp MaxColsAtCompileTime = BlockCols==0 
? 0 : ColsAtCompileTime != Dynamic ? int(ColsAtCompileTime) : int(traits<XprType>::MaxColsAtCompileTime), + XprTypeIsRowMajor = (int(traits<XprType>::Flags)&RowMajorBit) != 0, IsRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1 : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0 @@ -80,18 +81,14 @@ struct traits<Block<XprType, BlockRows, BlockCols, InnerPanel> > : traits<XprTyp OuterStrideAtCompileTime = HasSameStorageOrderAsXprType ? int(outer_stride_at_compile_time<XprType>::ret) : int(inner_stride_at_compile_time<XprType>::ret), - MaskPacketAccessBit = (InnerSize == Dynamic || (InnerSize % packet_traits<Scalar>::size) == 0) - && (InnerStrideAtCompileTime == 1) - ? PacketAccessBit : 0, - MaskAlignedBit = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % EIGEN_ALIGN_BYTES) == 0)) ? AlignedBit : 0, - FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1 || (InnerPanel && (traits<XprType>::Flags&LinearAccessBit))) ? LinearAccessBit : 0, + // IsAligned is needed by MapBase's assertions + // We can safely set it to false here. Internal alignment errors will be detected by an eigen_internal_assert in the respective evaluator + IsAligned = 0, + // FIXME, this traits is rather specialized for dense objects and it needs to be cleaned further FlagsLvalueBit = is_lvalue<XprType>::value ? LvalueBit : 0, FlagsRowMajorBit = IsRowMajor ? 
RowMajorBit : 0, - Flags0 = traits<XprType>::Flags & ( (HereditaryBits & ~RowMajorBit) | - DirectAccessBit | - MaskPacketAccessBit | - MaskAlignedBit), - Flags = Flags0 | FlagsLinearAccessBit | FlagsLvalueBit | FlagsRowMajorBit + Flags = (traits<XprType>::Flags & DirectAccessBit) | FlagsLvalueBit | FlagsRowMajorBit + // FIXME DirectAccessBit should not be handled by expressions }; }; @@ -111,6 +108,8 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel> class typedef Impl Base; EIGEN_GENERIC_PUBLIC_INTERFACE(Block) EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Block) + + typedef typename internal::remove_all<XprType>::type NestedExpression; /** Column or Row constructor */ @@ -179,7 +178,7 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H EIGEN_DENSE_PUBLIC_INTERFACE(BlockType) EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl_dense) - class InnerIterator; + // class InnerIterator; // FIXME apparently never used /** Column or Row constructor */ @@ -333,6 +332,9 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true> : public MapBase<Block<XprType, BlockRows, BlockCols, InnerPanel> > { typedef Block<XprType, BlockRows, BlockCols, InnerPanel> BlockType; + enum { + XprTypeIsRowMajor = (int(traits<XprType>::Flags)&RowMajorBit) != 0 + }; public: typedef MapBase<BlockType> Base; @@ -343,9 +345,8 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true> */ EIGEN_DEVICE_FUNC inline BlockImpl_dense(XprType& xpr, Index i) - : Base(internal::const_cast_ptr(&xpr.coeffRef( - (BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) ? i : 0, - (BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) ? i : 0)), + : Base(xpr.data() + i * ( ((BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) && (!XprTypeIsRowMajor)) + || ((BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) && ( XprTypeIsRowMajor)) ? xpr.innerStride() : xpr.outerStride()), BlockRows==1 ? 1 : xpr.rows(), BlockCols==1 ? 
1 : xpr.cols()), m_xpr(xpr) @@ -357,7 +358,8 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true> */ EIGEN_DEVICE_FUNC inline BlockImpl_dense(XprType& xpr, Index startRow, Index startCol) - : Base(internal::const_cast_ptr(&xpr.coeffRef(startRow,startCol))), m_xpr(xpr) + : Base(xpr.data()+xpr.innerStride()*(XprTypeIsRowMajor?startCol:startRow) + xpr.outerStride()*(XprTypeIsRowMajor?startRow:startCol)), + m_xpr(xpr) { init(); } @@ -368,7 +370,7 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true> inline BlockImpl_dense(XprType& xpr, Index startRow, Index startCol, Index blockRows, Index blockCols) - : Base(internal::const_cast_ptr(&xpr.coeffRef(startRow,startCol)), blockRows, blockCols), + : Base(xpr.data()+xpr.innerStride()*(XprTypeIsRowMajor?startCol:startRow) + xpr.outerStride()*(XprTypeIsRowMajor?startRow:startCol), blockRows, blockCols), m_xpr(xpr) { init(); diff --git a/Eigen/src/Core/BooleanRedux.h b/Eigen/src/Core/BooleanRedux.h index be9f48a8c..dac1887e0 100644 --- a/Eigen/src/Core/BooleanRedux.h +++ b/Eigen/src/Core/BooleanRedux.h @@ -17,9 +17,10 @@ namespace internal { template<typename Derived, int UnrollCount> struct all_unroller { + typedef typename Derived::ExpressionTraits Traits; enum { - col = (UnrollCount-1) / Derived::RowsAtCompileTime, - row = (UnrollCount-1) % Derived::RowsAtCompileTime + col = (UnrollCount-1) / Traits::RowsAtCompileTime, + row = (UnrollCount-1) % Traits::RowsAtCompileTime }; static inline bool run(const Derived &mat) @@ -43,11 +44,12 @@ struct all_unroller<Derived, Dynamic> template<typename Derived, int UnrollCount> struct any_unroller { + typedef typename Derived::ExpressionTraits Traits; enum { - col = (UnrollCount-1) / Derived::RowsAtCompileTime, - row = (UnrollCount-1) % Derived::RowsAtCompileTime + col = (UnrollCount-1) / Traits::RowsAtCompileTime, + row = (UnrollCount-1) % Traits::RowsAtCompileTime }; - + static inline bool run(const Derived &mat) { return any_unroller<Derived, 
UnrollCount-1>::run(mat) || mat.coeff(row, col); @@ -78,19 +80,21 @@ struct any_unroller<Derived, Dynamic> template<typename Derived> inline bool DenseBase<Derived>::all() const { + typedef typename internal::evaluator<Derived>::type Evaluator; enum { unroll = SizeAtCompileTime != Dynamic - && CoeffReadCost != Dynamic + && Evaluator::CoeffReadCost != Dynamic && NumTraits<Scalar>::AddCost != Dynamic - && SizeAtCompileTime * (CoeffReadCost + NumTraits<Scalar>::AddCost) <= EIGEN_UNROLLING_LIMIT + && SizeAtCompileTime * (Evaluator::CoeffReadCost + NumTraits<Scalar>::AddCost) <= EIGEN_UNROLLING_LIMIT }; + Evaluator evaluator(derived()); if(unroll) - return internal::all_unroller<Derived, unroll ? int(SizeAtCompileTime) : Dynamic>::run(derived()); + return internal::all_unroller<Evaluator, unroll ? int(SizeAtCompileTime) : Dynamic>::run(evaluator); else { for(Index j = 0; j < cols(); ++j) for(Index i = 0; i < rows(); ++i) - if (!coeff(i, j)) return false; + if (!evaluator.coeff(i, j)) return false; return true; } } @@ -102,19 +106,21 @@ inline bool DenseBase<Derived>::all() const template<typename Derived> inline bool DenseBase<Derived>::any() const { + typedef typename internal::evaluator<Derived>::type Evaluator; enum { unroll = SizeAtCompileTime != Dynamic - && CoeffReadCost != Dynamic + && Evaluator::CoeffReadCost != Dynamic && NumTraits<Scalar>::AddCost != Dynamic - && SizeAtCompileTime * (CoeffReadCost + NumTraits<Scalar>::AddCost) <= EIGEN_UNROLLING_LIMIT + && SizeAtCompileTime * (Evaluator::CoeffReadCost + NumTraits<Scalar>::AddCost) <= EIGEN_UNROLLING_LIMIT }; + Evaluator evaluator(derived()); if(unroll) - return internal::any_unroller<Derived, unroll ? int(SizeAtCompileTime) : Dynamic>::run(derived()); + return internal::any_unroller<Evaluator, unroll ? 
int(SizeAtCompileTime) : Dynamic>::run(evaluator); else { for(Index j = 0; j < cols(); ++j) for(Index i = 0; i < rows(); ++i) - if (coeff(i, j)) return true; + if (evaluator.coeff(i, j)) return true; return false; } } diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h index 3568cb85f..1c7123b85 100644 --- a/Eigen/src/Core/CoreEvaluators.h +++ b/Eigen/src/Core/CoreEvaluators.h @@ -2,7 +2,7 @@ // for linear algebra. // // Copyright (C) 2011 Benoit Jacob <jacob.benoit.1@gmail.com> -// Copyright (C) 2011 Gael Guennebaud <gael.guennebaud@inria.fr> +// Copyright (C) 2011-2014 Gael Guennebaud <gael.guennebaud@inria.fr> // Copyright (C) 2011-2012 Jitse Niesen <jitse@maths.leeds.ac.uk> // // This Source Code Form is subject to the terms of the Mozilla @@ -14,57 +14,85 @@ #define EIGEN_COREEVALUATORS_H namespace Eigen { - + namespace internal { -// evaluator_traits<T> contains traits for evaluator_impl<T> +// This class returns the evaluator kind from the expression storage kind. +// Default assumes index based accessors +template<typename StorageKind> +struct storage_kind_to_evaluator_kind { + typedef IndexBased Kind; +}; -template<typename T> -struct evaluator_traits -{ - // 1 if evaluator_impl<T>::evalTo() exists - // 0 if evaluator_impl<T> allows coefficient-based access - static const int HasEvalTo = 0; +// This class returns the evaluator shape from the expression storage kind. +// It can be Dense, Sparse, Triangular, Diagonal, SelfAdjoint, Band, etc. +template<typename StorageKind> struct storage_kind_to_shape; - // 1 if assignment A = B assumes aliasing when B is of type T and thus B needs to be evaluated into a - // temporary; 0 if not. 
- static const int AssumeAliasing = 0; -}; -// expression class for evaluating nested expression to a temporary - -template<typename ArgType> -class EvalToTemp; +template<> struct storage_kind_to_shape<Dense> { typedef DenseShape Shape; }; -// evaluator<T>::type is type of evaluator for T -// evaluator<T>::nestedType is type of evaluator if T is nested inside another evaluator - -template<typename T> -struct evaluator_impl -{ }; - -template<typename T, int Nested = evaluator_traits<T>::HasEvalTo> -struct evaluator_nested_type; + +// FIXME Is this necessary? And why was it not before refactoring??? +template<> struct storage_kind_to_shape<PermutationStorage> { typedef PermutationShape Shape; }; + + +// Evaluators have to be specialized with respect to various criteria such as: +// - storage/structure/shape +// - scalar type +// - etc. +// Therefore, we need specialization of evaluator providing additional template arguments for each kind of evaluators. +// We currently distinguish the following kind of evaluators: +// - unary_evaluator for expressions taking only one arguments (CwiseUnaryOp, CwiseUnaryView, Transpose, MatrixWrapper, ArrayWrapper, Reverse, Replicate) +// - binary_evaluator for expression taking two arguments (CwiseBinaryOp) +// - product_evaluator for linear algebra products (Product); special case of binary_evaluator because it requires additional tags for dispatching. 
+// - mapbase_evaluator for Map, Block, Ref +// - block_evaluator for Block (special dispatching to a mapbase_evaluator or unary_evaluator) + +template< typename T, + typename LhsKind = typename evaluator_traits<typename T::Lhs>::Kind, + typename RhsKind = typename evaluator_traits<typename T::Rhs>::Kind, + typename LhsScalar = typename traits<typename T::Lhs>::Scalar, + typename RhsScalar = typename traits<typename T::Rhs>::Scalar> struct binary_evaluator; + +template< typename T, + typename Kind = typename evaluator_traits<typename T::NestedExpression>::Kind, + typename Scalar = typename T::Scalar> struct unary_evaluator; + +// evaluator_traits<T> contains traits for evaluator<T> template<typename T> -struct evaluator_nested_type<T, 0> +struct evaluator_traits_base { - typedef evaluator_impl<T> type; + // TODO check whether these two indirections are really needed. + // Basically, if nobody overwrite type and nestedType, then, they can be dropped +// typedef evaluator<T> type; +// typedef evaluator<T> nestedType; + + // by default, get evaluator kind and shape from storage + typedef typename storage_kind_to_evaluator_kind<typename traits<T>::StorageKind>::Kind Kind; + typedef typename storage_kind_to_shape<typename traits<T>::StorageKind>::Shape Shape; + + // 1 if assignment A = B assumes aliasing when B is of type T and thus B needs to be evaluated into a + // temporary; 0 if not. 
+ static const int AssumeAliasing = 0; }; +// Default evaluator traits template<typename T> -struct evaluator_nested_type<T, 1> +struct evaluator_traits : public evaluator_traits_base<T> { - typedef evaluator_impl<EvalToTemp<T> > type; }; + +// By default, we assume a unary expression: template<typename T> -struct evaluator +struct evaluator : public unary_evaluator<T> { - typedef evaluator_impl<T> type; - typedef typename evaluator_nested_type<T>::type nestedType; + typedef unary_evaluator<T> Base; + EIGEN_DEVICE_FUNC explicit evaluator(const T& xpr) : Base(xpr) {} }; + // TODO: Think about const-correctness template<typename T> @@ -76,47 +104,59 @@ struct evaluator<const T> // TODO this class does not seem to be necessary anymore template<typename ExpressionType> -struct evaluator_impl_base +struct evaluator_base { - typedef typename ExpressionType::Index Index; +// typedef typename evaluator_traits<ExpressionType>::type type; +// typedef typename evaluator_traits<ExpressionType>::nestedType nestedType; + typedef evaluator<ExpressionType> type; + typedef evaluator<ExpressionType> nestedType; + + typedef typename traits<ExpressionType>::Index Index; // TODO that's not very nice to have to propagate all these traits. They are currently only needed to handle outer,inner indices. typedef traits<ExpressionType> ExpressionTraits; - - evaluator_impl<ExpressionType>& derived() - { - return *static_cast<evaluator_impl<ExpressionType>*>(this); - } }; // -------------------- Matrix and Array -------------------- // -// evaluator_impl<PlainObjectBase> is a common base class for the +// evaluator<PlainObjectBase> is a common base class for the // Matrix and Array evaluators. +// Here we directly specialize evaluator. This is not really a unary expression, and it is, by definition, dense, +// so no need for more sophisticated dispatching. 
template<typename Derived> -struct evaluator_impl<PlainObjectBase<Derived> > - : evaluator_impl_base<Derived> +struct evaluator<PlainObjectBase<Derived> > + : evaluator_base<Derived> { typedef PlainObjectBase<Derived> PlainObjectType; + typedef typename PlainObjectType::Index Index; + typedef typename PlainObjectType::Scalar Scalar; + typedef typename PlainObjectType::CoeffReturnType CoeffReturnType; + typedef typename PlainObjectType::PacketScalar PacketScalar; + typedef typename PlainObjectType::PacketReturnType PacketReturnType; enum { IsRowMajor = PlainObjectType::IsRowMajor, IsVectorAtCompileTime = PlainObjectType::IsVectorAtCompileTime, RowsAtCompileTime = PlainObjectType::RowsAtCompileTime, - ColsAtCompileTime = PlainObjectType::ColsAtCompileTime + ColsAtCompileTime = PlainObjectType::ColsAtCompileTime, + + CoeffReadCost = NumTraits<Scalar>::ReadCost, + Flags = compute_matrix_evaluator_flags< Scalar,Derived::RowsAtCompileTime,Derived::ColsAtCompileTime, + Derived::Options,Derived::MaxRowsAtCompileTime,Derived::MaxColsAtCompileTime>::ret }; - - evaluator_impl(const PlainObjectType& m) + + EIGEN_DEVICE_FUNC evaluator() + : m_data(0), + m_outerStride(IsVectorAtCompileTime ? 0 + : int(IsRowMajor) ? ColsAtCompileTime + : RowsAtCompileTime) + {} + + EIGEN_DEVICE_FUNC explicit evaluator(const PlainObjectType& m) : m_data(m.data()), m_outerStride(IsVectorAtCompileTime ? 
0 : m.outerStride()) { } - typedef typename PlainObjectType::Index Index; - typedef typename PlainObjectType::Scalar Scalar; - typedef typename PlainObjectType::CoeffReturnType CoeffReturnType; - typedef typename PlainObjectType::PacketScalar PacketScalar; - typedef typename PlainObjectType::PacketReturnType PacketReturnType; - - CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const { if (IsRowMajor) return m_data[row * m_outerStride.value() + col]; @@ -124,12 +164,12 @@ struct evaluator_impl<PlainObjectBase<Derived> > return m_data[row + col * m_outerStride.value()]; } - CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const { return m_data[index]; } - Scalar& coeffRef(Index row, Index col) + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col) { if (IsRowMajor) return const_cast<Scalar*>(m_data)[row * m_outerStride.value() + col]; @@ -137,7 +177,7 @@ struct evaluator_impl<PlainObjectBase<Derived> > return const_cast<Scalar*>(m_data)[row + col * m_outerStride.value()]; } - Scalar& coeffRef(Index index) + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index) { return const_cast<Scalar*>(m_data)[index]; } @@ -184,153 +224,45 @@ protected: }; template<typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols> -struct evaluator_impl<Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols> > - : evaluator_impl<PlainObjectBase<Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols> > > +struct evaluator<Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols> > + : evaluator<PlainObjectBase<Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols> > > { typedef Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols> XprType; + + evaluator() {} - evaluator_impl(const XprType& m) - : evaluator_impl<PlainObjectBase<XprType> >(m) + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& m) + : evaluator<PlainObjectBase<XprType> >(m) { } }; template<typename Scalar, 
int Rows, int Cols, int Options, int MaxRows, int MaxCols> -struct evaluator_impl<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> > - : evaluator_impl<PlainObjectBase<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> > > +struct evaluator<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> > + : evaluator<PlainObjectBase<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> > > { typedef Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> XprType; - evaluator_impl(const XprType& m) - : evaluator_impl<PlainObjectBase<XprType> >(m) - { } -}; - -// -------------------- EvalToTemp -------------------- - -template<typename ArgType> -struct traits<EvalToTemp<ArgType> > - : public traits<ArgType> -{ }; - -template<typename ArgType> -class EvalToTemp - : public dense_xpr_base<EvalToTemp<ArgType> >::type -{ - public: - - typedef typename dense_xpr_base<EvalToTemp>::type Base; - EIGEN_GENERIC_PUBLIC_INTERFACE(EvalToTemp) - - EvalToTemp(const ArgType& arg) - : m_arg(arg) - { } - - const ArgType& arg() const - { - return m_arg; - } - - Index rows() const - { - return m_arg.rows(); - } - - Index cols() const - { - return m_arg.cols(); - } - - private: - const ArgType& m_arg; -}; - -template<typename ArgType> -struct evaluator_impl<EvalToTemp<ArgType> > -{ - typedef EvalToTemp<ArgType> XprType; - typedef typename ArgType::PlainObject PlainObject; - - evaluator_impl(const XprType& xpr) - : m_result(xpr.rows(), xpr.cols()), m_resultImpl(m_result) - { - // TODO we should simply do m_result(xpr.arg()); - call_dense_assignment_loop(m_result, xpr.arg()); - } - - // This constructor is used when nesting an EvalTo evaluator in another evaluator - evaluator_impl(const ArgType& arg) - : m_result(arg.rows(), arg.cols()), m_resultImpl(m_result) - { - // TODO we should simply do m_result(xpr.arg()); - call_dense_assignment_loop(m_result, arg); - } - - typedef typename PlainObject::Index Index; - typedef typename PlainObject::Scalar Scalar; - typedef typename 
PlainObject::CoeffReturnType CoeffReturnType; - typedef typename PlainObject::PacketScalar PacketScalar; - typedef typename PlainObject::PacketReturnType PacketReturnType; - - // All other functions are forwarded to m_resultImpl - - CoeffReturnType coeff(Index row, Index col) const - { - return m_resultImpl.coeff(row, col); - } - - CoeffReturnType coeff(Index index) const - { - return m_resultImpl.coeff(index); - } + evaluator() {} - Scalar& coeffRef(Index row, Index col) - { - return m_resultImpl.coeffRef(row, col); - } - - Scalar& coeffRef(Index index) - { - return m_resultImpl.coeffRef(index); - } - - template<int LoadMode> - PacketReturnType packet(Index row, Index col) const - { - return m_resultImpl.template packet<LoadMode>(row, col); - } - - template<int LoadMode> - PacketReturnType packet(Index index) const - { - return m_resultImpl.packet<LoadMode>(index); - } - - template<int StoreMode> - void writePacket(Index row, Index col, const PacketScalar& x) - { - m_resultImpl.template writePacket<StoreMode>(row, col, x); - } - - template<int StoreMode> - void writePacket(Index index, const PacketScalar& x) - { - m_resultImpl.template writePacket<StoreMode>(index, x); - } - -protected: - PlainObject m_result; - typename evaluator<PlainObject>::nestedType m_resultImpl; + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& m) + : evaluator<PlainObjectBase<XprType> >(m) + { } }; // -------------------- Transpose -------------------- template<typename ArgType> -struct evaluator_impl<Transpose<ArgType> > - : evaluator_impl_base<Transpose<ArgType> > +struct unary_evaluator<Transpose<ArgType>, IndexBased> + : evaluator_base<Transpose<ArgType> > { typedef Transpose<ArgType> XprType; + + enum { + CoeffReadCost = evaluator<ArgType>::CoeffReadCost, + Flags = evaluator<ArgType>::Flags ^ RowMajorBit + }; - evaluator_impl(const XprType& t) : m_argImpl(t.nestedExpression()) {} + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& t) : m_argImpl(t.nestedExpression()) {} 
typedef typename XprType::Index Index; typedef typename XprType::Scalar Scalar; @@ -338,22 +270,22 @@ struct evaluator_impl<Transpose<ArgType> > typedef typename XprType::PacketScalar PacketScalar; typedef typename XprType::PacketReturnType PacketReturnType; - CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const { return m_argImpl.coeff(col, row); } - CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const { return m_argImpl.coeff(index); } - Scalar& coeffRef(Index row, Index col) + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col) { return m_argImpl.coeffRef(col, row); } - typename XprType::Scalar& coeffRef(Index index) + EIGEN_DEVICE_FUNC typename XprType::Scalar& coeffRef(Index index) { return m_argImpl.coeffRef(index); } @@ -387,13 +319,27 @@ protected: }; // -------------------- CwiseNullaryOp -------------------- +// Like Matrix and Array, this is not really a unary expression, so we directly specialize evaluator. +// Likewise, there is not need to more sophisticated dispatching here. template<typename NullaryOp, typename PlainObjectType> -struct evaluator_impl<CwiseNullaryOp<NullaryOp,PlainObjectType> > +struct evaluator<CwiseNullaryOp<NullaryOp,PlainObjectType> > + : evaluator_base<CwiseNullaryOp<NullaryOp,PlainObjectType> > { typedef CwiseNullaryOp<NullaryOp,PlainObjectType> XprType; + typedef typename internal::remove_all<PlainObjectType>::type PlainObjectTypeCleaned; + + enum { + CoeffReadCost = internal::functor_traits<NullaryOp>::Cost, + + Flags = (evaluator<PlainObjectTypeCleaned>::Flags + & ( HereditaryBits + | (functor_has_linear_access<NullaryOp>::ret ? LinearAccessBit : 0) + | (functor_traits<NullaryOp>::PacketAccess ? PacketAccessBit : 0))) + | (functor_traits<NullaryOp>::IsRepeatable ? 
0 : EvalBeforeNestingBit) // FIXME EvalBeforeNestingBit should be needed anymore + }; - evaluator_impl(const XprType& n) + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& n) : m_functor(n.functor()) { } @@ -401,12 +347,12 @@ struct evaluator_impl<CwiseNullaryOp<NullaryOp,PlainObjectType> > typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename XprType::PacketScalar PacketScalar; - CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const { return m_functor(row, col); } - CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const { return m_functor(index); } @@ -430,11 +376,20 @@ protected: // -------------------- CwiseUnaryOp -------------------- template<typename UnaryOp, typename ArgType> -struct evaluator_impl<CwiseUnaryOp<UnaryOp, ArgType> > +struct unary_evaluator<CwiseUnaryOp<UnaryOp, ArgType>, IndexBased > + : evaluator_base<CwiseUnaryOp<UnaryOp, ArgType> > { typedef CwiseUnaryOp<UnaryOp, ArgType> XprType; + + enum { + CoeffReadCost = evaluator<ArgType>::CoeffReadCost + functor_traits<UnaryOp>::Cost, + + Flags = evaluator<ArgType>::Flags & ( + HereditaryBits | LinearAccessBit | AlignedBit + | (functor_traits<UnaryOp>::PacketAccess ? 
PacketAccessBit : 0)) + }; - evaluator_impl(const XprType& op) + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op) : m_functor(op.functor()), m_argImpl(op.nestedExpression()) { } @@ -443,12 +398,12 @@ struct evaluator_impl<CwiseUnaryOp<UnaryOp, ArgType> > typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename XprType::PacketScalar PacketScalar; - CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const { return m_functor(m_argImpl.coeff(row, col)); } - CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const { return m_functor(m_argImpl.coeff(index)); } @@ -472,12 +427,43 @@ protected: // -------------------- CwiseBinaryOp -------------------- +// this is a binary expression template<typename BinaryOp, typename Lhs, typename Rhs> -struct evaluator_impl<CwiseBinaryOp<BinaryOp, Lhs, Rhs> > +struct evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs> > + : public binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs> > { typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> XprType; + typedef binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs> > Base; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(xpr) {} +}; - evaluator_impl(const XprType& xpr) +template<typename BinaryOp, typename Lhs, typename Rhs> +struct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IndexBased, IndexBased> + : evaluator_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> > +{ + typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> XprType; + + enum { + CoeffReadCost = evaluator<Lhs>::CoeffReadCost + evaluator<Rhs>::CoeffReadCost + functor_traits<BinaryOp>::Cost, + + LhsFlags = evaluator<Lhs>::Flags, + RhsFlags = evaluator<Rhs>::Flags, + SameType = is_same<typename Lhs::Scalar,typename Rhs::Scalar>::value, + StorageOrdersAgree = (int(LhsFlags)&RowMajorBit)==(int(RhsFlags)&RowMajorBit), + Flags0 = (int(LhsFlags) | int(RhsFlags)) & ( + HereditaryBits + | (int(LhsFlags) & int(RhsFlags) 
& + ( AlignedBit + | (StorageOrdersAgree ? LinearAccessBit : 0) + | (functor_traits<BinaryOp>::PacketAccess && StorageOrdersAgree && SameType ? PacketAccessBit : 0) + ) + ) + ), + Flags = (Flags0 & ~RowMajorBit) | (LhsFlags & RowMajorBit) + }; + + EIGEN_DEVICE_FUNC explicit binary_evaluator(const XprType& xpr) : m_functor(xpr.functor()), m_lhsImpl(xpr.lhs()), m_rhsImpl(xpr.rhs()) @@ -487,12 +473,12 @@ struct evaluator_impl<CwiseBinaryOp<BinaryOp, Lhs, Rhs> > typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename XprType::PacketScalar PacketScalar; - CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const { return m_functor(m_lhsImpl.coeff(row, col), m_rhsImpl.coeff(row, col)); } - CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const { return m_functor(m_lhsImpl.coeff(index), m_rhsImpl.coeff(index)); } @@ -501,14 +487,14 @@ struct evaluator_impl<CwiseBinaryOp<BinaryOp, Lhs, Rhs> > PacketScalar packet(Index row, Index col) const { return m_functor.packetOp(m_lhsImpl.template packet<LoadMode>(row, col), - m_rhsImpl.template packet<LoadMode>(row, col)); + m_rhsImpl.template packet<LoadMode>(row, col)); } template<int LoadMode> PacketScalar packet(Index index) const { return m_functor.packetOp(m_lhsImpl.template packet<LoadMode>(index), - m_rhsImpl.template packet<LoadMode>(index)); + m_rhsImpl.template packet<LoadMode>(index)); } protected: @@ -520,12 +506,18 @@ protected: // -------------------- CwiseUnaryView -------------------- template<typename UnaryOp, typename ArgType> -struct evaluator_impl<CwiseUnaryView<UnaryOp, ArgType> > - : evaluator_impl_base<CwiseUnaryView<UnaryOp, ArgType> > +struct unary_evaluator<CwiseUnaryView<UnaryOp, ArgType>, IndexBased> + : evaluator_base<CwiseUnaryView<UnaryOp, ArgType> > { typedef CwiseUnaryView<UnaryOp, ArgType> XprType; + + enum { + CoeffReadCost = evaluator<ArgType>::CoeffReadCost + 
functor_traits<UnaryOp>::Cost, + + Flags = (evaluator<ArgType>::Flags & (HereditaryBits | LinearAccessBit | DirectAccessBit)) + }; - evaluator_impl(const XprType& op) + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op) : m_unaryOp(op.functor()), m_argImpl(op.nestedExpression()) { } @@ -534,22 +526,22 @@ struct evaluator_impl<CwiseUnaryView<UnaryOp, ArgType> > typedef typename XprType::Scalar Scalar; typedef typename XprType::CoeffReturnType CoeffReturnType; - CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const { return m_unaryOp(m_argImpl.coeff(row, col)); } - CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const { return m_unaryOp(m_argImpl.coeff(index)); } - Scalar& coeffRef(Index row, Index col) + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col) { return m_unaryOp(m_argImpl.coeffRef(row, col)); } - Scalar& coeffRef(Index index) + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index) { return m_unaryOp(m_argImpl.coeffRef(index)); } @@ -561,13 +553,15 @@ protected: // -------------------- Map -------------------- -template<typename Derived, int AccessorsType> -struct evaluator_impl<MapBase<Derived, AccessorsType> > - : evaluator_impl_base<Derived> -{ - typedef MapBase<Derived, AccessorsType> MapType; - typedef Derived XprType; +// FIXME perhaps the PlainObjectType could be provided by Derived::PlainObject ? 
+// but that might complicate template specialization +template<typename Derived, typename PlainObjectType> +struct mapbase_evaluator; +template<typename Derived, typename PlainObjectType> +struct mapbase_evaluator : evaluator_base<Derived> +{ + typedef Derived XprType; typedef typename XprType::PointerType PointerType; typedef typename XprType::Index Index; typedef typename XprType::Scalar Scalar; @@ -575,81 +569,121 @@ struct evaluator_impl<MapBase<Derived, AccessorsType> > typedef typename XprType::PacketScalar PacketScalar; typedef typename XprType::PacketReturnType PacketReturnType; - evaluator_impl(const XprType& map) - : m_data(const_cast<PointerType>(map.data())), - m_rowStride(map.rowStride()), - m_colStride(map.colStride()) - { } - enum { - RowsAtCompileTime = XprType::RowsAtCompileTime + IsRowMajor = XprType::RowsAtCompileTime, + ColsAtCompileTime = XprType::ColsAtCompileTime, + CoeffReadCost = NumTraits<Scalar>::ReadCost }; + + EIGEN_DEVICE_FUNC explicit mapbase_evaluator(const XprType& map) + : m_data(const_cast<PointerType>(map.data())), + m_xpr(map) + { + EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(evaluator<Derived>::Flags&PacketAccessBit, internal::inner_stride_at_compile_time<Derived>::ret==1), + PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1); + } - CoeffReturnType coeff(Index row, Index col) const - { - return m_data[col * m_colStride + row * m_rowStride]; + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const + { + return m_data[col * m_xpr.colStride() + row * m_xpr.rowStride()]; } - CoeffReturnType coeff(Index index) const - { - return coeff(RowsAtCompileTime == 1 ? 0 : index, - RowsAtCompileTime == 1 ? 
index : 0); + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const + { + return m_data[index * m_xpr.innerStride()]; } - Scalar& coeffRef(Index row, Index col) - { - return m_data[col * m_colStride + row * m_rowStride]; + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col) + { + return m_data[col * m_xpr.colStride() + row * m_xpr.rowStride()]; } - Scalar& coeffRef(Index index) - { - return coeffRef(RowsAtCompileTime == 1 ? 0 : index, - RowsAtCompileTime == 1 ? index : 0); + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index) + { + return m_data[index * m_xpr.innerStride()]; } template<int LoadMode> PacketReturnType packet(Index row, Index col) const - { - PointerType ptr = m_data + row * m_rowStride + col * m_colStride; + { + PointerType ptr = m_data + row * m_xpr.rowStride() + col * m_xpr.colStride(); return internal::ploadt<PacketScalar, LoadMode>(ptr); } template<int LoadMode> PacketReturnType packet(Index index) const - { - return packet<LoadMode>(RowsAtCompileTime == 1 ? 0 : index, - RowsAtCompileTime == 1 ? index : 0); + { + return internal::ploadt<PacketScalar, LoadMode>(m_data + index * m_xpr.innerStride()); } template<int StoreMode> void writePacket(Index row, Index col, const PacketScalar& x) - { - PointerType ptr = m_data + row * m_rowStride + col * m_colStride; + { + PointerType ptr = m_data + row * m_xpr.rowStride() + col * m_xpr.colStride(); return internal::pstoret<Scalar, PacketScalar, StoreMode>(ptr, x); } template<int StoreMode> void writePacket(Index index, const PacketScalar& x) - { - return writePacket<StoreMode>(RowsAtCompileTime == 1 ? 0 : index, - RowsAtCompileTime == 1 ? 
index : 0, - x); + { + internal::pstoret<Scalar, PacketScalar, StoreMode>(m_data + index * m_xpr.innerStride(), x); } protected: PointerType m_data; - int m_rowStride; - int m_colStride; + const XprType& m_xpr; }; template<typename PlainObjectType, int MapOptions, typename StrideType> -struct evaluator_impl<Map<PlainObjectType, MapOptions, StrideType> > - : public evaluator_impl<MapBase<Map<PlainObjectType, MapOptions, StrideType> > > +struct evaluator<Map<PlainObjectType, MapOptions, StrideType> > + : public mapbase_evaluator<Map<PlainObjectType, MapOptions, StrideType>, PlainObjectType> { typedef Map<PlainObjectType, MapOptions, StrideType> XprType; + typedef typename XprType::Scalar Scalar; + + enum { + InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0 + ? int(PlainObjectType::InnerStrideAtCompileTime) + : int(StrideType::InnerStrideAtCompileTime), + OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0 + ? int(PlainObjectType::OuterStrideAtCompileTime) + : int(StrideType::OuterStrideAtCompileTime), + HasNoInnerStride = InnerStrideAtCompileTime == 1, + HasNoOuterStride = StrideType::OuterStrideAtCompileTime == 0, + HasNoStride = HasNoInnerStride && HasNoOuterStride, + IsAligned = bool(EIGEN_ALIGN) && ((int(MapOptions)&Aligned)==Aligned), + IsDynamicSize = PlainObjectType::SizeAtCompileTime==Dynamic, + KeepsPacketAccess = bool(HasNoInnerStride) + && ( bool(IsDynamicSize) + || HasNoOuterStride + || ( OuterStrideAtCompileTime!=Dynamic + && ((static_cast<int>(sizeof(Scalar))*OuterStrideAtCompileTime)%EIGEN_ALIGN_BYTES)==0 ) ), + Flags0 = evaluator<PlainObjectType>::Flags, + Flags1 = IsAligned ? (int(Flags0) | AlignedBit) : (int(Flags0) & ~AlignedBit), + Flags2 = (bool(HasNoStride) || bool(PlainObjectType::IsVectorAtCompileTime)) + ? int(Flags1) : int(Flags1 & ~LinearAccessBit), + Flags = KeepsPacketAccess ? 
int(Flags2) : (int(Flags2) & ~PacketAccessBit) + }; - evaluator_impl(const XprType& map) - : evaluator_impl<MapBase<XprType> >(map) + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& map) + : mapbase_evaluator<XprType, PlainObjectType>(map) + { } +}; + +// -------------------- Ref -------------------- + +template<typename PlainObjectType, int RefOptions, typename StrideType> +struct evaluator<Ref<PlainObjectType, RefOptions, StrideType> > + : public mapbase_evaluator<Ref<PlainObjectType, RefOptions, StrideType>, PlainObjectType> +{ + typedef Ref<PlainObjectType, RefOptions, StrideType> XprType; + + enum { + Flags = evaluator<Map<PlainObjectType, RefOptions, StrideType> >::Flags + }; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& ref) + : mapbase_evaluator<XprType, PlainObjectType>(ref) { } }; @@ -659,21 +693,68 @@ template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel, bool HasDirectAccess = internal::has_direct_access<ArgType>::ret> struct block_evaluator; template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel> -struct evaluator_impl<Block<ArgType, BlockRows, BlockCols, InnerPanel> > +struct evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> > : block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel> { typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType; + typedef typename XprType::Scalar Scalar; + + enum { + CoeffReadCost = evaluator<ArgType>::CoeffReadCost, + + RowsAtCompileTime = traits<XprType>::RowsAtCompileTime, + ColsAtCompileTime = traits<XprType>::ColsAtCompileTime, + MaxRowsAtCompileTime = traits<XprType>::MaxRowsAtCompileTime, + MaxColsAtCompileTime = traits<XprType>::MaxColsAtCompileTime, + + ArgTypeIsRowMajor = (int(evaluator<ArgType>::Flags)&RowMajorBit) != 0, + IsRowMajor = (MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1) ? 1 + : (MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1) ? 
0 + : ArgTypeIsRowMajor, + HasSameStorageOrderAsArgType = (IsRowMajor == ArgTypeIsRowMajor), + InnerSize = IsRowMajor ? int(ColsAtCompileTime) : int(RowsAtCompileTime), + InnerStrideAtCompileTime = HasSameStorageOrderAsArgType + ? int(inner_stride_at_compile_time<ArgType>::ret) + : int(outer_stride_at_compile_time<ArgType>::ret), + OuterStrideAtCompileTime = HasSameStorageOrderAsArgType + ? int(outer_stride_at_compile_time<ArgType>::ret) + : int(inner_stride_at_compile_time<ArgType>::ret), + MaskPacketAccessBit = (InnerSize == Dynamic || (InnerSize % packet_traits<Scalar>::size) == 0) + && (InnerStrideAtCompileTime == 1) + ? PacketAccessBit : 0, + + MaskAlignedBit = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % EIGEN_ALIGN_BYTES) == 0)) ? AlignedBit : 0, + FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1 || (InnerPanel && (evaluator<ArgType>::Flags&LinearAccessBit))) ? LinearAccessBit : 0, + FlagsRowMajorBit = XprType::Flags&RowMajorBit, + Flags0 = evaluator<ArgType>::Flags & ( (HereditaryBits & ~RowMajorBit) | + DirectAccessBit | + MaskPacketAccessBit | + MaskAlignedBit), + Flags = Flags0 | FlagsLinearAccessBit | FlagsRowMajorBit + }; typedef block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel> block_evaluator_type; - evaluator_impl(const XprType& block) : block_evaluator_type(block) {} + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& block) : block_evaluator_type(block) {} }; +// no direct-access => dispatch to a unary evaluator template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel> struct block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel, /*HasDirectAccess*/ false> - : evaluator_impl_base<Block<ArgType, BlockRows, BlockCols, InnerPanel> > + : unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> > +{ + typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType; + + EIGEN_DEVICE_FUNC explicit block_evaluator(const XprType& 
block) + : unary_evaluator<XprType>(block) + {} +}; + +template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel> +struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBased> + : evaluator_base<Block<ArgType, BlockRows, BlockCols, InnerPanel> > { typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType; - block_evaluator(const XprType& block) + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& block) : m_argImpl(block.nestedExpression()), m_startRow(block.startRow()), m_startCol(block.startCol()) @@ -689,26 +770,24 @@ struct block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel, /*HasDirectAcc RowsAtCompileTime = XprType::RowsAtCompileTime }; - CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const { return m_argImpl.coeff(m_startRow.value() + row, m_startCol.value() + col); } - CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const { - return coeff(RowsAtCompileTime == 1 ? 0 : index, - RowsAtCompileTime == 1 ? index : 0); + return coeff(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0); } - Scalar& coeffRef(Index row, Index col) + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col) { return m_argImpl.coeffRef(m_startRow.value() + row, m_startCol.value() + col); } - Scalar& coeffRef(Index index) + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index) { - return coeffRef(RowsAtCompileTime == 1 ? 0 : index, - RowsAtCompileTime == 1 ? index : 0); + return coeffRef(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0); } template<int LoadMode> @@ -721,7 +800,7 @@ struct block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel, /*HasDirectAcc PacketReturnType packet(Index index) const { return packet<LoadMode>(RowsAtCompileTime == 1 ? 0 : index, - RowsAtCompileTime == 1 ? index : 0); + RowsAtCompileTime == 1 ? 
index : 0); } template<int StoreMode> @@ -734,8 +813,8 @@ struct block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel, /*HasDirectAcc void writePacket(Index index, const PacketScalar& x) { return writePacket<StoreMode>(RowsAtCompileTime == 1 ? 0 : index, - RowsAtCompileTime == 1 ? index : 0, - x); + RowsAtCompileTime == 1 ? index : 0, + x); } protected: @@ -749,24 +828,38 @@ protected: template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel> struct block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel, /* HasDirectAccess */ true> - : evaluator_impl<MapBase<Block<ArgType, BlockRows, BlockCols, InnerPanel> > > + : mapbase_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, + typename Block<ArgType, BlockRows, BlockCols, InnerPanel>::PlainObject> { typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType; - block_evaluator(const XprType& block) - : evaluator_impl<MapBase<XprType> >(block) - { } + EIGEN_DEVICE_FUNC explicit block_evaluator(const XprType& block) + : mapbase_evaluator<XprType, typename XprType::PlainObject>(block) + { + // FIXME this should be an internal assertion + eigen_assert(EIGEN_IMPLIES(evaluator<XprType>::Flags&AlignedBit, (size_t(block.data()) % EIGEN_ALIGN_BYTES) == 0) && "data is not aligned"); + } }; // -------------------- Select -------------------- +// TODO shall we introduce a ternary_evaluator? 
+// TODO enable vectorization for Select template<typename ConditionMatrixType, typename ThenMatrixType, typename ElseMatrixType> -struct evaluator_impl<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> > +struct evaluator<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> > + : evaluator_base<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> > { typedef Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> XprType; + enum { + CoeffReadCost = evaluator<ConditionMatrixType>::CoeffReadCost + + EIGEN_SIZE_MAX(evaluator<ThenMatrixType>::CoeffReadCost, + evaluator<ElseMatrixType>::CoeffReadCost), + + Flags = (unsigned int)evaluator<ThenMatrixType>::Flags & evaluator<ElseMatrixType>::Flags & HereditaryBits + }; - evaluator_impl(const XprType& select) + inline EIGEN_DEVICE_FUNC explicit evaluator(const XprType& select) : m_conditionImpl(select.conditionMatrix()), m_thenImpl(select.thenMatrix()), m_elseImpl(select.elseMatrix()) @@ -775,7 +868,7 @@ struct evaluator_impl<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType typedef typename XprType::Index Index; typedef typename XprType::CoeffReturnType CoeffReturnType; - CoeffReturnType coeff(Index row, Index col) const + inline EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const { if (m_conditionImpl.coeff(row, col)) return m_thenImpl.coeff(row, col); @@ -783,7 +876,7 @@ struct evaluator_impl<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType return m_elseImpl.coeff(row, col); } - CoeffReturnType coeff(Index index) const + inline EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const { if (m_conditionImpl.coeff(index)) return m_thenImpl.coeff(index); @@ -801,21 +894,33 @@ protected: // -------------------- Replicate -------------------- template<typename ArgType, int RowFactor, int ColFactor> -struct evaluator_impl<Replicate<ArgType, RowFactor, ColFactor> > +struct unary_evaluator<Replicate<ArgType, RowFactor, ColFactor> > + : 
evaluator_base<Replicate<ArgType, RowFactor, ColFactor> > { typedef Replicate<ArgType, RowFactor, ColFactor> XprType; - - evaluator_impl(const XprType& replicate) - : m_argImpl(replicate.nestedExpression()), - m_rows(replicate.nestedExpression().rows()), - m_cols(replicate.nestedExpression().cols()) - { } - typedef typename XprType::Index Index; typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename XprType::PacketReturnType PacketReturnType; + enum { + Factor = (RowFactor==Dynamic || ColFactor==Dynamic) ? Dynamic : RowFactor*ColFactor + }; + typedef typename internal::nested_eval<ArgType,Factor>::type ArgTypeNested; + typedef typename internal::remove_all<ArgTypeNested>::type ArgTypeNestedCleaned; + + enum { + CoeffReadCost = evaluator<ArgTypeNestedCleaned>::CoeffReadCost, + + Flags = (evaluator<ArgTypeNestedCleaned>::Flags & HereditaryBits & ~RowMajorBit) | (traits<XprType>::Flags & RowMajorBit) + }; - CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& replicate) + : m_arg(replicate.nestedExpression()), + m_argImpl(m_arg), + m_rows(replicate.nestedExpression().rows()), + m_cols(replicate.nestedExpression().cols()) + {} + + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const { // try to avoid using modulo; this is a pure optimization strategy const Index actual_row = internal::traits<XprType>::RowsAtCompileTime==1 ? 0 @@ -842,9 +947,10 @@ struct evaluator_impl<Replicate<ArgType, RowFactor, ColFactor> > } protected: - typename evaluator<ArgType>::nestedType m_argImpl; - const variable_if_dynamic<Index, XprType::RowsAtCompileTime> m_rows; - const variable_if_dynamic<Index, XprType::ColsAtCompileTime> m_cols; + const ArgTypeNested m_arg; // FIXME is it OK to store both the argument and its evaluator?? 
(we have the same situation in evaluator_product) + typename evaluator<ArgTypeNestedCleaned>::nestedType m_argImpl; + const variable_if_dynamic<Index, ArgType::RowsAtCompileTime> m_rows; + const variable_if_dynamic<Index, ArgType::ColsAtCompileTime> m_cols; }; @@ -855,23 +961,35 @@ protected: // the row() and col() member functions. template< typename ArgType, typename MemberOp, int Direction> -struct evaluator_impl<PartialReduxExpr<ArgType, MemberOp, Direction> > +struct evaluator<PartialReduxExpr<ArgType, MemberOp, Direction> > + : evaluator_base<PartialReduxExpr<ArgType, MemberOp, Direction> > { typedef PartialReduxExpr<ArgType, MemberOp, Direction> XprType; + typedef typename XprType::Scalar InputScalar; + enum { + TraversalSize = Direction==int(Vertical) ? int(ArgType::RowsAtCompileTime) : int(XprType::ColsAtCompileTime) + }; + typedef typename MemberOp::template Cost<InputScalar,int(TraversalSize)> CostOpType; + enum { + CoeffReadCost = TraversalSize==Dynamic ? Dynamic + : TraversalSize * evaluator<ArgType>::CoeffReadCost + int(CostOpType::value), + + Flags = (traits<XprType>::Flags&RowMajorBit) | (evaluator<ArgType>::Flags&HereditaryBits) + }; - evaluator_impl(const XprType expr) + EIGEN_DEVICE_FUNC explicit evaluator(const XprType expr) : m_expr(expr) - { } + {} typedef typename XprType::Index Index; typedef typename XprType::CoeffReturnType CoeffReturnType; - CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const { return m_expr.coeff(row, col); } - CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const { return m_expr.coeff(index); } @@ -883,16 +1001,20 @@ protected: // -------------------- MatrixWrapper and ArrayWrapper -------------------- // -// evaluator_impl_wrapper_base<T> is a common base class for the +// evaluator_wrapper_base<T> is a common base class for the // MatrixWrapper and ArrayWrapper evaluators. 
template<typename XprType> -struct evaluator_impl_wrapper_base - : evaluator_impl_base<XprType> +struct evaluator_wrapper_base + : evaluator_base<XprType> { typedef typename remove_all<typename XprType::NestedExpressionType>::type ArgType; + enum { + CoeffReadCost = evaluator<ArgType>::CoeffReadCost, + Flags = evaluator<ArgType>::Flags + }; - evaluator_impl_wrapper_base(const ArgType& arg) : m_argImpl(arg) {} + EIGEN_DEVICE_FUNC explicit evaluator_wrapper_base(const ArgType& arg) : m_argImpl(arg) {} typedef typename ArgType::Index Index; typedef typename ArgType::Scalar Scalar; @@ -900,22 +1022,22 @@ struct evaluator_impl_wrapper_base typedef typename ArgType::PacketScalar PacketScalar; typedef typename ArgType::PacketReturnType PacketReturnType; - CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const { return m_argImpl.coeff(row, col); } - CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const { return m_argImpl.coeff(index); } - Scalar& coeffRef(Index row, Index col) + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col) { return m_argImpl.coeffRef(row, col); } - Scalar& coeffRef(Index index) + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index) { return m_argImpl.coeffRef(index); } @@ -949,24 +1071,24 @@ protected: }; template<typename TArgType> -struct evaluator_impl<MatrixWrapper<TArgType> > - : evaluator_impl_wrapper_base<MatrixWrapper<TArgType> > +struct unary_evaluator<MatrixWrapper<TArgType> > + : evaluator_wrapper_base<MatrixWrapper<TArgType> > { typedef MatrixWrapper<TArgType> XprType; - evaluator_impl(const XprType& wrapper) - : evaluator_impl_wrapper_base<MatrixWrapper<TArgType> >(wrapper.nestedExpression()) + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& wrapper) + : evaluator_wrapper_base<MatrixWrapper<TArgType> >(wrapper.nestedExpression()) { } }; template<typename TArgType> -struct evaluator_impl<ArrayWrapper<TArgType> > - : 
evaluator_impl_wrapper_base<ArrayWrapper<TArgType> > +struct unary_evaluator<ArrayWrapper<TArgType> > + : evaluator_wrapper_base<ArrayWrapper<TArgType> > { typedef ArrayWrapper<TArgType> XprType; - evaluator_impl(const XprType& wrapper) - : evaluator_impl_wrapper_base<ArrayWrapper<TArgType> >(wrapper.nestedExpression()) + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& wrapper) + : evaluator_wrapper_base<ArrayWrapper<TArgType> >(wrapper.nestedExpression()) { } }; @@ -977,8 +1099,8 @@ struct evaluator_impl<ArrayWrapper<TArgType> > template<typename PacketScalar, bool ReversePacket> struct reverse_packet_cond; template<typename ArgType, int Direction> -struct evaluator_impl<Reverse<ArgType, Direction> > - : evaluator_impl_base<Reverse<ArgType, Direction> > +struct unary_evaluator<Reverse<ArgType, Direction> > + : evaluator_base<Reverse<ArgType, Direction> > { typedef Reverse<ArgType, Direction> XprType; typedef typename XprType::Index Index; @@ -997,34 +1119,44 @@ struct evaluator_impl<Reverse<ArgType, Direction> > OffsetCol = ReverseCol && IsRowMajor ? PacketSize : 1, ReversePacket = (Direction == BothDirections) || ((Direction == Vertical) && IsColMajor) - || ((Direction == Horizontal) && IsRowMajor) + || ((Direction == Horizontal) && IsRowMajor), + + CoeffReadCost = evaluator<ArgType>::CoeffReadCost, + + // let's enable LinearAccess only with vectorization because of the product overhead + // FIXME enable DirectAccess with negative strides? + Flags0 = evaluator<ArgType>::Flags, + LinearAccess = ( (Direction==BothDirections) && (int(Flags0)&PacketAccessBit) ) + ? LinearAccessBit : 0, + + Flags = int(Flags0) & (HereditaryBits | PacketAccessBit | LinearAccess) }; typedef internal::reverse_packet_cond<PacketScalar,ReversePacket> reverse_packet; - evaluator_impl(const XprType& reverse) + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& reverse) : m_argImpl(reverse.nestedExpression()), m_rows(ReverseRow ? 
reverse.nestedExpression().rows() : 0), m_cols(ReverseCol ? reverse.nestedExpression().cols() : 0) { } - CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const { return m_argImpl.coeff(ReverseRow ? m_rows.value() - row - 1 : row, - ReverseCol ? m_cols.value() - col - 1 : col); + ReverseCol ? m_cols.value() - col - 1 : col); } - CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const { return m_argImpl.coeff(m_rows.value() * m_cols.value() - index - 1); } - Scalar& coeffRef(Index row, Index col) + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col) { return m_argImpl.coeffRef(ReverseRow ? m_rows.value() - row - 1 : row, - ReverseCol ? m_cols.value() - col - 1 : col); + ReverseCol ? m_cols.value() - col - 1 : col); } - Scalar& coeffRef(Index index) + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index) { return m_argImpl.coeffRef(m_rows.value() * m_cols.value() - index - 1); } @@ -1071,36 +1203,44 @@ protected: // -------------------- Diagonal -------------------- template<typename ArgType, int DiagIndex> -struct evaluator_impl<Diagonal<ArgType, DiagIndex> > - : evaluator_impl_base<Diagonal<ArgType, DiagIndex> > +struct evaluator<Diagonal<ArgType, DiagIndex> > + : evaluator_base<Diagonal<ArgType, DiagIndex> > { typedef Diagonal<ArgType, DiagIndex> XprType; + + enum { + CoeffReadCost = evaluator<ArgType>::CoeffReadCost, + + Flags = (unsigned int)evaluator<ArgType>::Flags & (HereditaryBits | LinearAccessBit | DirectAccessBit) & ~RowMajorBit + }; - evaluator_impl(const XprType& diagonal) + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& diagonal) : m_argImpl(diagonal.nestedExpression()), m_index(diagonal.index()) { } typedef typename XprType::Index Index; typedef typename XprType::Scalar Scalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; + // FIXME having to check whether ArgType is sparse here is not very nice. 
+ typedef typename internal::conditional<!internal::is_same<typename ArgType::StorageKind,Sparse>::value, + typename XprType::CoeffReturnType,Scalar>::type CoeffReturnType; - CoeffReturnType coeff(Index row, Index) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index) const { return m_argImpl.coeff(row + rowOffset(), row + colOffset()); } - CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const { return m_argImpl.coeff(index + rowOffset(), index + colOffset()); } - Scalar& coeffRef(Index row, Index) + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index) { return m_argImpl.coeffRef(row + rowOffset(), row + colOffset()); } - Scalar& coeffRef(Index index) + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index) { return m_argImpl.coeffRef(index + rowOffset(), index + colOffset()); } @@ -1110,8 +1250,88 @@ protected: const internal::variable_if_dynamicindex<Index, XprType::DiagIndex> m_index; private: - EIGEN_STRONG_INLINE Index rowOffset() const { return m_index.value() > 0 ? 0 : -m_index.value(); } - EIGEN_STRONG_INLINE Index colOffset() const { return m_index.value() > 0 ? m_index.value() : 0; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rowOffset() const { return m_index.value() > 0 ? 0 : -m_index.value(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index colOffset() const { return m_index.value() > 0 ? 
m_index.value() : 0; } +}; + + +//---------------------------------------------------------------------- +// deprecated code +//---------------------------------------------------------------------- + +// -------------------- EvalToTemp -------------------- + +// expression class for evaluating nested expression to a temporary + +template<typename ArgType> class EvalToTemp; + +template<typename ArgType> +struct traits<EvalToTemp<ArgType> > + : public traits<ArgType> +{ }; + +template<typename ArgType> +class EvalToTemp + : public dense_xpr_base<EvalToTemp<ArgType> >::type +{ + public: + + typedef typename dense_xpr_base<EvalToTemp>::type Base; + EIGEN_GENERIC_PUBLIC_INTERFACE(EvalToTemp) + + explicit EvalToTemp(const ArgType& arg) + : m_arg(arg) + { } + + const ArgType& arg() const + { + return m_arg; + } + + Index rows() const + { + return m_arg.rows(); + } + + Index cols() const + { + return m_arg.cols(); + } + + private: + const ArgType& m_arg; +}; + +template<typename ArgType> +struct evaluator<EvalToTemp<ArgType> > + : public evaluator<typename ArgType::PlainObject>::type +{ + typedef EvalToTemp<ArgType> XprType; + typedef typename ArgType::PlainObject PlainObject; + typedef typename evaluator<PlainObject>::type Base; + + typedef evaluator type; + typedef evaluator nestedType; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) + : m_result(xpr.rows(), xpr.cols()) + { + ::new (static_cast<Base*>(this)) Base(m_result); + // TODO we should simply do m_result(xpr.arg()); + call_dense_assignment_loop(m_result, xpr.arg()); + } + + // This constructor is used when nesting an EvalTo evaluator in another evaluator + EIGEN_DEVICE_FUNC evaluator(const ArgType& arg) + : m_result(arg.rows(), arg.cols()) + { + ::new (static_cast<Base*>(this)) Base(m_result); + // TODO we should simply do m_result(xpr.arg()); + call_dense_assignment_loop(m_result, arg); + } + +protected: + PlainObject m_result; }; } // namespace internal diff --git a/Eigen/src/Core/CoreIterators.h 
b/Eigen/src/Core/CoreIterators.h index 6da4683d2..7feebc4e4 100644 --- a/Eigen/src/Core/CoreIterators.h +++ b/Eigen/src/Core/CoreIterators.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr> +// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr> // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed @@ -15,47 +15,116 @@ namespace Eigen { /* This file contains the respective InnerIterator definition of the expressions defined in Eigen/Core */ -/** \ingroup SparseCore_Module - * \class InnerIterator - * \brief An InnerIterator allows to loop over the element of a sparse (or dense) matrix or expression - * - * todo +namespace internal { + +template<typename XprType, typename EvaluatorKind> +class inner_iterator_selector; + +} + +/** \class InnerIterator + * \brief An InnerIterator allows to loop over the element of any matrix expression. + * + * \warning To be used with care because an evaluator is constructed every time an InnerIterator iterator is constructed. + * + * TODO: add a usage example */ +template<typename XprType> +class InnerIterator +{ +protected: + typedef internal::inner_iterator_selector<XprType, typename internal::evaluator_traits<XprType>::Kind> IteratorType; + typedef typename internal::evaluator<XprType>::type EvaluatorType; + typedef typename internal::traits<XprType>::Scalar Scalar; + typedef typename internal::traits<XprType>::Index Index; +public: + /** Construct an iterator over the \a outerId -th row or column of \a xpr */ + InnerIterator(const XprType &xpr, const Index &outerId) + : m_eval(xpr), m_iter(m_eval, outerId, xpr.innerSize()) + {} + + /// \returns the value of the current coefficient. 
+ EIGEN_STRONG_INLINE Scalar value() const { return m_iter.value(); } + /** Increment the iterator \c *this to the next non-zero coefficient. + * Explicit zeros are not skipped over. To skip explicit zeros, see class SparseView + */ + EIGEN_STRONG_INLINE InnerIterator& operator++() { m_iter.operator++(); return *this; } + /// \returns the column or row index of the current coefficient. + EIGEN_STRONG_INLINE Index index() const { return m_iter.index(); } + /// \returns the row index of the current coefficient. + EIGEN_STRONG_INLINE Index row() const { return m_iter.row(); } + /// \returns the column index of the current coefficient. + EIGEN_STRONG_INLINE Index col() const { return m_iter.col(); } + /// \returns \c true if the iterator \c *this still references a valid coefficient. + EIGEN_STRONG_INLINE operator bool() const { return m_iter; } + +protected: + EvaluatorType m_eval; + IteratorType m_iter; +private: + // If you get here, then you're not using the right InnerIterator type, e.g.: + // SparseMatrix<double,RowMajor> A; + // SparseMatrix<double>::InnerIterator it(A,0); + template<typename T> InnerIterator(const EigenBase<T>&,Index outer); +}; + +namespace internal { -// generic version for dense matrix and expressions -template<typename Derived> class DenseBase<Derived>::InnerIterator +// Generic inner iterator implementation for dense objects +template<typename XprType> +class inner_iterator_selector<XprType, IndexBased> { - protected: - typedef typename Derived::Scalar Scalar; - typedef typename Derived::Index Index; - - enum { IsRowMajor = (Derived::Flags&RowMajorBit)==RowMajorBit }; - public: - EIGEN_STRONG_INLINE InnerIterator(const Derived& expr, Index outer) - : m_expression(expr), m_inner(0), m_outer(outer), m_end(expr.innerSize()) - {} - - EIGEN_STRONG_INLINE Scalar value() const - { - return (IsRowMajor) ? 
m_expression.coeff(m_outer, m_inner) - : m_expression.coeff(m_inner, m_outer); - } - - EIGEN_STRONG_INLINE InnerIterator& operator++() { m_inner++; return *this; } - - EIGEN_STRONG_INLINE Index index() const { return m_inner; } - inline Index row() const { return IsRowMajor ? m_outer : index(); } - inline Index col() const { return IsRowMajor ? index() : m_outer; } - - EIGEN_STRONG_INLINE operator bool() const { return m_inner < m_end && m_inner>=0; } - - protected: - const Derived& m_expression; - Index m_inner; - const Index m_outer; - const Index m_end; +protected: + typedef typename evaluator<XprType>::type EvaluatorType; + typedef typename traits<XprType>::Scalar Scalar; + typedef typename traits<XprType>::Index Index; + enum { IsRowMajor = (XprType::Flags&RowMajorBit)==RowMajorBit }; + +public: + EIGEN_STRONG_INLINE inner_iterator_selector(const EvaluatorType &eval, const Index &outerId, const Index &innerSize) + : m_eval(eval), m_inner(0), m_outer(outerId), m_end(innerSize) + {} + + EIGEN_STRONG_INLINE Scalar value() const + { + return (IsRowMajor) ? m_eval.coeff(m_outer, m_inner) + : m_eval.coeff(m_inner, m_outer); + } + + EIGEN_STRONG_INLINE inner_iterator_selector& operator++() { m_inner++; return *this; } + + EIGEN_STRONG_INLINE Index index() const { return m_inner; } + inline Index row() const { return IsRowMajor ? m_outer : index(); } + inline Index col() const { return IsRowMajor ? 
index() : m_outer; } + + EIGEN_STRONG_INLINE operator bool() const { return m_inner < m_end && m_inner>=0; } + +protected: + const EvaluatorType& m_eval; + Index m_inner; + const Index m_outer; + const Index m_end; }; +// For iterator-based evaluator, inner-iterator is already implemented as +// evaluator<>::InnerIterator +template<typename XprType> +class inner_iterator_selector<XprType, IteratorBased> + : public evaluator<XprType>::InnerIterator +{ +protected: + typedef typename evaluator<XprType>::InnerIterator Base; + typedef typename evaluator<XprType>::type EvaluatorType; + typedef typename traits<XprType>::Index Index; + +public: + EIGEN_STRONG_INLINE inner_iterator_selector(const EvaluatorType &eval, const Index &outerId, const Index &/*innerSize*/) + : Base(eval, outerId) + {} +}; + +} // end namespace internal + } // end namespace Eigen #endif // EIGEN_COREITERATORS_H diff --git a/Eigen/src/Core/CwiseBinaryOp.h b/Eigen/src/Core/CwiseBinaryOp.h index e20daacc8..a205c3f10 100644 --- a/Eigen/src/Core/CwiseBinaryOp.h +++ b/Eigen/src/Core/CwiseBinaryOp.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. 
// -// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr> +// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr> // Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com> // // This Source Code Form is subject to the terms of the Mozilla @@ -56,8 +56,9 @@ struct traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> > typename Rhs::Scalar ) >::type Scalar; - typedef typename promote_storage_type<typename traits<Lhs>::StorageKind, - typename traits<Rhs>::StorageKind>::ret StorageKind; + typedef typename cwise_promote_storage_type<typename traits<Lhs>::StorageKind, + typename traits<Rhs>::StorageKind, + BinaryOp>::ret StorageKind; typedef typename promote_index_type<typename traits<Lhs>::Index, typename traits<Rhs>::Index>::type Index; typedef typename Lhs::Nested LhsNested; @@ -65,60 +66,37 @@ struct traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> > typedef typename remove_reference<LhsNested>::type _LhsNested; typedef typename remove_reference<RhsNested>::type _RhsNested; enum { - LhsCoeffReadCost = _LhsNested::CoeffReadCost, - RhsCoeffReadCost = _RhsNested::CoeffReadCost, - LhsFlags = _LhsNested::Flags, - RhsFlags = _RhsNested::Flags, - SameType = is_same<typename _LhsNested::Scalar,typename _RhsNested::Scalar>::value, - StorageOrdersAgree = (int(Lhs::Flags)&RowMajorBit)==(int(Rhs::Flags)&RowMajorBit), - Flags0 = (int(LhsFlags) | int(RhsFlags)) & ( - HereditaryBits - | (int(LhsFlags) & int(RhsFlags) & - ( AlignedBit - | (StorageOrdersAgree ? LinearAccessBit : 0) - | (functor_traits<BinaryOp>::PacketAccess && StorageOrdersAgree && SameType ? PacketAccessBit : 0) - ) - ) - ), - Flags = (Flags0 & ~RowMajorBit) | (LhsFlags & RowMajorBit), - CoeffReadCost = LhsCoeffReadCost + RhsCoeffReadCost + functor_traits<BinaryOp>::Cost + Flags = _LhsNested::Flags & RowMajorBit }; }; } // end namespace internal -// we require Lhs and Rhs to have the same scalar type. 
Currently there is no example of a binary functor -// that would take two operands of different types. If there were such an example, then this check should be -// moved to the BinaryOp functors, on a per-case basis. This would however require a change in the BinaryOp functors, as -// currently they take only one typename Scalar template parameter. -// It is tempting to always allow mixing different types but remember that this is often impossible in the vectorized paths. -// So allowing mixing different types gives very unexpected errors when enabling vectorization, when the user tries to -// add together a float matrix and a double matrix. -#define EIGEN_CHECK_BINARY_COMPATIBILIY(BINOP,LHS,RHS) \ - EIGEN_STATIC_ASSERT((internal::functor_is_product_like<BINOP>::ret \ - ? int(internal::scalar_product_traits<LHS, RHS>::Defined) \ - : int(internal::is_same<LHS, RHS>::value)), \ - YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) - template<typename BinaryOp, typename Lhs, typename Rhs, typename StorageKind> class CwiseBinaryOpImpl; -template<typename BinaryOp, typename Lhs, typename Rhs> -class CwiseBinaryOp : internal::no_assignment_operator, +template<typename BinaryOp, typename LhsType, typename RhsType> +class CwiseBinaryOp : public CwiseBinaryOpImpl< - BinaryOp, Lhs, Rhs, - typename internal::promote_storage_type<typename internal::traits<Lhs>::StorageKind, - typename internal::traits<Rhs>::StorageKind>::ret> + BinaryOp, LhsType, RhsType, + typename internal::cwise_promote_storage_type<typename internal::traits<LhsType>::StorageKind, + typename internal::traits<RhsType>::StorageKind, + BinaryOp>::ret>, + internal::no_assignment_operator { public: + + typedef typename internal::remove_all<LhsType>::type Lhs; + typedef typename internal::remove_all<RhsType>::type Rhs; typedef typename CwiseBinaryOpImpl< - BinaryOp, Lhs, Rhs, - typename internal::promote_storage_type<typename 
internal::traits<Lhs>::StorageKind, - typename internal::traits<Rhs>::StorageKind>::ret>::Base Base; + BinaryOp, LhsType, RhsType, + typename internal::cwise_promote_storage_type<typename internal::traits<LhsType>::StorageKind, + typename internal::traits<Rhs>::StorageKind, + BinaryOp>::ret>::Base Base; EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseBinaryOp) - typedef typename internal::nested<Lhs>::type LhsNested; - typedef typename internal::nested<Rhs>::type RhsNested; + typedef typename internal::nested<LhsType>::type LhsNested; + typedef typename internal::nested<RhsType>::type RhsNested; typedef typename internal::remove_reference<LhsNested>::type _LhsNested; typedef typename internal::remove_reference<RhsNested>::type _RhsNested; @@ -165,43 +143,13 @@ class CwiseBinaryOp : internal::no_assignment_operator, const BinaryOp m_functor; }; -template<typename BinaryOp, typename Lhs, typename Rhs> -class CwiseBinaryOpImpl<BinaryOp, Lhs, Rhs, Dense> - : public internal::dense_xpr_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >::type +// Generic API dispatcher +template<typename BinaryOp, typename Lhs, typename Rhs, typename StorageKind> +class CwiseBinaryOpImpl + : public internal::generic_xpr_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >::type { - typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> Derived; - public: - - typedef typename internal::dense_xpr_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >::type Base; - EIGEN_DENSE_PUBLIC_INTERFACE( Derived ) - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar coeff(Index rowId, Index colId) const - { - return derived().functor()(derived().lhs().coeff(rowId, colId), - derived().rhs().coeff(rowId, colId)); - } - - template<int LoadMode> - EIGEN_STRONG_INLINE PacketScalar packet(Index rowId, Index colId) const - { - return derived().functor().packetOp(derived().lhs().template packet<LoadMode>(rowId, colId), - derived().rhs().template packet<LoadMode>(rowId, colId)); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar coeff(Index index) const - 
{ - return derived().functor()(derived().lhs().coeff(index), - derived().rhs().coeff(index)); - } - - template<int LoadMode> - EIGEN_STRONG_INLINE PacketScalar packet(Index index) const - { - return derived().functor().packetOp(derived().lhs().template packet<LoadMode>(index), - derived().rhs().template packet<LoadMode>(index)); - } +public: + typedef typename internal::generic_xpr_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >::type Base; }; /** replaces \c *this by \c *this - \a other. @@ -213,8 +161,7 @@ template<typename OtherDerived> EIGEN_STRONG_INLINE Derived & MatrixBase<Derived>::operator-=(const MatrixBase<OtherDerived> &other) { - SelfCwiseBinaryOp<internal::scalar_difference_op<Scalar>, Derived, OtherDerived> tmp(derived()); - tmp = other.derived(); + call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar>()); return derived(); } @@ -227,8 +174,7 @@ template<typename OtherDerived> EIGEN_STRONG_INLINE Derived & MatrixBase<Derived>::operator+=(const MatrixBase<OtherDerived>& other) { - SelfCwiseBinaryOp<internal::scalar_sum_op<Scalar>, Derived, OtherDerived> tmp(derived()); - tmp = other.derived(); + call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar>()); return derived(); } diff --git a/Eigen/src/Core/CwiseNullaryOp.h b/Eigen/src/Core/CwiseNullaryOp.h index 124383114..05c4fedd0 100644 --- a/Eigen/src/Core/CwiseNullaryOp.h +++ b/Eigen/src/Core/CwiseNullaryOp.h @@ -35,19 +35,13 @@ template<typename NullaryOp, typename PlainObjectType> struct traits<CwiseNullaryOp<NullaryOp, PlainObjectType> > : traits<PlainObjectType> { enum { - Flags = (traits<PlainObjectType>::Flags - & ( HereditaryBits - | (functor_has_linear_access<NullaryOp>::ret ? LinearAccessBit : 0) - | (functor_traits<NullaryOp>::PacketAccess ? PacketAccessBit : 0))) - | (functor_traits<NullaryOp>::IsRepeatable ? 
0 : EvalBeforeNestingBit), - CoeffReadCost = functor_traits<NullaryOp>::Cost + Flags = traits<PlainObjectType>::Flags & RowMajorBit }; }; } template<typename NullaryOp, typename PlainObjectType> -class CwiseNullaryOp : internal::no_assignment_operator, - public internal::dense_xpr_base< CwiseNullaryOp<NullaryOp, PlainObjectType> >::type +class CwiseNullaryOp : public internal::dense_xpr_base< CwiseNullaryOp<NullaryOp, PlainObjectType> >::type, internal::no_assignment_operator { public: diff --git a/Eigen/src/Core/CwiseUnaryOp.h b/Eigen/src/Core/CwiseUnaryOp.h index aa7df197f..da1d1992d 100644 --- a/Eigen/src/Core/CwiseUnaryOp.h +++ b/Eigen/src/Core/CwiseUnaryOp.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr> +// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr> // Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com> // // This Source Code Form is subject to the terms of the Mozilla @@ -44,10 +44,7 @@ struct traits<CwiseUnaryOp<UnaryOp, XprType> > typedef typename XprType::Nested XprTypeNested; typedef typename remove_reference<XprTypeNested>::type _XprTypeNested; enum { - Flags = _XprTypeNested::Flags & ( - HereditaryBits | LinearAccessBit | AlignedBit - | (functor_traits<UnaryOp>::PacketAccess ? 
PacketAccessBit : 0)), - CoeffReadCost = _XprTypeNested::CoeffReadCost + functor_traits<UnaryOp>::Cost + Flags = _XprTypeNested::Flags & RowMajorBit }; }; } @@ -56,16 +53,16 @@ template<typename UnaryOp, typename XprType, typename StorageKind> class CwiseUnaryOpImpl; template<typename UnaryOp, typename XprType> -class CwiseUnaryOp : internal::no_assignment_operator, - public CwiseUnaryOpImpl<UnaryOp, XprType, typename internal::traits<XprType>::StorageKind> +class CwiseUnaryOp : public CwiseUnaryOpImpl<UnaryOp, XprType, typename internal::traits<XprType>::StorageKind>, internal::no_assignment_operator { public: typedef typename CwiseUnaryOpImpl<UnaryOp, XprType,typename internal::traits<XprType>::StorageKind>::Base Base; EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseUnaryOp) + typedef typename internal::remove_all<XprType>::type NestedExpression; EIGEN_DEVICE_FUNC - inline CwiseUnaryOp(const XprType& xpr, const UnaryOp& func = UnaryOp()) + explicit inline CwiseUnaryOp(const XprType& xpr, const UnaryOp& func = UnaryOp()) : m_xpr(xpr), m_functor(func) {} EIGEN_DEVICE_FUNC @@ -92,42 +89,13 @@ class CwiseUnaryOp : internal::no_assignment_operator, const UnaryOp m_functor; }; -// This is the generic implementation for dense storage. -// It can be used for any expression types implementing the dense concept. 
-template<typename UnaryOp, typename XprType> -class CwiseUnaryOpImpl<UnaryOp,XprType,Dense> - : public internal::dense_xpr_base<CwiseUnaryOp<UnaryOp, XprType> >::type +// Generic API dispatcher +template<typename UnaryOp, typename XprType, typename StorageKind> +class CwiseUnaryOpImpl + : public internal::generic_xpr_base<CwiseUnaryOp<UnaryOp, XprType> >::type { - public: - - typedef CwiseUnaryOp<UnaryOp, XprType> Derived; - typedef typename internal::dense_xpr_base<CwiseUnaryOp<UnaryOp, XprType> >::type Base; - EIGEN_DENSE_PUBLIC_INTERFACE(Derived) - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar coeff(Index rowId, Index colId) const - { - return derived().functor()(derived().nestedExpression().coeff(rowId, colId)); - } - - template<int LoadMode> - EIGEN_STRONG_INLINE PacketScalar packet(Index rowId, Index colId) const - { - return derived().functor().packetOp(derived().nestedExpression().template packet<LoadMode>(rowId, colId)); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar coeff(Index index) const - { - return derived().functor()(derived().nestedExpression().coeff(index)); - } - - template<int LoadMode> - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE PacketScalar packet(Index index) const - { - return derived().functor().packetOp(derived().nestedExpression().template packet<LoadMode>(index)); - } +public: + typedef typename internal::generic_xpr_base<CwiseUnaryOp<UnaryOp, XprType> >::type Base; }; } // end namespace Eigen diff --git a/Eigen/src/Core/CwiseUnaryView.h b/Eigen/src/Core/CwiseUnaryView.h index b2638d326..6680f32dd 100644 --- a/Eigen/src/Core/CwiseUnaryView.h +++ b/Eigen/src/Core/CwiseUnaryView.h @@ -37,8 +37,8 @@ struct traits<CwiseUnaryView<ViewOp, MatrixType> > typedef typename MatrixType::Nested MatrixTypeNested; typedef typename remove_all<MatrixTypeNested>::type _MatrixTypeNested; enum { - Flags = (traits<_MatrixTypeNested>::Flags & (HereditaryBits | LvalueBit | LinearAccessBit | DirectAccessBit)), - CoeffReadCost = 
traits<_MatrixTypeNested>::CoeffReadCost + functor_traits<ViewOp>::Cost, + FlagsLvalueBit = is_lvalue<MatrixType>::value ? LvalueBit : 0, + Flags = traits<_MatrixTypeNested>::Flags & (RowMajorBit | FlagsLvalueBit | DirectAccessBit), // FIXME DirectAccessBit should not be handled by expressions MatrixTypeInnerStride = inner_stride_at_compile_time<MatrixType>::ret, // need to cast the sizeof's from size_t to int explicitly, otherwise: // "error: no integral type can represent all of the enumerator values @@ -62,8 +62,9 @@ class CwiseUnaryView : public CwiseUnaryViewImpl<ViewOp, MatrixType, typename in typedef typename CwiseUnaryViewImpl<ViewOp, MatrixType,typename internal::traits<MatrixType>::StorageKind>::Base Base; EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseUnaryView) + typedef typename internal::remove_all<MatrixType>::type NestedExpression; - inline CwiseUnaryView(const MatrixType& mat, const ViewOp& func = ViewOp()) + explicit inline CwiseUnaryView(MatrixType& mat, const ViewOp& func = ViewOp()) : m_matrix(mat), m_functor(func) {} EIGEN_INHERIT_ASSIGNMENT_OPERATORS(CwiseUnaryView) @@ -88,6 +89,15 @@ class CwiseUnaryView : public CwiseUnaryViewImpl<ViewOp, MatrixType, typename in ViewOp m_functor; }; +// Generic API dispatcher +template<typename ViewOp, typename XprType, typename StorageKind> +class CwiseUnaryViewImpl + : public internal::generic_xpr_base<CwiseUnaryView<ViewOp, XprType> >::type +{ +public: + typedef typename internal::generic_xpr_base<CwiseUnaryView<ViewOp, XprType> >::type Base; +}; + template<typename ViewOp, typename MatrixType> class CwiseUnaryViewImpl<ViewOp,MatrixType,Dense> : public internal::dense_xpr_base< CwiseUnaryView<ViewOp, MatrixType> >::type @@ -100,38 +110,18 @@ class CwiseUnaryViewImpl<ViewOp,MatrixType,Dense> EIGEN_DENSE_PUBLIC_INTERFACE(Derived) EIGEN_INHERIT_ASSIGNMENT_OPERATORS(CwiseUnaryViewImpl) - inline Scalar* data() { return &coeffRef(0); } - inline const Scalar* data() const { return &coeff(0); } + EIGEN_DEVICE_FUNC inline 
Scalar* data() { return &(this->coeffRef(0)); } + EIGEN_DEVICE_FUNC inline const Scalar* data() const { return &(this->coeff(0)); } - inline Index innerStride() const + EIGEN_DEVICE_FUNC inline Index innerStride() const { return derived().nestedExpression().innerStride() * sizeof(typename internal::traits<MatrixType>::Scalar) / sizeof(Scalar); } - inline Index outerStride() const + EIGEN_DEVICE_FUNC inline Index outerStride() const { return derived().nestedExpression().outerStride() * sizeof(typename internal::traits<MatrixType>::Scalar) / sizeof(Scalar); } - - EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const - { - return derived().functor()(derived().nestedExpression().coeff(row, col)); - } - - EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const - { - return derived().functor()(derived().nestedExpression().coeff(index)); - } - - EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col) - { - return derived().functor()(const_cast_derived().nestedExpression().coeffRef(row, col)); - } - - EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) - { - return derived().functor()(const_cast_derived().nestedExpression().coeffRef(index)); - } }; } // end namespace Eigen diff --git a/Eigen/src/Core/DenseBase.h b/Eigen/src/Core/DenseBase.h index bd5dd14ed..e81b58481 100644 --- a/Eigen/src/Core/DenseBase.h +++ b/Eigen/src/Core/DenseBase.h @@ -50,7 +50,11 @@ template<typename Derived> class DenseBase using internal::special_scalar_op_base<Derived,typename internal::traits<Derived>::Scalar, typename NumTraits<typename internal::traits<Derived>::Scalar>::Real>::operator*; - class InnerIterator; + + /** Inner iterator type to iterate over the coefficients of a row or column. 
+ * \sa class InnerIterator + */ + typedef Eigen::InnerIterator<Derived> InnerIterator; typedef typename internal::traits<Derived>::StorageKind StorageKind; @@ -74,16 +78,6 @@ template<typename Derived> class DenseBase using Base::colIndexByOuterInner; using Base::coeff; using Base::coeffByOuterInner; - using Base::packet; - using Base::packetByOuterInner; - using Base::writePacket; - using Base::writePacketByOuterInner; - using Base::coeffRef; - using Base::coeffRefByOuterInner; - using Base::copyCoeff; - using Base::copyCoeffByOuterInner; - using Base::copyPacket; - using Base::copyPacketByOuterInner; using Base::operator(); using Base::operator[]; using Base::x; @@ -169,16 +163,11 @@ template<typename Derived> class DenseBase InnerSizeAtCompileTime = int(IsVectorAtCompileTime) ? int(SizeAtCompileTime) : int(IsRowMajor) ? int(ColsAtCompileTime) : int(RowsAtCompileTime), - CoeffReadCost = internal::traits<Derived>::CoeffReadCost, - /**< This is a rough measure of how expensive it is to read one coefficient from - * this expression. - */ - InnerStrideAtCompileTime = internal::inner_stride_at_compile_time<Derived>::ret, OuterStrideAtCompileTime = internal::outer_stride_at_compile_time<Derived>::ret }; - enum { ThisConstantIsPrivateInPlainObjectBase }; + enum { IsPlainObjectBase = 0 }; /** \returns the number of nonzero coefficients which is in practice the number * of stored coefficients. */ @@ -278,7 +267,8 @@ template<typename Derived> class DenseBase Derived& operator=(const ReturnByValue<OtherDerived>& func); #ifndef EIGEN_PARSED_BY_DOXYGEN - /** Copies \a other into *this without evaluating other. \returns a reference to *this. */ + /** Copies \a other into *this without evaluating other. \returns a reference to *this. 
+ * \deprecated */ template<typename OtherDerived> EIGEN_DEVICE_FUNC Derived& lazyAssign(const DenseBase<OtherDerived>& other); @@ -287,27 +277,24 @@ template<typename Derived> class DenseBase EIGEN_DEVICE_FUNC CommaInitializer<Derived> operator<< (const Scalar& s); + // TODO flagged is temporarly disabled. It seems useless now template<unsigned int Added,unsigned int Removed> - const Flagged<Derived, Added, Removed> flagged() const; + EIGEN_DEPRECATED + const Derived& flagged() const + { return derived(); } template<typename OtherDerived> EIGEN_DEVICE_FUNC CommaInitializer<Derived> operator<< (const DenseBase<OtherDerived>& other); + typedef Transpose<Derived> TransposeReturnType; EIGEN_DEVICE_FUNC - Eigen::Transpose<Derived> transpose(); + TransposeReturnType transpose(); typedef typename internal::add_const<Transpose<const Derived> >::type ConstTransposeReturnType; EIGEN_DEVICE_FUNC ConstTransposeReturnType transpose() const; EIGEN_DEVICE_FUNC void transposeInPlace(); -#ifndef EIGEN_NO_DEBUG - protected: - template<typename OtherDerived> - void checkTransposeAliasing(const OtherDerived& other) const; - public: -#endif - EIGEN_DEVICE_FUNC static const ConstantReturnType Constant(Index rows, Index cols, const Scalar& value); @@ -387,16 +374,17 @@ template<typename Derived> class DenseBase // size types on MSVC. return typename internal::eval<Derived>::type(derived()); } - + /** swaps *this with the expression \a other. 
* */ template<typename OtherDerived> EIGEN_DEVICE_FUNC - void swap(const DenseBase<OtherDerived>& other, - int = OtherDerived::ThisConstantIsPrivateInPlainObjectBase) + void swap(const DenseBase<OtherDerived>& other) { - SwapWrapper<Derived>(derived()).lazyAssign(other.derived()); + EIGEN_STATIC_ASSERT(!OtherDerived::IsPlainObjectBase,THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY); + eigen_assert(rows()==other.rows() && cols()==other.cols()); + call_assignment(derived(), other.const_cast_derived(), internal::swap_assign_op<Scalar>()); } /** swaps *this with the matrix or array \a other. @@ -406,10 +394,10 @@ template<typename Derived> class DenseBase EIGEN_DEVICE_FUNC void swap(PlainObjectBase<OtherDerived>& other) { - SwapWrapper<Derived>(derived()).lazyAssign(other.derived()); + eigen_assert(rows()==other.rows() && cols()==other.cols()); + call_assignment(derived(), other.derived(), internal::swap_assign_op<Scalar>()); } - EIGEN_DEVICE_FUNC inline const NestByValue<Derived> nestByValue() const; EIGEN_DEVICE_FUNC inline const ForceAlignedAccess<Derived> forceAlignedAccess() const; EIGEN_DEVICE_FUNC inline ForceAlignedAccess<Derived> forceAlignedAccess(); diff --git a/Eigen/src/Core/DenseCoeffsBase.h b/Eigen/src/Core/DenseCoeffsBase.h index 4e986e875..a9e4dbaf9 100644 --- a/Eigen/src/Core/DenseCoeffsBase.h +++ b/Eigen/src/Core/DenseCoeffsBase.h @@ -97,8 +97,8 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived> EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { eigen_internal_assert(row >= 0 && row < rows() - && col >= 0 && col < cols()); - return derived().coeff(row, col); + && col >= 0 && col < cols()); + return typename internal::evaluator<Derived>::type(derived()).coeff(row,col); } EIGEN_DEVICE_FUNC @@ -117,7 +117,7 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived> { eigen_assert(row >= 0 && row < rows() && col >= 0 && col < cols()); - return derived().coeff(row, col); + return 
coeff(row, col); } /** Short version: don't use this function, use @@ -140,7 +140,7 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived> coeff(Index index) const { eigen_internal_assert(index >= 0 && index < size()); - return derived().coeff(index); + return typename internal::evaluator<Derived>::type(derived()).coeff(index); } @@ -159,7 +159,7 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived> EIGEN_STATIC_ASSERT(Derived::IsVectorAtCompileTime, THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD) eigen_assert(index >= 0 && index < size()); - return derived().coeff(index); + return coeff(index); } /** \returns the coefficient at given index. @@ -177,7 +177,7 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived> operator()(Index index) const { eigen_assert(index >= 0 && index < size()); - return derived().coeff(index); + return coeff(index); } /** equivalent to operator[](0). */ @@ -217,9 +217,8 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived> template<int LoadMode> EIGEN_STRONG_INLINE PacketReturnType packet(Index row, Index col) const { - eigen_internal_assert(row >= 0 && row < rows() - && col >= 0 && col < cols()); - return derived().template packet<LoadMode>(row,col); + eigen_internal_assert(row >= 0 && row < rows() && col >= 0 && col < cols()); + return typename internal::evaluator<Derived>::type(derived()).template packet<LoadMode>(row,col); } @@ -245,7 +244,7 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived> EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const { eigen_internal_assert(index >= 0 && index < size()); - return derived().template packet<LoadMode>(index); + return typename internal::evaluator<Derived>::type(derived()).template packet<LoadMode>(index); } protected: @@ -325,8 +324,8 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived, 
EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col) { eigen_internal_assert(row >= 0 && row < rows() - && col >= 0 && col < cols()); - return derived().coeffRef(row, col); + && col >= 0 && col < cols()); + return typename internal::evaluator<Derived>::type(derived()).coeffRef(row,col); } EIGEN_DEVICE_FUNC @@ -348,7 +347,7 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived, { eigen_assert(row >= 0 && row < rows() && col >= 0 && col < cols()); - return derived().coeffRef(row, col); + return coeffRef(row, col); } @@ -372,7 +371,7 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived, coeffRef(Index index) { eigen_internal_assert(index >= 0 && index < size()); - return derived().coeffRef(index); + return typename internal::evaluator<Derived>::type(derived()).coeffRef(index); } /** \returns a reference to the coefficient at given index. @@ -389,7 +388,7 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived, EIGEN_STATIC_ASSERT(Derived::IsVectorAtCompileTime, THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD) eigen_assert(index >= 0 && index < size()); - return derived().coeffRef(index); + return coeffRef(index); } /** \returns a reference to the coefficient at given index. @@ -406,7 +405,7 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived, operator()(Index index) { eigen_assert(index >= 0 && index < size()); - return derived().coeffRef(index); + return coeffRef(index); } /** equivalent to operator[](0). */ @@ -432,144 +431,6 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived, EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& w() { return (*this)[3]; } - - /** \internal - * Stores the given packet of coefficients, at the given row and column of this expression. It is your responsibility - * to ensure that a packet really starts there. 
This method is only available on expressions having the - * PacketAccessBit. - * - * The \a LoadMode parameter may have the value \a #Aligned or \a #Unaligned. Its effect is to select - * the appropriate vectorization instruction. Aligned access is faster, but is only possible for packets - * starting at an address which is a multiple of the packet size. - */ - - template<int StoreMode> - EIGEN_STRONG_INLINE void writePacket - (Index row, Index col, const typename internal::packet_traits<Scalar>::type& val) - { - eigen_internal_assert(row >= 0 && row < rows() - && col >= 0 && col < cols()); - derived().template writePacket<StoreMode>(row,col,val); - } - - - /** \internal */ - template<int StoreMode> - EIGEN_STRONG_INLINE void writePacketByOuterInner - (Index outer, Index inner, const typename internal::packet_traits<Scalar>::type& val) - { - writePacket<StoreMode>(rowIndexByOuterInner(outer, inner), - colIndexByOuterInner(outer, inner), - val); - } - - /** \internal - * Stores the given packet of coefficients, at the given index in this expression. It is your responsibility - * to ensure that a packet really starts there. This method is only available on expressions having the - * PacketAccessBit and the LinearAccessBit. - * - * The \a LoadMode parameter may have the value \a Aligned or \a Unaligned. Its effect is to select - * the appropriate vectorization instruction. Aligned access is faster, but is only possible for packets - * starting at an address which is a multiple of the packet size. - */ - template<int StoreMode> - EIGEN_STRONG_INLINE void writePacket - (Index index, const typename internal::packet_traits<Scalar>::type& val) - { - eigen_internal_assert(index >= 0 && index < size()); - derived().template writePacket<StoreMode>(index,val); - } - -#ifndef EIGEN_PARSED_BY_DOXYGEN - - /** \internal Copies the coefficient at position (row,col) of other into *this. 
- * - * This method is overridden in SwapWrapper, allowing swap() assignments to share 99% of their code - * with usual assignments. - * - * Outside of this internal usage, this method has probably no usefulness. It is hidden in the public API dox. - */ - - template<typename OtherDerived> - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE void copyCoeff(Index row, Index col, const DenseBase<OtherDerived>& other) - { - eigen_internal_assert(row >= 0 && row < rows() - && col >= 0 && col < cols()); - derived().coeffRef(row, col) = other.derived().coeff(row, col); - } - - /** \internal Copies the coefficient at the given index of other into *this. - * - * This method is overridden in SwapWrapper, allowing swap() assignments to share 99% of their code - * with usual assignments. - * - * Outside of this internal usage, this method has probably no usefulness. It is hidden in the public API dox. - */ - - template<typename OtherDerived> - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE void copyCoeff(Index index, const DenseBase<OtherDerived>& other) - { - eigen_internal_assert(index >= 0 && index < size()); - derived().coeffRef(index) = other.derived().coeff(index); - } - - - template<typename OtherDerived> - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE void copyCoeffByOuterInner(Index outer, Index inner, const DenseBase<OtherDerived>& other) - { - const Index row = rowIndexByOuterInner(outer,inner); - const Index col = colIndexByOuterInner(outer,inner); - // derived() is important here: copyCoeff() may be reimplemented in Derived! - derived().copyCoeff(row, col, other); - } - - /** \internal Copies the packet at position (row,col) of other into *this. - * - * This method is overridden in SwapWrapper, allowing swap() assignments to share 99% of their code - * with usual assignments. - * - * Outside of this internal usage, this method has probably no usefulness. It is hidden in the public API dox. 
- */ - - template<typename OtherDerived, int StoreMode, int LoadMode> - EIGEN_STRONG_INLINE void copyPacket(Index row, Index col, const DenseBase<OtherDerived>& other) - { - eigen_internal_assert(row >= 0 && row < rows() - && col >= 0 && col < cols()); - derived().template writePacket<StoreMode>(row, col, - other.derived().template packet<LoadMode>(row, col)); - } - - /** \internal Copies the packet at the given index of other into *this. - * - * This method is overridden in SwapWrapper, allowing swap() assignments to share 99% of their code - * with usual assignments. - * - * Outside of this internal usage, this method has probably no usefulness. It is hidden in the public API dox. - */ - - template<typename OtherDerived, int StoreMode, int LoadMode> - EIGEN_STRONG_INLINE void copyPacket(Index index, const DenseBase<OtherDerived>& other) - { - eigen_internal_assert(index >= 0 && index < size()); - derived().template writePacket<StoreMode>(index, - other.derived().template packet<LoadMode>(index)); - } - - /** \internal */ - template<typename OtherDerived, int StoreMode, int LoadMode> - EIGEN_STRONG_INLINE void copyPacketByOuterInner(Index outer, Index inner, const DenseBase<OtherDerived>& other) - { - const Index row = rowIndexByOuterInner(outer,inner); - const Index col = colIndexByOuterInner(outer,inner); - // derived() is important here: copyCoeff() may be reimplemented in Derived! - derived().template copyPacket< OtherDerived, StoreMode, LoadMode>(row, col, other); - } -#endif - }; /** \brief Base class providing direct read-only coefficient access to matrices and arrays. 
diff --git a/Eigen/src/Core/DenseStorage.h b/Eigen/src/Core/DenseStorage.h index 59f515495..852648639 100644 --- a/Eigen/src/Core/DenseStorage.h +++ b/Eigen/src/Core/DenseStorage.h @@ -130,7 +130,7 @@ template<typename T, int Size, int _Rows, int _Cols, int _Options> class DenseSt public: EIGEN_DEVICE_FUNC DenseStorage() {} EIGEN_DEVICE_FUNC - DenseStorage(internal::constructor_without_unaligned_array_assert) + explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(internal::constructor_without_unaligned_array_assert()) {} EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) : m_data(other.m_data) {} @@ -155,7 +155,7 @@ template<typename T, int _Rows, int _Cols, int _Options> class DenseStorage<T, 0 { public: EIGEN_DEVICE_FUNC DenseStorage() {} - EIGEN_DEVICE_FUNC DenseStorage(internal::constructor_without_unaligned_array_assert) {} + EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert) {} EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage&) {} EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage&) { return *this; } EIGEN_DEVICE_FUNC DenseStorage(DenseIndex,DenseIndex,DenseIndex) {} @@ -186,7 +186,7 @@ template<typename T, int Size, int _Options> class DenseStorage<T, Size, Dynamic DenseIndex m_cols; public: EIGEN_DEVICE_FUNC DenseStorage() : m_rows(0), m_cols(0) {} - DenseStorage(internal::constructor_without_unaligned_array_assert) + explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(internal::constructor_without_unaligned_array_assert()), m_rows(0), m_cols(0) {} DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_rows(other.m_rows), m_cols(other.m_cols) {} DenseStorage& operator=(const DenseStorage& other) @@ -217,7 +217,7 @@ template<typename T, int Size, int _Cols, int _Options> class DenseStorage<T, Si DenseIndex m_rows; public: EIGEN_DEVICE_FUNC DenseStorage() : m_rows(0) {} - 
DenseStorage(internal::constructor_without_unaligned_array_assert) + explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(internal::constructor_without_unaligned_array_assert()), m_rows(0) {} DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_rows(other.m_rows) {} DenseStorage& operator=(const DenseStorage& other) @@ -246,7 +246,7 @@ template<typename T, int Size, int _Rows, int _Options> class DenseStorage<T, Si DenseIndex m_cols; public: EIGEN_DEVICE_FUNC DenseStorage() : m_cols(0) {} - DenseStorage(internal::constructor_without_unaligned_array_assert) + explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(internal::constructor_without_unaligned_array_assert()), m_cols(0) {} DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_cols(other.m_cols) {} DenseStorage& operator=(const DenseStorage& other) @@ -276,7 +276,7 @@ template<typename T, int _Options> class DenseStorage<T, Dynamic, Dynamic, Dynam DenseIndex m_cols; public: EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_rows(0), m_cols(0) {} - DenseStorage(internal::constructor_without_unaligned_array_assert) + explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_rows(0), m_cols(0) {} DenseStorage(DenseIndex size, DenseIndex nbRows, DenseIndex nbCols) : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_rows(nbRows), m_cols(nbCols) @@ -350,7 +350,7 @@ template<typename T, int _Rows, int _Options> class DenseStorage<T, Dynamic, _Ro DenseIndex m_cols; public: EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_cols(0) {} - DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_cols(0) {} + explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_cols(0) {} DenseStorage(DenseIndex size, DenseIndex, DenseIndex nbCols) : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), 
m_cols(nbCols) { EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN } DenseStorage(const DenseStorage& other) @@ -416,7 +416,7 @@ template<typename T, int _Cols, int _Options> class DenseStorage<T, Dynamic, Dyn DenseIndex m_rows; public: EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_rows(0) {} - DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_rows(0) {} + explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_rows(0) {} DenseStorage(DenseIndex size, DenseIndex nbRows, DenseIndex) : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_rows(nbRows) { EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN } DenseStorage(const DenseStorage& other) diff --git a/Eigen/src/Core/Diagonal.h b/Eigen/src/Core/Diagonal.h index b160479ab..33b82f90f 100644 --- a/Eigen/src/Core/Diagonal.h +++ b/Eigen/src/Core/Diagonal.h @@ -52,8 +52,7 @@ struct traits<Diagonal<MatrixType,DiagIndex> > MatrixType::MaxColsAtCompileTime - EIGEN_PLAIN_ENUM_MAX( DiagIndex, 0))), MaxColsAtCompileTime = 1, MaskLvalueBit = is_lvalue<MatrixType>::value ? LvalueBit : 0, - Flags = (unsigned int)_MatrixTypeNested::Flags & (HereditaryBits | LinearAccessBit | MaskLvalueBit | DirectAccessBit) & ~RowMajorBit, - CoeffReadCost = _MatrixTypeNested::CoeffReadCost, + Flags = (unsigned int)_MatrixTypeNested::Flags & (RowMajorBit | MaskLvalueBit | DirectAccessBit) & ~RowMajorBit, // FIXME DirectAccessBit should not be handled by expressions MatrixTypeOuterStride = outer_stride_at_compile_time<MatrixType>::ret, InnerStrideAtCompileTime = MatrixTypeOuterStride == Dynamic ? 
Dynamic : MatrixTypeOuterStride+1, OuterStrideAtCompileTime = 0 @@ -71,17 +70,15 @@ template<typename MatrixType, int _DiagIndex> class Diagonal EIGEN_DENSE_PUBLIC_INTERFACE(Diagonal) EIGEN_DEVICE_FUNC - inline Diagonal(MatrixType& matrix, Index a_index = DiagIndex) : m_matrix(matrix), m_index(a_index) {} + explicit inline Diagonal(MatrixType& matrix, Index a_index = DiagIndex) : m_matrix(matrix), m_index(a_index) {} EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Diagonal) EIGEN_DEVICE_FUNC inline Index rows() const { - EIGEN_USING_STD_MATH(min); - return m_index.value()<0 ? (min)(Index(m_matrix.cols()),Index(m_matrix.rows()+m_index.value())) - : (min)(Index(m_matrix.rows()),Index(m_matrix.cols()-m_index.value())); - + return m_index.value()<0 ? numext::mini(Index(m_matrix.cols()),Index(m_matrix.rows()+m_index.value())) + : numext::mini(Index(m_matrix.rows()),Index(m_matrix.cols()-m_index.value())); } EIGEN_DEVICE_FUNC @@ -149,14 +146,14 @@ template<typename MatrixType, int _DiagIndex> class Diagonal } EIGEN_DEVICE_FUNC - const typename internal::remove_all<typename MatrixType::Nested>::type& + inline const typename internal::remove_all<typename MatrixType::Nested>::type& nestedExpression() const { return m_matrix; } EIGEN_DEVICE_FUNC - int index() const + inline Index index() const { return m_index.value(); } @@ -173,7 +170,7 @@ template<typename MatrixType, int _DiagIndex> class Diagonal EIGEN_STRONG_INLINE Index rowOffset() const { return m_index.value()>0 ? 0 : -m_index.value(); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index colOffset() const { return m_index.value()>0 ? 
m_index.value() : 0; } - // triger a compile time error is someone try to call packet + // trigger a compile time error is someone try to call packet template<int LoadMode> typename MatrixType::PacketReturnType packet(Index) const; template<int LoadMode> typename MatrixType::PacketReturnType packet(Index,Index) const; }; @@ -190,7 +187,7 @@ template<typename Derived> inline typename MatrixBase<Derived>::DiagonalReturnType MatrixBase<Derived>::diagonal() { - return derived(); + return DiagonalReturnType(derived()); } /** This is the const version of diagonal(). */ @@ -213,18 +210,18 @@ MatrixBase<Derived>::diagonal() const * * \sa MatrixBase::diagonal(), class Diagonal */ template<typename Derived> -inline typename MatrixBase<Derived>::template DiagonalIndexReturnType<DynamicIndex>::Type +inline typename MatrixBase<Derived>::DiagonalDynamicIndexReturnType MatrixBase<Derived>::diagonal(Index index) { - return typename DiagonalIndexReturnType<DynamicIndex>::Type(derived(), index); + return DiagonalDynamicIndexReturnType(derived(), index); } /** This is the const version of diagonal(Index). 
*/ template<typename Derived> -inline typename MatrixBase<Derived>::template ConstDiagonalIndexReturnType<DynamicIndex>::Type +inline typename MatrixBase<Derived>::ConstDiagonalDynamicIndexReturnType MatrixBase<Derived>::diagonal(Index index) const { - return typename ConstDiagonalIndexReturnType<DynamicIndex>::Type(derived(), index); + return ConstDiagonalDynamicIndexReturnType(derived(), index); } /** \returns an expression of the \a DiagIndex-th sub or super diagonal of the matrix \c *this @@ -239,20 +236,20 @@ MatrixBase<Derived>::diagonal(Index index) const * * \sa MatrixBase::diagonal(), class Diagonal */ template<typename Derived> -template<int Index> -inline typename MatrixBase<Derived>::template DiagonalIndexReturnType<Index>::Type +template<int Index_> +inline typename MatrixBase<Derived>::template DiagonalIndexReturnType<Index_>::Type MatrixBase<Derived>::diagonal() { - return derived(); + return typename DiagonalIndexReturnType<Index_>::Type(derived()); } /** This is the const version of diagonal<int>(). 
*/ template<typename Derived> -template<int Index> -inline typename MatrixBase<Derived>::template ConstDiagonalIndexReturnType<Index>::Type +template<int Index_> +inline typename MatrixBase<Derived>::template ConstDiagonalIndexReturnType<Index_>::Type MatrixBase<Derived>::diagonal() const { - return derived(); + return typename ConstDiagonalIndexReturnType<Index_>::Type(derived()); } } // end namespace Eigen diff --git a/Eigen/src/Core/DiagonalMatrix.h b/Eigen/src/Core/DiagonalMatrix.h index 96b65483d..49b9b7925 100644 --- a/Eigen/src/Core/DiagonalMatrix.h +++ b/Eigen/src/Core/DiagonalMatrix.h @@ -30,7 +30,7 @@ class DiagonalBase : public EigenBase<Derived> MaxRowsAtCompileTime = DiagonalVectorType::MaxSizeAtCompileTime, MaxColsAtCompileTime = DiagonalVectorType::MaxSizeAtCompileTime, IsVectorAtCompileTime = 0, - Flags = 0 + Flags = NoPreferredStorageOrderBit }; typedef Matrix<Scalar, RowsAtCompileTime, ColsAtCompileTime, 0, MaxRowsAtCompileTime, MaxColsAtCompileTime> DenseMatrixType; @@ -44,18 +44,7 @@ class DiagonalBase : public EigenBase<Derived> EIGEN_DEVICE_FUNC DenseMatrixType toDenseMatrix() const { return derived(); } - template<typename DenseDerived> - EIGEN_DEVICE_FUNC - void evalTo(MatrixBase<DenseDerived> &other) const; - template<typename DenseDerived> - EIGEN_DEVICE_FUNC - void addTo(MatrixBase<DenseDerived> &other) const - { other.diagonal() += diagonal(); } - template<typename DenseDerived> - EIGEN_DEVICE_FUNC - void subTo(MatrixBase<DenseDerived> &other) const - { other.diagonal() -= diagonal(); } - + EIGEN_DEVICE_FUNC inline const DiagonalVectorType& diagonal() const { return derived().diagonal(); } EIGEN_DEVICE_FUNC @@ -66,44 +55,37 @@ class DiagonalBase : public EigenBase<Derived> EIGEN_DEVICE_FUNC inline Index cols() const { return diagonal().size(); } - /** \returns the diagonal matrix product of \c *this by the matrix \a matrix. 
- */ template<typename MatrixDerived> EIGEN_DEVICE_FUNC - const DiagonalProduct<MatrixDerived, Derived, OnTheLeft> + const Product<Derived,MatrixDerived,LazyProduct> operator*(const MatrixBase<MatrixDerived> &matrix) const { - return DiagonalProduct<MatrixDerived, Derived, OnTheLeft>(matrix.derived(), derived()); + return Product<Derived, MatrixDerived, LazyProduct>(derived(),matrix.derived()); } + typedef DiagonalWrapper<const CwiseUnaryOp<internal::scalar_inverse_op<Scalar>, const DiagonalVectorType> > InverseReturnType; EIGEN_DEVICE_FUNC - inline const DiagonalWrapper<const CwiseUnaryOp<internal::scalar_inverse_op<Scalar>, const DiagonalVectorType> > + inline const InverseReturnType inverse() const { - return diagonal().cwiseInverse(); + return InverseReturnType(diagonal().cwiseInverse()); } + typedef DiagonalWrapper<const CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const DiagonalVectorType> > ScalarMultipleReturnType; EIGEN_DEVICE_FUNC - inline const DiagonalWrapper<const CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const DiagonalVectorType> > + inline const ScalarMultipleReturnType operator*(const Scalar& scalar) const { - return diagonal() * scalar; + return ScalarMultipleReturnType(diagonal() * scalar); } EIGEN_DEVICE_FUNC - friend inline const DiagonalWrapper<const CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const DiagonalVectorType> > + friend inline const ScalarMultipleReturnType operator*(const Scalar& scalar, const DiagonalBase& other) { - return other.diagonal() * scalar; + return ScalarMultipleReturnType(other.diagonal() * scalar); } }; -template<typename Derived> -template<typename DenseDerived> -void DiagonalBase<Derived>::evalTo(MatrixBase<DenseDerived> &other) const -{ - other.setZero(); - other.diagonal() = diagonal(); -} #endif /** \class DiagonalMatrix @@ -125,10 +107,10 @@ struct traits<DiagonalMatrix<_Scalar,SizeAtCompileTime,MaxSizeAtCompileTime> > : 
traits<Matrix<_Scalar,SizeAtCompileTime,SizeAtCompileTime,0,MaxSizeAtCompileTime,MaxSizeAtCompileTime> > { typedef Matrix<_Scalar,SizeAtCompileTime,1,0,MaxSizeAtCompileTime,1> DiagonalVectorType; - typedef Dense StorageKind; + typedef DiagonalShape StorageKind; typedef DenseIndex Index; enum { - Flags = LvalueBit + Flags = LvalueBit | NoPreferredStorageOrderBit }; }; } @@ -164,7 +146,7 @@ class DiagonalMatrix /** Constructs a diagonal matrix with given dimension */ EIGEN_DEVICE_FUNC - inline DiagonalMatrix(Index dim) : m_diagonal(dim) {} + explicit inline DiagonalMatrix(Index dim) : m_diagonal(dim) {} /** 2D constructor. */ EIGEN_DEVICE_FUNC @@ -249,13 +231,14 @@ struct traits<DiagonalWrapper<_DiagonalVectorType> > typedef _DiagonalVectorType DiagonalVectorType; typedef typename DiagonalVectorType::Scalar Scalar; typedef typename DiagonalVectorType::Index Index; - typedef typename DiagonalVectorType::StorageKind StorageKind; + typedef DiagonalShape StorageKind; + typedef typename traits<DiagonalVectorType>::XprKind XprKind; enum { RowsAtCompileTime = DiagonalVectorType::SizeAtCompileTime, ColsAtCompileTime = DiagonalVectorType::SizeAtCompileTime, - MaxRowsAtCompileTime = DiagonalVectorType::SizeAtCompileTime, - MaxColsAtCompileTime = DiagonalVectorType::SizeAtCompileTime, - Flags = traits<DiagonalVectorType>::Flags & LvalueBit + MaxRowsAtCompileTime = DiagonalVectorType::MaxSizeAtCompileTime, + MaxColsAtCompileTime = DiagonalVectorType::MaxSizeAtCompileTime, + Flags = (traits<DiagonalVectorType>::Flags & LvalueBit) | NoPreferredStorageOrderBit }; }; } @@ -272,7 +255,7 @@ class DiagonalWrapper /** Constructor from expression of diagonal coefficients to wrap. */ EIGEN_DEVICE_FUNC - inline DiagonalWrapper(DiagonalVectorType& a_diagonal) : m_diagonal(a_diagonal) {} + explicit inline DiagonalWrapper(DiagonalVectorType& a_diagonal) : m_diagonal(a_diagonal) {} /** \returns a const reference to the wrapped expression of diagonal coefficients. 
*/ EIGEN_DEVICE_FUNC @@ -295,7 +278,7 @@ template<typename Derived> inline const DiagonalWrapper<const Derived> MatrixBase<Derived>::asDiagonal() const { - return derived(); + return DiagonalWrapper<const Derived>(derived()); } /** \returns true if *this is approximately equal to a diagonal matrix, @@ -326,6 +309,33 @@ bool MatrixBase<Derived>::isDiagonal(const RealScalar& prec) const return true; } +namespace internal { + +template<> struct storage_kind_to_shape<DiagonalShape> { typedef DiagonalShape Shape; }; + +struct Diagonal2Dense {}; + +template<> struct AssignmentKind<DenseShape,DiagonalShape> { typedef Diagonal2Dense Kind; }; + +// Diagonal matrix to Dense assignment +template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar> +struct Assignment<DstXprType, SrcXprType, Functor, Diagonal2Dense, Scalar> +{ + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar> &/*func*/) + { + dst.setZero(); + dst.diagonal() = src.diagonal(); + } + + static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<typename DstXprType::Scalar> &/*func*/) + { dst.diagonal() += src.diagonal(); } + + static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<typename DstXprType::Scalar> &/*func*/) + { dst.diagonal() -= src.diagonal(); } +}; + +} // namespace internal + } // end namespace Eigen #endif // EIGEN_DIAGONALMATRIX_H diff --git a/Eigen/src/Core/DiagonalProduct.h b/Eigen/src/Core/DiagonalProduct.h index c03a0c2e1..d372b938f 100644 --- a/Eigen/src/Core/DiagonalProduct.h +++ b/Eigen/src/Core/DiagonalProduct.h @@ -13,116 +13,14 @@ namespace Eigen { -namespace internal { -template<typename MatrixType, typename DiagonalType, int ProductOrder> -struct traits<DiagonalProduct<MatrixType, DiagonalType, ProductOrder> > - : traits<MatrixType> -{ - typedef typename scalar_product_traits<typename MatrixType::Scalar, typename 
DiagonalType::Scalar>::ReturnType Scalar; - enum { - RowsAtCompileTime = MatrixType::RowsAtCompileTime, - ColsAtCompileTime = MatrixType::ColsAtCompileTime, - MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, - MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime, - - _StorageOrder = MatrixType::Flags & RowMajorBit ? RowMajor : ColMajor, - _ScalarAccessOnDiag = !((int(_StorageOrder) == ColMajor && int(ProductOrder) == OnTheLeft) - ||(int(_StorageOrder) == RowMajor && int(ProductOrder) == OnTheRight)), - _SameTypes = is_same<typename MatrixType::Scalar, typename DiagonalType::Scalar>::value, - // FIXME currently we need same types, but in the future the next rule should be the one - //_Vectorizable = bool(int(MatrixType::Flags)&PacketAccessBit) && ((!_PacketOnDiag) || (_SameTypes && bool(int(DiagonalType::DiagonalVectorType::Flags)&PacketAccessBit))), - _Vectorizable = bool(int(MatrixType::Flags)&PacketAccessBit) && _SameTypes && (_ScalarAccessOnDiag || (bool(int(DiagonalType::DiagonalVectorType::Flags)&PacketAccessBit))), - _LinearAccessMask = (RowsAtCompileTime==1 || ColsAtCompileTime==1) ? LinearAccessBit : 0, - - Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixType::Flags)) | (_Vectorizable ? 
PacketAccessBit : 0) | AlignedBit,//(int(MatrixType::Flags)&int(DiagonalType::DiagonalVectorType::Flags)&AlignedBit), - CoeffReadCost = NumTraits<Scalar>::MulCost + MatrixType::CoeffReadCost + DiagonalType::DiagonalVectorType::CoeffReadCost - }; -}; -} - -template<typename MatrixType, typename DiagonalType, int ProductOrder> -class DiagonalProduct : internal::no_assignment_operator, - public MatrixBase<DiagonalProduct<MatrixType, DiagonalType, ProductOrder> > -{ - public: - - typedef MatrixBase<DiagonalProduct> Base; - EIGEN_DENSE_PUBLIC_INTERFACE(DiagonalProduct) - - inline DiagonalProduct(const MatrixType& matrix, const DiagonalType& diagonal) - : m_matrix(matrix), m_diagonal(diagonal) - { - eigen_assert(diagonal.diagonal().size() == (ProductOrder == OnTheLeft ? matrix.rows() : matrix.cols())); - } - - EIGEN_STRONG_INLINE Index rows() const { return m_matrix.rows(); } - EIGEN_STRONG_INLINE Index cols() const { return m_matrix.cols(); } - - EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const - { - return m_diagonal.diagonal().coeff(ProductOrder == OnTheLeft ? row : col) * m_matrix.coeff(row, col); - } - - EIGEN_STRONG_INLINE const Scalar coeff(Index idx) const - { - enum { - StorageOrder = int(MatrixType::Flags) & RowMajorBit ? RowMajor : ColMajor - }; - return coeff(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx); - } - - template<int LoadMode> - EIGEN_STRONG_INLINE PacketScalar packet(Index row, Index col) const - { - enum { - StorageOrder = Flags & RowMajorBit ? RowMajor : ColMajor - }; - const Index indexInDiagonalVector = ProductOrder == OnTheLeft ? 
row : col; - return packet_impl<LoadMode>(row,col,indexInDiagonalVector,typename internal::conditional< - ((int(StorageOrder) == RowMajor && int(ProductOrder) == OnTheLeft) - ||(int(StorageOrder) == ColMajor && int(ProductOrder) == OnTheRight)), internal::true_type, internal::false_type>::type()); - } - - template<int LoadMode> - EIGEN_STRONG_INLINE PacketScalar packet(Index idx) const - { - enum { - StorageOrder = int(MatrixType::Flags) & RowMajorBit ? RowMajor : ColMajor - }; - return packet<LoadMode>(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx); - } - - protected: - template<int LoadMode> - EIGEN_STRONG_INLINE PacketScalar packet_impl(Index row, Index col, Index id, internal::true_type) const - { - return internal::pmul(m_matrix.template packet<LoadMode>(row, col), - internal::pset1<PacketScalar>(m_diagonal.diagonal().coeff(id))); - } - - template<int LoadMode> - EIGEN_STRONG_INLINE PacketScalar packet_impl(Index row, Index col, Index id, internal::false_type) const - { - enum { - InnerSize = (MatrixType::Flags & RowMajorBit) ? MatrixType::ColsAtCompileTime : MatrixType::RowsAtCompileTime, - DiagonalVectorPacketLoadMode = (LoadMode == Aligned && (((InnerSize%16) == 0) || (int(DiagonalType::DiagonalVectorType::Flags)&AlignedBit)==AlignedBit) ? Aligned : Unaligned) - }; - return internal::pmul(m_matrix.template packet<LoadMode>(row, col), - m_diagonal.diagonal().template packet<DiagonalVectorPacketLoadMode>(id)); - } - - typename MatrixType::Nested m_matrix; - typename DiagonalType::Nested m_diagonal; -}; - /** \returns the diagonal matrix product of \c *this by the diagonal matrix \a diagonal. 
*/ template<typename Derived> template<typename DiagonalDerived> -inline const DiagonalProduct<Derived, DiagonalDerived, OnTheRight> +inline const Product<Derived, DiagonalDerived, LazyProduct> MatrixBase<Derived>::operator*(const DiagonalBase<DiagonalDerived> &a_diagonal) const { - return DiagonalProduct<Derived, DiagonalDerived, OnTheRight>(derived(), a_diagonal.derived()); + return Product<Derived, DiagonalDerived, LazyProduct>(derived(),a_diagonal.derived()); } } // end namespace Eigen diff --git a/Eigen/src/Core/Dot.h b/Eigen/src/Core/Dot.h index db16e4acc..68e9c2660 100644 --- a/Eigen/src/Core/Dot.h +++ b/Eigen/src/Core/Dot.h @@ -113,8 +113,7 @@ template<typename Derived> inline const typename MatrixBase<Derived>::PlainObject MatrixBase<Derived>::normalized() const { - typedef typename internal::nested<Derived>::type Nested; - typedef typename internal::remove_reference<Nested>::type _Nested; + typedef typename internal::nested_eval<Derived,2>::type _Nested; _Nested n(derived()); return n / n.norm(); } @@ -206,8 +205,8 @@ template<typename OtherDerived> bool MatrixBase<Derived>::isOrthogonal (const MatrixBase<OtherDerived>& other, const RealScalar& prec) const { - typename internal::nested<Derived,2>::type nested(derived()); - typename internal::nested<OtherDerived,2>::type otherNested(other.derived()); + typename internal::nested_eval<Derived,2>::type nested(derived()); + typename internal::nested_eval<OtherDerived,2>::type otherNested(other.derived()); return numext::abs2(nested.dot(otherNested)) <= prec * prec * nested.squaredNorm() * otherNested.squaredNorm(); } diff --git a/Eigen/src/Core/EigenBase.h b/Eigen/src/Core/EigenBase.h index 1a577c2dc..52b66e6dc 100644 --- a/Eigen/src/Core/EigenBase.h +++ b/Eigen/src/Core/EigenBase.h @@ -121,7 +121,7 @@ template<typename Derived> template<typename OtherDerived> Derived& DenseBase<Derived>::operator=(const EigenBase<OtherDerived> &other) { - other.derived().evalTo(derived()); + call_assignment(derived(), 
other.derived()); return derived(); } @@ -129,7 +129,7 @@ template<typename Derived> template<typename OtherDerived> Derived& DenseBase<Derived>::operator+=(const EigenBase<OtherDerived> &other) { - other.derived().addTo(derived()); + call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar>()); return derived(); } @@ -137,7 +137,7 @@ template<typename Derived> template<typename OtherDerived> Derived& DenseBase<Derived>::operator-=(const EigenBase<OtherDerived> &other) { - other.derived().subTo(derived()); + call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar>()); return derived(); } diff --git a/Eigen/src/Core/Flagged.h b/Eigen/src/Core/Flagged.h index 1f2955fc1..2e2a50be5 100644 --- a/Eigen/src/Core/Flagged.h +++ b/Eigen/src/Core/Flagged.h @@ -48,39 +48,39 @@ template<typename ExpressionType, unsigned int Added, unsigned int Removed> clas ExpressionType, const ExpressionType&>::type ExpressionTypeNested; typedef typename ExpressionType::InnerIterator InnerIterator; - inline Flagged(const ExpressionType& matrix) : m_matrix(matrix) {} + explicit inline Flagged(const ExpressionType& matrix) : m_matrix(matrix) {} - inline Index rows() const { return m_matrix.rows(); } - inline Index cols() const { return m_matrix.cols(); } - inline Index outerStride() const { return m_matrix.outerStride(); } - inline Index innerStride() const { return m_matrix.innerStride(); } + EIGEN_DEVICE_FUNC inline Index rows() const { return m_matrix.rows(); } + EIGEN_DEVICE_FUNC inline Index cols() const { return m_matrix.cols(); } + EIGEN_DEVICE_FUNC inline Index outerStride() const { return m_matrix.outerStride(); } + EIGEN_DEVICE_FUNC inline Index innerStride() const { return m_matrix.innerStride(); } - inline CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC inline CoeffReturnType coeff(Index row, Index col) const { return m_matrix.coeff(row, col); } - inline CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC inline 
CoeffReturnType coeff(Index index) const { return m_matrix.coeff(index); } - inline const Scalar& coeffRef(Index row, Index col) const + EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index row, Index col) const { return m_matrix.const_cast_derived().coeffRef(row, col); } - inline const Scalar& coeffRef(Index index) const + EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index index) const { return m_matrix.const_cast_derived().coeffRef(index); } - inline Scalar& coeffRef(Index row, Index col) + EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index row, Index col) { return m_matrix.const_cast_derived().coeffRef(row, col); } - inline Scalar& coeffRef(Index index) + EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index index) { return m_matrix.const_cast_derived().coeffRef(index); } @@ -109,13 +109,13 @@ template<typename ExpressionType, unsigned int Added, unsigned int Removed> clas m_matrix.const_cast_derived().template writePacket<LoadMode>(index, x); } - const ExpressionType& _expression() const { return m_matrix; } + EIGEN_DEVICE_FUNC const ExpressionType& _expression() const { return m_matrix; } template<typename OtherDerived> - typename ExpressionType::PlainObject solveTriangular(const MatrixBase<OtherDerived>& other) const; + EIGEN_DEVICE_FUNC typename ExpressionType::PlainObject solveTriangular(const MatrixBase<OtherDerived>& other) const; template<typename OtherDerived> - void solveTriangularInPlace(const MatrixBase<OtherDerived>& other) const; + EIGEN_DEVICE_FUNC void solveTriangularInPlace(const MatrixBase<OtherDerived>& other) const; protected: ExpressionTypeNested m_matrix; @@ -132,7 +132,7 @@ template<unsigned int Added,unsigned int Removed> inline const Flagged<Derived, Added, Removed> DenseBase<Derived>::flagged() const { - return derived(); + return Flagged<Derived, Added, Removed>(derived()); } } // end namespace Eigen diff --git a/Eigen/src/Core/ForceAlignedAccess.h b/Eigen/src/Core/ForceAlignedAccess.h index 807c7a293..7b08b45e6 100644 --- 
a/Eigen/src/Core/ForceAlignedAccess.h +++ b/Eigen/src/Core/ForceAlignedAccess.h @@ -39,29 +39,29 @@ template<typename ExpressionType> class ForceAlignedAccess typedef typename internal::dense_xpr_base<ForceAlignedAccess>::type Base; EIGEN_DENSE_PUBLIC_INTERFACE(ForceAlignedAccess) - inline ForceAlignedAccess(const ExpressionType& matrix) : m_expression(matrix) {} + EIGEN_DEVICE_FUNC explicit inline ForceAlignedAccess(const ExpressionType& matrix) : m_expression(matrix) {} - inline Index rows() const { return m_expression.rows(); } - inline Index cols() const { return m_expression.cols(); } - inline Index outerStride() const { return m_expression.outerStride(); } - inline Index innerStride() const { return m_expression.innerStride(); } + EIGEN_DEVICE_FUNC inline Index rows() const { return m_expression.rows(); } + EIGEN_DEVICE_FUNC inline Index cols() const { return m_expression.cols(); } + EIGEN_DEVICE_FUNC inline Index outerStride() const { return m_expression.outerStride(); } + EIGEN_DEVICE_FUNC inline Index innerStride() const { return m_expression.innerStride(); } - inline const CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC inline const CoeffReturnType coeff(Index row, Index col) const { return m_expression.coeff(row, col); } - inline Scalar& coeffRef(Index row, Index col) + EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index row, Index col) { return m_expression.const_cast_derived().coeffRef(row, col); } - inline const CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC inline const CoeffReturnType coeff(Index index) const { return m_expression.coeff(index); } - inline Scalar& coeffRef(Index index) + EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index index) { return m_expression.const_cast_derived().coeffRef(index); } @@ -90,7 +90,7 @@ template<typename ExpressionType> class ForceAlignedAccess m_expression.const_cast_derived().template writePacket<Aligned>(index, x); } - operator const ExpressionType&() const { return m_expression; } 
+ EIGEN_DEVICE_FUNC operator const ExpressionType&() const { return m_expression; } protected: const ExpressionType& m_expression; @@ -127,7 +127,7 @@ template<bool Enable> inline typename internal::add_const_on_value_type<typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type>::type MatrixBase<Derived>::forceAlignedAccessIf() const { - return derived(); + return derived(); // FIXME This should not work but apparently is never used } /** \returns an expression of *this with forced aligned access if \a Enable is true. @@ -138,7 +138,7 @@ template<bool Enable> inline typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type MatrixBase<Derived>::forceAlignedAccessIf() { - return derived(); + return derived(); // FIXME This should not work but apparently is never used } } // end namespace Eigen diff --git a/Eigen/src/Core/Fuzzy.h b/Eigen/src/Core/Fuzzy.h index f9a88dd3c..3e403a09d 100644 --- a/Eigen/src/Core/Fuzzy.h +++ b/Eigen/src/Core/Fuzzy.h @@ -22,10 +22,9 @@ struct isApprox_selector EIGEN_DEVICE_FUNC static bool run(const Derived& x, const OtherDerived& y, const typename Derived::RealScalar& prec) { - EIGEN_USING_STD_MATH(min); - typename internal::nested<Derived,2>::type nested(x); - typename internal::nested<OtherDerived,2>::type otherNested(y); - return (nested - otherNested).cwiseAbs2().sum() <= prec * prec * (min)(nested.cwiseAbs2().sum(), otherNested.cwiseAbs2().sum()); + typename internal::nested_eval<Derived,2>::type nested(x); + typename internal::nested_eval<OtherDerived,2>::type otherNested(y); + return (nested - otherNested).cwiseAbs2().sum() <= prec * prec * numext::mini(nested.cwiseAbs2().sum(), otherNested.cwiseAbs2().sum()); } }; diff --git a/Eigen/src/Core/GeneralProduct.h b/Eigen/src/Core/GeneralProduct.h index 9d3d5562c..81750722c 100644 --- a/Eigen/src/Core/GeneralProduct.h +++ b/Eigen/src/Core/GeneralProduct.h @@ -13,28 +13,6 @@ namespace Eigen { -/** \class GeneralProduct - * \ingroup 
Core_Module - * - * \brief Expression of the product of two general matrices or vectors - * - * \param LhsNested the type used to store the left-hand side - * \param RhsNested the type used to store the right-hand side - * \param ProductMode the type of the product - * - * This class represents an expression of the product of two general matrices. - * We call a general matrix, a dense matrix with full storage. For instance, - * This excludes triangular, selfadjoint, and sparse matrices. - * It is the return type of the operator* between general matrices. Its template - * arguments are determined automatically by ProductReturnType. Therefore, - * GeneralProduct should never be used direclty. To determine the result type of a - * function which involves a matrix product, use ProductReturnType::Type. - * - * \sa ProductReturnType, MatrixBase::operator*(const MatrixBase<OtherDerived>&) - */ -template<typename Lhs, typename Rhs, int ProductType = internal::product_type<Lhs,Rhs>::value> -class GeneralProduct; - enum { Large = 2, Small = 3 @@ -59,14 +37,14 @@ template<typename Lhs, typename Rhs> struct product_type typedef typename remove_all<Lhs>::type _Lhs; typedef typename remove_all<Rhs>::type _Rhs; enum { - MaxRows = _Lhs::MaxRowsAtCompileTime, - Rows = _Lhs::RowsAtCompileTime, - MaxCols = _Rhs::MaxColsAtCompileTime, - Cols = _Rhs::ColsAtCompileTime, - MaxDepth = EIGEN_SIZE_MIN_PREFER_FIXED(_Lhs::MaxColsAtCompileTime, - _Rhs::MaxRowsAtCompileTime), - Depth = EIGEN_SIZE_MIN_PREFER_FIXED(_Lhs::ColsAtCompileTime, - _Rhs::RowsAtCompileTime) + MaxRows = traits<_Lhs>::MaxRowsAtCompileTime, + Rows = traits<_Lhs>::RowsAtCompileTime, + MaxCols = traits<_Rhs>::MaxColsAtCompileTime, + Cols = traits<_Rhs>::ColsAtCompileTime, + MaxDepth = EIGEN_SIZE_MIN_PREFER_FIXED(traits<_Lhs>::MaxColsAtCompileTime, + traits<_Rhs>::MaxRowsAtCompileTime), + Depth = EIGEN_SIZE_MIN_PREFER_FIXED(traits<_Lhs>::ColsAtCompileTime, + traits<_Rhs>::RowsAtCompileTime) }; // the splitting into different 
lines of code here, introducing the _select enums and the typedef below, @@ -81,7 +59,8 @@ private: public: enum { - value = selector::ret + value = selector::ret, + ret = selector::ret }; #ifdef EIGEN_DEBUG_PRODUCT static void debug() @@ -97,6 +76,31 @@ public: #endif }; +// template<typename Lhs, typename Rhs> struct product_tag +// { +// private: +// +// typedef typename remove_all<Lhs>::type _Lhs; +// typedef typename remove_all<Rhs>::type _Rhs; +// enum { +// Rows = _Lhs::RowsAtCompileTime, +// Cols = _Rhs::ColsAtCompileTime, +// Depth = EIGEN_SIZE_MIN_PREFER_FIXED(_Lhs::ColsAtCompileTime, _Rhs::RowsAtCompileTime) +// }; +// +// enum { +// rows_select = Rows==1 ? int(Rows) : int(Large), +// cols_select = Cols==1 ? int(Cols) : int(Large), +// depth_select = Depth==1 ? int(Depth) : int(Large) +// }; +// typedef product_type_selector<rows_select, cols_select, depth_select> selector; +// +// public: +// enum { +// ret = selector::ret +// }; +// +// }; /* The following allows to select the kind of product at compile time * based on the three dimensions of the product. @@ -127,54 +131,6 @@ template<> struct product_type_selector<Large,Large,Small> { enum } // end namespace internal -/** \class ProductReturnType - * \ingroup Core_Module - * - * \brief Helper class to get the correct and optimized returned type of operator* - * - * \param Lhs the type of the left-hand side - * \param Rhs the type of the right-hand side - * \param ProductMode the type of the product (determined automatically by internal::product_mode) - * - * This class defines the typename Type representing the optimized product expression - * between two matrix expressions. In practice, using ProductReturnType<Lhs,Rhs>::Type - * is the recommended way to define the result type of a function returning an expression - * which involve a matrix product. The class Product should never be - * used directly. 
- * - * \sa class Product, MatrixBase::operator*(const MatrixBase<OtherDerived>&) - */ -template<typename Lhs, typename Rhs, int ProductType> -struct ProductReturnType -{ - // TODO use the nested type to reduce instanciations ???? -// typedef typename internal::nested<Lhs,Rhs::ColsAtCompileTime>::type LhsNested; -// typedef typename internal::nested<Rhs,Lhs::RowsAtCompileTime>::type RhsNested; - - typedef GeneralProduct<Lhs/*Nested*/, Rhs/*Nested*/, ProductType> Type; -}; - -template<typename Lhs, typename Rhs> -struct ProductReturnType<Lhs,Rhs,CoeffBasedProductMode> -{ - typedef typename internal::nested<Lhs, Rhs::ColsAtCompileTime, typename internal::plain_matrix_type<Lhs>::type >::type LhsNested; - typedef typename internal::nested<Rhs, Lhs::RowsAtCompileTime, typename internal::plain_matrix_type<Rhs>::type >::type RhsNested; - typedef CoeffBasedProduct<LhsNested, RhsNested, EvalBeforeAssigningBit | EvalBeforeNestingBit> Type; -}; - -template<typename Lhs, typename Rhs> -struct ProductReturnType<Lhs,Rhs,LazyCoeffBasedProductMode> -{ - typedef typename internal::nested<Lhs, Rhs::ColsAtCompileTime, typename internal::plain_matrix_type<Lhs>::type >::type LhsNested; - typedef typename internal::nested<Rhs, Lhs::RowsAtCompileTime, typename internal::plain_matrix_type<Rhs>::type >::type RhsNested; - typedef CoeffBasedProduct<LhsNested, RhsNested, NestByRefBit> Type; -}; - -// this is a workaround for sun CC -template<typename Lhs, typename Rhs> -struct LazyProductReturnType : public ProductReturnType<Lhs,Rhs,LazyCoeffBasedProductMode> -{}; - /*********************************************************************** * Implementation of Inner Vector Vector Product ***********************************************************************/ @@ -186,119 +142,10 @@ struct LazyProductReturnType : public ProductReturnType<Lhs,Rhs,LazyCoeffBasedPr // product ends up to a row-vector times col-vector product... 
To tackle this use // case, we could have a specialization for Block<MatrixType,1,1> with: operator=(Scalar x); -namespace internal { - -template<typename Lhs, typename Rhs> -struct traits<GeneralProduct<Lhs,Rhs,InnerProduct> > - : traits<Matrix<typename scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType,1,1> > -{}; - -} - -template<typename Lhs, typename Rhs> -class GeneralProduct<Lhs, Rhs, InnerProduct> - : internal::no_assignment_operator, - public Matrix<typename internal::scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType,1,1> -{ - typedef Matrix<typename internal::scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType,1,1> Base; - public: - GeneralProduct(const Lhs& lhs, const Rhs& rhs) - { - EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::RealScalar, typename Rhs::RealScalar>::value), - YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) - - Base::coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum(); - } - - /** Convertion to scalar */ - operator const typename Base::Scalar() const { - return Base::coeff(0,0); - } -}; - /*********************************************************************** * Implementation of Outer Vector Vector Product ***********************************************************************/ -namespace internal { - -// Column major -template<typename ProductType, typename Dest, typename Func> -EIGEN_DONT_INLINE void outer_product_selector_run(const ProductType& prod, Dest& dest, const Func& func, const false_type&) -{ - typedef typename Dest::Index Index; - // FIXME make sure lhs is sequentially stored - // FIXME not very good if rhs is real and lhs complex while alpha is real too - const Index cols = dest.cols(); - for (Index j=0; j<cols; ++j) - func(dest.col(j), prod.rhs().coeff(0,j) * prod.lhs()); -} - -// Row major -template<typename ProductType, typename Dest, typename Func> 
-EIGEN_DONT_INLINE void outer_product_selector_run(const ProductType& prod, Dest& dest, const Func& func, const true_type&) { - typedef typename Dest::Index Index; - // FIXME make sure rhs is sequentially stored - // FIXME not very good if lhs is real and rhs complex while alpha is real too - const Index rows = dest.rows(); - for (Index i=0; i<rows; ++i) - func(dest.row(i), prod.lhs().coeff(i,0) * prod.rhs()); -} - -template<typename Lhs, typename Rhs> -struct traits<GeneralProduct<Lhs,Rhs,OuterProduct> > - : traits<ProductBase<GeneralProduct<Lhs,Rhs,OuterProduct>, Lhs, Rhs> > -{}; - -} - -template<typename Lhs, typename Rhs> -class GeneralProduct<Lhs, Rhs, OuterProduct> - : public ProductBase<GeneralProduct<Lhs,Rhs,OuterProduct>, Lhs, Rhs> -{ - template<typename T> struct IsRowMajor : internal::conditional<(int(T::Flags)&RowMajorBit), internal::true_type, internal::false_type>::type {}; - - public: - EIGEN_PRODUCT_PUBLIC_INTERFACE(GeneralProduct) - - GeneralProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) - { - EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::RealScalar, typename Rhs::RealScalar>::value), - YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) - } - - struct set { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() = src; } }; - struct add { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() += src; } }; - struct sub { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() -= src; } }; - struct adds { - Scalar m_scale; - adds(const Scalar& s) : m_scale(s) {} - template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { - dst.const_cast_derived() += m_scale * src; - } - }; - - template<typename Dest> - inline void evalTo(Dest& dest) const { - 
internal::outer_product_selector_run(*this, dest, set(), IsRowMajor<Dest>()); - } - - template<typename Dest> - inline void addTo(Dest& dest) const { - internal::outer_product_selector_run(*this, dest, add(), IsRowMajor<Dest>()); - } - - template<typename Dest> - inline void subTo(Dest& dest) const { - internal::outer_product_selector_run(*this, dest, sub(), IsRowMajor<Dest>()); - } - - template<typename Dest> void scaleAndAddTo(Dest& dest, const Scalar& alpha) const - { - internal::outer_product_selector_run(*this, dest, adds(alpha), IsRowMajor<Dest>()); - } -}; - /*********************************************************************** * Implementation of General Matrix Vector Product ***********************************************************************/ @@ -312,60 +159,13 @@ class GeneralProduct<Lhs, Rhs, OuterProduct> */ namespace internal { -template<typename Lhs, typename Rhs> -struct traits<GeneralProduct<Lhs,Rhs,GemvProduct> > - : traits<ProductBase<GeneralProduct<Lhs,Rhs,GemvProduct>, Lhs, Rhs> > -{}; - template<int Side, int StorageOrder, bool BlasCompatible> -struct gemv_selector; +struct gemv_dense_sense_selector; } // end namespace internal -template<typename Lhs, typename Rhs> -class GeneralProduct<Lhs, Rhs, GemvProduct> - : public ProductBase<GeneralProduct<Lhs,Rhs,GemvProduct>, Lhs, Rhs> -{ - public: - EIGEN_PRODUCT_PUBLIC_INTERFACE(GeneralProduct) - - typedef typename Lhs::Scalar LhsScalar; - typedef typename Rhs::Scalar RhsScalar; - - GeneralProduct(const Lhs& a_lhs, const Rhs& a_rhs) : Base(a_lhs,a_rhs) - { -// EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::Scalar, typename Rhs::Scalar>::value), -// YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) - } - - enum { Side = Lhs::IsVectorAtCompileTime ? 
OnTheLeft : OnTheRight }; - typedef typename internal::conditional<int(Side)==OnTheRight,_LhsNested,_RhsNested>::type MatrixType; - - template<typename Dest> void scaleAndAddTo(Dest& dst, const Scalar& alpha) const - { - eigen_assert(m_lhs.rows() == dst.rows() && m_rhs.cols() == dst.cols()); - internal::gemv_selector<Side,(int(MatrixType::Flags)&RowMajorBit) ? RowMajor : ColMajor, - bool(internal::blas_traits<MatrixType>::HasUsableDirectAccess)>::run(*this, dst, alpha); - } -}; - namespace internal { -// The vector is on the left => transposition -template<int StorageOrder, bool BlasCompatible> -struct gemv_selector<OnTheLeft,StorageOrder,BlasCompatible> -{ - template<typename ProductType, typename Dest> - static void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha) - { - Transpose<Dest> destT(dest); - enum { OtherStorageOrder = StorageOrder == RowMajor ? ColMajor : RowMajor }; - gemv_selector<OnTheRight,OtherStorageOrder,BlasCompatible> - ::run(GeneralProduct<Transpose<const typename ProductType::_RhsNested>,Transpose<const typename ProductType::_LhsNested>, GemvProduct> - (prod.rhs().transpose(), prod.lhs().transpose()), destT, alpha); - } -}; - template<typename Scalar,int Size,int MaxSize,bool Cond> struct gemv_static_vector_if; template<typename Scalar,int Size,int MaxSize> @@ -402,27 +202,43 @@ struct gemv_static_vector_if<Scalar,Size,MaxSize,true> #endif }; -template<> struct gemv_selector<OnTheRight,ColMajor,true> +// The vector is on the left => transposition +template<int StorageOrder, bool BlasCompatible> +struct gemv_dense_sense_selector<OnTheLeft,StorageOrder,BlasCompatible> +{ + template<typename Lhs, typename Rhs, typename Dest> + static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha) + { + Transpose<Dest> destT(dest); + enum { OtherStorageOrder = StorageOrder == RowMajor ? 
ColMajor : RowMajor }; + gemv_dense_sense_selector<OnTheRight,OtherStorageOrder,BlasCompatible> + ::run(rhs.transpose(), lhs.transpose(), destT, alpha); + } +}; + +template<> struct gemv_dense_sense_selector<OnTheRight,ColMajor,true> { - template<typename ProductType, typename Dest> - static inline void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha) + template<typename Lhs, typename Rhs, typename Dest> + static inline void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha) { - typedef typename ProductType::Index Index; - typedef typename ProductType::LhsScalar LhsScalar; - typedef typename ProductType::RhsScalar RhsScalar; - typedef typename ProductType::Scalar ResScalar; - typedef typename ProductType::RealScalar RealScalar; - typedef typename ProductType::ActualLhsType ActualLhsType; - typedef typename ProductType::ActualRhsType ActualRhsType; - typedef typename ProductType::LhsBlasTraits LhsBlasTraits; - typedef typename ProductType::RhsBlasTraits RhsBlasTraits; + typedef typename Dest::Index Index; + typedef typename Lhs::Scalar LhsScalar; + typedef typename Rhs::Scalar RhsScalar; + typedef typename Dest::Scalar ResScalar; + typedef typename Dest::RealScalar RealScalar; + + typedef internal::blas_traits<Lhs> LhsBlasTraits; + typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType; + typedef internal::blas_traits<Rhs> RhsBlasTraits; + typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType; + typedef Map<Matrix<ResScalar,Dynamic,1>, Aligned> MappedDest; - ActualLhsType actualLhs = LhsBlasTraits::extract(prod.lhs()); - ActualRhsType actualRhs = RhsBlasTraits::extract(prod.rhs()); + ActualLhsType actualLhs = LhsBlasTraits::extract(lhs); + ActualRhsType actualRhs = RhsBlasTraits::extract(rhs); - ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs()) - * RhsBlasTraits::extractScalarFactor(prod.rhs()); + ResScalar actualAlpha = alpha * 
LhsBlasTraits::extractScalarFactor(lhs) + * RhsBlasTraits::extractScalarFactor(rhs); enum { // FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1 @@ -477,34 +293,35 @@ template<> struct gemv_selector<OnTheRight,ColMajor,true> } }; -template<> struct gemv_selector<OnTheRight,RowMajor,true> +template<> struct gemv_dense_sense_selector<OnTheRight,RowMajor,true> { - template<typename ProductType, typename Dest> - static void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha) + template<typename Lhs, typename Rhs, typename Dest> + static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha) { - typedef typename ProductType::LhsScalar LhsScalar; - typedef typename ProductType::RhsScalar RhsScalar; - typedef typename ProductType::Scalar ResScalar; - typedef typename ProductType::Index Index; - typedef typename ProductType::ActualLhsType ActualLhsType; - typedef typename ProductType::ActualRhsType ActualRhsType; - typedef typename ProductType::_ActualRhsType _ActualRhsType; - typedef typename ProductType::LhsBlasTraits LhsBlasTraits; - typedef typename ProductType::RhsBlasTraits RhsBlasTraits; - - typename add_const<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(prod.lhs()); - typename add_const<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(prod.rhs()); - - ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs()) - * RhsBlasTraits::extractScalarFactor(prod.rhs()); + typedef typename Dest::Index Index; + typedef typename Lhs::Scalar LhsScalar; + typedef typename Rhs::Scalar RhsScalar; + typedef typename Dest::Scalar ResScalar; + + typedef internal::blas_traits<Lhs> LhsBlasTraits; + typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType; + typedef internal::blas_traits<Rhs> RhsBlasTraits; + typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType; + typedef typename 
internal::remove_all<ActualRhsType>::type ActualRhsTypeCleaned; + + typename add_const<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(lhs); + typename add_const<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(rhs); + + ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs) + * RhsBlasTraits::extractScalarFactor(rhs); enum { // FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1 // on, the other hand it is good for the cache to pack the vector anyways... - DirectlyUseRhs = _ActualRhsType::InnerStrideAtCompileTime==1 + DirectlyUseRhs = ActualRhsTypeCleaned::InnerStrideAtCompileTime==1 }; - gemv_static_vector_if<RhsScalar,_ActualRhsType::SizeAtCompileTime,_ActualRhsType::MaxSizeAtCompileTime,!DirectlyUseRhs> static_rhs; + gemv_static_vector_if<RhsScalar,ActualRhsTypeCleaned::SizeAtCompileTime,ActualRhsTypeCleaned::MaxSizeAtCompileTime,!DirectlyUseRhs> static_rhs; ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhsPtr,actualRhs.size(), DirectlyUseRhs ? 
const_cast<RhsScalar*>(actualRhs.data()) : static_rhs.data()); @@ -515,7 +332,7 @@ template<> struct gemv_selector<OnTheRight,RowMajor,true> Index size = actualRhs.size(); EIGEN_DENSE_STORAGE_CTOR_PLUGIN #endif - Map<typename _ActualRhsType::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs; + Map<typename ActualRhsTypeCleaned::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs; } typedef const_blas_data_mapper<LhsScalar,Index,RowMajor> LhsMapper; @@ -530,29 +347,29 @@ template<> struct gemv_selector<OnTheRight,RowMajor,true> } }; -template<> struct gemv_selector<OnTheRight,ColMajor,false> +template<> struct gemv_dense_sense_selector<OnTheRight,ColMajor,false> { - template<typename ProductType, typename Dest> - static void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha) + template<typename Lhs, typename Rhs, typename Dest> + static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha) { typedef typename Dest::Index Index; // TODO makes sure dest is sequentially stored in memory, otherwise use a temp - const Index size = prod.rhs().rows(); + const Index size = rhs.rows(); for(Index k=0; k<size; ++k) - dest += (alpha*prod.rhs().coeff(k)) * prod.lhs().col(k); + dest += (alpha*rhs.coeff(k)) * lhs.col(k); } }; -template<> struct gemv_selector<OnTheRight,RowMajor,false> +template<> struct gemv_dense_sense_selector<OnTheRight,RowMajor,false> { - template<typename ProductType, typename Dest> - static void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha) + template<typename Lhs, typename Rhs, typename Dest> + static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha) { typedef typename Dest::Index Index; // TODO makes sure rhs is sequentially stored in memory, otherwise use a temp - const Index rows = prod.rows(); + const Index rows = dest.rows(); for(Index i=0; i<rows; ++i) - dest.coeffRef(i) += alpha * 
(prod.lhs().row(i).cwiseProduct(prod.rhs().transpose())).sum(); + dest.coeffRef(i) += alpha * (lhs.row(i).cwiseProduct(rhs.transpose())).sum(); } }; @@ -570,7 +387,6 @@ template<> struct gemv_selector<OnTheRight,RowMajor,false> */ #ifndef __CUDACC__ -#ifdef EIGEN_TEST_EVALUATORS template<typename Derived> template<typename OtherDerived> inline const Product<Derived, OtherDerived> @@ -601,39 +417,9 @@ MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const return Product<Derived, OtherDerived>(derived(), other.derived()); } -#else -template<typename Derived> -template<typename OtherDerived> -inline const typename ProductReturnType<Derived, OtherDerived>::Type -MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const -{ - // A note regarding the function declaration: In MSVC, this function will sometimes - // not be inlined since DenseStorage is an unwindable object for dynamic - // matrices and product types are holding a member to store the result. - // Thus it does not help tagging this function with EIGEN_STRONG_INLINE. 
- enum { - ProductIsValid = Derived::ColsAtCompileTime==Dynamic - || OtherDerived::RowsAtCompileTime==Dynamic - || int(Derived::ColsAtCompileTime)==int(OtherDerived::RowsAtCompileTime), - AreVectors = Derived::IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime, - SameSizes = EIGEN_PREDICATE_SAME_MATRIX_SIZE(Derived,OtherDerived) - }; - // note to the lost user: - // * for a dot product use: v1.dot(v2) - // * for a coeff-wise product use: v1.cwiseProduct(v2) - EIGEN_STATIC_ASSERT(ProductIsValid || !(AreVectors && SameSizes), - INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS) - EIGEN_STATIC_ASSERT(ProductIsValid || !(SameSizes && !AreVectors), - INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION) - EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT) -#ifdef EIGEN_DEBUG_PRODUCT - internal::product_type<Derived,OtherDerived>::debug(); -#endif - return typename ProductReturnType<Derived,OtherDerived>::Type(derived(), other.derived()); -} -#endif -#endif +#endif // __CUDACC__ + /** \returns an expression of the matrix product of \c *this and \a other without implicit evaluation. 
* * The returned product will behave like any other expressions: the coefficients of the product will be @@ -647,7 +433,7 @@ MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const */ template<typename Derived> template<typename OtherDerived> -const typename LazyProductReturnType<Derived,OtherDerived>::Type +const Product<Derived,OtherDerived,LazyProduct> MatrixBase<Derived>::lazyProduct(const MatrixBase<OtherDerived> &other) const { enum { @@ -666,7 +452,7 @@ MatrixBase<Derived>::lazyProduct(const MatrixBase<OtherDerived> &other) const INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION) EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT) - return typename LazyProductReturnType<Derived,OtherDerived>::Type(derived(), other.derived()); + return Product<Derived,OtherDerived,LazyProduct>(derived(), other.derived()); } } // end namespace Eigen diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index 3ef3475c7..8759cd06c 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -129,12 +129,12 @@ pdiv(const Packet& a, /** \internal \returns the min of \a a and \a b (coeff-wise) */ template<typename Packet> EIGEN_DEVICE_FUNC inline Packet pmin(const Packet& a, - const Packet& b) { EIGEN_USING_STD_MATH(min); return (min)(a, b); } + const Packet& b) { return numext::mini(a, b); } /** \internal \returns the max of \a a and \a b (coeff-wise) */ template<typename Packet> EIGEN_DEVICE_FUNC inline Packet pmax(const Packet& a, - const Packet& b) { EIGEN_USING_STD_MATH(max); return (max)(a, b); } + const Packet& b) { return numext::maxi(a, b); } /** \internal \returns the absolute value of \a a */ template<typename Packet> EIGEN_DEVICE_FUNC inline Packet @@ -245,8 +245,8 @@ template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstoreu /** \internal tries to do cache prefetching of \a addr */ template<typename 
Scalar> inline void prefetch(const Scalar* addr) { -#if !defined(_MSC_VER) -__builtin_prefetch(addr); +#if !EIGEN_COMP_MSVC + __builtin_prefetch(addr); #endif } diff --git a/Eigen/src/Core/GlobalFunctions.h b/Eigen/src/Core/GlobalFunctions.h index 2067a2a6e..ee67b7d3c 100644 --- a/Eigen/src/Core/GlobalFunctions.h +++ b/Eigen/src/Core/GlobalFunctions.h @@ -15,7 +15,7 @@ template<typename Derived> \ inline const Eigen::CwiseUnaryOp<Eigen::internal::FUNCTOR<typename Derived::Scalar>, const Derived> \ NAME(const Eigen::ArrayBase<Derived>& x) { \ - return x.derived(); \ + return Eigen::CwiseUnaryOp<Eigen::internal::FUNCTOR<typename Derived::Scalar>, const Derived>(x.derived()); \ } #define EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(NAME,FUNCTOR) \ @@ -30,7 +30,7 @@ { \ static inline typename NAME##_retval<ArrayBase<Derived> >::type run(const Eigen::ArrayBase<Derived>& x) \ { \ - return x.derived(); \ + return typename NAME##_retval<ArrayBase<Derived> >::type(x.derived()); \ } \ }; diff --git a/Eigen/src/Core/Inverse.h b/Eigen/src/Core/Inverse.h new file mode 100644 index 000000000..f3b0dff87 --- /dev/null +++ b/Eigen/src/Core/Inverse.h @@ -0,0 +1,129 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Gael Guennebaud <gael.guennebaud@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_INVERSE_H +#define EIGEN_INVERSE_H + +namespace Eigen { + +// TODO move the general declaration in Core, and rename this file DenseInverseImpl.h, or something like this... 
+ +template<typename XprType,typename StorageKind> class InverseImpl; + +namespace internal { + +template<typename XprType> +struct traits<Inverse<XprType> > + : traits<typename XprType::PlainObject> +{ + typedef typename XprType::PlainObject PlainObject; + typedef traits<PlainObject> BaseTraits; + enum { + Flags = BaseTraits::Flags & RowMajorBit + }; +}; + +} // end namespace internal + +/** \class Inverse + * + * \brief Expression of the inverse of another expression + * + * \tparam XprType the type of the expression we are taking the inverse of + * + * This class represents an abstract expression of A.inverse() + * and most of the time this is the only way it is used. + * + */ +template<typename XprType> +class Inverse : public InverseImpl<XprType,typename internal::traits<XprType>::StorageKind> +{ +public: + typedef typename XprType::Index Index; + typedef typename XprType::PlainObject PlainObject; + typedef typename internal::nested<XprType>::type XprTypeNested; + typedef typename internal::remove_all<XprTypeNested>::type XprTypeNestedCleaned; + + explicit Inverse(const XprType &xpr) + : m_xpr(xpr) + {} + + EIGEN_DEVICE_FUNC Index rows() const { return m_xpr.rows(); } + EIGEN_DEVICE_FUNC Index cols() const { return m_xpr.cols(); } + + EIGEN_DEVICE_FUNC const XprTypeNestedCleaned& nestedExpression() const { return m_xpr; } + +protected: + XprTypeNested m_xpr; +}; + +/** \internal + * Specialization of the Inverse expression for dense expressions. + * Direct access to the coefficients is discarded. + * FIXME this intermediate class is probably not needed anymore. 
+ */ +template<typename XprType> +class InverseImpl<XprType,Dense> + : public MatrixBase<Inverse<XprType> > +{ + typedef Inverse<XprType> Derived; + +public: + + typedef MatrixBase<Derived> Base; + EIGEN_DENSE_PUBLIC_INTERFACE(Derived) + typedef typename internal::remove_all<XprType>::type NestedExpression; + +private: + + Scalar coeff(Index row, Index col) const; + Scalar coeff(Index i) const; +}; + +namespace internal { + +/** \internal + * \brief Default evaluator for Inverse expression. + * + * This default evaluator for Inverse expression simply evaluates the inverse into a temporary + * by a call to internal::call_assignment_no_alias. + * Therefore, inverse implementers only have to specialize Assignment<Dst,Inverse<...>, ...> for + * their own nested expression. + * + * \sa class Inverse + */ +template<typename ArgType> +struct unary_evaluator<Inverse<ArgType> > + : public evaluator<typename Inverse<ArgType>::PlainObject>::type +{ + typedef Inverse<ArgType> InverseType; + typedef typename InverseType::PlainObject PlainObject; + typedef typename evaluator<PlainObject>::type Base; + + typedef evaluator<InverseType> type; + typedef evaluator<InverseType> nestedType; + + enum { Flags = Base::Flags | EvalBeforeNestingBit }; + + unary_evaluator(const InverseType& inv_xpr) + : m_result(inv_xpr.rows(), inv_xpr.cols()) + { + ::new (static_cast<Base*>(this)) Base(m_result); + internal::call_assignment_no_alias(m_result, inv_xpr); + } + +protected: + PlainObject m_result; +}; + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_INVERSE_H diff --git a/Eigen/src/Core/Map.h b/Eigen/src/Core/Map.h index ced1b76ba..098f1c096 100644 --- a/Eigen/src/Core/Map.h +++ b/Eigen/src/Core/Map.h @@ -79,22 +79,9 @@ struct traits<Map<PlainObjectType, MapOptions, StrideType> > OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0 ? 
int(PlainObjectType::OuterStrideAtCompileTime) : int(StrideType::OuterStrideAtCompileTime), - HasNoInnerStride = InnerStrideAtCompileTime == 1, - HasNoOuterStride = StrideType::OuterStrideAtCompileTime == 0, - HasNoStride = HasNoInnerStride && HasNoOuterStride, IsAligned = bool(EIGEN_ALIGN) && ((int(MapOptions)&Aligned)==Aligned), - IsDynamicSize = PlainObjectType::SizeAtCompileTime==Dynamic, - KeepsPacketAccess = bool(HasNoInnerStride) - && ( bool(IsDynamicSize) - || HasNoOuterStride - || ( OuterStrideAtCompileTime!=Dynamic - && ((static_cast<int>(sizeof(Scalar))*OuterStrideAtCompileTime)%EIGEN_ALIGN_BYTES)==0 ) ), Flags0 = TraitsBase::Flags & (~NestByRefBit), - Flags1 = IsAligned ? (int(Flags0) | AlignedBit) : (int(Flags0) & ~AlignedBit), - Flags2 = (bool(HasNoStride) || bool(PlainObjectType::IsVectorAtCompileTime)) - ? int(Flags1) : int(Flags1 & ~LinearAccessBit), - Flags3 = is_lvalue<PlainObjectType>::value ? int(Flags2) : (int(Flags2) & ~LvalueBit), - Flags = KeepsPacketAccess ? int(Flags3) : (int(Flags3) & ~PacketAccessBit) + Flags = is_lvalue<PlainObjectType>::value ? int(Flags0) : (int(Flags0) & ~LvalueBit) }; private: enum { Options }; // Expressions don't have Options @@ -135,7 +122,7 @@ template<typename PlainObjectType, int MapOptions, typename StrideType> class Ma * \param a_stride optional Stride object, passing the strides. 
*/ EIGEN_DEVICE_FUNC - inline Map(PointerArgType dataPtr, const StrideType& a_stride = StrideType()) + explicit inline Map(PointerArgType dataPtr, const StrideType& a_stride = StrideType()) : Base(cast_to_pointer_type(dataPtr)), m_stride(a_stride) { PlainObjectType::Base::_check_template_params(); diff --git a/Eigen/src/Core/MapBase.h b/Eigen/src/Core/MapBase.h index e8ecb175b..1589cbaae 100644 --- a/Eigen/src/Core/MapBase.h +++ b/Eigen/src/Core/MapBase.h @@ -12,7 +12,7 @@ #define EIGEN_MAPBASE_H #define EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived) \ - EIGEN_STATIC_ASSERT((int(internal::traits<Derived>::Flags) & LinearAccessBit) || Derived::IsVectorAtCompileTime, \ + EIGEN_STATIC_ASSERT((int(internal::evaluator<Derived>::Flags) & LinearAccessBit) || Derived::IsVectorAtCompileTime, \ YOU_ARE_TRYING_TO_USE_AN_INDEX_BASED_ACCESSOR_ON_AN_EXPRESSION_THAT_DOES_NOT_SUPPORT_THAT) namespace Eigen { @@ -85,7 +85,7 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors> * * \sa innerStride(), outerStride() */ - inline const Scalar* data() const { return m_data; } + EIGEN_DEVICE_FUNC inline const Scalar* data() const { return m_data; } EIGEN_DEVICE_FUNC inline const Scalar& coeff(Index rowId, Index colId) const @@ -128,7 +128,7 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors> } EIGEN_DEVICE_FUNC - inline MapBase(PointerType dataPtr) : m_data(dataPtr), m_rows(RowsAtCompileTime), m_cols(ColsAtCompileTime) + explicit inline MapBase(PointerType dataPtr) : m_data(dataPtr), m_rows(RowsAtCompileTime), m_cols(ColsAtCompileTime) { EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived) checkSanity(); @@ -161,11 +161,7 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors> EIGEN_DEVICE_FUNC void checkSanity() const { - EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(internal::traits<Derived>::Flags&PacketAccessBit, - internal::inner_stride_at_compile_time<Derived>::ret==1), - PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1); - 
eigen_assert(EIGEN_IMPLIES(internal::traits<Derived>::Flags&AlignedBit, (size_t(m_data) % EIGEN_ALIGN_BYTES) == 0) - && "data is not aligned"); + eigen_assert(EIGEN_IMPLIES(internal::traits<Derived>::IsAligned, (size_t(m_data) % EIGEN_ALIGN_BYTES) == 0) && "data is not aligned"); } PointerType m_data; @@ -176,6 +172,7 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors> template<typename Derived> class MapBase<Derived, WriteAccessors> : public MapBase<Derived, ReadOnlyAccessors> { + typedef MapBase<Derived, ReadOnlyAccessors> ReadOnlyMapBase; public: typedef MapBase<Derived, ReadOnlyAccessors> Base; @@ -243,11 +240,13 @@ template<typename Derived> class MapBase<Derived, WriteAccessors> EIGEN_DEVICE_FUNC Derived& operator=(const MapBase& other) { - Base::Base::operator=(other); + ReadOnlyMapBase::Base::operator=(other); return derived(); } - using Base::Base::operator=; + // In theory we could simply refer to Base::Base::operator=, but MSVC does not like Base::Base, + // see bugs 821 and 920. + using ReadOnlyMapBase::Base::operator=; }; #undef EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h index e9fed2e52..16ad2dc7e 100644 --- a/Eigen/src/Core/MathFunctions.h +++ b/Eigen/src/Core/MathFunctions.h @@ -14,7 +14,7 @@ namespace Eigen { // On WINCE, std::abs is defined for int only, so let's defined our own overloads: // This issue has been confirmed with MSVC 2008 only, but the issue might exist for more recent versions too. 
-#if defined(_WIN32_WCE) && defined(_MSC_VER) && _MSC_VER<=1500 +#if EIGEN_OS_WINCE && EIGEN_COMP_MSVC && EIGEN_COMP_MSVC<=1500 long abs(long x) { return (labs(x)); } double abs(double x) { return (fabs(x)); } float abs(float x) { return (fabsf(x)); } @@ -360,50 +360,31 @@ inline NewType cast(const OldType& x) } /**************************************************************************** -* Implementation of atanh2 * +* Implementation of log1p * ****************************************************************************/ template<typename Scalar> -struct atanh2_impl +struct log1p_impl { - static inline Scalar run(const Scalar& x, const Scalar& r) + static inline Scalar run(const Scalar& x) { EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar) - #if (__cplusplus >= 201103L) && !defined(__CYGWIN__) + // Let's be conservative and enable the default C++11 implementation only if we are sure it exists + #if (__cplusplus >= 201103L) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_CLANG || EIGEN_COMP_MSVC || EIGEN_COMP_ICC) \ + && (EIGEN_ARCH_i386_OR_x86_64) && (EIGEN_OS_GNULINUX || EIGEN_OS_WIN_STRICT || EIGEN_OS_MAC) using std::log1p; - return log1p(2 * x / (r - x)) / 2; + return log1p(x); #else - using std::abs; + typedef typename NumTraits<Scalar>::Real RealScalar; using std::log; - using std::sqrt; - Scalar z = x / r; - if (r == 0 || abs(z) > sqrt(NumTraits<Scalar>::epsilon())) - return log((r + x) / (r - x)) / 2; - else - return z + z*z*z / 3; + Scalar x1p = RealScalar(1) + x; + return ( x1p == Scalar(1) ) ? 
x : x * ( log(x1p) / (x1p - RealScalar(1)) ); #endif } }; -template<typename RealScalar> -struct atanh2_impl<std::complex<RealScalar> > -{ - typedef std::complex<RealScalar> Scalar; - static inline Scalar run(const Scalar& x, const Scalar& r) - { - using std::log; - using std::norm; - using std::sqrt; - Scalar z = x / r; - if (r == Scalar(0) || norm(z) > NumTraits<RealScalar>::epsilon()) - return RealScalar(0.5) * log((r + x) / (r - x)); - else - return z + z*z*z / RealScalar(3); - } -}; - template<typename Scalar> -struct atanh2_retval +struct log1p_retval { typedef Scalar type; }; @@ -591,6 +572,22 @@ inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random() ****************************************************************************/ namespace numext { + +template<typename T> +EIGEN_DEVICE_FUNC +inline T mini(const T& x, const T& y) +{ + EIGEN_USING_STD_MATH(min); + return min EIGEN_NOT_A_MACRO (x,y); +} + +template<typename T> +EIGEN_DEVICE_FUNC +inline T maxi(const T& x, const T& y) +{ + EIGEN_USING_STD_MATH(max); + return max EIGEN_NOT_A_MACRO (x,y); +} template<typename Scalar> EIGEN_DEVICE_FUNC @@ -664,9 +661,9 @@ inline EIGEN_MATHFUNC_RETVAL(hypot, Scalar) hypot(const Scalar& x, const Scalar& template<typename Scalar> EIGEN_DEVICE_FUNC -inline EIGEN_MATHFUNC_RETVAL(atanh2, Scalar) atanh2(const Scalar& x, const Scalar& y) +inline EIGEN_MATHFUNC_RETVAL(log1p, Scalar) log1p(const Scalar& x) { - return EIGEN_MATHFUNC_IMPL(atanh2, Scalar)::run(x, y); + return EIGEN_MATHFUNC_IMPL(log1p, Scalar)::run(x); } template<typename Scalar> @@ -694,6 +691,21 @@ bool (isfinite)(const std::complex<T>& x) return isfinite(real(x)) && isfinite(imag(x)); } +// Log base 2 for 32 bits positive integers. +// Conveniently returns 0 for x==0. 
+inline int log2(int x) +{ + eigen_assert(x>=0); + unsigned int v(x); + static const int table[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + return table[(v * 0x07C4ACDDU) >> 27]; +} + +} // end namespace numext namespace internal { diff --git a/Eigen/src/Core/Matrix.h b/Eigen/src/Core/Matrix.h index 8c95ee3ca..0b3d90786 100644 --- a/Eigen/src/Core/Matrix.h +++ b/Eigen/src/Core/Matrix.h @@ -115,7 +115,8 @@ struct traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > MaxRowsAtCompileTime = _MaxRows, MaxColsAtCompileTime = _MaxCols, Flags = compute_matrix_flags<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>::ret, - CoeffReadCost = NumTraits<Scalar>::ReadCost, + // FIXME, the following flag is only used to define NeedsToAlign in PlainObjectBase + EvaluatorFlags = compute_matrix_evaluator_flags<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>::ret, Options = _Options, InnerStrideAtCompileTime = 1, OuterStrideAtCompileTime = (Options&RowMajor) ? ColsAtCompileTime : RowsAtCompileTime @@ -213,7 +214,7 @@ class Matrix // FIXME is it still needed EIGEN_DEVICE_FUNC - Matrix(internal::constructor_without_unaligned_array_assert) + explicit Matrix(internal::constructor_without_unaligned_array_assert) : Base(internal::constructor_without_unaligned_array_assert()) { Base::_check_template_params(); EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED } @@ -359,15 +360,6 @@ class Matrix *this = other; } - /** \internal - * \brief Override MatrixBase::swap() since for dynamic-sized matrices - * of same type it is enough to swap the data pointers. 
- */ - template<typename OtherDerived> - EIGEN_DEVICE_FUNC - void swap(MatrixBase<OtherDerived> const & other) - { this->_swap(other.derived()); } - EIGEN_DEVICE_FUNC inline Index innerStride() const { return 1; } EIGEN_DEVICE_FUNC inline Index outerStride() const { return this->innerSize(); } diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h index 3cb5e04fd..86994cb36 100644 --- a/Eigen/src/Core/MatrixBase.h +++ b/Eigen/src/Core/MatrixBase.h @@ -66,8 +66,7 @@ template<typename Derived> class MatrixBase using Base::MaxSizeAtCompileTime; using Base::IsVectorAtCompileTime; using Base::Flags; - using Base::CoeffReadCost; - + using Base::derived; using Base::const_cast_derived; using Base::rows; @@ -181,26 +180,20 @@ template<typename Derived> class MatrixBase #ifdef __CUDACC__ template<typename OtherDerived> EIGEN_DEVICE_FUNC - const typename LazyProductReturnType<Derived,OtherDerived>::Type + const Product<Derived,OtherDerived,LazyProduct> operator*(const MatrixBase<OtherDerived> &other) const { return this->lazyProduct(other); } #else -#ifdef EIGEN_TEST_EVALUATORS template<typename OtherDerived> const Product<Derived,OtherDerived> operator*(const MatrixBase<OtherDerived> &other) const; -#else - template<typename OtherDerived> - const typename ProductReturnType<Derived,OtherDerived>::Type - operator*(const MatrixBase<OtherDerived> &other) const; -#endif #endif template<typename OtherDerived> EIGEN_DEVICE_FUNC - const typename LazyProductReturnType<Derived,OtherDerived>::Type + const Product<Derived,OtherDerived,LazyProduct> lazyProduct(const MatrixBase<OtherDerived> &other) const; template<typename OtherDerived> @@ -214,7 +207,7 @@ template<typename Derived> class MatrixBase template<typename DiagonalDerived> EIGEN_DEVICE_FUNC - const DiagonalProduct<Derived, DiagonalDerived, OnTheRight> + const Product<Derived, DiagonalDerived, LazyProduct> operator*(const DiagonalBase<DiagonalDerived> &diagonal) const; template<typename OtherDerived> @@ -251,19 
+244,14 @@ template<typename Derived> class MatrixBase template<int Index> EIGEN_DEVICE_FUNC typename ConstDiagonalIndexReturnType<Index>::Type diagonal() const; + + typedef Diagonal<Derived,DynamicIndex> DiagonalDynamicIndexReturnType; + typedef typename internal::add_const<Diagonal<const Derived,DynamicIndex> >::type ConstDiagonalDynamicIndexReturnType; - // Note: The "MatrixBase::" prefixes are added to help MSVC9 to match these declarations with the later implementations. - // On the other hand they confuse MSVC8... - #if (defined _MSC_VER) && (_MSC_VER >= 1500) // 2008 or later - typename MatrixBase::template DiagonalIndexReturnType<DynamicIndex>::Type diagonal(Index index); - typename MatrixBase::template ConstDiagonalIndexReturnType<DynamicIndex>::Type diagonal(Index index) const; - #else EIGEN_DEVICE_FUNC - typename DiagonalIndexReturnType<DynamicIndex>::Type diagonal(Index index); - + DiagonalDynamicIndexReturnType diagonal(Index index); EIGEN_DEVICE_FUNC - typename ConstDiagonalIndexReturnType<DynamicIndex>::Type diagonal(Index index) const; - #endif + ConstDiagonalDynamicIndexReturnType diagonal(Index index) const; template<unsigned int Mode> struct TriangularViewReturnType { typedef TriangularView<Derived, Mode> Type; }; template<unsigned int Mode> struct ConstTriangularViewReturnType { typedef const TriangularView<const Derived, Mode> Type; }; @@ -334,10 +322,12 @@ template<typename Derived> class MatrixBase NoAlias<Derived,Eigen::MatrixBase > noalias(); - inline const ForceAlignedAccess<Derived> forceAlignedAccess() const; - inline ForceAlignedAccess<Derived> forceAlignedAccess(); - template<bool Enable> inline typename internal::add_const_on_value_type<typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type>::type forceAlignedAccessIf() const; - template<bool Enable> inline typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type forceAlignedAccessIf(); + // TODO forceAlignedAccess is temporarily 
disabled + // Need to find a nicer workaround. + inline const Derived& forceAlignedAccess() const { return derived(); } + inline Derived& forceAlignedAccess() { return derived(); } + template<bool Enable> inline const Derived& forceAlignedAccessIf() const { return derived(); } + template<bool Enable> inline Derived& forceAlignedAccessIf() { return derived(); } Scalar trace() const; @@ -348,10 +338,10 @@ template<typename Derived> class MatrixBase /** \returns an \link Eigen::ArrayBase Array \endlink expression of this matrix * \sa ArrayBase::matrix() */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ArrayWrapper<Derived> array() { return derived(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ArrayWrapper<Derived> array() { return ArrayWrapper<Derived>(derived()); } /** \returns a const \link Eigen::ArrayBase Array \endlink expression of this matrix * \sa ArrayBase::matrix() */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const ArrayWrapper<const Derived> array() const { return derived(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const ArrayWrapper<const Derived> array() const { return ArrayWrapper<const Derived>(derived()); } /////////// LU module /////////// @@ -361,7 +351,8 @@ template<typename Derived> class MatrixBase const PartialPivLU<PlainObject> lu() const; EIGEN_DEVICE_FUNC - const internal::inverse_impl<Derived> inverse() const; + const Inverse<Derived> inverse() const; + template<typename ResultType> void computeInverseAndDetWithCheck( ResultType& inverse, @@ -394,6 +385,7 @@ template<typename Derived> class MatrixBase /////////// SVD module /////////// JacobiSVD<PlainObject> jacobiSvd(unsigned int computationOptions = 0) const; + BDCSVD<PlainObject> bdcSvd(unsigned int computationOptions = 0) const; /////////// Geometry module /////////// diff --git a/Eigen/src/Core/NestByValue.h b/Eigen/src/Core/NestByValue.h index a893b1761..9aeaf8d18 100644 --- a/Eigen/src/Core/NestByValue.h +++ b/Eigen/src/Core/NestByValue.h @@ -40,29 +40,29 @@ template<typename ExpressionType> 
class NestByValue typedef typename internal::dense_xpr_base<NestByValue>::type Base; EIGEN_DENSE_PUBLIC_INTERFACE(NestByValue) - inline NestByValue(const ExpressionType& matrix) : m_expression(matrix) {} + EIGEN_DEVICE_FUNC explicit inline NestByValue(const ExpressionType& matrix) : m_expression(matrix) {} - inline Index rows() const { return m_expression.rows(); } - inline Index cols() const { return m_expression.cols(); } - inline Index outerStride() const { return m_expression.outerStride(); } - inline Index innerStride() const { return m_expression.innerStride(); } + EIGEN_DEVICE_FUNC inline Index rows() const { return m_expression.rows(); } + EIGEN_DEVICE_FUNC inline Index cols() const { return m_expression.cols(); } + EIGEN_DEVICE_FUNC inline Index outerStride() const { return m_expression.outerStride(); } + EIGEN_DEVICE_FUNC inline Index innerStride() const { return m_expression.innerStride(); } - inline const CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC inline const CoeffReturnType coeff(Index row, Index col) const { return m_expression.coeff(row, col); } - inline Scalar& coeffRef(Index row, Index col) + EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index row, Index col) { return m_expression.const_cast_derived().coeffRef(row, col); } - inline const CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC inline const CoeffReturnType coeff(Index index) const { return m_expression.coeff(index); } - inline Scalar& coeffRef(Index index) + EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index index) { return m_expression.const_cast_derived().coeffRef(index); } @@ -91,7 +91,7 @@ template<typename ExpressionType> class NestByValue m_expression.const_cast_derived().template writePacket<LoadMode>(index, x); } - operator const ExpressionType&() const { return m_expression; } + EIGEN_DEVICE_FUNC operator const ExpressionType&() const { return m_expression; } protected: const ExpressionType m_expression; diff --git a/Eigen/src/Core/NoAlias.h 
b/Eigen/src/Core/NoAlias.h index 0a1c32743..0ade75255 100644 --- a/Eigen/src/Core/NoAlias.h +++ b/Eigen/src/Core/NoAlias.h @@ -30,68 +30,35 @@ namespace Eigen { template<typename ExpressionType, template <typename> class StorageBase> class NoAlias { - typedef typename ExpressionType::Scalar Scalar; public: - NoAlias(ExpressionType& expression) : m_expression(expression) {} - - /** Behaves like MatrixBase::lazyAssign(other) - * \sa MatrixBase::lazyAssign() */ + typedef typename ExpressionType::Scalar Scalar; + + explicit NoAlias(ExpressionType& expression) : m_expression(expression) {} + template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ExpressionType& operator=(const StorageBase<OtherDerived>& other) - { return internal::assign_selector<ExpressionType,OtherDerived,false>::run(m_expression,other.derived()); } - - /** \sa MatrixBase::operator+= */ + { + call_assignment_no_alias(m_expression, other.derived(), internal::assign_op<Scalar>()); + return m_expression; + } + template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ExpressionType& operator+=(const StorageBase<OtherDerived>& other) { - typedef SelfCwiseBinaryOp<internal::scalar_sum_op<Scalar>, ExpressionType, OtherDerived> SelfAdder; - SelfAdder tmp(m_expression); - typedef typename internal::nested<OtherDerived>::type OtherDerivedNested; - typedef typename internal::remove_all<OtherDerivedNested>::type _OtherDerivedNested; - internal::assign_selector<SelfAdder,_OtherDerivedNested,false>::run(tmp,OtherDerivedNested(other.derived())); + call_assignment_no_alias(m_expression, other.derived(), internal::add_assign_op<Scalar>()); return m_expression; } - - /** \sa MatrixBase::operator-= */ + template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ExpressionType& operator-=(const StorageBase<OtherDerived>& other) { - typedef SelfCwiseBinaryOp<internal::scalar_difference_op<Scalar>, ExpressionType, OtherDerived> SelfAdder; - SelfAdder tmp(m_expression); - typedef 
typename internal::nested<OtherDerived>::type OtherDerivedNested; - typedef typename internal::remove_all<OtherDerivedNested>::type _OtherDerivedNested; - internal::assign_selector<SelfAdder,_OtherDerivedNested,false>::run(tmp,OtherDerivedNested(other.derived())); + call_assignment_no_alias(m_expression, other.derived(), internal::sub_assign_op<Scalar>()); return m_expression; } -#ifndef EIGEN_PARSED_BY_DOXYGEN - template<typename ProductDerived, typename Lhs, typename Rhs> - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE ExpressionType& operator+=(const ProductBase<ProductDerived, Lhs,Rhs>& other) - { other.derived().addTo(m_expression); return m_expression; } - - template<typename ProductDerived, typename Lhs, typename Rhs> - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE ExpressionType& operator-=(const ProductBase<ProductDerived, Lhs,Rhs>& other) - { other.derived().subTo(m_expression); return m_expression; } - - template<typename Lhs, typename Rhs, int NestingFlags> - EIGEN_STRONG_INLINE ExpressionType& operator+=(const CoeffBasedProduct<Lhs,Rhs,NestingFlags>& other) - { return m_expression.derived() += CoeffBasedProduct<Lhs,Rhs,NestByRefBit>(other.lhs(), other.rhs()); } - - template<typename Lhs, typename Rhs, int NestingFlags> - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE ExpressionType& operator-=(const CoeffBasedProduct<Lhs,Rhs,NestingFlags>& other) - { return m_expression.derived() -= CoeffBasedProduct<Lhs,Rhs,NestByRefBit>(other.lhs(), other.rhs()); } - - template<typename OtherDerived> - ExpressionType& operator=(const ReturnByValue<OtherDerived>& func) - { return m_expression = func; } -#endif - EIGEN_DEVICE_FUNC ExpressionType& expression() const { @@ -133,7 +100,7 @@ class NoAlias template<typename Derived> NoAlias<Derived,MatrixBase> MatrixBase<Derived>::noalias() { - return derived(); + return NoAlias<Derived, Eigen::MatrixBase >(derived()); } } // end namespace Eigen diff --git a/Eigen/src/Core/PermutationMatrix.h b/Eigen/src/Core/PermutationMatrix.h index 
8aa4c8bc5..4846f2ae1 100644 --- a/Eigen/src/Core/PermutationMatrix.h +++ b/Eigen/src/Core/PermutationMatrix.h @@ -13,7 +13,8 @@ namespace Eigen { -template<int RowCol,typename IndicesType,typename MatrixType, typename StorageKind> class PermutedImpl; +// TODO: this does not seems to be needed at all: +// template<int RowCol,typename IndicesType,typename MatrixType, typename StorageKind> class PermutedImpl; /** \class PermutationBase * \ingroup Core_Module @@ -60,7 +61,6 @@ class PermutationBase : public EigenBase<Derived> typedef typename Traits::IndicesType IndicesType; enum { Flags = Traits::Flags, - CoeffReadCost = Traits::CoeffReadCost, RowsAtCompileTime = Traits::RowsAtCompileTime, ColsAtCompileTime = Traits::ColsAtCompileTime, MaxRowsAtCompileTime = Traits::MaxRowsAtCompileTime, @@ -73,6 +73,7 @@ class PermutationBase : public EigenBase<Derived> typedef PermutationMatrix<IndicesType::SizeAtCompileTime,IndicesType::MaxSizeAtCompileTime,StorageIndexType> PlainPermutationType; using Base::derived; + typedef Transpose<PermutationBase> TransposeReturnType; #endif /** Copies the other permutation into *this */ @@ -118,7 +119,7 @@ class PermutationBase : public EigenBase<Derived> void evalTo(MatrixBase<DenseDerived>& other) const { other.setZero(); - for (int i=0; i<rows();++i) + for (Index i=0; i<rows(); ++i) other.coeffRef(indices().coeff(i),i) = typename DenseDerived::Scalar(1); } #endif @@ -163,10 +164,10 @@ class PermutationBase : public EigenBase<Derived> * * \returns a reference to *this. * - * \warning This is much slower than applyTranspositionOnTheRight(int,int): + * \warning This is much slower than applyTranspositionOnTheRight(Index,Index): * this has linear complexity and requires a lot of branching. 
* - * \sa applyTranspositionOnTheRight(int,int) + * \sa applyTranspositionOnTheRight(Index,Index) */ Derived& applyTranspositionOnTheLeft(Index i, Index j) { @@ -185,7 +186,7 @@ class PermutationBase : public EigenBase<Derived> * * This is a fast operation, it only consists in swapping two indices. * - * \sa applyTranspositionOnTheLeft(int,int) + * \sa applyTranspositionOnTheLeft(Index,Index) */ Derived& applyTranspositionOnTheRight(Index i, Index j) { @@ -198,14 +199,14 @@ class PermutationBase : public EigenBase<Derived> * * \note \note_try_to_help_rvo */ - inline Transpose<PermutationBase> inverse() const - { return derived(); } + inline TransposeReturnType inverse() const + { return TransposeReturnType(derived()); } /** \returns the tranpose permutation matrix. * * \note \note_try_to_help_rvo */ - inline Transpose<PermutationBase> transpose() const - { return derived(); } + inline TransposeReturnType transpose() const + { return TransposeReturnType(derived()); } /**** multiplication helpers to hopefully get RVO ****/ @@ -215,13 +216,13 @@ class PermutationBase : public EigenBase<Derived> template<typename OtherDerived> void assignTranspose(const PermutationBase<OtherDerived>& other) { - for (int i=0; i<rows();++i) indices().coeffRef(other.indices().coeff(i)) = i; + for (Index i=0; i<rows();++i) indices().coeffRef(other.indices().coeff(i)) = i; } template<typename Lhs,typename Rhs> void assignProduct(const Lhs& lhs, const Rhs& rhs) { eigen_assert(lhs.cols() == rhs.rows()); - for (int i=0; i<rows();++i) indices().coeffRef(i) = lhs.indices().coeff(rhs.indices().coeff(i)); + for (Index i=0; i<rows();++i) indices().coeffRef(i) = lhs.indices().coeff(rhs.indices().coeff(i)); } #endif @@ -274,6 +275,7 @@ template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndex struct traits<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageIndexType> > : 
traits<Matrix<_StorageIndexType,SizeAtCompileTime,SizeAtCompileTime,0,MaxSizeAtCompileTime,MaxSizeAtCompileTime> > { + typedef PermutationStorage StorageKind; typedef Matrix<_StorageIndexType, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1> IndicesType; typedef typename IndicesType::Index Index; typedef _StorageIndexType StorageIndexType; @@ -287,6 +289,8 @@ class PermutationMatrix : public PermutationBase<PermutationMatrix<SizeAtCompile typedef internal::traits<PermutationMatrix> Traits; public: + typedef const PermutationMatrix& Nested; + #ifndef EIGEN_PARSED_BY_DOXYGEN typedef typename Traits::IndicesType IndicesType; typedef typename Traits::StorageIndexType StorageIndexType; @@ -298,8 +302,10 @@ class PermutationMatrix : public PermutationBase<PermutationMatrix<SizeAtCompile /** Constructs an uninitialized permutation matrix of given size. */ - inline PermutationMatrix(Index size) : m_indices(size) - {} + explicit inline PermutationMatrix(Index size) : m_indices(size) + { + eigen_internal_assert(size <= NumTraits<StorageIndexType>::highest()); + } /** Copy constructor. 
*/ template<typename OtherDerived> @@ -370,7 +376,10 @@ class PermutationMatrix : public PermutationBase<PermutationMatrix<SizeAtCompile PermutationMatrix(const Transpose<PermutationBase<Other> >& other) : m_indices(other.nestedPermutation().size()) { - for (int i=0; i<m_indices.size();++i) m_indices.coeffRef(other.nestedPermutation().indices().coeff(i)) = i; + eigen_internal_assert(m_indices.size() <= NumTraits<StorageIndexType>::highest()); + StorageIndexType end = StorageIndexType(m_indices.size()); + for (StorageIndexType i=0; i<end;++i) + m_indices.coeffRef(other.nestedPermutation().indices().coeff(i)) = i; } template<typename Lhs,typename Rhs> PermutationMatrix(internal::PermPermProduct_t, const Lhs& lhs, const Rhs& rhs) @@ -391,6 +400,7 @@ template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndex struct traits<Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageIndexType>,_PacketAccess> > : traits<Matrix<_StorageIndexType,SizeAtCompileTime,SizeAtCompileTime,0,MaxSizeAtCompileTime,MaxSizeAtCompileTime> > { + typedef PermutationStorage StorageKind; typedef Map<const Matrix<_StorageIndexType, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1>, _PacketAccess> IndicesType; typedef typename IndicesType::Index Index; typedef _StorageIndexType StorageIndexType; @@ -462,8 +472,6 @@ class Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageInd * \sa class PermutationBase, class PermutationMatrix */ -struct PermutationStorage {}; - template<typename _IndicesType> class TranspositionsWrapper; namespace internal { template<typename _IndicesType> @@ -477,10 +485,9 @@ struct traits<PermutationWrapper<_IndicesType> > enum { RowsAtCompileTime = _IndicesType::SizeAtCompileTime, ColsAtCompileTime = _IndicesType::SizeAtCompileTime, - MaxRowsAtCompileTime = IndicesType::MaxRowsAtCompileTime, - MaxColsAtCompileTime = IndicesType::MaxColsAtCompileTime, - Flags = 0, - CoeffReadCost = _IndicesType::CoeffReadCost + 
MaxRowsAtCompileTime = IndicesType::MaxSizeAtCompileTime, + MaxColsAtCompileTime = IndicesType::MaxSizeAtCompileTime, + Flags = 0 }; }; } @@ -509,35 +516,39 @@ class PermutationWrapper : public PermutationBase<PermutationWrapper<_IndicesTyp typename IndicesType::Nested m_indices; }; + +// TODO: Do we need to define these operator* functions? Would it be better to have them inherited +// from MatrixBase? + /** \returns the matrix with the permutation applied to the columns. */ -template<typename Derived, typename PermutationDerived> -inline const internal::permut_matrix_product_retval<PermutationDerived, Derived, OnTheRight> -operator*(const MatrixBase<Derived>& matrix, - const PermutationBase<PermutationDerived> &permutation) +template<typename MatrixDerived, typename PermutationDerived> +EIGEN_DEVICE_FUNC +const Product<MatrixDerived, PermutationDerived, DefaultProduct> +operator*(const MatrixBase<MatrixDerived> &matrix, + const PermutationBase<PermutationDerived>& permutation) { - return internal::permut_matrix_product_retval - <PermutationDerived, Derived, OnTheRight> - (permutation.derived(), matrix.derived()); + return Product<MatrixDerived, PermutationDerived, DefaultProduct> + (matrix.derived(), permutation.derived()); } /** \returns the matrix with the permutation applied to the rows. 
*/ -template<typename Derived, typename PermutationDerived> -inline const internal::permut_matrix_product_retval - <PermutationDerived, Derived, OnTheLeft> +template<typename PermutationDerived, typename MatrixDerived> +EIGEN_DEVICE_FUNC +const Product<PermutationDerived, MatrixDerived, DefaultProduct> operator*(const PermutationBase<PermutationDerived> &permutation, - const MatrixBase<Derived>& matrix) + const MatrixBase<MatrixDerived>& matrix) { - return internal::permut_matrix_product_retval - <PermutationDerived, Derived, OnTheLeft> - (permutation.derived(), matrix.derived()); + return Product<PermutationDerived, MatrixDerived, DefaultProduct> + (permutation.derived(), matrix.derived()); } namespace internal { template<typename PermutationType, typename MatrixType, int Side, bool Transposed> struct traits<permut_matrix_product_retval<PermutationType, MatrixType, Side, Transposed> > + : traits<typename MatrixType::PlainObject> { typedef typename MatrixType::PlainObject ReturnType; }; @@ -590,7 +601,7 @@ struct permut_matrix_product_retval } else { - for(int i = 0; i < n; ++i) + for(Index i = 0; i < n; ++i) { Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime> (dst, ((Side==OnTheLeft) ^ Transposed) ? 
m_permutation.indices().coeff(i) : i) @@ -617,6 +628,8 @@ struct traits<Transpose<PermutationBase<Derived> > > } // end namespace internal +// TODO: the specificties should be handled by the evaluator, +// at the very least we should only specialize TransposeImpl template<typename Derived> class Transpose<PermutationBase<Derived> > : public EigenBase<Transpose<PermutationBase<Derived> > > @@ -631,26 +644,26 @@ class Transpose<PermutationBase<Derived> > typedef typename Derived::DenseMatrixType DenseMatrixType; enum { Flags = Traits::Flags, - CoeffReadCost = Traits::CoeffReadCost, RowsAtCompileTime = Traits::RowsAtCompileTime, ColsAtCompileTime = Traits::ColsAtCompileTime, MaxRowsAtCompileTime = Traits::MaxRowsAtCompileTime, MaxColsAtCompileTime = Traits::MaxColsAtCompileTime }; typedef typename Traits::Scalar Scalar; + typedef typename Traits::Index Index; #endif Transpose(const PermutationType& p) : m_permutation(p) {} - inline int rows() const { return m_permutation.rows(); } - inline int cols() const { return m_permutation.cols(); } + inline Index rows() const { return m_permutation.rows(); } + inline Index cols() const { return m_permutation.cols(); } #ifndef EIGEN_PARSED_BY_DOXYGEN template<typename DenseDerived> void evalTo(MatrixBase<DenseDerived>& other) const { other.setZero(); - for (int i=0; i<rows();++i) + for (Index i=0; i<rows();++i) other.coeffRef(i, m_permutation.indices().coeff(i)) = typename DenseDerived::Scalar(1); } #endif @@ -663,19 +676,19 @@ class Transpose<PermutationBase<Derived> > /** \returns the matrix with the inverse permutation applied to the columns. 
*/ template<typename OtherDerived> friend - inline const internal::permut_matrix_product_retval<PermutationType, OtherDerived, OnTheRight, true> + const Product<OtherDerived, Transpose, DefaultProduct> operator*(const MatrixBase<OtherDerived>& matrix, const Transpose& trPerm) { - return internal::permut_matrix_product_retval<PermutationType, OtherDerived, OnTheRight, true>(trPerm.m_permutation, matrix.derived()); + return Product<OtherDerived, Transpose, DefaultProduct>(matrix.derived(), trPerm.derived()); } /** \returns the matrix with the inverse permutation applied to the rows. */ template<typename OtherDerived> - inline const internal::permut_matrix_product_retval<PermutationType, OtherDerived, OnTheLeft, true> + const Product<Transpose, OtherDerived, DefaultProduct> operator*(const MatrixBase<OtherDerived>& matrix) const { - return internal::permut_matrix_product_retval<PermutationType, OtherDerived, OnTheLeft, true>(m_permutation, matrix.derived()); + return Product<Transpose, OtherDerived, DefaultProduct>(*this, matrix.derived()); } const PermutationType& nestedPermutation() const { return m_permutation; } @@ -690,6 +703,38 @@ const PermutationWrapper<const Derived> MatrixBase<Derived>::asPermutation() con return derived(); } +namespace internal { + +// TODO currently a permutation matrix expression has the form PermutationMatrix or PermutationWrapper +// or their transpose; in the future shape should be defined by the expression traits +template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename IndexType> +struct evaluator_traits<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, IndexType> > +{ + typedef typename storage_kind_to_evaluator_kind<Dense>::Kind Kind; + typedef PermutationShape Shape; + static const int AssumeAliasing = 0; +}; + +template<typename IndicesType> +struct evaluator_traits<PermutationWrapper<IndicesType> > +{ + typedef typename storage_kind_to_evaluator_kind<Dense>::Kind Kind; + typedef PermutationShape Shape; + 
static const int AssumeAliasing = 0; +}; + +template<typename Derived> +struct evaluator_traits<Transpose<PermutationBase<Derived> > > +{ + typedef typename storage_kind_to_evaluator_kind<Dense>::Kind Kind; + typedef PermutationShape Shape; + static const int AssumeAliasing = 0; +}; + +template<> struct AssignmentKind<DenseShape,PermutationShape> { typedef EigenBase2EigenBase Kind; }; + +} // end namespace internal + } // end namespace Eigen #endif // EIGEN_PERMUTATIONMATRIX_H diff --git a/Eigen/src/Core/PlainObjectBase.h b/Eigen/src/Core/PlainObjectBase.h index 69f34bd3e..06e326a05 100644 --- a/Eigen/src/Core/PlainObjectBase.h +++ b/Eigen/src/Core/PlainObjectBase.h @@ -128,7 +128,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type DenseStorage<Scalar, Base::MaxSizeAtCompileTime, Base::RowsAtCompileTime, Base::ColsAtCompileTime, Options> m_storage; public: - enum { NeedsToAlign = SizeAtCompileTime != Dynamic && (internal::traits<Derived>::Flags & AlignedBit) != 0 }; + enum { NeedsToAlign = SizeAtCompileTime != Dynamic && (internal::traits<Derived>::EvaluatorFlags & AlignedBit) != 0 }; EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign) EIGEN_DEVICE_FUNC @@ -221,11 +221,11 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type } /** \returns a const pointer to the data array of this matrix */ - EIGEN_STRONG_INLINE const Scalar *data() const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar *data() const { return m_storage.data(); } /** \returns a pointer to the data array of this matrix */ - EIGEN_STRONG_INLINE Scalar *data() + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar *data() { return m_storage.data(); } /** Resizes \c *this to a \a rows x \a cols matrix. @@ -457,7 +457,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type // FIXME is it still needed ? 
/** \internal */ EIGEN_DEVICE_FUNC - PlainObjectBase(internal::constructor_without_unaligned_array_assert) + explicit PlainObjectBase(internal::constructor_without_unaligned_array_assert) : m_storage(internal::constructor_without_unaligned_array_assert()) { // _check_template_params(); EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED @@ -639,22 +639,16 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type * * \internal */ + // aliasing is dealt once in internall::call_assignment + // so at this stage we have to assume aliasing... and resising has to be done later. template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& _set(const DenseBase<OtherDerived>& other) { - _set_selector(other.derived(), typename internal::conditional<static_cast<bool>(int(OtherDerived::Flags) & EvalBeforeAssigningBit), internal::true_type, internal::false_type>::type()); + internal::call_assignment(this->derived(), other.derived()); return this->derived(); } - template<typename OtherDerived> - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE void _set_selector(const OtherDerived& other, const internal::true_type&) { _set_noalias(other.eval()); } - - template<typename OtherDerived> - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE void _set_selector(const OtherDerived& other, const internal::false_type&) { _set_noalias(other); } - /** \internal Like _set() but additionally makes the assumption that no aliasing effect can happen (which * is the case when creating a new matrix) so one can enforce lazy evaluation. * @@ -669,7 +663,8 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type //_resize_to_match(other); // the 'false' below means to enforce lazy evaluation. We don't use lazyAssign() because // it wouldn't allow to copy a row-vector into a column-vector. 
- return internal::assign_selector<Derived,OtherDerived,false>::run(this->derived(), other.derived()); + internal::call_assignment_no_alias(this->derived(), other.derived(), internal::assign_op<Scalar>()); + return this->derived(); } template<typename T0, typename T1> @@ -704,9 +699,12 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type m_storage.data()[1] = Scalar(val1); } + // The argument is convertible to the Index type and we either have a non 1x1 Matrix, or a dynamic-sized Array, + // then the argument is meant to be the size of the object. template<typename T> EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE void _init1(Index size, typename internal::enable_if<Base::SizeAtCompileTime!=1 || !internal::is_convertible<T, Scalar>::value,T>::type* = 0) + EIGEN_STRONG_INLINE void _init1(Index size, typename internal::enable_if< (Base::SizeAtCompileTime!=1 || !internal::is_convertible<T, Scalar>::value) + && ((!internal::is_same<typename internal::traits<Derived>::XprKind,ArrayXpr>::value || Base::SizeAtCompileTime==Dynamic)),T>::type* = 0) { // NOTE MSVC 2008 complains if we directly put bool(NumTraits<T>::IsInteger) as the EIGEN_STATIC_ASSERT argument. 
const bool is_integer = NumTraits<T>::IsInteger; @@ -714,6 +712,8 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED) resize(size); } + + // We have a 1x1 matrix/array => the argument is interpreted as the value of the unique coefficient (case where scalar type can be implicitely converted) template<typename T> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void _init1(const Scalar& val0, typename internal::enable_if<Base::SizeAtCompileTime==1 && internal::is_convertible<T, Scalar>::value,T>::type* = 0) @@ -722,6 +722,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type m_storage.data()[0] = val0; } + // We have a 1x1 matrix/array => the argument is interpreted as the value of the unique coefficient (case where scalar type match the index type) template<typename T> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void _init1(const Index& val0, @@ -734,18 +735,21 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type m_storage.data()[0] = Scalar(val0); } + // Initialize a fixed size matrix from a pointer to raw data template<typename T> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void _init1(const Scalar* data){ this->_set_noalias(ConstMapType(data)); } + // Initialize an arbitrary matrix from a dense expression template<typename T, typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void _init1(const DenseBase<OtherDerived>& other){ this->_set_noalias(other); } + // Initialize an arbitrary matrix from a generic Eigen expression template<typename T, typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void _init1(const EigenBase<OtherDerived>& other){ @@ -766,23 +770,58 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type { this->derived() = r; } - + + // For fixed -size arrays: + template<typename T> + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void _init1(const Scalar& val0, + typename internal::enable_if< Base::SizeAtCompileTime!=Dynamic 
+ && Base::SizeAtCompileTime!=1 + && internal::is_convertible<T, Scalar>::value + && internal::is_same<typename internal::traits<Derived>::XprKind,ArrayXpr>::value,T>::type* = 0) + { + Base::setConstant(val0); + } + + template<typename T> + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void _init1(const Index& val0, + typename internal::enable_if< (!internal::is_same<Index,Scalar>::value) + && (internal::is_same<Index,T>::value) + && Base::SizeAtCompileTime!=Dynamic + && Base::SizeAtCompileTime!=1 + && internal::is_convertible<T, Scalar>::value + && internal::is_same<typename internal::traits<Derived>::XprKind,ArrayXpr>::value,T*>::type* = 0) + { + Base::setConstant(val0); + } + template<typename MatrixTypeA, typename MatrixTypeB, bool SwapPointers> friend struct internal::matrix_swap_impl; - /** \internal generic implementation of swap for dense storage since for dynamic-sized matrices of same type it is enough to swap the - * data pointers. + public: + +#ifndef EIGEN_PARSED_BY_DOXYGEN + /** \internal + * \brief Override DenseBase::swap() since for dynamic-sized matrices + * of same type it is enough to swap the data pointers. 
*/ template<typename OtherDerived> EIGEN_DEVICE_FUNC - void _swap(DenseBase<OtherDerived> const & other) + void swap(DenseBase<OtherDerived> & other) { enum { SwapPointers = internal::is_same<Derived, OtherDerived>::value && Base::SizeAtCompileTime==Dynamic }; - internal::matrix_swap_impl<Derived, OtherDerived, bool(SwapPointers)>::run(this->derived(), other.const_cast_derived()); + internal::matrix_swap_impl<Derived, OtherDerived, bool(SwapPointers)>::run(this->derived(), other.derived()); } - - public: -#ifndef EIGEN_PARSED_BY_DOXYGEN + + /** \internal + * \brief const version forwarded to DenseBase::swap + */ + template<typename OtherDerived> + EIGEN_DEVICE_FUNC + void swap(DenseBase<OtherDerived> const & other) + { Base::swap(other.derived()); } + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void _check_template_params() { @@ -797,10 +836,9 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type && (Options & (DontAlign|RowMajor)) == Options), INVALID_MATRIX_TEMPLATE_PARAMETERS) } -#endif -private: - enum { ThisConstantIsPrivateInPlainObjectBase }; + enum { IsPlainObjectBase = 1 }; +#endif }; namespace internal { diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h index 5d3789be7..cb79543ef 100644 --- a/Eigen/src/Core/Product.h +++ b/Eigen/src/Core/Product.h @@ -12,8 +12,7 @@ namespace Eigen { -template<typename Lhs, typename Rhs> class Product; -template<typename Lhs, typename Rhs, typename StorageKind> class ProductImpl; +template<typename Lhs, typename Rhs, int Option, typename StorageKind> class ProductImpl; /** \class Product * \ingroup Core_Module @@ -24,53 +23,108 @@ template<typename Lhs, typename Rhs, typename StorageKind> class ProductImpl; * \param Rhs the type of the right-hand side expression * * This class represents an expression of the product of two arbitrary matrices. 
+ * + * The other template parameters are: + * \tparam Option can be DefaultProduct or LazyProduct * */ -// Use ProductReturnType to get correct traits, in particular vectorization flags + namespace internal { -template<typename Lhs, typename Rhs> -struct traits<Product<Lhs, Rhs> > - : traits<typename ProductReturnType<Lhs, Rhs>::Type> -{ - // We want A+B*C to be of type Product<Matrix, Sum> and not Product<Matrix, Matrix> - // TODO: This flag should eventually go in a separate evaluator traits class + +// Determine the scalar of Product<Lhs, Rhs>. This is normally the same as Lhs::Scalar times +// Rhs::Scalar, but product with permutation matrices inherit the scalar of the other factor. +template<typename Lhs, typename Rhs, typename LhsShape = typename evaluator_traits<Lhs>::Shape, + typename RhsShape = typename evaluator_traits<Rhs>::Shape > +struct product_result_scalar +{ + typedef typename scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType Scalar; +}; + +template<typename Lhs, typename Rhs, typename RhsShape> +struct product_result_scalar<Lhs, Rhs, PermutationShape, RhsShape> +{ + typedef typename Rhs::Scalar Scalar; +}; + +template<typename Lhs, typename Rhs, typename LhsShape> + struct product_result_scalar<Lhs, Rhs, LhsShape, PermutationShape> +{ + typedef typename Lhs::Scalar Scalar; +}; + +template<typename Lhs, typename Rhs, int Option> +struct traits<Product<Lhs, Rhs, Option> > +{ + typedef typename remove_all<Lhs>::type LhsCleaned; + typedef typename remove_all<Rhs>::type RhsCleaned; + typedef traits<LhsCleaned> LhsTraits; + typedef traits<RhsCleaned> RhsTraits; + + typedef MatrixXpr XprKind; + + typedef typename product_result_scalar<LhsCleaned,RhsCleaned>::Scalar Scalar; + typedef typename product_promote_storage_type<typename LhsTraits::StorageKind, + typename RhsTraits::StorageKind, + internal::product_type<Lhs,Rhs>::ret>::ret StorageKind; + typedef typename promote_index_type<typename LhsTraits::Index, + typename 
RhsTraits::Index>::type Index; + enum { - Flags = traits<typename ProductReturnType<Lhs, Rhs>::Type>::Flags & ~(EvalBeforeNestingBit | DirectAccessBit) + RowsAtCompileTime = LhsTraits::RowsAtCompileTime, + ColsAtCompileTime = RhsTraits::ColsAtCompileTime, + MaxRowsAtCompileTime = LhsTraits::MaxRowsAtCompileTime, + MaxColsAtCompileTime = RhsTraits::MaxColsAtCompileTime, + + // FIXME: only needed by GeneralMatrixMatrixTriangular + InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(LhsTraits::ColsAtCompileTime, RhsTraits::RowsAtCompileTime), + + // The storage order is somewhat arbitrary here. The correct one will be determined through the evaluator. + Flags = ( (MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1) + || ((LhsTraits::Flags&NoPreferredStorageOrderBit) && (RhsTraits::Flags&RowMajorBit)) + || ((RhsTraits::Flags&NoPreferredStorageOrderBit) && (LhsTraits::Flags&RowMajorBit)) ) + ? RowMajorBit : (MaxColsAtCompileTime==1 ? 0 : NoPreferredStorageOrderBit) }; }; + } // end namespace internal -template<typename Lhs, typename Rhs> -class Product : public ProductImpl<Lhs,Rhs,typename internal::promote_storage_type<typename internal::traits<Lhs>::StorageKind, - typename internal::traits<Rhs>::StorageKind>::ret> +template<typename _Lhs, typename _Rhs, int Option> +class Product : public ProductImpl<_Lhs,_Rhs,Option, + typename internal::product_promote_storage_type<typename internal::traits<_Lhs>::StorageKind, + typename internal::traits<_Rhs>::StorageKind, + internal::product_type<_Lhs,_Rhs>::ret>::ret> { public: + typedef _Lhs Lhs; + typedef _Rhs Rhs; + typedef typename ProductImpl< - Lhs, Rhs, - typename internal::promote_storage_type<typename Lhs::StorageKind, - typename Rhs::StorageKind>::ret>::Base Base; + Lhs, Rhs, Option, + typename internal::product_promote_storage_type<typename internal::traits<Lhs>::StorageKind, + typename internal::traits<Rhs>::StorageKind, + internal::product_type<Lhs,Rhs>::ret>::ret>::Base Base; EIGEN_GENERIC_PUBLIC_INTERFACE(Product) - typedef 
typename Lhs::Nested LhsNested; - typedef typename Rhs::Nested RhsNested; + typedef typename internal::nested<Lhs>::type LhsNested; + typedef typename internal::nested<Rhs>::type RhsNested; typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned; typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned; - Product(const Lhs& lhs, const Rhs& rhs) : m_lhs(lhs), m_rhs(rhs) + EIGEN_DEVICE_FUNC Product(const Lhs& lhs, const Rhs& rhs) : m_lhs(lhs), m_rhs(rhs) { eigen_assert(lhs.cols() == rhs.rows() && "invalid matrix product" && "if you wanted a coeff-wise or a dot product use the respective explicit functions"); } - inline Index rows() const { return m_lhs.rows(); } - inline Index cols() const { return m_rhs.cols(); } + EIGEN_DEVICE_FUNC inline Index rows() const { return m_lhs.rows(); } + EIGEN_DEVICE_FUNC inline Index cols() const { return m_rhs.cols(); } - const LhsNestedCleaned& lhs() const { return m_lhs; } - const RhsNestedCleaned& rhs() const { return m_rhs; } + EIGEN_DEVICE_FUNC const LhsNestedCleaned& lhs() const { return m_lhs; } + EIGEN_DEVICE_FUNC const RhsNestedCleaned& rhs() const { return m_rhs; } protected: @@ -78,14 +132,77 @@ class Product : public ProductImpl<Lhs,Rhs,typename internal::promote_storage_ty RhsNested m_rhs; }; -template<typename Lhs, typename Rhs> -class ProductImpl<Lhs,Rhs,Dense> : public internal::dense_xpr_base<Product<Lhs,Rhs> >::type +namespace internal { + +template<typename Lhs, typename Rhs, int Option, int ProductTag = internal::product_type<Lhs,Rhs>::ret> +class dense_product_base + : public internal::dense_xpr_base<Product<Lhs,Rhs,Option> >::type +{}; + +/** Convertion to scalar for inner-products */ +template<typename Lhs, typename Rhs, int Option> +class dense_product_base<Lhs, Rhs, Option, InnerProduct> + : public internal::dense_xpr_base<Product<Lhs,Rhs,Option> >::type +{ + typedef Product<Lhs,Rhs,Option> ProductXpr; + typedef typename internal::dense_xpr_base<ProductXpr>::type Base; +public: 
+ using Base::derived; + typedef typename Base::Scalar Scalar; + typedef typename Base::Index Index; + + operator const Scalar() const + { + return typename internal::evaluator<ProductXpr>::type(derived()).coeff(0,0); + } +}; + +} // namespace internal + +// Generic API dispatcher +template<typename Lhs, typename Rhs, int Option, typename StorageKind> +class ProductImpl : public internal::generic_xpr_base<Product<Lhs,Rhs,Option>, MatrixXpr, StorageKind>::type { - typedef Product<Lhs, Rhs> Derived; public: + typedef typename internal::generic_xpr_base<Product<Lhs,Rhs,Option>, MatrixXpr, StorageKind>::type Base; +}; - typedef typename internal::dense_xpr_base<Product<Lhs, Rhs> >::type Base; +template<typename Lhs, typename Rhs, int Option> +class ProductImpl<Lhs,Rhs,Option,Dense> + : public internal::dense_product_base<Lhs,Rhs,Option> +{ + typedef Product<Lhs, Rhs, Option> Derived; + + public: + + typedef typename internal::dense_product_base<Lhs, Rhs, Option> Base; EIGEN_DENSE_PUBLIC_INTERFACE(Derived) + protected: + enum { + IsOneByOne = (RowsAtCompileTime == 1 || RowsAtCompileTime == Dynamic) && + (ColsAtCompileTime == 1 || ColsAtCompileTime == Dynamic), + EnableCoeff = IsOneByOne || Option==LazyProduct + }; + + public: + + EIGEN_DEVICE_FUNC Scalar coeff(Index row, Index col) const + { + EIGEN_STATIC_ASSERT(EnableCoeff, THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS); + eigen_assert( (Option==LazyProduct) || (this->rows() == 1 && this->cols() == 1) ); + + return typename internal::evaluator<Derived>::type(derived()).coeff(row,col); + } + + EIGEN_DEVICE_FUNC Scalar coeff(Index i) const + { + EIGEN_STATIC_ASSERT(EnableCoeff, THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS); + eigen_assert( (Option==LazyProduct) || (this->rows() == 1 && this->cols() == 1) ); + + return typename internal::evaluator<Derived>::type(derived()).coeff(i); + } + + }; /*************************************************************************** @@ -102,6 +219,15 @@ prod(const Lhs& lhs, const 
Rhs& rhs) return Product<Lhs,Rhs>(lhs,rhs); } +/** \internal used to test the evaluator only + */ +template<typename Lhs,typename Rhs> +const Product<Lhs,Rhs,LazyProduct> +lazyprod(const Lhs& lhs, const Rhs& rhs) +{ + return Product<Lhs,Rhs,LazyProduct>(lhs,rhs); +} + } // end namespace Eigen #endif // EIGEN_PRODUCT_H diff --git a/Eigen/src/Core/ProductBase.h b/Eigen/src/Core/ProductBase.h index 483914a9b..050343b2d 100644 --- a/Eigen/src/Core/ProductBase.h +++ b/Eigen/src/Core/ProductBase.h @@ -12,253 +12,6 @@ namespace Eigen { -/** \class ProductBase - * \ingroup Core_Module - * - */ - -namespace internal { -template<typename Derived, typename _Lhs, typename _Rhs> -struct traits<ProductBase<Derived,_Lhs,_Rhs> > -{ - typedef MatrixXpr XprKind; - typedef typename remove_all<_Lhs>::type Lhs; - typedef typename remove_all<_Rhs>::type Rhs; - typedef typename scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType Scalar; - typedef typename promote_storage_type<typename traits<Lhs>::StorageKind, - typename traits<Rhs>::StorageKind>::ret StorageKind; - typedef typename promote_index_type<typename traits<Lhs>::Index, - typename traits<Rhs>::Index>::type Index; - enum { - RowsAtCompileTime = traits<Lhs>::RowsAtCompileTime, - ColsAtCompileTime = traits<Rhs>::ColsAtCompileTime, - MaxRowsAtCompileTime = traits<Lhs>::MaxRowsAtCompileTime, - MaxColsAtCompileTime = traits<Rhs>::MaxColsAtCompileTime, - Flags = (MaxRowsAtCompileTime==1 ? RowMajorBit : 0) - | EvalBeforeNestingBit | EvalBeforeAssigningBit | NestByRefBit, - // Note that EvalBeforeNestingBit and NestByRefBit - // are not used in practice because nested is overloaded for products - CoeffReadCost = 0 // FIXME why is it needed ? 
- }; -}; -} - -#define EIGEN_PRODUCT_PUBLIC_INTERFACE(Derived) \ - typedef ProductBase<Derived, Lhs, Rhs > Base; \ - EIGEN_DENSE_PUBLIC_INTERFACE(Derived) \ - typedef typename Base::LhsNested LhsNested; \ - typedef typename Base::_LhsNested _LhsNested; \ - typedef typename Base::LhsBlasTraits LhsBlasTraits; \ - typedef typename Base::ActualLhsType ActualLhsType; \ - typedef typename Base::_ActualLhsType _ActualLhsType; \ - typedef typename Base::RhsNested RhsNested; \ - typedef typename Base::_RhsNested _RhsNested; \ - typedef typename Base::RhsBlasTraits RhsBlasTraits; \ - typedef typename Base::ActualRhsType ActualRhsType; \ - typedef typename Base::_ActualRhsType _ActualRhsType; \ - using Base::m_lhs; \ - using Base::m_rhs; - -template<typename Derived, typename Lhs, typename Rhs> -class ProductBase : public MatrixBase<Derived> -{ - public: - typedef MatrixBase<Derived> Base; - EIGEN_DENSE_PUBLIC_INTERFACE(ProductBase) - - typedef typename Lhs::Nested LhsNested; - typedef typename internal::remove_all<LhsNested>::type _LhsNested; - typedef internal::blas_traits<_LhsNested> LhsBlasTraits; - typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType; - typedef typename internal::remove_all<ActualLhsType>::type _ActualLhsType; - typedef typename internal::traits<Lhs>::Scalar LhsScalar; - - typedef typename Rhs::Nested RhsNested; - typedef typename internal::remove_all<RhsNested>::type _RhsNested; - typedef internal::blas_traits<_RhsNested> RhsBlasTraits; - typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType; - typedef typename internal::remove_all<ActualRhsType>::type _ActualRhsType; - typedef typename internal::traits<Rhs>::Scalar RhsScalar; - - // Diagonal of a product: no need to evaluate the arguments because they are going to be evaluated only once - typedef CoeffBasedProduct<LhsNested, RhsNested, 0> FullyLazyCoeffBaseProductType; - - public: - - typedef typename Base::PlainObject PlainObject; - - ProductBase(const Lhs& a_lhs, const 
Rhs& a_rhs) - : m_lhs(a_lhs), m_rhs(a_rhs) - { - eigen_assert(a_lhs.cols() == a_rhs.rows() - && "invalid matrix product" - && "if you wanted a coeff-wise or a dot product use the respective explicit functions"); - } - - inline Index rows() const { return m_lhs.rows(); } - inline Index cols() const { return m_rhs.cols(); } - - template<typename Dest> - inline void evalTo(Dest& dst) const { dst.setZero(); scaleAndAddTo(dst,Scalar(1)); } - - template<typename Dest> - inline void addTo(Dest& dst) const { scaleAndAddTo(dst,Scalar(1)); } - - template<typename Dest> - inline void subTo(Dest& dst) const { scaleAndAddTo(dst,Scalar(-1)); } - - template<typename Dest> - inline void scaleAndAddTo(Dest& dst, const Scalar& alpha) const { derived().scaleAndAddTo(dst,alpha); } - - const _LhsNested& lhs() const { return m_lhs; } - const _RhsNested& rhs() const { return m_rhs; } - - // Implicit conversion to the nested type (trigger the evaluation of the product) - operator const PlainObject& () const - { - m_result.resize(m_lhs.rows(), m_rhs.cols()); - derived().evalTo(m_result); - return m_result; - } - - const Diagonal<const FullyLazyCoeffBaseProductType,0> diagonal() const - { return FullyLazyCoeffBaseProductType(m_lhs, m_rhs); } - - template<int Index> - const Diagonal<FullyLazyCoeffBaseProductType,Index> diagonal() const - { return FullyLazyCoeffBaseProductType(m_lhs, m_rhs); } - - const Diagonal<FullyLazyCoeffBaseProductType,Dynamic> diagonal(Index index) const - { return FullyLazyCoeffBaseProductType(m_lhs, m_rhs).diagonal(index); } - - // restrict coeff accessors to 1x1 expressions. 
No need to care about mutators here since this isn't an Lvalue expression - typename Base::CoeffReturnType coeff(Index row, Index col) const - { - EIGEN_STATIC_ASSERT_SIZE_1x1(Derived) - eigen_assert(this->rows() == 1 && this->cols() == 1); - Matrix<Scalar,1,1> result = *this; - return result.coeff(row,col); - } - - typename Base::CoeffReturnType coeff(Index i) const - { - EIGEN_STATIC_ASSERT_SIZE_1x1(Derived) - eigen_assert(this->rows() == 1 && this->cols() == 1); - Matrix<Scalar,1,1> result = *this; - return result.coeff(i); - } - - const Scalar& coeffRef(Index row, Index col) const - { - EIGEN_STATIC_ASSERT_SIZE_1x1(Derived) - eigen_assert(this->rows() == 1 && this->cols() == 1); - return derived().coeffRef(row,col); - } - - const Scalar& coeffRef(Index i) const - { - EIGEN_STATIC_ASSERT_SIZE_1x1(Derived) - eigen_assert(this->rows() == 1 && this->cols() == 1); - return derived().coeffRef(i); - } - - protected: - - LhsNested m_lhs; - RhsNested m_rhs; - - mutable PlainObject m_result; -}; - -// here we need to overload the nested rule for products -// such that the nested type is a const reference to a plain matrix -namespace internal { -template<typename Lhs, typename Rhs, int Mode, int N, typename PlainObject> -struct nested<GeneralProduct<Lhs,Rhs,Mode>, N, PlainObject> -{ - typedef PlainObject const& type; -}; -} - -template<typename NestedProduct> -class ScaledProduct; - -// Note that these two operator* functions are not defined as member -// functions of ProductBase, because, otherwise we would have to -// define all overloads defined in MatrixBase. Furthermore, Using -// "using Base::operator*" would not work with MSVC. 
-// -// Also note that here we accept any compatible scalar types -template<typename Derived,typename Lhs,typename Rhs> -const ScaledProduct<Derived> -operator*(const ProductBase<Derived,Lhs,Rhs>& prod, const typename Derived::Scalar& x) -{ return ScaledProduct<Derived>(prod.derived(), x); } - -template<typename Derived,typename Lhs,typename Rhs> -typename internal::enable_if<!internal::is_same<typename Derived::Scalar,typename Derived::RealScalar>::value, - const ScaledProduct<Derived> >::type -operator*(const ProductBase<Derived,Lhs,Rhs>& prod, const typename Derived::RealScalar& x) -{ return ScaledProduct<Derived>(prod.derived(), x); } - - -template<typename Derived,typename Lhs,typename Rhs> -const ScaledProduct<Derived> -operator*(const typename Derived::Scalar& x,const ProductBase<Derived,Lhs,Rhs>& prod) -{ return ScaledProduct<Derived>(prod.derived(), x); } - -template<typename Derived,typename Lhs,typename Rhs> -typename internal::enable_if<!internal::is_same<typename Derived::Scalar,typename Derived::RealScalar>::value, - const ScaledProduct<Derived> >::type -operator*(const typename Derived::RealScalar& x,const ProductBase<Derived,Lhs,Rhs>& prod) -{ return ScaledProduct<Derived>(prod.derived(), x); } - -namespace internal { -template<typename NestedProduct> -struct traits<ScaledProduct<NestedProduct> > - : traits<ProductBase<ScaledProduct<NestedProduct>, - typename NestedProduct::_LhsNested, - typename NestedProduct::_RhsNested> > -{ - typedef typename traits<NestedProduct>::StorageKind StorageKind; -}; -} - -template<typename NestedProduct> -class ScaledProduct - : public ProductBase<ScaledProduct<NestedProduct>, - typename NestedProduct::_LhsNested, - typename NestedProduct::_RhsNested> -{ - public: - typedef ProductBase<ScaledProduct<NestedProduct>, - typename NestedProduct::_LhsNested, - typename NestedProduct::_RhsNested> Base; - typedef typename Base::Scalar Scalar; - typedef typename Base::PlainObject PlainObject; -// 
EIGEN_PRODUCT_PUBLIC_INTERFACE(ScaledProduct) - - ScaledProduct(const NestedProduct& prod, const Scalar& x) - : Base(prod.lhs(),prod.rhs()), m_prod(prod), m_alpha(x) {} - - template<typename Dest> - inline void evalTo(Dest& dst) const { dst.setZero(); scaleAndAddTo(dst, Scalar(1)); } - - template<typename Dest> - inline void addTo(Dest& dst) const { scaleAndAddTo(dst, Scalar(1)); } - - template<typename Dest> - inline void subTo(Dest& dst) const { scaleAndAddTo(dst, Scalar(-1)); } - - template<typename Dest> - inline void scaleAndAddTo(Dest& dst, const Scalar& a_alpha) const { m_prod.derived().scaleAndAddTo(dst,a_alpha * m_alpha); } - - const Scalar& alpha() const { return m_alpha; } - - protected: - const NestedProduct& m_prod; - Scalar m_alpha; -}; - /** \internal * Overloaded to perform an efficient C = (A*B).lazy() */ template<typename Derived> diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h index 855914f2e..488eee00c 100644 --- a/Eigen/src/Core/ProductEvaluators.h +++ b/Eigen/src/Core/ProductEvaluators.h @@ -16,95 +16,347 @@ namespace Eigen { namespace internal { + +/** \internal + * Evaluator of a product expression. + * Since products require special treatments to handle all possible cases, + * we simply deffer the evaluation logic to a product_evaluator class + * which offers more partial specialization possibilities. + * + * \sa class product_evaluator + */ +template<typename Lhs, typename Rhs, int Options> +struct evaluator<Product<Lhs, Rhs, Options> > + : public product_evaluator<Product<Lhs, Rhs, Options> > +{ + typedef Product<Lhs, Rhs, Options> XprType; + typedef product_evaluator<XprType> Base; + + typedef evaluator type; + typedef evaluator nestedType; -// We can evaluate the product either all at once, like GeneralProduct and its evalTo() function, or -// traverse the matrix coefficient by coefficient, like CoeffBasedProduct. Use the existing logic -// in ProductReturnType to decide. 
+ EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(xpr) {} +}; + +// Catch scalar * ( A * B ) and transform it to (A*scalar) * B +// TODO we should apply that rule only if that's really helpful +template<typename Lhs, typename Rhs, typename Scalar> +struct evaluator<CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const Product<Lhs, Rhs, DefaultProduct> > > + : public evaluator<Product<CwiseUnaryOp<internal::scalar_multiple_op<Scalar>,const Lhs>, Rhs, DefaultProduct> > +{ + typedef CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const Product<Lhs, Rhs, DefaultProduct> > XprType; + typedef evaluator<Product<CwiseUnaryOp<internal::scalar_multiple_op<Scalar>,const Lhs>, Rhs, DefaultProduct> > Base; + + typedef evaluator type; + typedef evaluator nestedType; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) + : Base(xpr.functor().m_other * xpr.nestedExpression().lhs() * xpr.nestedExpression().rhs()) + {} +}; -template<typename XprType, typename ProductType> -struct product_evaluator_dispatcher; + +template<typename Lhs, typename Rhs, int DiagIndex> +struct evaluator<Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> > + : public evaluator<Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex> > +{ + typedef Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> XprType; + typedef evaluator<Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex> > Base; + + typedef evaluator type; + typedef evaluator nestedType; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) + : Base(Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex>( + Product<Lhs, Rhs, LazyProduct>(xpr.nestedExpression().lhs(), xpr.nestedExpression().rhs()), + xpr.index() )) + {} +}; + + +// Helper class to perform a matrix product with the destination at hand. +// Depending on the sizes of the factors, there are different evaluation strategies +// as controlled by internal::product_type. 
+template< typename Lhs, typename Rhs, + typename LhsShape = typename evaluator_traits<Lhs>::Shape, + typename RhsShape = typename evaluator_traits<Rhs>::Shape, + int ProductType = internal::product_type<Lhs,Rhs>::value> +struct generic_product_impl; template<typename Lhs, typename Rhs> -struct evaluator_impl<Product<Lhs, Rhs> > - : product_evaluator_dispatcher<Product<Lhs, Rhs>, typename ProductReturnType<Lhs, Rhs>::Type> +struct evaluator_traits<Product<Lhs, Rhs, DefaultProduct> > + : evaluator_traits_base<Product<Lhs, Rhs, DefaultProduct> > { - typedef Product<Lhs, Rhs> XprType; - typedef product_evaluator_dispatcher<XprType, typename ProductReturnType<Lhs, Rhs>::Type> Base; + enum { AssumeAliasing = 1 }; +}; - evaluator_impl(const XprType& xpr) : Base(xpr) - { } +// This is the default evaluator implementation for products: +// It creates a temporary and call generic_product_impl +template<typename Lhs, typename Rhs, int ProductTag, typename LhsShape, typename RhsShape> +struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, ProductTag, LhsShape, RhsShape, typename traits<Lhs>::Scalar, typename traits<Rhs>::Scalar> + : public evaluator<typename Product<Lhs, Rhs, DefaultProduct>::PlainObject>::type +{ + typedef Product<Lhs, Rhs, DefaultProduct> XprType; + typedef typename XprType::PlainObject PlainObject; + typedef typename evaluator<PlainObject>::type Base; + enum { + Flags = Base::Flags | EvalBeforeNestingBit +// CoeffReadCost = 0 // FIXME why is it needed? (this was already the case before the evaluators, see traits<ProductBase>) + }; + + EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr) + : m_result(xpr.rows(), xpr.cols()) + { + ::new (static_cast<Base*>(this)) Base(m_result); + +// FIXME shall we handle nested_eval here? 
+// typedef typename internal::nested_eval<Lhs,Rhs::ColsAtCompileTime>::type LhsNested; +// typedef typename internal::nested_eval<Rhs,Lhs::RowsAtCompileTime>::type RhsNested; +// typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned; +// typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned; +// +// const LhsNested lhs(xpr.lhs()); +// const RhsNested rhs(xpr.rhs()); +// +// generic_product_impl<LhsNestedCleaned, RhsNestedCleaned>::evalTo(m_result, lhs, rhs); + + generic_product_impl<Lhs, Rhs, LhsShape, RhsShape, ProductTag>::evalTo(m_result, xpr.lhs(), xpr.rhs()); + } + +protected: + PlainObject m_result; }; -template<typename XprType, typename ProductType> -struct product_evaluator_traits_dispatcher; +// Dense = Product +template< typename DstXprType, typename Lhs, typename Rhs, typename Scalar> +struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::assign_op<Scalar>, Dense2Dense, Scalar> +{ + typedef Product<Lhs,Rhs,DefaultProduct> SrcXprType; + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &) + { + // FIXME shall we handle nested_eval here? + generic_product_impl<Lhs, Rhs>::evalTo(dst, src.lhs(), src.rhs()); + } +}; -template<typename Lhs, typename Rhs> -struct evaluator_traits<Product<Lhs, Rhs> > - : product_evaluator_traits_dispatcher<Product<Lhs, Rhs>, typename ProductReturnType<Lhs, Rhs>::Type> -{ - static const int AssumeAliasing = 1; +// Dense += Product +template< typename DstXprType, typename Lhs, typename Rhs, typename Scalar> +struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::add_assign_op<Scalar>, Dense2Dense, Scalar> +{ + typedef Product<Lhs,Rhs,DefaultProduct> SrcXprType; + static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<Scalar> &) + { + // FIXME shall we handle nested_eval here? 
+ generic_product_impl<Lhs, Rhs>::addTo(dst, src.lhs(), src.rhs()); + } }; -// Case 1: Evaluate all at once -// -// We can view the GeneralProduct class as a part of the product evaluator. -// Four sub-cases: InnerProduct, OuterProduct, GemmProduct and GemvProduct. -// InnerProduct is special because GeneralProduct does not have an evalTo() method in this case. +// Dense -= Product +template< typename DstXprType, typename Lhs, typename Rhs, typename Scalar> +struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::sub_assign_op<Scalar>, Dense2Dense, Scalar> +{ + typedef Product<Lhs,Rhs,DefaultProduct> SrcXprType; + static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<Scalar> &) + { + // FIXME shall we handle nested_eval here? + generic_product_impl<Lhs, Rhs>::subTo(dst, src.lhs(), src.rhs()); + } +}; -template<typename Lhs, typename Rhs> -struct product_evaluator_traits_dispatcher<Product<Lhs, Rhs>, GeneralProduct<Lhs, Rhs, InnerProduct> > + +// Dense ?= scalar * Product +// TODO we should apply that rule if that's really helpful +// for instance, this is not good for inner products +template< typename DstXprType, typename Lhs, typename Rhs, typename AssignFunc, typename Scalar, typename ScalarBis> +struct Assignment<DstXprType, CwiseUnaryOp<internal::scalar_multiple_op<ScalarBis>, + const Product<Lhs,Rhs,DefaultProduct> >, AssignFunc, Dense2Dense, Scalar> { - static const int HasEvalTo = 0; + typedef CwiseUnaryOp<internal::scalar_multiple_op<ScalarBis>, + const Product<Lhs,Rhs,DefaultProduct> > SrcXprType; + static void run(DstXprType &dst, const SrcXprType &src, const AssignFunc& func) + { + // TODO use operator* instead of prod() once we have made enough progress + call_assignment(dst.noalias(), prod(src.functor().m_other * src.nestedExpression().lhs(), src.nestedExpression().rhs()), func); + } }; + template<typename Lhs, typename Rhs> -struct product_evaluator_dispatcher<Product<Lhs, Rhs>, GeneralProduct<Lhs, 
Rhs, InnerProduct> > - : public evaluator<typename Product<Lhs, Rhs>::PlainObject>::type +struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,InnerProduct> { - typedef Product<Lhs, Rhs> XprType; - typedef typename XprType::PlainObject PlainObject; - typedef typename evaluator<PlainObject>::type evaluator_base; + template<typename Dst> + static inline void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { + dst.coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum(); + } + + template<typename Dst> + static inline void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { + dst.coeffRef(0,0) += (lhs.transpose().cwiseProduct(rhs)).sum(); + } + + template<typename Dst> + static void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { dst.coeffRef(0,0) -= (lhs.transpose().cwiseProduct(rhs)).sum(); } +}; + + +/*********************************************************************** +* Implementation of outer dense * dense vector product +***********************************************************************/ + +// Column major result +template<typename Dst, typename Lhs, typename Rhs, typename Func> +EIGEN_DONT_INLINE void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const false_type&) +{ + typedef typename Dst::Index Index; + typename evaluator<Rhs>::type rhsEval(rhs); + // FIXME make sure lhs is sequentially stored + // FIXME not very good if rhs is real and lhs complex while alpha is real too + // FIXME we should probably build an evaluator for dst + const Index cols = dst.cols(); + for (Index j=0; j<cols; ++j) + func(dst.col(j), rhsEval.coeff(0,j) * lhs); +} + +// Row major result +template<typename Dst, typename Lhs, typename Rhs, typename Func> +EIGEN_DONT_INLINE void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const true_type&) { + typedef typename Dst::Index Index; + typename evaluator<Lhs>::type lhsEval(lhs); + // FIXME make sure rhs is sequentially stored + // FIXME 
not very good if lhs is real and rhs complex while alpha is real too + // FIXME we should probably build an evaluator for dst + const Index rows = dst.rows(); + for (Index i=0; i<rows; ++i) + func(dst.row(i), lhsEval.coeff(i,0) * rhs); +} - // TODO: Computation is too early (?) - product_evaluator_dispatcher(const XprType& xpr) : evaluator_base(m_result) +template<typename Lhs, typename Rhs> +struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,OuterProduct> +{ + template<typename T> struct IsRowMajor : internal::conditional<(int(T::Flags)&RowMajorBit), internal::true_type, internal::false_type>::type {}; + typedef typename Product<Lhs,Rhs>::Scalar Scalar; + + // TODO it would be nice to be able to exploit our *_assign_op functors for that purpose + struct set { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() = src; } }; + struct add { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() += src; } }; + struct sub { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() -= src; } }; + struct adds { + Scalar m_scale; + explicit adds(const Scalar& s) : m_scale(s) {} + template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { + dst.const_cast_derived() += m_scale * src; + } + }; + + template<typename Dst> + static inline void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { - m_result.coeffRef(0,0) = (xpr.lhs().transpose().cwiseProduct(xpr.rhs())).sum(); + internal::outer_product_selector_run(dst, lhs, rhs, set(), IsRowMajor<Dst>()); + } + + template<typename Dst> + static inline void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { + internal::outer_product_selector_run(dst, lhs, rhs, add(), IsRowMajor<Dst>()); + } + + template<typename Dst> + static inline void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { + 
internal::outer_product_selector_run(dst, lhs, rhs, sub(), IsRowMajor<Dst>()); + } + + template<typename Dst> + static inline void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) + { + internal::outer_product_selector_run(dst, lhs, rhs, adds(alpha), IsRowMajor<Dst>()); } -protected: - PlainObject m_result; }; -// For the other three subcases, simply call the evalTo() method of GeneralProduct -// TODO: GeneralProduct should take evaluators, not expression objects. -template<typename Lhs, typename Rhs, int ProductType> -struct product_evaluator_traits_dispatcher<Product<Lhs, Rhs>, GeneralProduct<Lhs, Rhs, ProductType> > +// This base class provides default implementations for evalTo, addTo, subTo, in terms of scaleAndAddTo +template<typename Lhs, typename Rhs, typename Derived> +struct generic_product_impl_base { - static const int HasEvalTo = 1; + typedef typename Product<Lhs,Rhs>::Scalar Scalar; + + template<typename Dst> + static void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { dst.setZero(); scaleAndAddTo(dst, lhs, rhs, Scalar(1)); } + + template<typename Dst> + static void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { scaleAndAddTo(dst,lhs, rhs, Scalar(1)); } + + template<typename Dst> + static void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { scaleAndAddTo(dst, lhs, rhs, Scalar(-1)); } + + template<typename Dst> + static void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) + { Derived::scaleAndAddTo(dst,lhs,rhs,alpha); } + }; -template<typename Lhs, typename Rhs, int ProductType> -struct product_evaluator_dispatcher<Product<Lhs, Rhs>, GeneralProduct<Lhs, Rhs, ProductType> > +template<typename Lhs, typename Rhs> +struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemvProduct> + : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemvProduct> > { - typedef Product<Lhs, Rhs> XprType; - typedef typename XprType::PlainObject PlainObject; - 
typedef typename evaluator<PlainObject>::type evaluator_base; + typedef typename Product<Lhs,Rhs>::Scalar Scalar; + enum { Side = Lhs::IsVectorAtCompileTime ? OnTheLeft : OnTheRight }; + typedef typename internal::conditional<int(Side)==OnTheRight,Lhs,Rhs>::type MatrixType; + + template<typename Dest> + static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) + { + internal::gemv_dense_sense_selector<Side, + (int(MatrixType::Flags)&RowMajorBit) ? RowMajor : ColMajor, + bool(internal::blas_traits<MatrixType>::HasUsableDirectAccess) + >::run(lhs, rhs, dst, alpha); + } +}; + +template<typename Lhs, typename Rhs> +struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode> +{ + typedef typename Product<Lhs,Rhs>::Scalar Scalar; - product_evaluator_dispatcher(const XprType& xpr) : m_xpr(xpr) - { } + template<typename Dst> + static inline void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { + // TODO: use the following instead of calling call_assignment, same for the other methods + // dst = lazyprod(lhs,rhs); + call_assignment(dst, lazyprod(lhs,rhs), internal::assign_op<Scalar>()); + } - template<typename DstEvaluatorType, typename DstXprType> - void evalTo(DstEvaluatorType /* not used */, DstXprType& dst) const + template<typename Dst> + static inline void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { - dst.resize(m_xpr.rows(), m_xpr.cols()); - GeneralProduct<Lhs, Rhs, ProductType>(m_xpr.lhs(), m_xpr.rhs()).evalTo(dst); + // dst += lazyprod(lhs,rhs); + call_assignment(dst, lazyprod(lhs,rhs), internal::add_assign_op<Scalar>()); } -protected: - const XprType& m_xpr; + template<typename Dst> + static inline void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { + // dst -= lazyprod(lhs,rhs); + call_assignment(dst, lazyprod(lhs,rhs), internal::sub_assign_op<Scalar>()); + } + +// template<typename Dst> +// static inline void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) +// { dst 
+= alpha * lazyprod(lhs,rhs); } }; +// This specialization enforces the use of a coefficient-based evaluation strategy +template<typename Lhs, typename Rhs> +struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,LazyCoeffBasedProductMode> + : generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode> {}; + // Case 2: Evaluate coeff by coeff // // This is mostly taken from CoeffBasedProduct.h @@ -117,65 +369,116 @@ struct etor_product_coeff_impl; template<int StorageOrder, int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode> struct etor_product_packet_impl; -template<typename Lhs, typename Rhs, typename LhsNested, typename RhsNested, int Flags> -struct product_evaluator_traits_dispatcher<Product<Lhs, Rhs>, CoeffBasedProduct<LhsNested, RhsNested, Flags> > -{ - static const int HasEvalTo = 0; -}; - -template<typename Lhs, typename Rhs, typename LhsNested, typename RhsNested, int Flags> -struct product_evaluator_dispatcher<Product<Lhs, Rhs>, CoeffBasedProduct<LhsNested, RhsNested, Flags> > - : evaluator_impl_base<Product<Lhs, Rhs> > +template<typename Lhs, typename Rhs, int ProductTag> +struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape, DenseShape, typename Lhs::Scalar, typename Rhs::Scalar > + : evaluator_base<Product<Lhs, Rhs, LazyProduct> > { - typedef Product<Lhs, Rhs> XprType; - typedef CoeffBasedProduct<LhsNested, RhsNested, Flags> CoeffBasedProductType; - - product_evaluator_dispatcher(const XprType& xpr) - : m_lhsImpl(xpr.lhs()), - m_rhsImpl(xpr.rhs()), - m_innerDim(xpr.lhs().cols()) - { } - + typedef Product<Lhs, Rhs, LazyProduct> XprType; typedef typename XprType::Index Index; typedef typename XprType::Scalar Scalar; typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename XprType::PacketScalar PacketScalar; typedef typename XprType::PacketReturnType PacketReturnType; + EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr) + : m_lhs(xpr.lhs()), + 
m_rhs(xpr.rhs()), + m_lhsImpl(m_lhs), // FIXME the creation of the evaluator objects should result in a no-op, but check that! + m_rhsImpl(m_rhs), // Moreover, they are only useful for the packet path, so we could completely disable them when not needed, + // or perhaps declare them on the fly on the packet method... We have experiment to check what's best. + m_innerDim(xpr.lhs().cols()) + { } + // Everything below here is taken from CoeffBasedProduct.h + typedef typename internal::nested_eval<Lhs,Rhs::ColsAtCompileTime>::type LhsNested; + typedef typename internal::nested_eval<Rhs,Lhs::RowsAtCompileTime>::type RhsNested; + + typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned; + typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned; + + typedef typename evaluator<LhsNestedCleaned>::type LhsEtorType; + typedef typename evaluator<RhsNestedCleaned>::type RhsEtorType; + enum { - RowsAtCompileTime = traits<CoeffBasedProductType>::RowsAtCompileTime, + RowsAtCompileTime = LhsNestedCleaned::RowsAtCompileTime, + ColsAtCompileTime = RhsNestedCleaned::ColsAtCompileTime, + InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(LhsNestedCleaned::ColsAtCompileTime, RhsNestedCleaned::RowsAtCompileTime), + MaxRowsAtCompileTime = LhsNestedCleaned::MaxRowsAtCompileTime, + MaxColsAtCompileTime = RhsNestedCleaned::MaxColsAtCompileTime, + PacketSize = packet_traits<Scalar>::size, - InnerSize = traits<CoeffBasedProductType>::InnerSize, - CoeffReadCost = traits<CoeffBasedProductType>::CoeffReadCost, + + LhsCoeffReadCost = LhsEtorType::CoeffReadCost, + RhsCoeffReadCost = RhsEtorType::CoeffReadCost, + CoeffReadCost = (InnerSize == Dynamic || LhsCoeffReadCost==Dynamic || RhsCoeffReadCost==Dynamic || NumTraits<Scalar>::AddCost==Dynamic || NumTraits<Scalar>::MulCost==Dynamic) ? 
Dynamic + : InnerSize * (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost) + + (InnerSize - 1) * NumTraits<Scalar>::AddCost, + Unroll = CoeffReadCost != Dynamic && CoeffReadCost <= EIGEN_UNROLLING_LIMIT, - CanVectorizeInner = traits<CoeffBasedProductType>::CanVectorizeInner + + LhsFlags = LhsEtorType::Flags, + RhsFlags = RhsEtorType::Flags, + + LhsRowMajor = LhsFlags & RowMajorBit, + RhsRowMajor = RhsFlags & RowMajorBit, + + SameType = is_same<typename LhsNestedCleaned::Scalar,typename RhsNestedCleaned::Scalar>::value, + + CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit) + && (ColsAtCompileTime == Dynamic + || ( (ColsAtCompileTime % packet_traits<Scalar>::size) == 0 + && (RhsFlags&AlignedBit) + ) + ), + + CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit) + && (RowsAtCompileTime == Dynamic + || ( (RowsAtCompileTime % packet_traits<Scalar>::size) == 0 + && (LhsFlags&AlignedBit) + ) + ), + + EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1 + : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0 + : (RhsRowMajor && !CanVectorizeLhs), + + Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit) + | (EvalToRowMajor ? RowMajorBit : 0) + | (CanVectorizeLhs ? (LhsFlags & AlignedBit) : 0) + | (CanVectorizeRhs ? (RhsFlags & AlignedBit) : 0) + // TODO enable vectorization for mixed types + | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0), + + /* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside + * of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner + * loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect + * the Flags, it is safe to make this value depend on ActualPacketAccessBit, that doesn't affect the ABI. 
+ */ + CanVectorizeInner = SameType + && LhsRowMajor + && (!RhsRowMajor) + && (LhsFlags & RhsFlags & ActualPacketAccessBit) + && (LhsFlags & RhsFlags & AlignedBit) + && (InnerSize % packet_traits<Scalar>::size == 0) }; - - typedef typename evaluator<Lhs>::type LhsEtorType; - typedef typename evaluator<Rhs>::type RhsEtorType; - typedef etor_product_coeff_impl<CanVectorizeInner ? InnerVectorizedTraversal : DefaultTraversal, - Unroll ? InnerSize-1 : Dynamic, - LhsEtorType, RhsEtorType, Scalar> CoeffImpl; - - const CoeffReturnType coeff(Index row, Index col) const + + EIGEN_DEVICE_FUNC const CoeffReturnType coeff(Index row, Index col) const { - Scalar res; - CoeffImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res); - return res; + // TODO check performance regression wrt to Eigen 3.2 which has special handling of this function + return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum(); } /* Allow index-based non-packet access. It is impossible though to allow index-based packed access, * which is why we don't set the LinearAccessBit. + * TODO: this seems possible when the result is a vector */ - const CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC const CoeffReturnType coeff(Index index) const { - Scalar res; const Index row = RowsAtCompileTime == 1 ? 0 : index; const Index col = RowsAtCompileTime == 1 ? index : 0; - CoeffImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res); - return res; + // TODO check performance regression wrt to Eigen 3.2 which has special handling of this function + return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum(); } template<int LoadMode> @@ -183,224 +486,382 @@ struct product_evaluator_dispatcher<Product<Lhs, Rhs>, CoeffBasedProduct<LhsNest { PacketScalar res; typedef etor_product_packet_impl<Flags&RowMajorBit ? RowMajor : ColMajor, - Unroll ? InnerSize-1 : Dynamic, - LhsEtorType, RhsEtorType, PacketScalar, LoadMode> PacketImpl; + Unroll ? 
InnerSize-1 : Dynamic, + LhsEtorType, RhsEtorType, PacketScalar, LoadMode> PacketImpl; + PacketImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res); return res; } protected: - typename evaluator<Lhs>::type m_lhsImpl; - typename evaluator<Rhs>::type m_rhsImpl; + const LhsNested m_lhs; + const RhsNested m_rhs; + + LhsEtorType m_lhsImpl; + RhsEtorType m_rhsImpl; // TODO: Get rid of m_innerDim if known at compile time Index m_innerDim; }; -/*************************************************************************** -* Normal product .coeff() implementation (with meta-unrolling) -***************************************************************************/ +template<typename Lhs, typename Rhs> +struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, LazyCoeffBasedProductMode, DenseShape, DenseShape, typename traits<Lhs>::Scalar, typename traits<Rhs>::Scalar > + : product_evaluator<Product<Lhs, Rhs, LazyProduct>, CoeffBasedProductMode, DenseShape, DenseShape, typename traits<Lhs>::Scalar, typename traits<Rhs>::Scalar > +{ + typedef Product<Lhs, Rhs, DefaultProduct> XprType; + typedef Product<Lhs, Rhs, LazyProduct> BaseProduct; + typedef product_evaluator<BaseProduct, CoeffBasedProductMode, DenseShape, DenseShape, typename Lhs::Scalar, typename Rhs::Scalar > Base; + enum { + Flags = Base::Flags | EvalBeforeNestingBit + }; + EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr) + : Base(BaseProduct(xpr.lhs(),xpr.rhs())) + {} +}; -/************************************** -*** Scalar path - no vectorization *** -**************************************/ +/**************************************** +*** Coeff based product, Packet path *** +****************************************/ -template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar> -struct etor_product_coeff_impl<DefaultTraversal, UnrollingIndex, Lhs, Rhs, RetScalar> +template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode> +struct 
etor_product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode> { typedef typename Lhs::Index Index; - static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, RetScalar &res) + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res) { - etor_product_coeff_impl<DefaultTraversal, UnrollingIndex-1, Lhs, Rhs, RetScalar>::run(row, col, lhs, rhs, innerDim, res); - res += lhs.coeff(row, UnrollingIndex) * rhs.coeff(UnrollingIndex, col); + etor_product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res); + res = pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex)), rhs.template packet<LoadMode>(UnrollingIndex, col), res); } }; -template<typename Lhs, typename Rhs, typename RetScalar> -struct etor_product_coeff_impl<DefaultTraversal, 0, Lhs, Rhs, RetScalar> +template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode> +struct etor_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode> { typedef typename Lhs::Index Index; - static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, RetScalar &res) + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res) { - res = lhs.coeff(row, 0) * rhs.coeff(0, col); + etor_product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res); + res = pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex), pset1<Packet>(rhs.coeff(UnrollingIndex, col)), res); } }; -template<typename Lhs, typename Rhs, typename RetScalar> -struct etor_product_coeff_impl<DefaultTraversal, Dynamic, Lhs, Rhs, RetScalar> +template<typename Lhs, typename Rhs, typename Packet, int LoadMode> +struct etor_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode> { typedef typename 
Lhs::Index Index; - static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, RetScalar& res) + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res) { - eigen_assert(innerDim>0 && "you are using a non initialized matrix"); - res = lhs.coeff(row, 0) * rhs.coeff(0, col); - for(Index i = 1; i < innerDim; ++i) - res += lhs.coeff(row, i) * rhs.coeff(i, col); + res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col)); } }; -/******************************************* -*** Scalar path with inner vectorization *** -*******************************************/ - -template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet> -struct etor_product_coeff_vectorized_unroller +template<typename Lhs, typename Rhs, typename Packet, int LoadMode> +struct etor_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode> { typedef typename Lhs::Index Index; - enum { PacketSize = packet_traits<typename Lhs::Scalar>::size }; - static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, typename Lhs::PacketScalar &pres) + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res) { - etor_product_coeff_vectorized_unroller<UnrollingIndex-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, innerDim, pres); - pres = padd(pres, pmul( lhs.template packet<Aligned>(row, UnrollingIndex) , rhs.template packet<Aligned>(UnrollingIndex, col) )); + res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col))); } }; -template<typename Lhs, typename Rhs, typename Packet> -struct etor_product_coeff_vectorized_unroller<0, Lhs, Rhs, Packet> +template<typename Lhs, typename Rhs, typename Packet, int LoadMode> +struct etor_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode> { typedef typename Lhs::Index 
Index; - static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::PacketScalar &pres) + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res) { - pres = pmul(lhs.template packet<Aligned>(row, 0) , rhs.template packet<Aligned>(0, col)); + eigen_assert(innerDim>0 && "you are using a non initialized matrix"); + res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col)); + for(Index i = 1; i < innerDim; ++i) + res = pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode>(i, col), res); } }; -template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar> -struct etor_product_coeff_impl<InnerVectorizedTraversal, UnrollingIndex, Lhs, Rhs, RetScalar> +template<typename Lhs, typename Rhs, typename Packet, int LoadMode> +struct etor_product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode> { - typedef typename Lhs::PacketScalar Packet; typedef typename Lhs::Index Index; - enum { PacketSize = packet_traits<typename Lhs::Scalar>::size }; - static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, RetScalar &res) + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res) { - Packet pres; - etor_product_coeff_vectorized_unroller<UnrollingIndex+1-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, innerDim, pres); - etor_product_coeff_impl<DefaultTraversal,UnrollingIndex,Lhs,Rhs,RetScalar>::run(row, col, lhs, rhs, innerDim, res); - res = predux(pres); + eigen_assert(innerDim>0 && "you are using a non initialized matrix"); + res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col))); + for(Index i = 1; i < innerDim; ++i) + res = pmadd(lhs.template packet<LoadMode>(row, i), pset1<Packet>(rhs.coeff(i, col)), res); } }; -template<typename Lhs, typename Rhs, 
int LhsRows = Lhs::RowsAtCompileTime, int RhsCols = Rhs::ColsAtCompileTime> -struct etor_product_coeff_vectorized_dyn_selector + +/*************************************************************************** +* Triangular products +***************************************************************************/ +template<int Mode, bool LhsIsTriangular, + typename Lhs, bool LhsIsVector, + typename Rhs, bool RhsIsVector> +struct triangular_product_impl; + +template<typename Lhs, typename Rhs, int ProductTag> +struct generic_product_impl<Lhs,Rhs,TriangularShape,DenseShape,ProductTag> + : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,TriangularShape,DenseShape,ProductTag> > { - typedef typename Lhs::Index Index; - static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res) + typedef typename Product<Lhs,Rhs>::Scalar Scalar; + + template<typename Dest> + static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) { - res = lhs.row(row).transpose().cwiseProduct(rhs.col(col)).sum(); + triangular_product_impl<Lhs::Mode,true,typename Lhs::MatrixType,false,Rhs, Rhs::ColsAtCompileTime==1> + ::run(dst, lhs.nestedExpression(), rhs, alpha); } }; -// NOTE the 3 following specializations are because taking .col(0) on a vector is a bit slower -// NOTE maybe they are now useless since we have a specialization for Block<Matrix> -template<typename Lhs, typename Rhs, int RhsCols> -struct etor_product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,RhsCols> +template<typename Lhs, typename Rhs, int ProductTag> +struct generic_product_impl<Lhs,Rhs,DenseShape,TriangularShape,ProductTag> +: generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,TriangularShape,ProductTag> > { - typedef typename Lhs::Index Index; - static EIGEN_STRONG_INLINE void run(Index /*row*/, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res) + typedef 
typename Product<Lhs,Rhs>::Scalar Scalar; + + template<typename Dest> + static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) { - res = lhs.transpose().cwiseProduct(rhs.col(col)).sum(); + triangular_product_impl<Rhs::Mode,false,Lhs,Lhs::RowsAtCompileTime==1, typename Rhs::MatrixType, false>::run(dst, lhs, rhs.nestedExpression(), alpha); } }; -template<typename Lhs, typename Rhs, int LhsRows> -struct etor_product_coeff_vectorized_dyn_selector<Lhs,Rhs,LhsRows,1> + +/*************************************************************************** +* SelfAdjoint products +***************************************************************************/ +template <typename Lhs, int LhsMode, bool LhsIsVector, + typename Rhs, int RhsMode, bool RhsIsVector> +struct selfadjoint_product_impl; + +template<typename Lhs, typename Rhs, int ProductTag> +struct generic_product_impl<Lhs,Rhs,SelfAdjointShape,DenseShape,ProductTag> + : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,SelfAdjointShape,DenseShape,ProductTag> > { - typedef typename Lhs::Index Index; - static EIGEN_STRONG_INLINE void run(Index row, Index /*col*/, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res) + typedef typename Product<Lhs,Rhs>::Scalar Scalar; + + template<typename Dest> + static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) { - res = lhs.row(row).transpose().cwiseProduct(rhs).sum(); + selfadjoint_product_impl<typename Lhs::MatrixType,Lhs::Mode,false,Rhs,0,Rhs::IsVectorAtCompileTime>::run(dst, lhs.nestedExpression(), rhs, alpha); } }; -template<typename Lhs, typename Rhs> -struct etor_product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,1> +template<typename Lhs, typename Rhs, int ProductTag> +struct generic_product_impl<Lhs,Rhs,DenseShape,SelfAdjointShape,ProductTag> +: generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,SelfAdjointShape,ProductTag> > { - typedef typename 
Lhs::Index Index; - EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res) + typedef typename Product<Lhs,Rhs>::Scalar Scalar; + + template<typename Dest> + static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) { - res = lhs.transpose().cwiseProduct(rhs).sum(); + selfadjoint_product_impl<Lhs,0,Lhs::IsVectorAtCompileTime,typename Rhs::MatrixType,Rhs::Mode,false>::run(dst, lhs, rhs.nestedExpression(), alpha); } }; -template<typename Lhs, typename Rhs, typename RetScalar> -struct etor_product_coeff_impl<InnerVectorizedTraversal, Dynamic, Lhs, Rhs, RetScalar> + +/*************************************************************************** +* Diagonal products +***************************************************************************/ + +template<typename MatrixType, typename DiagonalType, typename Derived, int ProductOrder> +struct diagonal_product_evaluator_base + : evaluator_base<Derived> { - typedef typename Lhs::Index Index; - static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, typename Lhs::Scalar &res) + typedef typename MatrixType::Index Index; + typedef typename scalar_product_traits<typename MatrixType::Scalar, typename DiagonalType::Scalar>::ReturnType Scalar; + typedef typename internal::packet_traits<Scalar>::type PacketScalar; +public: + enum { + CoeffReadCost = NumTraits<Scalar>::MulCost + evaluator<MatrixType>::CoeffReadCost + evaluator<DiagonalType>::CoeffReadCost, + + MatrixFlags = evaluator<MatrixType>::Flags, + DiagFlags = evaluator<DiagonalType>::Flags, + _StorageOrder = MatrixFlags & RowMajorBit ? 
RowMajor : ColMajor, + _ScalarAccessOnDiag = !((int(_StorageOrder) == ColMajor && int(ProductOrder) == OnTheLeft) + ||(int(_StorageOrder) == RowMajor && int(ProductOrder) == OnTheRight)), + _SameTypes = is_same<typename MatrixType::Scalar, typename DiagonalType::Scalar>::value, + // FIXME currently we need same types, but in the future the next rule should be the one + //_Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && ((!_PacketOnDiag) || (_SameTypes && bool(int(DiagFlags)&PacketAccessBit))), + _Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && _SameTypes && (_ScalarAccessOnDiag || (bool(int(DiagFlags)&PacketAccessBit))), + _LinearAccessMask = (MatrixType::RowsAtCompileTime==1 || MatrixType::ColsAtCompileTime==1) ? LinearAccessBit : 0, + Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixFlags)) | (_Vectorizable ? PacketAccessBit : 0) | AlignedBit + //(int(MatrixFlags)&int(DiagFlags)&AlignedBit), + }; + + diagonal_product_evaluator_base(const MatrixType &mat, const DiagonalType &diag) + : m_diagImpl(diag), m_matImpl(mat) { - etor_product_coeff_vectorized_dyn_selector<Lhs,Rhs>::run(row, col, lhs, rhs, innerDim, res); } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index idx) const + { + return m_diagImpl.coeff(idx) * m_matImpl.coeff(idx); + } + +protected: + template<int LoadMode> + EIGEN_STRONG_INLINE PacketScalar packet_impl(Index row, Index col, Index id, internal::true_type) const + { + return internal::pmul(m_matImpl.template packet<LoadMode>(row, col), + internal::pset1<PacketScalar>(m_diagImpl.coeff(id))); + } + + template<int LoadMode> + EIGEN_STRONG_INLINE PacketScalar packet_impl(Index row, Index col, Index id, internal::false_type) const + { + enum { + InnerSize = (MatrixType::Flags & RowMajorBit) ? MatrixType::ColsAtCompileTime : MatrixType::RowsAtCompileTime, + DiagonalPacketLoadMode = (LoadMode == Aligned && (((InnerSize%16) == 0) || (int(DiagFlags)&AlignedBit)==AlignedBit) ? 
Aligned : Unaligned) + }; + return internal::pmul(m_matImpl.template packet<LoadMode>(row, col), + m_diagImpl.template packet<DiagonalPacketLoadMode>(id)); + } + + typename evaluator<DiagonalType>::nestedType m_diagImpl; + typename evaluator<MatrixType>::nestedType m_matImpl; }; -/******************* -*** Packet path *** -*******************/ - -template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode> -struct etor_product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode> +// diagonal * dense +template<typename Lhs, typename Rhs, int ProductKind, int ProductTag> +struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DiagonalShape, DenseShape, typename Lhs::Scalar, typename Rhs::Scalar> + : diagonal_product_evaluator_base<Rhs, typename Lhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheLeft> { - typedef typename Lhs::Index Index; - static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res) + typedef diagonal_product_evaluator_base<Rhs, typename Lhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheLeft> Base; + using Base::m_diagImpl; + using Base::m_matImpl; + using Base::coeff; + using Base::packet_impl; + typedef typename Base::Scalar Scalar; + typedef typename Base::Index Index; + typedef typename Base::PacketScalar PacketScalar; + + typedef Product<Lhs, Rhs, ProductKind> XprType; + typedef typename XprType::PlainObject PlainObject; + + enum { + StorageOrder = int(Rhs::Flags) & RowMajorBit ? 
RowMajor : ColMajor + }; + + EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr) + : Base(xpr.rhs(), xpr.lhs().diagonal()) { - etor_product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res); - res = pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex)), rhs.template packet<LoadMode>(UnrollingIndex, col), res); } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const + { + return m_diagImpl.coeff(row) * m_matImpl.coeff(row, col); + } + +#ifndef __CUDACC__ + template<int LoadMode> + EIGEN_STRONG_INLINE PacketScalar packet(Index row, Index col) const + { + // NVCC complains about template keyword, so we disable this function in CUDA mode + return this->template packet_impl<LoadMode>(row,col, row, + typename internal::conditional<int(StorageOrder)==RowMajor, internal::true_type, internal::false_type>::type()); + } + + template<int LoadMode> + EIGEN_STRONG_INLINE PacketScalar packet(Index idx) const + { + return packet<LoadMode>(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx); + } +#endif }; -template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode> -struct etor_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode> +// dense * diagonal +template<typename Lhs, typename Rhs, int ProductKind, int ProductTag> +struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DenseShape, DiagonalShape, typename Lhs::Scalar, typename Rhs::Scalar> + : diagonal_product_evaluator_base<Lhs, typename Rhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheRight> { - typedef typename Lhs::Index Index; - static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res) + typedef diagonal_product_evaluator_base<Lhs, typename Rhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheRight> Base; + using Base::m_diagImpl; + using Base::m_matImpl; + 
using Base::coeff; + using Base::packet_impl; + typedef typename Base::Scalar Scalar; + typedef typename Base::Index Index; + typedef typename Base::PacketScalar PacketScalar; + + typedef Product<Lhs, Rhs, ProductKind> XprType; + typedef typename XprType::PlainObject PlainObject; + + enum { StorageOrder = int(Lhs::Flags) & RowMajorBit ? RowMajor : ColMajor }; + + EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr) + : Base(xpr.lhs(), xpr.rhs().diagonal()) { - etor_product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res); - res = pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex), pset1<Packet>(rhs.coeff(UnrollingIndex, col)), res); } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const + { + return m_matImpl.coeff(row, col) * m_diagImpl.coeff(col); + } + +#ifndef __CUDACC__ + template<int LoadMode> + EIGEN_STRONG_INLINE PacketScalar packet(Index row, Index col) const + { + return this->template packet_impl<LoadMode>(row,col, col, + typename internal::conditional<int(StorageOrder)==ColMajor, internal::true_type, internal::false_type>::type()); + } + + template<int LoadMode> + EIGEN_STRONG_INLINE PacketScalar packet(Index idx) const + { + return packet<LoadMode>(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx); + } +#endif }; -template<typename Lhs, typename Rhs, typename Packet, int LoadMode> -struct etor_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode> +/*************************************************************************** +* Products with permutation matrices +***************************************************************************/ + +template<typename Lhs, typename Rhs, int ProductTag> +struct generic_product_impl<Lhs, Rhs, PermutationShape, DenseShape, ProductTag> { - typedef typename Lhs::Index Index; - static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index 
/*innerDim*/, Packet &res) + template<typename Dest> + static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) { - res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col)); + permut_matrix_product_retval<Lhs, Rhs, OnTheLeft, false> pmpr(lhs, rhs); + pmpr.evalTo(dst); } }; -template<typename Lhs, typename Rhs, typename Packet, int LoadMode> -struct etor_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode> +template<typename Lhs, typename Rhs, int ProductTag> +struct generic_product_impl<Lhs, Rhs, DenseShape, PermutationShape, ProductTag> { - typedef typename Lhs::Index Index; - static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res) + template<typename Dest> + static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) { - res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col))); + permut_matrix_product_retval<Rhs, Lhs, OnTheRight, false> pmpr(rhs, lhs); + pmpr.evalTo(dst); } }; -template<typename Lhs, typename Rhs, typename Packet, int LoadMode> -struct etor_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode> +template<typename Lhs, typename Rhs, int ProductTag> +struct generic_product_impl<Transpose<Lhs>, Rhs, PermutationShape, DenseShape, ProductTag> { - typedef typename Lhs::Index Index; - static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res) + template<typename Dest> + static void evalTo(Dest& dst, const Transpose<Lhs>& lhs, const Rhs& rhs) { - eigen_assert(innerDim>0 && "you are using a non initialized matrix"); - res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col)); - for(Index i = 1; i < innerDim; ++i) - res = pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode>(i, col), res); + permut_matrix_product_retval<Lhs, Rhs, OnTheLeft, true> pmpr(lhs.nestedPermutation(), rhs); + pmpr.evalTo(dst); } }; 
-template<typename Lhs, typename Rhs, typename Packet, int LoadMode> -struct etor_product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode> +template<typename Lhs, typename Rhs, int ProductTag> +struct generic_product_impl<Lhs, Transpose<Rhs>, DenseShape, PermutationShape, ProductTag> { - typedef typename Lhs::Index Index; - static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res) + template<typename Dest> + static void evalTo(Dest& dst, const Lhs& lhs, const Transpose<Rhs>& rhs) { - eigen_assert(innerDim>0 && "you are using a non initialized matrix"); - res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col))); - for(Index i = 1; i < innerDim; ++i) - res = pmadd(lhs.template packet<LoadMode>(row, i), pset1<Packet>(rhs.coeff(i, col)), res); + permut_matrix_product_retval<Rhs, Lhs, OnTheRight, true> pmpr(rhs.nestedPermutation(), lhs); + pmpr.evalTo(dst); } }; diff --git a/Eigen/src/Core/Redux.h b/Eigen/src/Core/Redux.h index c626946ba..f6546917e 100644 --- a/Eigen/src/Core/Redux.h +++ b/Eigen/src/Core/Redux.h @@ -65,6 +65,25 @@ public: ? 
CompleteUnrolling : NoUnrolling }; + +#ifdef EIGEN_DEBUG_ASSIGN + static void debug() + { + std::cerr << "Xpr: " << typeid(typename Derived::XprType).name() << std::endl; + std::cerr.setf(std::ios::hex, std::ios::basefield); + EIGEN_DEBUG_VAR(Derived::Flags) + std::cerr.unsetf(std::ios::hex); + EIGEN_DEBUG_VAR(InnerMaxSize) + EIGEN_DEBUG_VAR(PacketSize) + EIGEN_DEBUG_VAR(MightVectorize) + EIGEN_DEBUG_VAR(MayLinearVectorize) + EIGEN_DEBUG_VAR(MaySliceVectorize) + EIGEN_DEBUG_VAR(Traversal) + EIGEN_DEBUG_VAR(UnrollingLimit) + EIGEN_DEBUG_VAR(Unrolling) + std::cerr << std::endl; + } +#endif }; /*************************************************************************** @@ -174,7 +193,7 @@ struct redux_impl<Func, Derived, DefaultTraversal, NoUnrolling> typedef typename Derived::Scalar Scalar; typedef typename Derived::Index Index; EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE Scalar run(const Derived& mat, const Func& func) + static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func) { eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix"); Scalar res; @@ -200,10 +219,10 @@ struct redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling> typedef typename packet_traits<Scalar>::type PacketScalar; typedef typename Derived::Index Index; - static Scalar run(const Derived& mat, const Func& func) + static Scalar run(const Derived &mat, const Func& func) { const Index size = mat.size(); - eigen_assert(size && "you are using an empty matrix"); + const Index packetSize = packet_traits<Scalar>::size; const Index alignedStart = internal::first_aligned(mat); enum { @@ -258,7 +277,7 @@ struct redux_impl<Func, Derived, SliceVectorizedTraversal, NoUnrolling> typedef typename packet_traits<Scalar>::type PacketScalar; typedef typename Derived::Index Index; - static Scalar run(const Derived& mat, const Func& func) + EIGEN_DEVICE_FUNC static Scalar run(const Derived &mat, const Func& func) { eigen_assert(mat.rows()>0 && mat.cols()>0 && 
"you are using an empty matrix"); const Index innerSize = mat.innerSize(); @@ -300,7 +319,7 @@ struct redux_impl<Func, Derived, LinearVectorizedTraversal, CompleteUnrolling> Size = Derived::SizeAtCompileTime, VectorizedSize = (Size / PacketSize) * PacketSize }; - static EIGEN_STRONG_INLINE Scalar run(const Derived& mat, const Func& func) + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func) { eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix"); if (VectorizedSize > 0) { @@ -315,6 +334,66 @@ struct redux_impl<Func, Derived, LinearVectorizedTraversal, CompleteUnrolling> } }; +// evaluator adaptor +template<typename _XprType> +class redux_evaluator +{ +public: + typedef _XprType XprType; + EIGEN_DEVICE_FUNC explicit redux_evaluator(const XprType &xpr) : m_evaluator(xpr), m_xpr(xpr) {} + + typedef typename XprType::Index Index; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename XprType::PacketScalar PacketScalar; + typedef typename XprType::PacketReturnType PacketReturnType; + + enum { + MaxRowsAtCompileTime = XprType::MaxRowsAtCompileTime, + MaxColsAtCompileTime = XprType::MaxColsAtCompileTime, + // TODO we should not remove DirectAccessBit and rather find an elegant way to query the alignment offset at runtime from the evaluator + Flags = evaluator<XprType>::Flags & ~DirectAccessBit, + IsRowMajor = XprType::IsRowMajor, + SizeAtCompileTime = XprType::SizeAtCompileTime, + InnerSizeAtCompileTime = XprType::InnerSizeAtCompileTime, + CoeffReadCost = evaluator<XprType>::CoeffReadCost + }; + + EIGEN_DEVICE_FUNC Index rows() const { return m_xpr.rows(); } + EIGEN_DEVICE_FUNC Index cols() const { return m_xpr.cols(); } + EIGEN_DEVICE_FUNC Index size() const { return m_xpr.size(); } + EIGEN_DEVICE_FUNC Index innerSize() const { return m_xpr.innerSize(); } + EIGEN_DEVICE_FUNC Index outerSize() const { return m_xpr.outerSize(); } + + 
EIGEN_DEVICE_FUNC + CoeffReturnType coeff(Index row, Index col) const + { return m_evaluator.coeff(row, col); } + + EIGEN_DEVICE_FUNC + CoeffReturnType coeff(Index index) const + { return m_evaluator.coeff(index); } + + template<int LoadMode> + PacketReturnType packet(Index row, Index col) const + { return m_evaluator.template packet<LoadMode>(row, col); } + + template<int LoadMode> + PacketReturnType packet(Index index) const + { return m_evaluator.template packet<LoadMode>(index); } + + EIGEN_DEVICE_FUNC + CoeffReturnType coeffByOuterInner(Index outer, Index inner) const + { return m_evaluator.coeff(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); } + + template<int LoadMode> + PacketReturnType packetByOuterInner(Index outer, Index inner) const + { return m_evaluator.template packet<LoadMode>(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); } + +protected: + typename internal::evaluator<XprType>::nestedType m_evaluator; + const XprType &m_xpr; +}; + } // end namespace internal /*************************************************************************** @@ -325,7 +404,7 @@ struct redux_impl<Func, Derived, LinearVectorizedTraversal, CompleteUnrolling> /** \returns the result of a full redux operation on the whole matrix or vector using \a func * * The template parameter \a BinaryOp is the type of the functor \a func which must be - * an associative operator. Both current STL and TR1 functor styles are handled. + * an associative operator. Both current C++98 and C++11 functor styles are handled. 
* * \sa DenseBase::sum(), DenseBase::minCoeff(), DenseBase::maxCoeff(), MatrixBase::colwise(), MatrixBase::rowwise() */ @@ -334,9 +413,22 @@ template<typename Func> EIGEN_STRONG_INLINE typename internal::result_of<Func(typename internal::traits<Derived>::Scalar)>::type DenseBase<Derived>::redux(const Func& func) const { - typedef typename internal::remove_all<typename Derived::Nested>::type ThisNested; - return internal::redux_impl<Func, ThisNested> - ::run(derived(), func); + eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix"); + + // FIXME, eval_nest should be handled by redux_evaluator, however: + // - it is currently difficult to provide the right Flags since they are still handled by the expressions + // - handling it here might reduce the number of template instantiations +// typedef typename internal::nested_eval<Derived,1>::type ThisNested; +// typedef typename internal::remove_all<ThisNested>::type ThisNestedCleaned; +// typedef typename internal::redux_evaluator<ThisNestedCleaned> ThisEvaluator; +// +// ThisNested thisNested(derived()); +// ThisEvaluator thisEval(thisNested); + + typedef typename internal::redux_evaluator<Derived> ThisEvaluator; + ThisEvaluator thisEval(derived()); + + return internal::redux_impl<Func, ThisEvaluator>::run(thisEval, func); } /** \returns the minimum of all coefficients of \c *this. @@ -346,7 +438,7 @@ template<typename Derived> EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar DenseBase<Derived>::minCoeff() const { - return this->redux(Eigen::internal::scalar_min_op<Scalar>()); + return derived().redux(Eigen::internal::scalar_min_op<Scalar>()); } /** \returns the maximum of all coefficients of \c *this. 
@@ -356,7 +448,7 @@ template<typename Derived> EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar DenseBase<Derived>::maxCoeff() const { - return this->redux(Eigen::internal::scalar_max_op<Scalar>()); + return derived().redux(Eigen::internal::scalar_max_op<Scalar>()); } /** \returns the sum of all coefficients of *this @@ -369,7 +461,7 @@ DenseBase<Derived>::sum() const { if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0)) return Scalar(0); - return this->redux(Eigen::internal::scalar_sum_op<Scalar>()); + return derived().redux(Eigen::internal::scalar_sum_op<Scalar>()); } /** \returns the mean of all coefficients of *this @@ -380,7 +472,7 @@ template<typename Derived> EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar DenseBase<Derived>::mean() const { - return Scalar(this->redux(Eigen::internal::scalar_sum_op<Scalar>())) / Scalar(this->size()); + return Scalar(derived().redux(Eigen::internal::scalar_sum_op<Scalar>())) / Scalar(this->size()); } /** \returns the product of all coefficients of *this @@ -396,7 +488,7 @@ DenseBase<Derived>::prod() const { if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0)) return Scalar(1); - return this->redux(Eigen::internal::scalar_product_op<Scalar>()); + return derived().redux(Eigen::internal::scalar_product_op<Scalar>()); } /** \returns the trace of \c *this, i.e. the sum of the coefficients on the main diagonal. 
diff --git a/Eigen/src/Core/Ref.h b/Eigen/src/Core/Ref.h index 92614c6e2..6e6adbd31 100644 --- a/Eigen/src/Core/Ref.h +++ b/Eigen/src/Core/Ref.h @@ -12,10 +12,6 @@ namespace Eigen { -template<typename Derived> class RefBase; -template<typename PlainObjectType, int Options = 0, - typename StrideType = typename internal::conditional<PlainObjectType::IsVectorAtCompileTime,InnerStride<1>,OuterStride<> >::type > class Ref; - /** \class Ref * \ingroup Core_Module * @@ -131,12 +127,12 @@ public: typedef MapBase<Derived> Base; EIGEN_DENSE_PUBLIC_INTERFACE(RefBase) - inline Index innerStride() const + EIGEN_DEVICE_FUNC inline Index innerStride() const { return StrideType::InnerStrideAtCompileTime != 0 ? m_stride.inner() : 1; } - inline Index outerStride() const + EIGEN_DEVICE_FUNC inline Index outerStride() const { return StrideType::OuterStrideAtCompileTime != 0 ? m_stride.outer() : IsVectorAtCompileTime ? this->size() @@ -144,7 +140,7 @@ public: : this->rows(); } - RefBase() + EIGEN_DEVICE_FUNC RefBase() : Base(0,RowsAtCompileTime==Dynamic?0:RowsAtCompileTime,ColsAtCompileTime==Dynamic?0:ColsAtCompileTime), // Stride<> does not allow default ctor for Dynamic strides, so let' initialize it with dummy values: m_stride(StrideType::OuterStrideAtCompileTime==Dynamic?0:StrideType::OuterStrideAtCompileTime, @@ -158,7 +154,7 @@ protected: typedef Stride<StrideType::OuterStrideAtCompileTime,StrideType::InnerStrideAtCompileTime> StrideBase; template<typename Expression> - void construct(Expression& expr) + EIGEN_DEVICE_FUNC void construct(Expression& expr) { if(PlainObjectType::RowsAtCompileTime==1) { @@ -188,6 +184,8 @@ template<typename PlainObjectType, int Options, typename StrideType> class Ref : public RefBase<Ref<PlainObjectType, Options, StrideType> > { typedef internal::traits<Ref> Traits; + template<typename Derived> + EIGEN_DEVICE_FUNC inline Ref(const PlainObjectBase<Derived>& expr); public: typedef RefBase<Ref> Base; @@ -196,20 +194,21 @@ template<typename 
PlainObjectType, int Options, typename StrideType> class Ref #ifndef EIGEN_PARSED_BY_DOXYGEN template<typename Derived> - inline Ref(PlainObjectBase<Derived>& expr, - typename internal::enable_if<bool(Traits::template match<Derived>::MatchAtCompileTime),Derived>::type* = 0) + EIGEN_DEVICE_FUNC inline Ref(PlainObjectBase<Derived>& expr) { - Base::construct(expr); + EIGEN_STATIC_ASSERT(bool(Traits::template match<Derived>::MatchAtCompileTime), STORAGE_LAYOUT_DOES_NOT_MATCH); + Base::construct(expr.derived()); } template<typename Derived> - inline Ref(const DenseBase<Derived>& expr, - typename internal::enable_if<bool(internal::is_lvalue<Derived>::value&&bool(Traits::template match<Derived>::MatchAtCompileTime)),Derived>::type* = 0, - int = Derived::ThisConstantIsPrivateInPlainObjectBase) + EIGEN_DEVICE_FUNC inline Ref(const DenseBase<Derived>& expr) #else template<typename Derived> inline Ref(DenseBase<Derived>& expr) #endif { + EIGEN_STATIC_ASSERT(bool(internal::is_lvalue<Derived>::value), THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY); + EIGEN_STATIC_ASSERT(bool(Traits::template match<Derived>::MatchAtCompileTime), STORAGE_LAYOUT_DOES_NOT_MATCH); + EIGEN_STATIC_ASSERT(!Derived::IsPlainObjectBase,THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY); Base::construct(expr.const_cast_derived()); } @@ -228,7 +227,7 @@ template<typename TPlainObjectType, int Options, typename StrideType> class Ref< EIGEN_DENSE_PUBLIC_INTERFACE(Ref) template<typename Derived> - inline Ref(const DenseBase<Derived>& expr) + EIGEN_DEVICE_FUNC inline Ref(const DenseBase<Derived>& expr) { // std::cout << match_helper<Derived>::HasDirectAccess << "," << match_helper<Derived>::OuterStrideMatch << "," << match_helper<Derived>::InnerStrideMatch << "\n"; // std::cout << int(StrideType::OuterStrideAtCompileTime) << " - " << int(Derived::OuterStrideAtCompileTime) << "\n"; @@ -236,18 +235,27 @@ template<typename TPlainObjectType, int Options, typename StrideType> class Ref< construct(expr.derived(), 
typename Traits::template match<Derived>::type()); } + EIGEN_DEVICE_FUNC inline Ref(const Ref& other) : Base(other) { + // copy constructor shall not copy the m_object, to avoid unnecessary malloc and copy + } + + template<typename OtherRef> + EIGEN_DEVICE_FUNC inline Ref(const RefBase<OtherRef>& other) { + construct(other.derived(), typename Traits::template match<OtherRef>::type()); + } + protected: template<typename Expression> - void construct(const Expression& expr,internal::true_type) + EIGEN_DEVICE_FUNC void construct(const Expression& expr,internal::true_type) { Base::construct(expr); } template<typename Expression> - void construct(const Expression& expr, internal::false_type) + EIGEN_DEVICE_FUNC void construct(const Expression& expr, internal::false_type) { - m_object.lazyAssign(expr); + internal::call_assignment_no_alias(m_object,expr,internal::assign_op<Scalar>()); Base::construct(m_object); } diff --git a/Eigen/src/Core/Replicate.h b/Eigen/src/Core/Replicate.h index dde86a834..3777049ee 100644 --- a/Eigen/src/Core/Replicate.h +++ b/Eigen/src/Core/Replicate.h @@ -53,8 +53,9 @@ struct traits<Replicate<MatrixType,RowFactor,ColFactor> > IsRowMajor = MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1 ? 1 : MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1 ? 0 : (MatrixType::Flags & RowMajorBit) ? 1 : 0, - Flags = (_MatrixTypeNested::Flags & HereditaryBits & ~RowMajorBit) | (IsRowMajor ? RowMajorBit : 0), - CoeffReadCost = _MatrixTypeNested::CoeffReadCost + + // FIXME enable DirectAccess with negative strides? + Flags = IsRowMajor ? 
RowMajorBit : 0 }; }; } @@ -68,6 +69,7 @@ template<typename MatrixType,int RowFactor,int ColFactor> class Replicate typedef typename internal::dense_xpr_base<Replicate>::type Base; EIGEN_DENSE_PUBLIC_INTERFACE(Replicate) + typedef typename internal::remove_all<MatrixType>::type NestedExpression; template<typename OriginalMatrixType> inline explicit Replicate(const OriginalMatrixType& a_matrix) diff --git a/Eigen/src/Core/ReturnByValue.h b/Eigen/src/Core/ReturnByValue.h index 7834f6cbc..af01a5567 100644 --- a/Eigen/src/Core/ReturnByValue.h +++ b/Eigen/src/Core/ReturnByValue.h @@ -38,9 +38,10 @@ struct traits<ReturnByValue<Derived> > * So internal::nested always gives the plain return matrix type. * * FIXME: I don't understand why we need this specialization: isn't this taken care of by the EvalBeforeNestingBit ?? + * Answer: EvalBeforeNestingBit should be deprecated since we have the evaluators */ template<typename Derived,int n,typename PlainObject> -struct nested<ReturnByValue<Derived>, n, PlainObject> +struct nested_eval<ReturnByValue<Derived>, n, PlainObject> { typedef typename traits<Derived>::ReturnType type; }; @@ -48,7 +49,7 @@ struct nested<ReturnByValue<Derived>, n, PlainObject> } // end namespace internal template<typename Derived> class ReturnByValue - : internal::no_assignment_operator, public internal::dense_xpr_base< ReturnByValue<Derived> >::type + : public internal::dense_xpr_base< ReturnByValue<Derived> >::type, internal::no_assignment_operator { public: typedef typename internal::traits<Derived>::ReturnType ReturnType; @@ -73,6 +74,7 @@ template<typename Derived> class ReturnByValue const Unusable& coeff(Index,Index) const { return *reinterpret_cast<const Unusable*>(this); } Unusable& coeffRef(Index) { return *reinterpret_cast<Unusable*>(this); } Unusable& coeffRef(Index,Index) { return *reinterpret_cast<Unusable*>(this); } +#undef Unusable #endif }; @@ -84,6 +86,36 @@ Derived& DenseBase<Derived>::operator=(const ReturnByValue<OtherDerived>& 
other) return derived(); } +namespace internal { + +// Expression is evaluated in a temporary; default implementation of Assignment is bypassed so that +// when a ReturnByValue expression is assigned, the evaluator is not constructed. +// TODO: Finalize port to new regime; ReturnByValue should not exist in the expression world + +template<typename Derived> +struct evaluator<ReturnByValue<Derived> > + : public evaluator<typename internal::traits<Derived>::ReturnType>::type +{ + typedef ReturnByValue<Derived> XprType; + typedef typename internal::traits<Derived>::ReturnType PlainObject; + typedef typename evaluator<PlainObject>::type Base; + + typedef evaluator type; + typedef evaluator nestedType; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) + : m_result(xpr.rows(), xpr.cols()) + { + ::new (static_cast<Base*>(this)) Base(m_result); + xpr.evalTo(m_result); + } + +protected: + PlainObject m_result; +}; + +} // end namespace internal + } // end namespace Eigen #endif // EIGEN_RETURNBYVALUE_H diff --git a/Eigen/src/Core/Reverse.h b/Eigen/src/Core/Reverse.h index e30ae3d28..291300a4a 100644 --- a/Eigen/src/Core/Reverse.h +++ b/Eigen/src/Core/Reverse.h @@ -44,14 +44,7 @@ struct traits<Reverse<MatrixType, Direction> > ColsAtCompileTime = MatrixType::ColsAtCompileTime, MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime, - - // let's enable LinearAccess only with vectorization because of the product overhead - LinearAccess = ( (Direction==BothDirections) && (int(_MatrixTypeNested::Flags)&PacketAccessBit) ) - ? 
LinearAccessBit : 0, - - Flags = int(_MatrixTypeNested::Flags) & (HereditaryBits | LvalueBit | PacketAccessBit | LinearAccess), - - CoeffReadCost = _MatrixTypeNested::CoeffReadCost + Flags = _MatrixTypeNested::Flags & (RowMajorBit | LvalueBit) }; }; @@ -74,6 +67,7 @@ template<typename MatrixType, int Direction> class Reverse typedef typename internal::dense_xpr_base<Reverse>::type Base; EIGEN_DENSE_PUBLIC_INTERFACE(Reverse) + typedef typename internal::remove_all<MatrixType>::type NestedExpression; using Base::IsRowMajor; // next line is necessary because otherwise const version of operator() @@ -95,47 +89,47 @@ template<typename MatrixType, int Direction> class Reverse typedef internal::reverse_packet_cond<PacketScalar,ReversePacket> reverse_packet; public: - inline Reverse(const MatrixType& matrix) : m_matrix(matrix) { } + EIGEN_DEVICE_FUNC explicit inline Reverse(const MatrixType& matrix) : m_matrix(matrix) { } EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Reverse) - inline Index rows() const { return m_matrix.rows(); } - inline Index cols() const { return m_matrix.cols(); } + EIGEN_DEVICE_FUNC inline Index rows() const { return m_matrix.rows(); } + EIGEN_DEVICE_FUNC inline Index cols() const { return m_matrix.cols(); } - inline Index innerStride() const + EIGEN_DEVICE_FUNC inline Index innerStride() const { return -m_matrix.innerStride(); } - inline Scalar& operator()(Index row, Index col) + EIGEN_DEVICE_FUNC inline Scalar& operator()(Index row, Index col) { eigen_assert(row >= 0 && row < rows() && col >= 0 && col < cols()); return coeffRef(row, col); } - inline Scalar& coeffRef(Index row, Index col) + EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index row, Index col) { return m_matrix.const_cast_derived().coeffRef(ReverseRow ? m_matrix.rows() - row - 1 : row, ReverseCol ? 
m_matrix.cols() - col - 1 : col); } - inline CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC inline CoeffReturnType coeff(Index row, Index col) const { return m_matrix.coeff(ReverseRow ? m_matrix.rows() - row - 1 : row, ReverseCol ? m_matrix.cols() - col - 1 : col); } - inline CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC inline CoeffReturnType coeff(Index index) const { return m_matrix.coeff(m_matrix.size() - index - 1); } - inline Scalar& coeffRef(Index index) + EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index index) { return m_matrix.const_cast_derived().coeffRef(m_matrix.size() - index - 1); } - inline Scalar& operator()(Index index) + EIGEN_DEVICE_FUNC inline Scalar& operator()(Index index) { eigen_assert(index >= 0 && index < m_matrix.size()); return coeffRef(index); @@ -170,7 +164,7 @@ template<typename MatrixType, int Direction> class Reverse m_matrix.const_cast_derived().template writePacket<LoadMode>(m_matrix.size() - index - PacketSize, internal::preverse(x)); } - const typename internal::remove_all<typename MatrixType::Nested>::type& + EIGEN_DEVICE_FUNC const typename internal::remove_all<typename MatrixType::Nested>::type& nestedExpression() const { return m_matrix; @@ -190,7 +184,7 @@ template<typename Derived> inline typename DenseBase<Derived>::ReverseReturnType DenseBase<Derived>::reverse() { - return derived(); + return ReverseReturnType(derived()); } /** This is the const version of reverse(). */ @@ -198,7 +192,7 @@ template<typename Derived> inline const typename DenseBase<Derived>::ConstReverseReturnType DenseBase<Derived>::reverse() const { - return derived(); + return ConstReverseReturnType(derived()); } /** This is the "in place" version of reverse: it reverses \c *this. 
diff --git a/Eigen/src/Core/Select.h b/Eigen/src/Core/Select.h index 87993bbb5..79eec1b5b 100644 --- a/Eigen/src/Core/Select.h +++ b/Eigen/src/Core/Select.h @@ -43,23 +43,21 @@ struct traits<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> > ColsAtCompileTime = ConditionMatrixType::ColsAtCompileTime, MaxRowsAtCompileTime = ConditionMatrixType::MaxRowsAtCompileTime, MaxColsAtCompileTime = ConditionMatrixType::MaxColsAtCompileTime, - Flags = (unsigned int)ThenMatrixType::Flags & ElseMatrixType::Flags & HereditaryBits, - CoeffReadCost = traits<typename remove_all<ConditionMatrixNested>::type>::CoeffReadCost - + EIGEN_SIZE_MAX(traits<typename remove_all<ThenMatrixNested>::type>::CoeffReadCost, - traits<typename remove_all<ElseMatrixNested>::type>::CoeffReadCost) + Flags = (unsigned int)ThenMatrixType::Flags & ElseMatrixType::Flags & RowMajorBit }; }; } template<typename ConditionMatrixType, typename ThenMatrixType, typename ElseMatrixType> -class Select : internal::no_assignment_operator, - public internal::dense_xpr_base< Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >::type +class Select : public internal::dense_xpr_base< Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >::type, + internal::no_assignment_operator { public: typedef typename internal::dense_xpr_base<Select>::type Base; EIGEN_DENSE_PUBLIC_INTERFACE(Select) + inline EIGEN_DEVICE_FUNC Select(const ConditionMatrixType& a_conditionMatrix, const ThenMatrixType& a_thenMatrix, const ElseMatrixType& a_elseMatrix) @@ -69,9 +67,10 @@ class Select : internal::no_assignment_operator, eigen_assert(m_condition.cols() == m_then.cols() && m_condition.cols() == m_else.cols()); } - Index rows() const { return m_condition.rows(); } - Index cols() const { return m_condition.cols(); } + inline EIGEN_DEVICE_FUNC Index rows() const { return m_condition.rows(); } + inline EIGEN_DEVICE_FUNC Index cols() const { return m_condition.cols(); } + inline EIGEN_DEVICE_FUNC const Scalar coeff(Index 
i, Index j) const { if (m_condition.coeff(i,j)) @@ -80,6 +79,7 @@ class Select : internal::no_assignment_operator, return m_else.coeff(i,j); } + inline EIGEN_DEVICE_FUNC const Scalar coeff(Index i) const { if (m_condition.coeff(i)) @@ -88,17 +88,17 @@ class Select : internal::no_assignment_operator, return m_else.coeff(i); } - const ConditionMatrixType& conditionMatrix() const + inline EIGEN_DEVICE_FUNC const ConditionMatrixType& conditionMatrix() const { return m_condition; } - const ThenMatrixType& thenMatrix() const + inline EIGEN_DEVICE_FUNC const ThenMatrixType& thenMatrix() const { return m_then; } - const ElseMatrixType& elseMatrix() const + inline EIGEN_DEVICE_FUNC const ElseMatrixType& elseMatrix() const { return m_else; } diff --git a/Eigen/src/Core/SelfAdjointView.h b/Eigen/src/Core/SelfAdjointView.h index 6c2733650..b785e8e1e 100644 --- a/Eigen/src/Core/SelfAdjointView.h +++ b/Eigen/src/Core/SelfAdjointView.h @@ -35,26 +35,23 @@ struct traits<SelfAdjointView<MatrixType, UpLo> > : traits<MatrixType> typedef typename nested<MatrixType>::type MatrixTypeNested; typedef typename remove_all<MatrixTypeNested>::type MatrixTypeNestedCleaned; typedef MatrixType ExpressionType; - typedef typename MatrixType::PlainObject DenseMatrixType; + typedef typename MatrixType::PlainObject FullMatrixType; enum { Mode = UpLo | SelfAdjoint, - Flags = MatrixTypeNestedCleaned::Flags & (HereditaryBits) - & (~(PacketAccessBit | DirectAccessBit | LinearAccessBit)), // FIXME these flags should be preserved - CoeffReadCost = MatrixTypeNestedCleaned::CoeffReadCost + FlagsLvalueBit = is_lvalue<MatrixType>::value ? 
LvalueBit : 0, + Flags = MatrixTypeNestedCleaned::Flags & (HereditaryBits|FlagsLvalueBit) + & (~(PacketAccessBit | DirectAccessBit | LinearAccessBit)) // FIXME these flags should be preserved }; }; } -template <typename Lhs, int LhsMode, bool LhsIsVector, - typename Rhs, int RhsMode, bool RhsIsVector> -struct SelfadjointProductMatrix; - // FIXME could also be called SelfAdjointWrapper to be consistent with DiagonalWrapper ?? -template<typename MatrixType, unsigned int UpLo> class SelfAdjointView - : public TriangularBase<SelfAdjointView<MatrixType, UpLo> > +template<typename _MatrixType, unsigned int UpLo> class SelfAdjointView + : public TriangularBase<SelfAdjointView<_MatrixType, UpLo> > { public: + typedef _MatrixType MatrixType; typedef TriangularBase<SelfAdjointView> Base; typedef typename internal::traits<SelfAdjointView>::MatrixTypeNested MatrixTypeNested; typedef typename internal::traits<SelfAdjointView>::MatrixTypeNestedCleaned MatrixTypeNestedCleaned; @@ -65,12 +62,13 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView typedef typename MatrixType::Index Index; enum { - Mode = internal::traits<SelfAdjointView>::Mode + Mode = internal::traits<SelfAdjointView>::Mode, + Flags = internal::traits<SelfAdjointView>::Flags }; typedef typename MatrixType::PlainObject PlainObject; EIGEN_DEVICE_FUNC - inline SelfAdjointView(MatrixType& matrix) : m_matrix(matrix) + explicit inline SelfAdjointView(MatrixType& matrix) : m_matrix(matrix) {} EIGEN_DEVICE_FUNC @@ -98,6 +96,7 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index row, Index col) { + EIGEN_STATIC_ASSERT_LVALUE(SelfAdjointView); Base::check_coordinates_internal(row, col); return m_matrix.const_cast_derived().coeffRef(row, col); } @@ -111,26 +110,29 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView EIGEN_DEVICE_FUNC MatrixTypeNestedCleaned& nestedExpression() { return 
*const_cast<MatrixTypeNestedCleaned*>(&m_matrix); } - /** Efficient self-adjoint matrix times vector/matrix product */ + /** Efficient triangular matrix times vector/matrix product */ template<typename OtherDerived> EIGEN_DEVICE_FUNC - SelfadjointProductMatrix<MatrixType,Mode,false,OtherDerived,0,OtherDerived::IsVectorAtCompileTime> + const Product<SelfAdjointView,OtherDerived> operator*(const MatrixBase<OtherDerived>& rhs) const { - return SelfadjointProductMatrix - <MatrixType,Mode,false,OtherDerived,0,OtherDerived::IsVectorAtCompileTime> - (m_matrix, rhs.derived()); + return Product<SelfAdjointView,OtherDerived>(*this, rhs.derived()); } - /** Efficient vector/matrix times self-adjoint matrix product */ + /** Efficient vector/matrix times triangular matrix product */ template<typename OtherDerived> friend EIGEN_DEVICE_FUNC - SelfadjointProductMatrix<OtherDerived,0,OtherDerived::IsVectorAtCompileTime,MatrixType,Mode,false> + const Product<OtherDerived,SelfAdjointView> operator*(const MatrixBase<OtherDerived>& lhs, const SelfAdjointView& rhs) { - return SelfadjointProductMatrix - <OtherDerived,0,OtherDerived::IsVectorAtCompileTime,MatrixType,Mode,false> - (lhs.derived(),rhs.m_matrix); + return Product<OtherDerived,SelfAdjointView>(lhs.derived(),rhs); + } + + friend EIGEN_DEVICE_FUNC + const SelfAdjointView<const CwiseUnaryOp<internal::scalar_multiple_op<Scalar>,MatrixType>,UpLo> + operator*(const Scalar& s, const SelfAdjointView& mat) + { + return (s*mat.nestedExpression()).template selfadjointView<UpLo>(); } /** Perform a symmetric rank 2 update of the selfadjoint matrix \c *this: @@ -194,96 +196,57 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView namespace internal { -template<typename Derived1, typename Derived2, int UnrollCount, bool ClearOpposite> -struct triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Upper), UnrollCount, ClearOpposite> -{ - enum { - col = (UnrollCount-1) / Derived1::RowsAtCompileTime, - row = 
(UnrollCount-1) % Derived1::RowsAtCompileTime - }; - - EIGEN_DEVICE_FUNC - static inline void run(Derived1 &dst, const Derived2 &src) - { - triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Upper), UnrollCount-1, ClearOpposite>::run(dst, src); - - if(row == col) - dst.coeffRef(row, col) = numext::real(src.coeff(row, col)); - else if(row < col) - dst.coeffRef(col, row) = numext::conj(dst.coeffRef(row, col) = src.coeff(row, col)); - } -}; - -template<typename Derived1, typename Derived2, bool ClearOpposite> -struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Upper, 0, ClearOpposite> +// TODO currently a selfadjoint expression has the form SelfAdjointView<.,.> +// in the future selfadjoint-ness should be defined by the expression traits +// such that Transpose<SelfAdjointView<.,.> > is valid. (currently TriangularBase::transpose() is overloaded to make it work) +template<typename MatrixType, unsigned int Mode> +struct evaluator_traits<SelfAdjointView<MatrixType,Mode> > { - EIGEN_DEVICE_FUNC - static inline void run(Derived1 &, const Derived2 &) {} + typedef typename storage_kind_to_evaluator_kind<typename MatrixType::StorageKind>::Kind Kind; + typedef SelfAdjointShape Shape; + + static const int AssumeAliasing = 0; }; -template<typename Derived1, typename Derived2, int UnrollCount, bool ClearOpposite> -struct triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Lower), UnrollCount, ClearOpposite> +template<int UpLo, int SetOpposite, typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor, int Version> +class triangular_dense_assignment_kernel<UpLo,SelfAdjoint,SetOpposite,DstEvaluatorTypeT,SrcEvaluatorTypeT,Functor,Version> + : public generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, Version> { - enum { - col = (UnrollCount-1) / Derived1::RowsAtCompileTime, - row = (UnrollCount-1) % Derived1::RowsAtCompileTime - }; - - EIGEN_DEVICE_FUNC - static inline void run(Derived1 &dst, 
const Derived2 &src) +protected: + typedef generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, Version> Base; + typedef typename Base::DstXprType DstXprType; + typedef typename Base::SrcXprType SrcXprType; + using Base::m_dst; + using Base::m_src; + using Base::m_functor; +public: + + typedef typename Base::DstEvaluatorType DstEvaluatorType; + typedef typename Base::SrcEvaluatorType SrcEvaluatorType; + typedef typename Base::Scalar Scalar; + typedef typename Base::Index Index; + typedef typename Base::AssignmentTraits AssignmentTraits; + + + EIGEN_DEVICE_FUNC triangular_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr) + : Base(dst, src, func, dstExpr) + {} + + EIGEN_DEVICE_FUNC void assignCoeff(Index row, Index col) { - triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Lower), UnrollCount-1, ClearOpposite>::run(dst, src); - - if(row == col) - dst.coeffRef(row, col) = numext::real(src.coeff(row, col)); - else if(row > col) - dst.coeffRef(col, row) = numext::conj(dst.coeffRef(row, col) = src.coeff(row, col)); + eigen_internal_assert(row!=col); + Scalar tmp = m_src.coeff(row,col); + m_functor.assignCoeff(m_dst.coeffRef(row,col), tmp); + m_functor.assignCoeff(m_dst.coeffRef(col,row), numext::conj(tmp)); } -}; - -template<typename Derived1, typename Derived2, bool ClearOpposite> -struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Lower, 0, ClearOpposite> -{ - EIGEN_DEVICE_FUNC - static inline void run(Derived1 &, const Derived2 &) {} -}; - -template<typename Derived1, typename Derived2, bool ClearOpposite> -struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Upper, Dynamic, ClearOpposite> -{ - typedef typename Derived1::Index Index; - EIGEN_DEVICE_FUNC - static inline void run(Derived1 &dst, const Derived2 &src) + + EIGEN_DEVICE_FUNC void assignDiagonalCoeff(Index id) { - for(Index j = 0; j < dst.cols(); ++j) - { - for(Index 
i = 0; i < j; ++i) - { - dst.copyCoeff(i, j, src); - dst.coeffRef(j,i) = numext::conj(dst.coeff(i,j)); - } - dst.copyCoeff(j, j, src); - } - } -}; - -template<typename Derived1, typename Derived2, bool ClearOpposite> -struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Lower, Dynamic, ClearOpposite> -{ - EIGEN_DEVICE_FUNC - static inline void run(Derived1 &dst, const Derived2 &src) - { - typedef typename Derived1::Index Index; - for(Index i = 0; i < dst.rows(); ++i) - { - for(Index j = 0; j < i; ++j) - { - dst.copyCoeff(i, j, src); - dst.coeffRef(j,i) = numext::conj(dst.coeff(i,j)); - } - dst.copyCoeff(i, i, src); - } + Base::assignCoeff(id,id); } + + EIGEN_DEVICE_FUNC void assignOppositeCoeff(Index, Index) + { eigen_internal_assert(false && "should never be called"); } }; } // end namespace internal @@ -297,7 +260,7 @@ template<unsigned int UpLo> typename MatrixBase<Derived>::template ConstSelfAdjointViewReturnType<UpLo>::Type MatrixBase<Derived>::selfadjointView() const { - return derived(); + return typename ConstSelfAdjointViewReturnType<UpLo>::Type(derived()); } template<typename Derived> @@ -305,7 +268,7 @@ template<unsigned int UpLo> typename MatrixBase<Derived>::template SelfAdjointViewReturnType<UpLo>::Type MatrixBase<Derived>::selfadjointView() { - return derived(); + return typename SelfAdjointViewReturnType<UpLo>::Type(derived()); } } // end namespace Eigen diff --git a/Eigen/src/Core/SelfCwiseBinaryOp.h b/Eigen/src/Core/SelfCwiseBinaryOp.h index 8abdca4a5..38185d9d7 100644 --- a/Eigen/src/Core/SelfCwiseBinaryOp.h +++ b/Eigen/src/Core/SelfCwiseBinaryOp.h @@ -12,179 +12,11 @@ namespace Eigen { -/** \class SelfCwiseBinaryOp - * \ingroup Core_Module - * - * \internal - * - * \brief Internal helper class for optimizing operators like +=, -= - * - * This is a pseudo expression class re-implementing the copyCoeff/copyPacket - * method to directly performs a +=/-= operations in an optimal way. 
In particular, - * this allows to make sure that the input/output data are loaded only once using - * aligned packet loads. - * - * \sa class SwapWrapper for a similar trick. - */ - -namespace internal { -template<typename BinaryOp, typename Lhs, typename Rhs> -struct traits<SelfCwiseBinaryOp<BinaryOp,Lhs,Rhs> > - : traits<CwiseBinaryOp<BinaryOp,Lhs,Rhs> > -{ - enum { - // Note that it is still a good idea to preserve the DirectAccessBit - // so that assign can correctly align the data. - Flags = traits<CwiseBinaryOp<BinaryOp,Lhs,Rhs> >::Flags | (Lhs::Flags&AlignedBit) | (Lhs::Flags&DirectAccessBit) | (Lhs::Flags&LvalueBit), - OuterStrideAtCompileTime = Lhs::OuterStrideAtCompileTime, - InnerStrideAtCompileTime = Lhs::InnerStrideAtCompileTime - }; -}; -} - -template<typename BinaryOp, typename Lhs, typename Rhs> class SelfCwiseBinaryOp - : public internal::dense_xpr_base< SelfCwiseBinaryOp<BinaryOp, Lhs, Rhs> >::type -{ - public: - - typedef typename internal::dense_xpr_base<SelfCwiseBinaryOp>::type Base; - EIGEN_DENSE_PUBLIC_INTERFACE(SelfCwiseBinaryOp) - - typedef typename internal::packet_traits<Scalar>::type Packet; - - EIGEN_DEVICE_FUNC - inline SelfCwiseBinaryOp(Lhs& xpr, const BinaryOp& func = BinaryOp()) : m_matrix(xpr), m_functor(func) {} - - EIGEN_DEVICE_FUNC inline Index rows() const { return m_matrix.rows(); } - EIGEN_DEVICE_FUNC inline Index cols() const { return m_matrix.cols(); } - EIGEN_DEVICE_FUNC inline Index outerStride() const { return m_matrix.outerStride(); } - EIGEN_DEVICE_FUNC inline Index innerStride() const { return m_matrix.innerStride(); } - EIGEN_DEVICE_FUNC inline const Scalar* data() const { return m_matrix.data(); } - - // note that this function is needed by assign to correctly align loads/stores - // TODO make Assign use .data() - EIGEN_DEVICE_FUNC - inline Scalar& coeffRef(Index row, Index col) - { - EIGEN_STATIC_ASSERT_LVALUE(Lhs) - return m_matrix.const_cast_derived().coeffRef(row, col); - } - EIGEN_DEVICE_FUNC - inline const 
Scalar& coeffRef(Index row, Index col) const - { - return m_matrix.coeffRef(row, col); - } - - // note that this function is needed by assign to correctly align loads/stores - // TODO make Assign use .data() - EIGEN_DEVICE_FUNC - inline Scalar& coeffRef(Index index) - { - EIGEN_STATIC_ASSERT_LVALUE(Lhs) - return m_matrix.const_cast_derived().coeffRef(index); - } - EIGEN_DEVICE_FUNC - inline const Scalar& coeffRef(Index index) const - { - return m_matrix.const_cast_derived().coeffRef(index); - } - - template<typename OtherDerived> - EIGEN_DEVICE_FUNC - void copyCoeff(Index row, Index col, const DenseBase<OtherDerived>& other) - { - OtherDerived& _other = other.const_cast_derived(); - eigen_internal_assert(row >= 0 && row < rows() - && col >= 0 && col < cols()); - Scalar& tmp = m_matrix.coeffRef(row,col); - tmp = m_functor(tmp, _other.coeff(row,col)); - } - - template<typename OtherDerived> - EIGEN_DEVICE_FUNC - void copyCoeff(Index index, const DenseBase<OtherDerived>& other) - { - OtherDerived& _other = other.const_cast_derived(); - eigen_internal_assert(index >= 0 && index < m_matrix.size()); - Scalar& tmp = m_matrix.coeffRef(index); - tmp = m_functor(tmp, _other.coeff(index)); - } - - template<typename OtherDerived, int StoreMode, int LoadMode> - void copyPacket(Index row, Index col, const DenseBase<OtherDerived>& other) - { - OtherDerived& _other = other.const_cast_derived(); - eigen_internal_assert(row >= 0 && row < rows() - && col >= 0 && col < cols()); - m_matrix.template writePacket<StoreMode>(row, col, - m_functor.packetOp(m_matrix.template packet<StoreMode>(row, col),_other.template packet<LoadMode>(row, col)) ); - } - - template<typename OtherDerived, int StoreMode, int LoadMode> - void copyPacket(Index index, const DenseBase<OtherDerived>& other) - { - OtherDerived& _other = other.const_cast_derived(); - eigen_internal_assert(index >= 0 && index < m_matrix.size()); - m_matrix.template writePacket<StoreMode>(index, - m_functor.packetOp(m_matrix.template 
packet<StoreMode>(index),_other.template packet<LoadMode>(index)) ); - } - - // reimplement lazyAssign to handle complex *= real - // see CwiseBinaryOp ctor for details - template<typename RhsDerived> - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE SelfCwiseBinaryOp& lazyAssign(const DenseBase<RhsDerived>& rhs) - { - EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Lhs,RhsDerived) - EIGEN_CHECK_BINARY_COMPATIBILIY(BinaryOp,typename Lhs::Scalar,typename RhsDerived::Scalar); - - #ifdef EIGEN_DEBUG_ASSIGN - internal::assign_traits<SelfCwiseBinaryOp, RhsDerived>::debug(); - #endif - eigen_assert(rows() == rhs.rows() && cols() == rhs.cols()); - internal::assign_impl<SelfCwiseBinaryOp, RhsDerived>::run(*this,rhs.derived()); - #ifndef EIGEN_NO_DEBUG - this->checkTransposeAliasing(rhs.derived()); - #endif - return *this; - } - - // overloaded to honor evaluation of special matrices - // maybe another solution would be to not use SelfCwiseBinaryOp - // at first... - EIGEN_DEVICE_FUNC - SelfCwiseBinaryOp& operator=(const Rhs& _rhs) - { - typename internal::nested<Rhs>::type rhs(_rhs); - return Base::operator=(rhs); - } - - EIGEN_DEVICE_FUNC - Lhs& expression() const - { - return m_matrix; - } - - EIGEN_DEVICE_FUNC - const BinaryOp& functor() const - { - return m_functor; - } - - protected: - Lhs& m_matrix; - const BinaryOp& m_functor; - - private: - SelfCwiseBinaryOp& operator=(const SelfCwiseBinaryOp&); -}; - template<typename Derived> inline Derived& DenseBase<Derived>::operator*=(const Scalar& other) { typedef typename Derived::PlainObject PlainObject; - SelfCwiseBinaryOp<internal::scalar_product_op<Scalar>, Derived, typename PlainObject::ConstantReturnType> tmp(derived()); - tmp = PlainObject::Constant(rows(),cols(),other); + internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::mul_assign_op<Scalar>()); return derived(); } @@ -192,8 +24,7 @@ template<typename Derived> inline Derived& ArrayBase<Derived>::operator+=(const Scalar& other) { typedef 
typename Derived::PlainObject PlainObject; - SelfCwiseBinaryOp<internal::scalar_sum_op<Scalar>, Derived, typename PlainObject::ConstantReturnType> tmp(derived()); - tmp = PlainObject::Constant(rows(),cols(),other); + internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::add_assign_op<Scalar>()); return derived(); } @@ -201,8 +32,7 @@ template<typename Derived> inline Derived& ArrayBase<Derived>::operator-=(const Scalar& other) { typedef typename Derived::PlainObject PlainObject; - SelfCwiseBinaryOp<internal::scalar_difference_op<Scalar>, Derived, typename PlainObject::ConstantReturnType> tmp(derived()); - tmp = PlainObject::Constant(rows(),cols(),other); + internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::sub_assign_op<Scalar>()); return derived(); } @@ -210,8 +40,7 @@ template<typename Derived> inline Derived& DenseBase<Derived>::operator/=(const Scalar& other) { typedef typename Derived::PlainObject PlainObject; - SelfCwiseBinaryOp<internal::scalar_quotient_op<Scalar>, Derived, typename PlainObject::ConstantReturnType> tmp(derived()); - tmp = PlainObject::Constant(rows(),cols(), other); + internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::div_assign_op<Scalar>()); return derived(); } diff --git a/Eigen/src/Core/Solve.h b/Eigen/src/Core/Solve.h new file mode 100644 index 000000000..3905cd616 --- /dev/null +++ b/Eigen/src/Core/Solve.h @@ -0,0 +1,152 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Gael Guennebaud <gael.guennebaud@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_SOLVE_H +#define EIGEN_SOLVE_H + +namespace Eigen { + +template<typename Decomposition, typename RhsType, typename StorageKind> class SolveImpl; + +/** \class Solve + * \ingroup Core_Module + * + * \brief Pseudo expression representing a solving operation + * + * \tparam Decomposition the type of the matrix or decomposion object + * \tparam Rhstype the type of the right-hand side + * + * This class represents an expression of A.solve(B) + * and most of the time this is the only way it is used. + * + */ +namespace internal { + +// this solve_traits class permits to determine the evaluation type with respect to storage kind (Dense vs Sparse) +template<typename Decomposition, typename RhsType,typename StorageKind> struct solve_traits; + +template<typename Decomposition, typename RhsType> +struct solve_traits<Decomposition,RhsType,Dense> +{ + typedef typename Decomposition::MatrixType MatrixType; + typedef Matrix<typename RhsType::Scalar, + MatrixType::ColsAtCompileTime, + RhsType::ColsAtCompileTime, + RhsType::PlainObject::Options, + MatrixType::MaxColsAtCompileTime, + RhsType::MaxColsAtCompileTime> PlainObject; +}; + +template<typename Decomposition, typename RhsType> +struct traits<Solve<Decomposition, RhsType> > + : traits<typename solve_traits<Decomposition,RhsType,typename internal::traits<RhsType>::StorageKind>::PlainObject> +{ + typedef typename solve_traits<Decomposition,RhsType,typename internal::traits<RhsType>::StorageKind>::PlainObject PlainObject; + typedef traits<PlainObject> BaseTraits; + enum { + Flags = BaseTraits::Flags & RowMajorBit, + CoeffReadCost = Dynamic + }; +}; + +} + + +template<typename Decomposition, typename RhsType> +class Solve : public SolveImpl<Decomposition,RhsType,typename internal::traits<RhsType>::StorageKind> +{ +public: + typedef typename RhsType::Index Index; + typedef typename internal::traits<Solve>::PlainObject PlainObject; + + Solve(const Decomposition &dec, const RhsType &rhs) + : m_dec(dec), m_rhs(rhs) + 
{} + + EIGEN_DEVICE_FUNC Index rows() const { return m_dec.cols(); } + EIGEN_DEVICE_FUNC Index cols() const { return m_rhs.cols(); } + + EIGEN_DEVICE_FUNC const Decomposition& dec() const { return m_dec; } + EIGEN_DEVICE_FUNC const RhsType& rhs() const { return m_rhs; } + +protected: + const Decomposition &m_dec; + const RhsType &m_rhs; +}; + + +// Specialization of the Solve expression for dense results +template<typename Decomposition, typename RhsType> +class SolveImpl<Decomposition,RhsType,Dense> + : public MatrixBase<Solve<Decomposition,RhsType> > +{ + typedef Solve<Decomposition,RhsType> Derived; + +public: + + typedef MatrixBase<Solve<Decomposition,RhsType> > Base; + EIGEN_DENSE_PUBLIC_INTERFACE(Derived) + +private: + + Scalar coeff(Index row, Index col) const; + Scalar coeff(Index i) const; +}; + +// Generic API dispatcher +template<typename Decomposition, typename RhsType, typename StorageKind> +class SolveImpl : public internal::generic_xpr_base<Solve<Decomposition,RhsType>, MatrixXpr, StorageKind>::type +{ + public: + typedef typename internal::generic_xpr_base<Solve<Decomposition,RhsType>, MatrixXpr, StorageKind>::type Base; +}; + +namespace internal { + +// Evaluator of Solve -> eval into a temporary +template<typename Decomposition, typename RhsType> +struct evaluator<Solve<Decomposition,RhsType> > + : public evaluator<typename Solve<Decomposition,RhsType>::PlainObject>::type +{ + typedef Solve<Decomposition,RhsType> SolveType; + typedef typename SolveType::PlainObject PlainObject; + typedef typename evaluator<PlainObject>::type Base; + + typedef evaluator type; + typedef evaluator nestedType; + + EIGEN_DEVICE_FUNC explicit evaluator(const SolveType& solve) + : m_result(solve.rows(), solve.cols()) + { + ::new (static_cast<Base*>(this)) Base(m_result); + solve.dec()._solve_impl(solve.rhs(), m_result); + } + +protected: + PlainObject m_result; +}; + +// Specialization for "dst = dec.solve(rhs)" +// NOTE we need to specialize it for Dense2Dense to avoid 
ambiguous specialization error and a Sparse2Sparse specialization must exist somewhere +template<typename DstXprType, typename DecType, typename RhsType, typename Scalar> +struct Assignment<DstXprType, Solve<DecType,RhsType>, internal::assign_op<Scalar>, Dense2Dense, Scalar> +{ + typedef Solve<DecType,RhsType> SrcXprType; + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &) + { + // FIXME shall we resize dst here? + src.dec()._solve_impl(src.rhs(), dst); + } +}; + +} // end namepsace internal + +} // end namespace Eigen + +#endif // EIGEN_SOLVE_H diff --git a/Eigen/src/Core/SolveTriangular.h b/Eigen/src/Core/SolveTriangular.h index e158e3162..f97048bda 100644 --- a/Eigen/src/Core/SolveTriangular.h +++ b/Eigen/src/Core/SolveTriangular.h @@ -171,10 +171,10 @@ struct triangular_solver_selector<Lhs,Rhs,OnTheRight,Mode,CompleteUnrolling,1> { */ template<typename MatrixType, unsigned int Mode> template<int Side, typename OtherDerived> -void TriangularView<MatrixType,Mode>::solveInPlace(const MatrixBase<OtherDerived>& _other) const +void TriangularViewImpl<MatrixType,Mode,Dense>::solveInPlace(const MatrixBase<OtherDerived>& _other) const { OtherDerived& other = _other.const_cast_derived(); - eigen_assert( cols() == rows() && ((Side==OnTheLeft && cols() == other.rows()) || (Side==OnTheRight && cols() == other.cols())) ); + eigen_assert( derived().cols() == derived().rows() && ((Side==OnTheLeft && derived().cols() == other.rows()) || (Side==OnTheRight && derived().cols() == other.cols())) ); eigen_assert((!(Mode & ZeroDiag)) && bool(Mode & (Upper|Lower))); enum { copy = internal::traits<OtherDerived>::Flags & RowMajorBit && OtherDerived::IsVectorAtCompileTime }; @@ -183,7 +183,7 @@ void TriangularView<MatrixType,Mode>::solveInPlace(const MatrixBase<OtherDerived OtherCopy otherCopy(other); internal::triangular_solver_selector<MatrixType, typename internal::remove_reference<OtherCopy>::type, - Side, Mode>::run(nestedExpression(), 
otherCopy); + Side, Mode>::run(derived().nestedExpression(), otherCopy); if (copy) other = otherCopy; @@ -213,9 +213,9 @@ void TriangularView<MatrixType,Mode>::solveInPlace(const MatrixBase<OtherDerived template<typename Derived, unsigned int Mode> template<int Side, typename Other> const internal::triangular_solve_retval<Side,TriangularView<Derived,Mode>,Other> -TriangularView<Derived,Mode>::solve(const MatrixBase<Other>& other) const +TriangularViewImpl<Derived,Mode,Dense>::solve(const MatrixBase<Other>& other) const { - return internal::triangular_solve_retval<Side,TriangularView,Other>(*this, other.derived()); + return internal::triangular_solve_retval<Side,TriangularViewType,Other>(derived(), other.derived()); } namespace internal { diff --git a/Eigen/src/Core/StableNorm.h b/Eigen/src/Core/StableNorm.h index 64d43e1b1..0b7e39827 100644 --- a/Eigen/src/Core/StableNorm.h +++ b/Eigen/src/Core/StableNorm.h @@ -17,7 +17,6 @@ namespace internal { template<typename ExpressionType, typename Scalar> inline void stable_norm_kernel(const ExpressionType& bl, Scalar& ssq, Scalar& scale, Scalar& invScale) { - using std::max; Scalar maxCoeff = bl.cwiseAbs().maxCoeff(); if(maxCoeff>scale) @@ -58,8 +57,6 @@ blueNorm_impl(const EigenBase<Derived>& _vec) typedef typename Derived::RealScalar RealScalar; typedef typename Derived::Index Index; using std::pow; - EIGEN_USING_STD_MATH(min); - EIGEN_USING_STD_MATH(max); using std::sqrt; using std::abs; const Derived& vec(_vec.derived()); @@ -136,8 +133,8 @@ blueNorm_impl(const EigenBase<Derived>& _vec) } else return sqrt(amed); - asml = (min)(abig, amed); - abig = (max)(abig, amed); + asml = numext::mini(abig, amed); + abig = numext::maxi(abig, amed); if(asml <= abig*relerr) return abig; else @@ -160,7 +157,6 @@ template<typename Derived> inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real MatrixBase<Derived>::stableNorm() const { - EIGEN_USING_STD_MATH(min); using std::sqrt; const Index blockSize = 4096; 
RealScalar scale(0); @@ -174,7 +170,7 @@ MatrixBase<Derived>::stableNorm() const if (bi>0) internal::stable_norm_kernel(this->head(bi), ssq, scale, invScale); for (; bi<n; bi+=blockSize) - internal::stable_norm_kernel(this->segment(bi,(min)(blockSize, n - bi)).template forceAlignedAccessIf<Alignment>(), ssq, scale, invScale); + internal::stable_norm_kernel(this->segment(bi,numext::mini(blockSize, n - bi)).template forceAlignedAccessIf<Alignment>(), ssq, scale, invScale); return scale * sqrt(ssq); } diff --git a/Eigen/src/Core/Stride.h b/Eigen/src/Core/Stride.h index d3d454e4e..e46faad34 100644 --- a/Eigen/src/Core/Stride.h +++ b/Eigen/src/Core/Stride.h @@ -86,26 +86,26 @@ class Stride /** \brief Convenience specialization of Stride to specify only an inner stride * See class Map for some examples */ -template<int Value = Dynamic> +template<int Value> class InnerStride : public Stride<0, Value> { typedef Stride<0, Value> Base; public: typedef DenseIndex Index; EIGEN_DEVICE_FUNC InnerStride() : Base() {} - EIGEN_DEVICE_FUNC InnerStride(Index v) : Base(0, v) {} + EIGEN_DEVICE_FUNC InnerStride(Index v) : Base(0, v) {} // FIXME making this explicit could break valid code }; /** \brief Convenience specialization of Stride to specify only an outer stride * See class Map for some examples */ -template<int Value = Dynamic> +template<int Value> class OuterStride : public Stride<Value, 0> { typedef Stride<Value, 0> Base; public: typedef DenseIndex Index; EIGEN_DEVICE_FUNC OuterStride() : Base() {} - EIGEN_DEVICE_FUNC OuterStride(Index v) : Base(v,0) {} + EIGEN_DEVICE_FUNC OuterStride(Index v) : Base(v,0) {} // FIXME making this explicit could break valid code }; } // end namespace Eigen diff --git a/Eigen/src/Core/Swap.h b/Eigen/src/Core/Swap.h index d602fba65..55319320a 100644 --- a/Eigen/src/Core/Swap.h +++ b/Eigen/src/Core/Swap.h @@ -12,129 +12,54 @@ namespace Eigen { -/** \class SwapWrapper - * \ingroup Core_Module - * - * \internal - * - * \brief Internal helper class 
for swapping two expressions - */ namespace internal { -template<typename ExpressionType> -struct traits<SwapWrapper<ExpressionType> > : traits<ExpressionType> {}; -} -template<typename ExpressionType> class SwapWrapper - : public internal::dense_xpr_base<SwapWrapper<ExpressionType> >::type +// Overload default assignPacket behavior for swapping them +template<typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT> +class generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, swap_assign_op<typename DstEvaluatorTypeT::Scalar>, Specialized> + : public generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, swap_assign_op<typename DstEvaluatorTypeT::Scalar>, BuiltIn> { - public: - - typedef typename internal::dense_xpr_base<SwapWrapper>::type Base; - EIGEN_DENSE_PUBLIC_INTERFACE(SwapWrapper) - typedef typename internal::packet_traits<Scalar>::type Packet; - - EIGEN_DEVICE_FUNC - inline SwapWrapper(ExpressionType& xpr) : m_expression(xpr) {} - - EIGEN_DEVICE_FUNC - inline Index rows() const { return m_expression.rows(); } - EIGEN_DEVICE_FUNC - inline Index cols() const { return m_expression.cols(); } - EIGEN_DEVICE_FUNC - inline Index outerStride() const { return m_expression.outerStride(); } - EIGEN_DEVICE_FUNC - inline Index innerStride() const { return m_expression.innerStride(); } - - typedef typename internal::conditional< - internal::is_lvalue<ExpressionType>::value, - Scalar, - const Scalar - >::type ScalarWithConstIfNotLvalue; - - EIGEN_DEVICE_FUNC - inline ScalarWithConstIfNotLvalue* data() { return m_expression.data(); } - EIGEN_DEVICE_FUNC - inline const Scalar* data() const { return m_expression.data(); } - - EIGEN_DEVICE_FUNC - inline Scalar& coeffRef(Index rowId, Index colId) - { - return m_expression.const_cast_derived().coeffRef(rowId, colId); - } - - EIGEN_DEVICE_FUNC - inline Scalar& coeffRef(Index index) - { - return m_expression.const_cast_derived().coeffRef(index); - } - - EIGEN_DEVICE_FUNC - inline Scalar& 
coeffRef(Index rowId, Index colId) const - { - return m_expression.coeffRef(rowId, colId); - } - - EIGEN_DEVICE_FUNC - inline Scalar& coeffRef(Index index) const - { - return m_expression.coeffRef(index); - } - - template<typename OtherDerived> - EIGEN_DEVICE_FUNC - void copyCoeff(Index rowId, Index colId, const DenseBase<OtherDerived>& other) - { - OtherDerived& _other = other.const_cast_derived(); - eigen_internal_assert(rowId >= 0 && rowId < rows() - && colId >= 0 && colId < cols()); - Scalar tmp = m_expression.coeff(rowId, colId); - m_expression.coeffRef(rowId, colId) = _other.coeff(rowId, colId); - _other.coeffRef(rowId, colId) = tmp; - } - - template<typename OtherDerived> - EIGEN_DEVICE_FUNC - void copyCoeff(Index index, const DenseBase<OtherDerived>& other) - { - OtherDerived& _other = other.const_cast_derived(); - eigen_internal_assert(index >= 0 && index < m_expression.size()); - Scalar tmp = m_expression.coeff(index); - m_expression.coeffRef(index) = _other.coeff(index); - _other.coeffRef(index) = tmp; - } - - template<typename OtherDerived, int StoreMode, int LoadMode> - void copyPacket(Index rowId, Index colId, const DenseBase<OtherDerived>& other) - { - OtherDerived& _other = other.const_cast_derived(); - eigen_internal_assert(rowId >= 0 && rowId < rows() - && colId >= 0 && colId < cols()); - Packet tmp = m_expression.template packet<StoreMode>(rowId, colId); - m_expression.template writePacket<StoreMode>(rowId, colId, - _other.template packet<LoadMode>(rowId, colId) - ); - _other.template writePacket<LoadMode>(rowId, colId, tmp); - } - - template<typename OtherDerived, int StoreMode, int LoadMode> - void copyPacket(Index index, const DenseBase<OtherDerived>& other) - { - OtherDerived& _other = other.const_cast_derived(); - eigen_internal_assert(index >= 0 && index < m_expression.size()); - Packet tmp = m_expression.template packet<StoreMode>(index); - m_expression.template writePacket<StoreMode>(index, - _other.template packet<LoadMode>(index) - ); - 
_other.template writePacket<LoadMode>(index, tmp); - } - - EIGEN_DEVICE_FUNC - ExpressionType& expression() const { return m_expression; } - - protected: - ExpressionType& m_expression; +protected: + typedef generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, swap_assign_op<typename DstEvaluatorTypeT::Scalar>, BuiltIn> Base; + typedef typename DstEvaluatorTypeT::PacketScalar PacketScalar; + using Base::m_dst; + using Base::m_src; + using Base::m_functor; + +public: + typedef typename Base::Scalar Scalar; + typedef typename Base::Index Index; + typedef typename Base::DstXprType DstXprType; + typedef swap_assign_op<Scalar> Functor; + + EIGEN_DEVICE_FUNC generic_dense_assignment_kernel(DstEvaluatorTypeT &dst, const SrcEvaluatorTypeT &src, const Functor &func, DstXprType& dstExpr) + : Base(dst, src, func, dstExpr) + {} + + template<int StoreMode, int LoadMode> + void assignPacket(Index row, Index col) + { + m_functor.template swapPacket<StoreMode,LoadMode,PacketScalar>(&m_dst.coeffRef(row,col), &const_cast<SrcEvaluatorTypeT&>(m_src).coeffRef(row,col)); + } + + template<int StoreMode, int LoadMode> + void assignPacket(Index index) + { + m_functor.template swapPacket<StoreMode,LoadMode,PacketScalar>(&m_dst.coeffRef(index), &const_cast<SrcEvaluatorTypeT&>(m_src).coeffRef(index)); + } + + // TODO find a simple way not to have to copy/paste this function from generic_dense_assignment_kernel, by simple I mean no CRTP (Gael) + template<int StoreMode, int LoadMode> + void assignPacketByOuterInner(Index outer, Index inner) + { + Index row = Base::rowIndexByOuterInner(outer, inner); + Index col = Base::colIndexByOuterInner(outer, inner); + assignPacket<StoreMode,LoadMode>(row, col); + } }; +} // namespace internal + } // end namespace Eigen #endif // EIGEN_SWAP_H diff --git a/Eigen/src/Core/Transpose.h b/Eigen/src/Core/Transpose.h index aba3f6670..3bab6092c 100644 --- a/Eigen/src/Core/Transpose.h +++ b/Eigen/src/Core/Transpose.h @@ -2,7 +2,7 @@ // for linear 
algebra. // // Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com> -// Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr> +// Copyright (C) 2009-2014 Gael Guennebaud <gael.guennebaud@inria.fr> // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed @@ -29,9 +29,10 @@ namespace Eigen { namespace internal { template<typename MatrixType> -struct traits<Transpose<MatrixType> > : traits<MatrixType> +struct traits<Transpose<MatrixType> > { - typedef typename MatrixType::Scalar Scalar; + typedef typename traits<MatrixType>::Scalar Scalar; + typedef typename traits<MatrixType>::Index Index; typedef typename nested<MatrixType>::type MatrixTypeNested; typedef typename remove_reference<MatrixTypeNested>::type MatrixTypeNestedPlain; typedef typename traits<MatrixType>::StorageKind StorageKind; @@ -45,7 +46,6 @@ struct traits<Transpose<MatrixType> > : traits<MatrixType> Flags0 = MatrixTypeNestedPlain::Flags & ~(LvalueBit | NestByRefBit), Flags1 = Flags0 | FlagsLvalueBit, Flags = Flags1 ^ RowMajorBit, - CoeffReadCost = MatrixTypeNestedPlain::CoeffReadCost, InnerStrideAtCompileTime = inner_stride_at_compile_time<MatrixType>::ret, OuterStrideAtCompileTime = outer_stride_at_compile_time<MatrixType>::ret }; @@ -61,9 +61,10 @@ template<typename MatrixType> class Transpose typedef typename TransposeImpl<MatrixType,typename internal::traits<MatrixType>::StorageKind>::Base Base; EIGEN_GENERIC_PUBLIC_INTERFACE(Transpose) + typedef typename internal::remove_all<MatrixType>::type NestedExpression; EIGEN_DEVICE_FUNC - inline Transpose(MatrixType& a_matrix) : m_matrix(a_matrix) {} + explicit inline Transpose(MatrixType& a_matrix) : m_matrix(a_matrix) {} EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Transpose) @@ -100,12 +101,22 @@ struct TransposeImpl_base<MatrixType, false> } // end namespace internal +// Generic API dispatcher +template<typename XprType, typename StorageKind> +class TransposeImpl 
+ : public internal::generic_xpr_base<Transpose<XprType> >::type +{ +public: + typedef typename internal::generic_xpr_base<Transpose<XprType> >::type Base; +}; + template<typename MatrixType> class TransposeImpl<MatrixType,Dense> : public internal::TransposeImpl_base<MatrixType>::type { public: typedef typename internal::TransposeImpl_base<MatrixType>::type Base; + using Base::coeffRef; EIGEN_DENSE_PUBLIC_INTERFACE(Transpose<MatrixType>) EIGEN_INHERIT_ASSIGNMENT_OPERATORS(TransposeImpl) @@ -118,23 +129,10 @@ template<typename MatrixType> class TransposeImpl<MatrixType,Dense> const Scalar >::type ScalarWithConstIfNotLvalue; - inline ScalarWithConstIfNotLvalue* data() { return derived().nestedExpression().data(); } - inline const Scalar* data() const { return derived().nestedExpression().data(); } - - EIGEN_DEVICE_FUNC - inline ScalarWithConstIfNotLvalue& coeffRef(Index rowId, Index colId) - { - EIGEN_STATIC_ASSERT_LVALUE(MatrixType) - return derived().nestedExpression().const_cast_derived().coeffRef(colId, rowId); - } - - EIGEN_DEVICE_FUNC - inline ScalarWithConstIfNotLvalue& coeffRef(Index index) - { - EIGEN_STATIC_ASSERT_LVALUE(MatrixType) - return derived().nestedExpression().const_cast_derived().coeffRef(index); - } + EIGEN_DEVICE_FUNC inline ScalarWithConstIfNotLvalue* data() { return derived().nestedExpression().data(); } + EIGEN_DEVICE_FUNC inline const Scalar* data() const { return derived().nestedExpression().data(); } + // FIXME: shall we keep the const version of coeffRef? 
EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index rowId, Index colId) const { @@ -146,42 +144,6 @@ template<typename MatrixType> class TransposeImpl<MatrixType,Dense> { return derived().nestedExpression().coeffRef(index); } - - EIGEN_DEVICE_FUNC - inline CoeffReturnType coeff(Index rowId, Index colId) const - { - return derived().nestedExpression().coeff(colId, rowId); - } - - EIGEN_DEVICE_FUNC - inline CoeffReturnType coeff(Index index) const - { - return derived().nestedExpression().coeff(index); - } - - template<int LoadMode> - inline const PacketScalar packet(Index rowId, Index colId) const - { - return derived().nestedExpression().template packet<LoadMode>(colId, rowId); - } - - template<int LoadMode> - inline void writePacket(Index rowId, Index colId, const PacketScalar& x) - { - derived().nestedExpression().const_cast_derived().template writePacket<LoadMode>(colId, rowId, x); - } - - template<int LoadMode> - inline const PacketScalar packet(Index index) const - { - return derived().nestedExpression().template packet<LoadMode>(index); - } - - template<int LoadMode> - inline void writePacket(Index index, const PacketScalar& x) - { - derived().nestedExpression().const_cast_derived().template writePacket<LoadMode>(index, x); - } }; /** \returns an expression of the transpose of *this. @@ -207,7 +169,7 @@ template<typename Derived> inline Transpose<Derived> DenseBase<Derived>::transpose() { - return derived(); + return TransposeReturnType(derived()); } /** This is the const version of transpose(). 
@@ -245,8 +207,7 @@ template<typename Derived> inline const typename MatrixBase<Derived>::AdjointReturnType MatrixBase<Derived>::adjoint() const { - return this->transpose(); // in the complex case, the .conjugate() is be implicit here - // due to implicit conversion to return type + return AdjointReturnType(this->transpose()); } /*************************************************************************** @@ -256,18 +217,39 @@ MatrixBase<Derived>::adjoint() const namespace internal { template<typename MatrixType, - bool IsSquare = (MatrixType::RowsAtCompileTime == MatrixType::ColsAtCompileTime) && MatrixType::RowsAtCompileTime!=Dynamic> + bool IsSquare = (MatrixType::RowsAtCompileTime == MatrixType::ColsAtCompileTime) && MatrixType::RowsAtCompileTime!=Dynamic, + bool MatchPacketSize = + (int(MatrixType::RowsAtCompileTime) == int(internal::packet_traits<typename MatrixType::Scalar>::size)) + && (internal::evaluator<MatrixType>::Flags&PacketAccessBit) > struct inplace_transpose_selector; template<typename MatrixType> -struct inplace_transpose_selector<MatrixType,true> { // square matrix +struct inplace_transpose_selector<MatrixType,true,false> { // square matrix static void run(MatrixType& m) { m.matrix().template triangularView<StrictlyUpper>().swap(m.matrix().transpose()); } }; +// TODO: vectorized path is currently limited to LargestPacketSize x LargestPacketSize cases only. template<typename MatrixType> -struct inplace_transpose_selector<MatrixType,false> { // non square matrix +struct inplace_transpose_selector<MatrixType,true,true> { // PacketSize x PacketSize + static void run(MatrixType& m) { + typedef typename MatrixType::Scalar Scalar; + typedef typename internal::packet_traits<typename MatrixType::Scalar>::type Packet; + typedef typename MatrixType::Index Index; + const Index PacketSize = internal::packet_traits<Scalar>::size; + const Index Alignment = internal::evaluator<MatrixType>::Flags&AlignedBit ? 
Aligned : Unaligned; + PacketBlock<Packet> A; + for (Index i=0; i<PacketSize; ++i) + A.packet[i] = m.template packetByOuterInner<Alignment>(i,0); + internal::ptranspose(A); + for (Index i=0; i<PacketSize; ++i) + m.template writePacket<Alignment>(m.rowIndexByOuterInner(i,0), m.colIndexByOuterInner(i,0), A.packet[i]); + } +}; + +template<typename MatrixType,bool MatchPacketSize> +struct inplace_transpose_selector<MatrixType,false,MatchPacketSize> { // non square matrix static void run(MatrixType& m) { if (m.rows()==m.cols()) m.matrix().template triangularView<StrictlyUpper>().swap(m.matrix().transpose()); @@ -413,15 +395,15 @@ struct checkTransposeAliasing_impl<Derived, OtherDerived, false> } }; -} // end namespace internal - -template<typename Derived> -template<typename OtherDerived> -void DenseBase<Derived>::checkTransposeAliasing(const OtherDerived& other) const +template<typename Dst, typename Src> +void check_for_aliasing(const Dst &dst, const Src &src) { - internal::checkTransposeAliasing_impl<Derived, OtherDerived>::run(derived(), other); + internal::checkTransposeAliasing_impl<Dst, Src>::run(dst, src); } -#endif + +} // end namespace internal + +#endif // EIGEN_NO_DEBUG } // end namespace Eigen diff --git a/Eigen/src/Core/Transpositions.h b/Eigen/src/Core/Transpositions.h index 92261118f..77e7d6f45 100644 --- a/Eigen/src/Core/Transpositions.h +++ b/Eigen/src/Core/Transpositions.h @@ -240,7 +240,7 @@ class Map<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndexTyp typedef typename IndicesType::Scalar StorageIndexType; typedef typename IndicesType::Index Index; - inline Map(const StorageIndexType* indicesPtr) + explicit inline Map(const StorageIndexType* indicesPtr) : m_indices(indicesPtr) {} @@ -299,7 +299,7 @@ class TranspositionsWrapper typedef typename IndicesType::Scalar StorageIndexType; typedef typename IndicesType::Index Index; - inline TranspositionsWrapper(IndicesType& a_indices) + explicit inline TranspositionsWrapper(IndicesType& 
a_indices) : m_indices(a_indices) {} @@ -414,7 +414,7 @@ class Transpose<TranspositionsBase<TranspositionsDerived> > typedef typename TranspositionType::IndicesType IndicesType; public: - Transpose(const TranspositionType& t) : m_transpositions(t) {} + explicit Transpose(const TranspositionType& t) : m_transpositions(t) {} inline int size() const { return m_transpositions.size(); } diff --git a/Eigen/src/Core/TriangularMatrix.h b/Eigen/src/Core/TriangularMatrix.h index 72792d21b..cf0255bce 100644 --- a/Eigen/src/Core/TriangularMatrix.h +++ b/Eigen/src/Core/TriangularMatrix.h @@ -32,17 +32,23 @@ template<typename Derived> class TriangularBase : public EigenBase<Derived> enum { Mode = internal::traits<Derived>::Mode, - CoeffReadCost = internal::traits<Derived>::CoeffReadCost, RowsAtCompileTime = internal::traits<Derived>::RowsAtCompileTime, ColsAtCompileTime = internal::traits<Derived>::ColsAtCompileTime, MaxRowsAtCompileTime = internal::traits<Derived>::MaxRowsAtCompileTime, - MaxColsAtCompileTime = internal::traits<Derived>::MaxColsAtCompileTime + MaxColsAtCompileTime = internal::traits<Derived>::MaxColsAtCompileTime, + + SizeAtCompileTime = (internal::size_at_compile_time<internal::traits<Derived>::RowsAtCompileTime, + internal::traits<Derived>::ColsAtCompileTime>::ret) + /**< This is equal to the number of coefficients, i.e. the number of + * rows times the number of columns, or to \a Dynamic if this is not + * known at compile-time. 
\sa RowsAtCompileTime, ColsAtCompileTime */ }; typedef typename internal::traits<Derived>::Scalar Scalar; typedef typename internal::traits<Derived>::StorageKind StorageKind; typedef typename internal::traits<Derived>::Index Index; - typedef typename internal::traits<Derived>::DenseMatrixType DenseMatrixType; + typedef typename internal::traits<Derived>::FullMatrixType DenseMatrixType; typedef DenseMatrixType DenseType; + typedef Derived const& Nested; EIGEN_DEVICE_FUNC inline TriangularBase() { eigen_assert(!((Mode&UnitDiag) && (Mode&ZeroDiag))); } @@ -55,6 +61,14 @@ template<typename Derived> class TriangularBase : public EigenBase<Derived> inline Index outerStride() const { return derived().outerStride(); } EIGEN_DEVICE_FUNC inline Index innerStride() const { return derived().innerStride(); } + + // dummy resize function + void resize(Index nbRows, Index nbCols) + { + EIGEN_UNUSED_VARIABLE(nbRows); + EIGEN_UNUSED_VARIABLE(nbCols); + eigen_assert(nbRows==rows() && nbCols==nbCols); + } EIGEN_DEVICE_FUNC inline Scalar coeff(Index row, Index col) const { return derived().coeff(row,col); } @@ -155,96 +169,209 @@ struct traits<TriangularView<MatrixType, _Mode> > : traits<MatrixType> typedef typename nested<MatrixType>::type MatrixTypeNested; typedef typename remove_reference<MatrixTypeNested>::type MatrixTypeNestedNonRef; typedef typename remove_all<MatrixTypeNested>::type MatrixTypeNestedCleaned; + typedef typename MatrixType::PlainObject FullMatrixType; typedef MatrixType ExpressionType; - typedef typename MatrixType::PlainObject DenseMatrixType; enum { Mode = _Mode, - Flags = (MatrixTypeNestedCleaned::Flags & (HereditaryBits) & (~(PacketAccessBit | DirectAccessBit | LinearAccessBit))) | Mode, - CoeffReadCost = MatrixTypeNestedCleaned::CoeffReadCost + FlagsLvalueBit = is_lvalue<MatrixType>::value ? 
LvalueBit : 0, + Flags = (MatrixTypeNestedCleaned::Flags & (HereditaryBits | FlagsLvalueBit) & (~(PacketAccessBit | DirectAccessBit | LinearAccessBit))) }; }; } -template<int Mode, bool LhsIsTriangular, - typename Lhs, bool LhsIsVector, - typename Rhs, bool RhsIsVector> -struct TriangularProduct; +template<typename _MatrixType, unsigned int _Mode, typename StorageKind> class TriangularViewImpl; template<typename _MatrixType, unsigned int _Mode> class TriangularView - : public TriangularBase<TriangularView<_MatrixType, _Mode> > + : public TriangularViewImpl<_MatrixType, _Mode, typename internal::traits<_MatrixType>::StorageKind > { public: - typedef TriangularBase<TriangularView> Base; + typedef TriangularViewImpl<_MatrixType, _Mode, typename internal::traits<_MatrixType>::StorageKind > Base; typedef typename internal::traits<TriangularView>::Scalar Scalar; - typedef _MatrixType MatrixType; - typedef typename internal::traits<TriangularView>::DenseMatrixType DenseMatrixType; - typedef DenseMatrixType PlainObject; protected: typedef typename internal::traits<TriangularView>::MatrixTypeNested MatrixTypeNested; typedef typename internal::traits<TriangularView>::MatrixTypeNestedNonRef MatrixTypeNestedNonRef; - typedef typename internal::traits<TriangularView>::MatrixTypeNestedCleaned MatrixTypeNestedCleaned; typedef typename internal::remove_all<typename MatrixType::ConjugateReturnType>::type MatrixConjugateReturnType; public: - using Base::evalToLazy; - typedef typename internal::traits<TriangularView>::StorageKind StorageKind; typedef typename internal::traits<TriangularView>::Index Index; + typedef typename internal::traits<TriangularView>::MatrixTypeNestedCleaned NestedExpression; enum { Mode = _Mode, + Flags = internal::traits<TriangularView>::Flags, TransposeMode = (Mode & Upper ? Lower : 0) | (Mode & Lower ? 
Upper : 0) | (Mode & (UnitDiag)) - | (Mode & (ZeroDiag)) + | (Mode & (ZeroDiag)), + IsVectorAtCompileTime = false }; + // FIXME This, combined with const_cast_derived in transpose() leads to a const-correctness loophole EIGEN_DEVICE_FUNC - inline TriangularView(const MatrixType& matrix) : m_matrix(matrix) + explicit inline TriangularView(MatrixType& matrix) : m_matrix(matrix) {} + + using Base::operator=; + TriangularView& operator=(const TriangularView &other) + { return Base::operator=(other); } EIGEN_DEVICE_FUNC inline Index rows() const { return m_matrix.rows(); } EIGEN_DEVICE_FUNC inline Index cols() const { return m_matrix.cols(); } + + EIGEN_DEVICE_FUNC + const NestedExpression& nestedExpression() const { return m_matrix; } + EIGEN_DEVICE_FUNC + NestedExpression& nestedExpression() { return *const_cast<NestedExpression*>(&m_matrix); } + + /** \sa MatrixBase::conjugate() const */ + typedef TriangularView<const MatrixConjugateReturnType,Mode> ConjugateReturnType; + EIGEN_DEVICE_FUNC + inline const ConjugateReturnType conjugate() const + { return ConjugateReturnType(m_matrix.conjugate()); } + + typedef TriangularView<const typename MatrixType::AdjointReturnType,TransposeMode> AdjointReturnType; + /** \sa MatrixBase::adjoint() const */ + EIGEN_DEVICE_FUNC + inline const AdjointReturnType adjoint() const + { return AdjointReturnType(m_matrix.adjoint()); } + + typedef TriangularView<typename MatrixType::TransposeReturnType,TransposeMode> TransposeReturnType; + /** \sa MatrixBase::transpose() */ + EIGEN_DEVICE_FUNC + inline TransposeReturnType transpose() + { + EIGEN_STATIC_ASSERT_LVALUE(MatrixType) + typename MatrixType::TransposeReturnType tmp(m_matrix.const_cast_derived()); + return TransposeReturnType(tmp); + } + + typedef TriangularView<const typename MatrixType::ConstTransposeReturnType,TransposeMode> ConstTransposeReturnType; + /** \sa MatrixBase::transpose() const */ EIGEN_DEVICE_FUNC - inline Index outerStride() const { return m_matrix.outerStride(); } + 
inline const ConstTransposeReturnType transpose() const + { + return ConstTransposeReturnType(m_matrix.transpose()); + } + + template<typename Other> EIGEN_DEVICE_FUNC - inline Index innerStride() const { return m_matrix.innerStride(); } + inline const Solve<TriangularView, Other> + solve(const MatrixBase<Other>& other) const + { return Solve<TriangularView, Other>(*this, other.derived()); } + + // workaround MSVC ICE + #if EIGEN_COMP_MSVC + template<int Side, typename Other> + EIGEN_DEVICE_FUNC + inline const internal::triangular_solve_retval<Side,TriangularView, Other> + solve(const MatrixBase<Other>& other) const + { return Base::template solve<Side>(other); } + #else + using Base::solve; + #endif + + EIGEN_DEVICE_FUNC + const SelfAdjointView<MatrixTypeNestedNonRef,Mode> selfadjointView() const + { + EIGEN_STATIC_ASSERT((Mode&UnitDiag)==0,PROGRAMMING_ERROR); + return SelfAdjointView<MatrixTypeNestedNonRef,Mode>(m_matrix); + } + EIGEN_DEVICE_FUNC + SelfAdjointView<MatrixTypeNestedNonRef,Mode> selfadjointView() + { + EIGEN_STATIC_ASSERT((Mode&UnitDiag)==0,PROGRAMMING_ERROR); + return SelfAdjointView<MatrixTypeNestedNonRef,Mode>(m_matrix); + } + + EIGEN_DEVICE_FUNC + Scalar determinant() const + { + if (Mode & UnitDiag) + return 1; + else if (Mode & ZeroDiag) + return 0; + else + return m_matrix.diagonal().prod(); + } + + protected: + + MatrixTypeNested m_matrix; +}; + +template<typename _MatrixType, unsigned int _Mode> class TriangularViewImpl<_MatrixType,_Mode,Dense> + : public TriangularBase<TriangularView<_MatrixType, _Mode> > +{ + public: + + typedef TriangularView<_MatrixType, _Mode> TriangularViewType; + typedef TriangularBase<TriangularViewType> Base; + typedef typename internal::traits<TriangularViewType>::Scalar Scalar; + + typedef _MatrixType MatrixType; + typedef typename MatrixType::PlainObject DenseMatrixType; + typedef DenseMatrixType PlainObject; + + public: + using Base::evalToLazy; + using Base::derived; + + typedef typename 
internal::traits<TriangularViewType>::StorageKind StorageKind; + typedef typename internal::traits<TriangularViewType>::Index Index; + + enum { + Mode = _Mode, + Flags = internal::traits<TriangularViewType>::Flags + }; + + EIGEN_DEVICE_FUNC + inline Index outerStride() const { return derived().nestedExpression().outerStride(); } + EIGEN_DEVICE_FUNC + inline Index innerStride() const { return derived().nestedExpression().innerStride(); } /** \sa MatrixBase::operator+=() */ template<typename Other> EIGEN_DEVICE_FUNC - TriangularView& operator+=(const DenseBase<Other>& other) { return *this = m_matrix + other.derived(); } + TriangularViewType& operator+=(const DenseBase<Other>& other) { + internal::call_assignment_no_alias(derived(), other.derived(), internal::add_assign_op<Scalar>()); + return derived(); + } /** \sa MatrixBase::operator-=() */ template<typename Other> EIGEN_DEVICE_FUNC - TriangularView& operator-=(const DenseBase<Other>& other) { return *this = m_matrix - other.derived(); } + TriangularViewType& operator-=(const DenseBase<Other>& other) { + internal::call_assignment_no_alias(derived(), other.derived(), internal::sub_assign_op<Scalar>()); + return derived(); + } + /** \sa MatrixBase::operator*=() */ EIGEN_DEVICE_FUNC - TriangularView& operator*=(const typename internal::traits<MatrixType>::Scalar& other) { return *this = m_matrix * other; } + TriangularViewType& operator*=(const typename internal::traits<MatrixType>::Scalar& other) { return *this = derived().nestedExpression() * other; } /** \sa MatrixBase::operator/=() */ EIGEN_DEVICE_FUNC - TriangularView& operator/=(const typename internal::traits<MatrixType>::Scalar& other) { return *this = m_matrix / other; } + TriangularViewType& operator/=(const typename internal::traits<MatrixType>::Scalar& other) { return *this = derived().nestedExpression() / other; } /** \sa MatrixBase::fill() */ EIGEN_DEVICE_FUNC void fill(const Scalar& value) { setConstant(value); } /** \sa MatrixBase::setConstant() */ 
EIGEN_DEVICE_FUNC - TriangularView& setConstant(const Scalar& value) - { return *this = MatrixType::Constant(rows(), cols(), value); } + TriangularViewType& setConstant(const Scalar& value) + { return *this = MatrixType::Constant(derived().rows(), derived().cols(), value); } /** \sa MatrixBase::setZero() */ EIGEN_DEVICE_FUNC - TriangularView& setZero() { return setConstant(Scalar(0)); } + TriangularViewType& setZero() { return setConstant(Scalar(0)); } /** \sa MatrixBase::setOnes() */ EIGEN_DEVICE_FUNC - TriangularView& setOnes() { return setConstant(Scalar(1)); } + TriangularViewType& setOnes() { return setConstant(Scalar(1)); } /** \sa MatrixBase::coeff() * \warning the coordinates must fit into the referenced triangular part @@ -253,7 +380,7 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView inline Scalar coeff(Index row, Index col) const { Base::check_coordinates_internal(row, col); - return m_matrix.coeff(row, col); + return derived().nestedExpression().coeff(row, col); } /** \sa MatrixBase::coeffRef() @@ -262,27 +389,23 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index row, Index col) { + EIGEN_STATIC_ASSERT_LVALUE(TriangularViewType); Base::check_coordinates_internal(row, col); - return m_matrix.const_cast_derived().coeffRef(row, col); + return derived().nestedExpression().const_cast_derived().coeffRef(row, col); } - EIGEN_DEVICE_FUNC - const MatrixTypeNestedCleaned& nestedExpression() const { return m_matrix; } - EIGEN_DEVICE_FUNC - MatrixTypeNestedCleaned& nestedExpression() { return *const_cast<MatrixTypeNestedCleaned*>(&m_matrix); } - /** Assigns a triangular matrix to a triangular part of a dense matrix */ template<typename OtherDerived> EIGEN_DEVICE_FUNC - TriangularView& operator=(const TriangularBase<OtherDerived>& other); + TriangularViewType& operator=(const TriangularBase<OtherDerived>& other); template<typename OtherDerived> EIGEN_DEVICE_FUNC - 
TriangularView& operator=(const MatrixBase<OtherDerived>& other); + TriangularViewType& operator=(const MatrixBase<OtherDerived>& other); EIGEN_DEVICE_FUNC - TriangularView& operator=(const TriangularView& other) - { return *this = other.nestedExpression(); } + TriangularViewType& operator=(const TriangularViewImpl& other) + { return *this = other.derived().nestedExpression(); } template<typename OtherDerived> EIGEN_DEVICE_FUNC @@ -290,378 +413,88 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView template<typename OtherDerived> EIGEN_DEVICE_FUNC - void lazyAssign(const MatrixBase<OtherDerived>& other); - - /** \sa MatrixBase::conjugate() */ - EIGEN_DEVICE_FUNC - inline TriangularView<MatrixConjugateReturnType,Mode> conjugate() - { return m_matrix.conjugate(); } - /** \sa MatrixBase::conjugate() const */ - EIGEN_DEVICE_FUNC - inline const TriangularView<MatrixConjugateReturnType,Mode> conjugate() const - { return m_matrix.conjugate(); } - - /** \sa MatrixBase::adjoint() const */ - EIGEN_DEVICE_FUNC - inline const TriangularView<const typename MatrixType::AdjointReturnType,TransposeMode> adjoint() const - { return m_matrix.adjoint(); } - - /** \sa MatrixBase::transpose() */ - EIGEN_DEVICE_FUNC - inline TriangularView<Transpose<MatrixType>,TransposeMode> transpose() - { - EIGEN_STATIC_ASSERT_LVALUE(MatrixType) - return m_matrix.const_cast_derived().transpose(); - } - /** \sa MatrixBase::transpose() const */ - EIGEN_DEVICE_FUNC - inline const TriangularView<Transpose<MatrixType>,TransposeMode> transpose() const - { - return m_matrix.transpose(); - } + void lazyAssign(const MatrixBase<OtherDerived>& other); /** Efficient triangular matrix times vector/matrix product */ template<typename OtherDerived> EIGEN_DEVICE_FUNC - TriangularProduct<Mode, true, MatrixType, false, OtherDerived, OtherDerived::ColsAtCompileTime==1> + const Product<TriangularViewType,OtherDerived> operator*(const MatrixBase<OtherDerived>& rhs) const { - return TriangularProduct 
- <Mode, true, MatrixType, false, OtherDerived, OtherDerived::ColsAtCompileTime==1> - (m_matrix, rhs.derived()); + return Product<TriangularViewType,OtherDerived>(derived(), rhs.derived()); } /** Efficient vector/matrix times triangular matrix product */ template<typename OtherDerived> friend EIGEN_DEVICE_FUNC - TriangularProduct<Mode, false, OtherDerived, OtherDerived::RowsAtCompileTime==1, MatrixType, false> - operator*(const MatrixBase<OtherDerived>& lhs, const TriangularView& rhs) + const Product<OtherDerived,TriangularViewType> + operator*(const MatrixBase<OtherDerived>& lhs, const TriangularViewImpl& rhs) { - return TriangularProduct - <Mode, false, OtherDerived, OtherDerived::RowsAtCompileTime==1, MatrixType, false> - (lhs.derived(),rhs.m_matrix); + return Product<OtherDerived,TriangularViewType>(lhs.derived(),rhs.derived()); } template<int Side, typename Other> EIGEN_DEVICE_FUNC - inline const internal::triangular_solve_retval<Side,TriangularView, Other> + inline const internal::triangular_solve_retval<Side,TriangularViewType, Other> solve(const MatrixBase<Other>& other) const; template<int Side, typename OtherDerived> EIGEN_DEVICE_FUNC void solveInPlace(const MatrixBase<OtherDerived>& other) const; - template<typename Other> - EIGEN_DEVICE_FUNC - inline const internal::triangular_solve_retval<OnTheLeft,TriangularView, Other> - solve(const MatrixBase<Other>& other) const - { return solve<OnTheLeft>(other); } - template<typename OtherDerived> EIGEN_DEVICE_FUNC void solveInPlace(const MatrixBase<OtherDerived>& other) const { return solveInPlace<OnTheLeft>(other); } - EIGEN_DEVICE_FUNC - const SelfAdjointView<MatrixTypeNestedNonRef,Mode> selfadjointView() const - { - EIGEN_STATIC_ASSERT((Mode&UnitDiag)==0,PROGRAMMING_ERROR); - return SelfAdjointView<MatrixTypeNestedNonRef,Mode>(m_matrix); - } - EIGEN_DEVICE_FUNC - SelfAdjointView<MatrixTypeNestedNonRef,Mode> selfadjointView() - { - EIGEN_STATIC_ASSERT((Mode&UnitDiag)==0,PROGRAMMING_ERROR); - return 
SelfAdjointView<MatrixTypeNestedNonRef,Mode>(m_matrix); - } - template<typename OtherDerived> EIGEN_DEVICE_FUNC void swap(TriangularBase<OtherDerived> const & other) { - TriangularView<SwapWrapper<MatrixType>,Mode>(const_cast<MatrixType&>(m_matrix)).lazyAssign(other.derived()); + call_assignment(derived(), other.const_cast_derived(), internal::swap_assign_op<Scalar>()); } + // TODO: this overload is ambiguous and it should be deprecated (Gael) template<typename OtherDerived> EIGEN_DEVICE_FUNC void swap(MatrixBase<OtherDerived> const & other) { - SwapWrapper<MatrixType> swaper(const_cast<MatrixType&>(m_matrix)); - TriangularView<SwapWrapper<MatrixType>,Mode>(swaper).lazyAssign(other.derived()); + call_assignment(derived(), other.const_cast_derived(), internal::swap_assign_op<Scalar>()); } + template<typename RhsType, typename DstType> EIGEN_DEVICE_FUNC - Scalar determinant() const - { - if (Mode & UnitDiag) - return 1; - else if (Mode & ZeroDiag) - return 0; - else - return m_matrix.diagonal().prod(); - } - - // TODO simplify the following: - template<typename ProductDerived, typename Lhs, typename Rhs> - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TriangularView& operator=(const ProductBase<ProductDerived, Lhs,Rhs>& other) - { - setZero(); - return assignProduct(other,1); - } - - template<typename ProductDerived, typename Lhs, typename Rhs> - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TriangularView& operator+=(const ProductBase<ProductDerived, Lhs,Rhs>& other) - { - return assignProduct(other,1); - } - - template<typename ProductDerived, typename Lhs, typename Rhs> - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TriangularView& operator-=(const ProductBase<ProductDerived, Lhs,Rhs>& other) - { - return assignProduct(other,-1); - } - - - template<typename ProductDerived> - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TriangularView& operator=(const ScaledProduct<ProductDerived>& other) - { - setZero(); - return assignProduct(other,other.alpha()); - } - - template<typename 
ProductDerived> - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TriangularView& operator+=(const ScaledProduct<ProductDerived>& other) - { - return assignProduct(other,other.alpha()); - } - - template<typename ProductDerived> - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TriangularView& operator-=(const ScaledProduct<ProductDerived>& other) - { - return assignProduct(other,-other.alpha()); + EIGEN_STRONG_INLINE void _solve_impl(const RhsType &rhs, DstType &dst) const { + if(!(internal::is_same<RhsType,DstType>::value && internal::extract_data(dst) == internal::extract_data(rhs))) + dst = rhs; + this->solveInPlace(dst); } - - protected: - - template<typename ProductDerived, typename Lhs, typename Rhs> - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TriangularView& assignProduct(const ProductBase<ProductDerived, Lhs,Rhs>& prod, const Scalar& alpha); - MatrixTypeNested m_matrix; + template<typename ProductType> + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE TriangularViewType& _assignProduct(const ProductType& prod, const Scalar& alpha); }; /*************************************************************************** * Implementation of triangular evaluation/assignment ***************************************************************************/ -namespace internal { - -template<typename Derived1, typename Derived2, unsigned int Mode, int UnrollCount, bool ClearOpposite> -struct triangular_assignment_selector -{ - enum { - col = (UnrollCount-1) / Derived1::RowsAtCompileTime, - row = (UnrollCount-1) % Derived1::RowsAtCompileTime - }; - - typedef typename Derived1::Scalar Scalar; - - EIGEN_DEVICE_FUNC - static inline void run(Derived1 &dst, const Derived2 &src) - { - triangular_assignment_selector<Derived1, Derived2, Mode, UnrollCount-1, ClearOpposite>::run(dst, src); - - eigen_assert( Mode == Upper || Mode == Lower - || Mode == StrictlyUpper || Mode == StrictlyLower - || Mode == UnitUpper || Mode == UnitLower); - if((Mode == Upper && row <= col) - || (Mode == Lower && row >= col) - || (Mode 
== StrictlyUpper && row < col) - || (Mode == StrictlyLower && row > col) - || (Mode == UnitUpper && row < col) - || (Mode == UnitLower && row > col)) - dst.copyCoeff(row, col, src); - else if(ClearOpposite) - { - if (Mode&UnitDiag && row==col) - dst.coeffRef(row, col) = Scalar(1); - else - dst.coeffRef(row, col) = Scalar(0); - } - } -}; - -// prevent buggy user code from causing an infinite recursion -template<typename Derived1, typename Derived2, unsigned int Mode, bool ClearOpposite> -struct triangular_assignment_selector<Derived1, Derived2, Mode, 0, ClearOpposite> -{ - EIGEN_DEVICE_FUNC - static inline void run(Derived1 &, const Derived2 &) {} -}; - -template<typename Derived1, typename Derived2, bool ClearOpposite> -struct triangular_assignment_selector<Derived1, Derived2, Upper, Dynamic, ClearOpposite> -{ - typedef typename Derived1::Index Index; - typedef typename Derived1::Scalar Scalar; - EIGEN_DEVICE_FUNC - static inline void run(Derived1 &dst, const Derived2 &src) - { - for(Index j = 0; j < dst.cols(); ++j) - { - Index maxi = (std::min)(j, dst.rows()-1); - for(Index i = 0; i <= maxi; ++i) - dst.copyCoeff(i, j, src); - if (ClearOpposite) - for(Index i = maxi+1; i < dst.rows(); ++i) - dst.coeffRef(i, j) = Scalar(0); - } - } -}; - -template<typename Derived1, typename Derived2, bool ClearOpposite> -struct triangular_assignment_selector<Derived1, Derived2, Lower, Dynamic, ClearOpposite> -{ - typedef typename Derived1::Index Index; - EIGEN_DEVICE_FUNC - static inline void run(Derived1 &dst, const Derived2 &src) - { - for(Index j = 0; j < dst.cols(); ++j) - { - for(Index i = j; i < dst.rows(); ++i) - dst.copyCoeff(i, j, src); - Index maxi = (std::min)(j, dst.rows()); - if (ClearOpposite) - for(Index i = 0; i < maxi; ++i) - dst.coeffRef(i, j) = static_cast<typename Derived1::Scalar>(0); - } - } -}; - -template<typename Derived1, typename Derived2, bool ClearOpposite> -struct triangular_assignment_selector<Derived1, Derived2, StrictlyUpper, Dynamic, 
ClearOpposite> -{ - typedef typename Derived1::Index Index; - typedef typename Derived1::Scalar Scalar; - EIGEN_DEVICE_FUNC - static inline void run(Derived1 &dst, const Derived2 &src) - { - for(Index j = 0; j < dst.cols(); ++j) - { - Index maxi = (std::min)(j, dst.rows()); - for(Index i = 0; i < maxi; ++i) - dst.copyCoeff(i, j, src); - if (ClearOpposite) - for(Index i = maxi; i < dst.rows(); ++i) - dst.coeffRef(i, j) = Scalar(0); - } - } -}; - -template<typename Derived1, typename Derived2, bool ClearOpposite> -struct triangular_assignment_selector<Derived1, Derived2, StrictlyLower, Dynamic, ClearOpposite> -{ - typedef typename Derived1::Index Index; - EIGEN_DEVICE_FUNC - static inline void run(Derived1 &dst, const Derived2 &src) - { - for(Index j = 0; j < dst.cols(); ++j) - { - for(Index i = j+1; i < dst.rows(); ++i) - dst.copyCoeff(i, j, src); - Index maxi = (std::min)(j, dst.rows()-1); - if (ClearOpposite) - for(Index i = 0; i <= maxi; ++i) - dst.coeffRef(i, j) = static_cast<typename Derived1::Scalar>(0); - } - } -}; - -template<typename Derived1, typename Derived2, bool ClearOpposite> -struct triangular_assignment_selector<Derived1, Derived2, UnitUpper, Dynamic, ClearOpposite> -{ - typedef typename Derived1::Index Index; - EIGEN_DEVICE_FUNC - static inline void run(Derived1 &dst, const Derived2 &src) - { - for(Index j = 0; j < dst.cols(); ++j) - { - Index maxi = (std::min)(j, dst.rows()); - for(Index i = 0; i < maxi; ++i) - dst.copyCoeff(i, j, src); - if (ClearOpposite) - { - for(Index i = maxi+1; i < dst.rows(); ++i) - dst.coeffRef(i, j) = 0; - } - } - dst.diagonal().setOnes(); - } -}; -template<typename Derived1, typename Derived2, bool ClearOpposite> -struct triangular_assignment_selector<Derived1, Derived2, UnitLower, Dynamic, ClearOpposite> -{ - typedef typename Derived1::Index Index; - EIGEN_DEVICE_FUNC - static inline void run(Derived1 &dst, const Derived2 &src) - { - for(Index j = 0; j < dst.cols(); ++j) - { - Index maxi = (std::min)(j, dst.rows()); - 
for(Index i = maxi+1; i < dst.rows(); ++i) - dst.copyCoeff(i, j, src); - if (ClearOpposite) - { - for(Index i = 0; i < maxi; ++i) - dst.coeffRef(i, j) = 0; - } - } - dst.diagonal().setOnes(); - } -}; - -} // end namespace internal - // FIXME should we keep that possibility template<typename MatrixType, unsigned int Mode> template<typename OtherDerived> inline TriangularView<MatrixType, Mode>& -TriangularView<MatrixType, Mode>::operator=(const MatrixBase<OtherDerived>& other) +TriangularViewImpl<MatrixType, Mode, Dense>::operator=(const MatrixBase<OtherDerived>& other) { - if(OtherDerived::Flags & EvalBeforeAssigningBit) - { - typename internal::plain_matrix_type<OtherDerived>::type other_evaluated(other.rows(), other.cols()); - other_evaluated.template triangularView<Mode>().lazyAssign(other.derived()); - lazyAssign(other_evaluated); - } - else - lazyAssign(other.derived()); - return *this; + internal::call_assignment_no_alias(derived(), other.derived(), internal::assign_op<Scalar>()); + return derived(); } // FIXME should we keep that possibility template<typename MatrixType, unsigned int Mode> template<typename OtherDerived> -void TriangularView<MatrixType, Mode>::lazyAssign(const MatrixBase<OtherDerived>& other) +void TriangularViewImpl<MatrixType, Mode, Dense>::lazyAssign(const MatrixBase<OtherDerived>& other) { - enum { - unroll = MatrixType::SizeAtCompileTime != Dynamic - && internal::traits<OtherDerived>::CoeffReadCost != Dynamic - && MatrixType::SizeAtCompileTime*internal::traits<OtherDerived>::CoeffReadCost/2 <= EIGEN_UNROLLING_LIMIT - }; - eigen_assert(m_matrix.rows() == other.rows() && m_matrix.cols() == other.cols()); - - internal::triangular_assignment_selector - <MatrixType, OtherDerived, int(Mode), - unroll ? 
int(MatrixType::SizeAtCompileTime) : Dynamic, - false // do not change the opposite triangular part - >::run(m_matrix.const_cast_derived(), other.derived()); + internal::call_assignment(derived().noalias(), other.template triangularView<Mode>()); } @@ -669,37 +502,19 @@ void TriangularView<MatrixType, Mode>::lazyAssign(const MatrixBase<OtherDerived> template<typename MatrixType, unsigned int Mode> template<typename OtherDerived> inline TriangularView<MatrixType, Mode>& -TriangularView<MatrixType, Mode>::operator=(const TriangularBase<OtherDerived>& other) +TriangularViewImpl<MatrixType, Mode, Dense>::operator=(const TriangularBase<OtherDerived>& other) { eigen_assert(Mode == int(OtherDerived::Mode)); - if(internal::traits<OtherDerived>::Flags & EvalBeforeAssigningBit) - { - typename OtherDerived::DenseMatrixType other_evaluated(other.rows(), other.cols()); - other_evaluated.template triangularView<Mode>().lazyAssign(other.derived().nestedExpression()); - lazyAssign(other_evaluated); - } - else - lazyAssign(other.derived().nestedExpression()); - return *this; + internal::call_assignment(derived(), other.derived()); + return derived(); } template<typename MatrixType, unsigned int Mode> template<typename OtherDerived> -void TriangularView<MatrixType, Mode>::lazyAssign(const TriangularBase<OtherDerived>& other) +void TriangularViewImpl<MatrixType, Mode, Dense>::lazyAssign(const TriangularBase<OtherDerived>& other) { - enum { - unroll = MatrixType::SizeAtCompileTime != Dynamic - && internal::traits<OtherDerived>::CoeffReadCost != Dynamic - && MatrixType::SizeAtCompileTime * internal::traits<OtherDerived>::CoeffReadCost / 2 - <= EIGEN_UNROLLING_LIMIT - }; - eigen_assert(m_matrix.rows() == other.rows() && m_matrix.cols() == other.cols()); - - internal::triangular_assignment_selector - <MatrixType, OtherDerived, int(Mode), - unroll ? 
int(MatrixType::SizeAtCompileTime) : Dynamic, - false // preserve the opposite triangular part - >::run(m_matrix.const_cast_derived(), other.derived().nestedExpression()); + eigen_assert(Mode == int(OtherDerived::Mode)); + internal::call_assignment(derived().noalias(), other.derived()); } /*************************************************************************** @@ -722,27 +537,6 @@ void TriangularBase<Derived>::evalTo(MatrixBase<DenseDerived> &other) const evalToLazy(other.derived()); } -/** Assigns a triangular or selfadjoint matrix to a dense matrix. - * If the matrix is triangular, the opposite part is set to zero. */ -template<typename Derived> -template<typename DenseDerived> -void TriangularBase<Derived>::evalToLazy(MatrixBase<DenseDerived> &other) const -{ - enum { - unroll = DenseDerived::SizeAtCompileTime != Dynamic - && internal::traits<Derived>::CoeffReadCost != Dynamic - && DenseDerived::SizeAtCompileTime * internal::traits<Derived>::CoeffReadCost / 2 - <= EIGEN_UNROLLING_LIMIT - }; - other.derived().resize(this->rows(), this->cols()); - - internal::triangular_assignment_selector - <DenseDerived, typename internal::traits<Derived>::MatrixTypeNestedCleaned, Derived::Mode, - unroll ? 
int(DenseDerived::SizeAtCompileTime) : Dynamic, - true // clear the opposite triangular part - >::run(other.derived(), derived().nestedExpression()); -} - /*************************************************************************** * Implementation of TriangularView methods ***************************************************************************/ @@ -767,7 +561,7 @@ template<unsigned int Mode> typename MatrixBase<Derived>::template TriangularViewReturnType<Mode>::Type MatrixBase<Derived>::triangularView() { - return derived(); + return typename TriangularViewReturnType<Mode>::Type(derived()); } /** This is the const version of MatrixBase::triangularView() */ @@ -776,7 +570,7 @@ template<unsigned int Mode> typename MatrixBase<Derived>::template ConstTriangularViewReturnType<Mode>::Type MatrixBase<Derived>::triangularView() const { - return derived(); + return typename ConstTriangularViewReturnType<Mode>::Type(derived()); } /** \returns true if *this is approximately equal to an upper triangular matrix, @@ -831,6 +625,293 @@ bool MatrixBase<Derived>::isLowerTriangular(const RealScalar& prec) const return true; } + +/*************************************************************************** +**************************************************************************** +* Evaluators and Assignment of triangular expressions +*************************************************************************** +***************************************************************************/ + +namespace internal { + + +// TODO currently a triangular expression has the form TriangularView<.,.> +// in the future triangular-ness should be defined by the expression traits +// such that Transpose<TriangularView<.,.> > is valid. 
(currently TriangularBase::transpose() is overloaded to make it work) +template<typename MatrixType, unsigned int Mode> +struct evaluator_traits<TriangularView<MatrixType,Mode> > +{ + typedef typename storage_kind_to_evaluator_kind<typename MatrixType::StorageKind>::Kind Kind; + typedef typename glue_shapes<typename evaluator_traits<MatrixType>::Shape, TriangularShape>::type Shape; + + // 1 if assignment A = B assumes aliasing when B is of type T and thus B needs to be evaluated into a + // temporary; 0 if not. + static const int AssumeAliasing = 0; +}; + +template<typename MatrixType, unsigned int Mode> +struct unary_evaluator<TriangularView<MatrixType,Mode>, IndexBased> + : evaluator<typename internal::remove_all<MatrixType>::type> +{ + typedef TriangularView<MatrixType,Mode> XprType; + typedef evaluator<typename internal::remove_all<MatrixType>::type> Base; + typedef evaluator<XprType> type; + unary_evaluator(const XprType &xpr) : Base(xpr.nestedExpression()) {} +}; + +// Additional assignment kinds: +struct Triangular2Triangular {}; +struct Triangular2Dense {}; +struct Dense2Triangular {}; + + +template<typename Kernel, unsigned int Mode, int UnrollCount, bool ClearOpposite> struct triangular_assignment_loop; + + +/** \internal Specialization of the dense assignment kernel for triangular matrices. + * The main difference is that the triangular, diagonal, and opposite parts are processed through three different functions. 
+ * \tparam UpLo must be either Lower or Upper + * \tparam Mode must be either 0, UnitDiag, ZeroDiag, or SelfAdjoint + */ +template<int UpLo, int Mode, int SetOpposite, typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor, int Version = Specialized> +class triangular_dense_assignment_kernel : public generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, Version> +{ +protected: + typedef generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, Version> Base; + typedef typename Base::DstXprType DstXprType; + typedef typename Base::SrcXprType SrcXprType; + using Base::m_dst; + using Base::m_src; + using Base::m_functor; +public: + + typedef typename Base::DstEvaluatorType DstEvaluatorType; + typedef typename Base::SrcEvaluatorType SrcEvaluatorType; + typedef typename Base::Scalar Scalar; + typedef typename Base::Index Index; + typedef typename Base::AssignmentTraits AssignmentTraits; + + + EIGEN_DEVICE_FUNC triangular_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr) + : Base(dst, src, func, dstExpr) + {} + +#ifdef EIGEN_INTERNAL_DEBUGGING + EIGEN_DEVICE_FUNC void assignCoeff(Index row, Index col) + { + eigen_internal_assert(row!=col); + Base::assignCoeff(row,col); + } +#else + using Base::assignCoeff; +#endif + + EIGEN_DEVICE_FUNC void assignDiagonalCoeff(Index id) + { + if(Mode==UnitDiag && SetOpposite) m_functor.assignCoeff(m_dst.coeffRef(id,id), Scalar(1)); + else if(Mode==ZeroDiag && SetOpposite) m_functor.assignCoeff(m_dst.coeffRef(id,id), Scalar(0)); + else if(Mode==0) Base::assignCoeff(id,id); + } + + EIGEN_DEVICE_FUNC void assignOppositeCoeff(Index row, Index col) + { + eigen_internal_assert(row!=col); + if(SetOpposite) + m_functor.assignCoeff(m_dst.coeffRef(row,col), Scalar(0)); + } +}; + +template<int Mode, bool SetOpposite, typename DstXprType, typename SrcXprType, typename Functor> +EIGEN_DEVICE_FUNC void 
call_triangular_assignment_loop(const DstXprType& dst, const SrcXprType& src, const Functor &func) +{ + eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); + + typedef typename evaluator<DstXprType>::type DstEvaluatorType; + typedef typename evaluator<SrcXprType>::type SrcEvaluatorType; + + DstEvaluatorType dstEvaluator(dst); + SrcEvaluatorType srcEvaluator(src); + + typedef triangular_dense_assignment_kernel< Mode&(Lower|Upper),Mode&(UnitDiag|ZeroDiag|SelfAdjoint),SetOpposite, + DstEvaluatorType,SrcEvaluatorType,Functor> Kernel; + Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived()); + + enum { + unroll = DstXprType::SizeAtCompileTime != Dynamic + && SrcEvaluatorType::CoeffReadCost != Dynamic + && DstXprType::SizeAtCompileTime * SrcEvaluatorType::CoeffReadCost / 2 <= EIGEN_UNROLLING_LIMIT + }; + + triangular_assignment_loop<Kernel, Mode, unroll ? int(DstXprType::SizeAtCompileTime) : Dynamic, SetOpposite>::run(kernel); +} + +template<int Mode, bool SetOpposite, typename DstXprType, typename SrcXprType> +EIGEN_DEVICE_FUNC void call_triangular_assignment_loop(const DstXprType& dst, const SrcXprType& src) +{ + call_triangular_assignment_loop<Mode,SetOpposite>(dst, src, internal::assign_op<typename DstXprType::Scalar>()); +} + +template<> struct AssignmentKind<TriangularShape,TriangularShape> { typedef Triangular2Triangular Kind; }; +template<> struct AssignmentKind<DenseShape,TriangularShape> { typedef Triangular2Dense Kind; }; +template<> struct AssignmentKind<TriangularShape,DenseShape> { typedef Dense2Triangular Kind; }; + + +template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar> +struct Assignment<DstXprType, SrcXprType, Functor, Triangular2Triangular, Scalar> +{ + EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const Functor &func) + { + eigen_assert(int(DstXprType::Mode) == int(SrcXprType::Mode)); + + call_triangular_assignment_loop<DstXprType::Mode, false>(dst, src, 
func); + } +}; + +template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar> +struct Assignment<DstXprType, SrcXprType, Functor, Triangular2Dense, Scalar> +{ + EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const Functor &func) + { + call_triangular_assignment_loop<SrcXprType::Mode, (SrcXprType::Mode&SelfAdjoint)==0>(dst, src, func); + } +}; + +template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar> +struct Assignment<DstXprType, SrcXprType, Functor, Dense2Triangular, Scalar> +{ + EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const Functor &func) + { + call_triangular_assignment_loop<DstXprType::Mode, false>(dst, src, func); + } +}; + + +template<typename Kernel, unsigned int Mode, int UnrollCount, bool SetOpposite> +struct triangular_assignment_loop +{ + // FIXME: this is not very clean, perhaps this information should be provided by the kernel? + typedef typename Kernel::DstEvaluatorType DstEvaluatorType; + typedef typename DstEvaluatorType::XprType DstXprType; + + enum { + col = (UnrollCount-1) / DstXprType::RowsAtCompileTime, + row = (UnrollCount-1) % DstXprType::RowsAtCompileTime + }; + + typedef typename Kernel::Scalar Scalar; + + EIGEN_DEVICE_FUNC + static inline void run(Kernel &kernel) + { + triangular_assignment_loop<Kernel, Mode, UnrollCount-1, SetOpposite>::run(kernel); + + if(row==col) + kernel.assignDiagonalCoeff(row); + else if( ((Mode&Lower) && row>col) || ((Mode&Upper) && row<col) ) + kernel.assignCoeff(row,col); + else if(SetOpposite) + kernel.assignOppositeCoeff(row,col); + } +}; + +// prevent buggy user code from causing an infinite recursion +template<typename Kernel, unsigned int Mode, bool SetOpposite> +struct triangular_assignment_loop<Kernel, Mode, 0, SetOpposite> +{ + EIGEN_DEVICE_FUNC + static inline void run(Kernel &) {} +}; + + + +// TODO: experiment with a recursive assignment procedure splitting the current +// triangular 
part into one rectangular and two triangular parts. + + +template<typename Kernel, unsigned int Mode, bool SetOpposite> +struct triangular_assignment_loop<Kernel, Mode, Dynamic, SetOpposite> +{ + typedef typename Kernel::Index Index; + typedef typename Kernel::Scalar Scalar; + EIGEN_DEVICE_FUNC + static inline void run(Kernel &kernel) + { + for(Index j = 0; j < kernel.cols(); ++j) + { + Index maxi = (std::min)(j, kernel.rows()); + Index i = 0; + if (((Mode&Lower) && SetOpposite) || (Mode&Upper)) + { + for(; i < maxi; ++i) + if(Mode&Upper) kernel.assignCoeff(i, j); + else kernel.assignOppositeCoeff(i, j); + } + else + i = maxi; + + if(i<kernel.rows()) // then i==j + kernel.assignDiagonalCoeff(i++); + + if (((Mode&Upper) && SetOpposite) || (Mode&Lower)) + { + for(; i < kernel.rows(); ++i) + if(Mode&Lower) kernel.assignCoeff(i, j); + else kernel.assignOppositeCoeff(i, j); + } + } + } +}; + +} // end namespace internal + +/** Assigns a triangular or selfadjoint matrix to a dense matrix. + * If the matrix is triangular, the opposite part is set to zero. 
*/ +template<typename Derived> +template<typename DenseDerived> +void TriangularBase<Derived>::evalToLazy(MatrixBase<DenseDerived> &other) const +{ + other.derived().resize(this->rows(), this->cols()); + internal::call_triangular_assignment_loop<Derived::Mode,(Derived::Mode&SelfAdjoint)==0 /* SetOpposite */>(other.derived(), derived().nestedExpression()); +} + +namespace internal { + +// Triangular = Product +template< typename DstXprType, typename Lhs, typename Rhs, typename Scalar> +struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::assign_op<Scalar>, Dense2Triangular, Scalar> +{ + typedef Product<Lhs,Rhs,DefaultProduct> SrcXprType; + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &) + { + dst.setZero(); + dst._assignProduct(src, 1); + } +}; + +// Triangular += Product +template< typename DstXprType, typename Lhs, typename Rhs, typename Scalar> +struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::add_assign_op<Scalar>, Dense2Triangular, Scalar> +{ + typedef Product<Lhs,Rhs,DefaultProduct> SrcXprType; + static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<Scalar> &) + { + dst._assignProduct(src, 1); + } +}; + +// Triangular -= Product +template< typename DstXprType, typename Lhs, typename Rhs, typename Scalar> +struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::sub_assign_op<Scalar>, Dense2Triangular, Scalar> +{ + typedef Product<Lhs,Rhs,DefaultProduct> SrcXprType; + static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<Scalar> &) + { + dst._assignProduct(src, -1); + } +}; + +} // end namespace internal + } // end namespace Eigen #endif // EIGEN_TRIANGULARMATRIX_H diff --git a/Eigen/src/Core/VectorwiseOp.h b/Eigen/src/Core/VectorwiseOp.h index 52eb4f604..a626310ec 100644 --- a/Eigen/src/Core/VectorwiseOp.h +++ b/Eigen/src/Core/VectorwiseOp.h @@ -48,25 +48,15 @@ struct 
traits<PartialReduxExpr<MatrixType, MemberOp, Direction> > ColsAtCompileTime = Direction==Horizontal ? 1 : MatrixType::ColsAtCompileTime, MaxRowsAtCompileTime = Direction==Vertical ? 1 : MatrixType::MaxRowsAtCompileTime, MaxColsAtCompileTime = Direction==Horizontal ? 1 : MatrixType::MaxColsAtCompileTime, - Flags0 = (unsigned int)_MatrixTypeNested::Flags & HereditaryBits, - Flags = (Flags0 & ~RowMajorBit) | (RowsAtCompileTime == 1 ? RowMajorBit : 0), + Flags = RowsAtCompileTime == 1 ? RowMajorBit : 0, TraversalSize = Direction==Vertical ? MatrixType::RowsAtCompileTime : MatrixType::ColsAtCompileTime }; - #if EIGEN_GNUC_AT_LEAST(3,4) - typedef typename MemberOp::template Cost<InputScalar,int(TraversalSize)> CostOpType; - #else - typedef typename MemberOp::template Cost<InputScalar,TraversalSize> CostOpType; - #endif - enum { - CoeffReadCost = TraversalSize==Dynamic ? Dynamic - : TraversalSize * traits<_MatrixTypeNested>::CoeffReadCost + int(CostOpType::value) - }; }; } template< typename MatrixType, typename MemberOp, int Direction> -class PartialReduxExpr : internal::no_assignment_operator, - public internal::dense_xpr_base< PartialReduxExpr<MatrixType, MemberOp, Direction> >::type +class PartialReduxExpr : public internal::dense_xpr_base< PartialReduxExpr<MatrixType, MemberOp, Direction> >::type, + internal::no_assignment_operator { public: @@ -75,7 +65,7 @@ class PartialReduxExpr : internal::no_assignment_operator, typedef typename internal::traits<PartialReduxExpr>::MatrixTypeNested MatrixTypeNested; typedef typename internal::traits<PartialReduxExpr>::_MatrixTypeNested _MatrixTypeNested; - PartialReduxExpr(const MatrixType& mat, const MemberOp& func = MemberOp()) + explicit PartialReduxExpr(const MatrixType& mat, const MemberOp& func = MemberOp()) : m_matrix(mat), m_functor(func) {} Index rows() const { return (Direction==Vertical ? 
1 : m_matrix.rows()); } @@ -138,7 +128,7 @@ struct member_redux { >::type result_type; template<typename _Scalar, int Size> struct Cost { enum { value = (Size-1) * functor_traits<BinaryOp>::Cost }; }; - member_redux(const BinaryOp func) : m_functor(func) {} + explicit member_redux(const BinaryOp func) : m_functor(func) {} template<typename Derived> inline result_type operator()(const DenseBase<Derived>& mat) const { return mat.redux(m_functor); } @@ -175,10 +165,10 @@ template<typename ExpressionType, int Direction> class VectorwiseOp typedef typename internal::remove_all<ExpressionTypeNested>::type ExpressionTypeNestedCleaned; template<template<typename _Scalar> class Functor, - typename Scalar=typename internal::traits<ExpressionType>::Scalar> struct ReturnType + typename Scalar_=Scalar> struct ReturnType { typedef PartialReduxExpr<ExpressionType, - Functor<Scalar>, + Functor<Scalar_>, Direction > Type; }; @@ -186,7 +176,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp template<typename BinaryOp> struct ReduxReturnType { typedef PartialReduxExpr<ExpressionType, - internal::member_redux<BinaryOp,typename internal::traits<ExpressionType>::Scalar>, + internal::member_redux<BinaryOp,Scalar>, Direction > Type; }; @@ -259,7 +249,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp public: - inline VectorwiseOp(ExpressionType& matrix) : m_matrix(matrix) {} + explicit inline VectorwiseOp(ExpressionType& matrix) : m_matrix(matrix) {} /** \internal */ inline const ExpressionType& _expression() const { return m_matrix; } @@ -274,7 +264,22 @@ template<typename ExpressionType, int Direction> class VectorwiseOp template<typename BinaryOp> const typename ReduxReturnType<BinaryOp>::Type redux(const BinaryOp& func = BinaryOp()) const - { return typename ReduxReturnType<BinaryOp>::Type(_expression(), func); } + { return typename ReduxReturnType<BinaryOp>::Type(_expression(), internal::member_redux<BinaryOp,Scalar>(func)); } + + typedef 
typename ReturnType<internal::member_minCoeff>::Type MinCoeffReturnType; + typedef typename ReturnType<internal::member_maxCoeff>::Type MaxCoeffReturnType; + typedef typename ReturnType<internal::member_squaredNorm,RealScalar>::Type SquaredNormReturnType; + typedef typename ReturnType<internal::member_norm,RealScalar>::Type NormReturnType; + typedef typename ReturnType<internal::member_blueNorm,RealScalar>::Type BlueNormReturnType; + typedef typename ReturnType<internal::member_stableNorm,RealScalar>::Type StableNormReturnType; + typedef typename ReturnType<internal::member_hypotNorm,RealScalar>::Type HypotNormReturnType; + typedef typename ReturnType<internal::member_sum>::Type SumReturnType; + typedef typename ReturnType<internal::member_mean>::Type MeanReturnType; + typedef typename ReturnType<internal::member_all>::Type AllReturnType; + typedef typename ReturnType<internal::member_any>::Type AnyReturnType; + typedef PartialReduxExpr<ExpressionType, internal::member_count<Index>, Direction> CountReturnType; + typedef typename ReturnType<internal::member_prod>::Type ProdReturnType; + typedef Reverse<ExpressionType, Direction> ReverseReturnType; /** \returns a row (or column) vector expression of the smallest coefficient * of each column (or row) of the referenced expression. @@ -285,8 +290,8 @@ template<typename ExpressionType, int Direction> class VectorwiseOp * Output: \verbinclude PartialRedux_minCoeff.out * * \sa DenseBase::minCoeff() */ - const typename ReturnType<internal::member_minCoeff>::Type minCoeff() const - { return _expression(); } + const MinCoeffReturnType minCoeff() const + { return MinCoeffReturnType(_expression()); } /** \returns a row (or column) vector expression of the largest coefficient * of each column (or row) of the referenced expression. 
@@ -297,8 +302,8 @@ template<typename ExpressionType, int Direction> class VectorwiseOp * Output: \verbinclude PartialRedux_maxCoeff.out * * \sa DenseBase::maxCoeff() */ - const typename ReturnType<internal::member_maxCoeff>::Type maxCoeff() const - { return _expression(); } + const MaxCoeffReturnType maxCoeff() const + { return MaxCoeffReturnType(_expression()); } /** \returns a row (or column) vector expression of the squared norm * of each column (or row) of the referenced expression. @@ -308,8 +313,8 @@ template<typename ExpressionType, int Direction> class VectorwiseOp * Output: \verbinclude PartialRedux_squaredNorm.out * * \sa DenseBase::squaredNorm() */ - const typename ReturnType<internal::member_squaredNorm,RealScalar>::Type squaredNorm() const - { return _expression(); } + const SquaredNormReturnType squaredNorm() const + { return SquaredNormReturnType(_expression()); } /** \returns a row (or column) vector expression of the norm * of each column (or row) of the referenced expression. @@ -319,8 +324,8 @@ template<typename ExpressionType, int Direction> class VectorwiseOp * Output: \verbinclude PartialRedux_norm.out * * \sa DenseBase::norm() */ - const typename ReturnType<internal::member_norm,RealScalar>::Type norm() const - { return _expression(); } + const NormReturnType norm() const + { return NormReturnType(_expression()); } /** \returns a row (or column) vector expression of the norm @@ -329,8 +334,8 @@ template<typename ExpressionType, int Direction> class VectorwiseOp * This is a vector with real entries, even if the original matrix has complex entries. 
* * \sa DenseBase::blueNorm() */ - const typename ReturnType<internal::member_blueNorm,RealScalar>::Type blueNorm() const - { return _expression(); } + const BlueNormReturnType blueNorm() const + { return BlueNormReturnType(_expression()); } /** \returns a row (or column) vector expression of the norm @@ -339,8 +344,8 @@ template<typename ExpressionType, int Direction> class VectorwiseOp * This is a vector with real entries, even if the original matrix has complex entries. * * \sa DenseBase::stableNorm() */ - const typename ReturnType<internal::member_stableNorm,RealScalar>::Type stableNorm() const - { return _expression(); } + const StableNormReturnType stableNorm() const + { return StableNormReturnType(_expression()); } /** \returns a row (or column) vector expression of the norm @@ -349,8 +354,8 @@ template<typename ExpressionType, int Direction> class VectorwiseOp * This is a vector with real entries, even if the original matrix has complex entries. * * \sa DenseBase::hypotNorm() */ - const typename ReturnType<internal::member_hypotNorm,RealScalar>::Type hypotNorm() const - { return _expression(); } + const HypotNormReturnType hypotNorm() const + { return HypotNormReturnType(_expression()); } /** \returns a row (or column) vector expression of the sum * of each column (or row) of the referenced expression. @@ -359,31 +364,31 @@ template<typename ExpressionType, int Direction> class VectorwiseOp * Output: \verbinclude PartialRedux_sum.out * * \sa DenseBase::sum() */ - const typename ReturnType<internal::member_sum>::Type sum() const - { return _expression(); } + const SumReturnType sum() const + { return SumReturnType(_expression()); } /** \returns a row (or column) vector expression of the mean * of each column (or row) of the referenced expression. 
* * \sa DenseBase::mean() */ - const typename ReturnType<internal::member_mean>::Type mean() const - { return _expression(); } + const MeanReturnType mean() const + { return MeanReturnType(_expression()); } /** \returns a row (or column) vector expression representing * whether \b all coefficients of each respective column (or row) are \c true. * This expression can be assigned to a vector with entries of type \c bool. * * \sa DenseBase::all() */ - const typename ReturnType<internal::member_all>::Type all() const - { return _expression(); } + const AllReturnType all() const + { return AllReturnType(_expression()); } /** \returns a row (or column) vector expression representing * whether \b at \b least one coefficient of each respective column (or row) is \c true. * This expression can be assigned to a vector with entries of type \c bool. * * \sa DenseBase::any() */ - const typename ReturnType<internal::member_any>::Type any() const - { return _expression(); } + const AnyReturnType any() const + { return AnyReturnType(_expression()); } /** \returns a row (or column) vector expression representing * the number of \c true coefficients of each respective column (or row). @@ -394,8 +399,8 @@ template<typename ExpressionType, int Direction> class VectorwiseOp * Output: \verbinclude PartialRedux_count.out * * \sa DenseBase::count() */ - const PartialReduxExpr<ExpressionType, internal::member_count<Index>, Direction> count() const - { return _expression(); } + const CountReturnType count() const + { return CountReturnType(_expression()); } /** \returns a row (or column) vector expression of the product * of each column (or row) of the referenced expression. 
@@ -404,8 +409,8 @@ template<typename ExpressionType, int Direction> class VectorwiseOp * Output: \verbinclude PartialRedux_prod.out * * \sa DenseBase::prod() */ - const typename ReturnType<internal::member_prod>::Type prod() const - { return _expression(); } + const ProdReturnType prod() const + { return ProdReturnType(_expression()); } /** \returns a matrix expression @@ -415,8 +420,8 @@ template<typename ExpressionType, int Direction> class VectorwiseOp * Output: \verbinclude Vectorwise_reverse.out * * \sa DenseBase::reverse() */ - const Reverse<ExpressionType, Direction> reverse() const - { return Reverse<ExpressionType, Direction>( _expression() ); } + const ReverseReturnType reverse() const + { return ReverseReturnType( _expression() ); } typedef Replicate<ExpressionType,Direction==Vertical?Dynamic:1,Direction==Horizontal?Dynamic:1> ReplicateReturnType; const ReplicateReturnType replicate(Index factor) const; @@ -560,7 +565,8 @@ template<typename ExpressionType, int Direction> class VectorwiseOp /////////// Geometry module /////////// - Homogeneous<ExpressionType,Direction> homogeneous() const; + typedef Homogeneous<ExpressionType,Direction> HomogeneousReturnType; + HomogeneousReturnType homogeneous() const; typedef typename ExpressionType::PlainObject CrossReturnType; template<typename OtherDerived> @@ -605,7 +611,7 @@ template<typename Derived> inline const typename DenseBase<Derived>::ConstColwiseReturnType DenseBase<Derived>::colwise() const { - return derived(); + return ConstColwiseReturnType(derived()); } /** \returns a writable VectorwiseOp wrapper of *this providing additional partial reduction operations @@ -616,7 +622,7 @@ template<typename Derived> inline typename DenseBase<Derived>::ColwiseReturnType DenseBase<Derived>::colwise() { - return derived(); + return ColwiseReturnType(derived()); } /** \returns a VectorwiseOp wrapper of *this providing additional partial reduction operations @@ -630,7 +636,7 @@ template<typename Derived> inline const 
typename DenseBase<Derived>::ConstRowwiseReturnType DenseBase<Derived>::rowwise() const { - return derived(); + return ConstRowwiseReturnType(derived()); } /** \returns a writable VectorwiseOp wrapper of *this providing additional partial reduction operations @@ -641,7 +647,7 @@ template<typename Derived> inline typename DenseBase<Derived>::RowwiseReturnType DenseBase<Derived>::rowwise() { - return derived(); + return RowwiseReturnType(derived()); } } // end namespace Eigen diff --git a/Eigen/src/Core/Visitor.h b/Eigen/src/Core/Visitor.h index 6f4b9ec35..02bd4eff3 100644 --- a/Eigen/src/Core/Visitor.h +++ b/Eigen/src/Core/Visitor.h @@ -53,6 +53,33 @@ struct visitor_impl<Visitor, Derived, Dynamic> } }; +// evaluator adaptor +template<typename XprType> +class visitor_evaluator +{ +public: + explicit visitor_evaluator(const XprType &xpr) : m_evaluator(xpr), m_xpr(xpr) {} + + typedef typename XprType::Index Index; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + + enum { + RowsAtCompileTime = XprType::RowsAtCompileTime, + CoeffReadCost = internal::evaluator<XprType>::CoeffReadCost + }; + + Index rows() const { return m_xpr.rows(); } + Index cols() const { return m_xpr.cols(); } + Index size() const { return m_xpr.size(); } + + CoeffReturnType coeff(Index row, Index col) const + { return m_evaluator.coeff(row, col); } + +protected: + typename internal::evaluator<XprType>::nestedType m_evaluator; + const XprType &m_xpr; +}; } // end namespace internal /** Applies the visitor \a visitor to the whole coefficients of the matrix or vector. 
@@ -76,14 +103,17 @@ template<typename Derived> template<typename Visitor> void DenseBase<Derived>::visit(Visitor& visitor) const { - enum { unroll = SizeAtCompileTime != Dynamic - && CoeffReadCost != Dynamic - && (SizeAtCompileTime == 1 || internal::functor_traits<Visitor>::Cost != Dynamic) - && SizeAtCompileTime * CoeffReadCost + (SizeAtCompileTime-1) * internal::functor_traits<Visitor>::Cost - <= EIGEN_UNROLLING_LIMIT }; - return internal::visitor_impl<Visitor, Derived, + typedef typename internal::visitor_evaluator<Derived> ThisEvaluator; + ThisEvaluator thisEval(derived()); + + enum { unroll = SizeAtCompileTime != Dynamic + && ThisEvaluator::CoeffReadCost != Dynamic + && (SizeAtCompileTime == 1 || internal::functor_traits<Visitor>::Cost != Dynamic) + && SizeAtCompileTime * ThisEvaluator::CoeffReadCost + (SizeAtCompileTime-1) * internal::functor_traits<Visitor>::Cost + <= EIGEN_UNROLLING_LIMIT }; + return internal::visitor_impl<Visitor, ThisEvaluator, unroll ? int(SizeAtCompileTime) : Dynamic - >::run(derived(), visitor); + >::run(thisEval, visitor); } namespace internal { diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h index 1591458a7..e66d50649 100644 --- a/Eigen/src/Core/arch/AVX/PacketMath.h +++ b/Eigen/src/Core/arch/AVX/PacketMath.h @@ -22,9 +22,9 @@ namespace internal { #define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS (2*sizeof(void*)) #endif -#ifdef EIGEN_VECTORIZE_FMA -#ifndef EIGEN_HAS_FUSED_MADD -#define EIGEN_HAS_FUSED_MADD 1 +#ifdef __FMA__ +#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD +#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD #endif #endif @@ -137,7 +137,7 @@ template<> EIGEN_STRONG_INLINE Packet8i pdiv<Packet8i>(const Packet8i& /*a*/, co #ifdef EIGEN_VECTORIZE_FMA template<> EIGEN_STRONG_INLINE Packet8f pmadd(const Packet8f& a, const Packet8f& b, const Packet8f& c) { -#if defined(__clang__) || defined(__GNUC__) +#if EIGEN_COMP_GNUC || EIGEN_COMP_CLANG // clang stupidly generates a vfmadd213ps instruction plus 
some vmovaps on registers, // and gcc stupidly generates a vfmadd132ps instruction, // so let's enforce it to generate a vfmadd231ps instruction since the most common use case is to accumulate @@ -150,7 +150,7 @@ template<> EIGEN_STRONG_INLINE Packet8f pmadd(const Packet8f& a, const Packet8f& #endif } template<> EIGEN_STRONG_INLINE Packet4d pmadd(const Packet4d& a, const Packet4d& b, const Packet4d& c) { -#if defined(__clang__) || defined(__GNUC__) +#if EIGEN_COMP_GNUC || EIGEN_COMP_CLANG // see above Packet4d res = c; __asm__("vfmadd231pd %[a], %[b], %[c]" : [c] "+x" (res) : [a] "x" (a), [b] "x" (b)); diff --git a/Eigen/src/Core/arch/AltiVec/Complex.h b/Eigen/src/Core/arch/AltiVec/Complex.h index 13b874d0c..f9b93a42b 100644 --- a/Eigen/src/Core/arch/AltiVec/Complex.h +++ b/Eigen/src/Core/arch/AltiVec/Complex.h @@ -7,23 +7,21 @@ // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. -#ifndef EIGEN_COMPLEX_ALTIVEC_H -#define EIGEN_COMPLEX_ALTIVEC_H +#ifndef EIGEN_COMPLEX32_ALTIVEC_H +#define EIGEN_COMPLEX32_ALTIVEC_H namespace Eigen { namespace internal { static Packet4ui p4ui_CONJ_XOR = vec_mergeh((Packet4ui)p4i_ZERO, (Packet4ui)p4f_ZERO_);//{ 0x00000000, 0x80000000, 0x00000000, 0x80000000 }; -static Packet16uc p16uc_COMPLEX_RE = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 0), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 2), 8);//{ 0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11 }; -static Packet16uc p16uc_COMPLEX_IM = vec_sld(p16uc_DUPLICATE, (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 3), 8);//{ 4,5,6,7, 4,5,6,7, 12,13,14,15, 12,13,14,15 }; -static Packet16uc p16uc_COMPLEX_REV = vec_sld(p16uc_REVERSE, p16uc_REVERSE, 8);//{ 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11 }; -static Packet16uc p16uc_COMPLEX_REV2 = vec_sld(p16uc_FORWARD, p16uc_FORWARD, 8);//{ 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 }; -static Packet16uc p16uc_PSET_HI = (Packet16uc) 
vec_mergeh((Packet4ui)p16uc_COMPLEX_RE, (Packet4ui)p16uc_COMPLEX_IM);//{ 0,1,2,3, 4,5,6,7, 0,1,2,3, 4,5,6,7 }; -static Packet16uc p16uc_PSET_LO = (Packet16uc) vec_mergel((Packet4ui)p16uc_COMPLEX_RE, (Packet4ui)p16uc_COMPLEX_IM);//{ 8,9,10,11, 12,13,14,15, 8,9,10,11, 12,13,14,15 }; -static Packet16uc p16uc_COMPLEX_MASK16 = vec_sld((Packet16uc)p4i_ZERO, vec_splat((Packet16uc) vec_abs(p4i_MINUS16), 3), 8);//{ 0,0,0,0, 0,0,0,0, 16,16,16,16, 16,16,16,16}; -static Packet16uc p16uc_COMPLEX_TRANSPOSE_0 = vec_add(p16uc_PSET_HI, p16uc_COMPLEX_MASK16);//{ 0,1,2,3, 4,5,6,7, 16,17,18,19, 20,21,22,23}; -static Packet16uc p16uc_COMPLEX_TRANSPOSE_1 = vec_add(p16uc_PSET_LO, p16uc_COMPLEX_MASK16);//{ 8,9,10,11, 12,13,14,15, 24,25,26,27, 28,29,30,31}; +#ifdef _BIG_ENDIAN +static Packet2ul p2ul_CONJ_XOR1 = (Packet2ul) vec_sld((Packet4ui) p2d_ZERO_, (Packet4ui) p2l_ZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 }; +static Packet2ul p2ul_CONJ_XOR2 = (Packet2ul) vec_sld((Packet4ui) p2l_ZERO, (Packet4ui) p2d_ZERO_, 8);//{ 0x0000000000000000, 0x8000000000000000 }; +#else +static Packet2ul p2ul_CONJ_XOR1 = (Packet2ul) vec_sld((Packet4ui) p2l_ZERO, (Packet4ui) p2d_ZERO_, 8);//{ 0x8000000000000000, 0x0000000000000000 }; +static Packet2ul p2ul_CONJ_XOR2 = (Packet2ul) vec_sld((Packet4ui) p2d_ZERO_, (Packet4ui) p2l_ZERO, 8);//{ 0x0000000000000000, 0x8000000000000000 }; +#endif //---------- float ---------- struct Packet2cf @@ -65,7 +63,7 @@ template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<flo res.v = pload<Packet4f>((const float *)&from); else res.v = ploadu<Packet4f>((const float *)&from); - res.v = vec_perm(res.v, res.v, p16uc_PSET_HI); + res.v = vec_perm(res.v, res.v, p16uc_PSET64_HI); return res; } @@ -95,16 +93,16 @@ template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, con Packet4f v1, v2; // Permute and multiply the real parts of a and b - v1 = vec_perm(a.v, a.v, p16uc_COMPLEX_RE); + v1 = vec_perm(a.v, a.v, p16uc_PSET32_WODD); // Get
the imaginary parts of a - v2 = vec_perm(a.v, a.v, p16uc_COMPLEX_IM); + v2 = vec_perm(a.v, a.v, p16uc_PSET32_WEVEN); // multiply a_re * b v1 = vec_madd(v1, b.v, p4f_ZERO); // multiply a_im * b and get the conjugate result v2 = vec_madd(v2, b.v, p4f_ZERO); v2 = (Packet4f) vec_xor((Packet4ui)v2, p4ui_CONJ_XOR); // permute back to a proper order - v2 = vec_perm(v2, v2, p16uc_COMPLEX_REV); + v2 = vec_perm(v2, v2, p16uc_COMPLEX32_REV); return Packet2cf(vec_add(v1, v2)); } @@ -138,7 +136,7 @@ template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Pack template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a) { Packet4f rev_a; - rev_a = vec_perm(a.v, a.v, p16uc_COMPLEX_REV2); + rev_a = vec_perm(a.v, a.v, p16uc_COMPLEX32_REV2); return Packet2cf(rev_a); } @@ -153,9 +151,13 @@ template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packe template<> EIGEN_STRONG_INLINE Packet2cf preduxp<Packet2cf>(const Packet2cf* vecs) { Packet4f b1, b2; - +#ifdef _BIG_ENDIAN b1 = (Packet4f) vec_sld(vecs[0].v, vecs[1].v, 8); b2 = (Packet4f) vec_sld(vecs[1].v, vecs[0].v, 8); +#else + b1 = (Packet4f) vec_sld(vecs[1].v, vecs[0].v, 8); + b2 = (Packet4f) vec_sld(vecs[0].v, vecs[1].v, 8); +#endif b2 = (Packet4f) vec_sld(b2, b2, 8); b2 = padd(b1, b2); @@ -179,7 +181,11 @@ struct palign_impl<Offset,Packet2cf> { if (Offset==1) { +#ifdef _BIG_ENDIAN first.v = vec_sld(first.v, second.v, 8); +#else + first.v = vec_sld(second.v, first.v, 8); +#endif } } }; @@ -222,23 +228,203 @@ template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, con // TODO optimize it for AltiVec Packet2cf res = conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a,b); Packet4f s = vec_madd(b.v, b.v, p4f_ZERO); - return Packet2cf(pdiv(res.v, vec_add(s,vec_perm(s, s, p16uc_COMPLEX_REV)))); + return Packet2cf(pdiv(res.v, vec_add(s,vec_perm(s, s, p16uc_COMPLEX32_REV)))); } template<> EIGEN_STRONG_INLINE Packet2cf pcplxflip<Packet2cf>(const Packet2cf& x) { - return 
Packet2cf(vec_perm(x.v, x.v, p16uc_COMPLEX_REV)); + return Packet2cf(vec_perm(x.v, x.v, p16uc_COMPLEX32_REV)); } EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet2cf,2>& kernel) { - Packet4f tmp = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_COMPLEX_TRANSPOSE_0); - kernel.packet[1].v = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_COMPLEX_TRANSPOSE_1); + Packet4f tmp = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_HI); + kernel.packet[1].v = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_LO); kernel.packet[0].v = tmp; } +//---------- double ---------- +#ifdef __VSX__ +struct Packet1cd +{ + EIGEN_STRONG_INLINE Packet1cd() {} + EIGEN_STRONG_INLINE explicit Packet1cd(const Packet2d& a) : v(a) {} + Packet2d v; +}; + +template<> struct packet_traits<std::complex<double> > : default_packet_traits +{ + typedef Packet1cd type; + typedef Packet1cd half; + enum { + Vectorizable = 1, + AlignedOnScalar = 0, + size = 1, + HasHalfPacket = 0, + + HasAdd = 1, + HasSub = 1, + HasMul = 1, + HasDiv = 1, + HasNegate = 1, + HasAbs = 0, + HasAbs2 = 0, + HasMin = 0, + HasMax = 0, + HasSetLinear = 0 + }; +}; + +template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1}; typedef Packet1cd half; }; + +template<> EIGEN_STRONG_INLINE Packet1cd pload <Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload<Packet2d>((const double*)from)); } +template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu<Packet2d>((const double*)from)); } +template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, from.v); } +template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE 
pstoreu((double*)to, from.v); } + +template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>& from) +{ /* here we really have to use unaligned loads :( */ return ploadu<Packet1cd>(&from); } + +template<> EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(const std::complex<double>* from, DenseIndex stride) +{ + std::complex<double> EIGEN_ALIGN16 af[2]; + af[0] = from[0*stride]; + af[1] = from[1*stride]; + return pload<Packet1cd>(af); +} +template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet1cd>(std::complex<double>* to, const Packet1cd& from, DenseIndex stride) +{ + std::complex<double> EIGEN_ALIGN16 af[2]; + pstore<std::complex<double> >(af, from); + to[0*stride] = af[0]; + to[1*stride] = af[1]; +} + +template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_add(a.v,b.v)); } +template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_sub(a.v,b.v)); } +template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { return Packet1cd(pnegate(Packet2d(a.v))); } +template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a) { return Packet1cd((Packet2d)vec_xor((Packet2d)a.v, (Packet2d)p2ul_CONJ_XOR2)); } + +template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b) +{ + Packet2d a_re, a_im, v1, v2; + + // Permute and multiply the real parts of a and b + a_re = vec_perm(a.v, a.v, p16uc_PSET64_HI); + // Get the imaginary parts of a + a_im = vec_perm(a.v, a.v, p16uc_PSET64_LO); + // multiply a_re * b + v1 = vec_madd(a_re, b.v, p2d_ZERO); + // multiply a_im * b and get the conjugate result + v2 = vec_madd(a_im, b.v, p2d_ZERO); + v2 = (Packet2d) vec_sld((Packet4ui)v2, (Packet4ui)v2, 8); + v2 = (Packet2d) vec_xor((Packet2d)v2, (Packet2d) p2ul_CONJ_XOR1); + + return Packet1cd(vec_add(v1, v2)); +} + +template<> EIGEN_STRONG_INLINE 
Packet1cd pand <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_and(a.v,b.v)); } +template<> EIGEN_STRONG_INLINE Packet1cd por <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_or(a.v,b.v)); } +template<> EIGEN_STRONG_INLINE Packet1cd pxor <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_xor(a.v,b.v)); } +template<> EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_and(a.v, vec_nor(b.v,b.v))); } + +template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from) +{ + return pset1<Packet1cd>(*from); +} + +template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { vec_dstt((long *)addr, DST_CTRL(2,2,32), DST_CHAN); } + +template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a) +{ + std::complex<double> EIGEN_ALIGN16 res[2]; + pstore<std::complex<double> >(res, a); + + return res[0]; +} + +template<> EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) { return a; } + +template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a) +{ + return pfirst(a); +} + +template<> EIGEN_STRONG_INLINE Packet1cd preduxp<Packet1cd>(const Packet1cd* vecs) +{ + return vecs[0]; +} + +template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a) +{ + return pfirst(a); +} + +template<int Offset> +struct palign_impl<Offset,Packet1cd> +{ + static EIGEN_STRONG_INLINE void run(Packet1cd& /*first*/, const Packet1cd& /*second*/) + { + // FIXME is it sure we never have to align a Packet1cd? + // Even though a std::complex<double> has 16 bytes, it is not necessarily aligned on a 16 bytes boundary... 
+ } +}; + +template<> struct conj_helper<Packet1cd, Packet1cd, false,true> +{ + EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const + { return padd(pmul(x,y),c); } + + EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const + { + return internal::pmul(a, pconj(b)); + } +}; + +template<> struct conj_helper<Packet1cd, Packet1cd, true,false> +{ + EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const + { return padd(pmul(x,y),c); } + + EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const + { + return internal::pmul(pconj(a), b); + } +}; + +template<> struct conj_helper<Packet1cd, Packet1cd, true,true> +{ + EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const + { return padd(pmul(x,y),c); } + + EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const + { + return pconj(internal::pmul(a, b)); + } +}; + +template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b) +{ + // TODO optimize it for AltiVec + Packet1cd res = conj_helper<Packet1cd,Packet1cd,false,true>().pmul(a,b); + Packet2d s = vec_madd(b.v, b.v, p2d_ZERO_); + return Packet1cd(pdiv(res.v, vec_add(s,vec_perm(s, s, p16uc_COMPLEX32_REV)))); +} + +EIGEN_STRONG_INLINE Packet1cd pcplxflip/*<Packet1cd>*/(const Packet1cd& x) +{ + return Packet1cd(preverse(Packet2d(x.v))); +} + +EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet1cd,2>& kernel) +{ + Packet2d tmp = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_HI); + kernel.packet[1].v = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_LO); + kernel.packet[0].v = tmp; +} +#endif // __VSX__ } // end namespace internal } // end namespace Eigen -#endif // EIGEN_COMPLEX_ALTIVEC_H +#endif // EIGEN_COMPLEX32_ALTIVEC_H diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h 
b/Eigen/src/Core/arch/AltiVec/PacketMath.h index b43e8ace3..6b68fc7a5 100755 --- a/Eigen/src/Core/arch/AltiVec/PacketMath.h +++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h @@ -18,17 +18,17 @@ namespace internal { #define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 4 #endif -#ifndef EIGEN_HAS_FUSED_MADD -#define EIGEN_HAS_FUSED_MADD 1 +#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD +#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD #endif -#ifndef EIGEN_HAS_FUSE_CJMADD -#define EIGEN_HAS_FUSE_CJMADD 1 +#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD +#define EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD #endif // NOTE Altivec has 32 registers, but Eigen only accepts a value of 8 or 16 #ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS -#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 16 +#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 32 #endif typedef __vector float Packet4f; @@ -50,22 +50,20 @@ typedef __vector unsigned char Packet16uc; #define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \ Packet4f p4f_##NAME = pset1<Packet4f>(X) -#define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \ - Packet4f p4f_##NAME = vreinterpretq_f32_u32(pset1<int>(X)) - #define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \ Packet4i p4i_##NAME = pset1<Packet4i>(X) +#define _EIGEN_DECLARE_CONST_Packet2d(NAME,X) \ + Packet2d p2d_##NAME = pset1<Packet2d>(X) + +#define _EIGEN_DECLARE_CONST_Packet2l(NAME,X) \ + Packet2l p2l_##NAME = pset1<Packet2l>(X) + #define DST_CHAN 1 #define DST_CTRL(size, count, stride) (((size) << 24) | ((count) << 16) | (stride)) -// Define global static constants: -static Packet4f p4f_COUNTDOWN = { 0.0, 1.0, 2.0, 3.0 }; -static Packet4i p4i_COUNTDOWN = { 0, 1, 2, 3 }; -static Packet16uc p16uc_REVERSE = { 12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3}; -static Packet16uc p16uc_FORWARD = vec_lvsl(0, (float*)0); //{ 0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15} -static Packet16uc p16uc_DUPLICATE = { 0,1,2,3, 0,1,2,3, 4,5,6,7, 4,5,6,7}; +// These constants are endian-agnostic static _EIGEN_DECLARE_CONST_FAST_Packet4f(ZERO, 0); //{ 
0.0, 0.0, 0.0, 0.0} static _EIGEN_DECLARE_CONST_FAST_Packet4i(ZERO, 0); //{ 0, 0, 0, 0,} static _EIGEN_DECLARE_CONST_FAST_Packet4i(ONE,1); //{ 1, 1, 1, 1} @@ -74,6 +72,50 @@ static _EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS1,-1); //{ -1, -1, -1, -1} static Packet4f p4f_ONE = vec_ctf(p4i_ONE, 0); //{ 1.0, 1.0, 1.0, 1.0} static Packet4f p4f_ZERO_ = (Packet4f) vec_sl((Packet4ui)p4i_MINUS1, (Packet4ui)p4i_MINUS1); //{ 0x80000000, 0x80000000, 0x80000000, 0x80000000} +static Packet4f p4f_COUNTDOWN = { 0.0, 1.0, 2.0, 3.0 }; +static Packet4i p4i_COUNTDOWN = { 0, 1, 2, 3 }; + +static Packet16uc p16uc_REVERSE32 = { 12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3 }; +static Packet16uc p16uc_DUPLICATE32_HI = { 0,1,2,3, 0,1,2,3, 4,5,6,7, 4,5,6,7 }; + +// Mask alignment +#ifdef __PPC64__ +#define _EIGEN_MASK_ALIGNMENT 0xfffffffffffffff0 +#else +#define _EIGEN_MASK_ALIGNMENT 0xfffffff0 +#endif + +#define _EIGEN_ALIGNED_PTR(x) ((ptrdiff_t)(x) & _EIGEN_MASK_ALIGNMENT) + +// Handle endianness properly while loading constants +// Define global static constants: +#ifdef _BIG_ENDIAN +static Packet16uc p16uc_FORWARD = vec_lvsl(0, (float*)0); +static Packet16uc p16uc_REVERSE64 = { 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 }; +static Packet16uc p16uc_PSET32_WODD = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 0), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 2), 8);//{ 0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11 }; +static Packet16uc p16uc_PSET32_WEVEN = vec_sld(p16uc_DUPLICATE32_HI, (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 3), 8);//{ 4,5,6,7, 4,5,6,7, 12,13,14,15, 12,13,14,15 }; +static Packet16uc p16uc_HALF64_0_16 = vec_sld((Packet16uc)p4i_ZERO, vec_splat((Packet16uc) vec_abs(p4i_MINUS16), 3), 8); //{ 0,0,0,0, 0,0,0,0, 16,16,16,16, 16,16,16,16}; +#else +static Packet16uc p16uc_FORWARD = p16uc_REVERSE32; +static Packet16uc p16uc_REVERSE64 = { 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 }; +static Packet16uc p16uc_PSET32_WODD = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 
1), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 3), 8);//{ 0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11 }; +static Packet16uc p16uc_PSET32_WEVEN = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 0), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 2), 8);//{ 4,5,6,7, 4,5,6,7, 12,13,14,15, 12,13,14,15 }; +static Packet16uc p16uc_HALF64_0_16 = vec_sld(vec_splat((Packet16uc) vec_abs(p4i_MINUS16), 0), (Packet16uc)p4i_ZERO, 8); //{ 0,0,0,0, 0,0,0,0, 16,16,16,16, 16,16,16,16}; +#endif // _BIG_ENDIAN + +static Packet16uc p16uc_PSET64_HI = (Packet16uc) vec_mergeh((Packet4ui)p16uc_PSET32_WODD, (Packet4ui)p16uc_PSET32_WEVEN); //{ 0,1,2,3, 4,5,6,7, 0,1,2,3, 4,5,6,7 }; +static Packet16uc p16uc_PSET64_LO = (Packet16uc) vec_mergel((Packet4ui)p16uc_PSET32_WODD, (Packet4ui)p16uc_PSET32_WEVEN); //{ 8,9,10,11, 12,13,14,15, 8,9,10,11, 12,13,14,15 }; +static Packet16uc p16uc_TRANSPOSE64_HI = vec_add(p16uc_PSET64_HI, p16uc_HALF64_0_16); //{ 0,1,2,3, 4,5,6,7, 16,17,18,19, 20,21,22,23}; +static Packet16uc p16uc_TRANSPOSE64_LO = vec_add(p16uc_PSET64_LO, p16uc_HALF64_0_16); //{ 8,9,10,11, 12,13,14,15, 24,25,26,27, 28,29,30,31}; + +static Packet16uc p16uc_COMPLEX32_REV = vec_sld(p16uc_REVERSE32, p16uc_REVERSE32, 8); //{ 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11 }; + +#ifdef _BIG_ENDIAN +static Packet16uc p16uc_COMPLEX32_REV2 = vec_sld(p16uc_FORWARD, p16uc_FORWARD, 8); //{ 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 }; +#else +static Packet16uc p16uc_COMPLEX32_REV2 = vec_sld(p16uc_PSET64_HI, p16uc_PSET64_LO, 8); //{ 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 }; +#endif // _BIG_ENDIAN + template<> struct packet_traits<float> : default_packet_traits { typedef Packet4f type; @@ -105,9 +147,22 @@ template<> struct packet_traits<int> : default_packet_traits }; }; + template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4}; typedef Packet4f half; }; template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4}; typedef Packet4i half; }; -/* + +inline 
std::ostream & operator <<(std::ostream & s, const Packet16uc & v) +{ + union { + Packet16uc v; + unsigned char n[16]; + } vt; + vt.v = v; + for (int i=0; i< 16; i++) + s << (int)vt.n[i] << ", "; + return s; +} + inline std::ostream & operator <<(std::ostream & s, const Packet4f & v) { union { @@ -140,7 +195,7 @@ inline std::ostream & operator <<(std::ostream & s, const Packet4ui & v) s << vt.n[0] << ", " << vt.n[1] << ", " << vt.n[2] << ", " << vt.n[3]; return s; } - +/* inline std::ostream & operator <<(std::ostream & s, const Packetbi & v) { union { @@ -150,14 +205,21 @@ inline std::ostream & operator <<(std::ostream & s, const Packetbi & v) vt.v = v; s << vt.n[0] << ", " << vt.n[1] << ", " << vt.n[2] << ", " << vt.n[3]; return s; -} -*/ +}*/ + + +// Need to define them first or we get specialization after instantiation errors +template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return vec_ld(0, from); } +template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return vec_ld(0, from); } + +template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE vec_st(from, 0, to); } +template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE vec_st(from, 0, to); } template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { // Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html float EIGEN_ALIGN16 af[4]; af[0] = from; - Packet4f vc = vec_ld(0, af); + Packet4f vc = pload<Packet4f>(af); vc = vec_splat(vc, 0); return vc; } @@ -165,17 +227,15 @@ template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) { int EIGEN_ALIGN16 ai[4]; ai[0] = from; - Packet4i vc = vec_ld(0, ai); + Packet4i vc = pload<Packet4i>(ai); vc = vec_splat(vc, 0); return vc; } - - template<> 
EIGEN_STRONG_INLINE void pbroadcast4<Packet4f>(const float *a, Packet4f& a0, Packet4f& a1, Packet4f& a2, Packet4f& a3) { - a3 = vec_ld(0,a); + a3 = pload<Packet4f>(a); a0 = vec_splat(a3, 0); a1 = vec_splat(a3, 1); a2 = vec_splat(a3, 2); @@ -185,7 +245,7 @@ template<> EIGEN_STRONG_INLINE void pbroadcast4<Packet4i>(const int *a, Packet4i& a0, Packet4i& a1, Packet4i& a2, Packet4i& a3) { - a3 = vec_ld(0,a); + a3 = pload<Packet4i>(a); a0 = vec_splat(a3, 0); a1 = vec_splat(a3, 1); a2 = vec_splat(a3, 2); @@ -199,7 +259,7 @@ template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const floa af[1] = from[1*stride]; af[2] = from[2*stride]; af[3] = from[3*stride]; - return vec_ld(0, af); + return pload<Packet4f>(af); } template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, DenseIndex stride) { @@ -208,12 +268,12 @@ template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* f ai[1] = from[1*stride]; ai[2] = from[2*stride]; ai[3] = from[3*stride]; - return vec_ld(0, ai); + return pload<Packet4i>(ai); } template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, DenseIndex stride) { float EIGEN_ALIGN16 af[4]; - vec_st(from, 0, af); + pstore<float>(af, from); to[0*stride] = af[0]; to[1*stride] = af[1]; to[2*stride] = af[2]; @@ -222,7 +282,7 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, co template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const Packet4i& from, DenseIndex stride) { int EIGEN_ALIGN16 ai[4]; - vec_st(from, 0, ai); + pstore<int>((int *)ai, from); to[0*stride] = ai[0]; to[1*stride] = ai[1]; to[2*stride] = ai[2]; @@ -283,7 +343,8 @@ template<> EIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const */ template<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b) { - Packet4f t, y_0, y_1, res; +#ifndef __VSX__ // VSX actually provides a div instruction + Packet4f t, 
y_0, y_1; // Altivec does not offer a divide instruction, we have to do a reciprocal approximation y_0 = vec_re(b); @@ -292,8 +353,10 @@ template<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const t = vec_nmsub(y_0, b, p4f_ONE); y_1 = vec_madd(y_0, t, y_0); - res = vec_madd(a, y_1, p4f_ZERO); - return res; + return vec_madd(a, y_1, p4f_ZERO); +#else + return vec_div(a, b); +#endif } template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& /*a*/, const Packet4i& /*b*/) @@ -311,7 +374,6 @@ template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_max(a, b); } template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_max(a, b); } -// Logical Operations are not supported for float, so we have to reinterpret casts using NEON intrinsics template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_and(a, b); } template<> EIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_and(a, b); } @@ -324,13 +386,10 @@ template<> EIGEN_STRONG_INLINE Packet4i pxor<Packet4i>(const Packet4i& a, const template<> EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_and(a, vec_nor(b, b)); } template<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_and(a, vec_nor(b, b)); } -template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return vec_ld(0, from); } -template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return vec_ld(0, from); } - +#ifdef _BIG_ENDIAN template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD - // Taken from 
http://developer.apple.com/hardwaredrivers/ve/alignment.html Packet16uc MSQ, LSQ; Packet16uc mask; MSQ = vec_ld(0, (unsigned char *)from); // most significant quadword @@ -350,25 +409,36 @@ template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from) mask = vec_lvsl(0, from); // create the permute mask return (Packet4i) vec_perm(MSQ, LSQ, mask); // align the data } +#else +// We also need to redefine little endian loading of Packet4i/Packet4f using VSX +template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from) +{ + EIGEN_DEBUG_ALIGNED_LOAD + return (Packet4i) vec_vsx_ld((long)from & 15, (const int*) _EIGEN_ALIGNED_PTR(from)); +} +template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from) +{ + EIGEN_DEBUG_ALIGNED_LOAD + return (Packet4f) vec_vsx_ld((long)from & 15, (const float*) _EIGEN_ALIGNED_PTR(from)); +} +#endif template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from) { Packet4f p; if((ptrdiff_t(from) % 16) == 0) p = pload<Packet4f>(from); else p = ploadu<Packet4f>(from); - return vec_perm(p, p, p16uc_DUPLICATE); + return vec_perm(p, p, p16uc_DUPLICATE32_HI); } template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int* from) { Packet4i p; if((ptrdiff_t(from) % 16) == 0) p = pload<Packet4i>(from); else p = ploadu<Packet4i>(from); - return vec_perm(p, p, p16uc_DUPLICATE); + return vec_perm(p, p, p16uc_DUPLICATE32_HI); } -template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE vec_st(from, 0, to); } -template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE vec_st(from, 0, to); } - +#ifdef _BIG_ENDIAN template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE @@ -405,15 +475,30 @@ template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& f vec_st( LSQ, 15, (unsigned char *)to ); // Store the LSQ part first vec_st(
MSQ, 0, (unsigned char *)to ); // Store the MSQ part } +#else +// We also need to redefine little endian storing of Packet4i/Packet4f using VSX +template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from) +{ + EIGEN_DEBUG_ALIGNED_STORE + vec_vsx_st(from, (long)to & 15, (int*) _EIGEN_ALIGNED_PTR(to)); +} +template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) +{ + EIGEN_DEBUG_ALIGNED_STORE + vec_vsx_st(from, (long)to & 15, (float*) _EIGEN_ALIGNED_PTR(to)); +} +#endif +#ifndef __VSX__ template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { vec_dstt(addr, DST_CTRL(2,2,32), DST_CHAN); } template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { vec_dstt(addr, DST_CTRL(2,2,32), DST_CHAN); } +#endif template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { float EIGEN_ALIGN16 x[4]; vec_st(a, 0, x); return x[0]; } template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { int EIGEN_ALIGN16 x[4]; vec_st(a, 0, x); return x[0]; } -template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a) { return (Packet4f)vec_perm((Packet16uc)a,(Packet16uc)a, p16uc_REVERSE); } -template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a) { return (Packet4i)vec_perm((Packet16uc)a,(Packet16uc)a, p16uc_REVERSE); } +template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a) { return (Packet4f)vec_perm((Packet16uc)a,(Packet16uc)a, p16uc_REVERSE32); } +template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a) { return (Packet4i)vec_perm((Packet16uc)a,(Packet16uc)a, p16uc_REVERSE32); } template<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a) { return vec_abs(a); } template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a) { return vec_abs(a); } @@ -460,7 +545,11 @@ template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a) { Packet4i sum; sum = vec_sums(a, p4i_ZERO); +#ifdef _BIG_ENDIAN sum = vec_sld(sum, p4i_ZERO, 12); +#else + sum
= vec_sld(p4i_ZERO, sum, 4); +#endif return pfirst(sum); } @@ -547,8 +636,25 @@ struct palign_impl<Offset,Packet4f> { static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second) { - if (Offset!=0) - first = vec_sld(first, second, Offset*4); +#ifdef _BIG_ENDIAN + switch (Offset % 4) { + case 1: + first = vec_sld(first, second, 4); break; + case 2: + first = vec_sld(first, second, 8); break; + case 3: + first = vec_sld(first, second, 12); break; + } +#else + switch (Offset % 4) { + case 1: + first = vec_sld(second, first, 12); break; + case 2: + first = vec_sld(second, first, 8); break; + case 3: + first = vec_sld(second, first, 4); break; + } +#endif } }; @@ -557,8 +663,25 @@ struct palign_impl<Offset,Packet4i> { static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second) { - if (Offset!=0) - first = vec_sld(first, second, Offset*4); +#ifdef _BIG_ENDIAN + switch (Offset % 4) { + case 1: + first = vec_sld(first, second, 4); break; + case 2: + first = vec_sld(first, second, 8); break; + case 3: + first = vec_sld(first, second, 12); break; + } +#else + switch (Offset % 4) { + case 1: + first = vec_sld(second, first, 12); break; + case 2: + first = vec_sld(second, first, 8); break; + case 3: + first = vec_sld(second, first, 4); break; + } +#endif } }; @@ -588,6 +711,222 @@ ptranspose(PacketBlock<Packet4i,4>& kernel) { kernel.packet[3] = vec_mergel(t1, t3); } + +//---------- double ---------- +#ifdef __VSX__ +typedef __vector double Packet2d; +typedef __vector unsigned long long Packet2ul; +typedef __vector long long Packet2l; + +static Packet2l p2l_ZERO = (Packet2l) p4i_ZERO; +static Packet2d p2d_ONE = { 1.0, 1.0 }; +static Packet2d p2d_ZERO = (Packet2d) p4f_ZERO; +static Packet2d p2d_ZERO_ = { -0.0, -0.0 }; + +#ifdef _BIG_ENDIAN +static Packet2d p2d_COUNTDOWN = (Packet2d) vec_sld((Packet16uc) p2d_ZERO, (Packet16uc) p2d_ONE, 8); +#else +static Packet2d p2d_COUNTDOWN = (Packet2d) vec_sld((Packet16uc) p2d_ONE, (Packet16uc) p2d_ZERO, 8); 
+#endif + +static EIGEN_STRONG_INLINE Packet2d vec_splat_dbl(Packet2d& a, int index) +{ + switch (index) { + case 0: + return (Packet2d) vec_perm(a, a, p16uc_PSET64_HI); + case 1: + return (Packet2d) vec_perm(a, a, p16uc_PSET64_LO); + } + return a; +} + +template<> struct packet_traits<double> : default_packet_traits +{ + typedef Packet2d type; + typedef Packet2d half; + enum { + Vectorizable = 1, + AlignedOnScalar = 1, + size=2, + HasHalfPacket = 0, + + HasDiv = 1, + HasExp = 0, + HasSqrt = 0 + }; +}; + +template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2}; typedef Packet2d half; }; + + +inline std::ostream & operator <<(std::ostream & s, const Packet2d & v) +{ + union { + Packet2d v; + double n[2]; + } vt; + vt.v = v; + s << vt.n[0] << ", " << vt.n[1]; + return s; +} + +// Need to define them first or we get specialization after instantiation errors +template<> EIGEN_STRONG_INLINE Packet2d pload<Packet2d>(const double* from) { EIGEN_DEBUG_ALIGNED_LOAD return (Packet2d) vec_ld(0, (const float *) from); } //FIXME + +template<> EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet2d& from) { EIGEN_DEBUG_ALIGNED_STORE vec_st((Packet4f)from, 0, (float *)to); } + +template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { + double EIGEN_ALIGN16 af[2]; + af[0] = from; + Packet2d vc = pload<Packet2d>(af); + vc = vec_splat_dbl(vc, 0); + return vc; +} +template<> EIGEN_STRONG_INLINE void +pbroadcast4<Packet2d>(const double *a, + Packet2d& a0, Packet2d& a1, Packet2d& a2, Packet2d& a3) +{ + a1 = pload<Packet2d>(a); + a0 = vec_splat_dbl(a1, 0); + a1 = vec_splat_dbl(a1, 1); + a3 = pload<Packet2d>(a+2); + a2 = vec_splat_dbl(a3, 0); + a3 = vec_splat_dbl(a3, 1); +} +template<> EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, DenseIndex stride) +{ + double EIGEN_ALIGN16 af[2]; + af[0] = from[0*stride]; + af[1] = from[1*stride]; + return pload<Packet2d>(af); +} +template<> EIGEN_DEVICE_FUNC 
inline void pscatter<double, Packet2d>(double* to, const Packet2d& from, DenseIndex stride) +{ + double EIGEN_ALIGN16 af[2]; + pstore<double>(af, from); + to[0*stride] = af[0]; + to[1*stride] = af[1]; +} +template<> EIGEN_STRONG_INLINE Packet2d plset<double>(const double& a) { return vec_add(pset1<Packet2d>(a), p2d_COUNTDOWN); } + +template<> EIGEN_STRONG_INLINE Packet2d padd<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_add(a,b); } + +template<> EIGEN_STRONG_INLINE Packet2d psub<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_sub(a,b); } + +template<> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a) { return psub<Packet2d>(p2d_ZERO, a); } + +template<> EIGEN_STRONG_INLINE Packet2d pconj(const Packet2d& a) { return a; } + +template<> EIGEN_STRONG_INLINE Packet2d pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_madd(a,b,p2d_ZERO); } +template<> EIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_div(a,b); } + +// for some weird raisons, it has to be overloaded for packet of integers +template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return vec_madd(a, b, c); } + +template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_min(a, b); } + +template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_max(a, b); } + +template<> EIGEN_STRONG_INLINE Packet2d pand<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_and(a, b); } + +template<> EIGEN_STRONG_INLINE Packet2d por<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_or(a, b); } + +template<> EIGEN_STRONG_INLINE Packet2d pxor<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_xor(a, b); } + +template<> EIGEN_STRONG_INLINE Packet2d pandnot<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_and(a, vec_nor(b, b)); } + +template<> 
EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from) +{ + EIGEN_DEBUG_ALIGNED_LOAD + return (Packet2d) vec_vsx_ld((long)from & 15, (const float*) _EIGEN_ALIGNED_PTR(from)); +} +template<> EIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double* from) +{ + Packet2d p; + if((ptrdiff_t(from) % 16) == 0) p = pload<Packet2d>(from); + else p = ploadu<Packet2d>(from); + return vec_perm(p, p, p16uc_PSET64_HI); +} + +template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d& from) +{ + EIGEN_DEBUG_ALIGNED_STORE + vec_vsx_st((Packet4f)from, (long)to & 15, (float*) _EIGEN_ALIGNED_PTR(to)); +} + +template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { vec_dstt((const float *) addr, DST_CTRL(2,2,32), DST_CHAN); } + +template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { double EIGEN_ALIGN16 x[2]; pstore(x, a); return x[0]; } + +template<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a) { return (Packet2d)vec_perm((Packet16uc)a,(Packet16uc)a, p16uc_REVERSE64); } + +template<> EIGEN_STRONG_INLINE Packet2d pabs(const Packet2d& a) { return vec_abs(a); } + +template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a) +{ + Packet2d b, sum; + b = (Packet2d) vec_sld((Packet4ui) a, (Packet4ui)a, 8); + sum = vec_add(a, b); + return pfirst(sum); +} + +template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs) +{ + Packet2d v[2], sum; + v[0] = vec_add(vecs[0], (Packet2d) vec_sld((Packet4ui) vecs[0], (Packet4ui) vecs[0], 8)); + v[1] = vec_add(vecs[1], (Packet2d) vec_sld((Packet4ui) vecs[1], (Packet4ui) vecs[1], 8)); + +#ifdef _BIG_ENDIAN + sum = (Packet2d) vec_sld((Packet4ui) v[0], (Packet4ui) v[1], 8); +#else + sum = (Packet2d) vec_sld((Packet4ui) v[1], (Packet4ui) v[0], 8); +#endif + + return sum; +} +// Other reduction functions: +// mul +template<> EIGEN_STRONG_INLINE double predux_mul<Packet2d>(const Packet2d& a) +{ + return pfirst(pmul(a, 
(Packet2d)vec_sld((Packet4ui) a, (Packet4ui) a, 8))); +} + +// min +template<> EIGEN_STRONG_INLINE double predux_min<Packet2d>(const Packet2d& a) +{ + return pfirst(vec_min(a, (Packet2d) vec_sld((Packet4ui) a, (Packet4ui) a, 8))); +} + +// max +template<> EIGEN_STRONG_INLINE double predux_max<Packet2d>(const Packet2d& a) +{ + return pfirst(vec_max(a, (Packet2d) vec_sld((Packet4ui) a, (Packet4ui) a, 8))); +} + +template<int Offset> +struct palign_impl<Offset,Packet2d> +{ + static EIGEN_STRONG_INLINE void run(Packet2d& first, const Packet2d& second) + { + if (Offset == 1) +#ifdef _BIG_ENDIAN + first = (Packet2d) vec_sld((Packet4ui) first, (Packet4ui) second, 8); +#else + first = (Packet2d) vec_sld((Packet4ui) second, (Packet4ui) first, 8); +#endif + } +}; + +EIGEN_DEVICE_FUNC inline void +ptranspose(PacketBlock<Packet2d,2>& kernel) { + Packet2d t0, t1; + t0 = vec_perm(kernel.packet[0], kernel.packet[1], p16uc_TRANSPOSE64_HI); + t1 = vec_perm(kernel.packet[0], kernel.packet[1], p16uc_TRANSPOSE64_LO); + kernel.packet[0] = t0; + kernel.packet[1] = t1; +} + +#endif // __VSX__ } // end namespace internal } // end namespace Eigen diff --git a/Eigen/src/Core/arch/NEON/Complex.h b/Eigen/src/Core/arch/NEON/Complex.h index 42e7733d7..0fdcb0741 100644 --- a/Eigen/src/Core/arch/NEON/Complex.h +++ b/Eigen/src/Core/arch/NEON/Complex.h @@ -33,6 +33,7 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits Vectorizable = 1, AlignedOnScalar = 1, size = 2, + HasHalfPacket = 0, HasAdd = 1, HasSub = 1, @@ -88,7 +89,7 @@ template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, con template<> EIGEN_STRONG_INLINE Packet2cf pand <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { - return Packet2cf(vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v)))); + return Packet2cf(vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v)))); } template<> EIGEN_STRONG_INLINE Packet2cf por 
<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { @@ -252,7 +253,7 @@ template<> struct conj_helper<Packet2cf, Packet2cf, true,true> template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { - // TODO optimize it for AltiVec + // TODO optimize it for NEON Packet2cf res = conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a,b); Packet4f s, rev_s; @@ -265,11 +266,198 @@ template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, con EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet2cf,2>& kernel) { - float32x4_t tmp = vcombine_f32(vget_high_f32(kernel.packet[0].v), vget_high_f32(kernel.packet[1].v)); + Packet4f tmp = vcombine_f32(vget_high_f32(kernel.packet[0].v), vget_high_f32(kernel.packet[1].v)); kernel.packet[0].v = vcombine_f32(vget_low_f32(kernel.packet[0].v), vget_low_f32(kernel.packet[1].v)); kernel.packet[1].v = tmp; } +//---------- double ---------- +#if EIGEN_ARCH_ARM64 + +static uint64x2_t p2ul_CONJ_XOR = EIGEN_INIT_NEON_PACKET2(0x0, 0x8000000000000000); + +struct Packet1cd +{ + EIGEN_STRONG_INLINE Packet1cd() {} + EIGEN_STRONG_INLINE explicit Packet1cd(const Packet2d& a) : v(a) {} + Packet2d v; +}; + +template<> struct packet_traits<std::complex<double> > : default_packet_traits +{ + typedef Packet1cd type; + typedef Packet1cd half; + enum { + Vectorizable = 1, + AlignedOnScalar = 0, + size = 1, + HasHalfPacket = 0, + + HasAdd = 1, + HasSub = 1, + HasMul = 1, + HasDiv = 1, + HasNegate = 1, + HasAbs = 0, + HasAbs2 = 0, + HasMin = 0, + HasMax = 0, + HasSetLinear = 0 + }; +}; + +template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1}; typedef Packet1cd half; }; + +template<> EIGEN_STRONG_INLINE Packet1cd pload<Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload<Packet2d>((const double*)from)); } +template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from) { 
EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu<Packet2d>((const double*)from)); } + +template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>& from) +{ /* here we really have to use unaligned loads :( */ return ploadu<Packet1cd>(&from); } + +template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(padd<Packet2d>(a.v,b.v)); } +template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(psub<Packet2d>(a.v,b.v)); } +template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { return Packet1cd(pnegate<Packet2d>(a.v)); } +template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a) { return Packet1cd(vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(a.v), p2ul_CONJ_XOR))); } + +template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b) +{ + Packet2d v1, v2; + + // Get the real values of a + v1 = vdupq_lane_f64(vget_low_f64(a.v), 0); + // Get the real values of a + v2 = vdupq_lane_f64(vget_high_f64(a.v), 1); + // Multiply the real a with b + v1 = vmulq_f64(v1, b.v); + // Multiply the imag a with b + v2 = vmulq_f64(v2, b.v); + // Conjugate v2 + v2 = vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(v2), p2ul_CONJ_XOR)); + // Swap real/imag elements in v2. 
+ v2 = preverse<Packet2d>(v2); + // Add and return the result + return Packet1cd(vaddq_f64(v1, v2)); +} + +template<> EIGEN_STRONG_INLINE Packet1cd pand <Packet1cd>(const Packet1cd& a, const Packet1cd& b) +{ + return Packet1cd(vreinterpretq_f64_u64(vandq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v)))); +} +template<> EIGEN_STRONG_INLINE Packet1cd por <Packet1cd>(const Packet1cd& a, const Packet1cd& b) +{ + return Packet1cd(vreinterpretq_f64_u64(vorrq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v)))); +} +template<> EIGEN_STRONG_INLINE Packet1cd pxor <Packet1cd>(const Packet1cd& a, const Packet1cd& b) +{ + return Packet1cd(vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v)))); +} +template<> EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b) +{ + return Packet1cd(vreinterpretq_f64_u64(vbicq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v)))); +} + +template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from) { return pset1<Packet1cd>(*from); } + +template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, from.v); } +template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, from.v); } + +template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { EIGEN_ARM_PREFETCH((double *)addr); } + +template<> EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(const std::complex<double>* from, DenseIndex stride) +{ + Packet2d res; + res = vsetq_lane_f64(std::real(from[0*stride]), res, 0); + res = vsetq_lane_f64(std::imag(from[0*stride]), res, 1); + return Packet1cd(res); +} + +template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, 
Packet1cd>(std::complex<double>* to, const Packet1cd& from, DenseIndex stride) +{ + to[stride*0] = std::complex<double>(vgetq_lane_f64(from.v, 0), vgetq_lane_f64(from.v, 1)); +} + + +template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a) +{ + std::complex<double> EIGEN_ALIGN16 res; + pstore<std::complex<double> >(&res, a); + + return res; +} + +template<> EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) { return a; } + +template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a) { return pfirst(a); } + +template<> EIGEN_STRONG_INLINE Packet1cd preduxp<Packet1cd>(const Packet1cd* vecs) { return vecs[0]; } + +template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a) { return pfirst(a); } + +template<int Offset> +struct palign_impl<Offset,Packet1cd> +{ + static EIGEN_STRONG_INLINE void run(Packet1cd& /*first*/, const Packet1cd& /*second*/) + { + // FIXME is it sure we never have to align a Packet1cd? + // Even though a std::complex<double> has 16 bytes, it is not necessarily aligned on a 16 bytes boundary... 
+ } +}; + +template<> struct conj_helper<Packet1cd, Packet1cd, false,true> +{ + EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const + { return padd(pmul(x,y),c); } + + EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const + { + return internal::pmul(a, pconj(b)); + } +}; + +template<> struct conj_helper<Packet1cd, Packet1cd, true,false> +{ + EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const + { return padd(pmul(x,y),c); } + + EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const + { + return internal::pmul(pconj(a), b); + } +}; + +template<> struct conj_helper<Packet1cd, Packet1cd, true,true> +{ + EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const + { return padd(pmul(x,y),c); } + + EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const + { + return pconj(internal::pmul(a, b)); + } +}; + +template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b) +{ + // TODO optimize it for NEON + Packet1cd res = conj_helper<Packet1cd,Packet1cd,false,true>().pmul(a,b); + Packet2d s = pmul<Packet2d>(b.v, b.v); + Packet2d rev_s = preverse<Packet2d>(s); + + return Packet1cd(pdiv(res.v, padd<Packet2d>(s,rev_s))); +} + +EIGEN_STRONG_INLINE Packet1cd pcplxflip/*<Packet1cd>*/(const Packet1cd& x) +{ + return Packet1cd(preverse(Packet2d(x.v))); +} + +EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet1cd,2>& kernel) +{ + Packet2d tmp = vcombine_f64(vget_high_f64(kernel.packet[0].v), vget_high_f64(kernel.packet[1].v)); + kernel.packet[0].v = vcombine_f64(vget_low_f64(kernel.packet[0].v), vget_low_f64(kernel.packet[1].v)); + kernel.packet[1].v = tmp; +} +#endif // EIGEN_ARCH_ARM64 } // end namespace internal diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h index 
0504c095c..9afd86bec 100644 --- a/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/Eigen/src/Core/arch/NEON/PacketMath.h @@ -20,14 +20,24 @@ namespace internal { #define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8 #endif +#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD +#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD +#endif + +#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD +#define EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD +#endif + // FIXME NEON has 16 quad registers, but since the current register allocator // is so bad, it is much better to reduce it to 8 #ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS -#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 8 +#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 16 #endif +typedef float32x2_t Packet2f; typedef float32x4_t Packet4f; typedef int32x4_t Packet4i; +typedef int32x2_t Packet2i; typedef uint32x4_t Packet4ui; #define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \ @@ -39,7 +49,7 @@ typedef uint32x4_t Packet4ui; #define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \ const Packet4i p4i_##NAME = pset1<Packet4i>(X) -#if defined(__llvm__) && !defined(__clang__) +#if EIGEN_COMP_LLVM && !EIGEN_COMP_CLANG //Special treatment for Apple's llvm-gcc, its NEON packet types are unions #define EIGEN_INIT_NEON_PACKET2(X, Y) {{X, Y}} #define EIGEN_INIT_NEON_PACKET4(X, Y, Z, W) {{X, Y, Z, W}} @@ -52,11 +62,11 @@ typedef uint32x4_t Packet4ui; // arm64 does have the pld instruction. 
If available, let's trust the __builtin_prefetch built-in function // which available on LLVM and GCC (at least) -#if EIGEN_HAS_BUILTIN(__builtin_prefetch) || defined(__GNUC__) +#if EIGEN_HAS_BUILTIN(__builtin_prefetch) || EIGEN_COMP_GNUC #define EIGEN_ARM_PREFETCH(ADDR) __builtin_prefetch(ADDR); #elif defined __pld #define EIGEN_ARM_PREFETCH(ADDR) __pld(ADDR) -#elif !defined(__aarch64__) +#elif !EIGEN_ARCH_ARM64 #define EIGEN_ARM_PREFETCH(ADDR) __asm__ __volatile__ ( " pld [%[addr]]\n" :: [addr] "r" (ADDR) : "cc" ); #else // by default no explicit prefetching @@ -66,11 +76,12 @@ typedef uint32x4_t Packet4ui; template<> struct packet_traits<float> : default_packet_traits { typedef Packet4f type; - typedef Packet4f half; + typedef Packet2f half; enum { Vectorizable = 1, AlignedOnScalar = 1, size = 4, + HasHalfPacket=1, HasDiv = 1, // FIXME check the Has* @@ -84,16 +95,17 @@ template<> struct packet_traits<float> : default_packet_traits template<> struct packet_traits<int> : default_packet_traits { typedef Packet4i type; - typedef Packet4i half; + typedef Packet2i half; enum { Vectorizable = 1, AlignedOnScalar = 1, - size=4 + size=4, + HasHalfPacket=1 // FIXME check the Has* }; }; -#if EIGEN_GNUC_AT_MOST(4,4) && !defined(__llvm__) +#if EIGEN_GNUC_AT_MOST(4,4) && !EIGEN_COMP_LLVM // workaround gcc 4.2, 4.3 and 4.4 compilatin issue EIGEN_STRONG_INLINE float32x4_t vld1q_f32(const float* x) { return ::vld1q_f32((const float32_t*)x); } EIGEN_STRONG_INLINE float32x2_t vld1_f32 (const float* x) { return ::vld1_f32 ((const float32_t*)x); } @@ -136,6 +148,9 @@ template<> EIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const template<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b) { +#if EIGEN_ARCH_ARM64 + return vdivq_f32(a,b); +#else Packet4f inv, restep, div; // NEON does not offer a divide instruction, we have to do a reciprocal approximation @@ -154,14 +169,27 @@ template<> EIGEN_STRONG_INLINE Packet4f 
pdiv<Packet4f>(const Packet4f& a, const div = vmulq_f32(a, inv); return div; +#endif } + template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& /*a*/, const Packet4i& /*b*/) { eigen_assert(false && "packet integer division are not supported by NEON"); return pset1<Packet4i>(0); } -// for some weird raisons, it has to be overloaded for packet of integers +#ifdef __ARM_FEATURE_FMA +// See bug 936. +// FMA is available on VFPv4 i.e. when compiling with -mfpu=neon-vfpv4. +// FMA is a true fused multiply-add i.e. only 1 rounding at the end, no intermediate rounding. +// MLA is not fused i.e. does 2 roundings. +// In addition to giving better accuracy, FMA also gives better performance here on a Krait (Nexus 4): +// MLA: 10 GFlop/s ; FMA: 12 GFlops/s. +template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vfmaq_f32(c,a,b); } +#else template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vmlaq_f32(c,a,b); } +#endif + +// No FMA instruction for int, so use MLA unconditionally. 
template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return vmlaq_s32(c,a,b); } template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) { return vminq_f32(a,b); } @@ -472,6 +500,193 @@ ptranspose(PacketBlock<Packet4i,4>& kernel) { kernel.packet[3] = vcombine_s32(vget_high_s32(tmp1.val[1]), vget_high_s32(tmp2.val[1])); } +//---------- double ---------- +#if EIGEN_ARCH_ARM64 + +#if (EIGEN_COMP_GNUC_STRICT && defined(__ANDROID__)) || defined(__apple_build_version__) +// Bug 907: workaround missing declarations of the following two functions in the ADK +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_u64_f64 (float64x2_t __a) +{ + return (uint64x2_t) __a; +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_f64_u64 (uint64x2_t __a) +{ + return (float64x2_t) __a; +} +#endif + +typedef float64x2_t Packet2d; +typedef float64x1_t Packet1d; + +template<> struct packet_traits<double> : default_packet_traits +{ + typedef Packet2d type; + typedef Packet1d half; + enum { + Vectorizable = 1, + AlignedOnScalar = 1, + size = 2, + HasHalfPacket=1, + + HasDiv = 1, + // FIXME check the Has* + HasSin = 0, + HasCos = 0, + HasLog = 0, + HasExp = 0, + HasSqrt = 0 + }; +}; + +template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2}; typedef Packet2d half; }; + +template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return vdupq_n_f64(from); } + +template<> EIGEN_STRONG_INLINE Packet2d plset<double>(const double& a) +{ + Packet2d countdown = EIGEN_INIT_NEON_PACKET2(0, 1); + return vaddq_f64(pset1<Packet2d>(a), countdown); +} +template<> EIGEN_STRONG_INLINE Packet2d padd<Packet2d>(const Packet2d& a, const Packet2d& b) { return vaddq_f64(a,b); } + +template<> EIGEN_STRONG_INLINE Packet2d psub<Packet2d>(const Packet2d& a, const Packet2d& b) { return vsubq_f64(a,b); } + 
+template<> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a) { return vnegq_f64(a); } + +template<> EIGEN_STRONG_INLINE Packet2d pconj(const Packet2d& a) { return a; } + +template<> EIGEN_STRONG_INLINE Packet2d pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return vmulq_f64(a,b); } + +template<> EIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const Packet2d& b) { return vdivq_f64(a,b); } + +#ifdef __ARM_FEATURE_FMA +// See bug 936. See above comment about FMA for float. +template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return vfmaq_f64(c,a,b); } +#else +template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return vmlaq_f64(c,a,b); } +#endif + +template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { return vminq_f64(a,b); } + +template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) { return vmaxq_f64(a,b); } + +// Logical Operations are not supported for float, so we have to reinterpret casts using NEON intrinsics +template<> EIGEN_STRONG_INLINE Packet2d pand<Packet2d>(const Packet2d& a, const Packet2d& b) +{ + return vreinterpretq_f64_u64(vandq_u64(vreinterpretq_u64_f64(a),vreinterpretq_u64_f64(b))); +} + +template<> EIGEN_STRONG_INLINE Packet2d por<Packet2d>(const Packet2d& a, const Packet2d& b) +{ + return vreinterpretq_f64_u64(vorrq_u64(vreinterpretq_u64_f64(a),vreinterpretq_u64_f64(b))); +} + +template<> EIGEN_STRONG_INLINE Packet2d pxor<Packet2d>(const Packet2d& a, const Packet2d& b) +{ + return vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(a),vreinterpretq_u64_f64(b))); +} + +template<> EIGEN_STRONG_INLINE Packet2d pandnot<Packet2d>(const Packet2d& a, const Packet2d& b) +{ + return vreinterpretq_f64_u64(vbicq_u64(vreinterpretq_u64_f64(a),vreinterpretq_u64_f64(b))); +} + +template<> EIGEN_STRONG_INLINE Packet2d pload<Packet2d>(const double* 
from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_f64(from); } + +template<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from) { EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_f64(from); } + +template<> EIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double* from) +{ + return vld1q_dup_f64(from); +} +template<> EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet2d& from) { EIGEN_DEBUG_ALIGNED_STORE vst1q_f64(to, from); } + +template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_f64(to, from); } + +template<> EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, DenseIndex stride) +{ + Packet2d res; + res = vsetq_lane_f64(from[0*stride], res, 0); + res = vsetq_lane_f64(from[1*stride], res, 1); + return res; +} +template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to, const Packet2d& from, DenseIndex stride) +{ + to[stride*0] = vgetq_lane_f64(from, 0); + to[stride*1] = vgetq_lane_f64(from, 1); +} +template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { EIGEN_ARM_PREFETCH(addr); } + +// FIXME only store the 2 first elements ? 
+template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { return vgetq_lane_f64(a, 0); } + +template<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a) { return vcombine_f64(vget_high_f64(a), vget_low_f64(a)); } + +template<> EIGEN_STRONG_INLINE Packet2d pabs(const Packet2d& a) { return vabsq_f64(a); } + +#if EIGEN_COMP_CLANG && defined(__apple_build_version__) +// workaround ICE, see bug 907 +template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a) { return (vget_low_f64(a) + vget_high_f64(a))[0]; } +#else +template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a) { return vget_lane_f64(vget_low_f64(a) + vget_high_f64(a), 0); } +#endif + +template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs) +{ + float64x2_t trn1, trn2; + + // NEON zip performs interleaving of the supplied vectors. + // We perform two interleaves in a row to acquire the transposed vector + trn1 = vzip1q_f64(vecs[0], vecs[1]); + trn2 = vzip2q_f64(vecs[0], vecs[1]); + + // Do the addition of the resulting vectors + return vaddq_f64(trn1, trn2); +} +// Other reduction functions: +// mul +#if EIGEN_COMP_CLANG && defined(__apple_build_version__) +template<> EIGEN_STRONG_INLINE double predux_mul<Packet2d>(const Packet2d& a) { return (vget_low_f64(a) * vget_high_f64(a))[0]; } +#else +template<> EIGEN_STRONG_INLINE double predux_mul<Packet2d>(const Packet2d& a) { return vget_lane_f64(vget_low_f64(a) * vget_high_f64(a), 0); } +#endif + +// min +template<> EIGEN_STRONG_INLINE double predux_min<Packet2d>(const Packet2d& a) { return vgetq_lane_f64(vpminq_f64(a, a), 0); } + +// max +template<> EIGEN_STRONG_INLINE double predux_max<Packet2d>(const Packet2d& a) { return vgetq_lane_f64(vpmaxq_f64(a, a), 0); } + +// this PALIGN_NEON business is to work around a bug in LLVM Clang 3.0 causing incorrect compilation errors, +// see bug 347 and this LLVM bug: http://llvm.org/bugs/show_bug.cgi?id=11074 +#define 
PALIGN_NEON(Offset,Type,Command) \ +template<>\ +struct palign_impl<Offset,Type>\ +{\ + EIGEN_STRONG_INLINE static void run(Type& first, const Type& second)\ + {\ + if (Offset!=0)\ + first = Command(first, second, Offset);\ + }\ +};\ + +PALIGN_NEON(0,Packet2d,vextq_f64) +PALIGN_NEON(1,Packet2d,vextq_f64) +#undef PALIGN_NEON + +EIGEN_DEVICE_FUNC inline void +ptranspose(PacketBlock<Packet2d,2>& kernel) { + float64x2_t trn1 = vzip1q_f64(kernel.packet[0], kernel.packet[1]); + float64x2_t trn2 = vzip2q_f64(kernel.packet[0], kernel.packet[1]); + + kernel.packet[0] = trn1; + kernel.packet[1] = trn2; +} +#endif // EIGEN_ARCH_ARM64 + } // end namespace internal } // end namespace Eigen diff --git a/Eigen/src/Core/arch/SSE/MathFunctions.h b/Eigen/src/Core/arch/SSE/MathFunctions.h index 8f78b3a6c..9ffba5b41 100644 --- a/Eigen/src/Core/arch/SSE/MathFunctions.h +++ b/Eigen/src/Core/arch/SSE/MathFunctions.h @@ -52,7 +52,7 @@ Packet4f plog<Packet4f>(const Packet4f& _x) Packet4i emm0; - Packet4f invalid_mask = _mm_cmplt_ps(x, _mm_setzero_ps()); + Packet4f invalid_mask = _mm_cmpnge_ps(x, _mm_setzero_ps()); // not greater equal is true if x is NaN Packet4f iszero_mask = _mm_cmpeq_ps(x, _mm_setzero_ps()); x = pmax(x, p4f_min_norm_pos); /* cut off denormalized stuff */ @@ -167,7 +167,7 @@ Packet4f pexp<Packet4f>(const Packet4f& _x) emm0 = _mm_cvttps_epi32(fx); emm0 = _mm_add_epi32(emm0, p4i_0x7f); emm0 = _mm_slli_epi32(emm0, 23); - return pmul(y, Packet4f(_mm_castsi128_ps(emm0))); + return pmax(pmul(y, Packet4f(_mm_castsi128_ps(emm0))), _x); } template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet2d pexp<Packet2d>(const Packet2d& _x) @@ -241,7 +241,7 @@ Packet2d pexp<Packet2d>(const Packet2d& _x) emm0 = _mm_add_epi32(emm0, p4i_1023_0); emm0 = _mm_slli_epi32(emm0, 20); emm0 = _mm_shuffle_epi32(emm0, _MM_SHUFFLE(1,2,0,3)); - return pmul(x, Packet2d(_mm_castsi128_pd(emm0))); + return pmax(pmul(x, Packet2d(_mm_castsi128_pd(emm0))), _x); } /* evaluation of 4 
sines at onces, using SSE2 intrinsics. diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index 6923c88ec..3befd4c25 100755 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -22,13 +22,13 @@ namespace internal { #define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS (2*sizeof(void*)) #endif -#ifdef EIGEN_VECTORIZE_FMA -#ifndef EIGEN_HAS_FUSED_MADD -#define EIGEN_HAS_FUSED_MADD 1 +#ifdef __FMA__ +#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD +#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD 1 #endif #endif -#if defined EIGEN_VECTORIZE_AVX && defined __GNUC__ && !(defined __clang__ || defined __INTEL_COMPILER) +#if defined EIGEN_VECTORIZE_AVX && EIGEN_COMP_GNUC_STRICT // With GCC's default ABI version, a __m128 or __m256 are the same types and therefore we cannot // have overloads for both types without linking error. // One solution is to increase ABI version using -fabi-version=4 (or greater). @@ -147,7 +147,7 @@ template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4} template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2}; typedef Packet2d half; }; template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4}; typedef Packet4i half; }; -#if defined(_MSC_VER) && (_MSC_VER==1500) +#if EIGEN_COMP_MSVC==1500 // Workaround MSVC 9 internal compiler error. // TODO: It has been detected with win64 builds (amd64), so let's check whether it also happens in 32bits+SSE mode // TODO: let's check whether there does not exist a better fix, like adding a pset0() function. (it crashed on pset1(0)). @@ -165,7 +165,7 @@ template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) { re // Using inline assembly is also not an option because then gcc fails to reorder properly the instructions. // Therefore, we introduced the pload1 functions to be used in product kernels for which bug 203 does not apply. 
// Also note that with AVX, we want it to generate a vbroadcastss. -#if (defined __GNUC__) && (!defined __INTEL_COMPILER) && (!defined __clang__) && (!defined __AVX__) +#if EIGEN_COMP_GNUC_STRICT && (!defined __AVX__) template<> EIGEN_STRONG_INLINE Packet4f pload1<Packet4f>(const float *from) { return vec4f_swizzle1(_mm_load_ss(from),0,0,0,0); } @@ -282,10 +282,10 @@ template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from) { E template<> EIGEN_STRONG_INLINE Packet2d pload<Packet2d>(const double* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_pd(from); } template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_si128(reinterpret_cast<const __m128i*>(from)); } -#if defined(_MSC_VER) +#if EIGEN_COMP_MSVC template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from) { EIGEN_DEBUG_UNALIGNED_LOAD - #if (_MSC_VER==1600) + #if (EIGEN_COMP_MSVC==1600) // NOTE Some version of MSVC10 generates bad code when using _mm_loadu_ps // (i.e., it does not generate an unaligned load!! // TODO On most architectures this version should also be faster than a single _mm_loadu_ps @@ -307,11 +307,11 @@ template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from) { E // TODO: do the same for MSVC (ICC is compatible) // NOTE: with the code below, MSVC's compiler crashes! 
-#if defined(__GNUC__) && (defined(__i386__) || (defined(__x86_64) && EIGEN_GNUC_AT_LEAST(4, 8))) +#if EIGEN_COMP_GNUC && (EIGEN_ARCH_i386 || (EIGEN_ARCH_x86_64 && EIGEN_GNUC_AT_LEAST(4, 8))) // bug 195: gcc/i386 emits weird x87 fldl/fstpl instructions for _mm_load_sd #define EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS 1 #define EIGEN_AVOID_CUSTOM_UNALIGNED_STORES 1 -#elif defined(__clang__) +#elif EIGEN_COMP_CLANG // bug 201: Segfaults in __mm_loadh_pd with clang 2.8 #define EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS 1 #define EIGEN_AVOID_CUSTOM_UNALIGNED_STORES 0 @@ -439,13 +439,13 @@ template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_p template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); } #endif -#if defined(_MSC_VER) && defined(_WIN64) && !defined(__INTEL_COMPILER) +#if EIGEN_COMP_MSVC_STRICT && EIGEN_OS_WIN64 // The temporary variable fixes an internal compilation error in vs <= 2008 and a wrong-result bug in vs 2010 // Direct of the struct members fixed bug #62. 
template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { return a.m128_f32[0]; } template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { return a.m128d_f64[0]; } template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { int x = _mm_cvtsi128_si32(a); return x; } -#elif defined(_MSC_VER) && !defined(__INTEL_COMPILER) +#elif EIGEN_COMP_MSVC_STRICT // The temporary variable fixes an internal compilation error in vs <= 2008 and a wrong-result bug in vs 2010 template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { float x = _mm_cvtss_f32(a); return x; } template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { double x = _mm_cvtsd_f64(a); return x; } @@ -680,7 +680,7 @@ template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a) #endif // EIGEN_VECTORIZE_SSE4_1 } -#if (defined __GNUC__) +#if EIGEN_COMP_GNUC // template <> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) // { // Packet4f res = b; diff --git a/Eigen/src/Core/functors/AssignmentFunctors.h b/Eigen/src/Core/functors/AssignmentFunctors.h index ae264aa64..161b0aa93 100644 --- a/Eigen/src/Core/functors/AssignmentFunctors.h +++ b/Eigen/src/Core/functors/AssignmentFunctors.h @@ -31,7 +31,7 @@ template<typename Scalar> struct functor_traits<assign_op<Scalar> > { enum { Cost = NumTraits<Scalar>::ReadCost, - PacketAccess = packet_traits<Scalar>::IsVectorized + PacketAccess = packet_traits<Scalar>::Vectorizable }; }; @@ -73,7 +73,7 @@ template<typename Scalar> struct functor_traits<sub_assign_op<Scalar> > { enum { Cost = NumTraits<Scalar>::ReadCost + NumTraits<Scalar>::AddCost, - PacketAccess = packet_traits<Scalar>::HasAdd + PacketAccess = packet_traits<Scalar>::HasSub }; }; @@ -81,22 +81,24 @@ struct functor_traits<sub_assign_op<Scalar> > { * \brief Template functor for scalar/packet assignment with multiplication * */ -template<typename Scalar> struct mul_assign_op { 
+template<typename DstScalar, typename SrcScalar=DstScalar> +struct mul_assign_op { EIGEN_EMPTY_STRUCT_CTOR(mul_assign_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const { a *= b; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(DstScalar& a, const SrcScalar& b) const { a *= b; } template<int Alignment, typename Packet> - EIGEN_STRONG_INLINE void assignPacket(Scalar* a, const Packet& b) const - { internal::pstoret<Scalar,Packet,Alignment>(a,internal::pmul(internal::ploadt<Packet,Alignment>(a),b)); } + EIGEN_STRONG_INLINE void assignPacket(DstScalar* a, const Packet& b) const + { internal::pstoret<DstScalar,Packet,Alignment>(a,internal::pmul(internal::ploadt<Packet,Alignment>(a),b)); } }; -template<typename Scalar> -struct functor_traits<mul_assign_op<Scalar> > { +template<typename DstScalar, typename SrcScalar> +struct functor_traits<mul_assign_op<DstScalar,SrcScalar> > { enum { - Cost = NumTraits<Scalar>::ReadCost + NumTraits<Scalar>::MulCost, - PacketAccess = packet_traits<Scalar>::HasMul + Cost = NumTraits<DstScalar>::ReadCost + NumTraits<DstScalar>::MulCost, + PacketAccess = is_same<DstScalar,SrcScalar>::value && packet_traits<DstScalar>::HasMul }; }; +template<typename DstScalar,typename SrcScalar> struct functor_is_product_like<mul_assign_op<DstScalar,SrcScalar> > { enum { ret = 1 }; }; /** \internal * \brief Template functor for scalar/packet assignment with diviving @@ -115,13 +117,13 @@ template<typename Scalar> struct functor_traits<div_assign_op<Scalar> > { enum { Cost = NumTraits<Scalar>::ReadCost + NumTraits<Scalar>::MulCost, - PacketAccess = packet_traits<Scalar>::HasMul + PacketAccess = packet_traits<Scalar>::HasDiv }; }; /** \internal - * \brief Template functor for scalar/packet assignment with swaping + * \brief Template functor for scalar/packet assignment with swapping * * It works as follow. 
For a non-vectorized evaluation loop, we have: * for(i) func(A.coeffRef(i), B.coeff(i)); @@ -140,8 +142,13 @@ template<typename Scalar> struct swap_assign_op { EIGEN_EMPTY_STRUCT_CTOR(swap_assign_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const { +#ifdef __CUDACC__ + // FIXME is there some kind of cuda::swap? + Scalar t=b; const_cast<Scalar&>(b)=a; a=t; +#else using std::swap; swap(a,const_cast<Scalar&>(b)); +#endif } template<int LhsAlignment, int RhsAlignment, typename Packet> @@ -156,7 +163,7 @@ template<typename Scalar> struct functor_traits<swap_assign_op<Scalar> > { enum { Cost = 3 * NumTraits<Scalar>::ReadCost, - PacketAccess = packet_traits<Scalar>::IsVectorized + PacketAccess = packet_traits<Scalar>::Vectorizable }; }; diff --git a/Eigen/src/Core/functors/BinaryFunctors.h b/Eigen/src/Core/functors/BinaryFunctors.h index 157d075a7..9c96181c7 100644 --- a/Eigen/src/Core/functors/BinaryFunctors.h +++ b/Eigen/src/Core/functors/BinaryFunctors.h @@ -115,7 +115,7 @@ struct functor_traits<scalar_conj_product_op<LhsScalar,RhsScalar> > { */ template<typename Scalar> struct scalar_min_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_min_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { EIGEN_USING_STD_MATH(min); return (min)(a, b); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return numext::mini(a, b); } template<typename Packet> EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const { return internal::pmin(a,b); } @@ -138,7 +138,7 @@ struct functor_traits<scalar_min_op<Scalar> > { */ template<typename Scalar> struct scalar_max_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_max_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { EIGEN_USING_STD_MATH(max); return (max)(a, b); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() 
(const Scalar& a, const Scalar& b) const { return numext::maxi(a, b); } template<typename Packet> EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const { return internal::pmax(a,b); } @@ -164,8 +164,6 @@ template<typename Scalar> struct scalar_hypot_op { // typedef typename NumTraits<Scalar>::Real result_type; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& _x, const Scalar& _y) const { - EIGEN_USING_STD_MATH(max); - EIGEN_USING_STD_MATH(min); using std::sqrt; Scalar p, qp; if(_x>_y) diff --git a/Eigen/src/Core/products/CoeffBasedProduct.h b/Eigen/src/Core/products/CoeffBasedProduct.h deleted file mode 100644 index 637513132..000000000 --- a/Eigen/src/Core/products/CoeffBasedProduct.h +++ /dev/null @@ -1,452 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com> -// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_COEFFBASED_PRODUCT_H -#define EIGEN_COEFFBASED_PRODUCT_H - -namespace Eigen { - -namespace internal { - -/********************************************************************************* -* Coefficient based product implementation. -* It is designed for the following use cases: -* - small fixed sizes -* - lazy products -*********************************************************************************/ - -/* Since the all the dimensions of the product are small, here we can rely - * on the generic Assign mechanism to evaluate the product per coeff (or packet). - * - * Note that here the inner-loops should always be unrolled. 
- */ - -template<int Traversal, int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar> -struct product_coeff_impl; - -template<int StorageOrder, int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode> -struct product_packet_impl; - -template<typename LhsNested, typename RhsNested, int NestingFlags> -struct traits<CoeffBasedProduct<LhsNested,RhsNested,NestingFlags> > -{ - typedef MatrixXpr XprKind; - typedef typename remove_all<LhsNested>::type _LhsNested; - typedef typename remove_all<RhsNested>::type _RhsNested; - typedef typename scalar_product_traits<typename _LhsNested::Scalar, typename _RhsNested::Scalar>::ReturnType Scalar; - typedef typename promote_storage_type<typename traits<_LhsNested>::StorageKind, - typename traits<_RhsNested>::StorageKind>::ret StorageKind; - typedef typename promote_index_type<typename traits<_LhsNested>::Index, - typename traits<_RhsNested>::Index>::type Index; - - enum { - LhsCoeffReadCost = _LhsNested::CoeffReadCost, - RhsCoeffReadCost = _RhsNested::CoeffReadCost, - LhsFlags = _LhsNested::Flags, - RhsFlags = _RhsNested::Flags, - - RowsAtCompileTime = _LhsNested::RowsAtCompileTime, - ColsAtCompileTime = _RhsNested::ColsAtCompileTime, - InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(_LhsNested::ColsAtCompileTime, _RhsNested::RowsAtCompileTime), - - MaxRowsAtCompileTime = _LhsNested::MaxRowsAtCompileTime, - MaxColsAtCompileTime = _RhsNested::MaxColsAtCompileTime, - - LhsRowMajor = LhsFlags & RowMajorBit, - RhsRowMajor = RhsFlags & RowMajorBit, - - SameType = is_same<typename _LhsNested::Scalar,typename _RhsNested::Scalar>::value, - - CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit) - && (ColsAtCompileTime == Dynamic - || ( (ColsAtCompileTime % packet_traits<Scalar>::size) == 0 - && (RhsFlags&AlignedBit) - ) - ), - - CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit) - && (RowsAtCompileTime == Dynamic - || ( (RowsAtCompileTime % packet_traits<Scalar>::size) == 0 - && 
(LhsFlags&AlignedBit) - ) - ), - - EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1 - : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0 - : (RhsRowMajor && !CanVectorizeLhs), - - Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit) - | (EvalToRowMajor ? RowMajorBit : 0) - | NestingFlags - | (CanVectorizeLhs ? (LhsFlags & AlignedBit) : 0) - | (CanVectorizeRhs ? (RhsFlags & AlignedBit) : 0) - // TODO enable vectorization for mixed types - | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0), - - CoeffReadCost = InnerSize == Dynamic ? Dynamic - : InnerSize * (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost) - + (InnerSize - 1) * NumTraits<Scalar>::AddCost, - - /* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside - * of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner - * loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect - * the Flags, it is safe to make this value depend on ActualPacketAccessBit, that doesn't affect the ABI. 
- */ - CanVectorizeInner = SameType - && LhsRowMajor - && (!RhsRowMajor) - && (LhsFlags & RhsFlags & ActualPacketAccessBit) - && (LhsFlags & RhsFlags & AlignedBit) - && (InnerSize % packet_traits<Scalar>::size == 0) - }; -}; - -} // end namespace internal - -template<typename LhsNested, typename RhsNested, int NestingFlags> -class CoeffBasedProduct - : internal::no_assignment_operator, - public MatrixBase<CoeffBasedProduct<LhsNested, RhsNested, NestingFlags> > -{ - public: - - typedef MatrixBase<CoeffBasedProduct> Base; - EIGEN_DENSE_PUBLIC_INTERFACE(CoeffBasedProduct) - typedef typename Base::PlainObject PlainObject; - - private: - - typedef typename internal::traits<CoeffBasedProduct>::_LhsNested _LhsNested; - typedef typename internal::traits<CoeffBasedProduct>::_RhsNested _RhsNested; - - enum { - PacketSize = internal::packet_traits<Scalar>::size, - InnerSize = internal::traits<CoeffBasedProduct>::InnerSize, - Unroll = CoeffReadCost != Dynamic && CoeffReadCost <= EIGEN_UNROLLING_LIMIT, - CanVectorizeInner = internal::traits<CoeffBasedProduct>::CanVectorizeInner - }; - - typedef internal::product_coeff_impl<CanVectorizeInner ? InnerVectorizedTraversal : DefaultTraversal, - Unroll ? InnerSize-1 : Dynamic, - _LhsNested, _RhsNested, Scalar> ScalarCoeffImpl; - - typedef CoeffBasedProduct<LhsNested,RhsNested,NestByRefBit> LazyCoeffBasedProductType; - - public: - - EIGEN_DEVICE_FUNC - inline CoeffBasedProduct(const CoeffBasedProduct& other) - : Base(), m_lhs(other.m_lhs), m_rhs(other.m_rhs) - {} - - template<typename Lhs, typename Rhs> - EIGEN_DEVICE_FUNC - inline CoeffBasedProduct(const Lhs& lhs, const Rhs& rhs) - : m_lhs(lhs), m_rhs(rhs) - { - // we don't allow taking products of matrices of different real types, as that wouldn't be vectorizable. - // We still allow to mix T and complex<T>. 
- EIGEN_STATIC_ASSERT((internal::scalar_product_traits<typename Lhs::RealScalar, typename Rhs::RealScalar>::Defined), - YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) - eigen_assert(lhs.cols() == rhs.rows() - && "invalid matrix product" - && "if you wanted a coeff-wise or a dot product use the respective explicit functions"); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rows() const { return m_lhs.rows(); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index cols() const { return m_rhs.cols(); } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const - { - Scalar res; - ScalarCoeffImpl::run(row, col, m_lhs, m_rhs, res); - return res; - } - - /* Allow index-based non-packet access. It is impossible though to allow index-based packed access, - * which is why we don't set the LinearAccessBit. - */ - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar coeff(Index index) const - { - Scalar res; - const Index row = RowsAtCompileTime == 1 ? 0 : index; - const Index col = RowsAtCompileTime == 1 ? index : 0; - ScalarCoeffImpl::run(row, col, m_lhs, m_rhs, res); - return res; - } - - template<int LoadMode> - EIGEN_STRONG_INLINE const PacketScalar packet(Index row, Index col) const - { - PacketScalar res; - internal::product_packet_impl<Flags&RowMajorBit ? RowMajor : ColMajor, - Unroll ? 
InnerSize-1 : Dynamic, - _LhsNested, _RhsNested, PacketScalar, LoadMode> - ::run(row, col, m_lhs, m_rhs, res); - return res; - } - - // Implicit conversion to the nested type (trigger the evaluation of the product) - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE operator const PlainObject& () const - { - m_result.lazyAssign(*this); - return m_result; - } - - EIGEN_DEVICE_FUNC const _LhsNested& lhs() const { return m_lhs; } - EIGEN_DEVICE_FUNC const _RhsNested& rhs() const { return m_rhs; } - - EIGEN_DEVICE_FUNC - const Diagonal<const LazyCoeffBasedProductType,0> diagonal() const - { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this); } - - template<int DiagonalIndex> - EIGEN_DEVICE_FUNC - const Diagonal<const LazyCoeffBasedProductType,DiagonalIndex> diagonal() const - { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this); } - - EIGEN_DEVICE_FUNC - const Diagonal<const LazyCoeffBasedProductType,Dynamic> diagonal(Index index) const - { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this).diagonal(index); } - - protected: - typename internal::add_const_on_value_type<LhsNested>::type m_lhs; - typename internal::add_const_on_value_type<RhsNested>::type m_rhs; - - mutable PlainObject m_result; -}; - -namespace internal { - -// here we need to overload the nested rule for products -// such that the nested type is a const reference to a plain matrix -template<typename Lhs, typename Rhs, int N, typename PlainObject> -struct nested<CoeffBasedProduct<Lhs,Rhs,EvalBeforeNestingBit|EvalBeforeAssigningBit>, N, PlainObject> -{ - typedef PlainObject const& type; -}; - -/*************************************************************************** -* Normal product .coeff() implementation (with meta-unrolling) -***************************************************************************/ - -/************************************** -*** Scalar path - no vectorization *** -**************************************/ - -template<int UnrollingIndex, typename 
Lhs, typename Rhs, typename RetScalar> -struct product_coeff_impl<DefaultTraversal, UnrollingIndex, Lhs, Rhs, RetScalar> -{ - typedef typename Lhs::Index Index; - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res) - { - product_coeff_impl<DefaultTraversal, UnrollingIndex-1, Lhs, Rhs, RetScalar>::run(row, col, lhs, rhs, res); - res += lhs.coeff(row, UnrollingIndex) * rhs.coeff(UnrollingIndex, col); - } -}; - -template<typename Lhs, typename Rhs, typename RetScalar> -struct product_coeff_impl<DefaultTraversal, 0, Lhs, Rhs, RetScalar> -{ - typedef typename Lhs::Index Index; - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res) - { - res = lhs.coeff(row, 0) * rhs.coeff(0, col); - } -}; - -template<typename Lhs, typename Rhs, typename RetScalar> -struct product_coeff_impl<DefaultTraversal, Dynamic, Lhs, Rhs, RetScalar> -{ - typedef typename Lhs::Index Index; - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar& res) - { - eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix"); - res = lhs.coeff(row, 0) * rhs.coeff(0, col); - for(Index i = 1; i < lhs.cols(); ++i) - res += lhs.coeff(row, i) * rhs.coeff(i, col); - } -}; - -/******************************************* -*** Scalar path with inner vectorization *** -*******************************************/ - -template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet> -struct product_coeff_vectorized_unroller -{ - typedef typename Lhs::Index Index; - enum { PacketSize = packet_traits<typename Lhs::Scalar>::size }; - static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres) - { - product_coeff_vectorized_unroller<UnrollingIndex-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres); - pres = padd(pres, pmul( 
lhs.template packet<Aligned>(row, UnrollingIndex) , rhs.template packet<Aligned>(UnrollingIndex, col) )); - } -}; - -template<typename Lhs, typename Rhs, typename Packet> -struct product_coeff_vectorized_unroller<0, Lhs, Rhs, Packet> -{ - typedef typename Lhs::Index Index; - static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres) - { - pres = pmul(lhs.template packet<Aligned>(row, 0) , rhs.template packet<Aligned>(0, col)); - } -}; - -template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar> -struct product_coeff_impl<InnerVectorizedTraversal, UnrollingIndex, Lhs, Rhs, RetScalar> -{ - typedef typename Lhs::PacketScalar Packet; - typedef typename Lhs::Index Index; - enum { PacketSize = packet_traits<typename Lhs::Scalar>::size }; - static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res) - { - Packet pres; - product_coeff_vectorized_unroller<UnrollingIndex+1-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres); - res = predux(pres); - } -}; - -template<typename Lhs, typename Rhs, int LhsRows = Lhs::RowsAtCompileTime, int RhsCols = Rhs::ColsAtCompileTime> -struct product_coeff_vectorized_dyn_selector -{ - typedef typename Lhs::Index Index; - static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) - { - res = lhs.row(row).transpose().cwiseProduct(rhs.col(col)).sum(); - } -}; - -// NOTE the 3 following specializations are because taking .col(0) on a vector is a bit slower -// NOTE maybe they are now useless since we have a specialization for Block<Matrix> -template<typename Lhs, typename Rhs, int RhsCols> -struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,RhsCols> -{ - typedef typename Lhs::Index Index; - static EIGEN_STRONG_INLINE void run(Index /*row*/, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) - { - res = 
lhs.transpose().cwiseProduct(rhs.col(col)).sum(); - } -}; - -template<typename Lhs, typename Rhs, int LhsRows> -struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,LhsRows,1> -{ - typedef typename Lhs::Index Index; - static EIGEN_STRONG_INLINE void run(Index row, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) - { - res = lhs.row(row).transpose().cwiseProduct(rhs).sum(); - } -}; - -template<typename Lhs, typename Rhs> -struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,1> -{ - typedef typename Lhs::Index Index; - static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) - { - res = lhs.transpose().cwiseProduct(rhs).sum(); - } -}; - -template<typename Lhs, typename Rhs, typename RetScalar> -struct product_coeff_impl<InnerVectorizedTraversal, Dynamic, Lhs, Rhs, RetScalar> -{ - typedef typename Lhs::Index Index; - static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) - { - product_coeff_vectorized_dyn_selector<Lhs,Rhs>::run(row, col, lhs, rhs, res); - } -}; - -/******************* -*** Packet path *** -*******************/ - -template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode> -struct product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode> -{ - typedef typename Lhs::Index Index; - static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res) - { - product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res); - res = pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex)), rhs.template packet<LoadMode>(UnrollingIndex, col), res); - } -}; - -template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode> -struct product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode> -{ - typedef typename Lhs::Index Index; - static EIGEN_STRONG_INLINE 
void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res) - { - product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res); - res = pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex), pset1<Packet>(rhs.coeff(UnrollingIndex, col)), res); - } -}; - -template<typename Lhs, typename Rhs, typename Packet, int LoadMode> -struct product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode> -{ - typedef typename Lhs::Index Index; - static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res) - { - res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col)); - } -}; - -template<typename Lhs, typename Rhs, typename Packet, int LoadMode> -struct product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode> -{ - typedef typename Lhs::Index Index; - static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res) - { - res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col))); - } -}; - -template<typename Lhs, typename Rhs, typename Packet, int LoadMode> -struct product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode> -{ - typedef typename Lhs::Index Index; - static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res) - { - eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix"); - res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col)); - for(Index i = 1; i < lhs.cols(); ++i) - res = pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode>(i, col), res); - } -}; - -template<typename Lhs, typename Rhs, typename Packet, int LoadMode> -struct product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode> -{ - typedef typename Lhs::Index Index; - static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res) - { - 
eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix"); - res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col))); - for(Index i = 1; i < lhs.cols(); ++i) - res = pmadd(lhs.template packet<LoadMode>(row, i), pset1<Packet>(rhs.coeff(i, col)), res); - } -}; - -} // end namespace internal - -} // end namespace Eigen - -#endif // EIGEN_COEFFBASED_PRODUCT_H diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h index b91786037..11e5f591d 100644 --- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h +++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h @@ -158,8 +158,8 @@ inline void computeProductBlockingSizes(SizeType& k, SizeType& m, SizeType& n, i computeProductBlockingSizes<LhsScalar,RhsScalar,1>(k, m, n, num_threads); } -#ifdef EIGEN_HAS_FUSE_CJMADD - #define MADD(CJ,A,B,C,T) C = CJ.pmadd(A,B,C); +#ifdef EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD + #define CJMADD(CJ,A,B,C,T) C = CJ.pmadd(A,B,C); #else // FIXME (a bit overkill maybe ?) 
@@ -184,8 +184,8 @@ inline void computeProductBlockingSizes(SizeType& k, SizeType& m, SizeType& n, i gebp_madd_selector<CJ,A,B,C,T>::run(cj,a,b,c,t); } - #define MADD(CJ,A,B,C,T) gebp_madd(CJ,A,B,C,T); -// #define MADD(CJ,A,B,C,T) T = B; T = CJ.pmul(A,T); C = padd(C,T); + #define CJMADD(CJ,A,B,C,T) gebp_madd(CJ,A,B,C,T); +// #define CJMADD(CJ,A,B,C,T) T = B; T = CJ.pmul(A,T); C = padd(C,T); #endif /* Vectorization logic @@ -220,7 +220,7 @@ public: nr = 4, // register block size along the M direction (currently, this one cannot be modified) -#if defined(EIGEN_HAS_FUSED_MADD) && !defined(EIGEN_VECTORIZE_ALTIVEC) +#if defined(EIGEN_HAS_SINGLE_INSTRUCTION_MADD) && !defined(EIGEN_VECTORIZE_ALTIVEC) && !defined(EIGEN_VECTORIZE_VSX) // we assume 16 registers mr = 3*LhsPacketSize, #else @@ -286,7 +286,7 @@ public: // let gcc allocate the register in which to store the result of the pmul // (in the case where there is no FMA) gcc fails to figure out how to avoid // spilling register. -#ifdef EIGEN_HAS_FUSED_MADD +#ifdef EIGEN_HAS_SINGLE_INSTRUCTION_MADD EIGEN_UNUSED_VARIABLE(tmp); c = pmadd(a,b,c); #else @@ -328,7 +328,7 @@ public: NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS, nr = 4, -#if defined(EIGEN_HAS_FUSED_MADD) && !defined(EIGEN_VECTORIZE_ALTIVEC) +#if defined(EIGEN_HAS_SINGLE_INSTRUCTION_MADD) && !defined(EIGEN_VECTORIZE_ALTIVEC) && !defined(EIGEN_VECTORIZE_VSX) // we assume 16 registers mr = 3*LhsPacketSize, #else @@ -391,7 +391,7 @@ public: EIGEN_STRONG_INLINE void madd_impl(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp, const true_type&) const { -#ifdef EIGEN_HAS_FUSED_MADD +#ifdef EIGEN_HAS_SINGLE_INSTRUCTION_MADD EIGEN_UNUSED_VARIABLE(tmp); c.v = pmadd(a.v,b,c.v); #else @@ -675,7 +675,7 @@ public: EIGEN_STRONG_INLINE void madd_impl(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp, const true_type&) const { -#ifdef EIGEN_HAS_FUSED_MADD +#ifdef EIGEN_HAS_SINGLE_INSTRUCTION_MADD EIGEN_UNUSED_VARIABLE(tmp); 
c.v = pmadd(a,b.v,c.v); #else @@ -801,31 +801,36 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga for(Index k=0; k<peeled_kc; k+=pk) { - EIGEN_ASM_COMMENT("begin gegp micro kernel 3p x 4"); + EIGEN_ASM_COMMENT("begin gebp micro kernel 3pX4"); RhsPacket B_0, T0; LhsPacket A2; #define EIGEN_GEBGP_ONESTEP(K) \ - internal::prefetch(blA+(3*K+16)*LhsProgress); \ - traits.loadLhs(&blA[(0+3*K)*LhsProgress], A0); \ - traits.loadLhs(&blA[(1+3*K)*LhsProgress], A1); \ - traits.loadLhs(&blA[(2+3*K)*LhsProgress], A2); \ - traits.loadRhs(&blB[(0+4*K)*RhsProgress], B_0); \ - traits.madd(A0, B_0, C0, T0); \ - traits.madd(A1, B_0, C4, T0); \ - traits.madd(A2, B_0, C8, B_0); \ - traits.loadRhs(&blB[1+4*K*RhsProgress], B_0); \ - traits.madd(A0, B_0, C1, T0); \ - traits.madd(A1, B_0, C5, T0); \ - traits.madd(A2, B_0, C9, B_0); \ - traits.loadRhs(&blB[2+4*K*RhsProgress], B_0); \ - traits.madd(A0, B_0, C2, T0); \ - traits.madd(A1, B_0, C6, T0); \ - traits.madd(A2, B_0, C10, B_0); \ - traits.loadRhs(&blB[3+4*K*RhsProgress], B_0); \ - traits.madd(A0, B_0, C3 , T0); \ - traits.madd(A1, B_0, C7, T0); \ - traits.madd(A2, B_0, C11, B_0) + do { \ + EIGEN_ASM_COMMENT("begin step of gebp micro kernel 3pX4"); \ + EIGEN_ASM_COMMENT("Note: these asm comments work around bug 935!"); \ + internal::prefetch(blA+(3*K+16)*LhsProgress); \ + traits.loadLhs(&blA[(0+3*K)*LhsProgress], A0); \ + traits.loadLhs(&blA[(1+3*K)*LhsProgress], A1); \ + traits.loadLhs(&blA[(2+3*K)*LhsProgress], A2); \ + traits.loadRhs(&blB[(0+4*K)*RhsProgress], B_0); \ + traits.madd(A0, B_0, C0, T0); \ + traits.madd(A1, B_0, C4, T0); \ + traits.madd(A2, B_0, C8, B_0); \ + traits.loadRhs(&blB[1+4*K*RhsProgress], B_0); \ + traits.madd(A0, B_0, C1, T0); \ + traits.madd(A1, B_0, C5, T0); \ + traits.madd(A2, B_0, C9, B_0); \ + traits.loadRhs(&blB[2+4*K*RhsProgress], B_0); \ + traits.madd(A0, B_0, C2, T0); \ + traits.madd(A1, B_0, C6, T0); \ + traits.madd(A2, B_0, C10, B_0); \ + 
traits.loadRhs(&blB[3+4*K*RhsProgress], B_0); \ + traits.madd(A0, B_0, C3 , T0); \ + traits.madd(A1, B_0, C7, T0); \ + traits.madd(A2, B_0, C11, B_0); \ + EIGEN_ASM_COMMENT("end step of gebp micro kernel 3pX4"); \ + } while(false) internal::prefetch(blB+(48+0)); EIGEN_GEBGP_ONESTEP(0); @@ -840,6 +845,8 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga blB += pk*4*RhsProgress; blA += pk*3*Traits::LhsProgress; + + EIGEN_ASM_COMMENT("end gebp micro kernel 3pX4"); } // process remaining peeled loop for(Index k=peeled_kc; k<depth; k++) @@ -918,16 +925,21 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga for(Index k=0; k<peeled_kc; k+=pk) { - EIGEN_ASM_COMMENT("begin gegp micro kernel 3p x 1"); + EIGEN_ASM_COMMENT("begin gebp micro kernel 3pX1"); RhsPacket B_0; #define EIGEN_GEBGP_ONESTEP(K) \ - traits.loadLhs(&blA[(0+3*K)*LhsProgress], A0); \ - traits.loadLhs(&blA[(1+3*K)*LhsProgress], A1); \ - traits.loadLhs(&blA[(2+3*K)*LhsProgress], A2); \ - traits.loadRhs(&blB[(0+K)*RhsProgress], B_0); \ - traits.madd(A0, B_0, C0, B_0); \ - traits.madd(A1, B_0, C4, B_0); \ - traits.madd(A2, B_0, C8, B_0) + do { \ + EIGEN_ASM_COMMENT("begin step of gebp micro kernel 3pX1"); \ + EIGEN_ASM_COMMENT("Note: these asm comments work around bug 935!"); \ + traits.loadLhs(&blA[(0+3*K)*LhsProgress], A0); \ + traits.loadLhs(&blA[(1+3*K)*LhsProgress], A1); \ + traits.loadLhs(&blA[(2+3*K)*LhsProgress], A2); \ + traits.loadRhs(&blB[(0+K)*RhsProgress], B_0); \ + traits.madd(A0, B_0, C0, B_0); \ + traits.madd(A1, B_0, C4, B_0); \ + traits.madd(A2, B_0, C8, B_0); \ + EIGEN_ASM_COMMENT("end step of gebp micro kernel 3pX1"); \ + } while(false) EIGEN_GEBGP_ONESTEP(0); EIGEN_GEBGP_ONESTEP(1); @@ -940,6 +952,8 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga blB += pk*RhsProgress; blA += pk*3*Traits::LhsProgress; + + EIGEN_ASM_COMMENT("end gebp micro kernel 3pX1"); } // process remaining peeled loop @@ 
-1005,22 +1019,27 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga for(Index k=0; k<peeled_kc; k+=pk) { - EIGEN_ASM_COMMENT("begin gegp micro kernel 2pX4"); + EIGEN_ASM_COMMENT("begin gebp micro kernel 2pX4"); RhsPacket B_0, B1, B2, B3, T0; #define EIGEN_GEBGP_ONESTEP(K) \ - traits.loadLhs(&blA[(0+2*K)*LhsProgress], A0); \ - traits.loadLhs(&blA[(1+2*K)*LhsProgress], A1); \ - traits.broadcastRhs(&blB[(0+4*K)*RhsProgress], B_0, B1, B2, B3); \ - traits.madd(A0, B_0, C0, T0); \ - traits.madd(A1, B_0, C4, B_0); \ - traits.madd(A0, B1, C1, T0); \ - traits.madd(A1, B1, C5, B1); \ - traits.madd(A0, B2, C2, T0); \ - traits.madd(A1, B2, C6, B2); \ - traits.madd(A0, B3, C3, T0); \ - traits.madd(A1, B3, C7, B3) - + do { \ + EIGEN_ASM_COMMENT("begin step of gebp micro kernel 2pX4"); \ + EIGEN_ASM_COMMENT("Note: these asm comments work around bug 935!"); \ + traits.loadLhs(&blA[(0+2*K)*LhsProgress], A0); \ + traits.loadLhs(&blA[(1+2*K)*LhsProgress], A1); \ + traits.broadcastRhs(&blB[(0+4*K)*RhsProgress], B_0, B1, B2, B3); \ + traits.madd(A0, B_0, C0, T0); \ + traits.madd(A1, B_0, C4, B_0); \ + traits.madd(A0, B1, C1, T0); \ + traits.madd(A1, B1, C5, B1); \ + traits.madd(A0, B2, C2, T0); \ + traits.madd(A1, B2, C6, B2); \ + traits.madd(A0, B3, C3, T0); \ + traits.madd(A1, B3, C7, B3); \ + EIGEN_ASM_COMMENT("end step of gebp micro kernel 2pX4"); \ + } while(false) + internal::prefetch(blB+(48+0)); EIGEN_GEBGP_ONESTEP(0); EIGEN_GEBGP_ONESTEP(1); @@ -1034,6 +1053,8 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga blB += pk*4*RhsProgress; blA += pk*(2*Traits::LhsProgress); + + EIGEN_ASM_COMMENT("end gebp micro kernel 2pX4"); } // process remaining peeled loop for(Index k=peeled_kc; k<depth; k++) @@ -1096,15 +1117,20 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga for(Index k=0; k<peeled_kc; k+=pk) { - EIGEN_ASM_COMMENT("begin gegp micro kernel 2p x 1"); + EIGEN_ASM_COMMENT("begin 
gebp micro kernel 2pX1"); RhsPacket B_0, B1; #define EIGEN_GEBGP_ONESTEP(K) \ - traits.loadLhs(&blA[(0+2*K)*LhsProgress], A0); \ - traits.loadLhs(&blA[(1+2*K)*LhsProgress], A1); \ - traits.loadRhs(&blB[(0+K)*RhsProgress], B_0); \ - traits.madd(A0, B_0, C0, B1); \ - traits.madd(A1, B_0, C4, B_0) + do { \ + EIGEN_ASM_COMMENT("begin step of gebp micro kernel 2pX1"); \ + EIGEN_ASM_COMMENT("Note: these asm comments work around bug 935!"); \ + traits.loadLhs(&blA[(0+2*K)*LhsProgress], A0); \ + traits.loadLhs(&blA[(1+2*K)*LhsProgress], A1); \ + traits.loadRhs(&blB[(0+K)*RhsProgress], B_0); \ + traits.madd(A0, B_0, C0, B1); \ + traits.madd(A1, B_0, C4, B_0); \ + EIGEN_ASM_COMMENT("end step of gebp micro kernel 2pX1"); \ + } while(false) EIGEN_GEBGP_ONESTEP(0); EIGEN_GEBGP_ONESTEP(1); @@ -1117,6 +1143,8 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga blB += pk*RhsProgress; blA += pk*2*Traits::LhsProgress; + + EIGEN_ASM_COMMENT("end gebp micro kernel 2pX1"); } // process remaining peeled loop @@ -1179,16 +1207,21 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga for(Index k=0; k<peeled_kc; k+=pk) { - EIGEN_ASM_COMMENT("begin gegp micro kernel 1pX4"); + EIGEN_ASM_COMMENT("begin gebp micro kernel 1pX4"); RhsPacket B_0, B1, B2, B3; #define EIGEN_GEBGP_ONESTEP(K) \ - traits.loadLhs(&blA[(0+1*K)*LhsProgress], A0); \ - traits.broadcastRhs(&blB[(0+4*K)*RhsProgress], B_0, B1, B2, B3); \ - traits.madd(A0, B_0, C0, B_0); \ - traits.madd(A0, B1, C1, B1); \ - traits.madd(A0, B2, C2, B2); \ - traits.madd(A0, B3, C3, B3); + do { \ + EIGEN_ASM_COMMENT("begin step of gebp micro kernel 1pX4"); \ + EIGEN_ASM_COMMENT("Note: these asm comments work around bug 935!"); \ + traits.loadLhs(&blA[(0+1*K)*LhsProgress], A0); \ + traits.broadcastRhs(&blB[(0+4*K)*RhsProgress], B_0, B1, B2, B3); \ + traits.madd(A0, B_0, C0, B_0); \ + traits.madd(A0, B1, C1, B1); \ + traits.madd(A0, B2, C2, B2); \ + traits.madd(A0, B3, C3, B3); \ + 
EIGEN_ASM_COMMENT("end step of gebp micro kernel 1pX4"); \ + } while(false) internal::prefetch(blB+(48+0)); EIGEN_GEBGP_ONESTEP(0); @@ -1203,6 +1236,8 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga blB += pk*4*RhsProgress; blA += pk*1*LhsProgress; + + EIGEN_ASM_COMMENT("end gebp micro kernel 1pX4"); } // process remaining peeled loop for(Index k=peeled_kc; k<depth; k++) @@ -1251,14 +1286,19 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga for(Index k=0; k<peeled_kc; k+=pk) { - EIGEN_ASM_COMMENT("begin gegp micro kernel 2p x 1"); + EIGEN_ASM_COMMENT("begin gebp micro kernel 2pX1"); RhsPacket B_0; #define EIGEN_GEBGP_ONESTEP(K) \ - traits.loadLhs(&blA[(0+1*K)*LhsProgress], A0); \ - traits.loadRhs(&blB[(0+K)*RhsProgress], B_0); \ - traits.madd(A0, B_0, C0, B_0); \ - + do { \ + EIGEN_ASM_COMMENT("begin step of gebp micro kernel 2pX1"); \ + EIGEN_ASM_COMMENT("Note: these asm comments work around bug 935!"); \ + traits.loadLhs(&blA[(0+1*K)*LhsProgress], A0); \ + traits.loadRhs(&blB[(0+K)*RhsProgress], B_0); \ + traits.madd(A0, B_0, C0, B_0); \ + EIGEN_ASM_COMMENT("end step of gebp micro kernel 2pX1"); \ + } while(false); + EIGEN_GEBGP_ONESTEP(0); EIGEN_GEBGP_ONESTEP(1); EIGEN_GEBGP_ONESTEP(2); @@ -1270,6 +1310,8 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga blB += pk*RhsProgress; blA += pk*1*Traits::LhsProgress; + + EIGEN_ASM_COMMENT("end gebp micro kernel 2pX1"); } // process remaining peeled loop @@ -1402,14 +1444,14 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga B_0 = blB[0]; B_1 = blB[1]; - MADD(cj,A0,B_0,C0, B_0); - MADD(cj,A0,B_1,C1, B_1); - + CJMADD(cj,A0,B_0,C0, B_0); + CJMADD(cj,A0,B_1,C1, B_1); + B_0 = blB[2]; B_1 = blB[3]; - MADD(cj,A0,B_0,C2, B_0); - MADD(cj,A0,B_1,C3, B_1); - + CJMADD(cj,A0,B_0,C2, B_0); + CJMADD(cj,A0,B_1,C3, B_1); + blB += 4; } res(i, j2 + 0) += alpha * C0; @@ -1434,7 +1476,7 @@ void 
gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga { LhsScalar A0 = blA[k]; RhsScalar B_0 = blB[k]; - MADD(cj, A0, B_0, C0, B_0); + CJMADD(cj, A0, B_0, C0, B_0); } res(i, j2) += alpha * C0; } diff --git a/Eigen/src/Core/products/GeneralMatrixMatrix.h b/Eigen/src/Core/products/GeneralMatrixMatrix.h index 49362adbe..fd9443cd2 100644 --- a/Eigen/src/Core/products/GeneralMatrixMatrix.h +++ b/Eigen/src/Core/products/GeneralMatrixMatrix.h @@ -228,8 +228,8 @@ struct gemm_functor cols = m_rhs.cols(); Gemm::run(rows, cols, m_lhs.cols(), - /*(const Scalar*)*/&m_lhs.coeffRef(row,0), m_lhs.outerStride(), - /*(const Scalar*)*/&m_rhs.coeffRef(0,col), m_rhs.outerStride(), + &m_lhs.coeffRef(row,0), m_lhs.outerStride(), + &m_rhs.coeffRef(0,col), m_rhs.outerStride(), (Scalar*)&(m_dest.coeffRef(row,col)), m_dest.outerStride(), m_actualAlpha, m_blocking, info); } @@ -379,84 +379,92 @@ class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, M } // end namespace internal +namespace internal { + template<typename Lhs, typename Rhs> -class GeneralProduct<Lhs, Rhs, GemmProduct> - : public ProductBase<GeneralProduct<Lhs,Rhs,GemmProduct>, Lhs, Rhs> +struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct> + : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct> > { - enum { - MaxDepthAtCompileTime = EIGEN_SIZE_MIN_PREFER_FIXED(Lhs::MaxColsAtCompileTime,Rhs::MaxRowsAtCompileTime) - }; - public: - EIGEN_PRODUCT_PUBLIC_INTERFACE(GeneralProduct) - - typedef typename Lhs::Scalar LhsScalar; - typedef typename Rhs::Scalar RhsScalar; - typedef Scalar ResScalar; - - GeneralProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) - { - typedef internal::scalar_product_op<LhsScalar,RhsScalar> BinOp; - EIGEN_CHECK_BINARY_COMPATIBILIY(BinOp,LhsScalar,RhsScalar); - } - - template<typename Dest> - inline void evalTo(Dest& dst) const - { - if((m_rhs.rows()+dst.rows()+dst.cols())<20 && m_rhs.rows()>0) - 
dst.noalias() = m_lhs .lazyProduct( m_rhs ); - else - { - dst.setZero(); - scaleAndAddTo(dst,Scalar(1)); - } - } - - template<typename Dest> - inline void addTo(Dest& dst) const + typedef typename Product<Lhs,Rhs>::Scalar Scalar; + typedef typename Product<Lhs,Rhs>::Index Index; + typedef typename Lhs::Scalar LhsScalar; + typedef typename Rhs::Scalar RhsScalar; + + typedef internal::blas_traits<Lhs> LhsBlasTraits; + typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType; + typedef typename internal::remove_all<ActualLhsType>::type ActualLhsTypeCleaned; + + typedef internal::blas_traits<Rhs> RhsBlasTraits; + typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType; + typedef typename internal::remove_all<ActualRhsType>::type ActualRhsTypeCleaned; + + enum { + MaxDepthAtCompileTime = EIGEN_SIZE_MIN_PREFER_FIXED(Lhs::MaxColsAtCompileTime,Rhs::MaxRowsAtCompileTime) + }; + + typedef generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode> lazyproduct; + + template<typename Dst> + static void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { + if((rhs.rows()+dst.rows()+dst.cols())<20 && rhs.rows()>0) + lazyproduct::evalTo(dst, lhs, rhs); + else { - if((m_rhs.rows()+dst.rows()+dst.cols())<20 && m_rhs.rows()>0) - dst.noalias() += m_lhs .lazyProduct( m_rhs ); - else - scaleAndAddTo(dst,Scalar(1)); + dst.setZero(); + scaleAndAddTo(dst, lhs, rhs, Scalar(1)); } + } - template<typename Dest> - inline void subTo(Dest& dst) const - { - if((m_rhs.rows()+dst.rows()+dst.cols())<20 && m_rhs.rows()>0) - dst.noalias() -= m_lhs .lazyProduct( m_rhs ); - else - scaleAndAddTo(dst,Scalar(-1)); - } - - template<typename Dest> void scaleAndAddTo(Dest& dst, const Scalar& alpha) const - { - eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols()); + template<typename Dst> + static void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { + if((rhs.rows()+dst.rows()+dst.cols())<20 && rhs.rows()>0) + lazyproduct::addTo(dst, lhs, rhs); + else 
+ scaleAndAddTo(dst,lhs, rhs, Scalar(1)); + } - typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs); - typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs); + template<typename Dst> + static void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { + if((rhs.rows()+dst.rows()+dst.cols())<20 && rhs.rows()>0) + lazyproduct::subTo(dst, lhs, rhs); + else + scaleAndAddTo(dst, lhs, rhs, Scalar(-1)); + } + + template<typename Dest> + static void scaleAndAddTo(Dest& dst, const Lhs& a_lhs, const Rhs& a_rhs, const Scalar& alpha) + { + eigen_assert(dst.rows()==a_lhs.rows() && dst.cols()==a_rhs.cols()); - Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs) - * RhsBlasTraits::extractScalarFactor(m_rhs); + typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(a_lhs); + typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(a_rhs); - typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,LhsScalar,RhsScalar, - Dest::MaxRowsAtCompileTime,Dest::MaxColsAtCompileTime,MaxDepthAtCompileTime> BlockingType; + Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(a_lhs) + * RhsBlasTraits::extractScalarFactor(a_rhs); - typedef internal::gemm_functor< - Scalar, Index, - internal::general_matrix_matrix_product< - Index, - LhsScalar, (_ActualLhsType::Flags&RowMajorBit) ? RowMajor : ColMajor, bool(LhsBlasTraits::NeedToConjugate), - RhsScalar, (_ActualRhsType::Flags&RowMajorBit) ? RowMajor : ColMajor, bool(RhsBlasTraits::NeedToConjugate), - (Dest::Flags&RowMajorBit) ? RowMajor : ColMajor>, - _ActualLhsType, _ActualRhsType, Dest, BlockingType> GemmFunctor; + typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? 
RowMajor : ColMajor,LhsScalar,RhsScalar, + Dest::MaxRowsAtCompileTime,Dest::MaxColsAtCompileTime,MaxDepthAtCompileTime> BlockingType; - BlockingType blocking(dst.rows(), dst.cols(), lhs.cols(), 1, true); + typedef internal::gemm_functor< + Scalar, Index, + internal::general_matrix_matrix_product< + Index, + LhsScalar, (ActualLhsTypeCleaned::Flags&RowMajorBit) ? RowMajor : ColMajor, bool(LhsBlasTraits::NeedToConjugate), + RhsScalar, (ActualRhsTypeCleaned::Flags&RowMajorBit) ? RowMajor : ColMajor, bool(RhsBlasTraits::NeedToConjugate), + (Dest::Flags&RowMajorBit) ? RowMajor : ColMajor>, + ActualLhsTypeCleaned, ActualRhsTypeCleaned, Dest, BlockingType> GemmFunctor; - internal::parallelize_gemm<(Dest::MaxRowsAtCompileTime>32 || Dest::MaxRowsAtCompileTime==Dynamic)>(GemmFunctor(lhs, rhs, dst, actualAlpha, blocking), this->rows(), this->cols(), Dest::Flags&RowMajorBit); - } + BlockingType blocking(dst.rows(), dst.cols(), lhs.cols(), 1, true); + internal::parallelize_gemm<(Dest::MaxRowsAtCompileTime>32 || Dest::MaxRowsAtCompileTime==Dynamic)> + (GemmFunctor(lhs, rhs, dst, actualAlpha, blocking), a_lhs.rows(), a_rhs.cols(), Dest::Flags&RowMajorBit); + } }; +} // end namespace internal + } // end namespace Eigen #endif // EIGEN_GENERAL_MATRIX_MATRIX_H diff --git a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h index 8de39f76f..e55994900 100644 --- a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +++ b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h @@ -20,7 +20,7 @@ namespace internal { /********************************************************************** * This file implements a general A * B product while * evaluating only one triangular part of the product. -* This is more general version of self adjoint product (C += A A^T) +* This is a more general version of self adjoint product (C += A A^T) * as the level 3 SYRK Blas routine. 
**********************************************************************/ @@ -270,14 +270,14 @@ struct general_product_to_triangular_selector<MatrixType,ProductType,UpLo,false> }; template<typename MatrixType, unsigned int UpLo> -template<typename ProductDerived, typename _Lhs, typename _Rhs> -TriangularView<MatrixType,UpLo>& TriangularView<MatrixType,UpLo>::assignProduct(const ProductBase<ProductDerived, _Lhs,_Rhs>& prod, const Scalar& alpha) +template<typename ProductType> +TriangularView<MatrixType,UpLo>& TriangularViewImpl<MatrixType,UpLo,Dense>::_assignProduct(const ProductType& prod, const Scalar& alpha) { - eigen_assert(m_matrix.rows() == prod.rows() && m_matrix.cols() == prod.cols()); - - general_product_to_triangular_selector<MatrixType, ProductDerived, UpLo, (_Lhs::ColsAtCompileTime==1) || (_Rhs::RowsAtCompileTime==1)>::run(m_matrix.const_cast_derived(), prod.derived(), alpha); + eigen_assert(derived().nestedExpression().rows() == prod.rows() && derived().cols() == prod.cols()); + + general_product_to_triangular_selector<MatrixType, ProductType, UpLo, internal::traits<ProductType>::InnerSize==1>::run(derived().nestedExpression().const_cast_derived(), prod, alpha); - return *this; + return derived(); } } // end namespace Eigen diff --git a/Eigen/src/Core/products/Parallelizer.h b/Eigen/src/Core/products/Parallelizer.h index 837e69415..2b90abf8f 100644 --- a/Eigen/src/Core/products/Parallelizer.h +++ b/Eigen/src/Core/products/Parallelizer.h @@ -129,7 +129,7 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpos Index blockRows = (rows / threads); blockRows = (blockRows/Functor::Traits::mr)*Functor::Traits::mr; - GemmParallelInfo<Index>* info = new GemmParallelInfo<Index>[threads]; + ei_declare_aligned_stack_constructed_variable(GemmParallelInfo<Index>,info,threads,0); #pragma omp parallel num_threads(threads) { @@ -146,8 +146,6 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpos if(transpose) 
func(c0, actualBlockCols, 0, rows, info); else func(0, rows, c0, actualBlockCols, info); } - - delete[] info; #endif } diff --git a/Eigen/src/Core/products/SelfadjointMatrixMatrix.h b/Eigen/src/Core/products/SelfadjointMatrixMatrix.h index 21f8175d2..e831ee20f 100644 --- a/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +++ b/Eigen/src/Core/products/SelfadjointMatrixMatrix.h @@ -469,55 +469,54 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,f ***************************************************************************/ namespace internal { + template<typename Lhs, int LhsMode, typename Rhs, int RhsMode> -struct traits<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false> > - : traits<ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>, Lhs, Rhs> > -{}; -} - -template<typename Lhs, int LhsMode, typename Rhs, int RhsMode> -struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false> - : public ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>, Lhs, Rhs > +struct selfadjoint_product_impl<Lhs,LhsMode,false,Rhs,RhsMode,false> { - EIGEN_PRODUCT_PUBLIC_INTERFACE(SelfadjointProductMatrix) - - SelfadjointProductMatrix(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {} - + typedef typename Product<Lhs,Rhs>::Scalar Scalar; + typedef typename Product<Lhs,Rhs>::Index Index; + + typedef internal::blas_traits<Lhs> LhsBlasTraits; + typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType; + typedef internal::blas_traits<Rhs> RhsBlasTraits; + typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType; + enum { LhsIsUpper = (LhsMode&(Upper|Lower))==Upper, LhsIsSelfAdjoint = (LhsMode&SelfAdjoint)==SelfAdjoint, RhsIsUpper = (RhsMode&(Upper|Lower))==Upper, RhsIsSelfAdjoint = (RhsMode&SelfAdjoint)==SelfAdjoint }; - - template<typename Dest> void scaleAndAddTo(Dest& dst, const Scalar& alpha) const + + template<typename Dest> + static void run(Dest &dst, const Lhs &a_lhs, 
const Rhs &a_rhs, const Scalar& alpha) { - eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols()); + eigen_assert(dst.rows()==a_lhs.rows() && dst.cols()==a_rhs.cols()); - typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs); - typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs); + typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(a_lhs); + typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(a_rhs); - Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs) - * RhsBlasTraits::extractScalarFactor(m_rhs); + Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(a_lhs) + * RhsBlasTraits::extractScalarFactor(a_rhs); internal::product_selfadjoint_matrix<Scalar, Index, - EIGEN_LOGICAL_XOR(LhsIsUpper, - internal::traits<Lhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, LhsIsSelfAdjoint, + EIGEN_LOGICAL_XOR(LhsIsUpper,internal::traits<Lhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, LhsIsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsIsUpper,bool(LhsBlasTraits::NeedToConjugate)), - EIGEN_LOGICAL_XOR(RhsIsUpper, - internal::traits<Rhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, RhsIsSelfAdjoint, + EIGEN_LOGICAL_XOR(RhsIsUpper,internal::traits<Rhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, RhsIsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsIsUpper,bool(RhsBlasTraits::NeedToConjugate)), internal::traits<Dest>::Flags&RowMajorBit ? 
RowMajor : ColMajor> ::run( - lhs.rows(), rhs.cols(), // sizes - &lhs.coeffRef(0,0), lhs.outerStride(), // lhs info - &rhs.coeffRef(0,0), rhs.outerStride(), // rhs info - &dst.coeffRef(0,0), dst.outerStride(), // result info - actualAlpha // alpha + lhs.rows(), rhs.cols(), // sizes + &lhs.coeffRef(0,0), lhs.outerStride(), // lhs info + &rhs.coeffRef(0,0), rhs.outerStride(), // rhs info + &dst.coeffRef(0,0), dst.outerStride(), // result info + actualAlpha // alpha ); } }; +} // end namespace internal + } // end namespace Eigen #endif // EIGEN_SELFADJOINT_MATRIX_MATRIX_H diff --git a/Eigen/src/Core/products/SelfadjointMatrixVector.h b/Eigen/src/Core/products/SelfadjointMatrixVector.h index 26e787949..372a44e47 100644 --- a/Eigen/src/Core/products/SelfadjointMatrixVector.h +++ b/Eigen/src/Core/products/SelfadjointMatrixVector.h @@ -169,45 +169,45 @@ EIGEN_DONT_INLINE void selfadjoint_matrix_vector_product<Scalar,Index,StorageOrd ***************************************************************************/ namespace internal { -template<typename Lhs, int LhsMode, typename Rhs> -struct traits<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true> > - : traits<ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>, Lhs, Rhs> > -{}; -} template<typename Lhs, int LhsMode, typename Rhs> -struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true> - : public ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>, Lhs, Rhs > +struct selfadjoint_product_impl<Lhs,LhsMode,false,Rhs,0,true> { - EIGEN_PRODUCT_PUBLIC_INTERFACE(SelfadjointProductMatrix) - - enum { - LhsUpLo = LhsMode&(Upper|Lower) - }; - - SelfadjointProductMatrix(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {} - - template<typename Dest> void scaleAndAddTo(Dest& dest, const Scalar& alpha) const + typedef typename Product<Lhs,Rhs>::Scalar Scalar; + typedef typename Product<Lhs,Rhs>::Index Index; + + typedef internal::blas_traits<Lhs> LhsBlasTraits; + typedef typename 
LhsBlasTraits::DirectLinearAccessType ActualLhsType; + typedef typename internal::remove_all<ActualLhsType>::type ActualLhsTypeCleaned; + + typedef internal::blas_traits<Rhs> RhsBlasTraits; + typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType; + typedef typename internal::remove_all<ActualRhsType>::type ActualRhsTypeCleaned; + + enum { LhsUpLo = LhsMode&(Upper|Lower) }; + + template<typename Dest> + static void run(Dest& dest, const Lhs &a_lhs, const Rhs &a_rhs, const Scalar& alpha) { typedef typename Dest::Scalar ResScalar; - typedef typename Base::RhsScalar RhsScalar; + typedef typename Rhs::Scalar RhsScalar; typedef Map<Matrix<ResScalar,Dynamic,1>, Aligned> MappedDest; - eigen_assert(dest.rows()==m_lhs.rows() && dest.cols()==m_rhs.cols()); + eigen_assert(dest.rows()==a_lhs.rows() && dest.cols()==a_rhs.cols()); - typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs); - typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs); + typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(a_lhs); + typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(a_rhs); - Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs) - * RhsBlasTraits::extractScalarFactor(m_rhs); + Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(a_lhs) + * RhsBlasTraits::extractScalarFactor(a_rhs); enum { EvalToDest = (Dest::InnerStrideAtCompileTime==1), - UseRhs = (_ActualRhsType::InnerStrideAtCompileTime==1) + UseRhs = (ActualRhsTypeCleaned::InnerStrideAtCompileTime==1) }; internal::gemv_static_vector_if<ResScalar,Dest::SizeAtCompileTime,Dest::MaxSizeAtCompileTime,!EvalToDest> static_dest; - internal::gemv_static_vector_if<RhsScalar,_ActualRhsType::SizeAtCompileTime,_ActualRhsType::MaxSizeAtCompileTime,!UseRhs> static_rhs; + 
internal::gemv_static_vector_if<RhsScalar,ActualRhsTypeCleaned::SizeAtCompileTime,ActualRhsTypeCleaned::MaxSizeAtCompileTime,!UseRhs> static_rhs; ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(), EvalToDest ? dest.data() : static_dest.data()); @@ -230,11 +230,12 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true> Index size = rhs.size(); EIGEN_DENSE_STORAGE_CTOR_PLUGIN #endif - Map<typename _ActualRhsType::PlainObject>(actualRhsPtr, rhs.size()) = rhs; + Map<typename ActualRhsTypeCleaned::PlainObject>(actualRhsPtr, rhs.size()) = rhs; } - internal::selfadjoint_matrix_vector_product<Scalar, Index, (internal::traits<_ActualLhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, int(LhsUpLo), bool(LhsBlasTraits::NeedToConjugate), bool(RhsBlasTraits::NeedToConjugate)>::run + internal::selfadjoint_matrix_vector_product<Scalar, Index, (internal::traits<ActualLhsTypeCleaned>::Flags&RowMajorBit) ? RowMajor : ColMajor, + int(LhsUpLo), bool(LhsBlasTraits::NeedToConjugate), bool(RhsBlasTraits::NeedToConjugate)>::run ( lhs.rows(), // size &lhs.coeffRef(0,0), lhs.outerStride(), // lhs info @@ -248,34 +249,24 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true> } }; -namespace internal { -template<typename Lhs, typename Rhs, int RhsMode> -struct traits<SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false> > - : traits<ProductBase<SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false>, Lhs, Rhs> > -{}; -} - template<typename Lhs, typename Rhs, int RhsMode> -struct SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false> - : public ProductBase<SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false>, Lhs, Rhs > +struct selfadjoint_product_impl<Lhs,0,true,Rhs,RhsMode,false> { - EIGEN_PRODUCT_PUBLIC_INTERFACE(SelfadjointProductMatrix) - - enum { - RhsUpLo = RhsMode&(Upper|Lower) - }; + typedef typename Product<Lhs,Rhs>::Scalar Scalar; + enum { RhsUpLo = RhsMode&(Upper|Lower) }; - SelfadjointProductMatrix(const Lhs& lhs, const Rhs& 
rhs) : Base(lhs,rhs) {} - - template<typename Dest> void scaleAndAddTo(Dest& dest, const Scalar& alpha) const + template<typename Dest> + static void run(Dest& dest, const Lhs &a_lhs, const Rhs &a_rhs, const Scalar& alpha) { // let's simply transpose the product Transpose<Dest> destT(dest); - SelfadjointProductMatrix<Transpose<const Rhs>, int(RhsUpLo)==Upper ? Lower : Upper, false, - Transpose<const Lhs>, 0, true>(m_rhs.transpose(), m_lhs.transpose()).scaleAndAddTo(destT, alpha); + selfadjoint_product_impl<Transpose<const Rhs>, int(RhsUpLo)==Upper ? Lower : Upper, false, + Transpose<const Lhs>, 0, true>::run(destT, a_rhs.transpose(), a_lhs.transpose(), alpha); } }; +} // end namespace internal + } // end namespace Eigen #endif // EIGEN_SELFADJOINT_MATRIX_VECTOR_H diff --git a/Eigen/src/Core/products/SelfadjointRank2Update.h b/Eigen/src/Core/products/SelfadjointRank2Update.h index 8594a97ce..2ae364111 100644 --- a/Eigen/src/Core/products/SelfadjointRank2Update.h +++ b/Eigen/src/Core/products/SelfadjointRank2Update.h @@ -79,11 +79,11 @@ SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo> if (IsRowMajor) actualAlpha = numext::conj(actualAlpha); - internal::selfadjoint_rank2_update_selector<Scalar, Index, - typename internal::remove_all<typename internal::conj_expr_if<IsRowMajor ^ UBlasTraits::NeedToConjugate,_ActualUType>::type>::type, - typename internal::remove_all<typename internal::conj_expr_if<IsRowMajor ^ VBlasTraits::NeedToConjugate,_ActualVType>::type>::type, + typedef typename internal::remove_all<typename internal::conj_expr_if<IsRowMajor ^ UBlasTraits::NeedToConjugate,_ActualUType>::type>::type UType; + typedef typename internal::remove_all<typename internal::conj_expr_if<IsRowMajor ^ VBlasTraits::NeedToConjugate,_ActualVType>::type>::type VType; + internal::selfadjoint_rank2_update_selector<Scalar, Index, UType, VType, (IsRowMajor ? int(UpLo==Upper ? 
Lower : Upper) : UpLo)> - ::run(_expression().const_cast_derived().data(),_expression().outerStride(),actualU,actualV,actualAlpha); + ::run(_expression().const_cast_derived().data(),_expression().outerStride(),UType(actualU),VType(actualV),actualAlpha); return *this; } diff --git a/Eigen/src/Core/products/TriangularMatrixMatrix.h b/Eigen/src/Core/products/TriangularMatrixMatrix.h index 4cbb79da0..60c99dcd2 100644 --- a/Eigen/src/Core/products/TriangularMatrixMatrix.h +++ b/Eigen/src/Core/products/TriangularMatrixMatrix.h @@ -380,28 +380,29 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,false, * Wrapper to product_triangular_matrix_matrix ***************************************************************************/ -template<int Mode, bool LhsIsTriangular, typename Lhs, typename Rhs> -struct traits<TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false> > - : traits<ProductBase<TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false>, Lhs, Rhs> > -{}; - } // end namespace internal +namespace internal { template<int Mode, bool LhsIsTriangular, typename Lhs, typename Rhs> -struct TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false> - : public ProductBase<TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false>, Lhs, Rhs > +struct triangular_product_impl<Mode,LhsIsTriangular,Lhs,false,Rhs,false> { - EIGEN_PRODUCT_PUBLIC_INTERFACE(TriangularProduct) - - TriangularProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {} - - template<typename Dest> void scaleAndAddTo(Dest& dst, const Scalar& alpha) const + template<typename Dest> static void run(Dest& dst, const Lhs &a_lhs, const Rhs &a_rhs, const typename Dest::Scalar& alpha) { - typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs); - typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs); + typedef typename Dest::Index Index; + typedef typename Dest::Scalar Scalar; + + typedef 
internal::blas_traits<Lhs> LhsBlasTraits; + typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType; + typedef typename internal::remove_all<ActualLhsType>::type ActualLhsTypeCleaned; + typedef internal::blas_traits<Rhs> RhsBlasTraits; + typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType; + typedef typename internal::remove_all<ActualRhsType>::type ActualRhsTypeCleaned; + + typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(a_lhs); + typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(a_rhs); - Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs) - * RhsBlasTraits::extractScalarFactor(m_rhs); + Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(a_lhs) + * RhsBlasTraits::extractScalarFactor(a_rhs); typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,Scalar,Scalar, Lhs::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime, Lhs::MaxColsAtCompileTime,4> BlockingType; @@ -416,19 +417,21 @@ struct TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false> internal::product_triangular_matrix_matrix<Scalar, Index, Mode, LhsIsTriangular, - (internal::traits<_ActualLhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate, - (internal::traits<_ActualRhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate, + (internal::traits<ActualLhsTypeCleaned>::Flags&RowMajorBit) ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate, + (internal::traits<ActualRhsTypeCleaned>::Flags&RowMajorBit) ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate, (internal::traits<Dest >::Flags&RowMajorBit) ? 
RowMajor : ColMajor> ::run( stripedRows, stripedCols, stripedDepth, // sizes - &lhs.coeffRef(0,0), lhs.outerStride(), // lhs info - &rhs.coeffRef(0,0), rhs.outerStride(), // rhs info + &lhs.coeffRef(0,0), lhs.outerStride(), // lhs info + &rhs.coeffRef(0,0), rhs.outerStride(), // rhs info &dst.coeffRef(0,0), dst.outerStride(), // result info actualAlpha, blocking ); } }; +} // end namespace internal + } // end namespace Eigen #endif // EIGEN_TRIANGULAR_MATRIX_MATRIX_H diff --git a/Eigen/src/Core/products/TriangularMatrixVector.h b/Eigen/src/Core/products/TriangularMatrixVector.h index d33e3f409..4d88a710b 100644 --- a/Eigen/src/Core/products/TriangularMatrixVector.h +++ b/Eigen/src/Core/products/TriangularMatrixVector.h @@ -163,83 +163,67 @@ EIGEN_DONT_INLINE void triangular_matrix_vector_product<Index,Mode,LhsScalar,Con * Wrapper to product_triangular_vector ***************************************************************************/ -template<int Mode, bool LhsIsTriangular, typename Lhs, typename Rhs> -struct traits<TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,true> > - : traits<ProductBase<TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,true>, Lhs, Rhs> > -{}; - -template<int Mode, bool LhsIsTriangular, typename Lhs, typename Rhs> -struct traits<TriangularProduct<Mode,LhsIsTriangular,Lhs,true,Rhs,false> > - : traits<ProductBase<TriangularProduct<Mode,LhsIsTriangular,Lhs,true,Rhs,false>, Lhs, Rhs> > -{}; - - -template<int StorageOrder> +template<int Mode,int StorageOrder> struct trmv_selector; } // end namespace internal +namespace internal { + template<int Mode, typename Lhs, typename Rhs> -struct TriangularProduct<Mode,true,Lhs,false,Rhs,true> - : public ProductBase<TriangularProduct<Mode,true,Lhs,false,Rhs,true>, Lhs, Rhs > +struct triangular_product_impl<Mode,true,Lhs,false,Rhs,true> { - EIGEN_PRODUCT_PUBLIC_INTERFACE(TriangularProduct) - - TriangularProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {} - - template<typename Dest> void 
scaleAndAddTo(Dest& dst, const Scalar& alpha) const + template<typename Dest> static void run(Dest& dst, const Lhs &lhs, const Rhs &rhs, const typename Dest::Scalar& alpha) { - eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols()); - - internal::trmv_selector<(int(internal::traits<Lhs>::Flags)&RowMajorBit) ? RowMajor : ColMajor>::run(*this, dst, alpha); + eigen_assert(dst.rows()==lhs.rows() && dst.cols()==rhs.cols()); + + internal::trmv_selector<Mode,(int(internal::traits<Lhs>::Flags)&RowMajorBit) ? RowMajor : ColMajor>::run(lhs, rhs, dst, alpha); } }; template<int Mode, typename Lhs, typename Rhs> -struct TriangularProduct<Mode,false,Lhs,true,Rhs,false> - : public ProductBase<TriangularProduct<Mode,false,Lhs,true,Rhs,false>, Lhs, Rhs > +struct triangular_product_impl<Mode,false,Lhs,true,Rhs,false> { - EIGEN_PRODUCT_PUBLIC_INTERFACE(TriangularProduct) - - TriangularProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {} - - template<typename Dest> void scaleAndAddTo(Dest& dst, const Scalar& alpha) const + template<typename Dest> static void run(Dest& dst, const Lhs &lhs, const Rhs &rhs, const typename Dest::Scalar& alpha) { - eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols()); + eigen_assert(dst.rows()==lhs.rows() && dst.cols()==rhs.cols()); - typedef TriangularProduct<(Mode & (UnitDiag|ZeroDiag)) | ((Mode & Lower) ? Upper : Lower),true,Transpose<const Rhs>,false,Transpose<const Lhs>,true> TriangularProductTranspose; Transpose<Dest> dstT(dst); - internal::trmv_selector<(int(internal::traits<Rhs>::Flags)&RowMajorBit) ? ColMajor : RowMajor>::run( - TriangularProductTranspose(m_rhs.transpose(),m_lhs.transpose()), dstT, alpha); + internal::trmv_selector<(Mode & (UnitDiag|ZeroDiag)) | ((Mode & Lower) ? Upper : Lower), + (int(internal::traits<Rhs>::Flags)&RowMajorBit) ? 
ColMajor : RowMajor> + ::run(rhs.transpose(),lhs.transpose(), dstT, alpha); } }; +} // end namespace internal + namespace internal { // TODO: find a way to factorize this piece of code with gemv_selector since the logic is exactly the same. - -template<> struct trmv_selector<ColMajor> + +template<int Mode> struct trmv_selector<Mode,ColMajor> { - template<int Mode, typename Lhs, typename Rhs, typename Dest> - static void run(const TriangularProduct<Mode,true,Lhs,false,Rhs,true>& prod, Dest& dest, const typename TriangularProduct<Mode,true,Lhs,false,Rhs,true>::Scalar& alpha) + template<typename Lhs, typename Rhs, typename Dest> + static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha) { - typedef TriangularProduct<Mode,true,Lhs,false,Rhs,true> ProductType; - typedef typename ProductType::Index Index; - typedef typename ProductType::LhsScalar LhsScalar; - typedef typename ProductType::RhsScalar RhsScalar; - typedef typename ProductType::Scalar ResScalar; - typedef typename ProductType::RealScalar RealScalar; - typedef typename ProductType::ActualLhsType ActualLhsType; - typedef typename ProductType::ActualRhsType ActualRhsType; - typedef typename ProductType::LhsBlasTraits LhsBlasTraits; - typedef typename ProductType::RhsBlasTraits RhsBlasTraits; + typedef typename Dest::Index Index; + typedef typename Lhs::Scalar LhsScalar; + typedef typename Rhs::Scalar RhsScalar; + typedef typename Dest::Scalar ResScalar; + typedef typename Dest::RealScalar RealScalar; + + typedef internal::blas_traits<Lhs> LhsBlasTraits; + typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType; + typedef internal::blas_traits<Rhs> RhsBlasTraits; + typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType; + typedef Map<Matrix<ResScalar,Dynamic,1>, Aligned> MappedDest; - typename internal::add_const_on_value_type<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(prod.lhs()); - typename 
internal::add_const_on_value_type<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(prod.rhs()); + typename internal::add_const_on_value_type<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(lhs); + typename internal::add_const_on_value_type<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(rhs); - ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs()) - * RhsBlasTraits::extractScalarFactor(prod.rhs()); + ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs) + * RhsBlasTraits::extractScalarFactor(rhs); enum { // FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1 @@ -294,33 +278,33 @@ template<> struct trmv_selector<ColMajor> } }; -template<> struct trmv_selector<RowMajor> +template<int Mode> struct trmv_selector<Mode,RowMajor> { - template<int Mode, typename Lhs, typename Rhs, typename Dest> - static void run(const TriangularProduct<Mode,true,Lhs,false,Rhs,true>& prod, Dest& dest, const typename TriangularProduct<Mode,true,Lhs,false,Rhs,true>::Scalar& alpha) + template<typename Lhs, typename Rhs, typename Dest> + static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha) { - typedef TriangularProduct<Mode,true,Lhs,false,Rhs,true> ProductType; - typedef typename ProductType::LhsScalar LhsScalar; - typedef typename ProductType::RhsScalar RhsScalar; - typedef typename ProductType::Scalar ResScalar; - typedef typename ProductType::Index Index; - typedef typename ProductType::ActualLhsType ActualLhsType; - typedef typename ProductType::ActualRhsType ActualRhsType; - typedef typename ProductType::_ActualRhsType _ActualRhsType; - typedef typename ProductType::LhsBlasTraits LhsBlasTraits; - typedef typename ProductType::RhsBlasTraits RhsBlasTraits; - - typename add_const<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(prod.lhs()); - typename add_const<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(prod.rhs()); - - 
ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs()) - * RhsBlasTraits::extractScalarFactor(prod.rhs()); + typedef typename Dest::Index Index; + typedef typename Lhs::Scalar LhsScalar; + typedef typename Rhs::Scalar RhsScalar; + typedef typename Dest::Scalar ResScalar; + + typedef internal::blas_traits<Lhs> LhsBlasTraits; + typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType; + typedef internal::blas_traits<Rhs> RhsBlasTraits; + typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType; + typedef typename internal::remove_all<ActualRhsType>::type ActualRhsTypeCleaned; + + typename add_const<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(lhs); + typename add_const<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(rhs); + + ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs) + * RhsBlasTraits::extractScalarFactor(rhs); enum { - DirectlyUseRhs = _ActualRhsType::InnerStrideAtCompileTime==1 + DirectlyUseRhs = ActualRhsTypeCleaned::InnerStrideAtCompileTime==1 }; - gemv_static_vector_if<RhsScalar,_ActualRhsType::SizeAtCompileTime,_ActualRhsType::MaxSizeAtCompileTime,!DirectlyUseRhs> static_rhs; + gemv_static_vector_if<RhsScalar,ActualRhsTypeCleaned::SizeAtCompileTime,ActualRhsTypeCleaned::MaxSizeAtCompileTime,!DirectlyUseRhs> static_rhs; ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhsPtr,actualRhs.size(), DirectlyUseRhs ? 
const_cast<RhsScalar*>(actualRhs.data()) : static_rhs.data()); @@ -331,7 +315,7 @@ template<> struct trmv_selector<RowMajor> Index size = actualRhs.size(); EIGEN_DENSE_STORAGE_CTOR_PLUGIN #endif - Map<typename _ActualRhsType::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs; + Map<typename ActualRhsTypeCleaned::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs; } internal::triangular_matrix_vector_product diff --git a/Eigen/src/Core/util/BlasUtil.h b/Eigen/src/Core/util/BlasUtil.h index c4881b8da..3ec55fad2 100644 --- a/Eigen/src/Core/util/BlasUtil.h +++ b/Eigen/src/Core/util/BlasUtil.h @@ -334,7 +334,7 @@ struct blas_traits<Transpose<NestedXpr> > enum { IsTransposed = Base::IsTransposed ? 0 : 1 }; - static inline ExtractType extract(const XprType& x) { return Base::extract(x.nestedExpression()); } + static inline ExtractType extract(const XprType& x) { return ExtractType(Base::extract(x.nestedExpression())); } static inline Scalar extractScalarFactor(const XprType& x) { return Base::extractScalarFactor(x.nestedExpression()); } }; diff --git a/Eigen/src/Core/util/Constants.h b/Eigen/src/Core/util/Constants.h index 05107fdfe..9b40093f0 100644 --- a/Eigen/src/Core/util/Constants.h +++ b/Eigen/src/Core/util/Constants.h @@ -53,14 +53,13 @@ const int Infinity = -1; const unsigned int RowMajorBit = 0x1; /** \ingroup flags - * * means the expression should be evaluated by the calling expression */ const unsigned int EvalBeforeNestingBit = 0x2; /** \ingroup flags - * + * \deprecated * means the expression should be evaluated before any assignment */ -const unsigned int EvalBeforeAssigningBit = 0x4; +const unsigned int EvalBeforeAssigningBit = 0x4; // FIXME deprecated /** \ingroup flags * @@ -155,6 +154,16 @@ const unsigned int AlignedBit = 0x80; const unsigned int NestByRefBit = 0x100; +/** \ingroup flags + * + * for an expression, this means that the storage order + * can be either row-major or column-major. 
+ * The precise choice will be decided at evaluation time or when + * combined with other expressions. + * \sa \ref RowMajorBit, \ref TopicStorageOrders */ +const unsigned int NoPreferredStorageOrderBit = 0x200; + + // list of flags that are inherited by default const unsigned int HereditaryBits = RowMajorBit | EvalBeforeNestingBit @@ -413,10 +422,16 @@ namespace Architecture Generic = 0x0, SSE = 0x1, AltiVec = 0x2, + VSX = 0x3, + NEON = 0x4, #if defined EIGEN_VECTORIZE_SSE Target = SSE #elif defined EIGEN_VECTORIZE_ALTIVEC Target = AltiVec +#elif defined EIGEN_VECTORIZE_VSX + Target = VSX +#elif defined EIGEN_VECTORIZE_NEON + Target = NEON #else Target = Generic #endif @@ -425,7 +440,7 @@ namespace Architecture /** \internal \ingroup enums * Enum used as template parameter in GeneralProduct. */ -enum { CoeffBasedProductMode, LazyCoeffBasedProductMode, OuterProduct, InnerProduct, GemvProduct, GemmProduct }; +enum { DefaultProduct=0, CoeffBasedProductMode, LazyCoeffBasedProductMode, LazyProduct, OuterProduct, InnerProduct, GemvProduct, GemmProduct }; /** \internal \ingroup enums * Enum used in experimental parallel implementation. */ @@ -434,12 +449,38 @@ enum Action {GetAction, SetAction}; /** The type used to identify a dense storage. */ struct Dense {}; +/** The type used to identify a general sparse storage. */ +struct Sparse {}; + +/** The type used to identify a permutation storage. */ +struct PermutationStorage {}; + /** The type used to identify a matrix expression */ struct MatrixXpr {}; /** The type used to identify an array expression */ struct ArrayXpr {}; +// An evaluator must define its shape. 
By default, it can be one of the following: +struct DenseShape { static std::string debugName() { return "DenseShape"; } }; +struct HomogeneousShape { static std::string debugName() { return "HomogeneousShape"; } }; +struct DiagonalShape { static std::string debugName() { return "DiagonalShape"; } }; +struct BandShape { static std::string debugName() { return "BandShape"; } }; +struct TriangularShape { static std::string debugName() { return "TriangularShape"; } }; +struct SelfAdjointShape { static std::string debugName() { return "SelfAdjointShape"; } }; +struct PermutationShape { static std::string debugName() { return "PermutationShape"; } }; +struct SparseShape { static std::string debugName() { return "SparseShape"; } }; + +namespace internal { + + // random access iterators based on coeff*() accessors. +struct IndexBased {}; + +// evaluator based on iterators to access coefficients. +struct IteratorBased {}; + +} // end namespace internal + } // end namespace Eigen #endif // EIGEN_CONSTANTS_H diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h index 33deb88ec..c23892c50 100644 --- a/Eigen/src/Core/util/ForwardDeclarations.h +++ b/Eigen/src/Core/util/ForwardDeclarations.h @@ -36,6 +36,10 @@ template<typename Derived> struct accessors_level }; }; +template<typename T> struct evaluator_traits; + +template< typename T> struct evaluator; + } // end namespace internal template<typename T> struct NumTraits; @@ -51,7 +55,7 @@ class DenseCoeffsBase; template<typename _Scalar, int _Rows, int _Cols, int _Options = AutoAlign | -#if defined(__GNUC__) && __GNUC__==3 && __GNUC_MINOR__==4 +#if EIGEN_GNUC_AT(3,4) // workaround a bug in at least gcc 3.4.6 // the innermost ?: ternary operator is misparsed. We write it slightly // differently and this makes gcc 3.4.6 happy, but it's ugly. 
@@ -87,11 +91,19 @@ template<typename NullaryOp, typename MatrixType> class CwiseNullaryOp; template<typename UnaryOp, typename MatrixType> class CwiseUnaryOp; template<typename ViewOp, typename MatrixType> class CwiseUnaryView; template<typename BinaryOp, typename Lhs, typename Rhs> class CwiseBinaryOp; -template<typename BinOp, typename Lhs, typename Rhs> class SelfCwiseBinaryOp; -template<typename Derived, typename Lhs, typename Rhs> class ProductBase; -template<typename Lhs, typename Rhs> class Product; -template<typename Lhs, typename Rhs, int Mode> class GeneralProduct; -template<typename Lhs, typename Rhs, int NestingFlags> class CoeffBasedProduct; +template<typename BinOp, typename Lhs, typename Rhs> class SelfCwiseBinaryOp; // TODO deprecated +template<typename Derived, typename Lhs, typename Rhs> class ProductBase; // TODO deprecated +template<typename Decomposition, typename Rhstype> class Solve; +template<typename XprType> class Inverse; + +namespace internal { + template<typename Lhs, typename Rhs> struct product_tag; +} + +template<typename Lhs, typename Rhs, int Option = DefaultProduct> class Product; + +template<typename Lhs, typename Rhs, int Mode> class GeneralProduct; // TODO deprecated +template<typename Lhs, typename Rhs, int NestingFlags> class CoeffBasedProduct; // TODO deprecated template<typename Derived> class DiagonalBase; template<typename _DiagonalVectorType> class DiagonalWrapper; @@ -109,7 +121,12 @@ template<typename Derived, int Level = internal::accessors_level<Derived>::has_write_access ? 
WriteAccessors : ReadOnlyAccessors > class MapBase; template<int InnerStrideAtCompileTime, int OuterStrideAtCompileTime> class Stride; +template<int Value = Dynamic> class InnerStride; +template<int Value = Dynamic> class OuterStride; template<typename MatrixType, int MapOptions=Unaligned, typename StrideType = Stride<0,0> > class Map; +template<typename Derived> class RefBase; +template<typename PlainObjectType, int Options = 0, + typename StrideType = typename internal::conditional<PlainObjectType::IsVectorAtCompileTime,InnerStride<1>,OuterStride<> >::type > class Ref; template<typename Derived> class TriangularBase; template<typename MatrixType, unsigned int Mode> class TriangularView; @@ -120,10 +137,9 @@ template<typename MatrixType> struct CommaInitializer; template<typename Derived> class ReturnByValue; template<typename ExpressionType> class ArrayWrapper; template<typename ExpressionType> class MatrixWrapper; +template<typename XprType> class InnerIterator; namespace internal { -template<typename DecompositionType, typename Rhs> struct solve_retval_base; -template<typename DecompositionType, typename Rhs> struct solve_retval; template<typename DecompositionType> struct kernel_retval_base; template<typename DecompositionType> struct kernel_retval; template<typename DecompositionType> struct image_retval_base; @@ -136,6 +152,18 @@ template<typename _Scalar, int Rows=Dynamic, int Cols=Dynamic, int Supers=Dynami namespace internal { template<typename Lhs, typename Rhs> struct product_type; +/** \internal + * \class product_evaluator + * Products need their own evaluator with more template arguments allowing for + * easier partial template specializations. 
+ */ +template< typename T, + int ProductTag = internal::product_type<typename T::Lhs,typename T::Rhs>::ret, + typename LhsShape = typename evaluator_traits<typename T::Lhs>::Shape, + typename RhsShape = typename evaluator_traits<typename T::Rhs>::Shape, + typename LhsScalar = typename traits<typename T::Lhs>::Scalar, + typename RhsScalar = typename traits<typename T::Rhs>::Scalar + > struct product_evaluator; } template<typename Lhs, typename Rhs, @@ -193,7 +221,7 @@ struct IOFormat; // Array module template<typename _Scalar, int _Rows, int _Cols, int _Options = AutoAlign | -#if defined(__GNUC__) && __GNUC__==3 && __GNUC_MINOR__==4 +#if EIGEN_GNUC_AT(3,4) // workaround a bug in at least gcc 3.4.6 // the innermost ?: ternary operator is misparsed. We write it slightly // differently and this makes gcc 3.4.6 happy, but it's ugly. @@ -223,6 +251,7 @@ template<typename MatrixType> class HouseholderQR; template<typename MatrixType> class ColPivHouseholderQR; template<typename MatrixType> class FullPivHouseholderQR; template<typename MatrixType, int QRPreconditioner = ColPivHouseholderQRPreconditioner> class JacobiSVD; +template<typename MatrixType> class BDCSVD; template<typename MatrixType, int UpLo = Lower> class LLT; template<typename MatrixType, int UpLo = Lower> class LDLT; template<typename VectorsType, typename CoeffsType, int Side=OnTheLeft> class HouseholderSequence; diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index 40a28d4d6..dc5f13673 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -18,31 +18,280 @@ #define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || (EIGEN_WORLD_VERSION>=x && \ (EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \ EIGEN_MINOR_VERSION>=z)))) + +// Compiler identification, EIGEN_COMP_* + +/// \internal EIGEN_COMP_GNUC set to 1 for all compilers compatible with GCC #ifdef __GNUC__ - #define EIGEN_GNUC_AT_LEAST(x,y) ((__GNUC__==x && __GNUC_MINOR__>=y) || __GNUC__>x) + 
#define EIGEN_COMP_GNUC 1 #else - #define EIGEN_GNUC_AT_LEAST(x,y) 0 + #define EIGEN_COMP_GNUC 0 #endif - -#ifdef __GNUC__ - #define EIGEN_GNUC_AT_MOST(x,y) ((__GNUC__==x && __GNUC_MINOR__<=y) || __GNUC__<x) + +/// \internal EIGEN_COMP_CLANG set to 1 if the compiler is clang (alias for __clang__) +#if defined(__clang__) + #define EIGEN_COMP_CLANG 1 #else - #define EIGEN_GNUC_AT_MOST(x,y) 0 + #define EIGEN_COMP_CLANG 0 #endif -#if EIGEN_GNUC_AT_MOST(4,3) && !defined(__clang__) - // see bug 89 - #define EIGEN_SAFE_TO_USE_STANDARD_ASSERT_MACRO 0 + +/// \internal EIGEN_COMP_LLVM set to 1 if the compiler backend is llvm +#if defined(__llvm__) + #define EIGEN_COMP_LLVM 1 #else - #define EIGEN_SAFE_TO_USE_STANDARD_ASSERT_MACRO 1 + #define EIGEN_COMP_LLVM 0 +#endif + +/// \internal EIGEN_COMP_ICC set to __INTEL_COMPILER if the compiler is Intel compiler, 0 otherwise +#if defined(__INTEL_COMPILER) + #define EIGEN_COMP_ICC __INTEL_COMPILER +#else + #define EIGEN_COMP_ICC 0 +#endif + +/// \internal EIGEN_COMP_MINGW set to 1 if the compiler is mingw +#if defined(__MINGW32__) + #define EIGEN_COMP_MINGW 1 +#else + #define EIGEN_COMP_MINGW 0 +#endif + +/// \internal EIGEN_COMP_SUNCC set to 1 if the compiler is Solaris Studio +#if defined(__SUNPRO_CC) + #define EIGEN_COMP_SUNCC 1 +#else + #define EIGEN_COMP_SUNCC 0 +#endif + +/// \internal EIGEN_COMP_MSVC set to _MSC_VER if the compiler is Microsoft Visual C++, 0 otherwise. 
+#if defined(_MSC_VER) + #define EIGEN_COMP_MSVC _MSC_VER +#else + #define EIGEN_COMP_MSVC 0 +#endif + +/// \internal EIGEN_COMP_MSVC_STRICT set to _MSC_VER if the compiler is really Microsoft Visual C++ and not, e.g., ICC; 0 otherwise +#if EIGEN_COMP_MSVC && !(EIGEN_COMP_ICC) + #define EIGEN_COMP_MSVC_STRICT _MSC_VER +#else + #define EIGEN_COMP_MSVC_STRICT 0 +#endif + +/// \internal EIGEN_COMP_IBM set to 1 if the compiler is IBM XL C++ +#if defined(__IBMCPP__) || defined(__xlc__) + #define EIGEN_COMP_IBM 1 +#else + #define EIGEN_COMP_IBM 0 +#endif + +/// \internal EIGEN_COMP_PGI set to 1 if the compiler is Portland Group Compiler +#if defined(__PGI) + #define EIGEN_COMP_PGI 1 +#else + #define EIGEN_COMP_PGI 0 +#endif + +/// \internal EIGEN_COMP_ARM set to 1 if the compiler is ARM Compiler +#if defined(__CC_ARM) || defined(__ARMCC_VERSION) + #define EIGEN_COMP_ARM 1 +#else + #define EIGEN_COMP_ARM 0 +#endif + + +/// \internal EIGEN_COMP_GNUC_STRICT set to 1 if the compiler is really GCC and not a compatible compiler (e.g., ICC, clang, mingw, etc.) 
+#if EIGEN_COMP_GNUC && !(EIGEN_COMP_CLANG || EIGEN_COMP_CLANG || EIGEN_COMP_MINGW || EIGEN_COMP_PGI || EIGEN_COMP_IBM || EIGEN_COMP_ARM ) + #define EIGEN_COMP_GNUC_STRICT 1 +#else + #define EIGEN_COMP_GNUC_STRICT 0 #endif -#if defined(__GNUC__) && (__GNUC__ <= 3) + +#if EIGEN_COMP_GNUC + #define EIGEN_GNUC_AT_LEAST(x,y) ((__GNUC__==x && __GNUC_MINOR__>=y) || __GNUC__>x) + #define EIGEN_GNUC_AT_MOST(x,y) ((__GNUC__==x && __GNUC_MINOR__<=y) || __GNUC__<x) + #define EIGEN_GNUC_AT(x,y) ( __GNUC__==x && __GNUC_MINOR__==y ) +#else + #define EIGEN_GNUC_AT_LEAST(x,y) 0 + #define EIGEN_GNUC_AT_MOST(x,y) 0 + #define EIGEN_GNUC_AT(x,y) 0 +#endif + +// FIXME: could probably be removed as we do not support gcc 3.x anymore +#if EIGEN_COMP_GNUC && (__GNUC__ <= 3) #define EIGEN_GCC3_OR_OLDER 1 #else #define EIGEN_GCC3_OR_OLDER 0 #endif + +// Architecture identification, EIGEN_ARCH_* + +#if defined(__x86_64__) || defined(_M_X64) || defined(__amd64) + #define EIGEN_ARCH_x86_64 1 +#else + #define EIGEN_ARCH_x86_64 0 +#endif + +#if defined(__i386__) || defined(_M_IX86) || defined(_X86_) || defined(__i386) + #define EIGEN_ARCH_i386 1 +#else + #define EIGEN_ARCH_i386 0 +#endif + +#if EIGEN_ARCH_x86_64 || EIGEN_ARCH_i386 + #define EIGEN_ARCH_i386_OR_x86_64 1 +#else + #define EIGEN_ARCH_i386_OR_x86_64 0 +#endif + +/// \internal EIGEN_ARCH_ARM set to 1 if the architecture is ARM +#if defined(__arm__) + #define EIGEN_ARCH_ARM 1 +#else + #define EIGEN_ARCH_ARM 0 +#endif + +/// \internal EIGEN_ARCH_ARM64 set to 1 if the architecture is ARM64 +#if defined(__aarch64__) + #define EIGEN_ARCH_ARM64 1 +#else + #define EIGEN_ARCH_ARM64 0 +#endif + +#if EIGEN_ARCH_ARM || EIGEN_ARCH_ARM64 + #define EIGEN_ARCH_ARM_OR_ARM64 1 +#else + #define EIGEN_ARCH_ARM_OR_ARM64 0 +#endif + +/// \internal EIGEN_ARCH_MIPS set to 1 if the architecture is MIPS +#if defined(__mips__) || defined(__mips) + #define EIGEN_ARCH_MIPS 1 +#else + #define EIGEN_ARCH_MIPS 0 +#endif + +/// \internal EIGEN_ARCH_SPARC set to 1 if 
the architecture is SPARC +#if defined(__sparc__) || defined(__sparc) + #define EIGEN_ARCH_SPARC 1 +#else + #define EIGEN_ARCH_SPARC 0 +#endif + +/// \internal EIGEN_ARCH_IA64 set to 1 if the architecture is Intel Itanium +#if defined(__ia64__) + #define EIGEN_ARCH_IA64 1 +#else + #define EIGEN_ARCH_IA64 0 +#endif + +/// \internal EIGEN_ARCH_PPC set to 1 if the architecture is PowerPC +#if defined(__powerpc__) || defined(__ppc__) || defined(_M_PPC) + #define EIGEN_ARCH_PPC 1 +#else + #define EIGEN_ARCH_PPC 0 +#endif + + + +// Operating system identification, EIGEN_OS_* + +/// \internal EIGEN_OS_UNIX set to 1 if the OS is a unix variant +#if defined(__unix__) || defined(__unix) + #define EIGEN_OS_UNIX 1 +#else + #define EIGEN_OS_UNIX 0 +#endif + +/// \internal EIGEN_OS_LINUX set to 1 if the OS is based on Linux kernel +#if defined(__linux__) + #define EIGEN_OS_LINUX 1 +#else + #define EIGEN_OS_LINUX 0 +#endif + +/// \internal EIGEN_OS_ANDROID set to 1 if the OS is Android +#if defined(__ANDROID__) + #define EIGEN_OS_ANDROID 1 +#else + #define EIGEN_OS_ANDROID 0 +#endif + +/// \internal EIGEN_OS_GNULINUX set to 1 if the OS is GNU Linux and not Linux-based OS (e.g., not android) +#if defined(__gnu_linux__) && !(EIGEN_OS_ANDROID) + #define EIGEN_OS_GNULINUX 1 +#else + #define EIGEN_OS_GNULINUX 0 +#endif + +/// \internal EIGEN_OS_BSD set to 1 if the OS is a BSD variant +#if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__bsdi__) || defined(__DragonFly__) + #define EIGEN_OS_BSD 1 +#else + #define EIGEN_OS_BSD 0 +#endif + +/// \internal EIGEN_OS_MAC set to 1 if the OS is MacOS +#if defined(__APPLE__) + #define EIGEN_OS_MAC 1 +#else + #define EIGEN_OS_MAC 0 +#endif + +/// \internal EIGEN_OS_QNX set to 1 if the OS is QNX +#if defined(__QNX__) + #define EIGEN_OS_QNX 1 +#else + #define EIGEN_OS_QNX 0 +#endif + +/// \internal EIGEN_OS_WIN set to 1 if the OS is Windows based +#if defined(_WIN32) + #define EIGEN_OS_WIN 1 +#else + #define 
EIGEN_OS_WIN 0 +#endif + +/// \internal EIGEN_OS_WIN64 set to 1 if the OS is Windows 64bits +#if defined(_WIN64) + #define EIGEN_OS_WIN64 1 +#else + #define EIGEN_OS_WIN64 0 +#endif + +/// \internal EIGEN_OS_WINCE set to 1 if the OS is Windows CE +#if defined(_WIN32_WCE) + #define EIGEN_OS_WINCE 1 +#else + #define EIGEN_OS_WINCE 0 +#endif + +/// \internal EIGEN_OS_CYGWIN set to 1 if the OS is Windows/Cygwin +#if defined(__CYGWIN__) + #define EIGEN_OS_CYGWIN 1 +#else + #define EIGEN_OS_CYGWIN 0 +#endif + +/// \internal EIGEN_OS_WIN_STRICT set to 1 if the OS is really Windows and not some variants +#if EIGEN_OS_WIN && !( EIGEN_OS_WINCE || EIGEN_OS_CYGWIN ) + #define EIGEN_OS_WIN_STRICT 1 +#else + #define EIGEN_OS_WIN_STRICT 0 +#endif + + + + +#if EIGEN_GNUC_AT_MOST(4,3) && !EIGEN_COMP_CLANG + // see bug 89 + #define EIGEN_SAFE_TO_USE_STANDARD_ASSERT_MACRO 0 +#else + #define EIGEN_SAFE_TO_USE_STANDARD_ASSERT_MACRO 1 +#endif + // 16 byte alignment is only useful for vectorization. Since it affects the ABI, we need to enable // 16 byte alignment on all platforms where vectorization might be enabled. In theory we could always // enable alignment, but it can be a cause of problems on some platforms, so we just disable it in @@ -50,7 +299,7 @@ // Only static alignment is really problematic (relies on nonstandard compiler extensions that don't // work everywhere, for example don't work on GCC/ARM), try to keep heap alignment even // when we have to disable static alignment. 
-#if defined(__GNUC__) && !(defined(__i386__) || defined(__x86_64__) || defined(__powerpc__) || defined(__ppc__) || defined(__ia64__)) +#if EIGEN_COMP_GNUC && !(EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_PPC || EIGEN_ARCH_IA64) #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1 #else #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 0 @@ -59,8 +308,8 @@ // static alignment is completely disabled with GCC 3, Sun Studio, and QCC/QNX #if !EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT \ && !EIGEN_GCC3_OR_OLDER \ - && !defined(__SUNPRO_CC) \ - && !defined(__QNXNTO__) + && !EIGEN_COMP_SUNCC \ + && !EIGEN_OS_QNX #define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 1 #else #define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 0 @@ -86,6 +335,11 @@ #define EIGEN_ALIGN 0 #endif + +// This macro can be used to prevent from macro expansion, e.g.: +// std::max EIGEN_NOT_A_MACRO(a,b) +#define EIGEN_NOT_A_MACRO + // EIGEN_ALIGN_STATICALLY is the true test whether we want to align arrays on the stack or not. It takes into account both the user choice to explicitly disable // alignment (EIGEN_DONT_ALIGN_STATICALLY) and the architecture config (EIGEN_ARCH_WANTS_STACK_ALIGNMENT). Henceforth, only EIGEN_ALIGN_STATICALLY should be used. #if EIGEN_ARCH_WANTS_STACK_ALIGNMENT && !defined(EIGEN_DONT_ALIGN_STATICALLY) @@ -124,7 +378,7 @@ #if (__has_feature(cxx_rvalue_references) || \ (defined(__cplusplus) && __cplusplus >= 201103L) || \ defined(__GXX_EXPERIMENTAL_CXX0X__) || \ - (defined(_MSC_VER) && _MSC_VER >= 1600)) + (EIGEN_COMP_MSVC >= 1600)) #define EIGEN_HAVE_RVALUE_REFERENCES #endif @@ -161,7 +415,7 @@ // EIGEN_STRONG_INLINE is a stronger version of the inline, using __forceinline on MSVC, // but it still doesn't use GCC's always_inline. This is useful in (common) situations where MSVC needs forceinline // but GCC is still doing fine with just inline. 
-#if (defined _MSC_VER) || (defined __INTEL_COMPILER) +#if EIGEN_COMP_MSVC || EIGEN_COMP_ICC #define EIGEN_STRONG_INLINE __forceinline #else #define EIGEN_STRONG_INLINE inline @@ -180,15 +434,15 @@ #define EIGEN_ALWAYS_INLINE EIGEN_STRONG_INLINE #endif -#if (defined __GNUC__) +#if EIGEN_COMP_GNUC #define EIGEN_DONT_INLINE __attribute__((noinline)) -#elif (defined _MSC_VER) +#elif EIGEN_COMP_MSVC #define EIGEN_DONT_INLINE __declspec(noinline) #else #define EIGEN_DONT_INLINE #endif -#if (defined __GNUC__) +#if EIGEN_COMP_GNUC #define EIGEN_PERMISSIVE_EXPR __extension__ #else #define EIGEN_PERMISSIVE_EXPR @@ -257,15 +511,15 @@ #endif #ifdef EIGEN_NO_DEBUG -#define EIGEN_ONLY_USED_FOR_DEBUG(x) (void)x +#define EIGEN_ONLY_USED_FOR_DEBUG(x) EIGEN_UNUSED_VARIABLE(x) #else #define EIGEN_ONLY_USED_FOR_DEBUG(x) #endif #ifndef EIGEN_NO_DEPRECATED_WARNING - #if (defined __GNUC__) + #if EIGEN_COMP_GNUC #define EIGEN_DEPRECATED __attribute__((deprecated)) - #elif (defined _MSC_VER) + #elif EIGEN_COMP_MSVC #define EIGEN_DEPRECATED __declspec(deprecated) #else #define EIGEN_DEPRECATED @@ -274,7 +528,7 @@ #define EIGEN_DEPRECATED #endif -#if (defined __GNUC__) +#if EIGEN_COMP_GNUC #define EIGEN_UNUSED __attribute__((unused)) #else #define EIGEN_UNUSED @@ -283,13 +537,13 @@ // Suppresses 'unused variable' warnings. namespace Eigen { namespace internal { - template<typename T> void ignore_unused_variable(const T&) {} + template<typename T> EIGEN_DEVICE_FUNC void ignore_unused_variable(const T&) {} } } #define EIGEN_UNUSED_VARIABLE(var) Eigen::internal::ignore_unused_variable(var); #if !defined(EIGEN_ASM_COMMENT) - #if (defined __GNUC__) && ( defined(__i386__) || defined(__x86_64__) ) + #if EIGEN_COMP_GNUC && (EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_ARM_OR_ARM64) #define EIGEN_ASM_COMMENT(X) __asm__("#" X) #else #define EIGEN_ASM_COMMENT(X) @@ -304,12 +558,12 @@ namespace Eigen { * vectorized and non-vectorized code. 
*/ #if (defined __CUDACC__) -#define EIGEN_ALIGN_TO_BOUNDARY(n) __align__(n) -#elif (defined __GNUC__) || (defined __PGI) || (defined __IBMCPP__) || (defined __ARMCC_VERSION) + #define EIGEN_ALIGN_TO_BOUNDARY(n) __align__(n) +#elif EIGEN_COMP_GNUC || EIGEN_COMP_PGI || EIGEN_COMP_IBM || EIGEN_COMP_ARM #define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n))) -#elif (defined _MSC_VER) +#elif EIGEN_COMP_MSVC #define EIGEN_ALIGN_TO_BOUNDARY(n) __declspec(align(n)) -#elif (defined __SUNPRO_CC) +#elif EIGEN_COMP_SUNCC // FIXME not sure about this one: #define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n))) #else @@ -357,27 +611,26 @@ namespace Eigen { // just an empty macro ! #define EIGEN_EMPTY -#if defined(_MSC_VER) && (!defined(__INTEL_COMPILER)) -#define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \ - using Base::operator =; -#elif defined(__clang__) // workaround clang bug (see http://forum.kde.org/viewtopic.php?f=74&t=102653) -#define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \ - using Base::operator =; \ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const Derived& other) { Base::operator=(other); return *this; } \ - template <typename OtherDerived> \ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const DenseBase<OtherDerived>& other) { Base::operator=(other.derived()); return *this; } -#else -#define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \ - using Base::operator =; \ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const Derived& other) \ - { \ - Base::operator=(other); \ - return *this; \ - } +#if EIGEN_COMP_MSVC_STRICT && EIGEN_COMP_MSVC < 1900 + #define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \ + using Base::operator =; +#elif EIGEN_COMP_CLANG // workaround clang bug (see http://forum.kde.org/viewtopic.php?f=74&t=102653) + #define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \ + using Base::operator =; \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const Derived& 
other) { Base::operator=(other); return *this; } \ + template <typename OtherDerived> \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const DenseBase<OtherDerived>& other) { Base::operator=(other.derived()); return *this; } +#else + #define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \ + using Base::operator =; \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const Derived& other) \ + { \ + Base::operator=(other); \ + return *this; \ + } #endif -#define EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Derived) \ - EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) +#define EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Derived) EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) /** * Just a side note. Commenting within defines works only by documenting @@ -387,6 +640,8 @@ namespace Eigen { * documentation in a single line. **/ +// TODO The EIGEN_DENSE_PUBLIC_INTERFACE should not exist anymore + #define EIGEN_GENERIC_PUBLIC_INTERFACE(Derived) \ typedef typename Eigen::internal::traits<Derived>::Scalar Scalar; /*!< \brief Numeric type, e.g. float, double, int or std::complex<float>. */ \ typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; /*!< \brief The underlying numeric type for composed scalar types. \details In cases where Scalar is e.g. std::complex<T>, T were corresponding to RealScalar. 
*/ \ @@ -397,7 +652,6 @@ namespace Eigen { enum { RowsAtCompileTime = Eigen::internal::traits<Derived>::RowsAtCompileTime, \ ColsAtCompileTime = Eigen::internal::traits<Derived>::ColsAtCompileTime, \ Flags = Eigen::internal::traits<Derived>::Flags, \ - CoeffReadCost = Eigen::internal::traits<Derived>::CoeffReadCost, \ SizeAtCompileTime = Base::SizeAtCompileTime, \ MaxSizeAtCompileTime = Base::MaxSizeAtCompileTime, \ IsVectorAtCompileTime = Base::IsVectorAtCompileTime }; @@ -416,13 +670,11 @@ namespace Eigen { MaxRowsAtCompileTime = Eigen::internal::traits<Derived>::MaxRowsAtCompileTime, \ MaxColsAtCompileTime = Eigen::internal::traits<Derived>::MaxColsAtCompileTime, \ Flags = Eigen::internal::traits<Derived>::Flags, \ - CoeffReadCost = Eigen::internal::traits<Derived>::CoeffReadCost, \ SizeAtCompileTime = Base::SizeAtCompileTime, \ MaxSizeAtCompileTime = Base::MaxSizeAtCompileTime, \ IsVectorAtCompileTime = Base::IsVectorAtCompileTime }; \ using Base::derived; \ - using Base::const_cast_derived; - + using Base::const_cast_derived; #define EIGEN_PLAIN_ENUM_MIN(a,b) (((int)a <= (int)b) ? (int)a : (int)b) #define EIGEN_PLAIN_ENUM_MAX(a,b) (((int)a >= (int)b) ? (int)a : (int)b) diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h index 30133ba67..bacf236fb 100644 --- a/Eigen/src/Core/util/Memory.h +++ b/Eigen/src/Core/util/Memory.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. 
// -// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr> +// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr> // Copyright (C) 2008-2009 Benoit Jacob <jacob.benoit.1@gmail.com> // Copyright (C) 2009 Kenneth Riddile <kfriddile@yahoo.com> // Copyright (C) 2010 Hauke Heibel <hauke.heibel@gmail.com> @@ -42,15 +42,15 @@ // See http://svn.freebsd.org/viewvc/base/stable/6/lib/libc/stdlib/malloc.c?view=markup // FreeBSD 7 seems to have 16-byte aligned malloc except on ARM and MIPS architectures // See http://svn.freebsd.org/viewvc/base/stable/7/lib/libc/stdlib/malloc.c?view=markup -#if defined(__FreeBSD__) && !defined(__arm__) && !defined(__mips__) && (EIGEN_ALIGN_BYTES == 16) +#if defined(__FreeBSD__) && !(EIGEN_ARCH_ARM || EIGEN_ARCH_MIPS) && (EIGEN_ALIGN_BYTES == 16) #define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 1 #else #define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 0 #endif -#if (defined(__APPLE__) && (EIGEN_ALIGN_BYTES == 16)) \ - || (defined(_WIN64) && (EIGEN_ALIGN_BYTES == 16)) \ - || EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED \ +#if (EIGEN_OS_MAC && (EIGEN_ALIGN_BYTES == 16)) \ + || (EIGEN_OS_WIN64 && (EIGEN_ALIGN_BYTES == 16)) \ + || EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED \ || EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED #define EIGEN_MALLOC_ALREADY_ALIGNED 1 #else @@ -62,9 +62,9 @@ // See bug 554 (http://eigen.tuxfamily.org/bz/show_bug.cgi?id=554) // It seems to be unsafe to check _POSIX_ADVISORY_INFO without including unistd.h first. 
// Currently, let's include it only on unix systems: -#if defined(__unix__) || defined(__unix) +#if EIGEN_OS_UNIX #include <unistd.h> - #if ((defined __QNXNTO__) || (defined _GNU_SOURCE) || (defined __PGI) || ((defined _XOPEN_SOURCE) && (_XOPEN_SOURCE >= 600))) && (defined _POSIX_ADVISORY_INFO) && (_POSIX_ADVISORY_INFO > 0) + #if (EIGEN_OS_QNX || (defined _GNU_SOURCE) || EIGEN_COMP_PGI || ((defined _XOPEN_SOURCE) && (_XOPEN_SOURCE >= 600))) && (defined _POSIX_ADVISORY_INFO) && (_POSIX_ADVISORY_INFO > 0) #define EIGEN_HAS_POSIX_MEMALIGN 1 #endif #endif @@ -224,7 +224,7 @@ inline void* aligned_malloc(size_t size) if(posix_memalign(&result, EIGEN_ALIGN_BYTES, size)) result = 0; #elif EIGEN_HAS_MM_MALLOC result = _mm_malloc(size, EIGEN_ALIGN_BYTES); - #elif defined(_MSC_VER) && (!defined(_WIN32_WCE)) + #elif EIGEN_OS_WIN_STRICT result = _aligned_malloc(size, EIGEN_ALIGN_BYTES); #else result = handmade_aligned_malloc(size); @@ -247,7 +247,7 @@ inline void aligned_free(void *ptr) std::free(ptr); #elif EIGEN_HAS_MM_MALLOC _mm_free(ptr); - #elif defined(_MSC_VER) && (!defined(_WIN32_WCE)) + #elif EIGEN_OS_WIN_STRICT _aligned_free(ptr); #else handmade_aligned_free(ptr); @@ -274,12 +274,12 @@ inline void* aligned_realloc(void *ptr, size_t new_size, size_t old_size) // The defined(_mm_free) is just here to verify that this MSVC version // implements _mm_malloc/_mm_free based on the corresponding _aligned_ // functions. This may not always be the case and we just try to be safe. 
- #if defined(_MSC_VER) && (!defined(_WIN32_WCE)) && defined(_mm_free) + #if EIGEN_OS_WIN_STRICT && defined(_mm_free) result = _aligned_realloc(ptr,new_size,EIGEN_ALIGN_BYTES); #else result = generic_aligned_realloc(ptr,new_size,old_size); #endif -#elif defined(_MSC_VER) && (!defined(_WIN32_WCE)) +#elif EIGEN_OS_WIN_STRICT result = _aligned_realloc(ptr,new_size,EIGEN_ALIGN_BYTES); #else result = handmade_aligned_realloc(ptr,new_size,old_size); @@ -454,6 +454,8 @@ template<typename T, bool Align> inline T* conditional_aligned_realloc_new(T* pt template<typename T, bool Align> inline T* conditional_aligned_new_auto(size_t size) { + if(size==0) + return 0; // short-cut. Also fixes Bug 884 check_size_for_overflow<T>(size); T *result = reinterpret_cast<T*>(conditional_aligned_malloc<Align>(sizeof(T)*size)); if(NumTraits<T>::RequireInitialization) @@ -521,9 +523,8 @@ template<typename T, bool Align> inline void conditional_aligned_delete_auto(T * template<typename Scalar, typename Index> inline Index first_aligned(const Scalar* array, Index size) { - enum { PacketSize = packet_traits<Scalar>::size, - PacketAlignedMask = PacketSize-1 - }; + static const Index PacketSize = packet_traits<Scalar>::size; + static const Index PacketAlignedMask = PacketSize-1; if(PacketSize==1) { @@ -576,27 +577,27 @@ template<typename T, bool UseMemmove> struct smart_memmove_helper; template<typename T> void smart_memmove(const T* start, const T* end, T* target) { - smart_memmove_helper<T,!NumTraits<T>::RequireInitialization>::run(start, end, target); + smart_memmove_helper<T,!NumTraits<T>::RequireInitialization>::run(start, end, target); } template<typename T> struct smart_memmove_helper<T,true> { - static inline void run(const T* start, const T* end, T* target) - { std::memmove(target, start, std::ptrdiff_t(end)-std::ptrdiff_t(start)); } + static inline void run(const T* start, const T* end, T* target) + { std::memmove(target, start, std::ptrdiff_t(end)-std::ptrdiff_t(start)); } }; 
template<typename T> struct smart_memmove_helper<T,false> { - static inline void run(const T* start, const T* end, T* target) - { - if (uintptr_t(target) < uintptr_t(start)) - { - std::copy(start, end, target); - } - else - { - std::ptrdiff_t count = (std::ptrdiff_t(end)-std::ptrdiff_t(start)) / sizeof(T); - std::copy_backward(start, end, target + count); - } + static inline void run(const T* start, const T* end, T* target) + { + if (uintptr_t(target) < uintptr_t(start)) + { + std::copy(start, end, target); } + else + { + std::ptrdiff_t count = (std::ptrdiff_t(end)-std::ptrdiff_t(start)) / sizeof(T); + std::copy_backward(start, end, target + count); + } + } }; @@ -607,16 +608,16 @@ template<typename T> struct smart_memmove_helper<T,false> { // you can overwrite Eigen's default behavior regarding alloca by defining EIGEN_ALLOCA // to the appropriate stack allocation function #ifndef EIGEN_ALLOCA - #if (defined __linux__) || (defined __APPLE__) || (defined alloca) + #if EIGEN_OS_LINUX || EIGEN_OS_MAC || (defined alloca) #define EIGEN_ALLOCA alloca - #elif defined(_MSC_VER) + #elif EIGEN_COMP_MSVC #define EIGEN_ALLOCA _alloca #endif #endif // This helper class construct the allocated memory, and takes care of destructing and freeing the handled data // at destruction time. In practice this helper class is mainly useful to avoid memory leak in case of exceptions. -template<typename T> class aligned_stack_memory_handler +template<typename T> class aligned_stack_memory_handler : noncopyable { public: /* Creates a stack_memory_handler responsible for the buffer \a ptr of size \a size. 
@@ -644,6 +645,30 @@ template<typename T> class aligned_stack_memory_handler bool m_deallocate; }; +template<typename T> class scoped_array : noncopyable +{ + T* m_ptr; +public: + explicit scoped_array(std::ptrdiff_t size) + { + m_ptr = new T[size]; + } + ~scoped_array() + { + delete[] m_ptr; + } + T& operator[](std::ptrdiff_t i) { return m_ptr[i]; } + const T& operator[](std::ptrdiff_t i) const { return m_ptr[i]; } + T* &ptr() { return m_ptr; } + const T* ptr() const { return m_ptr; } + operator const T*() const { return m_ptr; } +}; + +template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b) +{ + std::swap(a.ptr(),b.ptr()); +} + } // end namespace internal /** \internal @@ -786,12 +811,12 @@ public: //---------- Cache sizes ---------- #if !defined(EIGEN_NO_CPUID) -# if defined(__GNUC__) && ( defined(__i386__) || defined(__x86_64__) ) -# if defined(__PIC__) && defined(__i386__) +# if EIGEN_COMP_GNUC && EIGEN_ARCH_i386_OR_x86_64 +# if defined(__PIC__) && EIGEN_ARCH_i386 // Case for x86 with PIC # define EIGEN_CPUID(abcd,func,id) \ __asm__ __volatile__ ("xchgl %%ebx, %k1;cpuid; xchgl %%ebx,%k1": "=a" (abcd[0]), "=&r" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "a" (func), "c" (id)); -# elif defined(__PIC__) && defined(__x86_64__) +# elif defined(__PIC__) && EIGEN_ARCH_x86_64 // Case for x64 with PIC. In theory this is only a problem with recent gcc and with medium or large code model, not with the default small code model. // However, we cannot detect which code model is used, and the xchg overhead is negligible anyway. 
# define EIGEN_CPUID(abcd,func,id) \ @@ -801,8 +826,8 @@ public: # define EIGEN_CPUID(abcd,func,id) \ __asm__ __volatile__ ("cpuid": "=a" (abcd[0]), "=b" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "0" (func), "2" (id) ); # endif -# elif defined(_MSC_VER) -# if (_MSC_VER > 1500) && ( defined(_M_IX86) || defined(_M_X64) ) +# elif EIGEN_COMP_MSVC +# if (EIGEN_COMP_MSVC > 1500) && EIGEN_ARCH_i386_OR_x86_64 # define EIGEN_CPUID(abcd,func,id) __cpuidex((int*)abcd,func,id) # endif # endif diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h index b99b8849e..f3bafd5af 100644 --- a/Eigen/src/Core/util/Meta.h +++ b/Eigen/src/Core/util/Meta.h @@ -274,18 +274,6 @@ template<typename T> struct scalar_product_traits<std::complex<T>, T> // typedef typename scalar_product_traits<typename remove_all<ArgType0>::type, typename remove_all<ArgType1>::type>::ReturnType type; // }; -template<typename T> struct is_diagonal -{ enum { ret = false }; }; - -template<typename T> struct is_diagonal<DiagonalBase<T> > -{ enum { ret = true }; }; - -template<typename T> struct is_diagonal<DiagonalWrapper<T> > -{ enum { ret = true }; }; - -template<typename T, int S> struct is_diagonal<DiagonalMatrix<T,S> > -{ enum { ret = true }; }; - } // end namespace internal namespace numext { diff --git a/Eigen/src/Core/util/StaticAssert.h b/Eigen/src/Core/util/StaticAssert.h index 59aa0811c..7538a0633 100644 --- a/Eigen/src/Core/util/StaticAssert.h +++ b/Eigen/src/Core/util/StaticAssert.h @@ -26,7 +26,7 @@ #ifndef EIGEN_NO_STATIC_ASSERT - #if defined(__GXX_EXPERIMENTAL_CXX0X__) || (defined(_MSC_VER) && (_MSC_VER >= 1600)) + #if defined(__GXX_EXPERIMENTAL_CXX0X__) || (EIGEN_COMP_MSVC >= 1600) // if native static_assert is enabled, let's use it #define EIGEN_STATIC_ASSERT(X,MSG) static_assert(X,#MSG); @@ -84,13 +84,16 @@ THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY, YOU_ARE_TRYING_TO_USE_AN_INDEX_BASED_ACCESSOR_ON_AN_EXPRESSION_THAT_DOES_NOT_SUPPORT_THAT, 
THIS_METHOD_IS_ONLY_FOR_1x1_EXPRESSIONS, + THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS, THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_OF_BOOL, THIS_METHOD_IS_ONLY_FOR_ARRAYS_NOT_MATRICES, YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED, YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED, THE_INDEX_TYPE_MUST_BE_A_SIGNED_TYPE, THE_STORAGE_ORDER_OF_BOTH_SIDES_MUST_MATCH, - OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG + OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG, + IMPLICIT_CONVERSION_TO_SCALAR_IS_FOR_INNER_PRODUCT_ONLY, + STORAGE_LAYOUT_DOES_NOT_MATCH }; }; @@ -101,7 +104,7 @@ // Specialized implementation for MSVC to avoid "conditional // expression is constant" warnings. This implementation doesn't // appear to work under GCC, hence the multiple implementations. - #ifdef _MSC_VER + #if EIGEN_COMP_MSVC #define EIGEN_STATIC_ASSERT(CONDITION,MSG) \ {Eigen::internal::static_assertion<bool(CONDITION)>::MSG;} @@ -157,7 +160,7 @@ #define EIGEN_PREDICATE_SAME_MATRIX_SIZE(TYPE0,TYPE1) \ ( \ - (int(TYPE0::SizeAtCompileTime)==0 && int(TYPE1::SizeAtCompileTime)==0) \ + (int(internal::size_of_xpr_at_compile_time<TYPE0>::ret)==0 && int(internal::size_of_xpr_at_compile_time<TYPE1>::ret)==0) \ || (\ (int(TYPE0::RowsAtCompileTime)==Eigen::Dynamic \ || int(TYPE1::RowsAtCompileTime)==Eigen::Dynamic \ diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h index 67ca49754..3ac37df58 100644 --- a/Eigen/src/Core/util/XprHelper.h +++ b/Eigen/src/Core/util/XprHelper.h @@ -14,7 +14,7 @@ // just a workaround because GCC seems to not really like empty structs // FIXME: gcc 4.3 generates bad code when strict-aliasing is enabled // so currently we simply disable this optimization for gcc 4.3 -#if (defined __GNUG__) && !((__GNUC__==4) && (__GNUC_MINOR__==3)) +#if EIGEN_COMP_GNUC && !EIGEN_GNUC_AT(4,3) #define EIGEN_EMPTY_STRUCT_CTOR(X) \ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE X() {} \ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE X(const X& ) {} @@ -128,6 +128,17 @@ template<typename 
_Scalar, int _Rows, int _Cols, template<typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols> class compute_matrix_flags { + enum { row_major_bit = Options&RowMajor ? RowMajorBit : 0 }; + public: + // FIXME currently we still have to handle DirectAccessBit at the expression level to handle DenseCoeffsBase<> + // and then propagate this information to the evaluator's flags. + // However, I (Gael) think that DirectAccessBit should only matter at the evaluation stage. + enum { ret = DirectAccessBit | LvalueBit | NestByRefBit | row_major_bit }; +}; + +template<typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols> +class compute_matrix_evaluator_flags +{ enum { row_major_bit = Options&RowMajor ? RowMajorBit : 0, is_dynamic_size_storage = MaxRows==Dynamic || MaxCols==Dynamic, @@ -156,7 +167,7 @@ class compute_matrix_flags }; public: - enum { ret = LinearAccessBit | LvalueBit | DirectAccessBit | NestByRefBit | packet_access_bit | row_major_bit | aligned_bit }; + enum { ret = LinearAccessBit | DirectAccessBit | packet_access_bit | row_major_bit | aligned_bit }; }; template<int _Rows, int _Cols> struct size_at_compile_time @@ -164,6 +175,11 @@ template<int _Rows, int _Cols> struct size_at_compile_time enum { ret = (_Rows==Dynamic || _Cols==Dynamic) ? 
Dynamic : _Rows * _Cols }; }; +template<typename XprType> struct size_of_xpr_at_compile_time +{ + enum { ret = size_at_compile_time<traits<XprType>::RowsAtCompileTime,traits<XprType>::ColsAtCompileTime>::ret }; +}; + /* plain_matrix_type : the difference from eval is that plain_matrix_type is always a plain matrix type, * whereas eval is a const reference in the case of a matrix */ @@ -174,6 +190,10 @@ template<typename T> struct plain_matrix_type<T,Dense> { typedef typename plain_matrix_type_dense<T,typename traits<T>::XprKind>::type type; }; +template<typename T> struct plain_matrix_type<T,DiagonalShape> +{ + typedef typename T::PlainObject type; +}; template<typename T> struct plain_matrix_type_dense<T,MatrixXpr> { @@ -216,6 +236,11 @@ template<typename T> struct eval<T,Dense> // > type; }; +template<typename T> struct eval<T,DiagonalShape> +{ + typedef typename plain_matrix_type<T>::type type; +}; + // for matrices, no need to evaluate, just use a const reference to avoid a useless copy template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols> struct eval<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>, Dense> @@ -294,38 +319,42 @@ struct transfer_constness >::type type; }; -/** \internal Determines how a given expression should be nested into another one. + +// When using evaluators, we never evaluate when assembling the expression!! +// TODO: get rid of this nested class since it's just an alias for ref_selector. +template<typename T, int n=1, typename PlainObject = void> struct nested +{ + typedef typename ref_selector<T>::type type; +}; + +// However, we still need a mechanism to detect whether an expression which is evaluated multiple time +// has to be evaluated into a temporary. +// That's the purpose of this new nested_eval helper: +/** \internal Determines how a given expression should be nested when evaluated multiple times. 
* For example, when you do a * (b+c), Eigen will determine how the expression b+c should be - * nested into the bigger product expression. The choice is between nesting the expression b+c as-is, or + * evaluated into the bigger product expression. The choice is between nesting the expression b+c as-is, or * evaluating that expression b+c into a temporary variable d, and nest d so that the resulting expression is * a*d. Evaluating can be beneficial for example if every coefficient access in the resulting expression causes * many coefficient accesses in the nested expressions -- as is the case with matrix product for example. * - * \param T the type of the expression being nested + * \param T the type of the expression being nested. * \param n the number of coefficient accesses in the nested expression for each coefficient access in the bigger expression. - * - * Note that if no evaluation occur, then the constness of T is preserved. - * - * Example. Suppose that a, b, and c are of type Matrix3d. The user forms the expression a*(b+c). - * b+c is an expression "sum of matrices", which we will denote by S. In order to determine how to nest it, - * the Product expression uses: nested<S, 3>::type, which turns out to be Matrix3d because the internal logic of - * nested determined that in this case it was better to evaluate the expression b+c into a temporary. On the other hand, - * since a is of type Matrix3d, the Product expression nests it as nested<Matrix3d, 3>::type, which turns out to be - * const Matrix3d&, because the internal logic of nested determined that since a was already a matrix, there was no point - * in copying it into another matrix. + * \param PlainObject the type of the temporary if needed. 
*/ -template<typename T, int n=1, typename PlainObject = typename eval<T>::type> struct nested +template<typename T, int n, typename PlainObject = typename eval<T>::type> struct nested_eval { enum { - // for the purpose of this test, to keep it reasonably simple, we arbitrarily choose a value of Dynamic values. + // For the purpose of this test, to keep it reasonably simple, we arbitrarily choose a value of Dynamic values. // the choice of 10000 makes it larger than any practical fixed value and even most dynamic values. // in extreme cases where these assumptions would be wrong, we would still at worst suffer performance issues // (poor choice of temporaries). - // it's important that this value can still be squared without integer overflowing. + // It's important that this value can still be squared without integer overflowing. DynamicAsInteger = 10000, ScalarReadCost = NumTraits<typename traits<T>::Scalar>::ReadCost, ScalarReadCostAsInteger = ScalarReadCost == Dynamic ? int(DynamicAsInteger) : int(ScalarReadCost), - CoeffReadCost = traits<T>::CoeffReadCost, + CoeffReadCost = evaluator<T>::CoeffReadCost, // TODO What if an evaluator evaluate itself into a tempory? + // Then CoeffReadCost will be small but we still have to evaluate if n>1... + // The solution might be to ask the evaluator if it creates a temp. Perhaps we could even ask the number of temps? CoeffReadCostAsInteger = CoeffReadCost == Dynamic ? int(DynamicAsInteger) : int(CoeffReadCost), NAsInteger = n == Dynamic ? 
int(DynamicAsInteger) : n, CostEvalAsInteger = (NAsInteger+1) * ScalarReadCostAsInteger + CoeffReadCostAsInteger, @@ -333,17 +362,16 @@ template<typename T, int n=1, typename PlainObject = typename eval<T>::type> str }; typedef typename conditional< - ( (int(traits<T>::Flags) & EvalBeforeNestingBit) || - int(CostEvalAsInteger) < int(CostNoEvalAsInteger) - ), - PlainObject, - typename ref_selector<T>::type + ( (int(evaluator<T>::Flags) & EvalBeforeNestingBit) || + (int(CostEvalAsInteger) < int(CostNoEvalAsInteger)) ), + PlainObject, + typename ref_selector<T>::type >::type type; }; template<typename T> EIGEN_DEVICE_FUNC -T* const_cast_ptr(const T* ptr) +inline T* const_cast_ptr(const T* ptr) { return const_cast<T*>(ptr); } @@ -366,6 +394,15 @@ struct dense_xpr_base<Derived, ArrayXpr> typedef ArrayBase<Derived> type; }; +template<typename Derived, typename XprKind = typename traits<Derived>::XprKind, typename StorageKind = typename traits<Derived>::StorageKind> +struct generic_xpr_base; + +template<typename Derived, typename XprKind> +struct generic_xpr_base<Derived, XprKind, Dense> +{ + typedef typename dense_xpr_base<Derived,XprKind>::type type; +}; + /** \internal Helper base class to add a scalar multiple operator * overloads for complex types */ template<typename Derived,typename Scalar,typename OtherScalar, @@ -424,6 +461,60 @@ template <typename A> struct promote_storage_type<const A, A> typedef A ret; }; +/** \internal Specify the "storage kind" of applying a coefficient-wise + * binary operations between two expressions of kinds A and B respectively. + * The template parameter Functor permits to specialize the resulting storage kind wrt to + * the functor. 
+ * The default rules are as follows: + * \code + * A op A -> A + * A op dense -> dense + * dense op B -> dense + * A * dense -> A + * dense * B -> B + * \endcode + */ +template <typename A, typename B, typename Functor> struct cwise_promote_storage_type; + +template <typename A, typename Functor> struct cwise_promote_storage_type<A,A,Functor> { typedef A ret; }; +template <typename Functor> struct cwise_promote_storage_type<Dense,Dense,Functor> { typedef Dense ret; }; +template <typename ScalarA, typename ScalarB> struct cwise_promote_storage_type<Dense,Dense,scalar_product_op<ScalarA,ScalarB> > { typedef Dense ret; }; +template <typename A, typename Functor> struct cwise_promote_storage_type<A,Dense,Functor> { typedef Dense ret; }; +template <typename B, typename Functor> struct cwise_promote_storage_type<Dense,B,Functor> { typedef Dense ret; }; +template <typename A, typename ScalarA, typename ScalarB> struct cwise_promote_storage_type<A,Dense,scalar_product_op<ScalarA,ScalarB> > { typedef A ret; }; +template <typename B, typename ScalarA, typename ScalarB> struct cwise_promote_storage_type<Dense,B,scalar_product_op<ScalarA,ScalarB> > { typedef B ret; }; + +/** \internal Specify the "storage kind" of multiplying an expression of kind A with kind B. + * The template parameter ProductTag permits to specialize the resulting storage kind wrt to + * some compile-time properties of the product: GemmProduct, GemvProduct, OuterProduct, InnerProduct. 
+ * The default rules are as follows: + * \code + * K * K -> K + * dense * K -> dense + * K * dense -> dense + * diag * K -> K + * K * diag -> K + * Perm * K -> K + * K * Perm -> K + * \endcode + */ +template <typename A, typename B, int ProductTag> struct product_promote_storage_type; + +template <typename A, int ProductTag> struct product_promote_storage_type<A, A, ProductTag> { typedef A ret;}; +template <int ProductTag> struct product_promote_storage_type<Dense, Dense, ProductTag> { typedef Dense ret;}; +template <typename A, int ProductTag> struct product_promote_storage_type<A, Dense, ProductTag> { typedef Dense ret; }; +template <typename B, int ProductTag> struct product_promote_storage_type<Dense, B, ProductTag> { typedef Dense ret; }; + +template <typename A, int ProductTag> struct product_promote_storage_type<A, DiagonalShape, ProductTag> { typedef A ret; }; +template <typename B, int ProductTag> struct product_promote_storage_type<DiagonalShape, B, ProductTag> { typedef B ret; }; +template <int ProductTag> struct product_promote_storage_type<Dense, DiagonalShape, ProductTag> { typedef Dense ret; }; +template <int ProductTag> struct product_promote_storage_type<DiagonalShape, Dense, ProductTag> { typedef Dense ret; }; + +template <typename A, int ProductTag> struct product_promote_storage_type<A, PermutationStorage, ProductTag> { typedef A ret; }; +template <typename B, int ProductTag> struct product_promote_storage_type<PermutationStorage, B, ProductTag> { typedef B ret; }; +template <int ProductTag> struct product_promote_storage_type<Dense, PermutationStorage, ProductTag> { typedef Dense ret; }; +template <int ProductTag> struct product_promote_storage_type<PermutationStorage, Dense, ProductTag> { typedef Dense ret; }; + /** \internal gives the plain matrix or array type to store a row/column/diagonal of a matrix type. * \param Scalar optional parameter allowing to pass a different scalar type than the one of the MatrixType. 
*/ @@ -480,8 +571,36 @@ struct is_lvalue bool(traits<ExpressionType>::Flags & LvalueBit) }; }; +template<typename T> struct is_diagonal +{ enum { ret = false }; }; + +template<typename T> struct is_diagonal<DiagonalBase<T> > +{ enum { ret = true }; }; + +template<typename T> struct is_diagonal<DiagonalWrapper<T> > +{ enum { ret = true }; }; + +template<typename T, int S> struct is_diagonal<DiagonalMatrix<T,S> > +{ enum { ret = true }; }; + +template<typename S1, typename S2> struct glue_shapes; +template<> struct glue_shapes<DenseShape,TriangularShape> { typedef TriangularShape type; }; + } // end namespace internal +// we require Lhs and Rhs to have the same scalar type. Currently there is no example of a binary functor +// that would take two operands of different types. If there were such an example, then this check should be +// moved to the BinaryOp functors, on a per-case basis. This would however require a change in the BinaryOp functors, as +// currently they take only one typename Scalar template parameter. +// It is tempting to always allow mixing different types but remember that this is often impossible in the vectorized paths. +// So allowing mixing different types gives very unexpected errors when enabling vectorization, when the user tries to +// add together a float matrix and a double matrix. +#define EIGEN_CHECK_BINARY_COMPATIBILIY(BINOP,LHS,RHS) \ + EIGEN_STATIC_ASSERT((internal::functor_is_product_like<BINOP>::ret \ + ? 
int(internal::scalar_product_traits<LHS, RHS>::Defined) \ + : int(internal::is_same<LHS, RHS>::value)), \ + YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) + } // end namespace Eigen #endif // EIGEN_XPRHELPER_H diff --git a/Eigen/src/Eigenvalues/ComplexEigenSolver.h b/Eigen/src/Eigenvalues/ComplexEigenSolver.h index af434bc9b..25082546e 100644 --- a/Eigen/src/Eigenvalues/ComplexEigenSolver.h +++ b/Eigen/src/Eigenvalues/ComplexEigenSolver.h @@ -104,7 +104,7 @@ template<typename _MatrixType> class ComplexEigenSolver * according to the specified problem \a size. * \sa ComplexEigenSolver() */ - ComplexEigenSolver(Index size) + explicit ComplexEigenSolver(Index size) : m_eivec(size, size), m_eivalues(size), m_schur(size), @@ -122,7 +122,7 @@ template<typename _MatrixType> class ComplexEigenSolver * * This constructor calls compute() to compute the eigendecomposition. */ - ComplexEigenSolver(const MatrixType& matrix, bool computeEigenvectors = true) + explicit ComplexEigenSolver(const MatrixType& matrix, bool computeEigenvectors = true) : m_eivec(matrix.rows(),matrix.cols()), m_eivalues(matrix.cols()), m_schur(matrix.rows()), diff --git a/Eigen/src/Eigenvalues/ComplexSchur.h b/Eigen/src/Eigenvalues/ComplexSchur.h index 89e6cade3..a3a5a4649 100644 --- a/Eigen/src/Eigenvalues/ComplexSchur.h +++ b/Eigen/src/Eigenvalues/ComplexSchur.h @@ -91,7 +91,7 @@ template<typename _MatrixType> class ComplexSchur * * \sa compute() for an example. */ - ComplexSchur(Index size = RowsAtCompileTime==Dynamic ? 1 : RowsAtCompileTime) + explicit ComplexSchur(Index size = RowsAtCompileTime==Dynamic ? 1 : RowsAtCompileTime) : m_matT(size,size), m_matU(size,size), m_hess(size), @@ -109,7 +109,7 @@ template<typename _MatrixType> class ComplexSchur * * \sa matrixT() and matrixU() for examples. 
*/ - ComplexSchur(const MatrixType& matrix, bool computeU = true) + explicit ComplexSchur(const MatrixType& matrix, bool computeU = true) : m_matT(matrix.rows(),matrix.cols()), m_matU(matrix.rows(),matrix.cols()), m_hess(matrix.rows()), diff --git a/Eigen/src/Eigenvalues/ComplexSchur_MKL.h b/Eigen/src/Eigenvalues/ComplexSchur_MKL.h index 91496ae5b..27aed923c 100644 --- a/Eigen/src/Eigenvalues/ComplexSchur_MKL.h +++ b/Eigen/src/Eigenvalues/ComplexSchur_MKL.h @@ -45,7 +45,6 @@ ComplexSchur<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >& \ ComplexSchur<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >::compute(const Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW>& matrix, bool computeU) \ { \ typedef Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> MatrixType; \ - typedef MatrixType::Scalar Scalar; \ typedef MatrixType::RealScalar RealScalar; \ typedef std::complex<RealScalar> ComplexScalar; \ \ diff --git a/Eigen/src/Eigenvalues/EigenSolver.h b/Eigen/src/Eigenvalues/EigenSolver.h index d2563d470..9372021ff 100644 --- a/Eigen/src/Eigenvalues/EigenSolver.h +++ b/Eigen/src/Eigenvalues/EigenSolver.h @@ -118,7 +118,7 @@ template<typename _MatrixType> class EigenSolver * according to the specified problem \a size. 
* \sa EigenSolver() */ - EigenSolver(Index size) + explicit EigenSolver(Index size) : m_eivec(size, size), m_eivalues(size), m_isInitialized(false), @@ -143,7 +143,7 @@ template<typename _MatrixType> class EigenSolver * * \sa compute() */ - EigenSolver(const MatrixType& matrix, bool computeEigenvectors = true) + explicit EigenSolver(const MatrixType& matrix, bool computeEigenvectors = true) : m_eivec(matrix.rows(), matrix.cols()), m_eivalues(matrix.cols()), m_isInitialized(false), @@ -368,7 +368,6 @@ EigenSolver<MatrixType>::compute(const MatrixType& matrix, bool computeEigenvect { using std::sqrt; using std::abs; - using std::max; using numext::isfinite; eigen_assert(matrix.cols() == matrix.rows()); @@ -409,7 +408,7 @@ EigenSolver<MatrixType>::compute(const MatrixType& matrix, bool computeEigenvect { Scalar t0 = m_matT.coeff(i+1, i); Scalar t1 = m_matT.coeff(i, i+1); - Scalar maxval = (max)(abs(p),(max)(abs(t0),abs(t1))); + Scalar maxval = numext::maxi(abs(p),numext::maxi(abs(t0),abs(t1))); t0 /= maxval; t1 /= maxval; Scalar p0 = p/maxval; @@ -600,8 +599,7 @@ void EigenSolver<MatrixType>::doComputeEigenvectors() } // Overflow control - EIGEN_USING_STD_MATH(max); - Scalar t = (max)(abs(m_matT.coeff(i,n-1)),abs(m_matT.coeff(i,n))); + Scalar t = numext::maxi(abs(m_matT.coeff(i,n-1)),abs(m_matT.coeff(i,n))); if ((eps * t) * t > Scalar(1)) m_matT.block(i, n-1, size-i, 2) /= t; diff --git a/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h b/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h index dc240e13e..c20ea03e6 100644 --- a/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +++ b/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h @@ -122,7 +122,7 @@ template<typename _MatrixType> class GeneralizedEigenSolver * according to the specified problem \a size. 
* \sa GeneralizedEigenSolver() */ - GeneralizedEigenSolver(Index size) + explicit GeneralizedEigenSolver(Index size) : m_eivec(size, size), m_alphas(size), m_betas(size), @@ -145,7 +145,7 @@ template<typename _MatrixType> class GeneralizedEigenSolver * * \sa compute() */ - GeneralizedEigenSolver(const MatrixType& A, const MatrixType& B, bool computeEigenvectors = true) + explicit GeneralizedEigenSolver(const MatrixType& A, const MatrixType& B, bool computeEigenvectors = true) : m_eivec(A.rows(), A.cols()), m_alphas(A.cols()), m_betas(A.cols()), diff --git a/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h b/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h index 07bf1ea09..1ce1f5f58 100644 --- a/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +++ b/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h @@ -74,7 +74,7 @@ class GeneralizedSelfAdjointEigenSolver : public SelfAdjointEigenSolver<_MatrixT * * \sa compute() for an example */ - GeneralizedSelfAdjointEigenSolver(Index size) + explicit GeneralizedSelfAdjointEigenSolver(Index size) : Base(size) {} diff --git a/Eigen/src/Eigenvalues/HessenbergDecomposition.h b/Eigen/src/Eigenvalues/HessenbergDecomposition.h index 3db0c0106..2615a9f23 100644 --- a/Eigen/src/Eigenvalues/HessenbergDecomposition.h +++ b/Eigen/src/Eigenvalues/HessenbergDecomposition.h @@ -97,7 +97,7 @@ template<typename _MatrixType> class HessenbergDecomposition * * \sa compute() for an example. */ - HessenbergDecomposition(Index size = Size==Dynamic ? 2 : Size) + explicit HessenbergDecomposition(Index size = Size==Dynamic ? 2 : Size) : m_matrix(size,size), m_temp(size), m_isInitialized(false) @@ -115,7 +115,7 @@ template<typename _MatrixType> class HessenbergDecomposition * * \sa matrixH() for an example. 
*/ - HessenbergDecomposition(const MatrixType& matrix) + explicit HessenbergDecomposition(const MatrixType& matrix) : m_matrix(matrix), m_temp(matrix.rows()), m_isInitialized(false) diff --git a/Eigen/src/Eigenvalues/RealQZ.h b/Eigen/src/Eigenvalues/RealQZ.h index 5706eeebe..128ef9028 100644 --- a/Eigen/src/Eigenvalues/RealQZ.h +++ b/Eigen/src/Eigenvalues/RealQZ.h @@ -83,7 +83,7 @@ namespace Eigen { * * \sa compute() for an example. */ - RealQZ(Index size = RowsAtCompileTime==Dynamic ? 1 : RowsAtCompileTime) : + explicit RealQZ(Index size = RowsAtCompileTime==Dynamic ? 1 : RowsAtCompileTime) : m_S(size, size), m_T(size, size), m_Q(size, size), @@ -101,7 +101,7 @@ namespace Eigen { * * This constructor calls compute() to compute the QZ decomposition. */ - RealQZ(const MatrixType& A, const MatrixType& B, bool computeQZ = true) : + explicit RealQZ(const MatrixType& A, const MatrixType& B, bool computeQZ = true) : m_S(A.rows(),A.cols()), m_T(A.rows(),A.cols()), m_Q(A.rows(),A.cols()), @@ -313,7 +313,7 @@ namespace Eigen { using std::abs; using std::sqrt; const Index dim=m_S.cols(); - if (abs(m_S.coeff(i+1,i)==Scalar(0))) + if (abs(m_S.coeff(i+1,i))==Scalar(0)) return; Index z = findSmallDiagEntry(i,i+1); if (z==i-1) diff --git a/Eigen/src/Eigenvalues/RealSchur.h b/Eigen/src/Eigenvalues/RealSchur.h index 64d136341..51e61ba38 100644 --- a/Eigen/src/Eigenvalues/RealSchur.h +++ b/Eigen/src/Eigenvalues/RealSchur.h @@ -80,7 +80,7 @@ template<typename _MatrixType> class RealSchur * * \sa compute() for an example. */ - RealSchur(Index size = RowsAtCompileTime==Dynamic ? 1 : RowsAtCompileTime) + explicit RealSchur(Index size = RowsAtCompileTime==Dynamic ? 
1 : RowsAtCompileTime) : m_matT(size, size), m_matU(size, size), m_workspaceVector(size), @@ -100,7 +100,7 @@ template<typename _MatrixType> class RealSchur * Example: \include RealSchur_RealSchur_MatrixType.cpp * Output: \verbinclude RealSchur_RealSchur_MatrixType.out */ - RealSchur(const MatrixType& matrix, bool computeU = true) + explicit RealSchur(const MatrixType& matrix, bool computeU = true) : m_matT(matrix.rows(),matrix.cols()), m_matU(matrix.rows(),matrix.cols()), m_workspaceVector(matrix.rows()), @@ -234,7 +234,7 @@ template<typename _MatrixType> class RealSchur typedef Matrix<Scalar,3,1> Vector3s; Scalar computeNormOfT(); - Index findSmallSubdiagEntry(Index iu, const Scalar& norm); + Index findSmallSubdiagEntry(Index iu); void splitOffTwoRows(Index iu, bool computeU, const Scalar& exshift); void computeShift(Index iu, Index iter, Scalar& exshift, Vector3s& shiftInfo); void initFrancisQRStep(Index il, Index iu, const Vector3s& shiftInfo, Index& im, Vector3s& firstHouseholderVector); @@ -286,7 +286,7 @@ RealSchur<MatrixType>& RealSchur<MatrixType>::computeFromHessenberg(const HessMa { while (iu >= 0) { - Index il = findSmallSubdiagEntry(iu, norm); + Index il = findSmallSubdiagEntry(iu); // Check for convergence if (il == iu) // One root found @@ -343,16 +343,14 @@ inline typename MatrixType::Scalar RealSchur<MatrixType>::computeNormOfT() /** \internal Look for single small sub-diagonal element and returns its index */ template<typename MatrixType> -inline typename MatrixType::Index RealSchur<MatrixType>::findSmallSubdiagEntry(Index iu, const Scalar& norm) +inline typename MatrixType::Index RealSchur<MatrixType>::findSmallSubdiagEntry(Index iu) { using std::abs; Index res = iu; while (res > 0) { Scalar s = abs(m_matT.coeff(res-1,res-1)) + abs(m_matT.coeff(res,res)); - if (s == 0.0) - s = norm; - if (abs(m_matT.coeff(res,res-1)) < NumTraits<Scalar>::epsilon() * s) + if (abs(m_matT.coeff(res,res-1)) <= NumTraits<Scalar>::epsilon() * s) break; res--; } @@ 
-457,9 +455,7 @@ inline void RealSchur<MatrixType>::initFrancisQRStep(Index il, Index iu, const V const Scalar lhs = m_matT.coeff(im,im-1) * (abs(v.coeff(1)) + abs(v.coeff(2))); const Scalar rhs = v.coeff(0) * (abs(m_matT.coeff(im-1,im-1)) + abs(Tmm) + abs(m_matT.coeff(im+1,im+1))); if (abs(lhs) < NumTraits<Scalar>::epsilon() * rhs) - { break; - } } } diff --git a/Eigen/src/Eigenvalues/RealSchur_MKL.h b/Eigen/src/Eigenvalues/RealSchur_MKL.h index ad9736460..c3089b468 100644 --- a/Eigen/src/Eigenvalues/RealSchur_MKL.h +++ b/Eigen/src/Eigenvalues/RealSchur_MKL.h @@ -44,10 +44,6 @@ template<> inline \ RealSchur<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >& \ RealSchur<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >::compute(const Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW>& matrix, bool computeU) \ { \ - typedef Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> MatrixType; \ - typedef MatrixType::Scalar Scalar; \ - typedef MatrixType::RealScalar RealScalar; \ -\ eigen_assert(matrix.cols() == matrix.rows()); \ \ lapack_int n = matrix.cols(), sdim, info; \ diff --git a/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h b/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h index a6bbdac6b..54f60b197 100644 --- a/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +++ b/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h @@ -133,7 +133,7 @@ template<typename _MatrixType> class SelfAdjointEigenSolver * \sa compute() for an example */ EIGEN_DEVICE_FUNC - SelfAdjointEigenSolver(Index size) + explicit SelfAdjointEigenSolver(Index size) : m_eivec(size, size), m_eivalues(size), m_subdiag(size > 1 ? 
size - 1 : 1), @@ -156,7 +156,7 @@ template<typename _MatrixType> class SelfAdjointEigenSolver * \sa compute(const MatrixType&, int) */ EIGEN_DEVICE_FUNC - SelfAdjointEigenSolver(const MatrixType& matrix, int options = ComputeEigenvectors) + explicit SelfAdjointEigenSolver(const MatrixType& matrix, int options = ComputeEigenvectors) : m_eivec(matrix.rows(), matrix.cols()), m_eivalues(matrix.cols()), m_subdiag(matrix.rows() > 1 ? matrix.rows() - 1 : 1), @@ -732,7 +732,6 @@ struct direct_selfadjoint_eigenvalues<SolverType,2,false> EIGEN_DEVICE_FUNC static inline void run(SolverType& solver, const MatrixType& mat, int options) { - EIGEN_USING_STD_MATH(max) EIGEN_USING_STD_MATH(sqrt); eigen_assert(mat.cols() == 2 && mat.cols() == mat.rows()); @@ -746,7 +745,7 @@ struct direct_selfadjoint_eigenvalues<SolverType,2,false> // map the matrix coefficients to [-1:1] to avoid over- and underflow. Scalar scale = mat.cwiseAbs().maxCoeff(); - scale = (max)(scale,Scalar(1)); + scale = numext::maxi(scale,Scalar(1)); MatrixType scaledMat = mat / scale; // Compute the eigenvalues diff --git a/Eigen/src/Eigenvalues/Tridiagonalization.h b/Eigen/src/Eigenvalues/Tridiagonalization.h index 192278d68..bedd1cb34 100644 --- a/Eigen/src/Eigenvalues/Tridiagonalization.h +++ b/Eigen/src/Eigenvalues/Tridiagonalization.h @@ -18,8 +18,10 @@ namespace internal { template<typename MatrixType> struct TridiagonalizationMatrixTReturnType; template<typename MatrixType> struct traits<TridiagonalizationMatrixTReturnType<MatrixType> > + : public traits<typename MatrixType::PlainObject> { - typedef typename MatrixType::PlainObject ReturnType; + typedef typename MatrixType::PlainObject ReturnType; // FIXME shall it be a BandMatrix? 
+ enum { Flags = 0 }; }; template<typename MatrixType, typename CoeffVectorType> @@ -89,10 +91,8 @@ template<typename _MatrixType> class Tridiagonalization >::type DiagonalReturnType; typedef typename internal::conditional<NumTraits<Scalar>::IsComplex, - typename internal::add_const_on_value_type<typename Diagonal< - Block<const MatrixType,SizeMinusOne,SizeMinusOne> >::RealReturnType>::type, - const Diagonal< - Block<const MatrixType,SizeMinusOne,SizeMinusOne> > + typename internal::add_const_on_value_type<typename Diagonal<const MatrixType, -1>::RealReturnType>::type, + const Diagonal<const MatrixType, -1> >::type SubDiagonalReturnType; /** \brief Return type of matrixQ() */ @@ -110,7 +110,7 @@ template<typename _MatrixType> class Tridiagonalization * * \sa compute() for an example. */ - Tridiagonalization(Index size = Size==Dynamic ? 2 : Size) + explicit Tridiagonalization(Index size = Size==Dynamic ? 2 : Size) : m_matrix(size,size), m_hCoeffs(size > 1 ? size-1 : 1), m_isInitialized(false) @@ -126,7 +126,7 @@ template<typename _MatrixType> class Tridiagonalization * Example: \include Tridiagonalization_Tridiagonalization_MatrixType.cpp * Output: \verbinclude Tridiagonalization_Tridiagonalization_MatrixType.out */ - Tridiagonalization(const MatrixType& matrix) + explicit Tridiagonalization(const MatrixType& matrix) : m_matrix(matrix), m_hCoeffs(matrix.cols() > 1 ? 
matrix.cols()-1 : 1), m_isInitialized(false) @@ -305,7 +305,7 @@ typename Tridiagonalization<MatrixType>::DiagonalReturnType Tridiagonalization<MatrixType>::diagonal() const { eigen_assert(m_isInitialized && "Tridiagonalization is not initialized."); - return m_matrix.diagonal(); + return m_matrix.diagonal().real(); } template<typename MatrixType> @@ -313,8 +313,7 @@ typename Tridiagonalization<MatrixType>::SubDiagonalReturnType Tridiagonalization<MatrixType>::subDiagonal() const { eigen_assert(m_isInitialized && "Tridiagonalization is not initialized."); - Index n = m_matrix.rows(); - return Block<const MatrixType,SizeMinusOne,SizeMinusOne>(m_matrix, 1, 0, n-1,n-1).diagonal(); + return m_matrix.template diagonal<-1>().real(); } namespace internal { diff --git a/Eigen/src/Geometry/AlignedBox.h b/Eigen/src/Geometry/AlignedBox.h index b6a2f0e24..d6c5c1293 100644 --- a/Eigen/src/Geometry/AlignedBox.h +++ b/Eigen/src/Geometry/AlignedBox.h @@ -71,7 +71,7 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim) template<typename Derived> inline explicit AlignedBox(const MatrixBase<Derived>& a_p) { - typename internal::nested<Derived,2>::type p(a_p.derived()); + typename internal::nested_eval<Derived,2>::type p(a_p.derived()); m_min = p; m_max = p; } diff --git a/Eigen/src/Geometry/Homogeneous.h b/Eigen/src/Geometry/Homogeneous.h index 97dd21d15..7f1907542 100644 --- a/Eigen/src/Geometry/Homogeneous.h +++ b/Eigen/src/Geometry/Homogeneous.h @@ -48,8 +48,7 @@ struct traits<Homogeneous<MatrixType,Direction> > TmpFlags = _MatrixTypeNested::Flags & HereditaryBits, Flags = ColsAtCompileTime==1 ? (TmpFlags & ~RowMajorBit) : RowsAtCompileTime==1 ? 
(TmpFlags | RowMajorBit) - : TmpFlags, - CoeffReadCost = _MatrixTypeNested::CoeffReadCost + : TmpFlags }; }; @@ -59,52 +58,54 @@ template<typename MatrixType,typename Rhs> struct homogeneous_right_product_impl } // end namespace internal template<typename MatrixType,int _Direction> class Homogeneous - : internal::no_assignment_operator, public MatrixBase<Homogeneous<MatrixType,_Direction> > + : public MatrixBase<Homogeneous<MatrixType,_Direction> >, internal::no_assignment_operator { public: + typedef MatrixType NestedExpression; enum { Direction = _Direction }; typedef MatrixBase<Homogeneous> Base; EIGEN_DENSE_PUBLIC_INTERFACE(Homogeneous) - inline Homogeneous(const MatrixType& matrix) + explicit inline Homogeneous(const MatrixType& matrix) : m_matrix(matrix) {} inline Index rows() const { return m_matrix.rows() + (int(Direction)==Vertical ? 1 : 0); } inline Index cols() const { return m_matrix.cols() + (int(Direction)==Horizontal ? 1 : 0); } - - inline Scalar coeff(Index row, Index col) const - { - if( (int(Direction)==Vertical && row==m_matrix.rows()) - || (int(Direction)==Horizontal && col==m_matrix.cols())) - return 1; - return m_matrix.coeff(row, col); - } + + const NestedExpression& nestedExpression() const { return m_matrix; } template<typename Rhs> - inline const internal::homogeneous_right_product_impl<Homogeneous,Rhs> + inline const Product<Homogeneous,Rhs> operator* (const MatrixBase<Rhs>& rhs) const { eigen_assert(int(Direction)==Horizontal); - return internal::homogeneous_right_product_impl<Homogeneous,Rhs>(m_matrix,rhs.derived()); + return Product<Homogeneous,Rhs>(*this,rhs.derived()); } template<typename Lhs> friend - inline const internal::homogeneous_left_product_impl<Homogeneous,Lhs> + inline const Product<Lhs,Homogeneous> operator* (const MatrixBase<Lhs>& lhs, const Homogeneous& rhs) { eigen_assert(int(Direction)==Vertical); - return internal::homogeneous_left_product_impl<Homogeneous,Lhs>(lhs.derived(),rhs.m_matrix); + return 
Product<Lhs,Homogeneous>(lhs.derived(),rhs); } template<typename Scalar, int Dim, int Mode, int Options> friend - inline const internal::homogeneous_left_product_impl<Homogeneous,Transform<Scalar,Dim,Mode,Options> > + inline const Product<Transform<Scalar,Dim,Mode,Options>, Homogeneous > operator* (const Transform<Scalar,Dim,Mode,Options>& lhs, const Homogeneous& rhs) { eigen_assert(int(Direction)==Vertical); - return internal::homogeneous_left_product_impl<Homogeneous,Transform<Scalar,Dim,Mode,Options> >(lhs,rhs.m_matrix); + return Product<Transform<Scalar,Dim,Mode,Options>, Homogeneous>(lhs,rhs); + } + + template<typename Func> + EIGEN_STRONG_INLINE typename internal::result_of<Func(Scalar)>::type + redux(const Func& func) const + { + return func(m_matrix.redux(func), Scalar(1)); } protected: @@ -127,7 +128,7 @@ inline typename MatrixBase<Derived>::HomogeneousReturnType MatrixBase<Derived>::homogeneous() const { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived); - return derived(); + return HomogeneousReturnType(derived()); } /** \geometry_module @@ -142,7 +143,7 @@ template<typename ExpressionType, int Direction> inline Homogeneous<ExpressionType,Direction> VectorwiseOp<ExpressionType,Direction>::homogeneous() const { - return _expression(); + return HomogeneousReturnType(_expression()); } /** \geometry_module @@ -300,6 +301,93 @@ struct homogeneous_right_product_impl<Homogeneous<MatrixType,Horizontal>,Rhs> typename Rhs::Nested m_rhs; }; +template<typename ArgType,int Direction> +struct evaluator_traits<Homogeneous<ArgType,Direction> > +{ + typedef typename storage_kind_to_evaluator_kind<typename ArgType::StorageKind>::Kind Kind; + typedef HomogeneousShape Shape; + static const int AssumeAliasing = 0; +}; + +template<> struct AssignmentKind<DenseShape,HomogeneousShape> { typedef Dense2Dense Kind; }; + + +template<typename ArgType,int Direction> +struct unary_evaluator<Homogeneous<ArgType,Direction>, IndexBased> + : evaluator<typename 
Homogeneous<ArgType,Direction>::PlainObject >::type +{ + typedef Homogeneous<ArgType,Direction> XprType; + typedef typename XprType::PlainObject PlainObject; + typedef typename evaluator<PlainObject>::type Base; + + typedef evaluator<XprType> type; + typedef evaluator<XprType> nestedType; + + explicit unary_evaluator(const XprType& op) + : Base(), m_temp(op) + { + ::new (static_cast<Base*>(this)) Base(m_temp); + } + +protected: + PlainObject m_temp; +}; + +// dense = homogeneous +template< typename DstXprType, typename ArgType, typename Scalar> +struct Assignment<DstXprType, Homogeneous<ArgType,Vertical>, internal::assign_op<Scalar>, Dense2Dense, Scalar> +{ + typedef Homogeneous<ArgType,Vertical> SrcXprType; + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &) + { + dst.template topRows<ArgType::RowsAtCompileTime>(src.nestedExpression().rows()) = src.nestedExpression(); + dst.row(dst.rows()-1).setOnes(); + } +}; + +// dense = homogeneous +template< typename DstXprType, typename ArgType, typename Scalar> +struct Assignment<DstXprType, Homogeneous<ArgType,Horizontal>, internal::assign_op<Scalar>, Dense2Dense, Scalar> +{ + typedef Homogeneous<ArgType,Horizontal> SrcXprType; + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &) + { + dst.template leftCols<ArgType::ColsAtCompileTime>(src.nestedExpression().cols()) = src.nestedExpression(); + dst.col(dst.cols()-1).setOnes(); + } +}; + +template<typename LhsArg, typename Rhs, int ProductTag> +struct generic_product_impl<Homogeneous<LhsArg,Horizontal>, Rhs, HomogeneousShape, DenseShape, ProductTag> +{ + template<typename Dest> + static void evalTo(Dest& dst, const Homogeneous<LhsArg,Horizontal>& lhs, const Rhs& rhs) + { + homogeneous_right_product_impl<Homogeneous<LhsArg,Horizontal>, Rhs>(lhs.nestedExpression(), rhs).evalTo(dst); + } +}; + +template<typename Lhs, typename RhsArg, int ProductTag> +struct generic_product_impl<Lhs, 
Homogeneous<RhsArg,Vertical>, DenseShape, HomogeneousShape, ProductTag> +{ + template<typename Dest> + static void evalTo(Dest& dst, const Lhs& lhs, const Homogeneous<RhsArg,Vertical>& rhs) + { + homogeneous_left_product_impl<Homogeneous<RhsArg,Vertical>, Lhs>(lhs, rhs.nestedExpression()).evalTo(dst); + } +}; + +template<typename Scalar, int Dim, int Mode,int Options, typename RhsArg, int ProductTag> +struct generic_product_impl<Transform<Scalar,Dim,Mode,Options>, Homogeneous<RhsArg,Vertical>, DenseShape, HomogeneousShape, ProductTag> +{ + typedef Transform<Scalar,Dim,Mode,Options> TransformType; + template<typename Dest> + static void evalTo(Dest& dst, const TransformType& lhs, const Homogeneous<RhsArg,Vertical>& rhs) + { + homogeneous_left_product_impl<Homogeneous<RhsArg,Vertical>, TransformType>(lhs, rhs.nestedExpression()).evalTo(dst); + } +}; + } // end namespace internal } // end namespace Eigen diff --git a/Eigen/src/Geometry/Hyperplane.h b/Eigen/src/Geometry/Hyperplane.h index aeff43fef..00b7c4300 100644 --- a/Eigen/src/Geometry/Hyperplane.h +++ b/Eigen/src/Geometry/Hyperplane.h @@ -100,7 +100,17 @@ public: { EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(VectorType, 3) Hyperplane result(p0.size()); - result.normal() = (p2 - p0).cross(p1 - p0).normalized(); + VectorType v0(p2 - p0), v1(p1 - p0); + result.normal() = v0.cross(v1); + RealScalar norm = result.normal().norm(); + if(norm <= v0.norm() * v1.norm() * NumTraits<RealScalar>::epsilon()) + { + Matrix<Scalar,2,3> m; m << v0.transpose(), v1.transpose(); + JacobiSVD<Matrix<Scalar,2,3> > svd(m, ComputeFullV); + result.normal() = svd.matrixV().col(2); + } + else + result.normal() /= norm; result.offset() = -p0.dot(result.normal()); return result; } diff --git a/Eigen/src/Geometry/OrthoMethods.h b/Eigen/src/Geometry/OrthoMethods.h index 26be3ee5b..a245c79d3 100644 --- a/Eigen/src/Geometry/OrthoMethods.h +++ b/Eigen/src/Geometry/OrthoMethods.h @@ -30,8 +30,8 @@ MatrixBase<Derived>::cross(const 
MatrixBase<OtherDerived>& other) const // Note that there is no need for an expression here since the compiler // optimize such a small temporary very well (even within a complex expression) - typename internal::nested<Derived,2>::type lhs(derived()); - typename internal::nested<OtherDerived,2>::type rhs(other.derived()); + typename internal::nested_eval<Derived,2>::type lhs(derived()); + typename internal::nested_eval<OtherDerived,2>::type rhs(other.derived()); return typename cross_product_return_type<OtherDerived>::type( numext::conj(lhs.coeff(1) * rhs.coeff(2) - lhs.coeff(2) * rhs.coeff(1)), numext::conj(lhs.coeff(2) * rhs.coeff(0) - lhs.coeff(0) * rhs.coeff(2)), @@ -76,8 +76,8 @@ MatrixBase<Derived>::cross3(const MatrixBase<OtherDerived>& other) const EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Derived,4) EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(OtherDerived,4) - typedef typename internal::nested<Derived,2>::type DerivedNested; - typedef typename internal::nested<OtherDerived,2>::type OtherDerivedNested; + typedef typename internal::nested_eval<Derived,2>::type DerivedNested; + typedef typename internal::nested_eval<OtherDerived,2>::type OtherDerivedNested; DerivedNested lhs(derived()); OtherDerivedNested rhs(other.derived()); @@ -103,21 +103,24 @@ VectorwiseOp<ExpressionType,Direction>::cross(const MatrixBase<OtherDerived>& ot EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(OtherDerived,3) EIGEN_STATIC_ASSERT((internal::is_same<Scalar, typename OtherDerived::Scalar>::value), YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) + + typename internal::nested_eval<ExpressionType,2>::type mat(_expression()); + typename internal::nested_eval<OtherDerived,2>::type vec(other.derived()); CrossReturnType res(_expression().rows(),_expression().cols()); if(Direction==Vertical) { eigen_assert(CrossReturnType::RowsAtCompileTime==3 && "the matrix must have exactly 3 rows"); - res.row(0) = (_expression().row(1) * 
other.coeff(2) - _expression().row(2) * other.coeff(1)).conjugate(); - res.row(1) = (_expression().row(2) * other.coeff(0) - _expression().row(0) * other.coeff(2)).conjugate(); - res.row(2) = (_expression().row(0) * other.coeff(1) - _expression().row(1) * other.coeff(0)).conjugate(); + res.row(0) = (mat.row(1) * vec.coeff(2) - mat.row(2) * vec.coeff(1)).conjugate(); + res.row(1) = (mat.row(2) * vec.coeff(0) - mat.row(0) * vec.coeff(2)).conjugate(); + res.row(2) = (mat.row(0) * vec.coeff(1) - mat.row(1) * vec.coeff(0)).conjugate(); } else { eigen_assert(CrossReturnType::ColsAtCompileTime==3 && "the matrix must have exactly 3 columns"); - res.col(0) = (_expression().col(1) * other.coeff(2) - _expression().col(2) * other.coeff(1)).conjugate(); - res.col(1) = (_expression().col(2) * other.coeff(0) - _expression().col(0) * other.coeff(2)).conjugate(); - res.col(2) = (_expression().col(0) * other.coeff(1) - _expression().col(1) * other.coeff(0)).conjugate(); + res.col(0) = (mat.col(1) * vec.coeff(2) - mat.col(2) * vec.coeff(1)).conjugate(); + res.col(1) = (mat.col(2) * vec.coeff(0) - mat.col(0) * vec.coeff(2)).conjugate(); + res.col(2) = (mat.col(0) * vec.coeff(1) - mat.col(1) * vec.coeff(0)).conjugate(); } return res; } diff --git a/Eigen/src/Geometry/Quaternion.h b/Eigen/src/Geometry/Quaternion.h index 11e5398d4..508eba767 100644 --- a/Eigen/src/Geometry/Quaternion.h +++ b/Eigen/src/Geometry/Quaternion.h @@ -217,7 +217,7 @@ struct traits<Quaternion<_Scalar,_Options> > typedef _Scalar Scalar; typedef Matrix<_Scalar,4,1,_Options> Coefficients; enum{ - IsAligned = internal::traits<Coefficients>::Flags & AlignedBit, + IsAligned = (internal::traits<Coefficients>::EvaluatorFlags & AlignedBit) != 0, Flags = IsAligned ? 
(AlignedBit | LvalueBit) : LvalueBit }; }; @@ -251,7 +251,7 @@ public: inline Quaternion(const Scalar& w, const Scalar& x, const Scalar& y, const Scalar& z) : m_coeffs(x, y, z, w){} /** Constructs and initialize a quaternion from the array data */ - inline Quaternion(const Scalar* data) : m_coeffs(data) {} + explicit inline Quaternion(const Scalar* data) : m_coeffs(data) {} /** Copy constructor */ template<class Derived> EIGEN_STRONG_INLINE Quaternion(const QuaternionBase<Derived>& other) { this->Base::operator=(other); } @@ -351,7 +351,7 @@ class Map<const Quaternion<_Scalar>, _Options > * \code *coeffs == {x, y, z, w} \endcode * * If the template parameter _Options is set to #Aligned, then the pointer coeffs must be aligned. */ - EIGEN_STRONG_INLINE Map(const Scalar* coeffs) : m_coeffs(coeffs) {} + explicit EIGEN_STRONG_INLINE Map(const Scalar* coeffs) : m_coeffs(coeffs) {} inline const Coefficients& coeffs() const { return m_coeffs;} @@ -388,7 +388,7 @@ class Map<Quaternion<_Scalar>, _Options > * \code *coeffs == {x, y, z, w} \endcode * * If the template parameter _Options is set to #Aligned, then the pointer coeffs must be aligned. 
*/ - EIGEN_STRONG_INLINE Map(Scalar* coeffs) : m_coeffs(coeffs) {} + explicit EIGEN_STRONG_INLINE Map(Scalar* coeffs) : m_coeffs(coeffs) {} inline Coefficients& coeffs() { return m_coeffs; } inline const Coefficients& coeffs() const { return m_coeffs; } @@ -571,7 +571,6 @@ template<class Derived> template<typename Derived1, typename Derived2> inline Derived& QuaternionBase<Derived>::setFromTwoVectors(const MatrixBase<Derived1>& a, const MatrixBase<Derived2>& b) { - EIGEN_USING_STD_MATH(max); using std::sqrt; Vector3 v0 = a.normalized(); Vector3 v1 = b.normalized(); @@ -587,7 +586,7 @@ inline Derived& QuaternionBase<Derived>::setFromTwoVectors(const MatrixBase<Deri // which yields a singular value problem if (c < Scalar(-1)+NumTraits<Scalar>::dummy_precision()) { - c = (max)(c,Scalar(-1)); + c = numext::maxi(c,Scalar(-1)); Matrix<Scalar,2,3> m; m << v0.transpose(), v1.transpose(); JacobiSVD<Matrix<Scalar,2,3> > svd(m, ComputeFullV); Vector3 axis = svd.matrixV().col(2); diff --git a/Eigen/src/Geometry/Rotation2D.h b/Eigen/src/Geometry/Rotation2D.h index 1cac343a5..4feb3d4d2 100644 --- a/Eigen/src/Geometry/Rotation2D.h +++ b/Eigen/src/Geometry/Rotation2D.h @@ -59,7 +59,10 @@ protected: public: /** Construct a 2D counter clock wise rotation from the angle \a a in radian. */ - inline Rotation2D(const Scalar& a) : m_angle(a) {} + explicit inline Rotation2D(const Scalar& a) : m_angle(a) {} + + /** Default constructor wihtout initialization. The represented rotation is undefined. 
*/ + Rotation2D() {} /** \returns the rotation angle */ inline Scalar angle() const { return m_angle; } @@ -68,11 +71,11 @@ public: inline Scalar& angle() { return m_angle; } /** \returns the inverse rotation */ - inline Rotation2D inverse() const { return -m_angle; } + inline Rotation2D inverse() const { return Rotation2D(-m_angle); } /** Concatenates two rotations */ inline Rotation2D operator*(const Rotation2D& other) const - { return m_angle + other.m_angle; } + { return Rotation2D(m_angle + other.m_angle); } /** Concatenates two rotations */ inline Rotation2D& operator*=(const Rotation2D& other) @@ -81,16 +84,16 @@ public: /** Applies the rotation to a 2D vector */ Vector2 operator* (const Vector2& vec) const { return toRotationMatrix() * vec; } - + template<typename Derived> Rotation2D& fromRotationMatrix(const MatrixBase<Derived>& m); - Matrix2 toRotationMatrix(void) const; + Matrix2 toRotationMatrix() const; /** \returns the spherical interpolation between \c *this and \a other using * parameter \a t. It is in fact equivalent to a linear interpolation. */ inline Rotation2D slerp(const Scalar& t, const Rotation2D& other) const - { return m_angle * (1-t) + other.angle() * t; } + { return Rotation2D(m_angle * (1-t) + other.angle() * t); } /** \returns \c *this with scalar type casted to \a NewScalarType * diff --git a/Eigen/src/Geometry/Transform.h b/Eigen/src/Geometry/Transform.h index cb93acf6b..d33fc24db 100644 --- a/Eigen/src/Geometry/Transform.h +++ b/Eigen/src/Geometry/Transform.h @@ -62,6 +62,24 @@ struct transform_construct_from_matrix; template<typename TransformType> struct transform_take_affine_part; +template<typename _Scalar, int _Dim, int _Mode, int _Options> +struct traits<Transform<_Scalar,_Dim,_Mode,_Options> > +{ + typedef _Scalar Scalar; + typedef DenseIndex Index; + typedef Dense StorageKind; + enum { + Dim1 = _Dim==Dynamic ? _Dim : _Dim + 1, + RowsAtCompileTime = _Mode==Projective ? 
Dim1 : _Dim, + ColsAtCompileTime = Dim1, + MaxRowsAtCompileTime = RowsAtCompileTime, + MaxColsAtCompileTime = ColsAtCompileTime, + Flags = 0 + }; +}; + +template<int Mode> struct transform_make_affine; + } // end namespace internal /** \geometry_module \ingroup Geometry_Module @@ -230,8 +248,7 @@ public: inline Transform() { check_template_params(); - if (int(Mode)==Affine) - makeAffine(); + internal::transform_make_affine<(int(Mode)==Affine) ? Affine : AffineCompact>::run(m_matrix); } inline Transform(const Transform& other) @@ -355,6 +372,9 @@ public: inline Transform& operator=(const QTransform& other); inline QTransform toQTransform(void) const; #endif + + Index rows() const { return int(Mode)==int(Projective) ? m_matrix.cols() : (m_matrix.cols()-1); } + Index cols() const { return m_matrix.cols(); } /** shortcut for m_matrix(row,col); * \sa MatrixBase::operator(Index,Index) const */ @@ -454,7 +474,7 @@ public: return internal::transform_transform_product_impl<Transform,Transform>::run(*this,other); } - #ifdef __INTEL_COMPILER + #if EIGEN_COMP_ICC private: // this intermediate structure permits to workaround a bug in ICC 11: // error: template instantiation resulted in unexpected function type of "Eigen::Transform<double, 3, 32, 0> @@ -591,11 +611,7 @@ public: */ void makeAffine() { - if(int(Mode)!=int(AffineCompact)) - { - matrix().template block<1,Dim>(Dim,0).setZero(); - matrix().coeffRef(Dim,Dim) = Scalar(1); - } + internal::transform_make_affine<int(Mode)>::run(m_matrix); } /** \internal @@ -1083,6 +1099,24 @@ Transform<Scalar,Dim,Mode,Options>::fromPositionOrientationScale(const MatrixBas namespace internal { +template<int Mode> +struct transform_make_affine +{ + template<typename MatrixType> + static void run(MatrixType &mat) + { + static const int Dim = MatrixType::ColsAtCompileTime-1; + mat.template block<1,Dim>(Dim,0).setZero(); + mat.coeffRef(Dim,Dim) = typename MatrixType::Scalar(1); + } +}; + +template<> +struct transform_make_affine<AffineCompact> 
+{ + template<typename MatrixType> static void run(MatrixType &) { } +}; + // selector needed to avoid taking the inverse of a 3x4 matrix template<typename TransformType, int Mode=TransformType::Mode> struct projective_transform_inverse diff --git a/Eigen/src/Householder/BlockHouseholder.h b/Eigen/src/Householder/BlockHouseholder.h index 60dbea5f5..35dbf80a1 100644 --- a/Eigen/src/Householder/BlockHouseholder.h +++ b/Eigen/src/Householder/BlockHouseholder.h @@ -16,48 +16,85 @@ namespace Eigen { namespace internal { + +/** \internal */ +// template<typename TriangularFactorType,typename VectorsType,typename CoeffsType> +// void make_block_householder_triangular_factor(TriangularFactorType& triFactor, const VectorsType& vectors, const CoeffsType& hCoeffs) +// { +// typedef typename TriangularFactorType::Index Index; +// typedef typename VectorsType::Scalar Scalar; +// const Index nbVecs = vectors.cols(); +// eigen_assert(triFactor.rows() == nbVecs && triFactor.cols() == nbVecs && vectors.rows()>=nbVecs); +// +// for(Index i = 0; i < nbVecs; i++) +// { +// Index rs = vectors.rows() - i; +// // Warning, note that hCoeffs may alias with vectors. +// // It is then necessary to copy it before modifying vectors(i,i). +// typename CoeffsType::Scalar h = hCoeffs(i); +// // This hack permits to pass trough nested Block<> and Transpose<> expressions. 
+// Scalar *Vii_ptr = const_cast<Scalar*>(vectors.data() + vectors.outerStride()*i + vectors.innerStride()*i); +// Scalar Vii = *Vii_ptr; +// *Vii_ptr = Scalar(1); +// triFactor.col(i).head(i).noalias() = -h * vectors.block(i, 0, rs, i).adjoint() +// * vectors.col(i).tail(rs); +// *Vii_ptr = Vii; +// // FIXME add .noalias() once the triangular product can work inplace +// triFactor.col(i).head(i) = triFactor.block(0,0,i,i).template triangularView<Upper>() +// * triFactor.col(i).head(i); +// triFactor(i,i) = hCoeffs(i); +// } +// } /** \internal */ +// This variant avoid modifications in vectors template<typename TriangularFactorType,typename VectorsType,typename CoeffsType> void make_block_householder_triangular_factor(TriangularFactorType& triFactor, const VectorsType& vectors, const CoeffsType& hCoeffs) { typedef typename TriangularFactorType::Index Index; - typedef typename VectorsType::Scalar Scalar; const Index nbVecs = vectors.cols(); eigen_assert(triFactor.rows() == nbVecs && triFactor.cols() == nbVecs && vectors.rows()>=nbVecs); - for(Index i = 0; i < nbVecs; i++) + for(Index i = nbVecs-1; i >=0 ; --i) { - Index rs = vectors.rows() - i; - Scalar Vii = vectors(i,i); - vectors.const_cast_derived().coeffRef(i,i) = Scalar(1); - triFactor.col(i).head(i).noalias() = -hCoeffs(i) * vectors.block(i, 0, rs, i).adjoint() - * vectors.col(i).tail(rs); - vectors.const_cast_derived().coeffRef(i, i) = Vii; - // FIXME add .noalias() once the triangular product can work inplace - triFactor.col(i).head(i) = triFactor.block(0,0,i,i).template triangularView<Upper>() - * triFactor.col(i).head(i); + Index rs = vectors.rows() - i - 1; + Index rt = nbVecs-i-1; + + if(rt>0) + { + triFactor.row(i).tail(rt).noalias() = -hCoeffs(i) * vectors.col(i).tail(rs).adjoint() + * vectors.bottomRightCorner(rs, rt).template triangularView<UnitLower>(); + + // FIXME add .noalias() once the triangular product can work inplace + triFactor.row(i).tail(rt) = triFactor.row(i).tail(rt) * 
triFactor.bottomRightCorner(rt,rt).template triangularView<Upper>(); + + } triFactor(i,i) = hCoeffs(i); } } -/** \internal */ +/** \internal + * if forward then perform mat = H0 * H1 * H2 * mat + * otherwise perform mat = H2 * H1 * H0 * mat + */ template<typename MatrixType,typename VectorsType,typename CoeffsType> -void apply_block_householder_on_the_left(MatrixType& mat, const VectorsType& vectors, const CoeffsType& hCoeffs) +void apply_block_householder_on_the_left(MatrixType& mat, const VectorsType& vectors, const CoeffsType& hCoeffs, bool forward) { typedef typename MatrixType::Index Index; enum { TFactorSize = MatrixType::ColsAtCompileTime }; Index nbVecs = vectors.cols(); - Matrix<typename MatrixType::Scalar, TFactorSize, TFactorSize, ColMajor> T(nbVecs,nbVecs); - make_block_householder_triangular_factor(T, vectors, hCoeffs); - - const TriangularView<const VectorsType, UnitLower>& V(vectors); + Matrix<typename MatrixType::Scalar, TFactorSize, TFactorSize, RowMajor> T(nbVecs,nbVecs); + + if(forward) make_block_householder_triangular_factor(T, vectors, hCoeffs); + else make_block_householder_triangular_factor(T, vectors, hCoeffs.conjugate()); + const TriangularView<const VectorsType, UnitLower> V(vectors); // A -= V T V^* A Matrix<typename MatrixType::Scalar,VectorsType::ColsAtCompileTime,MatrixType::ColsAtCompileTime,0, VectorsType::MaxColsAtCompileTime,MatrixType::MaxColsAtCompileTime> tmp = V.adjoint() * mat; // FIXME add .noalias() once the triangular product can work inplace - tmp = T.template triangularView<Upper>().adjoint() * tmp; + if(forward) tmp = T.template triangularView<Upper>() * tmp; + else tmp = T.template triangularView<Upper>().adjoint() * tmp; mat.noalias() -= V * tmp; } diff --git a/Eigen/src/Householder/HouseholderSequence.h b/Eigen/src/Householder/HouseholderSequence.h index d800ca1fa..4ded2995f 100644 --- a/Eigen/src/Householder/HouseholderSequence.h +++ b/Eigen/src/Householder/HouseholderSequence.h @@ -73,6 +73,15 @@ struct 
traits<HouseholderSequence<VectorsType,CoeffsType,Side> > }; }; +struct HouseholderSequenceShape {}; + +template<typename VectorsType, typename CoeffsType, int Side> +struct evaluator_traits<HouseholderSequence<VectorsType,CoeffsType,Side> > + : public evaluator_traits_base<HouseholderSequence<VectorsType,CoeffsType,Side> > +{ + typedef HouseholderSequenceShape Shape; +}; + template<typename VectorsType, typename CoeffsType, int Side> struct hseq_side_dependent_impl { @@ -307,12 +316,36 @@ template<typename VectorsType, typename CoeffsType, int Side> class HouseholderS template<typename Dest, typename Workspace> inline void applyThisOnTheLeft(Dest& dst, Workspace& workspace) const { - workspace.resize(dst.cols()); - for(Index k = 0; k < m_length; ++k) + const Index BlockSize = 48; + // if the entries are large enough, then apply the reflectors by block + if(m_length>=BlockSize && dst.cols()>1) { - Index actual_k = m_trans ? k : m_length-k-1; - dst.bottomRows(rows()-m_shift-actual_k) - .applyHouseholderOnTheLeft(essentialVector(actual_k), m_coeffs.coeff(actual_k), workspace.data()); + for(Index i = 0; i < m_length; i+=BlockSize) + { + Index end = m_trans ? (std::min)(m_length,i+BlockSize) : m_length-i; + Index k = m_trans ? i : (std::max)(Index(0),end-BlockSize); + Index bs = end-k; + Index start = k + m_shift; + + typedef Block<typename internal::remove_all<VectorsType>::type,Dynamic,Dynamic> SubVectorsType; + SubVectorsType sub_vecs1(m_vectors.const_cast_derived(), Side==OnTheRight ? k : start, + Side==OnTheRight ? start : k, + Side==OnTheRight ? bs : m_vectors.rows()-start, + Side==OnTheRight ? 
m_vectors.cols()-start : bs); + typename internal::conditional<Side==OnTheRight, Transpose<SubVectorsType>, SubVectorsType&>::type sub_vecs(sub_vecs1); + Block<Dest,Dynamic,Dynamic> sub_dst(dst,dst.rows()-rows()+m_shift+k,0, rows()-m_shift-k,dst.cols()); + apply_block_householder_on_the_left(sub_dst, sub_vecs, m_coeffs.segment(k, bs), !m_trans); + } + } + else + { + workspace.resize(dst.cols()); + for(Index k = 0; k < m_length; ++k) + { + Index actual_k = m_trans ? k : m_length-k-1; + dst.bottomRows(rows()-m_shift-actual_k) + .applyHouseholderOnTheLeft(essentialVector(actual_k), m_coeffs.coeff(actual_k), workspace.data()); + } } } diff --git a/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h b/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h index 1f3c060d0..3991afa8f 100644 --- a/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +++ b/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2011 Gael Guennebaud <gael.guennebaud@inria.fr> +// Copyright (C) 2011-2014 Gael Guennebaud <gael.guennebaud@inria.fr> // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. 
If a copy of the MPL was not distributed @@ -43,7 +43,7 @@ class DiagonalPreconditioner DiagonalPreconditioner() : m_isInitialized(false) {} template<typename MatType> - DiagonalPreconditioner(const MatType& mat) : m_invdiag(mat.cols()) + explicit DiagonalPreconditioner(const MatType& mat) : m_invdiag(mat.cols()) { compute(mat); } @@ -80,19 +80,20 @@ class DiagonalPreconditioner return factorize(mat); } + /** \internal */ template<typename Rhs, typename Dest> - void _solve(const Rhs& b, Dest& x) const + void _solve_impl(const Rhs& b, Dest& x) const { x = m_invdiag.array() * b.array() ; } - template<typename Rhs> inline const internal::solve_retval<DiagonalPreconditioner, Rhs> + template<typename Rhs> inline const Solve<DiagonalPreconditioner, Rhs> solve(const MatrixBase<Rhs>& b) const { eigen_assert(m_isInitialized && "DiagonalPreconditioner is not initialized."); eigen_assert(m_invdiag.size()==b.rows() && "DiagonalPreconditioner::solve(): invalid number of rows of the right hand side matrix b"); - return internal::solve_retval<DiagonalPreconditioner, Rhs>(*this, b.derived()); + return Solve<DiagonalPreconditioner, Rhs>(*this, b.derived()); } protected: @@ -100,22 +101,6 @@ class DiagonalPreconditioner bool m_isInitialized; }; -namespace internal { - -template<typename _MatrixType, typename Rhs> -struct solve_retval<DiagonalPreconditioner<_MatrixType>, Rhs> - : solve_retval_base<DiagonalPreconditioner<_MatrixType>, Rhs> -{ - typedef DiagonalPreconditioner<_MatrixType> Dec; - EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs) - - template<typename Dest> void evalTo(Dest& dst) const - { - dec()._solve(rhs(),dst); - } -}; - -} /** \ingroup IterativeLinearSolvers_Module * \brief A naive preconditioner which approximates any matrix as the identity matrix @@ -129,7 +114,7 @@ class IdentityPreconditioner IdentityPreconditioner() {} template<typename MatrixType> - IdentityPreconditioner(const MatrixType& ) {} + explicit IdentityPreconditioner(const MatrixType& ) {} template<typename 
MatrixType> IdentityPreconditioner& analyzePattern(const MatrixType& ) { return *this; } diff --git a/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h b/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h index 27824b9d5..224fe913f 100644 --- a/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +++ b/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2011 Gael Guennebaud <gael.guennebaud@inria.fr> +// Copyright (C) 2011-2014 Gael Guennebaud <gael.guennebaud@inria.fr> // Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr> // // This Source Code Form is subject to the terms of the Mozilla @@ -143,7 +143,7 @@ struct traits<BiCGSTAB<_MatrixType,_Preconditioner> > * step execution example starting with a random guess and printing the evolution * of the estimated error: * \include BiCGSTAB_step_by_step.cpp - * Note that such a step by step excution is slightly slower. + * Note that such a step by step execution is slightly slower. * * \sa class SimplicialCholesky, DiagonalPreconditioner, IdentityPreconditioner */ @@ -178,29 +178,13 @@ public: * this class becomes invalid. Call compute() to update it with the new * matrix A, or modify a copy of A. */ - BiCGSTAB(const MatrixType& A) : Base(A) {} + explicit BiCGSTAB(const MatrixType& A) : Base(A) {} ~BiCGSTAB() {} - - /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A - * \a x0 as an initial solution. 
- * - * \sa compute() - */ - template<typename Rhs,typename Guess> - inline const internal::solve_retval_with_guess<BiCGSTAB, Rhs, Guess> - solveWithGuess(const MatrixBase<Rhs>& b, const Guess& x0) const - { - eigen_assert(m_isInitialized && "BiCGSTAB is not initialized."); - eigen_assert(Base::rows()==b.rows() - && "BiCGSTAB::solve(): invalid number of rows of the right hand side matrix b"); - return internal::solve_retval_with_guess - <BiCGSTAB, Rhs, Guess>(*this, b.derived(), x0); - } - + /** \internal */ template<typename Rhs,typename Dest> - void _solveWithGuess(const Rhs& b, Dest& x) const + void _solve_with_guess_impl(const Rhs& b, Dest& x) const { bool failed = false; for(int j=0; j<b.cols(); ++j) @@ -219,36 +203,19 @@ public: } /** \internal */ + using Base::_solve_impl; template<typename Rhs,typename Dest> - void _solve(const Rhs& b, Dest& x) const + void _solve_impl(const MatrixBase<Rhs>& b, Dest& x) const { -// x.setZero(); - x = b; - _solveWithGuess(b,x); + // x.setZero(); + x = b; + _solve_with_guess_impl(b,x); } protected: }; - -namespace internal { - - template<typename _MatrixType, typename _Preconditioner, typename Rhs> -struct solve_retval<BiCGSTAB<_MatrixType, _Preconditioner>, Rhs> - : solve_retval_base<BiCGSTAB<_MatrixType, _Preconditioner>, Rhs> -{ - typedef BiCGSTAB<_MatrixType, _Preconditioner> Dec; - EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs) - - template<typename Dest> void evalTo(Dest& dst) const - { - dec()._solve(rhs(),dst); - } -}; - -} // end namespace internal - } // end namespace Eigen #endif // EIGEN_BICGSTAB_H diff --git a/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h b/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h index 3ce517940..b5ef6d60f 100644 --- a/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +++ b/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. 
// -// Copyright (C) 2011 Gael Guennebaud <gael.guennebaud@inria.fr> +// Copyright (C) 2011-2014 Gael Guennebaud <gael.guennebaud@inria.fr> // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed @@ -189,29 +189,13 @@ public: * this class becomes invalid. Call compute() to update it with the new * matrix A, or modify a copy of A. */ - ConjugateGradient(const MatrixType& A) : Base(A) {} + explicit ConjugateGradient(const MatrixType& A) : Base(A) {} ~ConjugateGradient() {} - - /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A - * \a x0 as an initial solution. - * - * \sa compute() - */ - template<typename Rhs,typename Guess> - inline const internal::solve_retval_with_guess<ConjugateGradient, Rhs, Guess> - solveWithGuess(const MatrixBase<Rhs>& b, const Guess& x0) const - { - eigen_assert(m_isInitialized && "ConjugateGradient is not initialized."); - eigen_assert(Base::rows()==b.rows() - && "ConjugateGradient::solve(): invalid number of rows of the right hand side matrix b"); - return internal::solve_retval_with_guess - <ConjugateGradient, Rhs, Guess>(*this, b.derived(), x0); - } /** \internal */ template<typename Rhs,typename Dest> - void _solveWithGuess(const Rhs& b, Dest& x) const + void _solve_with_guess_impl(const Rhs& b, Dest& x) const { m_iterations = Base::maxIterations(); m_error = Base::m_tolerance; @@ -231,35 +215,18 @@ public: } /** \internal */ + using Base::_solve_impl; template<typename Rhs,typename Dest> - void _solve(const Rhs& b, Dest& x) const + void _solve_impl(const MatrixBase<Rhs>& b, Dest& x) const { x.setOnes(); - _solveWithGuess(b,x); + _solve_with_guess_impl(b.derived(),x); } protected: }; - -namespace internal { - -template<typename _MatrixType, int _UpLo, typename _Preconditioner, typename Rhs> -struct solve_retval<ConjugateGradient<_MatrixType,_UpLo,_Preconditioner>, Rhs> - : 
solve_retval_base<ConjugateGradient<_MatrixType,_UpLo,_Preconditioner>, Rhs> -{ - typedef ConjugateGradient<_MatrixType,_UpLo,_Preconditioner> Dec; - EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs) - - template<typename Dest> void evalTo(Dest& dst) const - { - dec()._solve(rhs(),dst); - } -}; - -} // end namespace internal - } // end namespace Eigen #endif // EIGEN_CONJUGATE_GRADIENT_H diff --git a/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h b/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h index b55afc136..8ed9bdecc 100644 --- a/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +++ b/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h @@ -2,6 +2,7 @@ // for linear algebra. // // Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr> +// Copyright (C) 2014 Gael Guennebaud <gael.guennebaud@inria.fr> // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed @@ -93,8 +94,12 @@ Index QuickSplit(VectorV &row, VectorI &ind, Index ncut) * http://comments.gmane.org/gmane.comp.lib.eigen/3302 */ template <typename _Scalar> -class IncompleteLUT : internal::noncopyable +class IncompleteLUT : public SparseSolverBase<IncompleteLUT<_Scalar> > { + protected: + typedef SparseSolverBase<IncompleteLUT<_Scalar> > Base; + using Base::m_isInitialized; + public: typedef _Scalar Scalar; typedef typename NumTraits<Scalar>::Real RealScalar; typedef Matrix<Scalar,Dynamic,1> Vector; @@ -107,13 +112,13 @@ class IncompleteLUT : internal::noncopyable IncompleteLUT() : m_droptol(NumTraits<Scalar>::dummy_precision()), m_fillfactor(10), - m_analysisIsOk(false), m_factorizationIsOk(false), m_isInitialized(false) + m_analysisIsOk(false), m_factorizationIsOk(false) {} template<typename MatrixType> - IncompleteLUT(const MatrixType& mat, const RealScalar& droptol=NumTraits<Scalar>::dummy_precision(), int fillfactor = 10) + explicit IncompleteLUT(const MatrixType& mat, const RealScalar& 
droptol=NumTraits<Scalar>::dummy_precision(), int fillfactor = 10) : m_droptol(droptol),m_fillfactor(fillfactor), - m_analysisIsOk(false),m_factorizationIsOk(false),m_isInitialized(false) + m_analysisIsOk(false),m_factorizationIsOk(false) { eigen_assert(fillfactor != 0); compute(mat); @@ -158,7 +163,7 @@ class IncompleteLUT : internal::noncopyable void setFillfactor(int fillfactor); template<typename Rhs, typename Dest> - void _solve(const Rhs& b, Dest& x) const + void _solve_impl(const Rhs& b, Dest& x) const { x = m_Pinv * b; x = m_lu.template triangularView<UnitLower>().solve(x); @@ -166,15 +171,6 @@ class IncompleteLUT : internal::noncopyable x = m_P * x; } - template<typename Rhs> inline const internal::solve_retval<IncompleteLUT, Rhs> - solve(const MatrixBase<Rhs>& b) const - { - eigen_assert(m_isInitialized && "IncompleteLUT is not initialized."); - eigen_assert(cols()==b.rows() - && "IncompleteLUT::solve(): invalid number of rows of the right hand side matrix b"); - return internal::solve_retval<IncompleteLUT, Rhs>(*this, b.derived()); - } - protected: /** keeps off-diagonal entries; drops diagonal entries */ @@ -192,7 +188,6 @@ protected: int m_fillfactor; bool m_analysisIsOk; bool m_factorizationIsOk; - bool m_isInitialized; ComputationInfo m_info; PermutationMatrix<Dynamic,Dynamic,Index> m_P; // Fill-reducing permutation PermutationMatrix<Dynamic,Dynamic,Index> m_Pinv; // Inverse permutation @@ -445,23 +440,6 @@ void IncompleteLUT<Scalar>::factorize(const _MatrixType& amat) m_info = Success; } -namespace internal { - -template<typename _MatrixType, typename Rhs> -struct solve_retval<IncompleteLUT<_MatrixType>, Rhs> - : solve_retval_base<IncompleteLUT<_MatrixType>, Rhs> -{ - typedef IncompleteLUT<_MatrixType> Dec; - EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs) - - template<typename Dest> void evalTo(Dest& dst) const - { - dec()._solve(rhs(),dst); - } -}; - -} // end namespace internal - } // end namespace Eigen #endif // EIGEN_INCOMPLETE_LUT_H diff --git 
a/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h b/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h index 2036922d6..f33c868bb 100644 --- a/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +++ b/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2011 Gael Guennebaud <gael.guennebaud@inria.fr> +// Copyright (C) 2011-2014 Gael Guennebaud <gael.guennebaud@inria.fr> // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed @@ -18,8 +18,12 @@ namespace Eigen { * \sa class SimplicialCholesky, DiagonalPreconditioner, IdentityPreconditioner */ template< typename Derived> -class IterativeSolverBase : internal::noncopyable +class IterativeSolverBase : public SparseSolverBase<Derived> { +protected: + typedef SparseSolverBase<Derived> Base; + using Base::m_isInitialized; + public: typedef typename internal::traits<Derived>::MatrixType MatrixType; typedef typename internal::traits<Derived>::Preconditioner Preconditioner; @@ -29,8 +33,7 @@ public: public: - Derived& derived() { return *static_cast<Derived*>(this); } - const Derived& derived() const { return *static_cast<const Derived*>(this); } + using Base::derived; /** Default constructor. */ IterativeSolverBase() @@ -49,7 +52,7 @@ public: * this class becomes invalid. Call compute() to update it with the new * matrix A, or modify a copy of A. */ - IterativeSolverBase(const MatrixType& A) + explicit IterativeSolverBase(const MatrixType& A) { init(); compute(A); @@ -57,10 +60,10 @@ public: ~IterativeSolverBase() {} - /** Initializes the iterative solver for the sparcity pattern of the matrix \a A for further solving \c Ax=b problems. + /** Initializes the iterative solver for the sparsity pattern of the matrix \a A for further solving \c Ax=b problems. 
* - * Currently, this function mostly call analyzePattern on the preconditioner. In the future - * we might, for instance, implement column reodering for faster matrix vector products. + * Currently, this function mostly calls analyzePattern on the preconditioner. In the future + * we might, for instance, implement column reordering for faster matrix vector products. */ Derived& analyzePattern(const MatrixType& A) { @@ -73,7 +76,7 @@ public: /** Initializes the iterative solver with the numerical values of the matrix \a A for further solving \c Ax=b problems. * - * Currently, this function mostly call factorize on the preconditioner. + * Currently, this function mostly calls factorize on the preconditioner. * * \warning this class stores a reference to the matrix A as well as some * precomputed values that depend on it. Therefore, if \a A is changed @@ -92,8 +95,8 @@ public: /** Initializes the iterative solver with the matrix \a A for further solving \c Ax=b problems. * - * Currently, this function mostly initialized/compute the preconditioner. In the future - * we might, for instance, implement column reodering for faster matrix vector products. + * Currently, this function mostly initializes/computes the preconditioner. In the future + * we might, for instance, implement column reordering for faster matrix vector products. * * \warning this class stores a reference to the matrix A as well as some * precomputed values that depend on it. Therefore, if \a A is changed @@ -159,31 +162,18 @@ public: return m_error; } - /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A. 
- * - * \sa compute() - */ - template<typename Rhs> inline const internal::solve_retval<Derived, Rhs> - solve(const MatrixBase<Rhs>& b) const - { - eigen_assert(m_isInitialized && "IterativeSolverBase is not initialized."); - eigen_assert(rows()==b.rows() - && "IterativeSolverBase::solve(): invalid number of rows of the right hand side matrix b"); - return internal::solve_retval<Derived, Rhs>(derived(), b.derived()); - } - - /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A. + /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A + * and \a x0 as an initial solution. * - * \sa compute() + * \sa solve(), compute() */ - template<typename Rhs> - inline const internal::sparse_solve_retval<IterativeSolverBase, Rhs> - solve(const SparseMatrixBase<Rhs>& b) const + template<typename Rhs,typename Guess> + inline const SolveWithGuess<Derived, Rhs, Guess> + solveWithGuess(const MatrixBase<Rhs>& b, const Guess& x0) const { - eigen_assert(m_isInitialized && "IterativeSolverBase is not initialized."); - eigen_assert(rows()==b.rows() - && "IterativeSolverBase::solve(): invalid number of rows of the right hand side matrix b"); - return internal::sparse_solve_retval<IterativeSolverBase, Rhs>(*this, b.derived()); + eigen_assert(m_isInitialized && "Solver is not initialized."); + eigen_assert(derived().rows()==b.rows() && "solve(): invalid number of rows of the right hand side matrix b"); + return SolveWithGuess<Derived, Rhs, Guess>(derived(), b.derived(), x0); } /** \returns Success if the iterations converged, and NoConvergence otherwise. 
*/ @@ -195,7 +185,7 @@ public: /** \internal */ template<typename Rhs, typename DestScalar, int DestOptions, typename DestIndex> - void _solve_sparse(const Rhs& b, SparseMatrix<DestScalar,DestOptions,DestIndex> &dest) const + void _solve_impl(const Rhs& b, SparseMatrix<DestScalar,DestOptions,DestIndex> &dest) const { eigen_assert(rows()==b.rows()); @@ -229,26 +219,9 @@ protected: mutable RealScalar m_error; mutable int m_iterations; mutable ComputationInfo m_info; - mutable bool m_isInitialized, m_analysisIsOk, m_factorizationIsOk; -}; - -namespace internal { - -template<typename Derived, typename Rhs> -struct sparse_solve_retval<IterativeSolverBase<Derived>, Rhs> - : sparse_solve_retval_base<IterativeSolverBase<Derived>, Rhs> -{ - typedef IterativeSolverBase<Derived> Dec; - EIGEN_MAKE_SPARSE_SOLVE_HELPERS(Dec,Rhs) - - template<typename Dest> void evalTo(Dest& dst) const - { - dec().derived()._solve_sparse(rhs(),dst); - } + mutable bool m_analysisIsOk, m_factorizationIsOk; }; -} // end namespace internal - } // end namespace Eigen #endif // EIGEN_ITERATIVE_SOLVER_BASE_H diff --git a/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h b/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h new file mode 100644 index 000000000..251c6fa1a --- /dev/null +++ b/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h @@ -0,0 +1,113 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Gael Guennebaud <gael.guennebaud@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_SOLVEWITHGUESS_H +#define EIGEN_SOLVEWITHGUESS_H + +namespace Eigen { + +template<typename Decomposition, typename RhsType, typename GuessType> class SolveWithGuess; + +/** \class SolveWithGuess + * \ingroup IterativeLinearSolvers_Module + * + * \brief Pseudo expression representing a solving operation + * + * \tparam Decomposition the type of the matrix or decomposition object + * \tparam RhsType the type of the right-hand side + * + * This class represents an expression of A.solveWithGuess(B, x0) + * and most of the time this is the only way it is used. + * + */ +namespace internal { + + +template<typename Decomposition, typename RhsType, typename GuessType> +struct traits<SolveWithGuess<Decomposition, RhsType, GuessType> > + : traits<Solve<Decomposition,RhsType> > +{}; + +} + + +template<typename Decomposition, typename RhsType, typename GuessType> +class SolveWithGuess : public internal::generic_xpr_base<SolveWithGuess<Decomposition,RhsType,GuessType>, MatrixXpr, typename internal::traits<RhsType>::StorageKind>::type +{ +public: + typedef typename RhsType::Index Index; + typedef typename internal::traits<SolveWithGuess>::Scalar Scalar; + typedef typename internal::traits<SolveWithGuess>::PlainObject PlainObject; + typedef typename internal::generic_xpr_base<SolveWithGuess<Decomposition,RhsType,GuessType>, MatrixXpr, typename internal::traits<RhsType>::StorageKind>::type Base; + + SolveWithGuess(const Decomposition &dec, const RhsType &rhs, const GuessType &guess) + : m_dec(dec), m_rhs(rhs), m_guess(guess) + {} + + EIGEN_DEVICE_FUNC Index rows() const { return m_dec.cols(); } + EIGEN_DEVICE_FUNC Index cols() const { return m_rhs.cols(); } + + EIGEN_DEVICE_FUNC const Decomposition& dec() const { return m_dec; } + EIGEN_DEVICE_FUNC const RhsType& rhs() const { return m_rhs; } + EIGEN_DEVICE_FUNC const GuessType& guess() const { return m_guess; } + +protected: + const Decomposition &m_dec; + const RhsType &m_rhs; + const GuessType &m_guess; + +private: + Scalar
coeff(Index row, Index col) const; + Scalar coeff(Index i) const; +}; + +namespace internal { + +// Evaluator of SolveWithGuess -> eval into a temporary +template<typename Decomposition, typename RhsType, typename GuessType> +struct evaluator<SolveWithGuess<Decomposition,RhsType, GuessType> > + : public evaluator<typename SolveWithGuess<Decomposition,RhsType,GuessType>::PlainObject>::type +{ + typedef SolveWithGuess<Decomposition,RhsType,GuessType> SolveType; + typedef typename SolveType::PlainObject PlainObject; + typedef typename evaluator<PlainObject>::type Base; + + typedef evaluator type; + typedef evaluator nestedType; + + evaluator(const SolveType& solve) + : m_result(solve.rows(), solve.cols()) + { + ::new (static_cast<Base*>(this)) Base(m_result); + solve.dec()._solve_with_guess_impl(solve.rhs(), m_result, solve().guess()); + } + +protected: + PlainObject m_result; +}; + +// Specialization for "dst = dec.solveWithGuess(rhs)" +// NOTE we need to specialize it for Dense2Dense to avoid ambiguous specialization error and a Sparse2Sparse specialization must exist somewhere +template<typename DstXprType, typename DecType, typename RhsType, typename GuessType, typename Scalar> +struct Assignment<DstXprType, SolveWithGuess<DecType,RhsType,GuessType>, internal::assign_op<Scalar>, Dense2Dense, Scalar> +{ + typedef SolveWithGuess<DecType,RhsType,GuessType> SrcXprType; + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &) + { + // FIXME shall we resize dst here? 
+ dst = src.guess(); + src.dec()._solve_with_guess_impl(src.rhs(), dst/*, src.guess()*/); + } +}; + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_SOLVEWITHGUESS_H diff --git a/Eigen/src/LU/Determinant.h b/Eigen/src/LU/Determinant.h index bb8e78a8a..d6a3c1e5a 100644 --- a/Eigen/src/LU/Determinant.h +++ b/Eigen/src/LU/Determinant.h @@ -92,7 +92,7 @@ template<typename Derived> inline typename internal::traits<Derived>::Scalar MatrixBase<Derived>::determinant() const { eigen_assert(rows() == cols()); - typedef typename internal::nested<Derived,Base::RowsAtCompileTime>::type Nested; + typedef typename internal::nested_eval<Derived,Base::RowsAtCompileTime>::type Nested; return internal::determinant_impl<typename internal::remove_all<Nested>::type>::run(derived()); } diff --git a/Eigen/src/LU/FullPivLU.h b/Eigen/src/LU/FullPivLU.h index 971b9da1d..96f2cebee 100644 --- a/Eigen/src/LU/FullPivLU.h +++ b/Eigen/src/LU/FullPivLU.h @@ -12,6 +12,15 @@ namespace Eigen { +namespace internal { +template<typename _MatrixType> struct traits<FullPivLU<_MatrixType> > + : traits<_MatrixType> +{ + enum { Flags = 0 }; +}; + +} // end namespace internal + /** \ingroup LU_Module * * \class FullPivLU @@ -62,6 +71,7 @@ template<typename _MatrixType> class FullPivLU typedef typename internal::plain_col_type<MatrixType, Index>::type IntColVectorType; typedef PermutationMatrix<ColsAtCompileTime, MaxColsAtCompileTime> PermutationQType; typedef PermutationMatrix<RowsAtCompileTime, MaxRowsAtCompileTime> PermutationPType; + typedef typename MatrixType::PlainObject PlainObject; /** * \brief Default Constructor. @@ -84,7 +94,7 @@ template<typename _MatrixType> class FullPivLU * \param matrix the matrix of which to compute the LU decomposition. * It is required to be nonzero. */ - FullPivLU(const MatrixType& matrix); + explicit FullPivLU(const MatrixType& matrix); /** Computes the LU decomposition of the given matrix.
* @@ -211,11 +221,11 @@ template<typename _MatrixType> class FullPivLU * \sa TriangularView::solve(), kernel(), inverse() */ template<typename Rhs> - inline const internal::solve_retval<FullPivLU, Rhs> + inline const Solve<FullPivLU, Rhs> solve(const MatrixBase<Rhs>& b) const { eigen_assert(m_isInitialized && "LU is not initialized."); - return internal::solve_retval<FullPivLU, Rhs>(*this, b.derived()); + return Solve<FullPivLU, Rhs>(*this, b.derived()); } /** \returns the determinant of the matrix of which @@ -360,18 +370,23 @@ template<typename _MatrixType> class FullPivLU * * \sa MatrixBase::inverse() */ - inline const internal::solve_retval<FullPivLU,typename MatrixType::IdentityReturnType> inverse() const + inline const Inverse<FullPivLU> inverse() const { eigen_assert(m_isInitialized && "LU is not initialized."); eigen_assert(m_lu.rows() == m_lu.cols() && "You can't take the inverse of a non-square matrix!"); - return internal::solve_retval<FullPivLU,typename MatrixType::IdentityReturnType> - (*this, MatrixType::Identity(m_lu.rows(), m_lu.cols())); + return Inverse<FullPivLU>(*this); } MatrixType reconstructedMatrix() const; inline Index rows() const { return m_lu.rows(); } inline Index cols() const { return m_lu.cols(); } + + #ifndef EIGEN_PARSED_BY_DOXYGEN + template<typename RhsType, typename DstType> + EIGEN_DEVICE_FUNC + void _solve_impl(const RhsType &rhs, DstType &dst) const; + #endif protected: MatrixType m_lu; @@ -663,64 +678,72 @@ struct image_retval<FullPivLU<_MatrixType> > /***** Implementation of solve() *****************************************************/ -template<typename _MatrixType, typename Rhs> -struct solve_retval<FullPivLU<_MatrixType>, Rhs> - : solve_retval_base<FullPivLU<_MatrixType>, Rhs> -{ - EIGEN_MAKE_SOLVE_HELPERS(FullPivLU<_MatrixType>,Rhs) +} // end namespace internal - template<typename Dest> void evalTo(Dest& dst) const +#ifndef EIGEN_PARSED_BY_DOXYGEN +template<typename _MatrixType> +template<typename RhsType, typename 
DstType> +void FullPivLU<_MatrixType>::_solve_impl(const RhsType &rhs, DstType &dst) const +{ + /* The decomposition PAQ = LU can be rewritten as A = P^{-1} L U Q^{-1}. + * So we proceed as follows: + * Step 1: compute c = P * rhs. + * Step 2: replace c by the solution x to Lx = c. Exists because L is invertible. + * Step 3: replace c by the solution x to Ux = c. May or may not exist. + * Step 4: result = Q * c; + */ + + const Index rows = this->rows(), + cols = this->cols(), + nonzero_pivots = this->nonzeroPivots(); + eigen_assert(rhs.rows() == rows); + const Index smalldim = (std::min)(rows, cols); + + if(nonzero_pivots == 0) { - /* The decomposition PAQ = LU can be rewritten as A = P^{-1} L U Q^{-1}. - * So we proceed as follows: - * Step 1: compute c = P * rhs. - * Step 2: replace c by the solution x to Lx = c. Exists because L is invertible. - * Step 3: replace c by the solution x to Ux = c. May or may not exist. - * Step 4: result = Q * c; - */ - - const Index rows = dec().rows(), cols = dec().cols(), - nonzero_pivots = dec().nonzeroPivots(); - eigen_assert(rhs().rows() == rows); - const Index smalldim = (std::min)(rows, cols); - - if(nonzero_pivots == 0) - { - dst.setZero(); - return; - } + dst.setZero(); + return; + } - typename Rhs::PlainObject c(rhs().rows(), rhs().cols()); + typename RhsType::PlainObject c(rhs.rows(), rhs.cols()); - // Step 1 - c = dec().permutationP() * rhs(); + // Step 1 + c = permutationP() * rhs; - // Step 2 - dec().matrixLU() - .topLeftCorner(smalldim,smalldim) - .template triangularView<UnitLower>() - .solveInPlace(c.topRows(smalldim)); - if(rows>cols) - { - c.bottomRows(rows-cols) - -= dec().matrixLU().bottomRows(rows-cols) - * c.topRows(cols); - } + // Step 2 + m_lu.topLeftCorner(smalldim,smalldim) + .template triangularView<UnitLower>() + .solveInPlace(c.topRows(smalldim)); + if(rows>cols) + c.bottomRows(rows-cols) -= m_lu.bottomRows(rows-cols) * c.topRows(cols); + + // Step 3 + m_lu.topLeftCorner(nonzero_pivots, nonzero_pivots) 
+ .template triangularView<Upper>() + .solveInPlace(c.topRows(nonzero_pivots)); + + // Step 4 + for(Index i = 0; i < nonzero_pivots; ++i) + dst.row(permutationQ().indices().coeff(i)) = c.row(i); + for(Index i = nonzero_pivots; i < m_lu.cols(); ++i) + dst.row(permutationQ().indices().coeff(i)).setZero(); +} +#endif + +namespace internal { - // Step 3 - dec().matrixLU() - .topLeftCorner(nonzero_pivots, nonzero_pivots) - .template triangularView<Upper>() - .solveInPlace(c.topRows(nonzero_pivots)); - - // Step 4 - for(Index i = 0; i < nonzero_pivots; ++i) - dst.row(dec().permutationQ().indices().coeff(i)) = c.row(i); - for(Index i = nonzero_pivots; i < dec().matrixLU().cols(); ++i) - dst.row(dec().permutationQ().indices().coeff(i)).setZero(); + +/***** Implementation of inverse() *****************************************************/ +template<typename DstXprType, typename MatrixType, typename Scalar> +struct Assignment<DstXprType, Inverse<FullPivLU<MatrixType> >, internal::assign_op<Scalar>, Dense2Dense, Scalar> +{ + typedef FullPivLU<MatrixType> LuType; + typedef Inverse<LuType> SrcXprType; + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &) + { + dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.cols())); } }; - } // end namespace internal /******* MatrixBase methods *****************************************************************/ diff --git a/Eigen/src/LU/Inverse.h b/Eigen/src/LU/InverseImpl.h index 8d1364e0a..e5f270d19 100644 --- a/Eigen/src/LU/Inverse.h +++ b/Eigen/src/LU/InverseImpl.h @@ -2,13 +2,14 @@ // for linear algebra. // // Copyright (C) 2008-2010 Benoit Jacob <jacob.benoit.1@gmail.com> +// Copyright (C) 2014 Gael Guennebaud <gael.guennebaud@inria.fr> // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
-#ifndef EIGEN_INVERSE_H -#define EIGEN_INVERSE_H +#ifndef EIGEN_INVERSE_IMPL_H +#define EIGEN_INVERSE_IMPL_H namespace Eigen { @@ -42,7 +43,8 @@ struct compute_inverse<MatrixType, ResultType, 1> static inline void run(const MatrixType& matrix, ResultType& result) { typedef typename MatrixType::Scalar Scalar; - result.coeffRef(0,0) = Scalar(1) / matrix.coeff(0,0); + typename internal::evaluator<MatrixType>::type matrixEval(matrix); + result.coeffRef(0,0) = Scalar(1) / matrixEval.coeff(0,0); } }; @@ -75,10 +77,10 @@ inline void compute_inverse_size2_helper( const MatrixType& matrix, const typename ResultType::Scalar& invdet, ResultType& result) { - result.coeffRef(0,0) = matrix.coeff(1,1) * invdet; + result.coeffRef(0,0) = matrix.coeff(1,1) * invdet; result.coeffRef(1,0) = -matrix.coeff(1,0) * invdet; result.coeffRef(0,1) = -matrix.coeff(0,1) * invdet; - result.coeffRef(1,1) = matrix.coeff(0,0) * invdet; + result.coeffRef(1,1) = matrix.coeff(0,0) * invdet; } template<typename MatrixType, typename ResultType> @@ -279,41 +281,33 @@ struct compute_inverse_and_det_with_check<MatrixType, ResultType, 4> *** MatrixBase methods *** *************************/ -template<typename MatrixType> -struct traits<inverse_impl<MatrixType> > -{ - typedef typename MatrixType::PlainObject ReturnType; -}; - -template<typename MatrixType> -struct inverse_impl : public ReturnByValue<inverse_impl<MatrixType> > -{ - typedef typename MatrixType::Index Index; - typedef typename internal::eval<MatrixType>::type MatrixTypeNested; - typedef typename remove_all<MatrixTypeNested>::type MatrixTypeNestedCleaned; - MatrixTypeNested m_matrix; - - EIGEN_DEVICE_FUNC - inverse_impl(const MatrixType& matrix) - : m_matrix(matrix) - {} +} // end namespace internal - EIGEN_DEVICE_FUNC inline Index rows() const { return m_matrix.rows(); } - EIGEN_DEVICE_FUNC inline Index cols() const { return m_matrix.cols(); } +namespace internal { - template<typename Dest> - EIGEN_DEVICE_FUNC - inline void evalTo(Dest& dst) 
const +// Specialization for "dense = dense_xpr.inverse()" +template<typename DstXprType, typename XprType, typename Scalar> +struct Assignment<DstXprType, Inverse<XprType>, internal::assign_op<Scalar>, Dense2Dense, Scalar> +{ + typedef Inverse<XprType> SrcXprType; + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &) { - const int Size = EIGEN_PLAIN_ENUM_MIN(MatrixType::ColsAtCompileTime,Dest::ColsAtCompileTime); + // FIXME shall we resize dst here? + const int Size = EIGEN_PLAIN_ENUM_MIN(XprType::ColsAtCompileTime,DstXprType::ColsAtCompileTime); EIGEN_ONLY_USED_FOR_DEBUG(Size); - eigen_assert(( (Size<=1) || (Size>4) || (extract_data(m_matrix)!=extract_data(dst))) + eigen_assert(( (Size<=1) || (Size>4) || (extract_data(src.nestedExpression())!=extract_data(dst))) && "Aliasing problem detected in inverse(), you need to do inverse().eval() here."); - compute_inverse<MatrixTypeNestedCleaned, Dest>::run(m_matrix, dst); + typedef typename internal::nested_eval<XprType,XprType::ColsAtCompileTime>::type ActualXprType; + typedef typename internal::remove_all<ActualXprType>::type ActualXprTypeCleanded; + + ActualXprType actual_xpr(src.nestedExpression()); + + compute_inverse<ActualXprTypeCleanded, DstXprType>::run(actual_xpr, dst); } }; + } // end namespace internal /** \lu_module @@ -334,11 +328,11 @@ struct inverse_impl : public ReturnByValue<inverse_impl<MatrixType> > * \sa computeInverseAndDetWithCheck() */ template<typename Derived> -inline const internal::inverse_impl<Derived> MatrixBase<Derived>::inverse() const +inline const Inverse<Derived> MatrixBase<Derived>::inverse() const { EIGEN_STATIC_ASSERT(!NumTraits<Scalar>::IsInteger,THIS_FUNCTION_IS_NOT_FOR_INTEGER_NUMERIC_TYPES) eigen_assert(rows() == cols()); - return internal::inverse_impl<Derived>(derived()); + return Inverse<Derived>(derived()); } /** \lu_module @@ -374,7 +368,7 @@ inline void MatrixBase<Derived>::computeInverseAndDetWithCheck( // for larger sizes, 
evaluating has negligible cost and limits code size. typedef typename internal::conditional< RowsAtCompileTime == 2, - typename internal::remove_all<typename internal::nested<Derived, 2>::type>::type, + typename internal::remove_all<typename internal::nested_eval<Derived, 2>::type>::type, PlainObject >::type MatrixType; internal::compute_inverse_and_det_with_check<MatrixType, ResultType>::run @@ -414,4 +408,4 @@ inline void MatrixBase<Derived>::computeInverseWithCheck( } // end namespace Eigen -#endif // EIGEN_INVERSE_H +#endif // EIGEN_INVERSE_IMPL_H diff --git a/Eigen/src/LU/PartialPivLU.h b/Eigen/src/LU/PartialPivLU.h index 2f65c3a49..d04e4191b 100644 --- a/Eigen/src/LU/PartialPivLU.h +++ b/Eigen/src/LU/PartialPivLU.h @@ -13,6 +13,19 @@ namespace Eigen { +namespace internal { +template<typename _MatrixType> struct traits<PartialPivLU<_MatrixType> > + : traits<_MatrixType> +{ + typedef traits<_MatrixType> BaseTraits; + enum { + Flags = BaseTraits::Flags & RowMajorBit, + CoeffReadCost = Dynamic + }; +}; + +} // end namespace internal + /** \ingroup LU_Module * * \class PartialPivLU @@ -62,6 +75,7 @@ template<typename _MatrixType> class PartialPivLU typedef typename MatrixType::Index Index; typedef PermutationMatrix<RowsAtCompileTime, MaxRowsAtCompileTime> PermutationType; typedef Transpositions<RowsAtCompileTime, MaxRowsAtCompileTime> TranspositionType; + typedef typename MatrixType::PlainObject PlainObject; /** @@ -78,7 +92,7 @@ template<typename _MatrixType> class PartialPivLU * according to the specified problem \a size. * \sa PartialPivLU() */ - PartialPivLU(Index size); + explicit PartialPivLU(Index size); /** Constructor. * @@ -87,7 +101,7 @@ template<typename _MatrixType> class PartialPivLU * \warning The matrix should have full rank (e.g. if it's square, it should be invertible). * If you need to deal with non-full rank, use class FullPivLU instead. 
*/ - PartialPivLU(const MatrixType& matrix); + explicit PartialPivLU(const MatrixType& matrix); PartialPivLU& compute(const MatrixType& matrix); @@ -129,11 +143,11 @@ template<typename _MatrixType> class PartialPivLU * \sa TriangularView::solve(), inverse(), computeInverse() */ template<typename Rhs> - inline const internal::solve_retval<PartialPivLU, Rhs> + inline const Solve<PartialPivLU, Rhs> solve(const MatrixBase<Rhs>& b) const { eigen_assert(m_isInitialized && "PartialPivLU is not initialized."); - return internal::solve_retval<PartialPivLU, Rhs>(*this, b.derived()); + return Solve<PartialPivLU, Rhs>(*this, b.derived()); } /** \returns the inverse of the matrix of which *this is the LU decomposition. @@ -143,11 +157,10 @@ template<typename _MatrixType> class PartialPivLU * * \sa MatrixBase::inverse(), LU::inverse() */ - inline const internal::solve_retval<PartialPivLU,typename MatrixType::IdentityReturnType> inverse() const + inline const Inverse<PartialPivLU> inverse() const { eigen_assert(m_isInitialized && "PartialPivLU is not initialized."); - return internal::solve_retval<PartialPivLU,typename MatrixType::IdentityReturnType> - (*this, MatrixType::Identity(m_lu.rows(), m_lu.cols())); + return Inverse<PartialPivLU>(*this); } /** \returns the determinant of the matrix of which @@ -169,6 +182,30 @@ template<typename _MatrixType> class PartialPivLU inline Index rows() const { return m_lu.rows(); } inline Index cols() const { return m_lu.cols(); } + + #ifndef EIGEN_PARSED_BY_DOXYGEN + template<typename RhsType, typename DstType> + EIGEN_DEVICE_FUNC + void _solve_impl(const RhsType &rhs, DstType &dst) const { + /* The decomposition PA = LU can be rewritten as A = P^{-1} L U. + * So we proceed as follows: + * Step 1: compute c = Pb. + * Step 2: replace c by the solution x to Lx = c. + * Step 3: replace c by the solution x to Ux = c. 
+ */ + + eigen_assert(rhs.rows() == m_lu.rows()); + + // Step 1 + dst = permutationP() * rhs; + + // Step 2 + m_lu.template triangularView<UnitLower>().solveInPlace(dst); + + // Step 3 + m_lu.template triangularView<Upper>().solveInPlace(dst); + } + #endif protected: MatrixType m_lu; @@ -434,34 +471,17 @@ MatrixType PartialPivLU<MatrixType>::reconstructedMatrix() const namespace internal { -template<typename _MatrixType, typename Rhs> -struct solve_retval<PartialPivLU<_MatrixType>, Rhs> - : solve_retval_base<PartialPivLU<_MatrixType>, Rhs> +/***** Implementation of inverse() *****************************************************/ +template<typename DstXprType, typename MatrixType, typename Scalar> +struct Assignment<DstXprType, Inverse<PartialPivLU<MatrixType> >, internal::assign_op<Scalar>, Dense2Dense, Scalar> { - EIGEN_MAKE_SOLVE_HELPERS(PartialPivLU<_MatrixType>,Rhs) - - template<typename Dest> void evalTo(Dest& dst) const - { - /* The decomposition PA = LU can be rewritten as A = P^{-1} L U. - * So we proceed as follows: - * Step 1: compute c = Pb. - * Step 2: replace c by the solution x to Lx = c. - * Step 3: replace c by the solution x to Ux = c. 
- */ - - eigen_assert(rhs().rows() == dec().matrixLU().rows()); - - // Step 1 - dst = dec().permutationP() * rhs(); - - // Step 2 - dec().matrixLU().template triangularView<UnitLower>().solveInPlace(dst); - - // Step 3 - dec().matrixLU().template triangularView<Upper>().solveInPlace(dst); + typedef PartialPivLU<MatrixType> LuType; + typedef Inverse<LuType> SrcXprType; + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &) + { + dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.cols())); } }; - } // end namespace internal /******** MatrixBase methods *******/ diff --git a/Eigen/src/LU/arch/Inverse_SSE.h b/Eigen/src/LU/arch/Inverse_SSE.h index 60b7a2376..1f62ef14e 100644 --- a/Eigen/src/LU/arch/Inverse_SSE.h +++ b/Eigen/src/LU/arch/Inverse_SSE.h @@ -39,9 +39,11 @@ struct compute_inverse_size4<Architecture::SSE, float, MatrixType, ResultType> ResultAlignment = bool(ResultType::Flags&AlignedBit), StorageOrdersMatch = (MatrixType::Flags&RowMajorBit) == (ResultType::Flags&RowMajorBit) }; + typedef typename conditional<(MatrixType::Flags&LinearAccessBit),MatrixType const &,typename MatrixType::PlainObject>::type ActualMatrixType; - static void run(const MatrixType& matrix, ResultType& result) + static void run(const MatrixType& mat, ResultType& result) { + ActualMatrixType matrix(mat); EIGEN_ALIGN16 const unsigned int _Sign_PNNP[4] = { 0x00000000, 0x80000000, 0x80000000, 0x00000000 }; // Load the full matrix into registers @@ -167,14 +169,17 @@ struct compute_inverse_size4<Architecture::SSE, double, MatrixType, ResultType> ResultAlignment = bool(ResultType::Flags&AlignedBit), StorageOrdersMatch = (MatrixType::Flags&RowMajorBit) == (ResultType::Flags&RowMajorBit) }; - static void run(const MatrixType& matrix, ResultType& result) + typedef typename conditional<(MatrixType::Flags&LinearAccessBit),MatrixType const &,typename MatrixType::PlainObject>::type ActualMatrixType; + + static void run(const MatrixType& mat, 
ResultType& result) { + ActualMatrixType matrix(mat); const __m128d _Sign_NP = _mm_castsi128_pd(_mm_set_epi32(0x0,0x0,0x80000000,0x0)); const __m128d _Sign_PN = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0)); // The inverse is calculated using "Divide and Conquer" technique. The // original matrix is divide into four 2x2 sub-matrices. Since each - // register of the matrix holds two element, the smaller matrices are + // register of the matrix holds two elements, the smaller matrices are // consisted of two registers. Hence we get a better locality of the // calculations. diff --git a/Eigen/src/OrderingMethods/Amd.h b/Eigen/src/OrderingMethods/Amd.h index 41b4fd7e3..ce7c0bbf3 100644 --- a/Eigen/src/OrderingMethods/Amd.h +++ b/Eigen/src/OrderingMethods/Amd.h @@ -106,7 +106,8 @@ void minimum_degree_ordering(SparseMatrix<Scalar,ColMajor,Index>& C, Permutation t = cnz + cnz/5 + 2*n; /* add elbow room to C */ C.resizeNonZeros(t); - Index* W = new Index[8*(n+1)]; /* get workspace */ + // get workspace + ei_declare_aligned_stack_constructed_variable(Index,W,8*(n+1),0); Index* len = W; Index* nv = W + (n+1); Index* next = W + 2*(n+1); @@ -424,8 +425,6 @@ void minimum_degree_ordering(SparseMatrix<Scalar,ColMajor,Index>& C, Permutation } perm.indices().conservativeResize(n); - - delete[] W; } } // namespace internal diff --git a/Eigen/src/PaStiXSupport/PaStiXSupport.h b/Eigen/src/PaStiXSupport/PaStiXSupport.h index 8a546dc2f..a96c27695 100644 --- a/Eigen/src/PaStiXSupport/PaStiXSupport.h +++ b/Eigen/src/PaStiXSupport/PaStiXSupport.h @@ -125,9 +125,15 @@ namespace internal // This is the base class to interface with PaStiX functions. // Users should not used this class directly. 
template <class Derived> -class PastixBase : internal::noncopyable +class PastixBase : public SparseSolverBase<Derived> { + protected: + typedef SparseSolverBase<Derived> Base; + using Base::derived; + using Base::m_isInitialized; public: + using Base::_solve_impl; + typedef typename internal::pastix_traits<Derived>::MatrixType _MatrixType; typedef _MatrixType MatrixType; typedef typename MatrixType::Scalar Scalar; @@ -138,7 +144,7 @@ class PastixBase : internal::noncopyable public: - PastixBase() : m_initisOk(false), m_analysisIsOk(false), m_factorizationIsOk(false), m_isInitialized(false), m_pastixdata(0), m_size(0) + PastixBase() : m_initisOk(false), m_analysisIsOk(false), m_factorizationIsOk(false), m_pastixdata(0), m_size(0) { init(); } @@ -147,33 +153,10 @@ class PastixBase : internal::noncopyable { clean(); } - - /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A. - * - * \sa compute() - */ - template<typename Rhs> - inline const internal::solve_retval<PastixBase, Rhs> - solve(const MatrixBase<Rhs>& b) const - { - eigen_assert(m_isInitialized && "Pastix solver is not initialized."); - eigen_assert(rows()==b.rows() - && "PastixBase::solve(): invalid number of rows of the right hand side matrix b"); - return internal::solve_retval<PastixBase, Rhs>(*this, b.derived()); - } template<typename Rhs,typename Dest> - bool _solve (const MatrixBase<Rhs> &b, MatrixBase<Dest> &x) const; + bool _solve_impl(const MatrixBase<Rhs> &b, MatrixBase<Dest> &x) const; - Derived& derived() - { - return *static_cast<Derived*>(this); - } - const Derived& derived() const - { - return *static_cast<const Derived*>(this); - } - /** Returns a reference to the integer vector IPARM of PaStiX parameters * to modify the default parameters. 
* The statistics related to the different phases of factorization and solve are saved here as well @@ -228,20 +211,6 @@ class PastixBase : internal::noncopyable return m_info; } - /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A. - * - * \sa compute() - */ - template<typename Rhs> - inline const internal::sparse_solve_retval<PastixBase, Rhs> - solve(const SparseMatrixBase<Rhs>& b) const - { - eigen_assert(m_isInitialized && "Pastix LU, LLT or LDLT is not initialized."); - eigen_assert(rows()==b.rows() - && "PastixBase::solve(): invalid number of rows of the right hand side matrix b"); - return internal::sparse_solve_retval<PastixBase, Rhs>(*this, b.derived()); - } - protected: // Initialize the Pastix data structure, check the matrix @@ -268,7 +237,6 @@ class PastixBase : internal::noncopyable int m_initisOk; int m_analysisIsOk; int m_factorizationIsOk; - bool m_isInitialized; mutable ComputationInfo m_info; mutable pastix_data_t *m_pastixdata; // Data structure for pastix mutable int m_comm; // The MPI communicator identifier @@ -328,7 +296,6 @@ void PastixBase<Derived>::compute(ColSpMatrix& mat) factorize(mat); m_iparm(IPARM_MATRIX_VERIFICATION) = API_NO; - m_isInitialized = m_factorizationIsOk; } @@ -393,7 +360,7 @@ void PastixBase<Derived>::factorize(ColSpMatrix& mat) /* Solve the system */ template<typename Base> template<typename Rhs,typename Dest> -bool PastixBase<Base>::_solve (const MatrixBase<Rhs> &b, MatrixBase<Dest> &x) const +bool PastixBase<Base>::_solve_impl(const MatrixBase<Rhs> &b, MatrixBase<Dest> &x) const { eigen_assert(m_isInitialized && "The matrix should be factorized first"); EIGEN_STATIC_ASSERT((Dest::Flags&RowMajorBit)==0, @@ -450,7 +417,7 @@ class PastixLU : public PastixBase< PastixLU<_MatrixType> > init(); } - PastixLU(const MatrixType& matrix):Base() + explicit PastixLU(const MatrixType& matrix):Base() { init(); compute(matrix); @@ -560,7 +527,7 @@ class PastixLLT : public PastixBase< 
PastixLLT<_MatrixType, _UpLo> > init(); } - PastixLLT(const MatrixType& matrix):Base() + explicit PastixLLT(const MatrixType& matrix):Base() { init(); compute(matrix); @@ -641,7 +608,7 @@ class PastixLDLT : public PastixBase< PastixLDLT<_MatrixType, _UpLo> > init(); } - PastixLDLT(const MatrixType& matrix):Base() + explicit PastixLDLT(const MatrixType& matrix):Base() { init(); compute(matrix); @@ -694,36 +661,6 @@ class PastixLDLT : public PastixBase< PastixLDLT<_MatrixType, _UpLo> > } }; -namespace internal { - -template<typename _MatrixType, typename Rhs> -struct solve_retval<PastixBase<_MatrixType>, Rhs> - : solve_retval_base<PastixBase<_MatrixType>, Rhs> -{ - typedef PastixBase<_MatrixType> Dec; - EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs) - - template<typename Dest> void evalTo(Dest& dst) const - { - dec()._solve(rhs(),dst); - } -}; - -template<typename _MatrixType, typename Rhs> -struct sparse_solve_retval<PastixBase<_MatrixType>, Rhs> - : sparse_solve_retval_base<PastixBase<_MatrixType>, Rhs> -{ - typedef PastixBase<_MatrixType> Dec; - EIGEN_MAKE_SPARSE_SOLVE_HELPERS(Dec,Rhs) - - template<typename Dest> void evalTo(Dest& dst) const - { - this->defaultEvalTo(dst); - } -}; - -} // end namespace internal - } // end namespace Eigen #endif diff --git a/Eigen/src/PardisoSupport/PardisoSupport.h b/Eigen/src/PardisoSupport/PardisoSupport.h index b6571069e..054af6635 100644 --- a/Eigen/src/PardisoSupport/PardisoSupport.h +++ b/Eigen/src/PardisoSupport/PardisoSupport.h @@ -96,10 +96,17 @@ namespace internal } template<class Derived> -class PardisoImpl : internal::noncopyable +class PardisoImpl : public SparseSolveBase<PardisoImpl<Derived> { + protected: + typedef SparseSolveBase<PardisoImpl<Derived> Base; + using Base::derived; + using Base::m_isInitialized; + typedef internal::pardiso_traits<Derived> Traits; public: + using base::_solve_impl; + typedef typename Traits::MatrixType MatrixType; typedef typename Traits::Scalar Scalar; typedef typename Traits::RealScalar 
RealScalar; @@ -118,7 +125,7 @@ class PardisoImpl : internal::noncopyable eigen_assert((sizeof(Index) >= sizeof(_INTEGER_t) && sizeof(Index) <= 8) && "Non-supported index type"); m_iparm.setZero(); m_msglvl = 0; // No output - m_initialized = false; + m_isInitialized = false; } ~PardisoImpl() @@ -136,7 +143,7 @@ class PardisoImpl : internal::noncopyable */ ComputationInfo info() const { - eigen_assert(m_initialized && "Decomposition is not initialized."); + eigen_assert(m_isInitialized && "Decomposition is not initialized."); return m_info; } @@ -165,51 +172,14 @@ class PardisoImpl : internal::noncopyable Derived& factorize(const MatrixType& matrix); Derived& compute(const MatrixType& matrix); - - /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A. - * - * \sa compute() - */ - template<typename Rhs> - inline const internal::solve_retval<PardisoImpl, Rhs> - solve(const MatrixBase<Rhs>& b) const - { - eigen_assert(m_initialized && "Pardiso solver is not initialized."); - eigen_assert(rows()==b.rows() - && "PardisoImpl::solve(): invalid number of rows of the right hand side matrix b"); - return internal::solve_retval<PardisoImpl, Rhs>(*this, b.derived()); - } - - /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A. 
- * - * \sa compute() - */ - template<typename Rhs> - inline const internal::sparse_solve_retval<PardisoImpl, Rhs> - solve(const SparseMatrixBase<Rhs>& b) const - { - eigen_assert(m_initialized && "Pardiso solver is not initialized."); - eigen_assert(rows()==b.rows() - && "PardisoImpl::solve(): invalid number of rows of the right hand side matrix b"); - return internal::sparse_solve_retval<PardisoImpl, Rhs>(*this, b.derived()); - } - - Derived& derived() - { - return *static_cast<Derived*>(this); - } - const Derived& derived() const - { - return *static_cast<const Derived*>(this); - } template<typename BDerived, typename XDerived> - bool _solve(const MatrixBase<BDerived> &b, MatrixBase<XDerived>& x) const; + bool _solve_impl(const MatrixBase<BDerived> &b, MatrixBase<XDerived>& x) const; protected: void pardisoRelease() { - if(m_initialized) // Factorization ran at least once + if(m_isInitialized) // Factorization ran at least once { internal::pardiso_run_selector<Index>::run(m_pt, 1, 1, m_type, -1, m_size, 0, 0, 0, m_perm.data(), 0, m_iparm.data(), m_msglvl, 0, 0); @@ -270,7 +240,7 @@ class PardisoImpl : internal::noncopyable mutable SparseMatrixType m_matrix; ComputationInfo m_info; - bool m_initialized, m_analysisIsOk, m_factorizationIsOk; + bool m_analysisIsOk, m_factorizationIsOk; Index m_type, m_msglvl; mutable void *m_pt[64]; mutable ParameterType m_iparm; @@ -298,7 +268,7 @@ Derived& PardisoImpl<Derived>::compute(const MatrixType& a) manageErrorCode(error); m_analysisIsOk = true; m_factorizationIsOk = true; - m_initialized = true; + m_isInitialized = true; return derived(); } @@ -321,7 +291,7 @@ Derived& PardisoImpl<Derived>::analyzePattern(const MatrixType& a) manageErrorCode(error); m_analysisIsOk = true; m_factorizationIsOk = false; - m_initialized = true; + m_isInitialized = true; return derived(); } @@ -345,7 +315,7 @@ Derived& PardisoImpl<Derived>::factorize(const MatrixType& a) template<class Base> template<typename BDerived,typename XDerived> -bool 
PardisoImpl<Base>::_solve(const MatrixBase<BDerived> &b, MatrixBase<XDerived>& x) const +bool PardisoImpl<Base>::_solve_impl(const MatrixBase<BDerived> &b, MatrixBase<XDerived>& x) const { if(m_iparm[0] == 0) // Factorization was not computed return false; @@ -421,7 +391,7 @@ class PardisoLU : public PardisoImpl< PardisoLU<MatrixType> > pardisoInit(Base::ScalarIsComplex ? 13 : 11); } - PardisoLU(const MatrixType& matrix) + explicit PardisoLU(const MatrixType& matrix) : Base() { pardisoInit(Base::ScalarIsComplex ? 13 : 11); @@ -472,7 +442,7 @@ class PardisoLLT : public PardisoImpl< PardisoLLT<MatrixType,_UpLo> > pardisoInit(Base::ScalarIsComplex ? 4 : 2); } - PardisoLLT(const MatrixType& matrix) + explicit PardisoLLT(const MatrixType& matrix) : Base() { pardisoInit(Base::ScalarIsComplex ? 4 : 2); @@ -530,7 +500,7 @@ class PardisoLDLT : public PardisoImpl< PardisoLDLT<MatrixType,Options> > pardisoInit(Base::ScalarIsComplex ? ( bool(Options&Symmetric) ? 6 : -4 ) : -2); } - PardisoLDLT(const MatrixType& matrix) + explicit PardisoLDLT(const MatrixType& matrix) : Base() { pardisoInit(Base::ScalarIsComplex ? ( bool(Options&Symmetric) ? 
6 : -4 ) : -2); @@ -546,36 +516,6 @@ class PardisoLDLT : public PardisoImpl< PardisoLDLT<MatrixType,Options> > } }; -namespace internal { - -template<typename _Derived, typename Rhs> -struct solve_retval<PardisoImpl<_Derived>, Rhs> - : solve_retval_base<PardisoImpl<_Derived>, Rhs> -{ - typedef PardisoImpl<_Derived> Dec; - EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs) - - template<typename Dest> void evalTo(Dest& dst) const - { - dec()._solve(rhs(),dst); - } -}; - -template<typename Derived, typename Rhs> -struct sparse_solve_retval<PardisoImpl<Derived>, Rhs> - : sparse_solve_retval_base<PardisoImpl<Derived>, Rhs> -{ - typedef PardisoImpl<Derived> Dec; - EIGEN_MAKE_SPARSE_SOLVE_HELPERS(Dec,Rhs) - - template<typename Dest> void evalTo(Dest& dst) const - { - this->defaultEvalTo(dst); - } -}; - -} // end namespace internal - } // end namespace Eigen #endif // EIGEN_PARDISOSUPPORT_H diff --git a/Eigen/src/QR/ColPivHouseholderQR.h b/Eigen/src/QR/ColPivHouseholderQR.h index 4824880f5..370cb69e3 100644 --- a/Eigen/src/QR/ColPivHouseholderQR.h +++ b/Eigen/src/QR/ColPivHouseholderQR.h @@ -13,6 +13,15 @@ namespace Eigen { +namespace internal { +template<typename _MatrixType> struct traits<ColPivHouseholderQR<_MatrixType> > + : traits<_MatrixType> +{ + enum { Flags = 0 }; +}; + +} // end namespace internal + /** \ingroup QR_Module * * \class ColPivHouseholderQR @@ -56,6 +65,7 @@ template<typename _MatrixType> class ColPivHouseholderQR typedef typename internal::plain_row_type<MatrixType>::type RowVectorType; typedef typename internal::plain_row_type<MatrixType, RealScalar>::type RealRowVectorType; typedef HouseholderSequence<MatrixType,typename internal::remove_all<typename HCoeffsType::ConjugateReturnType>::type> HouseholderSequenceType; + typedef typename MatrixType::PlainObject PlainObject; private: @@ -107,7 +117,7 @@ template<typename _MatrixType> class ColPivHouseholderQR * * \sa compute() */ - ColPivHouseholderQR(const MatrixType& matrix) + explicit ColPivHouseholderQR(const 
MatrixType& matrix) : m_qr(matrix.rows(), matrix.cols()), m_hCoeffs((std::min)(matrix.rows(),matrix.cols())), m_colsPermutation(PermIndexType(matrix.cols())), @@ -138,15 +148,15 @@ template<typename _MatrixType> class ColPivHouseholderQR * Output: \verbinclude ColPivHouseholderQR_solve.out */ template<typename Rhs> - inline const internal::solve_retval<ColPivHouseholderQR, Rhs> + inline const Solve<ColPivHouseholderQR, Rhs> solve(const MatrixBase<Rhs>& b) const { eigen_assert(m_isInitialized && "ColPivHouseholderQR is not initialized."); - return internal::solve_retval<ColPivHouseholderQR, Rhs>(*this, b.derived()); + return Solve<ColPivHouseholderQR, Rhs>(*this, b.derived()); } - HouseholderSequenceType householderQ(void) const; - HouseholderSequenceType matrixQ(void) const + HouseholderSequenceType householderQ() const; + HouseholderSequenceType matrixQ() const { return householderQ(); } @@ -284,13 +294,10 @@ template<typename _MatrixType> class ColPivHouseholderQR * \note If this matrix is not invertible, the returned matrix has undefined coefficients. * Use isInvertible() to first determine whether this matrix is invertible. 
*/ - inline const - internal::solve_retval<ColPivHouseholderQR, typename MatrixType::IdentityReturnType> - inverse() const + inline const Inverse<ColPivHouseholderQR> inverse() const { eigen_assert(m_isInitialized && "ColPivHouseholderQR is not initialized."); - return internal::solve_retval<ColPivHouseholderQR,typename MatrixType::IdentityReturnType> - (*this, MatrixType::Identity(m_qr.rows(), m_qr.cols())); + return Inverse<ColPivHouseholderQR>(*this); } inline Index rows() const { return m_qr.rows(); } @@ -382,6 +389,12 @@ template<typename _MatrixType> class ColPivHouseholderQR eigen_assert(m_isInitialized && "Decomposition is not initialized."); return Success; } + + #ifndef EIGEN_PARSED_BY_DOXYGEN + template<typename RhsType, typename DstType> + EIGEN_DEVICE_FUNC + void _solve_impl(const RhsType &rhs, DstType &dst) const; + #endif protected: MatrixType m_qr; @@ -463,20 +476,10 @@ ColPivHouseholderQR<MatrixType>& ColPivHouseholderQR<MatrixType>::compute(const // we store that back into our table: it can't hurt to correct our table. m_colSqNorms.coeffRef(biggest_col_index) = biggest_col_sq_norm; - // if the current biggest column is smaller than epsilon times the initial biggest column, - // terminate to avoid generating nan/inf values. - // Note that here, if we test instead for "biggest == 0", we get a failure every 1000 (or so) - // repetitions of the unit test, with the result of solve() filled with large values of the order - // of 1/(size*epsilon). - if(biggest_col_sq_norm < threshold_helper * RealScalar(rows-k)) - { + // Track the number of meaningful pivots but do not stop the decomposition to make + // sure that the initial matrix is properly reproduced. See bug 941. 
+ if(m_nonzero_pivots==size && biggest_col_sq_norm < threshold_helper * RealScalar(rows-k)) m_nonzero_pivots = k; - m_hCoeffs.tail(size-k).setZero(); - m_qr.bottomRightCorner(rows-k,cols-k) - .template triangularView<StrictlyLower>() - .setZero(); - break; - } // apply the transposition to the columns m_colsTranspositions.coeffRef(k) = biggest_col_index; @@ -505,7 +508,7 @@ ColPivHouseholderQR<MatrixType>& ColPivHouseholderQR<MatrixType>::compute(const } m_colsPermutation.setIdentity(PermIndexType(cols)); - for(PermIndexType k = 0; k < m_nonzero_pivots; ++k) + for(PermIndexType k = 0; k < size/*m_nonzero_pivots*/; ++k) m_colsPermutation.applyTranspositionOnTheRight(k, PermIndexType(m_colsTranspositions.coeff(k))); m_det_pq = (number_of_transpositions%2) ? -1 : 1; @@ -514,54 +517,62 @@ ColPivHouseholderQR<MatrixType>& ColPivHouseholderQR<MatrixType>::compute(const return *this; } -namespace internal { - -template<typename _MatrixType, typename Rhs> -struct solve_retval<ColPivHouseholderQR<_MatrixType>, Rhs> - : solve_retval_base<ColPivHouseholderQR<_MatrixType>, Rhs> +#ifndef EIGEN_PARSED_BY_DOXYGEN +template<typename _MatrixType> +template<typename RhsType, typename DstType> +void ColPivHouseholderQR<_MatrixType>::_solve_impl(const RhsType &rhs, DstType &dst) const { - EIGEN_MAKE_SOLVE_HELPERS(ColPivHouseholderQR<_MatrixType>,Rhs) + eigen_assert(rhs.rows() == rows()); + + const Index nonzero_pivots = nonzeroPivots(); - template<typename Dest> void evalTo(Dest& dst) const + if(nonzero_pivots == 0) { - eigen_assert(rhs().rows() == dec().rows()); + dst.setZero(); + return; + } - const Index cols = dec().cols(), - nonzero_pivots = dec().nonzeroPivots(); + typename RhsType::PlainObject c(rhs); - if(nonzero_pivots == 0) - { - dst.setZero(); - return; - } + // Note that the matrix Q = H_0^* H_1^*... 
so its inverse is Q^* = (H_0 H_1 ...)^T + c.applyOnTheLeft(householderSequence(m_qr, m_hCoeffs) + .setLength(nonzero_pivots) + .transpose() + ); - typename Rhs::PlainObject c(rhs()); + m_qr.topLeftCorner(nonzero_pivots, nonzero_pivots) + .template triangularView<Upper>() + .solveInPlace(c.topRows(nonzero_pivots)); - // Note that the matrix Q = H_0^* H_1^*... so its inverse is Q^* = (H_0 H_1 ...)^T - c.applyOnTheLeft(householderSequence(dec().matrixQR(), dec().hCoeffs()) - .setLength(dec().nonzeroPivots()) - .transpose() - ); + for(Index i = 0; i < nonzero_pivots; ++i) dst.row(m_colsPermutation.indices().coeff(i)) = c.row(i); + for(Index i = nonzero_pivots; i < cols(); ++i) dst.row(m_colsPermutation.indices().coeff(i)).setZero(); +} +#endif - dec().matrixR() - .topLeftCorner(nonzero_pivots, nonzero_pivots) - .template triangularView<Upper>() - .solveInPlace(c.topRows(nonzero_pivots)); +namespace internal { - for(Index i = 0; i < nonzero_pivots; ++i) dst.row(dec().colsPermutation().indices().coeff(i)) = c.row(i); - for(Index i = nonzero_pivots; i < cols; ++i) dst.row(dec().colsPermutation().indices().coeff(i)).setZero(); +template<typename DstXprType, typename MatrixType, typename Scalar> +struct Assignment<DstXprType, Inverse<ColPivHouseholderQR<MatrixType> >, internal::assign_op<Scalar>, Dense2Dense, Scalar> +{ + typedef ColPivHouseholderQR<MatrixType> QrType; + typedef Inverse<QrType> SrcXprType; + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &) + { + dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.cols())); } }; } // end namespace internal -/** \returns the matrix Q as a sequence of householder transformations */ +/** \returns the matrix Q as a sequence of householder transformations. 
+ * You can extract the meaningful part only by using: + * \code qr.householderQ().setLength(qr.nonzeroPivots()) */ template<typename MatrixType> typename ColPivHouseholderQR<MatrixType>::HouseholderSequenceType ColPivHouseholderQR<MatrixType> ::householderQ() const { eigen_assert(m_isInitialized && "ColPivHouseholderQR is not initialized."); - return HouseholderSequenceType(m_qr, m_hCoeffs.conjugate()).setLength(m_nonzero_pivots); + return HouseholderSequenceType(m_qr, m_hCoeffs.conjugate()); } #ifndef __CUDACC__ diff --git a/Eigen/src/QR/ColPivHouseholderQR_MKL.h b/Eigen/src/QR/ColPivHouseholderQR_MKL.h index b5b198326..7b6ba0a5e 100644 --- a/Eigen/src/QR/ColPivHouseholderQR_MKL.h +++ b/Eigen/src/QR/ColPivHouseholderQR_MKL.h @@ -49,7 +49,6 @@ ColPivHouseholderQR<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW, Dynamic, Dynami { \ using std::abs; \ typedef Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW, Dynamic, Dynamic> MatrixType; \ - typedef MatrixType::Scalar Scalar; \ typedef MatrixType::RealScalar RealScalar; \ Index rows = matrix.rows();\ Index cols = matrix.cols();\ diff --git a/Eigen/src/QR/FullPivHouseholderQR.h b/Eigen/src/QR/FullPivHouseholderQR.h index a7b0fc16f..5712d175c 100644 --- a/Eigen/src/QR/FullPivHouseholderQR.h +++ b/Eigen/src/QR/FullPivHouseholderQR.h @@ -15,6 +15,12 @@ namespace Eigen { namespace internal { +template<typename _MatrixType> struct traits<FullPivHouseholderQR<_MatrixType> > + : traits<_MatrixType> +{ + enum { Flags = 0 }; +}; + template<typename MatrixType> struct FullPivHouseholderQRMatrixQReturnType; template<typename MatrixType> @@ -23,7 +29,7 @@ struct traits<FullPivHouseholderQRMatrixQReturnType<MatrixType> > typedef typename MatrixType::PlainObject ReturnType; }; -} +} // end namespace internal /** \ingroup QR_Module * @@ -69,6 +75,7 @@ template<typename _MatrixType> class FullPivHouseholderQR typedef PermutationMatrix<ColsAtCompileTime, MaxColsAtCompileTime> PermutationType; typedef typename 
internal::plain_row_type<MatrixType>::type RowVectorType; typedef typename internal::plain_col_type<MatrixType>::type ColVectorType; + typedef typename MatrixType::PlainObject PlainObject; /** \brief Default Constructor. * @@ -113,7 +120,7 @@ template<typename _MatrixType> class FullPivHouseholderQR * * \sa compute() */ - FullPivHouseholderQR(const MatrixType& matrix) + explicit FullPivHouseholderQR(const MatrixType& matrix) : m_qr(matrix.rows(), matrix.cols()), m_hCoeffs((std::min)(matrix.rows(), matrix.cols())), m_rows_transpositions((std::min)(matrix.rows(), matrix.cols())), @@ -145,11 +152,11 @@ template<typename _MatrixType> class FullPivHouseholderQR * Output: \verbinclude FullPivHouseholderQR_solve.out */ template<typename Rhs> - inline const internal::solve_retval<FullPivHouseholderQR, Rhs> + inline const Solve<FullPivHouseholderQR, Rhs> solve(const MatrixBase<Rhs>& b) const { eigen_assert(m_isInitialized && "FullPivHouseholderQR is not initialized."); - return internal::solve_retval<FullPivHouseholderQR, Rhs>(*this, b.derived()); + return Solve<FullPivHouseholderQR, Rhs>(*this, b.derived()); } /** \returns Expression object representing the matrix Q @@ -280,13 +287,11 @@ template<typename _MatrixType> class FullPivHouseholderQR * * \note If this matrix is not invertible, the returned matrix has undefined coefficients. * Use isInvertible() to first determine whether this matrix is invertible. 
- */ inline const - internal::solve_retval<FullPivHouseholderQR, typename MatrixType::IdentityReturnType> - inverse() const + */ + inline const Inverse<FullPivHouseholderQR> inverse() const { eigen_assert(m_isInitialized && "FullPivHouseholderQR is not initialized."); - return internal::solve_retval<FullPivHouseholderQR,typename MatrixType::IdentityReturnType> - (*this, MatrixType::Identity(m_qr.rows(), m_qr.cols())); + return Inverse<FullPivHouseholderQR>(*this); } inline Index rows() const { return m_qr.rows(); } @@ -366,6 +371,12 @@ template<typename _MatrixType> class FullPivHouseholderQR * diagonal coefficient of U. */ RealScalar maxPivot() const { return m_maxpivot; } + + #ifndef EIGEN_PARSED_BY_DOXYGEN + template<typename RhsType, typename DstType> + EIGEN_DEVICE_FUNC + void _solve_impl(const RhsType &rhs, DstType &dst) const; + #endif protected: MatrixType m_qr; @@ -485,46 +496,53 @@ FullPivHouseholderQR<MatrixType>& FullPivHouseholderQR<MatrixType>::compute(cons return *this; } -namespace internal { - -template<typename _MatrixType, typename Rhs> -struct solve_retval<FullPivHouseholderQR<_MatrixType>, Rhs> - : solve_retval_base<FullPivHouseholderQR<_MatrixType>, Rhs> +#ifndef EIGEN_PARSED_BY_DOXYGEN +template<typename _MatrixType> +template<typename RhsType, typename DstType> +void FullPivHouseholderQR<_MatrixType>::_solve_impl(const RhsType &rhs, DstType &dst) const { - EIGEN_MAKE_SOLVE_HELPERS(FullPivHouseholderQR<_MatrixType>,Rhs) + eigen_assert(rhs.rows() == rows()); + const Index l_rank = rank(); - template<typename Dest> void evalTo(Dest& dst) const + // FIXME introduce nonzeroPivots() and use it here. and more generally, + // make the same improvements in this dec as in FullPivLU. + if(l_rank==0) { - const Index rows = dec().rows(), cols = dec().cols(); - eigen_assert(rhs().rows() == rows); + dst.setZero(); + return; + } - // FIXME introduce nonzeroPivots() and use it here. 
and more generally, - // make the same improvements in this dec as in FullPivLU. - if(dec().rank()==0) - { - dst.setZero(); - return; - } + typename RhsType::PlainObject c(rhs); - typename Rhs::PlainObject c(rhs()); + Matrix<Scalar,1,RhsType::ColsAtCompileTime> temp(rhs.cols()); + for (Index k = 0; k < l_rank; ++k) + { + Index remainingSize = rows()-k; + c.row(k).swap(c.row(m_rows_transpositions.coeff(k))); + c.bottomRightCorner(remainingSize, rhs.cols()) + .applyHouseholderOnTheLeft(m_qr.col(k).tail(remainingSize-1), + m_hCoeffs.coeff(k), &temp.coeffRef(0)); + } - Matrix<Scalar,1,Rhs::ColsAtCompileTime> temp(rhs().cols()); - for (Index k = 0; k < dec().rank(); ++k) - { - Index remainingSize = rows-k; - c.row(k).swap(c.row(dec().rowsTranspositions().coeff(k))); - c.bottomRightCorner(remainingSize, rhs().cols()) - .applyHouseholderOnTheLeft(dec().matrixQR().col(k).tail(remainingSize-1), - dec().hCoeffs().coeff(k), &temp.coeffRef(0)); - } + m_qr.topLeftCorner(l_rank, l_rank) + .template triangularView<Upper>() + .solveInPlace(c.topRows(l_rank)); - dec().matrixQR() - .topLeftCorner(dec().rank(), dec().rank()) - .template triangularView<Upper>() - .solveInPlace(c.topRows(dec().rank())); + for(Index i = 0; i < l_rank; ++i) dst.row(m_cols_permutation.indices().coeff(i)) = c.row(i); + for(Index i = l_rank; i < cols(); ++i) dst.row(m_cols_permutation.indices().coeff(i)).setZero(); +} +#endif - for(Index i = 0; i < dec().rank(); ++i) dst.row(dec().colsPermutation().indices().coeff(i)) = c.row(i); - for(Index i = dec().rank(); i < cols; ++i) dst.row(dec().colsPermutation().indices().coeff(i)).setZero(); +namespace internal { + +template<typename DstXprType, typename MatrixType, typename Scalar> +struct Assignment<DstXprType, Inverse<FullPivHouseholderQR<MatrixType> >, internal::assign_op<Scalar>, Dense2Dense, Scalar> +{ + typedef FullPivHouseholderQR<MatrixType> QrType; + typedef Inverse<QrType> SrcXprType; + static void run(DstXprType &dst, const SrcXprType &src, const 
internal::assign_op<Scalar> &) + { + dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.cols())); } }; @@ -550,7 +568,7 @@ public: : m_qr(qr), m_hCoeffs(hCoeffs), m_rowsTranspositions(rowsTranspositions) - {} + {} template <typename ResultType> void evalTo(ResultType& result) const @@ -580,8 +598,8 @@ public: } } - Index rows() const { return m_qr.rows(); } - Index cols() const { return m_qr.rows(); } + Index rows() const { return m_qr.rows(); } + Index cols() const { return m_qr.rows(); } protected: typename MatrixType::Nested m_qr; @@ -589,6 +607,11 @@ protected: typename IntDiagSizeVectorType::Nested m_rowsTranspositions; }; +// template<typename MatrixType> +// struct evaluator<FullPivHouseholderQRMatrixQReturnType<MatrixType> > +// : public evaluator<ReturnByValue<FullPivHouseholderQRMatrixQReturnType<MatrixType> > > +// {}; + } // end namespace internal template<typename MatrixType> diff --git a/Eigen/src/QR/HouseholderQR.h b/Eigen/src/QR/HouseholderQR.h index 352dbf3f0..f22008494 100644 --- a/Eigen/src/QR/HouseholderQR.h +++ b/Eigen/src/QR/HouseholderQR.h @@ -91,7 +91,7 @@ template<typename _MatrixType> class HouseholderQR * * \sa compute() */ - HouseholderQR(const MatrixType& matrix) + explicit HouseholderQR(const MatrixType& matrix) : m_qr(matrix.rows(), matrix.cols()), m_hCoeffs((std::min)(matrix.rows(),matrix.cols())), m_temp(matrix.cols()), @@ -118,11 +118,11 @@ template<typename _MatrixType> class HouseholderQR * Output: \verbinclude HouseholderQR_solve.out */ template<typename Rhs> - inline const internal::solve_retval<HouseholderQR, Rhs> + inline const Solve<HouseholderQR, Rhs> solve(const MatrixBase<Rhs>& b) const { eigen_assert(m_isInitialized && "HouseholderQR is not initialized."); - return internal::solve_retval<HouseholderQR, Rhs>(*this, b.derived()); + return Solve<HouseholderQR, Rhs>(*this, b.derived()); } /** This method returns an expression of the unitary matrix Q as a sequence of Householder transformations. 
@@ -187,6 +187,12 @@ template<typename _MatrixType> class HouseholderQR * For advanced uses only. */ const HCoeffsType& hCoeffs() const { return m_hCoeffs; } + + #ifndef EIGEN_PARSED_BY_DOXYGEN + template<typename RhsType, typename DstType> + EIGEN_DEVICE_FUNC + void _solve_impl(const RhsType &rhs, DstType &dst) const; + #endif protected: MatrixType m_qr; @@ -283,8 +289,8 @@ struct householder_qr_inplace_blocked for (k = 0; k < size; k += blockSize) { Index bs = (std::min)(size-k,blockSize); // actual size of the block - Index tcols = cols - k - bs; // trailing columns - Index brows = rows-k; // rows of the block + Index tcols = cols - k - bs; // trailing columns + Index brows = rows-k; // rows of the block // partition the matrix: // A00 | A01 | A02 @@ -302,43 +308,38 @@ struct householder_qr_inplace_blocked if(tcols) { BlockType A21_22 = mat.block(k,k+bs,brows,tcols); - apply_block_householder_on_the_left(A21_22,A11_21,hCoeffsSegment.adjoint()); + apply_block_householder_on_the_left(A21_22,A11_21,hCoeffsSegment, false); // false == backward } } } }; -template<typename _MatrixType, typename Rhs> -struct solve_retval<HouseholderQR<_MatrixType>, Rhs> - : solve_retval_base<HouseholderQR<_MatrixType>, Rhs> -{ - EIGEN_MAKE_SOLVE_HELPERS(HouseholderQR<_MatrixType>,Rhs) - - template<typename Dest> void evalTo(Dest& dst) const - { - const Index rows = dec().rows(), cols = dec().cols(); - const Index rank = (std::min)(rows, cols); - eigen_assert(rhs().rows() == rows); +} // end namespace internal - typename Rhs::PlainObject c(rhs()); +#ifndef EIGEN_PARSED_BY_DOXYGEN +template<typename _MatrixType> +template<typename RhsType, typename DstType> +void HouseholderQR<_MatrixType>::_solve_impl(const RhsType &rhs, DstType &dst) const +{ + const Index rank = (std::min)(rows(), cols()); + eigen_assert(rhs.rows() == rows()); - // Note that the matrix Q = H_0^* H_1^*... 
so its inverse is Q^* = (H_0 H_1 ...)^T - c.applyOnTheLeft(householderSequence( - dec().matrixQR().leftCols(rank), - dec().hCoeffs().head(rank)).transpose() - ); + typename RhsType::PlainObject c(rhs); - dec().matrixQR() - .topLeftCorner(rank, rank) - .template triangularView<Upper>() - .solveInPlace(c.topRows(rank)); + // Note that the matrix Q = H_0^* H_1^*... so its inverse is Q^* = (H_0 H_1 ...)^T + c.applyOnTheLeft(householderSequence( + m_qr.leftCols(rank), + m_hCoeffs.head(rank)).transpose() + ); - dst.topRows(rank) = c.topRows(rank); - dst.bottomRows(cols-rank).setZero(); - } -}; + m_qr.topLeftCorner(rank, rank) + .template triangularView<Upper>() + .solveInPlace(c.topRows(rank)); -} // end namespace internal + dst.topRows(rank) = c.topRows(rank); + dst.bottomRows(cols()-rank).setZero(); +} +#endif /** Performs the QR factorization of the given matrix \a matrix. The result of * the factorization is stored into \c *this, and a reference to \c *this diff --git a/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h b/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h index a2cc2a9e2..54a1b21b8 100644 --- a/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +++ b/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h @@ -2,6 +2,7 @@ // for linear algebra. // // Copyright (C) 2012 Desire Nuentsa <desire.nuentsa_wakam@inria.fr> +// Copyright (C) 2014 Gael Guennebaud <gael.guennebaud@inria.fr> // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. 
If a copy of the MPL was not distributed @@ -54,29 +55,26 @@ namespace Eigen { * */ template<typename _MatrixType> -class SPQR +class SPQR : public SparseSolverBase<SPQR<_MatrixType> > { + protected: + typedef SparseSolverBase<SPQR<_MatrixType> > Base; + using Base::m_isInitialized; public: typedef typename _MatrixType::Scalar Scalar; typedef typename _MatrixType::RealScalar RealScalar; typedef UF_long Index ; typedef SparseMatrix<Scalar, ColMajor, Index> MatrixType; - typedef PermutationMatrix<Dynamic, Dynamic> PermutationType; + typedef Map<PermutationMatrix<Dynamic, Dynamic, Index> > PermutationType; public: SPQR() - : m_isInitialized(false), - m_ordering(SPQR_ORDERING_DEFAULT), - m_allow_tol(SPQR_DEFAULT_TOL), - m_tolerance (NumTraits<Scalar>::epsilon()) + : m_ordering(SPQR_ORDERING_DEFAULT), m_allow_tol(SPQR_DEFAULT_TOL), m_tolerance (NumTraits<Scalar>::epsilon()), m_useDefaultThreshold(true) { cholmod_l_start(&m_cc); } - SPQR(const _MatrixType& matrix) - : m_isInitialized(false), - m_ordering(SPQR_ORDERING_DEFAULT), - m_allow_tol(SPQR_DEFAULT_TOL), - m_tolerance (NumTraits<Scalar>::epsilon()) + explicit SPQR(const _MatrixType& matrix) + : m_ordering(SPQR_ORDERING_DEFAULT), m_allow_tol(SPQR_DEFAULT_TOL), m_tolerance (NumTraits<Scalar>::epsilon()), m_useDefaultThreshold(true) { cholmod_l_start(&m_cc); compute(matrix); @@ -101,10 +99,25 @@ class SPQR if(m_isInitialized) SPQR_free(); MatrixType mat(matrix); + + /* Compute the default threshold as in MatLab, see: + * Tim Davis, "Algorithm 915, SuiteSparseQR: Multifrontal Multithreaded Rank-Revealing + * Sparse QR Factorization, ACM Trans. on Math. Soft. 
38(1), 2011, Page 8:3 + */ + RealScalar pivotThreshold = m_tolerance; + if(m_useDefaultThreshold) + { + RealScalar max2Norm = 0.0; + for (int j = 0; j < mat.cols(); j++) max2Norm = numext::maxi(max2Norm, mat.col(j).norm()); + if(max2Norm==RealScalar(0)) + max2Norm = RealScalar(1); + pivotThreshold = 20 * (mat.rows() + mat.cols()) * max2Norm * NumTraits<RealScalar>::epsilon(); + } + cholmod_sparse A; A = viewAsCholmod(mat); Index col = matrix.cols(); - m_rank = SuiteSparseQR<Scalar>(m_ordering, m_tolerance, col, &A, + m_rank = SuiteSparseQR<Scalar>(m_ordering, pivotThreshold, col, &A, &m_cR, &m_E, &m_H, &m_HPinv, &m_HTau, &m_cc); if (!m_cR) @@ -120,41 +133,37 @@ class SPQR /** * Get the number of rows of the input matrix and the Q matrix */ - inline Index rows() const {return m_H->nrow; } + inline Index rows() const {return m_cR->nrow; } /** * Get the number of columns of the input matrix. */ inline Index cols() const { return m_cR->ncol; } - - /** \returns the solution X of \f$ A X = B \f$ using the current decomposition of A. 
- * - * \sa compute() - */ - template<typename Rhs> - inline const internal::solve_retval<SPQR, Rhs> solve(const MatrixBase<Rhs>& B) const - { - eigen_assert(m_isInitialized && " The QR factorization should be computed first, call compute()"); - eigen_assert(this->rows()==B.rows() - && "SPQR::solve(): invalid number of rows of the right hand side matrix B"); - return internal::solve_retval<SPQR, Rhs>(*this, B.derived()); - } template<typename Rhs, typename Dest> - void _solve(const MatrixBase<Rhs> &b, MatrixBase<Dest> &dest) const + void _solve_impl(const MatrixBase<Rhs> &b, MatrixBase<Dest> &dest) const { eigen_assert(m_isInitialized && " The QR factorization should be computed first, call compute()"); eigen_assert(b.cols()==1 && "This method is for vectors only"); - + //Compute Q^T * b - typename Dest::PlainObject y; + typename Dest::PlainObject y, y2; y = matrixQ().transpose() * b; - // Solves with the triangular matrix R + + // Solves with the triangular matrix R Index rk = this->rank(); - y.topRows(rk) = this->matrixR().topLeftCorner(rk, rk).template triangularView<Upper>().solve(y.topRows(rk)); - y.bottomRows(cols()-rk).setZero(); + y2 = y; + y.resize((std::max)(cols(),Index(y.rows())),y.cols()); + y.topRows(rk) = this->matrixR().topLeftCorner(rk, rk).template triangularView<Upper>().solve(y2.topRows(rk)); + // Apply the column permutation - dest.topRows(cols()) = colsPermutation() * y.topRows(cols()); + // colsPermutation() performs a copy of the permutation, + // so let's apply it manually: + for(Index i = 0; i < rk; ++i) dest.row(m_E[i]) = y.row(i); + for(Index i = rk; i < cols(); ++i) dest.row(m_E[i]).setZero(); + +// y.bottomRows(y.rows()-rk).setZero(); +// dest = colsPermutation() * y.topRows(cols()); m_info = Success; } @@ -179,11 +188,7 @@ class SPQR PermutationType colsPermutation() const { eigen_assert(m_isInitialized && "Decomposition is not initialized."); - Index n = m_cR->ncol; - PermutationType colsPerm(n); - for(Index j = 0; j <n; j++) 
colsPerm.indices()(j) = m_E[j]; - return colsPerm; - + return PermutationType(m_E, m_cR->ncol); } /** * Gets the rank of the matrix. @@ -197,7 +202,11 @@ class SPQR /// Set the fill-reducing ordering method to be used void setSPQROrdering(int ord) { m_ordering = ord;} /// Set the tolerance tol to treat columns with 2-norm < =tol as zero - void setPivotThreshold(const RealScalar& tol) { m_tolerance = tol; } + void setPivotThreshold(const RealScalar& tol) + { + m_useDefaultThreshold = false; + m_tolerance = tol; + } /** \returns a pointer to the SPQR workspace */ cholmod_common *cholmodCommon() const { return &m_cc; } @@ -214,7 +223,6 @@ class SPQR return m_info; } protected: - bool m_isInitialized; bool m_analysisIsOk; bool m_factorizationIsOk; mutable bool m_isRUpToDate; @@ -230,6 +238,7 @@ class SPQR mutable cholmod_dense *m_HTau; // The Householder coefficients mutable Index m_rank; // The rank of the matrix mutable cholmod_common m_cc; // Workspace and parameters + bool m_useDefaultThreshold; // Use default threshold template<typename ,typename > friend struct SPQR_QProduct; }; @@ -293,22 +302,5 @@ struct SPQRMatrixQTransposeReturnType{ const SPQRType& m_spqr; }; -namespace internal { - -template<typename _MatrixType, typename Rhs> -struct solve_retval<SPQR<_MatrixType>, Rhs> - : solve_retval_base<SPQR<_MatrixType>, Rhs> -{ - typedef SPQR<_MatrixType> Dec; - EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs) - - template<typename Dest> void evalTo(Dest& dst) const - { - dec()._solve(rhs(),dst); - } -}; - -} // end namespace internal - }// End namespace Eigen #endif diff --git a/Eigen/src/SVD/BDCSVD.h b/Eigen/src/SVD/BDCSVD.h new file mode 100644 index 000000000..dad59bcca --- /dev/null +++ b/Eigen/src/SVD/BDCSVD.h @@ -0,0 +1,1172 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. 
+// +// We used the "A Divide-And-Conquer Algorithm for the Bidiagonal SVD" +// research report written by Ming Gu and Stanley C.Eisenstat +// The code variable names correspond to the names they used in their +// report +// +// Copyright (C) 2013 Gauthier Brun <brun.gauthier@gmail.com> +// Copyright (C) 2013 Nicolas Carre <nicolas.carre@ensimag.fr> +// Copyright (C) 2013 Jean Ceccato <jean.ceccato@ensimag.fr> +// Copyright (C) 2013 Pierre Zoppitelli <pierre.zoppitelli@ensimag.fr> +// Copyright (C) 2013 Jitse Niesen <jitse@maths.leeds.ac.uk> +// Copyright (C) 2014 Gael Guennebaud <gael.guennebaud@inria.fr> +// +// Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_BDCSVD_H +#define EIGEN_BDCSVD_H +// #define EIGEN_BDCSVD_DEBUG_VERBOSE +// #define EIGEN_BDCSVD_SANITY_CHECKS +namespace Eigen { + +#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE +IOFormat bdcsvdfmt(8, 0, ", ", "\n", " [", "]"); +#endif + +template<typename _MatrixType> class BDCSVD; + +namespace internal { + +template<typename _MatrixType> +struct traits<BDCSVD<_MatrixType> > +{ + typedef _MatrixType MatrixType; +}; + +} // end namespace internal + + +/** \ingroup SVD_Module + * + * + * \class BDCSVD + * + * \brief class Bidiagonal Divide and Conquer SVD + * + * \param MatrixType the type of the matrix of which we are computing the SVD decomposition + * We plan to have a very similar interface to JacobiSVD on this class. + * It should be used to speed up the computation of the SVD for large matrices. 
+ */ +template<typename _MatrixType> +class BDCSVD : public SVDBase<BDCSVD<_MatrixType> > +{ + typedef SVDBase<BDCSVD> Base; + +public: + using Base::rows; + using Base::cols; + using Base::computeU; + using Base::computeV; + + typedef _MatrixType MatrixType; + typedef typename MatrixType::Scalar Scalar; + typedef typename NumTraits<typename MatrixType::Scalar>::Real RealScalar; + typedef typename MatrixType::Index Index; + enum { + RowsAtCompileTime = MatrixType::RowsAtCompileTime, + ColsAtCompileTime = MatrixType::ColsAtCompileTime, + DiagSizeAtCompileTime = EIGEN_SIZE_MIN_PREFER_DYNAMIC(RowsAtCompileTime, ColsAtCompileTime), + MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, + MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime, + MaxDiagSizeAtCompileTime = EIGEN_SIZE_MIN_PREFER_FIXED(MaxRowsAtCompileTime, MaxColsAtCompileTime), + MatrixOptions = MatrixType::Options + }; + + typedef typename Base::MatrixUType MatrixUType; + typedef typename Base::MatrixVType MatrixVType; + typedef typename Base::SingularValuesType SingularValuesType; + + typedef Matrix<Scalar, Dynamic, Dynamic> MatrixX; + typedef Matrix<RealScalar, Dynamic, Dynamic> MatrixXr; + typedef Matrix<RealScalar, Dynamic, 1> VectorType; + typedef Array<RealScalar, Dynamic, 1> ArrayXr; + typedef Array<Index,1,Dynamic> ArrayXi; + + /** \brief Default Constructor. + * + * The default constructor is useful in cases in which the user intends to + * perform decompositions via BDCSVD::compute(const MatrixType&). + */ + BDCSVD() : m_algoswap(16), m_numIters(0) + {} + + + /** \brief Default Constructor with memory preallocation + * + * Like the default constructor but with preallocation of the internal data + * according to the specified problem size. + * \sa BDCSVD() + */ + BDCSVD(Index rows, Index cols, unsigned int computationOptions = 0) + : m_algoswap(16), m_numIters(0) + { + allocate(rows, cols, computationOptions); + } + + /** \brief Constructor performing the decomposition of given matrix. 
+ * + * \param matrix the matrix to decompose + * \param computationOptions optional parameter allowing to specify if you want full or thin U or V unitaries to be computed. + * By default, none is computed. This is a bit-field, the possible bits are #ComputeFullU, #ComputeThinU, + * #ComputeFullV, #ComputeThinV. + * + * Thin unitaries are only available if your matrix type has a Dynamic number of columns (for example MatrixXf). They also are not + * available with the (non-default) FullPivHouseholderQR preconditioner. + */ + BDCSVD(const MatrixType& matrix, unsigned int computationOptions = 0) + : m_algoswap(16), m_numIters(0) + { + compute(matrix, computationOptions); + } + + ~BDCSVD() + { + } + + /** \brief Method performing the decomposition of given matrix using custom options. + * + * \param matrix the matrix to decompose + * \param computationOptions optional parameter allowing to specify if you want full or thin U or V unitaries to be computed. + * By default, none is computed. This is a bit-field, the possible bits are #ComputeFullU, #ComputeThinU, + * #ComputeFullV, #ComputeThinV. + * + * Thin unitaries are only available if your matrix type has a Dynamic number of columns (for example MatrixXf). They also are not + * available with the (non-default) FullPivHouseholderQR preconditioner. + */ + BDCSVD& compute(const MatrixType& matrix, unsigned int computationOptions); + + /** \brief Method performing the decomposition of given matrix using current options. + * + * \param matrix the matrix to decompose + * + * This method uses the current \a computationOptions, as already passed to the constructor or to compute(const MatrixType&, unsigned int). 
+ */ + BDCSVD& compute(const MatrixType& matrix) + { + return compute(matrix, this->m_computationOptions); + } + + void setSwitchSize(int s) + { + eigen_assert(s>3 && "BDCSVD the size of the algo switch has to be greater than 3"); + m_algoswap = s; + } + +private: + void allocate(Index rows, Index cols, unsigned int computationOptions); + void divide(Index firstCol, Index lastCol, Index firstRowW, Index firstColW, Index shift); + void computeSVDofM(Index firstCol, Index n, MatrixXr& U, VectorType& singVals, MatrixXr& V); + void computeSingVals(const ArrayXr& col0, const ArrayXr& diag, const ArrayXi& perm, VectorType& singVals, ArrayXr& shifts, ArrayXr& mus); + void perturbCol0(const ArrayXr& col0, const ArrayXr& diag, const ArrayXi& perm, const VectorType& singVals, const ArrayXr& shifts, const ArrayXr& mus, ArrayXr& zhat); + void computeSingVecs(const ArrayXr& zhat, const ArrayXr& diag, const ArrayXi& perm, const VectorType& singVals, const ArrayXr& shifts, const ArrayXr& mus, MatrixXr& U, MatrixXr& V); + void deflation43(Index firstCol, Index shift, Index i, Index size); + void deflation44(Index firstColu , Index firstColm, Index firstRowW, Index firstColW, Index i, Index j, Index size); + void deflation(Index firstCol, Index lastCol, Index k, Index firstRowW, Index firstColW, Index shift); + template<typename HouseholderU, typename HouseholderV, typename NaiveU, typename NaiveV> + void copyUV(const HouseholderU &householderU, const HouseholderV &householderV, const NaiveU &naiveU, const NaiveV &naivev); + static void structured_update(Block<MatrixXr,Dynamic,Dynamic> A, const MatrixXr &B, Index n1); + static RealScalar secularEq(RealScalar x, const ArrayXr& col0, const ArrayXr& diag, const ArrayXi &perm, const ArrayXr& diagShifted, RealScalar shift); + +protected: + MatrixXr m_naiveU, m_naiveV; + MatrixXr m_computed; + Index m_nRec; + int m_algoswap; + bool m_isTranspose, m_compU, m_compV; + + using Base::m_singularValues; + using Base::m_diagSize; + using 
Base::m_computeFullU; + using Base::m_computeFullV; + using Base::m_computeThinU; + using Base::m_computeThinV; + using Base::m_matrixU; + using Base::m_matrixV; + using Base::m_isInitialized; + using Base::m_nonzeroSingularValues; + +public: + int m_numIters; +}; //end class BDCSVD + + +// Method to allocate and initialize matrix and attributes +template<typename MatrixType> +void BDCSVD<MatrixType>::allocate(Index rows, Index cols, unsigned int computationOptions) +{ + m_isTranspose = (cols > rows); + + if (Base::allocate(rows, cols, computationOptions)) + return; + + m_computed = MatrixXr::Zero(m_diagSize + 1, m_diagSize ); + m_compU = computeV(); + m_compV = computeU(); + if (m_isTranspose) + std::swap(m_compU, m_compV); + + if (m_compU) m_naiveU = MatrixXr::Zero(m_diagSize + 1, m_diagSize + 1 ); + else m_naiveU = MatrixXr::Zero(2, m_diagSize + 1 ); + + if (m_compV) m_naiveV = MatrixXr::Zero(m_diagSize, m_diagSize); +}// end allocate + +template<typename MatrixType> +BDCSVD<MatrixType>& BDCSVD<MatrixType>::compute(const MatrixType& matrix, unsigned int computationOptions) +{ +#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE + std::cout << "\n\n\n======================================================================================================================\n\n\n"; +#endif + allocate(matrix.rows(), matrix.cols(), computationOptions); + using std::abs; + + //**** step 0 - Copy the input matrix and apply scaling to reduce over/under-flows + RealScalar scale = matrix.cwiseAbs().maxCoeff(); + if(scale==RealScalar(0)) scale = RealScalar(1); + MatrixX copy; + if (m_isTranspose) copy = matrix.adjoint()/scale; + else copy = matrix/scale; + + //**** step 1 - Bidiagonalization + internal::UpperBidiagonalization<MatrixX> bid(copy); + + //**** step 2 - Divide & Conquer + m_naiveU.setZero(); + m_naiveV.setZero(); + m_computed.topRows(m_diagSize) = bid.bidiagonal().toDenseMatrix().transpose(); + m_computed.template bottomRows<1>().setZero(); + divide(0, m_diagSize - 1, 0, 0, 0); + + 
//**** step 3 - Copy singular values and vectors + for (int i=0; i<m_diagSize; i++) + { + RealScalar a = abs(m_computed.coeff(i, i)); + m_singularValues.coeffRef(i) = a * scale; + if (a == 0) + { + m_nonzeroSingularValues = i; + m_singularValues.tail(m_diagSize - i - 1).setZero(); + break; + } + else if (i == m_diagSize - 1) + { + m_nonzeroSingularValues = i + 1; + break; + } + } +#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE +// std::cout << "m_naiveU\n" << m_naiveU << "\n\n"; +// std::cout << "m_naiveV\n" << m_naiveV << "\n\n"; +#endif + if(m_isTranspose) copyUV(bid.householderV(), bid.householderU(), m_naiveV, m_naiveU); + else copyUV(bid.householderU(), bid.householderV(), m_naiveU, m_naiveV); + + m_isInitialized = true; + return *this; +}// end compute + + +template<typename MatrixType> +template<typename HouseholderU, typename HouseholderV, typename NaiveU, typename NaiveV> +void BDCSVD<MatrixType>::copyUV(const HouseholderU &householderU, const HouseholderV &householderV, const NaiveU &naiveU, const NaiveV &naiveV) +{ + // Note exchange of U and V: m_matrixU is set from m_naiveV and vice versa + if (computeU()) + { + Index Ucols = m_computeThinU ? m_diagSize : householderU.cols(); + m_matrixU = MatrixX::Identity(householderU.cols(), Ucols); + m_matrixU.topLeftCorner(m_diagSize, m_diagSize) = naiveV.template cast<Scalar>().topLeftCorner(m_diagSize, m_diagSize); + householderU.applyThisOnTheLeft(m_matrixU); + } + if (computeV()) + { + Index Vcols = m_computeThinV ? m_diagSize : householderV.cols(); + m_matrixV = MatrixX::Identity(householderV.cols(), Vcols); + m_matrixV.topLeftCorner(m_diagSize, m_diagSize) = naiveU.template cast<Scalar>().topLeftCorner(m_diagSize, m_diagSize); + householderV.applyThisOnTheLeft(m_matrixV); + } +} + +/** \internal + * Performs A = A * B exploiting the special structure of the matrix A. Splitting A as: + * A = [A1] + * [A2] + * such that A1.rows()==n1, then we assume that at least half of the columns of A1 and A2 are zeros. 
+ * We can thus pack them prior to the matrix product. However, this is only worth the effort if the matrix is large + * enough. + */ +template<typename MatrixType> +void BDCSVD<MatrixType>::structured_update(Block<MatrixXr,Dynamic,Dynamic> A, const MatrixXr &B, Index n1) +{ + Index n = A.rows(); + if(n>100) + { + // If the matrices are large enough, let's exploit the sparse structure of A by + // splitting it in half (wrt n1), and packing the non-zero columns. + DenseIndex n2 = n - n1; + MatrixXr A1(n1,n), A2(n2,n), B1(n,n), B2(n,n); + Index k1=0, k2=0; + for(Index j=0; j<n; ++j) + { + if( (A.col(j).head(n1).array()!=0).any() ) + { + A1.col(k1) = A.col(j).head(n1); + B1.row(k1) = B.row(j); + ++k1; + } + if( (A.col(j).tail(n2).array()!=0).any() ) + { + A2.col(k2) = A.col(j).tail(n2); + B2.row(k2) = B.row(j); + ++k2; + } + } + + A.topRows(n1).noalias() = A1.leftCols(k1) * B1.topRows(k1); + A.bottomRows(n2).noalias() = A2.leftCols(k2) * B2.topRows(k2); + } + else + A *= B; // FIXME this requires a temporary +} + +// The divide algorithm is done "in place", we are always working on subsets of the same matrix. The divide method takes as arguments the +// place of the submatrix we are currently working on. + +//@param firstCol : The Index of the first column of the submatrix of m_computed and for m_naiveU; +//@param lastCol : The Index of the last column of the submatrix of m_computed and for m_naiveU; +// lastCol + 1 - firstCol is the size of the submatrix. +//@param firstRowW : The Index of the first row of the matrix W that we are to change. (see the reference paper section 1 for more information on W) +//@param firstColW : Same as firstRowW, but for the column. +//@param shift : Each time one takes the left submatrix, one must add 1 to the shift, because we actually want the last column of the U submatrix +// to become the first column (*coeff) and to shift all the other columns to the right. There are more details in the reference paper. 
+template<typename MatrixType> +void BDCSVD<MatrixType>::divide (Index firstCol, Index lastCol, Index firstRowW, Index firstColW, Index shift) +{ + // requires nbRows = nbCols + 1; + using std::pow; + using std::sqrt; + using std::abs; + const Index n = lastCol - firstCol + 1; + const Index k = n/2; + RealScalar alphaK; + RealScalar betaK; + RealScalar r0; + RealScalar lambda, phi, c0, s0; + VectorType l, f; + // We use the other algorithm which is more efficient for small + // matrices. + if (n < m_algoswap) + { + JacobiSVD<MatrixXr> b(m_computed.block(firstCol, firstCol, n + 1, n), ComputeFullU | (m_compV ? ComputeFullV : 0)) ; + if (m_compU) + m_naiveU.block(firstCol, firstCol, n + 1, n + 1).real() = b.matrixU(); + else + { + m_naiveU.row(0).segment(firstCol, n + 1).real() = b.matrixU().row(0); + m_naiveU.row(1).segment(firstCol, n + 1).real() = b.matrixU().row(n); + } + if (m_compV) m_naiveV.block(firstRowW, firstColW, n, n).real() = b.matrixV(); + m_computed.block(firstCol + shift, firstCol + shift, n + 1, n).setZero(); + m_computed.diagonal().segment(firstCol + shift, n) = b.singularValues().head(n); + return; + } + // We use the divide and conquer algorithm + alphaK = m_computed(firstCol + k, firstCol + k); + betaK = m_computed(firstCol + k + 1, firstCol + k); + // The divide must be done in that order in order to have good results. Divide change the data inside the submatrices + // and the divide of the right submatrice reads one column of the left submatrice. That's why we need to treat the + // right submatrix before the left one. 
+ divide(k + 1 + firstCol, lastCol, k + 1 + firstRowW, k + 1 + firstColW, shift); + divide(firstCol, k - 1 + firstCol, firstRowW, firstColW + 1, shift + 1); + + if (m_compU) + { + lambda = m_naiveU(firstCol + k, firstCol + k); + phi = m_naiveU(firstCol + k + 1, lastCol + 1); + } + else + { + lambda = m_naiveU(1, firstCol + k); + phi = m_naiveU(0, lastCol + 1); + } + r0 = sqrt((abs(alphaK * lambda) * abs(alphaK * lambda)) + abs(betaK * phi) * abs(betaK * phi)); + if (m_compU) + { + l = m_naiveU.row(firstCol + k).segment(firstCol, k); + f = m_naiveU.row(firstCol + k + 1).segment(firstCol + k + 1, n - k - 1); + } + else + { + l = m_naiveU.row(1).segment(firstCol, k); + f = m_naiveU.row(0).segment(firstCol + k + 1, n - k - 1); + } + if (m_compV) m_naiveV(firstRowW+k, firstColW) = 1; + if (r0 == 0) + { + c0 = 1; + s0 = 0; + } + else + { + c0 = alphaK * lambda / r0; + s0 = betaK * phi / r0; + } + +#ifdef EIGEN_BDCSVD_SANITY_CHECKS + assert(m_naiveU.allFinite()); + assert(m_naiveV.allFinite()); + assert(m_computed.allFinite()); +#endif + + if (m_compU) + { + MatrixXr q1 (m_naiveU.col(firstCol + k).segment(firstCol, k + 1)); + // we shiftW Q1 to the right + for (Index i = firstCol + k - 1; i >= firstCol; i--) + m_naiveU.col(i + 1).segment(firstCol, k + 1) = m_naiveU.col(i).segment(firstCol, k + 1); + // we shift q1 at the left with a factor c0 + m_naiveU.col(firstCol).segment( firstCol, k + 1) = (q1 * c0); + // last column = q1 * - s0 + m_naiveU.col(lastCol + 1).segment(firstCol, k + 1) = (q1 * ( - s0)); + // first column = q2 * s0 + m_naiveU.col(firstCol).segment(firstCol + k + 1, n - k) = m_naiveU.col(lastCol + 1).segment(firstCol + k + 1, n - k) * s0; + // q2 *= c0 + m_naiveU.col(lastCol + 1).segment(firstCol + k + 1, n - k) *= c0; + } + else + { + RealScalar q1 = (m_naiveU(0, firstCol + k)); + // we shift Q1 to the right + for (Index i = firstCol + k - 1; i >= firstCol; i--) + m_naiveU(0, i + 1) = m_naiveU(0, i); + // we shift q1 at the left with a factor c0 + 
m_naiveU(0, firstCol) = (q1 * c0); + // last column = q1 * - s0 + m_naiveU(0, lastCol + 1) = (q1 * ( - s0)); + // first column = q2 * s0 + m_naiveU(1, firstCol) = m_naiveU(1, lastCol + 1) *s0; + // q2 *= c0 + m_naiveU(1, lastCol + 1) *= c0; + m_naiveU.row(1).segment(firstCol + 1, k).setZero(); + m_naiveU.row(0).segment(firstCol + k + 1, n - k - 1).setZero(); + } + +#ifdef EIGEN_BDCSVD_SANITY_CHECKS + assert(m_naiveU.allFinite()); + assert(m_naiveV.allFinite()); + assert(m_computed.allFinite()); +#endif + + m_computed(firstCol + shift, firstCol + shift) = r0; + m_computed.col(firstCol + shift).segment(firstCol + shift + 1, k) = alphaK * l.transpose().real(); + m_computed.col(firstCol + shift).segment(firstCol + shift + k + 1, n - k - 1) = betaK * f.transpose().real(); + +#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE + ArrayXr tmp1 = (m_computed.block(firstCol+shift, firstCol+shift, n, n)).jacobiSvd().singularValues(); +#endif + // Second part: try to deflate singular values in combined matrix + deflation(firstCol, lastCol, k, firstRowW, firstColW, shift); +#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE + ArrayXr tmp2 = (m_computed.block(firstCol+shift, firstCol+shift, n, n)).jacobiSvd().singularValues(); + std::cout << "\n\nj1 = " << tmp1.transpose().format(bdcsvdfmt) << "\n"; + std::cout << "j2 = " << tmp2.transpose().format(bdcsvdfmt) << "\n\n"; + std::cout << "err: " << ((tmp1-tmp2).abs()>1e-12*tmp2.abs()).transpose() << "\n"; + static int count = 0; + std::cout << "# " << ++count << "\n\n"; + assert((tmp1-tmp2).matrix().norm() < 1e-14*tmp2.matrix().norm()); +// assert(count<681); +// assert(((tmp1-tmp2).abs()<1e-13*tmp2.abs()).all()); +#endif + + // Third part: compute SVD of combined matrix + MatrixXr UofSVD, VofSVD; + VectorType singVals; + computeSVDofM(firstCol + shift, n, UofSVD, singVals, VofSVD); + +#ifdef EIGEN_BDCSVD_SANITY_CHECKS + assert(UofSVD.allFinite()); + assert(VofSVD.allFinite()); +#endif + + if (m_compU) structured_update(m_naiveU.block(firstCol, firstCol, n + 1, n + 
1), UofSVD, (n+2)/2); + else m_naiveU.middleCols(firstCol, n + 1) *= UofSVD; // FIXME this requires a temporary, and exploit that there are 2 rows at compile time + + if (m_compV) structured_update(m_naiveV.block(firstRowW, firstColW, n, n), VofSVD, (n+1)/2); + +#ifdef EIGEN_BDCSVD_SANITY_CHECKS + assert(m_naiveU.allFinite()); + assert(m_naiveV.allFinite()); + assert(m_computed.allFinite()); +#endif + + m_computed.block(firstCol + shift, firstCol + shift, n, n).setZero(); + m_computed.block(firstCol + shift, firstCol + shift, n, n).diagonal() = singVals; +}// end divide + +// Compute SVD of m_computed.block(firstCol, firstCol, n + 1, n); this block only has non-zeros in +// the first column and on the diagonal and has undergone deflation, so diagonal is in increasing +// order except for possibly the (0,0) entry. The computed SVD is stored U, singVals and V, except +// that if m_compV is false, then V is not computed. Singular values are sorted in decreasing order. +// +// TODO Opportunities for optimization: better root finding algo, better stopping criterion, better +// handling of round-off errors, be consistent in ordering +// For instance, to solve the secular equation using FMM, see http://www.stat.uchicago.edu/~lekheng/courses/302/classics/greengard-rokhlin.pdf +template <typename MatrixType> +void BDCSVD<MatrixType>::computeSVDofM(Index firstCol, Index n, MatrixXr& U, VectorType& singVals, MatrixXr& V) +{ + // TODO Get rid of these copies (?) 
+ // FIXME at least preallocate them + ArrayXr col0 = m_computed.col(firstCol).segment(firstCol, n); + ArrayXr diag = m_computed.block(firstCol, firstCol, n, n).diagonal(); + diag(0) = 0; + + // Allocate space for singular values and vectors + singVals.resize(n); + U.resize(n+1, n+1); + if (m_compV) V.resize(n, n); + +#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE + if (col0.hasNaN() || diag.hasNaN()) + std::cout << "\n\nHAS NAN\n\n"; +#endif + + // Many singular values might have been deflated, the zero ones have been moved to the end, + // but others are interleaved and we must ignore them at this stage. + // To this end, let's compute a permutation skipping them: + Index actual_n = n; + while(actual_n>1 && diag(actual_n-1)==0) --actual_n; + Index m = 0; // size of the deflated problem + ArrayXi perm(actual_n); + for(Index k=0;k<actual_n;++k) + if(col0(k)!=0) + perm(m++) = k; + perm.conservativeResize(m); + + ArrayXr shifts(n), mus(n), zhat(n); + +#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE + std::cout << "computeSVDofM using:\n"; + std::cout << " z: " << col0.transpose() << "\n"; + std::cout << " d: " << diag.transpose() << "\n"; +#endif + + // Compute singVals, shifts, and mus + computeSingVals(col0, diag, perm, singVals, shifts, mus); + +#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE + std::cout << " j: " << (m_computed.block(firstCol, firstCol, n, n)).jacobiSvd().singularValues().transpose().reverse() << "\n\n"; + std::cout << " sing-val: " << singVals.transpose() << "\n"; + std::cout << " mu: " << mus.transpose() << "\n"; + std::cout << " shift: " << shifts.transpose() << "\n"; + + { + Index actual_n = n; + while(actual_n>1 && col0(actual_n-1)==0) --actual_n; + std::cout << "\n\n mus: " << mus.head(actual_n).transpose() << "\n\n"; + std::cout << " check1 (expect0) : " << ((singVals.array()-(shifts+mus)) / singVals.array()).head(actual_n).transpose() << "\n\n"; + std::cout << " check2 (>0) : " << ((singVals.array()-diag) / singVals.array()).head(actual_n).transpose() << "\n\n"; + std::cout << " 
check3 (>0) : " << ((diag.segment(1,actual_n-1)-singVals.head(actual_n-1).array()) / singVals.head(actual_n-1).array()).transpose() << "\n\n\n"; + std::cout << " check4 (>0) : " << ((singVals.segment(1,actual_n-1)-singVals.head(actual_n-1))).transpose() << "\n\n\n"; + } +#endif + +#ifdef EIGEN_BDCSVD_SANITY_CHECKS + assert(singVals.allFinite()); + assert(mus.allFinite()); + assert(shifts.allFinite()); +#endif + + // Compute zhat + perturbCol0(col0, diag, perm, singVals, shifts, mus, zhat); +#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE + std::cout << " zhat: " << zhat.transpose() << "\n"; +#endif + +#ifdef EIGEN_BDCSVD_SANITY_CHECKS + assert(zhat.allFinite()); +#endif + + computeSingVecs(zhat, diag, perm, singVals, shifts, mus, U, V); + +#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE + std::cout << "U^T U: " << (U.transpose() * U - MatrixXr(MatrixXr::Identity(U.cols(),U.cols()))).norm() << "\n"; + std::cout << "V^T V: " << (V.transpose() * V - MatrixXr(MatrixXr::Identity(V.cols(),V.cols()))).norm() << "\n"; +#endif + +#ifdef EIGEN_BDCSVD_SANITY_CHECKS + assert(U.allFinite()); + assert(V.allFinite()); + assert((U.transpose() * U - MatrixXr(MatrixXr::Identity(U.cols(),U.cols()))).norm() < 1e-14 * n); + assert((V.transpose() * V - MatrixXr(MatrixXr::Identity(V.cols(),V.cols()))).norm() < 1e-14 * n); + assert(m_naiveU.allFinite()); + assert(m_naiveV.allFinite()); + assert(m_computed.allFinite()); +#endif + + // Because of deflation, the singular values might not be completely sorted. 
+ // Fortunately, reordering them is a O(n) problem + for(Index i=0; i<actual_n-1; ++i) + { + if(singVals(i)>singVals(i+1)) + { + using std::swap; + swap(singVals(i),singVals(i+1)); + U.col(i).swap(U.col(i+1)); + if(m_compV) V.col(i).swap(V.col(i+1)); + } + } + + // Reverse order so that singular values in increased order + // Because of deflation, the zeros singular-values are already at the end + singVals.head(actual_n).reverseInPlace(); + U.leftCols(actual_n) = U.leftCols(actual_n).rowwise().reverse().eval(); // FIXME this requires a temporary + if (m_compV) V.leftCols(actual_n) = V.leftCols(actual_n).rowwise().reverse().eval(); // FIXME this requires a temporary + +#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE + JacobiSVD<MatrixXr> jsvd(m_computed.block(firstCol, firstCol, n, n) ); + std::cout << " * j: " << jsvd.singularValues().transpose() << "\n\n"; + std::cout << " * sing-val: " << singVals.transpose() << "\n"; +// std::cout << " * err: " << ((jsvd.singularValues()-singVals)>1e-13*singVals.norm()).transpose() << "\n"; +#endif +} + +template <typename MatrixType> +typename BDCSVD<MatrixType>::RealScalar BDCSVD<MatrixType>::secularEq(RealScalar mu, const ArrayXr& col0, const ArrayXr& diag, const ArrayXi &perm, const ArrayXr& diagShifted, RealScalar shift) +{ + Index m = perm.size(); + RealScalar res = 1; + for(Index i=0; i<m; ++i) + { + Index j = perm(i); + res += numext::abs2(col0(j)) / ((diagShifted(j) - mu) * (diag(j) + shift + mu)); + } + return res; +} + +template <typename MatrixType> +void BDCSVD<MatrixType>::computeSingVals(const ArrayXr& col0, const ArrayXr& diag, const ArrayXi &perm, + VectorType& singVals, ArrayXr& shifts, ArrayXr& mus) +{ + using std::abs; + using std::swap; + + Index n = col0.size(); + Index actual_n = n; + while(actual_n>1 && col0(actual_n-1)==0) --actual_n; + + for (Index k = 0; k < n; ++k) + { + if (col0(k) == 0 || actual_n==1) + { + // if col0(k) == 0, then entry is deflated, so singular value is on diagonal + // if actual_n==1, then the 
deflated problem is already diagonalized + singVals(k) = k==0 ? col0(0) : diag(k); + mus(k) = 0; + shifts(k) = k==0 ? col0(0) : diag(k); + continue; + } + + // otherwise, use secular equation to find singular value + RealScalar left = diag(k); + RealScalar right; // was: = (k != actual_n-1) ? diag(k+1) : (diag(actual_n-1) + col0.matrix().norm()); + if(k==actual_n-1) + right = (diag(actual_n-1) + col0.matrix().norm()); + else + { + // Skip deflated singular values + Index l = k+1; + while(col0(l)==0) { ++l; eigen_internal_assert(l<actual_n); } + right = diag(l); + } + + // first decide whether it's closer to the left end or the right end + RealScalar mid = left + (right-left) / 2; + RealScalar fMid = secularEq(mid, col0, diag, perm, diag, 0); +#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE + std::cout << right-left << "\n"; + std::cout << "fMid = " << fMid << " " << secularEq(mid-left, col0, diag, perm, diag-left, left) << " " << secularEq(mid-right, col0, diag, perm, diag-right, right) << "\n"; + std::cout << " = " << secularEq(0.1*(left+right), col0, diag, perm, diag, 0) + << " " << secularEq(0.2*(left+right), col0, diag, perm, diag, 0) + << " " << secularEq(0.3*(left+right), col0, diag, perm, diag, 0) + << " " << secularEq(0.4*(left+right), col0, diag, perm, diag, 0) + << " " << secularEq(0.49*(left+right), col0, diag, perm, diag, 0) + << " " << secularEq(0.5*(left+right), col0, diag, perm, diag, 0) + << " " << secularEq(0.51*(left+right), col0, diag, perm, diag, 0) + << " " << secularEq(0.6*(left+right), col0, diag, perm, diag, 0) + << " " << secularEq(0.7*(left+right), col0, diag, perm, diag, 0) + << " " << secularEq(0.8*(left+right), col0, diag, perm, diag, 0) + << " " << secularEq(0.9*(left+right), col0, diag, perm, diag, 0) << "\n"; +#endif + RealScalar shift = (k == actual_n-1 || fMid > 0) ? 
left : right; + + // measure everything relative to shift + ArrayXr diagShifted = diag - shift; + + // initial guess + RealScalar muPrev, muCur; + if (shift == left) + { + muPrev = (right - left) * 0.1; + if (k == actual_n-1) muCur = right - left; + else muCur = (right - left) * 0.5; + } + else + { + muPrev = -(right - left) * 0.1; + muCur = -(right - left) * 0.5; + } + + RealScalar fPrev = secularEq(muPrev, col0, diag, perm, diagShifted, shift); + RealScalar fCur = secularEq(muCur, col0, diag, perm, diagShifted, shift); + if (abs(fPrev) < abs(fCur)) + { + swap(fPrev, fCur); + swap(muPrev, muCur); + } + + // rational interpolation: fit a function of the form a / mu + b through the two previous + // iterates and use its zero to compute the next iterate + bool useBisection = fPrev*fCur>0; + while (fCur!=0 && abs(muCur - muPrev) > 8 * NumTraits<RealScalar>::epsilon() * numext::maxi(abs(muCur), abs(muPrev)) && abs(fCur - fPrev)>NumTraits<RealScalar>::epsilon() && !useBisection) + { + ++m_numIters; + + // Find a and b such that the function f(mu) = a / mu + b matches the current and previous samples. 
+ RealScalar a = (fCur - fPrev) / (1/muCur - 1/muPrev); + RealScalar b = fCur - a / muCur; + // And find mu such that f(mu)==0: + RealScalar muZero = -a/b; + RealScalar fZero = secularEq(muZero, col0, diag, perm, diagShifted, shift); + + muPrev = muCur; + fPrev = fCur; + muCur = muZero; + fCur = fZero; + + + if (shift == left && (muCur < 0 || muCur > right - left)) useBisection = true; + if (shift == right && (muCur < -(right - left) || muCur > 0)) useBisection = true; + if (abs(fCur)>abs(fPrev)) useBisection = true; + } + + // fall back on bisection method if rational interpolation did not work + if (useBisection) + { +#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE + std::cout << "useBisection for k = " << k << ", actual_n = " << actual_n << "\n"; +#endif + RealScalar leftShifted, rightShifted; + if (shift == left) + { + leftShifted = RealScalar(1)/NumTraits<RealScalar>::highest(); + // I don't understand why the case k==0 would be special there: + // if (k == 0) rightShifted = right - left; else + rightShifted = (k==actual_n-1) ? 
right : ((right - left) * 0.6); // theoretically we can take 0.5, but let's be safe + } + else + { + leftShifted = -(right - left) * 0.6; + rightShifted = -RealScalar(1)/NumTraits<RealScalar>::highest(); + } + + RealScalar fLeft = secularEq(leftShifted, col0, diag, perm, diagShifted, shift); + RealScalar fRight = secularEq(rightShifted, col0, diag, perm, diagShifted, shift); + +#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE + if(!(fLeft * fRight<0)) + std::cout << k << " : " << fLeft << " * " << fRight << " == " << fLeft * fRight << " ; " << left << " - " << right << " -> " << leftShifted << " " << rightShifted << " shift=" << shift << "\n"; +#endif + eigen_internal_assert(fLeft * fRight < 0); + + while (rightShifted - leftShifted > 2 * NumTraits<RealScalar>::epsilon() * numext::maxi(abs(leftShifted), abs(rightShifted))) + { + RealScalar midShifted = (leftShifted + rightShifted) / 2; + RealScalar fMid = secularEq(midShifted, col0, diag, perm, diagShifted, shift); + if (fLeft * fMid < 0) + { + rightShifted = midShifted; + fRight = fMid; + } + else + { + leftShifted = midShifted; + fLeft = fMid; + } + } + + muCur = (leftShifted + rightShifted) / 2; + } + + singVals[k] = shift + muCur; + shifts[k] = shift; + mus[k] = muCur; + + // perturb singular value slightly if it equals diagonal entry to avoid division by zero later + // (deflation is supposed to avoid this from happening) + // - this does no seem to be necessary anymore - +// if (singVals[k] == left) singVals[k] *= 1 + NumTraits<RealScalar>::epsilon(); +// if (singVals[k] == right) singVals[k] *= 1 - NumTraits<RealScalar>::epsilon(); + } +} + + +// zhat is perturbation of col0 for which singular vectors can be computed stably (see Section 3.1) +template <typename MatrixType> +void BDCSVD<MatrixType>::perturbCol0 + (const ArrayXr& col0, const ArrayXr& diag, const ArrayXi &perm, const VectorType& singVals, + const ArrayXr& shifts, const ArrayXr& mus, ArrayXr& zhat) +{ + using std::sqrt; + Index n = col0.size(); + Index m = 
perm.size(); + if(m==0) + { + zhat.setZero(); + return; + } + Index last = perm(m-1); + // The offset permits to skip deflated entries while computing zhat + for (Index k = 0; k < n; ++k) + { + if (col0(k) == 0) // deflated + zhat(k) = 0; + else + { + // see equation (3.6) + RealScalar dk = diag(k); + RealScalar prod = (singVals(last) + dk) * (mus(last) + (shifts(last) - dk)); + + for(Index l = 0; l<m; ++l) + { + Index i = perm(l); + if(i!=k) + { + Index j = i<k ? i : perm(l-1); + prod *= ((singVals(j)+dk) / ((diag(i)+dk))) * ((mus(j)+(shifts(j)-dk)) / ((diag(i)-dk))); +#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE + if(i!=k && std::abs(((singVals(j)+dk)*(mus(j)+(shifts(j)-dk)))/((diag(i)+dk)*(diag(i)-dk)) - 1) > 0.9 ) + std::cout << " " << ((singVals(j)+dk)*(mus(j)+(shifts(j)-dk)))/((diag(i)+dk)*(diag(i)-dk)) << " == (" << (singVals(j)+dk) << " * " << (mus(j)+(shifts(j)-dk)) + << ") / (" << (diag(i)+dk) << " * " << (diag(i)-dk) << ")\n"; +#endif + } + } +#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE + std::cout << "zhat(" << k << ") = sqrt( " << prod << ") ; " << (singVals(last) + dk) << " * " << mus(last) + shifts(last) << " - " << dk << "\n"; +#endif + RealScalar tmp = sqrt(prod); + zhat(k) = col0(k) > 0 ? 
tmp : -tmp; + } + } +} + +// compute singular vectors +template <typename MatrixType> +void BDCSVD<MatrixType>::computeSingVecs + (const ArrayXr& zhat, const ArrayXr& diag, const ArrayXi &perm, const VectorType& singVals, + const ArrayXr& shifts, const ArrayXr& mus, MatrixXr& U, MatrixXr& V) +{ + Index n = zhat.size(); + Index m = perm.size(); + + for (Index k = 0; k < n; ++k) + { + if (zhat(k) == 0) + { + U.col(k) = VectorType::Unit(n+1, k); + if (m_compV) V.col(k) = VectorType::Unit(n, k); + } + else + { + U.col(k).setZero(); + for(Index l=0;l<m;++l) + { + Index i = perm(l); + U(i,k) = zhat(i)/(((diag(i) - shifts(k)) - mus(k)) )/( (diag(i) + singVals[k])); + } + U(n,k) = 0; + U.col(k).normalize(); + + if (m_compV) + { + V.col(k).setZero(); + for(Index l=1;l<m;++l) + { + Index i = perm(l); + V(i,k) = diag(i) * zhat(i) / (((diag(i) - shifts(k)) - mus(k)) )/( (diag(i) + singVals[k])); + } + V(0,k) = -1; + V.col(k).normalize(); + } + } + } + U.col(n) = VectorType::Unit(n+1, n); +} + + +// page 12_13 +// i >= 1, di almost null and zi non null. 
+// We use a rotation to zero out zi applied to the left of M +template <typename MatrixType> +void BDCSVD<MatrixType>::deflation43(Index firstCol, Index shift, Index i, Index size) +{ + using std::abs; + using std::sqrt; + using std::pow; + Index start = firstCol + shift; + RealScalar c = m_computed(start, start); + RealScalar s = m_computed(start+i, start); + RealScalar r = sqrt(numext::abs2(c) + numext::abs2(s)); + if (r == 0) + { + m_computed(start+i, start+i) = 0; + return; + } + m_computed(start,start) = r; + m_computed(start+i, start) = 0; + m_computed(start+i, start+i) = 0; + + JacobiRotation<RealScalar> J(c/r,-s/r); + if (m_compU) m_naiveU.middleRows(firstCol, size+1).applyOnTheRight(firstCol, firstCol+i, J); + else m_naiveU.applyOnTheRight(firstCol, firstCol+i, J); +}// end deflation 43 + + +// page 13 +// i,j >= 1, i!=j and |di - dj| < epsilon * norm2(M) +// We apply two rotations to have zj = 0; +// TODO deflation44 is still broken and not properly tested +template <typename MatrixType> +void BDCSVD<MatrixType>::deflation44(Index firstColu , Index firstColm, Index firstRowW, Index firstColW, Index i, Index j, Index size) +{ + using std::abs; + using std::sqrt; + using std::conj; + using std::pow; + RealScalar c = m_computed(firstColm+i, firstColm); + RealScalar s = m_computed(firstColm+j, firstColm); + RealScalar r = sqrt(numext::abs2(c) + numext::abs2(s)); +#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE + std::cout << "deflation 4.4: " << i << "," << j << " -> " << c << " " << s << " " << r << " ; " + << m_computed(firstColm + i-1, firstColm) << " " + << m_computed(firstColm + i, firstColm) << " " + << m_computed(firstColm + i+1, firstColm) << " " + << m_computed(firstColm + i+2, firstColm) << "\n"; + std::cout << m_computed(firstColm + i-1, firstColm + i-1) << " " + << m_computed(firstColm + i, firstColm+i) << " " + << m_computed(firstColm + i+1, firstColm+i+1) << " " + << m_computed(firstColm + i+2, firstColm+i+2) << "\n"; +#endif + if (r==0) + { + 
m_computed(firstColm + i, firstColm + i) = m_computed(firstColm + j, firstColm + j); + return; + } + c/=r; + s/=r; + m_computed(firstColm + i, firstColm) = r; + m_computed(firstColm + j, firstColm + j) = m_computed(firstColm + i, firstColm + i); + m_computed(firstColm + j, firstColm) = 0; + + JacobiRotation<RealScalar> J(c,-s); + if (m_compU) m_naiveU.middleRows(firstColu, size+1).applyOnTheRight(firstColu + i, firstColu + j, J); + else m_naiveU.applyOnTheRight(firstColu+i, firstColu+j, J); + if (m_compV) m_naiveV.middleRows(firstRowW, size).applyOnTheRight(firstColW + i, firstColW + j, J); +}// end deflation 44 + + +// acts on block from (firstCol+shift, firstCol+shift) to (lastCol+shift, lastCol+shift) [inclusive] +template <typename MatrixType> +void BDCSVD<MatrixType>::deflation(Index firstCol, Index lastCol, Index k, Index firstRowW, Index firstColW, Index shift) +{ + using std::sqrt; + using std::abs; + const Index length = lastCol + 1 - firstCol; + + Block<MatrixXr,Dynamic,1> col0(m_computed, firstCol+shift, firstCol+shift, length, 1); + Diagonal<MatrixXr> fulldiag(m_computed); + VectorBlock<Diagonal<MatrixXr>,Dynamic> diag(fulldiag, firstCol+shift, length); + + RealScalar maxDiag = diag.tail((std::max)(Index(1),length-1)).cwiseAbs().maxCoeff(); + RealScalar epsilon_strict = NumTraits<RealScalar>::epsilon() * maxDiag; + RealScalar epsilon_coarse = 8 * NumTraits<RealScalar>::epsilon() * numext::maxi(col0.cwiseAbs().maxCoeff(), maxDiag); + +#ifdef EIGEN_BDCSVD_SANITY_CHECKS + assert(m_naiveU.allFinite()); + assert(m_naiveV.allFinite()); + assert(m_computed.allFinite()); +#endif + +#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE + std::cout << "\ndeflate:" << diag.head(k+1).transpose() << " | " << diag.segment(k+1,length-k-1).transpose() << "\n"; +#endif + + //condition 4.1 + if (diag(0) < epsilon_coarse) + { +#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE + std::cout << "deflation 4.1, because " << diag(0) << " < " << epsilon_coarse << "\n"; +#endif + diag(0) = epsilon_coarse; + } + + 
//condition 4.2 + for (Index i=1;i<length;++i) + if (abs(col0(i)) < epsilon_strict) + { +#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE + std::cout << "deflation 4.2, set z(" << i << ") to zero because " << abs(col0(i)) << " < " << epsilon_strict << " (diag(" << i << ")=" << diag(i) << ")\n"; +#endif + col0(i) = 0; + } + + //condition 4.3 + for (Index i=1;i<length; i++) + if (diag(i) < epsilon_coarse) + { +#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE + std::cout << "deflation 4.3, cancel z(" << i << ")=" << col0(i) << " because diag(" << i << ")=" << diag(i) << " < " << epsilon_coarse << "\n"; +#endif + deflation43(firstCol, shift, i, length); + } + +#ifdef EIGEN_BDCSVD_SANITY_CHECKS + assert(m_naiveU.allFinite()); + assert(m_naiveV.allFinite()); + assert(m_computed.allFinite()); +#endif +#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE + std::cout << "to be sorted: " << diag.transpose() << "\n\n"; +#endif + { + // Check for total deflation + // If we have a total deflation, then we have to consider col0(0)==diag(0) as a singular value during sorting + bool total_deflation = (col0.tail(length-1).array()==RealScalar(0)).all(); + + // Sort the diagonal entries, since diag(1:k-1) and diag(k:length) are already sorted, let's do a sorted merge. + // First, compute the respective permutation. + Index *permutation = new Index[length]; // FIXME avoid repeated dynamic memory allocation + { + permutation[0] = 0; + Index p = 1; + + // Move deflated diagonal entries at the end. 
+ for(Index i=1; i<length; ++i) + if(diag(i)==0) + permutation[p++] = i; + + Index i=1, j=k+1; + for( ; p < length; ++p) + { + if (i > k) permutation[p] = j++; + else if (j >= length) permutation[p] = i++; + else if (diag(i) < diag(j)) permutation[p] = j++; + else permutation[p] = i++; + } + } + + // If we have a total deflation, then we have to insert diag(0) at the right place + if(total_deflation) + { + for(Index i=1; i<length; ++i) + { + Index pi = permutation[i]; + if(diag(pi)==0 || diag(0)<diag(pi)) + permutation[i-1] = permutation[i]; + else + { + permutation[i-1] = 0; + break; + } + } + } + + // Current index of each col, and current column of each index + Index *realInd = new Index[length]; // FIXME avoid repeated dynamic memory allocation + Index *realCol = new Index[length]; // FIXME avoid repeated dynamic memory allocation + + for(int pos = 0; pos< length; pos++) + { + realCol[pos] = pos; + realInd[pos] = pos; + } + + for(Index i = total_deflation?0:1; i < length; i++) + { + const Index pi = permutation[length - (total_deflation ? 
i+1 : i)]; + const Index J = realCol[pi]; + + using std::swap; + // swap diagonal and first column entries: + swap(diag(i), diag(J)); + if(i!=0 && J!=0) swap(col0(i), col0(J)); + + // change columns + if (m_compU) m_naiveU.col(firstCol+i).segment(firstCol, length + 1).swap(m_naiveU.col(firstCol+J).segment(firstCol, length + 1)); + else m_naiveU.col(firstCol+i).segment(0, 2) .swap(m_naiveU.col(firstCol+J).segment(0, 2)); + if (m_compV) m_naiveV.col(firstColW + i).segment(firstRowW, length).swap(m_naiveV.col(firstColW + J).segment(firstRowW, length)); + + //update real pos + const Index realI = realInd[i]; + realCol[realI] = J; + realCol[pi] = i; + realInd[J] = realI; + realInd[i] = pi; + } + delete[] permutation; + delete[] realInd; + delete[] realCol; + } +#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE + std::cout << "sorted: " << diag.transpose().format(bdcsvdfmt) << "\n"; + std::cout << " : " << col0.transpose() << "\n\n"; +#endif + + //condition 4.4 + { + Index i = length-1; + while(i>0 && (diag(i)==0 || col0(i)==0)) --i; + for(; i>1;--i) + if( (diag(i) - diag(i-1)) < NumTraits<RealScalar>::epsilon()*maxDiag ) + { +#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE + std::cout << "deflation 4.4 with i = " << i << " because " << (diag(i) - diag(i-1)) << " < " << NumTraits<RealScalar>::epsilon()*diag(i) << "\n"; +#endif + eigen_internal_assert(abs(diag(i) - diag(i-1))<epsilon_coarse && " diagonal entries are not properly sorted"); + deflation44(firstCol, firstCol + shift, firstRowW, firstColW, i-1, i, length); + } + } + +#ifdef EIGEN_BDCSVD_SANITY_CHECKS + for(Index j=2;j<length;++j) + assert(diag(j-1)<=diag(j) || diag(j)==0); +#endif + +#ifdef EIGEN_BDCSVD_SANITY_CHECKS + assert(m_naiveU.allFinite()); + assert(m_naiveV.allFinite()); + assert(m_computed.allFinite()); +#endif +}//end deflation + +#ifndef __CUDACC__ +/** \svd_module + * + * \return the singular value decomposition of \c *this computed by Divide & Conquer algorithm + * + * \sa class BDCSVD + */ +template<typename Derived> 
+BDCSVD<typename MatrixBase<Derived>::PlainObject> +MatrixBase<Derived>::bdcSvd(unsigned int computationOptions) const +{ + return BDCSVD<PlainObject>(*this, computationOptions); +} +#endif + +} // end namespace Eigen + +#endif diff --git a/Eigen/src/SVD/JacobiSVD.h b/Eigen/src/SVD/JacobiSVD.h index 3ab8a4c8a..444187ae7 100644 --- a/Eigen/src/SVD/JacobiSVD.h +++ b/Eigen/src/SVD/JacobiSVD.h @@ -550,7 +550,7 @@ template<typename _MatrixType, int QRPreconditioner> class JacobiSVD * according to the specified problem size. * \sa JacobiSVD() */ - JacobiSVD(Index rows, Index cols, unsigned int computationOptions = 0) + explicit JacobiSVD(Index rows, Index cols, unsigned int computationOptions = 0) { allocate(rows, cols, computationOptions); } @@ -565,7 +565,7 @@ template<typename _MatrixType, int QRPreconditioner> class JacobiSVD * Thin unitaries are only available if your matrix type has a Dynamic number of columns (for example MatrixXf). They also are not * available with the (non-default) FullPivHouseholderQR preconditioner. */ - JacobiSVD(const MatrixType& matrix, unsigned int computationOptions = 0) + explicit JacobiSVD(const MatrixType& matrix, unsigned int computationOptions = 0) { compute(matrix, computationOptions); } @@ -593,27 +593,12 @@ template<typename _MatrixType, int QRPreconditioner> class JacobiSVD return compute(matrix, m_computationOptions); } - /** \returns a (least squares) solution of \f$ A x = b \f$ using the current SVD decomposition of A. - * - * \param b the right-hand-side of the equation to solve. - * - * \note Solving requires both U and V to be computed. Thin U and V are enough, there is no need for full U or V. - * - * \note SVD solving is implicitly least-squares. Thus, this method serves both purposes of exact solving and least-squares solving. - * In other words, the returned solution is guaranteed to minimize the Euclidean norm \f$ \Vert A x - b \Vert \f$. 
- */ - template<typename Rhs> - inline const internal::solve_retval<JacobiSVD, Rhs> - solve(const MatrixBase<Rhs>& b) const - { - eigen_assert(m_isInitialized && "JacobiSVD is not initialized."); - eigen_assert(computeU() && computeV() && "JacobiSVD::solve() requires both unitaries U and V to be computed (thin unitaries suffice)."); - return internal::solve_retval<JacobiSVD, Rhs>(*this, b.derived()); - } - using Base::computeU; using Base::computeV; - + using Base::rows; + using Base::cols; + using Base::rank; + private: void allocate(Index rows, Index cols, unsigned int computationOptions); @@ -643,6 +628,7 @@ template<typename _MatrixType, int QRPreconditioner> class JacobiSVD internal::qr_preconditioner_impl<MatrixType, QRPreconditioner, internal::PreconditionIfMoreColsThanRows> m_qr_precond_morecols; internal::qr_preconditioner_impl<MatrixType, QRPreconditioner, internal::PreconditionIfMoreRowsThanCols> m_qr_precond_morerows; + MatrixType m_scaledMatrix; }; template<typename MatrixType, int QRPreconditioner> @@ -689,8 +675,9 @@ void JacobiSVD<MatrixType, QRPreconditioner>::allocate(Index rows, Index cols, u : 0); m_workMatrix.resize(m_diagSize, m_diagSize); - if(m_cols>m_rows) m_qr_precond_morecols.allocate(*this); - if(m_rows>m_cols) m_qr_precond_morerows.allocate(*this); + if(m_cols>m_rows) m_qr_precond_morecols.allocate(*this); + if(m_rows>m_cols) m_qr_precond_morerows.allocate(*this); + if(m_cols!=m_cols) m_scaledMatrix.resize(rows,cols); } template<typename MatrixType, int QRPreconditioner> @@ -707,21 +694,26 @@ JacobiSVD<MatrixType, QRPreconditioner>::compute(const MatrixType& matrix, unsig // limit for very small denormal numbers to be considered zero in order to avoid infinite loops (see bug 286) const RealScalar considerAsZero = RealScalar(2) * std::numeric_limits<RealScalar>::denorm_min(); + // Scaling factor to reduce over/under-flows + RealScalar scale = matrix.cwiseAbs().maxCoeff(); + if(scale==RealScalar(0)) scale = RealScalar(1); + /*** step 1. 
The R-SVD step: we use a QR decomposition to reduce to the case of a square matrix */ - if(!m_qr_precond_morecols.run(*this, matrix) && !m_qr_precond_morerows.run(*this, matrix)) + if(m_rows!=m_cols) + { + m_scaledMatrix = matrix / scale; + m_qr_precond_morecols.run(*this, m_scaledMatrix); + m_qr_precond_morerows.run(*this, m_scaledMatrix); + } + else { - m_workMatrix = matrix.block(0,0,m_diagSize,m_diagSize); + m_workMatrix = matrix.block(0,0,m_diagSize,m_diagSize) / scale; if(m_computeFullU) m_matrixU.setIdentity(m_rows,m_rows); if(m_computeThinU) m_matrixU.setIdentity(m_rows,m_diagSize); if(m_computeFullV) m_matrixV.setIdentity(m_cols,m_cols); if(m_computeThinV) m_matrixV.setIdentity(m_cols, m_diagSize); } - - // Scaling factor to reduce over/under-flows - RealScalar scale = m_workMatrix.cwiseAbs().maxCoeff(); - if(scale==RealScalar(0)) scale = RealScalar(1); - m_workMatrix /= scale; /*** step 2. The main Jacobi SVD iteration. ***/ @@ -739,8 +731,7 @@ JacobiSVD<MatrixType, QRPreconditioner>::compute(const MatrixType& matrix, unsig // if this 2x2 sub-matrix is not diagonal already... // notice that this comparison will evaluate to false if any NaN is involved, ensuring that NaN's don't // keep us iterating forever. Similarly, small denormal numbers are considered zero. 
- EIGEN_USING_STD_MATH(max); - RealScalar threshold = (max)(considerAsZero, precision * (max)(abs(m_workMatrix.coeff(p,p)), + RealScalar threshold = numext::maxi(considerAsZero, precision * numext::maxi(abs(m_workMatrix.coeff(p,p)), abs(m_workMatrix.coeff(q,q)))); // We compare both values to threshold instead of calling max to be robust to NaN (See bug 791) if(abs(m_workMatrix.coeff(p,q))>threshold || abs(m_workMatrix.coeff(q,p)) > threshold) @@ -799,31 +790,6 @@ JacobiSVD<MatrixType, QRPreconditioner>::compute(const MatrixType& matrix, unsig return *this; } -namespace internal { -template<typename _MatrixType, int QRPreconditioner, typename Rhs> -struct solve_retval<JacobiSVD<_MatrixType, QRPreconditioner>, Rhs> - : solve_retval_base<JacobiSVD<_MatrixType, QRPreconditioner>, Rhs> -{ - typedef JacobiSVD<_MatrixType, QRPreconditioner> JacobiSVDType; - EIGEN_MAKE_SOLVE_HELPERS(JacobiSVDType,Rhs) - - template<typename Dest> void evalTo(Dest& dst) const - { - eigen_assert(rhs().rows() == dec().rows()); - - // A = U S V^* - // So A^{-1} = V S^{-1} U^* - - Matrix<Scalar, Dynamic, Rhs::ColsAtCompileTime, 0, _MatrixType::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime> tmp; - Index rank = dec().rank(); - - tmp.noalias() = dec().matrixU().leftCols(rank).adjoint() * rhs(); - tmp = dec().singularValues().head(rank).asDiagonal().inverse() * tmp; - dst = dec().matrixV().leftCols(rank) * tmp; - } -}; -} // end namespace internal - #ifndef __CUDACC__ /** \svd_module * diff --git a/Eigen/src/SVD/JacobiSVD_MKL.h b/Eigen/src/SVD/JacobiSVD_MKL.h index decda7540..14e461c4e 100644 --- a/Eigen/src/SVD/JacobiSVD_MKL.h +++ b/Eigen/src/SVD/JacobiSVD_MKL.h @@ -45,8 +45,8 @@ JacobiSVD<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW, Dynamic, Dynamic>, ColPiv JacobiSVD<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW, Dynamic, Dynamic>, ColPivHouseholderQRPreconditioner>::compute(const Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW, Dynamic, Dynamic>& matrix, unsigned int computationOptions) \ { \ 
typedef Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW, Dynamic, Dynamic> MatrixType; \ - typedef MatrixType::Scalar Scalar; \ - typedef MatrixType::RealScalar RealScalar; \ + /*typedef MatrixType::Scalar Scalar;*/ \ + /*typedef MatrixType::RealScalar RealScalar;*/ \ allocate(matrix.rows(), matrix.cols(), computationOptions); \ \ /*const RealScalar precision = RealScalar(2) * NumTraits<Scalar>::epsilon();*/ \ diff --git a/Eigen/src/SVD/SVDBase.h b/Eigen/src/SVD/SVDBase.h index 61b01fb8a..27b732b80 100644 --- a/Eigen/src/SVD/SVDBase.h +++ b/Eigen/src/SVD/SVDBase.h @@ -190,6 +190,30 @@ public: inline Index rows() const { return m_rows; } inline Index cols() const { return m_cols; } + + /** \returns a (least squares) solution of \f$ A x = b \f$ using the current SVD decomposition of A. + * + * \param b the right-hand-side of the equation to solve. + * + * \note Solving requires both U and V to be computed. Thin U and V are enough, there is no need for full U or V. + * + * \note SVD solving is implicitly least-squares. Thus, this method serves both purposes of exact solving and least-squares solving. + * In other words, the returned solution is guaranteed to minimize the Euclidean norm \f$ \Vert A x - b \Vert \f$. 
+ */ + template<typename Rhs> + inline const Solve<Derived, Rhs> + solve(const MatrixBase<Rhs>& b) const + { + eigen_assert(m_isInitialized && "SVD is not initialized."); + eigen_assert(computeU() && computeV() && "SVD::solve() requires both unitaries U and V to be computed (thin unitaries suffice)."); + return Solve<Derived, Rhs>(derived(), b.derived()); + } + + #ifndef EIGEN_PARSED_BY_DOXYGEN + template<typename RhsType, typename DstType> + EIGEN_DEVICE_FUNC + void _solve_impl(const RhsType &rhs, DstType &dst) const; + #endif protected: // return true if already allocated @@ -220,6 +244,23 @@ protected: }; +#ifndef EIGEN_PARSED_BY_DOXYGEN +template<typename Derived> +template<typename RhsType, typename DstType> +void SVDBase<Derived>::_solve_impl(const RhsType &rhs, DstType &dst) const +{ + eigen_assert(rhs.rows() == rows()); + + // A = U S V^* + // So A^{-1} = V S^{-1} U^* + + Matrix<Scalar, Dynamic, RhsType::ColsAtCompileTime, 0, MatrixType::MaxRowsAtCompileTime, RhsType::MaxColsAtCompileTime> tmp; + Index l_rank = rank(); + tmp.noalias() = m_matrixU.leftCols(l_rank).adjoint() * rhs; + tmp = m_singularValues.head(l_rank).asDiagonal().inverse() * tmp; + dst = m_matrixV.leftCols(l_rank) * tmp; +} +#endif template<typename MatrixType> bool SVDBase<MatrixType>::allocate(Index rows, Index cols, unsigned int computationOptions) diff --git a/Eigen/src/SVD/UpperBidiagonalization.h b/Eigen/src/SVD/UpperBidiagonalization.h index 64906bf0c..eaa6bb86e 100644 --- a/Eigen/src/SVD/UpperBidiagonalization.h +++ b/Eigen/src/SVD/UpperBidiagonalization.h @@ -37,7 +37,7 @@ template<typename _MatrixType> class UpperBidiagonalization typedef Matrix<Scalar, ColsAtCompileTimeMinusOne, 1> SuperDiagVectorType; typedef HouseholderSequence< const MatrixType, - CwiseUnaryOp<internal::scalar_conjugate_op<Scalar>, const Diagonal<const MatrixType,0> > + const typename internal::remove_all<typename Diagonal<const MatrixType,0>::ConjugateReturnType>::type > HouseholderUSequenceType; typedef 
HouseholderSequence< const typename internal::remove_all<typename MatrixType::ConjugateReturnType>::type, @@ -53,7 +53,7 @@ template<typename _MatrixType> class UpperBidiagonalization */ UpperBidiagonalization() : m_householder(), m_bidiagonal(), m_isInitialized(false) {} - UpperBidiagonalization(const MatrixType& matrix) + explicit UpperBidiagonalization(const MatrixType& matrix) : m_householder(matrix.rows(), matrix.cols()), m_bidiagonal(matrix.cols(), matrix.cols()), m_isInitialized(false) @@ -220,10 +220,10 @@ void upperbidiagonalization_blocked_helper(MatrixType& A, if(k) u_k -= U_k1.adjoint() * X.row(k).head(k).adjoint(); } - // 5 - construct right Householder transform in-placecols + // 5 - construct right Householder transform in-place u_k.makeHouseholderInPlace(tau_u, upper_diagonal[k]); - // this eases the application of Householder transforAions + // this eases the application of Householder transformations // A(k,k+1) will store tau_u later A(k,k+1) = Scalar(1); diff --git a/Eigen/src/SparseCholesky/SimplicialCholesky.h b/Eigen/src/SparseCholesky/SimplicialCholesky.h index e1f96ba5a..22325d7f4 100644 --- a/Eigen/src/SparseCholesky/SimplicialCholesky.h +++ b/Eigen/src/SparseCholesky/SimplicialCholesky.h @@ -17,6 +17,27 @@ enum SimplicialCholeskyMode { SimplicialCholeskyLDLT }; +namespace internal { + template<typename CholMatrixType, typename InputMatrixType> + struct simplicial_cholesky_grab_input { + typedef CholMatrixType const * ConstCholMatrixPtr; + static void run(const InputMatrixType& input, ConstCholMatrixPtr &pmat, CholMatrixType &tmp) + { + tmp = input; + pmat = &tmp; + } + }; + + template<typename MatrixType> + struct simplicial_cholesky_grab_input<MatrixType,MatrixType> { + typedef MatrixType const * ConstMatrixPtr; + static void run(const MatrixType& input, ConstMatrixPtr &pmat, MatrixType &/*tmp*/) + { + pmat = &input; + } + }; +} // end namespace internal + /** \ingroup SparseCholesky_Module * \brief A direct sparse Cholesky 
factorizations * @@ -33,8 +54,11 @@ enum SimplicialCholeskyMode { * */ template<typename Derived> -class SimplicialCholeskyBase : internal::noncopyable +class SimplicialCholeskyBase : public SparseSolverBase<Derived> { + typedef SparseSolverBase<Derived> Base; + using Base::m_isInitialized; + public: typedef typename internal::traits<Derived>::MatrixType MatrixType; typedef typename internal::traits<Derived>::OrderingType OrderingType; @@ -43,17 +67,20 @@ class SimplicialCholeskyBase : internal::noncopyable typedef typename MatrixType::RealScalar RealScalar; typedef typename MatrixType::Index Index; typedef SparseMatrix<Scalar,ColMajor,Index> CholMatrixType; + typedef CholMatrixType const * ConstCholMatrixPtr; typedef Matrix<Scalar,Dynamic,1> VectorType; public: + + using Base::derived; /** Default constructor */ SimplicialCholeskyBase() - : m_info(Success), m_isInitialized(false), m_shiftOffset(0), m_shiftScale(1) + : m_info(Success), m_shiftOffset(0), m_shiftScale(1) {} - SimplicialCholeskyBase(const MatrixType& matrix) - : m_info(Success), m_isInitialized(false), m_shiftOffset(0), m_shiftScale(1) + explicit SimplicialCholeskyBase(const MatrixType& matrix) + : m_info(Success), m_shiftOffset(0), m_shiftScale(1) { derived().compute(matrix); } @@ -79,34 +106,6 @@ class SimplicialCholeskyBase : internal::noncopyable return m_info; } - /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A. - * - * \sa compute() - */ - template<typename Rhs> - inline const internal::solve_retval<SimplicialCholeskyBase, Rhs> - solve(const MatrixBase<Rhs>& b) const - { - eigen_assert(m_isInitialized && "Simplicial LLT or LDLT is not initialized."); - eigen_assert(rows()==b.rows() - && "SimplicialCholeskyBase::solve(): invalid number of rows of the right hand side matrix b"); - return internal::solve_retval<SimplicialCholeskyBase, Rhs>(*this, b.derived()); - } - - /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A. 
- * - * \sa compute() - */ - template<typename Rhs> - inline const internal::sparse_solve_retval<SimplicialCholeskyBase, Rhs> - solve(const SparseMatrixBase<Rhs>& b) const - { - eigen_assert(m_isInitialized && "Simplicial LLT or LDLT is not initialized."); - eigen_assert(rows()==b.rows() - && "SimplicialCholesky::solve(): invalid number of rows of the right hand side matrix b"); - return internal::sparse_solve_retval<SimplicialCholeskyBase, Rhs>(*this, b.derived()); - } - /** \returns the permutation P * \sa permutationPinv() */ const PermutationMatrix<Dynamic,Dynamic,Index>& permutationP() const @@ -150,7 +149,7 @@ class SimplicialCholeskyBase : internal::noncopyable /** \internal */ template<typename Rhs,typename Dest> - void _solve(const MatrixBase<Rhs> &b, MatrixBase<Dest> &dest) const + void _solve_impl(const MatrixBase<Rhs> &b, MatrixBase<Dest> &dest) const { eigen_assert(m_factorizationIsOk && "The decomposition is not in a valid state for solving, you must first call either compute() or symbolic()/numeric()"); eigen_assert(m_matrix.rows()==b.rows()); @@ -175,6 +174,12 @@ class SimplicialCholeskyBase : internal::noncopyable if(m_P.size()>0) dest = m_Pinv * dest; } + + template<typename Rhs,typename Dest> + void _solve_impl(const SparseMatrixBase<Rhs> &b, SparseMatrixBase<Dest> &dest) const + { + internal::solve_sparse_through_dense_panels(derived(), b, dest); + } #endif // EIGEN_PARSED_BY_DOXYGEN @@ -186,10 +191,11 @@ class SimplicialCholeskyBase : internal::noncopyable { eigen_assert(matrix.rows()==matrix.cols()); Index size = matrix.cols(); - CholMatrixType ap(size,size); - ordering(matrix, ap); - analyzePattern_preordered(ap, DoLDLT); - factorize_preordered<DoLDLT>(ap); + CholMatrixType tmp(size,size); + ConstCholMatrixPtr pmat; + ordering(matrix, pmat, tmp); + analyzePattern_preordered(*pmat, DoLDLT); + factorize_preordered<DoLDLT>(*pmat); } template<bool DoLDLT> @@ -197,9 +203,21 @@ class SimplicialCholeskyBase : internal::noncopyable { 
eigen_assert(a.rows()==a.cols()); int size = a.cols(); - CholMatrixType ap(size,size); - ap.template selfadjointView<Upper>() = a.template selfadjointView<UpLo>().twistedBy(m_P); - factorize_preordered<DoLDLT>(ap); + CholMatrixType tmp(size,size); + ConstCholMatrixPtr pmat; + + if(m_P.size()==0 && (UpLo&Upper)==Upper) + { + // If there is no ordering, try to directly use the input matrix without any copy + internal::simplicial_cholesky_grab_input<CholMatrixType,MatrixType>::run(a, pmat, tmp); + } + else + { + tmp.template selfadjointView<Upper>() = a.template selfadjointView<UpLo>().twistedBy(m_P); + pmat = &tmp; + } + + factorize_preordered<DoLDLT>(*pmat); } template<bool DoLDLT> @@ -209,13 +227,14 @@ class SimplicialCholeskyBase : internal::noncopyable { eigen_assert(a.rows()==a.cols()); int size = a.cols(); - CholMatrixType ap(size,size); - ordering(a, ap); - analyzePattern_preordered(ap,doLDLT); + CholMatrixType tmp(size,size); + ConstCholMatrixPtr pmat; + ordering(a, pmat, tmp); + analyzePattern_preordered(*pmat,doLDLT); } void analyzePattern_preordered(const CholMatrixType& a, bool doLDLT); - void ordering(const MatrixType& a, CholMatrixType& ap); + void ordering(const MatrixType& a, ConstCholMatrixPtr &pmat, CholMatrixType& ap); /** keeps off-diagonal entries; drops diagonal entries */ struct keep_diag { @@ -226,7 +245,6 @@ class SimplicialCholeskyBase : internal::noncopyable }; mutable ComputationInfo m_info; - bool m_isInitialized; bool m_factorizationIsOk; bool m_analysisIsOk; @@ -255,10 +273,10 @@ template<typename _MatrixType, int _UpLo, typename _Ordering> struct traits<Simp typedef typename MatrixType::Scalar Scalar; typedef typename MatrixType::Index Index; typedef SparseMatrix<Scalar, ColMajor, Index> CholMatrixType; - typedef SparseTriangularView<CholMatrixType, Eigen::Lower> MatrixL; - typedef SparseTriangularView<typename CholMatrixType::AdjointReturnType, Eigen::Upper> MatrixU; - static inline MatrixL getL(const MatrixType& m) { return m; } - 
static inline MatrixU getU(const MatrixType& m) { return m.adjoint(); } + typedef TriangularView<const CholMatrixType, Eigen::Lower> MatrixL; + typedef TriangularView<const typename CholMatrixType::AdjointReturnType, Eigen::Upper> MatrixU; + static inline MatrixL getL(const MatrixType& m) { return MatrixL(m); } + static inline MatrixU getU(const MatrixType& m) { return MatrixU(m.adjoint()); } }; template<typename _MatrixType,int _UpLo, typename _Ordering> struct traits<SimplicialLDLT<_MatrixType,_UpLo,_Ordering> > @@ -269,10 +287,10 @@ template<typename _MatrixType,int _UpLo, typename _Ordering> struct traits<Simpl typedef typename MatrixType::Scalar Scalar; typedef typename MatrixType::Index Index; typedef SparseMatrix<Scalar, ColMajor, Index> CholMatrixType; - typedef SparseTriangularView<CholMatrixType, Eigen::UnitLower> MatrixL; - typedef SparseTriangularView<typename CholMatrixType::AdjointReturnType, Eigen::UnitUpper> MatrixU; - static inline MatrixL getL(const MatrixType& m) { return m; } - static inline MatrixU getU(const MatrixType& m) { return m.adjoint(); } + typedef TriangularView<const CholMatrixType, Eigen::UnitLower> MatrixL; + typedef TriangularView<const typename CholMatrixType::AdjointReturnType, Eigen::UnitUpper> MatrixU; + static inline MatrixL getL(const MatrixType& m) { return MatrixL(m); } + static inline MatrixU getU(const MatrixType& m) { return MatrixU(m.adjoint()); } }; template<typename _MatrixType, int _UpLo, typename _Ordering> struct traits<SimplicialCholesky<_MatrixType,_UpLo,_Ordering> > @@ -321,7 +339,7 @@ public: /** Default constructor */ SimplicialLLT() : Base() {} /** Constructs and performs the LLT factorization of \a matrix */ - SimplicialLLT(const MatrixType& matrix) + explicit SimplicialLLT(const MatrixType& matrix) : Base(matrix) {} /** \returns an expression of the factor L */ @@ -411,7 +429,7 @@ public: SimplicialLDLT() : Base() {} /** Constructs and performs the LLT factorization of \a matrix */ - SimplicialLDLT(const 
MatrixType& matrix) + explicit SimplicialLDLT(const MatrixType& matrix) : Base(matrix) {} /** \returns a vector expression of the diagonal D */ @@ -491,7 +509,7 @@ public: public: SimplicialCholesky() : Base(), m_LDLT(true) {} - SimplicialCholesky(const MatrixType& matrix) + explicit SimplicialCholesky(const MatrixType& matrix) : Base(), m_LDLT(true) { compute(matrix); @@ -560,7 +578,7 @@ public: /** \internal */ template<typename Rhs,typename Dest> - void _solve(const MatrixBase<Rhs> &b, MatrixBase<Dest> &dest) const + void _solve_impl(const MatrixBase<Rhs> &b, MatrixBase<Dest> &dest) const { eigen_assert(Base::m_factorizationIsOk && "The decomposition is not in a valid state for solving, you must first call either compute() or symbolic()/numeric()"); eigen_assert(Base::m_matrix.rows()==b.rows()); @@ -596,6 +614,13 @@ public: dest = Base::m_Pinv * dest; } + /** \internal */ + template<typename Rhs,typename Dest> + void _solve_impl(const SparseMatrixBase<Rhs> &b, SparseMatrixBase<Dest> &dest) const + { + internal::solve_sparse_through_dense_panels(*this, b, dest); + } + Scalar determinant() const { if(m_LDLT) @@ -614,58 +639,43 @@ public: }; template<typename Derived> -void SimplicialCholeskyBase<Derived>::ordering(const MatrixType& a, CholMatrixType& ap) +void SimplicialCholeskyBase<Derived>::ordering(const MatrixType& a, ConstCholMatrixPtr &pmat, CholMatrixType& ap) { eigen_assert(a.rows()==a.cols()); const Index size = a.rows(); - // Note that amd compute the inverse permutation + pmat = &ap; + // Note that ordering methods compute the inverse permutation + if(!internal::is_same<OrderingType,NaturalOrdering<Index> >::value) { - CholMatrixType C; - C = a.template selfadjointView<UpLo>(); + { + CholMatrixType C; + C = a.template selfadjointView<UpLo>(); + + OrderingType ordering; + ordering(C,m_Pinv); + } + + if(m_Pinv.size()>0) m_P = m_Pinv.inverse(); + else m_P.resize(0); - OrderingType ordering; - ordering(C,m_Pinv); + ap.resize(size,size); + ap.template
selfadjointView<Upper>() = a.template selfadjointView<UpLo>().twistedBy(m_P); } - - if(m_Pinv.size()>0) - m_P = m_Pinv.inverse(); else + { + m_Pinv.resize(0); m_P.resize(0); - - ap.resize(size,size); - ap.template selfadjointView<Upper>() = a.template selfadjointView<UpLo>().twistedBy(m_P); + if(UpLo==Lower || MatrixType::IsRowMajor) + { + // we have to transpose the lower part to to the upper one + ap.resize(size,size); + ap.template selfadjointView<Upper>() = a.template selfadjointView<UpLo>(); + } + else + internal::simplicial_cholesky_grab_input<CholMatrixType,MatrixType>::run(a, pmat, ap); + } } -namespace internal { - -template<typename Derived, typename Rhs> -struct solve_retval<SimplicialCholeskyBase<Derived>, Rhs> - : solve_retval_base<SimplicialCholeskyBase<Derived>, Rhs> -{ - typedef SimplicialCholeskyBase<Derived> Dec; - EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs) - - template<typename Dest> void evalTo(Dest& dst) const - { - dec().derived()._solve(rhs(),dst); - } -}; - -template<typename Derived, typename Rhs> -struct sparse_solve_retval<SimplicialCholeskyBase<Derived>, Rhs> - : sparse_solve_retval_base<SimplicialCholeskyBase<Derived>, Rhs> -{ - typedef SimplicialCholeskyBase<Derived> Dec; - EIGEN_MAKE_SPARSE_SOLVE_HELPERS(Dec,Rhs) - - template<typename Dest> void evalTo(Dest& dst) const - { - this->defaultEvalTo(dst); - } -}; - -} // end namespace internal - } // end namespace Eigen #endif // EIGEN_SIMPLICIAL_CHOLESKY_H diff --git a/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h b/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h index 7aaf702be..b7fd62faa 100644 --- a/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +++ b/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h @@ -126,7 +126,7 @@ void SimplicialCholeskyBase<Derived>::factorize_preordered(const CholMatrixType& Index top = size; // stack for pattern is empty tags[k] = k; // mark node k as visited m_nonZerosPerCol[k] = 0; // count of nonzeros in column k of L - for(typename 
MatrixType::InnerIterator it(ap,k); it; ++it) + for(typename CholMatrixType::InnerIterator it(ap,k); it; ++it) { Index i = it.index(); if(i <= k) diff --git a/Eigen/src/SparseCore/AmbiVector.h b/Eigen/src/SparseCore/AmbiVector.h index 17fff96a7..76ef25f7d 100644 --- a/Eigen/src/SparseCore/AmbiVector.h +++ b/Eigen/src/SparseCore/AmbiVector.h @@ -27,7 +27,7 @@ class AmbiVector typedef _Index Index; typedef typename NumTraits<Scalar>::Real RealScalar; - AmbiVector(Index size) + explicit AmbiVector(Index size) : m_buffer(0), m_zero(0), m_size(0), m_allocatedSize(0), m_allocatedElements(0), m_mode(-1) { resize(size); @@ -69,7 +69,7 @@ class AmbiVector delete[] m_buffer; if (size<1000) { - Index allocSize = (size * sizeof(ListEl))/sizeof(Scalar); + Index allocSize = (size * sizeof(ListEl) + sizeof(Scalar) - 1)/sizeof(Scalar); m_allocatedElements = (allocSize*sizeof(Scalar))/sizeof(ListEl); m_buffer = new Scalar[allocSize]; } @@ -88,7 +88,7 @@ class AmbiVector Index copyElements = m_allocatedElements; m_allocatedElements = (std::min)(Index(m_allocatedElements*1.5),m_size); Index allocSize = m_allocatedElements * sizeof(ListEl); - allocSize = allocSize/sizeof(Scalar) + (allocSize%sizeof(Scalar)>0?1:0); + allocSize = (allocSize + sizeof(Scalar) - 1)/sizeof(Scalar); Scalar* newBuffer = new Scalar[allocSize]; memcpy(newBuffer, m_buffer, copyElements * sizeof(ListEl)); delete[] m_buffer; @@ -288,7 +288,7 @@ class AmbiVector<_Scalar,_Index>::Iterator * In practice, all coefficients having a magnitude smaller than \a epsilon * are skipped. 
*/ - Iterator(const AmbiVector& vec, const RealScalar& epsilon = 0) + explicit Iterator(const AmbiVector& vec, const RealScalar& epsilon = 0) : m_vector(vec) { using std::abs; diff --git a/Eigen/src/SparseCore/CompressedStorage.h b/Eigen/src/SparseCore/CompressedStorage.h index a667cb56e..99f741138 100644 --- a/Eigen/src/SparseCore/CompressedStorage.h +++ b/Eigen/src/SparseCore/CompressedStorage.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr> +// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr> // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed @@ -36,7 +36,7 @@ class CompressedStorage : m_values(0), m_indices(0), m_size(0), m_allocatedSize(0) {} - CompressedStorage(size_t size) + explicit CompressedStorage(size_t size) : m_values(0), m_indices(0), m_size(0), m_allocatedSize(0) { resize(size); @@ -108,15 +108,6 @@ class CompressedStorage inline Index& index(size_t i) { return m_indices[i]; } inline const Index& index(size_t i) const { return m_indices[i]; } - static CompressedStorage Map(Index* indices, Scalar* values, size_t size) - { - CompressedStorage res; - res.m_indices = indices; - res.m_values = values; - res.m_allocatedSize = res.m_size = size; - return res; - } - /** \returns the largest \c k such that for all \c j in [0,k) index[\c j]\<\a key */ inline Index searchLowerIndex(Index key) const { @@ -152,10 +143,10 @@ class CompressedStorage } /** Like at(), but the search is performed in the range [start,end) */ - inline Scalar atInRange(size_t start, size_t end, Index key, const Scalar& defaultValue = Scalar(0)) const + inline Scalar atInRange(size_t start, size_t end, Index key, const Scalar &defaultValue = Scalar(0)) const { if (start>=end) - return Scalar(0); + return defaultValue; else if (end>start && key==m_indices[end-1]) 
return m_values[end-1]; // ^^ optimization: let's first check if it is the last coefficient @@ -172,12 +163,31 @@ class CompressedStorage size_t id = searchLowerIndex(0,m_size,key); if (id>=m_size || m_indices[id]!=key) { - resize(m_size+1,1); - for (size_t j=m_size-1; j>id; --j) + if (m_allocatedSize<m_size+1) { - m_indices[j] = m_indices[j-1]; - m_values[j] = m_values[j-1]; + m_allocatedSize = 2*(m_size+1); + internal::scoped_array<Scalar> newValues(m_allocatedSize); + internal::scoped_array<Index> newIndices(m_allocatedSize); + + // copy first chunk + internal::smart_copy(m_values, m_values +id, newValues.ptr()); + internal::smart_copy(m_indices, m_indices+id, newIndices.ptr()); + + // copy the rest + if(m_size>id) + { + internal::smart_copy(m_values +id, m_values +m_size, newValues.ptr() +id+1); + internal::smart_copy(m_indices+id, m_indices+m_size, newIndices.ptr()+id+1); + } + std::swap(m_values,newValues.ptr()); + std::swap(m_indices,newIndices.ptr()); } + else if(m_size>id) + { + internal::smart_memmove(m_values +id, m_values +m_size, m_values +id+1); + internal::smart_memmove(m_indices+id, m_indices+m_size, m_indices+id+1); + } + m_size++; m_indices[id] = key; m_values[id] = defaultValue; } @@ -204,17 +214,14 @@ class CompressedStorage inline void reallocate(size_t size) { - Scalar* newValues = new Scalar[size]; - Index* newIndices = new Index[size]; + eigen_internal_assert(size!=m_allocatedSize); + internal::scoped_array<Scalar> newValues(size); + internal::scoped_array<Index> newIndices(size); size_t copySize = (std::min)(size, m_size); - // copy - internal::smart_copy(m_values, m_values+copySize, newValues); - internal::smart_copy(m_indices, m_indices+copySize, newIndices); - // delete old stuff - delete[] m_values; - delete[] m_indices; - m_values = newValues; - m_indices = newIndices; + internal::smart_copy(m_values, m_values+copySize, newValues.ptr()); + internal::smart_copy(m_indices, m_indices+copySize, newIndices.ptr()); + 
std::swap(m_values,newValues.ptr()); + std::swap(m_indices,newIndices.ptr()); m_allocatedSize = size; } diff --git a/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h b/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h index 67bc33a93..a30522ff7 100644 --- a/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +++ b/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2008-2011 Gael Guennebaud <gael.guennebaud@inria.fr> +// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr> // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed @@ -38,6 +38,9 @@ static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& r // per column of the lhs. // Therefore, we have nnz(lhs*rhs) = nnz(lhs) + nnz(rhs) Index estimated_nnz_prod = lhs.nonZeros() + rhs.nonZeros(); + + typename evaluator<Lhs>::type lhsEval(lhs); + typename evaluator<Rhs>::type rhsEval(rhs); res.setZero(); res.reserve(Index(estimated_nnz_prod)); @@ -47,11 +50,11 @@ static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& r res.startVec(j); Index nnz = 0; - for (typename Rhs::InnerIterator rhsIt(rhs, j); rhsIt; ++rhsIt) + for (typename evaluator<Rhs>::InnerIterator rhsIt(rhsEval, j); rhsIt; ++rhsIt) { Scalar y = rhsIt.value(); Index k = rhsIt.index(); - for (typename Lhs::InnerIterator lhsIt(lhs, k); lhsIt; ++lhsIt) + for (typename evaluator<Lhs>::InnerIterator lhsIt(lhsEval, k); lhsIt; ++lhsIt) { Index i = lhsIt.index(); Scalar x = lhsIt.value(); @@ -88,7 +91,7 @@ static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& r // otherwise => loop through the entire vector // In order to avoid to perform an expensive log2 when the // result is clearly very sparse we use a linear bound up to 200. 
- if((nnz<200 && nnz<t200) || nnz * log2(nnz) < t) + if((nnz<200 && nnz<t200) || nnz * numext::log2(int(nnz)) < t) { if(nnz>1) std::sort(indices,indices+nnz); for(Index k=0; k<nnz; ++k) @@ -138,6 +141,8 @@ struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,ColMajor,C typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::Index> ColMajorMatrixAux; typedef typename sparse_eval<ColMajorMatrixAux,ResultType::RowsAtCompileTime,ResultType::ColsAtCompileTime>::type ColMajorMatrix; + // If the result is tall and thin (in the extreme case a column vector) + // then it is faster to sort the coefficients inplace instead of transposing twice. // FIXME, the following heuristic is probably not very good. if(lhs.rows()>=rhs.cols()) { diff --git a/Eigen/src/SparseCore/MappedSparseMatrix.h b/Eigen/src/SparseCore/MappedSparseMatrix.h index ab1a266a9..2852c669a 100644 --- a/Eigen/src/SparseCore/MappedSparseMatrix.h +++ b/Eigen/src/SparseCore/MappedSparseMatrix.h @@ -176,6 +176,32 @@ class MappedSparseMatrix<Scalar,_Flags,_Index>::ReverseInnerIterator const Index m_end; }; +namespace internal { + +template<typename _Scalar, int _Options, typename _Index> +struct evaluator<MappedSparseMatrix<_Scalar,_Options,_Index> > + : evaluator_base<MappedSparseMatrix<_Scalar,_Options,_Index> > +{ + typedef MappedSparseMatrix<_Scalar,_Options,_Index> MappedSparseMatrixType; + typedef typename MappedSparseMatrixType::InnerIterator InnerIterator; + typedef typename MappedSparseMatrixType::ReverseInnerIterator ReverseInnerIterator; + + enum { + CoeffReadCost = NumTraits<_Scalar>::ReadCost, + Flags = MappedSparseMatrixType::Flags + }; + + evaluator() : m_matrix(0) {} + explicit evaluator(const MappedSparseMatrixType &mat) : m_matrix(&mat) {} + + operator MappedSparseMatrixType&() { return m_matrix->const_cast_derived(); } + operator const MappedSparseMatrixType&() const { return *m_matrix; } + + const MappedSparseMatrixType *m_matrix; +}; + +} + } // end 
namespace Eigen #endif // EIGEN_MAPPED_SPARSEMATRIX_H diff --git a/Eigen/src/SparseCore/SparseAssign.h b/Eigen/src/SparseCore/SparseAssign.h new file mode 100644 index 000000000..97c079d3f --- /dev/null +++ b/Eigen/src/SparseCore/SparseAssign.h @@ -0,0 +1,192 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_SPARSEASSIGN_H +#define EIGEN_SPARSEASSIGN_H + +namespace Eigen { + +template<typename Derived> +template<typename OtherDerived> +Derived& SparseMatrixBase<Derived>::operator=(const EigenBase<OtherDerived> &other) +{ + // TODO use the evaluator mechanism + other.derived().evalTo(derived()); + return derived(); +} + +template<typename Derived> +template<typename OtherDerived> +Derived& SparseMatrixBase<Derived>::operator=(const ReturnByValue<OtherDerived>& other) +{ + // TODO use the evaluator mechanism + other.evalTo(derived()); + return derived(); +} + +template<typename Derived> +template<typename OtherDerived> +inline Derived& SparseMatrixBase<Derived>::operator=(const SparseMatrixBase<OtherDerived>& other) +{ + // FIXME, by default sparse evaluation do not alias, so we should be able to bypass the generic call_assignment + internal::call_assignment/*_no_alias*/(derived(), other.derived()); + return derived(); +} + +template<typename Derived> +inline Derived& SparseMatrixBase<Derived>::operator=(const Derived& other) +{ + internal::call_assignment_no_alias(derived(), other.derived()); + return derived(); +} + +namespace internal { + +template<> +struct storage_kind_to_evaluator_kind<Sparse> { + typedef IteratorBased Kind; +}; + +template<> +struct storage_kind_to_shape<Sparse> { + typedef SparseShape Shape; +}; + +struct 
Sparse2Sparse {}; +struct Sparse2Dense {}; + +template<> struct AssignmentKind<SparseShape, SparseShape> { typedef Sparse2Sparse Kind; }; +template<> struct AssignmentKind<SparseShape, SparseTriangularShape> { typedef Sparse2Sparse Kind; }; +template<> struct AssignmentKind<DenseShape, SparseShape> { typedef Sparse2Dense Kind; }; + + +template<typename DstXprType, typename SrcXprType> +void assign_sparse_to_sparse(DstXprType &dst, const SrcXprType &src) +{ + eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); + + typedef typename DstXprType::Index Index; + typedef typename DstXprType::Scalar Scalar; + typedef typename internal::evaluator<DstXprType>::type DstEvaluatorType; + typedef typename internal::evaluator<SrcXprType>::type SrcEvaluatorType; + + SrcEvaluatorType srcEvaluator(src); + + const bool transpose = (DstEvaluatorType::Flags & RowMajorBit) != (SrcEvaluatorType::Flags & RowMajorBit); + const Index outerEvaluationSize = (SrcEvaluatorType::Flags&RowMajorBit) ? src.rows() : src.cols(); + if ((!transpose) && src.isRValue()) + { + // eval without temporary + dst.resize(src.rows(), src.cols()); + dst.setZero(); + dst.reserve((std::max)(src.rows(),src.cols())*2); + for (Index j=0; j<outerEvaluationSize; ++j) + { + dst.startVec(j); + for (typename SrcEvaluatorType::InnerIterator it(srcEvaluator, j); it; ++it) + { + Scalar v = it.value(); + dst.insertBackByOuterInner(j,it.index()) = v; + } + } + dst.finalize(); + } + else + { + // eval through a temporary + eigen_assert(( ((internal::traits<DstXprType>::SupportedAccessPatterns & OuterRandomAccessPattern)==OuterRandomAccessPattern) || + (!((DstEvaluatorType::Flags & RowMajorBit) != (SrcEvaluatorType::Flags & RowMajorBit)))) && + "the transpose operation is supposed to be handled in SparseMatrix::operator="); + + enum { Flip = (DstEvaluatorType::Flags & RowMajorBit) != (SrcEvaluatorType::Flags & RowMajorBit) }; + + + DstXprType temp(src.rows(), src.cols()); + + 
temp.reserve((std::max)(src.rows(),src.cols())*2); + for (Index j=0; j<outerEvaluationSize; ++j) + { + temp.startVec(j); + for (typename SrcEvaluatorType::InnerIterator it(srcEvaluator, j); it; ++it) + { + Scalar v = it.value(); + temp.insertBackByOuterInner(Flip?it.index():j,Flip?j:it.index()) = v; + } + } + temp.finalize(); + + dst = temp.markAsRValue(); + } +} + +// Generic Sparse to Sparse assignment +template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar> +struct Assignment<DstXprType, SrcXprType, Functor, Sparse2Sparse, Scalar> +{ + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar> &/*func*/) + { + eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); + + assign_sparse_to_sparse(dst.derived(), src.derived()); + } +}; + +// Sparse to Dense assignment +template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar> +struct Assignment<DstXprType, SrcXprType, Functor, Sparse2Dense, Scalar> +{ + static void run(DstXprType &dst, const SrcXprType &src, const Functor &func) + { + eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); + typedef typename SrcXprType::Index Index; + + typename internal::evaluator<SrcXprType>::type srcEval(src); + typename internal::evaluator<DstXprType>::type dstEval(dst); + const Index outerEvaluationSize = (internal::evaluator<SrcXprType>::Flags&RowMajorBit) ? 
src.rows() : src.cols(); + for (Index j=0; j<outerEvaluationSize; ++j) + for (typename internal::evaluator<SrcXprType>::InnerIterator i(srcEval,j); i; ++i) + func.assignCoeff(dstEval.coeffRef(i.row(),i.col()), i.value()); + } +}; + +template< typename DstXprType, typename SrcXprType, typename Scalar> +struct Assignment<DstXprType, SrcXprType, internal::assign_op<typename DstXprType::Scalar>, Sparse2Dense, Scalar> +{ + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar> &) + { + eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); + typedef typename SrcXprType::Index Index; + + dst.setZero(); + typename internal::evaluator<SrcXprType>::type srcEval(src); + typename internal::evaluator<DstXprType>::type dstEval(dst); + const Index outerEvaluationSize = (internal::evaluator<SrcXprType>::Flags&RowMajorBit) ? src.rows() : src.cols(); + for (Index j=0; j<outerEvaluationSize; ++j) + for (typename internal::evaluator<SrcXprType>::InnerIterator i(srcEval,j); i; ++i) + dstEval.coeffRef(i.row(),i.col()) = i.value(); + } +}; + +// Specialization for "dst = dec.solve(rhs)" +// NOTE we need to specialize it for Sparse2Sparse to avoid ambiguous specialization error +template<typename DstXprType, typename DecType, typename RhsType, typename Scalar> +struct Assignment<DstXprType, Solve<DecType,RhsType>, internal::assign_op<Scalar>, Sparse2Sparse, Scalar> +{ + typedef Solve<DecType,RhsType> SrcXprType; + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &) + { + src.dec()._solve_impl(src.rhs(), dst); + } +}; + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_SPARSEASSIGN_H diff --git a/Eigen/src/SparseCore/SparseBlock.h b/Eigen/src/SparseCore/SparseBlock.h index 491cc72b0..9e4da2057 100644 --- a/Eigen/src/SparseCore/SparseBlock.h +++ b/Eigen/src/SparseCore/SparseBlock.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template 
library
// for linear algebra.
//
-// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
@@ -12,6 +12,7 @@ namespace Eigen {
+// Subset of columns or rows
template<typename XprType, int BlockRows, int BlockCols>
class BlockImpl<XprType,BlockRows,BlockCols,true,Sparse>
: public SparseMatrixBase<Block<XprType,BlockRows,BlockCols,true> >
@@ -24,31 +25,6 @@ protected: enum { OuterSize = IsRowMajor ? BlockRows : BlockCols };
public:
EIGEN_SPARSE_PUBLIC_INTERFACE(BlockType)
-
- class InnerIterator: public XprType::InnerIterator
- {
- typedef typename BlockImpl::Index Index;
- public:
- inline InnerIterator(const BlockType& xpr, Index outer)
- : XprType::InnerIterator(xpr.m_matrix, xpr.m_outerStart + outer), m_outer(outer)
- {}
- inline Index row() const { return IsRowMajor ? m_outer : this->index(); }
- inline Index col() const { return IsRowMajor ? this->index() : m_outer; }
- protected:
- Index m_outer;
- };
- class ReverseInnerIterator: public XprType::ReverseInnerIterator
- {
- typedef typename BlockImpl::Index Index;
- public:
- inline ReverseInnerIterator(const BlockType& xpr, Index outer)
- : XprType::ReverseInnerIterator(xpr.m_matrix, xpr.m_outerStart + outer), m_outer(outer)
- {}
- inline Index row() const { return IsRowMajor ? m_outer : this->index(); }
- inline Index col() const { return IsRowMajor ? this->index() : m_outer; }
- protected:
- Index m_outer;
- };
inline BlockImpl(const XprType& xpr, Index i)
: m_matrix(xpr), m_outerStart(i), m_outerSize(OuterSize)
@@ -63,13 +39,21 @@ public: Index nonZeros() const
{
+ typedef typename internal::evaluator<XprType>::type EvaluatorType;
+ EvaluatorType matEval(m_matrix);
Index nnz = 0;
Index end = m_outerStart + m_outerSize.value();
- for(Index j=m_outerStart; j<end; ++j)
- for(typename XprType::InnerIterator it(m_matrix, j); it; ++it)
+ for(int j=m_outerStart; j<end; ++j)
+ for(typename EvaluatorType::InnerIterator it(matEval, j); it; ++it)
++nnz;
return nnz;
}
+
+ inline const _MatrixTypeNested& nestedExpression() const { return m_matrix; }
+ Index startRow() const { return IsRowMajor ? m_outerStart : 0; }
+ Index startCol() const { return IsRowMajor ? 0 : m_outerStart; }
+ Index blockRows() const { return IsRowMajor ? m_outerSize.value() : m_matrix.rows(); }
+ Index blockCols() const { return IsRowMajor ? m_matrix.cols() : m_outerSize.value(); }
protected:
@@ -100,29 +84,6 @@ public: protected:
enum { OuterSize = IsRowMajor ? BlockRows : BlockCols };
public:
-
- class InnerIterator: public SparseMatrixType::InnerIterator
- {
- public:
- inline InnerIterator(const BlockType& xpr, Index outer)
- : SparseMatrixType::InnerIterator(xpr.m_matrix, xpr.m_outerStart + outer), m_outer(outer)
- {}
- inline Index row() const { return IsRowMajor ? m_outer : this->index(); }
- inline Index col() const { return IsRowMajor ? this->index() : m_outer; }
- protected:
- Index m_outer;
- };
- class ReverseInnerIterator: public SparseMatrixType::ReverseInnerIterator
- {
- public:
- inline ReverseInnerIterator(const BlockType& xpr, Index outer)
- : SparseMatrixType::ReverseInnerIterator(xpr.m_matrix, xpr.m_outerStart + outer), m_outer(outer)
- {}
- inline Index row() const { return IsRowMajor ? m_outer : this->index(); }
- inline Index col() const { return IsRowMajor ? this->index() : m_outer; }
- protected:
- Index m_outer;
- };
inline sparse_matrix_block_impl(const SparseMatrixType& xpr, Index i)
: m_matrix(xpr), m_outerStart(i), m_outerSize(OuterSize)
@@ -248,6 +209,12 @@ public: EIGEN_STRONG_INLINE Index rows() const { return IsRowMajor ? m_outerSize.value() : m_matrix.rows(); }
EIGEN_STRONG_INLINE Index cols() const { return IsRowMajor ? m_matrix.cols() : m_outerSize.value(); }
+
+ inline const _MatrixTypeNested& nestedExpression() const { return m_matrix; } // the expression this block refers to
+ Index startRow() const { return IsRowMajor ? m_outerStart : 0; } // first row: the outer offset when row-major, otherwise 0
+ Index startCol() const { return IsRowMajor ? 0 : m_outerStart; } // first column: the outer offset when column-major, otherwise 0
+ Index blockRows() const { return IsRowMajor ? m_outerSize.value() : m_matrix.rows(); } // same expression as rows() in this class
+ Index blockCols() const { return IsRowMajor ? m_matrix.cols() : m_outerSize.value(); } // same expression as cols() in this class
protected:
@@ -407,32 +374,11 @@ public: }
inline const _MatrixTypeNested& nestedExpression() const { return m_matrix; }
+ Index startRow() const { return m_startRow.value(); } // row offset of the block, read from m_startRow
+ Index startCol() const { return m_startCol.value(); } // column offset of the block, read from m_startCol
+ Index blockRows() const { return m_blockRows.value(); } // number of rows of the block, read from m_blockRows
+ Index blockCols() const { return m_blockCols.value(); } // number of columns of the block, read from m_blockCols
- typedef internal::GenericSparseBlockInnerIteratorImpl<XprType,BlockRows,BlockCols,InnerPanel> InnerIterator;
-
- class ReverseInnerIterator : public _MatrixTypeNested::ReverseInnerIterator
- {
- typedef typename _MatrixTypeNested::ReverseInnerIterator Base;
- const BlockType& m_block;
- Index m_begin;
- public:
-
- EIGEN_STRONG_INLINE ReverseInnerIterator(const BlockType& block, Index outer)
- : Base(block.derived().nestedExpression(), outer + (IsRowMajor ? block.m_startRow.value() : block.m_startCol.value())),
- m_block(block),
- m_begin(IsRowMajor ? block.m_startCol.value() : block.m_startRow.value())
- {
- while( (Base::operator bool()) && (Base::index() >= (IsRowMajor ? m_block.m_startCol.value()+block.m_blockCols.value() : m_block.m_startRow.value()+block.m_blockRows.value())) )
- Base::operator--();
- }
-
- inline Index index() const { return Base::index() - (IsRowMajor ? m_block.m_startCol.value() : m_block.m_startRow.value()); }
- inline Index outer() const { return Base::outer() - (IsRowMajor ? m_block.m_startRow.value() : m_block.m_startCol.value()); }
- inline Index row() const { return Base::row() - m_block.m_startRow.value(); }
- inline Index col() const { return Base::col() - m_block.m_startCol.value(); }
-
- inline operator bool() const { return Base::operator bool() && Base::index() >= m_begin; }
- };
protected:
friend class internal::GenericSparseBlockInnerIteratorImpl<XprType,BlockRows,BlockCols,InnerPanel>;
friend class ReverseInnerIterator;
@@ -497,7 +443,7 @@ namespace internal { Index m_end;
public:
- EIGEN_STRONG_INLINE GenericSparseBlockInnerIteratorImpl(const BlockType& block, Index outer = 0)
+ explicit EIGEN_STRONG_INLINE GenericSparseBlockInnerIteratorImpl(const BlockType& block, Index outer = 0)
:
m_block(block),
m_outerPos( (IsRowMajor ? block.m_startCol.value() : block.m_startRow.value()) - 1), // -1 so that operator++ finds the first non-zero entry
@@ -520,10 +466,8 @@ namespace internal { inline GenericSparseBlockInnerIteratorImpl& operator++()
{
// search next non-zero entry
- while(m_outerPos<m_end)
+ while(++m_outerPos<m_end)
{
- m_outerPos++;
- if(m_outerPos==m_end) break;
typename XprType::InnerIterator it(m_block.m_matrix, m_outerPos);
// search for the key m_innerIndex in the current outer-vector
while(it && it.index() < m_innerIndex) ++it;
@@ -538,7 +482,119 @@ namespace internal { inline operator bool() const { return m_outerPos < m_end; }
};
+
+template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel> // unary_evaluator specialization for Block expressions with the IteratorBased tag
+struct unary_evaluator<Block<ArgType,BlockRows,BlockCols,InnerPanel>, IteratorBased >
+ : public evaluator_base<Block<ArgType,BlockRows,BlockCols,InnerPanel> >
+{
+ class InnerVectorInnerIterator; // declared here, defined out of class further down in this file
+ class OuterVectorInnerIterator; // declared here, defined out of class further down in this file
+ public:
+ typedef Block<ArgType,BlockRows,BlockCols,InnerPanel> XprType;
+ typedef typename XprType::Index Index;
+ typedef typename XprType::Scalar Scalar;
+
+ class ReverseInnerIterator; // declaration only; NOTE(review): no definition is visible in this part of the file
+
+ enum {
+ IsRowMajor = XprType::IsRowMajor,
+
+ OuterVector = (BlockCols==1 && ArgType::IsRowMajor) // nonzero when the block is one column of a row-major argument or one row of a column-major argument
+ | // FIXME | instead of || to please GCC 4.4.0 stupid warning "suggest parentheses around &&".
+ // revert to || as soon as not needed anymore.
+ (BlockRows==1 && !ArgType::IsRowMajor),
+
+ CoeffReadCost = evaluator<ArgType>::CoeffReadCost, // forwarded unchanged from the argument's evaluator
+ Flags = XprType::Flags
+ };
+
+ typedef typename internal::conditional<OuterVector,OuterVectorInnerIterator,InnerVectorInnerIterator>::type InnerIterator; // OuterVectorInnerIterator when OuterVector is nonzero, InnerVectorInnerIterator otherwise
+
+ explicit unary_evaluator(const XprType& op)
+ : m_argImpl(op.nestedExpression()), m_block(op) // evaluator built from the nested expression; the block itself is only referenced
+ {}
+
+ protected:
+ typedef typename evaluator<ArgType>::InnerIterator EvalIterator; // iterator type of the argument's evaluator, used by both iterator classes
+
+ typename evaluator<ArgType>::nestedType m_argImpl;
+ const XprType &m_block; // held by reference, not copied; NOTE(review): the Block passed to the constructor presumably has to outlive this evaluator
+};
+
+template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel> // iterator over one outer vector of the argument, limited to the block's inner index range
+class unary_evaluator<Block<ArgType,BlockRows,BlockCols,InnerPanel>, IteratorBased>::InnerVectorInnerIterator
+ : public EvalIterator
+{
+ const XprType& m_block;
+ Index m_end; // one past the last inner index of the argument that lies inside the block
+public:
+
+ EIGEN_STRONG_INLINE InnerVectorInnerIterator(const unary_evaluator& aEval, Index outer)
+ : EvalIterator(aEval.m_argImpl, outer + (IsRowMajor ? aEval.m_block.startRow() : aEval.m_block.startCol())), // 'outer' is block-relative; shift it by the block's outer offset
+ m_block(aEval.m_block),
+ m_end(IsRowMajor ? aEval.m_block.startCol()+aEval.m_block.blockCols() : aEval.m_block.startRow()+aEval.m_block.blockRows()) // inner start + inner extent
+ {
+ while( (EvalIterator::operator bool()) && (EvalIterator::index() < (IsRowMajor ? m_block.startCol() : m_block.startRow())) ) // skip entries located before the block's first inner index
+ EvalIterator::operator++();
+ }
+
+ inline Index index() const { return EvalIterator::index() - (IsRowMajor ? m_block.startCol() : m_block.startRow()); } // inner index relative to the block
+ inline Index outer() const { return EvalIterator::outer() - (IsRowMajor ? m_block.startRow() : m_block.startCol()); } // outer index relative to the block
+ inline Index row() const { return EvalIterator::row() - m_block.startRow(); } // row relative to the block
+ inline Index col() const { return EvalIterator::col() - m_block.startCol(); } // column relative to the block
+
+ inline operator bool() const { return EvalIterator::operator bool() && EvalIterator::index() < m_end; } // false once the base iterator is exhausted or has moved past the block's inner range
+};
+
+template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel> // iterator that steps across the argument's outer vectors, looking for entries at one fixed inner index
+class unary_evaluator<Block<ArgType,BlockRows,BlockCols,InnerPanel>, IteratorBased>::OuterVectorInnerIterator
+{
+ const unary_evaluator& m_eval;
+ Index m_outerPos; // outer position in the argument of the current entry
+ Index m_innerIndex; // the fixed inner index that every outer vector is searched for
+ Scalar m_value; // assigned in operator++ when a match is found; it does not appear in the constructor's initializer list
+ Index m_end; // one past the last outer position to examine
+public:
+
+ EIGEN_STRONG_INLINE OuterVectorInnerIterator(const unary_evaluator& aEval, Index outer)
+ : m_eval(aEval),
+ m_outerPos( (IsRowMajor ? aEval.m_block.startCol() : aEval.m_block.startRow()) - 1), // -1 so that operator++ finds the first non-zero entry
+ m_innerIndex(IsRowMajor ? aEval.m_block.startRow() : aEval.m_block.startCol()),
+ m_end(IsRowMajor ? aEval.m_block.startCol()+aEval.m_block.blockCols() : aEval.m_block.startRow()+aEval.m_block.blockRows())
+ {
+ EIGEN_UNUSED_VARIABLE(outer);
+ eigen_assert(outer==0); // 'outer' is otherwise unused: only 0 is accepted
+
+ ++(*this); // move from the "start - 1" position onto the first match, if there is one
+ }
+
+ inline Index index() const { return m_outerPos - (IsRowMajor ? m_eval.m_block.startCol() : m_eval.m_block.startRow()); } // position relative to the block start
+ inline Index outer() const { return 0; }
+ inline Index row() const { return IsRowMajor ? 0 : index(); }
+ inline Index col() const { return IsRowMajor ? index() : 0; }
+ inline Scalar value() const { return m_value; } // value cached by the last successful operator++
+
+ inline OuterVectorInnerIterator& operator++()
+ {
+ // search next non-zero entry
+ while(++m_outerPos<m_end) // pre-increment: leaves m_outerPos == m_end when nothing more is found
+ {
+ EvalIterator it(m_eval.m_argImpl, m_outerPos); // a fresh iterator over the candidate outer vector
+ // search for the key m_innerIndex in the current outer-vector
+ while(it && it.index() < m_innerIndex) ++it;
+ if(it && it.index()==m_innerIndex)
+ {
+ m_value = it.value();
+ break;
+ }
+ }
+ return *this;
+ }
+
+ inline operator bool() const { return m_outerPos < m_end; } // false once operator++ has run past the last outer position
+};
+
} // end namespace internal
diff --git a/Eigen/src/SparseCore/SparseCwiseBinaryOp.h b/Eigen/src/SparseCore/SparseCwiseBinaryOp.h index 60fdd214a..94ca9b1a4 100644 --- a/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +++ b/Eigen/src/SparseCore/SparseCwiseBinaryOp.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr> +// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr> // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed @@ -31,12 +31,6 @@ namespace Eigen { namespace internal { -template<> struct promote_storage_type<Dense,Sparse> -{ typedef Sparse ret; }; - -template<> struct promote_storage_type<Sparse,Dense> -{ typedef Sparse ret; }; - template<typename BinaryOp, typename Lhs, typename Rhs, typename Derived, typename _LhsStorageMode = typename traits<Lhs>::StorageKind, typename _RhsStorageMode = typename traits<Rhs>::StorageKind> @@ -44,71 +38,35 @@ class sparse_cwise_binary_op_inner_iterator_selector; } // end namespace internal -template<typename BinaryOp, typename Lhs, typename Rhs> -class CwiseBinaryOpImpl<BinaryOp, Lhs, Rhs, Sparse> - : public SparseMatrixBase<CwiseBinaryOp<BinaryOp, Lhs, Rhs> > -{ - public: - class InnerIterator; - class ReverseInnerIterator; - typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> Derived; - EIGEN_SPARSE_PUBLIC_INTERFACE(Derived) - CwiseBinaryOpImpl() - { - typedef typename internal::traits<Lhs>::StorageKind LhsStorageKind; - typedef typename internal::traits<Rhs>::StorageKind RhsStorageKind; - EIGEN_STATIC_ASSERT(( - (!internal::is_same<LhsStorageKind,RhsStorageKind>::value) - || ((Lhs::Flags&RowMajorBit) == (Rhs::Flags&RowMajorBit))), - THE_STORAGE_ORDER_OF_BOTH_SIDES_MUST_MATCH); - } -}; - -template<typename BinaryOp, typename Lhs, typename Rhs> -class CwiseBinaryOpImpl<BinaryOp,Lhs,Rhs,Sparse>::InnerIterator - : public 
internal::sparse_cwise_binary_op_inner_iterator_selector<BinaryOp,Lhs,Rhs,typename CwiseBinaryOpImpl<BinaryOp,Lhs,Rhs,Sparse>::InnerIterator> -{ - public: - typedef internal::sparse_cwise_binary_op_inner_iterator_selector< - BinaryOp,Lhs,Rhs, InnerIterator> Base; - - EIGEN_STRONG_INLINE InnerIterator(const CwiseBinaryOpImpl& binOp, Index outer) - : Base(binOp.derived(),outer) - {} -}; - -/*************************************************************************** -* Implementation of inner-iterators -***************************************************************************/ - -// template<typename T> struct internal::func_is_conjunction { enum { ret = false }; }; -// template<typename T> struct internal::func_is_conjunction<internal::scalar_product_op<T> > { enum { ret = true }; }; - -// TODO generalize the internal::scalar_product_op specialization to all conjunctions if any ! - namespace internal { -// sparse - sparse (generic) -template<typename BinaryOp, typename Lhs, typename Rhs, typename Derived> -class sparse_cwise_binary_op_inner_iterator_selector<BinaryOp, Lhs, Rhs, Derived, Sparse, Sparse> + +// Generic "sparse OP sparse" +template<typename BinaryOp, typename Lhs, typename Rhs> +struct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IteratorBased, IteratorBased> + : evaluator_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> > { - typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> CwiseBinaryXpr; - typedef typename traits<CwiseBinaryXpr>::Scalar Scalar; - typedef typename traits<CwiseBinaryXpr>::Index Index; - typedef typename traits<CwiseBinaryXpr>::_LhsNested _LhsNested; - typedef typename traits<CwiseBinaryXpr>::_RhsNested _RhsNested; - typedef typename _LhsNested::InnerIterator LhsIterator; - typedef typename _RhsNested::InnerIterator RhsIterator; +protected: + typedef typename evaluator<Lhs>::InnerIterator LhsIterator; + typedef typename evaluator<Rhs>::InnerIterator RhsIterator; +public: + typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> XprType; + + class 
ReverseInnerIterator; + class InnerIterator + { + typedef typename traits<XprType>::Scalar Scalar; + typedef typename XprType::Index Index; public: - - EIGEN_STRONG_INLINE sparse_cwise_binary_op_inner_iterator_selector(const CwiseBinaryXpr& xpr, Index outer) - : m_lhsIter(xpr.lhs(),outer), m_rhsIter(xpr.rhs(),outer), m_functor(xpr.functor()) + + EIGEN_STRONG_INLINE InnerIterator(const binary_evaluator& aEval, Index outer) + : m_lhsIter(aEval.m_lhsImpl,outer), m_rhsIter(aEval.m_rhsImpl,outer), m_functor(aEval.m_functor) { this->operator++(); } - EIGEN_STRONG_INLINE Derived& operator++() + EIGEN_STRONG_INLINE InnerIterator& operator++() { if (m_lhsIter && m_rhsIter && (m_lhsIter.index() == m_rhsIter.index())) { @@ -134,7 +92,7 @@ class sparse_cwise_binary_op_inner_iterator_selector<BinaryOp, Lhs, Rhs, Derived m_value = 0; // this is to avoid a compilation warning m_id = -1; } - return *static_cast<Derived*>(this); + return *this; } EIGEN_STRONG_INLINE Scalar value() const { return m_value; } @@ -151,24 +109,48 @@ class sparse_cwise_binary_op_inner_iterator_selector<BinaryOp, Lhs, Rhs, Derived const BinaryOp& m_functor; Scalar m_value; Index m_id; + }; + + + enum { + CoeffReadCost = evaluator<Lhs>::CoeffReadCost + evaluator<Rhs>::CoeffReadCost + functor_traits<BinaryOp>::Cost, + Flags = XprType::Flags + }; + + explicit binary_evaluator(const XprType& xpr) + : m_functor(xpr.functor()), + m_lhsImpl(xpr.lhs()), + m_rhsImpl(xpr.rhs()) + { } + +protected: + const BinaryOp m_functor; + typename evaluator<Lhs>::nestedType m_lhsImpl; + typename evaluator<Rhs>::nestedType m_rhsImpl; }; -// sparse - sparse (product) -template<typename T, typename Lhs, typename Rhs, typename Derived> -class sparse_cwise_binary_op_inner_iterator_selector<scalar_product_op<T>, Lhs, Rhs, Derived, Sparse, Sparse> +// "sparse .* sparse" +template<typename T, typename Lhs, typename Rhs> +struct binary_evaluator<CwiseBinaryOp<scalar_product_op<T>, Lhs, Rhs>, IteratorBased, IteratorBased> + : 
evaluator_base<CwiseBinaryOp<scalar_product_op<T>, Lhs, Rhs> > { - typedef scalar_product_op<T> BinaryFunc; - typedef CwiseBinaryOp<BinaryFunc, Lhs, Rhs> CwiseBinaryXpr; - typedef typename CwiseBinaryXpr::Scalar Scalar; - typedef typename CwiseBinaryXpr::Index Index; - typedef typename traits<CwiseBinaryXpr>::_LhsNested _LhsNested; - typedef typename _LhsNested::InnerIterator LhsIterator; - typedef typename traits<CwiseBinaryXpr>::_RhsNested _RhsNested; - typedef typename _RhsNested::InnerIterator RhsIterator; - public: +protected: + typedef scalar_product_op<T> BinaryOp; + typedef typename evaluator<Lhs>::InnerIterator LhsIterator; + typedef typename evaluator<Rhs>::InnerIterator RhsIterator; +public: + typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> XprType; + + class ReverseInnerIterator; + class InnerIterator + { + typedef typename traits<XprType>::Scalar Scalar; + typedef typename XprType::Index Index; - EIGEN_STRONG_INLINE sparse_cwise_binary_op_inner_iterator_selector(const CwiseBinaryXpr& xpr, Index outer) - : m_lhsIter(xpr.lhs(),outer), m_rhsIter(xpr.rhs(),outer), m_functor(xpr.functor()) + public: + + EIGEN_STRONG_INLINE InnerIterator(const binary_evaluator& aEval, Index outer) + : m_lhsIter(aEval.m_lhsImpl,outer), m_rhsIter(aEval.m_rhsImpl,outer), m_functor(aEval.m_functor) { while (m_lhsIter && m_rhsIter && (m_lhsIter.index() != m_rhsIter.index())) { @@ -179,7 +161,7 @@ class sparse_cwise_binary_op_inner_iterator_selector<scalar_product_op<T>, Lhs, } } - EIGEN_STRONG_INLINE Derived& operator++() + EIGEN_STRONG_INLINE InnerIterator& operator++() { ++m_lhsIter; ++m_rhsIter; @@ -190,9 +172,9 @@ class sparse_cwise_binary_op_inner_iterator_selector<scalar_product_op<T>, Lhs, else ++m_rhsIter; } - return *static_cast<Derived*>(this); + return *this; } - + EIGEN_STRONG_INLINE Scalar value() const { return m_functor(m_lhsIter.value(), m_rhsIter.value()); } EIGEN_STRONG_INLINE Index index() const { return m_lhsIter.index(); } @@ -204,91 +186,159 @@ class 
sparse_cwise_binary_op_inner_iterator_selector<scalar_product_op<T>, Lhs, protected: LhsIterator m_lhsIter; RhsIterator m_rhsIter; - const BinaryFunc& m_functor; + const BinaryOp& m_functor; + }; + + + enum { + CoeffReadCost = evaluator<Lhs>::CoeffReadCost + evaluator<Rhs>::CoeffReadCost + functor_traits<BinaryOp>::Cost, + Flags = XprType::Flags + }; + + explicit binary_evaluator(const XprType& xpr) + : m_functor(xpr.functor()), + m_lhsImpl(xpr.lhs()), + m_rhsImpl(xpr.rhs()) + { } + +protected: + const BinaryOp m_functor; + typename evaluator<Lhs>::nestedType m_lhsImpl; + typename evaluator<Rhs>::nestedType m_rhsImpl; }; -// sparse - dense (product) -template<typename T, typename Lhs, typename Rhs, typename Derived> -class sparse_cwise_binary_op_inner_iterator_selector<scalar_product_op<T>, Lhs, Rhs, Derived, Sparse, Dense> +// "dense .* sparse" +template<typename T, typename Lhs, typename Rhs> +struct binary_evaluator<CwiseBinaryOp<scalar_product_op<T>, Lhs, Rhs>, IndexBased, IteratorBased> + : evaluator_base<CwiseBinaryOp<scalar_product_op<T>, Lhs, Rhs> > { - typedef scalar_product_op<T> BinaryFunc; - typedef CwiseBinaryOp<BinaryFunc, Lhs, Rhs> CwiseBinaryXpr; - typedef typename CwiseBinaryXpr::Scalar Scalar; - typedef typename CwiseBinaryXpr::Index Index; - typedef typename traits<CwiseBinaryXpr>::_LhsNested _LhsNested; - typedef typename traits<CwiseBinaryXpr>::RhsNested RhsNested; - typedef typename _LhsNested::InnerIterator LhsIterator; - enum { IsRowMajor = (int(Lhs::Flags)&RowMajorBit)==RowMajorBit }; - public: +protected: + typedef scalar_product_op<T> BinaryOp; + typedef typename evaluator<Lhs>::type LhsEvaluator; + typedef typename evaluator<Rhs>::InnerIterator RhsIterator; +public: + typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> XprType; + + class ReverseInnerIterator; + class InnerIterator + { + typedef typename traits<XprType>::Scalar Scalar; + typedef typename XprType::Index Index; + enum { IsRowMajor = (int(Rhs::Flags)&RowMajorBit)==RowMajorBit }; - 
EIGEN_STRONG_INLINE sparse_cwise_binary_op_inner_iterator_selector(const CwiseBinaryXpr& xpr, Index outer) - : m_rhs(xpr.rhs()), m_lhsIter(xpr.lhs(),typename _LhsNested::Index(outer)), m_functor(xpr.functor()), m_outer(outer) + public: + + EIGEN_STRONG_INLINE InnerIterator(const binary_evaluator& aEval, Index outer) + : m_lhsEval(aEval.m_lhsImpl), m_rhsIter(aEval.m_rhsImpl,outer), m_functor(aEval.m_functor), m_outer(outer) {} - EIGEN_STRONG_INLINE Derived& operator++() + EIGEN_STRONG_INLINE InnerIterator& operator++() { - ++m_lhsIter; - return *static_cast<Derived*>(this); + ++m_rhsIter; + return *this; } EIGEN_STRONG_INLINE Scalar value() const - { return m_functor(m_lhsIter.value(), - m_rhs.coeff(IsRowMajor?m_outer:m_lhsIter.index(),IsRowMajor?m_lhsIter.index():m_outer)); } + { return m_functor(m_lhsEval.coeff(IsRowMajor?m_outer:m_rhsIter.index(),IsRowMajor?m_rhsIter.index():m_outer), m_rhsIter.value()); } - EIGEN_STRONG_INLINE Index index() const { return m_lhsIter.index(); } - EIGEN_STRONG_INLINE Index row() const { return m_lhsIter.row(); } - EIGEN_STRONG_INLINE Index col() const { return m_lhsIter.col(); } + EIGEN_STRONG_INLINE Index index() const { return m_rhsIter.index(); } + EIGEN_STRONG_INLINE Index row() const { return m_rhsIter.row(); } + EIGEN_STRONG_INLINE Index col() const { return m_rhsIter.col(); } - EIGEN_STRONG_INLINE operator bool() const { return m_lhsIter; } + EIGEN_STRONG_INLINE operator bool() const { return m_rhsIter; } protected: - RhsNested m_rhs; - LhsIterator m_lhsIter; - const BinaryFunc m_functor; + const LhsEvaluator &m_lhsEval; + RhsIterator m_rhsIter; + const BinaryOp& m_functor; const Index m_outer; + }; + + + enum { + CoeffReadCost = evaluator<Lhs>::CoeffReadCost + evaluator<Rhs>::CoeffReadCost + functor_traits<BinaryOp>::Cost, + Flags = XprType::Flags + }; + + explicit binary_evaluator(const XprType& xpr) + : m_functor(xpr.functor()), + m_lhsImpl(xpr.lhs()), + m_rhsImpl(xpr.rhs()) + { } + +protected: + const BinaryOp m_functor; 
+ typename evaluator<Lhs>::nestedType m_lhsImpl; + typename evaluator<Rhs>::nestedType m_rhsImpl; }; -// sparse - dense (product) -template<typename T, typename Lhs, typename Rhs, typename Derived> -class sparse_cwise_binary_op_inner_iterator_selector<scalar_product_op<T>, Lhs, Rhs, Derived, Dense, Sparse> +// "sparse .* dense" +template<typename T, typename Lhs, typename Rhs> +struct binary_evaluator<CwiseBinaryOp<scalar_product_op<T>, Lhs, Rhs>, IteratorBased, IndexBased> + : evaluator_base<CwiseBinaryOp<scalar_product_op<T>, Lhs, Rhs> > { - typedef scalar_product_op<T> BinaryFunc; - typedef CwiseBinaryOp<BinaryFunc, Lhs, Rhs> CwiseBinaryXpr; - typedef typename CwiseBinaryXpr::Scalar Scalar; - typedef typename CwiseBinaryXpr::Index Index; - typedef typename traits<CwiseBinaryXpr>::_RhsNested _RhsNested; - typedef typename _RhsNested::InnerIterator RhsIterator; +protected: + typedef scalar_product_op<T> BinaryOp; + typedef typename evaluator<Lhs>::InnerIterator LhsIterator; + typedef typename evaluator<Rhs>::type RhsEvaluator; +public: + typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> XprType; + + class ReverseInnerIterator; + class InnerIterator + { + typedef typename traits<XprType>::Scalar Scalar; + typedef typename XprType::Index Index; + enum { IsRowMajor = (int(Lhs::Flags)&RowMajorBit)==RowMajorBit }; - enum { IsRowMajor = (int(Rhs::Flags)&RowMajorBit)==RowMajorBit }; public: - - EIGEN_STRONG_INLINE sparse_cwise_binary_op_inner_iterator_selector(const CwiseBinaryXpr& xpr, Index outer) - : m_xpr(xpr), m_rhsIter(xpr.rhs(),outer), m_functor(xpr.functor()), m_outer(outer) + + EIGEN_STRONG_INLINE InnerIterator(const binary_evaluator& aEval, Index outer) + : m_lhsIter(aEval.m_lhsImpl,outer), m_rhsEval(aEval.m_rhsImpl), m_functor(aEval.m_functor), m_outer(outer) {} - EIGEN_STRONG_INLINE Derived& operator++() + EIGEN_STRONG_INLINE InnerIterator& operator++() { - ++m_rhsIter; - return *static_cast<Derived*>(this); + ++m_lhsIter; + return *this; } EIGEN_STRONG_INLINE Scalar 
value() const - { return m_functor(m_xpr.lhs().coeff(IsRowMajor?m_outer:m_rhsIter.index(),IsRowMajor?m_rhsIter.index():m_outer), m_rhsIter.value()); } + { return m_functor(m_lhsIter.value(), + m_rhsEval.coeff(IsRowMajor?m_outer:m_lhsIter.index(),IsRowMajor?m_lhsIter.index():m_outer)); } - EIGEN_STRONG_INLINE Index index() const { return m_rhsIter.index(); } - EIGEN_STRONG_INLINE Index row() const { return m_rhsIter.row(); } - EIGEN_STRONG_INLINE Index col() const { return m_rhsIter.col(); } + EIGEN_STRONG_INLINE Index index() const { return m_lhsIter.index(); } + EIGEN_STRONG_INLINE Index row() const { return m_lhsIter.row(); } + EIGEN_STRONG_INLINE Index col() const { return m_lhsIter.col(); } - EIGEN_STRONG_INLINE operator bool() const { return m_rhsIter; } + EIGEN_STRONG_INLINE operator bool() const { return m_lhsIter; } protected: - const CwiseBinaryXpr& m_xpr; - RhsIterator m_rhsIter; - const BinaryFunc& m_functor; + LhsIterator m_lhsIter; + const RhsEvaluator &m_rhsEval; + const BinaryOp& m_functor; const Index m_outer; + }; + + + enum { + CoeffReadCost = evaluator<Lhs>::CoeffReadCost + evaluator<Rhs>::CoeffReadCost + functor_traits<BinaryOp>::Cost, + Flags = XprType::Flags + }; + + explicit binary_evaluator(const XprType& xpr) + : m_functor(xpr.functor()), + m_lhsImpl(xpr.lhs()), + m_rhsImpl(xpr.rhs()) + { } + +protected: + const BinaryOp m_functor; + typename evaluator<Lhs>::nestedType m_lhsImpl; + typename evaluator<Rhs>::nestedType m_rhsImpl; }; -} // end namespace internal +} /*************************************************************************** * Implementation of SparseMatrixBase and SparseCwise functions/operators diff --git a/Eigen/src/SparseCore/SparseCwiseUnaryOp.h b/Eigen/src/SparseCore/SparseCwiseUnaryOp.h index 5a50c7803..32b7bc949 100644 --- a/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +++ b/Eigen/src/SparseCore/SparseCwiseUnaryOp.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. 
// -// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr> +// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr> // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed @@ -12,131 +12,154 @@ namespace Eigen { -template<typename UnaryOp, typename MatrixType> -class CwiseUnaryOpImpl<UnaryOp,MatrixType,Sparse> - : public SparseMatrixBase<CwiseUnaryOp<UnaryOp, MatrixType> > +namespace internal { + +template<typename UnaryOp, typename ArgType> +struct unary_evaluator<CwiseUnaryOp<UnaryOp,ArgType>, IteratorBased> + : public evaluator_base<CwiseUnaryOp<UnaryOp,ArgType> > { public: + typedef CwiseUnaryOp<UnaryOp, ArgType> XprType; class InnerIterator; - class ReverseInnerIterator; - - typedef CwiseUnaryOp<UnaryOp, MatrixType> Derived; - EIGEN_SPARSE_PUBLIC_INTERFACE(Derived) +// class ReverseInnerIterator; + + enum { + CoeffReadCost = evaluator<ArgType>::CoeffReadCost + functor_traits<UnaryOp>::Cost, + Flags = XprType::Flags + }; + + explicit unary_evaluator(const XprType& op) : m_functor(op.functor()), m_argImpl(op.nestedExpression()) {} protected: - typedef typename internal::traits<Derived>::_XprTypeNested _MatrixTypeNested; - typedef typename _MatrixTypeNested::InnerIterator MatrixTypeIterator; - typedef typename _MatrixTypeNested::ReverseInnerIterator MatrixTypeReverseIterator; + typedef typename evaluator<ArgType>::InnerIterator EvalIterator; +// typedef typename evaluator<ArgType>::ReverseInnerIterator EvalReverseIterator; + + const UnaryOp m_functor; + typename evaluator<ArgType>::nestedType m_argImpl; }; -template<typename UnaryOp, typename MatrixType> -class CwiseUnaryOpImpl<UnaryOp,MatrixType,Sparse>::InnerIterator - : public CwiseUnaryOpImpl<UnaryOp,MatrixType,Sparse>::MatrixTypeIterator +template<typename UnaryOp, typename ArgType> +class unary_evaluator<CwiseUnaryOp<UnaryOp,ArgType>, IteratorBased>::InnerIterator + : public 
unary_evaluator<CwiseUnaryOp<UnaryOp,ArgType>, IteratorBased>::EvalIterator { - typedef typename CwiseUnaryOpImpl::Scalar Scalar; - typedef typename CwiseUnaryOpImpl<UnaryOp,MatrixType,Sparse>::MatrixTypeIterator Base; + typedef typename XprType::Scalar Scalar; + typedef typename unary_evaluator<CwiseUnaryOp<UnaryOp,ArgType>, IteratorBased>::EvalIterator Base; public: - EIGEN_STRONG_INLINE InnerIterator(const CwiseUnaryOpImpl& unaryOp, typename CwiseUnaryOpImpl::Index outer) - : Base(unaryOp.derived().nestedExpression(),outer), m_functor(unaryOp.derived().functor()) + EIGEN_STRONG_INLINE InnerIterator(const unary_evaluator& unaryOp, typename XprType::Index outer) + : Base(unaryOp.m_argImpl,outer), m_functor(unaryOp.m_functor) {} EIGEN_STRONG_INLINE InnerIterator& operator++() { Base::operator++(); return *this; } - EIGEN_STRONG_INLINE typename CwiseUnaryOpImpl::Scalar value() const { return m_functor(Base::value()); } + EIGEN_STRONG_INLINE Scalar value() const { return m_functor(Base::value()); } protected: const UnaryOp m_functor; private: - typename CwiseUnaryOpImpl::Scalar& valueRef(); + Scalar& valueRef(); }; -template<typename UnaryOp, typename MatrixType> -class CwiseUnaryOpImpl<UnaryOp,MatrixType,Sparse>::ReverseInnerIterator - : public CwiseUnaryOpImpl<UnaryOp,MatrixType,Sparse>::MatrixTypeReverseIterator -{ - typedef typename CwiseUnaryOpImpl::Scalar Scalar; - typedef typename CwiseUnaryOpImpl<UnaryOp,MatrixType,Sparse>::MatrixTypeReverseIterator Base; - public: - - EIGEN_STRONG_INLINE ReverseInnerIterator(const CwiseUnaryOpImpl& unaryOp, typename CwiseUnaryOpImpl::Index outer) - : Base(unaryOp.derived().nestedExpression(),outer), m_functor(unaryOp.derived().functor()) - {} - - EIGEN_STRONG_INLINE ReverseInnerIterator& operator--() - { Base::operator--(); return *this; } - - EIGEN_STRONG_INLINE typename CwiseUnaryOpImpl::Scalar value() const { return m_functor(Base::value()); } - - protected: - const UnaryOp m_functor; - private: - typename 
CwiseUnaryOpImpl::Scalar& valueRef(); -}; - -template<typename ViewOp, typename MatrixType> -class CwiseUnaryViewImpl<ViewOp,MatrixType,Sparse> - : public SparseMatrixBase<CwiseUnaryView<ViewOp, MatrixType> > +// template<typename UnaryOp, typename ArgType> +// class unary_evaluator<CwiseUnaryOp<UnaryOp,ArgType>, IteratorBased>::ReverseInnerIterator +// : public unary_evaluator<CwiseUnaryOp<UnaryOp,ArgType>, IteratorBased>::EvalReverseIterator +// { +// typedef typename XprType::Scalar Scalar; +// typedef typename unary_evaluator<CwiseUnaryOp<UnaryOp,ArgType>, IteratorBased>::EvalReverseIterator Base; +// public: +// +// EIGEN_STRONG_INLINE ReverseInnerIterator(const XprType& unaryOp, typename XprType::Index outer) +// : Base(unaryOp.derived().nestedExpression(),outer), m_functor(unaryOp.derived().functor()) +// {} +// +// EIGEN_STRONG_INLINE ReverseInnerIterator& operator--() +// { Base::operator--(); return *this; } +// +// EIGEN_STRONG_INLINE Scalar value() const { return m_functor(Base::value()); } +// +// protected: +// const UnaryOp m_functor; +// private: +// Scalar& valueRef(); +// }; + + + + + +template<typename ViewOp, typename ArgType> +struct unary_evaluator<CwiseUnaryView<ViewOp,ArgType>, IteratorBased> + : public evaluator_base<CwiseUnaryView<ViewOp,ArgType> > { public: + typedef CwiseUnaryView<ViewOp, ArgType> XprType; class InnerIterator; class ReverseInnerIterator; - - typedef CwiseUnaryView<ViewOp, MatrixType> Derived; - EIGEN_SPARSE_PUBLIC_INTERFACE(Derived) + + enum { + CoeffReadCost = evaluator<ArgType>::CoeffReadCost + functor_traits<ViewOp>::Cost, + Flags = XprType::Flags + }; + + explicit unary_evaluator(const XprType& op) : m_functor(op.functor()), m_argImpl(op.nestedExpression()) {} protected: - typedef typename internal::traits<Derived>::_MatrixTypeNested _MatrixTypeNested; - typedef typename _MatrixTypeNested::InnerIterator MatrixTypeIterator; - typedef typename _MatrixTypeNested::ReverseInnerIterator MatrixTypeReverseIterator; + typedef 
typename evaluator<ArgType>::InnerIterator EvalIterator; +// typedef typename evaluator<ArgType>::ReverseInnerIterator EvalReverseIterator; + + const ViewOp m_functor; + typename evaluator<ArgType>::nestedType m_argImpl; }; -template<typename ViewOp, typename MatrixType> -class CwiseUnaryViewImpl<ViewOp,MatrixType,Sparse>::InnerIterator - : public CwiseUnaryViewImpl<ViewOp,MatrixType,Sparse>::MatrixTypeIterator +template<typename ViewOp, typename ArgType> +class unary_evaluator<CwiseUnaryView<ViewOp,ArgType>, IteratorBased>::InnerIterator + : public unary_evaluator<CwiseUnaryView<ViewOp,ArgType>, IteratorBased>::EvalIterator { - typedef typename CwiseUnaryViewImpl::Scalar Scalar; - typedef typename CwiseUnaryViewImpl<ViewOp,MatrixType,Sparse>::MatrixTypeIterator Base; + typedef typename XprType::Scalar Scalar; + typedef typename unary_evaluator<CwiseUnaryView<ViewOp,ArgType>, IteratorBased>::EvalIterator Base; public: - EIGEN_STRONG_INLINE InnerIterator(const CwiseUnaryViewImpl& unaryOp, typename CwiseUnaryViewImpl::Index outer) - : Base(unaryOp.derived().nestedExpression(),outer), m_functor(unaryOp.derived().functor()) + EIGEN_STRONG_INLINE InnerIterator(const unary_evaluator& unaryOp, typename XprType::Index outer) + : Base(unaryOp.m_argImpl,outer), m_functor(unaryOp.m_functor) {} EIGEN_STRONG_INLINE InnerIterator& operator++() { Base::operator++(); return *this; } - EIGEN_STRONG_INLINE typename CwiseUnaryViewImpl::Scalar value() const { return m_functor(Base::value()); } - EIGEN_STRONG_INLINE typename CwiseUnaryViewImpl::Scalar& valueRef() { return m_functor(Base::valueRef()); } + EIGEN_STRONG_INLINE Scalar value() const { return m_functor(Base::value()); } + EIGEN_STRONG_INLINE Scalar& valueRef() { return m_functor(Base::valueRef()); } protected: const ViewOp m_functor; }; -template<typename ViewOp, typename MatrixType> -class CwiseUnaryViewImpl<ViewOp,MatrixType,Sparse>::ReverseInnerIterator - : public 
CwiseUnaryViewImpl<ViewOp,MatrixType,Sparse>::MatrixTypeReverseIterator -{ - typedef typename CwiseUnaryViewImpl::Scalar Scalar; - typedef typename CwiseUnaryViewImpl<ViewOp,MatrixType,Sparse>::MatrixTypeReverseIterator Base; - public: - - EIGEN_STRONG_INLINE ReverseInnerIterator(const CwiseUnaryViewImpl& unaryOp, typename CwiseUnaryViewImpl::Index outer) - : Base(unaryOp.derived().nestedExpression(),outer), m_functor(unaryOp.derived().functor()) - {} - - EIGEN_STRONG_INLINE ReverseInnerIterator& operator--() - { Base::operator--(); return *this; } - - EIGEN_STRONG_INLINE typename CwiseUnaryViewImpl::Scalar value() const { return m_functor(Base::value()); } - EIGEN_STRONG_INLINE typename CwiseUnaryViewImpl::Scalar& valueRef() { return m_functor(Base::valueRef()); } - - protected: - const ViewOp m_functor; -}; +// template<typename ViewOp, typename ArgType> +// class unary_evaluator<CwiseUnaryView<ViewOp,ArgType>, IteratorBased>::ReverseInnerIterator +// : public unary_evaluator<CwiseUnaryView<ViewOp,ArgType>, IteratorBased>::EvalReverseIterator +// { +// typedef typename XprType::Scalar Scalar; +// typedef typename unary_evaluator<CwiseUnaryView<ViewOp,ArgType>, IteratorBased>::EvalReverseIterator Base; +// public: +// +// EIGEN_STRONG_INLINE ReverseInnerIterator(const XprType& unaryOp, typename XprType::Index outer) +// : Base(unaryOp.derived().nestedExpression(),outer), m_functor(unaryOp.derived().functor()) +// {} +// +// EIGEN_STRONG_INLINE ReverseInnerIterator& operator--() +// { Base::operator--(); return *this; } +// +// EIGEN_STRONG_INLINE Scalar value() const { return m_functor(Base::value()); } +// EIGEN_STRONG_INLINE Scalar& valueRef() { return m_functor(Base::valueRef()); } +// +// protected: +// const ViewOp m_functor; +// }; + + +} // end namespace internal template<typename Derived> EIGEN_STRONG_INLINE Derived& diff --git a/Eigen/src/SparseCore/SparseDenseProduct.h b/Eigen/src/SparseCore/SparseDenseProduct.h index d40e966c1..5aea11425 100644 --- 
a/Eigen/src/SparseCore/SparseDenseProduct.h +++ b/Eigen/src/SparseCore/SparseDenseProduct.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr> +// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr> // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed @@ -12,152 +12,10 @@ namespace Eigen { -template<typename Lhs, typename Rhs, int InnerSize> struct SparseDenseProductReturnType -{ - typedef SparseTimeDenseProduct<Lhs,Rhs> Type; -}; - -template<typename Lhs, typename Rhs> struct SparseDenseProductReturnType<Lhs,Rhs,1> -{ - typedef typename internal::conditional< - Lhs::IsRowMajor, - SparseDenseOuterProduct<Rhs,Lhs,true>, - SparseDenseOuterProduct<Lhs,Rhs,false> >::type Type; -}; - -template<typename Lhs, typename Rhs, int InnerSize> struct DenseSparseProductReturnType -{ - typedef DenseTimeSparseProduct<Lhs,Rhs> Type; -}; - -template<typename Lhs, typename Rhs> struct DenseSparseProductReturnType<Lhs,Rhs,1> -{ - typedef typename internal::conditional< - Rhs::IsRowMajor, - SparseDenseOuterProduct<Rhs,Lhs,true>, - SparseDenseOuterProduct<Lhs,Rhs,false> >::type Type; -}; - namespace internal { -template<typename Lhs, typename Rhs, bool Tr> -struct traits<SparseDenseOuterProduct<Lhs,Rhs,Tr> > -{ - typedef Sparse StorageKind; - typedef typename scalar_product_traits<typename traits<Lhs>::Scalar, - typename traits<Rhs>::Scalar>::ReturnType Scalar; - typedef typename Lhs::Index Index; - typedef typename Lhs::Nested LhsNested; - typedef typename Rhs::Nested RhsNested; - typedef typename remove_all<LhsNested>::type _LhsNested; - typedef typename remove_all<RhsNested>::type _RhsNested; - - enum { - LhsCoeffReadCost = traits<_LhsNested>::CoeffReadCost, - RhsCoeffReadCost = traits<_RhsNested>::CoeffReadCost, - - RowsAtCompileTime = Tr ? 
int(traits<Rhs>::RowsAtCompileTime) : int(traits<Lhs>::RowsAtCompileTime), - ColsAtCompileTime = Tr ? int(traits<Lhs>::ColsAtCompileTime) : int(traits<Rhs>::ColsAtCompileTime), - MaxRowsAtCompileTime = Tr ? int(traits<Rhs>::MaxRowsAtCompileTime) : int(traits<Lhs>::MaxRowsAtCompileTime), - MaxColsAtCompileTime = Tr ? int(traits<Lhs>::MaxColsAtCompileTime) : int(traits<Rhs>::MaxColsAtCompileTime), - - Flags = Tr ? RowMajorBit : 0, - - CoeffReadCost = LhsCoeffReadCost + RhsCoeffReadCost + NumTraits<Scalar>::MulCost - }; -}; - -} // end namespace internal - -template<typename Lhs, typename Rhs, bool Tr> -class SparseDenseOuterProduct - : public SparseMatrixBase<SparseDenseOuterProduct<Lhs,Rhs,Tr> > -{ - public: - - typedef SparseMatrixBase<SparseDenseOuterProduct> Base; - EIGEN_DENSE_PUBLIC_INTERFACE(SparseDenseOuterProduct) - typedef internal::traits<SparseDenseOuterProduct> Traits; - - private: - - typedef typename Traits::LhsNested LhsNested; - typedef typename Traits::RhsNested RhsNested; - typedef typename Traits::_LhsNested _LhsNested; - typedef typename Traits::_RhsNested _RhsNested; - - public: - - class InnerIterator; - - EIGEN_STRONG_INLINE SparseDenseOuterProduct(const Lhs& lhs, const Rhs& rhs) - : m_lhs(lhs), m_rhs(rhs) - { - EIGEN_STATIC_ASSERT(!Tr,YOU_MADE_A_PROGRAMMING_MISTAKE); - } - - EIGEN_STRONG_INLINE SparseDenseOuterProduct(const Rhs& rhs, const Lhs& lhs) - : m_lhs(lhs), m_rhs(rhs) - { - EIGEN_STATIC_ASSERT(Tr,YOU_MADE_A_PROGRAMMING_MISTAKE); - } - - EIGEN_STRONG_INLINE Index rows() const { return Tr ? Index(m_rhs.rows()) : m_lhs.rows(); } - EIGEN_STRONG_INLINE Index cols() const { return Tr ? 
m_lhs.cols() : Index(m_rhs.cols()); } - - EIGEN_STRONG_INLINE const _LhsNested& lhs() const { return m_lhs; } - EIGEN_STRONG_INLINE const _RhsNested& rhs() const { return m_rhs; } - - protected: - LhsNested m_lhs; - RhsNested m_rhs; -}; - -template<typename Lhs, typename Rhs, bool Transpose> -class SparseDenseOuterProduct<Lhs,Rhs,Transpose>::InnerIterator : public _LhsNested::InnerIterator -{ - typedef typename _LhsNested::InnerIterator Base; - typedef typename SparseDenseOuterProduct::Index Index; - public: - EIGEN_STRONG_INLINE InnerIterator(const SparseDenseOuterProduct& prod, Index outer) - : Base(prod.lhs(), 0), m_outer(outer), m_empty(false), m_factor(get(prod.rhs(), outer, typename internal::traits<Rhs>::StorageKind() )) - {} - - inline Index outer() const { return m_outer; } - inline Index row() const { return Transpose ? m_outer : Base::index(); } - inline Index col() const { return Transpose ? Base::index() : m_outer; } - - inline Scalar value() const { return Base::value() * m_factor; } - inline operator bool() const { return Base::operator bool() && !m_empty; } - - protected: - Scalar get(const _RhsNested &rhs, Index outer, Dense = Dense()) const - { - return rhs.coeff(outer); - } - - Scalar get(const _RhsNested &rhs, Index outer, Sparse = Sparse()) - { - typename Traits::_RhsNested::InnerIterator it(rhs, outer); - if (it && it.index()==0 && it.value()!=Scalar(0)) - return it.value(); - m_empty = true; - return Scalar(0); - } - - Index m_outer; - bool m_empty; - Scalar m_factor; -}; - -namespace internal { -template<typename Lhs, typename Rhs> -struct traits<SparseTimeDenseProduct<Lhs,Rhs> > - : traits<ProductBase<SparseTimeDenseProduct<Lhs,Rhs>, Lhs, Rhs> > -{ - typedef Dense StorageKind; - typedef MatrixXpr XprKind; -}; +template <> struct product_promote_storage_type<Sparse,Dense, OuterProduct> { typedef Sparse ret; }; +template <> struct product_promote_storage_type<Dense,Sparse, OuterProduct> { typedef Sparse ret; }; template<typename 
SparseLhsType, typename DenseRhsType, typename DenseResType, typename AlphaType, @@ -172,16 +30,17 @@ struct sparse_time_dense_product_impl<SparseLhsType,DenseRhsType,DenseResType, t typedef typename internal::remove_all<DenseRhsType>::type Rhs; typedef typename internal::remove_all<DenseResType>::type Res; typedef typename Lhs::Index Index; - typedef typename Lhs::InnerIterator LhsInnerIterator; + typedef typename evaluator<Lhs>::InnerIterator LhsInnerIterator; static void run(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res, const typename Res::Scalar& alpha) { + typename evaluator<Lhs>::type lhsEval(lhs); for(Index c=0; c<rhs.cols(); ++c) { Index n = lhs.outerSize(); for(Index j=0; j<n; ++j) { typename Res::Scalar tmp(0); - for(LhsInnerIterator it(lhs,j); it ;++it) + for(LhsInnerIterator it(lhsEval,j); it ;++it) tmp += it.value() * rhs.coeff(it.index(),c); res.coeffRef(j,c) = alpha * tmp; } @@ -203,17 +62,18 @@ struct sparse_time_dense_product_impl<SparseLhsType,DenseRhsType,DenseResType, A typedef typename internal::remove_all<SparseLhsType>::type Lhs; typedef typename internal::remove_all<DenseRhsType>::type Rhs; typedef typename internal::remove_all<DenseResType>::type Res; - typedef typename Lhs::InnerIterator LhsInnerIterator; typedef typename Lhs::Index Index; + typedef typename evaluator<Lhs>::InnerIterator LhsInnerIterator; static void run(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res, const AlphaType& alpha) { + typename evaluator<Lhs>::type lhsEval(lhs); for(Index c=0; c<rhs.cols(); ++c) { for(Index j=0; j<lhs.outerSize(); ++j) { // typename Res::Scalar rhs_j = alpha * rhs.coeff(j,c); typename internal::scalar_product_traits<AlphaType, typename Rhs::Scalar>::ReturnType rhs_j(alpha * rhs.coeff(j,c)); - for(LhsInnerIterator it(lhs,j); it ;++it) + for(LhsInnerIterator it(lhsEval,j); it ;++it) res.coeffRef(it.index(),c) += it.value() * rhs_j; } } @@ -226,14 +86,15 @@ struct 
sparse_time_dense_product_impl<SparseLhsType,DenseRhsType,DenseResType, t typedef typename internal::remove_all<SparseLhsType>::type Lhs; typedef typename internal::remove_all<DenseRhsType>::type Rhs; typedef typename internal::remove_all<DenseResType>::type Res; - typedef typename Lhs::InnerIterator LhsInnerIterator; typedef typename Lhs::Index Index; + typedef typename evaluator<Lhs>::InnerIterator LhsInnerIterator; static void run(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res, const typename Res::Scalar& alpha) { + typename evaluator<Lhs>::type lhsEval(lhs); for(Index j=0; j<lhs.outerSize(); ++j) { typename Res::RowXpr res_j(res.row(j)); - for(LhsInnerIterator it(lhs,j); it ;++it) + for(LhsInnerIterator it(lhsEval,j); it ;++it) res_j += (alpha*it.value()) * rhs.row(it.index()); } } @@ -245,14 +106,15 @@ struct sparse_time_dense_product_impl<SparseLhsType,DenseRhsType,DenseResType, t typedef typename internal::remove_all<SparseLhsType>::type Lhs; typedef typename internal::remove_all<DenseRhsType>::type Rhs; typedef typename internal::remove_all<DenseResType>::type Res; - typedef typename Lhs::InnerIterator LhsInnerIterator; typedef typename Lhs::Index Index; + typedef typename evaluator<Lhs>::InnerIterator LhsInnerIterator; static void run(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res, const typename Res::Scalar& alpha) { + typename evaluator<Lhs>::type lhsEval(lhs); for(Index j=0; j<lhs.outerSize(); ++j) { typename Rhs::ConstRowXpr rhs_j(rhs.row(j)); - for(LhsInnerIterator it(lhs,j); it ;++it) + for(LhsInnerIterator it(lhsEval,j); it ;++it) res.row(it.index()) += (alpha*it.value()) * rhs_j; } } @@ -266,58 +128,164 @@ inline void sparse_time_dense_product(const SparseLhsType& lhs, const DenseRhsTy } // end namespace internal -template<typename Lhs, typename Rhs> -class SparseTimeDenseProduct - : public ProductBase<SparseTimeDenseProduct<Lhs,Rhs>, Lhs, Rhs> -{ - public: - 
EIGEN_PRODUCT_PUBLIC_INTERFACE(SparseTimeDenseProduct) - - SparseTimeDenseProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) - {} - - template<typename Dest> void scaleAndAddTo(Dest& dest, const Scalar& alpha) const - { - internal::sparse_time_dense_product(m_lhs, m_rhs, dest, alpha); - } +namespace internal { - private: - SparseTimeDenseProduct& operator=(const SparseTimeDenseProduct&); +template<typename Lhs, typename Rhs, int ProductType> +struct generic_product_impl<Lhs, Rhs, SparseShape, DenseShape, ProductType> +{ + template<typename Dest> + static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) + { + typedef typename nested_eval<Lhs,Dynamic>::type LhsNested; + typedef typename nested_eval<Rhs,Dynamic>::type RhsNested; + LhsNested lhsNested(lhs); + RhsNested rhsNested(rhs); + + dst.setZero(); + internal::sparse_time_dense_product(lhsNested, rhsNested, dst, typename Dest::Scalar(1)); + } }; +template<typename Lhs, typename Rhs, int ProductType> +struct generic_product_impl<Lhs, Rhs, SparseTriangularShape, DenseShape, ProductType> + : generic_product_impl<Lhs, Rhs, SparseShape, DenseShape, ProductType> +{}; -// dense = dense * sparse -namespace internal { -template<typename Lhs, typename Rhs> -struct traits<DenseTimeSparseProduct<Lhs,Rhs> > - : traits<ProductBase<DenseTimeSparseProduct<Lhs,Rhs>, Lhs, Rhs> > +template<typename Lhs, typename Rhs, int ProductType> +struct generic_product_impl<Lhs, Rhs, DenseShape, SparseShape, ProductType> { - typedef Dense StorageKind; + template<typename Dest> + static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) + { + typedef typename nested_eval<Lhs,Dynamic>::type LhsNested; + typedef typename nested_eval<Rhs,Dynamic>::type RhsNested; + LhsNested lhsNested(lhs); + RhsNested rhsNested(rhs); + + dst.setZero(); + // transpose everything + Transpose<Dest> dstT(dst); + internal::sparse_time_dense_product(rhsNested.transpose(), lhsNested.transpose(), dstT, typename Dest::Scalar(1)); + } }; -} // end namespace 
internal -template<typename Lhs, typename Rhs> -class DenseTimeSparseProduct - : public ProductBase<DenseTimeSparseProduct<Lhs,Rhs>, Lhs, Rhs> +template<typename Lhs, typename Rhs, int ProductType> +struct generic_product_impl<Lhs, Rhs, DenseShape, SparseTriangularShape, ProductType> + : generic_product_impl<Lhs, Rhs, DenseShape, SparseShape, ProductType> +{}; + +template<typename LhsT, typename RhsT, bool NeedToTranspose> +struct sparse_dense_outer_product_evaluator { +protected: + typedef typename conditional<NeedToTranspose,RhsT,LhsT>::type Lhs1; + typedef typename conditional<NeedToTranspose,LhsT,RhsT>::type ActualRhs; + typedef Product<LhsT,RhsT,DefaultProduct> ProdXprType; + + // if the actual left-hand side is a dense vector, + // then build a sparse-view so that we can seamlessly iterate over it. + typedef typename conditional<is_same<typename internal::traits<Lhs1>::StorageKind,Sparse>::value, + Lhs1, SparseView<Lhs1> >::type ActualLhs; + typedef typename conditional<is_same<typename internal::traits<Lhs1>::StorageKind,Sparse>::value, + Lhs1 const&, SparseView<Lhs1> >::type LhsArg; + + typedef typename evaluator<ActualLhs>::type LhsEval; + typedef typename evaluator<ActualRhs>::type RhsEval; + typedef typename evaluator<ActualLhs>::InnerIterator LhsIterator; + typedef typename ProdXprType::Scalar Scalar; + typedef typename ProdXprType::Index Index; + +public: + enum { + Flags = NeedToTranspose ? 
RowMajorBit : 0, + CoeffReadCost = Dynamic + }; + + class InnerIterator : public LhsIterator + { public: - EIGEN_PRODUCT_PUBLIC_INTERFACE(DenseTimeSparseProduct) - - DenseTimeSparseProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) + InnerIterator(const sparse_dense_outer_product_evaluator &xprEval, Index outer) + : LhsIterator(xprEval.m_lhsXprImpl, 0), + m_outer(outer), + m_empty(false), + m_factor(get(xprEval.m_rhsXprImpl, outer, typename internal::traits<ActualRhs>::StorageKind() )) {} + + EIGEN_STRONG_INLINE Index outer() const { return m_outer; } + EIGEN_STRONG_INLINE Index row() const { return NeedToTranspose ? m_outer : LhsIterator::index(); } + EIGEN_STRONG_INLINE Index col() const { return NeedToTranspose ? LhsIterator::index() : m_outer; } - template<typename Dest> void scaleAndAddTo(Dest& dest, const Scalar& alpha) const + EIGEN_STRONG_INLINE Scalar value() const { return LhsIterator::value() * m_factor; } + EIGEN_STRONG_INLINE operator bool() const { return LhsIterator::operator bool() && (!m_empty); } + + protected: + Scalar get(const RhsEval &rhs, Index outer, Dense = Dense()) const { - Transpose<const _LhsNested> lhs_t(m_lhs); - Transpose<const _RhsNested> rhs_t(m_rhs); - Transpose<Dest> dest_t(dest); - internal::sparse_time_dense_product(rhs_t, lhs_t, dest_t, alpha); + return rhs.coeff(outer); + } + + Scalar get(const RhsEval &rhs, Index outer, Sparse = Sparse()) + { + typename RhsEval::InnerIterator it(rhs, outer); + if (it && it.index()==0 && it.value()!=Scalar(0)) + return it.value(); + m_empty = true; + return Scalar(0); } + + Index m_outer; + bool m_empty; + Scalar m_factor; + }; + + sparse_dense_outer_product_evaluator(const Lhs1 &lhs, const ActualRhs &rhs) + : m_lhs(lhs), m_lhsXprImpl(m_lhs), m_rhsXprImpl(rhs) + {} + + // transpose case + sparse_dense_outer_product_evaluator(const ActualRhs &rhs, const Lhs1 &lhs) + : m_lhs(lhs), m_lhsXprImpl(m_lhs), m_rhsXprImpl(rhs) + {} + +protected: + const LhsArg m_lhs; + typename 
evaluator<ActualLhs>::nestedType m_lhsXprImpl; + typename evaluator<ActualRhs>::nestedType m_rhsXprImpl; +}; - private: - DenseTimeSparseProduct& operator=(const DenseTimeSparseProduct&); +// sparse * dense outer product +template<typename Lhs, typename Rhs> +struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, OuterProduct, SparseShape, DenseShape, typename traits<Lhs>::Scalar, typename traits<Rhs>::Scalar> + : sparse_dense_outer_product_evaluator<Lhs,Rhs, Lhs::IsRowMajor> +{ + typedef sparse_dense_outer_product_evaluator<Lhs,Rhs, Lhs::IsRowMajor> Base; + + typedef Product<Lhs, Rhs> XprType; + typedef typename XprType::PlainObject PlainObject; + + explicit product_evaluator(const XprType& xpr) + : Base(xpr.lhs(), xpr.rhs()) + {} + }; +template<typename Lhs, typename Rhs> +struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, OuterProduct, DenseShape, SparseShape, typename traits<Lhs>::Scalar, typename traits<Rhs>::Scalar> + : sparse_dense_outer_product_evaluator<Lhs,Rhs, Rhs::IsRowMajor> +{ + typedef sparse_dense_outer_product_evaluator<Lhs,Rhs, Rhs::IsRowMajor> Base; + + typedef Product<Lhs, Rhs> XprType; + typedef typename XprType::PlainObject PlainObject; + + explicit product_evaluator(const XprType& xpr) + : Base(xpr.lhs(), xpr.rhs()) + {} + +}; + +} // end namespace internal + } // end namespace Eigen #endif // EIGEN_SPARSEDENSEPRODUCT_H diff --git a/Eigen/src/SparseCore/SparseDiagonalProduct.h b/Eigen/src/SparseCore/SparseDiagonalProduct.h index c056b4914..be935e9f3 100644 --- a/Eigen/src/SparseCore/SparseDiagonalProduct.h +++ b/Eigen/src/SparseCore/SparseDiagonalProduct.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr> +// Copyright (C) 2009-2014 Gael Guennebaud <gael.guennebaud@inria.fr> // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. 
If a copy of the MPL was not distributed @@ -26,173 +26,122 @@ namespace Eigen { namespace internal { -template<typename Lhs, typename Rhs> -struct traits<SparseDiagonalProduct<Lhs, Rhs> > -{ - typedef typename remove_all<Lhs>::type _Lhs; - typedef typename remove_all<Rhs>::type _Rhs; - typedef typename _Lhs::Scalar Scalar; - // propagate the index type of the sparse matrix - typedef typename conditional< is_diagonal<_Lhs>::ret, - typename traits<Rhs>::Index, - typename traits<Lhs>::Index>::type Index; - typedef Sparse StorageKind; - typedef MatrixXpr XprKind; - enum { - RowsAtCompileTime = _Lhs::RowsAtCompileTime, - ColsAtCompileTime = _Rhs::ColsAtCompileTime, - - MaxRowsAtCompileTime = _Lhs::MaxRowsAtCompileTime, - MaxColsAtCompileTime = _Rhs::MaxColsAtCompileTime, - - SparseFlags = is_diagonal<_Lhs>::ret ? int(_Rhs::Flags) : int(_Lhs::Flags), - Flags = (SparseFlags&RowMajorBit), - CoeffReadCost = Dynamic - }; +enum { + SDP_AsScalarProduct, + SDP_AsCwiseProduct }; + +template<typename SparseXprType, typename DiagonalCoeffType, int SDP_Tag> +struct sparse_diagonal_product_evaluator; -enum {SDP_IsDiagonal, SDP_IsSparseRowMajor, SDP_IsSparseColMajor}; -template<typename Lhs, typename Rhs, typename SparseDiagonalProductType, int RhsMode, int LhsMode> -class sparse_diagonal_product_inner_iterator_selector; - -} // end namespace internal - -template<typename Lhs, typename Rhs> -class SparseDiagonalProduct - : public SparseMatrixBase<SparseDiagonalProduct<Lhs,Rhs> >, - internal::no_assignment_operator +template<typename Lhs, typename Rhs, int ProductTag> +struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, ProductTag, DiagonalShape, SparseShape, typename traits<Lhs>::Scalar, typename traits<Rhs>::Scalar> + : public sparse_diagonal_product_evaluator<Rhs, typename Lhs::DiagonalVectorType, Rhs::Flags&RowMajorBit?SDP_AsScalarProduct:SDP_AsCwiseProduct> { - typedef typename Lhs::Nested LhsNested; - typedef typename Rhs::Nested RhsNested; - - typedef typename 
internal::remove_all<LhsNested>::type _LhsNested; - typedef typename internal::remove_all<RhsNested>::type _RhsNested; - - enum { - LhsMode = internal::is_diagonal<_LhsNested>::ret ? internal::SDP_IsDiagonal - : (_LhsNested::Flags&RowMajorBit) ? internal::SDP_IsSparseRowMajor : internal::SDP_IsSparseColMajor, - RhsMode = internal::is_diagonal<_RhsNested>::ret ? internal::SDP_IsDiagonal - : (_RhsNested::Flags&RowMajorBit) ? internal::SDP_IsSparseRowMajor : internal::SDP_IsSparseColMajor - }; - - public: - - EIGEN_SPARSE_PUBLIC_INTERFACE(SparseDiagonalProduct) - - typedef internal::sparse_diagonal_product_inner_iterator_selector - <_LhsNested,_RhsNested,SparseDiagonalProduct,LhsMode,RhsMode> InnerIterator; - - // We do not want ReverseInnerIterator for diagonal-sparse products, - // but this dummy declaration is needed to make diag * sparse * diag compile. - class ReverseInnerIterator; - - EIGEN_STRONG_INLINE SparseDiagonalProduct(const Lhs& lhs, const Rhs& rhs) - : m_lhs(lhs), m_rhs(rhs) - { - eigen_assert(lhs.cols() == rhs.rows() && "invalid sparse matrix * diagonal matrix product"); - } - - EIGEN_STRONG_INLINE Index rows() const { return Index(m_lhs.rows()); } - EIGEN_STRONG_INLINE Index cols() const { return Index(m_rhs.cols()); } - - EIGEN_STRONG_INLINE const _LhsNested& lhs() const { return m_lhs; } - EIGEN_STRONG_INLINE const _RhsNested& rhs() const { return m_rhs; } - - protected: - LhsNested m_lhs; - RhsNested m_rhs; + typedef Product<Lhs, Rhs, DefaultProduct> XprType; + typedef evaluator<XprType> type; + typedef evaluator<XprType> nestedType; + enum { CoeffReadCost = Dynamic, Flags = Rhs::Flags&RowMajorBit }; // FIXME CoeffReadCost & Flags + + typedef sparse_diagonal_product_evaluator<Rhs, typename Lhs::DiagonalVectorType, Rhs::Flags&RowMajorBit?SDP_AsScalarProduct:SDP_AsCwiseProduct> Base; + explicit product_evaluator(const XprType& xpr) : Base(xpr.rhs(), xpr.lhs().diagonal()) {} }; -namespace internal { - -template<typename Lhs, typename Rhs, typename 
SparseDiagonalProductType> -class sparse_diagonal_product_inner_iterator_selector -<Lhs,Rhs,SparseDiagonalProductType,SDP_IsDiagonal,SDP_IsSparseRowMajor> - : public CwiseUnaryOp<scalar_multiple_op<typename Lhs::Scalar>,const Rhs>::InnerIterator +template<typename Lhs, typename Rhs, int ProductTag> +struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, ProductTag, SparseShape, DiagonalShape, typename traits<Lhs>::Scalar, typename traits<Rhs>::Scalar> + : public sparse_diagonal_product_evaluator<Lhs, Transpose<const typename Rhs::DiagonalVectorType>, Lhs::Flags&RowMajorBit?SDP_AsCwiseProduct:SDP_AsScalarProduct> { - typedef typename CwiseUnaryOp<scalar_multiple_op<typename Lhs::Scalar>,const Rhs>::InnerIterator Base; - typedef typename Rhs::Index Index; - public: - inline sparse_diagonal_product_inner_iterator_selector( - const SparseDiagonalProductType& expr, Index outer) - : Base(expr.rhs()*(expr.lhs().diagonal().coeff(outer)), outer) - {} + typedef Product<Lhs, Rhs, DefaultProduct> XprType; + typedef evaluator<XprType> type; + typedef evaluator<XprType> nestedType; + enum { CoeffReadCost = Dynamic, Flags = Lhs::Flags&RowMajorBit }; // FIXME CoeffReadCost & Flags + + typedef sparse_diagonal_product_evaluator<Lhs, Transpose<const typename Rhs::DiagonalVectorType>, Lhs::Flags&RowMajorBit?SDP_AsCwiseProduct:SDP_AsScalarProduct> Base; + explicit product_evaluator(const XprType& xpr) : Base(xpr.lhs(), xpr.rhs().diagonal().transpose()) {} }; -template<typename Lhs, typename Rhs, typename SparseDiagonalProductType> -class sparse_diagonal_product_inner_iterator_selector -<Lhs,Rhs,SparseDiagonalProductType,SDP_IsDiagonal,SDP_IsSparseColMajor> - : public CwiseBinaryOp< - scalar_product_op<typename Lhs::Scalar>, - const typename Rhs::ConstInnerVectorReturnType, - const typename Lhs::DiagonalVectorType>::InnerIterator +template<typename SparseXprType, typename DiagonalCoeffType> +struct sparse_diagonal_product_evaluator<SparseXprType, DiagonalCoeffType, 
SDP_AsScalarProduct> { - typedef typename CwiseBinaryOp< - scalar_product_op<typename Lhs::Scalar>, - const typename Rhs::ConstInnerVectorReturnType, - const typename Lhs::DiagonalVectorType>::InnerIterator Base; - typedef typename Rhs::Index Index; - Index m_outer; +protected: + typedef typename evaluator<SparseXprType>::InnerIterator SparseXprInnerIterator; + typedef typename SparseXprType::Scalar Scalar; + typedef typename SparseXprType::Index Index; + +public: + class InnerIterator : public SparseXprInnerIterator + { public: - inline sparse_diagonal_product_inner_iterator_selector( - const SparseDiagonalProductType& expr, Index outer) - : Base(expr.rhs().innerVector(outer) .cwiseProduct(expr.lhs().diagonal()), 0), m_outer(outer) + InnerIterator(const sparse_diagonal_product_evaluator &xprEval, Index outer) + : SparseXprInnerIterator(xprEval.m_sparseXprImpl, outer), + m_coeff(xprEval.m_diagCoeffImpl.coeff(outer)) {} - inline Index outer() const { return m_outer; } - inline Index col() const { return m_outer; } + EIGEN_STRONG_INLINE Scalar value() const { return m_coeff * SparseXprInnerIterator::value(); } + protected: + typename DiagonalCoeffType::Scalar m_coeff; + }; + + sparse_diagonal_product_evaluator(const SparseXprType &sparseXpr, const DiagonalCoeffType &diagCoeff) + : m_sparseXprImpl(sparseXpr), m_diagCoeffImpl(diagCoeff) + {} + +protected: + typename evaluator<SparseXprType>::nestedType m_sparseXprImpl; + typename evaluator<DiagonalCoeffType>::nestedType m_diagCoeffImpl; }; -template<typename Lhs, typename Rhs, typename SparseDiagonalProductType> -class sparse_diagonal_product_inner_iterator_selector -<Lhs,Rhs,SparseDiagonalProductType,SDP_IsSparseColMajor,SDP_IsDiagonal> - : public CwiseUnaryOp<scalar_multiple_op<typename Rhs::Scalar>,const Lhs>::InnerIterator -{ - typedef typename CwiseUnaryOp<scalar_multiple_op<typename Rhs::Scalar>,const Lhs>::InnerIterator Base; - typedef typename Lhs::Index Index; - public: - inline 
sparse_diagonal_product_inner_iterator_selector( - const SparseDiagonalProductType& expr, Index outer) - : Base(expr.lhs()*expr.rhs().diagonal().coeff(outer), outer) - {} -}; -template<typename Lhs, typename Rhs, typename SparseDiagonalProductType> -class sparse_diagonal_product_inner_iterator_selector -<Lhs,Rhs,SparseDiagonalProductType,SDP_IsSparseRowMajor,SDP_IsDiagonal> - : public CwiseBinaryOp< - scalar_product_op<typename Rhs::Scalar>, - const typename Lhs::ConstInnerVectorReturnType, - const Transpose<const typename Rhs::DiagonalVectorType> >::InnerIterator +template<typename SparseXprType, typename DiagCoeffType> +struct sparse_diagonal_product_evaluator<SparseXprType, DiagCoeffType, SDP_AsCwiseProduct> { - typedef typename CwiseBinaryOp< - scalar_product_op<typename Rhs::Scalar>, - const typename Lhs::ConstInnerVectorReturnType, - const Transpose<const typename Rhs::DiagonalVectorType> >::InnerIterator Base; - typedef typename Lhs::Index Index; - Index m_outer; + typedef typename SparseXprType::Scalar Scalar; + typedef typename SparseXprType::Index Index; + + typedef CwiseBinaryOp<scalar_product_op<Scalar>, + const typename SparseXprType::ConstInnerVectorReturnType, + const DiagCoeffType> CwiseProductType; + + typedef typename evaluator<CwiseProductType>::type CwiseProductEval; + typedef typename evaluator<CwiseProductType>::InnerIterator CwiseProductIterator; + + class InnerIterator + { public: - inline sparse_diagonal_product_inner_iterator_selector( - const SparseDiagonalProductType& expr, Index outer) - : Base(expr.lhs().innerVector(outer) .cwiseProduct(expr.rhs().diagonal().transpose()), 0), m_outer(outer) + InnerIterator(const sparse_diagonal_product_evaluator &xprEval, Index outer) + : m_cwiseEval(xprEval.m_sparseXprNested.innerVector(outer).cwiseProduct(xprEval.m_diagCoeffNested)), + m_cwiseIter(m_cwiseEval, 0), + m_outer(outer) {} - inline Index outer() const { return m_outer; } - inline Index row() const { return m_outer; } + inline Scalar 
value() const { return m_cwiseIter.value(); } + inline Index index() const { return m_cwiseIter.index(); } + inline Index outer() const { return m_outer; } + inline Index col() const { return SparseXprType::IsRowMajor ? m_cwiseIter.index() : m_outer; } + inline Index row() const { return SparseXprType::IsRowMajor ? m_outer : m_cwiseIter.index(); } + + EIGEN_STRONG_INLINE InnerIterator& operator++() + { ++m_cwiseIter; return *this; } + inline operator bool() const { return m_cwiseIter; } + + protected: + CwiseProductEval m_cwiseEval; + CwiseProductIterator m_cwiseIter; + Index m_outer; + }; + + sparse_diagonal_product_evaluator(const SparseXprType &sparseXpr, const DiagCoeffType &diagCoeff) + : m_sparseXprNested(sparseXpr), m_diagCoeffNested(diagCoeff) + {} + +protected: + typename nested_eval<SparseXprType,1>::type m_sparseXprNested; + typename nested_eval<DiagCoeffType,SparseXprType::IsRowMajor ? SparseXprType::RowsAtCompileTime + : SparseXprType::ColsAtCompileTime>::type m_diagCoeffNested; }; } // end namespace internal -// SparseMatrixBase functions - -template<typename Derived> -template<typename OtherDerived> -const SparseDiagonalProduct<Derived,OtherDerived> -SparseMatrixBase<Derived>::operator*(const DiagonalBase<OtherDerived> &other) const -{ - return SparseDiagonalProduct<Derived,OtherDerived>(this->derived(), other.derived()); -} - } // end namespace Eigen #endif // EIGEN_SPARSE_DIAGONAL_PRODUCT_H diff --git a/Eigen/src/SparseCore/SparseDot.h b/Eigen/src/SparseCore/SparseDot.h index db39c9aec..b10c8058f 100644 --- a/Eigen/src/SparseCore/SparseDot.h +++ b/Eigen/src/SparseCore/SparseDot.h @@ -26,7 +26,8 @@ SparseMatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const eigen_assert(size() == other.size()); eigen_assert(other.size()>0 && "you are using a non initialized vector"); - typename Derived::InnerIterator i(derived(),0); + typename internal::evaluator<Derived>::type thisEval(derived()); + typename 
internal::evaluator<Derived>::InnerIterator i(thisEval, 0); Scalar res(0); while (i) { @@ -49,16 +50,12 @@ SparseMatrixBase<Derived>::dot(const SparseMatrixBase<OtherDerived>& other) cons eigen_assert(size() == other.size()); - typedef typename Derived::Nested Nested; - typedef typename OtherDerived::Nested OtherNested; - typedef typename internal::remove_all<Nested>::type NestedCleaned; - typedef typename internal::remove_all<OtherNested>::type OtherNestedCleaned; + typename internal::evaluator<Derived>::type thisEval(derived()); + typename internal::evaluator<Derived>::InnerIterator i(thisEval, 0); + + typename internal::evaluator<OtherDerived>::type otherEval(other.derived()); + typename internal::evaluator<OtherDerived>::InnerIterator j(otherEval, 0); - Nested nthis(derived()); - OtherNested nother(other.derived()); - - typename NestedCleaned::InnerIterator i(nthis,0); - typename OtherNestedCleaned::InnerIterator j(nother,0); Scalar res(0); while (i && j) { diff --git a/Eigen/src/SparseCore/SparseFuzzy.h b/Eigen/src/SparseCore/SparseFuzzy.h index 45f36e9eb..7d47eb94d 100644 --- a/Eigen/src/SparseCore/SparseFuzzy.h +++ b/Eigen/src/SparseCore/SparseFuzzy.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr> +// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr> // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. 
If a copy of the MPL was not distributed @@ -10,17 +10,20 @@ #ifndef EIGEN_SPARSE_FUZZY_H #define EIGEN_SPARSE_FUZZY_H -// template<typename Derived> -// template<typename OtherDerived> -// bool SparseMatrixBase<Derived>::isApprox( -// const OtherDerived& other, -// typename NumTraits<Scalar>::Real prec -// ) const -// { -// const typename internal::nested<Derived,2>::type nested(derived()); -// const typename internal::nested<OtherDerived,2>::type otherNested(other.derived()); -// return (nested - otherNested).cwise().abs2().sum() -// <= prec * prec * (std::min)(nested.cwise().abs2().sum(), otherNested.cwise().abs2().sum()); -// } +namespace Eigen { + +template<typename Derived> +template<typename OtherDerived> +bool SparseMatrixBase<Derived>::isApprox(const SparseMatrixBase<OtherDerived>& other, const RealScalar &prec) const +{ + const typename internal::nested_eval<Derived,2,PlainObject>::type actualA(derived()); + typename internal::conditional<bool(IsRowMajor)==bool(OtherDerived::IsRowMajor), + const typename internal::nested_eval<OtherDerived,2,PlainObject>::type, + const PlainObject>::type actualB(other.derived()); + + return (actualA - actualB).squaredNorm() <= prec * prec * numext::mini(actualA.squaredNorm(), actualB.squaredNorm()); +} + +} // end namespace Eigen #endif // EIGEN_SPARSE_FUZZY_H diff --git a/Eigen/src/SparseCore/SparseMatrix.h b/Eigen/src/SparseCore/SparseMatrix.h index 2ed2f3ebd..93677c786 100644 --- a/Eigen/src/SparseCore/SparseMatrix.h +++ b/Eigen/src/SparseCore/SparseMatrix.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr> +// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr> // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. 
If a copy of the MPL was not distributed @@ -52,13 +52,12 @@ struct traits<SparseMatrix<_Scalar, _Options, _Index> > MaxRowsAtCompileTime = Dynamic, MaxColsAtCompileTime = Dynamic, Flags = _Options | NestByRefBit | LvalueBit, - CoeffReadCost = NumTraits<Scalar>::ReadCost, SupportedAccessPatterns = InnerRandomAccessPattern }; }; template<typename _Scalar, int _Options, typename _Index, int DiagIndex> -struct traits<Diagonal<const SparseMatrix<_Scalar, _Options, _Index>, DiagIndex> > +struct traits<Diagonal<SparseMatrix<_Scalar, _Options, _Index>, DiagIndex> > { typedef SparseMatrix<_Scalar, _Options, _Index> MatrixType; typedef typename nested<MatrixType>::type MatrixTypeNested; @@ -74,8 +73,16 @@ struct traits<Diagonal<const SparseMatrix<_Scalar, _Options, _Index>, DiagIndex> ColsAtCompileTime = 1, MaxRowsAtCompileTime = Dynamic, MaxColsAtCompileTime = 1, - Flags = 0, - CoeffReadCost = _MatrixTypeNested::CoeffReadCost*10 + Flags = LvalueBit + }; +}; + +template<typename _Scalar, int _Options, typename _Index, int DiagIndex> +struct traits<Diagonal<const SparseMatrix<_Scalar, _Options, _Index>, DiagIndex> > + : public traits<Diagonal<SparseMatrix<_Scalar, _Options, _Index>, DiagIndex> > +{ + enum { + Flags = 0 }; }; @@ -91,6 +98,10 @@ class SparseMatrix EIGEN_SPARSE_INHERIT_ASSIGNMENT_OPERATOR(SparseMatrix, -=) typedef MappedSparseMatrix<Scalar,Flags> Map; + typedef Diagonal<SparseMatrix> DiagonalReturnType; + typedef Diagonal<const SparseMatrix> ConstDiagonalReturnType; + + using Base::IsRowMajor; typedef internal::CompressedStorage<Scalar,Index> Storage; enum { @@ -274,7 +285,7 @@ class SparseMatrix } template<class SizesType> inline void reserve(const SizesType& reserveSizes, const typename SizesType::Scalar& enableif = - #if (!defined(_MSC_VER)) || (_MSC_VER>=1500) // MSVC 2005 fails to compile with this typename + #if (!EIGEN_COMP_MSVC) || (EIGEN_COMP_MSVC>=1500) // MSVC 2005 fails to compile with this typename typename #endif SizesType::Scalar()) @@ -622,8 
+633,14 @@ class SparseMatrix m_data.resize(size); } - /** \returns a const expression of the diagonal coefficients */ - const Diagonal<const SparseMatrix> diagonal() const { return *this; } + /** \returns a const expression of the diagonal coefficients. */ + const ConstDiagonalReturnType diagonal() const { return ConstDiagonalReturnType(*this); } + + /** \returns a read-write expression of the diagonal coefficients. + * \warning If the diagonal entries are written, then all diagonal + * entries \b must already exist, otherwise an assertion will be raised. + */ + DiagonalReturnType diagonal() { return DiagonalReturnType(*this); } /** Default constructor yielding an empty \c 0 \c x \c 0 matrix */ inline SparseMatrix() @@ -649,7 +666,9 @@ class SparseMatrix EIGEN_STATIC_ASSERT((internal::is_same<Scalar, typename OtherDerived::Scalar>::value), YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) check_template_parameters(); - *this = other.derived(); + const bool needToTranspose = (Flags & RowMajorBit) != (internal::evaluator<OtherDerived>::Flags & RowMajorBit); + if (needToTranspose) *this = other.derived(); + else internal::call_assignment_no_alias(*this, other.derived()); } /** Constructs a sparse matrix from the sparse selfadjoint view \a other */ @@ -658,7 +677,7 @@ class SparseMatrix : m_outerSize(0), m_innerSize(0), m_outerIndex(0), m_innerNonZeros(0) { check_template_parameters(); - *this = other; + Base::operator=(other); } /** Copy constructor (it performs a deep copy) */ @@ -722,22 +741,11 @@ class SparseMatrix return *this; } - #ifndef EIGEN_PARSED_BY_DOXYGEN - template<typename Lhs, typename Rhs> - inline SparseMatrix& operator=(const SparseSparseProduct<Lhs,Rhs>& product) - { return Base::operator=(product); } - - template<typename OtherDerived> - inline SparseMatrix& operator=(const ReturnByValue<OtherDerived>& other) - { - initAssignment(other); - return Base::operator=(other.derived()); - } 
- +#ifndef EIGEN_PARSED_BY_DOXYGEN template<typename OtherDerived> inline SparseMatrix& operator=(const EigenBase<OtherDerived>& other) { return Base::operator=(other.derived()); } - #endif +#endif // EIGEN_PARSED_BY_DOXYGEN template<typename OtherDerived> EIGEN_DONT_INLINE SparseMatrix& operator=(const SparseMatrixBase<OtherDerived>& other); @@ -898,6 +906,11 @@ class SparseMatrix<Scalar,_Options,_Index>::InnerIterator const Index m_outer; Index m_id; Index m_end; + private: + // If you get here, then you're not using the right InnerIterator type, e.g.: + // SparseMatrix<double,RowMajor> A; + // SparseMatrix<double>::InnerIterator it(A,0); + template<typename T> InnerIterator(const SparseMatrixBase<T>&,Index outer); }; template<typename Scalar, int _Options, typename _Index> @@ -1061,17 +1074,19 @@ EIGEN_DONT_INLINE SparseMatrix<Scalar,_Options,_Index>& SparseMatrix<Scalar,_Opt { EIGEN_STATIC_ASSERT((internal::is_same<Scalar, typename OtherDerived::Scalar>::value), YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) - - const bool needToTranspose = (Flags & RowMajorBit) != (OtherDerived::Flags & RowMajorBit); + + const bool needToTranspose = (Flags & RowMajorBit) != (internal::evaluator<OtherDerived>::Flags & RowMajorBit); if (needToTranspose) { // two passes algorithm: // 1 - compute the number of coeffs per dest inner vector // 2 - do the actual copy/eval // Since each coeff of the rhs has to be evaluated twice, let's evaluate it if needed - typedef typename internal::nested<OtherDerived,2>::type OtherCopy; + typedef typename internal::nested_eval<OtherDerived,2,typename internal::plain_matrix_type<OtherDerived>::type >::type OtherCopy; typedef typename internal::remove_all<OtherCopy>::type _OtherCopy; + typedef internal::evaluator<_OtherCopy> OtherCopyEval; OtherCopy otherCopy(other.derived()); + OtherCopyEval otherCopyEval(otherCopy); SparseMatrix dest(other.rows(),other.cols()); 
Eigen::Map<Matrix<Index, Dynamic, 1> > (dest.m_outerIndex,dest.outerSize()).setZero(); @@ -1079,7 +1094,7 @@ EIGEN_DONT_INLINE SparseMatrix<Scalar,_Options,_Index>& SparseMatrix<Scalar,_Opt // pass 1 // FIXME the above copy could be merged with that pass for (Index j=0; j<otherCopy.outerSize(); ++j) - for (typename _OtherCopy::InnerIterator it(otherCopy, j); it; ++it) + for (typename OtherCopyEval::InnerIterator it(otherCopyEval, j); it; ++it) ++dest.m_outerIndex[it.index()]; // prefix sum @@ -1098,7 +1113,7 @@ EIGEN_DONT_INLINE SparseMatrix<Scalar,_Options,_Index>& SparseMatrix<Scalar,_Opt // pass 2 for (Index j=0; j<otherCopy.outerSize(); ++j) { - for (typename _OtherCopy::InnerIterator it(otherCopy, j); it; ++it) + for (typename OtherCopyEval::InnerIterator it(otherCopyEval, j); it; ++it) { Index pos = positions[it.index()]++; dest.m_data.index(pos) = j; @@ -1111,7 +1126,9 @@ EIGEN_DONT_INLINE SparseMatrix<Scalar,_Options,_Index>& SparseMatrix<Scalar,_Opt else { if(other.isRValue()) + { initAssignment(other.derived()); + } // there is no special optimization return Base::operator=(other.derived()); } @@ -1256,6 +1273,53 @@ EIGEN_DONT_INLINE typename SparseMatrix<_Scalar,_Options,_Index>::Scalar& Sparse return (m_data.value(p) = 0); } +namespace internal { + +template<typename _Scalar, int _Options, typename _Index> +struct evaluator<SparseMatrix<_Scalar,_Options,_Index> > + : evaluator_base<SparseMatrix<_Scalar,_Options,_Index> > +{ + typedef _Scalar Scalar; + typedef _Index Index; + typedef SparseMatrix<_Scalar,_Options,_Index> SparseMatrixType; + typedef typename SparseMatrixType::InnerIterator InnerIterator; + typedef typename SparseMatrixType::ReverseInnerIterator ReverseInnerIterator; + + enum { + CoeffReadCost = NumTraits<_Scalar>::ReadCost, + Flags = SparseMatrixType::Flags + }; + + evaluator() : m_matrix(0) {} + explicit evaluator(const SparseMatrixType &mat) : m_matrix(&mat) {} + + operator SparseMatrixType&() { return m_matrix->const_cast_derived(); } 
+ operator const SparseMatrixType&() const { return *m_matrix; } + + typedef typename DenseCoeffsBase<SparseMatrixType,ReadOnlyAccessors>::CoeffReturnType CoeffReturnType; + Scalar coeff(Index row, Index col) const + { return m_matrix->coeff(row,col); } + + Scalar& coeffRef(Index row, Index col) + { + eigen_internal_assert(row>=0 && row<m_matrix->rows() && col>=0 && col<m_matrix->cols()); + + const Index outer = SparseMatrixType::IsRowMajor ? row : col; + const Index inner = SparseMatrixType::IsRowMajor ? col : row; + + Index start = m_matrix->outerIndexPtr()[outer]; + Index end = m_matrix->isCompressed() ? m_matrix->outerIndexPtr()[outer+1] : m_matrix->outerIndexPtr()[outer] + m_matrix->innerNonZeroPtr()[outer]; + eigen_assert(end>start && "you are using a non finalized sparse matrix or written coefficient does not exist"); + const Index p = m_matrix->data().searchLowerIndex(start,end-1,inner); + eigen_assert((p<end) && (m_matrix->data().index(p)==inner) && "written coefficient does not exist"); + return m_matrix->const_cast_derived().data().value(p); + } + + const SparseMatrixType *m_matrix; +}; + +} + } // end namespace Eigen #endif // EIGEN_SPARSEMATRIX_H diff --git a/Eigen/src/SparseCore/SparseMatrixBase.h b/Eigen/src/SparseCore/SparseMatrixBase.h index fb5025049..04baabe4f 100644 --- a/Eigen/src/SparseCore/SparseMatrixBase.h +++ b/Eigen/src/SparseCore/SparseMatrixBase.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2008-2011 Gael Guennebaud <gael.guennebaud@inria.fr> +// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr> // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. 
If a copy of the MPL was not distributed @@ -39,11 +39,7 @@ template<typename Derived> class SparseMatrixBase : public EigenBase<Derived> typedef EigenBase<Derived> Base; template<typename OtherDerived> - Derived& operator=(const EigenBase<OtherDerived> &other) - { - other.derived().evalTo(derived()); - return derived(); - } + Derived& operator=(const EigenBase<OtherDerived> &other); enum { @@ -83,11 +79,6 @@ template<typename Derived> class SparseMatrixBase : public EigenBase<Derived> * constructed from this one. See the \ref flags "list of flags". */ - CoeffReadCost = internal::traits<Derived>::CoeffReadCost, - /**< This is a rough measure of how expensive it is to read one coefficient from - * this expression. - */ - IsRowMajor = Flags&RowMajorBit ? 1 : 0, InnerSizeAtCompileTime = int(IsVectorAtCompileTime) ? int(SizeAtCompileTime) @@ -103,11 +94,13 @@ template<typename Derived> class SparseMatrixBase : public EigenBase<Derived> CwiseUnaryOp<internal::scalar_conjugate_op<Scalar>, Eigen::Transpose<const Derived> >, Transpose<const Derived> >::type AdjointReturnType; + typedef Transpose<Derived> TransposeReturnType; + template<unsigned int UpLo> struct SelfAdjointViewReturnType { typedef SelfAdjointView<Derived, UpLo> Type; }; + typedef typename internal::add_const<Transpose<const Derived> >::type ConstTransposeReturnType; - + // FIXME storage order do not match evaluator storage order typedef SparseMatrix<Scalar, Flags&RowMajorBit ? RowMajor : ColMajor, Index> PlainObject; - #ifndef EIGEN_PARSED_BY_DOXYGEN /** This is the "real scalar" type; if the \a Scalar type is already real numbers * (e.g. int, float or double) then \a RealScalar is just the same as \a Scalar. 
If @@ -124,6 +117,8 @@ template<typename Derived> class SparseMatrixBase : public EigenBase<Derived> /** \internal Represents a matrix with all coefficients equal to one another*/ typedef CwiseNullaryOp<internal::scalar_constant_op<Scalar>,Matrix<Scalar,Dynamic,Dynamic> > ConstantReturnType; + /** type of the equivalent dense matrix */ + typedef Matrix<Scalar,RowsAtCompileTime,ColsAtCompileTime> DenseMatrixType; /** type of the equivalent square matrix */ typedef Matrix<Scalar,EIGEN_SIZE_MAX(RowsAtCompileTime,ColsAtCompileTime), EIGEN_SIZE_MAX(RowsAtCompileTime,ColsAtCompileTime)> SquareMatrixType; @@ -175,93 +170,23 @@ template<typename Derived> class SparseMatrixBase : public EigenBase<Derived> template<typename OtherDerived> - Derived& operator=(const ReturnByValue<OtherDerived>& other) - { - other.evalTo(derived()); - return derived(); - } - + Derived& operator=(const ReturnByValue<OtherDerived>& other); template<typename OtherDerived> - inline Derived& operator=(const SparseMatrixBase<OtherDerived>& other) - { - return assign(other.derived()); - } + inline Derived& operator=(const SparseMatrixBase<OtherDerived>& other); - inline Derived& operator=(const Derived& other) - { -// if (other.isRValue()) -// derived().swap(other.const_cast_derived()); -// else - return assign(other.derived()); - } + inline Derived& operator=(const Derived& other); protected: template<typename OtherDerived> - inline Derived& assign(const OtherDerived& other) - { - const bool transpose = (Flags & RowMajorBit) != (OtherDerived::Flags & RowMajorBit); - const Index outerSize = (int(OtherDerived::Flags) & RowMajorBit) ? 
Index(other.rows()) : Index(other.cols()); - if ((!transpose) && other.isRValue()) - { - // eval without temporary - derived().resize(Index(other.rows()), Index(other.cols())); - derived().setZero(); - derived().reserve((std::max)(this->rows(),this->cols())*2); - for (Index j=0; j<outerSize; ++j) - { - derived().startVec(j); - for (typename OtherDerived::InnerIterator it(other, typename OtherDerived::Index(j)); it; ++it) - { - Scalar v = it.value(); - derived().insertBackByOuterInner(j,Index(it.index())) = v; - } - } - derived().finalize(); - } - else - { - assignGeneric(other); - } - return derived(); - } + inline Derived& assign(const OtherDerived& other); template<typename OtherDerived> - inline void assignGeneric(const OtherDerived& other) - { - //const bool transpose = (Flags & RowMajorBit) != (OtherDerived::Flags & RowMajorBit); - eigen_assert(( ((internal::traits<Derived>::SupportedAccessPatterns&OuterRandomAccessPattern)==OuterRandomAccessPattern) || - (!((Flags & RowMajorBit) != (OtherDerived::Flags & RowMajorBit)))) && - "the transpose operation is supposed to be handled in SparseMatrix::operator="); - - enum { Flip = (Flags & RowMajorBit) != (OtherDerived::Flags & RowMajorBit) }; - - const Index outerSize = Index(other.outerSize()); - //typedef typename internal::conditional<transpose, LinkedVectorMatrix<Scalar,Flags&RowMajorBit>, Derived>::type TempType; - // thanks to shallow copies, we always eval to a tempary - Derived temp(Index(other.rows()), Index(other.cols())); - - temp.reserve((std::max)(this->rows(),this->cols())*2); - for (Index j=0; j<outerSize; ++j) - { - temp.startVec(j); - for (typename OtherDerived::InnerIterator it(other.derived(), typename OtherDerived::Index(j)); it; ++it) - { - Scalar v = it.value(); - temp.insertBackByOuterInner(Flip?Index(it.index()):j,Flip?j:Index(it.index())) = v; - } - } - temp.finalize(); - - derived() = temp.markAsRValue(); - } + inline void assignGeneric(const OtherDerived& other); public: - template<typename 
Lhs, typename Rhs> - inline Derived& operator=(const SparseSparseProduct<Lhs,Rhs>& product); - friend std::ostream & operator << (std::ostream & s, const SparseMatrixBase& m) { typedef typename Derived::Nested Nested; @@ -333,33 +258,34 @@ template<typename Derived> class SparseMatrixBase : public EigenBase<Derived> EIGEN_STRONG_INLINE const EIGEN_SPARSE_CWISE_PRODUCT_RETURN_TYPE cwiseProduct(const MatrixBase<OtherDerived> &other) const; - // sparse * sparse - template<typename OtherDerived> - const typename SparseSparseProductReturnType<Derived,OtherDerived>::Type - operator*(const SparseMatrixBase<OtherDerived> &other) const; - // sparse * diagonal template<typename OtherDerived> - const SparseDiagonalProduct<Derived,OtherDerived> - operator*(const DiagonalBase<OtherDerived> &other) const; + const Product<Derived,OtherDerived> + operator*(const DiagonalBase<OtherDerived> &other) const + { return Product<Derived,OtherDerived>(derived(), other.derived()); } // diagonal * sparse template<typename OtherDerived> friend - const SparseDiagonalProduct<OtherDerived,Derived> + const Product<OtherDerived,Derived> operator*(const DiagonalBase<OtherDerived> &lhs, const SparseMatrixBase& rhs) - { return SparseDiagonalProduct<OtherDerived,Derived>(lhs.derived(), rhs.derived()); } - - /** dense * sparse (return a dense object unless it is an outer product) */ - template<typename OtherDerived> friend - const typename DenseSparseProductReturnType<OtherDerived,Derived>::Type - operator*(const MatrixBase<OtherDerived>& lhs, const Derived& rhs) - { return typename DenseSparseProductReturnType<OtherDerived,Derived>::Type(lhs.derived(),rhs); } - - /** sparse * dense (returns a dense object unless it is an outer product) */ + { return Product<OtherDerived,Derived>(lhs.derived(), rhs.derived()); } + + // sparse * sparse template<typename OtherDerived> - const typename SparseDenseProductReturnType<Derived,OtherDerived>::Type + const Product<Derived,OtherDerived> + operator*(const 
SparseMatrixBase<OtherDerived> &other) const; + + // sparse * dense + template<typename OtherDerived> + const Product<Derived,OtherDerived> operator*(const MatrixBase<OtherDerived> &other) const - { return typename SparseDenseProductReturnType<Derived,OtherDerived>::Type(derived(), other.derived()); } + { return Product<Derived,OtherDerived>(derived(), other.derived()); } + + // dense * sparse + template<typename OtherDerived> friend + const Product<OtherDerived,Derived> + operator*(const MatrixBase<OtherDerived> &lhs, const SparseMatrixBase& rhs) + { return Product<OtherDerived,Derived>(lhs.derived(), rhs.derived()); } /** \returns an expression of P H P^-1 where H is the matrix represented by \c *this */ SparseSymmetricPermutationProduct<Derived,Upper|Lower> twistedBy(const PermutationMatrix<Dynamic,Dynamic,Index>& perm) const @@ -371,9 +297,9 @@ template<typename Derived> class SparseMatrixBase : public EigenBase<Derived> Derived& operator*=(const SparseMatrixBase<OtherDerived>& other); template<int Mode> - inline const SparseTriangularView<Derived, Mode> triangularView() const; + inline const TriangularView<const Derived, Mode> triangularView() const; - template<unsigned int UpLo> inline const SparseSelfAdjointView<Derived, UpLo> selfadjointView() const; + template<unsigned int UpLo> inline const SparseSelfAdjointView<const Derived, UpLo> selfadjointView() const; template<unsigned int UpLo> inline SparseSelfAdjointView<Derived, UpLo> selfadjointView(); template<typename OtherDerived> Scalar dot(const MatrixBase<OtherDerived>& other) const; @@ -382,9 +308,9 @@ template<typename Derived> class SparseMatrixBase : public EigenBase<Derived> RealScalar norm() const; RealScalar blueNorm() const; - Transpose<Derived> transpose() { return derived(); } - const Transpose<const Derived> transpose() const { return derived(); } - const AdjointReturnType adjoint() const { return transpose(); } + TransposeReturnType transpose() { return TransposeReturnType(derived()); } + 
const ConstTransposeReturnType transpose() const { return ConstTransposeReturnType(derived()); } + const AdjointReturnType adjoint() const { return AdjointReturnType(transpose()); } // inner-vector typedef Block<Derived,IsRowMajor?1:Dynamic,IsRowMajor?Dynamic:1,true> InnerVectorReturnType; @@ -396,25 +322,14 @@ template<typename Derived> class SparseMatrixBase : public EigenBase<Derived> Block<Derived,Dynamic,Dynamic,true> innerVectors(Index outerStart, Index outerSize); const Block<const Derived,Dynamic,Dynamic,true> innerVectors(Index outerStart, Index outerSize) const; - /** \internal use operator= */ - template<typename DenseDerived> - void evalTo(MatrixBase<DenseDerived>& dst) const - { - dst.setZero(); - for (Index j=0; j<outerSize(); ++j) - for (typename Derived::InnerIterator i(derived(),typename Derived::Index(j)); i; ++i) - dst.coeffRef(i.row(),i.col()) = i.value(); - } - - Matrix<Scalar,RowsAtCompileTime,ColsAtCompileTime> toDense() const + DenseMatrixType toDense() const { - return derived(); + return DenseMatrixType(derived()); } template<typename OtherDerived> bool isApprox(const SparseMatrixBase<OtherDerived>& other, - const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const - { return toDense().isApprox(other.toDense(),prec); } + const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const; template<typename OtherDerived> bool isApprox(const MatrixBase<OtherDerived>& other, @@ -430,6 +345,9 @@ template<typename Derived> class SparseMatrixBase : public EigenBase<Derived> { return typename internal::eval<Derived>::type(derived()); } Scalar sum() const; + + inline const SparseView<Derived> + pruned(const Scalar& reference = Scalar(0), const RealScalar& epsilon = NumTraits<Scalar>::dummy_precision()) const; protected: diff --git a/Eigen/src/SparseCore/SparsePermutation.h b/Eigen/src/SparseCore/SparsePermutation.h index b85be93f6..21411f232 100644 --- a/Eigen/src/SparseCore/SparsePermutation.h +++ 
b/Eigen/src/SparseCore/SparsePermutation.h @@ -61,7 +61,7 @@ struct permut_sparsematrix_product_retval for(Index j=0; j<m_matrix.outerSize(); ++j) { Index jp = m_permutation.indices().coeff(j); - sizes[((Side==OnTheLeft) ^ Transposed) ? jp : j] = m_matrix.innerVector(((Side==OnTheRight) ^ Transposed) ? jp : j).size(); + sizes[((Side==OnTheLeft) ^ Transposed) ? jp : j] = m_matrix.innerVector(((Side==OnTheRight) ^ Transposed) ? jp : j).nonZeros(); } tmp.reserve(sizes); for(Index j=0; j<m_matrix.outerSize(); ++j) @@ -103,44 +103,133 @@ struct permut_sparsematrix_product_retval } +namespace internal { + +template <int ProductTag> struct product_promote_storage_type<Sparse, PermutationStorage, ProductTag> { typedef Sparse ret; }; +template <int ProductTag> struct product_promote_storage_type<PermutationStorage, Sparse, ProductTag> { typedef Sparse ret; }; + +// TODO, the following needs cleaning, this is just a copy-paste of the dense case + +template<typename Lhs, typename Rhs, int ProductTag> +struct generic_product_impl<Lhs, Rhs, PermutationShape, SparseShape, ProductTag> +{ + template<typename Dest> + static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) + { + permut_sparsematrix_product_retval<Lhs, Rhs, OnTheLeft, false> pmpr(lhs, rhs); + pmpr.evalTo(dst); + } +}; + +template<typename Lhs, typename Rhs, int ProductTag> +struct generic_product_impl<Lhs, Rhs, SparseShape, PermutationShape, ProductTag> +{ + template<typename Dest> + static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) + { + permut_sparsematrix_product_retval<Rhs, Lhs, OnTheRight, false> pmpr(rhs, lhs); + pmpr.evalTo(dst); + } +}; + +template<typename Lhs, typename Rhs, int ProductTag> +struct generic_product_impl<Transpose<Lhs>, Rhs, PermutationShape, SparseShape, ProductTag> +{ + template<typename Dest> + static void evalTo(Dest& dst, const Transpose<Lhs>& lhs, const Rhs& rhs) + { + permut_sparsematrix_product_retval<Lhs, Rhs, OnTheLeft, true> pmpr(lhs.nestedPermutation(), rhs); + 
pmpr.evalTo(dst); + } +}; + +template<typename Lhs, typename Rhs, int ProductTag> +struct generic_product_impl<Lhs, Transpose<Rhs>, SparseShape, PermutationShape, ProductTag> +{ + template<typename Dest> + static void evalTo(Dest& dst, const Lhs& lhs, const Transpose<Rhs>& rhs) + { + permut_sparsematrix_product_retval<Rhs, Lhs, OnTheRight, true> pmpr(rhs.nestedPermutation(), lhs); + pmpr.evalTo(dst); + } +}; + +// TODO, the following two overloads are only needed to define the right temporary type through +// typename traits<permut_sparsematrix_product_retval<Rhs,Lhs,OnTheRight,false> >::ReturnType +// while it should be correctly handled by traits<Product<> >::PlainObject +template<typename Lhs, typename Rhs, int ProductTag> +struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, ProductTag, PermutationShape, SparseShape, typename traits<Lhs>::Scalar, typename traits<Rhs>::Scalar> + : public evaluator<typename traits<permut_sparsematrix_product_retval<Lhs,Rhs,OnTheRight,false> >::ReturnType>::type +{ + typedef Product<Lhs, Rhs, DefaultProduct> XprType; + typedef typename traits<permut_sparsematrix_product_retval<Lhs,Rhs,OnTheRight,false> >::ReturnType PlainObject; + typedef typename evaluator<PlainObject>::type Base; + + explicit product_evaluator(const XprType& xpr) + : m_result(xpr.rows(), xpr.cols()) + { + ::new (static_cast<Base*>(this)) Base(m_result); + generic_product_impl<Lhs, Rhs, PermutationShape, SparseShape, ProductTag>::evalTo(m_result, xpr.lhs(), xpr.rhs()); + } + +protected: + PlainObject m_result; +}; + +template<typename Lhs, typename Rhs, int ProductTag> +struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, ProductTag, SparseShape, PermutationShape, typename traits<Lhs>::Scalar, typename traits<Rhs>::Scalar> + : public evaluator<typename traits<permut_sparsematrix_product_retval<Rhs,Lhs,OnTheRight,false> >::ReturnType>::type +{ + typedef Product<Lhs, Rhs, DefaultProduct> XprType; + typedef typename 
traits<permut_sparsematrix_product_retval<Rhs,Lhs,OnTheRight,false> >::ReturnType PlainObject; + typedef typename evaluator<PlainObject>::type Base; + + explicit product_evaluator(const XprType& xpr) + : m_result(xpr.rows(), xpr.cols()) + { + ::new (static_cast<Base*>(this)) Base(m_result); + generic_product_impl<Lhs, Rhs, SparseShape, PermutationShape, ProductTag>::evalTo(m_result, xpr.lhs(), xpr.rhs()); + } + +protected: + PlainObject m_result; +}; + +} // end namespace internal /** \returns the matrix with the permutation applied to the columns */ template<typename SparseDerived, typename PermDerived> -inline const internal::permut_sparsematrix_product_retval<PermutationBase<PermDerived>, SparseDerived, OnTheRight, false> +inline const Product<SparseDerived, PermDerived> operator*(const SparseMatrixBase<SparseDerived>& matrix, const PermutationBase<PermDerived>& perm) -{ - return internal::permut_sparsematrix_product_retval<PermutationBase<PermDerived>, SparseDerived, OnTheRight, false>(perm, matrix.derived()); -} +{ return Product<SparseDerived, PermDerived>(matrix.derived(), perm.derived()); } /** \returns the matrix with the permutation applied to the rows */ template<typename SparseDerived, typename PermDerived> -inline const internal::permut_sparsematrix_product_retval<PermutationBase<PermDerived>, SparseDerived, OnTheLeft, false> +inline const Product<PermDerived, SparseDerived> operator*( const PermutationBase<PermDerived>& perm, const SparseMatrixBase<SparseDerived>& matrix) -{ - return internal::permut_sparsematrix_product_retval<PermutationBase<PermDerived>, SparseDerived, OnTheLeft, false>(perm, matrix.derived()); -} - +{ return Product<PermDerived, SparseDerived>(perm.derived(), matrix.derived()); } +// TODO, the following specializations should not be needed as Transpose<Permutation*> should be a PermutationBase. /** \returns the matrix with the inverse permutation applied to the columns. 
*/ template<typename SparseDerived, typename PermDerived> -inline const internal::permut_sparsematrix_product_retval<PermutationBase<PermDerived>, SparseDerived, OnTheRight, true> +inline const Product<SparseDerived, Transpose<PermutationBase<PermDerived> > > operator*(const SparseMatrixBase<SparseDerived>& matrix, const Transpose<PermutationBase<PermDerived> >& tperm) { - return internal::permut_sparsematrix_product_retval<PermutationBase<PermDerived>, SparseDerived, OnTheRight, true>(tperm.nestedPermutation(), matrix.derived()); + return Product<SparseDerived, Transpose<PermutationBase<PermDerived> > >(matrix.derived(), tperm); } /** \returns the matrix with the inverse permutation applied to the rows. */ template<typename SparseDerived, typename PermDerived> -inline const internal::permut_sparsematrix_product_retval<PermutationBase<PermDerived>, SparseDerived, OnTheLeft, true> +inline const Product<Transpose<PermutationBase<PermDerived> >, SparseDerived> operator*(const Transpose<PermutationBase<PermDerived> >& tperm, const SparseMatrixBase<SparseDerived>& matrix) { - return internal::permut_sparsematrix_product_retval<PermutationBase<PermDerived>, SparseDerived, OnTheLeft, true>(tperm.nestedPermutation(), matrix.derived()); + return Product<Transpose<PermutationBase<PermDerived> >, SparseDerived>(tperm, matrix.derived()); } } // end namespace Eigen diff --git a/Eigen/src/SparseCore/SparseProduct.h b/Eigen/src/SparseCore/SparseProduct.h index cf7663070..c62386ed1 100644 --- a/Eigen/src/SparseCore/SparseProduct.h +++ b/Eigen/src/SparseCore/SparseProduct.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr> +// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr> // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. 
If a copy of the MPL was not distributed @@ -12,158 +12,6 @@ namespace Eigen { -template<typename Lhs, typename Rhs> -struct SparseSparseProductReturnType -{ - typedef typename internal::traits<Lhs>::Scalar Scalar; - typedef typename internal::traits<Lhs>::Index Index; - enum { - LhsRowMajor = internal::traits<Lhs>::Flags & RowMajorBit, - RhsRowMajor = internal::traits<Rhs>::Flags & RowMajorBit, - TransposeRhs = (!LhsRowMajor) && RhsRowMajor, - TransposeLhs = LhsRowMajor && (!RhsRowMajor) - }; - - typedef typename internal::conditional<TransposeLhs, - SparseMatrix<Scalar,0,Index>, - typename internal::nested<Lhs,Rhs::RowsAtCompileTime>::type>::type LhsNested; - - typedef typename internal::conditional<TransposeRhs, - SparseMatrix<Scalar,0,Index>, - typename internal::nested<Rhs,Lhs::RowsAtCompileTime>::type>::type RhsNested; - - typedef SparseSparseProduct<LhsNested, RhsNested> Type; -}; - -namespace internal { -template<typename LhsNested, typename RhsNested> -struct traits<SparseSparseProduct<LhsNested, RhsNested> > -{ - typedef MatrixXpr XprKind; - // clean the nested types: - typedef typename remove_all<LhsNested>::type _LhsNested; - typedef typename remove_all<RhsNested>::type _RhsNested; - typedef typename _LhsNested::Scalar Scalar; - typedef typename promote_index_type<typename traits<_LhsNested>::Index, - typename traits<_RhsNested>::Index>::type Index; - - enum { - LhsCoeffReadCost = _LhsNested::CoeffReadCost, - RhsCoeffReadCost = _RhsNested::CoeffReadCost, - LhsFlags = _LhsNested::Flags, - RhsFlags = _RhsNested::Flags, - - RowsAtCompileTime = _LhsNested::RowsAtCompileTime, - ColsAtCompileTime = _RhsNested::ColsAtCompileTime, - MaxRowsAtCompileTime = _LhsNested::MaxRowsAtCompileTime, - MaxColsAtCompileTime = _RhsNested::MaxColsAtCompileTime, - - InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(_LhsNested::ColsAtCompileTime, _RhsNested::RowsAtCompileTime), - - EvalToRowMajor = (RhsFlags & LhsFlags & RowMajorBit), - - RemovedBits = ~(EvalToRowMajor ? 
0 : RowMajorBit), - - Flags = (int(LhsFlags | RhsFlags) & HereditaryBits & RemovedBits) - | EvalBeforeAssigningBit - | EvalBeforeNestingBit, - - CoeffReadCost = Dynamic - }; - - typedef Sparse StorageKind; -}; - -} // end namespace internal - -template<typename LhsNested, typename RhsNested> -class SparseSparseProduct : internal::no_assignment_operator, - public SparseMatrixBase<SparseSparseProduct<LhsNested, RhsNested> > -{ - public: - - typedef SparseMatrixBase<SparseSparseProduct> Base; - EIGEN_DENSE_PUBLIC_INTERFACE(SparseSparseProduct) - - private: - - typedef typename internal::traits<SparseSparseProduct>::_LhsNested _LhsNested; - typedef typename internal::traits<SparseSparseProduct>::_RhsNested _RhsNested; - - public: - - template<typename Lhs, typename Rhs> - EIGEN_STRONG_INLINE SparseSparseProduct(const Lhs& lhs, const Rhs& rhs) - : m_lhs(lhs), m_rhs(rhs), m_tolerance(0), m_conservative(true) - { - init(); - } - - template<typename Lhs, typename Rhs> - EIGEN_STRONG_INLINE SparseSparseProduct(const Lhs& lhs, const Rhs& rhs, const RealScalar& tolerance) - : m_lhs(lhs), m_rhs(rhs), m_tolerance(tolerance), m_conservative(false) - { - init(); - } - - SparseSparseProduct pruned(const Scalar& reference = 0, const RealScalar& epsilon = NumTraits<RealScalar>::dummy_precision()) const - { - using std::abs; - return SparseSparseProduct(m_lhs,m_rhs,abs(reference)*epsilon); - } - - template<typename Dest> - void evalTo(Dest& result) const - { - if(m_conservative) - internal::conservative_sparse_sparse_product_selector<_LhsNested, _RhsNested, Dest>::run(lhs(),rhs(),result); - else - internal::sparse_sparse_product_with_pruning_selector<_LhsNested, _RhsNested, Dest>::run(lhs(),rhs(),result,m_tolerance); - } - - EIGEN_STRONG_INLINE Index rows() const { return m_lhs.rows(); } - EIGEN_STRONG_INLINE Index cols() const { return m_rhs.cols(); } - - EIGEN_STRONG_INLINE const _LhsNested& lhs() const { return m_lhs; } - EIGEN_STRONG_INLINE const _RhsNested& rhs() const { return 
m_rhs; } - - protected: - void init() - { - eigen_assert(m_lhs.cols() == m_rhs.rows()); - - enum { - ProductIsValid = _LhsNested::ColsAtCompileTime==Dynamic - || _RhsNested::RowsAtCompileTime==Dynamic - || int(_LhsNested::ColsAtCompileTime)==int(_RhsNested::RowsAtCompileTime), - AreVectors = _LhsNested::IsVectorAtCompileTime && _RhsNested::IsVectorAtCompileTime, - SameSizes = EIGEN_PREDICATE_SAME_MATRIX_SIZE(_LhsNested,_RhsNested) - }; - // note to the lost user: - // * for a dot product use: v1.dot(v2) - // * for a coeff-wise product use: v1.cwise()*v2 - EIGEN_STATIC_ASSERT(ProductIsValid || !(AreVectors && SameSizes), - INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS) - EIGEN_STATIC_ASSERT(ProductIsValid || !(SameSizes && !AreVectors), - INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION) - EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT) - } - - LhsNested m_lhs; - RhsNested m_rhs; - RealScalar m_tolerance; - bool m_conservative; -}; - -// sparse = sparse * sparse -template<typename Derived> -template<typename Lhs, typename Rhs> -inline Derived& SparseMatrixBase<Derived>::operator=(const SparseSparseProduct<Lhs,Rhs>& product) -{ - product.evalTo(derived()); - return derived(); -} - /** \returns an expression of the product of two sparse matrices. * By default a conservative product preserving the symbolic non zeros is performed. 
* The automatic pruning of the small values can be achieved by calling the pruned() function @@ -177,12 +25,74 @@ inline Derived& SparseMatrixBase<Derived>::operator=(const SparseSparseProduct<L * */ template<typename Derived> template<typename OtherDerived> -inline const typename SparseSparseProductReturnType<Derived,OtherDerived>::Type +inline const Product<Derived,OtherDerived> SparseMatrixBase<Derived>::operator*(const SparseMatrixBase<OtherDerived> &other) const { - return typename SparseSparseProductReturnType<Derived,OtherDerived>::Type(derived(), other.derived()); + return Product<Derived,OtherDerived>(derived(), other.derived()); } +namespace internal { + +// sparse * sparse +template<typename Lhs, typename Rhs, int ProductType> +struct generic_product_impl<Lhs, Rhs, SparseShape, SparseShape, ProductType> +{ + template<typename Dest> + static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) + { + typedef typename nested_eval<Lhs,Dynamic>::type LhsNested; + typedef typename nested_eval<Rhs,Dynamic>::type RhsNested; + LhsNested lhsNested(lhs); + RhsNested rhsNested(rhs); + internal::conservative_sparse_sparse_product_selector<typename remove_all<LhsNested>::type, + typename remove_all<RhsNested>::type, Dest>::run(lhsNested,rhsNested,dst); + } +}; + +// sparse * sparse-triangular +template<typename Lhs, typename Rhs, int ProductType> +struct generic_product_impl<Lhs, Rhs, SparseShape, SparseTriangularShape, ProductType> + : public generic_product_impl<Lhs, Rhs, SparseShape, SparseShape, ProductType> +{}; + +// sparse-triangular * sparse +template<typename Lhs, typename Rhs, int ProductType> +struct generic_product_impl<Lhs, Rhs, SparseTriangularShape, SparseShape, ProductType> + : public generic_product_impl<Lhs, Rhs, SparseShape, SparseShape, ProductType> +{}; + +template<typename Lhs, typename Rhs, int Options> +struct evaluator<SparseView<Product<Lhs, Rhs, Options> > > + : public evaluator<typename Product<Lhs, Rhs, 
DefaultProduct>::PlainObject>::type +{ + typedef SparseView<Product<Lhs, Rhs, Options> > XprType; + typedef typename XprType::PlainObject PlainObject; + typedef typename evaluator<PlainObject>::type Base; + + typedef evaluator type; + typedef evaluator nestedType; + + explicit evaluator(const XprType& xpr) + : m_result(xpr.rows(), xpr.cols()) + { + using std::abs; + ::new (static_cast<Base*>(this)) Base(m_result); + typedef typename nested_eval<Lhs,Dynamic>::type LhsNested; + typedef typename nested_eval<Rhs,Dynamic>::type RhsNested; + LhsNested lhsNested(xpr.nestedExpression().lhs()); + RhsNested rhsNested(xpr.nestedExpression().rhs()); + + internal::sparse_sparse_product_with_pruning_selector<typename remove_all<LhsNested>::type, + typename remove_all<RhsNested>::type, PlainObject>::run(lhsNested,rhsNested,m_result, + abs(xpr.reference())*xpr.epsilon()); + } + +protected: + PlainObject m_result; +}; + +} // end namespace internal + } // end namespace Eigen #endif // EIGEN_SPARSEPRODUCT_H diff --git a/Eigen/src/SparseCore/SparseRedux.h b/Eigen/src/SparseCore/SparseRedux.h index f3da93a71..763f2296b 100644 --- a/Eigen/src/SparseCore/SparseRedux.h +++ b/Eigen/src/SparseCore/SparseRedux.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr> +// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr> // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. 
If a copy of the MPL was not distributed @@ -18,8 +18,9 @@ SparseMatrixBase<Derived>::sum() const { eigen_assert(rows()>0 && cols()>0 && "you are using a non initialized matrix"); Scalar res(0); + typename internal::evaluator<Derived>::type thisEval(derived()); for (Index j=0; j<outerSize(); ++j) - for (typename Derived::InnerIterator iter(derived(),j); iter; ++iter) + for (typename internal::evaluator<Derived>::InnerIterator iter(thisEval,j); iter; ++iter) res += iter.value(); return res; } diff --git a/Eigen/src/SparseCore/SparseSelfAdjointView.h b/Eigen/src/SparseCore/SparseSelfAdjointView.h index 56c922929..5da7d2bef 100644 --- a/Eigen/src/SparseCore/SparseSelfAdjointView.h +++ b/Eigen/src/SparseCore/SparseSelfAdjointView.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr> +// Copyright (C) 2009-2014 Gael Guennebaud <gael.guennebaud@inria.fr> // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed @@ -11,14 +11,14 @@ #define EIGEN_SPARSE_SELFADJOINTVIEW_H namespace Eigen { - + /** \ingroup SparseCore_Module * \class SparseSelfAdjointView * * \brief Pseudo expression to manipulate a triangular sparse matrix as a selfadjoint matrix. * * \param MatrixType the type of the dense matrix storing the coefficients - * \param UpLo can be either \c #Lower or \c #Upper + * \param Mode can be either \c #Lower or \c #Upper * * This class is an expression of a sefladjoint matrix from a triangular part of a matrix * with given dense storage of the coefficients. 
It is the return type of MatrixBase::selfadjointView() @@ -26,38 +26,34 @@ namespace Eigen { * * \sa SparseMatrixBase::selfadjointView() */ -template<typename Lhs, typename Rhs, int UpLo> -class SparseSelfAdjointTimeDenseProduct; - -template<typename Lhs, typename Rhs, int UpLo> -class DenseTimeSparseSelfAdjointProduct; - namespace internal { -template<typename MatrixType, unsigned int UpLo> -struct traits<SparseSelfAdjointView<MatrixType,UpLo> > : traits<MatrixType> { +template<typename MatrixType, unsigned int Mode> +struct traits<SparseSelfAdjointView<MatrixType,Mode> > : traits<MatrixType> { }; -template<int SrcUpLo,int DstUpLo,typename MatrixType,int DestOrder> +template<int SrcMode,int DstMode,typename MatrixType,int DestOrder> void permute_symm_to_symm(const MatrixType& mat, SparseMatrix<typename MatrixType::Scalar,DestOrder,typename MatrixType::Index>& _dest, const typename MatrixType::Index* perm = 0); -template<int UpLo,typename MatrixType,int DestOrder> +template<int Mode,typename MatrixType,int DestOrder> void permute_symm_to_fullsymm(const MatrixType& mat, SparseMatrix<typename MatrixType::Scalar,DestOrder,typename MatrixType::Index>& _dest, const typename MatrixType::Index* perm = 0); } -template<typename MatrixType, unsigned int UpLo> class SparseSelfAdjointView - : public EigenBase<SparseSelfAdjointView<MatrixType,UpLo> > +template<typename MatrixType, unsigned int _Mode> class SparseSelfAdjointView + : public EigenBase<SparseSelfAdjointView<MatrixType,_Mode> > { public: + + enum { Mode = _Mode }; typedef typename MatrixType::Scalar Scalar; typedef typename MatrixType::Index Index; typedef Matrix<Index,Dynamic,1> VectorI; typedef typename MatrixType::Nested MatrixTypeNested; typedef typename internal::remove_all<MatrixTypeNested>::type _MatrixTypeNested; - - inline SparseSelfAdjointView(const MatrixType& matrix) : m_matrix(matrix) + + explicit inline SparseSelfAdjointView(const MatrixType& matrix) : m_matrix(matrix) { eigen_assert(rows()==cols() && 
"SelfAdjointView is only for squared matrices"); } @@ -75,10 +71,10 @@ template<typename MatrixType, unsigned int UpLo> class SparseSelfAdjointView * Indeed, the SparseSelfadjointView operand is first copied into a temporary SparseMatrix before computing the product. */ template<typename OtherDerived> - SparseSparseProduct<typename OtherDerived::PlainObject, OtherDerived> + Product<SparseSelfAdjointView, OtherDerived> operator*(const SparseMatrixBase<OtherDerived>& rhs) const { - return SparseSparseProduct<typename OtherDerived::PlainObject, OtherDerived>(*this, rhs.derived()); + return Product<SparseSelfAdjointView, OtherDerived>(*this, rhs.derived()); } /** \returns an expression of the matrix product between a sparse matrix \a lhs and a sparse self-adjoint matrix \a rhs. @@ -87,26 +83,26 @@ template<typename MatrixType, unsigned int UpLo> class SparseSelfAdjointView * Indeed, the SparseSelfadjointView operand is first copied into a temporary SparseMatrix before computing the product. 
*/ template<typename OtherDerived> friend - SparseSparseProduct<OtherDerived, typename OtherDerived::PlainObject > + Product<OtherDerived, SparseSelfAdjointView> operator*(const SparseMatrixBase<OtherDerived>& lhs, const SparseSelfAdjointView& rhs) { - return SparseSparseProduct<OtherDerived, typename OtherDerived::PlainObject>(lhs.derived(), rhs); + return Product<OtherDerived, SparseSelfAdjointView>(lhs.derived(), rhs); } /** Efficient sparse self-adjoint matrix times dense vector/matrix product */ template<typename OtherDerived> - SparseSelfAdjointTimeDenseProduct<MatrixType,OtherDerived,UpLo> + Product<SparseSelfAdjointView,OtherDerived> operator*(const MatrixBase<OtherDerived>& rhs) const { - return SparseSelfAdjointTimeDenseProduct<MatrixType,OtherDerived,UpLo>(m_matrix, rhs.derived()); + return Product<SparseSelfAdjointView,OtherDerived>(*this, rhs.derived()); } /** Efficient dense vector/matrix times sparse self-adjoint matrix product */ template<typename OtherDerived> friend - DenseTimeSparseSelfAdjointProduct<OtherDerived,MatrixType,UpLo> + Product<OtherDerived,SparseSelfAdjointView> operator*(const MatrixBase<OtherDerived>& lhs, const SparseSelfAdjointView& rhs) { - return DenseTimeSparseSelfAdjointProduct<OtherDerived,_MatrixTypeNested,UpLo>(lhs.derived(), rhs.m_matrix); + return Product<OtherDerived,SparseSelfAdjointView>(lhs.derived(), rhs); } /** Perform a symmetric rank K update of the selfadjoint matrix \c *this: @@ -123,53 +119,49 @@ template<typename MatrixType, unsigned int UpLo> class SparseSelfAdjointView /** \internal triggered by sparse_matrix = SparseSelfadjointView; */ template<typename DestScalar,int StorageOrder> void evalTo(SparseMatrix<DestScalar,StorageOrder,Index>& _dest) const { - internal::permute_symm_to_fullsymm<UpLo>(m_matrix, _dest); + internal::permute_symm_to_fullsymm<Mode>(m_matrix, _dest); } template<typename DestScalar> void evalTo(DynamicSparseMatrix<DestScalar,ColMajor,Index>& _dest) const { // TODO directly evaluate 
into _dest; SparseMatrix<DestScalar,ColMajor,Index> tmp(_dest.rows(),_dest.cols()); - internal::permute_symm_to_fullsymm<UpLo>(m_matrix, tmp); + internal::permute_symm_to_fullsymm<Mode>(m_matrix, tmp); _dest = tmp; } /** \returns an expression of P H P^-1 */ - SparseSymmetricPermutationProduct<_MatrixTypeNested,UpLo> twistedBy(const PermutationMatrix<Dynamic,Dynamic,Index>& perm) const + // TODO implement twists in a more evaluator friendly fashion + SparseSymmetricPermutationProduct<_MatrixTypeNested,Mode> twistedBy(const PermutationMatrix<Dynamic,Dynamic,Index>& perm) const { - return SparseSymmetricPermutationProduct<_MatrixTypeNested,UpLo>(m_matrix, perm); + return SparseSymmetricPermutationProduct<_MatrixTypeNested,Mode>(m_matrix, perm); } - - template<typename SrcMatrixType,int SrcUpLo> - SparseSelfAdjointView& operator=(const SparseSymmetricPermutationProduct<SrcMatrixType,SrcUpLo>& permutedMatrix) + + template<typename SrcMatrixType,int SrcMode> + SparseSelfAdjointView& operator=(const SparseSymmetricPermutationProduct<SrcMatrixType,SrcMode>& permutedMatrix) { permutedMatrix.evalTo(*this); return *this; } - SparseSelfAdjointView& operator=(const SparseSelfAdjointView& src) { PermutationMatrix<Dynamic> pnull; return *this = src.twistedBy(pnull); } - template<typename SrcMatrixType,unsigned int SrcUpLo> - SparseSelfAdjointView& operator=(const SparseSelfAdjointView<SrcMatrixType,SrcUpLo>& src) + template<typename SrcMatrixType,unsigned int SrcMode> + SparseSelfAdjointView& operator=(const SparseSelfAdjointView<SrcMatrixType,SrcMode>& src) { PermutationMatrix<Dynamic> pnull; return *this = src.twistedBy(pnull); } - - // const SparseLLT<PlainObject, UpLo> llt() const; - // const SparseLDLT<PlainObject, UpLo> ldlt() const; - protected: typename MatrixType::Nested m_matrix; - mutable VectorI m_countPerRow; - mutable VectorI m_countPerCol; + //mutable VectorI m_countPerRow; + //mutable VectorI m_countPerCol; }; 
/*************************************************************************** @@ -177,33 +169,33 @@ template<typename MatrixType, unsigned int UpLo> class SparseSelfAdjointView ***************************************************************************/ template<typename Derived> -template<unsigned int UpLo> -const SparseSelfAdjointView<Derived, UpLo> SparseMatrixBase<Derived>::selfadjointView() const +template<unsigned int Mode> +const SparseSelfAdjointView<const Derived, Mode> SparseMatrixBase<Derived>::selfadjointView() const { - return derived(); + return SparseSelfAdjointView<const Derived, Mode>(derived()); } template<typename Derived> -template<unsigned int UpLo> -SparseSelfAdjointView<Derived, UpLo> SparseMatrixBase<Derived>::selfadjointView() +template<unsigned int Mode> +SparseSelfAdjointView<Derived, Mode> SparseMatrixBase<Derived>::selfadjointView() { - return derived(); + return SparseSelfAdjointView<Derived, Mode>(derived()); } /*************************************************************************** * Implementation of SparseSelfAdjointView methods ***************************************************************************/ -template<typename MatrixType, unsigned int UpLo> +template<typename MatrixType, unsigned int Mode> template<typename DerivedU> -SparseSelfAdjointView<MatrixType,UpLo>& -SparseSelfAdjointView<MatrixType,UpLo>::rankUpdate(const SparseMatrixBase<DerivedU>& u, const Scalar& alpha) +SparseSelfAdjointView<MatrixType,Mode>& +SparseSelfAdjointView<MatrixType,Mode>::rankUpdate(const SparseMatrixBase<DerivedU>& u, const Scalar& alpha) { - SparseMatrix<Scalar,MatrixType::Flags&RowMajorBit?RowMajor:ColMajor> tmp = u * u.adjoint(); + SparseMatrix<Scalar,(MatrixType::Flags&RowMajorBit)?RowMajor:ColMajor> tmp = u * u.adjoint(); if(alpha==Scalar(0)) - m_matrix.const_cast_derived() = tmp.template triangularView<UpLo>(); + m_matrix.const_cast_derived() = tmp.template triangularView<Mode>(); else - m_matrix.const_cast_derived() += alpha * 
tmp.template triangularView<UpLo>(); + m_matrix.const_cast_derived() += alpha * tmp.template triangularView<Mode>(); return *this; } @@ -213,104 +205,154 @@ SparseSelfAdjointView<MatrixType,UpLo>::rankUpdate(const SparseMatrixBase<Derive ***************************************************************************/ namespace internal { -template<typename Lhs, typename Rhs, int UpLo> -struct traits<SparseSelfAdjointTimeDenseProduct<Lhs,Rhs,UpLo> > - : traits<ProductBase<SparseSelfAdjointTimeDenseProduct<Lhs,Rhs,UpLo>, Lhs, Rhs> > -{ - typedef Dense StorageKind; -}; -} -template<typename Lhs, typename Rhs, int UpLo> -class SparseSelfAdjointTimeDenseProduct - : public ProductBase<SparseSelfAdjointTimeDenseProduct<Lhs,Rhs,UpLo>, Lhs, Rhs> +template<int Mode, typename SparseLhsType, typename DenseRhsType, typename DenseResType, typename AlphaType> +inline void sparse_selfadjoint_time_dense_product(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res, const AlphaType& alpha) { - public: - EIGEN_PRODUCT_PUBLIC_INTERFACE(SparseSelfAdjointTimeDenseProduct) - - SparseSelfAdjointTimeDenseProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) - {} - - template<typename Dest> void scaleAndAddTo(Dest& dest, const Scalar& alpha) const + EIGEN_ONLY_USED_FOR_DEBUG(alpha); + // TODO use alpha + eigen_assert(alpha==AlphaType(1) && "alpha != 1 is not implemented yet, sorry"); + + typedef typename evaluator<SparseLhsType>::type LhsEval; + typedef typename evaluator<SparseLhsType>::InnerIterator LhsIterator; + typedef typename SparseLhsType::Index Index; + typedef typename SparseLhsType::Scalar LhsScalar; + + enum { + LhsIsRowMajor = (LhsEval::Flags&RowMajorBit)==RowMajorBit, + ProcessFirstHalf = + ((Mode&(Upper|Lower))==(Upper|Lower)) + || ( (Mode&Upper) && !LhsIsRowMajor) + || ( (Mode&Lower) && LhsIsRowMajor), + ProcessSecondHalf = !ProcessFirstHalf + }; + + LhsEval lhsEval(lhs); + + for (Index j=0; j<lhs.outerSize(); ++j) + { + LhsIterator i(lhsEval,j); + if 
(ProcessSecondHalf) { - EIGEN_ONLY_USED_FOR_DEBUG(alpha); - // TODO use alpha - eigen_assert(alpha==Scalar(1) && "alpha != 1 is not implemented yet, sorry"); - typedef typename internal::remove_all<Lhs>::type _Lhs; - typedef typename _Lhs::InnerIterator LhsInnerIterator; - enum { - LhsIsRowMajor = (_Lhs::Flags&RowMajorBit)==RowMajorBit, - ProcessFirstHalf = - ((UpLo&(Upper|Lower))==(Upper|Lower)) - || ( (UpLo&Upper) && !LhsIsRowMajor) - || ( (UpLo&Lower) && LhsIsRowMajor), - ProcessSecondHalf = !ProcessFirstHalf - }; - for (typename _Lhs::Index j=0; j<m_lhs.outerSize(); ++j) + while (i && i.index()<j) ++i; + if(i && i.index()==j) { - LhsInnerIterator i(m_lhs,j); - if (ProcessSecondHalf) - { - while (i && i.index()<j) ++i; - if(i && i.index()==j) - { - dest.row(j) += i.value() * m_rhs.row(j); - ++i; - } - } - for(; (ProcessFirstHalf ? i && i.index() < j : i) ; ++i) - { - Index a = LhsIsRowMajor ? j : i.index(); - Index b = LhsIsRowMajor ? i.index() : j; - typename Lhs::Scalar v = i.value(); - dest.row(a) += (v) * m_rhs.row(b); - dest.row(b) += numext::conj(v) * m_rhs.row(a); - } - if (ProcessFirstHalf && i && (i.index()==j)) - dest.row(j) += i.value() * m_rhs.row(j); + res.row(j) += i.value() * rhs.row(j); + ++i; } } + for(; (ProcessFirstHalf ? i && i.index() < j : i) ; ++i) + { + Index a = LhsIsRowMajor ? j : i.index(); + Index b = LhsIsRowMajor ? i.index() : j; + LhsScalar v = i.value(); + res.row(a) += (v) * rhs.row(b); + res.row(b) += numext::conj(v) * rhs.row(a); + } + if (ProcessFirstHalf && i && (i.index()==j)) + res.row(j) += i.value() * rhs.row(j); + } +} + +// TODO currently a selfadjoint expression has the form SelfAdjointView<.,.> +// in the future selfadjoint-ness should be defined by the expression traits +// such that Transpose<SelfAdjointView<.,.> > is valid. 
(currently TriangularBase::transpose() is overloaded to make it work) +template<typename MatrixType, unsigned int Mode> +struct evaluator_traits<SparseSelfAdjointView<MatrixType,Mode> > +{ + typedef typename storage_kind_to_evaluator_kind<typename MatrixType::StorageKind>::Kind Kind; + typedef SparseSelfAdjointShape Shape; + + static const int AssumeAliasing = 0; +}; - private: - SparseSelfAdjointTimeDenseProduct& operator=(const SparseSelfAdjointTimeDenseProduct&); +template<typename LhsView, typename Rhs, int ProductType> +struct generic_product_impl<LhsView, Rhs, SparseSelfAdjointShape, DenseShape, ProductType> +{ + template<typename Dest> + static void evalTo(Dest& dst, const LhsView& lhsView, const Rhs& rhs) + { + typedef typename LhsView::_MatrixTypeNested Lhs; + typedef typename nested_eval<Lhs,Dynamic>::type LhsNested; + typedef typename nested_eval<Rhs,Dynamic>::type RhsNested; + LhsNested lhsNested(lhsView.matrix()); + RhsNested rhsNested(rhs); + + dst.setZero(); + internal::sparse_selfadjoint_time_dense_product<LhsView::Mode>(lhsNested, rhsNested, dst, typename Dest::Scalar(1)); + } }; -namespace internal { -template<typename Lhs, typename Rhs, int UpLo> -struct traits<DenseTimeSparseSelfAdjointProduct<Lhs,Rhs,UpLo> > - : traits<ProductBase<DenseTimeSparseSelfAdjointProduct<Lhs,Rhs,UpLo>, Lhs, Rhs> > -{}; -} +template<typename Lhs, typename RhsView, int ProductType> +struct generic_product_impl<Lhs, RhsView, DenseShape, SparseSelfAdjointShape, ProductType> +{ + template<typename Dest> + static void evalTo(Dest& dst, const Lhs& lhs, const RhsView& rhsView) + { + typedef typename RhsView::_MatrixTypeNested Rhs; + typedef typename nested_eval<Lhs,Dynamic>::type LhsNested; + typedef typename nested_eval<Rhs,Dynamic>::type RhsNested; + LhsNested lhsNested(lhs); + RhsNested rhsNested(rhsView.matrix()); + + dst.setZero(); + // transpoe everything + Transpose<Dest> dstT(dst); + internal::sparse_selfadjoint_time_dense_product<RhsView::Mode>(rhsNested.transpose(), 
lhsNested.transpose(), dstT, typename Dest::Scalar(1)); + } +}; -template<typename Lhs, typename Rhs, int UpLo> -class DenseTimeSparseSelfAdjointProduct - : public ProductBase<DenseTimeSparseSelfAdjointProduct<Lhs,Rhs,UpLo>, Lhs, Rhs> +// NOTE: these two overloads are needed to evaluate the sparse sefladjoint view into a full sparse matrix +// TODO: maybe the copy could be handled by generic_product_impl so that these overloads would not be needed anymore + +template<typename LhsView, typename Rhs, int ProductTag> +struct product_evaluator<Product<LhsView, Rhs, DefaultProduct>, ProductTag, SparseSelfAdjointShape, SparseShape, typename traits<LhsView>::Scalar, typename traits<Rhs>::Scalar> + : public evaluator<typename Product<typename Rhs::PlainObject, Rhs, DefaultProduct>::PlainObject>::type { - public: - EIGEN_PRODUCT_PUBLIC_INTERFACE(DenseTimeSparseSelfAdjointProduct) + typedef Product<LhsView, Rhs, DefaultProduct> XprType; + typedef typename XprType::PlainObject PlainObject; + typedef typename evaluator<PlainObject>::type Base; - DenseTimeSparseSelfAdjointProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) - {} + product_evaluator(const XprType& xpr) + : m_lhs(xpr.lhs()), m_result(xpr.rows(), xpr.cols()) + { + ::new (static_cast<Base*>(this)) Base(m_result); + generic_product_impl<typename Rhs::PlainObject, Rhs, SparseShape, SparseShape, ProductTag>::evalTo(m_result, m_lhs, xpr.rhs()); + } + +protected: + typename Rhs::PlainObject m_lhs; + PlainObject m_result; +}; - template<typename Dest> void scaleAndAddTo(Dest& /*dest*/, const Scalar& /*alpha*/) const - { - // TODO - } +template<typename Lhs, typename RhsView, int ProductTag> +struct product_evaluator<Product<Lhs, RhsView, DefaultProduct>, ProductTag, SparseShape, SparseSelfAdjointShape, typename traits<Lhs>::Scalar, typename traits<RhsView>::Scalar> + : public evaluator<typename Product<Lhs, typename Lhs::PlainObject, DefaultProduct>::PlainObject>::type +{ + typedef Product<Lhs, RhsView, 
DefaultProduct> XprType; + typedef typename XprType::PlainObject PlainObject; + typedef typename evaluator<PlainObject>::type Base; - private: - DenseTimeSparseSelfAdjointProduct& operator=(const DenseTimeSparseSelfAdjointProduct&); + product_evaluator(const XprType& xpr) + : m_rhs(xpr.rhs()), m_result(xpr.rows(), xpr.cols()) + { + ::new (static_cast<Base*>(this)) Base(m_result); + generic_product_impl<Lhs, typename Lhs::PlainObject, SparseShape, SparseShape, ProductTag>::evalTo(m_result, xpr.lhs(), m_rhs); + } + +protected: + typename Lhs::PlainObject m_rhs; + PlainObject m_result; }; +} // namespace internal + /*************************************************************************** * Implementation of symmetric copies and permutations ***************************************************************************/ namespace internal { - -template<typename MatrixType, int UpLo> -struct traits<SparseSymmetricPermutationProduct<MatrixType,UpLo> > : traits<MatrixType> { -}; -template<int UpLo,typename MatrixType,int DestOrder> +template<int Mode,typename MatrixType,int DestOrder> void permute_symm_to_fullsymm(const MatrixType& mat, SparseMatrix<typename MatrixType::Scalar,DestOrder,typename MatrixType::Index>& _dest, const typename MatrixType::Index* perm) { typedef typename MatrixType::Index Index; @@ -337,11 +379,11 @@ void permute_symm_to_fullsymm(const MatrixType& mat, SparseMatrix<typename Matri Index r = it.row(); Index c = it.col(); Index ip = perm ? perm[i] : i; - if(UpLo==(Upper|Lower)) + if(Mode==(Upper|Lower)) count[StorageOrderMatch ? jp : ip]++; else if(r==c) count[ip]++; - else if(( UpLo==Lower && r>c) || ( UpLo==Upper && r<c)) + else if(( Mode==Lower && r>c) || ( Mode==Upper && r<c)) { count[ip]++; count[jp]++; @@ -370,7 +412,7 @@ void permute_symm_to_fullsymm(const MatrixType& mat, SparseMatrix<typename Matri Index jp = perm ? perm[j] : j; Index ip = perm ? 
perm[i] : i; - if(UpLo==(Upper|Lower)) + if(Mode==(Upper|Lower)) { Index k = count[StorageOrderMatch ? jp : ip]++; dest.innerIndexPtr()[k] = StorageOrderMatch ? ip : jp; @@ -382,7 +424,7 @@ void permute_symm_to_fullsymm(const MatrixType& mat, SparseMatrix<typename Matri dest.innerIndexPtr()[k] = ip; dest.valuePtr()[k] = it.value(); } - else if(( (UpLo&Lower)==Lower && r>c) || ( (UpLo&Upper)==Upper && r<c)) + else if(( (Mode&Lower)==Lower && r>c) || ( (Mode&Upper)==Upper && r<c)) { if(!StorageOrderMatch) std::swap(ip,jp); @@ -397,7 +439,7 @@ void permute_symm_to_fullsymm(const MatrixType& mat, SparseMatrix<typename Matri } } -template<int _SrcUpLo,int _DstUpLo,typename MatrixType,int DstOrder> +template<int _SrcMode,int _DstMode,typename MatrixType,int DstOrder> void permute_symm_to_symm(const MatrixType& mat, SparseMatrix<typename MatrixType::Scalar,DstOrder,typename MatrixType::Index>& _dest, const typename MatrixType::Index* perm) { typedef typename MatrixType::Index Index; @@ -407,8 +449,8 @@ void permute_symm_to_symm(const MatrixType& mat, SparseMatrix<typename MatrixTyp enum { SrcOrder = MatrixType::IsRowMajor ? RowMajor : ColMajor, StorageOrderMatch = int(SrcOrder) == int(DstOrder), - DstUpLo = DstOrder==RowMajor ? (_DstUpLo==Upper ? Lower : Upper) : _DstUpLo, - SrcUpLo = SrcOrder==RowMajor ? (_SrcUpLo==Upper ? Lower : Upper) : _SrcUpLo + DstMode = DstOrder==RowMajor ? (_DstMode==Upper ? Lower : Upper) : _DstMode, + SrcMode = SrcOrder==RowMajor ? (_SrcMode==Upper ? Lower : Upper) : _SrcMode }; Index size = mat.rows(); @@ -421,11 +463,11 @@ void permute_symm_to_symm(const MatrixType& mat, SparseMatrix<typename MatrixTyp for(typename MatrixType::InnerIterator it(mat,j); it; ++it) { Index i = it.index(); - if((int(SrcUpLo)==int(Lower) && i<j) || (int(SrcUpLo)==int(Upper) && i>j)) + if((int(SrcMode)==int(Lower) && i<j) || (int(SrcMode)==int(Upper) && i>j)) continue; Index ip = perm ? perm[i] : i; - count[int(DstUpLo)==int(Lower) ? 
(std::min)(ip,jp) : (std::max)(ip,jp)]++; + count[int(DstMode)==int(Lower) ? (std::min)(ip,jp) : (std::max)(ip,jp)]++; } } dest.outerIndexPtr()[0] = 0; @@ -441,17 +483,17 @@ void permute_symm_to_symm(const MatrixType& mat, SparseMatrix<typename MatrixTyp for(typename MatrixType::InnerIterator it(mat,j); it; ++it) { Index i = it.index(); - if((int(SrcUpLo)==int(Lower) && i<j) || (int(SrcUpLo)==int(Upper) && i>j)) + if((int(SrcMode)==int(Lower) && i<j) || (int(SrcMode)==int(Upper) && i>j)) continue; Index jp = perm ? perm[j] : j; Index ip = perm? perm[i] : i; - Index k = count[int(DstUpLo)==int(Lower) ? (std::min)(ip,jp) : (std::max)(ip,jp)]++; - dest.innerIndexPtr()[k] = int(DstUpLo)==int(Lower) ? (std::max)(ip,jp) : (std::min)(ip,jp); + Index k = count[int(DstMode)==int(Lower) ? (std::min)(ip,jp) : (std::max)(ip,jp)]++; + dest.innerIndexPtr()[k] = int(DstMode)==int(Lower) ? (std::max)(ip,jp) : (std::min)(ip,jp); if(!StorageOrderMatch) std::swap(ip,jp); - if( ((int(DstUpLo)==int(Lower) && ip<jp) || (int(DstUpLo)==int(Upper) && ip>jp))) + if( ((int(DstMode)==int(Lower) && ip<jp) || (int(DstMode)==int(Upper) && ip>jp))) dest.valuePtr()[k] = numext::conj(it.value()); else dest.valuePtr()[k] = it.value(); @@ -461,9 +503,19 @@ void permute_symm_to_symm(const MatrixType& mat, SparseMatrix<typename MatrixTyp } -template<typename MatrixType,int UpLo> +// TODO implement twists in a more evaluator friendly fashion + +namespace internal { + +template<typename MatrixType, int Mode> +struct traits<SparseSymmetricPermutationProduct<MatrixType,Mode> > : traits<MatrixType> { +}; + +} + +template<typename MatrixType,int Mode> class SparseSymmetricPermutationProduct - : public EigenBase<SparseSymmetricPermutationProduct<MatrixType,UpLo> > + : public EigenBase<SparseSymmetricPermutationProduct<MatrixType,Mode> > { public: typedef typename MatrixType::Scalar Scalar; @@ -485,15 +537,15 @@ class SparseSymmetricPermutationProduct template<typename DestScalar, int Options, typename 
DstIndex> void evalTo(SparseMatrix<DestScalar,Options,DstIndex>& _dest) const { -// internal::permute_symm_to_fullsymm<UpLo>(m_matrix,_dest,m_perm.indices().data()); +// internal::permute_symm_to_fullsymm<Mode>(m_matrix,_dest,m_perm.indices().data()); SparseMatrix<DestScalar,(Options&RowMajor)==RowMajor ? ColMajor : RowMajor, DstIndex> tmp; - internal::permute_symm_to_fullsymm<UpLo>(m_matrix,tmp,m_perm.indices().data()); + internal::permute_symm_to_fullsymm<Mode>(m_matrix,tmp,m_perm.indices().data()); _dest = tmp; } - template<typename DestType,unsigned int DestUpLo> void evalTo(SparseSelfAdjointView<DestType,DestUpLo>& dest) const + template<typename DestType,unsigned int DestMode> void evalTo(SparseSelfAdjointView<DestType,DestMode>& dest) const { - internal::permute_symm_to_symm<UpLo,DestUpLo>(m_matrix,dest.matrix(),m_perm.indices().data()); + internal::permute_symm_to_symm<Mode,DestMode>(m_matrix,dest.matrix(),m_perm.indices().data()); } protected: diff --git a/Eigen/src/SparseCore/SparseSolverBase.h b/Eigen/src/SparseCore/SparseSolverBase.h new file mode 100644 index 000000000..df4e2f017 --- /dev/null +++ b/Eigen/src/SparseCore/SparseSolverBase.h @@ -0,0 +1,110 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Gael Guennebaud <gael.guennebaud@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_SPARSESOLVERBASE_H +#define EIGEN_SPARSESOLVERBASE_H + +namespace Eigen { + +namespace internal { + + /** \internal + * Helper functions to solve with a sparse right-hand-side and result. + * The rhs is decomposed into small vertical panels which are solved through dense temporaries. 
+ */ +template<typename Decomposition, typename Rhs, typename Dest> +void solve_sparse_through_dense_panels(const Decomposition &dec, const Rhs& rhs, Dest &dest) +{ + EIGEN_STATIC_ASSERT((Dest::Flags&RowMajorBit)==0,THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES); + typedef typename Dest::Scalar DestScalar; + // we process the sparse rhs per block of NbColsAtOnce columns temporarily stored into a dense matrix. + static const int NbColsAtOnce = 4; + int rhsCols = rhs.cols(); + int size = rhs.rows(); + // the temporary matrices do not need more columns than NbColsAtOnce: + int tmpCols = (std::min)(rhsCols, NbColsAtOnce); + Eigen::Matrix<DestScalar,Dynamic,Dynamic> tmp(size,tmpCols); + Eigen::Matrix<DestScalar,Dynamic,Dynamic> tmpX(size,tmpCols); + for(int k=0; k<rhsCols; k+=NbColsAtOnce) + { + int actualCols = std::min<int>(rhsCols-k, NbColsAtOnce); + tmp.leftCols(actualCols) = rhs.middleCols(k,actualCols); + tmpX.leftCols(actualCols) = dec.solve(tmp.leftCols(actualCols)); + dest.middleCols(k,actualCols) = tmpX.leftCols(actualCols).sparseView(); + } +} + +} // end namespace internal + +/** \class SparseSolverBase + * \ingroup SparseCore_Module + * \brief A base class for sparse solvers + * + * \tparam Derived the actual type of the solver. + * + */ +template<typename Derived> +class SparseSolverBase : internal::noncopyable +{ + public: + + /** Default constructor */ + SparseSolverBase() + : m_isInitialized(false) + {} + + ~SparseSolverBase() + {} + + Derived& derived() { return *static_cast<Derived*>(this); } + const Derived& derived() const { return *static_cast<const Derived*>(this); } + + /** \returns an expression of the solution x of \f$ A x = b \f$ using the current decomposition of A. 
+ * + * \sa compute() + */ + template<typename Rhs> + inline const Solve<Derived, Rhs> + solve(const MatrixBase<Rhs>& b) const + { + eigen_assert(m_isInitialized && "Solver is not initialized."); + eigen_assert(derived().rows()==b.rows() && "solve(): invalid number of rows of the right hand side matrix b"); + return Solve<Derived, Rhs>(derived(), b.derived()); + } + + /** \returns an expression of the solution x of \f$ A x = b \f$ using the current decomposition of A. + * + * \sa compute() + */ + template<typename Rhs> + inline const Solve<Derived, Rhs> + solve(const SparseMatrixBase<Rhs>& b) const + { + eigen_assert(m_isInitialized && "Solver is not initialized."); + eigen_assert(derived().rows()==b.rows() && "solve(): invalid number of rows of the right hand side matrix b"); + return Solve<Derived, Rhs>(derived(), b.derived()); + } + + #ifndef EIGEN_PARSED_BY_DOXYGEN + /** \internal default implementation of solving with a sparse rhs */ + template<typename Rhs,typename Dest> + void _solve_impl(const SparseMatrixBase<Rhs> &b, SparseMatrixBase<Dest> &dest) const + { + internal::solve_sparse_through_dense_panels(derived(), b.derived(), dest.derived()); + } + #endif // EIGEN_PARSED_BY_DOXYGEN + + protected: + + mutable bool m_isInitialized; +}; + +} // end namespace Eigen + +#endif // EIGEN_SPARSESOLVERBASE_H diff --git a/Eigen/src/SparseCore/SparseSparseProductWithPruning.h b/Eigen/src/SparseCore/SparseSparseProductWithPruning.h index fcc18f5c9..f291f8cef 100644 --- a/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +++ b/Eigen/src/SparseCore/SparseSparseProductWithPruning.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2008-2011 Gael Guennebaud <gael.guennebaud@inria.fr> +// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr> // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. 
If a copy of the MPL was not distributed @@ -46,6 +46,9 @@ static void sparse_sparse_product_with_pruning_impl(const Lhs& lhs, const Rhs& r res.resize(cols, rows); else res.resize(rows, cols); + + typename evaluator<Lhs>::type lhsEval(lhs); + typename evaluator<Rhs>::type rhsEval(rhs); res.reserve(estimated_nnz_prod); double ratioColRes = double(estimated_nnz_prod)/double(lhs.rows()*rhs.cols()); @@ -56,12 +59,12 @@ static void sparse_sparse_product_with_pruning_impl(const Lhs& lhs, const Rhs& r // let's do a more accurate determination of the nnz ratio for the current column j of res tempVector.init(ratioColRes); tempVector.setZero(); - for (typename Rhs::InnerIterator rhsIt(rhs, j); rhsIt; ++rhsIt) + for (typename evaluator<Rhs>::InnerIterator rhsIt(rhsEval, j); rhsIt; ++rhsIt) { // FIXME should be written like this: tmp += rhsIt.value() * lhs.col(rhsIt.index()) tempVector.restart(); Scalar x = rhsIt.value(); - for (typename Lhs::InnerIterator lhsIt(lhs, rhsIt.index()); lhsIt; ++lhsIt) + for (typename evaluator<Lhs>::InnerIterator lhsIt(lhsEval, rhsIt.index()); lhsIt; ++lhsIt) { tempVector.coeffRef(lhsIt.index()) += lhsIt.value() * x; } @@ -140,8 +143,53 @@ struct sparse_sparse_product_with_pruning_selector<Lhs,Rhs,ResultType,RowMajor,R } }; -// NOTE the 2 others cases (col row *) must never occur since they are caught -// by ProductReturnType which transforms it to (col col *) by evaluating rhs. 
+template<typename Lhs, typename Rhs, typename ResultType> +struct sparse_sparse_product_with_pruning_selector<Lhs,Rhs,ResultType,ColMajor,RowMajor,RowMajor> +{ + typedef typename ResultType::RealScalar RealScalar; + static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res, const RealScalar& tolerance) + { + typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename Lhs::Index> RowMajorMatrixLhs; + RowMajorMatrixLhs rowLhs(lhs); + sparse_sparse_product_with_pruning_selector<RowMajorMatrixLhs,Rhs,ResultType,RowMajor,RowMajor>(rowLhs,rhs,res,tolerance); + } +}; + +template<typename Lhs, typename Rhs, typename ResultType> +struct sparse_sparse_product_with_pruning_selector<Lhs,Rhs,ResultType,RowMajor,ColMajor,RowMajor> +{ + typedef typename ResultType::RealScalar RealScalar; + static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res, const RealScalar& tolerance) + { + typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename Lhs::Index> RowMajorMatrixRhs; + RowMajorMatrixRhs rowRhs(rhs); + sparse_sparse_product_with_pruning_selector<Lhs,RowMajorMatrixRhs,ResultType,RowMajor,RowMajor,RowMajor>(lhs,rowRhs,res,tolerance); + } +}; + +template<typename Lhs, typename Rhs, typename ResultType> +struct sparse_sparse_product_with_pruning_selector<Lhs,Rhs,ResultType,ColMajor,RowMajor,ColMajor> +{ + typedef typename ResultType::RealScalar RealScalar; + static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res, const RealScalar& tolerance) + { + typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename Lhs::Index> ColMajorMatrixRhs; + ColMajorMatrixRhs colRhs(rhs); + internal::sparse_sparse_product_with_pruning_impl<Lhs,ColMajorMatrixRhs,ResultType>(lhs, colRhs, res, tolerance); + } +}; + +template<typename Lhs, typename Rhs, typename ResultType> +struct sparse_sparse_product_with_pruning_selector<Lhs,Rhs,ResultType,RowMajor,ColMajor,ColMajor> +{ + typedef typename ResultType::RealScalar RealScalar; + static void run(const Lhs& lhs, 
const Rhs& rhs, ResultType& res, const RealScalar& tolerance) + { + typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename Lhs::Index> ColMajorMatrixLhs; + ColMajorMatrixLhs colLhs(lhs); + internal::sparse_sparse_product_with_pruning_impl<ColMajorMatrixLhs,Rhs,ResultType>(colLhs, rhs, res, tolerance); + } +}; } // end namespace internal diff --git a/Eigen/src/SparseCore/SparseTranspose.h b/Eigen/src/SparseCore/SparseTranspose.h index 7c300ee8d..c3d2d1a16 100644 --- a/Eigen/src/SparseCore/SparseTranspose.h +++ b/Eigen/src/SparseCore/SparseTranspose.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr> +// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr> // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed @@ -12,52 +12,64 @@ namespace Eigen { +// Implement nonZeros() for transpose. I'm not sure that's the best approach for that. +// Perhaps it should be implemented in Transpose<> itself. template<typename MatrixType> class TransposeImpl<MatrixType,Sparse> : public SparseMatrixBase<Transpose<MatrixType> > { - typedef typename internal::remove_all<typename MatrixType::Nested>::type _MatrixTypeNested; + protected: + typedef SparseMatrixBase<Transpose<MatrixType> > Base; public: - - EIGEN_SPARSE_PUBLIC_INTERFACE(Transpose<MatrixType> ) - - class InnerIterator; - class ReverseInnerIterator; - - inline Index nonZeros() const { return derived().nestedExpression().nonZeros(); } + inline typename MatrixType::Index nonZeros() const { return Base::derived().nestedExpression().nonZeros(); } }; -// NOTE: VC10 trigger an ICE if don't put typename TransposeImpl<MatrixType,Sparse>:: in front of Index, -// a typedef typename TransposeImpl<MatrixType,Sparse>::Index Index; -// does not fix the issue. 
-// An alternative is to define the nested class in the parent class itself. -template<typename MatrixType> class TransposeImpl<MatrixType,Sparse>::InnerIterator - : public _MatrixTypeNested::InnerIterator +namespace internal { + +template<typename ArgType> +struct unary_evaluator<Transpose<ArgType>, IteratorBased> + : public evaluator_base<Transpose<ArgType> > { - typedef typename _MatrixTypeNested::InnerIterator Base; - typedef typename TransposeImpl::Index Index; + typedef typename evaluator<ArgType>::InnerIterator EvalIterator; + typedef typename evaluator<ArgType>::ReverseInnerIterator EvalReverseIterator; public: + typedef Transpose<ArgType> XprType; + typedef typename XprType::Index Index; - EIGEN_STRONG_INLINE InnerIterator(const TransposeImpl& trans, typename TransposeImpl<MatrixType,Sparse>::Index outer) - : Base(trans.derived().nestedExpression(), outer) - {} - Index row() const { return Base::col(); } - Index col() const { return Base::row(); } -}; - -template<typename MatrixType> class TransposeImpl<MatrixType,Sparse>::ReverseInnerIterator - : public _MatrixTypeNested::ReverseInnerIterator -{ - typedef typename _MatrixTypeNested::ReverseInnerIterator Base; - typedef typename TransposeImpl::Index Index; - public: + class InnerIterator : public EvalIterator + { + public: + EIGEN_STRONG_INLINE InnerIterator(const unary_evaluator& unaryOp, typename XprType::Index outer) + : EvalIterator(unaryOp.m_argImpl,outer) + {} + + Index row() const { return EvalIterator::col(); } + Index col() const { return EvalIterator::row(); } + }; + + class ReverseInnerIterator : public EvalReverseIterator + { + public: + EIGEN_STRONG_INLINE ReverseInnerIterator(const unary_evaluator& unaryOp, typename XprType::Index outer) + : EvalReverseIterator(unaryOp.m_argImpl,outer) + {} + + Index row() const { return EvalReverseIterator::col(); } + Index col() const { return EvalReverseIterator::row(); } + }; + + enum { + CoeffReadCost = evaluator<ArgType>::CoeffReadCost, + Flags = 
XprType::Flags + }; + + explicit unary_evaluator(const XprType& op) :m_argImpl(op.nestedExpression()) {} - EIGEN_STRONG_INLINE ReverseInnerIterator(const TransposeImpl& xpr, typename TransposeImpl<MatrixType,Sparse>::Index outer) - : Base(xpr.derived().nestedExpression(), outer) - {} - Index row() const { return Base::col(); } - Index col() const { return Base::row(); } + protected: + typename evaluator<ArgType>::nestedType m_argImpl; }; +} // end namespace internal + } // end namespace Eigen #endif // EIGEN_SPARSETRANSPOSE_H diff --git a/Eigen/src/SparseCore/SparseTriangularView.h b/Eigen/src/SparseCore/SparseTriangularView.h index 333127b78..b044d6778 100644 --- a/Eigen/src/SparseCore/SparseTriangularView.h +++ b/Eigen/src/SparseCore/SparseTriangularView.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr> +// Copyright (C) 2009-2014 Gael Guennebaud <gael.guennebaud@inria.fr> // Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr> // // This Source Code Form is subject to the terms of the Mozilla @@ -13,17 +13,8 @@ namespace Eigen { -namespace internal { - -template<typename MatrixType, int Mode> -struct traits<SparseTriangularView<MatrixType,Mode> > -: public traits<MatrixType> -{}; - -} // namespace internal - -template<typename MatrixType, int Mode> class SparseTriangularView - : public SparseMatrixBase<SparseTriangularView<MatrixType,Mode> > +template<typename MatrixType, unsigned int Mode> class TriangularViewImpl<MatrixType,Mode,Sparse> + : public SparseMatrixBase<TriangularView<MatrixType,Mode> > { enum { SkipFirst = ((Mode&Lower) && !(MatrixType::Flags&RowMajorBit)) || ((Mode&Upper) && (MatrixType::Flags&RowMajorBit)), @@ -31,46 +22,53 @@ template<typename MatrixType, int Mode> class SparseTriangularView SkipDiag = (Mode&ZeroDiag) ? 1 : 0, HasUnitDiag = (Mode&UnitDiag) ? 
1 : 0 }; + + typedef TriangularView<MatrixType,Mode> TriangularViewType; + +protected: + // dummy solve function to make TriangularView happy. + void solve() const; public: - EIGEN_SPARSE_PUBLIC_INTERFACE(SparseTriangularView) - + EIGEN_SPARSE_PUBLIC_INTERFACE(TriangularViewType) + class InnerIterator; class ReverseInnerIterator; - inline Index rows() const { return m_matrix.rows(); } - inline Index cols() const { return m_matrix.cols(); } - typedef typename MatrixType::Nested MatrixTypeNested; typedef typename internal::remove_reference<MatrixTypeNested>::type MatrixTypeNestedNonRef; typedef typename internal::remove_all<MatrixTypeNested>::type MatrixTypeNestedCleaned; - inline SparseTriangularView(const MatrixType& matrix) : m_matrix(matrix) {} - - /** \internal */ - inline const MatrixTypeNestedCleaned& nestedExpression() const { return m_matrix; } - - template<typename OtherDerived> - typename internal::plain_matrix_type_column_major<OtherDerived>::type - solve(const MatrixBase<OtherDerived>& other) const; + template<typename RhsType, typename DstType> + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void _solve_impl(const RhsType &rhs, DstType &dst) const { + if(!(internal::is_same<RhsType,DstType>::value && internal::extract_data(dst) == internal::extract_data(rhs))) + dst = rhs; + this->solveInPlace(dst); + } template<typename OtherDerived> void solveInPlace(MatrixBase<OtherDerived>& other) const; template<typename OtherDerived> void solveInPlace(SparseMatrixBase<OtherDerived>& other) const; - protected: - MatrixTypeNested m_matrix; + inline Index nonZeros() const { + // FIXME HACK number of nonZeros is required for product logic + // this returns only an upper bound (but should be OK for most purposes) + return derived().nestedExpression().nonZeros(); + } + + }; -template<typename MatrixType, int Mode> -class SparseTriangularView<MatrixType,Mode>::InnerIterator : public MatrixTypeNestedCleaned::InnerIterator +template<typename MatrixType, unsigned int Mode> +class 
TriangularViewImpl<MatrixType,Mode,Sparse>::InnerIterator : public MatrixTypeNestedCleaned::InnerIterator { typedef typename MatrixTypeNestedCleaned::InnerIterator Base; - typedef typename SparseTriangularView::Index Index; + typedef typename TriangularViewType::Index Index; public: - EIGEN_STRONG_INLINE InnerIterator(const SparseTriangularView& view, Index outer) - : Base(view.nestedExpression(), outer), m_returnOne(false) + EIGEN_STRONG_INLINE InnerIterator(const TriangularViewImpl& view, Index outer) + : Base(view.derived().nestedExpression(), outer), m_returnOne(false) { if(SkipFirst) { @@ -132,15 +130,15 @@ class SparseTriangularView<MatrixType,Mode>::InnerIterator : public MatrixTypeNe bool m_returnOne; }; -template<typename MatrixType, int Mode> -class SparseTriangularView<MatrixType,Mode>::ReverseInnerIterator : public MatrixTypeNestedCleaned::ReverseInnerIterator +template<typename MatrixType, unsigned int Mode> +class TriangularViewImpl<MatrixType,Mode,Sparse>::ReverseInnerIterator : public MatrixTypeNestedCleaned::ReverseInnerIterator { typedef typename MatrixTypeNestedCleaned::ReverseInnerIterator Base; - typedef typename SparseTriangularView::Index Index; + typedef typename TriangularViewImpl::Index Index; public: - EIGEN_STRONG_INLINE ReverseInnerIterator(const SparseTriangularView& view, Index outer) - : Base(view.nestedExpression(), outer) + EIGEN_STRONG_INLINE ReverseInnerIterator(const TriangularViewType& view, Index outer) + : Base(view.derived().nestedExpression(), outer) { eigen_assert((!HasUnitDiag) && "ReverseInnerIterator does not support yet triangular views with a unit diagonal"); if(SkipLast) { @@ -166,12 +164,119 @@ class SparseTriangularView<MatrixType,Mode>::ReverseInnerIterator : public Matri } }; +namespace internal { + +template<typename ArgType, unsigned int Mode> +struct unary_evaluator<TriangularView<ArgType,Mode>, IteratorBased> + : evaluator_base<TriangularView<ArgType,Mode> > +{ + typedef TriangularView<ArgType,Mode> XprType; 
+ +protected: + + typedef typename XprType::Scalar Scalar; + typedef typename XprType::Index Index; + typedef typename evaluator<ArgType>::InnerIterator EvalIterator; + + enum { SkipFirst = ((Mode&Lower) && !(ArgType::Flags&RowMajorBit)) + || ((Mode&Upper) && (ArgType::Flags&RowMajorBit)), + SkipLast = !SkipFirst, + SkipDiag = (Mode&ZeroDiag) ? 1 : 0, + HasUnitDiag = (Mode&UnitDiag) ? 1 : 0 + }; + +public: + + enum { + CoeffReadCost = evaluator<ArgType>::CoeffReadCost, + Flags = XprType::Flags + }; + + explicit unary_evaluator(const XprType &xpr) : m_argImpl(xpr.nestedExpression()) {} + + class InnerIterator : public EvalIterator + { + typedef EvalIterator Base; + public: + + EIGEN_STRONG_INLINE InnerIterator(const unary_evaluator& xprEval, Index outer) + : Base(xprEval.m_argImpl,outer), m_returnOne(false) + { + if(SkipFirst) + { + while((*this) && ((HasUnitDiag||SkipDiag) ? this->index()<=outer : this->index()<outer)) + Base::operator++(); + if(HasUnitDiag) + m_returnOne = true; + } + else if(HasUnitDiag && ((!Base::operator bool()) || Base::index()>=Base::outer())) + { + if((!SkipFirst) && Base::operator bool()) + Base::operator++(); + m_returnOne = true; // FIXME check innerSize()>outer(); + } + } + + EIGEN_STRONG_INLINE InnerIterator& operator++() + { + if(HasUnitDiag && m_returnOne) + m_returnOne = false; + else + { + Base::operator++(); + if(HasUnitDiag && (!SkipFirst) && ((!Base::operator bool()) || Base::index()>=Base::outer())) + { + if((!SkipFirst) && Base::operator bool()) + Base::operator++(); + m_returnOne = true; // FIXME check innerSize()>outer(); + } + } + return *this; + } + + EIGEN_STRONG_INLINE operator bool() const + { + if(HasUnitDiag && m_returnOne) + return true; + if(SkipFirst) return Base::operator bool(); + else + { + if (SkipDiag) return (Base::operator bool() && this->index() < this->outer()); + else return (Base::operator bool() && this->index() <= this->outer()); + } + } + +// inline Index row() const { return 
(ArgType::Flags&RowMajorBit ? Base::outer() : this->index()); } +// inline Index col() const { return (ArgType::Flags&RowMajorBit ? this->index() : Base::outer()); } + inline Index index() const + { + if(HasUnitDiag && m_returnOne) return Base::outer(); + else return Base::index(); + } + inline Scalar value() const + { + if(HasUnitDiag && m_returnOne) return Scalar(1); + else return Base::value(); + } + + protected: + bool m_returnOne; + private: + Scalar& valueRef(); + }; + +protected: + typename evaluator<ArgType>::type m_argImpl; +}; + +} // end namespace internal + template<typename Derived> template<int Mode> -inline const SparseTriangularView<Derived, Mode> +inline const TriangularView<const Derived, Mode> SparseMatrixBase<Derived>::triangularView() const { - return derived(); + return TriangularView<const Derived, Mode>(derived()); } } // end namespace Eigen diff --git a/Eigen/src/SparseCore/SparseUtil.h b/Eigen/src/SparseCore/SparseUtil.h index 02c19d18f..8de227b88 100644 --- a/Eigen/src/SparseCore/SparseUtil.h +++ b/Eigen/src/SparseCore/SparseUtil.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr> +// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr> // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. 
If a copy of the MPL was not distributed @@ -52,13 +52,12 @@ EIGEN_SPARSE_INHERIT_SCALAR_ASSIGNMENT_OPERATOR(Derived, /=) typedef typename Eigen::internal::traits<Derived >::Index Index; \ enum { RowsAtCompileTime = Eigen::internal::traits<Derived >::RowsAtCompileTime, \ ColsAtCompileTime = Eigen::internal::traits<Derived >::ColsAtCompileTime, \ - Flags = Eigen::internal::traits<Derived >::Flags, \ - CoeffReadCost = Eigen::internal::traits<Derived >::CoeffReadCost, \ + Flags = Eigen::internal::traits<Derived>::Flags, \ SizeAtCompileTime = Base::SizeAtCompileTime, \ IsVectorAtCompileTime = Base::IsVectorAtCompileTime }; \ using Base::derived; \ using Base::const_cast_derived; - + #define EIGEN_SPARSE_PUBLIC_INTERFACE(Derived) \ _EIGEN_SPARSE_PUBLIC_INTERFACE(Derived, Eigen::SparseMatrixBase<Derived >) @@ -73,7 +72,6 @@ template<typename _Scalar, int _Flags = 0, typename _Index = int> class Dynamic template<typename _Scalar, int _Flags = 0, typename _Index = int> class SparseVector; template<typename _Scalar, int _Flags = 0, typename _Index = int> class MappedSparseMatrix; -template<typename MatrixType, int Mode> class SparseTriangularView; template<typename MatrixType, unsigned int UpLo> class SparseSelfAdjointView; template<typename Lhs, typename Rhs> class SparseDiagonalProduct; template<typename MatrixType> class SparseView; @@ -131,11 +129,29 @@ template<typename T> struct plain_matrix_type<T,Sparse> { typedef typename traits<T>::Scalar _Scalar; typedef typename traits<T>::Index _Index; - enum { _Options = ((traits<T>::Flags&RowMajorBit)==RowMajorBit) ? RowMajor : ColMajor }; + enum { _Options = ((evaluator<T>::Flags&RowMajorBit)==RowMajorBit) ? 
RowMajor : ColMajor }; public: typedef SparseMatrix<_Scalar, _Options, _Index> type; }; +template<typename Decomposition, typename RhsType> +struct solve_traits<Decomposition,RhsType,Sparse> +{ + typedef typename sparse_eval<RhsType, RhsType::RowsAtCompileTime, RhsType::ColsAtCompileTime>::type PlainObject; +}; + +template<typename Derived> +struct generic_xpr_base<Derived, MatrixXpr, Sparse> +{ + typedef SparseMatrixBase<Derived> type; +}; + +struct SparseTriangularShape { static std::string debugName() { return "SparseTriangularShape"; } }; +struct SparseSelfAdjointShape { static std::string debugName() { return "SparseSelfAdjointShape"; } }; + +template<> struct glue_shapes<SparseShape,SelfAdjointShape> { typedef SparseSelfAdjointShape type; }; +template<> struct glue_shapes<SparseShape,TriangularShape > { typedef SparseTriangularShape type; }; + } // end namespace internal /** \ingroup SparseCore_Module diff --git a/Eigen/src/SparseCore/SparseVector.h b/Eigen/src/SparseCore/SparseVector.h index 0b1b389ce..8b696a476 100644 --- a/Eigen/src/SparseCore/SparseVector.h +++ b/Eigen/src/SparseCore/SparseVector.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr> +// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr> // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. 
If a copy of the MPL was not distributed @@ -221,7 +221,7 @@ class SparseVector inline SparseVector() : m_size(0) { check_template_parameters(); resize(0); } - inline SparseVector(Index size) : m_size(0) { check_template_parameters(); resize(size); } + explicit inline SparseVector(Index size) : m_size(0) { check_template_parameters(); resize(size); } inline SparseVector(Index rows, Index cols) : m_size(0) { check_template_parameters(); resize(rows,cols); } @@ -360,14 +360,14 @@ template<typename Scalar, int _Options, typename _Index> class SparseVector<Scalar,_Options,_Index>::InnerIterator { public: - InnerIterator(const SparseVector& vec, Index outer=0) + explicit InnerIterator(const SparseVector& vec, Index outer=0) : m_data(vec.m_data), m_id(0), m_end(static_cast<Index>(m_data.size())) { EIGEN_UNUSED_VARIABLE(outer); eigen_assert(outer==0); } - InnerIterator(const internal::CompressedStorage<Scalar,Index>& data) + explicit InnerIterator(const internal::CompressedStorage<Scalar,Index>& data) : m_data(data), m_id(0), m_end(static_cast<Index>(m_data.size())) {} @@ -386,20 +386,25 @@ class SparseVector<Scalar,_Options,_Index>::InnerIterator const internal::CompressedStorage<Scalar,Index>& m_data; Index m_id; const Index m_end; + private: + // If you get here, then you're not using the right InnerIterator type, e.g.: + // SparseMatrix<double,RowMajor> A; + // SparseMatrix<double>::InnerIterator it(A,0); + template<typename T> InnerIterator(const SparseMatrixBase<T>&,Index outer=0); }; template<typename Scalar, int _Options, typename _Index> class SparseVector<Scalar,_Options,_Index>::ReverseInnerIterator { public: - ReverseInnerIterator(const SparseVector& vec, Index outer=0) + explicit ReverseInnerIterator(const SparseVector& vec, Index outer=0) : m_data(vec.m_data), m_id(static_cast<Index>(m_data.size())), m_start(0) { EIGEN_UNUSED_VARIABLE(outer); eigen_assert(outer==0); } - ReverseInnerIterator(const internal::CompressedStorage<Scalar,Index>& data) + explicit 
ReverseInnerIterator(const internal::CompressedStorage<Scalar,Index>& data) : m_data(data), m_id(static_cast<Index>(m_data.size())), m_start(0) {} @@ -422,11 +427,34 @@ class SparseVector<Scalar,_Options,_Index>::ReverseInnerIterator namespace internal { +template<typename _Scalar, int _Options, typename _Index> +struct evaluator<SparseVector<_Scalar,_Options,_Index> > + : evaluator_base<SparseVector<_Scalar,_Options,_Index> > +{ + typedef SparseVector<_Scalar,_Options,_Index> SparseVectorType; + typedef typename SparseVectorType::InnerIterator InnerIterator; + typedef typename SparseVectorType::ReverseInnerIterator ReverseInnerIterator; + + enum { + CoeffReadCost = NumTraits<_Scalar>::ReadCost, + Flags = SparseVectorType::Flags + }; + + explicit evaluator(const SparseVectorType &mat) : m_matrix(mat) {} + + operator SparseVectorType&() { return m_matrix.const_cast_derived(); } + operator const SparseVectorType&() const { return m_matrix; } + + const SparseVectorType &m_matrix; +}; + template< typename Dest, typename Src> struct sparse_vector_assign_selector<Dest,Src,SVA_Inner> { static void run(Dest& dst, const Src& src) { eigen_internal_assert(src.innerSize()==src.size()); - for(typename Src::InnerIterator it(src, 0); it; ++it) + typedef typename internal::evaluator<Src>::type SrcEvaluatorType; + SrcEvaluatorType srcEval(src); + for(typename SrcEvaluatorType::InnerIterator it(srcEval, 0); it; ++it) dst.insert(it.index()) = it.value(); } }; @@ -435,9 +463,11 @@ template< typename Dest, typename Src> struct sparse_vector_assign_selector<Dest,Src,SVA_Outer> { static void run(Dest& dst, const Src& src) { eigen_internal_assert(src.outerSize()==src.size()); + typedef typename internal::evaluator<Src>::type SrcEvaluatorType; + SrcEvaluatorType srcEval(src); for(typename Dest::Index i=0; i<src.size(); ++i) { - typename Src::InnerIterator it(src, i); + typename SrcEvaluatorType::InnerIterator it(srcEval, i); if(it) dst.insert(i) = it.value(); } diff --git 
a/Eigen/src/SparseCore/SparseView.h b/Eigen/src/SparseCore/SparseView.h index fd8450463..40a3019fa 100644 --- a/Eigen/src/SparseCore/SparseView.h +++ b/Eigen/src/SparseCore/SparseView.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2011 Gael Guennebaud <gael.guennebaud@inria.fr> +// Copyright (C) 2011-2014 Gael Guennebaud <gael.guennebaud@inria.fr> // Copyright (C) 2010 Daniel Lowengrub <lowdanie@gmail.com> // // This Source Code Form is subject to the terms of the Mozilla @@ -34,64 +34,186 @@ class SparseView : public SparseMatrixBase<SparseView<MatrixType> > typedef typename internal::remove_all<MatrixTypeNested>::type _MatrixTypeNested; public: EIGEN_SPARSE_PUBLIC_INTERFACE(SparseView) + typedef typename internal::remove_all<MatrixType>::type NestedExpression; - SparseView(const MatrixType& mat, const Scalar& m_reference = Scalar(0), - typename NumTraits<Scalar>::Real m_epsilon = NumTraits<Scalar>::dummy_precision()) : + explicit SparseView(const MatrixType& mat, const Scalar& m_reference = Scalar(0), + RealScalar m_epsilon = NumTraits<Scalar>::dummy_precision()) : m_matrix(mat), m_reference(m_reference), m_epsilon(m_epsilon) {} - class InnerIterator; - inline Index rows() const { return m_matrix.rows(); } inline Index cols() const { return m_matrix.cols(); } inline Index innerSize() const { return m_matrix.innerSize(); } inline Index outerSize() const { return m_matrix.outerSize(); } - + + /** \returns the nested expression */ + const typename internal::remove_all<MatrixTypeNested>::type& + nestedExpression() const { return m_matrix; } + + Scalar reference() const { return m_reference; } + RealScalar epsilon() const { return m_epsilon; } + protected: MatrixTypeNested m_matrix; Scalar m_reference; - typename NumTraits<Scalar>::Real m_epsilon; + RealScalar m_epsilon; }; -template<typename MatrixType> -class SparseView<MatrixType>::InnerIterator : public 
_MatrixTypeNested::InnerIterator -{ - typedef typename SparseView::Index Index; -public: - typedef typename _MatrixTypeNested::InnerIterator IterBase; - InnerIterator(const SparseView& view, Index outer) : - IterBase(view.m_matrix, outer), m_view(view) - { - incrementToNonZero(); - } - - EIGEN_STRONG_INLINE InnerIterator& operator++() - { - IterBase::operator++(); - incrementToNonZero(); - return *this; - } - - using IterBase::value; +namespace internal { -protected: - const SparseView& m_view; +// TODO find a way to unify the two following variants +// This is tricky because implementing an inner iterator on top of an IndexBased evaluator is +// not easy because the evaluators do not expose the sizes of the underlying expression. + +template<typename ArgType> +struct unary_evaluator<SparseView<ArgType>, IteratorBased> + : public evaluator_base<SparseView<ArgType> > +{ + typedef typename evaluator<ArgType>::InnerIterator EvalIterator; + public: + typedef SparseView<ArgType> XprType; + + class InnerIterator : public EvalIterator + { + typedef typename XprType::Scalar Scalar; + public: + + EIGEN_STRONG_INLINE InnerIterator(const unary_evaluator& sve, typename XprType::Index outer) + : EvalIterator(sve.m_argImpl,outer), m_view(sve.m_view) + { + incrementToNonZero(); + } + + EIGEN_STRONG_INLINE InnerIterator& operator++() + { + EvalIterator::operator++(); + incrementToNonZero(); + return *this; + } + + using EvalIterator::value; + + protected: + const XprType &m_view; + + private: + void incrementToNonZero() + { + while((bool(*this)) && internal::isMuchSmallerThan(value(), m_view.reference(), m_view.epsilon())) + { + EvalIterator::operator++(); + } + } + }; + + enum { + CoeffReadCost = evaluator<ArgType>::CoeffReadCost, + Flags = XprType::Flags + }; + + explicit unary_evaluator(const XprType& xpr) : m_argImpl(xpr.nestedExpression()), m_view(xpr) {} + + protected: + typename evaluator<ArgType>::nestedType m_argImpl; + const XprType &m_view; +}; -private: - void 
incrementToNonZero() - { - while((bool(*this)) && internal::isMuchSmallerThan(value(), m_view.m_reference, m_view.m_epsilon)) +template<typename ArgType> +struct unary_evaluator<SparseView<ArgType>, IndexBased> + : public evaluator_base<SparseView<ArgType> > +{ + public: + typedef SparseView<ArgType> XprType; + protected: + enum { IsRowMajor = (XprType::Flags&RowMajorBit)==RowMajorBit }; + typedef typename XprType::Index Index; + typedef typename XprType::Scalar Scalar; + public: + + class InnerIterator { - IterBase::operator++(); - } - } + public: + + EIGEN_STRONG_INLINE InnerIterator(const unary_evaluator& sve, typename XprType::Index outer) + : m_sve(sve), m_inner(0), m_outer(outer), m_end(sve.m_view.innerSize()) + { + incrementToNonZero(); + } + + EIGEN_STRONG_INLINE InnerIterator& operator++() + { + m_inner++; + incrementToNonZero(); + return *this; + } + + EIGEN_STRONG_INLINE Scalar value() const + { + return (IsRowMajor) ? m_sve.m_argImpl.coeff(m_outer, m_inner) + : m_sve.m_argImpl.coeff(m_inner, m_outer); + } + + EIGEN_STRONG_INLINE Index index() const { return m_inner; } + inline Index row() const { return IsRowMajor ? m_outer : index(); } + inline Index col() const { return IsRowMajor ? 
index() : m_outer; } + + EIGEN_STRONG_INLINE operator bool() const { return m_inner < m_end && m_inner>=0; } + + protected: + const unary_evaluator &m_sve; + Index m_inner; + const Index m_outer; + const Index m_end; + + private: + void incrementToNonZero() + { + while((bool(*this)) && internal::isMuchSmallerThan(value(), m_sve.m_view.reference(), m_sve.m_view.epsilon())) + { + m_inner++; + } + } + }; + + enum { + CoeffReadCost = evaluator<ArgType>::CoeffReadCost, + Flags = XprType::Flags + }; + + explicit unary_evaluator(const XprType& xpr) : m_argImpl(xpr.nestedExpression()), m_view(xpr) {} + + protected: + typename evaluator<ArgType>::nestedType m_argImpl; + const XprType &m_view; }; +} // end namespace internal + +template<typename Derived> +const SparseView<Derived> MatrixBase<Derived>::sparseView(const Scalar& reference, + const typename NumTraits<Scalar>::Real& epsilon) const +{ + return SparseView<Derived>(derived(), reference, epsilon); +} + +/** \returns an expression of \c *this with values smaller than + * \a reference * \a epsilon are removed. + * + * This method is typically used in conjunction with the product of two sparse matrices + * to automatically prune the smallest values as follows: + * \code + * C = (A*B).pruned(); // suppress numerical zeros (exact) + * C = (A*B).pruned(ref); + * C = (A*B).pruned(ref,epsilon); + * \endcode + * where \c ref is a meaningful non zero reference value. 
+ * */ template<typename Derived> -const SparseView<Derived> MatrixBase<Derived>::sparseView(const Scalar& m_reference, - const typename NumTraits<Scalar>::Real& m_epsilon) const +const SparseView<Derived> +SparseMatrixBase<Derived>::pruned(const Scalar& reference, + const RealScalar& epsilon) const { - return SparseView<Derived>(derived(), m_reference, m_epsilon); + return SparseView<Derived>(derived(), reference, epsilon); } } // end namespace Eigen diff --git a/Eigen/src/SparseCore/TriangularSolver.h b/Eigen/src/SparseCore/TriangularSolver.h index dd55522a7..98062e9c6 100644 --- a/Eigen/src/SparseCore/TriangularSolver.h +++ b/Eigen/src/SparseCore/TriangularSolver.h @@ -29,8 +29,11 @@ struct sparse_solve_triangular_selector<Lhs,Rhs,Mode,Lower,RowMajor> { typedef typename Rhs::Scalar Scalar; typedef typename Lhs::Index Index; + typedef typename evaluator<Lhs>::type LhsEval; + typedef typename evaluator<Lhs>::InnerIterator LhsIterator; static void run(const Lhs& lhs, Rhs& other) { + LhsEval lhsEval(lhs); for(Index col=0 ; col<other.cols() ; ++col) { for(Index i=0; i<lhs.rows(); ++i) @@ -38,7 +41,7 @@ struct sparse_solve_triangular_selector<Lhs,Rhs,Mode,Lower,RowMajor> Scalar tmp = other.coeff(i,col); Scalar lastVal(0); Index lastIndex = 0; - for(typename Lhs::InnerIterator it(lhs, i); it; ++it) + for(LhsIterator it(lhsEval, i); it; ++it) { lastVal = it.value(); lastIndex = it.index(); @@ -64,15 +67,18 @@ struct sparse_solve_triangular_selector<Lhs,Rhs,Mode,Upper,RowMajor> { typedef typename Rhs::Scalar Scalar; typedef typename Lhs::Index Index; + typedef typename evaluator<Lhs>::type LhsEval; + typedef typename evaluator<Lhs>::InnerIterator LhsIterator; static void run(const Lhs& lhs, Rhs& other) { + LhsEval lhsEval(lhs); for(Index col=0 ; col<other.cols() ; ++col) { for(Index i=lhs.rows()-1 ; i>=0 ; --i) { Scalar tmp = other.coeff(i,col); Scalar l_ii = 0; - typename Lhs::InnerIterator it(lhs, i); + LhsIterator it(lhsEval, i); while(it && it.index()<i) ++it; 
if(!(Mode & UnitDiag)) @@ -88,10 +94,8 @@ struct sparse_solve_triangular_selector<Lhs,Rhs,Mode,Upper,RowMajor> tmp -= it.value() * other.coeff(it.index(),col); } - if (Mode & UnitDiag) - other.coeffRef(i,col) = tmp; - else - other.coeffRef(i,col) = tmp/l_ii; + if (Mode & UnitDiag) other.coeffRef(i,col) = tmp; + else other.coeffRef(i,col) = tmp/l_ii; } } } @@ -103,8 +107,11 @@ struct sparse_solve_triangular_selector<Lhs,Rhs,Mode,Lower,ColMajor> { typedef typename Rhs::Scalar Scalar; typedef typename Lhs::Index Index; + typedef typename evaluator<Lhs>::type LhsEval; + typedef typename evaluator<Lhs>::InnerIterator LhsIterator; static void run(const Lhs& lhs, Rhs& other) { + LhsEval lhsEval(lhs); for(Index col=0 ; col<other.cols() ; ++col) { for(Index i=0; i<lhs.cols(); ++i) @@ -112,7 +119,7 @@ struct sparse_solve_triangular_selector<Lhs,Rhs,Mode,Lower,ColMajor> Scalar& tmp = other.coeffRef(i,col); if (tmp!=Scalar(0)) // optimization when other is actually sparse { - typename Lhs::InnerIterator it(lhs, i); + LhsIterator it(lhsEval, i); while(it && it.index()<i) ++it; if(!(Mode & UnitDiag)) @@ -136,8 +143,11 @@ struct sparse_solve_triangular_selector<Lhs,Rhs,Mode,Upper,ColMajor> { typedef typename Rhs::Scalar Scalar; typedef typename Lhs::Index Index; + typedef typename evaluator<Lhs>::type LhsEval; + typedef typename evaluator<Lhs>::InnerIterator LhsIterator; static void run(const Lhs& lhs, Rhs& other) { + LhsEval lhsEval(lhs); for(Index col=0 ; col<other.cols() ; ++col) { for(Index i=lhs.cols()-1; i>=0; --i) @@ -148,13 +158,13 @@ struct sparse_solve_triangular_selector<Lhs,Rhs,Mode,Upper,ColMajor> if(!(Mode & UnitDiag)) { // TODO replace this by a binary search. 
make sure the binary search is safe for partially sorted elements - typename Lhs::ReverseInnerIterator it(lhs, i); + LhsIterator it(lhsEval, i); while(it && it.index()!=i) - --it; + ++it; eigen_assert(it && it.index()==i); other.coeffRef(i,col) /= it.value(); } - typename Lhs::InnerIterator it(lhs, i); + LhsIterator it(lhsEval, i); for(; it && it.index()<i; ++it) other.coeffRef(it.index(), col) -= tmp * it.value(); } @@ -165,11 +175,11 @@ struct sparse_solve_triangular_selector<Lhs,Rhs,Mode,Upper,ColMajor> } // end namespace internal -template<typename ExpressionType,int Mode> +template<typename ExpressionType,unsigned int Mode> template<typename OtherDerived> -void SparseTriangularView<ExpressionType,Mode>::solveInPlace(MatrixBase<OtherDerived>& other) const +void TriangularViewImpl<ExpressionType,Mode,Sparse>::solveInPlace(MatrixBase<OtherDerived>& other) const { - eigen_assert(m_matrix.cols() == m_matrix.rows() && m_matrix.cols() == other.rows()); + eigen_assert(derived().cols() == derived().rows() && derived().cols() == other.rows()); eigen_assert((!(Mode & ZeroDiag)) && bool(Mode & (Upper|Lower))); enum { copy = internal::traits<OtherDerived>::Flags & RowMajorBit }; @@ -178,22 +188,12 @@ void SparseTriangularView<ExpressionType,Mode>::solveInPlace(MatrixBase<OtherDer typename internal::plain_matrix_type_column_major<OtherDerived>::type, OtherDerived&>::type OtherCopy; OtherCopy otherCopy(other.derived()); - internal::sparse_solve_triangular_selector<ExpressionType, typename internal::remove_reference<OtherCopy>::type, Mode>::run(m_matrix, otherCopy); + internal::sparse_solve_triangular_selector<ExpressionType, typename internal::remove_reference<OtherCopy>::type, Mode>::run(derived().nestedExpression(), otherCopy); if (copy) other = otherCopy; } -template<typename ExpressionType,int Mode> -template<typename OtherDerived> -typename internal::plain_matrix_type_column_major<OtherDerived>::type -SparseTriangularView<ExpressionType,Mode>::solve(const 
MatrixBase<OtherDerived>& other) const -{ - typename internal::plain_matrix_type_column_major<OtherDerived>::type res(other); - solveInPlace(res); - return res; -} - // pure sparse path namespace internal { @@ -290,11 +290,11 @@ struct sparse_solve_triangular_sparse_selector<Lhs,Rhs,Mode,UpLo,ColMajor> } // end namespace internal -template<typename ExpressionType,int Mode> +template<typename ExpressionType,unsigned int Mode> template<typename OtherDerived> -void SparseTriangularView<ExpressionType,Mode>::solveInPlace(SparseMatrixBase<OtherDerived>& other) const +void TriangularViewImpl<ExpressionType,Mode,Sparse>::solveInPlace(SparseMatrixBase<OtherDerived>& other) const { - eigen_assert(m_matrix.cols() == m_matrix.rows() && m_matrix.cols() == other.rows()); + eigen_assert(derived().cols() == derived().rows() && derived().cols() == other.rows()); eigen_assert( (!(Mode & ZeroDiag)) && bool(Mode & (Upper|Lower))); // enum { copy = internal::traits<OtherDerived>::Flags & RowMajorBit }; @@ -303,7 +303,7 @@ void SparseTriangularView<ExpressionType,Mode>::solveInPlace(SparseMatrixBase<Ot // typename internal::plain_matrix_type_column_major<OtherDerived>::type, OtherDerived&>::type OtherCopy; // OtherCopy otherCopy(other.derived()); - internal::sparse_solve_triangular_sparse_selector<ExpressionType, OtherDerived, Mode>::run(m_matrix, other.derived()); + internal::sparse_solve_triangular_sparse_selector<ExpressionType, OtherDerived, Mode>::run(derived().nestedExpression(), other.derived()); // if (copy) // other = otherCopy; diff --git a/Eigen/src/SparseLU/SparseLU.h b/Eigen/src/SparseLU/SparseLU.h index 7a9aeec2d..79b78da99 100644 --- a/Eigen/src/SparseLU/SparseLU.h +++ b/Eigen/src/SparseLU/SparseLU.h @@ -2,7 +2,7 @@ // for linear algebra. 
// // Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr> -// Copyright (C) 2012 Gael Guennebaud <gael.guennebaud@inria.fr> +// Copyright (C) 2012-2014 Gael Guennebaud <gael.guennebaud@inria.fr> // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed @@ -70,9 +70,14 @@ template <typename MatrixLType, typename MatrixUType> struct SparseLUMatrixURetu * \sa \ref OrderingMethods_Module */ template <typename _MatrixType, typename _OrderingType> -class SparseLU : public internal::SparseLUImpl<typename _MatrixType::Scalar, typename _MatrixType::Index> +class SparseLU : public SparseSolverBase<SparseLU<_MatrixType,_OrderingType> >, public internal::SparseLUImpl<typename _MatrixType::Scalar, typename _MatrixType::Index> { + protected: + typedef SparseSolverBase<SparseLU<_MatrixType,_OrderingType> > APIBase; + using APIBase::m_isInitialized; public: + using APIBase::_solve_impl; + typedef _MatrixType MatrixType; typedef _OrderingType OrderingType; typedef typename MatrixType::Scalar Scalar; @@ -86,11 +91,11 @@ class SparseLU : public internal::SparseLUImpl<typename _MatrixType::Scalar, typ typedef internal::SparseLUImpl<Scalar, Index> Base; public: - SparseLU():m_isInitialized(true),m_lastError(""),m_Ustore(0,0,0,0,0,0),m_symmetricmode(false),m_diagpivotthresh(1.0),m_detPermR(1) + SparseLU():m_lastError(""),m_Ustore(0,0,0,0,0,0),m_symmetricmode(false),m_diagpivotthresh(1.0),m_detPermR(1) { initperfvalues(); } - SparseLU(const MatrixType& matrix):m_isInitialized(true),m_lastError(""),m_Ustore(0,0,0,0,0,0),m_symmetricmode(false),m_diagpivotthresh(1.0),m_detPermR(1) + explicit SparseLU(const MatrixType& matrix):m_lastError(""),m_Ustore(0,0,0,0,0,0),m_symmetricmode(false),m_diagpivotthresh(1.0),m_detPermR(1) { initperfvalues(); compute(matrix); @@ -168,6 +173,7 @@ class SparseLU : public internal::SparseLUImpl<typename _MatrixType::Scalar, typ m_diagpivotthresh = thresh; } +#ifdef 
EIGEN_PARSED_BY_DOXYGEN /** \returns the solution X of \f$ A X = B \f$ using the current decomposition of A. * * \warning the destination matrix X in X = this->solve(B) must be colmun-major. @@ -175,26 +181,8 @@ class SparseLU : public internal::SparseLUImpl<typename _MatrixType::Scalar, typ * \sa compute() */ template<typename Rhs> - inline const internal::solve_retval<SparseLU, Rhs> solve(const MatrixBase<Rhs>& B) const - { - eigen_assert(m_factorizationIsOk && "SparseLU is not initialized."); - eigen_assert(rows()==B.rows() - && "SparseLU::solve(): invalid number of rows of the right hand side matrix B"); - return internal::solve_retval<SparseLU, Rhs>(*this, B.derived()); - } - - /** \returns the solution X of \f$ A X = B \f$ using the current decomposition of A. - * - * \sa compute() - */ - template<typename Rhs> - inline const internal::sparse_solve_retval<SparseLU, Rhs> solve(const SparseMatrixBase<Rhs>& B) const - { - eigen_assert(m_factorizationIsOk && "SparseLU is not initialized."); - eigen_assert(rows()==B.rows() - && "SparseLU::solve(): invalid number of rows of the right hand side matrix B"); - return internal::sparse_solve_retval<SparseLU, Rhs>(*this, B.derived()); - } + inline const Solve<SparseLU, Rhs> solve(const MatrixBase<Rhs>& B) const; +#endif // EIGEN_PARSED_BY_DOXYGEN /** \brief Reports whether previous computation was successful. 
* @@ -219,7 +207,7 @@ class SparseLU : public internal::SparseLUImpl<typename _MatrixType::Scalar, typ } template<typename Rhs, typename Dest> - bool _solve(const MatrixBase<Rhs> &B, MatrixBase<Dest> &X_base) const + bool _solve_impl(const MatrixBase<Rhs> &B, MatrixBase<Dest> &X_base) const { Dest& X(X_base.derived()); eigen_assert(m_factorizationIsOk && "The matrix should be factorized first"); @@ -261,14 +249,13 @@ class SparseLU : public internal::SparseLUImpl<typename _MatrixType::Scalar, typ eigen_assert(m_factorizationIsOk && "The matrix should be factorized first."); // Initialize with the determinant of the row matrix Scalar det = Scalar(1.); - //Note that the diagonal blocks of U are stored in supernodes, + // Note that the diagonal blocks of U are stored in supernodes, // which are available in the L part :) for (Index j = 0; j < this->cols(); ++j) { for (typename SCMatrix::InnerIterator it(m_Lstore, j); it; ++it) { - if(it.row() < j) continue; - if(it.row() == j) + if(it.index() == j) { det *= abs(it.value()); break; @@ -322,7 +309,7 @@ class SparseLU : public internal::SparseLUImpl<typename _MatrixType::Scalar, typ // Functions void initperfvalues() { - m_perfv.panel_size = 1; + m_perfv.panel_size = 16; m_perfv.relax = 1; m_perfv.maxsuper = 128; m_perfv.rowblk = 16; @@ -332,7 +319,6 @@ class SparseLU : public internal::SparseLUImpl<typename _MatrixType::Scalar, typ // Variables mutable ComputationInfo m_info; - bool m_isInitialized; bool m_factorizationIsOk; bool m_analysisIsOk; std::string m_lastError; @@ -377,30 +363,32 @@ void SparseLU<MatrixType, OrderingType>::analyzePattern(const MatrixType& mat) //TODO It is possible as in SuperLU to compute row and columns scaling vectors to equilibrate the matrix mat. + // Firstly, copy the whole input matrix. 
+ m_mat = mat; + + // Compute fill-in ordering OrderingType ord; - ord(mat,m_perm_c); + ord(m_mat,m_perm_c); // Apply the permutation to the column of the input matrix - //First copy the whole input matrix. - m_mat = mat; - if (m_perm_c.size()) { + if (m_perm_c.size()) + { m_mat.uncompress(); //NOTE: The effect of this command is only to create the InnerNonzeros pointers. FIXME : This vector is filled but not subsequently used. - //Then, permute only the column pointers - const Index * outerIndexPtr; - if (mat.isCompressed()) outerIndexPtr = mat.outerIndexPtr(); - else - { - Index *outerIndexPtr_t = new Index[mat.cols()+1]; - for(Index i = 0; i <= mat.cols(); i++) outerIndexPtr_t[i] = m_mat.outerIndexPtr()[i]; - outerIndexPtr = outerIndexPtr_t; - } + // Then, permute only the column pointers + ei_declare_aligned_stack_constructed_variable(Index,outerIndexPtr,mat.cols()+1,mat.isCompressed()?const_cast<Index*>(mat.outerIndexPtr()):0); + + // If the input matrix 'mat' is uncompressed, then the outer-indices do not match the ones of m_mat, and a copy is thus needed. + if(!mat.isCompressed()) + IndexVector::Map(outerIndexPtr, mat.cols()+1) = IndexVector::Map(m_mat.outerIndexPtr(),mat.cols()+1); + + // Apply the permutation and compute the nnz per column. 
for (Index i = 0; i < mat.cols(); i++) { m_mat.outerIndexPtr()[m_perm_c.indices()(i)] = outerIndexPtr[i]; m_mat.innerNonZeroPtr()[m_perm_c.indices()(i)] = outerIndexPtr[i+1] - outerIndexPtr[i]; } - if(!mat.isCompressed()) delete[] outerIndexPtr; } + // Compute the column elimination tree of the permuted matrix IndexVector firstRowElt; internal::coletree(m_mat, m_etree,firstRowElt); @@ -463,6 +451,8 @@ void SparseLU<MatrixType, OrderingType>::factorize(const MatrixType& matrix) typedef typename IndexVector::Scalar Index; + m_isInitialized = true; + // Apply the column permutation computed in analyzepattern() // m_mat = matrix * m_perm_c.inverse(); @@ -661,7 +651,7 @@ struct SparseLUMatrixLReturnType : internal::no_assignment_operator { typedef typename MappedSupernodalType::Index Index; typedef typename MappedSupernodalType::Scalar Scalar; - SparseLUMatrixLReturnType(const MappedSupernodalType& mapL) : m_mapL(mapL) + explicit SparseLUMatrixLReturnType(const MappedSupernodalType& mapL) : m_mapL(mapL) { } Index rows() { return m_mapL.rows(); } Index cols() { return m_mapL.cols(); } @@ -678,7 +668,7 @@ struct SparseLUMatrixUReturnType : internal::no_assignment_operator { typedef typename MatrixLType::Index Index; typedef typename MatrixLType::Scalar Scalar; - SparseLUMatrixUReturnType(const MatrixLType& mapL, const MatrixUType& mapU) + explicit SparseLUMatrixUReturnType(const MatrixLType& mapL, const MatrixUType& mapU) : m_mapL(mapL),m_mapU(mapU) { } Index rows() { return m_mapL.rows(); } @@ -686,8 +676,11 @@ struct SparseLUMatrixUReturnType : internal::no_assignment_operator template<typename Dest> void solveInPlace(MatrixBase<Dest> &X) const { - Index nrhs = X.cols(); - Index n = X.rows(); + /* Explicit type conversion as the Index type of MatrixBase<Dest> may be wider than Index */ + eigen_assert(X.rows() <= NumTraits<Index>::highest()); + eigen_assert(X.cols() <= NumTraits<Index>::highest()); + Index nrhs = Index(X.cols()); + Index n = Index(X.rows()); // Backward 
solve with U for (Index k = m_mapL.nsuper(); k >= 0; k--) { @@ -728,35 +721,6 @@ struct SparseLUMatrixUReturnType : internal::no_assignment_operator const MatrixUType& m_mapU; }; -namespace internal { - -template<typename _MatrixType, typename Derived, typename Rhs> -struct solve_retval<SparseLU<_MatrixType,Derived>, Rhs> - : solve_retval_base<SparseLU<_MatrixType,Derived>, Rhs> -{ - typedef SparseLU<_MatrixType,Derived> Dec; - EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs) - - template<typename Dest> void evalTo(Dest& dst) const - { - dec()._solve(rhs(),dst); - } -}; - -template<typename _MatrixType, typename Derived, typename Rhs> -struct sparse_solve_retval<SparseLU<_MatrixType,Derived>, Rhs> - : sparse_solve_retval_base<SparseLU<_MatrixType,Derived>, Rhs> -{ - typedef SparseLU<_MatrixType,Derived> Dec; - EIGEN_MAKE_SPARSE_SOLVE_HELPERS(Dec,Rhs) - - template<typename Dest> void evalTo(Dest& dst) const - { - this->defaultEvalTo(dst); - } -}; -} // end namespace internal - } // End namespace Eigen #endif diff --git a/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h b/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h index ad6f2183f..e8ee35a94 100644 --- a/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +++ b/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h @@ -189,8 +189,8 @@ class MappedSuperNodalMatrix<Scalar,Index>::InnerIterator m_idval(mat.colIndexPtr()[outer]), m_startidval(m_idval), m_endidval(mat.colIndexPtr()[outer+1]), - m_idrow(mat.rowIndexPtr()[outer]), - m_endidrow(mat.rowIndexPtr()[outer+1]) + m_idrow(mat.rowIndexPtr()[mat.supToCol()[mat.colToSup()[outer]]]), + m_endidrow(mat.rowIndexPtr()[mat.supToCol()[mat.colToSup()[outer]]+1]) {} inline InnerIterator& operator++() { @@ -233,8 +233,11 @@ template<typename Scalar, typename Index> template<typename Dest> void MappedSuperNodalMatrix<Scalar,Index>::solveInPlace( MatrixBase<Dest>&X) const { - Index n = X.rows(); - Index nrhs = X.cols(); + /* Explicit type conversion as the Index type of MatrixBase<Dest> may be wider than 
Index */ + eigen_assert(X.rows() <= NumTraits<Index>::highest()); + eigen_assert(X.cols() <= NumTraits<Index>::highest()); + Index n = Index(X.rows()); + Index nrhs = Index(X.cols()); const Scalar * Lval = valuePtr(); // Nonzero values Matrix<Scalar,Dynamic,Dynamic> work(n, nrhs); // working vector work.setZero(); diff --git a/Eigen/src/SparseLU/SparseLU_kernel_bmod.h b/Eigen/src/SparseLU/SparseLU_kernel_bmod.h index 0d0283b13..cad149ded 100644 --- a/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +++ b/Eigen/src/SparseLU/SparseLU_kernel_bmod.h @@ -31,13 +31,13 @@ namespace internal { template <int SegSizeAtCompileTime> struct LU_kernel_bmod { template <typename BlockScalarVector, typename ScalarVector, typename IndexVector, typename Index> - static EIGEN_DONT_INLINE void run(const int segsize, BlockScalarVector& dense, ScalarVector& tempv, ScalarVector& lusup, Index& luptr, const Index lda, + static EIGEN_DONT_INLINE void run(const Index segsize, BlockScalarVector& dense, ScalarVector& tempv, ScalarVector& lusup, Index& luptr, const Index lda, const Index nrow, IndexVector& lsub, const Index lptr, const Index no_zeros); }; template <int SegSizeAtCompileTime> template <typename BlockScalarVector, typename ScalarVector, typename IndexVector, typename Index> -EIGEN_DONT_INLINE void LU_kernel_bmod<SegSizeAtCompileTime>::run(const int segsize, BlockScalarVector& dense, ScalarVector& tempv, ScalarVector& lusup, Index& luptr, const Index lda, +EIGEN_DONT_INLINE void LU_kernel_bmod<SegSizeAtCompileTime>::run(const Index segsize, BlockScalarVector& dense, ScalarVector& tempv, ScalarVector& lusup, Index& luptr, const Index lda, const Index nrow, IndexVector& lsub, const Index lptr, const Index no_zeros) { typedef typename ScalarVector::Scalar Scalar; @@ -45,7 +45,7 @@ EIGEN_DONT_INLINE void LU_kernel_bmod<SegSizeAtCompileTime>::run(const int segsi // The result of triangular solve is in tempv[*]; // The result of matric-vector update is in dense[*] Index isub = lptr + no_zeros; - 
int i; + Index i; Index irow; for (i = 0; i < ((SegSizeAtCompileTime==Dynamic)?segsize:SegSizeAtCompileTime); i++) { @@ -92,13 +92,13 @@ EIGEN_DONT_INLINE void LU_kernel_bmod<SegSizeAtCompileTime>::run(const int segsi template <> struct LU_kernel_bmod<1> { template <typename BlockScalarVector, typename ScalarVector, typename IndexVector, typename Index> - static EIGEN_DONT_INLINE void run(const int /*segsize*/, BlockScalarVector& dense, ScalarVector& /*tempv*/, ScalarVector& lusup, Index& luptr, + static EIGEN_DONT_INLINE void run(const Index /*segsize*/, BlockScalarVector& dense, ScalarVector& /*tempv*/, ScalarVector& lusup, Index& luptr, const Index lda, const Index nrow, IndexVector& lsub, const Index lptr, const Index no_zeros); }; template <typename BlockScalarVector, typename ScalarVector, typename IndexVector, typename Index> -EIGEN_DONT_INLINE void LU_kernel_bmod<1>::run(const int /*segsize*/, BlockScalarVector& dense, ScalarVector& /*tempv*/, ScalarVector& lusup, Index& luptr, +EIGEN_DONT_INLINE void LU_kernel_bmod<1>::run(const Index /*segsize*/, BlockScalarVector& dense, ScalarVector& /*tempv*/, ScalarVector& lusup, Index& luptr, const Index lda, const Index nrow, IndexVector& lsub, const Index lptr, const Index no_zeros) { typedef typename ScalarVector::Scalar Scalar; diff --git a/Eigen/src/SparseQR/SparseQR.h b/Eigen/src/SparseQR/SparseQR.h index 002b4824b..133211488 100644 --- a/Eigen/src/SparseQR/SparseQR.h +++ b/Eigen/src/SparseQR/SparseQR.h @@ -62,9 +62,13 @@ namespace internal { * */ template<typename _MatrixType, typename _OrderingType> -class SparseQR +class SparseQR : public SparseSolverBase<SparseQR<_MatrixType,_OrderingType> > { + protected: + typedef SparseSolverBase<SparseQR<_MatrixType,_OrderingType> > Base; + using Base::m_isInitialized; public: + using Base::_solve_impl; typedef _MatrixType MatrixType; typedef _OrderingType OrderingType; typedef typename MatrixType::Scalar Scalar; @@ -75,7 +79,7 @@ class SparseQR typedef Matrix<Scalar, 
Dynamic, 1> ScalarVector; typedef PermutationMatrix<Dynamic, Dynamic, Index> PermutationType; public: - SparseQR () : m_isInitialized(false), m_analysisIsok(false), m_lastError(""), m_useDefaultThreshold(true),m_isQSorted(false),m_isEtreeOk(false) + SparseQR () : m_analysisIsok(false), m_lastError(""), m_useDefaultThreshold(true),m_isQSorted(false),m_isEtreeOk(false) { } /** Construct a QR factorization of the matrix \a mat. @@ -84,7 +88,7 @@ class SparseQR * * \sa compute() */ - SparseQR(const MatrixType& mat) : m_isInitialized(false), m_analysisIsok(false), m_lastError(""), m_useDefaultThreshold(true),m_isQSorted(false),m_isEtreeOk(false) + explicit SparseQR(const MatrixType& mat) : m_analysisIsok(false), m_lastError(""), m_useDefaultThreshold(true),m_isQSorted(false),m_isEtreeOk(false) { compute(mat); } @@ -162,7 +166,7 @@ class SparseQR /** \internal */ template<typename Rhs, typename Dest> - bool _solve(const MatrixBase<Rhs> &B, MatrixBase<Dest> &dest) const + bool _solve_impl(const MatrixBase<Rhs> &B, MatrixBase<Dest> &dest) const { eigen_assert(m_isInitialized && "The factorization should be called first, use compute()"); eigen_assert(this->rows() == B.rows() && "SparseQR::solve() : invalid number of rows in the right hand side matrix"); @@ -178,7 +182,7 @@ class SparseQR y.resize((std::max)(cols(),Index(y.rows())),y.cols()); y.topRows(rank) = this->matrixR().topLeftCorner(rank, rank).template triangularView<Upper>().solve(b.topRows(rank)); y.bottomRows(y.rows()-rank).setZero(); - + // Apply the column permutation if (m_perm_c.size()) dest = colsPermutation() * y.topRows(cols()); else dest = y.topRows(cols()); @@ -186,7 +190,6 @@ class SparseQR m_info = Success; return true; } - /** Sets the threshold that is used to determine linearly dependent columns during the factorization. 
* @@ -204,18 +207,18 @@ class SparseQR * \sa compute() */ template<typename Rhs> - inline const internal::solve_retval<SparseQR, Rhs> solve(const MatrixBase<Rhs>& B) const + inline const Solve<SparseQR, Rhs> solve(const MatrixBase<Rhs>& B) const { eigen_assert(m_isInitialized && "The factorization should be called first, use compute()"); eigen_assert(this->rows() == B.rows() && "SparseQR::solve() : invalid number of rows in the right hand side matrix"); - return internal::solve_retval<SparseQR, Rhs>(*this, B.derived()); + return Solve<SparseQR, Rhs>(*this, B.derived()); } template<typename Rhs> - inline const internal::sparse_solve_retval<SparseQR, Rhs> solve(const SparseMatrixBase<Rhs>& B) const + inline const Solve<SparseQR, Rhs> solve(const SparseMatrixBase<Rhs>& B) const { eigen_assert(m_isInitialized && "The factorization should be called first, use compute()"); eigen_assert(this->rows() == B.rows() && "SparseQR::solve() : invalid number of rows in the right hand side matrix"); - return internal::sparse_solve_retval<SparseQR, Rhs>(*this, B.derived()); + return Solve<SparseQR, Rhs>(*this, B.derived()); } /** \brief Reports whether previous computation was successful. @@ -244,7 +247,6 @@ class SparseQR protected: - bool m_isInitialized; bool m_analysisIsok; bool m_factorizationIsok; mutable ComputationInfo m_info; @@ -282,9 +284,11 @@ template <typename MatrixType, typename OrderingType> void SparseQR<MatrixType,OrderingType>::analyzePattern(const MatrixType& mat) { eigen_assert(mat.isCompressed() && "SparseQR requires a sparse matrix in compressed mode. 
Call .makeCompressed() before passing it to SparseQR"); + // Copy to a column major matrix if the input is rowmajor + typename internal::conditional<MatrixType::IsRowMajor,QRMatrixType,const MatrixType&>::type matCpy(mat); // Compute the column fill reducing ordering OrderingType ord; - ord(mat, m_perm_c); + ord(matCpy, m_perm_c); Index n = mat.cols(); Index m = mat.rows(); Index diagSize = (std::min)(m,n); @@ -297,7 +301,7 @@ void SparseQR<MatrixType,OrderingType>::analyzePattern(const MatrixType& mat) // Compute the column elimination tree of the permuted matrix m_outputPerm_c = m_perm_c.inverse(); - internal::coletree(mat, m_etree, m_firstRowElt, m_outputPerm_c.indices().data()); + internal::coletree(matCpy, m_etree, m_firstRowElt, m_outputPerm_c.indices().data()); m_isEtreeOk = true; m_R.resize(m, n); @@ -321,7 +325,6 @@ template <typename MatrixType, typename OrderingType> void SparseQR<MatrixType,OrderingType>::factorize(const MatrixType& mat) { using std::abs; - using std::max; eigen_assert(m_analysisIsok && "analyzePattern() should be called before this step"); Index m = mat.rows(); @@ -335,21 +338,35 @@ void SparseQR<MatrixType,OrderingType>::factorize(const MatrixType& mat) m_R.setZero(); m_Q.setZero(); + m_pmat = mat; if(!m_isEtreeOk) { m_outputPerm_c = m_perm_c.inverse(); - internal::coletree(mat, m_etree, m_firstRowElt, m_outputPerm_c.indices().data()); + internal::coletree(m_pmat, m_etree, m_firstRowElt, m_outputPerm_c.indices().data()); m_isEtreeOk = true; } - - m_pmat = mat; + m_pmat.uncompress(); // To have the innerNonZeroPtr allocated + // Apply the fill-in reducing permutation lazily: - for (int i = 0; i < n; i++) { - Index p = m_perm_c.size() ? 
m_perm_c.indices()(i) : i; - m_pmat.outerIndexPtr()[p] = mat.outerIndexPtr()[i]; - m_pmat.innerNonZeroPtr()[p] = mat.outerIndexPtr()[i+1] - mat.outerIndexPtr()[i]; + // If the input is row major, copy the original column indices, + // otherwise directly use the input matrix + // + IndexVector originalOuterIndicesCpy; + const Index *originalOuterIndices = mat.outerIndexPtr(); + if(MatrixType::IsRowMajor) + { + originalOuterIndicesCpy = IndexVector::Map(m_pmat.outerIndexPtr(),n+1); + originalOuterIndices = originalOuterIndicesCpy.data(); + } + + for (int i = 0; i < n; i++) + { + Index p = m_perm_c.size() ? m_perm_c.indices()(i) : i; + m_pmat.outerIndexPtr()[p] = originalOuterIndices[i]; + m_pmat.innerNonZeroPtr()[p] = originalOuterIndices[i+1] - originalOuterIndices[i]; + } } /* Compute the default threshold as in MatLab, see: @@ -359,7 +376,9 @@ void SparseQR<MatrixType,OrderingType>::factorize(const MatrixType& mat) if(m_useDefaultThreshold) { RealScalar max2Norm = 0.0; - for (int j = 0; j < n; j++) max2Norm = (max)(max2Norm, m_pmat.col(j).norm()); + for (int j = 0; j < n; j++) max2Norm = numext::maxi(max2Norm, m_pmat.col(j).norm()); + if(max2Norm==RealScalar(0)) + max2Norm = RealScalar(1); pivotThreshold = 20 * (m + n) * max2Norm * NumTraits<RealScalar>::epsilon(); } @@ -368,7 +387,7 @@ void SparseQR<MatrixType,OrderingType>::factorize(const MatrixType& mat) Index nonzeroCol = 0; // Record the number of valid pivots m_Q.startVec(0); - + // Left looking rank-revealing QR factorization: compute a column of R and Q at a time for (Index col = 0; col < n; ++col) { @@ -384,7 +403,7 @@ void SparseQR<MatrixType,OrderingType>::factorize(const MatrixType& mat) // all the nodes (with indexes lower than rank) reachable through the column elimination tree (etree) rooted at node k. 
// Note: if the diagonal entry does not exist, then its contribution must be explicitly added, // thus the trick with found_diag that permits to do one more iteration on the diagonal element if this one has not been found. - for (typename MatrixType::InnerIterator itp(m_pmat, col); itp || !found_diag; ++itp) + for (typename QRMatrixType::InnerIterator itp(m_pmat, col); itp || !found_diag; ++itp) { Index curIdx = nonzeroCol; if(itp) curIdx = itp.row(); @@ -536,13 +555,13 @@ void SparseQR<MatrixType,OrderingType>::factorize(const MatrixType& mat) m_R.finalize(); m_R.makeCompressed(); m_isQSorted = false; - + m_nonzeropivots = nonzeroCol; if(nonzeroCol<n) { // Permute the triangular factor to put the 'dead' columns to the end - MatrixType tempR(m_R); + QRMatrixType tempR(m_R); m_R = tempR * m_pivotperm; // Update the column permutation @@ -554,34 +573,6 @@ void SparseQR<MatrixType,OrderingType>::factorize(const MatrixType& mat) m_info = Success; } -namespace internal { - -template<typename _MatrixType, typename OrderingType, typename Rhs> -struct solve_retval<SparseQR<_MatrixType,OrderingType>, Rhs> - : solve_retval_base<SparseQR<_MatrixType,OrderingType>, Rhs> -{ - typedef SparseQR<_MatrixType,OrderingType> Dec; - EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs) - - template<typename Dest> void evalTo(Dest& dst) const - { - dec()._solve(rhs(),dst); - } -}; -template<typename _MatrixType, typename OrderingType, typename Rhs> -struct sparse_solve_retval<SparseQR<_MatrixType, OrderingType>, Rhs> - : sparse_solve_retval_base<SparseQR<_MatrixType, OrderingType>, Rhs> -{ - typedef SparseQR<_MatrixType, OrderingType> Dec; - EIGEN_MAKE_SPARSE_SOLVE_HELPERS(Dec, Rhs) - - template<typename Dest> void evalTo(Dest& dst) const - { - this->defaultEvalTo(dst); - } -}; -} // end namespace internal - template <typename SparseQRType, typename Derived> struct SparseQR_QProduct : ReturnByValue<SparseQR_QProduct<SparseQRType, Derived> > { @@ -646,7 +637,7 @@ struct SparseQRMatrixQReturnType : public 
EigenBase<SparseQRMatrixQReturnType<Sp typedef typename SparseQRType::Index Index; typedef typename SparseQRType::Scalar Scalar; typedef Matrix<Scalar,Dynamic,Dynamic> DenseMatrix; - SparseQRMatrixQReturnType(const SparseQRType& qr) : m_qr(qr) {} + explicit SparseQRMatrixQReturnType(const SparseQRType& qr) : m_qr(qr) {} template<typename Derived> SparseQR_QProduct<SparseQRType, Derived> operator*(const MatrixBase<Derived>& other) { @@ -682,7 +673,7 @@ struct SparseQRMatrixQReturnType : public EigenBase<SparseQRMatrixQReturnType<Sp template<typename SparseQRType> struct SparseQRMatrixQTransposeReturnType { - SparseQRMatrixQTransposeReturnType(const SparseQRType& qr) : m_qr(qr) {} + explicit SparseQRMatrixQTransposeReturnType(const SparseQRType& qr) : m_qr(qr) {} template<typename Derived> SparseQR_QProduct<SparseQRType,Derived> operator*(const MatrixBase<Derived>& other) { diff --git a/Eigen/src/StlSupport/StdDeque.h b/Eigen/src/StlSupport/StdDeque.h index aaf66330b..909600476 100644 --- a/Eigen/src/StlSupport/StdDeque.h +++ b/Eigen/src/StlSupport/StdDeque.h @@ -14,7 +14,7 @@ #include "details.h" // Define the explicit instantiation (e.g. necessary for the Intel compiler) -#if defined(__INTEL_COMPILER) || defined(__GNUC__) +#if EIGEN_COMP_GNUC || EIGEN_COMP_ICC #define EIGEN_EXPLICIT_STL_DEQUE_INSTANTIATION(...) template class std::deque<__VA_ARGS__, EIGEN_ALIGNED_ALLOCATOR<__VA_ARGS__> >; #else #define EIGEN_EXPLICIT_STL_DEQUE_INSTANTIATION(...) diff --git a/Eigen/src/StlSupport/StdList.h b/Eigen/src/StlSupport/StdList.h index 3c742430c..265b46f8e 100644 --- a/Eigen/src/StlSupport/StdList.h +++ b/Eigen/src/StlSupport/StdList.h @@ -13,7 +13,7 @@ #include "details.h" // Define the explicit instantiation (e.g. necessary for the Intel compiler) -#if defined(__INTEL_COMPILER) || defined(__GNUC__) +#if EIGEN_COMP_GNUC || EIGEN_COMP_ICC #define EIGEN_EXPLICIT_STL_LIST_INSTANTIATION(...) 
template class std::list<__VA_ARGS__, EIGEN_ALIGNED_ALLOCATOR<__VA_ARGS__> >; #else #define EIGEN_EXPLICIT_STL_LIST_INSTANTIATION(...) diff --git a/Eigen/src/StlSupport/details.h b/Eigen/src/StlSupport/details.h index d8debc7c4..e42ec024f 100644 --- a/Eigen/src/StlSupport/details.h +++ b/Eigen/src/StlSupport/details.h @@ -46,7 +46,7 @@ namespace Eigen { ~aligned_allocator_indirection() {} }; -#ifdef _MSC_VER +#if EIGEN_COMP_MSVC // sometimes, MSVC detects, at compile time, that the argument x // in std::vector::resize(size_t s,T x) won't be aligned and generate an error diff --git a/Eigen/src/SuperLUSupport/SuperLUSupport.h b/Eigen/src/SuperLUSupport/SuperLUSupport.h index bcb355760..6de5b3dc5 100644 --- a/Eigen/src/SuperLUSupport/SuperLUSupport.h +++ b/Eigen/src/SuperLUSupport/SuperLUSupport.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2008-2011 Gael Guennebaud <gael.guennebaud@inria.fr> +// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr> // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. 
If a copy of the MPL was not distributed @@ -288,8 +288,12 @@ MappedSparseMatrix<Scalar,Flags,Index> map_superlu(SluMatrix& sluMat) * \brief The base class for the direct and incomplete LU factorization of SuperLU */ template<typename _MatrixType, typename Derived> -class SuperLUBase : internal::noncopyable +class SuperLUBase : public SparseSolverBase<Derived> { + protected: + typedef SparseSolverBase<Derived> Base; + using Base::derived; + using Base::m_isInitialized; public: typedef _MatrixType MatrixType; typedef typename MatrixType::Scalar Scalar; @@ -309,9 +313,6 @@ class SuperLUBase : internal::noncopyable clearFactors(); } - Derived& derived() { return *static_cast<Derived*>(this); } - const Derived& derived() const { return *static_cast<const Derived*>(this); } - inline Index rows() const { return m_matrix.rows(); } inline Index cols() const { return m_matrix.cols(); } @@ -335,33 +336,7 @@ class SuperLUBase : internal::noncopyable derived().analyzePattern(matrix); derived().factorize(matrix); } - - /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A. - * - * \sa compute() - */ - template<typename Rhs> - inline const internal::solve_retval<SuperLUBase, Rhs> solve(const MatrixBase<Rhs>& b) const - { - eigen_assert(m_isInitialized && "SuperLU is not initialized."); - eigen_assert(rows()==b.rows() - && "SuperLU::solve(): invalid number of rows of the right hand side matrix b"); - return internal::solve_retval<SuperLUBase, Rhs>(*this, b.derived()); - } - - /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A. 
- * - * \sa compute() - */ - template<typename Rhs> - inline const internal::sparse_solve_retval<SuperLUBase, Rhs> solve(const SparseMatrixBase<Rhs>& b) const - { - eigen_assert(m_isInitialized && "SuperLU is not initialized."); - eigen_assert(rows()==b.rows() - && "SuperLU::solve(): invalid number of rows of the right hand side matrix b"); - return internal::sparse_solve_retval<SuperLUBase, Rhs>(*this, b.derived()); - } - + /** Performs a symbolic decomposition on the sparcity of \a matrix. * * This function is particularly useful when solving for several problems having the same structure. @@ -453,7 +428,6 @@ class SuperLUBase : internal::noncopyable mutable char m_sluEqued; mutable ComputationInfo m_info; - bool m_isInitialized; int m_factorizationIsOk; int m_analysisIsOk; mutable bool m_extractedDataAreDirty; @@ -491,10 +465,11 @@ class SuperLU : public SuperLUBase<_MatrixType,SuperLU<_MatrixType> > typedef TriangularView<LUMatrixType, Upper> UMatrixType; public: + using Base::_solve_impl; SuperLU() : Base() { init(); } - SuperLU(const MatrixType& matrix) : Base() + explicit SuperLU(const MatrixType& matrix) : Base() { init(); Base::compute(matrix); @@ -528,7 +503,7 @@ class SuperLU : public SuperLUBase<_MatrixType,SuperLU<_MatrixType> > #ifndef EIGEN_PARSED_BY_DOXYGEN /** \internal */ template<typename Rhs,typename Dest> - void _solve(const MatrixBase<Rhs> &b, MatrixBase<Dest> &dest) const; + void _solve_impl(const MatrixBase<Rhs> &b, MatrixBase<Dest> &dest) const; #endif // EIGEN_PARSED_BY_DOXYGEN inline const LMatrixType& matrixL() const @@ -637,7 +612,7 @@ void SuperLU<MatrixType>::factorize(const MatrixType& a) template<typename MatrixType> template<typename Rhs,typename Dest> -void SuperLU<MatrixType>::_solve(const MatrixBase<Rhs> &b, MatrixBase<Dest>& x) const +void SuperLU<MatrixType>::_solve_impl(const MatrixBase<Rhs> &b, MatrixBase<Dest>& x) const { eigen_assert(m_factorizationIsOk && "The decomposition is not in a valid state for solving, you must 
first call either compute() or analyzePattern()/factorize()"); @@ -652,8 +627,12 @@ void SuperLU<MatrixType>::_solve(const MatrixBase<Rhs> &b, MatrixBase<Dest>& x) m_sluFerr.resize(rhsCols); m_sluBerr.resize(rhsCols); - m_sluB = SluMatrix::Map(b.const_cast_derived()); - m_sluX = SluMatrix::Map(x.derived()); + + Ref<const Matrix<typename Rhs::Scalar,Dynamic,Dynamic,ColMajor> > b_ref(b); + Ref<const Matrix<typename Dest::Scalar,Dynamic,Dynamic,ColMajor> > x_ref(x); + + m_sluB = SluMatrix::Map(b_ref.const_cast_derived()); + m_sluX = SluMatrix::Map(x_ref.const_cast_derived()); typename Rhs::PlainObject b_cpy; if(m_sluEqued!='N') @@ -676,6 +655,10 @@ void SuperLU<MatrixType>::_solve(const MatrixBase<Rhs> &b, MatrixBase<Dest>& x) &m_sluFerr[0], &m_sluBerr[0], &m_sluStat, &info, Scalar()); StatFree(&m_sluStat); + + if(&x.coeffRef(0) != x_ref.data()) + x = x_ref; + m_info = info==0 ? Success : NumericalIssue; } @@ -828,6 +811,7 @@ class SuperILU : public SuperLUBase<_MatrixType,SuperILU<_MatrixType> > typedef typename Base::Index Index; public: + using Base::_solve_impl; SuperILU() : Base() { init(); } @@ -863,7 +847,7 @@ class SuperILU : public SuperLUBase<_MatrixType,SuperILU<_MatrixType> > #ifndef EIGEN_PARSED_BY_DOXYGEN /** \internal */ template<typename Rhs,typename Dest> - void _solve(const MatrixBase<Rhs> &b, MatrixBase<Dest> &dest) const; + void _solve_impl(const MatrixBase<Rhs> &b, MatrixBase<Dest> &dest) const; #endif // EIGEN_PARSED_BY_DOXYGEN protected: @@ -948,7 +932,7 @@ void SuperILU<MatrixType>::factorize(const MatrixType& a) template<typename MatrixType> template<typename Rhs,typename Dest> -void SuperILU<MatrixType>::_solve(const MatrixBase<Rhs> &b, MatrixBase<Dest>& x) const +void SuperILU<MatrixType>::_solve_impl(const MatrixBase<Rhs> &b, MatrixBase<Dest>& x) const { eigen_assert(m_factorizationIsOk && "The decomposition is not in a valid state for solving, you must first call either compute() or analyzePattern()/factorize()"); @@ -962,8 +946,12 @@ void 
SuperILU<MatrixType>::_solve(const MatrixBase<Rhs> &b, MatrixBase<Dest>& x) m_sluFerr.resize(rhsCols); m_sluBerr.resize(rhsCols); - m_sluB = SluMatrix::Map(b.const_cast_derived()); - m_sluX = SluMatrix::Map(x.derived()); + + Ref<const Matrix<typename Rhs::Scalar,Dynamic,Dynamic,ColMajor> > b_ref(b); + Ref<const Matrix<typename Dest::Scalar,Dynamic,Dynamic,ColMajor> > x_ref(x); + + m_sluB = SluMatrix::Map(b_ref.const_cast_derived()); + m_sluX = SluMatrix::Map(x_ref.const_cast_derived()); typename Rhs::PlainObject b_cpy; if(m_sluEqued!='N') @@ -986,41 +974,14 @@ void SuperILU<MatrixType>::_solve(const MatrixBase<Rhs> &b, MatrixBase<Dest>& x) &recip_pivot_growth, &rcond, &m_sluStat, &info, Scalar()); StatFree(&m_sluStat); + + if(&x.coeffRef(0) != x_ref.data()) + x = x_ref; m_info = info==0 ? Success : NumericalIssue; } #endif -namespace internal { - -template<typename _MatrixType, typename Derived, typename Rhs> -struct solve_retval<SuperLUBase<_MatrixType,Derived>, Rhs> - : solve_retval_base<SuperLUBase<_MatrixType,Derived>, Rhs> -{ - typedef SuperLUBase<_MatrixType,Derived> Dec; - EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs) - - template<typename Dest> void evalTo(Dest& dst) const - { - dec().derived()._solve(rhs(),dst); - } -}; - -template<typename _MatrixType, typename Derived, typename Rhs> -struct sparse_solve_retval<SuperLUBase<_MatrixType,Derived>, Rhs> - : sparse_solve_retval_base<SuperLUBase<_MatrixType,Derived>, Rhs> -{ - typedef SuperLUBase<_MatrixType,Derived> Dec; - EIGEN_MAKE_SPARSE_SOLVE_HELPERS(Dec,Rhs) - - template<typename Dest> void evalTo(Dest& dst) const - { - this->defaultEvalTo(dst); - } -}; - -} // end namespace internal - } // end namespace Eigen #endif // EIGEN_SUPERLUSUPPORT_H diff --git a/Eigen/src/UmfPackSupport/UmfPackSupport.h b/Eigen/src/UmfPackSupport/UmfPackSupport.h index 3a48cecf7..a2bb75b09 100644 --- a/Eigen/src/UmfPackSupport/UmfPackSupport.h +++ b/Eigen/src/UmfPackSupport/UmfPackSupport.h @@ -107,6 +107,16 @@ inline int 
umfpack_get_determinant(std::complex<double> *Mx, double *Ex, void *N return umfpack_zi_get_determinant(&mx_real,0,Ex,NumericHandle,User_Info); } +namespace internal { + template<typename T> struct umfpack_helper_is_sparse_plain : false_type {}; + template<typename Scalar, int Options, typename StorageIndex> + struct umfpack_helper_is_sparse_plain<SparseMatrix<Scalar,Options,StorageIndex> > + : true_type {}; + template<typename Scalar, int Options, typename StorageIndex> + struct umfpack_helper_is_sparse_plain<MappedSparseMatrix<Scalar,Options,StorageIndex> > + : true_type {}; +} + /** \ingroup UmfPackSupport_Module * \brief A sparse LU factorization and solver based on UmfPack * @@ -121,9 +131,13 @@ inline int umfpack_get_determinant(std::complex<double> *Mx, double *Ex, void *N * \sa \ref TutorialSparseDirectSolvers */ template<typename _MatrixType> -class UmfPackLU : internal::noncopyable +class UmfPackLU : public SparseSolverBase<UmfPackLU<_MatrixType> > { + protected: + typedef SparseSolverBase<UmfPackLU<_MatrixType> > Base; + using Base::m_isInitialized; public: + using Base::_solve_impl; typedef _MatrixType MatrixType; typedef typename MatrixType::Scalar Scalar; typedef typename MatrixType::RealScalar RealScalar; @@ -138,7 +152,7 @@ class UmfPackLU : internal::noncopyable UmfPackLU() { init(); } - UmfPackLU(const MatrixType& matrix) + explicit UmfPackLU(const MatrixType& matrix) { init(); compute(matrix); @@ -192,36 +206,14 @@ class UmfPackLU : internal::noncopyable * Note that the matrix should be column-major, and in compressed format for best performance. * \sa SparseMatrix::makeCompressed(). */ - void compute(const MatrixType& matrix) - { - analyzePattern(matrix); - factorize(matrix); - } - - /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A. 
- * - * \sa compute() - */ - template<typename Rhs> - inline const internal::solve_retval<UmfPackLU, Rhs> solve(const MatrixBase<Rhs>& b) const - { - eigen_assert(m_isInitialized && "UmfPackLU is not initialized."); - eigen_assert(rows()==b.rows() - && "UmfPackLU::solve(): invalid number of rows of the right hand side matrix b"); - return internal::solve_retval<UmfPackLU, Rhs>(*this, b.derived()); - } - - /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A. - * - * \sa compute() - */ - template<typename Rhs> - inline const internal::sparse_solve_retval<UmfPackLU, Rhs> solve(const SparseMatrixBase<Rhs>& b) const + template<typename InputMatrixType> + void compute(const InputMatrixType& matrix) { - eigen_assert(m_isInitialized && "UmfPackLU is not initialized."); - eigen_assert(rows()==b.rows() - && "UmfPackLU::solve(): invalid number of rows of the right hand side matrix b"); - return internal::sparse_solve_retval<UmfPackLU, Rhs>(*this, b.derived()); + if(m_symbolic) umfpack_free_symbolic(&m_symbolic,Scalar()); + if(m_numeric) umfpack_free_numeric(&m_numeric,Scalar()); + grapInput(matrix.derived()); + analyzePattern_impl(); + factorize_impl(); } /** Performs a symbolic decomposition on the sparcity of \a matrix. @@ -230,23 +222,15 @@ class UmfPackLU : internal::noncopyable * * \sa factorize(), compute() */ - void analyzePattern(const MatrixType& matrix) + template<typename InputMatrixType> + void analyzePattern(const InputMatrixType& matrix) { - if(m_symbolic) - umfpack_free_symbolic(&m_symbolic,Scalar()); - if(m_numeric) - umfpack_free_numeric(&m_numeric,Scalar()); + if(m_symbolic) umfpack_free_symbolic(&m_symbolic,Scalar()); + if(m_numeric) umfpack_free_numeric(&m_numeric,Scalar()); - grapInput(matrix); + grapInput(matrix.derived()); - int errorCode = 0; - errorCode = umfpack_symbolic(matrix.rows(), matrix.cols(), m_outerIndexPtr, m_innerIndexPtr, m_valuePtr, - &m_symbolic, 0, 0); - - m_isInitialized = true; - m_info = errorCode ? 
InvalidInput : Success; - m_analysisIsOk = true; - m_factorizationIsOk = false; + analyzePattern_impl(); } /** Performs a numeric decomposition of \a matrix @@ -255,26 +239,22 @@ class UmfPackLU : internal::noncopyable * * \sa analyzePattern(), compute() */ - void factorize(const MatrixType& matrix) + template<typename InputMatrixType> + void factorize(const InputMatrixType& matrix) { eigen_assert(m_analysisIsOk && "UmfPackLU: you must first call analyzePattern()"); if(m_numeric) umfpack_free_numeric(&m_numeric,Scalar()); - grapInput(matrix); - - int errorCode; - errorCode = umfpack_numeric(m_outerIndexPtr, m_innerIndexPtr, m_valuePtr, - m_symbolic, &m_numeric, 0, 0); - - m_info = errorCode ? NumericalIssue : Success; - m_factorizationIsOk = true; + grapInput(matrix.derived()); + + factorize_impl(); } #ifndef EIGEN_PARSED_BY_DOXYGEN /** \internal */ template<typename BDerived,typename XDerived> - bool _solve(const MatrixBase<BDerived> &b, MatrixBase<XDerived> &x) const; + bool _solve_impl(const MatrixBase<BDerived> &b, MatrixBase<XDerived> &x) const; #endif Scalar determinant() const; @@ -283,19 +263,20 @@ class UmfPackLU : internal::noncopyable protected: - void init() { - m_info = InvalidInput; - m_isInitialized = false; - m_numeric = 0; - m_symbolic = 0; - m_outerIndexPtr = 0; - m_innerIndexPtr = 0; - m_valuePtr = 0; + m_info = InvalidInput; + m_isInitialized = false; + m_numeric = 0; + m_symbolic = 0; + m_outerIndexPtr = 0; + m_innerIndexPtr = 0; + m_valuePtr = 0; + m_extractedDataAreDirty = true; } - void grapInput(const MatrixType& mat) + template<typename InputMatrixType> + void grapInput_impl(const InputMatrixType& mat, internal::true_type) { m_copyMatrix.resize(mat.rows(), mat.cols()); if( ((MatrixType::Flags&RowMajorBit)==RowMajorBit) || sizeof(typename MatrixType::Index)!=sizeof(int) || !mat.isCompressed() ) @@ -313,6 +294,45 @@ class UmfPackLU : internal::noncopyable m_valuePtr = mat.valuePtr(); } } + + template<typename InputMatrixType> + void 
grapInput_impl(const InputMatrixType& mat, internal::false_type) + { + m_copyMatrix = mat; + m_outerIndexPtr = m_copyMatrix.outerIndexPtr(); + m_innerIndexPtr = m_copyMatrix.innerIndexPtr(); + m_valuePtr = m_copyMatrix.valuePtr(); + } + + template<typename InputMatrixType> + void grapInput(const InputMatrixType& mat) + { + grapInput_impl(mat, internal::umfpack_helper_is_sparse_plain<InputMatrixType>()); + } + + void analyzePattern_impl() + { + int errorCode = 0; + errorCode = umfpack_symbolic(m_copyMatrix.rows(), m_copyMatrix.cols(), m_outerIndexPtr, m_innerIndexPtr, m_valuePtr, + &m_symbolic, 0, 0); + + m_isInitialized = true; + m_info = errorCode ? InvalidInput : Success; + m_analysisIsOk = true; + m_factorizationIsOk = false; + m_extractedDataAreDirty = true; + } + + void factorize_impl() + { + int errorCode; + errorCode = umfpack_numeric(m_outerIndexPtr, m_innerIndexPtr, m_valuePtr, + m_symbolic, &m_numeric, 0, 0); + + m_info = errorCode ? NumericalIssue : Success; + m_factorizationIsOk = true; + m_extractedDataAreDirty = true; + } // cached data to reduce reallocation, etc. 
mutable LUMatrixType m_l; @@ -328,7 +348,6 @@ class UmfPackLU : internal::noncopyable void* m_symbolic; mutable ComputationInfo m_info; - bool m_isInitialized; int m_factorizationIsOk; int m_analysisIsOk; mutable bool m_extractedDataAreDirty; @@ -376,7 +395,7 @@ typename UmfPackLU<MatrixType>::Scalar UmfPackLU<MatrixType>::determinant() cons template<typename MatrixType> template<typename BDerived,typename XDerived> -bool UmfPackLU<MatrixType>::_solve(const MatrixBase<BDerived> &b, MatrixBase<XDerived> &x) const +bool UmfPackLU<MatrixType>::_solve_impl(const MatrixBase<BDerived> &b, MatrixBase<XDerived> &x) const { const int rhsCols = b.cols(); eigen_assert((BDerived::Flags&RowMajorBit)==0 && "UmfPackLU backend does not support non col-major rhs yet"); @@ -384,11 +403,22 @@ bool UmfPackLU<MatrixType>::_solve(const MatrixBase<BDerived> &b, MatrixBase<XDe eigen_assert(b.derived().data() != x.derived().data() && " Umfpack does not support inplace solve"); int errorCode; + Scalar* x_ptr = 0; + Matrix<Scalar,Dynamic,1> x_tmp; + if(x.innerStride()!=1) + { + x_tmp.resize(x.rows()); + x_ptr = x_tmp.data(); + } for (int j=0; j<rhsCols; ++j) { + if(x.innerStride()==1) + x_ptr = &x.col(j).coeffRef(0); errorCode = umfpack_solve(UMFPACK_A, m_outerIndexPtr, m_innerIndexPtr, m_valuePtr, - &x.col(j).coeffRef(0), &b.const_cast_derived().col(j).coeffRef(0), m_numeric, 0, 0); + x_ptr, &b.const_cast_derived().col(j).coeffRef(0), m_numeric, 0, 0); + if(x.innerStride()!=1) + x.col(j) = x_tmp; if (errorCode!=0) return false; } @@ -396,37 +426,6 @@ bool UmfPackLU<MatrixType>::_solve(const MatrixBase<BDerived> &b, MatrixBase<XDe return true; } - -namespace internal { - -template<typename _MatrixType, typename Rhs> -struct solve_retval<UmfPackLU<_MatrixType>, Rhs> - : solve_retval_base<UmfPackLU<_MatrixType>, Rhs> -{ - typedef UmfPackLU<_MatrixType> Dec; - EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs) - - template<typename Dest> void evalTo(Dest& dst) const - { - dec()._solve(rhs(),dst); - } -}; - 
-template<typename _MatrixType, typename Rhs> -struct sparse_solve_retval<UmfPackLU<_MatrixType>, Rhs> - : sparse_solve_retval_base<UmfPackLU<_MatrixType>, Rhs> -{ - typedef UmfPackLU<_MatrixType> Dec; - EIGEN_MAKE_SPARSE_SOLVE_HELPERS(Dec,Rhs) - - template<typename Dest> void evalTo(Dest& dst) const - { - this->defaultEvalTo(dst); - } -}; - -} // end namespace internal - } // end namespace Eigen #endif // EIGEN_UMFPACKSUPPORT_H diff --git a/Eigen/src/misc/Kernel.h b/Eigen/src/misc/Kernel.h index b9e1518fd..4b03e44c1 100644 --- a/Eigen/src/misc/Kernel.h +++ b/Eigen/src/misc/Kernel.h @@ -41,7 +41,7 @@ template<typename _DecompositionType> struct kernel_retval_base typedef ReturnByValue<kernel_retval_base> Base; typedef typename Base::Index Index; - kernel_retval_base(const DecompositionType& dec) + explicit kernel_retval_base(const DecompositionType& dec) : m_dec(dec), m_rank(dec.rank()), m_cols(m_rank==dec.cols() ? 1 : dec.cols() - m_rank) diff --git a/Eigen/src/misc/Solve.h b/Eigen/src/misc/Solve.h deleted file mode 100644 index 7f70d60af..000000000 --- a/Eigen/src/misc/Solve.h +++ /dev/null @@ -1,76 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -#ifndef EIGEN_MISC_SOLVE_H -#define EIGEN_MISC_SOLVE_H - -namespace Eigen { - -namespace internal { - -/** \class solve_retval_base - * - */ -template<typename DecompositionType, typename Rhs> -struct traits<solve_retval_base<DecompositionType, Rhs> > -{ - typedef typename DecompositionType::MatrixType MatrixType; - typedef Matrix<typename Rhs::Scalar, - MatrixType::ColsAtCompileTime, - Rhs::ColsAtCompileTime, - Rhs::PlainObject::Options, - MatrixType::MaxColsAtCompileTime, - Rhs::MaxColsAtCompileTime> ReturnType; -}; - -template<typename _DecompositionType, typename Rhs> struct solve_retval_base - : public ReturnByValue<solve_retval_base<_DecompositionType, Rhs> > -{ - typedef typename remove_all<typename Rhs::Nested>::type RhsNestedCleaned; - typedef _DecompositionType DecompositionType; - typedef ReturnByValue<solve_retval_base> Base; - typedef typename Base::Index Index; - - solve_retval_base(const DecompositionType& dec, const Rhs& rhs) - : m_dec(dec), m_rhs(rhs) - {} - - inline Index rows() const { return m_dec.cols(); } - inline Index cols() const { return m_rhs.cols(); } - inline const DecompositionType& dec() const { return m_dec; } - inline const RhsNestedCleaned& rhs() const { return m_rhs; } - - template<typename Dest> inline void evalTo(Dest& dst) const - { - static_cast<const solve_retval<DecompositionType,Rhs>*>(this)->evalTo(dst); - } - - protected: - const DecompositionType& m_dec; - typename Rhs::Nested m_rhs; -}; - -} // end namespace internal - -#define EIGEN_MAKE_SOLVE_HELPERS(DecompositionType,Rhs) \ - typedef typename DecompositionType::MatrixType MatrixType; \ - typedef typename MatrixType::Scalar Scalar; \ - typedef typename MatrixType::RealScalar RealScalar; \ - typedef typename MatrixType::Index Index; \ - typedef Eigen::internal::solve_retval_base<DecompositionType,Rhs> Base; \ - using Base::dec; \ - using Base::rhs; \ - using Base::rows; \ - using Base::cols; \ - solve_retval(const DecompositionType& dec, const Rhs& rhs) \ - : 
Base(dec, rhs) {} - -} // end namespace Eigen - -#endif // EIGEN_MISC_SOLVE_H diff --git a/Eigen/src/misc/SparseSolve.h b/Eigen/src/misc/SparseSolve.h deleted file mode 100644 index 05caa9266..000000000 --- a/Eigen/src/misc/SparseSolve.h +++ /dev/null @@ -1,130 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_SPARSE_SOLVE_H -#define EIGEN_SPARSE_SOLVE_H - -namespace Eigen { - -namespace internal { - -template<typename _DecompositionType, typename Rhs> struct sparse_solve_retval_base; -template<typename _DecompositionType, typename Rhs> struct sparse_solve_retval; - -template<typename DecompositionType, typename Rhs> -struct traits<sparse_solve_retval_base<DecompositionType, Rhs> > -{ - typedef typename DecompositionType::MatrixType MatrixType; - typedef SparseMatrix<typename Rhs::Scalar, Rhs::Options, typename Rhs::Index> ReturnType; -}; - -template<typename _DecompositionType, typename Rhs> struct sparse_solve_retval_base - : public ReturnByValue<sparse_solve_retval_base<_DecompositionType, Rhs> > -{ - typedef typename remove_all<typename Rhs::Nested>::type RhsNestedCleaned; - typedef _DecompositionType DecompositionType; - typedef ReturnByValue<sparse_solve_retval_base> Base; - typedef typename Base::Index Index; - - sparse_solve_retval_base(const DecompositionType& dec, const Rhs& rhs) - : m_dec(dec), m_rhs(rhs) - {} - - inline Index rows() const { return m_dec.cols(); } - inline Index cols() const { return m_rhs.cols(); } - inline const DecompositionType& dec() const { return m_dec; } - inline const RhsNestedCleaned& rhs() const { return m_rhs; } - - template<typename Dest> inline void evalTo(Dest& dst) const - { - 
static_cast<const sparse_solve_retval<DecompositionType,Rhs>*>(this)->evalTo(dst); - } - - protected: - template<typename DestScalar, int DestOptions, typename DestIndex> - inline void defaultEvalTo(SparseMatrix<DestScalar,DestOptions,DestIndex>& dst) const - { - // we process the sparse rhs per block of NbColsAtOnce columns temporarily stored into a dense matrix. - static const int NbColsAtOnce = 4; - int rhsCols = m_rhs.cols(); - int size = m_rhs.rows(); - // the temporary matrices do not need more columns than NbColsAtOnce: - int tmpCols = (std::min)(rhsCols, NbColsAtOnce); - Eigen::Matrix<DestScalar,Dynamic,Dynamic> tmp(size,tmpCols); - Eigen::Matrix<DestScalar,Dynamic,Dynamic> tmpX(size,tmpCols); - for(int k=0; k<rhsCols; k+=NbColsAtOnce) - { - int actualCols = std::min<int>(rhsCols-k, NbColsAtOnce); - tmp.leftCols(actualCols) = m_rhs.middleCols(k,actualCols); - tmpX.leftCols(actualCols) = m_dec.solve(tmp.leftCols(actualCols)); - dst.middleCols(k,actualCols) = tmpX.leftCols(actualCols).sparseView(); - } - } - const DecompositionType& m_dec; - typename Rhs::Nested m_rhs; -}; - -#define EIGEN_MAKE_SPARSE_SOLVE_HELPERS(DecompositionType,Rhs) \ - typedef typename DecompositionType::MatrixType MatrixType; \ - typedef typename MatrixType::Scalar Scalar; \ - typedef typename MatrixType::RealScalar RealScalar; \ - typedef typename MatrixType::Index Index; \ - typedef Eigen::internal::sparse_solve_retval_base<DecompositionType,Rhs> Base; \ - using Base::dec; \ - using Base::rhs; \ - using Base::rows; \ - using Base::cols; \ - sparse_solve_retval(const DecompositionType& dec, const Rhs& rhs) \ - : Base(dec, rhs) {} - - - -template<typename DecompositionType, typename Rhs, typename Guess> struct solve_retval_with_guess; - -template<typename DecompositionType, typename Rhs, typename Guess> -struct traits<solve_retval_with_guess<DecompositionType, Rhs, Guess> > -{ - typedef typename DecompositionType::MatrixType MatrixType; - typedef Matrix<typename Rhs::Scalar, - 
MatrixType::ColsAtCompileTime, - Rhs::ColsAtCompileTime, - Rhs::PlainObject::Options, - MatrixType::MaxColsAtCompileTime, - Rhs::MaxColsAtCompileTime> ReturnType; -}; - -template<typename DecompositionType, typename Rhs, typename Guess> struct solve_retval_with_guess - : public ReturnByValue<solve_retval_with_guess<DecompositionType, Rhs, Guess> > -{ - typedef typename DecompositionType::Index Index; - - solve_retval_with_guess(const DecompositionType& dec, const Rhs& rhs, const Guess& guess) - : m_dec(dec), m_rhs(rhs), m_guess(guess) - {} - - inline Index rows() const { return m_dec.cols(); } - inline Index cols() const { return m_rhs.cols(); } - - template<typename Dest> inline void evalTo(Dest& dst) const - { - dst = m_guess; - m_dec._solveWithGuess(m_rhs,dst); - } - - protected: - const DecompositionType& m_dec; - const typename Rhs::Nested m_rhs; - const typename Guess::Nested m_guess; -}; - -} // namepsace internal - -} // end namespace Eigen - -#endif // EIGEN_SPARSE_SOLVE_H diff --git a/Eigen/src/plugins/ArrayCwiseUnaryOps.h b/Eigen/src/plugins/ArrayCwiseUnaryOps.h index f6d7d8944..f6f526d2b 100644 --- a/Eigen/src/plugins/ArrayCwiseUnaryOps.h +++ b/Eigen/src/plugins/ArrayCwiseUnaryOps.h @@ -1,5 +1,22 @@ +typedef CwiseUnaryOp<internal::scalar_abs_op<Scalar>, const Derived> AbsReturnType; +typedef CwiseUnaryOp<internal::scalar_abs2_op<Scalar>, const Derived> Abs2ReturnType; +typedef CwiseUnaryOp<internal::scalar_sqrt_op<Scalar>, const Derived> SqrtReturnType; +typedef CwiseUnaryOp<internal::scalar_inverse_op<Scalar>, const Derived> InverseReturnType; + +typedef CwiseUnaryOp<internal::scalar_exp_op<Scalar>, const Derived> ExpReturnType; +typedef CwiseUnaryOp<internal::scalar_log_op<Scalar>, const Derived> LogReturnType; +typedef CwiseUnaryOp<internal::scalar_cos_op<Scalar>, const Derived> CosReturnType; +typedef CwiseUnaryOp<internal::scalar_sin_op<Scalar>, const Derived> SinReturnType; +typedef CwiseUnaryOp<internal::scalar_acos_op<Scalar>, const Derived> 
AcosReturnType; +typedef CwiseUnaryOp<internal::scalar_asin_op<Scalar>, const Derived> AsinReturnType; +typedef CwiseUnaryOp<internal::scalar_tan_op<Scalar>, const Derived> TanReturnType; +typedef CwiseUnaryOp<internal::scalar_atan_op<Scalar>, const Derived> AtanReturnType; +typedef CwiseUnaryOp<internal::scalar_pow_op<Scalar>, const Derived> PowReturnType; +typedef CwiseUnaryOp<internal::scalar_square_op<Scalar>, const Derived> SquareReturnType; +typedef CwiseUnaryOp<internal::scalar_cube_op<Scalar>, const Derived> CubeReturnType; + /** \returns an expression of the coefficient-wise absolute value of \c *this * * Example: \include Cwise_abs.cpp @@ -8,10 +25,10 @@ * \sa abs2() */ EIGEN_DEVICE_FUNC -EIGEN_STRONG_INLINE const CwiseUnaryOp<internal::scalar_abs_op<Scalar>, const Derived> +EIGEN_STRONG_INLINE const AbsReturnType abs() const { - return derived(); + return AbsReturnType(derived()); } /** \returns an expression of the coefficient-wise squared absolute value of \c *this @@ -22,10 +39,10 @@ abs() const * \sa abs(), square() */ EIGEN_DEVICE_FUNC -EIGEN_STRONG_INLINE const CwiseUnaryOp<internal::scalar_abs2_op<Scalar>, const Derived> +EIGEN_STRONG_INLINE const Abs2ReturnType abs2() const { - return derived(); + return Abs2ReturnType(derived()); } /** \returns an expression of the coefficient-wise exponential of *this. @@ -39,10 +56,10 @@ abs2() const * \sa pow(), log(), sin(), cos() */ EIGEN_DEVICE_FUNC -inline const CwiseUnaryOp<internal::scalar_exp_op<Scalar>, const Derived> +inline const ExpReturnType exp() const { - return derived(); + return ExpReturnType(derived()); } /** \returns an expression of the coefficient-wise logarithm of *this. @@ -56,10 +73,10 @@ exp() const * \sa exp() */ EIGEN_DEVICE_FUNC -inline const CwiseUnaryOp<internal::scalar_log_op<Scalar>, const Derived> +inline const LogReturnType log() const { - return derived(); + return LogReturnType(derived()); } /** \returns an expression of the coefficient-wise square root of *this. 
@@ -73,10 +90,10 @@ log() const * \sa pow(), square() */ EIGEN_DEVICE_FUNC -inline const CwiseUnaryOp<internal::scalar_sqrt_op<Scalar>, const Derived> +inline const SqrtReturnType sqrt() const { - return derived(); + return SqrtReturnType(derived()); } /** \returns an expression of the coefficient-wise cosine of *this. @@ -90,10 +107,10 @@ sqrt() const * \sa sin(), acos() */ EIGEN_DEVICE_FUNC -inline const CwiseUnaryOp<internal::scalar_cos_op<Scalar>, const Derived> +inline const CosReturnType cos() const { - return derived(); + return CosReturnType(derived()); } @@ -108,10 +125,10 @@ cos() const * \sa cos(), asin() */ EIGEN_DEVICE_FUNC -inline const CwiseUnaryOp<internal::scalar_sin_op<Scalar>, const Derived> +inline const SinReturnType sin() const { - return derived(); + return SinReturnType(derived()); } /** \returns an expression of the coefficient-wise arc cosine of *this. @@ -122,10 +139,10 @@ sin() const * \sa cos(), asin() */ EIGEN_DEVICE_FUNC -inline const CwiseUnaryOp<internal::scalar_acos_op<Scalar>, const Derived> +inline const AcosReturnType acos() const { - return derived(); + return AcosReturnType(derived()); } /** \returns an expression of the coefficient-wise arc sine of *this. @@ -136,10 +153,10 @@ acos() const * \sa sin(), acos() */ EIGEN_DEVICE_FUNC -inline const CwiseUnaryOp<internal::scalar_asin_op<Scalar>, const Derived> +inline const AsinReturnType asin() const { - return derived(); + return AsinReturnType(derived()); } /** \returns an expression of the coefficient-wise tan of *this. @@ -150,10 +167,10 @@ asin() const * \sa cos(), sin() */ EIGEN_DEVICE_FUNC -inline const CwiseUnaryOp<internal::scalar_tan_op<Scalar>, Derived> +inline const TanReturnType tan() const { - return derived(); + return TanReturnType(derived()); } /** \returns an expression of the coefficient-wise arc tan of *this. 
@@ -163,10 +180,10 @@ tan() const * * \sa cos(), sin(), tan() */ -inline const CwiseUnaryOp<internal::scalar_atan_op<Scalar>, Derived> +inline const AtanReturnType atan() const { - return derived(); + return AtanReturnType(derived()); } /** \returns an expression of the coefficient-wise power of *this to the given exponent. @@ -180,11 +197,10 @@ atan() const * \sa exp(), log() */ EIGEN_DEVICE_FUNC -inline const CwiseUnaryOp<internal::scalar_pow_op<Scalar>, const Derived> +inline const PowReturnType pow(const Scalar& exponent) const { - return CwiseUnaryOp<internal::scalar_pow_op<Scalar>, const Derived> - (derived(), internal::scalar_pow_op<Scalar>(exponent)); + return PowReturnType(derived(), internal::scalar_pow_op<Scalar>(exponent)); } @@ -196,10 +212,10 @@ pow(const Scalar& exponent) const * \sa operator/(), operator*() */ EIGEN_DEVICE_FUNC -inline const CwiseUnaryOp<internal::scalar_inverse_op<Scalar>, const Derived> +inline const InverseReturnType inverse() const { - return derived(); + return InverseReturnType(derived()); } /** \returns an expression of the coefficient-wise square of *this. @@ -210,10 +226,10 @@ inverse() const * \sa operator/(), operator*(), abs2() */ EIGEN_DEVICE_FUNC -inline const CwiseUnaryOp<internal::scalar_square_op<Scalar>, const Derived> +inline const SquareReturnType square() const { - return derived(); + return SquareReturnType(derived()); } /** \returns an expression of the coefficient-wise cube of *this. 
@@ -224,10 +240,10 @@ square() const * \sa square(), pow() */ EIGEN_DEVICE_FUNC -inline const CwiseUnaryOp<internal::scalar_cube_op<Scalar>, const Derived> +inline const CubeReturnType cube() const { - return derived(); + return CubeReturnType(derived()); } #define EIGEN_MAKE_SCALAR_CWISE_UNARY_OP(METHOD_NAME,FUNCTOR) \ diff --git a/Eigen/src/plugins/CommonCwiseUnaryOps.h b/Eigen/src/plugins/CommonCwiseUnaryOps.h index a17153e64..050bce03c 100644 --- a/Eigen/src/plugins/CommonCwiseUnaryOps.h +++ b/Eigen/src/plugins/CommonCwiseUnaryOps.h @@ -14,6 +14,8 @@ /** \internal Represents a scalar multiple of an expression */ typedef CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const Derived> ScalarMultipleReturnType; +typedef CwiseUnaryOp<internal::scalar_multiple2_op<Scalar,std::complex<Scalar> >, const Derived> ScalarComplexMultipleReturnType; + /** \internal Represents a quotient of an expression by a scalar*/ typedef CwiseUnaryOp<internal::scalar_quotient1_op<Scalar>, const Derived> ScalarQuotient1ReturnType; /** \internal the return type of conjugate() */ @@ -36,13 +38,16 @@ typedef CwiseUnaryOp<internal::scalar_imag_op<Scalar>, const Derived> ImagReturn /** \internal the return type of imag() */ typedef CwiseUnaryView<internal::scalar_imag_ref_op<Scalar>, Derived> NonConstImagReturnType; +typedef CwiseUnaryOp<internal::scalar_opposite_op<Scalar>, const Derived> NegativeReturnType; +//typedef CwiseUnaryOp<internal::scalar_quotient1_op<Scalar>, const Derived> + #endif // not EIGEN_PARSED_BY_DOXYGEN /** \returns an expression of the opposite of \c *this */ EIGEN_DEVICE_FUNC -inline const CwiseUnaryOp<internal::scalar_opposite_op<typename internal::traits<Derived>::Scalar>, const Derived> -operator-() const { return derived(); } +inline const NegativeReturnType +operator-() const { return NegativeReturnType(derived()); } /** \returns an expression of \c *this scaled by the scalar factor \a scalar */ @@ -50,8 +55,7 @@ EIGEN_DEVICE_FUNC inline const 
ScalarMultipleReturnType operator*(const Scalar& scalar) const { - return CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const Derived> - (derived(), internal::scalar_multiple_op<Scalar>(scalar)); + return ScalarMultipleReturnType(derived(), internal::scalar_multiple_op<Scalar>(scalar)); } #ifdef EIGEN_PARSED_BY_DOXYGEN @@ -60,20 +64,18 @@ const ScalarMultipleReturnType operator*(const RealScalar& scalar) const; /** \returns an expression of \c *this divided by the scalar value \a scalar */ EIGEN_DEVICE_FUNC -inline const CwiseUnaryOp<internal::scalar_quotient1_op<typename internal::traits<Derived>::Scalar>, const Derived> +inline const ScalarQuotient1ReturnType operator/(const Scalar& scalar) const { - return CwiseUnaryOp<internal::scalar_quotient1_op<Scalar>, const Derived> - (derived(), internal::scalar_quotient1_op<Scalar>(scalar)); + return ScalarQuotient1ReturnType(derived(), internal::scalar_quotient1_op<Scalar>(scalar)); } /** Overloaded for efficient real matrix times complex scalar value */ EIGEN_DEVICE_FUNC -inline const CwiseUnaryOp<internal::scalar_multiple2_op<Scalar,std::complex<Scalar> >, const Derived> +inline const ScalarComplexMultipleReturnType operator*(const std::complex<Scalar>& scalar) const { - return CwiseUnaryOp<internal::scalar_multiple2_op<Scalar,std::complex<Scalar> >, const Derived> - (*static_cast<const Derived*>(this), internal::scalar_multiple2_op<Scalar,std::complex<Scalar> >(scalar)); + return ScalarComplexMultipleReturnType(derived(), internal::scalar_multiple2_op<Scalar,std::complex<Scalar> >(scalar)); } EIGEN_DEVICE_FUNC @@ -86,6 +88,9 @@ inline friend const CwiseUnaryOp<internal::scalar_multiple2_op<Scalar,std::compl operator*(const std::complex<Scalar>& scalar, const StorageBaseType& matrix) { return matrix*scalar; } + +template<class NewType> struct CastXpr { typedef typename internal::cast_return_type<Derived,const CwiseUnaryOp<internal::scalar_cast_op<Scalar, NewType>, const Derived> >::type Type; }; + /** \returns an 
expression of *this with the \a Scalar type casted to * \a NewScalar. * @@ -95,10 +100,10 @@ operator*(const std::complex<Scalar>& scalar, const StorageBaseType& matrix) */ template<typename NewType> EIGEN_DEVICE_FUNC -typename internal::cast_return_type<Derived,const CwiseUnaryOp<internal::scalar_cast_op<typename internal::traits<Derived>::Scalar, NewType>, const Derived> >::type +typename CastXpr<NewType>::Type cast() const { - return derived(); + return typename CastXpr<NewType>::Type(derived()); } /** \returns an expression of the complex conjugate of \c *this. @@ -116,14 +121,14 @@ conjugate() const * \sa imag() */ EIGEN_DEVICE_FUNC inline RealReturnType -real() const { return derived(); } +real() const { return RealReturnType(derived()); } /** \returns an read-only expression of the imaginary part of \c *this. * * \sa real() */ EIGEN_DEVICE_FUNC inline const ImagReturnType -imag() const { return derived(); } +imag() const { return ImagReturnType(derived()); } /** \brief Apply a unary operator coefficient-wise * \param[in] func Functor implementing the unary operator @@ -176,11 +181,11 @@ unaryViewExpr(const CustomViewOp& func = CustomViewOp()) const * \sa imag() */ EIGEN_DEVICE_FUNC inline NonConstRealReturnType -real() { return derived(); } +real() { return NonConstRealReturnType(derived()); } /** \returns a non const expression of the imaginary part of \c *this. * * \sa real() */ EIGEN_DEVICE_FUNC inline NonConstImagReturnType -imag() { return derived(); } +imag() { return NonConstImagReturnType(derived()); } diff --git a/Eigen/src/plugins/MatrixCwiseUnaryOps.h b/Eigen/src/plugins/MatrixCwiseUnaryOps.h index 1bb15f862..c99ee94ec 100644 --- a/Eigen/src/plugins/MatrixCwiseUnaryOps.h +++ b/Eigen/src/plugins/MatrixCwiseUnaryOps.h @@ -10,6 +10,11 @@ // This file is a base class plugin containing matrix specifics coefficient wise functions. 
+typedef CwiseUnaryOp<internal::scalar_abs_op<Scalar>, const Derived> CwiseAbsReturnType; +typedef CwiseUnaryOp<internal::scalar_abs2_op<Scalar>, const Derived> CwiseAbs2ReturnType; +typedef CwiseUnaryOp<internal::scalar_sqrt_op<Scalar>, const Derived> CwiseSqrtReturnType; +typedef CwiseUnaryOp<internal::scalar_inverse_op<Scalar>, const Derived> CwiseInverseReturnType; +typedef CwiseUnaryOp<std::binder1st<std::equal_to<Scalar> >, const Derived> CwiseScalarEqualReturnType; /** \returns an expression of the coefficient-wise absolute value of \c *this * * Example: \include MatrixBase_cwiseAbs.cpp @@ -18,8 +23,8 @@ * \sa cwiseAbs2() */ EIGEN_DEVICE_FUNC -EIGEN_STRONG_INLINE const CwiseUnaryOp<internal::scalar_abs_op<Scalar>, const Derived> -cwiseAbs() const { return derived(); } +EIGEN_STRONG_INLINE const CwiseAbsReturnType +cwiseAbs() const { return CwiseAbsReturnType(derived()); } /** \returns an expression of the coefficient-wise squared absolute value of \c *this * @@ -29,8 +34,8 @@ cwiseAbs() const { return derived(); } * \sa cwiseAbs() */ EIGEN_DEVICE_FUNC -EIGEN_STRONG_INLINE const CwiseUnaryOp<internal::scalar_abs2_op<Scalar>, const Derived> -cwiseAbs2() const { return derived(); } +EIGEN_STRONG_INLINE const CwiseAbs2ReturnType +cwiseAbs2() const { return CwiseAbs2ReturnType(derived()); } /** \returns an expression of the coefficient-wise square root of *this. * @@ -40,8 +45,8 @@ cwiseAbs2() const { return derived(); } * \sa cwisePow(), cwiseSquare() */ EIGEN_DEVICE_FUNC -inline const CwiseUnaryOp<internal::scalar_sqrt_op<Scalar>, const Derived> -cwiseSqrt() const { return derived(); } +inline const CwiseSqrtReturnType +cwiseSqrt() const { return CwiseSqrtReturnType(derived()); } /** \returns an expression of the coefficient-wise inverse of *this. 
* @@ -51,8 +56,8 @@ cwiseSqrt() const { return derived(); } * \sa cwiseProduct() */ EIGEN_DEVICE_FUNC -inline const CwiseUnaryOp<internal::scalar_inverse_op<Scalar>, const Derived> -cwiseInverse() const { return derived(); } +inline const CwiseInverseReturnType +cwiseInverse() const { return CwiseInverseReturnType(derived()); } /** \returns an expression of the coefficient-wise == operator of \c *this and a scalar \a s * @@ -64,9 +69,8 @@ cwiseInverse() const { return derived(); } * \sa cwiseEqual(const MatrixBase<OtherDerived> &) const */ EIGEN_DEVICE_FUNC -inline const CwiseUnaryOp<std::binder1st<std::equal_to<Scalar> >, const Derived> +inline const CwiseScalarEqualReturnType cwiseEqual(const Scalar& s) const { - return CwiseUnaryOp<std::binder1st<std::equal_to<Scalar> >,const Derived> - (derived(), std::bind1st(std::equal_to<Scalar>(), s)); + return CwiseScalarEqualReturnType(derived(), std::bind1st(std::equal_to<Scalar>(), s)); } |