diff options
Diffstat (limited to 'Eigen/src/Core/CoreEvaluators.h')
-rw-r--r-- | Eigen/src/Core/CoreEvaluators.h | 878 |
1 files changed, 549 insertions, 329 deletions
diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h index 3568cb85f..1c7123b85 100644 --- a/Eigen/src/Core/CoreEvaluators.h +++ b/Eigen/src/Core/CoreEvaluators.h @@ -2,7 +2,7 @@ // for linear algebra. // // Copyright (C) 2011 Benoit Jacob <jacob.benoit.1@gmail.com> -// Copyright (C) 2011 Gael Guennebaud <gael.guennebaud@inria.fr> +// Copyright (C) 2011-2014 Gael Guennebaud <gael.guennebaud@inria.fr> // Copyright (C) 2011-2012 Jitse Niesen <jitse@maths.leeds.ac.uk> // // This Source Code Form is subject to the terms of the Mozilla @@ -14,57 +14,85 @@ #define EIGEN_COREEVALUATORS_H namespace Eigen { - + namespace internal { -// evaluator_traits<T> contains traits for evaluator_impl<T> +// This class returns the evaluator kind from the expression storage kind. +// Default assumes index based accessors +template<typename StorageKind> +struct storage_kind_to_evaluator_kind { + typedef IndexBased Kind; +}; -template<typename T> -struct evaluator_traits -{ - // 1 if evaluator_impl<T>::evalTo() exists - // 0 if evaluator_impl<T> allows coefficient-based access - static const int HasEvalTo = 0; +// This class returns the evaluator shape from the expression storage kind. +// It can be Dense, Sparse, Triangular, Diagonal, SelfAdjoint, Band, etc. +template<typename StorageKind> struct storage_kind_to_shape; - // 1 if assignment A = B assumes aliasing when B is of type T and thus B needs to be evaluated into a - // temporary; 0 if not. - static const int AssumeAliasing = 0; -}; -// expression class for evaluating nested expression to a temporary - -template<typename ArgType> -class EvalToTemp; +template<> struct storage_kind_to_shape<Dense> { typedef DenseShape Shape; }; -// evaluator<T>::type is type of evaluator for T -// evaluator<T>::nestedType is type of evaluator if T is nested inside another evaluator - -template<typename T> -struct evaluator_impl -{ }; - -template<typename T, int Nested = evaluator_traits<T>::HasEvalTo> -struct evaluator_nested_type; + +// FIXME Is this necessary? And why was it not before refactoring??? +template<> struct storage_kind_to_shape<PermutationStorage> { typedef PermutationShape Shape; }; + + +// Evaluators have to be specialized with respect to various criteria such as: +// - storage/structure/shape +// - scalar type +// - etc. +// Therefore, we need specialization of evaluator providing additional template arguments for each kind of evaluators. +// We currently distinguish the following kind of evaluators: +// - unary_evaluator for expressions taking only one arguments (CwiseUnaryOp, CwiseUnaryView, Transpose, MatrixWrapper, ArrayWrapper, Reverse, Replicate) +// - binary_evaluator for expression taking two arguments (CwiseBinaryOp) +// - product_evaluator for linear algebra products (Product); special case of binary_evaluator because it requires additional tags for dispatching. +// - mapbase_evaluator for Map, Block, Ref +// - block_evaluator for Block (special dispatching to a mapbase_evaluator or unary_evaluator) + +template< typename T, + typename LhsKind = typename evaluator_traits<typename T::Lhs>::Kind, + typename RhsKind = typename evaluator_traits<typename T::Rhs>::Kind, + typename LhsScalar = typename traits<typename T::Lhs>::Scalar, + typename RhsScalar = typename traits<typename T::Rhs>::Scalar> struct binary_evaluator; + +template< typename T, + typename Kind = typename evaluator_traits<typename T::NestedExpression>::Kind, + typename Scalar = typename T::Scalar> struct unary_evaluator; + +// evaluator_traits<T> contains traits for evaluator<T> template<typename T> -struct evaluator_nested_type<T, 0> +struct evaluator_traits_base { - typedef evaluator_impl<T> type; + // TODO check whether these two indirections are really needed. + // Basically, if nobody overwrite type and nestedType, then, they can be dropped +// typedef evaluator<T> type; +// typedef evaluator<T> nestedType; + + // by default, get evaluator kind and shape from storage + typedef typename storage_kind_to_evaluator_kind<typename traits<T>::StorageKind>::Kind Kind; + typedef typename storage_kind_to_shape<typename traits<T>::StorageKind>::Shape Shape; + + // 1 if assignment A = B assumes aliasing when B is of type T and thus B needs to be evaluated into a + // temporary; 0 if not. + static const int AssumeAliasing = 0; }; +// Default evaluator traits template<typename T> -struct evaluator_nested_type<T, 1> +struct evaluator_traits : public evaluator_traits_base<T> { - typedef evaluator_impl<EvalToTemp<T> > type; }; + +// By default, we assume a unary expression: template<typename T> -struct evaluator +struct evaluator : public unary_evaluator<T> { - typedef evaluator_impl<T> type; - typedef typename evaluator_nested_type<T>::type nestedType; + typedef unary_evaluator<T> Base; + EIGEN_DEVICE_FUNC explicit evaluator(const T& xpr) : Base(xpr) {} }; + // TODO: Think about const-correctness template<typename T> @@ -76,47 +104,59 @@ struct evaluator<const T> // TODO this class does not seem to be necessary anymore template<typename ExpressionType> -struct evaluator_impl_base +struct evaluator_base { - typedef typename ExpressionType::Index Index; +// typedef typename evaluator_traits<ExpressionType>::type type; +// typedef typename evaluator_traits<ExpressionType>::nestedType nestedType; + typedef evaluator<ExpressionType> type; + typedef evaluator<ExpressionType> nestedType; + + typedef typename traits<ExpressionType>::Index Index; // TODO that's not very nice to have to propagate all these traits. They are currently only needed to handle outer,inner indices. typedef traits<ExpressionType> ExpressionTraits; - - evaluator_impl<ExpressionType>& derived() - { - return *static_cast<evaluator_impl<ExpressionType>*>(this); - } }; // -------------------- Matrix and Array -------------------- // -// evaluator_impl<PlainObjectBase> is a common base class for the +// evaluator<PlainObjectBase> is a common base class for the // Matrix and Array evaluators. +// Here we directly specialize evaluator. This is not really a unary expression, and it is, by definition, dense, +// so no need for more sophisticated dispatching. template<typename Derived> -struct evaluator_impl<PlainObjectBase<Derived> > - : evaluator_impl_base<Derived> +struct evaluator<PlainObjectBase<Derived> > + : evaluator_base<Derived> { typedef PlainObjectBase<Derived> PlainObjectType; + typedef typename PlainObjectType::Index Index; + typedef typename PlainObjectType::Scalar Scalar; + typedef typename PlainObjectType::CoeffReturnType CoeffReturnType; + typedef typename PlainObjectType::PacketScalar PacketScalar; + typedef typename PlainObjectType::PacketReturnType PacketReturnType; enum { IsRowMajor = PlainObjectType::IsRowMajor, IsVectorAtCompileTime = PlainObjectType::IsVectorAtCompileTime, RowsAtCompileTime = PlainObjectType::RowsAtCompileTime, - ColsAtCompileTime = PlainObjectType::ColsAtCompileTime + ColsAtCompileTime = PlainObjectType::ColsAtCompileTime, + + CoeffReadCost = NumTraits<Scalar>::ReadCost, + Flags = compute_matrix_evaluator_flags< Scalar,Derived::RowsAtCompileTime,Derived::ColsAtCompileTime, + Derived::Options,Derived::MaxRowsAtCompileTime,Derived::MaxColsAtCompileTime>::ret }; - - evaluator_impl(const PlainObjectType& m) + + EIGEN_DEVICE_FUNC evaluator() + : m_data(0), + m_outerStride(IsVectorAtCompileTime ? 0 + : int(IsRowMajor) ? ColsAtCompileTime + : RowsAtCompileTime) + {} + + EIGEN_DEVICE_FUNC explicit evaluator(const PlainObjectType& m) : m_data(m.data()), m_outerStride(IsVectorAtCompileTime ? 0 : m.outerStride()) { } - typedef typename PlainObjectType::Index Index; - typedef typename PlainObjectType::Scalar Scalar; - typedef typename PlainObjectType::CoeffReturnType CoeffReturnType; - typedef typename PlainObjectType::PacketScalar PacketScalar; - typedef typename PlainObjectType::PacketReturnType PacketReturnType; - - CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const { if (IsRowMajor) return m_data[row * m_outerStride.value() + col]; @@ -124,12 +164,12 @@ struct evaluator_impl<PlainObjectBase<Derived> > return m_data[row + col * m_outerStride.value()]; } - CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const { return m_data[index]; } - Scalar& coeffRef(Index row, Index col) + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col) { if (IsRowMajor) return const_cast<Scalar*>(m_data)[row * m_outerStride.value() + col]; @@ -137,7 +177,7 @@ struct evaluator_impl<PlainObjectBase<Derived> > return const_cast<Scalar*>(m_data)[row + col * m_outerStride.value()]; } - Scalar& coeffRef(Index index) + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index) { return const_cast<Scalar*>(m_data)[index]; } @@ -184,153 +224,45 @@ protected: }; template<typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols> -struct evaluator_impl<Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols> > - : evaluator_impl<PlainObjectBase<Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols> > > +struct evaluator<Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols> > + : evaluator<PlainObjectBase<Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols> > > { typedef Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols> XprType; + + evaluator() {} - evaluator_impl(const XprType& m) - : evaluator_impl<PlainObjectBase<XprType> >(m) + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& m) + : evaluator<PlainObjectBase<XprType> >(m) { } }; template<typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols> -struct evaluator_impl<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> > - : evaluator_impl<PlainObjectBase<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> > > +struct evaluator<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> > + : evaluator<PlainObjectBase<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> > > { typedef Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> XprType; - evaluator_impl(const XprType& m) - : evaluator_impl<PlainObjectBase<XprType> >(m) - { } -}; - -// -------------------- EvalToTemp -------------------- - -template<typename ArgType> -struct traits<EvalToTemp<ArgType> > - : public traits<ArgType> -{ }; - -template<typename ArgType> -class EvalToTemp - : public dense_xpr_base<EvalToTemp<ArgType> >::type -{ - public: - - typedef typename dense_xpr_base<EvalToTemp>::type Base; - EIGEN_GENERIC_PUBLIC_INTERFACE(EvalToTemp) - - EvalToTemp(const ArgType& arg) - : m_arg(arg) - { } - - const ArgType& arg() const - { - return m_arg; - } - - Index rows() const - { - return m_arg.rows(); - } - - Index cols() const - { - return m_arg.cols(); - } - - private: - const ArgType& m_arg; -}; - -template<typename ArgType> -struct evaluator_impl<EvalToTemp<ArgType> > -{ - typedef EvalToTemp<ArgType> XprType; - typedef typename ArgType::PlainObject PlainObject; - - evaluator_impl(const XprType& xpr) - : m_result(xpr.rows(), xpr.cols()), m_resultImpl(m_result) - { - // TODO we should simply do m_result(xpr.arg()); - call_dense_assignment_loop(m_result, xpr.arg()); - } - - // This constructor is used when nesting an EvalTo evaluator in another evaluator - evaluator_impl(const ArgType& arg) - : m_result(arg.rows(), arg.cols()), m_resultImpl(m_result) - { - // TODO we should simply do m_result(xpr.arg()); - call_dense_assignment_loop(m_result, arg); - } - - typedef typename PlainObject::Index Index; - typedef typename PlainObject::Scalar Scalar; - typedef typename PlainObject::CoeffReturnType CoeffReturnType; - typedef typename PlainObject::PacketScalar PacketScalar; - typedef typename PlainObject::PacketReturnType PacketReturnType; - - // All other functions are forwarded to m_resultImpl - - CoeffReturnType coeff(Index row, Index col) const - { - return m_resultImpl.coeff(row, col); - } - - CoeffReturnType coeff(Index index) const - { - return m_resultImpl.coeff(index); - } + evaluator() {} - Scalar& coeffRef(Index row, Index col) - { - return m_resultImpl.coeffRef(row, col); - } - - Scalar& coeffRef(Index index) - { - return m_resultImpl.coeffRef(index); - } - - template<int LoadMode> - PacketReturnType packet(Index row, Index col) const - { - return m_resultImpl.template packet<LoadMode>(row, col); - } - - template<int LoadMode> - PacketReturnType packet(Index index) const - { - return m_resultImpl.packet<LoadMode>(index); - } - - template<int StoreMode> - void writePacket(Index row, Index col, const PacketScalar& x) - { - m_resultImpl.template writePacket<StoreMode>(row, col, x); - } - - template<int StoreMode> - void writePacket(Index index, const PacketScalar& x) - { - m_resultImpl.template writePacket<StoreMode>(index, x); - } - -protected: - PlainObject m_result; - typename evaluator<PlainObject>::nestedType m_resultImpl; + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& m) + : evaluator<PlainObjectBase<XprType> >(m) + { } }; // -------------------- Transpose -------------------- template<typename ArgType> -struct evaluator_impl<Transpose<ArgType> > - : evaluator_impl_base<Transpose<ArgType> > +struct unary_evaluator<Transpose<ArgType>, IndexBased> + : evaluator_base<Transpose<ArgType> > { typedef Transpose<ArgType> XprType; + + enum { + CoeffReadCost = evaluator<ArgType>::CoeffReadCost, + Flags = evaluator<ArgType>::Flags ^ RowMajorBit + }; - evaluator_impl(const XprType& t) : m_argImpl(t.nestedExpression()) {} + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& t) : m_argImpl(t.nestedExpression()) {} typedef typename XprType::Index Index; typedef typename XprType::Scalar Scalar; @@ -338,22 +270,22 @@ struct evaluator_impl<Transpose<ArgType> > typedef typename XprType::PacketScalar PacketScalar; typedef typename XprType::PacketReturnType PacketReturnType; - CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const { return m_argImpl.coeff(col, row); } - CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const { return m_argImpl.coeff(index); } - Scalar& coeffRef(Index row, Index col) + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col) { return m_argImpl.coeffRef(col, row); } - typename XprType::Scalar& coeffRef(Index index) + EIGEN_DEVICE_FUNC typename XprType::Scalar& coeffRef(Index index) { return m_argImpl.coeffRef(index); } @@ -387,13 +319,27 @@ protected: }; // -------------------- CwiseNullaryOp -------------------- +// Like Matrix and Array, this is not really a unary expression, so we directly specialize evaluator. +// Likewise, there is not need to more sophisticated dispatching here. template<typename NullaryOp, typename PlainObjectType> -struct evaluator_impl<CwiseNullaryOp<NullaryOp,PlainObjectType> > +struct evaluator<CwiseNullaryOp<NullaryOp,PlainObjectType> > + : evaluator_base<CwiseNullaryOp<NullaryOp,PlainObjectType> > { typedef CwiseNullaryOp<NullaryOp,PlainObjectType> XprType; + typedef typename internal::remove_all<PlainObjectType>::type PlainObjectTypeCleaned; + + enum { + CoeffReadCost = internal::functor_traits<NullaryOp>::Cost, + + Flags = (evaluator<PlainObjectTypeCleaned>::Flags + & ( HereditaryBits + | (functor_has_linear_access<NullaryOp>::ret ? LinearAccessBit : 0) + | (functor_traits<NullaryOp>::PacketAccess ? PacketAccessBit : 0))) + | (functor_traits<NullaryOp>::IsRepeatable ? 0 : EvalBeforeNestingBit) // FIXME EvalBeforeNestingBit should be needed anymore + }; - evaluator_impl(const XprType& n) + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& n) : m_functor(n.functor()) { } @@ -401,12 +347,12 @@ struct evaluator_impl<CwiseNullaryOp<NullaryOp,PlainObjectType> > typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename XprType::PacketScalar PacketScalar; - CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const { return m_functor(row, col); } - CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const { return m_functor(index); } @@ -430,11 +376,20 @@ protected: // -------------------- CwiseUnaryOp -------------------- template<typename UnaryOp, typename ArgType> -struct evaluator_impl<CwiseUnaryOp<UnaryOp, ArgType> > +struct unary_evaluator<CwiseUnaryOp<UnaryOp, ArgType>, IndexBased > + : evaluator_base<CwiseUnaryOp<UnaryOp, ArgType> > { typedef CwiseUnaryOp<UnaryOp, ArgType> XprType; + + enum { + CoeffReadCost = evaluator<ArgType>::CoeffReadCost + functor_traits<UnaryOp>::Cost, + + Flags = evaluator<ArgType>::Flags & ( + HereditaryBits | LinearAccessBit | AlignedBit + | (functor_traits<UnaryOp>::PacketAccess ? PacketAccessBit : 0)) + }; - evaluator_impl(const XprType& op) + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op) : m_functor(op.functor()), m_argImpl(op.nestedExpression()) { } @@ -443,12 +398,12 @@ struct evaluator_impl<CwiseUnaryOp<UnaryOp, ArgType> > typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename XprType::PacketScalar PacketScalar; - CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const { return m_functor(m_argImpl.coeff(row, col)); } - CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const { return m_functor(m_argImpl.coeff(index)); } @@ -472,12 +427,43 @@ protected: // -------------------- CwiseBinaryOp -------------------- +// this is a binary expression template<typename BinaryOp, typename Lhs, typename Rhs> -struct evaluator_impl<CwiseBinaryOp<BinaryOp, Lhs, Rhs> > +struct evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs> > + : public binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs> > { typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> XprType; + typedef binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs> > Base; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(xpr) {} +}; - evaluator_impl(const XprType& xpr) +template<typename BinaryOp, typename Lhs, typename Rhs> +struct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IndexBased, IndexBased> + : evaluator_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> > +{ + typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> XprType; + + enum { + CoeffReadCost = evaluator<Lhs>::CoeffReadCost + evaluator<Rhs>::CoeffReadCost + functor_traits<BinaryOp>::Cost, + + LhsFlags = evaluator<Lhs>::Flags, + RhsFlags = evaluator<Rhs>::Flags, + SameType = is_same<typename Lhs::Scalar,typename Rhs::Scalar>::value, + StorageOrdersAgree = (int(LhsFlags)&RowMajorBit)==(int(RhsFlags)&RowMajorBit), + Flags0 = (int(LhsFlags) | int(RhsFlags)) & ( + HereditaryBits + | (int(LhsFlags) & int(RhsFlags) & + ( AlignedBit + | (StorageOrdersAgree ? LinearAccessBit : 0) + | (functor_traits<BinaryOp>::PacketAccess && StorageOrdersAgree && SameType ? PacketAccessBit : 0) + ) + ) + ), + Flags = (Flags0 & ~RowMajorBit) | (LhsFlags & RowMajorBit) + }; + + EIGEN_DEVICE_FUNC explicit binary_evaluator(const XprType& xpr) : m_functor(xpr.functor()), m_lhsImpl(xpr.lhs()), m_rhsImpl(xpr.rhs()) @@ -487,12 +473,12 @@ struct evaluator_impl<CwiseBinaryOp<BinaryOp, Lhs, Rhs> > typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename XprType::PacketScalar PacketScalar; - CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const { return m_functor(m_lhsImpl.coeff(row, col), m_rhsImpl.coeff(row, col)); } - CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const { return m_functor(m_lhsImpl.coeff(index), m_rhsImpl.coeff(index)); } @@ -501,14 +487,14 @@ struct evaluator_impl<CwiseBinaryOp<BinaryOp, Lhs, Rhs> > PacketScalar packet(Index row, Index col) const { return m_functor.packetOp(m_lhsImpl.template packet<LoadMode>(row, col), - m_rhsImpl.template packet<LoadMode>(row, col)); + m_rhsImpl.template packet<LoadMode>(row, col)); } template<int LoadMode> PacketScalar packet(Index index) const { return m_functor.packetOp(m_lhsImpl.template packet<LoadMode>(index), - m_rhsImpl.template packet<LoadMode>(index)); + m_rhsImpl.template packet<LoadMode>(index)); } protected: @@ -520,12 +506,18 @@ protected: // -------------------- CwiseUnaryView -------------------- template<typename UnaryOp, typename ArgType> -struct evaluator_impl<CwiseUnaryView<UnaryOp, ArgType> > - : evaluator_impl_base<CwiseUnaryView<UnaryOp, ArgType> > +struct unary_evaluator<CwiseUnaryView<UnaryOp, ArgType>, IndexBased> + : evaluator_base<CwiseUnaryView<UnaryOp, ArgType> > { typedef CwiseUnaryView<UnaryOp, ArgType> XprType; + + enum { + CoeffReadCost = evaluator<ArgType>::CoeffReadCost + functor_traits<UnaryOp>::Cost, + + Flags = (evaluator<ArgType>::Flags & (HereditaryBits | LinearAccessBit | DirectAccessBit)) + }; - evaluator_impl(const XprType& op) + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op) : m_unaryOp(op.functor()), m_argImpl(op.nestedExpression()) { } @@ -534,22 +526,22 @@ struct evaluator_impl<CwiseUnaryView<UnaryOp, ArgType> > typedef typename XprType::Scalar Scalar; typedef typename XprType::CoeffReturnType CoeffReturnType; - CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const { return m_unaryOp(m_argImpl.coeff(row, col)); } - CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const { return m_unaryOp(m_argImpl.coeff(index)); } - Scalar& coeffRef(Index row, Index col) + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col) { return m_unaryOp(m_argImpl.coeffRef(row, col)); } - Scalar& coeffRef(Index index) + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index) { return m_unaryOp(m_argImpl.coeffRef(index)); } @@ -561,13 +553,15 @@ protected: // -------------------- Map -------------------- -template<typename Derived, int AccessorsType> -struct evaluator_impl<MapBase<Derived, AccessorsType> > - : evaluator_impl_base<Derived> -{ - typedef MapBase<Derived, AccessorsType> MapType; - typedef Derived XprType; +// FIXME perhaps the PlainObjectType could be provided by Derived::PlainObject ? +// but that might complicate template specialization +template<typename Derived, typename PlainObjectType> +struct mapbase_evaluator; +template<typename Derived, typename PlainObjectType> +struct mapbase_evaluator : evaluator_base<Derived> +{ + typedef Derived XprType; typedef typename XprType::PointerType PointerType; typedef typename XprType::Index Index; typedef typename XprType::Scalar Scalar; @@ -575,81 +569,121 @@ struct evaluator_impl<MapBase<Derived, AccessorsType> > typedef typename XprType::PacketScalar PacketScalar; typedef typename XprType::PacketReturnType PacketReturnType; - evaluator_impl(const XprType& map) - : m_data(const_cast<PointerType>(map.data())), - m_rowStride(map.rowStride()), - m_colStride(map.colStride()) - { } - enum { - RowsAtCompileTime = XprType::RowsAtCompileTime + IsRowMajor = XprType::RowsAtCompileTime, + ColsAtCompileTime = XprType::ColsAtCompileTime, + CoeffReadCost = NumTraits<Scalar>::ReadCost }; + + EIGEN_DEVICE_FUNC explicit mapbase_evaluator(const XprType& map) + : m_data(const_cast<PointerType>(map.data())), + m_xpr(map) + { + EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(evaluator<Derived>::Flags&PacketAccessBit, internal::inner_stride_at_compile_time<Derived>::ret==1), + PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1); + } - CoeffReturnType coeff(Index row, Index col) const - { - return m_data[col * m_colStride + row * m_rowStride]; + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const + { + return m_data[col * m_xpr.colStride() + row * m_xpr.rowStride()]; } - CoeffReturnType coeff(Index index) const - { - return coeff(RowsAtCompileTime == 1 ? 0 : index, - RowsAtCompileTime == 1 ? index : 0); + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const + { + return m_data[index * m_xpr.innerStride()]; } - Scalar& coeffRef(Index row, Index col) - { - return m_data[col * m_colStride + row * m_rowStride]; + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col) + { + return m_data[col * m_xpr.colStride() + row * m_xpr.rowStride()]; } - Scalar& coeffRef(Index index) - { - return coeffRef(RowsAtCompileTime == 1 ? 0 : index, - RowsAtCompileTime == 1 ? index : 0); + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index) + { + return m_data[index * m_xpr.innerStride()]; } template<int LoadMode> PacketReturnType packet(Index row, Index col) const - { - PointerType ptr = m_data + row * m_rowStride + col * m_colStride; + { + PointerType ptr = m_data + row * m_xpr.rowStride() + col * m_xpr.colStride(); return internal::ploadt<PacketScalar, LoadMode>(ptr); } template<int LoadMode> PacketReturnType packet(Index index) const - { - return packet<LoadMode>(RowsAtCompileTime == 1 ? 0 : index, - RowsAtCompileTime == 1 ? index : 0); + { + return internal::ploadt<PacketScalar, LoadMode>(m_data + index * m_xpr.innerStride()); } template<int StoreMode> void writePacket(Index row, Index col, const PacketScalar& x) - { - PointerType ptr = m_data + row * m_rowStride + col * m_colStride; + { + PointerType ptr = m_data + row * m_xpr.rowStride() + col * m_xpr.colStride(); return internal::pstoret<Scalar, PacketScalar, StoreMode>(ptr, x); } template<int StoreMode> void writePacket(Index index, const PacketScalar& x) - { - return writePacket<StoreMode>(RowsAtCompileTime == 1 ? 0 : index, - RowsAtCompileTime == 1 ? index : 0, - x); + { + internal::pstoret<Scalar, PacketScalar, StoreMode>(m_data + index * m_xpr.innerStride(), x); } protected: PointerType m_data; - int m_rowStride; - int m_colStride; + const XprType& m_xpr; }; template<typename PlainObjectType, int MapOptions, typename StrideType> -struct evaluator_impl<Map<PlainObjectType, MapOptions, StrideType> > - : public evaluator_impl<MapBase<Map<PlainObjectType, MapOptions, StrideType> > > +struct evaluator<Map<PlainObjectType, MapOptions, StrideType> > + : public mapbase_evaluator<Map<PlainObjectType, MapOptions, StrideType>, PlainObjectType> { typedef Map<PlainObjectType, MapOptions, StrideType> XprType; + typedef typename XprType::Scalar Scalar; + + enum { + InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0 + ? int(PlainObjectType::InnerStrideAtCompileTime) + : int(StrideType::InnerStrideAtCompileTime), + OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0 + ? int(PlainObjectType::OuterStrideAtCompileTime) + : int(StrideType::OuterStrideAtCompileTime), + HasNoInnerStride = InnerStrideAtCompileTime == 1, + HasNoOuterStride = StrideType::OuterStrideAtCompileTime == 0, + HasNoStride = HasNoInnerStride && HasNoOuterStride, + IsAligned = bool(EIGEN_ALIGN) && ((int(MapOptions)&Aligned)==Aligned), + IsDynamicSize = PlainObjectType::SizeAtCompileTime==Dynamic, + KeepsPacketAccess = bool(HasNoInnerStride) + && ( bool(IsDynamicSize) + || HasNoOuterStride + || ( OuterStrideAtCompileTime!=Dynamic + && ((static_cast<int>(sizeof(Scalar))*OuterStrideAtCompileTime)%EIGEN_ALIGN_BYTES)==0 ) ), + Flags0 = evaluator<PlainObjectType>::Flags, + Flags1 = IsAligned ? (int(Flags0) | AlignedBit) : (int(Flags0) & ~AlignedBit), + Flags2 = (bool(HasNoStride) || bool(PlainObjectType::IsVectorAtCompileTime)) + ? int(Flags1) : int(Flags1 & ~LinearAccessBit), + Flags = KeepsPacketAccess ? int(Flags2) : (int(Flags2) & ~PacketAccessBit) + }; - evaluator_impl(const XprType& map) - : evaluator_impl<MapBase<XprType> >(map) + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& map) + : mapbase_evaluator<XprType, PlainObjectType>(map) + { } +}; + +// -------------------- Ref -------------------- + +template<typename PlainObjectType, int RefOptions, typename StrideType> +struct evaluator<Ref<PlainObjectType, RefOptions, StrideType> > + : public mapbase_evaluator<Ref<PlainObjectType, RefOptions, StrideType>, PlainObjectType> +{ + typedef Ref<PlainObjectType, RefOptions, StrideType> XprType; + + enum { + Flags = evaluator<Map<PlainObjectType, RefOptions, StrideType> >::Flags + }; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& ref) + : mapbase_evaluator<XprType, PlainObjectType>(ref) { } }; @@ -659,21 +693,68 @@ template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel, bool HasDirectAccess = internal::has_direct_access<ArgType>::ret> struct block_evaluator; template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel> -struct evaluator_impl<Block<ArgType, BlockRows, BlockCols, InnerPanel> > +struct evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> > : block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel> { typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType; + typedef typename XprType::Scalar Scalar; + + enum { + CoeffReadCost = evaluator<ArgType>::CoeffReadCost, + + RowsAtCompileTime = traits<XprType>::RowsAtCompileTime, + ColsAtCompileTime = traits<XprType>::ColsAtCompileTime, + MaxRowsAtCompileTime = traits<XprType>::MaxRowsAtCompileTime, + MaxColsAtCompileTime = traits<XprType>::MaxColsAtCompileTime, + + ArgTypeIsRowMajor = (int(evaluator<ArgType>::Flags)&RowMajorBit) != 0, + IsRowMajor = (MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1) ? 1 + : (MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1) ? 0 + : ArgTypeIsRowMajor, + HasSameStorageOrderAsArgType = (IsRowMajor == ArgTypeIsRowMajor), + InnerSize = IsRowMajor ? int(ColsAtCompileTime) : int(RowsAtCompileTime), + InnerStrideAtCompileTime = HasSameStorageOrderAsArgType + ? int(inner_stride_at_compile_time<ArgType>::ret) + : int(outer_stride_at_compile_time<ArgType>::ret), + OuterStrideAtCompileTime = HasSameStorageOrderAsArgType + ? int(outer_stride_at_compile_time<ArgType>::ret) + : int(inner_stride_at_compile_time<ArgType>::ret), + MaskPacketAccessBit = (InnerSize == Dynamic || (InnerSize % packet_traits<Scalar>::size) == 0) + && (InnerStrideAtCompileTime == 1) + ? PacketAccessBit : 0, + + MaskAlignedBit = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % EIGEN_ALIGN_BYTES) == 0)) ? AlignedBit : 0, + FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1 || (InnerPanel && (evaluator<ArgType>::Flags&LinearAccessBit))) ? LinearAccessBit : 0, + FlagsRowMajorBit = XprType::Flags&RowMajorBit, + Flags0 = evaluator<ArgType>::Flags & ( (HereditaryBits & ~RowMajorBit) | + DirectAccessBit | + MaskPacketAccessBit | + MaskAlignedBit), + Flags = Flags0 | FlagsLinearAccessBit | FlagsRowMajorBit + }; typedef block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel> block_evaluator_type; - evaluator_impl(const XprType& block) : block_evaluator_type(block) {} + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& block) : block_evaluator_type(block) {} }; +// no direct-access => dispatch to a unary evaluator template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel> struct block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel, /*HasDirectAccess*/ false> - : evaluator_impl_base<Block<ArgType, BlockRows, BlockCols, InnerPanel> > + : unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> > +{ + typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType; + + EIGEN_DEVICE_FUNC explicit block_evaluator(const XprType& block) + : unary_evaluator<XprType>(block) + {} +}; + +template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel> +struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBased> + : evaluator_base<Block<ArgType, BlockRows, BlockCols, InnerPanel> > { typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType; - block_evaluator(const XprType& block) + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& block) : m_argImpl(block.nestedExpression()), m_startRow(block.startRow()), m_startCol(block.startCol()) @@ -689,26 +770,24 @@ struct block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel, /*HasDirectAcc RowsAtCompileTime = XprType::RowsAtCompileTime }; - CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const { return m_argImpl.coeff(m_startRow.value() + row, m_startCol.value() + col); } - CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const { - return coeff(RowsAtCompileTime == 1 ? 0 : index, - RowsAtCompileTime == 1 ? index : 0); + return coeff(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0); } - Scalar& coeffRef(Index row, Index col) + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col) { return m_argImpl.coeffRef(m_startRow.value() + row, m_startCol.value() + col); } - Scalar& coeffRef(Index index) + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index) { - return coeffRef(RowsAtCompileTime == 1 ? 0 : index, - RowsAtCompileTime == 1 ? index : 0); + return coeffRef(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0); } template<int LoadMode> @@ -721,7 +800,7 @@ struct block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel, /*HasDirectAcc PacketReturnType packet(Index index) const { return packet<LoadMode>(RowsAtCompileTime == 1 ? 0 : index, - RowsAtCompileTime == 1 ? index : 0); + RowsAtCompileTime == 1 ? index : 0); } template<int StoreMode> @@ -734,8 +813,8 @@ struct block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel, /*HasDirectAcc void writePacket(Index index, const PacketScalar& x) { return writePacket<StoreMode>(RowsAtCompileTime == 1 ? 0 : index, - RowsAtCompileTime == 1 ? index : 0, - x); + RowsAtCompileTime == 1 ? index : 0, + x); } protected: @@ -749,24 +828,38 @@ protected: template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel> struct block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel, /* HasDirectAccess */ true> - : evaluator_impl<MapBase<Block<ArgType, BlockRows, BlockCols, InnerPanel> > > + : mapbase_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, + typename Block<ArgType, BlockRows, BlockCols, InnerPanel>::PlainObject> { typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType; - block_evaluator(const XprType& block) - : evaluator_impl<MapBase<XprType> >(block) - { } + EIGEN_DEVICE_FUNC explicit block_evaluator(const XprType& block) + : mapbase_evaluator<XprType, typename XprType::PlainObject>(block) + { + // FIXME this should be an internal assertion + eigen_assert(EIGEN_IMPLIES(evaluator<XprType>::Flags&AlignedBit, (size_t(block.data()) % EIGEN_ALIGN_BYTES) == 0) && "data is not aligned"); + } }; // -------------------- Select -------------------- +// TODO shall we introduce a ternary_evaluator? +// TODO enable vectorization for Select template<typename ConditionMatrixType, typename ThenMatrixType, typename ElseMatrixType> -struct evaluator_impl<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> > +struct evaluator<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> > + : evaluator_base<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> > { typedef Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> XprType; + enum { + CoeffReadCost = evaluator<ConditionMatrixType>::CoeffReadCost + + EIGEN_SIZE_MAX(evaluator<ThenMatrixType>::CoeffReadCost, + evaluator<ElseMatrixType>::CoeffReadCost), + + Flags = (unsigned int)evaluator<ThenMatrixType>::Flags & evaluator<ElseMatrixType>::Flags & HereditaryBits + }; - evaluator_impl(const XprType& select) + inline EIGEN_DEVICE_FUNC explicit evaluator(const XprType& select) : m_conditionImpl(select.conditionMatrix()), m_thenImpl(select.thenMatrix()), m_elseImpl(select.elseMatrix()) @@ -775,7 +868,7 @@ struct evaluator_impl<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType typedef typename XprType::Index Index; typedef typename XprType::CoeffReturnType CoeffReturnType; - CoeffReturnType coeff(Index row, Index col) const + inline EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const { if (m_conditionImpl.coeff(row, col)) return m_thenImpl.coeff(row, col); @@ -783,7 +876,7 @@ struct evaluator_impl<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType return m_elseImpl.coeff(row, col); } - CoeffReturnType coeff(Index index) const + inline EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const { if (m_conditionImpl.coeff(index)) return m_thenImpl.coeff(index); @@ -801,21 +894,33 @@ protected: // -------------------- Replicate -------------------- template<typename ArgType, int RowFactor, int ColFactor> -struct evaluator_impl<Replicate<ArgType, RowFactor, ColFactor> > +struct unary_evaluator<Replicate<ArgType, RowFactor, ColFactor> > + : evaluator_base<Replicate<ArgType, RowFactor, ColFactor> > { typedef Replicate<ArgType, RowFactor, ColFactor> XprType; - - evaluator_impl(const XprType& replicate) - : m_argImpl(replicate.nestedExpression()), - m_rows(replicate.nestedExpression().rows()), - m_cols(replicate.nestedExpression().cols()) - { } - typedef typename XprType::Index Index; typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename XprType::PacketReturnType PacketReturnType; + enum { + Factor = (RowFactor==Dynamic || ColFactor==Dynamic) ? Dynamic : RowFactor*ColFactor + }; + typedef typename internal::nested_eval<ArgType,Factor>::type ArgTypeNested; + typedef typename internal::remove_all<ArgTypeNested>::type ArgTypeNestedCleaned; + + enum { + CoeffReadCost = evaluator<ArgTypeNestedCleaned>::CoeffReadCost, + + Flags = (evaluator<ArgTypeNestedCleaned>::Flags & HereditaryBits & ~RowMajorBit) | (traits<XprType>::Flags & RowMajorBit) + }; - CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& replicate) + : m_arg(replicate.nestedExpression()), + m_argImpl(m_arg), + m_rows(replicate.nestedExpression().rows()), + m_cols(replicate.nestedExpression().cols()) + {} + + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const { // try to avoid using modulo; this is a pure optimization strategy const Index actual_row = internal::traits<XprType>::RowsAtCompileTime==1 ? 0 @@ -842,9 +947,10 @@ struct evaluator_impl<Replicate<ArgType, RowFactor, ColFactor> > } protected: - typename evaluator<ArgType>::nestedType m_argImpl; - const variable_if_dynamic<Index, XprType::RowsAtCompileTime> m_rows; - const variable_if_dynamic<Index, XprType::ColsAtCompileTime> m_cols; + const ArgTypeNested m_arg; // FIXME is it OK to store both the argument and its evaluator?? (we have the same situation in evaluator_product) + typename evaluator<ArgTypeNestedCleaned>::nestedType m_argImpl; + const variable_if_dynamic<Index, ArgType::RowsAtCompileTime> m_rows; + const variable_if_dynamic<Index, ArgType::ColsAtCompileTime> m_cols; }; @@ -855,23 +961,35 @@ protected: // the row() and col() member functions. template< typename ArgType, typename MemberOp, int Direction> -struct evaluator_impl<PartialReduxExpr<ArgType, MemberOp, Direction> > +struct evaluator<PartialReduxExpr<ArgType, MemberOp, Direction> > + : evaluator_base<PartialReduxExpr<ArgType, MemberOp, Direction> > { typedef PartialReduxExpr<ArgType, MemberOp, Direction> XprType; + typedef typename XprType::Scalar InputScalar; + enum { + TraversalSize = Direction==int(Vertical) ? int(ArgType::RowsAtCompileTime) : int(XprType::ColsAtCompileTime) + }; + typedef typename MemberOp::template Cost<InputScalar,int(TraversalSize)> CostOpType; + enum { + CoeffReadCost = TraversalSize==Dynamic ? Dynamic + : TraversalSize * evaluator<ArgType>::CoeffReadCost + int(CostOpType::value), + + Flags = (traits<XprType>::Flags&RowMajorBit) | (evaluator<ArgType>::Flags&HereditaryBits) + }; - evaluator_impl(const XprType expr) + EIGEN_DEVICE_FUNC explicit evaluator(const XprType expr) : m_expr(expr) - { } + {} typedef typename XprType::Index Index; typedef typename XprType::CoeffReturnType CoeffReturnType; - CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const { return m_expr.coeff(row, col); } - CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const { return m_expr.coeff(index); } @@ -883,16 +1001,20 @@ protected: // -------------------- MatrixWrapper and ArrayWrapper -------------------- // -// evaluator_impl_wrapper_base<T> is a common base class for the +// evaluator_wrapper_base<T> is a common base class for the // MatrixWrapper and ArrayWrapper evaluators. template<typename XprType> -struct evaluator_impl_wrapper_base - : evaluator_impl_base<XprType> +struct evaluator_wrapper_base + : evaluator_base<XprType> { typedef typename remove_all<typename XprType::NestedExpressionType>::type ArgType; + enum { + CoeffReadCost = evaluator<ArgType>::CoeffReadCost, + Flags = evaluator<ArgType>::Flags + }; - evaluator_impl_wrapper_base(const ArgType& arg) : m_argImpl(arg) {} + EIGEN_DEVICE_FUNC explicit evaluator_wrapper_base(const ArgType& arg) : m_argImpl(arg) {} typedef typename ArgType::Index Index; typedef typename ArgType::Scalar Scalar; @@ -900,22 +1022,22 @@ struct evaluator_impl_wrapper_base typedef typename ArgType::PacketScalar PacketScalar; typedef typename ArgType::PacketReturnType PacketReturnType; - CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const { return m_argImpl.coeff(row, col); } - CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const { return m_argImpl.coeff(index); } - Scalar& coeffRef(Index row, Index col) + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col) { return m_argImpl.coeffRef(row, col); } - Scalar& coeffRef(Index index) + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index) { return m_argImpl.coeffRef(index); } @@ -949,24 +1071,24 @@ protected: }; template<typename TArgType> -struct evaluator_impl<MatrixWrapper<TArgType> > - : evaluator_impl_wrapper_base<MatrixWrapper<TArgType> > +struct unary_evaluator<MatrixWrapper<TArgType> > + : evaluator_wrapper_base<MatrixWrapper<TArgType> > { typedef MatrixWrapper<TArgType> XprType; - evaluator_impl(const XprType& wrapper) - : evaluator_impl_wrapper_base<MatrixWrapper<TArgType> >(wrapper.nestedExpression()) + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& wrapper) + : evaluator_wrapper_base<MatrixWrapper<TArgType> >(wrapper.nestedExpression()) { } }; template<typename TArgType> -struct evaluator_impl<ArrayWrapper<TArgType> > - : evaluator_impl_wrapper_base<ArrayWrapper<TArgType> > +struct unary_evaluator<ArrayWrapper<TArgType> > + : evaluator_wrapper_base<ArrayWrapper<TArgType> > { typedef ArrayWrapper<TArgType> XprType; - evaluator_impl(const XprType& wrapper) - : evaluator_impl_wrapper_base<ArrayWrapper<TArgType> >(wrapper.nestedExpression()) + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& wrapper) + : evaluator_wrapper_base<ArrayWrapper<TArgType> >(wrapper.nestedExpression()) { } }; @@ -977,8 +1099,8 @@ struct evaluator_impl<ArrayWrapper<TArgType> > template<typename PacketScalar, bool ReversePacket> struct reverse_packet_cond; template<typename ArgType, int Direction> -struct evaluator_impl<Reverse<ArgType, Direction> > - : evaluator_impl_base<Reverse<ArgType, Direction> > +struct unary_evaluator<Reverse<ArgType, Direction> > + : evaluator_base<Reverse<ArgType, Direction> > { typedef Reverse<ArgType, Direction> XprType; typedef typename XprType::Index Index; @@ -997,34 +1119,44 @@ struct evaluator_impl<Reverse<ArgType, Direction> > OffsetCol = ReverseCol && IsRowMajor ? PacketSize : 1, ReversePacket = (Direction == BothDirections) || ((Direction == Vertical) && IsColMajor) - || ((Direction == Horizontal) && IsRowMajor) + || ((Direction == Horizontal) && IsRowMajor), + + CoeffReadCost = evaluator<ArgType>::CoeffReadCost, + + // let's enable LinearAccess only with vectorization because of the product overhead + // FIXME enable DirectAccess with negative strides? + Flags0 = evaluator<ArgType>::Flags, + LinearAccess = ( (Direction==BothDirections) && (int(Flags0)&PacketAccessBit) ) + ? LinearAccessBit : 0, + + Flags = int(Flags0) & (HereditaryBits | PacketAccessBit | LinearAccess) }; typedef internal::reverse_packet_cond<PacketScalar,ReversePacket> reverse_packet; - evaluator_impl(const XprType& reverse) + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& reverse) : m_argImpl(reverse.nestedExpression()), m_rows(ReverseRow ? reverse.nestedExpression().rows() : 0), m_cols(ReverseCol ? reverse.nestedExpression().cols() : 0) { } - CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const { return m_argImpl.coeff(ReverseRow ? m_rows.value() - row - 1 : row, - ReverseCol ? m_cols.value() - col - 1 : col); + ReverseCol ? m_cols.value() - col - 1 : col); } - CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const { return m_argImpl.coeff(m_rows.value() * m_cols.value() - index - 1); } - Scalar& coeffRef(Index row, Index col) + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col) { return m_argImpl.coeffRef(ReverseRow ? m_rows.value() - row - 1 : row, - ReverseCol ? m_cols.value() - col - 1 : col); + ReverseCol ? m_cols.value() - col - 1 : col); } - Scalar& coeffRef(Index index) + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index) { return m_argImpl.coeffRef(m_rows.value() * m_cols.value() - index - 1); } @@ -1071,36 +1203,44 @@ protected: // -------------------- Diagonal -------------------- template<typename ArgType, int DiagIndex> -struct evaluator_impl<Diagonal<ArgType, DiagIndex> > - : evaluator_impl_base<Diagonal<ArgType, DiagIndex> > +struct evaluator<Diagonal<ArgType, DiagIndex> > + : evaluator_base<Diagonal<ArgType, DiagIndex> > { typedef Diagonal<ArgType, DiagIndex> XprType; + + enum { + CoeffReadCost = evaluator<ArgType>::CoeffReadCost, + + Flags = (unsigned int)evaluator<ArgType>::Flags & (HereditaryBits | LinearAccessBit | DirectAccessBit) & ~RowMajorBit + }; - evaluator_impl(const XprType& diagonal) + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& diagonal) : m_argImpl(diagonal.nestedExpression()), m_index(diagonal.index()) { } typedef typename XprType::Index Index; typedef typename XprType::Scalar Scalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; + // FIXME having to check whether ArgType is sparse here i not very nice. + typedef typename internal::conditional<!internal::is_same<typename ArgType::StorageKind,Sparse>::value, + typename XprType::CoeffReturnType,Scalar>::type CoeffReturnType; - CoeffReturnType coeff(Index row, Index) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index) const { return m_argImpl.coeff(row + rowOffset(), row + colOffset()); } - CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const { return m_argImpl.coeff(index + rowOffset(), index + colOffset()); } - Scalar& coeffRef(Index row, Index) + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index) { return m_argImpl.coeffRef(row + rowOffset(), row + colOffset()); } - Scalar& coeffRef(Index index) + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index) { return m_argImpl.coeffRef(index + rowOffset(), index + colOffset()); } @@ -1110,8 +1250,88 @@ protected: const internal::variable_if_dynamicindex<Index, XprType::DiagIndex> m_index; private: - EIGEN_STRONG_INLINE Index rowOffset() const { return m_index.value() > 0 ? 0 : -m_index.value(); } - EIGEN_STRONG_INLINE Index colOffset() const { return m_index.value() > 0 ? m_index.value() : 0; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rowOffset() const { return m_index.value() > 0 ? 0 : -m_index.value(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index colOffset() const { return m_index.value() > 0 ? m_index.value() : 0; } +}; + + +//---------------------------------------------------------------------- +// deprecated code +//---------------------------------------------------------------------- + +// -------------------- EvalToTemp -------------------- + +// expression class for evaluating nested expression to a temporary + +template<typename ArgType> class EvalToTemp; + +template<typename ArgType> +struct traits<EvalToTemp<ArgType> > + : public traits<ArgType> +{ }; + +template<typename ArgType> +class EvalToTemp + : public dense_xpr_base<EvalToTemp<ArgType> >::type +{ + public: + + typedef typename dense_xpr_base<EvalToTemp>::type Base; + EIGEN_GENERIC_PUBLIC_INTERFACE(EvalToTemp) + + explicit EvalToTemp(const ArgType& arg) + : m_arg(arg) + { } + + const ArgType& arg() const + { + return m_arg; + } + + Index rows() const + { + return m_arg.rows(); + } + + Index cols() const + { + return m_arg.cols(); + } + + private: + const ArgType& m_arg; +}; + +template<typename ArgType> +struct evaluator<EvalToTemp<ArgType> > + : public evaluator<typename ArgType::PlainObject>::type +{ + typedef EvalToTemp<ArgType> XprType; + typedef typename ArgType::PlainObject PlainObject; + typedef typename evaluator<PlainObject>::type Base; + + typedef evaluator type; + typedef evaluator nestedType; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) + : m_result(xpr.rows(), xpr.cols()) + { + ::new (static_cast<Base*>(this)) Base(m_result); + // TODO we should simply do m_result(xpr.arg()); + call_dense_assignment_loop(m_result, xpr.arg()); + } + + // This constructor is used when nesting an EvalTo evaluator in another evaluator + EIGEN_DEVICE_FUNC evaluator(const ArgType& arg) + : m_result(arg.rows(), arg.cols()) + { + ::new (static_cast<Base*>(this)) Base(m_result); + // TODO we should simply do m_result(xpr.arg()); + call_dense_assignment_loop(m_result, arg); + } + +protected: + PlainObject m_result; }; } // namespace internal |