diff options
author | Gael Guennebaud <g.gael@free.fr> | 2014-03-12 13:34:11 +0100 |
---|---|---|
committer | Gael Guennebaud <g.gael@free.fr> | 2014-03-12 13:34:11 +0100 |
commit | 8dd3b716e39d4b4b472b948de1af20838bf17493 (patch) | |
tree | 3fa4e90f1a6caf23e5028c8c5025e04ad27a8768 /Eigen/src/Core | |
parent | 7eefdb948c1ff372f85991ff3f9d998e66a554d9 (diff) |
Move evaluation related flags from traits to evaluator and fix evaluators of MapBase and Replicate
Diffstat (limited to 'Eigen/src/Core')
-rw-r--r-- | Eigen/src/Core/AssignEvaluator.h | 15 | ||||
-rw-r--r-- | Eigen/src/Core/Block.h | 11 | ||||
-rw-r--r-- | Eigen/src/Core/CoreEvaluators.h | 231 | ||||
-rw-r--r-- | Eigen/src/Core/CwiseBinaryOp.h | 8 | ||||
-rw-r--r-- | Eigen/src/Core/CwiseNullaryOp.h | 7 | ||||
-rw-r--r-- | Eigen/src/Core/CwiseUnaryOp.h | 7 | ||||
-rw-r--r-- | Eigen/src/Core/CwiseUnaryView.h | 4 | ||||
-rw-r--r-- | Eigen/src/Core/Diagonal.h | 5 | ||||
-rw-r--r-- | Eigen/src/Core/DiagonalMatrix.h | 1 | ||||
-rw-r--r-- | Eigen/src/Core/DiagonalProduct.h | 8 | ||||
-rw-r--r-- | Eigen/src/Core/Map.h | 7 | ||||
-rw-r--r-- | Eigen/src/Core/MapBase.h | 9 | ||||
-rw-r--r-- | Eigen/src/Core/Product.h | 27 | ||||
-rw-r--r-- | Eigen/src/Core/ProductEvaluators.h | 111 | ||||
-rw-r--r-- | Eigen/src/Core/Redux.h | 11 | ||||
-rw-r--r-- | Eigen/src/Core/Replicate.h | 7 | ||||
-rw-r--r-- | Eigen/src/Core/Reverse.h | 7 | ||||
-rw-r--r-- | Eigen/src/Core/Select.h | 5 | ||||
-rw-r--r-- | Eigen/src/Core/Transpose.h | 7 | ||||
-rw-r--r-- | Eigen/src/Core/VectorwiseOp.h | 4 | ||||
-rw-r--r-- | Eigen/src/Core/products/TriangularMatrixVector.h | 2 | ||||
-rw-r--r-- | Eigen/src/Core/util/ForwardDeclarations.h | 12 | ||||
-rw-r--r-- | Eigen/src/Core/util/XprHelper.h | 52 |
23 files changed, 433 insertions, 125 deletions
diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h index 2ea1cc126..05816094c 100644 --- a/Eigen/src/Core/AssignEvaluator.h +++ b/Eigen/src/Core/AssignEvaluator.h @@ -28,11 +28,10 @@ template <typename DstEvaluator, typename SrcEvaluator, typename AssignFunc> struct copy_using_evaluator_traits { typedef typename DstEvaluator::XprType Dst; - typedef typename SrcEvaluator::XprType Src; - // TODO, we should get these flags from the evaluators + enum { - DstFlags = Dst::Flags, - SrcFlags = Src::Flags + DstFlags = DstEvaluator::Flags, + SrcFlags = SrcEvaluator::Flags }; public: @@ -56,7 +55,9 @@ private: }; enum { - StorageOrdersAgree = (int(Dst::IsRowMajor) == int(Src::IsRowMajor)), + DstIsRowMajor = DstEvaluator::Flags&RowMajorBit, + SrcIsRowMajor = SrcEvaluator::Flags&RowMajorBit, + StorageOrdersAgree = (int(DstIsRowMajor) == int(SrcIsRowMajor)), MightVectorize = StorageOrdersAgree && (int(DstFlags) & int(SrcFlags) & ActualPacketAccessBit) && (functor_traits<AssignFunc>::PacketAccess), @@ -596,7 +597,7 @@ public: typedef typename DstEvaluatorType::ExpressionTraits Traits; return int(Traits::RowsAtCompileTime) == 1 ? 0 : int(Traits::ColsAtCompileTime) == 1 ? inner - : int(Traits::Flags)&RowMajorBit ? outer + : int(DstEvaluatorType::Flags)&RowMajorBit ? outer : inner; } @@ -605,7 +606,7 @@ public: typedef typename DstEvaluatorType::ExpressionTraits Traits; return int(Traits::ColsAtCompileTime) == 1 ? 0 : int(Traits::RowsAtCompileTime) == 1 ? inner - : int(Traits::Flags)&RowMajorBit ? inner + : int(DstEvaluatorType::Flags)&RowMajorBit ? inner : outer; } diff --git a/Eigen/src/Core/Block.h b/Eigen/src/Core/Block.h index 31cd5c72c..d92797a98 100644 --- a/Eigen/src/Core/Block.h +++ b/Eigen/src/Core/Block.h @@ -68,6 +68,7 @@ struct traits<Block<XprType, BlockRows, BlockCols, InnerPanel> > : traits<XprTyp MaxColsAtCompileTime = BlockCols==0 ? 0 : ColsAtCompileTime != Dynamic ? int(ColsAtCompileTime) : int(traits<XprType>::MaxColsAtCompileTime), + XprTypeIsRowMajor = (int(traits<XprType>::Flags)&RowMajorBit) != 0, IsRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1 : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0 @@ -80,6 +81,10 @@ struct traits<Block<XprType, BlockRows, BlockCols, InnerPanel> > : traits<XprTyp OuterStrideAtCompileTime = HasSameStorageOrderAsXprType ? int(outer_stride_at_compile_time<XprType>::ret) : int(inner_stride_at_compile_time<XprType>::ret), + // IsAligned is needed by MapBase's assertions + // We can sefely set it to false here. Internal alignment errors will be detected by an eigen_internal_assert in the respective evaluator + IsAligned = 0, +#ifndef EIGEN_TEST_EVALUATORS MaskPacketAccessBit = (InnerSize == Dynamic || (InnerSize % packet_traits<Scalar>::size) == 0) && (InnerStrideAtCompileTime == 1) ? PacketAccessBit : 0, @@ -92,6 +97,12 @@ struct traits<Block<XprType, BlockRows, BlockCols, InnerPanel> > : traits<XprTyp MaskPacketAccessBit | MaskAlignedBit), Flags = Flags0 | FlagsLinearAccessBit | FlagsLvalueBit | FlagsRowMajorBit +#else + // FIXME, this traits is rather specialized for dense object... + FlagsLvalueBit = is_lvalue<XprType>::value ? LvalueBit : 0, + FlagsRowMajorBit = IsRowMajor ? RowMajorBit : 0, + Flags = (traits<XprType>::Flags & DirectAccessBit) | FlagsLvalueBit | FlagsRowMajorBit // FIXME DirectAccessBit should not be handled by expressions +#endif }; }; diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h index 33c89c2d4..a5de3593c 100644 --- a/Eigen/src/Core/CoreEvaluators.h +++ b/Eigen/src/Core/CoreEvaluators.h @@ -136,7 +136,9 @@ struct evaluator<PlainObjectBase<Derived> > RowsAtCompileTime = PlainObjectType::RowsAtCompileTime, ColsAtCompileTime = PlainObjectType::ColsAtCompileTime, - CoeffReadCost = NumTraits<Scalar>::ReadCost + CoeffReadCost = NumTraits<Scalar>::ReadCost, + Flags = compute_matrix_evaluator_flags< Scalar,Derived::RowsAtCompileTime,Derived::ColsAtCompileTime, + Derived::Options,Derived::MaxRowsAtCompileTime,Derived::MaxColsAtCompileTime>::ret }; evaluator() @@ -323,7 +325,8 @@ struct evaluator<Transpose<ArgType> > typedef Transpose<ArgType> XprType; enum { - CoeffReadCost = evaluator<ArgType>::CoeffReadCost + CoeffReadCost = evaluator<ArgType>::CoeffReadCost, + Flags = evaluator<ArgType>::Flags ^ RowMajorBit }; evaluator(const XprType& t) : m_argImpl(t.nestedExpression()) {} @@ -389,9 +392,16 @@ struct evaluator<CwiseNullaryOp<NullaryOp,PlainObjectType> > : evaluator_base<CwiseNullaryOp<NullaryOp,PlainObjectType> > { typedef CwiseNullaryOp<NullaryOp,PlainObjectType> XprType; + typedef typename internal::remove_all<PlainObjectType>::type PlainObjectTypeCleaned; enum { - CoeffReadCost = internal::functor_traits<NullaryOp>::Cost + CoeffReadCost = internal::functor_traits<NullaryOp>::Cost, + + Flags = (evaluator<PlainObjectTypeCleaned>::Flags + & ( HereditaryBits + | (functor_has_linear_access<NullaryOp>::ret ? LinearAccessBit : 0) + | (functor_traits<NullaryOp>::PacketAccess ? PacketAccessBit : 0))) + | (functor_traits<NullaryOp>::IsRepeatable ? 0 : EvalBeforeNestingBit) // FIXME EvalBeforeNestingBit should be needed anymore }; evaluator(const XprType& n) @@ -437,7 +447,11 @@ struct evaluator<CwiseUnaryOp<UnaryOp, ArgType> > typedef CwiseUnaryOp<UnaryOp, ArgType> XprType; enum { - CoeffReadCost = evaluator<ArgType>::CoeffReadCost + functor_traits<UnaryOp>::Cost + CoeffReadCost = evaluator<ArgType>::CoeffReadCost + functor_traits<UnaryOp>::Cost, + + Flags = evaluator<ArgType>::Flags & ( + HereditaryBits | LinearAccessBit | AlignedBit + | (functor_traits<UnaryOp>::PacketAccess ? PacketAccessBit : 0)) }; evaluator(const XprType& op) @@ -485,7 +499,22 @@ struct evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs> > typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> XprType; enum { - CoeffReadCost = evaluator<Lhs>::CoeffReadCost + evaluator<Rhs>::CoeffReadCost + functor_traits<BinaryOp>::Cost + CoeffReadCost = evaluator<Lhs>::CoeffReadCost + evaluator<Rhs>::CoeffReadCost + functor_traits<BinaryOp>::Cost, + + LhsFlags = evaluator<Lhs>::Flags, + RhsFlags = evaluator<Rhs>::Flags, + SameType = is_same<typename Lhs::Scalar,typename Rhs::Scalar>::value, + StorageOrdersAgree = (int(LhsFlags)&RowMajorBit)==(int(RhsFlags)&RowMajorBit), + Flags0 = (int(LhsFlags) | int(RhsFlags)) & ( + HereditaryBits + | (int(LhsFlags) & int(RhsFlags) & + ( AlignedBit + | (StorageOrdersAgree ? LinearAccessBit : 0) + | (functor_traits<BinaryOp>::PacketAccess && StorageOrdersAgree && SameType ? PacketAccessBit : 0) + ) + ) + ), + Flags = (Flags0 & ~RowMajorBit) | (LhsFlags & RowMajorBit) }; evaluator(const XprType& xpr) @@ -537,7 +566,9 @@ struct evaluator<CwiseUnaryView<UnaryOp, ArgType> > typedef CwiseUnaryView<UnaryOp, ArgType> XprType; enum { - CoeffReadCost = evaluator<ArgType>::CoeffReadCost + functor_traits<UnaryOp>::Cost + CoeffReadCost = evaluator<ArgType>::CoeffReadCost + functor_traits<UnaryOp>::Cost, + + Flags = (evaluator<ArgType>::Flags & (HereditaryBits | LinearAccessBit | DirectAccessBit)) }; evaluator(const XprType& op) @@ -576,12 +607,15 @@ protected: // -------------------- Map -------------------- -template<typename Derived, int AccessorsType> -struct evaluator<MapBase<Derived, AccessorsType> > - : evaluator_base<Derived> +// FIXME perhaps the PlainObjectType could be provided by Derived::PlainObject ? +// but that might complicate template specialization +template<typename Derived, typename PlainObjectType> +struct mapbase_evaluator; + +template<typename Derived, typename PlainObjectType> +struct mapbase_evaluator : evaluator_base<Derived> { - typedef MapBase<Derived, AccessorsType> MapType; - typedef Derived XprType; + typedef Derived XprType; typedef typename XprType::PointerType PointerType; typedef typename XprType::Index Index; typedef typename XprType::Scalar Scalar; @@ -590,81 +624,103 @@ struct evaluator<MapBase<Derived, AccessorsType> > typedef typename XprType::PacketReturnType PacketReturnType; enum { - RowsAtCompileTime = XprType::RowsAtCompileTime, + IsRowMajor = XprType::RowsAtCompileTime, + ColsAtCompileTime = XprType::ColsAtCompileTime, CoeffReadCost = NumTraits<Scalar>::ReadCost }; - evaluator(const XprType& map) + mapbase_evaluator(const XprType& map) : m_data(const_cast<PointerType>(map.data())), - m_rowStride(map.rowStride()), - m_colStride(map.colStride()) - { } + m_xpr(map) + { + EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(evaluator<Derived>::Flags&PacketAccessBit, internal::inner_stride_at_compile_time<Derived>::ret==1), + PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1); + } CoeffReturnType coeff(Index row, Index col) const - { - return m_data[col * m_colStride + row * m_rowStride]; + { + return m_data[col * m_xpr.colStride() + row * m_xpr.rowStride()]; } CoeffReturnType coeff(Index index) const - { - return coeff(RowsAtCompileTime == 1 ? 0 : index, - RowsAtCompileTime == 1 ? index : 0); + { + return m_data[index * m_xpr.innerStride()]; } Scalar& coeffRef(Index row, Index col) - { - return m_data[col * m_colStride + row * m_rowStride]; + { + return m_data[col * m_xpr.colStride() + row * m_xpr.rowStride()]; } Scalar& coeffRef(Index index) - { - return coeffRef(RowsAtCompileTime == 1 ? 0 : index, - RowsAtCompileTime == 1 ? index : 0); + { + return m_data[index * m_xpr.innerStride()]; } template<int LoadMode> PacketReturnType packet(Index row, Index col) const - { - PointerType ptr = m_data + row * m_rowStride + col * m_colStride; + { + PointerType ptr = m_data + row * m_xpr.rowStride() + col * m_xpr.colStride(); return internal::ploadt<PacketScalar, LoadMode>(ptr); } template<int LoadMode> PacketReturnType packet(Index index) const - { - return packet<LoadMode>(RowsAtCompileTime == 1 ? 0 : index, - RowsAtCompileTime == 1 ? index : 0); + { + return internal::ploadt<PacketScalar, LoadMode>(m_data + index * m_xpr.innerStride()); } template<int StoreMode> void writePacket(Index row, Index col, const PacketScalar& x) - { - PointerType ptr = m_data + row * m_rowStride + col * m_colStride; + { + PointerType ptr = m_data + row * m_xpr.rowStride() + col * m_xpr.colStride(); return internal::pstoret<Scalar, PacketScalar, StoreMode>(ptr, x); } template<int StoreMode> void writePacket(Index index, const PacketScalar& x) - { - return writePacket<StoreMode>(RowsAtCompileTime == 1 ? 0 : index, - RowsAtCompileTime == 1 ? index : 0, - x); + { + internal::pstoret<Scalar, PacketScalar, StoreMode>(m_data + index * m_xpr.innerStride(), x); } protected: PointerType m_data; - int m_rowStride; - int m_colStride; + const XprType& m_xpr; }; template<typename PlainObjectType, int MapOptions, typename StrideType> struct evaluator<Map<PlainObjectType, MapOptions, StrideType> > - : public evaluator<MapBase<Map<PlainObjectType, MapOptions, StrideType> > > + : public mapbase_evaluator<Map<PlainObjectType, MapOptions, StrideType>, PlainObjectType> { typedef Map<PlainObjectType, MapOptions, StrideType> XprType; + typedef typename XprType::Scalar Scalar; + + enum { + InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0 + ? int(PlainObjectType::InnerStrideAtCompileTime) + : int(StrideType::InnerStrideAtCompileTime), + OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0 + ? int(PlainObjectType::OuterStrideAtCompileTime) + : int(StrideType::OuterStrideAtCompileTime), + HasNoInnerStride = InnerStrideAtCompileTime == 1, + HasNoOuterStride = StrideType::OuterStrideAtCompileTime == 0, + HasNoStride = HasNoInnerStride && HasNoOuterStride, + IsAligned = bool(EIGEN_ALIGN) && ((int(MapOptions)&Aligned)==Aligned), + IsDynamicSize = PlainObjectType::SizeAtCompileTime==Dynamic, + KeepsPacketAccess = bool(HasNoInnerStride) + && ( bool(IsDynamicSize) + || HasNoOuterStride + || ( OuterStrideAtCompileTime!=Dynamic + && ((static_cast<int>(sizeof(Scalar))*OuterStrideAtCompileTime)%16)==0 ) ), + Flags0 = evaluator<PlainObjectType>::Flags, + Flags1 = IsAligned ? (int(Flags0) | AlignedBit) : (int(Flags0) & ~AlignedBit), + Flags2 = (bool(HasNoStride) || bool(PlainObjectType::IsVectorAtCompileTime)) + ? int(Flags1) : int(Flags1 & ~LinearAccessBit), + Flags = KeepsPacketAccess ? int(Flags2) : (int(Flags2) & ~PacketAccessBit) + }; evaluator(const XprType& map) - : evaluator<MapBase<XprType> >(map) + : mapbase_evaluator<XprType, PlainObjectType>(map) { } }; @@ -672,12 +728,16 @@ struct evaluator<Map<PlainObjectType, MapOptions, StrideType> > template<typename PlainObjectType, int RefOptions, typename StrideType> struct evaluator<Ref<PlainObjectType, RefOptions, StrideType> > - : public evaluator<MapBase<Ref<PlainObjectType, RefOptions, StrideType> > > + : public mapbase_evaluator<Ref<PlainObjectType, RefOptions, StrideType>, PlainObjectType> { typedef Ref<PlainObjectType, RefOptions, StrideType> XprType; + + enum { + Flags = evaluator<Map<PlainObjectType, RefOptions, StrideType> >::Flags + }; - evaluator(const XprType& map) - : evaluator<MapBase<XprType> >(map) + evaluator(const XprType& ref) + : mapbase_evaluator<XprType, PlainObjectType>(ref) { } }; @@ -691,8 +751,39 @@ struct evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> > : block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel> { typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType; + typedef typename XprType::Scalar Scalar; + enum { - CoeffReadCost = evaluator<ArgType>::CoeffReadCost + CoeffReadCost = evaluator<ArgType>::CoeffReadCost, + + RowsAtCompileTime = traits<ArgType>::RowsAtCompileTime, + ColsAtCompileTime = traits<ArgType>::ColsAtCompileTime, + MaxRowsAtCompileTime = traits<ArgType>::MaxRowsAtCompileTime, + MaxColsAtCompileTime = traits<ArgType>::MaxColsAtCompileTime, + + XprTypeIsRowMajor = (int(traits<ArgType>::Flags)&RowMajorBit) != 0, + IsRowMajor = (MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1) ? 1 + : (MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1) ? 0 + : XprTypeIsRowMajor, + HasSameStorageOrderAsXprType = (IsRowMajor == XprTypeIsRowMajor), + InnerSize = IsRowMajor ? int(ColsAtCompileTime) : int(RowsAtCompileTime), + InnerStrideAtCompileTime = HasSameStorageOrderAsXprType + ? int(inner_stride_at_compile_time<XprType>::ret) + : int(outer_stride_at_compile_time<XprType>::ret), + OuterStrideAtCompileTime = HasSameStorageOrderAsXprType + ? int(outer_stride_at_compile_time<XprType>::ret) + : int(inner_stride_at_compile_time<XprType>::ret), + MaskPacketAccessBit = (InnerSize == Dynamic || (InnerSize % packet_traits<Scalar>::size) == 0) + && (InnerStrideAtCompileTime == 1) + ? PacketAccessBit : 0, + MaskAlignedBit = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % 16) == 0)) ? AlignedBit : 0, + FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1) ? LinearAccessBit : 0, + FlagsRowMajorBit = XprType::Flags&RowMajorBit, + Flags0 = traits<XprType>::Flags & ( (HereditaryBits & ~RowMajorBit) | + DirectAccessBit | + MaskPacketAccessBit | + MaskAlignedBit), + Flags = Flags0 | FlagsLinearAccessBit | FlagsRowMajorBit }; typedef block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel> block_evaluator_type; evaluator(const XprType& block) : block_evaluator_type(block) {} @@ -778,18 +869,23 @@ protected: template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel> struct block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel, /* HasDirectAccess */ true> - : evaluator<MapBase<Block<ArgType, BlockRows, BlockCols, InnerPanel> > > + : mapbase_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, + typename Block<ArgType, BlockRows, BlockCols, InnerPanel>::PlainObject> { typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType; block_evaluator(const XprType& block) - : evaluator<MapBase<XprType> >(block) - { } + : mapbase_evaluator<XprType, typename XprType::PlainObject>(block) + { + // FIXME this should be an internal assertion + eigen_assert(EIGEN_IMPLIES(evaluator<XprType>::Flags&AlignedBit, (size_t(block.data()) % 16) == 0) && "data is not aligned"); + } }; // -------------------- Select -------------------- +// TODO enable vectorization for Select template<typename ConditionMatrixType, typename ThenMatrixType, typename ElseMatrixType> struct evaluator<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> > : evaluator_base<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> > @@ -798,7 +894,9 @@ struct evaluator<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> > enum { CoeffReadCost = evaluator<ConditionMatrixType>::CoeffReadCost + EIGEN_SIZE_MAX(evaluator<ThenMatrixType>::CoeffReadCost, - evaluator<ElseMatrixType>::CoeffReadCost) + evaluator<ElseMatrixType>::CoeffReadCost), + + Flags = (unsigned int)evaluator<ThenMatrixType>::Flags & evaluator<ElseMatrixType>::Flags & HereditaryBits }; evaluator(const XprType& select) @@ -850,7 +948,9 @@ struct evaluator<Replicate<ArgType, RowFactor, ColFactor> > typedef typename internal::remove_all<ArgTypeNested>::type ArgTypeNestedCleaned; enum { - CoeffReadCost = evaluator<ArgTypeNestedCleaned>::CoeffReadCost + CoeffReadCost = evaluator<ArgTypeNestedCleaned>::CoeffReadCost, + + Flags = (evaluator<ArgTypeNestedCleaned>::Flags & HereditaryBits & ~RowMajorBit) | (traits<XprType>::Flags & RowMajorBit) }; evaluator(const XprType& replicate) @@ -858,7 +958,7 @@ struct evaluator<Replicate<ArgType, RowFactor, ColFactor> > m_argImpl(m_arg), m_rows(replicate.nestedExpression().rows()), m_cols(replicate.nestedExpression().cols()) - { } + {} CoeffReturnType coeff(Index row, Index col) const { @@ -907,17 +1007,19 @@ struct evaluator<PartialReduxExpr<ArgType, MemberOp, Direction> > typedef PartialReduxExpr<ArgType, MemberOp, Direction> XprType; typedef typename XprType::Scalar InputScalar; enum { - TraversalSize = Direction==Vertical ? XprType::RowsAtCompileTime : XprType::ColsAtCompileTime + TraversalSize = Direction==Vertical ? ArgType::RowsAtCompileTime : XprType::ColsAtCompileTime }; typedef typename MemberOp::template Cost<InputScalar,int(TraversalSize)> CostOpType; enum { CoeffReadCost = TraversalSize==Dynamic ? Dynamic - : TraversalSize * evaluator<ArgType>::CoeffReadCost + int(CostOpType::value) + : TraversalSize * evaluator<ArgType>::CoeffReadCost + int(CostOpType::value), + + Flags = (traits<XprType>::Flags&RowMajorBit) | (evaluator<ArgType>::Flags&HereditaryBits) }; evaluator(const XprType expr) : m_expr(expr) - { } + {} typedef typename XprType::Index Index; typedef typename XprType::CoeffReturnType CoeffReturnType; @@ -948,7 +1050,8 @@ struct evaluator_wrapper_base { typedef typename remove_all<typename XprType::NestedExpressionType>::type ArgType; enum { - CoeffReadCost = evaluator<ArgType>::CoeffReadCost + CoeffReadCost = evaluator<ArgType>::CoeffReadCost, + Flags = evaluator<ArgType>::Flags }; evaluator_wrapper_base(const ArgType& arg) : m_argImpl(arg) {} @@ -1058,7 +1161,15 @@ struct evaluator<Reverse<ArgType, Direction> > || ((Direction == Vertical) && IsColMajor) || ((Direction == Horizontal) && IsRowMajor), - CoeffReadCost = evaluator<ArgType>::CoeffReadCost + CoeffReadCost = evaluator<ArgType>::CoeffReadCost, + + // let's enable LinearAccess only with vectorization because of the product overhead + // FIXME enable DirectAccess with negative strides? + Flags0 = evaluator<ArgType>::Flags, + LinearAccess = ( (Direction==BothDirections) && (int(Flags0)&PacketAccessBit) ) + ? LinearAccessBit : 0, + + Flags = int(Flags0) & (HereditaryBits | PacketAccessBit | LinearAccess) }; typedef internal::reverse_packet_cond<PacketScalar,ReversePacket> reverse_packet; @@ -1071,7 +1182,7 @@ struct evaluator<Reverse<ArgType, Direction> > CoeffReturnType coeff(Index row, Index col) const { return m_argImpl.coeff(ReverseRow ? m_rows.value() - row - 1 : row, - ReverseCol ? m_cols.value() - col - 1 : col); + ReverseCol ? m_cols.value() - col - 1 : col); } CoeffReturnType coeff(Index index) const @@ -1082,7 +1193,7 @@ struct evaluator<Reverse<ArgType, Direction> > Scalar& coeffRef(Index row, Index col) { return m_argImpl.coeffRef(ReverseRow ? m_rows.value() - row - 1 : row, - ReverseCol ? m_cols.value() - col - 1 : col); + ReverseCol ? m_cols.value() - col - 1 : col); } Scalar& coeffRef(Index index) @@ -1138,7 +1249,9 @@ struct evaluator<Diagonal<ArgType, DiagIndex> > typedef Diagonal<ArgType, DiagIndex> XprType; enum { - CoeffReadCost = evaluator<ArgType>::CoeffReadCost + CoeffReadCost = evaluator<ArgType>::CoeffReadCost, + + Flags = (unsigned int)evaluator<ArgType>::Flags & (HereditaryBits | LinearAccessBit | DirectAccessBit) & ~RowMajorBit }; evaluator(const XprType& diagonal) diff --git a/Eigen/src/Core/CwiseBinaryOp.h b/Eigen/src/Core/CwiseBinaryOp.h index 105e7fb11..07861dbc9 100644 --- a/Eigen/src/Core/CwiseBinaryOp.h +++ b/Eigen/src/Core/CwiseBinaryOp.h @@ -65,6 +65,7 @@ struct traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> > typedef typename remove_reference<LhsNested>::type _LhsNested; typedef typename remove_reference<RhsNested>::type _RhsNested; enum { +#ifndef EIGEN_TEST_EVALUATORS LhsFlags = _LhsNested::Flags, RhsFlags = _RhsNested::Flags, SameType = is_same<typename _LhsNested::Scalar,typename _RhsNested::Scalar>::value, @@ -78,12 +79,13 @@ struct traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> > ) ) ), - Flags = (Flags0 & ~RowMajorBit) | (LhsFlags & RowMajorBit) -#ifndef EIGEN_TEST_EVALUATORS - , + Flags = (Flags0 & ~RowMajorBit) | (LhsFlags & RowMajorBit), + LhsCoeffReadCost = _LhsNested::CoeffReadCost, RhsCoeffReadCost = _RhsNested::CoeffReadCost, CoeffReadCost = LhsCoeffReadCost + RhsCoeffReadCost + functor_traits<BinaryOp>::Cost +#else + Flags = _LhsNested::Flags & RowMajorBit #endif }; }; diff --git a/Eigen/src/Core/CwiseNullaryOp.h b/Eigen/src/Core/CwiseNullaryOp.h index 560e03f12..f9f127cc2 100644 --- a/Eigen/src/Core/CwiseNullaryOp.h +++ b/Eigen/src/Core/CwiseNullaryOp.h @@ -35,14 +35,15 @@ template<typename NullaryOp, typename PlainObjectType> struct traits<CwiseNullaryOp<NullaryOp, PlainObjectType> > : traits<PlainObjectType> { enum { +#ifndef EIGEN_TEST_EVALUATORS Flags = (traits<PlainObjectType>::Flags & ( HereditaryBits | (functor_has_linear_access<NullaryOp>::ret ? LinearAccessBit : 0) | (functor_traits<NullaryOp>::PacketAccess ? PacketAccessBit : 0))) - | (functor_traits<NullaryOp>::IsRepeatable ? 0 : EvalBeforeNestingBit) -#ifndef EIGEN_TEST_EVALUATORS - , + | (functor_traits<NullaryOp>::IsRepeatable ? 0 : EvalBeforeNestingBit), CoeffReadCost = functor_traits<NullaryOp>::Cost +#else + Flags = traits<PlainObjectType>::Flags & RowMajorBit #endif }; }; diff --git a/Eigen/src/Core/CwiseUnaryOp.h b/Eigen/src/Core/CwiseUnaryOp.h index 25da52ab7..af05a9108 100644 --- a/Eigen/src/Core/CwiseUnaryOp.h +++ b/Eigen/src/Core/CwiseUnaryOp.h @@ -44,12 +44,13 @@ struct traits<CwiseUnaryOp<UnaryOp, XprType> > typedef typename XprType::Nested XprTypeNested; typedef typename remove_reference<XprTypeNested>::type _XprTypeNested; enum { +#ifndef EIGEN_TEST_EVALUATORS Flags = _XprTypeNested::Flags & ( HereditaryBits | LinearAccessBit | AlignedBit - | (functor_traits<UnaryOp>::PacketAccess ? PacketAccessBit : 0)) -#ifndef EIGEN_TEST_EVALUATORS - , + | (functor_traits<UnaryOp>::PacketAccess ? PacketAccessBit : 0)), CoeffReadCost = _XprTypeNested::CoeffReadCost + functor_traits<UnaryOp>::Cost +#else + Flags = _XprTypeNested::Flags & RowMajorBit #endif }; }; diff --git a/Eigen/src/Core/CwiseUnaryView.h b/Eigen/src/Core/CwiseUnaryView.h index a0bd80fb9..9cdebb8e7 100644 --- a/Eigen/src/Core/CwiseUnaryView.h +++ b/Eigen/src/Core/CwiseUnaryView.h @@ -37,9 +37,11 @@ struct traits<CwiseUnaryView<ViewOp, MatrixType> > typedef typename MatrixType::Nested MatrixTypeNested; typedef typename remove_all<MatrixTypeNested>::type _MatrixTypeNested; enum { - Flags = (traits<_MatrixTypeNested>::Flags & (HereditaryBits | LvalueBit | LinearAccessBit | DirectAccessBit)), #ifndef EIGEN_TEST_EVALUATORS + Flags = (traits<_MatrixTypeNested>::Flags & (HereditaryBits | LvalueBit | LinearAccessBit | DirectAccessBit)), CoeffReadCost = traits<_MatrixTypeNested>::CoeffReadCost + functor_traits<ViewOp>::Cost, +#else + Flags = traits<_MatrixTypeNested>::Flags & (RowMajorBit | LvalueBit | DirectAccessBit), // FIXME DirectAccessBit should not be handled by expressions #endif MatrixTypeInnerStride = inner_stride_at_compile_time<MatrixType>::ret, // need to cast the sizeof's from size_t to int explicitly, otherwise: diff --git a/Eigen/src/Core/Diagonal.h b/Eigen/src/Core/Diagonal.h index 02ab04980..3ff6a3e66 100644 --- a/Eigen/src/Core/Diagonal.h +++ b/Eigen/src/Core/Diagonal.h @@ -51,10 +51,13 @@ struct traits<Diagonal<MatrixType,DiagIndex> > : (EIGEN_PLAIN_ENUM_MIN(MatrixType::MaxRowsAtCompileTime - EIGEN_PLAIN_ENUM_MAX(-DiagIndex, 0), MatrixType::MaxColsAtCompileTime - EIGEN_PLAIN_ENUM_MAX( DiagIndex, 0))), MaxColsAtCompileTime = 1, +#ifndef EIGEN_TEST_EVALUATORS MaskLvalueBit = is_lvalue<MatrixType>::value ? LvalueBit : 0, Flags = (unsigned int)_MatrixTypeNested::Flags & (HereditaryBits | LinearAccessBit | MaskLvalueBit | DirectAccessBit) & ~RowMajorBit, -#ifndef EIGEN_TEST_EVALUATORS CoeffReadCost = _MatrixTypeNested::CoeffReadCost, +#else + MaskLvalueBit = is_lvalue<MatrixType>::value ? LvalueBit : 0, + Flags = (unsigned int)_MatrixTypeNested::Flags & (RowMajorBit | MaskLvalueBit | DirectAccessBit) & ~RowMajorBit, // FIXME DirectAccessBit should not be handled by expressions #endif MatrixTypeOuterStride = outer_stride_at_compile_time<MatrixType>::ret, InnerStrideAtCompileTime = MatrixTypeOuterStride == Dynamic ? Dynamic : MatrixTypeOuterStride+1, diff --git a/Eigen/src/Core/DiagonalMatrix.h b/Eigen/src/Core/DiagonalMatrix.h index 784e4b1ce..ba0042ba4 100644 --- a/Eigen/src/Core/DiagonalMatrix.h +++ b/Eigen/src/Core/DiagonalMatrix.h @@ -275,6 +275,7 @@ struct traits<DiagonalWrapper<_DiagonalVectorType> > typedef typename DiagonalVectorType::Scalar Scalar; typedef typename DiagonalVectorType::Index Index; typedef typename DiagonalVectorType::StorageKind StorageKind; + typedef typename traits<DiagonalVectorType>::XprKind XprKind; enum { RowsAtCompileTime = DiagonalVectorType::SizeAtCompileTime, ColsAtCompileTime = DiagonalVectorType::SizeAtCompileTime, diff --git a/Eigen/src/Core/DiagonalProduct.h b/Eigen/src/Core/DiagonalProduct.h index 840b70dbb..c6dafdddc 100644 --- a/Eigen/src/Core/DiagonalProduct.h +++ b/Eigen/src/Core/DiagonalProduct.h @@ -26,6 +26,7 @@ struct traits<DiagonalProduct<MatrixType, DiagonalType, ProductOrder> > MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime, +#ifndef EIGEN_TEST_EVALUATORS _StorageOrder = MatrixType::Flags & RowMajorBit ? RowMajor : ColMajor, _ScalarAccessOnDiag = !((int(_StorageOrder) == ColMajor && int(ProductOrder) == OnTheLeft) ||(int(_StorageOrder) == RowMajor && int(ProductOrder) == OnTheRight)), @@ -34,11 +35,10 @@ struct traits<DiagonalProduct<MatrixType, DiagonalType, ProductOrder> > //_Vectorizable = bool(int(MatrixType::Flags)&PacketAccessBit) && ((!_PacketOnDiag) || (_SameTypes && bool(int(DiagonalType::DiagonalVectorType::Flags)&PacketAccessBit))), _Vectorizable = bool(int(MatrixType::Flags)&PacketAccessBit) && _SameTypes && (_ScalarAccessOnDiag || (bool(int(DiagonalType::DiagonalVectorType::Flags)&PacketAccessBit))), _LinearAccessMask = (RowsAtCompileTime==1 || ColsAtCompileTime==1) ? LinearAccessBit : 0, - - Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixType::Flags)) | (_Vectorizable ? PacketAccessBit : 0) | AlignedBit //(int(MatrixType::Flags)&int(DiagonalType::DiagonalVectorType::Flags)&AlignedBit), -#ifndef EIGEN_TEST_EVALUATORS - , + Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixType::Flags)) | (_Vectorizable ? PacketAccessBit : 0) | AlignedBit, //(int(MatrixType::Flags)&int(DiagonalType::DiagonalVectorType::Flags)&AlignedBit), CoeffReadCost = NumTraits<Scalar>::MulCost + MatrixType::CoeffReadCost + DiagonalType::DiagonalVectorType::CoeffReadCost +#else + Flags = RowMajorBit & (unsigned int)(MatrixType::Flags) #endif }; }; diff --git a/Eigen/src/Core/Map.h b/Eigen/src/Core/Map.h index 8ea13cfb7..23bbb46bf 100644 --- a/Eigen/src/Core/Map.h +++ b/Eigen/src/Core/Map.h @@ -79,10 +79,11 @@ struct traits<Map<PlainObjectType, MapOptions, StrideType> > OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0 ? int(PlainObjectType::OuterStrideAtCompileTime) : int(StrideType::OuterStrideAtCompileTime), + IsAligned = bool(EIGEN_ALIGN) && ((int(MapOptions)&Aligned)==Aligned), +#ifndef EIGEN_TEST_EVALUATORS HasNoInnerStride = InnerStrideAtCompileTime == 1, HasNoOuterStride = StrideType::OuterStrideAtCompileTime == 0, HasNoStride = HasNoInnerStride && HasNoOuterStride, - IsAligned = bool(EIGEN_ALIGN) && ((int(MapOptions)&Aligned)==Aligned), IsDynamicSize = PlainObjectType::SizeAtCompileTime==Dynamic, KeepsPacketAccess = bool(HasNoInnerStride) && ( bool(IsDynamicSize) @@ -95,6 +96,10 @@ struct traits<Map<PlainObjectType, MapOptions, StrideType> > ? int(Flags1) : int(Flags1 & ~LinearAccessBit), Flags3 = is_lvalue<PlainObjectType>::value ? int(Flags2) : (int(Flags2) & ~LvalueBit), Flags = KeepsPacketAccess ? int(Flags3) : (int(Flags3) & ~PacketAccessBit) +#else + Flags0 = TraitsBase::Flags & (~NestByRefBit), + Flags = is_lvalue<PlainObjectType>::value ? int(Flags0) : (int(Flags0) & ~LvalueBit) +#endif }; private: enum { Options }; // Expressions don't have Options diff --git a/Eigen/src/Core/MapBase.h b/Eigen/src/Core/MapBase.h index ffa1371c2..de1424b09 100644 --- a/Eigen/src/Core/MapBase.h +++ b/Eigen/src/Core/MapBase.h @@ -161,11 +161,16 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors> EIGEN_DEVICE_FUNC void checkSanity() const { +#ifndef EIGEN_TEST_EVALUATORS + // moved to evaluator EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(internal::traits<Derived>::Flags&PacketAccessBit, internal::inner_stride_at_compile_time<Derived>::ret==1), PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1); - eigen_assert(EIGEN_IMPLIES(internal::traits<Derived>::Flags&AlignedBit, (size_t(m_data) % 16) == 0) - && "data is not aligned"); + eigen_assert(EIGEN_IMPLIES(internal::traits<Derived>::Flags&AlignedBit, (size_t(m_data) % 16) == 0) && "data is not aligned"); +#else + eigen_assert(EIGEN_IMPLIES(internal::traits<Derived>::IsAligned, (size_t(m_data) % 16) == 0) && "data is not aligned"); +#endif + } PointerType m_data; diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h index cac90bc1f..453180049 100644 --- a/Eigen/src/Core/Product.h +++ b/Eigen/src/Core/Product.h @@ -33,14 +33,29 @@ template<typename Lhs, typename Rhs, int Option, typename StorageKind> class Pro namespace internal { template<typename Lhs, typename Rhs, int Option> struct traits<Product<Lhs, Rhs, Option> > - : traits<CoeffBasedProduct<Lhs, Rhs, NestByRefBit> > -{ - // We want A+B*C to be of type Product<Matrix, Sum> and not Product<Matrix, Matrix> - // TODO: This flag should eventually go in a separate evaluator traits class +{ + typedef typename remove_all<Lhs>::type LhsCleaned; + typedef typename remove_all<Rhs>::type RhsCleaned; + + typedef MatrixXpr XprKind; + + typedef typename scalar_product_traits<typename LhsCleaned::Scalar, typename RhsCleaned::Scalar>::ReturnType Scalar; + typedef typename promote_storage_type<typename traits<LhsCleaned>::StorageKind, + typename traits<RhsCleaned>::StorageKind>::ret StorageKind; + typedef typename promote_index_type<typename traits<LhsCleaned>::Index, + typename traits<RhsCleaned>::Index>::type Index; + enum { - Flags = traits<CoeffBasedProduct<Lhs, Rhs, NestByRefBit> >::Flags & ~(EvalBeforeNestingBit | DirectAccessBit) + RowsAtCompileTime = LhsCleaned::RowsAtCompileTime, + ColsAtCompileTime = RhsCleaned::ColsAtCompileTime, + MaxRowsAtCompileTime = LhsCleaned::MaxRowsAtCompileTime, + MaxColsAtCompileTime = RhsCleaned::MaxColsAtCompileTime, + + // The storage order is somewhat arbitrary here. The correct one will be determined through the evaluator. + Flags = (MaxRowsAtCompileTime==1 ? RowMajorBit : 0) }; }; + } // end namespace internal @@ -59,8 +74,6 @@ class Product : public ProductImpl<_Lhs,_Rhs,Option, typename internal::promote_storage_type<typename Lhs::StorageKind, typename Rhs::StorageKind>::ret>::Base Base; EIGEN_GENERIC_PUBLIC_INTERFACE(Product) - - typedef typename internal::nested<Lhs>::type LhsNested; typedef typename internal::nested<Rhs>::type RhsNested; diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h index 7ebf31696..1159c2f44 100644 --- a/Eigen/src/Core/ProductEvaluators.h +++ b/Eigen/src/Core/ProductEvaluators.h @@ -18,19 +18,6 @@ namespace Eigen { namespace internal { /** \internal - * \class product_evaluator - * Products need their own evaluator with more template arguments allowing for - * easier partial template specializations. - */ -template< typename T, - int ProductTag = internal::product_type<typename T::Lhs,typename T::Rhs>::ret, - typename LhsShape = typename evaluator_traits<typename T::Lhs>::Shape, - typename RhsShape = typename evaluator_traits<typename T::Rhs>::Shape, - typename LhsScalar = typename T::Lhs::Scalar, - typename RhsScalar = typename T::Rhs::Scalar - > struct product_evaluator; - -/** \internal * Evaluator of a product expression. * Since products require special treatments to handle all possible cases, * we simply deffer the evaluation logic to a product_evaluator class @@ -119,6 +106,18 @@ struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, ProductTag, DenseSha : m_result(xpr.rows(), xpr.cols()) { ::new (static_cast<Base*>(this)) Base(m_result); + +// FIXME shall we handle nested_eval here? +// typedef typename internal::nested_eval<Lhs,Rhs::ColsAtCompileTime>::type LhsNested; +// typedef typename internal::nested_eval<Rhs,Lhs::RowsAtCompileTime>::type RhsNested; +// typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned; +// typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned; +// +// const LhsNested lhs(xpr.lhs()); +// const RhsNested rhs(xpr.rhs()); +// +// generic_product_impl<LhsNestedCleaned, RhsNestedCleaned>::evalTo(m_result, lhs, rhs); + generic_product_impl<Lhs, Rhs>::evalTo(m_result, xpr.lhs(), xpr.rhs()); } @@ -133,6 +132,7 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::assign_ typedef Product<Lhs,Rhs,DefaultProduct> SrcXprType; static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &) { + // FIXME shall we handle nested_eval here? generic_product_impl<Lhs, Rhs>::evalTo(dst, src.lhs(), src.rhs()); } }; @@ -144,6 +144,7 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::add_ass typedef Product<Lhs,Rhs,DefaultProduct> SrcXprType; static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<Scalar> &) { + // FIXME shall we handle nested_eval here? generic_product_impl<Lhs, Rhs>::addTo(dst, src.lhs(), src.rhs()); } }; @@ -155,6 +156,7 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::sub_ass typedef Product<Lhs,Rhs,DefaultProduct> SrcXprType; static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<Scalar> &) { + // FIXME shall we handle nested_eval here? generic_product_impl<Lhs, Rhs>::subTo(dst, src.lhs(), src.rhs()); } }; @@ -368,7 +370,6 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape, : evaluator_base<Product<Lhs, Rhs, LazyProduct> > { typedef Product<Lhs, Rhs, LazyProduct> XprType; - typedef CoeffBasedProduct<Lhs, Rhs, 0> CoeffBasedProductType; typedef typename XprType::Index Index; typedef typename XprType::Scalar Scalar; typedef typename XprType::CoeffReturnType CoeffReturnType; @@ -396,9 +397,13 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape, typedef typename evaluator<RhsNestedCleaned>::type RhsEtorType; enum { - RowsAtCompileTime = traits<CoeffBasedProductType>::RowsAtCompileTime, + RowsAtCompileTime = LhsNestedCleaned::RowsAtCompileTime, + ColsAtCompileTime = RhsNestedCleaned::ColsAtCompileTime, + InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(LhsNestedCleaned::ColsAtCompileTime, RhsNestedCleaned::RowsAtCompileTime), + MaxRowsAtCompileTime = LhsNestedCleaned::MaxRowsAtCompileTime, + MaxColsAtCompileTime = RhsNestedCleaned::MaxColsAtCompileTime, + PacketSize = packet_traits<Scalar>::size, - InnerSize = traits<CoeffBasedProductType>::InnerSize, LhsCoeffReadCost = LhsEtorType::CoeffReadCost, RhsCoeffReadCost = RhsEtorType::CoeffReadCost, @@ -407,8 +412,51 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape, + (InnerSize - 1) * NumTraits<Scalar>::AddCost, Unroll = CoeffReadCost != Dynamic && CoeffReadCost <= EIGEN_UNROLLING_LIMIT, - CanVectorizeInner = traits<CoeffBasedProductType>::CanVectorizeInner, - Flags = traits<CoeffBasedProductType>::Flags + + LhsFlags = LhsEtorType::Flags, + RhsFlags = RhsEtorType::Flags, + + LhsRowMajor = LhsFlags & RowMajorBit, + RhsRowMajor = RhsFlags & RowMajorBit, + + SameType = is_same<typename LhsNestedCleaned::Scalar,typename RhsNestedCleaned::Scalar>::value, + + CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit) + && (ColsAtCompileTime == Dynamic + || ( (ColsAtCompileTime % packet_traits<Scalar>::size) == 0 + && (RhsFlags&AlignedBit) + ) + ), + + CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit) + && (RowsAtCompileTime == Dynamic + || ( (RowsAtCompileTime % packet_traits<Scalar>::size) == 0 + && (LhsFlags&AlignedBit) + ) + ), + + EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1 + : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0 + : (RhsRowMajor && !CanVectorizeLhs), + + Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit) + | (EvalToRowMajor ? RowMajorBit : 0) + | (CanVectorizeLhs ? (LhsFlags & AlignedBit) : 0) + | (CanVectorizeRhs ? (RhsFlags & AlignedBit) : 0) + // TODO enable vectorization for mixed types + | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0), + + /* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside + * of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner + * loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect + * the Flags, it is safe to make this value depend on ActualPacketAccessBit, that doesn't affect the ABI. + */ + CanVectorizeInner = SameType + && LhsRowMajor + && (!RhsRowMajor) + && (LhsFlags & RhsFlags & ActualPacketAccessBit) + && (LhsFlags & RhsFlags & AlignedBit) + && (InnerSize % packet_traits<Scalar>::size == 0) }; const CoeffReturnType coeff(Index row, Index col) const @@ -689,7 +737,7 @@ protected: * Diagonal products ***************************************************************************/ -template<typename MatrixType, typename DiagonalType, typename Derived> +template<typename MatrixType, typename DiagonalType, typename Derived, int ProductOrder> struct diagonal_product_evaluator_base : evaluator_base<Derived> { @@ -698,7 +746,20 @@ struct diagonal_product_evaluator_base typedef typename internal::packet_traits<Scalar>::type PacketScalar; public: enum { - CoeffReadCost = NumTraits<Scalar>::MulCost + evaluator<MatrixType>::CoeffReadCost + evaluator<DiagonalType>::CoeffReadCost + CoeffReadCost = NumTraits<Scalar>::MulCost + evaluator<MatrixType>::CoeffReadCost + evaluator<DiagonalType>::CoeffReadCost, + + MatrixFlags = evaluator<MatrixType>::Flags, + DiagFlags = evaluator<DiagonalType>::Flags, + _StorageOrder = MatrixFlags & RowMajorBit ? RowMajor : ColMajor, + _ScalarAccessOnDiag = !((int(_StorageOrder) == ColMajor && int(ProductOrder) == OnTheLeft) + ||(int(_StorageOrder) == RowMajor && int(ProductOrder) == OnTheRight)), + _SameTypes = is_same<typename MatrixType::Scalar, typename DiagonalType::Scalar>::value, + // FIXME currently we need same types, but in the future the next rule should be the one + //_Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && ((!_PacketOnDiag) || (_SameTypes && bool(int(DiagFlags)&PacketAccessBit))), + _Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && _SameTypes && (_ScalarAccessOnDiag || (bool(int(DiagFlags)&PacketAccessBit))), + _LinearAccessMask = (MatrixType::RowsAtCompileTime==1 || MatrixType::ColsAtCompileTime==1) ? LinearAccessBit : 0, + Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixFlags)) | (_Vectorizable ? PacketAccessBit : 0) | AlignedBit + //(int(MatrixFlags)&int(DiagFlags)&AlignedBit), }; diagonal_product_evaluator_base(const MatrixType &mat, const DiagonalType &diag) @@ -724,7 +785,7 @@ protected: { enum { InnerSize = (MatrixType::Flags & RowMajorBit) ? MatrixType::ColsAtCompileTime : MatrixType::RowsAtCompileTime, - DiagonalPacketLoadMode = (LoadMode == Aligned && (((InnerSize%16) == 0) || (int(DiagonalType::Flags)&AlignedBit)==AlignedBit) ? Aligned : Unaligned) + DiagonalPacketLoadMode = (LoadMode == Aligned && (((InnerSize%16) == 0) || (int(DiagFlags)&AlignedBit)==AlignedBit) ? Aligned : Unaligned) }; return internal::pmul(m_matImpl.template packet<LoadMode>(row, col), m_diagImpl.template packet<DiagonalPacketLoadMode>(id)); @@ -737,9 +798,9 @@ protected: // diagonal * dense template<typename Lhs, typename Rhs, int ProductKind, int ProductTag> struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DiagonalShape, DenseShape, typename Lhs::Scalar, typename Rhs::Scalar> - : diagonal_product_evaluator_base<Rhs, typename Lhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct> > + : diagonal_product_evaluator_base<Rhs, typename Lhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheLeft> { - typedef diagonal_product_evaluator_base<Rhs, typename Lhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct> > Base; + typedef diagonal_product_evaluator_base<Rhs, typename Lhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheLeft> Base; using Base::m_diagImpl; using Base::m_matImpl; using Base::coeff; @@ -783,9 +844,9 @@ struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DiagonalSha // dense * diagonal template<typename Lhs, typename Rhs, int ProductKind, int ProductTag> struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DenseShape, DiagonalShape, typename Lhs::Scalar, typename Rhs::Scalar> - : diagonal_product_evaluator_base<Lhs, typename Rhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct> > + : diagonal_product_evaluator_base<Lhs, typename Rhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheRight> { - typedef diagonal_product_evaluator_base<Lhs, typename Rhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct> > Base; + typedef diagonal_product_evaluator_base<Lhs, typename Rhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheRight> Base; using Base::m_diagImpl; using Base::m_matImpl; using Base::coeff; diff --git a/Eigen/src/Core/Redux.h b/Eigen/src/Core/Redux.h index 41290323f..6c8c58e95 100644 --- a/Eigen/src/Core/Redux.h +++ b/Eigen/src/Core/Redux.h @@ -389,8 +389,19 @@ DenseBase<Derived>::redux(const Func& func) const eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix"); #ifdef EIGEN_TEST_EVALUATORS + // FIXME, eval_nest should be handled by redux_evaluator, however: + // - it is currently difficult to provide the right Flags since they are still handled by the expressions + // - handling it here might reduce the number of template instantiations +// typedef typename internal::nested_eval<Derived,1>::type ThisNested; +// typedef typename internal::remove_all<ThisNested>::type ThisNestedCleaned; +// typedef typename internal::redux_evaluator<ThisNestedCleaned> ThisEvaluator; +// +// ThisNested thisNested(derived()); +// ThisEvaluator thisEval(thisNested); + typedef typename internal::redux_evaluator<Derived> ThisEvaluator; ThisEvaluator thisEval(derived()); + return internal::redux_impl<Func, ThisEvaluator>::run(thisEval, func); #else diff --git a/Eigen/src/Core/Replicate.h b/Eigen/src/Core/Replicate.h index 1e640d8aa..2dff03ea3 100644 --- a/Eigen/src/Core/Replicate.h +++ b/Eigen/src/Core/Replicate.h @@ -53,10 +53,13 @@ struct traits<Replicate<MatrixType,RowFactor,ColFactor> > IsRowMajor = MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1 ? 1 : MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1 ? 0 : (MatrixType::Flags & RowMajorBit) ? 1 : 0, - Flags = (_MatrixTypeNested::Flags & HereditaryBits & ~RowMajorBit) | (IsRowMajor ? RowMajorBit : 0) + #ifndef EIGEN_TEST_EVALUATORS - , + Flags = (_MatrixTypeNested::Flags & HereditaryBits & ~RowMajorBit) | (IsRowMajor ? RowMajorBit : 0), CoeffReadCost = _MatrixTypeNested::CoeffReadCost +#else + // FIXME enable DirectAccess with negative strides? + Flags = IsRowMajor ? RowMajorBit : 0 #endif }; }; diff --git a/Eigen/src/Core/Reverse.h b/Eigen/src/Core/Reverse.h index 495b44cc4..4969bb4fc 100644 --- a/Eigen/src/Core/Reverse.h +++ b/Eigen/src/Core/Reverse.h @@ -45,14 +45,15 @@ struct traits<Reverse<MatrixType, Direction> > MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime, +#ifndef EIGEN_TEST_EVALUATORS // let's enable LinearAccess only with vectorization because of the product overhead LinearAccess = ( (Direction==BothDirections) && (int(_MatrixTypeNested::Flags)&PacketAccessBit) ) ? LinearAccessBit : 0, - Flags = int(_MatrixTypeNested::Flags) & (HereditaryBits | LvalueBit | PacketAccessBit | LinearAccess) -#ifndef EIGEN_TEST_EVALUATORS - , + Flags = int(_MatrixTypeNested::Flags) & (HereditaryBits | LvalueBit | PacketAccessBit | LinearAccess), CoeffReadCost = _MatrixTypeNested::CoeffReadCost +#else + Flags = _MatrixTypeNested::Flags & (RowMajorBit | LvalueBit) #endif }; }; diff --git a/Eigen/src/Core/Select.h b/Eigen/src/Core/Select.h index abcba2d15..d4fd88e62 100644 --- a/Eigen/src/Core/Select.h +++ b/Eigen/src/Core/Select.h @@ -43,12 +43,13 @@ struct traits<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> > ColsAtCompileTime = ConditionMatrixType::ColsAtCompileTime, MaxRowsAtCompileTime = ConditionMatrixType::MaxRowsAtCompileTime, MaxColsAtCompileTime = ConditionMatrixType::MaxColsAtCompileTime, - Flags = (unsigned int)ThenMatrixType::Flags & ElseMatrixType::Flags & HereditaryBits #ifndef EIGEN_TEST_EVALUATORS - , + Flags = (unsigned int)ThenMatrixType::Flags & ElseMatrixType::Flags & HereditaryBits, CoeffReadCost = traits<typename remove_all<ConditionMatrixNested>::type>::CoeffReadCost + EIGEN_SIZE_MAX(traits<typename remove_all<ThenMatrixNested>::type>::CoeffReadCost, traits<typename remove_all<ElseMatrixNested>::type>::CoeffReadCost) +#else + Flags = (unsigned int)ThenMatrixType::Flags & ElseMatrixType::Flags & RowMajorBit #endif }; }; diff --git a/Eigen/src/Core/Transpose.h b/Eigen/src/Core/Transpose.h index 98f9e888f..11b0e45a8 100644 --- a/Eigen/src/Core/Transpose.h +++ b/Eigen/src/Core/Transpose.h @@ -41,12 +41,17 @@ struct traits<Transpose<MatrixType> > : traits<MatrixType> ColsAtCompileTime = MatrixType::RowsAtCompileTime, MaxRowsAtCompileTime = MatrixType::MaxColsAtCompileTime, MaxColsAtCompileTime = MatrixType::MaxRowsAtCompileTime, +#ifndef EIGEN_TEST_EVALUATORS FlagsLvalueBit = is_lvalue<MatrixType>::value ? LvalueBit : 0, Flags0 = MatrixTypeNestedPlain::Flags & ~(LvalueBit | NestByRefBit), Flags1 = Flags0 | FlagsLvalueBit, Flags = Flags1 ^ RowMajorBit, -#ifndef EIGEN_TEST_EVALUATORS CoeffReadCost = MatrixTypeNestedPlain::CoeffReadCost, +#else + FlagsLvalueBit = is_lvalue<MatrixType>::value ? LvalueBit : 0, + Flags0 = MatrixTypeNestedPlain::Flags & ~(LvalueBit | NestByRefBit), + Flags1 = Flags0 | FlagsLvalueBit, + Flags = Flags1 ^ RowMajorBit, #endif InnerStrideAtCompileTime = inner_stride_at_compile_time<MatrixType>::ret, OuterStrideAtCompileTime = outer_stride_at_compile_time<MatrixType>::ret diff --git a/Eigen/src/Core/VectorwiseOp.h b/Eigen/src/Core/VectorwiseOp.h index 702d0006d..672b9662f 100644 --- a/Eigen/src/Core/VectorwiseOp.h +++ b/Eigen/src/Core/VectorwiseOp.h @@ -48,8 +48,12 @@ struct traits<PartialReduxExpr<MatrixType, MemberOp, Direction> > ColsAtCompileTime = Direction==Horizontal ? 1 : MatrixType::ColsAtCompileTime, MaxRowsAtCompileTime = Direction==Vertical ? 1 : MatrixType::MaxRowsAtCompileTime, MaxColsAtCompileTime = Direction==Horizontal ? 1 : MatrixType::MaxColsAtCompileTime, +#ifndef EIGEN_TEST_EVALUATORS Flags0 = (unsigned int)_MatrixTypeNested::Flags & HereditaryBits, Flags = (Flags0 & ~RowMajorBit) | (RowsAtCompileTime == 1 ? RowMajorBit : 0), +#else + Flags = RowsAtCompileTime == 1 ? RowMajorBit : 0, +#endif TraversalSize = Direction==Vertical ? MatrixType::RowsAtCompileTime : MatrixType::ColsAtCompileTime }; #ifndef EIGEN_TEST_EVALUATORS diff --git a/Eigen/src/Core/products/TriangularMatrixVector.h b/Eigen/src/Core/products/TriangularMatrixVector.h index eed7f4258..771613b11 100644 --- a/Eigen/src/Core/products/TriangularMatrixVector.h +++ b/Eigen/src/Core/products/TriangularMatrixVector.h @@ -259,7 +259,7 @@ template<int Mode> struct trmv_selector<Mode,ColMajor> typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType; typedef internal::blas_traits<Rhs> RhsBlasTraits; typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType; - + typedef Map<Matrix<ResScalar,Dynamic,1>, Aligned> MappedDest; typename internal::add_const_on_value_type<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(lhs); diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h index 975fdbf2a..092ba758e 100644 --- a/Eigen/src/Core/util/ForwardDeclarations.h +++ b/Eigen/src/Core/util/ForwardDeclarations.h @@ -157,6 +157,18 @@ template<typename _Scalar, int Rows=Dynamic, int Cols=Dynamic, int Supers=Dynami namespace internal { template<typename Lhs, typename Rhs> struct product_type; +/** \internal + * \class product_evaluator + * Products need their own evaluator with more template arguments allowing for + * easier partial template specializations. + */ +template< typename T, + int ProductTag = internal::product_type<typename T::Lhs,typename T::Rhs>::ret, + typename LhsShape = typename evaluator_traits<typename T::Lhs>::Shape, + typename RhsShape = typename evaluator_traits<typename T::Rhs>::Shape, + typename LhsScalar = typename T::Lhs::Scalar, + typename RhsScalar = typename T::Rhs::Scalar + > struct product_evaluator; } template<typename Lhs, typename Rhs, diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h index bcd6183e2..016b37f71 100644 --- a/Eigen/src/Core/util/XprHelper.h +++ b/Eigen/src/Core/util/XprHelper.h @@ -124,6 +124,7 @@ template<typename _Scalar, int _Rows, int _Cols, typedef Matrix<_Scalar, _Rows, _Cols, Options, _MaxRows, _MaxCols> type; }; +#ifndef EIGEN_TEST_EVALUATORS template<typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols> class compute_matrix_flags { @@ -158,6 +159,57 @@ class compute_matrix_flags enum { ret = LinearAccessBit | LvalueBit | DirectAccessBit | NestByRefBit | packet_access_bit | row_major_bit | aligned_bit }; }; +#else // EIGEN_TEST_EVALUATORS + +template<typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols> +class compute_matrix_flags +{ + enum { row_major_bit = Options&RowMajor ? RowMajorBit : 0 }; + public: + // FIXME currently we still have to handle DirectAccessBit at the expression level to handle DenseCoeffsBase<> + // and then propagate this information to the evaluator's flags. + // However, I (Gael) think that DirectAccessBit should only matter at the evaluation stage. + enum { ret = DirectAccessBit | LvalueBit | NestByRefBit | row_major_bit }; +}; +#endif + +#ifdef EIGEN_ENABLE_EVALUATORS +template<typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols> +class compute_matrix_evaluator_flags +{ + enum { + row_major_bit = Options&RowMajor ? RowMajorBit : 0, + is_dynamic_size_storage = MaxRows==Dynamic || MaxCols==Dynamic, + + aligned_bit = + ( + ((Options&DontAlign)==0) + && ( +#if EIGEN_ALIGN_STATICALLY + ((!is_dynamic_size_storage) && (((MaxCols*MaxRows*int(sizeof(Scalar))) % 16) == 0)) +#else + 0 +#endif + + || + +#if EIGEN_ALIGN + is_dynamic_size_storage +#else + 0 +#endif + + ) + ) ? AlignedBit : 0, + packet_access_bit = packet_traits<Scalar>::Vectorizable && aligned_bit ? PacketAccessBit : 0 + }; + + public: + enum { ret = LinearAccessBit | DirectAccessBit | packet_access_bit | row_major_bit | aligned_bit }; +}; + +#endif // EIGEN_ENABLE_EVALUATORS + template<int _Rows, int _Cols> struct size_at_compile_time { enum { ret = (_Rows==Dynamic || _Cols==Dynamic) ? Dynamic : _Rows * _Cols }; |