author     Gael Guennebaud <g.gael@free.fr>  2014-03-12 13:34:11 +0100
committer  Gael Guennebaud <g.gael@free.fr>  2014-03-12 13:34:11 +0100
commit     8dd3b716e39d4b4b472b948de1af20838bf17493 (patch)
tree       3fa4e90f1a6caf23e5028c8c5025e04ad27a8768 /Eigen/src/Core
parent     7eefdb948c1ff372f85991ff3f9d998e66a554d9 (diff)
Move evaluation related flags from traits to evaluator and fix evaluators of MapBase and Replicate
Diffstat (limited to 'Eigen/src/Core')
-rw-r--r--  Eigen/src/Core/AssignEvaluator.h                    |  15
-rw-r--r--  Eigen/src/Core/Block.h                              |  11
-rw-r--r--  Eigen/src/Core/CoreEvaluators.h                     | 231
-rw-r--r--  Eigen/src/Core/CwiseBinaryOp.h                      |   8
-rw-r--r--  Eigen/src/Core/CwiseNullaryOp.h                     |   7
-rw-r--r--  Eigen/src/Core/CwiseUnaryOp.h                       |   7
-rw-r--r--  Eigen/src/Core/CwiseUnaryView.h                     |   4
-rw-r--r--  Eigen/src/Core/Diagonal.h                           |   5
-rw-r--r--  Eigen/src/Core/DiagonalMatrix.h                     |   1
-rw-r--r--  Eigen/src/Core/DiagonalProduct.h                    |   8
-rw-r--r--  Eigen/src/Core/Map.h                                |   7
-rw-r--r--  Eigen/src/Core/MapBase.h                            |   9
-rw-r--r--  Eigen/src/Core/Product.h                            |  27
-rw-r--r--  Eigen/src/Core/ProductEvaluators.h                  | 111
-rw-r--r--  Eigen/src/Core/Redux.h                              |  11
-rw-r--r--  Eigen/src/Core/Replicate.h                          |   7
-rw-r--r--  Eigen/src/Core/Reverse.h                            |   7
-rw-r--r--  Eigen/src/Core/Select.h                             |   5
-rw-r--r--  Eigen/src/Core/Transpose.h                          |   7
-rw-r--r--  Eigen/src/Core/VectorwiseOp.h                       |   4
-rw-r--r--  Eigen/src/Core/products/TriangularMatrixVector.h    |   2
-rw-r--r--  Eigen/src/Core/util/ForwardDeclarations.h           |  12
-rw-r--r--  Eigen/src/Core/util/XprHelper.h                     |  52
23 files changed, 433 insertions, 125 deletions
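
In short, this patch moves the evaluation-related Flags (PacketAccessBit, LinearAccessBit, AlignedBit, and friends) out of the expression traits<> classes and into the corresponding evaluator<> specializations, guarded by EIGEN_TEST_EVALUATORS; under the new scheme the traits keep little more than the storage order, LvalueBit and DirectAccessBit. The standalone sketch below (toy types invented for illustration, not Eigen's actual classes) shows the intended split between structural traits and evaluator flags.

// Toy illustration of the traits/evaluator split (hypothetical types, not Eigen code):
// traits<> keeps structural information only, evaluator<> computes the flags that
// drive the assignment/vectorization logic from its argument evaluators.
#include <iostream>

enum { RowMajorBit = 0x1, LinearAccessBit = 0x2, PacketAccessBit = 0x4 };

struct Vec;                               // a toy "plain object" expression
template<class Xpr> struct traits;        // structural info only
template<class Xpr> struct evaluator;     // evaluation info (Flags, coefficient access)

template<> struct traits<Vec>    { enum { Flags = 0 /* column-major, nothing else */ }; };
template<> struct evaluator<Vec> { enum { Flags = LinearAccessBit | PacketAccessBit }; };

template<class Lhs, class Rhs> struct Sum;  // a toy binary expression
template<class Lhs, class Rhs> struct traits<Sum<Lhs, Rhs> > {
  enum { Flags = traits<Lhs>::Flags & RowMajorBit };   // only the storage order survives here
};
template<class Lhs, class Rhs> struct evaluator<Sum<Lhs, Rhs> > {
  enum {
    // evaluation flags are derived from the argument *evaluators*, not from traits
    Flags = (evaluator<Lhs>::Flags & evaluator<Rhs>::Flags & (LinearAccessBit | PacketAccessBit))
          | (evaluator<Lhs>::Flags & RowMajorBit)
  };
};

int main() {
  std::cout << "traits<Sum>::Flags    = 0x" << std::hex << int(traits<Sum<Vec, Vec> >::Flags)    << "\n";  // 0x0
  std::cout << "evaluator<Sum>::Flags = 0x" << std::hex << int(evaluator<Sum<Vec, Vec> >::Flags) << "\n";  // 0x6
}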
diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h
index 2ea1cc126..05816094c 100644
--- a/Eigen/src/Core/AssignEvaluator.h
+++ b/Eigen/src/Core/AssignEvaluator.h
@@ -28,11 +28,10 @@ template <typename DstEvaluator, typename SrcEvaluator, typename AssignFunc>
struct copy_using_evaluator_traits
{
typedef typename DstEvaluator::XprType Dst;
- typedef typename SrcEvaluator::XprType Src;
- // TODO, we should get these flags from the evaluators
+
enum {
- DstFlags = Dst::Flags,
- SrcFlags = Src::Flags
+ DstFlags = DstEvaluator::Flags,
+ SrcFlags = SrcEvaluator::Flags
};
public:
@@ -56,7 +55,9 @@ private:
};
enum {
- StorageOrdersAgree = (int(Dst::IsRowMajor) == int(Src::IsRowMajor)),
+ DstIsRowMajor = DstEvaluator::Flags&RowMajorBit,
+ SrcIsRowMajor = SrcEvaluator::Flags&RowMajorBit,
+ StorageOrdersAgree = (int(DstIsRowMajor) == int(SrcIsRowMajor)),
MightVectorize = StorageOrdersAgree
&& (int(DstFlags) & int(SrcFlags) & ActualPacketAccessBit)
&& (functor_traits<AssignFunc>::PacketAccess),
@@ -596,7 +597,7 @@ public:
typedef typename DstEvaluatorType::ExpressionTraits Traits;
return int(Traits::RowsAtCompileTime) == 1 ? 0
: int(Traits::ColsAtCompileTime) == 1 ? inner
- : int(Traits::Flags)&RowMajorBit ? outer
+ : int(DstEvaluatorType::Flags)&RowMajorBit ? outer
: inner;
}
@@ -605,7 +606,7 @@ public:
typedef typename DstEvaluatorType::ExpressionTraits Traits;
return int(Traits::ColsAtCompileTime) == 1 ? 0
: int(Traits::RowsAtCompileTime) == 1 ? inner
- : int(Traits::Flags)&RowMajorBit ? inner
+ : int(DstEvaluatorType::Flags)&RowMajorBit ? inner
: outer;
}
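
copy_using_evaluator_traits now reads DstEvaluator::Flags and SrcEvaluator::Flags instead of the expressions' own Flags, and derives StorageOrdersAgree from the evaluator flags as well. A small sketch of what this traits class ultimately decides, using only the public dense API (whether the vectorized path is really taken also depends on scalar type, sizes and build options):

#include <Eigen/Core>

int main() {
  Eigen::MatrixXf a(64, 64), b(64, 64);
  b.setRandom();

  // Both sides are column-major and packet-accessible: StorageOrdersAgree holds and
  // MightVectorize can be true for this assignment.
  a = 2.0f * b + b;

  // Destination is row-major while the source is column-major: StorageOrdersAgree is
  // false, so the copy falls back to a non-vectorized traversal.
  Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor> r(64, 64);
  r = b;
  return 0;
}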
diff --git a/Eigen/src/Core/Block.h b/Eigen/src/Core/Block.h
index 31cd5c72c..d92797a98 100644
--- a/Eigen/src/Core/Block.h
+++ b/Eigen/src/Core/Block.h
@@ -68,6 +68,7 @@ struct traits<Block<XprType, BlockRows, BlockCols, InnerPanel> > : traits<XprTyp
MaxColsAtCompileTime = BlockCols==0 ? 0
: ColsAtCompileTime != Dynamic ? int(ColsAtCompileTime)
: int(traits<XprType>::MaxColsAtCompileTime),
+
XprTypeIsRowMajor = (int(traits<XprType>::Flags)&RowMajorBit) != 0,
IsRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1
: (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0
@@ -80,6 +81,10 @@ struct traits<Block<XprType, BlockRows, BlockCols, InnerPanel> > : traits<XprTyp
OuterStrideAtCompileTime = HasSameStorageOrderAsXprType
? int(outer_stride_at_compile_time<XprType>::ret)
: int(inner_stride_at_compile_time<XprType>::ret),
+ // IsAligned is needed by MapBase's assertions
+ // We can safely set it to false here. Internal alignment errors will be detected by an eigen_internal_assert in the respective evaluator
+ IsAligned = 0,
+#ifndef EIGEN_TEST_EVALUATORS
MaskPacketAccessBit = (InnerSize == Dynamic || (InnerSize % packet_traits<Scalar>::size) == 0)
&& (InnerStrideAtCompileTime == 1)
? PacketAccessBit : 0,
@@ -92,6 +97,12 @@ struct traits<Block<XprType, BlockRows, BlockCols, InnerPanel> > : traits<XprTyp
MaskPacketAccessBit |
MaskAlignedBit),
Flags = Flags0 | FlagsLinearAccessBit | FlagsLvalueBit | FlagsRowMajorBit
+#else
+ // FIXME, this traits class is rather specialized for dense objects...
+ FlagsLvalueBit = is_lvalue<XprType>::value ? LvalueBit : 0,
+ FlagsRowMajorBit = IsRowMajor ? RowMajorBit : 0,
+ Flags = (traits<XprType>::Flags & DirectAccessBit) | FlagsLvalueBit | FlagsRowMajorBit // FIXME DirectAccessBit should not be handled by expressions
+#endif
};
};
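
The Block traits above now force IsAligned to 0 and, in the EIGEN_TEST_EVALUATORS branch, expose only the direct-access, lvalue and row-major bits; the packet/aligned masks reappear in the block evaluator further down (CoreEvaluators.h), together with a runtime alignment assertion. A minimal public-API illustration of why alignment cannot be a compile-time property of an arbitrary block:

#include <Eigen/Core>
#include <iostream>

int main() {
  Eigen::MatrixXf m = Eigen::MatrixXf::Random(8, 8);

  // Same block type, different runtime alignment of the underlying data pointer:
  std::cout << m.block(0, 0, 4, 4).sum() << "\n";  // starts at m.data(): aligned start
  std::cout << m.block(1, 0, 4, 4).sum() << "\n";  // starts at m.data()+1: generally unaligned
  return 0;
}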
diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h
index 33c89c2d4..a5de3593c 100644
--- a/Eigen/src/Core/CoreEvaluators.h
+++ b/Eigen/src/Core/CoreEvaluators.h
@@ -136,7 +136,9 @@ struct evaluator<PlainObjectBase<Derived> >
RowsAtCompileTime = PlainObjectType::RowsAtCompileTime,
ColsAtCompileTime = PlainObjectType::ColsAtCompileTime,
- CoeffReadCost = NumTraits<Scalar>::ReadCost
+ CoeffReadCost = NumTraits<Scalar>::ReadCost,
+ Flags = compute_matrix_evaluator_flags< Scalar,Derived::RowsAtCompileTime,Derived::ColsAtCompileTime,
+ Derived::Options,Derived::MaxRowsAtCompileTime,Derived::MaxColsAtCompileTime>::ret
};
evaluator()
@@ -323,7 +325,8 @@ struct evaluator<Transpose<ArgType> >
typedef Transpose<ArgType> XprType;
enum {
- CoeffReadCost = evaluator<ArgType>::CoeffReadCost
+ CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
+ Flags = evaluator<ArgType>::Flags ^ RowMajorBit
};
evaluator(const XprType& t) : m_argImpl(t.nestedExpression()) {}
@@ -389,9 +392,16 @@ struct evaluator<CwiseNullaryOp<NullaryOp,PlainObjectType> >
: evaluator_base<CwiseNullaryOp<NullaryOp,PlainObjectType> >
{
typedef CwiseNullaryOp<NullaryOp,PlainObjectType> XprType;
+ typedef typename internal::remove_all<PlainObjectType>::type PlainObjectTypeCleaned;
enum {
- CoeffReadCost = internal::functor_traits<NullaryOp>::Cost
+ CoeffReadCost = internal::functor_traits<NullaryOp>::Cost,
+
+ Flags = (evaluator<PlainObjectTypeCleaned>::Flags
+ & ( HereditaryBits
+ | (functor_has_linear_access<NullaryOp>::ret ? LinearAccessBit : 0)
+ | (functor_traits<NullaryOp>::PacketAccess ? PacketAccessBit : 0)))
+ | (functor_traits<NullaryOp>::IsRepeatable ? 0 : EvalBeforeNestingBit) // FIXME EvalBeforeNestingBit should not be needed anymore
};
evaluator(const XprType& n)
@@ -437,7 +447,11 @@ struct evaluator<CwiseUnaryOp<UnaryOp, ArgType> >
typedef CwiseUnaryOp<UnaryOp, ArgType> XprType;
enum {
- CoeffReadCost = evaluator<ArgType>::CoeffReadCost + functor_traits<UnaryOp>::Cost
+ CoeffReadCost = evaluator<ArgType>::CoeffReadCost + functor_traits<UnaryOp>::Cost,
+
+ Flags = evaluator<ArgType>::Flags & (
+ HereditaryBits | LinearAccessBit | AlignedBit
+ | (functor_traits<UnaryOp>::PacketAccess ? PacketAccessBit : 0))
};
evaluator(const XprType& op)
@@ -485,7 +499,22 @@ struct evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> XprType;
enum {
- CoeffReadCost = evaluator<Lhs>::CoeffReadCost + evaluator<Rhs>::CoeffReadCost + functor_traits<BinaryOp>::Cost
+ CoeffReadCost = evaluator<Lhs>::CoeffReadCost + evaluator<Rhs>::CoeffReadCost + functor_traits<BinaryOp>::Cost,
+
+ LhsFlags = evaluator<Lhs>::Flags,
+ RhsFlags = evaluator<Rhs>::Flags,
+ SameType = is_same<typename Lhs::Scalar,typename Rhs::Scalar>::value,
+ StorageOrdersAgree = (int(LhsFlags)&RowMajorBit)==(int(RhsFlags)&RowMajorBit),
+ Flags0 = (int(LhsFlags) | int(RhsFlags)) & (
+ HereditaryBits
+ | (int(LhsFlags) & int(RhsFlags) &
+ ( AlignedBit
+ | (StorageOrdersAgree ? LinearAccessBit : 0)
+ | (functor_traits<BinaryOp>::PacketAccess && StorageOrdersAgree && SameType ? PacketAccessBit : 0)
+ )
+ )
+ ),
+ Flags = (Flags0 & ~RowMajorBit) | (LhsFlags & RowMajorBit)
};
evaluator(const XprType& xpr)
@@ -537,7 +566,9 @@ struct evaluator<CwiseUnaryView<UnaryOp, ArgType> >
typedef CwiseUnaryView<UnaryOp, ArgType> XprType;
enum {
- CoeffReadCost = evaluator<ArgType>::CoeffReadCost + functor_traits<UnaryOp>::Cost
+ CoeffReadCost = evaluator<ArgType>::CoeffReadCost + functor_traits<UnaryOp>::Cost,
+
+ Flags = (evaluator<ArgType>::Flags & (HereditaryBits | LinearAccessBit | DirectAccessBit))
};
evaluator(const XprType& op)
@@ -576,12 +607,15 @@ protected:
// -------------------- Map --------------------
-template<typename Derived, int AccessorsType>
-struct evaluator<MapBase<Derived, AccessorsType> >
- : evaluator_base<Derived>
+// FIXME perhaps the PlainObjectType could be provided by Derived::PlainObject ?
+// but that might complicate template specialization
+template<typename Derived, typename PlainObjectType>
+struct mapbase_evaluator;
+
+template<typename Derived, typename PlainObjectType>
+struct mapbase_evaluator : evaluator_base<Derived>
{
- typedef MapBase<Derived, AccessorsType> MapType;
- typedef Derived XprType;
+ typedef Derived XprType;
typedef typename XprType::PointerType PointerType;
typedef typename XprType::Index Index;
typedef typename XprType::Scalar Scalar;
@@ -590,81 +624,103 @@ struct evaluator<MapBase<Derived, AccessorsType> >
typedef typename XprType::PacketReturnType PacketReturnType;
enum {
- RowsAtCompileTime = XprType::RowsAtCompileTime,
+ IsRowMajor = XprType::RowsAtCompileTime,
+ ColsAtCompileTime = XprType::ColsAtCompileTime,
CoeffReadCost = NumTraits<Scalar>::ReadCost
};
- evaluator(const XprType& map)
+ mapbase_evaluator(const XprType& map)
: m_data(const_cast<PointerType>(map.data())),
- m_rowStride(map.rowStride()),
- m_colStride(map.colStride())
- { }
+ m_xpr(map)
+ {
+ EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(evaluator<Derived>::Flags&PacketAccessBit, internal::inner_stride_at_compile_time<Derived>::ret==1),
+ PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1);
+ }
CoeffReturnType coeff(Index row, Index col) const
- {
- return m_data[col * m_colStride + row * m_rowStride];
+ {
+ return m_data[col * m_xpr.colStride() + row * m_xpr.rowStride()];
}
CoeffReturnType coeff(Index index) const
- {
- return coeff(RowsAtCompileTime == 1 ? 0 : index,
- RowsAtCompileTime == 1 ? index : 0);
+ {
+ return m_data[index * m_xpr.innerStride()];
}
Scalar& coeffRef(Index row, Index col)
- {
- return m_data[col * m_colStride + row * m_rowStride];
+ {
+ return m_data[col * m_xpr.colStride() + row * m_xpr.rowStride()];
}
Scalar& coeffRef(Index index)
- {
- return coeffRef(RowsAtCompileTime == 1 ? 0 : index,
- RowsAtCompileTime == 1 ? index : 0);
+ {
+ return m_data[index * m_xpr.innerStride()];
}
template<int LoadMode>
PacketReturnType packet(Index row, Index col) const
- {
- PointerType ptr = m_data + row * m_rowStride + col * m_colStride;
+ {
+ PointerType ptr = m_data + row * m_xpr.rowStride() + col * m_xpr.colStride();
return internal::ploadt<PacketScalar, LoadMode>(ptr);
}
template<int LoadMode>
PacketReturnType packet(Index index) const
- {
- return packet<LoadMode>(RowsAtCompileTime == 1 ? 0 : index,
- RowsAtCompileTime == 1 ? index : 0);
+ {
+ return internal::ploadt<PacketScalar, LoadMode>(m_data + index * m_xpr.innerStride());
}
template<int StoreMode>
void writePacket(Index row, Index col, const PacketScalar& x)
- {
- PointerType ptr = m_data + row * m_rowStride + col * m_colStride;
+ {
+ PointerType ptr = m_data + row * m_xpr.rowStride() + col * m_xpr.colStride();
return internal::pstoret<Scalar, PacketScalar, StoreMode>(ptr, x);
}
template<int StoreMode>
void writePacket(Index index, const PacketScalar& x)
- {
- return writePacket<StoreMode>(RowsAtCompileTime == 1 ? 0 : index,
- RowsAtCompileTime == 1 ? index : 0,
- x);
+ {
+ internal::pstoret<Scalar, PacketScalar, StoreMode>(m_data + index * m_xpr.innerStride(), x);
}
protected:
PointerType m_data;
- int m_rowStride;
- int m_colStride;
+ const XprType& m_xpr;
};
template<typename PlainObjectType, int MapOptions, typename StrideType>
struct evaluator<Map<PlainObjectType, MapOptions, StrideType> >
- : public evaluator<MapBase<Map<PlainObjectType, MapOptions, StrideType> > >
+ : public mapbase_evaluator<Map<PlainObjectType, MapOptions, StrideType>, PlainObjectType>
{
typedef Map<PlainObjectType, MapOptions, StrideType> XprType;
+ typedef typename XprType::Scalar Scalar;
+
+ enum {
+ InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0
+ ? int(PlainObjectType::InnerStrideAtCompileTime)
+ : int(StrideType::InnerStrideAtCompileTime),
+ OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0
+ ? int(PlainObjectType::OuterStrideAtCompileTime)
+ : int(StrideType::OuterStrideAtCompileTime),
+ HasNoInnerStride = InnerStrideAtCompileTime == 1,
+ HasNoOuterStride = StrideType::OuterStrideAtCompileTime == 0,
+ HasNoStride = HasNoInnerStride && HasNoOuterStride,
+ IsAligned = bool(EIGEN_ALIGN) && ((int(MapOptions)&Aligned)==Aligned),
+ IsDynamicSize = PlainObjectType::SizeAtCompileTime==Dynamic,
+ KeepsPacketAccess = bool(HasNoInnerStride)
+ && ( bool(IsDynamicSize)
+ || HasNoOuterStride
+ || ( OuterStrideAtCompileTime!=Dynamic
+ && ((static_cast<int>(sizeof(Scalar))*OuterStrideAtCompileTime)%16)==0 ) ),
+ Flags0 = evaluator<PlainObjectType>::Flags,
+ Flags1 = IsAligned ? (int(Flags0) | AlignedBit) : (int(Flags0) & ~AlignedBit),
+ Flags2 = (bool(HasNoStride) || bool(PlainObjectType::IsVectorAtCompileTime))
+ ? int(Flags1) : int(Flags1 & ~LinearAccessBit),
+ Flags = KeepsPacketAccess ? int(Flags2) : (int(Flags2) & ~PacketAccessBit)
+ };
evaluator(const XprType& map)
- : evaluator<MapBase<XprType> >(map)
+ : mapbase_evaluator<XprType, PlainObjectType>(map)
{ }
};
@@ -672,12 +728,16 @@ struct evaluator<Map<PlainObjectType, MapOptions, StrideType> >
template<typename PlainObjectType, int RefOptions, typename StrideType>
struct evaluator<Ref<PlainObjectType, RefOptions, StrideType> >
- : public evaluator<MapBase<Ref<PlainObjectType, RefOptions, StrideType> > >
+ : public mapbase_evaluator<Ref<PlainObjectType, RefOptions, StrideType>, PlainObjectType>
{
typedef Ref<PlainObjectType, RefOptions, StrideType> XprType;
+
+ enum {
+ Flags = evaluator<Map<PlainObjectType, RefOptions, StrideType> >::Flags
+ };
- evaluator(const XprType& map)
- : evaluator<MapBase<XprType> >(map)
+ evaluator(const XprType& ref)
+ : mapbase_evaluator<XprType, PlainObjectType>(ref)
{ }
};
@@ -691,8 +751,39 @@ struct evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> >
: block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel>
{
typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType;
+ typedef typename XprType::Scalar Scalar;
+
enum {
- CoeffReadCost = evaluator<ArgType>::CoeffReadCost
+ CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
+
+ RowsAtCompileTime = traits<ArgType>::RowsAtCompileTime,
+ ColsAtCompileTime = traits<ArgType>::ColsAtCompileTime,
+ MaxRowsAtCompileTime = traits<ArgType>::MaxRowsAtCompileTime,
+ MaxColsAtCompileTime = traits<ArgType>::MaxColsAtCompileTime,
+
+ XprTypeIsRowMajor = (int(traits<ArgType>::Flags)&RowMajorBit) != 0,
+ IsRowMajor = (MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1) ? 1
+ : (MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1) ? 0
+ : XprTypeIsRowMajor,
+ HasSameStorageOrderAsXprType = (IsRowMajor == XprTypeIsRowMajor),
+ InnerSize = IsRowMajor ? int(ColsAtCompileTime) : int(RowsAtCompileTime),
+ InnerStrideAtCompileTime = HasSameStorageOrderAsXprType
+ ? int(inner_stride_at_compile_time<XprType>::ret)
+ : int(outer_stride_at_compile_time<XprType>::ret),
+ OuterStrideAtCompileTime = HasSameStorageOrderAsXprType
+ ? int(outer_stride_at_compile_time<XprType>::ret)
+ : int(inner_stride_at_compile_time<XprType>::ret),
+ MaskPacketAccessBit = (InnerSize == Dynamic || (InnerSize % packet_traits<Scalar>::size) == 0)
+ && (InnerStrideAtCompileTime == 1)
+ ? PacketAccessBit : 0,
+ MaskAlignedBit = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % 16) == 0)) ? AlignedBit : 0,
+ FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1) ? LinearAccessBit : 0,
+ FlagsRowMajorBit = XprType::Flags&RowMajorBit,
+ Flags0 = traits<XprType>::Flags & ( (HereditaryBits & ~RowMajorBit) |
+ DirectAccessBit |
+ MaskPacketAccessBit |
+ MaskAlignedBit),
+ Flags = Flags0 | FlagsLinearAccessBit | FlagsRowMajorBit
};
typedef block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel> block_evaluator_type;
evaluator(const XprType& block) : block_evaluator_type(block) {}
@@ -778,18 +869,23 @@ protected:
template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel>
struct block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel, /* HasDirectAccess */ true>
- : evaluator<MapBase<Block<ArgType, BlockRows, BlockCols, InnerPanel> > >
+ : mapbase_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>,
+ typename Block<ArgType, BlockRows, BlockCols, InnerPanel>::PlainObject>
{
typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType;
block_evaluator(const XprType& block)
- : evaluator<MapBase<XprType> >(block)
- { }
+ : mapbase_evaluator<XprType, typename XprType::PlainObject>(block)
+ {
+ // FIXME this should be an internal assertion
+ eigen_assert(EIGEN_IMPLIES(evaluator<XprType>::Flags&AlignedBit, (size_t(block.data()) % 16) == 0) && "data is not aligned");
+ }
};
// -------------------- Select --------------------
+// TODO enable vectorization for Select
template<typename ConditionMatrixType, typename ThenMatrixType, typename ElseMatrixType>
struct evaluator<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >
: evaluator_base<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >
@@ -798,7 +894,9 @@ struct evaluator<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >
enum {
CoeffReadCost = evaluator<ConditionMatrixType>::CoeffReadCost
+ EIGEN_SIZE_MAX(evaluator<ThenMatrixType>::CoeffReadCost,
- evaluator<ElseMatrixType>::CoeffReadCost)
+ evaluator<ElseMatrixType>::CoeffReadCost),
+
+ Flags = (unsigned int)evaluator<ThenMatrixType>::Flags & evaluator<ElseMatrixType>::Flags & HereditaryBits
};
evaluator(const XprType& select)
@@ -850,7 +948,9 @@ struct evaluator<Replicate<ArgType, RowFactor, ColFactor> >
typedef typename internal::remove_all<ArgTypeNested>::type ArgTypeNestedCleaned;
enum {
- CoeffReadCost = evaluator<ArgTypeNestedCleaned>::CoeffReadCost
+ CoeffReadCost = evaluator<ArgTypeNestedCleaned>::CoeffReadCost,
+
+ Flags = (evaluator<ArgTypeNestedCleaned>::Flags & HereditaryBits & ~RowMajorBit) | (traits<XprType>::Flags & RowMajorBit)
};
evaluator(const XprType& replicate)
@@ -858,7 +958,7 @@ struct evaluator<Replicate<ArgType, RowFactor, ColFactor> >
m_argImpl(m_arg),
m_rows(replicate.nestedExpression().rows()),
m_cols(replicate.nestedExpression().cols())
- { }
+ {}
CoeffReturnType coeff(Index row, Index col) const
{
@@ -907,17 +1007,19 @@ struct evaluator<PartialReduxExpr<ArgType, MemberOp, Direction> >
typedef PartialReduxExpr<ArgType, MemberOp, Direction> XprType;
typedef typename XprType::Scalar InputScalar;
enum {
- TraversalSize = Direction==Vertical ? XprType::RowsAtCompileTime : XprType::ColsAtCompileTime
+ TraversalSize = Direction==Vertical ? ArgType::RowsAtCompileTime : XprType::ColsAtCompileTime
};
typedef typename MemberOp::template Cost<InputScalar,int(TraversalSize)> CostOpType;
enum {
CoeffReadCost = TraversalSize==Dynamic ? Dynamic
- : TraversalSize * evaluator<ArgType>::CoeffReadCost + int(CostOpType::value)
+ : TraversalSize * evaluator<ArgType>::CoeffReadCost + int(CostOpType::value),
+
+ Flags = (traits<XprType>::Flags&RowMajorBit) | (evaluator<ArgType>::Flags&HereditaryBits)
};
evaluator(const XprType expr)
: m_expr(expr)
- { }
+ {}
typedef typename XprType::Index Index;
typedef typename XprType::CoeffReturnType CoeffReturnType;
@@ -948,7 +1050,8 @@ struct evaluator_wrapper_base
{
typedef typename remove_all<typename XprType::NestedExpressionType>::type ArgType;
enum {
- CoeffReadCost = evaluator<ArgType>::CoeffReadCost
+ CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
+ Flags = evaluator<ArgType>::Flags
};
evaluator_wrapper_base(const ArgType& arg) : m_argImpl(arg) {}
@@ -1058,7 +1161,15 @@ struct evaluator<Reverse<ArgType, Direction> >
|| ((Direction == Vertical) && IsColMajor)
|| ((Direction == Horizontal) && IsRowMajor),
- CoeffReadCost = evaluator<ArgType>::CoeffReadCost
+ CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
+
+ // let's enable LinearAccess only with vectorization because of the product overhead
+ // FIXME enable DirectAccess with negative strides?
+ Flags0 = evaluator<ArgType>::Flags,
+ LinearAccess = ( (Direction==BothDirections) && (int(Flags0)&PacketAccessBit) )
+ ? LinearAccessBit : 0,
+
+ Flags = int(Flags0) & (HereditaryBits | PacketAccessBit | LinearAccess)
};
typedef internal::reverse_packet_cond<PacketScalar,ReversePacket> reverse_packet;
@@ -1071,7 +1182,7 @@ struct evaluator<Reverse<ArgType, Direction> >
CoeffReturnType coeff(Index row, Index col) const
{
return m_argImpl.coeff(ReverseRow ? m_rows.value() - row - 1 : row,
- ReverseCol ? m_cols.value() - col - 1 : col);
+ ReverseCol ? m_cols.value() - col - 1 : col);
}
CoeffReturnType coeff(Index index) const
@@ -1082,7 +1193,7 @@ struct evaluator<Reverse<ArgType, Direction> >
Scalar& coeffRef(Index row, Index col)
{
return m_argImpl.coeffRef(ReverseRow ? m_rows.value() - row - 1 : row,
- ReverseCol ? m_cols.value() - col - 1 : col);
+ ReverseCol ? m_cols.value() - col - 1 : col);
}
Scalar& coeffRef(Index index)
@@ -1138,7 +1249,9 @@ struct evaluator<Diagonal<ArgType, DiagIndex> >
typedef Diagonal<ArgType, DiagIndex> XprType;
enum {
- CoeffReadCost = evaluator<ArgType>::CoeffReadCost
+ CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
+
+ Flags = (unsigned int)evaluator<ArgType>::Flags & (HereditaryBits | LinearAccessBit | DirectAccessBit) & ~RowMajorBit
};
evaluator(const XprType& diagonal)
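
Most of the work is in this file: every dense evaluator gains a Flags enum, and the former evaluator<MapBase<...>> becomes mapbase_evaluator, which also takes over the packet-access static assertion that used to live in MapBase::checkSanity(). The sketch below exercises, through the public API only, a few of the expressions whose evaluators are rewritten here (a strided Map, Replicate and Reverse):

#include <Eigen/Core>
#include <iostream>

int main() {
  float data[12] = {0.f};

  // Inner stride != 1 clears PacketAccessBit in the Map evaluator, so the
  // EIGEN_STATIC_ASSERT added to mapbase_evaluator does not fire here.
  Eigen::Map<Eigen::VectorXf, Eigen::Unaligned, Eigen::InnerStride<2> > v(data, 6);
  v.setOnes();

  Eigen::Matrix3f m = Eigen::Matrix3f::Identity();
  std::cout << m.replicate(2, 1).rows() << "\n";  // Replicate evaluator (fixed by this commit)
  std::cout << v.reverse().sum()        << "\n";  // Reverse evaluator flags
  return 0;
}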
diff --git a/Eigen/src/Core/CwiseBinaryOp.h b/Eigen/src/Core/CwiseBinaryOp.h
index 105e7fb11..07861dbc9 100644
--- a/Eigen/src/Core/CwiseBinaryOp.h
+++ b/Eigen/src/Core/CwiseBinaryOp.h
@@ -65,6 +65,7 @@ struct traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
typedef typename remove_reference<LhsNested>::type _LhsNested;
typedef typename remove_reference<RhsNested>::type _RhsNested;
enum {
+#ifndef EIGEN_TEST_EVALUATORS
LhsFlags = _LhsNested::Flags,
RhsFlags = _RhsNested::Flags,
SameType = is_same<typename _LhsNested::Scalar,typename _RhsNested::Scalar>::value,
@@ -78,12 +79,13 @@ struct traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
)
)
),
- Flags = (Flags0 & ~RowMajorBit) | (LhsFlags & RowMajorBit)
-#ifndef EIGEN_TEST_EVALUATORS
- ,
+ Flags = (Flags0 & ~RowMajorBit) | (LhsFlags & RowMajorBit),
+
LhsCoeffReadCost = _LhsNested::CoeffReadCost,
RhsCoeffReadCost = _RhsNested::CoeffReadCost,
CoeffReadCost = LhsCoeffReadCost + RhsCoeffReadCost + functor_traits<BinaryOp>::Cost
+#else
+ Flags = _LhsNested::Flags & RowMajorBit
#endif
};
};
diff --git a/Eigen/src/Core/CwiseNullaryOp.h b/Eigen/src/Core/CwiseNullaryOp.h
index 560e03f12..f9f127cc2 100644
--- a/Eigen/src/Core/CwiseNullaryOp.h
+++ b/Eigen/src/Core/CwiseNullaryOp.h
@@ -35,14 +35,15 @@ template<typename NullaryOp, typename PlainObjectType>
struct traits<CwiseNullaryOp<NullaryOp, PlainObjectType> > : traits<PlainObjectType>
{
enum {
+#ifndef EIGEN_TEST_EVALUATORS
Flags = (traits<PlainObjectType>::Flags
& ( HereditaryBits
| (functor_has_linear_access<NullaryOp>::ret ? LinearAccessBit : 0)
| (functor_traits<NullaryOp>::PacketAccess ? PacketAccessBit : 0)))
- | (functor_traits<NullaryOp>::IsRepeatable ? 0 : EvalBeforeNestingBit)
-#ifndef EIGEN_TEST_EVALUATORS
- ,
+ | (functor_traits<NullaryOp>::IsRepeatable ? 0 : EvalBeforeNestingBit),
CoeffReadCost = functor_traits<NullaryOp>::Cost
+#else
+ Flags = traits<PlainObjectType>::Flags & RowMajorBit
#endif
};
};
diff --git a/Eigen/src/Core/CwiseUnaryOp.h b/Eigen/src/Core/CwiseUnaryOp.h
index 25da52ab7..af05a9108 100644
--- a/Eigen/src/Core/CwiseUnaryOp.h
+++ b/Eigen/src/Core/CwiseUnaryOp.h
@@ -44,12 +44,13 @@ struct traits<CwiseUnaryOp<UnaryOp, XprType> >
typedef typename XprType::Nested XprTypeNested;
typedef typename remove_reference<XprTypeNested>::type _XprTypeNested;
enum {
+#ifndef EIGEN_TEST_EVALUATORS
Flags = _XprTypeNested::Flags & (
HereditaryBits | LinearAccessBit | AlignedBit
- | (functor_traits<UnaryOp>::PacketAccess ? PacketAccessBit : 0))
-#ifndef EIGEN_TEST_EVALUATORS
- ,
+ | (functor_traits<UnaryOp>::PacketAccess ? PacketAccessBit : 0)),
CoeffReadCost = _XprTypeNested::CoeffReadCost + functor_traits<UnaryOp>::Cost
+#else
+ Flags = _XprTypeNested::Flags & RowMajorBit
#endif
};
};
diff --git a/Eigen/src/Core/CwiseUnaryView.h b/Eigen/src/Core/CwiseUnaryView.h
index a0bd80fb9..9cdebb8e7 100644
--- a/Eigen/src/Core/CwiseUnaryView.h
+++ b/Eigen/src/Core/CwiseUnaryView.h
@@ -37,9 +37,11 @@ struct traits<CwiseUnaryView<ViewOp, MatrixType> >
typedef typename MatrixType::Nested MatrixTypeNested;
typedef typename remove_all<MatrixTypeNested>::type _MatrixTypeNested;
enum {
- Flags = (traits<_MatrixTypeNested>::Flags & (HereditaryBits | LvalueBit | LinearAccessBit | DirectAccessBit)),
#ifndef EIGEN_TEST_EVALUATORS
+ Flags = (traits<_MatrixTypeNested>::Flags & (HereditaryBits | LvalueBit | LinearAccessBit | DirectAccessBit)),
CoeffReadCost = traits<_MatrixTypeNested>::CoeffReadCost + functor_traits<ViewOp>::Cost,
+#else
+ Flags = traits<_MatrixTypeNested>::Flags & (RowMajorBit | LvalueBit | DirectAccessBit), // FIXME DirectAccessBit should not be handled by expressions
#endif
MatrixTypeInnerStride = inner_stride_at_compile_time<MatrixType>::ret,
// need to cast the sizeof's from size_t to int explicitly, otherwise:
diff --git a/Eigen/src/Core/Diagonal.h b/Eigen/src/Core/Diagonal.h
index 02ab04980..3ff6a3e66 100644
--- a/Eigen/src/Core/Diagonal.h
+++ b/Eigen/src/Core/Diagonal.h
@@ -51,10 +51,13 @@ struct traits<Diagonal<MatrixType,DiagIndex> >
: (EIGEN_PLAIN_ENUM_MIN(MatrixType::MaxRowsAtCompileTime - EIGEN_PLAIN_ENUM_MAX(-DiagIndex, 0),
MatrixType::MaxColsAtCompileTime - EIGEN_PLAIN_ENUM_MAX( DiagIndex, 0))),
MaxColsAtCompileTime = 1,
+#ifndef EIGEN_TEST_EVALUATORS
MaskLvalueBit = is_lvalue<MatrixType>::value ? LvalueBit : 0,
Flags = (unsigned int)_MatrixTypeNested::Flags & (HereditaryBits | LinearAccessBit | MaskLvalueBit | DirectAccessBit) & ~RowMajorBit,
-#ifndef EIGEN_TEST_EVALUATORS
CoeffReadCost = _MatrixTypeNested::CoeffReadCost,
+#else
+ MaskLvalueBit = is_lvalue<MatrixType>::value ? LvalueBit : 0,
+ Flags = (unsigned int)_MatrixTypeNested::Flags & (RowMajorBit | MaskLvalueBit | DirectAccessBit) & ~RowMajorBit, // FIXME DirectAccessBit should not be handled by expressions
#endif
MatrixTypeOuterStride = outer_stride_at_compile_time<MatrixType>::ret,
InnerStrideAtCompileTime = MatrixTypeOuterStride == Dynamic ? Dynamic : MatrixTypeOuterStride+1,
diff --git a/Eigen/src/Core/DiagonalMatrix.h b/Eigen/src/Core/DiagonalMatrix.h
index 784e4b1ce..ba0042ba4 100644
--- a/Eigen/src/Core/DiagonalMatrix.h
+++ b/Eigen/src/Core/DiagonalMatrix.h
@@ -275,6 +275,7 @@ struct traits<DiagonalWrapper<_DiagonalVectorType> >
typedef typename DiagonalVectorType::Scalar Scalar;
typedef typename DiagonalVectorType::Index Index;
typedef typename DiagonalVectorType::StorageKind StorageKind;
+ typedef typename traits<DiagonalVectorType>::XprKind XprKind;
enum {
RowsAtCompileTime = DiagonalVectorType::SizeAtCompileTime,
ColsAtCompileTime = DiagonalVectorType::SizeAtCompileTime,
diff --git a/Eigen/src/Core/DiagonalProduct.h b/Eigen/src/Core/DiagonalProduct.h
index 840b70dbb..c6dafdddc 100644
--- a/Eigen/src/Core/DiagonalProduct.h
+++ b/Eigen/src/Core/DiagonalProduct.h
@@ -26,6 +26,7 @@ struct traits<DiagonalProduct<MatrixType, DiagonalType, ProductOrder> >
MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,
+#ifndef EIGEN_TEST_EVALUATORS
_StorageOrder = MatrixType::Flags & RowMajorBit ? RowMajor : ColMajor,
_ScalarAccessOnDiag = !((int(_StorageOrder) == ColMajor && int(ProductOrder) == OnTheLeft)
||(int(_StorageOrder) == RowMajor && int(ProductOrder) == OnTheRight)),
@@ -34,11 +35,10 @@ struct traits<DiagonalProduct<MatrixType, DiagonalType, ProductOrder> >
//_Vectorizable = bool(int(MatrixType::Flags)&PacketAccessBit) && ((!_PacketOnDiag) || (_SameTypes && bool(int(DiagonalType::DiagonalVectorType::Flags)&PacketAccessBit))),
_Vectorizable = bool(int(MatrixType::Flags)&PacketAccessBit) && _SameTypes && (_ScalarAccessOnDiag || (bool(int(DiagonalType::DiagonalVectorType::Flags)&PacketAccessBit))),
_LinearAccessMask = (RowsAtCompileTime==1 || ColsAtCompileTime==1) ? LinearAccessBit : 0,
-
- Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixType::Flags)) | (_Vectorizable ? PacketAccessBit : 0) | AlignedBit //(int(MatrixType::Flags)&int(DiagonalType::DiagonalVectorType::Flags)&AlignedBit),
-#ifndef EIGEN_TEST_EVALUATORS
- ,
+ Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixType::Flags)) | (_Vectorizable ? PacketAccessBit : 0) | AlignedBit, //(int(MatrixType::Flags)&int(DiagonalType::DiagonalVectorType::Flags)&AlignedBit),
CoeffReadCost = NumTraits<Scalar>::MulCost + MatrixType::CoeffReadCost + DiagonalType::DiagonalVectorType::CoeffReadCost
+#else
+ Flags = RowMajorBit & (unsigned int)(MatrixType::Flags)
#endif
};
};
diff --git a/Eigen/src/Core/Map.h b/Eigen/src/Core/Map.h
index 8ea13cfb7..23bbb46bf 100644
--- a/Eigen/src/Core/Map.h
+++ b/Eigen/src/Core/Map.h
@@ -79,10 +79,11 @@ struct traits<Map<PlainObjectType, MapOptions, StrideType> >
OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0
? int(PlainObjectType::OuterStrideAtCompileTime)
: int(StrideType::OuterStrideAtCompileTime),
+ IsAligned = bool(EIGEN_ALIGN) && ((int(MapOptions)&Aligned)==Aligned),
+#ifndef EIGEN_TEST_EVALUATORS
HasNoInnerStride = InnerStrideAtCompileTime == 1,
HasNoOuterStride = StrideType::OuterStrideAtCompileTime == 0,
HasNoStride = HasNoInnerStride && HasNoOuterStride,
- IsAligned = bool(EIGEN_ALIGN) && ((int(MapOptions)&Aligned)==Aligned),
IsDynamicSize = PlainObjectType::SizeAtCompileTime==Dynamic,
KeepsPacketAccess = bool(HasNoInnerStride)
&& ( bool(IsDynamicSize)
@@ -95,6 +96,10 @@ struct traits<Map<PlainObjectType, MapOptions, StrideType> >
? int(Flags1) : int(Flags1 & ~LinearAccessBit),
Flags3 = is_lvalue<PlainObjectType>::value ? int(Flags2) : (int(Flags2) & ~LvalueBit),
Flags = KeepsPacketAccess ? int(Flags3) : (int(Flags3) & ~PacketAccessBit)
+#else
+ Flags0 = TraitsBase::Flags & (~NestByRefBit),
+ Flags = is_lvalue<PlainObjectType>::value ? int(Flags0) : (int(Flags0) & ~LvalueBit)
+#endif
};
private:
enum { Options }; // Expressions don't have Options
diff --git a/Eigen/src/Core/MapBase.h b/Eigen/src/Core/MapBase.h
index ffa1371c2..de1424b09 100644
--- a/Eigen/src/Core/MapBase.h
+++ b/Eigen/src/Core/MapBase.h
@@ -161,11 +161,16 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
EIGEN_DEVICE_FUNC
void checkSanity() const
{
+#ifndef EIGEN_TEST_EVALUATORS
+ // moved to evaluator
EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(internal::traits<Derived>::Flags&PacketAccessBit,
internal::inner_stride_at_compile_time<Derived>::ret==1),
PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1);
- eigen_assert(EIGEN_IMPLIES(internal::traits<Derived>::Flags&AlignedBit, (size_t(m_data) % 16) == 0)
- && "data is not aligned");
+ eigen_assert(EIGEN_IMPLIES(internal::traits<Derived>::Flags&AlignedBit, (size_t(m_data) % 16) == 0) && "data is not aligned");
+#else
+ eigen_assert(EIGEN_IMPLIES(internal::traits<Derived>::IsAligned, (size_t(m_data) % 16) == 0) && "data is not aligned");
+#endif
+
}
PointerType m_data;
diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h
index cac90bc1f..453180049 100644
--- a/Eigen/src/Core/Product.h
+++ b/Eigen/src/Core/Product.h
@@ -33,14 +33,29 @@ template<typename Lhs, typename Rhs, int Option, typename StorageKind> class Pro
namespace internal {
template<typename Lhs, typename Rhs, int Option>
struct traits<Product<Lhs, Rhs, Option> >
- : traits<CoeffBasedProduct<Lhs, Rhs, NestByRefBit> >
-{
- // We want A+B*C to be of type Product<Matrix, Sum> and not Product<Matrix, Matrix>
- // TODO: This flag should eventually go in a separate evaluator traits class
+{
+ typedef typename remove_all<Lhs>::type LhsCleaned;
+ typedef typename remove_all<Rhs>::type RhsCleaned;
+
+ typedef MatrixXpr XprKind;
+
+ typedef typename scalar_product_traits<typename LhsCleaned::Scalar, typename RhsCleaned::Scalar>::ReturnType Scalar;
+ typedef typename promote_storage_type<typename traits<LhsCleaned>::StorageKind,
+ typename traits<RhsCleaned>::StorageKind>::ret StorageKind;
+ typedef typename promote_index_type<typename traits<LhsCleaned>::Index,
+ typename traits<RhsCleaned>::Index>::type Index;
+
enum {
- Flags = traits<CoeffBasedProduct<Lhs, Rhs, NestByRefBit> >::Flags & ~(EvalBeforeNestingBit | DirectAccessBit)
+ RowsAtCompileTime = LhsCleaned::RowsAtCompileTime,
+ ColsAtCompileTime = RhsCleaned::ColsAtCompileTime,
+ MaxRowsAtCompileTime = LhsCleaned::MaxRowsAtCompileTime,
+ MaxColsAtCompileTime = RhsCleaned::MaxColsAtCompileTime,
+
+ // The storage order is somewhat arbitrary here. The correct one will be determined through the evaluator.
+ Flags = (MaxRowsAtCompileTime==1 ? RowMajorBit : 0)
};
};
+
} // end namespace internal
@@ -59,8 +74,6 @@ class Product : public ProductImpl<_Lhs,_Rhs,Option,
typename internal::promote_storage_type<typename Lhs::StorageKind,
typename Rhs::StorageKind>::ret>::Base Base;
EIGEN_GENERIC_PUBLIC_INTERFACE(Product)
-
-
typedef typename internal::nested<Lhs>::type LhsNested;
typedef typename internal::nested<Rhs>::type RhsNested;
diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h
index 7ebf31696..1159c2f44 100644
--- a/Eigen/src/Core/ProductEvaluators.h
+++ b/Eigen/src/Core/ProductEvaluators.h
@@ -18,19 +18,6 @@ namespace Eigen {
namespace internal {
/** \internal
- * \class product_evaluator
- * Products need their own evaluator with more template arguments allowing for
- * easier partial template specializations.
- */
-template< typename T,
- int ProductTag = internal::product_type<typename T::Lhs,typename T::Rhs>::ret,
- typename LhsShape = typename evaluator_traits<typename T::Lhs>::Shape,
- typename RhsShape = typename evaluator_traits<typename T::Rhs>::Shape,
- typename LhsScalar = typename T::Lhs::Scalar,
- typename RhsScalar = typename T::Rhs::Scalar
- > struct product_evaluator;
-
-/** \internal
* Evaluator of a product expression.
* Since products require special treatments to handle all possible cases,
* we simply defer the evaluation logic to a product_evaluator class
@@ -119,6 +106,18 @@ struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, ProductTag, DenseSha
: m_result(xpr.rows(), xpr.cols())
{
::new (static_cast<Base*>(this)) Base(m_result);
+
+// FIXME shall we handle nested_eval here?
+// typedef typename internal::nested_eval<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
+// typedef typename internal::nested_eval<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;
+// typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned;
+// typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned;
+//
+// const LhsNested lhs(xpr.lhs());
+// const RhsNested rhs(xpr.rhs());
+//
+// generic_product_impl<LhsNestedCleaned, RhsNestedCleaned>::evalTo(m_result, lhs, rhs);
+
generic_product_impl<Lhs, Rhs>::evalTo(m_result, xpr.lhs(), xpr.rhs());
}
@@ -133,6 +132,7 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::assign_
typedef Product<Lhs,Rhs,DefaultProduct> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &)
{
+ // FIXME shall we handle nested_eval here?
generic_product_impl<Lhs, Rhs>::evalTo(dst, src.lhs(), src.rhs());
}
};
@@ -144,6 +144,7 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::add_ass
typedef Product<Lhs,Rhs,DefaultProduct> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<Scalar> &)
{
+ // FIXME shall we handle nested_eval here?
generic_product_impl<Lhs, Rhs>::addTo(dst, src.lhs(), src.rhs());
}
};
@@ -155,6 +156,7 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::sub_ass
typedef Product<Lhs,Rhs,DefaultProduct> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<Scalar> &)
{
+ // FIXME shall we handle nested_eval here?
generic_product_impl<Lhs, Rhs>::subTo(dst, src.lhs(), src.rhs());
}
};
@@ -368,7 +370,6 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
: evaluator_base<Product<Lhs, Rhs, LazyProduct> >
{
typedef Product<Lhs, Rhs, LazyProduct> XprType;
- typedef CoeffBasedProduct<Lhs, Rhs, 0> CoeffBasedProductType;
typedef typename XprType::Index Index;
typedef typename XprType::Scalar Scalar;
typedef typename XprType::CoeffReturnType CoeffReturnType;
@@ -396,9 +397,13 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
typedef typename evaluator<RhsNestedCleaned>::type RhsEtorType;
enum {
- RowsAtCompileTime = traits<CoeffBasedProductType>::RowsAtCompileTime,
+ RowsAtCompileTime = LhsNestedCleaned::RowsAtCompileTime,
+ ColsAtCompileTime = RhsNestedCleaned::ColsAtCompileTime,
+ InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(LhsNestedCleaned::ColsAtCompileTime, RhsNestedCleaned::RowsAtCompileTime),
+ MaxRowsAtCompileTime = LhsNestedCleaned::MaxRowsAtCompileTime,
+ MaxColsAtCompileTime = RhsNestedCleaned::MaxColsAtCompileTime,
+
PacketSize = packet_traits<Scalar>::size,
- InnerSize = traits<CoeffBasedProductType>::InnerSize,
LhsCoeffReadCost = LhsEtorType::CoeffReadCost,
RhsCoeffReadCost = RhsEtorType::CoeffReadCost,
@@ -407,8 +412,51 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
+ (InnerSize - 1) * NumTraits<Scalar>::AddCost,
Unroll = CoeffReadCost != Dynamic && CoeffReadCost <= EIGEN_UNROLLING_LIMIT,
- CanVectorizeInner = traits<CoeffBasedProductType>::CanVectorizeInner,
- Flags = traits<CoeffBasedProductType>::Flags
+
+ LhsFlags = LhsEtorType::Flags,
+ RhsFlags = RhsEtorType::Flags,
+
+ LhsRowMajor = LhsFlags & RowMajorBit,
+ RhsRowMajor = RhsFlags & RowMajorBit,
+
+ SameType = is_same<typename LhsNestedCleaned::Scalar,typename RhsNestedCleaned::Scalar>::value,
+
+ CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit)
+ && (ColsAtCompileTime == Dynamic
+ || ( (ColsAtCompileTime % packet_traits<Scalar>::size) == 0
+ && (RhsFlags&AlignedBit)
+ )
+ ),
+
+ CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit)
+ && (RowsAtCompileTime == Dynamic
+ || ( (RowsAtCompileTime % packet_traits<Scalar>::size) == 0
+ && (LhsFlags&AlignedBit)
+ )
+ ),
+
+ EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1
+ : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0
+ : (RhsRowMajor && !CanVectorizeLhs),
+
+ Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit)
+ | (EvalToRowMajor ? RowMajorBit : 0)
+ | (CanVectorizeLhs ? (LhsFlags & AlignedBit) : 0)
+ | (CanVectorizeRhs ? (RhsFlags & AlignedBit) : 0)
+ // TODO enable vectorization for mixed types
+ | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0),
+
+ /* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside
+ * of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner
+ * loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect
+ * the Flags, it is safe to make this value depend on ActualPacketAccessBit, that doesn't affect the ABI.
+ */
+ CanVectorizeInner = SameType
+ && LhsRowMajor
+ && (!RhsRowMajor)
+ && (LhsFlags & RhsFlags & ActualPacketAccessBit)
+ && (LhsFlags & RhsFlags & AlignedBit)
+ && (InnerSize % packet_traits<Scalar>::size == 0)
};
const CoeffReturnType coeff(Index row, Index col) const
@@ -689,7 +737,7 @@ protected:
* Diagonal products
***************************************************************************/
-template<typename MatrixType, typename DiagonalType, typename Derived>
+template<typename MatrixType, typename DiagonalType, typename Derived, int ProductOrder>
struct diagonal_product_evaluator_base
: evaluator_base<Derived>
{
@@ -698,7 +746,20 @@ struct diagonal_product_evaluator_base
typedef typename internal::packet_traits<Scalar>::type PacketScalar;
public:
enum {
- CoeffReadCost = NumTraits<Scalar>::MulCost + evaluator<MatrixType>::CoeffReadCost + evaluator<DiagonalType>::CoeffReadCost
+ CoeffReadCost = NumTraits<Scalar>::MulCost + evaluator<MatrixType>::CoeffReadCost + evaluator<DiagonalType>::CoeffReadCost,
+
+ MatrixFlags = evaluator<MatrixType>::Flags,
+ DiagFlags = evaluator<DiagonalType>::Flags,
+ _StorageOrder = MatrixFlags & RowMajorBit ? RowMajor : ColMajor,
+ _ScalarAccessOnDiag = !((int(_StorageOrder) == ColMajor && int(ProductOrder) == OnTheLeft)
+ ||(int(_StorageOrder) == RowMajor && int(ProductOrder) == OnTheRight)),
+ _SameTypes = is_same<typename MatrixType::Scalar, typename DiagonalType::Scalar>::value,
+ // FIXME currently we need same types, but in the future the next rule should be the one
+ //_Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && ((!_PacketOnDiag) || (_SameTypes && bool(int(DiagFlags)&PacketAccessBit))),
+ _Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && _SameTypes && (_ScalarAccessOnDiag || (bool(int(DiagFlags)&PacketAccessBit))),
+ _LinearAccessMask = (MatrixType::RowsAtCompileTime==1 || MatrixType::ColsAtCompileTime==1) ? LinearAccessBit : 0,
+ Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixFlags)) | (_Vectorizable ? PacketAccessBit : 0) | AlignedBit
+ //(int(MatrixFlags)&int(DiagFlags)&AlignedBit),
};
diagonal_product_evaluator_base(const MatrixType &mat, const DiagonalType &diag)
@@ -724,7 +785,7 @@ protected:
{
enum {
InnerSize = (MatrixType::Flags & RowMajorBit) ? MatrixType::ColsAtCompileTime : MatrixType::RowsAtCompileTime,
- DiagonalPacketLoadMode = (LoadMode == Aligned && (((InnerSize%16) == 0) || (int(DiagonalType::Flags)&AlignedBit)==AlignedBit) ? Aligned : Unaligned)
+ DiagonalPacketLoadMode = (LoadMode == Aligned && (((InnerSize%16) == 0) || (int(DiagFlags)&AlignedBit)==AlignedBit) ? Aligned : Unaligned)
};
return internal::pmul(m_matImpl.template packet<LoadMode>(row, col),
m_diagImpl.template packet<DiagonalPacketLoadMode>(id));
@@ -737,9 +798,9 @@ protected:
// diagonal * dense
template<typename Lhs, typename Rhs, int ProductKind, int ProductTag>
struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DiagonalShape, DenseShape, typename Lhs::Scalar, typename Rhs::Scalar>
- : diagonal_product_evaluator_base<Rhs, typename Lhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct> >
+ : diagonal_product_evaluator_base<Rhs, typename Lhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheLeft>
{
- typedef diagonal_product_evaluator_base<Rhs, typename Lhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct> > Base;
+ typedef diagonal_product_evaluator_base<Rhs, typename Lhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheLeft> Base;
using Base::m_diagImpl;
using Base::m_matImpl;
using Base::coeff;
@@ -783,9 +844,9 @@ struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DiagonalSha
// dense * diagonal
template<typename Lhs, typename Rhs, int ProductKind, int ProductTag>
struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DenseShape, DiagonalShape, typename Lhs::Scalar, typename Rhs::Scalar>
- : diagonal_product_evaluator_base<Lhs, typename Rhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct> >
+ : diagonal_product_evaluator_base<Lhs, typename Rhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheRight>
{
- typedef diagonal_product_evaluator_base<Lhs, typename Rhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct> > Base;
+ typedef diagonal_product_evaluator_base<Lhs, typename Rhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheRight> Base;
using Base::m_diagImpl;
using Base::m_matImpl;
using Base::coeff;
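
For products, the lazy-product evaluator now computes the CanVectorizeLhs/CanVectorizeRhs/CanVectorizeInner and storage-order logic that previously came from CoeffBasedProduct's traits, and diagonal_product_evaluator_base gains a ProductOrder parameter so the OnTheLeft and OnTheRight cases share one base class. A short usage sketch of the affected product kinds (public API only):

#include <Eigen/Core>
#include <iostream>

int main() {
  Eigen::Matrix4f a = Eigen::Matrix4f::Random();
  Eigen::Matrix4f b = Eigen::Matrix4f::Random();

  // Coefficient-based product: handled by product_evaluator<Product<...,LazyProduct>,...>.
  Eigen::Matrix4f c = a.lazyProduct(b);

  // diagonal * dense and dense * diagonal: handled by diagonal_product_evaluator_base,
  // now instantiated with OnTheLeft and OnTheRight respectively.
  Eigen::Vector4f d(1.f, 2.f, 3.f, 4.f);
  Eigen::Matrix4f e = d.asDiagonal() * a;
  Eigen::Matrix4f f = a * d.asDiagonal();

  std::cout << (c + e + f).sum() << "\n";
  return 0;
}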
diff --git a/Eigen/src/Core/Redux.h b/Eigen/src/Core/Redux.h
index 41290323f..6c8c58e95 100644
--- a/Eigen/src/Core/Redux.h
+++ b/Eigen/src/Core/Redux.h
@@ -389,8 +389,19 @@ DenseBase<Derived>::redux(const Func& func) const
eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix");
#ifdef EIGEN_TEST_EVALUATORS
+ // FIXME, eval_nest should be handled by redux_evaluator, however:
+ // - it is currently difficult to provide the right Flags since they are still handled by the expressions
+ // - handling it here might reduce the number of template instantiations
+// typedef typename internal::nested_eval<Derived,1>::type ThisNested;
+// typedef typename internal::remove_all<ThisNested>::type ThisNestedCleaned;
+// typedef typename internal::redux_evaluator<ThisNestedCleaned> ThisEvaluator;
+//
+// ThisNested thisNested(derived());
+// ThisEvaluator thisEval(thisNested);
+
typedef typename internal::redux_evaluator<Derived> ThisEvaluator;
ThisEvaluator thisEval(derived());
+
return internal::redux_impl<Func, ThisEvaluator>::run(thisEval, func);
#else
diff --git a/Eigen/src/Core/Replicate.h b/Eigen/src/Core/Replicate.h
index 1e640d8aa..2dff03ea3 100644
--- a/Eigen/src/Core/Replicate.h
+++ b/Eigen/src/Core/Replicate.h
@@ -53,10 +53,13 @@ struct traits<Replicate<MatrixType,RowFactor,ColFactor> >
IsRowMajor = MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1 ? 1
: MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1 ? 0
: (MatrixType::Flags & RowMajorBit) ? 1 : 0,
- Flags = (_MatrixTypeNested::Flags & HereditaryBits & ~RowMajorBit) | (IsRowMajor ? RowMajorBit : 0)
+
#ifndef EIGEN_TEST_EVALUATORS
- ,
+ Flags = (_MatrixTypeNested::Flags & HereditaryBits & ~RowMajorBit) | (IsRowMajor ? RowMajorBit : 0),
CoeffReadCost = _MatrixTypeNested::CoeffReadCost
+#else
+ // FIXME enable DirectAccess with negative strides?
+ Flags = IsRowMajor ? RowMajorBit : 0
#endif
};
};
diff --git a/Eigen/src/Core/Reverse.h b/Eigen/src/Core/Reverse.h
index 495b44cc4..4969bb4fc 100644
--- a/Eigen/src/Core/Reverse.h
+++ b/Eigen/src/Core/Reverse.h
@@ -45,14 +45,15 @@ struct traits<Reverse<MatrixType, Direction> >
MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,
+#ifndef EIGEN_TEST_EVALUATORS
// let's enable LinearAccess only with vectorization because of the product overhead
LinearAccess = ( (Direction==BothDirections) && (int(_MatrixTypeNested::Flags)&PacketAccessBit) )
? LinearAccessBit : 0,
- Flags = int(_MatrixTypeNested::Flags) & (HereditaryBits | LvalueBit | PacketAccessBit | LinearAccess)
-#ifndef EIGEN_TEST_EVALUATORS
- ,
+ Flags = int(_MatrixTypeNested::Flags) & (HereditaryBits | LvalueBit | PacketAccessBit | LinearAccess),
CoeffReadCost = _MatrixTypeNested::CoeffReadCost
+#else
+ Flags = _MatrixTypeNested::Flags & (RowMajorBit | LvalueBit)
#endif
};
};
diff --git a/Eigen/src/Core/Select.h b/Eigen/src/Core/Select.h
index abcba2d15..d4fd88e62 100644
--- a/Eigen/src/Core/Select.h
+++ b/Eigen/src/Core/Select.h
@@ -43,12 +43,13 @@ struct traits<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >
ColsAtCompileTime = ConditionMatrixType::ColsAtCompileTime,
MaxRowsAtCompileTime = ConditionMatrixType::MaxRowsAtCompileTime,
MaxColsAtCompileTime = ConditionMatrixType::MaxColsAtCompileTime,
- Flags = (unsigned int)ThenMatrixType::Flags & ElseMatrixType::Flags & HereditaryBits
#ifndef EIGEN_TEST_EVALUATORS
- ,
+ Flags = (unsigned int)ThenMatrixType::Flags & ElseMatrixType::Flags & HereditaryBits,
CoeffReadCost = traits<typename remove_all<ConditionMatrixNested>::type>::CoeffReadCost
+ EIGEN_SIZE_MAX(traits<typename remove_all<ThenMatrixNested>::type>::CoeffReadCost,
traits<typename remove_all<ElseMatrixNested>::type>::CoeffReadCost)
+#else
+ Flags = (unsigned int)ThenMatrixType::Flags & ElseMatrixType::Flags & RowMajorBit
#endif
};
};
diff --git a/Eigen/src/Core/Transpose.h b/Eigen/src/Core/Transpose.h
index 98f9e888f..11b0e45a8 100644
--- a/Eigen/src/Core/Transpose.h
+++ b/Eigen/src/Core/Transpose.h
@@ -41,12 +41,17 @@ struct traits<Transpose<MatrixType> > : traits<MatrixType>
ColsAtCompileTime = MatrixType::RowsAtCompileTime,
MaxRowsAtCompileTime = MatrixType::MaxColsAtCompileTime,
MaxColsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
+#ifndef EIGEN_TEST_EVALUATORS
FlagsLvalueBit = is_lvalue<MatrixType>::value ? LvalueBit : 0,
Flags0 = MatrixTypeNestedPlain::Flags & ~(LvalueBit | NestByRefBit),
Flags1 = Flags0 | FlagsLvalueBit,
Flags = Flags1 ^ RowMajorBit,
-#ifndef EIGEN_TEST_EVALUATORS
CoeffReadCost = MatrixTypeNestedPlain::CoeffReadCost,
+#else
+ FlagsLvalueBit = is_lvalue<MatrixType>::value ? LvalueBit : 0,
+ Flags0 = MatrixTypeNestedPlain::Flags & ~(LvalueBit | NestByRefBit),
+ Flags1 = Flags0 | FlagsLvalueBit,
+ Flags = Flags1 ^ RowMajorBit,
#endif
InnerStrideAtCompileTime = inner_stride_at_compile_time<MatrixType>::ret,
OuterStrideAtCompileTime = outer_stride_at_compile_time<MatrixType>::ret
diff --git a/Eigen/src/Core/VectorwiseOp.h b/Eigen/src/Core/VectorwiseOp.h
index 702d0006d..672b9662f 100644
--- a/Eigen/src/Core/VectorwiseOp.h
+++ b/Eigen/src/Core/VectorwiseOp.h
@@ -48,8 +48,12 @@ struct traits<PartialReduxExpr<MatrixType, MemberOp, Direction> >
ColsAtCompileTime = Direction==Horizontal ? 1 : MatrixType::ColsAtCompileTime,
MaxRowsAtCompileTime = Direction==Vertical ? 1 : MatrixType::MaxRowsAtCompileTime,
MaxColsAtCompileTime = Direction==Horizontal ? 1 : MatrixType::MaxColsAtCompileTime,
+#ifndef EIGEN_TEST_EVALUATORS
Flags0 = (unsigned int)_MatrixTypeNested::Flags & HereditaryBits,
Flags = (Flags0 & ~RowMajorBit) | (RowsAtCompileTime == 1 ? RowMajorBit : 0),
+#else
+ Flags = RowsAtCompileTime == 1 ? RowMajorBit : 0,
+#endif
TraversalSize = Direction==Vertical ? MatrixType::RowsAtCompileTime : MatrixType::ColsAtCompileTime
};
#ifndef EIGEN_TEST_EVALUATORS
diff --git a/Eigen/src/Core/products/TriangularMatrixVector.h b/Eigen/src/Core/products/TriangularMatrixVector.h
index eed7f4258..771613b11 100644
--- a/Eigen/src/Core/products/TriangularMatrixVector.h
+++ b/Eigen/src/Core/products/TriangularMatrixVector.h
@@ -259,7 +259,7 @@ template<int Mode> struct trmv_selector<Mode,ColMajor>
typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
typedef internal::blas_traits<Rhs> RhsBlasTraits;
typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
-
+
typedef Map<Matrix<ResScalar,Dynamic,1>, Aligned> MappedDest;
typename internal::add_const_on_value_type<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(lhs);
diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h
index 975fdbf2a..092ba758e 100644
--- a/Eigen/src/Core/util/ForwardDeclarations.h
+++ b/Eigen/src/Core/util/ForwardDeclarations.h
@@ -157,6 +157,18 @@ template<typename _Scalar, int Rows=Dynamic, int Cols=Dynamic, int Supers=Dynami
namespace internal {
template<typename Lhs, typename Rhs> struct product_type;
+/** \internal
+ * \class product_evaluator
+ * Products need their own evaluator with more template arguments allowing for
+ * easier partial template specializations.
+ */
+template< typename T,
+ int ProductTag = internal::product_type<typename T::Lhs,typename T::Rhs>::ret,
+ typename LhsShape = typename evaluator_traits<typename T::Lhs>::Shape,
+ typename RhsShape = typename evaluator_traits<typename T::Rhs>::Shape,
+ typename LhsScalar = typename T::Lhs::Scalar,
+ typename RhsScalar = typename T::Rhs::Scalar
+ > struct product_evaluator;
}
template<typename Lhs, typename Rhs,
diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h
index bcd6183e2..016b37f71 100644
--- a/Eigen/src/Core/util/XprHelper.h
+++ b/Eigen/src/Core/util/XprHelper.h
@@ -124,6 +124,7 @@ template<typename _Scalar, int _Rows, int _Cols,
typedef Matrix<_Scalar, _Rows, _Cols, Options, _MaxRows, _MaxCols> type;
};
+#ifndef EIGEN_TEST_EVALUATORS
template<typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols>
class compute_matrix_flags
{
@@ -158,6 +159,57 @@ class compute_matrix_flags
enum { ret = LinearAccessBit | LvalueBit | DirectAccessBit | NestByRefBit | packet_access_bit | row_major_bit | aligned_bit };
};
+#else // EIGEN_TEST_EVALUATORS
+
+template<typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols>
+class compute_matrix_flags
+{
+ enum { row_major_bit = Options&RowMajor ? RowMajorBit : 0 };
+ public:
+ // FIXME currently we still have to handle DirectAccessBit at the expression level to handle DenseCoeffsBase<>
+ // and then propagate this information to the evaluator's flags.
+ // However, I (Gael) think that DirectAccessBit should only matter at the evaluation stage.
+ enum { ret = DirectAccessBit | LvalueBit | NestByRefBit | row_major_bit };
+};
+#endif
+
+#ifdef EIGEN_ENABLE_EVALUATORS
+template<typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols>
+class compute_matrix_evaluator_flags
+{
+ enum {
+ row_major_bit = Options&RowMajor ? RowMajorBit : 0,
+ is_dynamic_size_storage = MaxRows==Dynamic || MaxCols==Dynamic,
+
+ aligned_bit =
+ (
+ ((Options&DontAlign)==0)
+ && (
+#if EIGEN_ALIGN_STATICALLY
+ ((!is_dynamic_size_storage) && (((MaxCols*MaxRows*int(sizeof(Scalar))) % 16) == 0))
+#else
+ 0
+#endif
+
+ ||
+
+#if EIGEN_ALIGN
+ is_dynamic_size_storage
+#else
+ 0
+#endif
+
+ )
+ ) ? AlignedBit : 0,
+ packet_access_bit = packet_traits<Scalar>::Vectorizable && aligned_bit ? PacketAccessBit : 0
+ };
+
+ public:
+ enum { ret = LinearAccessBit | DirectAccessBit | packet_access_bit | row_major_bit | aligned_bit };
+};
+
+#endif // EIGEN_ENABLE_EVALUATORS
+
template<int _Rows, int _Cols> struct size_at_compile_time
{
enum { ret = (_Rows==Dynamic || _Cols==Dynamic) ? Dynamic : _Rows * _Cols };
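
Finally, compute_matrix_evaluator_flags mirrors the old compute_matrix_flags but for the evaluator side: AlignedBit (and with it PacketAccessBit) is granted when a fixed-size matrix has a static footprint that is a multiple of 16 bytes, or when the storage is dynamic and served by the aligned allocator. A standalone sketch of that alignment rule (hypothetical helper name; it assumes EIGEN_ALIGN and EIGEN_ALIGN_STATICALLY are enabled and DontAlign is not set):

#include <cstdio>

const int kDynamic = -1;  // stand-in for Eigen::Dynamic

// Hypothetical restatement of the aligned_bit condition above, with the
// EIGEN_ALIGN / EIGEN_ALIGN_STATICALLY branches assumed enabled.
bool evaluatorAligned(int maxRows, int maxCols, int scalarSize) {
  const bool dynamicStorage = (maxRows == kDynamic) || (maxCols == kDynamic);
  return dynamicStorage                                  // dynamic size: aligned heap allocation
      || (maxRows * maxCols * scalarSize) % 16 == 0;     // fixed size: static footprint check
}

int main() {
  std::printf("Matrix4f-like: %d\n", evaluatorAligned(4, 4, 4));               // 64 bytes -> 1
  std::printf("Matrix3f-like: %d\n", evaluatorAligned(3, 3, 4));               // 36 bytes -> 0
  std::printf("MatrixXf-like: %d\n", evaluatorAligned(kDynamic, kDynamic, 4)); // dynamic  -> 1
  return 0;
}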