aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src/Core/AssignEvaluator.h
diff options
context:
space:
mode:
Diffstat (limited to 'Eigen/src/Core/AssignEvaluator.h')
-rw-r--r--Eigen/src/Core/AssignEvaluator.h119
1 files changed, 78 insertions, 41 deletions
diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h
index b1193e421..b7cc7c0e9 100644
--- a/Eigen/src/Core/AssignEvaluator.h
+++ b/Eigen/src/Core/AssignEvaluator.h
@@ -75,23 +75,24 @@ private:
DstIsRowMajor = DstFlags&RowMajorBit,
SrcIsRowMajor = SrcFlags&RowMajorBit,
StorageOrdersAgree = (int(DstIsRowMajor) == int(SrcIsRowMajor)),
- MightVectorize = StorageOrdersAgree
+ MightVectorize = bool(StorageOrdersAgree)
&& (int(DstFlags) & int(SrcFlags) & ActualPacketAccessBit)
- && (functor_traits<AssignFunc>::PacketAccess),
+ && bool(functor_traits<AssignFunc>::PacketAccess),
MayInnerVectorize = MightVectorize
&& int(InnerSize)!=Dynamic && int(InnerSize)%int(InnerPacketSize)==0
&& int(OuterStride)!=Dynamic && int(OuterStride)%int(InnerPacketSize)==0
- && int(JointAlignment)>=int(InnerRequiredAlignment),
- MayLinearize = StorageOrdersAgree && (int(DstFlags) & int(SrcFlags) & LinearAccessBit),
- MayLinearVectorize = MightVectorize && MayLinearize && DstHasDirectAccess
- && ((int(DstAlignment)>=int(LinearRequiredAlignment)) || MaxSizeAtCompileTime == Dynamic),
+ && (EIGEN_UNALIGNED_VECTORIZE || int(JointAlignment)>=int(InnerRequiredAlignment)),
+ MayLinearize = bool(StorageOrdersAgree) && (int(DstFlags) & int(SrcFlags) & LinearAccessBit),
+ MayLinearVectorize = bool(MightVectorize) && MayLinearize && DstHasDirectAccess
+ && (EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment)) || MaxSizeAtCompileTime == Dynamic),
/* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
so it's only good for large enough sizes. */
- MaySliceVectorize = MightVectorize && DstHasDirectAccess
- && (int(InnerMaxSize)==Dynamic || int(InnerMaxSize)>=3*InnerPacketSize)
+ MaySliceVectorize = bool(MightVectorize) && bool(DstHasDirectAccess)
+ && (int(InnerMaxSize)==Dynamic || int(InnerMaxSize)>=(EIGEN_UNALIGNED_VECTORIZE?InnerPacketSize:(3*InnerPacketSize)))
/* slice vectorization can be slow, so we only want it if the slices are big, which is
indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block
- in a fixed-size matrix */
+ in a fixed-size matrix
+ However, with EIGEN_UNALIGNED_VECTORIZE and unrolling, slice vectorization is still worth it */
};
public:
@@ -116,9 +117,9 @@ private:
: 1,
UnrollingLimit = EIGEN_UNROLLING_LIMIT * ActualPacketSize,
MayUnrollCompletely = int(Dst::SizeAtCompileTime) != Dynamic
- && int(Dst::SizeAtCompileTime) * int(SrcEvaluator::CoeffReadCost) <= int(UnrollingLimit),
+ && int(Dst::SizeAtCompileTime) * (int(DstEvaluator::CoeffReadCost)+int(SrcEvaluator::CoeffReadCost)) <= int(UnrollingLimit),
MayUnrollInner = int(InnerSize) != Dynamic
- && int(InnerSize) * int(SrcEvaluator::CoeffReadCost) <= int(UnrollingLimit)
+ && int(InnerSize) * (int(DstEvaluator::CoeffReadCost)+int(SrcEvaluator::CoeffReadCost)) <= int(UnrollingLimit)
};
public:
@@ -130,11 +131,17 @@ public:
: int(NoUnrolling)
)
: int(Traversal) == int(LinearVectorizedTraversal)
- ? ( bool(MayUnrollCompletely) && (int(DstAlignment)>=int(LinearRequiredAlignment)) ? int(CompleteUnrolling)
- : int(NoUnrolling) )
+ ? ( bool(MayUnrollCompletely) && ( EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment)))
+ ? int(CompleteUnrolling)
+ : int(NoUnrolling) )
: int(Traversal) == int(LinearTraversal)
? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling)
: int(NoUnrolling) )
+#if EIGEN_UNALIGNED_VECTORIZE
+ : int(Traversal) == int(SliceVectorizedTraversal)
+ ? ( bool(MayUnrollInner) ? int(InnerUnrolling)
+ : int(NoUnrolling) )
+#endif
: int(NoUnrolling)
};
@@ -156,6 +163,7 @@ public:
EIGEN_DEBUG_VAR(InnerMaxSize)
EIGEN_DEBUG_VAR(LinearPacketSize)
EIGEN_DEBUG_VAR(InnerPacketSize)
+ EIGEN_DEBUG_VAR(ActualPacketSize)
EIGEN_DEBUG_VAR(StorageOrdersAgree)
EIGEN_DEBUG_VAR(MightVectorize)
EIGEN_DEBUG_VAR(MayLinearize)
@@ -163,6 +171,7 @@ public:
EIGEN_DEBUG_VAR(MayLinearVectorize)
EIGEN_DEBUG_VAR(MaySliceVectorize)
std::cerr << "Traversal" << " = " << Traversal << " (" << demangle_traversal(Traversal) << ")" << std::endl;
+ EIGEN_DEBUG_VAR(SrcEvaluator::CoeffReadCost)
EIGEN_DEBUG_VAR(UnrollingLimit)
EIGEN_DEBUG_VAR(MayUnrollCompletely)
EIGEN_DEBUG_VAR(MayUnrollInner)
@@ -256,13 +265,13 @@ struct copy_using_evaluator_innervec_CompleteUnrolling
enum {
outer = Index / DstXprType::InnerSizeAtCompileTime,
inner = Index % DstXprType::InnerSizeAtCompileTime,
- JointAlignment = Kernel::AssignmentTraits::JointAlignment,
- DefaultAlignment = unpacket_traits<PacketType>::alignment
+ SrcAlignment = Kernel::AssignmentTraits::SrcAlignment,
+ DstAlignment = Kernel::AssignmentTraits::DstAlignment
};
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
{
- kernel.template assignPacketByOuterInner<DefaultAlignment, JointAlignment, PacketType>(outer, inner);
+ kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, inner);
enum { NextIndex = Index + unpacket_traits<PacketType>::size };
copy_using_evaluator_innervec_CompleteUnrolling<Kernel, NextIndex, Stop>::run(kernel);
}
@@ -274,23 +283,20 @@ struct copy_using_evaluator_innervec_CompleteUnrolling<Kernel, Stop, Stop>
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
};
-template<typename Kernel, int Index_, int Stop>
+template<typename Kernel, int Index_, int Stop, int SrcAlignment, int DstAlignment>
struct copy_using_evaluator_innervec_InnerUnrolling
{
typedef typename Kernel::PacketType PacketType;
- enum {
- DefaultAlignment = unpacket_traits<PacketType>::alignment
- };
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer)
{
- kernel.template assignPacketByOuterInner<DefaultAlignment, DefaultAlignment, PacketType>(outer, Index_);
+ kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, Index_);
enum { NextIndex = Index_ + unpacket_traits<PacketType>::size };
- copy_using_evaluator_innervec_InnerUnrolling<Kernel, NextIndex, Stop>::run(kernel, outer);
+ copy_using_evaluator_innervec_InnerUnrolling<Kernel, NextIndex, Stop, SrcAlignment, DstAlignment>::run(kernel, outer);
}
};
-template<typename Kernel, int Stop>
-struct copy_using_evaluator_innervec_InnerUnrolling<Kernel, Stop, Stop>
+template<typename Kernel, int Stop, int SrcAlignment, int DstAlignment>
+struct copy_using_evaluator_innervec_InnerUnrolling<Kernel, Stop, Stop, SrcAlignment, DstAlignment>
{
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &, Index) { }
};
@@ -419,9 +425,10 @@ struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, CompleteUnrollin
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
{
typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
+ typedef typename Kernel::PacketType PacketType;
enum { size = DstXprType::SizeAtCompileTime,
- packetSize = packet_traits<typename Kernel::Scalar>::size,
+ packetSize =unpacket_traits<PacketType>::size,
alignedSize = (size/packetSize)*packetSize };
copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, alignedSize>::run(kernel);
@@ -438,7 +445,8 @@ struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, NoUnrolling>
{
typedef typename Kernel::PacketType PacketType;
enum {
- DefaultAlignment = unpacket_traits<PacketType>::alignment
+ SrcAlignment = Kernel::AssignmentTraits::SrcAlignment,
+ DstAlignment = Kernel::AssignmentTraits::DstAlignment
};
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
{
@@ -447,7 +455,7 @@ struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, NoUnrolling>
const Index packetSize = unpacket_traits<PacketType>::size;
for(Index outer = 0; outer < outerSize; ++outer)
for(Index inner = 0; inner < innerSize; inner+=packetSize)
- kernel.template assignPacketByOuterInner<DefaultAlignment, DefaultAlignment, PacketType>(outer, inner);
+ kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, inner);
}
};
@@ -467,9 +475,11 @@ struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, InnerUnrolling>
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
{
typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
+ typedef typename Kernel::AssignmentTraits Traits;
const Index outerSize = kernel.outerSize();
for(Index outer = 0; outer < outerSize; ++outer)
- copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime>::run(kernel, outer);
+ copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime,
+ Traits::SrcAlignment, Traits::DstAlignment>::run(kernel, outer);
}
};
@@ -518,7 +528,7 @@ struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling>
: int(Kernel::AssignmentTraits::DstAlignment)
};
const Scalar *dst_ptr = &kernel.dstEvaluator().coeffRef(0,0);
- if((!bool(dstIsAligned)) && (size_t(dst_ptr) % sizeof(Scalar))>0)
+ if((!bool(dstIsAligned)) && (UIntPtr(dst_ptr) % sizeof(Scalar))>0)
{
// the pointer is not aligend-on scalar, so alignment is not possible
return dense_assignment_loop<Kernel,DefaultTraversal,NoUnrolling>::run(kernel);
@@ -549,6 +559,29 @@ struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling>
}
};
+#if EIGEN_UNALIGNED_VECTORIZE
+template<typename Kernel>
+struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, InnerUnrolling>
+{
+ EIGEN_DEVICE_FUNC static inline void run(Kernel &kernel)
+ {
+ typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
+ typedef typename Kernel::PacketType PacketType;
+
+ enum { size = DstXprType::InnerSizeAtCompileTime,
+ packetSize =unpacket_traits<PacketType>::size,
+ vectorizableSize = (size/packetSize)*packetSize };
+
+ for(Index outer = 0; outer < kernel.outerSize(); ++outer)
+ {
+ copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, vectorizableSize, 0, 0>::run(kernel, outer);
+ copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, vectorizableSize, size>::run(kernel, outer);
+ }
+ }
+};
+#endif
+
+
/***************************************************************************
* Part 4 : Generic dense assignment kernel
***************************************************************************/
@@ -676,14 +709,14 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(const DstX
typedef generic_dense_assignment_kernel<DstEvaluatorType,SrcEvaluatorType,Functor> Kernel;
Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived());
-
+
dense_assignment_loop<Kernel>::run(kernel);
}
template<typename DstXprType, typename SrcXprType>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src)
{
- call_dense_assignment_loop(dst, src, internal::assign_op<typename DstXprType::Scalar>());
+ call_dense_assignment_loop(dst, src, internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar>());
}
/***************************************************************************
@@ -705,7 +738,7 @@ template<> struct AssignmentKind<DenseShape,DenseShape> { typedef Dense2Dense Ki
// This is the main assignment class
template< typename DstXprType, typename SrcXprType, typename Functor,
typename Kind = typename AssignmentKind< typename evaluator_traits<DstXprType>::Shape , typename evaluator_traits<SrcXprType>::Shape >::Kind,
- typename Scalar = typename DstXprType::Scalar>
+ typename EnableIf = void>
struct Assignment;
@@ -718,13 +751,13 @@ template<typename Dst, typename Src>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void call_assignment(Dst& dst, const Src& src)
{
- call_assignment(dst, src, internal::assign_op<typename Dst::Scalar>());
+ call_assignment(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
}
template<typename Dst, typename Src>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void call_assignment(const Dst& dst, const Src& src)
{
- call_assignment(dst, src, internal::assign_op<typename Dst::Scalar>());
+ call_assignment(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
}
// Deal with "assume-aliasing"
@@ -783,7 +816,7 @@ template<typename Dst, typename Src>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void call_assignment_no_alias(Dst& dst, const Src& src)
{
- call_assignment_no_alias(dst, src, internal::assign_op<typename Dst::Scalar>());
+ call_assignment_no_alias(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
}
template<typename Dst, typename Src, typename Func>
@@ -805,15 +838,17 @@ template<typename Dst, typename Src>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src)
{
- call_assignment_no_alias_no_transpose(dst, src, internal::assign_op<typename Dst::Scalar>());
+ call_assignment_no_alias_no_transpose(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
}
// forward declaration
template<typename Dst, typename Src> void check_for_aliasing(const Dst &dst, const Src &src);
// Generic Dense to Dense assignment
-template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar>
-struct Assignment<DstXprType, SrcXprType, Functor, Dense2Dense, Scalar>
+// Note that the last template argument "Weak" is needed to make it possible to perform
+// both partial specialization+SFINAE without ambiguous specialization
+template< typename DstXprType, typename SrcXprType, typename Functor, typename Weak>
+struct Assignment<DstXprType, SrcXprType, Functor, Dense2Dense, Weak>
{
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
@@ -830,11 +865,13 @@ struct Assignment<DstXprType, SrcXprType, Functor, Dense2Dense, Scalar>
// Generic assignment through evalTo.
// TODO: not sure we have to keep that one, but it helps porting current code to new evaluator mechanism.
-template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar>
-struct Assignment<DstXprType, SrcXprType, Functor, EigenBase2EigenBase, Scalar>
+// Note that the last template argument "Weak" is needed to make it possible to perform
+// both partial specialization+SFINAE without ambiguous specialization
+template< typename DstXprType, typename SrcXprType, typename Functor, typename Weak>
+struct Assignment<DstXprType, SrcXprType, Functor, EigenBase2EigenBase, Weak>
{
EIGEN_DEVICE_FUNC
- static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar> &/*func*/)
+ static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)
{
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
src.evalTo(dst);