aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src/Core/Assign.h
diff options
context:
space:
mode:
Diffstat (limited to 'Eigen/src/Core/Assign.h')
-rw-r--r--Eigen/src/Core/Assign.h141
1 files changed, 51 insertions, 90 deletions
diff --git a/Eigen/src/Core/Assign.h b/Eigen/src/Core/Assign.h
index 9440cebf1..3133aa03a 100644
--- a/Eigen/src/Core/Assign.h
+++ b/Eigen/src/Core/Assign.h
@@ -37,34 +37,37 @@ struct ei_assign_traits
public:
enum {
DstIsAligned = Derived::Flags & AlignedBit,
+ DstHasDirectAccess = Derived::Flags & DirectAccessBit,
SrcIsAligned = OtherDerived::Flags & AlignedBit,
- SrcAlignment = DstIsAligned && SrcIsAligned ? Aligned : Unaligned
+ JointAlignment = DstIsAligned && SrcIsAligned ? Aligned : Unaligned
};
private:
enum {
- InnerSize = int(Derived::Flags)&RowMajorBit
- ? Derived::ColsAtCompileTime
- : Derived::RowsAtCompileTime,
- InnerMaxSize = int(Derived::Flags)&RowMajorBit
- ? Derived::MaxColsAtCompileTime
- : Derived::MaxRowsAtCompileTime,
- MaxSizeAtCompileTime = ei_size_at_compile_time<Derived::MaxColsAtCompileTime,Derived::MaxRowsAtCompileTime>::ret,
+ InnerSize = int(Derived::IsVectorAtCompileTime) ? int(Derived::SizeAtCompileTime)
+ : int(Derived::Flags)&RowMajorBit ? int(Derived::ColsAtCompileTime)
+ : int(Derived::RowsAtCompileTime),
+ InnerMaxSize = int(Derived::IsVectorAtCompileTime) ? int(Derived::MaxSizeAtCompileTime)
+ : int(Derived::Flags)&RowMajorBit ? int(Derived::MaxColsAtCompileTime)
+ : int(Derived::MaxRowsAtCompileTime),
+ MaxSizeAtCompileTime = Derived::SizeAtCompileTime,
PacketSize = ei_packet_traits<typename Derived::Scalar>::size
};
enum {
- StorageOrdersAgree = (int(Derived::Flags)&RowMajorBit)==(int(OtherDerived::Flags)&RowMajorBit),
+ LhsIsEffectivelyRowMajor = (Derived::RowsAtCompileTime==1) || (int(Derived::Flags)&RowMajorBit),
+ RhsIsEffectivelyRowMajor = (OtherDerived::RowsAtCompileTime==1) || (int(OtherDerived::Flags)&RowMajorBit),
+ StorageOrdersAgree = (LhsIsEffectivelyRowMajor == RhsIsEffectivelyRowMajor),
MightVectorize = StorageOrdersAgree
&& (int(Derived::Flags) & int(OtherDerived::Flags) & ActualPacketAccessBit),
MayInnerVectorize = MightVectorize && int(InnerSize)!=Dynamic && int(InnerSize)%int(PacketSize)==0
&& int(DstIsAligned) && int(SrcIsAligned),
MayLinearize = StorageOrdersAgree && (int(Derived::Flags) & int(OtherDerived::Flags) & LinearAccessBit),
- MayLinearVectorize = MightVectorize && MayLinearize
- && (DstIsAligned || MaxSizeAtCompileTime == Dynamic),
+ MayLinearVectorize = MightVectorize && MayLinearize && DstHasDirectAccess
+ && (DstIsAligned || MaxSizeAtCompileTime == Dynamic),
/* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
so it's only good for large enough sizes. */
- MaySliceVectorize = MightVectorize && int(InnerMaxSize)>=3*PacketSize
+ MaySliceVectorize = MightVectorize && DstHasDirectAccess && int(InnerMaxSize)>=3*PacketSize
/* slice vectorization can be slow, so we only want it if the slices are big, which is
indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block
in a fixed-size matrix */
@@ -108,12 +111,13 @@ public:
{
EIGEN_DEBUG_VAR(DstIsAligned)
EIGEN_DEBUG_VAR(SrcIsAligned)
- EIGEN_DEBUG_VAR(SrcAlignment)
+ EIGEN_DEBUG_VAR(JointAlignment)
EIGEN_DEBUG_VAR(InnerSize)
EIGEN_DEBUG_VAR(InnerMaxSize)
EIGEN_DEBUG_VAR(PacketSize)
EIGEN_DEBUG_VAR(StorageOrdersAgree)
EIGEN_DEBUG_VAR(MightVectorize)
+ EIGEN_DEBUG_VAR(MayLinearize)
EIGEN_DEBUG_VAR(MayInnerVectorize)
EIGEN_DEBUG_VAR(MayLinearVectorize)
EIGEN_DEBUG_VAR(MaySliceVectorize)
@@ -137,17 +141,13 @@ template<typename Derived1, typename Derived2, int Index, int Stop>
struct ei_assign_DefaultTraversal_CompleteUnrolling
{
enum {
- row = int(Derived1::Flags)&RowMajorBit
- ? Index / int(Derived1::ColsAtCompileTime)
- : Index % Derived1::RowsAtCompileTime,
- col = int(Derived1::Flags)&RowMajorBit
- ? Index % int(Derived1::ColsAtCompileTime)
- : Index / Derived1::RowsAtCompileTime
+ outer = Index / Derived1::InnerSizeAtCompileTime,
+ inner = Index % Derived1::InnerSizeAtCompileTime
};
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
{
- dst.copyCoeff(row, col, src);
+ dst.copyCoeffByOuterInner(outer, inner, src);
ei_assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src);
}
};
@@ -161,13 +161,10 @@ struct ei_assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, Stop, St
template<typename Derived1, typename Derived2, int Index, int Stop>
struct ei_assign_DefaultTraversal_InnerUnrolling
{
- EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src, int row_or_col)
+ EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src, int outer)
{
- const bool rowMajor = int(Derived1::Flags)&RowMajorBit;
- const int row = rowMajor ? row_or_col : Index;
- const int col = rowMajor ? Index : row_or_col;
- dst.copyCoeff(row, col, src);
- ei_assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src, row_or_col);
+ dst.copyCoeffByOuterInner(outer, Index, src);
+ ei_assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src, outer);
}
};
@@ -205,18 +202,14 @@ template<typename Derived1, typename Derived2, int Index, int Stop>
struct ei_assign_innervec_CompleteUnrolling
{
enum {
- row = int(Derived1::Flags)&RowMajorBit
- ? Index / int(Derived1::ColsAtCompileTime)
- : Index % Derived1::RowsAtCompileTime,
- col = int(Derived1::Flags)&RowMajorBit
- ? Index % int(Derived1::ColsAtCompileTime)
- : Index / Derived1::RowsAtCompileTime,
- SrcAlignment = ei_assign_traits<Derived1,Derived2>::SrcAlignment
+ outer = Index / Derived1::InnerSizeAtCompileTime,
+ inner = Index % Derived1::InnerSizeAtCompileTime,
+ JointAlignment = ei_assign_traits<Derived1,Derived2>::JointAlignment
};
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
{
- dst.template copyPacket<Derived2, Aligned, SrcAlignment>(row, col, src);
+ dst.template copyPacketByOuterInner<Derived2, Aligned, JointAlignment>(outer, inner, src);
ei_assign_innervec_CompleteUnrolling<Derived1, Derived2,
Index+ei_packet_traits<typename Derived1::Scalar>::size, Stop>::run(dst, src);
}
@@ -231,13 +224,11 @@ struct ei_assign_innervec_CompleteUnrolling<Derived1, Derived2, Stop, Stop>
template<typename Derived1, typename Derived2, int Index, int Stop>
struct ei_assign_innervec_InnerUnrolling
{
- EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src, int row_or_col)
+ EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src, int outer)
{
- const int row = int(Derived1::Flags)&RowMajorBit ? row_or_col : Index;
- const int col = int(Derived1::Flags)&RowMajorBit ? Index : row_or_col;
- dst.template copyPacket<Derived2, Aligned, Aligned>(row, col, src);
+ dst.template copyPacketByOuterInner<Derived2, Aligned, Aligned>(outer, Index, src);
ei_assign_innervec_InnerUnrolling<Derived1, Derived2,
- Index+ei_packet_traits<typename Derived1::Scalar>::size, Stop>::run(dst, src, row_or_col);
+ Index+ei_packet_traits<typename Derived1::Scalar>::size, Stop>::run(dst, src, outer);
}
};
@@ -267,14 +258,9 @@ struct ei_assign_impl<Derived1, Derived2, DefaultTraversal, NoUnrolling>
{
const int innerSize = dst.innerSize();
const int outerSize = dst.outerSize();
- for(int j = 0; j < outerSize; ++j)
- for(int i = 0; i < innerSize; ++i)
- {
- if(int(Derived1::Flags)&RowMajorBit)
- dst.copyCoeff(j, i, src);
- else
- dst.copyCoeff(i, j, src);
- }
+ for(int outer = 0; outer < outerSize; ++outer)
+ for(int inner = 0; inner < innerSize; ++inner)
+ dst.copyCoeffByOuterInner(outer, inner, src);
}
};
@@ -293,12 +279,10 @@ struct ei_assign_impl<Derived1, Derived2, DefaultTraversal, InnerUnrolling>
{
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
{
- const bool rowMajor = int(Derived1::Flags)&RowMajorBit;
- const int innerSize = rowMajor ? Derived1::ColsAtCompileTime : Derived1::RowsAtCompileTime;
const int outerSize = dst.outerSize();
- for(int j = 0; j < outerSize; ++j)
- ei_assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, 0, innerSize>
- ::run(dst, src, j);
+ for(int outer = 0; outer < outerSize; ++outer)
+ ei_assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, 0, Derived1::InnerSizeAtCompileTime>
+ ::run(dst, src, outer);
}
};
@@ -339,14 +323,9 @@ struct ei_assign_impl<Derived1, Derived2, InnerVectorizedTraversal, NoUnrolling>
const int innerSize = dst.innerSize();
const int outerSize = dst.outerSize();
const int packetSize = ei_packet_traits<typename Derived1::Scalar>::size;
- for(int j = 0; j < outerSize; ++j)
- for(int i = 0; i < innerSize; i+=packetSize)
- {
- if(int(Derived1::Flags)&RowMajorBit)
- dst.template copyPacket<Derived2, Aligned, Aligned>(j, i, src);
- else
- dst.template copyPacket<Derived2, Aligned, Aligned>(i, j, src);
- }
+ for(int outer = 0; outer < outerSize; ++outer)
+ for(int inner = 0; inner < innerSize; inner+=packetSize)
+ dst.template copyPacketByOuterInner<Derived2, Aligned, Aligned>(outer, inner, src);
}
};
@@ -365,12 +344,10 @@ struct ei_assign_impl<Derived1, Derived2, InnerVectorizedTraversal, InnerUnrolli
{
EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
{
- const bool rowMajor = int(Derived1::Flags)&RowMajorBit;
- const int innerSize = rowMajor ? Derived1::ColsAtCompileTime : Derived1::RowsAtCompileTime;
const int outerSize = dst.outerSize();
- for(int j = 0; j < outerSize; ++j)
- ei_assign_innervec_InnerUnrolling<Derived1, Derived2, 0, innerSize>
- ::run(dst, src, j);
+ for(int outer = 0; outer < outerSize; ++outer)
+ ei_assign_innervec_InnerUnrolling<Derived1, Derived2, 0, Derived1::InnerSizeAtCompileTime>
+ ::run(dst, src, outer);
}
};
@@ -418,7 +395,7 @@ struct ei_assign_impl<Derived1, Derived2, LinearVectorizedTraversal, NoUnrolling
for(int index = alignedStart; index < alignedEnd; index += packetSize)
{
- dst.template copyPacket<Derived2, Aligned, ei_assign_traits<Derived1,Derived2>::SrcAlignment>(index, src);
+ dst.template copyPacket<Derived2, Aligned, ei_assign_traits<Derived1,Derived2>::JointAlignment>(index, src);
}
ei_unaligned_assign_impl<>::run(src,dst,alignedEnd,size);
@@ -452,40 +429,24 @@ struct ei_assign_impl<Derived1, Derived2, SliceVectorizedTraversal, NoUnrolling>
const int packetAlignedMask = packetSize - 1;
const int innerSize = dst.innerSize();
const int outerSize = dst.outerSize();
- const int alignedStep = (packetSize - dst.stride() % packetSize) & packetAlignedMask;
+ const int alignedStep = (packetSize - dst.outerStride() % packetSize) & packetAlignedMask;
int alignedStart = ei_assign_traits<Derived1,Derived2>::DstIsAligned ? 0
: ei_first_aligned(&dst.coeffRef(0,0), innerSize);
- for(int i = 0; i < outerSize; ++i)
+ for(int outer = 0; outer < outerSize; ++outer)
{
const int alignedEnd = alignedStart + ((innerSize-alignedStart) & ~packetAlignedMask);
-
// do the non-vectorizable part of the assignment
- for (int index = 0; index<alignedStart ; ++index)
- {
- if(Derived1::Flags&RowMajorBit)
- dst.copyCoeff(i, index, src);
- else
- dst.copyCoeff(index, i, src);
- }
+ for(int inner = 0; inner<alignedStart ; ++inner)
+ dst.copyCoeffByOuterInner(outer, inner, src);
// do the vectorizable part of the assignment
- for (int index = alignedStart; index<alignedEnd; index+=packetSize)
- {
- if(Derived1::Flags&RowMajorBit)
- dst.template copyPacket<Derived2, Aligned, Unaligned>(i, index, src);
- else
- dst.template copyPacket<Derived2, Aligned, Unaligned>(index, i, src);
- }
+ for(int inner = alignedStart; inner<alignedEnd; inner+=packetSize)
+ dst.template copyPacketByOuterInner<Derived2, Aligned, Unaligned>(outer, inner, src);
// do the non-vectorizable part of the assignment
- for (int index = alignedEnd; index<innerSize ; ++index)
- {
- if(Derived1::Flags&RowMajorBit)
- dst.copyCoeff(i, index, src);
- else
- dst.copyCoeff(index, i, src);
- }
+ for(int inner = alignedEnd; inner<innerSize ; ++inner)
+ dst.copyCoeffByOuterInner(outer, inner, src);
alignedStart = std::min<int>((alignedStart+alignedStep)%packetSize, innerSize);
}