aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen
diff options
context:
space:
mode:
authorGravatar Benoit Jacob <jacob.benoit.1@gmail.com>2009-11-18 17:20:39 -0500
committerGravatar Benoit Jacob <jacob.benoit.1@gmail.com>2009-11-18 17:20:39 -0500
commit40865fa28cab9473959458ec890d68b9df398dc9 (patch)
treee260179a5df2bb4862017604d9ecf936a959983a /Eigen
parent11fa2ae2c602a9771fadee6f5162e82d5d29cd85 (diff)
fix bugs, old and new:
* old bug: in CwiseBinaryOp: only set the LinearAccessBit if both sides have the same storage order. * new bug: in Assign.h, only consider linear traversal if both sides have the same storage order.
Diffstat (limited to 'Eigen')
-rw-r--r--Eigen/src/Core/Assign.h12
-rw-r--r--Eigen/src/Core/CwiseBinaryOp.h11
2 files changed, 15 insertions, 8 deletions
diff --git a/Eigen/src/Core/Assign.h b/Eigen/src/Core/Assign.h
index 32f9623e8..00febdc5d 100644
--- a/Eigen/src/Core/Assign.h
+++ b/Eigen/src/Core/Assign.h
@@ -28,7 +28,7 @@
#define EIGEN_ASSIGN_H
/***************************************************************************
-* Part 1 : the logic deciding a strategy for vectorization and unrolling
+* Part 1 : the logic deciding a strategy for traversal and unrolling *
***************************************************************************/
template <typename Derived, typename OtherDerived>
@@ -53,11 +53,12 @@ private:
};
enum {
- MightVectorize = (int(Derived::Flags) & int(OtherDerived::Flags) & ActualPacketAccessBit)
- && ((int(Derived::Flags)&RowMajorBit)==(int(OtherDerived::Flags)&RowMajorBit)),
+ StorageOrdersAgree = (int(Derived::Flags)&RowMajorBit)==(int(OtherDerived::Flags)&RowMajorBit),
+ MightVectorize = StorageOrdersAgree
+ && (int(Derived::Flags) & int(OtherDerived::Flags) & ActualPacketAccessBit),
MayInnerVectorize = MightVectorize && int(InnerSize)!=Dynamic && int(InnerSize)%int(PacketSize)==0
&& int(DstIsAligned) && int(SrcIsAligned),
- MayLinearize = (int(Derived::Flags) & int(OtherDerived::Flags) & LinearAccessBit),
+ MayLinearize = StorageOrdersAgree && (int(Derived::Flags) & int(OtherDerived::Flags) & LinearAccessBit),
MayLinearVectorize = MightVectorize && MayLinearize
&& (DstIsAligned || InnerMaxSize == Dynamic),
/* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
@@ -73,7 +74,7 @@ public:
Traversal = int(MayInnerVectorize) ? int(InnerVectorizedTraversal)
: int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
: int(MaySliceVectorize) ? int(SliceVectorizedTraversal)
-// : int(MayLinearize) ? int(LinearTraversal)
+ : int(MayLinearize) ? int(LinearTraversal)
: int(DefaultTraversal),
Vectorized = int(Traversal) == InnerVectorizedTraversal
|| int(Traversal) == LinearVectorizedTraversal
@@ -110,6 +111,7 @@ public:
EIGEN_DEBUG_VAR(InnerSize)
EIGEN_DEBUG_VAR(InnerMaxSize)
EIGEN_DEBUG_VAR(PacketSize)
+ EIGEN_DEBUG_VAR(StorageOrdersAgree)
EIGEN_DEBUG_VAR(MightVectorize)
EIGEN_DEBUG_VAR(MayInnerVectorize)
EIGEN_DEBUG_VAR(MayLinearVectorize)
diff --git a/Eigen/src/Core/CwiseBinaryOp.h b/Eigen/src/Core/CwiseBinaryOp.h
index 875bc9aa5..4bea0425f 100644
--- a/Eigen/src/Core/CwiseBinaryOp.h
+++ b/Eigen/src/Core/CwiseBinaryOp.h
@@ -67,11 +67,16 @@ struct ei_traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
ColsAtCompileTime = Lhs::ColsAtCompileTime,
MaxRowsAtCompileTime = Lhs::MaxRowsAtCompileTime,
MaxColsAtCompileTime = Lhs::MaxColsAtCompileTime,
+ StorageOrdersAgree = (int(Lhs::Flags)&RowMajorBit)==(int(Rhs::Flags)&RowMajorBit),
Flags = (int(LhsFlags) | int(RhsFlags)) & (
HereditaryBits
- | (int(LhsFlags) & int(RhsFlags) & (LinearAccessBit | AlignedBit))
- | (ei_functor_traits<BinaryOp>::PacketAccess && ((int(LhsFlags) & RowMajorBit)==(int(RhsFlags) & RowMajorBit))
- ? (int(LhsFlags) & int(RhsFlags) & PacketAccessBit) : 0)),
+ | (int(LhsFlags) & int(RhsFlags) &
+ ( AlignedBit
+ | (StorageOrdersAgree ? LinearAccessBit : 0)
+ | (ei_functor_traits<BinaryOp>::PacketAccess && StorageOrdersAgree ? PacketAccessBit : 0)
+ )
+ )
+ ),
CoeffReadCost = LhsCoeffReadCost + RhsCoeffReadCost + ei_functor_traits<BinaryOp>::Cost
};
};