diff options
author | 2009-11-18 17:20:39 -0500 | |
---|---|---|
committer | 2009-11-18 17:20:39 -0500 | |
commit | 40865fa28cab9473959458ec890d68b9df398dc9 (patch) | |
tree | e260179a5df2bb4862017604d9ecf936a959983a /Eigen | |
parent | 11fa2ae2c602a9771fadee6f5162e82d5d29cd85 (diff) |
fix bugs, old and new:
* old bug: in CwiseBinaryOp: only set the LinearAccessBit if both sides have the same storage order.
* new bug: in Assign.h, only consider linear traversal if both sides have the same storage order.
Diffstat (limited to 'Eigen')
-rw-r--r-- | Eigen/src/Core/Assign.h | 12 | ||||
-rw-r--r-- | Eigen/src/Core/CwiseBinaryOp.h | 11 |
2 files changed, 15 insertions, 8 deletions
diff --git a/Eigen/src/Core/Assign.h b/Eigen/src/Core/Assign.h
index 32f9623e8..00febdc5d 100644
--- a/Eigen/src/Core/Assign.h
+++ b/Eigen/src/Core/Assign.h
@@ -28,7 +28,7 @@
 #define EIGEN_ASSIGN_H

 /***************************************************************************
-* Part 1 : the logic deciding a strategy for vectorization and unrolling
+* Part 1 : the logic deciding a strategy for traversal and unrolling
 * ***************************************************************************/

 template <typename Derived, typename OtherDerived>
@@ -53,11 +53,12 @@ private:
   };

   enum {
-    MightVectorize = (int(Derived::Flags) & int(OtherDerived::Flags) & ActualPacketAccessBit)
-                  && ((int(Derived::Flags)&RowMajorBit)==(int(OtherDerived::Flags)&RowMajorBit)),
+    StorageOrdersAgree = (int(Derived::Flags)&RowMajorBit)==(int(OtherDerived::Flags)&RowMajorBit),
+    MightVectorize = StorageOrdersAgree
+                  && (int(Derived::Flags) & int(OtherDerived::Flags) & ActualPacketAccessBit),
     MayInnerVectorize = MightVectorize && int(InnerSize)!=Dynamic && int(InnerSize)%int(PacketSize)==0
                       && int(DstIsAligned) && int(SrcIsAligned),
-    MayLinearize = (int(Derived::Flags) & int(OtherDerived::Flags) & LinearAccessBit),
+    MayLinearize = StorageOrdersAgree && (int(Derived::Flags) & int(OtherDerived::Flags) & LinearAccessBit),
     MayLinearVectorize = MightVectorize && MayLinearize
                        && (DstIsAligned || InnerMaxSize == Dynamic),
       /* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
@@ -73,7 +74,7 @@ public:
     Traversal = int(MayInnerVectorize) ? int(InnerVectorizedTraversal)
               : int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
               : int(MaySliceVectorize) ? int(SliceVectorizedTraversal)
-// : int(MayLinearize) ? int(LinearTraversal)
+              : int(MayLinearize) ? int(LinearTraversal)
               : int(DefaultTraversal),
     Vectorized = int(Traversal) == InnerVectorizedTraversal
               || int(Traversal) == LinearVectorizedTraversal
@@ -110,6 +111,7 @@ public:
     EIGEN_DEBUG_VAR(InnerSize)
     EIGEN_DEBUG_VAR(InnerMaxSize)
     EIGEN_DEBUG_VAR(PacketSize)
+    EIGEN_DEBUG_VAR(StorageOrdersAgree)
     EIGEN_DEBUG_VAR(MightVectorize)
     EIGEN_DEBUG_VAR(MayInnerVectorize)
     EIGEN_DEBUG_VAR(MayLinearVectorize)
diff --git a/Eigen/src/Core/CwiseBinaryOp.h b/Eigen/src/Core/CwiseBinaryOp.h
index 875bc9aa5..4bea0425f 100644
--- a/Eigen/src/Core/CwiseBinaryOp.h
+++ b/Eigen/src/Core/CwiseBinaryOp.h
@@ -67,11 +67,16 @@ struct ei_traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
     ColsAtCompileTime = Lhs::ColsAtCompileTime,
     MaxRowsAtCompileTime = Lhs::MaxRowsAtCompileTime,
     MaxColsAtCompileTime = Lhs::MaxColsAtCompileTime,
+    StorageOrdersAgree = (int(Lhs::Flags)&RowMajorBit)==(int(Rhs::Flags)&RowMajorBit),
     Flags = (int(LhsFlags) | int(RhsFlags)) & (
         HereditaryBits
-      | (int(LhsFlags) & int(RhsFlags) & (LinearAccessBit | AlignedBit))
-      | (ei_functor_traits<BinaryOp>::PacketAccess && ((int(LhsFlags) & RowMajorBit)==(int(RhsFlags) & RowMajorBit))
-        ? (int(LhsFlags) & int(RhsFlags) & PacketAccessBit) : 0)),
+      | (int(LhsFlags) & int(RhsFlags) &
+          ( AlignedBit
+          | (StorageOrdersAgree ? LinearAccessBit : 0)
+          | (ei_functor_traits<BinaryOp>::PacketAccess && StorageOrdersAgree ? PacketAccessBit : 0)
+          )
+        )
+      ),
     CoeffReadCost = LhsCoeffReadCost + RhsCoeffReadCost + ei_functor_traits<BinaryOp>::Cost
   };
 };