diff options
author | Gael Guennebaud <g.gael@free.fr> | 2015-08-06 15:31:07 +0200 |
---|---|---|
committer | Gael Guennebaud <g.gael@free.fr> | 2015-08-06 15:31:07 +0200 |
commit | 1f5024332e47f295c991c3781d57d0466d41a9c8 (patch) | |
tree | 154fa01d41076ca58bdbff8c3bb0850119df92fa /Eigen/src/Core | |
parent | 65186ef18d6212b3d09b1d619f1cf1019c2ae0fb (diff) |
First part of a big refactoring of alignment control to enable the handling of arbitrarily aligned buffers. It includes:
- The AlignedBit flag is deprecated. Alignment is now specified by the evaluator through the 'Alignment' enum, e.g., evaluator<Xpr>::Alignment. Its value is expressed in bytes.
- Add several enums to specify alignment: Aligned8, Aligned16, Aligned32, Aligned64, Aligned128. AlignedMax corresponds to EIGEN_MAX_ALIGN_BYTES. Such enums are used to define the above Alignment value, and as the 'Options' template parameter of Map<> and Ref<>.
- The Aligned enum is now deprecated. It is now an alias for Aligned16.
- Currently, traits<Matrix<>>, traits<Array<>>, traits<Ref<>>, traits<Map<>>, and traits<Block<>> also expose the Alignment enum.
Diffstat (limited to 'Eigen/src/Core')
-rw-r--r-- | Eigen/src/Core/AssignEvaluator.h | 41 | ||||
-rw-r--r-- | Eigen/src/Core/Block.h | 10 | ||||
-rw-r--r-- | Eigen/src/Core/CoreEvaluators.h | 87 | ||||
-rw-r--r-- | Eigen/src/Core/DenseCoeffsBase.h | 6 | ||||
-rw-r--r-- | Eigen/src/Core/GenericPacketMath.h | 12 | ||||
-rw-r--r-- | Eigen/src/Core/Map.h | 4 | ||||
-rw-r--r-- | Eigen/src/Core/MapBase.h | 3 | ||||
-rw-r--r-- | Eigen/src/Core/Matrix.h | 20 | ||||
-rw-r--r-- | Eigen/src/Core/PlainObjectBase.h | 14 | ||||
-rwxr-xr-x | Eigen/src/Core/ProductEvaluators.h | 31 | ||||
-rw-r--r-- | Eigen/src/Core/Redux.h | 13 | ||||
-rw-r--r-- | Eigen/src/Core/Ref.h | 7 | ||||
-rw-r--r-- | Eigen/src/Core/StableNorm.h | 18 | ||||
-rw-r--r-- | Eigen/src/Core/Transpose.h | 2 | ||||
-rw-r--r-- | Eigen/src/Core/util/Constants.h | 33 | ||||
-rw-r--r-- | Eigen/src/Core/util/XprHelper.h | 37 |
16 files changed, 185 insertions, 153 deletions
diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h index b0468dd64..39efb1d5a 100644 --- a/Eigen/src/Core/AssignEvaluator.h +++ b/Eigen/src/Core/AssignEvaluator.h @@ -28,18 +28,19 @@ template <typename DstEvaluator, typename SrcEvaluator, typename AssignFunc> struct copy_using_evaluator_traits { typedef typename DstEvaluator::XprType Dst; - + typedef typename Dst::Scalar DstScalar; enum { DstFlags = DstEvaluator::Flags, - SrcFlags = SrcEvaluator::Flags + SrcFlags = SrcEvaluator::Flags, + RequiredAlignment = packet_traits<DstScalar>::size*sizeof(DstScalar) // FIXME ask packet_traits for the true alignment requirement }; public: enum { - DstIsAligned = DstFlags & AlignedBit, + DstAlignment = DstEvaluator::Alignment, + SrcAlignment = SrcEvaluator::Alignment, DstHasDirectAccess = DstFlags & DirectAccessBit, - SrcIsAligned = SrcFlags & AlignedBit, - JointAlignment = bool(DstIsAligned) && bool(SrcIsAligned) ? Aligned : Unaligned + JointAlignment = EIGEN_PLAIN_ENUM_MIN(DstAlignment,SrcAlignment) }; private: @@ -51,7 +52,7 @@ private: : int(DstFlags)&RowMajorBit ? 
int(Dst::MaxColsAtCompileTime) : int(Dst::MaxRowsAtCompileTime), MaxSizeAtCompileTime = Dst::SizeAtCompileTime, - PacketSize = packet_traits<typename Dst::Scalar>::size + PacketSize = packet_traits<DstScalar>::size }; enum { @@ -62,10 +63,10 @@ private: && (int(DstFlags) & int(SrcFlags) & ActualPacketAccessBit) && (functor_traits<AssignFunc>::PacketAccess), MayInnerVectorize = MightVectorize && int(InnerSize)!=Dynamic && int(InnerSize)%int(PacketSize)==0 - && int(DstIsAligned) && int(SrcIsAligned), + && int(JointAlignment)>=int(RequiredAlignment), MayLinearize = StorageOrdersAgree && (int(DstFlags) & int(SrcFlags) & LinearAccessBit), MayLinearVectorize = MightVectorize && MayLinearize && DstHasDirectAccess - && (DstIsAligned || MaxSizeAtCompileTime == Dynamic), + && ((int(DstAlignment)>=int(RequiredAlignment)) || MaxSizeAtCompileTime == Dynamic), /* If the destination isn't aligned, we have to do runtime checks and we don't unroll, so it's only good for large enough sizes. */ MaySliceVectorize = MightVectorize && DstHasDirectAccess @@ -107,8 +108,8 @@ public: : int(NoUnrolling) ) : int(Traversal) == int(LinearVectorizedTraversal) - ? ( bool(MayUnrollCompletely) && bool(DstIsAligned) ? int(CompleteUnrolling) - : int(NoUnrolling) ) + ? ( bool(MayUnrollCompletely) && (int(DstAlignment)>=int(RequiredAlignment)) ? int(CompleteUnrolling) + : int(NoUnrolling) ) : int(Traversal) == int(LinearTraversal) ? ( bool(MayUnrollCompletely) ? 
int(CompleteUnrolling) : int(NoUnrolling) ) @@ -124,8 +125,9 @@ public: EIGEN_DEBUG_VAR(DstFlags) EIGEN_DEBUG_VAR(SrcFlags) std::cerr.unsetf(std::ios::hex); - EIGEN_DEBUG_VAR(DstIsAligned) - EIGEN_DEBUG_VAR(SrcIsAligned) + EIGEN_DEBUG_VAR(DstAlignment) + EIGEN_DEBUG_VAR(SrcAlignment) + EIGEN_DEBUG_VAR(RequiredAlignment) EIGEN_DEBUG_VAR(JointAlignment) EIGEN_DEBUG_VAR(InnerSize) EIGEN_DEBUG_VAR(InnerMaxSize) @@ -360,11 +362,13 @@ struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, NoUnrolling> EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) { const Index size = kernel.size(); - typedef packet_traits<typename Kernel::Scalar> PacketTraits; + typedef typename Kernel::Scalar Scalar; + typedef packet_traits<Scalar> PacketTraits; enum { packetSize = PacketTraits::size, - dstIsAligned = int(Kernel::AssignmentTraits::DstIsAligned), - dstAlignment = PacketTraits::AlignedOnScalar ? Aligned : dstIsAligned, + dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(Kernel::AssignmentTraits::RequiredAlignment), + dstAlignment = PacketTraits::AlignedOnScalar ? int(Kernel::AssignmentTraits::RequiredAlignment) + : int(Kernel::AssignmentTraits::DstAlignment), srcAlignment = Kernel::AssignmentTraits::JointAlignment }; const Index alignedStart = dstIsAligned ? 0 : internal::first_aligned(&kernel.dstEvaluator().coeffRef(0), size); @@ -475,9 +479,10 @@ struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling> typedef packet_traits<Scalar> PacketTraits; enum { packetSize = PacketTraits::size, - alignable = PacketTraits::AlignedOnScalar, - dstIsAligned = Kernel::AssignmentTraits::DstIsAligned, - dstAlignment = alignable ? Aligned : int(dstIsAligned) + alignable = PacketTraits::AlignedOnScalar || int(Kernel::AssignmentTraits::DstAlignment)>=sizeof(Scalar), + dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(Kernel::AssignmentTraits::RequiredAlignment), + dstAlignment = alignable ? 
int(Kernel::AssignmentTraits::RequiredAlignment) + : int(Kernel::AssignmentTraits::DstAlignment) }; const Scalar *dst_ptr = &kernel.dstEvaluator().coeffRef(0,0); if((!bool(dstIsAligned)) && (size_t(dst_ptr) % sizeof(Scalar))>0) diff --git a/Eigen/src/Core/Block.h b/Eigen/src/Core/Block.h index aed6147c7..3748e259b 100644 --- a/Eigen/src/Core/Block.h +++ b/Eigen/src/Core/Block.h @@ -81,14 +81,16 @@ struct traits<Block<XprType, BlockRows, BlockCols, InnerPanel> > : traits<XprTyp OuterStrideAtCompileTime = HasSameStorageOrderAsXprType ? int(outer_stride_at_compile_time<XprType>::ret) : int(inner_stride_at_compile_time<XprType>::ret), - // IsAligned is needed by MapBase's assertions - // We can sefely set it to false here. Internal alignment errors will be detected by an eigen_internal_assert in the respective evaluator - IsAligned = 0, + // FIXME, this traits is rather specialized for dense object and it needs to be cleaned further FlagsLvalueBit = is_lvalue<XprType>::value ? LvalueBit : 0, FlagsRowMajorBit = IsRowMajor ? RowMajorBit : 0, - Flags = (traits<XprType>::Flags & (DirectAccessBit | (InnerPanel?CompressedAccessBit:0))) | FlagsLvalueBit | FlagsRowMajorBit + Flags = (traits<XprType>::Flags & (DirectAccessBit | (InnerPanel?CompressedAccessBit:0))) | FlagsLvalueBit | FlagsRowMajorBit, // FIXME DirectAccessBit should not be handled by expressions + // + // Alignment is needed by MapBase's assertions + // We can sefely set it to false here. Internal alignment errors will be detected by an eigen_internal_assert in the respective evaluator + Alignment = 0 }; }; diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h index 6b0e3617d..910d04ecb 100644 --- a/Eigen/src/Core/CoreEvaluators.h +++ b/Eigen/src/Core/CoreEvaluators.h @@ -111,6 +111,10 @@ struct evaluator_base typedef typename traits<ExpressionType>::StorageIndex StorageIndex; // TODO that's not very nice to have to propagate all these traits. 
They are currently only needed to handle outer,inner indices. typedef traits<ExpressionType> ExpressionTraits; + + enum { + Alignment = 0 + }; }; // -------------------- Matrix and Array -------------------- @@ -137,8 +141,8 @@ struct evaluator<PlainObjectBase<Derived> > ColsAtCompileTime = PlainObjectType::ColsAtCompileTime, CoeffReadCost = NumTraits<Scalar>::ReadCost, - Flags = compute_matrix_evaluator_flags< Scalar,Derived::RowsAtCompileTime,Derived::ColsAtCompileTime, - Derived::Options,Derived::MaxRowsAtCompileTime,Derived::MaxColsAtCompileTime>::ret + Flags = traits<Derived>::EvaluatorFlags, + Alignment = traits<Derived>::Alignment }; EIGEN_DEVICE_FUNC evaluator() @@ -255,7 +259,8 @@ struct unary_evaluator<Transpose<ArgType>, IndexBased> enum { CoeffReadCost = evaluator<ArgType>::CoeffReadCost, - Flags = evaluator<ArgType>::Flags ^ RowMajorBit + Flags = evaluator<ArgType>::Flags ^ RowMajorBit, + Alignment = evaluator<ArgType>::Alignment }; EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& t) : m_argImpl(t.nestedExpression()) {} @@ -331,7 +336,8 @@ struct evaluator<CwiseNullaryOp<NullaryOp,PlainObjectType> > & ( HereditaryBits | (functor_has_linear_access<NullaryOp>::ret ? LinearAccessBit : 0) | (functor_traits<NullaryOp>::PacketAccess ? PacketAccessBit : 0))) - | (functor_traits<NullaryOp>::IsRepeatable ? 0 : EvalBeforeNestingBit) // FIXME EvalBeforeNestingBit should be needed anymore + | (functor_traits<NullaryOp>::IsRepeatable ? 0 : EvalBeforeNestingBit), // FIXME EvalBeforeNestingBit should be needed anymore + Alignment = 0 // FIXME alignment should not matter here, perhaps we could set it to AlignMax?? 
}; EIGEN_DEVICE_FUNC explicit evaluator(const XprType& n) @@ -378,9 +384,9 @@ struct unary_evaluator<CwiseUnaryOp<UnaryOp, ArgType>, IndexBased > enum { CoeffReadCost = evaluator<ArgType>::CoeffReadCost + functor_traits<UnaryOp>::Cost, - Flags = evaluator<ArgType>::Flags & ( - HereditaryBits | LinearAccessBit | AlignedBit - | (functor_traits<UnaryOp>::PacketAccess ? PacketAccessBit : 0)) + Flags = evaluator<ArgType>::Flags + & (HereditaryBits | LinearAccessBit | (functor_traits<UnaryOp>::PacketAccess ? PacketAccessBit : 0)), + Alignment = evaluator<ArgType>::Alignment }; EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op) @@ -447,13 +453,13 @@ struct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IndexBased, IndexBase Flags0 = (int(LhsFlags) | int(RhsFlags)) & ( HereditaryBits | (int(LhsFlags) & int(RhsFlags) & - ( AlignedBit - | (StorageOrdersAgree ? LinearAccessBit : 0) + ( (StorageOrdersAgree ? LinearAccessBit : 0) | (functor_traits<BinaryOp>::PacketAccess && StorageOrdersAgree && SameType ? PacketAccessBit : 0) ) ) ), - Flags = (Flags0 & ~RowMajorBit) | (LhsFlags & RowMajorBit) + Flags = (Flags0 & ~RowMajorBit) | (LhsFlags & RowMajorBit), + Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator<Lhs>::Alignment,evaluator<Rhs>::Alignment) }; EIGEN_DEVICE_FUNC explicit binary_evaluator(const XprType& xpr) @@ -506,7 +512,9 @@ struct unary_evaluator<CwiseUnaryView<UnaryOp, ArgType>, IndexBased> enum { CoeffReadCost = evaluator<ArgType>::CoeffReadCost + functor_traits<UnaryOp>::Cost, - Flags = (evaluator<ArgType>::Flags & (HereditaryBits | LinearAccessBit | DirectAccessBit)) + Flags = (evaluator<ArgType>::Flags & (HereditaryBits | LinearAccessBit | DirectAccessBit)), + + Alignment = 0 // FIXME it is not very clear why alignment is necessarily lost... 
}; EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op) @@ -641,7 +649,6 @@ struct evaluator<Map<PlainObjectType, MapOptions, StrideType> > HasNoInnerStride = InnerStrideAtCompileTime == 1, HasNoOuterStride = StrideType::OuterStrideAtCompileTime == 0, HasNoStride = HasNoInnerStride && HasNoOuterStride, - IsAligned = bool(EIGEN_MAX_ALIGN_BYTES>0) && ((int(MapOptions)&Aligned)==Aligned), IsDynamicSize = PlainObjectType::SizeAtCompileTime==Dynamic, // TODO: should check for smaller packet types once we can handle multi-sized packet types @@ -653,10 +660,13 @@ struct evaluator<Map<PlainObjectType, MapOptions, StrideType> > || ( OuterStrideAtCompileTime!=Dynamic && ((static_cast<int>(sizeof(Scalar))*OuterStrideAtCompileTime) % AlignBytes)==0 ) ), Flags0 = evaluator<PlainObjectType>::Flags, - Flags1 = IsAligned ? (int(Flags0) | AlignedBit) : (int(Flags0) & ~AlignedBit), - Flags2 = (bool(HasNoStride) || bool(PlainObjectType::IsVectorAtCompileTime)) - ? int(Flags1) : int(Flags1 & ~LinearAccessBit), - Flags = KeepsPacketAccess ? int(Flags2) : (int(Flags2) & ~PacketAccessBit) + //Flags1 = IsAligned ? (int(Flags0) | AlignedBit) : (int(Flags0) & ~AlignedBit), + Flags1 = (bool(HasNoStride) || bool(PlainObjectType::IsVectorAtCompileTime)) + ? int(Flags0) : int(Flags0 & ~LinearAccessBit), + Flags = KeepsPacketAccess ? 
int(Flags1) : (int(Flags1) & ~PacketAccessBit), + + //IsAligned = bool(EIGEN_MAX_ALIGN_BYTES>0) && ((int(MapOptions)&int(AlignedMask))>0), + Alignment = int(MapOptions)&int(AlignedMask) }; EIGEN_DEVICE_FUNC explicit evaluator(const XprType& map) @@ -673,7 +683,8 @@ struct evaluator<Ref<PlainObjectType, RefOptions, StrideType> > typedef Ref<PlainObjectType, RefOptions, StrideType> XprType; enum { - Flags = evaluator<Map<PlainObjectType, RefOptions, StrideType> >::Flags + Flags = evaluator<Map<PlainObjectType, RefOptions, StrideType> >::Flags, + Alignment = evaluator<Map<PlainObjectType, RefOptions, StrideType> >::Alignment }; EIGEN_DEVICE_FUNC explicit evaluator(const XprType& ref) @@ -717,17 +728,17 @@ struct evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> > && (InnerStrideAtCompileTime == 1) ? PacketAccessBit : 0, - // TODO: should check for smaller packet types once we can handle multi-sized packet types - AlignBytes = int(packet_traits<Scalar>::size) * sizeof(Scalar), - - MaskAlignedBit = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % AlignBytes) == 0)) ? AlignedBit : 0, FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1 || (InnerPanel && (evaluator<ArgType>::Flags&LinearAccessBit))) ? LinearAccessBit : 0, FlagsRowMajorBit = XprType::Flags&RowMajorBit, Flags0 = evaluator<ArgType>::Flags & ( (HereditaryBits & ~RowMajorBit) | DirectAccessBit | - MaskPacketAccessBit | - MaskAlignedBit), - Flags = Flags0 | FlagsLinearAccessBit | FlagsRowMajorBit + MaskPacketAccessBit), + Flags = Flags0 | FlagsLinearAccessBit | FlagsRowMajorBit, + + // TODO: should check for smaller packet types once we can handle multi-sized packet types + AlignBytes = int(packet_traits<Scalar>::size) * sizeof(Scalar), + Alignment0 = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % AlignBytes) == 0)) ? 
AlignBytes : 0, + Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator<ArgType>::Alignment, Alignment0) }; typedef block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel> block_evaluator_type; EIGEN_DEVICE_FUNC explicit evaluator(const XprType& block) : block_evaluator_type(block) {} @@ -833,11 +844,8 @@ struct block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel, /* HasDirectAc EIGEN_DEVICE_FUNC explicit block_evaluator(const XprType& block) : mapbase_evaluator<XprType, typename XprType::PlainObject>(block) { - // TODO: should check for smaller packet types once we can handle multi-sized packet types - const int AlignBytes = int(packet_traits<Scalar>::size) * sizeof(Scalar); - EIGEN_ONLY_USED_FOR_DEBUG(AlignBytes) // FIXME this should be an internal assertion - eigen_assert(EIGEN_IMPLIES(evaluator<XprType>::Flags&AlignedBit, (size_t(block.data()) % AlignBytes) == 0) && "data is not aligned"); + eigen_assert(((size_t(block.data()) % EIGEN_PLAIN_ENUM_MAX(1,evaluator<XprType>::Alignment)) == 0) && "data is not aligned"); } }; @@ -856,7 +864,9 @@ struct evaluator<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> > + EIGEN_SIZE_MAX(evaluator<ThenMatrixType>::CoeffReadCost, evaluator<ElseMatrixType>::CoeffReadCost), - Flags = (unsigned int)evaluator<ThenMatrixType>::Flags & evaluator<ElseMatrixType>::Flags & HereditaryBits + Flags = (unsigned int)evaluator<ThenMatrixType>::Flags & evaluator<ElseMatrixType>::Flags & HereditaryBits, + + Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator<ThenMatrixType>::Alignment, evaluator<ElseMatrixType>::Alignment) }; inline EIGEN_DEVICE_FUNC explicit evaluator(const XprType& select) @@ -908,7 +918,9 @@ struct unary_evaluator<Replicate<ArgType, RowFactor, ColFactor> > enum { CoeffReadCost = evaluator<ArgTypeNestedCleaned>::CoeffReadCost, - Flags = (evaluator<ArgTypeNestedCleaned>::Flags & HereditaryBits & ~RowMajorBit) | (traits<XprType>::Flags & RowMajorBit) + Flags = (evaluator<ArgTypeNestedCleaned>::Flags & HereditaryBits & 
~RowMajorBit) | (traits<XprType>::Flags & RowMajorBit), + + Alignment = evaluator<ArgTypeNestedCleaned>::Alignment }; EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& replicate) @@ -992,7 +1004,9 @@ struct evaluator<PartialReduxExpr<ArgType, MemberOp, Direction> > CoeffReadCost = TraversalSize==Dynamic ? Dynamic : TraversalSize * evaluator<ArgType>::CoeffReadCost + int(CostOpType::value), - Flags = (traits<XprType>::Flags&RowMajorBit) | (evaluator<ArgType>::Flags&HereditaryBits) + Flags = (traits<XprType>::Flags&RowMajorBit) | (evaluator<ArgType>::Flags&HereditaryBits), + + Alignment = 0 // FIXME this could be improved }; EIGEN_DEVICE_FUNC explicit evaluator(const XprType expr) @@ -1028,7 +1042,8 @@ struct evaluator_wrapper_base typedef typename remove_all<typename XprType::NestedExpressionType>::type ArgType; enum { CoeffReadCost = evaluator<ArgType>::CoeffReadCost, - Flags = evaluator<ArgType>::Flags + Flags = evaluator<ArgType>::Flags, + Alignment = evaluator<ArgType>::Alignment }; EIGEN_DEVICE_FUNC explicit evaluator_wrapper_base(const ArgType& arg) : m_argImpl(arg) {} @@ -1144,7 +1159,9 @@ struct unary_evaluator<Reverse<ArgType, Direction> > LinearAccess = ( (Direction==BothDirections) && (int(Flags0)&PacketAccessBit) ) ? LinearAccessBit : 0, - Flags = int(Flags0) & (HereditaryBits | PacketAccessBit | LinearAccess) + Flags = int(Flags0) & (HereditaryBits | PacketAccessBit | LinearAccess), + + Alignment = 0 // FIXME in some rare cases, Alignment could be preserved, like a Vector4f. 
}; typedef internal::reverse_packet_cond<PacketScalar,ReversePacket> reverse_packet; @@ -1226,7 +1243,9 @@ struct evaluator<Diagonal<ArgType, DiagIndex> > enum { CoeffReadCost = evaluator<ArgType>::CoeffReadCost, - Flags = (unsigned int)evaluator<ArgType>::Flags & (HereditaryBits | LinearAccessBit | DirectAccessBit) & ~RowMajorBit + Flags = (unsigned int)evaluator<ArgType>::Flags & (HereditaryBits | LinearAccessBit | DirectAccessBit) & ~RowMajorBit, + + Alignment = 0 }; EIGEN_DEVICE_FUNC explicit evaluator(const XprType& diagonal) diff --git a/Eigen/src/Core/DenseCoeffsBase.h b/Eigen/src/Core/DenseCoeffsBase.h index f08380bed..11e2a1809 100644 --- a/Eigen/src/Core/DenseCoeffsBase.h +++ b/Eigen/src/Core/DenseCoeffsBase.h @@ -602,11 +602,11 @@ struct first_aligned_impl<Derived, false> * documentation. */ template<typename Derived> -static inline Index first_aligned(const Derived& m) +static inline Index first_aligned(const DenseBase<Derived>& m) { return first_aligned_impl - <Derived, (Derived::Flags & AlignedBit) || !(Derived::Flags & DirectAccessBit)> - ::run(m); + <Derived, (evaluator<Derived>::Alignment > 0 ) || !(Derived::Flags & DirectAccessBit)> // FIXME Alignment! + ::run(m.derived()); } template<typename Derived, bool HasDirectAccess = has_direct_access<Derived>::ret> diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index cbb15f79d..4cc5f656f 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -450,22 +450,22 @@ pmadd(const Packet& a, { return padd(pmul(a, b),c); } /** \internal \returns a packet version of \a *from. - * If LoadMode equals #Aligned, \a from must be 16 bytes aligned */ -template<typename Packet, int LoadMode> + * The pointer \a from must be aligned on a \a Alignment bytes boundary. 
*/ +template<typename Packet, int Alignment> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt(const typename unpacket_traits<Packet>::type* from) { - if(LoadMode == Aligned) + if(Alignment >= unpacket_traits<Packet>::size*sizeof(typename unpacket_traits<Packet>::type)) return pload<Packet>(from); else return ploadu<Packet>(from); } /** \internal copy the packet \a from to \a *to. - * If StoreMode equals #Aligned, \a to must be 16 bytes aligned */ -template<typename Scalar, typename Packet, int LoadMode> + * The pointer \a from must be aligned on a \a Alignment bytes boundary. */ +template<typename Scalar, typename Packet, int Alignment> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret(Scalar* to, const Packet& from) { - if(LoadMode == Aligned) + if(Alignment >= unpacket_traits<Packet>::size*sizeof(typename unpacket_traits<Packet>::type)) pstore(to, from); else pstoreu(to, from); diff --git a/Eigen/src/Core/Map.h b/Eigen/src/Core/Map.h index 2b5971730..3a8375da9 100644 --- a/Eigen/src/Core/Map.h +++ b/Eigen/src/Core/Map.h @@ -19,7 +19,7 @@ namespace Eigen { * \brief A matrix or vector expression mapping an existing array of data. * * \tparam PlainObjectType the equivalent matrix type of the mapped data - * \tparam MapOptions specifies whether the pointer is \c #Aligned, or \c #Unaligned. + * \tparam MapOptions specifies the pointer alignment in bytes. It can be: \c #Aligned128, , \c #Aligned64, \c #Aligned32, \c #Aligned16, \c #Aligned8 or \c #Unaligned. * The default is \c #Unaligned. * \tparam StrideType optionally specifies strides. By default, Map assumes the memory layout * of an ordinary, contiguous array. This can be overridden by specifying strides. @@ -77,7 +77,7 @@ struct traits<Map<PlainObjectType, MapOptions, StrideType> > OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0 ? 
int(PlainObjectType::OuterStrideAtCompileTime) : int(StrideType::OuterStrideAtCompileTime), - IsAligned = bool(EIGEN_MAX_ALIGN_BYTES>0) && ((int(MapOptions)&Aligned)==Aligned), + Alignment = int(MapOptions)&int(AlignedMask), Flags0 = TraitsBase::Flags & (~NestByRefBit), Flags = is_lvalue<PlainObjectType>::value ? int(Flags0) : (int(Flags0) & ~LvalueBit) }; diff --git a/Eigen/src/Core/MapBase.h b/Eigen/src/Core/MapBase.h index b175a3fa0..ae28d4db6 100644 --- a/Eigen/src/Core/MapBase.h +++ b/Eigen/src/Core/MapBase.h @@ -160,9 +160,8 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors> EIGEN_DEVICE_FUNC void checkSanity() const { - // TODO "IsAligned" should be replaced to handle arbitrary alignment #if EIGEN_MAX_ALIGN_BYTES>0 - eigen_assert(EIGEN_IMPLIES(internal::traits<Derived>::IsAligned, (size_t(m_data) % EIGEN_MAX_ALIGN_BYTES) == 0) && "data is not aligned"); + eigen_assert(((size_t(m_data) % EIGEN_PLAIN_ENUM_MAX(1,internal::traits<Derived>::Alignment)) == 0) && "data is not aligned"); #endif } diff --git a/Eigen/src/Core/Matrix.h b/Eigen/src/Core/Matrix.h index 83c1ef2c8..e67fff6c5 100644 --- a/Eigen/src/Core/Matrix.h +++ b/Eigen/src/Core/Matrix.h @@ -139,6 +139,18 @@ namespace internal { template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols> struct traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > { +private: + enum { + row_major_bit = _Options&RowMajor ? RowMajorBit : 0, + is_dynamic_size_storage = _MaxRows==Dynamic || _MaxCols==Dynamic, + max_size = is_dynamic_size_storage ? Dynamic : _MaxRows*_MaxCols, + default_alignment = compute_default_alignment<_Scalar,max_size>::value, + actual_alignment = ((_Options&DontAlign)==0) ? default_alignment : 0, + required_alignment = packet_traits<_Scalar>::size * sizeof(_Scalar), // FIXME ask packet_traits for the true required alignment + packet_access_bit = packet_traits<_Scalar>::Vectorizable && (actual_alignment>=required_alignment) ? 
PacketAccessBit : 0 + }; + +public: typedef _Scalar Scalar; typedef Dense StorageKind; typedef Eigen::Index StorageIndex; @@ -149,11 +161,13 @@ struct traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > MaxRowsAtCompileTime = _MaxRows, MaxColsAtCompileTime = _MaxCols, Flags = compute_matrix_flags<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>::ret, - // FIXME, the following flag in only used to define NeedsToAlign in PlainObjectBase - EvaluatorFlags = compute_matrix_evaluator_flags<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>::ret, Options = _Options, InnerStrideAtCompileTime = 1, - OuterStrideAtCompileTime = (Options&RowMajor) ? ColsAtCompileTime : RowsAtCompileTime + OuterStrideAtCompileTime = (Options&RowMajor) ? ColsAtCompileTime : RowsAtCompileTime, + + // FIXME, the following flag in only used to define NeedsToAlign in PlainObjectBase + EvaluatorFlags = LinearAccessBit | DirectAccessBit | packet_access_bit | row_major_bit, + Alignment = actual_alignment }; }; } diff --git a/Eigen/src/Core/PlainObjectBase.h b/Eigen/src/Core/PlainObjectBase.h index 9cb32e7d8..e83b92476 100644 --- a/Eigen/src/Core/PlainObjectBase.h +++ b/Eigen/src/Core/PlainObjectBase.h @@ -116,20 +116,20 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type typedef Eigen::Map<Derived, Unaligned> MapType; friend class Eigen::Map<const Derived, Unaligned>; typedef const Eigen::Map<const Derived, Unaligned> ConstMapType; - friend class Eigen::Map<Derived, Aligned>; - typedef Eigen::Map<Derived, Aligned> AlignedMapType; - friend class Eigen::Map<const Derived, Aligned>; - typedef const Eigen::Map<const Derived, Aligned> ConstAlignedMapType; + friend class Eigen::Map<Derived, AlignedMax>; + typedef Eigen::Map<Derived, AlignedMax> AlignedMapType; + friend class Eigen::Map<const Derived, AlignedMax>; + typedef const Eigen::Map<const Derived, AlignedMax> ConstAlignedMapType; template<typename StrideType> struct StridedMapType { typedef 
Eigen::Map<Derived, Unaligned, StrideType> type; }; template<typename StrideType> struct StridedConstMapType { typedef Eigen::Map<const Derived, Unaligned, StrideType> type; }; - template<typename StrideType> struct StridedAlignedMapType { typedef Eigen::Map<Derived, Aligned, StrideType> type; }; - template<typename StrideType> struct StridedConstAlignedMapType { typedef Eigen::Map<const Derived, Aligned, StrideType> type; }; + template<typename StrideType> struct StridedAlignedMapType { typedef Eigen::Map<Derived, AlignedMax, StrideType> type; }; + template<typename StrideType> struct StridedConstAlignedMapType { typedef Eigen::Map<const Derived, AlignedMax, StrideType> type; }; protected: DenseStorage<Scalar, Base::MaxSizeAtCompileTime, Base::RowsAtCompileTime, Base::ColsAtCompileTime, Options> m_storage; public: - enum { NeedsToAlign = SizeAtCompileTime != Dynamic && (internal::traits<Derived>::EvaluatorFlags & AlignedBit) != 0 }; + enum { NeedsToAlign = (SizeAtCompileTime != Dynamic) && (internal::traits<Derived>::Alignment>0) }; EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign) EIGEN_DEVICE_FUNC diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h index 43ba86193..1f5d7addd 100755 --- a/Eigen/src/Core/ProductEvaluators.h +++ b/Eigen/src/Core/ProductEvaluators.h @@ -430,24 +430,22 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape, LhsFlags = LhsEtorType::Flags, RhsFlags = RhsEtorType::Flags, + LhsAlignment = LhsEtorType::Alignment, + RhsAlignment = RhsEtorType::Alignment, + + LhsIsAligned = int(LhsAlignment) >= int(sizeof(Scalar)*PacketSize), // FIXME compare to required alignment + RhsIsAligned = int(RhsAlignment) >= int(sizeof(Scalar)*PacketSize), + LhsRowMajor = LhsFlags & RowMajorBit, RhsRowMajor = RhsFlags & RowMajorBit, SameType = is_same<typename LhsNestedCleaned::Scalar,typename RhsNestedCleaned::Scalar>::value, CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit) - && 
(ColsAtCompileTime == Dynamic - || ( (ColsAtCompileTime % packet_traits<Scalar>::size) == 0 - && (RhsFlags&AlignedBit) - ) - ), + && (ColsAtCompileTime == Dynamic || ( (ColsAtCompileTime % PacketSize) == 0 && RhsIsAligned ) ), CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit) - && (RowsAtCompileTime == Dynamic - || ( (RowsAtCompileTime % packet_traits<Scalar>::size) == 0 - && (LhsFlags&AlignedBit) - ) - ), + && (RowsAtCompileTime == Dynamic || ( (RowsAtCompileTime % PacketSize) == 0 && LhsIsAligned ) ), EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1 : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0 @@ -455,11 +453,13 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape, Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit) | (EvalToRowMajor ? RowMajorBit : 0) - | (CanVectorizeLhs ? (LhsFlags & AlignedBit) : 0) - | (CanVectorizeRhs ? (RhsFlags & AlignedBit) : 0) // TODO enable vectorization for mixed types | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0), + Alignment = CanVectorizeLhs ? LhsAlignment + : CanVectorizeRhs ? RhsAlignment + : 0, + /* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside * of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner * loop of the product might be vectorized. This is the meaning of CanVectorizeInner. 
Since it doesn't affect @@ -469,7 +469,7 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape, && LhsRowMajor && (!RhsRowMajor) && (LhsFlags & RhsFlags & ActualPacketAccessBit) - && (LhsFlags & RhsFlags & AlignedBit) + && (LhsIsAligned && RhsIsAligned) && (InnerSize % packet_traits<Scalar>::size == 0) }; @@ -706,7 +706,8 @@ public: //_Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && ((!_PacketOnDiag) || (_SameTypes && bool(int(DiagFlags)&PacketAccessBit))), _Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && _SameTypes && (_ScalarAccessOnDiag || (bool(int(DiagFlags)&PacketAccessBit))), _LinearAccessMask = (MatrixType::RowsAtCompileTime==1 || MatrixType::ColsAtCompileTime==1) ? LinearAccessBit : 0, - Flags = ((HereditaryBits|_LinearAccessMask|AlignedBit) & (unsigned int)(MatrixFlags)) | (_Vectorizable ? PacketAccessBit : 0) + Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixFlags)) | (_Vectorizable ? PacketAccessBit : 0), + Alignment = evaluator<MatrixType>::Alignment }; diagonal_product_evaluator_base(const MatrixType &mat, const DiagonalType &diag) @@ -732,7 +733,7 @@ protected: { enum { InnerSize = (MatrixType::Flags & RowMajorBit) ? MatrixType::ColsAtCompileTime : MatrixType::RowsAtCompileTime, - DiagonalPacketLoadMode = (LoadMode == Aligned && (((InnerSize%16) == 0) || (int(DiagFlags)&AlignedBit)==AlignedBit) ? Aligned : Unaligned) + DiagonalPacketLoadMode = EIGEN_PLAIN_ENUM_MIN(LoadMode,((InnerSize%16) == 0) ? int(Aligned16) : int(evaluator<DiagonalType>::Alignment)) // FIXME hardcoded 16!! 
}; return internal::pmul(m_matImpl.template packet<LoadMode>(row, col), m_diagImpl.template packet<DiagonalPacketLoadMode>(id)); diff --git a/Eigen/src/Core/Redux.h b/Eigen/src/Core/Redux.h index fea4e2895..0c25223aa 100644 --- a/Eigen/src/Core/Redux.h +++ b/Eigen/src/Core/Redux.h @@ -165,7 +165,7 @@ struct redux_vec_unroller<Func, Derived, Start, 1> index = Start * packet_traits<typename Derived::Scalar>::size, outer = index / int(Derived::InnerSizeAtCompileTime), inner = index % int(Derived::InnerSizeAtCompileTime), - alignment = (Derived::Flags & AlignedBit) ? Aligned : Unaligned + alignment = Derived::Alignment }; typedef typename Derived::Scalar Scalar; @@ -222,10 +222,10 @@ struct redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling> const Index size = mat.size(); const Index packetSize = packet_traits<Scalar>::size; - const Index alignedStart = internal::first_aligned(mat); + const Index alignedStart = internal::first_aligned(mat.nestedExpression()); enum { - alignment = (bool(Derived::Flags & DirectAccessBit) && bool(packet_traits<Scalar>::AlignedOnScalar)) || bool(Derived::Flags & AlignedBit) - ? Aligned : Unaligned + alignment0 = (bool(Derived::Flags & DirectAccessBit) && bool(packet_traits<Scalar>::AlignedOnScalar)) ? 
int(sizeof(Scalar)*packetSize) : int(Unaligned), // FIXME take into account alignment requirement + alignment = EIGEN_PLAIN_ENUM_MAX(alignment0, Derived::Alignment) }; const Index alignedSize2 = ((size-alignedStart)/(2*packetSize))*(2*packetSize); const Index alignedSize = ((size-alignedStart)/(packetSize))*(packetSize); @@ -352,7 +352,8 @@ public: IsRowMajor = XprType::IsRowMajor, SizeAtCompileTime = XprType::SizeAtCompileTime, InnerSizeAtCompileTime = XprType::InnerSizeAtCompileTime, - CoeffReadCost = evaluator<XprType>::CoeffReadCost + CoeffReadCost = evaluator<XprType>::CoeffReadCost, + Alignment = evaluator<XprType>::Alignment }; EIGEN_DEVICE_FUNC Index rows() const { return m_xpr.rows(); } @@ -385,6 +386,8 @@ public: PacketReturnType packetByOuterInner(Index outer, Index inner) const { return m_evaluator.template packet<LoadMode>(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); } + const XprType & nestedExpression() const { return m_xpr; } + protected: typename internal::evaluator<XprType>::nestedType m_evaluator; const XprType &m_xpr; diff --git a/Eigen/src/Core/Ref.h b/Eigen/src/Core/Ref.h index ea5a2bd5c..61de5ed17 100644 --- a/Eigen/src/Core/Ref.h +++ b/Eigen/src/Core/Ref.h @@ -18,7 +18,7 @@ namespace Eigen { * \brief A matrix or vector expression mapping an existing expression * * \tparam PlainObjectType the equivalent matrix type of the mapped data - * \tparam Options specifies whether the pointer is \c #Aligned, or \c #Unaligned. + * \tparam MapOptions specifies the pointer alignment in bytes. It can be: \c #Aligned128, , \c #Aligned64, \c #Aligned32, \c #Aligned16, \c #Aligned8 or \c #Unaligned. * The default is \c #Unaligned. * \tparam StrideType optionally specifies strides. By default, Ref implies a contiguous storage along the inner dimension (inner stride==1), * but accepts a variable outer stride (leading dimension). 
@@ -92,7 +92,8 @@ struct traits<Ref<_PlainObjectType, _Options, _StrideType> > typedef _StrideType StrideType; enum { Options = _Options, - Flags = traits<Map<_PlainObjectType, _Options, _StrideType> >::Flags | NestByRefBit + Flags = traits<Map<_PlainObjectType, _Options, _StrideType> >::Flags | NestByRefBit, + Alignment = traits<Map<_PlainObjectType, _Options, _StrideType> >::Alignment }; template<typename Derived> struct match { @@ -104,7 +105,7 @@ struct traits<Ref<_PlainObjectType, _Options, _StrideType> > || (int(StrideType::InnerStrideAtCompileTime)==0 && int(Derived::InnerStrideAtCompileTime)==1), OuterStrideMatch = Derived::IsVectorAtCompileTime || int(StrideType::OuterStrideAtCompileTime)==int(Dynamic) || int(StrideType::OuterStrideAtCompileTime)==int(Derived::OuterStrideAtCompileTime), - AlignmentMatch = (_Options!=Aligned) || ((PlainObjectType::Flags&AlignedBit)==0) || ((traits<Derived>::Flags&AlignedBit)==AlignedBit), + AlignmentMatch = (int(traits<PlainObjectType>::Alignment)==int(Unaligned)) || (int(evaluator<Derived>::Alignment) >= int(Alignment)), // FIXME the first condition is not very clear, it should be replaced by the required alignment ScalarTypeMatch = internal::is_same<typename PlainObjectType::Scalar, typename Derived::Scalar>::value, MatchAtCompileTime = HasDirectAccess && StorageOrderMatch && InnerStrideMatch && OuterStrideMatch && AlignmentMatch && ScalarTypeMatch }; diff --git a/Eigen/src/Core/StableNorm.h b/Eigen/src/Core/StableNorm.h index f9cd01b7e..aca81f463 100644 --- a/Eigen/src/Core/StableNorm.h +++ b/Eigen/src/Core/StableNorm.h @@ -162,21 +162,27 @@ MatrixBase<Derived>::stableNorm() const RealScalar scale(0); RealScalar invScale(1); RealScalar ssq(0); // sum of square + + typedef typename internal::nested_eval<Derived,2>::type DerivedCopy; + typedef typename internal::remove_all<DerivedCopy>::type DerivedCopyClean; + DerivedCopy copy(derived()); + enum { - Alignment = (int(Flags)&DirectAccessBit) || (int(Flags)&AlignedBit) ? 
1 : 0 + CanAlign = (int(Flags)&DirectAccessBit) || (int(internal::evaluator<DerivedCopyClean>::Alignment)>0) // FIXME }; - typedef typename internal::conditional<Alignment, Ref<const Matrix<Scalar,Dynamic,1,0,blockSize,1>, Aligned>, - typename Base::ConstSegmentReturnType>::type SegmentWrapper; + typedef typename internal::conditional<CanAlign, Ref<const Matrix<Scalar,Dynamic,1,0,blockSize,1>, internal::evaluator<DerivedCopyClean>::Alignment>, + typename DerivedCopyClean + ::ConstSegmentReturnType>::type SegmentWrapper; Index n = size(); if(n==1) return abs(this->coeff(0)); - Index bi = internal::first_aligned(derived()); + Index bi = internal::first_aligned(copy); if (bi>0) - internal::stable_norm_kernel(this->head(bi), ssq, scale, invScale); + internal::stable_norm_kernel(copy.head(bi), ssq, scale, invScale); for (; bi<n; bi+=blockSize) - internal::stable_norm_kernel(SegmentWrapper(this->segment(bi,numext::mini(blockSize, n - bi))), ssq, scale, invScale); + internal::stable_norm_kernel(SegmentWrapper(copy.segment(bi,numext::mini(blockSize, n - bi))), ssq, scale, invScale); return scale * sqrt(ssq); } diff --git a/Eigen/src/Core/Transpose.h b/Eigen/src/Core/Transpose.h index e205cec4a..2152405d5 100644 --- a/Eigen/src/Core/Transpose.h +++ b/Eigen/src/Core/Transpose.h @@ -233,7 +233,7 @@ struct inplace_transpose_selector<MatrixType,true,true> { // PacketSize x Packet typedef typename MatrixType::Scalar Scalar; typedef typename internal::packet_traits<typename MatrixType::Scalar>::type Packet; const Index PacketSize = internal::packet_traits<Scalar>::size; - const Index Alignment = internal::evaluator<MatrixType>::Flags&AlignedBit ? 
Aligned : Unaligned; + const Index Alignment = internal::evaluator<MatrixType>::Alignment; PacketBlock<Packet> A; for (Index i=0; i<PacketSize; ++i) A.packet[i] = m.template packetByOuterInner<Alignment>(i,0); diff --git a/Eigen/src/Core/util/Constants.h b/Eigen/src/Core/util/Constants.h index 93c0786fa..3e811a173 100644 --- a/Eigen/src/Core/util/Constants.h +++ b/Eigen/src/Core/util/Constants.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr> +// Copyright (C) 2008-2015 Gael Guennebaud <gael.guennebaud@inria.fr> // Copyright (C) 2007-2009 Benoit Jacob <jacob.benoit.1@gmail.com> // // This Source Code Form is subject to the terms of the Mozilla @@ -140,7 +140,7 @@ const unsigned int LvalueBit = 0x20; */ const unsigned int DirectAccessBit = 0x40; -/** \ingroup flags +/* \ingroup flags * * means the first coefficient packet is guaranteed to be aligned. * An expression cannot has the AlignedBit without the PacketAccessBit flag. @@ -215,12 +215,31 @@ enum { }; /** \ingroup enums - * Enum for indicating whether an object is aligned or not. */ + * Enum for indicating whether a buffer is aligned or not. */ enum { - /** Object is not correctly aligned for vectorization. */ - Unaligned=0, - /** Object is aligned for vectorization. */ - Aligned=1 + Unaligned=0, /**< Data pointer has no specific alignment. */ + Aligned8=8, /**< Data pointer is aligned on a 8 bytes boundary. */ + Aligned16=16, /**< Data pointer is aligned on a 16 bytes boundary. */ + Aligned32=32, /**< Data pointer is aligned on a 32 bytes boundary. */ + Aligned64=64, /**< Data pointer is aligned on a 64 bytes boundary. */ + Aligned128=128, /**< Data pointer is aligned on a 128 bytes boundary. */ + AlignedMask=255, + Aligned=16, /**< \deprecated Synonym for Aligned16. 
*/ +#if EIGEN_MAX_ALIGN_BYTES==128 + AlignedMax = Aligned128 +#elif EIGEN_MAX_ALIGN_BYTES==64 + AlignedMax = Aligned64 +#elif EIGEN_MAX_ALIGN_BYTES==32 + AlignedMax = Aligned32 +#elif EIGEN_MAX_ALIGN_BYTES==16 + AlignedMax = Aligned16 +#elif EIGEN_MAX_ALIGN_BYTES==8 + AlignedMax = Aligned8 +#elif EIGEN_MAX_ALIGN_BYTES==0 + AlignedMax = Unaligned +#else +#error Invalid value for EIGEN_MAX_ALIGN_BYTES +#endif }; /** \ingroup enums diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h index 6dc1f6e3f..81e992392 100644 --- a/Eigen/src/Core/util/XprHelper.h +++ b/Eigen/src/Core/util/XprHelper.h @@ -192,43 +192,6 @@ class compute_matrix_flags enum { ret = DirectAccessBit | LvalueBit | NestByRefBit | row_major_bit }; }; -template<typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols> -class compute_matrix_evaluator_flags -{ - enum { - row_major_bit = Options&RowMajor ? RowMajorBit : 0, - is_dynamic_size_storage = MaxRows==Dynamic || MaxCols==Dynamic, - - // TODO: should check for smaller packet types once we can handle multi-sized packet types - align_bytes = int(packet_traits<Scalar>::size) * sizeof(Scalar), - - aligned_bit = - ( - ((Options&DontAlign)==0) - && ( -#if EIGEN_MAX_STATIC_ALIGN_BYTES!=0 - ((!is_dynamic_size_storage) && (((MaxCols*MaxRows*int(sizeof(Scalar))) % align_bytes) == 0)) -#else - 0 -#endif - - || - -#if EIGEN_MAX_ALIGN_BYTES!=0 - is_dynamic_size_storage -#else - 0 -#endif - - ) - ) ? AlignedBit : 0, - packet_access_bit = packet_traits<Scalar>::Vectorizable && aligned_bit ? PacketAccessBit : 0 - }; - - public: - enum { ret = LinearAccessBit | DirectAccessBit | packet_access_bit | row_major_bit | aligned_bit }; -}; - template<int _Rows, int _Cols> struct size_at_compile_time { enum { ret = (_Rows==Dynamic || _Cols==Dynamic) ? Dynamic : _Rows * _Cols }; |