author     Gael Guennebaud <g.gael@free.fr>    2015-08-06 15:31:07 +0200
committer  Gael Guennebaud <g.gael@free.fr>    2015-08-06 15:31:07 +0200
commit     1f5024332e47f295c991c3781d57d0466d41a9c8 (patch)
tree       154fa01d41076ca58bdbff8c3bb0850119df92fa /Eigen
parent     65186ef18d6212b3d09b1d619f1cf1019c2ae0fb (diff)
First part of a big refactoring of alignment control to enable the handling of arbitrarily aligned buffers. It includes:
- The AlignedBit flag is deprecated. Alignment is now specified by the evaluator through the 'Alignment' enum, e.g., evaluator<Xpr>::Alignment. Its value is in bytes.
- Add several enums to specify alignment: Aligned8, Aligned16, Aligned32, Aligned64, Aligned128. AlignedMax corresponds to EIGEN_MAX_ALIGN_BYTES. These enums are used to define the above Alignment value, and as the 'Options' template parameter of Map<> and Ref<>.
- The Aligned enum is now deprecated. It is now an alias for Aligned16.
- Currently, traits<Matrix<>>, traits<Array<>>, traits<Ref<>>, traits<Map<>>, and traits<Block<>> also expose the Alignment enum.
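
For illustration (editor's sketch, not part of the commit; the buffer and function names are hypothetical and a C++11 compiler is assumed), the new byte-valued options introduced here are meant to be used like this with Map<> and queried through the evaluator:

  #include <Eigen/Dense>
  using namespace Eigen;

  void alignment_sketch()
  {
    // Hypothetical 32-byte aligned buffer.
    alignas(32) float data[8] = {1,2,3,4,5,6,7,8};

    // The 'Options' parameter of Map<> now takes an alignment in bytes.
    Map<Matrix<float,8,1>, Aligned32>  v32(data);  // promises 32-byte alignment
    Map<Matrix<float,8,1>, Unaligned>  vu(data);   // no alignment promise
    Map<Matrix<float,8,1>, AlignedMax> vmax(data); // EIGEN_MAX_ALIGN_BYTES alignment
    Map<Matrix<float,8,1>, Aligned>    v16(data);  // deprecated, same as Aligned16

    // Evaluators expose their alignment in bytes instead of the boolean AlignedBit.
    static_assert(internal::evaluator<Map<Matrix<float,8,1>, Aligned32> >::Alignment == 32,
                  "evaluator alignment is expressed in bytes");
  }
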
Diffstat (limited to 'Eigen')
-rw-r--r--  Eigen/src/Core/AssignEvaluator.h              |  41
-rw-r--r--  Eigen/src/Core/Block.h                        |  10
-rw-r--r--  Eigen/src/Core/CoreEvaluators.h               |  87
-rw-r--r--  Eigen/src/Core/DenseCoeffsBase.h              |   6
-rw-r--r--  Eigen/src/Core/GenericPacketMath.h            |  12
-rw-r--r--  Eigen/src/Core/Map.h                          |   4
-rw-r--r--  Eigen/src/Core/MapBase.h                      |   3
-rw-r--r--  Eigen/src/Core/Matrix.h                       |  20
-rw-r--r--  Eigen/src/Core/PlainObjectBase.h              |  14
-rwxr-xr-x  Eigen/src/Core/ProductEvaluators.h            |  31
-rw-r--r--  Eigen/src/Core/Redux.h                        |  13
-rw-r--r--  Eigen/src/Core/Ref.h                          |   7
-rw-r--r--  Eigen/src/Core/StableNorm.h                   |  18
-rw-r--r--  Eigen/src/Core/Transpose.h                    |   2
-rw-r--r--  Eigen/src/Core/util/Constants.h               |  33
-rw-r--r--  Eigen/src/Core/util/XprHelper.h               |  37
-rw-r--r--  Eigen/src/Geometry/Quaternion.h               |  12
-rw-r--r--  Eigen/src/Geometry/arch/Geometry_SSE.h        |  10
-rw-r--r--  Eigen/src/Jacobi/Jacobi.h                     |  18
-rw-r--r--  Eigen/src/LU/arch/Inverse_SSE.h               |   8
-rw-r--r--  Eigen/src/SparseCore/SparseDiagonalProduct.h  |   8
21 files changed, 213 insertions, 181 deletions
diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h
index b0468dd64..39efb1d5a 100644
--- a/Eigen/src/Core/AssignEvaluator.h
+++ b/Eigen/src/Core/AssignEvaluator.h
@@ -28,18 +28,19 @@ template <typename DstEvaluator, typename SrcEvaluator, typename AssignFunc>
struct copy_using_evaluator_traits
{
typedef typename DstEvaluator::XprType Dst;
-
+ typedef typename Dst::Scalar DstScalar;
enum {
DstFlags = DstEvaluator::Flags,
- SrcFlags = SrcEvaluator::Flags
+ SrcFlags = SrcEvaluator::Flags,
+ RequiredAlignment = packet_traits<DstScalar>::size*sizeof(DstScalar) // FIXME ask packet_traits for the true alignment requirement
};
public:
enum {
- DstIsAligned = DstFlags & AlignedBit,
+ DstAlignment = DstEvaluator::Alignment,
+ SrcAlignment = SrcEvaluator::Alignment,
DstHasDirectAccess = DstFlags & DirectAccessBit,
- SrcIsAligned = SrcFlags & AlignedBit,
- JointAlignment = bool(DstIsAligned) && bool(SrcIsAligned) ? Aligned : Unaligned
+ JointAlignment = EIGEN_PLAIN_ENUM_MIN(DstAlignment,SrcAlignment)
};
private:
@@ -51,7 +52,7 @@ private:
: int(DstFlags)&RowMajorBit ? int(Dst::MaxColsAtCompileTime)
: int(Dst::MaxRowsAtCompileTime),
MaxSizeAtCompileTime = Dst::SizeAtCompileTime,
- PacketSize = packet_traits<typename Dst::Scalar>::size
+ PacketSize = packet_traits<DstScalar>::size
};
enum {
@@ -62,10 +63,10 @@ private:
&& (int(DstFlags) & int(SrcFlags) & ActualPacketAccessBit)
&& (functor_traits<AssignFunc>::PacketAccess),
MayInnerVectorize = MightVectorize && int(InnerSize)!=Dynamic && int(InnerSize)%int(PacketSize)==0
- && int(DstIsAligned) && int(SrcIsAligned),
+ && int(JointAlignment)>=int(RequiredAlignment),
MayLinearize = StorageOrdersAgree && (int(DstFlags) & int(SrcFlags) & LinearAccessBit),
MayLinearVectorize = MightVectorize && MayLinearize && DstHasDirectAccess
- && (DstIsAligned || MaxSizeAtCompileTime == Dynamic),
+ && ((int(DstAlignment)>=int(RequiredAlignment)) || MaxSizeAtCompileTime == Dynamic),
/* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
so it's only good for large enough sizes. */
MaySliceVectorize = MightVectorize && DstHasDirectAccess
@@ -107,8 +108,8 @@ public:
: int(NoUnrolling)
)
: int(Traversal) == int(LinearVectorizedTraversal)
- ? ( bool(MayUnrollCompletely) && bool(DstIsAligned) ? int(CompleteUnrolling)
- : int(NoUnrolling) )
+ ? ( bool(MayUnrollCompletely) && (int(DstAlignment)>=int(RequiredAlignment)) ? int(CompleteUnrolling)
+ : int(NoUnrolling) )
: int(Traversal) == int(LinearTraversal)
? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling)
: int(NoUnrolling) )
@@ -124,8 +125,9 @@ public:
EIGEN_DEBUG_VAR(DstFlags)
EIGEN_DEBUG_VAR(SrcFlags)
std::cerr.unsetf(std::ios::hex);
- EIGEN_DEBUG_VAR(DstIsAligned)
- EIGEN_DEBUG_VAR(SrcIsAligned)
+ EIGEN_DEBUG_VAR(DstAlignment)
+ EIGEN_DEBUG_VAR(SrcAlignment)
+ EIGEN_DEBUG_VAR(RequiredAlignment)
EIGEN_DEBUG_VAR(JointAlignment)
EIGEN_DEBUG_VAR(InnerSize)
EIGEN_DEBUG_VAR(InnerMaxSize)
@@ -360,11 +362,13 @@ struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, NoUnrolling>
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
{
const Index size = kernel.size();
- typedef packet_traits<typename Kernel::Scalar> PacketTraits;
+ typedef typename Kernel::Scalar Scalar;
+ typedef packet_traits<Scalar> PacketTraits;
enum {
packetSize = PacketTraits::size,
- dstIsAligned = int(Kernel::AssignmentTraits::DstIsAligned),
- dstAlignment = PacketTraits::AlignedOnScalar ? Aligned : dstIsAligned,
+ dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(Kernel::AssignmentTraits::RequiredAlignment),
+ dstAlignment = PacketTraits::AlignedOnScalar ? int(Kernel::AssignmentTraits::RequiredAlignment)
+ : int(Kernel::AssignmentTraits::DstAlignment),
srcAlignment = Kernel::AssignmentTraits::JointAlignment
};
const Index alignedStart = dstIsAligned ? 0 : internal::first_aligned(&kernel.dstEvaluator().coeffRef(0), size);
@@ -475,9 +479,10 @@ struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling>
typedef packet_traits<Scalar> PacketTraits;
enum {
packetSize = PacketTraits::size,
- alignable = PacketTraits::AlignedOnScalar,
- dstIsAligned = Kernel::AssignmentTraits::DstIsAligned,
- dstAlignment = alignable ? Aligned : int(dstIsAligned)
+ alignable = PacketTraits::AlignedOnScalar || int(Kernel::AssignmentTraits::DstAlignment)>=sizeof(Scalar),
+ dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(Kernel::AssignmentTraits::RequiredAlignment),
+ dstAlignment = alignable ? int(Kernel::AssignmentTraits::RequiredAlignment)
+ : int(Kernel::AssignmentTraits::DstAlignment)
};
const Scalar *dst_ptr = &kernel.dstEvaluator().coeffRef(0,0);
if((!bool(dstIsAligned)) && (size_t(dst_ptr) % sizeof(Scalar))>0)
diff --git a/Eigen/src/Core/Block.h b/Eigen/src/Core/Block.h
index aed6147c7..3748e259b 100644
--- a/Eigen/src/Core/Block.h
+++ b/Eigen/src/Core/Block.h
@@ -81,14 +81,16 @@ struct traits<Block<XprType, BlockRows, BlockCols, InnerPanel> > : traits<XprTyp
OuterStrideAtCompileTime = HasSameStorageOrderAsXprType
? int(outer_stride_at_compile_time<XprType>::ret)
: int(inner_stride_at_compile_time<XprType>::ret),
- // IsAligned is needed by MapBase's assertions
- // We can sefely set it to false here. Internal alignment errors will be detected by an eigen_internal_assert in the respective evaluator
- IsAligned = 0,
+
// FIXME, this traits is rather specialized for dense object and it needs to be cleaned further
FlagsLvalueBit = is_lvalue<XprType>::value ? LvalueBit : 0,
FlagsRowMajorBit = IsRowMajor ? RowMajorBit : 0,
- Flags = (traits<XprType>::Flags & (DirectAccessBit | (InnerPanel?CompressedAccessBit:0))) | FlagsLvalueBit | FlagsRowMajorBit
+ Flags = (traits<XprType>::Flags & (DirectAccessBit | (InnerPanel?CompressedAccessBit:0))) | FlagsLvalueBit | FlagsRowMajorBit,
// FIXME DirectAccessBit should not be handled by expressions
+ //
+ // Alignment is needed by MapBase's assertions
+ // We can safely set it to 0 here. Internal alignment errors will be detected by an eigen_internal_assert in the respective evaluator
+ Alignment = 0
};
};
diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h
index 6b0e3617d..910d04ecb 100644
--- a/Eigen/src/Core/CoreEvaluators.h
+++ b/Eigen/src/Core/CoreEvaluators.h
@@ -111,6 +111,10 @@ struct evaluator_base
typedef typename traits<ExpressionType>::StorageIndex StorageIndex;
// TODO that's not very nice to have to propagate all these traits. They are currently only needed to handle outer,inner indices.
typedef traits<ExpressionType> ExpressionTraits;
+
+ enum {
+ Alignment = 0
+ };
};
// -------------------- Matrix and Array --------------------
@@ -137,8 +141,8 @@ struct evaluator<PlainObjectBase<Derived> >
ColsAtCompileTime = PlainObjectType::ColsAtCompileTime,
CoeffReadCost = NumTraits<Scalar>::ReadCost,
- Flags = compute_matrix_evaluator_flags< Scalar,Derived::RowsAtCompileTime,Derived::ColsAtCompileTime,
- Derived::Options,Derived::MaxRowsAtCompileTime,Derived::MaxColsAtCompileTime>::ret
+ Flags = traits<Derived>::EvaluatorFlags,
+ Alignment = traits<Derived>::Alignment
};
EIGEN_DEVICE_FUNC evaluator()
@@ -255,7 +259,8 @@ struct unary_evaluator<Transpose<ArgType>, IndexBased>
enum {
CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
- Flags = evaluator<ArgType>::Flags ^ RowMajorBit
+ Flags = evaluator<ArgType>::Flags ^ RowMajorBit,
+ Alignment = evaluator<ArgType>::Alignment
};
EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& t) : m_argImpl(t.nestedExpression()) {}
@@ -331,7 +336,8 @@ struct evaluator<CwiseNullaryOp<NullaryOp,PlainObjectType> >
& ( HereditaryBits
| (functor_has_linear_access<NullaryOp>::ret ? LinearAccessBit : 0)
| (functor_traits<NullaryOp>::PacketAccess ? PacketAccessBit : 0)))
- | (functor_traits<NullaryOp>::IsRepeatable ? 0 : EvalBeforeNestingBit) // FIXME EvalBeforeNestingBit should be needed anymore
+ | (functor_traits<NullaryOp>::IsRepeatable ? 0 : EvalBeforeNestingBit), // FIXME EvalBeforeNestingBit should not be needed anymore
+ Alignment = 0 // FIXME alignment should not matter here, perhaps we could set it to AlignedMax??
};
EIGEN_DEVICE_FUNC explicit evaluator(const XprType& n)
@@ -378,9 +384,9 @@ struct unary_evaluator<CwiseUnaryOp<UnaryOp, ArgType>, IndexBased >
enum {
CoeffReadCost = evaluator<ArgType>::CoeffReadCost + functor_traits<UnaryOp>::Cost,
- Flags = evaluator<ArgType>::Flags & (
- HereditaryBits | LinearAccessBit | AlignedBit
- | (functor_traits<UnaryOp>::PacketAccess ? PacketAccessBit : 0))
+ Flags = evaluator<ArgType>::Flags
+ & (HereditaryBits | LinearAccessBit | (functor_traits<UnaryOp>::PacketAccess ? PacketAccessBit : 0)),
+ Alignment = evaluator<ArgType>::Alignment
};
EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op)
@@ -447,13 +453,13 @@ struct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IndexBased, IndexBase
Flags0 = (int(LhsFlags) | int(RhsFlags)) & (
HereditaryBits
| (int(LhsFlags) & int(RhsFlags) &
- ( AlignedBit
- | (StorageOrdersAgree ? LinearAccessBit : 0)
+ ( (StorageOrdersAgree ? LinearAccessBit : 0)
| (functor_traits<BinaryOp>::PacketAccess && StorageOrdersAgree && SameType ? PacketAccessBit : 0)
)
)
),
- Flags = (Flags0 & ~RowMajorBit) | (LhsFlags & RowMajorBit)
+ Flags = (Flags0 & ~RowMajorBit) | (LhsFlags & RowMajorBit),
+ Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator<Lhs>::Alignment,evaluator<Rhs>::Alignment)
};
EIGEN_DEVICE_FUNC explicit binary_evaluator(const XprType& xpr)
@@ -506,7 +512,9 @@ struct unary_evaluator<CwiseUnaryView<UnaryOp, ArgType>, IndexBased>
enum {
CoeffReadCost = evaluator<ArgType>::CoeffReadCost + functor_traits<UnaryOp>::Cost,
- Flags = (evaluator<ArgType>::Flags & (HereditaryBits | LinearAccessBit | DirectAccessBit))
+ Flags = (evaluator<ArgType>::Flags & (HereditaryBits | LinearAccessBit | DirectAccessBit)),
+
+ Alignment = 0 // FIXME it is not very clear why alignment is necessarily lost...
};
EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op)
@@ -641,7 +649,6 @@ struct evaluator<Map<PlainObjectType, MapOptions, StrideType> >
HasNoInnerStride = InnerStrideAtCompileTime == 1,
HasNoOuterStride = StrideType::OuterStrideAtCompileTime == 0,
HasNoStride = HasNoInnerStride && HasNoOuterStride,
- IsAligned = bool(EIGEN_MAX_ALIGN_BYTES>0) && ((int(MapOptions)&Aligned)==Aligned),
IsDynamicSize = PlainObjectType::SizeAtCompileTime==Dynamic,
// TODO: should check for smaller packet types once we can handle multi-sized packet types
@@ -653,10 +660,13 @@ struct evaluator<Map<PlainObjectType, MapOptions, StrideType> >
|| ( OuterStrideAtCompileTime!=Dynamic
&& ((static_cast<int>(sizeof(Scalar))*OuterStrideAtCompileTime) % AlignBytes)==0 ) ),
Flags0 = evaluator<PlainObjectType>::Flags,
- Flags1 = IsAligned ? (int(Flags0) | AlignedBit) : (int(Flags0) & ~AlignedBit),
- Flags2 = (bool(HasNoStride) || bool(PlainObjectType::IsVectorAtCompileTime))
- ? int(Flags1) : int(Flags1 & ~LinearAccessBit),
- Flags = KeepsPacketAccess ? int(Flags2) : (int(Flags2) & ~PacketAccessBit)
+ //Flags1 = IsAligned ? (int(Flags0) | AlignedBit) : (int(Flags0) & ~AlignedBit),
+ Flags1 = (bool(HasNoStride) || bool(PlainObjectType::IsVectorAtCompileTime))
+ ? int(Flags0) : int(Flags0 & ~LinearAccessBit),
+ Flags = KeepsPacketAccess ? int(Flags1) : (int(Flags1) & ~PacketAccessBit),
+
+ //IsAligned = bool(EIGEN_MAX_ALIGN_BYTES>0) && ((int(MapOptions)&int(AlignedMask))>0),
+ Alignment = int(MapOptions)&int(AlignedMask)
};
EIGEN_DEVICE_FUNC explicit evaluator(const XprType& map)
@@ -673,7 +683,8 @@ struct evaluator<Ref<PlainObjectType, RefOptions, StrideType> >
typedef Ref<PlainObjectType, RefOptions, StrideType> XprType;
enum {
- Flags = evaluator<Map<PlainObjectType, RefOptions, StrideType> >::Flags
+ Flags = evaluator<Map<PlainObjectType, RefOptions, StrideType> >::Flags,
+ Alignment = evaluator<Map<PlainObjectType, RefOptions, StrideType> >::Alignment
};
EIGEN_DEVICE_FUNC explicit evaluator(const XprType& ref)
@@ -717,17 +728,17 @@ struct evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> >
&& (InnerStrideAtCompileTime == 1)
? PacketAccessBit : 0,
- // TODO: should check for smaller packet types once we can handle multi-sized packet types
- AlignBytes = int(packet_traits<Scalar>::size) * sizeof(Scalar),
-
- MaskAlignedBit = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % AlignBytes) == 0)) ? AlignedBit : 0,
FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1 || (InnerPanel && (evaluator<ArgType>::Flags&LinearAccessBit))) ? LinearAccessBit : 0,
FlagsRowMajorBit = XprType::Flags&RowMajorBit,
Flags0 = evaluator<ArgType>::Flags & ( (HereditaryBits & ~RowMajorBit) |
DirectAccessBit |
- MaskPacketAccessBit |
- MaskAlignedBit),
- Flags = Flags0 | FlagsLinearAccessBit | FlagsRowMajorBit
+ MaskPacketAccessBit),
+ Flags = Flags0 | FlagsLinearAccessBit | FlagsRowMajorBit,
+
+ // TODO: should check for smaller packet types once we can handle multi-sized packet types
+ AlignBytes = int(packet_traits<Scalar>::size) * sizeof(Scalar),
+ Alignment0 = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % AlignBytes) == 0)) ? AlignBytes : 0,
+ Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator<ArgType>::Alignment, Alignment0)
};
typedef block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel> block_evaluator_type;
EIGEN_DEVICE_FUNC explicit evaluator(const XprType& block) : block_evaluator_type(block) {}
@@ -833,11 +844,8 @@ struct block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel, /* HasDirectAc
EIGEN_DEVICE_FUNC explicit block_evaluator(const XprType& block)
: mapbase_evaluator<XprType, typename XprType::PlainObject>(block)
{
- // TODO: should check for smaller packet types once we can handle multi-sized packet types
- const int AlignBytes = int(packet_traits<Scalar>::size) * sizeof(Scalar);
- EIGEN_ONLY_USED_FOR_DEBUG(AlignBytes)
// FIXME this should be an internal assertion
- eigen_assert(EIGEN_IMPLIES(evaluator<XprType>::Flags&AlignedBit, (size_t(block.data()) % AlignBytes) == 0) && "data is not aligned");
+ eigen_assert(((size_t(block.data()) % EIGEN_PLAIN_ENUM_MAX(1,evaluator<XprType>::Alignment)) == 0) && "data is not aligned");
}
};
@@ -856,7 +864,9 @@ struct evaluator<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >
+ EIGEN_SIZE_MAX(evaluator<ThenMatrixType>::CoeffReadCost,
evaluator<ElseMatrixType>::CoeffReadCost),
- Flags = (unsigned int)evaluator<ThenMatrixType>::Flags & evaluator<ElseMatrixType>::Flags & HereditaryBits
+ Flags = (unsigned int)evaluator<ThenMatrixType>::Flags & evaluator<ElseMatrixType>::Flags & HereditaryBits,
+
+ Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator<ThenMatrixType>::Alignment, evaluator<ElseMatrixType>::Alignment)
};
inline EIGEN_DEVICE_FUNC explicit evaluator(const XprType& select)
@@ -908,7 +918,9 @@ struct unary_evaluator<Replicate<ArgType, RowFactor, ColFactor> >
enum {
CoeffReadCost = evaluator<ArgTypeNestedCleaned>::CoeffReadCost,
- Flags = (evaluator<ArgTypeNestedCleaned>::Flags & HereditaryBits & ~RowMajorBit) | (traits<XprType>::Flags & RowMajorBit)
+ Flags = (evaluator<ArgTypeNestedCleaned>::Flags & HereditaryBits & ~RowMajorBit) | (traits<XprType>::Flags & RowMajorBit),
+
+ Alignment = evaluator<ArgTypeNestedCleaned>::Alignment
};
EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& replicate)
@@ -992,7 +1004,9 @@ struct evaluator<PartialReduxExpr<ArgType, MemberOp, Direction> >
CoeffReadCost = TraversalSize==Dynamic ? Dynamic
: TraversalSize * evaluator<ArgType>::CoeffReadCost + int(CostOpType::value),
- Flags = (traits<XprType>::Flags&RowMajorBit) | (evaluator<ArgType>::Flags&HereditaryBits)
+ Flags = (traits<XprType>::Flags&RowMajorBit) | (evaluator<ArgType>::Flags&HereditaryBits),
+
+ Alignment = 0 // FIXME this could be improved
};
EIGEN_DEVICE_FUNC explicit evaluator(const XprType expr)
@@ -1028,7 +1042,8 @@ struct evaluator_wrapper_base
typedef typename remove_all<typename XprType::NestedExpressionType>::type ArgType;
enum {
CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
- Flags = evaluator<ArgType>::Flags
+ Flags = evaluator<ArgType>::Flags,
+ Alignment = evaluator<ArgType>::Alignment
};
EIGEN_DEVICE_FUNC explicit evaluator_wrapper_base(const ArgType& arg) : m_argImpl(arg) {}
@@ -1144,7 +1159,9 @@ struct unary_evaluator<Reverse<ArgType, Direction> >
LinearAccess = ( (Direction==BothDirections) && (int(Flags0)&PacketAccessBit) )
? LinearAccessBit : 0,
- Flags = int(Flags0) & (HereditaryBits | PacketAccessBit | LinearAccess)
+ Flags = int(Flags0) & (HereditaryBits | PacketAccessBit | LinearAccess),
+
+ Alignment = 0 // FIXME in some rare cases, Alignment could be preserved, like a Vector4f.
};
typedef internal::reverse_packet_cond<PacketScalar,ReversePacket> reverse_packet;
@@ -1226,7 +1243,9 @@ struct evaluator<Diagonal<ArgType, DiagIndex> >
enum {
CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
- Flags = (unsigned int)evaluator<ArgType>::Flags & (HereditaryBits | LinearAccessBit | DirectAccessBit) & ~RowMajorBit
+ Flags = (unsigned int)evaluator<ArgType>::Flags & (HereditaryBits | LinearAccessBit | DirectAccessBit) & ~RowMajorBit,
+
+ Alignment = 0
};
EIGEN_DEVICE_FUNC explicit evaluator(const XprType& diagonal)
diff --git a/Eigen/src/Core/DenseCoeffsBase.h b/Eigen/src/Core/DenseCoeffsBase.h
index f08380bed..11e2a1809 100644
--- a/Eigen/src/Core/DenseCoeffsBase.h
+++ b/Eigen/src/Core/DenseCoeffsBase.h
@@ -602,11 +602,11 @@ struct first_aligned_impl<Derived, false>
* documentation.
*/
template<typename Derived>
-static inline Index first_aligned(const Derived& m)
+static inline Index first_aligned(const DenseBase<Derived>& m)
{
return first_aligned_impl
- <Derived, (Derived::Flags & AlignedBit) || !(Derived::Flags & DirectAccessBit)>
- ::run(m);
+ <Derived, (evaluator<Derived>::Alignment > 0 ) || !(Derived::Flags & DirectAccessBit)> // FIXME Alignment!
+ ::run(m.derived());
}
template<typename Derived, bool HasDirectAccess = has_direct_access<Derived>::ret>
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h
index cbb15f79d..4cc5f656f 100644
--- a/Eigen/src/Core/GenericPacketMath.h
+++ b/Eigen/src/Core/GenericPacketMath.h
@@ -450,22 +450,22 @@ pmadd(const Packet& a,
{ return padd(pmul(a, b),c); }
/** \internal \returns a packet version of \a *from.
- * If LoadMode equals #Aligned, \a from must be 16 bytes aligned */
-template<typename Packet, int LoadMode>
+ * The pointer \a from must be aligned on a \a Alignment bytes boundary. */
+template<typename Packet, int Alignment>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt(const typename unpacket_traits<Packet>::type* from)
{
- if(LoadMode == Aligned)
+ if(Alignment >= unpacket_traits<Packet>::size*sizeof(typename unpacket_traits<Packet>::type))
return pload<Packet>(from);
else
return ploadu<Packet>(from);
}
/** \internal copy the packet \a from to \a *to.
- * If StoreMode equals #Aligned, \a to must be 16 bytes aligned */
-template<typename Scalar, typename Packet, int LoadMode>
+ * The pointer \a from must be aligned on a \a Alignment bytes boundary. */
+template<typename Scalar, typename Packet, int Alignment>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret(Scalar* to, const Packet& from)
{
- if(LoadMode == Aligned)
+ if(Alignment >= unpacket_traits<Packet>::size*sizeof(typename unpacket_traits<Packet>::type))
pstore(to, from);
else
pstoreu(to, from);
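(Editor's illustration, not part of the patch: with SSE's 4-float packet the requirement is 4*sizeof(float) = 16 bytes, so the byte-based Alignment parameter now selects the aligned or unaligned path at compile time. The function name and pointer below are hypothetical; the pointer is assumed 16-byte aligned.)

  #include <Eigen/Core>
  using namespace Eigen;

  void packet_io_sketch(float* p16) // assumed to be 16-byte aligned
  {
    typedef internal::packet_traits<float>::type Packet;  // Packet4f with SSE
    Packet a = internal::ploadt<Packet, Aligned16>(p16);  // 16 >= 16 bytes -> pload (aligned)
    Packet b = internal::ploadt<Packet, Aligned8>(p16);   //  8 < 16 bytes  -> ploadu (unaligned)
    internal::pstoret<float, Packet, Unaligned>(p16, internal::padd(a, b)); // -> pstoreu
  }
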
diff --git a/Eigen/src/Core/Map.h b/Eigen/src/Core/Map.h
index 2b5971730..3a8375da9 100644
--- a/Eigen/src/Core/Map.h
+++ b/Eigen/src/Core/Map.h
@@ -19,7 +19,7 @@ namespace Eigen {
* \brief A matrix or vector expression mapping an existing array of data.
*
* \tparam PlainObjectType the equivalent matrix type of the mapped data
- * \tparam MapOptions specifies whether the pointer is \c #Aligned, or \c #Unaligned.
+ * \tparam MapOptions specifies the pointer alignment in bytes. It can be: \c #Aligned128, \c #Aligned64, \c #Aligned32, \c #Aligned16, \c #Aligned8 or \c #Unaligned.
* The default is \c #Unaligned.
* \tparam StrideType optionally specifies strides. By default, Map assumes the memory layout
* of an ordinary, contiguous array. This can be overridden by specifying strides.
@@ -77,7 +77,7 @@ struct traits<Map<PlainObjectType, MapOptions, StrideType> >
OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0
? int(PlainObjectType::OuterStrideAtCompileTime)
: int(StrideType::OuterStrideAtCompileTime),
- IsAligned = bool(EIGEN_MAX_ALIGN_BYTES>0) && ((int(MapOptions)&Aligned)==Aligned),
+ Alignment = int(MapOptions)&int(AlignedMask),
Flags0 = TraitsBase::Flags & (~NestByRefBit),
Flags = is_lvalue<PlainObjectType>::value ? int(Flags0) : (int(Flags0) & ~LvalueBit)
};
diff --git a/Eigen/src/Core/MapBase.h b/Eigen/src/Core/MapBase.h
index b175a3fa0..ae28d4db6 100644
--- a/Eigen/src/Core/MapBase.h
+++ b/Eigen/src/Core/MapBase.h
@@ -160,9 +160,8 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
EIGEN_DEVICE_FUNC
void checkSanity() const
{
- // TODO "IsAligned" should be replaced to handle arbitrary alignment
#if EIGEN_MAX_ALIGN_BYTES>0
- eigen_assert(EIGEN_IMPLIES(internal::traits<Derived>::IsAligned, (size_t(m_data) % EIGEN_MAX_ALIGN_BYTES) == 0) && "data is not aligned");
+ eigen_assert(((size_t(m_data) % EIGEN_PLAIN_ENUM_MAX(1,internal::traits<Derived>::Alignment)) == 0) && "data is not aligned");
#endif
}
diff --git a/Eigen/src/Core/Matrix.h b/Eigen/src/Core/Matrix.h
index 83c1ef2c8..e67fff6c5 100644
--- a/Eigen/src/Core/Matrix.h
+++ b/Eigen/src/Core/Matrix.h
@@ -139,6 +139,18 @@ namespace internal {
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
struct traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
{
+private:
+ enum {
+ row_major_bit = _Options&RowMajor ? RowMajorBit : 0,
+ is_dynamic_size_storage = _MaxRows==Dynamic || _MaxCols==Dynamic,
+ max_size = is_dynamic_size_storage ? Dynamic : _MaxRows*_MaxCols,
+ default_alignment = compute_default_alignment<_Scalar,max_size>::value,
+ actual_alignment = ((_Options&DontAlign)==0) ? default_alignment : 0,
+ required_alignment = packet_traits<_Scalar>::size * sizeof(_Scalar), // FIXME ask packet_traits for the true required alignment
+ packet_access_bit = packet_traits<_Scalar>::Vectorizable && (actual_alignment>=required_alignment) ? PacketAccessBit : 0
+ };
+
+public:
typedef _Scalar Scalar;
typedef Dense StorageKind;
typedef Eigen::Index StorageIndex;
@@ -149,11 +161,13 @@ struct traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
MaxRowsAtCompileTime = _MaxRows,
MaxColsAtCompileTime = _MaxCols,
Flags = compute_matrix_flags<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>::ret,
- // FIXME, the following flag in only used to define NeedsToAlign in PlainObjectBase
- EvaluatorFlags = compute_matrix_evaluator_flags<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>::ret,
Options = _Options,
InnerStrideAtCompileTime = 1,
- OuterStrideAtCompileTime = (Options&RowMajor) ? ColsAtCompileTime : RowsAtCompileTime
+ OuterStrideAtCompileTime = (Options&RowMajor) ? ColsAtCompileTime : RowsAtCompileTime,
+
+ // FIXME, the following flag is only used to define NeedsToAlign in PlainObjectBase
+ EvaluatorFlags = LinearAccessBit | DirectAccessBit | packet_access_bit | row_major_bit,
+ Alignment = actual_alignment
};
};
}
diff --git a/Eigen/src/Core/PlainObjectBase.h b/Eigen/src/Core/PlainObjectBase.h
index 9cb32e7d8..e83b92476 100644
--- a/Eigen/src/Core/PlainObjectBase.h
+++ b/Eigen/src/Core/PlainObjectBase.h
@@ -116,20 +116,20 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
typedef Eigen::Map<Derived, Unaligned> MapType;
friend class Eigen::Map<const Derived, Unaligned>;
typedef const Eigen::Map<const Derived, Unaligned> ConstMapType;
- friend class Eigen::Map<Derived, Aligned>;
- typedef Eigen::Map<Derived, Aligned> AlignedMapType;
- friend class Eigen::Map<const Derived, Aligned>;
- typedef const Eigen::Map<const Derived, Aligned> ConstAlignedMapType;
+ friend class Eigen::Map<Derived, AlignedMax>;
+ typedef Eigen::Map<Derived, AlignedMax> AlignedMapType;
+ friend class Eigen::Map<const Derived, AlignedMax>;
+ typedef const Eigen::Map<const Derived, AlignedMax> ConstAlignedMapType;
template<typename StrideType> struct StridedMapType { typedef Eigen::Map<Derived, Unaligned, StrideType> type; };
template<typename StrideType> struct StridedConstMapType { typedef Eigen::Map<const Derived, Unaligned, StrideType> type; };
- template<typename StrideType> struct StridedAlignedMapType { typedef Eigen::Map<Derived, Aligned, StrideType> type; };
- template<typename StrideType> struct StridedConstAlignedMapType { typedef Eigen::Map<const Derived, Aligned, StrideType> type; };
+ template<typename StrideType> struct StridedAlignedMapType { typedef Eigen::Map<Derived, AlignedMax, StrideType> type; };
+ template<typename StrideType> struct StridedConstAlignedMapType { typedef Eigen::Map<const Derived, AlignedMax, StrideType> type; };
protected:
DenseStorage<Scalar, Base::MaxSizeAtCompileTime, Base::RowsAtCompileTime, Base::ColsAtCompileTime, Options> m_storage;
public:
- enum { NeedsToAlign = SizeAtCompileTime != Dynamic && (internal::traits<Derived>::EvaluatorFlags & AlignedBit) != 0 };
+ enum { NeedsToAlign = (SizeAtCompileTime != Dynamic) && (internal::traits<Derived>::Alignment>0) };
EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)
EIGEN_DEVICE_FUNC
diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h
index 43ba86193..1f5d7addd 100755
--- a/Eigen/src/Core/ProductEvaluators.h
+++ b/Eigen/src/Core/ProductEvaluators.h
@@ -430,24 +430,22 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
LhsFlags = LhsEtorType::Flags,
RhsFlags = RhsEtorType::Flags,
+ LhsAlignment = LhsEtorType::Alignment,
+ RhsAlignment = RhsEtorType::Alignment,
+
+ LhsIsAligned = int(LhsAlignment) >= int(sizeof(Scalar)*PacketSize), // FIXME compare to required alignment
+ RhsIsAligned = int(RhsAlignment) >= int(sizeof(Scalar)*PacketSize),
+
LhsRowMajor = LhsFlags & RowMajorBit,
RhsRowMajor = RhsFlags & RowMajorBit,
SameType = is_same<typename LhsNestedCleaned::Scalar,typename RhsNestedCleaned::Scalar>::value,
CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit)
- && (ColsAtCompileTime == Dynamic
- || ( (ColsAtCompileTime % packet_traits<Scalar>::size) == 0
- && (RhsFlags&AlignedBit)
- )
- ),
+ && (ColsAtCompileTime == Dynamic || ( (ColsAtCompileTime % PacketSize) == 0 && RhsIsAligned ) ),
CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit)
- && (RowsAtCompileTime == Dynamic
- || ( (RowsAtCompileTime % packet_traits<Scalar>::size) == 0
- && (LhsFlags&AlignedBit)
- )
- ),
+ && (RowsAtCompileTime == Dynamic || ( (RowsAtCompileTime % PacketSize) == 0 && LhsIsAligned ) ),
EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1
: (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0
@@ -455,11 +453,13 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit)
| (EvalToRowMajor ? RowMajorBit : 0)
- | (CanVectorizeLhs ? (LhsFlags & AlignedBit) : 0)
- | (CanVectorizeRhs ? (RhsFlags & AlignedBit) : 0)
// TODO enable vectorization for mixed types
| (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0),
+ Alignment = CanVectorizeLhs ? LhsAlignment
+ : CanVectorizeRhs ? RhsAlignment
+ : 0,
+
/* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside
* of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner
* loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect
@@ -469,7 +469,7 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
&& LhsRowMajor
&& (!RhsRowMajor)
&& (LhsFlags & RhsFlags & ActualPacketAccessBit)
- && (LhsFlags & RhsFlags & AlignedBit)
+ && (LhsIsAligned && RhsIsAligned)
&& (InnerSize % packet_traits<Scalar>::size == 0)
};
@@ -706,7 +706,8 @@ public:
//_Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && ((!_PacketOnDiag) || (_SameTypes && bool(int(DiagFlags)&PacketAccessBit))),
_Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && _SameTypes && (_ScalarAccessOnDiag || (bool(int(DiagFlags)&PacketAccessBit))),
_LinearAccessMask = (MatrixType::RowsAtCompileTime==1 || MatrixType::ColsAtCompileTime==1) ? LinearAccessBit : 0,
- Flags = ((HereditaryBits|_LinearAccessMask|AlignedBit) & (unsigned int)(MatrixFlags)) | (_Vectorizable ? PacketAccessBit : 0)
+ Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixFlags)) | (_Vectorizable ? PacketAccessBit : 0),
+ Alignment = evaluator<MatrixType>::Alignment
};
diagonal_product_evaluator_base(const MatrixType &mat, const DiagonalType &diag)
@@ -732,7 +733,7 @@ protected:
{
enum {
InnerSize = (MatrixType::Flags & RowMajorBit) ? MatrixType::ColsAtCompileTime : MatrixType::RowsAtCompileTime,
- DiagonalPacketLoadMode = (LoadMode == Aligned && (((InnerSize%16) == 0) || (int(DiagFlags)&AlignedBit)==AlignedBit) ? Aligned : Unaligned)
+ DiagonalPacketLoadMode = EIGEN_PLAIN_ENUM_MIN(LoadMode,((InnerSize%16) == 0) ? int(Aligned16) : int(evaluator<DiagonalType>::Alignment)) // FIXME hardcoded 16!!
};
return internal::pmul(m_matImpl.template packet<LoadMode>(row, col),
m_diagImpl.template packet<DiagonalPacketLoadMode>(id));
diff --git a/Eigen/src/Core/Redux.h b/Eigen/src/Core/Redux.h
index fea4e2895..0c25223aa 100644
--- a/Eigen/src/Core/Redux.h
+++ b/Eigen/src/Core/Redux.h
@@ -165,7 +165,7 @@ struct redux_vec_unroller<Func, Derived, Start, 1>
index = Start * packet_traits<typename Derived::Scalar>::size,
outer = index / int(Derived::InnerSizeAtCompileTime),
inner = index % int(Derived::InnerSizeAtCompileTime),
- alignment = (Derived::Flags & AlignedBit) ? Aligned : Unaligned
+ alignment = Derived::Alignment
};
typedef typename Derived::Scalar Scalar;
@@ -222,10 +222,10 @@ struct redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
const Index size = mat.size();
const Index packetSize = packet_traits<Scalar>::size;
- const Index alignedStart = internal::first_aligned(mat);
+ const Index alignedStart = internal::first_aligned(mat.nestedExpression());
enum {
- alignment = (bool(Derived::Flags & DirectAccessBit) && bool(packet_traits<Scalar>::AlignedOnScalar)) || bool(Derived::Flags & AlignedBit)
- ? Aligned : Unaligned
+ alignment0 = (bool(Derived::Flags & DirectAccessBit) && bool(packet_traits<Scalar>::AlignedOnScalar)) ? int(sizeof(Scalar)*packetSize) : int(Unaligned), // FIXME take into account alignment requirement
+ alignment = EIGEN_PLAIN_ENUM_MAX(alignment0, Derived::Alignment)
};
const Index alignedSize2 = ((size-alignedStart)/(2*packetSize))*(2*packetSize);
const Index alignedSize = ((size-alignedStart)/(packetSize))*(packetSize);
@@ -352,7 +352,8 @@ public:
IsRowMajor = XprType::IsRowMajor,
SizeAtCompileTime = XprType::SizeAtCompileTime,
InnerSizeAtCompileTime = XprType::InnerSizeAtCompileTime,
- CoeffReadCost = evaluator<XprType>::CoeffReadCost
+ CoeffReadCost = evaluator<XprType>::CoeffReadCost,
+ Alignment = evaluator<XprType>::Alignment
};
EIGEN_DEVICE_FUNC Index rows() const { return m_xpr.rows(); }
@@ -385,6 +386,8 @@ public:
PacketReturnType packetByOuterInner(Index outer, Index inner) const
{ return m_evaluator.template packet<LoadMode>(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }
+ const XprType & nestedExpression() const { return m_xpr; }
+
protected:
typename internal::evaluator<XprType>::nestedType m_evaluator;
const XprType &m_xpr;
diff --git a/Eigen/src/Core/Ref.h b/Eigen/src/Core/Ref.h
index ea5a2bd5c..61de5ed17 100644
--- a/Eigen/src/Core/Ref.h
+++ b/Eigen/src/Core/Ref.h
@@ -18,7 +18,7 @@ namespace Eigen {
* \brief A matrix or vector expression mapping an existing expression
*
* \tparam PlainObjectType the equivalent matrix type of the mapped data
- * \tparam Options specifies whether the pointer is \c #Aligned, or \c #Unaligned.
+ * \tparam Options specifies the pointer alignment in bytes. It can be: \c #Aligned128, \c #Aligned64, \c #Aligned32, \c #Aligned16, \c #Aligned8 or \c #Unaligned.
* The default is \c #Unaligned.
* \tparam StrideType optionally specifies strides. By default, Ref implies a contiguous storage along the inner dimension (inner stride==1),
* but accepts a variable outer stride (leading dimension).
@@ -92,7 +92,8 @@ struct traits<Ref<_PlainObjectType, _Options, _StrideType> >
typedef _StrideType StrideType;
enum {
Options = _Options,
- Flags = traits<Map<_PlainObjectType, _Options, _StrideType> >::Flags | NestByRefBit
+ Flags = traits<Map<_PlainObjectType, _Options, _StrideType> >::Flags | NestByRefBit,
+ Alignment = traits<Map<_PlainObjectType, _Options, _StrideType> >::Alignment
};
template<typename Derived> struct match {
@@ -104,7 +105,7 @@ struct traits<Ref<_PlainObjectType, _Options, _StrideType> >
|| (int(StrideType::InnerStrideAtCompileTime)==0 && int(Derived::InnerStrideAtCompileTime)==1),
OuterStrideMatch = Derived::IsVectorAtCompileTime
|| int(StrideType::OuterStrideAtCompileTime)==int(Dynamic) || int(StrideType::OuterStrideAtCompileTime)==int(Derived::OuterStrideAtCompileTime),
- AlignmentMatch = (_Options!=Aligned) || ((PlainObjectType::Flags&AlignedBit)==0) || ((traits<Derived>::Flags&AlignedBit)==AlignedBit),
+ AlignmentMatch = (int(traits<PlainObjectType>::Alignment)==int(Unaligned)) || (int(evaluator<Derived>::Alignment) >= int(Alignment)), // FIXME the first condition is not very clear, it should be replaced by the required alignment
ScalarTypeMatch = internal::is_same<typename PlainObjectType::Scalar, typename Derived::Scalar>::value,
MatchAtCompileTime = HasDirectAccess && StorageOrderMatch && InnerStrideMatch && OuterStrideMatch && AlignmentMatch && ScalarTypeMatch
};
diff --git a/Eigen/src/Core/StableNorm.h b/Eigen/src/Core/StableNorm.h
index f9cd01b7e..aca81f463 100644
--- a/Eigen/src/Core/StableNorm.h
+++ b/Eigen/src/Core/StableNorm.h
@@ -162,21 +162,27 @@ MatrixBase<Derived>::stableNorm() const
RealScalar scale(0);
RealScalar invScale(1);
RealScalar ssq(0); // sum of square
+
+ typedef typename internal::nested_eval<Derived,2>::type DerivedCopy;
+ typedef typename internal::remove_all<DerivedCopy>::type DerivedCopyClean;
+ DerivedCopy copy(derived());
+
enum {
- Alignment = (int(Flags)&DirectAccessBit) || (int(Flags)&AlignedBit) ? 1 : 0
+ CanAlign = (int(Flags)&DirectAccessBit) || (int(internal::evaluator<DerivedCopyClean>::Alignment)>0) // FIXME
};
- typedef typename internal::conditional<Alignment, Ref<const Matrix<Scalar,Dynamic,1,0,blockSize,1>, Aligned>,
- typename Base::ConstSegmentReturnType>::type SegmentWrapper;
+ typedef typename internal::conditional<CanAlign, Ref<const Matrix<Scalar,Dynamic,1,0,blockSize,1>, internal::evaluator<DerivedCopyClean>::Alignment>,
+ typename DerivedCopyClean
+ ::ConstSegmentReturnType>::type SegmentWrapper;
Index n = size();
if(n==1)
return abs(this->coeff(0));
- Index bi = internal::first_aligned(derived());
+ Index bi = internal::first_aligned(copy);
if (bi>0)
- internal::stable_norm_kernel(this->head(bi), ssq, scale, invScale);
+ internal::stable_norm_kernel(copy.head(bi), ssq, scale, invScale);
for (; bi<n; bi+=blockSize)
- internal::stable_norm_kernel(SegmentWrapper(this->segment(bi,numext::mini(blockSize, n - bi))), ssq, scale, invScale);
+ internal::stable_norm_kernel(SegmentWrapper(copy.segment(bi,numext::mini(blockSize, n - bi))), ssq, scale, invScale);
return scale * sqrt(ssq);
}
diff --git a/Eigen/src/Core/Transpose.h b/Eigen/src/Core/Transpose.h
index e205cec4a..2152405d5 100644
--- a/Eigen/src/Core/Transpose.h
+++ b/Eigen/src/Core/Transpose.h
@@ -233,7 +233,7 @@ struct inplace_transpose_selector<MatrixType,true,true> { // PacketSize x Packet
typedef typename MatrixType::Scalar Scalar;
typedef typename internal::packet_traits<typename MatrixType::Scalar>::type Packet;
const Index PacketSize = internal::packet_traits<Scalar>::size;
- const Index Alignment = internal::evaluator<MatrixType>::Flags&AlignedBit ? Aligned : Unaligned;
+ const Index Alignment = internal::evaluator<MatrixType>::Alignment;
PacketBlock<Packet> A;
for (Index i=0; i<PacketSize; ++i)
A.packet[i] = m.template packetByOuterInner<Alignment>(i,0);
diff --git a/Eigen/src/Core/util/Constants.h b/Eigen/src/Core/util/Constants.h
index 93c0786fa..3e811a173 100644
--- a/Eigen/src/Core/util/Constants.h
+++ b/Eigen/src/Core/util/Constants.h
@@ -1,7 +1,7 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
-// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2008-2015 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2007-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
@@ -140,7 +140,7 @@ const unsigned int LvalueBit = 0x20;
*/
const unsigned int DirectAccessBit = 0x40;
-/** \ingroup flags
+/* \ingroup flags
*
* means the first coefficient packet is guaranteed to be aligned.
* An expression cannot have the AlignedBit without the PacketAccessBit flag.
@@ -215,12 +215,31 @@ enum {
};
/** \ingroup enums
- * Enum for indicating whether an object is aligned or not. */
+ * Enum for indicating whether a buffer is aligned or not. */
enum {
- /** Object is not correctly aligned for vectorization. */
- Unaligned=0,
- /** Object is aligned for vectorization. */
- Aligned=1
+ Unaligned=0, /**< Data pointer has no specific alignment. */
+ Aligned8=8, /**< Data pointer is aligned on a 8 bytes boundary. */
+ Aligned16=16, /**< Data pointer is aligned on a 16 bytes boundary. */
+ Aligned32=32, /**< Data pointer is aligned on a 32 bytes boundary. */
+ Aligned64=64, /**< Data pointer is aligned on a 64 bytes boundary. */
+ Aligned128=128, /**< Data pointer is aligned on a 128 bytes boundary. */
+ AlignedMask=255,
+ Aligned=16, /**< \deprecated Synonym for Aligned16. */
+#if EIGEN_MAX_ALIGN_BYTES==128
+ AlignedMax = Aligned128
+#elif EIGEN_MAX_ALIGN_BYTES==64
+ AlignedMax = Aligned64
+#elif EIGEN_MAX_ALIGN_BYTES==32
+ AlignedMax = Aligned32
+#elif EIGEN_MAX_ALIGN_BYTES==16
+ AlignedMax = Aligned16
+#elif EIGEN_MAX_ALIGN_BYTES==8
+ AlignedMax = Aligned8
+#elif EIGEN_MAX_ALIGN_BYTES==0
+ AlignedMax = Unaligned
+#else
+#error Invalid value for EIGEN_MAX_ALIGN_BYTES
+#endif
};
/** \ingroup enums
diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h
index 6dc1f6e3f..81e992392 100644
--- a/Eigen/src/Core/util/XprHelper.h
+++ b/Eigen/src/Core/util/XprHelper.h
@@ -192,43 +192,6 @@ class compute_matrix_flags
enum { ret = DirectAccessBit | LvalueBit | NestByRefBit | row_major_bit };
};
-template<typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols>
-class compute_matrix_evaluator_flags
-{
- enum {
- row_major_bit = Options&RowMajor ? RowMajorBit : 0,
- is_dynamic_size_storage = MaxRows==Dynamic || MaxCols==Dynamic,
-
- // TODO: should check for smaller packet types once we can handle multi-sized packet types
- align_bytes = int(packet_traits<Scalar>::size) * sizeof(Scalar),
-
- aligned_bit =
- (
- ((Options&DontAlign)==0)
- && (
-#if EIGEN_MAX_STATIC_ALIGN_BYTES!=0
- ((!is_dynamic_size_storage) && (((MaxCols*MaxRows*int(sizeof(Scalar))) % align_bytes) == 0))
-#else
- 0
-#endif
-
- ||
-
-#if EIGEN_MAX_ALIGN_BYTES!=0
- is_dynamic_size_storage
-#else
- 0
-#endif
-
- )
- ) ? AlignedBit : 0,
- packet_access_bit = packet_traits<Scalar>::Vectorizable && aligned_bit ? PacketAccessBit : 0
- };
-
- public:
- enum { ret = LinearAccessBit | DirectAccessBit | packet_access_bit | row_major_bit | aligned_bit };
-};
-
template<int _Rows, int _Cols> struct size_at_compile_time
{
enum { ret = (_Rows==Dynamic || _Cols==Dynamic) ? Dynamic : _Rows * _Cols };
diff --git a/Eigen/src/Geometry/Quaternion.h b/Eigen/src/Geometry/Quaternion.h
index 15a063994..7a30dc0b6 100644
--- a/Eigen/src/Geometry/Quaternion.h
+++ b/Eigen/src/Geometry/Quaternion.h
@@ -217,8 +217,8 @@ struct traits<Quaternion<_Scalar,_Options> >
typedef _Scalar Scalar;
typedef Matrix<_Scalar,4,1,_Options> Coefficients;
enum{
- IsAligned = (internal::traits<Coefficients>::EvaluatorFlags & AlignedBit) != 0,
- Flags = IsAligned ? (AlignedBit | LvalueBit) : LvalueBit
+ Alignment = internal::traits<Coefficients>::Alignment,
+ Flags = LvalueBit
};
};
}
@@ -228,7 +228,7 @@ class Quaternion : public QuaternionBase<Quaternion<_Scalar,_Options> >
{
public:
typedef QuaternionBase<Quaternion<_Scalar,_Options> > Base;
- enum { IsAligned = internal::traits<Quaternion>::IsAligned };
+ enum { NeedsAlignment = internal::traits<Quaternion>::Alignment>0 };
typedef _Scalar Scalar;
@@ -277,7 +277,7 @@ public:
inline Coefficients& coeffs() { return m_coeffs;}
inline const Coefficients& coeffs() const { return m_coeffs;}
- EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(IsAligned)
+ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsAlignment)
protected:
Coefficients m_coeffs;
@@ -441,7 +441,7 @@ QuaternionBase<Derived>::operator* (const QuaternionBase<OtherDerived>& other) c
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
return internal::quat_product<Architecture::Target, Derived, OtherDerived,
typename internal::traits<Derived>::Scalar,
- (internal::traits<Derived>::IsAligned && internal::traits<OtherDerived>::IsAligned)?Aligned:Unaligned>::run(*this, other);
+ EIGEN_PLAIN_ENUM_MIN(internal::traits<Derived>::Alignment, internal::traits<OtherDerived>::Alignment)>::run(*this, other);
}
/** \sa operator*(Quaternion) */
@@ -668,7 +668,7 @@ QuaternionBase<Derived>::conjugate() const
{
return internal::quat_conj<Architecture::Target, Derived,
typename internal::traits<Derived>::Scalar,
- internal::traits<Derived>::IsAligned?Aligned:Unaligned>::run(*this);
+ internal::traits<Derived>::Alignment>::run(*this);
}
diff --git a/Eigen/src/Geometry/arch/Geometry_SSE.h b/Eigen/src/Geometry/arch/Geometry_SSE.h
index e59c32c56..1a86ff837 100644
--- a/Eigen/src/Geometry/arch/Geometry_SSE.h
+++ b/Eigen/src/Geometry/arch/Geometry_SSE.h
@@ -16,14 +16,14 @@ namespace Eigen {
namespace internal {
template<class Derived, class OtherDerived>
-struct quat_product<Architecture::SSE, Derived, OtherDerived, float, Aligned>
+struct quat_product<Architecture::SSE, Derived, OtherDerived, float, Aligned16>
{
static inline Quaternion<float> run(const QuaternionBase<Derived>& _a, const QuaternionBase<OtherDerived>& _b)
{
Quaternion<float> res;
const __m128 mask = _mm_setr_ps(0.f,0.f,0.f,-0.f);
- __m128 a = _a.coeffs().template packet<Aligned>(0);
- __m128 b = _b.coeffs().template packet<Aligned>(0);
+ __m128 a = _a.coeffs().template packet<Aligned16>(0);
+ __m128 b = _b.coeffs().template packet<Aligned16>(0);
__m128 s1 = _mm_mul_ps(vec4f_swizzle1(a,1,2,0,2),vec4f_swizzle1(b,2,0,1,2));
__m128 s2 = _mm_mul_ps(vec4f_swizzle1(a,3,3,3,1),vec4f_swizzle1(b,0,1,2,1));
pstore(&res.x(),
@@ -55,8 +55,8 @@ struct cross3_impl<Architecture::SSE,VectorLhs,VectorRhs,float,true>
static inline typename plain_matrix_type<VectorLhs>::type
run(const VectorLhs& lhs, const VectorRhs& rhs)
{
- __m128 a = lhs.template packet<VectorLhs::Flags&AlignedBit ? Aligned : Unaligned>(0);
- __m128 b = rhs.template packet<VectorRhs::Flags&AlignedBit ? Aligned : Unaligned>(0);
+ __m128 a = lhs.template packet<traits<VectorLhs>::Alignment>(0);
+ __m128 b = rhs.template packet<traits<VectorRhs>::Alignment>(0);
__m128 mul1=_mm_mul_ps(vec4f_swizzle1(a,1,2,0,3),vec4f_swizzle1(b,2,0,1,3));
__m128 mul2=_mm_mul_ps(vec4f_swizzle1(a,2,0,1,3),vec4f_swizzle1(b,1,2,0,3));
typename plain_matrix_type<VectorLhs>::type res;
diff --git a/Eigen/src/Jacobi/Jacobi.h b/Eigen/src/Jacobi/Jacobi.h
index 25eabe984..b7b83dcd2 100644
--- a/Eigen/src/Jacobi/Jacobi.h
+++ b/Eigen/src/Jacobi/Jacobi.h
@@ -263,7 +263,7 @@ namespace internal {
* \sa MatrixBase::applyOnTheLeft(), MatrixBase::applyOnTheRight()
*/
template<typename VectorX, typename VectorY, typename OtherScalar>
-void apply_rotation_in_the_plane(VectorX& _x, VectorY& _y, const JacobiRotation<OtherScalar>& j);
+void apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x, DenseBase<VectorY>& xpr_y, const JacobiRotation<OtherScalar>& j);
}
/** \jacobi_module
@@ -298,18 +298,18 @@ inline void MatrixBase<Derived>::applyOnTheRight(Index p, Index q, const JacobiR
namespace internal {
template<typename VectorX, typename VectorY, typename OtherScalar>
-void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(VectorX& _x, VectorY& _y, const JacobiRotation<OtherScalar>& j)
+void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x, DenseBase<VectorY>& xpr_y, const JacobiRotation<OtherScalar>& j)
{
typedef typename VectorX::Scalar Scalar;
enum { PacketSize = packet_traits<Scalar>::size };
typedef typename packet_traits<Scalar>::type Packet;
- eigen_assert(_x.size() == _y.size());
- Index size = _x.size();
- Index incrx = _x.innerStride();
- Index incry = _y.innerStride();
+ eigen_assert(xpr_x.size() == xpr_y.size());
+ Index size = xpr_x.size();
+ Index incrx = xpr_x.derived().innerStride();
+ Index incry = xpr_y.derived().innerStride();
- Scalar* EIGEN_RESTRICT x = &_x.coeffRef(0);
- Scalar* EIGEN_RESTRICT y = &_y.coeffRef(0);
+ Scalar* EIGEN_RESTRICT x = &xpr_x.derived().coeffRef(0);
+ Scalar* EIGEN_RESTRICT y = &xpr_y.derived().coeffRef(0);
OtherScalar c = j.c();
OtherScalar s = j.s();
@@ -392,7 +392,7 @@ void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(VectorX& _x, VectorY& _y,
/*** fixed-size vectorized path ***/
else if(VectorX::SizeAtCompileTime != Dynamic &&
(VectorX::Flags & VectorY::Flags & PacketAccessBit) &&
- (VectorX::Flags & VectorY::Flags & AlignedBit))
+ (EIGEN_PLAIN_ENUM_MIN(evaluator<VectorX>::Alignment, evaluator<VectorY>::Alignment)>0)) // FIXME should be compared to the required alignment
{
const Packet pc = pset1<Packet>(c);
const Packet ps = pset1<Packet>(s);
diff --git a/Eigen/src/LU/arch/Inverse_SSE.h b/Eigen/src/LU/arch/Inverse_SSE.h
index 1f62ef14e..e1470c664 100644
--- a/Eigen/src/LU/arch/Inverse_SSE.h
+++ b/Eigen/src/LU/arch/Inverse_SSE.h
@@ -35,8 +35,8 @@ template<typename MatrixType, typename ResultType>
struct compute_inverse_size4<Architecture::SSE, float, MatrixType, ResultType>
{
enum {
- MatrixAlignment = bool(MatrixType::Flags&AlignedBit),
- ResultAlignment = bool(ResultType::Flags&AlignedBit),
+ MatrixAlignment = traits<MatrixType>::Alignment,
+ ResultAlignment = traits<ResultType>::Alignment,
StorageOrdersMatch = (MatrixType::Flags&RowMajorBit) == (ResultType::Flags&RowMajorBit)
};
typedef typename conditional<(MatrixType::Flags&LinearAccessBit),MatrixType const &,typename MatrixType::PlainObject>::type ActualMatrixType;
@@ -165,8 +165,8 @@ template<typename MatrixType, typename ResultType>
struct compute_inverse_size4<Architecture::SSE, double, MatrixType, ResultType>
{
enum {
- MatrixAlignment = bool(MatrixType::Flags&AlignedBit),
- ResultAlignment = bool(ResultType::Flags&AlignedBit),
+ MatrixAlignment = traits<MatrixType>::Alignment,
+ ResultAlignment = traits<ResultType>::Alignment,
StorageOrdersMatch = (MatrixType::Flags&RowMajorBit) == (ResultType::Flags&RowMajorBit)
};
typedef typename conditional<(MatrixType::Flags&LinearAccessBit),MatrixType const &,typename MatrixType::PlainObject>::type ActualMatrixType;
diff --git a/Eigen/src/SparseCore/SparseDiagonalProduct.h b/Eigen/src/SparseCore/SparseDiagonalProduct.h
index 29a67da35..d82927216 100644
--- a/Eigen/src/SparseCore/SparseDiagonalProduct.h
+++ b/Eigen/src/SparseCore/SparseDiagonalProduct.h
@@ -41,7 +41,7 @@ struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, ProductTag, Diagonal
typedef Product<Lhs, Rhs, DefaultProduct> XprType;
typedef evaluator<XprType> type;
typedef evaluator<XprType> nestedType;
- enum { CoeffReadCost = Dynamic, Flags = Rhs::Flags&RowMajorBit }; // FIXME CoeffReadCost & Flags
+ enum { CoeffReadCost = Dynamic, Flags = Rhs::Flags&RowMajorBit, Alignment = 0 }; // FIXME CoeffReadCost & Flags
typedef sparse_diagonal_product_evaluator<Rhs, typename Lhs::DiagonalVectorType, Rhs::Flags&RowMajorBit?SDP_AsScalarProduct:SDP_AsCwiseProduct> Base;
explicit product_evaluator(const XprType& xpr) : Base(xpr.rhs(), xpr.lhs().diagonal()) {}
@@ -54,14 +54,14 @@ struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, ProductTag, SparseSh
typedef Product<Lhs, Rhs, DefaultProduct> XprType;
typedef evaluator<XprType> type;
typedef evaluator<XprType> nestedType;
- enum { CoeffReadCost = Dynamic, Flags = Lhs::Flags&RowMajorBit }; // FIXME CoeffReadCost & Flags
+ enum { CoeffReadCost = Dynamic, Flags = Lhs::Flags&RowMajorBit, Alignment = 0 }; // FIXME CoeffReadCost & Flags
typedef sparse_diagonal_product_evaluator<Lhs, Transpose<const typename Rhs::DiagonalVectorType>, Lhs::Flags&RowMajorBit?SDP_AsCwiseProduct:SDP_AsScalarProduct> Base;
explicit product_evaluator(const XprType& xpr) : Base(xpr.lhs(), xpr.rhs().diagonal().transpose()) {}
};
template<typename SparseXprType, typename DiagonalCoeffType>
-struct sparse_diagonal_product_evaluator<SparseXprType, DiagonalCoeffType, SDP_AsScalarProduct>
+struct sparse_diagonal_product_evaluator<SparseXprType, DiagonalCoeffType, SDP_AsScalarProduct>
{
protected:
typedef typename evaluator<SparseXprType>::InnerIterator SparseXprInnerIterator;
@@ -92,7 +92,7 @@ protected:
template<typename SparseXprType, typename DiagCoeffType>
-struct sparse_diagonal_product_evaluator<SparseXprType, DiagCoeffType, SDP_AsCwiseProduct>
+struct sparse_diagonal_product_evaluator<SparseXprType, DiagCoeffType, SDP_AsCwiseProduct>
{
typedef typename SparseXprType::Scalar Scalar;