aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src
diff options
context:
space:
mode:
authorGravatar Gael Guennebaud <g.gael@free.fr>2015-08-07 10:44:01 +0200
committerGravatar Gael Guennebaud <g.gael@free.fr>2015-08-07 10:44:01 +0200
commitce57dbd937ca250863d4d52f3d0940d57551a25c (patch)
tree39f15bd3db545a9e52d88ba48c37ddb6083f7ade /Eigen/src
parent2afdef6a54e7fd09a4a6d5e933cf15ffa129beb6 (diff)
Let unpacket_traits<> exposes the required alignment and make use of it everywhere
Diffstat (limited to 'Eigen/src')
-rw-r--r--Eigen/src/Core/AssignEvaluator.h5
-rw-r--r--Eigen/src/Core/CoreEvaluators.h18
-rw-r--r--Eigen/src/Core/DenseCoeffsBase.h3
-rw-r--r--Eigen/src/Core/GenericPacketMath.h4
-rw-r--r--Eigen/src/Core/Matrix.h4
-rwxr-xr-xEigen/src/Core/ProductEvaluators.h4
-rw-r--r--Eigen/src/Core/Redux.h6
-rw-r--r--Eigen/src/Core/arch/AVX/Complex.h4
-rw-r--r--Eigen/src/Core/arch/AVX/PacketMath.h6
-rw-r--r--Eigen/src/Core/arch/AltiVec/Complex.h4
-rwxr-xr-xEigen/src/Core/arch/AltiVec/PacketMath.h6
-rw-r--r--Eigen/src/Core/arch/CUDA/PacketMath.h4
-rw-r--r--Eigen/src/Core/arch/NEON/Complex.h4
-rw-r--r--Eigen/src/Core/arch/NEON/PacketMath.h6
-rw-r--r--Eigen/src/Core/arch/SSE/Complex.h4
-rwxr-xr-xEigen/src/Core/arch/SSE/PacketMath.h6
-rw-r--r--Eigen/src/Core/util/Memory.h3
-rw-r--r--Eigen/src/Core/util/XprHelper.h6
18 files changed, 54 insertions, 43 deletions
diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h
index f589555cc..574f945d5 100644
--- a/Eigen/src/Core/AssignEvaluator.h
+++ b/Eigen/src/Core/AssignEvaluator.h
@@ -29,10 +29,13 @@ struct copy_using_evaluator_traits
{
typedef typename DstEvaluator::XprType Dst;
typedef typename Dst::Scalar DstScalar;
+ // TODO recursively find best packet size
+ typedef typename packet_traits<DstScalar>::type DstPacket;
+
enum {
DstFlags = DstEvaluator::Flags,
SrcFlags = SrcEvaluator::Flags,
- RequiredAlignment = packet_traits<DstScalar>::size*sizeof(DstScalar) // FIXME ask packet_traits for the true alignment requirement
+ RequiredAlignment = unpacket_traits<DstPacket>::alignment
};
public:
diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h
index 910d04ecb..a599a6a69 100644
--- a/Eigen/src/Core/CoreEvaluators.h
+++ b/Eigen/src/Core/CoreEvaluators.h
@@ -638,6 +638,8 @@ struct evaluator<Map<PlainObjectType, MapOptions, StrideType> >
{
typedef Map<PlainObjectType, MapOptions, StrideType> XprType;
typedef typename XprType::Scalar Scalar;
+ // TODO: should check for smaller packet types once we can handle multi-sized packet types
+ typedef typename packet_traits<Scalar>::type PacketScalar;
enum {
InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0
@@ -651,21 +653,18 @@ struct evaluator<Map<PlainObjectType, MapOptions, StrideType> >
HasNoStride = HasNoInnerStride && HasNoOuterStride,
IsDynamicSize = PlainObjectType::SizeAtCompileTime==Dynamic,
- // TODO: should check for smaller packet types once we can handle multi-sized packet types
- AlignBytes = int(packet_traits<Scalar>::size) * sizeof(Scalar),
+ PacketAlignment = unpacket_traits<PacketScalar>::alignment,
KeepsPacketAccess = bool(HasNoInnerStride)
&& ( bool(IsDynamicSize)
|| HasNoOuterStride
|| ( OuterStrideAtCompileTime!=Dynamic
- && ((static_cast<int>(sizeof(Scalar))*OuterStrideAtCompileTime) % AlignBytes)==0 ) ),
+ && ((static_cast<int>(sizeof(Scalar))*OuterStrideAtCompileTime) % PacketAlignment)==0 ) ),
Flags0 = evaluator<PlainObjectType>::Flags,
- //Flags1 = IsAligned ? (int(Flags0) | AlignedBit) : (int(Flags0) & ~AlignedBit),
Flags1 = (bool(HasNoStride) || bool(PlainObjectType::IsVectorAtCompileTime))
? int(Flags0) : int(Flags0 & ~LinearAccessBit),
Flags = KeepsPacketAccess ? int(Flags1) : (int(Flags1) & ~PacketAccessBit),
- //IsAligned = bool(EIGEN_MAX_ALIGN_BYTES>0) && ((int(MapOptions)&int(AlignedMask))>0),
Alignment = int(MapOptions)&int(AlignedMask)
};
@@ -702,7 +701,9 @@ struct evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> >
: block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel>
{
typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType;
- typedef typename XprType::Scalar Scalar;
+ typedef typename XprType::Scalar Scalar;
+ // TODO: should check for smaller packet types once we can handle multi-sized packet types
+ typedef typename packet_traits<Scalar>::type PacketScalar;
enum {
CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
@@ -735,9 +736,8 @@ struct evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> >
MaskPacketAccessBit),
Flags = Flags0 | FlagsLinearAccessBit | FlagsRowMajorBit,
- // TODO: should check for smaller packet types once we can handle multi-sized packet types
- AlignBytes = int(packet_traits<Scalar>::size) * sizeof(Scalar),
- Alignment0 = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % AlignBytes) == 0)) ? AlignBytes : 0,
+ PacketAlignment = unpacket_traits<PacketScalar>::alignment,
+ Alignment0 = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % int(PacketAlignment)) == 0)) ? int(PacketAlignment) : 0,
Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator<ArgType>::Alignment, Alignment0)
};
typedef block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel> block_evaluator_type;
diff --git a/Eigen/src/Core/DenseCoeffsBase.h b/Eigen/src/Core/DenseCoeffsBase.h
index d053911e3..d7ccb3b6f 100644
--- a/Eigen/src/Core/DenseCoeffsBase.h
+++ b/Eigen/src/Core/DenseCoeffsBase.h
@@ -614,7 +614,8 @@ template<typename Derived>
static inline Index first_default_aligned(const DenseBase<Derived>& m)
{
typedef typename Derived::Scalar Scalar;
- return first_aligned<packet_traits<Scalar>::size*sizeof(Scalar)>(m);
+ typedef typename packet_traits<Scalar>::type DefaultPacketType;
+ return first_aligned<unpacket_traits<DefaultPacketType>::alignment>(m);
}
template<typename Derived, bool HasDirectAccess = has_direct_access<Derived>::ret>
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h
index 4cc5f656f..c656f61bf 100644
--- a/Eigen/src/Core/GenericPacketMath.h
+++ b/Eigen/src/Core/GenericPacketMath.h
@@ -454,7 +454,7 @@ pmadd(const Packet& a,
template<typename Packet, int Alignment>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt(const typename unpacket_traits<Packet>::type* from)
{
- if(Alignment >= unpacket_traits<Packet>::size*sizeof(typename unpacket_traits<Packet>::type))
+ if(Alignment >= unpacket_traits<Packet>::alignment)
return pload<Packet>(from);
else
return ploadu<Packet>(from);
@@ -465,7 +465,7 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt(const typename unpacket_trai
template<typename Scalar, typename Packet, int Alignment>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret(Scalar* to, const Packet& from)
{
- if(Alignment >= unpacket_traits<Packet>::size*sizeof(typename unpacket_traits<Packet>::type))
+ if(Alignment >= unpacket_traits<Packet>::alignment)
pstore(to, from);
else
pstoreu(to, from);
diff --git a/Eigen/src/Core/Matrix.h b/Eigen/src/Core/Matrix.h
index e67fff6c5..b7c90ae16 100644
--- a/Eigen/src/Core/Matrix.h
+++ b/Eigen/src/Core/Matrix.h
@@ -140,13 +140,15 @@ template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int
struct traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
{
private:
+ // TODO find ideal packet size
+ typedef typename packet_traits<_Scalar>::type PacketScalar;
enum {
row_major_bit = _Options&RowMajor ? RowMajorBit : 0,
is_dynamic_size_storage = _MaxRows==Dynamic || _MaxCols==Dynamic,
max_size = is_dynamic_size_storage ? Dynamic : _MaxRows*_MaxCols,
default_alignment = compute_default_alignment<_Scalar,max_size>::value,
actual_alignment = ((_Options&DontAlign)==0) ? default_alignment : 0,
- required_alignment = packet_traits<_Scalar>::size * sizeof(_Scalar), // FIXME ask packet_traits for the true required alignment
+ required_alignment = unpacket_traits<PacketScalar>::alignment,
packet_access_bit = packet_traits<_Scalar>::Vectorizable && (actual_alignment>=required_alignment) ? PacketAccessBit : 0
};
diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h
index 1f5d7addd..b24d54ef9 100755
--- a/Eigen/src/Core/ProductEvaluators.h
+++ b/Eigen/src/Core/ProductEvaluators.h
@@ -433,8 +433,8 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
LhsAlignment = LhsEtorType::Alignment,
RhsAlignment = RhsEtorType::Alignment,
- LhsIsAligned = int(LhsAlignment) >= int(sizeof(Scalar)*PacketSize), // FIXME compare to required alignment
- RhsIsAligned = int(RhsAlignment) >= int(sizeof(Scalar)*PacketSize),
+ LhsIsAligned = int(LhsAlignment) >= int(unpacket_traits<PacketScalar>::alignment),
+ RhsIsAligned = int(RhsAlignment) >= int(unpacket_traits<PacketScalar>::alignment),
LhsRowMajor = LhsFlags & RowMajorBit,
RhsRowMajor = RhsFlags & RowMajorBit,
diff --git a/Eigen/src/Core/Redux.h b/Eigen/src/Core/Redux.h
index fa308b53e..942f51d51 100644
--- a/Eigen/src/Core/Redux.h
+++ b/Eigen/src/Core/Redux.h
@@ -221,10 +221,10 @@ struct redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
{
const Index size = mat.size();
- const Index packetSize = packet_traits<Scalar>::size;
- const int packetBytes = int(packetSize*sizeof(Scalar));
+ const Index packetSize = packet_traits<Scalar>::size;
+ const int packetAlignment = unpacket_traits<PacketScalar>::alignment;
enum {
- alignment0 = (bool(Derived::Flags & DirectAccessBit) && bool(packet_traits<Scalar>::AlignedOnScalar)) ? int(packetBytes) : int(Unaligned), // FIXME take into account alignment requirement
+ alignment0 = (bool(Derived::Flags & DirectAccessBit) && bool(packet_traits<Scalar>::AlignedOnScalar)) ? int(packetAlignment) : int(Unaligned),
alignment = EIGEN_PLAIN_ENUM_MAX(alignment0, Derived::Alignment)
};
const Index alignedStart = internal::first_default_aligned(mat.nestedExpression());
diff --git a/Eigen/src/Core/arch/AVX/Complex.h b/Eigen/src/Core/arch/AVX/Complex.h
index 003a1fc3c..b16e0ddd4 100644
--- a/Eigen/src/Core/arch/AVX/Complex.h
+++ b/Eigen/src/Core/arch/AVX/Complex.h
@@ -45,7 +45,7 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits
};
};
-template<> struct unpacket_traits<Packet4cf> { typedef std::complex<float> type; enum {size=4}; typedef Packet2cf half; };
+template<> struct unpacket_traits<Packet4cf> { typedef std::complex<float> type; enum {size=4, alignment=Aligned32}; typedef Packet2cf half; };
template<> EIGEN_STRONG_INLINE Packet4cf padd<Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_add_ps(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet4cf psub<Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_sub_ps(a.v,b.v)); }
@@ -267,7 +267,7 @@ template<> struct packet_traits<std::complex<double> > : default_packet_traits
};
};
-template<> struct unpacket_traits<Packet2cd> { typedef std::complex<double> type; enum {size=2}; typedef Packet1cd half; };
+template<> struct unpacket_traits<Packet2cd> { typedef std::complex<double> type; enum {size=2, alignment=Aligned32}; typedef Packet1cd half; };
template<> EIGEN_STRONG_INLINE Packet2cd padd<Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_add_pd(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet2cd psub<Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_sub_pd(a.v,b.v)); }
diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h
index a2306fd1a..4dda35b41 100644
--- a/Eigen/src/Core/arch/AVX/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX/PacketMath.h
@@ -100,9 +100,9 @@ template<> struct packet_traits<int> : default_packet_traits
};
*/
-template<> struct unpacket_traits<Packet8f> { typedef float type; typedef Packet4f half; enum {size=8}; };
-template<> struct unpacket_traits<Packet4d> { typedef double type; typedef Packet2d half; enum {size=4}; };
-template<> struct unpacket_traits<Packet8i> { typedef int type; typedef Packet4i half; enum {size=8}; };
+template<> struct unpacket_traits<Packet8f> { typedef float type; typedef Packet4f half; enum {size=8, alignment=Aligned32}; };
+template<> struct unpacket_traits<Packet4d> { typedef double type; typedef Packet2d half; enum {size=4, alignment=Aligned32}; };
+template<> struct unpacket_traits<Packet8i> { typedef int type; typedef Packet4i half; enum {size=8, alignment=Aligned32}; };
template<> EIGEN_STRONG_INLINE Packet8f pset1<Packet8f>(const float& from) { return _mm256_set1_ps(from); }
template<> EIGEN_STRONG_INLINE Packet4d pset1<Packet4d>(const double& from) { return _mm256_set1_pd(from); }
diff --git a/Eigen/src/Core/arch/AltiVec/Complex.h b/Eigen/src/Core/arch/AltiVec/Complex.h
index 1f00e27d8..58c296171 100644
--- a/Eigen/src/Core/arch/AltiVec/Complex.h
+++ b/Eigen/src/Core/arch/AltiVec/Complex.h
@@ -53,7 +53,7 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits
};
};
-template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2}; typedef Packet2cf half; };
+template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16}; typedef Packet2cf half; };
template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
{
@@ -275,7 +275,7 @@ template<> struct packet_traits<std::complex<double> > : default_packet_traits
};
};
-template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1}; typedef Packet1cd half; };
+template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16}; typedef Packet1cd half; };
template<> EIGEN_STRONG_INLINE Packet1cd pload <Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload<Packet2d>((const double*)from)); }
template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu<Packet2d>((const double*)from)); }
diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h
index d647427ce..7ab777009 100755
--- a/Eigen/src/Core/arch/AltiVec/PacketMath.h
+++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h
@@ -148,8 +148,8 @@ template<> struct packet_traits<int> : default_packet_traits
};
-template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4}; typedef Packet4f half; };
-template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4}; typedef Packet4i half; };
+template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4, alignment=Aligned16}; typedef Packet4f half; };
+template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4, alignment=Aligned16}; typedef Packet4i half; };
inline std::ostream & operator <<(std::ostream & s, const Packet16uc & v)
{
@@ -756,7 +756,7 @@ template<> struct packet_traits<double> : default_packet_traits
};
};
-template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2}; typedef Packet2d half; };
+template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16}; typedef Packet2d half; };
inline std::ostream & operator <<(std::ostream & s, const Packet2d & v)
diff --git a/Eigen/src/Core/arch/CUDA/PacketMath.h b/Eigen/src/Core/arch/CUDA/PacketMath.h
index ceed1d1ef..8b767a4e7 100644
--- a/Eigen/src/Core/arch/CUDA/PacketMath.h
+++ b/Eigen/src/Core/arch/CUDA/PacketMath.h
@@ -65,8 +65,8 @@ template<> struct packet_traits<double> : default_packet_traits
};
-template<> struct unpacket_traits<float4> { typedef float type; enum {size=4}; typedef float4 half; };
-template<> struct unpacket_traits<double2> { typedef double type; enum {size=2}; typedef double2 half; };
+template<> struct unpacket_traits<float4> { typedef float type; enum {size=4, alignment=Aligned16}; typedef float4 half; };
+template<> struct unpacket_traits<double2> { typedef double type; enum {size=2, alignment=Aligned16}; typedef double2 half; };
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pset1<float4>(const float& from) {
return make_float4(from, from, from, from);
diff --git a/Eigen/src/Core/arch/NEON/Complex.h b/Eigen/src/Core/arch/NEON/Complex.h
index 60ab56a47..d2322b307 100644
--- a/Eigen/src/Core/arch/NEON/Complex.h
+++ b/Eigen/src/Core/arch/NEON/Complex.h
@@ -48,7 +48,7 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits
};
};
-template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2}; typedef Packet2cf half; };
+template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16}; typedef Packet2cf half; };
template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
{
@@ -306,7 +306,7 @@ template<> struct packet_traits<std::complex<double> > : default_packet_traits
};
};
-template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1}; typedef Packet1cd half; };
+template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16}; typedef Packet1cd half; };
template<> EIGEN_STRONG_INLINE Packet1cd pload<Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload<Packet2d>((const double*)from)); }
template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu<Packet2d>((const double*)from)); }
diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h
index cda9056a4..a72f91f21 100644
--- a/Eigen/src/Core/arch/NEON/PacketMath.h
+++ b/Eigen/src/Core/arch/NEON/PacketMath.h
@@ -114,8 +114,8 @@ EIGEN_STRONG_INLINE void vst1q_f32(float* to, float32x4_t from) { ::vst1q
EIGEN_STRONG_INLINE void vst1_f32 (float* to, float32x2_t from) { ::vst1_f32 ((float32_t*)to,from); }
#endif
-template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4}; typedef Packet4f half; };
-template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4}; typedef Packet4i half; };
+template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4, alignment=Aligned16}; typedef Packet4f half; };
+template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4, alignment=Aligned16}; typedef Packet4i half; };
template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { return vdupq_n_f32(from); }
template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) { return vdupq_n_s32(from); }
@@ -570,7 +570,7 @@ template<> struct packet_traits<double> : default_packet_traits
};
};
-template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2}; typedef Packet2d half; };
+template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16}; typedef Packet2d half; };
template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return vdupq_n_f64(from); }
diff --git a/Eigen/src/Core/arch/SSE/Complex.h b/Eigen/src/Core/arch/SSE/Complex.h
index 3713b73da..2a44b6272 100644
--- a/Eigen/src/Core/arch/SSE/Complex.h
+++ b/Eigen/src/Core/arch/SSE/Complex.h
@@ -50,7 +50,7 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits
};
#endif
-template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2}; typedef Packet2cf half; };
+template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16}; typedef Packet2cf half; };
template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_add_ps(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_sub_ps(a.v,b.v)); }
@@ -297,7 +297,7 @@ template<> struct packet_traits<std::complex<double> > : default_packet_traits
};
#endif
-template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1}; typedef Packet1cd half; };
+template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16}; typedef Packet1cd half; };
template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_add_pd(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_sub_pd(a.v,b.v)); }
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index 38a84273d..d5e852799 100755
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -144,9 +144,9 @@ template<> struct packet_traits<int> : default_packet_traits
};
};
-template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4}; typedef Packet4f half; };
-template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2}; typedef Packet2d half; };
-template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4}; typedef Packet4i half; };
+template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4, alignment=Aligned16}; typedef Packet4f half; };
+template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16}; typedef Packet2d half; };
+template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4, alignment=Aligned16}; typedef Packet4i half; };
#if EIGEN_COMP_MSVC==1500
// Workaround MSVC 9 internal compiler error.
diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h
index 957c36bcf..69a489d43 100644
--- a/Eigen/src/Core/util/Memory.h
+++ b/Eigen/src/Core/util/Memory.h
@@ -553,7 +553,8 @@ inline Index first_aligned(const Scalar* array, Index size)
template<typename Scalar, typename Index>
inline Index first_default_aligned(const Scalar* array, Index size)
{
- return first_aligned<packet_traits<Scalar>::size*sizeof(Scalar)>(array, size);
+ typedef typename packet_traits<Scalar>::type DefaultPacketType;
+ return first_aligned<unpacket_traits<DefaultPacketType>::alignment>(array, size);
}
/** \internal Returns the smallest integer multiple of \a base and greater or equal to \a size
diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h
index 81e992392..2808dfd6c 100644
--- a/Eigen/src/Core/util/XprHelper.h
+++ b/Eigen/src/Core/util/XprHelper.h
@@ -119,7 +119,11 @@ template<typename T> struct unpacket_traits
{
typedef T type;
typedef T half;
- enum {size=1};
+ enum
+ {
+ size = 1,
+ alignment = 1
+ };
};
#if EIGEN_MAX_STATIC_ALIGN_BYTES>0