From d41577819bddb5ca734acc3ba0697646475dc786 Mon Sep 17 00:00:00 2001 From: Benoit Jacob Date: Mon, 5 Oct 2009 10:11:11 -0400 Subject: we were already aligning to 16 byte boundary fixed-size objects that are multiple of 16 bytes; now we also align to 8byte boundary fixed-size objects that are multiple of 8 bytes. That's only useful for now for double, not e.g. for Vector2f, but that didn't seem to hurt. Am I missing something? Do you prefer that we don't align Vector2f at all? Also, improvements in test_unalignedassert. --- Eigen/src/Core/arch/AltiVec/PacketMath.h | 6 +++--- Eigen/src/Core/arch/SSE/PacketMath.h | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'Eigen/src/Core/arch') diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h index a9c16200e..1526a4b97 100644 --- a/Eigen/src/Core/arch/AltiVec/PacketMath.h +++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h @@ -265,14 +265,14 @@ template<> inline void ei_pstoreu(int* to , const v4i& from ) template<> inline float ei_pfirst(const v4f& a) { - float EIGEN_ALIGN_128 af[4]; + float EIGEN_ALIGN16 af[4]; vec_st(a, 0, af); return af[0]; } template<> inline int ei_pfirst(const v4i& a) { - int EIGEN_ALIGN_128 ai[4]; + int EIGEN_ALIGN16 ai[4]; vec_st(a, 0, ai); return ai[0]; } @@ -373,7 +373,7 @@ inline float ei_predux_mul(const v4f& a) inline int ei_predux_mul(const v4i& a) { - EIGEN_ALIGN_128 int aux[4]; + EIGEN_ALIGN16 int aux[4]; ei_pstore(aux, a); return aux[0] * aux[1] * aux[2] * aux[3]; } diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index ddc7b4aaf..eb1c2d311 100644 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -359,7 +359,7 @@ template<> EIGEN_STRONG_INLINE int ei_predux_mul(const Packet4i& a) // after some experiments, it is seems this is the fastest way to implement it // for GCC (eg., reusing ei_pmul is very slow !) 
// TODO try to call _mm_mul_epu32 directly - EIGEN_ALIGN_128 int aux[4]; + EIGEN_ALIGN16 int aux[4]; ei_pstore(aux, a); return (aux[0] * aux[1]) * (aux[2] * aux[3]);; } @@ -378,7 +378,7 @@ template<> EIGEN_STRONG_INLINE int ei_predux_min(const Packet4i& a) { // after some experiments, it is seems this is the fastest way to implement it // for GCC (eg., it does not like using std::min after the ei_pstore !!) - EIGEN_ALIGN_128 int aux[4]; + EIGEN_ALIGN16 int aux[4]; ei_pstore(aux, a); register int aux0 = aux[0]<aux[1] ? aux[0] : aux[1]; register int aux2 = aux[2]<aux[3] ? aux[2] : aux[3]; @@ -399,7 +399,7 @@ template<> EIGEN_STRONG_INLINE int ei_predux_max(const Packet4i& a) { // after some experiments, it is seems this is the fastest way to implement it // for GCC (eg., it does not like using std::min after the ei_pstore !!) - EIGEN_ALIGN_128 int aux[4]; + EIGEN_ALIGN16 int aux[4]; ei_pstore(aux, a); register int aux0 = aux[0]>aux[1] ? aux[0] : aux[1]; register int aux2 = aux[2]>aux[3] ? aux[2] : aux[3]; -- cgit v1.2.3