diff options
author | 2009-10-05 10:11:11 -0400 | |
---|---|---|
committer | 2009-10-05 10:11:11 -0400 | |
commit | d41577819bddb5ca734acc3ba0697646475dc786 (patch) | |
tree | 771543563ada0c34d89296fc990d47930747c968 /Eigen/src/Core/arch | |
parent | a9a9ba8453853db2c5a2212cedb8fbc8dc4cde2e (diff) |
we were already aligning to 16 byte boundary fixed-size objects that are multiple of 16 bytes;
now we also align to 8byte boundary fixed-size objects that are multiple of 8 bytes.
That's only useful for now for double, not e.g. for Vector2f, but that didn't seem to hurt. Am I missing something? Do you prefer that we don't align Vector2f at all?
Also, improvements in test_unalignedassert.
Diffstat (limited to 'Eigen/src/Core/arch')
-rw-r--r-- | Eigen/src/Core/arch/AltiVec/PacketMath.h | 6 | ||||
-rw-r--r-- | Eigen/src/Core/arch/SSE/PacketMath.h | 6 |
2 files changed, 6 insertions, 6 deletions
diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h index a9c16200e..1526a4b97 100644 --- a/Eigen/src/Core/arch/AltiVec/PacketMath.h +++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h @@ -265,14 +265,14 @@ template<> inline void ei_pstoreu(int* to , const v4i& from ) template<> inline float ei_pfirst(const v4f& a) { - float EIGEN_ALIGN_128 af[4]; + float EIGEN_ALIGN16 af[4]; vec_st(a, 0, af); return af[0]; } template<> inline int ei_pfirst(const v4i& a) { - int EIGEN_ALIGN_128 ai[4]; + int EIGEN_ALIGN16 ai[4]; vec_st(a, 0, ai); return ai[0]; } @@ -373,7 +373,7 @@ inline float ei_predux_mul(const v4f& a) inline int ei_predux_mul(const v4i& a) { - EIGEN_ALIGN_128 int aux[4]; + EIGEN_ALIGN16 int aux[4]; ei_pstore(aux, a); return aux[0] * aux[1] * aux[2] * aux[3]; } diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index ddc7b4aaf..eb1c2d311 100644 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -359,7 +359,7 @@ template<> EIGEN_STRONG_INLINE int ei_predux_mul<Packet4i>(const Packet4i& a) // after some experiments, it is seems this is the fastest way to implement it // for GCC (eg., reusing ei_pmul is very slow !) // TODO try to call _mm_mul_epu32 directly - EIGEN_ALIGN_128 int aux[4]; + EIGEN_ALIGN16 int aux[4]; ei_pstore(aux, a); return (aux[0] * aux[1]) * (aux[2] * aux[3]);; } @@ -378,7 +378,7 @@ template<> EIGEN_STRONG_INLINE int ei_predux_min<Packet4i>(const Packet4i& a) { // after some experiments, it is seems this is the fastest way to implement it // for GCC (eg., it does not like using std::min after the ei_pstore !!) - EIGEN_ALIGN_128 int aux[4]; + EIGEN_ALIGN16 int aux[4]; ei_pstore(aux, a); register int aux0 = aux[0]<aux[1] ? aux[0] : aux[1]; register int aux2 = aux[2]<aux[3] ? aux[2] : aux[3]; @@ -399,7 +399,7 @@ template<> EIGEN_STRONG_INLINE int ei_predux_max<Packet4i>(const Packet4i& a) { // after some experiments, it is seems this is the fastest way to implement it // for GCC (eg., it does not like using std::min after the ei_pstore !!) - EIGEN_ALIGN_128 int aux[4]; + EIGEN_ALIGN16 int aux[4]; ei_pstore(aux, a); register int aux0 = aux[0]>aux[1] ? aux[0] : aux[1]; register int aux2 = aux[2]>aux[3] ? aux[2] : aux[3]; |