aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src/Core/arch
diff options
context:
space:
mode:
authorGravatar Benoit Jacob <jacob.benoit.1@gmail.com>2009-10-05 10:11:11 -0400
committerGravatar Benoit Jacob <jacob.benoit.1@gmail.com>2009-10-05 10:11:11 -0400
commitd41577819bddb5ca734acc3ba0697646475dc786 (patch)
tree771543563ada0c34d89296fc990d47930747c968 /Eigen/src/Core/arch
parenta9a9ba8453853db2c5a2212cedb8fbc8dc4cde2e (diff)
We were already aligning fixed-size objects whose size is a multiple of 16 bytes to a 16-byte boundary;
now we also align fixed-size objects whose size is a multiple of 8 bytes to an 8-byte boundary. For now that's only useful for double, not e.g. for Vector2f, but it didn't seem to hurt. Am I missing something? Do you prefer that we don't align Vector2f at all? Also, improvements in test_unalignedassert.
Diffstat (limited to 'Eigen/src/Core/arch')
-rw-r--r--Eigen/src/Core/arch/AltiVec/PacketMath.h6
-rw-r--r--Eigen/src/Core/arch/SSE/PacketMath.h6
2 files changed, 6 insertions, 6 deletions
diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h
index a9c16200e..1526a4b97 100644
--- a/Eigen/src/Core/arch/AltiVec/PacketMath.h
+++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h
@@ -265,14 +265,14 @@ template<> inline void ei_pstoreu(int* to , const v4i& from )
template<> inline float ei_pfirst(const v4f& a)
{
- float EIGEN_ALIGN_128 af[4];
+ float EIGEN_ALIGN16 af[4];
vec_st(a, 0, af);
return af[0];
}
template<> inline int ei_pfirst(const v4i& a)
{
- int EIGEN_ALIGN_128 ai[4];
+ int EIGEN_ALIGN16 ai[4];
vec_st(a, 0, ai);
return ai[0];
}
@@ -373,7 +373,7 @@ inline float ei_predux_mul(const v4f& a)
inline int ei_predux_mul(const v4i& a)
{
- EIGEN_ALIGN_128 int aux[4];
+ EIGEN_ALIGN16 int aux[4];
ei_pstore(aux, a);
return aux[0] * aux[1] * aux[2] * aux[3];
}
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index ddc7b4aaf..eb1c2d311 100644
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -359,7 +359,7 @@ template<> EIGEN_STRONG_INLINE int ei_predux_mul<Packet4i>(const Packet4i& a)
// after some experiments, it is seems this is the fastest way to implement it
// for GCC (eg., reusing ei_pmul is very slow !)
// TODO try to call _mm_mul_epu32 directly
- EIGEN_ALIGN_128 int aux[4];
+ EIGEN_ALIGN16 int aux[4];
ei_pstore(aux, a);
return (aux[0] * aux[1]) * (aux[2] * aux[3]);;
}
@@ -378,7 +378,7 @@ template<> EIGEN_STRONG_INLINE int ei_predux_min<Packet4i>(const Packet4i& a)
{
// after some experiments, it is seems this is the fastest way to implement it
// for GCC (eg., it does not like using std::min after the ei_pstore !!)
- EIGEN_ALIGN_128 int aux[4];
+ EIGEN_ALIGN16 int aux[4];
ei_pstore(aux, a);
register int aux0 = aux[0]<aux[1] ? aux[0] : aux[1];
register int aux2 = aux[2]<aux[3] ? aux[2] : aux[3];
@@ -399,7 +399,7 @@ template<> EIGEN_STRONG_INLINE int ei_predux_max<Packet4i>(const Packet4i& a)
{
// after some experiments, it is seems this is the fastest way to implement it
// for GCC (eg., it does not like using std::min after the ei_pstore !!)
- EIGEN_ALIGN_128 int aux[4];
+ EIGEN_ALIGN16 int aux[4];
ei_pstore(aux, a);
register int aux0 = aux[0]>aux[1] ? aux[0] : aux[1];
register int aux2 = aux[2]>aux[3] ? aux[2] : aux[3];