aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen
diff options
context:
space:
mode:
authorGravatar Benoit Jacob <jacob.benoit.1@gmail.com>2009-10-05 10:11:11 -0400
committerGravatar Benoit Jacob <jacob.benoit.1@gmail.com>2009-10-05 10:11:11 -0400
commitd41577819bddb5ca734acc3ba0697646475dc786 (patch)
tree771543563ada0c34d89296fc990d47930747c968 /Eigen
parenta9a9ba8453853db2c5a2212cedb8fbc8dc4cde2e (diff)
we were already aligning to a 16-byte boundary fixed-size objects whose size is a multiple of 16 bytes;
now we also align to an 8-byte boundary fixed-size objects whose size is a multiple of 8 bytes. For now that's only useful for double, not e.g. for Vector2f, but it didn't seem to hurt. Am I missing something? Do you prefer that we don't align Vector2f at all? Also, improvements in test_unalignedassert.
Diffstat (limited to 'Eigen')
-rw-r--r--Eigen/src/Core/MatrixStorage.h44
-rw-r--r--Eigen/src/Core/arch/AltiVec/PacketMath.h6
-rw-r--r--Eigen/src/Core/arch/SSE/PacketMath.h6
-rw-r--r--Eigen/src/Core/util/Macros.h16
4 files changed, 46 insertions, 26 deletions
diff --git a/Eigen/src/Core/MatrixStorage.h b/Eigen/src/Core/MatrixStorage.h
index f67095d0c..654fdf5e6 100644
--- a/Eigen/src/Core/MatrixStorage.h
+++ b/Eigen/src/Core/MatrixStorage.h
@@ -29,29 +29,45 @@
struct ei_constructor_without_unaligned_array_assert {};
/** \internal
- * Static array automatically aligned if the total byte size is a multiple of 16 and the matrix options require auto alignment
+ * Static array. If the MatrixOptions require auto-alignment, then the array will be automatically aligned:
+ * - to a 16-byte boundary, if the total size is a multiple of 16 bytes;
+ * - or else to an 8-byte boundary, if the total size is a multiple of 8 bytes.
*/
template <typename T, int Size, int MatrixOptions,
- bool Align = (!(MatrixOptions&DontAlign)) && (((Size*sizeof(T))&0xf)==0)
-> struct ei_matrix_array
+ int Alignment = (MatrixOptions&DontAlign) ? 0
+ : (((Size*sizeof(T))%16)==0) ? 16
+ : (((Size*sizeof(T))%8)==0) ? 8
+ : 0 >
+struct ei_matrix_array
{
- EIGEN_ALIGN_128 T array[Size];
+ T array[Size];
+ ei_matrix_array() {}
+ ei_matrix_array(ei_constructor_without_unaligned_array_assert) {}
+};
- ei_matrix_array()
- {
- #ifndef EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT
- ei_assert((reinterpret_cast<size_t>(array) & 0xf) == 0
- && "this assertion is explained here: http://eigen.tuxfamily.org/dox/UnalignedArrayAssert.html **** READ THIS WEB PAGE !!! ****");
- #endif
- }
+#ifdef EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT
+ #define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask)
+#else
+ #define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask) \
+ ei_assert((reinterpret_cast<size_t>(array) & sizemask) == 0 \
+ && "this assertion is explained here: " \
+ "http://eigen.tuxfamily.org/dox/UnalignedArrayAssert.html" \
+ " **** READ THIS WEB PAGE !!! ****");
+#endif
+template <typename T, int Size, int MatrixOptions>
+struct ei_matrix_array<T, Size, MatrixOptions, 16>
+{
+ EIGEN_ALIGN16 T array[Size];
+ ei_matrix_array() { EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(0xf) }
ei_matrix_array(ei_constructor_without_unaligned_array_assert) {}
};
-template <typename T, int Size, int MatrixOptions> struct ei_matrix_array<T,Size,MatrixOptions,false>
+template <typename T, int Size, int MatrixOptions>
+struct ei_matrix_array<T, Size, MatrixOptions, 8>
{
- T array[Size];
- ei_matrix_array() {}
+ EIGEN_ALIGN8 T array[Size];
+ ei_matrix_array() { EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(0x7) }
ei_matrix_array(ei_constructor_without_unaligned_array_assert) {}
};
diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h
index a9c16200e..1526a4b97 100644
--- a/Eigen/src/Core/arch/AltiVec/PacketMath.h
+++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h
@@ -265,14 +265,14 @@ template<> inline void ei_pstoreu(int* to , const v4i& from )
template<> inline float ei_pfirst(const v4f& a)
{
- float EIGEN_ALIGN_128 af[4];
+ float EIGEN_ALIGN16 af[4];
vec_st(a, 0, af);
return af[0];
}
template<> inline int ei_pfirst(const v4i& a)
{
- int EIGEN_ALIGN_128 ai[4];
+ int EIGEN_ALIGN16 ai[4];
vec_st(a, 0, ai);
return ai[0];
}
@@ -373,7 +373,7 @@ inline float ei_predux_mul(const v4f& a)
inline int ei_predux_mul(const v4i& a)
{
- EIGEN_ALIGN_128 int aux[4];
+ EIGEN_ALIGN16 int aux[4];
ei_pstore(aux, a);
return aux[0] * aux[1] * aux[2] * aux[3];
}
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index ddc7b4aaf..eb1c2d311 100644
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -359,7 +359,7 @@ template<> EIGEN_STRONG_INLINE int ei_predux_mul<Packet4i>(const Packet4i& a)
// after some experiments, it is seems this is the fastest way to implement it
// for GCC (eg., reusing ei_pmul is very slow !)
// TODO try to call _mm_mul_epu32 directly
- EIGEN_ALIGN_128 int aux[4];
+ EIGEN_ALIGN16 int aux[4];
ei_pstore(aux, a);
return (aux[0] * aux[1]) * (aux[2] * aux[3]);;
}
@@ -378,7 +378,7 @@ template<> EIGEN_STRONG_INLINE int ei_predux_min<Packet4i>(const Packet4i& a)
{
// after some experiments, it is seems this is the fastest way to implement it
// for GCC (eg., it does not like using std::min after the ei_pstore !!)
- EIGEN_ALIGN_128 int aux[4];
+ EIGEN_ALIGN16 int aux[4];
ei_pstore(aux, a);
register int aux0 = aux[0]<aux[1] ? aux[0] : aux[1];
register int aux2 = aux[2]<aux[3] ? aux[2] : aux[3];
@@ -399,7 +399,7 @@ template<> EIGEN_STRONG_INLINE int ei_predux_max<Packet4i>(const Packet4i& a)
{
// after some experiments, it is seems this is the fastest way to implement it
// for GCC (eg., it does not like using std::min after the ei_pstore !!)
- EIGEN_ALIGN_128 int aux[4];
+ EIGEN_ALIGN16 int aux[4];
ei_pstore(aux, a);
register int aux0 = aux[0]>aux[1] ? aux[0] : aux[1];
register int aux2 = aux[2]>aux[3] ? aux[2] : aux[3];
diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h
index 71962bcae..fb149e50a 100644
--- a/Eigen/src/Core/util/Macros.h
+++ b/Eigen/src/Core/util/Macros.h
@@ -202,25 +202,29 @@ using Eigen::ei_cos;
#define EIGEN_ASM_COMMENT(X)
#endif
-/* EIGEN_ALIGN_128 forces data to be 16-byte aligned, EVEN if vectorization (EIGEN_VECTORIZE) is disabled,
+/* EIGEN_ALIGN_TO_BOUNDARY(n) forces data to be n-byte aligned. This is used to satisfy SIMD requirements.
+ * However, we do that EVEN if vectorization (EIGEN_VECTORIZE) is disabled,
* so that vectorization doesn't affect binary compatibility.
*
* If we made alignment depend on whether or not EIGEN_VECTORIZE is defined, it would be impossible to link
* vectorized and non-vectorized code.
*/
#if !EIGEN_ALIGN
- #define EIGEN_ALIGN_128
+ #define EIGEN_ALIGN_TO_BOUNDARY(n)
#elif (defined __GNUC__)
- #define EIGEN_ALIGN_128 __attribute__((aligned(16)))
+ #define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n)))
#elif (defined _MSC_VER)
- #define EIGEN_ALIGN_128 __declspec(align(16))
+ #define EIGEN_ALIGN_TO_BOUNDARY(n) __declspec(align(n))
#elif (defined __SUNPRO_CC)
// FIXME not sure about this one:
- #define EIGEN_ALIGN_128 __attribute__((aligned(16)))
+ #define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n)))
#else
- #error Please tell me what is the equivalent of __attribute__((aligned(16))) for your compiler
+ #error Please tell me what is the equivalent of __attribute__((aligned(n))) for your compiler
#endif
+#define EIGEN_ALIGN16 EIGEN_ALIGN_TO_BOUNDARY(16)
+#define EIGEN_ALIGN8 EIGEN_ALIGN_TO_BOUNDARY(8)
+
#ifdef EIGEN_DONT_USE_RESTRICT_KEYWORD
#define EIGEN_RESTRICT
#endif