about | summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author Benoit Jacob <jacob.benoit.1@gmail.com> 2009-02-04 16:53:03 +0000
committer Benoit Jacob <jacob.benoit.1@gmail.com> 2009-02-04 16:53:03 +0000
commit 93a089adc8bd7d7cf541341c9a631bcb51f2e62d (patch)
tree 01060086ad5358e7d341a9bbbb7406bc292cd30d
parent c26dc9ab1b29ee9e8d9165aec01d2fd3b584c667 (diff)
disable alignment altogether outside of the platforms which potentially have SSE or AltiVec
This should remove most portability issues to other platforms where data alignment issues (including overloading operator new and new[]) can be tricky, and where data alignment is not needed in the first place.
-rw-r--r-- Eigen/src/Core/util/Macros.h 24
-rw-r--r-- Eigen/src/Core/util/Memory.h 75
-rw-r--r-- test/dynalloc.cpp 28
3 files changed, 62 insertions, 65 deletions
diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h
index 92c565720..795efb90c 100644
--- a/Eigen/src/Core/util/Macros.h
+++ b/Eigen/src/Core/util/Macros.h
@@ -36,6 +36,24 @@
(EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \
EIGEN_MINOR_VERSION>=z))))
+// if the compiler is GNUC, disable 16 byte alignment on exotic archs that probably don't need it, and on which
+// it may be extra trouble to get aligned memory allocation to work (example: on ARM, overloading new[] is a PITA
+// because extra memory must be allocated for bookkeeping).
+// if the compiler is not GNUC, just cross fingers that the architecture isn't too exotic, because we don't want
+// to keep track of all the different preprocessor symbols for all compilers.
+#if !defined(__GNUC__) || defined(__i386__) || defined(__x86_64__) || defined(__ppc__) || defined(__ia64__)
+ #define EIGEN_ARCH_WANTS_ALIGNMENT 1
+#else
+ #ifdef EIGEN_VECTORIZE
+ #error Vectorization enabled, but the architecture is not listed among those for which we require 16 byte alignment. If you added vectorization for another architecture, you also need to edit this list.
+ #endif
+ #define EIGEN_ARCH_WANTS_ALIGNMENT 0
+ #ifndef EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT
+ #define EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT
+ #endif
+#endif
+
+
#ifdef EIGEN_DEFAULT_TO_ROW_MAJOR
#define EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION RowMajor
#else
@@ -147,12 +165,14 @@ using Eigen::ei_cos;
* If we made alignment depend on whether or not EIGEN_VECTORIZE is defined, it would be impossible to link
* vectorized and non-vectorized code.
*/
-#if (defined __GNUC__)
+#if !EIGEN_ARCH_WANTS_ALIGNMENT
+#define EIGEN_ALIGN_128
+#elif (defined __GNUC__)
#define EIGEN_ALIGN_128 __attribute__((aligned(16)))
#elif (defined _MSC_VER)
#define EIGEN_ALIGN_128 __declspec(align(16))
#else
-#define EIGEN_ALIGN_128
+#error Please tell me what is the equivalent of __attribute__((aligned(16))) for your compiler
#endif
#define EIGEN_RESTRICT __restrict
diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h
index 8afd81828..8c58debea 100644
--- a/Eigen/src/Core/util/Memory.h
+++ b/Eigen/src/Core/util/Memory.h
@@ -74,13 +74,15 @@ inline void* ei_aligned_malloc(size_t size)
#endif
void *result;
- #if EIGEN_HAS_POSIX_MEMALIGN && !EIGEN_MALLOC_ALREADY_ALIGNED
+ #if EIGEN_HAS_POSIX_MEMALIGN && EIGEN_ARCH_WANTS_ALIGNMENT && !EIGEN_MALLOC_ALREADY_ALIGNED
#ifdef EIGEN_EXCEPTIONS
const int failed =
#endif
posix_memalign(&result, 16, size);
#else
- #if EIGEN_MALLOC_ALREADY_ALIGNED
+ #if !EIGEN_ARCH_WANTS_ALIGNMENT
+ result = malloc(size);
+ #elif EIGEN_MALLOC_ALREADY_ALIGNED
result = malloc(size);
#elif EIGEN_HAS_MM_MALLOC
result = _mm_malloc(size, 16);
@@ -141,7 +143,9 @@ template<typename T, bool Align> inline T* ei_conditional_aligned_new(size_t siz
*/
inline void ei_aligned_free(void *ptr)
{
- #if EIGEN_MALLOC_ALREADY_ALIGNED
+ #if !EIGEN_ARCH_WANTS_ALIGNMENT
+ free(ptr);
+ #elif EIGEN_MALLOC_ALREADY_ALIGNED
free(ptr);
#elif EIGEN_HAS_POSIX_MEMALIGN
free(ptr);
@@ -232,60 +236,27 @@ inline static int ei_alignmentOffset(const Scalar* ptr, int maxOffset)
#define ei_aligned_stack_delete(TYPE,PTR,SIZE) do {ei_delete_elements_of_array<TYPE>(PTR, SIZE); \
ei_aligned_stack_free(PTR,sizeof(TYPE)*SIZE);} while(0)
-
-/** \brief Overloads the operator new and delete of the class Type with operators that are aligned if NeedsToAlign is true
- *
- * When Eigen's explicit vectorization is enabled, Eigen assumes that some fixed sizes types are aligned
- * on a 16 bytes boundary. Those include all Matrix types having a sizeof multiple of 16 bytes, e.g.:
- * - Vector2d, Vector4f, Vector4i, Vector4d,
- * - Matrix2d, Matrix4f, Matrix4i, Matrix4d,
- * - etc.
- * When an object is statically allocated, the compiler will automatically and always enforces 16 bytes
- * alignment of the data when needed. However some troubles might appear when data are dynamically allocated.
- * Let's pick an example:
- * \code
- * struct Foo {
- * char dummy;
- * Vector4f some_vector;
- * };
- * Foo obj1; // static allocation
- * obj1.some_vector = Vector4f(..); // => OK
- *
- * Foo *pObj2 = new Foo; // dynamic allocation
- * pObj2->some_vector = Vector4f(..); // => !! might segfault !!
- * \endcode
- * Here, the problem is that operator new is not aware of the compile time alignment requirement of the
- * type Vector4f (and hence of the type Foo). Therefore "new Foo" does not necessarily returns a 16 bytes
- * aligned pointer. The purpose of the class WithAlignedOperatorNew is exactly to overcome this issue by
- * overloading the operator new to return aligned data when the vectorization is enabled.
- * Here is a similar safe example:
- * \code
- * struct Foo {
- * EIGEN_MAKE_ALIGNED_OPERATOR_NEW
- * char dummy;
- * Vector4f some_vector;
- * };
- * Foo *pObj2 = new Foo; // dynamic allocation
- * pObj2->some_vector = Vector4f(..); // => SAFE !
- * \endcode
- *
- * \sa class ei_new_allocator
- */
-#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign) \
- void *operator new(size_t size) throw() { \
- return Eigen::ei_conditional_aligned_malloc<NeedsToAlign>(size); \
- } \
- void *operator new[](size_t size) throw() { \
- return Eigen::ei_conditional_aligned_malloc<NeedsToAlign>(size); \
- } \
- void operator delete(void * ptr) { Eigen::ei_conditional_aligned_free<NeedsToAlign>(ptr); } \
- void operator delete[](void * ptr) { Eigen::ei_conditional_aligned_free<NeedsToAlign>(ptr); } \
- void *operator new(size_t, void *ptr) throw() { return ptr; }
+
+#if EIGEN_ARCH_WANTS_ALIGNMENT
+ #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign) \
+ void *operator new(size_t size) throw() { \
+ return Eigen::ei_conditional_aligned_malloc<NeedsToAlign>(size); \
+ } \
+ void *operator new[](size_t size) throw() { \
+ return Eigen::ei_conditional_aligned_malloc<NeedsToAlign>(size); \
+ } \
+ void operator delete(void * ptr) { Eigen::ei_conditional_aligned_free<NeedsToAlign>(ptr); } \
+ void operator delete[](void * ptr) { Eigen::ei_conditional_aligned_free<NeedsToAlign>(ptr); } \
+ void *operator new(size_t, void *ptr) throw() { return ptr; }
+#else
+ #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)
+#endif
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(true)
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar,Size) \
EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(((Size)!=Eigen::Dynamic) && ((sizeof(Scalar)*(Size))%16==0))
+
/** \class aligned_allocator
*
* \brief stl compatible allocator to use with with 16 byte aligned types
diff --git a/test/dynalloc.cpp b/test/dynalloc.cpp
index 899342d83..c0fcdde3d 100644
--- a/test/dynalloc.cpp
+++ b/test/dynalloc.cpp
@@ -24,12 +24,18 @@
#include "main.h"
+#if EIGEN_ARCH_WANTS_ALIGNMENT
+#define ALIGNMENT 16
+#else
+#define ALIGNMENT 1
+#endif
+
void check_handmade_aligned_malloc()
{
for(int i = 1; i < 1000; i++)
{
char *p = (char*)ei_handmade_aligned_malloc(i);
- VERIFY(size_t(p)%16==0);
+ VERIFY(size_t(p)%ALIGNMENT==0);
// if the buffer is wrongly allocated this will give a bad write --> check with valgrind
for(int j = 0; j < i; j++) p[j]=0;
ei_handmade_aligned_free(p);
@@ -41,7 +47,7 @@ void check_aligned_malloc()
for(int i = 1; i < 1000; i++)
{
char *p = (char*)ei_aligned_malloc(i);
- VERIFY(size_t(p)%16==0);
+ VERIFY(size_t(p)%ALIGNMENT==0);
// if the buffer is wrongly allocated this will give a bad write --> check with valgrind
for(int j = 0; j < i; j++) p[j]=0;
ei_aligned_free(p);
@@ -53,7 +59,7 @@ void check_aligned_new()
for(int i = 1; i < 1000; i++)
{
float *p = ei_aligned_new<float>(i);
- VERIFY(size_t(p)%16==0);
+ VERIFY(size_t(p)%ALIGNMENT==0);
// if the buffer is wrongly allocated this will give a bad write --> check with valgrind
for(int j = 0; j < i; j++) p[j]=0;
ei_aligned_delete(p,i);
@@ -65,7 +71,7 @@ void check_aligned_stack_alloc()
for(int i = 1; i < 1000; i++)
{
float *p = ei_aligned_stack_new(float,i);
- VERIFY(size_t(p)%16==0);
+ VERIFY(size_t(p)%ALIGNMENT==0);
// if the buffer is wrongly allocated this will give a bad write --> check with valgrind
for(int j = 0; j < i; j++) p[j]=0;
ei_aligned_stack_delete(float,p,i);
@@ -92,7 +98,7 @@ class MyClassA
template<typename T> void check_dynaligned()
{
T* obj = new T;
- VERIFY(size_t(obj)%16==0);
+ VERIFY(size_t(obj)%ALIGNMENT==0);
delete obj;
}
@@ -115,15 +121,15 @@ void test_dynalloc()
// check static allocation, who knows ?
{
- MyStruct foo0; VERIFY(size_t(foo0.avec.data())%16==0);
- MyClassA fooA; VERIFY(size_t(fooA.avec.data())%16==0);
+ MyStruct foo0; VERIFY(size_t(foo0.avec.data())%ALIGNMENT==0);
+ MyClassA fooA; VERIFY(size_t(fooA.avec.data())%ALIGNMENT==0);
}
// dynamic allocation, single object
for (int i=0; i<g_repeat*100; ++i)
{
- MyStruct *foo0 = new MyStruct(); VERIFY(size_t(foo0->avec.data())%16==0);
- MyClassA *fooA = new MyClassA(); VERIFY(size_t(fooA->avec.data())%16==0);
+ MyStruct *foo0 = new MyStruct(); VERIFY(size_t(foo0->avec.data())%ALIGNMENT==0);
+ MyClassA *fooA = new MyClassA(); VERIFY(size_t(fooA->avec.data())%ALIGNMENT==0);
delete foo0;
delete fooA;
}
@@ -132,8 +138,8 @@ void test_dynalloc()
const int N = 10;
for (int i=0; i<g_repeat*100; ++i)
{
- MyStruct *foo0 = new MyStruct[N]; VERIFY(size_t(foo0->avec.data())%16==0);
- MyClassA *fooA = new MyClassA[N]; VERIFY(size_t(fooA->avec.data())%16==0);
+ MyStruct *foo0 = new MyStruct[N]; VERIFY(size_t(foo0->avec.data())%ALIGNMENT==0);
+ MyClassA *fooA = new MyClassA[N]; VERIFY(size_t(fooA->avec.data())%ALIGNMENT==0);
delete[] foo0;
delete[] fooA;
}