diff options
-rw-r--r-- | Eigen/src/Core/util/Memory.h | 57 | ||||
-rw-r--r-- | test/dynalloc.cpp | 57 |
2 files changed, 100 insertions, 14 deletions
diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h index 562fbdc6c..15bdb1b5b 100644 --- a/Eigen/src/Core/util/Memory.h +++ b/Eigen/src/Core/util/Memory.h @@ -27,11 +27,42 @@ #ifndef EIGEN_MEMORY_H #define EIGEN_MEMORY_H -#ifdef __linux -// it seems we cannot assume posix_memalign is defined in the stdlib header -extern "C" int posix_memalign (void **, size_t, size_t) throw (); +#if defined(__APPLE__) || defined(__FreeBSD__) || defined(_WIN64) + #define EIGEN_MALLOC_ALREADY_ALIGNED 1 +#else + #define EIGEN_MALLOC_ALREADY_ALIGNED 0 #endif +#if (defined _GNU_SOURCE) || ((defined _XOPEN_SOURCE) && (_XOPEN_SOURCE >= 600)) + #define EIGEN_HAS_POSIX_MEMALIGN 1 +#else + #define EIGEN_HAS_POSIX_MEMALIGN 0 +#endif + +#ifdef EIGEN_VECTORIZE_SSE + #define EIGEN_HAS_MM_MALLOC 1 +#else + #define EIGEN_HAS_MM_MALLOC 0 +#endif + +/** \internal like malloc, but the returned pointer is guaranteed to be 16-byte aligned. + * Fast, but wastes 16 additional bytes of memory. + * Does not throw any exception. + */ +inline void* ei_handmade_aligned_malloc(size_t size) +{ + void *original = malloc(size+16); + void *aligned = reinterpret_cast<void*>((reinterpret_cast<size_t>(original) & ~(size_t(15))) + 16); + *(reinterpret_cast<void**>(aligned) - 1) = original; + return aligned; +} + +/** \internal frees memory allocated with ei_handmade_aligned_malloc */ +inline void ei_handmade_aligned_free(void *ptr) +{ + free(*(reinterpret_cast<void**>(ptr) - 1)); +} + /** \internal allocates \a size bytes. The returned pointer is guaranteed to have 16 bytes alignment. * On allocation error, the returned pointer is undefined, but if exceptions are enabled then a std::bad_alloc is thrown. */ @@ -42,18 +73,20 @@ inline void* ei_aligned_malloc(size_t size) #endif void *result; - #ifdef __linux + #if EIGEN_HAS_POSIX_MEMALIGN && !EIGEN_MALLOC_ALREADY_ALIGNED #ifdef EIGEN_EXCEPTIONS const int failed = #endif posix_memalign(&result, 16, size); #else - #ifdef _MSC_VER + #if EIGEN_MALLOC_ALREADY_ALIGNED + result = malloc(size); + #elif EIGEN_HAS_MM_MALLOC + result = _mm_malloc(size, 16); + #elif (defined _MSC_VER) result = _aligned_malloc(size, 16); - #elif defined(__APPLE__) - result = malloc(size); // Apple's malloc() already returns 16-byte-aligned ptrs #else - result = _mm_malloc(size, 16); + result = ei_handmade_aligned_malloc(size); #endif #ifdef EIGEN_EXCEPTIONS const int failed = (result == 0); @@ -103,14 +136,16 @@ template<typename T, bool Align> inline T* ei_conditional_aligned_new(size_t siz */ inline void ei_aligned_free(void *ptr) { - #if defined(__linux) + #if EIGEN_HAS_POSIX_MEMALIGN free(ptr); - #elif defined(__APPLE__) + #elif EIGEN_MALLOC_ALREADY_ALIGNED free(ptr); #elif defined(_MSC_VER) _aligned_free(ptr); - #else + #elif EIGEN_HAS_MM_MALLOC _mm_free(ptr); + #else + ei_handmade_aligned_free(ptr); #endif } diff --git a/test/dynalloc.cpp b/test/dynalloc.cpp index 916193df0..ffe21771a 100644 --- a/test/dynalloc.cpp +++ b/test/dynalloc.cpp @@ -24,6 +24,55 @@ #include "main.h" +void check_handmade_aligned_malloc() +{ + for(int i = 1; i < 1000; i++) + { + char *p = (char*)ei_handmade_aligned_malloc(i); + VERIFY(size_t(p)%16==0); + // if the buffer is wrongly allocated this will give a bad write --> check with valgrind + for(int j = 0; j < i; j++) p[j]=0; + ei_handmade_aligned_free(p); + } +} + +void check_aligned_malloc() +{ + for(int i = 1; i < 1000; i++) + { + char *p = (char*)ei_aligned_malloc(i); + VERIFY(size_t(p)%16==0); + // if the buffer is wrongly allocated this will give a bad write --> check with valgrind + for(int j = 0; j < i; j++) p[j]=0; + ei_aligned_free(p); + } +} + +void check_aligned_new() +{ + for(int i = 1; i < 1000; i++) + { + float *p = ei_aligned_new<float>(i); + VERIFY(size_t(p)%16==0); + // if the buffer is wrongly allocated this will give a bad write --> check with valgrind + for(int j = 0; j < i; j++) p[j]=0; + ei_aligned_delete(p,i); + } +} + +void check_aligned_stack_alloc() +{ + for(int i = 1; i < 1000; i++) + { + float *p = ei_aligned_stack_new(float,i); + VERIFY(size_t(p)%16==0); + // if the buffer is wrongly allocated this will give a bad write --> check with valgrind + for(int j = 0; j < i; j++) p[j]=0; + ei_aligned_stack_delete(float,p,i); + } +} + + // test compilation with both a struct and a class... struct MyStruct { @@ -49,8 +98,12 @@ template<typename T> void check_dynaligned() void test_dynalloc() { + // low level dynamic memory allocation + CALL_SUBTEST(check_handmade_aligned_malloc()); + CALL_SUBTEST(check_aligned_malloc()); + CALL_SUBTEST(check_aligned_new()); + CALL_SUBTEST(check_aligned_stack_alloc()); -#ifdef EIGEN_VECTORIZE for (int i=0; i<g_repeat*100; ++i) { CALL_SUBTEST( check_dynaligned<Vector4f>() ); @@ -100,6 +153,4 @@ void test_dynalloc() } } -#endif // EIGEN_VECTORIZE - } |