diff options
author | Gael Guennebaud <g.gael@free.fr> | 2015-07-29 10:22:25 +0200 |
---|---|---|
committer | Gael Guennebaud <g.gael@free.fr> | 2015-07-29 10:22:25 +0200 |
commit | 175ed636ea3ac5b1b807642421a036f8a9a10f1f (patch) | |
tree | fd6d7126ebef659a12a78e8f18c2d40b2cd3e2bd /Eigen | |
parent | 76874b128ec7b191cc986e15ef28c34771120e82 (diff) |
bug #973: update macro-level control of alignment by introducing user-controllable EIGEN_MAX_ALIGN_BYTES and EIGEN_MAX_STATIC_ALIGN_BYTES macros. This changeset also removes EIGEN_ALIGN (replaced by EIGEN_MAX_ALIGN_BYTES>0), EIGEN_ALIGN_STATICALLY (replaced by EIGEN_MAX_STATIC_ALIGN_BYTES>0), EIGEN_USER_ALIGN*, EIGEN_ALIGN_DEFAULT (replaced by EIGEN_ALIGN_MAX).
Diffstat (limited to 'Eigen')
-rw-r--r-- | Eigen/Core | 6 | ||||
-rw-r--r-- | Eigen/src/Core/CoreEvaluators.h | 2 | ||||
-rw-r--r-- | Eigen/src/Core/DenseStorage.h | 23 | ||||
-rw-r--r-- | Eigen/src/Core/GeneralProduct.h | 4 | ||||
-rw-r--r-- | Eigen/src/Core/Map.h | 2 | ||||
-rw-r--r-- | Eigen/src/Core/MapBase.h | 5 | ||||
-rw-r--r-- | Eigen/src/Core/products/GeneralMatrixMatrix.h | 4 | ||||
-rw-r--r-- | Eigen/src/Core/products/GeneralMatrixVector.h | 5 | ||||
-rw-r--r-- | Eigen/src/Core/products/TriangularMatrixMatrix.h | 4 | ||||
-rw-r--r-- | Eigen/src/Core/util/Macros.h | 189 | ||||
-rw-r--r-- | Eigen/src/Core/util/Memory.h | 40 | ||||
-rw-r--r-- | Eigen/src/Core/util/XprHelper.h | 4 |
12 files changed, 170 insertions, 118 deletions
diff --git a/Eigen/Core b/Eigen/Core index 4843a33e6..143affc76 100644 --- a/Eigen/Core +++ b/Eigen/Core @@ -73,9 +73,9 @@ // and inclusion of their respective header files #include "src/Core/util/MKL_support.h" -// if alignment is disabled, then disable vectorization. Note: EIGEN_ALIGN is the proper check, it takes into -// account both the user's will (EIGEN_DONT_ALIGN) and our own platform checks -#if !EIGEN_ALIGN +// if alignment is disabled, then disable vectorization. Note: EIGEN_MAX_ALIGN_BYTES is the proper check, it takes into +// account both the user's will (EIGEN_MAX_ALIGN_BYTES,EIGEN_DONT_ALIGN) and our own platform checks +#if EIGEN_MAX_ALIGN_BYTES==0 #ifndef EIGEN_DONT_VECTORIZE #define EIGEN_DONT_VECTORIZE #endif diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h index a1193019f..6b0e3617d 100644 --- a/Eigen/src/Core/CoreEvaluators.h +++ b/Eigen/src/Core/CoreEvaluators.h @@ -641,7 +641,7 @@ struct evaluator<Map<PlainObjectType, MapOptions, StrideType> > HasNoInnerStride = InnerStrideAtCompileTime == 1, HasNoOuterStride = StrideType::OuterStrideAtCompileTime == 0, HasNoStride = HasNoInnerStride && HasNoOuterStride, - IsAligned = bool(EIGEN_ALIGN) && ((int(MapOptions)&Aligned)==Aligned), + IsAligned = bool(EIGEN_MAX_ALIGN_BYTES>0) && ((int(MapOptions)&Aligned)==Aligned), IsDynamicSize = PlainObjectType::SizeAtCompileTime==Dynamic, // TODO: should check for smaller packet types once we can handle multi-sized packet types diff --git a/Eigen/src/Core/DenseStorage.h b/Eigen/src/Core/DenseStorage.h index 5eb434c6d..5edf8a4ee 100644 --- a/Eigen/src/Core/DenseStorage.h +++ b/Eigen/src/Core/DenseStorage.h @@ -34,26 +34,25 @@ void check_static_allocation_size() #endif } -template<typename T, int Size, typename Packet = typename packet_traits<T>::type, - bool Match = bool((Size%unpacket_traits<Packet>::size)==0), - bool TryHalf = bool(int(unpacket_traits<Packet>::size) > 1) - && bool(int(unpacket_traits<Packet>::size) > 
int(unpacket_traits<typename unpacket_traits<Packet>::half>::size)) > +template<int ArrayBytes, int AlignmentBytes, + bool Match = bool((ArrayBytes%AlignmentBytes)==0), + bool TryHalf = bool(AlignmentBytes>EIGEN_MIN_ALIGN_BYTES) > struct compute_default_alignment { enum { value = 0 }; }; -template<typename T, int Size, typename Packet, bool TryHalf> -struct compute_default_alignment<T, Size, Packet, true, TryHalf> // Match +template<int ArrayBytes, int AlignmentBytes, bool TryHalf> +struct compute_default_alignment<ArrayBytes, AlignmentBytes, true, TryHalf> // Match { - enum { value = sizeof(T) * unpacket_traits<Packet>::size }; + enum { value = AlignmentBytes }; }; -template<typename T, int Size, typename Packet> -struct compute_default_alignment<T, Size, Packet, false, true> // Try-half +template<int ArrayBytes, int AlignmentBytes> +struct compute_default_alignment<ArrayBytes, AlignmentBytes, false, true> // Try-half { // current packet too large, try with an half-packet - enum { value = compute_default_alignment<T, Size, typename unpacket_traits<Packet>::half>::value }; + enum { value = compute_default_alignment<ArrayBytes, AlignmentBytes/2>::value }; }; /** \internal @@ -62,7 +61,7 @@ struct compute_default_alignment<T, Size, Packet, false, true> // Try-half */ template <typename T, int Size, int MatrixOrArrayOptions, int Alignment = (MatrixOrArrayOptions&DontAlign) ? 
0 - : compute_default_alignment<T,Size>::value > + : compute_default_alignment<Size*sizeof(T), EIGEN_PLAIN_ENUM_MAX(packet_traits<T>::size*sizeof(T), EIGEN_MAX_STATIC_ALIGN_BYTES) >::value > struct plain_array { T array[Size]; @@ -180,7 +179,7 @@ struct plain_array<T, Size, MatrixOrArrayOptions, 64> template <typename T, int MatrixOrArrayOptions, int Alignment> struct plain_array<T, 0, MatrixOrArrayOptions, Alignment> { - EIGEN_USER_ALIGN_DEFAULT T array[1]; + T array[1]; EIGEN_DEVICE_FUNC plain_array() {} EIGEN_DEVICE_FUNC plain_array(constructor_without_unaligned_array_assert) {} }; diff --git a/Eigen/src/Core/GeneralProduct.h b/Eigen/src/Core/GeneralProduct.h index 0ecfab449..475d6f4aa 100644 --- a/Eigen/src/Core/GeneralProduct.h +++ b/Eigen/src/Core/GeneralProduct.h @@ -183,7 +183,7 @@ struct gemv_static_vector_if<Scalar,Size,Dynamic,true> template<typename Scalar,int Size,int MaxSize> struct gemv_static_vector_if<Scalar,Size,MaxSize,true> { - #if EIGEN_ALIGN_STATICALLY + #if EIGEN_MAX_STATIC_ALIGN_BYTES!=0 internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize),0> m_data; EIGEN_STRONG_INLINE Scalar* data() { return m_data.array; } #else @@ -196,7 +196,7 @@ struct gemv_static_vector_if<Scalar,Size,MaxSize,true> internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize)+(ForceAlignment?PacketSize:0),0> m_data; EIGEN_STRONG_INLINE Scalar* data() { return ForceAlignment - ? reinterpret_cast<Scalar*>((reinterpret_cast<size_t>(m_data.array) & ~(size_t(EIGEN_ALIGN_BYTES-1))) + EIGEN_ALIGN_BYTES) + ? reinterpret_cast<Scalar*>((reinterpret_cast<size_t>(m_data.array) & ~(size_t(EIGEN_MAX_ALIGN_BYTES-1))) + EIGEN_MAX_ALIGN_BYTES) : m_data.array; } #endif diff --git a/Eigen/src/Core/Map.h b/Eigen/src/Core/Map.h index 6b2b3ade4..2b5971730 100644 --- a/Eigen/src/Core/Map.h +++ b/Eigen/src/Core/Map.h @@ -77,7 +77,7 @@ struct traits<Map<PlainObjectType, MapOptions, StrideType> > OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0 ? 
int(PlainObjectType::OuterStrideAtCompileTime) : int(StrideType::OuterStrideAtCompileTime), - IsAligned = bool(EIGEN_ALIGN) && ((int(MapOptions)&Aligned)==Aligned), + IsAligned = bool(EIGEN_MAX_ALIGN_BYTES>0) && ((int(MapOptions)&Aligned)==Aligned), Flags0 = TraitsBase::Flags & (~NestByRefBit), Flags = is_lvalue<PlainObjectType>::value ? int(Flags0) : (int(Flags0) & ~LvalueBit) }; diff --git a/Eigen/src/Core/MapBase.h b/Eigen/src/Core/MapBase.h index fe61d2a28..b175a3fa0 100644 --- a/Eigen/src/Core/MapBase.h +++ b/Eigen/src/Core/MapBase.h @@ -160,7 +160,10 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors> EIGEN_DEVICE_FUNC void checkSanity() const { - eigen_assert(EIGEN_IMPLIES(internal::traits<Derived>::IsAligned, (size_t(m_data) % EIGEN_ALIGN_BYTES) == 0) && "data is not aligned"); + // TODO "IsAligned" should be replaced to handle arbitrary alignment +#if EIGEN_MAX_ALIGN_BYTES>0 + eigen_assert(EIGEN_IMPLIES(internal::traits<Derived>::IsAligned, (size_t(m_data) % EIGEN_MAX_ALIGN_BYTES) == 0) && "data is not aligned"); +#endif } PointerType m_data; diff --git a/Eigen/src/Core/products/GeneralMatrixMatrix.h b/Eigen/src/Core/products/GeneralMatrixMatrix.h index e2d6a8ba2..be89181ee 100644 --- a/Eigen/src/Core/products/GeneralMatrixMatrix.h +++ b/Eigen/src/Core/products/GeneralMatrixMatrix.h @@ -293,8 +293,8 @@ class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, M SizeB = ActualCols * MaxDepth }; - EIGEN_ALIGN_DEFAULT LhsScalar m_staticA[SizeA]; - EIGEN_ALIGN_DEFAULT RhsScalar m_staticB[SizeB]; + EIGEN_ALIGN_MAX LhsScalar m_staticA[SizeA]; + EIGEN_ALIGN_MAX RhsScalar m_staticB[SizeB]; public: diff --git a/Eigen/src/Core/products/GeneralMatrixVector.h b/Eigen/src/Core/products/GeneralMatrixVector.h index 7df6a6b1a..439f14456 100644 --- a/Eigen/src/Core/products/GeneralMatrixVector.h +++ b/Eigen/src/Core/products/GeneralMatrixVector.h @@ -463,7 +463,8 @@ EIGEN_DONT_INLINE void 
general_matrix_vector_product<Index,LhsScalar,LhsMapper,R Index rowBound = ((rows-skipRows)/rowsAtOnce)*rowsAtOnce + skipRows; for (Index i=skipRows; i<rowBound; i+=rowsAtOnce) { - EIGEN_ALIGN_DEFAULT ResScalar tmp0 = ResScalar(0); + // FIXME: what is the purpose of this EIGEN_ALIGN_DEFAULT ?? + EIGEN_ALIGN_MAX ResScalar tmp0 = ResScalar(0); ResScalar tmp1 = ResScalar(0), tmp2 = ResScalar(0), tmp3 = ResScalar(0); // this helps the compiler generating good binary code @@ -572,7 +573,7 @@ EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,LhsMapper,R { for (Index i=start; i<end; ++i) { - EIGEN_ALIGN_DEFAULT ResScalar tmp0 = ResScalar(0); + EIGEN_ALIGN_MAX ResScalar tmp0 = ResScalar(0); ResPacket ptmp0 = pset1<ResPacket>(tmp0); const LhsScalars lhs0 = lhs.getVectorMapper(i, 0); // process first unaligned result's coeffs diff --git a/Eigen/src/Core/products/TriangularMatrixMatrix.h b/Eigen/src/Core/products/TriangularMatrixMatrix.h index 5f01eb5a8..3d2345b66 100644 --- a/Eigen/src/Core/products/TriangularMatrixMatrix.h +++ b/Eigen/src/Core/products/TriangularMatrixMatrix.h @@ -274,7 +274,7 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,false, Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction std::size_t sizeA = kc*mc; - std::size_t sizeB = kc*cols+EIGEN_ALIGN_BYTES/sizeof(Scalar); + std::size_t sizeB = kc*cols+EIGEN_MAX_ALIGN_BYTES/sizeof(Scalar); ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA()); ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB()); @@ -311,7 +311,7 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,false, Index ts = (IsLower && actual_k2>=cols) ? 
0 : actual_kc; Scalar* geb = blockB+ts*ts; - geb = geb + internal::first_aligned(geb,EIGEN_ALIGN_BYTES/sizeof(Scalar)); + geb = geb + internal::first_aligned(geb,EIGEN_MAX_ALIGN_BYTES/sizeof(Scalar)); pack_rhs(geb, rhs.getSubMapper(actual_k2,IsLower ? 0 : k2), actual_kc, rs); diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index b90c88ed4..e491d3389 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr> +// Copyright (C) 2008-2015 Gael Guennebaud <gael.guennebaud@inria.fr> // Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com> // // This Source Code Form is subject to the terms of the Mozilla @@ -306,68 +306,10 @@ #define EIGEN_SAFE_TO_USE_STANDARD_ASSERT_MACRO 1 #endif -// 16 byte alignment is only useful for vectorization. Since it affects the ABI, we need to enable -// 16 byte alignment on all platforms where vectorization might be enabled. In theory we could always -// enable alignment, but it can be a cause of problems on some platforms, so we just disable it in -// certain common platform (compiler+architecture combinations) to avoid these problems. -// Only static alignment is really problematic (relies on nonstandard compiler extensions that don't -// work everywhere, for example don't work on GCC/ARM), try to keep heap alignment even -// when we have to disable static alignment. 
-#if EIGEN_COMP_GNUC && !(EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_PPC || EIGEN_ARCH_IA64) -#define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1 -#else -#define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 0 -#endif - -// static alignment is completely disabled with GCC 3, Sun Studio, and QCC/QNX -#if !EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT \ - && !EIGEN_GCC3_OR_OLDER \ - && !EIGEN_COMP_SUNCC \ - && !EIGEN_OS_QNX - #define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 1 -#else - #define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 0 -#endif - -// Defined the boundary (in bytes) on which the data needs to be aligned. Note -// that unless EIGEN_ALIGN is defined and not equal to 0, the data may not be -// aligned at all regardless of the value of this #define. -// TODO should be renamed EIGEN_MAXIMAL_ALIGN_BYTES, -// for instance with AVX 1 EIGEN_MAXIMAL_ALIGN_BYTES=32 while for 'int' 16 bytes alignment is always enough, -// and 16 bytes alignment is also enough for Vector4f. -#define EIGEN_ALIGN_BYTES 16 - -#ifdef EIGEN_DONT_ALIGN - #ifndef EIGEN_DONT_ALIGN_STATICALLY - #define EIGEN_DONT_ALIGN_STATICALLY - #endif - #define EIGEN_ALIGN 0 -#elif !defined(EIGEN_DONT_VECTORIZE) - #if defined(__AVX__) - #undef EIGEN_ALIGN_BYTES - #define EIGEN_ALIGN_BYTES 32 - #endif - #define EIGEN_ALIGN 1 -#else - #define EIGEN_ALIGN 0 -#endif - - // This macro can be used to prevent from macro expansion, e.g.: // std::max EIGEN_NOT_A_MACRO(a,b) #define EIGEN_NOT_A_MACRO -// EIGEN_ALIGN_STATICALLY is the true test whether we want to align arrays on the stack or not. It takes into account both the user choice to explicitly disable -// alignment (EIGEN_DONT_ALIGN_STATICALLY) and the architecture config (EIGEN_ARCH_WANTS_STACK_ALIGNMENT). Henceforth, only EIGEN_ALIGN_STATICALLY should be used. 
-#if EIGEN_ARCH_WANTS_STACK_ALIGNMENT && !defined(EIGEN_DONT_ALIGN_STATICALLY) - #define EIGEN_ALIGN_STATICALLY 1 -#else - #define EIGEN_ALIGN_STATICALLY 0 - #ifndef EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT - #define EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT - #endif -#endif - #ifdef EIGEN_DEFAULT_TO_ROW_MAJOR #define EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION Eigen::RowMajor #else @@ -585,6 +527,20 @@ namespace Eigen { #endif #endif + +//------------------------------------------------------------------------------------------ +// Static and dynamic alignment control +// +// The main purpose of this section is to define EIGEN_MAX_ALIGN_BYTES and EIGEN_MAX_STATIC_ALIGN_BYTES +// as the maximal boundary in bytes on which dynamically and statically allocated data may be alignment respectively. +// The values of EIGEN_MAX_ALIGN_BYTES and EIGEN_MAX_STATIC_ALIGN_BYTES can be specified by the user. If not, +// a default value is automatically computed based on architecture, compiler, and OS. +// +// This section also defines macros EIGEN_ALIGN_TO_BOUNDARY(N) and the shortcuts EIGEN_ALIGN{8,16,32,_MAX} +// to be used to declare statically aligned buffers. +//------------------------------------------------------------------------------------------ + + /* EIGEN_ALIGN_TO_BOUNDARY(n) forces data to be n-byte aligned. This is used to satisfy SIMD requirements. * However, we do that EVEN if vectorization (EIGEN_VECTORIZE) is disabled, * so that vectorization doesn't affect binary compatibility. 
@@ -605,23 +561,116 @@ namespace Eigen { #error Please tell me what is the equivalent of __attribute__((aligned(n))) for your compiler #endif +// If the user explicitly disable vectorization, then we also disable alignment +#if defined(EIGEN_DONT_VECTORIZE) + #define EIGEN_IDEAL_MAX_ALIGN_BYTES 0 +#elif defined(__AVX__) + // 32 bytes static alignmeent is preferred only if really required + #define EIGEN_IDEAL_MAX_ALIGN_BYTES 32 +#else + #define EIGEN_IDEAL_MAX_ALIGN_BYTES 16 +#endif + + +// EIGEN_MIN_ALIGN_BYTES defines the minimal value for which the notion of explicit alignment makes sense +#define EIGEN_MIN_ALIGN_BYTES 16 + +// Defined the boundary (in bytes) on which the data needs to be aligned. Note +// that unless EIGEN_ALIGN is defined and not equal to 0, the data may not be +// aligned at all regardless of the value of this #define. + +#if (defined(EIGEN_DONT_ALIGN_STATICALLY) || defined(EIGEN_DONT_ALIGN)) && defined(EIGEN_MAX_STATIC_ALIGN_BYTES) && EIGEN_MAX_STATIC_ALIGN_BYTES>0 +#error EIGEN_MAX_STATIC_ALIGN_BYTES and EIGEN_DONT_ALIGN[_STATICALLY] are both defined with EIGEN_MAX_STATIC_ALIGN_BYTES!=0. Use EIGEN_MAX_STATIC_ALIGN_BYTES=0 as a synonym of EIGEN_DONT_ALIGN_STATICALLY. +#endif + +// EIGEN_DONT_ALIGN_STATICALLY and EIGEN_DONT_ALIGN are deprectated +// They imply EIGEN_MAX_STATIC_ALIGN_BYTES=0 +#if defined(EIGEN_DONT_ALIGN_STATICALLY) || defined(EIGEN_DONT_ALIGN) + #ifdef EIGEN_MAX_STATIC_ALIGN_BYTES + #undef EIGEN_MAX_STATIC_ALIGN_BYTES + #endif + #define EIGEN_MAX_STATIC_ALIGN_BYTES 0 +#endif + +#ifndef EIGEN_MAX_STATIC_ALIGN_BYTES + + // Try to automatically guess what is the best default value for EIGEN_MAX_STATIC_ALIGN_BYTES + + // 16 byte alignment is only useful for vectorization. Since it affects the ABI, we need to enable + // 16 byte alignment on all platforms where vectorization might be enabled. 
In theory we could always + // enable alignment, but it can be a cause of problems on some platforms, so we just disable it in + // certain common platform (compiler+architecture combinations) to avoid these problems. + // Only static alignment is really problematic (relies on nonstandard compiler extensions that don't + // work everywhere, for example don't work on GCC/ARM), try to keep heap alignment even + // when we have to disable static alignment. + #if EIGEN_COMP_GNUC && !(EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_PPC || EIGEN_ARCH_IA64) + #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1 + #else + #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 0 + #endif + + // static alignment is completely disabled with GCC 3, Sun Studio, and QCC/QNX + #if !EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT \ + && !EIGEN_GCC3_OR_OLDER \ + && !EIGEN_COMP_SUNCC \ + && !EIGEN_OS_QNX + #define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 1 + #else + #define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 0 + #endif + + #if EIGEN_ARCH_WANTS_STACK_ALIGNMENT + #define EIGEN_MAX_STATIC_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES + #endif + +#endif + +// If EIGEN_MAX_ALIGN_BYTES is defined, then it is considered as an upper bound for EIGEN_MAX_ALIGN_BYTES +#if defined(EIGEN_MAX_ALIGN_BYTES) && EIGEN_MAX_ALIGN_BYTES<EIGEN_MAX_STATIC_ALIGN_BYTES +#undef EIGEN_MAX_STATIC_ALIGN_BYTES +#define EIGEN_MAX_STATIC_ALIGN_BYTES EIGEN_MAX_ALIGN_BYTES +#endif + +#if EIGEN_MAX_STATIC_ALIGN_BYTES==0 && !defined(EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT) + #define EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT +#endif + +// At this stage, EIGEN_MAX_STATIC_ALIGN_BYTES>0 is the true test whether we want to align arrays on the stack or not. +// It takes into account both the user choice to explicitly enable/disable alignment (by settting EIGEN_MAX_STATIC_ALIGN_BYTES) +// and the architecture config (EIGEN_ARCH_WANTS_STACK_ALIGNMENT). +// Henceforth, only EIGEN_MAX_STATIC_ALIGN_BYTES should be used. 
+ + +// Shortcuts to EIGEN_ALIGN_TO_BOUNDARY #define EIGEN_ALIGN8 EIGEN_ALIGN_TO_BOUNDARY(8) #define EIGEN_ALIGN16 EIGEN_ALIGN_TO_BOUNDARY(16) #define EIGEN_ALIGN32 EIGEN_ALIGN_TO_BOUNDARY(32) -#define EIGEN_ALIGN_DEFAULT EIGEN_ALIGN_TO_BOUNDARY(EIGEN_ALIGN_BYTES) - -#if EIGEN_ALIGN_STATICALLY -#define EIGEN_USER_ALIGN_TO_BOUNDARY(n) EIGEN_ALIGN_TO_BOUNDARY(n) -#define EIGEN_USER_ALIGN16 EIGEN_ALIGN16 -#define EIGEN_USER_ALIGN32 EIGEN_ALIGN32 -#define EIGEN_USER_ALIGN_DEFAULT EIGEN_ALIGN_DEFAULT +#define EIGEN_ALIGN64 EIGEN_ALIGN_TO_BOUNDARY(64) +#if EIGEN_MAX_STATIC_ALIGN_BYTES>0 +#define EIGEN_ALIGN_MAX EIGEN_ALIGN_TO_BOUNDARY(EIGEN_MAX_STATIC_ALIGN_BYTES) #else -#define EIGEN_USER_ALIGN_TO_BOUNDARY(n) -#define EIGEN_USER_ALIGN16 -#define EIGEN_USER_ALIGN32 -#define EIGEN_USER_ALIGN_DEFAULT +#define EIGEN_ALIGN_MAX +#endif + + +// Dynamic alignment control + +#if defined(EIGEN_DONT_ALIGN) && defined(EIGEN_MAX_ALIGN_BYTES) && EIGEN_MAX_ALIGN_BYTES>0 +#error EIGEN_MAX_ALIGN_BYTES and EIGEN_DONT_ALIGN are both defined with EIGEN_MAX_ALIGN_BYTES!=0. Use EIGEN_MAX_ALIGN_BYTES=0 as a synonym of EIGEN_DONT_ALIGN. +#endif + +#ifdef EIGEN_DONT_ALIGN + #ifdef EIGEN_MAX_ALIGN_BYTES + #undef EIGEN_MAX_ALIGN_BYTES + #endif + #define EIGEN_MAX_ALIGN_BYTES 0 +#elif !defined(EIGEN_MAX_ALIGN_BYTES) + #define EIGEN_MAX_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES #endif +//---------------------------------------------------------------------- + + #ifdef EIGEN_DONT_USE_RESTRICT_KEYWORD #define EIGEN_RESTRICT #endif diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h index 73287d6ca..95ed0a549 100644 --- a/Eigen/src/Core/util/Memory.h +++ b/Eigen/src/Core/util/Memory.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. 
// -// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr> +// Copyright (C) 2008-2015 Gael Guennebaud <gael.guennebaud@inria.fr> // Copyright (C) 2008-2009 Benoit Jacob <jacob.benoit.1@gmail.com> // Copyright (C) 2009 Kenneth Riddile <kfriddile@yahoo.com> // Copyright (C) 2010 Hauke Heibel <hauke.heibel@gmail.com> @@ -32,7 +32,7 @@ // page 114, "[The] LP64 model [...] is used by all 64-bit UNIX ports" so it's indeed // quite safe, at least within the context of glibc, to equate 64-bit with LP64. #if defined(__GLIBC__) && ((__GLIBC__>=2 && __GLIBC_MINOR__ >= 8) || __GLIBC__>2) \ - && defined(__LP64__) && ! defined( __SANITIZE_ADDRESS__ ) && (EIGEN_ALIGN_BYTES == 16) + && defined(__LP64__) && ! defined( __SANITIZE_ADDRESS__ ) && (EIGEN_MAX_ALIGN_BYTES == 16) #define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 1 #else #define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 0 @@ -42,14 +42,14 @@ // See http://svn.freebsd.org/viewvc/base/stable/6/lib/libc/stdlib/malloc.c?view=markup // FreeBSD 7 seems to have 16-byte aligned malloc except on ARM and MIPS architectures // See http://svn.freebsd.org/viewvc/base/stable/7/lib/libc/stdlib/malloc.c?view=markup -#if defined(__FreeBSD__) && !(EIGEN_ARCH_ARM || EIGEN_ARCH_MIPS) && (EIGEN_ALIGN_BYTES == 16) +#if defined(__FreeBSD__) && !(EIGEN_ARCH_ARM || EIGEN_ARCH_MIPS) && (EIGEN_MAX_ALIGN_BYTES == 16) #define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 1 #else #define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 0 #endif -#if (EIGEN_OS_MAC && (EIGEN_ALIGN_BYTES == 16)) \ - || (EIGEN_OS_WIN64 && (EIGEN_ALIGN_BYTES == 16)) \ +#if (EIGEN_OS_MAC && (EIGEN_MAX_ALIGN_BYTES == 16)) \ + || (EIGEN_OS_WIN64 && (EIGEN_MAX_ALIGN_BYTES == 16)) \ || EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED \ || EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED #define EIGEN_MALLOC_ALREADY_ALIGNED 1 @@ -107,9 +107,9 @@ inline void throw_std_bad_alloc() */ inline void* handmade_aligned_malloc(std::size_t size) { - void *original = std::malloc(size+EIGEN_ALIGN_BYTES); + void *original = 
std::malloc(size+EIGEN_MAX_ALIGN_BYTES); if (original == 0) return 0; - void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(EIGEN_ALIGN_BYTES-1))) + EIGEN_ALIGN_BYTES); + void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(EIGEN_MAX_ALIGN_BYTES-1))) + EIGEN_MAX_ALIGN_BYTES); *(reinterpret_cast<void**>(aligned) - 1) = original; return aligned; } @@ -130,9 +130,9 @@ inline void* handmade_aligned_realloc(void* ptr, std::size_t size, std::size_t = if (ptr == 0) return handmade_aligned_malloc(size); void *original = *(reinterpret_cast<void**>(ptr) - 1); std::ptrdiff_t previous_offset = static_cast<char *>(ptr)-static_cast<char *>(original); - original = std::realloc(original,size+EIGEN_ALIGN_BYTES); + original = std::realloc(original,size+EIGEN_MAX_ALIGN_BYTES); if (original == 0) return 0; - void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(EIGEN_ALIGN_BYTES-1))) + EIGEN_ALIGN_BYTES); + void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(EIGEN_MAX_ALIGN_BYTES-1))) + EIGEN_MAX_ALIGN_BYTES); void *previous_aligned = static_cast<char *>(original)+previous_offset; if(aligned!=previous_aligned) std::memmove(aligned, previous_aligned, size); @@ -218,16 +218,16 @@ EIGEN_DEVICE_FUNC inline void* aligned_malloc(size_t size) check_that_malloc_is_allowed(); void *result; - #if !EIGEN_ALIGN + #if EIGEN_MAX_ALIGN_BYTES==0 result = std::malloc(size); #elif EIGEN_MALLOC_ALREADY_ALIGNED result = std::malloc(size); #elif EIGEN_HAS_POSIX_MEMALIGN - if(posix_memalign(&result, EIGEN_ALIGN_BYTES, size)) result = 0; + if(posix_memalign(&result, EIGEN_MAX_ALIGN_BYTES, size)) result = 0; #elif EIGEN_HAS_MM_MALLOC - result = _mm_malloc(size, EIGEN_ALIGN_BYTES); + result = _mm_malloc(size, EIGEN_MAX_ALIGN_BYTES); #elif EIGEN_OS_WIN_STRICT - result = _aligned_malloc(size, EIGEN_ALIGN_BYTES); + result = 
_aligned_malloc(size, EIGEN_MAX_ALIGN_BYTES); #else result = handmade_aligned_malloc(size); #endif @@ -241,7 +241,7 @@ EIGEN_DEVICE_FUNC inline void* aligned_malloc(size_t size) /** \internal Frees memory allocated with aligned_malloc. */ EIGEN_DEVICE_FUNC inline void aligned_free(void *ptr) { - #if !EIGEN_ALIGN + #if EIGEN_MAX_ALIGN_BYTES==0 std::free(ptr); #elif EIGEN_MALLOC_ALREADY_ALIGNED std::free(ptr); @@ -266,7 +266,7 @@ inline void* aligned_realloc(void *ptr, size_t new_size, size_t old_size) EIGEN_UNUSED_VARIABLE(old_size); void *result; -#if !EIGEN_ALIGN +#if EIGEN_MAX_ALIGN_BYTES==0 result = std::realloc(ptr,new_size); #elif EIGEN_MALLOC_ALREADY_ALIGNED result = std::realloc(ptr,new_size); @@ -277,12 +277,12 @@ inline void* aligned_realloc(void *ptr, size_t new_size, size_t old_size) // implements _mm_malloc/_mm_free based on the corresponding _aligned_ // functions. This may not always be the case and we just try to be safe. #if EIGEN_OS_WIN_STRICT && defined(_mm_free) - result = _aligned_realloc(ptr,new_size,EIGEN_ALIGN_BYTES); + result = _aligned_realloc(ptr,new_size,EIGEN_MAX_ALIGN_BYTES); #else result = generic_aligned_realloc(ptr,new_size,old_size); #endif #elif EIGEN_OS_WIN_STRICT - result = _aligned_realloc(ptr,new_size,EIGEN_ALIGN_BYTES); + result = _aligned_realloc(ptr,new_size,EIGEN_MAX_ALIGN_BYTES); #else result = handmade_aligned_realloc(ptr,new_size,old_size); #endif @@ -691,7 +691,7 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b) #ifdef EIGEN_ALLOCA // We always manually re-align the result of EIGEN_ALLOCA. // If alloca is already aligned, the compiler should be smart enough to optimize away the re-alignment. 
- #define EIGEN_ALIGNED_ALLOCA(SIZE) reinterpret_cast<void*>((reinterpret_cast<size_t>(EIGEN_ALLOCA(SIZE+EIGEN_ALIGN_BYTES-1)) + EIGEN_ALIGN_BYTES-1) & ~(size_t(EIGEN_ALIGN_BYTES-1))) + #define EIGEN_ALIGNED_ALLOCA(SIZE) reinterpret_cast<void*>((reinterpret_cast<size_t>(EIGEN_ALLOCA(SIZE+EIGEN_MAX_ALIGN_BYTES-1)) + EIGEN_MAX_ALIGN_BYTES-1) & ~(size_t(EIGEN_MAX_ALIGN_BYTES-1))) #define ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) \ Eigen::internal::check_size_for_overflow<TYPE>(SIZE); \ @@ -715,7 +715,7 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b) *** Implementation of EIGEN_MAKE_ALIGNED_OPERATOR_NEW [_IF] *** *****************************************************************************/ -#if EIGEN_ALIGN +#if EIGEN_MAX_ALIGN_BYTES!=0 #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \ void* operator new(size_t size, const std::nothrow_t&) throw() { \ EIGEN_TRY { return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); } \ @@ -751,7 +751,7 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b) #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(true) #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar,Size) \ - EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(bool(((Size)!=Eigen::Dynamic) && ((sizeof(Scalar)*(Size))%EIGEN_ALIGN_BYTES==0))) + EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(bool(((Size)!=Eigen::Dynamic) && ((sizeof(Scalar)*(Size))%EIGEN_MAX_ALIGN_BYTES==0))) /****************************************************************************/ diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h index 8c280432b..433e816af 100644 --- a/Eigen/src/Core/util/XprHelper.h +++ b/Eigen/src/Core/util/XprHelper.h @@ -167,7 +167,7 @@ class compute_matrix_evaluator_flags ( ((Options&DontAlign)==0) && ( -#if EIGEN_ALIGN_STATICALLY +#if EIGEN_MAX_STATIC_ALIGN_BYTES!=0 ((!is_dynamic_size_storage) && 
(((MaxCols*MaxRows*int(sizeof(Scalar))) % align_bytes) == 0)) #else 0 @@ -175,7 +175,7 @@ class compute_matrix_evaluator_flags || -#if EIGEN_ALIGN +#if EIGEN_MAX_ALIGN_BYTES!=0 is_dynamic_size_storage #else 0 |