aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src/Core
diff options
context:
space:
mode:
authorGravatar Benoit Steiner <benoit.steiner.goog@gmail.com>2014-02-18 18:06:44 -0800
committerGravatar Benoit Steiner <benoit.steiner.goog@gmail.com>2014-02-18 18:06:44 -0800
commit7ed9441ea472031bb9357b5bec80151cae7ed2cb (patch)
treeecd2a8a0973068987bdb5ec631a79e8b82641e56 /Eigen/src/Core
parent64a85800bd3573a7da7a396fde9707dce87a58d9 (diff)
Reverted the definition of the EIGEN_ALIGN to its former meaning (i.e. a boolean)
Created a new EIGEN_ALIGN_BYTES define to encode how the data should be aligned Fixed a few remaining alignment issues exposed when the Eigen code is compiled with avx enabled. Created a new EIGEN_ALIGN_DEFAULT define, which is set to the minimum alignment value required for the chosen instruction set. Use this value instead of EIGEN_ALIGN32 to preserve the existing alignment on SSE/Altivec/Neon.
Diffstat (limited to 'Eigen/src/Core')
-rw-r--r--Eigen/src/Core/Block.h2
-rw-r--r--Eigen/src/Core/DenseStorage.h4
-rw-r--r--Eigen/src/Core/GeneralProduct.h2
-rw-r--r--Eigen/src/Core/Map.h2
-rw-r--r--Eigen/src/Core/MapBase.h2
-rw-r--r--Eigen/src/Core/products/GeneralMatrixMatrix.h6
-rw-r--r--Eigen/src/Core/products/GeneralMatrixVector.h4
-rw-r--r--Eigen/src/Core/util/Macros.h18
-rw-r--r--Eigen/src/Core/util/Memory.h32
-rw-r--r--Eigen/src/Core/util/XprHelper.h2
10 files changed, 41 insertions, 33 deletions
diff --git a/Eigen/src/Core/Block.h b/Eigen/src/Core/Block.h
index 31cd5c72c..e948e14aa 100644
--- a/Eigen/src/Core/Block.h
+++ b/Eigen/src/Core/Block.h
@@ -83,7 +83,7 @@ struct traits<Block<XprType, BlockRows, BlockCols, InnerPanel> > : traits<XprTyp
MaskPacketAccessBit = (InnerSize == Dynamic || (InnerSize % packet_traits<Scalar>::size) == 0)
&& (InnerStrideAtCompileTime == 1)
? PacketAccessBit : 0,
- MaskAlignedBit = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % 16) == 0)) ? AlignedBit : 0,
+ MaskAlignedBit = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % EIGEN_ALIGN_BYTES) == 0)) ? AlignedBit : 0,
FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1) ? LinearAccessBit : 0,
FlagsLvalueBit = is_lvalue<XprType>::value ? LvalueBit : 0,
FlagsRowMajorBit = IsRowMajor ? RowMajorBit : 0,
diff --git a/Eigen/src/Core/DenseStorage.h b/Eigen/src/Core/DenseStorage.h
index 2342b08a1..7264b44c7 100644
--- a/Eigen/src/Core/DenseStorage.h
+++ b/Eigen/src/Core/DenseStorage.h
@@ -40,7 +40,7 @@ void check_static_allocation_size()
*/
template <typename T, int Size, int MatrixOrArrayOptions,
int Alignment = (MatrixOrArrayOptions&DontAlign) ? 0
- : (((Size*sizeof(T))%16)==0) ? 16
+ : (((Size*sizeof(T))%EIGEN_ALIGN_BYTES)==0) ? EIGEN_ALIGN_BYTES
: 0 >
struct plain_array
{
@@ -81,7 +81,7 @@ struct plain_array
#endif
template <typename T, int Size, int MatrixOrArrayOptions>
-struct plain_array<T, Size, MatrixOrArrayOptions, 16>
+struct plain_array<T, Size, MatrixOrArrayOptions, EIGEN_ALIGN_BYTES>
{
EIGEN_USER_ALIGN32 T array[Size];
diff --git a/Eigen/src/Core/GeneralProduct.h b/Eigen/src/Core/GeneralProduct.h
index e3a165ac6..adda6f784 100644
--- a/Eigen/src/Core/GeneralProduct.h
+++ b/Eigen/src/Core/GeneralProduct.h
@@ -397,7 +397,7 @@ struct gemv_static_vector_if<Scalar,Size,MaxSize,true>
internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize)+(ForceAlignment?PacketSize:0),0> m_data;
EIGEN_STRONG_INLINE Scalar* data() {
return ForceAlignment
- ? reinterpret_cast<Scalar*>((reinterpret_cast<size_t>(m_data.array) & ~(size_t(15))) + 16)
+ ? reinterpret_cast<Scalar*>((reinterpret_cast<size_t>(m_data.array) & ~(size_t(EIGEN_ALIGN_BYTES-1))) + EIGEN_ALIGN_BYTES)
: m_data.array;
}
#endif
diff --git a/Eigen/src/Core/Map.h b/Eigen/src/Core/Map.h
index 8ea13cfb7..c75a5e95f 100644
--- a/Eigen/src/Core/Map.h
+++ b/Eigen/src/Core/Map.h
@@ -88,7 +88,7 @@ struct traits<Map<PlainObjectType, MapOptions, StrideType> >
&& ( bool(IsDynamicSize)
|| HasNoOuterStride
|| ( OuterStrideAtCompileTime!=Dynamic
- && ((static_cast<int>(sizeof(Scalar))*OuterStrideAtCompileTime)%16)==0 ) ),
+ && ((static_cast<int>(sizeof(Scalar))*OuterStrideAtCompileTime)%EIGEN_ALIGN_BYTES)==0 ) ),
Flags0 = TraitsBase::Flags & (~NestByRefBit),
Flags1 = IsAligned ? (int(Flags0) | AlignedBit) : (int(Flags0) & ~AlignedBit),
Flags2 = (bool(HasNoStride) || bool(PlainObjectType::IsVectorAtCompileTime))
diff --git a/Eigen/src/Core/MapBase.h b/Eigen/src/Core/MapBase.h
index ffa1371c2..a45a0b374 100644
--- a/Eigen/src/Core/MapBase.h
+++ b/Eigen/src/Core/MapBase.h
@@ -164,7 +164,7 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(internal::traits<Derived>::Flags&PacketAccessBit,
internal::inner_stride_at_compile_time<Derived>::ret==1),
PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1);
- eigen_assert(EIGEN_IMPLIES(internal::traits<Derived>::Flags&AlignedBit, (size_t(m_data) % 16) == 0)
+ eigen_assert(EIGEN_IMPLIES(internal::traits<Derived>::Flags&AlignedBit, (size_t(m_data) % EIGEN_ALIGN_BYTES) == 0)
&& "data is not aligned");
}
diff --git a/Eigen/src/Core/products/GeneralMatrixMatrix.h b/Eigen/src/Core/products/GeneralMatrixMatrix.h
index eb399a824..3dfd239c1 100644
--- a/Eigen/src/Core/products/GeneralMatrixMatrix.h
+++ b/Eigen/src/Core/products/GeneralMatrixMatrix.h
@@ -286,9 +286,9 @@ class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, M
SizeW = MaxDepth * Traits::WorkSpaceFactor
};
- EIGEN_ALIGN32 LhsScalar m_staticA[SizeA];
- EIGEN_ALIGN32 RhsScalar m_staticB[SizeB];
- EIGEN_ALIGN32 RhsScalar m_staticW[SizeW];
+ EIGEN_ALIGN_DEFAULT LhsScalar m_staticA[SizeA];
+ EIGEN_ALIGN_DEFAULT RhsScalar m_staticB[SizeB];
+ EIGEN_ALIGN_DEFAULT RhsScalar m_staticW[SizeW];
public:
diff --git a/Eigen/src/Core/products/GeneralMatrixVector.h b/Eigen/src/Core/products/GeneralMatrixVector.h
index 7f35cc53a..7bf350c42 100644
--- a/Eigen/src/Core/products/GeneralMatrixVector.h
+++ b/Eigen/src/Core/products/GeneralMatrixVector.h
@@ -449,7 +449,7 @@ EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,RowMajor,Co
Index rowBound = ((rows-skipRows)/rowsAtOnce)*rowsAtOnce + skipRows;
for (Index i=skipRows; i<rowBound; i+=rowsAtOnce)
{
- EIGEN_ALIGN32 ResScalar tmp0 = ResScalar(0);
+ EIGEN_ALIGN_DEFAULT ResScalar tmp0 = ResScalar(0);
ResScalar tmp1 = ResScalar(0), tmp2 = ResScalar(0), tmp3 = ResScalar(0);
// this helps the compiler generating good binary code
@@ -558,7 +558,7 @@ EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,RowMajor,Co
{
for (Index i=start; i<end; ++i)
{
- EIGEN_ALIGN32 ResScalar tmp0 = ResScalar(0);
+ EIGEN_ALIGN_DEFAULT ResScalar tmp0 = ResScalar(0);
ResPacket ptmp0 = pset1<ResPacket>(tmp0);
const LhsScalar* lhs0 = lhs + i*lhsStride;
// process first unaligned result's coeffs
diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h
index 787f800b8..d6d5bfa23 100644
--- a/Eigen/src/Core/util/Macros.h
+++ b/Eigen/src/Core/util/Macros.h
@@ -66,17 +66,22 @@
#define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 0
#endif
+// Defined the boundary (in bytes) on which the data needs to be aligned. Note
+// that unless EIGEN_ALIGN is defined and not equal to 0, the data may not be
+// aligned at all regardless of the value of this #define.
+#define EIGEN_ALIGN_BYTES 16
+
#ifdef EIGEN_DONT_ALIGN
#ifndef EIGEN_DONT_ALIGN_STATICALLY
#define EIGEN_DONT_ALIGN_STATICALLY
#endif
#define EIGEN_ALIGN 0
-#else
- #if !defined(EIGEN_DONT_VECTORIZE) && defined(__AVX__)
- #define EIGEN_ALIGN 32
- #else
- #define EIGEN_ALIGN 16
+#elif !defined(EIGEN_DONT_VECTORIZE)
+ #if defined(__AVX__)
+ #undef EIGEN_ALIGN_BYTES
+ #define EIGEN_ALIGN_BYTES 32
#endif
+ #define EIGEN_ALIGN 1
#endif
// EIGEN_ALIGN_STATICALLY is the true test whether we want to align arrays on the stack or not. It takes into account both the user choice to explicitly disable
@@ -286,15 +291,18 @@
#define EIGEN_ALIGN16 EIGEN_ALIGN_TO_BOUNDARY(16)
#define EIGEN_ALIGN32 EIGEN_ALIGN_TO_BOUNDARY(32)
+#define EIGEN_ALIGN_DEFAULT EIGEN_ALIGN_TO_BOUNDARY(EIGEN_ALIGN_BYTES)
#if EIGEN_ALIGN_STATICALLY
#define EIGEN_USER_ALIGN_TO_BOUNDARY(n) EIGEN_ALIGN_TO_BOUNDARY(n)
#define EIGEN_USER_ALIGN16 EIGEN_ALIGN16
#define EIGEN_USER_ALIGN32 EIGEN_ALIGN32
+#define EIGEN_USER_ALIGN_DEFAULT EIGEN_ALIGN_DEFAULT
#else
#define EIGEN_USER_ALIGN_TO_BOUNDARY(n)
#define EIGEN_USER_ALIGN16
#define EIGEN_USER_ALIGN32
+#define EIGEN_USER_ALIGN_DEFAULT
#endif
#ifdef EIGEN_DONT_USE_RESTRICT_KEYWORD
diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h
index 76bdb6cfc..2f2398bbf 100644
--- a/Eigen/src/Core/util/Memory.h
+++ b/Eigen/src/Core/util/Memory.h
@@ -32,7 +32,7 @@
// page 114, "[The] LP64 model [...] is used by all 64-bit UNIX ports" so it's indeed
// quite safe, at least within the context of glibc, to equate 64-bit with LP64.
#if defined(__GLIBC__) && ((__GLIBC__>=2 && __GLIBC_MINOR__ >= 8) || __GLIBC__>2) \
- && defined(__LP64__) && ! defined( __SANITIZE_ADDRESS__ ) && (EIGEN_ALIGN == 16)
+ && defined(__LP64__) && ! defined( __SANITIZE_ADDRESS__ ) && (EIGEN_ALIGN_BYTES == 16)
#define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 1
#else
#define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 0
@@ -42,14 +42,14 @@
// See http://svn.freebsd.org/viewvc/base/stable/6/lib/libc/stdlib/malloc.c?view=markup
// FreeBSD 7 seems to have 16-byte aligned malloc except on ARM and MIPS architectures
// See http://svn.freebsd.org/viewvc/base/stable/7/lib/libc/stdlib/malloc.c?view=markup
-#if defined(__FreeBSD__) && !defined(__arm__) && !defined(__mips__) && (EIGEN_ALIGN == 16)
+#if defined(__FreeBSD__) && !defined(__arm__) && !defined(__mips__) && (EIGEN_ALIGN_BYTES == 16)
#define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 1
#else
#define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 0
#endif
-#if (defined(__APPLE__) && (EIGEN_ALIGN == 16)) \
- || (defined(_WIN64) && (EIGEN_ALIGN == 16)) \
+#if (defined(__APPLE__) && (EIGEN_ALIGN_BYTES == 16)) \
+ || (defined(_WIN64) && (EIGEN_ALIGN_BYTES == 16)) \
|| EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED \
|| EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED
#define EIGEN_MALLOC_ALREADY_ALIGNED 1
@@ -105,9 +105,9 @@ inline void throw_std_bad_alloc()
*/
inline void* handmade_aligned_malloc(std::size_t size)
{
- void *original = std::malloc(size+EIGEN_ALIGN);
+ void *original = std::malloc(size+EIGEN_ALIGN_BYTES);
if (original == 0) return 0;
- void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(EIGEN_ALIGN-1))) + EIGEN_ALIGN);
+ void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(EIGEN_ALIGN_BYTES-1))) + EIGEN_ALIGN_BYTES);
*(reinterpret_cast<void**>(aligned) - 1) = original;
return aligned;
}
@@ -128,9 +128,9 @@ inline void* handmade_aligned_realloc(void* ptr, std::size_t size, std::size_t =
if (ptr == 0) return handmade_aligned_malloc(size);
void *original = *(reinterpret_cast<void**>(ptr) - 1);
std::ptrdiff_t previous_offset = static_cast<char *>(ptr)-static_cast<char *>(original);
- original = std::realloc(original,size+EIGEN_ALIGN);
+ original = std::realloc(original,size+EIGEN_ALIGN_BYTES);
if (original == 0) return 0;
- void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(EIGEN_ALIGN-1))) + EIGEN_ALIGN);
+ void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(EIGEN_ALIGN_BYTES-1))) + EIGEN_ALIGN_BYTES);
void *previous_aligned = static_cast<char *>(original)+previous_offset;
if(aligned!=previous_aligned)
std::memmove(aligned, previous_aligned, size);
@@ -221,11 +221,11 @@ inline void* aligned_malloc(size_t size)
#elif EIGEN_MALLOC_ALREADY_ALIGNED
result = std::malloc(size);
#elif EIGEN_HAS_POSIX_MEMALIGN
- if(posix_memalign(&result, EIGEN_ALIGN, size)) result = 0;
+ if(posix_memalign(&result, EIGEN_ALIGN_BYTES, size)) result = 0;
#elif EIGEN_HAS_MM_MALLOC
- result = _mm_malloc(size, EIGEN_ALIGN);
+ result = _mm_malloc(size, EIGEN_ALIGN_BYTES);
#elif defined(_MSC_VER) && (!defined(_WIN32_WCE))
- result = _aligned_malloc(size, EIGEN_ALIGN);
+ result = _aligned_malloc(size, EIGEN_ALIGN_BYTES);
#else
result = handmade_aligned_malloc(size);
#endif
@@ -275,12 +275,12 @@ inline void* aligned_realloc(void *ptr, size_t new_size, size_t old_size)
// implements _mm_malloc/_mm_free based on the corresponding _aligned_
// functions. This may not always be the case and we just try to be safe.
#if defined(_MSC_VER) && defined(_mm_free)
- result = _aligned_realloc(ptr,new_size,EIGEN_ALIGN);
+ result = _aligned_realloc(ptr,new_size,EIGEN_ALIGN_BYTES);
#else
result = generic_aligned_realloc(ptr,new_size,old_size);
#endif
#elif defined(_MSC_VER)
- result = _aligned_realloc(ptr,new_size,EIGEN_ALIGN);
+ result = _aligned_realloc(ptr,new_size,EIGEN_ALIGN_BYTES);
#else
result = handmade_aligned_realloc(ptr,new_size,old_size);
#endif
@@ -608,8 +608,8 @@ template<typename T> class aligned_stack_memory_handler
*/
#ifdef EIGEN_ALLOCA
// The native alloca() that comes with llvm aligns buffer on 16 bytes even when AVX is enabled.
- #if defined(__arm__) || EIGEN_ALIGN > 16
- #define EIGEN_ALIGNED_ALLOCA(SIZE) reinterpret_cast<void*>((reinterpret_cast<size_t>(EIGEN_ALLOCA(SIZE+EIGEN_ALIGN)) & ~(size_t(EIGEN_ALIGN-1))) + EIGEN_ALIGN)
+ #if defined(__arm__) || EIGEN_ALIGN_BYTES > 16
+ #define EIGEN_ALIGNED_ALLOCA(SIZE) reinterpret_cast<void*>((reinterpret_cast<size_t>(EIGEN_ALLOCA(SIZE+EIGEN_ALIGN_BYTES)) & ~(size_t(EIGEN_ALIGN_BYTES-1))) + EIGEN_ALIGN_BYTES)
#else
#define EIGEN_ALIGNED_ALLOCA EIGEN_ALLOCA
#endif
@@ -679,7 +679,7 @@ template<typename T> class aligned_stack_memory_handler
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(true)
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar,Size) \
- EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(bool(((Size)!=Eigen::Dynamic) && ((sizeof(Scalar)*(Size))%EIGEN_ALIGN==0)))
+ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(bool(((Size)!=Eigen::Dynamic) && ((sizeof(Scalar)*(Size))%EIGEN_ALIGN_BYTES==0)))
/****************************************************************************/
diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h
index 195d9e2e1..a08538aff 100644
--- a/Eigen/src/Core/util/XprHelper.h
+++ b/Eigen/src/Core/util/XprHelper.h
@@ -136,7 +136,7 @@ class compute_matrix_flags
((Options&DontAlign)==0)
&& (
#if EIGEN_ALIGN_STATICALLY
- ((!is_dynamic_size_storage) && (((MaxCols*MaxRows*int(sizeof(Scalar))) % 16) == 0))
+ ((!is_dynamic_size_storage) && (((MaxCols*MaxRows*int(sizeof(Scalar))) % EIGEN_ALIGN_BYTES) == 0))
#else
0
#endif