diff options
author | Gael Guennebaud <g.gael@free.fr> | 2009-11-20 15:39:38 +0100 |
---|---|---|
committer | Gael Guennebaud <g.gael@free.fr> | 2009-11-20 15:39:38 +0100 |
commit | eb8f4500719b52f410c545f738d8fda399cea587 (patch) | |
tree | 33ac90a04b6e6b3de516f2851895d92687b4db6a | |
parent | e3d890bc5a89798eff50ff6650292b4fa934f72e (diff) |
Hey, finally the copyCoeff stuff is not only used to implement swap anymore :)
Add an internal pseudo-expression that allows operators like +=, *= to be optimized using
the copyCoeff stuff.
This makes it easy to enforce aligned loads for the destination matrix everywhere.
-rw-r--r-- | Eigen/Core | 1 | ||||
-rw-r--r-- | Eigen/src/Core/CwiseBinaryOp.h | 8 | ||||
-rw-r--r-- | Eigen/src/Core/CwiseUnaryOps.h | 12 | ||||
-rw-r--r-- | Eigen/src/Core/GenericPacketMath.h | 22 | ||||
-rw-r--r-- | Eigen/src/Core/MapBase.h | 34 | ||||
-rw-r--r-- | Eigen/src/Core/SelfCwiseBinaryOp.h | 113 | ||||
-rw-r--r-- | Eigen/src/Core/arch/SSE/PacketMath.h | 26 | ||||
-rw-r--r-- | Eigen/src/Core/util/ForwardDeclarations.h | 1 | ||||
-rw-r--r-- | test/CMakeLists.txt | 1 | ||||
-rw-r--r-- | test/map.cpp | 6 | ||||
-rw-r--r-- | test/unalignedcount.cpp | 56 |
11 files changed, 229 insertions, 51 deletions
diff --git a/Eigen/Core b/Eigen/Core index 778b1c921..085022310 100644 --- a/Eigen/Core +++ b/Eigen/Core @@ -167,6 +167,7 @@ struct Dense {}; #include "src/Core/ReturnByValue.h" #include "src/Core/NoAlias.h" #include "src/Core/Matrix.h" +#include "src/Core/SelfCwiseBinaryOp.h" #include "src/Core/CwiseBinaryOp.h" #include "src/Core/CwiseUnaryOp.h" #include "src/Core/CwiseNullaryOp.h" diff --git a/Eigen/src/Core/CwiseBinaryOp.h b/Eigen/src/Core/CwiseBinaryOp.h index 462e0f92d..401d57ee5 100644 --- a/Eigen/src/Core/CwiseBinaryOp.h +++ b/Eigen/src/Core/CwiseBinaryOp.h @@ -178,7 +178,9 @@ template<typename OtherDerived> EIGEN_STRONG_INLINE Derived & MatrixBase<Derived>::operator-=(const MatrixBase<OtherDerived> &other) { - return *this = *this - other; + SelfCwiseBinaryOp<ei_scalar_difference_op<Scalar>, Derived> tmp(derived()); + tmp = other; + return derived(); } /** replaces \c *this by \c *this + \a other. @@ -190,7 +192,9 @@ template<typename OtherDerived> EIGEN_STRONG_INLINE Derived & MatrixBase<Derived>::operator+=(const MatrixBase<OtherDerived>& other) { - return *this = *this + other; + SelfCwiseBinaryOp<ei_scalar_sum_op<Scalar>, Derived> tmp(derived()); + tmp = other; + return derived(); } #endif // EIGEN_CWISE_BINARY_OP_H diff --git a/Eigen/src/Core/CwiseUnaryOps.h b/Eigen/src/Core/CwiseUnaryOps.h index 39fd479b5..a7acd0036 100644 --- a/Eigen/src/Core/CwiseUnaryOps.h +++ b/Eigen/src/Core/CwiseUnaryOps.h @@ -33,9 +33,17 @@ EIGEN_STRONG_INLINE const CwiseUnaryOp<ei_scalar_opposite_op<typename ei_traits< operator-() const { return derived(); } EIGEN_STRONG_INLINE Derived& operator*=(const Scalar& other) -{ return *this = *this * other; } +{ + SelfCwiseBinaryOp<ei_scalar_product_op<Scalar>, Derived> tmp(derived()); + tmp = PlainMatrixType::Constant(rows(),cols(),other); + return derived(); +} EIGEN_STRONG_INLINE Derived& operator/=(const Scalar& other) -{ return *this = *this / other; } +{ + SelfCwiseBinaryOp<typename 
ei_meta_if<NumTraits<Scalar>::HasFloatingPoint,ei_scalar_product_op<Scalar>,ei_scalar_quotient_op<Scalar> >::ret, Derived> tmp(derived()); + tmp = PlainMatrixType::Constant(rows(),cols(), NumTraits<Scalar>::HasFloatingPoint ? Scalar(1)/other : other); + return derived(); +} /** \returns an expression of \c *this scaled by the scalar factor \a scalar */ EIGEN_STRONG_INLINE const ScalarMultipleReturnType diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index 77e5641ff..ae1720eca 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -34,6 +34,22 @@ * of generic vectorized code. */ +#ifndef EIGEN_DEBUG_ALIGNED_LOAD +#define EIGEN_DEBUG_ALIGNED_LOAD +#endif + +#ifndef EIGEN_DEBUG_UNALIGNED_LOAD +#define EIGEN_DEBUG_UNALIGNED_LOAD +#endif + +#ifndef EIGEN_DEBUG_ALIGNED_STORE +#define EIGEN_DEBUG_ALIGNED_STORE +#endif + +#ifndef EIGEN_DEBUG_UNALIGNED_STORE +#define EIGEN_DEBUG_UNALIGNED_STORE +#endif + struct ei_default_packet_traits { enum { @@ -44,13 +60,13 @@ struct ei_default_packet_traits HasAbs = 1, HasMin = 1, HasMax = 1, - + HasDiv = 0, HasSqrt = 0, HasExp = 0, HasLog = 0, HasPow = 0, - + HasSin = 0, HasCos = 0, HasTan = 0, @@ -128,7 +144,7 @@ ei_pxor(const Packet& a, const Packet& b) { return a ^ b; } /** \internal \returns the bitwise andnot of \a a and \a b */ template<typename Packet> inline Packet ei_pandnot(const Packet& a, const Packet& b) { return a & (!b); } - + /** \internal \returns a packet version of \a *from, from must be 16 bytes aligned */ template<typename Scalar> inline typename ei_packet_traits<Scalar>::type ei_pload(const Scalar* from) { return *from; } diff --git a/Eigen/src/Core/MapBase.h b/Eigen/src/Core/MapBase.h index 8770732de..0f0986bc5 100644 --- a/Eigen/src/Core/MapBase.h +++ b/Eigen/src/Core/MapBase.h @@ -34,13 +34,13 @@ * and type \c AlignedDerivedType in their respective ei_traits<> specialization structure. 
* The value of \c PacketAccess can be either \b AsRequested, or set to \b EnforceAlignedAccess which * enforces both aligned loads and stores. - * - * \c EnforceAlignedAccess is automatically set in expressions such as + * + * \c EnforceAlignedAccess is automatically set in expressions such as * \code A += B; \endcode where A is either a Block or a Map. Here, * this expression is transfomed into \code A = A_with_EnforceAlignedAccess + B; \endcode * avoiding unaligned loads from A. Indeed, since Eigen's packet evaluation mechanism * automatically align to the destination matrix, we know that loads to A will be aligned too. - * + * * The type \c AlignedDerivedType should correspond to the equivalent expression type * with \c PacketAccess set to \c EnforceAlignedAccess. * @@ -197,32 +197,6 @@ template<typename Derived> class MapBase using Base::operator=; using Base::operator*=; - // FIXME it seems VS does not allow to do "using Base::operator+=" - // and to overload operator+= at the same time, therefore we have to - // explicitly add these two overloads. - // Maybe there exists a better solution though. 
- template<typename ProductDerived, typename Lhs,typename Rhs> - Derived& operator+=(const Flagged<ProductBase<ProductDerived,Lhs,Rhs>, 0, EvalBeforeAssigningBit>& other) - { return Base::operator+=(other); } - - template<typename ProductDerived, typename Lhs,typename Rhs> - Derived& operator-=(const Flagged<ProductBase<ProductDerived,Lhs,Rhs>, 0, EvalBeforeAssigningBit>& other) - { return Base::operator-=(other); } - - template<typename OtherDerived> - Derived& operator+=(const MatrixBase<OtherDerived>& other) - { return derived() = forceAligned() + other; } - - template<typename OtherDerived> - Derived& operator-=(const MatrixBase<OtherDerived>& other) - { return derived() = forceAligned() - other; } - - Derived& operator*=(const Scalar& other) - { return derived() = forceAligned() * other; } - - Derived& operator/=(const Scalar& other) - { return derived() = forceAligned() / other; } - protected: void checkDataAlignment() const @@ -230,7 +204,7 @@ template<typename Derived> class MapBase ei_assert( ((!(ei_traits<Derived>::Flags&AlignedBit)) || ((std::size_t(m_data)&0xf)==0)) && "data is not aligned"); } - + const Scalar* EIGEN_RESTRICT m_data; const ei_int_if_dynamic<RowsAtCompileTime> m_rows; const ei_int_if_dynamic<ColsAtCompileTime> m_cols; diff --git a/Eigen/src/Core/SelfCwiseBinaryOp.h b/Eigen/src/Core/SelfCwiseBinaryOp.h new file mode 100644 index 000000000..ac0ccd963 --- /dev/null +++ b/Eigen/src/Core/SelfCwiseBinaryOp.h @@ -0,0 +1,113 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Gael Guennebaud <g.gael@free.fr> +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. 
+// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see <http://www.gnu.org/licenses/>. + +#ifndef EIGEN_SELFCWISEBINARYOP_H +#define EIGEN_SELFCWISEBINARYOP_H + +/** \class SelfCwiseBinaryOp + * + * \internal + * + * \brief Internal helper class for optimizing operators like +=, -= + */ +template<typename BinaryOp, typename MatrixType> +struct ei_traits<SelfCwiseBinaryOp<BinaryOp,MatrixType> > : ei_traits<MatrixType> {}; + +template<typename BinaryOp, typename MatrixType> class SelfCwiseBinaryOp + : public MatrixBase<SelfCwiseBinaryOp<BinaryOp,MatrixType> > +{ + public: + + EIGEN_GENERIC_PUBLIC_INTERFACE(SelfCwiseBinaryOp) + typedef typename ei_packet_traits<Scalar>::type Packet; + + using Base::operator=; + + inline SelfCwiseBinaryOp(MatrixType& xpr, const BinaryOp& func = BinaryOp()) : m_matrix(xpr), m_functor(func) {} + + inline int rows() const { return m_matrix.rows(); } + inline int cols() const { return m_matrix.cols(); } + inline int stride() const { return m_matrix.stride(); } + + // note that this function is needed by assign to correctly align loads/stores + // TODO make Assign use .data() + inline Scalar& coeffRef(int row, int col) + { + return m_matrix.const_cast_derived().coeffRef(row, col); + } + + // note that this function is needed by assign to correctly align loads/stores + // TODO make Assign use .data() + inline 
Scalar& coeffRef(int index) + { + return m_matrix.const_cast_derived().coeffRef(index); + } + + template<typename OtherDerived> + void copyCoeff(int row, int col, const MatrixBase<OtherDerived>& other) + { + OtherDerived& _other = other.const_cast_derived(); + ei_internal_assert(row >= 0 && row < rows() + && col >= 0 && col < cols()); + Scalar& tmp = m_matrix.coeffRef(row,col); + tmp = m_functor(tmp, _other.coeff(row,col)); + } + + template<typename OtherDerived> + void copyCoeff(int index, const MatrixBase<OtherDerived>& other) + { + OtherDerived& _other = other.const_cast_derived(); + ei_internal_assert(index >= 0 && index < m_matrix.size()); + Scalar& tmp = m_matrix.coeffRef(index); + tmp = m_functor(tmp, _other.coeff(index)); + } + + template<typename OtherDerived, int StoreMode, int LoadMode> + void copyPacket(int row, int col, const MatrixBase<OtherDerived>& other) + { + OtherDerived& _other = other.const_cast_derived(); + ei_internal_assert(row >= 0 && row < rows() + && col >= 0 && col < cols()); + m_matrix.template writePacket<StoreMode>(row, col, + m_functor.packetOp(m_matrix.template packet<StoreMode>(row, col),_other.template packet<LoadMode>(row, col)) ); + } + + template<typename OtherDerived, int StoreMode, int LoadMode> + void copyPacket(int index, const MatrixBase<OtherDerived>& other) + { + OtherDerived& _other = other.const_cast_derived(); + ei_internal_assert(index >= 0 && index < m_matrix.size()); + m_matrix.template writePacket<StoreMode>(index, + m_functor.packetOp(m_matrix.template packet<StoreMode>(index),_other.template packet<LoadMode>(index)) ); + } + + protected: + MatrixType& m_matrix; + const BinaryOp& m_functor; + + private: + SelfCwiseBinaryOp& operator=(const SelfCwiseBinaryOp&); +}; + +#endif // EIGEN_SELFCWISEBINARYOP_H diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index 60ccadc21..29c89c310 100644 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ 
-172,14 +172,14 @@ template<> EIGEN_STRONG_INLINE Packet4f ei_pandnot<Packet4f>(const Packet4f& a, template<> EIGEN_STRONG_INLINE Packet2d ei_pandnot<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_andnot_pd(a,b); } template<> EIGEN_STRONG_INLINE Packet4i ei_pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_andnot_si128(a,b); } -template<> EIGEN_STRONG_INLINE Packet4f ei_pload<float>(const float* from) { return _mm_load_ps(from); } -template<> EIGEN_STRONG_INLINE Packet2d ei_pload<double>(const double* from) { return _mm_load_pd(from); } -template<> EIGEN_STRONG_INLINE Packet4i ei_pload<int>(const int* from) { return _mm_load_si128(reinterpret_cast<const Packet4i*>(from)); } +template<> EIGEN_STRONG_INLINE Packet4f ei_pload<float>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_ps(from); } +template<> EIGEN_STRONG_INLINE Packet2d ei_pload<double>(const double* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_pd(from); } +template<> EIGEN_STRONG_INLINE Packet4i ei_pload<int>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_si128(reinterpret_cast<const Packet4i*>(from)); } #if (!defined __GNUC__) && (!defined __ICC) -template<> EIGEN_STRONG_INLINE Packet4f ei_ploadu(const float* from) { return _mm_loadu_ps(from); } -template<> EIGEN_STRONG_INLINE Packet2d ei_ploadu<double>(const double* from) { return _mm_loadu_pd(from); } -template<> EIGEN_STRONG_INLINE Packet4i ei_ploadu<int>(const int* from) { return _mm_loadu_si128(reinterpret_cast<const Packet4i*>(from)); } +template<> EIGEN_STRONG_INLINE Packet4f ei_ploadu(const float* from) { return EIGEN_DEBUG_UNALIGNED_LOAD _mm_loadu_ps(from); } +template<> EIGEN_STRONG_INLINE Packet2d ei_ploadu<double>(const double* from) { return EIGEN_DEBUG_UNALIGNED_LOAD _mm_loadu_pd(from); } +template<> EIGEN_STRONG_INLINE Packet4i ei_ploadu<int>(const int* from) { return EIGEN_DEBUG_UNALIGNED_LOAD _mm_loadu_si128(reinterpret_cast<const Packet4i*>(from)); } #else // Fast unaligned 
loads. Note that here we cannot directly use intrinsics: this would // require pointer casting to incompatible pointer types and leads to invalid code @@ -188,6 +188,7 @@ template<> EIGEN_STRONG_INLINE Packet4i ei_ploadu<int>(const int* from) { return // TODO: do the same for MSVC (ICC is compatible) template<> EIGEN_STRONG_INLINE Packet4f ei_ploadu(const float* from) { + EIGEN_DEBUG_UNALIGNED_LOAD __m128 res; asm volatile ("movsd %[from0], %[r]" : [r] "=x" (res) : [from0] "m" (*from), [dummy] "m" (*(from+1)) ); asm volatile ("movhps %[from2], %[r]" : [r] "+x" (res) : [from2] "m" (*(from+2)), [dummy] "m" (*(from+3)) ); @@ -195,6 +196,7 @@ template<> EIGEN_STRONG_INLINE Packet4f ei_ploadu(const float* from) } template<> EIGEN_STRONG_INLINE Packet2d ei_ploadu(const double* from) { + EIGEN_DEBUG_UNALIGNED_LOAD __m128d res; asm volatile ("movsd %[from0], %[r]" : [r] "=x" (res) : [from0] "m" (*from) ); asm volatile ("movhpd %[from1], %[r]" : [r] "+x" (res) : [from1] "m" (*(from+1)) ); @@ -202,6 +204,7 @@ template<> EIGEN_STRONG_INLINE Packet2d ei_ploadu(const double* from) } template<> EIGEN_STRONG_INLINE Packet4i ei_ploadu(const int* from) { + EIGEN_DEBUG_UNALIGNED_LOAD __m128i res; asm volatile ("movsd %[from0], %[r]" : [r] "=x" (res) : [from0] "m" (*from), [dummy] "m" (*(from+1)) ); asm volatile ("movhps %[from2], %[r]" : [r] "+x" (res) : [from2] "m" (*(from+2)), [dummy] "m" (*(from+3)) ); @@ -209,16 +212,17 @@ template<> EIGEN_STRONG_INLINE Packet4i ei_ploadu(const int* from) } #endif -template<> EIGEN_STRONG_INLINE void ei_pstore<float>(float* to, const Packet4f& from) { _mm_store_ps(to, from); } -template<> EIGEN_STRONG_INLINE void ei_pstore<double>(double* to, const Packet2d& from) { _mm_store_pd(to, from); } -template<> EIGEN_STRONG_INLINE void ei_pstore<int>(int* to, const Packet4i& from) { _mm_store_si128(reinterpret_cast<Packet4i*>(to), from); } +template<> EIGEN_STRONG_INLINE void ei_pstore<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE 
_mm_store_ps(to, from); } +template<> EIGEN_STRONG_INLINE void ei_pstore<double>(double* to, const Packet2d& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_pd(to, from); } +template<> EIGEN_STRONG_INLINE void ei_pstore<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_si128(reinterpret_cast<Packet4i*>(to), from); } template<> EIGEN_STRONG_INLINE void ei_pstoreu<double>(double* to, const Packet2d& from) { + EIGEN_DEBUG_UNALIGNED_STORE _mm_storel_pd((to), from); _mm_storeh_pd((to+1), from); } -template<> EIGEN_STRONG_INLINE void ei_pstoreu<float>(float* to, const Packet4f& from) { ei_pstoreu((double*)to, _mm_castps_pd(from)); } -template<> EIGEN_STRONG_INLINE void ei_pstoreu<int>(int* to, const Packet4i& from) { ei_pstoreu((double*)to, _mm_castsi128_pd(from)); } +template<> EIGEN_STRONG_INLINE void ei_pstoreu<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE ei_pstoreu((double*)to, _mm_castps_pd(from)); } +template<> EIGEN_STRONG_INLINE void ei_pstoreu<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE ei_pstoreu((double*)to, _mm_castsi128_pd(from)); } #if defined(_MSC_VER) && (_MSC_VER <= 1500) && defined(_WIN64) // The temporary variable fixes an internal compilation error. 
diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h index 541b5dd9f..fb0233591 100644 --- a/Eigen/src/Core/util/ForwardDeclarations.h +++ b/Eigen/src/Core/util/ForwardDeclarations.h @@ -48,6 +48,7 @@ template<typename NullaryOp, typename MatrixType> class CwiseNullaryOp; template<typename UnaryOp, typename MatrixType> class CwiseUnaryOp; template<typename ViewOp, typename MatrixType> class CwiseUnaryView; template<typename BinaryOp, typename Lhs, typename Rhs> class CwiseBinaryOp; +template<typename BinOp, typename MatrixType> class SelfCwiseBinaryOp; template<typename Derived, typename Lhs, typename Rhs> class ProductBase; template<typename Derived> class DiagonalBase; diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 30668a2aa..149e623e6 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -99,6 +99,7 @@ ei_add_test(vectorization_logic) ei_add_test(basicstuff) ei_add_test(linearstructure) ei_add_test(cwiseop) +ei_add_test(unalignedcount) ei_add_test(redux) ei_add_test(visitor) ei_add_test(product_small) diff --git a/test/map.cpp b/test/map.cpp index 18c6b2694..14c0393db 100644 --- a/test/map.cpp +++ b/test/map.cpp @@ -35,7 +35,7 @@ template<typename VectorType> void map_class(const VectorType& m) Scalar* array2 = ei_aligned_new<Scalar>(size); Scalar* array3 = new Scalar[size+1]; Scalar* array3unaligned = size_t(array3)%16 == 0 ? array3+1 : array3; - + Map<VectorType, Aligned>(array1, size) = VectorType::Random(size); Map<VectorType, Aligned>(array2, size) = Map<VectorType,Aligned>(array1, size); Map<VectorType>(array3unaligned, size) = Map<VectorType>(array1, size); @@ -62,7 +62,7 @@ template<typename VectorType> void map_static_methods(const VectorType& m) Scalar* array2 = ei_aligned_new<Scalar>(size); Scalar* array3 = new Scalar[size+1]; Scalar* array3unaligned = size_t(array3)%16 == 0 ? 
array3+1 : array3; - + VectorType::MapAligned(array1, size) = VectorType::Random(size); VectorType::Map(array2, size) = VectorType::Map(array1, size); VectorType::Map(array3unaligned, size) = VectorType::Map(array1, size); @@ -71,7 +71,7 @@ template<typename VectorType> void map_static_methods(const VectorType& m) VectorType ma3 = VectorType::Map(array3unaligned, size); VERIFY_IS_APPROX(ma1, ma2); VERIFY_IS_APPROX(ma1, ma3); - + ei_aligned_delete(array1, size); ei_aligned_delete(array2, size); delete[] array3; diff --git a/test/unalignedcount.cpp b/test/unalignedcount.cpp new file mode 100644 index 000000000..feff870e5 --- /dev/null +++ b/test/unalignedcount.cpp @@ -0,0 +1,56 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Gael Guennebaud <g.gael@free.fr> +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see <http://www.gnu.org/licenses/>. 
+ +static int nb_load; +static int nb_loadu; +static int nb_store; +static int nb_storeu; + +#define EIGEN_DEBUG_ALIGNED_LOAD { nb_load++; } +#define EIGEN_DEBUG_UNALIGNED_LOAD { nb_loadu++; } +#define EIGEN_DEBUG_ALIGNED_STORE { nb_store++; } +#define EIGEN_DEBUG_UNALIGNED_STORE { nb_storeu++; } + +#define VERIFY_ALIGNED_UNALIGNED_COUNT(XPR,AL,UL,AS,US) {\ + nb_load = nb_loadu = nb_store = nb_storeu = 0; \ + XPR; \ + if(!(nb_load==AL && nb_loadu==UL && nb_store==AS && nb_storeu==US)) \ + std::cerr << " >> " << nb_load << ", " << nb_loadu << ", " << nb_store << ", " << nb_storeu << "\n"; \ + VERIFY( (#XPR) && nb_load==AL && nb_loadu==UL && nb_store==AS && nb_storeu==US ); \ + } + + +#include "main.h" + +void test_unalignedcount() +{ + #ifdef EIGEN_VECTORIZE_SSE + VectorXf a(40), b(40); + VERIFY_ALIGNED_UNALIGNED_COUNT(a += b, 20, 0, 10, 0); + VERIFY_ALIGNED_UNALIGNED_COUNT(a.segment(0,40) += b.segment(0,40), 10, 10, 10, 0); + VERIFY_ALIGNED_UNALIGNED_COUNT(a.segment(0,40) -= b.segment(0,40), 10, 10, 10, 0); + VERIFY_ALIGNED_UNALIGNED_COUNT(a.segment(0,40) *= 3.5, 10, 0, 10, 0); + VERIFY_ALIGNED_UNALIGNED_COUNT(a.segment(0,40) /= 3.5, 10, 0, 10, 0); + #endif +} |