diff options
author | Gael Guennebaud <g.gael@free.fr> | 2010-07-23 16:29:29 +0200 |
---|---|---|
committer | Gael Guennebaud <g.gael@free.fr> | 2010-07-23 16:29:29 +0200 |
commit | aa2b46aa9151bd739ba02114e1dad643a8cc5c4d (patch) | |
tree | 857c13746870a614a42661c7174363e65b83dbb7 /Eigen/src/Core | |
parent | 853c0e15df2c13cbfc4a85eab2c7f2edea2cee49 (diff) |
allow vectorization of mat44.col() by adding an InnerPanel boolean
template parameter to Block
Diffstat (limited to 'Eigen/src/Core')
-rw-r--r-- | Eigen/src/Core/Block.h | 17 | ||||
-rw-r--r-- | Eigen/src/Core/DenseBase.h | 12 | ||||
-rw-r--r-- | Eigen/src/Core/Map.h | 2 | ||||
-rw-r--r-- | Eigen/src/Core/arch/SSE/MathFunctions.h | 2 | ||||
-rw-r--r-- | Eigen/src/Core/util/ForwardDeclarations.h | 2 |
5 files changed, 18 insertions, 17 deletions
diff --git a/Eigen/src/Core/Block.h b/Eigen/src/Core/Block.h index a16aa22d4..2a28ea7cd 100644 --- a/Eigen/src/Core/Block.h +++ b/Eigen/src/Core/Block.h @@ -58,8 +58,8 @@ * * \sa DenseBase::block(Index,Index,Index,Index), DenseBase::block(Index,Index), class VectorBlock */ -template<typename XprType, int BlockRows, int BlockCols, bool HasDirectAccess> -struct ei_traits<Block<XprType, BlockRows, BlockCols, HasDirectAccess> > : ei_traits<XprType> +template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool HasDirectAccess> +struct ei_traits<Block<XprType, BlockRows, BlockCols, InnerPanel, HasDirectAccess> > : ei_traits<XprType> { typedef typename ei_traits<XprType>::Scalar Scalar; typedef typename ei_traits<XprType>::StorageKind StorageKind; @@ -92,15 +92,16 @@ struct ei_traits<Block<XprType, BlockRows, BlockCols, HasDirectAccess> > : ei_tr MaskPacketAccessBit = (InnerSize == Dynamic || (InnerSize % ei_packet_traits<Scalar>::size) == 0) && (InnerStrideAtCompileTime == 1) ? PacketAccessBit : 0, + MaskAlignedBit = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && ((OuterStrideAtCompileTime % ei_packet_traits<Scalar>::size) == 0)) ? AlignedBit : 0, FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1) ? LinearAccessBit : 0, - Flags0 = ei_traits<XprType>::Flags & (HereditaryBits | MaskPacketAccessBit | LvalueBit | DirectAccessBit), + Flags0 = ei_traits<XprType>::Flags & (HereditaryBits | MaskPacketAccessBit | LvalueBit | DirectAccessBit | MaskAlignedBit), Flags1 = Flags0 | FlagsLinearAccessBit, Flags = (Flags1 & ~RowMajorBit) | (IsRowMajor ? 
RowMajorBit : 0) }; }; -template<typename XprType, int BlockRows, int BlockCols, bool HasDirectAccess> class Block - : public ei_dense_xpr_base<Block<XprType, BlockRows, BlockCols, HasDirectAccess> >::type +template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool HasDirectAccess> class Block + : public ei_dense_xpr_base<Block<XprType, BlockRows, BlockCols, InnerPanel, HasDirectAccess> >::type { public: @@ -229,9 +230,9 @@ template<typename XprType, int BlockRows, int BlockCols, bool HasDirectAccess> c }; /** \internal */ -template<typename XprType, int BlockRows, int BlockCols> -class Block<XprType,BlockRows,BlockCols,true> - : public MapBase<Block<XprType, BlockRows, BlockCols,true> > +template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel> +class Block<XprType,BlockRows,BlockCols, InnerPanel,true> + : public MapBase<Block<XprType, BlockRows, BlockCols, InnerPanel, true> > { public: diff --git a/Eigen/src/Core/DenseBase.h b/Eigen/src/Core/DenseBase.h index 02eb31d76..a00b791cd 100644 --- a/Eigen/src/Core/DenseBase.h +++ b/Eigen/src/Core/DenseBase.h @@ -233,17 +233,17 @@ template<typename Derived> class DenseBase /** \internal the return type of MatrixBase::eigenvalues() */ typedef Matrix<typename NumTraits<typename ei_traits<Derived>::Scalar>::Real, ei_traits<Derived>::ColsAtCompileTime, 1> EigenvaluesReturnType; /** \internal expression type of a column */ - typedef Block<Derived, ei_traits<Derived>::RowsAtCompileTime, 1> ColXpr; + typedef Block<Derived, ei_traits<Derived>::RowsAtCompileTime, 1, !IsRowMajor> ColXpr; /** \internal expression type of a row */ - typedef Block<Derived, 1, ei_traits<Derived>::ColsAtCompileTime> RowXpr; + typedef Block<Derived, 1, ei_traits<Derived>::ColsAtCompileTime, IsRowMajor> RowXpr; /** \internal expression type of a block of whole columns */ - typedef Block<Derived, ei_traits<Derived>::RowsAtCompileTime, Dynamic> ColsBlockXpr; + typedef Block<Derived, ei_traits<Derived>::RowsAtCompileTime, 
Dynamic, !IsRowMajor> ColsBlockXpr; /** \internal expression type of a block of whole rows */ - typedef Block<Derived, Dynamic, ei_traits<Derived>::ColsAtCompileTime> RowsBlockXpr; + typedef Block<Derived, Dynamic, ei_traits<Derived>::ColsAtCompileTime, IsRowMajor> RowsBlockXpr; /** \internal expression type of a block of whole columns */ - template<int N> struct NColsBlockXpr { typedef Block<Derived, ei_traits<Derived>::RowsAtCompileTime, N> Type; }; + template<int N> struct NColsBlockXpr { typedef Block<Derived, ei_traits<Derived>::RowsAtCompileTime, N, !IsRowMajor> Type; }; /** \internal expression type of a block of whole rows */ - template<int N> struct NRowsBlockXpr { typedef Block<Derived, N, ei_traits<Derived>::ColsAtCompileTime> Type; }; + template<int N> struct NRowsBlockXpr { typedef Block<Derived, N, ei_traits<Derived>::ColsAtCompileTime, IsRowMajor> Type; }; #endif // not EIGEN_PARSED_BY_DOXYGEN diff --git a/Eigen/src/Core/Map.h b/Eigen/src/Core/Map.h index 3386c6d69..763948453 100644 --- a/Eigen/src/Core/Map.h +++ b/Eigen/src/Core/Map.h @@ -100,7 +100,7 @@ struct ei_traits<Map<PlainObjectType, MapOptions, StrideType> > || ( OuterStrideAtCompileTime!=Dynamic && ((static_cast<int>(sizeof(Scalar))*OuterStrideAtCompileTime)%16)==0 ) ), Flags0 = ei_traits<PlainObjectType>::Flags, - Flags1 = IsAligned ? int(Flags0) | AlignedBit : int(Flags0) & ~AlignedBit, + Flags1 = IsAligned ? (int(Flags0) | AlignedBit) : (int(Flags0) & ~AlignedBit), Flags2 = HasNoStride ? int(Flags1) : int(Flags1 & ~LinearAccessBit), Flags = KeepsPacketAccess ? int(Flags2) : (int(Flags2) & ~PacketAccessBit) }; diff --git a/Eigen/src/Core/arch/SSE/MathFunctions.h b/Eigen/src/Core/arch/SSE/MathFunctions.h index e4ca82985..cb73fd205 100644 --- a/Eigen/src/Core/arch/SSE/MathFunctions.h +++ b/Eigen/src/Core/arch/SSE/MathFunctions.h @@ -373,7 +373,7 @@ Packet4f ei_pcos<Packet4f>(const Packet4f& _x) return _mm_xor_ps(y, sign_bit); } -// This is Quake3's fast inverse square root. 
+// This is based on Quake3's fast inverse square root. // For detail see here: http://www.beyond3d.com/content/articles/8/ template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f ei_psqrt<Packet4f>(const Packet4f& _x) diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h index e4466db5b..423aa110e 100644 --- a/Eigen/src/Core/util/ForwardDeclarations.h +++ b/Eigen/src/Core/util/ForwardDeclarations.h @@ -60,7 +60,7 @@ template<typename ExpressionType> class NestByValue; template<typename ExpressionType> class ForceAlignedAccess; template<typename ExpressionType> class SwapWrapper; -template<typename XprType, int BlockRows=Dynamic, int BlockCols=Dynamic, +template<typename XprType, int BlockRows=Dynamic, int BlockCols=Dynamic, bool InnerPanel = false, bool HasDirectAccess = ei_has_direct_access<XprType>::ret> class Block; template<typename MatrixType, int Size=Dynamic> class VectorBlock; |