aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src
diff options
context:
space:
mode:
authorGravatar Gael Guennebaud <g.gael@free.fr>2010-07-23 16:29:29 +0200
committerGravatar Gael Guennebaud <g.gael@free.fr>2010-07-23 16:29:29 +0200
commitaa2b46aa9151bd739ba02114e1dad643a8cc5c4d (patch)
tree857c13746870a614a42661c7174363e65b83dbb7 /Eigen/src
parent853c0e15df2c13cbfc4a85eab2c7f2edea2cee49 (diff)
allow vectorization of mat44.col() by adding a InnerPanel boolean
template parameter to Block
Diffstat (limited to 'Eigen/src')
-rw-r--r--Eigen/src/Core/Block.h17
-rw-r--r--Eigen/src/Core/DenseBase.h12
-rw-r--r--Eigen/src/Core/Map.h2
-rw-r--r--Eigen/src/Core/arch/SSE/MathFunctions.h2
-rw-r--r--Eigen/src/Core/util/ForwardDeclarations.h2
5 files changed, 18 insertions, 17 deletions
diff --git a/Eigen/src/Core/Block.h b/Eigen/src/Core/Block.h
index a16aa22d4..2a28ea7cd 100644
--- a/Eigen/src/Core/Block.h
+++ b/Eigen/src/Core/Block.h
@@ -58,8 +58,8 @@
*
* \sa DenseBase::block(Index,Index,Index,Index), DenseBase::block(Index,Index), class VectorBlock
*/
-template<typename XprType, int BlockRows, int BlockCols, bool HasDirectAccess>
-struct ei_traits<Block<XprType, BlockRows, BlockCols, HasDirectAccess> > : ei_traits<XprType>
+template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool HasDirectAccess>
+struct ei_traits<Block<XprType, BlockRows, BlockCols, InnerPanel, HasDirectAccess> > : ei_traits<XprType>
{
typedef typename ei_traits<XprType>::Scalar Scalar;
typedef typename ei_traits<XprType>::StorageKind StorageKind;
@@ -92,15 +92,16 @@ struct ei_traits<Block<XprType, BlockRows, BlockCols, HasDirectAccess> > : ei_tr
MaskPacketAccessBit = (InnerSize == Dynamic || (InnerSize % ei_packet_traits<Scalar>::size) == 0)
&& (InnerStrideAtCompileTime == 1)
? PacketAccessBit : 0,
+ MaskAlignedBit = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && ((OuterStrideAtCompileTime % ei_packet_traits<Scalar>::size) == 0)) ? AlignedBit : 0,
FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1) ? LinearAccessBit : 0,
- Flags0 = ei_traits<XprType>::Flags & (HereditaryBits | MaskPacketAccessBit | LvalueBit | DirectAccessBit),
+ Flags0 = ei_traits<XprType>::Flags & (HereditaryBits | MaskPacketAccessBit | LvalueBit | DirectAccessBit | MaskAlignedBit),
Flags1 = Flags0 | FlagsLinearAccessBit,
Flags = (Flags1 & ~RowMajorBit) | (IsRowMajor ? RowMajorBit : 0)
};
};
-template<typename XprType, int BlockRows, int BlockCols, bool HasDirectAccess> class Block
- : public ei_dense_xpr_base<Block<XprType, BlockRows, BlockCols, HasDirectAccess> >::type
+template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool HasDirectAccess> class Block
+ : public ei_dense_xpr_base<Block<XprType, BlockRows, BlockCols, InnerPanel, HasDirectAccess> >::type
{
public:
@@ -229,9 +230,9 @@ template<typename XprType, int BlockRows, int BlockCols, bool HasDirectAccess> c
};
/** \internal */
-template<typename XprType, int BlockRows, int BlockCols>
-class Block<XprType,BlockRows,BlockCols,true>
- : public MapBase<Block<XprType, BlockRows, BlockCols,true> >
+template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel>
+class Block<XprType,BlockRows,BlockCols, InnerPanel,true>
+ : public MapBase<Block<XprType, BlockRows, BlockCols, InnerPanel, true> >
{
public:
diff --git a/Eigen/src/Core/DenseBase.h b/Eigen/src/Core/DenseBase.h
index 02eb31d76..a00b791cd 100644
--- a/Eigen/src/Core/DenseBase.h
+++ b/Eigen/src/Core/DenseBase.h
@@ -233,17 +233,17 @@ template<typename Derived> class DenseBase
/** \internal the return type of MatrixBase::eigenvalues() */
typedef Matrix<typename NumTraits<typename ei_traits<Derived>::Scalar>::Real, ei_traits<Derived>::ColsAtCompileTime, 1> EigenvaluesReturnType;
/** \internal expression type of a column */
- typedef Block<Derived, ei_traits<Derived>::RowsAtCompileTime, 1> ColXpr;
+ typedef Block<Derived, ei_traits<Derived>::RowsAtCompileTime, 1, !IsRowMajor> ColXpr;
/** \internal expression type of a row */
- typedef Block<Derived, 1, ei_traits<Derived>::ColsAtCompileTime> RowXpr;
+ typedef Block<Derived, 1, ei_traits<Derived>::ColsAtCompileTime, IsRowMajor> RowXpr;
/** \internal expression type of a block of whole columns */
- typedef Block<Derived, ei_traits<Derived>::RowsAtCompileTime, Dynamic> ColsBlockXpr;
+ typedef Block<Derived, ei_traits<Derived>::RowsAtCompileTime, Dynamic, !IsRowMajor> ColsBlockXpr;
/** \internal expression type of a block of whole rows */
- typedef Block<Derived, Dynamic, ei_traits<Derived>::ColsAtCompileTime> RowsBlockXpr;
+ typedef Block<Derived, Dynamic, ei_traits<Derived>::ColsAtCompileTime, IsRowMajor> RowsBlockXpr;
/** \internal expression type of a block of whole columns */
- template<int N> struct NColsBlockXpr { typedef Block<Derived, ei_traits<Derived>::RowsAtCompileTime, N> Type; };
+ template<int N> struct NColsBlockXpr { typedef Block<Derived, ei_traits<Derived>::RowsAtCompileTime, N, !IsRowMajor> Type; };
/** \internal expression type of a block of whole rows */
- template<int N> struct NRowsBlockXpr { typedef Block<Derived, N, ei_traits<Derived>::ColsAtCompileTime> Type; };
+ template<int N> struct NRowsBlockXpr { typedef Block<Derived, N, ei_traits<Derived>::ColsAtCompileTime, IsRowMajor> Type; };
#endif // not EIGEN_PARSED_BY_DOXYGEN
diff --git a/Eigen/src/Core/Map.h b/Eigen/src/Core/Map.h
index 3386c6d69..763948453 100644
--- a/Eigen/src/Core/Map.h
+++ b/Eigen/src/Core/Map.h
@@ -100,7 +100,7 @@ struct ei_traits<Map<PlainObjectType, MapOptions, StrideType> >
|| ( OuterStrideAtCompileTime!=Dynamic
&& ((static_cast<int>(sizeof(Scalar))*OuterStrideAtCompileTime)%16)==0 ) ),
Flags0 = ei_traits<PlainObjectType>::Flags,
- Flags1 = IsAligned ? int(Flags0) | AlignedBit : int(Flags0) & ~AlignedBit,
+ Flags1 = IsAligned ? (int(Flags0) | AlignedBit) : (int(Flags0) & ~AlignedBit),
Flags2 = HasNoStride ? int(Flags1) : int(Flags1 & ~LinearAccessBit),
Flags = KeepsPacketAccess ? int(Flags2) : (int(Flags2) & ~PacketAccessBit)
};
diff --git a/Eigen/src/Core/arch/SSE/MathFunctions.h b/Eigen/src/Core/arch/SSE/MathFunctions.h
index e4ca82985..cb73fd205 100644
--- a/Eigen/src/Core/arch/SSE/MathFunctions.h
+++ b/Eigen/src/Core/arch/SSE/MathFunctions.h
@@ -373,7 +373,7 @@ Packet4f ei_pcos<Packet4f>(const Packet4f& _x)
return _mm_xor_ps(y, sign_bit);
}
-// This is Quake3's fast inverse square root.
+// This is based on Quake3's fast inverse square root.
// For detail see here: http://www.beyond3d.com/content/articles/8/
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
Packet4f ei_psqrt<Packet4f>(const Packet4f& _x)
diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h
index e4466db5b..423aa110e 100644
--- a/Eigen/src/Core/util/ForwardDeclarations.h
+++ b/Eigen/src/Core/util/ForwardDeclarations.h
@@ -60,7 +60,7 @@ template<typename ExpressionType> class NestByValue;
template<typename ExpressionType> class ForceAlignedAccess;
template<typename ExpressionType> class SwapWrapper;
-template<typename XprType, int BlockRows=Dynamic, int BlockCols=Dynamic,
+template<typename XprType, int BlockRows=Dynamic, int BlockCols=Dynamic, bool InnerPanel = false,
bool HasDirectAccess = ei_has_direct_access<XprType>::ret> class Block;
template<typename MatrixType, int Size=Dynamic> class VectorBlock;