diff options
author | Jitse Niesen <jitse@maths.leeds.ac.uk> | 2011-03-31 13:50:52 +0100 |
---|---|---|
committer | Jitse Niesen <jitse@maths.leeds.ac.uk> | 2011-03-31 13:50:52 +0100 |
commit | d90a8ee8bd0a20b76c5a04e95c79b59ce42963ec (patch) | |
tree | e662b6b7a6ed5061df8a54577df517c8b05c8f35 /Eigen | |
parent | b471161f28e13b41fdc847f441de04ab3aadbfe8 (diff) |
Evaluators: add Block evaluator as dumb wrapper, add slice vectorization.
Diffstat (limited to 'Eigen')
-rw-r--r-- | Eigen/src/Core/AssignEvaluator.h | 58 | ||||
-rw-r--r-- | Eigen/src/Core/CoreEvaluators.h | 67 |
2 files changed, 125 insertions, 0 deletions
diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h
index 78014c6f9..cf0ab5fda 100644
--- a/Eigen/src/Core/AssignEvaluator.h
+++ b/Eigen/src/Core/AssignEvaluator.h
@@ -289,6 +289,64 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, LinearTraversal, NoUnro
   }
 };
 
+/**************************
+*** Slice vectorization ***
+***************************/
+// Copies src into dst one outer slice at a time: packets on the dst-aligned run, scalars elsewhere.
+template<typename DstXprType, typename SrcXprType>
+struct copy_using_evaluator_impl<DstXprType, SrcXprType, SliceVectorizedTraversal, NoUnrolling>
+{
+  inline static void run(const DstXprType &dst, const SrcXprType &src)
+  {
+    typedef typename evaluator<DstXprType>::type DstEvaluatorType;
+    typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
+    typedef typename DstXprType::Index Index;
+    // dst is a const reference, yet it is written to below through dstEvaluator.
+    DstEvaluatorType dstEvaluator(dst.const_cast_derived());
+    SrcEvaluatorType srcEvaluator(src);
+
+    typedef packet_traits<typename DstXprType::Scalar> PacketTraits;
+    enum {
+      packetSize = PacketTraits::size,
+      alignable = PacketTraits::AlignedOnScalar,
+      dstAlignment = alignable ? Aligned : int(assign_traits<DstXprType,SrcXprType>::DstIsAligned),
+      // alignedStart/alignedStep below are derived from dst alone, so src gets unaligned loads.
+      srcAlignment = Unaligned
+    };
+    const Index packetAlignedMask = packetSize - 1;
+    const Index innerSize = dst.innerSize();
+    const Index outerSize = dst.outerSize();
+    // alignedStep is how far the aligned start shifts between consecutive outer slices (0 if not alignable).
+    const Index alignedStep = alignable ? (packetSize - dst.outerStride() % packetSize) & packetAlignedMask : 0;
+    Index alignedStart = ((!alignable) || assign_traits<DstXprType,SrcXprType>::DstIsAligned) ? 0
+                       : first_aligned(&dstEvaluator.coeffRef(0,0), innerSize);
+
+    for(Index outer = 0; outer < outerSize; ++outer)
+    {
+      const Index alignedEnd = alignedStart + ((innerSize-alignedStart) & ~packetAlignedMask);
+      // head: coefficients before alignedStart, copied one by one
+      for(Index inner = 0; inner<alignedStart ; ++inner) {
+        Index row = dst.rowIndexByOuterInner(outer, inner);
+        Index col = dst.colIndexByOuterInner(outer, inner);
+        dstEvaluator.coeffRef(row, col) = srcEvaluator.coeff(row, col);
+      }
+      // body: whole packets in [alignedStart, alignedEnd)
+      for(Index inner = alignedStart; inner<alignedEnd; inner+=packetSize) {
+        Index row = dst.rowIndexByOuterInner(outer, inner);
+        Index col = dst.colIndexByOuterInner(outer, inner);
+        dstEvaluator.template writePacket<dstAlignment>(row, col, srcEvaluator.template packet<srcAlignment>(row, col));
+      }
+      // tail: leftover coefficients that do not fill a packet, copied one by one
+      for(Index inner = alignedEnd; inner<innerSize ; ++inner) {
+        Index row = dst.rowIndexByOuterInner(outer, inner);
+        Index col = dst.colIndexByOuterInner(outer, inner);
+        dstEvaluator.coeffRef(row, col) = srcEvaluator.coeff(row, col);
+      }
+      // move the aligned start for the next outer slice, clamped to the slice length
+      alignedStart = std::min<Index>((alignedStart+alignedStep)%packetSize, innerSize);
+    }
+  }
+};
 
 // Based on DenseBase::LazyAssign()
 
diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h
index 008285b4c..4dd466bc6 100644
--- a/Eigen/src/Core/CoreEvaluators.h
+++ b/Eigen/src/Core/CoreEvaluators.h
@@ -368,6 +368,73 @@ protected:
   PlainObject m_result;
 };
 
+// -------------------- Block --------------------
+//
+// This evaluator is implemented as a dumb wrapper around Block expression class.
+// TODO: Make this a real evaluator
+
+template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool HasDirectAccess>
+struct evaluator_impl<Block<XprType, BlockRows, BlockCols, InnerPanel, HasDirectAccess> >
+{
+  typedef Block<XprType, BlockRows, BlockCols, InnerPanel, HasDirectAccess> BlockType;
+  typedef typename BlockType::Index Index;
+  typedef typename BlockType::Scalar Scalar;
+  typedef typename BlockType::CoeffReturnType CoeffReturnType;
+  typedef typename BlockType::PacketScalar PacketScalar;
+  typedef typename BlockType::PacketReturnType PacketReturnType;
+
+  // Binds to the given Block by reference; every member below forwards to it.
+  evaluator_impl(const BlockType& block) : m_block(block) { }
+
+  // Coefficient at (row, col), forwarded to the wrapped Block's coeff().
+  CoeffReturnType coeff(Index row, Index col) const {
+    return m_block.coeff(row, col);
+  }
+
+  // Coefficient at a single index, forwarded to the wrapped Block's coeff().
+  CoeffReturnType coeff(Index index) const {
+    return m_block.coeff(index);
+  }
+
+  // Writable coefficient at (row, col); m_block is const, hence const_cast_derived().
+  Scalar& coeffRef(Index row, Index col) {
+    return m_block.const_cast_derived().coeffRef(row, col);
+  }
+
+  // Writable coefficient at a single index; m_block is const, hence const_cast_derived().
+  Scalar& coeffRef(Index index) {
+    return m_block.const_cast_derived().coeffRef(index);
+  }
+
+  // Packet read at (row, col) with the requested LoadMode, forwarded to the wrapped Block.
+  template<int LoadMode>
+  PacketReturnType packet(Index row, Index col) const {
+    return m_block.template packet<LoadMode>(row, col);
+  }
+
+  // Packet read at a single index with the requested LoadMode, forwarded to the wrapped Block.
+  template<int LoadMode>
+  PacketReturnType packet(Index index) const {
+    return m_block.template packet<LoadMode>(index);
+  }
+
+  // Packet write at (row, col) with the requested StoreMode; goes through const_cast_derived().
+  template<int StoreMode>
+  void writePacket(Index row, Index col, const PacketScalar& x) {
+    m_block.const_cast_derived().template writePacket<StoreMode>(row, col, x);
+  }
+
+  // Packet write at a single index with the requested StoreMode; goes through const_cast_derived().
+  template<int StoreMode>
+  void writePacket(Index index, const PacketScalar& x) {
+    m_block.const_cast_derived().template writePacket<StoreMode>(index, x);
+  }
+
+protected:
+  const BlockType& m_block;  // the wrapped Block expression, held by reference
+};
+
+
 } // namespace internal
 
 #endif // EIGEN_COREEVALUATORS_H |