diff options
-rw-r--r-- | Eigen/src/Core/AssignEvaluator.h | 58 | ||||
-rw-r--r-- | Eigen/src/Core/CoreEvaluators.h | 67 | ||||
-rw-r--r-- | test/evaluators.cpp | 19 |
3 files changed, 143 insertions, 1 deletions
diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h index 78014c6f9..cf0ab5fda 100644 --- a/Eigen/src/Core/AssignEvaluator.h +++ b/Eigen/src/Core/AssignEvaluator.h @@ -289,6 +289,64 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, LinearTraversal, NoUnro } }; +/************************** +*** Slice vectorization *** +***************************/ + +template<typename DstXprType, typename SrcXprType> +struct copy_using_evaluator_impl<DstXprType, SrcXprType, SliceVectorizedTraversal, NoUnrolling> +{ + inline static void run(const DstXprType &dst, const SrcXprType &src) + { + typedef typename evaluator<DstXprType>::type DstEvaluatorType; + typedef typename evaluator<SrcXprType>::type SrcEvaluatorType; + typedef typename DstXprType::Index Index; + + DstEvaluatorType dstEvaluator(dst.const_cast_derived()); + SrcEvaluatorType srcEvaluator(src); + + typedef packet_traits<typename DstXprType::Scalar> PacketTraits; + enum { + packetSize = PacketTraits::size, + alignable = PacketTraits::AlignedOnScalar, + dstAlignment = alignable ? Aligned : int(assign_traits<DstXprType,SrcXprType>::DstIsAligned) , + srcAlignment = assign_traits<DstXprType,SrcXprType>::JointAlignment + }; + const Index packetAlignedMask = packetSize - 1; + const Index innerSize = dst.innerSize(); + const Index outerSize = dst.outerSize(); + const Index alignedStep = alignable ? (packetSize - dst.outerStride() % packetSize) & packetAlignedMask : 0; + Index alignedStart = ((!alignable) || assign_traits<DstXprType,SrcXprType>::DstIsAligned) ? 0 + : first_aligned(&dstEvaluator.coeffRef(0,0), innerSize); + + for(Index outer = 0; outer < outerSize; ++outer) + { + const Index alignedEnd = alignedStart + ((innerSize-alignedStart) & ~packetAlignedMask); + // do the non-vectorizable part of the assignment + for(Index inner = 0; inner<alignedStart ; ++inner) { + Index row = dst.rowIndexByOuterInner(outer, inner); + Index col = dst.colIndexByOuterInner(outer, inner); + dstEvaluator.coeffRef(row, col) = srcEvaluator.coeff(row, col); + } + + // do the vectorizable part of the assignment + for(Index inner = alignedStart; inner<alignedEnd; inner+=packetSize) { + Index row = dst.rowIndexByOuterInner(outer, inner); + Index col = dst.colIndexByOuterInner(outer, inner); + dstEvaluator.template writePacket<dstAlignment>(row, col, srcEvaluator.template packet<srcAlignment>(row, col)); + } + + // do the non-vectorizable part of the assignment + for(Index inner = alignedEnd; inner<innerSize ; ++inner) { + Index row = dst.rowIndexByOuterInner(outer, inner); + Index col = dst.colIndexByOuterInner(outer, inner); + dstEvaluator.coeffRef(row, col) = srcEvaluator.coeff(row, col); + } + + alignedStart = std::min<Index>((alignedStart+alignedStep)%packetSize, innerSize); + } + } +}; // Based on DenseBase::LazyAssign() diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h index 008285b4c..4dd466bc6 100644 --- a/Eigen/src/Core/CoreEvaluators.h +++ b/Eigen/src/Core/CoreEvaluators.h @@ -368,6 +368,73 @@ protected: PlainObject m_result; }; +// -------------------- Block -------------------- +// +// This evaluator is implemented as a dumb wrapper around Block expression class. +// TODO: Make this a real evaluator + +template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool HasDirectAccess> +struct evaluator_impl<Block<XprType, BlockRows, BlockCols, InnerPanel, HasDirectAccess> > +{ + typedef Block<XprType, BlockRows, BlockCols, InnerPanel, HasDirectAccess> BlockType; + evaluator_impl(const BlockType& block) : m_block(block) { } + + typedef typename BlockType::Index Index; + typedef typename BlockType::Scalar Scalar; + typedef typename BlockType::CoeffReturnType CoeffReturnType; + typedef typename BlockType::PacketScalar PacketScalar; + typedef typename BlockType::PacketReturnType PacketReturnType; + + + CoeffReturnType coeff(Index i, Index j) const + { + return m_block.coeff(i,j); + } + + CoeffReturnType coeff(Index index) const + { + return m_block.coeff(index); + } + + Scalar& coeffRef(Index i, Index j) + { + return m_block.const_cast_derived().coeffRef(i,j); + } + + Scalar& coeffRef(Index index) + { + return m_block.const_cast_derived().coeffRef(index); + } + + template<int LoadMode> + PacketReturnType packet(Index row, Index col) const + { + return m_block.template packet<LoadMode>(row, col); + } + + template<int LoadMode> + PacketReturnType packet(Index index) const + { + return m_block.template packet<LoadMode>(index); + } + + template<int StoreMode> + void writePacket(Index row, Index col, const PacketScalar& x) + { + m_block.const_cast_derived().template writePacket<StoreMode>(row, col, x); + } + + template<int StoreMode> + void writePacket(Index index, const PacketScalar& x) + { + m_block.const_cast_derived().template writePacket<StoreMode>(index, x); + } + +protected: + const BlockType& m_block; +}; + + } // namespace internal #endif // EIGEN_COREEVALUATORS_H diff --git a/test/evaluators.cpp b/test/evaluators.cpp index b804171a7..e3fe53215 100644 --- a/test/evaluators.cpp +++ b/test/evaluators.cpp @@ -98,6 +98,7 @@ void test_evaluators() VERIFY_IS_APPROX_EVALUATOR(m3.transpose(), Matrix3f::Identity().transpose()); // transpose VERIFY_IS_APPROX_EVALUATOR(m3, 2 * Matrix3f::Identity()); // unary VERIFY_IS_APPROX_EVALUATOR(m3, Matrix3f::Identity() + m3); // binary + VERIFY_IS_APPROX_EVALUATOR(m3.block(0,0,2,2), Matrix3f::Identity().block(1,1,2,2)); // block // test linear traversal VERIFY_IS_APPROX_EVALUATOR(m3, Matrix3f::Zero()); // matrix, nullary @@ -118,11 +119,27 @@ void test_evaluators() // test linear vectorization MatrixXf mX(6,6), mXsrc = MatrixXf::Random(6,6); - ArrayXXf aX(6,6), aXsrc = MatrixXf::Random(6,6); + ArrayXXf aX(6,6), aXsrc = ArrayXXf::Random(6,6); VERIFY_IS_APPROX_EVALUATOR(mX, mXsrc); // matrix VERIFY_IS_APPROX_EVALUATOR(aX, aXsrc); // array VERIFY_IS_APPROX_EVALUATOR(mX.transpose(), mXsrc.transpose()); // transpose VERIFY_IS_APPROX_EVALUATOR(mX, MatrixXf::Zero(6,6)); // nullary VERIFY_IS_APPROX_EVALUATOR(mX, 2 * mXsrc); // unary VERIFY_IS_APPROX_EVALUATOR(mX, mXsrc + mXsrc); // binary + + // test blocks and slice vectorization + VERIFY_IS_APPROX_EVALUATOR(m4, (mXsrc.block<4,4>(1,0))); + VERIFY_IS_APPROX_EVALUATOR(aX, ArrayXXf::Constant(10, 10, 3.0).block(2, 3, 6, 6)); + + Matrix4f m4ref = m4; + copy_using_evaluator(m4.block(1, 1, 2, 3), m3.bottomRows(2)); + m4ref.block(1, 1, 2, 3) = m3.bottomRows(2); + VERIFY_IS_APPROX(m4, m4ref); + + mX.setIdentity(20,20); + MatrixXf mXref = MatrixXf::Identity(20,20); + mXsrc = MatrixXf::Random(9,12); + copy_using_evaluator(mX.block(4, 4, 9, 12), mXsrc); + mXref.block(4, 4, 9, 12) = mXsrc; + VERIFY_IS_APPROX(mX, mXref); } |