aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--Eigen/src/Core/AssignEvaluator.h58
-rw-r--r--Eigen/src/Core/CoreEvaluators.h67
-rw-r--r--test/evaluators.cpp19
3 files changed, 143 insertions, 1 deletions
diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h
index 78014c6f9..cf0ab5fda 100644
--- a/Eigen/src/Core/AssignEvaluator.h
+++ b/Eigen/src/Core/AssignEvaluator.h
@@ -289,6 +289,64 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, LinearTraversal, NoUnro
}
};
+/**************************
+*** Slice vectorization ***
+***************************/
+
+template<typename DstXprType, typename SrcXprType>
+struct copy_using_evaluator_impl<DstXprType, SrcXprType, SliceVectorizedTraversal, NoUnrolling>
+{
+ inline static void run(const DstXprType &dst, const SrcXprType &src)
+ {
+ typedef typename evaluator<DstXprType>::type DstEvaluatorType;
+ typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
+ typedef typename DstXprType::Index Index;
+
+ DstEvaluatorType dstEvaluator(dst.const_cast_derived());
+ SrcEvaluatorType srcEvaluator(src);
+
+ typedef packet_traits<typename DstXprType::Scalar> PacketTraits;
+ enum {
+ packetSize = PacketTraits::size,
+ alignable = PacketTraits::AlignedOnScalar,
+ dstAlignment = alignable ? Aligned : int(assign_traits<DstXprType,SrcXprType>::DstIsAligned) ,
+ srcAlignment = assign_traits<DstXprType,SrcXprType>::JointAlignment
+ };
+ const Index packetAlignedMask = packetSize - 1;
+ const Index innerSize = dst.innerSize();
+ const Index outerSize = dst.outerSize();
+ const Index alignedStep = alignable ? (packetSize - dst.outerStride() % packetSize) & packetAlignedMask : 0;
+ Index alignedStart = ((!alignable) || assign_traits<DstXprType,SrcXprType>::DstIsAligned) ? 0
+ : first_aligned(&dstEvaluator.coeffRef(0,0), innerSize);
+
+ for(Index outer = 0; outer < outerSize; ++outer)
+ {
+ const Index alignedEnd = alignedStart + ((innerSize-alignedStart) & ~packetAlignedMask);
+ // do the non-vectorizable part of the assignment
+ for(Index inner = 0; inner<alignedStart ; ++inner) {
+ Index row = dst.rowIndexByOuterInner(outer, inner);
+ Index col = dst.colIndexByOuterInner(outer, inner);
+ dstEvaluator.coeffRef(row, col) = srcEvaluator.coeff(row, col);
+ }
+
+ // do the vectorizable part of the assignment
+ for(Index inner = alignedStart; inner<alignedEnd; inner+=packetSize) {
+ Index row = dst.rowIndexByOuterInner(outer, inner);
+ Index col = dst.colIndexByOuterInner(outer, inner);
+ dstEvaluator.template writePacket<dstAlignment>(row, col, srcEvaluator.template packet<srcAlignment>(row, col));
+ }
+
+ // do the non-vectorizable part of the assignment
+ for(Index inner = alignedEnd; inner<innerSize ; ++inner) {
+ Index row = dst.rowIndexByOuterInner(outer, inner);
+ Index col = dst.colIndexByOuterInner(outer, inner);
+ dstEvaluator.coeffRef(row, col) = srcEvaluator.coeff(row, col);
+ }
+
+ alignedStart = std::min<Index>((alignedStart+alignedStep)%packetSize, innerSize);
+ }
+ }
+};
// Based on DenseBase::LazyAssign()
diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h
index 008285b4c..4dd466bc6 100644
--- a/Eigen/src/Core/CoreEvaluators.h
+++ b/Eigen/src/Core/CoreEvaluators.h
@@ -368,6 +368,73 @@ protected:
PlainObject m_result;
};
+// -------------------- Block --------------------
+//
+// This evaluator is implemented as a dumb wrapper around Block expression class.
+// TODO: Make this a real evaluator
+
+template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool HasDirectAccess>
+struct evaluator_impl<Block<XprType, BlockRows, BlockCols, InnerPanel, HasDirectAccess> >
+{
+ typedef Block<XprType, BlockRows, BlockCols, InnerPanel, HasDirectAccess> BlockType;
+ evaluator_impl(const BlockType& block) : m_block(block) { }
+
+ typedef typename BlockType::Index Index;
+ typedef typename BlockType::Scalar Scalar;
+ typedef typename BlockType::CoeffReturnType CoeffReturnType;
+ typedef typename BlockType::PacketScalar PacketScalar;
+ typedef typename BlockType::PacketReturnType PacketReturnType;
+
+
+ CoeffReturnType coeff(Index i, Index j) const
+ {
+ return m_block.coeff(i,j);
+ }
+
+ CoeffReturnType coeff(Index index) const
+ {
+ return m_block.coeff(index);
+ }
+
+ Scalar& coeffRef(Index i, Index j)
+ {
+ return m_block.const_cast_derived().coeffRef(i,j);
+ }
+
+ Scalar& coeffRef(Index index)
+ {
+ return m_block.const_cast_derived().coeffRef(index);
+ }
+
+ template<int LoadMode>
+ PacketReturnType packet(Index row, Index col) const
+ {
+ return m_block.template packet<LoadMode>(row, col);
+ }
+
+ template<int LoadMode>
+ PacketReturnType packet(Index index) const
+ {
+ return m_block.template packet<LoadMode>(index);
+ }
+
+ template<int StoreMode>
+ void writePacket(Index row, Index col, const PacketScalar& x)
+ {
+ m_block.const_cast_derived().template writePacket<StoreMode>(row, col, x);
+ }
+
+ template<int StoreMode>
+ void writePacket(Index index, const PacketScalar& x)
+ {
+ m_block.const_cast_derived().template writePacket<StoreMode>(index, x);
+ }
+
+protected:
+ const BlockType& m_block;
+};
+
+
} // namespace internal
#endif // EIGEN_COREEVALUATORS_H
diff --git a/test/evaluators.cpp b/test/evaluators.cpp
index b804171a7..e3fe53215 100644
--- a/test/evaluators.cpp
+++ b/test/evaluators.cpp
@@ -98,6 +98,7 @@ void test_evaluators()
VERIFY_IS_APPROX_EVALUATOR(m3.transpose(), Matrix3f::Identity().transpose()); // transpose
VERIFY_IS_APPROX_EVALUATOR(m3, 2 * Matrix3f::Identity()); // unary
VERIFY_IS_APPROX_EVALUATOR(m3, Matrix3f::Identity() + m3); // binary
+ VERIFY_IS_APPROX_EVALUATOR(m3.block(0,0,2,2), Matrix3f::Identity().block(1,1,2,2)); // block
// test linear traversal
VERIFY_IS_APPROX_EVALUATOR(m3, Matrix3f::Zero()); // matrix, nullary
@@ -118,11 +119,27 @@ void test_evaluators()
// test linear vectorization
MatrixXf mX(6,6), mXsrc = MatrixXf::Random(6,6);
- ArrayXXf aX(6,6), aXsrc = MatrixXf::Random(6,6);
+ ArrayXXf aX(6,6), aXsrc = ArrayXXf::Random(6,6);
VERIFY_IS_APPROX_EVALUATOR(mX, mXsrc); // matrix
VERIFY_IS_APPROX_EVALUATOR(aX, aXsrc); // array
VERIFY_IS_APPROX_EVALUATOR(mX.transpose(), mXsrc.transpose()); // transpose
VERIFY_IS_APPROX_EVALUATOR(mX, MatrixXf::Zero(6,6)); // nullary
VERIFY_IS_APPROX_EVALUATOR(mX, 2 * mXsrc); // unary
VERIFY_IS_APPROX_EVALUATOR(mX, mXsrc + mXsrc); // binary
+
+ // test blocks and slice vectorization
+ VERIFY_IS_APPROX_EVALUATOR(m4, (mXsrc.block<4,4>(1,0)));
+ VERIFY_IS_APPROX_EVALUATOR(aX, ArrayXXf::Constant(10, 10, 3.0).block(2, 3, 6, 6));
+
+ Matrix4f m4ref = m4;
+ copy_using_evaluator(m4.block(1, 1, 2, 3), m3.bottomRows(2));
+ m4ref.block(1, 1, 2, 3) = m3.bottomRows(2);
+ VERIFY_IS_APPROX(m4, m4ref);
+
+ mX.setIdentity(20,20);
+ MatrixXf mXref = MatrixXf::Identity(20,20);
+ mXsrc = MatrixXf::Random(9,12);
+ copy_using_evaluator(mX.block(4, 4, 9, 12), mXsrc);
+ mXref.block(4, 4, 9, 12) = mXsrc;
+ VERIFY_IS_APPROX(mX, mXref);
}