diff options
author | 2011-03-27 13:49:15 +0100 | |
---|---|---|
committer | 2011-03-27 13:49:15 +0100 | |
commit | 1b17a674dd409ea55cea4079ba9b8db18778e012 (patch) | |
tree | 1929e8be6e1064c821119bcf9543d2f26aa75046 /Eigen | |
parent | 5c204d1ff7b7b57bba2ef6e5701597d000e63842 (diff) |
Evaluators: Implement inner vectorization.
The implementation is minimal (I only wrote the functions called by
the unit test) and ugly (lots of copying and pasting).
Diffstat (limited to 'Eigen')
-rw-r--r-- | Eigen/src/Core/AssignEvaluator.h | 36 | ||||
-rw-r--r-- | Eigen/src/Core/CoreEvaluators.h | 80 |
2 files changed, 115 insertions, 1 deletion
diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h index 7001abb40..2d61e7ff6 100644 --- a/Eigen/src/Core/AssignEvaluator.h +++ b/Eigen/src/Core/AssignEvaluator.h @@ -76,7 +76,7 @@ private: public: enum { - Traversal = int(MayInnerVectorize) ? int(DefaultTraversal) // int(InnerVectorizedTraversal) + Traversal = int(MayInnerVectorize) ? int(InnerVectorizedTraversal) : int(MayLinearVectorize) ? int(LinearVectorizedTraversal) : int(MaySliceVectorize) ? int(DefaultTraversal) // int(SliceVectorizedTraversal) : int(MayLinearize) ? int(DefaultTraversal) // int(LinearTraversal) @@ -145,6 +145,10 @@ template<typename DstXprType, typename SrcXprType, int Unrolling = copy_using_evaluator_traits<DstXprType, SrcXprType>::Unrolling> struct copy_using_evaluator_impl; +/************************ +*** Default traversal *** +************************/ + template<typename DstXprType, typename SrcXprType> struct copy_using_evaluator_impl<DstXprType, SrcXprType, DefaultTraversal, NoUnrolling> { @@ -167,6 +171,10 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, DefaultTraversal, NoUnr } }; +/*************************** +*** Linear vectorization *** +***************************/ + template <bool IsAligned = false> struct unaligned_copy_using_evaluator_impl { @@ -231,6 +239,32 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, LinearVectorizedTravers } }; +/************************** +*** Inner vectorization *** +**************************/ + +template<typename DstXprType, typename SrcXprType> +struct copy_using_evaluator_impl<DstXprType, SrcXprType, InnerVectorizedTraversal, NoUnrolling> +{ + inline static void run(const DstXprType &dst, const SrcXprType &src) + { + typedef typename evaluator<DstXprType>::type DstEvaluatorType; + typedef typename evaluator<SrcXprType>::type SrcEvaluatorType; + typedef typename DstXprType::Index Index; + + DstEvaluatorType dstEvaluator(dst.const_cast_derived()); + SrcEvaluatorType srcEvaluator(src); + + 
const Index innerSize = dst.innerSize(); + const Index outerSize = dst.outerSize(); + const Index packetSize = packet_traits<typename DstXprType::Scalar>::size; + for(Index outer = 0; outer < outerSize; ++outer) + for(Index inner = 0; inner < innerSize; inner+=packetSize) + dstEvaluator.template writePacketByOuterInner<Aligned>(outer, inner, srcEvaluator.template packetByOuterInner<Aligned>(outer, inner)); + } +}; + + // Based on DenseBase::LazyAssign() template<typename DstXprType, typename SrcXprType> diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h index 5666daae9..c06d9303e 100644 --- a/Eigen/src/Core/CoreEvaluators.h +++ b/Eigen/src/Core/CoreEvaluators.h @@ -71,6 +71,27 @@ struct evaluator_impl<Transpose<ExpressionType> > return m_argImpl.template packet<LoadMode>(index); } + // TODO: Difference between PacketScalar and PacketReturnType? + // TODO: Get this function by inheriting from DenseCoeffBase? + template<int LoadMode> + const typename ExpressionType::PacketScalar packetByOuterInner(Index outer, Index inner) const + { + return m_argImpl.template packetByOuterInner<LoadMode>(outer, inner); + } + +// TODO: Is this function needed? 
+// template<int StoreMode> +// void writePacket(Index index, const typename ExpressionType::PacketScalar& x) +// { +// m_argImpl.template writePacket<StoreMode>(index, x); +// } + + template<int StoreMode> + void writePacketByOuterInner(Index outer, Index inner, const typename ExpressionType::PacketScalar& x) + { + m_argImpl.template writePacketByOuterInner<StoreMode>(outer, inner, x); + } + protected: typename evaluator<ExpressionType>::type m_argImpl; }; @@ -86,6 +107,16 @@ struct evaluator_impl<Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols> > typedef typename MatrixType::Index Index; + Index colIndexByOuterInner(Index outer, Index inner) const + { + return m_matrix.colIndexByOuterInner(outer, inner); + } + + Index rowIndexByOuterInner(Index outer, Index inner) const + { + return m_matrix.rowIndexByOuterInner(outer, inner); + } + typename MatrixType::CoeffReturnType coeff(Index i, Index j) const { return m_matrix.coeff(i, j); @@ -103,6 +134,18 @@ struct evaluator_impl<Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols> > return m_matrix.template packet<LoadMode>(index); } + template<int LoadMode> + typename MatrixType::PacketReturnType packet(Index row, Index col) const + { + return m_matrix.template packet<LoadMode>(row, col); + } + + template<int LoadMode> + typename MatrixType::PacketReturnType packetByOuterInner(Index outer, Index inner) const + { + return m_matrix.template packetByOuterInner<LoadMode>(outer, inner); + } + template<int StoreMode> void writePacket(Index index, const typename MatrixType::PacketScalar& x) { @@ -110,6 +153,12 @@ struct evaluator_impl<Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols> > m_matrix.const_cast_derived().template writePacket<StoreMode>(index, x); } + template<int StoreMode> + void writePacketByOuterInner(Index outer, Index inner, const typename MatrixType::PacketScalar& x) + { + m_matrix.const_cast_derived().template writePacketByOuterInner<StoreMode>(outer, inner, x); + } + protected: const MatrixType 
&m_matrix; }; @@ -149,6 +198,18 @@ struct evaluator_impl<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> > return m_array.template packet<LoadMode>(index); } + template<int LoadMode> + typename ArrayType::PacketReturnType packet(Index row, Index col) const + { + return m_array.template packet<LoadMode>(row, col); + } + + template<int LoadMode> + typename ArrayType::PacketReturnType packetByOuterInner(Index outer, Index inner) const + { + return m_array.template packetByOuterInner<LoadMode>(outer, inner); + } + template<int StoreMode> void writePacket(Index index, const typename ArrayType::PacketScalar& x) { @@ -156,6 +217,12 @@ struct evaluator_impl<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> > m_array.const_cast_derived().template writePacket<StoreMode>(index, x); } + template<int StoreMode> + void writePacketByOuterInner(Index outer, Index inner, const typename ArrayType::PacketScalar& x) + { + m_array.const_cast_derived().template writePacketByOuterInner<StoreMode>(outer, inner, x); + } + protected: const ArrayType &m_array; }; @@ -208,6 +275,19 @@ struct evaluator_impl<CwiseUnaryOp<UnaryOp, ArgType> > return m_unaryOp.functor().packetOp(m_argImpl.template packet<LoadMode>(index)); } + template<int LoadMode> + typename UnaryOpType::PacketScalar packet(Index row, Index col) const + { + return m_unaryOp.functor().packetOp(m_argImpl.template packet<LoadMode>(row, col)); + } + + template<int LoadMode> + typename UnaryOpType::PacketScalar packetByOuterInner(Index outer, Index inner) const + { + return packet<LoadMode>(m_argImpl.rowIndexByOuterInner(outer, inner), + m_argImpl.colIndexByOuterInner(outer, inner)); + } + protected: const UnaryOpType& m_unaryOp; typename evaluator<ArgType>::type m_argImpl; |