From 5fae6c7848944df9ed3389b5714362e29f531b91 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 22 Jun 2012 09:39:35 +0200 Subject: resurrect expression evaluators --- Eigen/src/Core/AssignEvaluator.h | 682 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 682 insertions(+) create mode 100644 Eigen/src/Core/AssignEvaluator.h (limited to 'Eigen/src/Core/AssignEvaluator.h') diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h new file mode 100644 index 000000000..006a87d47 --- /dev/null +++ b/Eigen/src/Core/AssignEvaluator.h @@ -0,0 +1,682 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2011 Benoit Jacob +// Copyright (C) 2011 Gael Guennebaud +// Copyright (C) 2011 Jitse Niesen +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see <http://www.gnu.org/licenses/>. 
+ +#ifndef EIGEN_ASSIGN_EVALUATOR_H +#define EIGEN_ASSIGN_EVALUATOR_H + +// This implementation is based on Assign.h + +namespace internal { + +/*************************************************************************** +* Part 1 : the logic deciding a strategy for traversal and unrolling * +***************************************************************************/ + +// copy_using_evaluator_traits is based on assign_traits +// (actually, it's identical) + +template +struct copy_using_evaluator_traits +{ +public: + enum { + DstIsAligned = Derived::Flags & AlignedBit, + DstHasDirectAccess = Derived::Flags & DirectAccessBit, + SrcIsAligned = OtherDerived::Flags & AlignedBit, + JointAlignment = bool(DstIsAligned) && bool(SrcIsAligned) ? Aligned : Unaligned + }; + +private: + enum { + InnerSize = int(Derived::IsVectorAtCompileTime) ? int(Derived::SizeAtCompileTime) + : int(Derived::Flags)&RowMajorBit ? int(Derived::ColsAtCompileTime) + : int(Derived::RowsAtCompileTime), + InnerMaxSize = int(Derived::IsVectorAtCompileTime) ? int(Derived::MaxSizeAtCompileTime) + : int(Derived::Flags)&RowMajorBit ? 
int(Derived::MaxColsAtCompileTime) + : int(Derived::MaxRowsAtCompileTime), + MaxSizeAtCompileTime = Derived::SizeAtCompileTime, + PacketSize = packet_traits::size + }; + + enum { + StorageOrdersAgree = (int(Derived::IsRowMajor) == int(OtherDerived::IsRowMajor)), + MightVectorize = StorageOrdersAgree + && (int(Derived::Flags) & int(OtherDerived::Flags) & ActualPacketAccessBit), + MayInnerVectorize = MightVectorize && int(InnerSize)!=Dynamic && int(InnerSize)%int(PacketSize)==0 + && int(DstIsAligned) && int(SrcIsAligned), + MayLinearize = StorageOrdersAgree && (int(Derived::Flags) & int(OtherDerived::Flags) & LinearAccessBit), + MayLinearVectorize = MightVectorize && MayLinearize && DstHasDirectAccess + && (DstIsAligned || MaxSizeAtCompileTime == Dynamic), + /* If the destination isn't aligned, we have to do runtime checks and we don't unroll, + so it's only good for large enough sizes. */ + MaySliceVectorize = MightVectorize && DstHasDirectAccess + && (int(InnerMaxSize)==Dynamic || int(InnerMaxSize)>=3*PacketSize) + /* slice vectorization can be slow, so we only want it if the slices are big, which is + indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block + in a fixed-size matrix */ + }; + +public: + enum { + Traversal = int(MayInnerVectorize) ? int(InnerVectorizedTraversal) + : int(MayLinearVectorize) ? int(LinearVectorizedTraversal) + : int(MaySliceVectorize) ? int(SliceVectorizedTraversal) + : int(MayLinearize) ? int(LinearTraversal) + : int(DefaultTraversal), + Vectorized = int(Traversal) == InnerVectorizedTraversal + || int(Traversal) == LinearVectorizedTraversal + || int(Traversal) == SliceVectorizedTraversal + }; + +private: + enum { + UnrollingLimit = EIGEN_UNROLLING_LIMIT * (Vectorized ? 
int(PacketSize) : 1), + MayUnrollCompletely = int(Derived::SizeAtCompileTime) != Dynamic + && int(OtherDerived::CoeffReadCost) != Dynamic + && int(Derived::SizeAtCompileTime) * int(OtherDerived::CoeffReadCost) <= int(UnrollingLimit), + MayUnrollInner = int(InnerSize) != Dynamic + && int(OtherDerived::CoeffReadCost) != Dynamic + && int(InnerSize) * int(OtherDerived::CoeffReadCost) <= int(UnrollingLimit) + }; + +public: + enum { + Unrolling = (int(Traversal) == int(InnerVectorizedTraversal) || int(Traversal) == int(DefaultTraversal)) + ? ( + int(MayUnrollCompletely) ? int(CompleteUnrolling) + : int(MayUnrollInner) ? int(InnerUnrolling) + : int(NoUnrolling) + ) + : int(Traversal) == int(LinearVectorizedTraversal) + ? ( bool(MayUnrollCompletely) && bool(DstIsAligned) ? int(CompleteUnrolling) + : int(NoUnrolling) ) + : int(Traversal) == int(LinearTraversal) + ? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling) + : int(NoUnrolling) ) + : int(NoUnrolling) + }; + +#ifdef EIGEN_DEBUG_ASSIGN + static void debug() + { + EIGEN_DEBUG_VAR(DstIsAligned) + EIGEN_DEBUG_VAR(SrcIsAligned) + EIGEN_DEBUG_VAR(JointAlignment) + EIGEN_DEBUG_VAR(InnerSize) + EIGEN_DEBUG_VAR(InnerMaxSize) + EIGEN_DEBUG_VAR(PacketSize) + EIGEN_DEBUG_VAR(StorageOrdersAgree) + EIGEN_DEBUG_VAR(MightVectorize) + EIGEN_DEBUG_VAR(MayLinearize) + EIGEN_DEBUG_VAR(MayInnerVectorize) + EIGEN_DEBUG_VAR(MayLinearVectorize) + EIGEN_DEBUG_VAR(MaySliceVectorize) + EIGEN_DEBUG_VAR(Traversal) + EIGEN_DEBUG_VAR(UnrollingLimit) + EIGEN_DEBUG_VAR(MayUnrollCompletely) + EIGEN_DEBUG_VAR(MayUnrollInner) + EIGEN_DEBUG_VAR(Unrolling) + } +#endif +}; + +/*************************************************************************** +* Part 2 : meta-unrollers +***************************************************************************/ + +/************************ +*** Default traversal *** +************************/ + +template +struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling +{ + typedef typename 
DstEvaluatorType::XprType DstXprType; + + enum { + outer = Index / DstXprType::InnerSizeAtCompileTime, + inner = Index % DstXprType::InnerSizeAtCompileTime + }; + + EIGEN_STRONG_INLINE static void run(DstEvaluatorType &dstEvaluator, + SrcEvaluatorType &srcEvaluator) + { + dstEvaluator.copyCoeffByOuterInner(outer, inner, srcEvaluator); + copy_using_evaluator_DefaultTraversal_CompleteUnrolling + + ::run(dstEvaluator, srcEvaluator); + } +}; + +template +struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling +{ + EIGEN_STRONG_INLINE static void run(DstEvaluatorType&, SrcEvaluatorType&) { } +}; + +template +struct copy_using_evaluator_DefaultTraversal_InnerUnrolling +{ + EIGEN_STRONG_INLINE static void run(DstEvaluatorType &dstEvaluator, + SrcEvaluatorType &srcEvaluator, + int outer) + { + dstEvaluator.copyCoeffByOuterInner(outer, Index, srcEvaluator); + copy_using_evaluator_DefaultTraversal_InnerUnrolling + + ::run(dstEvaluator, srcEvaluator, outer); + } +}; + +template +struct copy_using_evaluator_DefaultTraversal_InnerUnrolling +{ + EIGEN_STRONG_INLINE static void run(DstEvaluatorType&, SrcEvaluatorType&, int) { } +}; + +/*********************** +*** Linear traversal *** +***********************/ + +template +struct copy_using_evaluator_LinearTraversal_CompleteUnrolling +{ + EIGEN_STRONG_INLINE static void run(DstEvaluatorType &dstEvaluator, + SrcEvaluatorType &srcEvaluator) + { + dstEvaluator.copyCoeff(Index, srcEvaluator); + copy_using_evaluator_LinearTraversal_CompleteUnrolling + + ::run(dstEvaluator, srcEvaluator); + } +}; + +template +struct copy_using_evaluator_LinearTraversal_CompleteUnrolling +{ + EIGEN_STRONG_INLINE static void run(DstEvaluatorType&, SrcEvaluatorType&) { } +}; + +/************************** +*** Inner vectorization *** +**************************/ + +template +struct copy_using_evaluator_innervec_CompleteUnrolling +{ + typedef typename DstEvaluatorType::XprType DstXprType; + typedef typename SrcEvaluatorType::XprType SrcXprType; + + 
enum { + outer = Index / DstXprType::InnerSizeAtCompileTime, + inner = Index % DstXprType::InnerSizeAtCompileTime, + JointAlignment = copy_using_evaluator_traits::JointAlignment + }; + + EIGEN_STRONG_INLINE static void run(DstEvaluatorType &dstEvaluator, + SrcEvaluatorType &srcEvaluator) + { + dstEvaluator.template copyPacketByOuterInner(outer, inner, srcEvaluator); + enum { NextIndex = Index + packet_traits::size }; + copy_using_evaluator_innervec_CompleteUnrolling + + ::run(dstEvaluator, srcEvaluator); + } +}; + +template +struct copy_using_evaluator_innervec_CompleteUnrolling +{ + EIGEN_STRONG_INLINE static void run(DstEvaluatorType&, SrcEvaluatorType&) { } +}; + +template +struct copy_using_evaluator_innervec_InnerUnrolling +{ + EIGEN_STRONG_INLINE static void run(DstEvaluatorType &dstEvaluator, + SrcEvaluatorType &srcEvaluator, + int outer) + { + dstEvaluator.template copyPacketByOuterInner(outer, Index, srcEvaluator); + typedef typename DstEvaluatorType::XprType DstXprType; + enum { NextIndex = Index + packet_traits::size }; + copy_using_evaluator_innervec_InnerUnrolling + + ::run(dstEvaluator, srcEvaluator, outer); + } +}; + +template +struct copy_using_evaluator_innervec_InnerUnrolling +{ + EIGEN_STRONG_INLINE static void run(DstEvaluatorType&, SrcEvaluatorType&, int) { } +}; + +/*************************************************************************** +* Part 3 : implementation of all cases +***************************************************************************/ + +// copy_using_evaluator_impl is based on assign_impl + +template::Traversal, + int Unrolling = copy_using_evaluator_traits::Unrolling> +struct copy_using_evaluator_impl; + +/************************ +*** Default traversal *** +************************/ + +template +struct copy_using_evaluator_impl +{ + static void run(DstXprType& dst, const SrcXprType& src) + { + typedef typename evaluator::type DstEvaluatorType; + typedef typename evaluator::type SrcEvaluatorType; + typedef typename 
DstXprType::Index Index; + + DstEvaluatorType dstEvaluator(dst); + SrcEvaluatorType srcEvaluator(src); + + for(Index outer = 0; outer < dst.outerSize(); ++outer) { + for(Index inner = 0; inner < dst.innerSize(); ++inner) { + dstEvaluator.copyCoeffByOuterInner(outer, inner, srcEvaluator); + } + } + } +}; + +template +struct copy_using_evaluator_impl +{ + EIGEN_STRONG_INLINE static void run(DstXprType &dst, const SrcXprType &src) + { + typedef typename evaluator::type DstEvaluatorType; + typedef typename evaluator::type SrcEvaluatorType; + + DstEvaluatorType dstEvaluator(dst); + SrcEvaluatorType srcEvaluator(src); + + copy_using_evaluator_DefaultTraversal_CompleteUnrolling + + ::run(dstEvaluator, srcEvaluator); + } +}; + +template +struct copy_using_evaluator_impl +{ + typedef typename DstXprType::Index Index; + EIGEN_STRONG_INLINE static void run(DstXprType &dst, const SrcXprType &src) + { + typedef typename evaluator::type DstEvaluatorType; + typedef typename evaluator::type SrcEvaluatorType; + + DstEvaluatorType dstEvaluator(dst); + SrcEvaluatorType srcEvaluator(src); + + const Index outerSize = dst.outerSize(); + for(Index outer = 0; outer < outerSize; ++outer) + copy_using_evaluator_DefaultTraversal_InnerUnrolling + + ::run(dstEvaluator, srcEvaluator, outer); + } +}; + +/*************************** +*** Linear vectorization *** +***************************/ + +template +struct unaligned_copy_using_evaluator_impl +{ + // if IsAligned = true, then do nothing + template + static EIGEN_STRONG_INLINE void run(const SrcEvaluatorType&, DstEvaluatorType&, + typename SrcEvaluatorType::Index, typename SrcEvaluatorType::Index) {} +}; + +template <> +struct unaligned_copy_using_evaluator_impl +{ + // MSVC must not inline this functions. If it does, it fails to optimize the + // packet access path. 
+#ifdef _MSC_VER + template + static EIGEN_DONT_INLINE void run(DstEvaluatorType &dstEvaluator, + const SrcEvaluatorType &srcEvaluator, + typename DstEvaluatorType::Index start, + typename DstEvaluatorType::Index end) +#else + template + static EIGEN_STRONG_INLINE void run(DstEvaluatorType &dstEvaluator, + const SrcEvaluatorType &srcEvaluator, + typename DstEvaluatorType::Index start, + typename DstEvaluatorType::Index end) +#endif + { + for (typename DstEvaluatorType::Index index = start; index < end; ++index) + dstEvaluator.copyCoeff(index, srcEvaluator); + } +}; + +template +struct copy_using_evaluator_impl +{ + EIGEN_STRONG_INLINE static void run(DstXprType &dst, const SrcXprType &src) + { + typedef typename evaluator::type DstEvaluatorType; + typedef typename evaluator::type SrcEvaluatorType; + typedef typename DstXprType::Index Index; + + DstEvaluatorType dstEvaluator(dst); + SrcEvaluatorType srcEvaluator(src); + + const Index size = dst.size(); + typedef packet_traits PacketTraits; + enum { + packetSize = PacketTraits::size, + dstIsAligned = int(copy_using_evaluator_traits::DstIsAligned), + dstAlignment = PacketTraits::AlignedOnScalar ? Aligned : dstIsAligned, + srcAlignment = copy_using_evaluator_traits::JointAlignment + }; + const Index alignedStart = dstIsAligned ? 
0 : first_aligned(&dstEvaluator.coeffRef(0), size); + const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize; + + unaligned_copy_using_evaluator_impl::run(dstEvaluator, srcEvaluator, 0, alignedStart); + + for(Index index = alignedStart; index < alignedEnd; index += packetSize) + { + dstEvaluator.template copyPacket(index, srcEvaluator); + } + + unaligned_copy_using_evaluator_impl<>::run(dstEvaluator, srcEvaluator, alignedEnd, size); + } +}; + +template +struct copy_using_evaluator_impl +{ + typedef typename DstXprType::Index Index; + EIGEN_STRONG_INLINE static void run(DstXprType &dst, const SrcXprType &src) + { + typedef typename evaluator::type DstEvaluatorType; + typedef typename evaluator::type SrcEvaluatorType; + + DstEvaluatorType dstEvaluator(dst); + SrcEvaluatorType srcEvaluator(src); + + enum { size = DstXprType::SizeAtCompileTime, + packetSize = packet_traits::size, + alignedSize = (size/packetSize)*packetSize }; + + copy_using_evaluator_innervec_CompleteUnrolling + + ::run(dstEvaluator, srcEvaluator); + copy_using_evaluator_DefaultTraversal_CompleteUnrolling + + ::run(dstEvaluator, srcEvaluator); + } +}; + +/************************** +*** Inner vectorization *** +**************************/ + +template +struct copy_using_evaluator_impl +{ + inline static void run(DstXprType &dst, const SrcXprType &src) + { + typedef typename evaluator::type DstEvaluatorType; + typedef typename evaluator::type SrcEvaluatorType; + typedef typename DstXprType::Index Index; + + DstEvaluatorType dstEvaluator(dst); + SrcEvaluatorType srcEvaluator(src); + + const Index innerSize = dst.innerSize(); + const Index outerSize = dst.outerSize(); + const Index packetSize = packet_traits::size; + for(Index outer = 0; outer < outerSize; ++outer) + for(Index inner = 0; inner < innerSize; inner+=packetSize) { + dstEvaluator.template copyPacketByOuterInner(outer, inner, srcEvaluator); + } + } +}; + +template +struct copy_using_evaluator_impl +{ + 
EIGEN_STRONG_INLINE static void run(DstXprType &dst, const SrcXprType &src) + { + typedef typename evaluator::type DstEvaluatorType; + typedef typename evaluator::type SrcEvaluatorType; + + DstEvaluatorType dstEvaluator(dst); + SrcEvaluatorType srcEvaluator(src); + + copy_using_evaluator_innervec_CompleteUnrolling + + ::run(dstEvaluator, srcEvaluator); + } +}; + +template +struct copy_using_evaluator_impl +{ + typedef typename DstXprType::Index Index; + EIGEN_STRONG_INLINE static void run(DstXprType &dst, const SrcXprType &src) + { + typedef typename evaluator::type DstEvaluatorType; + typedef typename evaluator::type SrcEvaluatorType; + + DstEvaluatorType dstEvaluator(dst); + SrcEvaluatorType srcEvaluator(src); + + const Index outerSize = dst.outerSize(); + for(Index outer = 0; outer < outerSize; ++outer) + copy_using_evaluator_innervec_InnerUnrolling + + ::run(dstEvaluator, srcEvaluator, outer); + } +}; + +/*********************** +*** Linear traversal *** +***********************/ + +template +struct copy_using_evaluator_impl +{ + inline static void run(DstXprType &dst, const SrcXprType &src) + { + typedef typename evaluator::type DstEvaluatorType; + typedef typename evaluator::type SrcEvaluatorType; + typedef typename DstXprType::Index Index; + + DstEvaluatorType dstEvaluator(dst); + SrcEvaluatorType srcEvaluator(src); + + const Index size = dst.size(); + for(Index i = 0; i < size; ++i) + dstEvaluator.copyCoeff(i, srcEvaluator); + } +}; + +template +struct copy_using_evaluator_impl +{ + EIGEN_STRONG_INLINE static void run(DstXprType &dst, const SrcXprType &src) + { + typedef typename evaluator::type DstEvaluatorType; + typedef typename evaluator::type SrcEvaluatorType; + + DstEvaluatorType dstEvaluator(dst); + SrcEvaluatorType srcEvaluator(src); + + copy_using_evaluator_LinearTraversal_CompleteUnrolling + + ::run(dstEvaluator, srcEvaluator); + } +}; + +/************************** +*** Slice vectorization *** +***************************/ + +template +struct 
copy_using_evaluator_impl +{ + inline static void run(DstXprType &dst, const SrcXprType &src) + { + typedef typename evaluator::type DstEvaluatorType; + typedef typename evaluator::type SrcEvaluatorType; + typedef typename DstXprType::Index Index; + + DstEvaluatorType dstEvaluator(dst); + SrcEvaluatorType srcEvaluator(src); + + typedef packet_traits PacketTraits; + enum { + packetSize = PacketTraits::size, + alignable = PacketTraits::AlignedOnScalar, + dstAlignment = alignable ? Aligned : int(copy_using_evaluator_traits::DstIsAligned) , + srcAlignment = copy_using_evaluator_traits::JointAlignment + }; + const Index packetAlignedMask = packetSize - 1; + const Index innerSize = dst.innerSize(); + const Index outerSize = dst.outerSize(); + const Index alignedStep = alignable ? (packetSize - dst.outerStride() % packetSize) & packetAlignedMask : 0; + Index alignedStart = ((!alignable) || copy_using_evaluator_traits::DstIsAligned) ? 0 + : first_aligned(&dstEvaluator.coeffRef(0,0), innerSize); + + for(Index outer = 0; outer < outerSize; ++outer) + { + const Index alignedEnd = alignedStart + ((innerSize-alignedStart) & ~packetAlignedMask); + // do the non-vectorizable part of the assignment + for(Index inner = 0; inner(outer, inner, srcEvaluator); + } + + // do the non-vectorizable part of the assignment + for(Index inner = alignedEnd; inner((alignedStart+alignedStep)%packetSize, innerSize); + } + } +}; + +/*************************************************************************** +* Part 4 : Entry points +***************************************************************************/ + +// Based on DenseBase::LazyAssign() + +template +const DstXprType& copy_using_evaluator(const DstXprType& dst, const SrcXprType& src) +{ +#ifdef EIGEN_DEBUG_ASSIGN + internal::copy_using_evaluator_traits::debug(); +#endif + copy_using_evaluator_impl::run(const_cast(dst), src); + return dst; +} + +// Based on DenseBase::swap() +// TODO: Check whether we need to do something special for 
swapping two +// Arrays or Matrices. + +template +void swap_using_evaluator(const DstXprType& dst, const SrcXprType& src) +{ + copy_using_evaluator(SwapWrapper(const_cast(dst)), src); +} + +// Based on MatrixBase::operator+= (in CwiseBinaryOp.h) +template +void add_assign_using_evaluator(const MatrixBase& dst, const MatrixBase& src) +{ + typedef typename DstXprType::Scalar Scalar; + SelfCwiseBinaryOp, DstXprType, SrcXprType> tmp(dst.const_cast_derived()); + copy_using_evaluator(tmp, src.derived()); +} + +// Based on ArrayBase::operator+= +template +void add_assign_using_evaluator(const ArrayBase& dst, const ArrayBase& src) +{ + typedef typename DstXprType::Scalar Scalar; + SelfCwiseBinaryOp, DstXprType, SrcXprType> tmp(dst.const_cast_derived()); + copy_using_evaluator(tmp, src.derived()); +} + +// TODO: Add add_assign_using_evaluator for EigenBase ? + +template +void subtract_assign_using_evaluator(const MatrixBase& dst, const MatrixBase& src) +{ + typedef typename DstXprType::Scalar Scalar; + SelfCwiseBinaryOp, DstXprType, SrcXprType> tmp(dst.const_cast_derived()); + copy_using_evaluator(tmp, src.derived()); +} + +template +void subtract_assign_using_evaluator(const ArrayBase& dst, const ArrayBase& src) +{ + typedef typename DstXprType::Scalar Scalar; + SelfCwiseBinaryOp, DstXprType, SrcXprType> tmp(dst.const_cast_derived()); + copy_using_evaluator(tmp, src.derived()); +} + +template +void multiply_assign_using_evaluator(const ArrayBase& dst, const ArrayBase& src) +{ + typedef typename DstXprType::Scalar Scalar; + SelfCwiseBinaryOp, DstXprType, SrcXprType> tmp(dst.const_cast_derived()); + copy_using_evaluator(tmp, src.derived()); +} + +template +void divide_assign_using_evaluator(const ArrayBase& dst, const ArrayBase& src) +{ + typedef typename DstXprType::Scalar Scalar; + SelfCwiseBinaryOp, DstXprType, SrcXprType> tmp(dst.const_cast_derived()); + copy_using_evaluator(tmp, src.derived()); +} + + +} // namespace internal + +#endif // EIGEN_ASSIGN_EVALUATOR_H -- 
cgit v1.2.3