// This file is part of Eigen, a lightweight C++ template library // for linear algebra. // // Copyright (C) 2011 Benoit Jacob // Copyright (C) 2011 Gael Guennebaud // Copyright (C) 2011-2012 Jitse Niesen // // Eigen is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 3 of the License, or (at your option) any later version. // // Alternatively, you can redistribute it and/or // modify it under the terms of the GNU General Public License as // published by the Free Software Foundation; either version 2 of // the License, or (at your option) any later version. // // Eigen is distributed in the hope that it will be useful, but WITHOUT ANY // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS // FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the // GNU General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License and a copy of the GNU General Public License along with // Eigen. If not, see . #ifndef EIGEN_ASSIGN_EVALUATOR_H #define EIGEN_ASSIGN_EVALUATOR_H namespace Eigen { // This implementation is based on Assign.h namespace internal { /*************************************************************************** * Part 1 : the logic deciding a strategy for traversal and unrolling * ***************************************************************************/ // copy_using_evaluator_traits is based on assign_traits template struct copy_using_evaluator_traits { public: enum { DstIsAligned = Derived::Flags & AlignedBit, DstHasDirectAccess = Derived::Flags & DirectAccessBit, SrcIsAligned = OtherDerived::Flags & AlignedBit, JointAlignment = bool(DstIsAligned) && bool(SrcIsAligned) ? Aligned : Unaligned, SrcEvalBeforeAssign = (evaluator_traits::HasEvalTo == 1) }; private: enum { InnerSize = int(Derived::IsVectorAtCompileTime) ? int(Derived::SizeAtCompileTime) : int(Derived::Flags)&RowMajorBit ? int(Derived::ColsAtCompileTime) : int(Derived::RowsAtCompileTime), InnerMaxSize = int(Derived::IsVectorAtCompileTime) ? int(Derived::MaxSizeAtCompileTime) : int(Derived::Flags)&RowMajorBit ? int(Derived::MaxColsAtCompileTime) : int(Derived::MaxRowsAtCompileTime), MaxSizeAtCompileTime = Derived::SizeAtCompileTime, PacketSize = packet_traits::size }; enum { StorageOrdersAgree = (int(Derived::IsRowMajor) == int(OtherDerived::IsRowMajor)), MightVectorize = StorageOrdersAgree && (int(Derived::Flags) & int(OtherDerived::Flags) & ActualPacketAccessBit), MayInnerVectorize = MightVectorize && int(InnerSize)!=Dynamic && int(InnerSize)%int(PacketSize)==0 && int(DstIsAligned) && int(SrcIsAligned), MayLinearize = StorageOrdersAgree && (int(Derived::Flags) & int(OtherDerived::Flags) & LinearAccessBit), MayLinearVectorize = MightVectorize && MayLinearize && DstHasDirectAccess && (DstIsAligned || MaxSizeAtCompileTime == Dynamic), /* If the destination isn't aligned, we have to do runtime checks and we don't unroll, so it's only good for large enough sizes. */ MaySliceVectorize = MightVectorize && DstHasDirectAccess && (int(InnerMaxSize)==Dynamic || int(InnerMaxSize)>=3*PacketSize) /* slice vectorization can be slow, so we only want it if the slices are big, which is indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block in a fixed-size matrix */ }; public: enum { Traversal = int(SrcEvalBeforeAssign) ? int(AllAtOnceTraversal) : int(MayInnerVectorize) ? int(InnerVectorizedTraversal) : int(MayLinearVectorize) ? int(LinearVectorizedTraversal) : int(MaySliceVectorize) ? int(SliceVectorizedTraversal) : int(MayLinearize) ? int(LinearTraversal) : int(DefaultTraversal), Vectorized = int(Traversal) == InnerVectorizedTraversal || int(Traversal) == LinearVectorizedTraversal || int(Traversal) == SliceVectorizedTraversal }; private: enum { UnrollingLimit = EIGEN_UNROLLING_LIMIT * (Vectorized ? int(PacketSize) : 1), MayUnrollCompletely = int(Derived::SizeAtCompileTime) != Dynamic && int(OtherDerived::CoeffReadCost) != Dynamic && int(Derived::SizeAtCompileTime) * int(OtherDerived::CoeffReadCost) <= int(UnrollingLimit), MayUnrollInner = int(InnerSize) != Dynamic && int(OtherDerived::CoeffReadCost) != Dynamic && int(InnerSize) * int(OtherDerived::CoeffReadCost) <= int(UnrollingLimit) }; public: enum { Unrolling = (int(Traversal) == int(InnerVectorizedTraversal) || int(Traversal) == int(DefaultTraversal)) ? ( int(MayUnrollCompletely) ? int(CompleteUnrolling) : int(MayUnrollInner) ? int(InnerUnrolling) : int(NoUnrolling) ) : int(Traversal) == int(LinearVectorizedTraversal) ? ( bool(MayUnrollCompletely) && bool(DstIsAligned) ? int(CompleteUnrolling) : int(NoUnrolling) ) : int(Traversal) == int(LinearTraversal) ? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling) : int(NoUnrolling) ) : int(NoUnrolling) }; #ifdef EIGEN_DEBUG_ASSIGN static void debug() { EIGEN_DEBUG_VAR(DstIsAligned) EIGEN_DEBUG_VAR(SrcIsAligned) EIGEN_DEBUG_VAR(JointAlignment) EIGEN_DEBUG_VAR(InnerSize) EIGEN_DEBUG_VAR(InnerMaxSize) EIGEN_DEBUG_VAR(PacketSize) EIGEN_DEBUG_VAR(StorageOrdersAgree) EIGEN_DEBUG_VAR(MightVectorize) EIGEN_DEBUG_VAR(MayLinearize) EIGEN_DEBUG_VAR(MayInnerVectorize) EIGEN_DEBUG_VAR(MayLinearVectorize) EIGEN_DEBUG_VAR(MaySliceVectorize) EIGEN_DEBUG_VAR(Traversal) EIGEN_DEBUG_VAR(UnrollingLimit) EIGEN_DEBUG_VAR(MayUnrollCompletely) EIGEN_DEBUG_VAR(MayUnrollInner) EIGEN_DEBUG_VAR(Unrolling) } #endif }; /*************************************************************************** * Part 2 : meta-unrollers ***************************************************************************/ /************************ *** Default traversal *** ************************/ template struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling { typedef typename DstEvaluatorType::XprType DstXprType; enum { outer = Index / DstXprType::InnerSizeAtCompileTime, inner = Index % DstXprType::InnerSizeAtCompileTime }; EIGEN_STRONG_INLINE static void run(DstEvaluatorType &dstEvaluator, SrcEvaluatorType &srcEvaluator) { dstEvaluator.copyCoeffByOuterInner(outer, inner, srcEvaluator); copy_using_evaluator_DefaultTraversal_CompleteUnrolling ::run(dstEvaluator, srcEvaluator); } }; template struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling { EIGEN_STRONG_INLINE static void run(DstEvaluatorType&, SrcEvaluatorType&) { } }; template struct copy_using_evaluator_DefaultTraversal_InnerUnrolling { EIGEN_STRONG_INLINE static void run(DstEvaluatorType &dstEvaluator, SrcEvaluatorType &srcEvaluator, int outer) { dstEvaluator.copyCoeffByOuterInner(outer, Index, srcEvaluator); copy_using_evaluator_DefaultTraversal_InnerUnrolling ::run(dstEvaluator, srcEvaluator, outer); } }; template struct copy_using_evaluator_DefaultTraversal_InnerUnrolling { EIGEN_STRONG_INLINE static void run(DstEvaluatorType&, SrcEvaluatorType&, int) { } }; /*********************** *** Linear traversal *** ***********************/ template struct copy_using_evaluator_LinearTraversal_CompleteUnrolling { EIGEN_STRONG_INLINE static void run(DstEvaluatorType &dstEvaluator, SrcEvaluatorType &srcEvaluator) { dstEvaluator.copyCoeff(Index, srcEvaluator); copy_using_evaluator_LinearTraversal_CompleteUnrolling ::run(dstEvaluator, srcEvaluator); } }; template struct copy_using_evaluator_LinearTraversal_CompleteUnrolling { EIGEN_STRONG_INLINE static void run(DstEvaluatorType&, SrcEvaluatorType&) { } }; /************************** *** Inner vectorization *** **************************/ template struct copy_using_evaluator_innervec_CompleteUnrolling { typedef typename DstEvaluatorType::XprType DstXprType; typedef typename SrcEvaluatorType::XprType SrcXprType; enum { outer = Index / DstXprType::InnerSizeAtCompileTime, inner = Index % DstXprType::InnerSizeAtCompileTime, JointAlignment = copy_using_evaluator_traits::JointAlignment }; EIGEN_STRONG_INLINE static void run(DstEvaluatorType &dstEvaluator, SrcEvaluatorType &srcEvaluator) { dstEvaluator.template copyPacketByOuterInner(outer, inner, srcEvaluator); enum { NextIndex = Index + packet_traits::size }; copy_using_evaluator_innervec_CompleteUnrolling ::run(dstEvaluator, srcEvaluator); } }; template struct copy_using_evaluator_innervec_CompleteUnrolling { EIGEN_STRONG_INLINE static void run(DstEvaluatorType&, SrcEvaluatorType&) { } }; template struct copy_using_evaluator_innervec_InnerUnrolling { EIGEN_STRONG_INLINE static void run(DstEvaluatorType &dstEvaluator, SrcEvaluatorType &srcEvaluator, int outer) { dstEvaluator.template copyPacketByOuterInner(outer, Index, srcEvaluator); typedef typename DstEvaluatorType::XprType DstXprType; enum { NextIndex = Index + packet_traits::size }; copy_using_evaluator_innervec_InnerUnrolling ::run(dstEvaluator, srcEvaluator, outer); } }; template struct copy_using_evaluator_innervec_InnerUnrolling { EIGEN_STRONG_INLINE static void run(DstEvaluatorType&, SrcEvaluatorType&, int) { } }; /*************************************************************************** * Part 3 : implementation of all cases ***************************************************************************/ // copy_using_evaluator_impl is based on assign_impl template::Traversal, int Unrolling = copy_using_evaluator_traits::Unrolling> struct copy_using_evaluator_impl; /************************ *** Default traversal *** ************************/ template struct copy_using_evaluator_impl { static void run(DstXprType& dst, const SrcXprType& src) { typedef typename evaluator::type DstEvaluatorType; typedef typename evaluator::type SrcEvaluatorType; typedef typename DstXprType::Index Index; DstEvaluatorType dstEvaluator(dst); SrcEvaluatorType srcEvaluator(src); for(Index outer = 0; outer < dst.outerSize(); ++outer) { for(Index inner = 0; inner < dst.innerSize(); ++inner) { dstEvaluator.copyCoeffByOuterInner(outer, inner, srcEvaluator); } } } }; template struct copy_using_evaluator_impl { EIGEN_STRONG_INLINE static void run(DstXprType &dst, const SrcXprType &src) { typedef typename evaluator::type DstEvaluatorType; typedef typename evaluator::type SrcEvaluatorType; DstEvaluatorType dstEvaluator(dst); SrcEvaluatorType srcEvaluator(src); copy_using_evaluator_DefaultTraversal_CompleteUnrolling ::run(dstEvaluator, srcEvaluator); } }; template struct copy_using_evaluator_impl { typedef typename DstXprType::Index Index; EIGEN_STRONG_INLINE static void run(DstXprType &dst, const SrcXprType &src) { typedef typename evaluator::type DstEvaluatorType; typedef typename evaluator::type SrcEvaluatorType; DstEvaluatorType dstEvaluator(dst); SrcEvaluatorType srcEvaluator(src); const Index outerSize = dst.outerSize(); for(Index outer = 0; outer < outerSize; ++outer) copy_using_evaluator_DefaultTraversal_InnerUnrolling ::run(dstEvaluator, srcEvaluator, outer); } }; /*************************** *** Linear vectorization *** ***************************/ template struct unaligned_copy_using_evaluator_impl { // if IsAligned = true, then do nothing template static EIGEN_STRONG_INLINE void run(const SrcEvaluatorType&, DstEvaluatorType&, typename SrcEvaluatorType::Index, typename SrcEvaluatorType::Index) {} }; template <> struct unaligned_copy_using_evaluator_impl { // MSVC must not inline this functions. If it does, it fails to optimize the // packet access path. #ifdef _MSC_VER template static EIGEN_DONT_INLINE void run(DstEvaluatorType &dstEvaluator, const SrcEvaluatorType &srcEvaluator, typename DstEvaluatorType::Index start, typename DstEvaluatorType::Index end) #else template static EIGEN_STRONG_INLINE void run(DstEvaluatorType &dstEvaluator, const SrcEvaluatorType &srcEvaluator, typename DstEvaluatorType::Index start, typename DstEvaluatorType::Index end) #endif { for (typename DstEvaluatorType::Index index = start; index < end; ++index) dstEvaluator.copyCoeff(index, srcEvaluator); } }; template struct copy_using_evaluator_impl { EIGEN_STRONG_INLINE static void run(DstXprType &dst, const SrcXprType &src) { typedef typename evaluator::type DstEvaluatorType; typedef typename evaluator::type SrcEvaluatorType; typedef typename DstXprType::Index Index; DstEvaluatorType dstEvaluator(dst); SrcEvaluatorType srcEvaluator(src); const Index size = dst.size(); typedef packet_traits PacketTraits; enum { packetSize = PacketTraits::size, dstIsAligned = int(copy_using_evaluator_traits::DstIsAligned), dstAlignment = PacketTraits::AlignedOnScalar ? Aligned : dstIsAligned, srcAlignment = copy_using_evaluator_traits::JointAlignment }; const Index alignedStart = dstIsAligned ? 0 : first_aligned(&dstEvaluator.coeffRef(0), size); const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize; unaligned_copy_using_evaluator_impl::run(dstEvaluator, srcEvaluator, 0, alignedStart); for(Index index = alignedStart; index < alignedEnd; index += packetSize) { dstEvaluator.template copyPacket(index, srcEvaluator); } unaligned_copy_using_evaluator_impl<>::run(dstEvaluator, srcEvaluator, alignedEnd, size); } }; template struct copy_using_evaluator_impl { typedef typename DstXprType::Index Index; EIGEN_STRONG_INLINE static void run(DstXprType &dst, const SrcXprType &src) { typedef typename evaluator::type DstEvaluatorType; typedef typename evaluator::type SrcEvaluatorType; DstEvaluatorType dstEvaluator(dst); SrcEvaluatorType srcEvaluator(src); enum { size = DstXprType::SizeAtCompileTime, packetSize = packet_traits::size, alignedSize = (size/packetSize)*packetSize }; copy_using_evaluator_innervec_CompleteUnrolling ::run(dstEvaluator, srcEvaluator); copy_using_evaluator_DefaultTraversal_CompleteUnrolling ::run(dstEvaluator, srcEvaluator); } }; /************************** *** Inner vectorization *** **************************/ template struct copy_using_evaluator_impl { inline static void run(DstXprType &dst, const SrcXprType &src) { typedef typename evaluator::type DstEvaluatorType; typedef typename evaluator::type SrcEvaluatorType; typedef typename DstXprType::Index Index; DstEvaluatorType dstEvaluator(dst); SrcEvaluatorType srcEvaluator(src); const Index innerSize = dst.innerSize(); const Index outerSize = dst.outerSize(); const Index packetSize = packet_traits::size; for(Index outer = 0; outer < outerSize; ++outer) for(Index inner = 0; inner < innerSize; inner+=packetSize) { dstEvaluator.template copyPacketByOuterInner(outer, inner, srcEvaluator); } } }; template struct copy_using_evaluator_impl { EIGEN_STRONG_INLINE static void run(DstXprType &dst, const SrcXprType &src) { typedef typename evaluator::type DstEvaluatorType; typedef typename evaluator::type SrcEvaluatorType; DstEvaluatorType dstEvaluator(dst); SrcEvaluatorType srcEvaluator(src); copy_using_evaluator_innervec_CompleteUnrolling ::run(dstEvaluator, srcEvaluator); } }; template struct copy_using_evaluator_impl { typedef typename DstXprType::Index Index; EIGEN_STRONG_INLINE static void run(DstXprType &dst, const SrcXprType &src) { typedef typename evaluator::type DstEvaluatorType; typedef typename evaluator::type SrcEvaluatorType; DstEvaluatorType dstEvaluator(dst); SrcEvaluatorType srcEvaluator(src); const Index outerSize = dst.outerSize(); for(Index outer = 0; outer < outerSize; ++outer) copy_using_evaluator_innervec_InnerUnrolling ::run(dstEvaluator, srcEvaluator, outer); } }; /*********************** *** Linear traversal *** ***********************/ template struct copy_using_evaluator_impl { inline static void run(DstXprType &dst, const SrcXprType &src) { typedef typename evaluator::type DstEvaluatorType; typedef typename evaluator::type SrcEvaluatorType; typedef typename DstXprType::Index Index; DstEvaluatorType dstEvaluator(dst); SrcEvaluatorType srcEvaluator(src); const Index size = dst.size(); for(Index i = 0; i < size; ++i) dstEvaluator.copyCoeff(i, srcEvaluator); } }; template struct copy_using_evaluator_impl { EIGEN_STRONG_INLINE static void run(DstXprType &dst, const SrcXprType &src) { typedef typename evaluator::type DstEvaluatorType; typedef typename evaluator::type SrcEvaluatorType; DstEvaluatorType dstEvaluator(dst); SrcEvaluatorType srcEvaluator(src); copy_using_evaluator_LinearTraversal_CompleteUnrolling ::run(dstEvaluator, srcEvaluator); } }; /************************** *** Slice vectorization *** ***************************/ template struct copy_using_evaluator_impl { inline static void run(DstXprType &dst, const SrcXprType &src) { typedef typename evaluator::type DstEvaluatorType; typedef typename evaluator::type SrcEvaluatorType; typedef typename DstXprType::Index Index; DstEvaluatorType dstEvaluator(dst); SrcEvaluatorType srcEvaluator(src); typedef packet_traits PacketTraits; enum { packetSize = PacketTraits::size, alignable = PacketTraits::AlignedOnScalar, dstAlignment = alignable ? Aligned : int(copy_using_evaluator_traits::DstIsAligned) }; const Index packetAlignedMask = packetSize - 1; const Index innerSize = dst.innerSize(); const Index outerSize = dst.outerSize(); const Index alignedStep = alignable ? (packetSize - dst.outerStride() % packetSize) & packetAlignedMask : 0; Index alignedStart = ((!alignable) || copy_using_evaluator_traits::DstIsAligned) ? 0 : first_aligned(&dstEvaluator.coeffRef(0,0), innerSize); for(Index outer = 0; outer < outerSize; ++outer) { const Index alignedEnd = alignedStart + ((innerSize-alignedStart) & ~packetAlignedMask); // do the non-vectorizable part of the assignment for(Index inner = 0; inner(outer, inner, srcEvaluator); } // do the non-vectorizable part of the assignment for(Index inner = alignedEnd; inner((alignedStart+alignedStep)%packetSize, innerSize); } } }; /**************************** *** All-at-once traversal *** ****************************/ template struct copy_using_evaluator_impl { inline static void run(DstXprType &dst, const SrcXprType &src) { typedef typename evaluator::type DstEvaluatorType; typedef typename evaluator::type SrcEvaluatorType; typedef typename DstXprType::Index Index; DstEvaluatorType dstEvaluator(dst); SrcEvaluatorType srcEvaluator(src); // Evaluate rhs in temporary to prevent aliasing problems in a = a * a; // TODO: Do not pass the xpr object to evalTo() srcEvaluator.evalTo(dstEvaluator, dst); } }; /*************************************************************************** * Part 4 : Entry points ***************************************************************************/ // Based on DenseBase::LazyAssign() template class StorageBase, typename SrcXprType> EIGEN_STRONG_INLINE const DstXprType& copy_using_evaluator(const NoAlias& dst, const EigenBase& src) { return noalias_copy_using_evaluator(dst.expression(), src.derived()); } template::AssumeAliasing> struct AddEvalIfAssumingAliasing; template struct AddEvalIfAssumingAliasing { static const XprType& run(const XprType& xpr) { return xpr; } }; template struct AddEvalIfAssumingAliasing { static const EvalToTemp run(const XprType& xpr) { return EvalToTemp(xpr); } }; template EIGEN_STRONG_INLINE const DstXprType& copy_using_evaluator(const EigenBase& dst, const EigenBase& src) { return noalias_copy_using_evaluator(dst.const_cast_derived(), AddEvalIfAssumingAliasing::run(src.derived())); } template EIGEN_STRONG_INLINE const DstXprType& noalias_copy_using_evaluator(const PlainObjectBase& dst, const EigenBase& src) { #ifdef EIGEN_DEBUG_ASSIGN internal::copy_using_evaluator_traits::debug(); #endif #ifdef EIGEN_NO_AUTOMATIC_RESIZING eigen_assert((dst.size()==0 || (IsVectorAtCompileTime ? (dst.size() == src.size()) : (dst.rows() == src.rows() && dst.cols() == src.cols()))) && "Size mismatch. Automatic resizing is disabled because EIGEN_NO_AUTOMATIC_RESIZING is defined"); #else dst.const_cast_derived().resizeLike(src.derived()); #endif return copy_using_evaluator_without_resizing(dst.const_cast_derived(), src.derived()); } template EIGEN_STRONG_INLINE const DstXprType& noalias_copy_using_evaluator(const EigenBase& dst, const EigenBase& src) { return copy_using_evaluator_without_resizing(dst.const_cast_derived(), src.derived()); } template const DstXprType& copy_using_evaluator_without_resizing(const DstXprType& dst, const SrcXprType& src) { #ifdef EIGEN_DEBUG_ASSIGN internal::copy_using_evaluator_traits::debug(); #endif eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); copy_using_evaluator_impl::run(const_cast(dst), src); return dst; } // Based on DenseBase::swap() // TODO: Chech whether we need to do something special for swapping two // Arrays or Matrices. template void swap_using_evaluator(const DstXprType& dst, const SrcXprType& src) { copy_using_evaluator(SwapWrapper(const_cast(dst)), src); } // Based on MatrixBase::operator+= (in CwiseBinaryOp.h) template void add_assign_using_evaluator(const MatrixBase& dst, const MatrixBase& src) { typedef typename DstXprType::Scalar Scalar; SelfCwiseBinaryOp, DstXprType, SrcXprType> tmp(dst.const_cast_derived()); copy_using_evaluator(tmp, src.derived()); } // Based on ArrayBase::operator+= template void add_assign_using_evaluator(const ArrayBase& dst, const ArrayBase& src) { typedef typename DstXprType::Scalar Scalar; SelfCwiseBinaryOp, DstXprType, SrcXprType> tmp(dst.const_cast_derived()); copy_using_evaluator(tmp, src.derived()); } // TODO: Add add_assign_using_evaluator for EigenBase ? template void subtract_assign_using_evaluator(const MatrixBase& dst, const MatrixBase& src) { typedef typename DstXprType::Scalar Scalar; SelfCwiseBinaryOp, DstXprType, SrcXprType> tmp(dst.const_cast_derived()); copy_using_evaluator(tmp, src.derived()); } template void subtract_assign_using_evaluator(const ArrayBase& dst, const ArrayBase& src) { typedef typename DstXprType::Scalar Scalar; SelfCwiseBinaryOp, DstXprType, SrcXprType> tmp(dst.const_cast_derived()); copy_using_evaluator(tmp, src.derived()); } template void multiply_assign_using_evaluator(const ArrayBase& dst, const ArrayBase& src) { typedef typename DstXprType::Scalar Scalar; SelfCwiseBinaryOp, DstXprType, SrcXprType> tmp(dst.const_cast_derived()); copy_using_evaluator(tmp, src.derived()); } template void divide_assign_using_evaluator(const ArrayBase& dst, const ArrayBase& src) { typedef typename DstXprType::Scalar Scalar; SelfCwiseBinaryOp, DstXprType, SrcXprType> tmp(dst.const_cast_derived()); copy_using_evaluator(tmp, src.derived()); } } // namespace internal } // end namespace Eigen #endif // EIGEN_ASSIGN_EVALUATOR_H