// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2007 Michael Olbrich
// Copyright (C) 2006-2008 Benoit Jacob
// Copyright (C) 2008 Gael Guennebaud
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 3 of the License, or (at your option) any later version.
//
// Alternatively, you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation; either version 2 of
// the License, or (at your option) any later version.
//
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License and a copy of the GNU General Public License along with
// Eigen. If not, see <http://www.gnu.org/licenses/>.

#ifndef EIGEN_ASSIGN_H
#define EIGEN_ASSIGN_H

/***************************************************************************
* Part 1 : the logic deciding a strategy for traversal and unrolling       *
***************************************************************************/

template<typename Derived, typename OtherDerived>
struct ei_assign_traits
{
public:
  enum {
    DstIsAligned = Derived::Flags & AlignedBit,
    SrcIsAligned = OtherDerived::Flags & AlignedBit,
    SrcAlignment = DstIsAligned && SrcIsAligned ? Aligned : Unaligned
  };

private:
  enum {
    InnerSize = int(Derived::Flags)&RowMajorBit
              ? Derived::ColsAtCompileTime
              : Derived::RowsAtCompileTime,
    InnerMaxSize = int(Derived::Flags)&RowMajorBit
              ? Derived::MaxColsAtCompileTime
              : Derived::MaxRowsAtCompileTime,
    MaxSizeAtCompileTime = ei_size_at_compile_time<Derived::MaxRowsAtCompileTime,Derived::MaxColsAtCompileTime>::ret,
    PacketSize = ei_packet_traits<typename Derived::Scalar>::size
  };

  enum {
    StorageOrdersAgree = (int(Derived::Flags)&RowMajorBit) == (int(OtherDerived::Flags)&RowMajorBit),
    MightVectorize = StorageOrdersAgree
                  && (int(Derived::Flags) & int(OtherDerived::Flags) & ActualPacketAccessBit),
    MayInnerVectorize = MightVectorize && int(InnerSize)!=Dynamic && int(InnerSize)%int(PacketSize)==0
                     && int(DstIsAligned) && int(SrcIsAligned),
    MayLinearize = StorageOrdersAgree && (int(Derived::Flags) & int(OtherDerived::Flags) & LinearAccessBit),
    MayLinearVectorize = MightVectorize && MayLinearize
                      && (DstIsAligned || MaxSizeAtCompileTime == Dynamic),
      /* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
         so it's only good for large enough sizes. */
    MaySliceVectorize = MightVectorize && int(InnerMaxSize)>=3*PacketSize
      /* slice vectorization can be slow, so we only want it if the slices are big, which is
         indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block
         in a fixed-size matrix */
  };

public:
  enum {
    Traversal = int(MayInnerVectorize)  ? int(InnerVectorizedTraversal)
              : int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
              : int(MaySliceVectorize)  ? int(SliceVectorizedTraversal)
              : int(MayLinearize)       ? int(LinearTraversal)
                                        : int(DefaultTraversal),
    Vectorized = int(Traversal) == InnerVectorizedTraversal
              || int(Traversal) == LinearVectorizedTraversal
              || int(Traversal) == SliceVectorizedTraversal
  };

private:
  enum {
    UnrollingLimit      = EIGEN_UNROLLING_LIMIT * (Vectorized ? int(PacketSize) : 1),
    MayUnrollCompletely = int(Derived::SizeAtCompileTime) * int(OtherDerived::CoeffReadCost) <= int(UnrollingLimit),
    MayUnrollInner      = int(InnerSize) * int(OtherDerived::CoeffReadCost) <= int(UnrollingLimit)
  };

public:
  enum {
    Unrolling = (int(Traversal) == int(InnerVectorizedTraversal) || int(Traversal) == int(DefaultTraversal))
              ? (
                  int(MayUnrollCompletely) ? int(CompleteUnrolling)
                : int(MayUnrollInner)      ? int(InnerUnrolling)
                                           : int(NoUnrolling)
                )
              : int(Traversal) == int(LinearVectorizedTraversal)
              ? ( int(MayUnrollCompletely) && int(DstIsAligned) ? int(CompleteUnrolling) : int(NoUnrolling) )
              : int(Traversal) == int(LinearTraversal)
              ? ( int(MayUnrollCompletely) ? int(CompleteUnrolling) : int(NoUnrolling) )
              : int(NoUnrolling)
  };

  static void debug()
  {
    EIGEN_DEBUG_VAR(DstIsAligned)
    EIGEN_DEBUG_VAR(SrcIsAligned)
    EIGEN_DEBUG_VAR(SrcAlignment)
    EIGEN_DEBUG_VAR(InnerSize)
    EIGEN_DEBUG_VAR(InnerMaxSize)
    EIGEN_DEBUG_VAR(PacketSize)
    EIGEN_DEBUG_VAR(StorageOrdersAgree)
    EIGEN_DEBUG_VAR(MightVectorize)
    EIGEN_DEBUG_VAR(MayInnerVectorize)
    EIGEN_DEBUG_VAR(MayLinearVectorize)
    EIGEN_DEBUG_VAR(MaySliceVectorize)
    EIGEN_DEBUG_VAR(Traversal)
    EIGEN_DEBUG_VAR(UnrollingLimit)
    EIGEN_DEBUG_VAR(MayUnrollCompletely)
    EIGEN_DEBUG_VAR(MayUnrollInner)
    EIGEN_DEBUG_VAR(Unrolling)
  }
};
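// Illustration: the strategy chosen by ei_assign_traits for a concrete
// assignment can be inspected by defining EIGEN_DEBUG_ASSIGN before including
// Eigen, which makes lazyAssign() (Part 4 below) call the debug() member
// above. A minimal sketch:
//
//   #define EIGEN_DEBUG_ASSIGN
//   #include <Eigen/Core>
//
//   int main()
//   {
//     Eigen::Matrix4f dst, src;
//     src.setRandom();
//     dst = src; // dumps DstIsAligned, Traversal, Unrolling, etc.
//   }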
/***************************************************************************
* Part 2 : meta-unrollers
***************************************************************************/

/************************
*** Default traversal ***
************************/

template<typename Derived1, typename Derived2, int Index, int Stop>
struct ei_assign_DefaultTraversal_CompleteUnrolling
{
  enum {
    row = int(Derived1::Flags)&RowMajorBit
        ? Index / int(Derived1::ColsAtCompileTime)
        : Index % Derived1::RowsAtCompileTime,
    col = int(Derived1::Flags)&RowMajorBit
        ? Index % int(Derived1::ColsAtCompileTime)
        : Index / Derived1::RowsAtCompileTime
  };

  EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
  {
    dst.copyCoeff(row, col, src);
    ei_assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src);
  }
};

template<typename Derived1, typename Derived2, int Stop>
struct ei_assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, Stop, Stop>
{
  EIGEN_STRONG_INLINE static void run(Derived1 &, const Derived2 &) {}
};

template<typename Derived1, typename Derived2, int Index, int Stop>
struct ei_assign_DefaultTraversal_InnerUnrolling
{
  EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src, int row_or_col)
  {
    const bool rowMajor = int(Derived1::Flags)&RowMajorBit;
    const int row = rowMajor ? row_or_col : Index;
    const int col = rowMajor ? Index : row_or_col;
    dst.copyCoeff(row, col, src);
    ei_assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src, row_or_col);
  }
};

template<typename Derived1, typename Derived2, int Stop>
struct ei_assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, Stop, Stop>
{
  EIGEN_STRONG_INLINE static void run(Derived1 &, const Derived2 &, int) {}
};

/***********************
*** Linear traversal ***
***********************/

template<typename Derived1, typename Derived2, int Index, int Stop>
struct ei_assign_LinearTraversal_CompleteUnrolling
{
  EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
  {
    dst.copyCoeff(Index, src);
    ei_assign_LinearTraversal_CompleteUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src);
  }
};

template<typename Derived1, typename Derived2, int Stop>
struct ei_assign_LinearTraversal_CompleteUnrolling<Derived1, Derived2, Stop, Stop>
{
  EIGEN_STRONG_INLINE static void run(Derived1 &, const Derived2 &) {}
};
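// Illustration: for a fixed-size 2x2 column-major matrix, instantiating
// ei_assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, 0, 4>
// expands at compile time to the equivalent of
//
//   dst.copyCoeff(0, 0, src);
//   dst.copyCoeff(1, 0, src);
//   dst.copyCoeff(0, 1, src);
//   dst.copyCoeff(1, 1, src);
//
// with the Index==Stop partial specialization terminating the recursion.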
/**************************
*** Inner vectorization ***
**************************/

template<typename Derived1, typename Derived2, int Index, int Stop>
struct ei_assign_innervec_CompleteUnrolling
{
  enum {
    row = int(Derived1::Flags)&RowMajorBit
        ? Index / int(Derived1::ColsAtCompileTime)
        : Index % Derived1::RowsAtCompileTime,
    col = int(Derived1::Flags)&RowMajorBit
        ? Index % int(Derived1::ColsAtCompileTime)
        : Index / Derived1::RowsAtCompileTime,
    SrcAlignment = ei_assign_traits<Derived1,Derived2>::SrcAlignment
  };

  EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
  {
    dst.template copyPacket<Derived2, Aligned, SrcAlignment>(row, col, src);
    ei_assign_innervec_CompleteUnrolling<Derived1, Derived2,
      Index+ei_packet_traits<typename Derived1::Scalar>::size, Stop>::run(dst, src);
  }
};

template<typename Derived1, typename Derived2, int Stop>
struct ei_assign_innervec_CompleteUnrolling<Derived1, Derived2, Stop, Stop>
{
  EIGEN_STRONG_INLINE static void run(Derived1 &, const Derived2 &) {}
};

template<typename Derived1, typename Derived2, int Index, int Stop>
struct ei_assign_innervec_InnerUnrolling
{
  EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src, int row_or_col)
  {
    const int row = int(Derived1::Flags)&RowMajorBit ? row_or_col : Index;
    const int col = int(Derived1::Flags)&RowMajorBit ? Index : row_or_col;
    dst.template copyPacket<Derived2, Aligned, Aligned>(row, col, src);
    ei_assign_innervec_InnerUnrolling<Derived1, Derived2,
      Index+ei_packet_traits<typename Derived1::Scalar>::size, Stop>::run(dst, src, row_or_col);
  }
};

template<typename Derived1, typename Derived2, int Stop>
struct ei_assign_innervec_InnerUnrolling<Derived1, Derived2, Stop, Stop>
{
  EIGEN_STRONG_INLINE static void run(Derived1 &, const Derived2 &, int) {}
};
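// Note: unlike the scalar unrollers above, these packet unrollers advance
// Index by ei_packet_traits<Scalar>::size per step. Assuming, e.g., 4 floats
// per packet, a complete unrolling of a 4x4 float assignment issues four
// copyPacket calls instead of sixteen copyCoeff calls.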
/***************************************************************************
* Part 3 : implementation of all cases
***************************************************************************/

template<typename Derived1, typename Derived2,
         int Traversal = ei_assign_traits<Derived1, Derived2>::Traversal,
         int Unrolling = ei_assign_traits<Derived1, Derived2>::Unrolling>
struct ei_assign_impl;

/************************
*** Default traversal ***
************************/

template<typename Derived1, typename Derived2>
struct ei_assign_impl<Derived1, Derived2, DefaultTraversal, NoUnrolling>
{
  inline static void run(Derived1 &dst, const Derived2 &src)
  {
    const int innerSize = dst.innerSize();
    const int outerSize = dst.outerSize();
    for(int j = 0; j < outerSize; ++j)
      for(int i = 0; i < innerSize; ++i)
      {
        if(int(Derived1::Flags)&RowMajorBit)
          dst.copyCoeff(j, i, src);
        else
          dst.copyCoeff(i, j, src);
      }
  }
};

template<typename Derived1, typename Derived2>
struct ei_assign_impl<Derived1, Derived2, DefaultTraversal, CompleteUnrolling>
{
  EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
  {
    ei_assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
      ::run(dst, src);
  }
};

template<typename Derived1, typename Derived2>
struct ei_assign_impl<Derived1, Derived2, DefaultTraversal, InnerUnrolling>
{
  EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
  {
    const bool rowMajor = int(Derived1::Flags)&RowMajorBit;
    const int innerSize = rowMajor ? Derived1::ColsAtCompileTime : Derived1::RowsAtCompileTime;
    const int outerSize = dst.outerSize();
    for(int j = 0; j < outerSize; ++j)
      ei_assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, 0, innerSize>
        ::run(dst, src, j);
  }
};

/***********************
*** Linear traversal ***
***********************/

template<typename Derived1, typename Derived2>
struct ei_assign_impl<Derived1, Derived2, LinearTraversal, NoUnrolling>
{
  inline static void run(Derived1 &dst, const Derived2 &src)
  {
    const int size = dst.size();
    for(int i = 0; i < size; ++i)
      dst.copyCoeff(i, src);
  }
};

template<typename Derived1, typename Derived2>
struct ei_assign_impl<Derived1, Derived2, LinearTraversal, CompleteUnrolling>
{
  EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
  {
    ei_assign_LinearTraversal_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
      ::run(dst, src);
  }
};

/**************************
*** Inner vectorization ***
**************************/

template<typename Derived1, typename Derived2>
struct ei_assign_impl<Derived1, Derived2, InnerVectorizedTraversal, NoUnrolling>
{
  inline static void run(Derived1 &dst, const Derived2 &src)
  {
    const int innerSize = dst.innerSize();
    const int outerSize = dst.outerSize();
    const int packetSize = ei_packet_traits<typename Derived1::Scalar>::size;
    for(int j = 0; j < outerSize; ++j)
      for(int i = 0; i < innerSize; i+=packetSize)
      {
        if(int(Derived1::Flags)&RowMajorBit)
          dst.template copyPacket<Derived2, Aligned, Aligned>(j, i, src);
        else
          dst.template copyPacket<Derived2, Aligned, Aligned>(i, j, src);
      }
  }
};

template<typename Derived1, typename Derived2>
struct ei_assign_impl<Derived1, Derived2, InnerVectorizedTraversal, CompleteUnrolling>
{
  EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
  {
    ei_assign_innervec_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
      ::run(dst, src);
  }
};

template<typename Derived1, typename Derived2>
struct ei_assign_impl<Derived1, Derived2, InnerVectorizedTraversal, InnerUnrolling>
{
  EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
  {
    const bool rowMajor = int(Derived1::Flags)&RowMajorBit;
    const int innerSize = rowMajor ? Derived1::ColsAtCompileTime : Derived1::RowsAtCompileTime;
    const int outerSize = dst.outerSize();
    for(int j = 0; j < outerSize; ++j)
      ei_assign_innervec_InnerUnrolling<Derived1, Derived2, 0, innerSize>
        ::run(dst, src, j);
  }
};

/***************************
*** Linear vectorization ***
***************************/

template <bool IsAligned = false>
struct ei_unaligned_assign_impl
{
  template <typename Derived, typename OtherDerived>
  static EIGEN_STRONG_INLINE void run(const Derived&, OtherDerived&, int, int) {}
};

template <>
struct ei_unaligned_assign_impl<false>
{
  // MSVC must not inline this function. If it does, it fails to optimize the
  // packet access path.
#ifdef _MSC_VER
  template <typename Derived, typename OtherDerived>
  static EIGEN_DONT_INLINE void run(const Derived& src, OtherDerived& dst, int start, int end)
#else
  template <typename Derived, typename OtherDerived>
  static EIGEN_STRONG_INLINE void run(const Derived& src, OtherDerived& dst, int start, int end)
#endif
  {
    for (int index = start; index < end; ++index)
      dst.copyCoeff(index, src);
  }
};

template<typename Derived1, typename Derived2>
struct ei_assign_impl<Derived1, Derived2, LinearVectorizedTraversal, NoUnrolling>
{
  inline static void run(Derived1 &dst, const Derived2 &src)
  {
    const int size = dst.size();
    const int packetSize = ei_packet_traits<typename Derived1::Scalar>::size;
    const int alignedStart = ei_assign_traits<Derived1,Derived2>::DstIsAligned ? 0
                           : ei_first_aligned(&dst.coeffRef(0), size);
    const int alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;

    ei_unaligned_assign_impl<ei_assign_traits<Derived1,Derived2>::DstIsAligned!=0>::run(src,dst,0,alignedStart);

    for(int index = alignedStart; index < alignedEnd; index += packetSize)
    {
      dst.template copyPacket<Derived2, Aligned, ei_assign_traits<Derived1,Derived2>::SrcAlignment>(index, src);
    }

    ei_unaligned_assign_impl<>::run(src,dst,alignedEnd,size);
  }
};

template<typename Derived1, typename Derived2>
struct ei_assign_impl<Derived1, Derived2, LinearVectorizedTraversal, CompleteUnrolling>
{
  EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
  {
    const int size = Derived1::SizeAtCompileTime;
    const int packetSize = ei_packet_traits<typename Derived1::Scalar>::size;
    const int alignedSize = (size/packetSize)*packetSize;

    ei_assign_innervec_CompleteUnrolling<Derived1, Derived2, 0, alignedSize>::run(dst, src);
    ei_assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, alignedSize, size>::run(dst, src);
  }
};
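// Worked example, assuming 4 floats per packet: for a dynamic float vector of
// size 11 whose data starts 8 bytes past a 16-byte boundary, ei_first_aligned
// returns alignedStart == 2, hence alignedEnd == 2 + ((11-2)/4)*4 == 10.
// Coefficients [0,2) and [10,11) are copied one by one through
// ei_unaligned_assign_impl, and [2,10) is copied as two aligned packets.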
/**************************
*** Slice vectorization ***
***************************/

template<typename Derived1, typename Derived2>
struct ei_assign_impl<Derived1, Derived2, SliceVectorizedTraversal, NoUnrolling>
{
  inline static void run(Derived1 &dst, const Derived2 &src)
  {
    const int packetSize = ei_packet_traits<typename Derived1::Scalar>::size;
    const int packetAlignedMask = packetSize - 1;
    const int innerSize = dst.innerSize();
    const int outerSize = dst.outerSize();
    const int alignedStep = (packetSize - dst.stride() % packetSize) & packetAlignedMask;
    int alignedStart = ei_assign_traits<Derived1,Derived2>::DstIsAligned ? 0
                     : ei_first_aligned(&dst.coeffRef(0,0), innerSize);

    for(int i = 0; i < outerSize; ++i)
    {
      const int alignedEnd = alignedStart + ((innerSize-alignedStart) & ~packetAlignedMask);

      // do the non-vectorizable part of the assignment
      for(int index = 0; index < alignedStart; ++index)
      {
        if(Derived1::Flags&RowMajorBit)
          dst.copyCoeff(i, index, src);
        else
          dst.copyCoeff(index, i, src);
      }

      // do the vectorizable part of the assignment
      for(int index = alignedStart; index < alignedEnd; index += packetSize)
      {
        if(Derived1::Flags&RowMajorBit)
          dst.template copyPacket<Derived2, Aligned, Unaligned>(i, index, src);
        else
          dst.template copyPacket<Derived2, Aligned, Unaligned>(index, i, src);
      }

      // do the non-vectorizable part of the assignment
      for(int index = alignedEnd; index < innerSize; ++index)
      {
        if(Derived1::Flags&RowMajorBit)
          dst.copyCoeff(i, index, src);
        else
          dst.copyCoeff(index, i, src);
      }

      alignedStart = std::min<int>((alignedStart+alignedStep)%packetSize, innerSize);
    }
  }
};

/***************************************************************************
* Part 4 : implementation of DenseBase methods
***************************************************************************/

template<typename Derived>
template<typename OtherDerived>
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>
  ::lazyAssign(const DenseBase<OtherDerived>& other)
{
  EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Derived,OtherDerived)
  EIGEN_STATIC_ASSERT((ei_is_same_type<typename Derived::Scalar, typename OtherDerived::Scalar>::ret),
    YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)

#ifdef EIGEN_DEBUG_ASSIGN
  ei_assign_traits<Derived, OtherDerived>::debug();
#endif
  ei_assert(rows() == other.rows() && cols() == other.cols());
  ei_assign_impl<Derived, OtherDerived>::run(derived(), other.derived());
#ifndef EIGEN_NO_DEBUG
  checkTransposeAliasing(other.derived());
#endif
  return derived();
}

template<typename Derived, typename OtherDerived,
         bool EvalBeforeAssigning = (int(OtherDerived::Flags) & EvalBeforeAssigningBit) != 0,
         bool NeedToTranspose = ((int(Derived::RowsAtCompileTime) == 1 && int(OtherDerived::ColsAtCompileTime) == 1)
                             ||  (int(Derived::ColsAtCompileTime) == 1 && int(OtherDerived::RowsAtCompileTime) == 1))
                             && int(Derived::SizeAtCompileTime) != 1>
struct ei_assign_selector;

template<typename Derived, typename OtherDerived>
struct ei_assign_selector<Derived,OtherDerived,false,false> {
  EIGEN_STRONG_INLINE static Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.derived()); }
};
template<typename Derived, typename OtherDerived>
struct ei_assign_selector<Derived,OtherDerived,true,false> {
  EIGEN_STRONG_INLINE static Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.eval()); }
};
template<typename Derived, typename OtherDerived>
struct ei_assign_selector<Derived,OtherDerived,false,true> {
  EIGEN_STRONG_INLINE static Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose()); }
};
template<typename Derived, typename OtherDerived>
struct ei_assign_selector<Derived,OtherDerived,true,true> {
  EIGEN_STRONG_INLINE static Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose().eval()); }
};

template<typename Derived>
template<typename OtherDerived>
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator=(const DenseBase<OtherDerived>& other)
{
  return ei_assign_selector<Derived,OtherDerived>::run(derived(), other.derived());
}

template<typename Derived>
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator=(const DenseBase& other)
{
  return ei_assign_selector<Derived,Derived>::run(derived(), other.derived());
}

template<typename Derived>
EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const MatrixBase& other)
{
  return ei_assign_selector<Derived,Derived>::run(derived(), other.derived());
}

#endif // EIGEN_ASSIGN_H