// This file is part of Eigen, a lightweight C++ template library // for linear algebra. Eigen itself is part of the KDE project. // // Copyright (C) 2007 Michael Olbrich // Copyright (C) 2006-2008 Benoit Jacob // Copyright (C) 2008 Gael Guennebaud // // Eigen is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 3 of the License, or (at your option) any later version. // // Alternatively, you can redistribute it and/or // modify it under the terms of the GNU General Public License as // published by the Free Software Foundation; either version 2 of // the License, or (at your option) any later version. // // Eigen is distributed in the hope that it will be useful, but WITHOUT ANY // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS // FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the // GNU General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License and a copy of the GNU General Public License along with // Eigen. If not, see . #ifndef EIGEN_ASSIGN_H #define EIGEN_ASSIGN_H /*************************************************************************** * Part 1 : the logic deciding a strategy for vectorization and unrolling ***************************************************************************/ template struct ei_assign_traits { private: enum { InnerSize = int(Derived::Flags)&RowMajorBit ? Derived::ColsAtCompileTime : Derived::RowsAtCompileTime, InnerMaxSize = int(Derived::Flags)&RowMajorBit ? Derived::MaxColsAtCompileTime : Derived::MaxRowsAtCompileTime, PacketSize = ei_packet_traits::size }; enum { MightVectorize = (int(Derived::Flags) & int(OtherDerived::Flags) & PacketAccessBit) && ((int(Derived::Flags)&RowMajorBit)==(int(OtherDerived::Flags)&RowMajorBit)), MayInnerVectorize = MightVectorize && InnerSize!=Dynamic && int(InnerSize)%int(PacketSize)==0, MayLinearVectorize = MightVectorize && (int(Derived::Flags) & int(OtherDerived::Flags) & LinearAccessBit), MaySliceVectorize = MightVectorize && InnerMaxSize==Dynamic /* slice vectorization can be slow, so we only want it if the slices are big, which is indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block in a fixed-size matrix */ }; public: enum { Vectorization = MayInnerVectorize ? InnerVectorization : MayLinearVectorize ? LinearVectorization : MaySliceVectorize ? SliceVectorization : NoVectorization }; private: enum { UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Vectorization) == int(NoVectorization) ? 1 : int(PacketSize)), MayUnrollCompletely = int(Derived::SizeAtCompileTime) * int(OtherDerived::CoeffReadCost) <= int(UnrollingLimit), MayUnrollInner = int(InnerSize * OtherDerived::CoeffReadCost) <= int(UnrollingLimit) }; public: enum { Unrolling = (int(Vectorization) == int(InnerVectorization) || int(Vectorization) == int(NoVectorization)) ? ( MayUnrollCompletely ? CompleteUnrolling : MayUnrollInner ? InnerUnrolling : NoUnrolling ) : int(Vectorization) == int(LinearVectorization) ? ( MayUnrollCompletely ? CompleteUnrolling : NoUnrolling ) : NoUnrolling }; }; /*************************************************************************** * Part 2 : meta-unrollers ***************************************************************************/ /*********************** *** No vectorization *** ***********************/ template struct ei_assign_novec_CompleteUnrolling { enum { row = int(Derived1::Flags)&RowMajorBit ? Index / int(Derived1::ColsAtCompileTime) : Index % Derived1::RowsAtCompileTime, col = int(Derived1::Flags)&RowMajorBit ? Index % int(Derived1::ColsAtCompileTime) : Index / Derived1::RowsAtCompileTime }; inline static void run(Derived1 &dst, const Derived2 &src) { dst.coeffRef(row, col) = src.coeff(row, col); ei_assign_novec_CompleteUnrolling::run(dst, src); } }; template struct ei_assign_novec_CompleteUnrolling { inline static void run(Derived1 &, const Derived2 &) {} }; template struct ei_assign_novec_InnerUnrolling { inline static void run(Derived1 &dst, const Derived2 &src, int row_or_col) { const bool rowMajor = int(Derived1::Flags)&RowMajorBit; const int row = rowMajor ? row_or_col : Index; const int col = rowMajor ? Index : row_or_col; dst.coeffRef(row, col) = src.coeff(row, col); ei_assign_novec_InnerUnrolling::run(dst, src, row_or_col); } }; template struct ei_assign_novec_InnerUnrolling { inline static void run(Derived1 &, const Derived2 &, int) {} }; /************************** *** Inner vectorization *** **************************/ template struct ei_assign_innervec_CompleteUnrolling { enum { row = int(Derived1::Flags)&RowMajorBit ? Index / int(Derived1::ColsAtCompileTime) : Index % Derived1::RowsAtCompileTime, col = int(Derived1::Flags)&RowMajorBit ? Index % int(Derived1::ColsAtCompileTime) : Index / Derived1::RowsAtCompileTime }; inline static void run(Derived1 &dst, const Derived2 &src) { dst.template writePacket(row, col, src.template packet(row, col)); ei_assign_innervec_CompleteUnrolling::size, Stop>::run(dst, src); } }; template struct ei_assign_innervec_CompleteUnrolling { inline static void run(Derived1 &, const Derived2 &) {} }; template struct ei_assign_innervec_InnerUnrolling { inline static void run(Derived1 &dst, const Derived2 &src, int row_or_col) { const int row = int(Derived1::Flags)&RowMajorBit ? row_or_col : Index; const int col = int(Derived1::Flags)&RowMajorBit ? Index : row_or_col; dst.template writePacket(row, col, src.template packet(row, col)); ei_assign_innervec_InnerUnrolling::size, Stop>::run(dst, src, row_or_col); } }; template struct ei_assign_innervec_InnerUnrolling { inline static void run(Derived1 &, const Derived2 &, int) {} }; /*************************************************************************** * Part 3 : implementation of all cases ***************************************************************************/ template::Vectorization, int Unrolling = ei_assign_traits::Unrolling> struct ei_assign_impl; /*********************** *** No vectorization *** ***********************/ template struct ei_assign_impl { static void run(Derived1 &dst, const Derived2 &src) { const bool rowMajor = int(Derived1::Flags)&RowMajorBit; const int innerSize = rowMajor ? dst.cols() : dst.rows(); const int outerSize = rowMajor ? dst.rows() : dst.cols(); for(int j = 0; j < outerSize; j++) for(int i = 0; i < innerSize; i++) { const int row = rowMajor ? j : i; const int col = rowMajor ? i : j; dst.coeffRef(row, col) = src.coeff(row, col); } } }; template struct ei_assign_impl { inline static void run(Derived1 &dst, const Derived2 &src) { ei_assign_novec_CompleteUnrolling ::run(dst, src); } }; template struct ei_assign_impl { static void run(Derived1 &dst, const Derived2 &src) { const bool rowMajor = int(Derived1::Flags)&RowMajorBit; const int innerSize = rowMajor ? Derived1::ColsAtCompileTime : Derived1::RowsAtCompileTime; const int outerSize = rowMajor ? dst.rows() : dst.cols(); for(int j = 0; j < outerSize; j++) ei_assign_novec_InnerUnrolling ::run(dst, src, j); } }; /************************** *** Inner vectorization *** **************************/ template struct ei_assign_impl { static void run(Derived1 &dst, const Derived2 &src) { const bool rowMajor = int(Derived1::Flags)&RowMajorBit; const int innerSize = rowMajor ? dst.cols() : dst.rows(); const int outerSize = rowMajor ? dst.rows() : dst.cols(); const int packetSize = ei_packet_traits::size; for(int j = 0; j < outerSize; j++) { for(int i = 0; i < innerSize; i+=packetSize) { const int row = rowMajor ? j : i; const int col = rowMajor ? i : j; dst.template writePacket(row, col, src.template packet(row, col)); } } } }; template struct ei_assign_impl { inline static void run(Derived1 &dst, const Derived2 &src) { ei_assign_innervec_CompleteUnrolling ::run(dst, src); } }; template struct ei_assign_impl { static void run(Derived1 &dst, const Derived2 &src) { const bool rowMajor = int(Derived1::Flags)&RowMajorBit; const int innerSize = rowMajor ? Derived1::ColsAtCompileTime : Derived1::RowsAtCompileTime; const int outerSize = rowMajor ? dst.rows() : dst.cols(); for(int j = 0; j < outerSize; j++) ei_assign_innervec_InnerUnrolling ::run(dst, src, j); } }; /*************************** *** Linear vectorization *** ***************************/ template struct ei_assign_impl { static void run(Derived1 &dst, const Derived2 &src) { const int size = dst.size(); const int packetSize = ei_packet_traits::size; const int alignedSize = (size/packetSize)*packetSize; const bool rowMajor = Derived1::Flags&RowMajorBit; const int innerSize = rowMajor ? dst.cols() : dst.rows(); const int outerSize = rowMajor ? dst.rows() : dst.cols(); int index = 0; // do the vectorizable part of the assignment int row = 0; int col = 0; while (index(row, col, src.template packet(row, col)); index += (rowMajor ? col : row) - start; row = rowMajor ? index/innerSize : index%innerSize; col = rowMajor ? index%innerSize : index/innerSize; } // now we must do the rest without vectorization. if(alignedSize == size) return; const int k = alignedSize/innerSize; // do the remainder of the current row or col for(int i = alignedSize%innerSize; i < innerSize; i++) { const int row = rowMajor ? k : i; const int col = rowMajor ? i : k; dst.coeffRef(row, col) = src.coeff(row, col); } // do the remaining rows or cols for(int j = k+1; j < outerSize; j++) for(int i = 0; i < innerSize; i++) { const int row = rowMajor ? i : j; const int col = rowMajor ? j : i; dst.coeffRef(row, col) = src.coeff(row, col); } } }; template struct ei_assign_impl { static void run(Derived1 &dst, const Derived2 &src) { const int size = Derived1::SizeAtCompileTime; const int packetSize = ei_packet_traits::size; const int alignedSize = (size/packetSize)*packetSize; const bool rowMajor = int(Derived1::Flags)&RowMajorBit; const int innerSize = rowMajor ? int(Derived1::ColsAtCompileTime) : int(Derived1::RowsAtCompileTime); const int outerSize = rowMajor ? int(Derived1::RowsAtCompileTime) : int(Derived1::ColsAtCompileTime); // do the vectorizable part of the assignment ei_assign_innervec_CompleteUnrolling::run(dst, src); // now we must do the rest without vectorization. const int k = alignedSize/innerSize; const int i = alignedSize%innerSize; // do the remainder of the current row or col ei_assign_novec_InnerUnrolling::run(dst, src, k); // do the remaining rows or cols for(int j = k+1; j < outerSize; j++) ei_assign_novec_InnerUnrolling::run(dst, src, j); } }; /************************** *** Slice vectorization *** ***************************/ template struct ei_assign_impl { static void run(Derived1 &dst, const Derived2 &src) { const int packetSize = ei_packet_traits::size; const bool rowMajor = Derived1::Flags&RowMajorBit; const int innerSize = rowMajor ? dst.cols() : dst.rows(); const int outerSize = rowMajor ? dst.rows() : dst.cols(); const int alignedInnerSize = (innerSize/packetSize)*packetSize; for(int i = 0; i < outerSize; i++) { // do the vectorizable part of the assignment for (int index = 0; index(row, col, src.template packet(row, col)); } // do the non-vectorizable part of the assignment for (int index = alignedInnerSize; index template inline Derived& MatrixBase ::lazyAssign(const MatrixBase& other) { EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Derived,OtherDerived); ei_assert(rows() == other.rows() && cols() == other.cols()); ei_assign_impl::run(derived(),other.derived()); return derived(); } template struct ei_assign_selector; template struct ei_assign_selector { static Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose().eval()); } }; template struct ei_assign_selector { static Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.eval()); } }; template struct ei_assign_selector { static Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose()); } }; template struct ei_assign_selector { static Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.derived()); } }; template template inline Derived& MatrixBase ::operator=(const MatrixBase& other) { return ei_assign_selector::run(derived(), other.derived()); } #endif // EIGEN_ASSIGN_H