// This file is part of Eigen, a lightweight C++ template library // for linear algebra. Eigen itself is part of the KDE project. // // Copyright (C) 2008 Gael Guennebaud // // Eigen is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 3 of the License, or (at your option) any later version. // // Alternatively, you can redistribute it and/or // modify it under the terms of the GNU General Public License as // published by the Free Software Foundation; either version 2 of // the License, or (at your option) any later version. // // Eigen is distributed in the hope that it will be useful, but WITHOUT ANY // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS // FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the // GNU General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License and a copy of the GNU General Public License along with // Eigen. If not, see <http://www.gnu.org/licenses/>. #ifndef EIGEN_SOLVETRIANGULAR_H #define EIGEN_SOLVETRIANGULAR_H template struct ei_is_part { enum {value=false}; }; template struct ei_is_part > { enum {value=true}; }; template::value ? 
-1 // this is to solve ambiguous specializations : int(Lhs::Flags) & (RowMajorBit|SparseBit) > struct ei_solve_triangular_selector; // transform a Part xpr to a Flagged xpr template struct ei_solve_triangular_selector,Rhs,UpLo,StorageOrder> { static void run(const Part& lhs, Rhs& other) { ei_solve_triangular_selector,Rhs>::run(lhs._expression(), other); } }; // forward substitution, row-major template struct ei_solve_triangular_selector { typedef typename Rhs::Scalar Scalar; static void run(const Lhs& lhs, Rhs& other) { const bool IsLower = (UpLo==Lower); const int size = lhs.cols(); /* We perform the inverse product per block of 4 rows such that we perfectly match * our optimized matrix * vector product. blockyStart represents the number of rows * we have to process first using the non-block version. */ int blockyStart = (std::max(size-5,0)/4)*4; if (IsLower) blockyStart = size - blockyStart; else blockyStart -= 1; for(int c=0 ; cblockyStart; i += (IsLower ? 1 : -1) ) { Scalar tmp = other.coeff(i,c) - (IsLower ? ((lhs.row(i).start(i)) * other.col(c).start(i)).coeff(0,0) : ((lhs.row(i).end(size-i-1)) * other.col(c).end(size-i-1)).coeff(0,0)); if (Lhs::Flags & UnitDiagBit) other.coeffRef(i,c) = tmp; else other.coeffRef(i,c) = tmp/lhs.coeff(i,i); } // now let's process the remaining rows 4 at once for(int i=blockyStart; IsLower ? i0; ) { int startBlock = i; int endBlock = startBlock + (IsLower ? 4 : -4); /* Process the i cols times 4 rows block, and keep the result in a temporary vector */ // FIXME use fixed size block but take care of small fixed size matrices... Matrix btmp(4); if (IsLower) btmp = lhs.block(startBlock,0,4,i) * other.col(c).start(i); else btmp = lhs.block(i-3,i+1,4,size-1-i) * other.col(c).end(size-1-i); /* Let's process the 4x4 sub-matrix as usual. * btmp stores the diagonal coefficients used to update the remaining part of the result. 
*/ { Scalar tmp = other.coeff(startBlock,c)-btmp.coeff(IsLower?0:3); if (Lhs::Flags & UnitDiagBit) other.coeffRef(i,c) = tmp; else other.coeffRef(i,c) = tmp/lhs.coeff(i,i); } i += IsLower ? 1 : -1; for (;IsLower ? iendBlock; i += IsLower ? 1 : -1) { int remainingSize = IsLower ? i-startBlock : startBlock-i; Scalar tmp = other.coeff(i,c) - btmp.coeff(IsLower ? remainingSize : 3-remainingSize) - ( lhs.row(i).segment(IsLower ? startBlock : i+1, remainingSize) * other.col(c).segment(IsLower ? startBlock : i+1, remainingSize)).coeff(0,0); if (Lhs::Flags & UnitDiagBit) other.coeffRef(i,c) = tmp; else other.coeffRef(i,c) = tmp/lhs.coeff(i,i); } } } } }; // Implements the following configurations: // - inv(Lower, ColMajor) * Column vector // - inv(Lower,UnitDiag,ColMajor) * Column vector // - inv(Upper, ColMajor) * Column vector // - inv(Upper,UnitDiag,ColMajor) * Column vector template struct ei_solve_triangular_selector { typedef typename Rhs::Scalar Scalar; typedef typename ei_packet_traits::type Packet; enum { PacketSize = ei_packet_traits::size }; static void run(const Lhs& lhs, Rhs& other) { static const bool IsLower = (UpLo==Lower); const int size = lhs.cols(); for(int c=0 ; cblockyEnd;) { /* Let's process the 4x4 sub-matrix as usual. * btmp stores the diagonal coefficients used to update the remaining part of the result. */ int startBlock = i; int endBlock = startBlock + (IsLower ? 4 : -4); Matrix btmp; for (;IsLower ? iendBlock; i += IsLower ? 1 : -1) { if(!(Lhs::Flags & UnitDiagBit)) other.coeffRef(i,c) /= lhs.coeff(i,i); int remainingSize = IsLower ? endBlock-i-1 : i-endBlock-1; if (remainingSize>0) other.col(c).segment((IsLower ? i : endBlock) + 1, remainingSize) -= other.coeffRef(i,c) * Block(lhs, (IsLower ? i : endBlock) + 1, i, remainingSize, 1); btmp.coeffRef(IsLower ? i-startBlock : remainingSize) = -other.coeffRef(i,c); } /* Now we can efficiently update the remaining part of the result as a matrix * vector product. 
* NOTE in order to reduce both compilation time and binary size, let's directly call * the fast product implementation. It is equivalent to the following code: * other.col(c).end(size-endBlock) += (lhs.block(endBlock, startBlock, size-endBlock, endBlock-startBlock) * * other.col(c).block(startBlock,endBlock-startBlock)).lazy(); */ // FIXME this is cool but what about conjugate/adjoint expressions ? do we want to evaluate them ? // this is a more general problem though. ei_cache_friendly_product_colmajor_times_vector( IsLower ? size-endBlock : endBlock+1, &(lhs.const_cast_derived().coeffRef(IsLower ? endBlock : 0, IsLower ? startBlock : endBlock+1)), lhs.stride(), btmp, &(other.coeffRef(IsLower ? endBlock : 0, c))); // if (IsLower) // other.col(c).end(size-endBlock) += (lhs.block(endBlock, startBlock, size-endBlock, endBlock-startBlock) // * other.col(c).block(startBlock,endBlock-startBlock)).lazy(); // else // other.col(c).end(size-endBlock) += (lhs.block(endBlock, startBlock, size-endBlock, endBlock-startBlock) // * other.col(c).block(startBlock,endBlock-startBlock)).lazy(); } /* Now we have to process the remaining part as usual */ int i; for(i=blockyEnd; IsLower ? i0; i += (IsLower ? 1 : -1) ) { if(!(Lhs::Flags & UnitDiagBit)) other.coeffRef(i,c) /= lhs.coeff(i,i); /* NOTE we cannot use lhs.col(i).end(size-i-1) because Part::coeffRef gets called by .col() to * get the address of the start of the row */ if(IsLower) other.col(c).end(size-i-1) -= other.coeffRef(i,c) * Block(lhs, i+1,i, size-i-1,1); else other.col(c).start(i) -= other.coeffRef(i,c) * Block(lhs, 0,i, i, 1); } if(!(Lhs::Flags & UnitDiagBit)) other.coeffRef(i,c) /= lhs.coeff(i,i); } } }; /** "in-place" version of MatrixBase::solveTriangular() where the result is written in \a other * * See MatrixBase::solveTriangular() for the details. 
*/ template template void MatrixBase::solveTriangularInPlace(MatrixBase& other) const { ei_assert(derived().cols() == derived().rows()); ei_assert(derived().cols() == other.rows()); ei_assert(!(Flags & ZeroDiagBit)); ei_assert(Flags & (UpperTriangularBit|LowerTriangularBit)); const bool copy = ei_traits::Flags&RowMajorBit; typedef typename ei_meta_if::type, OtherDerived&>::ret OtherCopy; OtherCopy otherCopy(other.derived()); ei_solve_triangular_selector::type>::run(derived(), otherCopy); if (copy) other = otherCopy; } /** \returns the product of the inverse of \c *this with \a other, \a *this being triangular. * * This function computes the inverse-matrix matrix product inverse(\c *this) * \a other. * The matrix \c *this must be triangular and invertible (i.e., all the coefficients of the * diagonal must be non zero). It works as a forward (resp. backward) substitution if \c *this * is a lower (resp. upper) triangular matrix. * * It is required that \c *this be marked as either an upper or a lower triangular matrix, which * can be done by marked(), and that is automatically the case with expressions such as those returned * by extract(). * * \addexample SolveTriangular \label How to solve a triangular system (aka. how to multiply the inverse of a triangular matrix by another one) * * Example: \include MatrixBase_marked.cpp * Output: \verbinclude MatrixBase_marked.out * * This function is essentially a wrapper to the faster solveTriangularInPlace() function creating * a temporary copy of \a other, calling solveTriangularInPlace() on the copy and returning it. * Therefore, if \a other is not needed anymore, it is noticeably faster to call solveTriangularInPlace() * instead of solveTriangular(). * * For users coming from BLAS, this function (and more specifically solveTriangularInPlace()) offers * all the operations supported by the \c *TRSV and \c *TRSM BLAS routines. 
* * \b Tips: to perform a \em "right-inverse-multiply" you can simply transpose the operation, e.g.: * \code * M * T^-1 <=> T.transpose().solveTriangularInPlace(M.transpose()); * \endcode * * \sa solveTriangularInPlace(), marked(), extract() */ template template typename ei_eval_to_column_major::type MatrixBase::solveTriangular(const MatrixBase& other) const { typename ei_eval_to_column_major::type res(other); solveTriangularInPlace(res); return res; } #endif // EIGEN_SOLVETRIANGULAR_H