diff options
Diffstat (limited to 'third_party/eigen3/Eigen/src/LU')
-rw-r--r-- | third_party/eigen3/Eigen/src/LU/Determinant.h | 101 | ||||
-rw-r--r-- | third_party/eigen3/Eigen/src/LU/FullPivLU.h | 745 | ||||
-rw-r--r-- | third_party/eigen3/Eigen/src/LU/Inverse.h | 417 | ||||
-rw-r--r-- | third_party/eigen3/Eigen/src/LU/PartialPivLU.h | 506 | ||||
-rw-r--r-- | third_party/eigen3/Eigen/src/LU/PartialPivLU_MKL.h | 85 | ||||
-rw-r--r-- | third_party/eigen3/Eigen/src/LU/arch/Inverse_SSE.h | 329 |
6 files changed, 0 insertions, 2183 deletions
diff --git a/third_party/eigen3/Eigen/src/LU/Determinant.h b/third_party/eigen3/Eigen/src/LU/Determinant.h deleted file mode 100644 index bb8e78a8a8..0000000000 --- a/third_party/eigen3/Eigen/src/LU/Determinant.h +++ /dev/null @@ -1,101 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008 Benoit Jacob <jacob.benoit.1@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_DETERMINANT_H -#define EIGEN_DETERMINANT_H - -namespace Eigen { - -namespace internal { - -template<typename Derived> -inline const typename Derived::Scalar bruteforce_det3_helper -(const MatrixBase<Derived>& matrix, int a, int b, int c) -{ - return matrix.coeff(0,a) - * (matrix.coeff(1,b) * matrix.coeff(2,c) - matrix.coeff(1,c) * matrix.coeff(2,b)); -} - -template<typename Derived> -const typename Derived::Scalar bruteforce_det4_helper -(const MatrixBase<Derived>& matrix, int j, int k, int m, int n) -{ - return (matrix.coeff(j,0) * matrix.coeff(k,1) - matrix.coeff(k,0) * matrix.coeff(j,1)) - * (matrix.coeff(m,2) * matrix.coeff(n,3) - matrix.coeff(n,2) * matrix.coeff(m,3)); -} - -template<typename Derived, - int DeterminantType = Derived::RowsAtCompileTime -> struct determinant_impl -{ - static inline typename traits<Derived>::Scalar run(const Derived& m) - { - if(Derived::ColsAtCompileTime==Dynamic && m.rows()==0) - return typename traits<Derived>::Scalar(1); - return m.partialPivLu().determinant(); - } -}; - -template<typename Derived> struct determinant_impl<Derived, 1> -{ - static inline typename traits<Derived>::Scalar run(const Derived& m) - { - return m.coeff(0,0); - } -}; - -template<typename Derived> struct determinant_impl<Derived, 2> -{ - static inline typename traits<Derived>::Scalar run(const Derived& m) - { - return m.coeff(0,0) * m.coeff(1,1) - m.coeff(1,0) * m.coeff(0,1); - } -}; - -template<typename Derived> struct determinant_impl<Derived, 3> -{ - static inline typename traits<Derived>::Scalar run(const Derived& m) - { - return bruteforce_det3_helper(m,0,1,2) - - bruteforce_det3_helper(m,1,0,2) - + bruteforce_det3_helper(m,2,0,1); - } -}; - -template<typename Derived> struct determinant_impl<Derived, 4> -{ - static typename traits<Derived>::Scalar run(const Derived& m) - { - // trick by Martin Costabel to compute 4x4 det with only 30 muls - return bruteforce_det4_helper(m,0,1,2,3) - - bruteforce_det4_helper(m,0,2,1,3) - + bruteforce_det4_helper(m,0,3,1,2) - + bruteforce_det4_helper(m,1,2,0,3) - - bruteforce_det4_helper(m,1,3,0,2) - + bruteforce_det4_helper(m,2,3,0,1); - } -}; - -} // end namespace internal - -/** \lu_module - * - * \returns the determinant of this matrix - */ -template<typename Derived> -inline typename internal::traits<Derived>::Scalar MatrixBase<Derived>::determinant() const -{ - eigen_assert(rows() == cols()); - typedef typename internal::nested<Derived,Base::RowsAtCompileTime>::type Nested; - return internal::determinant_impl<typename internal::remove_all<Nested>::type>::run(derived()); -} - -} // end namespace Eigen - -#endif // EIGEN_DETERMINANT_H diff --git a/third_party/eigen3/Eigen/src/LU/FullPivLU.h b/third_party/eigen3/Eigen/src/LU/FullPivLU.h deleted file mode 100644 index 971b9da1d4..0000000000 --- a/third_party/eigen3/Eigen/src/LU/FullPivLU.h +++ /dev/null @@ -1,745 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2006-2009 Benoit Jacob <jacob.benoit.1@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_LU_H -#define EIGEN_LU_H - -namespace Eigen { - -/** \ingroup LU_Module - * - * \class FullPivLU - * - * \brief LU decomposition of a matrix with complete pivoting, and related features - * - * \param MatrixType the type of the matrix of which we are computing the LU decomposition - * - * This class represents a LU decomposition of any matrix, with complete pivoting: the matrix A is - * decomposed as \f$ A = P^{-1} L U Q^{-1} \f$ where L is unit-lower-triangular, U is - * upper-triangular, and P and Q are permutation matrices. This is a rank-revealing LU - * decomposition. The eigenvalues (diagonal coefficients) of U are sorted in such a way that any - * zeros are at the end. - * - * This decomposition provides the generic approach to solving systems of linear equations, computing - * the rank, invertibility, inverse, kernel, and determinant. - * - * This LU decomposition is very stable and well tested with large matrices. However there are use cases where the SVD - * decomposition is inherently more stable and/or flexible. For example, when computing the kernel of a matrix, - * working with the SVD allows to select the smallest singular values of the matrix, something that - * the LU decomposition doesn't see. - * - * The data of the LU decomposition can be directly accessed through the methods matrixLU(), - * permutationP(), permutationQ(). - * - * As an exemple, here is how the original matrix can be retrieved: - * \include class_FullPivLU.cpp - * Output: \verbinclude class_FullPivLU.out - * - * \sa MatrixBase::fullPivLu(), MatrixBase::determinant(), MatrixBase::inverse() - */ -template<typename _MatrixType> class FullPivLU -{ - public: - typedef _MatrixType MatrixType; - enum { - RowsAtCompileTime = MatrixType::RowsAtCompileTime, - ColsAtCompileTime = MatrixType::ColsAtCompileTime, - Options = MatrixType::Options, - MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, - MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime - }; - typedef typename MatrixType::Scalar Scalar; - typedef typename NumTraits<typename MatrixType::Scalar>::Real RealScalar; - typedef typename internal::traits<MatrixType>::StorageKind StorageKind; - typedef typename MatrixType::Index Index; - typedef typename internal::plain_row_type<MatrixType, Index>::type IntRowVectorType; - typedef typename internal::plain_col_type<MatrixType, Index>::type IntColVectorType; - typedef PermutationMatrix<ColsAtCompileTime, MaxColsAtCompileTime> PermutationQType; - typedef PermutationMatrix<RowsAtCompileTime, MaxRowsAtCompileTime> PermutationPType; - - /** - * \brief Default Constructor. - * - * The default constructor is useful in cases in which the user intends to - * perform decompositions via LU::compute(const MatrixType&). - */ - FullPivLU(); - - /** \brief Default Constructor with memory preallocation - * - * Like the default constructor but with preallocation of the internal data - * according to the specified problem \a size. - * \sa FullPivLU() - */ - FullPivLU(Index rows, Index cols); - - /** Constructor. - * - * \param matrix the matrix of which to compute the LU decomposition. - * It is required to be nonzero. - */ - FullPivLU(const MatrixType& matrix); - - /** Computes the LU decomposition of the given matrix. - * - * \param matrix the matrix of which to compute the LU decomposition. - * It is required to be nonzero. - * - * \returns a reference to *this - */ - FullPivLU& compute(const MatrixType& matrix); - - /** \returns the LU decomposition matrix: the upper-triangular part is U, the - * unit-lower-triangular part is L (at least for square matrices; in the non-square - * case, special care is needed, see the documentation of class FullPivLU). - * - * \sa matrixL(), matrixU() - */ - inline const MatrixType& matrixLU() const - { - eigen_assert(m_isInitialized && "LU is not initialized."); - return m_lu; - } - - /** \returns the number of nonzero pivots in the LU decomposition. - * Here nonzero is meant in the exact sense, not in a fuzzy sense. - * So that notion isn't really intrinsically interesting, but it is - * still useful when implementing algorithms. - * - * \sa rank() - */ - inline Index nonzeroPivots() const - { - eigen_assert(m_isInitialized && "LU is not initialized."); - return m_nonzero_pivots; - } - - /** \returns the absolute value of the biggest pivot, i.e. the biggest - * diagonal coefficient of U. - */ - RealScalar maxPivot() const { return m_maxpivot; } - - /** \returns the permutation matrix P - * - * \sa permutationQ() - */ - inline const PermutationPType& permutationP() const - { - eigen_assert(m_isInitialized && "LU is not initialized."); - return m_p; - } - - /** \returns the permutation matrix Q - * - * \sa permutationP() - */ - inline const PermutationQType& permutationQ() const - { - eigen_assert(m_isInitialized && "LU is not initialized."); - return m_q; - } - - /** \returns the kernel of the matrix, also called its null-space. The columns of the returned matrix - * will form a basis of the kernel. - * - * \note If the kernel has dimension zero, then the returned matrix is a column-vector filled with zeros. - * - * \note This method has to determine which pivots should be considered nonzero. - * For that, it uses the threshold value that you can control by calling - * setThreshold(const RealScalar&). - * - * Example: \include FullPivLU_kernel.cpp - * Output: \verbinclude FullPivLU_kernel.out - * - * \sa image() - */ - inline const internal::kernel_retval<FullPivLU> kernel() const - { - eigen_assert(m_isInitialized && "LU is not initialized."); - return internal::kernel_retval<FullPivLU>(*this); - } - - /** \returns the image of the matrix, also called its column-space. The columns of the returned matrix - * will form a basis of the kernel. - * - * \param originalMatrix the original matrix, of which *this is the LU decomposition. - * The reason why it is needed to pass it here, is that this allows - * a large optimization, as otherwise this method would need to reconstruct it - * from the LU decomposition. - * - * \note If the image has dimension zero, then the returned matrix is a column-vector filled with zeros. - * - * \note This method has to determine which pivots should be considered nonzero. - * For that, it uses the threshold value that you can control by calling - * setThreshold(const RealScalar&). - * - * Example: \include FullPivLU_image.cpp - * Output: \verbinclude FullPivLU_image.out - * - * \sa kernel() - */ - inline const internal::image_retval<FullPivLU> - image(const MatrixType& originalMatrix) const - { - eigen_assert(m_isInitialized && "LU is not initialized."); - return internal::image_retval<FullPivLU>(*this, originalMatrix); - } - - /** \return a solution x to the equation Ax=b, where A is the matrix of which - * *this is the LU decomposition. - * - * \param b the right-hand-side of the equation to solve. Can be a vector or a matrix, - * the only requirement in order for the equation to make sense is that - * b.rows()==A.rows(), where A is the matrix of which *this is the LU decomposition. - * - * \returns a solution. - * - * \note_about_checking_solutions - * - * \note_about_arbitrary_choice_of_solution - * \note_about_using_kernel_to_study_multiple_solutions - * - * Example: \include FullPivLU_solve.cpp - * Output: \verbinclude FullPivLU_solve.out - * - * \sa TriangularView::solve(), kernel(), inverse() - */ - template<typename Rhs> - inline const internal::solve_retval<FullPivLU, Rhs> - solve(const MatrixBase<Rhs>& b) const - { - eigen_assert(m_isInitialized && "LU is not initialized."); - return internal::solve_retval<FullPivLU, Rhs>(*this, b.derived()); - } - - /** \returns the determinant of the matrix of which - * *this is the LU decomposition. It has only linear complexity - * (that is, O(n) where n is the dimension of the square matrix) - * as the LU decomposition has already been computed. - * - * \note This is only for square matrices. - * - * \note For fixed-size matrices of size up to 4, MatrixBase::determinant() offers - * optimized paths. - * - * \warning a determinant can be very big or small, so for matrices - * of large enough dimension, there is a risk of overflow/underflow. - * - * \sa MatrixBase::determinant() - */ - typename internal::traits<MatrixType>::Scalar determinant() const; - - /** Allows to prescribe a threshold to be used by certain methods, such as rank(), - * who need to determine when pivots are to be considered nonzero. This is not used for the - * LU decomposition itself. - * - * When it needs to get the threshold value, Eigen calls threshold(). By default, this - * uses a formula to automatically determine a reasonable threshold. - * Once you have called the present method setThreshold(const RealScalar&), - * your value is used instead. - * - * \param threshold The new value to use as the threshold. - * - * A pivot will be considered nonzero if its absolute value is strictly greater than - * \f$ \vert pivot \vert \leqslant threshold \times \vert maxpivot \vert \f$ - * where maxpivot is the biggest pivot. - * - * If you want to come back to the default behavior, call setThreshold(Default_t) - */ - FullPivLU& setThreshold(const RealScalar& threshold) - { - m_usePrescribedThreshold = true; - m_prescribedThreshold = threshold; - return *this; - } - - /** Allows to come back to the default behavior, letting Eigen use its default formula for - * determining the threshold. - * - * You should pass the special object Eigen::Default as parameter here. - * \code lu.setThreshold(Eigen::Default); \endcode - * - * See the documentation of setThreshold(const RealScalar&). - */ - FullPivLU& setThreshold(Default_t) - { - m_usePrescribedThreshold = false; - return *this; - } - - /** Returns the threshold that will be used by certain methods such as rank(). - * - * See the documentation of setThreshold(const RealScalar&). - */ - RealScalar threshold() const - { - eigen_assert(m_isInitialized || m_usePrescribedThreshold); - return m_usePrescribedThreshold ? m_prescribedThreshold - // this formula comes from experimenting (see "LU precision tuning" thread on the list) - // and turns out to be identical to Higham's formula used already in LDLt. - : NumTraits<Scalar>::epsilon() * m_lu.diagonalSize(); - } - - /** \returns the rank of the matrix of which *this is the LU decomposition. - * - * \note This method has to determine which pivots should be considered nonzero. - * For that, it uses the threshold value that you can control by calling - * setThreshold(const RealScalar&). - */ - inline Index rank() const - { - using std::abs; - eigen_assert(m_isInitialized && "LU is not initialized."); - RealScalar premultiplied_threshold = abs(m_maxpivot) * threshold(); - Index result = 0; - for(Index i = 0; i < m_nonzero_pivots; ++i) - result += (abs(m_lu.coeff(i,i)) > premultiplied_threshold); - return result; - } - - /** \returns the dimension of the kernel of the matrix of which *this is the LU decomposition. - * - * \note This method has to determine which pivots should be considered nonzero. - * For that, it uses the threshold value that you can control by calling - * setThreshold(const RealScalar&). - */ - inline Index dimensionOfKernel() const - { - eigen_assert(m_isInitialized && "LU is not initialized."); - return cols() - rank(); - } - - /** \returns true if the matrix of which *this is the LU decomposition represents an injective - * linear map, i.e. has trivial kernel; false otherwise. - * - * \note This method has to determine which pivots should be considered nonzero. - * For that, it uses the threshold value that you can control by calling - * setThreshold(const RealScalar&). - */ - inline bool isInjective() const - { - eigen_assert(m_isInitialized && "LU is not initialized."); - return rank() == cols(); - } - - /** \returns true if the matrix of which *this is the LU decomposition represents a surjective - * linear map; false otherwise. - * - * \note This method has to determine which pivots should be considered nonzero. - * For that, it uses the threshold value that you can control by calling - * setThreshold(const RealScalar&). - */ - inline bool isSurjective() const - { - eigen_assert(m_isInitialized && "LU is not initialized."); - return rank() == rows(); - } - - /** \returns true if the matrix of which *this is the LU decomposition is invertible. - * - * \note This method has to determine which pivots should be considered nonzero. - * For that, it uses the threshold value that you can control by calling - * setThreshold(const RealScalar&). - */ - inline bool isInvertible() const - { - eigen_assert(m_isInitialized && "LU is not initialized."); - return isInjective() && (m_lu.rows() == m_lu.cols()); - } - - /** \returns the inverse of the matrix of which *this is the LU decomposition. - * - * \note If this matrix is not invertible, the returned matrix has undefined coefficients. - * Use isInvertible() to first determine whether this matrix is invertible. - * - * \sa MatrixBase::inverse() - */ - inline const internal::solve_retval<FullPivLU,typename MatrixType::IdentityReturnType> inverse() const - { - eigen_assert(m_isInitialized && "LU is not initialized."); - eigen_assert(m_lu.rows() == m_lu.cols() && "You can't take the inverse of a non-square matrix!"); - return internal::solve_retval<FullPivLU,typename MatrixType::IdentityReturnType> - (*this, MatrixType::Identity(m_lu.rows(), m_lu.cols())); - } - - MatrixType reconstructedMatrix() const; - - inline Index rows() const { return m_lu.rows(); } - inline Index cols() const { return m_lu.cols(); } - - protected: - MatrixType m_lu; - PermutationPType m_p; - PermutationQType m_q; - IntColVectorType m_rowsTranspositions; - IntRowVectorType m_colsTranspositions; - Index m_det_pq, m_nonzero_pivots; - RealScalar m_maxpivot, m_prescribedThreshold; - bool m_isInitialized, m_usePrescribedThreshold; -}; - -template<typename MatrixType> -FullPivLU<MatrixType>::FullPivLU() - : m_isInitialized(false), m_usePrescribedThreshold(false) -{ -} - -template<typename MatrixType> -FullPivLU<MatrixType>::FullPivLU(Index rows, Index cols) - : m_lu(rows, cols), - m_p(rows), - m_q(cols), - m_rowsTranspositions(rows), - m_colsTranspositions(cols), - m_isInitialized(false), - m_usePrescribedThreshold(false) -{ -} - -template<typename MatrixType> -FullPivLU<MatrixType>::FullPivLU(const MatrixType& matrix) - : m_lu(matrix.rows(), matrix.cols()), - m_p(matrix.rows()), - m_q(matrix.cols()), - m_rowsTranspositions(matrix.rows()), - m_colsTranspositions(matrix.cols()), - m_isInitialized(false), - m_usePrescribedThreshold(false) -{ - compute(matrix); -} - -template<typename MatrixType> -FullPivLU<MatrixType>& FullPivLU<MatrixType>::compute(const MatrixType& matrix) -{ - // the permutations are stored as int indices, so just to be sure: - eigen_assert(matrix.rows()<=NumTraits<int>::highest() && matrix.cols()<=NumTraits<int>::highest()); - - m_isInitialized = true; - m_lu = matrix; - - const Index size = matrix.diagonalSize(); - const Index rows = matrix.rows(); - const Index cols = matrix.cols(); - - // will store the transpositions, before we accumulate them at the end. - // can't accumulate on-the-fly because that will be done in reverse order for the rows. - m_rowsTranspositions.resize(matrix.rows()); - m_colsTranspositions.resize(matrix.cols()); - Index number_of_transpositions = 0; // number of NONTRIVIAL transpositions, i.e. m_rowsTranspositions[i]!=i - - m_nonzero_pivots = size; // the generic case is that in which all pivots are nonzero (invertible case) - m_maxpivot = RealScalar(0); - - for(Index k = 0; k < size; ++k) - { - // First, we need to find the pivot. - - // biggest coefficient in the remaining bottom-right corner (starting at row k, col k) - Index row_of_biggest_in_corner, col_of_biggest_in_corner; - RealScalar biggest_in_corner; - biggest_in_corner = m_lu.bottomRightCorner(rows-k, cols-k) - .cwiseAbs() - .maxCoeff(&row_of_biggest_in_corner, &col_of_biggest_in_corner); - row_of_biggest_in_corner += k; // correct the values! since they were computed in the corner, - col_of_biggest_in_corner += k; // need to add k to them. - - if(biggest_in_corner==RealScalar(0)) - { - // before exiting, make sure to initialize the still uninitialized transpositions - // in a sane state without destroying what we already have. - m_nonzero_pivots = k; - for(Index i = k; i < size; ++i) - { - m_rowsTranspositions.coeffRef(i) = i; - m_colsTranspositions.coeffRef(i) = i; - } - break; - } - - if(biggest_in_corner > m_maxpivot) m_maxpivot = biggest_in_corner; - - // Now that we've found the pivot, we need to apply the row/col swaps to - // bring it to the location (k,k). - - m_rowsTranspositions.coeffRef(k) = row_of_biggest_in_corner; - m_colsTranspositions.coeffRef(k) = col_of_biggest_in_corner; - if(k != row_of_biggest_in_corner) { - m_lu.row(k).swap(m_lu.row(row_of_biggest_in_corner)); - ++number_of_transpositions; - } - if(k != col_of_biggest_in_corner) { - m_lu.col(k).swap(m_lu.col(col_of_biggest_in_corner)); - ++number_of_transpositions; - } - - // Now that the pivot is at the right location, we update the remaining - // bottom-right corner by Gaussian elimination. - - if(k<rows-1) - m_lu.col(k).tail(rows-k-1) /= m_lu.coeff(k,k); - if(k<size-1) - m_lu.block(k+1,k+1,rows-k-1,cols-k-1).noalias() -= m_lu.col(k).tail(rows-k-1) * m_lu.row(k).tail(cols-k-1); - } - - // the main loop is over, we still have to accumulate the transpositions to find the - // permutations P and Q - - m_p.setIdentity(rows); - for(Index k = size-1; k >= 0; --k) - m_p.applyTranspositionOnTheRight(k, m_rowsTranspositions.coeff(k)); - - m_q.setIdentity(cols); - for(Index k = 0; k < size; ++k) - m_q.applyTranspositionOnTheRight(k, m_colsTranspositions.coeff(k)); - - m_det_pq = (number_of_transpositions%2) ? -1 : 1; - return *this; -} - -template<typename MatrixType> -typename internal::traits<MatrixType>::Scalar FullPivLU<MatrixType>::determinant() const -{ - eigen_assert(m_isInitialized && "LU is not initialized."); - eigen_assert(m_lu.rows() == m_lu.cols() && "You can't take the determinant of a non-square matrix!"); - return Scalar(m_det_pq) * Scalar(m_lu.diagonal().prod()); -} - -/** \returns the matrix represented by the decomposition, - * i.e., it returns the product: \f$ P^{-1} L U Q^{-1} \f$. - * This function is provided for debug purposes. */ -template<typename MatrixType> -MatrixType FullPivLU<MatrixType>::reconstructedMatrix() const -{ - eigen_assert(m_isInitialized && "LU is not initialized."); - const Index smalldim = (std::min)(m_lu.rows(), m_lu.cols()); - // LU - MatrixType res(m_lu.rows(),m_lu.cols()); - // FIXME the .toDenseMatrix() should not be needed... - res = m_lu.leftCols(smalldim) - .template triangularView<UnitLower>().toDenseMatrix() - * m_lu.topRows(smalldim) - .template triangularView<Upper>().toDenseMatrix(); - - // P^{-1}(LU) - res = m_p.inverse() * res; - - // (P^{-1}LU)Q^{-1} - res = res * m_q.inverse(); - - return res; -} - -/********* Implementation of kernel() **************************************************/ - -namespace internal { -template<typename _MatrixType> -struct kernel_retval<FullPivLU<_MatrixType> > - : kernel_retval_base<FullPivLU<_MatrixType> > -{ - EIGEN_MAKE_KERNEL_HELPERS(FullPivLU<_MatrixType>) - - enum { MaxSmallDimAtCompileTime = EIGEN_SIZE_MIN_PREFER_FIXED( - MatrixType::MaxColsAtCompileTime, - MatrixType::MaxRowsAtCompileTime) - }; - - template<typename Dest> void evalTo(Dest& dst) const - { - using std::abs; - const Index cols = dec().matrixLU().cols(), dimker = cols - rank(); - if(dimker == 0) - { - // The Kernel is just {0}, so it doesn't have a basis properly speaking, but let's - // avoid crashing/asserting as that depends on floating point calculations. Let's - // just return a single column vector filled with zeros. - dst.setZero(); - return; - } - - /* Let us use the following lemma: - * - * Lemma: If the matrix A has the LU decomposition PAQ = LU, - * then Ker A = Q(Ker U). - * - * Proof: trivial: just keep in mind that P, Q, L are invertible. - */ - - /* Thus, all we need to do is to compute Ker U, and then apply Q. - * - * U is upper triangular, with eigenvalues sorted so that any zeros appear at the end. - * Thus, the diagonal of U ends with exactly - * dimKer zero's. Let us use that to construct dimKer linearly - * independent vectors in Ker U. - */ - - Matrix<Index, Dynamic, 1, 0, MaxSmallDimAtCompileTime, 1> pivots(rank()); - RealScalar premultiplied_threshold = dec().maxPivot() * dec().threshold(); - Index p = 0; - for(Index i = 0; i < dec().nonzeroPivots(); ++i) - if(abs(dec().matrixLU().coeff(i,i)) > premultiplied_threshold) - pivots.coeffRef(p++) = i; - eigen_internal_assert(p == rank()); - - // we construct a temporaty trapezoid matrix m, by taking the U matrix and - // permuting the rows and cols to bring the nonnegligible pivots to the top of - // the main diagonal. We need that to be able to apply our triangular solvers. - // FIXME when we get triangularView-for-rectangular-matrices, this can be simplified - Matrix<typename MatrixType::Scalar, Dynamic, Dynamic, MatrixType::Options, - MaxSmallDimAtCompileTime, MatrixType::MaxColsAtCompileTime> - m(dec().matrixLU().block(0, 0, rank(), cols)); - for(Index i = 0; i < rank(); ++i) - { - if(i) m.row(i).head(i).setZero(); - m.row(i).tail(cols-i) = dec().matrixLU().row(pivots.coeff(i)).tail(cols-i); - } - m.block(0, 0, rank(), rank()); - m.block(0, 0, rank(), rank()).template triangularView<StrictlyLower>().setZero(); - for(Index i = 0; i < rank(); ++i) - m.col(i).swap(m.col(pivots.coeff(i))); - - // ok, we have our trapezoid matrix, we can apply the triangular solver. - // notice that the math behind this suggests that we should apply this to the - // negative of the RHS, but for performance we just put the negative sign elsewhere, see below. - m.topLeftCorner(rank(), rank()) - .template triangularView<Upper>().solveInPlace( - m.topRightCorner(rank(), dimker) - ); - - // now we must undo the column permutation that we had applied! - for(Index i = rank()-1; i >= 0; --i) - m.col(i).swap(m.col(pivots.coeff(i))); - - // see the negative sign in the next line, that's what we were talking about above. - for(Index i = 0; i < rank(); ++i) dst.row(dec().permutationQ().indices().coeff(i)) = -m.row(i).tail(dimker); - for(Index i = rank(); i < cols; ++i) dst.row(dec().permutationQ().indices().coeff(i)).setZero(); - for(Index k = 0; k < dimker; ++k) dst.coeffRef(dec().permutationQ().indices().coeff(rank()+k), k) = Scalar(1); - } -}; - -/***** Implementation of image() *****************************************************/ - -template<typename _MatrixType> -struct image_retval<FullPivLU<_MatrixType> > - : image_retval_base<FullPivLU<_MatrixType> > -{ - EIGEN_MAKE_IMAGE_HELPERS(FullPivLU<_MatrixType>) - - enum { MaxSmallDimAtCompileTime = EIGEN_SIZE_MIN_PREFER_FIXED( - MatrixType::MaxColsAtCompileTime, - MatrixType::MaxRowsAtCompileTime) - }; - - template<typename Dest> void evalTo(Dest& dst) const - { - using std::abs; - if(rank() == 0) - { - // The Image is just {0}, so it doesn't have a basis properly speaking, but let's - // avoid crashing/asserting as that depends on floating point calculations. Let's - // just return a single column vector filled with zeros. - dst.setZero(); - return; - } - - Matrix<Index, Dynamic, 1, 0, MaxSmallDimAtCompileTime, 1> pivots(rank()); - RealScalar premultiplied_threshold = dec().maxPivot() * dec().threshold(); - Index p = 0; - for(Index i = 0; i < dec().nonzeroPivots(); ++i) - if(abs(dec().matrixLU().coeff(i,i)) > premultiplied_threshold) - pivots.coeffRef(p++) = i; - eigen_internal_assert(p == rank()); - - for(Index i = 0; i < rank(); ++i) - dst.col(i) = originalMatrix().col(dec().permutationQ().indices().coeff(pivots.coeff(i))); - } -}; - -/***** Implementation of solve() *****************************************************/ - -template<typename _MatrixType, typename Rhs> -struct solve_retval<FullPivLU<_MatrixType>, Rhs> - : solve_retval_base<FullPivLU<_MatrixType>, Rhs> -{ - EIGEN_MAKE_SOLVE_HELPERS(FullPivLU<_MatrixType>,Rhs) - - template<typename Dest> void evalTo(Dest& dst) const - { - /* The decomposition PAQ = LU can be rewritten as A = P^{-1} L U Q^{-1}. - * So we proceed as follows: - * Step 1: compute c = P * rhs. - * Step 2: replace c by the solution x to Lx = c. Exists because L is invertible. - * Step 3: replace c by the solution x to Ux = c. May or may not exist. - * Step 4: result = Q * c; - */ - - const Index rows = dec().rows(), cols = dec().cols(), - nonzero_pivots = dec().nonzeroPivots(); - eigen_assert(rhs().rows() == rows); - const Index smalldim = (std::min)(rows, cols); - - if(nonzero_pivots == 0) - { - dst.setZero(); - return; - } - - typename Rhs::PlainObject c(rhs().rows(), rhs().cols()); - - // Step 1 - c = dec().permutationP() * rhs(); - - // Step 2 - dec().matrixLU() - .topLeftCorner(smalldim,smalldim) - .template triangularView<UnitLower>() - .solveInPlace(c.topRows(smalldim)); - if(rows>cols) - { - c.bottomRows(rows-cols) - -= dec().matrixLU().bottomRows(rows-cols) - * c.topRows(cols); - } - - // Step 3 - dec().matrixLU() - .topLeftCorner(nonzero_pivots, nonzero_pivots) - .template triangularView<Upper>() - .solveInPlace(c.topRows(nonzero_pivots)); - - // Step 4 - for(Index i = 0; i < nonzero_pivots; ++i) - dst.row(dec().permutationQ().indices().coeff(i)) = c.row(i); - for(Index i = nonzero_pivots; i < dec().matrixLU().cols(); ++i) - dst.row(dec().permutationQ().indices().coeff(i)).setZero(); - } -}; - -} // end namespace internal - -/******* MatrixBase methods *****************************************************************/ - -/** \lu_module - * - * \return the full-pivoting LU decomposition of \c *this. - * - * \sa class FullPivLU - */ -#ifndef __CUDACC__ -template<typename Derived> -inline const FullPivLU<typename MatrixBase<Derived>::PlainObject> -MatrixBase<Derived>::fullPivLu() const -{ - return FullPivLU<PlainObject>(eval()); -} -#endif - -} // end namespace Eigen - -#endif // EIGEN_LU_H diff --git a/third_party/eigen3/Eigen/src/LU/Inverse.h b/third_party/eigen3/Eigen/src/LU/Inverse.h deleted file mode 100644 index 8d1364e0a9..0000000000 --- a/third_party/eigen3/Eigen/src/LU/Inverse.h +++ /dev/null @@ -1,417 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008-2010 Benoit Jacob <jacob.benoit.1@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_INVERSE_H -#define EIGEN_INVERSE_H - -namespace Eigen { - -namespace internal { - -/********************************** -*** General case implementation *** -**********************************/ - -template<typename MatrixType, typename ResultType, int Size = MatrixType::RowsAtCompileTime> -struct compute_inverse -{ - EIGEN_DEVICE_FUNC - static inline void run(const MatrixType& matrix, ResultType& result) - { - result = matrix.partialPivLu().inverse(); - } -}; - -template<typename MatrixType, typename ResultType, int Size = MatrixType::RowsAtCompileTime> -struct compute_inverse_and_det_with_check { /* nothing! general case not supported. */ }; - -/**************************** -*** Size 1 implementation *** -****************************/ - -template<typename MatrixType, typename ResultType> -struct compute_inverse<MatrixType, ResultType, 1> -{ - EIGEN_DEVICE_FUNC - static inline void run(const MatrixType& matrix, ResultType& result) - { - typedef typename MatrixType::Scalar Scalar; - result.coeffRef(0,0) = Scalar(1) / matrix.coeff(0,0); - } -}; - -template<typename MatrixType, typename ResultType> -struct compute_inverse_and_det_with_check<MatrixType, ResultType, 1> -{ - EIGEN_DEVICE_FUNC - static inline void run( - const MatrixType& matrix, - const typename MatrixType::RealScalar& absDeterminantThreshold, - ResultType& result, - typename ResultType::Scalar& determinant, - bool& invertible - ) - { - using std::abs; - determinant = matrix.coeff(0,0); - invertible = abs(determinant) > absDeterminantThreshold; - if(invertible) result.coeffRef(0,0) = typename ResultType::Scalar(1) / determinant; - } -}; - -/**************************** -*** Size 2 implementation *** -****************************/ - -template<typename MatrixType, typename ResultType> -EIGEN_DEVICE_FUNC -inline void compute_inverse_size2_helper( - const MatrixType& matrix, const typename ResultType::Scalar& invdet, - ResultType& result) -{ - result.coeffRef(0,0) = matrix.coeff(1,1) * invdet; - result.coeffRef(1,0) = -matrix.coeff(1,0) * invdet; - result.coeffRef(0,1) = -matrix.coeff(0,1) * invdet; - result.coeffRef(1,1) = matrix.coeff(0,0) * invdet; -} - -template<typename MatrixType, typename ResultType> -struct compute_inverse<MatrixType, ResultType, 2> -{ - EIGEN_DEVICE_FUNC - static inline void run(const MatrixType& matrix, ResultType& result) - { - typedef typename ResultType::Scalar Scalar; - const Scalar invdet = typename MatrixType::Scalar(1) / matrix.determinant(); - compute_inverse_size2_helper(matrix, invdet, result); - } -}; - -template<typename MatrixType, typename ResultType> -struct compute_inverse_and_det_with_check<MatrixType, ResultType, 2> -{ - EIGEN_DEVICE_FUNC - static inline void run( - const MatrixType& matrix, - const typename MatrixType::RealScalar& absDeterminantThreshold, - ResultType& inverse, - typename ResultType::Scalar& determinant, - bool& invertible - ) - { - using std::abs; - typedef typename ResultType::Scalar Scalar; - determinant = matrix.determinant(); - invertible = abs(determinant) > absDeterminantThreshold; - if(!invertible) return; - const Scalar invdet = Scalar(1) / determinant; - compute_inverse_size2_helper(matrix, invdet, inverse); - } -}; - -/**************************** -*** Size 3 implementation *** -****************************/ - -template<typename MatrixType, int i, int j> -EIGEN_DEVICE_FUNC -inline typename MatrixType::Scalar cofactor_3x3(const MatrixType& m) -{ - enum { - i1 = (i+1) % 3, - i2 = (i+2) % 3, - j1 = (j+1) % 3, - j2 = (j+2) % 3 - }; - return m.coeff(i1, j1) * m.coeff(i2, j2) - - m.coeff(i1, j2) * m.coeff(i2, j1); -} - -template<typename MatrixType, typename ResultType> -EIGEN_DEVICE_FUNC -inline void compute_inverse_size3_helper( - const MatrixType& matrix, - const typename ResultType::Scalar& invdet, - const Matrix<typename ResultType::Scalar,3,1>& cofactors_col0, - ResultType& result) -{ - result.row(0) = cofactors_col0 * invdet; - result.coeffRef(1,0) = cofactor_3x3<MatrixType,0,1>(matrix) * invdet; - result.coeffRef(1,1) = cofactor_3x3<MatrixType,1,1>(matrix) * invdet; - result.coeffRef(1,2) = cofactor_3x3<MatrixType,2,1>(matrix) * invdet; - result.coeffRef(2,0) = cofactor_3x3<MatrixType,0,2>(matrix) * invdet; - result.coeffRef(2,1) = cofactor_3x3<MatrixType,1,2>(matrix) * invdet; - result.coeffRef(2,2) = cofactor_3x3<MatrixType,2,2>(matrix) * invdet; -} - -template<typename MatrixType, typename ResultType> -struct compute_inverse<MatrixType, ResultType, 3> -{ - EIGEN_DEVICE_FUNC - static inline void run(const MatrixType& matrix, ResultType& result) - { - typedef typename ResultType::Scalar Scalar; - Matrix<typename MatrixType::Scalar,3,1> cofactors_col0; - cofactors_col0.coeffRef(0) = cofactor_3x3<MatrixType,0,0>(matrix); - cofactors_col0.coeffRef(1) = cofactor_3x3<MatrixType,1,0>(matrix); - cofactors_col0.coeffRef(2) = cofactor_3x3<MatrixType,2,0>(matrix); - const Scalar det = (cofactors_col0.cwiseProduct(matrix.col(0))).sum(); - const Scalar invdet = Scalar(1) / det; - compute_inverse_size3_helper(matrix, invdet, cofactors_col0, result); - } -}; - -template<typename MatrixType, typename ResultType> -struct compute_inverse_and_det_with_check<MatrixType, ResultType, 3> -{ - EIGEN_DEVICE_FUNC - static inline void run( - const MatrixType& matrix, - const typename MatrixType::RealScalar& absDeterminantThreshold, - ResultType& inverse, - typename ResultType::Scalar& determinant, - bool& invertible - ) - { - using std::abs; - typedef typename ResultType::Scalar Scalar; - Matrix<Scalar,3,1> cofactors_col0; - cofactors_col0.coeffRef(0) = cofactor_3x3<MatrixType,0,0>(matrix); - cofactors_col0.coeffRef(1) = cofactor_3x3<MatrixType,1,0>(matrix); - cofactors_col0.coeffRef(2) = cofactor_3x3<MatrixType,2,0>(matrix); - determinant = (cofactors_col0.cwiseProduct(matrix.col(0))).sum(); - invertible = abs(determinant) > absDeterminantThreshold; - if(!invertible) return; - const Scalar invdet = Scalar(1) / determinant; - compute_inverse_size3_helper(matrix, invdet, cofactors_col0, inverse); - } -}; - -/**************************** -*** Size 4 implementation *** -****************************/ - -template<typename Derived> -EIGEN_DEVICE_FUNC -inline const typename Derived::Scalar general_det3_helper -(const MatrixBase<Derived>& matrix, int i1, int i2, int i3, int j1, int j2, int j3) -{ - return matrix.coeff(i1,j1) - * (matrix.coeff(i2,j2) * matrix.coeff(i3,j3) - matrix.coeff(i2,j3) * matrix.coeff(i3,j2)); -} - -template<typename MatrixType, int i, int j> -EIGEN_DEVICE_FUNC -inline typename MatrixType::Scalar cofactor_4x4(const MatrixType& matrix) -{ - enum { - i1 = (i+1) % 4, - i2 = (i+2) % 4, - i3 = (i+3) % 4, - j1 = (j+1) % 4, - j2 = (j+2) % 4, - j3 = (j+3) % 4 - }; - return general_det3_helper(matrix, i1, i2, i3, j1, j2, j3) - + general_det3_helper(matrix, i2, i3, i1, j1, j2, j3) - + general_det3_helper(matrix, i3, i1, i2, j1, j2, j3); -} - -template<int Arch, typename Scalar, typename MatrixType, typename ResultType> -struct compute_inverse_size4 -{ - EIGEN_DEVICE_FUNC - static void run(const MatrixType& matrix, ResultType& result) - { - result.coeffRef(0,0) = cofactor_4x4<MatrixType,0,0>(matrix); - result.coeffRef(1,0) = -cofactor_4x4<MatrixType,0,1>(matrix); - result.coeffRef(2,0) = cofactor_4x4<MatrixType,0,2>(matrix); - result.coeffRef(3,0) = -cofactor_4x4<MatrixType,0,3>(matrix); - result.coeffRef(0,2) = cofactor_4x4<MatrixType,2,0>(matrix); - result.coeffRef(1,2) = -cofactor_4x4<MatrixType,2,1>(matrix); - result.coeffRef(2,2) = cofactor_4x4<MatrixType,2,2>(matrix); - result.coeffRef(3,2) = -cofactor_4x4<MatrixType,2,3>(matrix); - result.coeffRef(0,1) = -cofactor_4x4<MatrixType,1,0>(matrix); - result.coeffRef(1,1) = cofactor_4x4<MatrixType,1,1>(matrix); - result.coeffRef(2,1) = -cofactor_4x4<MatrixType,1,2>(matrix); - result.coeffRef(3,1) = cofactor_4x4<MatrixType,1,3>(matrix); - result.coeffRef(0,3) = -cofactor_4x4<MatrixType,3,0>(matrix); - result.coeffRef(1,3) = cofactor_4x4<MatrixType,3,1>(matrix); - result.coeffRef(2,3) = -cofactor_4x4<MatrixType,3,2>(matrix); - result.coeffRef(3,3) = cofactor_4x4<MatrixType,3,3>(matrix); - result /= (matrix.col(0).cwiseProduct(result.row(0).transpose())).sum(); - } -}; - -template<typename MatrixType, typename ResultType> -struct compute_inverse<MatrixType, ResultType, 4> - : compute_inverse_size4<Architecture::Target, typename MatrixType::Scalar, - MatrixType, ResultType> -{ -}; - -template<typename MatrixType, typename ResultType> -struct compute_inverse_and_det_with_check<MatrixType, ResultType, 4> -{ - EIGEN_DEVICE_FUNC - static inline void run( - const MatrixType& matrix, - const typename MatrixType::RealScalar& absDeterminantThreshold, - ResultType& inverse, - typename ResultType::Scalar& determinant, - bool& invertible - ) - { - using std::abs; - determinant = matrix.determinant(); - invertible = abs(determinant) > absDeterminantThreshold; - if(invertible) compute_inverse<MatrixType, ResultType>::run(matrix, inverse); - } -}; - -/************************* -*** MatrixBase methods *** -*************************/ - -template<typename MatrixType> -struct traits<inverse_impl<MatrixType> > -{ - typedef typename MatrixType::PlainObject ReturnType; -}; - -template<typename MatrixType> -struct inverse_impl : public ReturnByValue<inverse_impl<MatrixType> > -{ - typedef typename MatrixType::Index Index; - typedef typename internal::eval<MatrixType>::type MatrixTypeNested; - typedef typename remove_all<MatrixTypeNested>::type MatrixTypeNestedCleaned; - MatrixTypeNested m_matrix; - - EIGEN_DEVICE_FUNC - inverse_impl(const MatrixType& matrix) - : m_matrix(matrix) - {} - - EIGEN_DEVICE_FUNC inline Index rows() const { return m_matrix.rows(); } - EIGEN_DEVICE_FUNC inline Index cols() const { return m_matrix.cols(); } - - template<typename Dest> - EIGEN_DEVICE_FUNC - inline void evalTo(Dest& dst) const - { - const int Size = EIGEN_PLAIN_ENUM_MIN(MatrixType::ColsAtCompileTime,Dest::ColsAtCompileTime); - EIGEN_ONLY_USED_FOR_DEBUG(Size); - eigen_assert(( (Size<=1) || (Size>4) || (extract_data(m_matrix)!=extract_data(dst))) - && "Aliasing problem detected in inverse(), you need to do inverse().eval() here."); - - compute_inverse<MatrixTypeNestedCleaned, Dest>::run(m_matrix, dst); - } -}; - -} // end namespace internal - -/** \lu_module - * - * \returns the matrix inverse of this matrix. - * - * For small fixed sizes up to 4x4, this method uses cofactors. - * In the general case, this method uses class PartialPivLU. - * - * \note This matrix must be invertible, otherwise the result is undefined. If you need an - * invertibility check, do the following: - * \li for fixed sizes up to 4x4, use computeInverseAndDetWithCheck(). - * \li for the general case, use class FullPivLU. - * - * Example: \include MatrixBase_inverse.cpp - * Output: \verbinclude MatrixBase_inverse.out - * - * \sa computeInverseAndDetWithCheck() - */ -template<typename Derived> -inline const internal::inverse_impl<Derived> MatrixBase<Derived>::inverse() const -{ - EIGEN_STATIC_ASSERT(!NumTraits<Scalar>::IsInteger,THIS_FUNCTION_IS_NOT_FOR_INTEGER_NUMERIC_TYPES) - eigen_assert(rows() == cols()); - return internal::inverse_impl<Derived>(derived()); -} - -/** \lu_module - * - * Computation of matrix inverse and determinant, with invertibility check. - * - * This is only for fixed-size square matrices of size up to 4x4. - * - * \param inverse Reference to the matrix in which to store the inverse. - * \param determinant Reference to the variable in which to store the determinant. - * \param invertible Reference to the bool variable in which to store whether the matrix is invertible. - * \param absDeterminantThreshold Optional parameter controlling the invertibility check. - * The matrix will be declared invertible if the absolute value of its - * determinant is greater than this threshold. - * - * Example: \include MatrixBase_computeInverseAndDetWithCheck.cpp - * Output: \verbinclude MatrixBase_computeInverseAndDetWithCheck.out - * - * \sa inverse(), computeInverseWithCheck() - */ -template<typename Derived> -template<typename ResultType> -inline void MatrixBase<Derived>::computeInverseAndDetWithCheck( - ResultType& inverse, - typename ResultType::Scalar& determinant, - bool& invertible, - const RealScalar& absDeterminantThreshold - ) const -{ - // i'd love to put some static assertions there, but SFINAE means that they have no effect... - eigen_assert(rows() == cols()); - // for 2x2, it's worth giving a chance to avoid evaluating. - // for larger sizes, evaluating has negligible cost and limits code size. - typedef typename internal::conditional< - RowsAtCompileTime == 2, - typename internal::remove_all<typename internal::nested<Derived, 2>::type>::type, - PlainObject - >::type MatrixType; - internal::compute_inverse_and_det_with_check<MatrixType, ResultType>::run - (derived(), absDeterminantThreshold, inverse, determinant, invertible); -} - -/** \lu_module - * - * Computation of matrix inverse, with invertibility check. - * - * This is only for fixed-size square matrices of size up to 4x4. - * - * \param inverse Reference to the matrix in which to store the inverse. - * \param invertible Reference to the bool variable in which to store whether the matrix is invertible. - * \param absDeterminantThreshold Optional parameter controlling the invertibility check. - * The matrix will be declared invertible if the absolute value of its - * determinant is greater than this threshold. - * - * Example: \include MatrixBase_computeInverseWithCheck.cpp - * Output: \verbinclude MatrixBase_computeInverseWithCheck.out - * - * \sa inverse(), computeInverseAndDetWithCheck() - */ -template<typename Derived> -template<typename ResultType> -inline void MatrixBase<Derived>::computeInverseWithCheck( - ResultType& inverse, - bool& invertible, - const RealScalar& absDeterminantThreshold - ) const -{ - RealScalar determinant; - // i'd love to put some static assertions there, but SFINAE means that they have no effect... - eigen_assert(rows() == cols()); - computeInverseAndDetWithCheck(inverse,determinant,invertible,absDeterminantThreshold); -} - -} // end namespace Eigen - -#endif // EIGEN_INVERSE_H diff --git a/third_party/eigen3/Eigen/src/LU/PartialPivLU.h b/third_party/eigen3/Eigen/src/LU/PartialPivLU.h deleted file mode 100644 index 1d389ecac7..0000000000 --- a/third_party/eigen3/Eigen/src/LU/PartialPivLU.h +++ /dev/null @@ -1,506 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2006-2009 Benoit Jacob <jacob.benoit.1@gmail.com> -// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_PARTIALLU_H -#define EIGEN_PARTIALLU_H - -namespace Eigen { - -/** \ingroup LU_Module - * - * \class PartialPivLU - * - * \brief LU decomposition of a matrix with partial pivoting, and related features - * - * \param MatrixType the type of the matrix of which we are computing the LU decomposition - * - * This class represents a LU decomposition of a \b square \b invertible matrix, with partial pivoting: the matrix A - * is decomposed as A = PLU where L is unit-lower-triangular, U is upper-triangular, and P - * is a permutation matrix. - * - * Typically, partial pivoting LU decomposition is only considered numerically stable for square invertible - * matrices. Thus LAPACK's dgesv and dgesvx require the matrix to be square and invertible. The present class - * does the same. It will assert that the matrix is square, but it won't (actually it can't) check that the - * matrix is invertible: it is your task to check that you only use this decomposition on invertible matrices. - * - * The guaranteed safe alternative, working for all matrices, is the full pivoting LU decomposition, provided - * by class FullPivLU. - * - * This is \b not a rank-revealing LU decomposition. Many features are intentionally absent from this class, - * such as rank computation. If you need these features, use class FullPivLU. - * - * This LU decomposition is suitable to invert invertible matrices. It is what MatrixBase::inverse() uses - * in the general case. - * On the other hand, it is \b not suitable to determine whether a given matrix is invertible. - * - * The data of the LU decomposition can be directly accessed through the methods matrixLU(), permutationP(). - * - * \sa MatrixBase::partialPivLu(), MatrixBase::determinant(), MatrixBase::inverse(), MatrixBase::computeInverse(), class FullPivLU - */ -template<typename _MatrixType> class PartialPivLU -{ - public: - - typedef _MatrixType MatrixType; - enum { - RowsAtCompileTime = MatrixType::RowsAtCompileTime, - ColsAtCompileTime = MatrixType::ColsAtCompileTime, - Options = MatrixType::Options, - MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, - MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime - }; - typedef typename MatrixType::Scalar Scalar; - typedef typename NumTraits<typename MatrixType::Scalar>::Real RealScalar; - typedef typename internal::traits<MatrixType>::StorageKind StorageKind; - typedef typename MatrixType::Index Index; - typedef PermutationMatrix<RowsAtCompileTime, MaxRowsAtCompileTime> PermutationType; - typedef Transpositions<RowsAtCompileTime, MaxRowsAtCompileTime> TranspositionType; - - - /** - * \brief Default Constructor. - * - * The default constructor is useful in cases in which the user intends to - * perform decompositions via PartialPivLU::compute(const MatrixType&). - */ - PartialPivLU(); - - /** \brief Default Constructor with memory preallocation - * - * Like the default constructor but with preallocation of the internal data - * according to the specified problem \a size. - * \sa PartialPivLU() - */ - PartialPivLU(Index size); - - /** Constructor. - * - * \param matrix the matrix of which to compute the LU decomposition. - * - * \warning The matrix should have full rank (e.g. if it's square, it should be invertible). - * If you need to deal with non-full rank, use class FullPivLU instead. - */ - PartialPivLU(const MatrixType& matrix); - - PartialPivLU& compute(const MatrixType& matrix); - - /** \returns the LU decomposition matrix: the upper-triangular part is U, the - * unit-lower-triangular part is L (at least for square matrices; in the non-square - * case, special care is needed, see the documentation of class FullPivLU). - * - * \sa matrixL(), matrixU() - */ - inline const MatrixType& matrixLU() const - { - eigen_assert(m_isInitialized && "PartialPivLU is not initialized."); - return m_lu; - } - - /** \returns the permutation matrix P. - */ - inline const PermutationType& permutationP() const - { - eigen_assert(m_isInitialized && "PartialPivLU is not initialized."); - return m_p; - } - - /** This method returns the solution x to the equation Ax=b, where A is the matrix of which - * *this is the LU decomposition. - * - * \param b the right-hand-side of the equation to solve. Can be a vector or a matrix, - * the only requirement in order for the equation to make sense is that - * b.rows()==A.rows(), where A is the matrix of which *this is the LU decomposition. - * - * \returns the solution. - * - * Example: \include PartialPivLU_solve.cpp - * Output: \verbinclude PartialPivLU_solve.out - * - * Since this PartialPivLU class assumes anyway that the matrix A is invertible, the solution - * theoretically exists and is unique regardless of b. - * - * \sa TriangularView::solve(), inverse(), computeInverse() - */ - template<typename Rhs> - inline const internal::solve_retval<PartialPivLU, Rhs> - solve(const MatrixBase<Rhs>& b) const - { - eigen_assert(m_isInitialized && "PartialPivLU is not initialized."); - return internal::solve_retval<PartialPivLU, Rhs>(*this, b.derived()); - } - - /** \returns the inverse of the matrix of which *this is the LU decomposition. - * - * \warning The matrix being decomposed here is assumed to be invertible. If you need to check for - * invertibility, use class FullPivLU instead. - * - * \sa MatrixBase::inverse(), LU::inverse() - */ - inline const internal::solve_retval<PartialPivLU,typename MatrixType::IdentityReturnType> inverse() const - { - eigen_assert(m_isInitialized && "PartialPivLU is not initialized."); - return internal::solve_retval<PartialPivLU,typename MatrixType::IdentityReturnType> - (*this, MatrixType::Identity(m_lu.rows(), m_lu.cols())); - } - - /** \returns the determinant of the matrix of which - * *this is the LU decomposition. It has only linear complexity - * (that is, O(n) where n is the dimension of the square matrix) - * as the LU decomposition has already been computed. - * - * \note For fixed-size matrices of size up to 4, MatrixBase::determinant() offers - * optimized paths. - * - * \warning a determinant can be very big or small, so for matrices - * of large enough dimension, there is a risk of overflow/underflow. - * - * \sa MatrixBase::determinant() - */ - typename internal::traits<MatrixType>::Scalar determinant() const; - - MatrixType reconstructedMatrix() const; - - inline Index rows() const { return m_lu.rows(); } - inline Index cols() const { return m_lu.cols(); } - - protected: - MatrixType m_lu; - PermutationType m_p; - TranspositionType m_rowsTranspositions; - Index m_det_p; - bool m_isInitialized; -}; - -template<typename MatrixType> -PartialPivLU<MatrixType>::PartialPivLU() - : m_lu(), - m_p(), - m_rowsTranspositions(), - m_det_p(0), - m_isInitialized(false) -{ -} - -template<typename MatrixType> -PartialPivLU<MatrixType>::PartialPivLU(Index size) - : m_lu(size, size), - m_p(size), - m_rowsTranspositions(size), - m_det_p(0), - m_isInitialized(false) -{ -} - -template<typename MatrixType> -PartialPivLU<MatrixType>::PartialPivLU(const MatrixType& matrix) - : m_lu(matrix.rows(), matrix.rows()), - m_p(matrix.rows()), - m_rowsTranspositions(matrix.rows()), - m_det_p(0), - m_isInitialized(false) -{ - compute(matrix); -} - -namespace internal { - -/** \internal This is the blocked version of fullpivlu_unblocked() */ -template<typename Scalar, int StorageOrder, typename PivIndex> -struct partial_lu_impl -{ - // FIXME add a stride to Map, so that the following mapping becomes easier, - // another option would be to create an expression being able to automatically - // warp any Map, Matrix, and Block expressions as a unique type, but since that's exactly - // a Map + stride, why not adding a stride to Map, and convenient ctors from a Matrix, - // and Block. - typedef Map<Matrix<Scalar, Dynamic, Dynamic, StorageOrder> > MapLU; - typedef Block<MapLU, Dynamic, Dynamic> MatrixType; - typedef Block<MatrixType,Dynamic,Dynamic> BlockType; - typedef typename MatrixType::RealScalar RealScalar; - typedef typename MatrixType::Index Index; - - /** \internal performs the LU decomposition in-place of the matrix \a lu - * using an unblocked algorithm. - * - * In addition, this function returns the row transpositions in the - * vector \a row_transpositions which must have a size equal to the number - * of columns of the matrix \a lu, and an integer \a nb_transpositions - * which returns the actual number of transpositions. - * - * \returns The index of the first pivot which is exactly zero if any, or a negative number otherwise. - */ - static Index unblocked_lu(MatrixType& lu, PivIndex* row_transpositions, PivIndex& nb_transpositions) - { - const Index rows = lu.rows(); - const Index cols = lu.cols(); - const Index size = (std::min)(rows,cols); - nb_transpositions = 0; - Index first_zero_pivot = -1; - for(Index k = 0; k < size; ++k) - { - Index rrows = rows-k-1; - Index rcols = cols-k-1; - - Index row_of_biggest_in_col; - RealScalar biggest_in_corner - = lu.col(k).tail(rows-k).cwiseAbs().maxCoeff(&row_of_biggest_in_col); - row_of_biggest_in_col += k; - - row_transpositions[k] = PivIndex(row_of_biggest_in_col); - - if(biggest_in_corner != RealScalar(0)) - { - if(k != row_of_biggest_in_col) - { - lu.row(k).swap(lu.row(row_of_biggest_in_col)); - ++nb_transpositions; - } - - // FIXME shall we introduce a safe quotient expression in cas 1/lu.coeff(k,k) - // overflow but not the actual quotient? - lu.col(k).tail(rrows) /= lu.coeff(k,k); - } - else if(first_zero_pivot==-1) - { - // the pivot is exactly zero, we record the index of the first pivot which is exactly 0, - // and continue the factorization such we still have A = PLU - first_zero_pivot = k; - } - - if(k<rows-1) - lu.bottomRightCorner(rrows,rcols).noalias() -= lu.col(k).tail(rrows) * lu.row(k).tail(rcols); - } - return first_zero_pivot; - } - - /** \internal performs the LU decomposition in-place of the matrix represented - * by the variables \a rows, \a cols, \a lu_data, and \a lu_stride using a - * recursive, blocked algorithm. - * - * In addition, this function returns the row transpositions in the - * vector \a row_transpositions which must have a size equal to the number - * of columns of the matrix \a lu, and an integer \a nb_transpositions - * which returns the actual number of transpositions. - * - * \returns The index of the first pivot which is exactly zero if any, or a negative number otherwise. - * - * \note This very low level interface using pointers, etc. is to: - * 1 - reduce the number of instanciations to the strict minimum - * 2 - avoid infinite recursion of the instanciations with Block<Block<Block<...> > > - */ - static Index blocked_lu(Index rows, Index cols, Scalar* lu_data, Index luStride, PivIndex* row_transpositions, PivIndex& nb_transpositions, Index maxBlockSize=256) - { - MapLU lu1(lu_data,StorageOrder==RowMajor?rows:luStride,StorageOrder==RowMajor?luStride:cols); - MatrixType lu(lu1,0,0,rows,cols); - - const Index size = (std::min)(rows,cols); - - // if the matrix is too small, no blocking: - if(size<=16) - { - return unblocked_lu(lu, row_transpositions, nb_transpositions); - } - - // automatically adjust the number of subdivisions to the size - // of the matrix so that there is enough sub blocks: - Index blockSize; - { - blockSize = size/8; - blockSize = (blockSize/16)*16; - blockSize = (std::min)((std::max)(blockSize,Index(8)), maxBlockSize); - } - - nb_transpositions = 0; - Index first_zero_pivot = -1; - for(Index k = 0; k < size; k+=blockSize) - { - Index bs = (std::min)(size-k,blockSize); // actual size of the block - Index trows = rows - k - bs; // trailing rows - Index tsize = size - k - bs; // trailing size - - // partition the matrix: - // A00 | A01 | A02 - // lu = A_0 | A_1 | A_2 = A10 | A11 | A12 - // A20 | A21 | A22 - BlockType A_0(lu,0,0,rows,k); - BlockType A_2(lu,0,k+bs,rows,tsize); - BlockType A11(lu,k,k,bs,bs); - BlockType A12(lu,k,k+bs,bs,tsize); - BlockType A21(lu,k+bs,k,trows,bs); - BlockType A22(lu,k+bs,k+bs,trows,tsize); - - PivIndex nb_transpositions_in_panel; - // recursively call the blocked LU algorithm on [A11^T A21^T]^T - // with a very small blocking size: - Index ret = blocked_lu(trows+bs, bs, &lu.coeffRef(k,k), luStride, - row_transpositions+k, nb_transpositions_in_panel, 16); - if(ret>=0 && first_zero_pivot==-1) - first_zero_pivot = k+ret; - - nb_transpositions += nb_transpositions_in_panel; - // update permutations and apply them to A_0 - for(Index i=k; i<k+bs; ++i) - { - Index piv = (row_transpositions[i] += k); - A_0.row(i).swap(A_0.row(piv)); - } - - if(trows) - { - // apply permutations to A_2 - for(Index i=k;i<k+bs; ++i) - A_2.row(i).swap(A_2.row(row_transpositions[i])); - - // A12 = A11^-1 A12 - A11.template triangularView<UnitLower>().solveInPlace(A12); - - A22.noalias() -= A21 * A12; - } - } - return first_zero_pivot; - } -}; - -/** \internal performs the LU decomposition with partial pivoting in-place. - */ -template<typename MatrixType, typename TranspositionType> -void partial_lu_inplace(MatrixType& lu, TranspositionType& row_transpositions, typename TranspositionType::Index& nb_transpositions) -{ - eigen_assert(lu.cols() == row_transpositions.size()); - eigen_assert((&row_transpositions.coeffRef(1)-&row_transpositions.coeffRef(0)) == 1); - - partial_lu_impl - <typename MatrixType::Scalar, MatrixType::Flags&RowMajorBit?RowMajor:ColMajor, typename TranspositionType::Index> - ::blocked_lu(lu.rows(), lu.cols(), &lu.coeffRef(0,0), lu.outerStride(), &row_transpositions.coeffRef(0), nb_transpositions); -} - -} // end namespace internal - -template<typename MatrixType> -PartialPivLU<MatrixType>& PartialPivLU<MatrixType>::compute(const MatrixType& matrix) -{ - // the row permutation is stored as int indices, so just to be sure: - eigen_assert(matrix.rows()<NumTraits<int>::highest()); - - m_lu = matrix; - - eigen_assert(matrix.rows() == matrix.cols() && "PartialPivLU is only for square (and moreover invertible) matrices"); - const Index size = matrix.rows(); - - m_rowsTranspositions.resize(size); - - typename TranspositionType::Index nb_transpositions; - internal::partial_lu_inplace(m_lu, m_rowsTranspositions, nb_transpositions); - m_det_p = (nb_transpositions%2) ? -1 : 1; - - m_p = m_rowsTranspositions; - - m_isInitialized = true; - return *this; -} - -template<typename MatrixType> -typename internal::traits<MatrixType>::Scalar PartialPivLU<MatrixType>::determinant() const -{ - eigen_assert(m_isInitialized && "PartialPivLU is not initialized."); - return Scalar(m_det_p) * m_lu.diagonal().prod(); -} - -/** \returns the matrix represented by the decomposition, - * i.e., it returns the product: P^{-1} L U. - * This function is provided for debug purpose. */ -template<typename MatrixType> -MatrixType PartialPivLU<MatrixType>::reconstructedMatrix() const -{ - eigen_assert(m_isInitialized && "LU is not initialized."); - // LU - MatrixType res = m_lu.template triangularView<UnitLower>().toDenseMatrix() - * m_lu.template triangularView<Upper>(); - - // P^{-1}(LU) - res = m_p.inverse() * res; - - return res; -} - -/***** Implementation of solve() *****************************************************/ - -namespace internal { - -template<typename _MatrixType, typename Rhs> -struct solve_retval<PartialPivLU<_MatrixType>, Rhs> - : solve_retval_base<PartialPivLU<_MatrixType>, Rhs> -{ - EIGEN_MAKE_SOLVE_HELPERS(PartialPivLU<_MatrixType>,Rhs) - - template<typename Dest> void evalTo(Dest& dst) const - { - /* The decomposition PA = LU can be rewritten as A = P^{-1} L U. - * So we proceed as follows: - * Step 1: compute c = Pb. - * Step 2: replace c by the solution x to Lx = c. - * Step 3: replace c by the solution x to Ux = c. - */ - - eigen_assert(rhs().rows() == dec().matrixLU().rows()); - - // Step 1 - dst = dec().permutationP() * rhs(); - - // Step 2 - dec().matrixLU().template triangularView<UnitLower>().solveInPlace(dst); - - // Step 3 - dec().matrixLU().template triangularView<Upper>().solveInPlace(dst); - } -}; - -} // end namespace internal - -/******** MatrixBase methods *******/ - -/** \lu_module - * - * \return the partial-pivoting LU decomposition of \c *this. - * - * \sa class PartialPivLU - */ -#ifndef __CUDACC__ -template<typename Derived> -inline const PartialPivLU<typename MatrixBase<Derived>::PlainObject> -MatrixBase<Derived>::partialPivLu() const -{ - return PartialPivLU<PlainObject>(eval()); -} -#endif - -#if EIGEN2_SUPPORT_STAGE > STAGE20_RESOLVE_API_CONFLICTS -/** \lu_module - * - * Synonym of partialPivLu(). - * - * \return the partial-pivoting LU decomposition of \c *this. - * - * \sa class PartialPivLU - */ -#ifndef __CUDACC__ -template<typename Derived> -inline const PartialPivLU<typename MatrixBase<Derived>::PlainObject> -MatrixBase<Derived>::lu() const -{ - return PartialPivLU<PlainObject>(eval()); -} -#endif - -#endif - -} // end namespace Eigen - -#endif // EIGEN_PARTIALLU_H diff --git a/third_party/eigen3/Eigen/src/LU/PartialPivLU_MKL.h b/third_party/eigen3/Eigen/src/LU/PartialPivLU_MKL.h deleted file mode 100644 index 9035953c82..0000000000 --- a/third_party/eigen3/Eigen/src/LU/PartialPivLU_MKL.h +++ /dev/null @@ -1,85 +0,0 @@ -/* - Copyright (c) 2011, Intel Corporation. All rights reserved. - - Redistribution and use in source and binary forms, with or without modification, - are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - * Neither the name of Intel Corporation nor the names of its contributors may - be used to endorse or promote products derived from this software without - specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR - ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - ******************************************************************************** - * Content : Eigen bindings to Intel(R) MKL - * LU decomposition with partial pivoting based on LAPACKE_?getrf function. - ******************************************************************************** -*/ - -#ifndef EIGEN_PARTIALLU_LAPACK_H -#define EIGEN_PARTIALLU_LAPACK_H - -#include "Eigen/src/Core/util/MKL_support.h" - -namespace Eigen { - -namespace internal { - -/** \internal Specialization for the data types supported by MKL */ - -#define EIGEN_MKL_LU_PARTPIV(EIGTYPE, MKLTYPE, MKLPREFIX) \ -template<int StorageOrder> \ -struct partial_lu_impl<EIGTYPE, StorageOrder, lapack_int> \ -{ \ - /* \internal performs the LU decomposition in-place of the matrix represented */ \ - static lapack_int blocked_lu(lapack_int rows, lapack_int cols, EIGTYPE* lu_data, lapack_int luStride, lapack_int* row_transpositions, lapack_int& nb_transpositions, lapack_int maxBlockSize=256) \ - { \ - EIGEN_UNUSED_VARIABLE(maxBlockSize);\ - lapack_int matrix_order, first_zero_pivot; \ - lapack_int m, n, lda, *ipiv, info; \ - EIGTYPE* a; \ -/* Set up parameters for ?getrf */ \ - matrix_order = StorageOrder==RowMajor ? LAPACK_ROW_MAJOR : LAPACK_COL_MAJOR; \ - lda = luStride; \ - a = lu_data; \ - ipiv = row_transpositions; \ - m = rows; \ - n = cols; \ - nb_transpositions = 0; \ -\ - info = LAPACKE_##MKLPREFIX##getrf( matrix_order, m, n, (MKLTYPE*)a, lda, ipiv ); \ -\ - for(int i=0;i<m;i++) { ipiv[i]--; if (ipiv[i]!=i) nb_transpositions++; } \ -\ - eigen_assert(info >= 0); \ -/* something should be done with nb_transpositions */ \ -\ - first_zero_pivot = info; \ - return first_zero_pivot; \ - } \ -}; - -EIGEN_MKL_LU_PARTPIV(double, double, d) -EIGEN_MKL_LU_PARTPIV(float, float, s) -EIGEN_MKL_LU_PARTPIV(dcomplex, MKL_Complex16, z) -EIGEN_MKL_LU_PARTPIV(scomplex, MKL_Complex8, c) - -} // end namespace internal - -} // end namespace Eigen - -#endif // EIGEN_PARTIALLU_LAPACK_H diff --git a/third_party/eigen3/Eigen/src/LU/arch/Inverse_SSE.h b/third_party/eigen3/Eigen/src/LU/arch/Inverse_SSE.h deleted file mode 100644 index 60b7a23763..0000000000 --- a/third_party/eigen3/Eigen/src/LU/arch/Inverse_SSE.h +++ /dev/null @@ -1,329 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2001 Intel Corporation -// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr> -// Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -// The SSE code for the 4x4 float and double matrix inverse in this file -// comes from the following Intel's library: -// http://software.intel.com/en-us/articles/optimized-matrix-library-for-use-with-the-intel-pentiumr-4-processors-sse2-instructions/ -// -// Here is the respective copyright and license statement: -// -// Copyright (c) 2001 Intel Corporation. -// -// Permition is granted to use, copy, distribute and prepare derivative works -// of this library for any purpose and without fee, provided, that the above -// copyright notice and this statement appear in all copies. -// Intel makes no representations about the suitability of this software for -// any purpose, and specifically disclaims all warranties. -// See LEGAL.TXT for all the legal information. - -#ifndef EIGEN_INVERSE_SSE_H -#define EIGEN_INVERSE_SSE_H - -namespace Eigen { - -namespace internal { - -template<typename MatrixType, typename ResultType> -struct compute_inverse_size4<Architecture::SSE, float, MatrixType, ResultType> -{ - enum { - MatrixAlignment = bool(MatrixType::Flags&AlignedBit), - ResultAlignment = bool(ResultType::Flags&AlignedBit), - StorageOrdersMatch = (MatrixType::Flags&RowMajorBit) == (ResultType::Flags&RowMajorBit) - }; - - static void run(const MatrixType& matrix, ResultType& result) - { - EIGEN_ALIGN16 const unsigned int _Sign_PNNP[4] = { 0x00000000, 0x80000000, 0x80000000, 0x00000000 }; - - // Load the full matrix into registers - __m128 _L1 = matrix.template packet<MatrixAlignment>( 0); - __m128 _L2 = matrix.template packet<MatrixAlignment>( 4); - __m128 _L3 = matrix.template packet<MatrixAlignment>( 8); - __m128 _L4 = matrix.template packet<MatrixAlignment>(12); - - // The inverse is calculated using "Divide and Conquer" technique. The - // original matrix is divide into four 2x2 sub-matrices. Since each - // register holds four matrix element, the smaller matrices are - // represented as a registers. Hence we get a better locality of the - // calculations. - - __m128 A, B, C, D; // the four sub-matrices - if(!StorageOrdersMatch) - { - A = _mm_unpacklo_ps(_L1, _L2); - B = _mm_unpacklo_ps(_L3, _L4); - C = _mm_unpackhi_ps(_L1, _L2); - D = _mm_unpackhi_ps(_L3, _L4); - } - else - { - A = _mm_movelh_ps(_L1, _L2); - B = _mm_movehl_ps(_L2, _L1); - C = _mm_movelh_ps(_L3, _L4); - D = _mm_movehl_ps(_L4, _L3); - } - - __m128 iA, iB, iC, iD, // partial inverse of the sub-matrices - DC, AB; - __m128 dA, dB, dC, dD; // determinant of the sub-matrices - __m128 det, d, d1, d2; - __m128 rd; // reciprocal of the determinant - - // AB = A# * B - AB = _mm_mul_ps(_mm_shuffle_ps(A,A,0x0F), B); - AB = _mm_sub_ps(AB,_mm_mul_ps(_mm_shuffle_ps(A,A,0xA5), _mm_shuffle_ps(B,B,0x4E))); - // DC = D# * C - DC = _mm_mul_ps(_mm_shuffle_ps(D,D,0x0F), C); - DC = _mm_sub_ps(DC,_mm_mul_ps(_mm_shuffle_ps(D,D,0xA5), _mm_shuffle_ps(C,C,0x4E))); - - // dA = |A| - dA = _mm_mul_ps(_mm_shuffle_ps(A, A, 0x5F),A); - dA = _mm_sub_ss(dA, _mm_movehl_ps(dA,dA)); - // dB = |B| - dB = _mm_mul_ps(_mm_shuffle_ps(B, B, 0x5F),B); - dB = _mm_sub_ss(dB, _mm_movehl_ps(dB,dB)); - - // dC = |C| - dC = _mm_mul_ps(_mm_shuffle_ps(C, C, 0x5F),C); - dC = _mm_sub_ss(dC, _mm_movehl_ps(dC,dC)); - // dD = |D| - dD = _mm_mul_ps(_mm_shuffle_ps(D, D, 0x5F),D); - dD = _mm_sub_ss(dD, _mm_movehl_ps(dD,dD)); - - // d = trace(AB*DC) = trace(A#*B*D#*C) - d = _mm_mul_ps(_mm_shuffle_ps(DC,DC,0xD8),AB); - - // iD = C*A#*B - iD = _mm_mul_ps(_mm_shuffle_ps(C,C,0xA0), _mm_movelh_ps(AB,AB)); - iD = _mm_add_ps(iD,_mm_mul_ps(_mm_shuffle_ps(C,C,0xF5), _mm_movehl_ps(AB,AB))); - // iA = B*D#*C - iA = _mm_mul_ps(_mm_shuffle_ps(B,B,0xA0), _mm_movelh_ps(DC,DC)); - iA = _mm_add_ps(iA,_mm_mul_ps(_mm_shuffle_ps(B,B,0xF5), _mm_movehl_ps(DC,DC))); - - // d = trace(AB*DC) = trace(A#*B*D#*C) [continue] - d = _mm_add_ps(d, _mm_movehl_ps(d, d)); - d = _mm_add_ss(d, _mm_shuffle_ps(d, d, 1)); - d1 = _mm_mul_ss(dA,dD); - d2 = _mm_mul_ss(dB,dC); - - // iD = D*|A| - C*A#*B - iD = _mm_sub_ps(_mm_mul_ps(D,_mm_shuffle_ps(dA,dA,0)), iD); - - // iA = A*|D| - B*D#*C; - iA = _mm_sub_ps(_mm_mul_ps(A,_mm_shuffle_ps(dD,dD,0)), iA); - - // det = |A|*|D| + |B|*|C| - trace(A#*B*D#*C) - det = _mm_sub_ss(_mm_add_ss(d1,d2),d); - rd = _mm_div_ss(_mm_set_ss(1.0f), det); - -// #ifdef ZERO_SINGULAR -// rd = _mm_and_ps(_mm_cmpneq_ss(det,_mm_setzero_ps()), rd); -// #endif - - // iB = D * (A#B)# = D*B#*A - iB = _mm_mul_ps(D, _mm_shuffle_ps(AB,AB,0x33)); - iB = _mm_sub_ps(iB, _mm_mul_ps(_mm_shuffle_ps(D,D,0xB1), _mm_shuffle_ps(AB,AB,0x66))); - // iC = A * (D#C)# = A*C#*D - iC = _mm_mul_ps(A, _mm_shuffle_ps(DC,DC,0x33)); - iC = _mm_sub_ps(iC, _mm_mul_ps(_mm_shuffle_ps(A,A,0xB1), _mm_shuffle_ps(DC,DC,0x66))); - - rd = _mm_shuffle_ps(rd,rd,0); - rd = _mm_xor_ps(rd, _mm_load_ps((float*)_Sign_PNNP)); - - // iB = C*|B| - D*B#*A - iB = _mm_sub_ps(_mm_mul_ps(C,_mm_shuffle_ps(dB,dB,0)), iB); - - // iC = B*|C| - A*C#*D; - iC = _mm_sub_ps(_mm_mul_ps(B,_mm_shuffle_ps(dC,dC,0)), iC); - - // iX = iX / det - iA = _mm_mul_ps(rd,iA); - iB = _mm_mul_ps(rd,iB); - iC = _mm_mul_ps(rd,iC); - iD = _mm_mul_ps(rd,iD); - - result.template writePacket<ResultAlignment>( 0, _mm_shuffle_ps(iA,iB,0x77)); - result.template writePacket<ResultAlignment>( 4, _mm_shuffle_ps(iA,iB,0x22)); - result.template writePacket<ResultAlignment>( 8, _mm_shuffle_ps(iC,iD,0x77)); - result.template writePacket<ResultAlignment>(12, _mm_shuffle_ps(iC,iD,0x22)); - } - -}; - -template<typename MatrixType, typename ResultType> -struct compute_inverse_size4<Architecture::SSE, double, MatrixType, ResultType> -{ - enum { - MatrixAlignment = bool(MatrixType::Flags&AlignedBit), - ResultAlignment = bool(ResultType::Flags&AlignedBit), - StorageOrdersMatch = (MatrixType::Flags&RowMajorBit) == (ResultType::Flags&RowMajorBit) - }; - static void run(const MatrixType& matrix, ResultType& result) - { - const __m128d _Sign_NP = _mm_castsi128_pd(_mm_set_epi32(0x0,0x0,0x80000000,0x0)); - const __m128d _Sign_PN = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0)); - - // The inverse is calculated using "Divide and Conquer" technique. The - // original matrix is divide into four 2x2 sub-matrices. Since each - // register of the matrix holds two element, the smaller matrices are - // consisted of two registers. Hence we get a better locality of the - // calculations. - - // the four sub-matrices - __m128d A1, A2, B1, B2, C1, C2, D1, D2; - - if(StorageOrdersMatch) - { - A1 = matrix.template packet<MatrixAlignment>( 0); B1 = matrix.template packet<MatrixAlignment>( 2); - A2 = matrix.template packet<MatrixAlignment>( 4); B2 = matrix.template packet<MatrixAlignment>( 6); - C1 = matrix.template packet<MatrixAlignment>( 8); D1 = matrix.template packet<MatrixAlignment>(10); - C2 = matrix.template packet<MatrixAlignment>(12); D2 = matrix.template packet<MatrixAlignment>(14); - } - else - { - __m128d tmp; - A1 = matrix.template packet<MatrixAlignment>( 0); C1 = matrix.template packet<MatrixAlignment>( 2); - A2 = matrix.template packet<MatrixAlignment>( 4); C2 = matrix.template packet<MatrixAlignment>( 6); - tmp = A1; - A1 = _mm_unpacklo_pd(A1,A2); - A2 = _mm_unpackhi_pd(tmp,A2); - tmp = C1; - C1 = _mm_unpacklo_pd(C1,C2); - C2 = _mm_unpackhi_pd(tmp,C2); - - B1 = matrix.template packet<MatrixAlignment>( 8); D1 = matrix.template packet<MatrixAlignment>(10); - B2 = matrix.template packet<MatrixAlignment>(12); D2 = matrix.template packet<MatrixAlignment>(14); - tmp = B1; - B1 = _mm_unpacklo_pd(B1,B2); - B2 = _mm_unpackhi_pd(tmp,B2); - tmp = D1; - D1 = _mm_unpacklo_pd(D1,D2); - D2 = _mm_unpackhi_pd(tmp,D2); - } - - __m128d iA1, iA2, iB1, iB2, iC1, iC2, iD1, iD2, // partial invese of the sub-matrices - DC1, DC2, AB1, AB2; - __m128d dA, dB, dC, dD; // determinant of the sub-matrices - __m128d det, d1, d2, rd; - - // dA = |A| - dA = _mm_shuffle_pd(A2, A2, 1); - dA = _mm_mul_pd(A1, dA); - dA = _mm_sub_sd(dA, _mm_shuffle_pd(dA,dA,3)); - // dB = |B| - dB = _mm_shuffle_pd(B2, B2, 1); - dB = _mm_mul_pd(B1, dB); - dB = _mm_sub_sd(dB, _mm_shuffle_pd(dB,dB,3)); - - // AB = A# * B - AB1 = _mm_mul_pd(B1, _mm_shuffle_pd(A2,A2,3)); - AB2 = _mm_mul_pd(B2, _mm_shuffle_pd(A1,A1,0)); - AB1 = _mm_sub_pd(AB1, _mm_mul_pd(B2, _mm_shuffle_pd(A1,A1,3))); - AB2 = _mm_sub_pd(AB2, _mm_mul_pd(B1, _mm_shuffle_pd(A2,A2,0))); - - // dC = |C| - dC = _mm_shuffle_pd(C2, C2, 1); - dC = _mm_mul_pd(C1, dC); - dC = _mm_sub_sd(dC, _mm_shuffle_pd(dC,dC,3)); - // dD = |D| - dD = _mm_shuffle_pd(D2, D2, 1); - dD = _mm_mul_pd(D1, dD); - dD = _mm_sub_sd(dD, _mm_shuffle_pd(dD,dD,3)); - - // DC = D# * C - DC1 = _mm_mul_pd(C1, _mm_shuffle_pd(D2,D2,3)); - DC2 = _mm_mul_pd(C2, _mm_shuffle_pd(D1,D1,0)); - DC1 = _mm_sub_pd(DC1, _mm_mul_pd(C2, _mm_shuffle_pd(D1,D1,3))); - DC2 = _mm_sub_pd(DC2, _mm_mul_pd(C1, _mm_shuffle_pd(D2,D2,0))); - - // rd = trace(AB*DC) = trace(A#*B*D#*C) - d1 = _mm_mul_pd(AB1, _mm_shuffle_pd(DC1, DC2, 0)); - d2 = _mm_mul_pd(AB2, _mm_shuffle_pd(DC1, DC2, 3)); - rd = _mm_add_pd(d1, d2); - rd = _mm_add_sd(rd, _mm_shuffle_pd(rd, rd,3)); - - // iD = C*A#*B - iD1 = _mm_mul_pd(AB1, _mm_shuffle_pd(C1,C1,0)); - iD2 = _mm_mul_pd(AB1, _mm_shuffle_pd(C2,C2,0)); - iD1 = _mm_add_pd(iD1, _mm_mul_pd(AB2, _mm_shuffle_pd(C1,C1,3))); - iD2 = _mm_add_pd(iD2, _mm_mul_pd(AB2, _mm_shuffle_pd(C2,C2,3))); - - // iA = B*D#*C - iA1 = _mm_mul_pd(DC1, _mm_shuffle_pd(B1,B1,0)); - iA2 = _mm_mul_pd(DC1, _mm_shuffle_pd(B2,B2,0)); - iA1 = _mm_add_pd(iA1, _mm_mul_pd(DC2, _mm_shuffle_pd(B1,B1,3))); - iA2 = _mm_add_pd(iA2, _mm_mul_pd(DC2, _mm_shuffle_pd(B2,B2,3))); - - // iD = D*|A| - C*A#*B - dA = _mm_shuffle_pd(dA,dA,0); - iD1 = _mm_sub_pd(_mm_mul_pd(D1, dA), iD1); - iD2 = _mm_sub_pd(_mm_mul_pd(D2, dA), iD2); - - // iA = A*|D| - B*D#*C; - dD = _mm_shuffle_pd(dD,dD,0); - iA1 = _mm_sub_pd(_mm_mul_pd(A1, dD), iA1); - iA2 = _mm_sub_pd(_mm_mul_pd(A2, dD), iA2); - - d1 = _mm_mul_sd(dA, dD); - d2 = _mm_mul_sd(dB, dC); - - // iB = D * (A#B)# = D*B#*A - iB1 = _mm_mul_pd(D1, _mm_shuffle_pd(AB2,AB1,1)); - iB2 = _mm_mul_pd(D2, _mm_shuffle_pd(AB2,AB1,1)); - iB1 = _mm_sub_pd(iB1, _mm_mul_pd(_mm_shuffle_pd(D1,D1,1), _mm_shuffle_pd(AB2,AB1,2))); - iB2 = _mm_sub_pd(iB2, _mm_mul_pd(_mm_shuffle_pd(D2,D2,1), _mm_shuffle_pd(AB2,AB1,2))); - - // det = |A|*|D| + |B|*|C| - trace(A#*B*D#*C) - det = _mm_add_sd(d1, d2); - det = _mm_sub_sd(det, rd); - - // iC = A * (D#C)# = A*C#*D - iC1 = _mm_mul_pd(A1, _mm_shuffle_pd(DC2,DC1,1)); - iC2 = _mm_mul_pd(A2, _mm_shuffle_pd(DC2,DC1,1)); - iC1 = _mm_sub_pd(iC1, _mm_mul_pd(_mm_shuffle_pd(A1,A1,1), _mm_shuffle_pd(DC2,DC1,2))); - iC2 = _mm_sub_pd(iC2, _mm_mul_pd(_mm_shuffle_pd(A2,A2,1), _mm_shuffle_pd(DC2,DC1,2))); - - rd = _mm_div_sd(_mm_set_sd(1.0), det); -// #ifdef ZERO_SINGULAR -// rd = _mm_and_pd(_mm_cmpneq_sd(det,_mm_setzero_pd()), rd); -// #endif - rd = _mm_shuffle_pd(rd,rd,0); - - // iB = C*|B| - D*B#*A - dB = _mm_shuffle_pd(dB,dB,0); - iB1 = _mm_sub_pd(_mm_mul_pd(C1, dB), iB1); - iB2 = _mm_sub_pd(_mm_mul_pd(C2, dB), iB2); - - d1 = _mm_xor_pd(rd, _Sign_PN); - d2 = _mm_xor_pd(rd, _Sign_NP); - - // iC = B*|C| - A*C#*D; - dC = _mm_shuffle_pd(dC,dC,0); - iC1 = _mm_sub_pd(_mm_mul_pd(B1, dC), iC1); - iC2 = _mm_sub_pd(_mm_mul_pd(B2, dC), iC2); - - result.template writePacket<ResultAlignment>( 0, _mm_mul_pd(_mm_shuffle_pd(iA2, iA1, 3), d1)); // iA# / det - result.template writePacket<ResultAlignment>( 4, _mm_mul_pd(_mm_shuffle_pd(iA2, iA1, 0), d2)); - result.template writePacket<ResultAlignment>( 2, _mm_mul_pd(_mm_shuffle_pd(iB2, iB1, 3), d1)); // iB# / det - result.template writePacket<ResultAlignment>( 6, _mm_mul_pd(_mm_shuffle_pd(iB2, iB1, 0), d2)); - result.template writePacket<ResultAlignment>( 8, _mm_mul_pd(_mm_shuffle_pd(iC2, iC1, 3), d1)); // iC# / det - result.template writePacket<ResultAlignment>(12, _mm_mul_pd(_mm_shuffle_pd(iC2, iC1, 0), d2)); - result.template writePacket<ResultAlignment>(10, _mm_mul_pd(_mm_shuffle_pd(iD2, iD1, 3), d1)); // iD# / det - result.template writePacket<ResultAlignment>(14, _mm_mul_pd(_mm_shuffle_pd(iD2, iD1, 0), d2)); - } -}; - -} // end namespace internal - -} // end namespace Eigen - -#endif // EIGEN_INVERSE_SSE_H |