From ce1dc1ab165929edd7608d83e448d81e4f968038 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Mon, 3 Aug 2009 12:11:18 +0200 Subject: implements a blocked version of PartialLU --- Eigen/src/LU/PartialLU.h | 123 ++++++++++++++++++++++++++++++++++++----------- 1 file changed, 96 insertions(+), 27 deletions(-) (limited to 'Eigen') diff --git a/Eigen/src/LU/PartialLU.h b/Eigen/src/LU/PartialLU.h index b3a40f0e5..337438e2f 100644 --- a/Eigen/src/LU/PartialLU.h +++ b/Eigen/src/LU/PartialLU.h @@ -201,50 +201,119 @@ PartialLU::PartialLU(const MatrixType& matrix) compute(matrix); } -template -void PartialLU::compute(const MatrixType& matrix) +/** \internal performs the LU decomposition in place of the matrix \a lu. + * In addition, this function returns the row transpositions in the + * vector \a row_transpositions which must have a size equal to the number + * of columns of the matrix \a lu, and an integer \a nb_transpositions + * which returns the actual number of transpositions. + */ +template +void ei_lu_unblocked(MatrixType& lu, IntVector& row_transpositions, int& nb_transpositions) { - m_lu = matrix; - m_p.resize(matrix.rows()); - - ei_assert(matrix.rows() == matrix.cols() && "PartialLU is only for square (and moreover invertible) matrices"); - const int size = matrix.rows(); - - IntColVectorType rows_transpositions(size); - int number_of_transpositions = 0; - + const int rows = lu.rows(); + const int size = std::min(lu.rows(),lu.cols()); + nb_transpositions = 0; for(int k = 0; k < size; ++k) { int row_of_biggest_in_col; - m_lu.block(k,k,size-k,1).cwise().abs().maxCoeff(&row_of_biggest_in_col); + lu.block(k,k,rows-k,1).cwise().abs().maxCoeff(&row_of_biggest_in_col); row_of_biggest_in_col += k; - rows_transpositions.coeffRef(k) = row_of_biggest_in_col; + row_transpositions.coeffRef(k) = row_of_biggest_in_col; - if(k != row_of_biggest_in_col) { - m_lu.row(k).swap(m_lu.row(row_of_biggest_in_col)); - ++number_of_transpositions; + if(k != row_of_biggest_in_col) + { + lu.row(k).swap(lu.row(row_of_biggest_in_col)); + ++nb_transpositions; } - if(k +void ei_lu_blocked(MatrixType& lu, IntVector& row_transpositions, int& nb_transpositions) +{ + const int size = lu.rows(); + + // automatically adjust the number of subdivisions to the size + // of the matrix so that there is enough sub blocks: + int blockSize = size/8; + blockSize = (blockSize/16)*16; + blockSize = std::min(std::max(blockSize,8), 256); + // if the matrix is too small, no blocking: + if(size<32) + blockSize = size; + + nb_transpositions = 0; + for(int k = 0; k < size; k+=blockSize) + { + int bs = std::min(size-k,blockSize); + int ps = size - k; + int rs = size - k - bs; + // partition the matrix: + // A00 | A01 | A02 + // lu = A10 | A11 | A12 + // A20 | A21 | A22 + Block A_0(lu,0,0,size,k); + Block A11_21(lu,k,k,ps,bs); + Block A_2(lu,0,k+bs,size,rs); + Block A11(lu,k,k,bs,bs); + Block A12(lu,k,k+bs,bs,rs); + Block A21(lu,k+bs,k,rs,bs); + Block A22(lu,k+bs,k+bs,rs,rs); + + VectorBlock row_transpositions_in_panel(row_transpositions,k,bs); + int nb_transpositions_in_panel; + ei_lu_unblocked(A11_21, row_transpositions_in_panel, nb_transpositions_in_panel); + nb_transpositions_in_panel += nb_transpositions_in_panel; + + // update permutations and apply them to A10 + for(int i=k;i().solveInPlace(A12); + + A22 -= A21 * A12; + } + } +} + +template +void PartialLU::compute(const MatrixType& matrix) +{ + m_lu = matrix; + m_p.resize(matrix.rows()); + + ei_assert(matrix.rows() == matrix.cols() && "PartialLU is only for square (and moreover invertible) matrices"); + const int size = matrix.rows(); + + IntColVectorType rows_transpositions(size); + + int nb_transpositions; + ei_lu_blocked(m_lu, rows_transpositions, nb_transpositions); + m_det_p = (nb_transpositions%2) ? -1 : 1; for(int k = 0; k < size; ++k) m_p.coeffRef(k) = k; for(int k = size-1; k >= 0; --k) std::swap(m_p.coeffRef(k), m_p.coeffRef(rows_transpositions.coeff(k))); - m_det_p = (number_of_transpositions%2) ? -1 : 1; - m_isInitialized = true; } -- cgit v1.2.3