diff options
22 files changed, 547 insertions, 536 deletions
diff --git a/Eigen/OrderingMethods b/Eigen/OrderingMethods index 1e2d87452..bb43220e8 100644 --- a/Eigen/OrderingMethods +++ b/Eigen/OrderingMethods @@ -17,7 +17,7 @@ */ #include "src/OrderingMethods/Amd.h" - +#include "src/OrderingMethods/Ordering.h" #include "src/Core/util/ReenableStupidWarnings.h" #endif // EIGEN_ORDERINGMETHODS_MODULE_H diff --git a/Eigen/SparseLU b/Eigen/SparseLU new file mode 100644 index 000000000..452bc9f83 --- /dev/null +++ b/Eigen/SparseLU @@ -0,0 +1,17 @@ +#ifndef EIGEN_SPARSELU_MODULE_H +#define EIGEN_SPARSELU_MODULE_H + +#include "SparseCore" + + +/** \ingroup Sparse_modules + * \defgroup SparseLU_Module SparseLU module + * + */ + +// Ordering interface +#include "OrderingMethods" + +#include "src/SparseLU/SparseLU.h" + +#endif // EIGEN_SPARSELU_MODULE_H diff --git a/Eigen/src/OrderingMethods/Ordering.h b/Eigen/src/OrderingMethods/Ordering.h index c43c381a4..3a3e3f6fc 100644 --- a/Eigen/src/OrderingMethods/Ordering.h +++ b/Eigen/src/OrderingMethods/Ordering.h @@ -26,9 +26,7 @@ #ifndef EIGEN_ORDERING_H #define EIGEN_ORDERING_H -#include <Eigen_Colamd.h> -#include <Amd.h> - +#include "Amd.h" namespace Eigen { template<class Derived> class OrderingBase @@ -68,8 +66,23 @@ class OrderingBase if (m_isInitialized = true) return m_P; else abort(); // FIXME Should find a smoother way to exit with error code } + + /** + * Get the symmetric pattern A^T+A from the input matrix A. + * FIXME: The values should not be considered here + */ template<typename MatrixType> - void at_plus_a(const MatrixType& mat); + void at_plus_a(const MatrixType& mat) + { + MatrixType C; + C = mat.transpose(); // NOTE: Could be costly + for (int i = 0; i < C.rows(); i++) + { + for (typename MatrixType::InnerIterator it(C, i); it; ++it) + it.valueRef() = 0.0; + } + m_mat = C + mat; + } /** keeps off-diagonal entries; drops diagonal entries */ struct keep_diag { @@ -87,99 +100,30 @@ class OrderingBase PermutationType m_P; // The computed permutation mutable bool m_isInitialized; SparseMatrix<Scalar,ColMajor,Index> m_mat; // Stores the (symmetrized) matrix to permute -} -/** - * Get the symmetric pattern A^T+A from the input matrix A. - * NOTE: The values should not be considered here - */ -template<typename MatrixType> -void OrderingBase::at_plus_a(const MatrixType& mat) -{ - MatrixType C; - C = mat.transpose(); // NOTE: Could be costly - for (int i = 0; i < C.rows(); i++) - { - for (typename MatrixType::InnerIterator it(C, i); it; ++it) - it.valueRef() = 0.0; - } - m_mat = C + mat; - -/** - * Get the column approximate minimum degree ordering - * The matrix should be in column-major format - */ -template<typename Scalar, typename Index> -class COLAMDOrdering: public OrderingBase< ColamdOrdering<Scalar, Index> > -{ - public: - typedef OrderingBase< ColamdOrdering<Scalar, Index> > Base; - typedef SparseMatrix<Scalar,ColMajor,Index> MatrixType; - - public: - COLAMDOrdering():Base() {} - - COLAMDOrdering(const MatrixType& matrix):Base() - { - compute(matrix); - } - COLAMDOrdering(const MatrixType& mat, PermutationType& perm_c):Base() - { - compute(matrix); - perm_c = this.get_perm(); - } - void compute(const MatrixType& mat) - { - // Test if the matrix is column major... - - int m = mat.rows(); - int n = mat.cols(); - int nnz = mat.nonZeros(); - // Get the recommended value of Alen to be used by colamd - int Alen = colamd_recommended(nnz, m, n); - // Set the default parameters - double knobs[COLAMD_KNOBS]; - colamd_set_defaults(knobs); - - int info; - VectorXi p(n), A(nnz); - for(int i=0; i < n; i++) p(i) = mat.outerIndexPtr()(i); - for(int i=0; i < nnz; i++) A(i) = mat.innerIndexPtr()(i); - // Call Colamd routine to compute the ordering - info = colamd(m, n, Alen, A,p , knobs, stats) - eigen_assert( (info != FALSE)&& "COLAMD failed " ); - - m_P.resize(n); - for (int i = 0; i < n; i++) m_P(p(i)) = i; - m_isInitialized = true; - } - protected: - using Base::m_isInitialized; - using Base m_P; -} +}; /** * Get the approximate minimum degree ordering * If the matrix is not structurally symmetric, an ordering of A^T+A is computed * \tparam Scalar The type of the scalar of the matrix for which the ordering is applied * \tparam Index The type of indices of the matrix - * \tparam _UpLo If the matrix is symmetric, indicates which part to use */ -template <typename Scalar, typename Index, typename _UpLo> -class AMDordering : public OrderingBase<AMDOrdering<Scalar, Index> > +template <typename Scalar, typename Index> +class AMDOrdering : public OrderingBase<AMDOrdering<Scalar, Index> > { public: - enum { UpLo = _UpLo }; typedef OrderingBase< AMDOrdering<Scalar, Index> > Base; typedef SparseMatrix<Scalar, ColMajor,Index> MatrixType; + typedef PermutationMatrix<Dynamic, Dynamic, Index> PermutationType; public: AMDOrdering():Base(){} AMDOrdering(const MatrixType& mat):Base() { - compute(matrix); + compute(mat); } AMDOrdering(const MatrixType& mat, PermutationType& perm_c):Base() { - compute(matrix); + compute(mat); perm_c = this.get_perm(); } /** Compute the permutation vector from a column-major sparse matrix */ @@ -200,15 +144,75 @@ class AMDordering : public OrderingBase<AMDOrdering<Scalar, Index> > m_mat = mat; // Call the AMD routine - m_mat.prune(keep_diag()); + m_mat.prune(keep_diag()); //Remove the diagonal elements internal::minimum_degree_ordering(m_mat, m_P); if (m_P.size()>0) m_isInitialized = true; } protected: + struct keep_diag{ + inline bool operator() (const Index& row, const Index& col, const Scalar&) const + { + return row!=col; + } + }; using Base::m_isInitialized; using Base::m_P; using Base::m_mat; -} +}; + + +/** + * Get the column approximate minimum degree ordering + * The matrix should be in column-major format + */ +// template<typename Scalar, typename Index> +// class COLAMDOrdering: public OrderingBase< ColamdOrdering<Scalar, Index> > +// { +// public: +// typedef OrderingBase< ColamdOrdering<Scalar, Index> > Base; +// typedef SparseMatrix<Scalar,ColMajor,Index> MatrixType; +// +// public: +// COLAMDOrdering():Base() {} +// +// COLAMDOrdering(const MatrixType& matrix):Base() +// { +// compute(matrix); +// } +// COLAMDOrdering(const MatrixType& mat, PermutationType& perm_c):Base() +// { +// compute(matrix); +// perm_c = this.get_perm(); +// } +// void compute(const MatrixType& mat) +// { +// // Test if the matrix is column major... +// +// int m = mat.rows(); +// int n = mat.cols(); +// int nnz = mat.nonZeros(); +// // Get the recommended value of Alen to be used by colamd +// int Alen = colamd_recommended(nnz, m, n); +// // Set the default parameters +// double knobs[COLAMD_KNOBS]; +// colamd_set_defaults(knobs); +// +// int info; +// VectorXi p(n), A(nnz); +// for(int i=0; i < n; i++) p(i) = mat.outerIndexPtr()(i); +// for(int i=0; i < nnz; i++) A(i) = mat.innerIndexPtr()(i); +// // Call Colamd routine to compute the ordering +// info = colamd(m, n, Alen, A,p , knobs, stats) +// eigen_assert( (info != FALSE)&& "COLAMD failed " ); +// +// m_P.resize(n); +// for (int i = 0; i < n; i++) m_P(p(i)) = i; +// m_isInitialized = true; +// } +// protected: +// using Base::m_isInitialized; +// using Base m_P; +// }; } // end namespace Eigen #endif
\ No newline at end of file diff --git a/Eigen/src/SparseLU/CMakeLists.txt b/Eigen/src/SparseLU/CMakeLists.txt new file mode 100644 index 000000000..69729ee89 --- /dev/null +++ b/Eigen/src/SparseLU/CMakeLists.txt @@ -0,0 +1,6 @@ +FILE(GLOB Eigen_SparseLU_SRCS "*.h") + +INSTALL(FILES + ${Eigen_SparseLU_SRCS} + DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/SparseLU COMPONENT Devel + ) diff --git a/Eigen/src/SparseLU/SparseLU.h b/Eigen/src/SparseLU/SparseLU.h index 36b1ce570..293dcd0b3 100644 --- a/Eigen/src/SparseLU/SparseLU.h +++ b/Eigen/src/SparseLU/SparseLU.h @@ -30,8 +30,8 @@ namespace Eigen { // Data structure needed by all routines -#include <SparseLU_Structs.h> -#include <SparseLU_Matrix.h> +#include "SparseLU_Structs.h" +#include "SparseLU_Matrix.h" /** * \ingroup SparseLU_Module @@ -41,18 +41,20 @@ namespace Eigen { * * \tparam _MatrixType The type of the sparse matrix. It must be a column-major SparseMatrix<> */ -template <typename _MatrixType> +template <typename _MatrixType, typename _OrderingType> class SparseLU { public: typedef _MatrixType MatrixType; + typedef _OrderingType OrderingType; typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; typedef typename MatrixType::Index Index; typedef SparseMatrix<Scalar,ColMajor,Index> NCMatrix; typedef SuperNodalMatrix<Scalar, Index> SCMatrix; - typedef GlobalLU_t<Scalar, Index> LU_GlobalLU_t; typedef Matrix<Scalar,Dynamic,1> ScalarVector; typedef Matrix<Index,Dynamic,1> IndexVector; +// typedef GlobalLU_t<ScalarVector, IndexVector> LU_GlobalLU_t; typedef PermutationMatrix<Dynamic, Dynamic, Index> PermutationType; public: SparseLU():m_isInitialized(true),m_symmetricmode(false),m_diagpivotthresh(1.0) @@ -82,10 +84,10 @@ class SparseLU analyzePattern(matrix); //Factorize factorize(matrix); - } - template<typename Rhs, typename Dest> - bool SparseLU::_solve(const MatrixBase<Rhs> &b, MatrixBase<Dest> &dest) const + } + inline Index rows() const { return m_mat.rows(); } + inline Index cols() const { return m_mat.cols(); } /** Indicate that the pattern of the input matrix is symmetric */ void isSymmetric(bool sym) { @@ -99,45 +101,152 @@ class SparseLU } - /** \returns the solution X of \f$ A X = b \f$ using the current decomposition of A. + /** \returns the solution X of \f$ A X = B \f$ using the current decomposition of A. * * \sa compute() */ - template<typename Rhs> - inline const internal::solve_retval<SparseLU, Rhs> solve(const MatrixBase<Rhs>& b) const - { - eigen_assert(m_factorizationIsOk && "SparseLU is not initialized."); - eigen_assert(rows()==b.rows() - && "SparseLU::solve(): invalid number of rows of the right hand side matrix b"); - return internal::solve_retval<SuperLUBase, Rhs>(*this, b.derived()); +// template<typename Rhs> +// inline const solve_retval<SparseLU, Rhs> solve(const MatrixBase<Rhs>& B) const +// { +// eigen_assert(m_factorizationIsOk && "SparseLU is not initialized."); +// eigen_assert(rows()==B.rows() +// && "SparseLU::solve(): invalid number of rows of the right hand side matrix B"); +// return solve_retval<SparseLU, Rhs>(*this, B.derived()); +// } + + template<typename Rhs, typename Dest> + bool _solve(const MatrixBase<Rhs> &B, MatrixBase<Dest> &X) const + { + eigen_assert(m_isInitialized && "The matrix should be factorized first"); + EIGEN_STATIC_ASSERT((Dest::Flags&RowMajorBit)==0, + THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES); + + X = B; /* on return, X is overwritten by the computed solution */ + + int nrhs = B.cols(); + + // Permute the right hand side to form Pr*B + X = m_perm_r * X; + + // Forward solve PLy = Pb; + Index n = B.rows(); + Index fsupc; // First column of the current supernode + Index istart; // Pointer index to the subscript of the current column + Index nsupr; // Number of rows in the current supernode + Index nsupc; // Number of columns in the current supernode + Index nrow; // Number of rows in the non-diagonal part of the supernode + Index luptr; // Pointer index to the current nonzero value + Index iptr; // row index pointer iterator + Index irow; //Current index row + const Scalar * Lval = m_Lstore.valuePtr(); // Nonzero values + Matrix<Scalar,Dynamic,Dynamic> work(n, nrhs); // working vector + work.setZero(); + int j, k, i, icol,jcol; + for (k = 0; k <= m_Lstore.nsuper(); k ++) + { + fsupc = m_Lstore.supToCol()[k]; + istart = m_Lstore.rowIndexPtr()[fsupc]; + nsupr = m_Lstore.rowIndexPtr()[fsupc+1] - istart; + nsupc = m_Lstore.supToCol()[k+1] - fsupc; + nrow = nsupr - nsupc; + luptr = m_Lstore.colIndexPtr()[fsupc]; + + if (nsupc == 1 ) + { + for (j = 0; j < nrhs; j++) + { + for (iptr = istart+1; iptr < m_Lstore.rowIndexPtr()[fsupc+1]; iptr++) + { + irow = m_Lstore.rowIndex()[iptr]; + ++luptr; + X(irow, j) -= X(fsupc, j) * Lval[luptr]; + } + } + } + else + { + // The supernode has more than one column + + // Triangular solve + Map<Matrix<Scalar,Dynamic,Dynamic>, 0, OuterStride<> > A( &(Lval[luptr]), nsupc, nsupc, OuterStride<>(nsupr) ); + // Map<Matrix<Scalar,Dynamic,Dynamic>, 0, OuterStride > u( &(X(fsupc,0)), nsupc, nrhs, OuterStride<>(X.rows()) ); + Matrix<Scalar,Dynamic,Dynamic>& U = X.block(fsupc, 0, nsupc, nrhs); //FIXME Check this + U = A.template triangularView<Lower>().solve(U); + + // Matrix-vector product + new (&A) Map<Matrix<Scalar,Dynamic,Dynamic>, 0, OuterStride<> > ( &(Lval[luptr+nsupc]), nrow, nsupc, OuterStride<>(nsupr) ); + work.block(0, 0, nrow, nrhs) = A * U; + + //Begin Scatter + for (j = 0; j < nrhs; j++) + { + iptr = istart + nsupc; + for (i = 0; i < nrow; i++) + { + irow = m_Lstore.rowIndex()[iptr]; + X(irow, j) -= work(i, j); // Scatter operation + work(i, j) = Scalar(0); + iptr++; + } + } + } + } // end for all supernodes + + // Back solve Ux = y + for (k = m_Lstore.nsuper(); k >= 0; k--) + { + fsupc = m_Lstore.supToCol()[k]; + istart = m_Lstore.rowIndexPtr()[fsupc]; + nsupr = m_Lstore.rowIndexPtr()[fsupc+1] - istart; + nsupc = m_Lstore.supToCol()[k+1] - fsupc; + luptr = m_Lstore.colIndexPtr()[fsupc]; + + if (nsupc == 1) + { + for (j = 0; j < nrhs; j++) + { + X(fsupc, j) /= Lval[luptr]; + } + } + else + { + Map<Matrix<Scalar,Dynamic,Dynamic>, 0, OuterStride<> > A( &(Lval[luptr]), nsupc, nsupc, OuterStride<>(nsupr) ); + Matrix<Scalar,Dynamic,Dynamic>& U = X.block(fsupc, 0, nsupc, nrhs); + U = A.template triangularView<Upper>().solve(U); + } + + for (j = 0; j < nrhs; ++j) + { + for (jcol = fsupc; jcol < fsupc + nsupc; jcol++) + { + for (i = m_Ustore.outerIndexPtr()[jcol]; i < m_Ustore.outerIndexPtr()[jcol]; i++) + { + irow = m_Ustore.InnerIndices()[i]; + X(irow, j) -= X(jcol, j) * m_Ustore.Values()[i]; + } + } + } + } // End For U-solve + + // Permute back the solution + X = m_perm_c * X; + + return true; } + protected: // Functions - void initperfvalues(); - int LU_snode_dfs(const int jcol, const int kcol, const IndexVector* asub, - const IndexVector* colptr, IndexVector& xprune, IndexVector& marker, LU_GlobalLU_t& glu); - int LU_dsnode_bmod (const Index jcol, const Index jsupno, const Index fsupc, - ScalarVector& dense, LU_GlobalLU_t& Glu); - int LU_pivotL(const int jcol, const RealScalar diagpivotthresh, IndexVector& perm_r, - IndexVector& iperm_c, int& pivrow, GlobalLU_t& Glu); - void LU_panel_dfs(const int m, const int w, const int jcol, MatrixType& A, - IndexVector& perm_r, int& nseg, ScalarVector& dense, IndexVector& panel_lsub, - IndexVector& segrep, IndexVector& repfnz, IndexVector& xprune, IndexVector& marker, - IndexVector& parent, IndexVector& xplore, LU_GlobalLU_t& Glu); - void LU_panel_bmod(const int m, const int w, const int jcol, const int nseg, - ScalarVector& dense, ScalarVector& tempv, IndexVector& segrep, - IndexVector& repfnz, LU_GlobalLU_t& glu); - int LU_column_dfs(const int m, const int jcol, IndexVector& perm_r, IndexVector& nseg, - IndexVector& lsub_col, IndexVector& segrep, IndexVector& repfnz, - IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, LU_GlobalLU_t& glu); - int LU_column_bmod(const int jcol, const int nseg, ScalarVector& dense, ScalarVector& tempv, - IndexVector& segrep, IndexVector& repfnz, int fpanelc, LU_GlobalLU_t& Glu); - int LU_copy_to_ucol(const int jcol, const int nseg, IndexVector& segrep, IndexVector& repfnz, - IndexVector& perm_r, ScalarVector& dense, LU_GlobalLU_t& glu); - void LU_pruneL(const int jcol, const IndexVector& perm_r, const int pivrow, const int nseg, - const IndexVector& segrep, IndexVector& repfnz, IndexVector& xprune, GlobalLU_t& Glu) - + void initperfvalues() + { + m_panel_size = 12; + m_relax = 1; + m_maxsuper = 100; + m_rowblk = 200; + m_colblk = 60; + m_fillfactor = 20; + } + // Variables mutable ComputationInfo m_info; bool m_isInitialized; @@ -150,9 +259,7 @@ class SparseLU PermutationType m_perm_r ; // Row permutation IndexVector m_etree; // Column elimination tree - ScalarVector m_work; // Scalar work vector - IndexVector m_iwork; //Index work vector - static LU_GlobalLU_t m_glu; // persistent data to facilitate multiple factors + static LU_GlobalLU_t<IndexVector, ScalarVector> m_glu; // persistent data to facilitate multiple factors // FIXME All fields of this struct can be defined separately as class members // SuperLU/SparseLU options @@ -176,21 +283,9 @@ class SparseLU }; // End class SparseLU -/* Set the default values for performance */ -void SparseLU::initperfvalues() -{ - m_panel_size = 12; - m_relax = 1; - m_maxsuper = 100; - m_rowblk = 200; - m_colblk = 60; - m_fillfactor = 20; -} // Functions needed by the anaysis phase -#include <SparseLU_Coletree.h> -// Ordering interface -#include <Ordering.h> +#include "SparseLU_Coletree.h" /** * Compute the column permutation to minimize the fill-in (file amd.c ) * @@ -202,7 +297,7 @@ void SparseLU::initperfvalues() * */ template <typename MatrixType, typename OrderingType> -void SparseLU::analyzePattern(const MatrixType& mat) +void SparseLU<MatrixType, OrderingType>::analyzePattern(const MatrixType& mat) { //TODO It is possible as in SuperLU to compute row and columns scaling vectors to equilibrate the matrix mat. @@ -218,6 +313,7 @@ void SparseLU::analyzePattern(const MatrixType& mat) // Apply the permutation to the column of the input matrix m_mat = mat * m_perm_c; + // Compute the column elimination tree of the permuted matrix if (m_etree.size() == 0) m_etree.resize(m_mat.cols()); @@ -230,8 +326,9 @@ void SparseLU::analyzePattern(const MatrixType& mat) LU_TreePostorder(m_mat.cols(), m_etree, post); // Renumber etree in postorder - iwork.resize(n+1); - for (i = 0; i < n; ++i) iwork(post(i)) = post(m_etree(i)); + int m = m_mat.cols(); + iwork.resize(m+1); + for (int i = 0; i < m; ++i) iwork(post(i)) = post(m_etree(i)); m_etree = iwork; // Postmultiply A*Pc by post, i.e reorder the matrix according to the postorder of the etree @@ -242,23 +339,23 @@ void SparseLU::analyzePattern(const MatrixType& mat) m_perm_c = m_perm_c * post_perm; } // end postordering - m_analysisIsok = true; + m_analysisIsOk = true; } // Functions needed by the numerical factorization phase -#include <SparseLU_Memory.h> -#include <SparseLU_heap_relax_snode.h> -#include <SparseLU_relax_snode.h> -#include <SparseLU_snode_dfs.h> -#include <SparseLU_snode_bmod.h> -#include <SparseLU_pivotL.h> -#include <SparseLU_panel_dfs.h> -#include <SparseLU_panel_bmod.h> -#include <SparseLU_column_dfs.h> -#include <SparseLU_column_bmod.h> -#include <SparseLU_copy_to_ucol.h> -#include <SparseLU_pruneL.h> -#include <SparseLU_Utils.h> +#include "SparseLU_Memory.h" +#include "SparseLU_heap_relax_snode.h" +#include "SparseLU_relax_snode.h" +#include "SparseLU_snode_dfs.h" +#include "SparseLU_snode_bmod.h" +#include "SparseLU_pivotL.h" +#include "SparseLU_panel_dfs.h" +#include "SparseLU_panel_bmod.h" +#include "SparseLU_column_dfs.h" +#include "SparseLU_column_bmod.h" +#include "SparseLU_copy_to_ucol.h" +#include "SparseLU_pruneL.h" +#include "SparseLU_Utils.h" /** * - Numerical factorization @@ -276,13 +373,17 @@ void SparseLU::analyzePattern(const MatrixType& mat) * failure occurred, plus A->ncol. If lwork = -1, it is * the estimated amount of space needed, plus A->ncol. */ -template <typename MatrixType> -void SparseLU::factorize(const MatrixType& matrix) +template <typename MatrixType, typename OrderingType> +void SparseLU<MatrixType, OrderingType>::factorize(const MatrixType& matrix) { - eigen_assert(m_analysisIsok && "analyzePattern() should be called first"); + eigen_assert(m_analysisIsOk && "analyzePattern() should be called first"); eigen_assert((matrix.rows() == matrix.cols()) && "Only for squared matrices"); + + ScalarVector work; // Scalar work vector + IndexVector iwork; //Index work vector + // Apply the column permutation computed in analyzepattern() m_mat = matrix * m_perm_c; m_mat.makeCompressed(); @@ -293,7 +394,7 @@ void SparseLU::factorize(const MatrixType& matrix) int maxpanel = m_panel_size * m; // Allocate storage common to the factor routines int lwork = 0; - int info = LUMemInit(m, n, nnz, m_work, m_iwork, lwork, m_fillratio, m_panel_size, m_maxsuper, m_rowblk, m_glu); + int info = LUMemInit(m, n, nnz, work, iwork, lwork, m_fillfactor, m_panel_size, m_maxsuper, m_rowblk, m_glu); if (info) { std::cerr << "UNABLE TO ALLOCATE WORKING MEMORY\n\n" ; @@ -304,27 +405,27 @@ void SparseLU::factorize(const MatrixType& matrix) // Set up pointers for integer working arrays int idx = 0; - VectorBlock<IndexVector> segrep(m_iwork, idx, m); + VectorBlock<IndexVector> segrep(iwork, idx, m); idx += m; - VectorBlock<IndexVector> parent(m_iwork, idx, m); + VectorBlock<IndexVector> parent(iwork, idx, m); idx += m; - VectorBlock<IndexVector> xplore(m_iwork, idx, m); + VectorBlock<IndexVector> xplore(iwork, idx, m); idx += m; - VectorBlock<IndexVector> repfnz(m_iwork, idx, maxpanel); + VectorBlock<IndexVector> repfnz(iwork, idx, maxpanel); idx += maxpanel; - VectorBlock<IndexVector> panel_lsub(m_iwork, idx, maxpanel) + VectorBlock<IndexVector> panel_lsub(iwork, idx, maxpanel); idx += maxpanel; - VectorBlock<IndexVector> xprune(m_iwork, idx, n); + VectorBlock<IndexVector> xprune(iwork, idx, n); idx += n; - VectorBlock<IndexVector> marker(m_iwork, idx, m * LU_NO_MARKER); + VectorBlock<IndexVector> marker(iwork, idx, m * LU_NO_MARKER); repfnz.setConstant(-1); panel_lsub.setConstant(-1); // Set up pointers for scalar working arrays - VectorBlock<ScalarVector> dense(m_work, 0, maxpanel); + VectorBlock<ScalarVector> dense(work, 0, maxpanel); dense.setZero(); - VectorBlock<ScalarVector> tempv(m_work, maxpanel, LU_NUM_TEMPV(m, m_panel_size, m_maxsuper, m_rowblk) ); + VectorBlock<ScalarVector> tempv(work, maxpanel, LU_NUM_TEMPV(m, m_panel_size, m_maxsuper, m_rowblk) ); tempv.setZero(); // Setup Permutation vectors @@ -334,9 +435,9 @@ void SparseLU::factorize(const MatrixType& matrix) // Identify initial relaxed snodes IndexVector relax_end(n); if ( m_symmetricmode = true ) - internal::LU_heap_relax_snode(n, m_etree, m_relax, marker, relax_end); + LU_heap_relax_snode(n, m_etree, m_relax, marker, relax_end); else - internal::LU_relax_snode(n, m_etree, m_relax, marker, relax_end); + LU_relax_snode(n, m_etree, m_relax, marker, relax_end); m_perm_r.setConstant(-1); marker.setConstant(-1); @@ -346,6 +447,7 @@ void SparseLU::factorize(const MatrixType& matrix) IndexVector& xlsub = m_glu.xlsub; IndexVector& xlusup = m_glu.xlusup; IndexVector& xusub = m_glu.xusub; + ScalarVector& lusup = m_glu.lusup; Index& nzlumax = m_glu.nzlumax; supno(0) = IND_EMPTY; @@ -360,7 +462,8 @@ void SparseLU::factorize(const MatrixType& matrix) Index pivrow; // Pivotal row number in the original row matrix int nseg1; // Number of segments in U-column above panel row jcol int nseg; // Number of segments in each U-column - int irep,ir; + int irep,ir, icol; + int i, k, jj,j; for (jcol = 0; jcol < n; ) { if (relax_end(jcol) != IND_EMPTY) @@ -382,9 +485,10 @@ void SparseLU::factorize(const MatrixType& matrix) jsupno = supno(jcol); // Supernode number which column jcol belongs to fsupc = xsup(jsupno); //First column number of the current supernode new_next = nextlu + (xlsub(fsupc+1)-xlsub(fsupc)) * (kcol - jcol + 1); + int mem; while (new_next > nzlumax ) { - mem = LUMemXpand<Scalar>(lusup, nzlumax, nextlu, LUSUP, m_glu); + mem = LUMemXpand<Scalar>(lusup, nzlumax, nextlu, LUSUP, m_glu.num_expansions); if (mem) { std::cerr << "MEMORY ALLOCATION FAILED FOR L FACTOR \n"; @@ -401,10 +505,10 @@ void SparseLU::factorize(const MatrixType& matrix) dense(it.row()) = it.val(); // Numeric update within the snode - LU_snode_bmod(icol, jsupno, fsupc, dense, glu); + LU_snode_bmod(icol, jsupno, fsupc, dense, m_glu); // Eliminate the current column - info = LU_pivotL(icol, m_diagpivotthresh, m_perm_r, m_iperm_c, pivrow, m_glu); + info = LU_pivotL(icol, m_diagpivotthresh, m_perm_r, iperm_c, pivrow, m_glu); if ( info ) { m_info = NumericalIssue; @@ -419,7 +523,7 @@ void SparseLU::factorize(const MatrixType& matrix) { // Work on one panel of panel_size columns // Adjust panel size so that a panel won't overlap with the next relaxed snode. - int panel_size = wdef; // upper bound on panel width + int panel_size = m_panel_size; // upper bound on panel width for (k = jcol + 1; k < std::min(jcol+panel_size, n); k++) { if (relax_end(k) != IND_EMPTY) @@ -438,7 +542,7 @@ void SparseLU::factorize(const MatrixType& matrix) LU_panel_bmod(m, panel_size, jcol, nseg1, dense, tempv, segrep, repfnz, m_glu); // Sparse LU within the panel, and below the panel diagonal - for ( jj = jcol, j< jcol + panel_size; jj++) + for ( jj = jcol; j< jcol + panel_size; jj++) { k = (jj - jcol) * m; // Column index for w-wide arrays @@ -446,7 +550,7 @@ void SparseLU::factorize(const MatrixType& matrix) //Depth-first-search for the current column VectorBlock<IndexVector> panel_lsubk(panel_lsub, k, m); VectorBlock<IndexVector> repfnz_k(repfnz, k, m); - info = LU_column_dfs(m, jj, perm_r, nseg, panel_lsub(k), segrep, repfnz_k, xprune, marker, parent, xplore, m_glu); + info = LU_column_dfs(m, jj, m_perm_r, m_maxsuper, nseg, panel_lsub(k), segrep, repfnz_k, xprune, marker, parent, xplore, m_glu); if ( !info ) { std::cerr << "UNABLE TO EXPAND MEMORY IN COLUMN_DFS() \n"; @@ -467,7 +571,7 @@ void SparseLU::factorize(const MatrixType& matrix) } // Copy the U-segments to ucol(*) - info = LU_copy_to_col(jj, nseg, segrep, repfnz_k, perm_r, dense_k, m_glu); + info = LU_copy_to_col(jj, nseg, segrep, repfnz_k, m_perm_r, dense_k, m_glu); if ( info ) { std::cerr << "UNABLE TO EXPAND MEMORY IN COPY_TO_UCOL() \n"; @@ -506,9 +610,9 @@ void SparseLU::factorize(const MatrixType& matrix) k = 0; for (i = 0; i < m; ++i) { - if ( perm_r(i) == IND_EMPTY ) + if ( m_perm_r(i) == IND_EMPTY ) { - perm_r(i) = n + k; + m_perm_r(i) = n + k; ++k; } } @@ -518,140 +622,21 @@ void SparseLU::factorize(const MatrixType& matrix) // Apply permutation to the L subscripts LU_fixupL(n, m_perm_r, m_glu); - // Free work space iwork and work - //... + // Create supernode matrix L - m_Lstore.setInfos(m, n, m_nnzL, Glu.lusup, Glu.xlusup, Glu.lsub, Glu.xlsub, Glu.supno; Glu.xsup); - // Create the column major upper sparse matrix U - new (&m_Ustore) Map<SparseMatrix<Scalar, ColumnMajor> > ( m, n, m_nnzU, Glu.xusub.data(), Glu.usub.data(), Glu.ucol.data() ); //FIXME - this.m_Ustore = m_Ustore; + m_Lstore.setInfos(m, n, m_glu.lusup, m_glu.xlusup, m_glu.lsub, m_glu.xlsub, m_glu.supno, m_glu.xsup); + // Create the column major upper sparse matrix U; + // it is assumed here that MatrixType = SparseMatrix<Scalar,ColumnMajor> + new (&m_Ustore) Map<MatrixType > ( m, n, m_nnzU, m_glu.xusub.data(), m_glu.usub.data(), m_glu.ucol.data() ); + this.m_Ustore = m_Ustore; //FIXME Is it necessary m_info = Success; - m_factorizationIsOk = ok; + m_factorizationIsOk = true; } -template<typename Rhs, typename Dest> -bool SparseLU::_solve(const MatrixBase<Rhs> &b, MatrixBase<Dest> &X) const -{ - eigen_assert(m_isInitialized && "The matrix should be factorized first"); - EIGEN_STATIC_ASSERT((Dest::Flags&RowMajorBit)==0, - THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES); - - X = b; /* on return, X is overwritten by the computed solution */ - - int nrhs = b.cols(); - - // Permute the right hand side to form Pr*B - X = m_perm_r * X; - - // Forward solve PLy = Pb; - Index fsupc; // First column of the current supernode - Index istart; // Pointer index to the subscript of the current column - Index nsupr; // Number of rows in the current supernode - Index nsupc; // Number of columns in the current supernode - Index nrow; // Number of rows in the non-diagonal part of the supernode - Index luptr; // Pointer index to the current nonzero value - Index iptr; // row index pointer iterator - Index irow; //Current index row - Scalar * Lval = m_Lstore.valuePtr(); // Nonzero values - Matrix<Scalar,Dynamic,Dynamic> work(n,nrhs); // working vector - work.setZero(); - int j; - for (k = 0; k <= m_Lstore.nsuper(); k ++) - { - fsupc = m_Lstore.sup_to_col()[k]; - istart = m_Lstore.rowIndexPtr()[fsupc]; - nsupr = m_Lstore..rowIndexPtr()[fsupc+1] - istart; - nsupc = m_Lstore.sup_to_col()[k+1] - fsupc; - nrow = nsupr - nsupc; - - if (nsupc == 1 ) - { - for (j = 0; j < nrhs; j++) - { - luptr = m_Lstore.colIndexPtr()[fsupc]; //FIXME Should be outside the for loop - for (iptr = istart+1; iptr < m_Lstore.rowIndexPtr()[fsupc+1]; iptr++) - { - irow = m_Lstore.rowIndex()[iptr]; - ++luptr; - X(irow, j) -= X(fsupc, j) * Lval[luptr]; - } - } - } - else - { - // The supernode has more than one column - - // Triangular solve - luptr = m_Lstore.colIndexPtr()[fsupc]; //FIXME Should be outside the loop - Map<Matrix<Scalar,Dynamic,Dynamic>, 0, OuterStride<> > A( &(Lval[luptr]), nsupc, nsupc, OuterStride<>(nsupr) ); -// Map<Matrix<Scalar,Dynamic,Dynamic>, 0, OuterStride > u( &(X(fsupc,0)), nsupc, nrhs, OuterStride<>(X.rows()) ); - Matrix<Scalar,Dynamic,Dynamic>& u = X.block(fsupc, 0, nsupc, nrhs); //FIXME Check this - u = A.triangularView<Lower>().solve(u); - - // Matrix-vector product - new (&A) Map<Matrix<Scalar,Dynamic,Dynamic>, 0, OuterStride<> > ( &(Lval[luptr+nsupc]), nrow, nsupc, OuterStride<>(nsupr) ); - work.block(0, 0, nrow, nrhs) = A * u; - - //Begin Scatter - for (j = 0; j < nrhs; j++) - { - iptr = istart + nsupc; - for (i = 0; i < nrow; i++) - { - irow = m_Lstore.rowIndex()[iptr]; - X(irow, j) -= work(i, j); // Scatter operation - work(i, j) = Scalar(0); - iptr++; - } - } - } - } // end for all supernodes - - // Back solve Ux = y - for (k = m_Lstore.nsuper(); k >= 0; k--) - { - fsupc = m_Lstore.sup_to_col()[k]; - istart = m_Lstore.rowIndexPtr()[fsupc]; - nsupr = m_Lstore..rowIndexPtr()[fsupc+1] - istart; - nsupc = m_Lstore.sup_to_col()[k+1] - fsupc; - luptr = m_Lstore.colIndexPtr()[fsupc]; - - if (nsupc == 1) - { - for (j = 0; j < nrhs; j++) - { - X(fsupc, j) /= Lval[luptr]; - } - } - else - { - Map<Matrix<Scalar,Dynamic,Dynamic>, 0, OuterStride<> > A( &(Lval[luptr]), nsupc, nsupc, OuterStride<>(nsupr) ); - Matrix<Scalar,Dynamic,Dynamic>& u = X.block(fsupc, 0, nsupc, nrhs); - u = A.triangularView<Upper>().solve(u); - } - - for (j = 0; j < nrhs; ++j) - { - for (jcol = fsupc; jcol < fsupc + nsupc; jcol++) - { - for (i = m_Ustore.outerIndexPtr()[jcol]; i < m_Ustore.outerIndexPtr()[jcol]; i++) - { - irow = m_Ustore.InnerIndices()[i]; - X(irow, j) -= X(irow, jcol) * m_Ustore.Values()[i]; - } - } - } - } // End For U-solve - - // Permute back the solution - X = m_perm_c * X; - - return true; -} -namespace internal { +/*namespace internal { template<typename _MatrixType, typename Derived, typename Rhs> struct solve_retval<SparseLU<_MatrixType,Derived>, Rhs> @@ -666,7 +651,7 @@ struct solve_retval<SparseLU<_MatrixType,Derived>, Rhs> } }; -} // end namespace internal +}*/ // end namespace internal diff --git a/Eigen/src/SparseLU/SparseLU_Coletree.h b/Eigen/src/SparseLU/SparseLU_Coletree.h index 4c42387be..00bb97796 100644 --- a/Eigen/src/SparseLU/SparseLU_Coletree.h +++ b/Eigen/src/SparseLU/SparseLU_Coletree.h @@ -44,13 +44,28 @@ */ #ifndef SPARSELU_COLETREE_H #define SPARSELU_COLETREE_H +/** Find the root of the tree/set containing the vertex i : Use Path halving */ +template<typename IndexVector> +int etree_find (int i, IndexVector& pp) +{ + int p = pp(i); // Parent + int gp = pp(p); // Grand parent + while (gp != p) + { + pp(i) = gp; // Parent pointer on find path is changed to former grand parent + i = gp; + p = pp(i); + gp = pp(p); + } + return p; +} /** Compute the column elimination tree of a sparse matrix * NOTE : The matrix is supposed to be in column-major format. * */ template<typename MatrixType, typename IndexVector> -int SparseLU::LU_sp_coletree(const MatrixType& mat, IndexVector& parent) +int LU_sp_coletree(const MatrixType& mat, IndexVector& parent) { int nc = mat.cols(); // Number of columns int nr = mat.rows(); // Number of rows @@ -87,7 +102,7 @@ int SparseLU::LU_sp_coletree(const MatrixType& mat, IndexVector& parent) { // A sequence of interleaved find and union is performed row = firstcol(it.row()); if (row >= col) continue; - rset = internal::etree_find(row, pp); // Find the name of the set containing row + rset = etree_find(row, pp); // Find the name of the set containing row rroot = root(rset); if (rroot != col) { @@ -100,52 +115,6 @@ int SparseLU::LU_sp_coletree(const MatrixType& mat, IndexVector& parent) return 0; } -/** Find the root of the tree/set containing the vertex i : Use Path halving */ -template<typename IndexVector> -int etree_find (int i, IndexVector& pp) -{ - int p = pp(i); // Parent - int gp = pp(p); // Grand parent - while (gp != p) - { - pp(i) = gp; // Parent pointer on find path is changed to former grand parent - i = gp; - p = pp(i); - gp = pp(p); - } - return p; -} - -/** - * Post order a tree - * \param parent Input tree - * \param post postordered tree - */ -template<typename IndexVector> -void SparseLU::LU_TreePostorder(int n, IndexVector& parent, IndexVector& post) -{ - IndexVector first_kid, next_kid; // Linked list of children - int postnum; - // Allocate storage for working arrays and results - first_kid.resize(n+1); - next_kid.setZero(n+1); - post.setZero(n+1); - - // Set up structure describing children - int v, dad; - first_kid.setConstant(-1); - for (v = n-1, v >= 0; v--) - { - dad = parent(v); - next_kid(v) = first_kid(dad); - first_kid(dad) = v; - } - - // Depth-first search from dummy root vertex #n - postnum = 0; - internal::LU_nr_etdfs(n, parent, first_kid, next_kid, post, postnum); - return post; -} /** * Depth-first search from vertex n. No recursion. * This routine was contributed by Cédric Doucet, CEDRAT Group, Meylan, France. @@ -190,4 +159,36 @@ void LU_nr_etdfs (int n, IndexVector& parent, IndexVector& first_kid, IndexVecto } } + +/** + * Post order a tree + * \param parent Input tree + * \param post postordered tree + */ +template<typename IndexVector> +void LU_TreePostorder(int n, IndexVector& parent, IndexVector& post) +{ + IndexVector first_kid, next_kid; // Linked list of children + int postnum; + // Allocate storage for working arrays and results + first_kid.resize(n+1); + next_kid.setZero(n+1); + post.setZero(n+1); + + // Set up structure describing children + int v, dad; + first_kid.setConstant(-1); + for (v = n-1; v >= 0; v--) + { + dad = parent(v); + next_kid(v) = first_kid(dad); + first_kid(dad) = v; + } + + // Depth-first search from dummy root vertex #n + postnum = 0; + LU_nr_etdfs(n, parent, first_kid, next_kid, post, postnum); + return post; +} + #endif
\ No newline at end of file diff --git a/Eigen/src/SparseLU/SparseLU_Matrix.h b/Eigen/src/SparseLU/SparseLU_Matrix.h index e4bf7eda8..70570ab9c 100644 --- a/Eigen/src/SparseLU/SparseLU_Matrix.h +++ b/Eigen/src/SparseLU/SparseLU_Matrix.h @@ -45,17 +45,17 @@ template <typename _Scalar, typename _Index> class SuperNodalMatrix { public: - typedef typename _Scalar Scalar; - typedef typename _Index Index; + typedef _Scalar Scalar; + typedef _Index Index; public: SuperNodalMatrix() { } - SuperNodalMatrix(Index m, Index n, Index nnz, Scalar *nzval, Index* nzval_colptr, Index* rowind, + SuperNodalMatrix(Index m, Index n, Scalar *nzval, Index* nzval_colptr, Index* rowind, Index* rowind_colptr, Index* col_to_sup, Index* sup_to_col ) { - setInfos(m, n, nnz, nzval, nzval_colptr, rowind, rowind_colptr, col_to_sup, sup_to_col); + setInfos(m, n, nzval, nzval_colptr, rowind, rowind_colptr, col_to_sup, sup_to_col); } ~SuperNodalMatrix() @@ -68,12 +68,11 @@ class SuperNodalMatrix * FIXME This class will be modified such that it can be use in the course * of the factorization. */ - void setInfos(Index m, Index n, Index nnz, Scalar *nzval, Index* nzval_colptr, Index* rowind, + void setInfos(Index m, Index n, Scalar *nzval, Index* nzval_colptr, Index* rowind, Index* rowind_colptr, Index* col_to_sup, Index* sup_to_col ) { m_row = m; m_col = n; - m_nnz = nnz; m_nzval = nzval; m_nzval_colptr = nzval_colptr; m_rowind = rowind; @@ -159,14 +158,14 @@ class SuperNodalMatrix protected: Index m_row; // Number of rows Index m_col; // Number of columns - Index m_nnz; // Number of nonzero values +// Index m_nnz; // Number of nonzero values Index m_nsuper; // Number of supernodes Scalar* m_nzval; //array of nonzero values packed by column Index* m_nzval_colptr; //nzval_colptr[j] Stores the location in nzval[] which starts column j Index* m_rowind; // Array of compressed row indices of rectangular supernodes Index* m_rowind_colptr; //rowind_colptr[j] stores the location in rowind[] which starts column j - Index *m_col_to_sup; // col_to_sup[j] is the supernode number to which column j belongs - Index *m_sup_to_col; //sup_to_col[s] points to the starting column of the s-th supernode + Index* m_col_to_sup; // col_to_sup[j] is the supernode number to which column j belongs + Index* m_sup_to_col; //sup_to_col[s] points to the starting column of the s-th supernode private : }; @@ -176,7 +175,7 @@ class SuperNodalMatrix * */ template<typename Scalar, typename Index> -class SuperNodalMatrix::InnerIterator +class SuperNodalMatrix<Scalar,Index>::InnerIterator { public: InnerIterator(const SuperNodalMatrix& mat, Index outer) @@ -184,7 +183,7 @@ class SuperNodalMatrix::InnerIterator m_outer(outer), m_idval(mat.colIndexPtr()[outer]), m_startval(m_idval), - m_endval(mat.colIndexPtr()[outer+1]) + m_endval(mat.colIndexPtr()[outer+1]), m_idrow(mat.rowIndexPtr()[outer]), m_startidrow(m_idrow), m_endidrow(mat.rowIndexPtr()[outer+1]) @@ -197,7 +196,7 @@ class SuperNodalMatrix::InnerIterator } inline Scalar value() const { return m_matrix.valuePtr()[m_idval]; } - inline Scalar& valueRef() { return const_cast<Scalar&>(m_matrix.valuePtr()[m_idval]; } + inline Scalar& valueRef() { return const_cast<Scalar&>(m_matrix.valuePtr()[m_idval]); } inline Index index() const { return m_matrix.rowIndex()[m_idrow]; } inline Index row() const { return index(); } @@ -221,13 +220,14 @@ class SuperNodalMatrix::InnerIterator const Index m_startidrow; // Start of the row indices of the current column value const Index m_endidrow; // End of the row indices of the current column value }; + /** - * \brief Iterator class to iterate over nonzeros Supernodes in the triangular supernodal matrix + * \brief Iterator class to iterate over Supernodes in the triangular supernodal matrix * * The final goal is to use this class when dealing with supernodes during numerical factorization */ template<typename Scalar, typename Index> -class SuperNodalMatrix::SuperNodeIterator +class SuperNodalMatrix<Scalar,Index>::SuperNodeIterator { public: SuperNodeIterator(const SuperNodalMatrix& mat) diff --git a/Eigen/src/SparseLU/SparseLU_Memory.h b/Eigen/src/SparseLU/SparseLU_Memory.h index b2888e9a0..ea9ef6d89 100644 --- a/Eigen/src/SparseLU/SparseLU_Memory.h +++ b/Eigen/src/SparseLU/SparseLU_Memory.h @@ -54,9 +54,67 @@ #define LU_GluIntArray(n) (5* (n) + 5) #define LU_TempSpace(m, w) ( (2*w + 4 + LU_NO_MARKER) * m * sizeof(Index) \ + (w + 1) * m * sizeof(Scalar) ) - -namespace internal { + + +/** + * Expand the existing storage to accomodate more fill-ins + * \param vec Valid pointer to the vector to allocate or expand + * \param [in,out]length At input, contain the current length of the vector that is to be increased. At output, length of the newly allocated vector + * \param [in]len_to_copy Current number of elements in the factors + * \param keep_prev true: use length and do not expand the vector; false: compute new_len and expand + * \param [in,out]num_expansions Number of times the memory has been expanded + */ +template <typename VectorType > +int expand(VectorType& vec, int& length, int len_to_copy, bool keep_prev, int& num_expansions) +{ + + float alpha = 1.5; // Ratio of the memory increase + int new_len; // New size of the allocated memory + if(num_expansions == 0 || keep_prev) + new_len = length ; // First time allocate requested + else + new_len = alpha * length ; + + VectorType old_vec; // Temporary vector to hold the previous values + if (len_to_copy > 0 ) + old_vec = vec; // old_vec should be of size len_to_copy... to be checked + + //expand the current vector //FIXME Should be in a try ... catch region + vec.resize(new_len); + /* + * Test if the memory has been well allocated + * otherwise reduce the size and try to reallocate + * copy data from previous vector (if exists) to the newly allocated vector + */ + if ( num_expansions != 0 ) // The memory has been expanded before + { + int tries = 0; + if (keep_prev) + { + if (!vec.size()) return new_len ; + } + else + { + while (!vec.size()) + { + // Reduce the size and allocate again + if ( ++tries > 10) return new_len; + alpha = LU_Reduce(alpha); + new_len = alpha * length ; + vec.resize(new_len); //FIXME Should be in a try catch section + } + } // end allocation + + //Copy the previous values to the newly allocated space + if (len_to_copy > 0) + vec.segment(0, len_to_copy) = old_vec; + } // end expansion + length = new_len; + if(num_expansions) ++num_expansions; + return 0; +} + /** * \brief Allocate various working space for the numerical factorization phase. * \param m number of rows of the input matrix @@ -70,10 +128,10 @@ namespace internal { * NOTE Unlike SuperLU, this routine does not support successive factorization with the same pattern and the row permutation */ template <typename ScalarVector,typename IndexVector> -int LUMemInit(int m, int n, int annz, ScalarVector& work, IndexVector& iwork, int lwork, int fillratio, int panel_size, int maxsuper, int rowblk, GlobalLU_t<ScalarVector, IndexVector>& glu) +int LUMemInit(int m, int n, int annz, ScalarVector& work, IndexVector& iwork, int lwork, int fillratio, int panel_size, int maxsuper, int rowblk, LU_GlobalLU_t<ScalarVector, IndexVector>& glu) { - typedef typename ScalarVector::Scalar; - typedef typename IndexVector::Index; + typedef typename ScalarVector::Scalar Scalar; + typedef typename IndexVector::Index Index; int& num_expansions = glu.num_expansions; //No memory expansions so far num_expansions = 0; @@ -82,14 +140,14 @@ int LUMemInit(int m, int n, int annz, ScalarVector& work, IndexVector& iwork, in Index& nzumax = glu.nzumax; Index& nzlumax = glu.nzlumax; nzumax = nzlumax = fillratio * annz; // estimated number of nonzeros in U - nzlmax = std::max(1, m_fill_ratio/4.) * annz; // estimated nnz in L factor + nzlmax = std::max(1., fillratio/4.) * annz; // estimated nnz in L factor // Return the estimated size to the user if necessary if (lwork == IND_EMPTY) { int estimated_size; - estimated_size = LU_GluIntArray(n) * sizeof(Index) + LU_TempSpace(m, m_panel_size) - + (nzlmax + nzumax) * sizeof(Index) + (nzlumax+nzumax) * sizeof(Scalar) + n); + estimated_size = LU_GluIntArray(n) * sizeof(Index) + LU_TempSpace(m, panel_size) + + (nzlmax + nzumax) * sizeof(Index) + (nzlumax+nzumax) * sizeof(Scalar) + n; return estimated_size; } @@ -126,8 +184,8 @@ int LUMemInit(int m, int n, int annz, ScalarVector& work, IndexVector& iwork, in } // LUWorkInit : Now, allocate known working storage - int isize = (2 * m_panel_size + 3 + LU_NO_MARKER) * m + n; - int dsize = m * m_panel_size + LU_NUM_TEMPV(m, m_panel_size, m_maxsuper, m_rowblk); + int isize = (2 * panel_size + 3 + LU_NO_MARKER) * m + n; + int dsize = m * panel_size + LU_NUM_TEMPV(m, panel_size, maxsuper, rowblk); iwork.resize(isize); work.resize(isize); @@ -137,65 +195,6 @@ int LUMemInit(int m, int n, int annz, ScalarVector& work, IndexVector& iwork, in } // end LuMemInit /** - * Expand the existing storage to accomodate more fill-ins - * \param vec Valid pointer to the vector to allocate or expand - * \param [in,out]length At input, contain the current length of the vector that is to be increased. At output, length of the newly allocated vector - * \param [in]len_to_copy Current number of elements in the factors - * \param keep_prev true: use length and do not expand the vector; false: compute new_len and expand - * \param [in,out]num_expansions Number of times the memory has been expanded - */ -template <typename VectorType > -int SparseLU::expand(VectorType& vec, int& length, int len_to_copy, bool keep_prev, int& num_expansions) -{ - - float alpha = 1.5; // Ratio of the memory increase - int new_len; // New size of the allocated memory - - if(num_expansions == 0 || keep_prev) - new_len = length ; // First time allocate requested - else - new_len = alpha * length ; - - VectorType old_vec; // Temporary vector to hold the previous values - if (len_to_copy > 0 ) - old_vec = vec; // old_vec should be of size len_to_copy... to be checked - - //expand the current vector //FIXME Should be in a try ... catch region - vec.resize(new_len); - /* - * Test if the memory has been well allocated - * otherwise reduce the size and try to reallocate - * copy data from previous vector (if exists) to the newly allocated vector - */ - if ( num_expansions != 0 ) // The memory has been expanded before - { - int tries = 0; - if (keep_prev) - { - if (!vec.size()) return new_len ; - } - else - { - while (!vec.size()) - { - // Reduce the size and allocate again - if ( ++tries > 10) return new_len; - alpha = LU_Reduce(alpha); - new_len = alpha * length ; - vec.resize(new_len); //FIXME Should be in a try catch section - } - } // end allocation - - //Copy the previous values to the newly allocated space - if (len_to_copy > 0) - vec.segment(0, len_to_copy) = old_vec; - } // end expansion - length = new_len; - if(num_expansions) ++num_expansions; - return 0; -} - -/** * \brief Expand the existing storage * \param vec vector to expand * \param [in,out]maxlen On input, previous size of vec (Number of elements to copy ). on output, new size @@ -203,18 +202,17 @@ int SparseLU::expand(VectorType& vec, int& length, int len_to_copy, bool keep_p * \param glu Global data structure * \return 0 on success, > 0 size of the memory allocated so far */ -template <typename IndexVector> -int SparseLU::LUMemXpand(VectorType& vec, int& maxlen, int next, LU_MemType memtype, LU_GlobalLu_t& glu) +template <typename VectorType> +int LUMemXpand(VectorType& vec, int& maxlen, int next, LU_MemType memtype, int& num_expansions) { int failed_size; - int& num_expansions = glu.num_expansions; if (memtype == USUB) - failed_size = expand<IndexVector>(vec, maxlen, next, 1, num_expansions); + failed_size = expand<VectorType>(vec, maxlen, next, 1, num_expansions); else - failed_size = expand<IndexVector>(vec, maxlen, next, 0, num_expansions); + failed_size = expand<VectorType>(vec, maxlen, next, 0, num_expansions); if (failed_size) - return faileld_size; + return failed_size; // The following code is not really needed since maxlen is passed by reference // and correspond to the appropriate field in glu @@ -236,6 +234,4 @@ int SparseLU::LUMemXpand(VectorType& vec, int& maxlen, int next, LU_MemType memt return 0 ; } - -}// Namespace Internal #endif
\ No newline at end of file diff --git a/Eigen/src/SparseLU/SparseLU_Structs.h b/Eigen/src/SparseLU/SparseLU_Structs.h index 618d05eac..fd2a59a41 100644 --- a/Eigen/src/SparseLU/SparseLU_Structs.h +++ b/Eigen/src/SparseLU/SparseLU_Structs.h @@ -82,11 +82,11 @@ */ #ifndef EIGEN_LU_STRUCTS #define EIGEN_LU_STRUCTS -typedef enum {LUSUP, UCOL, LSUB, USUB, LLVL, ULVL} MemType; +typedef enum {LUSUP, UCOL, LSUB, USUB, LLVL, ULVL} LU_MemType; -template <typename ScalarVector, typename IndexVector> -struct { +template <typename IndexVector, typename ScalarVector> +struct LU_GlobalLU_t { typedef typename IndexVector::Index Index; IndexVector xsup; //First supernode column ... xsup(s) points to the beginning of the s-th supernode IndexVector supno; // Supernode number corresponding to this column (column to supernode mapping) @@ -96,14 +96,11 @@ struct { IndexVector xlsub; // pointers to the beginning of each column in lsub Index nzlmax; // Current max size of lsub Index nzlumax; // Current max size of lusup - ScalarVector ucol; // nonzero values of U ordered by columns IndexVector usub; // row indices of U columns in ucol IndexVector xusub; // Pointers to the beginning of each column of U in ucol Index nzumax; // Current max size of ucol - Index n; // Number of columns in the matrix - + Index n; // Number of columns in the matrix int num_expansions; -} GlobalLU_t; - +}; #endif
\ No newline at end of file diff --git a/Eigen/src/SparseLU/SparseLU_Utils.h b/Eigen/src/SparseLU/SparseLU_Utils.h index 5c12b6243..9e63bf7e4 100644 --- a/Eigen/src/SparseLU/SparseLU_Utils.h +++ b/Eigen/src/SparseLU/SparseLU_Utils.h @@ -25,7 +25,6 @@ #ifdef EIGEN_SPARSELU_UTILS_H #define EIGEN_SPARSELU_UTILS_H -// Number of marker arrays used in the factorization each of size n template <typename IndexVector> void SparseLU::LU_countnz(const int n, IndexVector& xprune, int& nnzL, int& nnzU, GlobalLU_t& Glu) @@ -34,7 +33,6 @@ void SparseLU::LU_countnz(const int n, IndexVector& xprune, int& nnzL, int& nnzU IndexVector& xlsub = Glu.xlsub; nnzL = 0; nnzU = (Glu.xusub)(n); - int nnzL0 = 0; int nsuper = (Glu.supno)(n); int jlen, irep; @@ -52,7 +50,6 @@ void SparseLU::LU_countnz(const int n, IndexVector& xprune, int& nnzL, int& nnzU jlen--; } irep = xsup(i+1) - 1; - nnzL0 += xprune(irep) - xlsub(irep); } } diff --git a/Eigen/src/SparseLU/SparseLU_column_bmod.h b/Eigen/src/SparseLU/SparseLU_column_bmod.h index 965a0c0ad..da464cbfc 100644 --- a/Eigen/src/SparseLU/SparseLU_column_bmod.h +++ b/Eigen/src/SparseLU/SparseLU_column_bmod.h @@ -44,6 +44,7 @@ */ #ifndef SPARSELU_COLUMN_BMOD_H #define SPARSELU_COLUMN_BMOD_H + /** * \brief Performs numeric block updates (sup-col) in topological order * @@ -59,11 +60,13 @@ * > 0 - number of bytes allocated when run out of space * */ -template <typename ScalarVector, typename IndexVector> -int SparseLU::LU_column_bmod(const int jcol, const int nseg, ScalarVector& dense, ScalarVector& tempv, IndexVector& segrep, IndexVector& repfnz, int fpanelc, LU_GlobalLU_t& glu) +template <typename IndexVector, typename ScalarVector> +int LU_column_bmod(const int jcol, const int nseg, ScalarVector& dense, ScalarVector& tempv, IndexVector& segrep, IndexVector& repfnz, int fpanelc, LU_GlobalLU_t<IndexVector, ScalarVector>& glu) { - + typedef typename IndexVector::Index Index; + typedef typename ScalarVector::Scalar Scalar; int jsupno, k, ksub, krep, krep_ind, ksupno; + int lptr, nrow, isub, i, irow, nextlu, new_next, ufirst; int fsupc, nsupc, nsupr, luptr, kfnz, no_zeros; /* krep = representative of current k-th supernode * fsupc = first supernodal column @@ -81,7 +84,7 @@ int SparseLU::LU_column_bmod(const int jcol, const int nseg, ScalarVector& dense ScalarVector& lusup = glu.lusup; Index& nzlumax = glu.nzlumax; - int jsupno = supno(jcol); + jsupno = supno(jcol); // For each nonzero supernode segment of U[*,j] in topological order k = nseg - 1; int d_fsupc; // distance between the first column of the current panel and the @@ -134,7 +137,7 @@ int SparseLU::LU_column_bmod(const int jcol, const int nseg, ScalarVector& dense Map<Matrix<Scalar,Dynamic,Dynamic>, 0, OuterStride<> > A( &(lusup.data()[luptr]), segsize, segsize, OuterStride<>(nsupr) ); VectorBlock<ScalarVector> u(tempv, 0, segsize); - u = A.triangularView<Lower>().solve(u); + u = A.template triangularView<Lower>().solve(u); // Dense matrix-vector product y <-- A*x luptr += segsize; @@ -168,18 +171,18 @@ int SparseLU::LU_column_bmod(const int jcol, const int nseg, ScalarVector& dense fsupc = xsup(jsupno); // copy the SPA dense into L\U[*,j] + int mem; new_next = nextlu + xlsub(fsupc + 1) - xlsub(fsupc); while (new_next > nzlumax ) { - mem = LUmemXpand<Scalar>(glu.lusup, nzlumax, nextlu, LUSUP, glu); + mem = LUMemXpand<ScalarVector>(glu.lusup, nzlumax, nextlu, LUSUP, glu.num_expansions); if (mem) return mem; - //lsub = glu.lsub; // Should not be updated here } for (isub = xlsub(fsupc); isub < xlsub(fsupc+1); isub++) { irow = lsub(isub); - lusub(nextlu) = dense(irow); + lusup(nextlu) = dense(irow); dense(irow) = Scalar(0.0); ++nextlu; } @@ -210,7 +213,7 @@ int SparseLU::LU_column_bmod(const int jcol, const int nseg, ScalarVector& dense ufirst = xlusup(jcol) + d_fsupc; Map<Matrix<Scalar,Dynamic,Dynamic>, 0, OuterStride<> > A( &(lusup.data()[luptr]), nsupc, nsupc, OuterStride<>(nsupr) ); VectorBlock<ScalarVector> u(lusup, ufirst, nsupc); - u = A.triangularView().solve(u); + u = A.template triangularView().solve(u); new (&A) Map<Matrix<Scalar,Dynamic,Dynamic>, 0, OuterStride<> > ( &(lusup.data()[luptr+nsupc]), nrow, nsupc, OuterStride<>(nsupr) ); VectorBlock<ScalarVector> l(lusup, ufirst+nsupc, nrow); diff --git a/Eigen/src/SparseLU/SparseLU_column_dfs.h b/Eigen/src/SparseLU/SparseLU_column_dfs.h index 7fda536a9..8c6202d67 100644 --- a/Eigen/src/SparseLU/SparseLU_column_dfs.h +++ b/Eigen/src/SparseLU/SparseLU_column_dfs.h @@ -44,6 +44,7 @@ */ #ifndef SPARSELU_COLUMN_DFS_H #define SPARSELU_COLUMN_DFS_H + /** * \brief Performs a symbolic factorization on column jcol and decide the supernode boundary * @@ -57,6 +58,7 @@ * \param m number of rows in the matrix * \param jcol Current column * \param perm_r Row permutation + * \param maxsuper * \param [in,out] nseg Number of segments in current U[*,j] - new segments appended * \param lsub_col defines the rhs vector to start the dfs * \param [in,out] segrep Segment representatives - new segments appended @@ -71,9 +73,10 @@ * */ template <typename IndexVector, typename ScalarVector> -int SparseLU::LU_column_dfs(const int m, const int jcol, IndexVector& perm_r, IndexVector& nseg IndexVector& lsub_col, IndexVector& segrep, IndexVector& repfnz, IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, LU_GlobalLU_t& glu) +int LU_column_dfs(const int m, const int jcol, IndexVector& perm_r, int maxsuper, IndexVector& nseg, IndexVector& lsub_col, IndexVector& segrep, IndexVector& repfnz, IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, LU_GlobalLU_t<IndexVector, ScalarVector>& glu) { - typedef typename IndexVector::IndexVector; + typedef typename IndexVector::Index Index; + typedef typename ScalarVector::Scalar Scalar; int jcolp1, jcolm1, jsuper, nsuper, nextl; int krow; // Row index of the current element @@ -95,6 +98,7 @@ int SparseLU::LU_column_dfs(const int m, const int jcol, IndexVector& perm_r, In jsuper = nsuper; nextl = xlsub(jcol); VectorBlock<IndexVector> marker2(marker, 2*m, m); + int fsupc, jptr, jm1ptr, ito, ifrom, istop; // For each nonzero in A(*,jcol) do dfs for (k = 0; lsub_col[k] != IND_EMPTY; k++) { @@ -115,7 +119,7 @@ int SparseLU::LU_column_dfs(const int m, const int jcol, IndexVector& perm_r, In lsub(nextl++) = krow; // krow is indexed into A if ( nextl >= nzlmax ) { - mem = LUMemXpand<IndexVector>(lsub, nzlmax, nextl, LSUB, glu); + mem = LUMemXpand<IndexVector>(lsub, nzlmax, nextl, LSUB, glu.num_expansions); if ( mem ) return mem; } if (kmark != jcolm1) jsuper = IND_EMPTY; // Row index subset testing @@ -163,7 +167,7 @@ int SparseLU::LU_column_dfs(const int m, const int jcol, IndexVector& perm_r, In lsub(nextl++) = kchild; if (nextl >= nzlmax) { - mem = LUMemXpand<IndexVector>(lsub, nzlmax, nextl, LSUB, glu); + mem = LUMemXpand<IndexVector>(lsub, nzlmax, nextl, LSUB, glu.num_expansions); if (mem) return mem; } if (chmark != jcolm1) jsuper = IND_EMPTY; @@ -186,7 +190,7 @@ int SparseLU::LU_column_dfs(const int m, const int jcol, IndexVector& perm_r, In xplore(krep) = xdfs; oldrep = krep; krep = chrep; // Go deeped down G(L^t) - parent(krep) = olddrep; + parent(krep) = oldrep; repfnz(krep) = chperm; xdfs = xlsub(krep); maxdfs = xprune(krep); @@ -230,7 +234,7 @@ int SparseLU::LU_column_dfs(const int m, const int jcol, IndexVector& perm_r, In // Make sure the number of columns in a supernode doesn't // exceed threshold - if ( (jcol - fsupc) >= m_maxsuper) jsuper = IND_EMPTY; + if ( (jcol - fsupc) >= maxsuper) jsuper = IND_EMPTY; /* If jcol starts a new supernode, reclaim storage space in * lsub from previous supernode. Note we only store @@ -241,7 +245,7 @@ int SparseLU::LU_column_dfs(const int m, const int jcol, IndexVector& perm_r, In { // starts a new supernode if ( (fsupc < jcolm1-1) ) { // >= 3 columns in nsuper - ito = xlsub(fsupcc+1) + ito = xlsub(fsupc+1); xlsub(jcolm1) = ito; istop = ito + jptr - jm1ptr; xprune(jcolm1) = istop; // intialize xprune(jcol-1) diff --git a/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h b/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h index c97bc6aa4..31411175c 100644 --- a/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +++ b/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h @@ -44,6 +44,7 @@ */ #ifndef SPARSELU_COPY_TO_UCOL_H #define SPARSELU_COPY_TO_UCOL_H + /** * \brief Performs numeric block updates (sup-col) in topological order * @@ -58,17 +59,18 @@ * > 0 - number of bytes allocated when run out of space * */ -template <typename ScalarVector, typename IndexVector> -int SparseLU::LU_copy_to_ucol(const int jcol, const int nseg, IndexVector& segrep, IndexVector& repfnz, IndexVector& perm_r, ScalarVector& dense, LU_GlobalLU_t& glu) +template < typename IndexVector, typename ScalarVector> +int LU_copy_to_ucol(const int jcol, const int nseg, IndexVector& segrep, IndexVector& repfnz, IndexVector& perm_r, ScalarVector& dense, LU_GlobalLU_t<IndexVector, ScalarVector>& glu) { - Index ksupno, k, ksub, krep, ksupno; - typedef typename IndexVector::Index; + typedef typename IndexVector::Index Index; + typedef typename ScalarVector::Scalar Scalar; + Index ksub, krep, ksupno; IndexVector& xsup = glu.xsup; IndexVector& supno = glu.supno; IndexVector& lsub = glu.lsub; IndexVector& xlsub = glu.xlsub; - ScalarVector& ucol = GLu.ucol; + ScalarVector& ucol = glu.ucol; IndexVector& usub = glu.usub; IndexVector& xusub = glu.xusub; Index& nzumax = glu.nzumax; @@ -76,10 +78,11 @@ int SparseLU::LU_copy_to_ucol(const int jcol, const int nseg, IndexVector& segre Index jsupno = supno(jcol); // For each nonzero supernode segment of U[*,j] in topological order - k = nseg - 1; + int k = nseg - 1, i; Index nextu = xusub(jcol); Index kfnz, isub, segsize; Index new_next,irow; + Index fsupc, mem; for (ksub = 0; ksub < nseg; ksub++) { krep = segrep(k); k--; @@ -95,9 +98,9 @@ int SparseLU::LU_copy_to_ucol(const int jcol, const int nseg, IndexVector& segre new_next = nextu + segsize; while (new_next > nzumax) { - mem = LU_MemXpand<ScalarVector>(ucol, nzumax, nextu, UCOL, glu); + mem = LUMemXpand<ScalarVector>(ucol, nzumax, nextu, UCOL, glu.num_expansions); if (mem) return mem; - mem = LU_MemXpand<Index>(usub, nzumax, nextu, USUB, glu); + mem = LUMemXpand<IndexVector>(usub, nzumax, nextu, USUB, glu.num_expansions); if (mem) return mem; } @@ -120,4 +123,5 @@ int SparseLU::LU_copy_to_ucol(const int jcol, const int nseg, IndexVector& segre xusub(jcol + 1) = nextu; // close U(*,jcol) return 0; } + #endif
\ No newline at end of file diff --git a/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h b/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h index 4190e0462..1766c3c2b 100644 --- a/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +++ b/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h @@ -42,8 +42,7 @@ #ifndef SPARSELU_HEAP_RELAX_SNODE_H #define SPARSELU_HEAP_RELAX_SNODE_H -#include <SparseLU_coletree.h> -namespace internal { +#include "SparseLU_Coletree.h" /** * \brief Identify the initial relaxed supernodes * @@ -85,12 +84,12 @@ void LU_heap_relax_snode (const int n, IndexVector& et, const int relax_columns, if (parent != n) // not the dummy root descendants(parent) += descendants(j) + 1; } - // Identify the relaxed supernodes by postorder traversal of the etree register int snode_start; // beginning of a snode register int k; int nsuper_et_post = 0; // Number of relaxed snodes in postordered etree int nsuper_et = 0; // Number of relaxed snodes in the original etree + int l; for (j = 0; j < n; ) { parent = et(j); @@ -132,5 +131,4 @@ void LU_heap_relax_snode (const int n, IndexVector& et, const int relax_columns, // Recover the original etree et = et_save; } -} // end namespace internal #endif diff --git a/Eigen/src/SparseLU/SparseLU_panel_bmod.h b/Eigen/src/SparseLU/SparseLU_panel_bmod.h index 212ecfa6a..4f19b5ac8 100644 --- a/Eigen/src/SparseLU/SparseLU_panel_bmod.h +++ b/Eigen/src/SparseLU/SparseLU_panel_bmod.h @@ -63,8 +63,9 @@ * */ template <typename IndexVector, typename ScalarVector> -void SparseLU::LU_panel_bmod(const int m, const int w, const int jcol, const int nseg, ScalarVector& dense, ScalarVector& tempv, IndexVector& segrep, IndexVector& repfnz, LU_GlobalLU_t& glu) +void LU_panel_bmod(const int m, const int w, const int jcol, const int nseg, ScalarVector& dense, ScalarVector& tempv, IndexVector& segrep, IndexVector& repfnz, LU_GlobalLU_t<IndexVector,ScalarVector>& glu) { + typedef typename ScalarVector::Scalar Scalar; IndexVector& xsup = glu.xsup; IndexVector& supno = glu.supno; IndexVector& lsub = glu.lsub; @@ -74,11 +75,10 @@ void SparseLU::LU_panel_bmod(const int m, const int w, const int jcol, const int int i,ksub,jj,nextl_col,irow; int fsupc, nsupc, nsupr, nrow; - int krep, krep_ind; - int nrow; + int krep, krep_ind, kfnz; int lptr; // points to the row subscripts of a supernode int luptr; // ... - int segsze,no_zeros,irow ; + int segsize,no_zeros,isub ; // For each nonz supernode segment of U[*,j] in topological order int k = nseg - 1; for (ksub = 0; ksub < nseg; ksub++) @@ -105,7 +105,7 @@ void SparseLU::LU_panel_bmod(const int m, const int w, const int jcol, const int { nextl_col = (jj-jcol) * m; VectorBlock<IndexVector> repfnz_col(repfnz.segment(nextl_col, m)); // First nonzero column index for each row - VectorBLock<IndexVector> dense_col(dense.segment(nextl_col, m)); // Scatter/gather entire matrix column from/to here + VectorBlock<IndexVector> dense_col(dense.segment(nextl_col, m)); // Scatter/gather entire matrix column from/to here kfnz = repfnz_col(krep); if ( kfnz == IND_EMPTY ) @@ -134,14 +134,12 @@ void SparseLU::LU_panel_bmod(const int m, const int w, const int jcol, const int luptr += nsupr * no_zeros + no_zeros; // triangular solve with Eigen Map<Matrix<Scalar,Dynamic, Dynamic>, 0, OuterStride<> > A( &(lusup.data()[luptr]), segsize, segsize, OuterStride<>(nsupr) ); -// Map<Matrix<Scalar,Dynamic,1> > u( tempv.data(), segsize); VectorBlock<ScalarVector> u(tempv, 0, segsize); - u = A.triangularView<Lower>().solve(u); + u = A.template triangularView<Lower>().solve(u); luptr += segsize; // Dense Matrix vector product y <-- A*x; new (&A) Map<Matrix<Scalar,Dynamic, Dynamic>, 0, OuterStride<> > ( &(lusup.data()[luptr]), nrow, segsize, OuterStride<>(nsupr) ); -// Map<ScalarVector> l( &(tempv.data()[segsize]), nrow); VectorBlock<ScalarVector> l(tempv, segsize, nrow); l= A * u; diff --git a/Eigen/src/SparseLU/SparseLU_panel_dfs.h b/Eigen/src/SparseLU/SparseLU_panel_dfs.h index d3c2906b2..6f6922ee0 100644 --- a/Eigen/src/SparseLU/SparseLU_panel_dfs.h +++ b/Eigen/src/SparseLU/SparseLU_panel_dfs.h @@ -78,7 +78,7 @@ * */ template <typename MatrixType, typename IndexVector, typename ScalarVector> -void SparseLU::LU_panel_dfs(const int m, const int w, const int jcol, MatrixType& A, IndexVector& perm_r, int& nseg, ScalarVector& dense, IndexVector& panel_lsub, IndexVector& segrep, IndexVector& repfnz, IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, LU_GlobalLU_t& Glu) +void LU_panel_dfs(const int m, const int w, const int jcol, MatrixType& A, IndexVector& perm_r, int& nseg, ScalarVector& dense, IndexVector& panel_lsub, IndexVector& segrep, IndexVector& repfnz, IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, LU_GlobalLU_t<IndexVector, ScalarVector>& glu) { int jj; // Index through each column in the panel @@ -95,10 +95,10 @@ void SparseLU::LU_panel_dfs(const int m, const int w, const int jcol, MatrixType // IndexVector& marker1 = marker.block(m, m); VectorBlock<IndexVector> marker1(marker, m, m); nseg = 0; - IndexVector& xsup = Glu.xsup; - IndexVector& supno = Glu.supno; - IndexVector& lsub = Glu.lsub; - IndexVector& xlsub = Glu.xlsub; + IndexVector& xsup = glu.xsup; + IndexVector& supno = glu.supno; + IndexVector& lsub = glu.lsub; + IndexVector& xlsub = glu.xlsub; // For each column in the panel for (jj = jcol; jj < jcol + w; jj++) { @@ -109,7 +109,7 @@ void SparseLU::LU_panel_dfs(const int m, const int w, const int jcol, MatrixType // For each nnz in A[*, jj] do depth first search - for (MatrixType::InnerIterator it(A, jj); it; ++it) + for (typename MatrixType::InnerIterator it(A, jj); it; ++it) { krow = it.row(); dense_col(krow) = it.val(); diff --git a/Eigen/src/SparseLU/SparseLU_pivotL.h b/Eigen/src/SparseLU/SparseLU_pivotL.h index 32da92481..4a50b2cca 100644 --- a/Eigen/src/SparseLU/SparseLU_pivotL.h +++ b/Eigen/src/SparseLU/SparseLU_pivotL.h @@ -63,22 +63,22 @@ * \param [in,out]perm_r Row permutation (threshold pivoting) * \param [in] iperm_c column permutation - used to finf diagonal of Pc*A*Pc' * \param [out]pivrow The pivot row - * \param Glu Global LU data + * \param glu Global LU data * \return 0 if success, i > 0 if U(i,i) is exactly zero * */ template <typename IndexVector, typename ScalarVector> -int SparseLU::LU_pivotL(const int jcol, const RealScalar diagpivotthresh, IndexVector& perm_r, IndexVector& iperm_c, int& pivrow, GlobalLU_t& Glu) +int LU_pivotL(const int jcol, const typename ScalarVector::RealScalar diagpivotthresh, IndexVector& perm_r, IndexVector& iperm_c, int& pivrow, LU_GlobalLU_t<IndexVector, ScalarVector>& glu) { typedef typename IndexVector::Index Index; typedef typename ScalarVector::Scalar Scalar; // Initialize pointers - IndexVector& lsub = Glu.lsub; // Compressed row subscripts of L rectangular supernodes. - IndexVector& xlsub = Glu.xlsub; // pointers to the beginning of each column subscript in lsub - ScalarVector& lusup = Glu.lusup; // Numerical values of L ordered by columns - IndexVector& xlusup = Glu.xlusup; // pointers to the beginning of each colum in lusup + IndexVector& lsub = glu.lsub; // Compressed row subscripts of L rectangular supernodes. + IndexVector& xlsub = glu.xlsub; // pointers to the beginning of each column subscript in lsub + ScalarVector& lusup = glu.lusup; // Numerical values of L ordered by columns + IndexVector& xlusup = glu.xlusup; // pointers to the beginning of each colum in lusup - Index fsupc = (Glu.xsup)((Glu.supno)(jcol)); // First column in the supernode containing the column jcol + Index fsupc = (glu.xsup)((glu.supno)(jcol)); // First column in the supernode containing the column jcol Index nsupc = jcol - fsupc; // Number of columns in the supernode portion, excluding jcol; nsupc >=0 Index lptr = xlsub(fsupc); // pointer to the starting location of the row subscripts for this supernode portion Index nsupr = xlsub(fsupc+1) - lptr; // Number of rows in the supernode @@ -93,6 +93,7 @@ int SparseLU::LU_pivotL(const int jcol, const RealScalar diagpivotthresh, IndexV Index diag = IND_EMPTY; Index old_pivptr = nsupc; Scalar rtemp; + Index isub, icol, itemp, k; for (isub = nsupc; isub < nsupr; ++isub) { rtemp = std::abs(lu_col_ptr[isub]); if (rtemp > pivmax) { diff --git a/Eigen/src/SparseLU/SparseLU_pruneL.h b/Eigen/src/SparseLU/SparseLU_pruneL.h index dd092b778..c006f6707 100644 --- a/Eigen/src/SparseLU/SparseLU_pruneL.h +++ b/Eigen/src/SparseLU/SparseLU_pruneL.h @@ -44,6 +44,7 @@ */ #ifndef SPARSELU_PRUNEL_H #define SPARSELU_PRUNEL_H + /** * \brief Prunes the L-structure. * @@ -57,25 +58,27 @@ * \param segrep * \param repfnz * \param [out]xprune - * \param Glu Global LU data + * \param glu Global LU data * */ template <typename IndexVector, typename ScalarVector> -void SparseLU::LU_pruneL(const int jcol, const IndexVector& perm_r, const int pivrow, const int nseg, const IndexVector& segrep, IndexVector& repfnz, IndexVector& xprune, GlobalLU_t& Glu) +void LU_pruneL(const int jcol, const IndexVector& perm_r, const int pivrow, const int nseg, const IndexVector& segrep, IndexVector& repfnz, IndexVector& xprune, LU_GlobalLU_t<IndexVector, ScalarVector>& glu) { + typedef typename IndexVector::Index Index; + typedef typename ScalarVector::Scalar Scalar; // Initialize pointers - IndexVector& xsup = Glu.xsup; - IndexVector& supno = Glu.supno; - IndexVector& lsub = Glu.lsub; - IndexVector& xlsub = Glu.xlsub; - ScalarVector& lusup = Glu.lusup; - IndexVector& xlusup = Glu.xlusup; + IndexVector& xsup = glu.xsup; + IndexVector& supno = glu.supno; + IndexVector& lsub = glu.lsub; + IndexVector& xlsub = glu.xlsub; + ScalarVector& lusup = glu.lusup; + IndexVector& xlusup = glu.xlusup; // For each supernode-rep irep in U(*,j] int jsupno = supno(jcol); int i,irep,irep1; bool movnum, do_prune = false; - Index kmin, kmax, ktemp, minloc, maxloc; + Index kmin, kmax, ktemp, minloc, maxloc,krow; for (i = 0; i < nseg; i++) { irep = segrep(i); @@ -88,12 +91,12 @@ void SparseLU::LU_pruneL(const int jcol, const IndexVector& perm_r, const int pi // If a snode overlaps with the next panel, then the U-segment // is fragmented into two parts -- irep and irep1. We should let // pruning occur at the rep-column in irep1s snode. - if (supno(irep) == supno(irep1) continue; // don't prune + if (supno(irep) == supno(irep1) ) continue; // don't prune // If it has not been pruned & it has a nonz in row L(pivrow,i) if (supno(irep) != jsupno ) { - if ( xprune (irep) >= xlsub(irep1) + if ( xprune (irep) >= xlsub(irep1) ) { kmin = xlsub(irep); kmax = xlsub(irep1) - 1; @@ -147,4 +150,5 @@ void SparseLU::LU_pruneL(const int jcol, const IndexVector& perm_r, const int pi } // end pruning } // End for each U-segment } + #endif
\ No newline at end of file diff --git a/Eigen/src/SparseLU/SparseLU_relax_snode.h b/Eigen/src/SparseLU/SparseLU_relax_snode.h index f7b478560..0006dde33 100644 --- a/Eigen/src/SparseLU/SparseLU_relax_snode.h +++ b/Eigen/src/SparseLU/SparseLU_relax_snode.h @@ -42,7 +42,6 @@ #ifndef SPARSELU_RELAX_SNODE_H #define SPARSELU_RELAX_SNODE_H -namespace internal { /** * \brief Identify the initial relaxed supernodes * @@ -87,6 +86,4 @@ void LU_relax_snode (const int n, IndexVector& et, const int relax_columns, Inde } // End postorder traversal of the etree } - -} // end namespace internal #endif diff --git a/Eigen/src/SparseLU/SparseLU_snode_bmod.h b/Eigen/src/SparseLU/SparseLU_snode_bmod.h index 1d6bed8bb..a7034e607 100644 --- a/Eigen/src/SparseLU/SparseLU_snode_bmod.h +++ b/Eigen/src/SparseLU/SparseLU_snode_bmod.h @@ -42,14 +42,13 @@ * granted, provided the above notices are retained, and a notice that * the code was modified is included with the above copyright notice. */ -namespace internal { #ifndef SPARSELU_SNODE_BMOD_H #define SPARSELU_SNODE_BMOD_H -template <typename Index, typename ScalarVector> -int SparseLU::LU_dsnode_bmod (const Index jcol, const Index jsupno, const Index fsupc, - ScalarVector& dense, LU_GlobalLU_t& glu) +template <typename IndexVector, typename ScalarVector> +int LU_snode_bmod (const int jcol, const int jsupno, const int fsupc, + ScalarVector& dense, LU_GlobalLU_t<IndexVector,ScalarVector>& glu) { - typedef typename Matrix<Index, Dynamic, Dynamic> IndexVector; + typedef typename ScalarVector::Scalar Scalar; IndexVector& lsub = glu.lsub; // Compressed row subscripts of ( rectangular supernodes ??) IndexVector& xlsub = glu.xlsub; // xlsub[j] is the starting location of the j-th column in lsub(*) ScalarVector& lusup = glu.lusup; // Numerical values of the rectangular supernodes @@ -77,17 +76,15 @@ int SparseLU::LU_dsnode_bmod (const Index jcol, const Index jsupno, const Index // Solve the triangular system for U(fsupc:jcol, jcol) with L(fspuc:jcol, fsupc:jcol) Map<Matrix<Scalar,Dynamic,Dynamic>,0,OuterStride<> > A( &(lusup.data()[luptr]), nsupc, nsupc, OuterStride<>(nsupr) ); -// Map<Matrix<Scalar,Dynamic,1> > u(&(lusup.data()[ufirst]), nsupc); VectorBlock<ScalarVector> u(lusup, ufirst, nsupc); - u = A.triangularView<Lower>().solve(u); // Call the Eigen dense triangular solve interface + u = A.template triangularView<Lower>().solve(u); // Call the Eigen dense triangular solve interface // Update the trailing part of the column jcol U(jcol:jcol+nrow, jcol) using L(jcol:jcol+nrow, fsupc:jcol) and U(fsupc:jcol) new (&A) Map<Matrix<Scalar,Dynamic,Dynamic>,0,OuterStride<> > ( &(lusup.data()[luptr+nsupc]), nrow, nsupc, OuterStride<>(nsupr) ); // Map<Matrix<Scalar,Dynamic,1> > l(&(lusup.data()[ufirst+nsupc], nrow); VectorBlock<ScalarVector> l(lusup, ufirst+nsupc, nrow); l = l - A * u; - - return 0; + } + return 0; } -} // End namespace internal #endif
\ No newline at end of file diff --git a/Eigen/src/SparseLU/SparseLU_snode_dfs.h b/Eigen/src/SparseLU/SparseLU_snode_dfs.h index 669f172f5..c49fc1461 100644 --- a/Eigen/src/SparseLU/SparseLU_snode_dfs.h +++ b/Eigen/src/SparseLU/SparseLU_snode_dfs.h @@ -44,7 +44,6 @@ */ #ifdef SPARSELU_SNODE_DFS_H #define SPARSELU_SNODE_DFS_H -namespace eigen { /** * \brief Determine the union of the row structures of those columns within the relaxed snode. * NOTE: The relaxed snodes are leaves of the supernodal etree, therefore, @@ -59,7 +58,7 @@ namespace eigen { * \return 0 on success, > 0 size of the memory when memory allocation failed */ template <typename IndexVector, typename ScalarVector> - int SparseLU::LU_snode_dfs(const int jcol, const int kcol, const IndexVector* asub, const IndexVector* colptr, IndexVector& xprune, IndexVector& marker, LU_GlobalLU_t& glu) + int LU_snode_dfs(const int jcol, const int kcol, const IndexVector* asub, const IndexVector* colptr, IndexVector& xprune, IndexVector& marker, LU_GlobalLU_t& glu) { typedef typename IndexVector::Index; IndexVector& xsup = glu.xsup; @@ -86,7 +85,7 @@ namespace eigen { lsub(nextl++) = krow; if( nextl >= nzlmax ) { - mem = LUMemXpand<IndexVector>(lsub, nzlmax, nextl, LSUB, glu); + mem = LUMemXpand<IndexVector>(lsub, nzlmax, nextl, LSUB, glu.num_expansions); if (mem) return mem; } } @@ -100,7 +99,7 @@ namespace eigen { Index new_next = nextl + (nextl - xlsub(jcol)); while (new_next > nzlmax) { - mem = LUMemXpand<IndexVector>(lsub, nzlmax, nextl, LSUB, glu); + mem = LUMemXpand<IndexVector>(lsub, nzlmax, nextl, LSUB, glu.num_expansions); if (mem) return mem; } Index ifrom, ito = nextl; @@ -115,6 +114,4 @@ namespace eigen { xlsub(kcol+1) = nextl; return 0; } - -} // end namespace eigen #endif
\ No newline at end of file diff --git a/bench/spbench/CMakeLists.txt b/bench/spbench/CMakeLists.txt index 079912266..4b3c6f8e3 100644 --- a/bench/spbench/CMakeLists.txt +++ b/bench/spbench/CMakeLists.txt @@ -63,3 +63,8 @@ endif(RT_LIBRARY) add_executable(spbenchsolver spbenchsolver.cpp) target_link_libraries (spbenchsolver ${SPARSE_LIBS}) +add_executable(spsolver sp_solver.cpp) +target_link_libraries (spsolver ${SPARSE_LIBS}) + +add_executable(test_sparseLU test_sparseLU.cpp) +target_link_libraries (test_sparseLU ${SPARSE_LIBS})
\ No newline at end of file |