From f8a0745cb0426eb3095dbea24288a64eddab04f0 Mon Sep 17 00:00:00 2001 From: Desire NUENTSA Date: Wed, 13 Jun 2012 18:26:05 +0200 Subject: Build process... --- bench/spbench/CMakeLists.txt | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'bench/spbench') diff --git a/bench/spbench/CMakeLists.txt b/bench/spbench/CMakeLists.txt index 079912266..4b3c6f8e3 100644 --- a/bench/spbench/CMakeLists.txt +++ b/bench/spbench/CMakeLists.txt @@ -63,3 +63,8 @@ endif(RT_LIBRARY) add_executable(spbenchsolver spbenchsolver.cpp) target_link_libraries (spbenchsolver ${SPARSE_LIBS}) +add_executable(spsolver sp_solver.cpp) +target_link_libraries (spsolver ${SPARSE_LIBS}) + +add_executable(test_sparseLU test_sparseLU.cpp) +target_link_libraries (test_sparseLU ${SPARSE_LIBS}) \ No newline at end of file -- cgit v1.2.3 From 0c9b08e46e7507d9f13200f0702bc57ed6aae52c Mon Sep 17 00:00:00 2001 From: Desire NUENTSA Date: Thu, 14 Jun 2012 18:45:04 +0200 Subject: build complete... almost --- Eigen/src/OrderingMethods/Ordering.h | 21 +++- Eigen/src/SparseLU/SparseLU.h | 162 +++++++++++++------------ Eigen/src/SparseLU/SparseLU_Coletree.h | 1 - Eigen/src/SparseLU/SparseLU_Matrix.h | 61 +++++++--- Eigen/src/SparseLU/SparseLU_Memory.h | 19 +-- Eigen/src/SparseLU/SparseLU_Structs.h | 2 +- Eigen/src/SparseLU/SparseLU_Utils.h | 38 +++--- Eigen/src/SparseLU/SparseLU_column_bmod.h | 11 +- Eigen/src/SparseLU/SparseLU_column_dfs.h | 12 +- Eigen/src/SparseLU/SparseLU_copy_to_ucol.h | 6 +- Eigen/src/SparseLU/SparseLU_heap_relax_snode.h | 10 +- Eigen/src/SparseLU/SparseLU_panel_bmod.h | 12 +- Eigen/src/SparseLU/SparseLU_panel_dfs.h | 10 +- Eigen/src/SparseLU/SparseLU_pivotL.h | 3 +- Eigen/src/SparseLU/SparseLU_pruneL.h | 8 +- Eigen/src/SparseLU/SparseLU_snode_bmod.h | 3 +- Eigen/src/SparseLU/SparseLU_snode_dfs.h | 10 +- bench/spbench/test_sparseLU.cpp | 64 ++++++++++ 18 files changed, 280 insertions(+), 173 deletions(-) create mode 100644 bench/spbench/test_sparseLU.cpp (limited to 'bench/spbench') diff 
--git a/Eigen/src/OrderingMethods/Ordering.h b/Eigen/src/OrderingMethods/Ordering.h index 3a3e3f6fc..eedaed144 100644 --- a/Eigen/src/OrderingMethods/Ordering.h +++ b/Eigen/src/OrderingMethods/Ordering.h @@ -32,9 +32,8 @@ template class OrderingBase { public: - typedef typename internal::traits::MatrixType MatrixType; - typedef typename MatrixType::Scalar Scalar; - typedef typename MatrixType::Index Index; + typedef typename internal::traits::Scalar Scalar; + typedef typename internal::traits::Index Index; typedef PermutationMatrix PermutationType; public: @@ -42,10 +41,12 @@ class OrderingBase { } + template OrderingBase(const MatrixType& mat):OrderingBase() { compute(mat); } + template Derived& compute(const MatrixType& mat) { return derived().compute(mat); @@ -61,9 +62,9 @@ class OrderingBase /** * Get the permutation vector */ - PermutationType& get_perm(const MatrixType& mat) + PermutationType& get_perm() { - if (m_isInitialized = true) return m_P; + if (m_isInitialized == true) return m_P; else abort(); // FIXME Should find a smoother way to exit with error code } @@ -101,7 +102,6 @@ class OrderingBase mutable bool m_isInitialized; SparseMatrix m_mat; // Stores the (symmetrized) matrix to permute }; - /** * Get the approximate minimum degree ordering * If the matrix is not structurally symmetric, an ordering of A^T+A is computed @@ -161,6 +161,15 @@ class AMDOrdering : public OrderingBase > }; +namespace internal { + template + struct traits > + { + typedef _Scalar Scalar; + typedef _Index Index; + }; +} + /** * Get the column approximate minimum degree ordering * The matrix should be in column-major format diff --git a/Eigen/src/SparseLU/SparseLU.h b/Eigen/src/SparseLU/SparseLU.h index 293dcd0b3..682cd465c 100644 --- a/Eigen/src/SparseLU/SparseLU.h +++ b/Eigen/src/SparseLU/SparseLU.h @@ -54,15 +54,15 @@ class SparseLU typedef SuperNodalMatrix SCMatrix; typedef Matrix ScalarVector; typedef Matrix IndexVector; -// typedef GlobalLU_t LU_GlobalLU_t; typedef 
PermutationMatrix PermutationType; public: - SparseLU():m_isInitialized(true),m_symmetricmode(false),m_diagpivotthresh(1.0) + SparseLU():m_isInitialized(true),m_Ustore(0,0,0,0,0,0),m_symmetricmode(false),m_diagpivotthresh(1.0) { initperfvalues(); } - SparseLU(const MatrixType& matrix):SparseLU() + SparseLU(const MatrixType& matrix):m_isInitialized(true),m_Ustore(0,0,0,0,0,0),m_symmetricmode(false),m_diagpivotthresh(1.0) { + initperfvalues(); compute(matrix); } @@ -114,8 +114,23 @@ class SparseLU // return solve_retval(*this, B.derived()); // } + + /** \brief Reports whether previous computation was successful. + * + * \returns \c Success if computation was succesful, + * \c NumericalIssue if the PaStiX reports a problem + * \c InvalidInput if the input matrix is invalid + * + * \sa iparm() + */ + ComputationInfo info() const + { + eigen_assert(m_isInitialized && "Decomposition is not initialized."); + return m_info; + } + template - bool _solve(const MatrixBase &B, MatrixBase &X) const + bool _solve(const MatrixBase &B, MatrixBase &X) const { eigen_assert(m_isInitialized && "The matrix should be factorized first"); EIGEN_STATIC_ASSERT((Dest::Flags&RowMajorBit)==0, @@ -141,7 +156,7 @@ class SparseLU const Scalar * Lval = m_Lstore.valuePtr(); // Nonzero values Matrix work(n, nrhs); // working vector work.setZero(); - int j, k, i, icol,jcol; + int j, k, i,jcol; for (k = 0; k <= m_Lstore.nsuper(); k ++) { fsupc = m_Lstore.supToCol()[k]; @@ -168,13 +183,12 @@ class SparseLU // The supernode has more than one column // Triangular solve - Map, 0, OuterStride<> > A( &(Lval[luptr]), nsupc, nsupc, OuterStride<>(nsupr) ); - // Map, 0, OuterStride > u( &(X(fsupc,0)), nsupc, nrhs, OuterStride<>(X.rows()) ); - Matrix& U = X.block(fsupc, 0, nsupc, nrhs); //FIXME Check this + Map, 0, OuterStride<> > A( &(Lval[luptr]), nsupc, nsupc, OuterStride<>(nsupr) ); + Block U(X, fsupc, 0, nsupc, nrhs); //FIXME TODO Consider more RHS U = A.template triangularView().solve(U); // Matrix-vector 
product - new (&A) Map, 0, OuterStride<> > ( &(Lval[luptr+nsupc]), nrow, nsupc, OuterStride<>(nsupr) ); + new (&A) Map, 0, OuterStride<> > ( &(Lval[luptr+nsupc]), nrow, nsupc, OuterStride<>(nsupr) ); work.block(0, 0, nrow, nrhs) = A * U; //Begin Scatter @@ -210,8 +224,8 @@ class SparseLU } else { - Map, 0, OuterStride<> > A( &(Lval[luptr]), nsupc, nsupc, OuterStride<>(nsupr) ); - Matrix& U = X.block(fsupc, 0, nsupc, nrhs); + Map, 0, OuterStride<> > A( &(Lval[luptr]), nsupc, nsupc, OuterStride<>(nsupr) ); + Block U(X, fsupc, 0, nsupc, nrhs); U = A.template triangularView().solve(U); } @@ -221,8 +235,8 @@ class SparseLU { for (i = m_Ustore.outerIndexPtr()[jcol]; i < m_Ustore.outerIndexPtr()[jcol]; i++) { - irow = m_Ustore.InnerIndices()[i]; - X(irow, j) -= X(jcol, j) * m_Ustore.Values()[i]; + irow = m_Ustore.innerIndexPtr()[i]; + X(irow, j) -= X(jcol, j) * m_Ustore.valuePtr()[i]; } } } @@ -254,12 +268,12 @@ class SparseLU bool m_analysisIsOk; NCMatrix m_mat; // The input (permuted ) matrix SCMatrix m_Lstore; // The lower triangular matrix (supernodal) - NCMatrix m_Ustore; // The upper triangular matrix + MappedSparseMatrix m_Ustore; // The upper triangular matrix PermutationType m_perm_c; // Column permutation PermutationType m_perm_r ; // Row permutation IndexVector m_etree; // Column elimination tree - static LU_GlobalLU_t m_glu; // persistent data to facilitate multiple factors + LU_GlobalLU_t m_glu; // persistent data to facilitate multiple factors // FIXME All fields of this struct can be defined separately as class members // SuperLU/SparseLU options @@ -332,9 +346,11 @@ void SparseLU::analyzePattern(const MatrixType& mat) m_etree = iwork; // Postmultiply A*Pc by post, i.e reorder the matrix according to the postorder of the etree - PermutationType post_perm(post); + + PermutationType post_perm(m);; + for (int i = 0; i < m; i++) + post_perm.indices()(i) = post(i); //m_mat = m_mat * post_perm; // FIXME This should surely be in factorize() - // Composition of the 
two permutations m_perm_c = m_perm_c * post_perm; } // end postordering @@ -357,6 +373,7 @@ void SparseLU::analyzePattern(const MatrixType& mat) #include "SparseLU_pruneL.h" #include "SparseLU_Utils.h" + /** * - Numerical factorization * - Interleaved with the symbolic factorization @@ -380,9 +397,8 @@ void SparseLU::factorize(const MatrixType& matrix) eigen_assert(m_analysisIsOk && "analyzePattern() should be called first"); eigen_assert((matrix.rows() == matrix.cols()) && "Only for squared matrices"); + typedef typename IndexVector::Scalar Index; - ScalarVector work; // Scalar work vector - IndexVector iwork; //Index work vector // Apply the column permutation computed in analyzepattern() m_mat = matrix * m_perm_c; @@ -394,7 +410,7 @@ void SparseLU::factorize(const MatrixType& matrix) int maxpanel = m_panel_size * m; // Allocate storage common to the factor routines int lwork = 0; - int info = LUMemInit(m, n, nnz, work, iwork, lwork, m_fillfactor, m_panel_size, m_maxsuper, m_rowblk, m_glu); + int info = LUMemInit(m, n, nnz, lwork, m_fillfactor, m_panel_size, m_glu); if (info) { std::cerr << "UNABLE TO ALLOCATE WORKING MEMORY\n\n" ; @@ -404,29 +420,37 @@ void SparseLU::factorize(const MatrixType& matrix) // Set up pointers for integer working arrays - int idx = 0; - VectorBlock segrep(iwork, idx, m); - idx += m; - VectorBlock parent(iwork, idx, m); - idx += m; - VectorBlock xplore(iwork, idx, m); - idx += m; - VectorBlock repfnz(iwork, idx, maxpanel); - idx += maxpanel; - VectorBlock panel_lsub(iwork, idx, maxpanel); - idx += maxpanel; - VectorBlock xprune(iwork, idx, n); - idx += n; - VectorBlock marker(iwork, idx, m * LU_NO_MARKER); +// int idx = 0; +// VectorBlock segrep(iwork, idx, m); +// idx += m; +// VectorBlock parent(iwork, idx, m); +// idx += m; +// VectorBlock xplore(iwork, idx, m); +// idx += m; +// VectorBlock repfnz(iwork, idx, maxpanel); +// idx += maxpanel; +// VectorBlock panel_lsub(iwork, idx, maxpanel); +// idx += maxpanel; +// VectorBlock 
xprune(iwork, idx, n); +// idx += n; +// VectorBlock marker(iwork, idx, m * LU_NO_MARKER); + // Set up pointers for integer working arrays + IndexVector segrep(m); + IndexVector parent(m); + IndexVector xplore(m); + IndexVector repfnz(maxpanel); + IndexVector panel_lsub(maxpanel); + IndexVector xprune(n); + IndexVector marker(m*LU_NO_MARKER); repfnz.setConstant(-1); panel_lsub.setConstant(-1); // Set up pointers for scalar working arrays - VectorBlock dense(work, 0, maxpanel); - dense.setZero(); - VectorBlock tempv(work, maxpanel, LU_NUM_TEMPV(m, m_panel_size, m_maxsuper, m_rowblk) ); - tempv.setZero(); + ScalarVector dense; + dense.setZero(maxpanel); + ScalarVector tempv; + tempv.setZero(LU_NUM_TEMPV(m, m_panel_size, m_maxsuper, m_rowblk) ); // Setup Permutation vectors // Compute the inverse of perm_c @@ -434,12 +458,13 @@ void SparseLU::factorize(const MatrixType& matrix) // Identify initial relaxed snodes IndexVector relax_end(n); - if ( m_symmetricmode = true ) - LU_heap_relax_snode(n, m_etree, m_relax, marker, relax_end); + if ( m_symmetricmode == true ) + LU_heap_relax_snode(n, m_etree, m_relax, marker, relax_end); else - LU_relax_snode(n, m_etree, m_relax, marker, relax_end); + LU_relax_snode(n, m_etree, m_relax, marker, relax_end); - m_perm_r.setConstant(-1); + m_perm_r.resize(m); + m_perm_r.indices().setConstant(-1); //FIXME marker.setConstant(-1); IndexVector& xsup = m_glu.xsup; @@ -451,19 +476,19 @@ void SparseLU::factorize(const MatrixType& matrix) Index& nzlumax = m_glu.nzlumax; supno(0) = IND_EMPTY; - xsup(0) = xlsub(0) = xusub(0) = xlusup(0) = 0; + xsup(0) = xlsub(0) = xusub(0) = xlusup(0) = Index(0); // Work on one 'panel' at a time. 
A panel is one of the following : // (a) a relaxed supernode at the bottom of the etree, or // (b) panel_size contiguous columns, defined by the user - register int jcol,kcol; + int jcol,kcol; IndexVector panel_histo(n); Index nextu, nextlu, jsupno, fsupc, new_next; Index pivrow; // Pivotal row number in the original row matrix int nseg1; // Number of segments in U-column above panel row jcol int nseg; // Number of segments in each U-column - int irep,ir, icol; - int i, k, jj,j; + int irep, icol; + int i, k, jj; for (jcol = 0; jcol < n; ) { if (relax_end(jcol) != IND_EMPTY) @@ -472,7 +497,7 @@ void SparseLU::factorize(const MatrixType& matrix) // Factorize the relaxed supernode(jcol:kcol) // First, determine the union of the row structure of the snode - info = LU_snode_dfs(jcol, kcol, m_mat.innerIndexPtr(), m_mat.outerIndexPtr(), xprune, marker); + info = LU_snode_dfs(jcol, kcol, m_mat.innerIndexPtr(), m_mat.outerIndexPtr(), xprune, marker, m_glu); if ( info ) { std::cerr << "MEMORY ALLOCATION FAILED IN SNODE_DFS() \n"; @@ -488,7 +513,7 @@ void SparseLU::factorize(const MatrixType& matrix) int mem; while (new_next > nzlumax ) { - mem = LUMemXpand(lusup, nzlumax, nextlu, LUSUP, m_glu.num_expansions); + mem = LUMemXpand(lusup, nzlumax, nextlu, LUSUP, m_glu.num_expansions); if (mem) { std::cerr << "MEMORY ALLOCATION FAILED FOR L FACTOR \n"; @@ -502,13 +527,13 @@ void SparseLU::factorize(const MatrixType& matrix) xusub(icol+1) = nextu; // Scatter into SPA dense(*) for (typename MatrixType::InnerIterator it(m_mat, icol); it; ++it) - dense(it.row()) = it.val(); + dense(it.row()) = it.value(); // Numeric update within the snode - LU_snode_bmod(icol, jsupno, fsupc, dense, m_glu); + LU_snode_bmod(icol, fsupc, dense, m_glu); // Eliminate the current column - info = LU_pivotL(icol, m_diagpivotthresh, m_perm_r, iperm_c, pivrow, m_glu); + info = LU_pivotL(icol, m_diagpivotthresh, m_perm_r.indices(), iperm_c.indices(), pivrow, m_glu); if ( info ) { m_info = NumericalIssue; @@ 
-536,13 +561,13 @@ void SparseLU::factorize(const MatrixType& matrix) panel_size = n - jcol; // Symbolic outer factorization on a panel of columns - LU_panel_dfs(m, panel_size, jcol, m_mat, m_perm_r, nseg1, dense, panel_lsub, segrep, repfnz, xprune, marker, parent, xplore, m_glu); + LU_panel_dfs(m, panel_size, jcol, m_mat, m_perm_r.indices(), nseg1, dense, panel_lsub, segrep, repfnz, xprune, marker, parent, xplore, m_glu); // Numeric sup-panel updates in topological order LU_panel_bmod(m, panel_size, jcol, nseg1, dense, tempv, segrep, repfnz, m_glu); // Sparse LU within the panel, and below the panel diagonal - for ( jj = jcol; j< jcol + panel_size; jj++) + for ( jj = jcol; jj< jcol + panel_size; jj++) { k = (jj - jcol) * m; // Column index for w-wide arrays @@ -550,7 +575,7 @@ void SparseLU::factorize(const MatrixType& matrix) //Depth-first-search for the current column VectorBlock panel_lsubk(panel_lsub, k, m); VectorBlock repfnz_k(repfnz, k, m); - info = LU_column_dfs(m, jj, m_perm_r, m_maxsuper, nseg, panel_lsub(k), segrep, repfnz_k, xprune, marker, parent, xplore, m_glu); + info = LU_column_dfs(m, jj, m_perm_r.indices(), m_maxsuper, nseg, panel_lsubk, segrep, repfnz_k, xprune, marker, parent, xplore, m_glu); if ( !info ) { std::cerr << "UNABLE TO EXPAND MEMORY IN COLUMN_DFS() \n"; @@ -559,7 +584,7 @@ void SparseLU::factorize(const MatrixType& matrix) return; } // Numeric updates to this column - VectorBlock dense_k(dense, k, m); + VectorBlock dense_k(dense, k, m); VectorBlock segrep_k(segrep, nseg1, m); info = LU_column_bmod(jj, (nseg - nseg1), dense_k, tempv, segrep_k, repfnz_k, jcol, m_glu); if ( info ) @@ -571,7 +596,7 @@ void SparseLU::factorize(const MatrixType& matrix) } // Copy the U-segments to ucol(*) - info = LU_copy_to_col(jj, nseg, segrep, repfnz_k, m_perm_r, dense_k, m_glu); + info = LU_copy_to_ucol(jj, nseg, segrep, repfnz_k ,m_perm_r.indices(), dense_k, m_glu); if ( info ) { std::cerr << "UNABLE TO EXPAND MEMORY IN COPY_TO_UCOL() \n"; @@ -581,7 
+606,7 @@ void SparseLU::factorize(const MatrixType& matrix) } // Form the L-segment - info = LU_pivotL(jj, m_diagpivotthresh, m_perm_r, iperm_c, pivrow, m_glu); + info = LU_pivotL(jj, m_diagpivotthresh, m_perm_r.indices(), iperm_c.indices(), pivrow, m_glu); if ( info ) { std::cerr<< "THE MATRIX IS STRUCTURALLY SINGULAR ... ZERO COLUMN AT " << info <::factorize(const MatrixType& matrix) } // Prune columns (0:jj-1) using column jj - LU_pruneL(jj, m_perm_r, pivrow, nseg, segrep, repfnz_k, xprune, m_glu); + LU_pruneL(jj, m_perm_r.indices(), pivrow, nseg, segrep, repfnz_k, xprune, m_glu); // Reset repfnz for this column for (i = 0; i < nseg; i++) @@ -604,23 +629,10 @@ void SparseLU::factorize(const MatrixType& matrix) } // end else } // end for -- end elimination - // Adjust row permutation in the case of rectangular matrices... Deprecated - if (m > n ) - { - k = 0; - for (i = 0; i < m; ++i) - { - if ( m_perm_r(i) == IND_EMPTY ) - { - m_perm_r(i) = n + k; - ++k; - } - } - } // Count the number of nonzeros in factors - LU_countnz(n, xprune, m_nnzL, m_nnzU, m_glu); + LU_countnz(n, m_nnzL, m_nnzU, m_glu); // Apply permutation to the L subscripts - LU_fixupL(n, m_perm_r, m_glu); + LU_fixupL/**/(n, m_perm_r.indices(), m_glu); @@ -628,8 +640,8 @@ void SparseLU::factorize(const MatrixType& matrix) m_Lstore.setInfos(m, n, m_glu.lusup, m_glu.xlusup, m_glu.lsub, m_glu.xlsub, m_glu.supno, m_glu.xsup); // Create the column major upper sparse matrix U; // it is assumed here that MatrixType = SparseMatrix - new (&m_Ustore) Map ( m, n, m_nnzU, m_glu.xusub.data(), m_glu.usub.data(), m_glu.ucol.data() ); - this.m_Ustore = m_Ustore; //FIXME Is it necessary + new (&m_Ustore) MappedSparseMatrix ( m, n, m_nnzU, m_glu.xusub.data(), m_glu.usub.data(), m_glu.ucol.data() ); + //this.m_Ustore = m_Ustore; //FIXME Is it necessary m_info = Success; m_factorizationIsOk = true; diff --git a/Eigen/src/SparseLU/SparseLU_Coletree.h b/Eigen/src/SparseLU/SparseLU_Coletree.h index 00bb97796..585b02fdf 
100644 --- a/Eigen/src/SparseLU/SparseLU_Coletree.h +++ b/Eigen/src/SparseLU/SparseLU_Coletree.h @@ -188,7 +188,6 @@ void LU_TreePostorder(int n, IndexVector& parent, IndexVector& post) // Depth-first search from dummy root vertex #n postnum = 0; LU_nr_etdfs(n, parent, first_kid, next_kid, post, postnum); - return post; } #endif \ No newline at end of file diff --git a/Eigen/src/SparseLU/SparseLU_Matrix.h b/Eigen/src/SparseLU/SparseLU_Matrix.h index 70570ab9c..5b2c64154 100644 --- a/Eigen/src/SparseLU/SparseLU_Matrix.h +++ b/Eigen/src/SparseLU/SparseLU_Matrix.h @@ -46,14 +46,16 @@ class SuperNodalMatrix { public: typedef _Scalar Scalar; - typedef _Index Index; + typedef _Index Index; + typedef Matrix IndexVector; + typedef Matrix ScalarVector; public: SuperNodalMatrix() { } - SuperNodalMatrix(Index m, Index n, Scalar *nzval, Index* nzval_colptr, Index* rowind, - Index* rowind_colptr, Index* col_to_sup, Index* sup_to_col ) + SuperNodalMatrix(int m, int n, ScalarVector& nzval, IndexVector& nzval_colptr, IndexVector& rowind, + IndexVector& rowind_colptr, IndexVector& col_to_sup, IndexVector& sup_to_col ) { setInfos(m, n, nzval, nzval_colptr, rowind, rowind_colptr, col_to_sup, sup_to_col); } @@ -68,17 +70,17 @@ class SuperNodalMatrix * FIXME This class will be modified such that it can be use in the course * of the factorization. 
*/ - void setInfos(Index m, Index n, Scalar *nzval, Index* nzval_colptr, Index* rowind, - Index* rowind_colptr, Index* col_to_sup, Index* sup_to_col ) + void setInfos(int m, int n, ScalarVector& nzval, IndexVector& nzval_colptr, IndexVector& rowind, + IndexVector& rowind_colptr, IndexVector& col_to_sup, IndexVector& sup_to_col ) { m_row = m; m_col = n; - m_nzval = nzval; - m_nzval_colptr = nzval_colptr; - m_rowind = rowind; - m_rowind_colptr = rowind_colptr; - m_col_to_sup = col_to_sup; - m_sup_to_col = sup_to_col; + m_nzval = nzval.data(); + m_nzval_colptr = nzval_colptr.data(); + m_rowind = rowind.data(); + m_rowind_colptr = rowind_colptr.data(); + m_col_to_sup = col_to_sup.data(); + m_sup_to_col = sup_to_col.data(); } @@ -108,6 +110,10 @@ class SuperNodalMatrix return m_nzval; } + const Scalar* valuePtr() const + { + return m_nzval; + } /** * Return the pointers to the beginning of each column in \ref valuePtr() */ @@ -116,6 +122,11 @@ class SuperNodalMatrix return m_nzval_colptr; } + const Index* colIndexPtr() const + { + return m_nzval_colptr; + } + /** * Return the array of compressed row indices of all supernodes */ @@ -123,6 +134,12 @@ class SuperNodalMatrix { return m_rowind; } + + const Index* rowIndex() const + { + return m_rowind; + } + /** * Return the location in \em rowvaluePtr() which starts each column */ @@ -130,17 +147,33 @@ class SuperNodalMatrix { return m_rowind_colptr; } + + const Index* rowIndexPtr() const + { + return m_rowind_colptr; + } + /** * Return the array of column-to-supernode mapping */ - Index colToSup() + Index* colToSup() + { + return m_col_to_sup; + } + + const Index* colToSup() const { return m_col_to_sup; } /** * Return the array of supernode-to-column mapping */ - Index supToCol() + Index* supToCol() + { + return m_sup_to_col; + } + + const Index* supToCol() const { return m_sup_to_col; } @@ -148,7 +181,7 @@ class SuperNodalMatrix /** * Return the number of supernodes */ - int nsuper() + int nsuper() const { return 
m_nsuper; } diff --git a/Eigen/src/SparseLU/SparseLU_Memory.h b/Eigen/src/SparseLU/SparseLU_Memory.h index ea9ef6d89..60ebfcaa1 100644 --- a/Eigen/src/SparseLU/SparseLU_Memory.h +++ b/Eigen/src/SparseLU/SparseLU_Memory.h @@ -61,11 +61,11 @@ * \param vec Valid pointer to the vector to allocate or expand * \param [in,out]length At input, contain the current length of the vector that is to be increased. At output, length of the newly allocated vector * \param [in]len_to_copy Current number of elements in the factors - * \param keep_prev true: use length and do not expand the vector; false: compute new_len and expand + * \param keep_prev 1: use length and do not expand the vector; 0: compute new_len and expand * \param [in,out]num_expansions Number of times the memory has been expanded */ template -int expand(VectorType& vec, int& length, int len_to_copy, bool keep_prev, int& num_expansions) +int expand(VectorType& vec, int& length, int len_to_copy, int keep_prev, int& num_expansions) { float alpha = 1.5; // Ratio of the memory increase @@ -120,18 +120,16 @@ int expand(VectorType& vec, int& length, int len_to_copy, bool keep_prev, int& * \param m number of rows of the input matrix * \param n number of columns * \param annz number of initial nonzeros in the matrix - * \param work scalar working space needed by all factor routines - * \param iwork Integer working space * \param lwork if lwork=-1, this routine returns an estimated size of the required memory * \param glu persistent data to facilitate multiple factors : will be deleted later ?? 
* \return an estimated size of the required memory if lwork = -1; otherwise, return the size of actually allocated when memory allocation failed * NOTE Unlike SuperLU, this routine does not support successive factorization with the same pattern and the row permutation */ -template -int LUMemInit(int m, int n, int annz, ScalarVector& work, IndexVector& iwork, int lwork, int fillratio, int panel_size, int maxsuper, int rowblk, LU_GlobalLU_t& glu) +template +int LUMemInit(int m, int n, int annz, int lwork, int fillratio, int panel_size, LU_GlobalLU_t& glu) { typedef typename ScalarVector::Scalar Scalar; - typedef typename IndexVector::Index Index; + typedef typename IndexVector::Scalar Index; int& num_expansions = glu.num_expansions; //No memory expansions so far num_expansions = 0; @@ -177,17 +175,12 @@ int LUMemInit(int m, int n, int annz, ScalarVector& work, IndexVector& iwork, in if (nzlumax < annz ) return nzlumax; - expand(glu.lsup, nzlumax, 0, 0, num_expansions); + expand(glu.lusup, nzlumax, 0, 0, num_expansions); expand(glu.ucol, nzumax, 0, 0, num_expansions); expand(glu.lsub, nzlmax, 0, 0, num_expansions); expand(glu.usub, nzumax, 0, 1, num_expansions); } - // LUWorkInit : Now, allocate known working storage - int isize = (2 * panel_size + 3 + LU_NO_MARKER) * m + n; - int dsize = m * panel_size + LU_NUM_TEMPV(m, panel_size, maxsuper, rowblk); - iwork.resize(isize); - work.resize(isize); ++num_expansions; return 0; diff --git a/Eigen/src/SparseLU/SparseLU_Structs.h b/Eigen/src/SparseLU/SparseLU_Structs.h index fd2a59a41..e05eabe2a 100644 --- a/Eigen/src/SparseLU/SparseLU_Structs.h +++ b/Eigen/src/SparseLU/SparseLU_Structs.h @@ -87,7 +87,7 @@ typedef enum {LUSUP, UCOL, LSUB, USUB, LLVL, ULVL} LU_MemType; template struct LU_GlobalLU_t { - typedef typename IndexVector::Index Index; + typedef typename IndexVector::Scalar Index; IndexVector xsup; //First supernode column ... 
xsup(s) points to the beginning of the s-th supernode IndexVector supno; // Supernode number corresponding to this column (column to supernode mapping) ScalarVector lusup; // nonzero values of L ordered by columns diff --git a/Eigen/src/SparseLU/SparseLU_Utils.h b/Eigen/src/SparseLU/SparseLU_Utils.h index 9e63bf7e4..0352c7872 100644 --- a/Eigen/src/SparseLU/SparseLU_Utils.h +++ b/Eigen/src/SparseLU/SparseLU_Utils.h @@ -22,20 +22,21 @@ // License and a copy of the GNU General Public License along with // Eigen. If not, see . -#ifdef EIGEN_SPARSELU_UTILS_H +#ifndef EIGEN_SPARSELU_UTILS_H #define EIGEN_SPARSELU_UTILS_H -template -void SparseLU::LU_countnz(const int n, IndexVector& xprune, int& nnzL, int& nnzU, GlobalLU_t& Glu) + +template +void LU_countnz(const int n, int& nnzL, int& nnzU, LU_GlobalLU_t& glu) { - IndexVector& xsup = Glu.xsup; - IndexVector& xlsub = Glu.xlsub; + IndexVector& xsup = glu.xsup; + IndexVector& xlsub = glu.xlsub; nnzL = 0; - nnzU = (Glu.xusub)(n); - int nsuper = (Glu.supno)(n); - int jlen, irep; - + nnzU = (glu.xusub)(n); + int nsuper = (glu.supno)(n); + int jlen; + int i, j, fsupc; if (n <= 0 ) return; // For each supernode for (i = 0; i <= nsuper; i++) @@ -46,10 +47,9 @@ void SparseLU::LU_countnz(const int n, IndexVector& xprune, int& nnzL, int& nnzU for (j = fsupc; j < xsup(i+1); j++) { nnzL += jlen; - nnzLU += j - fsupc + 1; + nnzU += j - fsupc + 1; jlen--; } - irep = xsup(i+1) - 1; } } @@ -60,16 +60,16 @@ void SparseLU::LU_countnz(const int n, IndexVector& xprune, int& nnzL, int& nnzU * and applies permutation to the remaining subscripts * */ -template -void SparseLU::LU_fixupL(const int n, const IndexVector& perm_r, GlobalLU_t& Glu) +template +void LU_fixupL(const int n, const IndexVector& perm_r, LU_GlobalLU_t& glu) { - int nsuper, fsupc, i, j, k, jstart; - IndexVector& xsup = GLu.xsup; - IndexVector& lsub = Glu.lsub; - IndexVector& xlsub = Glu.xlsub; + int fsupc, i, j, k, jstart; + IndexVector& xsup = glu.xsup; + IndexVector& lsub = 
glu.lsub; + IndexVector& xlsub = glu.xlsub; int nextl = 0; - int nsuper = (Glu.supno)(n); + int nsuper = (glu.supno)(n); // For each supernode for (i = 0; i <= nsuper; i++) @@ -80,7 +80,7 @@ void SparseLU::LU_fixupL(const int n, const IndexVector& perm_r, GlobalLU_t& Glu for (j = jstart; j < xlsub(fsupc + 1); j++) { lsub(nextl) = perm_r(lsub(j)); // Now indexed into P*A - nextl++ + nextl++; } for (k = fsupc+1; k < xsup(i+1); k++) xlsub(k) = nextl; // other columns in supernode i diff --git a/Eigen/src/SparseLU/SparseLU_column_bmod.h b/Eigen/src/SparseLU/SparseLU_column_bmod.h index da464cbfc..8dadeaa93 100644 --- a/Eigen/src/SparseLU/SparseLU_column_bmod.h +++ b/Eigen/src/SparseLU/SparseLU_column_bmod.h @@ -60,12 +60,12 @@ * > 0 - number of bytes allocated when run out of space * */ -template -int LU_column_bmod(const int jcol, const int nseg, ScalarVector& dense, ScalarVector& tempv, IndexVector& segrep, IndexVector& repfnz, int fpanelc, LU_GlobalLU_t& glu) +template +int LU_column_bmod(const int jcol, const int nseg, BlockScalarVector& dense, ScalarVector& tempv, BlockIndexVector& segrep, BlockIndexVector& repfnz, int fpanelc, LU_GlobalLU_t& glu) { - typedef typename IndexVector::Index Index; + typedef typename IndexVector::Scalar Index; typedef typename ScalarVector::Scalar Scalar; - int jsupno, k, ksub, krep, krep_ind, ksupno; + int jsupno, k, ksub, krep, ksupno; int lptr, nrow, isub, i, irow, nextlu, new_next, ufirst; int fsupc, nsupc, nsupr, luptr, kfnz, no_zeros; /* krep = representative of current k-th supernode @@ -115,7 +115,6 @@ int LU_column_bmod(const int jcol, const int nseg, ScalarVector& dense, ScalarVe nsupc = krep - fst_col + 1; nsupr = xlsub(fsupc+1) - xlsub(fsupc); nrow = nsupr - d_fsupc - nsupc; - krep_ind = lptr + nsupc - 1; // NOTE Unlike the original implementation in SuperLU, the only feature // available here is a sup-col update. 
@@ -213,7 +212,7 @@ int LU_column_bmod(const int jcol, const int nseg, ScalarVector& dense, ScalarVe ufirst = xlusup(jcol) + d_fsupc; Map, 0, OuterStride<> > A( &(lusup.data()[luptr]), nsupc, nsupc, OuterStride<>(nsupr) ); VectorBlock u(lusup, ufirst, nsupc); - u = A.template triangularView().solve(u); + u = A.template triangularView().solve(u); new (&A) Map, 0, OuterStride<> > ( &(lusup.data()[luptr+nsupc]), nrow, nsupc, OuterStride<>(nsupr) ); VectorBlock l(lusup, ufirst+nsupc, nrow); diff --git a/Eigen/src/SparseLU/SparseLU_column_dfs.h b/Eigen/src/SparseLU/SparseLU_column_dfs.h index 8c6202d67..7d9e8be79 100644 --- a/Eigen/src/SparseLU/SparseLU_column_dfs.h +++ b/Eigen/src/SparseLU/SparseLU_column_dfs.h @@ -72,13 +72,13 @@ * > 0 number of bytes allocated when run out of space * */ -template -int LU_column_dfs(const int m, const int jcol, IndexVector& perm_r, int maxsuper, IndexVector& nseg, IndexVector& lsub_col, IndexVector& segrep, IndexVector& repfnz, IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, LU_GlobalLU_t& glu) +template +int LU_column_dfs(const int m, const int jcol, IndexVector& perm_r, int maxsuper, int& nseg, BlockIndexVector& lsub_col, IndexVector& segrep, BlockIndexVector& repfnz, IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, LU_GlobalLU_t& glu) { - typedef typename IndexVector::Index Index; + typedef typename IndexVector::Scalar Index; typedef typename ScalarVector::Scalar Scalar; - int jcolp1, jcolm1, jsuper, nsuper, nextl; + int jsuper, nsuper, nextl; int krow; // Row index of the current element int kperm; // permuted row index int krep; // Supernode reprentative of the current row @@ -92,8 +92,10 @@ int LU_column_dfs(const int m, const int jcol, IndexVector& perm_r, int maxsuper IndexVector& supno = glu.supno; IndexVector& lsub = glu.lsub; IndexVector& xlsub = glu.xlsub; - IndexVector& nzlmax = glu.nzlmax; + Index& nzlmax = glu.nzlmax; + int jcolm1 = jcol - 1; + int 
jcolp1 = jcol + 1; nsuper = supno(jcol); jsuper = nsuper; nextl = xlsub(jcol); diff --git a/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h b/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h index 31411175c..a0cab563d 100644 --- a/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +++ b/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h @@ -59,10 +59,10 @@ * > 0 - number of bytes allocated when run out of space * */ -template < typename IndexVector, typename ScalarVector> -int LU_copy_to_ucol(const int jcol, const int nseg, IndexVector& segrep, IndexVector& repfnz, IndexVector& perm_r, ScalarVector& dense, LU_GlobalLU_t& glu) +template +int LU_copy_to_ucol(const int jcol, const int nseg, SegRepType& segrep, RepfnzType& repfnz ,IndexVector& perm_r, DenseType& dense, LU_GlobalLU_t& glu) { - typedef typename IndexVector::Index Index; + typedef typename IndexVector::Scalar Index; typedef typename ScalarVector::Scalar Scalar; Index ksub, krep, ksupno; diff --git a/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h b/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h index 1766c3c2b..791538729 100644 --- a/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +++ b/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h @@ -59,9 +59,9 @@ void LU_heap_relax_snode (const int n, IndexVector& et, const int relax_columns, // The etree may not be postordered, but its heap ordered IndexVector post; - TreePostorder(n, et, post); // Post order etree + LU_TreePostorder(n, et, post); // Post order etree IndexVector inv_post(n+1); - register int i; + int i; for (i = 0; i < n+1; ++i) inv_post(post(i)) = i; // inv_post = post.inverse()??? 
// Renumber etree in postorder @@ -76,7 +76,7 @@ void LU_heap_relax_snode (const int n, IndexVector& et, const int relax_columns, // compute the number of descendants of each node in the etree relax_end.setConstant(IND_EMPTY); - register int j, parent; + int j, parent; descendants.setZero(); for (j = 0; j < n; j++) { @@ -85,8 +85,8 @@ void LU_heap_relax_snode (const int n, IndexVector& et, const int relax_columns, descendants(parent) += descendants(j) + 1; } // Identify the relaxed supernodes by postorder traversal of the etree - register int snode_start; // beginning of a snode - register int k; + int snode_start; // beginning of a snode + int k; int nsuper_et_post = 0; // Number of relaxed snodes in postordered etree int nsuper_et = 0; // Number of relaxed snodes in the original etree int l; diff --git a/Eigen/src/SparseLU/SparseLU_panel_bmod.h b/Eigen/src/SparseLU/SparseLU_panel_bmod.h index 4f19b5ac8..ffd085357 100644 --- a/Eigen/src/SparseLU/SparseLU_panel_bmod.h +++ b/Eigen/src/SparseLU/SparseLU_panel_bmod.h @@ -62,8 +62,8 @@ * * */ -template -void LU_panel_bmod(const int m, const int w, const int jcol, const int nseg, ScalarVector& dense, ScalarVector& tempv, IndexVector& segrep, IndexVector& repfnz, LU_GlobalLU_t& glu) +template +void LU_panel_bmod(const int m, const int w, const int jcol, const int nseg, ScalarVector& dense, ScalarVector& tempv, DenseIndexBlock& segrep, DenseIndexBlock& repfnz, LU_GlobalLU_t& glu) { typedef typename ScalarVector::Scalar Scalar; IndexVector& xsup = glu.xsup; @@ -75,7 +75,7 @@ void LU_panel_bmod(const int m, const int w, const int jcol, const int nseg, Sca int i,ksub,jj,nextl_col,irow; int fsupc, nsupc, nsupr, nrow; - int krep, krep_ind, kfnz; + int krep, kfnz; int lptr; // points to the row subscripts of a supernode int luptr; // ... 
int segsize,no_zeros,isub ; @@ -95,8 +95,6 @@ void LU_panel_bmod(const int m, const int w, const int jcol, const int nseg, Sca nsupr = xlsub(fsupc+1) - xlsub(fsupc); nrow = nsupr - nsupc; lptr = xlsub(fsupc); - krep_ind = lptr + nsupc - 1; - // NOTE : Unlike the original implementation in SuperLU, the present implementation // does not include a 2-D block update. @@ -104,8 +102,8 @@ void LU_panel_bmod(const int m, const int w, const int jcol, const int nseg, Sca for (jj = jcol; jj < jcol + w; jj++) { nextl_col = (jj-jcol) * m; - VectorBlock repfnz_col(repfnz.segment(nextl_col, m)); // First nonzero column index for each row - VectorBlock dense_col(dense.segment(nextl_col, m)); // Scatter/gather entire matrix column from/to here + VectorBlock repfnz_col(repfnz, nextl_col, m); // First nonzero column index for each row + VectorBlock dense_col(dense, nextl_col, m); // Scatter/gather entire matrix column from/to here kfnz = repfnz_col(krep); if ( kfnz == IND_EMPTY ) diff --git a/Eigen/src/SparseLU/SparseLU_panel_dfs.h b/Eigen/src/SparseLU/SparseLU_panel_dfs.h index 6f6922ee0..f7a93ab48 100644 --- a/Eigen/src/SparseLU/SparseLU_panel_dfs.h +++ b/Eigen/src/SparseLU/SparseLU_panel_dfs.h @@ -77,8 +77,8 @@ * * */ -template -void LU_panel_dfs(const int m, const int w, const int jcol, MatrixType& A, IndexVector& perm_r, int& nseg, ScalarVector& dense, IndexVector& panel_lsub, IndexVector& segrep, IndexVector& repfnz, IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, LU_GlobalLU_t& glu) +template +void LU_panel_dfs(const int m, const int w, const int jcol, MatrixType& A, IndexVector& perm_r, int& nseg, ScalarVector& dense, IndexVector& panel_lsub, IndexVector& segrep, IndexVector& repfnz, IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, LU_GlobalLU_t& glu) { int jj; // Index through each column in the panel @@ -105,14 +105,14 @@ void LU_panel_dfs(const int m, const int w, const int jcol, MatrixType& A, Index 
nextl_col = (jj - jcol) * m; VectorBlock repfnz_col(repfnz, nextl_col, m); // First nonzero location in each row - VectorBlock dense_col(dense,nextl_col, m); // Accumulate a column vector here + VectorBlock dense_col(dense,nextl_col, m); // Accumulate a column vector here // For each nnz in A[*, jj] do depth first search for (typename MatrixType::InnerIterator it(A, jj); it; ++it) { krow = it.row(); - dense_col(krow) = it.val(); + dense_col(krow) = it.value(); kmark = marker(krow); if (kmark == jj) continue; // krow visited before, go to the next nonzero @@ -126,7 +126,7 @@ void LU_panel_dfs(const int m, const int w, const int jcol, MatrixType& A, Index } else { - // krow is in U : if its supernode-representative krep + // krow is in U : if its sup²ernode-representative krep // has been explored, update repfnz(*) krep = xsup(supno(kperm)+1) - 1; myfnz = repfnz_col(krep); diff --git a/Eigen/src/SparseLU/SparseLU_pivotL.h b/Eigen/src/SparseLU/SparseLU_pivotL.h index 4a50b2cca..39151f1e0 100644 --- a/Eigen/src/SparseLU/SparseLU_pivotL.h +++ b/Eigen/src/SparseLU/SparseLU_pivotL.h @@ -70,7 +70,7 @@ template int LU_pivotL(const int jcol, const typename ScalarVector::RealScalar diagpivotthresh, IndexVector& perm_r, IndexVector& iperm_c, int& pivrow, LU_GlobalLU_t& glu) { - typedef typename IndexVector::Index Index; + typedef typename IndexVector::Scalar Index; typedef typename ScalarVector::Scalar Scalar; // Initialize pointers IndexVector& lsub = glu.lsub; // Compressed row subscripts of L rectangular supernodes. 
@@ -91,7 +91,6 @@ int LU_pivotL(const int jcol, const typename ScalarVector::RealScalar diagpivott Scalar pivmax = 0.0; Index pivptr = nsupc; Index diag = IND_EMPTY; - Index old_pivptr = nsupc; Scalar rtemp; Index isub, icol, itemp, k; for (isub = nsupc; isub < nsupr; ++isub) { diff --git a/Eigen/src/SparseLU/SparseLU_pruneL.h b/Eigen/src/SparseLU/SparseLU_pruneL.h index c006f6707..42218ba4a 100644 --- a/Eigen/src/SparseLU/SparseLU_pruneL.h +++ b/Eigen/src/SparseLU/SparseLU_pruneL.h @@ -61,10 +61,10 @@ * \param glu Global LU data * */ -template -void LU_pruneL(const int jcol, const IndexVector& perm_r, const int pivrow, const int nseg, const IndexVector& segrep, IndexVector& repfnz, IndexVector& xprune, LU_GlobalLU_t& glu) +template +void LU_pruneL(const int jcol, const IndexVector& perm_r, const int pivrow, const int nseg, const IndexVector& segrep, BlockIndexVector& repfnz, IndexVector& xprune, LU_GlobalLU_t& glu) { - typedef typename IndexVector::Index Index; + typedef typename IndexVector::Scalar Index; typedef typename ScalarVector::Scalar Scalar; // Initialize pointers IndexVector& xsup = glu.xsup; @@ -78,7 +78,7 @@ void LU_pruneL(const int jcol, const IndexVector& perm_r, const int pivrow, cons int jsupno = supno(jcol); int i,irep,irep1; bool movnum, do_prune = false; - Index kmin, kmax, ktemp, minloc, maxloc,krow; + Index kmin, kmax, minloc, maxloc,krow; for (i = 0; i < nseg; i++) { irep = segrep(i); diff --git a/Eigen/src/SparseLU/SparseLU_snode_bmod.h b/Eigen/src/SparseLU/SparseLU_snode_bmod.h index a7034e607..47145bc0c 100644 --- a/Eigen/src/SparseLU/SparseLU_snode_bmod.h +++ b/Eigen/src/SparseLU/SparseLU_snode_bmod.h @@ -45,8 +45,7 @@ #ifndef SPARSELU_SNODE_BMOD_H #define SPARSELU_SNODE_BMOD_H template -int LU_snode_bmod (const int jcol, const int jsupno, const int fsupc, - ScalarVector& dense, LU_GlobalLU_t& glu) +int LU_snode_bmod (const int jcol, const int fsupc, ScalarVector& dense, LU_GlobalLU_t& glu) { typedef typename ScalarVector::Scalar Scalar; 
IndexVector& lsub = glu.lsub; // Compressed row subscripts of ( rectangular supernodes ??) diff --git a/Eigen/src/SparseLU/SparseLU_snode_dfs.h b/Eigen/src/SparseLU/SparseLU_snode_dfs.h index c49fc1461..3e7033c67 100644 --- a/Eigen/src/SparseLU/SparseLU_snode_dfs.h +++ b/Eigen/src/SparseLU/SparseLU_snode_dfs.h @@ -42,7 +42,7 @@ * granted, provided the above notices are retained, and a notice that * the code was modified is included with the above copyright notice. */ -#ifdef SPARSELU_SNODE_DFS_H +#ifndef SPARSELU_SNODE_DFS_H #define SPARSELU_SNODE_DFS_H /** * \brief Determine the union of the row structures of those columns within the relaxed snode. @@ -58,9 +58,9 @@ * \return 0 on success, > 0 size of the memory when memory allocation failed */ template - int LU_snode_dfs(const int jcol, const int kcol, const IndexVector* asub, const IndexVector* colptr, IndexVector& xprune, IndexVector& marker, LU_GlobalLU_t& glu) + int LU_snode_dfs(const int jcol, const int kcol, const typename IndexVector::Scalar* asub, const typename IndexVector::Scalar* colptr, IndexVector& xprune, IndexVector& marker, LU_GlobalLU_t& glu) { - typedef typename IndexVector::Index; + typedef typename IndexVector::Scalar Index; IndexVector& xsup = glu.xsup; IndexVector& supno = glu.supno; // Supernode number corresponding to this column IndexVector& lsub = glu.lsub; @@ -74,9 +74,9 @@ for (i = jcol; i <=kcol; i++) { // For each nonzero in A(*,i) - for (k = colptr(i); k < colptr(i+1); k++) + for (k = colptr[i]; k < colptr[i+1]; k++) { - krow = asub(k); + krow = asub[k]; kmark = marker(krow); if ( kmark != kcol ) { diff --git a/bench/spbench/test_sparseLU.cpp b/bench/spbench/test_sparseLU.cpp new file mode 100644 index 000000000..0bbbb0627 --- /dev/null +++ b/bench/spbench/test_sparseLU.cpp @@ -0,0 +1,64 @@ +// Small bench routine for Eigen available in Eigen +// (C) Desire NUENTSA WAKAM, INRIA + +#include +#include +#include +#include +#include + +using namespace std; +using namespace Eigen; + +int 
main(int argc, char **args) +{ + SparseMatrix A; + typedef SparseMatrix::Index Index; + typedef Matrix DenseMatrix; + typedef Matrix DenseRhs; + VectorXd b, x, tmp; + SparseLU, AMDOrdering > solver; + ifstream matrix_file; + string line; + int n; + + // Set parameters + /* Fill the matrix with sparse matrix stored in Matrix-Market coordinate column-oriented format */ + if (argc < 2) assert(false && "please, give the matrix market file "); + loadMarket(A, args[1]); + cout << "End charging matrix " << endl; + bool iscomplex=false, isvector=false; + int sym; + getMarketHeader(args[1], sym, iscomplex, isvector); + if (iscomplex) { cout<< " Not for complex matrices \n"; return -1; } + if (isvector) { cout << "The provided file is not a matrix file\n"; return -1;} + if (sym != 0) { // symmetric matrices, only the lower part is stored + SparseMatrix temp; + temp = A; + A = temp.selfadjointView(); + } + n = A.cols(); + /* Fill the right hand side */ + + if (argc > 2) + loadMarketVector(b, args[2]); + else + { + b.resize(n); + tmp.resize(n); +// tmp.setRandom(); + for (int i = 0; i < n; i++) tmp(i) = i; + b = A * tmp ; + } + + /* Compute the factorization */ + solver.compute(A); + + solver._solve(b, x); + /* Check the accuracy */ + VectorXd tmp2 = b - A*x; + double tempNorm = tmp2.norm()/b.norm(); + cout << "Relative norm of the computed solution : " << tempNorm <<"\n"; + + return 0; +} \ No newline at end of file -- cgit v1.2.3 From 15f15635335d459e9515aa89f0e5a9618e7f3924 Mon Sep 17 00:00:00 2001 From: Desire NUENTSA Date: Fri, 29 Jun 2012 17:45:10 +0200 Subject: Before moving to the new building --- Eigen/src/SparseLU/SparseLU.h | 3 ++- Eigen/src/SparseLU/SparseLU_Matrix.h | 1 + Eigen/src/SparseLU/SparseLU_column_bmod.h | 1 + Eigen/src/SparseLU/SparseLU_pruneL.h | 2 +- bench/spbench/test_sparseLU.cpp | 1 + 5 files changed, 6 insertions(+), 2 deletions(-) (limited to 'bench/spbench') diff --git a/Eigen/src/SparseLU/SparseLU.h b/Eigen/src/SparseLU/SparseLU.h index 
a07b5a9f3..e4a4c3a7b 100644 --- a/Eigen/src/SparseLU/SparseLU.h +++ b/Eigen/src/SparseLU/SparseLU.h @@ -186,7 +186,6 @@ class SparseLU // Triangular solve Map, 0, OuterStride<> > A( &(Lval[luptr]), nsupc, nsupc, OuterStride<>(nsupr) ); Map< Matrix, 0, OuterStride<> > U (&(X.data()[fsupc]), nsupc, nrhs, OuterStride<>(X.rows()) ); -// Block > U(X, fsupc, 0, nsupc, nrhs); //FIXME TODO Consider more RHS U = A.template triangularView().solve(U); // Matrix-vector product @@ -536,6 +535,7 @@ void SparseLU::factorize(const MatrixType& matrix) // Eliminate the current column info = LU_pivotL(icol, m_diagpivotthresh, m_perm_r.indices(), iperm_c.indices(), pivrow, m_glu); + eigen_assert(info==0 && " SINGULAR MATRIX"); if ( info ) { m_info = NumericalIssue; @@ -609,6 +609,7 @@ void SparseLU::factorize(const MatrixType& matrix) // Form the L-segment info = LU_pivotL(jj, m_diagpivotthresh, m_perm_r.indices(), iperm_c.indices(), pivrow, m_glu); + eigen_assert(info==0 && " SINGULAR MATRIX"); if ( info ) { std::cerr<< "THE MATRIX IS STRUCTURALLY SINGULAR ... 
ZERO COLUMN AT " << info <, 0, OuterStride<> > A( &(lusup.data()[luptr]), segsize, segsize, OuterStride<>(nsupr) ); VectorBlock u(tempv, 0, segsize); diff --git a/Eigen/src/SparseLU/SparseLU_pruneL.h b/Eigen/src/SparseLU/SparseLU_pruneL.h index 42218ba4a..91c795fac 100644 --- a/Eigen/src/SparseLU/SparseLU_pruneL.h +++ b/Eigen/src/SparseLU/SparseLU_pruneL.h @@ -123,7 +123,7 @@ void LU_pruneL(const int jcol, const IndexVector& perm_r, const int pivrow, cons if (perm_r(lsub(kmax)) == IND_EMPTY) kmax--; else if ( perm_r(lsub(kmin)) != IND_EMPTY) - kmin--; + kmin++; else { // kmin below pivrow (not yet pivoted), and kmax diff --git a/bench/spbench/test_sparseLU.cpp b/bench/spbench/test_sparseLU.cpp index 0bbbb0627..4727cc12b 100644 --- a/bench/spbench/test_sparseLU.cpp +++ b/bench/spbench/test_sparseLU.cpp @@ -52,6 +52,7 @@ int main(int argc, char **args) } /* Compute the factorization */ + solver.isSymmetric(true); solver.compute(A); solver._solve(b, x); -- cgit v1.2.3 From b5a83867cac92a539b3a4d1cf8fcdcf8e9e9f5b2 Mon Sep 17 00:00:00 2001 From: Desire NUENTSA Date: Fri, 6 Jul 2012 20:18:16 +0200 Subject: Update Ordering interface --- Eigen/src/OrderingMethods/Ordering.h | 26 +++++++++++++++++++++-- Eigen/src/SparseLU/SparseLU.h | 35 +++++++++++++++++++++++-------- Eigen/src/SparseLU/SparseLU_Coletree.h | 9 +++++--- Eigen/src/SparseLU/SparseLU_relax_snode.h | 5 ++--- Eigen/src/SparseLU/SparseLU_snode_dfs.h | 8 +++---- Eigen/src/SuperLUSupport/SuperLUSupport.h | 3 +++ bench/spbench/test_sparseLU.cpp | 4 ++-- 7 files changed, 67 insertions(+), 23 deletions(-) (limited to 'bench/spbench') diff --git a/Eigen/src/OrderingMethods/Ordering.h b/Eigen/src/OrderingMethods/Ordering.h index 3751f9bee..670cca9c4 100644 --- a/Eigen/src/OrderingMethods/Ordering.h +++ b/Eigen/src/OrderingMethods/Ordering.h @@ -60,7 +60,9 @@ class AMDOrdering public: typedef PermutationMatrix PermutationType; - /** Compute the permutation vector from a column-major sparse matrix */ + /** Compute the 
permutation vector from a sparse matrix + * This routine is much faster if the input matrix is column-major + */ template void operator()(const MatrixType& mat, PermutationType& perm) { @@ -73,7 +75,7 @@ class AMDOrdering internal::minimum_degree_ordering(symm, perm); } - /** Compute the permutation with a self adjoint matrix */ + /** Compute the permutation with a selfadjoint matrix */ template void operator()(const SparseSelfAdjointView& mat, PermutationType& perm) { @@ -85,6 +87,26 @@ class AMDOrdering } }; +/** + * Get the natural ordering + * + *NOTE Returns an empty permutation matrix + * \tparam Index The type of indices of the matrix + */ +template +class NaturalOrdering +{ + public: + typedef PermutationMatrix PermutationType; + + /** Compute the permutation vector from a column-major sparse matrix */ + template + void operator()(const MatrixType& mat, PermutationType& perm) + { + perm.resize(0); + } + +}; /** * Get the column approximate minimum degree ordering diff --git a/Eigen/src/SparseLU/SparseLU.h b/Eigen/src/SparseLU/SparseLU.h index e4a4c3a7b..74f710563 100644 --- a/Eigen/src/SparseLU/SparseLU.h +++ b/Eigen/src/SparseLU/SparseLU.h @@ -255,7 +255,7 @@ class SparseLU void initperfvalues() { m_panel_size = 12; - m_relax = 1; + m_relax = 6; m_maxsuper = 100; m_rowblk = 200; m_colblk = 60; @@ -320,26 +320,31 @@ void SparseLU::analyzePattern(const MatrixType& mat) // Compute the fill-reducing ordering // TODO Currently, the only available ordering method is AMD. 
- OrderingType ord(mat); - m_perm_c = ord.get_perm(); + OrderingType ord; + ord(mat,m_perm_c); //FIXME Check the right semantic behind m_perm_c // that is, column j of mat goes to column m_perm_c(j) of mat * m_perm_c; + //DEBUG : Set the natural ordering + for (int i = 0; i < mat.cols(); i++) + m_perm_c.indices()(i) = i; // Apply the permutation to the column of the input matrix - m_mat = mat * m_perm_c; + m_mat = mat * m_perm_c.inverse(); // Compute the column elimination tree of the permuted matrix if (m_etree.size() == 0) m_etree.resize(m_mat.cols()); + LU_sp_coletree(m_mat, m_etree); - + // In symmetric mode, do not do postorder here if (!m_symmetricmode) { IndexVector post, iwork; // Post order etree LU_TreePostorder(m_mat.cols(), m_etree, post); + // Renumber etree in postorder int m = m_mat.cols(); iwork.resize(m+1); @@ -348,12 +353,15 @@ void SparseLU::analyzePattern(const MatrixType& mat) // Postmultiply A*Pc by post, i.e reorder the matrix according to the postorder of the etree - PermutationType post_perm(m);; + PermutationType post_perm(m); //FIXME Use vector constructor for (int i = 0; i < m; i++) post_perm.indices()(i) = post(i); - //m_mat = m_mat * post_perm; // FIXME This should surely be in factorize() + +// m_mat = m_mat * post_perm.inverse(); // FIXME This should surely be in factorize() + // Composition of the two permutations m_perm_c = m_perm_c * post_perm; + } // end postordering m_analysisIsOk = true; @@ -402,9 +410,14 @@ void SparseLU::factorize(const MatrixType& matrix) // Apply the column permutation computed in analyzepattern() - m_mat = matrix * m_perm_c; + m_mat = matrix * m_perm_c.inverse(); m_mat.makeCompressed(); + // DEBUG ... 
Watch matrix permutation + const int *asub_in = matrix.innerIndexPtr(); + const int *colptr_in = matrix.outerIndexPtr(); + int * asub = m_mat.innerIndexPtr(); + int * colptr = m_mat.outerIndexPtr(); int m = m_mat.rows(); int n = m_mat.cols(); int nnz = m_mat.nonZeros(); @@ -455,7 +468,8 @@ void SparseLU::factorize(const MatrixType& matrix) // Setup Permutation vectors // Compute the inverse of perm_c - PermutationType iperm_c (m_perm_c.inverse() ); +// PermutationType iperm_c (m_perm_c.inverse() ); + PermutationType iperm_c (m_perm_c); // Identify initial relaxed snodes IndexVector relax_end(n); @@ -464,6 +478,9 @@ void SparseLU::factorize(const MatrixType& matrix) else LU_relax_snode(n, m_etree, m_relax, marker, relax_end); + //DEBUG +// std::cout<< "relax_end " <= nzlmax ) { mem = LUMemXpand(lsub, nzlmax, nextl, LSUB, glu.num_expansions); - if (mem) return mem; + if (mem) return mem; // Memory expansion failed... Return the memory allocated so far } } } @@ -100,7 +100,7 @@ while (new_next > nzlmax) { mem = LUMemXpand(lsub, nzlmax, nextl, LSUB, glu.num_expansions); - if (mem) return mem; + if (mem) return mem; // Memory expansion failed... 
Return the memory allocated so far } Index ifrom, ito = nextl; for (ifrom = xlsub(jcol); ifrom < nextl;) diff --git a/Eigen/src/SuperLUSupport/SuperLUSupport.h b/Eigen/src/SuperLUSupport/SuperLUSupport.h index 60a3eb09a..9c2e6e17e 100644 --- a/Eigen/src/SuperLUSupport/SuperLUSupport.h +++ b/Eigen/src/SuperLUSupport/SuperLUSupport.h @@ -627,6 +627,9 @@ void SuperLU::factorize(const MatrixType& a) this->initFactorization(a); + //DEBUG + m_sluOptions.ColPerm = NATURAL; + m_sluOptions.Equil = NO; int info = 0; RealScalar recip_pivot_growth, rcond; RealScalar ferr, berr; diff --git a/bench/spbench/test_sparseLU.cpp b/bench/spbench/test_sparseLU.cpp index 4727cc12b..841011f30 100644 --- a/bench/spbench/test_sparseLU.cpp +++ b/bench/spbench/test_sparseLU.cpp @@ -17,7 +17,7 @@ int main(int argc, char **args) typedef Matrix DenseMatrix; typedef Matrix DenseRhs; VectorXd b, x, tmp; - SparseLU, AMDOrdering > solver; + SparseLU, AMDOrdering > solver; ifstream matrix_file; string line; int n; @@ -52,7 +52,7 @@ int main(int argc, char **args) } /* Compute the factorization */ - solver.isSymmetric(true); +// solver.isSymmetric(true); solver.compute(A); solver._solve(b, x); -- cgit v1.2.3 From b0cba2d988de3f4535e0b7ac9799b19700e09b7c Mon Sep 17 00:00:00 2001 From: Desire NUENTSA Date: Wed, 18 Jul 2012 16:59:00 +0200 Subject: Add a draft (not clean ) version of the COLAMD ordering implementation --- Eigen/src/OrderingMethods/Eigen_Colamd.h | 2515 ++++++++++++++++++++++++++++- Eigen/src/OrderingMethods/Ordering.h | 93 +- Eigen/src/SparseLU/SparseLU.h | 28 +- Eigen/src/SuperLUSupport/SuperLUSupport.h | 2 +- bench/spbench/test_sparseLU.cpp | 22 +- 5 files changed, 2604 insertions(+), 56 deletions(-) (limited to 'bench/spbench') diff --git a/Eigen/src/OrderingMethods/Eigen_Colamd.h b/Eigen/src/OrderingMethods/Eigen_Colamd.h index 8caee7740..39701d0af 100644 --- a/Eigen/src/OrderingMethods/Eigen_Colamd.h +++ b/Eigen/src/OrderingMethods/Eigen_Colamd.h @@ -1,5 +1,2518 @@ +// // This file 
is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Desire Nuentsa Wakam +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. +// This file is modified from the eigen_colamd/symamd library. The copyright is below + +// The authors of the code itself are Stefan I. Larimore and Timothy A. +// Davis (davis@cise.ufl.edu), University of Florida. The algorithm was +// developed in collaboration with John Gilbert, Xerox PARC, and Esmond +// Ng, Oak Ridge National Laboratory. +// +// Date: +// +// September 8, 2003. Version 2.3. +// +// Acknowledgements: +// +// This work was supported by the National Science Foundation, under +// grants DMS-9504974 and DMS-9803599. +// +// Notice: +// +// Copyright (c) 1998-2003 by the University of Florida. +// All Rights Reserved. +// +// THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY +// EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. +// +// Permission is hereby granted to use, copy, modify, and/or distribute +// this program, provided that the Copyright, this License, and the +// Availability of the original version is retained on all copies and made +// accessible to the end-user of any code or package that includes COLAMD +// or any modified version of COLAMD. +// +// Availability: +// +// The eigen_colamd/symamd library is available at +// +// http://www.cise.ufl.edu/research/sparse/eigen_colamd/ + +// This is the http://www.cise.ufl.edu/research/sparse/eigen_colamd/eigen_colamd.h +// file. It is required by the eigen_colamd.c, colamdmex.c, and symamdmex.c +// files, and by any C code that calls the routines whose prototypes are +// listed below, or that uses the eigen_colamd/symamd definitions listed below. 
+ #ifndef EIGEN_COLAMD_H #define EIGEN_COLAMD_H -#endif \ No newline at end of file +/* Ensure that debugging is turned off: */ +#ifndef COLAMD_NDEBUG +#define COLAMD_NDEBUG +#endif /* NDEBUG */ + +/* ========================================================================== */ +/* === Knob and statistics definitions ====================================== */ +/* ========================================================================== */ + +/* size of the knobs [ ] array. Only knobs [0..1] are currently used. */ +#define EIGEN_COLAMD_KNOBS 20 + +/* number of output statistics. Only stats [0..6] are currently used. */ +#define EIGEN_COLAMD_STATS 20 + +/* knobs [0] and stats [0]: dense row knob and output statistic. */ +#define EIGEN_COLAMD_DENSE_ROW 0 + +/* knobs [1] and stats [1]: dense column knob and output statistic. */ +#define EIGEN_COLAMD_DENSE_COL 1 + +/* stats [2]: memory defragmentation count output statistic */ +#define EIGEN_COLAMD_DEFRAG_COUNT 2 + +/* stats [3]: eigen_colamd status: zero OK, > 0 warning or notice, < 0 error */ +#define EIGEN_COLAMD_STATUS 3 + +/* stats [4..6]: error info, or info on jumbled columns */ +#define EIGEN_COLAMD_INFO1 4 +#define EIGEN_COLAMD_INFO2 5 +#define EIGEN_COLAMD_INFO3 6 + +/* error codes returned in stats [3]: */ +#define EIGEN_COLAMD_OK (0) +#define EIGEN_COLAMD_OK_BUT_JUMBLED (1) +#define EIGEN_COLAMD_ERROR_A_not_present (-1) +#define EIGEN_COLAMD_ERROR_p_not_present (-2) +#define EIGEN_COLAMD_ERROR_nrow_negative (-3) +#define EIGEN_COLAMD_ERROR_ncol_negative (-4) +#define EIGEN_COLAMD_ERROR_nnz_negative (-5) +#define EIGEN_COLAMD_ERROR_p0_nonzero (-6) +#define EIGEN_COLAMD_ERROR_A_too_small (-7) +#define EIGEN_COLAMD_ERROR_col_length_negative (-8) +#define EIGEN_COLAMD_ERROR_row_index_out_of_bounds (-9) +#define EIGEN_COLAMD_ERROR_out_of_memory (-10) +#define EIGEN_COLAMD_ERROR_internal_error (-999) + +/* ========================================================================== */ +/* === Definitions 
========================================================== */ +/* ========================================================================== */ + +#define COLAMD_MAX(a,b) (((a) > (b)) ? (a) : (b)) +#define COLAMD_MIN(a,b) (((a) < (b)) ? (a) : (b)) + +#define EIGEN_ONES_COMPLEMENT(r) (-(r)-1) + +/* -------------------------------------------------------------------------- */ + +#define EIGEN_COLAMD_EMPTY (-1) + +/* Row and column status */ +#define EIGEN_ALIVE (0) +#define EIGEN_DEAD (-1) + +/* Column status */ +#define EIGEN_DEAD_PRINCIPAL (-1) +#define EIGEN_DEAD_NON_PRINCIPAL (-2) + +/* Macros for row and column status update and checking. */ +#define EIGEN_ROW_IS_DEAD(r) EIGEN_ROW_IS_MARKED_DEAD (Row[r].shared2.mark) +#define EIGEN_ROW_IS_MARKED_DEAD(row_mark) (row_mark < EIGEN_ALIVE) +#define EIGEN_ROW_IS_ALIVE(r) (Row [r].shared2.mark >= EIGEN_ALIVE) +#define EIGEN_COL_IS_DEAD(c) (Col [c].start < EIGEN_ALIVE) +#define EIGEN_COL_IS_ALIVE(c) (Col [c].start >= EIGEN_ALIVE) +#define EIGEN_EIGEN_COL_IS_DEAD_PRINCIPAL(c) (Col [c].start == EIGEN_DEAD_PRINCIPAL) +#define EIGEN_KILL_ROW(r) { Row [r].shared2.mark = EIGEN_DEAD ; } +#define EIGEN_KILL_PRINCIPAL_COL(c) { Col [c].start = EIGEN_DEAD_PRINCIPAL ; } +#define EIGEN_KILL_NON_PRINCIPAL_COL(c) { Col [c].start = EIGEN_DEAD_NON_PRINCIPAL ; } + +/* ========================================================================== */ +/* === Colamd reporting mechanism =========================================== */ +/* ========================================================================== */ + +#ifdef MATLAB_MEX_FILE + +/* use mexPrintf in a MATLAB mexFunction, for debugging and statistics output */ +#define PRINTF mexPrintf + +/* In MATLAB, matrices are 1-based to the user, but 0-based internally */ +#define INDEX(i) ((i)+1) + +#else + +/* Use printf in standard C environment, for debugging and statistics output. 
*/ +/* Output is generated only if debugging is enabled at compile time, or if */ +/* the caller explicitly calls eigen_colamd_report or symamd_report. */ +#define PRINTF printf + +/* In C, matrices are 0-based and indices are reported as such in *_report */ +#define INDEX(i) (i) + +#endif /* MATLAB_MEX_FILE */ + + // == Row and Column structures == + +typedef struct EIGEN_Colamd_Col_struct +{ + int start ; /* index for A of first row in this column, or EIGEN_DEAD */ + /* if column is dead */ + int length ; /* number of rows in this column */ + union + { + int thickness ; /* number of original columns represented by this */ + /* col, if the column is alive */ + int parent ; /* parent in parent tree super-column structure, if */ + /* the column is dead */ + } shared1 ; + union + { + int score ; /* the score used to maintain heap, if col is alive */ + int order ; /* pivot ordering of this column, if col is dead */ + } shared2 ; + union + { + int headhash ; /* head of a hash bucket, if col is at the head of */ + /* a degree list */ + int hash ; /* hash value, if col is not in a degree list */ + int prev ; /* previous column in degree list, if col is in a */ + /* degree list (but not at the head of a degree list) */ + } shared3 ; + union + { + int degree_next ; /* next column, if col is in a degree list */ + int hash_next ; /* next column, if col is in a hash list */ + } shared4 ; + +} EIGEN_Colamd_Col ; + +typedef struct EIGEN_Colamd_Row_struct +{ + int start ; /* index for A of first col in this row */ + int length ; /* number of principal columns in this row */ + union + { + int degree ; /* number of principal & non-principal columns in row */ + int p ; /* used as a row pointer in eigen_init_rows_cols () */ + } shared1 ; + union + { + int mark ; /* for computing set differences and marking dead rows*/ + int first_column ;/* first column in row (used in garbage collection) */ + } shared2 ; + +} EIGEN_Colamd_Row ; + +/* 
========================================================================== */ +/* === Colamd recommended memory size ======================================= */ +/* ========================================================================== */ + +/* + The recommended length Alen of the array A passed to eigen_colamd is given by + the EIGEN_COLAMD_RECOMMENDED (nnz, n_row, n_col) macro. It returns -1 if any + argument is negative. 2*nnz space is required for the row and column + indices of the matrix. EIGEN_COLAMD_C (n_col) + EIGEN_COLAMD_R (n_row) space is + required for the Col and Row arrays, respectively, which are internal to + eigen_colamd. An additional n_col space is the minimal amount of "elbow room", + and nnz/5 more space is recommended for run time efficiency. + + This macro is not needed when using symamd. + + Explicit typecast to int added Sept. 23, 2002, COLAMD version 2.2, to avoid + gcc -pedantic warning messages. +*/ + +#define EIGEN_COLAMD_C(n_col) ((int) (((n_col) + 1) * sizeof (EIGEN_Colamd_Col) / sizeof (int))) +#define EIGEN_COLAMD_R(n_row) ((int) (((n_row) + 1) * sizeof (EIGEN_Colamd_Row) / sizeof (int))) + +#define EIGEN_COLAMD_RECOMMENDED(nnz, n_row, n_col) \ +( \ +((nnz) < 0 || (n_row) < 0 || (n_col) < 0) \ +? 
\ + (-1) \ +: \ + (2 * (nnz) + EIGEN_COLAMD_C (n_col) + EIGEN_COLAMD_R (n_row) + (n_col) + ((nnz) / 5)) \ +) + + // Various routines +int eigen_colamd_recommended (int nnz, int n_row, int n_col) ; + +void eigen_colamd_set_defaults (double knobs [EIGEN_COLAMD_KNOBS]) ; + +bool eigen_colamd (int n_row, int n_col, int Alen, int A [], int p [], double knobs[EIGEN_COLAMD_KNOBS], int stats [EIGEN_COLAMD_STATS]) ; + +void eigen_colamd_report (int stats [EIGEN_COLAMD_STATS]); + +int eigen_init_rows_cols (int n_row, int n_col, EIGEN_Colamd_Row Row [], EIGEN_Colamd_Col col [], int A [], int p [], int stats[EIGEN_COLAMD_STATS] ); + +void eigen_init_scoring (int n_row, int n_col, EIGEN_Colamd_Row Row [], EIGEN_Colamd_Col Col [], int A [], int head [], + double knobs[EIGEN_COLAMD_KNOBS], int *p_n_row2, int *p_n_col2, int *p_max_deg); + +int eigen_find_ordering (int n_row, int n_col, int Alen, EIGEN_Colamd_Row Row [], EIGEN_Colamd_Col Col [], int A [], int head [], + int n_col2, int max_deg, int pfree); + +void eigen_order_children (int n_col, EIGEN_Colamd_Col Col [], int p []); + +void eigen_detect_super_cols ( +#ifndef COLAMD_NDEBUG + int n_col, + EIGEN_Colamd_Row Row [], +#endif /* COLAMD_NDEBUG */ + EIGEN_Colamd_Col Col [], + int A [], + int head [], + int row_start, + int row_length ) ; + + int eigen_garbage_collection (int n_row, int n_col, EIGEN_Colamd_Row Row [], EIGEN_Colamd_Col Col [], int A [], int *pfree) ; + + int eigen_clear_mark (int n_row, EIGEN_Colamd_Row Row [] ) ; + + void eigen_print_report (char *method, int stats [EIGEN_COLAMD_STATS]) ; + +/* ========================================================================== */ +/* === Debugging prototypes and definitions ================================= */ +/* ========================================================================== */ + +#ifndef COLAMD_NDEBUG + +/* colamd_debug is the *ONLY* global variable, and is only */ +/* present when debugging */ + + int colamd_debug ; /* debug print level */ + +#define 
COLAMD_DEBUG0(params) { (void) PRINTF params ; } +#define COLAMD_DEBUG1(params) { if (colamd_debug >= 1) (void) PRINTF params ; } +#define COLAMD_DEBUG2(params) { if (colamd_debug >= 2) (void) PRINTF params ; } +#define COLAMD_DEBUG3(params) { if (colamd_debug >= 3) (void) PRINTF params ; } +#define COLAMD_DEBUG4(params) { if (colamd_debug >= 4) (void) PRINTF params ; } + +#ifdef MATLAB_MEX_FILE +#define COLAMD_ASSERT(expression) (mxAssert ((expression), "")) +#else +#define COLAMD_ASSERT(expression) (assert (expression)) +#endif /* MATLAB_MEX_FILE */ + + void eigen_colamd_get_debug /* gets the debug print level from getenv */ +( + char *method +) ; + + void eigen_debug_deg_lists +( + int n_row, + int n_col, + EIGEN_Colamd_Row Row [], + EIGEN_Colamd_Col Col [], + int head [], + int min_score, + int should, + int max_deg +) ; + + void eigen_debug_mark +( + int n_row, + EIGEN_Colamd_Row Row [], + int tag_mark, + int max_mark +) ; + + void eigen_debug_matrix +( + int n_row, + int n_col, + EIGEN_Colamd_Row Row [], + EIGEN_Colamd_Col Col [], + int A [] +) ; + + void eigen_debug_structures +( + int n_row, + int n_col, + EIGEN_Colamd_Row Row [], + EIGEN_Colamd_Col Col [], + int A [], + int n_col2 +) ; + +#else /* COLAMD_NDEBUG */ + +/* === No debugging ========================================================= */ + +#define COLAMD_DEBUG0(params) ; +#define COLAMD_DEBUG1(params) ; +#define COLAMD_DEBUG2(params) ; +#define COLAMD_DEBUG3(params) ; +#define COLAMD_DEBUG4(params) ; + +#define COLAMD_ASSERT(expression) ((void) 0) + +#endif /* COLAMD_NDEBUG */ + + + +/** + * \brief Returns the recommended value of Alen + * + * Returns recommended value of Alen for use by eigen_colamd. + * Returns -1 if any input argument is negative. + * The use of this routine or macro is optional. + * Note that the macro uses its arguments more than once, + * so be careful for side effects, if you pass expressions as arguments to EIGEN_COLAMD_RECOMMENDED. 
+ * + * \param nnz nonzeros in A + * \param n_row number of rows in A + * \param n_col number of columns in A + * \return recommended value of Alen for use by eigen_colamd + */ +int eigen_colamd_recommended ( int nnz, int n_row, int n_col) +{ + + return (EIGEN_COLAMD_RECOMMENDED (nnz, n_row, n_col)) ; +} + +/** + * \brief set default parameters The use of this routine is optional. + * + * Colamd: rows with more than (knobs [EIGEN_COLAMD_DENSE_ROW] * n_col) + * entries are removed prior to ordering. Columns with more than + * (knobs [EIGEN_COLAMD_DENSE_COL] * n_row) entries are removed prior to + * ordering, and placed last in the output column ordering. + * + * EIGEN_COLAMD_DENSE_ROW and EIGEN_COLAMD_DENSE_COL are defined as 0 and 1, + * respectively, in eigen_colamd.h. Default values of these two knobs + * are both 0.5. Currently, only knobs [0] and knobs [1] are + * used, but future versions may use more knobs. If so, they will + * be properly set to their defaults by the future version of + * eigen_colamd_set_defaults, so that the code that calls eigen_colamd will + * not need to change, assuming that you either use + * eigen_colamd_set_defaults, or pass a (double *) NULL pointer as the + * knobs array to eigen_colamd or symamd. 
+ * + * \param knobs parameter settings for eigen_colamd + */ +void eigen_colamd_set_defaults(double knobs[EIGEN_COLAMD_KNOBS]) +{ + /* === Local variables ================================================== */ + + int i ; + + if (!knobs) + { + return ; /* no knobs to initialize */ + } + for (i = 0 ; i < EIGEN_COLAMD_KNOBS ; i++) + { + knobs [i] = 0 ; + } + knobs [EIGEN_COLAMD_DENSE_ROW] = 0.5 ; /* ignore rows over 50% dense */ + knobs [EIGEN_COLAMD_DENSE_COL] = 0.5 ; /* ignore columns over 50% dense */ +} + +/** + * \brief Computes a column ordering using the column approximate minimum degree ordering + * + * Computes a column ordering (Q) of A such that P(AQ)=LU or + * (AQ)'AQ=LL' have less fill-in and require fewer floating point + * operations than factorizing the unpermuted matrix A or A'A, + * respectively. + * + * + * \param n_row number of rows in A + * \param n_col number of columns in A + * \param Alen, size of the array A + * \param A row indices of the matrix, of size ALen + * \param p column pointers of A, of size n_col+1 + * \param knobs parameter settings for eigen_colamd + * \param stats eigen_colamd output statistics and error codes + */ +bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[EIGEN_COLAMD_KNOBS], int stats[EIGEN_COLAMD_STATS]) +{ + /* === Local variables ================================================== */ + + int i ; /* loop index */ + int nnz ; /* nonzeros in A */ + int Row_size ; /* size of Row [], in integers */ + int Col_size ; /* size of Col [], in integers */ + int need ; /* minimum required length of A */ + EIGEN_Colamd_Row *Row ; /* pointer into A of Row [0..n_row] array */ + EIGEN_Colamd_Col *Col ; /* pointer into A of Col [0..n_col] array */ + int n_col2 ; /* number of non-dense, non-empty columns */ + int n_row2 ; /* number of non-dense, non-empty rows */ + int ngarbage ; /* number of garbage collections performed */ + int max_deg ; /* maximum row degree */ + double default_knobs 
[EIGEN_COLAMD_KNOBS] ; /* default knobs array */ + +#ifndef COLAMD_NDEBUG + eigen_colamd_get_debug ("eigen_colamd") ; +#endif /* COLAMD_NDEBUG */ + + /* === Check the input arguments ======================================== */ + + if (!stats) + { + COLAMD_DEBUG0 (("eigen_colamd: stats not present\n")) ; + return (false) ; + } + for (i = 0 ; i < EIGEN_COLAMD_STATS ; i++) + { + stats [i] = 0 ; + } + stats [EIGEN_COLAMD_STATUS] = EIGEN_COLAMD_OK ; + stats [EIGEN_COLAMD_INFO1] = -1 ; + stats [EIGEN_COLAMD_INFO2] = -1 ; + + if (!A) /* A is not present */ + { + stats [EIGEN_COLAMD_STATUS] = EIGEN_COLAMD_ERROR_A_not_present ; + COLAMD_DEBUG0 (("eigen_colamd: A not present\n")) ; + return (false) ; + } + + if (!p) /* p is not present */ + { + stats [EIGEN_COLAMD_STATUS] = EIGEN_COLAMD_ERROR_p_not_present ; + COLAMD_DEBUG0 (("eigen_colamd: p not present\n")) ; + return (false) ; + } + + if (n_row < 0) /* n_row must be >= 0 */ + { + stats [EIGEN_COLAMD_STATUS] = EIGEN_COLAMD_ERROR_nrow_negative ; + stats [EIGEN_COLAMD_INFO1] = n_row ; + COLAMD_DEBUG0 (("eigen_colamd: nrow negative %d\n", n_row)) ; + return (false) ; + } + + if (n_col < 0) /* n_col must be >= 0 */ + { + stats [EIGEN_COLAMD_STATUS] = EIGEN_COLAMD_ERROR_ncol_negative ; + stats [EIGEN_COLAMD_INFO1] = n_col ; + COLAMD_DEBUG0 (("eigen_colamd: ncol negative %d\n", n_col)) ; + return (false) ; + } + + nnz = p [n_col] ; + if (nnz < 0) /* nnz must be >= 0 */ + { + stats [EIGEN_COLAMD_STATUS] = EIGEN_COLAMD_ERROR_nnz_negative ; + stats [EIGEN_COLAMD_INFO1] = nnz ; + COLAMD_DEBUG0 (("eigen_colamd: number of entries negative %d\n", nnz)) ; + return (false) ; + } + + if (p [0] != 0) + { + stats [EIGEN_COLAMD_STATUS] = EIGEN_COLAMD_ERROR_p0_nonzero ; + stats [EIGEN_COLAMD_INFO1] = p [0] ; + COLAMD_DEBUG0 (("eigen_colamd: p[0] not zero %d\n", p [0])) ; + return (false) ; + } + + /* === If no knobs, set default knobs =================================== */ + + if (!knobs) + { + eigen_colamd_set_defaults (default_knobs) ; + 
knobs = default_knobs ; + } + + /* === Allocate the Row and Col arrays from array A ===================== */ + + Col_size = EIGEN_COLAMD_C (n_col) ; + Row_size = EIGEN_COLAMD_R (n_row) ; + need = 2*nnz + n_col + Col_size + Row_size ; + + if (need > Alen) + { + /* not enough space in array A to perform the ordering */ + stats [EIGEN_COLAMD_STATUS] = EIGEN_COLAMD_ERROR_A_too_small ; + stats [EIGEN_COLAMD_INFO1] = need ; + stats [EIGEN_COLAMD_INFO2] = Alen ; + COLAMD_DEBUG0 (("eigen_colamd: Need Alen >= %d, given only Alen = %d\n", need,Alen)); + return (false) ; + } + + Alen -= Col_size + Row_size ; + Col = (EIGEN_Colamd_Col *) &A [Alen] ; + Row = (EIGEN_Colamd_Row *) &A [Alen + Col_size] ; + + /* === Construct the row and column data structures ===================== */ + + if (!eigen_init_rows_cols (n_row, n_col, Row, Col, A, p, stats)) + { + /* input matrix is invalid */ + COLAMD_DEBUG0 (("eigen_colamd: Matrix invalid\n")) ; + return (false) ; + } + + /* === Initialize scores, kill dense rows/columns ======================= */ + + eigen_init_scoring (n_row, n_col, Row, Col, A, p, knobs, + &n_row2, &n_col2, &max_deg) ; + + /* === Order the supercolumns =========================================== */ + + ngarbage = eigen_find_ordering (n_row, n_col, Alen, Row, Col, A, p, + n_col2, max_deg, 2*nnz) ; + + /* === Order the non-principal columns ================================== */ + + eigen_order_children (n_col, Col, p) ; + + /* === Return statistics in stats ======================================= */ + + stats [EIGEN_COLAMD_DENSE_ROW] = n_row - n_row2 ; + stats [EIGEN_COLAMD_DENSE_COL] = n_col - n_col2 ; + stats [EIGEN_COLAMD_DEFRAG_COUNT] = ngarbage ; + COLAMD_DEBUG0 (("eigen_colamd: done.\n")) ; + return (true) ; +} + +/* ========================================================================== */ +/* === eigen_colamd_report ======================================================== */ +/* ========================================================================== */ + 
+ void eigen_colamd_report +( + int stats [EIGEN_COLAMD_STATS] +) +{ + eigen_print_report ("eigen_colamd", stats) ; +} + + +/* ========================================================================== */ +/* === NON-USER-CALLABLE ROUTINES: ========================================== */ +/* ========================================================================== */ + +/* There are no user-callable routines beyond this point in the file */ + + +/* ========================================================================== */ +/* === eigen_init_rows_cols ======================================================= */ +/* ========================================================================== */ + +/* + Takes the column form of the matrix in A and creates the row form of the + matrix. Also, row and column attributes are stored in the Col and Row + structs. If the columns are un-sorted or contain duplicate row indices, + this routine will also sort and remove duplicate row indices from the + column form of the matrix. Returns false if the matrix is invalid, + true otherwise. Not user-callable. 
+*/ + + int eigen_init_rows_cols /* returns true if OK, or false otherwise */ +( + /* === Parameters ======================================================= */ + + int n_row, /* number of rows of A */ + int n_col, /* number of columns of A */ + EIGEN_Colamd_Row Row [], /* of size n_row+1 */ + EIGEN_Colamd_Col Col [], /* of size n_col+1 */ + int A [], /* row indices of A, of size Alen */ + int p [], /* pointers to columns in A, of size n_col+1 */ + int stats [EIGEN_COLAMD_STATS] /* eigen_colamd statistics */ +) +{ + /* === Local variables ================================================== */ + + int col ; /* a column index */ + int row ; /* a row index */ + int *cp ; /* a column pointer */ + int *cp_end ; /* a pointer to the end of a column */ + int *rp ; /* a row pointer */ + int *rp_end ; /* a pointer to the end of a row */ + int last_row ; /* previous row */ + + /* === Initialize columns, and check column pointers ==================== */ + + for (col = 0 ; col < n_col ; col++) + { + Col [col].start = p [col] ; + Col [col].length = p [col+1] - p [col] ; + + if (Col [col].length < 0) + { + /* column pointers must be non-decreasing */ + stats [EIGEN_COLAMD_STATUS] = EIGEN_COLAMD_ERROR_col_length_negative ; + stats [EIGEN_COLAMD_INFO1] = col ; + stats [EIGEN_COLAMD_INFO2] = Col [col].length ; + COLAMD_DEBUG0 (("eigen_colamd: col %d length %d < 0\n", col, Col [col].length)) ; + return (false) ; + } + + Col [col].shared1.thickness = 1 ; + Col [col].shared2.score = 0 ; + Col [col].shared3.prev = EIGEN_COLAMD_EMPTY ; + Col [col].shared4.degree_next = EIGEN_COLAMD_EMPTY ; + } + + /* p [0..n_col] no longer needed, used as "head" in subsequent routines */ + + /* === Scan columns, compute row degrees, and check row indices ========= */ + + stats [EIGEN_COLAMD_INFO3] = 0 ; /* number of duplicate or unsorted row indices*/ + + for (row = 0 ; row < n_row ; row++) + { + Row [row].length = 0 ; + Row [row].shared2.mark = -1 ; + } + + for (col = 0 ; col < n_col ; col++) + { + 
last_row = -1 ; + + cp = &A [p [col]] ; + cp_end = &A [p [col+1]] ; + + while (cp < cp_end) + { + row = *cp++ ; + + /* make sure row indices within range */ + if (row < 0 || row >= n_row) + { + stats [EIGEN_COLAMD_STATUS] = EIGEN_COLAMD_ERROR_row_index_out_of_bounds ; + stats [EIGEN_COLAMD_INFO1] = col ; + stats [EIGEN_COLAMD_INFO2] = row ; + stats [EIGEN_COLAMD_INFO3] = n_row ; + COLAMD_DEBUG0 (("eigen_colamd: row %d col %d out of bounds\n", row, col)) ; + return (false) ; + } + + if (row <= last_row || Row [row].shared2.mark == col) + { + /* row index are unsorted or repeated (or both), thus col */ + /* is jumbled. This is a notice, not an error condition. */ + stats [EIGEN_COLAMD_STATUS] = EIGEN_COLAMD_OK_BUT_JUMBLED ; + stats [EIGEN_COLAMD_INFO1] = col ; + stats [EIGEN_COLAMD_INFO2] = row ; + (stats [EIGEN_COLAMD_INFO3]) ++ ; + COLAMD_DEBUG1 (("eigen_colamd: row %d col %d unsorted/duplicate\n",row,col)); + } + + if (Row [row].shared2.mark != col) + { + Row [row].length++ ; + } + else + { + /* this is a repeated entry in the column, */ + /* it will be removed */ + Col [col].length-- ; + } + + /* mark the row as having been seen in this column */ + Row [row].shared2.mark = col ; + + last_row = row ; + } + } + + /* === Compute row pointers ============================================= */ + + /* row form of the matrix starts directly after the column */ + /* form of matrix in A */ + Row [0].start = p [n_col] ; + Row [0].shared1.p = Row [0].start ; + Row [0].shared2.mark = -1 ; + for (row = 1 ; row < n_row ; row++) + { + Row [row].start = Row [row-1].start + Row [row-1].length ; + Row [row].shared1.p = Row [row].start ; + Row [row].shared2.mark = -1 ; + } + + /* === Create row form ================================================== */ + + if (stats [EIGEN_COLAMD_STATUS] == EIGEN_COLAMD_OK_BUT_JUMBLED) + { + /* if cols jumbled, watch for repeated row indices */ + for (col = 0 ; col < n_col ; col++) + { + cp = &A [p [col]] ; + cp_end = &A [p [col+1]] ; + while (cp < 
cp_end) + { + row = *cp++ ; + if (Row [row].shared2.mark != col) + { + A [(Row [row].shared1.p)++] = col ; + Row [row].shared2.mark = col ; + } + } + } + } + else + { + /* if cols not jumbled, we don't need the mark (this is faster) */ + for (col = 0 ; col < n_col ; col++) + { + cp = &A [p [col]] ; + cp_end = &A [p [col+1]] ; + while (cp < cp_end) + { + A [(Row [*cp++].shared1.p)++] = col ; + } + } + } + + /* === Clear the row marks and set row degrees ========================== */ + + for (row = 0 ; row < n_row ; row++) + { + Row [row].shared2.mark = 0 ; + Row [row].shared1.degree = Row [row].length ; + } + + /* === See if we need to re-create columns ============================== */ + + if (stats [EIGEN_COLAMD_STATUS] == EIGEN_COLAMD_OK_BUT_JUMBLED) + { + COLAMD_DEBUG0 (("eigen_colamd: reconstructing column form, matrix jumbled\n")) ; + +#ifndef COLAMD_NDEBUG + /* make sure column lengths are correct */ + for (col = 0 ; col < n_col ; col++) + { + p [col] = Col [col].length ; + } + for (row = 0 ; row < n_row ; row++) + { + rp = &A [Row [row].start] ; + rp_end = rp + Row [row].length ; + while (rp < rp_end) + { + p [*rp++]-- ; + } + } + for (col = 0 ; col < n_col ; col++) + { + COLAMD_ASSERT (p [col] == 0) ; + } + /* now p is all zero (different than when debugging is turned off) */ +#endif /* COLAMD_NDEBUG */ + + /* === Compute col pointers ========================================= */ + + /* col form of the matrix starts at A [0]. */ + /* Note, we may have a gap between the col form and the row */ + /* form if there were duplicate entries, if so, it will be */ + /* removed upon the first garbage collection */ + Col [0].start = 0 ; + p [0] = Col [0].start ; + for (col = 1 ; col < n_col ; col++) + { + /* note that the lengths here are for pruned columns, i.e. 
*/ + /* no duplicate row indices will exist for these columns */ + Col [col].start = Col [col-1].start + Col [col-1].length ; + p [col] = Col [col].start ; + } + + /* === Re-create col form =========================================== */ + + for (row = 0 ; row < n_row ; row++) + { + rp = &A [Row [row].start] ; + rp_end = rp + Row [row].length ; + while (rp < rp_end) + { + A [(p [*rp++])++] = row ; + } + } + } + + /* === Done. Matrix is not (or no longer) jumbled ====================== */ + + return (true) ; +} + + +/* ========================================================================== */ +/* === eigen_init_scoring ========================================================= */ +/* ========================================================================== */ + +/* + Kills dense or empty columns and rows, calculates an initial score for + each column, and places all columns in the degree lists. Not user-callable. +*/ + + void eigen_init_scoring +( + /* === Parameters ======================================================= */ + + int n_row, /* number of rows of A */ + int n_col, /* number of columns of A */ + EIGEN_Colamd_Row Row [], /* of size n_row+1 */ + EIGEN_Colamd_Col Col [], /* of size n_col+1 */ + int A [], /* column form and row form of A */ + int head [], /* of size n_col+1 */ + double knobs [EIGEN_COLAMD_KNOBS],/* parameters */ + int *p_n_row2, /* number of non-dense, non-empty rows */ + int *p_n_col2, /* number of non-dense, non-empty columns */ + int *p_max_deg /* maximum row degree */ +) +{ + /* === Local variables ================================================== */ + + int c ; /* a column index */ + int r, row ; /* a row index */ + int *cp ; /* a column pointer */ + int deg ; /* degree of a row or column */ + int *cp_end ; /* a pointer to the end of a column */ + int *new_cp ; /* new column pointer */ + int col_length ; /* length of pruned column */ + int score ; /* current column score */ + int n_col2 ; /* number of non-dense, non-empty columns */ 
+ int n_row2 ; /* number of non-dense, non-empty rows */ + int dense_row_count ; /* remove rows with more entries than this */ + int dense_col_count ; /* remove cols with more entries than this */ + int min_score ; /* smallest column score */ + int max_deg ; /* maximum row degree */ + int next_col ; /* Used to add to degree list.*/ + +#ifndef COLAMD_NDEBUG + int debug_count ; /* debug only. */ +#endif /* COLAMD_NDEBUG */ + + /* === Extract knobs ==================================================== */ + + dense_row_count = COLAMD_MAX (0, COLAMD_MIN (knobs [EIGEN_COLAMD_DENSE_ROW] * n_col, n_col)) ; + dense_col_count = COLAMD_MAX (0, COLAMD_MIN (knobs [EIGEN_COLAMD_DENSE_COL] * n_row, n_row)) ; + COLAMD_DEBUG1 (("eigen_colamd: densecount: %d %d\n", dense_row_count, dense_col_count)) ; + max_deg = 0 ; + n_col2 = n_col ; + n_row2 = n_row ; + + /* === Kill empty columns =============================================== */ + + /* Put the empty columns at the end in their natural order, so that LU */ + /* factorization can proceed as far as possible. 
*/ + for (c = n_col-1 ; c >= 0 ; c--) + { + deg = Col [c].length ; + if (deg == 0) + { + /* this is a empty column, kill and order it last */ + Col [c].shared2.order = --n_col2 ; + EIGEN_KILL_PRINCIPAL_COL (c) ; + } + } + COLAMD_DEBUG1 (("eigen_colamd: null columns killed: %d\n", n_col - n_col2)) ; + + /* === Kill dense columns =============================================== */ + + /* Put the dense columns at the end, in their natural order */ + for (c = n_col-1 ; c >= 0 ; c--) + { + /* skip any dead columns */ + if (EIGEN_COL_IS_DEAD (c)) + { + continue ; + } + deg = Col [c].length ; + if (deg > dense_col_count) + { + /* this is a dense column, kill and order it last */ + Col [c].shared2.order = --n_col2 ; + /* decrement the row degrees */ + cp = &A [Col [c].start] ; + cp_end = cp + Col [c].length ; + while (cp < cp_end) + { + Row [*cp++].shared1.degree-- ; + } + EIGEN_KILL_PRINCIPAL_COL (c) ; + } + } + COLAMD_DEBUG1 (("eigen_colamd: Dense and null columns killed: %d\n", n_col - n_col2)) ; + + /* === Kill dense and empty rows ======================================== */ + + for (r = 0 ; r < n_row ; r++) + { + deg = Row [r].shared1.degree ; + COLAMD_ASSERT (deg >= 0 && deg <= n_col) ; + if (deg > dense_row_count || deg == 0) + { + /* kill a dense or empty row */ + EIGEN_KILL_ROW (r) ; + --n_row2 ; + } + else + { + /* keep track of max degree of remaining rows */ + max_deg = COLAMD_MAX (max_deg, deg) ; + } + } + COLAMD_DEBUG1 (("eigen_colamd: Dense and null rows killed: %d\n", n_row - n_row2)) ; + + /* === Compute initial column scores ==================================== */ + + /* At this point the row degrees are accurate. They reflect the number */ + /* of "live" (non-dense) columns in each row. No empty rows exist. */ + /* Some "live" columns may contain only dead rows, however. These are */ + /* pruned in the code below. 
*/ + + /* now find the initial matlab score for each column */ + for (c = n_col-1 ; c >= 0 ; c--) + { + /* skip dead column */ + if (EIGEN_COL_IS_DEAD (c)) + { + continue ; + } + score = 0 ; + cp = &A [Col [c].start] ; + new_cp = cp ; + cp_end = cp + Col [c].length ; + while (cp < cp_end) + { + /* get a row */ + row = *cp++ ; + /* skip if dead */ + if (EIGEN_ROW_IS_DEAD (row)) + { + continue ; + } + /* compact the column */ + *new_cp++ = row ; + /* add row's external degree */ + score += Row [row].shared1.degree - 1 ; + /* guard against integer overflow */ + score = COLAMD_MIN (score, n_col) ; + } + /* determine pruned column length */ + col_length = (int) (new_cp - &A [Col [c].start]) ; + if (col_length == 0) + { + /* a newly-made null column (all rows in this col are "dense" */ + /* and have already been killed) */ + COLAMD_DEBUG2 (("Newly null killed: %d\n", c)) ; + Col [c].shared2.order = --n_col2 ; + EIGEN_KILL_PRINCIPAL_COL (c) ; + } + else + { + /* set column length and set score */ + COLAMD_ASSERT (score >= 0) ; + COLAMD_ASSERT (score <= n_col) ; + Col [c].length = col_length ; + Col [c].shared2.score = score ; + } + } + COLAMD_DEBUG1 (("eigen_colamd: Dense, null, and newly-null columns killed: %d\n", + n_col-n_col2)) ; + + /* At this point, all empty rows and columns are dead. All live columns */ + /* are "clean" (containing no dead rows) and simplicial (no supercolumns */ + /* yet). Rows may contain dead columns, but all live rows contain at */ + /* least one live column. */ + +#ifndef COLAMD_NDEBUG + eigen_debug_structures (n_row, n_col, Row, Col, A, n_col2) ; +#endif /* COLAMD_NDEBUG */ + + /* === Initialize degree lists ========================================== */ + +#ifndef COLAMD_NDEBUG + debug_count = 0 ; +#endif /* COLAMD_NDEBUG */ + + /* clear the hash buckets */ + for (c = 0 ; c <= n_col ; c++) + { + head [c] = EIGEN_COLAMD_EMPTY ; + } + min_score = n_col ; + /* place in reverse order, so low column indices are at the front */ + /* of the lists. 
This is to encourage natural tie-breaking */ + for (c = n_col-1 ; c >= 0 ; c--) + { + /* only add principal columns to degree lists */ + if (EIGEN_COL_IS_ALIVE (c)) + { + COLAMD_DEBUG4 (("place %d score %d minscore %d ncol %d\n", + c, Col [c].shared2.score, min_score, n_col)) ; + + /* === Add columns score to DList =============================== */ + + score = Col [c].shared2.score ; + + COLAMD_ASSERT (min_score >= 0) ; + COLAMD_ASSERT (min_score <= n_col) ; + COLAMD_ASSERT (score >= 0) ; + COLAMD_ASSERT (score <= n_col) ; + COLAMD_ASSERT (head [score] >= EIGEN_COLAMD_EMPTY) ; + + /* now add this column to dList at proper score location */ + next_col = head [score] ; + Col [c].shared3.prev = EIGEN_COLAMD_EMPTY ; + Col [c].shared4.degree_next = next_col ; + + /* if there already was a column with the same score, set its */ + /* previous pointer to this new column */ + if (next_col != EIGEN_COLAMD_EMPTY) + { + Col [next_col].shared3.prev = c ; + } + head [score] = c ; + + /* see if this score is less than current min */ + min_score = COLAMD_MIN (min_score, score) ; + +#ifndef COLAMD_NDEBUG + debug_count++ ; +#endif /* COLAMD_NDEBUG */ + + } + } + +#ifndef COLAMD_NDEBUG + COLAMD_DEBUG1 (("eigen_colamd: Live cols %d out of %d, non-princ: %d\n", + debug_count, n_col, n_col-debug_count)) ; + COLAMD_ASSERT (debug_count == n_col2) ; + eigen_debug_deg_lists (n_row, n_col, Row, Col, head, min_score, n_col2, max_deg) ; +#endif /* COLAMD_NDEBUG */ + + /* === Return number of remaining columns, and max row degree =========== */ + + *p_n_col2 = n_col2 ; + *p_n_row2 = n_row2 ; + *p_max_deg = max_deg ; +} + + +/* ========================================================================== */ +/* === eigen_find_ordering ======================================================== */ +/* ========================================================================== */ + +/* + Order the principal columns of the supercolumn form of the matrix + (no supercolumns on input). 
Uses a minimum approximate column minimum + degree ordering method. Not user-callable. +*/ + + int eigen_find_ordering /* return the number of garbage collections */ +( + /* === Parameters ======================================================= */ + + int n_row, /* number of rows of A */ + int n_col, /* number of columns of A */ + int Alen, /* size of A, 2*nnz + n_col or larger */ + EIGEN_Colamd_Row Row [], /* of size n_row+1 */ + EIGEN_Colamd_Col Col [], /* of size n_col+1 */ + int A [], /* column form and row form of A */ + int head [], /* of size n_col+1 */ + int n_col2, /* Remaining columns to order */ + int max_deg, /* Maximum row degree */ + int pfree /* index of first free slot (2*nnz on entry) */ +) +{ + /* === Local variables ================================================== */ + + int k ; /* current pivot ordering step */ + int pivot_col ; /* current pivot column */ + int *cp ; /* a column pointer */ + int *rp ; /* a row pointer */ + int pivot_row ; /* current pivot row */ + int *new_cp ; /* modified column pointer */ + int *new_rp ; /* modified row pointer */ + int pivot_row_start ; /* pointer to start of pivot row */ + int pivot_row_degree ; /* number of columns in pivot row */ + int pivot_row_length ; /* number of supercolumns in pivot row */ + int pivot_col_score ; /* score of pivot column */ + int needed_memory ; /* free space needed for pivot row */ + int *cp_end ; /* pointer to the end of a column */ + int *rp_end ; /* pointer to the end of a row */ + int row ; /* a row index */ + int col ; /* a column index */ + int max_score ; /* maximum possible score */ + int cur_score ; /* score of current column */ + unsigned int hash ; /* hash value for supernode detection */ + int head_column ; /* head of hash bucket */ + int first_col ; /* first column in hash bucket */ + int tag_mark ; /* marker value for mark array */ + int row_mark ; /* Row [row].shared2.mark */ + int set_difference ; /* set difference size of row with pivot row */ + int min_score ; /* 
smallest column score */ + int col_thickness ; /* "thickness" (no. of columns in a supercol) */ + int max_mark ; /* maximum value of tag_mark */ + int pivot_col_thickness ; /* number of columns represented by pivot col */ + int prev_col ; /* Used by Dlist operations. */ + int next_col ; /* Used by Dlist operations. */ + int ngarbage ; /* number of garbage collections performed */ + +#ifndef COLAMD_NDEBUG + int debug_d ; /* debug loop counter */ + int debug_step = 0 ; /* debug loop counter */ +#endif /* COLAMD_NDEBUG */ + + /* === Initialization and clear mark ==================================== */ + + max_mark = INT_MAX - n_col ; /* INT_MAX defined in */ + tag_mark = eigen_clear_mark (n_row, Row) ; + min_score = 0 ; + ngarbage = 0 ; + COLAMD_DEBUG1 (("eigen_colamd: Ordering, n_col2=%d\n", n_col2)) ; + + /* === Order the columns ================================================ */ + + for (k = 0 ; k < n_col2 ; /* 'k' is incremented below */) + { + +#ifndef COLAMD_NDEBUG + if (debug_step % 100 == 0) + { + COLAMD_DEBUG2 (("\n... 
Step k: %d out of n_col2: %d\n", k, n_col2)) ; + } + else + { + COLAMD_DEBUG3 (("\n----------Step k: %d out of n_col2: %d\n", k, n_col2)) ; + } + debug_step++ ; + eigen_debug_deg_lists (n_row, n_col, Row, Col, head, + min_score, n_col2-k, max_deg) ; + eigen_debug_matrix (n_row, n_col, Row, Col, A) ; +#endif /* COLAMD_NDEBUG */ + + /* === Select pivot column, and order it ============================ */ + + /* make sure degree list isn't empty */ + COLAMD_ASSERT (min_score >= 0) ; + COLAMD_ASSERT (min_score <= n_col) ; + COLAMD_ASSERT (head [min_score] >= EIGEN_COLAMD_EMPTY) ; + +#ifndef COLAMD_NDEBUG + for (debug_d = 0 ; debug_d < min_score ; debug_d++) + { + COLAMD_ASSERT (head [debug_d] == EIGEN_COLAMD_EMPTY) ; + } +#endif /* COLAMD_NDEBUG */ + + /* get pivot column from head of minimum degree list */ + while (head [min_score] == EIGEN_COLAMD_EMPTY && min_score < n_col) + { + min_score++ ; + } + pivot_col = head [min_score] ; + COLAMD_ASSERT (pivot_col >= 0 && pivot_col <= n_col) ; + next_col = Col [pivot_col].shared4.degree_next ; + head [min_score] = next_col ; + if (next_col != EIGEN_COLAMD_EMPTY) + { + Col [next_col].shared3.prev = EIGEN_COLAMD_EMPTY ; + } + + COLAMD_ASSERT (EIGEN_COL_IS_ALIVE (pivot_col)) ; + COLAMD_DEBUG3 (("Pivot col: %d\n", pivot_col)) ; + + /* remember score for defrag check */ + pivot_col_score = Col [pivot_col].shared2.score ; + + /* the pivot column is the kth column in the pivot order */ + Col [pivot_col].shared2.order = k ; + + /* increment order count by column thickness */ + pivot_col_thickness = Col [pivot_col].shared1.thickness ; + k += pivot_col_thickness ; + COLAMD_ASSERT (pivot_col_thickness > 0) ; + + /* === Garbage_collection, if necessary ============================= */ + + needed_memory = COLAMD_MIN (pivot_col_score, n_col - k) ; + if (pfree + needed_memory >= Alen) + { + pfree = eigen_garbage_collection (n_row, n_col, Row, Col, A, &A [pfree]) ; + ngarbage++ ; + /* after garbage collection we will have enough */ + 
COLAMD_ASSERT (pfree + needed_memory < Alen) ; + /* garbage collection has wiped out the Row[].shared2.mark array */ + tag_mark = eigen_clear_mark (n_row, Row) ; + +#ifndef COLAMD_NDEBUG + eigen_debug_matrix (n_row, n_col, Row, Col, A) ; +#endif /* COLAMD_NDEBUG */ + } + + /* === Compute pivot row pattern ==================================== */ + + /* get starting location for this new merged row */ + pivot_row_start = pfree ; + + /* initialize new row counts to zero */ + pivot_row_degree = 0 ; + + /* tag pivot column as having been visited so it isn't included */ + /* in merged pivot row */ + Col [pivot_col].shared1.thickness = -pivot_col_thickness ; + + /* pivot row is the union of all rows in the pivot column pattern */ + cp = &A [Col [pivot_col].start] ; + cp_end = cp + Col [pivot_col].length ; + while (cp < cp_end) + { + /* get a row */ + row = *cp++ ; + COLAMD_DEBUG4 (("Pivot col pattern %d %d\n", EIGEN_ROW_IS_ALIVE (row), row)) ; + /* skip if row is dead */ + if (EIGEN_ROW_IS_DEAD (row)) + { + continue ; + } + rp = &A [Row [row].start] ; + rp_end = rp + Row [row].length ; + while (rp < rp_end) + { + /* get a column */ + col = *rp++ ; + /* add the column, if alive and untagged */ + col_thickness = Col [col].shared1.thickness ; + if (col_thickness > 0 && EIGEN_COL_IS_ALIVE (col)) + { + /* tag column in pivot row */ + Col [col].shared1.thickness = -col_thickness ; + COLAMD_ASSERT (pfree < Alen) ; + /* place column in pivot row */ + A [pfree++] = col ; + pivot_row_degree += col_thickness ; + } + } + } + + /* clear tag on pivot column */ + Col [pivot_col].shared1.thickness = pivot_col_thickness ; + max_deg = COLAMD_MAX (max_deg, pivot_row_degree) ; + +#ifndef COLAMD_NDEBUG + COLAMD_DEBUG3 (("check2\n")) ; + eigen_debug_mark (n_row, Row, tag_mark, max_mark) ; +#endif /* COLAMD_NDEBUG */ + + /* === Kill all rows used to construct pivot row ==================== */ + + /* also kill pivot row, temporarily */ + cp = &A [Col [pivot_col].start] ; + cp_end = cp + Col 
[pivot_col].length ; + while (cp < cp_end) + { + /* may be killing an already dead row */ + row = *cp++ ; + COLAMD_DEBUG3 (("Kill row in pivot col: %d\n", row)) ; + EIGEN_KILL_ROW (row) ; + } + + /* === Select a row index to use as the new pivot row =============== */ + + pivot_row_length = pfree - pivot_row_start ; + if (pivot_row_length > 0) + { + /* pick the "pivot" row arbitrarily (first row in col) */ + pivot_row = A [Col [pivot_col].start] ; + COLAMD_DEBUG3 (("Pivotal row is %d\n", pivot_row)) ; + } + else + { + /* there is no pivot row, since it is of zero length */ + pivot_row = EIGEN_COLAMD_EMPTY ; + COLAMD_ASSERT (pivot_row_length == 0) ; + } + COLAMD_ASSERT (Col [pivot_col].length > 0 || pivot_row_length == 0) ; + + /* === Approximate degree computation =============================== */ + + /* Here begins the computation of the approximate degree. The column */ + /* score is the sum of the pivot row "length", plus the size of the */ + /* set differences of each row in the column minus the pattern of the */ + /* pivot row itself. The column ("thickness") itself is also */ + /* excluded from the column score (we thus use an approximate */ + /* external degree). */ + + /* The time taken by the following code (compute set differences, and */ + /* add them up) is proportional to the size of the data structure */ + /* being scanned - that is, the sum of the sizes of each column in */ + /* the pivot row. Thus, the amortized time to compute a column score */ + /* is proportional to the size of that column (where size, in this */ + /* context, is the column "length", or the number of row indices */ + /* in that column). The number of row indices in a column is */ + /* monotonically non-decreasing, from the length of the original */ + /* column on input to eigen_colamd. */ + + /* === Compute set differences ====================================== */ + + COLAMD_DEBUG3 (("** Computing set differences phase. 
**\n")) ; + + /* pivot row is currently dead - it will be revived later. */ + + COLAMD_DEBUG3 (("Pivot row: ")) ; + /* for each column in pivot row */ + rp = &A [pivot_row_start] ; + rp_end = rp + pivot_row_length ; + while (rp < rp_end) + { + col = *rp++ ; + COLAMD_ASSERT (EIGEN_COL_IS_ALIVE (col) && col != pivot_col) ; + COLAMD_DEBUG3 (("Col: %d\n", col)) ; + + /* clear tags used to construct pivot row pattern */ + col_thickness = -Col [col].shared1.thickness ; + COLAMD_ASSERT (col_thickness > 0) ; + Col [col].shared1.thickness = col_thickness ; + + /* === Remove column from degree list =========================== */ + + cur_score = Col [col].shared2.score ; + prev_col = Col [col].shared3.prev ; + next_col = Col [col].shared4.degree_next ; + COLAMD_ASSERT (cur_score >= 0) ; + COLAMD_ASSERT (cur_score <= n_col) ; + COLAMD_ASSERT (cur_score >= EIGEN_COLAMD_EMPTY) ; + if (prev_col == EIGEN_COLAMD_EMPTY) + { + head [cur_score] = next_col ; + } + else + { + Col [prev_col].shared4.degree_next = next_col ; + } + if (next_col != EIGEN_COLAMD_EMPTY) + { + Col [next_col].shared3.prev = prev_col ; + } + + /* === Scan the column ========================================== */ + + cp = &A [Col [col].start] ; + cp_end = cp + Col [col].length ; + while (cp < cp_end) + { + /* get a row */ + row = *cp++ ; + row_mark = Row [row].shared2.mark ; + /* skip if dead */ + if (EIGEN_ROW_IS_MARKED_DEAD (row_mark)) + { + continue ; + } + COLAMD_ASSERT (row != pivot_row) ; + set_difference = row_mark - tag_mark ; + /* check if the row has been seen yet */ + if (set_difference < 0) + { + COLAMD_ASSERT (Row [row].shared1.degree <= max_deg) ; + set_difference = Row [row].shared1.degree ; + } + /* subtract column thickness from this row's set difference */ + set_difference -= col_thickness ; + COLAMD_ASSERT (set_difference >= 0) ; + /* absorb this row if the set difference becomes zero */ + if (set_difference == 0) + { + COLAMD_DEBUG3 (("aggressive absorption. 
Row: %d\n", row)) ; + EIGEN_KILL_ROW (row) ; + } + else + { + /* save the new mark */ + Row [row].shared2.mark = set_difference + tag_mark ; + } + } + } + +#ifndef COLAMD_NDEBUG + eigen_debug_deg_lists (n_row, n_col, Row, Col, head, + min_score, n_col2-k-pivot_row_degree, max_deg) ; +#endif /* COLAMD_NDEBUG */ + + /* === Add up set differences for each column ======================= */ + + COLAMD_DEBUG3 (("** Adding set differences phase. **\n")) ; + + /* for each column in pivot row */ + rp = &A [pivot_row_start] ; + rp_end = rp + pivot_row_length ; + while (rp < rp_end) + { + /* get a column */ + col = *rp++ ; + COLAMD_ASSERT (EIGEN_COL_IS_ALIVE (col) && col != pivot_col) ; + hash = 0 ; + cur_score = 0 ; + cp = &A [Col [col].start] ; + /* compact the column */ + new_cp = cp ; + cp_end = cp + Col [col].length ; + + COLAMD_DEBUG4 (("Adding set diffs for Col: %d.\n", col)) ; + + while (cp < cp_end) + { + /* get a row */ + row = *cp++ ; + COLAMD_ASSERT(row >= 0 && row < n_row) ; + row_mark = Row [row].shared2.mark ; + /* skip if dead */ + if (EIGEN_ROW_IS_MARKED_DEAD (row_mark)) + { + continue ; + } + COLAMD_ASSERT (row_mark > tag_mark) ; + /* compact the column */ + *new_cp++ = row ; + /* compute hash function */ + hash += row ; + /* add set difference */ + cur_score += row_mark - tag_mark ; + /* integer overflow... */ + cur_score = COLAMD_MIN (cur_score, n_col) ; + } + + /* recompute the column's length */ + Col [col].length = (int) (new_cp - &A [Col [col].start]) ; + + /* === Further mass elimination ================================= */ + + if (Col [col].length == 0) + { + COLAMD_DEBUG4 (("further mass elimination. 
Col: %d\n", col)) ; + /* nothing left but the pivot row in this column */ + EIGEN_KILL_PRINCIPAL_COL (col) ; + pivot_row_degree -= Col [col].shared1.thickness ; + COLAMD_ASSERT (pivot_row_degree >= 0) ; + /* order it */ + Col [col].shared2.order = k ; + /* increment order count by column thickness */ + k += Col [col].shared1.thickness ; + } + else + { + /* === Prepare for supercolumn detection ==================== */ + + COLAMD_DEBUG4 (("Preparing supercol detection for Col: %d.\n", col)) ; + + /* save score so far */ + Col [col].shared2.score = cur_score ; + + /* add column to hash table, for supercolumn detection */ + hash %= n_col + 1 ; + + COLAMD_DEBUG4 ((" Hash = %d, n_col = %d.\n", hash, n_col)) ; + COLAMD_ASSERT (hash <= n_col) ; + + head_column = head [hash] ; + if (head_column > EIGEN_COLAMD_EMPTY) + { + /* degree list "hash" is non-empty, use prev (shared3) of */ + /* first column in degree list as head of hash bucket */ + first_col = Col [head_column].shared3.headhash ; + Col [head_column].shared3.headhash = col ; + } + else + { + /* degree list "hash" is empty, use head as hash bucket */ + first_col = - (head_column + 2) ; + head [hash] = - (col + 2) ; + } + Col [col].shared4.hash_next = first_col ; + + /* save hash function in Col [col].shared3.hash */ + Col [col].shared3.hash = (int) hash ; + COLAMD_ASSERT (EIGEN_COL_IS_ALIVE (col)) ; + } + } + + /* The approximate external column degree is now computed. */ + + /* === Supercolumn detection ======================================== */ + + COLAMD_DEBUG3 (("** Supercolumn detection phase. 
**\n")) ; + + eigen_detect_super_cols ( + +#ifndef COLAMD_NDEBUG + n_col, Row, +#endif /* COLAMD_NDEBUG */ + + Col, A, head, pivot_row_start, pivot_row_length) ; + + /* === Kill the pivotal column ====================================== */ + + EIGEN_KILL_PRINCIPAL_COL (pivot_col) ; + + /* === Clear mark =================================================== */ + + tag_mark += (max_deg + 1) ; + if (tag_mark >= max_mark) + { + COLAMD_DEBUG2 (("clearing tag_mark\n")) ; + tag_mark = eigen_clear_mark (n_row, Row) ; + } + +#ifndef COLAMD_NDEBUG + COLAMD_DEBUG3 (("check3\n")) ; + eigen_debug_mark (n_row, Row, tag_mark, max_mark) ; +#endif /* COLAMD_NDEBUG */ + + /* === Finalize the new pivot row, and column scores ================ */ + + COLAMD_DEBUG3 (("** Finalize scores phase. **\n")) ; + + /* for each column in pivot row */ + rp = &A [pivot_row_start] ; + /* compact the pivot row */ + new_rp = rp ; + rp_end = rp + pivot_row_length ; + while (rp < rp_end) + { + col = *rp++ ; + /* skip dead columns */ + if (EIGEN_COL_IS_DEAD (col)) + { + continue ; + } + *new_rp++ = col ; + /* add new pivot row to column */ + A [Col [col].start + (Col [col].length++)] = pivot_row ; + + /* retrieve score so far and add on pivot row's degree. */ + /* (we wait until here for this in case the pivot */ + /* row's degree was reduced due to mass elimination). 
*/ + cur_score = Col [col].shared2.score + pivot_row_degree ; + + /* calculate the max possible score as the number of */ + /* external columns minus the 'k' value minus the */ + /* columns thickness */ + max_score = n_col - k - Col [col].shared1.thickness ; + + /* make the score the external degree of the union-of-rows */ + cur_score -= Col [col].shared1.thickness ; + + /* make sure score is less or equal than the max score */ + cur_score = COLAMD_MIN (cur_score, max_score) ; + COLAMD_ASSERT (cur_score >= 0) ; + + /* store updated score */ + Col [col].shared2.score = cur_score ; + + /* === Place column back in degree list ========================= */ + + COLAMD_ASSERT (min_score >= 0) ; + COLAMD_ASSERT (min_score <= n_col) ; + COLAMD_ASSERT (cur_score >= 0) ; + COLAMD_ASSERT (cur_score <= n_col) ; + COLAMD_ASSERT (head [cur_score] >= EIGEN_COLAMD_EMPTY) ; + next_col = head [cur_score] ; + Col [col].shared4.degree_next = next_col ; + Col [col].shared3.prev = EIGEN_COLAMD_EMPTY ; + if (next_col != EIGEN_COLAMD_EMPTY) + { + Col [next_col].shared3.prev = col ; + } + head [cur_score] = col ; + + /* see if this score is less than current min */ + min_score = COLAMD_MIN (min_score, cur_score) ; + + } + +#ifndef COLAMD_NDEBUG + eigen_debug_deg_lists (n_row, n_col, Row, Col, head, + min_score, n_col2-k, max_deg) ; +#endif /* COLAMD_NDEBUG */ + + /* === Resurrect the new pivot row ================================== */ + + if (pivot_row_degree > 0) + { + /* update pivot row length to reflect any cols that were killed */ + /* during super-col detection and mass elimination */ + Row [pivot_row].start = pivot_row_start ; + Row [pivot_row].length = (int) (new_rp - &A[pivot_row_start]) ; + Row [pivot_row].shared1.degree = pivot_row_degree ; + Row [pivot_row].shared2.mark = 0 ; + /* pivot row is no longer dead */ + } + } + + /* === All principal columns have now been ordered ====================== */ + + return (ngarbage) ; +} + + +/* 
========================================================================== */ +/* === eigen_order_children ======================================================= */ +/* ========================================================================== */ + +/* + The eigen_find_ordering routine has ordered all of the principal columns (the + representatives of the supercolumns). The non-principal columns have not + yet been ordered. This routine orders those columns by walking up the + parent tree (a column is a child of the column which absorbed it). The + final permutation vector is then placed in p [0 ... n_col-1], with p [0] + being the first column, and p [n_col-1] being the last. Although not + immediately obvious, the time taken by this routine is O (n_col), that is, + linear in the number of columns. Not user-callable. + + Every column is expected to be dead on entry (asserted when debugging is + enabled). For each non-principal column that has no order yet, the routine + assigns it the current order of its principal parent, re-parents it + directly to that principal column, and then stores the next order value + back into the principal column. +*/ + + void eigen_order_children +( + /* === Parameters ======================================================= */ + + int n_col, /* number of columns of A */ + EIGEN_Colamd_Col Col [], /* of size n_col+1 */ + int p [] /* p [0 ... 
n_col-1] is the column permutation*/ +) +{ + /* === Local variables ================================================== */ + + int i ; /* loop counter for all columns */ + int c ; /* column index */ + int parent ; /* index of column's parent */ + int order ; /* column's order */ + + /* === Order each non-principal column ================================== */ + + for (i = 0 ; i < n_col ; i++) + { + /* find an un-ordered non-principal column */ + COLAMD_ASSERT (EIGEN_COL_IS_DEAD (i)) ; + if (!EIGEN_EIGEN_COL_IS_DEAD_PRINCIPAL (i) && Col [i].shared2.order == EIGEN_COLAMD_EMPTY) + { + parent = i ; + /* once found, find its principal parent */ + do + { + parent = Col [parent].shared1.parent ; + } while (!EIGEN_EIGEN_COL_IS_DEAD_PRINCIPAL (parent)) ; + + /* now, order all un-ordered non-principal columns along path */ + /* to this parent. collapse tree at the same time */ + c = i ; + /* get order of parent */ + order = Col [parent].shared2.order ; + + do + { + COLAMD_ASSERT (Col [c].shared2.order == EIGEN_COLAMD_EMPTY) ; + + /* order this column */ + Col [c].shared2.order = order++ ; + /* collapse tree: re-parent this column directly to the principal parent */ + Col [c].shared1.parent = parent ; + + /* get the parent of this column. NOTE(review): shared1.parent was just overwritten above, so this steps straight to the principal parent rather than to the original immediate parent - confirm this is intended */ + c = Col [c].shared1.parent ; + + /* continue until we hit an ordered column. 
There are */ + /* guaranteed not to be any more unordered columns */ + /* above an ordered column */ + } while (Col [c].shared2.order == EIGEN_COLAMD_EMPTY) ; + + /* re-order the super_col parent to largest order for this group */ + Col [parent].shared2.order = order ; + } + } + + /* === Generate the permutation ========================================= */ + + for (c = 0 ; c < n_col ; c++) + { + p [Col [c].shared2.order] = c ; + } +} + + +/* ========================================================================== */ +/* === eigen_detect_super_cols ==================================================== */ +/* ========================================================================== */ + +/* + Detects supercolumns by finding matches between columns in the hash buckets. + Check amongst columns in the set A [row_start ... row_start + row_length-1]. + The columns under consideration are currently *not* in the degree lists, + and have already been placed in the hash buckets. + + The hash bucket for columns whose hash function is equal to h is stored + as follows: + + if head [h] is >= 0, then head [h] contains a degree list, so: + + head [h] is the first column in degree bucket h. + Col [head [h]].headhash gives the first column in hash bucket h. + + otherwise, the degree list is empty, and: + + -(head [h] + 2) is the first column in hash bucket h. + + For a column c in a hash bucket, Col [c].shared3.prev is NOT a "previous + column" pointer. Col [c].shared3.hash is used instead as the hash number + for that column. The value of Col [c].shared4.hash_next is the next column + in the same hash bucket. + + Assuming no, or "few" hash collisions, the time taken by this routine is + linear in the sum of the sizes (lengths) of each column whose score has + just been computed in the approximate degree computation. + Not user-callable. 
+*/ + + void eigen_detect_super_cols +( + /* === Parameters ======================================================= */ + +#ifndef COLAMD_NDEBUG + /* these two parameters are only needed when debugging is enabled: */ + int n_col, /* number of columns of A */ + EIGEN_Colamd_Row Row [], /* of size n_row+1 */ +#endif /* COLAMD_NDEBUG */ + + EIGEN_Colamd_Col Col [], /* of size n_col+1 */ + int A [], /* row indices of A */ + int head [], /* head of degree lists and hash buckets */ + int row_start, /* index into A of the first column to check */ + int row_length /* number of columns to check */ +) +{ + /* === Local variables ================================================== */ + + int hash ; /* hash value for a column */ + int *rp ; /* pointer to a row */ + int c ; /* a column index */ + int super_c ; /* column index of the column to absorb into */ + int *cp1 ; /* column pointer for column super_c */ + int *cp2 ; /* column pointer for column c */ + int length ; /* length of column super_c */ + int prev_c ; /* column preceding c in hash bucket */ + int i ; /* loop counter */ + int *rp_end ; /* pointer to the end of the row */ + int col ; /* a column index in the row to check */ + int head_column ; /* first column in hash bucket or degree list */ + int first_col ; /* first column in hash bucket */ + + /* === Consider each column in the row ================================== */ + + rp = &A [row_start] ; + rp_end = rp + row_length ; + while (rp < rp_end) + { + col = *rp++ ; + if (EIGEN_COL_IS_DEAD (col)) + { + continue ; + } + + /* get hash number for this column */ + hash = Col [col].shared3.hash ; + COLAMD_ASSERT (hash <= n_col) ; + + /* === Get the first column in this hash bucket ===================== */ + + head_column = head [hash] ; + if (head_column > EIGEN_COLAMD_EMPTY) + { + first_col = Col [head_column].shared3.headhash ; + } + else + { + first_col = - (head_column + 2) ; + } + + /* === Consider each column in the hash bucket ====================== */ + + for (super_c = 
first_col ; super_c != EIGEN_COLAMD_EMPTY ; + super_c = Col [super_c].shared4.hash_next) + { + COLAMD_ASSERT (EIGEN_COL_IS_ALIVE (super_c)) ; + COLAMD_ASSERT (Col [super_c].shared3.hash == hash) ; + length = Col [super_c].length ; + + /* prev_c is the column preceding column c in the hash bucket */ + prev_c = super_c ; + + /* === Compare super_c with all columns after it ================ */ + + for (c = Col [super_c].shared4.hash_next ; + c != EIGEN_COLAMD_EMPTY ; c = Col [c].shared4.hash_next) + { + COLAMD_ASSERT (c != super_c) ; + COLAMD_ASSERT (EIGEN_COL_IS_ALIVE (c)) ; + COLAMD_ASSERT (Col [c].shared3.hash == hash) ; + + /* not identical if lengths or scores are different */ + if (Col [c].length != length || + Col [c].shared2.score != Col [super_c].shared2.score) + { + prev_c = c ; + continue ; + } + + /* compare the two columns */ + cp1 = &A [Col [super_c].start] ; + cp2 = &A [Col [c].start] ; + + for (i = 0 ; i < length ; i++) + { + /* the columns are "clean" (no dead rows) */ + COLAMD_ASSERT (EIGEN_ROW_IS_ALIVE (*cp1)) ; + COLAMD_ASSERT (EIGEN_ROW_IS_ALIVE (*cp2)) ; + /* row indices will be in the same order for both supercols, */ + /* no gather scatter necessary */ + if (*cp1++ != *cp2++) + { + break ; + } + } + + /* the two columns are different if the for-loop "broke" */ + if (i != length) + { + prev_c = c ; + continue ; + } + + /* === Got it! 
two columns are identical =================== */ + + COLAMD_ASSERT (Col [c].shared2.score == Col [super_c].shared2.score) ; + + Col [super_c].shared1.thickness += Col [c].shared1.thickness ; + Col [c].shared1.parent = super_c ; + EIGEN_KILL_NON_PRINCIPAL_COL (c) ; + /* order c later, in eigen_order_children() */ + Col [c].shared2.order = EIGEN_COLAMD_EMPTY ; + /* remove c from hash bucket */ + Col [prev_c].shared4.hash_next = Col [c].shared4.hash_next ; + } + } + + /* === Empty this hash bucket ======================================= */ + + if (head_column > EIGEN_COLAMD_EMPTY) + { + /* corresponding degree list "hash" is not empty */ + Col [head_column].shared3.headhash = EIGEN_COLAMD_EMPTY ; + } + else + { + /* corresponding degree list "hash" is empty */ + head [hash] = EIGEN_COLAMD_EMPTY ; + } + } +} + + +/* ========================================================================== */ +/* === eigen_garbage_collection =================================================== */ +/* ========================================================================== */ + +/* + Defragments and compacts columns and rows in the workspace A. Used when + all available memory has been used while performing row merging. Returns + the index of the first free position in A, after garbage collection. The + time taken by this routine is linear in the size of the array A, which is + itself linear in the number of nonzeros in the input matrix. + Not user-callable. +*/ + + int eigen_garbage_collection /* returns the new value of pfree */ +( + /* === Parameters ======================================================= */ + + int n_row, /* number of rows */ + int n_col, /* number of columns */ + EIGEN_Colamd_Row Row [], /* row info */ + EIGEN_Colamd_Col Col [], /* column info */ + int A [], /* A [0 ... Alen-1] holds the matrix */ + int *pfree /* &A [0] ... 
pfree is in use */ +) +{ + /* === Local variables ================================================== */ + + int *psrc ; /* source pointer */ + int *pdest ; /* destination pointer */ + int j ; /* counter */ + int r ; /* a row index */ + int c ; /* a column index */ + int length ; /* length of a row or column */ + +#ifndef COLAMD_NDEBUG + int debug_rows ; + COLAMD_DEBUG2 (("Defrag..\n")) ; + for (psrc = &A[0] ; psrc < pfree ; psrc++) COLAMD_ASSERT (*psrc >= 0) ; + debug_rows = 0 ; +#endif /* COLAMD_NDEBUG */ + + /* === Defragment the columns =========================================== */ + + pdest = &A[0] ; + for (c = 0 ; c < n_col ; c++) + { + if (EIGEN_COL_IS_ALIVE (c)) + { + psrc = &A [Col [c].start] ; + + /* move and compact the column */ + COLAMD_ASSERT (pdest <= psrc) ; + Col [c].start = (int) (pdest - &A [0]) ; + length = Col [c].length ; + for (j = 0 ; j < length ; j++) + { + r = *psrc++ ; + if (EIGEN_ROW_IS_ALIVE (r)) + { + *pdest++ = r ; + } + } + Col [c].length = (int) (pdest - &A [Col [c].start]) ; + } + } + + /* === Prepare to defragment the rows =================================== */ + + for (r = 0 ; r < n_row ; r++) + { + if (EIGEN_ROW_IS_ALIVE (r)) + { + if (Row [r].length == 0) + { + /* this row is of zero length. cannot compact it, so kill it */ + COLAMD_DEBUG3 (("Defrag row kill\n")) ; + EIGEN_KILL_ROW (r) ; + } + else + { + /* save first column index in Row [r].shared2.first_column */ + psrc = &A [Row [r].start] ; + Row [r].shared2.first_column = *psrc ; + COLAMD_ASSERT (EIGEN_ROW_IS_ALIVE (r)) ; + /* flag the start of the row with the one's complement of row */ + *psrc = EIGEN_ONES_COMPLEMENT (r) ; + +#ifndef COLAMD_NDEBUG + debug_rows++ ; +#endif /* COLAMD_NDEBUG */ + + } + } + } + + /* === Defragment the rows ============================================== */ + + psrc = pdest ; + while (psrc < pfree) + { + /* find a negative number ... 
the start of a row */ + if (*psrc++ < 0) + { + psrc-- ; + /* get the row index */ + r = EIGEN_ONES_COMPLEMENT (*psrc) ; + COLAMD_ASSERT (r >= 0 && r < n_row) ; + /* restore first column index */ + *psrc = Row [r].shared2.first_column ; + COLAMD_ASSERT (EIGEN_ROW_IS_ALIVE (r)) ; + + /* move and compact the row */ + COLAMD_ASSERT (pdest <= psrc) ; + Row [r].start = (int) (pdest - &A [0]) ; + length = Row [r].length ; + for (j = 0 ; j < length ; j++) + { + c = *psrc++ ; + if (EIGEN_COL_IS_ALIVE (c)) + { + *pdest++ = c ; + } + } + Row [r].length = (int) (pdest - &A [Row [r].start]) ; + +#ifndef COLAMD_NDEBUG + debug_rows-- ; +#endif /* COLAMD_NDEBUG */ + + } + } + /* ensure we found all the rows */ + COLAMD_ASSERT (debug_rows == 0) ; + + /* === Return the new value of pfree ==================================== */ + + return ((int) (pdest - &A [0])) ; +} + + +/* ========================================================================== */ +/* === eigen_clear_mark =========================================================== */ +/* ========================================================================== */ + +/* + Sets Row [r].shared2.mark to zero for every row r that is still alive + (dead rows are left untouched), and returns the new tag_mark, which is + always 1. Not user-callable. +*/ + + int eigen_clear_mark /* return the new value for tag_mark */ +( + /* === Parameters ======================================================= */ + + int n_row, /* number of rows in A */ + EIGEN_Colamd_Row Row [] /* Row [0 ... 
n_row-1].shared2.mark is set to zero */ +) +{ + /* === Local variables ================================================== */ + + int r ; + + for (r = 0 ; r < n_row ; r++) + { + if (EIGEN_ROW_IS_ALIVE (r)) + { + Row [r].shared2.mark = 0 ; + } + } + return (1) ; +} + + + +/* ========================================================================== */ +/* === eigen_print_report ========================================================= */ +/* ========================================================================== */ + /* Prints a report of the stats array through PRINTF, starting each report line with method. If stats is null, only a no-statistics message is printed. Otherwise an OK or ERROR prefix is chosen from the sign of stats [EIGEN_COLAMD_STATUS], followed by the details for that status value; the jumbled-matrix case deliberately falls through into the plain OK case. */ + void eigen_print_report +( + char *method, + int stats [EIGEN_COLAMD_STATS] +) +{ + + int i1, i2, i3 ; + + if (!stats) + { + PRINTF ("%s: No statistics available.\n", method) ; + return ; + } + + i1 = stats [EIGEN_COLAMD_INFO1] ; + i2 = stats [EIGEN_COLAMD_INFO2] ; + i3 = stats [EIGEN_COLAMD_INFO3] ; + + if (stats [EIGEN_COLAMD_STATUS] >= 0) + { + PRINTF ("%s: OK. ", method) ; + } + else + { + PRINTF ("%s: ERROR. 
", method) ; + } + + switch (stats [EIGEN_COLAMD_STATUS]) + { + + case EIGEN_COLAMD_OK_BUT_JUMBLED: + + PRINTF ("Matrix has unsorted or duplicate row indices.\n") ; + + PRINTF ("%s: number of duplicate or out-of-order row indices: %d\n", + method, i3) ; + + PRINTF ("%s: last seen duplicate or out-of-order row index: %d\n", + method, INDEX (i2)) ; + + PRINTF ("%s: last seen in column: %d", + method, INDEX (i1)) ; + + /* no break - fall through to next case instead */ + + case EIGEN_COLAMD_OK: + + PRINTF ("\n") ; + + PRINTF ("%s: number of dense or empty rows ignored: %d\n", + method, stats [EIGEN_COLAMD_DENSE_ROW]) ; + + PRINTF ("%s: number of dense or empty columns ignored: %d\n", + method, stats [EIGEN_COLAMD_DENSE_COL]) ; + + PRINTF ("%s: number of garbage collections performed: %d\n", + method, stats [EIGEN_COLAMD_DEFRAG_COUNT]) ; + break ; + + case EIGEN_COLAMD_ERROR_A_not_present: + + PRINTF ("Array A (row indices of matrix) not present.\n") ; + break ; + + case EIGEN_COLAMD_ERROR_p_not_present: + + PRINTF ("Array p (column pointers 
for matrix) not present.\n") ; + break ; + + case EIGEN_COLAMD_ERROR_nrow_negative: + + PRINTF ("Invalid number of rows (%d).\n", i1) ; + break ; + + case EIGEN_COLAMD_ERROR_ncol_negative: + + PRINTF ("Invalid number of columns (%d).\n", i1) ; + break ; + + case EIGEN_COLAMD_ERROR_nnz_negative: + + PRINTF ("Invalid number of nonzero entries (%d).\n", i1) ; + break ; + + case EIGEN_COLAMD_ERROR_p0_nonzero: + + PRINTF ("Invalid column pointer, p [0] = %d, must be zero.\n", i1) ; + break ; + + case EIGEN_COLAMD_ERROR_A_too_small: + + PRINTF ("Array A too small.\n") ; + PRINTF (" Need Alen >= %d, but given only Alen = %d.\n", + i1, i2) ; + break ; + + case EIGEN_COLAMD_ERROR_col_length_negative: + + PRINTF + ("Column %d has a negative number of nonzero entries (%d).\n", + INDEX (i1), i2) ; + break ; + + case EIGEN_COLAMD_ERROR_row_index_out_of_bounds: + + PRINTF + ("Row index (row %d) out of bounds (%d to %d) in column %d.\n", + INDEX (i2), INDEX (0), INDEX (i3-1), INDEX (i1)) ; + break ; + + case EIGEN_COLAMD_ERROR_out_of_memory: + + PRINTF ("Out of memory.\n") ; + break ; + + case EIGEN_COLAMD_ERROR_internal_error: + + /* if this happens, there is a bug in the code */ + PRINTF + ("Internal error! Please contact authors (davis@cise.ufl.edu).\n") ; + break ; + } +} + + + + +/* ========================================================================== */ +/* === eigen_colamd debugging routines ============================================ */ +/* ========================================================================== */ + +/* When debugging is disabled, the remainder of this file is ignored. */ + +#ifndef COLAMD_NDEBUG + + +/* ========================================================================== */ +/* === eigen_debug_structures ===================================================== */ +/* ========================================================================== */ + +/* + At this point, all empty rows and columns are dead. 
All live columns + are "clean" (containing no dead rows) and simplicial (no supercolumns + yet). Rows may contain dead columns, but all live rows contain at + least one live column. +*/ + + void eigen_debug_structures +( + /* === Parameters ======================================================= */ + + int n_row, + int n_col, + EIGEN_Colamd_Row Row [], + EIGEN_Colamd_Col Col [], + int A [], + int n_col2 /* lower bound asserted on the order of every dead column */ +) +{ + /* === Local variables ================================================== */ + + int i ; + int c ; + int *cp ; + int *cp_end ; + int len ; + int score ; + int r ; + int *rp ; + int *rp_end ; + int deg ; + + /* === Check A, Row, and Col ============================================ */ + + for (c = 0 ; c < n_col ; c++) + { + if (EIGEN_COL_IS_ALIVE (c)) + { /* live column: must be non-empty, have a non-negative score and thickness 1, and list only live rows */ + len = Col [c].length ; + score = Col [c].shared2.score ; + COLAMD_DEBUG4 (("initial live col %5d %5d %5d\n", c, len, score)) ; + COLAMD_ASSERT (len > 0) ; + COLAMD_ASSERT (score >= 0) ; + COLAMD_ASSERT (Col [c].shared1.thickness == 1) ; + cp = &A [Col [c].start] ; + cp_end = cp + len ; + while (cp < cp_end) + { + r = *cp++ ; + COLAMD_ASSERT (EIGEN_ROW_IS_ALIVE (r)) ; + } + } + else + { /* dead column: its order must lie in the range n_col2 to n_col-1 */ + i = Col [c].shared2.order ; + COLAMD_ASSERT (i >= n_col2 && i < n_col) ; + } + } + + for (r = 0 ; r < n_row ; r++) + { + if (EIGEN_ROW_IS_ALIVE (r)) + { /* live row: must have positive length and degree, and at least one live column (counted in i) */ + i = 0 ; + len = Row [r].length ; + deg = Row [r].shared1.degree ; + COLAMD_ASSERT (len > 0) ; + COLAMD_ASSERT (deg > 0) ; + rp = &A [Row [r].start] ; + rp_end = rp + len ; + while (rp < rp_end) + { + c = *rp++ ; + if (EIGEN_COL_IS_ALIVE (c)) + { + i++ ; + } + } + COLAMD_ASSERT (i > 0) ; + } + } +} + + +/* ========================================================================== */ +/* === eigen_debug_deg_lists ====================================================== */ +/* ========================================================================== */ + +/* + Prints the contents of the degree lists. 
Counts the number of columns + in the degree list and compares it to the total it should have. Also + checks the row degrees. +*/ + + void eigen_debug_deg_lists +( + /* === Parameters ======================================================= */ + + int n_row, + int n_col, + EIGEN_Colamd_Row Row [], + EIGEN_Colamd_Col Col [], + int head [], + int min_score, /* only printed, not checked */ + int should, /* total column thickness the lists are expected to hold */ + int max_deg /* upper bound asserted on the degree of every live row */ +) +{ + /* === Local variables ================================================== */ + + int deg ; + int col ; + int have ; /* total thickness of the columns actually found in the lists */ + int row ; + + /* === Check the degree lists =========================================== */ + + if (n_col > 10000 && colamd_debug <= 0) + { /* large problem and debug printing off: skip all checks */ + return ; + } + have = 0 ; + COLAMD_DEBUG4 (("Degree lists: %d\n", min_score)) ; + for (deg = 0 ; deg <= n_col ; deg++) + { + col = head [deg] ; + if (col == EIGEN_COLAMD_EMPTY) + { + continue ; + } + COLAMD_DEBUG4 (("%d:", deg)) ; + while (col != EIGEN_COLAMD_EMPTY) + { + COLAMD_DEBUG4 ((" %d", col)) ; + have += Col [col].shared1.thickness ; + COLAMD_ASSERT (EIGEN_COL_IS_ALIVE (col)) ; + col = Col [col].shared4.degree_next ; + } + COLAMD_DEBUG4 (("\n")) ; + } + COLAMD_DEBUG4 (("should %d have %d\n", should, have)) ; + COLAMD_ASSERT (should == have) ; + + /* === Check the row degrees ============================================ */ + + if (n_row > 10000 && colamd_debug <= 0) + { /* large problem and debug printing off: skip the row scan */ + return ; + } + for (row = 0 ; row < n_row ; row++) + { + if (EIGEN_ROW_IS_ALIVE (row)) + { + COLAMD_ASSERT (Row [row].shared1.degree <= max_deg) ; + } + } +} + + +/* ========================================================================== */ +/* === eigen_debug_mark =========================================================== */ +/* ========================================================================== */ + +/* + Ensures that the tag_mark is less that the maximum and also ensures that + each entry in the mark array is less than the tag mark. 
+*/ + + void eigen_debug_mark +( + /* === Parameters ======================================================= */ + + int n_row, + EIGEN_Colamd_Row Row [], + int tag_mark, + int max_mark +) +{ + /* === Local variables ================================================== */ + + int r ; + + /* === Check the Row marks ============================================== */ + + COLAMD_ASSERT (tag_mark > 0 && tag_mark <= max_mark) ; /* the per-row scan below is skipped for large n_row unless colamd_debug is positive */ + if (n_row > 10000 && colamd_debug <= 0) + { + return ; + } + for (r = 0 ; r < n_row ; r++) + { + COLAMD_ASSERT (Row [r].shared2.mark < tag_mark) ; + } +} + + +/* ========================================================================== */ +/* === eigen_debug_matrix ========================================================= */ +/* ========================================================================== */ + +/* + Prints out the contents of the columns and the rows through the + COLAMD_DEBUG3 and COLAMD_DEBUG4 macros. Returns immediately unless + colamd_debug is at least 3. The alive flag of every row and column is + printed, but the entries of dead rows and dead columns are skipped. +*/ + + void eigen_debug_matrix +( + /* === Parameters ======================================================= */ + + int n_row, + int n_col, + EIGEN_Colamd_Row Row [], + EIGEN_Colamd_Col Col [], + int A [] +) +{ + /* === Local variables ================================================== */ + + int r ; + int c ; + int *rp ; + int *rp_end ; + int *cp ; + int *cp_end ; + + /* === Dump the rows and columns of the matrix ========================== */ + + if (colamd_debug < 3) + { + return ; + } + COLAMD_DEBUG3 (("DUMP MATRIX:\n")) ; + for (r = 0 ; r < n_row ; r++) + { + COLAMD_DEBUG3 (("Row %d alive? %d\n", r, EIGEN_ROW_IS_ALIVE (r))) ; + if (EIGEN_ROW_IS_DEAD (r)) + { + continue ; + } + COLAMD_DEBUG3 (("start %d length %d degree %d\n", + Row [r].start, Row [r].length, Row [r].shared1.degree)) ; + rp = &A [Row [r].start] ; + rp_end = rp + Row [r].length ; + while (rp < rp_end) + { + c = *rp++ ; + COLAMD_DEBUG4 ((" %d col %d\n", EIGEN_COL_IS_ALIVE (c), c)) ; + } + } + + for (c = 0 ; c < n_col ; c++) + { + COLAMD_DEBUG3 (("Col %d alive? 
%d\n", c, EIGEN_COL_IS_ALIVE (c))) ; + if (EIGEN_COL_IS_DEAD (c)) + { + continue ; + } + COLAMD_DEBUG3 (("start %d length %d shared1 %d shared2 %d\n", + Col [c].start, Col [c].length, + Col [c].shared1.thickness, Col [c].shared2.score)) ; + cp = &A [Col [c].start] ; + cp_end = cp + Col [c].length ; + while (cp < cp_end) + { + r = *cp++ ; + COLAMD_DEBUG4 ((" %d row %d\n", EIGEN_ROW_IS_ALIVE (r), r)) ; + } + } +} + + void eigen_colamd_get_debug +( + char *method +) +{ + colamd_debug = 0 ; /* no debug printing */ + + /* get "D" environment variable, which gives the debug printing level */ + if (getenv ("D")) + { + colamd_debug = atoi (getenv ("D")) ; + } + + COLAMD_DEBUG0 (("%s: debug version, D = %d (THIS WILL BE SLOW!)\n", + method, colamd_debug)) ; +} + +#endif /* NDEBUG */ + +#endif diff --git a/Eigen/src/OrderingMethods/Ordering.h b/Eigen/src/OrderingMethods/Ordering.h index 670cca9c4..cbd2e5d34 100644 --- a/Eigen/src/OrderingMethods/Ordering.h +++ b/Eigen/src/OrderingMethods/Ordering.h @@ -27,6 +27,7 @@ #define EIGEN_ORDERING_H #include "Amd.h" +#include "Eigen_Colamd.h" namespace Eigen { namespace internal { @@ -112,54 +113,50 @@ class NaturalOrdering * Get the column approximate minimum degree ordering * The matrix should be in column-major format */ -// template -// class COLAMDOrdering: public OrderingBase< ColamdOrdering > -// { -// public: -// typedef OrderingBase< ColamdOrdering > Base; -// typedef SparseMatrix MatrixType; -// -// public: -// COLAMDOrdering():Base() {} -// -// COLAMDOrdering(const MatrixType& matrix):Base() -// { -// compute(matrix); -// } -// COLAMDOrdering(const MatrixType& mat, PermutationType& perm_c):Base() -// { -// compute(matrix); -// perm_c = this.get_perm(); -// } -// void compute(const MatrixType& mat) -// { -// // Test if the matrix is column major... 
-// -// int m = mat.rows(); -// int n = mat.cols(); -// int nnz = mat.nonZeros(); -// // Get the recommended value of Alen to be used by colamd -// int Alen = colamd_recommended(nnz, m, n); -// // Set the default parameters -// double knobs[COLAMD_KNOBS]; -// colamd_set_defaults(knobs); -// -// int info; -// VectorXi p(n), A(nnz); -// for(int i=0; i < n; i++) p(i) = mat.outerIndexPtr()(i); -// for(int i=0; i < nnz; i++) A(i) = mat.innerIndexPtr()(i); -// // Call Colamd routine to compute the ordering -// info = colamd(m, n, Alen, A,p , knobs, stats) -// eigen_assert( (info != FALSE)&& "COLAMD failed " ); -// -// m_P.resize(n); -// for (int i = 0; i < n; i++) m_P(p(i)) = i; -// m_isInitialized = true; -// } -// protected: -// using Base::m_isInitialized; -// using Base m_P; -// }; +template +class COLAMDOrdering; +#include "Eigen_Colamd.h" + +template +class COLAMDOrdering +{ + public: + typedef PermutationMatrix PermutationType; + typedef Matrix IndexVector; + /** Compute the permutation vector from a sparse matrix: copies the outer and inner index arrays of mat, runs eigen_colamd on the copies with default knobs, and writes the resulting column ordering into perm (resized to mat.cols()). NOTE(review): presumably mat must be in compressed column-major storage - confirm against callers. */ + + + + template + void operator() (const MatrixType& mat, PermutationType& perm) + { + int m = mat.rows(); + int n = mat.cols(); + int nnz = mat.nonZeros(); + // Get the recommended value of Alen to be used by colamd + int Alen = eigen_colamd_recommended(nnz, m, n); + // Set the default parameters + double knobs [EIGEN_COLAMD_KNOBS]; + int stats [EIGEN_COLAMD_STATS]; + eigen_colamd_set_defaults(knobs); + + int info; + IndexVector p(n+1), A(Alen); + for(int i=0; i <= n; i++) p(i) = mat.outerIndexPtr()[i]; + for(int i=0; i < nnz; i++) A(i) = mat.innerIndexPtr()[i]; + // Call Colamd routine to compute the ordering + info = eigen_colamd(m, n, Alen, A.data(), p.data(), knobs, stats); + eigen_assert( info && "COLAMD failed " ); + + perm.resize(n); + for (int i = 0; i < n; i++) perm.indices()(p(i)) = i; + + } + + private: + + +}; } // end namespace Eigen #endif \ No newline at end of file diff --git a/Eigen/src/SparseLU/SparseLU.h 
b/Eigen/src/SparseLU/SparseLU.h index 3d8c8532f..bb1decc4c 100644 --- a/Eigen/src/SparseLU/SparseLU.h +++ b/Eigen/src/SparseLU/SparseLU.h @@ -99,8 +99,29 @@ class SparseLU { m_diagpivotthresh = thresh; } - - + + /** Return the number of nonzero elements in the L factor */ + int nnzL() + { + if (m_factorizationIsOk) + return m_nnzL; + else + { + std::cerr<<"Numerical factorization should be done before\n"; + return 0; + } + } + /** Return the number of nonzero elements in the U factor */ + int nnzU() + { + if (m_factorizationIsOk) + return m_nnzU; + else + { + std::cerr<<"Numerical factorization should be done before\n"; + return 0; + } + } /** \returns the solution X of \f$ A X = B \f$ using the current decomposition of A. * * \sa compute() @@ -325,7 +346,8 @@ void SparseLU::analyzePattern(const MatrixType& mat) ord(mat,m_perm_c); //FIXME Check the right semantic behind m_perm_c // that is, column j of mat goes to column m_perm_c(j) of mat * m_perm_c; - + + // Apply the permutation to the column of the input matrix m_mat = mat * m_perm_c.inverse(); //FIXME It should be less expensive here to permute only the structural pattern of the matrix diff --git a/Eigen/src/SuperLUSupport/SuperLUSupport.h b/Eigen/src/SuperLUSupport/SuperLUSupport.h index 9c2e6e17e..e3fae4a36 100644 --- a/Eigen/src/SuperLUSupport/SuperLUSupport.h +++ b/Eigen/src/SuperLUSupport/SuperLUSupport.h @@ -628,7 +628,7 @@ void SuperLU::factorize(const MatrixType& a) this->initFactorization(a); //DEBUG - m_sluOptions.ColPerm = NATURAL; +// m_sluOptions.ColPerm = COLAMD; m_sluOptions.Equil = NO; int info = 0; RealScalar recip_pivot_growth, rcond; diff --git a/bench/spbench/test_sparseLU.cpp b/bench/spbench/test_sparseLU.cpp index 841011f30..6fbf03454 100644 --- a/bench/spbench/test_sparseLU.cpp +++ b/bench/spbench/test_sparseLU.cpp @@ -6,6 +6,7 @@ #include #include #include +#include using namespace std; using namespace Eigen; @@ -17,10 +18,12 @@ int main(int argc, char **args) typedef Matrix 
DenseMatrix; typedef Matrix DenseRhs; VectorXd b, x, tmp; - SparseLU, AMDOrdering > solver; +// SparseLU, AMDOrdering > solver; + SparseLU, COLAMDOrdering > solver; ifstream matrix_file; string line; int n; + BenchTimer timer; // Set parameters /* Fill the matrix with sparse matrix stored in Matrix-Market coordinate column-oriented format */ @@ -53,13 +56,26 @@ int main(int argc, char **args) /* Compute the factorization */ // solver.isSymmetric(true); - solver.compute(A); - + timer.start(); +// solver.compute(A); + solver.analyzePattern(A); + timer.stop(); + cout << "Time to analyze " << timer.value() << std::endl; + timer.reset(); + timer.start(); + solver.factorize(A); + timer.stop(); + cout << "Factorize Time " << timer.value() << std::endl; + timer.reset(); + timer.start(); solver._solve(b, x); + timer.stop(); + cout << "solve time " << timer.value() << std::endl; /* Check the accuracy */ VectorXd tmp2 = b - A*x; double tempNorm = tmp2.norm()/b.norm(); cout << "Relative norm of the computed solution : " << tempNorm <<"\n"; + cout << "Number of nonzeros in the factor : " << solver.nnzL() + solver.nnzU() << std::endl; return 0; } \ No newline at end of file -- cgit v1.2.3 From 59642da88bf83709e918667680e4ed63af4c31e5 Mon Sep 17 00:00:00 2001 From: Desire NUENTSA Date: Thu, 19 Jul 2012 18:03:44 +0200 Subject: Add exception handler to memory allocation --- Eigen/src/OrderingMethods/Eigen_Colamd.h | 8 +- Eigen/src/OrderingMethods/Ordering.h | 6 -- Eigen/src/SparseLU/SparseLU.h | 3 - Eigen/src/SparseLU/SparseLU_Coletree.h | 2 - Eigen/src/SparseLU/SparseLU_Matrix.h | 1 - Eigen/src/SparseLU/SparseLU_Memory.h | 147 +++++++++++++++++------------- Eigen/src/SparseLU/SparseLU_column_bmod.h | 1 - Eigen/src/SparseLU/SparseLU_panel_dfs.h | 1 - Eigen/src/SparseLU/SparseLU_snode_bmod.h | 1 - bench/spbench/CMakeLists.txt | 2 +- bench/spbench/test_sparseLU.cpp | 21 +++-- 11 files changed, 99 insertions(+), 94 deletions(-) (limited to 'bench/spbench') diff --git 
a/Eigen/src/OrderingMethods/Eigen_Colamd.h b/Eigen/src/OrderingMethods/Eigen_Colamd.h index 39701d0af..0af137d54 100644 --- a/Eigen/src/OrderingMethods/Eigen_Colamd.h +++ b/Eigen/src/OrderingMethods/Eigen_Colamd.h @@ -155,7 +155,6 @@ #endif /* MATLAB_MEX_FILE */ // == Row and Column structures == - typedef struct EIGEN_Colamd_Col_struct { int start ; /* index for A of first row in this column, or EIGEN_DEAD */ @@ -248,11 +247,9 @@ void eigen_colamd_report (int stats [EIGEN_COLAMD_STATS]); int eigen_init_rows_cols (int n_row, int n_col, EIGEN_Colamd_Row Row [], EIGEN_Colamd_Col col [], int A [], int p [], int stats[EIGEN_COLAMD_STATS] ); -void eigen_init_scoring (int n_row, int n_col, EIGEN_Colamd_Row Row [], EIGEN_Colamd_Col Col [], int A [], int head [], - double knobs[EIGEN_COLAMD_KNOBS], int *p_n_row2, int *p_n_col2, int *p_max_deg); +void eigen_init_scoring (int n_row, int n_col, EIGEN_Colamd_Row Row [], EIGEN_Colamd_Col Col [], int A [], int head [], double knobs[EIGEN_COLAMD_KNOBS], int *p_n_row2, int *p_n_col2, int *p_max_deg); -int eigen_find_ordering (int n_row, int n_col, int Alen, EIGEN_Colamd_Row Row [], EIGEN_Colamd_Col Col [], int A [], int head [], - int n_col2, int max_deg, int pfree); +int eigen_find_ordering (int n_row, int n_col, int Alen, EIGEN_Colamd_Row Row [], EIGEN_Colamd_Col Col [], int A [], int head [], int n_col2, int max_deg, int pfree); void eigen_order_children (int n_col, EIGEN_Colamd_Col Col [], int p []); @@ -2514,5 +2511,4 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E } #endif /* NDEBUG */ - #endif diff --git a/Eigen/src/OrderingMethods/Ordering.h b/Eigen/src/OrderingMethods/Ordering.h index cbd2e5d34..47cd6f169 100644 --- a/Eigen/src/OrderingMethods/Ordering.h +++ b/Eigen/src/OrderingMethods/Ordering.h @@ -124,9 +124,6 @@ class COLAMDOrdering typedef PermutationMatrix PermutationType; typedef Matrix IndexVector; /** Compute the permutation vector form a sparse matrix */ - - - template void 
operator() (const MatrixType& mat, PermutationType& perm) { @@ -152,9 +149,6 @@ class COLAMDOrdering for (int i = 0; i < n; i++) perm.indices()(p(i)) = i; } - - private: - }; diff --git a/Eigen/src/SparseLU/SparseLU.h b/Eigen/src/SparseLU/SparseLU.h index bb1decc4c..25fad0f29 100644 --- a/Eigen/src/SparseLU/SparseLU.h +++ b/Eigen/src/SparseLU/SparseLU.h @@ -339,9 +339,6 @@ void SparseLU::analyzePattern(const MatrixType& mat) //TODO It is possible as in SuperLU to compute row and columns scaling vectors to equilibrate the matrix mat. - // Compute the fill-reducing ordering - // TODO Currently, the only available ordering method is AMD. - OrderingType ord; ord(mat,m_perm_c); //FIXME Check the right semantic behind m_perm_c diff --git a/Eigen/src/SparseLU/SparseLU_Coletree.h b/Eigen/src/SparseLU/SparseLU_Coletree.h index 1329d383f..142f4995e 100644 --- a/Eigen/src/SparseLU/SparseLU_Coletree.h +++ b/Eigen/src/SparseLU/SparseLU_Coletree.h @@ -94,7 +94,6 @@ int LU_sp_coletree(const MatrixType& mat, IndexVector& parent) int rset, cset, rroot; for (col = 0; col < nc; col++) { -// cset = pp(col) = col; // Initially, each element is in its own set //FIXME pp(col) = col; cset = col; root(cset) = col; @@ -108,7 +107,6 @@ int LU_sp_coletree(const MatrixType& mat, IndexVector& parent) if (rroot != col) { parent(rroot) = col; -// cset = pp(cset) = rset; // Get the union of cset and rset //FIXME pp(cset) = rset; cset = rset; root(cset) = col; diff --git a/Eigen/src/SparseLU/SparseLU_Matrix.h b/Eigen/src/SparseLU/SparseLU_Matrix.h index 90a0f2740..9f2dcaa56 100644 --- a/Eigen/src/SparseLU/SparseLU_Matrix.h +++ b/Eigen/src/SparseLU/SparseLU_Matrix.h @@ -192,7 +192,6 @@ class SuperNodalMatrix protected: Index m_row; // Number of rows Index m_col; // Number of columns -// Index m_nnz; // Number of nonzero values Index m_nsuper; // Number of supernodes Scalar* m_nzval; //array of nonzero values packed by column Index* m_nzval_colptr; //nzval_colptr[j] Stores the location in nzval[] 
which starts column j diff --git a/Eigen/src/SparseLU/SparseLU_Memory.h b/Eigen/src/SparseLU/SparseLU_Memory.h index a17079199..7a2ab93df 100644 --- a/Eigen/src/SparseLU/SparseLU_Memory.h +++ b/Eigen/src/SparseLU/SparseLU_Memory.h @@ -78,41 +78,82 @@ int expand(VectorType& vec, int& length, int nbElts, int keep_prev, int& num_ex VectorType old_vec; // Temporary vector to hold the previous values if (nbElts > 0 ) - old_vec = vec.segment(0,nbElts); // old_vec should be of size nbElts... to be checked + old_vec = vec.segment(0,nbElts); - //expand the current vector //FIXME Should be in a try ... catch region - vec.resize(new_len); - /* - * Test if the memory has been well allocated - * otherwise reduce the size and try to reallocate - * copy data from previous vector (if exists) to the newly allocated vector - */ - if ( num_expansions != 0 ) // The memory has been expanded before + //Allocate or expand the current vector + try + { + vec.resize(new_len); + } + catch(std::bad_alloc& ) { - int tries = 0; - if (keep_prev) + if ( !num_expansions ) { - if (!vec.size()) return new_len ; + // First time to allocate from LUMemInit() + throw; // Pass the exception to LUMemInit() which has a try... 
catch block + } + if (keep_prev) + { + // In this case, the memory length should not not be reduced + return new_len; } else { - while (!vec.size()) + // Reduce the size and increase again + int tries = 0; // Number of attempts + do { - // Reduce the size and allocate again - if ( ++tries > 10) return new_len; - alpha = LU_Reduce(alpha); + alpha = LU_Reduce(alpha); new_len = alpha * length ; - vec.resize(new_len); //FIXME Should be in a try catch section - } - } // end allocation - - //Copy the previous values to the newly allocated space - if (nbElts > 0) - vec.segment(0, nbElts) = old_vec; - } // end expansion + try + { + vec.resize(new_len); + } + catch(std::bad_alloc& ) + { + tries += 1; + if ( tries > 10) return new_len; + } + } while (!vec.size()); + } + } + //Copy the previous values to the newly allocated space + if (nbElts > 0) + vec.segment(0, nbElts) = old_vec; + + length = new_len; if(num_expansions) ++num_expansions; return 0; + + /* + * Test if the memory has been well allocated + * otherwise reduce the size and try to reallocate + * copy data from previous vector (if exists) to the newly allocated vector + */ +// if ( num_expansions != 0 ) // The memory has been expanded before +// { +// int tries = 0; +// if (keep_prev) +// { +// if (!vec.size()) return new_len ; +// } +// else +// { +// while (!vec.size()) +// { +// // Reduce the size and allocate again +// if ( ++tries > 10) return new_len; +// alpha = LU_Reduce(alpha); +// new_len = alpha * length ; +// vec.resize(new_len); //FIXME Should be in a try catch section +// } +// } // end allocation +// +// //Copy the previous values to the newly allocated space +// if (nbElts > 0) +// vec.segment(0, nbElts) = old_vec; +// } // end expansion } /** @@ -122,8 +163,8 @@ int expand(VectorType& vec, int& length, int nbElts, int keep_prev, int& num_ex * \param annz number of initial nonzeros in the matrix * \param lwork if lwork=-1, this routine returns an estimated size of the required memory * \param glu 
persistent data to facilitate multiple factors : will be deleted later ?? - * \return an estimated size of the required memory if lwork = -1; otherwise, return the size of actually allocated when memory allocation failed - * NOTE Unlike SuperLU, this routine does not support successive factorization with the same pattern and the row permutation + * \return an estimated size of the required memory if lwork = -1; otherwise, return the size of actually allocated memory when allocation failed, and 0 on success + * NOTE Unlike SuperLU, this routine does not support successive factorization with the same pattern and the same row permutation */ template int LUMemInit(int m, int n, int annz, int lwork, int fillratio, int panel_size, LU_GlobalLU_t& glu) @@ -159,27 +200,26 @@ int LUMemInit(int m, int n, int annz, int lwork, int fillratio, int panel_size, glu.xusub.resize(n+1); // Reserve memory for L/U factors - expand(glu.lusup, nzlumax, 0, 0, num_expansions); - expand(glu.ucol,nzumax, 0, 0, num_expansions); - expand(glu.lsub,nzlmax, 0, 0, num_expansions); - expand(glu.usub,nzumax, 0, 1, num_expansions); - - // Check if the memory is correctly allocated, - // FIXME Should be a try... 
catch section here - while ( !glu.lusup.size() || !glu.ucol.size() || !glu.lsub.size() || !glu.usub.size()) + do { - //Reduce the estimated size and retry - nzlumax /= 2; - nzumax /= 2; - nzlmax /= 2; - - if (nzlumax < annz ) return nzlumax; + try + { + expand(glu.lusup, nzlumax, 0, 0, num_expansions); + expand(glu.ucol,nzumax, 0, 0, num_expansions); + expand(glu.lsub,nzlmax, 0, 0, num_expansions); + expand(glu.usub,nzumax, 0, 1, num_expansions); + } + catch(std::bad_alloc& ) + { + //Reduce the estimated size and retry + nzlumax /= 2; + nzumax /= 2; + nzlmax /= 2; + if (nzlumax < annz ) return nzlumax; + } - expand(glu.lusup, nzlumax, 0, 0, num_expansions); - expand(glu.ucol, nzumax, 0, 0, num_expansions); - expand(glu.lsub, nzlmax, 0, 0, num_expansions); - expand(glu.usub, nzumax, 0, 1, num_expansions); - } + } while (!glu.lusup.size() || !glu.ucol.size() || !glu.lsub.size() || !glu.usub.size()); + ++num_expansions; @@ -207,23 +247,6 @@ int LUMemXpand(VectorType& vec, int& maxlen, int nbElts, LU_MemType memtype, int if (failed_size) return failed_size; - // The following code is not really needed since maxlen is passed by reference - // and correspond to the appropriate field in glu -// switch ( mem_type ) { -// case LUSUP: -// glu.nzlumax = maxlen; -// break; -// case UCOL: -// glu.nzumax = maxlen; -// break; -// case LSUB: -// glu.nzlmax = maxlen; -// break; -// case USUB: -// glu.nzumax = maxlen; -// break; -// } - return 0 ; } diff --git a/Eigen/src/SparseLU/SparseLU_column_bmod.h b/Eigen/src/SparseLU/SparseLU_column_bmod.h index 3042eb5f8..00787721b 100644 --- a/Eigen/src/SparseLU/SparseLU_column_bmod.h +++ b/Eigen/src/SparseLU/SparseLU_column_bmod.h @@ -133,7 +133,6 @@ int LU_column_bmod(const int jcol, const int nseg, BlockScalarVector& dense, Sca // Dense triangular solve -- start effective triangle luptr += nsupr * no_zeros + no_zeros; // Form Eigen matrix and vector -// std::cout<< "jcol " << jcol << " rows " << segsize << std::endl; Map, 0, 
OuterStride<> > A( &(lusup.data()[luptr]), segsize, segsize, OuterStride<>(nsupr) ); VectorBlock u(tempv, 0, segsize); diff --git a/Eigen/src/SparseLU/SparseLU_panel_dfs.h b/Eigen/src/SparseLU/SparseLU_panel_dfs.h index 908ee67ac..70ea0f51f 100644 --- a/Eigen/src/SparseLU/SparseLU_panel_dfs.h +++ b/Eigen/src/SparseLU/SparseLU_panel_dfs.h @@ -92,7 +92,6 @@ void LU_panel_dfs(const int m, const int w, const int jcol, MatrixType& A, Index int xdfs, maxdfs, kpar; // Initialize pointers -// IndexVector& marker1 = marker.block(m, m); VectorBlock marker1(marker, m, m); nseg = 0; IndexVector& xsup = glu.xsup; diff --git a/Eigen/src/SparseLU/SparseLU_snode_bmod.h b/Eigen/src/SparseLU/SparseLU_snode_bmod.h index 44438d037..fc8042f52 100644 --- a/Eigen/src/SparseLU/SparseLU_snode_bmod.h +++ b/Eigen/src/SparseLU/SparseLU_snode_bmod.h @@ -80,7 +80,6 @@ int LU_snode_bmod (const int jcol, const int fsupc, ScalarVector& dense, LU_Glob // Update the trailing part of the column jcol U(jcol:jcol+nrow, jcol) using L(jcol:jcol+nrow, fsupc:jcol) and U(fsupc:jcol) new (&A) Map,0,OuterStride<> > ( &(lusup.data()[luptr+nsupc]), nrow, nsupc, OuterStride<>(nsupr) ); -// Map > l(&(lusup.data()[ufirst+nsupc], nrow); VectorBlock l(lusup, ufirst+nsupc, nrow); l = l - A * u; } diff --git a/bench/spbench/CMakeLists.txt b/bench/spbench/CMakeLists.txt index 4b3c6f8e3..a093cc5d9 100644 --- a/bench/spbench/CMakeLists.txt +++ b/bench/spbench/CMakeLists.txt @@ -67,4 +67,4 @@ add_executable(spsolver sp_solver.cpp) target_link_libraries (spsolver ${SPARSE_LIBS}) add_executable(test_sparseLU test_sparseLU.cpp) -target_link_libraries (test_sparseLU ${SPARSE_LIBS}) \ No newline at end of file +target_link_libraries (test_sparseLU ${SPARSE_LIBS}) diff --git a/bench/spbench/test_sparseLU.cpp b/bench/spbench/test_sparseLU.cpp index 6fbf03454..31273add5 100644 --- a/bench/spbench/test_sparseLU.cpp +++ b/bench/spbench/test_sparseLU.cpp @@ -13,13 +13,14 @@ using namespace Eigen; int main(int argc, char **args) { - 
SparseMatrix A; - typedef SparseMatrix::Index Index; - typedef Matrix DenseMatrix; - typedef Matrix DenseRhs; - VectorXd b, x, tmp; -// SparseLU, AMDOrdering > solver; - SparseLU, COLAMDOrdering > solver; + typedef complex scalar; + SparseMatrix A; + typedef SparseMatrix::Index Index; + typedef Matrix DenseMatrix; + typedef Matrix DenseRhs; + Matrix b, x, tmp; +// SparseLU, AMDOrdering > solver; + SparseLU, COLAMDOrdering > solver; ifstream matrix_file; string line; int n; @@ -36,7 +37,7 @@ int main(int argc, char **args) if (iscomplex) { cout<< " Not for complex matrices \n"; return -1; } if (isvector) { cout << "The provided file is not a matrix file\n"; return -1;} if (sym != 0) { // symmetric matrices, only the lower part is stored - SparseMatrix temp; + SparseMatrix temp; temp = A; A = temp.selfadjointView(); } @@ -72,8 +73,8 @@ int main(int argc, char **args) timer.stop(); cout << "solve time " << timer.value() << std::endl; /* Check the accuracy */ - VectorXd tmp2 = b - A*x; - double tempNorm = tmp2.norm()/b.norm(); + Matrix tmp2 = b - A*x; + scalar tempNorm = tmp2.norm()/b.norm(); cout << "Relative norm of the computed solution : " << tempNorm <<"\n"; cout << "Number of nonzeros in the factor : " << solver.nnzL() + solver.nnzU() << std::endl; -- cgit v1.2.3 From 925ace196c182759026d3eb3edc06565ab5f01ee Mon Sep 17 00:00:00 2001 From: Desire NUENTSA Date: Thu, 19 Jul 2012 18:15:23 +0200 Subject: correct bug in the complex version --- Eigen/src/SparseLU/SparseLU_pivotL.h | 11 ++++++----- bench/spbench/test_sparseLU.cpp | 3 ++- 2 files changed, 8 insertions(+), 6 deletions(-) (limited to 'bench/spbench') diff --git a/Eigen/src/SparseLU/SparseLU_pivotL.h b/Eigen/src/SparseLU/SparseLU_pivotL.h index 39151f1e0..0c767c23a 100644 --- a/Eigen/src/SparseLU/SparseLU_pivotL.h +++ b/Eigen/src/SparseLU/SparseLU_pivotL.h @@ -71,7 +71,8 @@ template int LU_pivotL(const int jcol, const typename ScalarVector::RealScalar diagpivotthresh, IndexVector& perm_r, IndexVector& 
iperm_c, int& pivrow, LU_GlobalLU_t& glu) { typedef typename IndexVector::Scalar Index; - typedef typename ScalarVector::Scalar Scalar; + typedef typename ScalarVector::Scalar Scalar; + typedef typename ScalarVector::RealScalar RealScalar; // Initialize pointers IndexVector& lsub = glu.lsub; // Compressed row subscripts of L rectangular supernodes. IndexVector& xlsub = glu.xlsub; // pointers to the beginning of each column subscript in lsub @@ -88,10 +89,10 @@ int LU_pivotL(const int jcol, const typename ScalarVector::RealScalar diagpivott // Determine the largest abs numerical value for partial pivoting Index diagind = iperm_c(jcol); // diagonal index - Scalar pivmax = 0.0; + RealScalar pivmax = 0.0; Index pivptr = nsupc; Index diag = IND_EMPTY; - Scalar rtemp; + RealScalar rtemp; Index isub, icol, itemp, k; for (isub = nsupc; isub < nsupr; ++isub) { rtemp = std::abs(lu_col_ptr[isub]); @@ -109,7 +110,7 @@ int LU_pivotL(const int jcol, const typename ScalarVector::RealScalar diagpivott return (jcol+1); } - Scalar thresh = diagpivotthresh * pivmax; + RealScalar thresh = diagpivotthresh * pivmax; // Choose appropriate pivotal element @@ -119,7 +120,7 @@ int LU_pivotL(const int jcol, const typename ScalarVector::RealScalar diagpivott { // Diagonal element exists rtemp = std::abs(lu_col_ptr[diag]); - if (rtemp != Scalar(0.0) && rtemp >= thresh) pivptr = diag; + if (rtemp != 0.0 && rtemp >= thresh) pivptr = diag; } pivrow = lsub_ptr[pivptr]; } diff --git a/bench/spbench/test_sparseLU.cpp b/bench/spbench/test_sparseLU.cpp index 31273add5..08b6c926e 100644 --- a/bench/spbench/test_sparseLU.cpp +++ b/bench/spbench/test_sparseLU.cpp @@ -14,6 +14,7 @@ using namespace Eigen; int main(int argc, char **args) { typedef complex scalar; +// typedef double scalar; SparseMatrix A; typedef SparseMatrix::Index Index; typedef Matrix DenseMatrix; @@ -34,7 +35,7 @@ int main(int argc, char **args) bool iscomplex=false, isvector=false; int sym; getMarketHeader(args[1], sym, iscomplex, 
isvector); - if (iscomplex) { cout<< " Not for complex matrices \n"; return -1; } +// if (iscomplex) { cout<< " Not for complex matrices \n"; return -1; } if (isvector) { cout << "The provided file is not a matrix file\n"; return -1;} if (sym != 0) { // symmetric matrices, only the lower part is stored SparseMatrix temp; -- cgit v1.2.3 From c0fa5811ec233a5a3065cce78b1bca155a9b4fc8 Mon Sep 17 00:00:00 2001 From: "Desire NUENTSA W." Date: Fri, 27 Jul 2012 11:36:58 +0200 Subject: Refactoring codes for numeric updates --- Eigen/src/SparseLU/SparseLU.h | 1 + Eigen/src/SparseLU/SparseLU_column_bmod.h | 43 +---------- Eigen/src/SparseLU/SparseLU_kernel_bmod.h | 92 ++++++++++++++++++++++ Eigen/src/SparseLU/SparseLU_panel_bmod.h | 51 +----------- bench/spbench/sp_solver.cpp | 124 ++++++++++++++++++++++++++++++ bench/spbench/test_sparseLU.cpp | 4 +- 6 files changed, 224 insertions(+), 91 deletions(-) create mode 100644 Eigen/src/SparseLU/SparseLU_kernel_bmod.h create mode 100644 bench/spbench/sp_solver.cpp (limited to 'bench/spbench') diff --git a/Eigen/src/SparseLU/SparseLU.h b/Eigen/src/SparseLU/SparseLU.h index 25fad0f29..474dfdedc 100644 --- a/Eigen/src/SparseLU/SparseLU.h +++ b/Eigen/src/SparseLU/SparseLU.h @@ -388,6 +388,7 @@ void SparseLU::analyzePattern(const MatrixType& mat) #include "SparseLU_snode_bmod.h" #include "SparseLU_pivotL.h" #include "SparseLU_panel_dfs.h" +#include "SparseLU_kernel_bmod.h" #include "SparseLU_panel_bmod.h" #include "SparseLU_column_dfs.h" #include "SparseLU_column_bmod.h" diff --git a/Eigen/src/SparseLU/SparseLU_column_bmod.h b/Eigen/src/SparseLU/SparseLU_column_bmod.h index 00787721b..1457b6f35 100644 --- a/Eigen/src/SparseLU/SparseLU_column_bmod.h +++ b/Eigen/src/SparseLU/SparseLU_column_bmod.h @@ -66,7 +66,7 @@ int LU_column_bmod(const int jcol, const int nseg, BlockScalarVector& dense, Sca typedef typename IndexVector::Scalar Index; typedef typename ScalarVector::Scalar Scalar; int jsupno, k, ksub, krep, ksupno; - int lptr, nrow, 
isub, i, irow, nextlu, new_next, ufirst; + int lptr, nrow, isub, irow, nextlu, new_next, ufirst; int fsupc, nsupc, nsupr, luptr, kfnz, no_zeros; /* krep = representative of current k-th supernode * fsupc = first supernodal column @@ -122,46 +122,7 @@ int LU_column_bmod(const int jcol, const int nseg, BlockScalarVector& dense, Sca // Perform a triangular solver and block update, // then scatter the result of sup-col update to dense no_zeros = kfnz - fst_col; - // First, copy U[*,j] segment from dense(*) to tempv(*) - isub = lptr + no_zeros; - for (i = 0; i < segsize; i++) - { - irow = lsub(isub); - tempv(i) = dense(irow); - ++isub; - } - // Dense triangular solve -- start effective triangle - luptr += nsupr * no_zeros + no_zeros; - // Form Eigen matrix and vector - Map, 0, OuterStride<> > A( &(lusup.data()[luptr]), segsize, segsize, OuterStride<>(nsupr) ); - VectorBlock u(tempv, 0, segsize); - - u = A.template triangularView().solve(u); - - // Dense matrix-vector product y <-- A*x - luptr += segsize; - new (&A) Map, 0, OuterStride<> > ( &(lusup.data()[luptr]), nrow, segsize, OuterStride<>(nsupr) ); - VectorBlock l(tempv, segsize, nrow); - l= A * u; - - // Scatter tempv[] into SPA dense[] as a temporary storage - isub = lptr + no_zeros; - for (i = 0; i < segsize; i++) - { - irow = lsub(isub); - dense(irow) = tempv(i); - tempv(i) = Scalar(0.0); - ++isub; - } - - // Scatter l into SPA dense[] - for (i = 0; i < nrow; i++) - { - irow = lsub(isub); - dense(irow) -= l(i); - l(i) = Scalar(0.0); - ++isub; - } + LU_kernel_bmod(segsize, dense, tempv, lusup, luptr, nsupr, nrow, lsub, lptr, no_zeros); } // end if jsupno } // end for each segment diff --git a/Eigen/src/SparseLU/SparseLU_kernel_bmod.h b/Eigen/src/SparseLU/SparseLU_kernel_bmod.h new file mode 100644 index 000000000..d5df70fd2 --- /dev/null +++ b/Eigen/src/SparseLU/SparseLU_kernel_bmod.h @@ -0,0 +1,92 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. 
+// +// Copyright (C) 2012 Désiré Nuentsa-Wakam +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see <http://www.gnu.org/licenses/>. 
+ +#ifndef SPARSELU_KERNEL_BMOD_H +#define SPARSELU_KERNEL_BMOD_H + +/** + * \brief Performs numeric block updates from a given supernode to a single column + * + * \param segsize Size of the segment (and blocks ) to use for updates + * \param [in,out]dense Packed values of the original matrix + * \param tempv temporary vector to use for updates + * \param lusup array containing the supernodes + * \param nsupr Number of rows in the supernode + * \param nrow Number of rows in the rectangular part of the supernode + * \param lsub compressed row subscripts of supernodes + * \param lptr pointer to the first column of the current supernode in lsub + * \param no_zeros Number of nonzeros elements before the diagonal part of the supernode + * \return 0 on success + */ +template +int LU_kernel_bmod(const int segsize, BlockScalarVector& dense, ScalarVector& tempv, ScalarVector& lusup, int& luptr, const int nsupr, const int nrow, IndexVector& lsub, const int lptr, const int no_zeros) +{ + typedef typename ScalarVector::Scalar Scalar; + // First, copy U[*,j] segment from dense(*) to tempv(*) + // The result of triangular solve is in tempv[*]; + // The result of matric-vector update is in dense[*] + int isub = lptr + no_zeros; + int i, irow; + for (i = 0; i < segsize; i++) + { + irow = lsub(isub); + tempv(i) = dense(irow); + ++isub; + } + // Dense triangular solve -- start effective triangle + luptr += nsupr * no_zeros + no_zeros; + // Form Eigen matrix and vector + Map, 0, OuterStride<> > A( &(lusup.data()[luptr]), segsize, segsize, OuterStride<>(nsupr) ); + VectorBlock u(tempv, 0, segsize); + + u = A.template triangularView().solve(u); + + // Dense matrix-vector product y <-- A*x + luptr += segsize; + new (&A) Map, 0, OuterStride<> > ( &(lusup.data()[luptr]), nrow, segsize, OuterStride<>(nsupr) ); + VectorBlock l(tempv, segsize, nrow); + l= A * u; + + // Scatter tempv[] into SPA dense[] as a temporary storage + isub = lptr + no_zeros; + for (i = 0; i < segsize; i++) + { + 
irow = lsub(isub); + dense(irow) = tempv(i); + tempv(i) = Scalar(0.0); + ++isub; + } + + // Scatter l into SPA dense[] + for (i = 0; i < nrow; i++) + { + irow = lsub(isub); + dense(irow) -= l(i); + l(i) = Scalar(0.0); + ++isub; + } + + return 0; +} +#endif \ No newline at end of file diff --git a/Eigen/src/SparseLU/SparseLU_panel_bmod.h b/Eigen/src/SparseLU/SparseLU_panel_bmod.h index 59ec69ec8..ebff787ee 100644 --- a/Eigen/src/SparseLU/SparseLU_panel_bmod.h +++ b/Eigen/src/SparseLU/SparseLU_panel_bmod.h @@ -73,12 +73,12 @@ void LU_panel_bmod(const int m, const int w, const int jcol, const int nseg, Sca IndexVector& xlusup = glu.xlusup; ScalarVector& lusup = glu.lusup; - int i,ksub,jj,nextl_col,irow; + int ksub,jj,nextl_col; int fsupc, nsupc, nsupr, nrow; int krep, kfnz; int lptr; // points to the row subscripts of a supernode int luptr; // ... - int segsize,no_zeros,isub ; + int segsize,no_zeros ; // For each nonz supernode segment of U[*,j] in topological order int k = nseg - 1; for (ksub = 0; ksub < nseg; ksub++) @@ -118,52 +118,7 @@ void LU_panel_bmod(const int m, const int w, const int jcol, const int nseg, Sca // Perform a trianglar solve and block update, // then scatter the result of sup-col update to dense[] no_zeros = kfnz - fsupc; - // First Copy U[*,j] segment from dense[*] to tempv[*] : - // The result of triangular solve is in tempv[*]; - // The result of matric-vector update is in dense_col[*] - isub = lptr + no_zeros; - for (i = 0; i < segsize; ++i) - { - irow = lsub(isub); - tempv(i) = dense_col(irow); // Gather to a compact vector - ++isub; - } - // Start effective triangle - luptr += nsupr * no_zeros + no_zeros; - // triangular solve with Eigen - Map, 0, OuterStride<> > A( &(lusup.data()[luptr]), segsize, segsize, OuterStride<>(nsupr) ); - VectorBlock u(tempv, 0, segsize); - u = A.template triangularView().solve(u); - - luptr += segsize; - // Dense Matrix vector product y <-- A*x; - new (&A) Map, 0, OuterStride<> > ( &(lusup.data()[luptr]), nrow, 
segsize, OuterStride<>(nsupr) ); - VectorBlock l(tempv, segsize, nrow); - l= A * u; - - // Scatter tempv(*) into SPA dense(*) such that tempv(*) - // can be used for the triangular solve of the next - // column of the panel. The y will be copied into ucol(*) - // after the whole panel has been finished... after column_dfs() and column_bmod() - - isub = lptr + no_zeros; - for (i = 0; i < segsize; i++) - { - irow = lsub(isub); - dense_col(irow) = tempv(i); - tempv(i) = Scalar(0.0); - isub++; - } - - // Scatter the update from &tempv[segsize] into SPA dense(*) - // Start dense rectangular L - for (i = 0; i < nrow; i++) - { - irow = lsub(isub); - dense_col(irow) -= l(i); - l(i) = Scalar(0); - ++isub; - } + LU_kernel_bmod(segsize, dense_col, tempv, lusup, luptr, nsupr, nrow, lsub, lptr, no_zeros); } // End for each column in the panel } // End for each updating supernode diff --git a/bench/spbench/sp_solver.cpp b/bench/spbench/sp_solver.cpp new file mode 100644 index 000000000..e18f2d1c3 --- /dev/null +++ b/bench/spbench/sp_solver.cpp @@ -0,0 +1,124 @@ +// Small bench routine for Eigen available in Eigen +// (C) Desire NUENTSA WAKAM, INRIA + +#include +#include +#include +#include +#include +#include +#include +#include +//#include +#include +// #include +#include + +using namespace std; +using namespace Eigen; + +int main(int argc, char **args) +{ + SparseMatrix A; + typedef SparseMatrix::Index Index; + typedef Matrix DenseMatrix; + typedef Matrix DenseRhs; + VectorXd b, x, tmp; + BenchTimer timer,totaltime; + //SparseLU > solver; + SuperLU > solver; + ifstream matrix_file; + string line; + int n; + // Set parameters +// solver.iparm(IPARM_THREAD_NBR) = 4; + /* Fill the matrix with sparse matrix stored in Matrix-Market coordinate column-oriented format */ + if (argc < 2) assert(false && "please, give the matrix market file "); + + timer.start(); + totaltime.start(); + loadMarket(A, args[1]); + cout << "End charging matrix " << endl; + bool iscomplex=false, 
isvector=false; + int sym; + getMarketHeader(args[1], sym, iscomplex, isvector); + if (iscomplex) { cout<< " Not for complex matrices \n"; return -1; } + if (isvector) { cout << "The provided file is not a matrix file\n"; return -1;} + if (sym != 0) { // symmetric matrices, only the lower part is stored + SparseMatrix temp; + temp = A; + A = temp.selfadjointView(); + } + timer.stop(); + + n = A.cols(); + // ====== TESTS FOR SPARSE TUTORIAL ====== +// cout<< "OuterSize " << A.outerSize() << " inner " << A.innerSize() << endl; +// SparseMatrix mat1(A); +// SparseMatrix mat2; +// cout << " norm of A " << mat1.norm() << endl; ; +// PermutationMatrix perm(n); +// perm.resize(n,1); +// perm.indices().setLinSpaced(n, 0, n-1); +// mat2 = perm * mat1; +// mat.subrows(); +// mat2.resize(n,n); +// mat2.reserve(10); +// mat2.setConstant(); +// std::cout<< "NORM " << mat1.squaredNorm()<< endl; + + cout<< "Time to load the matrix " << timer.value() < 2) + loadMarketVector(b, args[2]); + else + { + b.resize(n); + tmp.resize(n); +// tmp.setRandom(); + for (int i = 0; i < n; i++) tmp(i) = i; + b = A * tmp ; + } +// Scaling > scal; +// scal.computeRef(A); +// b = scal.LeftScaling().cwiseProduct(b); + + /* Compute the factorization */ + cout<< "Starting the factorization "<< endl; + timer.reset(); + timer.start(); + cout<< "Size of Input Matrix "<< b.size()<<"\n\n"; + cout<< "Rows and columns "<< A.rows() <<" " < scalar; -// typedef double scalar; +// typedef complex scalar; + typedef double scalar; SparseMatrix A; typedef SparseMatrix::Index Index; typedef Matrix DenseMatrix; -- cgit v1.2.3 From 7dc39b703706b56a4a46255dabfeeddf50e76581 Mon Sep 17 00:00:00 2001 From: Desire NUENTSA Date: Fri, 3 Aug 2012 13:05:45 +0200 Subject: Add unit tests --- bench/spbench/test_sparseLU.cpp | 2 +- test/CMakeLists.txt | 2 +- test/sparse_solver.h | 4 ++-- test/sparselu.cpp | 43 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 47 insertions(+), 4 deletions(-) create mode 100644 
test/sparselu.cpp (limited to 'bench/spbench') diff --git a/bench/spbench/test_sparseLU.cpp b/bench/spbench/test_sparseLU.cpp index ecf254b3d..59f8252d0 100644 --- a/bench/spbench/test_sparseLU.cpp +++ b/bench/spbench/test_sparseLU.cpp @@ -70,7 +70,7 @@ int main(int argc, char **args) cout << "Factorize Time " << timer.value() << std::endl; timer.reset(); timer.start(); - solver._solve(b, x); + x = solver.solve(b); timer.stop(); cout << "solve time " << timer.value() << std::endl; /* Check the accuracy */ diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 8de795baa..0d5ab61af 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -200,7 +200,7 @@ ei_add_test(vectorwiseop) ei_add_test(simplicial_cholesky) ei_add_test(conjugate_gradient) ei_add_test(bicgstab) - +ei_add_test(sparselu) if(UMFPACK_FOUND) ei_add_test(umfpack_support "" "${UMFPACK_ALL_LIBS}") diff --git a/test/sparse_solver.h b/test/sparse_solver.h index 1bbaeb2dd..f529ad7e9 100644 --- a/test/sparse_solver.h +++ b/test/sparse_solver.h @@ -163,9 +163,9 @@ inline std::string get_matrixfolder() { std::string mat_folder = TEST_REAL_CASES; if( internal::is_same >::value || internal::is_same >::value ) - mat_folder = mat_folder + static_cast("/complex/"); + mat_folder = mat_folder + static_cast("/complex/"); else - mat_folder = mat_folder + static_cast("/real/"); + mat_folder = mat_folder + static_cast("/real/"); return mat_folder; } #endif diff --git a/test/sparselu.cpp b/test/sparselu.cpp new file mode 100644 index 000000000..e960f9c93 --- /dev/null +++ b/test/sparselu.cpp @@ -0,0 +1,43 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. 
+// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . +#include "sparse_solver.h" +#include +#include + +template void test_sparselu_T() +{ + SparseLU, COLAMDOrdering > sparselu_colamd; + SparseLU, AMDOrdering > sparselu_amd; + + check_sparse_square_solving(sparselu_colamd); + check_sparse_square_solving(sparselu_amd); +} + +void test_sparselu() +{ + CALL_SUBTEST_1(test_sparselu_T()); + CALL_SUBTEST_2(test_sparselu_T()); + CALL_SUBTEST_3(test_sparselu_T >()); + CALL_SUBTEST_4(test_sparselu_T >()); +} \ No newline at end of file -- cgit v1.2.3 From 4d3b7e2a1351d60b9ee26d0fe3442cd5b3a1f8a9 Mon Sep 17 00:00:00 2001 From: Desire NUENTSA Date: Mon, 6 Aug 2012 14:55:02 +0200 Subject: Add support for Metis fill-reducing ordering ; it is generally more efficient than COLAMD ordering --- Eigen/MetisSupport | 26 +++++++ Eigen/src/MetisSupport/CMakeLists.txt | 6 ++ Eigen/src/MetisSupport/MetisSupport.h | 138 ++++++++++++++++++++++++++++++++++ bench/spbench/CMakeLists.txt | 6 ++ bench/spbench/test_sparseLU.cpp | 8 ++ cmake/FindMetis.cmake | 3 +- 6 files changed, 186 insertions(+), 1 deletion(-) create mode 100644 Eigen/MetisSupport create mode 100644 Eigen/src/MetisSupport/CMakeLists.txt create mode 100644 Eigen/src/MetisSupport/MetisSupport.h (limited to 'bench/spbench') diff --git a/Eigen/MetisSupport b/Eigen/MetisSupport new file mode 100644 
index 000000000..a44086ad9 --- /dev/null +++ b/Eigen/MetisSupport @@ -0,0 +1,26 @@ +#ifndef EIGEN_METISSUPPORT_MODULE_H +#define EIGEN_METISSUPPORT_MODULE_H + +#include "SparseCore" + +#include "src/Core/util/DisableStupidWarnings.h" + +extern "C" { +#include +} + + +/** \ingroup Support_modules + * \defgroup MetisSupport_Module MetisSupport module + * + * \code + * #include + * \endcode + */ + + +#include "src/MetisSupport/MetisSupport.h" + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_METISSUPPORT_MODULE_H diff --git a/Eigen/src/MetisSupport/CMakeLists.txt b/Eigen/src/MetisSupport/CMakeLists.txt new file mode 100644 index 000000000..2bad31416 --- /dev/null +++ b/Eigen/src/MetisSupport/CMakeLists.txt @@ -0,0 +1,6 @@ +FILE(GLOB Eigen_MetisSupport_SRCS "*.h") + +INSTALL(FILES + ${Eigen_MetisSupport_SRCS} + DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/MetisSupport COMPONENT Devel + ) diff --git a/Eigen/src/MetisSupport/MetisSupport.h b/Eigen/src/MetisSupport/MetisSupport.h new file mode 100644 index 000000000..a762d96f6 --- /dev/null +++ b/Eigen/src/MetisSupport/MetisSupport.h @@ -0,0 +1,138 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. +#ifndef METIS_SUPPORT_H +#define METIS_SUPPORT_H + +namespace Eigen { +/** + * Get the fill-reducing ordering from the METIS package + * + * If A is the original matrix and Ap is the permuted matrix, + * the fill-reducing permutation is defined as follows : + * Row (column) i of A is the matperm(i) row (column) of Ap. 
+ * WARNING: As computed by METIS, this corresponds to the vector iperm (instead of perm) + */ +template +class MetisOrdering +{ +public: + typedef PermutationMatrix PermutationType; + typedef Matrix IndexVector; + + template + void get_symmetrized_graph(const MatrixType& A) + { + Index m = A.cols(); + + // Get the transpose of the input matrix + MatrixType At = A.transpose(); + // Get the number of nonzeros elements in each row/col of At+A + Index TotNz = 0; + IndexVector visited(m); + visited.setConstant(-1); + for (int j = 0; j < m; j++) + { + // Compute the union structure of of A(j,:) and At(j,:) + visited(j) = j; // Do not include the diagonal element + // Get the nonzeros in row/column j of A + for (typename MatrixType::InnerIterator it(A, j); it; ++it) + { + Index idx = it.index(); // Get the row index (for column major) or column index (for row major) + if (visited(idx) != j ) + { + visited(idx) = j; + ++TotNz; + } + } + //Get the nonzeros in row/column j of At + for (typename MatrixType::InnerIterator it(At, j); it; ++it) + { + Index idx = it.index(); + if(visited(idx) != j) + { + visited(idx) = j; + ++TotNz; + } + } + } + // Reserve place for A + At + m_indexPtr.resize(m+1); + m_innerIndices.resize(TotNz); + + // Now compute the real adjacency list of each column/row + visited.setConstant(-1); + Index CurNz = 0; + for (int j = 0; j < m; j++) + { + m_indexPtr(j) = CurNz; + + visited(j) = j; // Do not include the diagonal element + // Add the pattern of row/column j of A to A+At + for (typename MatrixType::InnerIterator it(A,j); it; ++it) + { + Index idx = it.index(); // Get the row index (for column major) or column index (for row major) + if (visited(idx) != j ) + { + visited(idx) = j; + m_innerIndices(CurNz) = idx; + CurNz++; + } + } + //Add the pattern of row/column j of At to A+At + for (typename MatrixType::InnerIterator it(At, j); it; ++it) + { + Index idx = it.index(); + if(visited(idx) != j) + { + visited(idx) = j; + m_innerIndices(CurNz) = idx; + 
++CurNz; + } + } + } + m_indexPtr(m) = CurNz; + } + + template + void operator() (const MatrixType& A, PermutationType& matperm) + { + Index m = A.cols(); + IndexVector perm(m),iperm(m); + // First, symmetrize the matrix graph. + get_symmetrized_graph(A); + int output_error; + + // Call the fill-reducing routine from METIS + output_error = METIS_NodeND(&m, m_indexPtr.data(), m_innerIndices.data(), NULL, NULL, perm.data(), iperm.data()); + + if(output_error != METIS_OK) + { + //FIXME The ordering interface should define a class of possible errors + std::cerr << "ERROR WHILE CALLING THE METIS PACKAGE \n"; + return; + } + + // Get the fill-reducing permutation + //NOTE: If Ap is the permuted matrix then perm and iperm vectors are defined as follows + // Row (column) i of Ap is the perm(i) row(column) of A, and row (column) i of A is the iperm(i) row(column) of Ap + + // To be consistent with the use of the permutation in SparseLU module, we thus keep the iperm vector + matperm.resize(m); + for (int j = 0; j < m; j++) + matperm.indices()(j) = iperm(j); + + } + + protected: + IndexVector m_indexPtr; // Pointer to the adjacenccy list of each row/column + IndexVector m_innerIndices; // Adjacency list +}; + +}// end namespace eigen +#endif \ No newline at end of file diff --git a/bench/spbench/CMakeLists.txt b/bench/spbench/CMakeLists.txt index a093cc5d9..2eb0befa9 100644 --- a/bench/spbench/CMakeLists.txt +++ b/bench/spbench/CMakeLists.txt @@ -66,5 +66,11 @@ target_link_libraries (spbenchsolver ${SPARSE_LIBS}) add_executable(spsolver sp_solver.cpp) target_link_libraries (spsolver ${SPARSE_LIBS}) +if(METIS_FOUND) + include_directories(${METIS_INCLUDES}) + set (SPARSE_LIBS ${SPARSE_LIBS} ${METIS_LIBRARIES}) + add_definitions("-DEIGEN_METIS_SUPPORT") +endif(METIS_FOUND) + add_executable(test_sparseLU test_sparseLU.cpp) target_link_libraries (test_sparseLU ${SPARSE_LIBS}) diff --git a/bench/spbench/test_sparseLU.cpp b/bench/spbench/test_sparseLU.cpp index 59f8252d0..8c78b0c9b 
100644 --- a/bench/spbench/test_sparseLU.cpp +++ b/bench/spbench/test_sparseLU.cpp @@ -7,6 +7,9 @@ #include #include #include +#ifdef EIGEN_METIS_SUPPORT +#include +#endif using namespace std; using namespace Eigen; @@ -21,7 +24,12 @@ int main(int argc, char **args) typedef Matrix DenseRhs; Matrix b, x, tmp; // SparseLU, AMDOrdering > solver; +#ifdef EIGEN_METIS_SUPPORT + SparseLU, MetisOrdering > solver; +#else SparseLU, COLAMDOrdering > solver; +#endif + ifstream matrix_file; string line; int n; diff --git a/cmake/FindMetis.cmake b/cmake/FindMetis.cmake index e4d6ef258..627c3e9ae 100644 --- a/cmake/FindMetis.cmake +++ b/cmake/FindMetis.cmake @@ -12,10 +12,11 @@ find_path(METIS_INCLUDES ${INCLUDE_INSTALL_DIR} PATH_SUFFIXES metis + include ) -find_library(METIS_LIBRARIES metis PATHS $ENV{METISDIR} ${LIB_INSTALL_DIR}) +find_library(METIS_LIBRARIES metis PATHS $ENV{METISDIR} ${LIB_INSTALL_DIR} PATH_SUFFIXES lib) include(FindPackageHandleStandardArgs) find_package_handle_standard_args(METIS DEFAULT_MSG -- cgit v1.2.3 From 2280f2490e802320838f8e51884abe8667a6112d Mon Sep 17 00:00:00 2001 From: Desire NUENTSA Date: Tue, 4 Sep 2012 12:21:07 +0200 Subject: Init perf values --- Eigen/src/SparseLU/SparseLU.h | 37 +++++++++++++------------------- Eigen/src/SparseLU/SparseLU_Structs.h | 12 +++++++++++ Eigen/src/SparseLU/SparseLU_panel_bmod.h | 4 ++-- bench/spbench/test_sparseLU.cpp | 10 +++++---- 4 files changed, 35 insertions(+), 28 deletions(-) (limited to 'bench/spbench') diff --git a/Eigen/src/SparseLU/SparseLU.h b/Eigen/src/SparseLU/SparseLU.h index 997f4e352..6a6579493 100644 --- a/Eigen/src/SparseLU/SparseLU.h +++ b/Eigen/src/SparseLU/SparseLU.h @@ -204,12 +204,12 @@ class SparseLU // Functions void initperfvalues() { - m_panel_size = 12; - m_relax = 6; - m_maxsuper = 100; - m_rowblk = 200; - m_colblk = 60; - m_fillfactor = 20; + m_perfv.panel_size = 12; + m_perfv.relax = 6; + m_perfv.maxsuper = 100; + m_perfv.rowblk = 200; + m_perfv.colblk = 60; + m_perfv.fillfactor = 
20; } // Variables @@ -231,14 +231,7 @@ class SparseLU bool m_symmetricmode; // values for performance - int m_panel_size; // a panel consists of at most consecutive columns - int m_relax; // To control degree of relaxing supernodes. If the number of nodes (columns) - // in a subtree of the elimination tree is less than relax, this subtree is considered - // as one supernode regardless of the row structures of those columns - int m_maxsuper; // The maximum size for a supernode in complete LU - int m_rowblk; // The minimum row dimension for 2-D blocking to be used; - int m_colblk; // The minimum column dimension for 2-D blocking to be used; - int m_fillfactor; // The estimated fills factors for L and U, compared with A + LU_perfvalues m_perfv; RealScalar m_diagpivotthresh; // Specifies the threshold used for a diagonal entry to be an acceptable pivot int m_nnzL, m_nnzU; // Nonzeros in L and U factors @@ -374,10 +367,10 @@ void SparseLU::factorize(const MatrixType& matrix) int m = m_mat.rows(); int n = m_mat.cols(); int nnz = m_mat.nonZeros(); - int maxpanel = m_panel_size * m; + int maxpanel = m_perfv.panel_size * m; // Allocate working storage common to the factor routines int lwork = 0; - int info = LUMemInit(m, n, nnz, lwork, m_fillfactor, m_panel_size, m_glu); + int info = LUMemInit(m, n, nnz, lwork, m_perfv.fillfactor, m_perfv.panel_size, m_glu); if (info) { std::cerr << "UNABLE TO ALLOCATE WORKING MEMORY\n\n" ; @@ -401,7 +394,7 @@ void SparseLU::factorize(const MatrixType& matrix) ScalarVector dense; dense.setZero(maxpanel); ScalarVector tempv; - tempv.setZero(LU_NUM_TEMPV(m, m_panel_size, m_maxsuper, m_rowblk) ); + tempv.setZero(LU_NUM_TEMPV(m, m_perfv.panel_size, m_perfv.maxsuper, m_perfv.rowblk) ); // Compute the inverse of perm_c PermutationType iperm_c(m_perm_c.inverse()); @@ -409,9 +402,9 @@ void SparseLU::factorize(const MatrixType& matrix) // Identify initial relaxed snodes IndexVector relax_end(n); if ( m_symmetricmode == true ) - 
LU_heap_relax_snode(n, m_etree, m_relax, marker, relax_end); + LU_heap_relax_snode(n, m_etree, m_perfv.relax, marker, relax_end); else - LU_relax_snode(n, m_etree, m_relax, marker, relax_end); + LU_relax_snode(n, m_etree, m_perfv.relax, marker, relax_end); m_perm_r.resize(m); @@ -499,7 +492,7 @@ void SparseLU::factorize(const MatrixType& matrix) { // Work on one panel of panel_size columns // Adjust panel size so that a panel won't overlap with the next relaxed snode. - int panel_size = m_panel_size; // upper bound on panel width + int panel_size = m_perfv.panel_size; // upper bound on panel width for (k = jcol + 1; k < std::min(jcol+panel_size, n); k++) { if (relax_end(k) != IND_EMPTY) @@ -515,7 +508,7 @@ void SparseLU::factorize(const MatrixType& matrix) LU_panel_dfs(m, panel_size, jcol, m_mat, m_perm_r.indices(), nseg1, dense, panel_lsub, segrep, repfnz, xprune, marker, parent, xplore, m_glu); // Numeric sup-panel updates in topological order - LU_panel_bmod(m, panel_size, jcol, nseg1, dense, tempv, segrep, repfnz, m_glu); + LU_panel_bmod(m, panel_size, jcol, nseg1, dense, tempv, segrep, repfnz, m_perfv, m_glu); // Sparse LU within the panel, and below the panel diagonal for ( jj = jcol; jj< jcol + panel_size; jj++) @@ -526,7 +519,7 @@ void SparseLU::factorize(const MatrixType& matrix) //Depth-first-search for the current column VectorBlock panel_lsubk(panel_lsub, k, m); VectorBlock repfnz_k(repfnz, k, m); - info = LU_column_dfs(m, jj, m_perm_r.indices(), m_maxsuper, nseg, panel_lsubk, segrep, repfnz_k, xprune, marker, parent, xplore, m_glu); + info = LU_column_dfs(m, jj, m_perm_r.indices(), m_perfv.maxsuper, nseg, panel_lsubk, segrep, repfnz_k, xprune, marker, parent, xplore, m_glu); if ( info ) { std::cerr << "UNABLE TO EXPAND MEMORY IN COLUMN_DFS() \n"; diff --git a/Eigen/src/SparseLU/SparseLU_Structs.h b/Eigen/src/SparseLU/SparseLU_Structs.h index 4b4dfdc77..7b3aa250c 100644 --- a/Eigen/src/SparseLU/SparseLU_Structs.h +++ 
b/Eigen/src/SparseLU/SparseLU_Structs.h @@ -88,4 +88,16 @@ struct LU_GlobalLU_t { Index n; // Number of columns in the matrix int num_expansions; }; + +// Values to set for performance +struct LU_perfvalues { + int panel_size; // a panel consists of at most consecutive columns + int relax; // To control degree of relaxing supernodes. If the number of nodes (columns) + // in a subtree of the elimination tree is less than relax, this subtree is considered + // as one supernode regardless of the row structures of those columns + int maxsuper; // The maximum size for a supernode in complete LU + int rowblk; // The minimum row dimension for 2-D blocking to be used; + int colblk; // The minimum column dimension for 2-D blocking to be used; + int fillfactor; // The estimated fills factors for L and U, compared with A +}; #endif \ No newline at end of file diff --git a/Eigen/src/SparseLU/SparseLU_panel_bmod.h b/Eigen/src/SparseLU/SparseLU_panel_bmod.h index 36b4f74df..1b31cc31a 100644 --- a/Eigen/src/SparseLU/SparseLU_panel_bmod.h +++ b/Eigen/src/SparseLU/SparseLU_panel_bmod.h @@ -49,7 +49,7 @@ * */ template -void LU_panel_bmod(const int m, const int w, const int jcol, const int nseg, ScalarVector& dense, ScalarVector& tempv, DenseIndexBlock& segrep, DenseIndexBlock& repfnz, LU_GlobalLU_t& glu) +void LU_panel_bmod(const int m, const int w, const int jcol, const int nseg, ScalarVector& dense, ScalarVector& tempv, DenseIndexBlock& segrep, DenseIndexBlock& repfnz, LU_perfvalues& perfv, LU_GlobalLU_t& glu) { typedef typename ScalarVector::Scalar Scalar; @@ -95,7 +95,7 @@ void LU_panel_bmod(const int m, const int w, const int jcol, const int nseg, Sca // if the blocks are large enough, use level 3 // TODO find better heuristics! 
- if( nsupc >= 50 && nrow > 50 && u_cols>6) + if( nsupc >= perfv.colblk && nrow > perfv.rowblk && u_cols>perfv.relax) { Map > U(tempv.data(), u_rows, u_cols); diff --git a/bench/spbench/test_sparseLU.cpp b/bench/spbench/test_sparseLU.cpp index 8c78b0c9b..c6511a9bc 100644 --- a/bench/spbench/test_sparseLU.cpp +++ b/bench/spbench/test_sparseLU.cpp @@ -24,11 +24,13 @@ int main(int argc, char **args) typedef Matrix DenseRhs; Matrix b, x, tmp; // SparseLU, AMDOrdering > solver; -#ifdef EIGEN_METIS_SUPPORT - SparseLU, MetisOrdering > solver; -#else +// #ifdef EIGEN_METIS_SUPPORT +// SparseLU, MetisOrdering > solver; +// std::cout<< "ORDERING : METIS\n"; +// #else SparseLU, COLAMDOrdering > solver; -#endif + std::cout<< "ORDERING : COLAMD\n"; +// #endif ifstream matrix_file; string line; -- cgit v1.2.3 From 063705b5be5a41e324773887d3d5ae065321a719 Mon Sep 17 00:00:00 2001 From: Desire NUENTSA Date: Fri, 7 Sep 2012 13:14:57 +0200 Subject: Add tutorial for sparse solvers --- Eigen/src/SparseCore/SparseMatrix.h | 2 +- Eigen/src/SparseLU/SparseLU.h | 4 +- Eigen/src/SuperLUSupport/SuperLUSupport.h | 4 +- bench/spbench/CMakeLists.txt | 1 + doc/I17_SparseLinearSystems.dox | 110 ++++++++++++++++++++++++++++++ 5 files changed, 115 insertions(+), 6 deletions(-) create mode 100644 doc/I17_SparseLinearSystems.dox (limited to 'bench/spbench') diff --git a/Eigen/src/SparseCore/SparseMatrix.h b/Eigen/src/SparseCore/SparseMatrix.h index 52a9dab70..87f3fb873 100644 --- a/Eigen/src/SparseCore/SparseMatrix.h +++ b/Eigen/src/SparseCore/SparseMatrix.h @@ -478,7 +478,7 @@ class SparseMatrix } /** Turns the matrix into the uncompressed mode */ - void Uncompress() + void uncompress() { if(m_innerNonZeros != 0) return; diff --git a/Eigen/src/SparseLU/SparseLU.h b/Eigen/src/SparseLU/SparseLU.h index 6a6579493..e2076138a 100644 --- a/Eigen/src/SparseLU/SparseLU.h +++ b/Eigen/src/SparseLU/SparseLU.h @@ -271,7 +271,7 @@ void SparseLU::analyzePattern(const MatrixType& mat) //First copy the whole input 
matrix. m_mat = mat; - m_mat.Uncompress(); //NOTE: The effect of this command is only to create the InnerNonzeros pointers. FIXME : This vector is filled but not subsequently used. + m_mat.uncompress(); //NOTE: The effect of this command is only to create the InnerNonzeros pointers. FIXME : This vector is filled but not subsequently used. //Then, permute only the column pointers for (int i = 0; i < mat.cols(); i++) { @@ -356,7 +356,7 @@ void SparseLU::factorize(const MatrixType& matrix) // Apply the column permutation computed in analyzepattern() // m_mat = matrix * m_perm_c.inverse(); m_mat = matrix; - m_mat.Uncompress(); //NOTE: The effect of this command is only to create the InnerNonzeros pointers. + m_mat.uncompress(); //NOTE: The effect of this command is only to create the InnerNonzeros pointers. //Then, permute only the column pointers for (int i = 0; i < matrix.cols(); i++) { diff --git a/Eigen/src/SuperLUSupport/SuperLUSupport.h b/Eigen/src/SuperLUSupport/SuperLUSupport.h index e3fae4a36..faefd8169 100644 --- a/Eigen/src/SuperLUSupport/SuperLUSupport.h +++ b/Eigen/src/SuperLUSupport/SuperLUSupport.h @@ -627,9 +627,7 @@ void SuperLU::factorize(const MatrixType& a) this->initFactorization(a); - //DEBUG -// m_sluOptions.ColPerm = COLAMD; - m_sluOptions.Equil = NO; + m_sluOptions.ColPerm = COLAMD; int info = 0; RealScalar recip_pivot_growth, rcond; RealScalar ferr, berr; diff --git a/bench/spbench/CMakeLists.txt b/bench/spbench/CMakeLists.txt index 2eb0befa9..5451843b9 100644 --- a/bench/spbench/CMakeLists.txt +++ b/bench/spbench/CMakeLists.txt @@ -74,3 +74,4 @@ endif(METIS_FOUND) add_executable(test_sparseLU test_sparseLU.cpp) target_link_libraries (test_sparseLU ${SPARSE_LIBS}) + diff --git a/doc/I17_SparseLinearSystems.dox b/doc/I17_SparseLinearSystems.dox new file mode 100644 index 000000000..740bee18e --- /dev/null +++ b/doc/I17_SparseLinearSystems.dox @@ -0,0 +1,110 @@ +namespace Eigen { +/** \page TopicSparseSystems Solving Sparse Linear Systems +In 
Eigen, there are several methods available to solve linear systems when the coefficient matrix is sparse. Because of the special representation of this class of matrices, special care should be taken in order to get good performance. See \ref TutorialSparse for a detailed introduction about sparse matrices in Eigen. In this page, we briefly present the main steps that are common to all the linear solvers in Eigen together with the main concepts behind them. Depending on the properties of the matrix and the desired accuracy, the end-user is able to tune these steps in order to improve the performance of their code. However, an impatient user does not need to know deeply what's hiding behind these steps: the last section presents a benchmark routine that can be easily used to get an insight on the performance of all the available solvers. + +\b Table \b of \b contents \n + - \ref TheSparseCompute + - \ref TheSparseSolve + - \ref BenchmarkRoutine + + As summarized in \ref TutorialSparseDirectSolvers, there are many built-in solvers in Eigen as well as interfaces to external solver libraries. All these solvers follow the same calling sequence. The basic steps are as follows : +\code +#include +// ... +SparseMatrix A; +// fill A +VectorXd b, x; +// fill b +// solve Ax = b +SolverClassName > solver; +solver.compute(A); +if(solver.info()!=Success) { + // decomposition failed + return; +} +x = solver.solve(b); +if(solver.info()!=Success) { + // solving failed + return; +} +\endcode + +\section TheSparseCompute The Compute Step +In the compute() function, the matrix is generally factorized: LLT for self-adjoint matrices, LDLT for general hermitian matrices and LU for non hermitian matrices. These are the results of using direct solvers. For this class of solvers precisely, the compute step is further subdivided into analyzePattern() and factorize(). 
+ +The goal of analyzePattern() is to reorder the nonzero elements of the matrix, such that the factorization step creates less fill-in. This step exploits only the structure of the matrix. Hence, the results of this step can be used for other linear systems where the matrix has the same structure. Note however that sometimes, some external solvers (like SuperLU) require that the values of the matrix are set in this step, for instance to equilibrate the rows and columns of the matrix. In this situation, the results of this step cannot be used with other matrices. + +Eigen provides a limited set of methods to reorder the matrix in this step, either built-in (COLAMD, AMD) or external (METIS). These methods are set in the template parameter list of the solver : +\code +DirectSolverClassName, OrderingMethod > solver; +\endcode + +See \link Ordering_Modules the Ordering module \endlink for the list of available methods and the associated options. + +In factorize(), the factors of the coefficient matrix are computed. This step should be called each time the values of the matrix change. However, the structural pattern of the matrix should not change between multiple calls. + +For iterative solvers, the compute step is used to set up a preconditioner, if one is selected. Remember that, basically, the goal of the preconditioner is to speed up the convergence of an iterative method by solving a modified linear system where the coefficient matrix has more clustered eigenvalues. For real problems, an iterative solver should always be used with a preconditioner. In Eigen, a preconditioner is selected by simply adding it as a template parameter to the iterative solver object. +\code +IterativeSolverClassName, PreconditionerName > solver; +\endcode + +FIXME How to get a reference to the preconditioner, in order to set the parameters + +For instance, with the ILUT preconditioner, the incomplete factors L and U are computed in this step. 
+See \link Sparse_modules the Sparse module \endlink for the list of available preconditioners in Eigen. +\section TheSparseSolve The Solve step +The solve() function computes the solution of the linear systems with one or many right hand sides. +\code +X = solver.solve(B); +\endcode +Here, B can be a vector or a matrix where the columns form the different right hand sides. The solve() function can be called several times as well, for instance when not all the right hand sides are available at once. +\code +x1 = solver.solve(b1); +// Get the second right hand side b2 +x2 = solver.solve(b2); +// ... +\endcode +For direct methods, the solutions are computed to machine precision. Sometimes, the solution need not be too accurate. In this case, the iterative methods are more suitable and the desired accuracy can be set before the solve step using setTolerance(). For all the available functions, please, refer to the documentation of the \link IterativeLinearSolvers_module Iterative solvers module \endlink. + +\section BenchmarkRoutine Benchmark routine +Most of the time, all you need is to know how much time it will take to solve your system, and hopefully, what is the most suitable solver. In Eigen, we provide a benchmark routine that can be used for this purpose. It is very easy to use. First, it should be activated at the configuration step with the flag TEST_REAL_CASES. Then, in bench/spbench, you can compile the routine by typing \b make \e spbenchsolver. You can then run it with the --help option to get the list of all available options. Basically, the matrices to test should be in \link http://math.nist.gov/MatrixMarket/formats.html MatrixMarket Coordinate format \endlink, and the routine returns the statistics from all available solvers in Eigen. + +The following table gives an example of XHTML statistics from several Eigen built-in and external solvers. + + +
Matrix N NNZ UMFPACK SUPERLU PASTIX LU BiCGSTAB BiCGSTAB+ILUT GMRES+ILUT LDLT CHOLMOD LDLT PASTIX LDLT LLT CHOLMOD SP LLT CHOLMOD LLT PASTIX LLT CG
vector_graphics 12855 72069 Compute Time 0.02545490.02156770.07018270.0001533880.01401070.01537090.01016010.009305020.0649689 +
Solve Time 0.003378350.0009518260.004843730.03748860.00464450.008477540.0005418130.0002936960.00485376 +
Total Time 0.02883330.02251950.07502650.0376420.01865520.02384840.01070190.009598710.0698227 +
Error(Iter) 1.299e-16 2.04207e-16 4.83393e-15 3.94856e-11 (80) 1.03861e-12 (3) 5.81088e-14 (6) 1.97578e-16 1.83927e-16 4.24115e-15 +
poisson_SPD 19788 308232 Compute Time 0.4250261.823780.6173670.0004789211.340011.334710.7964190.8575730.4730070.8148260.1847190.8615550.4705590.000458188 +
Solve Time 0.02800530.01944020.02687470.2494370.05484440.09269910.008502040.00531710.02589320.008746030.005781550.005303610.02489420.239093 +
Total Time 0.4530311.843220.6442410.2499161.394861.427410.8049210.8628910.49890.8235720.1905010.8668590.4954530.239551 +
Error(Iter) 4.67146e-16 1.068e-15 1.3397e-15 6.29233e-11 (201) 3.68527e-11 (6) 3.3168e-15 (16) 1.86376e-15 1.31518e-16 1.42593e-15 3.45361e-15 3.14575e-16 2.21723e-15 7.21058e-16 9.06435e-12 (261) +
sherman2 1080 23094 Compute Time 0.006317540.0150520.0247514 -0.02144250.0217988 +
Solve Time 0.0004784240.0003379980.0010291 -0.002431520.00246152 +
Total Time 0.006795970.015390.0257805 -0.0238740.0242603 +
Error(Iter) 1.83099e-15 8.19351e-15 2.625e-14 1.3678e+69 (1080) 4.1911e-12 (7) 5.0299e-13 (12) +
bcsstk01_SPD 48 400 Compute Time 0.0001690790.000107890.0005725381.425e-069.1612e-058.3985e-055.6489e-057.0913e-050.0004682515.7389e-058.0212e-055.8394e-050.0004630171.333e-06 +
Solve Time 1.2288e-051.1124e-050.0002863878.5896e-051.6381e-051.6984e-053.095e-064.115e-060.0003254383.504e-067.369e-063.454e-060.0002940956.0516e-05 +
Total Time 0.0001813670.0001190140.0008589258.7321e-050.0001079930.0001009695.9584e-057.5028e-050.0007936896.0893e-058.7581e-056.1848e-050.0007571126.1849e-05 +
Error(Iter) 1.03474e-16 2.23046e-16 2.01273e-16 4.87455e-07 (48) 1.03553e-16 (2) 3.55965e-16 (2) 2.48189e-16 1.88808e-16 1.97976e-16 2.37248e-16 1.82701e-16 2.71474e-16 2.11322e-16 3.547e-09 (48) +
sherman1 1000 3750 Compute Time 0.002288050.002092310.005282689.846e-060.001635220.001621550.0007892590.0008044950.00438269 +
Solve Time 0.0002137889.7983e-050.0009388310.006298350.0003617640.000787944.3989e-052.5331e-050.000917166 +
Total Time 0.002501840.002190290.006221510.00630820.001996980.002409490.0008332480.0008298260.00529986 +
Error(Iter) 1.16839e-16 2.25968e-16 2.59116e-16 3.76779e-11 (248) 4.13343e-11 (4) 2.22347e-14 (10) 2.05861e-16 1.83555e-16 1.02917e-15 +
young1c 841 4089 Compute Time 0.002358430.002172280.005680751.2735e-050.002648660.00258236 +
Solve Time 0.0003295990.0001686340.000801180.05347380.001871930.00450211 +
Total Time 0.002688030.002340910.006481930.05348650.004520590.00708447 +
Error(Iter) 1.27029e-16 2.81321e-16 5.0492e-15 8.0507e-11 (706) 3.00447e-12 (8) 1.46532e-12 (16) +
mhd1280b 1280 22778 Compute Time 0.002348980.002070790.005709182.5976e-050.003025630.002980360.001445250.0009199220.00426444 +
Solve Time 0.001033920.0002119110.001050.01104320.0006282870.003920890.0001383036.2446e-050.00097564 +
Total Time 0.00338290.00228270.006759180.01106920.003653920.006901240.001583550.0009823680.00524008 +
Error(Iter) 1.32953e-16 3.08646e-16 6.734e-16 8.83132e-11 (40) 1.51153e-16 (1) 6.08556e-16 (8) 1.89264e-16 1.97477e-16 6.68126e-09 +
crashbasis 160000 1750416 Compute Time 3.20195.789215.75730.003835153.10063.09921 +
Solve Time 0.2619150.1062250.4021411.490890.248880.443673 +
Total Time 3.463815.8954216.15941.494733.349483.54288 +
Error(Iter) 1.76348e-16 4.58395e-16 1.67982e-14 8.64144e-11 (61) 8.5996e-12 (2) 6.04042e-14 (5) + +
+*/ +} \ No newline at end of file -- cgit v1.2.3