diff options
author | Gael Guennebaud <g.gael@free.fr> | 2012-10-30 15:17:58 +0100 |
---|---|---|
committer | Gael Guennebaud <g.gael@free.fr> | 2012-10-30 15:17:58 +0100 |
commit | 90fcaf11cf2e2928527fdc9df0a49389000ce603 (patch) | |
tree | 6db89b0e5d50630ddf2b8a42c3e0225f174d2f04 /Eigen | |
parent | ac8c694f3ef627ad604b17613e9d538dc5582c9d (diff) |
SparseLU: remove the "snode" path which appears to bring nearly zero speedup
Diffstat (limited to 'Eigen')
-rw-r--r-- | Eigen/src/SparseLU/SparseLU.h | 184 | ||||
-rw-r--r-- | Eigen/src/SparseLU/SparseLUBase.h | 2 | ||||
-rw-r--r-- | Eigen/src/SparseLU/SparseLU_snode_bmod.h | 84 | ||||
-rw-r--r-- | Eigen/src/SparseLU/SparseLU_snode_dfs.h | 95 |
4 files changed, 63 insertions, 302 deletions
diff --git a/Eigen/src/SparseLU/SparseLU.h b/Eigen/src/SparseLU/SparseLU.h index 24d120a21..8e82aca31 100644 --- a/Eigen/src/SparseLU/SparseLU.h +++ b/Eigen/src/SparseLU/SparseLU.h @@ -455,144 +455,85 @@ void SparseLU<MatrixType, OrderingType>::factorize(const MatrixType& matrix) int i, k, jj; for (jcol = 0; jcol < n; ) { - if (relax_end(jcol) != IND_EMPTY) - { // Starting a relaxed node from jcol - kcol = relax_end(jcol); // End index of the relaxed snode + // Adjust panel size so that a panel won't overlap with the next relaxed snode. + int panel_size = m_perfv.panel_size; // upper bound on panel width + for (k = jcol + 1; k < (std::min)(jcol+panel_size, n); k++) + { + if (relax_end(k) != IND_EMPTY) + { + panel_size = k - jcol; + break; + } + } + if (k == n) + panel_size = n - jcol; + + // Symbolic outer factorization on a panel of columns + SparseLUBase<Scalar,Index>::LU_panel_dfs(m, panel_size, jcol, m_mat, m_perm_r.indices(), nseg1, dense, panel_lsub, segrep, repfnz, xprune, marker, parent, xplore, m_glu); + + // Numeric sup-panel updates in topological order + SparseLUBase<Scalar,Index>::LU_panel_bmod(m, panel_size, jcol, nseg1, dense, tempv, segrep, repfnz, m_perfv, m_glu); + + // Sparse LU within the panel, and below the panel diagonal + for ( jj = jcol; jj< jcol + panel_size; jj++) + { + k = (jj - jcol) * m; // Column index for w-wide arrays - // Factorize the relaxed supernode(jcol:kcol) - // First, determine the union of the row structure of the snode - info = SparseLUBase<Scalar,Index>::LU_snode_dfs(jcol, kcol, m_mat, xprune, marker, m_glu); + nseg = nseg1; // begin after all the panel segments + //Depth-first-search for the current column + VectorBlock<IndexVector> panel_lsubk(panel_lsub, k, m); + VectorBlock<IndexVector> repfnz_k(repfnz, k, m); + info = SparseLUBase<Scalar,Index>::LU_column_dfs(m, jj, m_perm_r.indices(), m_perfv.maxsuper, nseg, panel_lsubk, segrep, repfnz_k, xprune, marker, parent, xplore, m_glu); if ( info ) { - std::cerr << "MEMORY ALLOCATION FAILED IN SNODE_DFS() \n"; + std::cerr << "UNABLE TO EXPAND MEMORY IN COLUMN_DFS() \n"; m_info = NumericalIssue; m_factorizationIsOk = false; return; } - nextu = m_glu.xusub(jcol); //starting location of column jcol in ucol - nextlu = m_glu.xlusup(jcol); //Starting location of column jcol in lusup (rectangular supernodes) - jsupno = m_glu.supno(jcol); // Supernode number which column jcol belongs to - fsupc = m_glu.xsup(jsupno); //First column number of the current supernode - int lda = m_glu.xusub(fsupc+1) - m_glu.xusub(fsupc); - lda = m_glu.xlsub(fsupc+1)-m_glu.xlsub(fsupc); - new_next = nextlu + lda * (kcol - jcol + 1); - int mem; - while (new_next > m_glu.nzlumax ) + // Numeric updates to this column + VectorBlock<ScalarVector> dense_k(dense, k, m); + VectorBlock<IndexVector> segrep_k(segrep, nseg1, m-nseg1); + info = SparseLUBase<Scalar,Index>::LU_column_bmod(jj, (nseg - nseg1), dense_k, tempv, segrep_k, repfnz_k, jcol, m_glu); + if ( info ) { - mem = SparseLUBase<Scalar,Index>::LUMemXpand(m_glu.lusup, m_glu.nzlumax, nextlu, LUSUP, m_glu.num_expansions); - if (mem) - { - std::cerr << "MEMORY ALLOCATION FAILED FOR L FACTOR \n"; - m_factorizationIsOk = false; - return; - } + std::cerr << "UNABLE TO EXPAND MEMORY IN COLUMN_BMOD() \n"; + m_info = NumericalIssue; + m_factorizationIsOk = false; + return; } - // Now, left-looking factorize each column within the snode - for (icol = jcol; icol<=kcol; icol++){ - m_glu.xusub(icol+1) = nextu; - // Scatter into SPA dense(*) - for (typename MatrixType::InnerIterator it(m_mat, icol); it; ++it) - dense(it.row()) = it.value(); - - // Numeric update within the snode - SparseLUBase<Scalar,Index>::LU_snode_bmod(icol, fsupc, dense, m_glu); - - // Eliminate the current column - info = SparseLUBase<Scalar,Index>::LU_pivotL(icol, m_diagpivotthresh, m_perm_r.indices(), iperm_c.indices(), pivrow, m_glu); - if ( info ) - { - m_info = NumericalIssue; - std::cerr<< "THE MATRIX IS STRUCTURALLY SINGULAR ... ZERO COLUMN AT " << info <<std::endl; - m_factorizationIsOk = false; - return; - } + // Copy the U-segments to ucol(*) + info = SparseLUBase<Scalar,Index>::LU_copy_to_ucol(jj, nseg, segrep, repfnz_k ,m_perm_r.indices(), dense_k, m_glu); + if ( info ) + { + std::cerr << "UNABLE TO EXPAND MEMORY IN COPY_TO_UCOL() \n"; + m_info = NumericalIssue; + m_factorizationIsOk = false; + return; } - jcol = icol; // The last column te be eliminated - } - else - { // Work on one panel of panel_size columns - // Adjust panel size so that a panel won't overlap with the next relaxed snode. - int panel_size = m_perfv.panel_size; // upper bound on panel width - for (k = jcol + 1; k < (std::min)(jcol+panel_size, n); k++) + // Form the L-segment + info = SparseLUBase<Scalar,Index>::LU_pivotL(jj, m_diagpivotthresh, m_perm_r.indices(), iperm_c.indices(), pivrow, m_glu); + if ( info ) { - if (relax_end(k) != IND_EMPTY) - { - panel_size = k - jcol; - break; - } + std::cerr<< "THE MATRIX IS STRUCTURALLY SINGULAR ... ZERO COLUMN AT " << info <<std::endl; + m_info = NumericalIssue; + m_factorizationIsOk = false; + return; } - if (k == n) - panel_size = n - jcol; - - // Symbolic outer factorization on a panel of columns - SparseLUBase<Scalar,Index>::LU_panel_dfs(m, panel_size, jcol, m_mat, m_perm_r.indices(), nseg1, dense, panel_lsub, segrep, repfnz, xprune, marker, parent, xplore, m_glu); - // Numeric sup-panel updates in topological order - SparseLUBase<Scalar,Index>::LU_panel_bmod(m, panel_size, jcol, nseg1, dense, tempv, segrep, repfnz, m_perfv, m_glu); + // Prune columns (0:jj-1) using column jj + SparseLUBase<Scalar,Index>::LU_pruneL(jj, m_perm_r.indices(), pivrow, nseg, segrep, repfnz_k, xprune, m_glu); - // Sparse LU within the panel, and below the panel diagonal - for ( jj = jcol; jj< jcol + panel_size; jj++) + // Reset repfnz for this column + for (i = 0; i < nseg; i++) { - k = (jj - jcol) * m; // Column index for w-wide arrays - - nseg = nseg1; // begin after all the panel segments - //Depth-first-search for the current column - VectorBlock<IndexVector> panel_lsubk(panel_lsub, k, m); - VectorBlock<IndexVector> repfnz_k(repfnz, k, m); - info = SparseLUBase<Scalar,Index>::LU_column_dfs(m, jj, m_perm_r.indices(), m_perfv.maxsuper, nseg, panel_lsubk, segrep, repfnz_k, xprune, marker, parent, xplore, m_glu); - if ( info ) - { - std::cerr << "UNABLE TO EXPAND MEMORY IN COLUMN_DFS() \n"; - m_info = NumericalIssue; - m_factorizationIsOk = false; - return; - } - // Numeric updates to this column - VectorBlock<ScalarVector> dense_k(dense, k, m); - VectorBlock<IndexVector> segrep_k(segrep, nseg1, m-nseg1); - info = SparseLUBase<Scalar,Index>::LU_column_bmod(jj, (nseg - nseg1), dense_k, tempv, segrep_k, repfnz_k, jcol, m_glu); - if ( info ) - { - std::cerr << "UNABLE TO EXPAND MEMORY IN COLUMN_BMOD() \n"; - m_info = NumericalIssue; - m_factorizationIsOk = false; - return; - } - - // Copy the U-segments to ucol(*) - info = SparseLUBase<Scalar,Index>::LU_copy_to_ucol(jj, nseg, segrep, repfnz_k ,m_perm_r.indices(), dense_k, m_glu); - if ( info ) - { - std::cerr << "UNABLE TO EXPAND MEMORY IN COPY_TO_UCOL() \n"; - m_info = NumericalIssue; - m_factorizationIsOk = false; - return; - } - - // Form the L-segment - info = SparseLUBase<Scalar,Index>::LU_pivotL(jj, m_diagpivotthresh, m_perm_r.indices(), iperm_c.indices(), pivrow, m_glu); - if ( info ) - { - std::cerr<< "THE MATRIX IS STRUCTURALLY SINGULAR ... ZERO COLUMN AT " << info <<std::endl; - m_info = NumericalIssue; - m_factorizationIsOk = false; - return; - } - - // Prune columns (0:jj-1) using column jj - SparseLUBase<Scalar,Index>::LU_pruneL(jj, m_perm_r.indices(), pivrow, nseg, segrep, repfnz_k, xprune, m_glu); - - // Reset repfnz for this column - for (i = 0; i < nseg; i++) - { - irep = segrep(i); - repfnz_k(irep) = IND_EMPTY; - } - } // end SparseLU within the panel - jcol += panel_size; // Move to the next panel - } // end else + irep = segrep(i); + repfnz_k(irep) = IND_EMPTY; + } + } // end SparseLU within the panel + jcol += panel_size; // Move to the next panel } // end for -- end elimination // Count the number of nonzeros in factors @@ -628,4 +569,5 @@ struct solve_retval<SparseLU<_MatrixType,Derived>, Rhs> } // end namespace internal } // End namespace Eigen + #endif diff --git a/Eigen/src/SparseLU/SparseLUBase.h b/Eigen/src/SparseLU/SparseLUBase.h index bf6bf0505..20338a646 100644 --- a/Eigen/src/SparseLU/SparseLUBase.h +++ b/Eigen/src/SparseLU/SparseLUBase.h @@ -57,8 +57,6 @@ struct SparseLUBase #include "SparseLU_Memory.h" #include "SparseLU_heap_relax_snode.h" #include "SparseLU_relax_snode.h" -#include "SparseLU_snode_dfs.h" -#include "SparseLU_snode_bmod.h" #include "SparseLU_pivotL.h" #include "SparseLU_panel_dfs.h" #include "SparseLU_kernel_bmod.h" diff --git a/Eigen/src/SparseLU/SparseLU_snode_bmod.h b/Eigen/src/SparseLU/SparseLU_snode_bmod.h deleted file mode 100644 index 6774dc7bc..000000000 --- a/Eigen/src/SparseLU/SparseLU_snode_bmod.h +++ /dev/null @@ -1,84 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -/* - - * NOTE: This file is the modified version of [s,d,c,z]snode_bmod.c file in SuperLU - - * -- SuperLU routine (version 3.0) -- - * Univ. of California Berkeley, Xerox Palo Alto Research Center, - * and Lawrence Berkeley National Lab. - * October 15, 2003 - * - * Copyright (c) 1994 by Xerox Corporation. All rights reserved. - * - * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY - * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. - * - * Permission is hereby granted to use or copy this program for any - * purpose, provided the above notices are retained on all copies. - * Permission to modify the code and to distribute modified code is - * granted, provided the above notices are retained, and a notice that - * the code was modified is included with the above copyright notice. - */ -#ifndef SPARSELU_SNODE_BMOD_H -#define SPARSELU_SNODE_BMOD_H -template <typename Scalar, typename Index> -int SparseLUBase<Scalar,Index>::LU_snode_bmod (const int jcol, const int fsupc, ScalarVector& dense, GlobalLU_t& glu) -{ - /* lsub : Compressed row subscripts of ( rectangular supernodes ) - * xlsub : xlsub[j] is the starting location of the j-th column in lsub(*) - * lusup : Numerical values of the rectangular supernodes - * xlusup[j] is the starting location of the j-th column in lusup(*) - */ - int nextlu = glu.xlusup(jcol); // Starting location of the next column to add - int irow, isub; - // Process the supernodal portion of L\U[*,jcol] - for (isub = glu.xlsub(fsupc); isub < glu.xlsub(fsupc+1); isub++) - { - irow = glu.lsub(isub); - glu.lusup(nextlu) = dense(irow); - dense(irow) = 0; - ++nextlu; - } - // Make sure the leading dimension is a multiple of the underlying packet size - // so that fast fully aligned kernels can be enabled: - { - Index lda = nextlu-glu.xlusup(jcol); - int offset = internal::first_multiple<Index>(lda, internal::packet_traits<Scalar>::size) - lda; - if(offset) - { - glu.lusup.segment(nextlu,offset).setZero(); - nextlu += offset; - } - } - glu.xlusup(jcol + 1) = nextlu; // Initialize xlusup for next column ( jcol+1 ) - - if (fsupc < jcol ){ - int luptr = glu.xlusup(fsupc); // points to the first column of the supernode - int nsupr = glu.xlsub(fsupc + 1) -glu.xlsub(fsupc); //Number of rows in the supernode - int nsupc = jcol - fsupc; // Number of columns in the supernodal portion of L\U[*,jcol] - int ufirst = glu.xlusup(jcol); // points to the beginning of column jcol in supernode L\U(jsupno) - int lda = glu.xlusup(jcol+1) - ufirst; - - int nrow = nsupr - nsupc; // Number of rows in the off-diagonal blocks - - // Solve the triangular system for U(fsupc:jcol, jcol) with L(fspuc:jcol, fsupc:jcol) - Map<Matrix<Scalar,Dynamic,Dynamic>,0,OuterStride<> > A( &(glu.lusup.data()[luptr]), nsupc, nsupc, OuterStride<>(lda) ); - VectorBlock<ScalarVector> u(glu.lusup, ufirst, nsupc); - u = A.template triangularView<UnitLower>().solve(u); // Call the Eigen dense triangular solve interface - - // Update the trailing part of the column jcol U(jcol:jcol+nrow, jcol) using L(jcol:jcol+nrow, fsupc:jcol) and U(fsupc:jcol) - new (&A) Map<Matrix<Scalar,Dynamic,Dynamic>,0,OuterStride<> > ( &(glu.lusup.data()[luptr+nsupc]), nrow, nsupc, OuterStride<>(lda) ); - VectorBlock<ScalarVector> l(glu.lusup, ufirst+nsupc, nrow); - l.noalias() -= A * u; - } - return 0; -} -#endif
\ No newline at end of file diff --git a/Eigen/src/SparseLU/SparseLU_snode_dfs.h b/Eigen/src/SparseLU/SparseLU_snode_dfs.h deleted file mode 100644 index 199436cd7..000000000 --- a/Eigen/src/SparseLU/SparseLU_snode_dfs.h +++ /dev/null @@ -1,95 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -/* - - * NOTE: This file is the modified version of [s,d,c,z]snode_dfs.c file in SuperLU - - * -- SuperLU routine (version 2.0) -- - * Univ. of California Berkeley, Xerox Palo Alto Research Center, - * and Lawrence Berkeley National Lab. - * November 15, 1997 - * - * Copyright (c) 1994 by Xerox Corporation. All rights reserved. - * - * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY - * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. - * - * Permission is hereby granted to use or copy this program for any - * purpose, provided the above notices are retained on all copies. - * Permission to modify the code and to distribute modified code is - * granted, provided the above notices are retained, and a notice that - * the code was modified is included with the above copyright notice. - */ -#ifndef SPARSELU_SNODE_DFS_H -#define SPARSELU_SNODE_DFS_H - /** - * \brief Determine the union of the row structures of those columns within the relaxed snode. - * NOTE: The relaxed snodes are leaves of the supernodal etree, therefore, - * the portion outside the rectangular supernode must be zero. - * - * \param jcol start of the supernode - * \param kcol end of the supernode - * \param asub Row indices - * \param colptr Pointer to the beginning of each column - * \param xprune (out) The pruned tree ?? - * \param marker (in/out) working vector - * \return 0 on success, > 0 size of the memory when memory allocation failed - */ -template <typename Scalar, typename Index> -int SparseLUBase<Scalar,Index>::LU_snode_dfs(const int jcol, const int kcol,const MatrixType& mat, IndexVector& xprune, IndexVector& marker, GlobalLU_t& glu) -{ - int mem; - Index nsuper = ++glu.supno(jcol); // Next available supernode number - int nextl = glu.xlsub(jcol); //Index of the starting location of the jcol-th column in lsub - int krow,kmark; - for (int i = jcol; i <=kcol; i++) - { - // For each nonzero in A(*,i) - for (typename MatrixType::InnerIterator it(mat, i); it; ++it) - { - krow = it.row(); - kmark = marker(krow); - if ( kmark != kcol ) - { - // First time to visit krow - marker(krow) = kcol; - glu.lsub(nextl++) = krow; - if( nextl >= glu.nzlmax ) - { - mem = LUMemXpand<IndexVector>(glu.lsub, glu.nzlmax, nextl, LSUB, glu.num_expansions); - if (mem) return mem; // Memory expansion failed... Return the memory allocated so far - } - } - } - glu.supno(i) = nsuper; - } - - // If supernode > 1, then make a copy of the subscripts for pruning - if (jcol < kcol) - { - Index new_next = nextl + (nextl - glu.xlsub(jcol)); - while (new_next > glu.nzlmax) - { - mem = LUMemXpand<IndexVector>(glu.lsub, glu.nzlmax, nextl, LSUB, glu.num_expansions); - if (mem) return mem; // Memory expansion failed... Return the memory allocated so far - } - Index ifrom, ito = nextl; - for (ifrom = glu.xlsub(jcol); ifrom < nextl;) - glu.lsub(ito++) = glu.lsub(ifrom++); - for (int i = jcol+1; i <=kcol; i++) glu.xlsub(i) = nextl; - nextl = ito; - } - glu.xsup(nsuper+1) = kcol + 1; // Start of next available supernode - glu.supno(kcol+1) = nsuper; - xprune(kcol) = nextl; - glu.xlsub(kcol+1) = nextl; - return 0; -} -#endif
\ No newline at end of file |