aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen
diff options
context:
space:
mode:
authorGravatar Gael Guennebaud <g.gael@free.fr>2012-10-30 15:17:58 +0100
committerGravatar Gael Guennebaud <g.gael@free.fr>2012-10-30 15:17:58 +0100
commit90fcaf11cf2e2928527fdc9df0a49389000ce603 (patch)
tree6db89b0e5d50630ddf2b8a42c3e0225f174d2f04 /Eigen
parentac8c694f3ef627ad604b17613e9d538dc5582c9d (diff)
SparseLU: remove the "snode" path which appears to bring nearly zero speedup
Diffstat (limited to 'Eigen')
-rw-r--r--Eigen/src/SparseLU/SparseLU.h184
-rw-r--r--Eigen/src/SparseLU/SparseLUBase.h2
-rw-r--r--Eigen/src/SparseLU/SparseLU_snode_bmod.h84
-rw-r--r--Eigen/src/SparseLU/SparseLU_snode_dfs.h95
4 files changed, 63 insertions, 302 deletions
diff --git a/Eigen/src/SparseLU/SparseLU.h b/Eigen/src/SparseLU/SparseLU.h
index 24d120a21..8e82aca31 100644
--- a/Eigen/src/SparseLU/SparseLU.h
+++ b/Eigen/src/SparseLU/SparseLU.h
@@ -455,144 +455,85 @@ void SparseLU<MatrixType, OrderingType>::factorize(const MatrixType& matrix)
int i, k, jj;
for (jcol = 0; jcol < n; )
{
- if (relax_end(jcol) != IND_EMPTY)
- { // Starting a relaxed node from jcol
- kcol = relax_end(jcol); // End index of the relaxed snode
+ // Adjust panel size so that a panel won't overlap with the next relaxed snode.
+ int panel_size = m_perfv.panel_size; // upper bound on panel width
+ for (k = jcol + 1; k < (std::min)(jcol+panel_size, n); k++)
+ {
+ if (relax_end(k) != IND_EMPTY)
+ {
+ panel_size = k - jcol;
+ break;
+ }
+ }
+ if (k == n)
+ panel_size = n - jcol;
+
+ // Symbolic outer factorization on a panel of columns
+ SparseLUBase<Scalar,Index>::LU_panel_dfs(m, panel_size, jcol, m_mat, m_perm_r.indices(), nseg1, dense, panel_lsub, segrep, repfnz, xprune, marker, parent, xplore, m_glu);
+
+ // Numeric sup-panel updates in topological order
+ SparseLUBase<Scalar,Index>::LU_panel_bmod(m, panel_size, jcol, nseg1, dense, tempv, segrep, repfnz, m_perfv, m_glu);
+
+ // Sparse LU within the panel, and below the panel diagonal
+ for ( jj = jcol; jj< jcol + panel_size; jj++)
+ {
+ k = (jj - jcol) * m; // Column index for w-wide arrays
- // Factorize the relaxed supernode(jcol:kcol)
- // First, determine the union of the row structure of the snode
- info = SparseLUBase<Scalar,Index>::LU_snode_dfs(jcol, kcol, m_mat, xprune, marker, m_glu);
+ nseg = nseg1; // begin after all the panel segments
+ //Depth-first-search for the current column
+ VectorBlock<IndexVector> panel_lsubk(panel_lsub, k, m);
+ VectorBlock<IndexVector> repfnz_k(repfnz, k, m);
+ info = SparseLUBase<Scalar,Index>::LU_column_dfs(m, jj, m_perm_r.indices(), m_perfv.maxsuper, nseg, panel_lsubk, segrep, repfnz_k, xprune, marker, parent, xplore, m_glu);
if ( info )
{
- std::cerr << "MEMORY ALLOCATION FAILED IN SNODE_DFS() \n";
+ std::cerr << "UNABLE TO EXPAND MEMORY IN COLUMN_DFS() \n";
m_info = NumericalIssue;
m_factorizationIsOk = false;
return;
}
- nextu = m_glu.xusub(jcol); //starting location of column jcol in ucol
- nextlu = m_glu.xlusup(jcol); //Starting location of column jcol in lusup (rectangular supernodes)
- jsupno = m_glu.supno(jcol); // Supernode number which column jcol belongs to
- fsupc = m_glu.xsup(jsupno); //First column number of the current supernode
- int lda = m_glu.xusub(fsupc+1) - m_glu.xusub(fsupc);
- lda = m_glu.xlsub(fsupc+1)-m_glu.xlsub(fsupc);
- new_next = nextlu + lda * (kcol - jcol + 1);
- int mem;
- while (new_next > m_glu.nzlumax )
+ // Numeric updates to this column
+ VectorBlock<ScalarVector> dense_k(dense, k, m);
+ VectorBlock<IndexVector> segrep_k(segrep, nseg1, m-nseg1);
+ info = SparseLUBase<Scalar,Index>::LU_column_bmod(jj, (nseg - nseg1), dense_k, tempv, segrep_k, repfnz_k, jcol, m_glu);
+ if ( info )
{
- mem = SparseLUBase<Scalar,Index>::LUMemXpand(m_glu.lusup, m_glu.nzlumax, nextlu, LUSUP, m_glu.num_expansions);
- if (mem)
- {
- std::cerr << "MEMORY ALLOCATION FAILED FOR L FACTOR \n";
- m_factorizationIsOk = false;
- return;
- }
+ std::cerr << "UNABLE TO EXPAND MEMORY IN COLUMN_BMOD() \n";
+ m_info = NumericalIssue;
+ m_factorizationIsOk = false;
+ return;
}
- // Now, left-looking factorize each column within the snode
- for (icol = jcol; icol<=kcol; icol++){
- m_glu.xusub(icol+1) = nextu;
- // Scatter into SPA dense(*)
- for (typename MatrixType::InnerIterator it(m_mat, icol); it; ++it)
- dense(it.row()) = it.value();
-
- // Numeric update within the snode
- SparseLUBase<Scalar,Index>::LU_snode_bmod(icol, fsupc, dense, m_glu);
-
- // Eliminate the current column
- info = SparseLUBase<Scalar,Index>::LU_pivotL(icol, m_diagpivotthresh, m_perm_r.indices(), iperm_c.indices(), pivrow, m_glu);
- if ( info )
- {
- m_info = NumericalIssue;
- std::cerr<< "THE MATRIX IS STRUCTURALLY SINGULAR ... ZERO COLUMN AT " << info <<std::endl;
- m_factorizationIsOk = false;
- return;
- }
+ // Copy the U-segments to ucol(*)
+ info = SparseLUBase<Scalar,Index>::LU_copy_to_ucol(jj, nseg, segrep, repfnz_k ,m_perm_r.indices(), dense_k, m_glu);
+ if ( info )
+ {
+ std::cerr << "UNABLE TO EXPAND MEMORY IN COPY_TO_UCOL() \n";
+ m_info = NumericalIssue;
+ m_factorizationIsOk = false;
+ return;
}
- jcol = icol; // The last column te be eliminated
- }
- else
- { // Work on one panel of panel_size columns
- // Adjust panel size so that a panel won't overlap with the next relaxed snode.
- int panel_size = m_perfv.panel_size; // upper bound on panel width
- for (k = jcol + 1; k < (std::min)(jcol+panel_size, n); k++)
+ // Form the L-segment
+ info = SparseLUBase<Scalar,Index>::LU_pivotL(jj, m_diagpivotthresh, m_perm_r.indices(), iperm_c.indices(), pivrow, m_glu);
+ if ( info )
{
- if (relax_end(k) != IND_EMPTY)
- {
- panel_size = k - jcol;
- break;
- }
+ std::cerr<< "THE MATRIX IS STRUCTURALLY SINGULAR ... ZERO COLUMN AT " << info <<std::endl;
+ m_info = NumericalIssue;
+ m_factorizationIsOk = false;
+ return;
}
- if (k == n)
- panel_size = n - jcol;
-
- // Symbolic outer factorization on a panel of columns
- SparseLUBase<Scalar,Index>::LU_panel_dfs(m, panel_size, jcol, m_mat, m_perm_r.indices(), nseg1, dense, panel_lsub, segrep, repfnz, xprune, marker, parent, xplore, m_glu);
- // Numeric sup-panel updates in topological order
- SparseLUBase<Scalar,Index>::LU_panel_bmod(m, panel_size, jcol, nseg1, dense, tempv, segrep, repfnz, m_perfv, m_glu);
+ // Prune columns (0:jj-1) using column jj
+ SparseLUBase<Scalar,Index>::LU_pruneL(jj, m_perm_r.indices(), pivrow, nseg, segrep, repfnz_k, xprune, m_glu);
- // Sparse LU within the panel, and below the panel diagonal
- for ( jj = jcol; jj< jcol + panel_size; jj++)
+ // Reset repfnz for this column
+ for (i = 0; i < nseg; i++)
{
- k = (jj - jcol) * m; // Column index for w-wide arrays
-
- nseg = nseg1; // begin after all the panel segments
- //Depth-first-search for the current column
- VectorBlock<IndexVector> panel_lsubk(panel_lsub, k, m);
- VectorBlock<IndexVector> repfnz_k(repfnz, k, m);
- info = SparseLUBase<Scalar,Index>::LU_column_dfs(m, jj, m_perm_r.indices(), m_perfv.maxsuper, nseg, panel_lsubk, segrep, repfnz_k, xprune, marker, parent, xplore, m_glu);
- if ( info )
- {
- std::cerr << "UNABLE TO EXPAND MEMORY IN COLUMN_DFS() \n";
- m_info = NumericalIssue;
- m_factorizationIsOk = false;
- return;
- }
- // Numeric updates to this column
- VectorBlock<ScalarVector> dense_k(dense, k, m);
- VectorBlock<IndexVector> segrep_k(segrep, nseg1, m-nseg1);
- info = SparseLUBase<Scalar,Index>::LU_column_bmod(jj, (nseg - nseg1), dense_k, tempv, segrep_k, repfnz_k, jcol, m_glu);
- if ( info )
- {
- std::cerr << "UNABLE TO EXPAND MEMORY IN COLUMN_BMOD() \n";
- m_info = NumericalIssue;
- m_factorizationIsOk = false;
- return;
- }
-
- // Copy the U-segments to ucol(*)
- info = SparseLUBase<Scalar,Index>::LU_copy_to_ucol(jj, nseg, segrep, repfnz_k ,m_perm_r.indices(), dense_k, m_glu);
- if ( info )
- {
- std::cerr << "UNABLE TO EXPAND MEMORY IN COPY_TO_UCOL() \n";
- m_info = NumericalIssue;
- m_factorizationIsOk = false;
- return;
- }
-
- // Form the L-segment
- info = SparseLUBase<Scalar,Index>::LU_pivotL(jj, m_diagpivotthresh, m_perm_r.indices(), iperm_c.indices(), pivrow, m_glu);
- if ( info )
- {
- std::cerr<< "THE MATRIX IS STRUCTURALLY SINGULAR ... ZERO COLUMN AT " << info <<std::endl;
- m_info = NumericalIssue;
- m_factorizationIsOk = false;
- return;
- }
-
- // Prune columns (0:jj-1) using column jj
- SparseLUBase<Scalar,Index>::LU_pruneL(jj, m_perm_r.indices(), pivrow, nseg, segrep, repfnz_k, xprune, m_glu);
-
- // Reset repfnz for this column
- for (i = 0; i < nseg; i++)
- {
- irep = segrep(i);
- repfnz_k(irep) = IND_EMPTY;
- }
- } // end SparseLU within the panel
- jcol += panel_size; // Move to the next panel
- } // end else
+ irep = segrep(i);
+ repfnz_k(irep) = IND_EMPTY;
+ }
+ } // end SparseLU within the panel
+ jcol += panel_size; // Move to the next panel
} // end for -- end elimination
// Count the number of nonzeros in factors
@@ -628,4 +569,5 @@ struct solve_retval<SparseLU<_MatrixType,Derived>, Rhs>
} // end namespace internal
} // End namespace Eigen
+
#endif
diff --git a/Eigen/src/SparseLU/SparseLUBase.h b/Eigen/src/SparseLU/SparseLUBase.h
index bf6bf0505..20338a646 100644
--- a/Eigen/src/SparseLU/SparseLUBase.h
+++ b/Eigen/src/SparseLU/SparseLUBase.h
@@ -57,8 +57,6 @@ struct SparseLUBase
#include "SparseLU_Memory.h"
#include "SparseLU_heap_relax_snode.h"
#include "SparseLU_relax_snode.h"
-#include "SparseLU_snode_dfs.h"
-#include "SparseLU_snode_bmod.h"
#include "SparseLU_pivotL.h"
#include "SparseLU_panel_dfs.h"
#include "SparseLU_kernel_bmod.h"
diff --git a/Eigen/src/SparseLU/SparseLU_snode_bmod.h b/Eigen/src/SparseLU/SparseLU_snode_bmod.h
deleted file mode 100644
index 6774dc7bc..000000000
--- a/Eigen/src/SparseLU/SparseLU_snode_bmod.h
+++ /dev/null
@@ -1,84 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-/*
-
- * NOTE: This file is the modified version of [s,d,c,z]snode_bmod.c file in SuperLU
-
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- * Copyright (c) 1994 by Xerox Corporation. All rights reserved.
- *
- * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
- * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
- *
- * Permission is hereby granted to use or copy this program for any
- * purpose, provided the above notices are retained on all copies.
- * Permission to modify the code and to distribute modified code is
- * granted, provided the above notices are retained, and a notice that
- * the code was modified is included with the above copyright notice.
- */
-#ifndef SPARSELU_SNODE_BMOD_H
-#define SPARSELU_SNODE_BMOD_H
-template <typename Scalar, typename Index>
-int SparseLUBase<Scalar,Index>::LU_snode_bmod (const int jcol, const int fsupc, ScalarVector& dense, GlobalLU_t& glu)
-{
- /* lsub : Compressed row subscripts of ( rectangular supernodes )
- * xlsub : xlsub[j] is the starting location of the j-th column in lsub(*)
- * lusup : Numerical values of the rectangular supernodes
- * xlusup[j] is the starting location of the j-th column in lusup(*)
- */
- int nextlu = glu.xlusup(jcol); // Starting location of the next column to add
- int irow, isub;
- // Process the supernodal portion of L\U[*,jcol]
- for (isub = glu.xlsub(fsupc); isub < glu.xlsub(fsupc+1); isub++)
- {
- irow = glu.lsub(isub);
- glu.lusup(nextlu) = dense(irow);
- dense(irow) = 0;
- ++nextlu;
- }
- // Make sure the leading dimension is a multiple of the underlying packet size
- // so that fast fully aligned kernels can be enabled:
- {
- Index lda = nextlu-glu.xlusup(jcol);
- int offset = internal::first_multiple<Index>(lda, internal::packet_traits<Scalar>::size) - lda;
- if(offset)
- {
- glu.lusup.segment(nextlu,offset).setZero();
- nextlu += offset;
- }
- }
- glu.xlusup(jcol + 1) = nextlu; // Initialize xlusup for next column ( jcol+1 )
-
- if (fsupc < jcol ){
- int luptr = glu.xlusup(fsupc); // points to the first column of the supernode
- int nsupr = glu.xlsub(fsupc + 1) -glu.xlsub(fsupc); //Number of rows in the supernode
- int nsupc = jcol - fsupc; // Number of columns in the supernodal portion of L\U[*,jcol]
- int ufirst = glu.xlusup(jcol); // points to the beginning of column jcol in supernode L\U(jsupno)
- int lda = glu.xlusup(jcol+1) - ufirst;
-
- int nrow = nsupr - nsupc; // Number of rows in the off-diagonal blocks
-
- // Solve the triangular system for U(fsupc:jcol, jcol) with L(fspuc:jcol, fsupc:jcol)
- Map<Matrix<Scalar,Dynamic,Dynamic>,0,OuterStride<> > A( &(glu.lusup.data()[luptr]), nsupc, nsupc, OuterStride<>(lda) );
- VectorBlock<ScalarVector> u(glu.lusup, ufirst, nsupc);
- u = A.template triangularView<UnitLower>().solve(u); // Call the Eigen dense triangular solve interface
-
- // Update the trailing part of the column jcol U(jcol:jcol+nrow, jcol) using L(jcol:jcol+nrow, fsupc:jcol) and U(fsupc:jcol)
- new (&A) Map<Matrix<Scalar,Dynamic,Dynamic>,0,OuterStride<> > ( &(glu.lusup.data()[luptr+nsupc]), nrow, nsupc, OuterStride<>(lda) );
- VectorBlock<ScalarVector> l(glu.lusup, ufirst+nsupc, nrow);
- l.noalias() -= A * u;
- }
- return 0;
-}
-#endif \ No newline at end of file
diff --git a/Eigen/src/SparseLU/SparseLU_snode_dfs.h b/Eigen/src/SparseLU/SparseLU_snode_dfs.h
deleted file mode 100644
index 199436cd7..000000000
--- a/Eigen/src/SparseLU/SparseLU_snode_dfs.h
+++ /dev/null
@@ -1,95 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-/*
-
- * NOTE: This file is the modified version of [s,d,c,z]snode_dfs.c file in SuperLU
-
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- * Copyright (c) 1994 by Xerox Corporation. All rights reserved.
- *
- * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
- * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
- *
- * Permission is hereby granted to use or copy this program for any
- * purpose, provided the above notices are retained on all copies.
- * Permission to modify the code and to distribute modified code is
- * granted, provided the above notices are retained, and a notice that
- * the code was modified is included with the above copyright notice.
- */
-#ifndef SPARSELU_SNODE_DFS_H
-#define SPARSELU_SNODE_DFS_H
- /**
- * \brief Determine the union of the row structures of those columns within the relaxed snode.
- * NOTE: The relaxed snodes are leaves of the supernodal etree, therefore,
- * the portion outside the rectangular supernode must be zero.
- *
- * \param jcol start of the supernode
- * \param kcol end of the supernode
- * \param asub Row indices
- * \param colptr Pointer to the beginning of each column
- * \param xprune (out) The pruned tree ??
- * \param marker (in/out) working vector
- * \return 0 on success, > 0 size of the memory when memory allocation failed
- */
-template <typename Scalar, typename Index>
-int SparseLUBase<Scalar,Index>::LU_snode_dfs(const int jcol, const int kcol,const MatrixType& mat, IndexVector& xprune, IndexVector& marker, GlobalLU_t& glu)
-{
- int mem;
- Index nsuper = ++glu.supno(jcol); // Next available supernode number
- int nextl = glu.xlsub(jcol); //Index of the starting location of the jcol-th column in lsub
- int krow,kmark;
- for (int i = jcol; i <=kcol; i++)
- {
- // For each nonzero in A(*,i)
- for (typename MatrixType::InnerIterator it(mat, i); it; ++it)
- {
- krow = it.row();
- kmark = marker(krow);
- if ( kmark != kcol )
- {
- // First time to visit krow
- marker(krow) = kcol;
- glu.lsub(nextl++) = krow;
- if( nextl >= glu.nzlmax )
- {
- mem = LUMemXpand<IndexVector>(glu.lsub, glu.nzlmax, nextl, LSUB, glu.num_expansions);
- if (mem) return mem; // Memory expansion failed... Return the memory allocated so far
- }
- }
- }
- glu.supno(i) = nsuper;
- }
-
- // If supernode > 1, then make a copy of the subscripts for pruning
- if (jcol < kcol)
- {
- Index new_next = nextl + (nextl - glu.xlsub(jcol));
- while (new_next > glu.nzlmax)
- {
- mem = LUMemXpand<IndexVector>(glu.lsub, glu.nzlmax, nextl, LSUB, glu.num_expansions);
- if (mem) return mem; // Memory expansion failed... Return the memory allocated so far
- }
- Index ifrom, ito = nextl;
- for (ifrom = glu.xlsub(jcol); ifrom < nextl;)
- glu.lsub(ito++) = glu.lsub(ifrom++);
- for (int i = jcol+1; i <=kcol; i++) glu.xlsub(i) = nextl;
- nextl = ito;
- }
- glu.xsup(nsuper+1) = kcol + 1; // Start of next available supernode
- glu.supno(kcol+1) = nsuper;
- xprune(kcol) = nextl;
- glu.xlsub(kcol+1) = nextl;
- return 0;
-}
-#endif \ No newline at end of file