diff options
author | Desire NUENTSA <desire.nuentsa_wakam@inria.fr> | 2012-09-07 13:18:16 +0200 |
---|---|---|
committer | Desire NUENTSA <desire.nuentsa_wakam@inria.fr> | 2012-09-07 13:18:16 +0200 |
commit | fdd0f0c5fc807f3b90e0442cfe4f3fa1f624b50a (patch) | |
tree | 63a7f166f41a90c9048b1d743fc6fb5b4b47a54a /Eigen | |
parent | 288e6aab14cc12e604bd1a12f0cba20d88edf54f (diff) | |
parent | 063705b5be5a41e324773887d3d5ae065321a719 (diff) |
merge Sparse LU branch
Diffstat (limited to 'Eigen')
28 files changed, 5986 insertions, 1 deletions
diff --git a/Eigen/MetisSupport b/Eigen/MetisSupport new file mode 100644 index 000000000..a44086ad9 --- /dev/null +++ b/Eigen/MetisSupport @@ -0,0 +1,26 @@ +#ifndef EIGEN_METISSUPPORT_MODULE_H +#define EIGEN_METISSUPPORT_MODULE_H + +#include "SparseCore" + +#include "src/Core/util/DisableStupidWarnings.h" + +extern "C" { +#include <metis.h> +} + + +/** \ingroup Support_modules + * \defgroup MetisSupport_Module MetisSupport module + * + * \code + * #include <Eigen/MetisSupport> + * \endcode + */ + + +#include "src/MetisSupport/MetisSupport.h" + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_METISSUPPORT_MODULE_H diff --git a/Eigen/OrderingMethods b/Eigen/OrderingMethods index 1e2d87452..bb43220e8 100644 --- a/Eigen/OrderingMethods +++ b/Eigen/OrderingMethods @@ -17,7 +17,7 @@ */ #include "src/OrderingMethods/Amd.h" - +#include "src/OrderingMethods/Ordering.h" #include "src/Core/util/ReenableStupidWarnings.h" #endif // EIGEN_ORDERINGMETHODS_MODULE_H diff --git a/Eigen/SparseLU b/Eigen/SparseLU new file mode 100644 index 000000000..452bc9f83 --- /dev/null +++ b/Eigen/SparseLU @@ -0,0 +1,17 @@ +#ifndef EIGEN_SPARSELU_MODULE_H +#define EIGEN_SPARSELU_MODULE_H + +#include "SparseCore" + + +/** \ingroup Sparse_modules + * \defgroup SparseLU_Module SparseLU module + * + */ + +// Ordering interface +#include "OrderingMethods" + +#include "src/SparseLU/SparseLU.h" + +#endif // EIGEN_SPARSELU_MODULE_H diff --git a/Eigen/src/MetisSupport/CMakeLists.txt b/Eigen/src/MetisSupport/CMakeLists.txt new file mode 100644 index 000000000..2bad31416 --- /dev/null +++ b/Eigen/src/MetisSupport/CMakeLists.txt @@ -0,0 +1,6 @@ +FILE(GLOB Eigen_MetisSupport_SRCS "*.h") + +INSTALL(FILES + ${Eigen_MetisSupport_SRCS} + DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/MetisSupport COMPONENT Devel + ) diff --git a/Eigen/src/MetisSupport/MetisSupport.h b/Eigen/src/MetisSupport/MetisSupport.h new file mode 100644 index 000000000..a762d96f6 --- /dev/null +++ b/Eigen/src/MetisSupport/MetisSupport.h @@ -0,0 +1,138 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. +#ifndef METIS_SUPPORT_H +#define METIS_SUPPORT_H + +namespace Eigen { +/** + * Get the fill-reducing ordering from the METIS package + * + * If A is the original matrix and Ap is the permuted matrix, + * the fill-reducing permutation is defined as follows : + * Row (column) i of A is the matperm(i) row (column) of Ap. + * WARNING: As computed by METIS, this corresponds to the vector iperm (instead of perm) + */ +template <typename Index> +class MetisOrdering +{ +public: + typedef PermutationMatrix<Dynamic,Dynamic,Index> PermutationType; + typedef Matrix<Index,Dynamic,1> IndexVector; + + template <typename MatrixType> + void get_symmetrized_graph(const MatrixType& A) + { + Index m = A.cols(); + + // Get the transpose of the input matrix + MatrixType At = A.transpose(); + // Get the number of nonzeros elements in each row/col of At+A + Index TotNz = 0; + IndexVector visited(m); + visited.setConstant(-1); + for (int j = 0; j < m; j++) + { + // Compute the union structure of of A(j,:) and At(j,:) + visited(j) = j; // Do not include the diagonal element + // Get the nonzeros in row/column j of A + for (typename MatrixType::InnerIterator it(A, j); it; ++it) + { + Index idx = it.index(); // Get the row index (for column major) or column index (for row major) + if (visited(idx) != j ) + { + visited(idx) = j; + ++TotNz; + } + } + //Get the nonzeros in row/column j of At + for (typename MatrixType::InnerIterator it(At, j); it; ++it) + { + Index idx = it.index(); + if(visited(idx) != j) + { + visited(idx) = j; + ++TotNz; + } + } + } + // Reserve place for A + At + m_indexPtr.resize(m+1); + m_innerIndices.resize(TotNz); + + // Now compute the real adjacency list of each column/row + visited.setConstant(-1); + Index CurNz = 0; + for (int j = 0; j < m; j++) + { + m_indexPtr(j) = CurNz; + + visited(j) = j; // Do not include the diagonal element + // Add the pattern of row/column j of A to A+At + for (typename MatrixType::InnerIterator it(A,j); it; ++it) + { + Index idx = it.index(); // Get the row index (for column major) or column index (for row major) + if (visited(idx) != j ) + { + visited(idx) = j; + m_innerIndices(CurNz) = idx; + CurNz++; + } + } + //Add the pattern of row/column j of At to A+At + for (typename MatrixType::InnerIterator it(At, j); it; ++it) + { + Index idx = it.index(); + if(visited(idx) != j) + { + visited(idx) = j; + m_innerIndices(CurNz) = idx; + ++CurNz; + } + } + } + m_indexPtr(m) = CurNz; + } + + template <typename MatrixType> + void operator() (const MatrixType& A, PermutationType& matperm) + { + Index m = A.cols(); + IndexVector perm(m),iperm(m); + // First, symmetrize the matrix graph. + get_symmetrized_graph(A); + int output_error; + + // Call the fill-reducing routine from METIS + output_error = METIS_NodeND(&m, m_indexPtr.data(), m_innerIndices.data(), NULL, NULL, perm.data(), iperm.data()); + + if(output_error != METIS_OK) + { + //FIXME The ordering interface should define a class of possible errors + std::cerr << "ERROR WHILE CALLING THE METIS PACKAGE \n"; + return; + } + + // Get the fill-reducing permutation + //NOTE: If Ap is the permuted matrix then perm and iperm vectors are defined as follows + // Row (column) i of Ap is the perm(i) row(column) of A, and row (column) i of A is the iperm(i) row(column) of Ap + + // To be consistent with the use of the permutation in SparseLU module, we thus keep the iperm vector + matperm.resize(m); + for (int j = 0; j < m; j++) + matperm.indices()(j) = iperm(j); + + } + + protected: + IndexVector m_indexPtr; // Pointer to the adjacenccy list of each row/column + IndexVector m_innerIndices; // Adjacency list +}; + +}// end namespace eigen +#endif
\ No newline at end of file diff --git a/Eigen/src/OrderingMethods/Eigen_Colamd.h b/Eigen/src/OrderingMethods/Eigen_Colamd.h new file mode 100644 index 000000000..0af137d54 --- /dev/null +++ b/Eigen/src/OrderingMethods/Eigen_Colamd.h @@ -0,0 +1,2514 @@ +// // This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Desire Nuentsa Wakam <desire.nuentsa_wakam@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +// This file is modified from the eigen_colamd/symamd library. The copyright is below + +// The authors of the code itself are Stefan I. Larimore and Timothy A. +// Davis (davis@cise.ufl.edu), University of Florida. The algorithm was +// developed in collaboration with John Gilbert, Xerox PARC, and Esmond +// Ng, Oak Ridge National Laboratory. +// +// Date: +// +// September 8, 2003. Version 2.3. +// +// Acknowledgements: +// +// This work was supported by the National Science Foundation, under +// grants DMS-9504974 and DMS-9803599. +// +// Notice: +// +// Copyright (c) 1998-2003 by the University of Florida. +// All Rights Reserved. +// +// THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY +// EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. +// +// Permission is hereby granted to use, copy, modify, and/or distribute +// this program, provided that the Copyright, this License, and the +// Availability of the original version is retained on all copies and made +// accessible to the end-user of any code or package that includes COLAMD +// or any modified version of COLAMD. +// +// Availability: +// +// The eigen_colamd/symamd library is available at +// +// http://www.cise.ufl.edu/research/sparse/eigen_colamd/ + +// This is the http://www.cise.ufl.edu/research/sparse/eigen_colamd/eigen_colamd.h +// file. It is required by the eigen_colamd.c, colamdmex.c, and symamdmex.c +// files, and by any C code that calls the routines whose prototypes are +// listed below, or that uses the eigen_colamd/symamd definitions listed below. + +#ifndef EIGEN_COLAMD_H +#define EIGEN_COLAMD_H + +/* Ensure that debugging is turned off: */ +#ifndef COLAMD_NDEBUG +#define COLAMD_NDEBUG +#endif /* NDEBUG */ + +/* ========================================================================== */ +/* === Knob and statistics definitions ====================================== */ +/* ========================================================================== */ + +/* size of the knobs [ ] array. Only knobs [0..1] are currently used. */ +#define EIGEN_COLAMD_KNOBS 20 + +/* number of output statistics. Only stats [0..6] are currently used. */ +#define EIGEN_COLAMD_STATS 20 + +/* knobs [0] and stats [0]: dense row knob and output statistic. */ +#define EIGEN_COLAMD_DENSE_ROW 0 + +/* knobs [1] and stats [1]: dense column knob and output statistic. */ +#define EIGEN_COLAMD_DENSE_COL 1 + +/* stats [2]: memory defragmentation count output statistic */ +#define EIGEN_COLAMD_DEFRAG_COUNT 2 + +/* stats [3]: eigen_colamd status: zero OK, > 0 warning or notice, < 0 error */ +#define EIGEN_COLAMD_STATUS 3 + +/* stats [4..6]: error info, or info on jumbled columns */ +#define EIGEN_COLAMD_INFO1 4 +#define EIGEN_COLAMD_INFO2 5 +#define EIGEN_COLAMD_INFO3 6 + +/* error codes returned in stats [3]: */ +#define EIGEN_COLAMD_OK (0) +#define EIGEN_COLAMD_OK_BUT_JUMBLED (1) +#define EIGEN_COLAMD_ERROR_A_not_present (-1) +#define EIGEN_COLAMD_ERROR_p_not_present (-2) +#define EIGEN_COLAMD_ERROR_nrow_negative (-3) +#define EIGEN_COLAMD_ERROR_ncol_negative (-4) +#define EIGEN_COLAMD_ERROR_nnz_negative (-5) +#define EIGEN_COLAMD_ERROR_p0_nonzero (-6) +#define EIGEN_COLAMD_ERROR_A_too_small (-7) +#define EIGEN_COLAMD_ERROR_col_length_negative (-8) +#define EIGEN_COLAMD_ERROR_row_index_out_of_bounds (-9) +#define EIGEN_COLAMD_ERROR_out_of_memory (-10) +#define EIGEN_COLAMD_ERROR_internal_error (-999) + +/* ========================================================================== */ +/* === Definitions ========================================================== */ +/* ========================================================================== */ + +#define COLAMD_MAX(a,b) (((a) > (b)) ? (a) : (b)) +#define COLAMD_MIN(a,b) (((a) < (b)) ? (a) : (b)) + +#define EIGEN_ONES_COMPLEMENT(r) (-(r)-1) + +/* -------------------------------------------------------------------------- */ + +#define EIGEN_COLAMD_EMPTY (-1) + +/* Row and column status */ +#define EIGEN_ALIVE (0) +#define EIGEN_DEAD (-1) + +/* Column status */ +#define EIGEN_DEAD_PRINCIPAL (-1) +#define EIGEN_DEAD_NON_PRINCIPAL (-2) + +/* Macros for row and column status update and checking. */ +#define EIGEN_ROW_IS_DEAD(r) EIGEN_ROW_IS_MARKED_DEAD (Row[r].shared2.mark) +#define EIGEN_ROW_IS_MARKED_DEAD(row_mark) (row_mark < EIGEN_ALIVE) +#define EIGEN_ROW_IS_ALIVE(r) (Row [r].shared2.mark >= EIGEN_ALIVE) +#define EIGEN_COL_IS_DEAD(c) (Col [c].start < EIGEN_ALIVE) +#define EIGEN_COL_IS_ALIVE(c) (Col [c].start >= EIGEN_ALIVE) +#define EIGEN_EIGEN_COL_IS_DEAD_PRINCIPAL(c) (Col [c].start == EIGEN_DEAD_PRINCIPAL) +#define EIGEN_KILL_ROW(r) { Row [r].shared2.mark = EIGEN_DEAD ; } +#define EIGEN_KILL_PRINCIPAL_COL(c) { Col [c].start = EIGEN_DEAD_PRINCIPAL ; } +#define EIGEN_KILL_NON_PRINCIPAL_COL(c) { Col [c].start = EIGEN_DEAD_NON_PRINCIPAL ; } + +/* ========================================================================== */ +/* === Colamd reporting mechanism =========================================== */ +/* ========================================================================== */ + +#ifdef MATLAB_MEX_FILE + +/* use mexPrintf in a MATLAB mexFunction, for debugging and statistics output */ +#define PRINTF mexPrintf + +/* In MATLAB, matrices are 1-based to the user, but 0-based internally */ +#define INDEX(i) ((i)+1) + +#else + +/* Use printf in standard C environment, for debugging and statistics output. */ +/* Output is generated only if debugging is enabled at compile time, or if */ +/* the caller explicitly calls eigen_colamd_report or symamd_report. */ +#define PRINTF printf + +/* In C, matrices are 0-based and indices are reported as such in *_report */ +#define INDEX(i) (i) + +#endif /* MATLAB_MEX_FILE */ + + // == Row and Column structures == +typedef struct EIGEN_Colamd_Col_struct +{ + int start ; /* index for A of first row in this column, or EIGEN_DEAD */ + /* if column is dead */ + int length ; /* number of rows in this column */ + union + { + int thickness ; /* number of original columns represented by this */ + /* col, if the column is alive */ + int parent ; /* parent in parent tree super-column structure, if */ + /* the column is dead */ + } shared1 ; + union + { + int score ; /* the score used to maintain heap, if col is alive */ + int order ; /* pivot ordering of this column, if col is dead */ + } shared2 ; + union + { + int headhash ; /* head of a hash bucket, if col is at the head of */ + /* a degree list */ + int hash ; /* hash value, if col is not in a degree list */ + int prev ; /* previous column in degree list, if col is in a */ + /* degree list (but not at the head of a degree list) */ + } shared3 ; + union + { + int degree_next ; /* next column, if col is in a degree list */ + int hash_next ; /* next column, if col is in a hash list */ + } shared4 ; + +} EIGEN_Colamd_Col ; + +typedef struct EIGEN_Colamd_Row_struct +{ + int start ; /* index for A of first col in this row */ + int length ; /* number of principal columns in this row */ + union + { + int degree ; /* number of principal & non-principal columns in row */ + int p ; /* used as a row pointer in eigen_init_rows_cols () */ + } shared1 ; + union + { + int mark ; /* for computing set differences and marking dead rows*/ + int first_column ;/* first column in row (used in garbage collection) */ + } shared2 ; + +} EIGEN_Colamd_Row ; + +/* ========================================================================== */ +/* === Colamd recommended memory size ======================================= */ +/* ========================================================================== */ + +/* + The recommended length Alen of the array A passed to eigen_colamd is given by + the EIGEN_COLAMD_RECOMMENDED (nnz, n_row, n_col) macro. It returns -1 if any + argument is negative. 2*nnz space is required for the row and column + indices of the matrix. EIGEN_COLAMD_C (n_col) + EIGEN_COLAMD_R (n_row) space is + required for the Col and Row arrays, respectively, which are internal to + eigen_colamd. An additional n_col space is the minimal amount of "elbow room", + and nnz/5 more space is recommended for run time efficiency. + + This macro is not needed when using symamd. + + Explicit typecast to int added Sept. 23, 2002, COLAMD version 2.2, to avoid + gcc -pedantic warning messages. +*/ + +#define EIGEN_COLAMD_C(n_col) ((int) (((n_col) + 1) * sizeof (EIGEN_Colamd_Col) / sizeof (int))) +#define EIGEN_COLAMD_R(n_row) ((int) (((n_row) + 1) * sizeof (EIGEN_Colamd_Row) / sizeof (int))) + +#define EIGEN_COLAMD_RECOMMENDED(nnz, n_row, n_col) \ +( \ +((nnz) < 0 || (n_row) < 0 || (n_col) < 0) \ +? \ + (-1) \ +: \ + (2 * (nnz) + EIGEN_COLAMD_C (n_col) + EIGEN_COLAMD_R (n_row) + (n_col) + ((nnz) / 5)) \ +) + + // Various routines +int eigen_colamd_recommended (int nnz, int n_row, int n_col) ; + +void eigen_colamd_set_defaults (double knobs [EIGEN_COLAMD_KNOBS]) ; + +bool eigen_colamd (int n_row, int n_col, int Alen, int A [], int p [], double knobs[EIGEN_COLAMD_KNOBS], int stats [EIGEN_COLAMD_STATS]) ; + +void eigen_colamd_report (int stats [EIGEN_COLAMD_STATS]); + +int eigen_init_rows_cols (int n_row, int n_col, EIGEN_Colamd_Row Row [], EIGEN_Colamd_Col col [], int A [], int p [], int stats[EIGEN_COLAMD_STATS] ); + +void eigen_init_scoring (int n_row, int n_col, EIGEN_Colamd_Row Row [], EIGEN_Colamd_Col Col [], int A [], int head [], double knobs[EIGEN_COLAMD_KNOBS], int *p_n_row2, int *p_n_col2, int *p_max_deg); + +int eigen_find_ordering (int n_row, int n_col, int Alen, EIGEN_Colamd_Row Row [], EIGEN_Colamd_Col Col [], int A [], int head [], int n_col2, int max_deg, int pfree); + +void eigen_order_children (int n_col, EIGEN_Colamd_Col Col [], int p []); + +void eigen_detect_super_cols ( +#ifndef COLAMD_NDEBUG + int n_col, + EIGEN_Colamd_Row Row [], +#endif /* COLAMD_NDEBUG */ + EIGEN_Colamd_Col Col [], + int A [], + int head [], + int row_start, + int row_length ) ; + + int eigen_garbage_collection (int n_row, int n_col, EIGEN_Colamd_Row Row [], EIGEN_Colamd_Col Col [], int A [], int *pfree) ; + + int eigen_clear_mark (int n_row, EIGEN_Colamd_Row Row [] ) ; + + void eigen_print_report (char *method, int stats [EIGEN_COLAMD_STATS]) ; + +/* ========================================================================== */ +/* === Debugging prototypes and definitions ================================= */ +/* ========================================================================== */ + +#ifndef COLAMD_NDEBUG + +/* colamd_debug is the *ONLY* global variable, and is only */ +/* present when debugging */ + + int colamd_debug ; /* debug print level */ + +#define COLAMD_DEBUG0(params) { (void) PRINTF params ; } +#define COLAMD_DEBUG1(params) { if (colamd_debug >= 1) (void) PRINTF params ; } +#define COLAMD_DEBUG2(params) { if (colamd_debug >= 2) (void) PRINTF params ; } +#define COLAMD_DEBUG3(params) { if (colamd_debug >= 3) (void) PRINTF params ; } +#define COLAMD_DEBUG4(params) { if (colamd_debug >= 4) (void) PRINTF params ; } + +#ifdef MATLAB_MEX_FILE +#define COLAMD_ASSERT(expression) (mxAssert ((expression), "")) +#else +#define COLAMD_ASSERT(expression) (assert (expression)) +#endif /* MATLAB_MEX_FILE */ + + void eigen_colamd_get_debug /* gets the debug print level from getenv */ +( + char *method +) ; + + void eigen_debug_deg_lists +( + int n_row, + int n_col, + EIGEN_Colamd_Row Row [], + EIGEN_Colamd_Col Col [], + int head [], + int min_score, + int should, + int max_deg +) ; + + void eigen_debug_mark +( + int n_row, + EIGEN_Colamd_Row Row [], + int tag_mark, + int max_mark +) ; + + void eigen_debug_matrix +( + int n_row, + int n_col, + EIGEN_Colamd_Row Row [], + EIGEN_Colamd_Col Col [], + int A [] +) ; + + void eigen_debug_structures +( + int n_row, + int n_col, + EIGEN_Colamd_Row Row [], + EIGEN_Colamd_Col Col [], + int A [], + int n_col2 +) ; + +#else /* COLAMD_NDEBUG */ + +/* === No debugging ========================================================= */ + +#define COLAMD_DEBUG0(params) ; +#define COLAMD_DEBUG1(params) ; +#define COLAMD_DEBUG2(params) ; +#define COLAMD_DEBUG3(params) ; +#define COLAMD_DEBUG4(params) ; + +#define COLAMD_ASSERT(expression) ((void) 0) + +#endif /* COLAMD_NDEBUG */ + + + +/** + * \brief Returns the recommended value of Alen + * + * Returns recommended value of Alen for use by eigen_colamd. + * Returns -1 if any input argument is negative. + * The use of this routine or macro is optional. + * Note that the macro uses its arguments more than once, + * so be careful for side effects, if you pass expressions as arguments to EIGEN_COLAMD_RECOMMENDED. + * + * \param nnz nonzeros in A + * \param n_row number of rows in A + * \param n_col number of columns in A + * \return recommended value of Alen for use by eigen_colamd + */ +int eigen_colamd_recommended ( int nnz, int n_row, int n_col) +{ + + return (EIGEN_COLAMD_RECOMMENDED (nnz, n_row, n_col)) ; +} + +/** + * \brief set default parameters The use of this routine is optional. + * + * Colamd: rows with more than (knobs [EIGEN_COLAMD_DENSE_ROW] * n_col) + * entries are removed prior to ordering. Columns with more than + * (knobs [EIGEN_COLAMD_DENSE_COL] * n_row) entries are removed prior to + * ordering, and placed last in the output column ordering. + * + * EIGEN_COLAMD_DENSE_ROW and EIGEN_COLAMD_DENSE_COL are defined as 0 and 1, + * respectively, in eigen_colamd.h. Default values of these two knobs + * are both 0.5. Currently, only knobs [0] and knobs [1] are + * used, but future versions may use more knobs. If so, they will + * be properly set to their defaults by the future version of + * eigen_colamd_set_defaults, so that the code that calls eigen_colamd will + * not need to change, assuming that you either use + * eigen_colamd_set_defaults, or pass a (double *) NULL pointer as the + * knobs array to eigen_colamd or symamd. + * + * \param knobs parameter settings for eigen_colamd + */ +void eigen_colamd_set_defaults(double knobs[EIGEN_COLAMD_KNOBS]) +{ + /* === Local variables ================================================== */ + + int i ; + + if (!knobs) + { + return ; /* no knobs to initialize */ + } + for (i = 0 ; i < EIGEN_COLAMD_KNOBS ; i++) + { + knobs [i] = 0 ; + } + knobs [EIGEN_COLAMD_DENSE_ROW] = 0.5 ; /* ignore rows over 50% dense */ + knobs [EIGEN_COLAMD_DENSE_COL] = 0.5 ; /* ignore columns over 50% dense */ +} + +/** + * \brief Computes a column ordering using the column approximate minimum degree ordering + * + * Computes a column ordering (Q) of A such that P(AQ)=LU or + * (AQ)'AQ=LL' have less fill-in and require fewer floating point + * operations than factorizing the unpermuted matrix A or A'A, + * respectively. + * + * + * \param n_row number of rows in A + * \param n_col number of columns in A + * \param Alen, size of the array A + * \param A row indices of the matrix, of size ALen + * \param p column pointers of A, of size n_col+1 + * \param knobs parameter settings for eigen_colamd + * \param stats eigen_colamd output statistics and error codes + */ +bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[EIGEN_COLAMD_KNOBS], int stats[EIGEN_COLAMD_STATS]) +{ + /* === Local variables ================================================== */ + + int i ; /* loop index */ + int nnz ; /* nonzeros in A */ + int Row_size ; /* size of Row [], in integers */ + int Col_size ; /* size of Col [], in integers */ + int need ; /* minimum required length of A */ + EIGEN_Colamd_Row *Row ; /* pointer into A of Row [0..n_row] array */ + EIGEN_Colamd_Col *Col ; /* pointer into A of Col [0..n_col] array */ + int n_col2 ; /* number of non-dense, non-empty columns */ + int n_row2 ; /* number of non-dense, non-empty rows */ + int ngarbage ; /* number of garbage collections performed */ + int max_deg ; /* maximum row degree */ + double default_knobs [EIGEN_COLAMD_KNOBS] ; /* default knobs array */ + +#ifndef COLAMD_NDEBUG + eigen_colamd_get_debug ("eigen_colamd") ; +#endif /* COLAMD_NDEBUG */ + + /* === Check the input arguments ======================================== */ + + if (!stats) + { + COLAMD_DEBUG0 (("eigen_colamd: stats not present\n")) ; + return (false) ; + } + for (i = 0 ; i < EIGEN_COLAMD_STATS ; i++) + { + stats [i] = 0 ; + } + stats [EIGEN_COLAMD_STATUS] = EIGEN_COLAMD_OK ; + stats [EIGEN_COLAMD_INFO1] = -1 ; + stats [EIGEN_COLAMD_INFO2] = -1 ; + + if (!A) /* A is not present */ + { + stats [EIGEN_COLAMD_STATUS] = EIGEN_COLAMD_ERROR_A_not_present ; + COLAMD_DEBUG0 (("eigen_colamd: A not present\n")) ; + return (false) ; + } + + if (!p) /* p is not present */ + { + stats [EIGEN_COLAMD_STATUS] = EIGEN_COLAMD_ERROR_p_not_present ; + COLAMD_DEBUG0 (("eigen_colamd: p not present\n")) ; + return (false) ; + } + + if (n_row < 0) /* n_row must be >= 0 */ + { + stats [EIGEN_COLAMD_STATUS] = EIGEN_COLAMD_ERROR_nrow_negative ; + stats [EIGEN_COLAMD_INFO1] = n_row ; + COLAMD_DEBUG0 (("eigen_colamd: nrow negative %d\n", n_row)) ; + return (false) ; + } + + if (n_col < 0) /* n_col must be >= 0 */ + { + stats [EIGEN_COLAMD_STATUS] = EIGEN_COLAMD_ERROR_ncol_negative ; + stats [EIGEN_COLAMD_INFO1] = n_col ; + COLAMD_DEBUG0 (("eigen_colamd: ncol negative %d\n", n_col)) ; + return (false) ; + } + + nnz = p [n_col] ; + if (nnz < 0) /* nnz must be >= 0 */ + { + stats [EIGEN_COLAMD_STATUS] = EIGEN_COLAMD_ERROR_nnz_negative ; + stats [EIGEN_COLAMD_INFO1] = nnz ; + COLAMD_DEBUG0 (("eigen_colamd: number of entries negative %d\n", nnz)) ; + return (false) ; + } + + if (p [0] != 0) + { + stats [EIGEN_COLAMD_STATUS] = EIGEN_COLAMD_ERROR_p0_nonzero ; + stats [EIGEN_COLAMD_INFO1] = p [0] ; + COLAMD_DEBUG0 (("eigen_colamd: p[0] not zero %d\n", p [0])) ; + return (false) ; + } + + /* === If no knobs, set default knobs =================================== */ + + if (!knobs) + { + eigen_colamd_set_defaults (default_knobs) ; + knobs = default_knobs ; + } + + /* === Allocate the Row and Col arrays from array A ===================== */ + + Col_size = EIGEN_COLAMD_C (n_col) ; + Row_size = EIGEN_COLAMD_R (n_row) ; + need = 2*nnz + n_col + Col_size + Row_size ; + + if (need > Alen) + { + /* not enough space in array A to perform the ordering */ + stats [EIGEN_COLAMD_STATUS] = EIGEN_COLAMD_ERROR_A_too_small ; + stats [EIGEN_COLAMD_INFO1] = need ; + stats [EIGEN_COLAMD_INFO2] = Alen ; + COLAMD_DEBUG0 (("eigen_colamd: Need Alen >= %d, given only Alen = %d\n", need,Alen)); + return (false) ; + } + + Alen -= Col_size + Row_size ; + Col = (EIGEN_Colamd_Col *) &A [Alen] ; + Row = (EIGEN_Colamd_Row *) &A [Alen + Col_size] ; + + /* === Construct the row and column data structures ===================== */ + + if (!eigen_init_rows_cols (n_row, n_col, Row, Col, A, p, stats)) + { + /* input matrix is invalid */ + COLAMD_DEBUG0 (("eigen_colamd: Matrix invalid\n")) ; + return (false) ; + } + + /* === Initialize scores, kill dense rows/columns ======================= */ + + eigen_init_scoring (n_row, n_col, Row, Col, A, p, knobs, + &n_row2, &n_col2, &max_deg) ; + + /* === Order the supercolumns =========================================== */ + + ngarbage = eigen_find_ordering (n_row, n_col, Alen, Row, Col, A, p, + n_col2, max_deg, 2*nnz) ; + + /* === Order the non-principal columns ================================== */ + + eigen_order_children (n_col, Col, p) ; + + /* === Return statistics in stats ======================================= */ + + stats [EIGEN_COLAMD_DENSE_ROW] = n_row - n_row2 ; + stats [EIGEN_COLAMD_DENSE_COL] = n_col - n_col2 ; + stats [EIGEN_COLAMD_DEFRAG_COUNT] = ngarbage ; + COLAMD_DEBUG0 (("eigen_colamd: done.\n")) ; + return (true) ; +} + +/* ========================================================================== */ +/* === eigen_colamd_report ======================================================== */ +/* ========================================================================== */ + + void eigen_colamd_report +( + int stats [EIGEN_COLAMD_STATS] +) +{ + eigen_print_report ("eigen_colamd", stats) ; +} + + +/* ========================================================================== */ +/* === NON-USER-CALLABLE ROUTINES: ========================================== */ +/* ========================================================================== */ + +/* There are no user-callable routines beyond this point in the file */ + + +/* ========================================================================== */ +/* === eigen_init_rows_cols ======================================================= */ +/* ========================================================================== */ + +/* + Takes the column form of the matrix in A and creates the row form of the + matrix. Also, row and column attributes are stored in the Col and Row + structs. If the columns are un-sorted or contain duplicate row indices, + this routine will also sort and remove duplicate row indices from the + column form of the matrix. Returns false if the matrix is invalid, + true otherwise. Not user-callable. +*/ + + int eigen_init_rows_cols /* returns true if OK, or false otherwise */ +( + /* === Parameters ======================================================= */ + + int n_row, /* number of rows of A */ + int n_col, /* number of columns of A */ + EIGEN_Colamd_Row Row [], /* of size n_row+1 */ + EIGEN_Colamd_Col Col [], /* of size n_col+1 */ + int A [], /* row indices of A, of size Alen */ + int p [], /* pointers to columns in A, of size n_col+1 */ + int stats [EIGEN_COLAMD_STATS] /* eigen_colamd statistics */ +) +{ + /* === Local variables ================================================== */ + + int col ; /* a column index */ + int row ; /* a row index */ + int *cp ; /* a column pointer */ + int *cp_end ; /* a pointer to the end of a column */ + int *rp ; /* a row pointer */ + int *rp_end ; /* a pointer to the end of a row */ + int last_row ; /* previous row */ + + /* === Initialize columns, and check column pointers ==================== */ + + for (col = 0 ; col < n_col ; col++) + { + Col [col].start = p [col] ; + Col [col].length = p [col+1] - p [col] ; + + if (Col [col].length < 0) + { + /* column pointers must be non-decreasing */ + stats [EIGEN_COLAMD_STATUS] = EIGEN_COLAMD_ERROR_col_length_negative ; + stats [EIGEN_COLAMD_INFO1] = col ; + stats [EIGEN_COLAMD_INFO2] = Col [col].length ; + COLAMD_DEBUG0 (("eigen_colamd: col %d length %d < 0\n", col, Col [col].length)) ; + return (false) ; + } + + Col [col].shared1.thickness = 1 ; + Col [col].shared2.score = 0 ; + Col [col].shared3.prev = EIGEN_COLAMD_EMPTY ; + Col [col].shared4.degree_next = EIGEN_COLAMD_EMPTY ; + } + + /* p [0..n_col] no longer needed, used as "head" in subsequent routines */ + + /* === Scan columns, compute row degrees, and check row indices ========= */ + + stats [EIGEN_COLAMD_INFO3] = 0 ; /* number of duplicate or unsorted row indices*/ + + for (row = 0 ; row < n_row ; row++) + { + Row [row].length = 0 ; + Row [row].shared2.mark = -1 ; + } + + for (col = 0 ; col < n_col ; col++) + { + last_row = -1 ; + + cp = &A [p [col]] ; + cp_end = &A [p [col+1]] ; + + while (cp < cp_end) + { + row = *cp++ ; + + /* make sure row indices within range */ + if (row < 0 || row >= n_row) + { + stats [EIGEN_COLAMD_STATUS] = EIGEN_COLAMD_ERROR_row_index_out_of_bounds ; + stats [EIGEN_COLAMD_INFO1] = col ; + stats [EIGEN_COLAMD_INFO2] = row ; + stats [EIGEN_COLAMD_INFO3] = n_row ; + COLAMD_DEBUG0 (("eigen_colamd: row %d col %d out of bounds\n", row, col)) ; + return (false) ; + } + + if (row <= last_row || Row [row].shared2.mark == col) + { + /* row index are unsorted or repeated (or both), thus col */ + /* is jumbled. This is a notice, not an error condition. */ + stats [EIGEN_COLAMD_STATUS] = EIGEN_COLAMD_OK_BUT_JUMBLED ; + stats [EIGEN_COLAMD_INFO1] = col ; + stats [EIGEN_COLAMD_INFO2] = row ; + (stats [EIGEN_COLAMD_INFO3]) ++ ; + COLAMD_DEBUG1 (("eigen_colamd: row %d col %d unsorted/duplicate\n",row,col)); + } + + if (Row [row].shared2.mark != col) + { + Row [row].length++ ; + } + else + { + /* this is a repeated entry in the column, */ + /* it will be removed */ + Col [col].length-- ; + } + + /* mark the row as having been seen in this column */ + Row [row].shared2.mark = col ; + + last_row = row ; + } + } + + /* === Compute row pointers ============================================= */ + + /* row form of the matrix starts directly after the column */ + /* form of matrix in A */ + Row [0].start = p [n_col] ; + Row [0].shared1.p = Row [0].start ; + Row [0].shared2.mark = -1 ; + for (row = 1 ; row < n_row ; row++) + { + Row [row].start = Row [row-1].start + Row [row-1].length ; + Row [row].shared1.p = Row [row].start ; + Row [row].shared2.mark = -1 ; + } + + /* === Create row form ================================================== */ + + if (stats [EIGEN_COLAMD_STATUS] == EIGEN_COLAMD_OK_BUT_JUMBLED) + { + /* if cols jumbled, watch for repeated row indices */ + for (col = 0 ; col < n_col ; col++) + { + cp = &A [p [col]] ; + cp_end = &A [p [col+1]] ; + while (cp < cp_end) + { + row = *cp++ ; + if (Row [row].shared2.mark != col) + { + A [(Row [row].shared1.p)++] = col ; + Row [row].shared2.mark = col ; + } + } + } + } + else + { + /* if cols not jumbled, we don't need the mark (this is faster) */ + for (col = 0 ; col < n_col ; col++) + { + cp = &A [p [col]] ; + cp_end = &A [p [col+1]] ; + while (cp < cp_end) + { + A [(Row [*cp++].shared1.p)++] = col ; + } + } + } + + /* === Clear the row marks and set row degrees ========================== */ + + for (row = 0 ; row < n_row ; row++) + { + Row [row].shared2.mark = 0 ; + Row [row].shared1.degree = Row [row].length ; + } + + /* === See if we need to re-create columns ============================== */ + + if (stats [EIGEN_COLAMD_STATUS] == EIGEN_COLAMD_OK_BUT_JUMBLED) + { + COLAMD_DEBUG0 (("eigen_colamd: reconstructing column form, matrix jumbled\n")) ; + +#ifndef COLAMD_NDEBUG + /* make sure column lengths are correct */ + for (col = 0 ; col < n_col ; col++) + { + p [col] = Col [col].length ; + } + for (row = 0 ; row < n_row ; row++) + { + rp = &A [Row [row].start] ; + rp_end = rp + Row [row].length ; + while (rp < rp_end) + { + p [*rp++]-- ; + } + } + for (col = 0 ; col < n_col ; col++) + { + COLAMD_ASSERT (p [col] == 0) ; + } + /* now p is all zero (different than when debugging is turned off) */ +#endif /* COLAMD_NDEBUG */ + + /* === Compute col pointers ========================================= */ + + /* col form of the matrix starts at A [0]. */ + /* Note, we may have a gap between the col form and the row */ + /* form if there were duplicate entries, if so, it will be */ + /* removed upon the first garbage collection */ + Col [0].start = 0 ; + p [0] = Col [0].start ; + for (col = 1 ; col < n_col ; col++) + { + /* note that the lengths here are for pruned columns, i.e. */ + /* no duplicate row indices will exist for these columns */ + Col [col].start = Col [col-1].start + Col [col-1].length ; + p [col] = Col [col].start ; + } + + /* === Re-create col form =========================================== */ + + for (row = 0 ; row < n_row ; row++) + { + rp = &A [Row [row].start] ; + rp_end = rp + Row [row].length ; + while (rp < rp_end) + { + A [(p [*rp++])++] = row ; + } + } + } + + /* === Done. Matrix is not (or no longer) jumbled ====================== */ + + return (true) ; +} + + +/* ========================================================================== */ +/* === eigen_init_scoring ========================================================= */ +/* ========================================================================== */ + +/* + Kills dense or empty columns and rows, calculates an initial score for + each column, and places all columns in the degree lists. Not user-callable. +*/ + + void eigen_init_scoring +( + /* === Parameters ======================================================= */ + + int n_row, /* number of rows of A */ + int n_col, /* number of columns of A */ + EIGEN_Colamd_Row Row [], /* of size n_row+1 */ + EIGEN_Colamd_Col Col [], /* of size n_col+1 */ + int A [], /* column form and row form of A */ + int head [], /* of size n_col+1 */ + double knobs [EIGEN_COLAMD_KNOBS],/* parameters */ + int *p_n_row2, /* number of non-dense, non-empty rows */ + int *p_n_col2, /* number of non-dense, non-empty columns */ + int *p_max_deg /* maximum row degree */ +) +{ + /* === Local variables ================================================== */ + + int c ; /* a column index */ + int r, row ; /* a row index */ + int *cp ; /* a column pointer */ + int deg ; /* degree of a row or column */ + int *cp_end ; /* a pointer to the end of a column */ + int *new_cp ; /* new column pointer */ + int col_length ; /* length of pruned column */ + int score ; /* current column score */ + int n_col2 ; /* number of non-dense, non-empty columns */ + int n_row2 ; /* number of non-dense, non-empty rows */ + int dense_row_count ; /* remove rows with more entries than this */ + int dense_col_count ; /* remove cols with more entries than this */ + int min_score ; /* smallest column score */ + int max_deg ; /* maximum row degree */ + int next_col ; /* Used to add to degree list.*/ + +#ifndef COLAMD_NDEBUG + int debug_count ; /* debug only. */ +#endif /* COLAMD_NDEBUG */ + + /* === Extract knobs ==================================================== */ + + dense_row_count = COLAMD_MAX (0, COLAMD_MIN (knobs [EIGEN_COLAMD_DENSE_ROW] * n_col, n_col)) ; + dense_col_count = COLAMD_MAX (0, COLAMD_MIN (knobs [EIGEN_COLAMD_DENSE_COL] * n_row, n_row)) ; + COLAMD_DEBUG1 (("eigen_colamd: densecount: %d %d\n", dense_row_count, dense_col_count)) ; + max_deg = 0 ; + n_col2 = n_col ; + n_row2 = n_row ; + + /* === Kill empty columns =============================================== */ + + /* Put the empty columns at the end in their natural order, so that LU */ + /* factorization can proceed as far as possible. */ + for (c = n_col-1 ; c >= 0 ; c--) + { + deg = Col [c].length ; + if (deg == 0) + { + /* this is a empty column, kill and order it last */ + Col [c].shared2.order = --n_col2 ; + EIGEN_KILL_PRINCIPAL_COL (c) ; + } + } + COLAMD_DEBUG1 (("eigen_colamd: null columns killed: %d\n", n_col - n_col2)) ; + + /* === Kill dense columns =============================================== */ + + /* Put the dense columns at the end, in their natural order */ + for (c = n_col-1 ; c >= 0 ; c--) + { + /* skip any dead columns */ + if (EIGEN_COL_IS_DEAD (c)) + { + continue ; + } + deg = Col [c].length ; + if (deg > dense_col_count) + { + /* this is a dense column, kill and order it last */ + Col [c].shared2.order = --n_col2 ; + /* decrement the row degrees */ + cp = &A [Col [c].start] ; + cp_end = cp + Col [c].length ; + while (cp < cp_end) + { + Row [*cp++].shared1.degree-- ; + } + EIGEN_KILL_PRINCIPAL_COL (c) ; + } + } + COLAMD_DEBUG1 (("eigen_colamd: Dense and null columns killed: %d\n", n_col - n_col2)) ; + + /* === Kill dense and empty rows ======================================== */ + + for (r = 0 ; r < n_row ; r++) + { + deg = Row [r].shared1.degree ; + COLAMD_ASSERT (deg >= 0 && deg <= n_col) ; + if (deg > dense_row_count || deg == 0) + { + /* kill a dense or empty row */ + EIGEN_KILL_ROW (r) ; + --n_row2 ; + } + else + { + /* keep track of max degree of remaining rows */ + max_deg = COLAMD_MAX (max_deg, deg) ; + } + } + COLAMD_DEBUG1 (("eigen_colamd: Dense and null rows killed: %d\n", n_row - n_row2)) ; + + /* === Compute initial column scores ==================================== */ + + /* At this point the row degrees are accurate. They reflect the number */ + /* of "live" (non-dense) columns in each row. No empty rows exist. */ + /* Some "live" columns may contain only dead rows, however. These are */ + /* pruned in the code below. */ + + /* now find the initial matlab score for each column */ + for (c = n_col-1 ; c >= 0 ; c--) + { + /* skip dead column */ + if (EIGEN_COL_IS_DEAD (c)) + { + continue ; + } + score = 0 ; + cp = &A [Col [c].start] ; + new_cp = cp ; + cp_end = cp + Col [c].length ; + while (cp < cp_end) + { + /* get a row */ + row = *cp++ ; + /* skip if dead */ + if (EIGEN_ROW_IS_DEAD (row)) + { + continue ; + } + /* compact the column */ + *new_cp++ = row ; + /* add row's external degree */ + score += Row [row].shared1.degree - 1 ; + /* guard against integer overflow */ + score = COLAMD_MIN (score, n_col) ; + } + /* determine pruned column length */ + col_length = (int) (new_cp - &A [Col [c].start]) ; + if (col_length == 0) + { + /* a newly-made null column (all rows in this col are "dense" */ + /* and have already been killed) */ + COLAMD_DEBUG2 (("Newly null killed: %d\n", c)) ; + Col [c].shared2.order = --n_col2 ; + EIGEN_KILL_PRINCIPAL_COL (c) ; + } + else + { + /* set column length and set score */ + COLAMD_ASSERT (score >= 0) ; + COLAMD_ASSERT (score <= n_col) ; + Col [c].length = col_length ; + Col [c].shared2.score = score ; + } + } + COLAMD_DEBUG1 (("eigen_colamd: Dense, null, and newly-null columns killed: %d\n", + n_col-n_col2)) ; + + /* At this point, all empty rows and columns are dead. All live columns */ + /* are "clean" (containing no dead rows) and simplicial (no supercolumns */ + /* yet). Rows may contain dead columns, but all live rows contain at */ + /* least one live column. */ + +#ifndef COLAMD_NDEBUG + eigen_debug_structures (n_row, n_col, Row, Col, A, n_col2) ; +#endif /* COLAMD_NDEBUG */ + + /* === Initialize degree lists ========================================== */ + +#ifndef COLAMD_NDEBUG + debug_count = 0 ; +#endif /* COLAMD_NDEBUG */ + + /* clear the hash buckets */ + for (c = 0 ; c <= n_col ; c++) + { + head [c] = EIGEN_COLAMD_EMPTY ; + } + min_score = n_col ; + /* place in reverse order, so low column indices are at the front */ + /* of the lists. This is to encourage natural tie-breaking */ + for (c = n_col-1 ; c >= 0 ; c--) + { + /* only add principal columns to degree lists */ + if (EIGEN_COL_IS_ALIVE (c)) + { + COLAMD_DEBUG4 (("place %d score %d minscore %d ncol %d\n", + c, Col [c].shared2.score, min_score, n_col)) ; + + /* === Add columns score to DList =============================== */ + + score = Col [c].shared2.score ; + + COLAMD_ASSERT (min_score >= 0) ; + COLAMD_ASSERT (min_score <= n_col) ; + COLAMD_ASSERT (score >= 0) ; + COLAMD_ASSERT (score <= n_col) ; + COLAMD_ASSERT (head [score] >= EIGEN_COLAMD_EMPTY) ; + + /* now add this column to dList at proper score location */ + next_col = head [score] ; + Col [c].shared3.prev = EIGEN_COLAMD_EMPTY ; + Col [c].shared4.degree_next = next_col ; + + /* if there already was a column with the same score, set its */ + /* previous pointer to this new column */ + if (next_col != EIGEN_COLAMD_EMPTY) + { + Col [next_col].shared3.prev = c ; + } + head [score] = c ; + + /* see if this score is less than current min */ + min_score = COLAMD_MIN (min_score, score) ; + +#ifndef COLAMD_NDEBUG + debug_count++ ; +#endif /* COLAMD_NDEBUG */ + + } + } + +#ifndef COLAMD_NDEBUG + COLAMD_DEBUG1 (("eigen_colamd: Live cols %d out of %d, non-princ: %d\n", + debug_count, n_col, n_col-debug_count)) ; + COLAMD_ASSERT (debug_count == n_col2) ; + eigen_debug_deg_lists (n_row, n_col, Row, Col, head, min_score, n_col2, max_deg) ; +#endif /* COLAMD_NDEBUG */ + + /* === Return number of remaining columns, and max row degree =========== */ + + *p_n_col2 = n_col2 ; + *p_n_row2 = n_row2 ; + *p_max_deg = max_deg ; +} + + +/* ========================================================================== */ +/* === eigen_find_ordering ======================================================== */ +/* ========================================================================== */ + +/* + Order the principal columns of the supercolumn form of the matrix + (no supercolumns on input). Uses a minimum approximate column minimum + degree ordering method. Not user-callable. +*/ + + int eigen_find_ordering /* return the number of garbage collections */ +( + /* === Parameters ======================================================= */ + + int n_row, /* number of rows of A */ + int n_col, /* number of columns of A */ + int Alen, /* size of A, 2*nnz + n_col or larger */ + EIGEN_Colamd_Row Row [], /* of size n_row+1 */ + EIGEN_Colamd_Col Col [], /* of size n_col+1 */ + int A [], /* column form and row form of A */ + int head [], /* of size n_col+1 */ + int n_col2, /* Remaining columns to order */ + int max_deg, /* Maximum row degree */ + int pfree /* index of first free slot (2*nnz on entry) */ +) +{ + /* === Local variables ================================================== */ + + int k ; /* current pivot ordering step */ + int pivot_col ; /* current pivot column */ + int *cp ; /* a column pointer */ + int *rp ; /* a row pointer */ + int pivot_row ; /* current pivot row */ + int *new_cp ; /* modified column pointer */ + int *new_rp ; /* modified row pointer */ + int pivot_row_start ; /* pointer to start of pivot row */ + int pivot_row_degree ; /* number of columns in pivot row */ + int pivot_row_length ; /* number of supercolumns in pivot row */ + int pivot_col_score ; /* score of pivot column */ + int needed_memory ; /* free space needed for pivot row */ + int *cp_end ; /* pointer to the end of a column */ + int *rp_end ; /* pointer to the end of a row */ + int row ; /* a row index */ + int col ; /* a column index */ + int max_score ; /* maximum possible score */ + int cur_score ; /* score of current column */ + unsigned int hash ; /* hash value for supernode detection */ + int head_column ; /* head of hash bucket */ + int first_col ; /* first column in hash bucket */ + int tag_mark ; /* marker value for mark array */ + int row_mark ; /* Row [row].shared2.mark */ + int set_difference ; /* set difference size of row with pivot row */ + int min_score ; /* smallest column score */ + int col_thickness ; /* "thickness" (no. of columns in a supercol) */ + int max_mark ; /* maximum value of tag_mark */ + int pivot_col_thickness ; /* number of columns represented by pivot col */ + int prev_col ; /* Used by Dlist operations. */ + int next_col ; /* Used by Dlist operations. */ + int ngarbage ; /* number of garbage collections performed */ + +#ifndef COLAMD_NDEBUG + int debug_d ; /* debug loop counter */ + int debug_step = 0 ; /* debug loop counter */ +#endif /* COLAMD_NDEBUG */ + + /* === Initialization and clear mark ==================================== */ + + max_mark = INT_MAX - n_col ; /* INT_MAX defined in <limits.h> */ + tag_mark = eigen_clear_mark (n_row, Row) ; + min_score = 0 ; + ngarbage = 0 ; + COLAMD_DEBUG1 (("eigen_colamd: Ordering, n_col2=%d\n", n_col2)) ; + + /* === Order the columns ================================================ */ + + for (k = 0 ; k < n_col2 ; /* 'k' is incremented below */) + { + +#ifndef COLAMD_NDEBUG + if (debug_step % 100 == 0) + { + COLAMD_DEBUG2 (("\n... Step k: %d out of n_col2: %d\n", k, n_col2)) ; + } + else + { + COLAMD_DEBUG3 (("\n----------Step k: %d out of n_col2: %d\n", k, n_col2)) ; + } + debug_step++ ; + eigen_debug_deg_lists (n_row, n_col, Row, Col, head, + min_score, n_col2-k, max_deg) ; + eigen_debug_matrix (n_row, n_col, Row, Col, A) ; +#endif /* COLAMD_NDEBUG */ + + /* === Select pivot column, and order it ============================ */ + + /* make sure degree list isn't empty */ + COLAMD_ASSERT (min_score >= 0) ; + COLAMD_ASSERT (min_score <= n_col) ; + COLAMD_ASSERT (head [min_score] >= EIGEN_COLAMD_EMPTY) ; + +#ifndef COLAMD_NDEBUG + for (debug_d = 0 ; debug_d < min_score ; debug_d++) + { + COLAMD_ASSERT (head [debug_d] == EIGEN_COLAMD_EMPTY) ; + } +#endif /* COLAMD_NDEBUG */ + + /* get pivot column from head of minimum degree list */ + while (head [min_score] == EIGEN_COLAMD_EMPTY && min_score < n_col) + { + min_score++ ; + } + pivot_col = head [min_score] ; + COLAMD_ASSERT (pivot_col >= 0 && pivot_col <= n_col) ; + next_col = Col [pivot_col].shared4.degree_next ; + head [min_score] = next_col ; + if (next_col != EIGEN_COLAMD_EMPTY) + { + Col [next_col].shared3.prev = EIGEN_COLAMD_EMPTY ; + } + + COLAMD_ASSERT (EIGEN_COL_IS_ALIVE (pivot_col)) ; + COLAMD_DEBUG3 (("Pivot col: %d\n", pivot_col)) ; + + /* remember score for defrag check */ + pivot_col_score = Col [pivot_col].shared2.score ; + + /* the pivot column is the kth column in the pivot order */ + Col [pivot_col].shared2.order = k ; + + /* increment order count by column thickness */ + pivot_col_thickness = Col [pivot_col].shared1.thickness ; + k += pivot_col_thickness ; + COLAMD_ASSERT (pivot_col_thickness > 0) ; + + /* === Garbage_collection, if necessary ============================= */ + + needed_memory = COLAMD_MIN (pivot_col_score, n_col - k) ; + if (pfree + needed_memory >= Alen) + { + pfree = eigen_garbage_collection (n_row, n_col, Row, Col, A, &A [pfree]) ; + ngarbage++ ; + /* after garbage collection we will have enough */ + COLAMD_ASSERT (pfree + needed_memory < Alen) ; + /* garbage collection has wiped out the Row[].shared2.mark array */ + tag_mark = eigen_clear_mark (n_row, Row) ; + +#ifndef COLAMD_NDEBUG + eigen_debug_matrix (n_row, n_col, Row, Col, A) ; +#endif /* COLAMD_NDEBUG */ + } + + /* === Compute pivot row pattern ==================================== */ + + /* get starting location for this new merged row */ + pivot_row_start = pfree ; + + /* initialize new row counts to zero */ + pivot_row_degree = 0 ; + + /* tag pivot column as having been visited so it isn't included */ + /* in merged pivot row */ + Col [pivot_col].shared1.thickness = -pivot_col_thickness ; + + /* pivot row is the union of all rows in the pivot column pattern */ + cp = &A [Col [pivot_col].start] ; + cp_end = cp + Col [pivot_col].length ; + while (cp < cp_end) + { + /* get a row */ + row = *cp++ ; + COLAMD_DEBUG4 (("Pivot col pattern %d %d\n", EIGEN_ROW_IS_ALIVE (row), row)) ; + /* skip if row is dead */ + if (EIGEN_ROW_IS_DEAD (row)) + { + continue ; + } + rp = &A [Row [row].start] ; + rp_end = rp + Row [row].length ; + while (rp < rp_end) + { + /* get a column */ + col = *rp++ ; + /* add the column, if alive and untagged */ + col_thickness = Col [col].shared1.thickness ; + if (col_thickness > 0 && EIGEN_COL_IS_ALIVE (col)) + { + /* tag column in pivot row */ + Col [col].shared1.thickness = -col_thickness ; + COLAMD_ASSERT (pfree < Alen) ; + /* place column in pivot row */ + A [pfree++] = col ; + pivot_row_degree += col_thickness ; + } + } + } + + /* clear tag on pivot column */ + Col [pivot_col].shared1.thickness = pivot_col_thickness ; + max_deg = COLAMD_MAX (max_deg, pivot_row_degree) ; + +#ifndef COLAMD_NDEBUG + COLAMD_DEBUG3 (("check2\n")) ; + eigen_debug_mark (n_row, Row, tag_mark, max_mark) ; +#endif /* COLAMD_NDEBUG */ + + /* === Kill all rows used to construct pivot row ==================== */ + + /* also kill pivot row, temporarily */ + cp = &A [Col [pivot_col].start] ; + cp_end = cp + Col [pivot_col].length ; + while (cp < cp_end) + { + /* may be killing an already dead row */ + row = *cp++ ; + COLAMD_DEBUG3 (("Kill row in pivot col: %d\n", row)) ; + EIGEN_KILL_ROW (row) ; + } + + /* === Select a row index to use as the new pivot row =============== */ + + pivot_row_length = pfree - pivot_row_start ; + if (pivot_row_length > 0) + { + /* pick the "pivot" row arbitrarily (first row in col) */ + pivot_row = A [Col [pivot_col].start] ; + COLAMD_DEBUG3 (("Pivotal row is %d\n", pivot_row)) ; + } + else + { + /* there is no pivot row, since it is of zero length */ + pivot_row = EIGEN_COLAMD_EMPTY ; + COLAMD_ASSERT (pivot_row_length == 0) ; + } + COLAMD_ASSERT (Col [pivot_col].length > 0 || pivot_row_length == 0) ; + + /* === Approximate degree computation =============================== */ + + /* Here begins the computation of the approximate degree. The column */ + /* score is the sum of the pivot row "length", plus the size of the */ + /* set differences of each row in the column minus the pattern of the */ + /* pivot row itself. The column ("thickness") itself is also */ + /* excluded from the column score (we thus use an approximate */ + /* external degree). */ + + /* The time taken by the following code (compute set differences, and */ + /* add them up) is proportional to the size of the data structure */ + /* being scanned - that is, the sum of the sizes of each column in */ + /* the pivot row. Thus, the amortized time to compute a column score */ + /* is proportional to the size of that column (where size, in this */ + /* context, is the column "length", or the number of row indices */ + /* in that column). The number of row indices in a column is */ + /* monotonically non-decreasing, from the length of the original */ + /* column on input to eigen_colamd. */ + + /* === Compute set differences ====================================== */ + + COLAMD_DEBUG3 (("** Computing set differences phase. **\n")) ; + + /* pivot row is currently dead - it will be revived later. */ + + COLAMD_DEBUG3 (("Pivot row: ")) ; + /* for each column in pivot row */ + rp = &A [pivot_row_start] ; + rp_end = rp + pivot_row_length ; + while (rp < rp_end) + { + col = *rp++ ; + COLAMD_ASSERT (EIGEN_COL_IS_ALIVE (col) && col != pivot_col) ; + COLAMD_DEBUG3 (("Col: %d\n", col)) ; + + /* clear tags used to construct pivot row pattern */ + col_thickness = -Col [col].shared1.thickness ; + COLAMD_ASSERT (col_thickness > 0) ; + Col [col].shared1.thickness = col_thickness ; + + /* === Remove column from degree list =========================== */ + + cur_score = Col [col].shared2.score ; + prev_col = Col [col].shared3.prev ; + next_col = Col [col].shared4.degree_next ; + COLAMD_ASSERT (cur_score >= 0) ; + COLAMD_ASSERT (cur_score <= n_col) ; + COLAMD_ASSERT (cur_score >= EIGEN_COLAMD_EMPTY) ; + if (prev_col == EIGEN_COLAMD_EMPTY) + { + head [cur_score] = next_col ; + } + else + { + Col [prev_col].shared4.degree_next = next_col ; + } + if (next_col != EIGEN_COLAMD_EMPTY) + { + Col [next_col].shared3.prev = prev_col ; + } + + /* === Scan the column ========================================== */ + + cp = &A [Col [col].start] ; + cp_end = cp + Col [col].length ; + while (cp < cp_end) + { + /* get a row */ + row = *cp++ ; + row_mark = Row [row].shared2.mark ; + /* skip if dead */ + if (EIGEN_ROW_IS_MARKED_DEAD (row_mark)) + { + continue ; + } + COLAMD_ASSERT (row != pivot_row) ; + set_difference = row_mark - tag_mark ; + /* check if the row has been seen yet */ + if (set_difference < 0) + { + COLAMD_ASSERT (Row [row].shared1.degree <= max_deg) ; + set_difference = Row [row].shared1.degree ; + } + /* subtract column thickness from this row's set difference */ + set_difference -= col_thickness ; + COLAMD_ASSERT (set_difference >= 0) ; + /* absorb this row if the set difference becomes zero */ + if (set_difference == 0) + { + COLAMD_DEBUG3 (("aggressive absorption. Row: %d\n", row)) ; + EIGEN_KILL_ROW (row) ; + } + else + { + /* save the new mark */ + Row [row].shared2.mark = set_difference + tag_mark ; + } + } + } + +#ifndef COLAMD_NDEBUG + eigen_debug_deg_lists (n_row, n_col, Row, Col, head, + min_score, n_col2-k-pivot_row_degree, max_deg) ; +#endif /* COLAMD_NDEBUG */ + + /* === Add up set differences for each column ======================= */ + + COLAMD_DEBUG3 (("** Adding set differences phase. **\n")) ; + + /* for each column in pivot row */ + rp = &A [pivot_row_start] ; + rp_end = rp + pivot_row_length ; + while (rp < rp_end) + { + /* get a column */ + col = *rp++ ; + COLAMD_ASSERT (EIGEN_COL_IS_ALIVE (col) && col != pivot_col) ; + hash = 0 ; + cur_score = 0 ; + cp = &A [Col [col].start] ; + /* compact the column */ + new_cp = cp ; + cp_end = cp + Col [col].length ; + + COLAMD_DEBUG4 (("Adding set diffs for Col: %d.\n", col)) ; + + while (cp < cp_end) + { + /* get a row */ + row = *cp++ ; + COLAMD_ASSERT(row >= 0 && row < n_row) ; + row_mark = Row [row].shared2.mark ; + /* skip if dead */ + if (EIGEN_ROW_IS_MARKED_DEAD (row_mark)) + { + continue ; + } + COLAMD_ASSERT (row_mark > tag_mark) ; + /* compact the column */ + *new_cp++ = row ; + /* compute hash function */ + hash += row ; + /* add set difference */ + cur_score += row_mark - tag_mark ; + /* integer overflow... */ + cur_score = COLAMD_MIN (cur_score, n_col) ; + } + + /* recompute the column's length */ + Col [col].length = (int) (new_cp - &A [Col [col].start]) ; + + /* === Further mass elimination ================================= */ + + if (Col [col].length == 0) + { + COLAMD_DEBUG4 (("further mass elimination. Col: %d\n", col)) ; + /* nothing left but the pivot row in this column */ + EIGEN_KILL_PRINCIPAL_COL (col) ; + pivot_row_degree -= Col [col].shared1.thickness ; + COLAMD_ASSERT (pivot_row_degree >= 0) ; + /* order it */ + Col [col].shared2.order = k ; + /* increment order count by column thickness */ + k += Col [col].shared1.thickness ; + } + else + { + /* === Prepare for supercolumn detection ==================== */ + + COLAMD_DEBUG4 (("Preparing supercol detection for Col: %d.\n", col)) ; + + /* save score so far */ + Col [col].shared2.score = cur_score ; + + /* add column to hash table, for supercolumn detection */ + hash %= n_col + 1 ; + + COLAMD_DEBUG4 ((" Hash = %d, n_col = %d.\n", hash, n_col)) ; + COLAMD_ASSERT (hash <= n_col) ; + + head_column = head [hash] ; + if (head_column > EIGEN_COLAMD_EMPTY) + { + /* degree list "hash" is non-empty, use prev (shared3) of */ + /* first column in degree list as head of hash bucket */ + first_col = Col [head_column].shared3.headhash ; + Col [head_column].shared3.headhash = col ; + } + else + { + /* degree list "hash" is empty, use head as hash bucket */ + first_col = - (head_column + 2) ; + head [hash] = - (col + 2) ; + } + Col [col].shared4.hash_next = first_col ; + + /* save hash function in Col [col].shared3.hash */ + Col [col].shared3.hash = (int) hash ; + COLAMD_ASSERT (EIGEN_COL_IS_ALIVE (col)) ; + } + } + + /* The approximate external column degree is now computed. */ + + /* === Supercolumn detection ======================================== */ + + COLAMD_DEBUG3 (("** Supercolumn detection phase. **\n")) ; + + eigen_detect_super_cols ( + +#ifndef COLAMD_NDEBUG + n_col, Row, +#endif /* COLAMD_NDEBUG */ + + Col, A, head, pivot_row_start, pivot_row_length) ; + + /* === Kill the pivotal column ====================================== */ + + EIGEN_KILL_PRINCIPAL_COL (pivot_col) ; + + /* === Clear mark =================================================== */ + + tag_mark += (max_deg + 1) ; + if (tag_mark >= max_mark) + { + COLAMD_DEBUG2 (("clearing tag_mark\n")) ; + tag_mark = eigen_clear_mark (n_row, Row) ; + } + +#ifndef COLAMD_NDEBUG + COLAMD_DEBUG3 (("check3\n")) ; + eigen_debug_mark (n_row, Row, tag_mark, max_mark) ; +#endif /* COLAMD_NDEBUG */ + + /* === Finalize the new pivot row, and column scores ================ */ + + COLAMD_DEBUG3 (("** Finalize scores phase. **\n")) ; + + /* for each column in pivot row */ + rp = &A [pivot_row_start] ; + /* compact the pivot row */ + new_rp = rp ; + rp_end = rp + pivot_row_length ; + while (rp < rp_end) + { + col = *rp++ ; + /* skip dead columns */ + if (EIGEN_COL_IS_DEAD (col)) + { + continue ; + } + *new_rp++ = col ; + /* add new pivot row to column */ + A [Col [col].start + (Col [col].length++)] = pivot_row ; + + /* retrieve score so far and add on pivot row's degree. */ + /* (we wait until here for this in case the pivot */ + /* row's degree was reduced due to mass elimination). */ + cur_score = Col [col].shared2.score + pivot_row_degree ; + + /* calculate the max possible score as the number of */ + /* external columns minus the 'k' value minus the */ + /* columns thickness */ + max_score = n_col - k - Col [col].shared1.thickness ; + + /* make the score the external degree of the union-of-rows */ + cur_score -= Col [col].shared1.thickness ; + + /* make sure score is less or equal than the max score */ + cur_score = COLAMD_MIN (cur_score, max_score) ; + COLAMD_ASSERT (cur_score >= 0) ; + + /* store updated score */ + Col [col].shared2.score = cur_score ; + + /* === Place column back in degree list ========================= */ + + COLAMD_ASSERT (min_score >= 0) ; + COLAMD_ASSERT (min_score <= n_col) ; + COLAMD_ASSERT (cur_score >= 0) ; + COLAMD_ASSERT (cur_score <= n_col) ; + COLAMD_ASSERT (head [cur_score] >= EIGEN_COLAMD_EMPTY) ; + next_col = head [cur_score] ; + Col [col].shared4.degree_next = next_col ; + Col [col].shared3.prev = EIGEN_COLAMD_EMPTY ; + if (next_col != EIGEN_COLAMD_EMPTY) + { + Col [next_col].shared3.prev = col ; + } + head [cur_score] = col ; + + /* see if this score is less than current min */ + min_score = COLAMD_MIN (min_score, cur_score) ; + + } + +#ifndef COLAMD_NDEBUG + eigen_debug_deg_lists (n_row, n_col, Row, Col, head, + min_score, n_col2-k, max_deg) ; +#endif /* COLAMD_NDEBUG */ + + /* === Resurrect the new pivot row ================================== */ + + if (pivot_row_degree > 0) + { + /* update pivot row length to reflect any cols that were killed */ + /* during super-col detection and mass elimination */ + Row [pivot_row].start = pivot_row_start ; + Row [pivot_row].length = (int) (new_rp - &A[pivot_row_start]) ; + Row [pivot_row].shared1.degree = pivot_row_degree ; + Row [pivot_row].shared2.mark = 0 ; + /* pivot row is no longer dead */ + } + } + + /* === All principal columns have now been ordered ====================== */ + + return (ngarbage) ; +} + + +/* ========================================================================== */ +/* === eigen_order_children ======================================================= */ +/* ========================================================================== */ + +/* + The eigen_find_ordering routine has ordered all of the principal columns (the + representatives of the supercolumns). The non-principal columns have not + yet been ordered. This routine orders those columns by walking up the + parent tree (a column is a child of the column which absorbed it). The + final permutation vector is then placed in p [0 ... n_col-1], with p [0] + being the first column, and p [n_col-1] being the last. It doesn't look + like it at first glance, but be assured that this routine takes time linear + in the number of columns. Although not immediately obvious, the time + taken by this routine is O (n_col), that is, linear in the number of + columns. Not user-callable. +*/ + + void eigen_order_children +( + /* === Parameters ======================================================= */ + + int n_col, /* number of columns of A */ + EIGEN_Colamd_Col Col [], /* of size n_col+1 */ + int p [] /* p [0 ... n_col-1] is the column permutation*/ +) +{ + /* === Local variables ================================================== */ + + int i ; /* loop counter for all columns */ + int c ; /* column index */ + int parent ; /* index of column's parent */ + int order ; /* column's order */ + + /* === Order each non-principal column ================================== */ + + for (i = 0 ; i < n_col ; i++) + { + /* find an un-ordered non-principal column */ + COLAMD_ASSERT (EIGEN_COL_IS_DEAD (i)) ; + if (!EIGEN_EIGEN_COL_IS_DEAD_PRINCIPAL (i) && Col [i].shared2.order == EIGEN_COLAMD_EMPTY) + { + parent = i ; + /* once found, find its principal parent */ + do + { + parent = Col [parent].shared1.parent ; + } while (!EIGEN_EIGEN_COL_IS_DEAD_PRINCIPAL (parent)) ; + + /* now, order all un-ordered non-principal columns along path */ + /* to this parent. collapse tree at the same time */ + c = i ; + /* get order of parent */ + order = Col [parent].shared2.order ; + + do + { + COLAMD_ASSERT (Col [c].shared2.order == EIGEN_COLAMD_EMPTY) ; + + /* order this column */ + Col [c].shared2.order = order++ ; + /* collaps tree */ + Col [c].shared1.parent = parent ; + + /* get immediate parent of this column */ + c = Col [c].shared1.parent ; + + /* continue until we hit an ordered column. There are */ + /* guarranteed not to be anymore unordered columns */ + /* above an ordered column */ + } while (Col [c].shared2.order == EIGEN_COLAMD_EMPTY) ; + + /* re-order the super_col parent to largest order for this group */ + Col [parent].shared2.order = order ; + } + } + + /* === Generate the permutation ========================================= */ + + for (c = 0 ; c < n_col ; c++) + { + p [Col [c].shared2.order] = c ; + } +} + + +/* ========================================================================== */ +/* === eigen_detect_super_cols ==================================================== */ +/* ========================================================================== */ + +/* + Detects supercolumns by finding matches between columns in the hash buckets. + Check amongst columns in the set A [row_start ... row_start + row_length-1]. + The columns under consideration are currently *not* in the degree lists, + and have already been placed in the hash buckets. + + The hash bucket for columns whose hash function is equal to h is stored + as follows: + + if head [h] is >= 0, then head [h] contains a degree list, so: + + head [h] is the first column in degree bucket h. + Col [head [h]].headhash gives the first column in hash bucket h. + + otherwise, the degree list is empty, and: + + -(head [h] + 2) is the first column in hash bucket h. + + For a column c in a hash bucket, Col [c].shared3.prev is NOT a "previous + column" pointer. Col [c].shared3.hash is used instead as the hash number + for that column. The value of Col [c].shared4.hash_next is the next column + in the same hash bucket. + + Assuming no, or "few" hash collisions, the time taken by this routine is + linear in the sum of the sizes (lengths) of each column whose score has + just been computed in the approximate degree computation. + Not user-callable. +*/ + + void eigen_detect_super_cols +( + /* === Parameters ======================================================= */ + +#ifndef COLAMD_NDEBUG + /* these two parameters are only needed when debugging is enabled: */ + int n_col, /* number of columns of A */ + EIGEN_Colamd_Row Row [], /* of size n_row+1 */ +#endif /* COLAMD_NDEBUG */ + + EIGEN_Colamd_Col Col [], /* of size n_col+1 */ + int A [], /* row indices of A */ + int head [], /* head of degree lists and hash buckets */ + int row_start, /* pointer to set of columns to check */ + int row_length /* number of columns to check */ +) +{ + /* === Local variables ================================================== */ + + int hash ; /* hash value for a column */ + int *rp ; /* pointer to a row */ + int c ; /* a column index */ + int super_c ; /* column index of the column to absorb into */ + int *cp1 ; /* column pointer for column super_c */ + int *cp2 ; /* column pointer for column c */ + int length ; /* length of column super_c */ + int prev_c ; /* column preceding c in hash bucket */ + int i ; /* loop counter */ + int *rp_end ; /* pointer to the end of the row */ + int col ; /* a column index in the row to check */ + int head_column ; /* first column in hash bucket or degree list */ + int first_col ; /* first column in hash bucket */ + + /* === Consider each column in the row ================================== */ + + rp = &A [row_start] ; + rp_end = rp + row_length ; + while (rp < rp_end) + { + col = *rp++ ; + if (EIGEN_COL_IS_DEAD (col)) + { + continue ; + } + + /* get hash number for this column */ + hash = Col [col].shared3.hash ; + COLAMD_ASSERT (hash <= n_col) ; + + /* === Get the first column in this hash bucket ===================== */ + + head_column = head [hash] ; + if (head_column > EIGEN_COLAMD_EMPTY) + { + first_col = Col [head_column].shared3.headhash ; + } + else + { + first_col = - (head_column + 2) ; + } + + /* === Consider each column in the hash bucket ====================== */ + + for (super_c = first_col ; super_c != EIGEN_COLAMD_EMPTY ; + super_c = Col [super_c].shared4.hash_next) + { + COLAMD_ASSERT (EIGEN_COL_IS_ALIVE (super_c)) ; + COLAMD_ASSERT (Col [super_c].shared3.hash == hash) ; + length = Col [super_c].length ; + + /* prev_c is the column preceding column c in the hash bucket */ + prev_c = super_c ; + + /* === Compare super_c with all columns after it ================ */ + + for (c = Col [super_c].shared4.hash_next ; + c != EIGEN_COLAMD_EMPTY ; c = Col [c].shared4.hash_next) + { + COLAMD_ASSERT (c != super_c) ; + COLAMD_ASSERT (EIGEN_COL_IS_ALIVE (c)) ; + COLAMD_ASSERT (Col [c].shared3.hash == hash) ; + + /* not identical if lengths or scores are different */ + if (Col [c].length != length || + Col [c].shared2.score != Col [super_c].shared2.score) + { + prev_c = c ; + continue ; + } + + /* compare the two columns */ + cp1 = &A [Col [super_c].start] ; + cp2 = &A [Col [c].start] ; + + for (i = 0 ; i < length ; i++) + { + /* the columns are "clean" (no dead rows) */ + COLAMD_ASSERT (EIGEN_ROW_IS_ALIVE (*cp1)) ; + COLAMD_ASSERT (EIGEN_ROW_IS_ALIVE (*cp2)) ; + /* row indices will same order for both supercols, */ + /* no gather scatter nessasary */ + if (*cp1++ != *cp2++) + { + break ; + } + } + + /* the two columns are different if the for-loop "broke" */ + if (i != length) + { + prev_c = c ; + continue ; + } + + /* === Got it! two columns are identical =================== */ + + COLAMD_ASSERT (Col [c].shared2.score == Col [super_c].shared2.score) ; + + Col [super_c].shared1.thickness += Col [c].shared1.thickness ; + Col [c].shared1.parent = super_c ; + EIGEN_KILL_NON_PRINCIPAL_COL (c) ; + /* order c later, in eigen_order_children() */ + Col [c].shared2.order = EIGEN_COLAMD_EMPTY ; + /* remove c from hash bucket */ + Col [prev_c].shared4.hash_next = Col [c].shared4.hash_next ; + } + } + + /* === Empty this hash bucket ======================================= */ + + if (head_column > EIGEN_COLAMD_EMPTY) + { + /* corresponding degree list "hash" is not empty */ + Col [head_column].shared3.headhash = EIGEN_COLAMD_EMPTY ; + } + else + { + /* corresponding degree list "hash" is empty */ + head [hash] = EIGEN_COLAMD_EMPTY ; + } + } +} + + +/* ========================================================================== */ +/* === eigen_garbage_collection =================================================== */ +/* ========================================================================== */ + +/* + Defragments and compacts columns and rows in the workspace A. Used when + all avaliable memory has been used while performing row merging. Returns + the index of the first free position in A, after garbage collection. The + time taken by this routine is linear is the size of the array A, which is + itself linear in the number of nonzeros in the input matrix. + Not user-callable. +*/ + + int eigen_garbage_collection /* returns the new value of pfree */ +( + /* === Parameters ======================================================= */ + + int n_row, /* number of rows */ + int n_col, /* number of columns */ + EIGEN_Colamd_Row Row [], /* row info */ + EIGEN_Colamd_Col Col [], /* column info */ + int A [], /* A [0 ... Alen-1] holds the matrix */ + int *pfree /* &A [0] ... pfree is in use */ +) +{ + /* === Local variables ================================================== */ + + int *psrc ; /* source pointer */ + int *pdest ; /* destination pointer */ + int j ; /* counter */ + int r ; /* a row index */ + int c ; /* a column index */ + int length ; /* length of a row or column */ + +#ifndef COLAMD_NDEBUG + int debug_rows ; + COLAMD_DEBUG2 (("Defrag..\n")) ; + for (psrc = &A[0] ; psrc < pfree ; psrc++) COLAMD_ASSERT (*psrc >= 0) ; + debug_rows = 0 ; +#endif /* COLAMD_NDEBUG */ + + /* === Defragment the columns =========================================== */ + + pdest = &A[0] ; + for (c = 0 ; c < n_col ; c++) + { + if (EIGEN_COL_IS_ALIVE (c)) + { + psrc = &A [Col [c].start] ; + + /* move and compact the column */ + COLAMD_ASSERT (pdest <= psrc) ; + Col [c].start = (int) (pdest - &A [0]) ; + length = Col [c].length ; + for (j = 0 ; j < length ; j++) + { + r = *psrc++ ; + if (EIGEN_ROW_IS_ALIVE (r)) + { + *pdest++ = r ; + } + } + Col [c].length = (int) (pdest - &A [Col [c].start]) ; + } + } + + /* === Prepare to defragment the rows =================================== */ + + for (r = 0 ; r < n_row ; r++) + { + if (EIGEN_ROW_IS_ALIVE (r)) + { + if (Row [r].length == 0) + { + /* this row is of zero length. cannot compact it, so kill it */ + COLAMD_DEBUG3 (("Defrag row kill\n")) ; + EIGEN_KILL_ROW (r) ; + } + else + { + /* save first column index in Row [r].shared2.first_column */ + psrc = &A [Row [r].start] ; + Row [r].shared2.first_column = *psrc ; + COLAMD_ASSERT (EIGEN_ROW_IS_ALIVE (r)) ; + /* flag the start of the row with the one's complement of row */ + *psrc = EIGEN_ONES_COMPLEMENT (r) ; + +#ifndef COLAMD_NDEBUG + debug_rows++ ; +#endif /* COLAMD_NDEBUG */ + + } + } + } + + /* === Defragment the rows ============================================== */ + + psrc = pdest ; + while (psrc < pfree) + { + /* find a negative number ... the start of a row */ + if (*psrc++ < 0) + { + psrc-- ; + /* get the row index */ + r = EIGEN_ONES_COMPLEMENT (*psrc) ; + COLAMD_ASSERT (r >= 0 && r < n_row) ; + /* restore first column index */ + *psrc = Row [r].shared2.first_column ; + COLAMD_ASSERT (EIGEN_ROW_IS_ALIVE (r)) ; + + /* move and compact the row */ + COLAMD_ASSERT (pdest <= psrc) ; + Row [r].start = (int) (pdest - &A [0]) ; + length = Row [r].length ; + for (j = 0 ; j < length ; j++) + { + c = *psrc++ ; + if (EIGEN_COL_IS_ALIVE (c)) + { + *pdest++ = c ; + } + } + Row [r].length = (int) (pdest - &A [Row [r].start]) ; + +#ifndef COLAMD_NDEBUG + debug_rows-- ; +#endif /* COLAMD_NDEBUG */ + + } + } + /* ensure we found all the rows */ + COLAMD_ASSERT (debug_rows == 0) ; + + /* === Return the new value of pfree ==================================== */ + + return ((int) (pdest - &A [0])) ; +} + + +/* ========================================================================== */ +/* === eigen_clear_mark =========================================================== */ +/* ========================================================================== */ + +/* + Clears the Row [].shared2.mark array, and returns the new tag_mark. + Return value is the new tag_mark. Not user-callable. +*/ + + int eigen_clear_mark /* return the new value for tag_mark */ +( + /* === Parameters ======================================================= */ + + int n_row, /* number of rows in A */ + EIGEN_Colamd_Row Row [] /* Row [0 ... n_row-1].shared2.mark is set to zero */ +) +{ + /* === Local variables ================================================== */ + + int r ; + + for (r = 0 ; r < n_row ; r++) + { + if (EIGEN_ROW_IS_ALIVE (r)) + { + Row [r].shared2.mark = 0 ; + } + } + return (1) ; +} + + + +/* ========================================================================== */ +/* === eigen_print_report ========================================================= */ +/* ========================================================================== */ + + void eigen_print_report +( + char *method, + int stats [EIGEN_COLAMD_STATS] +) +{ + + int i1, i2, i3 ; + + if (!stats) + { + PRINTF ("%s: No statistics available.\n", method) ; + return ; + } + + i1 = stats [EIGEN_COLAMD_INFO1] ; + i2 = stats [EIGEN_COLAMD_INFO2] ; + i3 = stats [EIGEN_COLAMD_INFO3] ; + + if (stats [EIGEN_COLAMD_STATUS] >= 0) + { + PRINTF ("%s: OK. ", method) ; + } + else + { + PRINTF ("%s: ERROR. ", method) ; + } + + switch (stats [EIGEN_COLAMD_STATUS]) + { + + case EIGEN_COLAMD_OK_BUT_JUMBLED: + + PRINTF ("Matrix has unsorted or duplicate row indices.\n") ; + + PRINTF ("%s: number of duplicate or out-of-order row indices: %d\n", + method, i3) ; + + PRINTF ("%s: last seen duplicate or out-of-order row index: %d\n", + method, INDEX (i2)) ; + + PRINTF ("%s: last seen in column: %d", + method, INDEX (i1)) ; + + /* no break - fall through to next case instead */ + + case EIGEN_COLAMD_OK: + + PRINTF ("\n") ; + + PRINTF ("%s: number of dense or empty rows ignored: %d\n", + method, stats [EIGEN_COLAMD_DENSE_ROW]) ; + + PRINTF ("%s: number of dense or empty columns ignored: %d\n", + method, stats [EIGEN_COLAMD_DENSE_COL]) ; + + PRINTF ("%s: number of garbage collections performed: %d\n", + method, stats [EIGEN_COLAMD_DEFRAG_COUNT]) ; + break ; + + case EIGEN_COLAMD_ERROR_A_not_present: + + PRINTF ("Array A (row indices of matrix) not present.\n") ; + break ; + + case EIGEN_COLAMD_ERROR_p_not_present: + + PRINTF ("Array p (column pointers for matrix) not present.\n") ; + break ; + + case EIGEN_COLAMD_ERROR_nrow_negative: + + PRINTF ("Invalid number of rows (%d).\n", i1) ; + break ; + + case EIGEN_COLAMD_ERROR_ncol_negative: + + PRINTF ("Invalid number of columns (%d).\n", i1) ; + break ; + + case EIGEN_COLAMD_ERROR_nnz_negative: + + PRINTF ("Invalid number of nonzero entries (%d).\n", i1) ; + break ; + + case EIGEN_COLAMD_ERROR_p0_nonzero: + + PRINTF ("Invalid column pointer, p [0] = %d, must be zero.\n", i1) ; + break ; + + case EIGEN_COLAMD_ERROR_A_too_small: + + PRINTF ("Array A too small.\n") ; + PRINTF (" Need Alen >= %d, but given only Alen = %d.\n", + i1, i2) ; + break ; + + case EIGEN_COLAMD_ERROR_col_length_negative: + + PRINTF + ("Column %d has a negative number of nonzero entries (%d).\n", + INDEX (i1), i2) ; + break ; + + case EIGEN_COLAMD_ERROR_row_index_out_of_bounds: + + PRINTF + ("Row index (row %d) out of bounds (%d to %d) in column %d.\n", + INDEX (i2), INDEX (0), INDEX (i3-1), INDEX (i1)) ; + break ; + + case EIGEN_COLAMD_ERROR_out_of_memory: + + PRINTF ("Out of memory.\n") ; + break ; + + case EIGEN_COLAMD_ERROR_internal_error: + + /* if this happens, there is a bug in the code */ + PRINTF + ("Internal error! Please contact authors (davis@cise.ufl.edu).\n") ; + break ; + } +} + + + + +/* ========================================================================== */ +/* === eigen_colamd debugging routines ============================================ */ +/* ========================================================================== */ + +/* When debugging is disabled, the remainder of this file is ignored. */ + +#ifndef COLAMD_NDEBUG + + +/* ========================================================================== */ +/* === eigen_debug_structures ===================================================== */ +/* ========================================================================== */ + +/* + At this point, all empty rows and columns are dead. All live columns + are "clean" (containing no dead rows) and simplicial (no supercolumns + yet). Rows may contain dead columns, but all live rows contain at + least one live column. +*/ + + void eigen_debug_structures +( + /* === Parameters ======================================================= */ + + int n_row, + int n_col, + EIGEN_Colamd_Row Row [], + EIGEN_Colamd_Col Col [], + int A [], + int n_col2 +) +{ + /* === Local variables ================================================== */ + + int i ; + int c ; + int *cp ; + int *cp_end ; + int len ; + int score ; + int r ; + int *rp ; + int *rp_end ; + int deg ; + + /* === Check A, Row, and Col ============================================ */ + + for (c = 0 ; c < n_col ; c++) + { + if (EIGEN_COL_IS_ALIVE (c)) + { + len = Col [c].length ; + score = Col [c].shared2.score ; + COLAMD_DEBUG4 (("initial live col %5d %5d %5d\n", c, len, score)) ; + COLAMD_ASSERT (len > 0) ; + COLAMD_ASSERT (score >= 0) ; + COLAMD_ASSERT (Col [c].shared1.thickness == 1) ; + cp = &A [Col [c].start] ; + cp_end = cp + len ; + while (cp < cp_end) + { + r = *cp++ ; + COLAMD_ASSERT (EIGEN_ROW_IS_ALIVE (r)) ; + } + } + else + { + i = Col [c].shared2.order ; + COLAMD_ASSERT (i >= n_col2 && i < n_col) ; + } + } + + for (r = 0 ; r < n_row ; r++) + { + if (EIGEN_ROW_IS_ALIVE (r)) + { + i = 0 ; + len = Row [r].length ; + deg = Row [r].shared1.degree ; + COLAMD_ASSERT (len > 0) ; + COLAMD_ASSERT (deg > 0) ; + rp = &A [Row [r].start] ; + rp_end = rp + len ; + while (rp < rp_end) + { + c = *rp++ ; + if (EIGEN_COL_IS_ALIVE (c)) + { + i++ ; + } + } + COLAMD_ASSERT (i > 0) ; + } + } +} + + +/* ========================================================================== */ +/* === eigen_debug_deg_lists ====================================================== */ +/* ========================================================================== */ + +/* + Prints the contents of the degree lists. Counts the number of columns + in the degree list and compares it to the total it should have. Also + checks the row degrees. +*/ + + void eigen_debug_deg_lists +( + /* === Parameters ======================================================= */ + + int n_row, + int n_col, + EIGEN_Colamd_Row Row [], + EIGEN_Colamd_Col Col [], + int head [], + int min_score, + int should, + int max_deg +) +{ + /* === Local variables ================================================== */ + + int deg ; + int col ; + int have ; + int row ; + + /* === Check the degree lists =========================================== */ + + if (n_col > 10000 && colamd_debug <= 0) + { + return ; + } + have = 0 ; + COLAMD_DEBUG4 (("Degree lists: %d\n", min_score)) ; + for (deg = 0 ; deg <= n_col ; deg++) + { + col = head [deg] ; + if (col == EIGEN_COLAMD_EMPTY) + { + continue ; + } + COLAMD_DEBUG4 (("%d:", deg)) ; + while (col != EIGEN_COLAMD_EMPTY) + { + COLAMD_DEBUG4 ((" %d", col)) ; + have += Col [col].shared1.thickness ; + COLAMD_ASSERT (EIGEN_COL_IS_ALIVE (col)) ; + col = Col [col].shared4.degree_next ; + } + COLAMD_DEBUG4 (("\n")) ; + } + COLAMD_DEBUG4 (("should %d have %d\n", should, have)) ; + COLAMD_ASSERT (should == have) ; + + /* === Check the row degrees ============================================ */ + + if (n_row > 10000 && colamd_debug <= 0) + { + return ; + } + for (row = 0 ; row < n_row ; row++) + { + if (EIGEN_ROW_IS_ALIVE (row)) + { + COLAMD_ASSERT (Row [row].shared1.degree <= max_deg) ; + } + } +} + + +/* ========================================================================== */ +/* === eigen_debug_mark =========================================================== */ +/* ========================================================================== */ + +/* + Ensures that the tag_mark is less that the maximum and also ensures that + each entry in the mark array is less than the tag mark. +*/ + + void eigen_debug_mark +( + /* === Parameters ======================================================= */ + + int n_row, + EIGEN_Colamd_Row Row [], + int tag_mark, + int max_mark +) +{ + /* === Local variables ================================================== */ + + int r ; + + /* === Check the Row marks ============================================== */ + + COLAMD_ASSERT (tag_mark > 0 && tag_mark <= max_mark) ; + if (n_row > 10000 && colamd_debug <= 0) + { + return ; + } + for (r = 0 ; r < n_row ; r++) + { + COLAMD_ASSERT (Row [r].shared2.mark < tag_mark) ; + } +} + + +/* ========================================================================== */ +/* === eigen_debug_matrix ========================================================= */ +/* ========================================================================== */ + +/* + Prints out the contents of the columns and the rows. +*/ + + void eigen_debug_matrix +( + /* === Parameters ======================================================= */ + + int n_row, + int n_col, + EIGEN_Colamd_Row Row [], + EIGEN_Colamd_Col Col [], + int A [] +) +{ + /* === Local variables ================================================== */ + + int r ; + int c ; + int *rp ; + int *rp_end ; + int *cp ; + int *cp_end ; + + /* === Dump the rows and columns of the matrix ========================== */ + + if (colamd_debug < 3) + { + return ; + } + COLAMD_DEBUG3 (("DUMP MATRIX:\n")) ; + for (r = 0 ; r < n_row ; r++) + { + COLAMD_DEBUG3 (("Row %d alive? %d\n", r, EIGEN_ROW_IS_ALIVE (r))) ; + if (EIGEN_ROW_IS_DEAD (r)) + { + continue ; + } + COLAMD_DEBUG3 (("start %d length %d degree %d\n", + Row [r].start, Row [r].length, Row [r].shared1.degree)) ; + rp = &A [Row [r].start] ; + rp_end = rp + Row [r].length ; + while (rp < rp_end) + { + c = *rp++ ; + COLAMD_DEBUG4 ((" %d col %d\n", EIGEN_COL_IS_ALIVE (c), c)) ; + } + } + + for (c = 0 ; c < n_col ; c++) + { + COLAMD_DEBUG3 (("Col %d alive? %d\n", c, EIGEN_COL_IS_ALIVE (c))) ; + if (EIGEN_COL_IS_DEAD (c)) + { + continue ; + } + COLAMD_DEBUG3 (("start %d length %d shared1 %d shared2 %d\n", + Col [c].start, Col [c].length, + Col [c].shared1.thickness, Col [c].shared2.score)) ; + cp = &A [Col [c].start] ; + cp_end = cp + Col [c].length ; + while (cp < cp_end) + { + r = *cp++ ; + COLAMD_DEBUG4 ((" %d row %d\n", EIGEN_ROW_IS_ALIVE (r), r)) ; + } + } +} + + void eigen_colamd_get_debug +( + char *method +) +{ + colamd_debug = 0 ; /* no debug printing */ + + /* get "D" environment variable, which gives the debug printing level */ + if (getenv ("D")) + { + colamd_debug = atoi (getenv ("D")) ; + } + + COLAMD_DEBUG0 (("%s: debug version, D = %d (THIS WILL BE SLOW!)\n", + method, colamd_debug)) ; +} + +#endif /* NDEBUG */ +#endif diff --git a/Eigen/src/OrderingMethods/Ordering.h b/Eigen/src/OrderingMethods/Ordering.h new file mode 100644 index 000000000..47cd6f169 --- /dev/null +++ b/Eigen/src/OrderingMethods/Ordering.h @@ -0,0 +1,156 @@ + +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr> +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see <http://www.gnu.org/licenses/>. + +#ifndef EIGEN_ORDERING_H +#define EIGEN_ORDERING_H + +#include "Amd.h" +#include "Eigen_Colamd.h" +namespace Eigen { +namespace internal { + + /** + * Get the symmetric pattern A^T+A from the input matrix A. + * FIXME: The values should not be considered here + */ + template<typename MatrixType> + void ordering_helper_at_plus_a(const MatrixType& mat, MatrixType& symmat) + { + MatrixType C; + C = mat.transpose(); // NOTE: Could be costly + for (int i = 0; i < C.rows(); i++) + { + for (typename MatrixType::InnerIterator it(C, i); it; ++it) + it.valueRef() = 0.0; + } + symmat = C + mat; + } + +} + +/** + * Get the approximate minimum degree ordering + * If the matrix is not structurally symmetric, an ordering of A^T+A is computed + * \tparam Index The type of indices of the matrix + */ +template <typename Index> +class AMDOrdering +{ + public: + typedef PermutationMatrix<Dynamic, Dynamic, Index> PermutationType; + + /** Compute the permutation vector from a sparse matrix + * This routine is much faster if the input matrix is column-major + */ + template <typename MatrixType> + void operator()(const MatrixType& mat, PermutationType& perm) + { + // Compute the symmetric pattern + SparseMatrix<typename MatrixType::Scalar, ColMajor, Index> symm; + internal::ordering_helper_at_plus_a(mat,symm); + + // Call the AMD routine + //m_mat.prune(keep_diag()); + internal::minimum_degree_ordering(symm, perm); + } + + /** Compute the permutation with a selfadjoint matrix */ + template <typename SrcType, unsigned int SrcUpLo> + void operator()(const SparseSelfAdjointView<SrcType, SrcUpLo>& mat, PermutationType& perm) + { + SparseMatrix<typename SrcType::Scalar, ColMajor, Index> C = mat; + + // Call the AMD routine + // m_mat.prune(keep_diag()); //Remove the diagonal elements + internal::minimum_degree_ordering(C, perm); + } +}; + +/** + * Get the natural ordering + * + *NOTE Returns an empty permutation matrix + * \tparam Index The type of indices of the matrix + */ +template <typename Index> +class NaturalOrdering +{ + public: + typedef PermutationMatrix<Dynamic, Dynamic, Index> PermutationType; + + /** Compute the permutation vector from a column-major sparse matrix */ + template <typename MatrixType> + void operator()(const MatrixType& mat, PermutationType& perm) + { + perm.resize(0); + } + +}; + +/** + * Get the column approximate minimum degree ordering + * The matrix should be in column-major format + */ +template<typename Index> +class COLAMDOrdering; +#include "Eigen_Colamd.h" + +template<typename Index> +class COLAMDOrdering +{ + public: + typedef PermutationMatrix<Dynamic, Dynamic, Index> PermutationType; + typedef Matrix<Index, Dynamic, 1> IndexVector; + /** Compute the permutation vector form a sparse matrix */ + template <typename MatrixType> + void operator() (const MatrixType& mat, PermutationType& perm) + { + int m = mat.rows(); + int n = mat.cols(); + int nnz = mat.nonZeros(); + // Get the recommended value of Alen to be used by colamd + int Alen = eigen_colamd_recommended(nnz, m, n); + // Set the default parameters + double knobs [EIGEN_COLAMD_KNOBS]; + int stats [EIGEN_COLAMD_STATS]; + eigen_colamd_set_defaults(knobs); + + int info; + IndexVector p(n+1), A(Alen); + for(int i=0; i <= n; i++) p(i) = mat.outerIndexPtr()[i]; + for(int i=0; i < nnz; i++) A(i) = mat.innerIndexPtr()[i]; + // Call Colamd routine to compute the ordering + info = eigen_colamd(m, n, Alen, A.data(), p.data(), knobs, stats); + eigen_assert( info && "COLAMD failed " ); + + perm.resize(n); + for (int i = 0; i < n; i++) perm.indices()(p(i)) = i; + + } + +}; + +} // end namespace Eigen +#endif
\ No newline at end of file diff --git a/Eigen/src/SparseCore/SparseMatrix.h b/Eigen/src/SparseCore/SparseMatrix.h index 743fa3afc..573804837 100644 --- a/Eigen/src/SparseCore/SparseMatrix.h +++ b/Eigen/src/SparseCore/SparseMatrix.h @@ -469,6 +469,18 @@ class SparseMatrix m_data.squeeze(); } + /** Turns the matrix into the uncompressed mode */ + void uncompress() + { + if(m_innerNonZeros != 0) + return; + m_innerNonZeros = new Index[m_outerSize]; + for (int i = 0; i < m_outerSize; i++) + { + m_innerNonZeros[i] = m_outerIndex[i+1] - m_outerIndex[i]; + } + } + /** Suppresses all nonzeros which are \b much \b smaller \b than \a reference under the tolerence \a epsilon */ void prune(const Scalar& reference, const RealScalar& epsilon = NumTraits<RealScalar>::dummy_precision()) { diff --git a/Eigen/src/SparseLU/CMakeLists.txt b/Eigen/src/SparseLU/CMakeLists.txt new file mode 100644 index 000000000..69729ee89 --- /dev/null +++ b/Eigen/src/SparseLU/CMakeLists.txt @@ -0,0 +1,6 @@ +FILE(GLOB Eigen_SparseLU_SRCS "*.h") + +INSTALL(FILES + ${Eigen_SparseLU_SRCS} + DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/SparseLU COMPONENT Devel + ) diff --git a/Eigen/src/SparseLU/SparseLU.h b/Eigen/src/SparseLU/SparseLU.h new file mode 100644 index 000000000..e2076138a --- /dev/null +++ b/Eigen/src/SparseLU/SparseLU.h @@ -0,0 +1,613 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +#ifndef EIGEN_SPARSE_LU_H +#define EIGEN_SPARSE_LU_H + +namespace Eigen { + + +// Data structure needed by all routines +#include "SparseLU_Structs.h" +#include "SparseLU_Matrix.h" + +/** + * \ingroup SparseLU_Module + * \brief Sparse supernodal LU factorization for general matrices + * + * This class implements the supernodal LU factorization for general matrices. + * + * \tparam _MatrixType The type of the sparse matrix. It must be a column-major SparseMatrix<> + */ +template <typename _MatrixType, typename _OrderingType> +class SparseLU +{ + public: + typedef _MatrixType MatrixType; + typedef _OrderingType OrderingType; + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; + typedef typename MatrixType::Index Index; + typedef SparseMatrix<Scalar,ColMajor,Index> NCMatrix; + typedef SuperNodalMatrix<Scalar, Index> SCMatrix; + typedef Matrix<Scalar,Dynamic,1> ScalarVector; + typedef Matrix<Index,Dynamic,1> IndexVector; + typedef PermutationMatrix<Dynamic, Dynamic, Index> PermutationType; + public: + SparseLU():m_isInitialized(true),m_Ustore(0,0,0,0,0,0),m_symmetricmode(false),m_diagpivotthresh(1.0) + { + initperfvalues(); + } + SparseLU(const MatrixType& matrix):m_isInitialized(true),m_Ustore(0,0,0,0,0,0),m_symmetricmode(false),m_diagpivotthresh(1.0) + { + initperfvalues(); + compute(matrix); + } + + ~SparseLU() + { + // Free all explicit dynamic pointers + } + + void analyzePattern (const MatrixType& matrix); + void factorize (const MatrixType& matrix); + + /** + * Compute the symbolic and numeric factorization of the input sparse matrix. + * The input matrix should be in column-major storage. + */ + void compute (const MatrixType& matrix) + { + // Analyze + analyzePattern(matrix); + //Factorize + factorize(matrix); + } + + inline Index rows() const { return m_mat.rows(); } + inline Index cols() const { return m_mat.cols(); } + /** Indicate that the pattern of the input matrix is symmetric */ + void isSymmetric(bool sym) + { + m_symmetricmode = sym; + } + + /** Set the threshold used for a diagonal entry to be an acceptable pivot. */ + void diagPivotThresh(RealScalar thresh) + { + m_diagpivotthresh = thresh; + } + + /** Return the number of nonzero elements in the L factor */ + int nnzL() + { + if (m_factorizationIsOk) + return m_nnzL; + else + { + std::cerr<<"Numerical factorization should be done before\n"; + return 0; + } + } + /** Return the number of nonzero elements in the U factor */ + int nnzU() + { + if (m_factorizationIsOk) + return m_nnzU; + else + { + std::cerr<<"Numerical factorization should be done before\n"; + return 0; + } + } + /** \returns the solution X of \f$ A X = B \f$ using the current decomposition of A. + * + * \sa compute() + */ + template<typename Rhs> + inline const internal::solve_retval<SparseLU, Rhs> solve(const MatrixBase<Rhs>& B) const + { + eigen_assert(m_factorizationIsOk && "SparseLU is not initialized."); + eigen_assert(rows()==B.rows() + && "SparseLU::solve(): invalid number of rows of the right hand side matrix B"); + return internal::solve_retval<SparseLU, Rhs>(*this, B.derived()); + } + + + /** \brief Reports whether previous computation was successful. + * + * \returns \c Success if computation was succesful, + * \c NumericalIssue if the PaStiX reports a problem + * \c InvalidInput if the input matrix is invalid + * + * \sa iparm() + */ + ComputationInfo info() const + { + eigen_assert(m_isInitialized && "Decomposition is not initialized."); + return m_info; + } + + template<typename Rhs, typename Dest> + bool _solve(const MatrixBase<Rhs> &B, MatrixBase<Dest> &_X) const + { + Dest& X(_X.derived()); + eigen_assert(m_factorizationIsOk && "The matrix should be factorized first"); + EIGEN_STATIC_ASSERT((Dest::Flags&RowMajorBit)==0, + THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES); + + + int nrhs = B.cols(); + Index n = B.rows(); + + // Permute the right hand side to form X = Pr*B + // on return, X is overwritten by the computed solution + X.resize(n,nrhs); + for(int j = 0; j < nrhs; ++j) + X.col(j) = m_perm_r * B.col(j); + + //Forward substitution with L + m_Lstore.solveInPlace(X); + + // Backward solve with U + for (int k = m_Lstore.nsuper(); k >= 0; k--) + { + Index fsupc = m_Lstore.supToCol()[k]; + Index istart = m_Lstore.rowIndexPtr()[fsupc]; + Index nsupr = m_Lstore.rowIndexPtr()[fsupc+1] - istart; + Index nsupc = m_Lstore.supToCol()[k+1] - fsupc; + Index luptr = m_Lstore.colIndexPtr()[fsupc]; + + if (nsupc == 1) + { + for (int j = 0; j < nrhs; j++) + { + X(fsupc, j) /= m_Lstore.valuePtr()[luptr]; + } + } + else + { + Map<const Matrix<Scalar,Dynamic,Dynamic>, 0, OuterStride<> > A( &(m_Lstore.valuePtr()[luptr]), nsupc, nsupc, OuterStride<>(nsupr) ); + Map< Matrix<Scalar,Dynamic,Dynamic>, 0, OuterStride<> > U (&(X(fsupc,0)), nsupc, nrhs, OuterStride<>(n) ); + U = A.template triangularView<Upper>().solve(U); + } + + for (int j = 0; j < nrhs; ++j) + { + for (int jcol = fsupc; jcol < fsupc + nsupc; jcol++) + { + typename MappedSparseMatrix<Scalar>::InnerIterator it(m_Ustore, jcol); + for ( ; it; ++it) + { + Index irow = it.index(); + X(irow, j) -= X(jcol, j) * it.value(); + } + } + } + } // End For U-solve + + // Permute back the solution + for (int j = 0; j < nrhs; ++j) + X.col(j) = m_perm_c.inverse() * X.col(j); + + return true; + } + + protected: + // Functions + void initperfvalues() + { + m_perfv.panel_size = 12; + m_perfv.relax = 6; + m_perfv.maxsuper = 100; + m_perfv.rowblk = 200; + m_perfv.colblk = 60; + m_perfv.fillfactor = 20; + } + + // Variables + mutable ComputationInfo m_info; + bool m_isInitialized; + bool m_factorizationIsOk; + bool m_analysisIsOk; + NCMatrix m_mat; // The input (permuted ) matrix + SCMatrix m_Lstore; // The lower triangular matrix (supernodal) + MappedSparseMatrix<Scalar> m_Ustore; // The upper triangular matrix + PermutationType m_perm_c; // Column permutation + PermutationType m_perm_r ; // Row permutation + IndexVector m_etree; // Column elimination tree + + LU_GlobalLU_t<IndexVector, ScalarVector> m_glu; // persistent data to facilitate multiple factors + // FIXME All fields of this struct can be defined separately as class members + + // SuperLU/SparseLU options + bool m_symmetricmode; + + // values for performance + LU_perfvalues m_perfv; + RealScalar m_diagpivotthresh; // Specifies the threshold used for a diagonal entry to be an acceptable pivot + int m_nnzL, m_nnzU; // Nonzeros in L and U factors + + private: + // Copy constructor + SparseLU (SparseLU& ) {} + +}; // End class SparseLU + + +// Functions needed by the anaysis phase +#include "SparseLU_Coletree.h" +/** + * Compute the column permutation to minimize the fill-in (file amd.c ) + * + * - Apply this permutation to the input matrix - + * + * - Compute the column elimination tree on the permuted matrix (file Eigen_Coletree.h) + * + * - Postorder the elimination tree and the column permutation (file Eigen_Coletree.h) + * + */ +template <typename MatrixType, typename OrderingType> +void SparseLU<MatrixType, OrderingType>::analyzePattern(const MatrixType& mat) +{ + + //TODO It is possible as in SuperLU to compute row and columns scaling vectors to equilibrate the matrix mat. + + OrderingType ord; + ord(mat,m_perm_c); + //FIXME Check the right semantic behind m_perm_c + // that is, column j of mat goes to column m_perm_c(j) of mat * m_perm_c; + + + // Apply the permutation to the column of the input matrix +// m_mat = mat * m_perm_c.inverse(); //FIXME It should be less expensive here to permute only the structural pattern of the matrix + + //First copy the whole input matrix. + m_mat = mat; + m_mat.uncompress(); //NOTE: The effect of this command is only to create the InnerNonzeros pointers. FIXME : This vector is filled but not subsequently used. + //Then, permute only the column pointers + for (int i = 0; i < mat.cols(); i++) + { + m_mat.outerIndexPtr()[m_perm_c.indices()(i)] = mat.outerIndexPtr()[i]; + m_mat.innerNonZeroPtr()[m_perm_c.indices()(i)] = mat.outerIndexPtr()[i+1] - mat.outerIndexPtr()[i]; + } + + // Compute the column elimination tree of the permuted matrix + /*if (m_etree.size() == 0) */m_etree.resize(m_mat.cols()); + + LU_sp_coletree(m_mat, m_etree); + + // In symmetric mode, do not do postorder here + if (!m_symmetricmode) { + IndexVector post, iwork; + // Post order etree + LU_TreePostorder(m_mat.cols(), m_etree, post); + + + // Renumber etree in postorder + int m = m_mat.cols(); + iwork.resize(m+1); + for (int i = 0; i < m; ++i) iwork(post(i)) = post(m_etree(i)); + m_etree = iwork; + + // Postmultiply A*Pc by post, i.e reorder the matrix according to the postorder of the etree + PermutationType post_perm(m); //FIXME Use directly a constructor with post + for (int i = 0; i < m; i++) + post_perm.indices()(i) = post(i); + + // Combine the two permutations : postorder the permutation for future use + m_perm_c = post_perm * m_perm_c; + + } // end postordering + + m_analysisIsOk = true; +} + +// Functions needed by the numerical factorization phase +#include "SparseLU_Memory.h" +#include "SparseLU_heap_relax_snode.h" +#include "SparseLU_relax_snode.h" +#include "SparseLU_snode_dfs.h" +#include "SparseLU_snode_bmod.h" +#include "SparseLU_pivotL.h" +#include "SparseLU_panel_dfs.h" +#include "SparseLU_kernel_bmod.h" +#include "SparseLU_panel_bmod.h" +#include "SparseLU_column_dfs.h" +#include "SparseLU_column_bmod.h" +#include "SparseLU_copy_to_ucol.h" +#include "SparseLU_pruneL.h" +#include "SparseLU_Utils.h" + + +/** + * - Numerical factorization + * - Interleaved with the symbolic factorization + * \tparam MatrixType The type of the matrix, it should be a column-major sparse matrix + * \return info where + * : successful exit + * = 0: successful exit + * > 0: if info = i, and i is + * <= A->ncol: U(i,i) is exactly zero. The factorization has + * been completed, but the factor U is exactly singular, + * and division by zero will occur if it is used to solve a + * system of equations. + * > A->ncol: number of bytes allocated when memory allocation + * failure occurred, plus A->ncol. If lwork = -1, it is + * the estimated amount of space needed, plus A->ncol. + */ +template <typename MatrixType, typename OrderingType> +void SparseLU<MatrixType, OrderingType>::factorize(const MatrixType& matrix) +{ + + eigen_assert(m_analysisIsOk && "analyzePattern() should be called first"); + eigen_assert((matrix.rows() == matrix.cols()) && "Only for squared matrices"); + + typedef typename IndexVector::Scalar Index; + + + // Apply the column permutation computed in analyzepattern() + // m_mat = matrix * m_perm_c.inverse(); + m_mat = matrix; + m_mat.uncompress(); //NOTE: The effect of this command is only to create the InnerNonzeros pointers. + //Then, permute only the column pointers + for (int i = 0; i < matrix.cols(); i++) + { + m_mat.outerIndexPtr()[m_perm_c.indices()(i)] = matrix.outerIndexPtr()[i]; + m_mat.innerNonZeroPtr()[m_perm_c.indices()(i)] = matrix.outerIndexPtr()[i+1] - matrix.outerIndexPtr()[i]; + } + + int m = m_mat.rows(); + int n = m_mat.cols(); + int nnz = m_mat.nonZeros(); + int maxpanel = m_perfv.panel_size * m; + // Allocate working storage common to the factor routines + int lwork = 0; + int info = LUMemInit(m, n, nnz, lwork, m_perfv.fillfactor, m_perfv.panel_size, m_glu); + if (info) + { + std::cerr << "UNABLE TO ALLOCATE WORKING MEMORY\n\n" ; + m_factorizationIsOk = false; + return ; + } + + // Set up pointers for integer working arrays + IndexVector segrep(m); segrep.setZero(); + IndexVector parent(m); parent.setZero(); + IndexVector xplore(m); xplore.setZero(); + IndexVector repfnz(maxpanel); + IndexVector panel_lsub(maxpanel); + IndexVector xprune(n); xprune.setZero(); + IndexVector marker(m*LU_NO_MARKER); marker.setZero(); + + repfnz.setConstant(-1); + panel_lsub.setConstant(-1); + + // Set up pointers for scalar working arrays + ScalarVector dense; + dense.setZero(maxpanel); + ScalarVector tempv; + tempv.setZero(LU_NUM_TEMPV(m, m_perfv.panel_size, m_perfv.maxsuper, m_perfv.rowblk) ); + + // Compute the inverse of perm_c + PermutationType iperm_c(m_perm_c.inverse()); + + // Identify initial relaxed snodes + IndexVector relax_end(n); + if ( m_symmetricmode == true ) + LU_heap_relax_snode<IndexVector>(n, m_etree, m_perfv.relax, marker, relax_end); + else + LU_relax_snode<IndexVector>(n, m_etree, m_perfv.relax, marker, relax_end); + + + m_perm_r.resize(m); + m_perm_r.indices().setConstant(-1); + marker.setConstant(-1); + + IndexVector& xsup = m_glu.xsup; + IndexVector& supno = m_glu.supno; + IndexVector& xlsub = m_glu.xlsub; + IndexVector& xlusup = m_glu.xlusup; + IndexVector& xusub = m_glu.xusub; + ScalarVector& lusup = m_glu.lusup; + Index& nzlumax = m_glu.nzlumax; + + supno(0) = IND_EMPTY; xsup.setConstant(0); + xsup(0) = xlsub(0) = xusub(0) = xlusup(0) = Index(0); + + // Work on one 'panel' at a time. A panel is one of the following : + // (a) a relaxed supernode at the bottom of the etree, or + // (b) panel_size contiguous columns, <panel_size> defined by the user + int jcol,kcol; + IndexVector panel_histo(n); + Index nextu, nextlu, jsupno, fsupc, new_next; + Index pivrow; // Pivotal row number in the original row matrix + int nseg1; // Number of segments in U-column above panel row jcol + int nseg; // Number of segments in each U-column + int irep, icol; + int i, k, jj; + for (jcol = 0; jcol < n; ) + { + if (relax_end(jcol) != IND_EMPTY) + { // Starting a relaxed node from jcol + kcol = relax_end(jcol); // End index of the relaxed snode + + // Factorize the relaxed supernode(jcol:kcol) + // First, determine the union of the row structure of the snode + info = LU_snode_dfs(jcol, kcol, m_mat, xprune, marker, m_glu); + if ( info ) + { + std::cerr << "MEMORY ALLOCATION FAILED IN SNODE_DFS() \n"; + m_info = NumericalIssue; + m_factorizationIsOk = false; + return; + } + nextu = xusub(jcol); //starting location of column jcol in ucol + nextlu = xlusup(jcol); //Starting location of column jcol in lusup (rectangular supernodes) + jsupno = supno(jcol); // Supernode number which column jcol belongs to + fsupc = xsup(jsupno); //First column number of the current supernode + new_next = nextlu + (xlsub(fsupc+1)-xlsub(fsupc)) * (kcol - jcol + 1); + int mem; + while (new_next > nzlumax ) + { + mem = LUMemXpand(lusup, nzlumax, nextlu, LUSUP, m_glu.num_expansions); + if (mem) + { + std::cerr << "MEMORY ALLOCATION FAILED FOR L FACTOR \n"; + m_factorizationIsOk = false; + return; + } + } + + // Now, left-looking factorize each column within the snode + for (icol = jcol; icol<=kcol; icol++){ + xusub(icol+1) = nextu; + // Scatter into SPA dense(*) + for (typename MatrixType::InnerIterator it(m_mat, icol); it; ++it) + dense(it.row()) = it.value(); + + // Numeric update within the snode + LU_snode_bmod(icol, fsupc, dense, m_glu); + + // Eliminate the current column + info = LU_pivotL(icol, m_diagpivotthresh, m_perm_r.indices(), iperm_c.indices(), pivrow, m_glu); + if ( info ) + { + m_info = NumericalIssue; + std::cerr<< "THE MATRIX IS STRUCTURALLY SINGULAR ... ZERO COLUMN AT " << info <<std::endl; + m_factorizationIsOk = false; + return; + } + } + jcol = icol; // The last column te be eliminated + } + else + { // Work on one panel of panel_size columns + + // Adjust panel size so that a panel won't overlap with the next relaxed snode. + int panel_size = m_perfv.panel_size; // upper bound on panel width + for (k = jcol + 1; k < std::min(jcol+panel_size, n); k++) + { + if (relax_end(k) != IND_EMPTY) + { + panel_size = k - jcol; + break; + } + } + if (k == n) + panel_size = n - jcol; + + // Symbolic outer factorization on a panel of columns + LU_panel_dfs(m, panel_size, jcol, m_mat, m_perm_r.indices(), nseg1, dense, panel_lsub, segrep, repfnz, xprune, marker, parent, xplore, m_glu); + + // Numeric sup-panel updates in topological order + LU_panel_bmod(m, panel_size, jcol, nseg1, dense, tempv, segrep, repfnz, m_perfv, m_glu); + + // Sparse LU within the panel, and below the panel diagonal + for ( jj = jcol; jj< jcol + panel_size; jj++) + { + k = (jj - jcol) * m; // Column index for w-wide arrays + + nseg = nseg1; // begin after all the panel segments + //Depth-first-search for the current column + VectorBlock<IndexVector> panel_lsubk(panel_lsub, k, m); + VectorBlock<IndexVector> repfnz_k(repfnz, k, m); + info = LU_column_dfs(m, jj, m_perm_r.indices(), m_perfv.maxsuper, nseg, panel_lsubk, segrep, repfnz_k, xprune, marker, parent, xplore, m_glu); + if ( info ) + { + std::cerr << "UNABLE TO EXPAND MEMORY IN COLUMN_DFS() \n"; + m_info = NumericalIssue; + m_factorizationIsOk = false; + return; + } + // Numeric updates to this column + VectorBlock<ScalarVector> dense_k(dense, k, m); + VectorBlock<IndexVector> segrep_k(segrep, nseg1, m-nseg1); + info = LU_column_bmod(jj, (nseg - nseg1), dense_k, tempv, segrep_k, repfnz_k, jcol, m_glu); + if ( info ) + { + std::cerr << "UNABLE TO EXPAND MEMORY IN COLUMN_BMOD() \n"; + m_info = NumericalIssue; + m_factorizationIsOk = false; + return; + } + + // Copy the U-segments to ucol(*) + info = LU_copy_to_ucol(jj, nseg, segrep, repfnz_k ,m_perm_r.indices(), dense_k, m_glu); + if ( info ) + { + std::cerr << "UNABLE TO EXPAND MEMORY IN COPY_TO_UCOL() \n"; + m_info = NumericalIssue; + m_factorizationIsOk = false; + return; + } + + // Form the L-segment + info = LU_pivotL(jj, m_diagpivotthresh, m_perm_r.indices(), iperm_c.indices(), pivrow, m_glu); + if ( info ) + { + std::cerr<< "THE MATRIX IS STRUCTURALLY SINGULAR ... ZERO COLUMN AT " << info <<std::endl; + m_info = NumericalIssue; + m_factorizationIsOk = false; + return; + } + + // Prune columns (0:jj-1) using column jj + LU_pruneL(jj, m_perm_r.indices(), pivrow, nseg, segrep, repfnz_k, xprune, m_glu); + + // Reset repfnz for this column + for (i = 0; i < nseg; i++) + { + irep = segrep(i); + repfnz_k(irep) = IND_EMPTY; + } + } // end SparseLU within the panel + jcol += panel_size; // Move to the next panel + } // end else + } // end for -- end elimination + + // Count the number of nonzeros in factors + LU_countnz(n, m_nnzL, m_nnzU, m_glu); + // Apply permutation to the L subscripts + LU_fixupL(n, m_perm_r.indices(), m_glu); + + + + // Create supernode matrix L + m_Lstore.setInfos(m, n, m_glu.lusup, m_glu.xlusup, m_glu.lsub, m_glu.xlsub, m_glu.supno, m_glu.xsup); + // Create the column major upper sparse matrix U; + new (&m_Ustore) MappedSparseMatrix<Scalar> ( m, n, m_nnzU, m_glu.xusub.data(), m_glu.usub.data(), m_glu.ucol.data() ); + + m_info = Success; + m_factorizationIsOk = true; +} + + +namespace internal { + +template<typename _MatrixType, typename Derived, typename Rhs> +struct solve_retval<SparseLU<_MatrixType,Derived>, Rhs> + : solve_retval_base<SparseLU<_MatrixType,Derived>, Rhs> +{ + typedef SparseLU<_MatrixType,Derived> Dec; + EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs) + + template<typename Dest> void evalTo(Dest& dst) const + { + dec()._solve(rhs(),dst); + } +}; + +} // end namespace internal + + + +} // End namespace Eigen +#endif
\ No newline at end of file diff --git a/Eigen/src/SparseLU/SparseLU_Coletree.h b/Eigen/src/SparseLU/SparseLU_Coletree.h new file mode 100644 index 000000000..964f5e433 --- /dev/null +++ b/Eigen/src/SparseLU/SparseLU_Coletree.h @@ -0,0 +1,180 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +/* + + * NOTE: This file is the modified version of sp_coletree.c file in SuperLU + + * -- SuperLU routine (version 3.1) -- + * Univ. of California Berkeley, Xerox Palo Alto Research Center, + * and Lawrence Berkeley National Lab. + * August 1, 2008 + * + * Copyright (c) 1994 by Xerox Corporation. All rights reserved. + * + * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY + * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. + * + * Permission is hereby granted to use or copy this program for any + * purpose, provided the above notices are retained on all copies. + * Permission to modify the code and to distribute modified code is + * granted, provided the above notices are retained, and a notice that + * the code was modified is included with the above copyright notice. + */ +#ifndef SPARSELU_COLETREE_H +#define SPARSELU_COLETREE_H +/** Find the root of the tree/set containing the vertex i : Use Path halving */ +template<typename IndexVector> +int etree_find (int i, IndexVector& pp) +{ + int p = pp(i); // Parent + int gp = pp(p); // Grand parent + while (gp != p) + { + pp(i) = gp; // Parent pointer on find path is changed to former grand parent + i = gp; + p = pp(i); + gp = pp(p); + } + return p; +} + +/** Compute the column elimination tree of a sparse matrix + * NOTE : The matrix is supposed to be in column-major format. + * + */ +template<typename MatrixType, typename IndexVector> +int LU_sp_coletree(const MatrixType& mat, IndexVector& parent) +{ + int nc = mat.cols(); // Number of columns + int nr = mat.rows(); // Number of rows + + IndexVector root(nc); // root of subtree of etree + root.setZero(); + IndexVector pp(nc); // disjoint sets + pp.setZero(); // Initialize disjoint sets + IndexVector firstcol(nr); // First nonzero column in each row + + //Compute first nonzero column in each row + int row,col; + firstcol.setConstant(nc); //for (row = 0; row < nr; firstcol(row++) = nc); + for (col = 0; col < nc; col++) + { + for (typename MatrixType::InnerIterator it(mat, col); it; ++it) + { // Is it necessary to browse the whole matrix, the lower part should do the job ?? + row = it.row(); + firstcol(row) = std::min(firstcol(row), col); + } + } + /* Compute etree by Liu's algorithm for symmetric matrices, + except use (firstcol[r],c) in place of an edge (r,c) of A. + Thus each row clique in A'*A is replaced by a star + centered at its first vertex, which has the same fill. */ + int rset, cset, rroot; + for (col = 0; col < nc; col++) + { + pp(col) = col; + cset = col; + root(cset) = col; + parent(col) = nc; + for (typename MatrixType::InnerIterator it(mat, col); it; ++it) + { // A sequence of interleaved find and union is performed + row = firstcol(it.row()); + if (row >= col) continue; + rset = etree_find(row, pp); // Find the name of the set containing row + rroot = root(rset); + if (rroot != col) + { + parent(rroot) = col; + pp(cset) = rset; + cset = rset; + root(cset) = col; + } + } + } + return 0; +} + +/** + * Depth-first search from vertex n. No recursion. + * This routine was contributed by Cédric Doucet, CEDRAT Group, Meylan, France. +*/ +template<typename IndexVector> +void LU_nr_etdfs (int n, IndexVector& parent, IndexVector& first_kid, IndexVector& next_kid, IndexVector& post, int postnum) +{ + int current = n, first, next; + while (postnum != n) + { + // No kid for the current node + first = first_kid(current); + + // no kid for the current node + if (first == -1) + { + // Numbering this node because it has no kid + post(current) = postnum++; + + // looking for the next kid + next = next_kid(current); + while (next == -1) + { + // No more kids : back to the parent node + current = parent(current); + // numbering the parent node + post(current) = postnum++; + + // Get the next kid + next = next_kid(current); + } + // stopping criterion + if (postnum == n+1) return; + + // Updating current node + current = next; + } + else + { + current = first; + } + } +} + + +/** + * Post order a tree + * \param parent Input tree + * \param post postordered tree + */ +template<typename IndexVector> +void LU_TreePostorder(int n, IndexVector& parent, IndexVector& post) +{ + IndexVector first_kid, next_kid; // Linked list of children + int postnum; + // Allocate storage for working arrays and results + first_kid.resize(n+1); + next_kid.setZero(n+1); + post.setZero(n+1); + + // Set up structure describing children + int v, dad; + first_kid.setConstant(-1); + for (v = n-1; v >= 0; v--) + { + dad = parent(v); + next_kid(v) = first_kid(dad); + first_kid(dad) = v; + } + + // Depth-first search from dummy root vertex #n + postnum = 0; + LU_nr_etdfs(n, parent, first_kid, next_kid, post, postnum); +} + +#endif
\ No newline at end of file diff --git a/Eigen/src/SparseLU/SparseLU_Matrix.h b/Eigen/src/SparseLU/SparseLU_Matrix.h new file mode 100644 index 000000000..31aeee64d --- /dev/null +++ b/Eigen/src/SparseLU/SparseLU_Matrix.h @@ -0,0 +1,313 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr> +// Copyright (C) 2012 Gael Guennebaud <gael.guennebaud@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_SPARSELU_MATRIX_H +#define EIGEN_SPARSELU_MATRIX_H + +/** \ingroup SparseLU_Module + * \brief a class to manipulate the L supernodal factor from the SparseLU factorization + * + * This class contain the data to easily store + * and manipulate the supernodes during the factorization and solution phase of Sparse LU. + * Only the lower triangular matrix has supernodes. + * + * NOTE : This class corresponds to the SCformat structure in SuperLU + * + */ +/* TO DO + * InnerIterator as for sparsematrix + * SuperInnerIterator to iterate through all supernodes + * Function for triangular solve + */ +template <typename _Scalar, typename _Index> +class SuperNodalMatrix +{ + public: + typedef _Scalar Scalar; + typedef _Index Index; + typedef Matrix<Index,Dynamic,1> IndexVector; + typedef Matrix<Scalar,Dynamic,1> ScalarVector; + public: + SuperNodalMatrix() + { + + } + SuperNodalMatrix(int m, int n, ScalarVector& nzval, IndexVector& nzval_colptr, IndexVector& rowind, + IndexVector& rowind_colptr, IndexVector& col_to_sup, IndexVector& sup_to_col ) + { + setInfos(m, n, nzval, nzval_colptr, rowind, rowind_colptr, col_to_sup, sup_to_col); + } + + ~SuperNodalMatrix() + { + + } + /** + * Set appropriate pointers for the lower triangular supernodal matrix + * These infos are available at the end of the numerical factorization + * FIXME This class will be modified such that it can be use in the course + * of the factorization. + */ + void setInfos(int m, int n, ScalarVector& nzval, IndexVector& nzval_colptr, IndexVector& rowind, + IndexVector& rowind_colptr, IndexVector& col_to_sup, IndexVector& sup_to_col ) + { + m_row = m; + m_col = n; + m_nzval = nzval.data(); + m_nzval_colptr = nzval_colptr.data(); + m_rowind = rowind.data(); + m_rowind_colptr = rowind_colptr.data(); + m_nsuper = col_to_sup(n); + m_col_to_sup = col_to_sup.data(); + m_sup_to_col = sup_to_col.data(); + + } + + /** + * Number of rows + */ + int rows() + { + return m_row; + } + + /** + * Number of columns + */ + int cols() + { + return m_col; + } + + /** + * Return the array of nonzero values packed by column + * + * The size is nnz + */ + Scalar* valuePtr() + { + return m_nzval; + } + + const Scalar* valuePtr() const + { + return m_nzval; + } + /** + * Return the pointers to the beginning of each column in \ref valuePtr() + */ + Index* colIndexPtr() + { + return m_nzval_colptr; + } + + const Index* colIndexPtr() const + { + return m_nzval_colptr; + } + + /** + * Return the array of compressed row indices of all supernodes + */ + Index* rowIndex() + { + return m_rowind; + } + + const Index* rowIndex() const + { + return m_rowind; + } + + /** + * Return the location in \em rowvaluePtr() which starts each column + */ + Index* rowIndexPtr() + { + return m_rowind_colptr; + } + + const Index* rowIndexPtr() const + { + return m_rowind_colptr; + } + + /** + * Return the array of column-to-supernode mapping + */ + Index* colToSup() + { + return m_col_to_sup; + } + + const Index* colToSup() const + { + return m_col_to_sup; + } + /** + * Return the array of supernode-to-column mapping + */ + Index* supToCol() + { + return m_sup_to_col; + } + + const Index* supToCol() const + { + return m_sup_to_col; + } + + /** + * Return the number of supernodes + */ + int nsuper() const + { + return m_nsuper; + } + + class InnerIterator; + template<typename Dest> + void solveInPlace( MatrixBase<Dest>&X) const; + + + + + protected: + Index m_row; // Number of rows + Index m_col; // Number of columns + Index m_nsuper; // Number of supernodes + Scalar* m_nzval; //array of nonzero values packed by column + Index* m_nzval_colptr; //nzval_colptr[j] Stores the location in nzval[] which starts column j + Index* m_rowind; // Array of compressed row indices of rectangular supernodes + Index* m_rowind_colptr; //rowind_colptr[j] stores the location in rowind[] which starts column j + Index* m_col_to_sup; // col_to_sup[j] is the supernode number to which column j belongs + Index* m_sup_to_col; //sup_to_col[s] points to the starting column of the s-th supernode + + private : +}; + +/** + * \brief InnerIterator class to iterate over nonzero values of the current column in the supernode + * + */ +template<typename Scalar, typename Index> +class SuperNodalMatrix<Scalar,Index>::InnerIterator +{ + public: + InnerIterator(const SuperNodalMatrix& mat, Index outer) + : m_matrix(mat), + m_outer(outer), + m_idval(mat.colIndexPtr()[outer]), + m_startval(m_idval), + m_endval(mat.colIndexPtr()[outer+1]), + m_idrow(mat.rowIndexPtr()[outer]), + m_startidrow(m_idrow), + m_endidrow(mat.rowIndexPtr()[outer+1]) + {} + inline InnerIterator& operator++() + { + m_idval++; + m_idrow++; + return *this; + } + inline Scalar value() const { return m_matrix.valuePtr()[m_idval]; } + + inline Scalar& valueRef() { return const_cast<Scalar&>(m_matrix.valuePtr()[m_idval]); } + + inline Index index() const { return m_matrix.rowIndex()[m_idrow]; } + inline Index row() const { return index(); } + inline Index col() const { return m_outer; } + + inline Index supIndex() const { return m_matrix.colToSup()[m_outer]; } + + inline operator bool() const + { + return ( (m_idval < m_endval) && (m_idval > m_startval) && + (m_idrow < m_endidrow) && (m_idrow > m_startidrow) ); + } + + protected: + const SuperNodalMatrix& m_matrix; // Supernodal lower triangular matrix + const Index m_outer; // Current column + Index m_idval; //Index to browse the values in the current column + const Index m_startval; // Start of the column value + const Index m_endval; // End of the column value + Index m_idrow; //Index to browse the row indices + const Index m_startidrow; // Start of the row indices of the current column value + const Index m_endidrow; // End of the row indices of the current column value +}; + +/** + * \brief Solve with the supernode triangular matrix + * + */ +template<typename Scalar, typename Index> +template<typename Dest> +void SuperNodalMatrix<Scalar,Index>::solveInPlace( MatrixBase<Dest>&X) const +{ + Index n = X.rows(); + int nrhs = X.cols(); + const Scalar * Lval = valuePtr(); // Nonzero values + Matrix<Scalar,Dynamic,Dynamic> work(n, nrhs); // working vector + work.setZero(); + for (int k = 0; k <= nsuper(); k ++) + { + Index fsupc = supToCol()[k]; // First column of the current supernode + Index istart = rowIndexPtr()[fsupc]; // Pointer index to the subscript of the current column + Index nsupr = rowIndexPtr()[fsupc+1] - istart; // Number of rows in the current supernode + Index nsupc = supToCol()[k+1] - fsupc; // Number of columns in the current supernode + Index nrow = nsupr - nsupc; // Number of rows in the non-diagonal part of the supernode + Index irow; //Current index row + + if (nsupc == 1 ) + { + for (int j = 0; j < nrhs; j++) + { + InnerIterator it(*this, fsupc); + ++it; // Skip the diagonal element + for (; it; ++it) + { + irow = it.row(); + X(irow, j) -= X(fsupc, j) * it.value(); + } + } + } + else + { + // The supernode has more than one column + Index luptr = colIndexPtr()[fsupc]; + + // Triangular solve + Map<const Matrix<Scalar,Dynamic,Dynamic>, 0, OuterStride<> > A( &(Lval[luptr]), nsupc, nsupc, OuterStride<>(nsupr) ); + Map< Matrix<Scalar,Dynamic,Dynamic>, 0, OuterStride<> > U (&(X(fsupc,0)), nsupc, nrhs, OuterStride<>(n) ); + U = A.template triangularView<UnitLower>().solve(U); + + // Matrix-vector product + new (&A) Map<const Matrix<Scalar,Dynamic,Dynamic>, 0, OuterStride<> > ( &(Lval[luptr+nsupc]), nrow, nsupc, OuterStride<>(nsupr) ); + work.block(0, 0, nrow, nrhs) = A * U; + + //Begin Scatter + for (int j = 0; j < nrhs; j++) + { + Index iptr = istart + nsupc; + for (int i = 0; i < nrow; i++) + { + irow = rowIndex()[iptr]; + X(irow, j) -= work(i, j); // Scatter operation + work(i, j) = Scalar(0); + iptr++; + } + } + } + } +} + + +#endif
\ No newline at end of file diff --git a/Eigen/src/SparseLU/SparseLU_Memory.h b/Eigen/src/SparseLU/SparseLU_Memory.h new file mode 100644 index 000000000..48b36f5b4 --- /dev/null +++ b/Eigen/src/SparseLU/SparseLU_Memory.h @@ -0,0 +1,205 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +/* + + * NOTE: This file is the modified version of [s,d,c,z]memory.c files in SuperLU + + * -- SuperLU routine (version 3.1) -- + * Univ. of California Berkeley, Xerox Palo Alto Research Center, + * and Lawrence Berkeley National Lab. + * August 1, 2008 + * + * Copyright (c) 1994 by Xerox Corporation. All rights reserved. + * + * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY + * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. + * + * Permission is hereby granted to use or copy this program for any + * purpose, provided the above notices are retained on all copies. + * Permission to modify the code and to distribute modified code is + * granted, provided the above notices are retained, and a notice that + * the code was modified is included with the above copyright notice. + */ + +#ifndef EIGEN_SPARSELU_MEMORY +#define EIGEN_SPARSELU_MEMORY + +#define LU_NO_MARKER 3 +#define LU_NUM_TEMPV(m,w,t,b) (std::max(m, (t+b)*w) ) +#define IND_EMPTY (-1) + +#define LU_Reduce(alpha) ((alpha + 1) / 2) // i.e (alpha-1)/2 + 1 +#define LU_GluIntArray(n) (5* (n) + 5) +#define LU_TempSpace(m, w) ( (2*w + 4 + LU_NO_MARKER) * m * sizeof(Index) \ + + (w + 1) * m * sizeof(Scalar) ) + + +/** + * Expand the existing storage to accomodate more fill-ins + * \param vec Valid pointer to the vector to allocate or expand + * \param [in,out]length At input, contain the current length of the vector that is to be increased. At output, length of the newly allocated vector + * \param [in]nbElts Current number of elements in the factors + * \param keep_prev 1: use length and do not expand the vector; 0: compute new_len and expand + * \param [in,out]num_expansions Number of times the memory has been expanded + */ +template <typename VectorType > +int expand(VectorType& vec, int& length, int nbElts, int keep_prev, int& num_expansions) +{ + + float alpha = 1.5; // Ratio of the memory increase + int new_len; // New size of the allocated memory + + if(num_expansions == 0 || keep_prev) + new_len = length ; // First time allocate requested + else + new_len = alpha * length ; + + VectorType old_vec; // Temporary vector to hold the previous values + if (nbElts > 0 ) + old_vec = vec.segment(0,nbElts); + + //Allocate or expand the current vector + try + { + vec.resize(new_len); + } + catch(std::bad_alloc& ) + { + if ( !num_expansions ) + { + // First time to allocate from LUMemInit() + throw; // Pass the exception to LUMemInit() which has a try... catch block + } + if (keep_prev) + { + // In this case, the memory length should not not be reduced + return new_len; + } + else + { + // Reduce the size and increase again + int tries = 0; // Number of attempts + do + { + alpha = LU_Reduce(alpha); + new_len = alpha * length ; + try + { + vec.resize(new_len); + } + catch(std::bad_alloc& ) + { + tries += 1; + if ( tries > 10) return new_len; + } + } while (!vec.size()); + } + } + //Copy the previous values to the newly allocated space + if (nbElts > 0) + vec.segment(0, nbElts) = old_vec; + + + length = new_len; + if(num_expansions) ++num_expansions; + return 0; +} + +/** + * \brief Allocate various working space for the numerical factorization phase. + * \param m number of rows of the input matrix + * \param n number of columns + * \param annz number of initial nonzeros in the matrix + * \param lwork if lwork=-1, this routine returns an estimated size of the required memory + * \param glu persistent data to facilitate multiple factors : will be deleted later ?? + * \return an estimated size of the required memory if lwork = -1; otherwise, return the size of actually allocated memory when allocation failed, and 0 on success + * NOTE Unlike SuperLU, this routine does not support successive factorization with the same pattern and the same row permutation + */ +template <typename IndexVector,typename ScalarVector> +int LUMemInit(int m, int n, int annz, int lwork, int fillratio, int panel_size, LU_GlobalLU_t<IndexVector,ScalarVector>& glu) +{ + typedef typename ScalarVector::Scalar Scalar; + typedef typename IndexVector::Scalar Index; + + int& num_expansions = glu.num_expansions; //No memory expansions so far + num_expansions = 0; + glu.nzumax = glu.nzlumax = std::max(fillratio * annz, m*n); // estimated number of nonzeros in U + glu.nzlmax = std::max(1., fillratio/4.) * annz; // estimated nnz in L factor + + // Return the estimated size to the user if necessary + if (lwork == IND_EMPTY) + { + int estimated_size; + estimated_size = LU_GluIntArray(n) * sizeof(Index) + LU_TempSpace(m, panel_size) + + (glu.nzlmax + glu.nzumax) * sizeof(Index) + (glu.nzlumax+glu.nzumax) * sizeof(Scalar) + n; + return estimated_size; + } + + // Setup the required space + + // First allocate Integer pointers for L\U factors + glu.xsup.resize(n+1); + glu.supno.resize(n+1); + glu.xlsub.resize(n+1); + glu.xlusup.resize(n+1); + glu.xusub.resize(n+1); + + // Reserve memory for L/U factors + do + { + try + { + expand<ScalarVector>(glu.lusup, glu.nzlumax, 0, 0, num_expansions); + expand<ScalarVector>(glu.ucol,glu.nzumax, 0, 0, num_expansions); + expand<IndexVector>(glu.lsub,glu.nzlmax, 0, 0, num_expansions); + expand<IndexVector>(glu.usub,glu.nzumax, 0, 1, num_expansions); + } + catch(std::bad_alloc& ) + { + //Reduce the estimated size and retry + glu.nzlumax /= 2; + glu.nzumax /= 2; + glu.nzlmax /= 2; + if (glu.nzlumax < annz ) return glu.nzlumax; + } + + } while (!glu.lusup.size() || !glu.ucol.size() || !glu.lsub.size() || !glu.usub.size()); + + + + ++num_expansions; + return 0; + +} // end LuMemInit + +/** + * \brief Expand the existing storage + * \param vec vector to expand + * \param [in,out]maxlen On input, previous size of vec (Number of elements to copy ). on output, new size + * \param nbElts current number of elements in the vector. + * \param glu Global data structure + * \return 0 on success, > 0 size of the memory allocated so far + */ +template <typename VectorType> +int LUMemXpand(VectorType& vec, int& maxlen, int nbElts, LU_MemType memtype, int& num_expansions) +{ + int failed_size; + if (memtype == USUB) + failed_size = expand<VectorType>(vec, maxlen, nbElts, 1, num_expansions); + else + failed_size = expand<VectorType>(vec, maxlen, nbElts, 0, num_expansions); + + if (failed_size) + return failed_size; + + return 0 ; + +} +#endif
\ No newline at end of file diff --git a/Eigen/src/SparseLU/SparseLU_Structs.h b/Eigen/src/SparseLU/SparseLU_Structs.h new file mode 100644 index 000000000..7b3aa250c --- /dev/null +++ b/Eigen/src/SparseLU/SparseLU_Structs.h @@ -0,0 +1,103 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +/* + * NOTE: This file comes from a partly modified version of files slu_[s,d,c,z]defs.h + * -- SuperLU routine (version 4.1) -- + * Univ. of California Berkeley, Xerox Palo Alto Research Center, + * and Lawrence Berkeley National Lab. + * November, 2010 + * + * Global data structures used in LU factorization - + * + * nsuper: #supernodes = nsuper + 1, numbered [0, nsuper]. + * (xsup,supno): supno[i] is the supernode no to which i belongs; + * xsup(s) points to the beginning of the s-th supernode. + * e.g. supno 0 1 2 2 3 3 3 4 4 4 4 4 (n=12) + * xsup 0 1 2 4 7 12 + * Note: dfs will be performed on supernode rep. relative to the new + * row pivoting ordering + * + * (xlsub,lsub): lsub[*] contains the compressed subscript of + * rectangular supernodes; xlsub[j] points to the starting + * location of the j-th column in lsub[*]. Note that xlsub + * is indexed by column. + * Storage: original row subscripts + * + * During the course of sparse LU factorization, we also use + * (xlsub,lsub) for the purpose of symmetric pruning. For each + * supernode {s,s+1,...,t=s+r} with first column s and last + * column t, the subscript set + * lsub[j], j=xlsub[s], .., xlsub[s+1]-1 + * is the structure of column s (i.e. structure of this supernode). + * It is used for the storage of numerical values. + * Furthermore, + * lsub[j], j=xlsub[t], .., xlsub[t+1]-1 + * is the structure of the last column t of this supernode. + * It is for the purpose of symmetric pruning. Therefore, the + * structural subscripts can be rearranged without making physical + * interchanges among the numerical values. + * + * However, if the supernode has only one column, then we + * only keep one set of subscripts. For any subscript interchange + * performed, similar interchange must be done on the numerical + * values. + * + * The last column structures (for pruning) will be removed + * after the numercial LU factorization phase. + * + * (xlusup,lusup): lusup[*] contains the numerical values of the + * rectangular supernodes; xlusup[j] points to the starting + * location of the j-th column in storage vector lusup[*] + * Note: xlusup is indexed by column. + * Each rectangular supernode is stored by column-major + * scheme, consistent with Fortran 2-dim array storage. + * + * (xusub,ucol,usub): ucol[*] stores the numerical values of + * U-columns outside the rectangular supernodes. The row + * subscript of nonzero ucol[k] is stored in usub[k]. + * xusub[i] points to the starting location of column i in ucol. + * Storage: new row subscripts; that is subscripts of PA. + */ +#ifndef EIGEN_LU_STRUCTS +#define EIGEN_LU_STRUCTS +typedef enum {LUSUP, UCOL, LSUB, USUB, LLVL, ULVL} LU_MemType; + + +template <typename IndexVector, typename ScalarVector> +struct LU_GlobalLU_t { + typedef typename IndexVector::Scalar Index; + IndexVector xsup; //First supernode column ... xsup(s) points to the beginning of the s-th supernode + IndexVector supno; // Supernode number corresponding to this column (column to supernode mapping) + ScalarVector lusup; // nonzero values of L ordered by columns + IndexVector lsub; // Compressed row indices of L rectangular supernodes. + IndexVector xlusup; // pointers to the beginning of each column in lusup + IndexVector xlsub; // pointers to the beginning of each column in lsub + Index nzlmax; // Current max size of lsub + Index nzlumax; // Current max size of lusup + ScalarVector ucol; // nonzero values of U ordered by columns + IndexVector usub; // row indices of U columns in ucol + IndexVector xusub; // Pointers to the beginning of each column of U in ucol + Index nzumax; // Current max size of ucol + Index n; // Number of columns in the matrix + int num_expansions; +}; + +// Values to set for performance +struct LU_perfvalues { + int panel_size; // a panel consists of at most <panel_size> consecutive columns + int relax; // To control degree of relaxing supernodes. If the number of nodes (columns) + // in a subtree of the elimination tree is less than relax, this subtree is considered + // as one supernode regardless of the row structures of those columns + int maxsuper; // The maximum size for a supernode in complete LU + int rowblk; // The minimum row dimension for 2-D blocking to be used; + int colblk; // The minimum column dimension for 2-D blocking to be used; + int fillfactor; // The estimated fills factors for L and U, compared with A +}; +#endif
\ No newline at end of file diff --git a/Eigen/src/SparseLU/SparseLU_Utils.h b/Eigen/src/SparseLU/SparseLU_Utils.h new file mode 100644 index 000000000..316b09ab0 --- /dev/null +++ b/Eigen/src/SparseLU/SparseLU_Utils.h @@ -0,0 +1,75 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +#ifndef EIGEN_SPARSELU_UTILS_H +#define EIGEN_SPARSELU_UTILS_H + + +/** + * \brief Count Nonzero elements in the factors + */ +template <typename IndexVector, typename ScalarVector> +void LU_countnz(const int n, int& nnzL, int& nnzU, LU_GlobalLU_t<IndexVector, ScalarVector>& glu) +{ + nnzL = 0; + nnzU = (glu.xusub)(n); + int nsuper = (glu.supno)(n); + int jlen; + int i, j, fsupc; + if (n <= 0 ) return; + // For each supernode + for (i = 0; i <= nsuper; i++) + { + fsupc = glu.xsup(i); + jlen = glu.xlsub(fsupc+1) - glu.xlsub(fsupc); + + for (j = fsupc; j < glu.xsup(i+1); j++) + { + nnzL += jlen; + nnzU += j - fsupc + 1; + jlen--; + } + } + +} +/** + * \brief Fix up the data storage lsub for L-subscripts. + * + * It removes the subscripts sets for structural pruning, + * and applies permutation to the remaining subscripts + * + */ +template <typename IndexVector, typename ScalarVector> +void LU_fixupL(const int n, const IndexVector& perm_r, LU_GlobalLU_t<IndexVector, ScalarVector>& glu) +{ + int fsupc, i, j, k, jstart; + + int nextl = 0; + int nsuper = (glu.supno)(n); + + // For each supernode + for (i = 0; i <= nsuper; i++) + { + fsupc = glu.xsup(i); + jstart = glu.xlsub(fsupc); + glu.xlsub(fsupc) = nextl; + for (j = jstart; j < glu.xlsub(fsupc + 1); j++) + { + glu.lsub(nextl) = perm_r(glu.lsub(j)); // Now indexed into P*A + nextl++; + } + for (k = fsupc+1; k < glu.xsup(i+1); k++) + glu.xlsub(k) = nextl; // other columns in supernode i + } + + glu.xlsub(n) = nextl; +} + +#endif diff --git a/Eigen/src/SparseLU/SparseLU_column_bmod.h b/Eigen/src/SparseLU/SparseLU_column_bmod.h new file mode 100644 index 000000000..bf25a33fc --- /dev/null +++ b/Eigen/src/SparseLU/SparseLU_column_bmod.h @@ -0,0 +1,167 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr> +// Copyright (C) 2012 Gael Guennebaud <gael.guennebaud@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +/* + + * NOTE: This file is the modified version of xcolumn_bmod.c file in SuperLU + + * -- SuperLU routine (version 3.0) -- + * Univ. of California Berkeley, Xerox Palo Alto Research Center, + * and Lawrence Berkeley National Lab. + * October 15, 2003 + * + * Copyright (c) 1994 by Xerox Corporation. All rights reserved. + * + * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY + * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. + * + * Permission is hereby granted to use or copy this program for any + * purpose, provided the above notices are retained on all copies. + * Permission to modify the code and to distribute modified code is + * granted, provided the above notices are retained, and a notice that + * the code was modified is included with the above copyright notice. + */ +#ifndef SPARSELU_COLUMN_BMOD_H +#define SPARSELU_COLUMN_BMOD_H + +/** + * \brief Performs numeric block updates (sup-col) in topological order + * + * \param jcol current column to update + * \param nseg Number of segments in the U part + * \param dense Store the full representation of the column + * \param tempv working array + * \param segrep segment representative ... + * \param repfnz ??? First nonzero column in each row ??? ... + * \param fpanelc First column in the current panel + * \param glu Global LU data. + * \return 0 - successful return + * > 0 - number of bytes allocated when run out of space + * + */ +template <typename IndexVector, typename ScalarVector, typename BlockIndexVector, typename BlockScalarVector> +int LU_column_bmod(const int jcol, const int nseg, BlockScalarVector& dense, ScalarVector& tempv, BlockIndexVector& segrep, BlockIndexVector& repfnz, int fpanelc, LU_GlobalLU_t<IndexVector, ScalarVector>& glu) +{ + typedef typename IndexVector::Scalar Index; + typedef typename ScalarVector::Scalar Scalar; + int jsupno, k, ksub, krep, ksupno; + int lptr, nrow, isub, irow, nextlu, new_next, ufirst; + int fsupc, nsupc, nsupr, luptr, kfnz, no_zeros; + /* krep = representative of current k-th supernode + * fsupc = first supernodal column + * nsupc = number of columns in a supernode + * nsupr = number of rows in a supernode + * luptr = location of supernodal LU-block in storage + * kfnz = first nonz in the k-th supernodal segment + * no_zeros = no lf leading zeros in a supernodal U-segment + */ + + jsupno = glu.supno(jcol); + // For each nonzero supernode segment of U[*,j] in topological order + k = nseg - 1; + int d_fsupc; // distance between the first column of the current panel and the + // first column of the current snode + int fst_col; // First column within small LU update + int segsize; + for (ksub = 0; ksub < nseg; ksub++) + { + krep = segrep(k); k--; + ksupno = glu.supno(krep); + if (jsupno != ksupno ) + { + // outside the rectangular supernode + fsupc = glu.xsup(ksupno); + fst_col = std::max(fsupc, fpanelc); + + // Distance from the current supernode to the current panel; + // d_fsupc = 0 if fsupc > fpanelc + d_fsupc = fst_col - fsupc; + + luptr = glu.xlusup(fst_col) + d_fsupc; + lptr = glu.xlsub(fsupc) + d_fsupc; + + kfnz = repfnz(krep); + kfnz = std::max(kfnz, fpanelc); + + segsize = krep - kfnz + 1; + nsupc = krep - fst_col + 1; + nsupr = glu.xlsub(fsupc+1) - glu.xlsub(fsupc); + nrow = nsupr - d_fsupc - nsupc; + + // NOTE Unlike the original implementation in SuperLU, the only feature + // available here is a sup-col update. + + // Perform a triangular solver and block update, + // then scatter the result of sup-col update to dense + no_zeros = kfnz - fst_col; + if(segsize==1) + LU_kernel_bmod<1>::run(segsize, dense, tempv, glu.lusup, luptr, nsupr, nrow, glu.lsub, lptr, no_zeros); + else + LU_kernel_bmod<Dynamic>::run(segsize, dense, tempv, glu.lusup, luptr, nsupr, nrow, glu.lsub, lptr, no_zeros); + } // end if jsupno + } // end for each segment + + // Process the supernodal portion of L\U[*,j] + nextlu = glu.xlusup(jcol); + fsupc = glu.xsup(jsupno); + + // copy the SPA dense into L\U[*,j] + int mem; + new_next = nextlu + glu.xlsub(fsupc + 1) - glu.xlsub(fsupc); + while (new_next > glu.nzlumax ) + { + mem = LUMemXpand<ScalarVector>(glu.lusup, glu.nzlumax, nextlu, LUSUP, glu.num_expansions); + if (mem) return mem; + } + + for (isub = glu.xlsub(fsupc); isub < glu.xlsub(fsupc+1); isub++) + { + irow = glu.lsub(isub); + glu.lusup(nextlu) = dense(irow); + dense(irow) = Scalar(0.0); + ++nextlu; + } + + glu.xlusup(jcol + 1) = nextlu; // close L\U(*,jcol); + + /* For more updates within the panel (also within the current supernode), + * should start from the first column of the panel, or the first column + * of the supernode, whichever is bigger. There are two cases: + * 1) fsupc < fpanelc, then fst_col <-- fpanelc + * 2) fsupc >= fpanelc, then fst_col <-- fsupc + */ + fst_col = std::max(fsupc, fpanelc); + + if (fst_col < jcol) + { + // Distance between the current supernode and the current panel + // d_fsupc = 0 if fsupc >= fpanelc + d_fsupc = fst_col - fsupc; + + lptr = glu.xlsub(fsupc) + d_fsupc; + luptr = glu.xlusup(fst_col) + d_fsupc; + nsupr = glu.xlsub(fsupc+1) - glu.xlsub(fsupc); // leading dimension + nsupc = jcol - fst_col; // excluding jcol + nrow = nsupr - d_fsupc - nsupc; + + // points to the beginning of jcol in snode L\U(jsupno) + ufirst = glu.xlusup(jcol) + d_fsupc; + Map<Matrix<Scalar,Dynamic,Dynamic>, 0, OuterStride<> > A( &(glu.lusup.data()[luptr]), nsupc, nsupc, OuterStride<>(nsupr) ); + VectorBlock<ScalarVector> u(glu.lusup, ufirst, nsupc); + u = A.template triangularView<UnitLower>().solve(u); + + new (&A) Map<Matrix<Scalar,Dynamic,Dynamic>, 0, OuterStride<> > ( &(glu.lusup.data()[luptr+nsupc]), nrow, nsupc, OuterStride<>(nsupr) ); + VectorBlock<ScalarVector> l(glu.lusup, ufirst+nsupc, nrow); + l.noalias() -= A * u; + + } // End if fst_col + return 0; +} +#endif
\ No newline at end of file diff --git a/Eigen/src/SparseLU/SparseLU_column_dfs.h b/Eigen/src/SparseLU/SparseLU_column_dfs.h new file mode 100644 index 000000000..568e0686c --- /dev/null +++ b/Eigen/src/SparseLU/SparseLU_column_dfs.h @@ -0,0 +1,165 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +/* + + * NOTE: This file is the modified version of [s,d,c,z]column_dfs.c file in SuperLU + + * -- SuperLU routine (version 2.0) -- + * Univ. of California Berkeley, Xerox Palo Alto Research Center, + * and Lawrence Berkeley National Lab. + * November 15, 1997 + * + * Copyright (c) 1994 by Xerox Corporation. All rights reserved. + * + * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY + * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. + * + * Permission is hereby granted to use or copy this program for any + * purpose, provided the above notices are retained on all copies. + * Permission to modify the code and to distribute modified code is + * granted, provided the above notices are retained, and a notice that + * the code was modified is included with the above copyright notice. + */ +#ifndef SPARSELU_COLUMN_DFS_H +#define SPARSELU_COLUMN_DFS_H +/** + * \brief Performs a symbolic factorization on column jcol and decide the supernode boundary + * + * A supernode representative is the last column of a supernode. + * The nonzeros in U[*,j] are segments that end at supernodes representatives. + * The routine returns a list of the supernodal representatives + * in topological order of the dfs that generates them. + * The location of the first nonzero in each supernodal segment + * (supernodal entry location) is also returned. + * + * \param m number of rows in the matrix + * \param jcol Current column + * \param perm_r Row permutation + * \param maxsuper + * \param [in,out] nseg Number of segments in current U[*,j] - new segments appended + * \param lsub_col defines the rhs vector to start the dfs + * \param [in,out] segrep Segment representatives - new segments appended + * \param repfnz + * \param xprune + * \param marker + * \param parent + * \param xplore + * \param glu global LU data + * \return 0 success + * > 0 number of bytes allocated when run out of space + * + */ +template<typename IndexVector, typename ScalarVector> +struct LU_column_dfs_traits +{ + typedef typename IndexVector::Scalar Index; + LU_column_dfs_traits(Index jcol, Index& jsuper, LU_GlobalLU_t<IndexVector, ScalarVector>& glu) + : m_jcol(jcol), m_jsuper_ref(jsuper), m_glu(glu) + {} + bool update_segrep(Index /*krep*/, Index /*jj*/) + { + return true; + } + void mem_expand(IndexVector& lsub, int& nextl, int chmark) + { + if (nextl >= m_glu.nzlmax) + LUMemXpand<IndexVector>(lsub, m_glu.nzlmax, nextl, LSUB, m_glu.num_expansions); + if (chmark != (m_jcol-1)) m_jsuper_ref = IND_EMPTY; + } + enum { ExpandMem = true }; + + int m_jcol; + int& m_jsuper_ref; + LU_GlobalLU_t<IndexVector, ScalarVector>& m_glu; +}; + +template <typename IndexVector, typename ScalarVector, typename BlockIndexVector> +int LU_column_dfs(const int m, const int jcol, IndexVector& perm_r, int maxsuper, int& nseg, BlockIndexVector& lsub_col, IndexVector& segrep, BlockIndexVector& repfnz, IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, LU_GlobalLU_t<IndexVector, ScalarVector>& glu) +{ + typedef typename IndexVector::Scalar Index; + typedef typename ScalarVector::Scalar Scalar; + + int jsuper = glu.supno(jcol); + int nextl = glu.xlsub(jcol); + VectorBlock<IndexVector> marker2(marker, 2*m, m); + + + LU_column_dfs_traits<IndexVector, ScalarVector> traits(jcol, jsuper, glu); + + // For each nonzero in A(*,jcol) do dfs + for (int k = 0; lsub_col[k] != IND_EMPTY; k++) + { + int krow = lsub_col(k); + lsub_col(k) = IND_EMPTY; + int kmark = marker2(krow); + + // krow was visited before, go to the next nonz; + if (kmark == jcol) continue; + + LU_dfs_kernel(jcol, perm_r, nseg, glu.lsub, segrep, repfnz, xprune, marker2, parent, + xplore, glu, nextl, krow, traits); + } // for each nonzero ... + + int fsupc, jptr, jm1ptr, ito, ifrom, istop; + int nsuper = glu.supno(jcol); + int jcolp1 = jcol + 1; + int jcolm1 = jcol - 1; + + // check to see if j belongs in the same supernode as j-1 + if ( jcol == 0 ) + { // Do nothing for column 0 + nsuper = glu.supno(0) = 0 ; + } + else + { + fsupc = glu.xsup(nsuper); + jptr = glu.xlsub(jcol); // Not yet compressed + jm1ptr = glu.xlsub(jcolm1); + + // Use supernodes of type T2 : see SuperLU paper + if ( (nextl-jptr != jptr-jm1ptr-1) ) jsuper = IND_EMPTY; + + // Make sure the number of columns in a supernode doesn't + // exceed threshold + if ( (jcol - fsupc) >= maxsuper) jsuper = IND_EMPTY; + + /* If jcol starts a new supernode, reclaim storage space in + * glu.lsub from previous supernode. Note we only store + * the subscript set of the first and last columns of + * a supernode. (first for num values, last for pruning) + */ + if (jsuper == IND_EMPTY) + { // starts a new supernode + if ( (fsupc < jcolm1-1) ) + { // >= 3 columns in nsuper + ito = glu.xlsub(fsupc+1); + glu.xlsub(jcolm1) = ito; + istop = ito + jptr - jm1ptr; + xprune(jcolm1) = istop; // intialize xprune(jcol-1) + glu.xlsub(jcol) = istop; + + for (ifrom = jm1ptr; ifrom < nextl; ++ifrom, ++ito) + glu.lsub(ito) = glu.lsub(ifrom); + nextl = ito; // = istop + length(jcol) + } + nsuper++; + glu.supno(jcol) = nsuper; + } // if a new supernode + } // end else: jcol > 0 + + // Tidy up the pointers before exit + glu.xsup(nsuper+1) = jcolp1; + glu.supno(jcolp1) = nsuper; + xprune(jcol) = nextl; // Intialize upper bound for pruning + glu.xlsub(jcolp1) = nextl; + + return 0; +} +#endif
\ No newline at end of file diff --git a/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h b/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h new file mode 100644 index 000000000..541785881 --- /dev/null +++ b/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h @@ -0,0 +1,102 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. +/* + + * NOTE: This file is the modified version of [s,d,c,z]copy_to_ucol.c file in SuperLU + + * -- SuperLU routine (version 2.0) -- + * Univ. of California Berkeley, Xerox Palo Alto Research Center, + * and Lawrence Berkeley National Lab. + * November 15, 1997 + * + * Copyright (c) 1994 by Xerox Corporation. All rights reserved. + * + * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY + * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. + * + * Permission is hereby granted to use or copy this program for any + * purpose, provided the above notices are retained on all copies. + * Permission to modify the code and to distribute modified code is + * granted, provided the above notices are retained, and a notice that + * the code was modified is included with the above copyright notice. + */ +#ifndef SPARSELU_COPY_TO_UCOL_H +#define SPARSELU_COPY_TO_UCOL_H + +/** + * \brief Performs numeric block updates (sup-col) in topological order + * + * \param jcol current column to update + * \param nseg Number of segments in the U part + * \param segrep segment representative ... + * \param repfnz First nonzero column in each row ... + * \param perm_r Row permutation + * \param dense Store the full representation of the column + * \param glu Global LU data. + * \return 0 - successful return + * > 0 - number of bytes allocated when run out of space + * + */ +template <typename IndexVector, typename ScalarVector, typename SegRepType, typename RepfnzType, typename DenseType> +int LU_copy_to_ucol(const int jcol, const int nseg, SegRepType& segrep, RepfnzType& repfnz ,IndexVector& perm_r, DenseType& dense, LU_GlobalLU_t<IndexVector, ScalarVector>& glu) +{ + typedef typename IndexVector::Scalar Index; + typedef typename ScalarVector::Scalar Scalar; + Index ksub, krep, ksupno; + + Index jsupno = glu.supno(jcol); + + // For each nonzero supernode segment of U[*,j] in topological order + int k = nseg - 1, i; + Index nextu = glu.xusub(jcol); + Index kfnz, isub, segsize; + Index new_next,irow; + Index fsupc, mem; + for (ksub = 0; ksub < nseg; ksub++) + { + krep = segrep(k); k--; + ksupno = glu.supno(krep); + if (jsupno != ksupno ) // should go into ucol(); + { + kfnz = repfnz(krep); + if (kfnz != IND_EMPTY) + { // Nonzero U-segment + fsupc = glu.xsup(ksupno); + isub = glu.xlsub(fsupc) + kfnz - fsupc; + segsize = krep - kfnz + 1; + new_next = nextu + segsize; + while (new_next > glu.nzumax) + { + mem = LUMemXpand<ScalarVector>(glu.ucol, glu.nzumax, nextu, UCOL, glu.num_expansions); + if (mem) return mem; + mem = LUMemXpand<IndexVector>(glu.usub, glu.nzumax, nextu, USUB, glu.num_expansions); + if (mem) return mem; + + } + + for (i = 0; i < segsize; i++) + { + irow = glu.lsub(isub); + glu.usub(nextu) = perm_r(irow); // Unlike the L part, the U part is stored in its final order + glu.ucol(nextu) = dense(irow); + dense(irow) = Scalar(0.0); + nextu++; + isub++; + } + + } // end nonzero U-segment + + } // end if jsupno + + } // end for each segment + glu.xusub(jcol + 1) = nextu; // close U(*,jcol) + return 0; +} + +#endif
\ No newline at end of file diff --git a/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h b/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h new file mode 100644 index 000000000..1bda70aaf --- /dev/null +++ b/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h @@ -0,0 +1,119 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +/* This file is a modified version of heap_relax_snode.c file in SuperLU + * -- SuperLU routine (version 3.0) -- + * Univ. of California Berkeley, Xerox Palo Alto Research Center, + * and Lawrence Berkeley National Lab. + * October 15, 2003 + * + * Copyright (c) 1994 by Xerox Corporation. All rights reserved. + * + * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY + * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. + * + * Permission is hereby granted to use or copy this program for any + * purpose, provided the above notices are retained on all copies. + * Permission to modify the code and to distribute modified code is + * granted, provided the above notices are retained, and a notice that + * the code was modified is included with the above copyright notice. + */ + +#ifndef SPARSELU_HEAP_RELAX_SNODE_H +#define SPARSELU_HEAP_RELAX_SNODE_H +#include "SparseLU_Coletree.h" +/** + * \brief Identify the initial relaxed supernodes + * + * This routine applied to a symmetric elimination tree. + * It assumes that the matrix has been reordered according to the postorder of the etree + * \param et elimination tree + * \param relax_columns Maximum number of columns allowed in a relaxed snode + * \param descendants Number of descendants of each node in the etree + * \param relax_end last column in a supernode + */ +template <typename IndexVector> +void LU_heap_relax_snode (const int n, IndexVector& et, const int relax_columns, IndexVector& descendants, IndexVector& relax_end) +{ + + // The etree may not be postordered, but its heap ordered + IndexVector post; + LU_TreePostorder(n, et, post); // Post order etree + IndexVector inv_post(n+1); + int i; + for (i = 0; i < n+1; ++i) inv_post(post(i)) = i; // inv_post = post.inverse()??? + + // Renumber etree in postorder + IndexVector iwork(n); + IndexVector et_save(n+1); + for (i = 0; i < n; ++i) + { + iwork(post(i)) = post(et(i)); + } + et_save = et; // Save the original etree + et = iwork; + + // compute the number of descendants of each node in the etree + relax_end.setConstant(IND_EMPTY); + int j, parent; + descendants.setZero(); + for (j = 0; j < n; j++) + { + parent = et(j); + if (parent != n) // not the dummy root + descendants(parent) += descendants(j) + 1; + } + // Identify the relaxed supernodes by postorder traversal of the etree + int snode_start; // beginning of a snode + int k; + int nsuper_et_post = 0; // Number of relaxed snodes in postordered etree + int nsuper_et = 0; // Number of relaxed snodes in the original etree + int l; + for (j = 0; j < n; ) + { + parent = et(j); + snode_start = j; + while ( parent != n && descendants(parent) < relax_columns ) + { + j = parent; + parent = et(j); + } + // Found a supernode in postordered etree, j is the last column + ++nsuper_et_post; + k = n; + for (i = snode_start; i <= j; ++i) + k = std::min(k, inv_post(i)); + l = inv_post(j); + if ( (l - k) == (j - snode_start) ) // Same number of columns in the snode + { + // This is also a supernode in the original etree + relax_end(k) = l; // Record last column + ++nsuper_et; + } + else + { + for (i = snode_start; i <= j; ++i) + { + l = inv_post(i); + if (descendants(i) == 0) + { + relax_end(l) = l; + ++nsuper_et; + } + } + } + j++; + // Search for a new leaf + while (descendants(j) != 0 && j < n) j++; + } // End postorder traversal of the etree + + // Recover the original etree + et = et_save; +} +#endif diff --git a/Eigen/src/SparseLU/SparseLU_kernel_bmod.h b/Eigen/src/SparseLU/SparseLU_kernel_bmod.h new file mode 100644 index 000000000..d5cad49b1 --- /dev/null +++ b/Eigen/src/SparseLU/SparseLU_kernel_bmod.h @@ -0,0 +1,109 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr> +// Copyright (C) 2012 Gael Guennebaud <gael.guennebaud@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef SPARSELU_KERNEL_BMOD_H +#define SPARSELU_KERNEL_BMOD_H + +/** + * \brief Performs numeric block updates from a given supernode to a single column + * + * \param segsize Size of the segment (and blocks ) to use for updates + * \param [in,out]dense Packed values of the original matrix + * \param tempv temporary vector to use for updates + * \param lusup array containing the supernodes + * \param nsupr Number of rows in the supernode + * \param nrow Number of rows in the rectangular part of the supernode + * \param lsub compressed row subscripts of supernodes + * \param lptr pointer to the first column of the current supernode in lsub + * \param no_zeros Number of nonzeros elements before the diagonal part of the supernode + * \return 0 on success + */ +template <int SegSizeAtCompileTime> struct LU_kernel_bmod +{ + template <typename BlockScalarVector, typename ScalarVector, typename IndexVector> + EIGEN_DONT_INLINE static void run(const int segsize, BlockScalarVector& dense, ScalarVector& tempv, ScalarVector& lusup, int& luptr, const int nsupr, const int nrow, IndexVector& lsub, const int lptr, const int no_zeros) + { + typedef typename ScalarVector::Scalar Scalar; + // First, copy U[*,j] segment from dense(*) to tempv(*) + // The result of triangular solve is in tempv[*]; + // The result of matric-vector update is in dense[*] + int isub = lptr + no_zeros; + int i, irow; + for (i = 0; i < ((SegSizeAtCompileTime==Dynamic)?segsize:SegSizeAtCompileTime); i++) + { + irow = lsub(isub); + tempv(i) = dense(irow); + ++isub; + } + // Dense triangular solve -- start effective triangle + luptr += nsupr * no_zeros + no_zeros; + // Form Eigen matrix and vector + Map<Matrix<Scalar,SegSizeAtCompileTime,SegSizeAtCompileTime>, 0, OuterStride<> > A( &(lusup.data()[luptr]), segsize, segsize, OuterStride<>(nsupr) ); + Map<Matrix<Scalar,SegSizeAtCompileTime,1> > u(tempv.data(), segsize); + + u = A.template triangularView<UnitLower>().solve(u); + + // Dense matrix-vector product y <-- B*x + luptr += segsize; + Map<Matrix<Scalar,Dynamic,SegSizeAtCompileTime>, 0, OuterStride<> > B( &(lusup.data()[luptr]), nrow, segsize, OuterStride<>(nsupr) ); + Map<Matrix<Scalar,Dynamic,1> > l(tempv.data()+segsize, nrow); + if(SegSizeAtCompileTime==2) + l = u(0) * B.col(0) + u(1) * B.col(1); + else if(SegSizeAtCompileTime==3) + l = u(0) * B.col(0) + u(1) * B.col(1) + u(2) * B.col(2); + else + l.noalias() = B * u; + + // Scatter tempv[] into SPA dense[] as a temporary storage + isub = lptr + no_zeros; + for (i = 0; i < ((SegSizeAtCompileTime==Dynamic)?segsize:SegSizeAtCompileTime); i++) + { + irow = lsub(isub++); + dense(irow) = tempv(i); + } + + // Scatter l into SPA dense[] + for (i = 0; i < nrow; i++) + { + irow = lsub(isub++); + dense(irow) -= l(i); + } + } +}; + +template <> struct LU_kernel_bmod<1> +{ + template <typename BlockScalarVector, typename ScalarVector, typename IndexVector> + EIGEN_DONT_INLINE static void run(const int /*segsize*/, BlockScalarVector& dense, ScalarVector& /*tempv*/, ScalarVector& lusup, int& luptr, const int nsupr, const int nrow, IndexVector& lsub, const int lptr, const int no_zeros) + { + typedef typename ScalarVector::Scalar Scalar; + Scalar f = dense(lsub(lptr + no_zeros)); + luptr += nsupr * no_zeros + no_zeros + 1; + const Scalar* a(lusup.data() + luptr); + const typename IndexVector::Scalar* irow(lsub.data()+lptr + no_zeros + 1); + int i = 0; + for (; i+1 < nrow; i+=2) + { + int i0 = *(irow++); + int i1 = *(irow++); + Scalar a0 = *(a++); + Scalar a1 = *(a++); + Scalar d0 = dense.coeff(i0); + Scalar d1 = dense.coeff(i1); + d0 -= f*a0; + d1 -= f*a1; + dense.coeffRef(i0) = d0; + dense.coeffRef(i1) = d1; + } + if(i<nrow) + dense.coeffRef(*(irow++)) -= f * *(a++); + } +}; +#endif diff --git a/Eigen/src/SparseLU/SparseLU_panel_bmod.h b/Eigen/src/SparseLU/SparseLU_panel_bmod.h new file mode 100644 index 000000000..1b31cc31a --- /dev/null +++ b/Eigen/src/SparseLU/SparseLU_panel_bmod.h @@ -0,0 +1,208 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr> +// Copyright (C) 2012 Gael Guennebaud <gael.guennebaud@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +/* + + * NOTE: This file is the modified version of [s,d,c,z]panel_bmod.c file in SuperLU + + * -- SuperLU routine (version 3.0) -- + * Univ. of California Berkeley, Xerox Palo Alto Research Center, + * and Lawrence Berkeley National Lab. + * October 15, 2003 + * + * Copyright (c) 1994 by Xerox Corporation. All rights reserved. + * + * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY + * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. + * + * Permission is hereby granted to use or copy this program for any + * purpose, provided the above notices are retained on all copies. + * Permission to modify the code and to distribute modified code is + * granted, provided the above notices are retained, and a notice that + * the code was modified is included with the above copyright notice. + */ +#ifndef SPARSELU_PANEL_BMOD_H +#define SPARSELU_PANEL_BMOD_H +/** + * \brief Performs numeric block updates (sup-panel) in topological order. + * + * Before entering this routine, the original nonzeros in the panel + * were already copied i nto the spa[m,w] ... FIXME to be checked + * + * \param m number of rows in the matrix + * \param w Panel size + * \param jcol Starting column of the panel + * \param nseg Number of segments in the U part + * \param dense Store the full representation of the panel + * \param tempv working array + * \param segrep segment representative... first row in the segment + * \param repfnz First nonzero rows + * \param glu Global LU data. + * + * + */ +template <typename DenseIndexBlock, typename IndexVector, typename ScalarVector> +void LU_panel_bmod(const int m, const int w, const int jcol, const int nseg, ScalarVector& dense, ScalarVector& tempv, DenseIndexBlock& segrep, DenseIndexBlock& repfnz, LU_perfvalues& perfv, LU_GlobalLU_t<IndexVector,ScalarVector>& glu) +{ + typedef typename ScalarVector::Scalar Scalar; + + int ksub,jj,nextl_col; + int fsupc, nsupc, nsupr, nrow; + int krep, kfnz; + int lptr; // points to the row subscripts of a supernode + int luptr; // ... + int segsize,no_zeros ; + // For each nonz supernode segment of U[*,j] in topological order + int k = nseg - 1; + for (ksub = 0; ksub < nseg; ksub++) + { // For each updating supernode + + /* krep = representative of current k-th supernode + * fsupc = first supernodal column + * nsupc = number of columns in a supernode + * nsupr = number of rows in a supernode + */ + krep = segrep(k); k--; + fsupc = glu.xsup(glu.supno(krep)); + nsupc = krep - fsupc + 1; + nsupr = glu.xlsub(fsupc+1) - glu.xlsub(fsupc); + nrow = nsupr - nsupc; + lptr = glu.xlsub(fsupc); + + // loop over the panel columns to detect the actual number of columns and rows + int u_rows = 0; + int u_cols = 0; + for (jj = jcol; jj < jcol + w; jj++) + { + nextl_col = (jj-jcol) * m; + VectorBlock<IndexVector> repfnz_col(repfnz, nextl_col, m); // First nonzero column index for each row + + kfnz = repfnz_col(krep); + if ( kfnz == IND_EMPTY ) + continue; // skip any zero segment + + segsize = krep - kfnz + 1; + u_cols++; + u_rows = std::max(segsize,u_rows); + } + + // if the blocks are large enough, use level 3 + // TODO find better heuristics! + if( nsupc >= perfv.colblk && nrow > perfv.rowblk && u_cols>perfv.relax) + { + Map<Matrix<Scalar,Dynamic,Dynamic> > U(tempv.data(), u_rows, u_cols); + + // gather U + int u_col = 0; + for (jj = jcol; jj < jcol + w; jj++) + { + nextl_col = (jj-jcol) * m; + VectorBlock<IndexVector> repfnz_col(repfnz, nextl_col, m); // First nonzero column index for each row + VectorBlock<ScalarVector> dense_col(dense, nextl_col, m); // Scatter/gather entire matrix column from/to here + + kfnz = repfnz_col(krep); + if ( kfnz == IND_EMPTY ) + continue; // skip any zero segment + + segsize = krep - kfnz + 1; + luptr = glu.xlusup(fsupc); + no_zeros = kfnz - fsupc; + + int isub = lptr + no_zeros; + int off = u_rows-segsize; + for (int i = 0; i < off; i++) U(i,u_col) = 0; + for (int i = 0; i < segsize; i++) + { + int irow = glu.lsub(isub); + U(i+off,u_col) = dense_col(irow); + ++isub; + } + u_col++; + } + // solve U = A^-1 U + luptr = glu.xlusup(fsupc); + no_zeros = (krep - u_rows + 1) - fsupc; + luptr += nsupr * no_zeros + no_zeros; + Map<Matrix<Scalar,Dynamic,Dynamic>, 0, OuterStride<> > A(glu.lusup.data()+luptr, u_rows, u_rows, OuterStride<>(nsupr) ); + U = A.template triangularView<UnitLower>().solve(U); + + // update + luptr += u_rows; + Map<Matrix<Scalar,Dynamic,Dynamic>, 0, OuterStride<> > B(glu.lusup.data()+luptr, nrow, u_rows, OuterStride<>(nsupr) ); + assert(tempv.size()>w*u_rows + nrow*w); + Map<Matrix<Scalar,Dynamic,Dynamic> > L(tempv.data()+w*u_rows, nrow, u_cols); + L.noalias() = B * U; + + // scatter U and L + u_col = 0; + for (jj = jcol; jj < jcol + w; jj++) + { + nextl_col = (jj-jcol) * m; + VectorBlock<IndexVector> repfnz_col(repfnz, nextl_col, m); // First nonzero column index for each row + VectorBlock<ScalarVector> dense_col(dense, nextl_col, m); // Scatter/gather entire matrix column from/to here + + kfnz = repfnz_col(krep); + if ( kfnz == IND_EMPTY ) + continue; // skip any zero segment + + segsize = krep - kfnz + 1; + no_zeros = kfnz - fsupc; + int isub = lptr + no_zeros; + + int off = u_rows-segsize; + for (int i = 0; i < segsize; i++) + { + int irow = glu.lsub(isub++); + dense_col(irow) = U.coeff(i+off,u_col); + U.coeffRef(i+off,u_col) = 0; + } + + // Scatter l into SPA dense[] + for (int i = 0; i < nrow; i++) + { + int irow = glu.lsub(isub++); + dense_col(irow) -= L.coeff(i,u_col); + L.coeffRef(i,u_col) = 0; + } + u_col++; + } + } + else // level 2 only + { + // Sequence through each column in the panel + for (jj = jcol; jj < jcol + w; jj++) + { + nextl_col = (jj-jcol) * m; + VectorBlock<IndexVector> repfnz_col(repfnz, nextl_col, m); // First nonzero column index for each row + VectorBlock<ScalarVector> dense_col(dense, nextl_col, m); // Scatter/gather entire matrix column from/to here + + kfnz = repfnz_col(krep); + if ( kfnz == IND_EMPTY ) + continue; // skip any zero segment + + segsize = krep - kfnz + 1; + luptr = glu.xlusup(fsupc); + + // NOTE : Unlike the original implementation in SuperLU, + // there is no update feature for col-col, 2col-col ... + + // Perform a trianglar solve and block update, + // then scatter the result of sup-col update to dense[] + no_zeros = kfnz - fsupc; + if(segsize==1) LU_kernel_bmod<1>::run(segsize, dense_col, tempv, glu.lusup, luptr, nsupr, nrow, glu.lsub, lptr, no_zeros); + else if(segsize==2) LU_kernel_bmod<2>::run(segsize, dense_col, tempv, glu.lusup, luptr, nsupr, nrow, glu.lsub, lptr, no_zeros); + else if(segsize==3) LU_kernel_bmod<3>::run(segsize, dense_col, tempv, glu.lusup, luptr, nsupr, nrow, glu.lsub, lptr, no_zeros); + else LU_kernel_bmod<Dynamic>::run(segsize, dense_col, tempv, glu.lusup, luptr, nsupr, nrow, glu.lsub, lptr, no_zeros); + } // End for each column in the panel + } + + } // End for each updating supernode +} +#endif
\ No newline at end of file diff --git a/Eigen/src/SparseLU/SparseLU_panel_dfs.h b/Eigen/src/SparseLU/SparseLU_panel_dfs.h new file mode 100644 index 000000000..3581f6d9c --- /dev/null +++ b/Eigen/src/SparseLU/SparseLU_panel_dfs.h @@ -0,0 +1,247 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +/* + + * NOTE: This file is the modified version of [s,d,c,z]panel_dfs.c file in SuperLU + + * -- SuperLU routine (version 2.0) -- + * Univ. of California Berkeley, Xerox Palo Alto Research Center, + * and Lawrence Berkeley National Lab. + * November 15, 1997 + * + * Copyright (c) 1994 by Xerox Corporation. All rights reserved. + * + * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY + * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. + * + * Permission is hereby granted to use or copy this program for any + * purpose, provided the above notices are retained on all copies. + * Permission to modify the code and to distribute modified code is + * granted, provided the above notices are retained, and a notice that + * the code was modified is included with the above copyright notice. + */ +#ifndef SPARSELU_PANEL_DFS_H +#define SPARSELU_PANEL_DFS_H + +template <typename ScalarVector, typename IndexVector, typename MarkerType, typename Traits> +void LU_dfs_kernel(const int jj, IndexVector& perm_r, + int& nseg, IndexVector& panel_lsub, IndexVector& segrep, + VectorBlock<IndexVector>& repfnz_col, IndexVector& xprune, MarkerType& marker, IndexVector& parent, + IndexVector& xplore, LU_GlobalLU_t<IndexVector, ScalarVector>& glu, + int& nextl_col, int krow, Traits& traits + ) +{ + + int kmark = marker(krow); + + // For each unmarked krow of jj + marker(krow) = jj; + int kperm = perm_r(krow); + if (kperm == IND_EMPTY ) { + // krow is in L : place it in structure of L(*, jj) + panel_lsub(nextl_col++) = krow; // krow is indexed into A + + traits.mem_expand(panel_lsub, nextl_col, kmark); + } + else + { + // krow is in U : if its supernode-representative krep + // has been explored, update repfnz(*) + // krep = supernode representative of the current row + int krep = glu.xsup(glu.supno(kperm)+1) - 1; + // First nonzero element in the current column: + int myfnz = repfnz_col(krep); + + if (myfnz != IND_EMPTY ) + { + // Representative visited before + if (myfnz > kperm ) repfnz_col(krep) = kperm; + + } + else + { + // Otherwise, perform dfs starting at krep + int oldrep = IND_EMPTY; + parent(krep) = oldrep; + repfnz_col(krep) = kperm; + int xdfs = glu.xlsub(krep); + int maxdfs = xprune(krep); + + int kpar; + do + { + // For each unmarked kchild of krep + while (xdfs < maxdfs) + { + int kchild = glu.lsub(xdfs); + xdfs++; + int chmark = marker(kchild); + + if (chmark != jj ) + { + marker(kchild) = jj; + int chperm = perm_r(kchild); + + if (chperm == IND_EMPTY) + { + // case kchild is in L: place it in L(*, j) + panel_lsub(nextl_col++) = kchild; + traits.mem_expand(panel_lsub, nextl_col, chmark); + } + else + { + // case kchild is in U : + // chrep = its supernode-rep. If its rep has been explored, + // update its repfnz(*) + int chrep = glu.xsup(glu.supno(chperm)+1) - 1; + myfnz = repfnz_col(chrep); + + if (myfnz != IND_EMPTY) + { // Visited before + if (myfnz > chperm) + repfnz_col(chrep) = chperm; + } + else + { // Cont. dfs at snode-rep of kchild + xplore(krep) = xdfs; + oldrep = krep; + krep = chrep; // Go deeper down G(L) + parent(krep) = oldrep; + repfnz_col(krep) = chperm; + xdfs = glu.xlsub(krep); + maxdfs = xprune(krep); + + } // end if myfnz != -1 + } // end if chperm == -1 + + } // end if chmark !=jj + } // end while xdfs < maxdfs + + // krow has no more unexplored nbrs : + // Place snode-rep krep in postorder DFS, if this + // segment is seen for the first time. (Note that + // "repfnz(krep)" may change later.) + // Baktrack dfs to its parent + if(traits.update_segrep(krep,jj)) + //if (marker1(krep) < jcol ) + { + segrep(nseg) = krep; + ++nseg; + //marker1(krep) = jj; + } + + kpar = parent(krep); // Pop recursion, mimic recursion + if (kpar == IND_EMPTY) + break; // dfs done + krep = kpar; + xdfs = xplore(krep); + maxdfs = xprune(krep); + + } while (kpar != IND_EMPTY); // Do until empty stack + + } // end if (myfnz = -1) + + } // end if (kperm == -1) +} + +/** + * \brief Performs a symbolic factorization on a panel of columns [jcol, jcol+w) + * + * A supernode representative is the last column of a supernode. + * The nonzeros in U[*,j] are segments that end at supernodes representatives + * + * The routine returns a list of the supernodal representatives + * in topological order of the dfs that generates them. This list is + * a superset of the topological order of each individual column within + * the panel. + * The location of the first nonzero in each supernodal segment + * (supernodal entry location) is also returned. Each column has + * a separate list for this purpose. + * + * Two markers arrays are used for dfs : + * marker[i] == jj, if i was visited during dfs of current column jj; + * marker1[i] >= jcol, if i was visited by earlier columns in this panel; + * + * \param [in]m number of rows in the matrix + * \param [in]w Panel size + * \param [in]jcol Starting column of the panel + * \param [in]A Input matrix in column-major storage + * \param [in]perm_r Row permutation + * \param [out]nseg Number of U segments + * \param [out]dense Accumulate the column vectors of the panel + * \param [out]panel_lsub Subscripts of the row in the panel + * \param [out]segrep Segment representative i.e first nonzero row of each segment + * \param [out]repfnz First nonzero location in each row + * \param [out]xprune + * \param [out]marker + * + * + */ + +template<typename IndexVector> +struct LU_panel_dfs_traits +{ + typedef typename IndexVector::Scalar Index; + LU_panel_dfs_traits(Index jcol, Index* marker) + : m_jcol(jcol), m_marker(marker) + {} + bool update_segrep(Index krep, Index jj) + { + if(m_marker[krep]<m_jcol) + { + m_marker[krep] = jj; + return true; + } + return false; + } + void mem_expand(IndexVector& /*glu.lsub*/, int /*nextl*/, int /*chmark*/) {} + enum { ExpandMem = false }; + Index m_jcol; + Index* m_marker; +}; + +template <typename MatrixType, typename ScalarVector, typename IndexVector> +void LU_panel_dfs(const int m, const int w, const int jcol, MatrixType& A, IndexVector& perm_r, int& nseg, ScalarVector& dense, IndexVector& panel_lsub, IndexVector& segrep, IndexVector& repfnz, IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, LU_GlobalLU_t<IndexVector, ScalarVector>& glu) +{ + int nextl_col; // Next available position in panel_lsub[*,jj] + + // Initialize pointers + VectorBlock<IndexVector> marker1(marker, m, m); + nseg = 0; + + LU_panel_dfs_traits<IndexVector> traits(jcol, marker1.data()); + + // For each column in the panel + for (int jj = jcol; jj < jcol + w; jj++) + { + nextl_col = (jj - jcol) * m; + + VectorBlock<IndexVector> repfnz_col(repfnz, nextl_col, m); // First nonzero location in each row + VectorBlock<ScalarVector> dense_col(dense,nextl_col, m); // Accumulate a column vector here + + + // For each nnz in A[*, jj] do depth first search + for (typename MatrixType::InnerIterator it(A, jj); it; ++it) + { + int krow = it.row(); + dense_col(krow) = it.value(); + + int kmark = marker(krow); + if (kmark == jj) + continue; // krow visited before, go to the next nonzero + + LU_dfs_kernel(jj, perm_r, nseg, panel_lsub, segrep, repfnz_col, xprune, marker, parent, + xplore, glu, nextl_col, krow, traits); + }// end for nonzeros in column jj + + } // end for column jj + +} +#endif
\ No newline at end of file diff --git a/Eigen/src/SparseLU/SparseLU_pivotL.h b/Eigen/src/SparseLU/SparseLU_pivotL.h new file mode 100644 index 000000000..4ad49adee --- /dev/null +++ b/Eigen/src/SparseLU/SparseLU_pivotL.h @@ -0,0 +1,128 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +/* + + * NOTE: This file is the modified version of xpivotL.c file in SuperLU + + * -- SuperLU routine (version 3.0) -- + * Univ. of California Berkeley, Xerox Palo Alto Research Center, + * and Lawrence Berkeley National Lab. + * October 15, 2003 + * + * Copyright (c) 1994 by Xerox Corporation. All rights reserved. + * + * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY + * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. + * + * Permission is hereby granted to use or copy this program for any + * purpose, provided the above notices are retained on all copies. + * Permission to modify the code and to distribute modified code is + * granted, provided the above notices are retained, and a notice that + * the code was modified is included with the above copyright notice. + */ +#ifndef SPARSELU_PIVOTL_H +#define SPARSELU_PIVOTL_H +/** + * \brief Performs the numerical pivotin on the current column of L, and the CDIV operation. + * + * Pivot policy : + * (1) Compute thresh = u * max_(i>=j) abs(A_ij); + * (2) IF user specifies pivot row k and abs(A_kj) >= thresh THEN + * pivot row = k; + * ELSE IF abs(A_jj) >= thresh THEN + * pivot row = j; + * ELSE + * pivot row = m; + * + * Note: If you absolutely want to use a given pivot order, then set u=0.0. + * + * \param jcol The current column of L + * \param u diagonal pivoting threshold + * \param [in,out]perm_r Row permutation (threshold pivoting) + * \param [in] iperm_c column permutation - used to finf diagonal of Pc*A*Pc' + * \param [out]pivrow The pivot row + * \param glu Global LU data + * \return 0 if success, i > 0 if U(i,i) is exactly zero + * + */ +template <typename IndexVector, typename ScalarVector> +int LU_pivotL(const int jcol, const typename ScalarVector::RealScalar diagpivotthresh, IndexVector& perm_r, IndexVector& iperm_c, int& pivrow, LU_GlobalLU_t<IndexVector, ScalarVector>& glu) +{ + typedef typename IndexVector::Scalar Index; + typedef typename ScalarVector::Scalar Scalar; + typedef typename ScalarVector::RealScalar RealScalar; + + Index fsupc = (glu.xsup)((glu.supno)(jcol)); // First column in the supernode containing the column jcol + Index nsupc = jcol - fsupc; // Number of columns in the supernode portion, excluding jcol; nsupc >=0 + Index lptr = glu.xlsub(fsupc); // pointer to the starting location of the row subscripts for this supernode portion + Index nsupr = glu.xlsub(fsupc+1) - lptr; // Number of rows in the supernode + Scalar* lu_sup_ptr = &(glu.lusup.data()[glu.xlusup(fsupc)]); // Start of the current supernode + Scalar* lu_col_ptr = &(glu.lusup.data()[glu.xlusup(jcol)]); // Start of jcol in the supernode + Index* lsub_ptr = &(glu.lsub.data()[lptr]); // Start of row indices of the supernode + + // Determine the largest abs numerical value for partial pivoting + Index diagind = iperm_c(jcol); // diagonal index + RealScalar pivmax = 0.0; + Index pivptr = nsupc; + Index diag = IND_EMPTY; + RealScalar rtemp; + Index isub, icol, itemp, k; + for (isub = nsupc; isub < nsupr; ++isub) { + rtemp = std::abs(lu_col_ptr[isub]); + if (rtemp > pivmax) { + pivmax = rtemp; + pivptr = isub; + } + if (lsub_ptr[isub] == diagind) diag = isub; + } + + // Test for singularity + if ( pivmax == 0.0 ) { + pivrow = lsub_ptr[pivptr]; + perm_r(pivrow) = jcol; + return (jcol+1); + } + + RealScalar thresh = diagpivotthresh * pivmax; + + // Choose appropriate pivotal element + + { + // Test if the diagonal element can be used as a pivot (given the threshold value) + if (diag >= 0 ) + { + // Diagonal element exists + rtemp = std::abs(lu_col_ptr[diag]); + if (rtemp != 0.0 && rtemp >= thresh) pivptr = diag; + } + pivrow = lsub_ptr[pivptr]; + } + + // Record pivot row + perm_r(pivrow) = jcol; + // Interchange row subscripts + if (pivptr != nsupc ) + { + std::swap( lsub_ptr[pivptr], lsub_ptr[nsupc] ); + // Interchange numerical values as well, for the two rows in the whole snode + // such that L is indexed the same way as A + for (icol = 0; icol <= nsupc; icol++) + { + itemp = pivptr + icol * nsupr; + std::swap(lu_sup_ptr[itemp], lu_sup_ptr[nsupc + icol * nsupr]); + } + } + // cdiv operations + Scalar temp = Scalar(1.0) / lu_col_ptr[nsupc]; + for (k = nsupc+1; k < nsupr; k++) + lu_col_ptr[k] *= temp; + return 0; +} +#endif
\ No newline at end of file diff --git a/Eigen/src/SparseLU/SparseLU_pruneL.h b/Eigen/src/SparseLU/SparseLU_pruneL.h new file mode 100644 index 000000000..f29285bd4 --- /dev/null +++ b/Eigen/src/SparseLU/SparseLU_pruneL.h @@ -0,0 +1,132 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +/* + + * NOTE: This file is the modified version of [s,d,c,z]pruneL.c file in SuperLU + + * -- SuperLU routine (version 2.0) -- + * Univ. of California Berkeley, Xerox Palo Alto Research Center, + * and Lawrence Berkeley National Lab. + * November 15, 1997 + * + * Copyright (c) 1994 by Xerox Corporation. All rights reserved. + * + * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY + * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. + * + * Permission is hereby granted to use or copy this program for any + * purpose, provided the above notices are retained on all copies. + * Permission to modify the code and to distribute modified code is + * granted, provided the above notices are retained, and a notice that + * the code was modified is included with the above copyright notice. + */ +#ifndef SPARSELU_PRUNEL_H +#define SPARSELU_PRUNEL_H + +/** + * \brief Prunes the L-structure. + * + * It prunes the L-structure of supernodes whose L-structure contains the current pivot row "pivrow" + * + * + * \param jcol The current column of L + * \param [in]perm_r Row permutation + * \param [out]pivrow The pivot row + * \param nseg Number of segments + * \param segrep + * \param repfnz + * \param [out]xprune + * \param glu Global LU data + * + */ +template <typename IndexVector, typename ScalarVector, typename BlockIndexVector> +void LU_pruneL(const int jcol, const IndexVector& perm_r, const int pivrow, const int nseg, const IndexVector& segrep, BlockIndexVector& repfnz, IndexVector& xprune, LU_GlobalLU_t<IndexVector, ScalarVector>& glu) +{ + typedef typename IndexVector::Scalar Index; + typedef typename ScalarVector::Scalar Scalar; + + // For each supernode-rep irep in U(*,j] + int jsupno = glu.supno(jcol); + int i,irep,irep1; + bool movnum, do_prune = false; + Index kmin, kmax, minloc, maxloc,krow; + for (i = 0; i < nseg; i++) + { + irep = segrep(i); + irep1 = irep + 1; + do_prune = false; + + // Don't prune with a zero U-segment + if (repfnz(irep) == IND_EMPTY) continue; + + // If a snode overlaps with the next panel, then the U-segment + // is fragmented into two parts -- irep and irep1. We should let + // pruning occur at the rep-column in irep1s snode. + if (glu.supno(irep) == glu.supno(irep1) ) continue; // don't prune + + // If it has not been pruned & it has a nonz in row L(pivrow,i) + if (glu.supno(irep) != jsupno ) + { + if ( xprune (irep) >= glu.xlsub(irep1) ) + { + kmin = glu.xlsub(irep); + kmax = glu.xlsub(irep1) - 1; + for (krow = kmin; krow <= kmax; krow++) + { + if (glu.lsub(krow) == pivrow) + { + do_prune = true; + break; + } + } + } + + if (do_prune) + { + // do a quicksort-type partition + // movnum=true means that the num values have to be exchanged + movnum = false; + if (irep == glu.xsup(glu.supno(irep)) ) // Snode of size 1 + movnum = true; + + while (kmin <= kmax) + { + if (perm_r(glu.lsub(kmax)) == IND_EMPTY) + kmax--; + else if ( perm_r(glu.lsub(kmin)) != IND_EMPTY) + kmin++; + else + { + // kmin below pivrow (not yet pivoted), and kmax + // above pivrow: interchange the two suscripts + std::swap(glu.lsub(kmin), glu.lsub(kmax)); + + // If the supernode has only one column, then we + // only keep one set of subscripts. For any subscript + // intercnahge performed, similar interchange must be + // done on the numerical values. + if (movnum) + { + minloc = glu.xlusup(irep) + ( kmin - glu.xlsub(irep) ); + maxloc = glu.xlusup(irep) + ( kmax - glu.xlsub(irep) ); + std::swap(glu.lusup(minloc), glu.lusup(maxloc)); + } + kmin++; + kmax--; + } + } // end while + + xprune(irep) = kmin; //Pruning + } // end if do_prune + } // end pruning + } // End for each U-segment +} + +#endif
\ No newline at end of file diff --git a/Eigen/src/SparseLU/SparseLU_relax_snode.h b/Eigen/src/SparseLU/SparseLU_relax_snode.h new file mode 100644 index 000000000..a9a0a00c1 --- /dev/null +++ b/Eigen/src/SparseLU/SparseLU_relax_snode.h @@ -0,0 +1,73 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +/* This file is a modified version of heap_relax_snode.c file in SuperLU + * -- SuperLU routine (version 3.0) -- + * Univ. of California Berkeley, Xerox Palo Alto Research Center, + * and Lawrence Berkeley National Lab. + * October 15, 2003 + * + * Copyright (c) 1994 by Xerox Corporation. All rights reserved. + * + * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY + * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. + * + * Permission is hereby granted to use or copy this program for any + * purpose, provided the above notices are retained on all copies. + * Permission to modify the code and to distribute modified code is + * granted, provided the above notices are retained, and a notice that + * the code was modified is included with the above copyright notice. + */ + +#ifndef SPARSELU_RELAX_SNODE_H +#define SPARSELU_RELAX_SNODE_H +/** + * \brief Identify the initial relaxed supernodes + * + * This routine is applied to a column elimination tree. + * It assumes that the matrix has been reordered according to the postorder of the etree + * \param et elimination tree + * \param relax_columns Maximum number of columns allowed in a relaxed snode + * \param descendants Number of descendants of each node in the etree + * \param relax_end last column in a supernode + */ +template <typename IndexVector> +void LU_relax_snode (const int n, IndexVector& et, const int relax_columns, IndexVector& descendants, IndexVector& relax_end) +{ + + // compute the number of descendants of each node in the etree + int j, parent; + relax_end.setConstant(IND_EMPTY); + descendants.setZero(); + for (j = 0; j < n; j++) + { + parent = et(j); + if (parent != n) // not the dummy root + descendants(parent) += descendants(j) + 1; + } + // Identify the relaxed supernodes by postorder traversal of the etree + int snode_start; // beginning of a snode + for (j = 0; j < n; ) + { + parent = et(j); + snode_start = j; + while ( parent != n && descendants(parent) < relax_columns ) + { + j = parent; + parent = et(j); + } + // Found a supernode in postordered etree, j is the last column + relax_end(snode_start) = j; // Record last column + j++; + // Search for a new leaf + while (descendants(j) != 0 && j < n) j++; + } // End postorder traversal of the etree + +} +#endif diff --git a/Eigen/src/SparseLU/SparseLU_snode_bmod.h b/Eigen/src/SparseLU/SparseLU_snode_bmod.h new file mode 100644 index 000000000..18e6a93d2 --- /dev/null +++ b/Eigen/src/SparseLU/SparseLU_snode_bmod.h @@ -0,0 +1,74 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +/* + + * NOTE: This file is the modified version of [s,d,c,z]snode_bmod.c file in SuperLU + + * -- SuperLU routine (version 3.0) -- + * Univ. of California Berkeley, Xerox Palo Alto Research Center, + * and Lawrence Berkeley National Lab. + * October 15, 2003 + * + * Copyright (c) 1994 by Xerox Corporation. All rights reserved. + * + * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY + * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. + * + * Permission is hereby granted to use or copy this program for any + * purpose, provided the above notices are retained on all copies. + * Permission to modify the code and to distribute modified code is + * granted, provided the above notices are retained, and a notice that + * the code was modified is included with the above copyright notice. + */ +#ifndef SPARSELU_SNODE_BMOD_H +#define SPARSELU_SNODE_BMOD_H +template <typename IndexVector, typename ScalarVector> +int LU_snode_bmod (const int jcol, const int fsupc, ScalarVector& dense, LU_GlobalLU_t<IndexVector,ScalarVector>& glu) +{ + typedef typename ScalarVector::Scalar Scalar; + + /* lsub : Compressed row subscripts of ( rectangular supernodes ) + * xlsub : xlsub[j] is the starting location of the j-th column in lsub(*) + * lusup : Numerical values of the rectangular supernodes + * xlusup[j] is the starting location of the j-th column in lusup(*) + */ + int nextlu = glu.xlusup(jcol); // Starting location of the next column to add + int irow, isub; + // Process the supernodal portion of L\U[*,jcol] + for (isub = glu.xlsub(fsupc); isub < glu.xlsub(fsupc+1); isub++) + { + irow = glu.lsub(isub); + glu.lusup(nextlu) = dense(irow); + dense(irow) = 0; + ++nextlu; + } + glu.xlusup(jcol + 1) = nextlu; // Initialize xlusup for next column ( jcol+1 ) + + if (fsupc < jcol ){ + int luptr = glu.xlusup(fsupc); // points to the first column of the supernode + int nsupr = glu.xlsub(fsupc + 1) -glu.xlsub(fsupc); //Number of rows in the supernode + int nsupc = jcol - fsupc; // Number of columns in the supernodal portion of L\U[*,jcol] + int ufirst = glu.xlusup(jcol); // points to the beginning of column jcol in supernode L\U(jsupno) + + int nrow = nsupr - nsupc; // Number of rows in the off-diagonal blocks + + // Solve the triangular system for U(fsupc:jcol, jcol) with L(fspuc:jcol, fsupc:jcol) + Map<Matrix<Scalar,Dynamic,Dynamic>,0,OuterStride<> > A( &(glu.lusup.data()[luptr]), nsupc, nsupc, OuterStride<>(nsupr) ); + VectorBlock<ScalarVector> u(glu.lusup, ufirst, nsupc); + u = A.template triangularView<UnitLower>().solve(u); // Call the Eigen dense triangular solve interface + + // Update the trailing part of the column jcol U(jcol:jcol+nrow, jcol) using L(jcol:jcol+nrow, fsupc:jcol) and U(fsupc:jcol) + new (&A) Map<Matrix<Scalar,Dynamic,Dynamic>,0,OuterStride<> > ( &(glu.lusup.data()[luptr+nsupc]), nrow, nsupc, OuterStride<>(nsupr) ); + VectorBlock<ScalarVector> l(glu.lusup, ufirst+nsupc, nrow); + l.noalias() -= A * u; + } + return 0; +} +#endif
\ No newline at end of file diff --git a/Eigen/src/SparseLU/SparseLU_snode_dfs.h b/Eigen/src/SparseLU/SparseLU_snode_dfs.h new file mode 100644 index 000000000..edb927cdc --- /dev/null +++ b/Eigen/src/SparseLU/SparseLU_snode_dfs.h @@ -0,0 +1,96 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +/* + + * NOTE: This file is the modified version of [s,d,c,z]snode_dfs.c file in SuperLU + + * -- SuperLU routine (version 2.0) -- + * Univ. of California Berkeley, Xerox Palo Alto Research Center, + * and Lawrence Berkeley National Lab. + * November 15, 1997 + * + * Copyright (c) 1994 by Xerox Corporation. All rights reserved. + * + * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY + * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. + * + * Permission is hereby granted to use or copy this program for any + * purpose, provided the above notices are retained on all copies. + * Permission to modify the code and to distribute modified code is + * granted, provided the above notices are retained, and a notice that + * the code was modified is included with the above copyright notice. + */ +#ifndef SPARSELU_SNODE_DFS_H +#define SPARSELU_SNODE_DFS_H + /** + * \brief Determine the union of the row structures of those columns within the relaxed snode. + * NOTE: The relaxed snodes are leaves of the supernodal etree, therefore, + * the portion outside the rectangular supernode must be zero. + * + * \param jcol start of the supernode + * \param kcol end of the supernode + * \param asub Row indices + * \param colptr Pointer to the beginning of each column + * \param xprune (out) The pruned tree ?? + * \param marker (in/out) working vector + * \return 0 on success, > 0 size of the memory when memory allocation failed + */ + template <typename MatrixType, typename IndexVector, typename ScalarVector> + int LU_snode_dfs(const int jcol, const int kcol,const MatrixType& mat, IndexVector& xprune, IndexVector& marker, LU_GlobalLU_t<IndexVector, ScalarVector>& glu) + { + typedef typename IndexVector::Scalar Index; + int mem; + Index nsuper = ++glu.supno(jcol); // Next available supernode number + int nextl = glu.xlsub(jcol); //Index of the starting location of the jcol-th column in lsub + int krow,kmark; + for (int i = jcol; i <=kcol; i++) + { + // For each nonzero in A(*,i) + for (typename MatrixType::InnerIterator it(mat, i); it; ++it) + { + krow = it.row(); + kmark = marker(krow); + if ( kmark != kcol ) + { + // First time to visit krow + marker(krow) = kcol; + glu.lsub(nextl++) = krow; + if( nextl >= glu.nzlmax ) + { + mem = LUMemXpand<IndexVector>(glu.lsub, glu.nzlmax, nextl, LSUB, glu.num_expansions); + if (mem) return mem; // Memory expansion failed... Return the memory allocated so far + } + } + } + glu.supno(i) = nsuper; + } + + // If supernode > 1, then make a copy of the subscripts for pruning + if (jcol < kcol) + { + Index new_next = nextl + (nextl - glu.xlsub(jcol)); + while (new_next > glu.nzlmax) + { + mem = LUMemXpand<IndexVector>(glu.lsub, glu.nzlmax, nextl, LSUB, glu.num_expansions); + if (mem) return mem; // Memory expansion failed... Return the memory allocated so far + } + Index ifrom, ito = nextl; + for (ifrom = glu.xlsub(jcol); ifrom < nextl;) + glu.lsub(ito++) = glu.lsub(ifrom++); + for (int i = jcol+1; i <=kcol; i++) glu.xlsub(i) = nextl; + nextl = ito; + } + glu.xsup(nsuper+1) = kcol + 1; // Start of next available supernode + glu.supno(kcol+1) = nsuper; + xprune(kcol) = nextl; + glu.xlsub(kcol+1) = nextl; + return 0; + } +#endif
\ No newline at end of file diff --git a/Eigen/src/SuperLUSupport/SuperLUSupport.h b/Eigen/src/SuperLUSupport/SuperLUSupport.h index d8a54e18c..cd6c4b91f 100644 --- a/Eigen/src/SuperLUSupport/SuperLUSupport.h +++ b/Eigen/src/SuperLUSupport/SuperLUSupport.h @@ -612,6 +612,7 @@ void SuperLU<MatrixType>::factorize(const MatrixType& a) this->initFactorization(a); + m_sluOptions.ColPerm = COLAMD; int info = 0; RealScalar recip_pivot_growth, rcond; RealScalar ferr, berr; |