author    | 2012-09-29 17:35:15 +0100
committer | 2012-09-29 17:35:15 +0100
commit    | 2008f761203e239908283d5d54167222b0bdde81 (patch)
tree      | 47b02ff768adfc4992d9453917d70c0428745e2d /Eigen
parent    | d7d96f669461c07294f5ef48645fb001cda2d5b0 (diff)
parent    | b68102d9a29ac2f631dead3d861f9e84c5897e9c (diff)
Merge
Diffstat (limited to 'Eigen')
44 files changed, 5521 insertions, 124 deletions
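Among other things, this merge adds the SparseLU and MetisSupport modules together with the new Ordering interface. The SparseLU solver class itself is not part of the hunks shown below, so the following is only a minimal usage sketch under two assumptions: that the new SparseLU follows Eigen's usual sparse-solver interface (analyzePattern/factorize/solve) and that a 32-bit-index METIS installation is available for MetisOrdering; the names A, b, x and solver are purely illustrative.

    #include <iostream>
    #include <Eigen/Dense>
    #include <Eigen/SparseLU>
    #include <Eigen/MetisSupport>

    int main()
    {
      typedef Eigen::SparseMatrix<double> SpMat;

      // Small sparse system A x = b (values are arbitrary).
      SpMat A(3, 3);
      A.insert(0, 0) = 4.0;
      A.insert(1, 1) = 3.0;
      A.insert(2, 2) = 2.0;
      A.insert(0, 2) = 1.0;
      A.insert(2, 0) = 1.0;
      A.makeCompressed();

      Eigen::VectorXd b(3);
      b << 1.0, 2.0, 3.0;

      // SparseLU parameterized with the METIS fill-reducing ordering added by this merge.
      Eigen::SparseLU<SpMat, Eigen::MetisOrdering<int> > solver;
      solver.analyzePattern(A);            // fill-reducing permutation (METIS_NodeND under the hood)
      solver.factorize(A);                 // numerical LU factorization
      Eigen::VectorXd x = solver.solve(b); // solve A x = b

      std::cout << x.transpose() << std::endl;
      return 0;
    }

Building such a program requires METIS headers and linking against the METIS library (e.g. -lmetis).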
diff --git a/Eigen/Core b/Eigen/Core index 88337e47e..502a4fc55 100644 --- a/Eigen/Core +++ b/Eigen/Core @@ -87,19 +87,25 @@ // so, to avoid compile errors when windows.h is included after Eigen/Core, ensure intrinsics are extern "C" here too. // notice that since these are C headers, the extern "C" is theoretically needed anyways. extern "C" { - #include <emmintrin.h> - #include <xmmintrin.h> - #ifdef EIGEN_VECTORIZE_SSE3 - #include <pmmintrin.h> - #endif - #ifdef EIGEN_VECTORIZE_SSSE3 - #include <tmmintrin.h> - #endif - #ifdef EIGEN_VECTORIZE_SSE4_1 - #include <smmintrin.h> - #endif - #ifdef EIGEN_VECTORIZE_SSE4_2 - #include <nmmintrin.h> + // In theory we should only include immintrin.h and not the other *mmintrin.h header files directly. + // Doing so triggers some issues with ICC. However old gcc versions seems to not have this file, thus: + #ifdef __INTEL_COMPILER + #include <immintrin.h> + #else + #include <emmintrin.h> + #include <xmmintrin.h> + #ifdef EIGEN_VECTORIZE_SSE3 + #include <pmmintrin.h> + #endif + #ifdef EIGEN_VECTORIZE_SSSE3 + #include <tmmintrin.h> + #endif + #ifdef EIGEN_VECTORIZE_SSE4_1 + #include <smmintrin.h> + #endif + #ifdef EIGEN_VECTORIZE_SSE4_2 + #include <nmmintrin.h> + #endif #endif } // end extern "C" #elif defined __ALTIVEC__ diff --git a/Eigen/MetisSupport b/Eigen/MetisSupport new file mode 100644 index 000000000..a44086ad9 --- /dev/null +++ b/Eigen/MetisSupport @@ -0,0 +1,26 @@ +#ifndef EIGEN_METISSUPPORT_MODULE_H +#define EIGEN_METISSUPPORT_MODULE_H + +#include "SparseCore" + +#include "src/Core/util/DisableStupidWarnings.h" + +extern "C" { +#include <metis.h> +} + + +/** \ingroup Support_modules + * \defgroup MetisSupport_Module MetisSupport module + * + * \code + * #include <Eigen/MetisSupport> + * \endcode + */ + + +#include "src/MetisSupport/MetisSupport.h" + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_METISSUPPORT_MODULE_H diff --git a/Eigen/OrderingMethods b/Eigen/OrderingMethods index 1e2d87452..bb43220e8 100644 --- a/Eigen/OrderingMethods +++ b/Eigen/OrderingMethods @@ -17,7 +17,7 @@ */ #include "src/OrderingMethods/Amd.h" - +#include "src/OrderingMethods/Ordering.h" #include "src/Core/util/ReenableStupidWarnings.h" #endif // EIGEN_ORDERINGMETHODS_MODULE_H diff --git a/Eigen/SparseLU b/Eigen/SparseLU new file mode 100644 index 000000000..452bc9f83 --- /dev/null +++ b/Eigen/SparseLU @@ -0,0 +1,17 @@ +#ifndef EIGEN_SPARSELU_MODULE_H +#define EIGEN_SPARSELU_MODULE_H + +#include "SparseCore" + + +/** \ingroup Sparse_modules + * \defgroup SparseLU_Module SparseLU module + * + */ + +// Ordering interface +#include "OrderingMethods" + +#include "src/SparseLU/SparseLU.h" + +#endif // EIGEN_SPARSELU_MODULE_H diff --git a/Eigen/src/Core/DiagonalMatrix.h b/Eigen/src/Core/DiagonalMatrix.h index f27ab798a..da0264b0e 100644 --- a/Eigen/src/Core/DiagonalMatrix.h +++ b/Eigen/src/Core/DiagonalMatrix.h @@ -20,6 +20,7 @@ class DiagonalBase : public EigenBase<Derived> public: typedef typename internal::traits<Derived>::DiagonalVectorType DiagonalVectorType; typedef typename DiagonalVectorType::Scalar Scalar; + typedef typename DiagonalVectorType::RealScalar RealScalar; typedef typename internal::traits<Derived>::StorageKind StorageKind; typedef typename internal::traits<Derived>::Index Index; @@ -65,6 +66,17 @@ class DiagonalBase : public EigenBase<Derived> return diagonal().cwiseInverse(); } + inline const DiagonalWrapper<const CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const DiagonalVectorType> > + operator*(const Scalar& scalar) 
const + { + return diagonal() * scalar; + } + friend inline const DiagonalWrapper<const CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const DiagonalVectorType> > + operator*(const Scalar& scalar, const DiagonalBase& other) + { + return other.diagonal() * scalar; + } + #ifdef EIGEN2_SUPPORT template<typename OtherDerived> bool isApprox(const DiagonalBase<OtherDerived>& other, typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision()) const diff --git a/Eigen/src/Core/Functors.h b/Eigen/src/Core/Functors.h index c9e8ab150..09388972a 100644 --- a/Eigen/src/Core/Functors.h +++ b/Eigen/src/Core/Functors.h @@ -454,7 +454,7 @@ struct functor_traits<scalar_log_op<Scalar> > * indeed it seems better to declare m_other as a Packet and do the pset1() once * in the constructor. However, in practice: * - GCC does not like m_other as a Packet and generate a load every time it needs it - * - on the other hand GCC is able to moves the pset1() away the loop :) + * - on the other hand GCC is able to moves the pset1() outside the loop :) * - simpler code ;) * (ICC and gcc 4.4 seems to perform well in both cases, the issue is visible with y = a*x + b*y) */ @@ -485,33 +485,6 @@ template<typename Scalar1,typename Scalar2> struct functor_traits<scalar_multiple2_op<Scalar1,Scalar2> > { enum { Cost = NumTraits<Scalar1>::MulCost, PacketAccess = false }; }; -template<typename Scalar, bool IsInteger> -struct scalar_quotient1_impl { - typedef typename packet_traits<Scalar>::type Packet; - // FIXME default copy constructors seems bugged with std::complex<> - EIGEN_STRONG_INLINE scalar_quotient1_impl(const scalar_quotient1_impl& other) : m_other(other.m_other) { } - EIGEN_STRONG_INLINE scalar_quotient1_impl(const Scalar& other) : m_other(static_cast<Scalar>(1) / other) {} - EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a * m_other; } - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const - { return internal::pmul(a, pset1<Packet>(m_other)); } - const Scalar m_other; -}; -template<typename Scalar> -struct functor_traits<scalar_quotient1_impl<Scalar,false> > -{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasMul }; }; - -template<typename Scalar> -struct scalar_quotient1_impl<Scalar,true> { - // FIXME default copy constructors seems bugged with std::complex<> - EIGEN_STRONG_INLINE scalar_quotient1_impl(const scalar_quotient1_impl& other) : m_other(other.m_other) { } - EIGEN_STRONG_INLINE scalar_quotient1_impl(const Scalar& other) : m_other(other) {} - EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a / m_other; } - typename add_const_on_value_type<typename NumTraits<Scalar>::Nested>::type m_other; -}; -template<typename Scalar> -struct functor_traits<scalar_quotient1_impl<Scalar,true> > -{ enum { Cost = 2 * NumTraits<Scalar>::MulCost, PacketAccess = false }; }; - /** \internal * \brief Template functor to divide a scalar by a fixed other one * @@ -521,14 +494,19 @@ struct functor_traits<scalar_quotient1_impl<Scalar,true> > * \sa class CwiseUnaryOp, MatrixBase::operator/ */ template<typename Scalar> -struct scalar_quotient1_op : scalar_quotient1_impl<Scalar, NumTraits<Scalar>::IsInteger > { - EIGEN_STRONG_INLINE scalar_quotient1_op(const Scalar& other) - : scalar_quotient1_impl<Scalar, NumTraits<Scalar>::IsInteger >(other) {} +struct scalar_quotient1_op { + typedef typename packet_traits<Scalar>::type Packet; + // FIXME default copy constructors seems bugged with std::complex<> + EIGEN_STRONG_INLINE 
scalar_quotient1_op(const scalar_quotient1_op& other) : m_other(other.m_other) { } + EIGEN_STRONG_INLINE scalar_quotient1_op(const Scalar& other) : m_other(other) {} + EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a / m_other; } + EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const + { return internal::pdiv(a, pset1<Packet>(m_other)); } + typename add_const_on_value_type<typename NumTraits<Scalar>::Nested>::type m_other; }; template<typename Scalar> struct functor_traits<scalar_quotient1_op<Scalar> > -: functor_traits<scalar_quotient1_impl<Scalar, NumTraits<Scalar>::IsInteger> > -{}; +{ enum { Cost = 2 * NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasDiv }; }; // nullary functors diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h index f138b12d2..521bba18a 100644 --- a/Eigen/src/Core/MatrixBase.h +++ b/Eigen/src/Core/MatrixBase.h @@ -240,7 +240,7 @@ template<typename Derived> class MatrixBase // huuuge hack. make Eigen2's matrix.part<Diagonal>() work in eigen3. Problem: Diagonal is now a class template instead // of an integer constant. Solution: overload the part() method template wrt template parameters list. - template<template<typename T, int n> class U> + template<template<typename T, int N> class U> const DiagonalWrapper<ConstDiagonalReturnType> part() const { return diagonal().asDiagonal(); } #endif // EIGEN2_SUPPORT diff --git a/Eigen/src/Core/Ref.h b/Eigen/src/Core/Ref.h index 38a838cf1..9c409eecf 100644 --- a/Eigen/src/Core/Ref.h +++ b/Eigen/src/Core/Ref.h @@ -195,12 +195,12 @@ template<typename PlainObjectType, int Options, typename StrideType> class Ref Base::construct(expr); } template<typename Derived> - inline Ref(const MatrixBase<Derived>& expr, + inline Ref(const DenseBase<Derived>& expr, typename internal::enable_if<bool(internal::is_lvalue<Derived>::value&&bool(Traits::template match<Derived>::MatchAtCompileTime)),Derived>::type* = 0, int = Derived::ThisConstantIsPrivateInPlainObjectBase) #else template<typename Derived> - inline Ref(MatrixBase<Derived>& expr) + inline Ref(DenseBase<Derived>& expr) #endif { Base::construct(expr.const_cast_derived()); @@ -221,7 +221,7 @@ template<typename PlainObjectType, int Options, typename StrideType> class Ref<c EIGEN_DENSE_PUBLIC_INTERFACE(Ref) template<typename Derived> - inline Ref(const MatrixBase<Derived>& expr) + inline Ref(const DenseBase<Derived>& expr) { // std::cout << match_helper<Derived>::HasDirectAccess << "," << match_helper<Derived>::OuterStrideMatch << "," << match_helper<Derived>::InnerStrideMatch << "\n"; // std::cout << int(StrideType::OuterStrideAtCompileTime) << " - " << int(Derived::OuterStrideAtCompileTime) << "\n"; diff --git a/Eigen/src/Core/StableNorm.h b/Eigen/src/Core/StableNorm.h index d8bf7db70..7499b195e 100644 --- a/Eigen/src/Core/StableNorm.h +++ b/Eigen/src/Core/StableNorm.h @@ -131,7 +131,6 @@ MatrixBase<Derived>::blueNorm() const abig = internal::sqrt(abig); if(abig > overfl) { - eigen_assert(false && "overflow"); return rbig; } if(amed > RealScalar(0)) diff --git a/Eigen/src/Core/TriangularMatrix.h b/Eigen/src/Core/TriangularMatrix.h index 3bf2a257d..fcd40e32f 100644 --- a/Eigen/src/Core/TriangularMatrix.h +++ b/Eigen/src/Core/TriangularMatrix.h @@ -511,6 +511,7 @@ template<typename Derived1, typename Derived2, bool ClearOpposite> struct triangular_assignment_selector<Derived1, Derived2, StrictlyUpper, Dynamic, ClearOpposite> { typedef typename Derived1::Index Index; + typedef typename Derived1::Scalar Scalar; static 
inline void run(Derived1 &dst, const Derived2 &src) { for(Index j = 0; j < dst.cols(); ++j) @@ -520,7 +521,7 @@ struct triangular_assignment_selector<Derived1, Derived2, StrictlyUpper, Dynamic dst.copyCoeff(i, j, src); if (ClearOpposite) for(Index i = maxi; i < dst.rows(); ++i) - dst.coeffRef(i, j) = 0; + dst.coeffRef(i, j) = Scalar(0); } } }; diff --git a/Eigen/src/Core/products/GeneralMatrixVector.h b/Eigen/src/Core/products/GeneralMatrixVector.h index 639af8ed4..8895d3ab2 100644 --- a/Eigen/src/Core/products/GeneralMatrixVector.h +++ b/Eigen/src/Core/products/GeneralMatrixVector.h @@ -81,7 +81,7 @@ EIGEN_DONT_INLINE static void run( const Index peels = 2; const Index LhsPacketAlignedMask = LhsPacketSize-1; const Index ResPacketAlignedMask = ResPacketSize-1; - const Index PeelAlignedMask = ResPacketSize*peels-1; +// const Index PeelAlignedMask = ResPacketSize*peels-1; const Index size = rows; // How many coeffs of the result do we have to skip to be aligned. @@ -335,7 +335,7 @@ EIGEN_DONT_INLINE static void run( const Index peels = 2; const Index RhsPacketAlignedMask = RhsPacketSize-1; const Index LhsPacketAlignedMask = LhsPacketSize-1; - const Index PeelAlignedMask = RhsPacketSize*peels-1; +// const Index PeelAlignedMask = RhsPacketSize*peels-1; const Index depth = cols; // How many coeffs of the result do we have to skip to be aligned. diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h index 4fd6a23d5..3d1290cd2 100644 --- a/Eigen/src/Core/util/XprHelper.h +++ b/Eigen/src/Core/util/XprHelper.h @@ -322,9 +322,9 @@ template<typename T, int n=1, typename PlainObject = typename eval<T>::type> str // it's important that this value can still be squared without integer overflowing. DynamicAsInteger = 10000, ScalarReadCost = NumTraits<typename traits<T>::Scalar>::ReadCost, - ScalarReadCostAsInteger = ScalarReadCost == Dynamic ? DynamicAsInteger : ScalarReadCost, + ScalarReadCostAsInteger = ScalarReadCost == Dynamic ? int(DynamicAsInteger) : int(ScalarReadCost), CoeffReadCost = traits<T>::CoeffReadCost, - CoeffReadCostAsInteger = CoeffReadCost == Dynamic ? DynamicAsInteger : CoeffReadCost, + CoeffReadCostAsInteger = CoeffReadCost == Dynamic ? int(DynamicAsInteger) : int(CoeffReadCost), NAsInteger = n == Dynamic ? int(DynamicAsInteger) : n, CostEvalAsInteger = (NAsInteger+1) * ScalarReadCostAsInteger + CoeffReadCostAsInteger, CostNoEvalAsInteger = NAsInteger * CoeffReadCostAsInteger diff --git a/Eigen/src/Geometry/Umeyama.h b/Eigen/src/Geometry/Umeyama.h index ac0939cde..345b47e0c 100644 --- a/Eigen/src/Geometry/Umeyama.h +++ b/Eigen/src/Geometry/Umeyama.h @@ -153,16 +153,21 @@ umeyama(const MatrixBase<Derived>& src, const MatrixBase<OtherDerived>& dst, boo Rt.block(0,0,m,m).noalias() = svd.matrixU() * S.asDiagonal() * svd.matrixV().transpose(); } - // Eq. (42) - const Scalar c = 1/src_var * svd.singularValues().dot(S); - - // Eq. (41) - // Note that we first assign dst_mean to the destination so that there no need - // for a temporary. - Rt.col(m).head(m) = dst_mean; - Rt.col(m).head(m).noalias() -= c*Rt.topLeftCorner(m,m)*src_mean; - - if (with_scaling) Rt.block(0,0,m,m) *= c; + if (with_scaling) + { + // Eq. (42) + const Scalar c = 1/src_var * svd.singularValues().dot(S); + + // Eq. 
(41) + Rt.col(m).head(m) = dst_mean; + Rt.col(m).head(m).noalias() -= c*Rt.topLeftCorner(m,m)*src_mean; + Rt.block(0,0,m,m) *= c; + } + else + { + Rt.col(m).head(m) = dst_mean; + Rt.col(m).head(m).noalias() -= Rt.topLeftCorner(m,m)*src_mean; + } return Rt; } diff --git a/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h b/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h index 126341be8..5a822e0ea 100644 --- a/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +++ b/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h @@ -39,10 +39,11 @@ bool bicgstab(const MatrixType& mat, const Rhs& rhs, Dest& x, int maxIters = iters; int n = mat.cols(); + x = precond.solve(x); VectorType r = rhs - mat * x; VectorType r0 = r; - RealScalar r0_sqnorm = r0.squaredNorm(); + RealScalar r0_sqnorm = rhs.squaredNorm(); Scalar rho = 1; Scalar alpha = 1; Scalar w = 1; @@ -223,7 +224,8 @@ public: template<typename Rhs,typename Dest> void _solve(const Rhs& b, Dest& x) const { - x.setZero(); +// x.setZero(); + x = b; _solveWithGuess(b,x); } diff --git a/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h b/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h index 224304f0e..5a71531cd 100644 --- a/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +++ b/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h @@ -10,8 +10,56 @@ #ifndef EIGEN_INCOMPLETE_LUT_H #define EIGEN_INCOMPLETE_LUT_H + namespace Eigen { +namespace internal { + +/** + * Compute a quick-sort split of a vector + * On output, the vector row is permuted such that its elements satisfy + * abs(row(i)) >= abs(row(ncut)) if i<ncut + * abs(row(i)) <= abs(row(ncut)) if i>ncut + * \param row The vector of values + * \param ind The array of index for the elements in @p row + * \param ncut The number of largest elements to keep + **/ +template <typename VectorV, typename VectorI> +int QuickSplit(VectorV &row, VectorI &ind, int ncut) +{ + typedef typename VectorV::RealScalar RealScalar; + using std::swap; + int mid; + int n = row.size(); /* length of the vector */ + int first, last ; + + ncut--; /* to fit the zero-based indices */ + first = 0; + last = n-1; + if (ncut < first || ncut > last ) return 0; + + do { + mid = first; + RealScalar abskey = std::abs(row(mid)); + for (int j = first + 1; j <= last; j++) { + if ( std::abs(row(j)) > abskey) { + ++mid; + swap(row(mid), row(j)); + swap(ind(mid), ind(j)); + } + } + /* Interchange for the pivot element */ + swap(row(mid), row(first)); + swap(ind(mid), ind(first)); + + if (mid > ncut) last = mid - 1; + else if (mid < ncut ) first = mid + 1; + } while (mid != ncut ); + + return 0; /* mid is equal to ncut */ +} + +}// end namespace internal /** * \brief Incomplete LU factorization with dual-threshold strategy * During the numerical factorization, two dropping rules are used : @@ -126,10 +174,6 @@ class IncompleteLUT : internal::noncopyable protected: - template <typename VectorV, typename VectorI> - int QuickSplit(VectorV &row, VectorI &ind, int ncut); - - /** keeps off-diagonal entries; drops diagonal entries */ struct keep_diag { inline bool operator() (const Index& row, const Index& col, const Scalar&) const @@ -171,51 +215,6 @@ void IncompleteLUT<Scalar>::setFillfactor(int fillfactor) this->m_fillfactor = fillfactor; } - -/** - * Compute a quick-sort split of a vector - * On output, the vector row is permuted such that its elements satisfy - * abs(row(i)) >= abs(row(ncut)) if i<ncut - * abs(row(i)) <= abs(row(ncut)) if i>ncut - * \param row The vector of values - * \param ind The array of index for the elements in @p row - * \param ncut The number of 
largest elements to keep - **/ -template <typename Scalar> -template <typename VectorV, typename VectorI> -int IncompleteLUT<Scalar>::QuickSplit(VectorV &row, VectorI &ind, int ncut) -{ - using std::swap; - int mid; - int n = row.size(); /* length of the vector */ - int first, last ; - - ncut--; /* to fit the zero-based indices */ - first = 0; - last = n-1; - if (ncut < first || ncut > last ) return 0; - - do { - mid = first; - RealScalar abskey = std::abs(row(mid)); - for (int j = first + 1; j <= last; j++) { - if ( std::abs(row(j)) > abskey) { - ++mid; - swap(row(mid), row(j)); - swap(ind(mid), ind(j)); - } - } - /* Interchange for the pivot element */ - swap(row(mid), row(first)); - swap(ind(mid), ind(first)); - - if (mid > ncut) last = mid - 1; - else if (mid < ncut ) first = mid + 1; - } while (mid != ncut ); - - return 0; /* mid is equal to ncut */ -} - template <typename Scalar> template<typename _MatrixType> void IncompleteLUT<Scalar>::analyzePattern(const _MatrixType& amat) @@ -400,7 +399,7 @@ void IncompleteLUT<Scalar>::factorize(const _MatrixType& amat) len = (std::min)(sizel, nnzL); typename Vector::SegmentReturnType ul(u.segment(0, sizel)); typename VectorXi::SegmentReturnType jul(ju.segment(0, sizel)); - QuickSplit(ul, jul, len); + internal::QuickSplit(ul, jul, len); // store the largest m_fill elements of the L part m_lu.startVec(ii); @@ -429,7 +428,7 @@ void IncompleteLUT<Scalar>::factorize(const _MatrixType& amat) len = (std::min)(sizeu, nnzU); typename Vector::SegmentReturnType uu(u.segment(ii+1, sizeu-1)); typename VectorXi::SegmentReturnType juu(ju.segment(ii+1, sizeu-1)); - QuickSplit(uu, juu, len); + internal::QuickSplit(uu, juu, len); // store the largest elements of the U part for(int k = ii + 1; k < ii + len; k++) diff --git a/Eigen/src/MetisSupport/CMakeLists.txt b/Eigen/src/MetisSupport/CMakeLists.txt new file mode 100644 index 000000000..2bad31416 --- /dev/null +++ b/Eigen/src/MetisSupport/CMakeLists.txt @@ -0,0 +1,6 @@ +FILE(GLOB Eigen_MetisSupport_SRCS "*.h") + +INSTALL(FILES + ${Eigen_MetisSupport_SRCS} + DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/MetisSupport COMPONENT Devel + ) diff --git a/Eigen/src/MetisSupport/MetisSupport.h b/Eigen/src/MetisSupport/MetisSupport.h new file mode 100644 index 000000000..a762d96f6 --- /dev/null +++ b/Eigen/src/MetisSupport/MetisSupport.h @@ -0,0 +1,138 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. +#ifndef METIS_SUPPORT_H +#define METIS_SUPPORT_H + +namespace Eigen { +/** + * Get the fill-reducing ordering from the METIS package + * + * If A is the original matrix and Ap is the permuted matrix, + * the fill-reducing permutation is defined as follows : + * Row (column) i of A is the matperm(i) row (column) of Ap. 
+ * WARNING: As computed by METIS, this corresponds to the vector iperm (instead of perm) + */ +template <typename Index> +class MetisOrdering +{ +public: + typedef PermutationMatrix<Dynamic,Dynamic,Index> PermutationType; + typedef Matrix<Index,Dynamic,1> IndexVector; + + template <typename MatrixType> + void get_symmetrized_graph(const MatrixType& A) + { + Index m = A.cols(); + + // Get the transpose of the input matrix + MatrixType At = A.transpose(); + // Get the number of nonzeros elements in each row/col of At+A + Index TotNz = 0; + IndexVector visited(m); + visited.setConstant(-1); + for (int j = 0; j < m; j++) + { + // Compute the union structure of of A(j,:) and At(j,:) + visited(j) = j; // Do not include the diagonal element + // Get the nonzeros in row/column j of A + for (typename MatrixType::InnerIterator it(A, j); it; ++it) + { + Index idx = it.index(); // Get the row index (for column major) or column index (for row major) + if (visited(idx) != j ) + { + visited(idx) = j; + ++TotNz; + } + } + //Get the nonzeros in row/column j of At + for (typename MatrixType::InnerIterator it(At, j); it; ++it) + { + Index idx = it.index(); + if(visited(idx) != j) + { + visited(idx) = j; + ++TotNz; + } + } + } + // Reserve place for A + At + m_indexPtr.resize(m+1); + m_innerIndices.resize(TotNz); + + // Now compute the real adjacency list of each column/row + visited.setConstant(-1); + Index CurNz = 0; + for (int j = 0; j < m; j++) + { + m_indexPtr(j) = CurNz; + + visited(j) = j; // Do not include the diagonal element + // Add the pattern of row/column j of A to A+At + for (typename MatrixType::InnerIterator it(A,j); it; ++it) + { + Index idx = it.index(); // Get the row index (for column major) or column index (for row major) + if (visited(idx) != j ) + { + visited(idx) = j; + m_innerIndices(CurNz) = idx; + CurNz++; + } + } + //Add the pattern of row/column j of At to A+At + for (typename MatrixType::InnerIterator it(At, j); it; ++it) + { + Index idx = it.index(); + if(visited(idx) != j) + { + visited(idx) = j; + m_innerIndices(CurNz) = idx; + ++CurNz; + } + } + } + m_indexPtr(m) = CurNz; + } + + template <typename MatrixType> + void operator() (const MatrixType& A, PermutationType& matperm) + { + Index m = A.cols(); + IndexVector perm(m),iperm(m); + // First, symmetrize the matrix graph. + get_symmetrized_graph(A); + int output_error; + + // Call the fill-reducing routine from METIS + output_error = METIS_NodeND(&m, m_indexPtr.data(), m_innerIndices.data(), NULL, NULL, perm.data(), iperm.data()); + + if(output_error != METIS_OK) + { + //FIXME The ordering interface should define a class of possible errors + std::cerr << "ERROR WHILE CALLING THE METIS PACKAGE \n"; + return; + } + + // Get the fill-reducing permutation + //NOTE: If Ap is the permuted matrix then perm and iperm vectors are defined as follows + // Row (column) i of Ap is the perm(i) row(column) of A, and row (column) i of A is the iperm(i) row(column) of Ap + + // To be consistent with the use of the permutation in SparseLU module, we thus keep the iperm vector + matperm.resize(m); + for (int j = 0; j < m; j++) + matperm.indices()(j) = iperm(j); + + } + + protected: + IndexVector m_indexPtr; // Pointer to the adjacenccy list of each row/column + IndexVector m_innerIndices; // Adjacency list +}; + +}// end namespace eigen +#endif
\ No newline at end of file diff --git a/Eigen/src/OrderingMethods/Eigen_Colamd.h b/Eigen/src/OrderingMethods/Eigen_Colamd.h new file mode 100644 index 000000000..6dc1f280d --- /dev/null +++ b/Eigen/src/OrderingMethods/Eigen_Colamd.h @@ -0,0 +1,1849 @@ +// // This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Desire Nuentsa Wakam <desire.nuentsa_wakam@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +// This file is modified from the colamd/symamd library. The copyright is below + +// The authors of the code itself are Stefan I. Larimore and Timothy A. +// Davis (davis@cise.ufl.edu), University of Florida. The algorithm was +// developed in collaboration with John Gilbert, Xerox PARC, and Esmond +// Ng, Oak Ridge National Laboratory. +// +// Date: +// +// September 8, 2003. Version 2.3. +// +// Acknowledgements: +// +// This work was supported by the National Science Foundation, under +// grants DMS-9504974 and DMS-9803599. +// +// Notice: +// +// Copyright (c) 1998-2003 by the University of Florida. +// All Rights Reserved. +// +// THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY +// EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. +// +// Permission is hereby granted to use, copy, modify, and/or distribute +// this program, provided that the Copyright, this License, and the +// Availability of the original version is retained on all copies and made +// accessible to the end-user of any code or package that includes COLAMD +// or any modified version of COLAMD. +// +// Availability: +// +// The colamd/symamd library is available at +// +// http://www.cise.ufl.edu/research/sparse/colamd/ + +// This is the http://www.cise.ufl.edu/research/sparse/colamd/colamd.h +// file. It is required by the colamd.c, colamdmex.c, and symamdmex.c +// files, and by any C code that calls the routines whose prototypes are +// listed below, or that uses the colamd/symamd definitions listed below. + +#ifndef EIGEN_COLAMD_H +#define EIGEN_COLAMD_H +namespace internal { +/* Ensure that debugging is turned off: */ +#ifndef COLAMD_NDEBUG +#define COLAMD_NDEBUG +#endif /* NDEBUG */ +/* ========================================================================== */ +/* === Knob and statistics definitions ====================================== */ +/* ========================================================================== */ + +/* size of the knobs [ ] array. Only knobs [0..1] are currently used. */ +#define COLAMD_KNOBS 20 + +/* number of output statistics. Only stats [0..6] are currently used. */ +#define COLAMD_STATS 20 + +/* knobs [0] and stats [0]: dense row knob and output statistic. */ +#define COLAMD_DENSE_ROW 0 + +/* knobs [1] and stats [1]: dense column knob and output statistic. 
*/ +#define COLAMD_DENSE_COL 1 + +/* stats [2]: memory defragmentation count output statistic */ +#define COLAMD_DEFRAG_COUNT 2 + +/* stats [3]: colamd status: zero OK, > 0 warning or notice, < 0 error */ +#define COLAMD_STATUS 3 + +/* stats [4..6]: error info, or info on jumbled columns */ +#define COLAMD_INFO1 4 +#define COLAMD_INFO2 5 +#define COLAMD_INFO3 6 + +/* error codes returned in stats [3]: */ +#define COLAMD_OK (0) +#define COLAMD_OK_BUT_JUMBLED (1) +#define COLAMD_ERROR_A_not_present (-1) +#define COLAMD_ERROR_p_not_present (-2) +#define COLAMD_ERROR_nrow_negative (-3) +#define COLAMD_ERROR_ncol_negative (-4) +#define COLAMD_ERROR_nnz_negative (-5) +#define COLAMD_ERROR_p0_nonzero (-6) +#define COLAMD_ERROR_A_too_small (-7) +#define COLAMD_ERROR_col_length_negative (-8) +#define COLAMD_ERROR_row_index_out_of_bounds (-9) +#define COLAMD_ERROR_out_of_memory (-10) +#define COLAMD_ERROR_internal_error (-999) + +/* ========================================================================== */ +/* === Definitions ========================================================== */ +/* ========================================================================== */ + +#define COLAMD_MAX(a,b) (((a) > (b)) ? (a) : (b)) +#define COLAMD_MIN(a,b) (((a) < (b)) ? (a) : (b)) + +#define ONES_COMPLEMENT(r) (-(r)-1) + +/* -------------------------------------------------------------------------- */ + +#define COLAMD_EMPTY (-1) + +/* Row and column status */ +#define ALIVE (0) +#define DEAD (-1) + +/* Column status */ +#define DEAD_PRINCIPAL (-1) +#define DEAD_NON_PRINCIPAL (-2) + +/* Macros for row and column status update and checking. */ +#define ROW_IS_DEAD(r) ROW_IS_MARKED_DEAD (Row[r].shared2.mark) +#define ROW_IS_MARKED_DEAD(row_mark) (row_mark < ALIVE) +#define ROW_IS_ALIVE(r) (Row [r].shared2.mark >= ALIVE) +#define COL_IS_DEAD(c) (Col [c].start < ALIVE) +#define COL_IS_ALIVE(c) (Col [c].start >= ALIVE) +#define COL_IS_DEAD_PRINCIPAL(c) (Col [c].start == DEAD_PRINCIPAL) +#define KILL_ROW(r) { Row [r].shared2.mark = DEAD ; } +#define KILL_PRINCIPAL_COL(c) { Col [c].start = DEAD_PRINCIPAL ; } +#define KILL_NON_PRINCIPAL_COL(c) { Col [c].start = DEAD_NON_PRINCIPAL ; } + +/* ========================================================================== */ +/* === Colamd reporting mechanism =========================================== */ +/* ========================================================================== */ + + // == Row and Column structures == +typedef struct colamd_col_struct +{ + int start ; /* index for A of first row in this column, or DEAD */ + /* if column is dead */ + int length ; /* number of rows in this column */ + union + { + int thickness ; /* number of original columns represented by this */ + /* col, if the column is alive */ + int parent ; /* parent in parent tree super-column structure, if */ + /* the column is dead */ + } shared1 ; + union + { + int score ; /* the score used to maintain heap, if col is alive */ + int order ; /* pivot ordering of this column, if col is dead */ + } shared2 ; + union + { + int headhash ; /* head of a hash bucket, if col is at the head of */ + /* a degree list */ + int hash ; /* hash value, if col is not in a degree list */ + int prev ; /* previous column in degree list, if col is in a */ + /* degree list (but not at the head of a degree list) */ + } shared3 ; + union + { + int degree_next ; /* next column, if col is in a degree list */ + int hash_next ; /* next column, if col is in a hash list */ + } shared4 ; + +} colamd_col ; + +typedef struct 
Colamd_Row_struct +{ + int start ; /* index for A of first col in this row */ + int length ; /* number of principal columns in this row */ + union + { + int degree ; /* number of principal & non-principal columns in row */ + int p ; /* used as a row pointer in init_rows_cols () */ + } shared1 ; + union + { + int mark ; /* for computing set differences and marking dead rows*/ + int first_column ;/* first column in row (used in garbage collection) */ + } shared2 ; + +} Colamd_Row ; + +/* ========================================================================== */ +/* === Colamd recommended memory size ======================================= */ +/* ========================================================================== */ + +/* + The recommended length Alen of the array A passed to colamd is given by + the COLAMD_RECOMMENDED (nnz, n_row, n_col) macro. It returns -1 if any + argument is negative. 2*nnz space is required for the row and column + indices of the matrix. colamd_c (n_col) + colamd_r (n_row) space is + required for the Col and Row arrays, respectively, which are internal to + colamd. An additional n_col space is the minimal amount of "elbow room", + and nnz/5 more space is recommended for run time efficiency. + + This macro is not needed when using symamd. + + Explicit typecast to int added Sept. 23, 2002, COLAMD version 2.2, to avoid + gcc -pedantic warning messages. +*/ + +inline int colamd_c(int n_col) +{ return int( ((n_col) + 1) * sizeof (colamd_col) / sizeof (int) ) ; } + +inline int colamd_r(int n_row) +{ return int(((n_row) + 1) * sizeof (Colamd_Row) / sizeof (int)); } + + // Various routines +inline int colamd_recommended (int nnz, int n_row, int n_col) ; + +static inline void colamd_set_defaults (double knobs [COLAMD_KNOBS]) ; + +static bool colamd (int n_row, int n_col, int Alen, int A [], int p [], double knobs[COLAMD_KNOBS], int stats [COLAMD_STATS]) ; + +static int init_rows_cols (int n_row, int n_col, Colamd_Row Row [], colamd_col col [], int A [], int p [], int stats[COLAMD_STATS] ); + +static void init_scoring (int n_row, int n_col, Colamd_Row Row [], colamd_col Col [], int A [], int head [], double knobs[COLAMD_KNOBS], int *p_n_row2, int *p_n_col2, int *p_max_deg); + +static int find_ordering (int n_row, int n_col, int Alen, Colamd_Row Row [], colamd_col Col [], int A [], int head [], int n_col2, int max_deg, int pfree); + +static void order_children (int n_col, colamd_col Col [], int p []); + +static void detect_super_cols ( + colamd_col Col [], + int A [], + int head [], + int row_start, + int row_length ) ; + +static int garbage_collection (int n_row, int n_col, Colamd_Row Row [], colamd_col Col [], int A [], int *pfree) ; + +static inline int clear_mark (int n_row, Colamd_Row Row [] ) ; + +/* === No debugging ========================================================= */ + +#define COLAMD_DEBUG0(params) ; +#define COLAMD_DEBUG1(params) ; +#define COLAMD_DEBUG2(params) ; +#define COLAMD_DEBUG3(params) ; +#define COLAMD_DEBUG4(params) ; + +#define COLAMD_ASSERT(expression) ((void) 0) + + +/** + * \brief Returns the recommended value of Alen + * + * Returns recommended value of Alen for use by colamd. + * Returns -1 if any input argument is negative. + * The use of this routine or macro is optional. + * Note that the macro uses its arguments more than once, + * so be careful for side effects, if you pass expressions as arguments to COLAMD_RECOMMENDED. 
+ * + * \param nnz nonzeros in A + * \param n_row number of rows in A + * \param n_col number of columns in A + * \return recommended value of Alen for use by colamd + */ +inline int colamd_recommended ( int nnz, int n_row, int n_col) +{ + if ((nnz) < 0 || (n_row) < 0 || (n_col) < 0) + return (-1); + else + return (2 * (nnz) + colamd_c (n_col) + colamd_r (n_row) + (n_col) + ((nnz) / 5)); +} + +/** + * \brief set default parameters The use of this routine is optional. + * + * Colamd: rows with more than (knobs [COLAMD_DENSE_ROW] * n_col) + * entries are removed prior to ordering. Columns with more than + * (knobs [COLAMD_DENSE_COL] * n_row) entries are removed prior to + * ordering, and placed last in the output column ordering. + * + * COLAMD_DENSE_ROW and COLAMD_DENSE_COL are defined as 0 and 1, + * respectively, in colamd.h. Default values of these two knobs + * are both 0.5. Currently, only knobs [0] and knobs [1] are + * used, but future versions may use more knobs. If so, they will + * be properly set to their defaults by the future version of + * colamd_set_defaults, so that the code that calls colamd will + * not need to change, assuming that you either use + * colamd_set_defaults, or pass a (double *) NULL pointer as the + * knobs array to colamd or symamd. + * + * \param knobs parameter settings for colamd + */ +static inline void colamd_set_defaults(double knobs[COLAMD_KNOBS]) +{ + /* === Local variables ================================================== */ + + int i ; + + if (!knobs) + { + return ; /* no knobs to initialize */ + } + for (i = 0 ; i < COLAMD_KNOBS ; i++) + { + knobs [i] = 0 ; + } + knobs [COLAMD_DENSE_ROW] = 0.5 ; /* ignore rows over 50% dense */ + knobs [COLAMD_DENSE_COL] = 0.5 ; /* ignore columns over 50% dense */ +} + +/** + * \brief Computes a column ordering using the column approximate minimum degree ordering + * + * Computes a column ordering (Q) of A such that P(AQ)=LU or + * (AQ)'AQ=LL' have less fill-in and require fewer floating point + * operations than factorizing the unpermuted matrix A or A'A, + * respectively. 
+ * + * + * \param n_row number of rows in A + * \param n_col number of columns in A + * \param Alen, size of the array A + * \param A row indices of the matrix, of size ALen + * \param p column pointers of A, of size n_col+1 + * \param knobs parameter settings for colamd + * \param stats colamd output statistics and error codes + */ +static bool colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[COLAMD_KNOBS], int stats[COLAMD_STATS]) +{ + /* === Local variables ================================================== */ + + int i ; /* loop index */ + int nnz ; /* nonzeros in A */ + int Row_size ; /* size of Row [], in integers */ + int Col_size ; /* size of Col [], in integers */ + int need ; /* minimum required length of A */ + Colamd_Row *Row ; /* pointer into A of Row [0..n_row] array */ + colamd_col *Col ; /* pointer into A of Col [0..n_col] array */ + int n_col2 ; /* number of non-dense, non-empty columns */ + int n_row2 ; /* number of non-dense, non-empty rows */ + int ngarbage ; /* number of garbage collections performed */ + int max_deg ; /* maximum row degree */ + double default_knobs [COLAMD_KNOBS] ; /* default knobs array */ + + + /* === Check the input arguments ======================================== */ + + if (!stats) + { + COLAMD_DEBUG0 (("colamd: stats not present\n")) ; + return (false) ; + } + for (i = 0 ; i < COLAMD_STATS ; i++) + { + stats [i] = 0 ; + } + stats [COLAMD_STATUS] = COLAMD_OK ; + stats [COLAMD_INFO1] = -1 ; + stats [COLAMD_INFO2] = -1 ; + + if (!A) /* A is not present */ + { + stats [COLAMD_STATUS] = COLAMD_ERROR_A_not_present ; + COLAMD_DEBUG0 (("colamd: A not present\n")) ; + return (false) ; + } + + if (!p) /* p is not present */ + { + stats [COLAMD_STATUS] = COLAMD_ERROR_p_not_present ; + COLAMD_DEBUG0 (("colamd: p not present\n")) ; + return (false) ; + } + + if (n_row < 0) /* n_row must be >= 0 */ + { + stats [COLAMD_STATUS] = COLAMD_ERROR_nrow_negative ; + stats [COLAMD_INFO1] = n_row ; + COLAMD_DEBUG0 (("colamd: nrow negative %d\n", n_row)) ; + return (false) ; + } + + if (n_col < 0) /* n_col must be >= 0 */ + { + stats [COLAMD_STATUS] = COLAMD_ERROR_ncol_negative ; + stats [COLAMD_INFO1] = n_col ; + COLAMD_DEBUG0 (("colamd: ncol negative %d\n", n_col)) ; + return (false) ; + } + + nnz = p [n_col] ; + if (nnz < 0) /* nnz must be >= 0 */ + { + stats [COLAMD_STATUS] = COLAMD_ERROR_nnz_negative ; + stats [COLAMD_INFO1] = nnz ; + COLAMD_DEBUG0 (("colamd: number of entries negative %d\n", nnz)) ; + return (false) ; + } + + if (p [0] != 0) + { + stats [COLAMD_STATUS] = COLAMD_ERROR_p0_nonzero ; + stats [COLAMD_INFO1] = p [0] ; + COLAMD_DEBUG0 (("colamd: p[0] not zero %d\n", p [0])) ; + return (false) ; + } + + /* === If no knobs, set default knobs =================================== */ + + if (!knobs) + { + colamd_set_defaults (default_knobs) ; + knobs = default_knobs ; + } + + /* === Allocate the Row and Col arrays from array A ===================== */ + + Col_size = colamd_c (n_col) ; + Row_size = colamd_r (n_row) ; + need = 2*nnz + n_col + Col_size + Row_size ; + + if (need > Alen) + { + /* not enough space in array A to perform the ordering */ + stats [COLAMD_STATUS] = COLAMD_ERROR_A_too_small ; + stats [COLAMD_INFO1] = need ; + stats [COLAMD_INFO2] = Alen ; + COLAMD_DEBUG0 (("colamd: Need Alen >= %d, given only Alen = %d\n", need,Alen)); + return (false) ; + } + + Alen -= Col_size + Row_size ; + Col = (colamd_col *) &A [Alen] ; + Row = (Colamd_Row *) &A [Alen + Col_size] ; + + /* === Construct the row and column data structures 
===================== */ + + if (!init_rows_cols (n_row, n_col, Row, Col, A, p, stats)) + { + /* input matrix is invalid */ + COLAMD_DEBUG0 (("colamd: Matrix invalid\n")) ; + return (false) ; + } + + /* === Initialize scores, kill dense rows/columns ======================= */ + + init_scoring (n_row, n_col, Row, Col, A, p, knobs, + &n_row2, &n_col2, &max_deg) ; + + /* === Order the supercolumns =========================================== */ + + ngarbage = find_ordering (n_row, n_col, Alen, Row, Col, A, p, + n_col2, max_deg, 2*nnz) ; + + /* === Order the non-principal columns ================================== */ + + order_children (n_col, Col, p) ; + + /* === Return statistics in stats ======================================= */ + + stats [COLAMD_DENSE_ROW] = n_row - n_row2 ; + stats [COLAMD_DENSE_COL] = n_col - n_col2 ; + stats [COLAMD_DEFRAG_COUNT] = ngarbage ; + COLAMD_DEBUG0 (("colamd: done.\n")) ; + return (true) ; +} + +/* ========================================================================== */ +/* === NON-USER-CALLABLE ROUTINES: ========================================== */ +/* ========================================================================== */ + +/* There are no user-callable routines beyond this point in the file */ + + +/* ========================================================================== */ +/* === init_rows_cols ======================================================= */ +/* ========================================================================== */ + +/* + Takes the column form of the matrix in A and creates the row form of the + matrix. Also, row and column attributes are stored in the Col and Row + structs. If the columns are un-sorted or contain duplicate row indices, + this routine will also sort and remove duplicate row indices from the + column form of the matrix. Returns false if the matrix is invalid, + true otherwise. Not user-callable. 
+*/ + + static int init_rows_cols /* returns true if OK, or false otherwise */ +( + /* === Parameters ======================================================= */ + + int n_row, /* number of rows of A */ + int n_col, /* number of columns of A */ + Colamd_Row Row [], /* of size n_row+1 */ + colamd_col Col [], /* of size n_col+1 */ + int A [], /* row indices of A, of size Alen */ + int p [], /* pointers to columns in A, of size n_col+1 */ + int stats [COLAMD_STATS] /* colamd statistics */ +) +{ + /* === Local variables ================================================== */ + + int col ; /* a column index */ + int row ; /* a row index */ + int *cp ; /* a column pointer */ + int *cp_end ; /* a pointer to the end of a column */ + int *rp ; /* a row pointer */ + int *rp_end ; /* a pointer to the end of a row */ + int last_row ; /* previous row */ + + /* === Initialize columns, and check column pointers ==================== */ + + for (col = 0 ; col < n_col ; col++) + { + Col [col].start = p [col] ; + Col [col].length = p [col+1] - p [col] ; + + if (Col [col].length < 0) + { + /* column pointers must be non-decreasing */ + stats [COLAMD_STATUS] = COLAMD_ERROR_col_length_negative ; + stats [COLAMD_INFO1] = col ; + stats [COLAMD_INFO2] = Col [col].length ; + COLAMD_DEBUG0 (("colamd: col %d length %d < 0\n", col, Col [col].length)) ; + return (false) ; + } + + Col [col].shared1.thickness = 1 ; + Col [col].shared2.score = 0 ; + Col [col].shared3.prev = COLAMD_EMPTY ; + Col [col].shared4.degree_next = COLAMD_EMPTY ; + } + + /* p [0..n_col] no longer needed, used as "head" in subsequent routines */ + + /* === Scan columns, compute row degrees, and check row indices ========= */ + + stats [COLAMD_INFO3] = 0 ; /* number of duplicate or unsorted row indices*/ + + for (row = 0 ; row < n_row ; row++) + { + Row [row].length = 0 ; + Row [row].shared2.mark = -1 ; + } + + for (col = 0 ; col < n_col ; col++) + { + last_row = -1 ; + + cp = &A [p [col]] ; + cp_end = &A [p [col+1]] ; + + while (cp < cp_end) + { + row = *cp++ ; + + /* make sure row indices within range */ + if (row < 0 || row >= n_row) + { + stats [COLAMD_STATUS] = COLAMD_ERROR_row_index_out_of_bounds ; + stats [COLAMD_INFO1] = col ; + stats [COLAMD_INFO2] = row ; + stats [COLAMD_INFO3] = n_row ; + COLAMD_DEBUG0 (("colamd: row %d col %d out of bounds\n", row, col)) ; + return (false) ; + } + + if (row <= last_row || Row [row].shared2.mark == col) + { + /* row index are unsorted or repeated (or both), thus col */ + /* is jumbled. This is a notice, not an error condition. 
*/ + stats [COLAMD_STATUS] = COLAMD_OK_BUT_JUMBLED ; + stats [COLAMD_INFO1] = col ; + stats [COLAMD_INFO2] = row ; + (stats [COLAMD_INFO3]) ++ ; + COLAMD_DEBUG1 (("colamd: row %d col %d unsorted/duplicate\n",row,col)); + } + + if (Row [row].shared2.mark != col) + { + Row [row].length++ ; + } + else + { + /* this is a repeated entry in the column, */ + /* it will be removed */ + Col [col].length-- ; + } + + /* mark the row as having been seen in this column */ + Row [row].shared2.mark = col ; + + last_row = row ; + } + } + + /* === Compute row pointers ============================================= */ + + /* row form of the matrix starts directly after the column */ + /* form of matrix in A */ + Row [0].start = p [n_col] ; + Row [0].shared1.p = Row [0].start ; + Row [0].shared2.mark = -1 ; + for (row = 1 ; row < n_row ; row++) + { + Row [row].start = Row [row-1].start + Row [row-1].length ; + Row [row].shared1.p = Row [row].start ; + Row [row].shared2.mark = -1 ; + } + + /* === Create row form ================================================== */ + + if (stats [COLAMD_STATUS] == COLAMD_OK_BUT_JUMBLED) + { + /* if cols jumbled, watch for repeated row indices */ + for (col = 0 ; col < n_col ; col++) + { + cp = &A [p [col]] ; + cp_end = &A [p [col+1]] ; + while (cp < cp_end) + { + row = *cp++ ; + if (Row [row].shared2.mark != col) + { + A [(Row [row].shared1.p)++] = col ; + Row [row].shared2.mark = col ; + } + } + } + } + else + { + /* if cols not jumbled, we don't need the mark (this is faster) */ + for (col = 0 ; col < n_col ; col++) + { + cp = &A [p [col]] ; + cp_end = &A [p [col+1]] ; + while (cp < cp_end) + { + A [(Row [*cp++].shared1.p)++] = col ; + } + } + } + + /* === Clear the row marks and set row degrees ========================== */ + + for (row = 0 ; row < n_row ; row++) + { + Row [row].shared2.mark = 0 ; + Row [row].shared1.degree = Row [row].length ; + } + + /* === See if we need to re-create columns ============================== */ + + if (stats [COLAMD_STATUS] == COLAMD_OK_BUT_JUMBLED) + { + COLAMD_DEBUG0 (("colamd: reconstructing column form, matrix jumbled\n")) ; + + + /* === Compute col pointers ========================================= */ + + /* col form of the matrix starts at A [0]. */ + /* Note, we may have a gap between the col form and the row */ + /* form if there were duplicate entries, if so, it will be */ + /* removed upon the first garbage collection */ + Col [0].start = 0 ; + p [0] = Col [0].start ; + for (col = 1 ; col < n_col ; col++) + { + /* note that the lengths here are for pruned columns, i.e. */ + /* no duplicate row indices will exist for these columns */ + Col [col].start = Col [col-1].start + Col [col-1].length ; + p [col] = Col [col].start ; + } + + /* === Re-create col form =========================================== */ + + for (row = 0 ; row < n_row ; row++) + { + rp = &A [Row [row].start] ; + rp_end = rp + Row [row].length ; + while (rp < rp_end) + { + A [(p [*rp++])++] = row ; + } + } + } + + /* === Done. Matrix is not (or no longer) jumbled ====================== */ + + return (true) ; +} + + +/* ========================================================================== */ +/* === init_scoring ========================================================= */ +/* ========================================================================== */ + +/* + Kills dense or empty columns and rows, calculates an initial score for + each column, and places all columns in the degree lists. Not user-callable. 
+*/ + +static void init_scoring +( + /* === Parameters ======================================================= */ + + int n_row, /* number of rows of A */ + int n_col, /* number of columns of A */ + Colamd_Row Row [], /* of size n_row+1 */ + colamd_col Col [], /* of size n_col+1 */ + int A [], /* column form and row form of A */ + int head [], /* of size n_col+1 */ + double knobs [COLAMD_KNOBS],/* parameters */ + int *p_n_row2, /* number of non-dense, non-empty rows */ + int *p_n_col2, /* number of non-dense, non-empty columns */ + int *p_max_deg /* maximum row degree */ +) +{ + /* === Local variables ================================================== */ + + int c ; /* a column index */ + int r, row ; /* a row index */ + int *cp ; /* a column pointer */ + int deg ; /* degree of a row or column */ + int *cp_end ; /* a pointer to the end of a column */ + int *new_cp ; /* new column pointer */ + int col_length ; /* length of pruned column */ + int score ; /* current column score */ + int n_col2 ; /* number of non-dense, non-empty columns */ + int n_row2 ; /* number of non-dense, non-empty rows */ + int dense_row_count ; /* remove rows with more entries than this */ + int dense_col_count ; /* remove cols with more entries than this */ + int min_score ; /* smallest column score */ + int max_deg ; /* maximum row degree */ + int next_col ; /* Used to add to degree list.*/ + + + /* === Extract knobs ==================================================== */ + + dense_row_count = COLAMD_MAX (0, COLAMD_MIN (knobs [COLAMD_DENSE_ROW] * n_col, n_col)) ; + dense_col_count = COLAMD_MAX (0, COLAMD_MIN (knobs [COLAMD_DENSE_COL] * n_row, n_row)) ; + COLAMD_DEBUG1 (("colamd: densecount: %d %d\n", dense_row_count, dense_col_count)) ; + max_deg = 0 ; + n_col2 = n_col ; + n_row2 = n_row ; + + /* === Kill empty columns =============================================== */ + + /* Put the empty columns at the end in their natural order, so that LU */ + /* factorization can proceed as far as possible. 
*/ + for (c = n_col-1 ; c >= 0 ; c--) + { + deg = Col [c].length ; + if (deg == 0) + { + /* this is a empty column, kill and order it last */ + Col [c].shared2.order = --n_col2 ; + KILL_PRINCIPAL_COL (c) ; + } + } + COLAMD_DEBUG1 (("colamd: null columns killed: %d\n", n_col - n_col2)) ; + + /* === Kill dense columns =============================================== */ + + /* Put the dense columns at the end, in their natural order */ + for (c = n_col-1 ; c >= 0 ; c--) + { + /* skip any dead columns */ + if (COL_IS_DEAD (c)) + { + continue ; + } + deg = Col [c].length ; + if (deg > dense_col_count) + { + /* this is a dense column, kill and order it last */ + Col [c].shared2.order = --n_col2 ; + /* decrement the row degrees */ + cp = &A [Col [c].start] ; + cp_end = cp + Col [c].length ; + while (cp < cp_end) + { + Row [*cp++].shared1.degree-- ; + } + KILL_PRINCIPAL_COL (c) ; + } + } + COLAMD_DEBUG1 (("colamd: Dense and null columns killed: %d\n", n_col - n_col2)) ; + + /* === Kill dense and empty rows ======================================== */ + + for (r = 0 ; r < n_row ; r++) + { + deg = Row [r].shared1.degree ; + COLAMD_ASSERT (deg >= 0 && deg <= n_col) ; + if (deg > dense_row_count || deg == 0) + { + /* kill a dense or empty row */ + KILL_ROW (r) ; + --n_row2 ; + } + else + { + /* keep track of max degree of remaining rows */ + max_deg = COLAMD_MAX (max_deg, deg) ; + } + } + COLAMD_DEBUG1 (("colamd: Dense and null rows killed: %d\n", n_row - n_row2)) ; + + /* === Compute initial column scores ==================================== */ + + /* At this point the row degrees are accurate. They reflect the number */ + /* of "live" (non-dense) columns in each row. No empty rows exist. */ + /* Some "live" columns may contain only dead rows, however. These are */ + /* pruned in the code below. */ + + /* now find the initial matlab score for each column */ + for (c = n_col-1 ; c >= 0 ; c--) + { + /* skip dead column */ + if (COL_IS_DEAD (c)) + { + continue ; + } + score = 0 ; + cp = &A [Col [c].start] ; + new_cp = cp ; + cp_end = cp + Col [c].length ; + while (cp < cp_end) + { + /* get a row */ + row = *cp++ ; + /* skip if dead */ + if (ROW_IS_DEAD (row)) + { + continue ; + } + /* compact the column */ + *new_cp++ = row ; + /* add row's external degree */ + score += Row [row].shared1.degree - 1 ; + /* guard against integer overflow */ + score = COLAMD_MIN (score, n_col) ; + } + /* determine pruned column length */ + col_length = (int) (new_cp - &A [Col [c].start]) ; + if (col_length == 0) + { + /* a newly-made null column (all rows in this col are "dense" */ + /* and have already been killed) */ + COLAMD_DEBUG2 (("Newly null killed: %d\n", c)) ; + Col [c].shared2.order = --n_col2 ; + KILL_PRINCIPAL_COL (c) ; + } + else + { + /* set column length and set score */ + COLAMD_ASSERT (score >= 0) ; + COLAMD_ASSERT (score <= n_col) ; + Col [c].length = col_length ; + Col [c].shared2.score = score ; + } + } + COLAMD_DEBUG1 (("colamd: Dense, null, and newly-null columns killed: %d\n", + n_col-n_col2)) ; + + /* At this point, all empty rows and columns are dead. All live columns */ + /* are "clean" (containing no dead rows) and simplicial (no supercolumns */ + /* yet). Rows may contain dead columns, but all live rows contain at */ + /* least one live column. 
*/ + + /* === Initialize degree lists ========================================== */ + + + /* clear the hash buckets */ + for (c = 0 ; c <= n_col ; c++) + { + head [c] = COLAMD_EMPTY ; + } + min_score = n_col ; + /* place in reverse order, so low column indices are at the front */ + /* of the lists. This is to encourage natural tie-breaking */ + for (c = n_col-1 ; c >= 0 ; c--) + { + /* only add principal columns to degree lists */ + if (COL_IS_ALIVE (c)) + { + COLAMD_DEBUG4 (("place %d score %d minscore %d ncol %d\n", + c, Col [c].shared2.score, min_score, n_col)) ; + + /* === Add columns score to DList =============================== */ + + score = Col [c].shared2.score ; + + COLAMD_ASSERT (min_score >= 0) ; + COLAMD_ASSERT (min_score <= n_col) ; + COLAMD_ASSERT (score >= 0) ; + COLAMD_ASSERT (score <= n_col) ; + COLAMD_ASSERT (head [score] >= COLAMD_EMPTY) ; + + /* now add this column to dList at proper score location */ + next_col = head [score] ; + Col [c].shared3.prev = COLAMD_EMPTY ; + Col [c].shared4.degree_next = next_col ; + + /* if there already was a column with the same score, set its */ + /* previous pointer to this new column */ + if (next_col != COLAMD_EMPTY) + { + Col [next_col].shared3.prev = c ; + } + head [score] = c ; + + /* see if this score is less than current min */ + min_score = COLAMD_MIN (min_score, score) ; + + + } + } + + + /* === Return number of remaining columns, and max row degree =========== */ + + *p_n_col2 = n_col2 ; + *p_n_row2 = n_row2 ; + *p_max_deg = max_deg ; +} + + +/* ========================================================================== */ +/* === find_ordering ======================================================== */ +/* ========================================================================== */ + +/* + Order the principal columns of the supercolumn form of the matrix + (no supercolumns on input). Uses a minimum approximate column minimum + degree ordering method. Not user-callable. 
+*/ + +static int find_ordering /* return the number of garbage collections */ +( + /* === Parameters ======================================================= */ + + int n_row, /* number of rows of A */ + int n_col, /* number of columns of A */ + int Alen, /* size of A, 2*nnz + n_col or larger */ + Colamd_Row Row [], /* of size n_row+1 */ + colamd_col Col [], /* of size n_col+1 */ + int A [], /* column form and row form of A */ + int head [], /* of size n_col+1 */ + int n_col2, /* Remaining columns to order */ + int max_deg, /* Maximum row degree */ + int pfree /* index of first free slot (2*nnz on entry) */ +) +{ + /* === Local variables ================================================== */ + + int k ; /* current pivot ordering step */ + int pivot_col ; /* current pivot column */ + int *cp ; /* a column pointer */ + int *rp ; /* a row pointer */ + int pivot_row ; /* current pivot row */ + int *new_cp ; /* modified column pointer */ + int *new_rp ; /* modified row pointer */ + int pivot_row_start ; /* pointer to start of pivot row */ + int pivot_row_degree ; /* number of columns in pivot row */ + int pivot_row_length ; /* number of supercolumns in pivot row */ + int pivot_col_score ; /* score of pivot column */ + int needed_memory ; /* free space needed for pivot row */ + int *cp_end ; /* pointer to the end of a column */ + int *rp_end ; /* pointer to the end of a row */ + int row ; /* a row index */ + int col ; /* a column index */ + int max_score ; /* maximum possible score */ + int cur_score ; /* score of current column */ + unsigned int hash ; /* hash value for supernode detection */ + int head_column ; /* head of hash bucket */ + int first_col ; /* first column in hash bucket */ + int tag_mark ; /* marker value for mark array */ + int row_mark ; /* Row [row].shared2.mark */ + int set_difference ; /* set difference size of row with pivot row */ + int min_score ; /* smallest column score */ + int col_thickness ; /* "thickness" (no. of columns in a supercol) */ + int max_mark ; /* maximum value of tag_mark */ + int pivot_col_thickness ; /* number of columns represented by pivot col */ + int prev_col ; /* Used by Dlist operations. */ + int next_col ; /* Used by Dlist operations. 
*/ + int ngarbage ; /* number of garbage collections performed */ + + + /* === Initialization and clear mark ==================================== */ + + max_mark = INT_MAX - n_col ; /* INT_MAX defined in <limits.h> */ + tag_mark = clear_mark (n_row, Row) ; + min_score = 0 ; + ngarbage = 0 ; + COLAMD_DEBUG1 (("colamd: Ordering, n_col2=%d\n", n_col2)) ; + + /* === Order the columns ================================================ */ + + for (k = 0 ; k < n_col2 ; /* 'k' is incremented below */) + { + + /* === Select pivot column, and order it ============================ */ + + /* make sure degree list isn't empty */ + COLAMD_ASSERT (min_score >= 0) ; + COLAMD_ASSERT (min_score <= n_col) ; + COLAMD_ASSERT (head [min_score] >= COLAMD_EMPTY) ; + + /* get pivot column from head of minimum degree list */ + while (head [min_score] == COLAMD_EMPTY && min_score < n_col) + { + min_score++ ; + } + pivot_col = head [min_score] ; + COLAMD_ASSERT (pivot_col >= 0 && pivot_col <= n_col) ; + next_col = Col [pivot_col].shared4.degree_next ; + head [min_score] = next_col ; + if (next_col != COLAMD_EMPTY) + { + Col [next_col].shared3.prev = COLAMD_EMPTY ; + } + + COLAMD_ASSERT (COL_IS_ALIVE (pivot_col)) ; + COLAMD_DEBUG3 (("Pivot col: %d\n", pivot_col)) ; + + /* remember score for defrag check */ + pivot_col_score = Col [pivot_col].shared2.score ; + + /* the pivot column is the kth column in the pivot order */ + Col [pivot_col].shared2.order = k ; + + /* increment order count by column thickness */ + pivot_col_thickness = Col [pivot_col].shared1.thickness ; + k += pivot_col_thickness ; + COLAMD_ASSERT (pivot_col_thickness > 0) ; + + /* === Garbage_collection, if necessary ============================= */ + + needed_memory = COLAMD_MIN (pivot_col_score, n_col - k) ; + if (pfree + needed_memory >= Alen) + { + pfree = garbage_collection (n_row, n_col, Row, Col, A, &A [pfree]) ; + ngarbage++ ; + /* after garbage collection we will have enough */ + COLAMD_ASSERT (pfree + needed_memory < Alen) ; + /* garbage collection has wiped out the Row[].shared2.mark array */ + tag_mark = clear_mark (n_row, Row) ; + + } + + /* === Compute pivot row pattern ==================================== */ + + /* get starting location for this new merged row */ + pivot_row_start = pfree ; + + /* initialize new row counts to zero */ + pivot_row_degree = 0 ; + + /* tag pivot column as having been visited so it isn't included */ + /* in merged pivot row */ + Col [pivot_col].shared1.thickness = -pivot_col_thickness ; + + /* pivot row is the union of all rows in the pivot column pattern */ + cp = &A [Col [pivot_col].start] ; + cp_end = cp + Col [pivot_col].length ; + while (cp < cp_end) + { + /* get a row */ + row = *cp++ ; + COLAMD_DEBUG4 (("Pivot col pattern %d %d\n", ROW_IS_ALIVE (row), row)) ; + /* skip if row is dead */ + if (ROW_IS_DEAD (row)) + { + continue ; + } + rp = &A [Row [row].start] ; + rp_end = rp + Row [row].length ; + while (rp < rp_end) + { + /* get a column */ + col = *rp++ ; + /* add the column, if alive and untagged */ + col_thickness = Col [col].shared1.thickness ; + if (col_thickness > 0 && COL_IS_ALIVE (col)) + { + /* tag column in pivot row */ + Col [col].shared1.thickness = -col_thickness ; + COLAMD_ASSERT (pfree < Alen) ; + /* place column in pivot row */ + A [pfree++] = col ; + pivot_row_degree += col_thickness ; + } + } + } + + /* clear tag on pivot column */ + Col [pivot_col].shared1.thickness = pivot_col_thickness ; + max_deg = COLAMD_MAX (max_deg, pivot_row_degree) ; + + + /* === Kill all rows used to construct 
pivot row ==================== */ + + /* also kill pivot row, temporarily */ + cp = &A [Col [pivot_col].start] ; + cp_end = cp + Col [pivot_col].length ; + while (cp < cp_end) + { + /* may be killing an already dead row */ + row = *cp++ ; + COLAMD_DEBUG3 (("Kill row in pivot col: %d\n", row)) ; + KILL_ROW (row) ; + } + + /* === Select a row index to use as the new pivot row =============== */ + + pivot_row_length = pfree - pivot_row_start ; + if (pivot_row_length > 0) + { + /* pick the "pivot" row arbitrarily (first row in col) */ + pivot_row = A [Col [pivot_col].start] ; + COLAMD_DEBUG3 (("Pivotal row is %d\n", pivot_row)) ; + } + else + { + /* there is no pivot row, since it is of zero length */ + pivot_row = COLAMD_EMPTY ; + COLAMD_ASSERT (pivot_row_length == 0) ; + } + COLAMD_ASSERT (Col [pivot_col].length > 0 || pivot_row_length == 0) ; + + /* === Approximate degree computation =============================== */ + + /* Here begins the computation of the approximate degree. The column */ + /* score is the sum of the pivot row "length", plus the size of the */ + /* set differences of each row in the column minus the pattern of the */ + /* pivot row itself. The column ("thickness") itself is also */ + /* excluded from the column score (we thus use an approximate */ + /* external degree). */ + + /* The time taken by the following code (compute set differences, and */ + /* add them up) is proportional to the size of the data structure */ + /* being scanned - that is, the sum of the sizes of each column in */ + /* the pivot row. Thus, the amortized time to compute a column score */ + /* is proportional to the size of that column (where size, in this */ + /* context, is the column "length", or the number of row indices */ + /* in that column). The number of row indices in a column is */ + /* monotonically non-decreasing, from the length of the original */ + /* column on input to colamd. */ + + /* === Compute set differences ====================================== */ + + COLAMD_DEBUG3 (("** Computing set differences phase. **\n")) ; + + /* pivot row is currently dead - it will be revived later. 
*/ + + COLAMD_DEBUG3 (("Pivot row: ")) ; + /* for each column in pivot row */ + rp = &A [pivot_row_start] ; + rp_end = rp + pivot_row_length ; + while (rp < rp_end) + { + col = *rp++ ; + COLAMD_ASSERT (COL_IS_ALIVE (col) && col != pivot_col) ; + COLAMD_DEBUG3 (("Col: %d\n", col)) ; + + /* clear tags used to construct pivot row pattern */ + col_thickness = -Col [col].shared1.thickness ; + COLAMD_ASSERT (col_thickness > 0) ; + Col [col].shared1.thickness = col_thickness ; + + /* === Remove column from degree list =========================== */ + + cur_score = Col [col].shared2.score ; + prev_col = Col [col].shared3.prev ; + next_col = Col [col].shared4.degree_next ; + COLAMD_ASSERT (cur_score >= 0) ; + COLAMD_ASSERT (cur_score <= n_col) ; + COLAMD_ASSERT (cur_score >= COLAMD_EMPTY) ; + if (prev_col == COLAMD_EMPTY) + { + head [cur_score] = next_col ; + } + else + { + Col [prev_col].shared4.degree_next = next_col ; + } + if (next_col != COLAMD_EMPTY) + { + Col [next_col].shared3.prev = prev_col ; + } + + /* === Scan the column ========================================== */ + + cp = &A [Col [col].start] ; + cp_end = cp + Col [col].length ; + while (cp < cp_end) + { + /* get a row */ + row = *cp++ ; + row_mark = Row [row].shared2.mark ; + /* skip if dead */ + if (ROW_IS_MARKED_DEAD (row_mark)) + { + continue ; + } + COLAMD_ASSERT (row != pivot_row) ; + set_difference = row_mark - tag_mark ; + /* check if the row has been seen yet */ + if (set_difference < 0) + { + COLAMD_ASSERT (Row [row].shared1.degree <= max_deg) ; + set_difference = Row [row].shared1.degree ; + } + /* subtract column thickness from this row's set difference */ + set_difference -= col_thickness ; + COLAMD_ASSERT (set_difference >= 0) ; + /* absorb this row if the set difference becomes zero */ + if (set_difference == 0) + { + COLAMD_DEBUG3 (("aggressive absorption. Row: %d\n", row)) ; + KILL_ROW (row) ; + } + else + { + /* save the new mark */ + Row [row].shared2.mark = set_difference + tag_mark ; + } + } + } + + + /* === Add up set differences for each column ======================= */ + + COLAMD_DEBUG3 (("** Adding set differences phase. **\n")) ; + + /* for each column in pivot row */ + rp = &A [pivot_row_start] ; + rp_end = rp + pivot_row_length ; + while (rp < rp_end) + { + /* get a column */ + col = *rp++ ; + COLAMD_ASSERT (COL_IS_ALIVE (col) && col != pivot_col) ; + hash = 0 ; + cur_score = 0 ; + cp = &A [Col [col].start] ; + /* compact the column */ + new_cp = cp ; + cp_end = cp + Col [col].length ; + + COLAMD_DEBUG4 (("Adding set diffs for Col: %d.\n", col)) ; + + while (cp < cp_end) + { + /* get a row */ + row = *cp++ ; + COLAMD_ASSERT(row >= 0 && row < n_row) ; + row_mark = Row [row].shared2.mark ; + /* skip if dead */ + if (ROW_IS_MARKED_DEAD (row_mark)) + { + continue ; + } + COLAMD_ASSERT (row_mark > tag_mark) ; + /* compact the column */ + *new_cp++ = row ; + /* compute hash function */ + hash += row ; + /* add set difference */ + cur_score += row_mark - tag_mark ; + /* integer overflow... */ + cur_score = COLAMD_MIN (cur_score, n_col) ; + } + + /* recompute the column's length */ + Col [col].length = (int) (new_cp - &A [Col [col].start]) ; + + /* === Further mass elimination ================================= */ + + if (Col [col].length == 0) + { + COLAMD_DEBUG4 (("further mass elimination. 
Col: %d\n", col)) ; + /* nothing left but the pivot row in this column */ + KILL_PRINCIPAL_COL (col) ; + pivot_row_degree -= Col [col].shared1.thickness ; + COLAMD_ASSERT (pivot_row_degree >= 0) ; + /* order it */ + Col [col].shared2.order = k ; + /* increment order count by column thickness */ + k += Col [col].shared1.thickness ; + } + else + { + /* === Prepare for supercolumn detection ==================== */ + + COLAMD_DEBUG4 (("Preparing supercol detection for Col: %d.\n", col)) ; + + /* save score so far */ + Col [col].shared2.score = cur_score ; + + /* add column to hash table, for supercolumn detection */ + hash %= n_col + 1 ; + + COLAMD_DEBUG4 ((" Hash = %d, n_col = %d.\n", hash, n_col)) ; + COLAMD_ASSERT (hash <= n_col) ; + + head_column = head [hash] ; + if (head_column > COLAMD_EMPTY) + { + /* degree list "hash" is non-empty, use prev (shared3) of */ + /* first column in degree list as head of hash bucket */ + first_col = Col [head_column].shared3.headhash ; + Col [head_column].shared3.headhash = col ; + } + else + { + /* degree list "hash" is empty, use head as hash bucket */ + first_col = - (head_column + 2) ; + head [hash] = - (col + 2) ; + } + Col [col].shared4.hash_next = first_col ; + + /* save hash function in Col [col].shared3.hash */ + Col [col].shared3.hash = (int) hash ; + COLAMD_ASSERT (COL_IS_ALIVE (col)) ; + } + } + + /* The approximate external column degree is now computed. */ + + /* === Supercolumn detection ======================================== */ + + COLAMD_DEBUG3 (("** Supercolumn detection phase. **\n")) ; + + detect_super_cols ( + + Col, A, head, pivot_row_start, pivot_row_length) ; + + /* === Kill the pivotal column ====================================== */ + + KILL_PRINCIPAL_COL (pivot_col) ; + + /* === Clear mark =================================================== */ + + tag_mark += (max_deg + 1) ; + if (tag_mark >= max_mark) + { + COLAMD_DEBUG2 (("clearing tag_mark\n")) ; + tag_mark = clear_mark (n_row, Row) ; + } + + /* === Finalize the new pivot row, and column scores ================ */ + + COLAMD_DEBUG3 (("** Finalize scores phase. **\n")) ; + + /* for each column in pivot row */ + rp = &A [pivot_row_start] ; + /* compact the pivot row */ + new_rp = rp ; + rp_end = rp + pivot_row_length ; + while (rp < rp_end) + { + col = *rp++ ; + /* skip dead columns */ + if (COL_IS_DEAD (col)) + { + continue ; + } + *new_rp++ = col ; + /* add new pivot row to column */ + A [Col [col].start + (Col [col].length++)] = pivot_row ; + + /* retrieve score so far and add on pivot row's degree. */ + /* (we wait until here for this in case the pivot */ + /* row's degree was reduced due to mass elimination). 
*/ + cur_score = Col [col].shared2.score + pivot_row_degree ; + + /* calculate the max possible score as the number of */ + /* external columns minus the 'k' value minus the */ + /* columns thickness */ + max_score = n_col - k - Col [col].shared1.thickness ; + + /* make the score the external degree of the union-of-rows */ + cur_score -= Col [col].shared1.thickness ; + + /* make sure score is less or equal than the max score */ + cur_score = COLAMD_MIN (cur_score, max_score) ; + COLAMD_ASSERT (cur_score >= 0) ; + + /* store updated score */ + Col [col].shared2.score = cur_score ; + + /* === Place column back in degree list ========================= */ + + COLAMD_ASSERT (min_score >= 0) ; + COLAMD_ASSERT (min_score <= n_col) ; + COLAMD_ASSERT (cur_score >= 0) ; + COLAMD_ASSERT (cur_score <= n_col) ; + COLAMD_ASSERT (head [cur_score] >= COLAMD_EMPTY) ; + next_col = head [cur_score] ; + Col [col].shared4.degree_next = next_col ; + Col [col].shared3.prev = COLAMD_EMPTY ; + if (next_col != COLAMD_EMPTY) + { + Col [next_col].shared3.prev = col ; + } + head [cur_score] = col ; + + /* see if this score is less than current min */ + min_score = COLAMD_MIN (min_score, cur_score) ; + + } + + /* === Resurrect the new pivot row ================================== */ + + if (pivot_row_degree > 0) + { + /* update pivot row length to reflect any cols that were killed */ + /* during super-col detection and mass elimination */ + Row [pivot_row].start = pivot_row_start ; + Row [pivot_row].length = (int) (new_rp - &A[pivot_row_start]) ; + Row [pivot_row].shared1.degree = pivot_row_degree ; + Row [pivot_row].shared2.mark = 0 ; + /* pivot row is no longer dead */ + } + } + + /* === All principal columns have now been ordered ====================== */ + + return (ngarbage) ; +} + + +/* ========================================================================== */ +/* === order_children ======================================================= */ +/* ========================================================================== */ + +/* + The find_ordering routine has ordered all of the principal columns (the + representatives of the supercolumns). The non-principal columns have not + yet been ordered. This routine orders those columns by walking up the + parent tree (a column is a child of the column which absorbed it). The + final permutation vector is then placed in p [0 ... n_col-1], with p [0] + being the first column, and p [n_col-1] being the last. It doesn't look + like it at first glance, but be assured that this routine takes time linear + in the number of columns. Although not immediately obvious, the time + taken by this routine is O (n_col), that is, linear in the number of + columns. Not user-callable. +*/ + +static inline void order_children +( + /* === Parameters ======================================================= */ + + int n_col, /* number of columns of A */ + colamd_col Col [], /* of size n_col+1 */ + int p [] /* p [0 ... 
n_col-1] is the column permutation*/ +) +{ + /* === Local variables ================================================== */ + + int i ; /* loop counter for all columns */ + int c ; /* column index */ + int parent ; /* index of column's parent */ + int order ; /* column's order */ + + /* === Order each non-principal column ================================== */ + + for (i = 0 ; i < n_col ; i++) + { + /* find an un-ordered non-principal column */ + COLAMD_ASSERT (COL_IS_DEAD (i)) ; + if (!COL_IS_DEAD_PRINCIPAL (i) && Col [i].shared2.order == COLAMD_EMPTY) + { + parent = i ; + /* once found, find its principal parent */ + do + { + parent = Col [parent].shared1.parent ; + } while (!COL_IS_DEAD_PRINCIPAL (parent)) ; + + /* now, order all un-ordered non-principal columns along path */ + /* to this parent. collapse tree at the same time */ + c = i ; + /* get order of parent */ + order = Col [parent].shared2.order ; + + do + { + COLAMD_ASSERT (Col [c].shared2.order == COLAMD_EMPTY) ; + + /* order this column */ + Col [c].shared2.order = order++ ; + /* collaps tree */ + Col [c].shared1.parent = parent ; + + /* get immediate parent of this column */ + c = Col [c].shared1.parent ; + + /* continue until we hit an ordered column. There are */ + /* guarranteed not to be anymore unordered columns */ + /* above an ordered column */ + } while (Col [c].shared2.order == COLAMD_EMPTY) ; + + /* re-order the super_col parent to largest order for this group */ + Col [parent].shared2.order = order ; + } + } + + /* === Generate the permutation ========================================= */ + + for (c = 0 ; c < n_col ; c++) + { + p [Col [c].shared2.order] = c ; + } +} + + +/* ========================================================================== */ +/* === detect_super_cols ==================================================== */ +/* ========================================================================== */ + +/* + Detects supercolumns by finding matches between columns in the hash buckets. + Check amongst columns in the set A [row_start ... row_start + row_length-1]. + The columns under consideration are currently *not* in the degree lists, + and have already been placed in the hash buckets. + + The hash bucket for columns whose hash function is equal to h is stored + as follows: + + if head [h] is >= 0, then head [h] contains a degree list, so: + + head [h] is the first column in degree bucket h. + Col [head [h]].headhash gives the first column in hash bucket h. + + otherwise, the degree list is empty, and: + + -(head [h] + 2) is the first column in hash bucket h. + + For a column c in a hash bucket, Col [c].shared3.prev is NOT a "previous + column" pointer. Col [c].shared3.hash is used instead as the hash number + for that column. The value of Col [c].shared4.hash_next is the next column + in the same hash bucket. + + Assuming no, or "few" hash collisions, the time taken by this routine is + linear in the sum of the sizes (lengths) of each column whose score has + just been computed in the approximate degree computation. + Not user-callable. 
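+
+    As a concrete illustration of the head [] encoding above (COLAMD_EMPTY
+    being -1): head [h] == -5 means that degree list h is empty and that hash
+    bucket h starts at column 3, since -(-5 + 2) == 3, while head [h] == -1
+    means that both the degree list and the hash bucket are empty.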
+*/ + +static void detect_super_cols +( + /* === Parameters ======================================================= */ + + colamd_col Col [], /* of size n_col+1 */ + int A [], /* row indices of A */ + int head [], /* head of degree lists and hash buckets */ + int row_start, /* pointer to set of columns to check */ + int row_length /* number of columns to check */ +) +{ + /* === Local variables ================================================== */ + + int hash ; /* hash value for a column */ + int *rp ; /* pointer to a row */ + int c ; /* a column index */ + int super_c ; /* column index of the column to absorb into */ + int *cp1 ; /* column pointer for column super_c */ + int *cp2 ; /* column pointer for column c */ + int length ; /* length of column super_c */ + int prev_c ; /* column preceding c in hash bucket */ + int i ; /* loop counter */ + int *rp_end ; /* pointer to the end of the row */ + int col ; /* a column index in the row to check */ + int head_column ; /* first column in hash bucket or degree list */ + int first_col ; /* first column in hash bucket */ + + /* === Consider each column in the row ================================== */ + + rp = &A [row_start] ; + rp_end = rp + row_length ; + while (rp < rp_end) + { + col = *rp++ ; + if (COL_IS_DEAD (col)) + { + continue ; + } + + /* get hash number for this column */ + hash = Col [col].shared3.hash ; + COLAMD_ASSERT (hash <= n_col) ; + + /* === Get the first column in this hash bucket ===================== */ + + head_column = head [hash] ; + if (head_column > COLAMD_EMPTY) + { + first_col = Col [head_column].shared3.headhash ; + } + else + { + first_col = - (head_column + 2) ; + } + + /* === Consider each column in the hash bucket ====================== */ + + for (super_c = first_col ; super_c != COLAMD_EMPTY ; + super_c = Col [super_c].shared4.hash_next) + { + COLAMD_ASSERT (COL_IS_ALIVE (super_c)) ; + COLAMD_ASSERT (Col [super_c].shared3.hash == hash) ; + length = Col [super_c].length ; + + /* prev_c is the column preceding column c in the hash bucket */ + prev_c = super_c ; + + /* === Compare super_c with all columns after it ================ */ + + for (c = Col [super_c].shared4.hash_next ; + c != COLAMD_EMPTY ; c = Col [c].shared4.hash_next) + { + COLAMD_ASSERT (c != super_c) ; + COLAMD_ASSERT (COL_IS_ALIVE (c)) ; + COLAMD_ASSERT (Col [c].shared3.hash == hash) ; + + /* not identical if lengths or scores are different */ + if (Col [c].length != length || + Col [c].shared2.score != Col [super_c].shared2.score) + { + prev_c = c ; + continue ; + } + + /* compare the two columns */ + cp1 = &A [Col [super_c].start] ; + cp2 = &A [Col [c].start] ; + + for (i = 0 ; i < length ; i++) + { + /* the columns are "clean" (no dead rows) */ + COLAMD_ASSERT (ROW_IS_ALIVE (*cp1)) ; + COLAMD_ASSERT (ROW_IS_ALIVE (*cp2)) ; + /* row indices will same order for both supercols, */ + /* no gather scatter nessasary */ + if (*cp1++ != *cp2++) + { + break ; + } + } + + /* the two columns are different if the for-loop "broke" */ + if (i != length) + { + prev_c = c ; + continue ; + } + + /* === Got it! 
two columns are identical =================== */ + + COLAMD_ASSERT (Col [c].shared2.score == Col [super_c].shared2.score) ; + + Col [super_c].shared1.thickness += Col [c].shared1.thickness ; + Col [c].shared1.parent = super_c ; + KILL_NON_PRINCIPAL_COL (c) ; + /* order c later, in order_children() */ + Col [c].shared2.order = COLAMD_EMPTY ; + /* remove c from hash bucket */ + Col [prev_c].shared4.hash_next = Col [c].shared4.hash_next ; + } + } + + /* === Empty this hash bucket ======================================= */ + + if (head_column > COLAMD_EMPTY) + { + /* corresponding degree list "hash" is not empty */ + Col [head_column].shared3.headhash = COLAMD_EMPTY ; + } + else + { + /* corresponding degree list "hash" is empty */ + head [hash] = COLAMD_EMPTY ; + } + } +} + + +/* ========================================================================== */ +/* === garbage_collection =================================================== */ +/* ========================================================================== */ + +/* + Defragments and compacts columns and rows in the workspace A. Used when + all avaliable memory has been used while performing row merging. Returns + the index of the first free position in A, after garbage collection. The + time taken by this routine is linear is the size of the array A, which is + itself linear in the number of nonzeros in the input matrix. + Not user-callable. +*/ + +static int garbage_collection /* returns the new value of pfree */ +( + /* === Parameters ======================================================= */ + + int n_row, /* number of rows */ + int n_col, /* number of columns */ + Colamd_Row Row [], /* row info */ + colamd_col Col [], /* column info */ + int A [], /* A [0 ... Alen-1] holds the matrix */ + int *pfree /* &A [0] ... pfree is in use */ +) +{ + /* === Local variables ================================================== */ + + int *psrc ; /* source pointer */ + int *pdest ; /* destination pointer */ + int j ; /* counter */ + int r ; /* a row index */ + int c ; /* a column index */ + int length ; /* length of a row or column */ + + /* === Defragment the columns =========================================== */ + + pdest = &A[0] ; + for (c = 0 ; c < n_col ; c++) + { + if (COL_IS_ALIVE (c)) + { + psrc = &A [Col [c].start] ; + + /* move and compact the column */ + COLAMD_ASSERT (pdest <= psrc) ; + Col [c].start = (int) (pdest - &A [0]) ; + length = Col [c].length ; + for (j = 0 ; j < length ; j++) + { + r = *psrc++ ; + if (ROW_IS_ALIVE (r)) + { + *pdest++ = r ; + } + } + Col [c].length = (int) (pdest - &A [Col [c].start]) ; + } + } + + /* === Prepare to defragment the rows =================================== */ + + for (r = 0 ; r < n_row ; r++) + { + if (ROW_IS_ALIVE (r)) + { + if (Row [r].length == 0) + { + /* this row is of zero length. cannot compact it, so kill it */ + COLAMD_DEBUG3 (("Defrag row kill\n")) ; + KILL_ROW (r) ; + } + else + { + /* save first column index in Row [r].shared2.first_column */ + psrc = &A [Row [r].start] ; + Row [r].shared2.first_column = *psrc ; + COLAMD_ASSERT (ROW_IS_ALIVE (r)) ; + /* flag the start of the row with the one's complement of row */ + *psrc = ONES_COMPLEMENT (r) ; + + } + } + } + + /* === Defragment the rows ============================================== */ + + psrc = pdest ; + while (psrc < pfree) + { + /* find a negative number ... 
the start of a row */ + if (*psrc++ < 0) + { + psrc-- ; + /* get the row index */ + r = ONES_COMPLEMENT (*psrc) ; + COLAMD_ASSERT (r >= 0 && r < n_row) ; + /* restore first column index */ + *psrc = Row [r].shared2.first_column ; + COLAMD_ASSERT (ROW_IS_ALIVE (r)) ; + + /* move and compact the row */ + COLAMD_ASSERT (pdest <= psrc) ; + Row [r].start = (int) (pdest - &A [0]) ; + length = Row [r].length ; + for (j = 0 ; j < length ; j++) + { + c = *psrc++ ; + if (COL_IS_ALIVE (c)) + { + *pdest++ = c ; + } + } + Row [r].length = (int) (pdest - &A [Row [r].start]) ; + + } + } + /* ensure we found all the rows */ + COLAMD_ASSERT (debug_rows == 0) ; + + /* === Return the new value of pfree ==================================== */ + + return ((int) (pdest - &A [0])) ; +} + + +/* ========================================================================== */ +/* === clear_mark =========================================================== */ +/* ========================================================================== */ + +/* + Clears the Row [].shared2.mark array, and returns the new tag_mark. + Return value is the new tag_mark. Not user-callable. +*/ + +static inline int clear_mark /* return the new value for tag_mark */ +( + /* === Parameters ======================================================= */ + + int n_row, /* number of rows in A */ + Colamd_Row Row [] /* Row [0 ... n_row-1].shared2.mark is set to zero */ +) +{ + /* === Local variables ================================================== */ + + int r ; + + for (r = 0 ; r < n_row ; r++) + { + if (ROW_IS_ALIVE (r)) + { + Row [r].shared2.mark = 0 ; + } + } + return (1) ; +} + + +} // namespace internal +#endif diff --git a/Eigen/src/OrderingMethods/Ordering.h b/Eigen/src/OrderingMethods/Ordering.h new file mode 100644 index 000000000..f5757b319 --- /dev/null +++ b/Eigen/src/OrderingMethods/Ordering.h @@ -0,0 +1,158 @@ + +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr> +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see <http://www.gnu.org/licenses/>. + +#ifndef EIGEN_ORDERING_H +#define EIGEN_ORDERING_H + +#include "Amd.h" +namespace Eigen { + +#include "Eigen_Colamd.h" + +namespace internal { + + /** + * Get the symmetric pattern A^T+A from the input matrix A. 
+ * FIXME: The values should not be considered here + */ + template<typename MatrixType> + void ordering_helper_at_plus_a(const MatrixType& mat, MatrixType& symmat) + { + MatrixType C; + C = mat.transpose(); // NOTE: Could be costly + for (int i = 0; i < C.rows(); i++) + { + for (typename MatrixType::InnerIterator it(C, i); it; ++it) + it.valueRef() = 0.0; + } + symmat = C + mat; + } + +} + +/** + * Get the approximate minimum degree ordering + * If the matrix is not structurally symmetric, an ordering of A^T+A is computed + * \tparam Index The type of indices of the matrix + */ +template <typename Index> +class AMDOrdering +{ + public: + typedef PermutationMatrix<Dynamic, Dynamic, Index> PermutationType; + + /** Compute the permutation vector from a sparse matrix + * This routine is much faster if the input matrix is column-major + */ + template <typename MatrixType> + void operator()(const MatrixType& mat, PermutationType& perm) + { + // Compute the symmetric pattern + SparseMatrix<typename MatrixType::Scalar, ColMajor, Index> symm; + internal::ordering_helper_at_plus_a(mat,symm); + + // Call the AMD routine + //m_mat.prune(keep_diag()); + internal::minimum_degree_ordering(symm, perm); + } + + /** Compute the permutation with a selfadjoint matrix */ + template <typename SrcType, unsigned int SrcUpLo> + void operator()(const SparseSelfAdjointView<SrcType, SrcUpLo>& mat, PermutationType& perm) + { + SparseMatrix<typename SrcType::Scalar, ColMajor, Index> C = mat; + + // Call the AMD routine + // m_mat.prune(keep_diag()); //Remove the diagonal elements + internal::minimum_degree_ordering(C, perm); + } +}; + +/** + * Get the natural ordering + * + *NOTE Returns an empty permutation matrix + * \tparam Index The type of indices of the matrix + */ +template <typename Index> +class NaturalOrdering +{ + public: + typedef PermutationMatrix<Dynamic, Dynamic, Index> PermutationType; + + /** Compute the permutation vector from a column-major sparse matrix */ + template <typename MatrixType> + void operator()(const MatrixType& mat, PermutationType& perm) + { + perm.resize(0); + } + +}; + +/** + * Get the column approximate minimum degree ordering + * The matrix should be in column-major format + */ +template<typename Index> +class COLAMDOrdering; +#include "Eigen_Colamd.h" + +template<typename Index> +class COLAMDOrdering +{ + public: + typedef PermutationMatrix<Dynamic, Dynamic, Index> PermutationType; + typedef Matrix<Index, Dynamic, 1> IndexVector; + /** Compute the permutation vector form a sparse matrix */ + template <typename MatrixType> + void operator() (const MatrixType& mat, PermutationType& perm) + { + int m = mat.rows(); + int n = mat.cols(); + int nnz = mat.nonZeros(); + // Get the recommended value of Alen to be used by colamd + int Alen = internal::colamd_recommended(nnz, m, n); + // Set the default parameters + double knobs [COLAMD_KNOBS]; + int stats [COLAMD_STATS]; + internal::colamd_set_defaults(knobs); + + int info; + IndexVector p(n+1), A(Alen); + for(int i=0; i <= n; i++) p(i) = mat.outerIndexPtr()[i]; + for(int i=0; i < nnz; i++) A(i) = mat.innerIndexPtr()[i]; + // Call Colamd routine to compute the ordering + info = internal::colamd(m, n, Alen, A.data(), p.data(), knobs, stats); + eigen_assert( info && "COLAMD failed " ); + + perm.resize(n); + for (int i = 0; i < n; i++) perm.indices()(p(i)) = i; + + } + +}; + +} // end namespace Eigen +#endif
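For orientation, a minimal usage sketch of the new ordering classes follows; the matrix pattern and sizes are illustrative only, and it is assumed that the OrderingMethods module header makes Ordering.h available. The final permuted copy mirrors what SparseLU::analyzePattern() does further down in this patch.

  // Illustrative sketch: compute a COLAMD fill-reducing column permutation
  // for a small column-major sparse matrix, then permute its columns.
  #include <Eigen/SparseCore>
  #include <Eigen/OrderingMethods>
  #include <vector>
  #include <iostream>

  int main()
  {
    typedef Eigen::SparseMatrix<double, Eigen::ColMajor, int> SpMat;
    typedef Eigen::Triplet<double> T;

    std::vector<T> entries;                        // example pattern only
    entries.push_back(T(0, 0, 1.0)); entries.push_back(T(1, 0, 2.0));
    entries.push_back(T(1, 1, 3.0)); entries.push_back(T(3, 1, 4.0));
    entries.push_back(T(2, 2, 5.0)); entries.push_back(T(0, 3, 6.0));
    entries.push_back(T(3, 3, 7.0));

    SpMat A(4, 4);
    A.setFromTriplets(entries.begin(), entries.end()); // A is compressed, as COLAMDOrdering expects

    Eigen::COLAMDOrdering<int> ordering;
    Eigen::PermutationMatrix<Eigen::Dynamic, Eigen::Dynamic, int> perm;
    ordering(A, perm);                             // perm.indices()(old column) = new position

    // Permute the columns the same way SparseLU::analyzePattern() suggests.
    SpMat Ap = A * perm.inverse();

    std::cout << perm.indices().transpose() << "\n" << Ap.nonZeros() << std::endl;
    return 0;
  }

AMDOrdering and NaturalOrdering expose the same operator() interface, so the ordering engine can be swapped without touching the rest of the code.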
\ No newline at end of file diff --git a/Eigen/src/SparseCore/SparseMatrix.h b/Eigen/src/SparseCore/SparseMatrix.h index 743fa3afc..573804837 100644 --- a/Eigen/src/SparseCore/SparseMatrix.h +++ b/Eigen/src/SparseCore/SparseMatrix.h @@ -469,6 +469,18 @@ class SparseMatrix m_data.squeeze(); } + /** Turns the matrix into the uncompressed mode */ + void uncompress() + { + if(m_innerNonZeros != 0) + return; + m_innerNonZeros = new Index[m_outerSize]; + for (int i = 0; i < m_outerSize; i++) + { + m_innerNonZeros[i] = m_outerIndex[i+1] - m_outerIndex[i]; + } + } + /** Suppresses all nonzeros which are \b much \b smaller \b than \a reference under the tolerence \a epsilon */ void prune(const Scalar& reference, const RealScalar& epsilon = NumTraits<RealScalar>::dummy_precision()) { diff --git a/Eigen/src/SparseCore/SparseUtil.h b/Eigen/src/SparseCore/SparseUtil.h index 6062a086f..a686e08da 100644 --- a/Eigen/src/SparseCore/SparseUtil.h +++ b/Eigen/src/SparseCore/SparseUtil.h @@ -113,9 +113,10 @@ template<typename T,int Rows> struct sparse_eval<T,Rows,1> { template<typename T,int Rows,int Cols> struct sparse_eval { typedef typename traits<T>::Scalar _Scalar; - enum { _Flags = traits<T>::Flags }; + typedef typename traits<T>::Index _Index; + enum { _Options = ((traits<T>::Flags&RowMajorBit)==RowMajorBit) ? RowMajor : ColMajor }; public: - typedef SparseMatrix<_Scalar, _Flags> type; + typedef SparseMatrix<_Scalar, _Options, _Index> type; }; template<typename T> struct sparse_eval<T,1,1> { diff --git a/Eigen/src/SparseLU/CMakeLists.txt b/Eigen/src/SparseLU/CMakeLists.txt new file mode 100644 index 000000000..69729ee89 --- /dev/null +++ b/Eigen/src/SparseLU/CMakeLists.txt @@ -0,0 +1,6 @@ +FILE(GLOB Eigen_SparseLU_SRCS "*.h") + +INSTALL(FILES + ${Eigen_SparseLU_SRCS} + DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/SparseLU COMPONENT Devel + ) diff --git a/Eigen/src/SparseLU/SparseLU.h b/Eigen/src/SparseLU/SparseLU.h new file mode 100644 index 000000000..9ea121ce5 --- /dev/null +++ b/Eigen/src/SparseLU/SparseLU.h @@ -0,0 +1,630 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +#ifndef EIGEN_SPARSE_LU_H +#define EIGEN_SPARSE_LU_H + +namespace Eigen { + + +// Data structure needed by all routines +#include "SparseLU_Structs.h" +#include "SparseLU_Matrix.h" + +// Base structure containing all the factorization routines +#include "SparseLUBase.h" +/** + * \ingroup SparseLU_Module + * \brief Sparse supernodal LU factorization for general matrices + * + * This class implements the supernodal LU factorization for general matrices. + * It uses the main techniques from the sequential SuperLU package + * (http://crd-legacy.lbl.gov/~xiaoye/SuperLU/). It handles transparently real + * and complex arithmetics with single and double precision, depending on the + * scalar type of your input matrix. + * The code has been optimized to provide BLAS-3 operations during supernode-panel updates. + * It benefits directly from the built-in high-performant Eigen BLAS routines. + * Moreover, when the size of a supernode is very small, the BLAS calls are avoided to + * enable a better optimization from the compiler. 
For best performance,
+ * you should compile it with the NDEBUG flag to avoid the numerous bounds checks on vectors.
+ *
+ * An important parameter of this class is the ordering method. It is used to reorder the columns
+ * (and possibly the rows) of the matrix to reduce the number of new elements that are created during
+ * numerical factorization. The cheapest method available is COLAMD.
+ * See \link Ordering_Modules the Ordering module \endlink for the list of
+ * built-in and external ordering methods.
+ *
+ * A simple example with the key steps:
+ * \code
+ * VectorXd x(n), b(n);
+ * SparseMatrix<double, ColMajor> A;
+ * SparseLU<SparseMatrix<double, ColMajor>, COLAMDOrdering<int> > solver;
+ * // fill A and b;
+ * // Compute the ordering permutation vector from the structural pattern of A
+ * solver.analyzePattern(A);
+ * // Compute the numerical factorization
+ * solver.factorize(A);
+ * // Use the factors to solve the linear system
+ * x = solver.solve(b);
+ * \endcode
+ *
+ * \warning The input matrix A should be in a \b compressed and \b column-major form.
+ * Otherwise an expensive copy will be made. You can call the inexpensive makeCompressed() to get a compressed matrix.
+ *
+ * \note Unlike the initial SuperLU implementation, there is no step to equilibrate the matrix.
+ * For badly scaled matrices, this step can be useful to reduce the pivoting during factorization.
+ * If this is the case for your matrices, you can try the basic scaling method at
+ * "unsupported/Eigen/src/IterativeSolvers/Scaling.h"
+ *
+ * \tparam _MatrixType The type of the sparse matrix. It must be a column-major SparseMatrix<>
+ * \tparam _OrderingType The ordering method to use, either AMD, COLAMD or METIS
+ *
+ * \sa \ref TutorialSparseDirectSolvers
+ * \sa \ref Ordering_Modules
+ */
+template <typename _MatrixType, typename _OrderingType>
+class SparseLU
+{
+  public:
+    typedef _MatrixType MatrixType;
+    typedef _OrderingType OrderingType;
+    typedef typename MatrixType::Scalar Scalar;
+    typedef typename MatrixType::RealScalar RealScalar;
+    typedef typename MatrixType::Index Index;
+    typedef SparseMatrix<Scalar,ColMajor,Index> NCMatrix;
+    typedef SuperNodalMatrix<Scalar, Index> SCMatrix;
+    typedef Matrix<Scalar,Dynamic,1> ScalarVector;
+    typedef Matrix<Index,Dynamic,1> IndexVector;
+    typedef PermutationMatrix<Dynamic, Dynamic, Index> PermutationType;
+
+  public:
+    SparseLU():m_isInitialized(true),m_Ustore(0,0,0,0,0,0),m_symmetricmode(false),m_diagpivotthresh(1.0)
+    {
+      initperfvalues();
+    }
+    SparseLU(const MatrixType& matrix):m_isInitialized(true),m_Ustore(0,0,0,0,0,0),m_symmetricmode(false),m_diagpivotthresh(1.0)
+    {
+      initperfvalues();
+      compute(matrix);
+    }
+
+    ~SparseLU()
+    {
+      // Free all explicit dynamic pointers
+    }
+
+    void analyzePattern (const MatrixType& matrix);
+    void factorize (const MatrixType& matrix);
+    void simplicialfactorize(const MatrixType& matrix);
+
+    /**
+     * Compute the symbolic and numeric factorization of the input sparse matrix.
+     * The input matrix should be in column-major storage.
+     */
+    void compute (const MatrixType& matrix)
+    {
+      // Analyze
+      analyzePattern(matrix);
+      // Factorize
+      factorize(matrix);
+    }
+
+    inline Index rows() const { return m_mat.rows(); }
+    inline Index cols() const { return m_mat.cols(); }
+    /** Indicate that the pattern of the input matrix is symmetric */
+    void isSymmetric(bool sym)
+    {
+      m_symmetricmode = sym;
+    }
+
+    /** Set the threshold used for a diagonal entry to be an acceptable pivot.
*/ + void diagPivotThresh(RealScalar thresh) + { + m_diagpivotthresh = thresh; + } + + /** Return the number of nonzero elements in the L factor */ + int nnzL() + { + if (m_factorizationIsOk) + return m_nnzL; + else + { + std::cerr<<"Numerical factorization should be done before\n"; + return 0; + } + } + /** Return the number of nonzero elements in the U factor */ + int nnzU() + { + if (m_factorizationIsOk) + return m_nnzU; + else + { + std::cerr<<"Numerical factorization should be done before\n"; + return 0; + } + } + /** \returns the solution X of \f$ A X = B \f$ using the current decomposition of A. + * + * \sa compute() + */ + template<typename Rhs> + inline const internal::solve_retval<SparseLU, Rhs> solve(const MatrixBase<Rhs>& B) const + { + eigen_assert(m_factorizationIsOk && "SparseLU is not initialized."); + eigen_assert(rows()==B.rows() + && "SparseLU::solve(): invalid number of rows of the right hand side matrix B"); + return internal::solve_retval<SparseLU, Rhs>(*this, B.derived()); + } + + + /** \brief Reports whether previous computation was successful. + * + * \returns \c Success if computation was succesful, + * \c NumericalIssue if the PaStiX reports a problem + * \c InvalidInput if the input matrix is invalid + * + * \sa iparm() + */ + ComputationInfo info() const + { + eigen_assert(m_isInitialized && "Decomposition is not initialized."); + return m_info; + } + + template<typename Rhs, typename Dest> + bool _solve(const MatrixBase<Rhs> &B, MatrixBase<Dest> &_X) const + { + Dest& X(_X.derived()); + eigen_assert(m_factorizationIsOk && "The matrix should be factorized first"); + EIGEN_STATIC_ASSERT((Dest::Flags&RowMajorBit)==0, + THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES); + + + int nrhs = B.cols(); + Index n = B.rows(); + + // Permute the right hand side to form X = Pr*B + // on return, X is overwritten by the computed solution + X.resize(n,nrhs); + for(int j = 0; j < nrhs; ++j) + X.col(j) = m_perm_r * B.col(j); + + //Forward substitution with L + m_Lstore.solveInPlace(X); + + // Backward solve with U + for (int k = m_Lstore.nsuper(); k >= 0; k--) + { + Index fsupc = m_Lstore.supToCol()[k]; + Index istart = m_Lstore.rowIndexPtr()[fsupc]; + Index nsupr = m_Lstore.rowIndexPtr()[fsupc+1] - istart; + Index nsupc = m_Lstore.supToCol()[k+1] - fsupc; + Index luptr = m_Lstore.colIndexPtr()[fsupc]; + + if (nsupc == 1) + { + for (int j = 0; j < nrhs; j++) + { + X(fsupc, j) /= m_Lstore.valuePtr()[luptr]; + } + } + else + { + Map<const Matrix<Scalar,Dynamic,Dynamic>, 0, OuterStride<> > A( &(m_Lstore.valuePtr()[luptr]), nsupc, nsupc, OuterStride<>(nsupr) ); + Map< Matrix<Scalar,Dynamic,Dynamic>, 0, OuterStride<> > U (&(X(fsupc,0)), nsupc, nrhs, OuterStride<>(n) ); + U = A.template triangularView<Upper>().solve(U); + } + + for (int j = 0; j < nrhs; ++j) + { + for (int jcol = fsupc; jcol < fsupc + nsupc; jcol++) + { + typename MappedSparseMatrix<Scalar>::InnerIterator it(m_Ustore, jcol); + for ( ; it; ++it) + { + Index irow = it.index(); + X(irow, j) -= X(jcol, j) * it.value(); + } + } + } + } // End For U-solve + + // Permute back the solution + for (int j = 0; j < nrhs; ++j) + X.col(j) = m_perm_c.inverse() * X.col(j); + + return true; + } + + protected: + // Functions + void initperfvalues() + { + m_perfv.panel_size = 12; + m_perfv.relax = 1; + m_perfv.maxsuper = 100; + m_perfv.rowblk = 200; + m_perfv.colblk = 60; + m_perfv.fillfactor = 20; + } + + // Variables + mutable ComputationInfo m_info; + bool m_isInitialized; + bool m_factorizationIsOk; + bool m_analysisIsOk; + NCMatrix 
m_mat; // The input (permuted ) matrix + SCMatrix m_Lstore; // The lower triangular matrix (supernodal) + MappedSparseMatrix<Scalar> m_Ustore; // The upper triangular matrix + PermutationType m_perm_c; // Column permutation + PermutationType m_perm_r ; // Row permutation + IndexVector m_etree; // Column elimination tree + + LU_GlobalLU_t<IndexVector, ScalarVector> m_glu; + + // SuperLU/SparseLU options + bool m_symmetricmode; + + // values for performance + LU_perfvalues m_perfv; + RealScalar m_diagpivotthresh; // Specifies the threshold used for a diagonal entry to be an acceptable pivot + int m_nnzL, m_nnzU; // Nonzeros in L and U factors + + private: + // Copy constructor + SparseLU (SparseLU& ) {} + +}; // End class SparseLU + + +// Functions needed by the anaysis phase +/** + * Compute the column permutation to minimize the fill-in + * + * - Apply this permutation to the input matrix - + * + * - Compute the column elimination tree on the permuted matrix + * + * - Postorder the elimination tree and the column permutation + * + */ +template <typename MatrixType, typename OrderingType> +void SparseLU<MatrixType, OrderingType>::analyzePattern(const MatrixType& mat) +{ + + //TODO It is possible as in SuperLU to compute row and columns scaling vectors to equilibrate the matrix mat. + + OrderingType ord; + ord(mat,m_perm_c); + + // Apply the permutation to the column of the input matrix +// m_mat = mat * m_perm_c.inverse(); //FIXME It should be less expensive here to permute only the structural pattern of the matrix + + //First copy the whole input matrix. + m_mat = mat; + m_mat.uncompress(); //NOTE: The effect of this command is only to create the InnerNonzeros pointers. FIXME : This vector is filled but not subsequently used. + //Then, permute only the column pointers + for (int i = 0; i < mat.cols(); i++) + { + m_mat.outerIndexPtr()[m_perm_c.indices()(i)] = mat.outerIndexPtr()[i]; + m_mat.innerNonZeroPtr()[m_perm_c.indices()(i)] = mat.outerIndexPtr()[i+1] - mat.outerIndexPtr()[i]; + } + + // Compute the column elimination tree of the permuted matrix + /*if (m_etree.size() == 0) */m_etree.resize(m_mat.cols()); + + SparseLUBase<Scalar,Index>::LU_sp_coletree(m_mat, m_etree); + + // In symmetric mode, do not do postorder here + if (!m_symmetricmode) { + IndexVector post, iwork; + // Post order etree + SparseLUBase<Scalar,Index>::LU_TreePostorder(m_mat.cols(), m_etree, post); + + + // Renumber etree in postorder + int m = m_mat.cols(); + iwork.resize(m+1); + for (int i = 0; i < m; ++i) iwork(post(i)) = post(m_etree(i)); + m_etree = iwork; + + // Postmultiply A*Pc by post, i.e reorder the matrix according to the postorder of the etree + PermutationType post_perm(m); //FIXME Use directly a constructor with post + for (int i = 0; i < m; i++) + post_perm.indices()(i) = post(i); + + // Combine the two permutations : postorder the permutation for future use + m_perm_c = post_perm * m_perm_c; + + } // end postordering + + m_analysisIsOk = true; +} + +// Functions needed by the numerical factorization phase + + +/** + * - Numerical factorization + * - Interleaved with the symbolic factorization + * On exit, info is + * + * = 0: successful factorization + * + * > 0: if info = i, and i is + * + * <= A->ncol: U(i,i) is exactly zero. The factorization has + * been completed, but the factor U is exactly singular, + * and division by zero will occur if it is used to solve a + * system of equations. + * + * > A->ncol: number of bytes allocated when memory allocation + * failure occurred, plus A->ncol. 
If lwork = -1, it is + * the estimated amount of space needed, plus A->ncol. + */ +template <typename MatrixType, typename OrderingType> +void SparseLU<MatrixType, OrderingType>::factorize(const MatrixType& matrix) +{ + + eigen_assert(m_analysisIsOk && "analyzePattern() should be called first"); + eigen_assert((matrix.rows() == matrix.cols()) && "Only for squared matrices"); + + typedef typename IndexVector::Scalar Index; + + + // Apply the column permutation computed in analyzepattern() + // m_mat = matrix * m_perm_c.inverse(); + m_mat = matrix; + m_mat.uncompress(); //NOTE: The effect of this command is only to create the InnerNonzeros pointers. + //Then, permute only the column pointers + for (int i = 0; i < matrix.cols(); i++) + { + m_mat.outerIndexPtr()[m_perm_c.indices()(i)] = matrix.outerIndexPtr()[i]; + m_mat.innerNonZeroPtr()[m_perm_c.indices()(i)] = matrix.outerIndexPtr()[i+1] - matrix.outerIndexPtr()[i]; + } + + int m = m_mat.rows(); + int n = m_mat.cols(); + int nnz = m_mat.nonZeros(); + int maxpanel = m_perfv.panel_size * m; + // Allocate working storage common to the factor routines + int lwork = 0; + int info = SparseLUBase<Scalar,Index>::LUMemInit(m, n, nnz, lwork, m_perfv.fillfactor, m_perfv.panel_size, m_glu); + if (info) + { + std::cerr << "UNABLE TO ALLOCATE WORKING MEMORY\n\n" ; + m_factorizationIsOk = false; + return ; + } + + // Set up pointers for integer working arrays + IndexVector segrep(m); segrep.setZero(); + IndexVector parent(m); parent.setZero(); + IndexVector xplore(m); xplore.setZero(); + IndexVector repfnz(maxpanel); + IndexVector panel_lsub(maxpanel); + IndexVector xprune(n); xprune.setZero(); + IndexVector marker(m*LU_NO_MARKER); marker.setZero(); + + repfnz.setConstant(-1); + panel_lsub.setConstant(-1); + + // Set up pointers for scalar working arrays + ScalarVector dense; + dense.setZero(maxpanel); + ScalarVector tempv; + tempv.setZero(LU_NUM_TEMPV(m, m_perfv.panel_size, m_perfv.maxsuper, m_perfv.rowblk) ); + + // Compute the inverse of perm_c + PermutationType iperm_c(m_perm_c.inverse()); + + // Identify initial relaxed snodes + IndexVector relax_end(n); + if ( m_symmetricmode == true ) + SparseLUBase<Scalar,Index>::LU_heap_relax_snode(n, m_etree, m_perfv.relax, marker, relax_end); + else + SparseLUBase<Scalar,Index>::LU_relax_snode(n, m_etree, m_perfv.relax, marker, relax_end); + + + m_perm_r.resize(m); + m_perm_r.indices().setConstant(-1); + marker.setConstant(-1); + + m_glu.supno(0) = IND_EMPTY; m_glu.xsup.setConstant(0); + m_glu.xsup(0) = m_glu.xlsub(0) = m_glu.xusub(0) = m_glu.xlusup(0) = Index(0); + + // Work on one 'panel' at a time. 
A panel is one of the following : + // (a) a relaxed supernode at the bottom of the etree, or + // (b) panel_size contiguous columns, <panel_size> defined by the user + int jcol,kcol; + IndexVector panel_histo(n); + Index nextu, nextlu, jsupno, fsupc, new_next; + Index pivrow; // Pivotal row number in the original row matrix + int nseg1; // Number of segments in U-column above panel row jcol + int nseg; // Number of segments in each U-column + int irep, icol; + int i, k, jj; + for (jcol = 0; jcol < n; ) + { + if (relax_end(jcol) != IND_EMPTY) + { // Starting a relaxed node from jcol + kcol = relax_end(jcol); // End index of the relaxed snode + + // Factorize the relaxed supernode(jcol:kcol) + // First, determine the union of the row structure of the snode + info = SparseLUBase<Scalar,Index>::LU_snode_dfs(jcol, kcol, m_mat, xprune, marker, m_glu); + if ( info ) + { + std::cerr << "MEMORY ALLOCATION FAILED IN SNODE_DFS() \n"; + m_info = NumericalIssue; + m_factorizationIsOk = false; + return; + } + nextu = m_glu.xusub(jcol); //starting location of column jcol in ucol + nextlu = m_glu.xlusup(jcol); //Starting location of column jcol in lusup (rectangular supernodes) + jsupno = m_glu.supno(jcol); // Supernode number which column jcol belongs to + fsupc = m_glu.xsup(jsupno); //First column number of the current supernode + new_next = nextlu + (m_glu.xlsub(fsupc+1)-m_glu.xlsub(fsupc)) * (kcol - jcol + 1); + int mem; + while (new_next > m_glu.nzlumax ) + { + mem = SparseLUBase<Scalar,Index>::LUMemXpand(m_glu.lusup, m_glu.nzlumax, nextlu, LUSUP, m_glu.num_expansions); + if (mem) + { + std::cerr << "MEMORY ALLOCATION FAILED FOR L FACTOR \n"; + m_factorizationIsOk = false; + return; + } + } + + // Now, left-looking factorize each column within the snode + for (icol = jcol; icol<=kcol; icol++){ + m_glu.xusub(icol+1) = nextu; + // Scatter into SPA dense(*) + for (typename MatrixType::InnerIterator it(m_mat, icol); it; ++it) + dense(it.row()) = it.value(); + + // Numeric update within the snode + SparseLUBase<Scalar,Index>::LU_snode_bmod(icol, fsupc, dense, m_glu); + + // Eliminate the current column + info = SparseLUBase<Scalar,Index>::LU_pivotL(icol, m_diagpivotthresh, m_perm_r.indices(), iperm_c.indices(), pivrow, m_glu); + if ( info ) + { + m_info = NumericalIssue; + std::cerr<< "THE MATRIX IS STRUCTURALLY SINGULAR ... ZERO COLUMN AT " << info <<std::endl; + m_factorizationIsOk = false; + return; + } + } + jcol = icol; // The last column te be eliminated + } + else + { // Work on one panel of panel_size columns + + // Adjust panel size so that a panel won't overlap with the next relaxed snode. 
+ int panel_size = m_perfv.panel_size; // upper bound on panel width + for (k = jcol + 1; k < (std::min)(jcol+panel_size, n); k++) + { + if (relax_end(k) != IND_EMPTY) + { + panel_size = k - jcol; + break; + } + } + if (k == n) + panel_size = n - jcol; + + // Symbolic outer factorization on a panel of columns + SparseLUBase<Scalar,Index>::LU_panel_dfs(m, panel_size, jcol, m_mat, m_perm_r.indices(), nseg1, dense, panel_lsub, segrep, repfnz, xprune, marker, parent, xplore, m_glu); + + // Numeric sup-panel updates in topological order + SparseLUBase<Scalar,Index>::LU_panel_bmod(m, panel_size, jcol, nseg1, dense, tempv, segrep, repfnz, m_perfv, m_glu); + + // Sparse LU within the panel, and below the panel diagonal + for ( jj = jcol; jj< jcol + panel_size; jj++) + { + k = (jj - jcol) * m; // Column index for w-wide arrays + + nseg = nseg1; // begin after all the panel segments + //Depth-first-search for the current column + VectorBlock<IndexVector> panel_lsubk(panel_lsub, k, m); + VectorBlock<IndexVector> repfnz_k(repfnz, k, m); + info = SparseLUBase<Scalar,Index>::LU_column_dfs(m, jj, m_perm_r.indices(), m_perfv.maxsuper, nseg, panel_lsubk, segrep, repfnz_k, xprune, marker, parent, xplore, m_glu); + if ( info ) + { + std::cerr << "UNABLE TO EXPAND MEMORY IN COLUMN_DFS() \n"; + m_info = NumericalIssue; + m_factorizationIsOk = false; + return; + } + // Numeric updates to this column + VectorBlock<ScalarVector> dense_k(dense, k, m); + VectorBlock<IndexVector> segrep_k(segrep, nseg1, m-nseg1); + info = SparseLUBase<Scalar,Index>::LU_column_bmod(jj, (nseg - nseg1), dense_k, tempv, segrep_k, repfnz_k, jcol, m_glu); + if ( info ) + { + std::cerr << "UNABLE TO EXPAND MEMORY IN COLUMN_BMOD() \n"; + m_info = NumericalIssue; + m_factorizationIsOk = false; + return; + } + + // Copy the U-segments to ucol(*) + info = SparseLUBase<Scalar,Index>::LU_copy_to_ucol(jj, nseg, segrep, repfnz_k ,m_perm_r.indices(), dense_k, m_glu); + if ( info ) + { + std::cerr << "UNABLE TO EXPAND MEMORY IN COPY_TO_UCOL() \n"; + m_info = NumericalIssue; + m_factorizationIsOk = false; + return; + } + + // Form the L-segment + info = SparseLUBase<Scalar,Index>::LU_pivotL(jj, m_diagpivotthresh, m_perm_r.indices(), iperm_c.indices(), pivrow, m_glu); + if ( info ) + { + std::cerr<< "THE MATRIX IS STRUCTURALLY SINGULAR ... 
ZERO COLUMN AT " << info <<std::endl; + m_info = NumericalIssue; + m_factorizationIsOk = false; + return; + } + + // Prune columns (0:jj-1) using column jj + SparseLUBase<Scalar,Index>::LU_pruneL(jj, m_perm_r.indices(), pivrow, nseg, segrep, repfnz_k, xprune, m_glu); + + // Reset repfnz for this column + for (i = 0; i < nseg; i++) + { + irep = segrep(i); + repfnz_k(irep) = IND_EMPTY; + } + } // end SparseLU within the panel + jcol += panel_size; // Move to the next panel + } // end else + } // end for -- end elimination + + // Count the number of nonzeros in factors + SparseLUBase<Scalar,Index>::LU_countnz(n, m_nnzL, m_nnzU, m_glu); + // Apply permutation to the L subscripts + SparseLUBase<Scalar,Index>::LU_fixupL(n, m_perm_r.indices(), m_glu); + + // Create supernode matrix L + m_Lstore.setInfos(m, n, m_glu.lusup, m_glu.xlusup, m_glu.lsub, m_glu.xlsub, m_glu.supno, m_glu.xsup); + // Create the column major upper sparse matrix U; + new (&m_Ustore) MappedSparseMatrix<Scalar> ( m, n, m_nnzU, m_glu.xusub.data(), m_glu.usub.data(), m_glu.ucol.data() ); + + m_info = Success; + m_factorizationIsOk = true; +} + +// #include "SparseLU_simplicialfactorize.h" +namespace internal { + +template<typename _MatrixType, typename Derived, typename Rhs> +struct solve_retval<SparseLU<_MatrixType,Derived>, Rhs> + : solve_retval_base<SparseLU<_MatrixType,Derived>, Rhs> +{ + typedef SparseLU<_MatrixType,Derived> Dec; + EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs) + + template<typename Dest> void evalTo(Dest& dst) const + { + dec()._solve(rhs(),dst); + } +}; + +} // end namespace internal + +} // End namespace Eigen +#endif diff --git a/Eigen/src/SparseLU/SparseLUBase.h b/Eigen/src/SparseLU/SparseLUBase.h new file mode 100644 index 000000000..c00bc0532 --- /dev/null +++ b/Eigen/src/SparseLU/SparseLUBase.h @@ -0,0 +1,74 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+#ifndef SPARSELUBASE_H +#define SPARSELUBASE_H +/** + * Base class for sparseLU + */ +template <typename Scalar, typename Index> +struct SparseLUBase +{ + typedef Matrix<Scalar,Dynamic,1> ScalarVector; + typedef Matrix<Index,Dynamic,1> IndexVector; + typedef typename ScalarVector::RealScalar RealScalar; + typedef VectorBlock<Matrix<Scalar,Dynamic,1> > BlockScalarVector; + typedef VectorBlock<Matrix<Index,Dynamic,1> > BlockIndexVector; +// typedef Ref<Matrix<Scalar,Dynamic,1> > BlockScalarVector; +// typedef Ref<Matrix<Index,Dynamic,1> > BlockIndexVector; + typedef LU_GlobalLU_t<IndexVector, ScalarVector> GlobalLU_t; + typedef SparseMatrix<Scalar,ColMajor,Index> MatrixType; + + static int etree_find (int i, IndexVector& pp); + static int LU_sp_coletree(const MatrixType& mat, IndexVector& parent); + static void LU_nr_etdfs (int n, IndexVector& parent, IndexVector& first_kid, IndexVector& next_kid, IndexVector& post, int postnum); + static void LU_TreePostorder(int n, IndexVector& parent, IndexVector& post); + template <typename VectorType> + static int expand(VectorType& vec, int& length, int nbElts, int keep_prev, int& num_expansions); + static int LUMemInit(int m, int n, int annz, int lwork, int fillratio, int panel_size, GlobalLU_t& glu); + template <typename VectorType> + static int LUMemXpand(VectorType& vec, int& maxlen, int nbElts, LU_MemType memtype, int& num_expansions); + static void LU_heap_relax_snode (const int n, IndexVector& et, const int relax_columns, IndexVector& descendants, IndexVector& relax_end); + static void LU_relax_snode (const int n, IndexVector& et, const int relax_columns, IndexVector& descendants, IndexVector& relax_end); + static int LU_snode_dfs(const int jcol, const int kcol,const MatrixType& mat, IndexVector& xprune, IndexVector& marker, LU_GlobalLU_t<IndexVector, ScalarVector>& glu); + static int LU_snode_bmod (const int jcol, const int fsupc, ScalarVector& dense, GlobalLU_t& glu); + static int LU_pivotL(const int jcol, const RealScalar diagpivotthresh, IndexVector& perm_r, IndexVector& iperm_c, int& pivrow, GlobalLU_t& glu); + template <typename Traits> + static void LU_dfs_kernel(const int jj, IndexVector& perm_r, + int& nseg, IndexVector& panel_lsub, IndexVector& segrep, + Ref<IndexVector> repfnz_col, IndexVector& xprune, Ref<IndexVector> marker, IndexVector& parent, + IndexVector& xplore, GlobalLU_t& glu, int& nextl_col, int krow, Traits& traits); + static void LU_panel_dfs(const int m, const int w, const int jcol, MatrixType& A, IndexVector& perm_r, int& nseg, ScalarVector& dense, IndexVector& panel_lsub, IndexVector& segrep, IndexVector& repfnz, IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, GlobalLU_t& glu); + + static void LU_panel_bmod(const int m, const int w, const int jcol, const int nseg, ScalarVector& dense, ScalarVector& tempv, IndexVector& segrep, IndexVector& repfnz, LU_perfvalues& perfv, GlobalLU_t& glu); + static int LU_column_dfs(const int m, const int jcol, IndexVector& perm_r, int maxsuper, int& nseg, BlockIndexVector& lsub_col, IndexVector& segrep, BlockIndexVector& repfnz, IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, GlobalLU_t& glu); + static int LU_column_bmod(const int jcol, const int nseg, BlockScalarVector& dense, ScalarVector& tempv, BlockIndexVector& segrep, BlockIndexVector& repfnz, int fpanelc, GlobalLU_t& glu); + static int LU_copy_to_ucol(const int jcol, const int nseg, IndexVector& segrep, BlockIndexVector& repfnz ,IndexVector& perm_r, 
BlockScalarVector& dense, GlobalLU_t& glu); + static void LU_pruneL(const int jcol, const IndexVector& perm_r, const int pivrow, const int nseg, const IndexVector& segrep, BlockIndexVector& repfnz, IndexVector& xprune, GlobalLU_t& glu); + static void LU_countnz(const int n, int& nnzL, int& nnzU, GlobalLU_t& glu); + static void LU_fixupL(const int n, const IndexVector& perm_r, GlobalLU_t& glu); + +}; + +#include "SparseLU_Coletree.h" +#include "SparseLU_Memory.h" +#include "SparseLU_heap_relax_snode.h" +#include "SparseLU_relax_snode.h" +#include "SparseLU_snode_dfs.h" +#include "SparseLU_snode_bmod.h" +#include "SparseLU_pivotL.h" +#include "SparseLU_panel_dfs.h" +#include "SparseLU_kernel_bmod.h" +#include "SparseLU_panel_bmod.h" +#include "SparseLU_column_dfs.h" +#include "SparseLU_column_bmod.h" +#include "SparseLU_copy_to_ucol.h" +#include "SparseLU_pruneL.h" +#include "SparseLU_Utils.h" + +#endif diff --git a/Eigen/src/SparseLU/SparseLU_Coletree.h b/Eigen/src/SparseLU/SparseLU_Coletree.h new file mode 100644 index 000000000..d3bc36ea4 --- /dev/null +++ b/Eigen/src/SparseLU/SparseLU_Coletree.h @@ -0,0 +1,180 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +/* + + * NOTE: This file is the modified version of sp_coletree.c file in SuperLU + + * -- SuperLU routine (version 3.1) -- + * Univ. of California Berkeley, Xerox Palo Alto Research Center, + * and Lawrence Berkeley National Lab. + * August 1, 2008 + * + * Copyright (c) 1994 by Xerox Corporation. All rights reserved. + * + * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY + * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. + * + * Permission is hereby granted to use or copy this program for any + * purpose, provided the above notices are retained on all copies. + * Permission to modify the code and to distribute modified code is + * granted, provided the above notices are retained, and a notice that + * the code was modified is included with the above copyright notice. + */ +#ifndef SPARSELU_COLETREE_H +#define SPARSELU_COLETREE_H +/** Find the root of the tree/set containing the vertex i : Use Path halving */ +template< typename Scalar,typename Index> +int SparseLUBase<Scalar,Index>::etree_find (int i, IndexVector& pp) +{ + int p = pp(i); // Parent + int gp = pp(p); // Grand parent + while (gp != p) + { + pp(i) = gp; // Parent pointer on find path is changed to former grand parent + i = gp; + p = pp(i); + gp = pp(p); + } + return p; +} + +/** Compute the column elimination tree of a sparse matrix + * NOTE : The matrix is supposed to be in column-major format. 
+ * + */ +template <typename Scalar, typename Index> +int SparseLUBase<Scalar,Index>::LU_sp_coletree(const MatrixType& mat, IndexVector& parent) +{ + int nc = mat.cols(); // Number of columns + int nr = mat.rows(); // Number of rows + + IndexVector root(nc); // root of subtree of etree + root.setZero(); + IndexVector pp(nc); // disjoint sets + pp.setZero(); // Initialize disjoint sets + IndexVector firstcol(nr); // First nonzero column in each row + + //Compute first nonzero column in each row + int row,col; + firstcol.setConstant(nc); //for (row = 0; row < nr; firstcol(row++) = nc); + for (col = 0; col < nc; col++) + { + for (typename MatrixType::InnerIterator it(mat, col); it; ++it) + { // Is it necessary to browse the whole matrix, the lower part should do the job ?? + row = it.row(); + firstcol(row) = (std::min)(firstcol(row), col); + } + } + /* Compute etree by Liu's algorithm for symmetric matrices, + except use (firstcol[r],c) in place of an edge (r,c) of A. + Thus each row clique in A'*A is replaced by a star + centered at its first vertex, which has the same fill. */ + int rset, cset, rroot; + for (col = 0; col < nc; col++) + { + pp(col) = col; + cset = col; + root(cset) = col; + parent(col) = nc; + for (typename MatrixType::InnerIterator it(mat, col); it; ++it) + { // A sequence of interleaved find and union is performed + row = firstcol(it.row()); + if (row >= col) continue; + rset = etree_find(row, pp); // Find the name of the set containing row + rroot = root(rset); + if (rroot != col) + { + parent(rroot) = col; + pp(cset) = rset; + cset = rset; + root(cset) = col; + } + } + } + return 0; +} + +/** + * Depth-first search from vertex n. No recursion. + * This routine was contributed by Cédric Doucet, CEDRAT Group, Meylan, France. +*/ +template <typename Scalar, typename Index> +void SparseLUBase<Scalar,Index>::LU_nr_etdfs (int n, IndexVector& parent, IndexVector& first_kid, IndexVector& next_kid, IndexVector& post, int postnum) +{ + int current = n, first, next; + while (postnum != n) + { + // No kid for the current node + first = first_kid(current); + + // no kid for the current node + if (first == -1) + { + // Numbering this node because it has no kid + post(current) = postnum++; + + // looking for the next kid + next = next_kid(current); + while (next == -1) + { + // No more kids : back to the parent node + current = parent(current); + // numbering the parent node + post(current) = postnum++; + + // Get the next kid + next = next_kid(current); + } + // stopping criterion + if (postnum == n+1) return; + + // Updating current node + current = next; + } + else + { + current = first; + } + } +} + + +/** + * Post order a tree + * \param parent Input tree + * \param post postordered tree + */ +template <typename Scalar, typename Index> +void SparseLUBase<Scalar,Index>::LU_TreePostorder(int n, IndexVector& parent, IndexVector& post) +{ + IndexVector first_kid, next_kid; // Linked list of children + int postnum; + // Allocate storage for working arrays and results + first_kid.resize(n+1); + next_kid.setZero(n+1); + post.setZero(n+1); + + // Set up structure describing children + int v, dad; + first_kid.setConstant(-1); + for (v = n-1; v >= 0; v--) + { + dad = parent(v); + next_kid(v) = first_kid(dad); + first_kid(dad) = v; + } + + // Depth-first search from dummy root vertex #n + postnum = 0; + LU_nr_etdfs(n, parent, first_kid, next_kid, post, postnum); +} + +#endif
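The union-find with path halving used by etree_find above is what keeps LU_sp_coletree close to linear in the number of nonzeros. A minimal standalone sketch of the same find, using std::vector in place of Eigen's IndexVector (data and names are illustrative only, not part of the patch):

#include <vector>
#include <cassert>

// Path-halving find: every node visited on the way to the root is
// re-pointed to its former grandparent, flattening the chain.
static int find_root(std::vector<int>& pp, int i)
{
  int p = pp[i], gp = pp[p];
  while (gp != p)
  {
    pp[i] = gp;   // shortcut: i now points to its former grandparent
    i = gp;
    p = pp[i];
    gp = pp[p];
  }
  return p;
}

int main()
{
  // A chain 0 <- 1 <- 2 <- 3 (each entry stores its parent, the root points to itself)
  std::vector<int> pp = {0, 0, 1, 2};
  assert(find_root(pp, 3) == 0);
  assert(pp[3] == 1);   // the chain has been halved as a side effect
  return 0;
}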
\ No newline at end of file diff --git a/Eigen/src/SparseLU/SparseLU_Matrix.h b/Eigen/src/SparseLU/SparseLU_Matrix.h new file mode 100644 index 000000000..31aeee64d --- /dev/null +++ b/Eigen/src/SparseLU/SparseLU_Matrix.h @@ -0,0 +1,313 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr> +// Copyright (C) 2012 Gael Guennebaud <gael.guennebaud@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_SPARSELU_MATRIX_H +#define EIGEN_SPARSELU_MATRIX_H + +/** \ingroup SparseLU_Module + * \brief a class to manipulate the L supernodal factor from the SparseLU factorization + * + * This class contain the data to easily store + * and manipulate the supernodes during the factorization and solution phase of Sparse LU. + * Only the lower triangular matrix has supernodes. + * + * NOTE : This class corresponds to the SCformat structure in SuperLU + * + */ +/* TO DO + * InnerIterator as for sparsematrix + * SuperInnerIterator to iterate through all supernodes + * Function for triangular solve + */ +template <typename _Scalar, typename _Index> +class SuperNodalMatrix +{ + public: + typedef _Scalar Scalar; + typedef _Index Index; + typedef Matrix<Index,Dynamic,1> IndexVector; + typedef Matrix<Scalar,Dynamic,1> ScalarVector; + public: + SuperNodalMatrix() + { + + } + SuperNodalMatrix(int m, int n, ScalarVector& nzval, IndexVector& nzval_colptr, IndexVector& rowind, + IndexVector& rowind_colptr, IndexVector& col_to_sup, IndexVector& sup_to_col ) + { + setInfos(m, n, nzval, nzval_colptr, rowind, rowind_colptr, col_to_sup, sup_to_col); + } + + ~SuperNodalMatrix() + { + + } + /** + * Set appropriate pointers for the lower triangular supernodal matrix + * These infos are available at the end of the numerical factorization + * FIXME This class will be modified such that it can be use in the course + * of the factorization. 
+ */ + void setInfos(int m, int n, ScalarVector& nzval, IndexVector& nzval_colptr, IndexVector& rowind, + IndexVector& rowind_colptr, IndexVector& col_to_sup, IndexVector& sup_to_col ) + { + m_row = m; + m_col = n; + m_nzval = nzval.data(); + m_nzval_colptr = nzval_colptr.data(); + m_rowind = rowind.data(); + m_rowind_colptr = rowind_colptr.data(); + m_nsuper = col_to_sup(n); + m_col_to_sup = col_to_sup.data(); + m_sup_to_col = sup_to_col.data(); + + } + + /** + * Number of rows + */ + int rows() + { + return m_row; + } + + /** + * Number of columns + */ + int cols() + { + return m_col; + } + + /** + * Return the array of nonzero values packed by column + * + * The size is nnz + */ + Scalar* valuePtr() + { + return m_nzval; + } + + const Scalar* valuePtr() const + { + return m_nzval; + } + /** + * Return the pointers to the beginning of each column in \ref valuePtr() + */ + Index* colIndexPtr() + { + return m_nzval_colptr; + } + + const Index* colIndexPtr() const + { + return m_nzval_colptr; + } + + /** + * Return the array of compressed row indices of all supernodes + */ + Index* rowIndex() + { + return m_rowind; + } + + const Index* rowIndex() const + { + return m_rowind; + } + + /** + * Return the location in \em rowvaluePtr() which starts each column + */ + Index* rowIndexPtr() + { + return m_rowind_colptr; + } + + const Index* rowIndexPtr() const + { + return m_rowind_colptr; + } + + /** + * Return the array of column-to-supernode mapping + */ + Index* colToSup() + { + return m_col_to_sup; + } + + const Index* colToSup() const + { + return m_col_to_sup; + } + /** + * Return the array of supernode-to-column mapping + */ + Index* supToCol() + { + return m_sup_to_col; + } + + const Index* supToCol() const + { + return m_sup_to_col; + } + + /** + * Return the number of supernodes + */ + int nsuper() const + { + return m_nsuper; + } + + class InnerIterator; + template<typename Dest> + void solveInPlace( MatrixBase<Dest>&X) const; + + + + + protected: + Index m_row; // Number of rows + Index m_col; // Number of columns + Index m_nsuper; // Number of supernodes + Scalar* m_nzval; //array of nonzero values packed by column + Index* m_nzval_colptr; //nzval_colptr[j] Stores the location in nzval[] which starts column j + Index* m_rowind; // Array of compressed row indices of rectangular supernodes + Index* m_rowind_colptr; //rowind_colptr[j] stores the location in rowind[] which starts column j + Index* m_col_to_sup; // col_to_sup[j] is the supernode number to which column j belongs + Index* m_sup_to_col; //sup_to_col[s] points to the starting column of the s-th supernode + + private : +}; + +/** + * \brief InnerIterator class to iterate over nonzero values of the current column in the supernode + * + */ +template<typename Scalar, typename Index> +class SuperNodalMatrix<Scalar,Index>::InnerIterator +{ + public: + InnerIterator(const SuperNodalMatrix& mat, Index outer) + : m_matrix(mat), + m_outer(outer), + m_idval(mat.colIndexPtr()[outer]), + m_startval(m_idval), + m_endval(mat.colIndexPtr()[outer+1]), + m_idrow(mat.rowIndexPtr()[outer]), + m_startidrow(m_idrow), + m_endidrow(mat.rowIndexPtr()[outer+1]) + {} + inline InnerIterator& operator++() + { + m_idval++; + m_idrow++; + return *this; + } + inline Scalar value() const { return m_matrix.valuePtr()[m_idval]; } + + inline Scalar& valueRef() { return const_cast<Scalar&>(m_matrix.valuePtr()[m_idval]); } + + inline Index index() const { return m_matrix.rowIndex()[m_idrow]; } + inline Index row() const { return index(); } + inline Index col() 
const { return m_outer; } + + inline Index supIndex() const { return m_matrix.colToSup()[m_outer]; } + + inline operator bool() const + { + return ( (m_idval < m_endval) && (m_idval > m_startval) && + (m_idrow < m_endidrow) && (m_idrow > m_startidrow) ); + } + + protected: + const SuperNodalMatrix& m_matrix; // Supernodal lower triangular matrix + const Index m_outer; // Current column + Index m_idval; //Index to browse the values in the current column + const Index m_startval; // Start of the column value + const Index m_endval; // End of the column value + Index m_idrow; //Index to browse the row indices + const Index m_startidrow; // Start of the row indices of the current column value + const Index m_endidrow; // End of the row indices of the current column value +}; + +/** + * \brief Solve with the supernode triangular matrix + * + */ +template<typename Scalar, typename Index> +template<typename Dest> +void SuperNodalMatrix<Scalar,Index>::solveInPlace( MatrixBase<Dest>&X) const +{ + Index n = X.rows(); + int nrhs = X.cols(); + const Scalar * Lval = valuePtr(); // Nonzero values + Matrix<Scalar,Dynamic,Dynamic> work(n, nrhs); // working vector + work.setZero(); + for (int k = 0; k <= nsuper(); k ++) + { + Index fsupc = supToCol()[k]; // First column of the current supernode + Index istart = rowIndexPtr()[fsupc]; // Pointer index to the subscript of the current column + Index nsupr = rowIndexPtr()[fsupc+1] - istart; // Number of rows in the current supernode + Index nsupc = supToCol()[k+1] - fsupc; // Number of columns in the current supernode + Index nrow = nsupr - nsupc; // Number of rows in the non-diagonal part of the supernode + Index irow; //Current index row + + if (nsupc == 1 ) + { + for (int j = 0; j < nrhs; j++) + { + InnerIterator it(*this, fsupc); + ++it; // Skip the diagonal element + for (; it; ++it) + { + irow = it.row(); + X(irow, j) -= X(fsupc, j) * it.value(); + } + } + } + else + { + // The supernode has more than one column + Index luptr = colIndexPtr()[fsupc]; + + // Triangular solve + Map<const Matrix<Scalar,Dynamic,Dynamic>, 0, OuterStride<> > A( &(Lval[luptr]), nsupc, nsupc, OuterStride<>(nsupr) ); + Map< Matrix<Scalar,Dynamic,Dynamic>, 0, OuterStride<> > U (&(X(fsupc,0)), nsupc, nrhs, OuterStride<>(n) ); + U = A.template triangularView<UnitLower>().solve(U); + + // Matrix-vector product + new (&A) Map<const Matrix<Scalar,Dynamic,Dynamic>, 0, OuterStride<> > ( &(Lval[luptr+nsupc]), nrow, nsupc, OuterStride<>(nsupr) ); + work.block(0, 0, nrow, nrhs) = A * U; + + //Begin Scatter + for (int j = 0; j < nrhs; j++) + { + Index iptr = istart + nsupc; + for (int i = 0; i < nrow; i++) + { + irow = rowIndex()[iptr]; + X(irow, j) -= work(i, j); // Scatter operation + work(i, j) = Scalar(0); + iptr++; + } + } + } + } +} + + +#endif
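solveInPlace() above processes one supernode at a time: a unit-lower triangular solve with the square diagonal block, then a matrix-vector product with the rectangular part below it, whose result is scattered into the right-hand side. A dense, self-contained analogue of that blocked forward substitution (a toy 5x5 matrix, not the supernodal storage itself):

#include <Eigen/Dense>
#include <iostream>
using namespace Eigen;

int main()
{
  // One supernode of width 3 followed by two single-column supernodes, stored densely.
  MatrixXd L(5,5);
  L << 1,0,0,0,0,
       2,1,0,0,0,
       3,4,1,0,0,
       5,6,7,1,0,
       8,9,1,2,1;
  VectorXd b = VectorXd::LinSpaced(5,1,5);
  VectorXd x = b;

  // Step 1: unit-lower triangular solve with the 3x3 diagonal block of the supernode.
  L.topLeftCorner(3,3).triangularView<UnitLower>().solveInPlace(x.head(3));
  // Step 2: matrix-vector update of the rows below the supernode (the "scatter" in the sparse code).
  x.tail(2) -= L.bottomLeftCorner(2,3) * x.head(3);
  // The remaining columns are handled the same way, one (trivial) supernode at a time.
  L.bottomRightCorner(2,2).triangularView<UnitLower>().solveInPlace(x.tail(2));

  // Matches a plain forward substitution on the whole matrix (prints ~0).
  std::cout << (L.triangularView<UnitLower>().solve(b) - x).norm() << std::endl;
  return 0;
}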
\ No newline at end of file diff --git a/Eigen/src/SparseLU/SparseLU_Memory.h b/Eigen/src/SparseLU/SparseLU_Memory.h new file mode 100644 index 000000000..7b9f01355 --- /dev/null +++ b/Eigen/src/SparseLU/SparseLU_Memory.h @@ -0,0 +1,204 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +/* + + * NOTE: This file is the modified version of [s,d,c,z]memory.c files in SuperLU + + * -- SuperLU routine (version 3.1) -- + * Univ. of California Berkeley, Xerox Palo Alto Research Center, + * and Lawrence Berkeley National Lab. + * August 1, 2008 + * + * Copyright (c) 1994 by Xerox Corporation. All rights reserved. + * + * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY + * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. + * + * Permission is hereby granted to use or copy this program for any + * purpose, provided the above notices are retained on all copies. + * Permission to modify the code and to distribute modified code is + * granted, provided the above notices are retained, and a notice that + * the code was modified is included with the above copyright notice. + */ + +#ifndef EIGEN_SPARSELU_MEMORY +#define EIGEN_SPARSELU_MEMORY + +#define LU_NO_MARKER 3 +#define LU_NUM_TEMPV(m,w,t,b) ((std::max)(m, (t+b)*w) ) +#define IND_EMPTY (-1) + +#define LU_Reduce(alpha) ((alpha + 1) / 2) // i.e (alpha-1)/2 + 1 +#define LU_GluIntArray(n) (5* (n) + 5) +#define LU_TempSpace(m, w) ( (2*w + 4 + LU_NO_MARKER) * m * sizeof(Index) \ + + (w + 1) * m * sizeof(Scalar) ) + + +/** + * Expand the existing storage to accomodate more fill-ins + * \param vec Valid pointer to the vector to allocate or expand + * \param [in,out]length At input, contain the current length of the vector that is to be increased. At output, length of the newly allocated vector + * \param [in]nbElts Current number of elements in the factors + * \param keep_prev 1: use length and do not expand the vector; 0: compute new_len and expand + * \param [in,out]num_expansions Number of times the memory has been expanded + */ +template <typename Scalar, typename Index> +template <typename VectorType> +int SparseLUBase<Scalar,Index>::expand(VectorType& vec, int& length, int nbElts, int keep_prev, int& num_expansions) +{ + + float alpha = 1.5; // Ratio of the memory increase + int new_len; // New size of the allocated memory + + if(num_expansions == 0 || keep_prev) + new_len = length ; // First time allocate requested + else + new_len = alpha * length ; + + VectorType old_vec; // Temporary vector to hold the previous values + if (nbElts > 0 ) + old_vec = vec.segment(0,nbElts); + + //Allocate or expand the current vector + try + { + vec.resize(new_len); + } + catch(std::bad_alloc& ) + { + if ( !num_expansions ) + { + // First time to allocate from LUMemInit() + throw; // Pass the exception to LUMemInit() which has a try... 
catch block + } + if (keep_prev) + { + // In this case, the memory length should not not be reduced + return new_len; + } + else + { + // Reduce the size and increase again + int tries = 0; // Number of attempts + do + { + alpha = LU_Reduce(alpha); + new_len = alpha * length ; + try + { + vec.resize(new_len); + } + catch(std::bad_alloc& ) + { + tries += 1; + if ( tries > 10) return new_len; + } + } while (!vec.size()); + } + } + //Copy the previous values to the newly allocated space + if (nbElts > 0) + vec.segment(0, nbElts) = old_vec; + + + length = new_len; + if(num_expansions) ++num_expansions; + return 0; +} + +/** + * \brief Allocate various working space for the numerical factorization phase. + * \param m number of rows of the input matrix + * \param n number of columns + * \param annz number of initial nonzeros in the matrix + * \param lwork if lwork=-1, this routine returns an estimated size of the required memory + * \param glu persistent data to facilitate multiple factors : will be deleted later ?? + * \return an estimated size of the required memory if lwork = -1; otherwise, return the size of actually allocated memory when allocation failed, and 0 on success + * NOTE Unlike SuperLU, this routine does not support successive factorization with the same pattern and the same row permutation + */ +template <typename Scalar, typename Index> +int SparseLUBase<Scalar,Index>::LUMemInit(int m, int n, int annz, int lwork, int fillratio, int panel_size, GlobalLU_t& glu) +{ + int& num_expansions = glu.num_expansions; //No memory expansions so far + num_expansions = 0; + glu.nzumax = glu.nzlumax = (std::max)(fillratio * annz, m*n); // estimated number of nonzeros in U + glu.nzlmax = (std::max)(1., fillratio/4.) * annz; // estimated nnz in L factor + + // Return the estimated size to the user if necessary + if (lwork == IND_EMPTY) + { + int estimated_size; + estimated_size = LU_GluIntArray(n) * sizeof(Index) + LU_TempSpace(m, panel_size) + + (glu.nzlmax + glu.nzumax) * sizeof(Index) + (glu.nzlumax+glu.nzumax) * sizeof(Scalar) + n; + return estimated_size; + } + + // Setup the required space + + // First allocate Integer pointers for L\U factors + glu.xsup.resize(n+1); + glu.supno.resize(n+1); + glu.xlsub.resize(n+1); + glu.xlusup.resize(n+1); + glu.xusub.resize(n+1); + + // Reserve memory for L/U factors + do + { + try + { + expand<ScalarVector>(glu.lusup, glu.nzlumax, 0, 0, num_expansions); + expand<ScalarVector>(glu.ucol,glu.nzumax, 0, 0, num_expansions); + expand<IndexVector>(glu.lsub,glu.nzlmax, 0, 0, num_expansions); + expand<IndexVector>(glu.usub,glu.nzumax, 0, 1, num_expansions); + } + catch(std::bad_alloc& ) + { + //Reduce the estimated size and retry + glu.nzlumax /= 2; + glu.nzumax /= 2; + glu.nzlmax /= 2; + if (glu.nzlumax < annz ) return glu.nzlumax; + } + + } while (!glu.lusup.size() || !glu.ucol.size() || !glu.lsub.size() || !glu.usub.size()); + + + + ++num_expansions; + return 0; + +} // end LuMemInit + +/** + * \brief Expand the existing storage + * \param vec vector to expand + * \param [in,out]maxlen On input, previous size of vec (Number of elements to copy ). on output, new size + * \param nbElts current number of elements in the vector. 
+ * \param memtype Type of the memory to expand (LUSUP, UCOL, LSUB or USUB) + * \param num_expansions Number of times the memory has been expanded so far + * \return 0 on success, > 0 size of the memory allocated so far + */ +template <typename Scalar, typename Index> +template <typename VectorType> +int SparseLUBase<Scalar,Index>::LUMemXpand(VectorType& vec, int& maxlen, int nbElts, LU_MemType memtype, int& num_expansions) +{ + int failed_size; + if (memtype == USUB) + failed_size = expand<VectorType>(vec, maxlen, nbElts, 1, num_expansions); + else + failed_size = expand<VectorType>(vec, maxlen, nbElts, 0, num_expansions); + + if (failed_size) + return failed_size; + + return 0; + +} +#endif
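The memory routines above grow each factor array geometrically and back off when allocation fails. A rough standalone sketch of that grow-then-reduce policy written against std::vector (the 1.5 growth ratio, the LU_Reduce step and the 10-try limit mirror expand(); everything else is illustrative):

#include <vector>
#include <new>

// Try to grow v to alpha*length; on std::bad_alloc, move alpha towards 1
// (LU_Reduce style) and retry, giving up after 10 attempts.
template <typename T>
int grow_with_backoff(std::vector<T>& v, int length)
{
  float alpha = 1.5f;
  int new_len = int(alpha * length);
  for (int tries = 0; tries < 10; ++tries)
  {
    try { v.resize(new_len); return new_len; }
    catch (std::bad_alloc&)
    {
      alpha = (alpha + 1.f) / 2.f;   // i.e. LU_Reduce: halve the extra growth
      new_len = int(alpha * length);
    }
  }
  return 0; // could not grow
}

int main()
{
  std::vector<double> v(100);
  return grow_with_backoff(v, int(v.size())) == 150 ? 0 : 1;
}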
\ No newline at end of file diff --git a/Eigen/src/SparseLU/SparseLU_Structs.h b/Eigen/src/SparseLU/SparseLU_Structs.h new file mode 100644 index 000000000..7b3aa250c --- /dev/null +++ b/Eigen/src/SparseLU/SparseLU_Structs.h @@ -0,0 +1,103 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +/* + * NOTE: This file comes from a partly modified version of files slu_[s,d,c,z]defs.h + * -- SuperLU routine (version 4.1) -- + * Univ. of California Berkeley, Xerox Palo Alto Research Center, + * and Lawrence Berkeley National Lab. + * November, 2010 + * + * Global data structures used in LU factorization - + * + * nsuper: #supernodes = nsuper + 1, numbered [0, nsuper]. + * (xsup,supno): supno[i] is the supernode no to which i belongs; + * xsup(s) points to the beginning of the s-th supernode. + * e.g. supno 0 1 2 2 3 3 3 4 4 4 4 4 (n=12) + * xsup 0 1 2 4 7 12 + * Note: dfs will be performed on supernode rep. relative to the new + * row pivoting ordering + * + * (xlsub,lsub): lsub[*] contains the compressed subscript of + * rectangular supernodes; xlsub[j] points to the starting + * location of the j-th column in lsub[*]. Note that xlsub + * is indexed by column. + * Storage: original row subscripts + * + * During the course of sparse LU factorization, we also use + * (xlsub,lsub) for the purpose of symmetric pruning. For each + * supernode {s,s+1,...,t=s+r} with first column s and last + * column t, the subscript set + * lsub[j], j=xlsub[s], .., xlsub[s+1]-1 + * is the structure of column s (i.e. structure of this supernode). + * It is used for the storage of numerical values. + * Furthermore, + * lsub[j], j=xlsub[t], .., xlsub[t+1]-1 + * is the structure of the last column t of this supernode. + * It is for the purpose of symmetric pruning. Therefore, the + * structural subscripts can be rearranged without making physical + * interchanges among the numerical values. + * + * However, if the supernode has only one column, then we + * only keep one set of subscripts. For any subscript interchange + * performed, similar interchange must be done on the numerical + * values. + * + * The last column structures (for pruning) will be removed + * after the numercial LU factorization phase. + * + * (xlusup,lusup): lusup[*] contains the numerical values of the + * rectangular supernodes; xlusup[j] points to the starting + * location of the j-th column in storage vector lusup[*] + * Note: xlusup is indexed by column. + * Each rectangular supernode is stored by column-major + * scheme, consistent with Fortran 2-dim array storage. + * + * (xusub,ucol,usub): ucol[*] stores the numerical values of + * U-columns outside the rectangular supernodes. The row + * subscript of nonzero ucol[k] is stored in usub[k]. + * xusub[i] points to the starting location of column i in ucol. + * Storage: new row subscripts; that is subscripts of PA. + */ +#ifndef EIGEN_LU_STRUCTS +#define EIGEN_LU_STRUCTS +typedef enum {LUSUP, UCOL, LSUB, USUB, LLVL, ULVL} LU_MemType; + + +template <typename IndexVector, typename ScalarVector> +struct LU_GlobalLU_t { + typedef typename IndexVector::Scalar Index; + IndexVector xsup; //First supernode column ... 
xsup(s) points to the beginning of the s-th supernode + IndexVector supno; // Supernode number corresponding to this column (column to supernode mapping) + ScalarVector lusup; // nonzero values of L ordered by columns + IndexVector lsub; // Compressed row indices of L rectangular supernodes. + IndexVector xlusup; // pointers to the beginning of each column in lusup + IndexVector xlsub; // pointers to the beginning of each column in lsub + Index nzlmax; // Current max size of lsub + Index nzlumax; // Current max size of lusup + ScalarVector ucol; // nonzero values of U ordered by columns + IndexVector usub; // row indices of U columns in ucol + IndexVector xusub; // Pointers to the beginning of each column of U in ucol + Index nzumax; // Current max size of ucol + Index n; // Number of columns in the matrix + int num_expansions; +}; + +// Values to set for performance +struct LU_perfvalues { + int panel_size; // a panel consists of at most <panel_size> consecutive columns + int relax; // To control degree of relaxing supernodes. If the number of nodes (columns) + // in a subtree of the elimination tree is less than relax, this subtree is considered + // as one supernode regardless of the row structures of those columns + int maxsuper; // The maximum size for a supernode in complete LU + int rowblk; // The minimum row dimension for 2-D blocking to be used; + int colblk; // The minimum column dimension for 2-D blocking to be used; + int fillfactor; // The estimated fill factors for L and U, compared with A +}; +#endif
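The (xsup, supno) pair described in the header comment can be sanity-checked directly. The small program below uses the example values quoted in that comment (n = 12) and verifies that xsup(s) <= j < xsup(s+1) exactly when supno(j) == s:

#include <vector>
#include <cassert>

int main()
{
  // column:                 0 1 2 3 4 5 6 7 8 9 10 11
  std::vector<int> supno = {0,1,2,2,3,3,3,4,4,4,4, 4};
  std::vector<int> xsup  = {0,1,2,4,7,12};

  // xsup(s) is the first column of supernode s, so the arrays are consistent
  // iff every column j in [xsup(s), xsup(s+1)) is mapped back to supernode s.
  for (std::size_t s = 0; s + 1 < xsup.size(); ++s)
    for (int j = xsup[s]; j < xsup[s+1]; ++j)
      assert(supno[j] == int(s));
  return 0;
}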
\ No newline at end of file diff --git a/Eigen/src/SparseLU/SparseLU_Utils.h b/Eigen/src/SparseLU/SparseLU_Utils.h new file mode 100644 index 000000000..b13930dbb --- /dev/null +++ b/Eigen/src/SparseLU/SparseLU_Utils.h @@ -0,0 +1,75 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +#ifndef EIGEN_SPARSELU_UTILS_H +#define EIGEN_SPARSELU_UTILS_H + + +/** + * \brief Count Nonzero elements in the factors + */ +template <typename Scalar, typename Index> +void SparseLUBase<Scalar,Index>::LU_countnz(const int n, int& nnzL, int& nnzU, GlobalLU_t& glu) +{ + nnzL = 0; + nnzU = (glu.xusub)(n); + int nsuper = (glu.supno)(n); + int jlen; + int i, j, fsupc; + if (n <= 0 ) return; + // For each supernode + for (i = 0; i <= nsuper; i++) + { + fsupc = glu.xsup(i); + jlen = glu.xlsub(fsupc+1) - glu.xlsub(fsupc); + + for (j = fsupc; j < glu.xsup(i+1); j++) + { + nnzL += jlen; + nnzU += j - fsupc + 1; + jlen--; + } + } + +} +/** + * \brief Fix up the data storage lsub for L-subscripts. + * + * It removes the subscripts sets for structural pruning, + * and applies permutation to the remaining subscripts + * + */ +template <typename Scalar, typename Index> +void SparseLUBase<Scalar,Index>::LU_fixupL(const int n, const IndexVector& perm_r, GlobalLU_t& glu) +{ + int fsupc, i, j, k, jstart; + + int nextl = 0; + int nsuper = (glu.supno)(n); + + // For each supernode + for (i = 0; i <= nsuper; i++) + { + fsupc = glu.xsup(i); + jstart = glu.xlsub(fsupc); + glu.xlsub(fsupc) = nextl; + for (j = jstart; j < glu.xlsub(fsupc + 1); j++) + { + glu.lsub(nextl) = perm_r(glu.lsub(j)); // Now indexed into P*A + nextl++; + } + for (k = fsupc+1; k < glu.xsup(i+1); k++) + glu.xlsub(k) = nextl; // other columns in supernode i + } + + glu.xlsub(n) = nextl; +} + +#endif diff --git a/Eigen/src/SparseLU/SparseLU_column_bmod.h b/Eigen/src/SparseLU/SparseLU_column_bmod.h new file mode 100644 index 000000000..94f18fb73 --- /dev/null +++ b/Eigen/src/SparseLU/SparseLU_column_bmod.h @@ -0,0 +1,162 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr> +// Copyright (C) 2012 Gael Guennebaud <gael.guennebaud@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +/* + + * NOTE: This file is the modified version of xcolumn_bmod.c file in SuperLU + + * -- SuperLU routine (version 3.0) -- + * Univ. of California Berkeley, Xerox Palo Alto Research Center, + * and Lawrence Berkeley National Lab. + * October 15, 2003 + * + * Copyright (c) 1994 by Xerox Corporation. All rights reserved. + * + * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY + * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. + * + * Permission is hereby granted to use or copy this program for any + * purpose, provided the above notices are retained on all copies. 
+ * Permission to modify the code and to distribute modified code is + * granted, provided the above notices are retained, and a notice that + * the code was modified is included with the above copyright notice. + */ +#ifndef SPARSELU_COLUMN_BMOD_H +#define SPARSELU_COLUMN_BMOD_H + +/** + * \brief Performs numeric block updates (sup-col) in topological order + * + * \param jcol current column to update + * \param nseg Number of segments in the U part + * \param dense Store the full representation of the column + * \param tempv working array + * \param segrep segment representative ... + * \param repfnz ??? First nonzero column in each row ??? ... + * \param fpanelc First column in the current panel + * \param glu Global LU data. + * \return 0 - successful return + * > 0 - number of bytes allocated when run out of space + * + */ +template <typename Scalar, typename Index> +int SparseLUBase<Scalar,Index>::LU_column_bmod(const int jcol, const int nseg, BlockScalarVector& dense, ScalarVector& tempv, BlockIndexVector& segrep, BlockIndexVector& repfnz, int fpanelc, GlobalLU_t& glu) +{ + int jsupno, k, ksub, krep, ksupno; + int lptr, nrow, isub, irow, nextlu, new_next, ufirst; + int fsupc, nsupc, nsupr, luptr, kfnz, no_zeros; + /* krep = representative of current k-th supernode + * fsupc = first supernodal column + * nsupc = number of columns in a supernode + * nsupr = number of rows in a supernode + * luptr = location of supernodal LU-block in storage + * kfnz = first nonz in the k-th supernodal segment + * no_zeros = no lf leading zeros in a supernodal U-segment + */ + + jsupno = glu.supno(jcol); + // For each nonzero supernode segment of U[*,j] in topological order + k = nseg - 1; + int d_fsupc; // distance between the first column of the current panel and the + // first column of the current snode + int fst_col; // First column within small LU update + int segsize; + for (ksub = 0; ksub < nseg; ksub++) + { + krep = segrep(k); k--; + ksupno = glu.supno(krep); + if (jsupno != ksupno ) + { + // outside the rectangular supernode + fsupc = glu.xsup(ksupno); + fst_col = (std::max)(fsupc, fpanelc); + + // Distance from the current supernode to the current panel; + // d_fsupc = 0 if fsupc > fpanelc + d_fsupc = fst_col - fsupc; + + luptr = glu.xlusup(fst_col) + d_fsupc; + lptr = glu.xlsub(fsupc) + d_fsupc; + + kfnz = repfnz(krep); + kfnz = (std::max)(kfnz, fpanelc); + + segsize = krep - kfnz + 1; + nsupc = krep - fst_col + 1; + nsupr = glu.xlsub(fsupc+1) - glu.xlsub(fsupc); + nrow = nsupr - d_fsupc - nsupc; + + // Perform a triangular solver and block update, + // then scatter the result of sup-col update to dense + no_zeros = kfnz - fst_col; + if(segsize==1) + LU_kernel_bmod<1>::run(segsize, dense, tempv, glu.lusup, luptr, nsupr, nrow, glu.lsub, lptr, no_zeros); + else + LU_kernel_bmod<Dynamic>::run(segsize, dense, tempv, glu.lusup, luptr, nsupr, nrow, glu.lsub, lptr, no_zeros); + } // end if jsupno + } // end for each segment + + // Process the supernodal portion of L\U[*,j] + nextlu = glu.xlusup(jcol); + fsupc = glu.xsup(jsupno); + + // copy the SPA dense into L\U[*,j] + int mem; + new_next = nextlu + glu.xlsub(fsupc + 1) - glu.xlsub(fsupc); + while (new_next > glu.nzlumax ) + { + mem = LUMemXpand<ScalarVector>(glu.lusup, glu.nzlumax, nextlu, LUSUP, glu.num_expansions); + if (mem) return mem; + } + + for (isub = glu.xlsub(fsupc); isub < glu.xlsub(fsupc+1); isub++) + { + irow = glu.lsub(isub); + glu.lusup(nextlu) = dense(irow); + dense(irow) = Scalar(0.0); + ++nextlu; + } + + glu.xlusup(jcol + 1) = 
nextlu; // close L\U(*,jcol); + + /* For more updates within the panel (also within the current supernode), + * should start from the first column of the panel, or the first column + * of the supernode, whichever is bigger. There are two cases: + * 1) fsupc < fpanelc, then fst_col <-- fpanelc + * 2) fsupc >= fpanelc, then fst_col <-- fsupc + */ + fst_col = (std::max)(fsupc, fpanelc); + + if (fst_col < jcol) + { + // Distance between the current supernode and the current panel + // d_fsupc = 0 if fsupc >= fpanelc + d_fsupc = fst_col - fsupc; + + lptr = glu.xlsub(fsupc) + d_fsupc; + luptr = glu.xlusup(fst_col) + d_fsupc; + nsupr = glu.xlsub(fsupc+1) - glu.xlsub(fsupc); // leading dimension + nsupc = jcol - fst_col; // excluding jcol + nrow = nsupr - d_fsupc - nsupc; + + // points to the beginning of jcol in snode L\U(jsupno) + ufirst = glu.xlusup(jcol) + d_fsupc; + Map<Matrix<Scalar,Dynamic,Dynamic>, 0, OuterStride<> > A( &(glu.lusup.data()[luptr]), nsupc, nsupc, OuterStride<>(nsupr) ); + VectorBlock<ScalarVector> u(glu.lusup, ufirst, nsupc); + u = A.template triangularView<UnitLower>().solve(u); + + new (&A) Map<Matrix<Scalar,Dynamic,Dynamic>, 0, OuterStride<> > ( &(glu.lusup.data()[luptr+nsupc]), nrow, nsupc, OuterStride<>(nsupr) ); + VectorBlock<ScalarVector> l(glu.lusup, ufirst+nsupc, nrow); + l.noalias() -= A * u; + + } // End if fst_col + return 0; +} +#endif
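The Map<..., 0, OuterStride<> > views used in LU_column_bmod (and in the supernodal solve) reinterpret a slice of the flat lusup array as a column-major block whose leading dimension is nsupr. A tiny self-contained illustration of that pattern, with made-up sizes and values:

#include <Eigen/Dense>
#include <iostream>
using namespace Eigen;

int main()
{
  // A supernode of nsupr = 4 rows and nsupc = 2 columns stored column-major
  // in a flat array; the diagonal block sits on top, the rectangular part below.
  const int nsupr = 4, nsupc = 2;
  VectorXd lusup = VectorXd::LinSpaced(nsupr*nsupc, 1, nsupr*nsupc);

  Map<Matrix<double,Dynamic,Dynamic>, 0, OuterStride<> >
      A(lusup.data(), nsupc, nsupc, OuterStride<>(nsupr));              // diagonal (triangular) block
  Map<Matrix<double,Dynamic,Dynamic>, 0, OuterStride<> >
      B(lusup.data()+nsupc, nsupr-nsupc, nsupc, OuterStride<>(nsupr));  // rectangular part below it

  VectorXd u = VectorXd::Ones(nsupc);
  A.triangularView<UnitLower>().solveInPlace(u);  // triangular solve on the diagonal block
  VectorXd l = B * u;                             // contribution to the rows below
  std::cout << l.transpose() << std::endl;
  return 0;
}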
\ No newline at end of file diff --git a/Eigen/src/SparseLU/SparseLU_column_dfs.h b/Eigen/src/SparseLU/SparseLU_column_dfs.h new file mode 100644 index 000000000..fa8dcf18d --- /dev/null +++ b/Eigen/src/SparseLU/SparseLU_column_dfs.h @@ -0,0 +1,164 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +/* + + * NOTE: This file is the modified version of [s,d,c,z]column_dfs.c file in SuperLU + + * -- SuperLU routine (version 2.0) -- + * Univ. of California Berkeley, Xerox Palo Alto Research Center, + * and Lawrence Berkeley National Lab. + * November 15, 1997 + * + * Copyright (c) 1994 by Xerox Corporation. All rights reserved. + * + * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY + * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. + * + * Permission is hereby granted to use or copy this program for any + * purpose, provided the above notices are retained on all copies. + * Permission to modify the code and to distribute modified code is + * granted, provided the above notices are retained, and a notice that + * the code was modified is included with the above copyright notice. + */ +#ifndef SPARSELU_COLUMN_DFS_H +#define SPARSELU_COLUMN_DFS_H +/** + * \brief Performs a symbolic factorization on column jcol and decide the supernode boundary + * + * A supernode representative is the last column of a supernode. + * The nonzeros in U[*,j] are segments that end at supernodes representatives. + * The routine returns a list of the supernodal representatives + * in topological order of the dfs that generates them. + * The location of the first nonzero in each supernodal segment + * (supernodal entry location) is also returned. 
+ * + * \param m number of rows in the matrix + * \param jcol Current column + * \param perm_r Row permutation + * \param maxsuper Maximum number of column allowed in a supernode + * \param [in,out] nseg Number of segments in current U[*,j] - new segments appended + * \param lsub_col defines the rhs vector to start the dfs + * \param [in,out] segrep Segment representatives - new segments appended + * \param repfnz First nonzero location in each row + * \param xprune + * \param marker marker[i] == jj, if i was visited during dfs of current column jj; + * \param parent + * \param xplore working array + * \param glu global LU data + * \return 0 success + * > 0 number of bytes allocated when run out of space + * + */ +template<typename IndexVector, typename ScalarVector> +struct LU_column_dfs_traits +{ + typedef typename IndexVector::Scalar Index; + typedef typename ScalarVector::Scalar Scalar; + LU_column_dfs_traits(Index jcol, Index& jsuper, LU_GlobalLU_t<IndexVector, ScalarVector>& glu) + : m_jcol(jcol), m_jsuper_ref(jsuper), m_glu(glu) + {} + bool update_segrep(Index /*krep*/, Index /*jj*/) + { + return true; + } + void mem_expand(IndexVector& lsub, int& nextl, int chmark) + { + if (nextl >= m_glu.nzlmax) + SparseLUBase<Scalar,Index>::LUMemXpand(lsub, m_glu.nzlmax, nextl, LSUB, m_glu.num_expansions); + if (chmark != (m_jcol-1)) m_jsuper_ref = IND_EMPTY; + } + enum { ExpandMem = true }; + + int m_jcol; + int& m_jsuper_ref; + LU_GlobalLU_t<IndexVector, ScalarVector>& m_glu; +}; + +template <typename Scalar, typename Index> +int SparseLUBase<Scalar,Index>::LU_column_dfs(const int m, const int jcol, IndexVector& perm_r, int maxsuper, int& nseg, BlockIndexVector& lsub_col, IndexVector& segrep, BlockIndexVector& repfnz, IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, GlobalLU_t& glu) +{ + + int jsuper = glu.supno(jcol); + int nextl = glu.xlsub(jcol); + VectorBlock<IndexVector> marker2(marker, 2*m, m); + + + LU_column_dfs_traits<IndexVector, ScalarVector> traits(jcol, jsuper, glu); + + // For each nonzero in A(*,jcol) do dfs + for (int k = 0; lsub_col[k] != IND_EMPTY; k++) + { + int krow = lsub_col(k); + lsub_col(k) = IND_EMPTY; + int kmark = marker2(krow); + + // krow was visited before, go to the next nonz; + if (kmark == jcol) continue; + + LU_dfs_kernel(jcol, perm_r, nseg, glu.lsub, segrep, repfnz, xprune, marker2, parent, + xplore, glu, nextl, krow, traits); + } // for each nonzero ... + + int fsupc, jptr, jm1ptr, ito, ifrom, istop; + int nsuper = glu.supno(jcol); + int jcolp1 = jcol + 1; + int jcolm1 = jcol - 1; + + // check to see if j belongs in the same supernode as j-1 + if ( jcol == 0 ) + { // Do nothing for column 0 + nsuper = glu.supno(0) = 0 ; + } + else + { + fsupc = glu.xsup(nsuper); + jptr = glu.xlsub(jcol); // Not yet compressed + jm1ptr = glu.xlsub(jcolm1); + + // Use supernodes of type T2 : see SuperLU paper + if ( (nextl-jptr != jptr-jm1ptr-1) ) jsuper = IND_EMPTY; + + // Make sure the number of columns in a supernode doesn't + // exceed threshold + if ( (jcol - fsupc) >= maxsuper) jsuper = IND_EMPTY; + + /* If jcol starts a new supernode, reclaim storage space in + * glu.lsub from previous supernode. Note we only store + * the subscript set of the first and last columns of + * a supernode. 
(first for num values, last for pruning) + */ + if (jsuper == IND_EMPTY) + { // starts a new supernode + if ( (fsupc < jcolm1-1) ) + { // >= 3 columns in nsuper + ito = glu.xlsub(fsupc+1); + glu.xlsub(jcolm1) = ito; + istop = ito + jptr - jm1ptr; + xprune(jcolm1) = istop; // intialize xprune(jcol-1) + glu.xlsub(jcol) = istop; + + for (ifrom = jm1ptr; ifrom < nextl; ++ifrom, ++ito) + glu.lsub(ito) = glu.lsub(ifrom); + nextl = ito; // = istop + length(jcol) + } + nsuper++; + glu.supno(jcol) = nsuper; + } // if a new supernode + } // end else: jcol > 0 + + // Tidy up the pointers before exit + glu.xsup(nsuper+1) = jcolp1; + glu.supno(jcolp1) = nsuper; + xprune(jcol) = nextl; // Intialize upper bound for pruning + glu.xlsub(jcolp1) = nextl; + + return 0; +} +#endif
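The supernode-boundary decision above combines the DFS marker test with a length test: column jcol may extend the supernode of jcol-1 only if its structure below the diagonal has exactly one entry fewer (nextl - jptr == jptr - jm1ptr - 1), and the supernode width stays below maxsuper. A toy sketch of just the length test, with hypothetical column structures (not taken from the patch):

#include <vector>
#include <iostream>

// True when column j may extend the supernode of column j-1 under the length
// part of the T2 criterion: |struct(j)| == |struct(j-1)| - 1. The real code
// additionally relies on the DFS marker and on the maxsuper bound.
static bool t2_length_test(const std::vector<int>& col_jm1, const std::vector<int>& col_j)
{
  return col_j.size() + 1 == col_jm1.size();
}

int main()
{
  std::vector<int> col0 = {0, 2, 3, 5};   // row indices of column 0 of L
  std::vector<int> col1 = {2, 3, 5};      // column 1: one entry fewer
  std::vector<int> col2 = {3, 7};         // column 2: unrelated structure
  std::cout << t2_length_test(col0, col1) << " "    // 1: may join the supernode
            << t2_length_test(col1, col2) << "\n";  // 0: starts a new supernode
  return 0;
}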
\ No newline at end of file diff --git a/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h b/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h new file mode 100644 index 000000000..d3227469d --- /dev/null +++ b/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h @@ -0,0 +1,100 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. +/* + + * NOTE: This file is the modified version of [s,d,c,z]copy_to_ucol.c file in SuperLU + + * -- SuperLU routine (version 2.0) -- + * Univ. of California Berkeley, Xerox Palo Alto Research Center, + * and Lawrence Berkeley National Lab. + * November 15, 1997 + * + * Copyright (c) 1994 by Xerox Corporation. All rights reserved. + * + * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY + * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. + * + * Permission is hereby granted to use or copy this program for any + * purpose, provided the above notices are retained on all copies. + * Permission to modify the code and to distribute modified code is + * granted, provided the above notices are retained, and a notice that + * the code was modified is included with the above copyright notice. + */ +#ifndef SPARSELU_COPY_TO_UCOL_H +#define SPARSELU_COPY_TO_UCOL_H + +/** + * \brief Performs numeric block updates (sup-col) in topological order + * + * \param jcol current column to update + * \param nseg Number of segments in the U part + * \param segrep segment representative ... + * \param repfnz First nonzero column in each row ... + * \param perm_r Row permutation + * \param dense Store the full representation of the column + * \param glu Global LU data. + * \return 0 - successful return + * > 0 - number of bytes allocated when run out of space + * + */ +template <typename Scalar, typename Index> +int SparseLUBase<Scalar,Index>::LU_copy_to_ucol(const int jcol, const int nseg, IndexVector& segrep, BlockIndexVector& repfnz ,IndexVector& perm_r, BlockScalarVector& dense, GlobalLU_t& glu) +{ + Index ksub, krep, ksupno; + + Index jsupno = glu.supno(jcol); + + // For each nonzero supernode segment of U[*,j] in topological order + int k = nseg - 1, i; + Index nextu = glu.xusub(jcol); + Index kfnz, isub, segsize; + Index new_next,irow; + Index fsupc, mem; + for (ksub = 0; ksub < nseg; ksub++) + { + krep = segrep(k); k--; + ksupno = glu.supno(krep); + if (jsupno != ksupno ) // should go into ucol(); + { + kfnz = repfnz(krep); + if (kfnz != IND_EMPTY) + { // Nonzero U-segment + fsupc = glu.xsup(ksupno); + isub = glu.xlsub(fsupc) + kfnz - fsupc; + segsize = krep - kfnz + 1; + new_next = nextu + segsize; + while (new_next > glu.nzumax) + { + mem = LUMemXpand<ScalarVector>(glu.ucol, glu.nzumax, nextu, UCOL, glu.num_expansions); + if (mem) return mem; + mem = LUMemXpand<IndexVector>(glu.usub, glu.nzumax, nextu, USUB, glu.num_expansions); + if (mem) return mem; + + } + + for (i = 0; i < segsize; i++) + { + irow = glu.lsub(isub); + glu.usub(nextu) = perm_r(irow); // Unlike the L part, the U part is stored in its final order + glu.ucol(nextu) = dense(irow); + dense(irow) = Scalar(0.0); + nextu++; + isub++; + } + + } // end nonzero U-segment + + } // end if jsupno + + } // end for each segment + glu.xusub(jcol + 1) = nextu; // close U(*,jcol) + return 0; +} + +#endif
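LU_copy_to_ucol gathers each finished U-segment out of the dense working column (the SPA) into the compressed (usub, ucol) arrays and zeroes the gathered entries for the next column. A stripped-down sketch of that gather step with plain vectors and made-up indices:

#include <vector>
#include <iostream>

int main()
{
  // SPA: a dense working column in which the numeric update left a few nonzeros.
  std::vector<double> dense(8, 0.0);
  dense[1] = 3.5; dense[4] = -2.0; dense[6] = 0.25;

  std::vector<int> segment_rows = {1, 4, 6};  // rows of the finished U-segment
  std::vector<int> usub;                      // compressed row indices of U
  std::vector<double> ucol;                   // compressed values of U

  for (int irow : segment_rows)
  {
    usub.push_back(irow);        // in the real code this is perm_r(irow)
    ucol.push_back(dense[irow]); // gather the value ...
    dense[irow] = 0.0;           // ... and reset the SPA entry
  }
  for (std::size_t k = 0; k < usub.size(); ++k)
    std::cout << usub[k] << ":" << ucol[k] << " ";
  std::cout << "\n";
  return 0;
}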
\ No newline at end of file diff --git a/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h b/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h new file mode 100644 index 000000000..69e1d4da9 --- /dev/null +++ b/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h @@ -0,0 +1,119 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +/* This file is a modified version of heap_relax_snode.c file in SuperLU + * -- SuperLU routine (version 3.0) -- + * Univ. of California Berkeley, Xerox Palo Alto Research Center, + * and Lawrence Berkeley National Lab. + * October 15, 2003 + * + * Copyright (c) 1994 by Xerox Corporation. All rights reserved. + * + * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY + * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. + * + * Permission is hereby granted to use or copy this program for any + * purpose, provided the above notices are retained on all copies. + * Permission to modify the code and to distribute modified code is + * granted, provided the above notices are retained, and a notice that + * the code was modified is included with the above copyright notice. + */ + +#ifndef SPARSELU_HEAP_RELAX_SNODE_H +#define SPARSELU_HEAP_RELAX_SNODE_H +#include "SparseLU_Coletree.h" +/** + * \brief Identify the initial relaxed supernodes + * + * This routine is applied to a symmetric elimination tree. + * It assumes that the matrix has been reordered according to the postorder of the etree + * \param et elimination tree + * \param relax_columns Maximum number of columns allowed in a relaxed snode + * \param descendants Number of descendants of each node in the etree + * \param relax_end last column in a supernode + */ +template <typename Scalar, typename Index> +void SparseLUBase<Scalar,Index>::LU_heap_relax_snode (const int n, IndexVector& et, const int relax_columns, IndexVector& descendants, IndexVector& relax_end) +{ + + // The etree may not be postordered, but it is heap-ordered + IndexVector post; + LU_TreePostorder(n, et, post); // Post order etree + IndexVector inv_post(n+1); + int i; + for (i = 0; i < n+1; ++i) inv_post(post(i)) = i; // inv_post = post.inverse()???
+ + // Renumber etree in postorder + IndexVector iwork(n); + IndexVector et_save(n+1); + for (i = 0; i < n; ++i) + { + iwork(post(i)) = post(et(i)); + } + et_save = et; // Save the original etree + et = iwork; + + // compute the number of descendants of each node in the etree + relax_end.setConstant(IND_EMPTY); + int j, parent; + descendants.setZero(); + for (j = 0; j < n; j++) + { + parent = et(j); + if (parent != n) // not the dummy root + descendants(parent) += descendants(j) + 1; + } + // Identify the relaxed supernodes by postorder traversal of the etree + int snode_start; // beginning of a snode + int k; + int nsuper_et_post = 0; // Number of relaxed snodes in postordered etree + int nsuper_et = 0; // Number of relaxed snodes in the original etree + int l; + for (j = 0; j < n; ) + { + parent = et(j); + snode_start = j; + while ( parent != n && descendants(parent) < relax_columns ) + { + j = parent; + parent = et(j); + } + // Found a supernode in postordered etree, j is the last column + ++nsuper_et_post; + k = n; + for (i = snode_start; i <= j; ++i) + k = (std::min)(k, inv_post(i)); + l = inv_post(j); + if ( (l - k) == (j - snode_start) ) // Same number of columns in the snode + { + // This is also a supernode in the original etree + relax_end(k) = l; // Record last column + ++nsuper_et; + } + else + { + for (i = snode_start; i <= j; ++i) + { + l = inv_post(i); + if (descendants(i) == 0) + { + relax_end(l) = l; + ++nsuper_et; + } + } + } + j++; + // Search for a new leaf + while (descendants(j) != 0 && j < n) j++; + } // End postorder traversal of the etree + + // Recover the original etree + et = et_save; +} +#endif diff --git a/Eigen/src/SparseLU/SparseLU_kernel_bmod.h b/Eigen/src/SparseLU/SparseLU_kernel_bmod.h new file mode 100644 index 000000000..b15ff9c50 --- /dev/null +++ b/Eigen/src/SparseLU/SparseLU_kernel_bmod.h @@ -0,0 +1,109 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr> +// Copyright (C) 2012 Gael Guennebaud <gael.guennebaud@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef SPARSELU_KERNEL_BMOD_H +#define SPARSELU_KERNEL_BMOD_H + +/** + * \brief Performs numeric block updates from a given supernode to a single column + * + * \param segsize Size of the segment (and blocks ) to use for updates + * \param [in,out]dense Packed values of the original matrix + * \param tempv temporary vector to use for updates + * \param lusup array containing the supernodes + * \param nsupr Number of rows in the supernode + * \param nrow Number of rows in the rectangular part of the supernode + * \param lsub compressed row subscripts of supernodes + * \param lptr pointer to the first column of the current supernode in lsub + * \param no_zeros Number of nonzeros elements before the diagonal part of the supernode + * \return 0 on success + */ +template <int SegSizeAtCompileTime> struct LU_kernel_bmod +{ + template <typename BlockScalarVector, typename ScalarVector, typename IndexVector> + EIGEN_DONT_INLINE static void run(const int segsize, BlockScalarVector& dense, ScalarVector& tempv, ScalarVector& lusup, int& luptr, const int nsupr, const int nrow, IndexVector& lsub, const int lptr, const int no_zeros) + { + typedef typename ScalarVector::Scalar Scalar; + // First, copy U[*,j] segment from dense(*) to tempv(*) + // The result of triangular solve is in tempv[*]; + // The result of matric-vector update is in dense[*] + int isub = lptr + no_zeros; + int i, irow; + for (i = 0; i < ((SegSizeAtCompileTime==Dynamic)?segsize:SegSizeAtCompileTime); i++) + { + irow = lsub(isub); + tempv(i) = dense(irow); + ++isub; + } + // Dense triangular solve -- start effective triangle + luptr += nsupr * no_zeros + no_zeros; + // Form Eigen matrix and vector + Map<Matrix<Scalar,SegSizeAtCompileTime,SegSizeAtCompileTime>, 0, OuterStride<> > A( &(lusup.data()[luptr]), segsize, segsize, OuterStride<>(nsupr) ); + Map<Matrix<Scalar,SegSizeAtCompileTime,1> > u(tempv.data(), segsize); + + u = A.template triangularView<UnitLower>().solve(u); + + // Dense matrix-vector product y <-- B*x + luptr += segsize; + Map<Matrix<Scalar,Dynamic,SegSizeAtCompileTime>, 0, OuterStride<> > B( &(lusup.data()[luptr]), nrow, segsize, OuterStride<>(nsupr) ); + Map<Matrix<Scalar,Dynamic,1> > l(tempv.data()+segsize, nrow); + if(SegSizeAtCompileTime==2) + l = u(0) * B.col(0) + u(1) * B.col(1); + else if(SegSizeAtCompileTime==3) + l = u(0) * B.col(0) + u(1) * B.col(1) + u(2) * B.col(2); + else + l.noalias() = B * u; + + // Scatter tempv[] into SPA dense[] as a temporary storage + isub = lptr + no_zeros; + for (i = 0; i < ((SegSizeAtCompileTime==Dynamic)?segsize:SegSizeAtCompileTime); i++) + { + irow = lsub(isub++); + dense(irow) = tempv(i); + } + + // Scatter l into SPA dense[] + for (i = 0; i < nrow; i++) + { + irow = lsub(isub++); + dense(irow) -= l(i); + } + } +}; + +template <> struct LU_kernel_bmod<1> +{ + template <typename BlockScalarVector, typename ScalarVector, typename IndexVector> + EIGEN_DONT_INLINE static void run(const int /*segsize*/, BlockScalarVector& dense, ScalarVector& /*tempv*/, ScalarVector& lusup, int& luptr, const int nsupr, const int nrow, IndexVector& lsub, const int lptr, const int no_zeros) + { + typedef typename ScalarVector::Scalar Scalar; + Scalar f = dense(lsub(lptr + no_zeros)); + luptr += nsupr * no_zeros + no_zeros + 1; + const Scalar* a(lusup.data() + luptr); + const typename IndexVector::Scalar* irow(lsub.data()+lptr + no_zeros + 1); + int i = 0; + for (; i+1 < nrow; i+=2) + { + int i0 = *(irow++); + int i1 = *(irow++); + Scalar a0 = *(a++); + Scalar a1 = *(a++); + Scalar d0 = 
dense.coeff(i0); + Scalar d1 = dense.coeff(i1); + d0 -= f*a0; + d1 -= f*a1; + dense.coeffRef(i0) = d0; + dense.coeffRef(i1) = d1; + } + if(i<nrow) + dense.coeffRef(*(irow++)) -= f * *(a++); + } +}; +#endif diff --git a/Eigen/src/SparseLU/SparseLU_panel_bmod.h b/Eigen/src/SparseLU/SparseLU_panel_bmod.h new file mode 100644 index 000000000..6688b4e3e --- /dev/null +++ b/Eigen/src/SparseLU/SparseLU_panel_bmod.h @@ -0,0 +1,204 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr> +// Copyright (C) 2012 Gael Guennebaud <gael.guennebaud@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +/* + + * NOTE: This file is the modified version of [s,d,c,z]panel_bmod.c file in SuperLU + + * -- SuperLU routine (version 3.0) -- + * Univ. of California Berkeley, Xerox Palo Alto Research Center, + * and Lawrence Berkeley National Lab. + * October 15, 2003 + * + * Copyright (c) 1994 by Xerox Corporation. All rights reserved. + * + * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY + * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. + * + * Permission is hereby granted to use or copy this program for any + * purpose, provided the above notices are retained on all copies. + * Permission to modify the code and to distribute modified code is + * granted, provided the above notices are retained, and a notice that + * the code was modified is included with the above copyright notice. + */ +#ifndef SPARSELU_PANEL_BMOD_H +#define SPARSELU_PANEL_BMOD_H +/** + * \brief Performs numeric block updates (sup-panel) in topological order. + * + * Before entering this routine, the original nonzeros in the panel + * were already copied i nto the spa[m,w] + * + * \param m number of rows in the matrix + * \param w Panel size + * \param jcol Starting column of the panel + * \param nseg Number of segments in the U part + * \param dense Store the full representation of the panel + * \param tempv working array + * \param segrep segment representative... first row in the segment + * \param repfnz First nonzero rows + * \param glu Global LU data. + * + * + */ +template <typename Scalar, typename Index> +void SparseLUBase<Scalar,Index>::LU_panel_bmod(const int m, const int w, const int jcol, const int nseg, ScalarVector& dense, ScalarVector& tempv, IndexVector& segrep, IndexVector& repfnz, LU_perfvalues& perfv, GlobalLU_t& glu) +{ + + int ksub,jj,nextl_col; + int fsupc, nsupc, nsupr, nrow; + int krep, kfnz; + int lptr; // points to the row subscripts of a supernode + int luptr; // ... 
+ int segsize,no_zeros ; + // For each nonz supernode segment of U[*,j] in topological order + int k = nseg - 1; + for (ksub = 0; ksub < nseg; ksub++) + { // For each updating supernode + + /* krep = representative of current k-th supernode + * fsupc = first supernodal column + * nsupc = number of columns in a supernode + * nsupr = number of rows in a supernode + */ + krep = segrep(k); k--; + fsupc = glu.xsup(glu.supno(krep)); + nsupc = krep - fsupc + 1; + nsupr = glu.xlsub(fsupc+1) - glu.xlsub(fsupc); + nrow = nsupr - nsupc; + lptr = glu.xlsub(fsupc); + + // loop over the panel columns to detect the actual number of columns and rows + int u_rows = 0; + int u_cols = 0; + for (jj = jcol; jj < jcol + w; jj++) + { + nextl_col = (jj-jcol) * m; + VectorBlock<IndexVector> repfnz_col(repfnz, nextl_col, m); // First nonzero column index for each row + + kfnz = repfnz_col(krep); + if ( kfnz == IND_EMPTY ) + continue; // skip any zero segment + + segsize = krep - kfnz + 1; + u_cols++; + u_rows = (std::max)(segsize,u_rows); + } + + // if the blocks are large enough, use level 3 + // TODO find better heuristics! + if( nsupc >= perfv.colblk && nrow > perfv.rowblk && u_cols>perfv.relax) + { + Map<Matrix<Scalar,Dynamic,Dynamic> > U(tempv.data(), u_rows, u_cols); + + // gather U + int u_col = 0; + for (jj = jcol; jj < jcol + w; jj++) + { + nextl_col = (jj-jcol) * m; + VectorBlock<IndexVector> repfnz_col(repfnz, nextl_col, m); // First nonzero column index for each row + VectorBlock<ScalarVector> dense_col(dense, nextl_col, m); // Scatter/gather entire matrix column from/to here + + kfnz = repfnz_col(krep); + if ( kfnz == IND_EMPTY ) + continue; // skip any zero segment + + segsize = krep - kfnz + 1; + luptr = glu.xlusup(fsupc); + no_zeros = kfnz - fsupc; + + int isub = lptr + no_zeros; + int off = u_rows-segsize; + for (int i = 0; i < off; i++) U(i,u_col) = 0; + for (int i = 0; i < segsize; i++) + { + int irow = glu.lsub(isub); + U(i+off,u_col) = dense_col(irow); + ++isub; + } + u_col++; + } + // solve U = A^-1 U + luptr = glu.xlusup(fsupc); + no_zeros = (krep - u_rows + 1) - fsupc; + luptr += nsupr * no_zeros + no_zeros; + Map<Matrix<Scalar,Dynamic,Dynamic>, 0, OuterStride<> > A(glu.lusup.data()+luptr, u_rows, u_rows, OuterStride<>(nsupr) ); + U = A.template triangularView<UnitLower>().solve(U); + + // update + luptr += u_rows; + Map<Matrix<Scalar,Dynamic,Dynamic>, 0, OuterStride<> > B(glu.lusup.data()+luptr, nrow, u_rows, OuterStride<>(nsupr) ); + assert(tempv.size()>w*u_rows + nrow*w); + Map<Matrix<Scalar,Dynamic,Dynamic> > L(tempv.data()+w*u_rows, nrow, u_cols); + L.noalias() = B * U; + + // scatter U and L + u_col = 0; + for (jj = jcol; jj < jcol + w; jj++) + { + nextl_col = (jj-jcol) * m; + VectorBlock<IndexVector> repfnz_col(repfnz, nextl_col, m); // First nonzero column index for each row + VectorBlock<ScalarVector> dense_col(dense, nextl_col, m); // Scatter/gather entire matrix column from/to here + + kfnz = repfnz_col(krep); + if ( kfnz == IND_EMPTY ) + continue; // skip any zero segment + + segsize = krep - kfnz + 1; + no_zeros = kfnz - fsupc; + int isub = lptr + no_zeros; + + int off = u_rows-segsize; + for (int i = 0; i < segsize; i++) + { + int irow = glu.lsub(isub++); + dense_col(irow) = U.coeff(i+off,u_col); + U.coeffRef(i+off,u_col) = 0; + } + + // Scatter l into SPA dense[] + for (int i = 0; i < nrow; i++) + { + int irow = glu.lsub(isub++); + dense_col(irow) -= L.coeff(i,u_col); + L.coeffRef(i,u_col) = 0; + } + u_col++; + } + } + else // level 2 only + { + // Sequence through each column 
in the panel
+ for (jj = jcol; jj < jcol + w; jj++)
+ {
+ nextl_col = (jj-jcol) * m;
+ VectorBlock<IndexVector> repfnz_col(repfnz, nextl_col, m); // First nonzero column index for each row
+ VectorBlock<ScalarVector> dense_col(dense, nextl_col, m); // Scatter/gather entire matrix column from/to here
+
+ kfnz = repfnz_col(krep);
+ if ( kfnz == IND_EMPTY )
+ continue; // skip any zero segment
+
+ segsize = krep - kfnz + 1;
+ luptr = glu.xlusup(fsupc);
+
+ // Perform a triangular solve and block update,
+ // then scatter the result of sup-col update to dense[]
+ no_zeros = kfnz - fsupc;
+ if(segsize==1) LU_kernel_bmod<1>::run(segsize, dense_col, tempv, glu.lusup, luptr, nsupr, nrow, glu.lsub, lptr, no_zeros);
+ else if(segsize==2) LU_kernel_bmod<2>::run(segsize, dense_col, tempv, glu.lusup, luptr, nsupr, nrow, glu.lsub, lptr, no_zeros);
+ else if(segsize==3) LU_kernel_bmod<3>::run(segsize, dense_col, tempv, glu.lusup, luptr, nsupr, nrow, glu.lsub, lptr, no_zeros);
+ else LU_kernel_bmod<Dynamic>::run(segsize, dense_col, tempv, glu.lusup, luptr, nsupr, nrow, glu.lsub, lptr, no_zeros);
+ } // End for each column in the panel
+ }
+
+ } // End for each updating supernode
+}
+#endif
diff --git a/Eigen/src/SparseLU/SparseLU_panel_dfs.h b/Eigen/src/SparseLU/SparseLU_panel_dfs.h
new file mode 100644
index 000000000..5d3025388
--- /dev/null
+++ b/Eigen/src/SparseLU/SparseLU_panel_dfs.h
@@ -0,0 +1,247 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+/*
+
+ * NOTE: This file is the modified version of [s,d,c,z]panel_dfs.c file in SuperLU
+
+ * -- SuperLU routine (version 2.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * November 15, 1997
+ *
+ * Copyright (c) 1994 by Xerox Corporation. All rights reserved.
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
+ *
+ * Permission is hereby granted to use or copy this program for any
+ * purpose, provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is
+ * granted, provided the above notices are retained, and a notice that
+ * the code was modified is included with the above copyright notice.
+ */ +#ifndef SPARSELU_PANEL_DFS_H +#define SPARSELU_PANEL_DFS_H +template <typename Scalar, typename Index> +template <typename Traits> +void SparseLUBase<Scalar,Index>::LU_dfs_kernel(const int jj, IndexVector& perm_r, + int& nseg, IndexVector& panel_lsub, IndexVector& segrep, + Ref<IndexVector> repfnz_col, IndexVector& xprune, Ref<IndexVector> marker, IndexVector& parent, + IndexVector& xplore, GlobalLU_t& glu, + int& nextl_col, int krow, Traits& traits + ) +{ + + int kmark = marker(krow); + + // For each unmarked krow of jj + marker(krow) = jj; + int kperm = perm_r(krow); + if (kperm == IND_EMPTY ) { + // krow is in L : place it in structure of L(*, jj) + panel_lsub(nextl_col++) = krow; // krow is indexed into A + + traits.mem_expand(panel_lsub, nextl_col, kmark); + } + else + { + // krow is in U : if its supernode-representative krep + // has been explored, update repfnz(*) + // krep = supernode representative of the current row + int krep = glu.xsup(glu.supno(kperm)+1) - 1; + // First nonzero element in the current column: + int myfnz = repfnz_col(krep); + + if (myfnz != IND_EMPTY ) + { + // Representative visited before + if (myfnz > kperm ) repfnz_col(krep) = kperm; + + } + else + { + // Otherwise, perform dfs starting at krep + int oldrep = IND_EMPTY; + parent(krep) = oldrep; + repfnz_col(krep) = kperm; + int xdfs = glu.xlsub(krep); + int maxdfs = xprune(krep); + + int kpar; + do + { + // For each unmarked kchild of krep + while (xdfs < maxdfs) + { + int kchild = glu.lsub(xdfs); + xdfs++; + int chmark = marker(kchild); + + if (chmark != jj ) + { + marker(kchild) = jj; + int chperm = perm_r(kchild); + + if (chperm == IND_EMPTY) + { + // case kchild is in L: place it in L(*, j) + panel_lsub(nextl_col++) = kchild; + traits.mem_expand(panel_lsub, nextl_col, chmark); + } + else + { + // case kchild is in U : + // chrep = its supernode-rep. If its rep has been explored, + // update its repfnz(*) + int chrep = glu.xsup(glu.supno(chperm)+1) - 1; + myfnz = repfnz_col(chrep); + + if (myfnz != IND_EMPTY) + { // Visited before + if (myfnz > chperm) + repfnz_col(chrep) = chperm; + } + else + { // Cont. dfs at snode-rep of kchild + xplore(krep) = xdfs; + oldrep = krep; + krep = chrep; // Go deeper down G(L) + parent(krep) = oldrep; + repfnz_col(krep) = chperm; + xdfs = glu.xlsub(krep); + maxdfs = xprune(krep); + + } // end if myfnz != -1 + } // end if chperm == -1 + + } // end if chmark !=jj + } // end while xdfs < maxdfs + + // krow has no more unexplored nbrs : + // Place snode-rep krep in postorder DFS, if this + // segment is seen for the first time. (Note that + // "repfnz(krep)" may change later.) + // Baktrack dfs to its parent + if(traits.update_segrep(krep,jj)) + //if (marker1(krep) < jcol ) + { + segrep(nseg) = krep; + ++nseg; + //marker1(krep) = jj; + } + + kpar = parent(krep); // Pop recursion, mimic recursion + if (kpar == IND_EMPTY) + break; // dfs done + krep = kpar; + xdfs = xplore(krep); + maxdfs = xprune(krep); + + } while (kpar != IND_EMPTY); // Do until empty stack + + } // end if (myfnz = -1) + + } // end if (kperm == -1) +} + +/** + * \brief Performs a symbolic factorization on a panel of columns [jcol, jcol+w) + * + * A supernode representative is the last column of a supernode. + * The nonzeros in U[*,j] are segments that end at supernodes representatives + * + * The routine returns a list of the supernodal representatives + * in topological order of the dfs that generates them. 
This list is + * a superset of the topological order of each individual column within + * the panel. + * The location of the first nonzero in each supernodal segment + * (supernodal entry location) is also returned. Each column has + * a separate list for this purpose. + * + * Two markers arrays are used for dfs : + * marker[i] == jj, if i was visited during dfs of current column jj; + * marker1[i] >= jcol, if i was visited by earlier columns in this panel; + * + * \param [in]m number of rows in the matrix + * \param [in]w Panel size + * \param [in]jcol Starting column of the panel + * \param [in]A Input matrix in column-major storage + * \param [in]perm_r Row permutation + * \param [out]nseg Number of U segments + * \param [out]dense Accumulate the column vectors of the panel + * \param [out]panel_lsub Subscripts of the row in the panel + * \param [out]segrep Segment representative i.e first nonzero row of each segment + * \param [out]repfnz First nonzero location in each row + * \param [out]xprune + * \param [out]marker + * + * + */ + +template<typename IndexVector> +struct LU_panel_dfs_traits +{ + typedef typename IndexVector::Scalar Index; + LU_panel_dfs_traits(Index jcol, Index* marker) + : m_jcol(jcol), m_marker(marker) + {} + bool update_segrep(Index krep, Index jj) + { + if(m_marker[krep]<m_jcol) + { + m_marker[krep] = jj; + return true; + } + return false; + } + void mem_expand(IndexVector& /*glu.lsub*/, int /*nextl*/, int /*chmark*/) {} + enum { ExpandMem = false }; + Index m_jcol; + Index* m_marker; +}; + +template <typename Scalar, typename Index> +void SparseLUBase<Scalar,Index>::LU_panel_dfs(const int m, const int w, const int jcol, MatrixType& A, IndexVector& perm_r, int& nseg, ScalarVector& dense, IndexVector& panel_lsub, IndexVector& segrep, IndexVector& repfnz, IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, GlobalLU_t& glu) +{ + int nextl_col; // Next available position in panel_lsub[*,jj] + + // Initialize pointers + VectorBlock<IndexVector> marker1(marker, m, m); + nseg = 0; + + LU_panel_dfs_traits<IndexVector> traits(jcol, marker1.data()); + + // For each column in the panel + for (int jj = jcol; jj < jcol + w; jj++) + { + nextl_col = (jj - jcol) * m; + + VectorBlock<IndexVector> repfnz_col(repfnz, nextl_col, m); // First nonzero location in each row + VectorBlock<ScalarVector> dense_col(dense,nextl_col, m); // Accumulate a column vector here + + + // For each nnz in A[*, jj] do depth first search + for (typename MatrixType::InnerIterator it(A, jj); it; ++it) + { + int krow = it.row(); + dense_col(krow) = it.value(); + + int kmark = marker(krow); + if (kmark == jj) + continue; // krow visited before, go to the next nonzero + + LU_dfs_kernel(jj, perm_r, nseg, panel_lsub, segrep, repfnz_col, xprune, marker, parent, + xplore, glu, nextl_col, krow, traits); + }// end for nonzeros in column jj + + } // end for column jj + +} +#endif
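For reference, the marker trick used throughout these DFS routines can be illustrated in isolation. Tagging a node with the index jj of the column being processed makes the test marker[k] == jj mean "already visited while working on column jj", so the visited set never has to be cleared between the w columns of a panel. A minimal standalone sketch of that idea (hypothetical names, not code from this patch):

#include <stack>
#include <vector>

// Illustrative only: 'adj' is an adjacency list of a directed graph, the panel
// is the set of start nodes jcol .. jcol+w-1, and 'marker' has one entry per
// node, initialised to a value (e.g. -1) that can never equal a column index.
void panel_visit(const std::vector<std::vector<int> >& adj, int jcol, int w,
                 std::vector<int>& marker)
{
  for (int jj = jcol; jj < jcol + w; ++jj)
  {
    std::stack<int> todo;
    todo.push(jj);
    while (!todo.empty())
    {
      int k = todo.top(); todo.pop();
      if (marker[k] == jj) continue;   // already seen while processing column jj
      marker[k] = jj;                  // tag with the column index: no reset between columns
      for (std::size_t i = 0; i < adj[k].size(); ++i)
        todo.push(adj[k][i]);
    }
  }
}

The second marker array (marker1 above) plays the same role at panel granularity, distinguishing "seen by an earlier column of this panel" from "seen by the current column".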
\ No newline at end of file
diff --git a/Eigen/src/SparseLU/SparseLU_pivotL.h b/Eigen/src/SparseLU/SparseLU_pivotL.h
new file mode 100644
index 000000000..c4a9f1c74
--- /dev/null
+++ b/Eigen/src/SparseLU/SparseLU_pivotL.h
@@ -0,0 +1,125 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+/*
+
+ * NOTE: This file is the modified version of xpivotL.c file in SuperLU
+
+ * -- SuperLU routine (version 3.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * October 15, 2003
+ *
+ * Copyright (c) 1994 by Xerox Corporation. All rights reserved.
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
+ *
+ * Permission is hereby granted to use or copy this program for any
+ * purpose, provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is
+ * granted, provided the above notices are retained, and a notice that
+ * the code was modified is included with the above copyright notice.
+ */
+#ifndef SPARSELU_PIVOTL_H
+#define SPARSELU_PIVOTL_H
+/**
+ * \brief Performs the numerical pivoting on the current column of L, and the CDIV operation.
+ *
+ * Pivot policy :
+ * (1) Compute thresh = u * max_(i>=j) abs(A_ij);
+ * (2) IF user specifies pivot row k and abs(A_kj) >= thresh THEN
+ * pivot row = k;
+ * ELSE IF abs(A_jj) >= thresh THEN
+ * pivot row = j;
+ * ELSE
+ * pivot row = m;
+ *
+ * Note: If you absolutely want to use a given pivot order, then set u=0.0.
+ * + * \param jcol The current column of L + * \param u diagonal pivoting threshold + * \param [in,out]perm_r Row permutation (threshold pivoting) + * \param [in] iperm_c column permutation - used to finf diagonal of Pc*A*Pc' + * \param [out]pivrow The pivot row + * \param glu Global LU data + * \return 0 if success, i > 0 if U(i,i) is exactly zero + * + */ +template <typename Scalar, typename Index> +int SparseLUBase<Scalar,Index>::LU_pivotL(const int jcol, const RealScalar diagpivotthresh, IndexVector& perm_r, IndexVector& iperm_c, int& pivrow, GlobalLU_t& glu) +{ + + Index fsupc = (glu.xsup)((glu.supno)(jcol)); // First column in the supernode containing the column jcol + Index nsupc = jcol - fsupc; // Number of columns in the supernode portion, excluding jcol; nsupc >=0 + Index lptr = glu.xlsub(fsupc); // pointer to the starting location of the row subscripts for this supernode portion + Index nsupr = glu.xlsub(fsupc+1) - lptr; // Number of rows in the supernode + Scalar* lu_sup_ptr = &(glu.lusup.data()[glu.xlusup(fsupc)]); // Start of the current supernode + Scalar* lu_col_ptr = &(glu.lusup.data()[glu.xlusup(jcol)]); // Start of jcol in the supernode + Index* lsub_ptr = &(glu.lsub.data()[lptr]); // Start of row indices of the supernode + + // Determine the largest abs numerical value for partial pivoting + Index diagind = iperm_c(jcol); // diagonal index + RealScalar pivmax = 0.0; + Index pivptr = nsupc; + Index diag = IND_EMPTY; + RealScalar rtemp; + Index isub, icol, itemp, k; + for (isub = nsupc; isub < nsupr; ++isub) { + rtemp = std::abs(lu_col_ptr[isub]); + if (rtemp > pivmax) { + pivmax = rtemp; + pivptr = isub; + } + if (lsub_ptr[isub] == diagind) diag = isub; + } + + // Test for singularity + if ( pivmax == 0.0 ) { + pivrow = lsub_ptr[pivptr]; + perm_r(pivrow) = jcol; + return (jcol+1); + } + + RealScalar thresh = diagpivotthresh * pivmax; + + // Choose appropriate pivotal element + + { + // Test if the diagonal element can be used as a pivot (given the threshold value) + if (diag >= 0 ) + { + // Diagonal element exists + rtemp = std::abs(lu_col_ptr[diag]); + if (rtemp != 0.0 && rtemp >= thresh) pivptr = diag; + } + pivrow = lsub_ptr[pivptr]; + } + + // Record pivot row + perm_r(pivrow) = jcol; + // Interchange row subscripts + if (pivptr != nsupc ) + { + std::swap( lsub_ptr[pivptr], lsub_ptr[nsupc] ); + // Interchange numerical values as well, for the two rows in the whole snode + // such that L is indexed the same way as A + for (icol = 0; icol <= nsupc; icol++) + { + itemp = pivptr + icol * nsupr; + std::swap(lu_sup_ptr[itemp], lu_sup_ptr[nsupc + icol * nsupr]); + } + } + // cdiv operations + Scalar temp = Scalar(1.0) / lu_col_ptr[nsupc]; + for (k = nsupc+1; k < nsupr; k++) + lu_col_ptr[k] *= temp; + return 0; +} +#endif
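The pivot policy documented above reduces to a few lines once the candidate entries of a column are available densely. A minimal sketch (the choose_pivot helper and its arguments are hypothetical, not the interface added by this patch): with u close to 1 this behaves like classical partial pivoting, while u == 0 keeps any nonzero diagonal entry and thus preserves a user-supplied pivot order.

#include <cmath>
#include <cstddef>
#include <vector>

// Illustrative only: 'col' holds the candidate pivot entries of one column,
// 'diag' is the position of the diagonal entry inside 'col' (-1 if it is
// structurally zero) and 'u' plays the role of diagpivotthresh above.
// Returns the index of the chosen pivot, or -1 if the column is exactly zero.
int choose_pivot(const std::vector<double>& col, int diag, double u)
{
  double pivmax = 0.0;
  int pivptr = -1;
  for (std::size_t i = 0; i < col.size(); ++i)
    if (std::abs(col[i]) > pivmax) { pivmax = std::abs(col[i]); pivptr = int(i); }

  if (pivmax == 0.0) return -1;                  // singular column
  double thresh = u * pivmax;
  if (diag >= 0 && std::abs(col[diag]) != 0.0 && std::abs(col[diag]) >= thresh)
    return diag;                                 // diagonal is large enough: keep it
  return pivptr;                                 // otherwise classical partial pivoting
}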
\ No newline at end of file diff --git a/Eigen/src/SparseLU/SparseLU_pruneL.h b/Eigen/src/SparseLU/SparseLU_pruneL.h new file mode 100644 index 000000000..d8c58e039 --- /dev/null +++ b/Eigen/src/SparseLU/SparseLU_pruneL.h @@ -0,0 +1,129 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +/* + + * NOTE: This file is the modified version of [s,d,c,z]pruneL.c file in SuperLU + + * -- SuperLU routine (version 2.0) -- + * Univ. of California Berkeley, Xerox Palo Alto Research Center, + * and Lawrence Berkeley National Lab. + * November 15, 1997 + * + * Copyright (c) 1994 by Xerox Corporation. All rights reserved. + * + * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY + * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. + * + * Permission is hereby granted to use or copy this program for any + * purpose, provided the above notices are retained on all copies. + * Permission to modify the code and to distribute modified code is + * granted, provided the above notices are retained, and a notice that + * the code was modified is included with the above copyright notice. + */ +#ifndef SPARSELU_PRUNEL_H +#define SPARSELU_PRUNEL_H + +/** + * \brief Prunes the L-structure. + * + * It prunes the L-structure of supernodes whose L-structure contains the current pivot row "pivrow" + * + * + * \param jcol The current column of L + * \param [in]perm_r Row permutation + * \param [out]pivrow The pivot row + * \param nseg Number of segments + * \param segrep + * \param repfnz + * \param [out]xprune + * \param glu Global LU data + * + */ +template <typename Scalar, typename Index> +void SparseLUBase<Scalar,Index>::LU_pruneL(const int jcol, const IndexVector& perm_r, const int pivrow, const int nseg, const IndexVector& segrep, BlockIndexVector& repfnz, IndexVector& xprune, GlobalLU_t& glu) +{ + // For each supernode-rep irep in U(*,j] + int jsupno = glu.supno(jcol); + int i,irep,irep1; + bool movnum, do_prune = false; + Index kmin, kmax, minloc, maxloc,krow; + for (i = 0; i < nseg; i++) + { + irep = segrep(i); + irep1 = irep + 1; + do_prune = false; + + // Don't prune with a zero U-segment + if (repfnz(irep) == IND_EMPTY) continue; + + // If a snode overlaps with the next panel, then the U-segment + // is fragmented into two parts -- irep and irep1. We should let + // pruning occur at the rep-column in irep1s snode. 
+ if (glu.supno(irep) == glu.supno(irep1) ) continue; // don't prune
+
+ // If it has not been pruned & it has a nonz in row L(pivrow,i)
+ if (glu.supno(irep) != jsupno )
+ {
+ if ( xprune (irep) >= glu.xlsub(irep1) )
+ {
+ kmin = glu.xlsub(irep);
+ kmax = glu.xlsub(irep1) - 1;
+ for (krow = kmin; krow <= kmax; krow++)
+ {
+ if (glu.lsub(krow) == pivrow)
+ {
+ do_prune = true;
+ break;
+ }
+ }
+ }
+
+ if (do_prune)
+ {
+ // do a quicksort-type partition
+ // movnum=true means that the num values have to be exchanged
+ movnum = false;
+ if (irep == glu.xsup(glu.supno(irep)) ) // Snode of size 1
+ movnum = true;
+
+ while (kmin <= kmax)
+ {
+ if (perm_r(glu.lsub(kmax)) == IND_EMPTY)
+ kmax--;
+ else if ( perm_r(glu.lsub(kmin)) != IND_EMPTY)
+ kmin++;
+ else
+ {
+ // kmin below pivrow (not yet pivoted), and kmax
+ // above pivrow: interchange the two subscripts
+ std::swap(glu.lsub(kmin), glu.lsub(kmax));
+
+ // If the supernode has only one column, then we
+ // only keep one set of subscripts. For any subscript
+ // interchange performed, similar interchange must be
+ // done on the numerical values.
+ if (movnum)
+ {
+ minloc = glu.xlusup(irep) + ( kmin - glu.xlsub(irep) );
+ maxloc = glu.xlusup(irep) + ( kmax - glu.xlsub(irep) );
+ std::swap(glu.lusup(minloc), glu.lusup(maxloc));
+ }
+ kmin++;
+ kmax--;
+ }
+ } // end while
+
+ xprune(irep) = kmin; //Pruning
+ } // end if do_prune
+ } // end pruning
+ } // End for each U-segment
+}
+
+#endif
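The while loop in LU_pruneL is a two-pointer, quicksort-style partition: row subscripts whose rows are already pivoted are moved to the front of the range, the others to the back, and the split point becomes the new pruned boundary stored in xprune. The same pattern in isolation, over plain arrays (a sketch with hypothetical names, not code from this patch):

#include <algorithm>
#include <vector>

// Illustrative only: partition rows[kmin..kmax] so that rows that are already
// pivoted come first; the returned index is the new pruned boundary.
int partition_pivoted(std::vector<int>& rows, const std::vector<bool>& pivoted,
                      int kmin, int kmax)
{
  while (kmin <= kmax)
  {
    if (!pivoted[rows[kmax]])
      --kmax;                               // not yet pivoted: stays in the upper part
    else if (pivoted[rows[kmin]])
      ++kmin;                               // already pivoted: stays in the lower part
    else
    {
      std::swap(rows[kmin], rows[kmax]);    // exchange the two subscripts
      ++kmin;
      --kmax;
    }
  }
  return kmin;                              // corresponds to xprune(irep) in the real code
}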
\ No newline at end of file diff --git a/Eigen/src/SparseLU/SparseLU_relax_snode.h b/Eigen/src/SparseLU/SparseLU_relax_snode.h new file mode 100644 index 000000000..8db8619c1 --- /dev/null +++ b/Eigen/src/SparseLU/SparseLU_relax_snode.h @@ -0,0 +1,73 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +/* This file is a modified version of heap_relax_snode.c file in SuperLU + * -- SuperLU routine (version 3.0) -- + * Univ. of California Berkeley, Xerox Palo Alto Research Center, + * and Lawrence Berkeley National Lab. + * October 15, 2003 + * + * Copyright (c) 1994 by Xerox Corporation. All rights reserved. + * + * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY + * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. + * + * Permission is hereby granted to use or copy this program for any + * purpose, provided the above notices are retained on all copies. + * Permission to modify the code and to distribute modified code is + * granted, provided the above notices are retained, and a notice that + * the code was modified is included with the above copyright notice. + */ + +#ifndef SPARSELU_RELAX_SNODE_H +#define SPARSELU_RELAX_SNODE_H +/** + * \brief Identify the initial relaxed supernodes + * + * This routine is applied to a column elimination tree. + * It assumes that the matrix has been reordered according to the postorder of the etree + * \param et elimination tree + * \param relax_columns Maximum number of columns allowed in a relaxed snode + * \param descendants Number of descendants of each node in the etree + * \param relax_end last column in a supernode + */ +template <typename Scalar, typename Index> +void SparseLUBase<Scalar,Index>::LU_relax_snode (const int n, IndexVector& et, const int relax_columns, IndexVector& descendants, IndexVector& relax_end) +{ + + // compute the number of descendants of each node in the etree + int j, parent; + relax_end.setConstant(IND_EMPTY); + descendants.setZero(); + for (j = 0; j < n; j++) + { + parent = et(j); + if (parent != n) // not the dummy root + descendants(parent) += descendants(j) + 1; + } + // Identify the relaxed supernodes by postorder traversal of the etree + int snode_start; // beginning of a snode + for (j = 0; j < n; ) + { + parent = et(j); + snode_start = j; + while ( parent != n && descendants(parent) < relax_columns ) + { + j = parent; + parent = et(j); + } + // Found a supernode in postordered etree, j is the last column + relax_end(snode_start) = j; // Record last column + j++; + // Search for a new leaf + while (descendants(j) != 0 && j < n) j++; + } // End postorder traversal of the etree + +} +#endif diff --git a/Eigen/src/SparseLU/SparseLU_snode_bmod.h b/Eigen/src/SparseLU/SparseLU_snode_bmod.h new file mode 100644 index 000000000..beea71e31 --- /dev/null +++ b/Eigen/src/SparseLU/SparseLU_snode_bmod.h @@ -0,0 +1,72 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. 
If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+/*
+
+ * NOTE: This file is the modified version of [s,d,c,z]snode_bmod.c file in SuperLU
+
+ * -- SuperLU routine (version 3.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * October 15, 2003
+ *
+ * Copyright (c) 1994 by Xerox Corporation. All rights reserved.
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
+ *
+ * Permission is hereby granted to use or copy this program for any
+ * purpose, provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is
+ * granted, provided the above notices are retained, and a notice that
+ * the code was modified is included with the above copyright notice.
+ */
+#ifndef SPARSELU_SNODE_BMOD_H
+#define SPARSELU_SNODE_BMOD_H
+template <typename Scalar, typename Index>
+int SparseLUBase<Scalar,Index>::LU_snode_bmod (const int jcol, const int fsupc, ScalarVector& dense, GlobalLU_t& glu)
+{
+ /* lsub : Compressed row subscripts of ( rectangular supernodes )
+ * xlsub : xlsub[j] is the starting location of the j-th column in lsub(*)
+ * lusup : Numerical values of the rectangular supernodes
+ * xlusup[j] is the starting location of the j-th column in lusup(*)
+ */
+ int nextlu = glu.xlusup(jcol); // Starting location of the next column to add
+ int irow, isub;
+ // Process the supernodal portion of L\U[*,jcol]
+ for (isub = glu.xlsub(fsupc); isub < glu.xlsub(fsupc+1); isub++)
+ {
+ irow = glu.lsub(isub);
+ glu.lusup(nextlu) = dense(irow);
+ dense(irow) = 0;
+ ++nextlu;
+ }
+ glu.xlusup(jcol + 1) = nextlu; // Initialize xlusup for next column ( jcol+1 )
+
+ if (fsupc < jcol ){
+ int luptr = glu.xlusup(fsupc); // points to the first column of the supernode
+ int nsupr = glu.xlsub(fsupc + 1) -glu.xlsub(fsupc); //Number of rows in the supernode
+ int nsupc = jcol - fsupc; // Number of columns in the supernodal portion of L\U[*,jcol]
+ int ufirst = glu.xlusup(jcol); // points to the beginning of column jcol in supernode L\U(jsupno)
+
+ int nrow = nsupr - nsupc; // Number of rows in the off-diagonal blocks
+
+ // Solve the triangular system for U(fsupc:jcol, jcol) with L(fsupc:jcol, fsupc:jcol)
+ Map<Matrix<Scalar,Dynamic,Dynamic>,0,OuterStride<> > A( &(glu.lusup.data()[luptr]), nsupc, nsupc, OuterStride<>(nsupr) );
+ VectorBlock<ScalarVector> u(glu.lusup, ufirst, nsupc);
+ u = A.template triangularView<UnitLower>().solve(u); // Call the Eigen dense triangular solve interface
+
+ // Update the trailing part of the column jcol U(jcol:jcol+nrow, jcol) using L(jcol:jcol+nrow, fsupc:jcol) and U(fsupc:jcol)
+ new (&A) Map<Matrix<Scalar,Dynamic,Dynamic>,0,OuterStride<> > ( &(glu.lusup.data()[luptr+nsupc]), nrow, nsupc, OuterStride<>(nsupr) );
+ VectorBlock<ScalarVector> l(glu.lusup, ufirst+nsupc, nrow);
+ l.noalias() -= A * u;
+ }
+ return 0;
+}
+#endif
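Numerically, LU_snode_bmod is a dense unit-lower triangular solve on the diagonal block of the supernode followed by an update of the rows below it, both mapped onto Eigen's dense API. The same two steps on plain dense objects, for reference (an illustrative standalone program with made-up dimensions and data, not code from this patch):

#include <Eigen/Dense>
using namespace Eigen;

int main()
{
  const int nsupc = 3, nrow = 2;                            // supernode width, trailing rows
  MatrixXd snode = MatrixXd::Random(nsupc + nrow, nsupc);   // stored columns of the supernode
  VectorXd col   = VectorXd::Random(nsupc + nrow);          // new column entering the supernode

  // u := L11^{-1} u : unit-lower triangular solve with the diagonal block
  VectorXd u = snode.topRows(nsupc)
                    .triangularView<UnitLower>()
                    .solve(col.head(nsupc));
  col.head(nsupc) = u;

  // l := l - L21 * u : update of the rows below the diagonal block
  col.tail(nrow).noalias() -= snode.bottomRows(nrow) * u;
  return 0;
}

Because the triangular view is UnitLower, only the strictly lower part of the diagonal block is read and no division by a diagonal entry ever occurs, which is exactly why the supernodal kernel can run before the pivots of the later columns are known.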
\ No newline at end of file diff --git a/Eigen/src/SparseLU/SparseLU_snode_dfs.h b/Eigen/src/SparseLU/SparseLU_snode_dfs.h new file mode 100644 index 000000000..199436cd7 --- /dev/null +++ b/Eigen/src/SparseLU/SparseLU_snode_dfs.h @@ -0,0 +1,95 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +/* + + * NOTE: This file is the modified version of [s,d,c,z]snode_dfs.c file in SuperLU + + * -- SuperLU routine (version 2.0) -- + * Univ. of California Berkeley, Xerox Palo Alto Research Center, + * and Lawrence Berkeley National Lab. + * November 15, 1997 + * + * Copyright (c) 1994 by Xerox Corporation. All rights reserved. + * + * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY + * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. + * + * Permission is hereby granted to use or copy this program for any + * purpose, provided the above notices are retained on all copies. + * Permission to modify the code and to distribute modified code is + * granted, provided the above notices are retained, and a notice that + * the code was modified is included with the above copyright notice. + */ +#ifndef SPARSELU_SNODE_DFS_H +#define SPARSELU_SNODE_DFS_H + /** + * \brief Determine the union of the row structures of those columns within the relaxed snode. + * NOTE: The relaxed snodes are leaves of the supernodal etree, therefore, + * the portion outside the rectangular supernode must be zero. + * + * \param jcol start of the supernode + * \param kcol end of the supernode + * \param asub Row indices + * \param colptr Pointer to the beginning of each column + * \param xprune (out) The pruned tree ?? + * \param marker (in/out) working vector + * \return 0 on success, > 0 size of the memory when memory allocation failed + */ +template <typename Scalar, typename Index> +int SparseLUBase<Scalar,Index>::LU_snode_dfs(const int jcol, const int kcol,const MatrixType& mat, IndexVector& xprune, IndexVector& marker, GlobalLU_t& glu) +{ + int mem; + Index nsuper = ++glu.supno(jcol); // Next available supernode number + int nextl = glu.xlsub(jcol); //Index of the starting location of the jcol-th column in lsub + int krow,kmark; + for (int i = jcol; i <=kcol; i++) + { + // For each nonzero in A(*,i) + for (typename MatrixType::InnerIterator it(mat, i); it; ++it) + { + krow = it.row(); + kmark = marker(krow); + if ( kmark != kcol ) + { + // First time to visit krow + marker(krow) = kcol; + glu.lsub(nextl++) = krow; + if( nextl >= glu.nzlmax ) + { + mem = LUMemXpand<IndexVector>(glu.lsub, glu.nzlmax, nextl, LSUB, glu.num_expansions); + if (mem) return mem; // Memory expansion failed... Return the memory allocated so far + } + } + } + glu.supno(i) = nsuper; + } + + // If supernode > 1, then make a copy of the subscripts for pruning + if (jcol < kcol) + { + Index new_next = nextl + (nextl - glu.xlsub(jcol)); + while (new_next > glu.nzlmax) + { + mem = LUMemXpand<IndexVector>(glu.lsub, glu.nzlmax, nextl, LSUB, glu.num_expansions); + if (mem) return mem; // Memory expansion failed... 
Return the memory allocated so far + } + Index ifrom, ito = nextl; + for (ifrom = glu.xlsub(jcol); ifrom < nextl;) + glu.lsub(ito++) = glu.lsub(ifrom++); + for (int i = jcol+1; i <=kcol; i++) glu.xlsub(i) = nextl; + nextl = ito; + } + glu.xsup(nsuper+1) = kcol + 1; // Start of next available supernode + glu.supno(kcol+1) = nsuper; + xprune(kcol) = nextl; + glu.xlsub(kcol+1) = nextl; + return 0; +} +#endif
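Structurally, LU_snode_dfs computes the union of the row patterns of the columns jcol..kcol, with the marker array guaranteeing that each row index is appended to lsub only once. In isolation the pattern union looks like this (a sketch with hypothetical names, not code from this patch):

#include <vector>

// Illustrative only: union of the row patterns of several columns.
// 'marker' has one entry per row and must not already contain the value 'tag'.
std::vector<int> pattern_union(const std::vector<std::vector<int> >& col_rows,
                               std::vector<int>& marker, int tag)
{
  std::vector<int> rows;
  for (std::size_t j = 0; j < col_rows.size(); ++j)
    for (std::size_t k = 0; k < col_rows[j].size(); ++k)
    {
      int r = col_rows[j][k];
      if (marker[r] != tag)
      {
        marker[r] = tag;        // mark the row as seen for this supernode
        rows.push_back(r);      // append it once to the union
      }
    }
  return rows;
}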
\ No newline at end of file diff --git a/Eigen/src/SuperLUSupport/SuperLUSupport.h b/Eigen/src/SuperLUSupport/SuperLUSupport.h index d8a54e18c..cd6c4b91f 100644 --- a/Eigen/src/SuperLUSupport/SuperLUSupport.h +++ b/Eigen/src/SuperLUSupport/SuperLUSupport.h @@ -612,6 +612,7 @@ void SuperLU<MatrixType>::factorize(const MatrixType& a) this->initFactorization(a); + m_sluOptions.ColPerm = COLAMD; int info = 0; RealScalar recip_pivot_growth, rcond; RealScalar ferr, berr; diff --git a/Eigen/src/plugins/ArrayCwiseBinaryOps.h b/Eigen/src/plugins/ArrayCwiseBinaryOps.h index 5b979ebf8..1e751ad62 100644 --- a/Eigen/src/plugins/ArrayCwiseBinaryOps.h +++ b/Eigen/src/plugins/ArrayCwiseBinaryOps.h @@ -33,7 +33,8 @@ EIGEN_MAKE_CWISE_BINARY_OP(min,internal::scalar_min_op) * * \sa max() */ -EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_min_op<Scalar>, const Derived, const ConstantReturnType> +EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_min_op<Scalar>, const Derived, + const CwiseNullaryOp<internal::scalar_constant_op<Scalar>, PlainObject> > (min)(const Scalar &other) const { return (min)(Derived::PlainObject::Constant(rows(), cols(), other)); @@ -52,7 +53,8 @@ EIGEN_MAKE_CWISE_BINARY_OP(max,internal::scalar_max_op) * * \sa min() */ -EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_max_op<Scalar>, const Derived, const ConstantReturnType> +EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_max_op<Scalar>, const Derived, + const CwiseNullaryOp<internal::scalar_constant_op<Scalar>, PlainObject> > (max)(const Scalar &other) const { return (max)(Derived::PlainObject::Constant(rows(), cols(), other)); diff --git a/Eigen/src/plugins/ArrayCwiseUnaryOps.h b/Eigen/src/plugins/ArrayCwiseUnaryOps.h index 0dffaf413..a59636790 100644 --- a/Eigen/src/plugins/ArrayCwiseUnaryOps.h +++ b/Eigen/src/plugins/ArrayCwiseUnaryOps.h @@ -200,3 +200,4 @@ EIGEN_MAKE_SCALAR_CWISE_UNARY_OP(operator<=, std::less_equal) EIGEN_MAKE_SCALAR_CWISE_UNARY_OP(operator>, std::greater) EIGEN_MAKE_SCALAR_CWISE_UNARY_OP(operator>=, std::greater_equal) + |
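The last two hunks only spell out the return type of the scalar overloads of min() and max() (replacing the ConstantReturnType typedef by the equivalent CwiseNullaryOp expression); how they are used does not change. For reference, a typical use of these overloads:

#include <Eigen/Dense>
#include <iostream>
using namespace Eigen;

int main()
{
  ArrayXd a(4);
  a << -2.0, 0.5, 3.0, 7.0;
  ArrayXd clipped_above = a.min(1.0);   // coefficient-wise min with the constant 1.0
  ArrayXd clipped_below = a.max(0.0);   // coefficient-wise max with the constant 0.0
  std::cout << clipped_above.transpose() << "\n"
            << clipped_below.transpose() << "\n";
  return 0;
}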