From c1f9ca925405c8fad126f327b4bdca7f983fc96e Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 9 Jun 2016 17:11:03 +0200 Subject: Update RealQZ to reduce 2x2 diagonal block of T corresponding to non reduced diagonal block of S to positive diagonal form. This step involve a real 2x2 SVD problem. The respective routine is thus in src/misc/ to be shared by both EVD and AVD modules. --- Eigen/Eigenvalues | 1 + Eigen/SVD | 1 + Eigen/src/Eigenvalues/RealQZ.h | 32 ++++++++++++++++++++++++- Eigen/src/SVD/JacobiSVD.h | 32 ------------------------- Eigen/src/misc/RealSvd2x2.h | 54 ++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 87 insertions(+), 33 deletions(-) create mode 100644 Eigen/src/misc/RealSvd2x2.h diff --git a/Eigen/Eigenvalues b/Eigen/Eigenvalues index ea93eb303..216a6d51d 100644 --- a/Eigen/Eigenvalues +++ b/Eigen/Eigenvalues @@ -32,6 +32,7 @@ * \endcode */ +#include "src/misc/RealSvd2x2.h" #include "src/Eigenvalues/Tridiagonalization.h" #include "src/Eigenvalues/RealSchur.h" #include "src/Eigenvalues/EigenSolver.h" diff --git a/Eigen/SVD b/Eigen/SVD index b353f3f54..565d9c90d 100644 --- a/Eigen/SVD +++ b/Eigen/SVD @@ -31,6 +31,7 @@ * \endcode */ +#include "src/misc/RealSvd2x2.h" #include "src/SVD/UpperBidiagonalization.h" #include "src/SVD/SVDBase.h" #include "src/SVD/JacobiSVD.h" diff --git a/Eigen/src/Eigenvalues/RealQZ.h b/Eigen/src/Eigenvalues/RealQZ.h index a62071d42..c4715b954 100644 --- a/Eigen/src/Eigenvalues/RealQZ.h +++ b/Eigen/src/Eigenvalues/RealQZ.h @@ -552,7 +552,6 @@ namespace Eigen { m_T.coeffRef(l,l-1) = Scalar(0.0); } - template RealQZ& RealQZ::compute(const MatrixType& A_in, const MatrixType& B_in, bool computeQZ) { @@ -616,6 +615,37 @@ namespace Eigen { } // check if we converged before reaching iterations limit m_info = (local_iter j_left, j_right; + internal::real_2x2_jacobi_svd(m_T, i, i+1, &j_left, &j_right); + + // Apply resulting Jacobi rotations + m_T.applyOnTheLeft(i,i+1,j_left); + m_T.applyOnTheRight(i,i+1,j_right); + m_S.applyOnTheLeft(i,i+1,j_left); + m_S.applyOnTheRight(i,i+1,j_right); + m_T(i,i+1) = Scalar(0); + + if(m_computeQZ) { + m_Q.applyOnTheRight(i,i+1,j_left.transpose()); + m_Z.applyOnTheLeft(i,i+1,j_right.transpose()); + } + + i++; + } + } + } + return *this; } // end compute diff --git a/Eigen/src/SVD/JacobiSVD.h b/Eigen/src/SVD/JacobiSVD.h index 1940c8294..b83fd7a4d 100644 --- a/Eigen/src/SVD/JacobiSVD.h +++ b/Eigen/src/SVD/JacobiSVD.h @@ -419,38 +419,6 @@ struct svd_precondition_2x2_block_to_be_real } }; -template -void real_2x2_jacobi_svd(const MatrixType& matrix, Index p, Index q, - JacobiRotation *j_left, - JacobiRotation *j_right) -{ - using std::sqrt; - using std::abs; - Matrix m; - m << numext::real(matrix.coeff(p,p)), numext::real(matrix.coeff(p,q)), - numext::real(matrix.coeff(q,p)), numext::real(matrix.coeff(q,q)); - JacobiRotation rot1; - RealScalar t = m.coeff(0,0) + m.coeff(1,1); - RealScalar d = m.coeff(1,0) - m.coeff(0,1); - if(d == RealScalar(0)) - { - rot1.s() = RealScalar(0); - rot1.c() = RealScalar(1); - } - else - { - // If d!=0, then t/d cannot overflow because the magnitude of the - // entries forming d are not too small compared to the ones forming t. - RealScalar u = t / d; - RealScalar tmp = sqrt(RealScalar(1) + numext::abs2(u)); - rot1.s() = RealScalar(1) / tmp; - rot1.c() = u / tmp; - } - m.applyOnTheLeft(0,1,rot1); - j_right->makeJacobi(m,0,1); - *j_left = rot1 * j_right->transpose(); -} - template struct traits > { diff --git a/Eigen/src/misc/RealSvd2x2.h b/Eigen/src/misc/RealSvd2x2.h new file mode 100644 index 000000000..cdd7777d2 --- /dev/null +++ b/Eigen/src/misc/RealSvd2x2.h @@ -0,0 +1,54 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009-2010 Benoit Jacob +// Copyright (C) 2013-2016 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_REALSVD2X2_H +#define EIGEN_REALSVD2X2_H + +namespace Eigen { + +namespace internal { + +template +void real_2x2_jacobi_svd(const MatrixType& matrix, Index p, Index q, + JacobiRotation *j_left, + JacobiRotation *j_right) +{ + using std::sqrt; + using std::abs; + Matrix m; + m << numext::real(matrix.coeff(p,p)), numext::real(matrix.coeff(p,q)), + numext::real(matrix.coeff(q,p)), numext::real(matrix.coeff(q,q)); + JacobiRotation rot1; + RealScalar t = m.coeff(0,0) + m.coeff(1,1); + RealScalar d = m.coeff(1,0) - m.coeff(0,1); + if(d == RealScalar(0)) + { + rot1.s() = RealScalar(0); + rot1.c() = RealScalar(1); + } + else + { + // If d!=0, then t/d cannot overflow because the magnitude of the + // entries forming d are not too small compared to the ones forming t. + RealScalar u = t / d; + RealScalar tmp = sqrt(RealScalar(1) + numext::abs2(u)); + rot1.s() = RealScalar(1) / tmp; + rot1.c() = u / tmp; + } + m.applyOnTheLeft(0,1,rot1); + j_right->makeJacobi(m,0,1); + *j_left = rot1 * j_right->transpose(); +} + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_REALSVD2X2_H \ No newline at end of file -- cgit v1.2.3 From 2bd59b0e0d667dcdcb6b070596a1bf023e3e88f1 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 9 Jun 2016 17:12:03 +0200 Subject: Take advantage that T is already diagonal in the extraction of generalized complex eigenvalues. --- Eigen/src/Eigenvalues/GeneralizedEigenSolver.h | 27 ++++++++------------------ 1 file changed, 8 insertions(+), 19 deletions(-) diff --git a/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h b/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h index 08caed281..9f43fd544 100644 --- a/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +++ b/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h @@ -327,24 +327,13 @@ GeneralizedEigenSolver::compute(const MatrixType& A, const MatrixTyp } else { - // We need to extract the generalized eigenvalues of the pair of a general 2x2 block S and a triangular 2x2 block T - // From the eigen decomposition of T = U * E * U^-1, - // we can extract the eigenvalues of (U^-1 * S * U) / E - // Here, we can take advantage that E = diag(T), and U = [ 1 T_01 ; 0 T_11-T_00], and U^-1 = [1 -T_11/(T_11-T_00) ; 0 1/(T_11-T_00)]. - // Then taking beta=T_00*T_11*(T_11-T_00), we can avoid any division, and alpha is the eigenvalues of A = (U^-1 * S * U) * diag(T_11,T_00) * (T_11-T_00): - - // T = [a b ; 0 c] - // S = [e f ; g h] - RealScalar a = m_realQZ.matrixT().coeff(i, i), b = m_realQZ.matrixT().coeff(i, i+1), c = m_realQZ.matrixT().coeff(i+1, i+1); - RealScalar e = m_matS.coeff(i, i), f = m_matS.coeff(i, i+1), g = m_matS.coeff(i+1, i), h = m_matS.coeff(i+1, i+1); - RealScalar d = c-a; - RealScalar gb = g*b; - Matrix S2; - S2 << (e*d-gb)*c, ((e*b+f*d-h*b)*d-gb*b)*a, - g*c , (gb+h*d)*a; - - // NOTE, we could also compute the SVD of T's block during the QZ factorization so that the respective T block is guaranteed to be diagonal, - // and then we could directly apply the formula below (while taking care of scaling S columns by T11,T00): + // We need to extract the generalized eigenvalues of the pair of a general 2x2 block S and a positive diagonal 2x2 block T + // Then taking beta=T_00*T_11, we can avoid any division, and alpha is the eigenvalues of A = (U^-1 * S * U) * diag(T_11,T_00): + + // T = [a 0] + // [0 b] + RealScalar a = m_realQZ.matrixT().coeff(i, i), b = m_realQZ.matrixT().coeff(i+1, i+1); + Matrix S2 = m_matS.template block<2,2>(i,i) * Matrix(b,a).asDiagonal(); Scalar p = Scalar(0.5) * (S2.coeff(0,0) - S2.coeff(1,1)); Scalar z = sqrt(abs(p * p + S2.coeff(1,0) * S2.coeff(0,1))); @@ -352,7 +341,7 @@ GeneralizedEigenSolver::compute(const MatrixType& A, const MatrixTyp m_alphas.coeffRef(i+1) = ComplexScalar(S2.coeff(1,1) + p, -z); m_betas.coeffRef(i) = - m_betas.coeffRef(i+1) = a*c*d; + m_betas.coeffRef(i+1) = a*b; i += 2; } -- cgit v1.2.3 From e2b383632699684d06ae180b3ad85cfb0189973a Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 9 Jun 2016 17:13:33 +0200 Subject: Include recent changesets that played with product's kernel --- bench/perf_monitoring/gemm/changesets.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/bench/perf_monitoring/gemm/changesets.txt b/bench/perf_monitoring/gemm/changesets.txt index fb3e48e99..d00b4603a 100644 --- a/bench/perf_monitoring/gemm/changesets.txt +++ b/bench/perf_monitoring/gemm/changesets.txt @@ -44,4 +44,8 @@ before-evaluators 7013:f875e75f07e5 # organize a little our default cache sizes, and use a saner default L1 outside of x86 (10% faster on Nexus 5) 7591:09a8e2186610 # 3.3-alpha1 7650:b0f3c8f43025 # help clang inlining +8744:74b789ada92a # Improved the matrix multiplication blocking in the case where mr is not a power of 2 (e.g on Haswell CPUs) +8789:efcb912e4356 # Made the index type a template parameter to evaluateProductBlockingSizes. Use numext::mini and numext::maxi instead of std::min/std::max to compute blocking sizes +8972:81d53c711775 # Don't optimize the processing of the last rows of a matrix matrix product in cases that violate the assumptions made by the optimized code path +8985:d935df21a082 # Remove the rotating kernel. -- cgit v1.2.3