diff options
author | Gael Guennebaud <g.gael@free.fr> | 2009-08-13 11:42:02 +0200 |
---|---|---|
committer | Gael Guennebaud <g.gael@free.fr> | 2009-08-13 11:42:02 +0200 |
commit | 1b257a7620d122a05f71b6ebd52e14fe5f1b18ef (patch) | |
tree | 56b66ea6ea98e2bf6be7fb51c9c9f8d1dbb48205 /Eigen | |
parent | 1d80f561ad48a96e9852dd8bfdbc400574aa67e5 (diff) |
add an optimized "apply in place a rotation in the plane",
and make Jacobi and SelfAdjointEigenSolver use it
=> ~ x1.75 speedup for JacobiSVD and x2 for SelfAdjointEigenSolver
Diffstat (limited to 'Eigen')
-rw-r--r-- | Eigen/Core | 1 | ||||
-rw-r--r-- | Eigen/src/Core/products/RotationInThePlane.h | 127 | ||||
-rw-r--r-- | Eigen/src/Jacobi/Jacobi.h | 18 | ||||
-rw-r--r-- | Eigen/src/QR/SelfAdjointEigenSolver.h | 17 |
4 files changed, 136 insertions, 27 deletions
diff --git a/Eigen/Core b/Eigen/Core index 28ed09035..0a1477bbf 100644 --- a/Eigen/Core +++ b/Eigen/Core @@ -194,6 +194,7 @@ namespace Eigen { #include "src/Core/products/TriangularMatrixVector.h" #include "src/Core/products/TriangularMatrixMatrix.h" #include "src/Core/products/TriangularSolverMatrix.h" +#include "src/Core/products/RotationInThePlane.h" #include "src/Core/BandMatrix.h" } // namespace Eigen diff --git a/Eigen/src/Core/products/RotationInThePlane.h b/Eigen/src/Core/products/RotationInThePlane.h new file mode 100644 index 000000000..aa0b4d640 --- /dev/null +++ b/Eigen/src/Core/products/RotationInThePlane.h @@ -0,0 +1,127 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Gael Guennebaud <g.gael@free.fr> +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see <http://www.gnu.org/licenses/>. + +#ifndef EIGEN_ROTATION_IN_THE_PLANE_H +#define EIGEN_ROTATION_IN_THE_PLANE_H + +/********************************************************************** +* This file implement ... +**********************************************************************/ + +template<typename Scalar, int Incr> +struct ei_apply_rotation_in_the_plane_selector; + +template<typename VectorX, typename VectorY> +void ei_apply_rotation_in_the_plane(VectorX& x, VectorY& y, typename VectorX::Scalar c, typename VectorY::Scalar s) +{ + ei_assert(x.size() == y.size()); + int size = x.size(); + int incrx = size ==1 ? 1 : &x.coeffRef(1) - &x.coeffRef(0); + int incry = size ==1 ? 1 : &y.coeffRef(1) - &y.coeffRef(0); + if (incrx==1 && incry==1) + ei_apply_rotation_in_the_plane_selector<typename VectorX::Scalar,1> + ::run(&x.coeffRef(0), &y.coeffRef(0), x.size(), c, s, 1, 1); + else + ei_apply_rotation_in_the_plane_selector<typename VectorX::Scalar,Dynamic> + ::run(&x.coeffRef(0), &y.coeffRef(0), x.size(), c, s, incrx, incry); +} + +template<typename Scalar> +struct ei_apply_rotation_in_the_plane_selector<Scalar,Dynamic> +{ + static EIGEN_DONT_INLINE void run(Scalar* x, Scalar* y, int size, Scalar c, Scalar s, int incrx, int incry) + { + for(int i=0; i<size; ++i) + { + Scalar xi = *x; + Scalar yi = *y; + *x = c * xi - s * yi; + *y = s * xi + c * yi; + x += incrx; + y += incry; + } + } +}; + +// both vectors are sequentially stored in memory => vectorization +template<typename Scalar> +struct ei_apply_rotation_in_the_plane_selector<Scalar,1> +{ + static EIGEN_DONT_INLINE void run(Scalar* x, Scalar* y, int size, Scalar c, Scalar s, int, int) + { + typedef typename ei_packet_traits<Scalar>::type Packet; + enum { PacketSize = ei_packet_traits<Scalar>::size, Peeling = 2 }; + int alignedStart = ei_alignmentOffset(y, size); + int alignedEnd = alignedStart + ((size-alignedStart)/(Peeling*PacketSize))*(Peeling*PacketSize); + + const Packet pc = ei_pset1(c); + const Packet ps = ei_pset1(s); + + for(int i=0; i<alignedStart; ++i) + { + Scalar xi = x[i]; + Scalar yi = y[i]; + x[i] = c * xi - s * yi; + y[i] = s * xi + c * yi; + } + + Scalar* px = x + alignedStart; + Scalar* py = y + alignedStart; + + if(ei_alignmentOffset(x, size)==alignedStart) + for(int i=alignedStart; i<alignedEnd; i+=PacketSize) + { + Packet xi = ei_pload(px); + Packet yi = ei_pload(py); + ei_pstore(px, ei_psub(ei_pmul(pc,xi),ei_pmul(ps,yi))); + ei_pstore(py, ei_padd(ei_pmul(ps,xi),ei_pmul(pc,yi))); + px += PacketSize; + py += PacketSize; + } + else + for(int i=alignedStart; i<alignedEnd; i+=Peeling*PacketSize) + { + Packet xi = ei_ploadu(px); + Packet xi1 = ei_ploadu(px+PacketSize); + Packet yi = ei_pload (py); + Packet yi1 = ei_pload (py+PacketSize); + ei_pstoreu(px, ei_psub(ei_pmul(pc,xi),ei_pmul(ps,yi))); + ei_pstoreu(px+PacketSize, ei_psub(ei_pmul(pc,xi1),ei_pmul(ps,yi1))); + ei_pstore (py, ei_padd(ei_pmul(ps,xi),ei_pmul(pc,yi))); + ei_pstore (py+PacketSize, ei_padd(ei_pmul(ps,xi1),ei_pmul(pc,yi1))); + px += Peeling*PacketSize; + py += Peeling*PacketSize; + } + + for(int i=alignedEnd; i<size; ++i) + { + Scalar xi = x[i]; + Scalar yi = y[i]; + x[i] = c * xi - s * yi; + y[i] = s * xi + c * yi; + } + } +}; + +#endif // EIGEN_ROTATION_IN_THE_PLANE_H diff --git a/Eigen/src/Jacobi/Jacobi.h b/Eigen/src/Jacobi/Jacobi.h index 785dd26ab..10ae9eb8f 100644 --- a/Eigen/src/Jacobi/Jacobi.h +++ b/Eigen/src/Jacobi/Jacobi.h @@ -28,23 +28,17 @@ template<typename Derived> void MatrixBase<Derived>::applyJacobiOnTheLeft(int p, int q, Scalar c, Scalar s) { - for(int i = 0; i < cols(); ++i) - { - Scalar tmp = coeff(p,i); - coeffRef(p,i) = c * tmp - s * coeff(q,i); - coeffRef(q,i) = s * tmp + c * coeff(q,i); - } + RowXpr x(row(p)); + RowXpr y(row(q)); + ei_apply_rotation_in_the_plane(x, y, c, s); } template<typename Derived> void MatrixBase<Derived>::applyJacobiOnTheRight(int p, int q, Scalar c, Scalar s) { - for(int i = 0; i < rows(); ++i) - { - Scalar tmp = coeff(i,p); - coeffRef(i,p) = c * tmp - s * coeff(i,q); - coeffRef(i,q) = s * tmp + c * coeff(i,q); - } + ColXpr x(col(p)); + ColXpr y(col(q)); + ei_apply_rotation_in_the_plane(x, y, c, s); } template<typename Scalar> diff --git a/Eigen/src/QR/SelfAdjointEigenSolver.h b/Eigen/src/QR/SelfAdjointEigenSolver.h index b003fb4d7..a8e89ba1a 100644 --- a/Eigen/src/QR/SelfAdjointEigenSolver.h +++ b/Eigen/src/QR/SelfAdjointEigenSolver.h @@ -378,23 +378,10 @@ static void ei_tridiagonal_qr_step(RealScalar* diag, RealScalar* subdiag, int st if (matrixQ) { #ifdef EIGEN_DEFAULT_TO_ROW_MAJOR + ei_apply_rotation_in_the_plane_selector<Scalar,Dynamic>::run(matrixQ+k, matrixQ+k+1, n, c, s, n, n); #else - int kn = k*n; - int kn1 = (k+1)*n; + ei_apply_rotation_in_the_plane_selector<Scalar,1>::run(matrixQ+k*n, matrixQ+k*n+n, n, c, s, 1, 1); #endif - // let's do the product manually to avoid the need of temporaries... - for (int i=0; i<n; ++i) - { - #ifdef EIGEN_DEFAULT_TO_ROW_MAJOR - Scalar matrixQ_i_k = matrixQ[i*n+k]; - matrixQ[i*n+k] = c * matrixQ_i_k - s * matrixQ[i*n+k+1]; - matrixQ[i*n+k+1] = s * matrixQ_i_k + c * matrixQ[i*n+k+1]; - #else - Scalar matrixQ_i_k = matrixQ[i+kn]; - matrixQ[i+kn] = c * matrixQ_i_k - s * matrixQ[i+kn1]; - matrixQ[i+kn1] = s * matrixQ_i_k + c * matrixQ[i+kn1]; - #endif - } } } } |