From 8846aa6d1b68fb0951c70bc339af93b418117ae2 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 4 Sep 2014 09:15:59 +0200 Subject: Optimization: enable cache-efficient application of HouseholderSequence. --- Eigen/src/Householder/BlockHouseholder.h | 73 ++++++++++++++++++++++------- Eigen/src/Householder/HouseholderSequence.h | 34 ++++++++++++-- 2 files changed, 84 insertions(+), 23 deletions(-) (limited to 'Eigen/src/Householder') diff --git a/Eigen/src/Householder/BlockHouseholder.h b/Eigen/src/Householder/BlockHouseholder.h index 60dbea5f5..35dbf80a1 100644 --- a/Eigen/src/Householder/BlockHouseholder.h +++ b/Eigen/src/Householder/BlockHouseholder.h @@ -16,48 +16,85 @@ namespace Eigen { namespace internal { + +/** \internal */ +// template +// void make_block_householder_triangular_factor(TriangularFactorType& triFactor, const VectorsType& vectors, const CoeffsType& hCoeffs) +// { +// typedef typename TriangularFactorType::Index Index; +// typedef typename VectorsType::Scalar Scalar; +// const Index nbVecs = vectors.cols(); +// eigen_assert(triFactor.rows() == nbVecs && triFactor.cols() == nbVecs && vectors.rows()>=nbVecs); +// +// for(Index i = 0; i < nbVecs; i++) +// { +// Index rs = vectors.rows() - i; +// // Warning, note that hCoeffs may alias with vectors. +// // It is then necessary to copy it before modifying vectors(i,i). +// typename CoeffsType::Scalar h = hCoeffs(i); +// // This hack permits to pass trough nested Block<> and Transpose<> expressions. +// Scalar *Vii_ptr = const_cast(vectors.data() + vectors.outerStride()*i + vectors.innerStride()*i); +// Scalar Vii = *Vii_ptr; +// *Vii_ptr = Scalar(1); +// triFactor.col(i).head(i).noalias() = -h * vectors.block(i, 0, rs, i).adjoint() +// * vectors.col(i).tail(rs); +// *Vii_ptr = Vii; +// // FIXME add .noalias() once the triangular product can work inplace +// triFactor.col(i).head(i) = triFactor.block(0,0,i,i).template triangularView() +// * triFactor.col(i).head(i); +// triFactor(i,i) = hCoeffs(i); +// } +// } /** \internal */ +// This variant avoid modifications in vectors template void make_block_householder_triangular_factor(TriangularFactorType& triFactor, const VectorsType& vectors, const CoeffsType& hCoeffs) { typedef typename TriangularFactorType::Index Index; - typedef typename VectorsType::Scalar Scalar; const Index nbVecs = vectors.cols(); eigen_assert(triFactor.rows() == nbVecs && triFactor.cols() == nbVecs && vectors.rows()>=nbVecs); - for(Index i = 0; i < nbVecs; i++) + for(Index i = nbVecs-1; i >=0 ; --i) { - Index rs = vectors.rows() - i; - Scalar Vii = vectors(i,i); - vectors.const_cast_derived().coeffRef(i,i) = Scalar(1); - triFactor.col(i).head(i).noalias() = -hCoeffs(i) * vectors.block(i, 0, rs, i).adjoint() - * vectors.col(i).tail(rs); - vectors.const_cast_derived().coeffRef(i, i) = Vii; - // FIXME add .noalias() once the triangular product can work inplace - triFactor.col(i).head(i) = triFactor.block(0,0,i,i).template triangularView() - * triFactor.col(i).head(i); + Index rs = vectors.rows() - i - 1; + Index rt = nbVecs-i-1; + + if(rt>0) + { + triFactor.row(i).tail(rt).noalias() = -hCoeffs(i) * vectors.col(i).tail(rs).adjoint() + * vectors.bottomRightCorner(rs, rt).template triangularView(); + + // FIXME add .noalias() once the triangular product can work inplace + triFactor.row(i).tail(rt) = triFactor.row(i).tail(rt) * triFactor.bottomRightCorner(rt,rt).template triangularView(); + + } triFactor(i,i) = hCoeffs(i); } } -/** \internal */ +/** \internal + * if forward then perform mat = H0 * H1 * H2 * mat + * otherwise perform mat = H2 * H1 * H0 * mat + */ template -void apply_block_householder_on_the_left(MatrixType& mat, const VectorsType& vectors, const CoeffsType& hCoeffs) +void apply_block_householder_on_the_left(MatrixType& mat, const VectorsType& vectors, const CoeffsType& hCoeffs, bool forward) { typedef typename MatrixType::Index Index; enum { TFactorSize = MatrixType::ColsAtCompileTime }; Index nbVecs = vectors.cols(); - Matrix T(nbVecs,nbVecs); - make_block_householder_triangular_factor(T, vectors, hCoeffs); - - const TriangularView& V(vectors); + Matrix T(nbVecs,nbVecs); + + if(forward) make_block_householder_triangular_factor(T, vectors, hCoeffs); + else make_block_householder_triangular_factor(T, vectors, hCoeffs.conjugate()); + const TriangularView V(vectors); // A -= V T V^* A Matrix tmp = V.adjoint() * mat; // FIXME add .noalias() once the triangular product can work inplace - tmp = T.template triangularView().adjoint() * tmp; + if(forward) tmp = T.template triangularView() * tmp; + else tmp = T.template triangularView().adjoint() * tmp; mat.noalias() -= V * tmp; } diff --git a/Eigen/src/Householder/HouseholderSequence.h b/Eigen/src/Householder/HouseholderSequence.h index 99d3eb21f..111936f0e 100644 --- a/Eigen/src/Householder/HouseholderSequence.h +++ b/Eigen/src/Householder/HouseholderSequence.h @@ -319,12 +319,36 @@ template class HouseholderS template inline void applyThisOnTheLeft(Dest& dst, Workspace& workspace) const { - workspace.resize(dst.cols()); - for(Index k = 0; k < m_length; ++k) + const Index BlockSize = 1; + // if the entries are large enough, then apply the reflectors by block + if(m_length>BlockSize && dst.cols()>1) + { + for(Index i = 0; i < m_length; i+=BlockSize) + { + Index end = m_trans ? (std::min)(m_length,i+BlockSize) : m_length-i; + Index k = m_trans ? i : (std::max)(Index(0),end-BlockSize); + Index bs = end-k; + Index start = k + m_shift; + + typedef Block::type,Dynamic,Dynamic> SubVectorsType; + SubVectorsType sub_vecs1(m_vectors.const_cast_derived(), Side==OnTheRight ? k : start, + Side==OnTheRight ? start : k, + Side==OnTheRight ? bs : m_vectors.rows()-start, + Side==OnTheRight ? m_vectors.cols()-start : bs); + typename internal::conditional, SubVectorsType&>::type sub_vecs(sub_vecs1); + Block sub_dst(dst,dst.rows()-rows()+m_shift+k,0, rows()-m_shift-k,dst.cols()); + apply_block_householder_on_the_left(sub_dst, sub_vecs, m_coeffs.segment(k, bs), !m_trans); + } + } + else { - Index actual_k = m_trans ? k : m_length-k-1; - dst.bottomRows(rows()-m_shift-actual_k) - .applyHouseholderOnTheLeft(essentialVector(actual_k), m_coeffs.coeff(actual_k), workspace.data()); + workspace.resize(dst.cols()); + for(Index k = 0; k < m_length; ++k) + { + Index actual_k = m_trans ? k : m_length-k-1; + dst.bottomRows(rows()-m_shift-actual_k) + .applyHouseholderOnTheLeft(essentialVector(actual_k), m_coeffs.coeff(actual_k), workspace.data()); + } } } -- cgit v1.2.3