From 0b60027f3c952163608a2d9b75b5da4ed7275210 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 18 Sep 2009 15:36:05 +0200 Subject: implement __gnuc_forget_about_setZero_its_over_now --- Eigen/src/Sparse/SparseProduct.h | 58 +++++++++++++++++++--------------------- bench/sparse_dense_product.cpp | 28 +++++++------------ 2 files changed, 37 insertions(+), 49 deletions(-) diff --git a/Eigen/src/Sparse/SparseProduct.h b/Eigen/src/Sparse/SparseProduct.h index a9ad8aad4..cb1196bc9 100644 --- a/Eigen/src/Sparse/SparseProduct.h +++ b/Eigen/src/Sparse/SparseProduct.h @@ -305,23 +305,9 @@ inline Derived& SparseMatrixBase::operator=(const SparseProduct -// template -// Derived& MatrixBase::lazyAssign(const SparseProduct& product) -// { -// typedef typename ei_cleantype::type _Lhs; -// typedef typename _Lhs::InnerIterator LhsInnerIterator; -// enum { LhsIsRowMajor = (_Lhs::Flags&RowMajorBit)==RowMajorBit }; -// derived().setZero(); -// for (int j=0; j -template -Derived& MatrixBase::lazyAssign(const SparseProduct& product) +// Note that here we force no inlining and separate the setZero() because GCC messes up otherwise +template +EIGEN_DONT_INLINE void ei_sparse_time_dense_product(const Lhs& lhs, const Rhs& rhs, Dest& dst) { typedef typename ei_cleantype::type _Lhs; typedef typename ei_cleantype::type _Rhs; @@ -335,34 +321,44 @@ Derived& MatrixBase::lazyAssign(const SparseProduct res(derived().row(LhsIsRowMajor ? j : 0)); - for (; (ProcessFirstHalf ? i && i.index() < j : i) ; ++i) + typename Rhs::Scalar rhs_j = rhs.coeff(j,0); + Block dst_j(dst.row(LhsIsRowMajor ? j : 0)); + for(; (ProcessFirstHalf ? i && i.index() < j : i) ; ++i) { - if (LhsIsSelfAdjoint) + if(LhsIsSelfAdjoint) { int a = LhsIsRowMajor ? j : i.index(); int b = LhsIsRowMajor ? i.index() : j; - Scalar v = i.value(); - derived().row(a) += (v) * product.rhs().row(b); - derived().row(b) += ei_conj(v) * product.rhs().row(a); + typename Lhs::Scalar v = i.value(); + dst.row(a) += (v) * rhs.row(b); + dst.row(b) += ei_conj(v) * rhs.row(a); } - else if (LhsIsRowMajor) - res += i.value() * product.rhs().row(i.index()); + else if(LhsIsRowMajor) + dst_j += i.value() * rhs.row(i.index()); + else if(Rhs::ColsAtCompileTime==1) + dst.coeffRef(i.index()) += i.value() * rhs_j; else - derived().row(i.index()) += i.value() * product.rhs().row(j); + dst.row(i.index()) += i.value() * rhs.row(j); } if (ProcessFirstHalf && i && (i.index()==j)) - derived().row(j) += i.value() * product.rhs().row(j); + dst.row(j) += i.value() * rhs.row(j); } +} + +template +template +Derived& MatrixBase::lazyAssign(const SparseProduct& product) +{ + derived().setZero(); + ei_sparse_time_dense_product(product.lhs(), product.rhs(), derived()); return derived(); } diff --git a/bench/sparse_dense_product.cpp b/bench/sparse_dense_product.cpp index 9b9af8819..bfe46122d 100644 --- a/bench/sparse_dense_product.cpp +++ b/bench/sparse_dense_product.cpp @@ -91,22 +91,22 @@ int main(int argc, char *argv[]) { std::cout << "Eigen sparse\t" << sm1.nonZeros()/float(sm1.rows()*sm1.cols())*100 << "%\n"; - BENCH(for (int k=0; k m1(sm1); // std::cout << "Eigen dyn-sparse\t" << m1.nonZeros()/float(m1.rows()*m1.cols())*100 << "%\n"; -// +// // BENCH(for (int k=0; k gmmV1(cols), gmmV2(cols); Map >(&gmmV1[0], cols) = v1; Map >(&gmmV2[0], cols) = v2; - timer.reset(); - timer.start(); - for (int k=0; k