diff options
author | Gael Guennebaud <g.gael@free.fr> | 2009-09-18 15:36:05 +0200 |
---|---|---|
committer | Gael Guennebaud <g.gael@free.fr> | 2009-09-18 15:36:05 +0200 |
commit | 0b60027f3c952163608a2d9b75b5da4ed7275210 (patch) | |
tree | 625725f5226ff98aa96b946f39bab51586d29db3 | |
parent | 6b5f96cb030a0fc65d772c91d66fc27f9cf57af3 (diff) |
implement __gnuc_forget_about_setZero_its_over_now
-rw-r--r-- | Eigen/src/Sparse/SparseProduct.h | 58 | ||||
-rw-r--r-- | bench/sparse_dense_product.cpp | 28 |
2 files changed, 37 insertions, 49 deletions
diff --git a/Eigen/src/Sparse/SparseProduct.h b/Eigen/src/Sparse/SparseProduct.h index a9ad8aad4..cb1196bc9 100644 --- a/Eigen/src/Sparse/SparseProduct.h +++ b/Eigen/src/Sparse/SparseProduct.h @@ -305,23 +305,9 @@ inline Derived& SparseMatrixBase<Derived>::operator=(const SparseProduct<Lhs,Rhs } // dense = sparse * dense -// template<typename Derived> -// template<typename Lhs, typename Rhs> -// Derived& MatrixBase<Derived>::lazyAssign(const SparseProduct<Lhs,Rhs,SparseTimeDenseProduct>& product) -// { -// typedef typename ei_cleantype<Lhs>::type _Lhs; -// typedef typename _Lhs::InnerIterator LhsInnerIterator; -// enum { LhsIsRowMajor = (_Lhs::Flags&RowMajorBit)==RowMajorBit }; -// derived().setZero(); -// for (int j=0; j<product.lhs().outerSize(); ++j) -// for (LhsInnerIterator i(product.lhs(),j); i; ++i) -// derived().row(LhsIsRowMajor ? j : i.index()) += i.value() * product.rhs().row(LhsIsRowMajor ? i.index() : j); -// return derived(); -// } - -template<typename Derived> -template<typename Lhs, typename Rhs> -Derived& MatrixBase<Derived>::lazyAssign(const SparseProduct<Lhs,Rhs,SparseTimeDenseProduct>& product) +// Note that here we force no inlining and separate the setZero() because GCC messes up otherwise +template<typename Lhs, typename Rhs, typename Dest> +EIGEN_DONT_INLINE void ei_sparse_time_dense_product(const Lhs& lhs, const Rhs& rhs, Dest& dst) { typedef typename ei_cleantype<Lhs>::type _Lhs; typedef typename ei_cleantype<Rhs>::type _Rhs; @@ -335,34 +321,44 @@ Derived& MatrixBase<Derived>::lazyAssign(const SparseProduct<Lhs,Rhs,SparseTimeD || ( (_Lhs::Flags&LowerTriangularBit) && LhsIsRowMajor) ), ProcessSecondHalf = LhsIsSelfAdjoint && (!ProcessFirstHalf) }; - derived().setZero(); - for (int j=0; j<product.lhs().outerSize(); ++j) + for (int j=0; j<lhs.outerSize(); ++j) { - LhsInnerIterator i(product.lhs(),j); + LhsInnerIterator i(lhs,j); if (ProcessSecondHalf && i && (i.index()==j)) { - derived().row(j) += i.value() * product.rhs().row(j); + dst.row(j) += i.value() * rhs.row(j); ++i; } - Block<Derived,1,Derived::ColsAtCompileTime> res(derived().row(LhsIsRowMajor ? j : 0)); - for (; (ProcessFirstHalf ? i && i.index() < j : i) ; ++i) + typename Rhs::Scalar rhs_j = rhs.coeff(j,0); + Block<Dest,1,Dest::ColsAtCompileTime> dst_j(dst.row(LhsIsRowMajor ? j : 0)); + for(; (ProcessFirstHalf ? i && i.index() < j : i) ; ++i) { - if (LhsIsSelfAdjoint) + if(LhsIsSelfAdjoint) { int a = LhsIsRowMajor ? j : i.index(); int b = LhsIsRowMajor ? i.index() : j; - Scalar v = i.value(); - derived().row(a) += (v) * product.rhs().row(b); - derived().row(b) += ei_conj(v) * product.rhs().row(a); + typename Lhs::Scalar v = i.value(); + dst.row(a) += (v) * rhs.row(b); + dst.row(b) += ei_conj(v) * rhs.row(a); } - else if (LhsIsRowMajor) - res += i.value() * product.rhs().row(i.index()); + else if(LhsIsRowMajor) + dst_j += i.value() * rhs.row(i.index()); + else if(Rhs::ColsAtCompileTime==1) + dst.coeffRef(i.index()) += i.value() * rhs_j; else - derived().row(i.index()) += i.value() * product.rhs().row(j); + dst.row(i.index()) += i.value() * rhs.row(j); } if (ProcessFirstHalf && i && (i.index()==j)) - derived().row(j) += i.value() * product.rhs().row(j); + dst.row(j) += i.value() * rhs.row(j); } +} + +template<typename Derived> +template<typename Lhs, typename Rhs> +Derived& MatrixBase<Derived>::lazyAssign(const SparseProduct<Lhs,Rhs,SparseTimeDenseProduct>& product) +{ + derived().setZero(); + ei_sparse_time_dense_product(product.lhs(), product.rhs(), derived()); return derived(); } diff --git a/bench/sparse_dense_product.cpp b/bench/sparse_dense_product.cpp index 9b9af8819..bfe46122d 100644 --- a/bench/sparse_dense_product.cpp +++ b/bench/sparse_dense_product.cpp @@ -91,22 +91,22 @@ int main(int argc, char *argv[]) { std::cout << "Eigen sparse\t" << sm1.nonZeros()/float(sm1.rows()*sm1.cols())*100 << "%\n"; - BENCH(for (int k=0; k<REPEAT; ++k) v2 = sm1 * v1;) + BENCH(asm("#myc"); v2 = sm1 * v1; asm("#myd");) std::cout << " a * v:\t" << timer.value() << endl; - - BENCH(for (int k=0; k<REPEAT; ++k) { asm("#mya"); v2 = sm1.transpose() * v1; asm("#myb"); }) - + + BENCH( { asm("#mya"); v2 = sm1.transpose() * v1; asm("#myb"); }) + std::cout << " a' * v:\t" << timer.value() << endl; } - + // { // DynamicSparseMatrix<Scalar> m1(sm1); // std::cout << "Eigen dyn-sparse\t" << m1.nonZeros()/float(m1.rows()*m1.cols())*100 << "%\n"; -// +// // BENCH(for (int k=0; k<REPEAT; ++k) v2 = m1 * v1;) // std::cout << " a * v:\t" << timer.value() << endl; -// +// // BENCH(for (int k=0; k<REPEAT; ++k) v2 = m1.transpose() * v1;) // std::cout << " a' * v:\t" << timer.value() << endl; // } @@ -118,23 +118,15 @@ int main(int argc, char *argv[]) //GmmDynSparse gmmT3(rows,cols); GmmSparse m1(rows,cols); eiToGmm(sm1, m1); - + std::vector<Scalar> gmmV1(cols), gmmV2(cols); Map<Matrix<Scalar,Dynamic,1> >(&gmmV1[0], cols) = v1; Map<Matrix<Scalar,Dynamic,1> >(&gmmV2[0], cols) = v2; - timer.reset(); - timer.start(); - for (int k=0; k<REPEAT; ++k) - gmm::mult(m1, gmmV1, gmmV2); - timer.stop(); + BENCH( asm("#myx"); gmm::mult(m1, gmmV1, gmmV2); asm("#myy"); ) std::cout << " a * v:\t" << timer.value() << endl; - timer.reset(); - timer.start(); - for (int k=0; k<REPEAT; ++k) - gmm::mult(gmm::transposed(m1), gmmV1, gmmV2); - timer.stop(); + BENCH( gmm::mult(gmm::transposed(m1), gmmV1, gmmV2); ) std::cout << " a' * v:\t" << timer.value() << endl; } #endif |