aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src/Core/products
diff options
context:
space:
mode:
authorGravatar Rasmus Munk Larsen <rmlarsen@google.com>2020-11-12 18:02:37 +0000
committerGravatar Rasmus Munk Larsen <rmlarsen@google.com>2020-11-12 18:02:37 +0000
commitcf12474a8b7c121f7ca95e28f8ee3f8d9405cd41 (patch)
tree0967dbdf8c8fe89ca1c698a54168c9b7f8a91d52 /Eigen/src/Core/products
parentc29935b323ffb0b903f640111f0a0b0440e94a2e (diff)
Optimize matrix*matrix and matrix*vector products when they correspond to inner products at runtime.
This speeds up inner products where the one or or both arguments is dynamic for small and medium-sized vectors (up to 32k). name old time/op new time/op delta BM_VecVecStatStat<float>/1 1.64ns ± 0% 1.64ns ± 0% ~ BM_VecVecStatStat<float>/8 2.99ns ± 0% 2.99ns ± 0% ~ BM_VecVecStatStat<float>/64 7.00ns ± 1% 7.04ns ± 0% +0.66% BM_VecVecStatStat<float>/512 61.6ns ± 0% 61.6ns ± 0% ~ BM_VecVecStatStat<float>/4k 551ns ± 0% 553ns ± 1% +0.26% BM_VecVecStatStat<float>/32k 4.45µs ± 0% 4.45µs ± 0% ~ BM_VecVecStatStat<float>/256k 77.9µs ± 0% 78.1µs ± 1% ~ BM_VecVecStatStat<float>/1M 312µs ± 0% 312µs ± 1% ~ BM_VecVecDynStat<float>/1 13.3ns ± 1% 4.6ns ± 0% -65.35% BM_VecVecDynStat<float>/8 14.4ns ± 0% 6.2ns ± 0% -57.00% BM_VecVecDynStat<float>/64 24.0ns ± 0% 10.2ns ± 3% -57.57% BM_VecVecDynStat<float>/512 138ns ± 0% 68ns ± 0% -50.52% BM_VecVecDynStat<float>/4k 1.11µs ± 0% 0.56µs ± 0% -49.72% BM_VecVecDynStat<float>/32k 8.89µs ± 0% 4.46µs ± 0% -49.89% BM_VecVecDynStat<float>/256k 78.2µs ± 0% 78.1µs ± 1% ~ BM_VecVecDynStat<float>/1M 313µs ± 0% 312µs ± 1% ~ BM_VecVecDynDyn<float>/1 10.4ns ± 0% 10.5ns ± 0% +0.91% BM_VecVecDynDyn<float>/8 12.0ns ± 3% 11.9ns ± 0% ~ BM_VecVecDynDyn<float>/64 37.4ns ± 0% 19.6ns ± 1% -47.57% BM_VecVecDynDyn<float>/512 159ns ± 0% 81ns ± 0% -49.07% BM_VecVecDynDyn<float>/4k 1.13µs ± 0% 0.58µs ± 1% -49.11% BM_VecVecDynDyn<float>/32k 8.91µs ± 0% 5.06µs ±12% -43.23% BM_VecVecDynDyn<float>/256k 78.2µs ± 0% 78.2µs ± 1% ~ BM_VecVecDynDyn<float>/1M 313µs ± 0% 312µs ± 1% ~
Diffstat (limited to 'Eigen/src/Core/products')
-rw-r--r--Eigen/src/Core/products/GeneralMatrixMatrix.h10
1 files changed, 8 insertions, 2 deletions
diff --git a/Eigen/src/Core/products/GeneralMatrixMatrix.h b/Eigen/src/Core/products/GeneralMatrixMatrix.h
index 508c05c97..8cdf14702 100644
--- a/Eigen/src/Core/products/GeneralMatrixMatrix.h
+++ b/Eigen/src/Core/products/GeneralMatrixMatrix.h
@@ -471,15 +471,21 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct>
if(a_lhs.cols()==0 || a_lhs.rows()==0 || a_rhs.cols()==0)
return;
- // Fallback to GEMV if either the lhs or rhs is a runtime vector
- if (dst.cols() == 1)
+ if (dst.cols() == 1 && dst.rows() == 1) {
+ // Fallback to inner product if both the lhs and rhs is a runtime vector.
+ dst.coeffRef(0,0) += alpha * (a_lhs.row(0).transpose().cwiseProduct(a_rhs.col(0)).sum());
+ return;
+ }
+ else if (dst.cols() == 1)
{
+ // Fallback to GEMV if either the lhs or rhs is a runtime vector
typename Dest::ColXpr dst_vec(dst.col(0));
return internal::generic_product_impl<Lhs,typename Rhs::ConstColXpr,DenseShape,DenseShape,GemvProduct>
::scaleAndAddTo(dst_vec, a_lhs, a_rhs.col(0), alpha);
}
else if (dst.rows() == 1)
{
+ // Fallback to GEMV if either the lhs or rhs is a runtime vector
typename Dest::RowXpr dst_vec(dst.row(0));
return internal::generic_product_impl<typename Lhs::ConstRowXpr,Rhs,DenseShape,DenseShape,GemvProduct>
::scaleAndAddTo(dst_vec, a_lhs.row(0), a_rhs, alpha);