aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src/Core
diff options
context:
space:
mode:
authorGravatar Gael Guennebaud <g.gael@free.fr>2017-08-24 10:43:21 +0200
committerGravatar Gael Guennebaud <g.gael@free.fr>2017-08-24 10:43:21 +0200
commit12249849b5ef7ec0c64f74440690fb00708b8da6 (patch)
tree95934df3b5e2b0b98520168cbe31964fcb85e365 /Eigen/src/Core
parent39864ebe1eb7c8028769cf5d8750faaabce22446 (diff)
Make the threshold from gemm to coeff-based-product configurable, and add some explanations.
Diffstat (limited to 'Eigen/src/Core')
-rw-r--r--Eigen/src/Core/GeneralProduct.h10
-rw-r--r--Eigen/src/Core/products/GeneralMatrixMatrix.h12
2 files changed, 19 insertions, 3 deletions
diff --git a/Eigen/src/Core/GeneralProduct.h b/Eigen/src/Core/GeneralProduct.h
index dec24848d..483277fe6 100644
--- a/Eigen/src/Core/GeneralProduct.h
+++ b/Eigen/src/Core/GeneralProduct.h
@@ -18,6 +18,16 @@ enum {
Small = 3
};
+// Define the threshold value to fall back from the generic matrix-matrix product
+// implementation (heavy) to the lightweight coeff-based product one.
+// See generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct>
+// in products/GeneralMatrixMatrix.h for more details.
+// TODO This threshold should also be used in the compile-time selector below.
+#ifndef EIGEN_GEMM_TO_COEFFBASED_THRESHOLD
+// This default value has been obtained on a Haswell architecture.
+#define EIGEN_GEMM_TO_COEFFBASED_THRESHOLD 20
+#endif
+
namespace internal {
template<int Rows, int Cols, int Depth> struct product_type_selector;
diff --git a/Eigen/src/Core/products/GeneralMatrixMatrix.h b/Eigen/src/Core/products/GeneralMatrixMatrix.h
index 6440e1d09..ed4d3182b 100644
--- a/Eigen/src/Core/products/GeneralMatrixMatrix.h
+++ b/Eigen/src/Core/products/GeneralMatrixMatrix.h
@@ -427,7 +427,13 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct>
template<typename Dst>
static void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
{
- if((rhs.rows()+dst.rows()+dst.cols())<20 && rhs.rows()>0)
+ // See http://eigen.tuxfamily.org/bz/show_bug.cgi?id=404 for a discussion and helper program
+ // to determine the following heuristic.
+ // EIGEN_GEMM_TO_COEFFBASED_THRESHOLD is typically defined to 20 in GeneralProduct.h,
+ // unless it has been specialized by the user or for a given architecture.
+ // Note that the condition rhs.rows()>0 was required because lazy product is (was?) not happy with empty inputs.
+ // I'm not sure it is still required.
+ if((rhs.rows()+dst.rows()+dst.cols())<EIGEN_GEMM_TO_COEFFBASED_THRESHOLD && rhs.rows()>0)
lazyproduct::evalTo(dst, lhs, rhs);
else
{
@@ -439,7 +445,7 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct>
template<typename Dst>
static void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
{
- if((rhs.rows()+dst.rows()+dst.cols())<20 && rhs.rows()>0)
+ if((rhs.rows()+dst.rows()+dst.cols())<EIGEN_GEMM_TO_COEFFBASED_THRESHOLD && rhs.rows()>0)
lazyproduct::addTo(dst, lhs, rhs);
else
scaleAndAddTo(dst,lhs, rhs, Scalar(1));
@@ -448,7 +454,7 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct>
template<typename Dst>
static void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
{
- if((rhs.rows()+dst.rows()+dst.cols())<20 && rhs.rows()>0)
+ if((rhs.rows()+dst.rows()+dst.cols())<EIGEN_GEMM_TO_COEFFBASED_THRESHOLD && rhs.rows()>0)
lazyproduct::subTo(dst, lhs, rhs);
else
scaleAndAddTo(dst, lhs, rhs, Scalar(-1));