about | summary | refs | log | tree | commit | diff | homepage
path: root/Eigen/src/Core/products
diff options
context:
space:
mode:
author Rasmus Munk Larsen <rmlarsen@google.com> 2019-02-14 13:46:13 -0800
committer Rasmus Munk Larsen <rmlarsen@google.com> 2019-02-14 13:46:13 -0800
commit 65e23ca7e93b6836cb197adcb1e832ae94203b35 (patch)
tree a0f8bfe30a0eb5d49443373736bf6366dadfea54 /Eigen/src/Core/products
parent efeabee4451209c95a1ad37b191266d35e31e8ec (diff)
.
Diffstat (limited to 'Eigen/src/Core/products')
-rw-r--r-- Eigen/src/Core/products/GeneralMatrixVector.h 25
1 files changed, 5 insertions, 20 deletions
diff --git a/Eigen/src/Core/products/GeneralMatrixVector.h b/Eigen/src/Core/products/GeneralMatrixVector.h
index e7dc25478..767feb99d 100644
--- a/Eigen/src/Core/products/GeneralMatrixVector.h
+++ b/Eigen/src/Core/products/GeneralMatrixVector.h
@@ -255,20 +255,11 @@ EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE void general_matrix_vector_product<Index,Lhs
conj_helper<LhsScalar,RhsScalar,ConjugateLhs,ConjugateRhs> cj;
conj_helper<LhsPacket,RhsPacket,ConjugateLhs,ConjugateRhs> pcj;
- // TODO: fine tune the following heuristic. The rationale is that if the
- // matrix is very large, processing multiple rows at once might be counter
- // productive wrt cache.
-#if EIGEN_ARCH_ARM_OR_ARM64
- // This threshold was empirically determined using a Pixel2.
- // The little cores are a lot more sensitive to this number
- // than the big cores.
- const Index cache_threshold = 1024;
-#else
- const Index cache_threshold = 1024 * 256;
-#endif
-
- const Index row_bytes = lhs.stride() * sizeof(LhsScalar);
- const Index n8 = (8 * row_bytes > cache_threshold) ? 0 : (rows - 7);
+ // TODO: fine tune the following heuristic. The rationale is that if the matrix is very large,
+ // processing 8 rows at once might be counter productive wrt cache.
+ const Index n8 = lhs.stride()*sizeof(LhsScalar)>32000 ? 0 : rows-7;
+ const Index n4 = rows-3;
+ const Index n2 = rows-1;
// TODO: for padded aligned inputs, we could enable aligned reads
enum { LhsAlignment = Unaligned };
@@ -329,9 +320,6 @@ EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE void general_matrix_vector_product<Index,Lhs
res[(i+6)*resIncr] += alpha*cc6;
res[(i+7)*resIncr] += alpha*cc7;
}
-
- if (i == rows) return;
- const Index n4 = (4 * row_bytes > cache_threshold) ? 0 : (rows - 3);
for(; i<n4; i+=4)
{
ResPacket c0 = pset1<ResPacket>(ResScalar(0)),
@@ -367,9 +355,6 @@ EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE void general_matrix_vector_product<Index,Lhs
res[(i+2)*resIncr] += alpha*cc2;
res[(i+3)*resIncr] += alpha*cc3;
}
-
- if (i == rows) return;
- const Index n2 = (2 * row_bytes > cache_threshold) ? 0 : (rows - 1);
for(; i<n2; i+=2)
{
ResPacket c0 = pset1<ResPacket>(ResScalar(0)),