aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorGravatar Benoit Jacob <benoitjacob@google.com>2015-03-15 18:05:12 -0400
committerGravatar Benoit Jacob <benoitjacob@google.com>2015-03-15 18:05:12 -0400
commite56aabf205a1e8f581dd8a46d7d46ce79c45e158 (patch)
tree434040cd952694f849af411315d6d8dd4186fcbe
parentb6b88c08082dcfc5dd81c6997d6090507267cc13 (diff)
Refactor computeProductBlockingSizes to make room for the possibility of using lookup tables
-rw-r--r--Eigen/Core1
-rw-r--r--Eigen/src/Core/MathFunctions.h36
-rw-r--r--Eigen/src/Core/products/GeneralBlockPanelKernel.h100
-rw-r--r--Eigen/src/Core/products/LookupBlockingSizesTable.h89
-rw-r--r--Eigen/src/Core/util/ForwardDeclarations.h8
5 files changed, 182 insertions, 52 deletions
diff --git a/Eigen/Core b/Eigen/Core
index 0b8eaa61c..138c34916 100644
--- a/Eigen/Core
+++ b/Eigen/Core
@@ -381,6 +381,7 @@ using std::ptrdiff_t;
#include "src/Core/Inverse.h"
#include "src/Core/TriangularMatrix.h"
#include "src/Core/SelfAdjointView.h"
+#include "src/Core/products/LookupBlockingSizesTable.h"
#include "src/Core/products/GeneralBlockPanelKernel.h"
#include "src/Core/products/Parallelizer.h"
#include "src/Core/ProductEvaluators.h"
diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h
index 3c76a58b9..0fde5c71e 100644
--- a/Eigen/src/Core/MathFunctions.h
+++ b/Eigen/src/Core/MathFunctions.h
@@ -473,48 +473,48 @@ struct random_default_impl<Scalar, false, false>
};
enum {
- floor_log2_terminate,
- floor_log2_move_up,
- floor_log2_move_down,
- floor_log2_bogus
+ meta_floor_log2_terminate,
+ meta_floor_log2_move_up,
+ meta_floor_log2_move_down,
+ meta_floor_log2_bogus
};
-template<unsigned int n, int lower, int upper> struct floor_log2_selector
+template<unsigned int n, int lower, int upper> struct meta_floor_log2_selector
{
enum { middle = (lower + upper) / 2,
- value = (upper <= lower + 1) ? int(floor_log2_terminate)
- : (n < (1 << middle)) ? int(floor_log2_move_down)
- : (n==0) ? int(floor_log2_bogus)
- : int(floor_log2_move_up)
+ value = (upper <= lower + 1) ? int(meta_floor_log2_terminate)
+ : (n < (1 << middle)) ? int(meta_floor_log2_move_down)
+ : (n==0) ? int(meta_floor_log2_bogus)
+ : int(meta_floor_log2_move_up)
};
};
template<unsigned int n,
int lower = 0,
int upper = sizeof(unsigned int) * CHAR_BIT - 1,
- int selector = floor_log2_selector<n, lower, upper>::value>
-struct floor_log2 {};
+ int selector = meta_floor_log2_selector<n, lower, upper>::value>
+struct meta_floor_log2 {};
template<unsigned int n, int lower, int upper>
-struct floor_log2<n, lower, upper, floor_log2_move_down>
+struct meta_floor_log2<n, lower, upper, meta_floor_log2_move_down>
{
- enum { value = floor_log2<n, lower, floor_log2_selector<n, lower, upper>::middle>::value };
+ enum { value = meta_floor_log2<n, lower, meta_floor_log2_selector<n, lower, upper>::middle>::value };
};
template<unsigned int n, int lower, int upper>
-struct floor_log2<n, lower, upper, floor_log2_move_up>
+struct meta_floor_log2<n, lower, upper, meta_floor_log2_move_up>
{
- enum { value = floor_log2<n, floor_log2_selector<n, lower, upper>::middle, upper>::value };
+ enum { value = meta_floor_log2<n, meta_floor_log2_selector<n, lower, upper>::middle, upper>::value };
};
template<unsigned int n, int lower, int upper>
-struct floor_log2<n, lower, upper, floor_log2_terminate>
+struct meta_floor_log2<n, lower, upper, meta_floor_log2_terminate>
{
enum { value = (n >= ((unsigned int)(1) << (lower+1))) ? lower+1 : lower };
};
template<unsigned int n, int lower, int upper>
-struct floor_log2<n, lower, upper, floor_log2_bogus>
+struct meta_floor_log2<n, lower, upper, meta_floor_log2_bogus>
{
// no value, error at compile time
};
@@ -551,7 +551,7 @@ struct random_default_impl<Scalar, false, true>
#ifdef EIGEN_MAKING_DOCS
return run(Scalar(NumTraits<Scalar>::IsSigned ? -10 : 0), Scalar(10));
#else
- enum { rand_bits = floor_log2<(unsigned int)(RAND_MAX)+1>::value,
+ enum { rand_bits = meta_floor_log2<(unsigned int)(RAND_MAX)+1>::value,
scalar_bits = sizeof(Scalar) * CHAR_BIT,
shift = EIGEN_PLAIN_ENUM_MAX(0, int(rand_bits) - int(scalar_bits)),
offset = NumTraits<Scalar>::IsSigned ? (1 << (EIGEN_PLAIN_ENUM_MIN(rand_bits,scalar_bits)-1)) : 0
diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
index 1b47f1a6d..617439ff6 100644
--- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h
+++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
@@ -74,45 +74,23 @@ inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1, std::ptrdiff
}
}
-/** \brief Computes the blocking parameters for a m x k times k x n matrix product
- *
- * \param[in,out] k Input: the third dimension of the product. Output: the blocking size along the same dimension.
- * \param[in,out] m Input: the number of rows of the left hand side. Output: the blocking size along the same dimension.
- * \param[in,out] n Input: the number of columns of the right hand side. Output: the blocking size along the same dimension.
- *
- * Given a m x k times k x n matrix product of scalar types \c LhsScalar and \c RhsScalar,
- * this function computes the blocking size parameters along the respective dimensions
- * for matrix products and related algorithms. The blocking sizes depends on various
- * parameters:
- * - the L1 and L2 cache sizes,
- * - the register level blocking sizes defined by gebp_traits,
- * - the number of scalars that fit into a packet (when vectorization is enabled).
- *
- * \sa setCpuCacheSizes */
+/* Helper for computeProductBlockingSizes.
+ *
+ * Given a m x k times k x n matrix product of scalar types \c LhsScalar and \c RhsScalar,
+ * this function computes the blocking size parameters along the respective dimensions
+ * for matrix products and related algorithms. The blocking sizes depends on various
+ * parameters:
+ * - the L1 and L2 cache sizes,
+ * - the register level blocking sizes defined by gebp_traits,
+ * - the number of scalars that fit into a packet (when vectorization is enabled).
+ *
+ * \sa setCpuCacheSizes */
template<typename LhsScalar, typename RhsScalar, int KcFactor>
-void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads = 1)
+void evaluateProductBlockingSizesHeuristic(Index& k, Index& m, Index& n, Index num_threads = 1)
{
typedef gebp_traits<LhsScalar,RhsScalar> Traits;
-#ifdef EIGEN_TEST_SPECIFIC_BLOCKING_SIZES
- if (EIGEN_TEST_SPECIFIC_BLOCKING_SIZES) {
- EIGEN_UNUSED_VARIABLE(num_threads);
- enum {
- kr = 8,
- mr = Traits::mr,
- nr = Traits::nr
- };
- k = std::min<Index>(k, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_K);
- if (k > kr) k -= k % kr;
- m = std::min<Index>(m, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_M);
- if (m > mr) m -= m % mr;
- n = std::min<Index>(n, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_N);
- if (n > nr) n -= n % nr;
- return;
- }
-#endif
-
// Explanations:
// Let's recall that the product algorithms form mc x kc vertical panels A' on the lhs and
// kc x nc blocks B' on the rhs. B' has to fit into L2/L3 cache. Moreover, A' is processed
@@ -281,6 +259,60 @@ void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads
}
}
+inline bool useSpecificBlockingSizes(Index& k, Index& m, Index& n)
+{
+#ifdef EIGEN_TEST_SPECIFIC_BLOCKING_SIZES
+ if (EIGEN_TEST_SPECIFIC_BLOCKING_SIZES) {
+ k = std::min<Index>(k, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_K);
+ m = std::min<Index>(m, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_M);
+ n = std::min<Index>(n, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_N);
+ return true;
+ }
+#else
+ EIGEN_UNUSED_VARIABLE(k)
+ EIGEN_UNUSED_VARIABLE(m)
+ EIGEN_UNUSED_VARIABLE(n)
+ return false;
+#endif
+}
+
+/** \brief Computes the blocking parameters for a m x k times k x n matrix product
+ *
+ * \param[in,out] k Input: the third dimension of the product. Output: the blocking size along the same dimension.
+ * \param[in,out] m Input: the number of rows of the left hand side. Output: the blocking size along the same dimension.
+ * \param[in,out] n Input: the number of columns of the right hand side. Output: the blocking size along the same dimension.
+ *
+ * Given a m x k times k x n matrix product of scalar types \c LhsScalar and \c RhsScalar,
+ * this function computes the blocking size parameters along the respective dimensions
+ * for matrix products and related algorithms.
+ *
+ * The blocking size parameters may be evaluated:
+ * - either by a heuristic based on cache sizes;
+ * - or using a precomputed lookup table;
+ * - or using fixed prescribed values (for testing purposes).
+ *
+ * \sa setCpuCacheSizes */
+
+template<typename LhsScalar, typename RhsScalar, int KcFactor>
+void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads = 1)
+{
+ if (!useSpecificBlockingSizes(k, m, n)) {
+ if (!lookupBlockingSizesFromTable<LhsScalar, RhsScalar>(k, m, n, num_threads)) {
+ evaluateProductBlockingSizesHeuristic<LhsScalar, RhsScalar, KcFactor>(k, m, n, num_threads);
+ }
+ }
+
+ typedef gebp_traits<LhsScalar,RhsScalar> Traits;
+ enum {
+ kr = 8,
+ mr = Traits::mr,
+ nr = Traits::nr
+ };
+ if (k > kr) k -= k % kr;
+ if (m > mr) m -= m % mr;
+ if (n > nr) n -= n % nr;
+}
+
template<typename LhsScalar, typename RhsScalar>
inline void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads = 1)
{
diff --git a/Eigen/src/Core/products/LookupBlockingSizesTable.h b/Eigen/src/Core/products/LookupBlockingSizesTable.h
new file mode 100644
index 000000000..85aeedec8
--- /dev/null
+++ b/Eigen/src/Core/products/LookupBlockingSizesTable.h
@@ -0,0 +1,89 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2015 Benoit Jacob <benoitjacob@google.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_LOOKUP_BLOCKING_SIZES_TABLE_H
+#define EIGEN_LOOKUP_BLOCKING_SIZES_TABLE_H
+
+namespace Eigen {
+
+namespace internal {
+
+template <typename LhsScalar,
+ typename RhsScalar,
+ bool HasLookupTable = BlockingSizesLookupTable<LhsScalar, RhsScalar>::NumSizes != 0 >
+struct LookupBlockingSizesFromTableImpl
+{
+ static bool run(Index&, Index&, Index&, Index)
+ {
+ return false;
+ }
+};
+
+inline uint8_t floor_log2_helper(uint16_t& x, size_t offset)
+{
+ uint16_t y = x >> offset;
+ if (y) {
+ x = y;
+ return offset;
+ } else {
+ return 0;
+ }
+}
+
+inline uint8_t floor_log2(uint16_t x)
+{
+ return floor_log2_helper(x, 8)
+ + floor_log2_helper(x, 4)
+ + floor_log2_helper(x, 2)
+ + floor_log2_helper(x, 1);
+}
+
+inline uint8_t ceil_log2(uint16_t x)
+{
+ return x > 1 ? floor_log2(x - 1) + 1 : 0;
+}
+
+template <typename LhsScalar,
+ typename RhsScalar>
+struct LookupBlockingSizesFromTableImpl<LhsScalar, RhsScalar, true>
+{
+ static bool run(Index& k, Index& m, Index& n, Index)
+ {
+ using std::min;
+ using std::max;
+ typedef BlockingSizesLookupTable<LhsScalar, RhsScalar> Table;
+ const uint16_t minsize = Table::BaseSize;
+ const uint16_t maxsize = minsize << (Table::NumSizes + 1);
+ const uint16_t k_clamped = max<uint16_t>(minsize, min<Index>(k, maxsize));
+ const uint16_t m_clamped = max<uint16_t>(minsize, min<Index>(m, maxsize));
+ const uint16_t n_clamped = max<uint16_t>(minsize, min<Index>(n, maxsize));
+ const size_t k_index = ceil_log2(k_clamped / minsize);
+ const size_t m_index = ceil_log2(m_clamped / minsize);
+ const size_t n_index = ceil_log2(n_clamped / minsize);
+ const size_t index = n_index + Table::NumSizes * (m_index + Table::NumSizes * k_index);
+ const uint16_t table_entry = Table::Data()[index];
+ k = min(k, 1 << ((table_entry & 0xf00) >> 8));
+ m = min(m, 1 << ((table_entry & 0x0f0) >> 4));
+ n = min(n, 1 << ((table_entry & 0x00f) >> 0));
+ return true;
+ }
+};
+
+template <typename LhsScalar,
+ typename RhsScalar>
+bool lookupBlockingSizesFromTable(Index& k, Index& m, Index& n, Index num_threads)
+{
+ return LookupBlockingSizesFromTableImpl<LhsScalar, RhsScalar>::run(k, m, n, num_threads);
+}
+
+}
+
+}
+
+#endif // EIGEN_LOOKUP_BLOCKING_SIZES_TABLE_H
diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h
index c23892c50..8034f9b5e 100644
--- a/Eigen/src/Core/util/ForwardDeclarations.h
+++ b/Eigen/src/Core/util/ForwardDeclarations.h
@@ -287,6 +287,14 @@ struct stem_function
typedef std::complex<typename NumTraits<Scalar>::Real> ComplexScalar;
typedef ComplexScalar type(ComplexScalar, int);
};
+
+template <typename LhsScalar,
+ typename RhsScalar>
+struct BlockingSizesLookupTable
+{
+ static const size_t NumSizes = 0;
+};
+
}
} // end namespace Eigen