diff options
Diffstat (limited to 'Eigen/src')
-rw-r--r-- | Eigen/src/Core/arch/NEON/BlockingSizesLookupTables.h | 110 | ||||
-rw-r--r-- | Eigen/src/Core/products/GeneralBlockPanelKernel.h | 5 | ||||
-rw-r--r-- | Eigen/src/Core/products/LookupBlockingSizesTable.h | 97 | ||||
-rw-r--r-- | Eigen/src/Core/util/ForwardDeclarations.h | 8 |
4 files changed, 1 insertions, 219 deletions
diff --git a/Eigen/src/Core/arch/NEON/BlockingSizesLookupTables.h b/Eigen/src/Core/arch/NEON/BlockingSizesLookupTables.h deleted file mode 100644 index 5007c155d..000000000 --- a/Eigen/src/Core/arch/NEON/BlockingSizesLookupTables.h +++ /dev/null @@ -1,110 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2015 Benoit Jacob <benoitjacob@google.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_NEON_BLOCKING_SIZES_LOOKUP_TABLES_H -#define EIGEN_NEON_BLOCKING_SIZES_LOOKUP_TABLES_H - -namespace Eigen { -namespace internal { - -/* The following lookup table was generated from measurements on a Nexus 5, - * which has a Qualcomm Krait 400 CPU. This is very representative of current - * 32bit (ARMv7) Android devices. On the other hand, I don't know how - * representative that is outside of these conditions. Accordingly, - * let's only use this lookup table on ARM 32bit on Android for now. - * - * Measurements were single-threaded, with Scalar=float, compiled with - * -mfpu=neon-vfpv4, so the pmadd instruction used was VFMA.F32. - * - * The device was cooled, allowing it to run a the max clock speed throughout. - * This may not be representative of real-world thermal conditions. - * - * The benchmark attempted to flush caches to test cold-cache performance. - */ -#if EIGEN_ARCH_ARM && EIGEN_OS_ANDROID -template<> -struct BlockingSizesLookupTable<float, float> { - static const size_t BaseSize = 16; - static const size_t NumSizes = 8; - static const unsigned short* Data() { - static const unsigned short data[512] = { - 0x444, 0x445, 0x446, 0x447, 0x448, 0x449, 0x447, 0x447, - 0x454, 0x455, 0x456, 0x457, 0x458, 0x459, 0x45a, 0x456, - 0x464, 0x465, 0x466, 0x467, 0x468, 0x469, 0x46a, 0x467, - 0x474, 0x475, 0x476, 0x467, 0x478, 0x479, 0x476, 0x478, - 0x474, 0x475, 0x476, 0x477, 0x478, 0x479, 0x476, 0x476, - 0x474, 0x475, 0x476, 0x477, 0x478, 0x479, 0x496, 0x488, - 0x474, 0x475, 0x476, 0x4a6, 0x496, 0x496, 0x495, 0x4a6, - 0x474, 0x475, 0x466, 0x4a6, 0x497, 0x4a5, 0x496, 0x4a5, - 0x544, 0x545, 0x546, 0x547, 0x548, 0x549, 0x54a, 0x54b, - 0x554, 0x555, 0x556, 0x557, 0x558, 0x559, 0x55a, 0x55b, - 0x564, 0x565, 0x566, 0x567, 0x568, 0x569, 0x56a, 0x56b, - 0x564, 0x565, 0x566, 0x567, 0x568, 0x569, 0x56a, 0x576, - 0x564, 0x565, 0x566, 0x567, 0x568, 0x569, 0x56a, 0x587, - 0x564, 0x565, 0x566, 0x567, 0x596, 0x596, 0x596, 0x597, - 0x574, 0x565, 0x566, 0x596, 0x596, 0x5a6, 0x5a6, 0x5a6, - 0x564, 0x565, 0x5a6, 0x596, 0x5a6, 0x5a6, 0x5a6, 0x5a6, - 0x644, 0x645, 0x646, 0x647, 0x648, 0x649, 0x64a, 0x64b, - 0x644, 0x655, 0x656, 0x657, 0x658, 0x659, 0x65a, 0x65b, - 0x664, 0x665, 0x666, 0x667, 0x668, 0x669, 0x65a, 0x667, - 0x654, 0x665, 0x676, 0x677, 0x678, 0x679, 0x67a, 0x675, - 0x684, 0x675, 0x686, 0x687, 0x688, 0x688, 0x687, 0x686, - 0x664, 0x685, 0x666, 0x677, 0x697, 0x696, 0x697, 0x697, - 0x664, 0x665, 0x696, 0x696, 0x685, 0x6a6, 0x696, 0x696, - 0x664, 0x675, 0x686, 0x696, 0x6a6, 0x696, 0x696, 0x696, - 0x744, 0x745, 0x746, 0x747, 0x748, 0x749, 0x74a, 0x747, - 0x754, 0x755, 0x756, 0x757, 0x758, 0x759, 0x75a, 0x757, - 0x764, 0x765, 0x756, 0x767, 0x768, 0x759, 0x75a, 0x766, - 0x744, 0x755, 0x766, 0x777, 0x768, 0x759, 0x778, 0x777, - 0x744, 0x745, 0x766, 0x777, 0x788, 0x786, 0x786, 0x788, - 0x754, 0x755, 0x766, 0x787, 0x796, 0x796, 0x787, 0x796, - 0x684, 0x695, 0x696, 0x6a6, 0x795, 0x786, 0x795, 0x796, - 0x684, 0x695, 0x696, 0x795, 0x786, 0x796, 0x795, 0x796, - 0x844, 0x845, 0x846, 0x847, 0x848, 0x849, 0x848, 0x848, - 0x844, 0x855, 0x846, 0x847, 0x848, 0x849, 0x855, 0x857, - 0x844, 0x845, 0x846, 0x857, 0x848, 0x859, 0x866, 0x865, - 0x844, 0x855, 0x846, 0x847, 0x878, 0x859, 0x877, 0x877, - 0x844, 0x855, 0x846, 0x867, 0x886, 0x887, 0x885, 0x886, - 0x784, 0x785, 0x786, 0x877, 0x897, 0x885, 0x896, 0x896, - 0x684, 0x695, 0x686, 0x886, 0x885, 0x885, 0x886, 0x896, - 0x694, 0x6a5, 0x6a6, 0x885, 0x885, 0x886, 0x896, 0x896, - 0x944, 0x945, 0x946, 0x947, 0x948, 0x847, 0x847, 0x848, - 0x954, 0x855, 0x856, 0x947, 0x858, 0x857, 0x858, 0x858, - 0x944, 0x945, 0x946, 0x867, 0x948, 0x866, 0x867, 0x867, - 0x944, 0x975, 0x976, 0x877, 0x877, 0x877, 0x877, 0x877, - 0x784, 0x785, 0x886, 0x887, 0x886, 0x887, 0x887, 0x887, - 0x784, 0x785, 0x786, 0x796, 0x887, 0x897, 0x896, 0x896, - 0x684, 0x695, 0x6a6, 0x886, 0x886, 0x896, 0x896, 0x896, - 0x6a4, 0x6a5, 0x696, 0x896, 0x886, 0x896, 0x896, 0x896, - 0xa44, 0xa45, 0xa46, 0xa47, 0x847, 0x848, 0x847, 0x848, - 0xa44, 0xa45, 0x856, 0x857, 0x857, 0x857, 0x857, 0x857, - 0xa44, 0xa65, 0x866, 0x867, 0x867, 0x867, 0x867, 0x867, - 0x774, 0x875, 0x876, 0x877, 0x877, 0x877, 0x877, 0x877, - 0x784, 0x785, 0x886, 0x887, 0x887, 0x887, 0x887, 0x887, - 0x784, 0x785, 0x786, 0x787, 0x887, 0x896, 0x897, 0x897, - 0x684, 0x6a5, 0x696, 0x886, 0x886, 0x896, 0x896, 0x896, - 0x684, 0x6a5, 0x6a5, 0x886, 0x886, 0x896, 0x896, 0x896, - 0xb44, 0x845, 0x846, 0x847, 0x847, 0x945, 0x846, 0x946, - 0xb54, 0x855, 0x856, 0x857, 0x857, 0x856, 0x857, 0x856, - 0x864, 0x865, 0x866, 0x867, 0x867, 0x866, 0x866, 0x867, - 0x864, 0x875, 0x876, 0x877, 0x877, 0x877, 0x877, 0x877, - 0x784, 0x885, 0x886, 0x787, 0x887, 0x887, 0x887, 0x887, - 0x784, 0x785, 0x786, 0x796, 0x886, 0x897, 0x897, 0x897, - 0x684, 0x695, 0x696, 0x886, 0x896, 0x896, 0x896, 0x896, - 0x684, 0x685, 0x696, 0xb57, 0x896, 0x896, 0x896, 0x896 - }; - return data; - } -}; -#endif - -} -} - -#endif // EIGEN_NEON_BLOCKING_SIZES_LOOKUP_TABLES_H diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h index 320f96a39..1d62ccd93 100644 --- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h +++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h @@ -291,7 +291,6 @@ inline bool useSpecificBlockingSizes(Index& k, Index& m, Index& n) * * The blocking size parameters may be evaluated: * - either by a heuristic based on cache sizes; - * - or using a precomputed lookup table; * - or using fixed prescribed values (for testing purposes). * * \sa setCpuCacheSizes */ @@ -300,9 +299,7 @@ template<typename LhsScalar, typename RhsScalar, int KcFactor> void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads = 1) { if (!useSpecificBlockingSizes(k, m, n)) { - if (!lookupBlockingSizesFromTable<LhsScalar, RhsScalar>(k, m, n, num_threads)) { - evaluateProductBlockingSizesHeuristic<LhsScalar, RhsScalar, KcFactor>(k, m, n, num_threads); - } + evaluateProductBlockingSizesHeuristic<LhsScalar, RhsScalar, KcFactor>(k, m, n, num_threads); } typedef gebp_traits<LhsScalar,RhsScalar> Traits; diff --git a/Eigen/src/Core/products/LookupBlockingSizesTable.h b/Eigen/src/Core/products/LookupBlockingSizesTable.h deleted file mode 100644 index 39a53c8f1..000000000 --- a/Eigen/src/Core/products/LookupBlockingSizesTable.h +++ /dev/null @@ -1,97 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2015 Benoit Jacob <benoitjacob@google.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_LOOKUP_BLOCKING_SIZES_TABLE_H -#define EIGEN_LOOKUP_BLOCKING_SIZES_TABLE_H - -namespace Eigen { - -namespace internal { - -template <typename LhsScalar, - typename RhsScalar, - bool HasLookupTable = BlockingSizesLookupTable<LhsScalar, RhsScalar>::NumSizes != 0 > -struct LookupBlockingSizesFromTableImpl -{ - static bool run(Index&, Index&, Index&, Index) - { - return false; - } -}; - -inline size_t floor_log2_helper(unsigned short& x, size_t offset) -{ - unsigned short y = x >> offset; - if (y) { - x = y; - return offset; - } else { - return 0; - } -} - -inline size_t floor_log2(unsigned short x) -{ - return floor_log2_helper(x, 8) - + floor_log2_helper(x, 4) - + floor_log2_helper(x, 2) - + floor_log2_helper(x, 1); -} - -inline size_t ceil_log2(unsigned short x) -{ - return x > 1 ? floor_log2(x - 1) + 1 : 0; -} - -template <typename LhsScalar, - typename RhsScalar> -struct LookupBlockingSizesFromTableImpl<LhsScalar, RhsScalar, true> -{ - static bool run(Index& k, Index& m, Index& n, Index) - { - using std::min; - using std::max; - typedef BlockingSizesLookupTable<LhsScalar, RhsScalar> Table; - const unsigned short minsize = Table::BaseSize; - const unsigned short maxsize = minsize << (Table::NumSizes - 1); - const unsigned short k_clamped = max<unsigned short>(minsize, min<Index>(k, maxsize)); - const unsigned short m_clamped = max<unsigned short>(minsize, min<Index>(m, maxsize)); - const unsigned short n_clamped = max<unsigned short>(minsize, min<Index>(n, maxsize)); - const size_t k_index = ceil_log2(k_clamped / minsize); - const size_t m_index = ceil_log2(m_clamped / minsize); - const size_t n_index = ceil_log2(n_clamped / minsize); - const size_t index = n_index + Table::NumSizes * (m_index + Table::NumSizes * k_index); - const unsigned short table_entry = Table::Data()[index]; - k = min<Index>(k, 1 << ((table_entry & 0xf00) >> 8)); - m = min<Index>(m, 1 << ((table_entry & 0x0f0) >> 4)); - n = min<Index>(n, 1 << ((table_entry & 0x00f) >> 0)); - return true; - } -}; - -template <typename LhsScalar, - typename RhsScalar> -bool lookupBlockingSizesFromTable(Index& k, Index& m, Index& n, Index num_threads) -{ - if (num_threads > 1) { - // We don't currently have lookup tables recorded for multithread performance, - // and we have confirmed experimentally that our single-thread-recorded LUTs are - // poor for multithread performance, and our LUTs don't currently contain - // any annotation about multithread status (FIXME - we need that). - // So for now, we just early-return here. - return false; - } - return LookupBlockingSizesFromTableImpl<LhsScalar, RhsScalar>::run(k, m, n, num_threads); -} - -} - -} - -#endif // EIGEN_LOOKUP_BLOCKING_SIZES_TABLE_H diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h index 503d5acdf..0d24beb5a 100644 --- a/Eigen/src/Core/util/ForwardDeclarations.h +++ b/Eigen/src/Core/util/ForwardDeclarations.h @@ -288,14 +288,6 @@ struct stem_function typedef std::complex<typename NumTraits<Scalar>::Real> ComplexScalar; typedef ComplexScalar type(ComplexScalar, int); }; - -template <typename LhsScalar, - typename RhsScalar> -struct BlockingSizesLookupTable -{ - static const size_t NumSizes = 0; -}; - } } // end namespace Eigen |