diff options
author | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2014-08-14 22:13:21 -0700 |
---|---|---|
committer | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2014-08-14 22:13:21 -0700 |
commit | 33c702c79fe227a5b22229c26af276d359a6cb1d (patch) | |
tree | 86ca1888d05abb165b3ddbe892cfff7b5464573d /unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h | |
parent | 756292f8aa124c842d1e6d9beeb0c416c0d9a7f3 (diff) |
Added support for fast integer divisions by a constant
Sped up tensor slicing by a factor of 3 by using these fast integer divisions.
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h')
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h | 82 |
1 files changed, 82 insertions, 0 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h b/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h new file mode 100644 index 000000000..cf97031be --- /dev/null +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h @@ -0,0 +1,82 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_INTDIV_H +#define EIGEN_CXX11_TENSOR_TENSOR_INTDIV_H + + +namespace Eigen { + +/** \internal + * + * \class TensorIntDiv + * \ingroup CXX11_Tensor_Module + * + * \brief Fast integer division by a constant. + * + * See the paper from Granlund and Montgomery for explanation. + * (at http://dx.doi.org/10.1145/773473.178249) + * + * \sa Tensor + */ + +namespace internal { + +template <typename T> +struct TensorIntDivisor { + public: + TensorIntDivisor() { + multiplier = 0; + shift1 = 0; + shift2 = 0; + } + + // Must have 1 <= divider <= 2^31-1 + TensorIntDivisor(const T divider) { + static const int N = 32; + eigen_assert(divider > 0); + eigen_assert(divider <= (1<<(N-1)) - 1); + + // fast ln2 + const int leading_zeros = __builtin_clz(divider); + const int l = N - (leading_zeros+1); + + multiplier = (static_cast<uint64_t>(1) << (N+l)) / divider - (static_cast<uint64_t>(1) << N) + 1; + shift1 = (std::min)(1, l); + shift2 = (std::max)(0, l-1); + } + + // Must have 0 <= numerator <= 2^32-1 + T divide(const T numerator) const { + static const int N = 32; + eigen_assert(numerator >= 0); + eigen_assert(numerator <= (1ull<<N) - 1); + + uint32_t t1 = (multiplier * numerator) >> 32; + uint32_t t = (static_cast<uint32_t>(numerator) - t1) >> shift1; + return (t1 + t) >> shift2; + } + + private: + uint64_t multiplier; + int32_t shift1; + int32_t shift2; +}; + + +template <typename T> +static T operator / (const T& numerator, const TensorIntDivisor<T>& divisor) { + return divisor.divide(numerator); +} + + +} // end namespace internal +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_INTDIV_H |