aboutsummaryrefslogtreecommitdiffhomepage
path: root/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h
diff options
context:
space:
mode:
authorGravatar Benoit Steiner <benoit.steiner.goog@gmail.com>2014-08-14 22:13:21 -0700
committerGravatar Benoit Steiner <benoit.steiner.goog@gmail.com>2014-08-14 22:13:21 -0700
commit33c702c79fe227a5b22229c26af276d359a6cb1d (patch)
tree86ca1888d05abb165b3ddbe892cfff7b5464573d /unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h
parent756292f8aa124c842d1e6d9beeb0c416c0d9a7f3 (diff)
Added support for fast integer divisions by a constant
Sped up tensor slicing by a factor of 3 by using these fast integer divisions.
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h')
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h82
1 files changed, 82 insertions, 0 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h b/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h
new file mode 100644
index 000000000..cf97031be
--- /dev/null
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h
@@ -0,0 +1,82 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_CXX11_TENSOR_TENSOR_INTDIV_H
+#define EIGEN_CXX11_TENSOR_TENSOR_INTDIV_H
+
+
+namespace Eigen {
+
+/** \internal
+ *
+ * \class TensorIntDivisor
+ * \ingroup CXX11_Tensor_Module
+ *
+ * \brief Fast integer division by a constant.
+ *
+ * See the paper by Granlund and Montgomery, "Division by Invariant Integers
+ * using Multiplication", for an explanation of the algorithm
+ * (http://dx.doi.org/10.1145/773473.178249).
+ *
+ * \sa Tensor
+ */
+
+namespace internal {
+
+// Divides 32-bit unsigned values by a divisor that is fixed at construction
+// time, replacing the hardware division with one 64-bit multiplication, one
+// subtraction, one addition and two shifts.
+//
+// The constructor precomputes a "magic" multiplier and two shift amounts;
+// divide() then evaluates
+//   t1 = high 32 bits of (multiplier * numerator)
+//   q  = (t1 + ((numerator - t1) >> shift1)) >> shift2
+// which equals floor(numerator / divider) over the whole documented range.
+template <typename T>
+struct TensorIntDivisor {
+ public:
+  // Builds a placeholder divisor. With a zero multiplier and zero shifts,
+  // divide() returns its numerator (as a 32-bit value) unchanged.
+  TensorIntDivisor() {
+    multiplier = 0;
+    shift1 = 0;
+    shift2 = 0;
+  }
+
+  // Precomputes the multiplier and shifts used to divide by `divider`.
+  // Must have 1 <= divider <= 2^31-1
+  TensorIntDivisor(const T divider) {
+    static const int N = 32;
+    eigen_assert(divider > 0);
+    // Compare in 64-bit unsigned arithmetic: evaluating (1<<(N-1)) - 1 in
+    // plain int overflows.
+    eigen_assert(static_cast<uint64_t>(divider) <= (static_cast<uint64_t>(1) << (N-1)) - 1);
+
+    // log_div = ceil(log2(divider)). The rounding direction matters: with the
+    // floor instead, 2^(N+log_div) / divider drops below 2^N for every divider
+    // that is not a power of two, the multiplier wraps around, and divide()
+    // returns wrong quotients for numerators >= 2^31.
+    // For d >= 2, ceil(log2(d)) == N - clz(d - 1); d == 1 is handled apart
+    // because __builtin_clz(0) is undefined.
+    const uint32_t d = static_cast<uint32_t>(divider);
+    const int log_div = (d > 1) ? N - __builtin_clz(d - 1) : 0;
+
+    // Since 2^log_div >= d, the quotient below is >= 2^N, so the subtraction
+    // cannot wrap and the multiplier fits in 32 bits. N + log_div <= 63, so
+    // the shift stays within uint64_t.
+    multiplier = (static_cast<uint64_t>(1) << (N+log_div)) / d - (static_cast<uint64_t>(1) << N) + 1;
+    shift1 = log_div > 1 ? 1 : log_div;      // min(1, log_div)
+    shift2 = log_div > 1 ? log_div-1 : 0;    // max(0, log_div-1)
+  }
+
+  // Returns floor(numerator / divider) for the divider given at construction.
+  // Must have 0 <= numerator <= 2^32-1
+  T divide(const T numerator) const {
+    static const int N = 32;
+    eigen_assert(numerator >= 0);
+    eigen_assert(static_cast<uint64_t>(numerator) <= (static_cast<uint64_t>(1) << N) - 1);
+
+    const uint32_t n = static_cast<uint32_t>(numerator);
+    // High half of the 64-bit product. The multiplier is below 2^32, so the
+    // product cannot overflow and t1 <= n.
+    const uint32_t t1 = static_cast<uint32_t>((multiplier * n) >> N);
+    // t1 + ((n - t1) >> shift1) never exceeds n, so the sum cannot wrap.
+    const uint32_t t = (n - t1) >> shift1;
+    return static_cast<T>((t1 + t) >> shift2);
+  }
+
+ private:
+  uint64_t multiplier;  // magic multiplier; its value fits in 32 bits
+  int32_t shift1;       // 0 when the divider is 1, otherwise 1
+  int32_t shift2;       // max(0, ceil(log2(divider)) - 1)
+};
+
+
+// Lets a precomputed divisor be used with the ordinary division syntax:
+// `numerator / divisor` forwards to divisor.divide(numerator).
+template <typename T>
+static T operator / (const T& numerator, const TensorIntDivisor<T>& divisor) {
+  const T quotient = divisor.divide(numerator);
+  return quotient;
+}
+
+
+} // end namespace internal
+} // end namespace Eigen
+
+#endif // EIGEN_CXX11_TENSOR_TENSOR_INTDIV_H