diff options
author | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2015-11-18 11:37:58 -0800 |
---|---|---|
committer | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2015-11-18 11:37:58 -0800 |
commit | 1dd444ea71d30cc3a1eab7af0ba3f6a0357ae93c (patch) | |
tree | f34b56358cd926957f054657c96126c9f24a84a4 | |
parent | 4926251f130faca49ffc743e88e397eb3e9db9c5 (diff) |
Avoid using the version of TensorIntDiv optimized for 32-bit integers when the divisor can be equal to one, since the optimized version only supports divisors strictly greater than one.
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h | 9 | ||||
-rw-r--r-- | unsupported/test/cxx11_tensor_intdiv.cpp | 27 |
2 files changed, 29 insertions, 7 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h b/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h index fd2441894..058fb2c42 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h @@ -116,7 +116,7 @@ namespace { } -template <typename T> +template <typename T, bool div_gt_one = false> struct TensorIntDivisor { public: EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIntDivisor() { @@ -166,8 +166,9 @@ struct TensorIntDivisor { // Optimized version for signed 32 bit integers. // Derived from Hacker's Delight. +// Only works for divisors strictly greater than one template <> -class TensorIntDivisor<int32_t> { +class TensorIntDivisor<int32_t, true> { public: EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIntDivisor() { magic = 0; @@ -226,8 +227,8 @@ private: }; -template <typename T> -static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator / (const T& numerator, const TensorIntDivisor<T>& divisor) { +template <typename T, bool div_gt_one> +static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator / (const T& numerator, const TensorIntDivisor<T, div_gt_one>& divisor) { return divisor.divide(numerator); } diff --git a/unsupported/test/cxx11_tensor_intdiv.cpp b/unsupported/test/cxx11_tensor_intdiv.cpp index 343b37dbd..fd6d27ae1 100644 --- a/unsupported/test/cxx11_tensor_intdiv.cpp +++ b/unsupported/test/cxx11_tensor_intdiv.cpp @@ -14,8 +14,29 @@ void test_signed_32bit() { + // Divide by one + const Eigen::internal::TensorIntDivisor<int32_t, false> div(1); + + for (int32_t j = 0; j < 25000; ++j) { + const int32_t fast_div = j / div; + const int32_t slow_div = j / 1; + VERIFY_IS_EQUAL(fast_div, slow_div); + } + + // Standard divide by 2 or more + for (int32_t i = 2; i < 25000; ++i) { + const Eigen::internal::TensorIntDivisor<int32_t, false> div(i); + + for (int32_t j = 0; j < 25000; ++j) { + const int32_t fast_div = j / div; + const int32_t slow_div = j / i; + VERIFY_IS_EQUAL(fast_div, slow_div); + } + } 
+ + // Optimized divide by 2 or more for (int32_t i = 2; i < 25000; ++i) { - const Eigen::internal::TensorIntDivisor<int32_t> div(i); + const Eigen::internal::TensorIntDivisor<int32_t, true> div(i); for (int32_t j = 0; j < 25000; ++j) { const int32_t fast_div = j / div; @@ -42,7 +63,7 @@ void test_unsigned_32bit() void test_signed_64bit() { - for (int64_t i = 2; i < 25000; ++i) { + for (int64_t i = 1; i < 25000; ++i) { const Eigen::internal::TensorIntDivisor<int64_t> div(i); for (int64_t j = 0; j < 25000; ++j) { @@ -56,7 +77,7 @@ void test_signed_64bit() void test_unsigned_64bit() { - for (uint64_t i = 2; i < 25000; ++i) { + for (uint64_t i = 1; i < 25000; ++i) { const Eigen::internal::TensorIntDivisor<uint64_t> div(i); for (uint64_t j = 0; j < 25000; ++j) { |