diff options
author | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2014-10-13 17:02:09 -0700 |
---|---|---|
committer | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2014-10-13 17:02:09 -0700 |
commit | 99d75235a9567865d2c070a2840d54c8a5ad0f43 (patch) | |
tree | 8ef64899252a8be7b6a868bd64bd167063ea4b2d /unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h | |
parent | 4c70b0a7627d45286ecbb3c73d2d774412168205 (diff) |
Misc improvements and cleanups
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h')
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h | 26 |
1 files changed, 15 insertions, 11 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h b/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h index cf97031be..2714117ab 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h @@ -31,30 +31,34 @@ namespace internal { template <typename T> struct TensorIntDivisor { public: - TensorIntDivisor() { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIntDivisor() { multiplier = 0; shift1 = 0; shift2 = 0; } // Must have 1 <= divider <= 2^31-1 - TensorIntDivisor(const T divider) { - static const int N = 32; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIntDivisor(const T divider) { + const int N = 32; eigen_assert(divider > 0); eigen_assert(divider <= (1<<(N-1)) - 1); // fast ln2 +#ifndef __CUDA_ARCH__ const int leading_zeros = __builtin_clz(divider); - const int l = N - (leading_zeros+1); - - multiplier = (static_cast<uint64_t>(1) << (N+l)) / divider - (static_cast<uint64_t>(1) << N) + 1; - shift1 = (std::min)(1, l); - shift2 = (std::max)(0, l-1); +#else + const int leading_zeros = __clz(divider); +#endif + const int log_div = N - (leading_zeros+1); + + multiplier = (static_cast<uint64_t>(1) << (N+log_div)) / divider - (static_cast<uint64_t>(1) << N) + 1; + shift1 = log_div > 1 ? 1 : log_div; + shift2 = log_div > 1 ? log_div-1 : 0; } // Must have 0 <= numerator <= 2^32-1 - T divide(const T numerator) const { - static const int N = 32; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T divide(const T numerator) const { + const int N = 32; eigen_assert(numerator >= 0); eigen_assert(numerator <= (1ull<<N) - 1); @@ -71,7 +75,7 @@ struct TensorIntDivisor { template <typename T> -static T operator / (const T& numerator, const TensorIntDivisor<T>& divisor) { +static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator / (const T& numerator, const TensorIntDivisor<T>& divisor) { return divisor.divide(numerator); } |