aboutsummaryrefslogtreecommitdiffhomepage
path: root/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h
diff options
context:
space:
mode:
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h')
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h50
1 files changed, 37 insertions, 13 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h b/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h
index 33c6c1b0f..ede3939c2 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h
@@ -29,25 +29,47 @@ namespace Eigen {
namespace internal {
namespace {
+
// Note: result is undefined if val == 0
template <typename T>
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE int count_leading_zeros(const T val)
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+ typename internal::enable_if<sizeof(T)==4,int>::type count_leading_zeros(const T val)
{
#ifdef __CUDA_ARCH__
- return (sizeof(T) == 8) ? __clzll(val) : __clz(val);
+ return __clz(val);
#elif EIGEN_COMP_MSVC
- unsigned long index;
- if (sizeof(T) == 8) {
- _BitScanReverse64(&index, val);
- } else {
- _BitScanReverse(&index, val);
- }
- return (sizeof(T) == 8) ? 63 - index : 31 - index;
+ unsigned long index;
+ _BitScanReverse(&index, val);
+ return 31 - index;
+#else
+ EIGEN_STATIC_ASSERT(sizeof(unsigned long long) == 8, YOU_MADE_A_PROGRAMMING_MISTAKE);
+ return __builtin_clz(static_cast<uint32_t>(val));
+#endif
+ }
+
+ template <typename T>
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+ typename internal::enable_if<sizeof(T)==8,int>::type count_leading_zeros(const T val)
+ {
+#ifdef __CUDA_ARCH__
+ return __clzll(val);
+#elif EIGEN_COMP_MSVC && EIGEN_ARCH_x86_64
+ unsigned long index;
+ _BitScanReverse64(&index, val);
+ return 63 - index;
+#elif EIGEN_COMP_MSVC
+ // MSVC's _BitScanReverse64 is not available for 32bits builds.
+ unsigned int lo = (unsigned int)(val&0xffffffff);
+ unsigned int hi = (unsigned int)((val>>32)&0xffffffff);
+ int n;
+ if(hi==0)
+ n = 32 + count_leading_zeros<unsigned int>(lo);
+ else
+ n = count_leading_zeros<unsigned int>(hi);
+ return n;
#else
EIGEN_STATIC_ASSERT(sizeof(unsigned long long) == 8, YOU_MADE_A_PROGRAMMING_MISTAKE);
- return (sizeof(T) == 8) ?
- __builtin_clzll(static_cast<uint64_t>(val)) :
- __builtin_clz(static_cast<uint32_t>(val));
+ return __builtin_clzll(static_cast<uint64_t>(val));
#endif
}
@@ -98,7 +120,9 @@ namespace {
return static_cast<uint64_t>((static_cast<__uint128_t>(1) << (64+log_div)) / static_cast<__uint128_t>(divider) - (static_cast<__uint128_t>(1) << 64) + 1);
#else
const uint64_t shift = 1ULL << log_div;
- TensorUInt128<uint64_t, uint64_t> result = (TensorUInt128<uint64_t, static_val<0> >(shift, 0) / TensorUInt128<static_val<0>, uint64_t>(divider) - TensorUInt128<static_val<1>, static_val<0> >(1, 0) + TensorUInt128<static_val<0>, static_val<1> >(1));
+ TensorUInt128<uint64_t, uint64_t> result = TensorUInt128<uint64_t, static_val<0> >(shift, 0) / TensorUInt128<static_val<0>, uint64_t>(divider)
+ - TensorUInt128<static_val<1>, static_val<0> >(1, 0)
+ + TensorUInt128<static_val<0>, static_val<1> >(1);
return static_cast<uint64_t>(result);
#endif
}