blob: 2714117abc833e891130a09c525565bc0905a69d (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
|
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_CXX11_TENSOR_TENSOR_INTDIV_H
#define EIGEN_CXX11_TENSOR_TENSOR_INTDIV_H
namespace Eigen {
/** \internal
*
* \class TensorIntDiv
* \ingroup CXX11_Tensor_Module
*
* \brief Fast integer division by a constant.
*
* See the paper from Granlund and Montgomery for explanation.
* (at http://dx.doi.org/10.1145/773473.178249)
*
* \sa Tensor
*/
namespace internal {
template <typename T>
struct TensorIntDivisor {
public:
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIntDivisor() {
multiplier = 0;
shift1 = 0;
shift2 = 0;
}
// Must have 1 <= divider <= 2^31-1
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIntDivisor(const T divider) {
const int N = 32;
eigen_assert(divider > 0);
eigen_assert(divider <= (1<<(N-1)) - 1);
// fast ln2
#ifndef __CUDA_ARCH__
const int leading_zeros = __builtin_clz(divider);
#else
const int leading_zeros = __clz(divider);
#endif
const int log_div = N - (leading_zeros+1);
multiplier = (static_cast<uint64_t>(1) << (N+log_div)) / divider - (static_cast<uint64_t>(1) << N) + 1;
shift1 = log_div > 1 ? 1 : log_div;
shift2 = log_div > 1 ? log_div-1 : 0;
}
// Must have 0 <= numerator <= 2^32-1
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T divide(const T numerator) const {
const int N = 32;
eigen_assert(numerator >= 0);
eigen_assert(numerator <= (1ull<<N) - 1);
uint32_t t1 = (multiplier * numerator) >> 32;
uint32_t t = (static_cast<uint32_t>(numerator) - t1) >> shift1;
return (t1 + t) >> shift2;
}
private:
uint64_t multiplier;
int32_t shift1;
int32_t shift2;
};
template <typename T>
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator / (const T& numerator, const TensorIntDivisor<T>& divisor) {
return divisor.divide(numerator);
}
} // end namespace internal
} // end namespace Eigen
#endif // EIGEN_CXX11_TENSOR_TENSOR_INTDIV_H
|