about | summary | refs | log | tree | commit | diff | homepage
path: root/unsupported
diff options
context:
space:
mode:
author: Benoit Steiner <benoit.steiner.goog@gmail.com> 2015-11-18 11:37:58 -0800
committer: Benoit Steiner <benoit.steiner.goog@gmail.com> 2015-11-18 11:37:58 -0800
commit: 1dd444ea71d30cc3a1eab7af0ba3f6a0357ae93c (patch)
tree: f34b56358cd926957f054657c96126c9f24a84a4 /unsupported
parent: 4926251f130faca49ffc743e88e397eb3e9db9c5 (diff)
Avoid using the version of TensorIntDivisor optimized for 32-bit integers when the divisor can be equal to one, since that optimized version only works for divisors strictly greater than one.
Diffstat (limited to 'unsupported')
-rw-r--r-- unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h 9
-rw-r--r-- unsupported/test/cxx11_tensor_intdiv.cpp 27
2 files changed, 29 insertions, 7 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h b/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h
index fd2441894..058fb2c42 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h
@@ -116,7 +116,7 @@ namespace {
}
-template <typename T>
+template <typename T, bool div_gt_one = false>
struct TensorIntDivisor {
public:
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIntDivisor() {
@@ -166,8 +166,9 @@ struct TensorIntDivisor {
// Optimized version for signed 32 bit integers.
// Derived from Hacker's Delight.
+// Only works for divisors strictly greater than one
template <>
-class TensorIntDivisor<int32_t> {
+class TensorIntDivisor<int32_t, true> {
public:
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIntDivisor() {
magic = 0;
@@ -226,8 +227,8 @@ private:
};
-template <typename T>
-static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator / (const T& numerator, const TensorIntDivisor<T>& divisor) {
+template <typename T, bool div_gt_one>
+static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator / (const T& numerator, const TensorIntDivisor<T, div_gt_one>& divisor) {
return divisor.divide(numerator);
}
diff --git a/unsupported/test/cxx11_tensor_intdiv.cpp b/unsupported/test/cxx11_tensor_intdiv.cpp
index 343b37dbd..fd6d27ae1 100644
--- a/unsupported/test/cxx11_tensor_intdiv.cpp
+++ b/unsupported/test/cxx11_tensor_intdiv.cpp
@@ -14,8 +14,29 @@
void test_signed_32bit()
{
+ // Divide by one
+ const Eigen::internal::TensorIntDivisor<int32_t, false> div(1);
+
+ for (int32_t j = 0; j < 25000; ++j) {
+ const int32_t fast_div = j / div;
+ const int32_t slow_div = j / 1;
+ VERIFY_IS_EQUAL(fast_div, slow_div);
+ }
+
+ // Standard divide by 2 or more
+ for (int32_t i = 2; i < 25000; ++i) {
+ const Eigen::internal::TensorIntDivisor<int32_t, false> div(i);
+
+ for (int32_t j = 0; j < 25000; ++j) {
+ const int32_t fast_div = j / div;
+ const int32_t slow_div = j / i;
+ VERIFY_IS_EQUAL(fast_div, slow_div);
+ }
+ }
+
+ // Optimized divide by 2 or more
for (int32_t i = 2; i < 25000; ++i) {
- const Eigen::internal::TensorIntDivisor<int32_t> div(i);
+ const Eigen::internal::TensorIntDivisor<int32_t, true> div(i);
for (int32_t j = 0; j < 25000; ++j) {
const int32_t fast_div = j / div;
@@ -42,7 +63,7 @@ void test_unsigned_32bit()
void test_signed_64bit()
{
- for (int64_t i = 2; i < 25000; ++i) {
+ for (int64_t i = 1; i < 25000; ++i) {
const Eigen::internal::TensorIntDivisor<int64_t> div(i);
for (int64_t j = 0; j < 25000; ++j) {
@@ -56,7 +77,7 @@ void test_signed_64bit()
void test_unsigned_64bit()
{
- for (uint64_t i = 2; i < 25000; ++i) {
+ for (uint64_t i = 1; i < 25000; ++i) {
const Eigen::internal::TensorIntDivisor<uint64_t> div(i);
for (uint64_t j = 0; j < 25000; ++j) {