diff options
-rw-r--r-- | unsupported/Eigen/CXX11/Tensor | 1 | ||||
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h | 233 | ||||
-rw-r--r-- | unsupported/test/CMakeLists.txt | 1 | ||||
-rw-r--r-- | unsupported/test/cxx11_tensor_uint128.cpp | 144 |
4 files changed, 379 insertions, 0 deletions
diff --git a/unsupported/Eigen/CXX11/Tensor b/unsupported/Eigen/CXX11/Tensor index 1e3d2c06a..7e59af964 100644 --- a/unsupported/Eigen/CXX11/Tensor +++ b/unsupported/Eigen/CXX11/Tensor @@ -67,6 +67,7 @@ #include "src/Tensor/TensorInitializer.h" #include "src/Tensor/TensorTraits.h" #include "src/Tensor/TensorFunctors.h" +#include "src/Tensor/TensorUInt128.h" #include "src/Tensor/TensorIntDiv.h" #include "src/Tensor/TensorBase.h" diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h b/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h new file mode 100644 index 000000000..2b0808629 --- /dev/null +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h @@ -0,0 +1,233 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_UINT128_H +#define EIGEN_CXX11_TENSOR_TENSOR_UINT128_H + +namespace Eigen { +namespace internal { + + +template <uint64_t n> +struct static_val { + static const uint64_t value = n; + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE operator uint64_t() const { return n; } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static_val() { } + template <typename T> + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static_val(const T& v) { + eigen_assert(v == n); + } +}; + + +template <typename HIGH = uint64_t, typename LOW = uint64_t> +struct TensorUInt128 +{ + HIGH high; + LOW low; + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE + TensorUInt128(int x) : high(0), low(x) { + eigen_assert(x >= 0); + } + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE + TensorUInt128(int64_t x) : high(0), low(x) { + eigen_assert(x >= 0); + } + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE + TensorUInt128(uint64_t x) : high(0), low(x) { } + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE + TensorUInt128(uint64_t y, uint64_t x) : high(y), low(x) { } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE operator LOW() const { + return low; + } + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE LOW lower() const { + return low; + } + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE HIGH upper() const { + return high; + } +}; + + +template <typename HL, typename LL, typename HR, typename LR> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +static bool operator == (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs) +{ + return (lhs.high == rhs.high) & (lhs.low == rhs.low); +} + +template <typename HL, typename LL, typename HR, typename LR> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +static bool operator != (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs) +{ + return (lhs.high != rhs.high) | (lhs.low != rhs.low); +} + +template <typename HL, typename LL, typename HR, typename LR> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +static bool operator >= (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs) +{ + if (lhs.high != rhs.high) { + return lhs.high > rhs.high; + } + return lhs.low >= rhs.low; +} + +template <typename HL, typename LL, typename HR, typename LR> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +static bool operator < (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs) +{ + if (lhs.high != rhs.high) { + return lhs.high < rhs.high; + } + return lhs.low < rhs.low; +} + +template <typename HL, typename LL, typename HR, typename LR> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +static TensorUInt128<uint64_t, uint64_t> operator + (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs) +{ + TensorUInt128<uint64_t, uint64_t> result(lhs.high + rhs.high, lhs.low + rhs.low); + if (result.low < rhs.low) { + result.high += 1; + } + return result; +} + +template <typename HL, typename LL, typename HR, typename LR> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +static TensorUInt128<uint64_t, uint64_t> operator - (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs) +{ + TensorUInt128<uint64_t, uint64_t> result(lhs.high - rhs.high, lhs.low - rhs.low); + if (result.low > lhs.low) { + result.high -= 1; + } + return result; +} + + +template <typename HL, typename LL, typename HR, typename LR> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +static TensorUInt128<uint64_t, uint64_t> operator * (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs) +{ + // Split each 128-bit integer into 4 32-bit integers, and then do the + // multiplications by hand as follow: + // lhs a b c d + // rhs e f g h + // ----------- + // ah bh ch dh + // bg cg dg + // cf df + // de + // The result is stored in 2 64bit integers, high and low. + + static const uint64_t LOW = 0x00000000FFFFFFFFLL; + static const uint64_t HIGH = 0xFFFFFFFF00000000LL; + + uint64_t d = lhs.low & LOW; + uint64_t c = (lhs.low & HIGH) >> 32LL; + uint64_t b = lhs.high & LOW; + uint64_t a = (lhs.high & HIGH) >> 32LL; + + uint64_t h = rhs.low & LOW; + uint64_t g = (rhs.low & HIGH) >> 32LL; + uint64_t f = rhs.high & LOW; + uint64_t e = (rhs.high & HIGH) >> 32LL; + + // Compute the low 32 bits of low + uint64_t acc = d * h; + uint64_t low = acc & LOW; + // Compute the high 32 bits of low. Add a carry every time we wrap around + acc >>= 32LL; + uint64_t carry = 0; + uint64_t acc2 = acc + c * h; + if (acc2 < acc) { + carry++; + } + acc = acc2 + d * g; + if (acc < acc2) { + carry++; + } + low |= (acc << 32LL); + + // Carry forward the high bits of acc to initiate the computation of the + // low 32 bits of high + acc2 = (acc >> 32LL) | (carry << 32LL); + carry = 0; + + acc = acc2 + b * h; + if (acc < acc2) { + carry++; + } + acc2 = acc + c * g; + if (acc2 < acc) { + carry++; + } + acc = acc2 + d * f; + if (acc < acc2) { + carry++; + } + uint64_t high = acc & LOW; + + // Start to compute the high 32 bits of high. + acc2 = (acc >> 32LL) | (carry << 32LL); + + acc = acc2 + a * h; + acc2 = acc + b * g; + acc = acc2 + c * f; + acc2 = acc + d * e; + high |= (acc2 << 32LL); + + return TensorUInt128<uint64_t, uint64_t>(high, low); +} + +template <typename HL, typename LL, typename HR, typename LR> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +static TensorUInt128<uint64_t, uint64_t> operator / (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs) +{ + if (rhs == TensorUInt128<static_val<0>, static_val<1> >(1)) { + return TensorUInt128<uint64_t, uint64_t>(lhs.high, lhs.low); + } else if (lhs < rhs) { + return TensorUInt128<uint64_t, uint64_t>(0); + } else { + // calculate the biggest power of 2 times rhs that's less than or equal to lhs + TensorUInt128<uint64_t, uint64_t> power2(1); + TensorUInt128<uint64_t, uint64_t> d(rhs); + TensorUInt128<uint64_t, uint64_t> tmp(lhs - d); + while (lhs >= d) { + tmp = tmp - d; + d = d + d; + power2 = power2 + power2; + } + + tmp = TensorUInt128<uint64_t, uint64_t>(lhs.high, lhs.low); + TensorUInt128<uint64_t, uint64_t> result(0); + while (power2 != TensorUInt128<static_val<0>, static_val<0> >(0)) { + if (tmp >= d) { + tmp = tmp - d; + result = result + power2; + } + // Shift right + power2 = TensorUInt128<uint64_t, uint64_t>(power2.high >> 1, (power2.low >> 1) | (power2.high << 63)); + d = TensorUInt128<uint64_t, uint64_t>(d.high >> 1, (d.low >> 1) | (d.high << 63)); + } + + return result; + } +} + + +} // namespace internal +} // namespace Eigen + + +#endif // EIGEN_CXX11_TENSOR_TENSOR_UINT128_H diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt index cc4ce1c59..97257b183 100644 --- a/unsupported/test/CMakeLists.txt +++ b/unsupported/test/CMakeLists.txt @@ -117,6 +117,7 @@ if(EIGEN_TEST_CXX11) ei_add_test(cxx11_tensor_of_const_values "-std=c++0x") ei_add_test(cxx11_tensor_of_complex "-std=c++0x") ei_add_test(cxx11_tensor_of_strings "-std=c++0x") + ei_add_test(cxx11_tensor_uint128 "-std=c++0x") ei_add_test(cxx11_tensor_intdiv "-std=c++0x") ei_add_test(cxx11_tensor_lvalue "-std=c++0x") ei_add_test(cxx11_tensor_map "-std=c++0x") diff --git a/unsupported/test/cxx11_tensor_uint128.cpp b/unsupported/test/cxx11_tensor_uint128.cpp new file mode 100644 index 000000000..38fad192b --- /dev/null +++ b/unsupported/test/cxx11_tensor_uint128.cpp @@ -0,0 +1,144 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" + +#include <Eigen/CXX11/Tensor> + +using Eigen::internal::TensorUInt128; +using Eigen::internal::static_val; + +static void VERIFY_EQUAL(TensorUInt128<uint64_t, uint64_t> actual, __uint128_t expected) { + bool matchl = actual.lower() == static_cast<uint64_t>(expected); + bool matchh = actual.upper() == static_cast<uint64_t>(expected >> 64); + if (!matchl || !matchh) { + const char* testname = g_test_stack.back().c_str(); + std::cerr << "Test " << testname << " failed in " << __FILE__ + << " (" << __LINE__ << ")" + << std::endl; + abort(); + } +} + + +static void test_add() { + uint64_t incr = internal::random<uint64_t>(1, 9999999999); + for (uint64_t i1 = 0; i1 < 100; ++i1) { + for (uint64_t i2 = 1; i2 < 100 * incr; i2 += incr) { + TensorUInt128<uint64_t, uint64_t> i(i1, i2); + __uint128_t a = (static_cast<__uint128_t>(i1) << 64) + static_cast<__uint128_t>(i2); + for (uint64_t j1 = 0; j1 < 100; ++j1) { + for (uint64_t j2 = 1; j2 < 100 * incr; j2 += incr) { + TensorUInt128<uint64_t, uint64_t> j(j1, j2); + __uint128_t b = (static_cast<__uint128_t>(j1) << 64) + static_cast<__uint128_t>(j2); + TensorUInt128<uint64_t, uint64_t> actual = i + j; + __uint128_t expected = a + b; + VERIFY_EQUAL(actual, expected); + } + } + } + } +} + +static void test_sub() { + uint64_t incr = internal::random<uint64_t>(1, 9999999999); + for (uint64_t i1 = 0; i1 < 100; ++i1) { + for (uint64_t i2 = 1; i2 < 100 * incr; i2 += incr) { + TensorUInt128<uint64_t, uint64_t> i(i1, i2); + __uint128_t a = (static_cast<__uint128_t>(i1) << 64) + static_cast<__uint128_t>(i2); + for (uint64_t j1 = 0; j1 < 100; ++j1) { + for (uint64_t j2 = 1; j2 < 100 * incr; j2 += incr) { + TensorUInt128<uint64_t, uint64_t> j(j1, j2); + __uint128_t b = (static_cast<__uint128_t>(j1) << 64) + static_cast<__uint128_t>(j2); + TensorUInt128<uint64_t, uint64_t> actual = i - j; + __uint128_t expected = a - b; + VERIFY_EQUAL(actual, expected); + } + } + } + } +} + +static void test_mul() { + uint64_t incr = internal::random<uint64_t>(1, 9999999999); + for (uint64_t i1 = 0; i1 < 100; ++i1) { + for (uint64_t i2 = 1; i2 < 100 * incr; i2 += incr) { + TensorUInt128<uint64_t, uint64_t> i(i1, i2); + __uint128_t a = (static_cast<__uint128_t>(i1) << 64) + static_cast<__uint128_t>(i2); + for (uint64_t j1 = 0; j1 < 100; ++j1) { + for (uint64_t j2 = 1; j2 < 100 * incr; j2 += incr) { + TensorUInt128<uint64_t, uint64_t> j(j1, j2); + __uint128_t b = (static_cast<__uint128_t>(j1) << 64) + static_cast<__uint128_t>(j2); + TensorUInt128<uint64_t, uint64_t> actual = i * j; + __uint128_t expected = a * b; + VERIFY_EQUAL(actual, expected); + } + } + } + } +} + +static void test_div() { + uint64_t incr = internal::random<uint64_t>(1, 9999999999); + for (uint64_t i1 = 0; i1 < 100; ++i1) { + for (uint64_t i2 = 1; i2 < 100 * incr; i2 += incr) { + TensorUInt128<uint64_t, uint64_t> i(i1, i2); + __uint128_t a = (static_cast<__uint128_t>(i1) << 64) + static_cast<__uint128_t>(i2); + for (uint64_t j1 = 0; j1 < 100; ++j1) { + for (uint64_t j2 = 1; j2 < 100 * incr; j2 += incr) { + TensorUInt128<uint64_t, uint64_t> j(j1, j2); + __uint128_t b = (static_cast<__uint128_t>(j1) << 64) + static_cast<__uint128_t>(j2); + TensorUInt128<uint64_t, uint64_t> actual = i / j; + __uint128_t expected = a / b; + VERIFY_EQUAL(actual, expected); + } + } + } + } +} + +static void test_misc1() { + uint64_t incr = internal::random<uint64_t>(1, 9999999999); + for (uint64_t i2 = 1; i2 < 100 * incr; i2 += incr) { + TensorUInt128<static_val<0>, uint64_t> i(0, i2); + __uint128_t a = static_cast<__uint128_t>(i2); + for (uint64_t j2 = 1; j2 < 100 * incr; j2 += incr) { + TensorUInt128<static_val<0>, uint64_t> j(0, j2); + __uint128_t b = static_cast<__uint128_t>(j2); + uint64_t actual = (i * j).upper(); + uint64_t expected = (a * b) >> 64; + VERIFY_IS_EQUAL(actual, expected); + } + } +} + +static void test_misc2() { + int64_t incr = internal::random<int64_t>(1, 100); + for (int64_t log_div = 0; log_div < 63; ++log_div) { + for (int64_t divider = 1; divider <= 1000000 * incr; divider += incr) { + uint64_t expected = (static_cast<__uint128_t>(1) << (64+log_div)) / static_cast<__uint128_t>(divider) - (static_cast<__uint128_t>(1) << 64) + 1; + uint64_t shift = 1ULL << log_div; + + TensorUInt128<uint64_t, uint64_t> result = (TensorUInt128<uint64_t, static_val<0> >(shift, 0) / TensorUInt128<static_val<0>, uint64_t>(divider) - TensorUInt128<static_val<1>, static_val<0> >(1, 0) + TensorUInt128<static_val<0>, static_val<1> >(1)); + uint64_t actual = static_cast<uint64_t>(result); + VERIFY_EQUAL(actual, expected); + } + } +} + + +void test_cxx11_tensor_uint128() +{ + CALL_SUBTEST(test_add()); + CALL_SUBTEST(test_sub()); + CALL_SUBTEST(test_mul()); + CALL_SUBTEST(test_div()); + CALL_SUBTEST(test_misc1()); + CALL_SUBTEST(test_misc2()); +} |