From 01f5f81f93c5a18b0b1e35208e654be8d2c69bdd Mon Sep 17 00:00:00 2001 From: Abseil Team Date: Mon, 15 Jun 2020 00:47:28 -0700 Subject: Export of internal Abseil changes -- ede5d8e8877c81d7e69549e05076f62cb334ef7f by Abseil Team : Optimize 128 bit division in absl::uint128 when the intrinsic does not exist PiperOrigin-RevId: 316413322 -- 5dd02300b5a5700f41e4034b15a1c8c9e7349673 by Abseil Team : Add additional 128 bit division benchmarks. PiperOrigin-RevId: 316358648 -- b7ee6e33b5076b7b6817b728f268d9e58b00b123 by Abseil Team : Add tests for ABSL_PREDICT_TRUE and ABSL_PREDICT_FALSE macros. PiperOrigin-RevId: 316325593 -- 3d7fe4ab8bc1e736b9697098eeb2cdb7e9910105 by Andy Soffer : Removing unnecessary comment using hurtful words. PiperOrigin-RevId: 316192184 GitOrigin-RevId: ede5d8e8877c81d7e69549e05076f62cb334ef7f Change-Id: I4e447286d0b823d99cdd658dd49fb66725bb7a30 --- absl/numeric/BUILD.bazel | 1 + absl/numeric/CMakeLists.txt | 1 + absl/numeric/int128.cc | 40 ++++------ absl/numeric/int128_benchmark.cc | 161 +++++++++++++++++++++++++++------------ 4 files changed, 126 insertions(+), 77 deletions(-) (limited to 'absl/numeric') diff --git a/absl/numeric/BUILD.bazel b/absl/numeric/BUILD.bazel index e09e52d2..da3af4d0 100644 --- a/absl/numeric/BUILD.bazel +++ b/absl/numeric/BUILD.bazel @@ -35,6 +35,7 @@ cc_library( copts = ABSL_DEFAULT_COPTS, linkopts = ABSL_DEFAULT_LINKOPTS, deps = [ + "//absl/base:bits", "//absl/base:config", "//absl/base:core_headers", ], diff --git a/absl/numeric/CMakeLists.txt b/absl/numeric/CMakeLists.txt index 242889f0..1e12d80f 100644 --- a/absl/numeric/CMakeLists.txt +++ b/absl/numeric/CMakeLists.txt @@ -26,6 +26,7 @@ absl_cc_library( COPTS ${ABSL_DEFAULT_COPTS} DEPS + absl::bits absl::config absl::core_headers PUBLIC diff --git a/absl/numeric/int128.cc b/absl/numeric/int128.cc index b605a870..e21e5e9a 100644 --- a/absl/numeric/int128.cc +++ b/absl/numeric/int128.cc @@ -15,6 +15,7 @@ #include "absl/numeric/int128.h" #include + #include #include #include // NOLINT(readability/streams) @@ -22,6 +23,9 @@ #include #include +#include "absl/base/internal/bits.h" +#include "absl/base/optimization.h" + namespace absl { ABSL_NAMESPACE_BEGIN @@ -31,44 +35,26 @@ ABSL_DLL const uint128 kuint128max = MakeUint128( namespace { // Returns the 0-based position of the last set bit (i.e., most significant bit) -// in the given uint64_t. The argument may not be 0. +// in the given uint128. The argument is not 0. // // For example: // Given: 5 (decimal) == 101 (binary) // Returns: 2 -#define STEP(T, n, pos, sh) \ - do { \ - if ((n) >= (static_cast(1) << (sh))) { \ - (n) = (n) >> (sh); \ - (pos) |= (sh); \ - } \ - } while (0) -static inline int Fls64(uint64_t n) { - assert(n != 0); - int pos = 0; - STEP(uint64_t, n, pos, 0x20); - uint32_t n32 = static_cast(n); - STEP(uint32_t, n32, pos, 0x10); - STEP(uint32_t, n32, pos, 0x08); - STEP(uint32_t, n32, pos, 0x04); - return pos + ((uint64_t{0x3333333322221100} >> (n32 << 2)) & 0x3); -} -#undef STEP - -// Like Fls64() above, but returns the 0-based position of the last set bit -// (i.e., most significant bit) in the given uint128. The argument may not be 0. -static inline int Fls128(uint128 n) { +inline ABSL_ATTRIBUTE_ALWAYS_INLINE int Fls128(uint128 n) { if (uint64_t hi = Uint128High64(n)) { - return Fls64(hi) + 64; + ABSL_INTERNAL_ASSUME(hi != 0); + return 127 - base_internal::CountLeadingZeros64(hi); } - return Fls64(Uint128Low64(n)); + const uint64_t low = Uint128Low64(n); + ABSL_INTERNAL_ASSUME(low != 0); + return 63 - base_internal::CountLeadingZeros64(low); } // Long division/modulo for uint128 implemented using the shift-subtract // division algorithm adapted from: // https://stackoverflow.com/questions/5386377/division-without-using -void DivModImpl(uint128 dividend, uint128 divisor, uint128* quotient_ret, - uint128* remainder_ret) { +inline void DivModImpl(uint128 dividend, uint128 divisor, uint128* quotient_ret, + uint128* remainder_ret) { assert(divisor != 0); if (divisor > dividend) { diff --git a/absl/numeric/int128_benchmark.cc b/absl/numeric/int128_benchmark.cc index a5502d92..eab1515c 100644 --- a/absl/numeric/int128_benchmark.cc +++ b/absl/numeric/int128_benchmark.cc @@ -12,15 +12,15 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "absl/numeric/int128.h" - #include #include +#include #include #include #include "benchmark/benchmark.h" #include "absl/base/config.h" +#include "absl/numeric/int128.h" namespace { @@ -32,57 +32,85 @@ std::mt19937 MakeRandomEngine() { return std::mt19937(seed); } -std::vector> -GetRandomClass128SampleUniformDivisor() { - std::vector> values; +template ::is_signed, int64_t, uint64_t>::type> +std::vector> GetRandomClass128SampleUniformDivisor() { + std::vector> values; std::mt19937 random = MakeRandomEngine(); - std::uniform_int_distribution uniform_uint64; + std::uniform_int_distribution uniform_h; values.reserve(kSampleSize); for (size_t i = 0; i < kSampleSize; ++i) { - absl::uint128 a = - absl::MakeUint128(uniform_uint64(random), uniform_uint64(random)); - absl::uint128 b = - absl::MakeUint128(uniform_uint64(random), uniform_uint64(random)); - values.emplace_back(std::max(a, b), - std::max(absl::uint128(2), std::min(a, b))); + T a{absl::MakeUint128(uniform_h(random), uniform_h(random))}; + T b{absl::MakeUint128(uniform_h(random), uniform_h(random))}; + values.emplace_back(std::max(a, b), std::max(T(2), std::min(a, b))); } return values; } +template void BM_DivideClass128UniformDivisor(benchmark::State& state) { - auto values = GetRandomClass128SampleUniformDivisor(); + auto values = GetRandomClass128SampleUniformDivisor(); while (state.KeepRunningBatch(values.size())) { for (const auto& pair : values) { benchmark::DoNotOptimize(pair.first / pair.second); } } } -BENCHMARK(BM_DivideClass128UniformDivisor); +BENCHMARK_TEMPLATE(BM_DivideClass128UniformDivisor, absl::uint128); +BENCHMARK_TEMPLATE(BM_DivideClass128UniformDivisor, absl::int128); + +template +void BM_RemainderClass128UniformDivisor(benchmark::State& state) { + auto values = GetRandomClass128SampleUniformDivisor(); + while (state.KeepRunningBatch(values.size())) { + for (const auto& pair : values) { + benchmark::DoNotOptimize(pair.first % pair.second); + } + } +} +BENCHMARK_TEMPLATE(BM_RemainderClass128UniformDivisor, absl::uint128); +BENCHMARK_TEMPLATE(BM_RemainderClass128UniformDivisor, absl::int128); -std::vector> -GetRandomClass128SampleSmallDivisor() { - std::vector> values; +template ::is_signed, int64_t, uint64_t>::type> +std::vector> GetRandomClass128SampleSmallDivisor() { + std::vector> values; std::mt19937 random = MakeRandomEngine(); - std::uniform_int_distribution uniform_uint64; + std::uniform_int_distribution uniform_h; values.reserve(kSampleSize); for (size_t i = 0; i < kSampleSize; ++i) { - absl::uint128 a = - absl::MakeUint128(uniform_uint64(random), uniform_uint64(random)); - uint64_t b = std::max(uint64_t{2}, uniform_uint64(random)); - values.emplace_back(std::max(a, absl::uint128(b)), b); + T a{absl::MakeUint128(uniform_h(random), uniform_h(random))}; + H b{std::max(H{2}, uniform_h(random))}; + values.emplace_back(std::max(a, T(b)), b); } return values; } +template void BM_DivideClass128SmallDivisor(benchmark::State& state) { - auto values = GetRandomClass128SampleSmallDivisor(); + auto values = GetRandomClass128SampleSmallDivisor(); while (state.KeepRunningBatch(values.size())) { for (const auto& pair : values) { benchmark::DoNotOptimize(pair.first / pair.second); } } } -BENCHMARK(BM_DivideClass128SmallDivisor); +BENCHMARK_TEMPLATE(BM_DivideClass128SmallDivisor, absl::uint128); +BENCHMARK_TEMPLATE(BM_DivideClass128SmallDivisor, absl::int128); + +template +void BM_RemainderClass128SmallDivisor(benchmark::State& state) { + auto values = GetRandomClass128SampleSmallDivisor(); + while (state.KeepRunningBatch(values.size())) { + for (const auto& pair : values) { + benchmark::DoNotOptimize(pair.first % pair.second); + } + } +} +BENCHMARK_TEMPLATE(BM_RemainderClass128SmallDivisor, absl::uint128); +BENCHMARK_TEMPLATE(BM_RemainderClass128SmallDivisor, absl::int128); std::vector> GetRandomClass128Sample() { std::vector> values; @@ -121,74 +149,107 @@ BENCHMARK(BM_AddClass128); // Some implementations of do not support __int128 when it is // available, so we make our own uniform_int_distribution-like type. +template ::value, int64_t, uint64_t>::type> class UniformIntDistribution128 { public: // NOLINTNEXTLINE: mimicking std::uniform_int_distribution API - unsigned __int128 operator()(std::mt19937& generator) { - return (static_cast(dist64_(generator)) << 64) | - dist64_(generator); + T operator()(std::mt19937& generator) { + return (static_cast(dist64_(generator)) << 64) | dist64_(generator); } private: - std::uniform_int_distribution dist64_; + std::uniform_int_distribution dist64_; }; -std::vector> -GetRandomIntrinsic128SampleUniformDivisor() { - std::vector> values; +template ::value, int64_t, uint64_t>::type> +std::vector> GetRandomIntrinsic128SampleUniformDivisor() { + std::vector> values; std::mt19937 random = MakeRandomEngine(); - UniformIntDistribution128 uniform_uint128; + UniformIntDistribution128 uniform_128; values.reserve(kSampleSize); for (size_t i = 0; i < kSampleSize; ++i) { - unsigned __int128 a = uniform_uint128(random); - unsigned __int128 b = uniform_uint128(random); - values.emplace_back( - std::max(a, b), - std::max(static_cast(2), std::min(a, b))); + T a = uniform_128(random); + T b = uniform_128(random); + values.emplace_back(std::max(a, b), + std::max(static_cast(2), std::min(a, b))); } return values; } +template void BM_DivideIntrinsic128UniformDivisor(benchmark::State& state) { - auto values = GetRandomIntrinsic128SampleUniformDivisor(); + auto values = GetRandomIntrinsic128SampleUniformDivisor(); while (state.KeepRunningBatch(values.size())) { for (const auto& pair : values) { benchmark::DoNotOptimize(pair.first / pair.second); } } } -BENCHMARK(BM_DivideIntrinsic128UniformDivisor); +BENCHMARK_TEMPLATE(BM_DivideIntrinsic128UniformDivisor, unsigned __int128); +BENCHMARK_TEMPLATE(BM_DivideIntrinsic128UniformDivisor, __int128); + +template +void BM_RemainderIntrinsic128UniformDivisor(benchmark::State& state) { + auto values = GetRandomIntrinsic128SampleUniformDivisor(); + while (state.KeepRunningBatch(values.size())) { + for (const auto& pair : values) { + benchmark::DoNotOptimize(pair.first % pair.second); + } + } +} +BENCHMARK_TEMPLATE(BM_RemainderIntrinsic128UniformDivisor, unsigned __int128); +BENCHMARK_TEMPLATE(BM_RemainderIntrinsic128UniformDivisor, __int128); -std::vector> -GetRandomIntrinsic128SampleSmallDivisor() { - std::vector> values; +template ::value, int64_t, uint64_t>::type> +std::vector> GetRandomIntrinsic128SampleSmallDivisor() { + std::vector> values; std::mt19937 random = MakeRandomEngine(); - UniformIntDistribution128 uniform_uint128; - std::uniform_int_distribution uniform_uint64; + UniformIntDistribution128 uniform_int128; + std::uniform_int_distribution uniform_int64; values.reserve(kSampleSize); for (size_t i = 0; i < kSampleSize; ++i) { - unsigned __int128 a = uniform_uint128(random); - uint64_t b = std::max(uint64_t{2}, uniform_uint64(random)); - values.emplace_back(std::max(a, static_cast(b)), b); + T a = uniform_int128(random); + H b = std::max(H{2}, uniform_int64(random)); + values.emplace_back(std::max(a, static_cast(b)), b); } return values; } +template void BM_DivideIntrinsic128SmallDivisor(benchmark::State& state) { - auto values = GetRandomIntrinsic128SampleSmallDivisor(); + auto values = GetRandomIntrinsic128SampleSmallDivisor(); while (state.KeepRunningBatch(values.size())) { for (const auto& pair : values) { benchmark::DoNotOptimize(pair.first / pair.second); } } } -BENCHMARK(BM_DivideIntrinsic128SmallDivisor); +BENCHMARK_TEMPLATE(BM_DivideIntrinsic128SmallDivisor, unsigned __int128); +BENCHMARK_TEMPLATE(BM_DivideIntrinsic128SmallDivisor, __int128); + +template +void BM_RemainderIntrinsic128SmallDivisor(benchmark::State& state) { + auto values = GetRandomIntrinsic128SampleSmallDivisor(); + while (state.KeepRunningBatch(values.size())) { + for (const auto& pair : values) { + benchmark::DoNotOptimize(pair.first % pair.second); + } + } +} +BENCHMARK_TEMPLATE(BM_RemainderIntrinsic128SmallDivisor, unsigned __int128); +BENCHMARK_TEMPLATE(BM_RemainderIntrinsic128SmallDivisor, __int128); std::vector> GetRandomIntrinsic128Sample() { std::vector> values; std::mt19937 random = MakeRandomEngine(); - UniformIntDistribution128 uniform_uint128; + UniformIntDistribution128 uniform_uint128; values.reserve(kSampleSize); for (size_t i = 0; i < kSampleSize; ++i) { values.emplace_back(uniform_uint128(random), uniform_uint128(random)); -- cgit v1.2.3