author | Abseil Team <absl-team@google.com> | 2020-12-07 09:56:14 -0800
---|---|---
committer | Andy Getz <durandal@google.com> | 2020-12-07 16:03:42 -0500
commit | fbdff6f3ae0ba977a69f172e85ecaede535e70f6 |
tree | 33b150f23f618fa9709819e37cc3e029800572f7 |
parent | acf3390ca28edf1438fa896602ffede2a7dff103 |
Export of internal Abseil changes
--
ff793052bd01e1e4fcf639f94d7c30c4855a9372 by Evan Brown <ezb@google.com>:
Roll forward of btree_iterator refactoring.
PiperOrigin-RevId: 346116047
--
17984679f16e3e2139b0f14fa76f4a6ca16a3ef9 by Chris Kennelly <ckennelly@google.com>:
Extend absl::StrContains to accept single character needles.
Single characters are more efficient to search for. Extending this API allows
the abseil-string-find-str-contains clang-tidy check to cover this pattern.
The C++ committee has adopted http://wg21.link/P1679 for inclusion in C++23.
PiperOrigin-RevId: 346095060
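A minimal usage sketch of the new overload (illustrative only; the variable names here are arbitrary, not part of the change):

#include <iostream>

#include "absl/strings/match.h"
#include "absl/strings/string_view.h"

int main() {
  constexpr absl::string_view haystack = "abseil";
  // The pattern the clang-tidy check can now flag...
  const bool found = haystack.find('b') != haystack.npos;
  // ...and its replacement, the new single-character overload:
  const bool same = absl::StrContains(haystack, 'b');
  std::cout << found << " " << same << "\n";  // prints: 1 1
  return 0;
}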
--
ef20b31c501b1dcaa25e244fd8f8aa43dec09bd6 by Jorg Brown <jorg@google.com>:
Internal change for cord ring
PiperOrigin-RevId: 346087545
--
b70f2c1cb77fc9e733a126e790967d45c5fd1dc7 by Derek Mauro <dmauro@google.com>:
Release layout_benchmark
PiperOrigin-RevId: 345968909
--
3a0eda337ee43622f92cfe14c2aa06f72dc71ee5 by Derek Mauro <dmauro@google.com>:
Release raw_hash_set_probe_benchmark
PiperOrigin-RevId: 345965969
--
abffdb4bb241a2264cb4e73a6262b660bb10447d by Derek Mauro <dmauro@google.com>:
Internal change
PiperOrigin-RevId: 345733599
--
7c9e24a71188df945be17fe98f700bdb51f81b16 by Derek Mauro <dmauro@google.com>:
Release hash_benchmark
PiperOrigin-RevId: 345721635
--
d68f33f17f9a8cd3f6da8eee3870bdb46402cdc8 by Derek Mauro <dmauro@google.com>:
Release raw_hash_set_benchmark
PiperOrigin-RevId: 345708384
--
6e6c547d4d1327b226c0ffe8ff34d0aa103ce24b by Abseil Team <absl-team@google.com>:
Updates the implementation of InlinedVector to accurately express the value-initialization semantics of the default constructor.
PiperOrigin-RevId: 345548260
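A sketch of the semantics being expressed (usage only, not the internal change itself): a default-constructed InlinedVector starts empty, with its size/is_allocated metadata value-initialized and its inline storage available.

#include <cassert>

#include "absl/container/inlined_vector.h"

int main() {
  // Default construction value-initializes the metadata: size() is 0 and
  // elements live in the inline (non-heap) storage.
  absl::InlinedVector<int, 4> v;
  assert(v.empty());
  assert(v.capacity() >= 4);  // the inline capacity is available immediately
  return 0;
}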
--
1532424deda97d468444c217cc0fa4614099c7c1 by Evan Brown <ezb@google.com>:
Rollback of btree_iterator refactoring.
PiperOrigin-RevId: 345543900
GitOrigin-RevId: ff793052bd01e1e4fcf639f94d7c30c4855a9372
Change-Id: I719831981fd056de41939f9addfee3d85e3b49b2
-rw-r--r-- | WORKSPACE | 6
-rw-r--r-- | absl/container/BUILD.bazel | 55
-rw-r--r-- | absl/container/internal/inlined_vector.h | 5
-rw-r--r-- | absl/container/internal/layout_benchmark.cc | 122
-rw-r--r-- | absl/container/internal/raw_hash_set_benchmark.cc | 396
-rw-r--r-- | absl/container/internal/raw_hash_set_probe_benchmark.cc | 590
-rw-r--r-- | absl/hash/BUILD.bazel | 18
-rw-r--r-- | absl/hash/hash_benchmark.cc | 249
-rw-r--r-- | absl/strings/cord.cc | 22
-rw-r--r-- | absl/strings/internal/cord_internal.h | 2
-rw-r--r-- | absl/strings/internal/cord_rep_flat.h | 14
-rw-r--r-- | absl/strings/match.h | 4
-rw-r--r-- | absl/strings/match_test.cc | 17
13 files changed, 1481 insertions, 19 deletions
@@ -29,9 +29,9 @@ http_archive( # Google benchmark. http_archive( name = "com_github_google_benchmark", - urls = ["https://github.com/google/benchmark/archive/16703ff83c1ae6d53e5155df3bb3ab0bc96083be.zip"], - strip_prefix = "benchmark-16703ff83c1ae6d53e5155df3bb3ab0bc96083be", - sha256 = "59f918c8ccd4d74b6ac43484467b500f1d64b40cc1010daa055375b322a43ba3", + urls = ["https://github.com/google/benchmark/archive/bf585a2789e30585b4e3ce6baf11ef2750b54677.zip"], # 2020-11-26T11:14:03Z + strip_prefix = "benchmark-bf585a2789e30585b4e3ce6baf11ef2750b54677", + sha256 = "2a778d821997df7d8646c9c59b8edb9a573a6e04c534c01892a40aa524a7b68c", ) # C++ rules for Bazel. diff --git a/absl/container/BUILD.bazel b/absl/container/BUILD.bazel index 8e72ad03..70977143 100644 --- a/absl/container/BUILD.bazel +++ b/absl/container/BUILD.bazel @@ -630,6 +630,45 @@ cc_test( ], ) +cc_binary( + name = "raw_hash_set_benchmark", + testonly = 1, + srcs = ["internal/raw_hash_set_benchmark.cc"], + copts = ABSL_TEST_COPTS, + linkopts = ABSL_DEFAULT_LINKOPTS, + tags = ["benchmark"], + visibility = ["//visibility:private"], + deps = [ + ":hash_function_defaults", + ":raw_hash_set", + "//absl/base:raw_logging_internal", + "//absl/strings:str_format", + "@com_github_google_benchmark//:benchmark_main", + ], +) + +cc_binary( + name = "raw_hash_set_probe_benchmark", + testonly = 1, + srcs = ["internal/raw_hash_set_probe_benchmark.cc"], + copts = ABSL_TEST_COPTS, + linkopts = select({ + "//conditions:default": [], + }) + ABSL_DEFAULT_LINKOPTS, + tags = ["benchmark"], + visibility = ["//visibility:private"], + deps = [ + ":flat_hash_map", + ":hash_function_defaults", + ":hashtable_debug", + ":raw_hash_set", + "//absl/random", + "//absl/random:distributions", + "//absl/strings", + "//absl/strings:str_format", + ], +) + cc_test( name = "raw_hash_set_allocator_test", size = "small", @@ -677,6 +716,22 @@ cc_test( ], ) +cc_binary( + name = "layout_benchmark", + testonly = 1, + srcs = ["internal/layout_benchmark.cc"], + copts = ABSL_TEST_COPTS, + linkopts = ABSL_DEFAULT_LINKOPTS, + tags = ["benchmark"], + visibility = ["//visibility:private"], + deps = [ + ":layout", + "//absl/base:core_headers", + "//absl/base:raw_logging_internal", + "@com_github_google_benchmark//:benchmark_main", + ], +) + cc_library( name = "tracked", testonly = 1, diff --git a/absl/container/internal/inlined_vector.h b/absl/container/internal/inlined_vector.h index c98c25c4..502ea3dd 100644 --- a/absl/container/internal/inlined_vector.h +++ b/absl/container/internal/inlined_vector.h @@ -298,9 +298,10 @@ class Storage { // Storage Constructors and Destructor // --------------------------------------------------------------------------- - Storage() : metadata_() {} + Storage() : metadata_(allocator_type(), /* size and is_allocated */ 0) {} - explicit Storage(const allocator_type& alloc) : metadata_(alloc, {}) {} + explicit Storage(const allocator_type& alloc) + : metadata_(alloc, /* size and is_allocated */ 0) {} ~Storage() { pointer data = GetIsAllocated() ? GetAllocatedData() : GetInlinedData(); diff --git a/absl/container/internal/layout_benchmark.cc b/absl/container/internal/layout_benchmark.cc new file mode 100644 index 00000000..d8636e8d --- /dev/null +++ b/absl/container/internal/layout_benchmark.cc @@ -0,0 +1,122 @@ +// Copyright 2018 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+//      https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Every benchmark should have the same performance as the corresponding
+// headroom benchmark.
+
+#include "absl/base/internal/raw_logging.h"
+#include "absl/container/internal/layout.h"
+#include "benchmark/benchmark.h"
+
+namespace absl {
+ABSL_NAMESPACE_BEGIN
+namespace container_internal {
+namespace {
+
+using ::benchmark::DoNotOptimize;
+
+using Int128 = int64_t[2];
+
+// This benchmark provides the upper bound on performance for BM_OffsetConstant.
+template <size_t Offset, class... Ts>
+void BM_OffsetConstantHeadroom(benchmark::State& state) {
+  for (auto _ : state) {
+    DoNotOptimize(Offset);
+  }
+}
+
+template <size_t Offset, class... Ts>
+void BM_OffsetConstant(benchmark::State& state) {
+  using L = Layout<Ts...>;
+  ABSL_RAW_CHECK(L::Partial(3, 5, 7).template Offset<3>() == Offset,
+                 "Invalid offset");
+  for (auto _ : state) {
+    DoNotOptimize(L::Partial(3, 5, 7).template Offset<3>());
+  }
+}
+
+template <class... Ts>
+size_t VariableOffset(size_t n, size_t m, size_t k);
+
+template <>
+size_t VariableOffset<int8_t, int16_t, int32_t, Int128>(size_t n, size_t m,
+                                                        size_t k) {
+  auto Align = [](size_t n, size_t m) { return (n + m - 1) & ~(m - 1); };
+  return Align(Align(Align(n * 1, 2) + m * 2, 4) + k * 4, 8);
+}
+
+template <>
+size_t VariableOffset<Int128, int32_t, int16_t, int8_t>(size_t n, size_t m,
+                                                        size_t k) {
+  // No alignment is necessary.
+  return n * 16 + m * 4 + k * 2;
+}
+
+// This benchmark provides the upper bound on performance for BM_OffsetVariable.
+template <size_t Offset, class... Ts>
+void BM_OffsetVariableHeadroom(benchmark::State& state) {
+  size_t n = 3;
+  size_t m = 5;
+  size_t k = 7;
+  ABSL_RAW_CHECK(VariableOffset<Ts...>(n, m, k) == Offset, "Invalid offset");
+  for (auto _ : state) {
+    DoNotOptimize(n);
+    DoNotOptimize(m);
+    DoNotOptimize(k);
+    DoNotOptimize(VariableOffset<Ts...>(n, m, k));
+  }
+}
+
+template <size_t Offset, class... Ts>
+void BM_OffsetVariable(benchmark::State& state) {
+  using L = Layout<Ts...>;
+  size_t n = 3;
+  size_t m = 5;
+  size_t k = 7;
+  ABSL_RAW_CHECK(L::Partial(n, m, k).template Offset<3>() == Offset,
+                 "Invalid offset");
+  for (auto _ : state) {
+    DoNotOptimize(n);
+    DoNotOptimize(m);
+    DoNotOptimize(k);
+    DoNotOptimize(L::Partial(n, m, k).template Offset<3>());
+  }
+}
+
+// Run all benchmarks in two modes:
+//
+// Layout with padding: int8_t[3], int16_t[5], int32_t[7], Int128[?].
+// Layout without padding: Int128[3], int32_t[5], int16_t[7], int8_t[?].
+ +#define OFFSET_BENCHMARK(NAME, OFFSET, T1, T2, T3, T4) \ + auto& NAME##_##OFFSET##_##T1##_##T2##_##T3##_##T4 = \ + NAME<OFFSET, T1, T2, T3, T4>; \ + BENCHMARK(NAME##_##OFFSET##_##T1##_##T2##_##T3##_##T4) + +OFFSET_BENCHMARK(BM_OffsetConstantHeadroom, 48, int8_t, int16_t, int32_t, + Int128); +OFFSET_BENCHMARK(BM_OffsetConstant, 48, int8_t, int16_t, int32_t, Int128); +OFFSET_BENCHMARK(BM_OffsetConstantHeadroom, 82, Int128, int32_t, int16_t, + int8_t); +OFFSET_BENCHMARK(BM_OffsetConstant, 82, Int128, int32_t, int16_t, int8_t); +OFFSET_BENCHMARK(BM_OffsetVariableHeadroom, 48, int8_t, int16_t, int32_t, + Int128); +OFFSET_BENCHMARK(BM_OffsetVariable, 48, int8_t, int16_t, int32_t, Int128); +OFFSET_BENCHMARK(BM_OffsetVariableHeadroom, 82, Int128, int32_t, int16_t, + int8_t); +OFFSET_BENCHMARK(BM_OffsetVariable, 82, Int128, int32_t, int16_t, int8_t); +} // namespace +} // namespace container_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/absl/container/internal/raw_hash_set_benchmark.cc b/absl/container/internal/raw_hash_set_benchmark.cc new file mode 100644 index 00000000..f9be2c5a --- /dev/null +++ b/absl/container/internal/raw_hash_set_benchmark.cc @@ -0,0 +1,396 @@ +// Copyright 2018 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/container/internal/raw_hash_set.h" + +#include <numeric> +#include <random> + +#include "absl/base/internal/raw_logging.h" +#include "absl/container/internal/hash_function_defaults.h" +#include "absl/strings/str_format.h" +#include "benchmark/benchmark.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace container_internal { + +struct RawHashSetTestOnlyAccess { + template <typename C> + static auto GetSlots(const C& c) -> decltype(c.slots_) { + return c.slots_; + } +}; + +namespace { + +struct IntPolicy { + using slot_type = int64_t; + using key_type = int64_t; + using init_type = int64_t; + + static void construct(void*, int64_t* slot, int64_t v) { *slot = v; } + static void destroy(void*, int64_t*) {} + static void transfer(void*, int64_t* new_slot, int64_t* old_slot) { + *new_slot = *old_slot; + } + + static int64_t& element(slot_type* slot) { return *slot; } + + template <class F> + static auto apply(F&& f, int64_t x) -> decltype(std::forward<F>(f)(x, x)) { + return std::forward<F>(f)(x, x); + } +}; + +class StringPolicy { + template <class F, class K, class V, + class = typename std::enable_if< + std::is_convertible<const K&, absl::string_view>::value>::type> + decltype(std::declval<F>()( + std::declval<const absl::string_view&>(), std::piecewise_construct, + std::declval<std::tuple<K>>(), + std::declval<V>())) static apply_impl(F&& f, + std::pair<std::tuple<K>, V> p) { + const absl::string_view& key = std::get<0>(p.first); + return std::forward<F>(f)(key, std::piecewise_construct, std::move(p.first), + std::move(p.second)); + } + + public: + struct slot_type { + struct ctor {}; + + template <class... Ts> + slot_type(ctor, Ts&&... ts) : pair(std::forward<Ts>(ts)...) 
{} + + std::pair<std::string, std::string> pair; + }; + + using key_type = std::string; + using init_type = std::pair<std::string, std::string>; + + template <class allocator_type, class... Args> + static void construct(allocator_type* alloc, slot_type* slot, Args... args) { + std::allocator_traits<allocator_type>::construct( + *alloc, slot, typename slot_type::ctor(), std::forward<Args>(args)...); + } + + template <class allocator_type> + static void destroy(allocator_type* alloc, slot_type* slot) { + std::allocator_traits<allocator_type>::destroy(*alloc, slot); + } + + template <class allocator_type> + static void transfer(allocator_type* alloc, slot_type* new_slot, + slot_type* old_slot) { + construct(alloc, new_slot, std::move(old_slot->pair)); + destroy(alloc, old_slot); + } + + static std::pair<std::string, std::string>& element(slot_type* slot) { + return slot->pair; + } + + template <class F, class... Args> + static auto apply(F&& f, Args&&... args) + -> decltype(apply_impl(std::forward<F>(f), + PairArgs(std::forward<Args>(args)...))) { + return apply_impl(std::forward<F>(f), + PairArgs(std::forward<Args>(args)...)); + } +}; + +struct StringHash : container_internal::hash_default_hash<absl::string_view> { + using is_transparent = void; +}; +struct StringEq : std::equal_to<absl::string_view> { + using is_transparent = void; +}; + +struct StringTable + : raw_hash_set<StringPolicy, StringHash, StringEq, std::allocator<int>> { + using Base = typename StringTable::raw_hash_set; + StringTable() {} + using Base::Base; +}; + +struct IntTable + : raw_hash_set<IntPolicy, container_internal::hash_default_hash<int64_t>, + std::equal_to<int64_t>, std::allocator<int64_t>> { + using Base = typename IntTable::raw_hash_set; + IntTable() {} + using Base::Base; +}; + +struct string_generator { + template <class RNG> + std::string operator()(RNG& rng) const { + std::string res; + res.resize(12); + std::uniform_int_distribution<uint32_t> printable_ascii(0x20, 0x7E); + std::generate(res.begin(), res.end(), [&] { return printable_ascii(rng); }); + return res; + } + + size_t size; +}; + +// Model a cache in steady state. +// +// On a table of size N, keep deleting the LRU entry and add a random one. +void BM_CacheInSteadyState(benchmark::State& state) { + std::random_device rd; + std::mt19937 rng(rd()); + string_generator gen{12}; + StringTable t; + std::deque<std::string> keys; + while (t.size() < state.range(0)) { + auto x = t.emplace(gen(rng), gen(rng)); + if (x.second) keys.push_back(x.first->first); + } + ABSL_RAW_CHECK(state.range(0) >= 10, ""); + while (state.KeepRunning()) { + // Some cache hits. + std::deque<std::string>::const_iterator it; + for (int i = 0; i != 90; ++i) { + if (i % 10 == 0) it = keys.end(); + ::benchmark::DoNotOptimize(t.find(*--it)); + } + // Some cache misses. + for (int i = 0; i != 10; ++i) ::benchmark::DoNotOptimize(t.find(gen(rng))); + ABSL_RAW_CHECK(t.erase(keys.front()), keys.front().c_str()); + keys.pop_front(); + while (true) { + auto x = t.emplace(gen(rng), gen(rng)); + if (x.second) { + keys.push_back(x.first->first); + break; + } + } + } + state.SetItemsProcessed(state.iterations()); + state.SetLabel(absl::StrFormat("load_factor=%.2f", t.load_factor())); +} + +template <typename Benchmark> +void CacheInSteadyStateArgs(Benchmark* bm) { + // The default. + const float max_load_factor = 0.875; + // When the cache is at the steady state, the probe sequence will equal + // capacity if there is no reclamation of deleted slots. 
Pick a number large + // enough to make the benchmark slow for that case. + const size_t capacity = 1 << 10; + + // Check N data points to cover load factors in [0.4, 0.8). + const size_t kNumPoints = 10; + for (size_t i = 0; i != kNumPoints; ++i) + bm->Arg(std::ceil( + capacity * (max_load_factor + i * max_load_factor / kNumPoints) / 2)); +} +BENCHMARK(BM_CacheInSteadyState)->Apply(CacheInSteadyStateArgs); + +void BM_EndComparison(benchmark::State& state) { + std::random_device rd; + std::mt19937 rng(rd()); + string_generator gen{12}; + StringTable t; + while (t.size() < state.range(0)) { + t.emplace(gen(rng), gen(rng)); + } + + for (auto _ : state) { + for (auto it = t.begin(); it != t.end(); ++it) { + benchmark::DoNotOptimize(it); + benchmark::DoNotOptimize(t); + benchmark::DoNotOptimize(it != t.end()); + } + } +} +BENCHMARK(BM_EndComparison)->Arg(400); + +void BM_CopyCtor(benchmark::State& state) { + std::random_device rd; + std::mt19937 rng(rd()); + IntTable t; + std::uniform_int_distribution<uint64_t> dist(0, ~uint64_t{}); + + while (t.size() < state.range(0)) { + t.emplace(dist(rng)); + } + + for (auto _ : state) { + IntTable t2 = t; + benchmark::DoNotOptimize(t2); + } +} +BENCHMARK(BM_CopyCtor)->Range(128, 4096); + +void BM_CopyAssign(benchmark::State& state) { + std::random_device rd; + std::mt19937 rng(rd()); + IntTable t; + std::uniform_int_distribution<uint64_t> dist(0, ~uint64_t{}); + while (t.size() < state.range(0)) { + t.emplace(dist(rng)); + } + + IntTable t2; + for (auto _ : state) { + t2 = t; + benchmark::DoNotOptimize(t2); + } +} +BENCHMARK(BM_CopyAssign)->Range(128, 4096); + +void BM_NoOpReserveIntTable(benchmark::State& state) { + IntTable t; + t.reserve(100000); + for (auto _ : state) { + benchmark::DoNotOptimize(t); + t.reserve(100000); + } +} +BENCHMARK(BM_NoOpReserveIntTable); + +void BM_NoOpReserveStringTable(benchmark::State& state) { + StringTable t; + t.reserve(100000); + for (auto _ : state) { + benchmark::DoNotOptimize(t); + t.reserve(100000); + } +} +BENCHMARK(BM_NoOpReserveStringTable); + +void BM_ReserveIntTable(benchmark::State& state) { + int reserve_size = state.range(0); + for (auto _ : state) { + state.PauseTiming(); + IntTable t; + state.ResumeTiming(); + benchmark::DoNotOptimize(t); + t.reserve(reserve_size); + } +} +BENCHMARK(BM_ReserveIntTable)->Range(128, 4096); + +void BM_ReserveStringTable(benchmark::State& state) { + int reserve_size = state.range(0); + for (auto _ : state) { + state.PauseTiming(); + StringTable t; + state.ResumeTiming(); + benchmark::DoNotOptimize(t); + t.reserve(reserve_size); + } +} +BENCHMARK(BM_ReserveStringTable)->Range(128, 4096); + +void BM_Group_Match(benchmark::State& state) { + std::array<ctrl_t, Group::kWidth> group; + std::iota(group.begin(), group.end(), -4); + Group g{group.data()}; + h2_t h = 1; + for (auto _ : state) { + ::benchmark::DoNotOptimize(h); + ::benchmark::DoNotOptimize(g.Match(h)); + } +} +BENCHMARK(BM_Group_Match); + +void BM_Group_MatchEmpty(benchmark::State& state) { + std::array<ctrl_t, Group::kWidth> group; + std::iota(group.begin(), group.end(), -4); + Group g{group.data()}; + for (auto _ : state) ::benchmark::DoNotOptimize(g.MatchEmpty()); +} +BENCHMARK(BM_Group_MatchEmpty); + +void BM_Group_MatchEmptyOrDeleted(benchmark::State& state) { + std::array<ctrl_t, Group::kWidth> group; + std::iota(group.begin(), group.end(), -4); + Group g{group.data()}; + for (auto _ : state) ::benchmark::DoNotOptimize(g.MatchEmptyOrDeleted()); +} +BENCHMARK(BM_Group_MatchEmptyOrDeleted); + +void 
BM_Group_CountLeadingEmptyOrDeleted(benchmark::State& state) { + std::array<ctrl_t, Group::kWidth> group; + std::iota(group.begin(), group.end(), -2); + Group g{group.data()}; + for (auto _ : state) + ::benchmark::DoNotOptimize(g.CountLeadingEmptyOrDeleted()); +} +BENCHMARK(BM_Group_CountLeadingEmptyOrDeleted); + +void BM_Group_MatchFirstEmptyOrDeleted(benchmark::State& state) { + std::array<ctrl_t, Group::kWidth> group; + std::iota(group.begin(), group.end(), -2); + Group g{group.data()}; + for (auto _ : state) ::benchmark::DoNotOptimize(*g.MatchEmptyOrDeleted()); +} +BENCHMARK(BM_Group_MatchFirstEmptyOrDeleted); + +void BM_DropDeletes(benchmark::State& state) { + constexpr size_t capacity = (1 << 20) - 1; + std::vector<ctrl_t> ctrl(capacity + 1 + Group::kWidth); + ctrl[capacity] = kSentinel; + std::vector<ctrl_t> pattern = {kEmpty, 2, kDeleted, 2, kEmpty, 1, kDeleted}; + for (size_t i = 0; i != capacity; ++i) { + ctrl[i] = pattern[i % pattern.size()]; + } + while (state.KeepRunning()) { + state.PauseTiming(); + std::vector<ctrl_t> ctrl_copy = ctrl; + state.ResumeTiming(); + ConvertDeletedToEmptyAndFullToDeleted(ctrl_copy.data(), capacity); + ::benchmark::DoNotOptimize(ctrl_copy[capacity]); + } +} +BENCHMARK(BM_DropDeletes); + +} // namespace +} // namespace container_internal +ABSL_NAMESPACE_END +} // namespace absl + +// These methods are here to make it easy to examine the assembly for targeted +// parts of the API. +auto CodegenAbslRawHashSetInt64Find(absl::container_internal::IntTable* table, + int64_t key) -> decltype(table->find(key)) { + return table->find(key); +} + +bool CodegenAbslRawHashSetInt64FindNeEnd( + absl::container_internal::IntTable* table, int64_t key) { + return table->find(key) != table->end(); +} + +bool CodegenAbslRawHashSetInt64Contains( + absl::container_internal::IntTable* table, int64_t key) { + return table->contains(key); +} + +void CodegenAbslRawHashSetInt64Iterate( + absl::container_internal::IntTable* table) { + for (auto x : *table) benchmark::DoNotOptimize(x); +} + +int odr = + (::benchmark::DoNotOptimize(std::make_tuple( + &CodegenAbslRawHashSetInt64Find, &CodegenAbslRawHashSetInt64FindNeEnd, + &CodegenAbslRawHashSetInt64Contains, + &CodegenAbslRawHashSetInt64Iterate)), + 1); diff --git a/absl/container/internal/raw_hash_set_probe_benchmark.cc b/absl/container/internal/raw_hash_set_probe_benchmark.cc new file mode 100644 index 00000000..7169a2e2 --- /dev/null +++ b/absl/container/internal/raw_hash_set_probe_benchmark.cc @@ -0,0 +1,590 @@ +// Copyright 2018 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Generates probe length statistics for many combinations of key types and key +// distributions, all using the default hash function for swisstable. 
+ +#include <memory> +#include <regex> // NOLINT +#include <vector> + +#include "absl/container/flat_hash_map.h" +#include "absl/container/internal/hash_function_defaults.h" +#include "absl/container/internal/hashtable_debug.h" +#include "absl/container/internal/raw_hash_set.h" +#include "absl/random/distributions.h" +#include "absl/random/random.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/str_format.h" +#include "absl/strings/string_view.h" +#include "absl/strings/strip.h" + +namespace { + +enum class OutputStyle { kRegular, kBenchmark }; + +// The --benchmark command line flag. +// This is populated from main(). +// When run in "benchmark" mode, we have different output. This allows +// A/B comparisons with tools like `benchy`. +absl::string_view benchmarks; + +OutputStyle output() { + return !benchmarks.empty() ? OutputStyle::kBenchmark : OutputStyle::kRegular; +} + +template <class T> +struct Policy { + using slot_type = T; + using key_type = T; + using init_type = T; + + template <class allocator_type, class Arg> + static void construct(allocator_type* alloc, slot_type* slot, + const Arg& arg) { + std::allocator_traits<allocator_type>::construct(*alloc, slot, arg); + } + + template <class allocator_type> + static void destroy(allocator_type* alloc, slot_type* slot) { + std::allocator_traits<allocator_type>::destroy(*alloc, slot); + } + + static slot_type& element(slot_type* slot) { return *slot; } + + template <class F, class... Args> + static auto apply(F&& f, const slot_type& arg) + -> decltype(std::forward<F>(f)(arg, arg)) { + return std::forward<F>(f)(arg, arg); + } +}; + +absl::BitGen& GlobalBitGen() { + static auto* value = new absl::BitGen; + return *value; +} + +// Keeps a pool of allocations and randomly gives one out. +// This introduces more randomization to the addresses given to swisstable and +// should help smooth out this factor from probe length calculation. +template <class T> +class RandomizedAllocator { + public: + using value_type = T; + + RandomizedAllocator() = default; + template <typename U> + RandomizedAllocator(RandomizedAllocator<U>) {} // NOLINT + + static T* allocate(size_t n) { + auto& pointers = GetPointers(n); + // Fill the pool + while (pointers.size() < kRandomPool) { + pointers.push_back(std::allocator<T>{}.allocate(n)); + } + + // Choose a random one. + size_t i = absl::Uniform<size_t>(GlobalBitGen(), 0, pointers.size()); + T* result = pointers[i]; + pointers[i] = pointers.back(); + pointers.pop_back(); + return result; + } + + static void deallocate(T* p, size_t n) { + // Just put it back on the pool. No need to release the memory. + GetPointers(n).push_back(p); + } + + private: + // We keep at least kRandomPool allocations for each size. + static constexpr size_t kRandomPool = 20; + + static std::vector<T*>& GetPointers(size_t n) { + static auto* m = new absl::flat_hash_map<size_t, std::vector<T*>>(); + return (*m)[n]; + } +}; + +template <class T> +struct DefaultHash { + using type = absl::container_internal::hash_default_hash<T>; +}; + +template <class T> +using DefaultHashT = typename DefaultHash<T>::type; + +template <class T> +struct Table : absl::container_internal::raw_hash_set< + Policy<T>, DefaultHashT<T>, + absl::container_internal::hash_default_eq<T>, + RandomizedAllocator<T>> {}; + +struct LoadSizes { + size_t min_load; + size_t max_load; +}; + +LoadSizes GetMinMaxLoadSizes() { + static const auto sizes = [] { + Table<int> t; + + // First, fill enough to have a good distribution. 
+ constexpr size_t kMinSize = 10000; + while (t.size() < kMinSize) t.insert(t.size()); + + const auto reach_min_load_factor = [&] { + const double lf = t.load_factor(); + while (lf <= t.load_factor()) t.insert(t.size()); + }; + + // Then, insert until we reach min load factor. + reach_min_load_factor(); + const size_t min_load_size = t.size(); + + // Keep going until we hit min load factor again, then go back one. + t.insert(t.size()); + reach_min_load_factor(); + + return LoadSizes{min_load_size, t.size() - 1}; + }(); + return sizes; +} + +struct Ratios { + double min_load; + double avg_load; + double max_load; +}; + +// See absl/container/internal/hashtable_debug.h for details on +// probe length calculation. +template <class ElemFn> +Ratios CollectMeanProbeLengths() { + const auto min_max_sizes = GetMinMaxLoadSizes(); + + ElemFn elem; + using Key = decltype(elem()); + Table<Key> t; + + Ratios result; + while (t.size() < min_max_sizes.min_load) t.insert(elem()); + result.min_load = + absl::container_internal::GetHashtableDebugProbeSummary(t).mean; + + while (t.size() < (min_max_sizes.min_load + min_max_sizes.max_load) / 2) + t.insert(elem()); + result.avg_load = + absl::container_internal::GetHashtableDebugProbeSummary(t).mean; + + while (t.size() < min_max_sizes.max_load) t.insert(elem()); + result.max_load = + absl::container_internal::GetHashtableDebugProbeSummary(t).mean; + + return result; +} + +template <int Align> +uintptr_t PointerForAlignment() { + alignas(Align) static constexpr uintptr_t kInitPointer = 0; + return reinterpret_cast<uintptr_t>(&kInitPointer); +} + +// This incomplete type is used for testing hash of pointers of different +// alignments. +// NOTE: We are generating invalid pointer values on the fly with +// reinterpret_cast. There are not "safely derived" pointers so using them is +// technically UB. It is unlikely to be a problem, though. +template <int Align> +struct Ptr; + +template <int Align> +Ptr<Align>* MakePtr(uintptr_t v) { + if (sizeof(v) == 8) { + constexpr int kCopyBits = 16; + // Ensure high bits are all the same. + v = static_cast<uintptr_t>(static_cast<intptr_t>(v << kCopyBits) >> + kCopyBits); + } + return reinterpret_cast<Ptr<Align>*>(v); +} + +struct IntIdentity { + uint64_t i; + friend bool operator==(IntIdentity a, IntIdentity b) { return a.i == b.i; } + IntIdentity operator++(int) { return IntIdentity{i++}; } +}; + +template <int Align> +struct PtrIdentity { + explicit PtrIdentity(uintptr_t val = PointerForAlignment<Align>()) : i(val) {} + uintptr_t i; + friend bool operator==(PtrIdentity a, PtrIdentity b) { return a.i == b.i; } + PtrIdentity operator++(int) { + PtrIdentity p(i); + i += Align; + return p; + } +}; + +constexpr char kStringFormat[] = "/path/to/file/name-%07d-of-9999999.txt"; + +template <bool small> +struct String { + std::string value; + static std::string Make(uint32_t v) { + return {small ? 
absl::StrCat(v) : absl::StrFormat(kStringFormat, v)}; + } +}; + +template <> +struct DefaultHash<IntIdentity> { + struct type { + size_t operator()(IntIdentity t) const { return t.i; } + }; +}; + +template <int Align> +struct DefaultHash<PtrIdentity<Align>> { + struct type { + size_t operator()(PtrIdentity<Align> t) const { return t.i; } + }; +}; + +template <class T> +struct Sequential { + T operator()() const { return current++; } + mutable T current{}; +}; + +template <int Align> +struct Sequential<Ptr<Align>*> { + Ptr<Align>* operator()() const { + auto* result = MakePtr<Align>(current); + current += Align; + return result; + } + mutable uintptr_t current = PointerForAlignment<Align>(); +}; + + +template <bool small> +struct Sequential<String<small>> { + std::string operator()() const { return String<small>::Make(current++); } + mutable uint32_t current = 0; +}; + +template <class T, class U> +struct Sequential<std::pair<T, U>> { + mutable Sequential<T> tseq; + mutable Sequential<U> useq; + + using RealT = decltype(tseq()); + using RealU = decltype(useq()); + + mutable std::vector<RealT> ts; + mutable std::vector<RealU> us; + mutable size_t ti = 0, ui = 0; + + std::pair<RealT, RealU> operator()() const { + std::pair<RealT, RealU> value{get_t(), get_u()}; + if (ti == 0) { + ti = ui + 1; + ui = 0; + } else { + --ti; + ++ui; + } + return value; + } + + RealT get_t() const { + while (ti >= ts.size()) ts.push_back(tseq()); + return ts[ti]; + } + + RealU get_u() const { + while (ui >= us.size()) us.push_back(useq()); + return us[ui]; + } +}; + +template <class T, int percent_skip> +struct AlmostSequential { + mutable Sequential<T> current; + + auto operator()() const -> decltype(current()) { + while (absl::Uniform(GlobalBitGen(), 0.0, 1.0) <= percent_skip / 100.) 
+ current(); + return current(); + } +}; + +struct Uniform { + template <typename T> + T operator()(T) const { + return absl::Uniform<T>(absl::IntervalClosed, GlobalBitGen(), T{0}, ~T{0}); + } +}; + +struct Gaussian { + template <typename T> + T operator()(T) const { + double d; + do { + d = absl::Gaussian<double>(GlobalBitGen(), 1e6, 1e4); + } while (d <= 0 || d > std::numeric_limits<T>::max() / 2); + return static_cast<T>(d); + } +}; + +struct Zipf { + template <typename T> + T operator()(T) const { + return absl::Zipf<T>(GlobalBitGen(), std::numeric_limits<T>::max(), 1.6); + } +}; + +template <class T, class Dist> +struct Random { + T operator()() const { return Dist{}(T{}); } +}; + +template <class Dist, int Align> +struct Random<Ptr<Align>*, Dist> { + Ptr<Align>* operator()() const { + return MakePtr<Align>(Random<uintptr_t, Dist>{}() * Align); + } +}; + +template <class Dist> +struct Random<IntIdentity, Dist> { + IntIdentity operator()() const { + return IntIdentity{Random<uint64_t, Dist>{}()}; + } +}; + +template <class Dist, int Align> +struct Random<PtrIdentity<Align>, Dist> { + PtrIdentity<Align> operator()() const { + return PtrIdentity<Align>{Random<uintptr_t, Dist>{}() * Align}; + } +}; + +template <class Dist, bool small> +struct Random<String<small>, Dist> { + std::string operator()() const { + return String<small>::Make(Random<uint32_t, Dist>{}()); + } +}; + +template <class T, class U, class Dist> +struct Random<std::pair<T, U>, Dist> { + auto operator()() const + -> decltype(std::make_pair(Random<T, Dist>{}(), Random<U, Dist>{}())) { + return std::make_pair(Random<T, Dist>{}(), Random<U, Dist>{}()); + } +}; + +template <typename> +std::string Name(); + +std::string Name(uint32_t*) { return "u32"; } +std::string Name(uint64_t*) { return "u64"; } +std::string Name(IntIdentity*) { return "IntIdentity"; } + +template <int Align> +std::string Name(Ptr<Align>**) { + return absl::StrCat("Ptr", Align); +} + +template <int Align> +std::string Name(PtrIdentity<Align>*) { + return absl::StrCat("PtrIdentity", Align); +} + +template <bool small> +std::string Name(String<small>*) { + return small ? "StrS" : "StrL"; +} + +template <class T, class U> +std::string Name(std::pair<T, U>*) { + if (output() == OutputStyle::kBenchmark) + return absl::StrCat("P_", Name<T>(), "_", Name<U>()); + return absl::StrCat("P<", Name<T>(), ",", Name<U>(), ">"); +} + +template <class T> +std::string Name(Sequential<T>*) { + return "Sequential"; +} + +template <class T, int P> +std::string Name(AlmostSequential<T, P>*) { + return absl::StrCat("AlmostSeq_", P); +} + +template <class T> +std::string Name(Random<T, Uniform>*) { + return "UnifRand"; +} + +template <class T> +std::string Name(Random<T, Gaussian>*) { + return "GausRand"; +} + +template <class T> +std::string Name(Random<T, Zipf>*) { + return "ZipfRand"; +} + +template <typename T> +std::string Name() { + return Name(static_cast<T*>(nullptr)); +} + +constexpr int kNameWidth = 15; +constexpr int kDistWidth = 16; + +bool CanRunBenchmark(absl::string_view name) { + static std::regex* const filter = []() -> std::regex* { + return benchmarks.empty() || benchmarks == "all" + ? 
nullptr + : new std::regex(std::string(benchmarks)); + }(); + return filter == nullptr || std::regex_search(std::string(name), *filter); +} + +struct Result { + std::string name; + std::string dist_name; + Ratios ratios; +}; + +template <typename T, typename Dist> +void RunForTypeAndDistribution(std::vector<Result>& results) { + std::string name = absl::StrCat(Name<T>(), "/", Name<Dist>()); + // We have to check against all three names (min/avg/max) before we run it. + // If any of them is enabled, we run it. + if (!CanRunBenchmark(absl::StrCat(name, "/min")) && + !CanRunBenchmark(absl::StrCat(name, "/avg")) && + !CanRunBenchmark(absl::StrCat(name, "/max"))) { + return; + } + results.push_back({Name<T>(), Name<Dist>(), CollectMeanProbeLengths<Dist>()}); +} + +template <class T> +void RunForType(std::vector<Result>& results) { + RunForTypeAndDistribution<T, Sequential<T>>(results); + RunForTypeAndDistribution<T, AlmostSequential<T, 20>>(results); + RunForTypeAndDistribution<T, AlmostSequential<T, 50>>(results); + RunForTypeAndDistribution<T, Random<T, Uniform>>(results); +#ifdef NDEBUG + // Disable these in non-opt mode because they take too long. + RunForTypeAndDistribution<T, Random<T, Gaussian>>(results); + RunForTypeAndDistribution<T, Random<T, Zipf>>(results); +#endif // NDEBUG +} + +} // namespace + +int main(int argc, char** argv) { + // Parse the benchmark flags. Ignore all of them except the regex pattern. + for (int i = 1; i < argc; ++i) { + absl::string_view arg = argv[i]; + const auto next = [&] { return argv[std::min(i + 1, argc - 1)]; }; + + if (absl::ConsumePrefix(&arg, "--benchmark_filter")) { + if (arg == "") { + // --benchmark_filter X + benchmarks = next(); + } else if (absl::ConsumePrefix(&arg, "=")) { + // --benchmark_filter=X + benchmarks = arg; + } + } + + // Any --benchmark flag turns on the mode. + if (absl::ConsumePrefix(&arg, "--benchmark")) { + if (benchmarks.empty()) benchmarks="all"; + } + } + + std::vector<Result> results; + RunForType<uint64_t>(results); + RunForType<IntIdentity>(results); + RunForType<Ptr<8>*>(results); + RunForType<Ptr<16>*>(results); + RunForType<Ptr<32>*>(results); + RunForType<Ptr<64>*>(results); + RunForType<PtrIdentity<8>>(results); + RunForType<PtrIdentity<16>>(results); + RunForType<PtrIdentity<32>>(results); + RunForType<PtrIdentity<64>>(results); + RunForType<std::pair<uint32_t, uint32_t>>(results); + RunForType<String<true>>(results); + RunForType<String<false>>(results); + RunForType<std::pair<uint64_t, String<true>>>(results); + RunForType<std::pair<String<true>, uint64_t>>(results); + RunForType<std::pair<uint64_t, String<false>>>(results); + RunForType<std::pair<String<false>, uint64_t>>(results); + + switch (output()) { + case OutputStyle::kRegular: + absl::PrintF("%-*s%-*s Min Avg Max\n%s\n", kNameWidth, + "Type", kDistWidth, "Distribution", + std::string(kNameWidth + kDistWidth + 10 * 3, '-')); + for (const auto& result : results) { + absl::PrintF("%-*s%-*s %8.4f %8.4f %8.4f\n", kNameWidth, result.name, + kDistWidth, result.dist_name, result.ratios.min_load, + result.ratios.avg_load, result.ratios.max_load); + } + break; + case OutputStyle::kBenchmark: { + absl::PrintF("{\n"); + absl::PrintF(" \"benchmarks\": [\n"); + absl::string_view comma; + for (const auto& result : results) { + auto print = [&](absl::string_view stat, double Ratios::*val) { + std::string name = + absl::StrCat(result.name, "/", result.dist_name, "/", stat); + // Check the regex again. 
We might have enabled only one of the
+          // stats for the benchmark.
+          if (!CanRunBenchmark(name)) return;
+          absl::PrintF("    %s{\n", comma);
+          absl::PrintF("      \"cpu_time\": %f,\n", 1e9 * result.ratios.*val);
+          absl::PrintF("      \"real_time\": %f,\n", 1e9 * result.ratios.*val);
+          absl::PrintF("      \"iterations\": 1,\n");
+          absl::PrintF("      \"name\": \"%s\",\n", name);
+          absl::PrintF("      \"time_unit\": \"ns\"\n");
+          absl::PrintF("    }\n");
+          comma = ",";
+        };
+        print("min", &Ratios::min_load);
+        print("avg", &Ratios::avg_load);
+        print("max", &Ratios::max_load);
+      }
+      absl::PrintF("  ],\n");
+      absl::PrintF("  \"context\": {\n");
+      absl::PrintF("  }\n");
+      absl::PrintF("}\n");
+      break;
+    }
+  }
+
+  return 0;
+}
diff --git a/absl/hash/BUILD.bazel b/absl/hash/BUILD.bazel
index 40c8f207..acd490ba 100644
--- a/absl/hash/BUILD.bazel
+++ b/absl/hash/BUILD.bazel
@@ -82,6 +82,24 @@ cc_test(
     ],
 )
 
+cc_binary(
+    name = "hash_benchmark",
+    testonly = 1,
+    srcs = ["hash_benchmark.cc"],
+    copts = ABSL_TEST_COPTS,
+    linkopts = ABSL_DEFAULT_LINKOPTS,
+    tags = ["benchmark"],
+    visibility = ["//visibility:private"],
+    deps = [
+        ":hash",
+        "//absl/base:core_headers",
+        "//absl/random",
+        "//absl/strings:cord",
+        "//absl/strings:cord_test_helpers",
+        "@com_github_google_benchmark//:benchmark_main",
+    ],
+)
+
 cc_library(
     name = "spy_hash_state",
     testonly = 1,
diff --git a/absl/hash/hash_benchmark.cc b/absl/hash/hash_benchmark.cc
new file mode 100644
index 00000000..27e52719
--- /dev/null
+++ b/absl/hash/hash_benchmark.cc
@@ -0,0 +1,249 @@
+// Copyright 2018 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +#include <typeindex> + +#include "absl/base/attributes.h" +#include "absl/hash/hash.h" +#include "absl/random/random.h" +#include "absl/strings/cord.h" +#include "absl/strings/cord_test_helpers.h" +#include "benchmark/benchmark.h" + +namespace { + +using absl::Hash; + +template <template <typename> class H, typename T> +void RunBenchmark(benchmark::State& state, T value) { + H<T> h; + for (auto _ : state) { + benchmark::DoNotOptimize(value); + benchmark::DoNotOptimize(h(value)); + } +} + +} // namespace + +template <typename T> +using AbslHash = absl::Hash<T>; + +class TypeErasedInterface { + public: + virtual ~TypeErasedInterface() = default; + + template <typename H> + friend H AbslHashValue(H state, const TypeErasedInterface& wrapper) { + state = H::combine(std::move(state), std::type_index(typeid(wrapper))); + wrapper.HashValue(absl::HashState::Create(&state)); + return state; + } + + private: + virtual void HashValue(absl::HashState state) const = 0; +}; + +template <typename T> +struct TypeErasedAbslHash { + class Wrapper : public TypeErasedInterface { + public: + explicit Wrapper(const T& value) : value_(value) {} + + private: + void HashValue(absl::HashState state) const override { + absl::HashState::combine(std::move(state), value_); + } + + const T& value_; + }; + + size_t operator()(const T& value) { + return absl::Hash<Wrapper>{}(Wrapper(value)); + } +}; + +template <typename FuncType> +inline FuncType* ODRUseFunction(FuncType* ptr) { + volatile FuncType* dummy = ptr; + return dummy; +} + +absl::Cord FlatCord(size_t size) { + absl::Cord result(std::string(size, 'a')); + result.Flatten(); + return result; +} + +absl::Cord FragmentedCord(size_t size) { + const size_t orig_size = size; + std::vector<std::string> chunks; + size_t chunk_size = std::max<size_t>(1, size / 10); + while (size > chunk_size) { + chunks.push_back(std::string(chunk_size, 'a')); + size -= chunk_size; + } + if (size > 0) { + chunks.push_back(std::string(size, 'a')); + } + absl::Cord result = absl::MakeFragmentedCord(chunks); + (void) orig_size; + assert(result.size() == orig_size); + return result; +} + +// Generates a benchmark and a codegen method for the provided types. The +// codegen method provides a well known entrypoint for dumping assembly. +#define MAKE_BENCHMARK(hash, name, ...) 
\ + namespace { \ + void BM_##hash##_##name(benchmark::State& state) { \ + RunBenchmark<hash>(state, __VA_ARGS__); \ + } \ + BENCHMARK(BM_##hash##_##name); \ + } \ + size_t Codegen##hash##name(const decltype(__VA_ARGS__)& arg); \ + size_t Codegen##hash##name(const decltype(__VA_ARGS__)& arg) { \ + return hash<decltype(__VA_ARGS__)>{}(arg); \ + } \ + bool absl_hash_test_odr_use##hash##name = \ + ODRUseFunction(&Codegen##hash##name); + +MAKE_BENCHMARK(AbslHash, Int32, int32_t{}); +MAKE_BENCHMARK(AbslHash, Int64, int64_t{}); +MAKE_BENCHMARK(AbslHash, Double, 1.2); +MAKE_BENCHMARK(AbslHash, DoubleZero, 0.0); +MAKE_BENCHMARK(AbslHash, PairInt32Int32, std::pair<int32_t, int32_t>{}); +MAKE_BENCHMARK(AbslHash, PairInt64Int64, std::pair<int64_t, int64_t>{}); +MAKE_BENCHMARK(AbslHash, TupleInt32BoolInt64, + std::tuple<int32_t, bool, int64_t>{}); +MAKE_BENCHMARK(AbslHash, String_0, std::string()); +MAKE_BENCHMARK(AbslHash, String_10, std::string(10, 'a')); +MAKE_BENCHMARK(AbslHash, String_30, std::string(30, 'a')); +MAKE_BENCHMARK(AbslHash, String_90, std::string(90, 'a')); +MAKE_BENCHMARK(AbslHash, String_200, std::string(200, 'a')); +MAKE_BENCHMARK(AbslHash, String_5000, std::string(5000, 'a')); +MAKE_BENCHMARK(AbslHash, Cord_Flat_0, absl::Cord()); +MAKE_BENCHMARK(AbslHash, Cord_Flat_10, FlatCord(10)); +MAKE_BENCHMARK(AbslHash, Cord_Flat_30, FlatCord(30)); +MAKE_BENCHMARK(AbslHash, Cord_Flat_90, FlatCord(90)); +MAKE_BENCHMARK(AbslHash, Cord_Flat_200, FlatCord(200)); +MAKE_BENCHMARK(AbslHash, Cord_Flat_5000, FlatCord(5000)); +MAKE_BENCHMARK(AbslHash, Cord_Fragmented_200, FragmentedCord(200)); +MAKE_BENCHMARK(AbslHash, Cord_Fragmented_5000, FragmentedCord(5000)); +MAKE_BENCHMARK(AbslHash, VectorInt64_10, std::vector<int64_t>(10)); +MAKE_BENCHMARK(AbslHash, VectorInt64_100, std::vector<int64_t>(100)); +MAKE_BENCHMARK(AbslHash, VectorDouble_10, std::vector<double>(10, 1.1)); +MAKE_BENCHMARK(AbslHash, VectorDouble_100, std::vector<double>(100, 1.1)); +MAKE_BENCHMARK(AbslHash, PairStringString_0, + std::make_pair(std::string(), std::string())); +MAKE_BENCHMARK(AbslHash, PairStringString_10, + std::make_pair(std::string(10, 'a'), std::string(10, 'b'))); +MAKE_BENCHMARK(AbslHash, PairStringString_30, + std::make_pair(std::string(30, 'a'), std::string(30, 'b'))); +MAKE_BENCHMARK(AbslHash, PairStringString_90, + std::make_pair(std::string(90, 'a'), std::string(90, 'b'))); +MAKE_BENCHMARK(AbslHash, PairStringString_200, + std::make_pair(std::string(200, 'a'), std::string(200, 'b'))); +MAKE_BENCHMARK(AbslHash, PairStringString_5000, + std::make_pair(std::string(5000, 'a'), std::string(5000, 'b'))); + +MAKE_BENCHMARK(TypeErasedAbslHash, Int32, int32_t{}); +MAKE_BENCHMARK(TypeErasedAbslHash, Int64, int64_t{}); +MAKE_BENCHMARK(TypeErasedAbslHash, PairInt32Int32, + std::pair<int32_t, int32_t>{}); +MAKE_BENCHMARK(TypeErasedAbslHash, PairInt64Int64, + std::pair<int64_t, int64_t>{}); +MAKE_BENCHMARK(TypeErasedAbslHash, TupleInt32BoolInt64, + std::tuple<int32_t, bool, int64_t>{}); +MAKE_BENCHMARK(TypeErasedAbslHash, String_0, std::string()); +MAKE_BENCHMARK(TypeErasedAbslHash, String_10, std::string(10, 'a')); +MAKE_BENCHMARK(TypeErasedAbslHash, String_30, std::string(30, 'a')); +MAKE_BENCHMARK(TypeErasedAbslHash, String_90, std::string(90, 'a')); +MAKE_BENCHMARK(TypeErasedAbslHash, String_200, std::string(200, 'a')); +MAKE_BENCHMARK(TypeErasedAbslHash, String_5000, std::string(5000, 'a')); +MAKE_BENCHMARK(TypeErasedAbslHash, VectorDouble_10, + std::vector<double>(10, 1.1)); +MAKE_BENCHMARK(TypeErasedAbslHash, 
VectorDouble_100, + std::vector<double>(100, 1.1)); + +// The latency benchmark attempts to model the speed of the hash function in +// production. When a hash function is used for hashtable lookups it is rarely +// used to hash N items in a tight loop nor on constant sized strings. Instead, +// after hashing there is a potential equality test plus a (usually) large +// amount of user code. To simulate this effectively we introduce a data +// dependency between elements we hash by using the hash of the Nth element as +// the selector of the N+1th element to hash. This isolates the hash function +// code much like in production. As a bonus we use the hash to generate strings +// of size [1,N] (instead of fixed N) to disable perfect branch predictions in +// hash function implementations. +namespace { +// 16kb fits in L1 cache of most CPUs we care about. Keeping memory latency low +// will allow us to attribute most time to CPU which means more accurate +// measurements. +static constexpr size_t kEntropySize = 16 << 10; +static char entropy[kEntropySize + 1024]; +ABSL_ATTRIBUTE_UNUSED static const bool kInitialized = [] { + absl::BitGen gen; + static_assert(sizeof(entropy) % sizeof(uint64_t) == 0, ""); + for (int i = 0; i != sizeof(entropy); i += sizeof(uint64_t)) { + auto rand = absl::Uniform<uint64_t>(gen); + memcpy(&entropy[i], &rand, sizeof(uint64_t)); + } + return true; +}(); +} // namespace + +template <class T> +struct PodRand { + static_assert(std::is_pod<T>::value, ""); + static_assert(kEntropySize + sizeof(T) < sizeof(entropy), ""); + + T Get(size_t i) const { + T v; + memcpy(&v, &entropy[i % kEntropySize], sizeof(T)); + return v; + } +}; + +template <size_t N> +struct StringRand { + static_assert(kEntropySize + N < sizeof(entropy), ""); + + absl::string_view Get(size_t i) const { + // This has a small bias towards small numbers. Because max N is ~200 this + // is very small and prefer to be very fast instead of absolutely accurate. + // Also we pass N = 2^K+1 so that mod reduces to a bitand. + size_t s = (i % (N - 1)) + 1; + return {&entropy[i % kEntropySize], s}; + } +}; + +#define MAKE_LATENCY_BENCHMARK(hash, name, ...) 
\ + namespace { \ + void BM_latency_##hash##_##name(benchmark::State& state) { \ + __VA_ARGS__ r; \ + hash<decltype(r.Get(0))> h; \ + size_t i = 871401241; \ + for (auto _ : state) { \ + benchmark::DoNotOptimize(i = h(r.Get(i))); \ + } \ + } \ + BENCHMARK(BM_latency_##hash##_##name); \ + } // namespace + +MAKE_LATENCY_BENCHMARK(AbslHash, Int32, PodRand<int32_t>); +MAKE_LATENCY_BENCHMARK(AbslHash, Int64, PodRand<int64_t>); +MAKE_LATENCY_BENCHMARK(AbslHash, String9, StringRand<9>); +MAKE_LATENCY_BENCHMARK(AbslHash, String33, StringRand<33>); +MAKE_LATENCY_BENCHMARK(AbslHash, String65, StringRand<65>); +MAKE_LATENCY_BENCHMARK(AbslHash, String257, StringRand<257>); diff --git a/absl/strings/cord.cc b/absl/strings/cord.cc index 7c4f6c92..badeb610 100644 --- a/absl/strings/cord.cc +++ b/absl/strings/cord.cc @@ -58,7 +58,6 @@ using ::absl::cord_internal::kMaxFlatLength; using ::absl::cord_internal::CONCAT; using ::absl::cord_internal::EXTERNAL; using ::absl::cord_internal::FLAT; -using ::absl::cord_internal::MAX_FLAT_TAG; using ::absl::cord_internal::SUBSTRING; namespace cord_internal { @@ -93,6 +92,15 @@ inline const CordRepExternal* CordRep::external() const { return static_cast<const CordRepExternal*>(this); } +inline CordRepFlat* CordRep::flat() { + assert(tag >= FLAT); + return static_cast<CordRepFlat*>(this); +} +inline const CordRepFlat* CordRep::flat() const { + assert(tag >= FLAT); + return static_cast<const CordRepFlat*>(this); +} + } // namespace cord_internal // Prefer copying blocks of at most this size, otherwise reference count. @@ -457,7 +465,7 @@ static inline bool PrepareAppendRegion(CordRep* root, char** region, } const size_t in_use = dst->length; - const size_t capacity = static_cast<CordRepFlat*>(dst)->Capacity(); + const size_t capacity = dst->flat()->Capacity(); if (in_use == capacity) { *region = nullptr; *size = 0; @@ -539,7 +547,7 @@ void Cord::InlineRep::GetAppendRegion(char** region, size_t* size) { // will return true. static bool RepMemoryUsageLeaf(const CordRep* rep, size_t* total_mem_usage) { if (rep->tag >= FLAT) { - *total_mem_usage += static_cast<const CordRepFlat*>(rep)->AllocatedSize(); + *total_mem_usage += rep->flat()->AllocatedSize(); return true; } if (rep->tag == EXTERNAL) { @@ -637,7 +645,7 @@ Cord& Cord::operator=(absl::string_view src) { return *this; } if (tree != nullptr && tree->tag >= FLAT && - static_cast<CordRepFlat*>(tree)->Capacity() >= length && + tree->flat()->Capacity() >= length && tree->refcount.IsOne()) { // Copy in place if the existing FLAT node is reusable. 
memmove(tree->data, data, length); @@ -694,7 +702,7 @@ void Cord::InlineRep::AppendArray(const char* src_data, size_t src_size) { const size_t size2 = inline_length + src_size / 10; root = CordRepFlat::New(std::max<size_t>(size1, size2)); appended = std::min( - src_size, static_cast<CordRepFlat*>(root)->Capacity() - inline_length); + src_size, root->flat()->Capacity() - inline_length); memcpy(root->data, data_.as_chars, inline_length); memcpy(root->data + inline_length, src_data, appended); root->length = inline_length + appended; @@ -1785,7 +1793,7 @@ static void DumpNode(CordRep* rep, bool include_data, std::ostream* os) { *os << absl::CEscape(std::string(rep->external()->base, rep->length)); *os << "]\n"; } else { - *os << "FLAT cap=" << static_cast<CordRepFlat*>(rep)->Capacity() + *os << "FLAT cap=" << rep->flat()->Capacity() << " ["; if (include_data) *os << absl::CEscape(std::string(rep->data, rep->length)); @@ -1835,7 +1843,7 @@ static bool VerifyNode(CordRep* root, CordRep* start_node, } } else if (node->tag >= FLAT) { ABSL_INTERNAL_CHECK( - node->length <= static_cast<CordRepFlat*>(node)->Capacity(), + node->length <= node->flat()->Capacity(), ReportError(root, node)); } else if (node->tag == EXTERNAL) { ABSL_INTERNAL_CHECK(node->external()->base != nullptr, diff --git a/absl/strings/internal/cord_internal.h b/absl/strings/internal/cord_internal.h index ec2c767b..6fb75c4f 100644 --- a/absl/strings/internal/cord_internal.h +++ b/absl/strings/internal/cord_internal.h @@ -149,6 +149,8 @@ struct CordRep { inline const CordRepSubstring* substring() const; inline CordRepExternal* external(); inline const CordRepExternal* external() const; + inline CordRepFlat* flat(); + inline const CordRepFlat* flat() const; }; struct CordRepConcat : public CordRep { diff --git a/absl/strings/internal/cord_rep_flat.h b/absl/strings/internal/cord_rep_flat.h index 3e2cd33c..80391a5e 100644 --- a/absl/strings/internal/cord_rep_flat.h +++ b/absl/strings/internal/cord_rep_flat.h @@ -43,7 +43,7 @@ static constexpr size_t kMaxFlatSize = 4096; static constexpr size_t kMaxFlatLength = kMaxFlatSize - kFlatOverhead; static constexpr size_t kMinFlatLength = kMinFlatSize - kFlatOverhead; -static constexpr size_t AllocatedSizeToTagUnchecked(size_t size) { +constexpr size_t AllocatedSizeToTagUnchecked(size_t size) { return (size <= 1024) ? size / 8 : 128 + size / 32 - 1024 / 32; } @@ -51,12 +51,12 @@ static_assert(kMinFlatSize / 8 >= FLAT, ""); static_assert(AllocatedSizeToTagUnchecked(kMaxFlatSize) <= MAX_FLAT_TAG, ""); // Helper functions for rounded div, and rounding to exact sizes. -static size_t DivUp(size_t n, size_t m) { return (n + m - 1) / m; } -static size_t RoundUp(size_t n, size_t m) { return DivUp(n, m) * m; } +constexpr size_t DivUp(size_t n, size_t m) { return (n + m - 1) / m; } +constexpr size_t RoundUp(size_t n, size_t m) { return DivUp(n, m) * m; } // Returns the size to the nearest equal or larger value that can be // expressed exactly as a tag value. -static size_t RoundUpForTag(size_t size) { +inline size_t RoundUpForTag(size_t size) { return RoundUp(size, (size <= 1024) ? 8 : 32); } @@ -64,19 +64,19 @@ static size_t RoundUpForTag(size_t size) { // does not exactly match a 'tag expressible' size value. The result is // undefined if the size exceeds the maximum size that can be encoded in // a tag, i.e., if size is larger than TagToAllocatedSize(<max tag>). 
-static uint8_t AllocatedSizeToTag(size_t size) { +inline uint8_t AllocatedSizeToTag(size_t size) { const size_t tag = AllocatedSizeToTagUnchecked(size); assert(tag <= MAX_FLAT_TAG); return tag; } // Converts the provided tag to the corresponding allocated size -static constexpr size_t TagToAllocatedSize(uint8_t tag) { +constexpr size_t TagToAllocatedSize(uint8_t tag) { return (tag <= 128) ? (tag * 8) : (1024 + (tag - 128) * 32); } // Converts the provided tag to the corresponding available data length -static constexpr size_t TagToLength(uint8_t tag) { +constexpr size_t TagToLength(uint8_t tag) { return TagToAllocatedSize(tag) - kFlatOverhead; } diff --git a/absl/strings/match.h b/absl/strings/match.h index 6c822c26..038cbb3f 100644 --- a/absl/strings/match.h +++ b/absl/strings/match.h @@ -48,6 +48,10 @@ inline bool StrContains(absl::string_view haystack, return haystack.find(needle, 0) != haystack.npos; } +inline bool StrContains(absl::string_view haystack, char needle) noexcept { + return haystack.find(needle) != haystack.npos; +} + // StartsWith() // // Returns whether a given string `text` begins with `prefix`. diff --git a/absl/strings/match_test.cc b/absl/strings/match_test.cc index 4c313dda..5841bc1b 100644 --- a/absl/strings/match_test.cc +++ b/absl/strings/match_test.cc @@ -66,6 +66,23 @@ TEST(MatchTest, Contains) { EXPECT_FALSE(absl::StrContains("", "a")); } +TEST(MatchTest, ContainsChar) { + absl::string_view a("abcdefg"); + absl::string_view b("abcd"); + EXPECT_TRUE(absl::StrContains(a, 'a')); + EXPECT_TRUE(absl::StrContains(a, 'b')); + EXPECT_TRUE(absl::StrContains(a, 'e')); + EXPECT_FALSE(absl::StrContains(a, 'h')); + + EXPECT_TRUE(absl::StrContains(b, 'a')); + EXPECT_TRUE(absl::StrContains(b, 'b')); + EXPECT_FALSE(absl::StrContains(b, 'e')); + EXPECT_FALSE(absl::StrContains(b, 'h')); + + EXPECT_FALSE(absl::StrContains("", 'a')); + EXPECT_FALSE(absl::StrContains("", 'a')); +} + TEST(MatchTest, ContainsNull) { const std::string s = "foo"; const char* cs = "foo"; |