diff options
Diffstat (limited to 'absl/hash')
-rw-r--r-- | absl/hash/BUILD.bazel | 18 | ||||
-rw-r--r-- | absl/hash/hash_benchmark.cc | 249 |
2 files changed, 267 insertions, 0 deletions
diff --git a/absl/hash/BUILD.bazel b/absl/hash/BUILD.bazel index 40c8f207..acd490ba 100644 --- a/absl/hash/BUILD.bazel +++ b/absl/hash/BUILD.bazel @@ -82,6 +82,24 @@ cc_test( ], ) +cc_binary( + name = "hash_benchmark", + testonly = 1, + srcs = ["hash_benchmark.cc"], + copts = ABSL_TEST_COPTS, + linkopts = ABSL_DEFAULT_LINKOPTS, + tags = ["benchmark"], + visibility = ["//visibility:private"], + deps = [ + ":hash", + "//absl/base:core_headers", + "//absl/random", + "//absl/strings:cord", + "//absl/strings:cord_test_helpers", + "@com_github_google_benchmark//:benchmark_main", + ], +) + cc_library( name = "spy_hash_state", testonly = 1, diff --git a/absl/hash/hash_benchmark.cc b/absl/hash/hash_benchmark.cc new file mode 100644 index 00000000..27e52719 --- /dev/null +++ b/absl/hash/hash_benchmark.cc @@ -0,0 +1,249 @@ +// Copyright 2018 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <typeindex> + +#include "absl/base/attributes.h" +#include "absl/hash/hash.h" +#include "absl/random/random.h" +#include "absl/strings/cord.h" +#include "absl/strings/cord_test_helpers.h" +#include "benchmark/benchmark.h" + +namespace { + +using absl::Hash; + +template <template <typename> class H, typename T> +void RunBenchmark(benchmark::State& state, T value) { + H<T> h; + for (auto _ : state) { + benchmark::DoNotOptimize(value); + benchmark::DoNotOptimize(h(value)); + } +} + +} // namespace + +template <typename T> +using AbslHash = absl::Hash<T>; + +class TypeErasedInterface { + public: + virtual ~TypeErasedInterface() = default; + + template <typename H> + friend H AbslHashValue(H state, const TypeErasedInterface& wrapper) { + state = H::combine(std::move(state), std::type_index(typeid(wrapper))); + wrapper.HashValue(absl::HashState::Create(&state)); + return state; + } + + private: + virtual void HashValue(absl::HashState state) const = 0; +}; + +template <typename T> +struct TypeErasedAbslHash { + class Wrapper : public TypeErasedInterface { + public: + explicit Wrapper(const T& value) : value_(value) {} + + private: + void HashValue(absl::HashState state) const override { + absl::HashState::combine(std::move(state), value_); + } + + const T& value_; + }; + + size_t operator()(const T& value) { + return absl::Hash<Wrapper>{}(Wrapper(value)); + } +}; + +template <typename FuncType> +inline FuncType* ODRUseFunction(FuncType* ptr) { + volatile FuncType* dummy = ptr; + return dummy; +} + +absl::Cord FlatCord(size_t size) { + absl::Cord result(std::string(size, 'a')); + result.Flatten(); + return result; +} + +absl::Cord FragmentedCord(size_t size) { + const size_t orig_size = size; + std::vector<std::string> chunks; + size_t chunk_size = std::max<size_t>(1, size / 10); + while (size > chunk_size) { + chunks.push_back(std::string(chunk_size, 'a')); + size -= chunk_size; + } + if (size > 0) { + chunks.push_back(std::string(size, 'a')); + } + absl::Cord result = absl::MakeFragmentedCord(chunks); + (void) orig_size; + assert(result.size() == orig_size); + return result; +} + +// Generates a benchmark and a codegen method for the provided types. The +// codegen method provides a well known entrypoint for dumping assembly. +#define MAKE_BENCHMARK(hash, name, ...) \ + namespace { \ + void BM_##hash##_##name(benchmark::State& state) { \ + RunBenchmark<hash>(state, __VA_ARGS__); \ + } \ + BENCHMARK(BM_##hash##_##name); \ + } \ + size_t Codegen##hash##name(const decltype(__VA_ARGS__)& arg); \ + size_t Codegen##hash##name(const decltype(__VA_ARGS__)& arg) { \ + return hash<decltype(__VA_ARGS__)>{}(arg); \ + } \ + bool absl_hash_test_odr_use##hash##name = \ + ODRUseFunction(&Codegen##hash##name); + +MAKE_BENCHMARK(AbslHash, Int32, int32_t{}); +MAKE_BENCHMARK(AbslHash, Int64, int64_t{}); +MAKE_BENCHMARK(AbslHash, Double, 1.2); +MAKE_BENCHMARK(AbslHash, DoubleZero, 0.0); +MAKE_BENCHMARK(AbslHash, PairInt32Int32, std::pair<int32_t, int32_t>{}); +MAKE_BENCHMARK(AbslHash, PairInt64Int64, std::pair<int64_t, int64_t>{}); +MAKE_BENCHMARK(AbslHash, TupleInt32BoolInt64, + std::tuple<int32_t, bool, int64_t>{}); +MAKE_BENCHMARK(AbslHash, String_0, std::string()); +MAKE_BENCHMARK(AbslHash, String_10, std::string(10, 'a')); +MAKE_BENCHMARK(AbslHash, String_30, std::string(30, 'a')); +MAKE_BENCHMARK(AbslHash, String_90, std::string(90, 'a')); +MAKE_BENCHMARK(AbslHash, String_200, std::string(200, 'a')); +MAKE_BENCHMARK(AbslHash, String_5000, std::string(5000, 'a')); +MAKE_BENCHMARK(AbslHash, Cord_Flat_0, absl::Cord()); +MAKE_BENCHMARK(AbslHash, Cord_Flat_10, FlatCord(10)); +MAKE_BENCHMARK(AbslHash, Cord_Flat_30, FlatCord(30)); +MAKE_BENCHMARK(AbslHash, Cord_Flat_90, FlatCord(90)); +MAKE_BENCHMARK(AbslHash, Cord_Flat_200, FlatCord(200)); +MAKE_BENCHMARK(AbslHash, Cord_Flat_5000, FlatCord(5000)); +MAKE_BENCHMARK(AbslHash, Cord_Fragmented_200, FragmentedCord(200)); +MAKE_BENCHMARK(AbslHash, Cord_Fragmented_5000, FragmentedCord(5000)); +MAKE_BENCHMARK(AbslHash, VectorInt64_10, std::vector<int64_t>(10)); +MAKE_BENCHMARK(AbslHash, VectorInt64_100, std::vector<int64_t>(100)); +MAKE_BENCHMARK(AbslHash, VectorDouble_10, std::vector<double>(10, 1.1)); +MAKE_BENCHMARK(AbslHash, VectorDouble_100, std::vector<double>(100, 1.1)); +MAKE_BENCHMARK(AbslHash, PairStringString_0, + std::make_pair(std::string(), std::string())); +MAKE_BENCHMARK(AbslHash, PairStringString_10, + std::make_pair(std::string(10, 'a'), std::string(10, 'b'))); +MAKE_BENCHMARK(AbslHash, PairStringString_30, + std::make_pair(std::string(30, 'a'), std::string(30, 'b'))); +MAKE_BENCHMARK(AbslHash, PairStringString_90, + std::make_pair(std::string(90, 'a'), std::string(90, 'b'))); +MAKE_BENCHMARK(AbslHash, PairStringString_200, + std::make_pair(std::string(200, 'a'), std::string(200, 'b'))); +MAKE_BENCHMARK(AbslHash, PairStringString_5000, + std::make_pair(std::string(5000, 'a'), std::string(5000, 'b'))); + +MAKE_BENCHMARK(TypeErasedAbslHash, Int32, int32_t{}); +MAKE_BENCHMARK(TypeErasedAbslHash, Int64, int64_t{}); +MAKE_BENCHMARK(TypeErasedAbslHash, PairInt32Int32, + std::pair<int32_t, int32_t>{}); +MAKE_BENCHMARK(TypeErasedAbslHash, PairInt64Int64, + std::pair<int64_t, int64_t>{}); +MAKE_BENCHMARK(TypeErasedAbslHash, TupleInt32BoolInt64, + std::tuple<int32_t, bool, int64_t>{}); +MAKE_BENCHMARK(TypeErasedAbslHash, String_0, std::string()); +MAKE_BENCHMARK(TypeErasedAbslHash, String_10, std::string(10, 'a')); +MAKE_BENCHMARK(TypeErasedAbslHash, String_30, std::string(30, 'a')); +MAKE_BENCHMARK(TypeErasedAbslHash, String_90, std::string(90, 'a')); +MAKE_BENCHMARK(TypeErasedAbslHash, String_200, std::string(200, 'a')); +MAKE_BENCHMARK(TypeErasedAbslHash, String_5000, std::string(5000, 'a')); +MAKE_BENCHMARK(TypeErasedAbslHash, VectorDouble_10, + std::vector<double>(10, 1.1)); +MAKE_BENCHMARK(TypeErasedAbslHash, VectorDouble_100, + std::vector<double>(100, 1.1)); + +// The latency benchmark attempts to model the speed of the hash function in +// production. When a hash function is used for hashtable lookups it is rarely +// used to hash N items in a tight loop nor on constant sized strings. Instead, +// after hashing there is a potential equality test plus a (usually) large +// amount of user code. To simulate this effectively we introduce a data +// dependency between elements we hash by using the hash of the Nth element as +// the selector of the N+1th element to hash. This isolates the hash function +// code much like in production. As a bonus we use the hash to generate strings +// of size [1,N] (instead of fixed N) to disable perfect branch predictions in +// hash function implementations. +namespace { +// 16kb fits in L1 cache of most CPUs we care about. Keeping memory latency low +// will allow us to attribute most time to CPU which means more accurate +// measurements. +static constexpr size_t kEntropySize = 16 << 10; +static char entropy[kEntropySize + 1024]; +ABSL_ATTRIBUTE_UNUSED static const bool kInitialized = [] { + absl::BitGen gen; + static_assert(sizeof(entropy) % sizeof(uint64_t) == 0, ""); + for (int i = 0; i != sizeof(entropy); i += sizeof(uint64_t)) { + auto rand = absl::Uniform<uint64_t>(gen); + memcpy(&entropy[i], &rand, sizeof(uint64_t)); + } + return true; +}(); +} // namespace + +template <class T> +struct PodRand { + static_assert(std::is_pod<T>::value, ""); + static_assert(kEntropySize + sizeof(T) < sizeof(entropy), ""); + + T Get(size_t i) const { + T v; + memcpy(&v, &entropy[i % kEntropySize], sizeof(T)); + return v; + } +}; + +template <size_t N> +struct StringRand { + static_assert(kEntropySize + N < sizeof(entropy), ""); + + absl::string_view Get(size_t i) const { + // This has a small bias towards small numbers. Because max N is ~200 this + // is very small and prefer to be very fast instead of absolutely accurate. + // Also we pass N = 2^K+1 so that mod reduces to a bitand. + size_t s = (i % (N - 1)) + 1; + return {&entropy[i % kEntropySize], s}; + } +}; + +#define MAKE_LATENCY_BENCHMARK(hash, name, ...) \ + namespace { \ + void BM_latency_##hash##_##name(benchmark::State& state) { \ + __VA_ARGS__ r; \ + hash<decltype(r.Get(0))> h; \ + size_t i = 871401241; \ + for (auto _ : state) { \ + benchmark::DoNotOptimize(i = h(r.Get(i))); \ + } \ + } \ + BENCHMARK(BM_latency_##hash##_##name); \ + } // namespace + +MAKE_LATENCY_BENCHMARK(AbslHash, Int32, PodRand<int32_t>); +MAKE_LATENCY_BENCHMARK(AbslHash, Int64, PodRand<int64_t>); +MAKE_LATENCY_BENCHMARK(AbslHash, String9, StringRand<9>); +MAKE_LATENCY_BENCHMARK(AbslHash, String33, StringRand<33>); +MAKE_LATENCY_BENCHMARK(AbslHash, String65, StringRand<65>); +MAKE_LATENCY_BENCHMARK(AbslHash, String257, StringRand<257>); |