2 files changed, 267 insertions, 0 deletions
diff --git a/absl/hash/BUILD.bazel b/absl/hash/BUILD.bazel
index 40c8f207..acd490ba 100644
--- a/absl/hash/BUILD.bazel
+++ b/absl/hash/BUILD.bazel
@@ -82,6 +82,24 @@ cc_test(
     ],
 )
 
+cc_binary(
+    name = "hash_benchmark",
+    testonly = 1,
+    srcs = ["hash_benchmark.cc"],
+    copts = ABSL_TEST_COPTS,
+    linkopts = ABSL_DEFAULT_LINKOPTS,
+    tags = ["benchmark"],
+    visibility = ["//visibility:private"],
+    deps = [
+        ":hash",
+        "//absl/base:core_headers",
+        "//absl/random",
+        "//absl/strings:cord",
+        "//absl/strings:cord_test_helpers",
+        "@com_github_google_benchmark//:benchmark_main",
+    ],
+)
+
 cc_library(
     name = "spy_hash_state",
     testonly = 1,
diff --git a/absl/hash/hash_benchmark.cc b/absl/hash/hash_benchmark.cc
new file mode 100644
index 00000000..27e52719
--- /dev/null
+++ b/absl/hash/hash_benchmark.cc
@@ -0,0 +1,249 @@
+// Copyright 2018 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <typeindex>
+
+#include "absl/base/attributes.h"
+#include "absl/hash/hash.h"
+#include "absl/random/random.h"
+#include "absl/strings/cord.h"
+#include "absl/strings/cord_test_helpers.h"
+#include "benchmark/benchmark.h"
+
+namespace {
+
+using absl::Hash;
+
+template <template <typename> class H, typename T>
+void RunBenchmark(benchmark::State& state, T value) {
+  H<T> h;
+  for (auto _ : state) {
+    benchmark::DoNotOptimize(value);
+    benchmark::DoNotOptimize(h(value));
+  }
+}
+
+}  // namespace
+
+template <typename T>
+using AbslHash = absl::Hash<T>;
+
+class TypeErasedInterface {
+ public:
+  virtual ~TypeErasedInterface() = default;
+
+  template <typename H>
+  friend H AbslHashValue(H state, const TypeErasedInterface& wrapper) {
+    state = H::combine(std::move(state), std::type_index(typeid(wrapper)));
+    wrapper.HashValue(absl::HashState::Create(&state));
+    return state;
+  }
+
+ private:
+  virtual void HashValue(absl::HashState state) const = 0;
+};
+
+template <typename T>
+struct TypeErasedAbslHash {
+  class Wrapper : public TypeErasedInterface {
+   public:
+    explicit Wrapper(const T& value) : value_(value) {}
+
+   private:
+    void HashValue(absl::HashState state) const override {
+      absl::HashState::combine(std::move(state), value_);
+    }
+
+    const T& value_;
+  };
+
+  size_t operator()(const T& value) {
+    return absl::Hash<Wrapper>{}(Wrapper(value));
+  }
+};
+
+template <typename FuncType>
+inline FuncType* ODRUseFunction(FuncType* ptr) {
+  volatile FuncType* dummy = ptr;
+  return dummy;
+}
+
+absl::Cord FlatCord(size_t size) {
+  absl::Cord result(std::string(size, 'a'));
+  result.Flatten();
+  return result;
+}
+
+absl::Cord FragmentedCord(size_t size) {
+  const size_t orig_size = size;
+  std::vector<std::string> chunks;
+  size_t chunk_size = std::max<size_t>(1, size / 10);
+  while (size > chunk_size) {
+    chunks.push_back(std::string(chunk_size, 'a'));
+    size -= chunk_size;
+  }
+  if (size > 0) {
+    chunks.push_back(std::string(size, 'a'));
+  }
+  absl::Cord result = absl::MakeFragmentedCord(chunks);
+  (void) orig_size;
+  assert(result.size() == orig_size);
+  return result;
+}
+
+// Generates a benchmark and a codegen method for the provided types.  The
+// codegen method provides a well known entrypoint for dumping assembly.
+#define MAKE_BENCHMARK(hash, name, ...)                          \
+  namespace {                                                    \
+  void BM_##hash##_##name(benchmark::State& state) {             \
+    RunBenchmark<hash>(state, __VA_ARGS__);                      \
+  }                                                              \
+  BENCHMARK(BM_##hash##_##name);                                 \
+  }                                                              \
+  size_t Codegen##hash##name(const decltype(__VA_ARGS__)& arg);  \
+  size_t Codegen##hash##name(const decltype(__VA_ARGS__)& arg) { \
+    return hash<decltype(__VA_ARGS__)>{}(arg);                   \
+  }                                                              \
+  bool absl_hash_test_odr_use##hash##name =                      \
+      ODRUseFunction(&Codegen##hash##name);
+
+MAKE_BENCHMARK(AbslHash, Int32, int32_t{});
+MAKE_BENCHMARK(AbslHash, Int64, int64_t{});
+MAKE_BENCHMARK(AbslHash, Double, 1.2);
+MAKE_BENCHMARK(AbslHash, DoubleZero, 0.0);
+MAKE_BENCHMARK(AbslHash, PairInt32Int32, std::pair<int32_t, int32_t>{});
+MAKE_BENCHMARK(AbslHash, PairInt64Int64, std::pair<int64_t, int64_t>{});
+MAKE_BENCHMARK(AbslHash, TupleInt32BoolInt64,
+               std::tuple<int32_t, bool, int64_t>{});
+MAKE_BENCHMARK(AbslHash, String_0, std::string());
+MAKE_BENCHMARK(AbslHash, String_10, std::string(10, 'a'));
+MAKE_BENCHMARK(AbslHash, String_30, std::string(30, 'a'));
+MAKE_BENCHMARK(AbslHash, String_90, std::string(90, 'a'));
+MAKE_BENCHMARK(AbslHash, String_200, std::string(200, 'a'));
+MAKE_BENCHMARK(AbslHash, String_5000, std::string(5000, 'a'));
+MAKE_BENCHMARK(AbslHash, Cord_Flat_0, absl::Cord());
+MAKE_BENCHMARK(AbslHash, Cord_Flat_10, FlatCord(10));
+MAKE_BENCHMARK(AbslHash, Cord_Flat_30, FlatCord(30));
+MAKE_BENCHMARK(AbslHash, Cord_Flat_90, FlatCord(90));
+MAKE_BENCHMARK(AbslHash, Cord_Flat_200, FlatCord(200));
+MAKE_BENCHMARK(AbslHash, Cord_Flat_5000, FlatCord(5000));
+MAKE_BENCHMARK(AbslHash, Cord_Fragmented_200, FragmentedCord(200));
+MAKE_BENCHMARK(AbslHash, Cord_Fragmented_5000, FragmentedCord(5000));
+MAKE_BENCHMARK(AbslHash, VectorInt64_10, std::vector<int64_t>(10));
+MAKE_BENCHMARK(AbslHash, VectorInt64_100, std::vector<int64_t>(100));
+MAKE_BENCHMARK(AbslHash, VectorDouble_10, std::vector<double>(10, 1.1));
+MAKE_BENCHMARK(AbslHash, VectorDouble_100, std::vector<double>(100, 1.1));
+MAKE_BENCHMARK(AbslHash, PairStringString_0,
+               std::make_pair(std::string(), std::string()));
+MAKE_BENCHMARK(AbslHash, PairStringString_10,
+               std::make_pair(std::string(10, 'a'), std::string(10, 'b')));
+MAKE_BENCHMARK(AbslHash, PairStringString_30,
+               std::make_pair(std::string(30, 'a'), std::string(30, 'b')));
+MAKE_BENCHMARK(AbslHash, PairStringString_90,
+               std::make_pair(std::string(90, 'a'), std::string(90, 'b')));
+MAKE_BENCHMARK(AbslHash, PairStringString_200,
+               std::make_pair(std::string(200, 'a'), std::string(200, 'b')));
+MAKE_BENCHMARK(AbslHash, PairStringString_5000,
+               std::make_pair(std::string(5000, 'a'), std::string(5000, 'b')));
+
+MAKE_BENCHMARK(TypeErasedAbslHash, Int32, int32_t{});
+MAKE_BENCHMARK(TypeErasedAbslHash, Int64, int64_t{});
+MAKE_BENCHMARK(TypeErasedAbslHash, PairInt32Int32,
+               std::pair<int32_t, int32_t>{});
+MAKE_BENCHMARK(TypeErasedAbslHash, PairInt64Int64,
+               std::pair<int64_t, int64_t>{});
+MAKE_BENCHMARK(TypeErasedAbslHash, TupleInt32BoolInt64,
+               std::tuple<int32_t, bool, int64_t>{});
+MAKE_BENCHMARK(TypeErasedAbslHash, String_0, std::string());
+MAKE_BENCHMARK(TypeErasedAbslHash, String_10, std::string(10, 'a'));
+MAKE_BENCHMARK(TypeErasedAbslHash, String_30, std::string(30, 'a'));
+MAKE_BENCHMARK(TypeErasedAbslHash, String_90, std::string(90, 'a'));
+MAKE_BENCHMARK(TypeErasedAbslHash, String_200, std::string(200, 'a'));
+MAKE_BENCHMARK(TypeErasedAbslHash, String_5000, std::string(5000, 'a'));
+MAKE_BENCHMARK(TypeErasedAbslHash, VectorDouble_10,
+               std::vector<double>(10, 1.1));
+MAKE_BENCHMARK(TypeErasedAbslHash, VectorDouble_100,
+               std::vector<double>(100, 1.1));
+
+// The latency benchmark attempts to model the speed of the hash function in
+// production. When a hash function is used for hashtable lookups it is rarely
+// used to hash N items in a tight loop nor on constant sized strings. Instead,
+// after hashing there is a potential equality test plus a (usually) large
+// amount of user code. To simulate this effectively we introduce a data
+// dependency between elements we hash by using the hash of the Nth element as
+// the selector of the N+1th element to hash. This isolates the hash function
+// code much like in production. As a bonus we use the hash to generate strings
+// of size [1,N] (instead of fixed N) to disable perfect branch predictions in
+// hash function implementations.
+namespace {
+// 16kb fits in L1 cache of most CPUs we care about. Keeping memory latency low
+// will allow us to attribute most time to CPU which means more accurate
+// measurements.
+static constexpr size_t kEntropySize = 16 << 10;
+static char entropy[kEntropySize + 1024];
+ABSL_ATTRIBUTE_UNUSED static const bool kInitialized = [] {
+  absl::BitGen gen;
+  static_assert(sizeof(entropy) % sizeof(uint64_t) == 0, "");
+  for (int i = 0; i != sizeof(entropy); i += sizeof(uint64_t)) {
+    auto rand = absl::Uniform<uint64_t>(gen);
+    memcpy(&entropy[i], &rand, sizeof(uint64_t));
+  }
+  return true;
+}();
+}  // namespace
+
+template <class T>
+struct PodRand {
+  static_assert(std::is_pod<T>::value, "");
+  static_assert(kEntropySize + sizeof(T) < sizeof(entropy), "");
+
+  T Get(size_t i) const {
+    T v;
+    memcpy(&v, &entropy[i % kEntropySize], sizeof(T));
+    return v;
+  }
+};
+
+template <size_t N>
+struct StringRand {
+  static_assert(kEntropySize + N < sizeof(entropy), "");
+
+  absl::string_view Get(size_t i) const {
+    // This has a small bias towards small numbers. Because max N is ~200 this
+    // is very small and prefer to be very fast instead of absolutely accurate.
+    // Also we pass N = 2^K+1 so that mod reduces to a bitand.
+    size_t s = (i % (N - 1)) + 1;
+    return {&entropy[i % kEntropySize], s};
+  }
+};
+
+#define MAKE_LATENCY_BENCHMARK(hash, name, ...)              \
+  namespace {                                                \
+  void BM_latency_##hash##_##name(benchmark::State& state) { \
+    __VA_ARGS__ r;                                           \
+    hash<decltype(r.Get(0))> h;                              \
+    size_t i = 871401241;                                    \
+    for (auto _ : state) {                                   \
+      benchmark::DoNotOptimize(i = h(r.Get(i)));             \
+    }                                                        \
+  }                                                          \
+  BENCHMARK(BM_latency_##hash##_##name);                     \
+  }  // namespace
+
+MAKE_LATENCY_BENCHMARK(AbslHash, Int32, PodRand<int32_t>);
+MAKE_LATENCY_BENCHMARK(AbslHash, Int64, PodRand<int64_t>);
+MAKE_LATENCY_BENCHMARK(AbslHash, String9, StringRand<9>);
+MAKE_LATENCY_BENCHMARK(AbslHash, String33, StringRand<33>);
+MAKE_LATENCY_BENCHMARK(AbslHash, String65, StringRand<65>);
+MAKE_LATENCY_BENCHMARK(AbslHash, String257, StringRand<257>);