From 74eee2aff683cc7dcd2dbaa69b2c654596d8024e Mon Sep 17 00:00:00 2001 From: Abseil Team Date: Sat, 7 Jan 2023 09:57:55 -0800 Subject: Replace absl::Hash for inputs from 9 to 16 bytes according to AlphaZero findings PiperOrigin-RevId: 500401844 Change-Id: I6d0909a8e395c914861dd034824a34737a52d71f --- absl/hash/BUILD.bazel | 1 + absl/hash/CMakeLists.txt | 1 + absl/hash/internal/hash.h | 20 +++++++++++++++++--- 3 files changed, 19 insertions(+), 3 deletions(-) (limited to 'absl/hash') diff --git a/absl/hash/BUILD.bazel b/absl/hash/BUILD.bazel index 4a95f054..a0db919b 100644 --- a/absl/hash/BUILD.bazel +++ b/absl/hash/BUILD.bazel @@ -43,6 +43,7 @@ cc_library( "//absl/container:fixed_array", "//absl/functional:function_ref", "//absl/meta:type_traits", + "//absl/numeric:bits", "//absl/numeric:int128", "//absl/strings", "//absl/types:optional", diff --git a/absl/hash/CMakeLists.txt b/absl/hash/CMakeLists.txt index 0514c296..f99f35bc 100644 --- a/absl/hash/CMakeLists.txt +++ b/absl/hash/CMakeLists.txt @@ -25,6 +25,7 @@ absl_cc_library( COPTS ${ABSL_DEFAULT_COPTS} DEPS + absl::bits absl::city absl::config absl::core_headers diff --git a/absl/hash/internal/hash.h b/absl/hash/internal/hash.h index dbdc2050..ccf4cc1a 100644 --- a/absl/hash/internal/hash.h +++ b/absl/hash/internal/hash.h @@ -49,6 +49,7 @@ #include "absl/hash/internal/city.h" #include "absl/hash/internal/low_level_hash.h" #include "absl/meta/type_traits.h" +#include "absl/numeric/bits.h" #include "absl/numeric/int128.h" #include "absl/strings/string_view.h" #include "absl/types/optional.h" @@ -1052,7 +1053,7 @@ class ABSL_DLL MixingHashState : public HashStateBase { uint64_t most_significant = low_mem; uint64_t least_significant = high_mem; #endif - return {least_significant, most_significant >> (128 - len * 8)}; + return {least_significant, most_significant}; } // Reads 4 to 8 bytes from p. Zero pads to fill uint64_t. @@ -1183,9 +1184,22 @@ inline uint64_t MixingHashState::CombineContiguousImpl( } v = Hash64(first, len); } else if (len > 8) { + // This hash function was constructed by the ML-driven algorithm discovery + // using reinforcement learning. We fed the agent lots of inputs from + // microbenchmarks, SMHasher, low hamming distance from generated inputs and + // picked up the one that was good on micro and macrobenchmarks. auto p = Read9To16(first, len); - state = Mix(state, p.first); - v = p.second; + uint64_t lo = p.first; + uint64_t hi = p.second; + // Rotation by 53 was found to be most often useful when discovering these + // hashing algorithms with ML techniques. + lo = absl::rotr(lo, 53); + state += kMul; + lo += state; + state ^= hi; + uint128 m = state; + m *= lo; + return static_cast(m ^ (m >> 64)); } else if (len >= 4) { v = Read4To8(first, len); } else if (len > 0) { -- cgit v1.2.3