diff options
Diffstat (limited to 'absl/hash/internal/hash.h')
-rw-r--r-- | absl/hash/internal/hash.h | 123 |
1 files changed, 86 insertions, 37 deletions
diff --git a/absl/hash/internal/hash.h b/absl/hash/internal/hash.h index 9e608f7c..7fb0af0b 100644 --- a/absl/hash/internal/hash.h +++ b/absl/hash/internal/hash.h @@ -38,9 +38,11 @@ #include <utility> #include <vector> -#include "absl/base/internal/endian.h" +#include "absl/base/config.h" +#include "absl/base/internal/unaligned_access.h" #include "absl/base/port.h" #include "absl/container/fixed_array.h" +#include "absl/hash/internal/wyhash.h" #include "absl/meta/type_traits.h" #include "absl/numeric/int128.h" #include "absl/strings/string_view.h" @@ -712,9 +714,8 @@ template <typename T> struct is_hashable : std::integral_constant<bool, HashSelect::template Apply<T>::value> {}; -// CityHashState -class ABSL_DLL CityHashState - : public HashStateBase<CityHashState> { +// HashState +class ABSL_DLL HashState : public HashStateBase<HashState> { // absl::uint128 is not an alias or a thin wrapper around the intrinsic. // We use the intrinsic when available to improve performance. #ifdef ABSL_HAVE_INTRINSIC_INT128 @@ -733,23 +734,22 @@ class ABSL_DLL CityHashState public: // Move only - CityHashState(CityHashState&&) = default; - CityHashState& operator=(CityHashState&&) = default; + HashState(HashState&&) = default; + HashState& operator=(HashState&&) = default; - // CityHashState::combine_contiguous() + // HashState::combine_contiguous() // // Fundamental base case for hash recursion: mixes the given range of bytes // into the hash state. - static CityHashState combine_contiguous(CityHashState hash_state, - const unsigned char* first, - size_t size) { - return CityHashState( + static HashState combine_contiguous(HashState hash_state, + const unsigned char* first, size_t size) { + return HashState( CombineContiguousImpl(hash_state.state_, first, size, std::integral_constant<int, sizeof(size_t)>{})); } - using CityHashState::HashStateBase::combine_contiguous; + using HashState::HashStateBase::combine_contiguous; - // CityHashState::hash() + // HashState::hash() // // For performance reasons in non-opt mode, we specialize this for // integral types. @@ -761,24 +761,24 @@ class ABSL_DLL CityHashState return static_cast<size_t>(Mix(Seed(), static_cast<uint64_t>(value))); } - // Overload of CityHashState::hash() + // Overload of HashState::hash() template <typename T, absl::enable_if_t<!IntegralFastPath<T>::value, int> = 0> static size_t hash(const T& value) { - return static_cast<size_t>(combine(CityHashState{}, value).state_); + return static_cast<size_t>(combine(HashState{}, value).state_); } private: // Invoked only once for a given argument; that plus the fact that this is // move-only ensures that there is only one non-moved-from object. - CityHashState() : state_(Seed()) {} + HashState() : state_(Seed()) {} // Workaround for MSVC bug. // We make the type copyable to fix the calling convention, even though we // never actually copy it. Keep it private to not affect the public API of the // type. - CityHashState(const CityHashState&) = default; + HashState(const HashState&) = default; - explicit CityHashState(uint64_t state) : state_(state) {} + explicit HashState(uint64_t state) : state_(state) {} // Implementation of the base case for combine_contiguous where we actually // mix the bytes into the state. @@ -791,7 +791,8 @@ class ABSL_DLL CityHashState static uint64_t CombineContiguousImpl(uint64_t state, const unsigned char* first, size_t len, std::integral_constant<int, 8> - /* sizeof_size_t*/); + /* sizeof_size_t */); + // Slow dispatch path for calls to CombineContiguousImpl with a size argument // larger than PiecewiseChunkSize(). Has the same effect as calling @@ -804,26 +805,54 @@ class ABSL_DLL CityHashState size_t len); // Reads 9 to 16 bytes from p. - // The first 8 bytes are in .first, the rest (zero padded) bytes are in - // .second. + // The least significant 8 bytes are in .first, the rest (zero padded) bytes + // are in .second. static std::pair<uint64_t, uint64_t> Read9To16(const unsigned char* p, size_t len) { - uint64_t high = little_endian::Load64(p + len - 8); - return {little_endian::Load64(p), high >> (128 - len * 8)}; + uint64_t low_mem = absl::base_internal::UnalignedLoad64(p); + uint64_t high_mem = absl::base_internal::UnalignedLoad64(p + len - 8); +#ifdef ABSL_IS_LITTLE_ENDIAN + uint64_t most_significant = high_mem; + uint64_t least_significant = low_mem; +#else + uint64_t most_significant = low_mem; + uint64_t least_significant = high_mem; +#endif + return {least_significant, most_significant >> (128 - len * 8)}; } // Reads 4 to 8 bytes from p. Zero pads to fill uint64_t. static uint64_t Read4To8(const unsigned char* p, size_t len) { - return (static_cast<uint64_t>(little_endian::Load32(p + len - 4)) - << (len - 4) * 8) | - little_endian::Load32(p); + uint32_t low_mem = absl::base_internal::UnalignedLoad32(p); + uint32_t high_mem = absl::base_internal::UnalignedLoad32(p + len - 4); +#ifdef ABSL_IS_LITTLE_ENDIAN + uint32_t most_significant = high_mem; + uint32_t least_significant = low_mem; +#else + uint32_t most_significant = low_mem; + uint32_t least_significant = high_mem; +#endif + return (static_cast<uint64_t>(most_significant) << (len - 4) * 8) | + least_significant; } // Reads 1 to 3 bytes from p. Zero pads to fill uint32_t. static uint32_t Read1To3(const unsigned char* p, size_t len) { - return static_cast<uint32_t>((p[0]) | // - (p[len / 2] << (len / 2 * 8)) | // - (p[len - 1] << ((len - 1) * 8))); + unsigned char mem0 = p[0]; + unsigned char mem1 = p[len / 2]; + unsigned char mem2 = p[len - 1]; +#ifdef ABSL_IS_LITTLE_ENDIAN + unsigned char significant2 = mem2; + unsigned char significant1 = mem1; + unsigned char significant0 = mem0; +#else + unsigned char significant2 = mem0; + unsigned char significant1 = mem1; + unsigned char significant0 = mem2; +#endif + return static_cast<uint32_t>(significant0 | // + (significant1 << (len / 2 * 8)) | // + (significant2 << ((len - 1) * 8))); } ABSL_ATTRIBUTE_ALWAYS_INLINE static uint64_t Mix(uint64_t state, uint64_t v) { @@ -838,6 +867,19 @@ class ABSL_DLL CityHashState return static_cast<uint64_t>(m ^ (m >> (sizeof(m) * 8 / 2))); } + // An extern to avoid bloat on a direct call to Wyhash() with fixed values for + // both the seed and salt parameters. + static uint64_t WyhashImpl(const unsigned char* data, size_t len); + + ABSL_ATTRIBUTE_ALWAYS_INLINE static uint64_t Hash64(const unsigned char* data, + size_t len) { +#ifdef ABSL_HAVE_INTRINSIC_INT128 + return WyhashImpl(data, len); +#else + return absl::hash_internal::CityHash64(reinterpret_cast<const char*>(data), len); +#endif + } + // Seed() // // A non-deterministic seed. @@ -855,15 +897,22 @@ class ABSL_DLL CityHashState // On other platforms this is still going to be non-deterministic but most // probably per-build and not per-process. ABSL_ATTRIBUTE_ALWAYS_INLINE static uint64_t Seed() { +#if (!defined(__clang__) || __clang_major__ > 11) && \ + !defined(__apple_build_version__) + return static_cast<uint64_t>(reinterpret_cast<uintptr_t>(&kSeed)); +#else + // Workaround the absence of + // https://github.com/llvm/llvm-project/commit/bc15bf66dcca76cc06fe71fca35b74dc4d521021. return static_cast<uint64_t>(reinterpret_cast<uintptr_t>(kSeed)); +#endif } static const void* const kSeed; uint64_t state_; }; -// CityHashState::CombineContiguousImpl() -inline uint64_t CityHashState::CombineContiguousImpl( +// HashState::CombineContiguousImpl() +inline uint64_t HashState::CombineContiguousImpl( uint64_t state, const unsigned char* first, size_t len, std::integral_constant<int, 4> /* sizeof_size_t */) { // For large values we use CityHash, for small ones we just use a @@ -885,18 +934,18 @@ inline uint64_t CityHashState::CombineContiguousImpl( return Mix(state, v); } -// Overload of CityHashState::CombineContiguousImpl() -inline uint64_t CityHashState::CombineContiguousImpl( +// Overload of HashState::CombineContiguousImpl() +inline uint64_t HashState::CombineContiguousImpl( uint64_t state, const unsigned char* first, size_t len, std::integral_constant<int, 8> /* sizeof_size_t */) { - // For large values we use CityHash, for small ones we just use a - // multiplicative hash. + // For large values we use Wyhash or CityHash depending on the platform, for + // small ones we just use a multiplicative hash. uint64_t v; if (len > 16) { if (ABSL_PREDICT_FALSE(len > PiecewiseChunkSize())) { return CombineLargeContiguousImpl64(state, first, len); } - v = absl::hash_internal::CityHash64(reinterpret_cast<const char*>(first), len); + v = Hash64(first, len); } else if (len > 8) { auto p = Read9To16(first, len); state = Mix(state, p.first); @@ -927,7 +976,7 @@ struct PoisonedHash : private AggregateBarrier { template <typename T> struct HashImpl { - size_t operator()(const T& value) const { return CityHashState::hash(value); } + size_t operator()(const T& value) const { return HashState::hash(value); } }; template <typename T> |