Diffstat (limited to 'absl/hash/internal/hash.h')
-rw-r--r--  absl/hash/internal/hash.h  123
1 file changed, 86 insertions, 37 deletions
diff --git a/absl/hash/internal/hash.h b/absl/hash/internal/hash.h
index 9e608f7c..7fb0af0b 100644
--- a/absl/hash/internal/hash.h
+++ b/absl/hash/internal/hash.h
@@ -38,9 +38,11 @@
#include <utility>
#include <vector>
-#include "absl/base/internal/endian.h"
+#include "absl/base/config.h"
+#include "absl/base/internal/unaligned_access.h"
#include "absl/base/port.h"
#include "absl/container/fixed_array.h"
+#include "absl/hash/internal/wyhash.h"
#include "absl/meta/type_traits.h"
#include "absl/numeric/int128.h"
#include "absl/strings/string_view.h"
@@ -712,9 +714,8 @@ template <typename T>
struct is_hashable
: std::integral_constant<bool, HashSelect::template Apply<T>::value> {};
-// CityHashState
-class ABSL_DLL CityHashState
- : public HashStateBase<CityHashState> {
+// HashState
+class ABSL_DLL HashState : public HashStateBase<HashState> {
// absl::uint128 is not an alias or a thin wrapper around the intrinsic.
// We use the intrinsic when available to improve performance.
#ifdef ABSL_HAVE_INTRINSIC_INT128
@@ -733,23 +734,22 @@ class ABSL_DLL CityHashState
public:
// Move only
- CityHashState(CityHashState&&) = default;
- CityHashState& operator=(CityHashState&&) = default;
+ HashState(HashState&&) = default;
+ HashState& operator=(HashState&&) = default;
- // CityHashState::combine_contiguous()
+ // HashState::combine_contiguous()
//
// Fundamental base case for hash recursion: mixes the given range of bytes
// into the hash state.
- static CityHashState combine_contiguous(CityHashState hash_state,
- const unsigned char* first,
- size_t size) {
- return CityHashState(
+ static HashState combine_contiguous(HashState hash_state,
+ const unsigned char* first, size_t size) {
+ return HashState(
CombineContiguousImpl(hash_state.state_, first, size,
std::integral_constant<int, sizeof(size_t)>{}));
}
- using CityHashState::HashStateBase::combine_contiguous;
+ using HashState::HashStateBase::combine_contiguous;
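Note: as the comment above says, combine_contiguous() is the fundamental base case of the hash recursion. A minimal sketch of how user code reaches it through the documented H::combine_contiguous() extension point (the ByteBlob type below is illustrative, not part of this header):

    #include <cstddef>
    #include <utility>
    #include "absl/hash/hash.h"

    // Illustrative user type whose AbslHashValue feeds raw bytes straight
    // into the base case via H::combine_contiguous().
    struct ByteBlob {
      const unsigned char* data;
      size_t size;

      template <typename H>
      friend H AbslHashValue(H h, const ByteBlob& b) {
        return H::combine_contiguous(std::move(h), b.data, b.size);
      }
    };

Hashing a ByteBlob with absl::Hash then bottoms out in the CombineContiguousImpl overloads defined further down.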
- // CityHashState::hash()
+ // HashState::hash()
//
// For performance reasons in non-opt mode, we specialize this for
// integral types.
@@ -761,24 +761,24 @@ class ABSL_DLL CityHashState
return static_cast<size_t>(Mix(Seed(), static_cast<uint64_t>(value)));
}
- // Overload of CityHashState::hash()
+ // Overload of HashState::hash()
template <typename T, absl::enable_if_t<!IntegralFastPath<T>::value, int> = 0>
static size_t hash(const T& value) {
- return static_cast<size_t>(combine(CityHashState{}, value).state_);
+ return static_cast<size_t>(combine(HashState{}, value).state_);
}
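Note: both hash() overloads fold in Seed(), so results agree within a process but are deliberately not stable across processes or builds. A minimal check, assuming only the public absl::Hash<T> interface:

    #include <cassert>
    #include "absl/hash/hash.h"

    int main() {
      // Same value, same process: equal hashes. The seeded state means the
      // numeric value is not stable across runs, so it must not be persisted
      // or sent over the wire.
      assert(absl::Hash<int>{}(42) == absl::Hash<int>{}(42));
      return 0;
    }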
private:
// Invoked only once for a given argument; that plus the fact that this is
// move-only ensures that there is only one non-moved-from object.
- CityHashState() : state_(Seed()) {}
+ HashState() : state_(Seed()) {}
// Workaround for MSVC bug.
// We make the type copyable to fix the calling convention, even though we
// never actually copy it. Keep it private to not affect the public API of the
// type.
- CityHashState(const CityHashState&) = default;
+ HashState(const HashState&) = default;
- explicit CityHashState(uint64_t state) : state_(state) {}
+ explicit HashState(uint64_t state) : state_(state) {}
// Implementation of the base case for combine_contiguous where we actually
// mix the bytes into the state.
@@ -791,7 +791,8 @@ class ABSL_DLL CityHashState
static uint64_t CombineContiguousImpl(uint64_t state,
const unsigned char* first, size_t len,
std::integral_constant<int, 8>
- /* sizeof_size_t*/);
+ /* sizeof_size_t */);
+
// Slow dispatch path for calls to CombineContiguousImpl with a size argument
// larger than PiecewiseChunkSize(). Has the same effect as calling
@@ -804,26 +805,54 @@ class ABSL_DLL CityHashState
size_t len);
// Reads 9 to 16 bytes from p.
- // The first 8 bytes are in .first, the rest (zero padded) bytes are in
- // .second.
+ // The least significant 8 bytes are in .first, the rest (zero padded) bytes
+ // are in .second.
static std::pair<uint64_t, uint64_t> Read9To16(const unsigned char* p,
size_t len) {
- uint64_t high = little_endian::Load64(p + len - 8);
- return {little_endian::Load64(p), high >> (128 - len * 8)};
+ uint64_t low_mem = absl::base_internal::UnalignedLoad64(p);
+ uint64_t high_mem = absl::base_internal::UnalignedLoad64(p + len - 8);
+#ifdef ABSL_IS_LITTLE_ENDIAN
+ uint64_t most_significant = high_mem;
+ uint64_t least_significant = low_mem;
+#else
+ uint64_t most_significant = low_mem;
+ uint64_t least_significant = high_mem;
+#endif
+ return {least_significant, most_significant >> (128 - len * 8)};
}
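Note: the trick here is two overlapping unaligned 8-byte loads that, after the shift, cover all len bytes exactly once. A little-endian-only sketch using std::memcpy in place of absl::base_internal::UnalignedLoad64 (names are stand-ins, not part of the header):

    #include <cstddef>
    #include <cstdint>
    #include <cstring>
    #include <utility>

    // Requires 9 <= len <= 16.
    std::pair<uint64_t, uint64_t> Read9To16Sketch(const unsigned char* p,
                                                  size_t len) {
      uint64_t lo, hi;
      std::memcpy(&lo, p, sizeof(lo));            // bytes [0, 8)
      std::memcpy(&hi, p + len - 8, sizeof(hi));  // bytes [len - 8, len); overlaps lo
      // The shift discards the 16 - len low bytes of `hi` that `lo` already
      // covers, leaving the remaining bytes zero-padded at the top.
      return {lo, hi >> (128 - len * 8)};
    }

On a big-endian target the roles of the two loads swap, which is exactly what the #ifdef in the real Read9To16 handles.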
// Reads 4 to 8 bytes from p. Zero pads to fill uint64_t.
static uint64_t Read4To8(const unsigned char* p, size_t len) {
- return (static_cast<uint64_t>(little_endian::Load32(p + len - 4))
- << (len - 4) * 8) |
- little_endian::Load32(p);
+ uint32_t low_mem = absl::base_internal::UnalignedLoad32(p);
+ uint32_t high_mem = absl::base_internal::UnalignedLoad32(p + len - 4);
+#ifdef ABSL_IS_LITTLE_ENDIAN
+ uint32_t most_significant = high_mem;
+ uint32_t least_significant = low_mem;
+#else
+ uint32_t most_significant = low_mem;
+ uint32_t least_significant = high_mem;
+#endif
+ return (static_cast<uint64_t>(most_significant) << (len - 4) * 8) |
+ least_significant;
}
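Note: a worked example for len == 5 on a little-endian target, showing why the overlapping bytes can simply be OR'd together. The values below are what the two 4-byte loads would observe for the bytes {0x11, 0x22, 0x33, 0x44, 0x55}:

    #include <cstdint>

    constexpr uint64_t least_significant = 0x44332211;  // bytes p[0..3]
    constexpr uint64_t most_significant = 0x55443322;   // bytes p[1..4]; p[1..3] overlap
    constexpr uint64_t v =
        (most_significant << (5 - 4) * 8) | least_significant;
    static_assert(v == 0x5544332211,
                  "overlapping bytes land on the same bit positions");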
// Reads 1 to 3 bytes from p. Zero pads to fill uint32_t.
static uint32_t Read1To3(const unsigned char* p, size_t len) {
- return static_cast<uint32_t>((p[0]) | //
- (p[len / 2] << (len / 2 * 8)) | //
- (p[len - 1] << ((len - 1) * 8)));
+ unsigned char mem0 = p[0];
+ unsigned char mem1 = p[len / 2];
+ unsigned char mem2 = p[len - 1];
+#ifdef ABSL_IS_LITTLE_ENDIAN
+ unsigned char significant2 = mem2;
+ unsigned char significant1 = mem1;
+ unsigned char significant0 = mem0;
+#else
+ unsigned char significant2 = mem0;
+ unsigned char significant1 = mem1;
+ unsigned char significant0 = mem2;
+#endif
+ return static_cast<uint32_t>(significant0 | //
+ (significant1 << (len / 2 * 8)) | //
+ (significant2 << ((len - 1) * 8)));
}
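Note: for len < 4 the three byte reads can alias, but the shifts put any duplicated byte on identical bit positions, so the OR is still correct. Two worked cases in little-endian orientation:

    #include <cstdint>

    // len == 2 with bytes {0xAA, 0xBB}: p[len / 2] and p[len - 1] are the
    // same byte, and the duplicate term is absorbed by the OR.
    constexpr uint32_t len2 =
        0xAAu | (0xBBu << (2 / 2 * 8)) | (0xBBu << ((2 - 1) * 8));
    static_assert(len2 == 0xBBAA, "two bytes packed, duplicate term harmless");

    // len == 1: all three reads alias p[0] and every shift is zero.
    constexpr uint32_t len1 =
        0xAAu | (0xAAu << (1 / 2 * 8)) | (0xAAu << ((1 - 1) * 8));
    static_assert(len1 == 0xAA, "single byte, zero padded");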
ABSL_ATTRIBUTE_ALWAYS_INLINE static uint64_t Mix(uint64_t state, uint64_t v) {
@@ -838,6 +867,19 @@ class ABSL_DLL CityHashState
return static_cast<uint64_t>(m ^ (m >> (sizeof(m) * 8 / 2)));
}
+ // An extern to avoid bloat on a direct call to Wyhash() with fixed values for
+ // both the seed and salt parameters.
+ static uint64_t WyhashImpl(const unsigned char* data, size_t len);
+
+ ABSL_ATTRIBUTE_ALWAYS_INLINE static uint64_t Hash64(const unsigned char* data,
+ size_t len) {
+#ifdef ABSL_HAVE_INTRINSIC_INT128
+ return WyhashImpl(data, len);
+#else
+ return absl::hash_internal::CityHash64(reinterpret_cast<const char*>(data), len);
+#endif
+ }
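Note: the dispatch is keyed on ABSL_HAVE_INTRINSIC_INT128 because Wyhash leans on 64x64-to-128-bit multiplies; where __int128 has to be emulated, CityHash64 is the better fit. A sketch of that core primitive, assuming a compiler with __int128 (this is not the implementation in absl/hash/internal/wyhash.h):

    #include <cstdint>

    // Multiply-and-fold ("mum"): the widened product's halves are XORed so
    // every input bit can influence the 64-bit result.
    inline uint64_t MumSketch(uint64_t a, uint64_t b) {
      unsigned __int128 p = static_cast<unsigned __int128>(a) * b;
      return static_cast<uint64_t>(p) ^ static_cast<uint64_t>(p >> 64);
    }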
+
// Seed()
//
// A non-deterministic seed.
@@ -855,15 +897,22 @@ class ABSL_DLL CityHashState
// On other platforms this is still going to be non-deterministic but most
// probably per-build and not per-process.
ABSL_ATTRIBUTE_ALWAYS_INLINE static uint64_t Seed() {
+#if (!defined(__clang__) || __clang_major__ > 11) && \
+ !defined(__apple_build_version__)
+ return static_cast<uint64_t>(reinterpret_cast<uintptr_t>(&kSeed));
+#else
+ // Workaround the absence of
+ // https://github.com/llvm/llvm-project/commit/bc15bf66dcca76cc06fe71fca35b74dc4d521021.
return static_cast<uint64_t>(reinterpret_cast<uintptr_t>(kSeed));
+#endif
}
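Note: a minimal sketch of the idea, mirroring the non-workaround branch: the address of a global differs per process under ASLR, which is enough variation for a non-cryptographic seed. kSeedAnchor is a hypothetical stand-in for kSeed:

    #include <cstdint>

    namespace {
    const char kSeedAnchor = 0;  // hypothetical stand-in for HashState::kSeed
    }

    inline uint64_t SeedSketch() {
      // Cheap per-process, non-deterministic value; not suitable as a
      // cryptographic seed.
      return static_cast<uint64_t>(reinterpret_cast<uintptr_t>(&kSeedAnchor));
    }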
static const void* const kSeed;
uint64_t state_;
};
-// CityHashState::CombineContiguousImpl()
-inline uint64_t CityHashState::CombineContiguousImpl(
+// HashState::CombineContiguousImpl()
+inline uint64_t HashState::CombineContiguousImpl(
uint64_t state, const unsigned char* first, size_t len,
std::integral_constant<int, 4> /* sizeof_size_t */) {
// For large values we use CityHash, for small ones we just use a
@@ -885,18 +934,18 @@ inline uint64_t CityHashState::CombineContiguousImpl(
return Mix(state, v);
}
-// Overload of CityHashState::CombineContiguousImpl()
-inline uint64_t CityHashState::CombineContiguousImpl(
+// Overload of HashState::CombineContiguousImpl()
+inline uint64_t HashState::CombineContiguousImpl(
uint64_t state, const unsigned char* first, size_t len,
std::integral_constant<int, 8> /* sizeof_size_t */) {
- // For large values we use CityHash, for small ones we just use a
- // multiplicative hash.
+ // For large values we use Wyhash or CityHash depending on the platform, for
+ // small ones we just use a multiplicative hash.
uint64_t v;
if (len > 16) {
if (ABSL_PREDICT_FALSE(len > PiecewiseChunkSize())) {
return CombineLargeContiguousImpl64(state, first, len);
}
- v = absl::hash_internal::CityHash64(reinterpret_cast<const char*>(first), len);
+ v = Hash64(first, len);
} else if (len > 8) {
auto p = Read9To16(first, len);
state = Mix(state, p.first);
@@ -927,7 +976,7 @@ struct PoisonedHash : private AggregateBarrier {
template <typename T>
struct HashImpl {
- size_t operator()(const T& value) const { return CityHashState::hash(value); }
+ size_t operator()(const T& value) const { return HashState::hash(value); }
};
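Note: a typical route into HashImpl via the public absl::Hash<T> interface; Point is an illustrative type, not part of this header:

    #include <cstddef>
    #include <utility>
    #include "absl/hash/hash.h"

    struct Point {
      int x = 0;
      int y = 0;

      template <typename H>
      friend H AbslHashValue(H h, const Point& p) {
        return H::combine(std::move(h), p.x, p.y);
      }
    };

    // absl::Hash<Point> lands in HashImpl::operator(), which forwards to
    // HashState::hash().
    inline size_t HashPoint(const Point& p) { return absl::Hash<Point>{}(p); }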
template <typename T>