diff options
Diffstat (limited to 'absl/hash')
-rw-r--r-- | absl/hash/BUILD.bazel | 2 | ||||
-rw-r--r-- | absl/hash/hash_test.cc | 114 | ||||
-rw-r--r-- | absl/hash/internal/hash.cc | 30 | ||||
-rw-r--r-- | absl/hash/internal/hash.h | 121 | ||||
-rw-r--r-- | absl/hash/internal/spy_hash_state.h | 13 |
5 files changed, 275 insertions, 5 deletions
diff --git a/absl/hash/BUILD.bazel b/absl/hash/BUILD.bazel index c51248a5..ffe8c294 100644 --- a/absl/hash/BUILD.bazel +++ b/absl/hash/BUILD.bazel @@ -71,9 +71,9 @@ cc_test( deps = [ ":hash", ":hash_testing", + ":spy_hash_state", "//absl/base:core_headers", "//absl/container:flat_hash_set", - "//absl/hash:spy_hash_state", "//absl/meta:type_traits", "//absl/numeric:int128", "@com_google_googletest//:gtest_main", diff --git a/absl/hash/hash_test.cc b/absl/hash/hash_test.cc index 9a667ba7..61f7661f 100644 --- a/absl/hash/hash_test.cc +++ b/absl/hash/hash_test.cc @@ -274,8 +274,8 @@ TEST(HashValueTest, Strings) { const std::string small = "foo"; const std::string dup = "foofoo"; - const std::string large = "large"; - const std::string huge = std::string(5000, 'a'); + const std::string large = std::string(2048, 'x'); // multiple of chunk size + const std::string huge = std::string(5000, 'a'); // not a multiple EXPECT_TRUE(absl::VerifyTypeImplementsAbslHashCorrectly(std::make_tuple( std::string(), absl::string_view(), @@ -378,6 +378,116 @@ struct Private { } }; +// Test helper for combine_piecewise_buffer. It holds a string_view to the +// buffer-to-be-hashed. Its AbslHashValue specialization will split up its +// contents at the character offsets requested. +class PiecewiseHashTester { + public: + // Create a hash view of a buffer to be hashed contiguously. + explicit PiecewiseHashTester(absl::string_view buf) + : buf_(buf), piecewise_(false), split_locations_() {} + + // Create a hash view of a buffer to be hashed piecewise, with breaks at the + // given locations. + PiecewiseHashTester(absl::string_view buf, std::set<size_t> split_locations) + : buf_(buf), + piecewise_(true), + split_locations_(std::move(split_locations)) {} + + template <typename H> + friend H AbslHashValue(H h, const PiecewiseHashTester& p) { + if (!p.piecewise_) { + return H::combine_contiguous(std::move(h), p.buf_.data(), p.buf_.size()); + } + absl::hash_internal::PiecewiseCombiner combiner; + if (p.split_locations_.empty()) { + h = combiner.add_buffer(std::move(h), p.buf_.data(), p.buf_.size()); + return combiner.finalize(std::move(h)); + } + size_t begin = 0; + for (size_t next : p.split_locations_) { + absl::string_view chunk = p.buf_.substr(begin, next - begin); + h = combiner.add_buffer(std::move(h), chunk.data(), chunk.size()); + begin = next; + } + absl::string_view last_chunk = p.buf_.substr(begin); + if (!last_chunk.empty()) { + h = combiner.add_buffer(std::move(h), last_chunk.data(), + last_chunk.size()); + } + return combiner.finalize(std::move(h)); + } + + private: + absl::string_view buf_; + bool piecewise_; + std::set<size_t> split_locations_; +}; + +// Dummy object that hashes as two distinct contiguous buffers, "foo" followed +// by "bar" +struct DummyFooBar { + template <typename H> + friend H AbslHashValue(H h, const DummyFooBar&) { + const char* foo = "foo"; + const char* bar = "bar"; + h = H::combine_contiguous(std::move(h), foo, 3); + h = H::combine_contiguous(std::move(h), bar, 3); + return std::move(h); + } +}; + +TEST(HashValueTest, CombinePiecewiseBuffer) { + absl::Hash<PiecewiseHashTester> hash; + + // Check that hashing an empty buffer through the piecewise API works. + EXPECT_EQ(hash(PiecewiseHashTester("")), hash(PiecewiseHashTester("", {}))); + + // Similarly, small buffers should give consistent results + EXPECT_EQ(hash(PiecewiseHashTester("foobar")), + hash(PiecewiseHashTester("foobar", {}))); + EXPECT_EQ(hash(PiecewiseHashTester("foobar")), + hash(PiecewiseHashTester("foobar", {3}))); + + // But hashing "foobar" in pieces gives a different answer than hashing "foo" + // contiguously, then "bar" contiguously. + EXPECT_NE(hash(PiecewiseHashTester("foobar", {3})), + absl::Hash<DummyFooBar>()(DummyFooBar{})); + + // Test hashing a large buffer incrementally, broken up in several different + // ways. Arrange for breaks on and near the stride boundaries to look for + // off-by-one errors in the implementation. + // + // This test is run on a buffer that is a multiple of the stride size, and one + // that isn't. + for (size_t big_buffer_size : {1024 * 2 + 512, 1024 * 3}) { + SCOPED_TRACE(big_buffer_size); + std::string big_buffer; + for (int i = 0; i < big_buffer_size; ++i) { + // Arbitrary std::string + big_buffer.push_back(32 + (i * (i / 3)) % 64); + } + auto big_buffer_hash = hash(PiecewiseHashTester(big_buffer)); + + const int possible_breaks = 9; + size_t breaks[possible_breaks] = {1, 512, 1023, 1024, 1025, + 1536, 2047, 2048, 2049}; + for (unsigned test_mask = 0; test_mask < (1u << possible_breaks); + ++test_mask) { + SCOPED_TRACE(test_mask); + std::set<size_t> break_locations; + for (int j = 0; j < possible_breaks; ++j) { + if (test_mask & (1u << j)) { + break_locations.insert(breaks[j]); + } + } + EXPECT_EQ( + hash(PiecewiseHashTester(big_buffer, std::move(break_locations))), + big_buffer_hash); + } + } +} + TEST(HashValueTest, PrivateSanity) { // Sanity check that Private is working as the tests below expect it to work. EXPECT_TRUE(is_hashable<Private>::value); diff --git a/absl/hash/internal/hash.cc b/absl/hash/internal/hash.cc index 4ab7a9f8..c17f3be1 100644 --- a/absl/hash/internal/hash.cc +++ b/absl/hash/internal/hash.cc @@ -17,6 +17,36 @@ namespace absl { namespace hash_internal { +uint64_t CityHashState::CombineLargeContiguousImpl32(uint64_t state, + const unsigned char* first, + size_t len) { + while (len >= PiecewiseChunkSize()) { + state = + Mix(state, absl::hash_internal::CityHash32(reinterpret_cast<const char*>(first), + PiecewiseChunkSize())); + len -= PiecewiseChunkSize(); + first += PiecewiseChunkSize(); + } + // Handle the remainder. + return CombineContiguousImpl(state, first, len, + std::integral_constant<int, 4>{}); +} + +uint64_t CityHashState::CombineLargeContiguousImpl64(uint64_t state, + const unsigned char* first, + size_t len) { + while (len >= PiecewiseChunkSize()) { + state = + Mix(state, absl::hash_internal::CityHash64(reinterpret_cast<const char*>(first), + PiecewiseChunkSize())); + len -= PiecewiseChunkSize(); + first += PiecewiseChunkSize(); + } + // Handle the remainder. + return CombineContiguousImpl(state, first, len, + std::integral_constant<int, 8>{}); +} + ABSL_CONST_INIT const void* const CityHashState::kSeed = &kSeed; } // namespace hash_internal diff --git a/absl/hash/internal/hash.h b/absl/hash/internal/hash.h index 4ff4a126..e99f8e75 100644 --- a/absl/hash/internal/hash.h +++ b/absl/hash/internal/hash.h @@ -52,6 +52,12 @@ namespace absl { namespace hash_internal { +class PiecewiseCombiner; + +// Internal detail: Large buffers are hashed in smaller chunks. This function +// returns the size of these chunks. +constexpr int PiecewiseChunkSize() { return 1024; } + // HashStateBase // // A hash state object represents an intermediate state in the computation @@ -68,7 +74,7 @@ namespace hash_internal { // // `static H combine_contiguous(H state, const unsigned char*, size_t)`. // -// `HashStateBase` will provide a complete implementations for a hash state +// `HashStateBase` will provide a complete implementation for a hash state // object in terms of this method. // // Example: @@ -117,6 +123,9 @@ class HashStateBase { // for-loop instead. template <typename T> static H combine_contiguous(H state, const T* data, size_t size); + + private: + friend class PiecewiseCombiner; }; // is_uniquely_represented @@ -187,6 +196,61 @@ H hash_bytes(H hash_state, const T& value) { return H::combine_contiguous(std::move(hash_state), start, sizeof(value)); } +// PiecewiseCombiner +// +// PiecewiseCombiner is an internal-only helper class for hashing a piecewise +// buffer of `char` or `unsigned char` as though it were contiguous. This class +// provides two methods: +// +// H add_buffer(state, data, size) +// H finalize(state) +// +// `add_buffer` can be called zero or more times, followed by a single call to +// `finalize`. This will produce the same hash expansion as concatenating each +// buffer piece into a single contiguous buffer, and passing this to +// `H::combine_contiguous`. +// +// Example usage: +// PiecewiseCombiner combiner; +// for (const auto& piece : pieces) { +// state = combiner.add_buffer(std::move(state), piece.data, piece.size); +// } +// return combiner.finalize(std::move(state)); +class PiecewiseCombiner { + public: + PiecewiseCombiner() : position_(0) {} + PiecewiseCombiner(const PiecewiseCombiner&) = delete; + PiecewiseCombiner& operator=(const PiecewiseCombiner&) = delete; + + // PiecewiseCombiner::add_buffer() + // + // Appends the given range of bytes to the sequence to be hashed, which may + // modify the provided hash state. + template <typename H> + H add_buffer(H state, const unsigned char* data, size_t size); + template <typename H> + H add_buffer(H state, const char* data, size_t size) { + return add_buffer(std::move(state), + reinterpret_cast<const unsigned char*>(data), size); + } + + // PiecewiseCombiner::finalize() + // + // Finishes combining the hash sequence, which may may modify the provided + // hash state. + // + // Once finalize() is called, add_buffer() may no longer be called. The + // resulting hash state will be the same as if the pieces passed to + // add_buffer() were concatenated into a single flat buffer, and then provided + // to H::combine_contiguous(). + template <typename H> + H finalize(H state); + + private: + unsigned char buf_[PiecewiseChunkSize()]; + size_t position_; +}; + // ----------------------------------------------------------------------------- // AbslHashValue for Basic Types // ----------------------------------------------------------------------------- @@ -709,6 +773,16 @@ class CityHashState : public HashStateBase<CityHashState> { std::integral_constant<int, 8> /* sizeof_size_t*/); + // Slow dispatch path for calls to CombineContiguousImpl with a size argument + // larger than PiecewiseChunkSize(). Has the same effect as calling + // CombineContiguousImpl() repeatedly with the chunk stride size. + static uint64_t CombineLargeContiguousImpl32(uint64_t state, + const unsigned char* first, + size_t len); + static uint64_t CombineLargeContiguousImpl64(uint64_t state, + const unsigned char* first, + size_t len); + // Reads 9 to 16 bytes from p. // The first 8 bytes are in .first, the rest (zero padded) bytes are in // .second. @@ -776,6 +850,9 @@ inline uint64_t CityHashState::CombineContiguousImpl( // multiplicative hash. uint64_t v; if (len > 8) { + if (ABSL_PREDICT_FALSE(len > PiecewiseChunkSize())) { + return CombineLargeContiguousImpl32(state, first, len); + } v = absl::hash_internal::CityHash32(reinterpret_cast<const char*>(first), len); } else if (len >= 4) { v = Read4To8(first, len); @@ -796,6 +873,9 @@ inline uint64_t CityHashState::CombineContiguousImpl( // multiplicative hash. uint64_t v; if (len > 16) { + if (ABSL_PREDICT_FALSE(len > PiecewiseChunkSize())) { + return CombineLargeContiguousImpl64(state, first, len); + } v = absl::hash_internal::CityHash64(reinterpret_cast<const char*>(first), len); } else if (len > 8) { auto p = Read9To16(first, len); @@ -812,7 +892,6 @@ inline uint64_t CityHashState::CombineContiguousImpl( return Mix(state, v); } - struct AggregateBarrier {}; // HashImpl @@ -849,6 +928,44 @@ template <typename T> H HashStateBase<H>::combine_contiguous(H state, const T* data, size_t size) { return hash_internal::hash_range_or_bytes(std::move(state), data, size); } + +// HashStateBase::PiecewiseCombiner::add_buffer() +template <typename H> +H PiecewiseCombiner::add_buffer(H state, const unsigned char* data, + size_t size) { + if (position_ + size < PiecewiseChunkSize()) { + // This partial chunk does not fill our existing buffer + memcpy(buf_ + position_, data, size); + position_ += size; + return std::move(state); + } + + // Complete the buffer and hash it + const size_t bytes_needed = PiecewiseChunkSize() - position_; + memcpy(buf_ + position_, data, bytes_needed); + state = H::combine_contiguous(std::move(state), buf_, PiecewiseChunkSize()); + data += bytes_needed; + size -= bytes_needed; + + // Hash whatever chunks we can without copying + while (size >= PiecewiseChunkSize()) { + state = H::combine_contiguous(std::move(state), data, PiecewiseChunkSize()); + data += PiecewiseChunkSize(); + size -= PiecewiseChunkSize(); + } + // Fill the buffer with the remainder + memcpy(buf_, data, size); + position_ = size; + return std::move(state); +} + +// HashStateBase::PiecewiseCombiner::finalize() +template <typename H> +H PiecewiseCombiner::finalize(H state) { + // Hash the remainder left in the buffer, which may be empty + return H::combine_contiguous(std::move(state), buf_, position_); +} + } // namespace hash_internal } // namespace absl diff --git a/absl/hash/internal/spy_hash_state.h b/absl/hash/internal/spy_hash_state.h index c4cc8d07..05c7cafe 100644 --- a/absl/hash/internal/spy_hash_state.h +++ b/absl/hash/internal/spy_hash_state.h @@ -146,6 +146,19 @@ class SpyHashStateImpl : public HashStateBase<SpyHashStateImpl<T>> { static SpyHashStateImpl combine_contiguous(SpyHashStateImpl hash_state, const unsigned char* begin, size_t size) { + const size_t large_chunk_stride = PiecewiseChunkSize(); + if (size > large_chunk_stride) { + // Combining a large contiguous buffer must have the same effect as + // doing it piecewise by the stride length, followed by the (possibly + // empty) remainder. + while (size >= large_chunk_stride) { + hash_state = SpyHashStateImpl::combine_contiguous( + std::move(hash_state), begin, large_chunk_stride); + begin += large_chunk_stride; + size -= large_chunk_stride; + } + } + hash_state.hash_representation_.emplace_back( reinterpret_cast<const char*>(begin), size); return hash_state; |