summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Evan Brown <ezb@google.com>2023-01-30 14:59:39 -0800
committerGravatar Copybara-Service <copybara-worker@google.com>2023-01-30 15:01:00 -0800
commited59f62f8bbc5f05bcba2f89ee16f107e03813f2 (patch)
tree3d4a917a7a988580a3a5cbb024e9f08b5c02b0e8
parent0c3df2f5a7d3918b9e5762cd8f143a6004b76cda (diff)
In sanitizer mode, detect when references become invalidated by randomly rehashing on insertions when there is no reserved growth.
PiperOrigin-RevId: 505807487 Change-Id: I9051a04f6a75e579d16e9ae8defd404bcc377fba
-rw-r--r--absl/container/BUILD.bazel1
-rw-r--r--absl/container/CMakeLists.txt1
-rw-r--r--absl/container/internal/raw_hash_set.cc15
-rw-r--r--absl/container/internal/raw_hash_set.h33
-rw-r--r--absl/container/internal/raw_hash_set_test.cc31
5 files changed, 72 insertions, 9 deletions
diff --git a/absl/container/BUILD.bazel b/absl/container/BUILD.bazel
index 7a966d63..92031811 100644
--- a/absl/container/BUILD.bazel
+++ b/absl/container/BUILD.bazel
@@ -623,6 +623,7 @@ cc_library(
"//absl/base:endian",
"//absl/base:prefetch",
"//absl/base:raw_logging_internal",
+ "//absl/hash",
"//absl/memory",
"//absl/meta:type_traits",
"//absl/numeric:bits",
diff --git a/absl/container/CMakeLists.txt b/absl/container/CMakeLists.txt
index 416e3e38..2ebfd08e 100644
--- a/absl/container/CMakeLists.txt
+++ b/absl/container/CMakeLists.txt
@@ -705,6 +705,7 @@ absl_cc_library(
absl::container_memory
absl::core_headers
absl::endian
+ absl::hash
absl::hash_policy_traits
absl::hashtable_debug_hooks
absl::hashtablez_sampler
diff --git a/absl/container/internal/raw_hash_set.cc b/absl/container/internal/raw_hash_set.cc
index 3677ac59..5dc8b2fa 100644
--- a/absl/container/internal/raw_hash_set.cc
+++ b/absl/container/internal/raw_hash_set.cc
@@ -19,6 +19,7 @@
#include <cstring>
#include "absl/base/config.h"
+#include "absl/hash/hash.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
@@ -51,6 +52,20 @@ inline size_t RandomSeed() {
return value ^ static_cast<size_t>(reinterpret_cast<uintptr_t>(&counter));
}
+bool CommonFieldsGenerationInfoEnabled::
+ should_rehash_for_bug_detection_on_insert(const ctrl_t* ctrl,
+ size_t capacity) const {
+ if (reserved_growth_ == kReservedGrowthJustRanOut) return true;
+ if (reserved_growth_ > 0) return false;
+ // Note: we can't use the abseil-random library because abseil-random
+ // depends on swisstable. We want to return true with probability
+ // `min(1, RehashProbabilityConstant() / capacity())`. In order to do this,
+ // we probe based on a random hash and see if the offset is less than
+ // RehashProbabilityConstant().
+ return probe(ctrl, capacity, absl::HashOf(RandomSeed())).offset() <
+ RehashProbabilityConstant();
+}
+
bool ShouldInsertBackwards(size_t hash, const ctrl_t* ctrl) {
// To avoid problems with weak hashes and single bit tests, we use % 13.
// TODO(kfm,sbenza): revisit after we do unconditional mixing
diff --git a/absl/container/internal/raw_hash_set.h b/absl/container/internal/raw_hash_set.h
index 09b55f66..65dc66c2 100644
--- a/absl/container/internal/raw_hash_set.h
+++ b/absl/container/internal/raw_hash_set.h
@@ -749,6 +749,15 @@ using Group = GroupAArch64Impl;
using Group = GroupPortableImpl;
#endif
+// When there is an insertion with no reserved growth, we rehash with
+// probability `min(1, RehashProbabilityConstant() / capacity())`. Using a
+// constant divided by capacity ensures that inserting N elements is still O(N)
+// in the average case. Using the constant 16 means that we expect to rehash ~8
+// times more often than when generations are disabled. We are adding expected
+// rehash_probability * #insertions/capacity_growth = 16/capacity * ((7/8 -
+// 7/16) * capacity)/capacity_growth = ~7 extra rehashes per capacity growth.
+inline size_t RehashProbabilityConstant() { return 16; }
+
class CommonFieldsGenerationInfoEnabled {
// A sentinel value for reserved_growth_ indicating that we just ran out of
// reserved growth on the last insertion. When reserve is called and then
@@ -769,12 +778,10 @@ class CommonFieldsGenerationInfoEnabled {
// Whether we should rehash on insert in order to detect bugs of using invalid
// references. We rehash on the first insertion after reserved_growth_ reaches
- // 0 after a call to reserve.
- // TODO(b/254649633): we could potentially do a rehash with low probability
+ // 0 after a call to reserve. We also do a rehash with low probability
// whenever reserved_growth_ is zero.
- bool should_rehash_for_bug_detection_on_insert() const {
- return reserved_growth_ == kReservedGrowthJustRanOut;
- }
+ bool should_rehash_for_bug_detection_on_insert(const ctrl_t* ctrl,
+ size_t capacity) const;
void maybe_increment_generation_on_insert() {
if (reserved_growth_ == kReservedGrowthJustRanOut) reserved_growth_ = 0;
@@ -820,7 +827,9 @@ class CommonFieldsGenerationInfoDisabled {
CommonFieldsGenerationInfoDisabled& operator=(
CommonFieldsGenerationInfoDisabled&&) = default;
- bool should_rehash_for_bug_detection_on_insert() const { return false; }
+ bool should_rehash_for_bug_detection_on_insert(const ctrl_t*, size_t) const {
+ return false;
+ }
void maybe_increment_generation_on_insert() {}
void reset_reserved_growth(size_t, size_t) {}
size_t reserved_growth() const { return 0; }
@@ -905,6 +914,10 @@ class CommonFields : public CommonFieldsGenerationInfo {
return compressed_tuple_.template get<1>();
}
+ bool should_rehash_for_bug_detection_on_insert() const {
+ return CommonFieldsGenerationInfo::
+ should_rehash_for_bug_detection_on_insert(control_, capacity_);
+ }
void reset_reserved_growth(size_t reservation) {
CommonFieldsGenerationInfo::reset_reserved_growth(reservation, size_);
}
@@ -1106,11 +1119,13 @@ struct FindInfo {
inline bool is_small(size_t capacity) { return capacity < Group::kWidth - 1; }
// Begins a probing operation on `common.control`, using `hash`.
-inline probe_seq<Group::kWidth> probe(const CommonFields& common, size_t hash) {
- const ctrl_t* ctrl = common.control_;
- const size_t capacity = common.capacity_;
+inline probe_seq<Group::kWidth> probe(const ctrl_t* ctrl, const size_t capacity,
+ size_t hash) {
return probe_seq<Group::kWidth>(H1(hash, ctrl), capacity);
}
+inline probe_seq<Group::kWidth> probe(const CommonFields& common, size_t hash) {
+ return probe(common.control_, common.capacity_, hash);
+}
// Probes an array of control bits using a probe sequence derived from `hash`,
// and returns the offset corresponding to the first deleted or empty slot.
diff --git a/absl/container/internal/raw_hash_set_test.cc b/absl/container/internal/raw_hash_set_test.cc
index bdffb817..e33fda20 100644
--- a/absl/container/internal/raw_hash_set_test.cc
+++ b/absl/container/internal/raw_hash_set_test.cc
@@ -865,6 +865,10 @@ void TestDecompose(bool construct_three) {
}
TEST(Table, Decompose) {
+ if (SwisstableGenerationsEnabled()) {
+ GTEST_SKIP() << "Generations being enabled causes extra rehashes.";
+ }
+
TestDecompose<DecomposeHash, DecomposeEq>(false);
struct TransparentHashIntOverload {
@@ -903,6 +907,10 @@ struct Modulo1000HashTable
// Test that rehash with no resize happen in case of many deleted slots.
TEST(Table, RehashWithNoResize) {
+ if (SwisstableGenerationsEnabled()) {
+ GTEST_SKIP() << "Generations being enabled causes extra rehashes.";
+ }
+
Modulo1000HashTable t;
// Adding the same length (and the same hash) strings
// to have at least kMinFullGroups groups
@@ -996,6 +1004,10 @@ TEST(Table, EnsureNonQuadraticAsInRust) {
}
TEST(Table, ClearBug) {
+ if (SwisstableGenerationsEnabled()) {
+ GTEST_SKIP() << "Generations being enabled causes extra rehashes.";
+ }
+
IntTable t;
constexpr size_t capacity = container_internal::Group::kWidth - 1;
constexpr size_t max_size = capacity / 2 + 1;
@@ -2318,6 +2330,25 @@ TEST(Table, ReservedGrowthUpdatesWhenTableDoesntGrow) {
EXPECT_EQ(*it, 0);
}
+TEST(Table, InvalidReferenceUseCrashesWithSanitizers) {
+ if (!SwisstableGenerationsEnabled()) GTEST_SKIP() << "Generations disabled.";
+#ifdef ABSL_HAVE_MEMORY_SANITIZER
+ GTEST_SKIP() << "MSan fails to detect some of these rehashes.";
+#endif
+
+ IntTable t;
+ t.insert(0);
+ // Rehashing is guaranteed on every insertion while capacity is less than
+ // RehashProbabilityConstant().
+ int64_t i = 0;
+ while (t.capacity() <= RehashProbabilityConstant()) {
+ // ptr will become invalidated on rehash.
+ const int64_t* ptr = &*t.begin();
+ t.insert(++i);
+ EXPECT_DEATH_IF_SUPPORTED(std::cout << *ptr, "heap-use-after-free") << i;
+ }
+}
+
} // namespace
} // namespace container_internal
ABSL_NAMESPACE_END