author     Evan Brown <ezb@google.com>  2022-10-17 12:56:53 -0700
committer  Copybara-Service <copybara-worker@google.com>  2022-10-17 12:58:14 -0700
commit     bbf2ed7890615a587f0209c6d7e9b2c08781ee09 (patch)
tree       3ae3ec73b24f430ff747dd3af7f0c0a5717f4d6d
parent     7ab917ec21efb6cdb3c3946fbecb9522ff2af100 (diff)

Implement btree_iterator::operator-, which is faster than std::distance for btree iterators.

Note: btree_iterator::operator- is still O(N) because in the worst case
(end()-begin()), we will have at least one operation per node in the tree, and
there are at least N/M nodes, where M (a constant) is the maximum number of
values per node.

PiperOrigin-RevId: 481716874
Change-Id: Ic0225b7509208ed96b75a2dc626d2aa4a24f4946
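For illustration only (not part of this patch), a minimal usage sketch with hypothetical names: the new subtraction yields the same count as std::distance, but walks whole nodes rather than incrementing one element at a time.

#include <cassert>
#include <iterator>

#include "absl/container/btree_set.h"

void RankExample() {
  absl::btree_set<int> set = {1, 2, 3, 5, 8, 13, 21};
  // Rank of the key 8, i.e. how many elements precede it.
  auto rank = set.find(8) - set.begin();                     // new operator-
  auto rank_slow = std::distance(set.begin(), set.find(8));  // one ++ per element
  assert(rank == rank_slow);  // both are 4
}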
-rw-r--r--  absl/container/BUILD.bazel        |  4
-rw-r--r--  absl/container/CMakeLists.txt     |  2
-rw-r--r--  absl/container/btree_benchmark.cc | 25
-rw-r--r--  absl/container/btree_map.h        |  3
-rw-r--r--  absl/container/btree_set.h        |  3
-rw-r--r--  absl/container/btree_test.cc      | 20
-rw-r--r--  absl/container/internal/btree.h   | 73
7 files changed, 130 insertions, 0 deletions
diff --git a/absl/container/BUILD.bazel b/absl/container/BUILD.bazel
index d8dad8e8..70febdda 100644
--- a/absl/container/BUILD.bazel
+++ b/absl/container/BUILD.bazel
@@ -984,11 +984,13 @@ cc_test(
":btree_test_common",
":counting_allocator",
":test_instance_tracker",
+ "//absl/algorithm:container",
"//absl/base:core_headers",
"//absl/base:raw_logging_internal",
"//absl/flags:flag",
"//absl/hash:hash_testing",
"//absl/memory",
+ "//absl/random",
"//absl/strings",
"//absl/types:compare",
"@com_google_googletest//:gtest_main",
@@ -1011,10 +1013,12 @@ cc_binary(
":flat_hash_map",
":flat_hash_set",
":hashtable_debug",
+ "//absl/algorithm:container",
"//absl/base:raw_logging_internal",
"//absl/hash",
"//absl/log",
"//absl/memory",
+ "//absl/random",
"//absl/strings:cord",
"//absl/strings:str_format",
"//absl/time",
diff --git a/absl/container/CMakeLists.txt b/absl/container/CMakeLists.txt
index 25216466..b3776aed 100644
--- a/absl/container/CMakeLists.txt
+++ b/absl/container/CMakeLists.txt
@@ -72,6 +72,7 @@ absl_cc_test(
LINKOPTS
${ABSL_DEFAULT_LINKOPTS}
DEPS
+ absl::algorithm_container
absl::btree
absl::btree_test_common
absl::compare
@@ -79,6 +80,7 @@ absl_cc_test(
absl::counting_allocator
absl::flags
absl::hash_testing
+ absl::random_random
absl::raw_logging_internal
absl::strings
absl::test_instance_tracker
diff --git a/absl/container/btree_benchmark.cc b/absl/container/btree_benchmark.cc
index cc9a106d..0d26fd42 100644
--- a/absl/container/btree_benchmark.cc
+++ b/absl/container/btree_benchmark.cc
@@ -27,6 +27,7 @@
#include <vector>
#include "benchmark/benchmark.h"
+#include "absl/algorithm/container.h"
#include "absl/base/internal/raw_logging.h"
#include "absl/container/btree_map.h"
#include "absl/container/btree_set.h"
@@ -37,6 +38,7 @@
#include "absl/hash/hash.h"
#include "absl/log/log.h"
#include "absl/memory/memory.h"
+#include "absl/random/random.h"
#include "absl/strings/cord.h"
#include "absl/strings/str_format.h"
#include "absl/time/time.h"
@@ -733,6 +735,29 @@ double ContainerInfo(const btree_map<int, BigTypePtr<Size>>& b) {
BIG_TYPE_PTR_BENCHMARKS(32);
+void BM_BtreeSet_IteratorSubtraction(benchmark::State& state) {
+ absl::InsecureBitGen bitgen;
+ std::vector<int> vec;
+ // Randomize the set's insertion order so the nodes aren't all full.
+ vec.reserve(state.range(0));
+ for (int i = 0; i < state.range(0); ++i) vec.push_back(i);
+ absl::c_shuffle(vec, bitgen);
+
+ absl::btree_set<int> set;
+ for (int i : vec) set.insert(i);
+
+ size_t distance = absl::Uniform(bitgen, 0u, set.size());
+ while (state.KeepRunningBatch(distance)) {
+ size_t end = absl::Uniform(bitgen, distance, set.size());
+ size_t begin = end - distance;
+ benchmark::DoNotOptimize(set.find(static_cast<int>(end)) -
+ set.find(static_cast<int>(begin)));
+ distance = absl::Uniform(bitgen, 0u, set.size());
+ }
+}
+
+BENCHMARK(BM_BtreeSet_IteratorSubtraction)->Range(1 << 10, 1 << 20);
+
} // namespace
} // namespace container_internal
ABSL_NAMESPACE_END
diff --git a/absl/container/btree_map.h b/absl/container/btree_map.h
index 286817f1..479db8b7 100644
--- a/absl/container/btree_map.h
+++ b/absl/container/btree_map.h
@@ -46,6 +46,9 @@
// reason, `insert()` and `erase()` return a valid iterator at the current
// position. Another important difference is that key-types must be
// copy-constructible.
+//
+// Another API difference is that btree iterators can be subtracted, and this
+// is faster than using std::distance.
#ifndef ABSL_CONTAINER_BTREE_MAP_H_
#define ABSL_CONTAINER_BTREE_MAP_H_
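For illustration only (not part of this patch), a hedged sketch of what iterator subtraction enables for btree_map: computing a key's zero-based position among the sorted keys. The helper name is hypothetical.

#include <string>

#include "absl/container/btree_map.h"

// Returns the zero-based position of `key` among the map's sorted keys, or -1
// if the key is absent. Equivalent to std::distance(m.begin(), it), but the
// subtraction can skip over whole nodes.
int PositionOf(const absl::btree_map<std::string, int>& m,
               const std::string& key) {
  auto it = m.find(key);
  if (it == m.end()) return -1;
  return static_cast<int>(it - m.begin());
}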
diff --git a/absl/container/btree_set.h b/absl/container/btree_set.h
index e823a2a0..bbff65d1 100644
--- a/absl/container/btree_set.h
+++ b/absl/container/btree_set.h
@@ -45,6 +45,9 @@
// more than one iterator, pointer, or reference simultaneously. For this
// reason, `insert()` and `erase()` return a valid iterator at the current
// position.
+//
+// Another API difference is that btree iterators can be subtracted, and this
+// is faster than using std::distance.
#ifndef ABSL_CONTAINER_BTREE_SET_H_
#define ABSL_CONTAINER_BTREE_SET_H_
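Likewise for btree_set, a hedged sketch (not part of this patch, helper name hypothetical) of counting the keys in a half-open range without visiting each element:

#include <cstddef>

#include "absl/container/btree_set.h"

// Number of keys k with lo <= k < hi. Requires lo <= hi so that the
// subtraction's precondition holds (the left iterator must be reachable
// from the right one by repeated increments).
std::size_t CountInRange(const absl::btree_set<int>& s, int lo, int hi) {
  return static_cast<std::size_t>(s.lower_bound(hi) - s.lower_bound(lo));
}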
diff --git a/absl/container/btree_test.cc b/absl/container/btree_test.cc
index 9386a6b1..6a5351fe 100644
--- a/absl/container/btree_test.cc
+++ b/absl/container/btree_test.cc
@@ -31,6 +31,7 @@
#include "gmock/gmock.h"
#include "gtest/gtest.h"
+#include "absl/algorithm/container.h"
#include "absl/base/internal/raw_logging.h"
#include "absl/base/macros.h"
#include "absl/container/btree_map.h"
@@ -40,6 +41,7 @@
#include "absl/flags/flag.h"
#include "absl/hash/hash_testing.h"
#include "absl/memory/memory.h"
+#include "absl/random/random.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_split.h"
#include "absl/strings/string_view.h"
@@ -3320,6 +3322,24 @@ TEST(Btree, ReusePoisonMemory) {
set.insert(0);
}
+TEST(Btree, IteratorSubtraction) {
+ absl::BitGen bitgen;
+ std::vector<int> vec;
+ // Randomize the set's insertion order so the nodes aren't all full.
+ for (int i = 0; i < 1000000; ++i) vec.push_back(i);
+ absl::c_shuffle(vec, bitgen);
+
+ absl::btree_set<int> set;
+ for (int i : vec) set.insert(i);
+
+ for (int i = 0; i < 1000; ++i) {
+ size_t begin = absl::Uniform(bitgen, 0u, set.size());
+ size_t end = absl::Uniform(bitgen, begin, set.size());
+ ASSERT_EQ(end - begin, set.find(end) - set.find(begin))
+ << begin << " " << end;
+ }
+}
+
} // namespace
} // namespace container_internal
ABSL_NAMESPACE_END
diff --git a/absl/container/internal/btree.h b/absl/container/internal/btree.h
index ecf31bea..9c509073 100644
--- a/absl/container/internal/btree.h
+++ b/absl/container/internal/btree.h
@@ -1085,6 +1085,16 @@ class btree_iterator {
return node_ != other.node_ || position_ != other.position_;
}
+ // Returns n such that n calls to ++other yields *this.
+ // Precondition: n exists.
+ difference_type operator-(const_iterator other) const {
+ if (node_ == other.node_) {
+ if (node_->is_leaf()) return position_ - other.position_;
+ if (position_ == other.position_) return 0;
+ }
+ return distance_slow(other);
+ }
+
// Accessors for the key/value the iterator is pointing at.
reference operator*() const {
ABSL_HARDENING_ASSERT(node_ != nullptr);
@@ -1148,6 +1158,11 @@ class btree_iterator {
#endif
}
+ // Returns n such that n calls to ++other yields *this.
+ // Precondition: n exists && (this->node_ != other.node_ ||
+ // !this->node_->is_leaf() || this->position_ != other.position_).
+ difference_type distance_slow(const_iterator other) const;
+
// Increment/decrement the iterator.
void increment() {
assert_valid_generation();
@@ -1975,6 +1990,64 @@ void btree_node<P>::clear_and_delete(btree_node *node, allocator_type *alloc) {
////
// btree_iterator methods
+
+// Note: the implementation here is based on btree_node::clear_and_delete.
+template <typename N, typename R, typename P>
+auto btree_iterator<N, R, P>::distance_slow(const_iterator other) const
+ -> difference_type {
+ const_iterator begin = other;
+ const_iterator end = *this;
+ assert(begin.node_ != end.node_ || !begin.node_->is_leaf() ||
+ begin.position_ != end.position_);
+
+ const node_type *node = begin.node_;
+ // We need to compensate for double counting if begin.node_ is a leaf node.
+ difference_type count = node->is_leaf() ? -begin.position_ : 0;
+
+ // First navigate to the leftmost leaf node past begin.
+ if (node->is_internal()) {
+ ++count;
+ node = node->child(begin.position_ + 1);
+ }
+ while (node->is_internal()) node = node->start_child();
+
+ // Use `size_type` because `pos` needs to be able to hold `kNodeSlots+1`,
+ // which isn't guaranteed to be a valid `field_type`.
+ size_type pos = node->position();
+ const node_type *parent = node->parent();
+ for (;;) {
+ // In each iteration of the next loop, we count one leaf node and go right.
+ assert(pos <= parent->finish());
+ do {
+ node = parent->child(static_cast<field_type>(pos));
+ if (node->is_internal()) {
+ // Navigate to the leftmost leaf under node.
+ while (node->is_internal()) node = node->start_child();
+ pos = node->position();
+ parent = node->parent();
+ }
+ if (node == end.node_) return count + end.position_;
+ if (parent == end.node_ && pos == static_cast<size_type>(end.position_))
+ return count + node->count();
+ // +1 is for the next internal node value.
+ count += node->count() + 1;
+ ++pos;
+ } while (pos <= parent->finish());
+
+ // Once we've counted all children of parent, go up/right.
+ assert(pos > parent->finish());
+ do {
+ node = parent;
+ pos = node->position();
+ parent = node->parent();
+ // -1 because we counted the value at end and shouldn't.
+ if (parent == end.node_ && pos == static_cast<size_type>(end.position_))
+ return count - 1;
+ ++pos;
+ } while (pos > parent->finish());
+ }
+}
+
template <typename N, typename R, typename P>
void btree_iterator<N, R, P>::increment_slow() {
if (node_->is_leaf()) {