diff options
Diffstat (limited to 'absl/container/internal/btree.h')
-rw-r--r-- | absl/container/internal/btree.h | 123 |
1 files changed, 65 insertions, 58 deletions
diff --git a/absl/container/internal/btree.h b/absl/container/internal/btree.h index 6f5f01b8..00444a53 100644 --- a/absl/container/internal/btree.h +++ b/absl/container/internal/btree.h @@ -499,23 +499,23 @@ class btree_node { // // is the same as the count of values. // field_type finish; // // The maximum number of values the node can hold. This is an integer in - // // [1, kNodeValues] for root leaf nodes, kNodeValues for non-root leaf + // // [1, kNodeSlots] for root leaf nodes, kNodeSlots for non-root leaf // // nodes, and kInternalNodeMaxCount (as a sentinel value) for internal - // // nodes (even though there are still kNodeValues values in the node). + // // nodes (even though there are still kNodeSlots values in the node). // // TODO(ezb): make max_count use only 4 bits and record log2(capacity) // // to free extra bits for is_root, etc. // field_type max_count; // // // The array of values. The capacity is `max_count` for leaf nodes and - // // kNodeValues for internal nodes. Only the values in + // // kNodeSlots for internal nodes. Only the values in // // [start, finish) have been initialized and are valid. // slot_type values[max_count]; // // // The array of child pointers. The keys in children[i] are all less // // than key(i). The keys in children[i + 1] are all greater than key(i). - // // There are 0 children for leaf nodes and kNodeValues + 1 children for + // // There are 0 children for leaf nodes and kNodeSlots + 1 children for // // internal nodes. - // btree_node *children[kNodeValues + 1]; + // btree_node *children[kNodeSlots + 1]; // // This class is only constructed by EmptyNodeType. Normally, pointers to the // layout above are allocated, cast to btree_node*, and de-allocated within @@ -537,57 +537,62 @@ class btree_node { private: using layout_type = absl::container_internal::Layout<btree_node *, field_type, slot_type, btree_node *>; - constexpr static size_type SizeWithNValues(size_type n) { + constexpr static size_type SizeWithNSlots(size_type n) { return layout_type(/*parent*/ 1, /*position, start, finish, max_count*/ 4, - /*values*/ n, + /*slots*/ n, /*children*/ 0) .AllocSize(); } // A lower bound for the overhead of fields other than values in a leaf node. constexpr static size_type MinimumOverhead() { - return SizeWithNValues(1) - sizeof(value_type); + return SizeWithNSlots(1) - sizeof(value_type); } // Compute how many values we can fit onto a leaf node taking into account // padding. - constexpr static size_type NodeTargetValues(const int begin, const int end) { + constexpr static size_type NodeTargetSlots(const int begin, const int end) { return begin == end ? begin - : SizeWithNValues((begin + end) / 2 + 1) > + : SizeWithNSlots((begin + end) / 2 + 1) > params_type::kTargetNodeSize - ? NodeTargetValues(begin, (begin + end) / 2) - : NodeTargetValues((begin + end) / 2 + 1, end); + ? NodeTargetSlots(begin, (begin + end) / 2) + : NodeTargetSlots((begin + end) / 2 + 1, end); } enum { kTargetNodeSize = params_type::kTargetNodeSize, - kNodeTargetValues = NodeTargetValues(0, params_type::kTargetNodeSize), + kNodeTargetSlots = NodeTargetSlots(0, params_type::kTargetNodeSize), - // We need a minimum of 3 values per internal node in order to perform + // We need a minimum of 3 slots per internal node in order to perform // splitting (1 value for the two nodes involved in the split and 1 value - // propagated to the parent as the delimiter for the split). - kNodeValues = kNodeTargetValues >= 3 ? kNodeTargetValues : 3, + // propagated to the parent as the delimiter for the split). For performance + // reasons, we don't allow 3 slots-per-node due to bad worst case occupancy + // of 1/3 (for a node, not a b-tree). + kMinNodeSlots = 4, + + kNodeSlots = + kNodeTargetSlots >= kMinNodeSlots ? kNodeTargetSlots : kMinNodeSlots, // The node is internal (i.e. is not a leaf node) if and only if `max_count` // has this value. kInternalNodeMaxCount = 0, }; - // Leaves can have less than kNodeValues values. - constexpr static layout_type LeafLayout(const int max_values = kNodeValues) { + // Leaves can have less than kNodeSlots values. + constexpr static layout_type LeafLayout(const int slots = kNodeSlots) { return layout_type(/*parent*/ 1, /*position, start, finish, max_count*/ 4, - /*values*/ max_values, + /*slots*/ slots, /*children*/ 0); } constexpr static layout_type InternalLayout() { return layout_type(/*parent*/ 1, /*position, start, finish, max_count*/ 4, - /*values*/ kNodeValues, - /*children*/ kNodeValues + 1); + /*slots*/ kNodeSlots, + /*children*/ kNodeSlots + 1); } - constexpr static size_type LeafSize(const int max_values = kNodeValues) { - return LeafLayout(max_values).AllocSize(); + constexpr static size_type LeafSize(const int slots = kNodeSlots) { + return LeafLayout(slots).AllocSize(); } constexpr static size_type InternalSize() { return InternalLayout().AllocSize(); @@ -644,10 +649,10 @@ class btree_node { } field_type max_count() const { // Internal nodes have max_count==kInternalNodeMaxCount. - // Leaf nodes have max_count in [1, kNodeValues]. + // Leaf nodes have max_count in [1, kNodeSlots]. const field_type max_count = GetField<1>()[3]; return max_count == field_type{kInternalNodeMaxCount} - ? field_type{kNodeValues} + ? field_type{kNodeSlots} : max_count; } @@ -837,12 +842,12 @@ class btree_node { start_slot(), max_count * sizeof(slot_type)); } void init_internal(btree_node *parent) { - init_leaf(parent, kNodeValues); + init_leaf(parent, kNodeSlots); // Set `max_count` to a sentinel value to indicate that this node is // internal. set_max_count(kInternalNodeMaxCount); absl::container_internal::SanitizerPoisonMemoryRegion( - &mutable_child(start()), (kNodeValues + 1) * sizeof(btree_node *)); + &mutable_child(start()), (kNodeSlots + 1) * sizeof(btree_node *)); } static void deallocate(const size_type size, btree_node *node, @@ -1099,8 +1104,8 @@ class btree { } enum : uint32_t { - kNodeValues = node_type::kNodeValues, - kMinNodeValues = kNodeValues / 2, + kNodeSlots = node_type::kNodeSlots, + kMinNodeValues = kNodeSlots / 2, }; struct node_stats { @@ -1381,12 +1386,14 @@ class btree { } } - // The average number of bytes used per value stored in the btree. + // The average number of bytes used per value stored in the btree assuming + // random insertion order. static double average_bytes_per_value() { - // Returns the number of bytes per value on a leaf node that is 75% - // full. Experimentally, this matches up nicely with the computed number of - // bytes per value in trees that had their values inserted in random order. - return node_type::LeafSize() / (kNodeValues * 0.75); + // The expected number of values per node with random insertion order is the + // average of the maximum and minimum numbers of values per node. + const double expected_values_per_node = + (kNodeSlots + kMinNodeValues) / 2.0; + return node_type::LeafSize() / expected_values_per_node; } // The fullness of the btree. Computed as the number of elements in the btree @@ -1396,7 +1403,7 @@ class btree { // Returns 0 for empty trees. double fullness() const { if (empty()) return 0.0; - return static_cast<double>(size()) / (nodes() * kNodeValues); + return static_cast<double>(size()) / (nodes() * kNodeSlots); } // The overhead of the btree structure in bytes per node. Computed as the // total number of bytes used by the btree minus the number of bytes used for @@ -1446,7 +1453,7 @@ class btree { } node_type *new_leaf_node(node_type *parent) { node_type *n = allocate(node_type::LeafSize()); - n->init_leaf(parent, kNodeValues); + n->init_leaf(parent, kNodeSlots); return n; } node_type *new_leaf_root_node(const int max_count) { @@ -1691,7 +1698,7 @@ template <typename P> void btree_node<P>::split(const int insert_position, btree_node *dest, allocator_type *alloc) { assert(dest->count() == 0); - assert(max_count() == kNodeValues); + assert(max_count() == kNodeSlots); // We bias the split based on the position being inserted. If we're // inserting at the beginning of the left node then bias the split to put @@ -1699,7 +1706,7 @@ void btree_node<P>::split(const int insert_position, btree_node *dest, // right node then bias the split to put more values on the left node. if (insert_position == start()) { dest->set_finish(dest->start() + finish() - 1); - } else if (insert_position == kNodeValues) { + } else if (insert_position == kNodeSlots) { dest->set_finish(dest->start()); } else { dest->set_finish(dest->start() + count() / 2); @@ -1770,7 +1777,7 @@ void btree_node<P>::clear_and_delete(btree_node *node, allocator_type *alloc) { // Navigate to the leftmost leaf under node, and then delete upwards. while (!node->leaf()) node = node->start_child(); - // Use `int` because `pos` needs to be able to hold `kNodeValues+1`, which + // Use `int` because `pos` needs to be able to hold `kNodeSlots+1`, which // isn't guaranteed to be a valid `field_type`. int pos = node->position(); btree_node *parent = node->parent(); @@ -1889,7 +1896,7 @@ constexpr bool btree<P>::static_assert_validation() { // Note: We assert that kTargetValues, which is computed from // Params::kTargetNodeSize, must fit the node_type::field_type. static_assert( - kNodeValues < (1 << (8 * sizeof(typename node_type::field_type))), + kNodeSlots < (1 << (8 * sizeof(typename node_type::field_type))), "target node size too large"); // Verify that key_compare returns an absl::{weak,strong}_ordering or bool. @@ -2270,7 +2277,7 @@ void btree<P>::rebalance_or_split(iterator *iter) { node_type *&node = iter->node; int &insert_position = iter->position; assert(node->count() == node->max_count()); - assert(kNodeValues == node->max_count()); + assert(kNodeSlots == node->max_count()); // First try to make room on the node by rebalancing. node_type *parent = node->parent(); @@ -2278,17 +2285,17 @@ void btree<P>::rebalance_or_split(iterator *iter) { if (node->position() > parent->start()) { // Try rebalancing with our left sibling. node_type *left = parent->child(node->position() - 1); - assert(left->max_count() == kNodeValues); - if (left->count() < kNodeValues) { + assert(left->max_count() == kNodeSlots); + if (left->count() < kNodeSlots) { // We bias rebalancing based on the position being inserted. If we're // inserting at the end of the right node then we bias rebalancing to // fill up the left node. - int to_move = (kNodeValues - left->count()) / - (1 + (insert_position < static_cast<int>(kNodeValues))); + int to_move = (kNodeSlots - left->count()) / + (1 + (insert_position < static_cast<int>(kNodeSlots))); to_move = (std::max)(1, to_move); if (insert_position - to_move >= node->start() || - left->count() + to_move < static_cast<int>(kNodeValues)) { + left->count() + to_move < static_cast<int>(kNodeSlots)) { left->rebalance_right_to_left(to_move, node, mutable_allocator()); assert(node->max_count() - node->count() == to_move); @@ -2307,17 +2314,17 @@ void btree<P>::rebalance_or_split(iterator *iter) { if (node->position() < parent->finish()) { // Try rebalancing with our right sibling. node_type *right = parent->child(node->position() + 1); - assert(right->max_count() == kNodeValues); - if (right->count() < kNodeValues) { + assert(right->max_count() == kNodeSlots); + if (right->count() < kNodeSlots) { // We bias rebalancing based on the position being inserted. If we're // inserting at the beginning of the left node then we bias rebalancing // to fill up the right node. - int to_move = (static_cast<int>(kNodeValues) - right->count()) / + int to_move = (static_cast<int>(kNodeSlots) - right->count()) / (1 + (insert_position > node->start())); to_move = (std::max)(1, to_move); if (insert_position <= node->finish() - to_move || - right->count() + to_move < static_cast<int>(kNodeValues)) { + right->count() + to_move < static_cast<int>(kNodeSlots)) { node->rebalance_left_to_right(to_move, right, mutable_allocator()); if (insert_position > node->finish()) { @@ -2333,8 +2340,8 @@ void btree<P>::rebalance_or_split(iterator *iter) { // Rebalancing failed, make sure there is room on the parent node for a new // value. - assert(parent->max_count() == kNodeValues); - if (parent->count() == kNodeValues) { + assert(parent->max_count() == kNodeSlots); + if (parent->count() == kNodeSlots) { iterator parent_iter(node->parent(), node->position()); rebalance_or_split(&parent_iter); } @@ -2379,8 +2386,8 @@ bool btree<P>::try_merge_or_rebalance(iterator *iter) { if (iter->node->position() > parent->start()) { // Try merging with our left sibling. node_type *left = parent->child(iter->node->position() - 1); - assert(left->max_count() == kNodeValues); - if (1U + left->count() + iter->node->count() <= kNodeValues) { + assert(left->max_count() == kNodeSlots); + if (1U + left->count() + iter->node->count() <= kNodeSlots) { iter->position += 1 + left->count(); merge_nodes(left, iter->node); iter->node = left; @@ -2390,8 +2397,8 @@ bool btree<P>::try_merge_or_rebalance(iterator *iter) { if (iter->node->position() < parent->finish()) { // Try merging with our right sibling. node_type *right = parent->child(iter->node->position() + 1); - assert(right->max_count() == kNodeValues); - if (1U + iter->node->count() + right->count() <= kNodeValues) { + assert(right->max_count() == kNodeSlots); + if (1U + iter->node->count() + right->count() <= kNodeSlots) { merge_nodes(iter->node, right); return true; } @@ -2472,12 +2479,12 @@ inline auto btree<P>::internal_emplace(iterator iter, Args &&... args) allocator_type *alloc = mutable_allocator(); if (iter.node->count() == max_count) { // Make room in the leaf for the new item. - if (max_count < kNodeValues) { + if (max_count < kNodeSlots) { // Insertion into the root where the root is smaller than the full node // size. Simply grow the size of the root node. assert(iter.node == root()); iter.node = - new_leaf_root_node((std::min<int>)(kNodeValues, 2 * max_count)); + new_leaf_root_node((std::min<int>)(kNodeSlots, 2 * max_count)); // Transfer the values from the old root to the new root. node_type *old_root = root(); node_type *new_root = iter.node; |