1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
|
// Copyright 2021 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_H_
#define ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_H_
#include <cassert>
#include <cstdint>
#include <iosfwd>
#include "absl/base/config.h"
#include "absl/base/internal/raw_logging.h"
#include "absl/base/optimization.h"
#include "absl/strings/internal/cord_data_edge.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cord_rep_flat.h"
#include "absl/strings/string_view.h"
#include "absl/types/span.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
class CordRepBtreeNavigator;
// CordRepBtree is as the name implies a btree implementation of a Cordrep tree.
// Data is stored at the leaf level only, non leaf nodes contain down pointers
// only. Allowed types of data edges are FLAT, EXTERNAL and SUBSTRINGs of FLAT
// or EXTERNAL nodes. The implementation allows for data to be added to either
// end of the tree only, it does not provide any 'insert' logic. This has the
// benefit that we can expect good fill ratios: all nodes except the outer
// 'legs' will have 100% fill ratios for trees built using Append/Prepend
// methods. Merged trees will typically have a fill ratio well above 50% as in a
// similar fashion, one side of the merged tree will typically have a 100% fill
// ratio, and the 'open' end will average 50%. All operations are O(log(n)) or
// better, and the tree never needs balancing.
//
// All methods accepting a CordRep* or CordRepBtree* adopt a reference on that
// input unless explicitly stated otherwise. All functions returning a CordRep*
// or CordRepBtree* instance transfer a reference back to the caller.
// Simplified, callers both 'donate' and 'consume' a reference count on each
// call, simplifying the API. An example of building a tree:
//
// CordRepBtree* tree = CordRepBtree::Create(MakeFlat("Hello"));
// tree = CordRepBtree::Append(tree, MakeFlat("world"));
//
// In the above example, all inputs are consumed, making each call affecting
// `tree` reference count neutral. The returned `tree` value can be different
// from the input if the input is shared with other threads, or if the tree
// grows in height, but callers typically never have to concern themselves with
// that and trust that all methods DTRT at all times.
class CordRepBtree : public CordRep {
public:
// EdgeType identifies `front` and `back` enum values.
// Various implementations in CordRepBtree such as `Add` and `Edge` are
// generic and templated on operating on either of the boundary edges.
// For more information on the possible edges contained in a CordRepBtree
// instance see the documentation for `edges_`.
enum class EdgeType { kFront, kBack };
// Convenience constants into `EdgeType`
static constexpr EdgeType kFront = EdgeType::kFront;
static constexpr EdgeType kBack = EdgeType::kBack;
// Maximum number of edges: based on experiments and performance data, we can
// pick suitable values resulting in optimum cacheline aligned values. The
// preferred values are based on 64-bit systems where we aim to align this
// class onto 64 bytes, i.e.: 6 = 64 bytes, 14 = 128 bytes, etc.
// TODO(b/192061034): experiment with alternative sizes.
static constexpr size_t kMaxCapacity = 6;
// Reasonable maximum height of the btree. We can expect a fill ratio of at
// least 50%: trees are always expanded at the front or back. Concatenating
// trees will then typically fold at the top most node, where the lower nodes
// are at least at capacity on one side of joined inputs. At a lower fill
// rate of 4 edges per node, we have capacity for ~16 million leaf nodes.
// We will fail / abort if an application ever exceeds this height, which
// should be extremely rare (near impossible) and be an indication of an
// application error: we do not assume it reasonable for any application to
// operate correctly with such monster trees.
// Another compelling reason for the number `12` is that any contextual stack
// required for navigation or insertion requires 12 words and 12 bytes, which
// fits inside 2 cache lines with some room to spare, and is reasonable as a
// local stack variable compared to Cord's current near 400 bytes stack use.
// The maximum `height` value of a node is then `kMaxDepth - 1` as node height
// values start with a value of 0 for leaf nodes.
static constexpr size_t kMaxDepth = 12;
// See comments on height() for why this is an int and not a size_t.
static constexpr int kMaxHeight = static_cast<int>(kMaxDepth - 1);
// `Action` defines the action for unwinding changes done at the btree's leaf
// level that need to be propagated up to the parent node(s). Each operation
// on a node has an effect / action defined as follows:
// - kSelf
// The operation (add / update, etc) was performed directly on the node as
// the node is private to the current thread (i.e.: not shared directly or
// indirectly through a refcount > 1). Changes can be propagated directly to
// all parent nodes as all parent nodes are also then private to the current
// thread.
// - kCopied
// The operation (add / update, etc) was performed on a copy of the original
// node, as the node is (potentially) directly or indirectly shared with
// other threads. Changes need to be propagated into the parent nodes where
// the old down pointer must be unreffed and replaced with this new copy.
// Such changes to parent nodes may themselves require a copy if the parent
// node is also shared. A kCopied action can propagate all the way to the
// top node where we then must unref the `tree` input provided by the
// caller, and return the new copy.
// - kPopped
// The operation (typically add) could not be satisfied due to insufficient
// capacity in the targeted node, and a new 'leg' was created that needs to
// be added into the parent node. For example, adding a FLAT inside a leaf
// node that is at capacity will create a new leaf node containing that
// FLAT, that needs to be 'popped' up the btree. Such 'pop' actions can
// cascade up the tree if parent nodes are also at capacity. A 'Popped'
// action propagating all the way to the top of the tree will result in
// the tree becoming one level higher than the current tree through a final
// `CordRepBtree::New(tree, popped)` call, resulting in a new top node
// referencing the old tree and the new (fully popped upwards) 'leg'.
enum Action { kSelf, kCopied, kPopped };
// Result of an operation on a node. See the `Action` enum for details.
struct OpResult {
CordRepBtree* tree;
Action action;
};
// Return value of the CopyPrefix and CopySuffix methods which can
// return a node or data edge at any height inside the tree.
// A height of 0 defines the lowest (leaf) node, a height of -1 identifies
// `edge` as being a plain data node: EXTERNAL / FLAT or SUBSTRING thereof.
struct CopyResult {
CordRep* edge;
int height;
};
// Logical position inside a node:
// - index: index of the edge.
// - n: size or offset value depending on context.
struct Position {
size_t index;
size_t n;
};
// Creates a btree from the given input. Adopts a ref of `rep`.
// If the input `rep` is itself a btree, i.e., `IsBtree()`, then this
// function immediately returns `rep->btree()`. If the input is a valid data
// edge (see IsDataEdge()), then a new leaf node is returned containing `rep`
// as the sole data edge. Else, the input is assumed to be a (legacy) concat
// tree, and the input is consumed and transformed into a btree().
static CordRepBtree* Create(CordRep* rep);
// Destroys the provided tree. Should only be called by cord internal API's,
// typically after a ref_count.Decrement() on the last reference count.
static void Destroy(CordRepBtree* tree);
// Destruction
static void Delete(CordRepBtree* tree) { delete tree; }
// Use CordRep::Unref() as we overload for absl::Span<CordRep* const>.
using CordRep::Unref;
// Unrefs all edges in `edges` which are assumed to be 'likely one'.
static void Unref(absl::Span<CordRep* const> edges);
// Appends / Prepends an existing CordRep instance to this tree.
// The below methods accept three types of input:
// 1) `rep` is a data node (See `IsDataNode` for valid data edges).
// `rep` is appended or prepended to this tree 'as is'.
// 2) `rep` is a BTREE.
// `rep` is merged into `tree` respecting the Append/Prepend order.
// 3) `rep` is some other (legacy) type.
// `rep` is converted in place and added to `tree`
// Requires `tree` and `rep` to be not null.
static CordRepBtree* Append(CordRepBtree* tree, CordRep* rep);
static CordRepBtree* Prepend(CordRepBtree* tree, CordRep* rep);
// Append/Prepend the data in `data` to this tree.
// The `extra` parameter defines how much extra capacity should be allocated
// for any additional FLAT being allocated. This is an optimization hint from
// the caller. For example, a caller may need to add 2 string_views of data
// "abc" and "defghi" which are not consecutive. The caller can in this case
// invoke `AddData(tree, "abc", 6)`, and any newly added flat is allocated
// where possible with at least 6 bytes of extra capacity beyond `length`.
// This helps avoiding data getting fragmented over multiple flats.
// There is no limit on the size of `data`. If `data` can not be stored inside
// a single flat, then the function will iteratively add flats until all data
// has been consumed and appended or prepended to the tree.
static CordRepBtree* Append(CordRepBtree* tree, string_view data,
size_t extra = 0);
static CordRepBtree* Prepend(CordRepBtree* tree, string_view data,
size_t extra = 0);
// Returns a new tree, containing `n` bytes of data from this instance
// starting at offset `offset`. Where possible, the returned tree shares
// (re-uses) data edges and nodes with this instance to minimize the
// combined memory footprint of both trees.
// Requires `offset + n <= length`. Returns `nullptr` if `n` is zero.
CordRep* SubTree(size_t offset, size_t n);
// Removes `n` trailing bytes from `tree`, and returns the resulting tree
// or data edge. Returns `tree` if n is zero, and nullptr if n == length.
// This function is logically identical to:
// result = tree->SubTree(0, tree->length - n);
// Unref(tree);
// return result;
// However, the actual implementation will as much as possible perform 'in
// place' modifications on the tree on all nodes and edges that are mutable.
// For example, in a fully privately owned tree with the last edge being a
// flat of length 12, RemoveSuffix(1) will simply set the length of that data
// edge to 11, and reduce the length of all nodes on the edge path by 1.
static CordRep* RemoveSuffix(CordRepBtree* tree, size_t n);
// Returns the character at the given offset.
char GetCharacter(size_t offset) const;
// Returns true if this node holds a single data edge, and if so, sets
// `fragment` to reference the contained data. `fragment` is an optional
// output parameter and allowed to be null.
bool IsFlat(absl::string_view* fragment) const;
// Returns true if the data of `n` bytes starting at offset `offset`
// is contained in a single data edge, and if so, sets fragment to reference
// the contained data. `fragment` is an optional output parameter and allowed
// to be null.
bool IsFlat(size_t offset, size_t n, absl::string_view* fragment) const;
// Returns a span (mutable range of bytes) of up to `size` bytes into the
// last FLAT data edge inside this tree under the following conditions:
// - none of the nodes down into the FLAT node are shared.
// - the last data edge in this tree is a non-shared FLAT.
// - the referenced FLAT has additional capacity available.
// If all these conditions are met, a non-empty span is returned, and the
// length of the flat node and involved tree nodes have been increased by
// `span.length()`. The caller is responsible for immediately assigning values
// to all uninitialized data reference by the returned span.
// Requires `this->refcount.IsOne()`: this function forces the caller to do
// this fast path check on the top level node, as this is the most commonly
// shared node of a cord tree.
Span<char> GetAppendBuffer(size_t size);
// Extracts the right-most data edge from this tree iff:
// - the tree and all internal edges to the right-most node are not shared.
// - the right-most node is a FLAT node and not shared.
// - the right-most node has at least the desired extra capacity.
//
// Returns {tree, nullptr} if any of the above conditions are not met.
// This method effectively removes data from the tree. The intent of this
// method is to allow applications appending small string data to use
// pre-existing capacity, and add the modified rep back to the tree.
//
// Simplified such code would look similar to this:
// void MyTreeBuilder::Append(string_view data) {
// ExtractResult result = CordRepBtree::ExtractAppendBuffer(tree_, 1);
// if (CordRep* rep = result.extracted) {
// size_t available = rep->Capacity() - rep->length;
// size_t n = std::min(data.size(), n);
// memcpy(rep->Data(), data.data(), n);
// rep->length += n;
// data.remove_prefix(n);
// if (!result.tree->IsBtree()) {
// tree_ = CordRepBtree::Create(result.tree);
// }
// tree_ = CordRepBtree::Append(tree_, rep);
// }
// ...
// // Remaining edge in `result.tree`.
// }
static ExtractResult ExtractAppendBuffer(CordRepBtree* tree,
size_t extra_capacity = 1);
// Returns the `height` of the tree. The height of a tree is limited to
// kMaxHeight. `height` is implemented as an `int` as in some places we
// use negative (-1) values for 'data edges'.
int height() const { return static_cast<int>(storage[0]); }
// Properties: begin, back, end, front/back boundary indexes.
size_t begin() const { return static_cast<size_t>(storage[1]); }
size_t back() const { return static_cast<size_t>(storage[2]) - 1; }
size_t end() const { return static_cast<size_t>(storage[2]); }
size_t index(EdgeType edge) const {
return edge == kFront ? begin() : back();
}
// Properties: size and capacity.
// `capacity` contains the current capacity of this instance, where
// `kMaxCapacity` contains the maximum capacity of a btree node.
// For now, `capacity` and `kMaxCapacity` return the same value, but this may
// change in the future if we see benefit in dynamically sizing 'small' nodes
// to 'large' nodes for large data trees.
size_t size() const { return end() - begin(); }
size_t capacity() const { return kMaxCapacity; }
// Edge access
inline CordRep* Edge(size_t index) const;
inline CordRep* Edge(EdgeType edge_type) const;
inline absl::Span<CordRep* const> Edges() const;
inline absl::Span<CordRep* const> Edges(size_t begin, size_t end) const;
// Returns reference to the data edge at `index`.
// Requires this instance to be a leaf node, and `index` to be valid index.
inline absl::string_view Data(size_t index) const;
// Diagnostics: returns true if `tree` is valid and internally consistent.
// If `shallow` is false, then the provided top level node and all child nodes
// below it are recursively checked. If `shallow` is true, only the provided
// node in `tree` and the cumulative length, type and height of the direct
// child nodes of `tree` are checked. The value of `shallow` is ignored if the
// internal `cord_btree_exhaustive_validation` diagnostics variable is true,
// in which case the performed validations works as if `shallow` were false.
// This function is intended for debugging and testing purposes only.
static bool IsValid(const CordRepBtree* tree, bool shallow = false);
// Diagnostics: asserts that the provided tree is valid.
// `AssertValid()` performs a shallow validation by default. `shallow` can be
// set to false in which case an exhaustive validation is performed. This
// function is implemented in terms of calling `IsValid()` and asserting the
// return value to be true. See `IsValid()` for more information.
// This function is intended for debugging and testing purposes only.
static CordRepBtree* AssertValid(CordRepBtree* tree, bool shallow = true);
static const CordRepBtree* AssertValid(const CordRepBtree* tree,
bool shallow = true);
// Diagnostics: dump the contents of this tree to `stream`.
// This function is intended for debugging and testing purposes only.
static void Dump(const CordRep* rep, std::ostream& stream);
static void Dump(const CordRep* rep, absl::string_view label,
std::ostream& stream);
static void Dump(const CordRep* rep, absl::string_view label,
bool include_contents, std::ostream& stream);
// Adds the edge `edge` to this node if possible. `owned` indicates if the
// current node is potentially shared or not with other threads. Returns:
// - {kSelf, <this>}
// The edge was directly added to this node.
// - {kCopied, <node>}
// The edge was added to a copy of this node.
// - {kPopped, New(edge, height())}
// A new leg with the edge was created as this node has no extra capacity.
template <EdgeType edge_type>
inline OpResult AddEdge(bool owned, CordRep* edge, size_t delta);
// Replaces the front or back edge with the provided new edge. Returns:
// - {kSelf, <this>}
// The edge was directly set in this node. The old edge is unreffed.
// - {kCopied, <node>}
// A copy of this node was created with the new edge value.
// In both cases, the function adopts a reference on `edge`.
template <EdgeType edge_type>
OpResult SetEdge(bool owned, CordRep* edge, size_t delta);
// Creates a new empty node at the specified height.
static CordRepBtree* New(int height = 0);
// Creates a new node containing `rep`, with the height being computed
// automatically based on the type of `rep`.
static CordRepBtree* New(CordRep* rep);
// Creates a new node containing both `front` and `back` at height
// `front.height() + 1`. Requires `back.height() == front.height()`.
static CordRepBtree* New(CordRepBtree* front, CordRepBtree* back);
// Creates a fully balanced tree from the provided tree by rebuilding a new
// tree from all data edges in the input. This function is automatically
// invoked internally when the tree exceeds the maximum height.
static CordRepBtree* Rebuild(CordRepBtree* tree);
private:
CordRepBtree() = default;
~CordRepBtree() = default;
// Initializes the main properties `tag`, `begin`, `end`, `height`.
inline void InitInstance(int height, size_t begin = 0, size_t end = 0);
// Direct property access begin / end
void set_begin(size_t begin) { storage[1] = static_cast<uint8_t>(begin); }
void set_end(size_t end) { storage[2] = static_cast<uint8_t>(end); }
// Decreases the value of `begin` by `n`, and returns the new value. Notice
// how this returns the new value unlike atomic::fetch_add which returns the
// old value. This is because this is used to prepend edges at 'begin - 1'.
size_t sub_fetch_begin(size_t n) {
storage[1] -= static_cast<uint8_t>(n);
return storage[1];
}
// Increases the value of `end` by `n`, and returns the previous value. This
// function is typically used to append edges at 'end'.
size_t fetch_add_end(size_t n) {
const uint8_t current = storage[2];
storage[2] = static_cast<uint8_t>(current + n);
return current;
}
// Returns the index of the last edge starting on, or before `offset`, with
// `n` containing the relative offset of `offset` inside that edge.
// Requires `offset` < length.
Position IndexOf(size_t offset) const;
// Returns the index of the last edge starting before `offset`, with `n`
// containing the relative offset of `offset` inside that edge.
// This function is useful to find the edges for some span of bytes ending at
// `offset` (i.e., `n` bytes). For example:
//
// Position pos = IndexBefore(n)
// edges = Edges(begin(), pos.index) // All full edges (may be empty)
// last = Sub(Edge(pos.index), 0, pos.n) // Last partial edge (may be empty)
//
// Requires 0 < `offset` <= length.
Position IndexBefore(size_t offset) const;
// Returns the index of the edge ending at (or on) length `length`, and the
// number of bytes inside that edge up to `length`. For example, if we have a
// Node with 2 edges, one of 10 and one of 20 long, then IndexOfLength(27)
// will return {1, 17}, and IndexOfLength(10) will return {0, 10}.
Position IndexOfLength(size_t n) const;
// Identical to the above function except starting from the position `front`.
// This function is equivalent to `IndexBefore(front.n + offset)`, with
// the difference that this function is optimized to start at `front.index`.
Position IndexBefore(Position front, size_t offset) const;
// Returns the index of the edge directly beyond the edge containing offset
// `offset`, with `n` containing the distance of that edge from `offset`.
// This function is useful for iteratively finding suffix nodes and remaining
// partial bytes in left-most suffix nodes as for example in CopySuffix.
// Requires `offset` < length.
Position IndexBeyond(size_t offset) const;
// Creates a new leaf node containing as much data as possible from `data`.
// The data is added either forwards or reversed depending on `edge_type`.
// Callers must check the length of the returned node to determine if all data
// was copied or not.
// See the `Append/Prepend` function for the meaning and purpose of `extra`.
template <EdgeType edge_type>
static CordRepBtree* NewLeaf(absl::string_view data, size_t extra);
// Creates a raw copy of this Btree node with the specified length, copying
// all properties, but without adding any references to existing edges.
CordRepBtree* CopyRaw(size_t new_length) const;
// Creates a full copy of this Btree node, adding a reference on all edges.
CordRepBtree* Copy() const;
// Creates a partial copy of this Btree node, copying all edges up to `end`,
// adding a reference on each copied edge, and sets the length of the newly
// created copy to `new_length`.
CordRepBtree* CopyBeginTo(size_t end, size_t new_length) const;
// Returns a tree containing the edges [tree->begin(), end) and length
// of `new_length`. This method consumes a reference on the provided
// tree, and logically performs the following operation:
// result = tree->CopyBeginTo(end, new_length);
// CordRep::Unref(tree);
// return result;
static CordRepBtree* ConsumeBeginTo(CordRepBtree* tree, size_t end,
size_t new_length);
// Creates a partial copy of this Btree node, copying all edges starting at
// `begin`, adding a reference on each copied edge, and sets the length of
// the newly created copy to `new_length`.
CordRepBtree* CopyToEndFrom(size_t begin, size_t new_length) const;
// Extracts and returns the front edge from the provided tree.
// This method consumes a reference on the provided tree, and logically
// performs the following operation:
// edge = CordRep::Ref(tree->Edge(kFront));
// CordRep::Unref(tree);
// return edge;
static CordRep* ExtractFront(CordRepBtree* tree);
// Returns a tree containing the result of appending `right` to `left`.
static CordRepBtree* MergeTrees(CordRepBtree* left, CordRepBtree* right);
// Fallback functions for `Create()`, `Append()` and `Prepend()` which
// deal with legacy / non conforming input, i.e.: CONCAT trees.
static CordRepBtree* CreateSlow(CordRep* rep);
static CordRepBtree* AppendSlow(CordRepBtree*, CordRep* rep);
static CordRepBtree* PrependSlow(CordRepBtree*, CordRep* rep);
// Recursively rebuilds `tree` into `stack`. If 'consume` is set to true, the
// function will consume a reference on `tree`. `stack` is a null terminated
// array containing the new tree's state, with the current leaf node at
// stack[0], and parent nodes above that, or null for 'top of tree'.
static void Rebuild(CordRepBtree** stack, CordRepBtree* tree, bool consume);
// Aligns existing edges to start at index 0, to allow for a new edge to be
// added to the back of the current edges.
inline void AlignBegin();
// Aligns existing edges to end at `capacity`, to allow for a new edge to be
// added in front of the current edges.
inline void AlignEnd();
// Adds the provided edge to this node.
// Requires this node to have capacity for the edge. Realigns / moves
// existing edges as needed to prepend or append the new edge.
template <EdgeType edge_type>
inline void Add(CordRep* rep);
// Adds the provided edges to this node.
// Requires this node to have capacity for the edges. Realigns / moves
// existing edges as needed to prepend or append the new edges.
template <EdgeType edge_type>
inline void Add(absl::Span<CordRep* const>);
// Adds data from `data` to this node until either all data has been consumed,
// or there is no more capacity for additional flat nodes inside this node.
// Requires the current node to be a leaf node, data to be non empty, and the
// current node to have capacity for at least one more data edge.
// Returns any remaining data from `data` that was not added, which is
// depending on the edge type (front / back) either the remaining prefix of
// suffix of the input.
// See the `Append/Prepend` function for the meaning and purpose of `extra`.
template <EdgeType edge_type>
absl::string_view AddData(absl::string_view data, size_t extra);
// Replace the front or back edge with the provided value.
// Adopts a reference on `edge` and unrefs the old edge.
template <EdgeType edge_type>
inline void SetEdge(CordRep* edge);
// Returns a partial copy of the current tree containing the first `n` bytes
// of data. `CopyResult` contains both the resulting edge and its height. The
// resulting tree may be less high than the current tree, or even be a single
// matching data edge if `allow_folding` is set to true.
// For example, if `n == 1`, then the result will be the single data edge, and
// height will be set to -1 (one below the owning leaf node). If n == 0, this
// function returns null. Requires `n <= length`
CopyResult CopyPrefix(size_t n, bool allow_folding = true);
// Returns a partial copy of the current tree containing all data starting
// after `offset`. `CopyResult` contains both the resulting edge and its
// height. The resulting tree may be less high than the current tree, or even
// be a single matching data edge. For example, if `n == length - 1`, then the
// result will be a single data edge, and height will be set to -1 (one below
// the owning leaf node).
// Requires `offset < length`
CopyResult CopySuffix(size_t offset);
// Returns a OpResult value of {this, kSelf} or {Copy(), kCopied}
// depending on the value of `owned`.
inline OpResult ToOpResult(bool owned);
// Adds `rep` to the specified tree, returning the modified tree.
template <EdgeType edge_type>
static CordRepBtree* AddCordRep(CordRepBtree* tree, CordRep* rep);
// Adds `data` to the specified tree, returning the modified tree.
// See the `Append/Prepend` function for the meaning and purpose of `extra`.
template <EdgeType edge_type>
static CordRepBtree* AddData(CordRepBtree* tree, absl::string_view data,
size_t extra = 0);
// Merges `src` into `dst` with `src` being added either before (kFront) or
// after (kBack) `dst`. Requires the height of `dst` to be greater than or
// equal to the height of `src`.
template <EdgeType edge_type>
static CordRepBtree* Merge(CordRepBtree* dst, CordRepBtree* src);
// Fallback version of GetAppendBuffer for large trees: GetAppendBuffer()
// implements an inlined version for trees of limited height (3 levels),
// GetAppendBufferSlow implements the logic for large trees.
Span<char> GetAppendBufferSlow(size_t size);
// `edges_` contains all edges starting from this instance.
// These are explicitly `child` edges only, a cord btree (or any cord tree in
// that respect) does not store `parent` pointers anywhere: multiple trees /
// parents can reference the same shared child edge. The type of these edges
// depends on the height of the node. `Leaf nodes` (height == 0) contain `data
// edges` (external or flat nodes, or sub-strings thereof). All other nodes
// (height > 0) contain pointers to BTREE nodes with a height of `height - 1`.
CordRep* edges_[kMaxCapacity];
friend class CordRepBtreeTestPeer;
friend class CordRepBtreeNavigator;
};
inline CordRepBtree* CordRep::btree() {
assert(IsBtree());
return static_cast<CordRepBtree*>(this);
}
inline const CordRepBtree* CordRep::btree() const {
assert(IsBtree());
return static_cast<const CordRepBtree*>(this);
}
inline void CordRepBtree::InitInstance(int height, size_t begin, size_t end) {
tag = BTREE;
storage[0] = static_cast<uint8_t>(height);
storage[1] = static_cast<uint8_t>(begin);
storage[2] = static_cast<uint8_t>(end);
}
inline CordRep* CordRepBtree::Edge(size_t index) const {
assert(index >= begin());
assert(index < end());
return edges_[index];
}
inline CordRep* CordRepBtree::Edge(EdgeType edge_type) const {
return edges_[edge_type == kFront ? begin() : back()];
}
inline absl::Span<CordRep* const> CordRepBtree::Edges() const {
return {edges_ + begin(), size()};
}
inline absl::Span<CordRep* const> CordRepBtree::Edges(size_t begin,
size_t end) const {
assert(begin <= end);
assert(begin >= this->begin());
assert(end <= this->end());
return {edges_ + begin, static_cast<size_t>(end - begin)};
}
inline absl::string_view CordRepBtree::Data(size_t index) const {
assert(height() == 0);
return EdgeData(Edge(index));
}
inline CordRepBtree* CordRepBtree::New(int height) {
CordRepBtree* tree = new CordRepBtree;
tree->length = 0;
tree->InitInstance(height);
return tree;
}
inline CordRepBtree* CordRepBtree::New(CordRep* rep) {
CordRepBtree* tree = new CordRepBtree;
int height = rep->IsBtree() ? rep->btree()->height() + 1 : 0;
tree->length = rep->length;
tree->InitInstance(height, /*begin=*/0, /*end=*/1);
tree->edges_[0] = rep;
return tree;
}
inline CordRepBtree* CordRepBtree::New(CordRepBtree* front,
CordRepBtree* back) {
assert(front->height() == back->height());
CordRepBtree* tree = new CordRepBtree;
tree->length = front->length + back->length;
tree->InitInstance(front->height() + 1, /*begin=*/0, /*end=*/2);
tree->edges_[0] = front;
tree->edges_[1] = back;
return tree;
}
inline void CordRepBtree::Unref(absl::Span<CordRep* const> edges) {
for (CordRep* edge : edges) {
if (ABSL_PREDICT_FALSE(!edge->refcount.Decrement())) {
CordRep::Destroy(edge);
}
}
}
inline CordRepBtree* CordRepBtree::CopyRaw(size_t new_length) const {
CordRepBtree* tree = new CordRepBtree;
// `length` and `refcount` are the first members of `CordRepBtree`.
// We initialize `length` using the given length, have `refcount` be set to
// ref = 1 through its default constructor, and copy all data beyond
// 'refcount' which starts with `tag` using a single memcpy: all contents
// except `refcount` is trivially copyable, and the compiler does not
// efficiently coalesce member-wise copy of these members.
// See https://gcc.godbolt.org/z/qY8zsca6z
// # LINT.IfChange(copy_raw)
tree->length = new_length;
uint8_t* dst = &tree->tag;
const uint8_t* src = &tag;
const ptrdiff_t offset = src - reinterpret_cast<const uint8_t*>(this);
memcpy(dst, src, sizeof(CordRepBtree) - static_cast<size_t>(offset));
return tree;
// # LINT.ThenChange()
}
inline CordRepBtree* CordRepBtree::Copy() const {
CordRepBtree* tree = CopyRaw(length);
for (CordRep* rep : Edges()) CordRep::Ref(rep);
return tree;
}
inline CordRepBtree* CordRepBtree::CopyToEndFrom(size_t begin,
size_t new_length) const {
assert(begin >= this->begin());
assert(begin <= this->end());
CordRepBtree* tree = CopyRaw(new_length);
tree->set_begin(begin);
for (CordRep* edge : tree->Edges()) CordRep::Ref(edge);
return tree;
}
inline CordRepBtree* CordRepBtree::CopyBeginTo(size_t end,
size_t new_length) const {
assert(end <= capacity());
assert(end >= this->begin());
CordRepBtree* tree = CopyRaw(new_length);
tree->set_end(end);
for (CordRep* edge : tree->Edges()) CordRep::Ref(edge);
return tree;
}
inline void CordRepBtree::AlignBegin() {
// The below code itself does not need to be fast as typically we have
// mono-directional append/prepend calls, and `begin` / `end` are typically
// adjusted no more than once. But we want to avoid potential register clobber
// effects, making the compiler emit register save/store/spills, and minimize
// the size of code.
const size_t delta = begin();
if (ABSL_PREDICT_FALSE(delta != 0)) {
const size_t new_end = end() - delta;
set_begin(0);
set_end(new_end);
// TODO(mvels): we can write this using 2 loads / 2 stores depending on
// total size for the kMaxCapacity = 6 case. I.e., we can branch (switch) on
// size, and then do overlapping load/store of up to 4 pointers (inlined as
// XMM, YMM or ZMM load/store) and up to 2 pointers (XMM / YMM), which is a)
// compact and b) not clobbering any registers.
ABSL_ASSUME(new_end <= kMaxCapacity);
#ifdef __clang__
#pragma unroll 1
#endif
for (size_t i = 0; i < new_end; ++i) {
edges_[i] = edges_[i + delta];
}
}
}
inline void CordRepBtree::AlignEnd() {
// See comments in `AlignBegin` for motivation on the hand-rolled for loops.
const size_t delta = capacity() - end();
if (delta != 0) {
const size_t new_begin = begin() + delta;
const size_t new_end = end() + delta;
set_begin(new_begin);
set_end(new_end);
ABSL_ASSUME(new_end <= kMaxCapacity);
#ifdef __clang__
#pragma unroll 1
#endif
for (size_t i = new_end - 1; i >= new_begin; --i) {
edges_[i] = edges_[i - delta];
}
}
}
template <>
inline void CordRepBtree::Add<CordRepBtree::kBack>(CordRep* rep) {
AlignBegin();
edges_[fetch_add_end(1)] = rep;
}
template <>
inline void CordRepBtree::Add<CordRepBtree::kBack>(
absl::Span<CordRep* const> edges) {
AlignBegin();
size_t new_end = end();
for (CordRep* edge : edges) edges_[new_end++] = edge;
set_end(new_end);
}
template <>
inline void CordRepBtree::Add<CordRepBtree::kFront>(CordRep* rep) {
AlignEnd();
edges_[sub_fetch_begin(1)] = rep;
}
template <>
inline void CordRepBtree::Add<CordRepBtree::kFront>(
absl::Span<CordRep* const> edges) {
AlignEnd();
size_t new_begin = begin() - edges.size();
set_begin(new_begin);
for (CordRep* edge : edges) edges_[new_begin++] = edge;
}
template <CordRepBtree::EdgeType edge_type>
inline void CordRepBtree::SetEdge(CordRep* edge) {
const int idx = edge_type == kFront ? begin() : back();
CordRep::Unref(edges_[idx]);
edges_[idx] = edge;
}
inline CordRepBtree::OpResult CordRepBtree::ToOpResult(bool owned) {
return owned ? OpResult{this, kSelf} : OpResult{Copy(), kCopied};
}
inline CordRepBtree::Position CordRepBtree::IndexOf(size_t offset) const {
assert(offset < length);
size_t index = begin();
while (offset >= edges_[index]->length) offset -= edges_[index++]->length;
return {index, offset};
}
inline CordRepBtree::Position CordRepBtree::IndexBefore(size_t offset) const {
assert(offset > 0);
assert(offset <= length);
size_t index = begin();
while (offset > edges_[index]->length) offset -= edges_[index++]->length;
return {index, offset};
}
inline CordRepBtree::Position CordRepBtree::IndexBefore(Position front,
size_t offset) const {
size_t index = front.index;
offset = offset + front.n;
while (offset > edges_[index]->length) offset -= edges_[index++]->length;
return {index, offset};
}
inline CordRepBtree::Position CordRepBtree::IndexOfLength(size_t n) const {
assert(n <= length);
size_t index = back();
size_t strip = length - n;
while (strip >= edges_[index]->length) strip -= edges_[index--]->length;
return {index, edges_[index]->length - strip};
}
inline CordRepBtree::Position CordRepBtree::IndexBeyond(
const size_t offset) const {
// We need to find the edge which `starting offset` is beyond (>=)`offset`.
// For this we can't use the `offset -= length` logic of IndexOf. Instead, we
// track the offset of the `current edge` in `off`, which we increase as we
// iterate over the edges until we find the matching edge.
size_t off = 0;
size_t index = begin();
while (offset > off) off += edges_[index++]->length;
return {index, off - offset};
}
inline CordRepBtree* CordRepBtree::Create(CordRep* rep) {
if (IsDataEdge(rep)) return New(rep);
return CreateSlow(rep);
}
inline Span<char> CordRepBtree::GetAppendBuffer(size_t size) {
assert(refcount.IsOne());
CordRepBtree* tree = this;
const int height = this->height();
CordRepBtree* n1 = tree;
CordRepBtree* n2 = tree;
CordRepBtree* n3 = tree;
switch (height) {
case 3:
tree = tree->Edge(kBack)->btree();
if (!tree->refcount.IsOne()) return {};
n2 = tree;
ABSL_FALLTHROUGH_INTENDED;
case 2:
tree = tree->Edge(kBack)->btree();
if (!tree->refcount.IsOne()) return {};
n1 = tree;
ABSL_FALLTHROUGH_INTENDED;
case 1:
tree = tree->Edge(kBack)->btree();
if (!tree->refcount.IsOne()) return {};
ABSL_FALLTHROUGH_INTENDED;
case 0:
CordRep* edge = tree->Edge(kBack);
if (!edge->refcount.IsOne()) return {};
if (edge->tag < FLAT) return {};
size_t avail = edge->flat()->Capacity() - edge->length;
if (avail == 0) return {};
size_t delta = (std::min)(size, avail);
Span<char> span = {edge->flat()->Data() + edge->length, delta};
edge->length += delta;
switch (height) {
case 3:
n3->length += delta;
ABSL_FALLTHROUGH_INTENDED;
case 2:
n2->length += delta;
ABSL_FALLTHROUGH_INTENDED;
case 1:
n1->length += delta;
ABSL_FALLTHROUGH_INTENDED;
case 0:
tree->length += delta;
return span;
}
break;
}
return GetAppendBufferSlow(size);
}
extern template CordRepBtree* CordRepBtree::AddCordRep<CordRepBtree::kBack>(
CordRepBtree* tree, CordRep* rep);
extern template CordRepBtree* CordRepBtree::AddCordRep<CordRepBtree::kFront>(
CordRepBtree* tree, CordRep* rep);
inline CordRepBtree* CordRepBtree::Append(CordRepBtree* tree, CordRep* rep) {
if (ABSL_PREDICT_TRUE(IsDataEdge(rep))) {
return CordRepBtree::AddCordRep<kBack>(tree, rep);
}
return AppendSlow(tree, rep);
}
inline CordRepBtree* CordRepBtree::Prepend(CordRepBtree* tree, CordRep* rep) {
if (ABSL_PREDICT_TRUE(IsDataEdge(rep))) {
return CordRepBtree::AddCordRep<kFront>(tree, rep);
}
return PrependSlow(tree, rep);
}
#ifdef NDEBUG
inline CordRepBtree* CordRepBtree::AssertValid(CordRepBtree* tree,
bool /* shallow */) {
return tree;
}
inline const CordRepBtree* CordRepBtree::AssertValid(const CordRepBtree* tree,
bool /* shallow */) {
return tree;
}
#endif
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_H_
|