Diffstat (limited to 'absl/strings/internal/cord_internal.h')
-rw-r--r--  absl/strings/internal/cord_internal.h  |  304
1 file changed, 208 insertions(+), 96 deletions(-)
diff --git a/absl/strings/internal/cord_internal.h b/absl/strings/internal/cord_internal.h
index a1ba67fe..b50fb79a 100644
--- a/absl/strings/internal/cord_internal.h
+++ b/absl/strings/internal/cord_internal.h
@@ -21,6 +21,7 @@
#include <cstdint>
#include <type_traits>
+#include "absl/base/attributes.h"
#include "absl/base/config.h"
#include "absl/base/internal/endian.h"
#include "absl/base/internal/invoke.h"
@@ -33,6 +34,19 @@ namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
+// The overhead of a vtable is too much for Cord, so we roll our own subclasses
+// using only a single byte to differentiate classes from each other - the "tag"
+// byte. Define the subclasses first so we can provide downcasting helper
+// functions in the base class.
+struct CordRep;
+struct CordRepConcat;
+struct CordRepExternal;
+struct CordRepFlat;
+struct CordRepSubstring;
+struct CordRepCrc;
+class CordRepRing;
+class CordRepBtree;
+
class CordzInfo;
// Default feature enable states for cord ring buffers
@@ -44,6 +58,12 @@ enum CordFeatureDefaults {
extern std::atomic<bool> cord_ring_buffer_enabled;
extern std::atomic<bool> shallow_subcords_enabled;
+// `cord_btree_exhaustive_validation` can be set to force exhaustive validation
+// in debug assertions and in code that calls `IsValid()` explicitly. By
+// default, assertions should be relatively cheap: full recursive tree
+// validation is O(n), so calling `AssertValid()` from within tree operations
+// can easily lead to O(n^2) overall complexity.
+extern std::atomic<bool> cord_btree_exhaustive_validation;
+
inline void enable_cord_ring_buffer(bool enable) {
cord_ring_buffer_enabled.store(enable, std::memory_order_relaxed);
}
@@ -68,12 +88,16 @@ enum Constants {
kMaxBytesToCopy = 511
};
-// Wraps std::atomic for reference counting.
-class Refcount {
+// Emits a fatal error "Unexpected node type: xyz" and aborts the program.
+ABSL_ATTRIBUTE_NORETURN void LogFatalNodeType(CordRep* rep);
+
+// Compact class for tracking the reference count and state flags for CordRep
+// instances. Data is stored in an atomic int32_t for compactness and speed.
+class RefcountAndFlags {
public:
- constexpr Refcount() : count_{kRefIncrement} {}
+ constexpr RefcountAndFlags() : count_{kRefIncrement} {}
struct Immortal {};
- explicit constexpr Refcount(Immortal) : count_(kImmortalTag) {}
+ explicit constexpr RefcountAndFlags(Immortal) : count_(kImmortalFlag) {}
// Increments the reference count. Imposes no memory ordering.
inline void Increment() {
@@ -86,26 +110,27 @@ class Refcount {
// Returns false if there are no references outstanding; true otherwise.
// Inserts barriers to ensure that state written before this method returns
// false will be visible to a thread that just observed this method returning
- // false.
+ // false. Always returns false when the immortal bit is set.
inline bool Decrement() {
- int32_t refcount = count_.load(std::memory_order_acquire);
- assert(refcount > 0 || refcount & kImmortalTag);
+ int32_t refcount = count_.load(std::memory_order_acquire) & kRefcountMask;
+ assert(refcount > 0 || refcount & kImmortalFlag);
return refcount != kRefIncrement &&
- count_.fetch_sub(kRefIncrement, std::memory_order_acq_rel) !=
- kRefIncrement;
+ (count_.fetch_sub(kRefIncrement, std::memory_order_acq_rel) &
+ kRefcountMask) != kRefIncrement;
}
// Same as Decrement, but expects the refcount to be greater than 1.
inline bool DecrementExpectHighRefcount() {
int32_t refcount =
- count_.fetch_sub(kRefIncrement, std::memory_order_acq_rel);
- assert(refcount > 0 || refcount & kImmortalTag);
+ count_.fetch_sub(kRefIncrement, std::memory_order_acq_rel) &
+ kRefcountMask;
+ assert(refcount > 0 || refcount & kImmortalFlag);
return refcount != kRefIncrement;
}
// Returns the current reference count using acquire semantics.
inline int32_t Get() const {
- return count_.load(std::memory_order_acquire) >> kImmortalShift;
+ return count_.load(std::memory_order_acquire) >> kNumFlags;
}
// Returns whether the atomic integer is 1.
@@ -115,83 +140,127 @@ class Refcount {
// This call performs the test for a reference count of one, and
// performs the memory barrier needed for the owning thread
// to act on the object, knowing that it has exclusive access to the
- // object.
+ // object. Always returns false when the immortal bit is set.
inline bool IsOne() {
- return count_.load(std::memory_order_acquire) == kRefIncrement;
+ return (count_.load(std::memory_order_acquire) & kRefcountMask) ==
+ kRefIncrement;
}
bool IsImmortal() const {
- return (count_.load(std::memory_order_relaxed) & kImmortalTag) != 0;
+ return (count_.load(std::memory_order_relaxed) & kImmortalFlag) != 0;
}
private:
- // We reserve the bottom bit to tag a reference count as immortal.
- // By making it `1` we ensure that we never reach `0` when adding/subtracting
- // `2`, thus it never looks as if it should be destroyed.
- // These are used for the StringConstant constructor where we do not increase
- // the refcount at construction time (due to constinit requirements) but we
- // will still decrease it at destruction time to avoid branching on Unref.
- enum {
- kImmortalShift = 1,
- kRefIncrement = 1 << kImmortalShift,
- kImmortalTag = kRefIncrement - 1
+  // We reserve the bottom bits for flags.
+  // kImmortalFlag indicates that this entity should never be collected; it is
+  // used for the StringConstant constructor to avoid collecting immutable
+  // constant cords.
+  // kReservedFlag is reserved for future use.
+ enum Flags {
+ kNumFlags = 2,
+
+ kImmortalFlag = 0x1,
+ kReservedFlag = 0x2,
+ kRefIncrement = (1 << kNumFlags),
+
+ // Bitmask to use when checking refcount by equality. This masks out
+ // all flags except kImmortalFlag, which is part of the refcount for
+ // purposes of equality. (A refcount of 0 or 1 does not count as 0 or 1
+ // if the immortal bit is set.)
+ kRefcountMask = ~kReservedFlag,
};
std::atomic<int32_t> count_;
};
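For concreteness, here is a minimal standalone sketch of the bit arithmetic above. The constants are mirrored from the `Flags` enum; this is an illustration only, as the real class performs these operations through `std::atomic` member functions:

  #include <cassert>
  #include <cstdint>

  // Values mirrored from RefcountAndFlags::Flags above.
  constexpr int32_t kNumFlags = 2;
  constexpr int32_t kImmortalFlag = 0x1;
  constexpr int32_t kReservedFlag = 0x2;
  constexpr int32_t kRefIncrement = 1 << kNumFlags;  // 4 == one reference
  constexpr int32_t kRefcountMask = ~kReservedFlag;

  int main() {
    // A mortal rep with one reference stores 4; Get() shifts the flags out.
    int32_t mortal = kRefIncrement;
    assert((mortal & kRefcountMask) == kRefIncrement);  // IsOne() -> true
    assert((mortal >> kNumFlags) == 1);                 // Get()   -> 1

    // An immortal rep starts at kImmortalFlag (1). The flag sits below
    // kRefIncrement and is part of the masked value, so the masked count can
    // never equal kRefIncrement exactly: IsOne() and Decrement() never report
    // "last reference" for immortal reps.
    int32_t immortal = kImmortalFlag;
    assert((immortal & kRefcountMask) != kRefIncrement);
    immortal += kRefIncrement;  // Increment()
    immortal -= kRefIncrement;  // Decrement() body: back to 1, never 0
    assert((immortal & kRefcountMask) != kRefIncrement);
    return 0;
  }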
-// The overhead of a vtable is too much for Cord, so we roll our own subclasses
-// using only a single byte to differentiate classes from each other - the "tag"
-// byte. Define the subclasses first so we can provide downcasting helper
-// functions in the base class.
-
-struct CordRepConcat;
-struct CordRepExternal;
-struct CordRepFlat;
-struct CordRepSubstring;
-class CordRepRing;
-
// Various representations that we allow
enum CordRepKind {
- CONCAT = 0,
- EXTERNAL = 1,
- SUBSTRING = 2,
- RING = 3,
+ UNUSED_0 = 0,
+ SUBSTRING = 1,
+ CRC = 2,
+ BTREE = 3,
+ RING = 4,
+ EXTERNAL = 5,
// We have different tags for different sized flat arrays,
- // starting with FLAT, and limited to MAX_FLAT_TAG. The 224 value is based on
- // the current 'size to tag' encoding of 8 / 32 bytes. If a new tag is needed
- // in the future, then 'FLAT' and 'MAX_FLAT_TAG' should be adjusted as well
- // as the Tag <---> Size logic so that FLAT stil represents the minimum flat
- // allocation size. (32 bytes as of now).
- FLAT = 4,
- MAX_FLAT_TAG = 224
+  // starting with FLAT, and limited to MAX_FLAT_TAG. The values below map to
+  // an allocated range of 32 bytes to 256 KiB. The current granularity is:
+  // - 8 byte granularity for flat sizes in [32, 512]
+  // - 64 byte granularity for flat sizes in (512, 8KiB]
+  // - 4KiB granularity for flat sizes in (8KiB, 256KiB]
+ // If a new tag is needed in the future, then 'FLAT' and 'MAX_FLAT_TAG' should
+ // be adjusted as well as the Tag <---> Size mapping logic so that FLAT still
+ // represents the minimum flat allocation size. (32 bytes as of now).
+ FLAT = 6,
+ MAX_FLAT_TAG = 248
};
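The granularity above fully determines the tag arithmetic: 61 tags cover [32, 512] in 8-byte steps, 120 tags cover (512, 8KiB] in 64-byte steps, and 62 tags cover (8KiB, 256KiB] in 4KiB steps, which is how FLAT == 6 reaches MAX_FLAT_TAG == 248. A hedged sketch of the mapping (the real Tag <-> Size logic lives in cord_rep_flat.h; `SketchSizeToTag` is a hypothetical name, and `size` is assumed to be pre-rounded to the granularity of its range):

  #include <cstddef>
  #include <cstdint>

  constexpr uint8_t kFlat = 6;  // FLAT above

  constexpr uint8_t SketchSizeToTag(size_t size) {
    return static_cast<uint8_t>(
        size <= 512    ? kFlat + (size - 32) / 8
        : size <= 8192 ? kFlat + 60 + (size - 512) / 64
                       : kFlat + 180 + (size - 8192) / 4096);
  }

  static_assert(SketchSizeToTag(32) == 6, "FLAT == minimum 32-byte flat");
  static_assert(SketchSizeToTag(512) == 66, "end of the 8-byte range");
  static_assert(SketchSizeToTag(8192) == 186, "end of the 64-byte range");
  static_assert(SketchSizeToTag(256 * 1024) == 248, "MAX_FLAT_TAG");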
+// There are various locations where we want to check if some rep is a 'plain'
+// data edge, i.e. an external or flat rep. By having FLAT == EXTERNAL + 1, we
+// can perform this check in a single branch as 'tag >= EXTERNAL'.
+// Likewise, some locations check for 'ring or external/flat', so RING is kept
+// adjacent to EXTERNAL as well.
+// Note that we can leave this optimization to the compiler: it will do the
+// right thing when it sees a condition like `tag == EXTERNAL || tag >= FLAT`.
+static_assert(RING == BTREE + 1, "BTREE and RING not consecutive");
+static_assert(EXTERNAL == RING + 1, "RING and EXTERNAL not consecutive");
+static_assert(FLAT == EXTERNAL + 1, "EXTERNAL and FLAT not consecutive");
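For illustration, these are the single-branch checks the ordering enables (hypothetical helpers; call sites in the Cord code spell the comparisons inline):

  #include <cstdint>

  enum : uint8_t { kBtree = 3, kRing = 4, kExternal = 5, kFlat = 6 };

  // A 'plain' data edge is EXTERNAL or any FLAT tag; since FLAT == EXTERNAL + 1
  // and all flat tags are >= FLAT, one comparison covers both cases.
  inline bool IsDataEdgeTag(uint8_t tag) { return tag >= kExternal; }

  // 'ring or external/flat' likewise collapses to one comparison.
  inline bool IsRingOrDataEdgeTag(uint8_t tag) { return tag >= kRing; }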
+
struct CordRep {
+ // Result from an `extract edge` operation. Contains the (possibly changed)
+ // tree node as well as the extracted edge, or {tree, nullptr} if no edge
+ // could be extracted.
+ // On success, the returned `tree` value is null if `extracted` was the only
+ // data edge inside the tree, a data edge if there were only two data edges in
+ // the tree, or the (possibly new / smaller) remaining tree with the extracted
+ // data edge removed.
+ struct ExtractResult {
+ CordRep* tree;
+ CordRep* extracted;
+ };
+
CordRep() = default;
- constexpr CordRep(Refcount::Immortal immortal, size_t l)
+ constexpr CordRep(RefcountAndFlags::Immortal immortal, size_t l)
: length(l), refcount(immortal), tag(EXTERNAL), storage{} {}
// The following three fields have to be less than 32 bytes since
// that is the smallest supported flat node size.
size_t length;
- Refcount refcount;
+ RefcountAndFlags refcount;
// If tag < FLAT, it represents CordRepKind and indicates the type of node.
// Otherwise, the node type is CordRepFlat and the tag is the encoded size.
uint8_t tag;
- char storage[1]; // Starting point for flat array: MUST BE LAST FIELD
+
+  // `storage` serves two main purposes:
+  // - the starting point for CordRepFlat::Data() [flexible-array-member]
+  // - 3 bytes of additional storage for use by derived classes.
+  // The latter is used by CordRepConcat and CordRepBtree. CordRepConcat stores
+  // a 'depth' value in storage[0], and the (future) CordRepBtree class stores
+  // `height`, `begin` and `end` in the 3 entries. Otherwise we would need to
+  // allocate room for these in the derived classes, as not all compilers reuse
+  // padding space from the base class (clang and gcc do, MSVC does not).
+  uint8_t storage[3];
+
+ // Returns true if this instance's tag matches the requested type.
+ constexpr bool IsRing() const { return tag == RING; }
+ constexpr bool IsSubstring() const { return tag == SUBSTRING; }
+ constexpr bool IsCrc() const { return tag == CRC; }
+ constexpr bool IsExternal() const { return tag == EXTERNAL; }
+ constexpr bool IsFlat() const { return tag >= FLAT; }
+ constexpr bool IsBtree() const { return tag == BTREE; }
inline CordRepRing* ring();
inline const CordRepRing* ring() const;
- inline CordRepConcat* concat();
- inline const CordRepConcat* concat() const;
inline CordRepSubstring* substring();
inline const CordRepSubstring* substring() const;
+ inline CordRepCrc* crc();
+ inline const CordRepCrc* crc() const;
inline CordRepExternal* external();
inline const CordRepExternal* external() const;
inline CordRepFlat* flat();
inline const CordRepFlat* flat() const;
+ inline CordRepBtree* btree();
+ inline const CordRepBtree* btree() const;
// --------------------------------------------------------------------
// Memory management
@@ -208,17 +277,23 @@ struct CordRep {
static inline void Unref(CordRep* rep);
};
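A sketch of how a caller interprets an `ExtractResult` under the contract documented above; the extract operation itself is elided, and this fragment assumes the header is included:

  // `res` comes from some extract-edge operation on a tree (not shown).
  void HandleExtractResult(CordRep::ExtractResult res) {
    if (res.extracted == nullptr) {
      // Nothing could be extracted; res.tree is the unchanged input tree.
    } else if (res.tree == nullptr) {
      // res.extracted was the only data edge; no tree remains.
    } else {
      // res.tree is what remains: a single data edge if only two data edges
      // existed, otherwise a (possibly new / smaller) tree.
    }
  }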
-struct CordRepConcat : public CordRep {
- CordRep* left;
- CordRep* right;
-
- uint8_t depth() const { return static_cast<uint8_t>(storage[0]); }
- void set_depth(uint8_t depth) { storage[0] = static_cast<char>(depth); }
-};
-
struct CordRepSubstring : public CordRep {
size_t start; // Starting offset of substring in child
CordRep* child;
+
+  // Creates a substring on `child`, adopting a reference on `child`.
+  // Requires `child` to be either a flat or external node, and `pos` and `n`
+  // to form a non-empty partial subrange of `child`, i.e.:
+  // `n > 0 && n < length && n + pos <= length`
+ static inline CordRepSubstring* Create(CordRep* child, size_t pos, size_t n);
+
+  // Creates a substring of `rep`. Does not adopt a reference on `rep`.
+  // Requires `IsDataEdge(rep) && n > 0 && pos + n <= rep->length`.
+  // If `n == rep->length`, then this method returns `CordRep::Ref(rep)`.
+  // If `rep` is a substring of a flat or external node, then this method
+  // returns a new substring of that flat or external node, with `pos` adjusted
+  // by the original `start` position.
+  static inline CordRep* Substring(CordRep* rep, size_t pos, size_t n);
};
// Type for function pointer that will invoke the releaser function and also
@@ -231,7 +306,7 @@ using ExternalReleaserInvoker = void (*)(CordRepExternal*);
struct CordRepExternal : public CordRep {
CordRepExternal() = default;
explicit constexpr CordRepExternal(absl::string_view str)
- : CordRep(Refcount::Immortal{}, str.size()),
+ : CordRep(RefcountAndFlags::Immortal{}, str.size()),
base(str.data()),
releaser_invoker(nullptr) {}
@@ -240,7 +315,7 @@ struct CordRepExternal : public CordRep {
ExternalReleaserInvoker releaser_invoker;
// Deletes (releases) the external rep.
- // Requires rep != nullptr and rep->tag == EXTERNAL
+ // Requires rep != nullptr and rep->IsExternal()
static void Delete(CordRep* rep);
};
@@ -282,8 +357,49 @@ struct CordRepExternalImpl
}
};
+inline CordRepSubstring* CordRepSubstring::Create(CordRep* child, size_t pos,
+ size_t n) {
+ assert(child != nullptr);
+ assert(n > 0);
+ assert(n < child->length);
+ assert(pos < child->length);
+ assert(n <= child->length - pos);
+
+  // TODO(b/217376272): Harden internal logic.
+  // Move this check to strategic places inside the Cord logic and make it an
+  // assert.
+ if (ABSL_PREDICT_FALSE(!(child->IsExternal() || child->IsFlat()))) {
+ LogFatalNodeType(child);
+ }
+
+ CordRepSubstring* rep = new CordRepSubstring();
+ rep->length = n;
+ rep->tag = SUBSTRING;
+ rep->start = pos;
+ rep->child = child;
+ return rep;
+}
+
+inline CordRep* CordRepSubstring::Substring(CordRep* rep, size_t pos,
+ size_t n) {
+ assert(rep != nullptr);
+ assert(n != 0);
+ assert(pos < rep->length);
+ assert(n <= rep->length - pos);
+ if (n == rep->length) return CordRep::Ref(rep);
+ if (rep->IsSubstring()) {
+ pos += rep->substring()->start;
+ rep = rep->substring()->child;
+ }
+ CordRepSubstring* substr = new CordRepSubstring();
+ substr->length = n;
+ substr->tag = SUBSTRING;
+ substr->start = pos;
+ substr->child = CordRep::Ref(rep);
+ return substr;
+}
+
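A usage sketch of the flattening behavior implemented above, assuming `flat` is a 100-byte flat rep obtained elsewhere, with `<cassert>` and this header included:

  // Nested substrings re-parent onto the underlying flat/external node
  // instead of stacking SUBSTRING nodes.
  CordRep* sub1 = CordRepSubstring::Substring(flat, /*pos=*/10, /*n=*/50);
  CordRep* sub2 = CordRepSubstring::Substring(sub1, /*pos=*/5, /*n=*/20);
  assert(sub2->substring()->child == flat);  // child is `flat`, not `sub1`
  assert(sub2->substring()->start == 15);    // 10 + 5, adjusted into `flat`
  CordRep::Unref(sub2);
  CordRep::Unref(sub1);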
inline void CordRepExternal::Delete(CordRep* rep) {
- assert(rep != nullptr && rep->tag == EXTERNAL);
+ assert(rep != nullptr && rep->IsExternal());
auto* rep_external = static_cast<CordRepExternal*>(rep);
assert(rep_external->releaser_invoker != nullptr);
rep_external->releaser_invoker(rep_external);
@@ -295,7 +411,8 @@ struct ConstInitExternalStorage {
};
template <typename Str>
-CordRepExternal ConstInitExternalStorage<Str>::value(Str::value);
+ABSL_CONST_INIT CordRepExternal
+ ConstInitExternalStorage<Str>::value(Str::value);
enum {
kMaxInline = 15,
@@ -329,18 +446,17 @@ static constexpr cordz_info_t BigEndianByte(unsigned char value) {
class InlineData {
public:
+ // DefaultInitType forces the use of the default initialization constructor.
+ enum DefaultInitType { kDefaultInit };
+
// kNullCordzInfo holds the big endian representation of intptr_t(1).
// This is the 'null' / initial value of 'cordz_info'. The null value is
// specifically big endian 1 because, with 64-bit pointers, the last byte of
// cordz_info overlaps with the last byte holding the tag.
static constexpr cordz_info_t kNullCordzInfo = BigEndianByte(1);
- // kFakeCordzInfo holds a 'fake', non-null cordz-info value we use to
- // emulate the previous 'kProfiled' tag logic in 'set_profiled' until
- // cord code is changed to store cordz_info values in InlineData.
- static constexpr cordz_info_t kFakeCordzInfo = BigEndianByte(9);
-
constexpr InlineData() : as_chars_{0} {}
+ explicit InlineData(DefaultInitType) {}
explicit constexpr InlineData(CordRep* rep) : as_tree_(rep) {}
explicit constexpr InlineData(absl::string_view chars)
: as_chars_{
@@ -367,13 +483,23 @@ class InlineData {
return as_tree_.cordz_info != kNullCordzInfo;
}
+  // Returns true if either of the provided instances holds a cordz_info
+  // value. This method is more efficient than the equivalent
+  // `data1.is_profiled() || data2.is_profiled()`. Requires both arguments to
+  // hold a tree.
+ static bool is_either_profiled(const InlineData& data1,
+ const InlineData& data2) {
+ assert(data1.is_tree() && data2.is_tree());
+ return (data1.as_tree_.cordz_info | data2.as_tree_.cordz_info) !=
+ kNullCordzInfo;
+ }
+
// Returns the cordz_info sampling instance for this instance, or nullptr
// if the current instance is not sampled and does not have CordzInfo data.
// Requires the current instance to hold a tree value.
CordzInfo* cordz_info() const {
assert(is_tree());
- intptr_t info =
- static_cast<intptr_t>(absl::big_endian::ToHost64(as_tree_.cordz_info));
+ intptr_t info = static_cast<intptr_t>(
+ absl::big_endian::ToHost64(static_cast<uint64_t>(as_tree_.cordz_info)));
assert(info & 1);
return reinterpret_cast<CordzInfo*>(info - 1);
}
@@ -383,8 +509,9 @@ class InlineData {
// Requires the current instance to hold a tree value.
void set_cordz_info(CordzInfo* cordz_info) {
assert(is_tree());
- intptr_t info = reinterpret_cast<intptr_t>(cordz_info) | 1;
- as_tree_.cordz_info = absl::big_endian::FromHost64(info);
+ uintptr_t info = reinterpret_cast<uintptr_t>(cordz_info) | 1;
+ as_tree_.cordz_info =
+ static_cast<cordz_info_t>(absl::big_endian::FromHost64(info));
}
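A minimal round-trip sketch of this tagging scheme, with the endian conversion elided (`absl::big_endian::FromHost64/ToHost64` wrap the real store); `CordzInfoStub` is a hypothetical stand-in for `CordzInfo`:

  #include <cassert>
  #include <cstdint>

  struct CordzInfoStub {};  // stand-in; real type is cord_internal::CordzInfo

  int main() {
    CordzInfoStub info;
    // set_cordz_info(): store the pointer with its low bit set. A stored value
    // of 1 (kNullCordzInfo after the big-endian store) means "not sampled".
    uintptr_t stored = reinterpret_cast<uintptr_t>(&info) | 1;
    assert(stored & 1);
    // cordz_info(): subtract the tag bit to recover the original pointer.
    auto* roundtrip = reinterpret_cast<CordzInfoStub*>(stored - 1);
    assert(roundtrip == &info);
    return 0;
  }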
// Resets the current cordz_info to null / empty.
@@ -454,13 +581,6 @@ class InlineData {
tag() = static_cast<char>(size << 1);
}
- // Sets or unsets the 'is_profiled' state of this instance.
- // Requires the current instance to hold a tree value.
- void set_profiled(bool profiled) {
- assert(is_tree());
- as_tree_.cordz_info = profiled ? kFakeCordzInfo : kNullCordzInfo;
- }
-
private:
// See cordz_info_t for forced alignment and size of `cordz_info` details.
struct AsTree {
@@ -483,7 +603,7 @@ class InlineData {
// store the size in the last char of `as_chars_` shifted left + 1.
// Else we store it in a tree and store a pointer to that tree in
// `as_tree_.rep` and store a tag in `tagged_size`.
- union {
+ union {
char as_chars_[kMaxInline + 1];
AsTree as_tree_;
};
@@ -491,38 +611,30 @@ class InlineData {
static_assert(sizeof(InlineData) == kMaxInline + 1, "");
-inline CordRepConcat* CordRep::concat() {
- assert(tag == CONCAT);
- return static_cast<CordRepConcat*>(this);
-}
-
-inline const CordRepConcat* CordRep::concat() const {
- assert(tag == CONCAT);
- return static_cast<const CordRepConcat*>(this);
-}
-
inline CordRepSubstring* CordRep::substring() {
- assert(tag == SUBSTRING);
+ assert(IsSubstring());
return static_cast<CordRepSubstring*>(this);
}
inline const CordRepSubstring* CordRep::substring() const {
- assert(tag == SUBSTRING);
+ assert(IsSubstring());
return static_cast<const CordRepSubstring*>(this);
}
inline CordRepExternal* CordRep::external() {
- assert(tag == EXTERNAL);
+ assert(IsExternal());
return static_cast<CordRepExternal*>(this);
}
inline const CordRepExternal* CordRep::external() const {
- assert(tag == EXTERNAL);
+ assert(IsExternal());
return static_cast<const CordRepExternal*>(this);
}
inline CordRep* CordRep::Ref(CordRep* rep) {
- assert(rep != nullptr);
+ // ABSL_ASSUME is a workaround for
+ // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105585
+ ABSL_ASSUME(rep != nullptr);
rep->refcount.Increment();
return rep;
}