Diffstat (limited to 'absl/strings/internal/cord_internal.h')
-rw-r--r--  absl/strings/internal/cord_internal.h  404
1 file changed, 387 insertions(+), 17 deletions(-)
diff --git a/absl/strings/internal/cord_internal.h b/absl/strings/internal/cord_internal.h
index d456eef8..a1ba67fe 100644
--- a/absl/strings/internal/cord_internal.h
+++ b/absl/strings/internal/cord_internal.h
@@ -1,4 +1,4 @@
-// Copyright 2020 The Abseil Authors.
+// Copyright 2021 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -21,7 +21,10 @@
#include <cstdint>
#include <type_traits>
+#include "absl/base/config.h"
+#include "absl/base/internal/endian.h"
#include "absl/base/internal/invoke.h"
+#include "absl/base/optimization.h"
#include "absl/container/internal/compressed_tuple.h"
#include "absl/meta/type_traits.h"
#include "absl/strings/string_view.h"
@@ -30,17 +33,55 @@ namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
+class CordzInfo;
+
+// Default enable states for the optional cord features below.
+enum CordFeatureDefaults {
+ kCordEnableRingBufferDefault = false,
+ kCordShallowSubcordsDefault = false
+};
+
+extern std::atomic<bool> cord_ring_buffer_enabled;
+extern std::atomic<bool> shallow_subcords_enabled;
+
+inline void enable_cord_ring_buffer(bool enable) {
+ cord_ring_buffer_enabled.store(enable, std::memory_order_relaxed);
+}
+
+inline void enable_shallow_subcords(bool enable) {
+ shallow_subcords_enabled.store(enable, std::memory_order_relaxed);
+}
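Illustrative usage of the two feature toggles above (a sketch, not part of the diff; the caller function is hypothetical). The relaxed stores mean other threads may observe the new value with some delay, which is acceptable for a feature flag:

#include "absl/strings/internal/cord_internal.h"

// Hypothetical experiment setup: turn the cord ring buffer on and keep
// shallow subcords at their default (off).
void ConfigureCordExperiments() {
  absl::cord_internal::enable_cord_ring_buffer(true);
  absl::cord_internal::enable_shallow_subcords(false);
}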
+
+enum Constants {
+ // The inlined size to use with absl::InlinedVector.
+ //
+ // Note: The InlinedVectors in this file (and in cord.h) do not need to use
+ // the same value for their inlined size. The fact that they do is historical.
+ // It may be desirable for each to use a different inlined size optimized for
+ // that InlinedVector's usage.
+ //
+ // TODO(jgm): Benchmark to see if there's a more optimal value than 47 for
+ // the inlined vector size (47 exists for backward compatibility).
+ kInlinedVectorSize = 47,
+
+ // Prefer copying blocks of at most this size, otherwise reference count.
+ kMaxBytesToCopy = 511
+};
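A minimal sketch of the policy kMaxBytesToCopy expresses (the helper name below is invented for illustration; Cord's real decision logic lives in cord.cc):

// Blocks of at most kMaxBytesToCopy (511) bytes are cheap enough to copy;
// anything larger is shared by bumping a CordRep reference count instead.
inline bool PreferCopyOverSharing(size_t length) {
  return length <= absl::cord_internal::kMaxBytesToCopy;
}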
+
// Wraps std::atomic for reference counting.
class Refcount {
public:
- Refcount() : count_{1} {}
- ~Refcount() {}
+ constexpr Refcount() : count_{kRefIncrement} {}
+ struct Immortal {};
+ explicit constexpr Refcount(Immortal) : count_(kImmortalTag) {}
- // Increments the reference count by 1. Imposes no memory ordering.
- inline void Increment() { count_.fetch_add(1, std::memory_order_relaxed); }
+ // Increments the reference count. Imposes no memory ordering.
+ inline void Increment() {
+ count_.fetch_add(kRefIncrement, std::memory_order_relaxed);
+ }
// Asserts that the current refcount is greater than 0. If the refcount is
- // greater than 1, decrements the reference count by 1.
+ // greater than 1, decrements the reference count.
//
// Returns false if there are no references outstanding; true otherwise.
// Inserts barriers to ensure that state written before this method returns
@@ -48,19 +89,24 @@ class Refcount {
// false.
inline bool Decrement() {
int32_t refcount = count_.load(std::memory_order_acquire);
- assert(refcount > 0);
- return refcount != 1 && count_.fetch_sub(1, std::memory_order_acq_rel) != 1;
+ assert(refcount > 0 || refcount & kImmortalTag);
+ return refcount != kRefIncrement &&
+ count_.fetch_sub(kRefIncrement, std::memory_order_acq_rel) !=
+ kRefIncrement;
}
// Same as Decrement but expect that refcount is greater than 1.
inline bool DecrementExpectHighRefcount() {
- int32_t refcount = count_.fetch_sub(1, std::memory_order_acq_rel);
- assert(refcount > 0);
- return refcount != 1;
+ int32_t refcount =
+ count_.fetch_sub(kRefIncrement, std::memory_order_acq_rel);
+ assert(refcount > 0 || refcount & kImmortalTag);
+ return refcount != kRefIncrement;
}
// Returns the current reference count using acquire semantics.
- inline int32_t Get() const { return count_.load(std::memory_order_acquire); }
+ inline int32_t Get() const {
+ return count_.load(std::memory_order_acquire) >> kImmortalShift;
+ }
// Returns whether the atomic integer is 1.
// If the reference count is used in the conventional way, a
@@ -70,9 +116,27 @@ class Refcount {
// performs the memory barrier needed for the owning thread
// to act on the object, knowing that it has exclusive access to the
// object.
- inline bool IsOne() { return count_.load(std::memory_order_acquire) == 1; }
+ inline bool IsOne() {
+ return count_.load(std::memory_order_acquire) == kRefIncrement;
+ }
+
+ bool IsImmortal() const {
+ return (count_.load(std::memory_order_relaxed) & kImmortalTag) != 0;
+ }
private:
+ // We reserve the bottom bit to tag a reference count as immortal.
+ // By making it `1` we ensure that we never reach `0` when adding/subtracting
+ // `2`, thus it never looks as if it should be destroyed.
+ // These are used for the StringConstant constructor where we do not increase
+ // the refcount at construction time (due to constinit requirements) but we
+ // will still decrease it at destruction time to avoid branching on Unref.
+ enum {
+ kImmortalShift = 1,
+ kRefIncrement = 1 << kImmortalShift,
+ kImmortalTag = kRefIncrement - 1
+ };
+
std::atomic<int32_t> count_;
};
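To make the immortal-bit encoding concrete, here is a small sketch (not part of the diff) of the arithmetic and the public API, using the values from the enum introduced in this hunk (kImmortalShift = 1, kRefIncrement = 2, kImmortalTag = 1):

#include <cassert>
#include "absl/strings/internal/cord_internal.h"

// Regular rep:  count_ = 2 * N      (N = logical reference count)
// Immortal rep: count_ = 2 * N + 1  (low bit set, so the value is always odd
//                                    and can never equal kRefIncrement or 0)
void RefcountSketch() {
  absl::cord_internal::Refcount rc;  // constexpr ctor starts at kRefIncrement
  assert(rc.Get() == 1);             // Get() shifts out the immortal tag bit
  rc.Increment();                    // logical count 2
  assert(rc.Decrement());            // still referenced after this decrement
  assert(!rc.Decrement());           // last reference dropped; caller destroys
}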
@@ -82,10 +146,33 @@ class Refcount {
// functions in the base class.
struct CordRepConcat;
-struct CordRepSubstring;
struct CordRepExternal;
+struct CordRepFlat;
+struct CordRepSubstring;
+class CordRepRing;
+
+// Various representations that we allow
+enum CordRepKind {
+ CONCAT = 0,
+ EXTERNAL = 1,
+ SUBSTRING = 2,
+ RING = 3,
+
+ // We have different tags for different sized flat arrays,
+ // starting with FLAT, and limited to MAX_FLAT_TAG. The 224 value is based on
+ // the current 'size to tag' encoding of 8 / 32 bytes. If a new tag is needed
+ // in the future, then 'FLAT' and 'MAX_FLAT_TAG' should be adjusted as well
+ // as the Tag <---> Size logic so that FLAT still represents the minimum flat
+ // allocation size. (32 bytes as of now).
+ FLAT = 4,
+ MAX_FLAT_TAG = 224
+};
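A hedged sketch of how readers interpret the tag (the exact flat size-to-tag mapping lives in CordRepFlat and is not reproduced here):

#include <cstdint>

// Tags below FLAT name a node kind; FLAT..MAX_FLAT_TAG encode the allocated
// size of a CordRepFlat.
inline bool IsFlatTag(uint8_t tag) {
  return tag >= absl::cord_internal::FLAT;
}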
struct CordRep {
+ CordRep() = default;
+ constexpr CordRep(Refcount::Immortal immortal, size_t l)
+ : length(l), refcount(immortal), tag(EXTERNAL), storage{} {}
+
// The following three fields have to be less than 32 bytes since
// that is the smallest supported flat node size.
size_t length;
@@ -93,22 +180,40 @@ struct CordRep {
// If tag < FLAT, it represents CordRepKind and indicates the type of node.
// Otherwise, the node type is CordRepFlat and the tag is the encoded size.
uint8_t tag;
- char data[1]; // Starting point for flat array: MUST BE LAST FIELD of CordRep
+ char storage[1]; // Starting point for flat array: MUST BE LAST FIELD
+ inline CordRepRing* ring();
+ inline const CordRepRing* ring() const;
inline CordRepConcat* concat();
inline const CordRepConcat* concat() const;
inline CordRepSubstring* substring();
inline const CordRepSubstring* substring() const;
inline CordRepExternal* external();
inline const CordRepExternal* external() const;
+ inline CordRepFlat* flat();
+ inline const CordRepFlat* flat() const;
+
+ // --------------------------------------------------------------------
+ // Memory management
+
+ // Destroys the provided `rep`.
+ static void Destroy(CordRep* rep);
+
+ // Increments the reference count of `rep`.
+ // Requires `rep` to be a non-null pointer value.
+ static inline CordRep* Ref(CordRep* rep);
+
+ // Decrements the reference count of `rep`. Destroys rep if count reaches
+ // zero. Requires `rep` to be a non-null pointer value.
+ static inline void Unref(CordRep* rep);
};
struct CordRepConcat : public CordRep {
CordRep* left;
CordRep* right;
- uint8_t depth() const { return static_cast<uint8_t>(data[0]); }
- void set_depth(uint8_t depth) { data[0] = static_cast<char>(depth); }
+ uint8_t depth() const { return static_cast<uint8_t>(storage[0]); }
+ void set_depth(uint8_t depth) { storage[0] = static_cast<char>(depth); }
};
struct CordRepSubstring : public CordRep {
@@ -124,9 +229,19 @@ using ExternalReleaserInvoker = void (*)(CordRepExternal*);
// External CordReps are allocated together with a type erased releaser. The
// releaser is stored in the memory directly following the CordRepExternal.
struct CordRepExternal : public CordRep {
+ CordRepExternal() = default;
+ explicit constexpr CordRepExternal(absl::string_view str)
+ : CordRep(Refcount::Immortal{}, str.size()),
+ base(str.data()),
+ releaser_invoker(nullptr) {}
+
const char* base;
// Pointer to function that knows how to call and destroy the releaser.
ExternalReleaserInvoker releaser_invoker;
+
+ // Deletes (releases) the external rep.
+ // Requires rep != nullptr and rep->tag == EXTERNAL
+ static void Delete(CordRep* rep);
};
struct Rank1 {};
@@ -167,7 +282,262 @@ struct CordRepExternalImpl
}
};
+inline void CordRepExternal::Delete(CordRep* rep) {
+ assert(rep != nullptr && rep->tag == EXTERNAL);
+ auto* rep_external = static_cast<CordRepExternal*>(rep);
+ assert(rep_external->releaser_invoker != nullptr);
+ rep_external->releaser_invoker(rep_external);
+}
+
+template <typename Str>
+struct ConstInitExternalStorage {
+ ABSL_CONST_INIT static CordRepExternal value;
+};
+
+template <typename Str>
+CordRepExternal ConstInitExternalStorage<Str>::value(Str::value);
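Here `Str` is expected to expose a constexpr string_view member named `value` (the StringConstant machinery in cord.h provides such types); the literal type below is made up purely for illustration:

// Hypothetical literal type; real instantiations come from StringConstant.
struct HelloLiteral {
  static constexpr absl::string_view value = "hello";
};

// A constant-initialized, immortal CordRepExternal: its refcount is never
// incremented at construction time and never reaches zero, so Unref never
// destroys it.
absl::cord_internal::CordRep* const hello_rep =
    &absl::cord_internal::ConstInitExternalStorage<HelloLiteral>::value;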
+
+enum {
+ kMaxInline = 15,
+};
+
+constexpr char GetOrNull(absl::string_view data, size_t pos) {
+ return pos < data.size() ? data[pos] : '\0';
+}
+
+// We store cordz_info as a 64 bit pointer value in big endian format. This
+// guarantees that the least significant byte of cordz_info matches the last
+// byte of the inline data representation in as_chars_, which holds the inlined
+// size or the 'is_tree' bit.
+using cordz_info_t = int64_t;
+
+// Assert that the `cordz_info` pointer value perfectly overlaps the last half
+// of `as_chars_` and can hold a pointer value.
+static_assert(sizeof(cordz_info_t) * 2 == kMaxInline + 1, "");
+static_assert(sizeof(cordz_info_t) >= sizeof(intptr_t), "");
+
+// BigEndianByte() creates a big endian representation of 'value', i.e.: a big
+// endian value where the last byte in the host's representation holds 'value',
+// with all other bytes being 0.
+static constexpr cordz_info_t BigEndianByte(unsigned char value) {
+#if defined(ABSL_IS_BIG_ENDIAN)
+ return value;
+#else
+ return static_cast<cordz_info_t>(value) << ((sizeof(cordz_info_t) - 1) * 8);
+#endif
+}
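As a quick sanity check of the byte-overlap property described above (a sketch covering both endian branches, not part of the diff):

// On a little endian host BigEndianByte(1) == int64_t{1} << 56; on a big
// endian host it is simply 1. Either way the 0x01 payload occupies the
// highest-addressed byte of the stored value, which is the byte that
// overlaps the inline size / is_tree tag in InlineData below.
static_assert(
    absl::cord_internal::BigEndianByte(1) ==
            (static_cast<absl::cord_internal::cordz_info_t>(1) << 56) ||
        absl::cord_internal::BigEndianByte(1) == 1,
    "BigEndianByte(1) must place its payload in the last byte");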
+
+class InlineData {
+ public:
+ // kNullCordzInfo holds the big endian representation of intptr_t(1).
+ // This is the 'null' / initial value of 'cordz_info'. The null value
+ // is specifically big endian 1 because, with 64-bit pointers, the last
+ // byte of cordz_info overlaps with the last byte holding the tag.
+ static constexpr cordz_info_t kNullCordzInfo = BigEndianByte(1);
+
+ // kFakeCordzInfo holds a 'fake', non-null cordz-info value we use to
+ // emulate the previous 'kProfiled' tag logic in 'set_profiled' until
+ // cord code is changed to store cordz_info values in InlineData.
+ static constexpr cordz_info_t kFakeCordzInfo = BigEndianByte(9);
+
+ constexpr InlineData() : as_chars_{0} {}
+ explicit constexpr InlineData(CordRep* rep) : as_tree_(rep) {}
+ explicit constexpr InlineData(absl::string_view chars)
+ : as_chars_{
+ GetOrNull(chars, 0), GetOrNull(chars, 1),
+ GetOrNull(chars, 2), GetOrNull(chars, 3),
+ GetOrNull(chars, 4), GetOrNull(chars, 5),
+ GetOrNull(chars, 6), GetOrNull(chars, 7),
+ GetOrNull(chars, 8), GetOrNull(chars, 9),
+ GetOrNull(chars, 10), GetOrNull(chars, 11),
+ GetOrNull(chars, 12), GetOrNull(chars, 13),
+ GetOrNull(chars, 14), static_cast<char>((chars.size() << 1))} {}
+
+ // Returns true if the current instance is empty.
+ // The 'empty value' is an inlined data value of zero length.
+ bool is_empty() const { return tag() == 0; }
+
+ // Returns true if the current instance holds a tree value.
+ bool is_tree() const { return (tag() & 1) != 0; }
+
+ // Returns true if the current instance holds a cordz_info value.
+ // Requires the current instance to hold a tree value.
+ bool is_profiled() const {
+ assert(is_tree());
+ return as_tree_.cordz_info != kNullCordzInfo;
+ }
+
+ // Returns the cordz_info sampling instance for this instance, or nullptr
+ // if the current instance is not sampled and does not have CordzInfo data.
+ // Requires the current instance to hold a tree value.
+ CordzInfo* cordz_info() const {
+ assert(is_tree());
+ intptr_t info =
+ static_cast<intptr_t>(absl::big_endian::ToHost64(as_tree_.cordz_info));
+ assert(info & 1);
+ return reinterpret_cast<CordzInfo*>(info - 1);
+ }
+
+ // Sets the current cordz_info sampling instance for this instance, or nullptr
+ // if the current instance is not sampled and does not have CordzInfo data.
+ // Requires the current instance to hold a tree value.
+ void set_cordz_info(CordzInfo* cordz_info) {
+ assert(is_tree());
+ intptr_t info = reinterpret_cast<intptr_t>(cordz_info) | 1;
+ as_tree_.cordz_info = absl::big_endian::FromHost64(info);
+ }
+
+ // Resets the current cordz_info to null / empty.
+ void clear_cordz_info() {
+ assert(is_tree());
+ as_tree_.cordz_info = kNullCordzInfo;
+ }
+
+ // Returns a read only pointer to the character data inside this instance.
+ // Requires the current instance to hold inline data.
+ const char* as_chars() const {
+ assert(!is_tree());
+ return as_chars_;
+ }
+
+ // Returns a mutable pointer to the character data inside this instance.
+ // Should be used for 'write only' operations setting an inlined value.
+ // Applications can set the value of inlined data either before or after
+ // setting the inlined size, i.e., both of the below are valid:
+ //
+ // // Set inlined data and inline size
+ // memcpy(data_.as_chars(), data, size);
+ // data_.set_inline_size(size);
+ //
+ // // Set inlined size and inline data
+ // data_.set_inline_size(size);
+ // memcpy(data_.as_chars(), data, size);
+ //
+ // It's an error to read from the returned pointer without a preceding write
+ // if the current instance does not hold inline data, i.e.: is_tree() == true.
+ char* as_chars() { return as_chars_; }
+
+ // Returns the tree value of this value.
+ // Requires the current instance to hold a tree value.
+ CordRep* as_tree() const {
+ assert(is_tree());
+ return as_tree_.rep;
+ }
+
+ // Initialize this instance to holding the tree value `rep`,
+ // initializing the cordz_info to null, i.e.: 'not profiled'.
+ void make_tree(CordRep* rep) {
+ as_tree_.rep = rep;
+ as_tree_.cordz_info = kNullCordzInfo;
+ }
+
+ // Set the tree value of this instance to 'rep`.
+ // Requires the current instance to already hold a tree value.
+ // Does not affect the value of cordz_info.
+ void set_tree(CordRep* rep) {
+ assert(is_tree());
+ as_tree_.rep = rep;
+ }
+
+ // Returns the size of the inlined character data inside this instance.
+ // Requires the current instance to hold inline data.
+ size_t inline_size() const {
+ assert(!is_tree());
+ return tag() >> 1;
+ }
+
+ // Sets the size of the inlined character data inside this instance.
+ // Requires `size` to be <= kMaxInline.
+ // See the documentation on 'as_chars()' for more information and examples.
+ void set_inline_size(size_t size) {
+ ABSL_ASSERT(size <= kMaxInline);
+ tag() = static_cast<char>(size << 1);
+ }
+
+ // Sets or unsets the 'is_profiled' state of this instance.
+ // Requires the current instance to hold a tree value.
+ void set_profiled(bool profiled) {
+ assert(is_tree());
+ as_tree_.cordz_info = profiled ? kFakeCordzInfo : kNullCordzInfo;
+ }
+
+ private:
+ // See cordz_info_t for forced alignment and size of `cordz_info` details.
+ struct AsTree {
+ explicit constexpr AsTree(absl::cord_internal::CordRep* tree)
+ : rep(tree), cordz_info(kNullCordzInfo) {}
+ // This union uses up extra space so that whether rep is 32 or 64 bits,
+ // cordz_info will still start at the eighth byte, and the last
+ // byte of cordz_info will still be the last byte of InlineData.
+ union {
+ absl::cord_internal::CordRep* rep;
+ cordz_info_t unused_aligner;
+ };
+ cordz_info_t cordz_info;
+ };
+
+ char& tag() { return reinterpret_cast<char*>(this)[kMaxInline]; }
+ char tag() const { return reinterpret_cast<const char*>(this)[kMaxInline]; }
+
+ // If the data has length <= kMaxInline, we store it in `as_chars_` and
+ // store the size in the last char of `as_chars_`, shifted left by one bit.
+ // Else we store a pointer to a tree in `as_tree_.rep` and mark the tree
+ // state in the low bit of that same last byte.
+ union {
+ char as_chars_[kMaxInline + 1];
+ AsTree as_tree_;
+ };
+};
+
+static_assert(sizeof(InlineData) == kMaxInline + 1, "");
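A usage sketch tying the two InlineData states together (mirrors the as_chars()/set_inline_size() example in the comments above; the CordRep pointer is assumed to come from elsewhere):

#include <cassert>
#include <cstring>
#include "absl/strings/internal/cord_internal.h"

void InlineDataSketch(absl::cord_internal::CordRep* some_rep) {
  using absl::cord_internal::InlineData;

  // Inline state: up to kMaxInline (15) bytes live directly in the object,
  // with the size stored (shifted left by one) in the last byte.
  InlineData small;
  std::memcpy(small.as_chars(), "hi", 2);
  small.set_inline_size(2);
  assert(!small.is_tree() && small.inline_size() == 2);

  // Tree state: the same 16 bytes hold a CordRep* plus a cordz_info word
  // whose low bit doubles as the is_tree tag.
  InlineData tree;
  tree.make_tree(some_rep);
  assert(tree.is_tree() && tree.as_tree() == some_rep);
}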
+
+inline CordRepConcat* CordRep::concat() {
+ assert(tag == CONCAT);
+ return static_cast<CordRepConcat*>(this);
+}
+
+inline const CordRepConcat* CordRep::concat() const {
+ assert(tag == CONCAT);
+ return static_cast<const CordRepConcat*>(this);
+}
+
+inline CordRepSubstring* CordRep::substring() {
+ assert(tag == SUBSTRING);
+ return static_cast<CordRepSubstring*>(this);
+}
+
+inline const CordRepSubstring* CordRep::substring() const {
+ assert(tag == SUBSTRING);
+ return static_cast<const CordRepSubstring*>(this);
+}
+
+inline CordRepExternal* CordRep::external() {
+ assert(tag == EXTERNAL);
+ return static_cast<CordRepExternal*>(this);
+}
+
+inline const CordRepExternal* CordRep::external() const {
+ assert(tag == EXTERNAL);
+ return static_cast<const CordRepExternal*>(this);
+}
+
+inline CordRep* CordRep::Ref(CordRep* rep) {
+ assert(rep != nullptr);
+ rep->refcount.Increment();
+ return rep;
+}
+
+inline void CordRep::Unref(CordRep* rep) {
+ assert(rep != nullptr);
+ // The rep is expected to be shared (refcount > 1), so decrement
+ // unconditionally: the extra load-and-branch that Decrement() performs to
+ // detect the last reference before the atomic op would rarely pay off here.
+ if (ABSL_PREDICT_FALSE(!rep->refcount.DecrementExpectHighRefcount())) {
+ Destroy(rep);
+ }
+}
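A hypothetical caller illustrating the Ref/Unref contract defined above:

// Ref returns its argument so the call can be used inline; every Ref must be
// balanced by an Unref, which destroys the rep when the last reference drops.
absl::cord_internal::CordRep* ShareRep(absl::cord_internal::CordRep* rep) {
  return absl::cord_internal::CordRep::Ref(rep);  // requires rep != nullptr
}

void ReleaseRep(absl::cord_internal::CordRep* rep) {
  absl::cord_internal::CordRep::Unref(rep);  // may call Destroy(rep)
}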
+
} // namespace cord_internal
+
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_CORD_INTERNAL_H_