summaryrefslogtreecommitdiff
path: root/absl/strings/cord_analysis.cc
diff options
context:
space:
mode:
Diffstat (limited to 'absl/strings/cord_analysis.cc')
-rw-r--r--absl/strings/cord_analysis.cc237
1 files changed, 237 insertions, 0 deletions
diff --git a/absl/strings/cord_analysis.cc b/absl/strings/cord_analysis.cc
new file mode 100644
index 00000000..435b4c94
--- /dev/null
+++ b/absl/strings/cord_analysis.cc
@@ -0,0 +1,237 @@
+// Copyright 2021 The Abseil Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <cstddef>
+#include <cstdint>
+
+#include "absl/base/attributes.h"
+#include "absl/base/config.h"
+#include "absl/container/inlined_vector.h"
+#include "absl/strings/cord_analysis.h"
+#include "absl/strings/internal/cord_internal.h"
+#include "absl/strings/internal/cord_rep_btree.h"
+#include "absl/strings/internal/cord_rep_crc.h"
+#include "absl/strings/internal/cord_rep_flat.h"
+#include "absl/strings/internal/cord_rep_ring.h"
+//
+#include "absl/base/macros.h"
+#include "absl/base/port.h"
+#include "absl/functional/function_ref.h"
+
+namespace absl {
+ABSL_NAMESPACE_BEGIN
+namespace cord_internal {
+namespace {
+
+// Accounting mode for analyzing memory usage.
+enum class Mode { kTotal, kFairShare };
+
+// CordRepRef holds a `const CordRep*` reference in rep, and depending on mode,
+// holds a 'fraction' representing a cumulative inverse refcount weight.
+template <Mode mode>
+struct CordRepRef {
+ // Instantiates a CordRepRef instance.
+ explicit CordRepRef(const CordRep* r) : rep(r) {}
+
+ // Creates a child reference holding the provided child.
+ // Overloaded to add cumulative reference count for kFairShare.
+ CordRepRef Child(const CordRep* child) const { return CordRepRef(child); }
+
+ const CordRep* rep;
+};
+
+// RawUsage holds the computed total number of bytes.
+template <Mode mode>
+struct RawUsage {
+ size_t total = 0;
+
+ // Add 'size' to total, ignoring the CordRepRef argument.
+ void Add(size_t size, CordRepRef<mode>) { total += size; }
+};
+
+// Returns n / refcount avoiding a div for the common refcount == 1.
+template <typename refcount_t>
+double MaybeDiv(double d, refcount_t refcount) {
+ return refcount == 1 ? d : d / refcount;
+}
+
+// Overloaded 'kFairShare' specialization for CordRepRef. This class holds a
+// `fraction` value which represents a cumulative inverse refcount weight.
+// For example, a top node with a reference count of 2 will have a fraction
+// value of 1/2 = 0.5, representing the 'fair share' of memory it references.
+// A node below such a node with a reference count of 5 then has a fraction of
+// 0.5 / 5 = 0.1 representing the fair share of memory below that node, etc.
+template <>
+struct CordRepRef<Mode::kFairShare> {
+ // Creates a CordRepRef with the provided rep and top (parent) fraction.
+ explicit CordRepRef(const CordRep* r, double frac = 1.0)
+ : rep(r), fraction(MaybeDiv(frac, r->refcount.Get())) {}
+
+ // Returns a CordRepRef with a fraction of `this->fraction / child.refcount`
+ CordRepRef Child(const CordRep* child) const {
+ return CordRepRef(child, fraction);
+ }
+
+ const CordRep* rep;
+ double fraction;
+};
+
+// Overloaded 'kFairShare' specialization for RawUsage
+template <>
+struct RawUsage<Mode::kFairShare> {
+ double total = 0;
+
+ // Adds `size` multiplied by `rep.fraction` to the total size.
+ void Add(size_t size, CordRepRef<Mode::kFairShare> rep) {
+ total += static_cast<double>(size) * rep.fraction;
+ }
+};
+
+// Returns true if the provided rep is a valid data edge.
+bool IsDataEdge(const CordRep* rep) {
+ // The fast path is that `rep` is an EXTERNAL or FLAT node, making the below
+ // if a single, well predicted branch. We then repeat the FLAT or EXTERNAL
+ // check in the slow path the SUBSTRING check to optimize for the hot path.
+ if (rep->tag == EXTERNAL || rep->tag >= FLAT) return true;
+ if (rep->tag == SUBSTRING) rep = rep->substring()->child;
+ return rep->tag == EXTERNAL || rep->tag >= FLAT;
+}
+
+// Computes the estimated memory size of the provided data edge.
+// External reps are assumed 'heap allocated at their exact size'.
+template <Mode mode>
+void AnalyzeDataEdge(CordRepRef<mode> rep, RawUsage<mode>& raw_usage) {
+ assert(IsDataEdge(rep.rep));
+
+ // Consume all substrings
+ if (rep.rep->tag == SUBSTRING) {
+ raw_usage.Add(sizeof(CordRepSubstring), rep);
+ rep = rep.Child(rep.rep->substring()->child);
+ }
+
+ // Consume FLAT / EXTERNAL
+ const size_t size =
+ rep.rep->tag >= FLAT
+ ? rep.rep->flat()->AllocatedSize()
+ : rep.rep->length + sizeof(CordRepExternalImpl<intptr_t>);
+ raw_usage.Add(size, rep);
+}
+
+// Computes the memory size of the provided Concat tree.
+template <Mode mode>
+void AnalyzeConcat(CordRepRef<mode> rep, RawUsage<mode>& raw_usage) {
+ absl::InlinedVector<CordRepRef<mode>, 47> pending;
+
+ while (rep.rep != nullptr) {
+ const CordRepConcat* concat = rep.rep->concat();
+ CordRepRef<mode> left = rep.Child(concat->left);
+ CordRepRef<mode> right = rep.Child(concat->right);
+
+ raw_usage.Add(sizeof(CordRepConcat), rep);
+
+ switch ((IsDataEdge(left.rep) ? 1 : 0) | (IsDataEdge(right.rep) ? 2 : 0)) {
+ case 0: // neither left or right are data edges
+ rep = left;
+ pending.push_back(right);
+ break;
+ case 1: // only left is a data edge
+ AnalyzeDataEdge(left, raw_usage);
+ rep = right;
+ break;
+ case 2: // only right is a data edge
+ AnalyzeDataEdge(right, raw_usage);
+ rep = left;
+ break;
+ case 3: // left and right are data edges
+ AnalyzeDataEdge(right, raw_usage);
+ AnalyzeDataEdge(left, raw_usage);
+ if (!pending.empty()) {
+ rep = pending.back();
+ pending.pop_back();
+ } else {
+ rep.rep = nullptr;
+ }
+ break;
+ }
+ }
+}
+
+// Computes the memory size of the provided Ring tree.
+template <Mode mode>
+void AnalyzeRing(CordRepRef<mode> rep, RawUsage<mode>& raw_usage) {
+ const CordRepRing* ring = rep.rep->ring();
+ raw_usage.Add(CordRepRing::AllocSize(ring->capacity()), rep);
+ ring->ForEach([&](CordRepRing::index_type pos) {
+ AnalyzeDataEdge(rep.Child(ring->entry_child(pos)), raw_usage);
+ });
+}
+
+// Computes the memory size of the provided Btree tree.
+template <Mode mode>
+void AnalyzeBtree(CordRepRef<mode> rep, RawUsage<mode>& raw_usage) {
+ raw_usage.Add(sizeof(CordRepBtree), rep);
+ const CordRepBtree* tree = rep.rep->btree();
+ if (tree->height() > 0) {
+ for (CordRep* edge : tree->Edges()) {
+ AnalyzeBtree(rep.Child(edge), raw_usage);
+ }
+ } else {
+ for (CordRep* edge : tree->Edges()) {
+ AnalyzeDataEdge(rep.Child(edge), raw_usage);
+ }
+ }
+}
+
+template <Mode mode>
+size_t GetEstimatedUsage(const CordRep* rep) {
+ // Zero initialized memory usage totals.
+ RawUsage<mode> raw_usage;
+
+ // Capture top level node and refcount into a CordRepRef.
+ CordRepRef<mode> repref(rep);
+
+ // Consume the top level CRC node if present.
+ if (repref.rep->tag == CRC) {
+ raw_usage.Add(sizeof(CordRepCrc), repref);
+ repref = repref.Child(repref.rep->crc()->child);
+ }
+
+ if (IsDataEdge(repref.rep)) {
+ AnalyzeDataEdge(repref, raw_usage);
+ } else if (repref.rep->tag == BTREE) {
+ AnalyzeBtree(repref, raw_usage);
+ } else if (repref.rep->tag == CONCAT) {
+ AnalyzeConcat(repref, raw_usage);
+ } else if (repref.rep->tag == RING) {
+ AnalyzeRing(repref, raw_usage);
+ } else {
+ assert(false);
+ }
+
+ return static_cast<size_t>(raw_usage.total);
+}
+
+} // namespace
+
+size_t GetEstimatedMemoryUsage(const CordRep* rep) {
+ return GetEstimatedUsage<Mode::kTotal>(rep);
+}
+
+size_t GetEstimatedFairShareMemoryUsage(const CordRep* rep) {
+ return GetEstimatedUsage<Mode::kFairShare>(rep);
+}
+
+} // namespace cord_internal
+ABSL_NAMESPACE_END
+} // namespace absl