summaryrefslogtreecommitdiff
path: root/absl/strings
diff options
context:
space:
mode:
authorGravatar Abseil Team <absl-team@google.com>2023-06-29 09:25:56 -0700
committerGravatar Copybara-Service <copybara-worker@google.com>2023-06-29 09:26:57 -0700
commit53fbcb883dc8ca208dc58a8cc168d0628fe2556f (patch)
tree7075fe3a70f796194137f08957e54a6086b4cf9e /absl/strings
parentbde85071e497254e954c27b1b81b442a441ad4b0 (diff)
Introduce a kTotalMorePrecise accounting mode for Cord::EstimatedMemoryUsage(). This mode avoids double-counting blocks that a Cord references more than once; otherwise it is similar to the existing kTotal mode.
There's no change to the existing kTotal or kFairShare accounting modes. PiperOrigin-RevId: 544378591 Change-Id: I7b4ae55cd93d631194e59a9cd0ff07f47611219e
Diffstat (limited to 'absl/strings')
-rw-r--r--absl/strings/cord.h35
-rw-r--r--absl/strings/cord_analysis.cc24
-rw-r--r--absl/strings/cord_analysis.h18
-rw-r--r--absl/strings/cord_test.cc79
4 files changed, 151 insertions, 5 deletions
diff --git a/absl/strings/cord.h b/absl/strings/cord.h
index f5a2da97..457ccf06 100644
--- a/absl/strings/cord.h
+++ b/absl/strings/cord.h
@@ -110,9 +110,30 @@ enum class CordMemoryAccounting {
// Counts the *approximate* number of bytes held in full or in part by this
// Cord (which may not remain the same between invocations). Cords that share
// memory could each be "charged" independently for the same shared memory.
+ // See also comment on `kTotalMorePrecise` on internally shared memory.
kTotal,
// Counts the *approximate* number of bytes held in full or in part by this
+ // Cord for the distinct memory held by this cord. This option is similar
+ // to `kTotal`, except that if the cord has multiple references to the same
+ // memory, that memory is only counted once.
+ //
+ // For example:
+ // absl::Cord cord;
+ // cord.Append(some_other_cord);
+ // cord.Append(some_other_cord);
+ // // Counts `some_other_cord` twice:
+ // cord.EstimatedMemoryUsage(kTotal);
+ // // Counts `some_other_cord` once:
+ // cord.EstimatedMemoryUsage(kTotalMorePrecise);
+ //
+ // The `kTotalMorePrecise` number is more expensive to compute as it requires
+ // deduplicating all memory references. Applications should prefer to use
+ // `kFairShare` or `kTotal` unless they really need a more precise estimate
+ // on "how much memory is potentially held / kept alive by this cord?"
+ kTotalMorePrecise,
+
+ // Counts the *approximate* number of bytes held in full or in part by this
// Cord weighted by the sharing ratio of that data. For example, if some data
// edge is shared by 4 different Cords, then each cord is attributed 1/4th of
// the total memory usage as a 'fair share' of the total memory usage.
@@ -1273,10 +1294,16 @@ inline size_t Cord::EstimatedMemoryUsage(
CordMemoryAccounting accounting_method) const {
size_t result = sizeof(Cord);
if (const absl::cord_internal::CordRep* rep = contents_.tree()) {
- if (accounting_method == CordMemoryAccounting::kFairShare) {
- result += cord_internal::GetEstimatedFairShareMemoryUsage(rep);
- } else {
- result += cord_internal::GetEstimatedMemoryUsage(rep);
+ switch (accounting_method) {
+ case CordMemoryAccounting::kFairShare:
+ result += cord_internal::GetEstimatedFairShareMemoryUsage(rep);
+ break;
+ case CordMemoryAccounting::kTotalMorePrecise:
+ result += cord_internal::GetMorePreciseMemoryUsage(rep);
+ break;
+ case CordMemoryAccounting::kTotal:
+ result += cord_internal::GetEstimatedMemoryUsage(rep);
+ break;
}
}
return result;
diff --git a/absl/strings/cord_analysis.cc b/absl/strings/cord_analysis.cc
index 73d3c4e6..e859b0db 100644
--- a/absl/strings/cord_analysis.cc
+++ b/absl/strings/cord_analysis.cc
@@ -16,6 +16,7 @@
#include <cstddef>
#include <cstdint>
+#include <unordered_set>
#include "absl/base/attributes.h"
#include "absl/base/config.h"
@@ -37,7 +38,7 @@ namespace cord_internal {
namespace {
// Accounting mode for analyzing memory usage.
-enum class Mode { kTotal, kFairShare };
+enum class Mode { kFairShare, kTotal, kTotalMorePrecise };
// CordRepRef holds a `const CordRep*` reference in rep, and depending on mode,
// holds a 'fraction' representing a cumulative inverse refcount weight.
@@ -62,6 +63,23 @@ struct RawUsage {
void Add(size_t size, CordRepRef<mode>) { total += size; }
};
+// Specialization of RawUsage for the `kTotalMorePrecise` mode. It remembers
+// every CordRep that has already been counted so that a rep referenced more
+// than once by the same Cord contributes its size to `total` only once.
+template <>
+struct RawUsage<Mode::kTotalMorePrecise> {
+  // Sum of the sizes of all distinct reps passed to Add() so far.
+  size_t total = 0;
+  // Reps that have already been added to `total`.
+  // TODO(b/289250880): Replace this with a flat_hash_set.
+  std::unordered_set<const CordRep*> counted;
+
+  // Adds `size` to `total` unless `repref.rep` was counted before.
+  void Add(size_t size, CordRepRef<Mode::kTotalMorePrecise> repref) {
+    // insert() reports whether the key was newly added, so a single hash
+    // lookup both tests for and records the rep.
+    if (counted.insert(repref.rep).second) {
+      total += size;
+    }
+  }
+};
+
// Returns n / refcount avoiding a div for the common refcount == 1.
template <typename refcount_t>
double MaybeDiv(double d, refcount_t refcount) {
@@ -183,6 +201,10 @@ size_t GetEstimatedFairShareMemoryUsage(const CordRep* rep) {
return GetEstimatedUsage<Mode::kFairShare>(rep);
}
+size_t GetMorePreciseMemoryUsage(const CordRep* rep) {
+  // Run the mode-templated usage analysis in the kTotalMorePrecise mode.
+  constexpr Mode kAccountingMode = Mode::kTotalMorePrecise;
+  return GetEstimatedUsage<kAccountingMode>(rep);
+}
+
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl
diff --git a/absl/strings/cord_analysis.h b/absl/strings/cord_analysis.h
index 7041ad1a..9b9527a5 100644
--- a/absl/strings/cord_analysis.h
+++ b/absl/strings/cord_analysis.h
@@ -31,6 +31,24 @@ namespace cord_internal {
size_t GetEstimatedMemoryUsage(const CordRep* rep);
// Returns the *approximate* number of bytes held in full or in part by this
+// Cord for the distinct memory held by this cord. This is similar to
+// `GetEstimatedMemoryUsage()`, except that if the cord has multiple references
+// to the same memory, that memory is only counted once.
+//
+// For example:
+// absl::Cord cord;
+// cord.Append(some_other_cord);
+// cord.Append(some_other_cord);
+// // Calls GetEstimatedMemoryUsage() and counts `some_other_cord` twice:
+// cord.EstimatedMemoryUsage(kTotal);
+// // Calls GetMorePreciseMemoryUsage() and counts `some_other_cord` once:
+// cord.EstimatedMemoryUsage(kTotalMorePrecise);
+//
+// This is more expensive than `GetEstimatedMemoryUsage()` as it requires
+// deduplicating all memory references.
+size_t GetMorePreciseMemoryUsage(const CordRep* rep);
+
+// Returns the *approximate* number of bytes held in full or in part by this
// CordRep weighted by the sharing ratio of that data. For example, if some data
// edge is shared by 4 different Cords, then each cord is attributed 1/4th of
// the total memory usage as a 'fair share' of the total memory usage.
diff --git a/absl/strings/cord_test.cc b/absl/strings/cord_test.cc
index 55412c7f..36e397ed 100644
--- a/absl/strings/cord_test.cc
+++ b/absl/strings/cord_test.cc
@@ -1765,6 +1765,8 @@ TEST_P(CordTest, ExternalMemoryGet) {
// of empty and inlined cords, and flat nodes.
constexpr auto kFairShare = absl::CordMemoryAccounting::kFairShare;
+constexpr auto kTotalMorePrecise =
+ absl::CordMemoryAccounting::kTotalMorePrecise;
// Creates a cord of `n` `c` values, making sure no string stealing occurs.
absl::Cord MakeCord(size_t n, char c) {
@@ -1776,12 +1778,14 @@ TEST(CordTest, CordMemoryUsageEmpty) {
absl::Cord cord;
EXPECT_EQ(sizeof(absl::Cord), cord.EstimatedMemoryUsage());
EXPECT_EQ(sizeof(absl::Cord), cord.EstimatedMemoryUsage(kFairShare));
+ EXPECT_EQ(sizeof(absl::Cord), cord.EstimatedMemoryUsage(kTotalMorePrecise));
}
TEST(CordTest, CordMemoryUsageInlined) {
absl::Cord a("hello");
EXPECT_EQ(a.EstimatedMemoryUsage(), sizeof(absl::Cord));
EXPECT_EQ(a.EstimatedMemoryUsage(kFairShare), sizeof(absl::Cord));
+ EXPECT_EQ(a.EstimatedMemoryUsage(kTotalMorePrecise), sizeof(absl::Cord));
}
TEST(CordTest, CordMemoryUsageExternalMemory) {
@@ -1791,6 +1795,7 @@ TEST(CordTest, CordMemoryUsageExternalMemory) {
sizeof(absl::Cord) + 1000 + sizeof(CordRepExternal) + sizeof(intptr_t);
EXPECT_EQ(cord.EstimatedMemoryUsage(), expected);
EXPECT_EQ(cord.EstimatedMemoryUsage(kFairShare), expected);
+ EXPECT_EQ(cord.EstimatedMemoryUsage(kTotalMorePrecise), expected);
}
TEST(CordTest, CordMemoryUsageFlat) {
@@ -1800,6 +1805,8 @@ TEST(CordTest, CordMemoryUsageFlat) {
EXPECT_EQ(cord.EstimatedMemoryUsage(), sizeof(absl::Cord) + flat_size);
EXPECT_EQ(cord.EstimatedMemoryUsage(kFairShare),
sizeof(absl::Cord) + flat_size);
+ EXPECT_EQ(cord.EstimatedMemoryUsage(kTotalMorePrecise),
+ sizeof(absl::Cord) + flat_size);
}
TEST(CordTest, CordMemoryUsageSubStringSharedFlat) {
@@ -1809,6 +1816,8 @@ TEST(CordTest, CordMemoryUsageSubStringSharedFlat) {
absl::Cord cord = flat.Subcord(500, 1000);
EXPECT_EQ(cord.EstimatedMemoryUsage(),
sizeof(absl::Cord) + sizeof(CordRepSubstring) + flat_size);
+ EXPECT_EQ(cord.EstimatedMemoryUsage(kTotalMorePrecise),
+ sizeof(absl::Cord) + sizeof(CordRepSubstring) + flat_size);
EXPECT_EQ(cord.EstimatedMemoryUsage(kFairShare),
sizeof(absl::Cord) + sizeof(CordRepSubstring) + flat_size / 2);
}
@@ -1819,6 +1828,8 @@ TEST(CordTest, CordMemoryUsageFlatShared) {
const size_t flat_size =
absl::CordTestPeer::Tree(cord)->flat()->AllocatedSize();
EXPECT_EQ(cord.EstimatedMemoryUsage(), sizeof(absl::Cord) + flat_size);
+ EXPECT_EQ(cord.EstimatedMemoryUsage(kTotalMorePrecise),
+ sizeof(absl::Cord) + flat_size);
EXPECT_EQ(cord.EstimatedMemoryUsage(kFairShare),
sizeof(absl::Cord) + flat_size / 2);
}
@@ -1837,6 +1848,8 @@ TEST(CordTest, CordMemoryUsageFlatHardenedAndShared) {
absl::Cord cord2(cord);
EXPECT_EQ(cord2.EstimatedMemoryUsage(),
sizeof(absl::Cord) + sizeof(CordRepCrc) + flat_size);
+ EXPECT_EQ(cord2.EstimatedMemoryUsage(kTotalMorePrecise),
+ sizeof(absl::Cord) + sizeof(CordRepCrc) + flat_size);
EXPECT_EQ(cord2.EstimatedMemoryUsage(kFairShare),
sizeof(absl::Cord) + (sizeof(CordRepCrc) + flat_size / 2) / 2);
}
@@ -1863,6 +1876,8 @@ TEST(CordTest, CordMemoryUsageBTree) {
size_t rep1_shared_size = sizeof(CordRepBtree) + flats1_size / 2;
EXPECT_EQ(cord1.EstimatedMemoryUsage(), sizeof(absl::Cord) + rep1_size);
+ EXPECT_EQ(cord1.EstimatedMemoryUsage(kTotalMorePrecise),
+ sizeof(absl::Cord) + rep1_size);
EXPECT_EQ(cord1.EstimatedMemoryUsage(kFairShare),
sizeof(absl::Cord) + rep1_shared_size);
@@ -1877,6 +1892,8 @@ TEST(CordTest, CordMemoryUsageBTree) {
size_t rep2_size = sizeof(CordRepBtree) + flats2_size;
EXPECT_EQ(cord2.EstimatedMemoryUsage(), sizeof(absl::Cord) + rep2_size);
+ EXPECT_EQ(cord2.EstimatedMemoryUsage(kTotalMorePrecise),
+ sizeof(absl::Cord) + rep2_size);
EXPECT_EQ(cord2.EstimatedMemoryUsage(kFairShare),
sizeof(absl::Cord) + rep2_size);
@@ -1885,6 +1902,8 @@ TEST(CordTest, CordMemoryUsageBTree) {
EXPECT_EQ(cord.EstimatedMemoryUsage(),
sizeof(absl::Cord) + sizeof(CordRepBtree) + rep1_size + rep2_size);
+ EXPECT_EQ(cord.EstimatedMemoryUsage(kTotalMorePrecise),
+ sizeof(absl::Cord) + sizeof(CordRepBtree) + rep1_size + rep2_size);
EXPECT_EQ(cord.EstimatedMemoryUsage(kFairShare),
sizeof(absl::Cord) + sizeof(CordRepBtree) + rep1_shared_size / 2 +
rep2_size);
@@ -1903,6 +1922,66 @@ TEST_P(CordTest, CordMemoryUsageInlineRep) {
EXPECT_EQ(c1.EstimatedMemoryUsage(), c2.EstimatedMemoryUsage());
}
+// Checks that kTotalMorePrecise charges a buffer referenced twice by the same
+// cord only once, while the default accounting charges it per reference.
+TEST_P(CordTest, CordMemoryUsageTotalMorePreciseMode) {
+  constexpr size_t kChunkSize = 2000;
+  std::string payload(kChunkSize, 'x');
+  const absl::Cord flat(std::move(payload));
+
+  // `fragmented` references the buffer backing `flat` twice: once through
+  // the copy constructor and once through Append().
+  absl::Cord fragmented(flat);
+  fragmented.Append(flat);
+
+  // What `flat` accounts for beneath its top-level Cord object.
+  const size_t flat_internal_usage =
+      flat.EstimatedMemoryUsage() - sizeof(absl::Cord);
+
+  // Both expectations include the Cord object itself plus the CordRepBtree
+  // that joins the two references.
+  const size_t tree_overhead = sizeof(absl::Cord) + sizeof(CordRepBtree);
+
+  // De-duplicated: flat's internals are expected exactly once.
+  const size_t expected_precise = tree_overhead + flat_internal_usage;
+  EXPECT_EQ(fragmented.EstimatedMemoryUsage(kTotalMorePrecise),
+            expected_precise);
+
+  // Default accounting overestimates here: flat's internals appear twice.
+  const size_t expected_total = tree_overhead + 2 * flat_internal_usage;
+  EXPECT_EQ(fragmented.EstimatedMemoryUsage(), expected_total);
+}
+
+// Checks kTotalMorePrecise de-duplication when both references to the shared
+// buffer are made through Subcord() rather than directly.
+TEST_P(CordTest, CordMemoryUsageTotalMorePreciseModeWithSubstring) {
+  constexpr size_t kChunkSize = 2000;
+  std::string payload(kChunkSize, 'x');
+  const absl::Cord flat(std::move(payload));
+
+  // Append the same sub-range of `flat` twice, so `fragmented` ends up with
+  // two references into the buffer shared with `flat`, each via a Subcord().
+  absl::Cord fragmented;
+  for (int reference = 0; reference < 2; ++reference) {
+    fragmented.Append(flat.Subcord(1, kChunkSize - 2));
+  }
+
+  // What `flat` accounts for beneath its top-level Cord object.
+  const size_t flat_internal_usage =
+      flat.EstimatedMemoryUsage() - sizeof(absl::Cord);
+
+  // Both expectations include the Cord object, the CordRepBtree, and the two
+  // CordRepSubstring nodes that each point at flat's internals.
+  const size_t tree_overhead = sizeof(absl::Cord) + sizeof(CordRepBtree) +
+                               2 * sizeof(CordRepSubstring);
+
+  // De-duplicated: flat's internals are expected exactly once.
+  const size_t expected_precise = tree_overhead + flat_internal_usage;
+  EXPECT_EQ(fragmented.EstimatedMemoryUsage(kTotalMorePrecise),
+            expected_precise);
+
+  // Default accounting overestimates here: flat's internals appear twice.
+  const size_t expected_total = tree_overhead + 2 * flat_internal_usage;
+  EXPECT_EQ(fragmented.EstimatedMemoryUsage(), expected_total);
+}
} // namespace
// Regtest for 7510292 (fix a bug introduced by 7465150)