From 53fbcb883dc8ca208dc58a8cc168d0628fe2556f Mon Sep 17 00:00:00 2001 From: Abseil Team Date: Thu, 29 Jun 2023 09:25:56 -0700 Subject: Introduce a kTotalMorePrecise accounting mode for Cord::EstimatedMemoryUsage(). This mode avoids double-counting blocks that a Cord references more than once; otherwise it is similar to the existing kTotal mode. There's no change to the existing kTotal or kFairShare accounting modes. PiperOrigin-RevId: 544378591 Change-Id: I7b4ae55cd93d631194e59a9cd0ff07f47611219e --- absl/strings/cord.h | 35 ++++++++++++++++--- absl/strings/cord_analysis.cc | 24 ++++++++++++- absl/strings/cord_analysis.h | 18 ++++++++++ absl/strings/cord_test.cc | 79 +++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 151 insertions(+), 5 deletions(-) diff --git a/absl/strings/cord.h b/absl/strings/cord.h index f5a2da97..457ccf06 100644 --- a/absl/strings/cord.h +++ b/absl/strings/cord.h @@ -110,8 +110,29 @@ enum class CordMemoryAccounting { // Counts the *approximate* number of bytes held in full or in part by this // Cord (which may not remain the same between invocations). Cords that share // memory could each be "charged" independently for the same shared memory. + // See also comment on `kTotalMorePrecise` on internally shared memory. kTotal, + // Counts the *approximate* number of bytes held in full or in part by this + // Cord for the distinct memory held by this cord. This option is similar + // to `kTotal`, except that if the cord has multiple references to the same + // memory, that memory is only counted once. + // + // For example: + // absl::Cord cord; + // cord.append(some_other_cord); + // cord.append(some_other_cord); + // // Counts `some_other_cord` twice: + // cord.EstimatedMemoryUsage(kTotal); + // // Counts `some_other_cord` once: + // cord.EstimatedMemoryUsage(kTotalMorePrecise); + // + // The `kTotalMorePrecise` number is more expensive to compute as it requires + // deduplicating all memory references. Applications should prefer to use + // `kFairShare` or `kTotal` unless they really need a more precise estimate + // on "how much memory is potentially held / kept alive by this cord?" + kTotalMorePrecise, + // Counts the *approximate* number of bytes held in full or in part by this // Cord weighted by the sharing ratio of that data. For example, if some data // edge is shared by 4 different Cords, then each cord is attributed 1/4th of @@ -1273,10 +1294,16 @@ inline size_t Cord::EstimatedMemoryUsage( CordMemoryAccounting accounting_method) const { size_t result = sizeof(Cord); if (const absl::cord_internal::CordRep* rep = contents_.tree()) { - if (accounting_method == CordMemoryAccounting::kFairShare) { - result += cord_internal::GetEstimatedFairShareMemoryUsage(rep); - } else { - result += cord_internal::GetEstimatedMemoryUsage(rep); + switch (accounting_method) { + case CordMemoryAccounting::kFairShare: + result += cord_internal::GetEstimatedFairShareMemoryUsage(rep); + break; + case CordMemoryAccounting::kTotalMorePrecise: + result += cord_internal::GetMorePreciseMemoryUsage(rep); + break; + case CordMemoryAccounting::kTotal: + result += cord_internal::GetEstimatedMemoryUsage(rep); + break; } } return result; diff --git a/absl/strings/cord_analysis.cc b/absl/strings/cord_analysis.cc index 73d3c4e6..e859b0db 100644 --- a/absl/strings/cord_analysis.cc +++ b/absl/strings/cord_analysis.cc @@ -16,6 +16,7 @@ #include #include +#include #include "absl/base/attributes.h" #include "absl/base/config.h" @@ -37,7 +38,7 @@ namespace cord_internal { namespace { // Accounting mode for analyzing memory usage. -enum class Mode { kTotal, kFairShare }; +enum class Mode { kFairShare, kTotal, kTotalMorePrecise }; // CordRepRef holds a `const CordRep*` reference in rep, and depending on mode, // holds a 'fraction' representing a cumulative inverse refcount weight. @@ -62,6 +63,23 @@ struct RawUsage { void Add(size_t size, CordRepRef) { total += size; } }; +// Overloaded representation of RawUsage that tracks the set of objects +// counted, and avoids double-counting objects referenced more than once +// by the same Cord. +template <> +struct RawUsage { + size_t total = 0; + // TODO(b/289250880): Replace this with a flat_hash_set. + std::unordered_set counted; + + void Add(size_t size, CordRepRef repref) { + if (counted.find(repref.rep) == counted.end()) { + counted.insert(repref.rep); + total += size; + } + } +}; + // Returns n / refcount avoiding a div for the common refcount == 1. template double MaybeDiv(double d, refcount_t refcount) { @@ -183,6 +201,10 @@ size_t GetEstimatedFairShareMemoryUsage(const CordRep* rep) { return GetEstimatedUsage(rep); } +size_t GetMorePreciseMemoryUsage(const CordRep* rep) { + return GetEstimatedUsage(rep); +} + } // namespace cord_internal ABSL_NAMESPACE_END } // namespace absl diff --git a/absl/strings/cord_analysis.h b/absl/strings/cord_analysis.h index 7041ad1a..9b9527a5 100644 --- a/absl/strings/cord_analysis.h +++ b/absl/strings/cord_analysis.h @@ -30,6 +30,24 @@ namespace cord_internal { // memory could each be "charged" independently for the same shared memory. size_t GetEstimatedMemoryUsage(const CordRep* rep); +// Returns the *approximate* number of bytes held in full or in part by this +// Cord for the distinct memory held by this cord. This is similar to +// `GetEstimatedMemoryUsage()`, except that if the cord has multiple references +// to the same memory, that memory is only counted once. +// +// For example: +// absl::Cord cord; +// cord.append(some_other_cord); +// cord.append(some_other_cord); +// // Calls GetEstimatedMemoryUsage() and counts `other_cord` twice: +// cord.EstimatedMemoryUsage(kTotal); +// // Calls GetMorePreciseMemoryUsage() and counts `other_cord` once: +// cord.EstimatedMemoryUsage(kTotalMorePrecise); +// +// This is more expensive than `GetEstimatedMemoryUsage()` as it requires +// deduplicating all memory references. +size_t GetMorePreciseMemoryUsage(const CordRep* rep); + // Returns the *approximate* number of bytes held in full or in part by this // CordRep weighted by the sharing ratio of that data. For example, if some data // edge is shared by 4 different Cords, then each cord is attribute 1/4th of diff --git a/absl/strings/cord_test.cc b/absl/strings/cord_test.cc index 55412c7f..36e397ed 100644 --- a/absl/strings/cord_test.cc +++ b/absl/strings/cord_test.cc @@ -1765,6 +1765,8 @@ TEST_P(CordTest, ExternalMemoryGet) { // of empty and inlined cords, and flat nodes. constexpr auto kFairShare = absl::CordMemoryAccounting::kFairShare; +constexpr auto kTotalMorePrecise = + absl::CordMemoryAccounting::kTotalMorePrecise; // Creates a cord of `n` `c` values, making sure no string stealing occurs. absl::Cord MakeCord(size_t n, char c) { @@ -1776,12 +1778,14 @@ TEST(CordTest, CordMemoryUsageEmpty) { absl::Cord cord; EXPECT_EQ(sizeof(absl::Cord), cord.EstimatedMemoryUsage()); EXPECT_EQ(sizeof(absl::Cord), cord.EstimatedMemoryUsage(kFairShare)); + EXPECT_EQ(sizeof(absl::Cord), cord.EstimatedMemoryUsage(kTotalMorePrecise)); } TEST(CordTest, CordMemoryUsageInlined) { absl::Cord a("hello"); EXPECT_EQ(a.EstimatedMemoryUsage(), sizeof(absl::Cord)); EXPECT_EQ(a.EstimatedMemoryUsage(kFairShare), sizeof(absl::Cord)); + EXPECT_EQ(a.EstimatedMemoryUsage(kTotalMorePrecise), sizeof(absl::Cord)); } TEST(CordTest, CordMemoryUsageExternalMemory) { @@ -1791,6 +1795,7 @@ TEST(CordTest, CordMemoryUsageExternalMemory) { sizeof(absl::Cord) + 1000 + sizeof(CordRepExternal) + sizeof(intptr_t); EXPECT_EQ(cord.EstimatedMemoryUsage(), expected); EXPECT_EQ(cord.EstimatedMemoryUsage(kFairShare), expected); + EXPECT_EQ(cord.EstimatedMemoryUsage(kTotalMorePrecise), expected); } TEST(CordTest, CordMemoryUsageFlat) { @@ -1800,6 +1805,8 @@ TEST(CordTest, CordMemoryUsageFlat) { EXPECT_EQ(cord.EstimatedMemoryUsage(), sizeof(absl::Cord) + flat_size); EXPECT_EQ(cord.EstimatedMemoryUsage(kFairShare), sizeof(absl::Cord) + flat_size); + EXPECT_EQ(cord.EstimatedMemoryUsage(kTotalMorePrecise), + sizeof(absl::Cord) + flat_size); } TEST(CordTest, CordMemoryUsageSubStringSharedFlat) { @@ -1809,6 +1816,8 @@ TEST(CordTest, CordMemoryUsageSubStringSharedFlat) { absl::Cord cord = flat.Subcord(500, 1000); EXPECT_EQ(cord.EstimatedMemoryUsage(), sizeof(absl::Cord) + sizeof(CordRepSubstring) + flat_size); + EXPECT_EQ(cord.EstimatedMemoryUsage(kTotalMorePrecise), + sizeof(absl::Cord) + sizeof(CordRepSubstring) + flat_size); EXPECT_EQ(cord.EstimatedMemoryUsage(kFairShare), sizeof(absl::Cord) + sizeof(CordRepSubstring) + flat_size / 2); } @@ -1819,6 +1828,8 @@ TEST(CordTest, CordMemoryUsageFlatShared) { const size_t flat_size = absl::CordTestPeer::Tree(cord)->flat()->AllocatedSize(); EXPECT_EQ(cord.EstimatedMemoryUsage(), sizeof(absl::Cord) + flat_size); + EXPECT_EQ(cord.EstimatedMemoryUsage(kTotalMorePrecise), + sizeof(absl::Cord) + flat_size); EXPECT_EQ(cord.EstimatedMemoryUsage(kFairShare), sizeof(absl::Cord) + flat_size / 2); } @@ -1837,6 +1848,8 @@ TEST(CordTest, CordMemoryUsageFlatHardenedAndShared) { absl::Cord cord2(cord); EXPECT_EQ(cord2.EstimatedMemoryUsage(), sizeof(absl::Cord) + sizeof(CordRepCrc) + flat_size); + EXPECT_EQ(cord2.EstimatedMemoryUsage(kTotalMorePrecise), + sizeof(absl::Cord) + sizeof(CordRepCrc) + flat_size); EXPECT_EQ(cord2.EstimatedMemoryUsage(kFairShare), sizeof(absl::Cord) + (sizeof(CordRepCrc) + flat_size / 2) / 2); } @@ -1863,6 +1876,8 @@ TEST(CordTest, CordMemoryUsageBTree) { size_t rep1_shared_size = sizeof(CordRepBtree) + flats1_size / 2; EXPECT_EQ(cord1.EstimatedMemoryUsage(), sizeof(absl::Cord) + rep1_size); + EXPECT_EQ(cord1.EstimatedMemoryUsage(kTotalMorePrecise), + sizeof(absl::Cord) + rep1_size); EXPECT_EQ(cord1.EstimatedMemoryUsage(kFairShare), sizeof(absl::Cord) + rep1_shared_size); @@ -1877,6 +1892,8 @@ TEST(CordTest, CordMemoryUsageBTree) { size_t rep2_size = sizeof(CordRepBtree) + flats2_size; EXPECT_EQ(cord2.EstimatedMemoryUsage(), sizeof(absl::Cord) + rep2_size); + EXPECT_EQ(cord2.EstimatedMemoryUsage(kTotalMorePrecise), + sizeof(absl::Cord) + rep2_size); EXPECT_EQ(cord2.EstimatedMemoryUsage(kFairShare), sizeof(absl::Cord) + rep2_size); @@ -1885,6 +1902,8 @@ TEST(CordTest, CordMemoryUsageBTree) { EXPECT_EQ(cord.EstimatedMemoryUsage(), sizeof(absl::Cord) + sizeof(CordRepBtree) + rep1_size + rep2_size); + EXPECT_EQ(cord.EstimatedMemoryUsage(kTotalMorePrecise), + sizeof(absl::Cord) + sizeof(CordRepBtree) + rep1_size + rep2_size); EXPECT_EQ(cord.EstimatedMemoryUsage(kFairShare), sizeof(absl::Cord) + sizeof(CordRepBtree) + rep1_shared_size / 2 + rep2_size); @@ -1903,6 +1922,66 @@ TEST_P(CordTest, CordMemoryUsageInlineRep) { EXPECT_EQ(c1.EstimatedMemoryUsage(), c2.EstimatedMemoryUsage()); } +TEST_P(CordTest, CordMemoryUsageTotalMorePreciseMode) { + constexpr size_t kChunkSize = 2000; + std::string tmp_str(kChunkSize, 'x'); + const absl::Cord flat(std::move(tmp_str)); + + // Construct `fragmented` with two references into the same + // underlying buffer shared with `flat`: + absl::Cord fragmented(flat); + fragmented.Append(flat); + + // Memory usage of `flat`, minus the top-level Cord object: + const size_t flat_internal_usage = + flat.EstimatedMemoryUsage() - sizeof(absl::Cord); + + // `fragmented` holds a Cord and a CordRepBtree. That tree points to two + // copies of flat's internals, which we expect to dedup: + EXPECT_EQ(fragmented.EstimatedMemoryUsage(kTotalMorePrecise), + sizeof(absl::Cord) + + sizeof(CordRepBtree) + + flat_internal_usage); + + // This is a case where kTotal produces an overestimate: + EXPECT_EQ(fragmented.EstimatedMemoryUsage(), + sizeof(absl::Cord) + + sizeof(CordRepBtree) + + 2 * flat_internal_usage); +} + +TEST_P(CordTest, CordMemoryUsageTotalMorePreciseModeWithSubstring) { + constexpr size_t kChunkSize = 2000; + std::string tmp_str(kChunkSize, 'x'); + const absl::Cord flat(std::move(tmp_str)); + + // Construct `fragmented` with two references into the same + // underlying buffer shared with `flat`. + // + // This time, each reference is through a Subcord(): + absl::Cord fragmented; + fragmented.Append(flat.Subcord(1, kChunkSize - 2)); + fragmented.Append(flat.Subcord(1, kChunkSize - 2)); + + // Memory usage of `flat`, minus the top-level Cord object: + const size_t flat_internal_usage = + flat.EstimatedMemoryUsage() - sizeof(absl::Cord); + + // `fragmented` holds a Cord and a CordRepBtree. That tree points to two + // CordRepSubstrings, each pointing at flat's internals. + EXPECT_EQ(fragmented.EstimatedMemoryUsage(kTotalMorePrecise), + sizeof(absl::Cord) + + sizeof(CordRepBtree) + + 2 * sizeof(CordRepSubstring) + + flat_internal_usage); + + // This is a case where kTotal produces an overestimate: + EXPECT_EQ(fragmented.EstimatedMemoryUsage(), + sizeof(absl::Cord) + + sizeof(CordRepBtree) + + 2 * sizeof(CordRepSubstring) + + 2 * flat_internal_usage); +} } // namespace // Regtest for 7510292 (fix a bug introduced by 7465150) -- cgit v1.2.3