From 35f55764b81390a085fb90f624082c196fbd6229 Mon Sep 17 00:00:00 2001 From: mtklein Date: Wed, 8 Apr 2015 14:09:41 -0700 Subject: Revert of Rearrange SkRecord with small N in mind (patchset #8 id:120001 of https://codereview.chromium.org/1061783002/) Reason for revert: https://uberchromegw.corp.google.com/i/client.skia/builders/Test-Ubuntu-GCC-GCE-CPU-AVX2-x86-Debug/builds/149/steps/dm/logs/stdio Original issue's description: > Rearrange SkRecord with small N in mind > > This rearranges the record pointers and types so they can go in a single array, then preallocates some space for them and for the SkVarAlloc. > > picture_overhead_draw bench drops from ~1000ns to 500-600ns, with no effect on picture_overhead_nodraw. > > I don't see any significant effect on large picture recording times from our .skps. > > BUG=chromium:470553 > > Committed: https://skia.googlesource.com/skia/+/e2dd9408cd711777afaa9410427fb0d761ab004a TBR=reed@google.com,mtklein@chromium.org NOPRESUBMIT=true NOTREECHECKS=true NOTRY=true BUG=chromium:470553 Review URL: https://codereview.chromium.org/1068383003 --- src/core/SkRecord.cpp | 13 ++--- src/core/SkRecord.h | 124 ++++++++++++++++++++++++++++++++---------------- src/core/SkVarAlloc.cpp | 13 ----- src/core/SkVarAlloc.h | 10 ---- 4 files changed, 85 insertions(+), 75 deletions(-) (limited to 'src') diff --git a/src/core/SkRecord.cpp b/src/core/SkRecord.cpp index c2008a850a..e2d919b777 100644 --- a/src/core/SkRecord.cpp +++ b/src/core/SkRecord.cpp @@ -1,10 +1,3 @@ -/* - * Copyright 2015 Google Inc. - * - * Use of this source code is governed by a BSD-style license that can be - * found in the LICENSE file. - */ - #include "SkRecord.h" SkRecord::~SkRecord() { @@ -16,13 +9,13 @@ SkRecord::~SkRecord() { void SkRecord::grow() { SkASSERT(fCount == fReserved); - SkASSERT(fReserved > 0); - fReserved *= 2; + fReserved = SkTMax(kFirstReserveCount, fReserved*2); fRecords.realloc(fReserved); + fTypes.realloc(fReserved); } size_t SkRecord::bytesUsed() const { return fAlloc.approxBytesAllocated() + - (fReserved - kInlineRecords) * sizeof(Record) + + fReserved * (sizeof(Record) + sizeof(Type8)) + sizeof(SkRecord); } diff --git a/src/core/SkRecord.h b/src/core/SkRecord.h index ef25e8d901..fb082949b4 100644 --- a/src/core/SkRecord.h +++ b/src/core/SkRecord.h @@ -13,7 +13,7 @@ #include "SkTemplates.h" #include "SkVarAlloc.h" -// SkRecord represents a sequence of SkCanvas calls, saved for future use. +// SkRecord (REC-ord) represents a sequence of SkCanvas calls, saved for future use. // These future uses may include: replay, optimization, serialization, or combinations of those. // // Though an enterprising user may find calling alloc(), append(), visit(), and mutate() enough to @@ -27,16 +27,10 @@ class SkRecord : public SkNVRefCnt { enum { - // TODO: tune these two constants. - kInlineRecords = 4, // Ideally our lower limit on recorded ops per picture. - kInlineAllocLgBytes = 8, // 1<<8 == 256 bytes inline, then SkVarAlloc starting at 512 bytes. + kFirstReserveCount = 64 / sizeof(void*), }; public: - SkRecord() - : fCount(0) - , fReserved(kInlineRecords) - , fAlloc(kInlineAllocLgBytes+1, // First malloc'd block is 2x as large as fInlineAlloc. - fInlineAlloc, sizeof(fInlineAlloc)) {} + SkRecord() : fCount(0), fReserved(0), fAlloc(8/*start block sizes at 256 bytes*/) {} ~SkRecord(); // Returns the number of canvas commands in this SkRecord. @@ -49,7 +43,7 @@ public: template R visit(unsigned i, F& f) const { SkASSERT(i < this->count()); - return fRecords[i].visit(f); + return fRecords[i].visit(fTypes[i], f); } // Mutate the i-th canvas command with a functor matching this interface: @@ -59,15 +53,15 @@ public: template R mutate(unsigned i, F& f) { SkASSERT(i < this->count()); - return fRecords[i].mutate(f); + return fRecords[i].mutate(fTypes[i], f); } - - // TODO: It'd be nice to infer R from F for visit and mutate. + // TODO: It'd be nice to infer R from F for visit and mutate if we ever get std::result_of. // Allocate contiguous space for count Ts, to be freed when the SkRecord is destroyed. // Here T can be any class, not just those from SkRecords. Throws on failure. template T* alloc(size_t count = 1) { + // Bump up to the next pointer width if needed, so all allocations start pointer-aligned. return (T*)fAlloc.alloc(sizeof(T) * count, SK_MALLOC_THROW); } @@ -78,6 +72,7 @@ public: if (fCount == fReserved) { this->grow(); } + fTypes[fCount] = T::kType; return fRecords[fCount++].set(this->allocCommand()); } @@ -91,6 +86,7 @@ public: Destroyer destroyer; this->mutate(i, destroyer); + fTypes[i] = T::kType; return fRecords[i].set(this->allocCommand()); } @@ -101,9 +97,10 @@ public: T* replace(unsigned i, const SkRecords::Adopted& proofOfAdoption) { SkASSERT(i < this->count()); - SkASSERT(Existing::kType == fRecords[i].type()); - SkASSERT(proofOfAdoption == fRecords[i].ptr()); + SkASSERT(Existing::kType == fTypes[i]); + SkASSERT(proofOfAdoption == fRecords[i].ptr()); + fTypes[i] = T::kType; return fRecords[i].set(this->allocCommand()); } @@ -112,7 +109,9 @@ public: size_t bytesUsed() const; private: - // An SkRecord is structured as an array of pointers into a big chunk of memory where + // Implementation notes! + // + // Logically an SkRecord is structured as an array of pointers into a big chunk of memory where // records representing each canvas draw call are stored: // // fRecords: [*][*][*]... @@ -124,8 +123,27 @@ private: // v v v // fAlloc: [SkRecords::DrawRect][SkRecords::DrawPosTextH][SkRecords::DrawRect]... // - // We store the types of each of the pointers alongside the pointer. - // The cost to append a T to this structure is 8 + sizeof(T) bytes. + // In the scheme above, the pointers in fRecords are void*: they have no type. The type is not + // stored in fAlloc either; we just write raw data there. But we need that type information. + // Here are some options: + // 1) use inheritance, virtuals, and vtables to make the fRecords pointers smarter + // 2) store the type data manually in fAlloc at the start of each record + // 3) store the type data manually somewhere with fRecords + // + // This code uses approach 3). The implementation feels very similar to 1), but it's + // devirtualized instead of using the language's polymorphism mechanisms. This lets us work + // with the types themselves (as SkRecords::Type), a sort of limited free RTTI; it lets us pay + // only 1 byte to store the type instead of a full pointer (4-8 bytes); and it leads to better + // decoupling between the SkRecords::* record types and the operations performed on them in + // visit() or mutate(). The recorded canvas calls don't have to have any idea about the + // operations performed on them. + // + // We store the types in a parallel fTypes array, mainly so that they can be tightly packed as + // single bytes. This has the side effect of allowing very fast analysis passes over an + // SkRecord looking for just patterns of draw commands (or using this as a quick reject + // mechanism) though there's admittedly not a very good API exposed publically for this. + // + // The cost to append a T into this structure is 1 + sizeof(void*) + sizeof(T). // A mutator that can be used with replace to destroy canvas commands. struct Destroyer { @@ -133,6 +151,19 @@ private: void operator()(T* record) { record->~T(); } }; + // Logically the same as SkRecords::Type, but packed into 8 bits. + struct Type8 { + public: + // This intentionally converts implicitly back and forth. + Type8(SkRecords::Type type) : fType(type) { SkASSERT(*this == type); } + operator SkRecords::Type () { return (SkRecords::Type)fType; } + + private: + uint8_t fType; + }; + + // No point in allocating any more than one of an empty struct. + // We could just return NULL but it's sort of confusing to return NULL on success. template SK_WHEN(SkTIsEmpty, T*) allocCommand() { static T singleton = {}; @@ -142,56 +173,65 @@ private: template SK_WHEN(!SkTIsEmpty, T*) allocCommand() { return this->alloc(); } + // Called when we've run out of room to record new commands. void grow(); - // A typed pointer to some bytes in fAlloc. visit() and mutate() allow polymorphic dispatch. + // An untyped pointer to some bytes in fAlloc. This is the interface for polymorphic dispatch: + // visit() and mutate() work with the parallel fTypes array to do the work of a vtable. struct Record { - // On 32-bit machines we store type in 4 bytes, followed by a pointer. Simple. - // On 64-bit machines we store a pointer with the type slotted into two top (unused) bytes. - // FWIW, SkRecords::Type is tiny. It can easily fit in one byte. - uint64_t fTypeAndPtr; - static const int kTypeShift = sizeof(void*) == 4 ? 32 : 48; - + public: // Point this record to its data in fAlloc. Returns ptr for convenience. template T* set(T* ptr) { - fTypeAndPtr = ((uint64_t)T::kType) << kTypeShift | (uint64_t)ptr; + fPtr = ptr; return ptr; } - SkRecords::Type type() const { return (SkRecords::Type)(fTypeAndPtr >> kTypeShift); } - void* ptr() const { return (void*)(fTypeAndPtr & ((1ull< + T* ptr() const { return (T*)fPtr; } - // Visit this record with functor F (see public API above). + // Visit this record with functor F (see public API above) assuming the record we're + // pointing to has this type. template - R visit(F& f) const { - #define CASE(T) case SkRecords::T##_Type: return f(*(const SkRecords::T*)this->ptr()); - switch(this->type()) { SK_RECORD_TYPES(CASE) } + R visit(Type8 type, F& f) const { + #define CASE(T) case SkRecords::T##_Type: return f(*this->ptr()); + switch(type) { SK_RECORD_TYPES(CASE) } #undef CASE SkDEBUGFAIL("Unreachable"); return R(); } - // Mutate this record with functor F (see public API above). + // Mutate this record with functor F (see public API above) assuming the record we're + // pointing to has this type. template - R mutate(F& f) { - #define CASE(T) case SkRecords::T##_Type: return f((SkRecords::T*)this->ptr()); - switch(this->type()) { SK_RECORD_TYPES(CASE) } + R mutate(Type8 type, F& f) { + #define CASE(T) case SkRecords::T##_Type: return f(this->ptr()); + switch(type) { SK_RECORD_TYPES(CASE) } #undef CASE SkDEBUGFAIL("Unreachable"); return R(); } - }; - // fRecords needs to be a data structure that can append fixed length data, and need to - // support efficient random access and forward iteration. (It doesn't need to be contiguous.) - unsigned fCount, fReserved; - SkAutoSTMalloc fRecords; + private: + void* fPtr; + }; // fAlloc needs to be a data structure which can append variable length data in contiguous // chunks, returning a stable handle to that data for later retrieval. + // + // fRecords and fTypes need to be data structures that can append fixed length data, and need to + // support efficient random access and forward iteration. (They don't need to be contiguous.) + + // fCount and fReserved measure both fRecords and fTypes, which always grow in lock step. + unsigned fCount; + unsigned fReserved; + SkAutoTMalloc fRecords; + SkAutoTMalloc fTypes; SkVarAlloc fAlloc; - char fInlineAlloc[1 << kInlineAllocLgBytes]; + // Strangely the order of these fields matters. If the unsigneds don't go first we're 56 bytes. + // tomhudson and mtklein have no idea why. }; +SK_COMPILE_ASSERT(sizeof(SkRecord) <= 56, SkRecordSize); #endif//SkRecord_DEFINED diff --git a/src/core/SkVarAlloc.cpp b/src/core/SkVarAlloc.cpp index fa89d38c23..88bd17028f 100644 --- a/src/core/SkVarAlloc.cpp +++ b/src/core/SkVarAlloc.cpp @@ -1,10 +1,3 @@ -/* - * Copyright 2015 Google Inc. - * - * Use of this source code is governed by a BSD-style license that can be - * found in the LICENSE file. - */ - #include "SkVarAlloc.h" // We use non-standard malloc diagnostic methods to make sure our allocations are sized well. @@ -32,12 +25,6 @@ SkVarAlloc::SkVarAlloc(size_t minLgSize) , fLgSize(minLgSize) , fBlock(NULL) {} -SkVarAlloc::SkVarAlloc(size_t minLgSize, char* storage, size_t len) - : fByte(storage) - , fRemaining(len) - , fLgSize(minLgSize) - , fBlock(NULL) {} - SkVarAlloc::~SkVarAlloc() { Block* b = fBlock; while (b) { diff --git a/src/core/SkVarAlloc.h b/src/core/SkVarAlloc.h index 8a55b36615..fb55192439 100644 --- a/src/core/SkVarAlloc.h +++ b/src/core/SkVarAlloc.h @@ -1,10 +1,3 @@ -/* - * Copyright 2015 Google Inc. - * - * Use of this source code is governed by a BSD-style license that can be - * found in the LICENSE file. - */ - #ifndef SkVarAlloc_DEFINED #define SkVarAlloc_DEFINED @@ -14,9 +7,6 @@ class SkVarAlloc : SkNoncopyable { public: // Smallest block we'll allocate is 2**N bytes. explicit SkVarAlloc(size_t minLgSize); - // Same as above, but first uses up to len bytes from storage. - SkVarAlloc(size_t minLgSize, char* storage, size_t len); - ~SkVarAlloc(); // Returns contiguous bytes aligned at least for pointers. You may pass SK_MALLOC_THROW, etc. -- cgit v1.2.3