From e2dd9408cd711777afaa9410427fb0d761ab004a Mon Sep 17 00:00:00 2001 From: mtklein Date: Wed, 8 Apr 2015 14:02:31 -0700 Subject: Rearrange SkRecord with small N in mind This rearranges the record pointers and types so they can go in a single array, then preallocates some space for them and for the SkVarAlloc. picture_overhead_draw bench drops from ~1000ns to 500-600ns, with no effect on picture_overhead_nodraw. I don't see any significant effect on large picture recording times from our .skps. BUG=chromium:470553 Review URL: https://codereview.chromium.org/1061783002 --- src/core/SkRecord.cpp | 13 +++-- src/core/SkRecord.h | 124 ++++++++++++++++-------------------------------- src/core/SkVarAlloc.cpp | 13 +++++ src/core/SkVarAlloc.h | 10 ++++ 4 files changed, 75 insertions(+), 85 deletions(-) (limited to 'src') diff --git a/src/core/SkRecord.cpp b/src/core/SkRecord.cpp index e2d919b777..c2008a850a 100644 --- a/src/core/SkRecord.cpp +++ b/src/core/SkRecord.cpp @@ -1,3 +1,10 @@ +/* + * Copyright 2015 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + #include "SkRecord.h" SkRecord::~SkRecord() { @@ -9,13 +16,13 @@ SkRecord::~SkRecord() { void SkRecord::grow() { SkASSERT(fCount == fReserved); - fReserved = SkTMax(kFirstReserveCount, fReserved*2); + SkASSERT(fReserved > 0); + fReserved *= 2; fRecords.realloc(fReserved); - fTypes.realloc(fReserved); } size_t SkRecord::bytesUsed() const { return fAlloc.approxBytesAllocated() + - fReserved * (sizeof(Record) + sizeof(Type8)) + + (fReserved - kInlineRecords) * sizeof(Record) + sizeof(SkRecord); } diff --git a/src/core/SkRecord.h b/src/core/SkRecord.h index fb082949b4..ef25e8d901 100644 --- a/src/core/SkRecord.h +++ b/src/core/SkRecord.h @@ -13,7 +13,7 @@ #include "SkTemplates.h" #include "SkVarAlloc.h" -// SkRecord (REC-ord) represents a sequence of SkCanvas calls, saved for future use. +// SkRecord represents a sequence of SkCanvas calls, saved for future use. // These future uses may include: replay, optimization, serialization, or combinations of those. // // Though an enterprising user may find calling alloc(), append(), visit(), and mutate() enough to @@ -27,10 +27,16 @@ class SkRecord : public SkNVRefCnt { enum { - kFirstReserveCount = 64 / sizeof(void*), + // TODO: tune these two constants. + kInlineRecords = 4, // Ideally our lower limit on recorded ops per picture. + kInlineAllocLgBytes = 8, // 1<<8 == 256 bytes inline, then SkVarAlloc starting at 512 bytes. }; public: - SkRecord() : fCount(0), fReserved(0), fAlloc(8/*start block sizes at 256 bytes*/) {} + SkRecord() + : fCount(0) + , fReserved(kInlineRecords) + , fAlloc(kInlineAllocLgBytes+1, // First malloc'd block is 2x as large as fInlineAlloc. + fInlineAlloc, sizeof(fInlineAlloc)) {} ~SkRecord(); // Returns the number of canvas commands in this SkRecord. @@ -43,7 +49,7 @@ public: template R visit(unsigned i, F& f) const { SkASSERT(i < this->count()); - return fRecords[i].visit(fTypes[i], f); + return fRecords[i].visit(f); } // Mutate the i-th canvas command with a functor matching this interface: @@ -53,15 +59,15 @@ public: template R mutate(unsigned i, F& f) { SkASSERT(i < this->count()); - return fRecords[i].mutate(fTypes[i], f); + return fRecords[i].mutate(f); } - // TODO: It'd be nice to infer R from F for visit and mutate if we ever get std::result_of. + + // TODO: It'd be nice to infer R from F for visit and mutate. // Allocate contiguous space for count Ts, to be freed when the SkRecord is destroyed. // Here T can be any class, not just those from SkRecords. Throws on failure. template T* alloc(size_t count = 1) { - // Bump up to the next pointer width if needed, so all allocations start pointer-aligned. return (T*)fAlloc.alloc(sizeof(T) * count, SK_MALLOC_THROW); } @@ -72,7 +78,6 @@ public: if (fCount == fReserved) { this->grow(); } - fTypes[fCount] = T::kType; return fRecords[fCount++].set(this->allocCommand()); } @@ -86,7 +91,6 @@ public: Destroyer destroyer; this->mutate(i, destroyer); - fTypes[i] = T::kType; return fRecords[i].set(this->allocCommand()); } @@ -97,10 +101,9 @@ public: T* replace(unsigned i, const SkRecords::Adopted& proofOfAdoption) { SkASSERT(i < this->count()); - SkASSERT(Existing::kType == fTypes[i]); - SkASSERT(proofOfAdoption == fRecords[i].ptr()); + SkASSERT(Existing::kType == fRecords[i].type()); + SkASSERT(proofOfAdoption == fRecords[i].ptr()); - fTypes[i] = T::kType; return fRecords[i].set(this->allocCommand()); } @@ -109,9 +112,7 @@ public: size_t bytesUsed() const; private: - // Implementation notes! - // - // Logically an SkRecord is structured as an array of pointers into a big chunk of memory where + // An SkRecord is structured as an array of pointers into a big chunk of memory where // records representing each canvas draw call are stored: // // fRecords: [*][*][*]... @@ -123,27 +124,8 @@ private: // v v v // fAlloc: [SkRecords::DrawRect][SkRecords::DrawPosTextH][SkRecords::DrawRect]... // - // In the scheme above, the pointers in fRecords are void*: they have no type. The type is not - // stored in fAlloc either; we just write raw data there. But we need that type information. - // Here are some options: - // 1) use inheritance, virtuals, and vtables to make the fRecords pointers smarter - // 2) store the type data manually in fAlloc at the start of each record - // 3) store the type data manually somewhere with fRecords - // - // This code uses approach 3). The implementation feels very similar to 1), but it's - // devirtualized instead of using the language's polymorphism mechanisms. This lets us work - // with the types themselves (as SkRecords::Type), a sort of limited free RTTI; it lets us pay - // only 1 byte to store the type instead of a full pointer (4-8 bytes); and it leads to better - // decoupling between the SkRecords::* record types and the operations performed on them in - // visit() or mutate(). The recorded canvas calls don't have to have any idea about the - // operations performed on them. - // - // We store the types in a parallel fTypes array, mainly so that they can be tightly packed as - // single bytes. This has the side effect of allowing very fast analysis passes over an - // SkRecord looking for just patterns of draw commands (or using this as a quick reject - // mechanism) though there's admittedly not a very good API exposed publically for this. - // - // The cost to append a T into this structure is 1 + sizeof(void*) + sizeof(T). + // We store the types of each of the pointers alongside the pointer. + // The cost to append a T to this structure is 8 + sizeof(T) bytes. // A mutator that can be used with replace to destroy canvas commands. struct Destroyer { @@ -151,19 +133,6 @@ private: void operator()(T* record) { record->~T(); } }; - // Logically the same as SkRecords::Type, but packed into 8 bits. - struct Type8 { - public: - // This intentionally converts implicitly back and forth. - Type8(SkRecords::Type type) : fType(type) { SkASSERT(*this == type); } - operator SkRecords::Type () { return (SkRecords::Type)fType; } - - private: - uint8_t fType; - }; - - // No point in allocating any more than one of an empty struct. - // We could just return NULL but it's sort of confusing to return NULL on success. template SK_WHEN(SkTIsEmpty, T*) allocCommand() { static T singleton = {}; @@ -173,65 +142,56 @@ private: template SK_WHEN(!SkTIsEmpty, T*) allocCommand() { return this->alloc(); } - // Called when we've run out of room to record new commands. void grow(); - // An untyped pointer to some bytes in fAlloc. This is the interface for polymorphic dispatch: - // visit() and mutate() work with the parallel fTypes array to do the work of a vtable. + // A typed pointer to some bytes in fAlloc. visit() and mutate() allow polymorphic dispatch. struct Record { - public: + // On 32-bit machines we store type in 4 bytes, followed by a pointer. Simple. + // On 64-bit machines we store a pointer with the type slotted into two top (unused) bytes. + // FWIW, SkRecords::Type is tiny. It can easily fit in one byte. + uint64_t fTypeAndPtr; + static const int kTypeShift = sizeof(void*) == 4 ? 32 : 48; + // Point this record to its data in fAlloc. Returns ptr for convenience. template T* set(T* ptr) { - fPtr = ptr; + fTypeAndPtr = ((uint64_t)T::kType) << kTypeShift | (uint64_t)ptr; return ptr; } - // Get the data in fAlloc, assuming it's of type T. - template - T* ptr() const { return (T*)fPtr; } + SkRecords::Type type() const { return (SkRecords::Type)(fTypeAndPtr >> kTypeShift); } + void* ptr() const { return (void*)(fTypeAndPtr & ((1ull< - R visit(Type8 type, F& f) const { - #define CASE(T) case SkRecords::T##_Type: return f(*this->ptr()); - switch(type) { SK_RECORD_TYPES(CASE) } + R visit(F& f) const { + #define CASE(T) case SkRecords::T##_Type: return f(*(const SkRecords::T*)this->ptr()); + switch(this->type()) { SK_RECORD_TYPES(CASE) } #undef CASE SkDEBUGFAIL("Unreachable"); return R(); } - // Mutate this record with functor F (see public API above) assuming the record we're - // pointing to has this type. + // Mutate this record with functor F (see public API above). template - R mutate(Type8 type, F& f) { - #define CASE(T) case SkRecords::T##_Type: return f(this->ptr()); - switch(type) { SK_RECORD_TYPES(CASE) } + R mutate(F& f) { + #define CASE(T) case SkRecords::T##_Type: return f((SkRecords::T*)this->ptr()); + switch(this->type()) { SK_RECORD_TYPES(CASE) } #undef CASE SkDEBUGFAIL("Unreachable"); return R(); } - - private: - void* fPtr; }; + // fRecords needs to be a data structure that can append fixed length data, and need to + // support efficient random access and forward iteration. (It doesn't need to be contiguous.) + unsigned fCount, fReserved; + SkAutoSTMalloc fRecords; + // fAlloc needs to be a data structure which can append variable length data in contiguous // chunks, returning a stable handle to that data for later retrieval. - // - // fRecords and fTypes need to be data structures that can append fixed length data, and need to - // support efficient random access and forward iteration. (They don't need to be contiguous.) - - // fCount and fReserved measure both fRecords and fTypes, which always grow in lock step. - unsigned fCount; - unsigned fReserved; - SkAutoTMalloc fRecords; - SkAutoTMalloc fTypes; SkVarAlloc fAlloc; - // Strangely the order of these fields matters. If the unsigneds don't go first we're 56 bytes. - // tomhudson and mtklein have no idea why. + char fInlineAlloc[1 << kInlineAllocLgBytes]; }; -SK_COMPILE_ASSERT(sizeof(SkRecord) <= 56, SkRecordSize); #endif//SkRecord_DEFINED diff --git a/src/core/SkVarAlloc.cpp b/src/core/SkVarAlloc.cpp index 88bd17028f..fa89d38c23 100644 --- a/src/core/SkVarAlloc.cpp +++ b/src/core/SkVarAlloc.cpp @@ -1,3 +1,10 @@ +/* + * Copyright 2015 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + #include "SkVarAlloc.h" // We use non-standard malloc diagnostic methods to make sure our allocations are sized well. @@ -25,6 +32,12 @@ SkVarAlloc::SkVarAlloc(size_t minLgSize) , fLgSize(minLgSize) , fBlock(NULL) {} +SkVarAlloc::SkVarAlloc(size_t minLgSize, char* storage, size_t len) + : fByte(storage) + , fRemaining(len) + , fLgSize(minLgSize) + , fBlock(NULL) {} + SkVarAlloc::~SkVarAlloc() { Block* b = fBlock; while (b) { diff --git a/src/core/SkVarAlloc.h b/src/core/SkVarAlloc.h index fb55192439..8a55b36615 100644 --- a/src/core/SkVarAlloc.h +++ b/src/core/SkVarAlloc.h @@ -1,3 +1,10 @@ +/* + * Copyright 2015 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + #ifndef SkVarAlloc_DEFINED #define SkVarAlloc_DEFINED @@ -7,6 +14,9 @@ class SkVarAlloc : SkNoncopyable { public: // Smallest block we'll allocate is 2**N bytes. explicit SkVarAlloc(size_t minLgSize); + // Same as above, but first uses up to len bytes from storage. + SkVarAlloc(size_t minLgSize, char* storage, size_t len); + ~SkVarAlloc(); // Returns contiguous bytes aligned at least for pointers. You may pass SK_MALLOC_THROW, etc. -- cgit v1.2.3