about summary refs log tree commit diff homepage
path: root/src
diff options
context:
space:
mode:
author: mtklein <mtklein@google.com> 2015-04-08 14:09:41 -0700
committer: Commit bot <commit-bot@chromium.org> 2015-04-08 14:09:41 -0700
commit 35f55764b81390a085fb90f624082c196fbd6229 (patch)
tree e16cf8961b814300b4c3f02551aff5cd869ffd06 /src
parent e2dd9408cd711777afaa9410427fb0d761ab004a (diff)
Revert of Rearrange SkRecord with small N in mind (patchset #8 id:120001 of https://codereview.chromium.org/1061783002/)
Reason for revert: https://uberchromegw.corp.google.com/i/client.skia/builders/Test-Ubuntu-GCC-GCE-CPU-AVX2-x86-Debug/builds/149/steps/dm/logs/stdio Original issue's description: > Rearrange SkRecord with small N in mind > > This rearranges the record pointers and types so they can go in a single array, then preallocates some space for them and for the SkVarAlloc. > > picture_overhead_draw bench drops from ~1000ns to 500-600ns, with no effect on picture_overhead_nodraw. > > I don't see any significant effect on large picture recording times from our .skps. > > BUG=chromium:470553 > > Committed: https://skia.googlesource.com/skia/+/e2dd9408cd711777afaa9410427fb0d761ab004a TBR=reed@google.com,mtklein@chromium.org NOPRESUBMIT=true NOTREECHECKS=true NOTRY=true BUG=chromium:470553 Review URL: https://codereview.chromium.org/1068383003
Diffstat (limited to 'src')
-rw-r--r--src/core/SkRecord.cpp13
-rw-r--r--src/core/SkRecord.h124
-rw-r--r--src/core/SkVarAlloc.cpp13
-rw-r--r--src/core/SkVarAlloc.h10
4 files changed, 85 insertions, 75 deletions
diff --git a/src/core/SkRecord.cpp b/src/core/SkRecord.cpp
index c2008a850a..e2d919b777 100644
--- a/src/core/SkRecord.cpp
+++ b/src/core/SkRecord.cpp
@@ -1,10 +1,3 @@
-/*
- * Copyright 2015 Google Inc.
- *
- * Use of this source code is governed by a BSD-style license that can be
- * found in the LICENSE file.
- */
-
#include "SkRecord.h"
SkRecord::~SkRecord() {
@@ -16,13 +9,13 @@ SkRecord::~SkRecord() {
void SkRecord::grow() {
SkASSERT(fCount == fReserved);
- SkASSERT(fReserved > 0);
- fReserved *= 2;
+ fReserved = SkTMax<unsigned>(kFirstReserveCount, fReserved*2);
fRecords.realloc(fReserved);
+ fTypes.realloc(fReserved);
}
size_t SkRecord::bytesUsed() const {
return fAlloc.approxBytesAllocated() +
- (fReserved - kInlineRecords) * sizeof(Record) +
+ fReserved * (sizeof(Record) + sizeof(Type8)) +
sizeof(SkRecord);
}
diff --git a/src/core/SkRecord.h b/src/core/SkRecord.h
index ef25e8d901..fb082949b4 100644
--- a/src/core/SkRecord.h
+++ b/src/core/SkRecord.h
@@ -13,7 +13,7 @@
#include "SkTemplates.h"
#include "SkVarAlloc.h"
-// SkRecord represents a sequence of SkCanvas calls, saved for future use.
+// SkRecord (REC-ord) represents a sequence of SkCanvas calls, saved for future use.
// These future uses may include: replay, optimization, serialization, or combinations of those.
//
// Though an enterprising user may find calling alloc(), append(), visit(), and mutate() enough to
@@ -27,16 +27,10 @@
class SkRecord : public SkNVRefCnt<SkRecord> {
enum {
- // TODO: tune these two constants.
- kInlineRecords = 4, // Ideally our lower limit on recorded ops per picture.
- kInlineAllocLgBytes = 8, // 1<<8 == 256 bytes inline, then SkVarAlloc starting at 512 bytes.
+ kFirstReserveCount = 64 / sizeof(void*),
};
public:
- SkRecord()
- : fCount(0)
- , fReserved(kInlineRecords)
- , fAlloc(kInlineAllocLgBytes+1, // First malloc'd block is 2x as large as fInlineAlloc.
- fInlineAlloc, sizeof(fInlineAlloc)) {}
+ SkRecord() : fCount(0), fReserved(0), fAlloc(8/*start block sizes at 256 bytes*/) {}
~SkRecord();
// Returns the number of canvas commands in this SkRecord.
@@ -49,7 +43,7 @@ public:
template <typename R, typename F>
R visit(unsigned i, F& f) const {
SkASSERT(i < this->count());
- return fRecords[i].visit<R>(f);
+ return fRecords[i].visit<R>(fTypes[i], f);
}
// Mutate the i-th canvas command with a functor matching this interface:
@@ -59,15 +53,15 @@ public:
template <typename R, typename F>
R mutate(unsigned i, F& f) {
SkASSERT(i < this->count());
- return fRecords[i].mutate<R>(f);
+ return fRecords[i].mutate<R>(fTypes[i], f);
}
-
- // TODO: It'd be nice to infer R from F for visit and mutate.
+ // TODO: It'd be nice to infer R from F for visit and mutate if we ever get std::result_of.
// Allocate contiguous space for count Ts, to be freed when the SkRecord is destroyed.
// Here T can be any class, not just those from SkRecords. Throws on failure.
template <typename T>
T* alloc(size_t count = 1) {
+ // Bump up to the next pointer width if needed, so all allocations start pointer-aligned.
return (T*)fAlloc.alloc(sizeof(T) * count, SK_MALLOC_THROW);
}
@@ -78,6 +72,7 @@ public:
if (fCount == fReserved) {
this->grow();
}
+ fTypes[fCount] = T::kType;
return fRecords[fCount++].set(this->allocCommand<T>());
}
@@ -91,6 +86,7 @@ public:
Destroyer destroyer;
this->mutate<void>(i, destroyer);
+ fTypes[i] = T::kType;
return fRecords[i].set(this->allocCommand<T>());
}
@@ -101,9 +97,10 @@ public:
T* replace(unsigned i, const SkRecords::Adopted<Existing>& proofOfAdoption) {
SkASSERT(i < this->count());
- SkASSERT(Existing::kType == fRecords[i].type());
- SkASSERT(proofOfAdoption == fRecords[i].ptr());
+ SkASSERT(Existing::kType == fTypes[i]);
+ SkASSERT(proofOfAdoption == fRecords[i].ptr<Existing>());
+ fTypes[i] = T::kType;
return fRecords[i].set(this->allocCommand<T>());
}
@@ -112,7 +109,9 @@ public:
size_t bytesUsed() const;
private:
- // An SkRecord is structured as an array of pointers into a big chunk of memory where
+ // Implementation notes!
+ //
+ // Logically an SkRecord is structured as an array of pointers into a big chunk of memory where
// records representing each canvas draw call are stored:
//
// fRecords: [*][*][*]...
@@ -124,8 +123,27 @@ private:
// v v v
// fAlloc: [SkRecords::DrawRect][SkRecords::DrawPosTextH][SkRecords::DrawRect]...
//
- // We store the types of each of the pointers alongside the pointer.
- // The cost to append a T to this structure is 8 + sizeof(T) bytes.
+ // In the scheme above, the pointers in fRecords are void*: they have no type. The type is not
+ // stored in fAlloc either; we just write raw data there. But we need that type information.
+ // Here are some options:
+ // 1) use inheritance, virtuals, and vtables to make the fRecords pointers smarter
+ // 2) store the type data manually in fAlloc at the start of each record
+ // 3) store the type data manually somewhere with fRecords
+ //
+ // This code uses approach 3). The implementation feels very similar to 1), but it's
+ // devirtualized instead of using the language's polymorphism mechanisms. This lets us work
+ // with the types themselves (as SkRecords::Type), a sort of limited free RTTI; it lets us pay
+ // only 1 byte to store the type instead of a full pointer (4-8 bytes); and it leads to better
+ // decoupling between the SkRecords::* record types and the operations performed on them in
+ // visit() or mutate(). The recorded canvas calls don't have to have any idea about the
+ // operations performed on them.
+ //
+ // We store the types in a parallel fTypes array, mainly so that they can be tightly packed as
+ // single bytes. This has the side effect of allowing very fast analysis passes over an
+ // SkRecord looking for just patterns of draw commands (or using this as a quick reject
+ // mechanism) though there's admittedly not a very good API exposed publically for this.
+ //
+ // The cost to append a T into this structure is 1 + sizeof(void*) + sizeof(T).
// A mutator that can be used with replace to destroy canvas commands.
struct Destroyer {
@@ -133,6 +151,19 @@ private:
void operator()(T* record) { record->~T(); }
};
+ // Logically the same as SkRecords::Type, but packed into 8 bits.
+ struct Type8 {
+ public:
+ // This intentionally converts implicitly back and forth.
+ Type8(SkRecords::Type type) : fType(type) { SkASSERT(*this == type); }
+ operator SkRecords::Type () { return (SkRecords::Type)fType; }
+
+ private:
+ uint8_t fType;
+ };
+
+ // No point in allocating any more than one of an empty struct.
+ // We could just return NULL but it's sort of confusing to return NULL on success.
template <typename T>
SK_WHEN(SkTIsEmpty<T>, T*) allocCommand() {
static T singleton = {};
@@ -142,56 +173,65 @@ private:
template <typename T>
SK_WHEN(!SkTIsEmpty<T>, T*) allocCommand() { return this->alloc<T>(); }
+ // Called when we've run out of room to record new commands.
void grow();
- // A typed pointer to some bytes in fAlloc. visit() and mutate() allow polymorphic dispatch.
+ // An untyped pointer to some bytes in fAlloc. This is the interface for polymorphic dispatch:
+ // visit() and mutate() work with the parallel fTypes array to do the work of a vtable.
struct Record {
- // On 32-bit machines we store type in 4 bytes, followed by a pointer. Simple.
- // On 64-bit machines we store a pointer with the type slotted into two top (unused) bytes.
- // FWIW, SkRecords::Type is tiny. It can easily fit in one byte.
- uint64_t fTypeAndPtr;
- static const int kTypeShift = sizeof(void*) == 4 ? 32 : 48;
-
+ public:
// Point this record to its data in fAlloc. Returns ptr for convenience.
template <typename T>
T* set(T* ptr) {
- fTypeAndPtr = ((uint64_t)T::kType) << kTypeShift | (uint64_t)ptr;
+ fPtr = ptr;
return ptr;
}
- SkRecords::Type type() const { return (SkRecords::Type)(fTypeAndPtr >> kTypeShift); }
- void* ptr() const { return (void*)(fTypeAndPtr & ((1ull<<kTypeShift)-1)); }
+ // Get the data in fAlloc, assuming it's of type T.
+ template <typename T>
+ T* ptr() const { return (T*)fPtr; }
- // Visit this record with functor F (see public API above).
+ // Visit this record with functor F (see public API above) assuming the record we're
+ // pointing to has this type.
template <typename R, typename F>
- R visit(F& f) const {
- #define CASE(T) case SkRecords::T##_Type: return f(*(const SkRecords::T*)this->ptr());
- switch(this->type()) { SK_RECORD_TYPES(CASE) }
+ R visit(Type8 type, F& f) const {
+ #define CASE(T) case SkRecords::T##_Type: return f(*this->ptr<SkRecords::T>());
+ switch(type) { SK_RECORD_TYPES(CASE) }
#undef CASE
SkDEBUGFAIL("Unreachable");
return R();
}
- // Mutate this record with functor F (see public API above).
+ // Mutate this record with functor F (see public API above) assuming the record we're
+ // pointing to has this type.
template <typename R, typename F>
- R mutate(F& f) {
- #define CASE(T) case SkRecords::T##_Type: return f((SkRecords::T*)this->ptr());
- switch(this->type()) { SK_RECORD_TYPES(CASE) }
+ R mutate(Type8 type, F& f) {
+ #define CASE(T) case SkRecords::T##_Type: return f(this->ptr<SkRecords::T>());
+ switch(type) { SK_RECORD_TYPES(CASE) }
#undef CASE
SkDEBUGFAIL("Unreachable");
return R();
}
- };
- // fRecords needs to be a data structure that can append fixed length data, and need to
- // support efficient random access and forward iteration. (It doesn't need to be contiguous.)
- unsigned fCount, fReserved;
- SkAutoSTMalloc<kInlineRecords, Record> fRecords;
+ private:
+ void* fPtr;
+ };
// fAlloc needs to be a data structure which can append variable length data in contiguous
// chunks, returning a stable handle to that data for later retrieval.
+ //
+ // fRecords and fTypes need to be data structures that can append fixed length data, and need to
+ // support efficient random access and forward iteration. (They don't need to be contiguous.)
+
+ // fCount and fReserved measure both fRecords and fTypes, which always grow in lock step.
+ unsigned fCount;
+ unsigned fReserved;
+ SkAutoTMalloc<Record> fRecords;
+ SkAutoTMalloc<Type8> fTypes;
SkVarAlloc fAlloc;
- char fInlineAlloc[1 << kInlineAllocLgBytes];
+ // Strangely the order of these fields matters. If the unsigneds don't go first we're 56 bytes.
+ // tomhudson and mtklein have no idea why.
};
+SK_COMPILE_ASSERT(sizeof(SkRecord) <= 56, SkRecordSize);
#endif//SkRecord_DEFINED
diff --git a/src/core/SkVarAlloc.cpp b/src/core/SkVarAlloc.cpp
index fa89d38c23..88bd17028f 100644
--- a/src/core/SkVarAlloc.cpp
+++ b/src/core/SkVarAlloc.cpp
@@ -1,10 +1,3 @@
-/*
- * Copyright 2015 Google Inc.
- *
- * Use of this source code is governed by a BSD-style license that can be
- * found in the LICENSE file.
- */
-
#include "SkVarAlloc.h"
// We use non-standard malloc diagnostic methods to make sure our allocations are sized well.
@@ -32,12 +25,6 @@ SkVarAlloc::SkVarAlloc(size_t minLgSize)
, fLgSize(minLgSize)
, fBlock(NULL) {}
-SkVarAlloc::SkVarAlloc(size_t minLgSize, char* storage, size_t len)
- : fByte(storage)
- , fRemaining(len)
- , fLgSize(minLgSize)
- , fBlock(NULL) {}
-
SkVarAlloc::~SkVarAlloc() {
Block* b = fBlock;
while (b) {
diff --git a/src/core/SkVarAlloc.h b/src/core/SkVarAlloc.h
index 8a55b36615..fb55192439 100644
--- a/src/core/SkVarAlloc.h
+++ b/src/core/SkVarAlloc.h
@@ -1,10 +1,3 @@
-/*
- * Copyright 2015 Google Inc.
- *
- * Use of this source code is governed by a BSD-style license that can be
- * found in the LICENSE file.
- */
-
#ifndef SkVarAlloc_DEFINED
#define SkVarAlloc_DEFINED
@@ -14,9 +7,6 @@ class SkVarAlloc : SkNoncopyable {
public:
// Smallest block we'll allocate is 2**N bytes.
explicit SkVarAlloc(size_t minLgSize);
- // Same as above, but first uses up to len bytes from storage.
- SkVarAlloc(size_t minLgSize, char* storage, size_t len);
-
~SkVarAlloc();
// Returns contiguous bytes aligned at least for pointers. You may pass SK_MALLOC_THROW, etc.