diff options
author | Yuqian Li <liyuqian@google.com> | 2017-08-08 17:09:01 -0400 |
---|---|---|
committer | Skia Commit-Bot <skia-commit-bot@chromium.org> | 2017-08-10 17:56:25 +0000 |
commit | 92e6cc6b2d42f3826b36bd24da7dd3fe8443b114 (patch) | |
tree | f89a4cfa153989ea411e232539a9b532a88d525d /src | |
parent | 80488229ea6e37da9c85ffdb640b99fff3b11f2f (diff) |
Some performance tweaks for DAA
1. Always inline (Clang previously ignored inline and got 25% slower)
2. SIMD everywhere other than x86 gcc:
non-SIMD is only faster on my desktop with gcc;
with Clang on my desktop, SIMD is 50% faster than non-SIMD.
3. Allocate 4x memory instead of 2x when running out of space:
on old Android devices with Linux kernel 3.10 (e.g., Nexus 6P, 5X),
the alloc/memcpy will trigger a major bottleneck in the kernel (30% of
the running time). This bottleneck goes away (the kernel is no
longer doing stupid things during alloc/memcpy) in Linux kernel
3.18 (e.g., Pixel), and that's why DAA is much faster on Pixel than
on Nexus 6P.
I think maybe I should adopt SkRasterPipeline for device-specific
optimizations.
Bug: skia:
Change-Id: I0408aa7671a5f1b39aad3bec25f8fc994ff5a1bb
Reviewed-on: https://skia-review.googlesource.com/30820
Reviewed-by: Mike Klein <mtklein@google.com>
Commit-Queue: Yuqian Li <liyuqian@google.com>
Diffstat (limited to 'src')
-rw-r--r-- | src/core/SkCoverageDelta.cpp | 16 | ||||
-rw-r--r-- | src/core/SkCoverageDelta.h | 77 |
2 files changed, 42 insertions, 51 deletions
diff --git a/src/core/SkCoverageDelta.cpp b/src/core/SkCoverageDelta.cpp index 8f109cec1a..43449e9440 100644 --- a/src/core/SkCoverageDelta.cpp +++ b/src/core/SkCoverageDelta.cpp @@ -79,6 +79,9 @@ SkCoverageDeltaMask::SkCoverageDeltaMask(const SkIRect& bounds) : fBounds(bounds memset(fDeltaStorage, 0, (fExpandedWidth * bounds.height() + PADDING * 2) * sizeof(SkFixed));; } +// TODO As this function is so performance-critical (and we're thinking so much about SIMD), use +// SkOpts framework to compile multiple versions of this function so we can choose the best one +// available at runtime. void SkCoverageDeltaMask::convertCoverageToAlpha(bool isEvenOdd, bool isInverse, bool isConvex) { SkFixed* deltaRow = &this->delta(fBounds.fLeft, fBounds.fTop); SkAlpha* maskRow = fMask; @@ -117,24 +120,11 @@ void SkCoverageDeltaMask::convertCoverageToAlpha(bool isEvenOdd, bool isInverse, c[j] = c[j - 1] + deltaRow[ix + j]; } - // My SIMD CoverageToAlpha seems to be only faster with SSSE3. - // (On linux, even with -mavx2, my SIMD still seems to be slow...) - // Even with only SSSE2, it's still faster to do SIMD_WIDTH non-SIMD computations at one - // time (i.e., SIMD_WIDTH = 8 is faster than SIMD_WIDTH = 1 even if SK_CPU_SSE_LEVEL is - // less than SK_CPU_SSE_LEVEL_SSSE3). Maybe the compiler is doing some SIMD by itself. -#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3 using SkNi = SkNx<SIMD_WIDTH, int>; - SkNi cn = SkNi::Load(c); SkNi an = isConvex ? ConvexCoverageToAlpha(cn, isInverse) : CoverageToAlpha(cn, isEvenOdd, isInverse); SkNx_cast<SkAlpha>(an).store(maskRow + ix); -#else - for(int j = 0; j < SIMD_WIDTH; ++j) { - maskRow[ix + j] = isConvex ? 
ConvexCoverageToAlpha(c[j], isInverse) - : CoverageToAlpha(c[j], isEvenOdd, isInverse); - } -#endif } // Finally, advance to the next row diff --git a/src/core/SkCoverageDelta.h b/src/core/SkCoverageDelta.h index 4c6b12bae6..9218157708 100644 --- a/src/core/SkCoverageDelta.h +++ b/src/core/SkCoverageDelta.h @@ -55,21 +55,21 @@ public: SkCoverageDeltaList(SkCoverageDeltaAllocator* alloc, int top, int bottom, bool forceRLE); - inline int top() const { return fTop; } - inline int bottom() const { return fBottom; } - inline bool forceRLE() const { return fForceRLE; } - inline int count(int y) const { this->checkY(y); return fCounts[y]; } - inline bool sorted(int y) const { this->checkY(y); return fSorted[y]; } - inline void addDelta(int x, int y, SkFixed delta) { this->push_back(y, {x, delta}); } - - inline const SkCoverageDelta& getDelta(int y, int i) const { + int top() const { return fTop; } + int bottom() const { return fBottom; } + bool forceRLE() const { return fForceRLE; } + int count(int y) const { this->checkY(y); return fCounts[y]; } + bool sorted(int y) const { this->checkY(y); return fSorted[y]; } + + SK_ALWAYS_INLINE void addDelta(int x, int y, SkFixed delta) { this->push_back(y, {x, delta}); } + SK_ALWAYS_INLINE const SkCoverageDelta& getDelta(int y, int i) const { this->checkY(y); SkASSERT(i < fCounts[y]); return fRows[y][i]; } // It might be better to sort right before blitting to make the memory hot - inline void sort(int y) { + void sort(int y) { this->checkY(y); if (!fSorted[y]) { SkTQSort(fRows[y], fRows[y] + fCounts[y] - 1); @@ -77,25 +77,12 @@ public: } } - inline const SkAntiRect& getAntiRect() const { return fAntiRect; } - inline void setAntiRect(int x, int y, int width, int height, + const SkAntiRect& getAntiRect() const { return fAntiRect; } + void setAntiRect(int x, int y, int width, int height, SkAlpha leftAlpha, SkAlpha rightAlpha) { fAntiRect = {x, y, width, height, leftAlpha, rightAlpha}; } - inline void push_back(int y, const 
SkCoverageDelta& delta) { - this->checkY(y); - if (fCounts[y] == fMaxCounts[y]) { - fMaxCounts[y] *= 2; - SkCoverageDelta* newRow = fAlloc->makeArrayDefault<SkCoverageDelta>(fMaxCounts[y]); - memcpy(newRow, fRows[y], sizeof(SkCoverageDelta) * fCounts[y]); - fRows[y] = newRow; - } - SkASSERT(fCounts[y] < fMaxCounts[y]); - fRows[y][fCounts[y]++] = delta; - fSorted[y] = fSorted[y] && (fCounts[y] == 1 || delta.fX >= fRows[y][fCounts[y] - 2].fX); - } - private: SkCoverageDeltaAllocator* fAlloc; SkCoverageDelta** fRows; @@ -113,7 +100,20 @@ private: int fReservedCounts[RESERVED_HEIGHT]; int fReservedMaxCounts[RESERVED_HEIGHT]; - inline void checkY(int y) const { SkASSERT(y >= fTop && y < fBottom); } + void checkY(int y) const { SkASSERT(y >= fTop && y < fBottom); } + + SK_ALWAYS_INLINE void push_back(int y, const SkCoverageDelta& delta) { + this->checkY(y); + if (fCounts[y] == fMaxCounts[y]) { + fMaxCounts[y] *= 4; + SkCoverageDelta* newRow = fAlloc->makeArrayDefault<SkCoverageDelta>(fMaxCounts[y]); + memcpy(newRow, fRows[y], sizeof(SkCoverageDelta) * fCounts[y]); + fRows[y] = newRow; + } + SkASSERT(fCounts[y] < fMaxCounts[y]); + fRows[y][fCounts[y]++] = delta; + fSorted[y] = fSorted[y] && (fCounts[y] == 1 || delta.fX >= fRows[y][fCounts[y] - 2].fX); + } }; class SkCoverageDeltaMask { @@ -136,24 +136,24 @@ public: SkCoverageDeltaMask(const SkIRect& bounds); - inline int top() const { return fBounds.fTop; } - inline int bottom() const { return fBounds.fBottom; } - inline SkAlpha* getMask() { return fMask; } - inline const SkIRect& getBounds() const { return fBounds; } + int top() const { return fBounds.fTop; } + int bottom() const { return fBounds.fBottom; } + SkAlpha* getMask() { return fMask; } + const SkIRect& getBounds() const { return fBounds; } - inline void addDelta (int x, int y, SkFixed delta) { this->delta(x, y) += delta; } - inline SkFixed& delta (int x, int y) { + SK_ALWAYS_INLINE void addDelta (int x, int y, SkFixed delta) { this->delta(x, y) += delta; } + 
SK_ALWAYS_INLINE SkFixed& delta (int x, int y) { this->checkX(x); this->checkY(y); return fDeltas[this->index(x, y)]; } - inline void setAntiRect(int x, int y, int width, int height, + void setAntiRect(int x, int y, int width, int height, SkAlpha leftAlpha, SkAlpha rightAlpha) { fAntiRect = {x, y, width, height, leftAlpha, rightAlpha}; } - inline SkMask prepareSkMask() { + SkMask prepareSkMask() { SkMask mask; mask.fImage = fMask; mask.fBounds = fBounds; @@ -172,9 +172,10 @@ private: int fExpandedWidth; SkAntiRect fAntiRect; - inline int index(int x, int y) const { return y * fExpandedWidth + x; } - inline void checkY(int y) const { SkASSERT(y >= fBounds.fTop && y < fBounds.fBottom); } - inline void checkX(int x) const { + SK_ALWAYS_INLINE int index(int x, int y) const { return y * fExpandedWidth + x; } + + void checkY(int y) const { SkASSERT(y >= fBounds.fTop && y < fBounds.fBottom); } + void checkX(int x) const { SkASSERT(x >= fBounds.fLeft - PADDING && x < fBounds.fRight + PADDING); } }; @@ -192,7 +193,7 @@ static SK_ALWAYS_INLINE SkAlpha CoverageToAlpha(SkFixed coverage, bool isEvenOdd } template<typename T> -static SK_ALWAYS_INLINE T CoverageToAlpha(T coverage, bool isEvenOdd, bool isInverse) { +static SK_ALWAYS_INLINE T CoverageToAlpha(const T& coverage, bool isEvenOdd, bool isInverse) { T t0(0), t255(255); T result; if (isEvenOdd) { @@ -217,7 +218,7 @@ static SK_ALWAYS_INLINE SkAlpha ConvexCoverageToAlpha(SkFixed coverage, bool isI } template<typename T> -static SK_ALWAYS_INLINE T ConvexCoverageToAlpha(T coverage, bool isInverse) { +static SK_ALWAYS_INLINE T ConvexCoverageToAlpha(const T& coverage, bool isInverse) { // allTrue is not implemented // SkASSERT((coverage >= 0).allTrue() && (coverage <= SK_Fixed1).allTrue()); T result = coverage.abs() >> 8; |