author     Yuqian Li <liyuqian@google.com>  2017-08-08 17:09:01 -0400
committer  Skia Commit-Bot <skia-commit-bot@chromium.org>  2017-08-10 17:56:25 +0000
commit     92e6cc6b2d42f3826b36bd24da7dd3fe8443b114 (patch)
tree       f89a4cfa153989ea411e232539a9b532a88d525d /src
parent     80488229ea6e37da9c85ffdb640b99fff3b11f2f (diff)
Some performance tweaks for DAA
1. Always inline: Clang previously ignored "inline" and got 25% slower.

2. Use SIMD everywhere except x86 gcc: the non-SIMD path is only faster on my desktop with gcc; with Clang on the same desktop, SIMD is 50% faster than non-SIMD.

3. Allocate 4x memory instead of 2x when a row runs out of space (see the sketch after this message): on old Android devices with Linux kernel 3.10 (e.g., Nexus 6P, Nexus 5X), the alloc/memcpy triggers a major bottleneck in the kernel (30% of the running time). That bottleneck goes away (the kernel is no longer misbehaving during alloc/memcpy) with Linux kernel 3.18 (e.g., Pixel), which is why DAA is much faster on the Pixel than on the Nexus 6P.

Maybe I should adopt SkRasterPipeline for device-specific optimizations.

Bug: skia:
Change-Id: I0408aa7671a5f1b39aad3bec25f8fc994ff5a1bb
Reviewed-on: https://skia-review.googlesource.com/30820
Reviewed-by: Mike Klein <mtklein@google.com>
Commit-Queue: Yuqian Li <liyuqian@google.com>
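The 4x growth in point 3 is the usual geometric-growth tradeoff. A minimal sketch of the idea, using hypothetical names rather than Skia's SkCoverageDeltaList/SkArenaAlloc API: growing a row by 4x instead of 2x roughly halves the number of grow-and-copy rounds needed to reach n deltas (about log4 n versus log2 n reallocations), at the cost of up to 4x over-allocation, which matters when each alloc/memcpy is expensive in the kernel.

    #include <cstdlib>
    #include <cstring>

    struct Delta { int x; int value; };    // stand-in for SkCoverageDelta

    struct Row {                           // hypothetical row storage, not Skia's
        Delta* data  = nullptr;
        int    count = 0;
        int    cap   = 0;

        void push_back(const Delta& d) {
            if (count == cap) {
                int newCap = cap ? cap * 4 : 4;   // 4x growth, as in this CL
                Delta* grown = static_cast<Delta*>(std::malloc(newCap * sizeof(Delta)));
                if (count) {
                    std::memcpy(grown, data, count * sizeof(Delta));
                }
                std::free(data);
                data = grown;
                cap  = newCap;
            }
            data[count++] = d;
        }

        ~Row() { std::free(data); }
    };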
Diffstat (limited to 'src')
-rw-r--r--  src/core/SkCoverageDelta.cpp  16
-rw-r--r--  src/core/SkCoverageDelta.h    77
2 files changed, 42 insertions, 51 deletions
diff --git a/src/core/SkCoverageDelta.cpp b/src/core/SkCoverageDelta.cpp
index 8f109cec1a..43449e9440 100644
--- a/src/core/SkCoverageDelta.cpp
+++ b/src/core/SkCoverageDelta.cpp
@@ -79,6 +79,9 @@ SkCoverageDeltaMask::SkCoverageDeltaMask(const SkIRect& bounds) : fBounds(bounds
memset(fDeltaStorage, 0, (fExpandedWidth * bounds.height() + PADDING * 2) * sizeof(SkFixed));;
}
+// TODO As this function is so performance-critical (and we're thinking so much about SIMD), use
+// SkOpts framework to compile multiple versions of this function so we can choose the best one
+// available at runtime.
void SkCoverageDeltaMask::convertCoverageToAlpha(bool isEvenOdd, bool isInverse, bool isConvex) {
SkFixed* deltaRow = &this->delta(fBounds.fLeft, fBounds.fTop);
SkAlpha* maskRow = fMask;
@@ -117,24 +120,11 @@ void SkCoverageDeltaMask::convertCoverageToAlpha(bool isEvenOdd, bool isInverse,
c[j] = c[j - 1] + deltaRow[ix + j];
}
- // My SIMD CoverageToAlpha seems to be only faster with SSSE3.
- // (On linux, even with -mavx2, my SIMD still seems to be slow...)
- // Even with only SSSE2, it's still faster to do SIMD_WIDTH non-SIMD computations at one
- // time (i.e., SIMD_WIDTH = 8 is faster than SIMD_WIDTH = 1 even if SK_CPU_SSE_LEVEL is
- // less than SK_CPU_SSE_LEVEL_SSSE3). Maybe the compiler is doing some SIMD by itself.
-#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3
using SkNi = SkNx<SIMD_WIDTH, int>;
-
SkNi cn = SkNi::Load(c);
SkNi an = isConvex ? ConvexCoverageToAlpha(cn, isInverse)
: CoverageToAlpha(cn, isEvenOdd, isInverse);
SkNx_cast<SkAlpha>(an).store(maskRow + ix);
-#else
- for(int j = 0; j < SIMD_WIDTH; ++j) {
- maskRow[ix + j] = isConvex ? ConvexCoverageToAlpha(c[j], isInverse)
- : CoverageToAlpha(c[j], isEvenOdd, isInverse);
- }
-#endif
}
// Finally, advance to the next row
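The TODO added above mentions compiling multiple versions of the routine via Skia's SkOpts framework. Roughly, that means building the same source once portably and once per higher instruction set (normally in separate translation units with different -m flags), then selecting one through a function pointer at startup. A hedged sketch of that dispatch pattern with assumed names, not the real SkOpts API:

    #include <cstdint>

    namespace portable {
        // Baseline scalar conversion of accumulated 16.16 coverage to 8-bit alpha.
        void coverageToAlpha(const int32_t* coverage, uint8_t* alpha, int n) {
            for (int i = 0; i < n; ++i) {
                int32_t c = coverage[i] >> 8;                 // 0..256 for 0..SK_Fixed1
                alpha[i] = static_cast<uint8_t>(c < 0 ? 0 : (c > 255 ? 255 : c));
            }
        }
    }

    namespace ssse3 {
        // Real code would use SIMD intrinsics and be compiled with -mssse3 in its
        // own translation unit; reusing the portable body keeps this sketch short.
        void coverageToAlpha(const int32_t* coverage, uint8_t* alpha, int n) {
            portable::coverageToAlpha(coverage, alpha, n);
        }
    }

    // Hot paths call through this pointer, set once before any drawing starts.
    void (*gCoverageToAlpha)(const int32_t*, uint8_t*, int) = portable::coverageToAlpha;

    bool cpuHasSSSE3() { return false; }   // a real probe would query CPUID

    void initOpts() {
        if (cpuHasSSSE3()) {
            gCoverageToAlpha = ssse3::coverageToAlpha;
        }
    }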
diff --git a/src/core/SkCoverageDelta.h b/src/core/SkCoverageDelta.h
index 4c6b12bae6..9218157708 100644
--- a/src/core/SkCoverageDelta.h
+++ b/src/core/SkCoverageDelta.h
@@ -55,21 +55,21 @@ public:
SkCoverageDeltaList(SkCoverageDeltaAllocator* alloc, int top, int bottom, bool forceRLE);
- inline int top() const { return fTop; }
- inline int bottom() const { return fBottom; }
- inline bool forceRLE() const { return fForceRLE; }
- inline int count(int y) const { this->checkY(y); return fCounts[y]; }
- inline bool sorted(int y) const { this->checkY(y); return fSorted[y]; }
- inline void addDelta(int x, int y, SkFixed delta) { this->push_back(y, {x, delta}); }
-
- inline const SkCoverageDelta& getDelta(int y, int i) const {
+ int top() const { return fTop; }
+ int bottom() const { return fBottom; }
+ bool forceRLE() const { return fForceRLE; }
+ int count(int y) const { this->checkY(y); return fCounts[y]; }
+ bool sorted(int y) const { this->checkY(y); return fSorted[y]; }
+
+ SK_ALWAYS_INLINE void addDelta(int x, int y, SkFixed delta) { this->push_back(y, {x, delta}); }
+ SK_ALWAYS_INLINE const SkCoverageDelta& getDelta(int y, int i) const {
this->checkY(y);
SkASSERT(i < fCounts[y]);
return fRows[y][i];
}
// It might be better to sort right before blitting to make the memory hot
- inline void sort(int y) {
+ void sort(int y) {
this->checkY(y);
if (!fSorted[y]) {
SkTQSort(fRows[y], fRows[y] + fCounts[y] - 1);
@@ -77,25 +77,12 @@ public:
}
}
- inline const SkAntiRect& getAntiRect() const { return fAntiRect; }
- inline void setAntiRect(int x, int y, int width, int height,
+ const SkAntiRect& getAntiRect() const { return fAntiRect; }
+ void setAntiRect(int x, int y, int width, int height,
SkAlpha leftAlpha, SkAlpha rightAlpha) {
fAntiRect = {x, y, width, height, leftAlpha, rightAlpha};
}
- inline void push_back(int y, const SkCoverageDelta& delta) {
- this->checkY(y);
- if (fCounts[y] == fMaxCounts[y]) {
- fMaxCounts[y] *= 2;
- SkCoverageDelta* newRow = fAlloc->makeArrayDefault<SkCoverageDelta>(fMaxCounts[y]);
- memcpy(newRow, fRows[y], sizeof(SkCoverageDelta) * fCounts[y]);
- fRows[y] = newRow;
- }
- SkASSERT(fCounts[y] < fMaxCounts[y]);
- fRows[y][fCounts[y]++] = delta;
- fSorted[y] = fSorted[y] && (fCounts[y] == 1 || delta.fX >= fRows[y][fCounts[y] - 2].fX);
- }
-
private:
SkCoverageDeltaAllocator* fAlloc;
SkCoverageDelta** fRows;
@@ -113,7 +100,20 @@ private:
int fReservedCounts[RESERVED_HEIGHT];
int fReservedMaxCounts[RESERVED_HEIGHT];
- inline void checkY(int y) const { SkASSERT(y >= fTop && y < fBottom); }
+ void checkY(int y) const { SkASSERT(y >= fTop && y < fBottom); }
+
+ SK_ALWAYS_INLINE void push_back(int y, const SkCoverageDelta& delta) {
+ this->checkY(y);
+ if (fCounts[y] == fMaxCounts[y]) {
+ fMaxCounts[y] *= 4;
+ SkCoverageDelta* newRow = fAlloc->makeArrayDefault<SkCoverageDelta>(fMaxCounts[y]);
+ memcpy(newRow, fRows[y], sizeof(SkCoverageDelta) * fCounts[y]);
+ fRows[y] = newRow;
+ }
+ SkASSERT(fCounts[y] < fMaxCounts[y]);
+ fRows[y][fCounts[y]++] = delta;
+ fSorted[y] = fSorted[y] && (fCounts[y] == 1 || delta.fX >= fRows[y][fCounts[y] - 2].fX);
+ }
};
class SkCoverageDeltaMask {
@@ -136,24 +136,24 @@ public:
SkCoverageDeltaMask(const SkIRect& bounds);
- inline int top() const { return fBounds.fTop; }
- inline int bottom() const { return fBounds.fBottom; }
- inline SkAlpha* getMask() { return fMask; }
- inline const SkIRect& getBounds() const { return fBounds; }
+ int top() const { return fBounds.fTop; }
+ int bottom() const { return fBounds.fBottom; }
+ SkAlpha* getMask() { return fMask; }
+ const SkIRect& getBounds() const { return fBounds; }
- inline void addDelta (int x, int y, SkFixed delta) { this->delta(x, y) += delta; }
- inline SkFixed& delta (int x, int y) {
+ SK_ALWAYS_INLINE void addDelta (int x, int y, SkFixed delta) { this->delta(x, y) += delta; }
+ SK_ALWAYS_INLINE SkFixed& delta (int x, int y) {
this->checkX(x);
this->checkY(y);
return fDeltas[this->index(x, y)];
}
- inline void setAntiRect(int x, int y, int width, int height,
+ void setAntiRect(int x, int y, int width, int height,
SkAlpha leftAlpha, SkAlpha rightAlpha) {
fAntiRect = {x, y, width, height, leftAlpha, rightAlpha};
}
- inline SkMask prepareSkMask() {
+ SkMask prepareSkMask() {
SkMask mask;
mask.fImage = fMask;
mask.fBounds = fBounds;
@@ -172,9 +172,10 @@ private:
int fExpandedWidth;
SkAntiRect fAntiRect;
- inline int index(int x, int y) const { return y * fExpandedWidth + x; }
- inline void checkY(int y) const { SkASSERT(y >= fBounds.fTop && y < fBounds.fBottom); }
- inline void checkX(int x) const {
+ SK_ALWAYS_INLINE int index(int x, int y) const { return y * fExpandedWidth + x; }
+
+ void checkY(int y) const { SkASSERT(y >= fBounds.fTop && y < fBounds.fBottom); }
+ void checkX(int x) const {
SkASSERT(x >= fBounds.fLeft - PADDING && x < fBounds.fRight + PADDING);
}
};
@@ -192,7 +193,7 @@ static SK_ALWAYS_INLINE SkAlpha CoverageToAlpha(SkFixed coverage, bool isEvenOdd
}
template<typename T>
-static SK_ALWAYS_INLINE T CoverageToAlpha(T coverage, bool isEvenOdd, bool isInverse) {
+static SK_ALWAYS_INLINE T CoverageToAlpha(const T& coverage, bool isEvenOdd, bool isInverse) {
T t0(0), t255(255);
T result;
if (isEvenOdd) {
@@ -217,7 +218,7 @@ static SK_ALWAYS_INLINE SkAlpha ConvexCoverageToAlpha(SkFixed coverage, bool isI
}
template<typename T>
-static SK_ALWAYS_INLINE T ConvexCoverageToAlpha(T coverage, bool isInverse) {
+static SK_ALWAYS_INLINE T ConvexCoverageToAlpha(const T& coverage, bool isInverse) {
// allTrue is not implemented
// SkASSERT((coverage >= 0).allTrue() && (coverage <= SK_Fixed1).allTrue());
T result = coverage.abs() >> 8;
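For context on the CoverageToAlpha/ConvexCoverageToAlpha helpers above: after the running sum, each pixel holds a 16.16 fixed-point coverage where SK_Fixed1 means fully covered, and the fill rule decides how it maps to alpha. A rough scalar sketch of that mapping, under the usual even-odd/winding rules and not Skia's exact code:

    #include <algorithm>
    #include <cstdint>
    #include <cstdlib>

    constexpr int32_t kFixed1 = 1 << 16;   // stand-in for SK_Fixed1

    uint8_t coverageToAlpha(int32_t coverage, bool isEvenOdd, bool isInverse) {
        int32_t folded;
        if (isEvenOdd) {
            // Even-odd: coverage repeats every 2*kFixed1; fold it back into [0, kFixed1].
            int32_t mod = std::abs(coverage) % (2 * kFixed1);
            folded = mod > kFixed1 ? 2 * kFixed1 - mod : mod;
        } else {
            // Winding (and the convex fast path): clamp |coverage| to full coverage.
            folded = std::min(std::abs(coverage), kFixed1);
        }
        int32_t a = std::min(folded >> 8, 255);   // 16.16 -> 0..255
        return static_cast<uint8_t>(isInverse ? 255 - a : a);
    }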