aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/core/SkCoverageDelta.cpp
diff options
context:
space:
mode:
authorGravatar Yuqian Li <liyuqian@google.com>2017-08-08 17:09:01 -0400
committerGravatar Skia Commit-Bot <skia-commit-bot@chromium.org>2017-08-10 17:56:25 +0000
commit92e6cc6b2d42f3826b36bd24da7dd3fe8443b114 (patch)
treef89a4cfa153989ea411e232539a9b532a88d525d /src/core/SkCoverageDelta.cpp
parent80488229ea6e37da9c85ffdb640b99fff3b11f2f (diff)
Some performance tweaks for DAA
1. Always inline (Clang previously ignored inline and got 25% slower) 2. SIMD everywhere other than x86 gcc: non-SIMD is only faster on my desktop with gcc; with Clang on my desktop, SIMD is 50% faster than non-SIMD. 3. Allocate 4x memory instead of 2x when running out of space: on old Android devices with Linux kernel 3.10 (e.g., Nexus 6P, 5X), the alloc/memcpy will trigger a major bottleneck in the kernel (30% of the running time). That bottleneck goes away (the kernel is no longer doing stupid things during alloc/memcpy) in Linux kernel 3.18 (e.g., Pixel), and that's why DAA is much faster on Pixel than on Nexus 6P. I think maybe I should adopt SkRasterPipeline for device-specific optimizations. Bug: skia: Change-Id: I0408aa7671a5f1b39aad3bec25f8fc994ff5a1bb Reviewed-on: https://skia-review.googlesource.com/30820 Reviewed-by: Mike Klein <mtklein@google.com> Commit-Queue: Yuqian Li <liyuqian@google.com>
Diffstat (limited to 'src/core/SkCoverageDelta.cpp')
-rw-r--r--src/core/SkCoverageDelta.cpp16
1 files changed, 3 insertions, 13 deletions
diff --git a/src/core/SkCoverageDelta.cpp b/src/core/SkCoverageDelta.cpp
index 8f109cec1a..43449e9440 100644
--- a/src/core/SkCoverageDelta.cpp
+++ b/src/core/SkCoverageDelta.cpp
@@ -79,6 +79,9 @@ SkCoverageDeltaMask::SkCoverageDeltaMask(const SkIRect& bounds) : fBounds(bounds
memset(fDeltaStorage, 0, (fExpandedWidth * bounds.height() + PADDING * 2) * sizeof(SkFixed));;
}
+// TODO As this function is so performance-critical (and we're thinking so much about SIMD), use
+// SkOpts framework to compile multiple versions of this function so we can choose the best one
+// available at runtime.
void SkCoverageDeltaMask::convertCoverageToAlpha(bool isEvenOdd, bool isInverse, bool isConvex) {
SkFixed* deltaRow = &this->delta(fBounds.fLeft, fBounds.fTop);
SkAlpha* maskRow = fMask;
@@ -117,24 +120,11 @@ void SkCoverageDeltaMask::convertCoverageToAlpha(bool isEvenOdd, bool isInverse,
c[j] = c[j - 1] + deltaRow[ix + j];
}
- // My SIMD CoverageToAlpha seems to be only faster with SSSE3.
- // (On linux, even with -mavx2, my SIMD still seems to be slow...)
- // Even with only SSSE2, it's still faster to do SIMD_WIDTH non-SIMD computations at one
- // time (i.e., SIMD_WIDTH = 8 is faster than SIMD_WIDTH = 1 even if SK_CPU_SSE_LEVEL is
- // less than SK_CPU_SSE_LEVEL_SSSE3). Maybe the compiler is doing some SIMD by itself.
-#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3
using SkNi = SkNx<SIMD_WIDTH, int>;
-
SkNi cn = SkNi::Load(c);
SkNi an = isConvex ? ConvexCoverageToAlpha(cn, isInverse)
: CoverageToAlpha(cn, isEvenOdd, isInverse);
SkNx_cast<SkAlpha>(an).store(maskRow + ix);
-#else
- for(int j = 0; j < SIMD_WIDTH; ++j) {
- maskRow[ix + j] = isConvex ? ConvexCoverageToAlpha(c[j], isInverse)
- : CoverageToAlpha(c[j], isEvenOdd, isInverse);
- }
-#endif
}
// Finally, advance to the next row