about | summary | refs | log | tree | commit | diff | homepage
path: root/src/core/Sk4px.h
diff options
context:
space:
mode:
author mtklein <mtklein@chromium.org> 2015-08-10 12:58:17 -0700
committer Commit bot <commit-bot@chromium.org> 2015-08-10 12:58:17 -0700
commit 4977983510028712528743aa877f6da83781b381 (patch)
tree d56b58c866da7e86a80b38d0e85f03abd3f7d87c /src/core/Sk4px.h
parent c699873ac7c6b21bbca96053cdb9720c80f69916 (diff)
Sk4px blit mask.
Local SKP nanobenching ranges SSE between 1.05x and 0.87x, much more heavily weighted toward <1.0x ratios (speedups). I profiled the top five regressions (1.05x-1.01x) and they look like noise. Will follow up after broad bot results.

NEON looks similar but less extreme than SSE changes, ranging between 1.02x and 0.95x, again mostly speedups in 0.99x-0.97x range.

The old code trifurcated into black, opaque-but-not-black, and general versions as a function of the constant src color. I did not see a significant difference between general and opaque-but-not-black, and I don't think a black version would be faster using SIMD. So we have here just one version of the code, the general version.

Somewhat fantastically, I see no pixel diffs on GMs or SKPs.

I will be following up with more CLs for the other procs called by SkBlitMask.

BUG=skia:

Review URL: https://codereview.chromium.org/1278253003
Diffstat (limited to 'src/core/Sk4px.h')
-rw-r--r-- src/core/Sk4px.h 28
1 files changed, 28 insertions, 0 deletions
diff --git a/src/core/Sk4px.h b/src/core/Sk4px.h
index 24a21c66c1..ffde1af504 100644
--- a/src/core/Sk4px.h
+++ b/src/core/Sk4px.h
@@ -165,6 +165,34 @@ public:
}
}
+ // Like the mapping above, but each step computes dst' = fn(dst, alpha), with
+ // the alphas read from a. Each result is stored back over the dst entries it
+ // was loaded from, so dst is updated in place.
+ template <typename Fn, typename Dst>
+ static void MapDstAlpha(int n, Dst* dst, const SkAlpha* a, const Fn& fn) {
+     // Bulk: two 4-wide groups per pass, for as long as at least 8 remain.
+     for (; n >= 8; dst += 8, a += 8, n -= 8) {
+         Sk4px lo = fn(Load4(dst+0), Load4Alphas(a+0)),
+               hi = fn(Load4(dst+4), Load4Alphas(a+4));
+         lo.store4(dst+0);
+         hi.store4(dst+4);
+     }
+     // Tail: at most 7 entries are left; peel them off as 4, then 2, then 1.
+     SkASSERT(n <= 7);
+     if (n >= 4) {
+         fn(Load4(dst), Load4Alphas(a)).store4(dst);
+         dst += 4; a += 4; n -= 4;
+     }
+     if (n >= 2) {
+         fn(Load2(dst), Load2Alphas(a)).store2(dst);
+         dst += 2; a += 2; n -= 2;
+     }
+     if (n > 0) {
+         fn(Load1(dst), DupAlpha(*a)).store1(dst);
+     }
+ }
+
// As above, but with dst4' = fn(dst4, src4, alpha4).
template <typename Fn, typename Dst>
static void MapDstSrcAlpha(int n, Dst* dst, const SkPMColor* src, const SkAlpha* a,