diff options
author | mtklein <mtklein@chromium.org> | 2015-07-14 10:54:19 -0700 |
---|---|---|
committer | Commit bot <commit-bot@chromium.org> | 2015-07-14 10:54:19 -0700 |
commit | 4be181e304d2b280c6801bd13369cfba236d1a66 (patch) | |
tree | ae0510f8a6504c3333582fa004e961a8771a2d99 /src/core | |
parent | a5517e2b190a8083b38964972b031c13e99f1012 (diff) |
3-15% speedup to HardLight / Overlay xfermodes.
While investigating my bug (skia:4052) I saw this TODO and figured
it'd make me feel better about an otherwise unsuccessful investigation.
This speeds up HardLight and Overlay (same code) by about 15% with SSE, mostly
by rewriting the logic from 1 cheap comparison and 2 expensive div255() calls
to 2 cheap comparisons and 1 expensive div255().
NEON speeds up by a more modest ~3%.
BUG=skia:
Review URL: https://codereview.chromium.org/1230663005
Diffstat (limited to 'src/core')
-rw-r--r-- | src/core/Sk4px.h | 2 |
-rw-r--r-- | src/core/Sk4pxXfermode.h | 6 |
2 files changed, 4 insertions, 4 deletions
```diff
diff --git a/src/core/Sk4px.h b/src/core/Sk4px.h
index e046e265fe..e1d4dc1244 100644
--- a/src/core/Sk4px.h
+++ b/src/core/Sk4px.h
@@ -70,6 +70,7 @@ public:
         Wide operator >> (int bits) const { return INHERITED::operator>>(bits); }
         Wide operator << (int bits) const { return INHERITED::operator<<(bits); }
         static Wide Min(const Wide& a, const Wide& b) { return INHERITED::Min(a,b); }
+        Wide thenElse(const Wide& t, const Wide& e) const { return INHERITED::thenElse(t,e); }
 
     private:
         typedef Sk16h INHERITED;
@@ -77,6 +78,7 @@ public:
 
     Wide widenLo() const; // ARGB -> 0A 0R 0G 0B
     Wide widenHi() const; // ARGB -> A0 R0 G0 B0
+    Wide widenLoHi() const; // ARGB -> AA RR GG BB
     Wide mulWiden(const Sk16b&) const; // 8-bit x 8-bit -> 16-bit components.
 
     // The only 8-bit multiply we use is 8-bit x 8-bit -> 16-bit. Might as well make it pithy.
diff --git a/src/core/Sk4pxXfermode.h b/src/core/Sk4pxXfermode.h
index 98b0bd901f..97321b7413 100644
--- a/src/core/Sk4pxXfermode.h
+++ b/src/core/Sk4pxXfermode.h
@@ -68,15 +68,13 @@ XFERMODE(HardLight) {
     auto sa = s.alphas(),
          da = d.alphas();
 
-    auto isLite = (sa-s) < s;
+    auto isLite = ((sa-s) < s).widenLoHi();
 
     auto dark = s*d << 1,
          lite = sa*da - ((da-d)*(sa-s) << 1),
          both = s*da.inv() + d*sa.inv();
 
-    // TODO: do isLite in 16-bit so we only have to div255() once.
-    auto colors = isLite.thenElse((lite + both).div255(),
-                                  (dark + both).div255());
+    auto colors = (both + isLite.thenElse(lite, dark)).div255();
     return alphas.zeroColors() + colors.zeroAlphas();
 }
 XFERMODE(Overlay) { return HardLight::Xfer(d,s); }
```