diff options
author | 2015-06-26 10:46:31 -0700 | |
---|---|---|
committer | 2015-06-26 10:46:31 -0700 | |
commit | 2aab22a58a366df4752c1cf0f004092c6e7be335 (patch) | |
tree | bc4026ca98f28068b99ca6394c05a0129f0dc4d6 /src/core/Sk4pxXfermode.h | |
parent | cdb42bb55c3bdbbd6682dcd50b5c77322bb6e565 (diff) |
Color dodge and burn with SkPMFloat.
Both 25-35% faster with SSE.
With NEON, Burn measures as a ~10% regression, while Dodge shows a huge 2.9x improvement.
The Burn regression is somewhat artificial: we're drawing randomly colored rects onto an opaque white dst, so we're heavily biased toward the (d==da) fast path in the serial code. In the vector code there's no short-circuiting, so we always pay a fixed cost for ColorBurn regardless of src or dst content.
Dodge's fast paths, in contrast, only trigger when (s==sa) or (d==0), neither of which happens more often than by random chance in our benchmark. With an opaque white dst, I don't think (d==0) should happen at all. Similarly, the (s==0) Burn fast path is really only going to happen as often as SkRandom allows.
In practice, the existing Burn benchmark is hitting its fast path 100% of the time. So I actually feel really great that this only dings the benchmark by 10%.
Chrome's still guarded by SK_SUPPORT_LEGACY_XFERMODES, which I'll lift after finishing the last xfermode, SoftLight.
BUG=skia:
Review URL: https://codereview.chromium.org/1214443002
Diffstat (limited to 'src/core/Sk4pxXfermode.h')
-rw-r--r-- | src/core/Sk4pxXfermode.h | 75 |
1 files changed, 74 insertions, 1 deletions
diff --git a/src/core/Sk4pxXfermode.h b/src/core/Sk4pxXfermode.h index 09490dc990..b587183046 100644 --- a/src/core/Sk4pxXfermode.h +++ b/src/core/Sk4pxXfermode.h @@ -9,11 +9,13 @@ #define Sk4pxXfermode_DEFINED #include "Sk4px.h" +#include "SkPMFloat.h" // This file is possibly included into multiple .cpp files. // Each gets its own independent instantiation by wrapping in an anonymous namespace. namespace { +// Most xfermodes can be done most efficiently 4 pixels at a time in 8 or 16-bit fixed point. #define XFERMODE(Name) \ struct Name { \ static Sk4px Xfer(const Sk4px&, const Sk4px&); \ @@ -97,7 +99,48 @@ XFERMODE(Lighten) { colors = (sda < dsa).thenElse(dstover, srcover); return alphas.zeroColors() + colors.zeroAlphas(); } +#undef XFERMODE + +// Some xfermodes use math like divide or sqrt that's best done in floats 1 pixel at a time. +#define XFERMODE(Name) \ + struct Name { \ + static SkPMFloat Xfer(const SkPMFloat&, const SkPMFloat&); \ + static const SkXfermode::Mode kMode = SkXfermode::k##Name##_Mode; \ + }; \ + inline SkPMFloat Name::Xfer(const SkPMFloat& s, const SkPMFloat& d) + +XFERMODE(ColorDodge) { + auto sa = s.alphas(), + da = d.alphas(), + isa = Sk4f(1)-sa, + ida = Sk4f(1)-da; + auto srcover = s + d*isa, + dstover = d + s*ida, + otherwise = sa * Sk4f::Min(da, (d*sa)*(sa-s).approxInvert()) + s*ida + d*isa; + + // Order matters here, preferring d==0 over s==sa. + auto colors = (d == Sk4f(0)).thenElse(dstover, + (s == sa).thenElse(srcover, + otherwise)); + return srcover * SkPMFloat(1,0,0,0) + colors * SkPMFloat(0,1,1,1); +} +XFERMODE(ColorBurn) { + auto sa = s.alphas(), + da = d.alphas(), + isa = Sk4f(1)-sa, + ida = Sk4f(1)-da; + + auto srcover = s + d*isa, + dstover = d + s*ida, + otherwise = sa*(da-Sk4f::Min(da, (da-d)*sa*s.approxInvert())) + s*ida + d*isa; + + // Order matters here, preferring d==da over s==0. 
+ auto colors = (d == da).thenElse(dstover, + (s == Sk4f(0)).thenElse(srcover, + otherwise)); + return srcover * SkPMFloat(1,0,0,0) + colors * SkPMFloat(0,1,1,1); +} #undef XFERMODE // A reasonable fallback mode for doing AA is to simply apply the transfermode first, @@ -140,7 +183,34 @@ public: } private: - SkT4pxXfermode(const ProcCoeff& rec) : SkProcCoeffXfermode(rec, ProcType::kMode) {} + SkT4pxXfermode(const ProcCoeff& rec) : INHERITED(rec, ProcType::kMode) {} + + typedef SkProcCoeffXfermode INHERITED; +}; + +template <typename ProcType> +class SkTPMFloatXfermode : public SkProcCoeffXfermode { +public: + static SkProcCoeffXfermode* Create(const ProcCoeff& rec) { + return SkNEW_ARGS(SkTPMFloatXfermode, (rec)); + } + + void xfer32(SkPMColor dst[], const SkPMColor src[], int n, const SkAlpha aa[]) const override { + for (int i = 0; i < n; i++) { + SkPMFloat s(src[i]), + d(dst[i]), + b(ProcType::Xfer(s,d)); + if (aa) { + // We do aa in full float precision before going back down to bytes, because we can! + SkPMFloat a = Sk4f(aa[i]) * Sk4f(1.0f/255); + b = b*a + d*(Sk4f(1)-a); + } + dst[i] = b.round(); + } + } + +private: + SkTPMFloatXfermode(const ProcCoeff& rec) : INHERITED(rec, ProcType::kMode) {} typedef SkProcCoeffXfermode INHERITED; }; @@ -171,6 +241,9 @@ static SkProcCoeffXfermode* SkCreate4pxXfermode(const ProcCoeff& rec, SkXfermode case SkXfermode::kOverlay_Mode: return SkT4pxXfermode<Overlay>::Create(rec); case SkXfermode::kDarken_Mode: return SkT4pxXfermode<Darken>::Create(rec); case SkXfermode::kLighten_Mode: return SkT4pxXfermode<Lighten>::Create(rec); + + case SkXfermode::kColorDodge_Mode: return SkTPMFloatXfermode<ColorDodge>::Create(rec); + case SkXfermode::kColorBurn_Mode: return SkTPMFloatXfermode<ColorBurn>::Create(rec); #endif default: break; } |