aboutsummaryrefslogtreecommitdiffhomepage
path: root/src
diff options
context:
space:
mode:
author mtklein <mtklein@chromium.org> 2015-07-21 12:39:57 -0700
committer Commit bot <commit-bot@chromium.org> 2015-07-21 12:39:57 -0700
commit cd1930d4f19297c4089d7cb278243e4815c5793d (patch)
tree 76fc74503628b0753ce5e090f59ad98832a301a1 /src
parent c3dcb67f07173af1c17771965ab7254910d52ef3 (diff)
De-templatize Sk4pxXfermode code a bit.
This deduplicates a few pieces of code:
  - we end up with one copy of each xfer32() driver loop instead of one per xfermode;
  - we end up with two* copies of each xfermode implementation instead of ten**.

 * For a given Mode: Mode() itself and xfer_aa<Mode>().
** From unrolling: twice at a stride of 8, once at 4, once at 2, and once at 1, then all again for when we have AA.

This decreases the size of SkXfermode.o from 1.5M to 620K on x86-64 and from 1.3M to 680K on ARMv7+NEON.

If we wanted to, we could eliminate the xfer_aa<Mode>() copy by tagging each Mode() function as __attribute__((noinline)) or its equivalent. This would result in another ~100K space savings.

Performance is affected in proportion to the original xfermode speed: fast modes like Plus take the largest proportional hit, and slow modes like HardLight or SoftLight see essentially no hit at all.

This adds SK_VECTORCALL to help keep this code fast on ARMv7 and Windows. I've looked at the ARMv7 generated code... it looks good, even pretty.

For compatibility with SK_VECTORCALL, we now pass the vector-sized arguments by value instead of by reference.

Some refactoring now allows us to declare each mode as just a static function instead of a struct, which simplifies things.

TBR=reed@google.com
No public API changes.
BUG=skia:

Committed: https://skia.googlesource.com/skia/+/e617e1525916d7ee684142728c0905828caf49da

CQ_EXTRA_TRYBOTS=client.skia.compile:Build-Ubuntu-GCC-Arm7-Debug-Android_NoNeon-Trybot

Review URL: https://codereview.chromium.org/1242743004
Diffstat (limited to 'src')
-rw-r--r-- src/core/Sk4pxXfermode.h 138
1 file changed, 72 insertions, 66 deletions
diff --git a/src/core/Sk4pxXfermode.h b/src/core/Sk4pxXfermode.h
index 0c8dcb5302..fc0b643be1 100644
--- a/src/core/Sk4pxXfermode.h
+++ b/src/core/Sk4pxXfermode.h
@@ -16,13 +16,15 @@
// Each gets its own independent instantiation by wrapping in an anonymous namespace.
namespace {
+#if defined(SK_CPU_ARM32) && !defined(SK_ARM_HAS_NEON)
+ // Signals SkXfermode.cpp to look for runtime-detected NEON.
+ static SkProcCoeffXfermode* SkCreate4pxXfermode(const ProcCoeff& rec, SkXfermode::Mode mode) {
+ return nullptr;
+ }
+#else
+
// Most xfermodes can be done most efficiently 4 pixels at a time in 8 or 16-bit fixed point.
-#define XFERMODE(Name) \
- struct Name { \
- static Sk4px Xfer(const Sk4px&, const Sk4px&); \
- static const SkXfermode::Mode kMode = SkXfermode::k##Name##_Mode; \
- }; \
- inline Sk4px Name::Xfer(const Sk4px& s, const Sk4px& d)
+#define XFERMODE(Name) static Sk4px SK_VECTORCALL Name(Sk4px s, Sk4px d)
XFERMODE(Clear) { return Sk4px::DupPMColor(0); }
XFERMODE(Src) { return s; }
@@ -30,13 +32,13 @@ XFERMODE(Dst) { return d; }
XFERMODE(SrcIn) { return s.approxMulDiv255(d.alphas() ); }
XFERMODE(SrcOut) { return s.approxMulDiv255(d.alphas().inv()); }
XFERMODE(SrcOver) { return s + d.approxMulDiv255(s.alphas().inv()); }
-XFERMODE(DstIn) { return SrcIn ::Xfer(d,s); }
-XFERMODE(DstOut) { return SrcOut ::Xfer(d,s); }
-XFERMODE(DstOver) { return SrcOver::Xfer(d,s); }
+XFERMODE(DstIn) { return SrcIn (d,s); }
+XFERMODE(DstOut) { return SrcOut (d,s); }
+XFERMODE(DstOver) { return SrcOver(d,s); }
// [ S * Da + (1 - Sa) * D]
XFERMODE(SrcATop) { return (s * d.alphas() + d * s.alphas().inv()).div255(); }
-XFERMODE(DstATop) { return SrcATop::Xfer(d,s); }
+XFERMODE(DstATop) { return SrcATop(d,s); }
//[ S * (1 - Da) + (1 - Sa) * D ]
XFERMODE(Xor) { return (s * d.alphas().inv() + d * s.alphas().inv()).div255(); }
// [S + D ]
@@ -86,7 +88,7 @@ XFERMODE(HardLight) {
auto colors = (both + isLite.thenElse(lite, dark)).div255();
return alphas.zeroColors() + colors.zeroAlphas();
}
-XFERMODE(Overlay) { return HardLight::Xfer(d,s); }
+XFERMODE(Overlay) { return HardLight(d,s); }
XFERMODE(Darken) {
auto sa = s.alphas(),
@@ -117,12 +119,7 @@ XFERMODE(Lighten) {
#undef XFERMODE
// Some xfermodes use math like divide or sqrt that's best done in floats 1 pixel at a time.
-#define XFERMODE(Name) \
- struct Name { \
- static SkPMFloat Xfer(const SkPMFloat&, const SkPMFloat&); \
- static const SkXfermode::Mode kMode = SkXfermode::k##Name##_Mode; \
- }; \
- inline SkPMFloat Name::Xfer(const SkPMFloat& s, const SkPMFloat& d)
+#define XFERMODE(Name) static SkPMFloat SK_VECTORCALL Name(SkPMFloat s, SkPMFloat d)
XFERMODE(ColorDodge) {
auto sa = s.alphas(),
@@ -185,15 +182,15 @@ XFERMODE(SoftLight) {
// A reasonable fallback mode for doing AA is to simply apply the transfermode first,
// then linearly interpolate the AA.
-template <typename Mode>
-static Sk4px xfer_aa(const Sk4px& s, const Sk4px& d, const Sk4px& aa) {
- Sk4px bw = Mode::Xfer(s, d);
+template <Sk4px (SK_VECTORCALL *Mode)(Sk4px, Sk4px)>
+static Sk4px SK_VECTORCALL xfer_aa(Sk4px s, Sk4px d, Sk4px aa) {
+ Sk4px bw = Mode(s, d);
return (bw * aa + d * aa.inv()).div255();
}
// For some transfermodes we specialize AA, either for correctness or performance.
#define XFERMODE_AA(Name) \
- template <> Sk4px xfer_aa<Name>(const Sk4px& s, const Sk4px& d, const Sk4px& aa)
+ template <> Sk4px SK_VECTORCALL xfer_aa<Name>(Sk4px s, Sk4px d, Sk4px aa)
// Plus' clamp needs to happen after AA. skia:3852
XFERMODE_AA(Plus) { // [ clamp( (1-AA)D + (AA)(S+D) ) == clamp(D + AA*S) ]
@@ -202,44 +199,47 @@ XFERMODE_AA(Plus) { // [ clamp( (1-AA)D + (AA)(S+D) ) == clamp(D + AA*S) ]
#undef XFERMODE_AA
-template <typename ProcType>
-class SkT4pxXfermode : public SkProcCoeffXfermode {
+class Sk4pxXfermode : public SkProcCoeffXfermode {
public:
- static SkProcCoeffXfermode* Create(const ProcCoeff& rec) {
- return SkNEW_ARGS(SkT4pxXfermode, (rec));
- }
+ typedef Sk4px (SK_VECTORCALL *Proc4)(Sk4px, Sk4px);
+ typedef Sk4px (SK_VECTORCALL *AAProc4)(Sk4px, Sk4px, Sk4px);
+
+ Sk4pxXfermode(const ProcCoeff& rec, SkXfermode::Mode mode, Proc4 proc4, AAProc4 aaproc4)
+ : INHERITED(rec, mode)
+ , fProc4(proc4)
+ , fAAProc4(aaproc4) {}
void xfer32(SkPMColor dst[], const SkPMColor src[], int n, const SkAlpha aa[]) const override {
if (NULL == aa) {
Sk4px::MapDstSrc(n, dst, src, [&](const Sk4px& dst4, const Sk4px& src4) {
- return ProcType::Xfer(src4, dst4);
+ return fProc4(src4, dst4);
});
} else {
Sk4px::MapDstSrcAlpha(n, dst, src, aa,
[&](const Sk4px& dst4, const Sk4px& src4, const Sk4px& alpha) {
- return xfer_aa<ProcType>(src4, dst4, alpha);
+ return fAAProc4(src4, dst4, alpha);
});
}
}
private:
- SkT4pxXfermode(const ProcCoeff& rec) : INHERITED(rec, ProcType::kMode) {}
-
+ Proc4 fProc4;
+ AAProc4 fAAProc4;
typedef SkProcCoeffXfermode INHERITED;
};
-template <typename ProcType>
-class SkTPMFloatXfermode : public SkProcCoeffXfermode {
+class SkPMFloatXfermode : public SkProcCoeffXfermode {
public:
- static SkProcCoeffXfermode* Create(const ProcCoeff& rec) {
- return SkNEW_ARGS(SkTPMFloatXfermode, (rec));
- }
+ typedef SkPMFloat (SK_VECTORCALL *ProcF)(SkPMFloat, SkPMFloat);
+ SkPMFloatXfermode(const ProcCoeff& rec, SkXfermode::Mode mode, ProcF procf)
+ : INHERITED(rec, mode)
+ , fProcF(procf) {}
void xfer32(SkPMColor dst[], const SkPMColor src[], int n, const SkAlpha aa[]) const override {
for (int i = 0; i < n; i++) {
SkPMFloat s(src[i]),
d(dst[i]),
- b(ProcType::Xfer(s,d));
+ b(fProcF(s,d));
if (aa) {
// We do aa in full float precision before going back down to bytes, because we can!
SkPMFloat a = Sk4f(aa[i]) * Sk4f(1.0f/255);
@@ -250,46 +250,52 @@ public:
}
private:
- SkTPMFloatXfermode(const ProcCoeff& rec) : INHERITED(rec, ProcType::kMode) {}
-
+ ProcF fProcF;
typedef SkProcCoeffXfermode INHERITED;
};
static SkProcCoeffXfermode* SkCreate4pxXfermode(const ProcCoeff& rec, SkXfermode::Mode mode) {
-#if !defined(SK_CPU_ARM32) || defined(SK_ARM_HAS_NEON)
switch (mode) {
- case SkXfermode::kClear_Mode: return SkT4pxXfermode<Clear>::Create(rec);
- case SkXfermode::kSrc_Mode: return SkT4pxXfermode<Src>::Create(rec);
- case SkXfermode::kDst_Mode: return SkT4pxXfermode<Dst>::Create(rec);
- case SkXfermode::kSrcOver_Mode: return SkT4pxXfermode<SrcOver>::Create(rec);
- case SkXfermode::kDstOver_Mode: return SkT4pxXfermode<DstOver>::Create(rec);
- case SkXfermode::kSrcIn_Mode: return SkT4pxXfermode<SrcIn>::Create(rec);
- case SkXfermode::kDstIn_Mode: return SkT4pxXfermode<DstIn>::Create(rec);
- case SkXfermode::kSrcOut_Mode: return SkT4pxXfermode<SrcOut>::Create(rec);
- case SkXfermode::kDstOut_Mode: return SkT4pxXfermode<DstOut>::Create(rec);
- case SkXfermode::kSrcATop_Mode: return SkT4pxXfermode<SrcATop>::Create(rec);
- case SkXfermode::kDstATop_Mode: return SkT4pxXfermode<DstATop>::Create(rec);
- case SkXfermode::kXor_Mode: return SkT4pxXfermode<Xor>::Create(rec);
- case SkXfermode::kPlus_Mode: return SkT4pxXfermode<Plus>::Create(rec);
- case SkXfermode::kModulate_Mode: return SkT4pxXfermode<Modulate>::Create(rec);
- case SkXfermode::kScreen_Mode: return SkT4pxXfermode<Screen>::Create(rec);
- case SkXfermode::kMultiply_Mode: return SkT4pxXfermode<Multiply>::Create(rec);
- case SkXfermode::kDifference_Mode: return SkT4pxXfermode<Difference>::Create(rec);
- case SkXfermode::kExclusion_Mode: return SkT4pxXfermode<Exclusion>::Create(rec);
- case SkXfermode::kHardLight_Mode: return SkT4pxXfermode<HardLight>::Create(rec);
- case SkXfermode::kOverlay_Mode: return SkT4pxXfermode<Overlay>::Create(rec);
- case SkXfermode::kDarken_Mode: return SkT4pxXfermode<Darken>::Create(rec);
- case SkXfermode::kLighten_Mode: return SkT4pxXfermode<Lighten>::Create(rec);
-
- case SkXfermode::kColorDodge_Mode: return SkTPMFloatXfermode<ColorDodge>::Create(rec);
- case SkXfermode::kColorBurn_Mode: return SkTPMFloatXfermode<ColorBurn>::Create(rec);
- case SkXfermode::kSoftLight_Mode: return SkTPMFloatXfermode<SoftLight>::Create(rec);
+ #define CASE(Mode) case SkXfermode::k##Mode##_Mode: \
+ return SkNEW_ARGS(Sk4pxXfermode, (rec, mode, &Mode, &xfer_aa<Mode>))
+ CASE(Clear);
+ CASE(Src);
+ CASE(Dst);
+ CASE(SrcOver);
+ CASE(DstOver);
+ CASE(SrcIn);
+ CASE(DstIn);
+ CASE(SrcOut);
+ CASE(DstOut);
+ CASE(SrcATop);
+ CASE(DstATop);
+ CASE(Xor);
+ CASE(Plus);
+ CASE(Modulate);
+ CASE(Screen);
+ CASE(Multiply);
+ CASE(Difference);
+ CASE(Exclusion);
+ CASE(HardLight);
+ CASE(Overlay);
+ CASE(Darken);
+ CASE(Lighten);
+ #undef CASE
+
+ #define CASE(Mode) case SkXfermode::k##Mode##_Mode: \
+ return SkNEW_ARGS(SkPMFloatXfermode, (rec, mode, &Mode))
+ CASE(ColorDodge);
+ CASE(ColorBurn);
+ CASE(SoftLight);
+ #undef CASE
+
default: break;
}
-#endif
return nullptr;
}
+#endif
+
} // namespace
#endif//Sk4pxXfermode_DEFINED