aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/opts/SkXfermode_opts.h
diff options
context:
space:
mode:
author: mtklein <mtklein@chromium.org> 2015-11-11 11:39:09 -0800
committer: Commit bot <commit-bot@chromium.org> 2015-11-11 11:39:09 -0800
commit: 084db25d47dbad3ffbd7d15c04b63d344b351f90 (patch)
tree: 1152701d1608211322670f1246c7228978d8e32e /src/opts/SkXfermode_opts.h
parent: 9be5ff6f9871ef22740094e7c25dd67329a73d20 (diff)
float xfermodes (burn, dodge, softlight) in Sk8f, possibly using AVX.
Xfermode_ColorDodge_aa  10.3ms -> 7.85ms  0.76x
Xfermode_SoftLight_aa   13.8ms -> 10.2ms  0.74x
Xfermode_ColorBurn_aa   10.7ms -> 7.82ms  0.73x
Xfermode_SoftLight      33.6ms -> 23.2ms  0.69x
Xfermode_ColorDodge       25ms -> 16.5ms  0.66x
Xfermode_ColorBurn      26.1ms -> 16.6ms  0.63x

Ought to be no pixel diffs:
https://gold.skia.org/search2?issue=1432903002&unt=true&query=source_type%3Dgm&master=false

Incidental stuff:

I made the SkNx(T) constructors implicit to make writing math expressions simpler.
This allows us to write expressions like

    Sk4f v;
    ...
    v = v*4;

rather than

    Sk4f v;
    ...
    v = v * Sk4f(4);

As written it only works when the constant is on the right-hand side,
so expressions like `(Sk4f(1) - da)` have to stay for now. I plan on
following up with a CL that lets those become `(1 - da)` too.

BUG=skia:4117
CQ_EXTRA_TRYBOTS=client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot

Review URL: https://codereview.chromium.org/1432903002
Diffstat (limited to 'src/opts/SkXfermode_opts.h')
-rw-r--r-- src/opts/SkXfermode_opts.h 130
1 files changed, 67 insertions, 63 deletions
diff --git a/src/opts/SkXfermode_opts.h b/src/opts/SkXfermode_opts.h
index 69f2b420f5..a40faec21a 100644
--- a/src/opts/SkXfermode_opts.h
+++ b/src/opts/SkXfermode_opts.h
@@ -109,71 +109,76 @@ XFERMODE(Lighten) {
}
#undef XFERMODE
-// Some xfermodes use math like divide or sqrt that's best done in floats 1 pixel at a time.
-#define XFERMODE(Name) static Sk4f SK_VECTORCALL Name(Sk4f d, Sk4f s)
+// Some xfermodes use math like divide or sqrt that's best done in floats.
+// We write it generically, then call it 1 or 2 pixels at a time (T == Sk4f or Sk8f).
+#define XFERMODE(Name) struct Name { template <typename T> T operator()(const T&, const T&); }; \
+ template <typename T> T Name::operator()(const T& d, const T& s)
+static_assert(SK_A32_SHIFT == 24, "");
static inline Sk4f a_rgb(const Sk4f& a, const Sk4f& rgb) {
- static_assert(SK_A32_SHIFT == 24, "");
return a * Sk4f(0,0,0,1) + rgb * Sk4f(1,1,1,0);
}
-static inline Sk4f alphas(const Sk4f& f) {
- return SkNx_dup<SK_A32_SHIFT/8>(f);
+static inline Sk8f a_rgb(const Sk8f& a, const Sk8f& rgb) {
+ // TODO: SkNx_blend<0,0,0,1,0,0,0,1>(a, rgb) to let us use _mm256_blend_ps?
+ return a * Sk8f(0,0,0,1,0,0,0,1) + rgb * Sk8f(1,1,1,0,1,1,1,0);
}
+static inline Sk4f alphas(const Sk4f& f) { return SkNx_shuffle<3,3,3,3> (f); }
+static inline Sk8f alphas(const Sk8f& f) { return SkNx_shuffle<3,3,3,3,7,7,7,7>(f); }
XFERMODE(ColorDodge) {
auto sa = alphas(s),
da = alphas(d),
- isa = Sk4f(1)-sa,
- ida = Sk4f(1)-da;
+ isa = T(1)-sa,
+ ida = T(1)-da;
auto srcover = s + d*isa,
dstover = d + s*ida,
- otherwise = sa * Sk4f::Min(da, (d*sa)*(sa-s).approxInvert()) + s*ida + d*isa;
+ otherwise = sa * T::Min(da, (d*sa)*(sa-s).approxInvert()) + s*ida + d*isa;
// Order matters here, preferring d==0 over s==sa.
- auto colors = (d == Sk4f(0)).thenElse(dstover,
- (s == sa).thenElse(srcover,
- otherwise));
+ auto colors = (d == 0).thenElse(dstover,
+ (s == sa).thenElse(srcover,
+ otherwise));
return a_rgb(srcover, colors);
}
XFERMODE(ColorBurn) {
auto sa = alphas(s),
da = alphas(d),
- isa = Sk4f(1)-sa,
- ida = Sk4f(1)-da;
+ isa = T(1)-sa,
+ ida = T(1)-da;
auto srcover = s + d*isa,
dstover = d + s*ida,
- otherwise = sa*(da-Sk4f::Min(da, (da-d)*sa*s.approxInvert())) + s*ida + d*isa;
+ otherwise = sa*(da-T::Min(da, (da-d)*sa*s.approxInvert())) + s*ida + d*isa;
// Order matters here, preferring d==da over s==0.
- auto colors = (d == da).thenElse(dstover,
- (s == Sk4f(0)).thenElse(srcover,
- otherwise));
+ auto colors = (d == da).thenElse(dstover,
+ (s == 0).thenElse(srcover,
+ otherwise));
return a_rgb(srcover, colors);
}
XFERMODE(SoftLight) {
auto sa = alphas(s),
da = alphas(d),
- isa = Sk4f(1)-sa,
- ida = Sk4f(1)-da;
+ isa = T(1)-sa,
+ ida = T(1)-da;
// Some common terms.
- auto m = (da > Sk4f(0)).thenElse(d / da, Sk4f(0)),
- s2 = Sk4f(2)*s,
- m4 = Sk4f(4)*m;
+ auto m = (da > 0).thenElse(d / da, 0),
+ s2 = s*2,
+ m4 = m*4;
// The logic forks three ways:
// 1. dark src?
// 2. light src, dark dst?
// 3. light src, light dst?
- auto darkSrc = d*(sa + (s2 - sa)*(Sk4f(1) - m)), // Used in case 1.
- darkDst = (m4*m4 + m4)*(m - Sk4f(1)) + Sk4f(7)*m, // Used in case 2.
- liteDst = m.sqrt() - m, // Used in case 3.
- liteSrc = d*sa + da*(s2-sa)*(Sk4f(4)*d <= da).thenElse(darkDst, liteDst); // Case 2 or 3?
+ auto darkSrc = d*(sa + (s2 - sa)*(T(1) - m)), // Used in case 1.
+ darkDst = (m4*m4 + m4)*(m - 1) + m*7, // Used in case 2.
+ liteDst = m.sqrt() - m, // Used in case 3.
+ liteSrc = d*sa + da*(s2-sa)*(d*4 <= da).thenElse(darkDst, liteDst); // Case 2 or 3?
auto alpha = s + d*isa;
- auto colors = s*ida + d*isa + (s2 <= sa).thenElse(darkSrc, liteSrc); // Case 1 or 2/3?
+ auto colors = s*ida + d*isa + (s2 <= sa).thenElse(darkSrc, liteSrc); // Case 1 or 2/3?
return a_rgb(alpha, colors);
}
@@ -240,53 +245,52 @@ private:
typedef SkProcCoeffXfermode INHERITED;
};
-class Sk4fXfermode : public SkProcCoeffXfermode {
+template <typename BlendFn>
+class FloatXfermode : public SkProcCoeffXfermode {
public:
- typedef Sk4f (SK_VECTORCALL *ProcF)(Sk4f, Sk4f);
- Sk4fXfermode(const ProcCoeff& rec, SkXfermode::Mode mode, ProcF procf)
- : INHERITED(rec, mode)
- , fProcF(procf) {}
+ FloatXfermode(const ProcCoeff& rec, SkXfermode::Mode mode)
+ : INHERITED(rec, mode) {}
void xfer32(SkPMColor dst[], const SkPMColor src[], int n, const SkAlpha aa[]) const override {
- for (int i = 0; i < n; i++) {
- dst[i] = aa ? this->xfer32(dst[i], src[i], aa[i])
- : this->xfer32(dst[i], src[i]);
+ BlendFn blend;
+ while (n >= 2) {
+ auto d = Sk8f::FromBytes((const uint8_t*)dst) * (1.0f/255),
+ s = Sk8f::FromBytes((const uint8_t*)src) * (1.0f/255),
+ b = blend(d, s);
+ if (aa) {
+ auto a255 = Sk8f(aa[0],aa[0],aa[0],aa[0], aa[1],aa[1],aa[1],aa[1]);
+ (b*a255 + d*(Sk8f(255)-a255) + 0.5).toBytes((uint8_t*)dst);
+ aa += 2;
+ } else {
+ (b * 255 + 0.5).toBytes((uint8_t*)dst);
+ }
+ dst += 2;
+ src += 2;
+ n -= 2;
+ }
+ if (n) {
+ auto d = Sk4f::FromBytes((const uint8_t*)dst) * (1.0f/255),
+ s = Sk4f::FromBytes((const uint8_t*)src) * (1.0f/255),
+ b = blend(d, s);
+ if (aa) {
+ auto a255 = Sk4f(aa[0],aa[0],aa[0],aa[0]);
+ (b*a255 + d*(Sk4f(255)-a255) + 0.5).toBytes((uint8_t*)dst);
+ aa++;
+ } else {
+ (b * 255 + 0.5).toBytes((uint8_t*)dst);
+ }
}
}
void xfer16(uint16_t dst[], const SkPMColor src[], int n, const SkAlpha aa[]) const override {
for (int i = 0; i < n; i++) {
- SkPMColor dst32 = SkPixel16ToPixel32(dst[i]);
- dst32 = aa ? this->xfer32(dst32, src[i], aa[i])
- : this->xfer32(dst32, src[i]);
- dst[i] = SkPixel32ToPixel16(dst32);
+ SkPMColor dst32 = SkPixel16ToPixel32(dst[i]); // Convert dst up to 8888.
+ this->xfer32(&dst32, src+i, 1, aa ? aa+i : nullptr); // Blend 1 pixel.
+ dst[i] = SkPixel32ToPixel16(dst32); // Repack dst to 565 and store.
}
}
private:
- static Sk4f Load(SkPMColor c) {
- return Sk4f::FromBytes((uint8_t*)&c) * Sk4f(1.0f/255);
- }
- static SkPMColor Round(const Sk4f& f) {
- SkPMColor c;
- (f * Sk4f(255) + Sk4f(0.5f)).toBytes((uint8_t*)&c);
- return c;
- }
- inline SkPMColor xfer32(SkPMColor dst, SkPMColor src) const {
- return Round(fProcF(Load(dst), Load(src)));
- }
-
- inline SkPMColor xfer32(SkPMColor dst, SkPMColor src, SkAlpha aa) const {
- Sk4f s(Load(src)),
- d(Load(dst)),
- b(fProcF(d,s));
- // We do aa in full float precision before going back down to bytes, because we can!
- Sk4f a = Sk4f(aa) * Sk4f(1.0f/255);
- b = b*a + d*(Sk4f(1)-a);
- return Round(b);
- }
-
- ProcF fProcF;
typedef SkProcCoeffXfermode INHERITED;
};
@@ -323,7 +327,7 @@ static SkXfermode* create_xfermode(const ProcCoeff& rec, SkXfermode::Mode mode)
#undef CASE
#define CASE(Mode) \
- case SkXfermode::k##Mode##_Mode: return new Sk4fXfermode(rec, mode, &Mode)
+ case SkXfermode::k##Mode##_Mode: return new FloatXfermode<Mode>(rec, mode)
CASE(ColorDodge);
CASE(ColorBurn);
CASE(SoftLight);