diff options
author | 2015-06-25 08:56:28 -0700 | |
---|---|---|
committer | 2015-06-25 08:56:28 -0700 | |
commit | e9a3e3c17a313942042d6cfb9f4f0361a900d9e7 (patch) | |
tree | 913d6d293e2578f223ec82f2bbac69fbf43b711b /bench | |
parent | 538bacb4bb3ceac7786108cd68b04ed58b1c29c7 (diff) |
Convert SkPMFloat to [0,1] range and prune its API.
Now that Sk4px exists, there's a lot less sense in eking out every
cycle of speed from SkPMFloat: if we need to go _really_ fast, we
should use Sk4px. SkPMFloat's going to be used for things that are
already slow: large-range intermediates, divides, sqrts, etc.
A [0,1] range is easier to work with, and can even be faster if we
eliminate enough *255 and *1/255 steps. This is particularly true
on ARM, where NEON can do the *255 and /255 steps for us while
converting float<->int.
We have lots of experimental SkPMFloat <-> SkPMColor APIs that
I'm now removing. Of the existing APIs, roundClamp() is the sanest,
so I've kept only that, now called round(). The 4-at-a-time APIs
never panned out, so they're gone.
There will be small diffs on:
colormatrix coloremoji colorfilterimagefilter fadefilter imagefilters_xfermodes imagefilterscropexpand imagefiltersgraph tileimagefilter
BUG=skia:
Review URL: https://codereview.chromium.org/1201343004
Diffstat (limited to 'bench')
-rw-r--r-- | bench/PMFloatBench.cpp | 84 |
1 files changed, 18 insertions, 66 deletions
diff --git a/bench/PMFloatBench.cpp b/bench/PMFloatBench.cpp index 37542e3525..540fdb7815 100644 --- a/bench/PMFloatBench.cpp +++ b/bench/PMFloatBench.cpp @@ -20,20 +20,10 @@ static uint32_t lcg_rand(uint32_t* seed) { } // I'm having better luck getting these to constant-propagate away as template parameters. -template <bool kClamp, bool kWide> -struct PMFloatGetSetBench : public Benchmark { - PMFloatGetSetBench() {} +struct PMFloatRoundtripBench : public Benchmark { + PMFloatRoundtripBench() {} - const char* onGetName() override { - switch (kClamp << 1 | kWide) { - case 0: return "SkPMFloat_get_1x"; - case 1: return "SkPMFloat_get_4x"; - case 2: return "SkPMFloat_clamp_1x"; - case 3: return "SkPMFloat_clamp_4x"; - } - SkFAIL("unreachable"); - return "oh bother"; - } + const char* onGetName() override { return "SkPMFloat_roundtrip"; } bool isSuitableFor(Backend backend) override { return backend == kNonRendering_Backend; } void onDraw(const int loops, SkCanvas* canvas) override { @@ -41,61 +31,23 @@ struct PMFloatGetSetBench : public Benchmark { uint32_t junk = 0; uint32_t seed = 0; for (int i = 0; i < loops; i++) { - SkPMColor colors[4]; + SkPMColor color; #ifdef SK_DEBUG - for (int i = 0; i < 4; i++) { - // Our SkASSERTs will remind us that it's technically required that we premultiply. - colors[i] = SkPreMultiplyColor(lcg_rand(&seed)); - } + // Our SkASSERTs will remind us that it's technically required that we premultiply. + color = SkPreMultiplyColor(lcg_rand(&seed)); #else // But it's a lot faster not to, and this code won't really mind the non-PM colors. 
- (void)lcg_rand(&seed); - colors[0] = seed + 0; - colors[1] = seed + 1; - colors[2] = seed + 2; - colors[3] = seed + 3; + color = lcg_rand(&seed); #endif - SkPMFloat fa,fb,fc,fd; - if (kWide) { - SkPMFloat::From4PMColors(colors, &fa, &fb, &fc, &fd); - } else { - fa = SkPMFloat::FromPMColor(colors[0]); - fb = SkPMFloat::FromPMColor(colors[1]); - fc = SkPMFloat::FromPMColor(colors[2]); - fd = SkPMFloat::FromPMColor(colors[3]); - } - - SkPMColor back[4]; - switch (kClamp << 1 | kWide) { - case 0: { - back[0] = fa.round(); - back[1] = fb.round(); - back[2] = fc.round(); - back[3] = fd.round(); - } break; - case 1: SkPMFloat::RoundTo4PMColors(fa, fb, fc, fd, back); break; - case 2: { - back[0] = fa.roundClamp(); - back[1] = fb.roundClamp(); - back[2] = fc.roundClamp(); - back[3] = fd.roundClamp(); - } break; - case 3: SkPMFloat::RoundClampTo4PMColors(fa, fb, fc, fd, back); break; - } - for (int i = 0; i < 4; i++) { - junk ^= back[i]; - } + auto f = SkPMFloat::FromPMColor(color); + SkPMColor back = f.round(); + junk ^= back; } blackhole ^= junk; } }; - -// Extra () help DEF_BENCH not get confused by the comma inside the <>. 
-DEF_BENCH(return (new PMFloatGetSetBench< true, true>);) -DEF_BENCH(return (new PMFloatGetSetBench<false, true>);) -DEF_BENCH(return (new PMFloatGetSetBench< true, false>);) -DEF_BENCH(return (new PMFloatGetSetBench<false, false>);) +DEF_BENCH(return new PMFloatRoundtripBench;) struct PMFloatGradientBench : public Benchmark { const char* onGetName() override { return "PMFloat_gradient"; } @@ -103,8 +55,8 @@ struct PMFloatGradientBench : public Benchmark { SkPMColor fDevice[100]; void onDraw(const int loops, SkCanvas*) override { - Sk4f c0 = SkPMFloat::FromARGB(255, 255, 0, 0), - c1 = SkPMFloat::FromARGB(255, 0, 0, 255), + Sk4f c0 = SkPMFloat::FromARGB(1, 1, 0, 0), + c1 = SkPMFloat::FromARGB(1, 0, 0, 1), dc = c1 - c0, fx(0.1f), dx(0.002f), @@ -112,15 +64,15 @@ struct PMFloatGradientBench : public Benchmark { dcdx4(dcdx+dcdx+dcdx+dcdx); for (int n = 0; n < loops; n++) { - Sk4f a = c0 + dc*fx + Sk4f(0.5f), // The +0.5f lets us call trunc() instead of get(). + Sk4f a = c0 + dc*fx, b = a + dcdx, c = b + dcdx, d = c + dcdx; for (size_t i = 0; i < SK_ARRAY_COUNT(fDevice); i += 4) { - fDevice[i+0] = SkPMFloat(a).trunc(); - fDevice[i+1] = SkPMFloat(b).trunc(); - fDevice[i+2] = SkPMFloat(c).trunc(); - fDevice[i+3] = SkPMFloat(d).trunc(); + fDevice[i+0] = SkPMFloat(a).round(); + fDevice[i+1] = SkPMFloat(b).round(); + fDevice[i+2] = SkPMFloat(c).round(); + fDevice[i+3] = SkPMFloat(d).round(); a = a + dcdx4; b = b + dcdx4; c = c + dcdx4; |