Convert SkPMFloat to [0,1] range and prune its API.

Now that Sk4px exists, there's a lot less sense in eking out every cycle of speed from SkPMFloat: if we need to go _really_ fast, we should use Sk4px. SkPMFloat's going to be used for things that are already slow: large-range intermediates, divides, sqrts, etc. A [0,1] range is easier to work with, and can even be faster if we eliminate enough *255 and *1/255 steps. This is particularly true on ARM, where NEON can do the *255 and /255 steps for us while converting float<->int. We have lots of experimental SkPMFloat <-> SkPMColor APIs that I'm now removing. Of the existing APIs, roundClamp() is the sanest, so I've kept only that, now called round(). The 4-at-a-time APIs never panned out, so they're gone. There will be small diffs on: colormatrix, coloremoji, colorfilterimagefilter, fadefilter, imagefilters_xfermodes, imagefilterscropexpand, imagefiltersgraph, tileimagefilter. BUG=skia: Review URL: https://codereview.chromium.org/1201343004
author: mtklein <mtklein@chromium.org> 2015-06-25 08:56:28 -0700
committer: Commit bot <commit-bot@chromium.org> 2015-06-25 08:56:28 -0700
commit: e9a3e3c17a313942042d6cfb9f4f0361a900d9e7 (patch)
tree: 913d6d293e2578f223ec82f2bbac69fbf43b711b /bench
parent: 538bacb4bb3ceac7786108cd68b04ed58b1c29c7 (diff)
1 files changed, 18 insertions, 66 deletions
diff --git a/bench/PMFloatBench.cpp b/bench/PMFloatBench.cpp
index 37542e3525..540fdb7815 100644
--- a/bench/PMFloatBench.cpp
+++ b/bench/PMFloatBench.cpp
@@ -20,20 +20,10 @@ static uint32_t lcg_rand(uint32_t* seed) {
 }
 
 // I'm having better luck getting these to constant-propagate away as template parameters.
-template <bool kClamp, bool kWide>
-struct PMFloatGetSetBench : public Benchmark {
-    PMFloatGetSetBench() {}
+struct PMFloatRoundtripBench : public Benchmark {
+    PMFloatRoundtripBench() {}
 
-    const char* onGetName() override {
-        switch (kClamp << 1 | kWide) {
-            case 0: return "SkPMFloat_get_1x";
-            case 1: return "SkPMFloat_get_4x";
-            case 2: return "SkPMFloat_clamp_1x";
-            case 3: return "SkPMFloat_clamp_4x";
-        }
-        SkFAIL("unreachable");
-        return "oh bother";
-    }
+    const char* onGetName() override { return "SkPMFloat_roundtrip"; }
     bool isSuitableFor(Backend backend) override { return backend == kNonRendering_Backend; }
 
     void onDraw(const int loops, SkCanvas* canvas) override {
@@ -41,61 +31,23 @@ struct PMFloatGetSetBench : public Benchmark {
         uint32_t junk = 0;
         uint32_t seed = 0;
         for (int i = 0; i < loops; i++) {
-            SkPMColor colors[4];
+            SkPMColor color;
         #ifdef SK_DEBUG
-            for (int i = 0; i < 4; i++) {
-                // Our SkASSERTs will remind us that it's technically required that we premultiply.
-                colors[i] = SkPreMultiplyColor(lcg_rand(&seed));
-            }
+            // Our SkASSERTs will remind us that it's technically required that we premultiply.
+            color = SkPreMultiplyColor(lcg_rand(&seed));
         #else
             // But it's a lot faster not to, and this code won't really mind the non-PM colors.
-            (void)lcg_rand(&seed);
-            colors[0] = seed + 0;
-            colors[1] = seed + 1;
-            colors[2] = seed + 2;
-            colors[3] = seed + 3;
+            color = lcg_rand(&seed);
         #endif
 
-            SkPMFloat fa,fb,fc,fd;
-            if (kWide) {
-                SkPMFloat::From4PMColors(colors, &fa, &fb, &fc, &fd);
-            } else {
-                fa = SkPMFloat::FromPMColor(colors[0]);
-                fb = SkPMFloat::FromPMColor(colors[1]);
-                fc = SkPMFloat::FromPMColor(colors[2]);
-                fd = SkPMFloat::FromPMColor(colors[3]);
-            }
-
-            SkPMColor back[4];
-            switch (kClamp << 1 | kWide) {
-                case 0: {
-                    back[0] = fa.round();
-                    back[1] = fb.round();
-                    back[2] = fc.round();
-                    back[3] = fd.round();
-                } break;
-                case 1: SkPMFloat::RoundTo4PMColors(fa, fb, fc, fd, back); break;
-                case 2: {
-                    back[0] = fa.roundClamp();
-                    back[1] = fb.roundClamp();
-                    back[2] = fc.roundClamp();
-                    back[3] = fd.roundClamp();
-                } break;
-                case 3: SkPMFloat::RoundClampTo4PMColors(fa, fb, fc, fd, back); break;
-            }
-            for (int i = 0; i < 4; i++) {
-                junk ^= back[i];
-            }
+            auto f = SkPMFloat::FromPMColor(color);
+            SkPMColor back = f.round();
+            junk ^= back;
         }
         blackhole ^= junk;
     }
 };
-
-// Extra () help DEF_BENCH not get confused by the comma inside the <>.
-DEF_BENCH(return (new PMFloatGetSetBench< true,  true>);)
-DEF_BENCH(return (new PMFloatGetSetBench<false,  true>);)
-DEF_BENCH(return (new PMFloatGetSetBench< true, false>);)
-DEF_BENCH(return (new PMFloatGetSetBench<false, false>);)
+DEF_BENCH(return new PMFloatRoundtripBench;)
 
 struct PMFloatGradientBench : public Benchmark {
     const char* onGetName() override { return "PMFloat_gradient"; }
@@ -103,8 +55,8 @@ struct PMFloatGradientBench : public Benchmark {
 
     SkPMColor fDevice[100];
     void onDraw(const int loops, SkCanvas*) override {
-        Sk4f c0 = SkPMFloat::FromARGB(255, 255, 0, 0),
-             c1 = SkPMFloat::FromARGB(255, 0, 0, 255),
+        Sk4f c0 = SkPMFloat::FromARGB(1, 1, 0, 0),
+             c1 = SkPMFloat::FromARGB(1, 0, 0, 1),
              dc = c1 - c0,
              fx(0.1f),
              dx(0.002f),
@@ -112,15 +64,15 @@ struct PMFloatGradientBench : public Benchmark {
              dcdx4(dcdx+dcdx+dcdx+dcdx);
 
         for (int n = 0; n < loops; n++) {
-            Sk4f a = c0 + dc*fx + Sk4f(0.5f),  // The +0.5f lets us call trunc() instead of get().
+            Sk4f a = c0 + dc*fx,
                  b = a + dcdx,
                  c = b + dcdx,
                  d = c + dcdx;
             for (size_t i = 0; i < SK_ARRAY_COUNT(fDevice); i += 4) {
-                fDevice[i+0] = SkPMFloat(a).trunc();
-                fDevice[i+1] = SkPMFloat(b).trunc();
-                fDevice[i+2] = SkPMFloat(c).trunc();
-                fDevice[i+3] = SkPMFloat(d).trunc();
+                fDevice[i+0] = SkPMFloat(a).round();
+                fDevice[i+1] = SkPMFloat(b).round();
+                fDevice[i+2] = SkPMFloat(c).round();
+                fDevice[i+3] = SkPMFloat(d).round();
                 a = a + dcdx4;
                 b = b + dcdx4;
                 c = c + dcdx4;
author	mtklein <mtklein@chromium.org>	2015-06-25 08:56:28 -0700
committer	Commit bot <commit-bot@chromium.org>	2015-06-25 08:56:28 -0700
commit	e9a3e3c17a313942042d6cfb9f4f0361a900d9e7 (patch)
tree	913d6d293e2578f223ec82f2bbac69fbf43b711b /bench
parent	538bacb4bb3ceac7786108cd68b04ed58b1c29c7 (diff)