From cc6db406e482914ab6abbe86aa89f0f0d4ec83c7 Mon Sep 17 00:00:00 2001
From: "commit-bot@chromium.org"
 <commit-bot@chromium.org@2bbb7eff-a529-9590-31e7-b0007b416f81>
Date: Thu, 5 Dec 2013 16:43:08 +0000
Subject: Make sure four_byte_interp benches the entire FourByteInterp call.

Relative bench performance still favors switching to the Fast variants.

Desktop:
    four_byte_interp_slow_256  11.68
    four_byte_interp_slow_255  12.11
    four_byte_interp_fast_256   7.17
    four_byte_interp_fast_255   8.17

N5:
    four_byte_interp_slow_256  28.49
    four_byte_interp_slow_255  25.08
    four_byte_interp_fast_256  19.40
    four_byte_interp_fast_255  21.69

BUG=
R=reed@google.com

Author: mtklein@google.com

Review URL: https://codereview.chromium.org/102053006

git-svn-id: http://skia.googlecode.com/svn/trunk@12512 2bbb7eff-a529-9590-31e7-b0007b416f81
---
 bench/ColorPrivBench.cpp | 61 ++++++++++++++++++++++++++++++++++--------------
 1 file changed, 43 insertions(+), 18 deletions(-)

(limited to 'bench/ColorPrivBench.cpp')

diff --git a/bench/ColorPrivBench.cpp b/bench/ColorPrivBench.cpp
index 328de4aebd..9c15a8400c 100644
--- a/bench/ColorPrivBench.cpp
+++ b/bench/ColorPrivBench.cpp
@@ -10,12 +10,6 @@ public:
         fName.set("four_byte_interp");
         fName.append(kFast ? "_fast" : "_slow");
         fName.append(kScale ? "_255" : "_256");
-
-        // We'll exhaustively test all scales instead of using random numbers.
-        for (int i = 0; i <= 256; i++) {
-            fScales[i] = i;
-        }
-        if (kScale) fScales[256] = 255;  // We'll just do 255 twice if we're limited to [0,255].
     }
 
     virtual bool isSuitableFor(Backend backend) SK_OVERRIDE {
@@ -24,20 +18,48 @@ public:
 
     virtual const char* onGetName() SK_OVERRIDE { return fName.c_str(); }
 
+    virtual void onPreDraw() SK_OVERRIDE {  // Fills the input tables that onDraw() reads.
+        // kInputs random srcs and dsts, each run through SkPreMultiplyColor.
+        SkRandom rand;
+        for (int i = 0; i < kInputs; i++) {
+            fSrcs[i] = SkPreMultiplyColor(rand.nextU());
+            fDsts[i] = SkPreMultiplyColor(rand.nextU());
+        }
+
+        // Scales are exhaustive rather than random: fScales[i] = i covers all of [0,256].
+        for (int i = 0; i <= 256; i++) {
+            fScales[i] = i;
+        }
+        if (kScale) fScales[256] = 255;  // Limited to [0,255]: the last slot just repeats 255.
+    }
+
     virtual void onDraw(const int loops, SkCanvas*) SK_OVERRIDE {
-        const SkPMColor src = 0xAB998877, dst = 0x66334455;
+        // We xor results of FourByteInterp into junk to make sure the function runs.
         volatile SkPMColor junk = 0;
-        for (int i = 0; i < 10*loops; ++i) {
-            for (size_t j = 0; j <= 256; j++) {
-                const unsigned scale = fScales[j];
-                if (kFast && kScale) {
-                    junk ^= SkFastFourByteInterp(src, dst, scale);
-                } else if (kFast) {
-                    junk ^= SkFastFourByteInterp256(src, dst, scale);
-                } else if (kScale) {
-                    junk ^= SkFourByteInterp(src, dst, scale);
-                } else {
-                    junk ^= SkFourByteInterp256(src, dst, scale);
+
+        for (int loop = 0; loop < loops; loop++) {
+            for (int i = 0; i < kInputs; i++) {
+                for (size_t j = 0; j <= 256; j++) {
+                    // Note: we really want to load src and dst here and not outside in the i-loop.
+                    // If we put the loads there, a clever compiler will do the not-insignificant
+                    // work in the FourByteInterps that depends only on src and dst outside this
+                    // loop, so we'd only be benchmarking the back half of those functions that also
+                    // depends on scale.  Even here, these must be volatile arrays to prevent that
+                    // clever compiler from hoisting the loads out of the loop on its own.
+                    const SkPMColor src = fSrcs[i];
+                    const SkPMColor dst = fDsts[i];
+
+                    const unsigned scale = fScales[j];
+
+                    if (kFast && kScale) {
+                        junk ^= SkFastFourByteInterp(src, dst, scale);
+                    } else if (kFast) {
+                        junk ^= SkFastFourByteInterp256(src, dst, scale);
+                    } else if (kScale) {
+                        junk ^= SkFourByteInterp(src, dst, scale);
+                    } else {
+                        junk ^= SkFourByteInterp256(src, dst, scale);
+                    }
                 }
             }
         }
@@ -45,6 +67,9 @@ public:
 
 private:
     SkString fName;
+    static const int kInputs = 10;  // Arbitrary.
+    volatile unsigned fSrcs[kInputs];
+    volatile unsigned fDsts[kInputs];
     unsigned fScales[257];  // We need space for [0, 256].
 };
 
-- 
cgit v1.2.3