From cc6db406e482914ab6abbe86aa89f0f0d4ec83c7 Mon Sep 17 00:00:00 2001 From: "commit-bot@chromium.org" Date: Thu, 5 Dec 2013 16:43:08 +0000 Subject: Make sure four_byte_interp benches the entire FourByteInterp call. Relative bench performance still says switch to Fast. Desktop: four_byte_interp_slow_256 11.68 four_byte_interp_slow_255 12.11 four_byte_interp_fast_256 7.17 four_byte_interp_fast_255 8.17 N5: four_byte_interp_slow_256 28.49 four_byte_interp_slow_255 25.08 four_byte_interp_fast_256 19.40 four_byte_interp_fast_255 21.69 BUG= R=reed@google.com Author: mtklein@google.com Review URL: https://codereview.chromium.org/102053006 git-svn-id: http://skia.googlecode.com/svn/trunk@12512 2bbb7eff-a529-9590-31e7-b0007b416f81 --- bench/ColorPrivBench.cpp | 61 ++++++++++++++++++++++++++++++++++-------------- 1 file changed, 43 insertions(+), 18 deletions(-) (limited to 'bench/ColorPrivBench.cpp') diff --git a/bench/ColorPrivBench.cpp b/bench/ColorPrivBench.cpp index 328de4aebd..9c15a8400c 100644 --- a/bench/ColorPrivBench.cpp +++ b/bench/ColorPrivBench.cpp @@ -10,12 +10,6 @@ public: fName.set("four_byte_interp"); fName.append(kFast ? "_fast" : "_slow"); fName.append(kScale ? "_255" : "_256"); - - // We'll exhaustively test all scales instead of using random numbers. - for (int i = 0; i <= 256; i++) { - fScales[i] = i; - } - if (kScale) fScales[256] = 255; // We'll just do 255 twice if we're limited to [0,255]. } virtual bool isSuitableFor(Backend backend) SK_OVERRIDE { @@ -24,20 +18,48 @@ public: virtual const char* onGetName() SK_OVERRIDE { return fName.c_str(); } + virtual void onPreDraw() SK_OVERRIDE { + // A handful of random srcs and dsts. + SkRandom rand; + for (int i = 0; i < kInputs; i++) { + fSrcs[i] = SkPreMultiplyColor(rand.nextU()); + fDsts[i] = SkPreMultiplyColor(rand.nextU()); + } + + // We'll exhaustively test all scales instead of using random numbers. + for (int i = 0; i <= 256; i++) { + fScales[i] = i; + } + if (kScale) fScales[256] = 255; // We'll just do 255 twice if we're limited to [0,255]. + } + virtual void onDraw(const int loops, SkCanvas*) SK_OVERRIDE { - const SkPMColor src = 0xAB998877, dst = 0x66334455; + // We xor results of FourByteInterp into junk to make sure the function runs. volatile SkPMColor junk = 0; - for (int i = 0; i < 10*loops; ++i) { - for (size_t j = 0; j <= 256; j++) { - const unsigned scale = fScales[j]; - if (kFast && kScale) { - junk ^= SkFastFourByteInterp(src, dst, scale); - } else if (kFast) { - junk ^= SkFastFourByteInterp256(src, dst, scale); - } else if (kScale) { - junk ^= SkFourByteInterp(src, dst, scale); - } else { - junk ^= SkFourByteInterp256(src, dst, scale); + + for (int loop = 0; loop < loops; loop++) { + for (int i = 0; i < kInputs; i++) { + for (size_t j = 0; j <= 256; j++) { + // Note: we really want to load src and dst here and not outside in the i-loop. + // If we put the loads there, a clever compiler will do the not-insignificant + // work in the FourByteInterps that depends only on src and dst outside this + // loop, so we'd only be benchmarking the back half of those functions that also + // depends on scale. Even here, these must be volatile arrays to prevent that + // clever compiler from hoisting the loads out of the loop on its own. + const SkPMColor src = fSrcs[i]; + const SkPMColor dst = fDsts[i]; + + const unsigned scale = fScales[j]; + + if (kFast && kScale) { + junk ^= SkFastFourByteInterp(src, dst, scale); + } else if (kFast) { + junk ^= SkFastFourByteInterp256(src, dst, scale); + } else if (kScale) { + junk ^= SkFourByteInterp(src, dst, scale); + } else { + junk ^= SkFourByteInterp256(src, dst, scale); + } } } } @@ -45,6 +67,9 @@ public: private: SkString fName; + static const int kInputs = 10; // Arbitrary. + volatile unsigned fSrcs[kInputs]; + volatile unsigned fDsts[kInputs]; unsigned fScales[257]; // We need space for [0, 256]. }; -- cgit v1.2.3