aboutsummaryrefslogtreecommitdiffhomepage
path: root/bench/ColorPrivBench.cpp
diff options
context:
space:
mode:
authorGravatar commit-bot@chromium.org <commit-bot@chromium.org@2bbb7eff-a529-9590-31e7-b0007b416f81>2013-12-05 16:43:08 +0000
committerGravatar commit-bot@chromium.org <commit-bot@chromium.org@2bbb7eff-a529-9590-31e7-b0007b416f81>2013-12-05 16:43:08 +0000
commit cc6db406e482914ab6abbe86aa89f0f0d4ec83c7 (patch)
tree f853aec701d84d04de4d24191c03b1710c66c8f6 /bench/ColorPrivBench.cpp
parent 5a1cb5b408942b61891d04357413e67f885510c4 (diff)
Make sure four_byte_interp benches the entire FourByteInterp call.
Relative bench performance still says switch to Fast.

Desktop:
four_byte_interp_slow_256 11.68
four_byte_interp_slow_255 12.11
four_byte_interp_fast_256 7.17
four_byte_interp_fast_255 8.17

N5:
four_byte_interp_slow_256 28.49
four_byte_interp_slow_255 25.08
four_byte_interp_fast_256 19.40
four_byte_interp_fast_255 21.69

BUG=
R=reed@google.com

Author: mtklein@google.com

Review URL: https://codereview.chromium.org/102053006

git-svn-id: http://skia.googlecode.com/svn/trunk@12512 2bbb7eff-a529-9590-31e7-b0007b416f81
Diffstat (limited to 'bench/ColorPrivBench.cpp')
-rw-r--r-- bench/ColorPrivBench.cpp 61
1 files changed, 43 insertions, 18 deletions
diff --git a/bench/ColorPrivBench.cpp b/bench/ColorPrivBench.cpp
index 328de4aebd..9c15a8400c 100644
--- a/bench/ColorPrivBench.cpp
+++ b/bench/ColorPrivBench.cpp
@@ -10,12 +10,6 @@ public:
fName.set("four_byte_interp");
fName.append(kFast ? "_fast" : "_slow");
fName.append(kScale ? "_255" : "_256");
-
- // We'll exhaustively test all scales instead of using random numbers.
- for (int i = 0; i <= 256; i++) {
- fScales[i] = i;
- }
- if (kScale) fScales[256] = 255; // We'll just do 255 twice if we're limited to [0,255].
}
virtual bool isSuitableFor(Backend backend) SK_OVERRIDE {
@@ -24,20 +18,48 @@ public:
virtual const char* onGetName() SK_OVERRIDE { return fName.c_str(); }
+ virtual void onPreDraw() SK_OVERRIDE {
+ // A handful of random srcs and dsts.
+ SkRandom rand;
+ for (int i = 0; i < kInputs; i++) {
+ fSrcs[i] = SkPreMultiplyColor(rand.nextU());
+ fDsts[i] = SkPreMultiplyColor(rand.nextU());
+ }
+
+ // We'll exhaustively test all scales instead of using random numbers.
+ for (int i = 0; i <= 256; i++) {
+ fScales[i] = i;
+ }
+ if (kScale) fScales[256] = 255; // We'll just do 255 twice if we're limited to [0,255].
+ }
+
virtual void onDraw(const int loops, SkCanvas*) SK_OVERRIDE {
- const SkPMColor src = 0xAB998877, dst = 0x66334455;
+ // We xor results of FourByteInterp into junk to make sure the function runs.
volatile SkPMColor junk = 0;
- for (int i = 0; i < 10*loops; ++i) {
- for (size_t j = 0; j <= 256; j++) {
- const unsigned scale = fScales[j];
- if (kFast && kScale) {
- junk ^= SkFastFourByteInterp(src, dst, scale);
- } else if (kFast) {
- junk ^= SkFastFourByteInterp256(src, dst, scale);
- } else if (kScale) {
- junk ^= SkFourByteInterp(src, dst, scale);
- } else {
- junk ^= SkFourByteInterp256(src, dst, scale);
+
+ for (int loop = 0; loop < loops; loop++) {
+ for (int i = 0; i < kInputs; i++) {
+ for (size_t j = 0; j <= 256; j++) {
+ // Note: we really want to load src and dst here and not outside in the i-loop.
+ // If we put the loads there, a clever compiler will do the not-insignificant
+ // work in the FourByteInterps that depends only on src and dst outside this
+ // loop, so we'd only be benchmarking the back half of those functions that also
+ // depends on scale. Even here, these must be volatile arrays to prevent that
+ // clever compiler from hoisting the loads out of the loop on its own.
+ const SkPMColor src = fSrcs[i];
+ const SkPMColor dst = fDsts[i];
+
+ const unsigned scale = fScales[j];
+
+ if (kFast && kScale) {
+ junk ^= SkFastFourByteInterp(src, dst, scale);
+ } else if (kFast) {
+ junk ^= SkFastFourByteInterp256(src, dst, scale);
+ } else if (kScale) {
+ junk ^= SkFourByteInterp(src, dst, scale);
+ } else {
+ junk ^= SkFourByteInterp256(src, dst, scale);
+ }
}
}
}
@@ -45,6 +67,9 @@ public:
private:
SkString fName;
+ static const int kInputs = 10; // Arbitrary.
+ volatile unsigned fSrcs[kInputs];
+ volatile unsigned fDsts[kInputs];
unsigned fScales[257]; // We need space for [0, 256].
};