diff options
author | mtklein <mtklein@chromium.org> | 2014-06-16 14:04:32 -0700 |
---|---|---|
committer | Commit bot <commit-bot@chromium.org> | 2014-06-16 14:04:34 -0700 |
commit | 90c471e73fb0bd09aef8ad1e3f0842e5d46fb342 (patch) | |
tree | 09796391108d88282b20bb931f7337cea90640aa | |
parent | ad2ab61b4eec0ae43f213af3f18ffe373fe8c4aa (diff) |
Refine bench_record and bench_playback:
- use high-precision wall timer only
- warm caches once before measuring
- measure independent samples, calculating statistics
- add --verbose to control how much data we output
Also removed some unloved features from bench_record.
BUG=skia:
R=jcgregorio@google.com, mtklein@google.com
Author: mtklein@chromium.org
Review URL: https://codereview.chromium.org/338203002
-rw-r--r-- | bench/BenchTimer.cpp | 16 | ||||
-rw-r--r-- | bench/BenchTimer.h | 23 | ||||
-rw-r--r-- | tools/Stats.h | 32 | ||||
-rw-r--r-- | tools/bench_playback.cpp | 68 | ||||
-rw-r--r-- | tools/bench_record.cpp | 123 |
5 files changed, 197 insertions, 65 deletions
diff --git a/bench/BenchTimer.cpp b/bench/BenchTimer.cpp index 3617f9de58..f3e8e3b0ec 100644 --- a/bench/BenchTimer.cpp +++ b/bench/BenchTimer.cpp @@ -76,3 +76,19 @@ void BenchTimer::truncatedEnd() { fTruncatedCpu = fTruncatedSysTimer->endCpu() * fDurationScale; fTruncatedWall = fTruncatedSysTimer->endWall() * fDurationScale; } + +WallTimer::WallTimer() : fWall(-1.0), fSysTimer(new BenchSysTimer) {} + +WallTimer::~WallTimer() { + delete fSysTimer; +} + +void WallTimer::start(double durationScale) { + fDurationScale = durationScale; + fSysTimer->startWall(); +} + +void WallTimer::end() { + fWall = fSysTimer->endWall() * fDurationScale; +} + diff --git a/bench/BenchTimer.h b/bench/BenchTimer.h index 09950a734b..2abf10b80f 100644 --- a/bench/BenchTimer.h +++ b/bench/BenchTimer.h @@ -39,12 +39,29 @@ public: double fGpu; private: - BenchSysTimer *fSysTimer; - BenchSysTimer *fTruncatedSysTimer; + BenchSysTimer* fSysTimer; + BenchSysTimer* fTruncatedSysTimer; #if SK_SUPPORT_GPU - BenchGpuTimer *fGpuTimer; + BenchGpuTimer* fGpuTimer; #endif double fDurationScale; // for this start/end session }; +// Same as BenchTimer above, supporting only fWall but with much lower overhead. +// (Typically, ~30ns instead of BenchTimer's ~1us.) +class WallTimer { +public: + WallTimer(); + ~WallTimer(); + + void start(double durationScale = 1); + void end(); + + double fWall; + +private: + BenchSysTimer* fSysTimer; + double fDurationScale; +}; + #endif diff --git a/tools/Stats.h b/tools/Stats.h new file mode 100644 index 0000000000..2370084fd6 --- /dev/null +++ b/tools/Stats.h @@ -0,0 +1,32 @@ +#ifndef Stats_DEFINED +#define Stats_DEFINED + +struct Stats { + Stats(const double samples[], int n) { + min = samples[0]; + max = samples[0]; + for (int i = 0; i < n; i++) { + if (samples[i] < min) { min = samples[i]; } + if (samples[i] > max) { max = samples[i]; } + } + + double sum = 0.0; + for (int i = 0 ; i < n; i++) { + sum += samples[i]; + } + mean = sum / n; + + double err = 0.0; + for (int i = 0 ; i < n; i++) { + err += (samples[i] - mean) * (samples[i] - mean); + } + var = err / (n-1); + } + + double min; + double max; + double mean; // Estimate of population mean. + double var; // Estimate of population variance. +}; + +#endif//Stats_DEFINED diff --git a/tools/bench_playback.cpp b/tools/bench_playback.cpp index ffe9e23e55..f07fa8e300 100644 --- a/tools/bench_playback.cpp +++ b/tools/bench_playback.cpp @@ -5,7 +5,6 @@ * found in the LICENSE file. */ -#include "BenchTimer.h" #include "SkCommandLineFlags.h" #include "SkForceLinking.h" #include "SkGraphics.h" @@ -16,19 +15,27 @@ #include "SkStream.h" #include "SkString.h" +#include "BenchTimer.h" +#include "Stats.h" + +typedef WallTimer Timer; + __SK_FORCE_IMAGE_DECODER_LINKING; -DEFINE_string2(skps, r, "skps", "Directory containing SKPs to read and re-record."); -DEFINE_int32(loops, 10, "Number of times to play back each SKP."); +DEFINE_string2(skps, r, "skps", "Directory containing SKPs to playback."); +DEFINE_int32(samples, 10, "Gather this many samples of each picture playback."); DEFINE_bool(skr, false, "Play via SkRecord instead of SkPicture."); DEFINE_int32(tile, 1000000000, "Simulated tile size."); DEFINE_string(match, "", "The usual filters on file names of SKPs to bench."); DEFINE_string(timescale, "ms", "Print times in ms, us, or ns"); - -static double scale_time(double ms) { - if (FLAGS_timescale.contains("us")) ms *= 1000; - if (FLAGS_timescale.contains("ns")) ms *= 1000000; - return ms; +DEFINE_int32(verbose, 0, "0: print min sample; " + "1: print min, mean, max and noise indication " + "2: print all samples"); + +static double timescale() { + if (FLAGS_timescale.contains("us")) return 1000; + if (FLAGS_timescale.contains("ns")) return 1000000; + return 1; } static SkPicture* rerecord_with_tilegrid(SkPicture& src) { @@ -49,6 +56,14 @@ static EXPERIMENTAL::SkPlayback* rerecord_with_skr(SkPicture& src) { return recording.releasePlayback(); } +static void draw(const EXPERIMENTAL::SkPlayback& skr, const SkPicture& skp, SkCanvas* canvas) { + if (FLAGS_skr) { + skr.draw(canvas); + } else { + skp.draw(canvas); + } +} + static void bench(SkPMColor* scratch, SkPicture& src, const char* name) { SkAutoTUnref<SkPicture> picture(rerecord_with_tilegrid(src)); SkAutoTDelete<EXPERIMENTAL::SkPlayback> record(rerecord_with_skr(src)); @@ -59,19 +74,34 @@ static void bench(SkPMColor* scratch, SkPicture& src, const char* name) { src.width() * sizeof(SkPMColor))); canvas->clipRect(SkRect::MakeWH(SkIntToScalar(FLAGS_tile), SkIntToScalar(FLAGS_tile))); - BenchTimer timer; - timer.start(); - for (int i = 0; i < FLAGS_loops; i++) { - if (FLAGS_skr) { - record->draw(canvas.get()); - } else { - picture->draw(canvas.get()); - } + // Draw once to warm any caches. The first sample otherwise can be very noisy. + draw(*record, *picture, canvas.get()); + + Timer timer; + SkAutoTMalloc<double> samples(FLAGS_samples); + for (int i = 0; i < FLAGS_samples; i++) { + // We assume timer overhead (typically, ~30ns) is insignificant + // compared to draw runtime (at least ~100us, usually several ms). + timer.start(timescale()); + draw(*record, *picture, canvas.get()); + timer.end(); + samples[i] = timer.fWall; } - timer.end(); - const double msPerLoop = timer.fCpu / (double)FLAGS_loops; - printf("%f\t%s\n", scale_time(msPerLoop), name); + Stats stats(samples.get(), FLAGS_samples); + if (FLAGS_verbose == 0) { + printf("%g\t%s\n", stats.min, name); + } else if (FLAGS_verbose == 1) { + // Get a rough idea of how noisy the measurements were. + const double noisePercent = 100 * sqrt(stats.var) / stats.mean; + printf("%g\t%g\t%g\t±%.0f%%\t%s\n", stats.min, stats.mean, stats.max, noisePercent, name); + } else if (FLAGS_verbose == 2) { + printf("%s", name); + for (int i = 0; i < FLAGS_samples; i++) { + printf("\t%g", samples[i]); + } + printf("\n"); + } } int tool_main(int argc, char** argv); diff --git a/tools/bench_record.cpp b/tools/bench_record.cpp index 025127e4c2..a8d7a8a0e5 100644 --- a/tools/bench_record.cpp +++ b/tools/bench_record.cpp @@ -5,7 +5,6 @@ * found in the LICENSE file. */ -#include "BenchTimer.h" #include "SkCommandLineFlags.h" #include "SkForceLinking.h" #include "SkGraphics.h" @@ -15,28 +14,32 @@ #include "SkRecording.h" #include "SkStream.h" #include "SkString.h" + +#include "BenchTimer.h" #include "LazyDecodeBitmap.h" +#include "Stats.h" -__SK_FORCE_IMAGE_DECODER_LINKING; +typedef WallTimer Timer; -// Just reading all the SKPs takes about 2 seconds for me, which is the same as about 100 loops of -// rerecording all the SKPs. So we default to --loops=900, which makes ~90% of our time spent in -// recording, and this should take ~20 seconds to run. +__SK_FORCE_IMAGE_DECODER_LINKING; DEFINE_string2(skps, r, "skps", "Directory containing SKPs to read and re-record."); -DEFINE_int32(loops, 900, "Number of times to re-record each SKP."); -DEFINE_bool(endRecording, true, "If false, don't time SkPicture::endRecording()"); -DEFINE_int32(nullSize, 1000, "Pretend dimension of null source picture."); +DEFINE_int32(samples, 10, "Number of times to re-record each SKP."); DEFINE_int32(tileGridSize, 512, "Set the tile grid size. Has no effect if bbh is not set to tilegrid."); DEFINE_string(bbh, "", "Turn on the bbh and select the type, one of rtree, tilegrid, quadtree"); DEFINE_bool(skr, false, "Record SKR instead of SKP."); DEFINE_string(match, "", "The usual filters on file names of SKPs to bench."); DEFINE_string(timescale, "us", "Print times in ms, us, or ns"); - -static double scale_time(double ms) { - if (FLAGS_timescale.contains("us")) ms *= 1000; - if (FLAGS_timescale.contains("ns")) ms *= 1000000; - return ms; +DEFINE_double(overheadGoal, 0.0001, + "Try to make timer overhead at most this fraction of our sample measurements."); +DEFINE_int32(verbose, 0, "0: print min sample; " + "1: print min, mean, max and noise indication " + "2: print all samples"); + +static double timescale() { + if (FLAGS_timescale.contains("us")) return 1000; + if (FLAGS_timescale.contains("ns")) return 1000000; + return 1; } static SkBBHFactory* parse_FLAGS_bbh() { @@ -61,35 +64,62 @@ static SkBBHFactory* parse_FLAGS_bbh() { return NULL; } -static void bench_record(SkPicture* src, const char* name, SkBBHFactory* bbhFactory) { - BenchTimer timer; - timer.start(); - const int width = src ? src->width() : FLAGS_nullSize; - const int height = src ? src->height() : FLAGS_nullSize; - - for (int i = 0; i < FLAGS_loops; i++) { - if (FLAGS_skr) { - EXPERIMENTAL::SkRecording recording(width, height); - if (NULL != src) { - src->draw(recording.canvas()); - } - // Release and delete the SkPlayback so that recording optimizes its SkRecord. - SkDELETE(recording.releasePlayback()); - } else { - SkPictureRecorder recorder; - SkCanvas* canvas = recorder.beginRecording(width, height, bbhFactory); - if (NULL != src) { - src->draw(canvas); - } - if (FLAGS_endRecording) { - SkAutoTUnref<SkPicture> dst(recorder.endRecording()); - } +static void rerecord(const SkPicture& src, SkBBHFactory* bbhFactory) { + if (FLAGS_skr) { + EXPERIMENTAL::SkRecording recording(src.width(), src.height()); + src.draw(recording.canvas()); + // Release and delete the SkPlayback so that recording optimizes its SkRecord. + SkDELETE(recording.releasePlayback()); + } else { + SkPictureRecorder recorder; + src.draw(recorder.beginRecording(src.width(), src.height(), bbhFactory)); + SkAutoTUnref<SkPicture> dst(recorder.endRecording()); + } +} + +static void bench_record(const SkPicture& src, + const double timerOverhead, + const char* name, + SkBBHFactory* bbhFactory) { + // Rerecord once to warm up any caches. Otherwise the first sample can be very noisy. + rerecord(src, bbhFactory); + + // Rerecord once to see how many times we should loop to make timer overhead insignificant. + Timer timer; + do { + timer.start(timescale()); + rerecord(src, bbhFactory); + timer.end(); + } while (timer.fWall < timerOverhead); // Loop just in case something bizarre happens. + + // We want (timer overhead / measurement) to be less than FLAGS_overheadGoal. + // So in each sample, we'll loop enough times to have made that true for our first measurement. + const int loops = (int)ceil(timerOverhead / timer.fWall / FLAGS_overheadGoal); + + SkAutoTMalloc<double> samples(FLAGS_samples); + for (int i = 0; i < FLAGS_samples; i++) { + timer.start(timescale()); + for (int j = 0; j < loops; j++) { + rerecord(src, bbhFactory); } + timer.end(); + samples[i] = timer.fWall / loops; } - timer.end(); - const double msPerLoop = timer.fCpu / (double)FLAGS_loops; - printf("%f\t%s\n", scale_time(msPerLoop), name); + Stats stats(samples.get(), FLAGS_samples); + if (FLAGS_verbose == 0) { + printf("%g\t%s\n", stats.min, name); + } else if (FLAGS_verbose == 1) { + // Get a rough idea of how noisy the measurements were. + const double noisePercent = 100 * sqrt(stats.var) / stats.mean; + printf("%g\t%g\t%g\t±%.0f%%\t%s\n", stats.min, stats.mean, stats.max, noisePercent, name); + } else if (FLAGS_verbose == 2) { + printf("%s", name); + for (int i = 0; i < FLAGS_samples; i++) { + printf("\t%g", samples[i]); + } + printf("\n"); + } } int tool_main(int argc, char** argv); @@ -103,10 +133,17 @@ int tool_main(int argc, char** argv) { } SkAutoTDelete<SkBBHFactory> bbhFactory(parse_FLAGS_bbh()); - bench_record(NULL, "NULL", bbhFactory.get()); - if (FLAGS_skps.isEmpty()) { - return 0; + + // Each run will use this timer overhead estimate to guess how many times it should run. + static const int kOverheadLoops = 10000000; + Timer timer; + double overheadEstimate = 0.0; + for (int i = 0; i < kOverheadLoops; i++) { + timer.start(timescale()); + timer.end(); + overheadEstimate += timer.fWall; } + overheadEstimate /= kOverheadLoops; SkOSFile::Iter it(FLAGS_skps[0], ".skp"); SkString filename; @@ -131,7 +168,7 @@ int tool_main(int argc, char** argv) { failed = true; continue; } - bench_record(src, filename.c_str(), bbhFactory.get()); + bench_record(*src, overheadEstimate, filename.c_str(), bbhFactory.get()); } return failed ? 1 : 0; } |