aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--bench/BenchTimer.cpp16
-rw-r--r--bench/BenchTimer.h23
-rw-r--r--tools/Stats.h32
-rw-r--r--tools/bench_playback.cpp68
-rw-r--r--tools/bench_record.cpp123
5 files changed, 197 insertions, 65 deletions
diff --git a/bench/BenchTimer.cpp b/bench/BenchTimer.cpp
index 3617f9de58..f3e8e3b0ec 100644
--- a/bench/BenchTimer.cpp
+++ b/bench/BenchTimer.cpp
@@ -76,3 +76,19 @@ void BenchTimer::truncatedEnd() {
fTruncatedCpu = fTruncatedSysTimer->endCpu() * fDurationScale;
fTruncatedWall = fTruncatedSysTimer->endWall() * fDurationScale;
}
+
+WallTimer::WallTimer() : fWall(-1.0), fSysTimer(new BenchSysTimer) {}
+
+WallTimer::~WallTimer() {
+ delete fSysTimer;
+}
+
+void WallTimer::start(double durationScale) {
+ fDurationScale = durationScale;
+ fSysTimer->startWall();
+}
+
+void WallTimer::end() {
+ fWall = fSysTimer->endWall() * fDurationScale;
+}
+
diff --git a/bench/BenchTimer.h b/bench/BenchTimer.h
index 09950a734b..2abf10b80f 100644
--- a/bench/BenchTimer.h
+++ b/bench/BenchTimer.h
@@ -39,12 +39,29 @@ public:
double fGpu;
private:
- BenchSysTimer *fSysTimer;
- BenchSysTimer *fTruncatedSysTimer;
+ BenchSysTimer* fSysTimer;
+ BenchSysTimer* fTruncatedSysTimer;
#if SK_SUPPORT_GPU
- BenchGpuTimer *fGpuTimer;
+ BenchGpuTimer* fGpuTimer;
#endif
double fDurationScale; // for this start/end session
};
+// Same as BenchTimer above, supporting only fWall but with much lower overhead.
+// (Typically, ~30ns instead of BenchTimer's ~1us.)
+class WallTimer {
+public:
+ WallTimer();
+ ~WallTimer();
+
+ void start(double durationScale = 1);
+ void end();
+
+ double fWall;
+
+private:
+ BenchSysTimer* fSysTimer;
+ double fDurationScale;
+};
+
#endif
diff --git a/tools/Stats.h b/tools/Stats.h
new file mode 100644
index 0000000000..2370084fd6
--- /dev/null
+++ b/tools/Stats.h
@@ -0,0 +1,32 @@
+#ifndef Stats_DEFINED
+#define Stats_DEFINED
+
+struct Stats {
+ Stats(const double samples[], int n) {
+ min = samples[0];
+ max = samples[0];
+ for (int i = 0; i < n; i++) {
+ if (samples[i] < min) { min = samples[i]; }
+ if (samples[i] > max) { max = samples[i]; }
+ }
+
+ double sum = 0.0;
+ for (int i = 0 ; i < n; i++) {
+ sum += samples[i];
+ }
+ mean = sum / n;
+
+ double err = 0.0;
+ for (int i = 0 ; i < n; i++) {
+ err += (samples[i] - mean) * (samples[i] - mean);
+ }
+ var = err / (n-1);
+ }
+
+ double min;
+ double max;
+ double mean; // Estimate of population mean.
+ double var; // Estimate of population variance.
+};
+
+#endif//Stats_DEFINED
diff --git a/tools/bench_playback.cpp b/tools/bench_playback.cpp
index ffe9e23e55..f07fa8e300 100644
--- a/tools/bench_playback.cpp
+++ b/tools/bench_playback.cpp
@@ -5,7 +5,6 @@
* found in the LICENSE file.
*/
-#include "BenchTimer.h"
#include "SkCommandLineFlags.h"
#include "SkForceLinking.h"
#include "SkGraphics.h"
@@ -16,19 +15,27 @@
#include "SkStream.h"
#include "SkString.h"
+#include "BenchTimer.h"
+#include "Stats.h"
+
+typedef WallTimer Timer;
+
__SK_FORCE_IMAGE_DECODER_LINKING;
-DEFINE_string2(skps, r, "skps", "Directory containing SKPs to read and re-record.");
-DEFINE_int32(loops, 10, "Number of times to play back each SKP.");
+DEFINE_string2(skps, r, "skps", "Directory containing SKPs to playback.");
+DEFINE_int32(samples, 10, "Gather this many samples of each picture playback.");
DEFINE_bool(skr, false, "Play via SkRecord instead of SkPicture.");
DEFINE_int32(tile, 1000000000, "Simulated tile size.");
DEFINE_string(match, "", "The usual filters on file names of SKPs to bench.");
DEFINE_string(timescale, "ms", "Print times in ms, us, or ns");
-
-static double scale_time(double ms) {
- if (FLAGS_timescale.contains("us")) ms *= 1000;
- if (FLAGS_timescale.contains("ns")) ms *= 1000000;
- return ms;
+DEFINE_int32(verbose, 0, "0: print min sample; "
+ "1: print min, mean, max and noise indication "
+ "2: print all samples");
+
+static double timescale() {
+ if (FLAGS_timescale.contains("us")) return 1000;
+ if (FLAGS_timescale.contains("ns")) return 1000000;
+ return 1;
}
static SkPicture* rerecord_with_tilegrid(SkPicture& src) {
@@ -49,6 +56,14 @@ static EXPERIMENTAL::SkPlayback* rerecord_with_skr(SkPicture& src) {
return recording.releasePlayback();
}
+static void draw(const EXPERIMENTAL::SkPlayback& skr, const SkPicture& skp, SkCanvas* canvas) {
+ if (FLAGS_skr) {
+ skr.draw(canvas);
+ } else {
+ skp.draw(canvas);
+ }
+}
+
static void bench(SkPMColor* scratch, SkPicture& src, const char* name) {
SkAutoTUnref<SkPicture> picture(rerecord_with_tilegrid(src));
SkAutoTDelete<EXPERIMENTAL::SkPlayback> record(rerecord_with_skr(src));
@@ -59,19 +74,34 @@ static void bench(SkPMColor* scratch, SkPicture& src, const char* name) {
src.width() * sizeof(SkPMColor)));
canvas->clipRect(SkRect::MakeWH(SkIntToScalar(FLAGS_tile), SkIntToScalar(FLAGS_tile)));
- BenchTimer timer;
- timer.start();
- for (int i = 0; i < FLAGS_loops; i++) {
- if (FLAGS_skr) {
- record->draw(canvas.get());
- } else {
- picture->draw(canvas.get());
- }
+ // Draw once to warm any caches. The first sample otherwise can be very noisy.
+ draw(*record, *picture, canvas.get());
+
+ Timer timer;
+ SkAutoTMalloc<double> samples(FLAGS_samples);
+ for (int i = 0; i < FLAGS_samples; i++) {
+ // We assume timer overhead (typically, ~30ns) is insignificant
+ // compared to draw runtime (at least ~100us, usually several ms).
+ timer.start(timescale());
+ draw(*record, *picture, canvas.get());
+ timer.end();
+ samples[i] = timer.fWall;
}
- timer.end();
- const double msPerLoop = timer.fCpu / (double)FLAGS_loops;
- printf("%f\t%s\n", scale_time(msPerLoop), name);
+ Stats stats(samples.get(), FLAGS_samples);
+ if (FLAGS_verbose == 0) {
+ printf("%g\t%s\n", stats.min, name);
+ } else if (FLAGS_verbose == 1) {
+ // Get a rough idea of how noisy the measurements were.
+ const double noisePercent = 100 * sqrt(stats.var) / stats.mean;
+ printf("%g\t%g\t%g\t±%.0f%%\t%s\n", stats.min, stats.mean, stats.max, noisePercent, name);
+ } else if (FLAGS_verbose == 2) {
+ printf("%s", name);
+ for (int i = 0; i < FLAGS_samples; i++) {
+ printf("\t%g", samples[i]);
+ }
+ printf("\n");
+ }
}
int tool_main(int argc, char** argv);
diff --git a/tools/bench_record.cpp b/tools/bench_record.cpp
index 025127e4c2..a8d7a8a0e5 100644
--- a/tools/bench_record.cpp
+++ b/tools/bench_record.cpp
@@ -5,7 +5,6 @@
* found in the LICENSE file.
*/
-#include "BenchTimer.h"
#include "SkCommandLineFlags.h"
#include "SkForceLinking.h"
#include "SkGraphics.h"
@@ -15,28 +14,32 @@
#include "SkRecording.h"
#include "SkStream.h"
#include "SkString.h"
+
+#include "BenchTimer.h"
#include "LazyDecodeBitmap.h"
+#include "Stats.h"
-__SK_FORCE_IMAGE_DECODER_LINKING;
+typedef WallTimer Timer;
-// Just reading all the SKPs takes about 2 seconds for me, which is the same as about 100 loops of
-// rerecording all the SKPs. So we default to --loops=900, which makes ~90% of our time spent in
-// recording, and this should take ~20 seconds to run.
+__SK_FORCE_IMAGE_DECODER_LINKING;
DEFINE_string2(skps, r, "skps", "Directory containing SKPs to read and re-record.");
-DEFINE_int32(loops, 900, "Number of times to re-record each SKP.");
-DEFINE_bool(endRecording, true, "If false, don't time SkPicture::endRecording()");
-DEFINE_int32(nullSize, 1000, "Pretend dimension of null source picture.");
+DEFINE_int32(samples, 10, "Number of times to re-record each SKP.");
DEFINE_int32(tileGridSize, 512, "Set the tile grid size. Has no effect if bbh is not set to tilegrid.");
DEFINE_string(bbh, "", "Turn on the bbh and select the type, one of rtree, tilegrid, quadtree");
DEFINE_bool(skr, false, "Record SKR instead of SKP.");
DEFINE_string(match, "", "The usual filters on file names of SKPs to bench.");
DEFINE_string(timescale, "us", "Print times in ms, us, or ns");
-
-static double scale_time(double ms) {
- if (FLAGS_timescale.contains("us")) ms *= 1000;
- if (FLAGS_timescale.contains("ns")) ms *= 1000000;
- return ms;
+DEFINE_double(overheadGoal, 0.0001,
+ "Try to make timer overhead at most this fraction of our sample measurements.");
+DEFINE_int32(verbose, 0, "0: print min sample; "
+ "1: print min, mean, max and noise indication "
+ "2: print all samples");
+
+static double timescale() {
+ if (FLAGS_timescale.contains("us")) return 1000;
+ if (FLAGS_timescale.contains("ns")) return 1000000;
+ return 1;
}
static SkBBHFactory* parse_FLAGS_bbh() {
@@ -61,35 +64,62 @@ static SkBBHFactory* parse_FLAGS_bbh() {
return NULL;
}
-static void bench_record(SkPicture* src, const char* name, SkBBHFactory* bbhFactory) {
- BenchTimer timer;
- timer.start();
- const int width = src ? src->width() : FLAGS_nullSize;
- const int height = src ? src->height() : FLAGS_nullSize;
-
- for (int i = 0; i < FLAGS_loops; i++) {
- if (FLAGS_skr) {
- EXPERIMENTAL::SkRecording recording(width, height);
- if (NULL != src) {
- src->draw(recording.canvas());
- }
- // Release and delete the SkPlayback so that recording optimizes its SkRecord.
- SkDELETE(recording.releasePlayback());
- } else {
- SkPictureRecorder recorder;
- SkCanvas* canvas = recorder.beginRecording(width, height, bbhFactory);
- if (NULL != src) {
- src->draw(canvas);
- }
- if (FLAGS_endRecording) {
- SkAutoTUnref<SkPicture> dst(recorder.endRecording());
- }
+static void rerecord(const SkPicture& src, SkBBHFactory* bbhFactory) {
+ if (FLAGS_skr) {
+ EXPERIMENTAL::SkRecording recording(src.width(), src.height());
+ src.draw(recording.canvas());
+ // Release and delete the SkPlayback so that recording optimizes its SkRecord.
+ SkDELETE(recording.releasePlayback());
+ } else {
+ SkPictureRecorder recorder;
+ src.draw(recorder.beginRecording(src.width(), src.height(), bbhFactory));
+ SkAutoTUnref<SkPicture> dst(recorder.endRecording());
+ }
+}
+
+static void bench_record(const SkPicture& src,
+ const double timerOverhead,
+ const char* name,
+ SkBBHFactory* bbhFactory) {
+ // Rerecord once to warm up any caches. Otherwise the first sample can be very noisy.
+ rerecord(src, bbhFactory);
+
+ // Rerecord once to see how many times we should loop to make timer overhead insignificant.
+ Timer timer;
+ do {
+ timer.start(timescale());
+ rerecord(src, bbhFactory);
+ timer.end();
+ } while (timer.fWall < timerOverhead); // Loop just in case something bizarre happens.
+
+ // We want (timer overhead / measurement) to be less than FLAGS_overheadGoal.
+ // So in each sample, we'll loop enough times to have made that true for our first measurement.
+ const int loops = (int)ceil(timerOverhead / timer.fWall / FLAGS_overheadGoal);
+
+ SkAutoTMalloc<double> samples(FLAGS_samples);
+ for (int i = 0; i < FLAGS_samples; i++) {
+ timer.start(timescale());
+ for (int j = 0; j < loops; j++) {
+ rerecord(src, bbhFactory);
}
+ timer.end();
+ samples[i] = timer.fWall / loops;
}
- timer.end();
- const double msPerLoop = timer.fCpu / (double)FLAGS_loops;
- printf("%f\t%s\n", scale_time(msPerLoop), name);
+ Stats stats(samples.get(), FLAGS_samples);
+ if (FLAGS_verbose == 0) {
+ printf("%g\t%s\n", stats.min, name);
+ } else if (FLAGS_verbose == 1) {
+ // Get a rough idea of how noisy the measurements were.
+ const double noisePercent = 100 * sqrt(stats.var) / stats.mean;
+ printf("%g\t%g\t%g\t±%.0f%%\t%s\n", stats.min, stats.mean, stats.max, noisePercent, name);
+ } else if (FLAGS_verbose == 2) {
+ printf("%s", name);
+ for (int i = 0; i < FLAGS_samples; i++) {
+ printf("\t%g", samples[i]);
+ }
+ printf("\n");
+ }
}
int tool_main(int argc, char** argv);
@@ -103,10 +133,17 @@ int tool_main(int argc, char** argv) {
}
SkAutoTDelete<SkBBHFactory> bbhFactory(parse_FLAGS_bbh());
- bench_record(NULL, "NULL", bbhFactory.get());
- if (FLAGS_skps.isEmpty()) {
- return 0;
+
+ // Each run will use this timer overhead estimate to guess how many times it should run.
+ static const int kOverheadLoops = 10000000;
+ Timer timer;
+ double overheadEstimate = 0.0;
+ for (int i = 0; i < kOverheadLoops; i++) {
+ timer.start(timescale());
+ timer.end();
+ overheadEstimate += timer.fWall;
}
+ overheadEstimate /= kOverheadLoops;
SkOSFile::Iter it(FLAGS_skps[0], ".skp");
SkString filename;
@@ -131,7 +168,7 @@ int tool_main(int argc, char** argv) {
failed = true;
continue;
}
- bench_record(src, filename.c_str(), bbhFactory.get());
+ bench_record(*src, overheadEstimate, filename.c_str(), bbhFactory.get());
}
return failed ? 1 : 0;
}