aboutsummaryrefslogtreecommitdiffhomepage
path: root/tools
diff options
context:
space:
mode:
authorGravatar Mike Klein <mtklein@chromium.org>2017-08-11 10:37:35 -0400
committerGravatar Skia Commit-Bot <skia-commit-bot@chromium.org>2017-08-11 15:32:36 +0000
commit4f6e271596cf8aaa5fd90801c0b353d5eb7c9f78 (patch)
tree16591823313cec7e31fee0c19e78dd6cac5a524c /tools
parent880768032d7bd1528dc84f5b4408d4fa418bae7d (diff)
ok, add a bench source
This new source acts like other sources (GMs, SKPs) for benchmarks. It times multiple samples (controlled by samples=N, default 20), and each of those samples uses the same strategy as monobench, growing loops exponentially until it runs for at least 10ms. When done it prints the fastest and the two slowest samples. In practice the 100th percentile sample is very different from the next slowest due to caching, and the fastest is always interesting.

Because these benchmarks run in whatever execution engine ok has selected, on non-Windows platforms you have some real control over the interaction between benchmarks. In its default "fork" mode each benchmark runs independently in its own process, so the 100th percentiles really stand out. The other modes "thread" and "serial" work as you'd expect too.

Here's an example where you can see how the different interactions work:

out/ok bench:samples=100 8888 filter:search=text_16_AA fork
[text_16_AA_WT] 2.32µs @0 6.23µs @99 24.3ms @100
[text_16_AA_FF] 2.41µs @0 5.7µs @99 23.3ms @100
[text_16_AA_88] 2.55µs @0 5.6µs @99 24.8ms @100
[text_16_AA_BK] 1.97µs @0 5.44µs @99 23.2ms @100

out/ok bench:samples=100 8888 filter:search=text_16_AA thread
[text_16_AA_FF] 2.45µs @0 23.5µs @99 24.8ms @100
[text_16_AA_WT] 2.52µs @0 17.8µs @99 24.7ms @100
[text_16_AA_88] 2.55µs @0 19.7µs @99 25.1ms @100
[text_16_AA_BK] 1.8µs @0 14.7µs @99 25.1ms @100

out/ok bench:samples=100 8888 filter:search=text_16_AA serial
[text_16_AA_88] 2.35µs @0 3.53µs @99 16.7ms @100
[text_16_AA_FF] 2.09µs @0 2.73µs @99 2.91µs @100
[text_16_AA_BK] 1.75µs @0 2.46µs @99 2.65µs @100
[text_16_AA_WT] 2.1µs @0 3.16µs @99 3.17µs @100

In the first "fork" case all runs are independent and have roughly the same profile. "thread" looks similar except you can see them contending at the 99th percentile. In "serial", the first bench warms up the rest, so their 100th percentiles are all much faster.
Change-Id: I01a9f8c54b540221a9f232b271bb8ef3fda2569c
Reviewed-on: https://skia-review.googlesource.com/33585
Reviewed-by: Herb Derby <herb@google.com>
Commit-Queue: Mike Klein <mtklein@chromium.org>
Diffstat (limited to 'tools')
-rw-r--r--tools/ok_srcs.cpp103
1 files changed, 101 insertions, 2 deletions
diff --git a/tools/ok_srcs.cpp b/tools/ok_srcs.cpp
index dc040f86c9..ae1c5d8ca7 100644
--- a/tools/ok_srcs.cpp
+++ b/tools/ok_srcs.cpp
@@ -5,11 +5,16 @@
* found in the LICENSE file.
*/
-#include "ok.h"
-#include "gm.h"
+#include "Benchmark.h"
#include "SkData.h"
#include "SkOSFile.h"
#include "SkPicture.h"
+#include "Timer.h"
+#include "gm.h"
+#include "ok.h"
+#include <algorithm>
+#include <chrono>
+#include <limits>
#include <vector>
struct GMStream : Stream {
@@ -112,3 +117,97 @@ struct SKPStream : Stream {
}
};
static Register skp{"skp", "draw SKPs from dir=skps", SKPStream::Create};
+
+struct BenchStream : Stream { // Stream that hands out one BenchSrc per BenchRegistry entry (see next()).
+ const BenchRegistry* registry = BenchRegistry::Head(); // Cursor into the registry list; advanced by next().
+ int samples; // Number of timed samples per benchmark; set by Create().
+
+ static std::unique_ptr<Stream> Create(Options options) { // Factory: builds a BenchStream from the "samples" option.
+ BenchStream stream;
+ stream.samples = std::max(1, atoi(options("samples", "20").c_str())); // Default "20"; clamped to >= 1 (atoi yields 0 on non-numeric text).
+ return move_unique(stream); // move_unique is defined elsewhere; presumably moves into a heap-allocated unique_ptr -- confirm.
+ }
+
+ struct BenchSrc : Src { // Src wrapping a single benchmark; draw() times it instead of just rendering.
+ Benchmark* (*factory)(void*); // Creates the Benchmark; init() calls it with nullptr.
+ std::unique_ptr<Benchmark> bench; // Created lazily by init(); owned by this Src.
+ int samples; // Copied from the owning BenchStream in next().
+
+ void init() { // Lazily constructs bench; calling it again is a no-op.
+ if (bench) { return; }
+ bench.reset(factory(nullptr));
+ }
+
+ std::string name() override { // Returns the benchmark's own name.
+ this->init();
+ return bench->getName();
+ }
+
+ SkISize size() override { // Returns the benchmark's reported size as an SkISize.
+ this->init();
+ return { bench->getSize().x(), bench->getSize().y() };
+ }
+
+ Status draw(SkCanvas* canvas) override { // Times `samples` samples, logs a summary line, and always returns Status::OK.
+ this->init();
+
+ using ms = std::chrono::duration<double, std::milli>; // Milliseconds held as a double.
+ std::vector<ms> sample(samples); // One per-loop time per sample; elements start value-initialized (zero).
+
+ bench->delayedSetup();
+ if (canvas) { // canvas may be null; the per-canvas hooks are skipped in that case.
+ bench->perCanvasPreDraw(canvas);
+ }
+ for (int i = 0; i < samples; i++) {
+ using clock = std::chrono::high_resolution_clock;
+ for (int loops = 1; loops < 1000000000; loops *= 2) { // Doubles loops per attempt: 1, 2, 4, ... up to 2^29.
+ bench->preDraw(canvas);
+ auto start = clock::now();
+ bench->draw(loops, canvas); // Only this call is inside the timed region.
+ ms elapsed = clock::now() - start;
+ bench->postDraw(canvas);
+
+ if (elapsed.count() < 10) { // Ran for under 10ms: retry with twice as many loops.
+ continue;
+ }
+
+ sample[i] = elapsed / loops; // Record average time per loop for this sample.
+ break;
+ } // NOTE(review): if no loop count ever reaches 10ms, sample[i] keeps its initial zero value.
+ }
+ if (canvas) {
+ bench->perCanvasPostDraw(canvas);
+ }
+
+ std::sort(sample.begin(), sample.end()); // Ascending: fastest first, slowest last.
+
+ SkString msg = SkStringPrintf("%s\t@0", HumanizeMs(sample[0].count()).c_str()); // Fastest sample, labelled @0.
+ if (samples > 2) { // Second-slowest sample, labelled with its percentile.
+ msg.appendf("\t%s\t@%g", HumanizeMs(sample[samples-2].count()).c_str()
+ , 100.0*(samples-1) / samples); // e.g. @99 when samples=100.
+ }
+ if (samples > 1) { // Slowest sample, labelled @100.
+ msg.appendf("\t%s\t@100", HumanizeMs(sample[samples-1].count()).c_str());
+ }
+ ok_log(msg.c_str());
+
+ return Status::OK;
+ }
+ };
+
+ std::unique_ptr<Src> next() override { // Returns a BenchSrc for the current registry entry, or nullptr when none remain.
+ if (!registry) {
+ return nullptr;
+ }
+ BenchSrc src;
+ src.factory = registry->factory();
+ src.samples = samples;
+ registry = registry->next(); // Advance the cursor so the following call yields the next benchmark.
+ return move_unique(src);
+ }
+};
+static Register bench{ // File-local Register object for the benchmark source; Register is declared elsewhere (presumably ok.h) -- confirm what its constructor does.
+ "bench", // Name; the commit message invokes it as "bench:samples=100".
+ "time benchmarks linked into this binary samples=20 times each", // Human-readable description.
+ BenchStream::Create, // Factory that builds the stream from Options.
+};