From 4f6e271596cf8aaa5fd90801c0b353d5eb7c9f78 Mon Sep 17 00:00:00 2001
From: Mike Klein
Date: Fri, 11 Aug 2017 10:37:35 -0400
Subject: ok, add a bench source
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This new source acts like other sources (GMs, SKPs) for benchmarks.

It times multiple samples (controlled by samples=N, default 20), and each of those samples uses the same strategy as monobench, growing loops exponentially until it runs for at least 10ms. When done it prints the fastest and the two slowest samples. In practice the 100th percentile sample is very different from the next slowest due to caching, and the fastest is always interesting.

Because these benchmarks run in whatever execution engine ok has selected, on non-Windows platforms you have some real control over the interaction between benchmarks. In its default "fork" mode each benchmark runs independently in its own process, so the 100th percentiles really stand out. The other modes "thread" and "serial" work as you'd expect too.

Here's an example where you can see how the different interactions work:

out/ok bench:samples=100 8888 filter:search=text_16_AA fork
[text_16_AA_WT] 2.32µs @0 6.23µs @99 24.3ms @100
[text_16_AA_FF] 2.41µs @0 5.7µs @99 23.3ms @100
[text_16_AA_88] 2.55µs @0 5.6µs @99 24.8ms @100
[text_16_AA_BK] 1.97µs @0 5.44µs @99 23.2ms @100

out/ok bench:samples=100 8888 filter:search=text_16_AA thread
[text_16_AA_FF] 2.45µs @0 23.5µs @99 24.8ms @100
[text_16_AA_WT] 2.52µs @0 17.8µs @99 24.7ms @100
[text_16_AA_88] 2.55µs @0 19.7µs @99 25.1ms @100
[text_16_AA_BK] 1.8µs @0 14.7µs @99 25.1ms @100

out/ok bench:samples=100 8888 filter:search=text_16_AA serial
[text_16_AA_88] 2.35µs @0 3.53µs @99 16.7ms @100
[text_16_AA_FF] 2.09µs @0 2.73µs @99 2.91µs @100
[text_16_AA_BK] 1.75µs @0 2.46µs @99 2.65µs @100
[text_16_AA_WT] 2.1µs @0 3.16µs @99 3.17µs @100

In the first "fork" case all runs are independent and have roughly the same profile.
"thread" looks similar except you can see them contending at the 99th percentile. In "serial", the first bench warms up the rest, so their 100th percentiles are all much faster. Change-Id: I01a9f8c54b540221a9f232b271bb8ef3fda2569c Reviewed-on: https://skia-review.googlesource.com/33585 Reviewed-by: Herb Derby Commit-Queue: Mike Klein --- tools/ok_srcs.cpp | 103 ++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 101 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/ok_srcs.cpp b/tools/ok_srcs.cpp index dc040f86c9..ae1c5d8ca7 100644 --- a/tools/ok_srcs.cpp +++ b/tools/ok_srcs.cpp @@ -5,11 +5,16 @@ * found in the LICENSE file. */ -#include "ok.h" -#include "gm.h" +#include "Benchmark.h" #include "SkData.h" #include "SkOSFile.h" #include "SkPicture.h" +#include "Timer.h" +#include "gm.h" +#include "ok.h" +#include +#include +#include #include struct GMStream : Stream { @@ -112,3 +117,97 @@ struct SKPStream : Stream { } }; static Register skp{"skp", "draw SKPs from dir=skps", SKPStream::Create}; + +struct BenchStream : Stream { + const BenchRegistry* registry = BenchRegistry::Head(); + int samples; + + static std::unique_ptr Create(Options options) { + BenchStream stream; + stream.samples = std::max(1, atoi(options("samples", "20").c_str())); + return move_unique(stream); + } + + struct BenchSrc : Src { + Benchmark* (*factory)(void*); + std::unique_ptr bench; + int samples; + + void init() { + if (bench) { return; } + bench.reset(factory(nullptr)); + } + + std::string name() override { + this->init(); + return bench->getName(); + } + + SkISize size() override { + this->init(); + return { bench->getSize().x(), bench->getSize().y() }; + } + + Status draw(SkCanvas* canvas) override { + this->init(); + + using ms = std::chrono::duration; + std::vector sample(samples); + + bench->delayedSetup(); + if (canvas) { + bench->perCanvasPreDraw(canvas); + } + for (int i = 0; i < samples; i++) { + using clock = 
std::chrono::high_resolution_clock; + for (int loops = 1; loops < 1000000000; loops *= 2) { + bench->preDraw(canvas); + auto start = clock::now(); + bench->draw(loops, canvas); + ms elapsed = clock::now() - start; + bench->postDraw(canvas); + + if (elapsed.count() < 10) { + continue; + } + + sample[i] = elapsed / loops; + break; + } + } + if (canvas) { + bench->perCanvasPostDraw(canvas); + } + + std::sort(sample.begin(), sample.end()); + + SkString msg = SkStringPrintf("%s\t@0", HumanizeMs(sample[0].count()).c_str()); + if (samples > 2) { + msg.appendf("\t%s\t@%g", HumanizeMs(sample[samples-2].count()).c_str() + , 100.0*(samples-1) / samples); + } + if (samples > 1) { + msg.appendf("\t%s\t@100", HumanizeMs(sample[samples-1].count()).c_str()); + } + ok_log(msg.c_str()); + + return Status::OK; + } + }; + + std::unique_ptr next() override { + if (!registry) { + return nullptr; + } + BenchSrc src; + src.factory = registry->factory(); + src.samples = samples; + registry = registry->next(); + return move_unique(src); + } +}; +static Register bench{ + "bench", + "time benchmarks linked into this binary samples=20 times each", + BenchStream::Create, +}; -- cgit v1.2.3