diff options
-rw-r--r-- | bench/nanobench.cpp | 82 | ||||
-rw-r--r-- | tools/Stats.h | 17 | ||||
-rw-r--r-- | tools/VisualBench.cpp | 2 |
3 files changed, 76 insertions, 25 deletions
diff --git a/bench/nanobench.cpp b/bench/nanobench.cpp index 9f3887bc20..4aad61da72 100644 --- a/bench/nanobench.cpp +++ b/bench/nanobench.cpp @@ -55,6 +55,8 @@ __SK_FORCE_IMAGE_DECODER_LINKING; +static const int kTimedSampling = 0; + static const int kAutoTuneLoops = 0; static const int kDefaultLoops = @@ -72,9 +74,17 @@ static SkString loops_help_txt() { return help; } +static SkString to_string(int n) { + SkString str; + str.appendS32(n); + return str; +} + DEFINE_int32(loops, kDefaultLoops, loops_help_txt().c_str()); DEFINE_int32(samples, 10, "Number of samples to measure for each bench."); +DEFINE_string(samplingTime, "0", "Amount of time to run each bench. Takes precedence over samples." + "Must be \"0\", \"%%lfs\", or \"%%lfms\""); DEFINE_int32(overheadLoops, 100000, "Loops to estimate timer overhead."); DEFINE_double(overheadGoal, 0.0001, "Loop until timer overhead is at most this fraction of our measurments."); @@ -269,7 +279,7 @@ static bool write_canvas_png(Target* target, const SkString& filename) { } static int kFailedLoops = -2; -static int cpu_bench(const double overhead, Target* target, Benchmark* bench, double* samples) { +static int setup_cpu_bench(const double overhead, Target* target, Benchmark* bench) { // First figure out approximately how many loops of bench it takes to make overhead negligible. double bench_plus_overhead = 0.0; int round = 0; @@ -310,16 +320,10 @@ static int cpu_bench(const double overhead, Target* target, Benchmark* bench, do loops = detect_forever_loops(loops); } - for (int i = 0; i < FLAGS_samples; i++) { - samples[i] = time(loops, bench, target) / loops; - } return loops; } -static int gpu_bench(Target* target, - Benchmark* bench, - double* samples, - int maxGpuFrameLag) { +static int setup_gpu_bench(Target* target, Benchmark* bench, int maxGpuFrameLag) { // First, figure out how many loops it'll take to get a frame up to FLAGS_gpuMs. int loops = bench->calculateLoops(FLAGS_loops); if (kAutoTuneLoops == loops) { @@ -355,11 +359,6 @@ static int gpu_bench(Target* target, time(loops, bench, target); } - // Now, actually do the timing! - for (int i = 0; i < FLAGS_samples; i++) { - samples[i] = time(loops, bench, target) / loops; - } - return loops; } @@ -946,6 +945,24 @@ int nanobench_main() { FLAGS_verbose = true; } + double samplingTimeMs = 0; + if (0 != strcmp("0", FLAGS_samplingTime[0])) { + SkSTArray<8, char> timeUnit; + timeUnit.push_back_n(static_cast<int>(strlen(FLAGS_samplingTime[0])) + 1); + if (2 != sscanf(FLAGS_samplingTime[0], "%lf%s", &samplingTimeMs, timeUnit.begin()) || + (0 != strcmp("s", timeUnit.begin()) && 0 != strcmp("ms", timeUnit.begin()))) { + SkDebugf("Invalid --samplingTime \"%s\". Must be \"0\", \"%%lfs\", or \"%%lfms\"\n", + FLAGS_samplingTime[0]); + exit(0); + } + if (0 == strcmp("s", timeUnit.begin())) { + samplingTimeMs *= 1000; + } + if (samplingTimeMs) { + FLAGS_samples = kTimedSampling; + } + } + if (kAutoTuneLoops != FLAGS_loops) { FLAGS_samples = 1; FLAGS_gpuFrameLag = 0; @@ -983,7 +1000,7 @@ int nanobench_main() { const double overhead = estimate_timer_overhead(); SkDebugf("Timer overhead: %s\n", HUMANIZE(overhead)); - SkAutoTMalloc<double> samples(FLAGS_samples); + SkTArray<double> samples; if (kAutoTuneLoops != FLAGS_loops) { SkDebugf("Fixed number of loops; times would only be misleading so we won't print them.\n"); @@ -991,6 +1008,8 @@ int nanobench_main() { // No header. } else if (FLAGS_quiet) { SkDebugf("median\tbench\tconfig\n"); + } else if (kTimedSampling == FLAGS_samples) { + SkDebugf("curr/maxrss\tloops\tmin\tmedian\tmean\tmax\tstddev\tsamples\tconfig\tbench\n"); } else { SkDebugf("curr/maxrss\tloops\tmin\tmedian\tmean\tmax\tstddev\t%-*s\tconfig\tbench\n", FLAGS_samples, "samples"); @@ -1022,11 +1041,29 @@ int nanobench_main() { targets[j]->setup(); bench->perCanvasPreDraw(canvas); - int frameLag; - const int loops = - targets[j]->needsFrameTiming(&frameLag) - ? gpu_bench(targets[j], bench.get(), samples.get(), frameLag) - : cpu_bench(overhead, targets[j], bench.get(), samples.get()); + int maxFrameLag; + const int loops = targets[j]->needsFrameTiming(&maxFrameLag) + ? setup_gpu_bench(targets[j], bench.get(), maxFrameLag) + : setup_cpu_bench(overhead, targets[j], bench.get()); + + if (kTimedSampling != FLAGS_samples) { + samples.reset(FLAGS_samples); + for (int s = 0; s < FLAGS_samples; s++) { + samples[s] = time(loops, bench, targets[j]) / loops; + } + } else if (samplingTimeMs) { + samples.reset(); + if (FLAGS_verbose) { + SkDebugf("Begin sampling %s for %ims\n", + bench->getUniqueName(), static_cast<int>(samplingTimeMs)); + } + WallTimer timer; + timer.start(); + do { + samples.push_back(time(loops, bench, targets[j]) / loops); + timer.end(); + } while (timer.fWall < samplingTimeMs); + } bench->perCanvasPostDraw(canvas); @@ -1043,7 +1080,7 @@ int nanobench_main() { continue; } - Stats stats(samples.get(), FLAGS_samples); + Stats stats(samples); log->config(config); log->configOption("name", bench->getName()); benchStream.fillCurrentOptions(log.get()); @@ -1063,7 +1100,7 @@ int nanobench_main() { , bench->getUniqueName() , config); } else if (FLAGS_verbose) { - for (int i = 0; i < FLAGS_samples; i++) { + for (int i = 0; i < samples.count(); i++) { SkDebugf("%s ", HUMANIZE(samples[i])); } SkDebugf("%s\n", bench->getUniqueName()); @@ -1083,7 +1120,8 @@ int nanobench_main() { , HUMANIZE(stats.mean) , HUMANIZE(stats.max) , stddev_percent - , stats.plot.c_str() + , kTimedSampling != FLAGS_samples ? stats.plot.c_str() + : to_string(samples.count()).c_str() , config , bench->getUniqueName() ); diff --git a/tools/Stats.h b/tools/Stats.h index 8487a9497d..12c1d35e1f 100644 --- a/tools/Stats.h +++ b/tools/Stats.h @@ -1,3 +1,10 @@ +/* + * Copyright 2015 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + #ifndef Stats_DEFINED #define Stats_DEFINED @@ -11,7 +18,13 @@ #endif struct Stats { - Stats(const double samples[], int n) { + Stats(const SkTArray<double>& samples) { + int n = samples.count(); + if (!n) { + min = max = mean = var = median = 0; + return; + } + min = samples[0]; max = samples[0]; for (int i = 0; i < n; i++) { @@ -32,7 +45,7 @@ struct Stats { var = err / (n-1); SkAutoTMalloc<double> sorted(n); - memcpy(sorted.get(), samples, n * sizeof(double)); + memcpy(sorted.get(), samples.begin(), n * sizeof(double)); SkTQSort(sorted.get(), sorted.get() + n - 1); median = sorted[n/2]; diff --git a/tools/VisualBench.cpp b/tools/VisualBench.cpp index cbc8994592..ac53b43167 100644 --- a/tools/VisualBench.cpp +++ b/tools/VisualBench.cpp @@ -152,7 +152,7 @@ void VisualBench::printStats() { SkDebugf("%s\n", shortName.c_str()); } else { SkASSERT(measurements.count()); - Stats stats(measurements.begin(), measurements.count()); + Stats stats(measurements); const double stdDevPercent = 100 * sqrt(stats.var) / stats.mean; SkDebugf("%4d/%-4dMB\t%d\t%d\t%s\t%s\t%s\t%s\t%.0f%%\t%s\n", sk_tools::getCurrResidentSetSizeMB(), |