/* * Copyright 2014 Google Inc. * * Use of this source code is governed by a BSD-style license that can be * found in the LICENSE file. */ #include #include "Benchmark.h" #include "CrashHandler.h" #include "GMBench.h" #include "ProcStats.h" #include "ResultsWriter.h" #include "RecordingBench.h" #include "SKPBench.h" #include "Stats.h" #include "Timer.h" #include "SkBBHFactory.h" #include "SkCanvas.h" #include "SkCommonFlags.h" #include "SkForceLinking.h" #include "SkGraphics.h" #include "SkOSFile.h" #include "SkPictureRecorder.h" #include "SkString.h" #include "SkSurface.h" #if SK_SUPPORT_GPU #include "gl/GrGLDefines.h" #include "GrContextFactory.h" SkAutoTDelete gGrFactory; #endif __SK_FORCE_IMAGE_DECODER_LINKING; static const int kAutoTuneLoops = -1; static const int kDefaultLoops = #ifdef SK_DEBUG 1; #else kAutoTuneLoops; #endif static SkString loops_help_txt() { SkString help; help.printf("Number of times to run each bench. Set this to %d to auto-" "tune for each bench. Timings are only reported when auto-tuning.", kAutoTuneLoops); return help; } DEFINE_int32(loops, kDefaultLoops, loops_help_txt().c_str()); DEFINE_int32(samples, 10, "Number of samples to measure for each bench."); DEFINE_int32(overheadLoops, 100000, "Loops to estimate timer overhead."); DEFINE_double(overheadGoal, 0.0001, "Loop until timer overhead is at most this fraction of our measurments."); DEFINE_double(gpuMs, 5, "Target bench time in millseconds for GPU."); DEFINE_int32(gpuFrameLag, 5, "Overestimate of maximum number of frames GPU allows to lag."); DEFINE_bool(gpuCompressAlphaMasks, false, "Compress masks generated from falling back to " "software path rendering."); DEFINE_string(outResultsFile, "", "If given, write results here as JSON."); DEFINE_int32(maxCalibrationAttempts, 3, "Try up to this many times to guess loops for a bench, or skip the bench."); DEFINE_int32(maxLoops, 1000000, "Never run a bench more times than this."); DEFINE_string(clip, "0,0,1000,1000", "Clip for SKPs."); DEFINE_string(scales, "1.0", "Space-separated scales for SKPs."); DEFINE_bool(bbh, true, "Build a BBH for SKPs?"); static SkString humanize(double ms) { if (ms > 1e+3) return SkStringPrintf("%.3gs", ms/1e3); if (ms < 1e-3) return SkStringPrintf("%.3gns", ms*1e6); #ifdef SK_BUILD_FOR_WIN if (ms < 1) return SkStringPrintf("%.3gus", ms*1e3); #else if (ms < 1) return SkStringPrintf("%.3gµs", ms*1e3); #endif return SkStringPrintf("%.3gms", ms); } #define HUMANIZE(ms) humanize(ms).c_str() static double time(int loops, Benchmark* bench, SkCanvas* canvas, SkGLContextHelper* gl) { if (canvas) { canvas->clear(SK_ColorWHITE); } WallTimer timer; timer.start(); if (bench) { bench->draw(loops, canvas); } if (canvas) { canvas->flush(); } #if SK_SUPPORT_GPU if (gl) { SK_GL(*gl, Flush()); gl->swapBuffers(); } #endif timer.end(); return timer.fWall; } static double estimate_timer_overhead() { double overhead = 0; for (int i = 0; i < FLAGS_overheadLoops; i++) { overhead += time(1, NULL, NULL, NULL); } return overhead / FLAGS_overheadLoops; } static int clamp_loops(int loops) { if (loops < 1) { SkDebugf("ERROR: clamping loops from %d to 1.\n", loops); return 1; } if (loops > FLAGS_maxLoops) { SkDebugf("WARNING: clamping loops from %d to FLAGS_maxLoops, %d.\n", loops, FLAGS_maxLoops); return FLAGS_maxLoops; } return loops; } static bool write_canvas_png(SkCanvas* canvas, const SkString& filename) { if (filename.isEmpty()) { return false; } if (kUnknown_SkColorType == canvas->imageInfo().colorType()) { return false; } SkBitmap bmp; bmp.setInfo(canvas->imageInfo()); if (!canvas->readPixels(&bmp, 0, 0)) { SkDebugf("Can't read canvas pixels.\n"); return false; } SkString dir = SkOSPath::Dirname(filename.c_str()); if (!sk_mkdir(dir.c_str())) { SkDebugf("Can't make dir %s.\n", dir.c_str()); return false; } SkFILEWStream stream(filename.c_str()); if (!stream.isValid()) { SkDebugf("Can't write %s.\n", filename.c_str()); return false; } if (!SkImageEncoder::EncodeStream(&stream, bmp, SkImageEncoder::kPNG_Type, 100)) { SkDebugf("Can't encode a PNG.\n"); return false; } return true; } static int kFailedLoops = -2; static int cpu_bench(const double overhead, Benchmark* bench, SkCanvas* canvas, double* samples) { // First figure out approximately how many loops of bench it takes to make overhead negligible. double bench_plus_overhead = 0.0; int round = 0; if (kAutoTuneLoops == FLAGS_loops) { while (bench_plus_overhead < overhead) { if (round++ == FLAGS_maxCalibrationAttempts) { SkDebugf("WARNING: Can't estimate loops for %s (%s vs. %s); skipping.\n", bench->getUniqueName(), HUMANIZE(bench_plus_overhead), HUMANIZE(overhead)); return kFailedLoops; } bench_plus_overhead = time(1, bench, canvas, NULL); } } // Later we'll just start and stop the timer once but loop N times. // We'll pick N to make timer overhead negligible: // // overhead // ------------------------- < FLAGS_overheadGoal // overhead + N * Bench Time // // where bench_plus_overhead ≈ overhead + Bench Time. // // Doing some math, we get: // // (overhead / FLAGS_overheadGoal) - overhead // ------------------------------------------ < N // bench_plus_overhead - overhead) // // Luckily, this also works well in practice. :) int loops = FLAGS_loops; if (kAutoTuneLoops == loops) { const double numer = overhead / FLAGS_overheadGoal - overhead; const double denom = bench_plus_overhead - overhead; loops = (int)ceil(numer / denom); } loops = clamp_loops(loops); for (int i = 0; i < FLAGS_samples; i++) { samples[i] = time(loops, bench, canvas, NULL) / loops; } return loops; } #if SK_SUPPORT_GPU static int gpu_bench(SkGLContextHelper* gl, Benchmark* bench, SkCanvas* canvas, double* samples) { gl->makeCurrent(); // Make sure we're done with whatever came before. SK_GL(*gl, Finish()); // First, figure out how many loops it'll take to get a frame up to FLAGS_gpuMs. int loops = FLAGS_loops; if (kAutoTuneLoops == loops) { loops = 1; double elapsed = 0; do { loops *= 2; // If the GPU lets frames lag at all, we need to make sure we're timing // _this_ round, not still timing last round. We force this by looping // more times than any reasonable GPU will allow frames to lag. for (int i = 0; i < FLAGS_gpuFrameLag; i++) { elapsed = time(loops, bench, canvas, gl); } } while (elapsed < FLAGS_gpuMs); // We've overshot at least a little. Scale back linearly. loops = (int)ceil(loops * FLAGS_gpuMs / elapsed); // Might as well make sure we're not still timing our calibration. SK_GL(*gl, Finish()); } loops = clamp_loops(loops); // Pretty much the same deal as the calibration: do some warmup to make // sure we're timing steady-state pipelined frames. for (int i = 0; i < FLAGS_gpuFrameLag; i++) { time(loops, bench, canvas, gl); } // Now, actually do the timing! for (int i = 0; i < FLAGS_samples; i++) { samples[i] = time(loops, bench, canvas, gl) / loops; } return loops; } #endif static SkString to_lower(const char* str) { SkString lower(str); for (size_t i = 0; i < lower.size(); i++) { lower[i] = tolower(lower[i]); } return lower; } struct Config { const char* name; Benchmark::Backend backend; SkColorType color; SkAlphaType alpha; int samples; #if SK_SUPPORT_GPU GrContextFactory::GLContextType ctxType; #else int bogusInt; #endif }; struct Target { explicit Target(const Config& c) : config(c) {} const Config config; SkAutoTDelete surface; #if SK_SUPPORT_GPU SkGLContextHelper* gl; #endif }; static bool is_cpu_config_allowed(const char* name) { for (int i = 0; i < FLAGS_config.count(); i++) { if (to_lower(FLAGS_config[i]).equals(name)) { return true; } } return false; } #if SK_SUPPORT_GPU static bool is_gpu_config_allowed(const char* name, GrContextFactory::GLContextType ctxType, int sampleCnt) { if (!is_cpu_config_allowed(name)) { return false; } if (const GrContext* ctx = gGrFactory->get(ctxType)) { return sampleCnt <= ctx->getMaxSampleCount(); } return false; } #endif #if SK_SUPPORT_GPU #define kBogusGLContextType GrContextFactory::kNative_GLContextType #else #define kBogusGLContextType 0 #endif // Append all configs that are enabled and supported. static void create_configs(SkTDArray* configs) { #define CPU_CONFIG(name, backend, color, alpha) \ if (is_cpu_config_allowed(#name)) { \ Config config = { #name, Benchmark::backend, color, alpha, 0, kBogusGLContextType }; \ configs->push(config); \ } if (FLAGS_cpu) { CPU_CONFIG(nonrendering, kNonRendering_Backend, kUnknown_SkColorType, kUnpremul_SkAlphaType) CPU_CONFIG(8888, kRaster_Backend, kN32_SkColorType, kPremul_SkAlphaType) CPU_CONFIG(565, kRaster_Backend, kRGB_565_SkColorType, kOpaque_SkAlphaType) } #if SK_SUPPORT_GPU #define GPU_CONFIG(name, ctxType, samples) \ if (is_gpu_config_allowed(#name, GrContextFactory::ctxType, samples)) { \ Config config = { \ #name, \ Benchmark::kGPU_Backend, \ kN32_SkColorType, \ kPremul_SkAlphaType, \ samples, \ GrContextFactory::ctxType }; \ configs->push(config); \ } if (FLAGS_gpu) { GPU_CONFIG(gpu, kNative_GLContextType, 0) GPU_CONFIG(msaa4, kNative_GLContextType, 4) GPU_CONFIG(msaa16, kNative_GLContextType, 16) GPU_CONFIG(nvprmsaa4, kNVPR_GLContextType, 4) GPU_CONFIG(nvprmsaa16, kNVPR_GLContextType, 16) GPU_CONFIG(debug, kDebug_GLContextType, 0) GPU_CONFIG(nullgpu, kNull_GLContextType, 0) #ifdef SK_ANGLE GPU_CONFIG(angle, kANGLE_GLContextType, 0) #endif } #endif } // If bench is enabled for config, returns a Target* for it, otherwise NULL. static Target* is_enabled(Benchmark* bench, const Config& config) { if (!bench->isSuitableFor(config.backend)) { return NULL; } SkImageInfo info = SkImageInfo::Make(bench->getSize().fX, bench->getSize().fY, config.color, config.alpha); Target* target = new Target(config); if (Benchmark::kRaster_Backend == config.backend) { target->surface.reset(SkSurface::NewRaster(info)); } #if SK_SUPPORT_GPU else if (Benchmark::kGPU_Backend == config.backend) { target->surface.reset(SkSurface::NewRenderTarget(gGrFactory->get(config.ctxType), info, config.samples)); target->gl = gGrFactory->getGLContext(config.ctxType); } #endif if (Benchmark::kNonRendering_Backend != config.backend && !target->surface.get()) { delete target; return NULL; } return target; } // Creates targets for a benchmark and a set of configs. static void create_targets(SkTDArray* targets, Benchmark* b, const SkTDArray& configs) { for (int i = 0; i < configs.count(); ++i) { if (Target* t = is_enabled(b, configs[i])) { targets->push(t); } } } #if SK_SUPPORT_GPU static void fill_gpu_options(ResultsWriter* log, SkGLContextHelper* ctx) { const GrGLubyte* version; SK_GL_RET(*ctx, version, GetString(GR_GL_VERSION)); log->configOption("GL_VERSION", (const char*)(version)); SK_GL_RET(*ctx, version, GetString(GR_GL_RENDERER)); log->configOption("GL_RENDERER", (const char*) version); SK_GL_RET(*ctx, version, GetString(GR_GL_VENDOR)); log->configOption("GL_VENDOR", (const char*) version); SK_GL_RET(*ctx, version, GetString(GR_GL_SHADING_LANGUAGE_VERSION)); log->configOption("GL_SHADING_LANGUAGE_VERSION", (const char*) version); } #endif class BenchmarkStream { public: BenchmarkStream() : fBenches(BenchRegistry::Head()) , fGMs(skiagm::GMRegistry::Head()) , fCurrentRecording(0) , fCurrentScale(0) , fCurrentSKP(0) { for (int i = 0; i < FLAGS_skps.count(); i++) { if (SkStrEndsWith(FLAGS_skps[i], ".skp")) { fSKPs.push_back() = FLAGS_skps[i]; } else { SkOSFile::Iter it(FLAGS_skps[i], ".skp"); SkString path; while (it.next(&path)) { fSKPs.push_back() = SkOSPath::Join(FLAGS_skps[0], path.c_str()); } } } if (4 != sscanf(FLAGS_clip[0], "%d,%d,%d,%d", &fClip.fLeft, &fClip.fTop, &fClip.fRight, &fClip.fBottom)) { SkDebugf("Can't parse %s from --clip as an SkIRect.\n", FLAGS_clip[0]); exit(1); } for (int i = 0; i < FLAGS_scales.count(); i++) { if (1 != sscanf(FLAGS_scales[i], "%f", &fScales.push_back())) { SkDebugf("Can't parse %s from --scales as an SkScalar.\n", FLAGS_scales[i]); exit(1); } } } static bool ReadPicture(const char* path, SkAutoTUnref* pic) { // Not strictly necessary, as it will be checked again later, // but helps to avoid a lot of pointless work if we're going to skip it. if (SkCommandLineFlags::ShouldSkip(FLAGS_match, path)) { return false; } SkAutoTUnref stream(SkStream::NewFromFile(path)); if (stream.get() == NULL) { SkDebugf("Could not read %s.\n", path); return false; } pic->reset(SkPicture::CreateFromStream(stream.get())); if (pic->get() == NULL) { SkDebugf("Could not read %s as an SkPicture.\n", path); return false; } return true; } Benchmark* next() { if (fBenches) { Benchmark* bench = fBenches->factory()(NULL); fBenches = fBenches->next(); fSourceType = "bench"; fBenchType = "micro"; return bench; } while (fGMs) { SkAutoTDelete gm(fGMs->factory()(NULL)); fGMs = fGMs->next(); if (gm->getFlags() & skiagm::GM::kAsBench_Flag) { fSourceType = "gm"; fBenchType = "micro"; return SkNEW_ARGS(GMBench, (gm.detach())); } } // First add all .skps as RecordingBenches. while (fCurrentRecording < fSKPs.count()) { const SkString& path = fSKPs[fCurrentRecording++]; SkAutoTUnref pic; if (!ReadPicture(path.c_str(), &pic)) { continue; } SkString name = SkOSPath::Basename(path.c_str()); fSourceType = "skp"; fBenchType = "recording"; return SkNEW_ARGS(RecordingBench, (name.c_str(), pic.get(), FLAGS_bbh)); } // Then once each for each scale as SKPBenches (playback). while (fCurrentScale < fScales.count()) { while (fCurrentSKP < fSKPs.count()) { const SkString& path = fSKPs[fCurrentSKP++]; SkAutoTUnref pic; if (!ReadPicture(path.c_str(), &pic)) { continue; } if (FLAGS_bbh) { // The SKP we read off disk doesn't have a BBH. Re-record so it grows one. // Here we use an SkTileGrid with parameters optimized for FLAGS_clip. const SkTileGridFactory::TileGridInfo info = { SkISize::Make(fClip.width(), fClip.height()), // tile interval SkISize::Make(0,0), // margin SkIPoint::Make(fClip.left(), fClip.top()), // offset }; SkTileGridFactory factory(info); SkPictureRecorder recorder; pic->playback(recorder.beginRecording(pic->cullRect().width(), pic->cullRect().height(), &factory)); pic.reset(recorder.endRecording()); } SkString name = SkOSPath::Basename(path.c_str()); fSourceType = "skp"; fBenchType = "playback"; return SkNEW_ARGS(SKPBench, (name.c_str(), pic.get(), fClip, fScales[fCurrentScale])); } fCurrentSKP = 0; fCurrentScale++; } return NULL; } void fillCurrentOptions(ResultsWriter* log) const { log->configOption("source_type", fSourceType); log->configOption("bench_type", fBenchType); if (0 == strcmp(fSourceType, "skp")) { log->configOption("clip", SkStringPrintf("%d %d %d %d", fClip.fLeft, fClip.fTop, fClip.fRight, fClip.fBottom).c_str()); log->configOption("scale", SkStringPrintf("%.2g", fScales[fCurrentScale]).c_str()); } } private: const BenchRegistry* fBenches; const skiagm::GMRegistry* fGMs; SkIRect fClip; SkTArray fScales; SkTArray fSKPs; const char* fSourceType; // What we're benching: bench, GM, SKP, ... const char* fBenchType; // How we bench it: micro, recording, playback, ... int fCurrentRecording; int fCurrentScale; int fCurrentSKP; }; int nanobench_main(); int nanobench_main() { SetupCrashHandler(); SkAutoGraphics ag; #if SK_SUPPORT_GPU GrContext::Options grContextOpts; grContextOpts.fDrawPathToCompressedTexture = FLAGS_gpuCompressAlphaMasks; gGrFactory.reset(SkNEW_ARGS(GrContextFactory, (grContextOpts))); #endif if (kAutoTuneLoops != FLAGS_loops) { FLAGS_samples = 1; FLAGS_gpuFrameLag = 0; } if (!FLAGS_writePath.isEmpty()) { SkDebugf("Writing files to %s.\n", FLAGS_writePath[0]); if (!sk_mkdir(FLAGS_writePath[0])) { SkDebugf("Could not create %s. Files won't be written.\n", FLAGS_writePath[0]); FLAGS_writePath.set(0, NULL); } } SkAutoTDelete log(SkNEW(ResultsWriter)); if (!FLAGS_outResultsFile.isEmpty()) { log.reset(SkNEW(NanoJSONResultsWriter(FLAGS_outResultsFile[0]))); } if (1 == FLAGS_properties.count() % 2) { SkDebugf("ERROR: --properties must be passed with an even number of arguments.\n"); return 1; } for (int i = 1; i < FLAGS_properties.count(); i += 2) { log->property(FLAGS_properties[i-1], FLAGS_properties[i]); } if (1 == FLAGS_key.count() % 2) { SkDebugf("ERROR: --key must be passed with an even number of arguments.\n"); return 1; } for (int i = 1; i < FLAGS_key.count(); i += 2) { log->key(FLAGS_key[i-1], FLAGS_key[i]); } const double overhead = estimate_timer_overhead(); SkDebugf("Timer overhead: %s\n", HUMANIZE(overhead)); SkAutoTMalloc samples(FLAGS_samples); if (kAutoTuneLoops != FLAGS_loops) { SkDebugf("Fixed number of loops; times would only be misleading so we won't print them.\n"); } else if (FLAGS_verbose) { // No header. } else if (FLAGS_quiet) { SkDebugf("median\tbench\tconfig\n"); } else { SkDebugf("maxrss\tloops\tmin\tmedian\tmean\tmax\tstddev\t%-*s\tconfig\tbench\n", FLAGS_samples, "samples"); } SkTDArray configs; create_configs(&configs); BenchmarkStream benchStream; while (Benchmark* b = benchStream.next()) { SkAutoTDelete bench(b); if (SkCommandLineFlags::ShouldSkip(FLAGS_match, bench->getUniqueName())) { continue; } SkTDArray targets; create_targets(&targets, bench.get(), configs); if (!targets.isEmpty()) { log->bench(bench->getUniqueName(), bench->getSize().fX, bench->getSize().fY); bench->preDraw(); } for (int j = 0; j < targets.count(); j++) { SkCanvas* canvas = targets[j]->surface.get() ? targets[j]->surface->getCanvas() : NULL; const char* config = targets[j]->config.name; const int loops = #if SK_SUPPORT_GPU Benchmark::kGPU_Backend == targets[j]->config.backend ? gpu_bench(targets[j]->gl, bench.get(), canvas, samples.get()) : #endif cpu_bench( overhead, bench.get(), canvas, samples.get()); if (canvas && !FLAGS_writePath.isEmpty() && FLAGS_writePath[0]) { SkString pngFilename = SkOSPath::Join(FLAGS_writePath[0], config); pngFilename = SkOSPath::Join(pngFilename.c_str(), bench->getUniqueName()); pngFilename.append(".png"); write_canvas_png(canvas, pngFilename); } if (kFailedLoops == loops) { // Can't be timed. A warning note has already been printed. continue; } Stats stats(samples.get(), FLAGS_samples); log->config(config); log->configOption("name", bench->getName()); benchStream.fillCurrentOptions(log.get()); #if SK_SUPPORT_GPU if (Benchmark::kGPU_Backend == targets[j]->config.backend) { fill_gpu_options(log.get(), targets[j]->gl); } #endif log->timer("min_ms", stats.min); log->timer("median_ms", stats.median); log->timer("mean_ms", stats.mean); log->timer("max_ms", stats.max); log->timer("stddev_ms", sqrt(stats.var)); if (kAutoTuneLoops != FLAGS_loops) { if (targets.count() == 1) { config = ""; // Only print the config if we run the same bench on more than one. } SkDebugf("%s\t%s\n", bench->getUniqueName(), config); } else if (FLAGS_verbose) { for (int i = 0; i < FLAGS_samples; i++) { SkDebugf("%s ", HUMANIZE(samples[i])); } SkDebugf("%s\n", bench->getUniqueName()); } else if (FLAGS_quiet) { if (targets.count() == 1) { config = ""; // Only print the config if we run the same bench on more than one. } SkDebugf("%s\t%s\t%s\n", HUMANIZE(stats.median), bench->getUniqueName(), config); } else { const double stddev_percent = 100 * sqrt(stats.var) / stats.mean; SkDebugf("%4dM\t%d\t%s\t%s\t%s\t%s\t%.0f%%\t%s\t%s\t%s\n" , sk_tools::getMaxResidentSetSizeMB() , loops , HUMANIZE(stats.min) , HUMANIZE(stats.median) , HUMANIZE(stats.mean) , HUMANIZE(stats.max) , stddev_percent , stats.plot.c_str() , config , bench->getUniqueName() ); } } targets.deleteAll(); #if SK_SUPPORT_GPU if (FLAGS_abandonGpuContext) { gGrFactory->abandonContexts(); } if (FLAGS_resetGpuContext || FLAGS_abandonGpuContext) { gGrFactory->destroyContexts(); } #endif } return 0; } #if !defined SK_BUILD_FOR_IOS int main(int argc, char** argv) { SkCommandLineFlags::Parse(argc, argv); return nanobench_main(); } #endif