diff options
author | csmartdalton <csmartdalton@google.com> | 2016-10-05 08:42:03 -0700 |
---|---|---|
committer | Commit bot <commit-bot@chromium.org> | 2016-10-05 08:42:03 -0700 |
commit | c6618dd1dadeac8b47b81fbee108c42cca8ab166 (patch) | |
tree | 749fdc785cffc5639d0b6d1ba375d6b9904aae83 | |
parent | a86952a5ef9e757832a016506709ce4827b059ec (diff) |
skpbench: add option for gpu timing
Adds a gpu timing option with a GL implementation.
BUG=skia:
GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=2388433003
Committed: https://skia.googlesource.com/skia/+/c06720d06faab3b01eba1b8693e0ac791f06dc96
Review-Url: https://codereview.chromium.org/2388433003
-rw-r--r-- | tools/gpu/FenceSync.h | 4 | ||||
-rw-r--r-- | tools/gpu/GpuTimer.h | 77 | ||||
-rw-r--r-- | tools/gpu/TestContext.cpp | 12 | ||||
-rw-r--r-- | tools/gpu/TestContext.h | 9 | ||||
-rw-r--r-- | tools/gpu/gl/GLTestContext.cpp | 132 | ||||
-rw-r--r-- | tools/skpbench/_benchresult.py | 5 | ||||
-rwxr-xr-x | tools/skpbench/parseskpbench.py | 91 | ||||
-rw-r--r-- | tools/skpbench/skpbench.cpp | 109 | ||||
-rwxr-xr-x | tools/skpbench/skpbench.py | 12 |
9 files changed, 383 insertions, 68 deletions
diff --git a/tools/gpu/FenceSync.h b/tools/gpu/FenceSync.h index 8f2bbe2e86..b430f5dfa9 100644 --- a/tools/gpu/FenceSync.h +++ b/tools/gpu/FenceSync.h @@ -13,7 +13,7 @@ namespace sk_gpu_test { using PlatformFence = uint64_t; -static constexpr PlatformFence kInvalidPlatformFence = 0; +static constexpr PlatformFence kInvalidFence = 0; /* * This class provides an interface to interact with fence syncs. A fence sync is an object that the @@ -29,6 +29,6 @@ public: virtual ~FenceSync() {} }; -} +} // namespace sk_gpu_test #endif diff --git a/tools/gpu/GpuTimer.h b/tools/gpu/GpuTimer.h new file mode 100644 index 0000000000..7678421ca6 --- /dev/null +++ b/tools/gpu/GpuTimer.h @@ -0,0 +1,77 @@ +/* + * Copyright 2016 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef GpuTimer_DEFINED +#define GpuTimer_DEFINED + +#include "SkTypes.h" +#include "SkExchange.h" +#include <chrono> + +namespace sk_gpu_test { + +using PlatformTimerQuery = uint64_t; +static constexpr PlatformTimerQuery kInvalidTimerQuery = 0; + +/** + * Platform-independent interface for timing operations on the GPU. + */ +class GpuTimer { +public: + GpuTimer(bool disjointSupport) + : fDisjointSupport(disjointSupport) + , fActiveTimer(kInvalidTimerQuery) { + } + virtual ~GpuTimer() { SkASSERT(!fActiveTimer); } + + /** + * Returns whether this timer can detect disjoint GPU operations while timing. If false, a query + * has less confidence when it completes with QueryStatus::kAccurate. + */ + bool disjointSupport() const { return fDisjointSupport; } + + /** + * Inserts a "start timing" command in the GPU command stream. + */ + void queueStart() { + SkASSERT(!fActiveTimer); + fActiveTimer = this->onQueueTimerStart(); + } + + /** + * Inserts a "stop timing" command in the GPU command stream. + * + * @return a query object that can retrieve the time elapsed once the timer has completed. + */ + PlatformTimerQuery SK_WARN_UNUSED_RESULT queueStop() { + SkASSERT(fActiveTimer); + this->onQueueTimerStop(fActiveTimer); + return skstd::exchange(fActiveTimer, kInvalidTimerQuery); + } + + enum class QueryStatus { + kInvalid, //<! the timer query is invalid. + kPending, //<! the timer is still running on the GPU. + kDisjoint, //<! the query is complete, but dubious due to disjoint GPU operations. + kAccurate //<! the query is complete and reliable. + }; + + virtual QueryStatus checkQueryStatus(PlatformTimerQuery) = 0; + virtual std::chrono::nanoseconds getTimeElapsed(PlatformTimerQuery) = 0; + virtual void deleteQuery(PlatformTimerQuery) = 0; + +private: + virtual PlatformTimerQuery onQueueTimerStart() const = 0; + virtual void onQueueTimerStop(PlatformTimerQuery) const = 0; + + bool const fDisjointSupport; + PlatformTimerQuery fActiveTimer; +}; + +} // namespace sk_gpu_test + +#endif diff --git a/tools/gpu/TestContext.cpp b/tools/gpu/TestContext.cpp index 8a78b903b1..90aba43880 100644 --- a/tools/gpu/TestContext.cpp +++ b/tools/gpu/TestContext.cpp @@ -8,8 +8,13 @@ #include "TestContext.h" +#include "GpuTimer.h" + namespace sk_gpu_test { -TestContext::TestContext() : fFenceSync(nullptr), fCurrentFenceIdx(0) { +TestContext::TestContext() + : fFenceSync(nullptr) + , fGpuTimer(nullptr) + , fCurrentFenceIdx(0) { memset(fFrameFences, 0, sizeof(fFrameFences)); } @@ -21,6 +26,7 @@ TestContext::~TestContext() { } #endif SkASSERT(!fFenceSync); + SkASSERT(!fGpuTimer); } void TestContext::makeCurrent() const { this->onPlatformMakeCurrent(); } @@ -60,9 +66,9 @@ void TestContext::teardown() { fFrameFences[i] = 0; } } - delete fFenceSync; - fFenceSync = nullptr; + fFenceSync.reset(); } + fGpuTimer.reset(); } } diff --git a/tools/gpu/TestContext.h b/tools/gpu/TestContext.h index d01cb02af2..8722a337b3 100644 --- a/tools/gpu/TestContext.h +++ b/tools/gpu/TestContext.h @@ -14,6 +14,9 @@ #include "../private/SkTemplates.h" namespace sk_gpu_test { + +class GpuTimer; + /** * An offscreen 3D context. This class is intended for Skia's internal testing needs and not * for general use. @@ -27,6 +30,9 @@ public: bool fenceSyncSupport() const { return fFenceSync != nullptr; } FenceSync* fenceSync() { SkASSERT(fFenceSync); return fFenceSync; } + bool gpuTimingSupport() const { return fGpuTimer != nullptr; } + GpuTimer* gpuTimer() const { SkASSERT(fGpuTimer); return fGpuTimer; } + bool getMaxGpuFrameLag(int *maxFrameLag) const { if (!fFenceSync) { return false; @@ -75,7 +81,8 @@ public: virtual void finish() = 0; protected: - FenceSync* fFenceSync; + SkAutoTDelete<FenceSync> fFenceSync; + SkAutoTDelete<GpuTimer> fGpuTimer; TestContext(); diff --git a/tools/gpu/gl/GLTestContext.cpp b/tools/gpu/gl/GLTestContext.cpp index 1b077d5a02..20a9908381 100644 --- a/tools/gpu/gl/GLTestContext.cpp +++ b/tools/gpu/gl/GLTestContext.cpp @@ -6,6 +6,8 @@ */ #include "GLTestContext.h" + +#include "GpuTimer.h" #include "gl/GrGLUtil.h" namespace { @@ -78,6 +80,135 @@ void GLFenceSync::deleteFence(sk_gpu_test::PlatformFence fence) const { fGLDeleteSync(glsync); } +class GLGpuTimer : public sk_gpu_test::GpuTimer { +public: + static GLGpuTimer* CreateIfSupported(const sk_gpu_test::GLTestContext*); + + QueryStatus checkQueryStatus(sk_gpu_test::PlatformTimerQuery) override; + std::chrono::nanoseconds getTimeElapsed(sk_gpu_test::PlatformTimerQuery) override; + void deleteQuery(sk_gpu_test::PlatformTimerQuery) override; + +private: + GLGpuTimer(bool disjointSupport, const sk_gpu_test::GLTestContext*, const char* ext = ""); + + bool validate() const; + + sk_gpu_test::PlatformTimerQuery onQueueTimerStart() const override; + void onQueueTimerStop(sk_gpu_test::PlatformTimerQuery) const override; + + static constexpr GrGLenum GL_QUERY_RESULT = 0x8866; + static constexpr GrGLenum GL_QUERY_RESULT_AVAILABLE = 0x8867; + static constexpr GrGLenum GL_TIME_ELAPSED = 0x88bf; + static constexpr GrGLenum GL_GPU_DISJOINT = 0x8fbb; + + typedef void (GR_GL_FUNCTION_TYPE* GLGetIntegervProc) (GrGLenum, GrGLint*); + typedef void (GR_GL_FUNCTION_TYPE* GLGenQueriesProc) (GrGLsizei, GrGLuint*); + typedef void (GR_GL_FUNCTION_TYPE* GLDeleteQueriesProc) (GrGLsizei, const GrGLuint*); + typedef void (GR_GL_FUNCTION_TYPE* GLBeginQueryProc) (GrGLenum, GrGLuint); + typedef void (GR_GL_FUNCTION_TYPE* GLEndQueryProc) (GrGLenum); + typedef void (GR_GL_FUNCTION_TYPE* GLGetQueryObjectuivProc) (GrGLuint, GrGLenum, GrGLuint*); + typedef void (GR_GL_FUNCTION_TYPE* GLGetQueryObjectui64vProc) (GrGLuint, GrGLenum, GrGLuint64*); + + GLGetIntegervProc fGLGetIntegerv; + GLGenQueriesProc fGLGenQueries; + GLDeleteQueriesProc fGLDeleteQueries; + GLBeginQueryProc fGLBeginQuery; + GLEndQueryProc fGLEndQuery; + GLGetQueryObjectuivProc fGLGetQueryObjectuiv; + GLGetQueryObjectui64vProc fGLGetQueryObjectui64v; + + + typedef sk_gpu_test::GpuTimer INHERITED; +}; + +GLGpuTimer* GLGpuTimer::CreateIfSupported(const sk_gpu_test::GLTestContext* ctx) { + SkAutoTDelete<GLGpuTimer> ret; + const GrGLInterface* gl = ctx->gl(); + if (gl->fExtensions.has("GL_EXT_disjoint_timer_query")) { + ret.reset(new GLGpuTimer(true, ctx, "EXT")); + } else if (kGL_GrGLStandard == gl->fStandard && + (GrGLGetVersion(gl) > GR_GL_VER(3,3) || gl->fExtensions.has("GL_ARB_timer_query"))) { + ret.reset(new GLGpuTimer(false, ctx)); + } else if (gl->fExtensions.has("GL_EXT_timer_query")) { + ret.reset(new GLGpuTimer(false, ctx, "EXT")); + } + return ret && ret->validate() ? ret.release() : nullptr; +} + +GLGpuTimer::GLGpuTimer(bool disjointSupport, const sk_gpu_test::GLTestContext* ctx, const char* ext) + : INHERITED(disjointSupport) { + ctx->getGLProcAddress(&fGLGetIntegerv, "glGetIntegerv"); + ctx->getGLProcAddress(&fGLGenQueries, "glGenQueries", ext); + ctx->getGLProcAddress(&fGLDeleteQueries, "glDeleteQueries", ext); + ctx->getGLProcAddress(&fGLBeginQuery, "glBeginQuery", ext); + ctx->getGLProcAddress(&fGLEndQuery, "glEndQuery", ext); + ctx->getGLProcAddress(&fGLGetQueryObjectuiv, "glGetQueryObjectuiv", ext); + ctx->getGLProcAddress(&fGLGetQueryObjectui64v, "glGetQueryObjectui64v", ext); +} + +bool GLGpuTimer::validate() const { + return fGLGetIntegerv && fGLGenQueries && fGLDeleteQueries && fGLBeginQuery && fGLEndQuery && + fGLGetQueryObjectuiv && fGLGetQueryObjectui64v; +} + +sk_gpu_test::PlatformTimerQuery GLGpuTimer::onQueueTimerStart() const { + GrGLuint queryID; + fGLGenQueries(1, &queryID); + if (!queryID) { + return sk_gpu_test::kInvalidTimerQuery; + } + if (this->disjointSupport()) { + // Clear the disjoint flag. + GrGLint disjoint; + fGLGetIntegerv(GL_GPU_DISJOINT, &disjoint); + } + fGLBeginQuery(GL_TIME_ELAPSED, queryID); + return static_cast<sk_gpu_test::PlatformTimerQuery>(queryID); +} + +void GLGpuTimer::onQueueTimerStop(sk_gpu_test::PlatformTimerQuery platformTimer) const { + if (sk_gpu_test::kInvalidTimerQuery == platformTimer) { + return; + } + fGLEndQuery(GL_TIME_ELAPSED); +} + +sk_gpu_test::GpuTimer::QueryStatus +GLGpuTimer::checkQueryStatus(sk_gpu_test::PlatformTimerQuery platformTimer) { + const GrGLuint queryID = static_cast<GrGLuint>(platformTimer); + if (!queryID) { + return QueryStatus::kInvalid; + } + GrGLuint available = 0; + fGLGetQueryObjectuiv(queryID, GL_QUERY_RESULT_AVAILABLE, &available); + if (!available) { + return QueryStatus::kPending; + } + if (this->disjointSupport()) { + GrGLint disjoint = 1; + fGLGetIntegerv(GL_GPU_DISJOINT, &disjoint); + if (disjoint) { + return QueryStatus::kDisjoint; + } + } + return QueryStatus::kAccurate; +} + +std::chrono::nanoseconds GLGpuTimer::getTimeElapsed(sk_gpu_test::PlatformTimerQuery platformTimer) { + SkASSERT(this->checkQueryStatus(platformTimer) >= QueryStatus::kDisjoint); + const GrGLuint queryID = static_cast<GrGLuint>(platformTimer); + GrGLuint64 nanoseconds; + fGLGetQueryObjectui64v(queryID, GL_QUERY_RESULT, &nanoseconds); + return std::chrono::nanoseconds(nanoseconds); +} + +void GLGpuTimer::deleteQuery(sk_gpu_test::PlatformTimerQuery platformTimer) { + const GrGLuint queryID = static_cast<GrGLuint>(platformTimer); + fGLDeleteQueries(1, &queryID); +} + +GR_STATIC_ASSERT(sizeof(GrGLuint) <= sizeof(sk_gpu_test::PlatformTimerQuery)); + } // anonymous namespace namespace sk_gpu_test { @@ -92,6 +223,7 @@ void GLTestContext::init(const GrGLInterface* gl, FenceSync* fenceSync) { SkASSERT(!fGL.get()); fGL.reset(gl); fFenceSync = fenceSync ? fenceSync : GLFenceSync::CreateIfSupported(this); + fGpuTimer = GLGpuTimer::CreateIfSupported(this); } void GLTestContext::teardown() { diff --git a/tools/skpbench/_benchresult.py b/tools/skpbench/_benchresult.py index 94c110569c..666878bdc9 100644 --- a/tools/skpbench/_benchresult.py +++ b/tools/skpbench/_benchresult.py @@ -25,6 +25,8 @@ class BenchResult: '(?P<samples>\d+)' '(?P<sample_ms_pad> +)' '(?P<sample_ms>\d+)' + '(?P<clock_pad> +)' + '(?P<clock>[cg]pu)' '(?P<metric_pad> +)' '(?P<metric>ms|fps)' '(?P<config_pad> +)' @@ -45,6 +47,7 @@ class BenchResult: self.stddev = float(match.group('stddev')[:-1]) # Drop '%' sign. self.samples = int(match.group('samples')) self.sample_ms = int(match.group('sample_ms')) + self.clock = match.group('clock') self.metric = match.group('metric') self.config = match.group('config') self.bench = match.group('bench') @@ -59,7 +62,7 @@ class BenchResult: else: values = list() for name in ['accum', 'median', 'max', 'min', 'stddev', - 'samples', 'sample_ms', 'metric', 'config']: + 'samples', 'sample_ms', 'clock', 'metric', 'config']: values.append(self.get_string(name + '_pad')) values.append(self.get_string(name)) values.append(config_suffix) diff --git a/tools/skpbench/parseskpbench.py b/tools/skpbench/parseskpbench.py index 5fe146ee09..800c1ca124 100755 --- a/tools/skpbench/parseskpbench.py +++ b/tools/skpbench/parseskpbench.py @@ -8,8 +8,8 @@ from __future__ import print_function from _benchresult import BenchResult from argparse import ArgumentParser +from collections import defaultdict, namedtuple from datetime import datetime -import collections import operator import os import sys @@ -27,7 +27,7 @@ This script can also be used to generate a Google sheet: (1) Install the "Office Editing for Docs, Sheets & Slides" Chrome extension: https://chrome.google.com/webstore/detail/office-editing-for-docs-s/gbkeegbaiigmenfmjfclcdgdpimamgkj -(2) Designate Chrome os-wide as the default application for opening .csv files. +(2) Update your global OS file associations to use Chrome for .csv files. (3) Run parseskpbench.py with the --open flag. @@ -49,75 +49,92 @@ __argparse.add_argument('sources', FLAGS = __argparse.parse_args() +RESULT_QUALIFIERS = ('sample_ms', 'clock', 'metric') + +class FullConfig(namedtuple('fullconfig', ('config',) + RESULT_QUALIFIERS)): + def qualified_name(self, qualifiers=RESULT_QUALIFIERS): + return get_qualified_name(self.config.replace(',', ' '), + {x:getattr(self, x) for x in qualifiers}) + +def get_qualified_name(name, qualifiers): + if not qualifiers: + return name + else: + args = ('%s=%s' % (k,v) for k,v in qualifiers.iteritems()) + return '%s (%s)' % (name, ' '.join(args)) class Parser: def __init__(self): - self.configs = list() # use list to preserve the order configs appear in. - self.rows = collections.defaultdict(dict) - self.cols = collections.defaultdict(dict) - self.metric = None - self.sample_ms = None + self.sheet_qualifiers = {x:None for x in RESULT_QUALIFIERS} + self.config_qualifiers = set() + self.fullconfigs = list() # use list to preserve the order. + self.rows = defaultdict(dict) + self.cols = defaultdict(dict) def parse_file(self, infile): for line in infile: match = BenchResult.match(line) if not match: continue - if self.metric is None: - self.metric = match.metric - elif match.metric != self.metric: - raise ValueError("results have mismatched metrics (%s and %s)" % - (self.metric, match.metric)) - if self.sample_ms is None: - self.sample_ms = match.sample_ms - elif not FLAGS.force and match.sample_ms != self.sample_ms: - raise ValueError("results have mismatched sampling times. " - "(use --force to ignore)") - if not match.config in self.configs: - self.configs.append(match.config) - self.rows[match.bench][match.config] = match.get_string(FLAGS.result) - self.cols[match.config][match.bench] = getattr(match, FLAGS.result) + + fullconfig = FullConfig(*(match.get_string(x) + for x in FullConfig._fields)) + if not fullconfig in self.fullconfigs: + self.fullconfigs.append(fullconfig) + + for qualifier, value in self.sheet_qualifiers.items(): + if value is None: + self.sheet_qualifiers[qualifier] = match.get_string(qualifier) + elif value != match.get_string(qualifier): + del self.sheet_qualifiers[qualifier] + self.config_qualifiers.add(qualifier) + + self.rows[match.bench][fullconfig] = match.get_string(FLAGS.result) + self.cols[fullconfig][match.bench] = getattr(match, FLAGS.result) def print_csv(self, outfile=sys.stdout): - print('%s_%s' % (FLAGS.result, self.metric), file=outfile) + # Write the title. + print(get_qualified_name(FLAGS.result, self.sheet_qualifiers), file=outfile) # Write the header. outfile.write('bench,') - for config in self.configs: - outfile.write('%s,' % config) + for fullconfig in self.fullconfigs: + outfile.write('%s,' % fullconfig.qualified_name(self.config_qualifiers)) outfile.write('\n') # Write the rows. - for bench, row in self.rows.items(): + for bench, row in self.rows.iteritems(): outfile.write('%s,' % bench) - for config in self.configs: - if config in row: - outfile.write('%s,' % row[config]) + for fullconfig in self.fullconfigs: + if fullconfig in row: + outfile.write('%s,' % row[fullconfig]) elif FLAGS.force: - outfile.write(',') + outfile.write('NULL,') else: raise ValueError("%s: missing value for %s. (use --force to ignore)" % - (bench, config)) + (bench, + fullconfig.qualified_name(self.config_qualifiers))) outfile.write('\n') # Add simple, literal averages. if len(self.rows) > 1: outfile.write('\n') - self.__print_computed_row('MEAN', + self._print_computed_row('MEAN', lambda col: reduce(operator.add, col.values()) / len(col), outfile=outfile) - self.__print_computed_row('GEOMEAN', + self._print_computed_row('GEOMEAN', lambda col: reduce(operator.mul, col.values()) ** (1.0 / len(col)), outfile=outfile) - def __print_computed_row(self, name, func, outfile=sys.stdout): + def _print_computed_row(self, name, func, outfile=sys.stdout): outfile.write('%s,' % name) - for config in self.configs: - assert(len(self.cols[config]) == len(self.rows)) - outfile.write('%.4g,' % func(self.cols[config])) + for fullconfig in self.fullconfigs: + if len(self.cols[fullconfig]) != len(self.rows): + outfile.write('NULL,') + continue + outfile.write('%.4g,' % func(self.cols[fullconfig])) outfile.write('\n') - def main(): parser = Parser() diff --git a/tools/skpbench/skpbench.cpp b/tools/skpbench/skpbench.cpp index adb6af0b14..6d0381a28d 100644 --- a/tools/skpbench/skpbench.cpp +++ b/tools/skpbench/skpbench.cpp @@ -5,6 +5,7 @@ * found in the LICENSE file. */ +#include "GpuTimer.h" #include "GrContextFactory.h" #include "SkCanvas.h" #include "SkOSFile.h" @@ -33,12 +34,9 @@ * Currently, only GPU configs are supported. */ -using sk_gpu_test::PlatformFence; -using sk_gpu_test::kInvalidPlatformFence; -using sk_gpu_test::FenceSync; - DEFINE_int32(duration, 5000, "number of milliseconds to run the benchmark"); DEFINE_int32(sampleMs, 50, "minimum duration of a sample"); +DEFINE_bool(gpuClock, false, "time on the gpu clock (gpu work only)"); DEFINE_bool(fps, false, "use fps instead of ms"); DEFINE_string(skp, "", "path to a single .skp file to benchmark"); DEFINE_string(png, "", "if set, save a .png proof to disk at this file location"); @@ -46,13 +44,13 @@ DEFINE_int32(verbosity, 4, "level of verbosity (0=none to 5=debug)"); DEFINE_bool(suppressHeader, false, "don't print a header row before the results"); static const char* header = - " accum median max min stddev samples sample_ms metric config bench"; +" accum median max min stddev samples sample_ms clock metric config bench"; static const char* resultFormat = - "%8.4g %8.4g %8.4g %8.4g %6.3g%% %7li %9i %-6s %-9s %s"; +"%8.4g %8.4g %8.4g %8.4g %6.3g%% %7li %9i %-5s %-6s %-9s %s"; struct Sample { - using clock = std::chrono::high_resolution_clock; + using duration = std::chrono::nanoseconds; Sample() : fFrames(0), fDuration(0) {} double seconds() const { return std::chrono::duration<double>(fDuration).count(); } @@ -60,13 +58,13 @@ struct Sample { double value() const { return FLAGS_fps ? fFrames / this->seconds() : this->ms() / fFrames; } static const char* metric() { return FLAGS_fps ? "fps" : "ms"; } - int fFrames; - clock::duration fDuration; + int fFrames; + duration fDuration; }; class GpuSync { public: - GpuSync(const FenceSync* fenceSync); + GpuSync(const sk_gpu_test::FenceSync* fenceSync); ~GpuSync(); void syncToPreviousFrame(); @@ -74,8 +72,8 @@ public: private: void updateFence(); - const FenceSync* const fFenceSync; - PlatformFence fFence; + const sk_gpu_test::FenceSync* const fFenceSync; + sk_gpu_test::PlatformFence fFence; }; enum class ExitErr { @@ -92,10 +90,10 @@ static bool mkdir_p(const SkString& name); static SkString join(const SkCommandLineFlags::StringArray&); static void exitf(ExitErr, const char* format, ...); -static void run_benchmark(const FenceSync* fenceSync, SkCanvas* canvas, const SkPicture* skp, - std::vector<Sample>* samples) { - using clock = Sample::clock; - const clock::duration sampleDuration = std::chrono::milliseconds(FLAGS_sampleMs); +static void run_benchmark(const sk_gpu_test::FenceSync* fenceSync, SkCanvas* canvas, + const SkPicture* skp, std::vector<Sample>* samples) { + using clock = std::chrono::high_resolution_clock; + const Sample::duration sampleDuration = std::chrono::milliseconds(FLAGS_sampleMs); const clock::duration benchDuration = std::chrono::milliseconds(FLAGS_duration); draw_skp_and_flush(canvas, skp); @@ -123,6 +121,66 @@ static void run_benchmark(const FenceSync* fenceSync, SkCanvas* canvas, const Sk } while (now < endTime || 0 == samples->size() % 2); } +static void run_gpu_time_benchmark(sk_gpu_test::GpuTimer* gpuTimer, + const sk_gpu_test::FenceSync* fenceSync, SkCanvas* canvas, + const SkPicture* skp, std::vector<Sample>* samples) { + using sk_gpu_test::PlatformTimerQuery; + using clock = std::chrono::steady_clock; + const clock::duration sampleDuration = std::chrono::milliseconds(FLAGS_sampleMs); + const clock::duration benchDuration = std::chrono::milliseconds(FLAGS_duration); + + if (!gpuTimer->disjointSupport()) { + fprintf(stderr, "WARNING: GPU timer cannot detect disjoint operations; " + "results may be unreliable\n"); + } + + draw_skp_and_flush(canvas, skp); + GpuSync gpuSync(fenceSync); + + gpuTimer->queueStart(); + draw_skp_and_flush(canvas, skp); + PlatformTimerQuery previousTime = gpuTimer->queueStop(); + gpuSync.syncToPreviousFrame(); + + clock::time_point now = clock::now(); + const clock::time_point endTime = now + benchDuration; + + do { + const clock::time_point sampleEndTime = now + sampleDuration; + samples->emplace_back(); + Sample& sample = samples->back(); + + do { + gpuTimer->queueStart(); + draw_skp_and_flush(canvas, skp); + PlatformTimerQuery time = gpuTimer->queueStop(); + gpuSync.syncToPreviousFrame(); + + switch (gpuTimer->checkQueryStatus(previousTime)) { + using QueryStatus = sk_gpu_test::GpuTimer::QueryStatus; + case QueryStatus::kInvalid: + exitf(ExitErr::kUnavailable, "GPU timer failed"); + case QueryStatus::kPending: + exitf(ExitErr::kUnavailable, "timer query still not ready after fence sync"); + case QueryStatus::kDisjoint: + if (FLAGS_verbosity >= 4) { + fprintf(stderr, "discarding timer query due to disjoint operations.\n"); + } + break; + case QueryStatus::kAccurate: + sample.fDuration += gpuTimer->getTimeElapsed(previousTime); + ++sample.fFrames; + break; + } + gpuTimer->deleteQuery(previousTime); + previousTime = time; + now = clock::now(); + } while (now < sampleEndTime || 0 == sample.fFrames); + } while (now < endTime || 0 == samples->size() % 2); + + gpuTimer->deleteQuery(previousTime); +} + void print_result(const std::vector<Sample>& samples, const char* config, const char* bench) { if (0 == (samples.size() % 2)) { exitf(ExitErr::kSoftware, "attempted to gather stats on even number of samples"); @@ -149,7 +207,8 @@ void print_result(const std::vector<Sample>& samples, const char* config, const const double stddev = 100/*%*/ * sqrt(variance) / accumValue; printf(resultFormat, accumValue, values[values.size() / 2], values.back(), values.front(), - stddev, values.size(), FLAGS_sampleMs, Sample::metric(), config, bench); + stddev, values.size(), FLAGS_sampleMs, FLAGS_gpuClock ? "gpu" : "cpu", Sample::metric(), + config, bench); printf("\n"); fflush(stdout); } @@ -247,7 +306,15 @@ int main(int argc, char** argv) { // Run the benchmark. SkCanvas* canvas = surface->getCanvas(); canvas->translate(-skp->cullRect().x(), -skp->cullRect().y()); - run_benchmark(testCtx->fenceSync(), canvas, skp.get(), &samples); + if (!FLAGS_gpuClock) { + run_benchmark(testCtx->fenceSync(), canvas, skp.get(), &samples); + } else { + if (!testCtx->gpuTimingSupport()) { + exitf(ExitErr::kUnavailable, "GPU does not support timing"); + } + run_gpu_time_benchmark(testCtx->gpuTimer(), testCtx->fenceSync(), canvas, skp.get(), + &samples); + } print_result(samples, config->getTag().c_str(), SkOSPath::Basename(skpfile).c_str()); // Save a proof (if one was requested). @@ -300,7 +367,7 @@ static void exitf(ExitErr err, const char* format, ...) { exit((int)err); } -GpuSync::GpuSync(const FenceSync* fenceSync) +GpuSync::GpuSync(const sk_gpu_test::FenceSync* fenceSync) : fFenceSync(fenceSync) { this->updateFence(); } @@ -310,7 +377,7 @@ GpuSync::~GpuSync() { } void GpuSync::syncToPreviousFrame() { - if (kInvalidPlatformFence == fFence) { + if (sk_gpu_test::kInvalidFence == fFence) { exitf(ExitErr::kSoftware, "attempted to sync with invalid fence"); } if (!fFenceSync->waitFence(fFence)) { @@ -322,7 +389,7 @@ void GpuSync::syncToPreviousFrame() { void GpuSync::updateFence() { fFence = fFenceSync->insertFence(); - if (kInvalidPlatformFence == fFence) { + if (sk_gpu_test::kInvalidFence == fFence) { exitf(ExitErr::kUnavailable, "failed to insert fence"); } } diff --git a/tools/skpbench/skpbench.py b/tools/skpbench/skpbench.py index 83aaf84000..6bf39750a4 100755 --- a/tools/skpbench/skpbench.py +++ b/tools/skpbench/skpbench.py @@ -32,7 +32,8 @@ unacceptable stddev. __argparse.add_argument('--adb', action='store_true', help="execute skpbench over adb") __argparse.add_argument('-s', '--device-serial', - help="if using adb, id of the specific device to target") + help="if using adb, ID of the specific device to target " + "(only required if more than 1 device is attached)") __argparse.add_argument('-p', '--path', help="directory to execute ./skpbench from") __argparse.add_argument('-m', '--max-stddev', @@ -47,7 +48,10 @@ __argparse.add_argument('-v','--verbosity', __argparse.add_argument('-d', '--duration', type=int, help="number of milliseconds to run each benchmark") __argparse.add_argument('-l', '--sample-ms', - type=int, help="minimum duration of a sample") + type=int, help="duration of a sample (minimum)") +__argparse.add_argument('--gpu', + action='store_true', + help="perform timing on the gpu clock instead of cpu (gpu work only)") __argparse.add_argument('--fps', action='store_true', help="use fps instead of ms") __argparse.add_argument('-c', '--config', @@ -93,6 +97,8 @@ class SKPBench: ARGV.extend(['--duration', str(FLAGS.duration)]) if FLAGS.sample_ms: ARGV.extend(['--sampleMs', str(FLAGS.sample_ms)]) + if FLAGS.gpu: + ARGV.extend(['--gpuClock', 'true']) if FLAGS.fps: ARGV.extend(['--fps', 'true']) if FLAGS.path: @@ -188,7 +194,7 @@ class SKPBench: def terminate(self): if self._proc: - self._proc.kill() + self._proc.terminate() self._monitor.join() self._proc.wait() self._proc = None |