Implement SkGLContext swapBuffers with fence syncs

Improves the accuracy of nanobench's GPU timing measurements by using fence syncs. Fence syncs are very widely supported and are available on almost every platform.

NO_MERGE_BUILDS
BUG=skia:
Review URL: https://codereview.chromium.org/1194783003
author: cdalton <cdalton@nvidia.com> 2015-06-23 13:23:44 -0700
committer: Commit bot <commit-bot@chromium.org> 2015-06-23 13:23:44 -0700
commit: d416a5b10ff9e6d4f55a1f5b0419722132d68ff3 (patch)
tree: f9f39528fd8fd7c033882c137d711e12939c6fa2 /bench
parent: b607767703ff7898611cf88c1218d5d69535e984 (diff)
4 files changed, 28 insertions, 12 deletions
diff --git a/bench/nanobench.cpp b/bench/nanobench.cpp
index 65d9216e35..13ae9f6239 100644
--- a/bench/nanobench.cpp
+++ b/bench/nanobench.cpp
@@ -79,7 +79,7 @@ DEFINE_int32(overheadLoops, 100000, "Loops to estimate timer overhead.");
 DEFINE_double(overheadGoal, 0.0001,
               "Loop until timer overhead is at most this fraction of our measurments.");
 DEFINE_double(gpuMs, 5, "Target bench time in millseconds for GPU.");
-DEFINE_int32(gpuFrameLag, 5, "Overestimate of maximum number of frames GPU allows to lag.");
+DEFINE_int32(gpuFrameLag, 5, "If unknown, estimated maximum number of frames GPU allows to lag.");
 DEFINE_bool(gpuCompressAlphaMasks, false, "Compress masks generated from falling back to "
                                           "software path rendering.");
 
@@ -144,7 +144,13 @@ struct GPUTarget : public Target {
         SK_GL(*this->gl, Finish());
     }
 
-    bool needsFrameTiming() const override { return true; }
+    bool needsFrameTiming(int* maxFrameLag) const override {
+        if (!this->gl->getMaxGpuFrameLag(maxFrameLag)) {
+            // Frame lag is unknown.
+            *maxFrameLag = FLAGS_gpuFrameLag;
+        }
+        return true;
+    }
     bool init(SkImageInfo info, Benchmark* bench) override {
         uint32_t flags = this->config.useDFText ? SkSurfaceProps::kUseDistanceFieldFonts_Flag : 0;
         SkSurfaceProps props(flags, SkSurfaceProps::kLegacyFontHost_InitType);
@@ -155,6 +161,10 @@ struct GPUTarget : public Target {
         if (!this->surface.get()) {
             return false;
         }
+        if (!this->gl->fenceSyncSupport()) {
+            SkDebugf("WARNING: GL context for config \"%s\" does not support fence sync. "
+                     "Timings might not be accurate.\n", this->config.name);
+        }
         return true;
     }
     void fillOptions(ResultsWriter* log) override {
@@ -307,7 +317,8 @@ static int cpu_bench(const double overhead, Target* target, Benchmark* bench, do
 
 static int gpu_bench(Target* target,
                      Benchmark* bench,
-                     double* samples) {
+                     double* samples,
+                     int maxGpuFrameLag) {
     // First, figure out how many loops it'll take to get a frame up to FLAGS_gpuMs.
     int loops = FLAGS_loops;
     if (kAutoTuneLoops == loops) {
@@ -321,9 +332,8 @@ static int gpu_bench(Target* target,
             }
             loops *= 2;
             // If the GPU lets frames lag at all, we need to make sure we're timing
-            // _this_ round, not still timing last round.  We force this by looping
-            // more times than any reasonable GPU will allow frames to lag.
-            for (int i = 0; i < FLAGS_gpuFrameLag; i++) {
+            // _this_ round, not still timing last round.
+            for (int i = 0; i < maxGpuFrameLag; i++) {
                 elapsed = time(loops, bench, target);
             }
         } while (elapsed < FLAGS_gpuMs);
@@ -340,7 +350,7 @@ static int gpu_bench(Target* target,
 
     // Pretty much the same deal as the calibration: do some warmup to make
     // sure we're timing steady-state pipelined frames.
-    for (int i = 0; i < FLAGS_gpuFrameLag; i++) {
+    for (int i = 0; i < maxGpuFrameLag - 1; i++) {
         time(loops, bench, target);
     }
 
@@ -429,6 +439,9 @@ static void create_configs(SkTDArray<Config>* configs) {
 #ifdef SK_ANGLE
         GPU_CONFIG(angle, kANGLE_GLContextType, 0, false)
 #endif
+#if SK_MESA
+        GPU_CONFIG(mesa, kMESA_GLContextType, 0, false)
+#endif
     }
 #endif
 
@@ -1008,9 +1021,10 @@ int nanobench_main() {
             targets[j]->setup();
             bench->perCanvasPreDraw(canvas);
 
+            int frameLag;
             const int loops =
-                targets[j]->needsFrameTiming()
-                ? gpu_bench(targets[j], bench.get(), samples.get())
+                targets[j]->needsFrameTiming(&frameLag)
+                ? gpu_bench(targets[j], bench.get(), samples.get(), frameLag)
                 : cpu_bench(overhead, targets[j], bench.get(), samples.get());
 
             bench->perCanvasPostDraw(canvas);
diff --git a/bench/nanobench.h b/bench/nanobench.h
index 1dc0b8b90a..f556f7dc14 100644
--- a/bench/nanobench.h
+++ b/bench/nanobench.h
@@ -63,7 +63,7 @@ struct Target {
     /** CPU-like targets can just be timed, but GPU-like
         targets need to pay attention to frame boundaries
         or other similar details. */
-    virtual bool needsFrameTiming() const { return false; }
+    virtual bool needsFrameTiming(int* frameLag) const { return false; }
 
     /** Called once per target, during program initialization.
         Returns false if initialization fails. */
diff --git a/bench/nanobenchAndroid.cpp b/bench/nanobenchAndroid.cpp
index 3d5cda4929..50673c9748 100644
--- a/bench/nanobenchAndroid.cpp
+++ b/bench/nanobenchAndroid.cpp
@@ -42,7 +42,9 @@ void HWUITarget::fence() {
     this->renderer.proxy->fence();
 }
 
-bool HWUITarget::needsFrameTiming() const {
+bool HWUITarget::needsFrameTiming(int* frameLag) const {
+    extern int FLAGS_gpuFrameLag;
+    *frameLag = FLAGS_gpuFrameLag;
     return true;
 }
 
diff --git a/bench/nanobenchAndroid.h b/bench/nanobenchAndroid.h
index 7f6ff441d6..16a81cfe5e 100644
--- a/bench/nanobenchAndroid.h
+++ b/bench/nanobenchAndroid.h
@@ -23,7 +23,7 @@ struct HWUITarget : public Target {
     SkCanvas* beginTiming(SkCanvas* canvas) override;
     void endTiming() override;
     void fence() override;
-    bool needsFrameTiming() const override;
+    bool needsFrameTiming(int* frameLag) const override;
 
     bool init(SkImageInfo info, Benchmark* bench) override;
     bool capturePixels(SkBitmap* bmp) override;
author	cdalton <cdalton@nvidia.com>	2015-06-23 13:23:44 -0700
committer	Commit bot <commit-bot@chromium.org>	2015-06-23 13:23:44 -0700
commit	d416a5b10ff9e6d4f55a1f5b0419722132d68ff3 (patch)
tree	f9f39528fd8fd7c033882c137d711e12939c6fa2 /bench
parent	b607767703ff7898611cf88c1218d5d69535e984 (diff)