aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
author: Mike Klein <mtklein@chromium.org> 2017-01-05 15:03:53 -0500
committer: Skia Commit-Bot <skia-commit-bot@chromium.org> 2017-01-05 20:41:43 +0000
commit: a2d25ec0ef1b1054aa5b3e61280bd226fce4ab9a (patch)
tree: 9b63a117138b09374c30ee56b08a5dc05a424ac5
parent: 4a224f60111643f36d6f1c204142ce34511a32a1 (diff)
Use stack instead of malloc() for most calls to SkRasterPipeline::run().
Also split bench into run/compile variants to measure the effect:

Before
    …f16_compile   1x
    …f16_run       1.02x
    …srgb_compile  1.56x
    …srgb_run      1.61x

After
    …f16_run       1x
    …f16_compile   1.01x
    …srgb_compile  1.58x
    …srgb_run      1.59x

CQ_INCLUDE_TRYBOTS=skia.primary:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD

Change-Id: I8e65fb2acdbb05ccc0b3894f16d7646603c3e74d
Reviewed-on: https://skia-review.googlesource.com/6621
Reviewed-by: Herb Derby <herb@google.com>
Commit-Queue: Mike Klein <mtklein@chromium.org>
-rw-r--r-- bench/SkRasterPipelineBench.cpp 29
-rw-r--r-- src/opts/SkRasterPipeline_opts.h 68
2 files changed, 61 insertions, 36 deletions
diff --git a/bench/SkRasterPipelineBench.cpp b/bench/SkRasterPipelineBench.cpp
index 16dea8aa66..376efdeb84 100644
--- a/bench/SkRasterPipelineBench.cpp
+++ b/bench/SkRasterPipelineBench.cpp
@@ -22,13 +22,18 @@ static uint8_t mask[N]; // 8-bit linear
// - src = srcover(dst, src)
// - store src back as srgb/f16
-template <bool kF16>
+template <bool kF16, bool kCompiled>
class SkRasterPipelineBench : public Benchmark {
public:
bool isSuitableFor(Backend backend) override { return backend == kNonRendering_Backend; }
const char* onGetName() override {
- return kF16 ? "SkRasterPipeline_f16"
- : "SkRasterPipeline_srgb";
+ switch ((int)kCompiled << 1 | (int)kF16) {
+ case 0: return "SkRasterPipeline_srgb_run";
+ case 1: return "SkRasterPipeline_f16_run";
+ case 2: return "SkRasterPipeline_srgb_compile";
+ case 3: return "SkRasterPipeline_f16_compile";
+ }
+ return "whoops";
}
void onDraw(int loops, SkCanvas*) override {
@@ -53,12 +58,20 @@ public:
p.append(SkRasterPipeline::to_srgb);
p.append(SkRasterPipeline::store_8888, &dst_ctx);
}
- auto compiled = p.compile();
- while (loops --> 0) {
- compiled(0,0, N);
+ if (kCompiled) {
+ auto compiled = p.compile();
+ while (loops --> 0) {
+ compiled(0,0, N);
+ }
+ } else {
+ while (loops --> 0) {
+ p.run(0,0, N);
+ }
}
}
};
-DEF_BENCH( return new SkRasterPipelineBench<true>; )
-DEF_BENCH( return new SkRasterPipelineBench<false>; )
+DEF_BENCH( return (new SkRasterPipelineBench< true, true>); )
+DEF_BENCH( return (new SkRasterPipelineBench<false, true>); )
+DEF_BENCH( return (new SkRasterPipelineBench< true, false>); )
+DEF_BENCH( return (new SkRasterPipelineBench<false, false>); )
diff --git a/src/opts/SkRasterPipeline_opts.h b/src/opts/SkRasterPipeline_opts.h
index 3b877ce156..9721e2abef 100644
--- a/src/opts/SkRasterPipeline_opts.h
+++ b/src/opts/SkRasterPipeline_opts.h
@@ -965,9 +965,37 @@ SI Fn enum_to_Fn(SkRasterPipeline::StockStage st) {
namespace {
+ static void build_program(void** program, const SkRasterPipeline::Stage* stages, int nstages) {
+ for (int i = 0; i < nstages; i++) {
+ *program++ = (void*)enum_to_Fn(stages[i].stage);
+ if (stages[i].ctx) {
+ *program++ = stages[i].ctx;
+ }
+ }
+ *program++ = (void*)just_return;
+ }
+
+ static void run_program(void** program, size_t x, size_t y, size_t n) {
+ float dx[] = { 0,1,2,3,4,5,6,7 };
+ SkNf X = SkNf(x) + SkNf::Load(dx) + 0.5f,
+ Y = SkNf(y) + 0.5f,
+ _0 = SkNf(0),
+ _1 = SkNf(1);
+
+ auto start = (Fn)load_and_increment(&program);
+ while (n >= N) {
+ start(x*N, program, X,Y,_1,_0, _0,_0,_0,_0);
+ X += (float)N;
+ x += N;
+ n -= N;
+ }
+ if (n) {
+ start(x*N+n, program, X,Y,_1,_0, _0,_0,_0,_0);
+ }
+ }
+
// Compiled manages its memory manually because it's not safe to use
// std::vector, SkTDArray, etc without setting us up for big ODR violations.
-
struct Compiled {
Compiled(const SkRasterPipeline::Stage* stages, int nstages) {
int slots = nstages + 1; // One extra for just_return.
@@ -977,15 +1005,7 @@ namespace {
}
}
fProgram = (void**)sk_malloc_throw(slots * sizeof(void*));
-
- void** ip = fProgram;
- for (int i = 0; i < nstages; i++) {
- *ip++ = (void*)enum_to_Fn(stages[i].stage);
- if (stages[i].ctx) {
- *ip++ = stages[i].ctx;
- }
- }
- *ip++ = (void*)just_return;
+ build_program(fProgram, stages, nstages);
}
~Compiled() { sk_free(fProgram); }
@@ -998,23 +1018,7 @@ namespace {
}
void operator()(size_t x, size_t y, size_t n) {
- float dx[] = { 0,1,2,3,4,5,6,7 };
- SkNf X = SkNf(x) + SkNf::Load(dx) + 0.5f,
- Y = SkNf(y) + 0.5f,
- _0 = SkNf(0),
- _1 = SkNf(1);
-
- void** p = fProgram;
- auto start = (Fn)load_and_increment(&p);
- while (n >= N) {
- start(x*N, p, X,Y,_1,_0, _0,_0,_0,_0);
- X += (float)N;
- x += N;
- n -= N;
- }
- if (n) {
- start(x*N+n, p, X,Y,_1,_0, _0,_0,_0,_0);
- }
+ run_program(fProgram, x, y, n);
}
void** fProgram;
@@ -1030,7 +1034,15 @@ namespace SK_OPTS_NS {
SI void run_pipeline(size_t x, size_t y, size_t n,
const SkRasterPipeline::Stage* stages, int nstages) {
- Compiled{stages,nstages}(x,y,n);
+ static const int kStackMax = 256;
+ // Worst case is nstages stages with nstages context pointers, and just_return.
+ if (2*nstages+1 <= kStackMax) {
+ void* program[kStackMax];
+ build_program(program, stages, nstages);
+ run_program(program, x,y,n);
+ } else {
+ Compiled{stages,nstages}(x,y,n);
+ }
}
} // namespace SK_OPTS_NS