about | summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author: Mike Klein <mtklein@chromium.org> 2017-05-22 12:01:59 -0400
committer: Skia Commit-Bot <skia-commit-bot@chromium.org> 2017-05-22 16:41:22 +0000
commit: 0a76b413eac46ec218b367c5456709059557f5db (patch)
tree: acf1df36b1d2a8302e69beb3221a22da154ccdf4
parent: 929bfeb17c4d6c422c17027a18640af8d710d030 (diff)
add compile, use it in blitter
I expanded an existing bench to show off the difference:

    SkRasterPipeline_…
    300  …compile 1x  …run 1.14x

Change-Id: I5d63d602cda3f78d2d0891fcc85baf5514632900
Reviewed-on: https://skia-review.googlesource.com/17458
Commit-Queue: Mike Klein <mtklein@chromium.org>
Reviewed-by: Herb Derby <herb@google.com>
-rw-r--r--  bench/SkRasterPipelineBench.cpp       24
-rw-r--r--  src/core/SkRasterPipeline.h            5
-rw-r--r--  src/core/SkRasterPipelineBlitter.cpp  42
-rw-r--r--  src/jumper/SkJumper.cpp               67
4 files changed, 97 insertions, 41 deletions
diff --git a/bench/SkRasterPipelineBench.cpp b/bench/SkRasterPipelineBench.cpp
index 2848218977..bef80b3fca 100644
--- a/bench/SkRasterPipelineBench.cpp
+++ b/bench/SkRasterPipelineBench.cpp
@@ -66,11 +66,13 @@ public:
DEF_BENCH( return (new SkRasterPipelineBench< true>); )
DEF_BENCH( return (new SkRasterPipelineBench<false>); )
-class SkRasterPipelineLegacyBench : public Benchmark {
+class SkRasterPipelineCompileVsRunBench : public Benchmark {
public:
+ explicit SkRasterPipelineCompileVsRunBench(bool compile) : fCompile(compile) {}
bool isSuitableFor(Backend backend) override { return backend == kNonRendering_Backend; }
const char* onGetName() override {
- return "SkRasterPipeline_legacy";
+ return fCompile ? "SkRasterPipeline_compile"
+ : "SkRasterPipeline_run";
}
void onDraw(int loops, SkCanvas*) override {
@@ -84,12 +86,24 @@ public:
p.append(SkRasterPipeline::srcover);
p.append(SkRasterPipeline::store_8888, &dst_ctx);
- while (loops --> 0) {
- p.run(0,N);
+ if (fCompile) {
+ char buffer[1024];
+ SkArenaAlloc alloc(buffer);
+ auto fn = p.compile(&alloc);
+ while (loops --> 0) {
+ fn(0,N);
+ }
+ } else {
+ while (loops --> 0) {
+ p.run(0,N);
+ }
}
}
+private:
+ bool fCompile;
};
-DEF_BENCH( return (new SkRasterPipelineLegacyBench); )
+DEF_BENCH( return (new SkRasterPipelineCompileVsRunBench(true )); )
+DEF_BENCH( return (new SkRasterPipelineCompileVsRunBench(false)); )
static SkColorSpaceTransferFn gamma(float g) {
SkColorSpaceTransferFn fn = {0,0,0,0,0,0,0};
diff --git a/src/core/SkRasterPipeline.h b/src/core/SkRasterPipeline.h
index 3b2d617c69..6287d627b7 100644
--- a/src/core/SkRasterPipeline.h
+++ b/src/core/SkRasterPipeline.h
@@ -8,10 +8,12 @@
#ifndef SkRasterPipeline_DEFINED
#define SkRasterPipeline_DEFINED
+#include "SkArenaAlloc.h"
#include "SkImageInfo.h"
#include "SkNx.h"
#include "SkTArray.h"
#include "SkTypes.h"
+#include <functional>
#include <vector>
/**
@@ -123,6 +125,9 @@ public:
// Runs the pipeline walking x through [x,x+n).
void run(size_t x, size_t n) const;
+ // Allocates a thunk which amortizes run() setup cost in alloc.
+ std::function<void(size_t, size_t)> compile(SkArenaAlloc*) const;
+
void dump() const;
struct Stage {
diff --git a/src/core/SkRasterPipelineBlitter.cpp b/src/core/SkRasterPipelineBlitter.cpp
index 6397cae8d9..026b92bee7 100644
--- a/src/core/SkRasterPipelineBlitter.cpp
+++ b/src/core/SkRasterPipelineBlitter.cpp
@@ -30,9 +30,10 @@ public:
const SkRasterPipeline& shaderPipeline,
bool is_opaque, bool is_constant, bool wants_dither);
- SkRasterPipelineBlitter(SkPixmap dst, SkBlendMode blend)
+ SkRasterPipelineBlitter(SkPixmap dst, SkBlendMode blend, SkArenaAlloc* alloc)
: fDst(dst)
, fBlend(blend)
+ , fAlloc(alloc)
{}
void blitH (int x, int y, int w) override;
@@ -51,6 +52,7 @@ private:
SkPixmap fDst;
SkBlendMode fBlend;
+ SkArenaAlloc* fAlloc;
SkRasterPipeline fColorPipeline;
// We may be able to specialize blitH() into a memset.
@@ -58,10 +60,10 @@ private:
uint64_t fMemsetColor = 0; // Big enough for largest dst format, F16.
// Built lazily on first use.
- SkRasterPipeline fBlitH,
- fBlitAntiH,
- fBlitMaskA8,
- fBlitMaskLCD16;
+ std::function<void(size_t, size_t)> fBlitH,
+ fBlitAntiH,
+ fBlitMaskA8,
+ fBlitMaskLCD16;
// These values are pointed to by the blit pipelines above,
// which allows us to adjust them from call to call.
@@ -131,7 +133,7 @@ SkBlitter* SkRasterPipelineBlitter::Create(const SkPixmap& dst,
bool is_opaque,
bool is_constant,
bool wants_dither) {
- auto blitter = alloc->make<SkRasterPipelineBlitter>(dst, paint.getBlendMode());
+ auto blitter = alloc->make<SkRasterPipelineBlitter>(dst, paint.getBlendMode(), alloc);
// Our job in this factory is to fill out the blitter's color pipeline.
// This is the common front of the full blit pipelines, each constructed lazily on first use.
@@ -265,8 +267,8 @@ void SkRasterPipelineBlitter::blitH(int x, int y, int w) {
}
}
- auto& p = fBlitH;
- if (p.empty()) {
+ if (!fBlitH) {
+ SkRasterPipeline p;
p.extend(fColorPipeline);
if (fBlend != SkBlendMode::kSrc) {
this->append_load_d(&p);
@@ -274,13 +276,14 @@ void SkRasterPipelineBlitter::blitH(int x, int y, int w) {
this->maybe_clamp(&p);
}
this->append_store(&p);
+ fBlitH = p.compile(fAlloc);
}
- p.run(x,w);
+ fBlitH(x,w);
}
void SkRasterPipelineBlitter::blitAntiH(int x, int y, const SkAlpha aa[], const int16_t runs[]) {
- auto& p = fBlitAntiH;
- if (p.empty()) {
+ if (!fBlitAntiH) {
+ SkRasterPipeline p;
p.extend(fColorPipeline);
if (fBlend == SkBlendMode::kSrcOver) {
p.append(SkRasterPipeline::scale_1_float, &fCurrentCoverage);
@@ -293,6 +296,7 @@ void SkRasterPipelineBlitter::blitAntiH(int x, int y, const SkAlpha aa[], const
}
this->maybe_clamp(&p);
this->append_store(&p);
+ fBlitAntiH = p.compile(fAlloc);
}
fDstPtr = fDst.writable_addr(0,y);
@@ -303,7 +307,7 @@ void SkRasterPipelineBlitter::blitAntiH(int x, int y, const SkAlpha aa[], const
case 0xff: this->blitH(x,y,run); break;
default:
fCurrentCoverage = *aa * (1/255.0f);
- p.run(x,run);
+ fBlitAntiH(x,run);
}
x += run;
runs += run;
@@ -317,8 +321,8 @@ void SkRasterPipelineBlitter::blitMask(const SkMask& mask, const SkIRect& clip)
return INHERITED::blitMask(mask, clip);
}
- if (mask.fFormat == SkMask::kA8_Format && fBlitMaskA8.empty()) {
- auto& p = fBlitMaskA8;
+ if (mask.fFormat == SkMask::kA8_Format && !fBlitMaskA8) {
+ SkRasterPipeline p;
p.extend(fColorPipeline);
if (fBlend == SkBlendMode::kSrcOver) {
p.append(SkRasterPipeline::scale_u8, &fMaskPtr);
@@ -331,16 +335,18 @@ void SkRasterPipelineBlitter::blitMask(const SkMask& mask, const SkIRect& clip)
}
this->maybe_clamp(&p);
this->append_store(&p);
+ fBlitMaskA8 = p.compile(fAlloc);
}
- if (mask.fFormat == SkMask::kLCD16_Format && fBlitMaskLCD16.empty()) {
- auto& p = fBlitMaskLCD16;
+ if (mask.fFormat == SkMask::kLCD16_Format && !fBlitMaskLCD16) {
+ SkRasterPipeline p;
p.extend(fColorPipeline);
this->append_load_d(&p);
this->append_blend(&p);
p.append(SkRasterPipeline::lerp_565, &fMaskPtr);
this->maybe_clamp(&p);
this->append_store(&p);
+ fBlitMaskLCD16 = p.compile(fAlloc);
}
int x = clip.left();
@@ -351,11 +357,11 @@ void SkRasterPipelineBlitter::blitMask(const SkMask& mask, const SkIRect& clip)
switch (mask.fFormat) {
case SkMask::kA8_Format:
fMaskPtr = mask.getAddr8(x,y)-x;
- fBlitMaskA8.run(x,clip.width());
+ fBlitMaskA8(x,clip.width());
break;
case SkMask::kLCD16_Format:
fMaskPtr = mask.getAddrLCD16(x,y)-x;
- fBlitMaskLCD16.run(x,clip.width());
+ fBlitMaskLCD16(x,clip.width());
break;
default:
// TODO
diff --git a/src/jumper/SkJumper.cpp b/src/jumper/SkJumper.cpp
index 1e703e1b1c..0e184b97c3 100644
--- a/src/jumper/SkJumper.cpp
+++ b/src/jumper/SkJumper.cpp
@@ -112,6 +112,7 @@ static const SkJumper_Engine kPortable = {
};
// ...and a platform-specific engine chosen on first use based on CPU features.
static SkJumper_Engine gPlatform = kPortable;
+static SkOnce gChooseEngineOnce;
static SkJumper_Engine choose_engine() {
#if __has_feature(memory_sanitizer)
@@ -172,29 +173,59 @@ static SkJumper_Engine choose_engine() {
return kPortable;
}
+static void build_pipeline(const SkRasterPipeline::Stage* stages, int nstages,
+ const SkJumper_Engine& engine, void** ip) {
+ for (int i = 0; i < nstages; i++) {
+ const auto& st = stages[i];
+ StageFn* fn = engine.stages[st.stage];
+
+ *ip++ = (void*)fn;
+ if (st.ctx) {
+ *ip++ = st.ctx;
+ }
+ }
+ *ip = (void*)engine.just_return;
+}
+
void SkRasterPipeline::run(size_t x, size_t n) const {
- static SkOnce once;
- once([]{ gPlatform = choose_engine(); });
+ gChooseEngineOnce([]{ gPlatform = choose_engine(); });
SkAutoSTMalloc<64, void*> program(2*fStages.size() + 1);
const size_t limit = x+n;
- auto build_and_run = [&](const SkJumper_Engine& engine) {
- if (x + engine.min_stride <= limit) {
- void** ip = program.get();
- for (auto&& st : fStages) {
- StageFn* fn = engine.stages[st.stage];
- *ip++ = (void*)fn;
- if (st.ctx) {
- *ip++ = st.ctx;
- }
- }
- *ip = (void*)engine.just_return;
-
- x = engine.start_pipeline(x, program.get(), &kConstants, limit);
+ if (x + gPlatform.min_stride <= limit) {
+ build_pipeline(fStages.data(), SkToInt(fStages.size()), gPlatform, program.get());
+ x = gPlatform.start_pipeline(x, program.get(), &kConstants, limit);
+ }
+ if (x < limit) {
+ build_pipeline(fStages.data(), SkToInt(fStages.size()), kPortable, program.get());
+ kPortable.start_pipeline(x, program.get(), &kConstants, limit);
+ }
+}
+
+std::function<void(size_t, size_t)> SkRasterPipeline::compile(SkArenaAlloc* alloc) const {
+ gChooseEngineOnce([]{ gPlatform = choose_engine(); });
+
+ void** platform = alloc->makeArray<void*>(2*fStages.size() + 1);
+ build_pipeline(fStages.data(), SkToInt(fStages.size()), gPlatform, platform);
+
+ if (gPlatform.min_stride == 1) {
+ return [=](size_t x, size_t n) {
+ const size_t limit = x+n;
+ gPlatform.start_pipeline(x, platform, &kConstants, limit);
+ };
+ }
+
+ void** portable = alloc->makeArray<void*>(2*fStages.size() + 1);
+ build_pipeline(fStages.data(), SkToInt(fStages.size()), kPortable, portable);
+
+ return [=](size_t x, size_t n) {
+ const size_t limit = x+n;
+ if (x + gPlatform.min_stride <= limit) {
+ x = gPlatform.start_pipeline(x, platform, &kConstants, limit);
+ }
+ if (x < limit) {
+ kPortable.start_pipeline(x, portable, &kConstants, limit);
}
};
-
- build_and_run(gPlatform);
- build_and_run(kPortable);
}