diff options
author | 2017-03-01 13:07:40 -0500 | |
---|---|---|
committer | 2017-03-02 16:14:45 +0000 | |
commit | c31858bcba3f6d9eb6b57ae03c15b266324a5c23 (patch) | |
tree | 3148d117b2b6c883c6aefa8d399fea147a536df5 /src/jumper/SkJumper.cpp | |
parent | f7cf81aefd28e5bfe74d40b4fc037df72f157f33 (diff) |
SkJumper: handle the <kStride tail in AVX+ mode.
We have plenty of general-purpose registers to spare on x86-64,
so the cheapest thing to do is to use one to hold the usual 'tail'.
Speedups on HSW:
SkRasterPipeline_srgb: 292 -> 170
SkRasterPipeline_f16: 122 -> 90
There's plenty more room to improve here, e.g. using masked loads and
stores, but this seems to be enough to get things working reasonably.
BUG=skia:6289
Change-Id: I8c0ed325391822e9f36636500350205e93942111
Reviewed-on: https://skia-review.googlesource.com/9110
Reviewed-by: Herb Derby <herb@google.com>
Commit-Queue: Mike Klein <mtklein@chromium.org>
Diffstat (limited to 'src/jumper/SkJumper.cpp')
-rw-r--r-- | src/jumper/SkJumper.cpp | 8 |
1 files changed, 4 insertions, 4 deletions
diff --git a/src/jumper/SkJumper.cpp b/src/jumper/SkJumper.cpp index 488caf6da8..97132e3c66 100644 --- a/src/jumper/SkJumper.cpp +++ b/src/jumper/SkJumper.cpp @@ -249,11 +249,11 @@ bool SkRasterPipeline::run_with_jumper(size_t x, size_t n) const { SkAutoSTMalloc<64, void*> program(2*fStages.size() + 1); const size_t limit = x+n; - auto build_and_run = [&](size_t stride, + auto build_and_run = [&](size_t min_stride, StageFn* (*lookup)(SkRasterPipeline::StockStage), StageFn* just_return, size_t (*start_pipeline)(size_t, void**, K*, size_t)) { - if (x + stride <= limit) { + if (x + min_stride <= limit) { void** ip = program.get(); for (auto&& st : fStages) { auto fn = lookup(st.stage); @@ -288,12 +288,12 @@ bool SkRasterPipeline::run_with_jumper(size_t x, size_t n) const { #elif defined(__x86_64__) || defined(_M_X64) if (1 && SkCpu::Supports(SkCpu::HSW)) { - if (!build_and_run(8, lookup_hsw, ASM(just_return,hsw), ASM(start_pipeline,hsw))) { + if (!build_and_run(1, lookup_hsw, ASM(just_return,hsw), ASM(start_pipeline,hsw))) { return false; } } if (1 && SkCpu::Supports(SkCpu::AVX)) { - if (!build_and_run(8, lookup_avx, ASM(just_return,avx), ASM(start_pipeline,avx))) { + if (!build_and_run(1, lookup_avx, ASM(just_return,avx), ASM(start_pipeline,avx))) { return false; } } |