aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/jumper/SkJumper.cpp
diff options
context:
space:
mode:
author: Mike Klein <mtklein@chromium.org> 2017-03-01 13:07:40 -0500
committer: Mike Klein <mtklein@chromium.org> 2017-03-02 16:14:45 +0000
commit: c31858bcba3f6d9eb6b57ae03c15b266324a5c23 (patch)
tree: 3148d117b2b6c883c6aefa8d399fea147a536df5 /src/jumper/SkJumper.cpp
parent: f7cf81aefd28e5bfe74d40b4fc037df72f157f33 (diff)
SkJumper: handle the <kStride tail in AVX+ mode.
We have plenty of general-purpose registers to spare on x86-64, so the cheapest thing to do is use one to hold the usual 'tail'.

Speedups on HSW:
    SkRasterPipeline_srgb: 292 -> 170
    SkRasterPipeline_f16: 122 -> 90

There's plenty more room to improve here, e.g. using mask loads and stores, but this seems to be enough to get things working reasonably.

BUG=skia:6289
Change-Id: I8c0ed325391822e9f36636500350205e93942111
Reviewed-on: https://skia-review.googlesource.com/9110
Reviewed-by: Herb Derby <herb@google.com>
Commit-Queue: Mike Klein <mtklein@chromium.org>
Diffstat (limited to 'src/jumper/SkJumper.cpp')
-rw-r--r--src/jumper/SkJumper.cpp8
1 files changed, 4 insertions, 4 deletions
diff --git a/src/jumper/SkJumper.cpp b/src/jumper/SkJumper.cpp
index 488caf6da8..97132e3c66 100644
--- a/src/jumper/SkJumper.cpp
+++ b/src/jumper/SkJumper.cpp
@@ -249,11 +249,11 @@ bool SkRasterPipeline::run_with_jumper(size_t x, size_t n) const {
SkAutoSTMalloc<64, void*> program(2*fStages.size() + 1);
const size_t limit = x+n;
- auto build_and_run = [&](size_t stride,
+ auto build_and_run = [&](size_t min_stride,
StageFn* (*lookup)(SkRasterPipeline::StockStage),
StageFn* just_return,
size_t (*start_pipeline)(size_t, void**, K*, size_t)) {
- if (x + stride <= limit) {
+ if (x + min_stride <= limit) {
void** ip = program.get();
for (auto&& st : fStages) {
auto fn = lookup(st.stage);
@@ -288,12 +288,12 @@ bool SkRasterPipeline::run_with_jumper(size_t x, size_t n) const {
#elif defined(__x86_64__) || defined(_M_X64)
if (1 && SkCpu::Supports(SkCpu::HSW)) {
- if (!build_and_run(8, lookup_hsw, ASM(just_return,hsw), ASM(start_pipeline,hsw))) {
+ if (!build_and_run(1, lookup_hsw, ASM(just_return,hsw), ASM(start_pipeline,hsw))) {
return false;
}
}
if (1 && SkCpu::Supports(SkCpu::AVX)) {
- if (!build_and_run(8, lookup_avx, ASM(just_return,avx), ASM(start_pipeline,avx))) {
+ if (!build_and_run(1, lookup_avx, ASM(just_return,avx), ASM(start_pipeline,avx))) {
return false;
}
}