diff options
author | 2017-05-11 16:54:23 -0400 | |
---|---|---|
committer | 2017-05-11 21:24:28 +0000 | |
commit | 892501d09bc8608704362235c73a59bb23a386b3 (patch) | |
tree | 379a98c316db1c2129824d3de6c6a1e21907b89a /src/jumper/SkJumper_stages.cpp | |
parent | d95236dab0ae47a510530c340e6eaa72d3c616b6 (diff) |
Evenly spaced gradient stage.
This is still an experiment at this point, because I don't know how to do
this kind of thing on ARM.
Numbers from Skylake...
Before:
./out/Release/nanobench --config srgb \
--match gradient_linear_clamp_3color gradient_linear_clamp_hicolor -q 19:48:13
Timer overhead: 36.7ns
! -> high variance, ? -> moderate variance
micros bench
439.92 ? gradient_linear_clamp_3color srgb
2697.60 gradient_linear_clamp_hicolor srgb
437.28 gradient_linear_clamp_3color_4f srgb
2700.50 gradient_linear_clamp_hicolor_4f srgb
After:
micros bench
382.35 gradient_linear_clamp_3color srgb
593.49 gradient_linear_clamp_hicolor srgb
382.36 gradient_linear_clamp_3color_4f srgb
565.60 gradient_linear_clamp_hicolor_4f srgb
Numbers on my Mac Trashcan are about even; there is no
speedup or slowdown between master and this change.
Change-Id: I04402452e23c0888512362fd1d6d5436cea61719
Reviewed-on: https://skia-review.googlesource.com/15960
Commit-Queue: Herb Derby <herb@google.com>
Reviewed-by: Mike Klein <mtklein@chromium.org>
Diffstat (limited to 'src/jumper/SkJumper_stages.cpp')
-rw-r--r-- | src/jumper/SkJumper_stages.cpp | 46 |
1 files changed, 46 insertions, 0 deletions
```diff
diff --git a/src/jumper/SkJumper_stages.cpp b/src/jumper/SkJumper_stages.cpp
index fb01dbbff5..9e6e426ade 100644
--- a/src/jumper/SkJumper_stages.cpp
+++ b/src/jumper/SkJumper_stages.cpp
@@ -1034,6 +1034,52 @@ STAGE(matrix_perspective) {
     g = G * rcp(Z);
 }
 
+STAGE(evenly_spaced_linear_gradient) {
+    struct Ctx {
+        size_t stopCount;
+        float* fs[4];
+        float* bs[4];
+    };
+
+    auto c = (const Ctx*)ctx;
+    auto t = r;
+    auto i = trunc_(t*(c->stopCount - 1));
+
+#if defined(JUMPER) && defined(__AVX2__)
+    if (c->stopCount <=8) {
+        auto fr = _mm256_permutevar8x32_ps(_mm256_loadu_ps(c->fs[0]), i);
+        auto br = _mm256_permutevar8x32_ps(_mm256_loadu_ps(c->bs[0]), i);
+        auto fg = _mm256_permutevar8x32_ps(_mm256_loadu_ps(c->fs[1]), i);
+        auto bg = _mm256_permutevar8x32_ps(_mm256_loadu_ps(c->bs[1]), i);
+        auto fb = _mm256_permutevar8x32_ps(_mm256_loadu_ps(c->fs[2]), i);
+        auto bb = _mm256_permutevar8x32_ps(_mm256_loadu_ps(c->bs[2]), i);
+        auto fa = _mm256_permutevar8x32_ps(_mm256_loadu_ps(c->fs[3]), i);
+        auto ba = _mm256_permutevar8x32_ps(_mm256_loadu_ps(c->bs[3]), i);
+        r = mad(t, fr, br);
+        g = mad(t, fg, bg);
+        b = mad(t, fb, bb);
+        a = mad(t, fa, ba);
+
+    } else
+#endif
+    {
+        auto fr = gather(c->fs[0], i);
+        auto br = gather(c->bs[0], i);
+        auto fg = gather(c->fs[1], i);
+        auto bg = gather(c->bs[1], i);
+        auto fb = gather(c->fs[2], i);
+        auto bb = gather(c->bs[2], i);
+        auto fa = gather(c->fs[3], i);
+        auto ba = gather(c->bs[3], i);
+
+        r = mad(t, fr, br);
+        g = mad(t, fg, bg);
+        b = mad(t, fb, bb);
+        a = mad(t, fa, ba);
+    }
+
+}
+
 STAGE(linear_gradient) {
     struct Stop { float pos; float f[4], b[4]; };
     struct Ctx { size_t n; Stop *stops; float start[4]; };
```