aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/jumper/SkJumper_stages.cpp
diff options
context:
space:
mode:
authorGravatar herb <herb@google.com>2017-05-11 16:54:23 -0400
committerGravatar Skia Commit-Bot <skia-commit-bot@chromium.org>2017-05-11 21:24:28 +0000
commit892501d09bc8608704362235c73a59bb23a386b3 (patch)
tree379a98c316db1c2129824d3de6c6a1e21907b89a /src/jumper/SkJumper_stages.cpp
parentd95236dab0ae47a510530c340e6eaa72d3c616b6 (diff)
Evenly space gradient stage.
This seems like an experiment at this point because I don't know how to do this kind of thing on arm.

Numbers from Skylake...

Before:
./out/Release/nanobench --config srgb \
    --match gradient_linear_clamp_3color gradient_linear_clamp_hicolor -q
19:48:13 Timer overhead: 36.7ns
! -> high variance, ? -> moderate variance
 micros   bench
 439.92 ? gradient_linear_clamp_3color srgb
2697.60   gradient_linear_clamp_hicolor srgb
 437.28   gradient_linear_clamp_3color_4f srgb
2700.50   gradient_linear_clamp_hicolor_4f srgb

After:
 micros   bench
 382.35   gradient_linear_clamp_3color srgb
 593.49   gradient_linear_clamp_hicolor srgb
 382.36   gradient_linear_clamp_3color_4f srgb
 565.60   gradient_linear_clamp_hicolor_4f srgb

Numbers on my Mac Trashcan are about even; there is no speedup or slowdown between master and this change.

Change-Id: I04402452e23c0888512362fd1d6d5436cea61719
Reviewed-on: https://skia-review.googlesource.com/15960
Commit-Queue: Herb Derby <herb@google.com>
Reviewed-by: Mike Klein <mtklein@chromium.org>
Diffstat (limited to 'src/jumper/SkJumper_stages.cpp')
-rw-r--r--src/jumper/SkJumper_stages.cpp46
1 files changed, 46 insertions, 0 deletions
diff --git a/src/jumper/SkJumper_stages.cpp b/src/jumper/SkJumper_stages.cpp
index fb01dbbff5..9e6e426ade 100644
--- a/src/jumper/SkJumper_stages.cpp
+++ b/src/jumper/SkJumper_stages.cpp
@@ -1034,6 +1034,52 @@ STAGE(matrix_perspective) {
g = G * rcp(Z);
}
+STAGE(evenly_spaced_linear_gradient) { // Rewrites r,g,b,a as mad(t, f, b) per channel, with f/b looked up by an index derived from t (the incoming r).
+ struct Ctx { // Layout this stage assumes behind the opaque `ctx` pointer.
+ size_t stopCount; // Scales t into a table index and selects the AVX2 fast path when <= 8.
+ float* fs[4]; // Per-channel tables supplying mad()'s second argument; [0..3] feed r, g, b, a.
+ float* bs[4]; // Per-channel tables supplying mad()'s third argument; [0..3] feed r, g, b, a.
+ };
+
+ auto c = (const Ctx*)ctx;
+ auto t = r; // Keep a copy of the incoming r: r itself is overwritten below.
+ auto i = trunc_(t*(c->stopCount - 1)); // Table index per lane. NOTE(review): presumably t is already limited to [0,1] so i stays within the tables -- confirm upstream.
+
+#if defined(JUMPER) && defined(__AVX2__) // Fast path exists only in JUMPER builds with AVX2.
+ if (c->stopCount <=8) { // Small tables: load each table once and select entries with a permute instead of gather().
+ auto fr = _mm256_permutevar8x32_ps(_mm256_loadu_ps(c->fs[0]), i); // NOTE(review): loads 8 floats whatever stopCount is -- presumably tables are padded to >= 8 entries; confirm at the allocation site.
+ auto br = _mm256_permutevar8x32_ps(_mm256_loadu_ps(c->bs[0]), i);
+ auto fg = _mm256_permutevar8x32_ps(_mm256_loadu_ps(c->fs[1]), i);
+ auto bg = _mm256_permutevar8x32_ps(_mm256_loadu_ps(c->bs[1]), i);
+ auto fb = _mm256_permutevar8x32_ps(_mm256_loadu_ps(c->fs[2]), i);
+ auto bb = _mm256_permutevar8x32_ps(_mm256_loadu_ps(c->bs[2]), i);
+ auto fa = _mm256_permutevar8x32_ps(_mm256_loadu_ps(c->fs[3]), i);
+ auto ba = _mm256_permutevar8x32_ps(_mm256_loadu_ps(c->bs[3]), i);
+ r = mad(t, fr, br); // Presumably t*fr + br -- confirm against mad()'s definition.
+ g = mad(t, fg, bg);
+ b = mad(t, fb, bb);
+ a = mad(t, fa, ba);
+
+ } else // With the fast path compiled in, more than 8 stops falls through to the gather path.
+#endif
+ { // General path; the only path when the AVX2 block above is compiled out.
+ auto fr = gather(c->fs[0], i); // Same lookups as the fast path, via the gather() helper.
+ auto br = gather(c->bs[0], i);
+ auto fg = gather(c->fs[1], i);
+ auto bg = gather(c->bs[1], i);
+ auto fb = gather(c->fs[2], i);
+ auto bb = gather(c->bs[2], i);
+ auto fa = gather(c->fs[3], i);
+ auto ba = gather(c->bs[3], i);
+
+ r = mad(t, fr, br); // Same per-channel combine as the fast path.
+ g = mad(t, fg, bg);
+ b = mad(t, fb, bb);
+ a = mad(t, fa, ba);
+ }
+
+}
+
STAGE(linear_gradient) {
struct Stop { float pos; float f[4], b[4]; };
struct Ctx { size_t n; Stop *stops; float start[4]; };