diff options
author | Mike Klein <mtklein@chromium.org> | 2017-01-20 12:51:36 -0500 |
---|---|---|
committer | Skia Commit-Bot <skia-commit-bot@chromium.org> | 2017-02-14 14:13:17 +0000 |
commit | 351549febad86d785aab6c985c9489d55a0bd2a4 (patch) | |
tree | 2950245a836b12c6f8c2f92a32395b8ed9f301f1 /src/splicer/SkSplicer_stages.cpp | |
parent | 936f81b95882be2e171a623b3116cc2ff408c813 (diff) |
SkSplicer support for 2-point gradients.
Add some stages to SkSplicer:
- seed_shader
- matrix_2x3
- clamp_x
- clamp_y
- linear_gradient_2stops
seed_shader needed new constants, 0.5f and {0,1,2,3,4,5,6,7}.
$ out/nanobench -m gradient_linear_clamp\$ --config f16 --ms 2000 -q
Before: 612.17us
After: 163.80us
Change-Id: I6e03383c95ea070250424e743080a7930efeca77
Reviewed-on: https://skia-review.googlesource.com/7348
Reviewed-by: Mike Klein <mtklein@chromium.org>
Commit-Queue: Mike Klein <mtklein@chromium.org>
Diffstat (limited to 'src/splicer/SkSplicer_stages.cpp')
-rw-r--r-- | src/splicer/SkSplicer_stages.cpp | 42 |
1 files changed, 42 insertions, 0 deletions
diff --git a/src/splicer/SkSplicer_stages.cpp b/src/splicer/SkSplicer_stages.cpp
index bff58c2518..badaf78695 100644
--- a/src/splicer/SkSplicer_stages.cpp
+++ b/src/splicer/SkSplicer_stages.cpp
@@ -169,6 +169,20 @@ STAGE(inc_x) {
     x += sizeof(F) / sizeof(float);
 }
 
+STAGE(seed_shader) {
+    auto y = *(const int*)ctx;
+
+    // It's important for speed to explicitly cast(x) and cast(y),
+    // which has the effect of splatting them to vectors before converting to floats.
+    // On Intel this breaks a data dependency on previous loop iterations' registers.
+
+    r = cast(x) + k->_0_5 + unaligned_load<F>(k->iota);
+    g = cast(y) + k->_0_5;
+    b = k->_1;
+    a = 0;
+    dr = dg = db = da = 0;
+}
+
 STAGE(clear) {
     r = g = b = a = 0;
 }
@@ -424,6 +438,21 @@ STAGE(store_f16) {
 #endif
 }
 
+static F clamp(const F& v, float limit) {
+    F l = (F)((U32)F(limit) + U32(0xffffffff)); // limit - 1 ulp
+    return max(0, min(v, l));
+}
+STAGE(clamp_x) { r = clamp(r, *(const float*)ctx); }
+STAGE(clamp_y) { g = clamp(g, *(const float*)ctx); }
+
+STAGE(matrix_2x3) {
+    auto m = (const float*)ctx;
+
+    auto R = fma(r,m[0], fma(g,m[2], m[4])),
+         G = fma(r,m[1], fma(g,m[3], m[5]));
+    r = R;
+    g = G;
+}
 STAGE(matrix_3x4) {
     auto m = (const float*)ctx;
 
@@ -434,3 +463,16 @@ STAGE(matrix_3x4) {
     g = G;
     b = B;
 }
+
+STAGE(linear_gradient_2stops) {
+    using F4 = float __attribute__((ext_vector_type(4)));
+
+    struct Ctx { F4 c0, dc; };
+    auto c = unaligned_load<Ctx>(ctx);
+
+    auto t = r;
+    r = fma(t, c.dc[0], c.c0[0]);
+    g = fma(t, c.dc[1], c.c0[1]);
+    b = fma(t, c.dc[2], c.c0[2]);
+    a = fma(t, c.dc[3], c.c0[3]);
+}