diff options
author | mtklein <mtklein@google.com> | 2015-04-27 11:21:16 -0700 |
---|---|---|
committer | Commit bot <commit-bot@chromium.org> | 2015-04-27 11:21:16 -0700 |
commit | 8d3e9dff3f3db3fa77c383e4cd6c47b9898a8fcd (patch) | |
tree | eee352eb6dc903b3762a47da5a8fecbdcbdb6016 /src/effects | |
parent | 8a0a16d0a5e684e22cc3464642bc03bf341149a8 (diff) |
Revert of Mike's radial gradient CL with better float -> int. (patchset #7 id:120001 of https://codereview.chromium.org/1109643002/)
Reason for revert:
compile failures.
Original issue's description:
> Mike's radial gradient CL with better float -> int.
>
> patch from issue 1072303005 at patchset 40001 (http://crrev.com/1072303005#ps40001)
>
> This looks quite launchable. radial_gradient3, min of 100 samples:
> N5: 985µs -> 946µs
> MBP: 395µs -> 279µs
>
> On my MBP, most of the meat looks like it's now in reading the cache and writing to dst one color at a time. Is that something we could do in float math rather than with a lookup table?
>
> BUG=skia:
>
> CQ_EXTRA_TRYBOTS=client.skia.android:Test-Android-GCC-Nexus5-CPU-NEON-Arm7-Debug-Trybot,Test-Android-GCC-Nexus9-CPU-Denver-Arm64-Debug-Trybot
>
> Committed: https://skia.googlesource.com/skia/+/abf6c5cf95e921fae59efb487480e5b5081cf0ec
TBR=reed@google.com,robertphillips@google.com,mtklein@chromium.org
NOPRESUBMIT=true
NOTREECHECKS=true
NOTRY=true
BUG=skia:
Review URL: https://codereview.chromium.org/1109883003
Diffstat (limited to 'src/effects')
-rw-r--r-- | src/effects/gradients/SkRadialGradient.cpp | 87 |
1 files changed, 7 insertions, 80 deletions
diff --git a/src/effects/gradients/SkRadialGradient.cpp b/src/effects/gradients/SkRadialGradient.cpp
index bf3c821f6b..b25a8750a2 100644
--- a/src/effects/gradients/SkRadialGradient.cpp
+++ b/src/effects/gradients/SkRadialGradient.cpp
@@ -8,7 +8,6 @@
 
 #include "SkRadialGradient.h"
 #include "SkRadialGradient_Table.h"
-#include "SkNx.h"
 
 #define kSQRT_TABLE_BITS 11
 #define kSQRT_TABLE_SIZE (1 << kSQRT_TABLE_BITS)
@@ -271,16 +270,13 @@ void SkRadialGradient::flatten(SkWriteBuffer& buffer) const {
 namespace {
 
 inline bool radial_completely_pinned(int fx, int dx, int fy, int dy) {
-    // fast, overly-conservative test: checks unit square instead of unit circle
-    bool xClamped = (fx >= SK_FixedHalf && dx >= 0) || (fx <= -SK_FixedHalf && dx <= 0);
-    bool yClamped = (fy >= SK_FixedHalf && dy >= 0) || (fy <= -SK_FixedHalf && dy <= 0);
-    return xClamped || yClamped;
-}
+    // fast, overly-conservative test: checks unit square instead
+    // of unit circle
+    bool xClamped = (fx >= SK_FixedHalf && dx >= 0) ||
+                    (fx <= -SK_FixedHalf && dx <= 0);
+    bool yClamped = (fy >= SK_FixedHalf && dy >= 0) ||
+                    (fy <= -SK_FixedHalf && dy <= 0);
 
-inline bool radial_completely_pinned(SkScalar fx, SkScalar dx, SkScalar fy, SkScalar dy) {
-    // fast, overly-conservative test: checks unit square instead of unit circle
-    bool xClamped = (fx >= 1 && dx >= 0) || (fx <= -1 && dx <= 0);
-    bool yClamped = (fy >= 1 && dy >= 0) || (fy <= -1 && dy <= 0);
     return xClamped || yClamped;
 }
 
@@ -377,70 +373,6 @@ void shadeSpan_radial_clamp(SkScalar sfx, SkScalar sdx,
     }
 }
 
-// TODO: can we get away with 0th approximatino of inverse-sqrt (i.e. faster than rsqrt)?
-// seems like ~10bits is more than enough for our use, since we want a byte-index
-static inline Sk4f fast_sqrt(const Sk4f& R) {
-    return R * R.rsqrt();
-}
-
-static inline Sk4f sum_squares(const Sk4f& a, const Sk4f& b) {
-    return a * a + b * b;
-}
-
-void shadeSpan_radial_clamp2(SkScalar sfx, SkScalar sdx, SkScalar sfy, SkScalar sdy,
-                             SkPMColor* SK_RESTRICT dstC, const SkPMColor* SK_RESTRICT cache,
-                             int count, int toggle) {
-    if (radial_completely_pinned(sfx, sdx, sfy, sdy)) {
-        unsigned fi = SkGradientShaderBase::kCache32Count - 1;
-        sk_memset32_dither(dstC,
-                           cache[toggle + fi],
-                           cache[next_dither_toggle(toggle) + fi],
-                           count);
-    } else {
-        const Sk4f max(255);
-        const float scale = 255;
-        sfx *= scale;
-        sfy *= scale;
-        sdx *= scale;
-        sdy *= scale;
-        const Sk4f fx4(sfx, sfx + sdx, sfx + 2*sdx, sfx + 3*sdx);
-        const Sk4f fy4(sfy, sfy + sdy, sfy + 2*sdy, sfy + 3*sdy);
-        const Sk4f dx4(sdx * 4);
-        const Sk4f dy4(sdy * 4);
-
-        Sk4f tmpxy = fx4 * dx4 + fy4 * dy4;
-        Sk4f tmpdxdy = sum_squares(dx4, dy4);
-        Sk4f R = sum_squares(fx4, fy4);
-        Sk4f dR = tmpxy + tmpxy + tmpdxdy;
-        const Sk4f ddR = tmpdxdy + tmpdxdy;
-
-        for (int i = 0; i < (count >> 2); ++i) {
-            Sk4f dist = Sk4f::Min(fast_sqrt(R), max);
-            R += dR;
-            dR += ddR;
-
-            int fi[4];
-            dist.castTrunc().store(fi);
-
-            for (int i = 0; i < 4; i++) {
-                *dstC++ = cache[toggle + fi[i]];
-                toggle = next_dither_toggle(toggle);
-            }
-        }
-        count &= 3;
-        if (count) {
-            Sk4f dist = Sk4f::Min(fast_sqrt(R), max);
-
-            int fi[4];
-            dist.castTrunc().store(fi);
-            for (int i = 0; i < count; i++) {
-                *dstC++ = cache[toggle + fi[i]];
-                toggle = next_dither_toggle(toggle);
-            }
-        }
-    }
-}
-
 // Unrolling this loop doesn't seem to help (when float); we're stalling to
 // get the results of the sqrt (?), and don't have enough extra registers to
 // have many in flight.
@@ -475,11 +407,6 @@ void shadeSpan_radial_repeat(SkScalar fx, SkScalar dx, SkScalar fy, SkScalar dy,
 
 void SkRadialGradient::RadialGradientContext::shadeSpan(int x, int y,
         SkPMColor* SK_RESTRICT dstC, int count) {
-#ifdef SK_SUPPORT_LEGACY_RADIAL_GRADIENT_SQRT
-    const bool use_new_proc = false;
-#else
-    const bool use_new_proc = true;
-#endif
     SkASSERT(count > 0);
 
     const SkRadialGradient& radialGradient = static_cast<const SkRadialGradient&>(fShader);
@@ -508,7 +435,7 @@ void SkRadialGradient::RadialGradientContext::shadeSpan(int x, int y,
 
         RadialShadeProc shadeProc = shadeSpan_radial_repeat;
         if (SkShader::kClamp_TileMode == radialGradient.fTileMode) {
-            shadeProc = use_new_proc ? shadeSpan_radial_clamp2 : shadeSpan_radial_clamp;
+            shadeProc = shadeSpan_radial_clamp;
         } else if (SkShader::kMirror_TileMode == radialGradient.fTileMode) {
             shadeProc = shadeSpan_radial_mirror;
         } else {