diff options
author | mtklein <mtklein@chromium.org> | 2015-04-27 13:51:28 -0700 |
---|---|---|
committer | Commit bot <commit-bot@chromium.org> | 2015-04-27 13:51:28 -0700 |
commit | 9de16283fdc8cc0d31a84f503578d0ecea4e8297 (patch) | |
tree | f72e0d124675aadf47fa5306bdb5dd3ccb53dcf6 | |
parent | a6f75070baf5f72604f27aac125c111ea750d725 (diff) |
Split rsqrt into rsqrt{0,1,2}, with increasing cost and precision on ARM
This is a logical no-op: every caller used the equivalent of rsqrt1() before this change, and still does after it.
BUG=skia:
Review URL: https://codereview.chromium.org/1109913002
-rw-r--r-- | src/core/SkNx.h | 10 | ||||
-rw-r--r-- | src/effects/gradients/SkRadialGradient.cpp | 5 | ||||
-rw-r--r-- | src/opts/SkNx_neon.h | 47 | ||||
-rw-r--r-- | src/opts/SkNx_sse.h | 12 | ||||
-rw-r--r-- | tests/SkNxTest.cpp | 4 |
5 files changed, 49 insertions, 29 deletions
diff --git a/src/core/SkNx.h b/src/core/SkNx.h index 65b5b97a83..4cfc6e3d9a 100644 --- a/src/core/SkNx.h +++ b/src/core/SkNx.h @@ -119,7 +119,11 @@ public: } SkNf sqrt() const { return SkNf(fLo. sqrt(), fHi. sqrt()); } - SkNf rsqrt() const { return SkNf(fLo.rsqrt(), fHi.rsqrt()); } + + // Generally, increasing precision, increasing cost. + SkNf rsqrt0() const { return SkNf(fLo.rsqrt0(), fHi.rsqrt0()); } + SkNf rsqrt1() const { return SkNf(fLo.rsqrt1(), fHi.rsqrt1()); } + SkNf rsqrt2() const { return SkNf(fLo.rsqrt2(), fHi.rsqrt2()); } SkNf invert() const { return SkNf(fLo. invert(), fHi. invert()); } SkNf approxInvert() const { return SkNf(fLo.approxInvert(), fHi.approxInvert()); } @@ -207,7 +211,9 @@ public: static SkNf Max(const SkNf& l, const SkNf& r) { return SkNf(SkTMax(l.fVal, r.fVal)); } SkNf sqrt() const { return SkNf(Sqrt(fVal)); } - SkNf rsqrt() const { return SkNf((T)1 / Sqrt(fVal)); } + SkNf rsqrt0() const { return SkNf((T)1 / Sqrt(fVal)); } + SkNf rsqrt1() const { return this->rsqrt0(); } + SkNf rsqrt2() const { return this->rsqrt1(); } SkNf invert() const { return SkNf((T)1 / fVal); } SkNf approxInvert() const { return this->invert(); } diff --git a/src/effects/gradients/SkRadialGradient.cpp b/src/effects/gradients/SkRadialGradient.cpp index bf3c821f6b..80562dece8 100644 --- a/src/effects/gradients/SkRadialGradient.cpp +++ b/src/effects/gradients/SkRadialGradient.cpp @@ -377,10 +377,9 @@ void shadeSpan_radial_clamp(SkScalar sfx, SkScalar sdx, } } -// TODO: can we get away with 0th approximatino of inverse-sqrt (i.e. faster than rsqrt)? -// seems like ~10bits is more than enough for our use, since we want a byte-index static inline Sk4f fast_sqrt(const Sk4f& R) { - return R * R.rsqrt(); + // R * R.rsqrt0() is much faster, but it's non-monotonic, which isn't so pretty for gradients. 
+ return R * R.rsqrt1(); } static inline Sk4f sum_squares(const Sk4f& a, const Sk4f& b) { diff --git a/src/opts/SkNx_neon.h b/src/opts/SkNx_neon.h index 6b216827a8..f27c2b3856 100644 --- a/src/opts/SkNx_neon.h +++ b/src/opts/SkNx_neon.h @@ -81,20 +81,21 @@ public: static SkNf Min(const SkNf& l, const SkNf& r) { return vmin_f32(l.fVec, r.fVec); } static SkNf Max(const SkNf& l, const SkNf& r) { return vmax_f32(l.fVec, r.fVec); } - SkNf rsqrt() const { - float32x2_t est0 = vrsqrte_f32(fVec), - est1 = vmul_f32(vrsqrts_f32(fVec, vmul_f32(est0, est0)), est0); - return est1; + SkNf rsqrt0() const { return vrsqrte_f32(fVec); } + SkNf rsqrt1() const { + float32x2_t est0 = this->rsqrt0().fVec; + return vmul_f32(vrsqrts_f32(fVec, vmul_f32(est0, est0)), est0); + } + SkNf rsqrt2() const { + float32x2_t est1 = this->rsqrt1().fVec; + return vmul_f32(vrsqrts_f32(fVec, vmul_f32(est1, est1)), est1); } SkNf sqrt() const { #if defined(SK_CPU_ARM64) return vsqrt_f32(fVec); #else - float32x2_t est1 = this->rsqrt().fVec, - // An extra step of Newton's method to refine the estimate of 1/sqrt(this). 
- est2 = vmul_f32(vrsqrts_f32(fVec, vmul_f32(est1, est1)), est1); - return vmul_f32(fVec, est2); + return *this * this->rsqrt2(); #endif } @@ -151,10 +152,15 @@ public: static SkNf Max(const SkNf& l, const SkNf& r) { return vmaxq_f64(l.fVec, r.fVec); } SkNf sqrt() const { return vsqrtq_f64(fVec); } - SkNf rsqrt() const { - float64x2_t est0 = vrsqrteq_f64(fVec), - est1 = vmulq_f64(vrsqrtsq_f64(fVec, vmulq_f64(est0, est0)), est0); - return est1; + + SkNf rsqrt0() const { return vrsqrteq_f64(fVec); } + SkNf rsqrt1() const { + float64x2_t est0 = this->rsqrt0().fVec; + return vmulq_f64(vrsqrtsq_f64(fVec, vmulq_f64(est0, est0)), est0); + } + SkNf rsqrt2() const { + float64x2_t est1 = this->rsqrt1().fVec; + return vmulq_f64(vrsqrtsq_f64(fVec, vmulq_f64(est1, est1)), est1); } SkNf approxInvert() const { @@ -269,20 +275,21 @@ public: static SkNf Min(const SkNf& l, const SkNf& r) { return vminq_f32(l.fVec, r.fVec); } static SkNf Max(const SkNf& l, const SkNf& r) { return vmaxq_f32(l.fVec, r.fVec); } - SkNf rsqrt() const { - float32x4_t est0 = vrsqrteq_f32(fVec), - est1 = vmulq_f32(vrsqrtsq_f32(fVec, vmulq_f32(est0, est0)), est0); - return est1; + SkNf rsqrt0() const { return vrsqrteq_f32(fVec); } + SkNf rsqrt1() const { + float32x4_t est0 = this->rsqrt0().fVec; + return vmulq_f32(vrsqrtsq_f32(fVec, vmulq_f32(est0, est0)), est0); + } + SkNf rsqrt2() const { + float32x4_t est1 = this->rsqrt1().fVec; + return vmulq_f32(vrsqrtsq_f32(fVec, vmulq_f32(est1, est1)), est1); } SkNf sqrt() const { #if defined(SK_CPU_ARM64) return vsqrtq_f32(fVec); #else - float32x4_t est1 = this->rsqrt().fVec, - // An extra step of Newton's method to refine the estimate of 1/sqrt(this). 
- est2 = vmulq_f32(vrsqrtsq_f32(fVec, vmulq_f32(est1, est1)), est1); - return vmulq_f32(fVec, est2); + return *this * this->rsqrt2(); #endif } diff --git a/src/opts/SkNx_sse.h b/src/opts/SkNx_sse.h index 2608525bd0..cbe624ba2d 100644 --- a/src/opts/SkNx_sse.h +++ b/src/opts/SkNx_sse.h @@ -82,7 +82,9 @@ public: static SkNf Max(const SkNf& l, const SkNf& r) { return _mm_max_ps(l.fVec, r.fVec); } SkNf sqrt() const { return _mm_sqrt_ps (fVec); } - SkNf rsqrt() const { return _mm_rsqrt_ps(fVec); } + SkNf rsqrt0() const { return _mm_rsqrt_ps(fVec); } + SkNf rsqrt1() const { return this->rsqrt0(); } + SkNf rsqrt2() const { return this->rsqrt1(); } SkNf invert() const { return SkNf(1) / *this; } SkNf approxInvert() const { return _mm_rcp_ps(fVec); } @@ -126,7 +128,9 @@ public: static SkNf Max(const SkNf& l, const SkNf& r) { return _mm_max_pd(l.fVec, r.fVec); } SkNf sqrt() const { return _mm_sqrt_pd(fVec); } - SkNf rsqrt() const { return _mm_cvtps_pd(_mm_rsqrt_ps(_mm_cvtpd_ps(fVec))); } + SkNf rsqrt0() const { return _mm_cvtps_pd(_mm_rsqrt_ps(_mm_cvtpd_ps(fVec))); } + SkNf rsqrt1() const { return this->rsqrt0(); } + SkNf rsqrt2() const { return this->rsqrt1(); } SkNf invert() const { return SkNf(1) / *this; } SkNf approxInvert() const { return _mm_cvtps_pd(_mm_rcp_ps(_mm_cvtpd_ps(fVec))); } @@ -210,7 +214,9 @@ public: static SkNf Max(const SkNf& l, const SkNf& r) { return _mm_max_ps(l.fVec, r.fVec); } SkNf sqrt() const { return _mm_sqrt_ps (fVec); } - SkNf rsqrt() const { return _mm_rsqrt_ps(fVec); } + SkNf rsqrt0() const { return _mm_rsqrt_ps(fVec); } + SkNf rsqrt1() const { return this->rsqrt0(); } + SkNf rsqrt2() const { return this->rsqrt1(); } SkNf invert() const { return SkNf(1) / *this; } SkNf approxInvert() const { return _mm_rcp_ps(fVec); } diff --git a/tests/SkNxTest.cpp b/tests/SkNxTest.cpp index 25ece3867b..dec7329c2c 100644 --- a/tests/SkNxTest.cpp +++ b/tests/SkNxTest.cpp @@ -50,7 +50,9 @@ static void test_Nf(skiatest::Reporter* r) { SkNf<N,T> fours(4); 
assert_eq(fours.sqrt(), 2,2,2,2); - assert_nearly_eq(0.001, fours.rsqrt(), 0.5, 0.5, 0.5, 0.5); + assert_nearly_eq(0.001, fours.rsqrt0(), 0.5, 0.5, 0.5, 0.5); + assert_nearly_eq(0.001, fours.rsqrt1(), 0.5, 0.5, 0.5, 0.5); + assert_nearly_eq(0.001, fours.rsqrt2(), 0.5, 0.5, 0.5, 0.5); assert_eq( fours. invert(), 0.25, 0.25, 0.25, 0.25); assert_nearly_eq(0.001, fours.approxInvert(), 0.25, 0.25, 0.25, 0.25); |