diff options
author | 2015-04-27 13:55:53 -0700 | |
---|---|---|
committer | 2015-04-27 13:55:53 -0700 | |
commit | 9a22f489e8722dd83c65f33fb886019d9f60e479 (patch) | |
tree | ef5b6192ffeefba00e5a525e727162bdb8baa2a3 /src/opts | |
parent | 9de16283fdc8cc0d31a84f503578d0ecea4e8297 (diff) |
Revert of Split rsqrt into rsqrt{0,1,2}, with increasing cost and precision on ARM (patchset #2 id:20001 of https://codereview.chromium.org/1109913002/)
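Reason for revert:
arm64 typos (in the double-precision NEON SkNf, rsqrt1() and rsqrt2() declared their estimates as float32x4_t but passed them to the f64 intrinsics vmulq_f64/vrsqrtsq_f64; see the removed lines in the SkNx_neon.h diff below)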
Original issue's description:
> Split rsqrt into rsqrt{0,1,2}, with increasing cost and precision on ARM
>
> This is a logical no-op. Everything was using the equivalent of rsqrt1() before this change, and still is after it.
>
> BUG=skia:
>
> Committed: https://skia.googlesource.com/skia/+/9de16283fdc8cc0d31a84f503578d0ecea4e8297
TBR=reed@google.com,mtklein@chromium.org
NOPRESUBMIT=true
NOTREECHECKS=true
NOTRY=true
BUG=skia:
Review URL: https://codereview.chromium.org/1105233003
Diffstat (limited to 'src/opts')
-rw-r--r-- | src/opts/SkNx_neon.h | 47 | ||||
-rw-r--r-- | src/opts/SkNx_sse.h | 12 |
2 files changed, 23 insertions, 36 deletions
diff --git a/src/opts/SkNx_neon.h b/src/opts/SkNx_neon.h index f27c2b3856..6b216827a8 100644 --- a/src/opts/SkNx_neon.h +++ b/src/opts/SkNx_neon.h @@ -81,21 +81,20 @@ public: static SkNf Min(const SkNf& l, const SkNf& r) { return vmin_f32(l.fVec, r.fVec); } static SkNf Max(const SkNf& l, const SkNf& r) { return vmax_f32(l.fVec, r.fVec); } - SkNf rsqrt0() const { return vrsqrte_f32(fVec); } - SkNf rsqrt1() const { - float32x2_t est0 = this->rsqrt0().fVec; - return vmul_f32(vrsqrts_f32(fVec, vmul_f32(est0, est0)), est0); - } - SkNf rsqrt2() const { - float32x2_t est1 = this->rsqrt1().fVec; - return vmul_f32(vrsqrts_f32(fVec, vmul_f32(est1, est1)), est1); + SkNf rsqrt() const { + float32x2_t est0 = vrsqrte_f32(fVec), + est1 = vmul_f32(vrsqrts_f32(fVec, vmul_f32(est0, est0)), est0); + return est1; } SkNf sqrt() const { #if defined(SK_CPU_ARM64) return vsqrt_f32(fVec); #else - return *this * this->rsqrt2(); + float32x2_t est1 = this->rsqrt().fVec, + // An extra step of Newton's method to refine the estimate of 1/sqrt(this). 
+ est2 = vmul_f32(vrsqrts_f32(fVec, vmul_f32(est1, est1)), est1); + return vmul_f32(fVec, est2); #endif } @@ -152,15 +151,10 @@ public: static SkNf Max(const SkNf& l, const SkNf& r) { return vmaxq_f64(l.fVec, r.fVec); } SkNf sqrt() const { return vsqrtq_f64(fVec); } - - SkNf rsqrt0() const { return vrsqrteq_f64(fVec); } - SkNf rsqrt1() const { - float32x4_t est0 = this->rsqrt0().fVec; - return vmulq_f64(vrsqrtsq_f64(fVec, vmulq_f64(est0, est0)), est0); - } - SkNf rsqrt2() const { - float32x4_t est1 = this->rsqrt1().fVec; - return vmulq_f64(vrsqrtsq_f64(fVec, vmulq_f64(est1, est1)), est1); + SkNf rsqrt() const { + float64x2_t est0 = vrsqrteq_f64(fVec), + est1 = vmulq_f64(vrsqrtsq_f64(fVec, vmulq_f64(est0, est0)), est0); + return est1; } SkNf approxInvert() const { @@ -275,21 +269,20 @@ public: static SkNf Min(const SkNf& l, const SkNf& r) { return vminq_f32(l.fVec, r.fVec); } static SkNf Max(const SkNf& l, const SkNf& r) { return vmaxq_f32(l.fVec, r.fVec); } - SkNf rsqrt0() const { return vrsqrteq_f32(fVec); } - SkNf rsqrt1() const { - float32x4_t est0 = this->rsqrt0().fVec; - return vmulq_f32(vrsqrtsq_f32(fVec, vmulq_f32(est0, est0)), est0); - } - SkNf rsqrt2() const { - float32x4_t est1 = this->rsqrt1().fVec; - return vmulq_f32(vrsqrtsq_f32(fVec, vmulq_f32(est1, est1)), est1); + SkNf rsqrt() const { + float32x4_t est0 = vrsqrteq_f32(fVec), + est1 = vmulq_f32(vrsqrtsq_f32(fVec, vmulq_f32(est0, est0)), est0); + return est1; } SkNf sqrt() const { #if defined(SK_CPU_ARM64) return vsqrtq_f32(fVec); #else - return *this * this->rsqrt2(); + float32x4_t est1 = this->rsqrt().fVec, + // An extra step of Newton's method to refine the estimate of 1/sqrt(this). 
+ est2 = vmulq_f32(vrsqrtsq_f32(fVec, vmulq_f32(est1, est1)), est1); + return vmulq_f32(fVec, est2); #endif } diff --git a/src/opts/SkNx_sse.h b/src/opts/SkNx_sse.h index cbe624ba2d..2608525bd0 100644 --- a/src/opts/SkNx_sse.h +++ b/src/opts/SkNx_sse.h @@ -82,9 +82,7 @@ public: static SkNf Max(const SkNf& l, const SkNf& r) { return _mm_max_ps(l.fVec, r.fVec); } SkNf sqrt() const { return _mm_sqrt_ps (fVec); } - SkNf rsqrt0() const { return _mm_rsqrt_ps(fVec); } - SkNf rsqrt1() const { return this->rsqrt0(); } - SkNf rsqrt2() const { return this->rsqrt1(); } + SkNf rsqrt() const { return _mm_rsqrt_ps(fVec); } SkNf invert() const { return SkNf(1) / *this; } SkNf approxInvert() const { return _mm_rcp_ps(fVec); } @@ -128,9 +126,7 @@ public: static SkNf Max(const SkNf& l, const SkNf& r) { return _mm_max_pd(l.fVec, r.fVec); } SkNf sqrt() const { return _mm_sqrt_pd(fVec); } - SkNf rsqrt0() const { return _mm_cvtps_pd(_mm_rsqrt_ps(_mm_cvtpd_ps(fVec))); } - SkNf rsqrt1() const { return this->rsqrt0(); } - SkNf rsqrt2() const { return this->rsqrt1(); } + SkNf rsqrt() const { return _mm_cvtps_pd(_mm_rsqrt_ps(_mm_cvtpd_ps(fVec))); } SkNf invert() const { return SkNf(1) / *this; } SkNf approxInvert() const { return _mm_cvtps_pd(_mm_rcp_ps(_mm_cvtpd_ps(fVec))); } @@ -214,9 +210,7 @@ public: static SkNf Max(const SkNf& l, const SkNf& r) { return _mm_max_ps(l.fVec, r.fVec); } SkNf sqrt() const { return _mm_sqrt_ps (fVec); } - SkNf rsqrt0() const { return _mm_rsqrt_ps(fVec); } - SkNf rsqrt1() const { return this->rsqrt0(); } - SkNf rsqrt2() const { return this->rsqrt1(); } + SkNf rsqrt() const { return _mm_rsqrt_ps(fVec); } SkNf invert() const { return SkNf(1) / *this; } SkNf approxInvert() const { return _mm_rcp_ps(fVec); } |