From cea9f35fee49866410c6e0b9b9256df27961f495 Mon Sep 17 00:00:00 2001 From: mtklein Date: Fri, 20 Mar 2015 13:17:42 -0700 Subject: Sk2x::invert() and Sk2x::approxInvert() BUG=skia: Review URL: https://codereview.chromium.org/1024993002 --- src/core/Sk2x.h | 3 +++ src/opts/Sk2x_neon.h | 33 +++++++++++++++++++++++++++++---- src/opts/Sk2x_none.h | 3 +++ src/opts/Sk2x_sse.h | 7 +++++++ 4 files changed, 42 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/core/Sk2x.h b/src/core/Sk2x.h index 9b4e5ee61c..f49442563a 100644 --- a/src/core/Sk2x.h +++ b/src/core/Sk2x.h @@ -67,6 +67,9 @@ public: Sk2x rsqrt() const; // Approximate 1/this->sqrt(). Sk2x sqrt() const; // this->multiply(this->rsqrt()) may be faster, but less precise. + Sk2x invert() const; // 1/this. + Sk2x approxInvert() const; // Approximate 1/this, usually faster but less precise. + static Sk2x Min(const Sk2x&, const Sk2x&); static Sk2x Max(const Sk2x&, const Sk2x&); diff --git a/src/opts/Sk2x_neon.h b/src/opts/Sk2x_neon.h index ef61df4823..8e6e46164b 100644 --- a/src/opts/Sk2x_neon.h +++ b/src/opts/Sk2x_neon.h @@ -38,6 +38,18 @@ M(Sk2f&) operator=(const Sk2f& o) { fVec = o.fVec; return *this; } M(Sk2f) Load(const float vals[2]) { return vld1_f32(vals); } M(void) store(float vals[2]) const { vst1_f32(vals, fVec); } +M(Sk2f) approxInvert() const { + float32x2_t est0 = vrecpe_f32(fVec), + est1 = vmul_f32(vrecps_f32(est0, fVec), est0); + return est1; +} + +M(Sk2f) invert() const { + float32x2_t est1 = this->approxInvert().fVec, + est2 = vmul_f32(vrecps_f32(est1, fVec), est1); + return est2; +} + M(Sk2f) add(const Sk2f& o) const { return vadd_f32(fVec, o.fVec); } M(Sk2f) subtract(const Sk2f& o) const { return vsub_f32(fVec, o.fVec); } M(Sk2f) multiply(const Sk2f& o) const { return vmul_f32(fVec, o.fVec); } @@ -45,10 +57,7 @@ M(Sk2f) divide(const Sk2f& o) const { #if defined(SK_CPU_ARM64) return vdiv_f32(fVec, o.fVec); #else - float32x2_t est0 = vrecpe_f32(o.fVec), - est1 = vmul_f32(vrecps_f32(est0, o.fVec), est0), - est2 = vmul_f32(vrecps_f32(est1, o.fVec), est1); - return vmul_f32(est2, fVec); + return vmul_f32(fVec, o.invert().fVec); #endif } @@ -99,6 +108,19 @@ M(Sk2f) sqrt() const { } M(Sk2d) sqrt() const { return vsqrtq_f64(fVec); } + M(Sk2d) approxInvert() const { + float64x2_t est0 = vrecpeq_f64(fVec), + est1 = vmulq_f64(vrecpsq_f64(est0, fVec), est0); + return est1; + } + + M(Sk2d) invert() const { + float64x2_t est1 = this->approxInvert().fVec, + est2 = vmulq_f64(vrecpsq_f64(est1, fVec), est1), + est3 = vmulq_f64(vrecpsq_f64(est2, fVec), est2); + return est3; + } + #else // Scalar implementation for 32-bit chips, which don't have float64x2_t. M() Sk2x() {} M() Sk2x(double val) { fVec[0] = fVec[1] = val; } @@ -126,6 +148,9 @@ M(Sk2f) sqrt() const { M(Sk2d) rsqrt() const { return Sk2d(1.0/::sqrt(fVec[0]), 1.0/::sqrt(fVec[1])); } M(Sk2d) sqrt() const { return Sk2d( ::sqrt(fVec[0]), ::sqrt(fVec[1])); } + + M(Sk2d) invert() const { return Sk2d(1.0 / fVec[0], 1.0 / fVec[1]); } + M(Sk2d) approxInvert() const { return this->invert(); } #endif #undef M diff --git a/src/opts/Sk2x_none.h b/src/opts/Sk2x_none.h index 65cc6703fb..2c68e736f4 100644 --- a/src/opts/Sk2x_none.h +++ b/src/opts/Sk2x_none.h @@ -54,6 +54,9 @@ M(Sk2x) Max(const Sk2x& a, const Sk2x& b) { return Sk2x(SkTMax(a.fVec[0], b.fVec[0]), SkTMax(a.fVec[1], b.fVec[1])); } +M(Sk2x) invert() const { return Sk2x((T)1.0 / fVec[0], (T)1.0 / fVec[1]); } +M(Sk2x) approxInvert() const { return this->invert(); } + #undef M #define M template <> inline diff --git a/src/opts/Sk2x_sse.h b/src/opts/Sk2x_sse.h index 111d3c230e..1136f1d856 100644 --- a/src/opts/Sk2x_sse.h +++ b/src/opts/Sk2x_sse.h @@ -46,6 +46,9 @@ M(Sk2f) Max(const Sk2f& a, const Sk2f& b) { return _mm_max_ps(a.fVec, b.fVec); } M(Sk2f) rsqrt() const { return _mm_rsqrt_ps(fVec); } M(Sk2f) sqrt() const { return _mm_sqrt_ps (fVec); } +M(Sk2f) invert() const { return Sk2f(1.0f) / *this; } +M(Sk2f) approxInvert() const { return _mm_rcp_ps(fVec); } + #undef M #define M(...) template <> inline __VA_ARGS__ Sk2x:: @@ -70,6 +73,10 @@ M(Sk2d) Max(const Sk2d& a, const Sk2d& b) { return _mm_max_pd(a.fVec, b.fVec); } M(Sk2d) rsqrt() const { return _mm_cvtps_pd(_mm_rsqrt_ps(_mm_cvtpd_ps(fVec))); } M(Sk2d) sqrt() const { return _mm_sqrt_pd(fVec); } +// No _mm_rcp_pd, so do Sk2d::approxInvert() in floats. +M(Sk2d) invert() const { return Sk2d(1.0) / *this; } +M(Sk2d) approxInvert() const { return _mm_cvtps_pd(_mm_rcp_ps(_mm_cvtpd_ps(fVec))); } + #undef M #endif -- cgit v1.2.3