about | summary | refs | log | tree | commit | diff | homepage
path: root/src/opts
diff options
context:
space:
mode:
author: mtklein <mtklein@chromium.org> 2015-03-20 13:17:42 -0700
committer: Commit bot <commit-bot@chromium.org> 2015-03-20 13:17:42 -0700
commit: cea9f35fee49866410c6e0b9b9256df27961f495 (patch)
tree: 4c6a9df59db4158105c13e8e30edab0c09e0e159 /src/opts
parent: 7b462a2b8564f9f4ad621b36541125cff36515a8 (diff)
Sk2x::invert() and Sk2x::approxInvert()
Diffstat (limited to 'src/opts')
-rw-r--r-- src/opts/Sk2x_neon.h 33
-rw-r--r-- src/opts/Sk2x_none.h 3
-rw-r--r-- src/opts/Sk2x_sse.h 7
3 files changed, 39 insertions, 4 deletions
diff --git a/src/opts/Sk2x_neon.h b/src/opts/Sk2x_neon.h
index ef61df4823..8e6e46164b 100644
--- a/src/opts/Sk2x_neon.h
+++ b/src/opts/Sk2x_neon.h
@@ -38,6 +38,18 @@ M(Sk2f&) operator=(const Sk2f& o) { fVec = o.fVec; return *this; }
M(Sk2f) Load(const float vals[2]) { return vld1_f32(vals); }
M(void) store(float vals[2]) const { vst1_f32(vals, fVec); }
+M(Sk2f) approxInvert() const {  // Approximate per-lane reciprocal of fVec: hardware estimate plus one refinement step.
+ float32x2_t est0 = vrecpe_f32(fVec),  // initial reciprocal estimate
+ est1 = vmul_f32(vrecps_f32(est0, fVec), est0);  // vrecps gives (2 - est0*fVec); multiplying by est0 is one Newton-Raphson step
+ return est1;
+}
+
+M(Sk2f) invert() const {  // Per-lane reciprocal of fVec, refined one step further than approxInvert().
+ float32x2_t est1 = this->approxInvert().fVec,  // estimate that already has one refinement step applied
+ est2 = vmul_f32(vrecps_f32(est1, fVec), est1);  // second refinement step, same form as in approxInvert()
+ return est2;
+}
+
M(Sk2f) add(const Sk2f& o) const { return vadd_f32(fVec, o.fVec); }
M(Sk2f) subtract(const Sk2f& o) const { return vsub_f32(fVec, o.fVec); }
M(Sk2f) multiply(const Sk2f& o) const { return vmul_f32(fVec, o.fVec); }
@@ -45,10 +57,7 @@ M(Sk2f) divide(const Sk2f& o) const {
#if defined(SK_CPU_ARM64)
return vdiv_f32(fVec, o.fVec);
#else
- float32x2_t est0 = vrecpe_f32(o.fVec),
- est1 = vmul_f32(vrecps_f32(est0, o.fVec), est0),
- est2 = vmul_f32(vrecps_f32(est1, o.fVec), est1);
- return vmul_f32(est2, fVec);
+ return vmul_f32(fVec, o.invert().fVec);
#endif
}
@@ -99,6 +108,19 @@ M(Sk2f) sqrt() const {
}
M(Sk2d) sqrt() const { return vsqrtq_f64(fVec); }
+ M(Sk2d) approxInvert() const {  // Approximate per-lane reciprocal of the double vector: estimate plus one refinement step.
+ float64x2_t est0 = vrecpeq_f64(fVec),  // initial reciprocal estimate
+ est1 = vmulq_f64(vrecpsq_f64(est0, fVec), est0);  // vrecpsq gives (2 - est0*fVec); multiplying by est0 is one Newton-Raphson step
+ return est1;
+ }
+
+ M(Sk2d) invert() const {  // Per-lane reciprocal of the double vector, using three refinement steps in total.
+ float64x2_t est1 = this->approxInvert().fVec,  // estimate that already has one refinement step applied
+ est2 = vmulq_f64(vrecpsq_f64(est1, fVec), est1),  // second refinement step
+ est3 = vmulq_f64(vrecpsq_f64(est2, fVec), est2);  // third refinement step (one more than the float version uses)
+ return est3;
+ }
+
#else // Scalar implementation for 32-bit chips, which don't have float64x2_t.
M() Sk2x() {}
M() Sk2x(double val) { fVec[0] = fVec[1] = val; }
@@ -126,6 +148,9 @@ M(Sk2f) sqrt() const {
M(Sk2d) rsqrt() const { return Sk2d(1.0/::sqrt(fVec[0]), 1.0/::sqrt(fVec[1])); }
M(Sk2d) sqrt() const { return Sk2d( ::sqrt(fVec[0]), ::sqrt(fVec[1])); }
+
+ M(Sk2d) invert() const { return Sk2d(1.0 / fVec[0], 1.0 / fVec[1]); }  // scalar path: plain division, one lane at a time
+ M(Sk2d) approxInvert() const { return this->invert(); }  // scalar path has no separate estimate; forwards to invert()
#endif
#undef M
diff --git a/src/opts/Sk2x_none.h b/src/opts/Sk2x_none.h
index 65cc6703fb..2c68e736f4 100644
--- a/src/opts/Sk2x_none.h
+++ b/src/opts/Sk2x_none.h
@@ -54,6 +54,9 @@ M(Sk2x<T>) Max(const Sk2x<T>& a, const Sk2x<T>& b) {
return Sk2x<T>(SkTMax(a.fVec[0], b.fVec[0]), SkTMax(a.fVec[1], b.fVec[1]));
}
+M(Sk2x<T>) invert() const { return Sk2x<T>((T)1.0 / fVec[0], (T)1.0 / fVec[1]); }  // portable fallback: divides 1 (cast to T) by each lane
+M(Sk2x<T>) approxInvert() const { return this->invert(); }  // no separate estimate in the portable version; forwards to invert()
+
#undef M
#define M template <> inline
diff --git a/src/opts/Sk2x_sse.h b/src/opts/Sk2x_sse.h
index 111d3c230e..1136f1d856 100644
--- a/src/opts/Sk2x_sse.h
+++ b/src/opts/Sk2x_sse.h
@@ -46,6 +46,9 @@ M(Sk2f) Max(const Sk2f& a, const Sk2f& b) { return _mm_max_ps(a.fVec, b.fVec); }
M(Sk2f) rsqrt() const { return _mm_rsqrt_ps(fVec); }
M(Sk2f) sqrt() const { return _mm_sqrt_ps (fVec); }
+M(Sk2f) invert() const { return Sk2f(1.0f) / *this; }  // true division: Sk2f(1.0f) divided by this vector via operator/
+M(Sk2f) approxInvert() const { return _mm_rcp_ps(fVec); }  // SSE reciprocal approximation, with no refinement step applied here
+
#undef M
#define M(...) template <> inline __VA_ARGS__ Sk2x<double>::
@@ -70,6 +73,10 @@ M(Sk2d) Max(const Sk2d& a, const Sk2d& b) { return _mm_max_pd(a.fVec, b.fVec); }
M(Sk2d) rsqrt() const { return _mm_cvtps_pd(_mm_rsqrt_ps(_mm_cvtpd_ps(fVec))); }
M(Sk2d) sqrt() const { return _mm_sqrt_pd(fVec); }
+// No _mm_rcp_pd, so do Sk2d::approxInvert() in floats.
+M(Sk2d) invert() const { return Sk2d(1.0) / *this; }  // true division: Sk2d(1.0) divided by this vector via operator/
+M(Sk2d) approxInvert() const { return _mm_cvtps_pd(_mm_rcp_ps(_mm_cvtpd_ps(fVec))); }  // convert to float, take the float reciprocal approximation, convert back to double
+
#undef M
#endif