diff options
author | 2015-11-09 08:33:53 -0800 | |
---|---|---|
committer | 2015-11-09 08:33:53 -0800 | |
commit | 6f797092d2054f79374fb98fc1d57ca3554c7db4 (patch) | |
tree | e5e1ec524c11736085ef5c5080de3e10d57ec38e | |
parent | a4c26c0d3a1ffdd0bab08b1aec24bea025a0d9e2 (diff) |
prune unused SkNx features
- remove float -> int conversion, keeping float -> byte
- remove support for doubles
I was thinking of specializing Sk8f for AVX. Pruning these unused features first will help keep the complexity down.
This may cause minor diffs in radial gradients, because toBytes() rounds where castTrunc() truncated, but I don't see any diffs in Gold.
https://gold.skia.org/search2?issue=1411563008&unt=true&query=source_type%3Dgm&master=false
BUG=skia:4117
CQ_EXTRA_TRYBOTS=client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot
Review URL: https://codereview.chromium.org/1411563008
-rw-r--r-- | src/core/SkNx.h | 96 | ||||
-rw-r--r-- | src/effects/gradients/SkRadialGradient.cpp | 8 | ||||
-rw-r--r-- | src/opts/SkNx_neon.h | 81 | ||||
-rw-r--r-- | src/opts/SkNx_sse.h | 53 | ||||
-rw-r--r-- | tests/SkNxTest.cpp | 45 |
5 files changed, 66 insertions, 217 deletions
diff --git a/src/core/SkNx.h b/src/core/SkNx.h index f8b27fc1ed..1e32a2aa26 100644 --- a/src/core/SkNx.h +++ b/src/core/SkNx.h @@ -81,29 +81,28 @@ protected: SkNi<N/2, T> fLo, fHi; }; -template <int N, typename T> +template <int N> class SkNf { - static int32_t MyNi(float); - static int64_t MyNi(double); - typedef decltype(MyNi(T())) I; public: SkNf() {} - explicit SkNf(T val) : fLo(val), fHi(val) {} - static SkNf Load(const T vals[N]) { - return SkNf(SkNf<N/2,T>::Load(vals), SkNf<N/2,T>::Load(vals+N/2)); + explicit SkNf(float val) : fLo(val), fHi(val) {} + static SkNf Load(const float vals[N]) { + return SkNf(SkNf<N/2>::Load(vals), SkNf<N/2>::Load(vals+N/2)); } // FromBytes() and toBytes() specializations may assume their argument is N-byte aligned. // E.g. Sk4f::FromBytes() may assume it's reading from a 4-byte-aligned pointer. // Converts [0,255] bytes to [0.0, 255.0] floats. static SkNf FromBytes(const uint8_t bytes[N]) { - return SkNf(SkNf<N/2,T>::FromBytes(bytes), SkNf<N/2,T>::FromBytes(bytes+N/2)); + return SkNf(SkNf<N/2>::FromBytes(bytes), SkNf<N/2>::FromBytes(bytes+N/2)); } - SkNf(T a, T b) : fLo(a), fHi(b) { REQUIRE(N==2); } - SkNf(T a, T b, T c, T d) : fLo(a,b), fHi(c,d) { REQUIRE(N==4); } - SkNf(T a, T b, T c, T d, T e, T f, T g, T h) : fLo(a,b,c,d), fHi(e,f,g,h) { REQUIRE(N==8); } + SkNf(float a, float b) : fLo(a), fHi(b) { REQUIRE(N==2); } + SkNf(float a, float b, float c, float d) : fLo(a,b), fHi(c,d) { REQUIRE(N==4); } + SkNf(float a, float b, float c, float d, float e, float f, float g, float h) + : fLo(a,b,c,d) + , fHi(e,f,g,h) { REQUIRE(N==8); } - void store(T vals[N]) const { + void store(float vals[N]) const { fLo.store(vals); fHi.store(vals+N/2); } @@ -114,8 +113,6 @@ public: fHi.toBytes(bytes+N/2); } - SkNi<N,I> castTrunc() const { return SkNi<N,I>(fLo.castTrunc(), fHi.castTrunc()); } - SkNf operator + (const SkNf& o) const { return SkNf(fLo + o.fLo, fHi + o.fHi); } SkNf operator - (const SkNf& o) const { return SkNf(fLo - o.fLo, fHi - 
o.fHi); } SkNf operator * (const SkNf& o) const { return SkNf(fLo * o.fLo, fHi * o.fHi); } @@ -129,10 +126,10 @@ public: SkNf operator >= (const SkNf& o) const { return SkNf(fLo >= o.fLo, fHi >= o.fHi); } static SkNf Min(const SkNf& l, const SkNf& r) { - return SkNf(SkNf<N/2,T>::Min(l.fLo, r.fLo), SkNf<N/2,T>::Min(l.fHi, r.fHi)); + return SkNf(SkNf<N/2>::Min(l.fLo, r.fLo), SkNf<N/2>::Min(l.fHi, r.fHi)); } static SkNf Max(const SkNf& l, const SkNf& r) { - return SkNf(SkNf<N/2,T>::Max(l.fLo, r.fLo), SkNf<N/2,T>::Max(l.fHi, r.fHi)); + return SkNf(SkNf<N/2>::Max(l.fLo, r.fLo), SkNf<N/2>::Max(l.fHi, r.fHi)); } SkNf sqrt() const { return SkNf(fLo. sqrt(), fHi. sqrt()); } @@ -145,7 +142,7 @@ public: SkNf invert() const { return SkNf(fLo. invert(), fHi. invert()); } SkNf approxInvert() const { return SkNf(fLo.approxInvert(), fHi.approxInvert()); } - template <int k> T kth() const { + template <int k> float kth() const { SkASSERT(0 <= k && k < N); return k < N/2 ? fLo.template kth<k>() : fHi.template kth<k-N/2>(); } @@ -158,9 +155,9 @@ public: protected: REQUIRE(0 == (N & (N-1))); - SkNf(const SkNf<N/2, T>& lo, const SkNf<N/2, T>& hi) : fLo(lo), fHi(hi) {} + SkNf(const SkNf<N/2>& lo, const SkNf<N/2>& hi) : fLo(lo), fHi(hi) {} - SkNf<N/2, T> fLo, fHi; + SkNf<N/2> fLo, fHi; }; @@ -204,21 +201,16 @@ protected: T fVal; }; -template <typename T> -class SkNf<1,T> { - static int32_t MyNi(float); - static int64_t MyNi(double); - typedef decltype(MyNi(T())) I; +template <> +class SkNf<1> { public: SkNf() {} - explicit SkNf(T val) : fVal(val) {} - static SkNf Load(const T vals[1]) { return SkNf(vals[0]); } - static SkNf FromBytes(const uint8_t bytes[1]) { return SkNf((T)bytes[0]); } - - void store(T vals[1]) const { vals[0] = fVal; } - void toBytes(uint8_t bytes[1]) const { bytes[0] = (uint8_t)(SkTMin(fVal, (T)255.0)); } + explicit SkNf(float val) : fVal(val) {} + static SkNf Load(const float vals[1]) { return SkNf(vals[0]); } + static SkNf FromBytes(const uint8_t bytes[1]) { return 
SkNf((float)bytes[0]); } - SkNi<1,I> castTrunc() const { return SkNi<1,I>(fVal); } + void store(float vals[1]) const { vals[0] = fVal; } + void toBytes(uint8_t bytes[1]) const { bytes[0] = (uint8_t)(SkTMin(fVal, 255.0f)); } SkNf operator + (const SkNf& o) const { return SkNf(fVal + o.fVal); } SkNf operator - (const SkNf& o) const { return SkNf(fVal - o.fVal); } @@ -235,35 +227,30 @@ public: static SkNf Min(const SkNf& l, const SkNf& r) { return SkNf(SkTMin(l.fVal, r.fVal)); } static SkNf Max(const SkNf& l, const SkNf& r) { return SkNf(SkTMax(l.fVal, r.fVal)); } - SkNf sqrt() const { return SkNf(Sqrt(fVal)); } - SkNf rsqrt0() const { return SkNf((T)1 / Sqrt(fVal)); } + SkNf sqrt() const { return SkNf(sqrtf(fVal)); } + SkNf rsqrt0() const { return SkNf(1.0f / sqrtf(fVal)); } SkNf rsqrt1() const { return this->rsqrt0(); } SkNf rsqrt2() const { return this->rsqrt1(); } - SkNf invert() const { return SkNf((T)1 / fVal); } + SkNf invert() const { return SkNf(1.0f / fVal); } SkNf approxInvert() const { return this->invert(); } - template <int k> T kth() const { + template <int k> float kth() const { SkASSERT(k == 0); return fVal; } - bool allTrue() const { return this->pun(); } - bool anyTrue() const { return this->pun(); } + bool allTrue() const { return this->pun() != 0; } + bool anyTrue() const { return this->pun() != 0; } SkNf thenElse(const SkNf& t, const SkNf& e) const { return this->pun() ? t : e; } protected: - // We do double sqrts natively, or via floats for any other type. 
- template <typename U> - static U Sqrt(U val) { return (U) ::sqrtf((float)val); } - static double Sqrt(double val) { return ::sqrt ( val); } - - I pun() const { - union { T f; I i; } pun = { fVal }; + uint32_t pun() const { + union { float f; uint32_t i; } pun = { fVal }; return pun.i; } - T fVal; + float fVal; }; // This default implementation can be specialized by ../opts/SkNx_foo.h @@ -285,8 +272,6 @@ inline SkNx SkNx_dup(const SkNx& src) { return SkNx_shuffle<Ix>(src); } } // namespace - - // Include platform specific specializations if available. #ifndef SKNX_NO_SIMD #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 @@ -298,21 +283,14 @@ inline SkNx SkNx_dup(const SkNx& src) { return SkNx_shuffle<Ix>(src); } #undef REQUIRE -typedef SkNf<2, float> Sk2f; -typedef SkNf<2, double> Sk2d; -typedef SkNf<2, SkScalar> Sk2s; +typedef SkNf<2> Sk2f; +typedef SkNf<2> Sk2s; -typedef SkNf<4, float> Sk4f; -typedef SkNf<4, double> Sk4d; -typedef SkNf<4, SkScalar> Sk4s; +typedef SkNf<4> Sk4f; +typedef SkNf<4> Sk4s; -typedef SkNi<4, uint16_t> Sk4h; typedef SkNi<8, uint16_t> Sk8h; typedef SkNi<16, uint16_t> Sk16h; - -typedef SkNi<16, uint8_t> Sk16b; - -typedef SkNi<4, int32_t> Sk4i; -typedef SkNi<4, uint32_t> Sk4u; +typedef SkNi<16, uint8_t> Sk16b; #endif//SkNx_DEFINED diff --git a/src/effects/gradients/SkRadialGradient.cpp b/src/effects/gradients/SkRadialGradient.cpp index de0f764a56..a9cdb2a912 100644 --- a/src/effects/gradients/SkRadialGradient.cpp +++ b/src/effects/gradients/SkRadialGradient.cpp @@ -306,8 +306,8 @@ void shadeSpan_radial_clamp2(SkScalar sfx, SkScalar sdx, SkScalar sfy, SkScalar R = R + dR; dR = dR + ddR; - int fi[4]; - dist.castTrunc().store(fi); + uint8_t fi[4]; + dist.toBytes(fi); for (int i = 0; i < 4; i++) { *dstC++ = cache[toggle + fi[i]]; @@ -318,8 +318,8 @@ void shadeSpan_radial_clamp2(SkScalar sfx, SkScalar sdx, SkScalar sfy, SkScalar if (count) { Sk4f dist = Sk4f::Min(fast_sqrt(R), max); - int fi[4]; - dist.castTrunc().store(fi); + uint8_t fi[4]; + 
dist.toBytes(fi); for (int i = 0; i < count; i++) { *dstC++ = cache[toggle + fi[i]]; toggle = next_dither_toggle(toggle); diff --git a/src/opts/SkNx_neon.h b/src/opts/SkNx_neon.h index cf149862c5..33b3ad8c53 100644 --- a/src/opts/SkNx_neon.h +++ b/src/opts/SkNx_neon.h @@ -33,7 +33,7 @@ namespace { // See SkNx.h case 31: return op(v, 31); } return fVec template <> -class SkNf<2, float> { +class SkNf<2> { public: SkNf(float32x2_t vec) : fVec(vec) {} @@ -113,81 +113,6 @@ public: float32x2_t fVec; }; -#if defined(SK_CPU_ARM64) -template <> -class SkNf<2, double> { -public: - SkNf(float64x2_t vec) : fVec(vec) {} - - SkNf() {} - explicit SkNf(double val) : fVec(vdupq_n_f64(val)) {} - static SkNf Load(const double vals[2]) { return vld1q_f64(vals); } - SkNf(double a, double b) { fVec = (float64x2_t) { a, b }; } - - void store(double vals[2]) const { vst1q_f64(vals, fVec); } - - SkNf operator + (const SkNf& o) const { return vaddq_f64(fVec, o.fVec); } - SkNf operator - (const SkNf& o) const { return vsubq_f64(fVec, o.fVec); } - SkNf operator * (const SkNf& o) const { return vmulq_f64(fVec, o.fVec); } - SkNf operator / (const SkNf& o) const { return vdivq_f64(fVec, o.fVec); } - - // vreinterpretq_f64_u64 and vreinterpretq_f64_u32 don't seem to exist.... weird. 
- SkNf operator==(const SkNf& o) const { return (float64x2_t)(vceqq_f64(fVec, o.fVec)); } - SkNf operator <(const SkNf& o) const { return (float64x2_t)(vcltq_f64(fVec, o.fVec)); } - SkNf operator >(const SkNf& o) const { return (float64x2_t)(vcgtq_f64(fVec, o.fVec)); } - SkNf operator<=(const SkNf& o) const { return (float64x2_t)(vcleq_f64(fVec, o.fVec)); } - SkNf operator>=(const SkNf& o) const { return (float64x2_t)(vcgeq_f64(fVec, o.fVec)); } - SkNf operator != (const SkNf& o) const { - return (float64x2_t)(vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(fVec, o.fVec)))); - } - - static SkNf Min(const SkNf& l, const SkNf& r) { return vminq_f64(l.fVec, r.fVec); } - static SkNf Max(const SkNf& l, const SkNf& r) { return vmaxq_f64(l.fVec, r.fVec); } - - SkNf sqrt() const { return vsqrtq_f64(fVec); } - - SkNf rsqrt0() const { return vrsqrteq_f64(fVec); } - SkNf rsqrt1() const { - float64x2_t est0 = this->rsqrt0().fVec; - return vmulq_f64(vrsqrtsq_f64(fVec, vmulq_f64(est0, est0)), est0); - } - SkNf rsqrt2() const { - float64x2_t est1 = this->rsqrt1().fVec; - return vmulq_f64(vrsqrtsq_f64(fVec, vmulq_f64(est1, est1)), est1); - } - - SkNf approxInvert() const { - float64x2_t est0 = vrecpeq_f64(fVec), - est1 = vmulq_f64(vrecpsq_f64(est0, fVec), est0); - return est1; - } - - SkNf invert() const { - float64x2_t est1 = this->approxInvert().fVec, - est2 = vmulq_f64(vrecpsq_f64(est1, fVec), est1), - est3 = vmulq_f64(vrecpsq_f64(est2, fVec), est2); - return est3; - } - - template <int k> double kth() const { - SkASSERT(0 <= k && k < 2); - return vgetq_lane_f64(fVec, k&1); - } - - // vreinterpretq_u64_f64 doesn't seem to exist.... weird. 
- bool allTrue() const { - auto v = (uint64x2_t)(fVec); - return vgetq_lane_u64(v,0) && vgetq_lane_u64(v,1); - } - bool anyTrue() const { - auto v = (uint64x2_t)(fVec); - return vgetq_lane_u64(v,0) || vgetq_lane_u64(v,1); - } - - float64x2_t fVec; -}; -#endif//defined(SK_CPU_ARM64) - template <> class SkNi<4, int> { public: @@ -216,7 +141,7 @@ public: }; template <> -class SkNf<4, float> { +class SkNf<4> { public: SkNf(float32x4_t vec) : fVec(vec) {} @@ -240,8 +165,6 @@ public: vst1_lane_u32((uint32_t*)bytes, (uint32x2_t)fix8, 0); } - SkNi<4, int> castTrunc() const { return vcvtq_s32_f32(fVec); } - SkNf approxInvert() const { float32x4_t est0 = vrecpeq_f32(fVec), est1 = vmulq_f32(vrecpsq_f32(est0, fVec), est0); diff --git a/src/opts/SkNx_sse.h b/src/opts/SkNx_sse.h index a99531b770..6e9c59c5cb 100644 --- a/src/opts/SkNx_sse.h +++ b/src/opts/SkNx_sse.h @@ -14,7 +14,7 @@ namespace { // See SkNx.h template <> -class SkNf<2, float> { +class SkNf<2> { public: SkNf(const __m128& vec) : fVec(vec) {} @@ -63,53 +63,6 @@ public: }; template <> -class SkNf<2, double> { -public: - SkNf(const __m128d& vec) : fVec(vec) {} - - SkNf() {} - explicit SkNf(double val) : fVec( _mm_set1_pd(val) ) {} - static SkNf Load(const double vals[2]) { return _mm_loadu_pd(vals); } - SkNf(double a, double b) : fVec(_mm_setr_pd(a,b)) {} - - void store(double vals[2]) const { _mm_storeu_pd(vals, fVec); } - - SkNf operator + (const SkNf& o) const { return _mm_add_pd(fVec, o.fVec); } - SkNf operator - (const SkNf& o) const { return _mm_sub_pd(fVec, o.fVec); } - SkNf operator * (const SkNf& o) const { return _mm_mul_pd(fVec, o.fVec); } - SkNf operator / (const SkNf& o) const { return _mm_div_pd(fVec, o.fVec); } - - SkNf operator == (const SkNf& o) const { return _mm_cmpeq_pd (fVec, o.fVec); } - SkNf operator != (const SkNf& o) const { return _mm_cmpneq_pd(fVec, o.fVec); } - SkNf operator < (const SkNf& o) const { return _mm_cmplt_pd (fVec, o.fVec); } - SkNf operator > (const SkNf& o) const { return 
_mm_cmpgt_pd (fVec, o.fVec); } - SkNf operator <= (const SkNf& o) const { return _mm_cmple_pd (fVec, o.fVec); } - SkNf operator >= (const SkNf& o) const { return _mm_cmpge_pd (fVec, o.fVec); } - - static SkNf Min(const SkNf& l, const SkNf& r) { return _mm_min_pd(l.fVec, r.fVec); } - static SkNf Max(const SkNf& l, const SkNf& r) { return _mm_max_pd(l.fVec, r.fVec); } - - SkNf sqrt() const { return _mm_sqrt_pd(fVec); } - SkNf rsqrt0() const { return _mm_cvtps_pd(_mm_rsqrt_ps(_mm_cvtpd_ps(fVec))); } - SkNf rsqrt1() const { return this->rsqrt0(); } - SkNf rsqrt2() const { return this->rsqrt1(); } - - SkNf invert() const { return SkNf(1) / *this; } - SkNf approxInvert() const { return _mm_cvtps_pd(_mm_rcp_ps(_mm_cvtpd_ps(fVec))); } - - template <int k> double kth() const { - SkASSERT(0 <= k && k < 2); - union { __m128d v; double ds[2]; } pun = {fVec}; - return pun.ds[k&1]; - } - - bool allTrue() const { return 0xffff == _mm_movemask_epi8(_mm_castpd_si128(fVec)); } - bool anyTrue() const { return 0x0000 != _mm_movemask_epi8(_mm_castpd_si128(fVec)); } - - __m128d fVec; -}; - -template <> class SkNi<4, int> { public: SkNi(const __m128i& vec) : fVec(vec) {} @@ -148,7 +101,7 @@ public: }; template <> -class SkNf<4, float> { +class SkNf<4> { public: SkNf(const __m128& vec) : fVec(vec) {} @@ -179,8 +132,6 @@ public: *(int*)bytes = _mm_cvtsi128_si32(fix8); } - SkNi<4, int> castTrunc() const { return _mm_cvttps_epi32(fVec); } - SkNf operator + (const SkNf& o) const { return _mm_add_ps(fVec, o.fVec); } SkNf operator - (const SkNf& o) const { return _mm_sub_ps(fVec, o.fVec); } SkNf operator * (const SkNf& o) const { return _mm_mul_ps(fVec, o.fVec); } diff --git a/tests/SkNxTest.cpp b/tests/SkNxTest.cpp index 185940f195..31baebc430 100644 --- a/tests/SkNxTest.cpp +++ b/tests/SkNxTest.cpp @@ -10,12 +10,12 @@ #include "SkRandom.h" #include "Test.h" -template <int N, typename T> +template <int N> static void test_Nf(skiatest::Reporter* r) { - auto assert_nearly_eq = [&](double eps, 
const SkNf<N,T>& v, T a, T b, T c, T d) { - auto close = [=](T a, T b) { return fabs(a-b) <= eps; }; - T vals[4]; + auto assert_nearly_eq = [&](float eps, const SkNf<N>& v, float a, float b, float c, float d) { + auto close = [=](float a, float b) { return fabsf(a-b) <= eps; }; + float vals[4]; v.store(vals); bool ok = close(vals[0], a) && close(vals[1], b) && close(v.template kth<0>(), a) && close(v.template kth<1>(), b); @@ -26,15 +26,15 @@ static void test_Nf(skiatest::Reporter* r) { REPORTER_ASSERT(r, ok); } }; - auto assert_eq = [&](const SkNf<N,T>& v, T a, T b, T c, T d) { + auto assert_eq = [&](const SkNf<N>& v, float a, float b, float c, float d) { return assert_nearly_eq(0, v, a,b,c,d); }; - T vals[] = {3, 4, 5, 6}; - SkNf<N,T> a = SkNf<N,T>::Load(vals), - b(a), - c = a; - SkNf<N,T> d; + float vals[] = {3, 4, 5, 6}; + SkNf<N> a = SkNf<N>::Load(vals), + b(a), + c = a; + SkNf<N> d; d = a; assert_eq(a, 3, 4, 5, 6); @@ -47,20 +47,20 @@ static void test_Nf(skiatest::Reporter* r) { assert_eq(a*b-b, 6, 12, 20, 30); assert_eq((a*b).sqrt(), 3, 4, 5, 6); assert_eq(a/b, 1, 1, 1, 1); - assert_eq(SkNf<N,T>(0)-a, -3, -4, -5, -6); + assert_eq(SkNf<N>(0)-a, -3, -4, -5, -6); - SkNf<N,T> fours(4); + SkNf<N> fours(4); assert_eq(fours.sqrt(), 2,2,2,2); - assert_nearly_eq(0.001, fours.rsqrt0(), 0.5, 0.5, 0.5, 0.5); - assert_nearly_eq(0.001, fours.rsqrt1(), 0.5, 0.5, 0.5, 0.5); - assert_nearly_eq(0.001, fours.rsqrt2(), 0.5, 0.5, 0.5, 0.5); + assert_nearly_eq(0.001f, fours.rsqrt0(), 0.5, 0.5, 0.5, 0.5); + assert_nearly_eq(0.001f, fours.rsqrt1(), 0.5, 0.5, 0.5, 0.5); + assert_nearly_eq(0.001f, fours.rsqrt2(), 0.5, 0.5, 0.5, 0.5); - assert_eq( fours. invert(), 0.25, 0.25, 0.25, 0.25); - assert_nearly_eq(0.001, fours.approxInvert(), 0.25, 0.25, 0.25, 0.25); + assert_eq( fours. 
invert(), 0.25, 0.25, 0.25, 0.25); + assert_nearly_eq(0.001f, fours.approxInvert(), 0.25, 0.25, 0.25, 0.25); - assert_eq(SkNf<N,T>::Min(a, fours), 3, 4, 4, 4); - assert_eq(SkNf<N,T>::Max(a, fours), 4, 4, 5, 6); + assert_eq(SkNf<N>::Min(a, fours), 3, 4, 4, 4); + assert_eq(SkNf<N>::Max(a, fours), 4, 4, 5, 6); // Test some comparisons. This is not exhaustive. REPORTER_ASSERT(r, (a == b).allTrue()); @@ -75,11 +75,8 @@ static void test_Nf(skiatest::Reporter* r) { } DEF_TEST(SkNf, r) { - test_Nf<2, float>(r); - test_Nf<2, double>(r); - - test_Nf<4, float>(r); - test_Nf<4, double>(r); + test_Nf<2>(r); + test_Nf<4>(r); } template <int N, typename T> |