diff options
-rw-r--r-- | src/core/SkPMFloat.h | 21 | ||||
-rw-r--r-- | src/core/SkXfermode.cpp | 24 | ||||
-rw-r--r-- | src/effects/SkColorMatrixFilter.cpp | 2 | ||||
-rw-r--r-- | src/opts/SkNx_neon.h | 3 | ||||
-rw-r--r-- | src/opts/SkNx_sse.h | 3 | ||||
-rw-r--r-- | src/opts/SkPMFloat_SSE2.h | 14 | ||||
-rw-r--r-- | src/opts/SkPMFloat_SSSE3.h | 14 | ||||
-rw-r--r-- | src/opts/SkPMFloat_neon.h | 4 |
8 files changed, 51 insertions, 34 deletions
diff --git a/src/core/SkPMFloat.h b/src/core/SkPMFloat.h index eb575f2ee4..a327025f3c 100644 --- a/src/core/SkPMFloat.h +++ b/src/core/SkPMFloat.h @@ -15,7 +15,7 @@ // A pre-multiplied color storing each component in the same order as SkPMColor, // but as a float in the range [0, 255]. -class SkPMFloat : public Sk4f { +class SK_STRUCT_ALIGN(16) SkPMFloat { public: static SkPMFloat FromPMColor(SkPMColor c) { return SkPMFloat(c); } static SkPMFloat FromARGB(float a, float r, float g, float b) { return SkPMFloat(a,r,g,b); } @@ -28,17 +28,20 @@ public: explicit SkPMFloat(SkPMColor); SkPMFloat(float a, float r, float g, float b) #ifdef SK_PMCOLOR_IS_RGBA - : INHERITED(r,g,b,a) {} + : fColors(r,g,b,a) {} #else - : INHERITED(b,g,r,a) {} + : fColors(b,g,r,a) {} #endif - SkPMFloat(const Sk4f& fs) : INHERITED(fs) {} - float a() const { return this->kth<SK_A32_SHIFT / 8>(); } - float r() const { return this->kth<SK_R32_SHIFT / 8>(); } - float g() const { return this->kth<SK_G32_SHIFT / 8>(); } - float b() const { return this->kth<SK_B32_SHIFT / 8>(); } + // Freely autoconvert between SkPMFloat and Sk4f. + /*implicit*/ SkPMFloat(const Sk4f& fs) { fColors = fs; } + /*implicit*/ operator Sk4f() const { return fColors; } + + float a() const { return fColors.kth<SK_A32_SHIFT / 8>(); } + float r() const { return fColors.kth<SK_R32_SHIFT / 8>(); } + float g() const { return fColors.kth<SK_G32_SHIFT / 8>(); } + float b() const { return fColors.kth<SK_B32_SHIFT / 8>(); } // N.B. All methods returning an SkPMColor call SkPMColorAssert on that result before returning. @@ -64,7 +67,7 @@ public: } private: - typedef Sk4f INHERITED; + Sk4f fColors; }; #ifdef SKNX_NO_SIMD diff --git a/src/core/SkXfermode.cpp b/src/core/SkXfermode.cpp index 1d502d987c..5e9055c67d 100644 --- a/src/core/SkXfermode.cpp +++ b/src/core/SkXfermode.cpp @@ -1236,7 +1236,9 @@ static inline SkPMFloat check_as_pmfloat(const Sk4f& value) { struct SrcATop4f { static SkPMFloat Xfer(const SkPMFloat& src, const SkPMFloat& dst) { const Sk4f inv255(gInv255); - return check_as_pmfloat(dst + (src * Sk4f(dst.a()) - dst * Sk4f(src.a())) * inv255); + Sk4f s4 = src; + Sk4f d4 = dst; + return check_as_pmfloat(d4 + (s4 * Sk4f(dst.a()) - d4 * Sk4f(src.a())) * inv255); } static const bool kFoldCoverageIntoSrcAlpha = true; static const SkXfermode::Mode kMode = SkXfermode::kSrcATop_Mode; @@ -1246,7 +1248,9 @@ struct SrcATop4f { struct DstATop4f { static SkPMFloat Xfer(const SkPMFloat& src, const SkPMFloat& dst) { const Sk4f inv255(gInv255); - return check_as_pmfloat(src + (dst * Sk4f(src.a()) - src * Sk4f(dst.a())) * inv255); + Sk4f s4 = src; + Sk4f d4 = dst; + return check_as_pmfloat(s4 + (d4 * Sk4f(src.a()) - s4 * Sk4f(dst.a())) * inv255); } static const bool kFoldCoverageIntoSrcAlpha = false; static const SkXfermode::Mode kMode = SkXfermode::kDstATop_Mode; @@ -1256,7 +1260,9 @@ struct DstATop4f { struct Xor4f { static SkPMFloat Xfer(const SkPMFloat& src, const SkPMFloat& dst) { const Sk4f inv255(gInv255); - return check_as_pmfloat(src + dst - (src * Sk4f(dst.a()) + dst * Sk4f(src.a())) * inv255); + Sk4f s4 = src; + Sk4f d4 = dst; + return check_as_pmfloat(s4 + d4 - (s4 * Sk4f(dst.a()) + d4 * Sk4f(src.a())) * inv255); } static const bool kFoldCoverageIntoSrcAlpha = true; static const SkXfermode::Mode kMode = SkXfermode::kXor_Mode; @@ -1265,7 +1271,9 @@ struct Xor4f { // kPlus_Mode [Sa + Da, Sc + Dc] struct Plus4f { static SkPMFloat Xfer(const SkPMFloat& src, const SkPMFloat& dst) { - return check_as_pmfloat(clamp_255(src + dst)); + Sk4f s4 = src; + Sk4f d4 = dst; + return check_as_pmfloat(clamp_255(s4 + d4)); } static const bool kFoldCoverageIntoSrcAlpha = true; static const SkXfermode::Mode kMode = SkXfermode::kPlus_Mode; @@ -1275,7 +1283,9 @@ struct Plus4f { struct Modulate4f { static SkPMFloat Xfer(const SkPMFloat& src, const SkPMFloat& dst) { const Sk4f inv255(gInv255); - return check_as_pmfloat(src * dst * inv255); + Sk4f s4 = src; + Sk4f d4 = dst; + return check_as_pmfloat(s4 * d4 * inv255); } static const bool kFoldCoverageIntoSrcAlpha = false; static const SkXfermode::Mode kMode = SkXfermode::kModulate_Mode; @@ -1285,7 +1295,9 @@ struct Modulate4f { struct Screen4f { static SkPMFloat Xfer(const SkPMFloat& src, const SkPMFloat& dst) { const Sk4f inv255(gInv255); - return check_as_pmfloat(src + dst - src * dst * inv255); + Sk4f s4 = src; + Sk4f d4 = dst; + return check_as_pmfloat(s4 + d4 - s4 * d4 * inv255); } static const bool kFoldCoverageIntoSrcAlpha = true; static const SkXfermode::Mode kMode = SkXfermode::kScreen_Mode; diff --git a/src/effects/SkColorMatrixFilter.cpp b/src/effects/SkColorMatrixFilter.cpp index 196ebd0f25..a20c65a0a5 100644 --- a/src/effects/SkColorMatrixFilter.cpp +++ b/src/effects/SkColorMatrixFilter.cpp @@ -269,7 +269,7 @@ static Sk4f premul(const Sk4f& x) { static Sk4f unpremul(const SkPMFloat& pm) { float scale = 255 / pm.a(); // candidate for fast/approx invert? - return pm * Sk4f(scale, scale, scale, 1); + return Sk4f(pm) * Sk4f(scale, scale, scale, 1); } static Sk4f clamp_0_255(const Sk4f& value) { diff --git a/src/opts/SkNx_neon.h b/src/opts/SkNx_neon.h index 451482936f..c8580bca5a 100644 --- a/src/opts/SkNx_neon.h +++ b/src/opts/SkNx_neon.h @@ -187,6 +187,7 @@ class SkNf<4, float> { typedef SkNi<4, int32_t> Ni; public: SkNf(float32x4_t vec) : fVec(vec) {} + float32x4_t vec() const { return fVec; } SkNf() {} explicit SkNf(float val) : fVec(vdupq_n_f32(val)) {} @@ -251,7 +252,7 @@ public: return vgetq_lane_f32(fVec, k&3); } -protected: +private: float32x4_t fVec; }; diff --git a/src/opts/SkNx_sse.h b/src/opts/SkNx_sse.h index c1ae1f0469..cae2d458f6 100644 --- a/src/opts/SkNx_sse.h +++ b/src/opts/SkNx_sse.h @@ -146,6 +146,7 @@ class SkNf<4, float> { typedef SkNi<4, int32_t> Ni; public: SkNf(const __m128& vec) : fVec(vec) {} + __m128 vec() const { return fVec; } SkNf() {} explicit SkNf(float val) : fVec( _mm_set1_ps(val) ) {} @@ -181,7 +182,7 @@ public: return pun.fs[k&3]; } -protected: +private: __m128 fVec; }; diff --git a/src/opts/SkPMFloat_SSE2.h b/src/opts/SkPMFloat_SSE2.h index 88a38039b5..e10328aae3 100644 --- a/src/opts/SkPMFloat_SSE2.h +++ b/src/opts/SkPMFloat_SSE2.h @@ -17,7 +17,7 @@ inline SkPMFloat::SkPMFloat(SkPMColor c) { __m128i fix8 = _mm_set_epi32(0,0,0,c), fix8_16 = _mm_unpacklo_epi8 (fix8, _mm_setzero_si128()), fix8_32 = _mm_unpacklo_epi16(fix8_16, _mm_setzero_si128()); - fVec = _mm_cvtepi32_ps(fix8_32); + fColors = _mm_cvtepi32_ps(fix8_32); SkASSERT(this->isValid()); } @@ -27,7 +27,7 @@ inline SkPMColor SkPMFloat::round() const { inline SkPMColor SkPMFloat::roundClamp() const { // We don't use _mm_cvtps_epi32, because we want precise control over how 0.5 rounds (up). - __m128i fix8_32 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), fVec)), + __m128i fix8_32 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), fColors.vec())), fix8_16 = _mm_packus_epi16(fix8_32, fix8_32), fix8 = _mm_packus_epi16(fix8_16, fix8_16); SkPMColor c = _mm_cvtsi128_si32(fix8); @@ -37,7 +37,7 @@ inline SkPMColor SkPMFloat::roundClamp() const { inline SkPMColor SkPMFloat::trunc() const { // Basically, same as roundClamp(), but no rounding. - __m128i fix8_32 = _mm_cvttps_epi32(fVec), + __m128i fix8_32 = _mm_cvttps_epi32(fColors.vec()), fix8_16 = _mm_packus_epi16(fix8_32, fix8_32), fix8 = _mm_packus_epi16(fix8_16, fix8_16); SkPMColor c = _mm_cvtsi128_si32(fix8); @@ -66,10 +66,10 @@ inline void SkPMFloat::RoundClampTo4PMColors( SkPMColor colors[4]) { // Same as _SSSE3.h's. We use 3 _mm_packus_epi16() where the naive loop uses 8. // We don't use _mm_cvtps_epi32, because we want precise control over how 0.5 rounds (up). - __m128i c0 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), a.fVec)), - c1 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), b.fVec)), - c2 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), c.fVec)), - c3 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), d.fVec)); + __m128i c0 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), a.fColors.vec())), + c1 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), b.fColors.vec())), + c2 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), c.fColors.vec())), + c3 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), d.fColors.vec())); __m128i c3210 = _mm_packus_epi16(_mm_packus_epi16(c0, c1), _mm_packus_epi16(c2, c3)); _mm_storeu_si128((__m128i*)colors, c3210); diff --git a/src/opts/SkPMFloat_SSSE3.h b/src/opts/SkPMFloat_SSSE3.h index 9ff73568fc..c1c6d67446 100644 --- a/src/opts/SkPMFloat_SSSE3.h +++ b/src/opts/SkPMFloat_SSSE3.h @@ -18,13 +18,13 @@ inline SkPMFloat::SkPMFloat(SkPMColor c) { const int _ = 255; // _ means to zero that byte. __m128i fix8 = _mm_set_epi32(0,0,0,c), fix8_32 = _mm_shuffle_epi8(fix8, _mm_set_epi8(_,_,_,3, _,_,_,2, _,_,_,1, _,_,_,0)); - fVec = _mm_cvtepi32_ps(fix8_32); + fColors = _mm_cvtepi32_ps(fix8_32); SkASSERT(this->isValid()); } inline SkPMColor SkPMFloat::trunc() const { const int _ = 255; // _ means to zero that byte. - __m128i fix8_32 = _mm_cvttps_epi32(fVec), + __m128i fix8_32 = _mm_cvttps_epi32(fColors.vec()), fix8 = _mm_shuffle_epi8(fix8_32, _mm_set_epi8(_,_,_,_, _,_,_,_, _,_,_,_, 12,8,4,0)); SkPMColor c = _mm_cvtsi128_si32(fix8); SkPMColorAssert(c); @@ -37,7 +37,7 @@ inline SkPMColor SkPMFloat::round() const { inline SkPMColor SkPMFloat::roundClamp() const { // We don't use _mm_cvtps_epi32, because we want precise control over how 0.5 rounds (up). - __m128i fix8_32 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), fVec)), + __m128i fix8_32 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), fColors.vec())), fix8_16 = _mm_packus_epi16(fix8_32, fix8_32), fix8 = _mm_packus_epi16(fix8_16, fix8_16); SkPMColor c = _mm_cvtsi128_si32(fix8); @@ -69,10 +69,10 @@ inline void SkPMFloat::RoundClampTo4PMColors( SkPMColor colors[4]) { // Same as _SSE2.h's. We use 3 _mm_packus_epi16() where the naive loop uses 8. // We don't use _mm_cvtps_epi32, because we want precise control over how 0.5 rounds (up). - __m128i c0 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), a.fVec)), - c1 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), b.fVec)), - c2 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), c.fVec)), - c3 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), d.fVec)); + __m128i c0 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), a.fColors.vec())), + c1 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), b.fColors.vec())), + c2 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), c.fColors.vec())), + c3 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), d.fColors.vec())); __m128i c3210 = _mm_packus_epi16(_mm_packus_epi16(c0, c1), _mm_packus_epi16(c2, c3)); _mm_storeu_si128((__m128i*)colors, c3210); diff --git a/src/opts/SkPMFloat_neon.h b/src/opts/SkPMFloat_neon.h index fcb9ba8054..baad190e56 100644 --- a/src/opts/SkPMFloat_neon.h +++ b/src/opts/SkPMFloat_neon.h @@ -22,7 +22,7 @@ inline SkPMFloat::SkPMFloat(SkPMColor c) { } inline SkPMColor SkPMFloat::trunc() const { - uint32x4_t fix8_32 = vcvtq_u32_f32(fVec); // vcvtq_u32_f32 truncates + uint32x4_t fix8_32 = vcvtq_u32_f32(fColors.vec()); // vcvtq_u32_f32 truncates uint16x4_t fix8_16 = vmovn_u32(fix8_32); uint8x8_t fix8 = vmovn_u16(vcombine_u16(fix8_16, vdup_n_u16(0))); SkPMColor c = vget_lane_u32((uint32x2_t)fix8, 0); @@ -35,7 +35,7 @@ inline SkPMColor SkPMFloat::round() const { } inline SkPMColor SkPMFloat::roundClamp() const { - float32x4_t add_half = vaddq_f32(fVec, vdupq_n_f32(0.5f)); + float32x4_t add_half = vaddq_f32(fColors.vec(), vdupq_n_f32(0.5f)); uint32x4_t fix8_32 = vcvtq_u32_f32(add_half); // vcvtq_u32_f32 truncates, so round manually uint16x4_t fix8_16 = vqmovn_u32(fix8_32); uint8x8_t fix8 = vqmovn_u16(vcombine_u16(fix8_16, vdup_n_u16(0))); |