diff options
author | mtklein <mtklein@chromium.org> | 2015-06-25 08:56:28 -0700 |
---|---|---|
committer | Commit bot <commit-bot@chromium.org> | 2015-06-25 08:56:28 -0700 |
commit | e9a3e3c17a313942042d6cfb9f4f0361a900d9e7 (patch) | |
tree | 913d6d293e2578f223ec82f2bbac69fbf43b711b /src/opts | |
parent | 538bacb4bb3ceac7786108cd68b04ed58b1c29c7 (diff) |
Convert SkPMFloat to [0,1] range and prune its API.
Now that Sk4px exists, there's a lot less sense in eking out every
cycle of speed from SkPMFloat: if we need to go _really_ fast, we
should use Sk4px. SkPMFloat's going to be used for things that are
already slow: large-range intermediates, divides, sqrts, etc.
A [0,1] range is easier to work with, and can even be faster if we
eliminate enough *255 and *1/255 steps. This is particularly true
on ARM, where NEON can do the *255 and /255 steps for us while
converting float<->int.
We have lots of experimental SkPMFloat <-> SkPMColor APIs that
I'm now removing. Of the existing APIs, roundClamp() is the sanest,
so I've kept only that, now called round(). The 4-at-a-time APIs
never panned out, so they're gone.
There will be small diffs on:
colormatrix coloremoji colorfilterimagefilter fadefilter imagefilters_xfermodes imagefilterscropexpand imagefiltersgraph tileimagefilter
BUG=skia:
Review URL: https://codereview.chromium.org/1201343004
Diffstat (limited to 'src/opts')
-rw-r--r-- | src/opts/SkPMFloat_SSE2.h | 84 | ||||
-rw-r--r-- | src/opts/SkPMFloat_SSSE3.h | 87 | ||||
-rw-r--r-- | src/opts/SkPMFloat_neon.h | 58 | ||||
-rw-r--r-- | src/opts/SkPMFloat_none.h | 55 | ||||
-rw-r--r-- | src/opts/SkPMFloat_sse.h | 36 |
5 files changed, 52 insertions, 268 deletions
diff --git a/src/opts/SkPMFloat_SSE2.h b/src/opts/SkPMFloat_SSE2.h deleted file mode 100644 index c7e791ff62..0000000000 --- a/src/opts/SkPMFloat_SSE2.h +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Copyright 2015 Google Inc. - * - * Use of this source code is governed by a BSD-style license that can be - * found in the LICENSE file. - */ - -namespace { // See SkPMFloat.h - -// For SkPMFloat(SkPMColor), we widen our 8 bit components (fix8) to 8-bit components in 16 bits -// (fix8_16), then widen those to 8-bit-in-32-bits (fix8_32), and finally convert those to floats. - -// round() and roundClamp() do the opposite, working from floats to 8-bit-in-32-bit, -// to 8-bit-in-16-bit, back down to 8-bit components. -// _mm_packus_epi16() gives us clamping for free while narrowing. - -inline SkPMFloat::SkPMFloat(SkPMColor c) { - SkPMColorAssert(c); - __m128i fix8 = _mm_set_epi32(0,0,0,c), - fix8_16 = _mm_unpacklo_epi8 (fix8, _mm_setzero_si128()), - fix8_32 = _mm_unpacklo_epi16(fix8_16, _mm_setzero_si128()); - fVec = _mm_cvtepi32_ps(fix8_32); - SkASSERT(this->isValid()); -} - -inline SkPMColor SkPMFloat::round() const { - return this->roundClamp(); // Haven't beaten this yet. -} - -inline SkPMColor SkPMFloat::roundClamp() const { - // We don't use _mm_cvtps_epi32, because we want precise control over how 0.5 rounds (up). - __m128i fix8_32 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), fVec)), - fix8_16 = _mm_packus_epi16(fix8_32, fix8_32), - fix8 = _mm_packus_epi16(fix8_16, fix8_16); - SkPMColor c = _mm_cvtsi128_si32(fix8); - SkPMColorAssert(c); - return c; -} - -inline SkPMColor SkPMFloat::trunc() const { - // Basically, same as roundClamp(), but no rounding. 
- __m128i fix8_32 = _mm_cvttps_epi32(fVec), - fix8_16 = _mm_packus_epi16(fix8_32, fix8_32), - fix8 = _mm_packus_epi16(fix8_16, fix8_16); - SkPMColor c = _mm_cvtsi128_si32(fix8); - SkPMColorAssert(c); - return c; -} - -inline void SkPMFloat::From4PMColors(const SkPMColor colors[4], - SkPMFloat* a, SkPMFloat* b, SkPMFloat* c, SkPMFloat* d) { - // Haven't beaten this yet. - *a = FromPMColor(colors[0]); - *b = FromPMColor(colors[1]); - *c = FromPMColor(colors[2]); - *d = FromPMColor(colors[3]); -} - -inline void SkPMFloat::RoundTo4PMColors( - const SkPMFloat& a, const SkPMFloat& b, const SkPMFloat&c, const SkPMFloat& d, - SkPMColor colors[4]) { - // Haven't beaten this yet. - RoundClampTo4PMColors(a,b,c,d, colors); -} - -inline void SkPMFloat::RoundClampTo4PMColors( - const SkPMFloat& a, const SkPMFloat& b, const SkPMFloat&c, const SkPMFloat& d, - SkPMColor colors[4]) { - // Same as _SSSE3.h's. We use 3 _mm_packus_epi16() where the naive loop uses 8. - // We don't use _mm_cvtps_epi32, because we want precise control over how 0.5 rounds (up). - __m128i c0 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), a.fVec)), - c1 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), b.fVec)), - c2 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), c.fVec)), - c3 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), d.fVec)); - __m128i c3210 = _mm_packus_epi16(_mm_packus_epi16(c0, c1), - _mm_packus_epi16(c2, c3)); - _mm_storeu_si128((__m128i*)colors, c3210); - SkPMColorAssert(colors[0]); - SkPMColorAssert(colors[1]); - SkPMColorAssert(colors[2]); - SkPMColorAssert(colors[3]); -} - -} // namespace diff --git a/src/opts/SkPMFloat_SSSE3.h b/src/opts/SkPMFloat_SSSE3.h deleted file mode 100644 index 67116ec2dd..0000000000 --- a/src/opts/SkPMFloat_SSSE3.h +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Copyright 2015 Google Inc. - * - * Use of this source code is governed by a BSD-style license that can be - * found in the LICENSE file. 
- */ - -namespace { // See SkPMFloat.h - -// For SkPMFloat(SkPMColor), we widen our 8 bit components (fix8) to 8-bit components in 32 bits -// (fix8_32), then convert those to floats. - -// round() does the opposite, working from floats to 8-bit-in-32-bits, then back to packed 8 bit. - -// roundClamp() is the same as _SSE2: floats to 8-in-32, to 8-in-16, to packed 8 bit, with -// _mm_packus_epi16() both clamping and narrowing. - -inline SkPMFloat::SkPMFloat(SkPMColor c) { - SkPMColorAssert(c); - const int _ = 255; // _ means to zero that byte. - __m128i fix8 = _mm_set_epi32(0,0,0,c), - fix8_32 = _mm_shuffle_epi8(fix8, _mm_set_epi8(_,_,_,3, _,_,_,2, _,_,_,1, _,_,_,0)); - fVec = _mm_cvtepi32_ps(fix8_32); - SkASSERT(this->isValid()); -} - -inline SkPMColor SkPMFloat::trunc() const { - const int _ = 255; // _ means to zero that byte. - __m128i fix8_32 = _mm_cvttps_epi32(fVec), - fix8 = _mm_shuffle_epi8(fix8_32, _mm_set_epi8(_,_,_,_, _,_,_,_, _,_,_,_, 12,8,4,0)); - SkPMColor c = _mm_cvtsi128_si32(fix8); - SkPMColorAssert(c); - return c; -} - -inline SkPMColor SkPMFloat::round() const { - return SkPMFloat(Sk4f(0.5f) + *this).trunc(); -} - -inline SkPMColor SkPMFloat::roundClamp() const { - // We don't use _mm_cvtps_epi32, because we want precise control over how 0.5 rounds (up). - __m128i fix8_32 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), fVec)), - fix8_16 = _mm_packus_epi16(fix8_32, fix8_32), - fix8 = _mm_packus_epi16(fix8_16, fix8_16); - SkPMColor c = _mm_cvtsi128_si32(fix8); - SkPMColorAssert(c); - return c; -} - -inline void SkPMFloat::From4PMColors(const SkPMColor colors[4], - SkPMFloat* a, SkPMFloat* b, SkPMFloat* c, SkPMFloat* d) { - // Haven't beaten this yet. 
- *a = FromPMColor(colors[0]); - *b = FromPMColor(colors[1]); - *c = FromPMColor(colors[2]); - *d = FromPMColor(colors[3]); -} - -inline void SkPMFloat::RoundTo4PMColors( - const SkPMFloat& a, const SkPMFloat& b, const SkPMFloat&c, const SkPMFloat& d, - SkPMColor colors[4]) { - // Haven't beaten this yet. Still faster than RoundClampTo4PMColors? - colors[0] = a.round(); - colors[1] = b.round(); - colors[2] = c.round(); - colors[3] = d.round(); -} - -inline void SkPMFloat::RoundClampTo4PMColors( - const SkPMFloat& a, const SkPMFloat& b, const SkPMFloat&c, const SkPMFloat& d, - SkPMColor colors[4]) { - // Same as _SSE2.h's. We use 3 _mm_packus_epi16() where the naive loop uses 8. - // We don't use _mm_cvtps_epi32, because we want precise control over how 0.5 rounds (up). - __m128i c0 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), a.fVec)), - c1 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), b.fVec)), - c2 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), c.fVec)), - c3 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), d.fVec)); - __m128i c3210 = _mm_packus_epi16(_mm_packus_epi16(c0, c1), - _mm_packus_epi16(c2, c3)); - _mm_storeu_si128((__m128i*)colors, c3210); - SkPMColorAssert(colors[0]); - SkPMColorAssert(colors[1]); - SkPMColorAssert(colors[2]); - SkPMColorAssert(colors[3]); -} - -} // namespace diff --git a/src/opts/SkPMFloat_neon.h b/src/opts/SkPMFloat_neon.h index cabb29a2a8..57f613751d 100644 --- a/src/opts/SkPMFloat_neon.h +++ b/src/opts/SkPMFloat_neon.h @@ -7,70 +7,24 @@ namespace { // See SkPMFloat.h -// For SkPMFloat(SkPMFColor), we widen our 8 bit components (fix8) to 8-bit components in 16 bits -// (fix8_16), then widen those to 8-bit-in-32-bits (fix8_32), and finally convert those to floats. - -// round() and roundClamp() do the opposite, working from floats to 8-bit-in-32-bit, -// to 8-bit-in-16-bit, back down to 8-bit components. -// roundClamp() uses vqmovn to clamp while narrowing instead of just narrowing with vmovn. 
- inline SkPMFloat::SkPMFloat(SkPMColor c) { SkPMColorAssert(c); uint8x8_t fix8 = (uint8x8_t)vdup_n_u32(c); uint16x8_t fix8_16 = vmovl_u8(fix8); uint32x4_t fix8_32 = vmovl_u16(vget_low_u16(fix8_16)); - fVec = vcvtq_f32_u32(fix8_32); + fVec = vcvtq_n_f32_u32(fix8_32, 8); SkASSERT(this->isValid()); } -inline SkPMColor SkPMFloat::trunc() const { - uint32x4_t fix8_32 = vcvtq_u32_f32(fVec); // vcvtq_u32_f32 truncates - uint16x4_t fix8_16 = vmovn_u32(fix8_32); - uint8x8_t fix8 = vmovn_u16(vcombine_u16(fix8_16, vdup_n_u16(0))); - SkPMColor c = vget_lane_u32((uint32x2_t)fix8, 0); - SkPMColorAssert(c); - return c; -} - inline SkPMColor SkPMFloat::round() const { - return SkPMFloat(Sk4f(0.5f) + *this).trunc(); -} - -inline SkPMColor SkPMFloat::roundClamp() const { - float32x4_t add_half = vaddq_f32(fVec, vdupq_n_f32(0.5f)); - uint32x4_t fix8_32 = vcvtq_u32_f32(add_half); // vcvtq_u32_f32 truncates, so round manually - uint16x4_t fix8_16 = vqmovn_u32(fix8_32); - uint8x8_t fix8 = vqmovn_u16(vcombine_u16(fix8_16, vdup_n_u16(0))); + // vcvtq_n_u32_f32 truncates, so we round manually by adding a half before converting. + float32x4_t rounded = vaddq_f32(fVec, vdupq_n_f32(0.5f/255)); + uint32x4_t fix8_32 = vcvtq_n_u32_f32(rounded, 8); + uint16x4_t fix8_16 = vqmovn_u32(fix8_32); + uint8x8_t fix8 = vqmovn_u16(vcombine_u16(fix8_16, vdup_n_u16(0))); SkPMColor c = vget_lane_u32((uint32x2_t)fix8, 0); SkPMColorAssert(c); return c; } -// TODO: we should be able to beat these loops on all three methods. 
-inline void SkPMFloat::From4PMColors(const SkPMColor colors[4], - SkPMFloat* a, SkPMFloat* b, SkPMFloat* c, SkPMFloat* d) { - *a = FromPMColor(colors[0]); - *b = FromPMColor(colors[1]); - *c = FromPMColor(colors[2]); - *d = FromPMColor(colors[3]); -} - -inline void SkPMFloat::RoundTo4PMColors( - const SkPMFloat& a, const SkPMFloat& b, const SkPMFloat&c, const SkPMFloat& d, - SkPMColor colors[4]) { - colors[0] = a.round(); - colors[1] = b.round(); - colors[2] = c.round(); - colors[3] = d.round(); -} - -inline void SkPMFloat::RoundClampTo4PMColors( - const SkPMFloat& a, const SkPMFloat& b, const SkPMFloat&c, const SkPMFloat& d, - SkPMColor colors[4]) { - colors[0] = a.roundClamp(); - colors[1] = b.roundClamp(); - colors[2] = c.roundClamp(); - colors[3] = d.roundClamp(); -} - } // namespace diff --git a/src/opts/SkPMFloat_none.h b/src/opts/SkPMFloat_none.h index 9bb584ed86..ba773564d6 100644 --- a/src/opts/SkPMFloat_none.h +++ b/src/opts/SkPMFloat_none.h @@ -8,61 +8,26 @@ namespace { // See SkPMFloat.h inline SkPMFloat::SkPMFloat(SkPMColor c) { - *this = SkPMFloat::FromARGB(SkGetPackedA32(c), - SkGetPackedR32(c), - SkGetPackedG32(c), - SkGetPackedB32(c)); + float inv255 = 1.0f/255; + *this = SkPMFloat::FromARGB(SkGetPackedA32(c) * inv255, + SkGetPackedR32(c) * inv255, + SkGetPackedG32(c) * inv255, + SkGetPackedB32(c) * inv255); SkASSERT(this->isValid()); } -inline SkPMColor SkPMFloat::trunc() const { - return SkPackARGB32(this->a(), this->r(), this->g(), this->b()); -} - inline SkPMColor SkPMFloat::round() const { - SkPMColor c = SkPackARGB32(this->a()+0.5f, this->r()+0.5f, this->g()+0.5f, this->b()+0.5f); - SkPMColorAssert(c); - return c; -} - -inline SkPMColor SkPMFloat::roundClamp() const { float a = this->a(), r = this->r(), g = this->g(), b = this->b(); - a = a < 0 ? 0 : (a > 255 ? 255 : a); - r = r < 0 ? 0 : (r > 255 ? 255 : r); - g = g < 0 ? 0 : (g > 255 ? 255 : g); - b = b < 0 ? 0 : (b > 255 ? 
255 : b); - SkPMColor c = SkPackARGB32(a+0.5f, r+0.5f, g+0.5f, b+0.5f); + a = a < 0 ? 0 : (a > 1 ? 1 : a); + r = r < 0 ? 0 : (r > 1 ? 1 : r); + g = g < 0 ? 0 : (g > 1 ? 1 : g); + b = b < 0 ? 0 : (b > 1 ? 1 : b); + SkPMColor c = SkPackARGB32(255*a+0.5f, 255*r+0.5f, 255*g+0.5f, 255*b+0.5f); SkPMColorAssert(c); return c; } -inline void SkPMFloat::From4PMColors(const SkPMColor colors[4], - SkPMFloat* a, SkPMFloat* b, SkPMFloat* c, SkPMFloat* d) { - *a = FromPMColor(colors[0]); - *b = FromPMColor(colors[1]); - *c = FromPMColor(colors[2]); - *d = FromPMColor(colors[3]); -} - -inline void SkPMFloat::RoundTo4PMColors( - const SkPMFloat& a, const SkPMFloat& b, const SkPMFloat&c, const SkPMFloat& d, - SkPMColor colors[4]) { - colors[0] = a.round(); - colors[1] = b.round(); - colors[2] = c.round(); - colors[3] = d.round(); -} - -inline void SkPMFloat::RoundClampTo4PMColors( - const SkPMFloat& a, const SkPMFloat& b, const SkPMFloat&c, const SkPMFloat& d, - SkPMColor colors[4]) { - colors[0] = a.roundClamp(); - colors[1] = b.roundClamp(); - colors[2] = c.roundClamp(); - colors[3] = d.roundClamp(); -} - } // namespace diff --git a/src/opts/SkPMFloat_sse.h b/src/opts/SkPMFloat_sse.h new file mode 100644 index 0000000000..802b17ba0c --- /dev/null +++ b/src/opts/SkPMFloat_sse.h @@ -0,0 +1,36 @@ +/* + * Copyright 2015 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +namespace { // See SkPMFloat.h + +inline SkPMFloat::SkPMFloat(SkPMColor c) { + SkPMColorAssert(c); +#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3 + const int _ = 255; // Zero these bytes. 
+ __m128i fix8 = _mm_cvtsi32_si128((int)c), + fix8_32 = _mm_shuffle_epi8(fix8, _mm_setr_epi8(0,_,_,_, 1,_,_,_, 2,_,_,_, 3,_,_,_)); +#else + __m128i fix8 = _mm_cvtsi32_si128((int)c), + fix8_16 = _mm_unpacklo_epi8 (fix8, _mm_setzero_si128()), + fix8_32 = _mm_unpacklo_epi16(fix8_16, _mm_setzero_si128()); +#endif + fVec = _mm_mul_ps(_mm_cvtepi32_ps(fix8_32), _mm_set1_ps(1.0f / 255)); + SkASSERT(this->isValid()); +} + +inline SkPMColor SkPMFloat::round() const { + // We don't use _mm_cvtps_epi32, because we want precise control over how 0.5 rounds (up). + __m128 scaled = _mm_mul_ps(_mm_set1_ps(255), fVec); + __m128i fix8_32 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), scaled)), + fix8_16 = _mm_packus_epi16(fix8_32, fix8_32), + fix8 = _mm_packus_epi16(fix8_16, fix8_16); + SkPMColor c = _mm_cvtsi128_si32(fix8); + SkPMColorAssert(c); + return c; +} + +} // namespace |