diff options
| field | value | date |
|---|---|---|
| author | mtklein <mtklein@chromium.org> | 2015-08-19 18:56:49 -0700 |
| committer | Commit bot <commit-bot@chromium.org> | 2015-08-19 18:56:50 -0700 |
| commit | d1c6b7c5007b5c609b44a9cdfe95ef64a5a8f29f (patch) | |
| tree | 0173dadf058256bfe7f76fff0f2de3f195f2855d /src | |
| parent | 3679c1bbe7f1e611d7c06b1134e16049799a3ac6 (diff) | |
SkColorCubeFilter: require alpha == 0xFF.
This is about a 12% improvement on my desktop, from 134 to 118ms on our bench.
BUG=skia:
Review URL: https://codereview.chromium.org/1295873004
Diffstat (limited to 'src')
-rw-r--r-- | src/core/SkPMFloat.h | 2 | ||||
-rw-r--r-- | src/opts/SkColorCubeFilter_opts.h | 8 | ||||
-rw-r--r-- | src/opts/SkPMFloat_neon.h | 4 | ||||
-rw-r--r-- | src/opts/SkPMFloat_none.h | 3 | ||||
-rw-r--r-- | src/opts/SkPMFloat_sse.h | 9 |
5 files changed, 13 insertions, 13 deletions
diff --git a/src/core/SkPMFloat.h b/src/core/SkPMFloat.h index 1fc80f5004..4a5621f5b3 100644 --- a/src/core/SkPMFloat.h +++ b/src/core/SkPMFloat.h @@ -26,7 +26,7 @@ class SkPMFloat : public Sk4f { public: static SkPMFloat FromPMColor(SkPMColor c) { return SkPMFloat(c); } static SkPMFloat FromARGB(float a, float r, float g, float b) { return SkPMFloat(a,r,g,b); } - static SkPMFloat FromBGRx(SkColor c); // Ignores c's alpha, instead forcing it to 1. + static SkPMFloat FromOpaqueColor(SkColor c); // Requires c's alpha == 0xFF. Sk4f alphas() const; // argb -> aaaa, generally faster than the equivalent Sk4f(this->a()). diff --git a/src/opts/SkColorCubeFilter_opts.h b/src/opts/SkColorCubeFilter_opts.h index 92c75664d2..a61f66d174 100644 --- a/src/opts/SkColorCubeFilter_opts.h +++ b/src/opts/SkColorCubeFilter_opts.h @@ -61,10 +61,10 @@ void color_cube_filter_span(const SkPMColor src[], const SkColor lutColor10 = colorCube[ix + i10]; const SkColor lutColor11 = colorCube[ix + i11]; - Sk4f sum = SkPMFloat::FromBGRx(lutColor00) * g0b0; - sum = sum + SkPMFloat::FromBGRx(lutColor01) * g0b1; - sum = sum + SkPMFloat::FromBGRx(lutColor10) * g1b0; - sum = sum + SkPMFloat::FromBGRx(lutColor11) * g1b1; + Sk4f sum = SkPMFloat::FromOpaqueColor(lutColor00) * g0b0; + sum = sum + SkPMFloat::FromOpaqueColor(lutColor01) * g0b1; + sum = sum + SkPMFloat::FromOpaqueColor(lutColor10) * g1b0; + sum = sum + SkPMFloat::FromOpaqueColor(lutColor11) * g1b1; color = color + sum * Sk4f((float)colorToFactors[x][r]); } diff --git a/src/opts/SkPMFloat_neon.h b/src/opts/SkPMFloat_neon.h index ecb151f1fd..4e099f91ec 100644 --- a/src/opts/SkPMFloat_neon.h +++ b/src/opts/SkPMFloat_neon.h @@ -33,14 +33,14 @@ inline Sk4f SkPMFloat::alphas() const { return vdupq_lane_f32(vget_high_f32(fVec), 1); // Duplicate high lane of high half i.e. lane 3. 
} -inline SkPMFloat SkPMFloat::FromBGRx(SkColor c) { +inline SkPMFloat SkPMFloat::FromOpaqueColor(SkColor c) { + SkASSERT(SkColorGetA(c) == 0xFF); uint8x8_t fix8 = (uint8x8_t)vdup_n_u32(c); #if defined(SK_PMCOLOR_IS_RGBA) fix8 = vtbl1_u8(fix8, vcreate_u8(0x0300010203000102ULL)); // 03 00 01 02, 2x, i.e. swap R&B. #endif uint16x8_t fix8_16 = vmovl_u8(fix8); uint32x4_t fix8_32 = vmovl_u16(vget_low_u16(fix8_16)); - fix8_32 = vsetq_lane_u32(0xFF, fix8_32, 3); // Force alpha to 1. SkPMFloat pmf = Sk4f(vmulq_f32(vcvtq_f32_u32(fix8_32), vdupq_n_f32(1.0f/255))); SkASSERT(pmf.isValid()); diff --git a/src/opts/SkPMFloat_none.h b/src/opts/SkPMFloat_none.h index adc0bde3b0..17c76d90f4 100644 --- a/src/opts/SkPMFloat_none.h +++ b/src/opts/SkPMFloat_none.h @@ -34,7 +34,8 @@ inline Sk4f SkPMFloat::alphas() const { return Sk4f(this->a()); } -inline SkPMFloat SkPMFloat::FromBGRx(SkColor c) { +inline SkPMFloat SkPMFloat::FromOpaqueColor(SkColor c) { + SkASSERT(SkColorGetA(c) == 0xFF); float inv255 = 1.0f / 255; SkPMFloat pmf = SkPMFloat::FromARGB(1.0f, SkColorGetR(c) * inv255, diff --git a/src/opts/SkPMFloat_sse.h b/src/opts/SkPMFloat_sse.h index 6cfee1da6f..6a4d5b6b5c 100644 --- a/src/opts/SkPMFloat_sse.h +++ b/src/opts/SkPMFloat_sse.h @@ -38,15 +38,16 @@ inline Sk4f SkPMFloat::alphas() const { return _mm_shuffle_ps(fVec, fVec, 0xff); // Read as 11 11 11 11, copying lane 3 to all lanes. } -inline SkPMFloat SkPMFloat::FromBGRx(SkColor c) { +inline SkPMFloat SkPMFloat::FromOpaqueColor(SkColor c) { + SkASSERT(SkColorGetA(c) == 0xFF); __m128i fix8 = _mm_cvtsi32_si128((int)c); #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3 const char _ = ~0; // Zero these bytes. 
__m128i fix8_32 = _mm_shuffle_epi8(fix8, #if defined(SK_PMCOLOR_IS_BGRA) - _mm_setr_epi8(0,_,_,_, 1,_,_,_, 2,_,_,_, _,_,_,_) + _mm_setr_epi8(0,_,_,_, 1,_,_,_, 2,_,_,_, 3,_,_,_) #else - _mm_setr_epi8(2,_,_,_, 1,_,_,_, 0,_,_,_, _,_,_,_) + _mm_setr_epi8(2,_,_,_, 1,_,_,_, 0,_,_,_, 3,_,_,_) #endif ); #else @@ -56,8 +57,6 @@ inline SkPMFloat SkPMFloat::FromBGRx(SkColor c) { fix8_32 = _mm_shuffle_epi32(fix8_32, 0xC6); // C6 == 11 00 01 10, i.e swap lanes 0 and 2. #endif #endif - fix8_32 = _mm_or_si128(fix8_32, _mm_set_epi32(0xFF,0,0,0)); // Force alpha to 1. - SkPMFloat pmf = Sk4f(_mm_mul_ps(_mm_cvtepi32_ps(fix8_32), _mm_set1_ps(1.0f/255))); SkASSERT(pmf.isValid()); return pmf; |