aboutsummaryrefslogtreecommitdiffhomepage
path: root/src
diff options
context:
space:
mode:
author: mtklein <mtklein@chromium.org> 2015-08-19 18:56:49 -0700
committer: Commit bot <commit-bot@chromium.org> 2015-08-19 18:56:50 -0700
commit: d1c6b7c5007b5c609b44a9cdfe95ef64a5a8f29f (patch)
tree: 0173dadf058256bfe7f76fff0f2de3f195f2855d /src
parent: 3679c1bbe7f1e611d7c06b1134e16049799a3ac6 (diff)
SkColorCubeFilter: require alpha == 0xFF.
This is about a 12% improvement on my desktop, from 134 ms to 118 ms on our bench.
BUG=skia:
Review URL: https://codereview.chromium.org/1295873004
Diffstat (limited to 'src')
-rw-r--r-- src/core/SkPMFloat.h | 2
-rw-r--r-- src/opts/SkColorCubeFilter_opts.h | 8
-rw-r--r-- src/opts/SkPMFloat_neon.h | 4
-rw-r--r-- src/opts/SkPMFloat_none.h | 3
-rw-r--r-- src/opts/SkPMFloat_sse.h | 9
5 files changed, 13 insertions, 13 deletions
diff --git a/src/core/SkPMFloat.h b/src/core/SkPMFloat.h
index 1fc80f5004..4a5621f5b3 100644
--- a/src/core/SkPMFloat.h
+++ b/src/core/SkPMFloat.h
@@ -26,7 +26,7 @@ class SkPMFloat : public Sk4f {
public:
static SkPMFloat FromPMColor(SkPMColor c) { return SkPMFloat(c); }
static SkPMFloat FromARGB(float a, float r, float g, float b) { return SkPMFloat(a,r,g,b); }
- static SkPMFloat FromBGRx(SkColor c); // Ignores c's alpha, instead forcing it to 1.
+ static SkPMFloat FromOpaqueColor(SkColor c); // Requires c's alpha == 0xFF.
Sk4f alphas() const; // argb -> aaaa, generally faster than the equivalent Sk4f(this->a()).
diff --git a/src/opts/SkColorCubeFilter_opts.h b/src/opts/SkColorCubeFilter_opts.h
index 92c75664d2..a61f66d174 100644
--- a/src/opts/SkColorCubeFilter_opts.h
+++ b/src/opts/SkColorCubeFilter_opts.h
@@ -61,10 +61,10 @@ void color_cube_filter_span(const SkPMColor src[],
const SkColor lutColor10 = colorCube[ix + i10];
const SkColor lutColor11 = colorCube[ix + i11];
- Sk4f sum = SkPMFloat::FromBGRx(lutColor00) * g0b0;
- sum = sum + SkPMFloat::FromBGRx(lutColor01) * g0b1;
- sum = sum + SkPMFloat::FromBGRx(lutColor10) * g1b0;
- sum = sum + SkPMFloat::FromBGRx(lutColor11) * g1b1;
+ Sk4f sum = SkPMFloat::FromOpaqueColor(lutColor00) * g0b0;
+ sum = sum + SkPMFloat::FromOpaqueColor(lutColor01) * g0b1;
+ sum = sum + SkPMFloat::FromOpaqueColor(lutColor10) * g1b0;
+ sum = sum + SkPMFloat::FromOpaqueColor(lutColor11) * g1b1;
color = color + sum * Sk4f((float)colorToFactors[x][r]);
}
diff --git a/src/opts/SkPMFloat_neon.h b/src/opts/SkPMFloat_neon.h
index ecb151f1fd..4e099f91ec 100644
--- a/src/opts/SkPMFloat_neon.h
+++ b/src/opts/SkPMFloat_neon.h
@@ -33,14 +33,14 @@ inline Sk4f SkPMFloat::alphas() const {
return vdupq_lane_f32(vget_high_f32(fVec), 1); // Duplicate high lane of high half i.e. lane 3.
}
-inline SkPMFloat SkPMFloat::FromBGRx(SkColor c) {
+inline SkPMFloat SkPMFloat::FromOpaqueColor(SkColor c) {
+ SkASSERT(SkColorGetA(c) == 0xFF);
uint8x8_t fix8 = (uint8x8_t)vdup_n_u32(c);
#if defined(SK_PMCOLOR_IS_RGBA)
fix8 = vtbl1_u8(fix8, vcreate_u8(0x0300010203000102ULL)); // 03 00 01 02, 2x, i.e. swap R&B.
#endif
uint16x8_t fix8_16 = vmovl_u8(fix8);
uint32x4_t fix8_32 = vmovl_u16(vget_low_u16(fix8_16));
- fix8_32 = vsetq_lane_u32(0xFF, fix8_32, 3); // Force alpha to 1.
SkPMFloat pmf = Sk4f(vmulq_f32(vcvtq_f32_u32(fix8_32), vdupq_n_f32(1.0f/255)));
SkASSERT(pmf.isValid());
diff --git a/src/opts/SkPMFloat_none.h b/src/opts/SkPMFloat_none.h
index adc0bde3b0..17c76d90f4 100644
--- a/src/opts/SkPMFloat_none.h
+++ b/src/opts/SkPMFloat_none.h
@@ -34,7 +34,8 @@ inline Sk4f SkPMFloat::alphas() const {
return Sk4f(this->a());
}
-inline SkPMFloat SkPMFloat::FromBGRx(SkColor c) {
+inline SkPMFloat SkPMFloat::FromOpaqueColor(SkColor c) {
+ SkASSERT(SkColorGetA(c) == 0xFF);
float inv255 = 1.0f / 255;
SkPMFloat pmf = SkPMFloat::FromARGB(1.0f,
SkColorGetR(c) * inv255,
diff --git a/src/opts/SkPMFloat_sse.h b/src/opts/SkPMFloat_sse.h
index 6cfee1da6f..6a4d5b6b5c 100644
--- a/src/opts/SkPMFloat_sse.h
+++ b/src/opts/SkPMFloat_sse.h
@@ -38,15 +38,16 @@ inline Sk4f SkPMFloat::alphas() const {
return _mm_shuffle_ps(fVec, fVec, 0xff); // Read as 11 11 11 11, copying lane 3 to all lanes.
}
-inline SkPMFloat SkPMFloat::FromBGRx(SkColor c) {
+inline SkPMFloat SkPMFloat::FromOpaqueColor(SkColor c) {
+ SkASSERT(SkColorGetA(c) == 0xFF);
__m128i fix8 = _mm_cvtsi32_si128((int)c);
#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3
const char _ = ~0; // Zero these bytes.
__m128i fix8_32 = _mm_shuffle_epi8(fix8,
#if defined(SK_PMCOLOR_IS_BGRA)
- _mm_setr_epi8(0,_,_,_, 1,_,_,_, 2,_,_,_, _,_,_,_)
+ _mm_setr_epi8(0,_,_,_, 1,_,_,_, 2,_,_,_, 3,_,_,_)
#else
- _mm_setr_epi8(2,_,_,_, 1,_,_,_, 0,_,_,_, _,_,_,_)
+ _mm_setr_epi8(2,_,_,_, 1,_,_,_, 0,_,_,_, 3,_,_,_)
#endif
);
#else
@@ -56,8 +57,6 @@ inline SkPMFloat SkPMFloat::FromBGRx(SkColor c) {
fix8_32 = _mm_shuffle_epi32(fix8_32, 0xC6); // C6 == 11 00 01 10, i.e swap lanes 0 and 2.
#endif
#endif
- fix8_32 = _mm_or_si128(fix8_32, _mm_set_epi32(0xFF,0,0,0)); // Force alpha to 1.
-
SkPMFloat pmf = Sk4f(_mm_mul_ps(_mm_cvtepi32_ps(fix8_32), _mm_set1_ps(1.0f/255)));
SkASSERT(pmf.isValid());
return pmf;