diff options
author | qiankun.miao <qiankun.miao@intel.com> | 2014-11-14 08:23:53 -0800 |
---|---|---|
committer | Commit bot <commit-bot@chromium.org> | 2014-11-14 08:23:53 -0800 |
commit | f04713d9c8f2af15f97984b47587358488e2594e (patch) | |
tree | aa8941aecee7cf81af584a6205fc0fe26d4230ea /src/opts/SkColor_opts_SSE2.h | |
parent | c7e55e410c9ed766d7a42cb6534ab36e0b0a6f00 (diff) |
Optimize SkAlphaMulQ_SSE2
These two mask clears are useless because _mm_srli_epi16 fills the high byte
of each word with 0.
BUG=skia:
Review URL: https://codereview.chromium.org/724333003
Diffstat (limited to 'src/opts/SkColor_opts_SSE2.h')
-rw-r--r-- | src/opts/SkColor_opts_SSE2.h | 7 |
1 files changed, 5 insertions, 2 deletions
diff --git a/src/opts/SkColor_opts_SSE2.h b/src/opts/SkColor_opts_SSE2.h index 7e61d526b3..741d1ab777 100644 --- a/src/opts/SkColor_opts_SSE2.h +++ b/src/opts/SkColor_opts_SSE2.h @@ -10,6 +10,8 @@ #include <emmintrin.h> +#define ASSERT_EQ(a,b) SkASSERT(0xffff == _mm_movemask_epi8(_mm_cmpeq_epi8((a), (b)))) + // Because no _mm_mul_epi32() in SSE2, we emulate it here. // Multiplies 4 32-bit integers from a by 4 32-bit intergers from b. // The 4 multiplication results should be represented within 32-bit @@ -52,11 +54,11 @@ static inline __m128i SkAlphaMulQ_SSE2(const __m128i& c, const __m128i& scale) { // uint32_t ag = ((c >> 8) & mask) * scale __m128i ag = _mm_srli_epi16(c, 8); - ag = _mm_and_si128(ag, mask); + ASSERT_EQ(ag, _mm_and_si128(mask, ag)); // ag = _mm_srli_epi16(c, 8) did this for us. ag = _mm_mullo_epi16(ag, s); // (rb & mask) | (ag & ~mask) - rb = _mm_and_si128(mask, rb); + ASSERT_EQ(rb, _mm_and_si128(mask, rb)); // rb = _mm_srli_epi16(rb, 8) did this for us. ag = _mm_andnot_si128(mask, ag); return _mm_or_si128(rb, ag); } @@ -183,4 +185,5 @@ static inline __m128i SkPixel32ToPixel16_ToU16_SSE2(const __m128i& src_pixel1, return d_pixel; } +#undef ASSERT_EQ #endif // SkColor_opts_SSE2_DEFINED |