diff options
author | 2017-05-30 14:58:03 -0400 | |
---|---|---|
committer | 2017-05-30 19:16:46 +0000 | |
commit | 836e6c1f7daee7aa02c0779dce2cfd59b6c1d830 (patch) | |
tree | e0ba662c1f7693e52caedb3edcfacac819077281 /src | |
parent | b391f12e78008cb61be32591829de3e95d4fed63 (diff) |
remove sse2::srcover_srgb_srgb
Change-Id: Icc570d8a8f1df1dea202e1d234433491122b9b67
Reviewed-on: https://skia-review.googlesource.com/18135
Reviewed-by: Herb Derby <herb@google.com>
Commit-Queue: Mike Klein <mtklein@chromium.org>
Diffstat (limited to 'src')
-rw-r--r-- | src/opts/SkBlend_opts.h | 152 |
1 files changed, 39 insertions, 113 deletions
diff --git a/src/opts/SkBlend_opts.h b/src/opts/SkBlend_opts.h index 1da4c4fb04..15636e9914 100644 --- a/src/opts/SkBlend_opts.h +++ b/src/opts/SkBlend_opts.h @@ -15,7 +15,7 @@ ninja -C out/Release dm nanobench ; and ./out/Release/dm --match Blend_opts ; an #include "SkNx.h" #include "SkPM4fPriv.h" -#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 +#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41 #include <immintrin.h> #endif @@ -38,7 +38,7 @@ static inline void srcover_srgb_srgb_4(uint32_t* dst, const uint32_t* src) { srcover_srgb_srgb_1(dst , *src ); } -#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 +#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41 static inline __m128i load(const uint32_t* p) { return _mm_loadu_si128(reinterpret_cast<const __m128i*>(p)); @@ -48,124 +48,50 @@ static inline void srcover_srgb_srgb_4(uint32_t* dst, const uint32_t* src) { _mm_storeu_si128(reinterpret_cast<__m128i*>(p), v); } - #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41 - - static void srcover_srgb_srgb( + static void srcover_srgb_srgb( uint32_t* dst, const uint32_t* const srcStart, int ndst, const int nsrc) { - const __m128i alphaMask = _mm_set1_epi32(0xFF000000); - while (ndst > 0) { - int count = SkTMin(ndst, nsrc); - ndst -= count; - const uint32_t* src = srcStart; - const uint32_t* end = dst + (count & ~3); - ptrdiff_t delta = src - dst; - - while (dst < end) { - __m128i pixels = load(src); - if (_mm_testc_si128(pixels, alphaMask)) { - uint32_t* start = dst; - do { - store(dst, pixels); - dst += 4; - } while (dst < end - && _mm_testc_si128(pixels = load(dst + delta), alphaMask)); - src += dst - start; - } else if (_mm_testz_si128(pixels, alphaMask)) { - do { - dst += 4; - src += 4; - } while (dst < end - && _mm_testz_si128(pixels = load(src), alphaMask)); - } else { - uint32_t* start = dst; - do { - srcover_srgb_srgb_4(dst, dst + delta); - dst += 4; - } while (dst < end - && _mm_testnzc_si128(pixels = load(dst + delta), alphaMask)); - src += dst - start; - } - } + const __m128i alphaMask = _mm_set1_epi32(0xFF000000); + while (ndst > 0) { + int count = SkTMin(ndst, nsrc); + ndst -= count; + const uint32_t* src = srcStart; + const uint32_t* end = dst + (count & ~3); + ptrdiff_t delta = src - dst; - count = count & 3; - while (count-- > 0) { - srcover_srgb_srgb_1(dst++, *src++); + while (dst < end) { + __m128i pixels = load(src); + if (_mm_testc_si128(pixels, alphaMask)) { + uint32_t* start = dst; + do { + store(dst, pixels); + dst += 4; + } while (dst < end + && _mm_testc_si128(pixels = load(dst + delta), alphaMask)); + src += dst - start; + } else if (_mm_testz_si128(pixels, alphaMask)) { + do { + dst += 4; + src += 4; + } while (dst < end + && _mm_testz_si128(pixels = load(src), alphaMask)); + } else { + uint32_t* start = dst; + do { + srcover_srgb_srgb_4(dst, dst + delta); + dst += 4; + } while (dst < end + && _mm_testnzc_si128(pixels = load(dst + delta), alphaMask)); + src += dst - start; } } - } - #else - // SSE2 versions - - // Note: In the next three comparisons a group of 4 pixels is converted to a group of - // "signed" pixels because the sse2 does not have an unsigned comparison. - // Make it so that we can use the signed comparison operators by biasing - // 0x00xxxxxx to 0x80xxxxxxx which is the smallest values and biasing 0xffxxxxxx to - // 0x7fxxxxxx which is the largest set of values. - static inline bool check_opaque_alphas(__m128i pixels) { - __m128i signedPixels = _mm_xor_si128(pixels, _mm_set1_epi32(0x80000000)); - int mask = - _mm_movemask_epi8( - _mm_cmplt_epi32(signedPixels, _mm_set1_epi32(0x7F000000))); - return mask == 0; - } - - static inline bool check_transparent_alphas(__m128i pixels) { - __m128i signedPixels = _mm_xor_si128(pixels, _mm_set1_epi32(0x80000000)); - int mask = - _mm_movemask_epi8( - _mm_cmpgt_epi32(signedPixels, _mm_set1_epi32(0x80FFFFFF))); - return mask == 0; - } - static inline bool check_partial_alphas(__m128i pixels) { - __m128i signedPixels = _mm_xor_si128(pixels, _mm_set1_epi32(0x80000000)); - __m128i opaque = _mm_cmplt_epi32(signedPixels, _mm_set1_epi32(0x7F000000)); - __m128i transparent = _mm_cmpgt_epi32(signedPixels, _mm_set1_epi32(0x80FFFFFF)); - int mask = _mm_movemask_epi8(_mm_xor_si128(opaque, transparent)); - return mask == 0; - } - - static void srcover_srgb_srgb( - uint32_t* dst, const uint32_t* const srcStart, int ndst, const int nsrc) { - while (ndst > 0) { - int count = SkTMin(ndst, nsrc); - ndst -= count; - const uint32_t* src = srcStart; - const uint32_t* end = dst + (count & ~3); - const ptrdiff_t delta = src - dst; - - __m128i pixels = load(src); - do { - if (check_opaque_alphas(pixels)) { - uint32_t* start = dst; - do { - store(dst, pixels); - dst += 4; - } while (dst < end && check_opaque_alphas((pixels = load(dst + delta)))); - src += dst - start; - } else if (check_transparent_alphas(pixels)) { - const uint32_t* start = dst; - do { - dst += 4; - } while (dst < end && check_transparent_alphas(pixels = load(dst + delta))); - src += dst - start; - } else { - const uint32_t* start = dst; - do { - srcover_srgb_srgb_4(dst, dst + delta); - dst += 4; - } while (dst < end && check_partial_alphas(pixels = load(dst + delta))); - src += dst - start; - } - } while (dst < end); - - count = count & 3; - while (count-- > 0) { - srcover_srgb_srgb_1(dst++, *src++); - } + count = count & 3; + while (count-- > 0) { + srcover_srgb_srgb_1(dst++, *src++); } } - #endif + } + #else static void srcover_srgb_srgb( |