diff options
author | commit-bot@chromium.org <commit-bot@chromium.org@2bbb7eff-a529-9590-31e7-b0007b416f81> | 2014-02-24 04:23:39 +0000 |
---|---|---|
committer | commit-bot@chromium.org <commit-bot@chromium.org@2bbb7eff-a529-9590-31e7-b0007b416f81> | 2014-02-24 04:23:39 +0000 |
commit | 39ce33a1facae795eb2f02e35674702de7eb23b5 (patch) | |
tree | 0fe3befc951a3465a8967cadd23ab37fe9f05bae /src/opts/SkBlitRow_opts_SSE2.cpp | |
parent | 86cbf99a5122cd419eb76fd07cea80499f31a383 (diff) |
SSE2 implementation of S32_D565_Opaque
Benchmarks hitting this path can benefit from this patch.
Here are the data:
before after
gradient_radial2_mirror 10885.52 10849.48 0.33%
gradient_radial2_clamp_hicolor 11819.69 11644.83 1.48%
gradient_radial2_clamp 11816.10 11649.91 1.41%
bitmaprect_FF_filter_trans 6.27 4.88 22.17%
bitmaprect_FF_nofilter_trans 6.27 4.88 22.17%
bitmaprect_FF_filter_identity 6.31 4.86 22.98%
bitmaprect_FF_nofilter_identity 6.25 4.86 22.24%
bitmap_4444_update 6.26 5.05 19.33%
bitmap_4444_update_volatile 6.21 5.06 18.52%
bitmap_4444 6.22 5.06 18.65%
BUG=
R=mtklein@google.com
Author: qiankun.miao@intel.com
Review URL: https://codereview.chromium.org/172083003
git-svn-id: http://skia.googlecode.com/svn/trunk@13556 2bbb7eff-a529-9590-31e7-b0007b416f81
Diffstat (limited to 'src/opts/SkBlitRow_opts_SSE2.cpp')
-rw-r--r-- | src/opts/SkBlitRow_opts_SSE2.cpp | 77 |
1 files changed, 77 insertions, 0 deletions
diff --git a/src/opts/SkBlitRow_opts_SSE2.cpp b/src/opts/SkBlitRow_opts_SSE2.cpp index 47651c460b..9e99b4bc46 100644 --- a/src/opts/SkBlitRow_opts_SSE2.cpp +++ b/src/opts/SkBlitRow_opts_SSE2.cpp @@ -853,6 +853,83 @@ void SkBlitLCD16OpaqueRow_SSE2(SkPMColor dst[], const uint16_t mask[], } } +/* SSE2 version of S32_D565_Opaque() + * portable version is in core/SkBlitRow_D16.cpp + */ +void S32_D565_Opaque_SSE2(uint16_t* SK_RESTRICT dst, + const SkPMColor* SK_RESTRICT src, int count, + U8CPU alpha, int /*x*/, int /*y*/) { + SkASSERT(255 == alpha); + + if (count <= 0) { + return; + } + + if (count >= 8) { + while (((size_t)dst & 0x0F) != 0) { + SkPMColor c = *src++; + SkPMColorAssert(c); + + *dst++ = SkPixel32ToPixel16_ToU16(c); + count--; + } + + const __m128i* s = reinterpret_cast<const __m128i*>(src); + __m128i* d = reinterpret_cast<__m128i*>(dst); + __m128i r16_mask = _mm_set1_epi32(SK_R16_MASK); + __m128i g16_mask = _mm_set1_epi32(SK_G16_MASK); + __m128i b16_mask = _mm_set1_epi32(SK_B16_MASK); + + while (count >= 8) { + // Load 8 pixels of src. + __m128i src_pixel1 = _mm_loadu_si128(s++); + __m128i src_pixel2 = _mm_loadu_si128(s++); + + // Calculate result r. + __m128i r1 = _mm_srli_epi32(src_pixel1, + SK_R32_SHIFT + (8 - SK_R16_BITS)); + r1 = _mm_and_si128(r1, r16_mask); + __m128i r2 = _mm_srli_epi32(src_pixel2, + SK_R32_SHIFT + (8 - SK_R16_BITS)); + r2 = _mm_and_si128(r2, r16_mask); + __m128i r = _mm_packs_epi32(r1, r2); + + // Calculate result g. + __m128i g1 = _mm_srli_epi32(src_pixel1, + SK_G32_SHIFT + (8 - SK_G16_BITS)); + g1 = _mm_and_si128(g1, g16_mask); + __m128i g2 = _mm_srli_epi32(src_pixel2, + SK_G32_SHIFT + (8 - SK_G16_BITS)); + g2 = _mm_and_si128(g2, g16_mask); + __m128i g = _mm_packs_epi32(g1, g2); + + // Calculate result b. 
+ __m128i b1 = _mm_srli_epi32(src_pixel1, + SK_B32_SHIFT + (8 - SK_B16_BITS)); + b1 = _mm_and_si128(b1, b16_mask); + __m128i b2 = _mm_srli_epi32(src_pixel2, + SK_B32_SHIFT + (8 - SK_B16_BITS)); + b2 = _mm_and_si128(b2, b16_mask); + __m128i b = _mm_packs_epi32(b1, b2); + + // Store 8 16-bit colors in dst. + __m128i d_pixel = SkPackRGB16_SSE(r, g, b); + _mm_store_si128(d++, d_pixel); + count -= 8; + } + src = reinterpret_cast<const SkPMColor*>(s); + dst = reinterpret_cast<uint16_t*>(d); + } + + if (count > 0) { + do { + SkPMColor c = *src++; + SkPMColorAssert(c); + *dst++ = SkPixel32ToPixel16_ToU16(c); + } while (--count != 0); + } +} + /* SSE2 version of S32A_D565_Opaque() * portable version is in core/SkBlitRow_D16.cpp */ |