diff options
author | 2016-02-02 13:41:03 -0800 | |
---|---|---|
committer | 2016-02-02 13:41:03 -0800 | |
commit | 0700651128f8c505da65e651f9788589593f07c4 (patch) | |
tree | 416af12f7e0a5fd65fa04a4957b576bc73a74b9d /src/opts/SkSwizzler_opts.h | |
parent | 792345f540e90b8debffcf951398f164832e5ba8 (diff) |
SSSE3 optimizations for gray -> RGBA (or BGRA)
Swizzle Bench Runtime
Dell Venue 8 0.16x
HP z620 0.47x
PNG Decode Time (for test set of gray encoded PNGs)
Dell Venue 8 0.80x
HP z620 0.96x
BUG=skia:4767
GOLD_TRYBOT_URL= https://gold.skia.org/search2?unt=true&query=source_type%3Dgm&master=false&issue=1657393002
CQ_EXTRA_TRYBOTS=client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot
Review URL: https://codereview.chromium.org/1657393002
Diffstat (limited to 'src/opts/SkSwizzler_opts.h')
-rw-r--r-- | src/opts/SkSwizzler_opts.h | 28 |
1 files changed, 27 insertions, 1 deletions
```diff
diff --git a/src/opts/SkSwizzler_opts.h b/src/opts/SkSwizzler_opts.h
index 612700e837..27416ea9e8 100644
--- a/src/opts/SkSwizzler_opts.h
+++ b/src/opts/SkSwizzler_opts.h
@@ -452,7 +452,33 @@ static void RGB_to_BGR1(uint32_t dst[], const void* src, int count) {
     insert_alpha_should_swaprb<true>(dst, src, count);
 }
 
-static void gray_to_RGB1(uint32_t dst[], const void* src, int count) {
+static void gray_to_RGB1(uint32_t dst[], const void* vsrc, int count) {
+    const uint8_t* src = (const uint8_t*) vsrc;
+
+    const __m128i alphas = _mm_set1_epi8((uint8_t) 0xFF);
+    while (count >= 16) {
+        __m128i grays = _mm_loadu_si128((const __m128i*) src);
+
+        __m128i gg_lo = _mm_unpacklo_epi8(grays, grays);
+        __m128i gg_hi = _mm_unpackhi_epi8(grays, grays);
+        __m128i ga_lo = _mm_unpacklo_epi8(grays, alphas);
+        __m128i ga_hi = _mm_unpackhi_epi8(grays, alphas);
+
+        __m128i ggga0 = _mm_unpacklo_epi16(gg_lo, ga_lo);
+        __m128i ggga1 = _mm_unpackhi_epi16(gg_lo, ga_lo);
+        __m128i ggga2 = _mm_unpacklo_epi16(gg_hi, ga_hi);
+        __m128i ggga3 = _mm_unpackhi_epi16(gg_hi, ga_hi);
+
+        _mm_storeu_si128((__m128i*) (dst + 0), ggga0);
+        _mm_storeu_si128((__m128i*) (dst + 4), ggga1);
+        _mm_storeu_si128((__m128i*) (dst + 8), ggga2);
+        _mm_storeu_si128((__m128i*) (dst + 12), ggga3);
+
+        src += 16;
+        dst += 16;
+        count -= 16;
+    }
+
     gray_to_RGB1_portable(dst, src, count);
 }
 
```