diff options
author | 2016-01-22 14:12:38 -0800 | |
---|---|---|
committer | 2016-01-22 14:12:38 -0800 | |
commit | 13aa1a5ad97156e35184970fc1ce1aaf3c50c91c (patch) | |
tree | 712b02b8d8077a0b4451bf3c21a30c0a583ae06c | |
parent | f2b8662b5c73e03648ed1a154b717e354753a0e1 (diff) |
SSSE3 opts for RGB -> RGB(FF) or BGR(FF)
Swizzle Bench Runtime
z620 0.21x
Dell Venue 8 0.26x
RGB PNGs Decode Runtime
z620 0.91x
Dell Venue 8 0.96x
BUG=skia:4767
GOLD_TRYBOT_URL= https://gold.skia.org/search2?unt=true&query=source_type%3Dgm&master=false&issue=1618603003
CQ_EXTRA_TRYBOTS=client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot
Review URL: https://codereview.chromium.org/1618603003
-rw-r--r-- | src/opts/SkSwizzler_opts.h | 39 |
1 files changed, 37 insertions, 2 deletions
diff --git a/src/opts/SkSwizzler_opts.h b/src/opts/SkSwizzler_opts.h index ad121cfafe..14960f3b8f 100644 --- a/src/opts/SkSwizzler_opts.h +++ b/src/opts/SkSwizzler_opts.h @@ -358,12 +358,47 @@ static void RGBA_to_BGRA(uint32_t* dst, const void* vsrc, int count) { RGBA_to_BGRA_portable(dst, src, count); } +template <bool kSwapRB> +static void insert_alpha_should_swaprb(uint32_t dst[], const void* vsrc, int count) { + const uint8_t* src = (const uint8_t*) vsrc; + + const __m128i alphaMask = _mm_set1_epi32(0xFF000000); + __m128i expand; + const uint8_t X = 0xFF; // Used a placeholder. The value of X is irrelevant. + if (kSwapRB) { + expand = _mm_setr_epi8(2,1,0,X, 5,4,3,X, 8,7,6,X, 11,10,9,X); + } else { + expand = _mm_setr_epi8(0,1,2,X, 3,4,5,X, 6,7,8,X, 9,10,11,X); + } + + while (count >= 6) { + // Load a vector. While this actually contains 5 pixels plus an + // extra component, we will discard all but the first four pixels on + // this iteration. + __m128i rgb = _mm_loadu_si128((const __m128i*) src); + + // Expand the first four pixels to RGBX and then mask to RGB(FF). + __m128i rgba = _mm_or_si128(_mm_shuffle_epi8(rgb, expand), alphaMask); + + // Store 4 pixels. + _mm_storeu_si128((__m128i*) dst, rgba); + + src += 4*3; + dst += 4; + count -= 4; + } + + // Call portable code to finish up the tail of [0,4) pixels. + auto proc = kSwapRB ? RGB_to_BGR1_portable : RGB_to_RGB1_portable; + proc(dst, src, count); +} + static void RGB_to_RGB1(uint32_t dst[], const void* src, int count) { - RGB_to_RGB1_portable(dst, src, count); + insert_alpha_should_swaprb<false>(dst, src, count); } static void RGB_to_BGR1(uint32_t dst[], const void* src, int count) { - RGB_to_BGR1_portable(dst, src, count); + insert_alpha_should_swaprb<true>(dst, src, count); } #else |