aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorGravatar msarett <msarett@google.com>2016-01-22 14:12:38 -0800
committerGravatar Commit bot <commit-bot@chromium.org>2016-01-22 14:12:38 -0800
commit13aa1a5ad97156e35184970fc1ce1aaf3c50c91c (patch)
tree712b02b8d8077a0b4451bf3c21a30c0a583ae06c
parentf2b8662b5c73e03648ed1a154b717e354753a0e1 (diff)
SSSE3 opts for RGB -> RGB(FF) or BGR(FF)
Swizzle Bench Runtime z620 0.21x Dell Venue 8 0.26x RGB PNGs Decode Runtime z620 0.91x Dell Venue 8 0.96x BUG=skia:4767 GOLD_TRYBOT_URL= https://gold.skia.org/search2?unt=true&query=source_type%3Dgm&master=false&issue=1618603003 CQ_EXTRA_TRYBOTS=client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot Review URL: https://codereview.chromium.org/1618603003
-rw-r--r--src/opts/SkSwizzler_opts.h39
1 files changed, 37 insertions, 2 deletions
diff --git a/src/opts/SkSwizzler_opts.h b/src/opts/SkSwizzler_opts.h
index ad121cfafe..14960f3b8f 100644
--- a/src/opts/SkSwizzler_opts.h
+++ b/src/opts/SkSwizzler_opts.h
@@ -358,12 +358,47 @@ static void RGBA_to_BGRA(uint32_t* dst, const void* vsrc, int count) {
RGBA_to_BGRA_portable(dst, src, count);
}
+template <bool kSwapRB>  // kSwapRB picks the shuffle: true swaps bytes 0 and 2 of every pixel, false keeps source order.
+static void insert_alpha_should_swaprb(uint32_t dst[], const void* vsrc, int count) {  // Expands `count` packed 3-byte pixels at vsrc into 32-bit pixels at dst with the top 8 bits forced to 0xFF.
+ const uint8_t* src = (const uint8_t*) vsrc;
+
+ const __m128i alphaMask = _mm_set1_epi32(0xFF000000);
+ __m128i expand;
+ const uint8_t X = 0xFF; // Used as a placeholder. The value of X is irrelevant: that output byte is ORed with 0xFF from alphaMask below.
+ if (kSwapRB) {
+ expand = _mm_setr_epi8(2,1,0,X, 5,4,3,X, 8,7,6,X, 11,10,9,X);
+ } else {
+ expand = _mm_setr_epi8(0,1,2,X, 3,4,5,X, 6,7,8,X, 9,10,11,X);
+ }
+
+ while (count >= 6) {
+ // Load a 16-byte vector. While this actually contains 5 pixels plus an
+ // extra component (the first byte of a sixth pixel, hence the count >= 6
+ // bound), we will discard all but the first four pixels on this iteration.
+ __m128i rgb = _mm_loadu_si128((const __m128i*) src);
+
+ // Expand the first four pixels to RGBX (or BGRX when kSwapRB) and then OR in alphaMask to get RGB(FF) / BGR(FF).
+ __m128i rgba = _mm_or_si128(_mm_shuffle_epi8(rgb, expand), alphaMask);
+
+ // Store 4 pixels (16 bytes, unaligned store).
+ _mm_storeu_si128((__m128i*) dst, rgba);
+
+ src += 4*3;
+ dst += 4;
+ count -= 4;
+ }
+
+ // Call portable code to finish up the tail of [0,6) pixels (the loop above exits as soon as count < 6).
+ auto proc = kSwapRB ? RGB_to_BGR1_portable : RGB_to_RGB1_portable;
+ proc(dst, src, count);
+}
+
static void RGB_to_RGB1(uint32_t dst[], const void* src, int count) {
- RGB_to_RGB1_portable(dst, src, count);
+ insert_alpha_should_swaprb<false>(dst, src, count);  // kSwapRB = false: keep the source byte order.
}
static void RGB_to_BGR1(uint32_t dst[], const void* src, int count) {
- RGB_to_BGR1_portable(dst, src, count);
+ insert_alpha_should_swaprb<true>(dst, src, count);  // kSwapRB = true: swap bytes 0 and 2 of each pixel.
}
#else