diff options
author | msarett <msarett@google.com> | 2016-02-08 13:26:25 -0800 |
---|---|---|
committer | Commit bot <commit-bot@chromium.org> | 2016-02-08 13:26:25 -0800 |
commit | c5c322d8ecfc05718f9f04360956c4f1f9dc33c1 (patch) | |
tree | 6d89dc26ffb005c35bf65d5edf87202c3541d6dd | |
parent | 3125565804054691b110b4731bc5a32070fab780 (diff) |
Optimize CMYK->RGBA (BGRA) transform for jpeg decodes
Swizzle Bench Runtime:
  Nexus 6P: 0.14x
  Dell Venue 8: 0.12x
CMYK Jpeg Decode Runtime:
  Nexus 6P: 0.81x
  Dell Venue 8: 0.85x
BUG=skia:
GOLD_TRYBOT_URL= https://gold.skia.org/search2?unt=true&query=source_type%3Dgm&master=false&issue=1676773003
CQ_EXTRA_TRYBOTS=client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot
Review URL: https://codereview.chromium.org/1676773003
-rw-r--r-- | bench/SwizzleBench.cpp | 2 |
-rw-r--r-- | src/codec/SkSwizzler.cpp | 17 |
-rw-r--r-- | src/core/SkOpts.cpp | 18 |
-rw-r--r-- | src/core/SkOpts.h | 18 |
-rw-r--r-- | src/opts/SkOpts_neon.cpp | 18 |
-rw-r--r-- | src/opts/SkOpts_ssse3.cpp | 18 |
-rw-r--r-- | src/opts/SkSwizzler_opts.h | 168 |
7 files changed, 227 insertions, 32 deletions
diff --git a/bench/SwizzleBench.cpp b/bench/SwizzleBench.cpp index cf7a407f0e..5490a6c44b 100644 --- a/bench/SwizzleBench.cpp +++ b/bench/SwizzleBench.cpp @@ -35,3 +35,5 @@ DEF_BENCH(return new SwizzleBench("SkOpts::RGB_to_BGR1", SkOpts::RGB_to_BGR1)); DEF_BENCH(return new SwizzleBench("SkOpts::gray_to_RGB1", SkOpts::gray_to_RGB1)); DEF_BENCH(return new SwizzleBench("SkOpts::grayA_to_RGBA", SkOpts::grayA_to_RGBA)); DEF_BENCH(return new SwizzleBench("SkOpts::grayA_to_rgbA", SkOpts::grayA_to_rgbA)); +DEF_BENCH(return new SwizzleBench("SkOpts::inverted_CMYK_to_RGB1", SkOpts::inverted_CMYK_to_RGB1)); +DEF_BENCH(return new SwizzleBench("SkOpts::inverted_CMYK_to_BGR1", SkOpts::inverted_CMYK_to_BGR1)); diff --git a/src/codec/SkSwizzler.cpp b/src/codec/SkSwizzler.cpp index debcd45c20..f9f212072e 100644 --- a/src/codec/SkSwizzler.cpp +++ b/src/codec/SkSwizzler.cpp @@ -592,6 +592,21 @@ static void swizzle_cmyk_to_n32( } } +static void fast_swizzle_cmyk_to_n32( + void* dst, const uint8_t* src, int width, int bpp, int deltaSrc, int offset, + const SkPMColor ctable[]) { + + // This function must not be called if we are sampling. If we are not + // sampling, deltaSrc should equal bpp. 
+ SkASSERT(deltaSrc == bpp); + +#ifdef SK_PMCOLOR_IS_RGBA + SkOpts::inverted_CMYK_to_RGB1((uint32_t*) dst, src + offset, width); +#else + SkOpts::inverted_CMYK_to_BGR1((uint32_t*) dst, src + offset, width); +#endif +} + static void swizzle_cmyk_to_565( void* SK_RESTRICT dstRow, const uint8_t* SK_RESTRICT src, int dstWidth, int bpp, int deltaSrc, int offset, const SkPMColor ctable[]) { @@ -811,6 +826,7 @@ SkSwizzler* SkSwizzler::CreateSwizzler(SkSwizzler::SrcConfig sc, break; case kRGB_565_SkColorType: proc = &swizzle_rgb_to_565; + break; default: break; } @@ -844,6 +860,7 @@ SkSwizzler* SkSwizzler::CreateSwizzler(SkSwizzler::SrcConfig sc, switch (dstInfo.colorType()) { case kN32_SkColorType: proc = &swizzle_cmyk_to_n32; + fastProc = &fast_swizzle_cmyk_to_n32; break; case kRGB_565_SkColorType: proc = &swizzle_cmyk_to_565; diff --git a/src/core/SkOpts.cpp b/src/core/SkOpts.cpp index 8065b77d85..56604171bb 100644 --- a/src/core/SkOpts.cpp +++ b/src/core/SkOpts.cpp @@ -79,14 +79,16 @@ namespace SkOpts { decltype(matrix_scale_translate) matrix_scale_translate = sk_default::matrix_scale_translate; decltype(matrix_affine) matrix_affine = sk_default::matrix_affine; - decltype(RGBA_to_BGRA) RGBA_to_BGRA = sk_default::RGBA_to_BGRA; - decltype(RGBA_to_rgbA) RGBA_to_rgbA = sk_default::RGBA_to_rgbA; - decltype(RGBA_to_bgrA) RGBA_to_bgrA = sk_default::RGBA_to_bgrA; - decltype(RGB_to_RGB1) RGB_to_RGB1 = sk_default::RGB_to_RGB1; - decltype(RGB_to_BGR1) RGB_to_BGR1 = sk_default::RGB_to_BGR1; - decltype(gray_to_RGB1) gray_to_RGB1 = sk_default::gray_to_RGB1; - decltype(grayA_to_RGBA) grayA_to_RGBA = sk_default::grayA_to_RGBA; - decltype(grayA_to_rgbA) grayA_to_rgbA = sk_default::grayA_to_rgbA; + decltype(RGBA_to_BGRA) RGBA_to_BGRA = sk_default::RGBA_to_BGRA; + decltype(RGBA_to_rgbA) RGBA_to_rgbA = sk_default::RGBA_to_rgbA; + decltype(RGBA_to_bgrA) RGBA_to_bgrA = sk_default::RGBA_to_bgrA; + decltype(RGB_to_RGB1) RGB_to_RGB1 = sk_default::RGB_to_RGB1; + decltype(RGB_to_BGR1) 
RGB_to_BGR1 = sk_default::RGB_to_BGR1; + decltype(gray_to_RGB1) gray_to_RGB1 = sk_default::gray_to_RGB1; + decltype(grayA_to_RGBA) grayA_to_RGBA = sk_default::grayA_to_RGBA; + decltype(grayA_to_rgbA) grayA_to_rgbA = sk_default::grayA_to_rgbA; + decltype(inverted_CMYK_to_RGB1) inverted_CMYK_to_RGB1 = sk_default::inverted_CMYK_to_RGB1; + decltype(inverted_CMYK_to_BGR1) inverted_CMYK_to_BGR1 = sk_default::inverted_CMYK_to_BGR1; // Each Init_foo() is defined in src/opts/SkOpts_foo.cpp. void Init_ssse3(); diff --git a/src/core/SkOpts.h b/src/core/SkOpts.h index 69da36254c..2e8778e1a1 100644 --- a/src/core/SkOpts.h +++ b/src/core/SkOpts.h @@ -57,14 +57,16 @@ namespace SkOpts { // Swizzle input into some sort of 8888 pixel, {premul,unpremul} x {rgba,bgra}. typedef void (*Swizzle_8888)(uint32_t*, const void*, int); - extern Swizzle_8888 RGBA_to_BGRA, // i.e. just swap RB - RGBA_to_rgbA, // i.e. just premultiply - RGBA_to_bgrA, // i.e. swap RB and premultiply - RGB_to_RGB1, // i.e. insert an opaque alpha - RGB_to_BGR1, // i.e. swap RB and insert an opaque alpha - gray_to_RGB1, // i.e. expand to color channels + an opaque alpha - grayA_to_RGBA, // i.e. expand to color channels - grayA_to_rgbA; // i.e. expand to color channels and premultiply + extern Swizzle_8888 RGBA_to_BGRA, // i.e. just swap RB + RGBA_to_rgbA, // i.e. just premultiply + RGBA_to_bgrA, // i.e. swap RB and premultiply + RGB_to_RGB1, // i.e. insert an opaque alpha + RGB_to_BGR1, // i.e. swap RB and insert an opaque alpha + gray_to_RGB1, // i.e. expand to color channels + an opaque alpha + grayA_to_RGBA, // i.e. expand to color channels + grayA_to_rgbA, // i.e. expand to color channels and premultiply + inverted_CMYK_to_RGB1, // i.e. convert color space + inverted_CMYK_to_BGR1; // i.e. 
convert color space } #endif//SkOpts_DEFINED diff --git a/src/opts/SkOpts_neon.cpp b/src/opts/SkOpts_neon.cpp index 01f43ef3ed..80fb4e93d0 100644 --- a/src/opts/SkOpts_neon.cpp +++ b/src/opts/SkOpts_neon.cpp @@ -47,13 +47,15 @@ namespace SkOpts { matrix_scale_translate = sk_neon::matrix_scale_translate; matrix_affine = sk_neon::matrix_affine; - RGBA_to_BGRA = sk_neon::RGBA_to_BGRA; - RGBA_to_rgbA = sk_neon::RGBA_to_rgbA; - RGBA_to_bgrA = sk_neon::RGBA_to_bgrA; - RGB_to_RGB1 = sk_neon::RGB_to_RGB1; - RGB_to_BGR1 = sk_neon::RGB_to_BGR1; - gray_to_RGB1 = sk_neon::gray_to_RGB1; - grayA_to_RGBA = sk_neon::grayA_to_RGBA; - grayA_to_rgbA = sk_neon::grayA_to_rgbA; + RGBA_to_BGRA = sk_neon::RGBA_to_BGRA; + RGBA_to_rgbA = sk_neon::RGBA_to_rgbA; + RGBA_to_bgrA = sk_neon::RGBA_to_bgrA; + RGB_to_RGB1 = sk_neon::RGB_to_RGB1; + RGB_to_BGR1 = sk_neon::RGB_to_BGR1; + gray_to_RGB1 = sk_neon::gray_to_RGB1; + grayA_to_RGBA = sk_neon::grayA_to_RGBA; + grayA_to_rgbA = sk_neon::grayA_to_rgbA; + inverted_CMYK_to_RGB1 = sk_neon::inverted_CMYK_to_RGB1; + inverted_CMYK_to_BGR1 = sk_neon::inverted_CMYK_to_BGR1; } } diff --git a/src/opts/SkOpts_ssse3.cpp b/src/opts/SkOpts_ssse3.cpp index 13c8fbf8e2..94a26a1ca2 100644 --- a/src/opts/SkOpts_ssse3.cpp +++ b/src/opts/SkOpts_ssse3.cpp @@ -18,13 +18,15 @@ namespace SkOpts { blit_mask_d32_a8 = sk_ssse3::blit_mask_d32_a8; color_cube_filter_span = sk_ssse3::color_cube_filter_span; - RGBA_to_BGRA = sk_ssse3::RGBA_to_BGRA; - RGBA_to_rgbA = sk_ssse3::RGBA_to_rgbA; - RGBA_to_bgrA = sk_ssse3::RGBA_to_bgrA; - RGB_to_RGB1 = sk_ssse3::RGB_to_RGB1; - RGB_to_BGR1 = sk_ssse3::RGB_to_BGR1; - gray_to_RGB1 = sk_ssse3::gray_to_RGB1; - grayA_to_RGBA = sk_ssse3::grayA_to_RGBA; - grayA_to_rgbA = sk_ssse3::grayA_to_rgbA; + RGBA_to_BGRA = sk_ssse3::RGBA_to_BGRA; + RGBA_to_rgbA = sk_ssse3::RGBA_to_rgbA; + RGBA_to_bgrA = sk_ssse3::RGBA_to_bgrA; + RGB_to_RGB1 = sk_ssse3::RGB_to_RGB1; + RGB_to_BGR1 = sk_ssse3::RGB_to_BGR1; + gray_to_RGB1 = sk_ssse3::gray_to_RGB1; + 
grayA_to_RGBA = sk_ssse3::grayA_to_RGBA; + grayA_to_rgbA = sk_ssse3::grayA_to_rgbA; + inverted_CMYK_to_RGB1 = sk_ssse3::inverted_CMYK_to_RGB1; + inverted_CMYK_to_BGR1 = sk_ssse3::inverted_CMYK_to_BGR1; } } diff --git a/src/opts/SkSwizzler_opts.h b/src/opts/SkSwizzler_opts.h index 1d3cc51c37..15eec3a355 100644 --- a/src/opts/SkSwizzler_opts.h +++ b/src/opts/SkSwizzler_opts.h @@ -125,6 +125,41 @@ static void grayA_to_rgbA_portable(uint32_t dst[], const void* vsrc, int count) } } +static void inverted_CMYK_to_RGB1_portable(uint32_t* dst, const void* vsrc, int count) { + const uint32_t* src = (const uint32_t*)vsrc; + for (int i = 0; i < count; i++) { + uint8_t k = src[i] >> 24, + y = src[i] >> 16, + m = src[i] >> 8, + c = src[i] >> 0; + // See comments in SkSwizzler.cpp for details on the conversion formula. + uint8_t b = (y*k+127)/255, + g = (m*k+127)/255, + r = (c*k+127)/255; + dst[i] = (uint32_t)0xFF << 24 + | (uint32_t) b << 16 + | (uint32_t) g << 8 + | (uint32_t) r << 0; + } +} + +static void inverted_CMYK_to_BGR1_portable(uint32_t* dst, const void* vsrc, int count) { + const uint32_t* src = (const uint32_t*)vsrc; + for (int i = 0; i < count; i++) { + uint8_t k = src[i] >> 24, + y = src[i] >> 16, + m = src[i] >> 8, + c = src[i] >> 0; + uint8_t b = (y*k+127)/255, + g = (m*k+127)/255, + r = (c*k+127)/255; + dst[i] = (uint32_t)0xFF << 24 + | (uint32_t) r << 16 + | (uint32_t) g << 8 + | (uint32_t) b << 0; + } +} + #if defined(SK_ARM_HAS_NEON) // Rounded divide by 255, (x + 127) / 255 @@ -401,6 +436,54 @@ static void grayA_to_rgbA(uint32_t dst[], const void* src, int count) { expand_grayA<true>(dst, src, count); } +enum Format { kRGB1, kBGR1 }; +template <Format format> +static void inverted_cmyk_to(uint32_t* dst, const void* vsrc, int count) { + auto src = (const uint32_t*)vsrc; + while (count >= 8) { + // Load 8 cmyk pixels. 
+ uint8x8x4_t pixels = vld4_u8((const uint8_t*) src); + + uint8x8_t k = pixels.val[3], + y = pixels.val[2], + m = pixels.val[1], + c = pixels.val[0]; + + // Scale to r, g, b. + uint8x8_t b = scale(y, k); + uint8x8_t g = scale(m, k); + uint8x8_t r = scale(c, k); + + // Store 8 rgba pixels. + if (kBGR1 == format) { + pixels.val[3] = vdup_n_u8(0xFF); + pixels.val[2] = r; + pixels.val[1] = g; + pixels.val[0] = b; + } else { + pixels.val[3] = vdup_n_u8(0xFF); + pixels.val[2] = b; + pixels.val[1] = g; + pixels.val[0] = r; + } + vst4_u8((uint8_t*) dst, pixels); + src += 8; + dst += 8; + count -= 8; + } + + auto proc = (kBGR1 == format) ? inverted_CMYK_to_BGR1_portable : inverted_CMYK_to_RGB1_portable; + proc(dst, src, count); +} + +static void inverted_CMYK_to_RGB1(uint32_t dst[], const void* src, int count) { + inverted_cmyk_to<kRGB1>(dst, src, count); +} + +static void inverted_CMYK_to_BGR1(uint32_t dst[], const void* src, int count) { + inverted_cmyk_to<kBGR1>(dst, src, count); +} + #elif SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3 // Scale a byte by another. @@ -631,6 +714,83 @@ static void grayA_to_rgbA(uint32_t dst[], const void* vsrc, int count) { grayA_to_rgbA_portable(dst, src, count); } +enum Format { kRGB1, kBGR1 }; +template <Format format> +static void inverted_cmyk_to(uint32_t* dst, const void* vsrc, int count) { + auto src = (const uint32_t*)vsrc; + + auto convert8 = [](__m128i* lo, __m128i* hi) { + const __m128i zeros = _mm_setzero_si128(); + __m128i planar; + if (kBGR1 == format) { + planar = _mm_setr_epi8(2,6,10,14, 1,5,9,13, 0,4,8,12, 3,7,11,15); + } else { + planar = _mm_setr_epi8(0,4,8,12, 1,5,9,13, 2,6,10,14, 3,7,11,15); + } + + // Swizzle the pixels to 8-bit planar. + *lo = _mm_shuffle_epi8(*lo, planar); // ccccmmmm yyyykkkk + *hi = _mm_shuffle_epi8(*hi, planar); // CCCCMMMM YYYYKKKK + __m128i cm = _mm_unpacklo_epi32(*lo, *hi), // ccccCCCC mmmmMMMM + yk = _mm_unpackhi_epi32(*lo, *hi); // yyyyYYYY kkkkKKKK + + // Unpack to 16-bit planar. 
+ __m128i c = _mm_unpacklo_epi8(cm, zeros), // c_c_c_c_ C_C_C_C_ + m = _mm_unpackhi_epi8(cm, zeros), // m_m_m_m_ M_M_M_M_ + y = _mm_unpacklo_epi8(yk, zeros), // y_y_y_y_ Y_Y_Y_Y_ + k = _mm_unpackhi_epi8(yk, zeros); // k_k_k_k_ K_K_K_K_ + + // Scale to r, g, b. + __m128i r = scale(c, k), + g = scale(m, k), + b = scale(y, k); + + // Repack into interlaced pixels. + __m128i rg = _mm_or_si128(r, _mm_slli_epi16(g, 8)), // rgrgrgrg RGRGRGRG + ba = _mm_or_si128(b, _mm_set1_epi16((uint16_t) 0xFF00)); // b1b1b1b1 B1B1B1B1 + *lo = _mm_unpacklo_epi16(rg, ba); // rgbargba rgbargba + *hi = _mm_unpackhi_epi16(rg, ba); // RGB1RGB1 RGB1RGB1 + }; + + while (count >= 8) { + __m128i lo = _mm_loadu_si128((const __m128i*) (src + 0)), + hi = _mm_loadu_si128((const __m128i*) (src + 4)); + + convert8(&lo, &hi); + + _mm_storeu_si128((__m128i*) (dst + 0), lo); + _mm_storeu_si128((__m128i*) (dst + 4), hi); + + src += 8; + dst += 8; + count -= 8; + } + + if (count >= 4) { + __m128i lo = _mm_loadu_si128((const __m128i*) src), + hi = _mm_setzero_si128(); + + convert8(&lo, &hi); + + _mm_storeu_si128((__m128i*) dst, lo); + + src += 4; + dst += 4; + count -= 4; + } + + auto proc = (kBGR1 == format) ? 
inverted_CMYK_to_BGR1_portable : inverted_CMYK_to_RGB1_portable; + proc(dst, src, count); +} + +static void inverted_CMYK_to_RGB1(uint32_t dst[], const void* src, int count) { + inverted_cmyk_to<kRGB1>(dst, src, count); +} + +static void inverted_CMYK_to_BGR1(uint32_t dst[], const void* src, int count) { + inverted_cmyk_to<kBGR1>(dst, src, count); +} + #else static void RGBA_to_rgbA(uint32_t* dst, const void* src, int count) { @@ -665,6 +825,14 @@ static void grayA_to_rgbA(uint32_t dst[], const void* src, int count) { grayA_to_rgbA_portable(dst, src, count); } +static void inverted_CMYK_to_RGB1(uint32_t dst[], const void* src, int count) { + inverted_CMYK_to_RGB1_portable(dst, src, count); +} + +static void inverted_CMYK_to_BGR1(uint32_t dst[], const void* src, int count) { + inverted_CMYK_to_BGR1_portable(dst, src, count); +} + #endif } |