diff options
-rw-r--r-- | bench/SwizzleBench.cpp | 2 | ||||
-rw-r--r-- | src/codec/SkSwizzler.cpp | 18 | ||||
-rw-r--r-- | src/core/SkOpts.cpp | 2 | ||||
-rw-r--r-- | src/core/SkOpts.h | 4 | ||||
-rw-r--r-- | src/opts/SkOpts_neon.cpp | 2 | ||||
-rw-r--r-- | src/opts/SkOpts_ssse3.cpp | 2 | ||||
-rw-r--r-- | src/opts/SkSwizzler_opts.h | 142 |
7 files changed, 150 insertions, 22 deletions
diff --git a/bench/SwizzleBench.cpp b/bench/SwizzleBench.cpp index 922c276dbc..c78f2c977d 100644 --- a/bench/SwizzleBench.cpp +++ b/bench/SwizzleBench.cpp @@ -30,3 +30,5 @@ private: DEF_BENCH(return new SwizzleBench("SkOpts::RGBA_to_rgbA", SkOpts::RGBA_to_rgbA)); DEF_BENCH(return new SwizzleBench("SkOpts::RGBA_to_bgrA", SkOpts::RGBA_to_bgrA)); DEF_BENCH(return new SwizzleBench("SkOpts::RGBA_to_BGRA", SkOpts::RGBA_to_BGRA)); +DEF_BENCH(return new SwizzleBench("SkOpts::RGB_to_RGB1", SkOpts::RGB_to_RGB1)); +DEF_BENCH(return new SwizzleBench("SkOpts::RGB_to_BGR1", SkOpts::RGB_to_BGR1)); diff --git a/src/codec/SkSwizzler.cpp b/src/codec/SkSwizzler.cpp index f84b83e23a..7865184cce 100644 --- a/src/codec/SkSwizzler.cpp +++ b/src/codec/SkSwizzler.cpp @@ -331,7 +331,6 @@ static void fast_swizzle_bgra_to_n32_unpremul( // sampling, deltaSrc should equal bpp. SkASSERT(deltaSrc == bpp); - // These swizzles trust that the alpha value is already 0xFF. #ifdef SK_PMCOLOR_IS_RGBA SkOpts::RGBA_to_BGRA((uint32_t*) dst, src + offset, width); #else @@ -376,12 +375,25 @@ static void swizzle_rgb_to_n32( src += offset; SkPMColor* SK_RESTRICT dst = (SkPMColor*)dstRow; for (int x = 0; x < dstWidth; x++) { - dst[x] = SkPackARGB32(0xFF, src[0], src[1], src[2]); + dst[x] = SkPackARGB32NoCheck(0xFF, src[0], src[1], src[2]); src += deltaSrc; } } +static void fast_swizzle_rgb_to_n32( + void* dst, const uint8_t* src, int width, int bpp, int deltaSrc, + int offset, const SkPMColor ctable[]) { + + // This function must not be called if we are sampling. If we are not + // sampling, deltaSrc should equal bpp. + SkASSERT(deltaSrc == bpp); +#ifdef SK_PMCOLOR_IS_RGBA + SkOpts::RGB_to_RGB1((uint32_t*) dst, src + offset, width); +#else + SkOpts::RGB_to_BGR1((uint32_t*) dst, src + offset, width); +#endif +} static void swizzle_rgb_to_565( void* SK_RESTRICT dstRow, const uint8_t* SK_RESTRICT src, int dstWidth, @@ -446,7 +458,6 @@ static void fast_swizzle_rgba_to_n32_unpremul( // sampling, deltaSrc should equal bpp. SkASSERT(deltaSrc == bpp); - // These swizzles trust that the alpha value is already 0xFF. #ifdef SK_PMCOLOR_IS_RGBA memcpy(dst, src + offset, width * bpp); #else @@ -682,6 +693,7 @@ SkSwizzler* SkSwizzler::CreateSwizzler(SkSwizzler::SrcConfig sc, switch (dstInfo.colorType()) { case kN32_SkColorType: proc = &swizzle_rgb_to_n32; + fastProc = &fast_swizzle_rgb_to_n32; break; case kRGB_565_SkColorType: proc = &swizzle_rgb_to_565; diff --git a/src/core/SkOpts.cpp b/src/core/SkOpts.cpp index 92e973c1d4..669401b417 100644 --- a/src/core/SkOpts.cpp +++ b/src/core/SkOpts.cpp @@ -82,6 +82,8 @@ namespace SkOpts { decltype(RGBA_to_BGRA) RGBA_to_BGRA = sk_default::RGBA_to_BGRA; decltype(RGBA_to_rgbA) RGBA_to_rgbA = sk_default::RGBA_to_rgbA; decltype(RGBA_to_bgrA) RGBA_to_bgrA = sk_default::RGBA_to_bgrA; + decltype(RGB_to_RGB1) RGB_to_RGB1 = sk_default::RGB_to_RGB1; + decltype(RGB_to_BGR1) RGB_to_BGR1 = sk_default::RGB_to_BGR1; // Each Init_foo() is defined in src/opts/SkOpts_foo.cpp. void Init_ssse3(); diff --git a/src/core/SkOpts.h b/src/core/SkOpts.h index 23541434a3..41ad8ebfe8 100644 --- a/src/core/SkOpts.h +++ b/src/core/SkOpts.h @@ -59,7 +59,9 @@ namespace SkOpts { typedef void (*Swizzle_8888)(uint32_t*, const void*, int); extern Swizzle_8888 RGBA_to_BGRA, // i.e. just swap RB RGBA_to_rgbA, // i.e. just premultiply - RGBA_to_bgrA; // i.e. swap RB and premultiply + RGBA_to_bgrA, // i.e. swap RB and premultiply + RGB_to_RGB1, // i.e. insert an opaque alpha + RGB_to_BGR1; // i.e. swap RB and insert an opaque alpha } #endif//SkOpts_DEFINED diff --git a/src/opts/SkOpts_neon.cpp b/src/opts/SkOpts_neon.cpp index cf7a5c2245..dcb057e1fe 100644 --- a/src/opts/SkOpts_neon.cpp +++ b/src/opts/SkOpts_neon.cpp @@ -50,5 +50,7 @@ namespace SkOpts { RGBA_to_BGRA = sk_neon::RGBA_to_BGRA; RGBA_to_rgbA = sk_neon::RGBA_to_rgbA; RGBA_to_bgrA = sk_neon::RGBA_to_bgrA; + RGB_to_RGB1 = sk_neon::RGB_to_RGB1; + RGB_to_BGR1 = sk_neon::RGB_to_BGR1; } } diff --git a/src/opts/SkOpts_ssse3.cpp b/src/opts/SkOpts_ssse3.cpp index 96e8493bfc..23fdffbffa 100644 --- a/src/opts/SkOpts_ssse3.cpp +++ b/src/opts/SkOpts_ssse3.cpp @@ -21,5 +21,7 @@ namespace SkOpts { RGBA_to_BGRA = sk_ssse3::RGBA_to_BGRA; RGBA_to_rgbA = sk_ssse3::RGBA_to_rgbA; RGBA_to_bgrA = sk_ssse3::RGBA_to_bgrA; + RGB_to_RGB1 = sk_ssse3::RGB_to_RGB1; + RGB_to_BGR1 = sk_ssse3::RGB_to_BGR1; } } diff --git a/src/opts/SkSwizzler_opts.h b/src/opts/SkSwizzler_opts.h index 8d1be84df2..ad121cfafe 100644 --- a/src/opts/SkSwizzler_opts.h +++ b/src/opts/SkSwizzler_opts.h @@ -60,6 +60,34 @@ static void RGBA_to_BGRA_portable(uint32_t* dst, const void* vsrc, int count) { } } +static void RGB_to_RGB1_portable(uint32_t dst[], const void* vsrc, int count) { + const uint8_t* src = (const uint8_t*)vsrc; + for (int i = 0; i < count; i++) { + uint8_t r = src[0], + g = src[1], + b = src[2]; + src += 3; + dst[i] = (uint32_t)0xFF << 24 + | (uint32_t)b << 16 + | (uint32_t)g << 8 + | (uint32_t)r << 0; + } +} + +static void RGB_to_BGR1_portable(uint32_t dst[], const void* vsrc, int count) { + const uint8_t* src = (const uint8_t*)vsrc; + for (int i = 0; i < count; i++) { + uint8_t r = src[0], + g = src[1], + b = src[2]; + src += 3; + dst[i] = (uint32_t)0xFF << 24 + | (uint32_t)r << 16 + | (uint32_t)g << 8 + | (uint32_t)b << 0; + } +} + #if defined(SK_ARM_HAS_NEON) // Rounded divide by 255, (x + 127) / 255 @@ -96,12 +124,12 @@ static void premul_should_swapRB(uint32_t* dst, const void* vsrc, int count) { auto src = (const uint32_t*)vsrc; while (count >= 8) { // Load 8 pixels. - uint8x8x4_t bgra = vld4_u8((const uint8_t*) src); + uint8x8x4_t rgba = vld4_u8((const uint8_t*) src); - uint8x8_t a = bgra.val[3], - b = bgra.val[2], - g = bgra.val[1], - r = bgra.val[0]; + uint8x8_t a = rgba.val[3], + b = rgba.val[2], + g = rgba.val[1], + r = rgba.val[0]; // Premultiply. b = scale(b, a); @@ -110,15 +138,15 @@ static void premul_should_swapRB(uint32_t* dst, const void* vsrc, int count) { // Store 8 premultiplied pixels. if (kSwapRB) { - bgra.val[2] = r; - bgra.val[1] = g; - bgra.val[0] = b; + rgba.val[2] = r; + rgba.val[1] = g; + rgba.val[0] = b; } else { - bgra.val[2] = b; - bgra.val[1] = g; - bgra.val[0] = r; + rgba.val[2] = b; + rgba.val[1] = g; + rgba.val[0] = r; } - vst4_u8((uint8_t*) dst, bgra); + vst4_u8((uint8_t*) dst, rgba); src += 8; dst += 8; count -= 8; @@ -141,13 +169,13 @@ static void RGBA_to_BGRA(uint32_t* dst, const void* vsrc, int count) { auto src = (const uint32_t*)vsrc; while (count >= 16) { // Load 16 pixels. - uint8x16x4_t bgra = vld4q_u8((const uint8_t*) src); + uint8x16x4_t rgba = vld4q_u8((const uint8_t*) src); // Swap r and b. - SkTSwap(bgra.val[0], bgra.val[2]); + SkTSwap(rgba.val[0], rgba.val[2]); // Store 16 pixels. - vst4q_u8((uint8_t*) dst, bgra); + vst4q_u8((uint8_t*) dst, rgba); src += 16; dst += 16; count -= 16; @@ -155,13 +183,13 @@ static void RGBA_to_BGRA(uint32_t* dst, const void* vsrc, int count) { if (count >= 8) { // Load 8 pixels. - uint8x8x4_t bgra = vld4_u8((const uint8_t*) src); + uint8x8x4_t rgba = vld4_u8((const uint8_t*) src); // Swap r and b. - SkTSwap(bgra.val[0], bgra.val[2]); + SkTSwap(rgba.val[0], rgba.val[2]); // Store 8 pixels. - vst4_u8((uint8_t*) dst, bgra); + vst4_u8((uint8_t*) dst, rgba); src += 8; dst += 8; count -= 8; @@ -170,6 +198,68 @@ static void RGBA_to_BGRA(uint32_t* dst, const void* vsrc, int count) { RGBA_to_BGRA_portable(dst, src, count); } +template <bool kSwapRB> +static void insert_alpha_should_swaprb(uint32_t dst[], const void* vsrc, int count) { + const uint8_t* src = (const uint8_t*) vsrc; + while (count >= 16) { + // Load 16 pixels. + uint8x16x3_t rgb = vld3q_u8(src); + + // Insert an opaque alpha channel and swap if needed. + uint8x16x4_t rgba; + if (kSwapRB) { + rgba.val[0] = rgb.val[2]; + rgba.val[2] = rgb.val[0]; + } else { + rgba.val[0] = rgb.val[0]; + rgba.val[2] = rgb.val[2]; + } + rgba.val[1] = rgb.val[1]; + rgba.val[3] = vdupq_n_u8(0xFF); + + // Store 16 pixels. + vst4q_u8((uint8_t*) dst, rgba); + src += 16*3; + dst += 16; + count -= 16; + } + + if (count >= 8) { + // Load 8 pixels. + uint8x8x3_t rgb = vld3_u8(src); + + // Insert an opaque alpha channel and swap if needed. + uint8x8x4_t rgba; + if (kSwapRB) { + rgba.val[0] = rgb.val[2]; + rgba.val[2] = rgb.val[0]; + } else { + rgba.val[0] = rgb.val[0]; + rgba.val[2] = rgb.val[2]; + } + rgba.val[1] = rgb.val[1]; + rgba.val[3] = vdup_n_u8(0xFF); + + // Store 8 pixels. + vst4_u8((uint8_t*) dst, rgba); + src += 8*3; + dst += 8; + count -= 8; + } + + // Call portable code to finish up the tail of [0,8) pixels. + auto proc = kSwapRB ? RGB_to_BGR1_portable : RGB_to_RGB1_portable; + proc(dst, src, count); +} + +static void RGB_to_RGB1(uint32_t dst[], const void* src, int count) { + insert_alpha_should_swaprb<false>(dst, src, count); +} + +static void RGB_to_BGR1(uint32_t dst[], const void* src, int count) { + insert_alpha_should_swaprb<true>(dst, src, count); +} + #elif SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3 template <bool kSwapRB> @@ -268,6 +358,14 @@ static void RGBA_to_BGRA(uint32_t* dst, const void* vsrc, int count) { RGBA_to_BGRA_portable(dst, src, count); } +static void RGB_to_RGB1(uint32_t dst[], const void* src, int count) { + RGB_to_RGB1_portable(dst, src, count); +} + +static void RGB_to_BGR1(uint32_t dst[], const void* src, int count) { + RGB_to_BGR1_portable(dst, src, count); +} + #else static void RGBA_to_rgbA(uint32_t* dst, const void* src, int count) { @@ -282,6 +380,14 @@ static void RGBA_to_BGRA(uint32_t* dst, const void* src, int count) { RGBA_to_BGRA_portable(dst, src, count); } +static void RGB_to_RGB1(uint32_t dst[], const void* src, int count) { + RGB_to_RGB1_portable(dst, src, count); +} + +static void RGB_to_BGR1(uint32_t dst[], const void* src, int count) { + RGB_to_BGR1_portable(dst, src, count); +} + #endif } |