aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/codec/SkSwizzler.cpp
diff options
context:
space:
mode:
authorGravatar msarett <msarett@google.com>2016-01-15 11:02:36 -0800
committerGravatar Commit bot <commit-bot@chromium.org>2016-01-15 11:02:36 -0800
commit03108de163354fa574679ad153b58ce57126b2ba (patch)
tree1c1a0f47ae33e03b95332335e095cc5631fd130d /src/codec/SkSwizzler.cpp
parent01dc44ae669b53b45f9b33ab826ba22956bddf62 (diff)
Add NEON swap opts and use opts in SkSwizzler
All RGBA, RGBX, BGRA, BGRX routines in SkSwizzler now use fast options (with the exception of conversions to 565).

Swizzle Time for swap_rb
  0.94x Nexus 9
  0.81x Nexus 6P

Unpremul Decode Time for RGBA PNGs***
  ZeroInit 0.93x Nexus 9
  Regular 0.94x Nexus 9
  ZeroInit 0.97x Nexus 6P
  ZeroInit 0.95x Nexus 6P

***Two Notes:
  The improvements here are actually due to taking advantage of memcpy() (no need to swap, the bytes are already in the proper order).
  ZeroInit skips writing zeros to zero initialized memory. This is a memory use opt in Android.

BMP decodes should also benefit from these improvements.

I am relying on Gold to help test all possible cases.

BUG=skia:
GOLD_TRYBOT_URL= https://gold.skia.org/search2?unt=true&query=source_type%3Dgm&master=false&issue=1581933006
CQ_EXTRA_TRYBOTS=client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot

Review URL: https://codereview.chromium.org/1581933006
Diffstat (limited to 'src/codec/SkSwizzler.cpp')
-rw-r--r--src/codec/SkSwizzler.cpp81
1 files changed, 74 insertions, 7 deletions
diff --git a/src/codec/SkSwizzler.cpp b/src/codec/SkSwizzler.cpp
index 74d6c7f87f..e560d6f3cd 100644
--- a/src/codec/SkSwizzler.cpp
+++ b/src/codec/SkSwizzler.cpp
@@ -303,6 +303,26 @@ static void swizzle_bgrx_to_n32(
}
}
+static void fast_swizzle_bgrx_to_32(
+ void* dst, const uint8_t* src, int width, int bpp, int deltaSrc, int offset,
+ const SkPMColor ctable[]) {
+ // Fast row proc for 4-byte BGRX pixels: reads from src + offset, writes to dst. ctable is not used.
+ // This function must not be called if we are sampling. If we are not
+ // sampling, deltaSrc should equal bpp.
+ SkASSERT(deltaSrc == bpp);
+
+ // The default swizzle supports BGR->N32 and BGRX->N32. This only
+ // supports BGRX->N32.
+ SkASSERT(4 == bpp);
+ // NOTE(review): CreateSwizzler also selects this proc for BGRA -> unpremul N32; confirm alpha passes through.
+ // These swizzles trust that the alpha value is already 0xFF.
+#ifdef SK_PMCOLOR_IS_RGBA
+ SkOpts::swaprb_xxxa((uint32_t*) dst, (const uint32_t*) (src + offset), width); // presumably swaps R and B per pixel (from the name)
+#else
+ memcpy(dst, src + offset, width * bpp); // no swap on this branch: copy width * bpp bytes unchanged
+#endif
+}
+
static void swizzle_bgrx_to_565(
void* SK_RESTRICT dstRow, const uint8_t* SK_RESTRICT src, int dstWidth,
int bpp, int deltaSrc, int offset, const SkPMColor ctable[]) {
@@ -343,7 +363,23 @@ static void swizzle_bgra_to_n32_premul(
}
}
+static void fast_swizzle_bgra_to_n32_premul(
+ void* dst, const uint8_t* src, int width, int bpp, int deltaSrc, int offset,
+ const SkPMColor ctable[]) {
+ // Fast row proc for BGRA -> premultiplied N32: hands the row at src + offset to SkOpts. ctable is not used.
+ // This function must not be called if we are sampling. If we are not
+ // sampling, deltaSrc should equal bpp.
+ SkASSERT(deltaSrc == bpp);
+ // NOTE(review): bpp is not asserted to be 4 here (the bgrx/rgbx fast procs do), yet src is read as uint32_t.
+#ifdef SK_PMCOLOR_IS_RGBA
+ SkOpts::premul_swaprb_xxxa((uint32_t*) dst, (const uint32_t*) (src + offset), width); // presumably premultiplies and swaps R/B (from the name)
+#else
+ SkOpts::premul_xxxa((uint32_t*) dst, (const uint32_t*) (src + offset), width); // presumably premultiplies only (from the name)
+#endif
+}
+
// kRGBX
+
static void swizzle_rgbx_to_n32(
void* SK_RESTRICT dstRow, const uint8_t* SK_RESTRICT src, int dstWidth,
int bpp, int deltaSrc, int offset, const SkPMColor ctable[]) {
@@ -356,6 +392,26 @@ static void swizzle_rgbx_to_n32(
}
}
+static void fast_swizzle_rgbx_to_32(
+ void* dst, const uint8_t* src, int width, int bpp, int deltaSrc, int offset,
+ const SkPMColor ctable[]) {
+ // Fast row proc for 4-byte RGBX pixels: reads from src + offset, writes to dst. ctable is not used.
+ // This function must not be called if we are sampling. If we are not
+ // sampling, deltaSrc should equal bpp.
+ SkASSERT(deltaSrc == bpp);
+
+ // The default swizzle supports RGB->N32 and RGBX->N32. This only
+ // supports RGBX->N32.
+ SkASSERT(4 == bpp);
+ // NOTE(review): CreateSwizzler also selects this proc for RGBA -> unpremul N32; confirm alpha passes through.
+ // These swizzles trust that the alpha value is already 0xFF.
+#ifdef SK_PMCOLOR_IS_RGBA
+ memcpy(dst, src + offset, width * bpp); // no swap on this branch: copy width * bpp bytes unchanged
+#else
+ SkOpts::swaprb_xxxa((uint32_t*) dst, (const uint32_t*) (src + offset), width); // presumably swaps R and B per pixel (from the name)
+#endif
+}
+
static void swizzle_rgbx_to_565(
void* SK_RESTRICT dstRow, const uint8_t* SK_RESTRICT src, int dstWidth,
int bytesPerPixel, int deltaSrc, int offset, const SkPMColor ctable[]) {
@@ -369,6 +425,7 @@ static void swizzle_rgbx_to_565(
}
// kRGBA
+
static void swizzle_rgba_to_n32_premul(
void* SK_RESTRICT dstRow, const uint8_t* SK_RESTRICT src, int dstWidth,
int bpp, int deltaSrc, int offset, const SkPMColor ctable[]) {
@@ -610,15 +667,22 @@ SkSwizzler* SkSwizzler::CreateSwizzler(SkSwizzler::SrcConfig sc,
case kBGRA:
switch (dstInfo.colorType()) {
case kN32_SkColorType:
- switch (dstInfo.alphaType()) {
- case kUnpremul_SkAlphaType:
+ if (dstInfo.alphaType() == kUnpremul_SkAlphaType) {
+ if (SkCodec::kYes_ZeroInitialized == zeroInit) {
+ proc = &SkipLeading8888ZerosThen<swizzle_bgra_to_n32_unpremul>;
+ fastProc = &SkipLeading8888ZerosThen<fast_swizzle_bgrx_to_32>;
+ } else {
proc = &swizzle_bgra_to_n32_unpremul;
- break;
- case kPremul_SkAlphaType:
+ fastProc = &fast_swizzle_bgrx_to_32;
+ }
+ } else {
+ if (SkCodec::kYes_ZeroInitialized == zeroInit) {
+ proc = &SkipLeading8888ZerosThen<swizzle_bgra_to_n32_premul>;
+ fastProc = &SkipLeading8888ZerosThen<fast_swizzle_bgra_to_n32_premul>;
+ } else {
proc = &swizzle_bgra_to_n32_premul;
- break;
- default:
- break;
+ fastProc = &fast_swizzle_bgra_to_n32_premul;
+ }
}
break;
default:
@@ -629,6 +693,7 @@ SkSwizzler* SkSwizzler::CreateSwizzler(SkSwizzler::SrcConfig sc,
switch (dstInfo.colorType()) {
case kN32_SkColorType:
proc = &swizzle_rgbx_to_n32;
+ fastProc = &fast_swizzle_rgbx_to_32;
break;
case kRGB_565_SkColorType:
proc = &swizzle_rgbx_to_565;
@@ -642,8 +707,10 @@ SkSwizzler* SkSwizzler::CreateSwizzler(SkSwizzler::SrcConfig sc,
if (dstInfo.alphaType() == kUnpremul_SkAlphaType) {
if (SkCodec::kYes_ZeroInitialized == zeroInit) {
proc = &SkipLeading8888ZerosThen<swizzle_rgba_to_n32_unpremul>;
+ fastProc = &SkipLeading8888ZerosThen<fast_swizzle_rgbx_to_32>;
} else {
proc = &swizzle_rgba_to_n32_unpremul;
+ fastProc = &fast_swizzle_rgbx_to_32;
}
} else {
if (SkCodec::kYes_ZeroInitialized == zeroInit) {