diff options
author | 2016-01-13 14:31:59 -0800 | |
---|---|---|
committer | 2016-01-13 14:31:59 -0800 | |
commit | 3a24f459582f2665f0e66bd35a0d8f46a1c4c72f (patch) | |
tree | 5faea85984dc8be63ebbd79e9b9c6ed346177efc /src/core/SkOpts.cpp | |
parent | 06d54ad09a03edd62d597b2ba67ad3c05944fd92 (diff) |
Optimized premultiplying swizzles for NEON
Improves decode performance for RGBA encoded PNGs.
Swizzle Time on Nexus 9 (with clang):
SwapPremul 0.44x
Premul 0.44x
Decode Time on Nexus 9 (with clang):
ZeroInit Decodes 0.85x
Regular Decodes 0.86x
Swizzle Time on Nexus 6P (with clang):
SwapPremul 0.14x
Premul 0.14x
Decode Time On Nexus 6P (with clang):
ZeroInit Decodes 0.93x
Regular Decodes 0.95x
Notes:
ZeroInit means memory is zero initialized, and we do not write to
memory for large sections of zero pixels (memory use opt for Android).
A profile on Nexus 9 shows that the premultiplication step of PNG
decoding is now ~5% of decode time (down from ~20%).
BUG=skia:4767
GOLD_TRYBOT_URL= https://gold.skia.org/search2?unt=true&query=source_type%3Dgm&master=false&issue=1577703006
CQ_EXTRA_TRYBOTS=client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot
Review URL: https://codereview.chromium.org/1577703006
Diffstat (limited to 'src/core/SkOpts.cpp')
-rw-r--r-- | src/core/SkOpts.cpp | 53 |
1 file changed, 1 insertion, 52 deletions
diff --git a/src/core/SkOpts.cpp b/src/core/SkOpts.cpp
index ee88b23169..5f1a36c1be 100644
--- a/src/core/SkOpts.cpp
+++ b/src/core/SkOpts.cpp
@@ -16,6 +16,7 @@
 #include "SkFloatingPoint_opts.h"
 #include "SkMatrix_opts.h"
 #include "SkMorphologyImageFilter_opts.h"
+#include "SkSwizzler_opts.h"
 #include "SkTextureCompressor_opts.h"
 #include "SkUtils_opts.h"
 #include "SkXfermode_opts.h"
@@ -49,58 +50,6 @@
 #include <cpu-features.h>
 #endif
 
-namespace sk_default {
-
-// These variable names in these functions just pretend the input is BGRA.
-// They work fine with both RGBA and BGRA.
-
-static void premul_xxxa(uint32_t dst[], const uint32_t src[], int count) {
-    for (int i = 0; i < count; i++) {
-        uint8_t a = src[i] >> 24,
-                r = src[i] >> 16,
-                g = src[i] >> 8,
-                b = src[i] >> 0;
-        r = (r*a+127)/255;
-        g = (g*a+127)/255;
-        b = (b*a+127)/255;
-        dst[i] = (uint32_t)a << 24
-               | (uint32_t)r << 16
-               | (uint32_t)g << 8
-               | (uint32_t)b << 0;
-    }
-}
-
-static void swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) {
-    for (int i = 0; i < count; i++) {
-        uint8_t a = src[i] >> 24,
-                r = src[i] >> 16,
-                g = src[i] >> 8,
-                b = src[i] >> 0;
-        dst[i] = (uint32_t)a << 24
-               | (uint32_t)b << 16
-               | (uint32_t)g << 8
-               | (uint32_t)r << 0;
-    }
-}
-
-static void premul_swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) {
-    for (int i = 0; i < count; i++) {
-        uint8_t a = src[i] >> 24,
-                r = src[i] >> 16,
-                g = src[i] >> 8,
-                b = src[i] >> 0;
-        r = (r*a+127)/255;
-        g = (g*a+127)/255;
-        b = (b*a+127)/255;
-        dst[i] = (uint32_t)a << 24
-               | (uint32_t)b << 16
-               | (uint32_t)g << 8
-               | (uint32_t)r << 0;
-    }
-}
-
-} // namespace sk_default
-
 namespace SkOpts {
     // Define default function pointer values here...
     // If our global compile options are set high enough, these defaults might even be