aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/core/SkOpts.cpp
diff options
context:
space:
mode:
authorGravatar msarett <msarett@google.com>2016-01-13 14:31:59 -0800
committerGravatar Commit bot <commit-bot@chromium.org>2016-01-13 14:31:59 -0800
commit 3a24f459582f2665f0e66bd35a0d8f46a1c4c72f (patch)
tree 5faea85984dc8be63ebbd79e9b9c6ed346177efc /src/core/SkOpts.cpp
parent 06d54ad09a03edd62d597b2ba67ad3c05944fd92 (diff)
Optimized premultiplying swizzles for NEON
Improves decode performance for RGBA-encoded PNGs. Swizzle Time on Nexus 9 (with clang): SwapPremul 0.44x, Premul 0.44x. Decode Time on Nexus 9 (with clang): ZeroInit Decodes 0.85x, Regular Decodes 0.86x. Swizzle Time on Nexus 6P (with clang): SwapPremul 0.14x, Premul 0.14x. Decode Time on Nexus 6P (with clang): ZeroInit Decodes 0.93x, Regular Decodes 0.95x. Notes: ZeroInit means memory is zero-initialized, and we do not write to memory for large sections of zero pixels (a memory-use optimization for Android). A profile on Nexus 9 shows that the premultiplication step of PNG decoding is now ~5% of decode time (down from ~20%). BUG=skia:4767 GOLD_TRYBOT_URL= https://gold.skia.org/search2?unt=true&query=source_type%3Dgm&master=false&issue=1577703006 CQ_EXTRA_TRYBOTS=client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot Review URL: https://codereview.chromium.org/1577703006
Diffstat (limited to 'src/core/SkOpts.cpp')
-rw-r--r-- src/core/SkOpts.cpp 53
1 files changed, 1 insertions, 52 deletions
diff --git a/src/core/SkOpts.cpp b/src/core/SkOpts.cpp
index ee88b23169..5f1a36c1be 100644
--- a/src/core/SkOpts.cpp
+++ b/src/core/SkOpts.cpp
@@ -16,6 +16,7 @@
#include "SkFloatingPoint_opts.h"
#include "SkMatrix_opts.h"
#include "SkMorphologyImageFilter_opts.h"
+#include "SkSwizzler_opts.h"
#include "SkTextureCompressor_opts.h"
#include "SkUtils_opts.h"
#include "SkXfermode_opts.h"
@@ -49,58 +50,6 @@
#include <cpu-features.h>
#endif
-namespace sk_default {
-
-// These variable names in these functions just pretend the input is BGRA.
-// They work fine with both RGBA and BGRA.
-
-static void premul_xxxa(uint32_t dst[], const uint32_t src[], int count) {
- for (int i = 0; i < count; i++) {
- uint8_t a = src[i] >> 24,
- r = src[i] >> 16,
- g = src[i] >> 8,
- b = src[i] >> 0;
- r = (r*a+127)/255;
- g = (g*a+127)/255;
- b = (b*a+127)/255;
- dst[i] = (uint32_t)a << 24
- | (uint32_t)r << 16
- | (uint32_t)g << 8
- | (uint32_t)b << 0;
- }
-}
-
-static void swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) {
- for (int i = 0; i < count; i++) {
- uint8_t a = src[i] >> 24,
- r = src[i] >> 16,
- g = src[i] >> 8,
- b = src[i] >> 0;
- dst[i] = (uint32_t)a << 24
- | (uint32_t)b << 16
- | (uint32_t)g << 8
- | (uint32_t)r << 0;
- }
-}
-
-static void premul_swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) {
- for (int i = 0; i < count; i++) {
- uint8_t a = src[i] >> 24,
- r = src[i] >> 16,
- g = src[i] >> 8,
- b = src[i] >> 0;
- r = (r*a+127)/255;
- g = (g*a+127)/255;
- b = (b*a+127)/255;
- dst[i] = (uint32_t)a << 24
- | (uint32_t)b << 16
- | (uint32_t)g << 8
- | (uint32_t)r << 0;
- }
-}
-
-} // namespace sk_default
-
namespace SkOpts {
// Define default function pointer values here...
// If our global compile options are set high enough, these defaults might even be