aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/core/SkUtils.cpp
diff options
context:
space:
mode:
author: mtklein <mtklein@chromium.org> 2015-04-09 14:05:17 -0700
committer: Commit bot <commit-bot@chromium.org> 2015-04-09 14:05:17 -0700
commit: 9ff378b01be0b0a3fc35677a2155ba4ade286cc2 (patch)
tree: 2d9221ef31e3632dde74bda9a1023bc1ce71b27c /src/core/SkUtils.cpp
parent: a1e41c6d9a2029eb836c6120bb154ccd25e1588d (diff)
Rewrite memset benches, then use results to add a small-N optimization.
The benches for N <= 10 get around 2x faster on my N7 and N9. I believe this is because of the reduced function-call-then-function-pointer-call overhead on the N7, and additionally because it seems autovectorization beats our NEON code for small N on the N9. My desktop is unchanged, though that's probably because N=10 lies well within a region where memset's performance is essentially constant: N=100 takes only about 2x as long as N=1 and N=10, which perform nearly identically.

BUG=skia:
Review URL: https://codereview.chromium.org/1073863002
Diffstat (limited to 'src/core/SkUtils.cpp')
-rw-r--r-- src/core/SkUtils.cpp | 4
1 files changed, 2 insertions, 2 deletions
diff --git a/src/core/SkUtils.cpp b/src/core/SkUtils.cpp
index b063071932..33ea4db47c 100644
--- a/src/core/SkUtils.cpp
+++ b/src/core/SkUtils.cpp
@@ -134,12 +134,12 @@ SkMemcpy32Proc choose_memcpy32() {
} // namespace
-void sk_memset16(uint16_t dst[], uint16_t value, int count) {
+void sk_memset16_large(uint16_t dst[], uint16_t value, int count) {
SK_DECLARE_STATIC_LAZY_FN_PTR(SkMemset16Proc, proc, choose_memset16);
proc.get()(dst, value, count);
}
-void sk_memset32(uint32_t dst[], uint32_t value, int count) {
+void sk_memset32_large(uint32_t dst[], uint32_t value, int count) {
SK_DECLARE_STATIC_LAZY_FN_PTR(SkMemset32Proc, proc, choose_memset32);
proc.get()(dst, value, count);
}