aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/opts/SkUtils_opts.h
diff options
context:
space:
mode:
author Mike Klein <mtklein@chromium.org> 2017-08-15 15:08:54 -0400
committer Skia Commit-Bot <skia-commit-bot@chromium.org> 2017-08-15 22:28:17 +0000
commit 25954b64c066b143819bc720b20a9b4287042ecc (patch)
tree f2368c5dc2700c3b549c9cc435795dd9b3ffa064 /src/opts/SkUtils_opts.h
parent 135a1b10fef1ee158f8f2379c2a7429eead004e0 (diff)
explicitly vectorize sk_memset{16,32,64}
This ought to help clients who don't enable autovectorization.

With autovectorization enabled, this new version is like, hyper-vectorized compared to the old autovectorization. Instead of handling 128 bytes max per loop, it now handles up to 512 bytes per loop. Pretty exciting.

Locally perf effects are a mix, but we'd expect this to help Chrome unambiguously if they've turned off autovectorization.

$ out/ok bench:samples=100 sw filter:match=memset32_\\d\* serial

Before:
[memset32_100000] 16ms @0 20.1ms @99 20.2ms @100
[memset32_10000] 1.07ms @0 1.26ms @99 1.31ms @100
[memset32_1000] 73.9µs @0 89.4µs @99 90.1µs @100
[memset32_100] 8.59µs @0 9.74µs @99 9.96µs @100
[memset32_10] 7.45µs @0 8.96µs @99 8.99µs @100
[memset32_1] 2.29µs @0 2.81µs @99 2.92µs @100

After:
[memset32_100000] 16.2ms @0 17.3ms @99 17.3ms @100
[memset32_10000] 1.06ms @0 1.18ms @99 1.23ms @100
[memset32_1000] 72µs @0 75.6µs @99 84.7µs @100
[memset32_100] 9.14µs @0 10.6µs @99 10.7µs @100
[memset32_10] 5.43µs @0 5.88µs @99 5.99µs @100
[memset32_1] 3.43µs @0 3.65µs @99 3.83µs @100

BUG=chromium:755391

Change-Id: If9059a30ca7a345f1f7c37bd51473c29e8bb8922
Reviewed-on: https://skia-review.googlesource.com/34746
Reviewed-by: Florin Malita <fmalita@chromium.org>
Commit-Queue: Mike Klein <mtklein@chromium.org>
Diffstat (limited to 'src/opts/SkUtils_opts.h')
-rw-r--r-- src/opts/SkUtils_opts.h 32
1 files changed, 22 insertions, 10 deletions
diff --git a/src/opts/SkUtils_opts.h b/src/opts/SkUtils_opts.h
index ba93305b4a..d67a777ca8 100644
--- a/src/opts/SkUtils_opts.h
+++ b/src/opts/SkUtils_opts.h
@@ -8,24 +8,36 @@
#ifndef SkUtils_opts_DEFINED
#define SkUtils_opts_DEFINED
-#include "stdint.h"
+#include <stdint.h>
+#include "SkNx.h"
namespace SK_OPTS_NS {
- static void memset16(uint16_t buffer[], uint16_t value, int count) {
- for (int i = 0; i < count; i++) {
- buffer[i] = value;
+ template <typename T>
+ static void memsetT(T buffer[], T value, int count) {
+ #if defined(__AVX__)
+ static const int N = 32 / sizeof(T);
+ #else
+ static const int N = 16 / sizeof(T);
+ #endif
+ while (count >= N) {
+ SkNx<N,T>(value).store(buffer);
+ buffer += N;
+ count -= N;
+ }
+ while (count --> 0) {
+ *buffer++ = value;
}
}
+
+ static void memset16(uint16_t buffer[], uint16_t value, int count) {
+ memsetT(buffer, value, count);
+ }
static void memset32(uint32_t buffer[], uint32_t value, int count) {
- for (int i = 0; i < count; i++) {
- buffer[i] = value;
- }
+ memsetT(buffer, value, count);
}
static void memset64(uint64_t buffer[], uint64_t value, int count) {
- for (int i = 0; i < count; i++) {
- buffer[i] = value;
- }
+ memsetT(buffer, value, count);
}
}