aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/opts/SkUtils_opts.h
diff options
context:
space:
mode:
authorGravatar Mike Klein <mtklein@chromium.org>2015-08-04 16:48:43 -0400
committerGravatar Mike Klein <mtklein@chromium.org>2015-08-04 16:48:43 -0400
commit8caa5af92cf91debc1598380cb72c330e8c63efb (patch)
tree93003bd6400bffd51ffe8b0f2919974178662098 /src/opts/SkUtils_opts.h
parent8e47cdb17692278766cbaf19ce933da0e6181c3a (diff)
Reorganize to keep similar code together.
This organizes memset16, memset32, and rsqrt the same way as the other code. No functional change. BUG=skia:4117 R=djsollen@google.com Review URL: https://codereview.chromium.org/1264423002 .
Diffstat (limited to 'src/opts/SkUtils_opts.h')
-rw-r--r--src/opts/SkUtils_opts.h110
1 files changed, 110 insertions, 0 deletions
diff --git a/src/opts/SkUtils_opts.h b/src/opts/SkUtils_opts.h
new file mode 100644
index 0000000000..44fe643276
--- /dev/null
+++ b/src/opts/SkUtils_opts.h
@@ -0,0 +1,110 @@
+/*
+ * Copyright 2015 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#ifndef SkUtils_opts_DEFINED
+#define SkUtils_opts_DEFINED
+
+namespace SK_OPTS_NS {
+
+#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
+
+// SSE2 fill: writes val to each of dst[0..n) using unaligned vector stores.
+//
+//   dst: first element to overwrite; only uint16_t alignment is needed, since every
+//        vector store below is an unaligned-tolerant form.
+//   val: value written to every element.
+//   n:   element count; n <= 0 writes nothing, as in the portable fallback.
+static void memset16(uint16_t* dst, uint16_t val, int n) {
+    const __m128i val8 = _mm_set1_epi16(val);    // Eight copies of val.
+
+    // Bulk: eight elements (16 bytes) per store.
+    for ( ; n >= 8; n -= 8) {
+        _mm_storeu_si128((__m128i*)dst, val8);
+        dst += 8;
+    }
+
+    // Tail of at most seven elements.  Comparing n's magnitude (instead of testing
+    // individual bits) keeps a negative n from matching any tail branch.
+    if (n >= 4) {
+        _mm_storel_epi64((__m128i*)dst, val8);   // Low 64 bits: four elements.
+        dst += 4;
+        n -= 4;
+    }
+
+    // Last 0-3 elements go out as ordinary uint16_t stores; a single store through a
+    // uint32_t* here would type-pun dst and could be misaligned.
+    while (n --> 0) {
+        *dst++ = val;
+    }
+}
+
+// SSE2 fill: writes val to each of dst[0..n) using unaligned vector stores.
+//
+//   dst: first element to overwrite; only uint32_t alignment is needed.
+//   val: value written to every element.
+//   n:   element count; n <= 0 writes nothing, as in the portable fallback.
+static void memset32(uint32_t* dst, uint32_t val, int n) {
+    const __m128i val4 = _mm_set1_epi32(val);    // Four copies of val.
+
+    // Bulk: four elements (16 bytes) per store.
+    for ( ; n >= 4; n -= 4) {
+        _mm_storeu_si128((__m128i*)dst, val4);
+        dst += 4;
+    }
+
+    // Tail of at most three elements.  Comparing n's magnitude (instead of testing
+    // individual bits) keeps a negative n from matching any tail branch.
+    if (n >= 2) {
+        _mm_storel_epi64((__m128i*)dst, val4);   // Low 64 bits: two elements.
+        dst += 2;
+        n -= 2;
+    }
+    if (n >= 1) {
+        *dst = val;
+    }
+}
+
+#elif defined(SK_ARM_HAS_NEON)
+
+// NEON fill: writes value to each of dst[0..n).
+//
+//   dst:   first element to overwrite.
+//   value: value written to every element.
+//   n:     element count; n <= 0 writes nothing, as in the portable fallback.
+static void memset16(uint16_t* dst, uint16_t value, int n) {
+    uint16x8_t   v8  = vdupq_n_u16(value);       // Eight copies of value.
+    uint16x8x4_t v32 = {{ v8, v8, v8, v8 }};     // Thirty-two copies of value.
+
+    // Bulk: 32 elements per store.  vst4q interleaves its four source registers, which
+    // is harmless here because every lane holds the same value.
+    while (n >= 32) {
+        vst4q_u16(dst, v32);
+        dst += 32;
+        n -= 32;
+    }
+
+    // Up to three whole 8-element vectors.
+    while (n >= 8) {
+        vst1q_u16(dst, v8);
+        dst += 8;
+        n -= 8;
+    }
+
+    // Tail of at most seven elements.  Comparing n's magnitude (instead of testing
+    // individual bits) keeps a negative n from matching any tail branch.
+    if (n >= 4) {
+        vst1_u16(dst, vget_low_u16(v8));         // Low half: four elements.
+        dst += 4;
+        n -= 4;
+    }
+    while (n --> 0) {
+        *dst++ = value;
+    }
+}
+
+// NEON fill: writes value to each of dst[0..n).
+//
+//   dst:   first element to overwrite.
+//   value: value written to every element.
+//   n:     element count; n <= 0 writes nothing, as in the portable fallback.
+static void memset32(uint32_t* dst, uint32_t value, int n) {
+    uint32x4_t   v4  = vdupq_n_u32(value);       // Four copies of value.
+    uint32x4x4_t v16 = {{ v4, v4, v4, v4 }};     // Sixteen copies of value.
+
+    // Bulk: 16 elements per store.  vst4q interleaves its four source registers, which
+    // is harmless here because every lane holds the same value.
+    while (n >= 16) {
+        vst4q_u32(dst, v16);
+        dst += 16;
+        n -= 16;
+    }
+
+    // Up to three whole 4-element vectors.
+    while (n >= 4) {
+        vst1q_u32(dst, v4);
+        dst += 4;
+        n -= 4;
+    }
+
+    // Tail of at most three elements.  Comparing n's magnitude (instead of testing
+    // individual bits) keeps a negative n from matching any tail branch.
+    if (n >= 2) {
+        vst1_u32(dst, vget_low_u32(v4));         // Low half: two elements.
+        dst += 2;
+        n -= 2;
+    }
+    if (n >= 1) {
+        *dst = value;
+    }
+}
+
+#else // Neither NEON nor SSE2.
+
+// Portable fill: stores val into dst[0], dst[1], ... dst[n-1] in order.
+// A zero or negative n stores nothing.
+static void memset16(uint16_t* dst, uint16_t val, int n) {
+    for (int i = 0; i < n; i++) {
+        dst[i] = val;
+    }
+}
+// Portable fill: stores val into dst[0], dst[1], ... dst[n-1] in order.
+// A zero or negative n stores nothing.
+static void memset32(uint32_t* dst, uint32_t val, int n) {
+    for (int i = 0; i < n; i++) {
+        dst[i] = val;
+    }
+}
+
+#endif
+
+} // namespace SK_OPTS_NS
+
+#endif//SkUtils_opts_DEFINED