aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/opts/SkUtils_opts.h
diff options
context:
space:
mode:
author: mtklein <mtklein@chromium.org> 2016-02-17 14:21:28 -0800
committer: Commit bot <commit-bot@chromium.org> 2016-02-17 14:21:28 -0800
commit: 3bc2624a4b89c49efd65f5e548ac5f2dd9351431 (patch)
tree: 7994dedcda068d5cbaa18b2f92a2778639167847 /src/opts/SkUtils_opts.h
parent: 2775cf548ca62d3b1076a7e9cc2a40853f1bb885 (diff)
try plain-old code for sk_memset16/32 now that NEON is compile-time
Most of these implementations now just say "always inline". Let's see if we can get away with the simplicity of doing that all the time.

These inlined implementations can autovectorize easily.

BUG=skia:
GOLD_TRYBOT_URL= https://gold.skia.org/search2?unt=true&query=source_type%3Dgm&master=false&issue=1639863002
CQ_EXTRA_TRYBOTS=client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot

Review URL: https://codereview.chromium.org/1639863002
Diffstat (limited to 'src/opts/SkUtils_opts.h')
-rw-r--r-- src/opts/SkUtils_opts.h 110
1 files changed, 0 insertions, 110 deletions
diff --git a/src/opts/SkUtils_opts.h b/src/opts/SkUtils_opts.h
deleted file mode 100644
index 44fe643276..0000000000
--- a/src/opts/SkUtils_opts.h
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
- * Copyright 2015 Google Inc.
- *
- * Use of this source code is governed by a BSD-style license that can be
- * found in the LICENSE file.
- */
-
-#ifndef SkUtils_opts_DEFINED
-#define SkUtils_opts_DEFINED
-
-namespace SK_OPTS_NS {
-
-#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
-
-// SSE2 memset16: stores val into dst[0], ..., dst[n-1].
-//
-// dst does not need to be 16-byte aligned; only unaligned vector stores and
-// plain 16-bit stores are used.  A non-positive n writes nothing, which is
-// what the portable fallback in this file does as well.
-static void memset16(uint16_t* dst, uint16_t val, int n) {
-    // A negative n would skip the vector loop but still has low bits set
-    // (e.g. -1 has bits 4, 2 and 1 set), so the tail stores below would
-    // write elements the caller never asked for.
-    if (n <= 0) {
-        return;
-    }
-
-    auto dst8 = (__m128i*)dst;
-    auto val8 = _mm_set1_epi16(val);
-
-    // Bulk: eight 16-bit lanes per 128-bit store.
-    for ( ; n >= 8; n -= 8) {
-        _mm_storeu_si128(dst8++, val8);
-    }
-    dst = (uint16_t*)dst8;
-
-    // Tail: n is now in [0, 8); each set bit selects one store of that many elements.
-    if (n & 4) {
-        _mm_storel_epi64((__m128i*)dst, val8);  // low 64 bits == four lanes
-        dst += 4;
-    }
-    if (n & 2) {
-        // Two element-sized stores instead of one store through a uint32_t*:
-        // dst is only guaranteed to be 2-byte aligned, and writing uint16_t
-        // storage through a uint32_t lvalue breaks the aliasing rules.
-        dst[0] = val;
-        dst[1] = val;
-        dst += 2;
-    }
-    if (n & 1) {
-        *dst = val;
-    }
-}
-
-// SSE2 memset32: stores val into dst[0], ..., dst[n-1].
-//
-// dst does not need to be 16-byte aligned; only unaligned vector stores and
-// a plain 32-bit store are used.  A non-positive n writes nothing, which is
-// what the portable fallback in this file does as well.
-static void memset32(uint32_t* dst, uint32_t val, int n) {
-    // A negative n would skip the vector loop but still has low bits set
-    // (e.g. -1 has bits 2 and 1 set), so the tail stores below would write
-    // elements the caller never asked for.
-    if (n <= 0) {
-        return;
-    }
-
-    auto dst4 = (__m128i*)dst;
-    auto val4 = _mm_set1_epi32(val);
-
-    // Bulk: four 32-bit lanes per 128-bit store.
-    for ( ; n >= 4; n -= 4) {
-        _mm_storeu_si128(dst4++, val4);
-    }
-    dst = (uint32_t*)dst4;
-
-    // Tail: n is now in [0, 4).
-    if (n & 2) {
-        _mm_storel_epi64((__m128i*)dst, val4);  // low 64 bits == two lanes
-        dst += 2;
-    }
-    if (n & 1) {
-        *dst = val;
-    }
-}
-
-#elif defined(SK_ARM_HAS_NEON)
-
-// NEON memset16: stores value into dst[0], ..., dst[n-1].
-//
-// A non-positive n writes nothing, which is what the portable fallback in
-// this file does as well.
-static void memset16(uint16_t* dst, uint16_t value, int n) {
-    // A negative n would skip the bulk loop, but (n & 4) and (n & 3) can
-    // still be non-zero, so the tail stores below would write elements the
-    // caller never asked for.
-    if (n <= 0) {
-        return;
-    }
-
-    uint16x8_t v8 = vdupq_n_u16(value);
-    uint16x8x4_t v32 = {{ v8, v8, v8, v8 }};
-
-    // Bulk: 32 elements per iteration.
-    while (n >= 32) {
-        vst4q_u16(dst, v32);  // This swizzles, but we don't care: all lanes are the same, value.
-        dst += 32;
-        n -= 32;
-    }
-
-    // n is now in [0, 32): up to three more groups of eight.
-    // Each case deliberately falls through into the next.
-    switch (n / 8) {
-        case 3: vst1q_u16(dst, v8); dst += 8;  // fallthrough
-        case 2: vst1q_u16(dst, v8); dst += 8;  // fallthrough
-        case 1: vst1q_u16(dst, v8); dst += 8;
-    }
-    if (n & 4) {
-        vst1_u16(dst, vget_low_u16(v8));  // low half == four lanes
-        dst += 4;
-    }
-    // Last zero to three elements, again via deliberate fallthrough.
-    switch (n & 3) {
-        case 3: *dst++ = value;  // fallthrough
-        case 2: *dst++ = value;  // fallthrough
-        case 1: *dst = value;
-    }
-}
-
-// NEON memset32: stores value into dst[0], ..., dst[n-1].
-//
-// A non-positive n writes nothing, which is what the portable fallback in
-// this file does as well.
-static void memset32(uint32_t* dst, uint32_t value, int n) {
-    // A negative n would skip the bulk loop, but (n & 2) and (n & 1) can
-    // still be non-zero, so the tail stores below would write elements the
-    // caller never asked for.
-    if (n <= 0) {
-        return;
-    }
-
-    uint32x4_t v4 = vdupq_n_u32(value);
-    uint32x4x4_t v16 = {{ v4, v4, v4, v4 }};
-
-    // Bulk: 16 elements per iteration.
-    while (n >= 16) {
-        vst4q_u32(dst, v16);  // This swizzles, but we don't care: all lanes are the same, value.
-        dst += 16;
-        n -= 16;
-    }
-
-    // n is now in [0, 16): up to three more groups of four.
-    // Each case deliberately falls through into the next.
-    switch (n / 4) {
-        case 3: vst1q_u32(dst, v4); dst += 4;  // fallthrough
-        case 2: vst1q_u32(dst, v4); dst += 4;  // fallthrough
-        case 1: vst1q_u32(dst, v4); dst += 4;
-    }
-    if (n & 2) {
-        vst1_u32(dst, vget_low_u32(v4));  // low half == two lanes
-        dst += 2;
-    }
-    if (n & 1) {
-        *dst = value;
-    }
-}
-
-#else // Neither NEON nor SSE2.
-
-// Portable memset16: stores val into dst[0], ..., dst[n-1]; a non-positive n writes nothing.
-static void memset16(uint16_t* dst, uint16_t val, int n) {
-    for (int i = 0; i < n; i++) {
-        dst[i] = val;
-    }
-}
-// Portable memset32: stores val into dst[0], ..., dst[n-1]; a non-positive n writes nothing.
-static void memset32(uint32_t* dst, uint32_t val, int n) {
-    for (int i = 0; i < n; i++) {
-        dst[i] = val;
-    }
-}
-
-#endif
-
-} // namespace SK_OPTS_NS
-
-#endif//SkUtils_opts_DEFINED