diff options
author | Mike Klein <mtklein@chromium.org> | 2015-08-04 16:48:43 -0400 |
---|---|---|
committer | Mike Klein <mtklein@chromium.org> | 2015-08-04 16:48:43 -0400 |
commit | 8caa5af92cf91debc1598380cb72c330e8c63efb (patch) | |
tree | 93003bd6400bffd51ffe8b0f2919974178662098 /src/opts/SkUtils_opts.h | |
parent | 8e47cdb17692278766cbaf19ce933da0e6181c3a (diff) |
Reorganize to keep similar code together.
This organizes memset16, memset32, and rsqrt the same way as the other code. No functional change.
BUG=skia:4117
R=djsollen@google.com
Review URL: https://codereview.chromium.org/1264423002 .
Diffstat (limited to 'src/opts/SkUtils_opts.h')
-rw-r--r-- | src/opts/SkUtils_opts.h | 110 |
1 files changed, 110 insertions, 0 deletions
diff --git a/src/opts/SkUtils_opts.h b/src/opts/SkUtils_opts.h new file mode 100644 index 0000000000..44fe643276 --- /dev/null +++ b/src/opts/SkUtils_opts.h @@ -0,0 +1,110 @@ +/* + * Copyright 2015 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkUtils_opts_DEFINED +#define SkUtils_opts_DEFINED + +namespace SK_OPTS_NS { + +#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 + +static void memset16(uint16_t* dst, uint16_t val, int n) { + auto dst8 = (__m128i*)dst; + auto val8 = _mm_set1_epi16(val); + for ( ; n >= 8; n -= 8) { + _mm_storeu_si128(dst8++, val8); + } + dst = (uint16_t*)dst8; + if (n & 4) { + _mm_storel_epi64((__m128i*)dst, val8); + dst += 4; + } + if (n & 2) { + *(uint32_t*)dst = _mm_cvtsi128_si32(val8); + dst += 2; + } + if (n & 1) { + *dst = val; + } +} + +static void memset32(uint32_t* dst, uint32_t val, int n) { + auto dst4 = (__m128i*)dst; + auto val4 = _mm_set1_epi32(val); + for ( ; n >= 4; n -= 4) { + _mm_storeu_si128(dst4++, val4); + } + dst = (uint32_t*)dst4; + if (n & 2) { + _mm_storel_epi64((__m128i*)dst, val4); + dst += 2; + } + if (n & 1) { + *dst = val; + } +} + +#elif defined(SK_ARM_HAS_NEON) + +static void memset16(uint16_t* dst, uint16_t value, int n) { + uint16x8_t v8 = vdupq_n_u16(value); + uint16x8x4_t v32 = {{ v8, v8, v8, v8 }}; + + while (n >= 32) { + vst4q_u16(dst, v32); // This swizzles, but we don't care: all lanes are the same, value. + dst += 32; + n -= 32; + } + switch (n / 8) { + case 3: vst1q_u16(dst, v8); dst += 8; + case 2: vst1q_u16(dst, v8); dst += 8; + case 1: vst1q_u16(dst, v8); dst += 8; + } + if (n & 4) { + vst1_u16(dst, vget_low_u16(v8)); + dst += 4; + } + switch (n & 3) { + case 3: *dst++ = value; + case 2: *dst++ = value; + case 1: *dst = value; + } +} + +static void memset32(uint32_t* dst, uint32_t value, int n) { + uint32x4_t v4 = vdupq_n_u32(value); + uint32x4x4_t v16 = {{ v4, v4, v4, v4 }}; + + while (n >= 16) { + vst4q_u32(dst, v16); // This swizzles, but we don't care: all lanes are the same, value. + dst += 16; + n -= 16; + } + switch (n / 4) { + case 3: vst1q_u32(dst, v4); dst += 4; + case 2: vst1q_u32(dst, v4); dst += 4; + case 1: vst1q_u32(dst, v4); dst += 4; + } + if (n & 2) { + vst1_u32(dst, vget_low_u32(v4)); + dst += 2; + } + if (n & 1) { + *dst = value; + } +} + +#else // Neither NEON nor SSE2. + +static void memset16(uint16_t* dst, uint16_t val, int n) { while (n --> 0) { *dst++ = val; } } +static void memset32(uint32_t* dst, uint32_t val, int n) { while (n --> 0) { *dst++ = val; } } + +#endif + +} // namespace SK_OPTS_NS + +#endif//SkUtils_opts_DEFINED |