/*
 * Copyright 2015 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#ifndef SkUtils_opts_DEFINED
#define SkUtils_opts_DEFINED

// memset16/memset32 store `n` copies of a 16-bit / 32-bit value starting at
// `dst`.  Three implementations are provided (SSE2, NEON, portable) and one is
// selected at preprocessing time.  In every variant a count of n <= 0 stores
// nothing.

namespace SK_OPTS_NS {

#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2

// SSE2: fill dst[0..n) with val, 8 lanes per unaligned 128-bit store, then
// finish the remaining 0-7 elements by testing the low bits of n.
static void memset16(uint16_t* dst, uint16_t val, int n) {
    // The tail below inspects individual bits of n, which is only meaningful
    // for non-negative counts; a negative n would otherwise write elements.
    if (n <= 0) {
        return;
    }

    auto dst8 = (__m128i*)dst;
    auto val8 = _mm_set1_epi16(val);
    for ( ; n >= 8; n -= 8) {
        _mm_storeu_si128(dst8++, val8);
    }
    dst = (uint16_t*)dst8;

    if (n & 4) {
        _mm_storel_epi64((__m128i*)dst, val8);  // low 64 bits == 4 lanes
        dst += 4;
    }
    if (n & 2) {
        // Two 16-bit stores instead of one 32-bit store through a cast
        // pointer: dst is only known to be a uint16_t pointer.
        dst[0] = val;
        dst[1] = val;
        dst += 2;
    }
    if (n & 1) {
        *dst = val;
    }
}

// SSE2: fill dst[0..n) with val, 4 lanes per unaligned 128-bit store, then
// finish the remaining 0-3 elements by testing the low bits of n.
static void memset32(uint32_t* dst, uint32_t val, int n) {
    // See memset16: the bit-tested tail requires a non-negative count.
    if (n <= 0) {
        return;
    }

    auto dst4 = (__m128i*)dst;
    auto val4 = _mm_set1_epi32(val);
    for ( ; n >= 4; n -= 4) {
        _mm_storeu_si128(dst4++, val4);
    }
    dst = (uint32_t*)dst4;

    if (n & 2) {
        _mm_storel_epi64((__m128i*)dst, val4);  // low 64 bits == 2 lanes
        dst += 2;
    }
    if (n & 1) {
        *dst = val;
    }
}

#elif defined(SK_ARM_HAS_NEON)

// NEON: fill dst[0..n) with value.  32 elements per iteration in the main
// loop, then up to three 8-element stores, one 4-element store, and up to
// three scalar stores.
static void memset16(uint16_t* dst, uint16_t value, int n) {
    // The tail below inspects individual bits of n, which is only meaningful
    // for non-negative counts; a negative n would otherwise write elements.
    if (n <= 0) {
        return;
    }

    uint16x8_t   v8  = vdupq_n_u16(value);
    uint16x8x4_t v32 = {{ v8, v8, v8, v8 }};

    while (n >= 32) {
        vst4q_u16(dst, v32);  // This swizzles, but we don't care: all lanes are the same, value.
        dst += 32;
        n   -= 32;
    }

    // n is now in [0, 32); each case intentionally falls into the next.
    switch (n / 8) {
        case 3: vst1q_u16(dst, v8); dst += 8;  // fallthrough
        case 2: vst1q_u16(dst, v8); dst += 8;  // fallthrough
        case 1: vst1q_u16(dst, v8); dst += 8;
        default: break;
    }
    if (n & 4) {
        vst1_u16(dst, vget_low_u16(v8));
        dst += 4;
    }
    switch (n & 3) {
        case 3: *dst++ = value;  // fallthrough
        case 2: *dst++ = value;  // fallthrough
        case 1: *dst   = value;
        default: break;
    }
}

// NEON: fill dst[0..n) with value.  16 elements per iteration in the main
// loop, then up to three 4-element stores, one 2-element store, and one
// scalar store.
static void memset32(uint32_t* dst, uint32_t value, int n) {
    // See memset16: the bit-tested tail requires a non-negative count.
    if (n <= 0) {
        return;
    }

    uint32x4_t   v4  = vdupq_n_u32(value);
    uint32x4x4_t v16 = {{ v4, v4, v4, v4 }};

    while (n >= 16) {
        vst4q_u32(dst, v16);  // This swizzles, but we don't care: all lanes are the same, value.
        dst += 16;
        n   -= 16;
    }

    // n is now in [0, 16); each case intentionally falls into the next.
    switch (n / 4) {
        case 3: vst1q_u32(dst, v4); dst += 4;  // fallthrough
        case 2: vst1q_u32(dst, v4); dst += 4;  // fallthrough
        case 1: vst1q_u32(dst, v4); dst += 4;
        default: break;
    }
    if (n & 2) {
        vst1_u32(dst, vget_low_u32(v4));
        dst += 2;
    }
    if (n & 1) {
        *dst = value;
    }
}

#else // Neither NEON nor SSE2.

// Portable: store val into dst[0..n) one element at a time.
static void memset16(uint16_t* dst, uint16_t val, int n) {
    while (n-- > 0) {
        *dst++ = val;
    }
}

// Portable: store val into dst[0..n) one element at a time.
static void memset32(uint32_t* dst, uint32_t val, int n) {
    while (n-- > 0) {
        *dst++ = val;
    }
}

#endif

}  // namespace SK_OPTS_NS

#endif//SkUtils_opts_DEFINED