diff options
author | 2017-03-01 21:49:23 -0500 | |
---|---|---|
committer | 2017-03-02 03:08:19 +0000 | |
commit | 9c10df3b60f4a7d50c1070a5d8c4aaadb79ba9b7 (patch) | |
tree | 70c7270ebdcb0a9b837ce4052a42424954095aec | |
parent | 580ffa0fb17bc4e924776eafd941bf1fab397cde (diff) |
Some small SkJumper refactoring.
No generated code changes.
Change-Id: I2d480b5391f8246a01118766a9522d528a87f75a
Reviewed-on: https://skia-review.googlesource.com/9129
Reviewed-by: Mike Klein <mtklein@chromium.org>
Commit-Queue: Mike Klein <mtklein@chromium.org>
-rw-r--r-- | src/jumper/SkJumper_stages.cpp | 62 | ||||
-rwxr-xr-x | src/jumper/build_stages.py | 2 |
2 files changed, 20 insertions, 44 deletions
diff --git a/src/jumper/SkJumper_stages.cpp b/src/jumper/SkJumper_stages.cpp
index 686d5d6fe9..6f498f747b 100644
--- a/src/jumper/SkJumper_stages.cpp
+++ b/src/jumper/SkJumper_stages.cpp
@@ -114,7 +114,7 @@ static Dst bit_cast(const Src& src) {
 
     #define WRAP(name) sk_##name##_vfp4
 
-#elif defined(__AVX2__) && defined(__FMA__) && defined(__F16C__)
+#elif defined(__AVX__)
     #include <immintrin.h>
 
     // These are __m256 and __m256i, but friendlier and strongly-typed.
@@ -124,41 +124,14 @@ static Dst bit_cast(const Src& src) {
     using U16 = uint16_t __attribute__((ext_vector_type(8)));
     using U8 = uint8_t __attribute__((ext_vector_type(8)));
 
-    static F mad(F f, F m, F a) { return _mm256_fmadd_ps(f,m,a);}
-    static F min(F a, F b) { return _mm256_min_ps(a,b); }
-    static F max(F a, F b) { return _mm256_max_ps(a,b); }
-    static F abs_(F v) { return _mm256_and_ps(v, 0-v); }
-    static F floor(F v, K*) { return _mm256_floor_ps(v); }
-    static F rcp (F v) { return _mm256_rcp_ps (v); }
-    static F rsqrt(F v) { return _mm256_rsqrt_ps(v); }
-    static U32 round(F v, F scale) { return _mm256_cvtps_epi32(v*scale); }
-
-    static U16 pack(U32 v) {
-        __m128i lo = _mm256_extractf128_si256(v, 0),
-                hi = _mm256_extractf128_si256(v, 1);
-        return _mm_packus_epi32(lo, hi);
-    }
-    static U8 pack(U16 v) {
-        __m128i r = _mm_packus_epi16(v,v);
-        return unaligned_load<U8>(&r);
+    static F mad(F f, F m, F a) {
+    #if defined(__FMA__)
+        return _mm256_fmadd_ps(f,m,a);
+    #else
+        return f*m+a;
+    #endif
     }
 
-    static F if_then_else(I32 c, F t, F e) { return _mm256_blendv_ps(e,t,c); }
-
-    static F gather(const float* p, U32 ix) { return _mm256_i32gather_ps(p, ix, 4); }
-
-    #define WRAP(name) sk_##name##_hsw
-
-#elif defined(__AVX__)
-    #include <immintrin.h>
-
-    using F = float __attribute__((ext_vector_type(8)));
-    using I32 = int32_t __attribute__((ext_vector_type(8)));
-    using U32 = uint32_t __attribute__((ext_vector_type(8)));
-    using U16 = uint16_t __attribute__((ext_vector_type(8)));
-    using U8 = uint8_t __attribute__((ext_vector_type(8)));
-
-    static F mad(F f, F m, F a) { return f*m+a; }
     static F min(F a, F b) { return _mm256_min_ps(a,b); }
     static F max(F a, F b) { return _mm256_max_ps(a,b); }
     static F abs_(F v) { return _mm256_and_ps(v, 0-v); }
@@ -168,23 +141,30 @@ static Dst bit_cast(const Src& src) {
     static U32 round(F v, F scale) { return _mm256_cvtps_epi32(v*scale); }
 
     static U16 pack(U32 v) {
-        __m128i lo = _mm256_extractf128_si256(v, 0),
-                hi = _mm256_extractf128_si256(v, 1);
-        return _mm_packus_epi32(lo, hi);
+        return _mm_packus_epi32(_mm256_extractf128_si256(v, 0),
+                                _mm256_extractf128_si256(v, 1));
     }
     static U8 pack(U16 v) {
-        __m128i r = _mm_packus_epi16(v,v);
+        auto r = _mm_packus_epi16(v,v);
         return unaligned_load<U8>(&r);
     }
 
     static F if_then_else(I32 c, F t, F e) { return _mm256_blendv_ps(e,t,c); }
 
     static F gather(const float* p, U32 ix) {
+    #if defined(__AVX2__)
+        return _mm256_i32gather_ps(p, ix, 4);
+    #else
         return { p[ix[0]], p[ix[1]], p[ix[2]], p[ix[3]],
                  p[ix[4]], p[ix[5]], p[ix[6]], p[ix[7]], };
+    #endif
     }
 
-    #define WRAP(name) sk_##name##_avx
+    #if defined(__AVX2__) && defined(__F16C__) && defined(__FMA__)
+        #define WRAP(name) sk_##name##_hsw
+    #else
+        #define WRAP(name) sk_##name##_avx
+    #endif
 
 #elif defined(__SSE2__)
     #include <immintrin.h>
@@ -221,11 +201,7 @@ static Dst bit_cast(const Src& src) {
     }
 
     static F if_then_else(I32 c, F t, F e) {
-    #if defined(__SSE4_1__)
-        return _mm_blendv_ps(e,t,c);
-    #else
         return _mm_or_ps(_mm_and_ps(c, t), _mm_andnot_ps(c, e));
-    #endif
     }
 
     static F floor(F v, K* k) {
diff --git a/src/jumper/build_stages.py b/src/jumper/build_stages.py
index b1914f6a29..4ca04d15b3 100755
--- a/src/jumper/build_stages.py
+++ b/src/jumper/build_stages.py
@@ -10,7 +10,7 @@ import subprocess
 import sys
 
 #clang = ['clang++']
-clang = ['clang-3.9', '-x', 'c++']
+clang = ['ccache', 'clang-3.9', '-x', 'c++']
 
 ndk = '/Users/mtklein/brew/opt/android-ndk/'
 objdump = 'gobjdump'