about | summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author: Mike Klein <mtklein@chromium.org> 2017-03-01 21:49:23 -0500
committer: Mike Klein <mtklein@chromium.org> 2017-03-02 03:08:19 +0000
commit: 9c10df3b60f4a7d50c1070a5d8c4aaadb79ba9b7 (patch)
tree: 70c7270ebdcb0a9b837ce4052a42424954095aec
parent: 580ffa0fb17bc4e924776eafd941bf1fab397cde (diff)
Some small SkJumper refactoring.
No generated code changes.

Change-Id: I2d480b5391f8246a01118766a9522d528a87f75a
Reviewed-on: https://skia-review.googlesource.com/9129
Reviewed-by: Mike Klein <mtklein@chromium.org>
Commit-Queue: Mike Klein <mtklein@chromium.org>
-rw-r--r-- src/jumper/SkJumper_stages.cpp 62
-rwxr-xr-x src/jumper/build_stages.py 2
2 files changed, 20 insertions, 44 deletions
diff --git a/src/jumper/SkJumper_stages.cpp b/src/jumper/SkJumper_stages.cpp
index 686d5d6fe9..6f498f747b 100644
--- a/src/jumper/SkJumper_stages.cpp
+++ b/src/jumper/SkJumper_stages.cpp
@@ -114,7 +114,7 @@ static Dst bit_cast(const Src& src) {
#define WRAP(name) sk_##name##_vfp4
-#elif defined(__AVX2__) && defined(__FMA__) && defined(__F16C__)
+#elif defined(__AVX__)
#include <immintrin.h>
// These are __m256 and __m256i, but friendlier and strongly-typed.
@@ -124,41 +124,14 @@ static Dst bit_cast(const Src& src) {
using U16 = uint16_t __attribute__((ext_vector_type(8)));
using U8 = uint8_t __attribute__((ext_vector_type(8)));
- static F mad(F f, F m, F a) { return _mm256_fmadd_ps(f,m,a);}
- static F min(F a, F b) { return _mm256_min_ps(a,b); }
- static F max(F a, F b) { return _mm256_max_ps(a,b); }
- static F abs_(F v) { return _mm256_and_ps(v, 0-v); }
- static F floor(F v, K*) { return _mm256_floor_ps(v); }
- static F rcp (F v) { return _mm256_rcp_ps (v); }
- static F rsqrt(F v) { return _mm256_rsqrt_ps(v); }
- static U32 round(F v, F scale) { return _mm256_cvtps_epi32(v*scale); }
-
- static U16 pack(U32 v) {
- __m128i lo = _mm256_extractf128_si256(v, 0),
- hi = _mm256_extractf128_si256(v, 1);
- return _mm_packus_epi32(lo, hi);
- }
- static U8 pack(U16 v) {
- __m128i r = _mm_packus_epi16(v,v);
- return unaligned_load<U8>(&r);
+ static F mad(F f, F m, F a) {
+ #if defined(__FMA__)
+ return _mm256_fmadd_ps(f,m,a);
+ #else
+ return f*m+a;
+ #endif
}
- static F if_then_else(I32 c, F t, F e) { return _mm256_blendv_ps(e,t,c); }
-
- static F gather(const float* p, U32 ix) { return _mm256_i32gather_ps(p, ix, 4); }
-
- #define WRAP(name) sk_##name##_hsw
-
-#elif defined(__AVX__)
- #include <immintrin.h>
-
- using F = float __attribute__((ext_vector_type(8)));
- using I32 = int32_t __attribute__((ext_vector_type(8)));
- using U32 = uint32_t __attribute__((ext_vector_type(8)));
- using U16 = uint16_t __attribute__((ext_vector_type(8)));
- using U8 = uint8_t __attribute__((ext_vector_type(8)));
-
- static F mad(F f, F m, F a) { return f*m+a; }
static F min(F a, F b) { return _mm256_min_ps(a,b); }
static F max(F a, F b) { return _mm256_max_ps(a,b); }
static F abs_(F v) { return _mm256_and_ps(v, 0-v); }
@@ -168,23 +141,30 @@ static Dst bit_cast(const Src& src) {
static U32 round(F v, F scale) { return _mm256_cvtps_epi32(v*scale); }
static U16 pack(U32 v) {
- __m128i lo = _mm256_extractf128_si256(v, 0),
- hi = _mm256_extractf128_si256(v, 1);
- return _mm_packus_epi32(lo, hi);
+ return _mm_packus_epi32(_mm256_extractf128_si256(v, 0),
+ _mm256_extractf128_si256(v, 1));
}
static U8 pack(U16 v) {
- __m128i r = _mm_packus_epi16(v,v);
+ auto r = _mm_packus_epi16(v,v);
return unaligned_load<U8>(&r);
}
static F if_then_else(I32 c, F t, F e) { return _mm256_blendv_ps(e,t,c); }
static F gather(const float* p, U32 ix) {
+ #if defined(__AVX2__)
+ return _mm256_i32gather_ps(p, ix, 4);
+ #else
return { p[ix[0]], p[ix[1]], p[ix[2]], p[ix[3]],
p[ix[4]], p[ix[5]], p[ix[6]], p[ix[7]], };
+ #endif
}
- #define WRAP(name) sk_##name##_avx
+ #if defined(__AVX2__) && defined(__F16C__) && defined(__FMA__)
+ #define WRAP(name) sk_##name##_hsw
+ #else
+ #define WRAP(name) sk_##name##_avx
+ #endif
#elif defined(__SSE2__)
#include <immintrin.h>
@@ -221,11 +201,7 @@ static Dst bit_cast(const Src& src) {
}
static F if_then_else(I32 c, F t, F e) {
- #if defined(__SSE4_1__)
- return _mm_blendv_ps(e,t,c);
- #else
return _mm_or_ps(_mm_and_ps(c, t), _mm_andnot_ps(c, e));
- #endif
}
static F floor(F v, K* k) {
diff --git a/src/jumper/build_stages.py b/src/jumper/build_stages.py
index b1914f6a29..4ca04d15b3 100755
--- a/src/jumper/build_stages.py
+++ b/src/jumper/build_stages.py
@@ -10,7 +10,7 @@ import subprocess
import sys
#clang = ['clang++']
-clang = ['clang-3.9', '-x', 'c++']
+clang = ['ccache', 'clang-3.9', '-x', 'c++']
ndk = '/Users/mtklein/brew/opt/android-ndk/'
objdump = 'gobjdump'