about | summary | refs | log | tree | commit | diff | homepage
path: root/src/jumper
diff options
context:
space:
mode:
Diffstat (limited to 'src/jumper')
-rw-r--r-- src/jumper/SkJumper_stages.cpp 4
-rw-r--r-- src/jumper/SkJumper_vectors.h 14
2 files changed, 9 insertions, 9 deletions
diff --git a/src/jumper/SkJumper_stages.cpp b/src/jumper/SkJumper_stages.cpp
index e7ffe6bbff..8b2e01d81b 100644
--- a/src/jumper/SkJumper_stages.cpp
+++ b/src/jumper/SkJumper_stages.cpp
@@ -624,7 +624,7 @@ STAGE(to_srgb, Ctx::None) {
const float c = 1.130026340485f,
d = 0.141387879848f;
#elif defined(JUMPER_IS_SSE2) || defined(JUMPER_IS_SSE41) || \
- defined(JUMPER_IS_AVX ) || defined(JUMPER_IS_AVX2 )
+ defined(JUMPER_IS_AVX ) || defined(JUMPER_IS_HSW )
const float c = 1.130048394203f,
d = 0.141357362270f;
#elif defined(JUMPER_IS_NEON)
@@ -1162,7 +1162,7 @@ STAGE(matrix_perspective, const float* m) {
SI void gradient_lookup(const SkJumper_GradientCtx* c, U32 idx, F t,
F* r, F* g, F* b, F* a) {
F fr, br, fg, bg, fb, bb, fa, ba;
-#if defined(JUMPER_IS_AVX2) || defined(JUMPER_IS_AVX512)
+#if defined(JUMPER_IS_HSW) || defined(JUMPER_IS_AVX512)
if (c->stopCount <=8) {
fr = _mm256_permutevar8x32_ps(_mm256_loadu_ps(c->fs[0]), idx);
br = _mm256_permutevar8x32_ps(_mm256_loadu_ps(c->bs[0]), idx);
diff --git a/src/jumper/SkJumper_vectors.h b/src/jumper/SkJumper_vectors.h
index 5bae17cba8..0557d28199 100644
--- a/src/jumper/SkJumper_vectors.h
+++ b/src/jumper/SkJumper_vectors.h
@@ -22,8 +22,8 @@
#define JUMPER_IS_NEON
#elif defined(__AVX512F__)
#define JUMPER_IS_AVX512
-#elif defined(__AVX2__)
- #define JUMPER_IS_AVX2
+#elif defined(__AVX2__) && defined(__F16C__) && defined(__FMA__)
+ #define JUMPER_IS_HSW
#elif defined(__AVX__)
#define JUMPER_IS_AVX
#elif defined(__SSE4_1__)
@@ -216,7 +216,7 @@
}
}
-#elif defined(JUMPER_IS_AVX) || defined(JUMPER_IS_AVX2) || defined(JUMPER_IS_AVX512)
+#elif defined(JUMPER_IS_AVX) || defined(JUMPER_IS_HSW) || defined(JUMPER_IS_AVX512)
#include <immintrin.h>
// These are __m256 and __m256i, but friendlier and strongly-typed.
@@ -229,7 +229,7 @@
using U8 = V<uint8_t >;
SI F mad(F f, F m, F a) {
- #if defined(JUMPER_IS_AVX2) || defined(JUMPER_IS_AVX512)
+ #if defined(JUMPER_IS_HSW) || defined(JUMPER_IS_AVX512)
return _mm256_fmadd_ps(f,m,a);
#else
return f*m+a;
@@ -261,7 +261,7 @@
return { p[ix[0]], p[ix[1]], p[ix[2]], p[ix[3]],
p[ix[4]], p[ix[5]], p[ix[6]], p[ix[7]], };
}
- #if defined(JUMPER_IS_AVX2) || defined(JUMPER_IS_AVX512)
+ #if defined(JUMPER_IS_HSW) || defined(JUMPER_IS_AVX512)
SI F gather(const float* p, U32 ix) { return _mm256_i32gather_ps (p, ix, 4); }
SI U32 gather(const uint32_t* p, U32 ix) { return _mm256_i32gather_epi32(p, ix, 4); }
SI U64 gather(const uint64_t* p, U32 ix) {
@@ -658,7 +658,7 @@ SI F from_half(U16 h) {
#if defined(__aarch64__)
return vcvt_f32_f16(h);
-#elif defined(JUMPER_IS_AVX2) || defined(JUMPER_IS_AVX512)
+#elif defined(JUMPER_IS_HSW) || defined(JUMPER_IS_AVX512)
return _mm256_cvtph_ps(h);
#else
@@ -678,7 +678,7 @@ SI U16 to_half(F f) {
#if defined(__aarch64__)
return vcvt_f16_f32(f);
-#elif defined(JUMPER_IS_AVX2) || defined(JUMPER_IS_AVX512)
+#elif defined(JUMPER_IS_HSW) || defined(JUMPER_IS_AVX512)
return _mm256_cvtps_ph(f, _MM_FROUND_CUR_DIRECTION);
#else