diff options
Diffstat (limited to 'src/jumper')
-rw-r--r--  src/jumper/SkJumper_stages.cpp |  4 ++--
-rw-r--r--  src/jumper/SkJumper_vectors.h  | 14 +++++++-------
2 files changed, 9 insertions, 9 deletions
diff --git a/src/jumper/SkJumper_stages.cpp b/src/jumper/SkJumper_stages.cpp index e7ffe6bbff..8b2e01d81b 100644 --- a/src/jumper/SkJumper_stages.cpp +++ b/src/jumper/SkJumper_stages.cpp @@ -624,7 +624,7 @@ STAGE(to_srgb, Ctx::None) { const float c = 1.130026340485f, d = 0.141387879848f; #elif defined(JUMPER_IS_SSE2) || defined(JUMPER_IS_SSE41) || \ - defined(JUMPER_IS_AVX ) || defined(JUMPER_IS_AVX2 ) + defined(JUMPER_IS_AVX ) || defined(JUMPER_IS_HSW ) const float c = 1.130048394203f, d = 0.141357362270f; #elif defined(JUMPER_IS_NEON) @@ -1162,7 +1162,7 @@ STAGE(matrix_perspective, const float* m) { SI void gradient_lookup(const SkJumper_GradientCtx* c, U32 idx, F t, F* r, F* g, F* b, F* a) { F fr, br, fg, bg, fb, bb, fa, ba; -#if defined(JUMPER_IS_AVX2) || defined(JUMPER_IS_AVX512) +#if defined(JUMPER_IS_HSW) || defined(JUMPER_IS_AVX512) if (c->stopCount <=8) { fr = _mm256_permutevar8x32_ps(_mm256_loadu_ps(c->fs[0]), idx); br = _mm256_permutevar8x32_ps(_mm256_loadu_ps(c->bs[0]), idx); diff --git a/src/jumper/SkJumper_vectors.h b/src/jumper/SkJumper_vectors.h index 5bae17cba8..0557d28199 100644 --- a/src/jumper/SkJumper_vectors.h +++ b/src/jumper/SkJumper_vectors.h @@ -22,8 +22,8 @@ #define JUMPER_IS_NEON #elif defined(__AVX512F__) #define JUMPER_IS_AVX512 -#elif defined(__AVX2__) - #define JUMPER_IS_AVX2 +#elif defined(__AVX2__) && defined(__F16C__) && defined(__FMA__) + #define JUMPER_IS_HSW #elif defined(__AVX__) #define JUMPER_IS_AVX #elif defined(__SSE4_1__) @@ -216,7 +216,7 @@ } } -#elif defined(JUMPER_IS_AVX) || defined(JUMPER_IS_AVX2) || defined(JUMPER_IS_AVX512) +#elif defined(JUMPER_IS_AVX) || defined(JUMPER_IS_HSW) || defined(JUMPER_IS_AVX512) #include <immintrin.h> // These are __m256 and __m256i, but friendlier and strongly-typed. 
@@ -229,7 +229,7 @@ using U8 = V<uint8_t >; SI F mad(F f, F m, F a) { - #if defined(JUMPER_IS_AVX2) || defined(JUMPER_IS_AVX512) + #if defined(JUMPER_IS_HSW) || defined(JUMPER_IS_AVX512) return _mm256_fmadd_ps(f,m,a); #else return f*m+a; @@ -261,7 +261,7 @@ return { p[ix[0]], p[ix[1]], p[ix[2]], p[ix[3]], p[ix[4]], p[ix[5]], p[ix[6]], p[ix[7]], }; } - #if defined(JUMPER_IS_AVX2) || defined(JUMPER_IS_AVX512) + #if defined(JUMPER_IS_HSW) || defined(JUMPER_IS_AVX512) SI F gather(const float* p, U32 ix) { return _mm256_i32gather_ps (p, ix, 4); } SI U32 gather(const uint32_t* p, U32 ix) { return _mm256_i32gather_epi32(p, ix, 4); } SI U64 gather(const uint64_t* p, U32 ix) { @@ -658,7 +658,7 @@ SI F from_half(U16 h) { #if defined(__aarch64__) return vcvt_f32_f16(h); -#elif defined(JUMPER_IS_AVX2) || defined(JUMPER_IS_AVX512) +#elif defined(JUMPER_IS_HSW) || defined(JUMPER_IS_AVX512) return _mm256_cvtph_ps(h); #else @@ -678,7 +678,7 @@ SI U16 to_half(F f) { #if defined(__aarch64__) return vcvt_f16_f32(f); -#elif defined(JUMPER_IS_AVX2) || defined(JUMPER_IS_AVX512) +#elif defined(JUMPER_IS_HSW) || defined(JUMPER_IS_AVX512) return _mm256_cvtps_ph(f, _MM_FROUND_CUR_DIRECTION); #else |