about summary refs log tree commit diff homepage
path: root/src/jumper
diff options
context:
space:
mode:
author Mike Klein <mtklein@chromium.org> 2017-12-12 17:07:49 -0500
committer Skia Commit-Bot <skia-commit-bot@chromium.org> 2017-12-12 22:54:24 +0000
commit 106e17aa38c9cf90c864232ae8dc7667f37bb787 (patch)
tree 11565154af93500b36e921dfceed2b6a1fb07a1f /src/jumper
parent 6f1362245277100c043e085e2efd0314af5436a3 (diff)
JUMPER_IS_AVX2 -> JUMPER_IS_HSW
We need to be a bit more pedantic here to support builds that may be using AVX2 as part of their baseline but perhaps not enabling all the related features SkJumper would like to use.

E.g. we've seen Tensorflow build with AVX2 and FMA, but not F16C. So check all three {AVX2,FMA,F16C}, and only then build stages in HSW mode. I've updated the define as a reminder.

This only affects builds using these features for their _baseline_ stages... the offline-compiled stages in SkJumper_generated.S are not affected.

Change-Id: I9bfb3bae3589d35043b748782cefa8c213726d6a
Reviewed-on: https://skia-review.googlesource.com/84221
Reviewed-by: Florin Malita <fmalita@chromium.org>
Commit-Queue: Mike Klein <mtklein@chromium.org>
Diffstat (limited to 'src/jumper')
-rw-r--r-- src/jumper/SkJumper_stages.cpp 4
-rw-r--r-- src/jumper/SkJumper_vectors.h 14
2 files changed, 9 insertions, 9 deletions
diff --git a/src/jumper/SkJumper_stages.cpp b/src/jumper/SkJumper_stages.cpp
index e7ffe6bbff..8b2e01d81b 100644
--- a/src/jumper/SkJumper_stages.cpp
+++ b/src/jumper/SkJumper_stages.cpp
@@ -624,7 +624,7 @@ STAGE(to_srgb, Ctx::None) {
const float c = 1.130026340485f,
d = 0.141387879848f;
#elif defined(JUMPER_IS_SSE2) || defined(JUMPER_IS_SSE41) || \
- defined(JUMPER_IS_AVX ) || defined(JUMPER_IS_AVX2 )
+ defined(JUMPER_IS_AVX ) || defined(JUMPER_IS_HSW )
const float c = 1.130048394203f,
d = 0.141357362270f;
#elif defined(JUMPER_IS_NEON)
@@ -1162,7 +1162,7 @@ STAGE(matrix_perspective, const float* m) {
SI void gradient_lookup(const SkJumper_GradientCtx* c, U32 idx, F t,
F* r, F* g, F* b, F* a) {
F fr, br, fg, bg, fb, bb, fa, ba;
-#if defined(JUMPER_IS_AVX2) || defined(JUMPER_IS_AVX512)
+#if defined(JUMPER_IS_HSW) || defined(JUMPER_IS_AVX512)
if (c->stopCount <=8) {
fr = _mm256_permutevar8x32_ps(_mm256_loadu_ps(c->fs[0]), idx);
br = _mm256_permutevar8x32_ps(_mm256_loadu_ps(c->bs[0]), idx);
diff --git a/src/jumper/SkJumper_vectors.h b/src/jumper/SkJumper_vectors.h
index 5bae17cba8..0557d28199 100644
--- a/src/jumper/SkJumper_vectors.h
+++ b/src/jumper/SkJumper_vectors.h
@@ -22,8 +22,8 @@
#define JUMPER_IS_NEON
#elif defined(__AVX512F__)
#define JUMPER_IS_AVX512
-#elif defined(__AVX2__)
- #define JUMPER_IS_AVX2
+#elif defined(__AVX2__) && defined(__F16C__) && defined(__FMA__)
+ #define JUMPER_IS_HSW
#elif defined(__AVX__)
#define JUMPER_IS_AVX
#elif defined(__SSE4_1__)
@@ -216,7 +216,7 @@
}
}
-#elif defined(JUMPER_IS_AVX) || defined(JUMPER_IS_AVX2) || defined(JUMPER_IS_AVX512)
+#elif defined(JUMPER_IS_AVX) || defined(JUMPER_IS_HSW) || defined(JUMPER_IS_AVX512)
#include <immintrin.h>
// These are __m256 and __m256i, but friendlier and strongly-typed.
@@ -229,7 +229,7 @@
using U8 = V<uint8_t >;
SI F mad(F f, F m, F a) {
- #if defined(JUMPER_IS_AVX2) || defined(JUMPER_IS_AVX512)
+ #if defined(JUMPER_IS_HSW) || defined(JUMPER_IS_AVX512)
return _mm256_fmadd_ps(f,m,a);
#else
return f*m+a;
@@ -261,7 +261,7 @@
return { p[ix[0]], p[ix[1]], p[ix[2]], p[ix[3]],
p[ix[4]], p[ix[5]], p[ix[6]], p[ix[7]], };
}
- #if defined(JUMPER_IS_AVX2) || defined(JUMPER_IS_AVX512)
+ #if defined(JUMPER_IS_HSW) || defined(JUMPER_IS_AVX512)
SI F gather(const float* p, U32 ix) { return _mm256_i32gather_ps (p, ix, 4); }
SI U32 gather(const uint32_t* p, U32 ix) { return _mm256_i32gather_epi32(p, ix, 4); }
SI U64 gather(const uint64_t* p, U32 ix) {
@@ -658,7 +658,7 @@ SI F from_half(U16 h) {
#if defined(__aarch64__)
return vcvt_f32_f16(h);
-#elif defined(JUMPER_IS_AVX2) || defined(JUMPER_IS_AVX512)
+#elif defined(JUMPER_IS_HSW) || defined(JUMPER_IS_AVX512)
return _mm256_cvtph_ps(h);
#else
@@ -678,7 +678,7 @@ SI U16 to_half(F f) {
#if defined(__aarch64__)
return vcvt_f16_f32(f);
-#elif defined(JUMPER_IS_AVX2) || defined(JUMPER_IS_AVX512)
+#elif defined(JUMPER_IS_HSW) || defined(JUMPER_IS_AVX512)
return _mm256_cvtps_ph(f, _MM_FROUND_CUR_DIRECTION);
#else