author    Mike Klein <mtklein@chromium.org>               2017-04-10 14:44:47 -0400
committer Skia Commit-Bot <skia-commit-bot@chromium.org>  2017-04-11 14:59:49 +0000
commit    d177ae18d7f2d02e872adbfaf2bd060beba48365 (patch)
tree      5c96cf7c9d74b26c76b00db1c1afdee7e074a770 /src/opts
parent    8d4cbad13c3f54dbfd080931401d7b24e346822f (diff)
remove SkNx AVX code
We can't realistically use AVX and SkNx together because of ODR problems, so remove the code that may tempt us to try.

Remaining code paths using AVX:
  - one intrinsics-only routine in SkOpts_hsw.cpp
  - SkJumper

Change-Id: I0d2d03b47ea4a0eec27f2de2b28a4c3d1ff8376f
Reviewed-on: https://skia-review.googlesource.com/13121
Reviewed-by: Herb Derby <herb@google.com>
Commit-Queue: Mike Klein <mtklein@chromium.org>
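Why ODR is the blocker: SkNx types are defined inline in headers, so a translation unit compiled with -mavx2 and one compiled without can emit different definitions of the same inline function, and the linker silently keeps just one of them. A minimal sketch of the hazard, using a hypothetical function that is not Skia code:

    // sknx_like.h -- hypothetical header, included by many .cpp files
    #include <immintrin.h>

    inline __m128 lerp4(__m128 a, __m128 b, __m128 t) {
    #if defined(__AVX2__) && defined(__FMA__)   // set only in TUs built with -mavx2 -mfma
        return _mm_fmadd_ps(t, _mm_sub_ps(b, a), a);            // FMA definition
    #else
        return _mm_add_ps(a, _mm_mul_ps(t, _mm_sub_ps(b, a)));  // baseline SSE definition
    #endif
    }

    // a.cpp built with -mavx2 -mfma and b.cpp built with baseline SSE2 now
    // contain two different definitions of lerp4(). The linker picks one
    // arbitrarily, so a baseline-only caller may end up executing the FMA
    // version and crash on pre-AVX hardware. That is the ODR violation.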
Diffstat (limited to 'src/opts')
-rw-r--r--  src/opts/SkNx_sse.h               | 240
-rw-r--r--  src/opts/SkRasterPipeline_opts.h  |  69
2 files changed, 0 insertions, 309 deletions
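Among the helpers removed in the first diff below are SkNx_split() and SkNx_join(), which bridged 8-wide AVX vectors and pairs of 4-wide SSE halves. Without a native 8-wide type, the same work simply stays on two 4-wide halves throughout; a self-contained sketch of that pattern in plain SSE intrinsics (the helper name is hypothetical):

    #include <xmmintrin.h>   // SSE

    // Scale 8 floats as two 4-wide halves -- the shape an 8-wide wrapper
    // falls back to when it is built out of two Sk4f-style halves.
    static inline void scale8(const float* src, float* dst, float k) {
        __m128 s  = _mm_set1_ps(k);
        __m128 lo = _mm_loadu_ps(src + 0);   // lanes 0..3
        __m128 hi = _mm_loadu_ps(src + 4);   // lanes 4..7
        _mm_storeu_ps(dst + 0, _mm_mul_ps(lo, s));
        _mm_storeu_ps(dst + 4, _mm_mul_ps(hi, s));
    }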
diff --git a/src/opts/SkNx_sse.h b/src/opts/SkNx_sse.h
index e81872291e..3e59a9d0e8 100644
--- a/src/opts/SkNx_sse.h
+++ b/src/opts/SkNx_sse.h
@@ -329,7 +329,6 @@ public:
AI void store(void* ptr) const { _mm_storeu_si128((__m128i*)ptr, fVec); }
AI static void Load4(const void* ptr, SkNx* r, SkNx* g, SkNx* b, SkNx* a) {
- // TODO: AVX2 version
__m128i _01 = _mm_loadu_si128(((__m128i*)ptr) + 0),
_23 = _mm_loadu_si128(((__m128i*)ptr) + 1),
_45 = _mm_loadu_si128(((__m128i*)ptr) + 2),
@@ -351,7 +350,6 @@ public:
*a = _mm_unpackhi_epi64(ba0123, ba4567);
}
AI static void Load3(const void* ptr, SkNx* r, SkNx* g, SkNx* b) {
- // TODO: AVX2 version
const uint8_t* ptr8 = (const uint8_t*) ptr;
__m128i rgb0 = _mm_loadu_si128((const __m128i*) (ptr8 + 0*2));
__m128i rgb1 = _mm_srli_si128(rgb0, 3*2);
@@ -377,7 +375,6 @@ public:
*b = _mm_unpacklo_epi64(bx03, bx47);
}
AI static void Store4(void* ptr, const SkNx& r, const SkNx& g, const SkNx& b, const SkNx& a) {
- // TODO: AVX2 version
__m128i rg0123 = _mm_unpacklo_epi16(r.fVec, g.fVec), // r0 g0 r1 g1 r2 g2 r3 g3
rg4567 = _mm_unpackhi_epi16(r.fVec, g.fVec), // r4 g4 r5 g5 r6 g6 r7 g7
ba0123 = _mm_unpacklo_epi16(b.fVec, a.fVec),
@@ -486,243 +483,6 @@ public:
__m128i fVec;
};
-#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_AVX2
-
- template <>
- class SkNx<8, uint8_t> {
- public:
- AI SkNx(const __m128i& vec) : fVec(vec) {}
-
- AI SkNx() {}
- AI SkNx(uint8_t v) : fVec(_mm_set1_epi8(v)) {}
- AI SkNx(uint8_t a, uint8_t b, uint8_t c, uint8_t d,
- uint8_t e, uint8_t f, uint8_t g, uint8_t h)
- : fVec(_mm_setr_epi8(a,b,c,d, e,f,g,h, 0,0,0,0, 0,0,0,0)) {}
-
-
- AI static SkNx Load(const void* ptr) { return _mm_loadl_epi64((const __m128i*)ptr); }
- AI void store(void* ptr) const { _mm_storel_epi64((__m128i*)ptr, fVec); }
-
- AI uint8_t operator[](int k) const {
- SkASSERT(0 <= k && k < 8);
- union { __m128i v; uint8_t us[16]; } pun = {fVec};
- return pun.us[k&7];
- }
-
- __m128i fVec;
- };
-
- template <>
- class SkNx<8, int32_t> {
- public:
- AI SkNx(const __m256i& vec) : fVec(vec) {}
-
- AI SkNx() {}
- AI SkNx(int32_t v) : fVec(_mm256_set1_epi32(v)) {}
- AI SkNx(int32_t a, int32_t b, int32_t c, int32_t d,
- int32_t e, int32_t f, int32_t g, int32_t h)
- : fVec(_mm256_setr_epi32(a,b,c,d, e,f,g,h)) {}
-
- AI static SkNx Load(const void* ptr) { return _mm256_loadu_si256((const __m256i*)ptr); }
- AI void store(void* ptr) const { _mm256_storeu_si256((__m256i*)ptr, fVec); }
-
- AI SkNx operator + (const SkNx& o) const { return _mm256_add_epi32(fVec, o.fVec); }
- AI SkNx operator - (const SkNx& o) const { return _mm256_sub_epi32(fVec, o.fVec); }
- AI SkNx operator * (const SkNx& o) const { return _mm256_mullo_epi32(fVec, o.fVec); }
-
- AI SkNx operator & (const SkNx& o) const { return _mm256_and_si256(fVec, o.fVec); }
- AI SkNx operator | (const SkNx& o) const { return _mm256_or_si256(fVec, o.fVec); }
- AI SkNx operator ^ (const SkNx& o) const { return _mm256_xor_si256(fVec, o.fVec); }
-
- AI SkNx operator << (int bits) const { return _mm256_slli_epi32(fVec, bits); }
- AI SkNx operator >> (int bits) const { return _mm256_srai_epi32(fVec, bits); }
-
- AI int32_t operator[](int k) const {
- SkASSERT(0 <= k && k < 8);
- union { __m256i v; int32_t is[8]; } pun = {fVec};
- return pun.is[k&7];
- }
-
- __m256i fVec;
- };
-
- template <>
- class SkNx<8, uint32_t> {
- public:
- AI SkNx(const __m256i& vec) : fVec(vec) {}
-
- AI SkNx() {}
- AI SkNx(uint32_t v) : fVec(_mm256_set1_epi32(v)) {}
- AI SkNx(uint32_t a, uint32_t b, uint32_t c, uint32_t d,
- uint32_t e, uint32_t f, uint32_t g, uint32_t h)
- : fVec(_mm256_setr_epi32(a,b,c,d, e,f,g,h)) {}
-
- AI static SkNx Load(const void* ptr) { return _mm256_loadu_si256((const __m256i*)ptr); }
- AI void store(void* ptr) const { _mm256_storeu_si256((__m256i*)ptr, fVec); }
-
- AI SkNx operator + (const SkNx& o) const { return _mm256_add_epi32(fVec, o.fVec); }
- AI SkNx operator - (const SkNx& o) const { return _mm256_sub_epi32(fVec, o.fVec); }
- AI SkNx operator * (const SkNx& o) const { return _mm256_mullo_epi32(fVec, o.fVec); }
-
- AI SkNx operator & (const SkNx& o) const { return _mm256_and_si256(fVec, o.fVec); }
- AI SkNx operator | (const SkNx& o) const { return _mm256_or_si256(fVec, o.fVec); }
- AI SkNx operator ^ (const SkNx& o) const { return _mm256_xor_si256(fVec, o.fVec); }
-
- AI SkNx operator << (int bits) const { return _mm256_slli_epi32(fVec, bits); }
- AI SkNx operator >> (int bits) const { return _mm256_srli_epi32(fVec, bits); }
-
- AI uint32_t operator[](int k) const {
- SkASSERT(0 <= k && k < 8);
- union { __m256i v; uint32_t us[8]; } pun = {fVec};
- return pun.us[k&7];
- }
-
- __m256i fVec;
- };
-
- // _mm256_unpack{lo,hi}_pd() auto-casting to and from __m256d.
- AI static __m256 unpacklo_pd(__m256 x, __m256 y) {
- return _mm256_castpd_ps(_mm256_unpacklo_pd(_mm256_castps_pd(x), _mm256_castps_pd(y)));
- }
- AI static __m256 unpackhi_pd(__m256 x, __m256 y) {
- return _mm256_castpd_ps(_mm256_unpackhi_pd(_mm256_castps_pd(x), _mm256_castps_pd(y)));
- }
-
- template <>
- class SkNx<8, float> {
- public:
- AI SkNx(const __m256& vec) : fVec(vec) {}
-
- AI SkNx() {}
- AI SkNx(float val) : fVec(_mm256_set1_ps(val)) {}
- AI SkNx(float a, float b, float c, float d,
- float e, float f, float g, float h) : fVec(_mm256_setr_ps(a,b,c,d,e,f,g,h)) {}
-
- AI static SkNx Load(const void* ptr) { return _mm256_loadu_ps((const float*)ptr); }
- AI void store(void* ptr) const { _mm256_storeu_ps((float*)ptr, fVec); }
-
- AI static void Store4(void* ptr,
- const SkNx& r, const SkNx& g, const SkNx& b, const SkNx& a) {
- __m256 rg0145 = _mm256_unpacklo_ps(r.fVec, g.fVec), // r0 g0 r1 g1 | r4 g4 r5 g5
- rg2367 = _mm256_unpackhi_ps(r.fVec, g.fVec), // r2 ... | r6 ...
- ba0145 = _mm256_unpacklo_ps(b.fVec, a.fVec), // b0 a0 b1 a1 | b4 a4 b5 a5
- ba2367 = _mm256_unpackhi_ps(b.fVec, a.fVec); // b2 ... | b6 ...
-
- __m256 _04 = unpacklo_pd(rg0145, ba0145), // r0 g0 b0 a0 | r4 g4 b4 a4
- _15 = unpackhi_pd(rg0145, ba0145), // r1 ... | r5 ...
- _26 = unpacklo_pd(rg2367, ba2367), // r2 ... | r6 ...
- _37 = unpackhi_pd(rg2367, ba2367); // r3 ... | r7 ...
-
- __m256 _01 = _mm256_permute2f128_ps(_04, _15, 32), // 32 == 0010 0000 == lo, lo
- _23 = _mm256_permute2f128_ps(_26, _37, 32),
- _45 = _mm256_permute2f128_ps(_04, _15, 49), // 49 == 0011 0001 == hi, hi
- _67 = _mm256_permute2f128_ps(_26, _37, 49);
-
- _mm256_storeu_ps((float*)ptr + 0*8, _01);
- _mm256_storeu_ps((float*)ptr + 1*8, _23);
- _mm256_storeu_ps((float*)ptr + 2*8, _45);
- _mm256_storeu_ps((float*)ptr + 3*8, _67);
- }
- AI static void Load4(const void* ptr, SkNx* r, SkNx* g, SkNx* b, SkNx* a) {
- Sk4f rl, gl, bl, al,
- rh, gh, bh, ah;
- Sk4f::Load4((const float*)ptr + 0, &rl, &gl, &bl, &al);
- Sk4f::Load4((const float*)ptr + 16, &rh, &gh, &bh, &ah);
- *r = _mm256_setr_m128(rl.fVec, rh.fVec);
- *g = _mm256_setr_m128(gl.fVec, gh.fVec);
- *b = _mm256_setr_m128(bl.fVec, bh.fVec);
- *a = _mm256_setr_m128(al.fVec, ah.fVec);
- }
-
- AI SkNx operator+(const SkNx& o) const { return _mm256_add_ps(fVec, o.fVec); }
- AI SkNx operator-(const SkNx& o) const { return _mm256_sub_ps(fVec, o.fVec); }
- AI SkNx operator*(const SkNx& o) const { return _mm256_mul_ps(fVec, o.fVec); }
- AI SkNx operator/(const SkNx& o) const { return _mm256_div_ps(fVec, o.fVec); }
-
- AI SkNx operator==(const SkNx& o) const { return _mm256_cmp_ps(fVec, o.fVec, _CMP_EQ_OQ); }
- AI SkNx operator!=(const SkNx& o) const { return _mm256_cmp_ps(fVec, o.fVec, _CMP_NEQ_OQ); }
- AI SkNx operator <(const SkNx& o) const { return _mm256_cmp_ps(fVec, o.fVec, _CMP_LT_OQ); }
- AI SkNx operator >(const SkNx& o) const { return _mm256_cmp_ps(fVec, o.fVec, _CMP_GT_OQ); }
- AI SkNx operator<=(const SkNx& o) const { return _mm256_cmp_ps(fVec, o.fVec, _CMP_LE_OQ); }
- AI SkNx operator>=(const SkNx& o) const { return _mm256_cmp_ps(fVec, o.fVec, _CMP_GE_OQ); }
-
- AI static SkNx Min(const SkNx& l, const SkNx& r) { return _mm256_min_ps(l.fVec, r.fVec); }
- AI static SkNx Max(const SkNx& l, const SkNx& r) { return _mm256_max_ps(l.fVec, r.fVec); }
-
- AI SkNx sqrt() const { return _mm256_sqrt_ps (fVec); }
- AI SkNx rsqrt() const { return _mm256_rsqrt_ps(fVec); }
- AI SkNx invert() const { return _mm256_rcp_ps (fVec); }
-
- AI SkNx abs() const { return _mm256_andnot_ps(_mm256_set1_ps(-0.0f), fVec); }
- AI SkNx floor() const { return _mm256_floor_ps(fVec); }
-
- AI float operator[](int k) const {
- SkASSERT(0 <= k && k < 8);
- union { __m256 v; float fs[8]; } pun = {fVec};
- return pun.fs[k&7];
- }
-
- AI SkNx thenElse(const SkNx& t, const SkNx& e) const {
- return _mm256_blendv_ps(e.fVec, t.fVec, fVec);
- }
-
- __m256 fVec;
- };
-
- AI static void SkNx_split(const Sk8f& v, Sk4f* lo, Sk4f* hi) {
- *lo = _mm256_extractf128_ps(v.fVec, 0);
- *hi = _mm256_extractf128_ps(v.fVec, 1);
- }
-
- AI static Sk8f SkNx_join(const Sk4f& lo, const Sk4f& hi) {
- return _mm256_insertf128_ps(_mm256_castps128_ps256(lo.fVec), hi.fVec, 1);
- }
-
- AI static Sk8f SkNx_fma(const Sk8f& a, const Sk8f& b, const Sk8f& c) {
- return _mm256_fmadd_ps(a.fVec, b.fVec, c.fVec);
- }
-
- template<> AI /*static*/ Sk8i SkNx_cast<int>(const Sk8b& src) {
- return _mm256_cvtepu8_epi32(src.fVec);
- }
-
- template<> AI /*static*/ Sk8f SkNx_cast<float>(const Sk8b& src) {
- return _mm256_cvtepi32_ps(SkNx_cast<int>(src).fVec);
- }
-
- template<> AI /*static*/ Sk8i SkNx_cast<int>(const Sk8h& src) {
- return _mm256_cvtepu16_epi32(src.fVec);
- }
-
- template<> AI /*static*/ Sk8f SkNx_cast<float>(const Sk8h& src) {
- return _mm256_cvtepi32_ps(SkNx_cast<int>(src).fVec);
- }
-
- template<> AI /*static*/ Sk8f SkNx_cast<float>(const Sk8i& src) {
- return _mm256_cvtepi32_ps(src.fVec);
- }
-
- template<> AI /*static*/ Sk8i SkNx_cast<int>(const Sk8f& src) {
- return _mm256_cvttps_epi32(src.fVec);
- }
-
- template<> AI /*static*/ Sk8h SkNx_cast<uint16_t>(const Sk8i& src) {
- __m128i lo = _mm256_extractf128_si256(src.fVec, 0),
- hi = _mm256_extractf128_si256(src.fVec, 1);
- return _mm_packus_epi32(lo, hi);
- }
-
- template<> AI /*static*/ Sk8h SkNx_cast<uint16_t>(const Sk8f& src) {
- return SkNx_cast<uint16_t>(SkNx_cast<int>(src));
- }
-
- template<> AI /*static*/ Sk8b SkNx_cast<uint8_t>(const Sk8i& src) {
- auto _16 = SkNx_cast<uint16_t>(src);
- return _mm_packus_epi16(_16.fVec, _16.fVec);
- }
-
-#endif
-
template<> AI /*static*/ Sk4f SkNx_cast<float, int32_t>(const Sk4i& src) {
return _mm_cvtepi32_ps(src.fVec);
}
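The second diff below removes the AVX2 tail-masked memory ops. The technique: build a per-lane mask that is all-ones for the first `tail` active lanes and feed it to maskload/maskstore, so a partial final chunk never reads or writes past the end of the buffer. A minimal sketch of the load side, assuming AVX2 (the helper name is hypothetical):

    #include <immintrin.h>   // AVX2

    // Load up to 8 floats: lanes >= n come back zero and memory past
    // src + n is never touched. n == 0 means "load all 8", matching the
    // removed mask() helper's convention.
    static inline __m256 load_tail(const float* src, size_t n) {
        static const int masks[8][8] = {
            {~0,~0,~0,~0, ~0,~0,~0,~0},   // n == 0: all lanes active
            {~0, 0, 0, 0,  0, 0, 0, 0},
            {~0,~0, 0, 0,  0, 0, 0, 0},
            {~0,~0,~0, 0,  0, 0, 0, 0},
            {~0,~0,~0,~0,  0, 0, 0, 0},
            {~0,~0,~0,~0, ~0, 0, 0, 0},
            {~0,~0,~0,~0, ~0,~0, 0, 0},
            {~0,~0,~0,~0, ~0,~0,~0, 0},
        };
        __m256i m = _mm256_loadu_si256((const __m256i*)masks[n]);
        return _mm256_maskload_ps(src, m);   // masked-off lanes are zeroed
    }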
diff --git a/src/opts/SkRasterPipeline_opts.h b/src/opts/SkRasterPipeline_opts.h
index 0378f4af1f..44cbe83618 100644
--- a/src/opts/SkRasterPipeline_opts.h
+++ b/src/opts/SkRasterPipeline_opts.h
@@ -23,11 +23,7 @@
namespace {
-#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_AVX2
- static constexpr int N = 8;
-#else
static constexpr int N = 4;
-#endif
using SkNf = SkNx<N, float>;
using SkNi = SkNx<N, int32_t>;
@@ -202,71 +198,6 @@ SI void store(size_t tail, const SkNx<N,T>& v, T* dst) {
v.store(dst);
}
-#if !defined(SKNX_NO_SIMD) && SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_AVX2
- SI __m256i mask(size_t tail) {
- static const int masks[][8] = {
- {~0,~0,~0,~0, ~0,~0,~0,~0 }, // remember, tail == 0 ~~> load all N
- {~0, 0, 0, 0, 0, 0, 0, 0 },
- {~0,~0, 0, 0, 0, 0, 0, 0 },
- {~0,~0,~0, 0, 0, 0, 0, 0 },
- {~0,~0,~0,~0, 0, 0, 0, 0 },
- {~0,~0,~0,~0, ~0, 0, 0, 0 },
- {~0,~0,~0,~0, ~0,~0, 0, 0 },
- {~0,~0,~0,~0, ~0,~0,~0, 0 },
- };
- return SkNi::Load(masks + tail).fVec;
- }
-
- SI SkNi load(size_t tail, const int32_t* src) {
- return tail ? _mm256_maskload_epi32((const int*)src, mask(tail))
- : SkNi::Load(src);
- }
- SI SkNu load(size_t tail, const uint32_t* src) {
- return tail ? _mm256_maskload_epi32((const int*)src, mask(tail))
- : SkNu::Load(src);
- }
- SI SkNf load(size_t tail, const float* src) {
- return tail ? _mm256_maskload_ps((const float*)src, mask(tail))
- : SkNf::Load(src);
- }
- SI SkNi gather(size_t tail, const int32_t* src, const SkNi& offset) {
- auto m = mask(tail);
- return _mm256_mask_i32gather_epi32(SkNi(0).fVec, (const int*)src, offset.fVec, m, 4);
- }
- SI SkNu gather(size_t tail, const uint32_t* src, const SkNi& offset) {
- auto m = mask(tail);
- return _mm256_mask_i32gather_epi32(SkNi(0).fVec, (const int*)src, offset.fVec, m, 4);
- }
- SI SkNf gather(size_t tail, const float* src, const SkNi& offset) {
- auto m = _mm256_castsi256_ps(mask(tail));
- return _mm256_mask_i32gather_ps(SkNf(0).fVec, (const float*)src, offset.fVec, m, 4);
- }
-
- static const char* bug = "I don't think MSAN understands maskstore.";
-
- SI void store(size_t tail, const SkNi& v, int32_t* dst) {
- if (tail) {
- _mm256_maskstore_epi32((int*)dst, mask(tail), v.fVec);
- return sk_msan_mark_initialized(dst, dst+tail, bug);
- }
- v.store(dst);
- }
- SI void store(size_t tail, const SkNu& v, uint32_t* dst) {
- if (tail) {
- _mm256_maskstore_epi32((int*)dst, mask(tail), v.fVec);
- return sk_msan_mark_initialized(dst, dst+tail, bug);
- }
- v.store(dst);
- }
- SI void store(size_t tail, const SkNf& v, float* dst) {
- if (tail) {
- _mm256_maskstore_ps((float*)dst, mask(tail), v.fVec);
- return sk_msan_mark_initialized(dst, dst+tail, bug);
- }
- v.store(dst);
- }
-#endif
-
SI SkNf SkNf_fma(const SkNf& f, const SkNf& m, const SkNf& a) { return SkNx_fma(f,m,a); }
SI SkNi SkNf_round(const SkNf& x, const SkNf& scale) {