 src/core/SkHalf.h                |  29 --
 src/opts/SkNx_sse.h              | 240 ---
 src/opts/SkRasterPipeline_opts.h |  69 --
 3 files changed, 0 insertions(+), 338 deletions(-)
diff --git a/src/core/SkHalf.h b/src/core/SkHalf.h
index e71cb8750a..dd978a2347 100644
--- a/src/core/SkHalf.h
+++ b/src/core/SkHalf.h
@@ -11,10 +11,6 @@
#include "SkNx.h"
#include "SkTypes.h"
-#if !defined(_MSC_VER) && SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_AVX2
- #include <x86intrin.h>
-#endif
-
// 16-bit floating point value
// format is 1 bit sign, 5 bits exponent, 10 bits mantissa
// only used for storage
@@ -89,29 +85,4 @@ static inline Sk4h SkFloatToHalf_finite_ftz(const Sk4f& fs) {
#endif
}
-static inline Sk8f SkHalfToFloat_finite_ftz(const Sk8h& hs) {
-#if !defined(SKNX_NO_SIMD) && SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_AVX2
- return _mm256_cvtph_ps(hs.fVec);
-
-#else
- uint64_t parts[2];
- hs.store(parts);
- return SkNx_join(SkHalfToFloat_finite_ftz(parts[0]),
- SkHalfToFloat_finite_ftz(parts[1]));
-
-#endif
-}
-
-static inline Sk8h SkFloatToHalf_finite_ftz(const Sk8f& fs) {
-#if !defined(SKNX_NO_SIMD) && SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_AVX2
- return _mm256_cvtps_ph(fs.fVec, _MM_FROUND_CUR_DIRECTION);
-
-#else
- uint64_t parts[2];
- SkFloatToHalf_finite_ftz(fs.fLo).store(parts+0);
- SkFloatToHalf_finite_ftz(fs.fHi).store(parts+1);
- return Sk8h::Load(parts);
-#endif
-}
-
#endif
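
For context, the two 8-wide conversions deleted above were thin wrappers over
the F16C intrinsics. A minimal standalone sketch of the same technique, with
hypothetical helper names (this is not Skia API; compile with -mf16c or
-mavx2):

    #include <immintrin.h>
    #include <stdint.h>

    // 8 half-floats -> 8 floats (F16C).
    static inline __m256 halves_to_floats(const uint16_t h[8]) {
        __m128i hv = _mm_loadu_si128((const __m128i*)h);  // 16 bytes = 8 halfs
        return _mm256_cvtph_ps(hv);
    }

    // 8 floats -> 8 half-floats, rounding per the current rounding mode,
    // as the removed code did with _MM_FROUND_CUR_DIRECTION.
    static inline void floats_to_halves(__m256 f, uint16_t h[8]) {
        __m128i hv = _mm256_cvtps_ph(f, _MM_FROUND_CUR_DIRECTION);
        _mm_storeu_si128((__m128i*)h, hv);
    }

The non-AVX2 fallback in the removed code instead split the 8-lane vector into
two 4-lane halves and reused the Sk4f/Sk4h conversions defined earlier in this
header.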
diff --git a/src/opts/SkNx_sse.h b/src/opts/SkNx_sse.h
index e81872291e..3e59a9d0e8 100644
--- a/src/opts/SkNx_sse.h
+++ b/src/opts/SkNx_sse.h
@@ -329,7 +329,6 @@ public:
AI void store(void* ptr) const { _mm_storeu_si128((__m128i*)ptr, fVec); }
AI static void Load4(const void* ptr, SkNx* r, SkNx* g, SkNx* b, SkNx* a) {
- // TODO: AVX2 version
__m128i _01 = _mm_loadu_si128(((__m128i*)ptr) + 0),
_23 = _mm_loadu_si128(((__m128i*)ptr) + 1),
_45 = _mm_loadu_si128(((__m128i*)ptr) + 2),
@@ -351,7 +350,6 @@ public:
*a = _mm_unpackhi_epi64(ba0123, ba4567);
}
AI static void Load3(const void* ptr, SkNx* r, SkNx* g, SkNx* b) {
- // TODO: AVX2 version
const uint8_t* ptr8 = (const uint8_t*) ptr;
__m128i rgb0 = _mm_loadu_si128((const __m128i*) (ptr8 + 0*2));
__m128i rgb1 = _mm_srli_si128(rgb0, 3*2);
@@ -377,7 +375,6 @@ public:
*b = _mm_unpacklo_epi64(bx03, bx47);
}
AI static void Store4(void* ptr, const SkNx& r, const SkNx& g, const SkNx& b, const SkNx& a) {
- // TODO: AVX2 version
__m128i rg0123 = _mm_unpacklo_epi16(r.fVec, g.fVec), // r0 g0 r1 g1 r2 g2 r3 g3
rg4567 = _mm_unpackhi_epi16(r.fVec, g.fVec), // r4 g4 r5 g5 r6 g6 r7 g7
ba0123 = _mm_unpacklo_epi16(b.fVec, a.fVec),
@@ -486,243 +483,6 @@ public:
__m128i fVec;
};
-#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_AVX2
-
- template <>
- class SkNx<8, uint8_t> {
- public:
- AI SkNx(const __m128i& vec) : fVec(vec) {}
-
- AI SkNx() {}
- AI SkNx(uint8_t v) : fVec(_mm_set1_epi8(v)) {}
- AI SkNx(uint8_t a, uint8_t b, uint8_t c, uint8_t d,
- uint8_t e, uint8_t f, uint8_t g, uint8_t h)
- : fVec(_mm_setr_epi8(a,b,c,d, e,f,g,h, 0,0,0,0, 0,0,0,0)) {}
-
-
- AI static SkNx Load(const void* ptr) { return _mm_loadl_epi64((const __m128i*)ptr); }
- AI void store(void* ptr) const { _mm_storel_epi64((__m128i*)ptr, fVec); }
-
- AI uint8_t operator[](int k) const {
- SkASSERT(0 <= k && k < 8);
- union { __m128i v; uint8_t us[16]; } pun = {fVec};
- return pun.us[k&7];
- }
-
- __m128i fVec;
- };
-
- template <>
- class SkNx<8, int32_t> {
- public:
- AI SkNx(const __m256i& vec) : fVec(vec) {}
-
- AI SkNx() {}
- AI SkNx(int32_t v) : fVec(_mm256_set1_epi32(v)) {}
- AI SkNx(int32_t a, int32_t b, int32_t c, int32_t d,
- int32_t e, int32_t f, int32_t g, int32_t h)
- : fVec(_mm256_setr_epi32(a,b,c,d, e,f,g,h)) {}
-
- AI static SkNx Load(const void* ptr) { return _mm256_loadu_si256((const __m256i*)ptr); }
- AI void store(void* ptr) const { _mm256_storeu_si256((__m256i*)ptr, fVec); }
-
- AI SkNx operator + (const SkNx& o) const { return _mm256_add_epi32(fVec, o.fVec); }
- AI SkNx operator - (const SkNx& o) const { return _mm256_sub_epi32(fVec, o.fVec); }
- AI SkNx operator * (const SkNx& o) const { return _mm256_mullo_epi32(fVec, o.fVec); }
-
- AI SkNx operator & (const SkNx& o) const { return _mm256_and_si256(fVec, o.fVec); }
- AI SkNx operator | (const SkNx& o) const { return _mm256_or_si256(fVec, o.fVec); }
- AI SkNx operator ^ (const SkNx& o) const { return _mm256_xor_si256(fVec, o.fVec); }
-
- AI SkNx operator << (int bits) const { return _mm256_slli_epi32(fVec, bits); }
- AI SkNx operator >> (int bits) const { return _mm256_srai_epi32(fVec, bits); }
-
- AI int32_t operator[](int k) const {
- SkASSERT(0 <= k && k < 8);
- union { __m256i v; int32_t is[8]; } pun = {fVec};
- return pun.is[k&7];
- }
-
- __m256i fVec;
- };
-
- template <>
- class SkNx<8, uint32_t> {
- public:
- AI SkNx(const __m256i& vec) : fVec(vec) {}
-
- AI SkNx() {}
- AI SkNx(uint32_t v) : fVec(_mm256_set1_epi32(v)) {}
- AI SkNx(uint32_t a, uint32_t b, uint32_t c, uint32_t d,
- uint32_t e, uint32_t f, uint32_t g, uint32_t h)
- : fVec(_mm256_setr_epi32(a,b,c,d, e,f,g,h)) {}
-
- AI static SkNx Load(const void* ptr) { return _mm256_loadu_si256((const __m256i*)ptr); }
- AI void store(void* ptr) const { _mm256_storeu_si256((__m256i*)ptr, fVec); }
-
- AI SkNx operator + (const SkNx& o) const { return _mm256_add_epi32(fVec, o.fVec); }
- AI SkNx operator - (const SkNx& o) const { return _mm256_sub_epi32(fVec, o.fVec); }
- AI SkNx operator * (const SkNx& o) const { return _mm256_mullo_epi32(fVec, o.fVec); }
-
- AI SkNx operator & (const SkNx& o) const { return _mm256_and_si256(fVec, o.fVec); }
- AI SkNx operator | (const SkNx& o) const { return _mm256_or_si256(fVec, o.fVec); }
- AI SkNx operator ^ (const SkNx& o) const { return _mm256_xor_si256(fVec, o.fVec); }
-
- AI SkNx operator << (int bits) const { return _mm256_slli_epi32(fVec, bits); }
- AI SkNx operator >> (int bits) const { return _mm256_srli_epi32(fVec, bits); }
-
- AI uint32_t operator[](int k) const {
- SkASSERT(0 <= k && k < 8);
- union { __m256i v; uint32_t us[8]; } pun = {fVec};
- return pun.us[k&7];
- }
-
- __m256i fVec;
- };
-
- // _mm256_unpack{lo,hi}_pd() auto-casting to and from __m256d.
- AI static __m256 unpacklo_pd(__m256 x, __m256 y) {
- return _mm256_castpd_ps(_mm256_unpacklo_pd(_mm256_castps_pd(x), _mm256_castps_pd(y)));
- }
- AI static __m256 unpackhi_pd(__m256 x, __m256 y) {
- return _mm256_castpd_ps(_mm256_unpackhi_pd(_mm256_castps_pd(x), _mm256_castps_pd(y)));
- }
-
- template <>
- class SkNx<8, float> {
- public:
- AI SkNx(const __m256& vec) : fVec(vec) {}
-
- AI SkNx() {}
- AI SkNx(float val) : fVec(_mm256_set1_ps(val)) {}
- AI SkNx(float a, float b, float c, float d,
- float e, float f, float g, float h) : fVec(_mm256_setr_ps(a,b,c,d,e,f,g,h)) {}
-
- AI static SkNx Load(const void* ptr) { return _mm256_loadu_ps((const float*)ptr); }
- AI void store(void* ptr) const { _mm256_storeu_ps((float*)ptr, fVec); }
-
- AI static void Store4(void* ptr,
- const SkNx& r, const SkNx& g, const SkNx& b, const SkNx& a) {
- __m256 rg0145 = _mm256_unpacklo_ps(r.fVec, g.fVec), // r0 g0 r1 g1 | r4 g4 r5 g5
- rg2367 = _mm256_unpackhi_ps(r.fVec, g.fVec), // r2 ... | r6 ...
- ba0145 = _mm256_unpacklo_ps(b.fVec, a.fVec), // b0 a0 b1 a1 | b4 a4 b5 a5
- ba2367 = _mm256_unpackhi_ps(b.fVec, a.fVec); // b2 ... | b6 ...
-
- __m256 _04 = unpacklo_pd(rg0145, ba0145), // r0 g0 b0 a0 | r4 g4 b4 a4
- _15 = unpackhi_pd(rg0145, ba0145), // r1 ... | r5 ...
- _26 = unpacklo_pd(rg2367, ba2367), // r2 ... | r6 ...
- _37 = unpackhi_pd(rg2367, ba2367); // r3 ... | r7 ...
-
- __m256 _01 = _mm256_permute2f128_ps(_04, _15, 32), // 32 == 0010 0000 == lo, lo
- _23 = _mm256_permute2f128_ps(_26, _37, 32),
- _45 = _mm256_permute2f128_ps(_04, _15, 49), // 49 == 0011 0001 == hi, hi
- _67 = _mm256_permute2f128_ps(_26, _37, 49);
-
- _mm256_storeu_ps((float*)ptr + 0*8, _01);
- _mm256_storeu_ps((float*)ptr + 1*8, _23);
- _mm256_storeu_ps((float*)ptr + 2*8, _45);
- _mm256_storeu_ps((float*)ptr + 3*8, _67);
- }
- AI static void Load4(const void* ptr, SkNx* r, SkNx* g, SkNx* b, SkNx* a) {
- Sk4f rl, gl, bl, al,
- rh, gh, bh, ah;
- Sk4f::Load4((const float*)ptr + 0, &rl, &gl, &bl, &al);
- Sk4f::Load4((const float*)ptr + 16, &rh, &gh, &bh, &ah);
- *r = _mm256_setr_m128(rl.fVec, rh.fVec);
- *g = _mm256_setr_m128(gl.fVec, gh.fVec);
- *b = _mm256_setr_m128(bl.fVec, bh.fVec);
- *a = _mm256_setr_m128(al.fVec, ah.fVec);
- }
-
- AI SkNx operator+(const SkNx& o) const { return _mm256_add_ps(fVec, o.fVec); }
- AI SkNx operator-(const SkNx& o) const { return _mm256_sub_ps(fVec, o.fVec); }
- AI SkNx operator*(const SkNx& o) const { return _mm256_mul_ps(fVec, o.fVec); }
- AI SkNx operator/(const SkNx& o) const { return _mm256_div_ps(fVec, o.fVec); }
-
- AI SkNx operator==(const SkNx& o) const { return _mm256_cmp_ps(fVec, o.fVec, _CMP_EQ_OQ); }
- AI SkNx operator!=(const SkNx& o) const { return _mm256_cmp_ps(fVec, o.fVec, _CMP_NEQ_OQ); }
- AI SkNx operator <(const SkNx& o) const { return _mm256_cmp_ps(fVec, o.fVec, _CMP_LT_OQ); }
- AI SkNx operator >(const SkNx& o) const { return _mm256_cmp_ps(fVec, o.fVec, _CMP_GT_OQ); }
- AI SkNx operator<=(const SkNx& o) const { return _mm256_cmp_ps(fVec, o.fVec, _CMP_LE_OQ); }
- AI SkNx operator>=(const SkNx& o) const { return _mm256_cmp_ps(fVec, o.fVec, _CMP_GE_OQ); }
-
- AI static SkNx Min(const SkNx& l, const SkNx& r) { return _mm256_min_ps(l.fVec, r.fVec); }
- AI static SkNx Max(const SkNx& l, const SkNx& r) { return _mm256_max_ps(l.fVec, r.fVec); }
-
- AI SkNx sqrt() const { return _mm256_sqrt_ps (fVec); }
- AI SkNx rsqrt() const { return _mm256_rsqrt_ps(fVec); }
- AI SkNx invert() const { return _mm256_rcp_ps (fVec); }
-
- AI SkNx abs() const { return _mm256_andnot_ps(_mm256_set1_ps(-0.0f), fVec); }
- AI SkNx floor() const { return _mm256_floor_ps(fVec); }
-
- AI float operator[](int k) const {
- SkASSERT(0 <= k && k < 8);
- union { __m256 v; float fs[8]; } pun = {fVec};
- return pun.fs[k&7];
- }
-
- AI SkNx thenElse(const SkNx& t, const SkNx& e) const {
- return _mm256_blendv_ps(e.fVec, t.fVec, fVec);
- }
-
- __m256 fVec;
- };
-
- AI static void SkNx_split(const Sk8f& v, Sk4f* lo, Sk4f* hi) {
- *lo = _mm256_extractf128_ps(v.fVec, 0);
- *hi = _mm256_extractf128_ps(v.fVec, 1);
- }
-
- AI static Sk8f SkNx_join(const Sk4f& lo, const Sk4f& hi) {
- return _mm256_insertf128_ps(_mm256_castps128_ps256(lo.fVec), hi.fVec, 1);
- }
-
- AI static Sk8f SkNx_fma(const Sk8f& a, const Sk8f& b, const Sk8f& c) {
- return _mm256_fmadd_ps(a.fVec, b.fVec, c.fVec);
- }
-
- template<> AI /*static*/ Sk8i SkNx_cast<int>(const Sk8b& src) {
- return _mm256_cvtepu8_epi32(src.fVec);
- }
-
- template<> AI /*static*/ Sk8f SkNx_cast<float>(const Sk8b& src) {
- return _mm256_cvtepi32_ps(SkNx_cast<int>(src).fVec);
- }
-
- template<> AI /*static*/ Sk8i SkNx_cast<int>(const Sk8h& src) {
- return _mm256_cvtepu16_epi32(src.fVec);
- }
-
- template<> AI /*static*/ Sk8f SkNx_cast<float>(const Sk8h& src) {
- return _mm256_cvtepi32_ps(SkNx_cast<int>(src).fVec);
- }
-
- template<> AI /*static*/ Sk8f SkNx_cast<float>(const Sk8i& src) {
- return _mm256_cvtepi32_ps(src.fVec);
- }
-
- template<> AI /*static*/ Sk8i SkNx_cast<int>(const Sk8f& src) {
- return _mm256_cvttps_epi32(src.fVec);
- }
-
- template<> AI /*static*/ Sk8h SkNx_cast<uint16_t>(const Sk8i& src) {
- __m128i lo = _mm256_extractf128_si256(src.fVec, 0),
- hi = _mm256_extractf128_si256(src.fVec, 1);
- return _mm_packus_epi32(lo, hi);
- }
-
- template<> AI /*static*/ Sk8h SkNx_cast<uint16_t>(const Sk8f& src) {
- return SkNx_cast<uint16_t>(SkNx_cast<int>(src));
- }
-
- template<> AI /*static*/ Sk8b SkNx_cast<uint8_t>(const Sk8i& src) {
- auto _16 = SkNx_cast<uint16_t>(src);
- return _mm_packus_epi16(_16.fVec, _16.fVec);
- }
-
-#endif
-
template<> AI /*static*/ Sk4f SkNx_cast<float, int32_t>(const Sk4i& src) {
return _mm_cvtepi32_ps(src.fVec);
}
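
A note on the lane shuffles in the removed Sk8f::Store4: _mm256_permute2f128_ps
selects one 128-bit lane from each source by immediate, which is how the
deleted code recombined its transposed halves ("32 == 0010 0000 == lo, lo",
"49 == 0011 0001 == hi, hi"). A small hedged demo of just that selection
(hypothetical, not Skia code; compile with -mavx):

    #include <immintrin.h>
    #include <cstdio>

    int main() {
        __m256 a = _mm256_setr_ps(0,1, 2, 3,  4, 5, 6, 7);
        __m256 b = _mm256_setr_ps(8,9,10,11, 12,13,14,15);
        // imm 0x20 (32): (lo(a), lo(b)) -> 0 1 2 3 | 8 9 10 11
        // imm 0x31 (49): (hi(a), hi(b)) -> 4 5 6 7 | 12 13 14 15
        __m256 lolo = _mm256_permute2f128_ps(a, b, 0x20);
        __m256 hihi = _mm256_permute2f128_ps(a, b, 0x31);
        float out[8];
        _mm256_storeu_ps(out, lolo);
        for (float f : out) printf("%g ", f);
        printf("\n");
        _mm256_storeu_ps(out, hihi);
        for (float f : out) printf("%g ", f);
        printf("\n");
    }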
diff --git a/src/opts/SkRasterPipeline_opts.h b/src/opts/SkRasterPipeline_opts.h
index 0378f4af1f..44cbe83618 100644
--- a/src/opts/SkRasterPipeline_opts.h
+++ b/src/opts/SkRasterPipeline_opts.h
@@ -23,11 +23,7 @@
namespace {
-#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_AVX2
- static constexpr int N = 8;
-#else
static constexpr int N = 4;
-#endif
using SkNf = SkNx<N, float>;
using SkNi = SkNx<N, int32_t>;
@@ -202,71 +198,6 @@ SI void store(size_t tail, const SkNx<N,T>& v, T* dst) {
v.store(dst);
}
-#if !defined(SKNX_NO_SIMD) && SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_AVX2
- SI __m256i mask(size_t tail) {
- static const int masks[][8] = {
- {~0,~0,~0,~0, ~0,~0,~0,~0 }, // remember, tail == 0 ~~> load all N
- {~0, 0, 0, 0, 0, 0, 0, 0 },
- {~0,~0, 0, 0, 0, 0, 0, 0 },
- {~0,~0,~0, 0, 0, 0, 0, 0 },
- {~0,~0,~0,~0, 0, 0, 0, 0 },
- {~0,~0,~0,~0, ~0, 0, 0, 0 },
- {~0,~0,~0,~0, ~0,~0, 0, 0 },
- {~0,~0,~0,~0, ~0,~0,~0, 0 },
- };
- return SkNi::Load(masks + tail).fVec;
- }
-
- SI SkNi load(size_t tail, const int32_t* src) {
- return tail ? _mm256_maskload_epi32((const int*)src, mask(tail))
- : SkNi::Load(src);
- }
- SI SkNu load(size_t tail, const uint32_t* src) {
- return tail ? _mm256_maskload_epi32((const int*)src, mask(tail))
- : SkNu::Load(src);
- }
- SI SkNf load(size_t tail, const float* src) {
- return tail ? _mm256_maskload_ps((const float*)src, mask(tail))
- : SkNf::Load(src);
- }
- SI SkNi gather(size_t tail, const int32_t* src, const SkNi& offset) {
- auto m = mask(tail);
- return _mm256_mask_i32gather_epi32(SkNi(0).fVec, (const int*)src, offset.fVec, m, 4);
- }
- SI SkNu gather(size_t tail, const uint32_t* src, const SkNi& offset) {
- auto m = mask(tail);
- return _mm256_mask_i32gather_epi32(SkNi(0).fVec, (const int*)src, offset.fVec, m, 4);
- }
- SI SkNf gather(size_t tail, const float* src, const SkNi& offset) {
- auto m = _mm256_castsi256_ps(mask(tail));
- return _mm256_mask_i32gather_ps(SkNf(0).fVec, (const float*)src, offset.fVec, m, 4);
- }
-
- static const char* bug = "I don't think MSAN understands maskstore.";
-
- SI void store(size_t tail, const SkNi& v, int32_t* dst) {
- if (tail) {
- _mm256_maskstore_epi32((int*)dst, mask(tail), v.fVec);
- return sk_msan_mark_initialized(dst, dst+tail, bug);
- }
- v.store(dst);
- }
- SI void store(size_t tail, const SkNu& v, uint32_t* dst) {
- if (tail) {
- _mm256_maskstore_epi32((int*)dst, mask(tail), v.fVec);
- return sk_msan_mark_initialized(dst, dst+tail, bug);
- }
- v.store(dst);
- }
- SI void store(size_t tail, const SkNf& v, float* dst) {
- if (tail) {
- _mm256_maskstore_ps((float*)dst, mask(tail), v.fVec);
- return sk_msan_mark_initialized(dst, dst+tail, bug);
- }
- v.store(dst);
- }
-#endif
-
SI SkNf SkNf_fma(const SkNf& f, const SkNf& m, const SkNf& a) { return SkNx_fma(f,m,a); }
SI SkNi SkNf_round(const SkNf& x, const SkNf& scale) {
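
The tail-masking idiom removed above is worth recording: a table of per-lane
masks indexed by the tail count, where tail == 0 means "all N lanes", drives
AVX2 masked loads and stores so a pipeline stage never reads or writes past
the end of a row. A hedged standalone sketch of the same idiom (hypothetical
names, not Skia API; compile with -mavx2):

    #include <immintrin.h>
    #include <cstdint>
    #include <cstddef>

    static inline __m256i tail_mask(size_t tail) {
        // Row 0 covers the tail == 0 ~~> "load all 8" convention.
        static const int32_t masks[8][8] = {
            {-1,-1,-1,-1,-1,-1,-1,-1},
            {-1, 0, 0, 0, 0, 0, 0, 0},
            {-1,-1, 0, 0, 0, 0, 0, 0},
            {-1,-1,-1, 0, 0, 0, 0, 0},
            {-1,-1,-1,-1, 0, 0, 0, 0},
            {-1,-1,-1,-1,-1, 0, 0, 0},
            {-1,-1,-1,-1,-1,-1, 0, 0},
            {-1,-1,-1,-1,-1,-1,-1, 0},
        };
        return _mm256_loadu_si256((const __m256i*)masks[tail]);
    }

    // Load up to 8 floats without touching memory past src + tail.
    static inline __m256 load_tail(const float* src, size_t tail) {
        return tail ? _mm256_maskload_ps(src, tail_mask(tail))
                    : _mm256_loadu_ps(src);
    }

    // Store up to 8 floats without touching memory past dst + tail.
    static inline void store_tail(float* dst, __m256 v, size_t tail) {
        if (tail) { _mm256_maskstore_ps(dst, tail_mask(tail), v); }
        else      { _mm256_storeu_ps(dst, v); }
    }

The removed gathers followed the same pattern via
_mm256_mask_i32gather_epi32/_ps, and the maskstore paths additionally marked
the destination initialized for MSAN, since MSAN does not understand
maskstore.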