about | summary | refs | log | tree | commit | diff | homepage
path: root/src/opts/SkNx_sse.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/opts/SkNx_sse.h')
-rw-r--r-- src/opts/SkNx_sse.h 39
1 files changed, 18 insertions, 21 deletions
diff --git a/src/opts/SkNx_sse.h b/src/opts/SkNx_sse.h
index 34b58c10ed..80c7f0e9ae 100644
--- a/src/opts/SkNx_sse.h
+++ b/src/opts/SkNx_sse.h
@@ -52,13 +52,9 @@ public:
static SkNx Min(const SkNx& l, const SkNx& r) { return _mm_min_ps(l.fVec, r.fVec); }
static SkNx Max(const SkNx& l, const SkNx& r) { return _mm_max_ps(l.fVec, r.fVec); }
- SkNx sqrt () const { return _mm_sqrt_ps (fVec); }
- SkNx rsqrt0() const { return _mm_rsqrt_ps(fVec); }
- SkNx rsqrt1() const { return this->rsqrt0(); }
- SkNx rsqrt2() const { return this->rsqrt1(); }
-
- SkNx invert() const { return SkNx(1) / *this; }
- SkNx approxInvert() const { return _mm_rcp_ps(fVec); }
+ SkNx sqrt() const { return _mm_sqrt_ps (fVec); }
+ SkNx rsqrt() const { return _mm_rsqrt_ps(fVec); }
+ SkNx invert() const { return _mm_rcp_ps(fVec); }
float operator[](int k) const {
SkASSERT(0 <= k && k < 2);
@@ -103,13 +99,9 @@ public:
SkNx abs() const { return _mm_andnot_ps(_mm_set1_ps(-0.0f), fVec); }
SkNx floor() const { return sse2_mm_floor_ps(fVec); }
- SkNx sqrt () const { return _mm_sqrt_ps (fVec); }
- SkNx rsqrt0() const { return _mm_rsqrt_ps(fVec); }
- SkNx rsqrt1() const { return this->rsqrt0(); }
- SkNx rsqrt2() const { return this->rsqrt1(); }
-
- SkNx invert() const { return SkNx(1) / *this; }
- SkNx approxInvert() const { return _mm_rcp_ps(fVec); }
+ SkNx sqrt() const { return _mm_sqrt_ps (fVec); }
+ SkNx rsqrt() const { return _mm_rsqrt_ps(fVec); }
+ SkNx invert() const { return _mm_rcp_ps(fVec); }
float operator[](int k) const {
SkASSERT(0 <= k && k < 4);
@@ -346,13 +338,18 @@ template<> /*static*/ inline Sk4f SkNx_cast<float, uint16_t>(const Sk4h& src) {
return _mm_cvtepi32_ps(_32);
}
-static inline void Sk4f_ToBytes(uint8_t bytes[16],
- const Sk4f& a, const Sk4f& b, const Sk4f& c, const Sk4f& d) {
- _mm_storeu_si128((__m128i*)bytes,
- _mm_packus_epi16(_mm_packus_epi16(_mm_cvttps_epi32(a.fVec),
- _mm_cvttps_epi32(b.fVec)),
- _mm_packus_epi16(_mm_cvttps_epi32(c.fVec),
- _mm_cvttps_epi32(d.fVec))));
+template<> /*static*/ inline Sk16b SkNx_cast<uint8_t, float>(const Sk16f& src) {
+ Sk8f ab, cd;
+ SkNx_split(src, &ab, &cd);
+
+ Sk4f a,b,c,d;
+ SkNx_split(ab, &a, &b);
+ SkNx_split(cd, &c, &d);
+
+ return _mm_packus_epi16(_mm_packus_epi16(_mm_cvttps_epi32(a.fVec),
+ _mm_cvttps_epi32(b.fVec)),
+ _mm_packus_epi16(_mm_cvttps_epi32(c.fVec),
+ _mm_cvttps_epi32(d.fVec)));
}
template<> /*static*/ inline Sk4h SkNx_cast<uint16_t, uint8_t>(const Sk4b& src) {