about | summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
-rw-r--r-- src/opts/SkNx_neon.h 10
-rw-r--r-- src/opts/SkNx_sse.h 14
-rw-r--r-- tests/SkNxTest.cpp 34
3 files changed, 58 insertions, 0 deletions
diff --git a/src/opts/SkNx_neon.h b/src/opts/SkNx_neon.h
index 5ec3dc2f2b..4d7aefc93f 100644
--- a/src/opts/SkNx_neon.h
+++ b/src/opts/SkNx_neon.h
@@ -40,6 +40,8 @@ public:
return est1;
}
+ AI SkNx operator - () const { return vneg_f32(fVec); }  // Unary minus: returns a vector with every float lane of fVec negated.
+
AI SkNx operator + (const SkNx& o) const { return vadd_f32(fVec, o.fVec); }
AI SkNx operator - (const SkNx& o) const { return vsub_f32(fVec, o.fVec); }
AI SkNx operator * (const SkNx& o) const { return vmul_f32(fVec, o.fVec); }
@@ -66,6 +68,8 @@ public:
AI static SkNx Min(const SkNx& l, const SkNx& r) { return vmin_f32(l.fVec, r.fVec); }
AI static SkNx Max(const SkNx& l, const SkNx& r) { return vmax_f32(l.fVec, r.fVec); }
+ AI SkNx abs() const { return vabs_f32(fVec); }  // Lane-wise absolute value of fVec.
+
AI SkNx rsqrt() const {
float32x2_t est0 = vrsqrte_f32(fVec);
return vmul_f32(vrsqrts_f32(fVec, vmul_f32(est0, est0)), est0);
@@ -97,6 +101,10 @@ public:
return vget_lane_u32(v,0) || vget_lane_u32(v,1);
}
+ AI SkNx thenElse(const SkNx& t, const SkNx& e) const {  // Select: uses the bits of fVec as a mask choosing between t and e.
+ return vbsl_f32(vreinterpret_u32_f32(fVec), t.fVec, e.fVec);  // Bitwise select: takes bits of t where the mask bit is 1 and bits of e where it is 0.
+ }
+
float32x2_t fVec;
};
@@ -135,6 +143,8 @@ public:
return est1;
}
+ AI SkNx operator - () const { return vnegq_f32(fVec); }  // Unary minus: negates every float lane of fVec (q-register variant of vneg_f32).
+
AI SkNx operator + (const SkNx& o) const { return vaddq_f32(fVec, o.fVec); }
AI SkNx operator - (const SkNx& o) const { return vsubq_f32(fVec, o.fVec); }
AI SkNx operator * (const SkNx& o) const { return vmulq_f32(fVec, o.fVec); }
diff --git a/src/opts/SkNx_sse.h b/src/opts/SkNx_sse.h
index 54c95970aa..415d6e1205 100644
--- a/src/opts/SkNx_sse.h
+++ b/src/opts/SkNx_sse.h
@@ -29,6 +29,8 @@ public:
AI void store(void* ptr) const { _mm_storel_pi((__m64*)ptr, fVec); }
+ AI SkNx operator - () const { return _mm_xor_ps(_mm_set1_ps(-0.0f), fVec); }  // Unary minus: XOR with -0.0f (only the sign bit set) flips the sign bit of every lane.
+
AI SkNx operator + (const SkNx& o) const { return _mm_add_ps(fVec, o.fVec); }
AI SkNx operator - (const SkNx& o) const { return _mm_sub_ps(fVec, o.fVec); }
AI SkNx operator * (const SkNx& o) const { return _mm_mul_ps(fVec, o.fVec); }
@@ -44,6 +46,7 @@ public:
AI static SkNx Min(const SkNx& l, const SkNx& r) { return _mm_min_ps(l.fVec, r.fVec); }
AI static SkNx Max(const SkNx& l, const SkNx& r) { return _mm_max_ps(l.fVec, r.fVec); }
+ AI SkNx abs() const { return _mm_andnot_ps(_mm_set1_ps(-0.0f), fVec); }  // Absolute value: ANDNOT with -0.0f clears the sign bit of every lane.
AI SkNx sqrt() const { return _mm_sqrt_ps (fVec); }
AI SkNx rsqrt() const { return _mm_rsqrt_ps(fVec); }
AI SkNx invert() const { return _mm_rcp_ps(fVec); }
@@ -57,6 +60,15 @@ public:
AI bool allTrue() const { return 0xff == (_mm_movemask_epi8(_mm_castps_si128(fVec)) & 0xff); }
AI bool anyTrue() const { return 0x00 != (_mm_movemask_epi8(_mm_castps_si128(fVec)) & 0xff); }
+ AI SkNx thenElse(const SkNx& t, const SkNx& e) const {  // Select between t and e, using fVec as the per-lane mask.
+ #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41
+ return _mm_blendv_ps(e.fVec, t.fVec, fVec);  // Per lane: t when the mask lane's sign bit is set, otherwise e.
+ #else
+ return _mm_or_ps(_mm_and_ps (fVec, t.fVec),  // Fallback below SSE4.1: (mask & t) | (~mask & e), a bit-by-bit select.
+ _mm_andnot_ps(fVec, e.fVec));  // NOTE(review): agrees with the blendv path only if each mask lane is all-ones or all-zeros; presumably true for comparison results -- confirm.
+ #endif
+ }
+
__m128 fVec;
};
@@ -95,6 +107,8 @@ public:
_mm_storeu_ps(((float*) dst) + 12, v3);
}
+ AI SkNx operator - () const { return _mm_xor_ps(_mm_set1_ps(-0.0f), fVec); }  // Unary minus: XOR with -0.0f (only the sign bit set) flips the sign bit of every lane.
+
AI SkNx operator + (const SkNx& o) const { return _mm_add_ps(fVec, o.fVec); }
AI SkNx operator - (const SkNx& o) const { return _mm_sub_ps(fVec, o.fVec); }
AI SkNx operator * (const SkNx& o) const { return _mm_mul_ps(fVec, o.fVec); }
diff --git a/tests/SkNxTest.cpp b/tests/SkNxTest.cpp
index e3f3cd6f3f..240d7e001b 100644
--- a/tests/SkNxTest.cpp
+++ b/tests/SkNxTest.cpp
@@ -210,6 +210,12 @@ DEF_TEST(SkNx_abs, r) {
REPORTER_ASSERT(r, fs[1] == 0.0f);
REPORTER_ASSERT(r, fs[2] == 2.0f);
REPORTER_ASSERT(r, fs[3] == 4.0f);
+ auto fshi = Sk2f(0.0f, -0.0f).abs();
+ auto fslo = Sk2f(2.0f, -4.0f).abs();
+ REPORTER_ASSERT(r, fshi[0] == 0.0f);
+ REPORTER_ASSERT(r, fshi[1] == 0.0f);
+ REPORTER_ASSERT(r, fslo[0] == 2.0f);
+ REPORTER_ASSERT(r, fslo[1] == 4.0f);
}
DEF_TEST(Sk4i_abs, r) {
@@ -358,3 +364,31 @@ DEF_TEST(SkNx_4fLoad4Store4, r) {
Sk4f::Store4(dst, a, b, c, d);
REPORTER_ASSERT(r, 0 == memcmp(dst, src, 16 * sizeof(float)));
}
+
+DEF_TEST(SkNx_neg, r) {  // Exercises unary minus on Sk4f and Sk2f.
+ auto fs = -Sk4f(0.0f, -0.0f, 2.0f, -4.0f);
+ REPORTER_ASSERT(r, fs[0] == 0.0f);  // == treats +0.0f and -0.0f as equal, so the sign of a zero result is not checked.
+ REPORTER_ASSERT(r, fs[1] == 0.0f);
+ REPORTER_ASSERT(r, fs[2] == -2.0f);
+ REPORTER_ASSERT(r, fs[3] == 4.0f);
+ auto fshi = -Sk2f(0.0f, -0.0f);  // Same four inputs as above, split across two Sk2f values.
+ auto fslo = -Sk2f(2.0f, -4.0f);
+ REPORTER_ASSERT(r, fshi[0] == 0.0f);
+ REPORTER_ASSERT(r, fshi[1] == 0.0f);
+ REPORTER_ASSERT(r, fslo[0] == -2.0f);
+ REPORTER_ASSERT(r, fslo[1] == 4.0f);
+}
+
+DEF_TEST(SkNx_thenElse, r) {  // Exercises thenElse() on masks produced by the < comparison.
+ auto fs = (Sk4f(0.0f, -0.0f, 2.0f, -4.0f) < 0).thenElse(-1, 1);  // Expect -1 in lanes whose input is < 0, and 1 elsewhere.
+ REPORTER_ASSERT(r, fs[0] == 1);
+ REPORTER_ASSERT(r, fs[1] == 1);  // -0.0f is not less than 0, so the else value is expected.
+ REPORTER_ASSERT(r, fs[2] == 1);
+ REPORTER_ASSERT(r, fs[3] == -1);
+ auto fshi = (Sk2f(0.0f, -0.0f) < 0).thenElse(-1, 1);  // Same four inputs as above, split across two Sk2f values.
+ auto fslo = (Sk2f(2.0f, -4.0f) < 0).thenElse(-1, 1);
+ REPORTER_ASSERT(r, fshi[0] == 1);
+ REPORTER_ASSERT(r, fshi[1] == 1);
+ REPORTER_ASSERT(r, fslo[0] == 1);
+ REPORTER_ASSERT(r, fslo[1] == -1);
+}