aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r-- src/core/SkNx.h 2
-rw-r--r-- src/opts/SkNx_neon.h 3
-rw-r--r-- src/opts/SkNx_sse.h 31
-rw-r--r-- tests/SkNxTest.cpp 19
4 files changed, 55 insertions, 0 deletions
diff --git a/src/core/SkNx.h b/src/core/SkNx.h
index 5df575342c..65e3fcb8c8 100644
--- a/src/core/SkNx.h
+++ b/src/core/SkNx.h
@@ -218,6 +218,8 @@ struct SkNx<1,T> {
private:
// Helper functions to choose the right float/double methods. (In <cmath> madness lies...)
+ // Integer absolute value. The negation is carried out in unsigned arithmetic so that
+ // val == INT_MIN does not trigger signed-overflow undefined behavior (plain -val would);
+ // on two's-complement targets that input converts back to INT_MIN.
+ AI static int Abs(int val) {
+     return val < 0 ? static_cast<int>(0u - static_cast<unsigned>(val)) : val;
+ }
+
AI static float Abs(float val) { return ::fabsf(val); }
AI static float Sqrt(float val) { return ::sqrtf(val); }
AI static float Floor(float val) { return ::floorf(val); }
diff --git a/src/opts/SkNx_neon.h b/src/opts/SkNx_neon.h
index 5671f71315..5ec3dc2f2b 100644
--- a/src/opts/SkNx_neon.h
+++ b/src/opts/SkNx_neon.h
@@ -433,12 +433,15 @@ public:
}
AI static SkNx Min(const SkNx& a, const SkNx& b) { return vminq_s32(a.fVec, b.fVec); }
+ // Lane-wise maximum of a and b, computed with the NEON vmaxq_s32 intrinsic.
+ AI static SkNx Max(const SkNx& a, const SkNx& b) {
+     return vmaxq_s32(a.fVec, b.fVec);
+ }
// TODO as needed
AI SkNx thenElse(const SkNx& t, const SkNx& e) const {
return vbslq_s32(vreinterpretq_u32_s32(fVec), t.fVec, e.fVec);
}
+ // Lane-wise absolute value of this vector, computed with the NEON vabsq_s32 intrinsic.
+ AI SkNx abs() const {
+     return vabsq_s32(fVec);
+ }
+
int32x4_t fVec;
};
diff --git a/src/opts/SkNx_sse.h b/src/opts/SkNx_sse.h
index 3e59a9d0e8..7542e3b4d7 100644
--- a/src/opts/SkNx_sse.h
+++ b/src/opts/SkNx_sse.h
@@ -198,6 +198,37 @@ public:
#endif
}
+ // Lane-wise absolute value of this vector.
+ AI SkNx abs() const {
+#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3
+     // SSSE3 and later: use the dedicated intrinsic.
+     return _mm_abs_epi32(fVec);
+#else
+     // Branch-free fallback built from this class's own operators: with sign = x >> 31,
+     // the result is (sign ^ x) - sign.
+     // NOTE(review): this relies on operator>> being an arithmetic shift (sign is then all
+     // ones for negative lanes, zero otherwise); it is defined outside this hunk -- confirm.
+     const SkNx& self = *this;
+     SkNx sign = self >> 31;
+     return (sign ^ self) - sign;
+#endif
+ }
+
+ // Lane-wise minimum of x and y.
+ AI static SkNx Min(const SkNx& x, const SkNx& y) {
+#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41
+     // SSE4.1 and later: use the dedicated intrinsic.
+     return _mm_min_epi32(x.fVec, y.fVec);
+#else
+     // Fallback: build a per-lane mask that is all ones where x < y, then blend by hand,
+     // taking x in the masked lanes and y in the others.
+     __m128i x_is_less = _mm_cmplt_epi32(x.fVec, y.fVec);
+     return _mm_or_si128(_mm_and_si128(x_is_less, x.fVec),
+                         _mm_andnot_si128(x_is_less, y.fVec));
+#endif
+ }
+
+ // Lane-wise maximum of x and y.
+ AI static SkNx Max(const SkNx& x, const SkNx& y) {
+#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41
+     // SSE4.1 and later: use the dedicated intrinsic.
+     return _mm_max_epi32(x.fVec, y.fVec);
+#else
+     // Fallback: build a per-lane mask that is all ones where x > y, then blend by hand,
+     // taking x in the masked lanes and y in the others.
+     __m128i x_is_greater = _mm_cmpgt_epi32(x.fVec, y.fVec);
+     return _mm_or_si128(_mm_and_si128(x_is_greater, x.fVec),
+                         _mm_andnot_si128(x_is_greater, y.fVec));
+#endif
+ }
+
__m128i fVec;
};
diff --git a/tests/SkNxTest.cpp b/tests/SkNxTest.cpp
index ce7b5bc48f..e3f3cd6f3f 100644
--- a/tests/SkNxTest.cpp
+++ b/tests/SkNxTest.cpp
@@ -212,6 +212,25 @@ DEF_TEST(SkNx_abs, r) {
REPORTER_ASSERT(r, fs[3] == 4.0f);
}
+// Checks Sk4i::abs() on a zero lane, a positive lane, and two negative lanes.
+DEF_TEST(Sk4i_abs, r) {
+    // The last lane is -(2^31 - 1), whose magnitude is still representable as a
+    // 32-bit signed value.
+    Sk4i input(0, -1, 2, -2147483647);
+    auto is = input.abs();
+
+    REPORTER_ASSERT(r, is[0] == 0);
+    REPORTER_ASSERT(r, is[1] == 1);
+    REPORTER_ASSERT(r, is[2] == 2);
+    REPORTER_ASSERT(r, is[3] == 2147483647);
+}
+
+// Checks Sk4i::Min / Sk4i::Max lane by lane against the scalar SkTMin / SkTMax.
+DEF_TEST(Sk4i_minmax, r) {
+    // Lanes 0 and 3 have a < b; lanes 1 and 2 have a > b, so both orderings are covered.
+    Sk4i a(0, 2, 4, 6);
+    Sk4i b(1, 1, 3, 7);
+
+    Sk4i min = Sk4i::Min(a, b);
+    Sk4i max = Sk4i::Max(a, b);
+
+    for (int i = 0; i != 4; i++) {
+        REPORTER_ASSERT(r, min[i] == SkTMin(a[i], b[i]));
+        REPORTER_ASSERT(r, max[i] == SkTMax(a[i], b[i]));
+    }
+}
+
DEF_TEST(SkNx_floor, r) {
auto fs = Sk4f(0.4f, -0.4f, 0.6f, -0.6f).floor();
REPORTER_ASSERT(r, fs[0] == 0.0f);