diff options
-rw-r--r-- | src/core/SkNx.h | 2 |
-rw-r--r-- | src/opts/SkNx_neon.h | 3 |
-rw-r--r-- | src/opts/SkNx_sse.h | 31 |
-rw-r--r-- | tests/SkNxTest.cpp | 19 |
4 files changed, 55 insertions, 0 deletions
diff --git a/src/core/SkNx.h b/src/core/SkNx.h
index 5df575342c..65e3fcb8c8 100644
--- a/src/core/SkNx.h
+++ b/src/core/SkNx.h
@@ -218,6 +218,8 @@ struct SkNx<1,T> {
 
 private:
     // Helper functions to choose the right float/double methods. (In <cmath> madness lies...)
+    AI static int Abs(int val) { return val < 0 ? -val : val; }
+
     AI static float Abs(float val) { return ::fabsf(val); }
     AI static float Sqrt(float val) { return ::sqrtf(val); }
     AI static float Floor(float val) { return ::floorf(val); }
diff --git a/src/opts/SkNx_neon.h b/src/opts/SkNx_neon.h
index 5671f71315..5ec3dc2f2b 100644
--- a/src/opts/SkNx_neon.h
+++ b/src/opts/SkNx_neon.h
@@ -433,12 +433,15 @@ public:
     }
 
     AI static SkNx Min(const SkNx& a, const SkNx& b) { return vminq_s32(a.fVec, b.fVec); }
+    AI static SkNx Max(const SkNx& a, const SkNx& b) { return vmaxq_s32(a.fVec, b.fVec); }
     // TODO as needed
 
     AI SkNx thenElse(const SkNx& t, const SkNx& e) const {
         return vbslq_s32(vreinterpretq_u32_s32(fVec), t.fVec, e.fVec);
     }
 
+    AI SkNx abs() const { return vabsq_s32(fVec); }
+
     int32x4_t fVec;
 };
 
diff --git a/src/opts/SkNx_sse.h b/src/opts/SkNx_sse.h
index 3e59a9d0e8..7542e3b4d7 100644
--- a/src/opts/SkNx_sse.h
+++ b/src/opts/SkNx_sse.h
@@ -198,6 +198,37 @@ public:
 #endif
     }
 
+    AI SkNx abs() const {
+#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3
+        return _mm_abs_epi32(fVec);
+#else
+        SkNx mask = (*this) >> 31;
+        return (mask ^ (*this)) - mask;
+#endif
+    }
+
+    AI static SkNx Min(const SkNx& x, const SkNx& y) {
+#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41
+        return _mm_min_epi32(x.fVec, y.fVec);
+#else
+        __m128i less_than = _mm_cmplt_epi32(x.fVec, y.fVec);
+        __m128i choose_x = _mm_and_si128(less_than, x.fVec);
+        __m128i choose_y = _mm_andnot_si128(less_than, y.fVec);
+        return _mm_or_si128(choose_x, choose_y);
+#endif
+    }
+
+    AI static SkNx Max(const SkNx& x, const SkNx& y) {
+#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41
+        return _mm_max_epi32(x.fVec, y.fVec);
+#else
+        __m128i greater_than = _mm_cmpgt_epi32(x.fVec, y.fVec);
+        __m128i choose_x = _mm_and_si128(greater_than, x.fVec);
+        __m128i choose_y = _mm_andnot_si128(greater_than, y.fVec);
+        return _mm_or_si128(choose_x, choose_y);
+#endif
+    }
+
     __m128i fVec;
 };
 
diff --git a/tests/SkNxTest.cpp b/tests/SkNxTest.cpp
index ce7b5bc48f..e3f3cd6f3f 100644
--- a/tests/SkNxTest.cpp
+++ b/tests/SkNxTest.cpp
@@ -212,6 +212,25 @@ DEF_TEST(SkNx_abs, r) {
     REPORTER_ASSERT(r, fs[3] == 4.0f);
 }
 
+DEF_TEST(Sk4i_abs, r) {
+    auto is = Sk4i(0, -1, 2, -2147483647).abs();
+    REPORTER_ASSERT(r, is[0] == 0);
+    REPORTER_ASSERT(r, is[1] == 1);
+    REPORTER_ASSERT(r, is[2] == 2);
+    REPORTER_ASSERT(r, is[3] == 2147483647);
+}
+
+DEF_TEST(Sk4i_minmax, r) {
+    auto a = Sk4i(0, 2, 4, 6);
+    auto b = Sk4i(1, 1, 3, 7);
+    auto min = Sk4i::Min(a, b);
+    auto max = Sk4i::Max(a, b);
+    for(int i = 0; i < 4; ++i) {
+        REPORTER_ASSERT(r, min[i] == SkTMin(a[i], b[i]));
+        REPORTER_ASSERT(r, max[i] == SkTMax(a[i], b[i]));
+    }
+}
+
 DEF_TEST(SkNx_floor, r) {
     auto fs = Sk4f(0.4f, -0.4f, 0.6f, -0.6f).floor();
     REPORTER_ASSERT(r, fs[0] == 0.0f);