diff options
author | Yuqian Li <liyuqian@google.com> | 2017-07-12 13:36:05 -0400 |
---|---|---|
committer | Skia Commit-Bot <skia-commit-bot@chromium.org> | 2017-07-12 20:20:43 +0000 |
commit | 7da6ba2d63cfd5ae6add617f18ba4882e755642b (patch) | |
tree | 99d469a5d4192dc65ab42fb8377e899b1c3c2f8b /src/opts | |
parent | f0ca0e0844021d39466c5b69cc9afd195e353bba (diff) |
Implement Sk4i's abs, min, max
CQ_INCLUDE_TRYBOTS=skia.primary:Test-Debian9-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD
Bug: skia:
Change-Id: Ia9ec3f72095e1c744f88df7bb990d99e0f87d578
Reviewed-on: https://skia-review.googlesource.com/22720
Commit-Queue: Yuqian Li <liyuqian@google.com>
Reviewed-by: Herb Derby <herb@google.com>
Diffstat (limited to 'src/opts')
-rw-r--r-- | src/opts/SkNx_neon.h | 3 | ||||
-rw-r--r-- | src/opts/SkNx_sse.h | 31 |
2 files changed, 34 insertions, 0 deletions
```diff
diff --git a/src/opts/SkNx_neon.h b/src/opts/SkNx_neon.h
index 5671f71315..5ec3dc2f2b 100644
--- a/src/opts/SkNx_neon.h
+++ b/src/opts/SkNx_neon.h
@@ -433,12 +433,15 @@ public:
     }
 
     AI static SkNx Min(const SkNx& a, const SkNx& b) { return vminq_s32(a.fVec, b.fVec); }
+    AI static SkNx Max(const SkNx& a, const SkNx& b) { return vmaxq_s32(a.fVec, b.fVec); }
     // TODO as needed
 
     AI SkNx thenElse(const SkNx& t, const SkNx& e) const {
         return vbslq_s32(vreinterpretq_u32_s32(fVec), t.fVec, e.fVec);
     }
 
+    AI SkNx abs() const { return vabsq_s32(fVec); }
+
     int32x4_t fVec;
 };
 
diff --git a/src/opts/SkNx_sse.h b/src/opts/SkNx_sse.h
index 3e59a9d0e8..7542e3b4d7 100644
--- a/src/opts/SkNx_sse.h
+++ b/src/opts/SkNx_sse.h
@@ -198,6 +198,37 @@ public:
 #endif
     }
 
+    AI SkNx abs() const {
+#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3
+        return _mm_abs_epi32(fVec);
+#else
+        SkNx mask = (*this) >> 31;
+        return (mask ^ (*this)) - mask;
+#endif
+    }
+
+    AI static SkNx Min(const SkNx& x, const SkNx& y) {
+#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41
+        return _mm_min_epi32(x.fVec, y.fVec);
+#else
+        __m128i less_than = _mm_cmplt_epi32(x.fVec, y.fVec);
+        __m128i choose_x = _mm_and_si128(less_than, x.fVec);
+        __m128i choose_y = _mm_andnot_si128(less_than, y.fVec);
+        return _mm_or_si128(choose_x, choose_y);
+#endif
+    }
+
+    AI static SkNx Max(const SkNx& x, const SkNx& y) {
+#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41
+        return _mm_max_epi32(x.fVec, y.fVec);
+#else
+        __m128i greater_than = _mm_cmpgt_epi32(x.fVec, y.fVec);
+        __m128i choose_x = _mm_and_si128(greater_than, x.fVec);
+        __m128i choose_y = _mm_andnot_si128(greater_than, y.fVec);
+        return _mm_or_si128(choose_x, choose_y);
+#endif
+    }
+
     __m128i fVec;
 };
 
```