aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/opts
diff options
context:
space:
mode:
author: Yuqian Li <liyuqian@google.com> 2017-07-12 13:36:05 -0400
committer: Skia Commit-Bot <skia-commit-bot@chromium.org> 2017-07-12 20:20:43 +0000
commit 7da6ba2d63cfd5ae6add617f18ba4882e755642b (patch)
tree 99d469a5d4192dc65ab42fb8377e899b1c3c2f8b /src/opts
parent f0ca0e0844021d39466c5b69cc9afd195e353bba (diff)
Implement Sk4i's abs, min, max
CQ_INCLUDE_TRYBOTS=skia.primary:Test-Debian9-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD
Bug: skia:
Change-Id: Ia9ec3f72095e1c744f88df7bb990d99e0f87d578
Reviewed-on: https://skia-review.googlesource.com/22720
Commit-Queue: Yuqian Li <liyuqian@google.com>
Reviewed-by: Herb Derby <herb@google.com>
Diffstat (limited to 'src/opts')
-rw-r--r-- src/opts/SkNx_neon.h 3
-rw-r--r-- src/opts/SkNx_sse.h 31
2 files changed, 34 insertions, 0 deletions
diff --git a/src/opts/SkNx_neon.h b/src/opts/SkNx_neon.h
index 5671f71315..5ec3dc2f2b 100644
--- a/src/opts/SkNx_neon.h
+++ b/src/opts/SkNx_neon.h
@@ -433,12 +433,15 @@ public:
}
AI static SkNx Min(const SkNx& a, const SkNx& b) { return vminq_s32(a.fVec, b.fVec); }  // Per-lane signed 32-bit minimum of a and b (NEON vminq_s32).
+ AI static SkNx Max(const SkNx& a, const SkNx& b) { return vmaxq_s32(a.fVec, b.fVec); }  // Per-lane signed 32-bit maximum of a and b (NEON vmaxq_s32).
// TODO as needed
AI SkNx thenElse(const SkNx& t, const SkNx& e) const {  // Bitwise select between t and e, using this vector's bits as the mask.
return vbslq_s32(vreinterpretq_u32_s32(fVec), t.fVec, e.fVec);  // vbslq takes each bit from t where the mask bit is 1 and from e where it is 0.
}
+ AI SkNx abs() const { return vabsq_s32(fVec); }  // Per-lane absolute value of the signed 32-bit lanes (NEON vabsq_s32).
+
int32x4_t fVec;
};
diff --git a/src/opts/SkNx_sse.h b/src/opts/SkNx_sse.h
index 3e59a9d0e8..7542e3b4d7 100644
--- a/src/opts/SkNx_sse.h
+++ b/src/opts/SkNx_sse.h
@@ -198,6 +198,37 @@ public:
#endif
}
+ AI SkNx abs() const {  // Per-lane absolute value of the signed 32-bit lanes held in fVec.
+#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3
+ return _mm_abs_epi32(fVec);  // Single-intrinsic path, compiled in when the build targets SSSE3 or later.
+#else
+ SkNx mask = (*this) >> 31;  // Presumably an arithmetic shift (operator>> is defined outside this hunk - confirm): all ones in negative lanes, zero otherwise.
+ return (mask ^ (*this)) - mask;  // Where mask is all ones this is ~x + 1 == -x; where mask is zero it leaves x unchanged.
+#endif
+ }
+
+ AI static SkNx Min(const SkNx& x, const SkNx& y) {  // Per-lane signed 32-bit minimum of x and y.
+#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41
+ return _mm_min_epi32(x.fVec, y.fVec);  // Direct intrinsic, compiled in when the build targets SSE4.1 or later.
+#else
+ __m128i less_than = _mm_cmplt_epi32(x.fVec, y.fVec);  // Lane mask: all ones where x < y (signed compare), zero elsewhere.
+ __m128i choose_x = _mm_and_si128(less_than, x.fVec);  // Keeps x's lanes where x < y, zeroes the rest.
+ __m128i choose_y = _mm_andnot_si128(less_than, y.fVec);  // Keeps y's lanes where x >= y, zeroes the rest.
+ return _mm_or_si128(choose_x, choose_y);  // The two halves are non-zero in disjoint lanes, so OR merges them into the result.
+#endif
+ }
+
+ AI static SkNx Max(const SkNx& x, const SkNx& y) {  // Per-lane signed 32-bit maximum of x and y.
+#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41
+ return _mm_max_epi32(x.fVec, y.fVec);  // Direct intrinsic, compiled in when the build targets SSE4.1 or later.
+#else
+ __m128i greater_than = _mm_cmpgt_epi32(x.fVec, y.fVec);  // Lane mask: all ones where x > y (signed compare), zero elsewhere.
+ __m128i choose_x = _mm_and_si128(greater_than, x.fVec);  // Keeps x's lanes where x > y, zeroes the rest.
+ __m128i choose_y = _mm_andnot_si128(greater_than, y.fVec);  // Keeps y's lanes where x <= y, zeroes the rest.
+ return _mm_or_si128(choose_x, choose_y);  // The two halves are non-zero in disjoint lanes, so OR merges them into the result.
+#endif
+ }
+
__m128i fVec;
};