aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/opts/SkNx_sse.h
diff options
context:
space:
mode:
authorGravatar Chris Dalton <csmartdalton@google.com>2018-06-08 11:46:42 -0600
committerGravatar Skia Commit-Bot <skia-commit-bot@chromium.org>2018-06-08 18:41:13 +0000
commit89c5e8878e7957b6340de4ba1a14bded1f383fe2 (patch)
tree0f2c60c6b6a2a6399f814f463709be88e9c07943 /src/opts/SkNx_sse.h
parent76f5706d80f07427132c2c431ccb43bf03910115 (diff)
Implement Sk2f::floor
Bug: skia:
Change-Id: Id40e7165a338d321df71a1852b48eb2570ecd75b
Reviewed-on: https://skia-review.googlesource.com/133460
Commit-Queue: Mike Klein <mtklein@google.com>
Reviewed-by: Mike Klein <mtklein@google.com>
Diffstat (limited to 'src/opts/SkNx_sse.h')
-rw-r--r--src/opts/SkNx_sse.h30
1 files changed, 21 insertions, 9 deletions
diff --git a/src/opts/SkNx_sse.h b/src/opts/SkNx_sse.h
index b8f175c919..1720f14fb8 100644
--- a/src/opts/SkNx_sse.h
+++ b/src/opts/SkNx_sse.h
@@ -15,6 +15,17 @@
namespace {
+// Emulate _mm_floor_ps() with SSE2:
+// - roundtrip through integers via truncation
+// - subtract 1 if that's too big (possible for negative values).
+// This restricts the domain of our inputs to a maximum somewhere around 2^31.
+// Seems plenty big.
+AI static __m128 emulate_mm_floor_ps(__m128 v) {
+ __m128 roundtrip = _mm_cvtepi32_ps(_mm_cvttps_epi32(v));
+ __m128 too_big = _mm_cmpgt_ps(roundtrip, v);
+ return _mm_sub_ps(roundtrip, _mm_and_ps(too_big, _mm_set1_ps(1.0f)));
+}
+
template <>
class SkNx<2, float> {
public:
@@ -71,7 +82,15 @@ public:
AI static SkNx Min(const SkNx& l, const SkNx& r) { return _mm_min_ps(l.fVec, r.fVec); }
AI static SkNx Max(const SkNx& l, const SkNx& r) { return _mm_max_ps(l.fVec, r.fVec); }
- AI SkNx abs() const { return _mm_andnot_ps(_mm_set1_ps(-0.0f), fVec); }
+ AI SkNx abs() const { return _mm_andnot_ps(_mm_set1_ps(-0.0f), fVec); }
+ AI SkNx floor() const {
+ #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41
+ return _mm_floor_ps(fVec);
+ #else
+ return emulate_mm_floor_ps(fVec);
+ #endif
+ }
+
AI SkNx sqrt() const { return _mm_sqrt_ps (fVec); }
AI SkNx rsqrt() const { return _mm_rsqrt_ps(fVec); }
AI SkNx invert() const { return _mm_rcp_ps(fVec); }
@@ -161,14 +180,7 @@ public:
#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41
return _mm_floor_ps(fVec);
#else
- // Emulate _mm_floor_ps() with SSE2:
- // - roundtrip through integers via truncation
- // - subtract 1 if that's too big (possible for negative values).
- // This restricts the domain of our inputs to a maximum somehwere around 2^31.
- // Seems plenty big.
- __m128 roundtrip = _mm_cvtepi32_ps(_mm_cvttps_epi32(fVec));
- __m128 too_big = _mm_cmpgt_ps(roundtrip, fVec);
- return _mm_sub_ps(roundtrip, _mm_and_ps(too_big, _mm_set1_ps(1.0f)));
+ return emulate_mm_floor_ps(fVec);
#endif
}