diff options
author | 2018-06-08 11:46:42 -0600 | |
---|---|---|
committer | 2018-06-08 18:41:13 +0000 | |
commit | 89c5e8878e7957b6340de4ba1a14bded1f383fe2 (patch) | |
tree | 0f2c60c6b6a2a6399f814f463709be88e9c07943 /src/opts/SkNx_sse.h | |
parent | 76f5706d80f07427132c2c431ccb43bf03910115 (diff) |
Implement Sk2f::floor
Bug: skia:
Change-Id: Id40e7165a338d321df71a1852b48eb2570ecd75b
Reviewed-on: https://skia-review.googlesource.com/133460
Commit-Queue: Mike Klein <mtklein@google.com>
Reviewed-by: Mike Klein <mtklein@google.com>
Diffstat (limited to 'src/opts/SkNx_sse.h')
-rw-r--r-- | src/opts/SkNx_sse.h | 30 |
1 files changed, 21 insertions, 9 deletions
```diff
diff --git a/src/opts/SkNx_sse.h b/src/opts/SkNx_sse.h
index b8f175c919..1720f14fb8 100644
--- a/src/opts/SkNx_sse.h
+++ b/src/opts/SkNx_sse.h
@@ -15,6 +15,17 @@
 
 namespace {
 
+// Emulate _mm_floor_ps() with SSE2:
+//   - roundtrip through integers via truncation
+//   - subtract 1 if that's too big (possible for negative values).
+// This restricts the domain of our inputs to a maximum somehwere around 2^31.
+// Seems plenty big.
+AI static __m128 emulate_mm_floor_ps(__m128 v) {
+    __m128 roundtrip = _mm_cvtepi32_ps(_mm_cvttps_epi32(v));
+    __m128 too_big = _mm_cmpgt_ps(roundtrip, v);
+    return _mm_sub_ps(roundtrip, _mm_and_ps(too_big, _mm_set1_ps(1.0f)));
+}
+
 template <>
 class SkNx<2, float> {
 public:
@@ -71,7 +82,15 @@ public:
     AI static SkNx Min(const SkNx& l, const SkNx& r) { return _mm_min_ps(l.fVec, r.fVec); }
     AI static SkNx Max(const SkNx& l, const SkNx& r) { return _mm_max_ps(l.fVec, r.fVec); }
 
-    AI SkNx abs() const { return _mm_andnot_ps(_mm_set1_ps(-0.0f), fVec); }
+    AI SkNx abs() const { return _mm_andnot_ps(_mm_set1_ps(-0.0f), fVec); }
+    AI SkNx floor() const {
+    #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41
+        return _mm_floor_ps(fVec);
+    #else
+        return emulate_mm_floor_ps(fVec);
+    #endif
+    }
+
     AI SkNx sqrt() const { return _mm_sqrt_ps (fVec); }
     AI SkNx rsqrt() const { return _mm_rsqrt_ps(fVec); }
     AI SkNx invert() const { return _mm_rcp_ps(fVec); }
@@ -161,14 +180,7 @@ public:
     #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41
         return _mm_floor_ps(fVec);
     #else
-        // Emulate _mm_floor_ps() with SSE2:
-        //   - roundtrip through integers via truncation
-        //   - subtract 1 if that's too big (possible for negative values).
-        // This restricts the domain of our inputs to a maximum somehwere around 2^31.
-        // Seems plenty big.
-        __m128 roundtrip = _mm_cvtepi32_ps(_mm_cvttps_epi32(fVec));
-        __m128 too_big = _mm_cmpgt_ps(roundtrip, fVec);
-        return _mm_sub_ps(roundtrip, _mm_and_ps(too_big, _mm_set1_ps(1.0f)));
+        return emulate_mm_floor_ps(fVec);
     #endif
     }
 
```