aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
author: Chris Dalton <csmartdalton@google.com> 2018-06-08 11:46:42 -0600
committer: Skia Commit-Bot <skia-commit-bot@chromium.org> 2018-06-08 18:41:13 +0000
commit: 89c5e8878e7957b6340de4ba1a14bded1f383fe2 (patch)
tree: 0f2c60c6b6a2a6399f814f463709be88e9c07943
parent: 76f5706d80f07427132c2c431ccb43bf03910115 (diff)
Implement Sk2f::floor
Bug: skia:
Change-Id: Id40e7165a338d321df71a1852b48eb2570ecd75b
Reviewed-on: https://skia-review.googlesource.com/133460
Commit-Queue: Mike Klein <mtklein@google.com>
Reviewed-by: Mike Klein <mtklein@google.com>
-rw-r--r-- src/opts/SkNx_neon.h 18
-rw-r--r-- src/opts/SkNx_sse.h 30
-rw-r--r-- tests/SkNxTest.cpp 8
3 files changed, 44 insertions, 12 deletions
diff --git a/src/opts/SkNx_neon.h b/src/opts/SkNx_neon.h
index 8bf6e7759c..1552e0c181 100644
--- a/src/opts/SkNx_neon.h
+++ b/src/opts/SkNx_neon.h
@@ -12,15 +12,20 @@
namespace {
-// ARMv8 has vrndmq_f32 to floor 4 floats. Here we emulate it:
+// ARMv8 has vrndm(q)_f32 to floor floats. Here we emulate it:
// - roundtrip through integers via truncation
// - subtract 1 if that's too big (possible for negative values).
// This restricts the domain of our inputs to a maximum somewhere around 2^31. Seems plenty big.
-AI static float32x4_t armv7_vrndmq_f32(float32x4_t v) {
+AI static float32x4_t emulate_vrndmq_f32(float32x4_t v) {
auto roundtrip = vcvtq_f32_s32(vcvtq_s32_f32(v));
auto too_big = vcgtq_f32(roundtrip, v);
return vsubq_f32(roundtrip, (float32x4_t)vandq_u32(too_big, (uint32x4_t)vdupq_n_f32(1)));
}
+AI static float32x2_t emulate_vrndm_f32(float32x2_t v) {
+ auto roundtrip = vcvt_f32_s32(vcvt_s32_f32(v));
+ auto too_big = vcgt_f32(roundtrip, v);
+ return vsub_f32(roundtrip, (float32x2_t)vand_u32(too_big, (uint32x2_t)vdup_n_f32(1)));
+}
template <>
class SkNx<2, float> {
@@ -102,6 +107,13 @@ public:
AI static SkNx Max(const SkNx& l, const SkNx& r) { return vmax_f32(l.fVec, r.fVec); }
AI SkNx abs() const { return vabs_f32(fVec); }
+ AI SkNx floor() const {
+ #if defined(SK_CPU_ARM64)
+ return vrndm_f32(fVec);
+ #else
+ return emulate_vrndm_f32(fVec);
+ #endif
+ }
AI SkNx rsqrt() const {
float32x2_t est0 = vrsqrte_f32(fVec);
@@ -223,7 +235,7 @@ public:
#if defined(SK_CPU_ARM64)
return vrndmq_f32(fVec);
#else
- return armv7_vrndmq_f32(fVec);
+ return emulate_vrndmq_f32(fVec);
#endif
}
diff --git a/src/opts/SkNx_sse.h b/src/opts/SkNx_sse.h
index b8f175c919..1720f14fb8 100644
--- a/src/opts/SkNx_sse.h
+++ b/src/opts/SkNx_sse.h
@@ -15,6 +15,17 @@
namespace {
+// Emulate _mm_floor_ps() with SSE2:
+// - roundtrip through integers via truncation
+// - subtract 1 if that's too big (possible for negative values).
+// This restricts the domain of our inputs to a maximum somewhere around 2^31.
+// Seems plenty big.
+AI static __m128 emulate_mm_floor_ps(__m128 v) {
+ __m128 roundtrip = _mm_cvtepi32_ps(_mm_cvttps_epi32(v));
+ __m128 too_big = _mm_cmpgt_ps(roundtrip, v);
+ return _mm_sub_ps(roundtrip, _mm_and_ps(too_big, _mm_set1_ps(1.0f)));
+}
+
template <>
class SkNx<2, float> {
public:
@@ -71,7 +82,15 @@ public:
AI static SkNx Min(const SkNx& l, const SkNx& r) { return _mm_min_ps(l.fVec, r.fVec); }
AI static SkNx Max(const SkNx& l, const SkNx& r) { return _mm_max_ps(l.fVec, r.fVec); }
- AI SkNx abs() const { return _mm_andnot_ps(_mm_set1_ps(-0.0f), fVec); }
+ AI SkNx abs() const { return _mm_andnot_ps(_mm_set1_ps(-0.0f), fVec); }
+ AI SkNx floor() const {
+ #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41
+ return _mm_floor_ps(fVec);
+ #else
+ return emulate_mm_floor_ps(fVec);
+ #endif
+ }
+
AI SkNx sqrt() const { return _mm_sqrt_ps (fVec); }
AI SkNx rsqrt() const { return _mm_rsqrt_ps(fVec); }
AI SkNx invert() const { return _mm_rcp_ps(fVec); }
@@ -161,14 +180,7 @@ public:
#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41
return _mm_floor_ps(fVec);
#else
- // Emulate _mm_floor_ps() with SSE2:
- // - roundtrip through integers via truncation
- // - subtract 1 if that's too big (possible for negative values).
- // This restricts the domain of our inputs to a maximum somewhere around 2^31.
- // Seems plenty big.
- __m128 roundtrip = _mm_cvtepi32_ps(_mm_cvttps_epi32(fVec));
- __m128 too_big = _mm_cmpgt_ps(roundtrip, fVec);
- return _mm_sub_ps(roundtrip, _mm_and_ps(too_big, _mm_set1_ps(1.0f)));
+ return emulate_mm_floor_ps(fVec);
#endif
}
diff --git a/tests/SkNxTest.cpp b/tests/SkNxTest.cpp
index 933a5fc877..a454043936 100644
--- a/tests/SkNxTest.cpp
+++ b/tests/SkNxTest.cpp
@@ -257,6 +257,14 @@ DEF_TEST(SkNx_floor, r) {
REPORTER_ASSERT(r, fs[1] == -1.0f);
REPORTER_ASSERT(r, fs[2] == 0.0f);
REPORTER_ASSERT(r, fs[3] == -1.0f);
+
+ auto fs2 = Sk2f(0.4f, -0.4f).floor();
+ REPORTER_ASSERT(r, fs2[0] == 0.0f);
+ REPORTER_ASSERT(r, fs2[1] == -1.0f);
+
+ auto fs3 = Sk2f(0.6f, -0.6f).floor();
+ REPORTER_ASSERT(r, fs3[0] == 0.0f);
+ REPORTER_ASSERT(r, fs3[1] == -1.0f);
}
DEF_TEST(SkNx_shuffle, r) {